"""Patch a `*.zim` file generated by Zimit.

As of 2026-06, some SVG icons collected by https://zimit.kiwix.org/ are corrupted.
The resulting `*.zim` contains all icons, but the contents of some icons are meaningless bytes.
This is likely a programming error rather than a network error, as the list of corrupted icons can be consistently reproduced.
However, it is not clear whether the cause is typst or zimit.

In any case, this script fixes these icons by downloading from https://typst.app directly.
For convenience, it also reduces the file size by excluding misc-a.typst.app and enabling compression.

Usage:
    uv run patch_zim.py INPUT.zim OUTPUT.zim
"""

# /// script
# requires-python = ">=3.13"
# dependencies = [
#     "libzim>=3.10.0",
#     "requests>=2.34.2",
# ]
# ///

from pathlib import Path

import requests
from libzim.reader import Archive, Entry
from libzim.writer import Creator, Hint, Item, StringProvider


class PassThrough(Item):
    """Pass `libzim.reader.Entry` to `libzim.writer.Item`.

    Adapted from python-libzim's README.

    The path, title and MIME type always come from the wrapped entry.
    The content comes from the entry as well, unless `override_content` is given.

    Args:
        entry: The source entry to copy.
        override_content: Replacement content for the entry.
            `None` (the default) means "keep the content of `entry`".
    """

    def __init__(self, entry: Entry, /, *, override_content: bytes | None = None) -> None:
        super().__init__()
        self.entry = entry
        self.override_content = override_content

    def get_path(self) -> str:
        """Return the path of the wrapped entry."""
        return self.entry.path

    def get_title(self) -> str:
        """Return the title of the wrapped entry."""
        return self.entry.title

    def get_mimetype(self) -> str:
        """Return the MIME type of the wrapped entry's item."""
        return self.entry.get_item().mimetype

    def get_contentprovider(self) -> StringProvider:
        """Return a provider for the override content if given, else for the entry's own content."""
        # Compare against `None` explicitly: a truthiness test would treat an empty
        # override (`b""`) as "no override" and silently fall back to the original content.
        if self.override_content is not None:
            return StringProvider(self.override_content)
        return StringProvider(bytes(self.entry.get_item().content))

    def get_hints(self) -> dict[Hint, bool]:
        """Return the same hints for every entry: front article and compress, both enabled."""
        return {Hint.FRONT_ARTICLE: True, Hint.COMPRESS: True}


def repackage(src_path: Path, dst_path: Path, /) -> None:
    """Repackage a `*.zim` file and apply the fixes.

    Illustrations, metadata and entries are copied from `src_path` into a new archive
    written to `dst_path`, with the following changes:

    - Entries under `misc-a.typst.app/` are dropped.
    - `*.svg` entries whose content does not start with `<svg ` or `<?xml ` are considered
      broken and are replaced by a fresh download from `https://{entry.path}`.

    Args:
        src_path: The archive generated by Zimit.
        dst_path: Where to write the patched archive.

    Raises:
        AssertionError: If a broken SVG is found outside the expected typst.app locations,
            or if a redirect entry is encountered (redirects are not supported).
        requests.RequestException: If downloading a replacement SVG fails or times out.
    """
    svg_prefixes = (b"<svg ", b"<?xml ")
    # Seconds to wait for the server; without a timeout a stalled connection hangs forever.
    download_timeout = 30

    src = Archive(src_path)
    with Creator(dst_path).config_indexing(True, "eng") as creator:
        # Copy favicon
        for size in src.get_illustration_sizes():
            creator.add_illustration(size, bytes(src.get_illustration_item(size).content))

        # Copy metadata
        for key in src.metadata_keys:
            if key in {"Illustration_48x48@1", "Counter"}:
                # These two keys cause `RuntimeError: Impossible to add M/… dirent's title to add is : … existing dirent's title is : …`.
                continue
            value = src.get_metadata_item(key)
            creator.add_metadata(key, bytes(value.content), mimetype=value.mimetype)

        # Copy entries
        for i in range(src.entry_count):
            # It's safe to use this private method. See https://github.com/openzim/python-libzim/issues/225
            entry = src._get_entry_by_id(i)

            if entry.path.startswith("misc-a.typst.app/"):
                # misc-a.typst.app is a Plausible Analytics server. Drop its pages.
                continue

            # Reject redirects up front, before reading or adding anything for the entry.
            # Raised explicitly (not via `assert`) so that the check survives `python -O`.
            if entry.is_redirect:
                raise AssertionError(f"This script does not support redirect entries, but found one: {entry}")

            override_content = None
            if entry.path.endswith(".svg"):
                content = bytes(entry.get_item().content)
                if not content.startswith(svg_prefixes):
                    print(f"💔 Found broken SVG: {entry.path = }, {content[:10] = }, {len(content) = }")
                    # The URL below is derived from data inside the archive, so only fetch
                    # from the locations that are known to be affected. This must not be an
                    # `assert`, which would be stripped under `python -O`.
                    if not (
                        entry.path.startswith("typst.app/assets/icons")
                        or entry.path in {
                            "typst.app/assets/images/typst.svg",
                            "typst.app/sprites/16-social.svg",
                        }
                    ):
                        raise AssertionError(f"Unexpected location for a broken SVG: {entry.path}")
                    url = f"https://{entry.path}"
                    r = requests.get(url, timeout=download_timeout)
                    r.raise_for_status()
                    override_content = r.content
                    print(f"💚 Fixed from {url}")

            creator.add_item(PassThrough(entry, override_content=override_content))

        # Copy the mainPage entry
        main_entry = src.main_entry
        while main_entry.is_redirect:
            # Follow the redirect chain to the entry that is actually copied.
            main_entry = main_entry.get_redirect_entry()
        creator.set_mainpath(main_entry.path)


if __name__ == "__main__":
    # Command-line entry point: `patch_zim.py INPUT.zim OUTPUT.zim`.
    from sys import argv

    # Fail with a usage message instead of a bare `IndexError` when arguments are missing.
    if len(argv) < 3:
        raise SystemExit(f"Usage: uv run {Path(argv[0]).name} INPUT.zim OUTPUT.zim")

    src = Path(argv[1])
    dst = Path(argv[2])
    # Validate explicitly; an `assert` would be stripped under `python -O`.
    if not src.exists():
        raise SystemExit(f"Input file does not exist: {src}")

    repackage(src, dst)
