#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
AKF-Konverter (Analysator + Extraktor)
Geschrieben um Weihnachten 2025 von Robert Pfeffer, Hessen, mit ChatGPT 5.2

Zweck
=====
Dieses Skript kann proprietäre AKF-Kartendateien (Megatel GmbH, Bremen 1994) analysieren und in palettierte TIFFs
umwandeln. Falls eine Georeferenz im AKF-Header erkannt werden kann, wird sie nachträglich in die
TIFFs geschrieben (GeoTIFF). Optional wird außerdem eine VRT-Datei (virtuelles Mosaik) erzeugt.

Wichtiger Hinweis
=================
AKF ist ein proprietäres Format. Das Skript arbeitet deshalb mit plausibilitätsbasierten Heuristiken.
Die Georeferenzierung wird nur dann angewendet, wenn sie im Header ausreichend sicher erkannt wurde.
Andernfalls werden dennoch TIFFs geschrieben, jedoch ohne Koordinatenbezug.

Planarten (intern)
==================
Es gibt zwei beobachtete Grundvarianten:

1) pageplan („klassischer Seitenplan“)
   - Es gibt einen Seitenplan aus 16-Byte-Records.
   - Danach folgt eine Offsettabelle, die pro Seite die Kachel-Offsets enthält.
   - Ausgabe: 1 TIFF pro belegter Seite:  page_rowXX_colYY.tif
   - Georeferenz: pro Seite (je TIFF) möglich.
   - VRT: sinnvoll (Mosaik aus Seiten).

2) tileplan („Kachelplan“, z.B. DTK1000 u.ä.)
   - Kein Seitenplan-Raster, sondern direkt ein 16-Byte-Header + Offsettabelle tc*tr.
   - Jede Kachel entspricht direkt einer „Seite“ (ein Ausgabebaustein).
   - Ausgabe: ein Gesamt-Mosaik als eine Datei:  <name>.tif
   - Georeferenz: einmal für die Gesamtdatei möglich.
   - VRT: hier i.d.R. nicht nötig (nur 1 Datei).

Voraussetzungen
===============
- Python 3.10+ (wegen Typen „X | None“)
- Python-Module:
  - numpy
  - Pillow (PIL)
- Externe Programme (nur für Georeferenz / VRT):
  - gdal_translate
  - gdalbuildvrt
  Diese kommen z.B. mit QGIS oder OSGeo4W.

Benutzung (Kommandozeile)
=========================
A) Extraktion (Standard)
   python AKF-Konverter.py <DATEI.akf> --out-dir <ZIELVERZEICHNIS> [Optionen]

B) Nur Analyse (Bericht, Palette, Georeferenz-Kandidaten)
   python AKF-Konverter.py analyze <DATEI.akf> [Optionen]

C) Extraktion explizit (gleich wie Standard, aber eindeutig)
   python AKF-Konverter.py extract <DATEI.akf> --out-dir <ZIELVERZEICHNIS> [Optionen]

Geführter Dialog
================
Wenn das Skript ohne Parameter gestartet wird (d.h. `python AKF-Konverter.py`), startet ein kurzer
Dialog, der nach AKF-Datei, Zielverzeichnis und gewünschten Optionen fragt.

Optionen (Extraktion)
=====================
--out-dir VERZ
  Zielverzeichnis. Wenn nicht gesetzt: Unterordner mit dem Basisnamen der AKF-Datei.

--no-compress
  Keine TIFF-Kompression.

--verbose
  Zusätzliche Fortschrittsausgaben.

--no-georef
  Georeferenzierung nicht anwenden (auch wenn sie erkannt werden könnte).

--no-vrt
  Keine VRT-Datei erzeugen (nur pageplan relevant).

--lookahead N
  Suchfenster für CLEAR-Kandidaten beim LZW-Resync (Standard: 2000).

--resync-max N
  Maximale Anzahl Offsets (ab Start) die bei Resync geprüft werden (Standard: 200).

--missing-fill WERT
  Füllwert (0..255) für nicht dekodierbare Kacheln (Standard: 0).

--strict
  Bei nicht dekodierbarer Kachel sofort abbrechen.

--log-resync
  Resync-Ereignisse und Fehlschläge in Textdateien im Zielverzeichnis protokollieren.

Optionen (Analyse)
==================
--text
  Lesbare Textausgabe auf der Konsole.

--out DATEI.json
  JSON-Bericht schreiben.

--palette-png DATEI.png
  Palette als PNG schreiben.

--header-len N
  Größe des Headerfensters für Metadaten- und Georeferenzsuche (Standard: 512).

--dmb DATEI.dmb
  Optional: DMB-Datei zur Gegenprüfung (falls vorhanden).

--deep-georef
  Deutlich breitere (langsamere) Suche nach Georeferenzmerkmalen (Debug).

Komfortfunktionen
=================
- Fehlende Python-Module: Es wird angeboten, sie per pip nachzuinstallieren (nach Rückfrage).
- GDAL-Programme nicht gefunden: Es wird (im Dialog-/Terminalbetrieb) nach einem GDAL-/QGIS-Ordner
  oder direkt nach gdal_translate.exe gefragt; der Pfad wird für den aktuellen Lauf zu PATH ergänzt.

"""

from __future__ import annotations

import argparse
import bisect
import glob
import json
import math
import mmap
import os
import re
import shutil
import struct
import subprocess
import sys
from dataclasses import dataclass, asdict
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple


# ======================================================================================
# 1) Small helpers: interactivity, dependencies, locating GDAL
# ======================================================================================

def _is_interactive() -> bool:
    """True, wenn Ein-/Ausgabe ein Terminal ist (Dialog sinnvoll)."""
    try:
        return sys.stdin.isatty() and sys.stdout.isatty()
    except Exception:
        return False


def _ask(prompt: str) -> str:
    """Einfache Eingabehilfe mit robustem Strippen von Anführungszeichen."""
    s = input(prompt).strip()
    if len(s) >= 2 and ((s[0] == s[-1]) and s[0] in ("'", '"')):
        s = s[1:-1].strip()
    return s


def _ask_yes_no(prompt: str, default_yes: bool = False) -> bool:
    """Ja/Nein-Abfrage. Leere Eingabe -> Default."""
    suffix = " [J/n] " if default_yes else " [j/N] "
    s = _ask(prompt + suffix).lower()
    if not s:
        return default_yes
    return s.startswith("j")


def _looks_like_permission_problem(msg: str) -> bool:
    s = (msg or "").lower()
    needles = [
        "permission denied",
        "access is denied",
        "errno 13",
        "not writable",
        "could not install packages due to an oserror",
        "requires elevation",
        "administrator",
        "admin rights",
        "zugriff verweigert",
        "keine berechtigung",
        "nicht beschreibbar",
        "berechtigung verweigert",
    ]
    return any(n in s for n in needles)


def _run_pip_install(pkgs: List[str], *, user: bool) -> tuple[bool, str]:
    """
    Führt pip über denselben Python-Interpreter aus.
    Rückgabe: (ok, meldung_ausgabe)
    """
    args = [sys.executable, "-m", "pip", "install", "--upgrade"]
    if user:
        args.append("--user")
    args += pkgs

    try:
        cp = subprocess.run(args, check=False, capture_output=True, text=True)
        out = (cp.stdout or "") + "\n" + (cp.stderr or "")
        return (cp.returncode == 0), out
    except Exception as e:
        return False, str(e)


def _ensure_python_modules(modules: List[Tuple[str, str]]) -> None:
    """
    Stellt sicher, dass benötigte Python-Module importierbar sind.
    modules: Liste aus (import_name, pip_name)

    Strategie:
      Plan A: systemweit installieren (ohne --user)
      Plan B: bei Rechteproblem benutzerspezifisch (--user)
    """
    missing: List[Tuple[str, str]] = []
    for import_name, pip_name in modules:
        try:
            __import__(import_name)
        except Exception:
            missing.append((import_name, pip_name))

    if not missing:
        return

    msg = "Es fehlen Python-Bibliotheken:\n" + "\n".join(
        f"  - {imp} (pip: {pip})" for imp, pip in missing
    )
    print(msg, file=sys.stderr)

    if not _is_interactive():
        raise SystemExit(
            "Abbruch: fehlende Bibliotheken. Bitte installieren Sie sie (systemweit oder benutzerspezifisch) und starten Sie erneut."
        )

    if not _ask_yes_no("Soll ich die fehlenden Bibliotheken jetzt installieren?", default_yes=True):
        raise SystemExit("Abbruch: fehlende Bibliotheken wurden nicht installiert.")

    pkgs = [pip for _imp, pip in missing]

    # Plan A: system-wide
    print("Versuch 1/2: Installation systemweit (ohne --user) ...")
    ok, out = _run_pip_install(pkgs, user=False)
    if not ok and _looks_like_permission_problem(out):
        # Plan B: per-user
        print("Systemweite Installation scheiterte offenbar wegen fehlender Rechte.")
        print("Versuch 2/2: Installation benutzerspezifisch (--user) ...")
        ok2, out2 = _run_pip_install(pkgs, user=True)
        if not ok2:
            print(out2.strip(), file=sys.stderr)
            raise SystemExit(
                "Installation fehlgeschlagen (auch benutzerspezifisch). "
                "Möglicherweise ist pip/Netzzugriff durch Richtlinien eingeschränkt."
            )
    elif not ok:
        # no permissions problem -> Plan B is unlikely to help
        print(out.strip(), file=sys.stderr)
        raise SystemExit(
            "Installation fehlgeschlagen. (Kein eindeutiger Berechtigungsfehler erkannt.) "
            "Bitte prüfen Sie Netz/Proxy/Zertifikate oder installieren Sie die Bibliotheken manuell."
        )

    # Verify the imports once more
    for import_name, _pip_name in missing:
        try:
            __import__(import_name)
        except Exception:
            raise SystemExit(
                f"Installation scheint nicht erfolgreich: Modul {import_name!r} ist weiterhin nicht importierbar."
            )


def _maybe_add_to_path(dir_or_exe: str) -> None:
    """Ergänzt PATH für den aktuellen Lauf (vorn)."""
    p = Path(dir_or_exe)
    if p.is_file():
        p = p.parent
    if not p.exists():
        return
    cur = os.environ.get("PATH", "")
    os.environ["PATH"] = str(p) + os.pathsep + cur


def _find_in_known_gdal_locations(exe_name: str) -> Optional[str]:
    """
    Sucht GDAL-Programm zuerst im PATH, dann in typischen QGIS/OSGeo4W-Orten.
    exe_name ohne .exe möglich.
    """
    found = shutil.which(exe_name)
    if found:
        return found

    # Typical Windows locations
    candidates: List[str] = []
    candidates += glob.glob(rf"C:\Program Files\QGIS *\bin\{exe_name}.exe")
    candidates += [
        rf"C:\OSGeo4W64\bin\{exe_name}.exe",
        rf"C:\OSGeo4W\bin\{exe_name}.exe",
    ]
    for c in candidates:
        if os.path.exists(c):
            return c
    return None


def ensure_gdal_tools(interactive: bool) -> Tuple[Optional[str], Optional[str]]:
    """
    Liefert (gdal_translate, gdalbuildvrt). Wenn nicht auffindbar:
    - interaktiv: fragt nach einem Ordner oder nach gdal_translate.exe
    - nicht interaktiv: liefert (None, None)
    """
    gt = _find_in_known_gdal_locations("gdal_translate")
    bv = _find_in_known_gdal_locations("gdalbuildvrt")
    if gt and bv:
        return gt, bv

    if not interactive:
        return None, None

    print(
        "\nGDAL wurde nicht automatisch gefunden.\n"
        "Bitte geben Sie einen Ordner an, der gdal_translate.exe und gdalbuildvrt.exe enthält\n"
        "(z.B. ...\\QGIS\\bin oder ...\\OSGeo4W64\\bin), oder den vollen Pfad zu gdal_translate.exe.\n"
    )
    while True:
        s = _ask("GDAL-Ordner oder gdal_translate.exe: ")
        if not s:
            return None, None
        p = Path(s)
        if p.is_file() and p.name.lower().startswith("gdal_translate"):
            _maybe_add_to_path(str(p))
        elif p.is_dir():
            _maybe_add_to_path(str(p))
        else:
            print("Pfad nicht gefunden. Bitte erneut versuchen.")
            continue

        gt2 = _find_in_known_gdal_locations("gdal_translate")
        bv2 = _find_in_known_gdal_locations("gdalbuildvrt")
        if gt2 and bv2:
            return gt2, bv2

        print("Noch nicht gefunden. Bitte prüfen Sie den Pfad und versuchen Sie es erneut.")


# ======================================================================================
# 2) Shared low-level helpers (address formatting, u16/u32)
# ======================================================================================

def fmt_addr(x: int) -> str:
    return f"0x{x:08X}"


def u16(mm: mmap.mmap, off: int) -> int:
    return struct.unpack_from("<H", mm, off)[0]


def u32(mm: mmap.mmap, off: int) -> int:
    return struct.unpack_from("<I", mm, off)[0]


# ======================================================================================
# 3) Page plan / tile plan: detection, evaluation, grid inference
# ======================================================================================

@dataclass
class PageRecord:
    tile_cols: int
    tile_rows: int
    page_w: int
    page_h: int
    flags: int

    @property
    def tiles_per_page(self) -> int:
        return self.tile_cols * self.tile_rows
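
# On-disk layout of one page-plan record (16 bytes, little-endian), as read by
# record_plausible() / read_pageplan_records() below:
#
#   tile_cols: u16, tile_rows: u16, page_w: u32, page_h: u32, flags: u32
#
# Illustrative example: a page of 16x16 tiles with an effective size of
# 4000x3000 px and flags=0 would be stored as the 16 bytes
#   10 00 10 00 A0 0F 00 00 B8 0B 00 00 00 00 00 00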


def record_plausible(mm: mmap.mmap, off: int) -> bool:
    try:
        tc = u16(mm, off)
        tr = u16(mm, off + 2)
        w = u32(mm, off + 4)
        h = u32(mm, off + 8)
        flags = u32(mm, off + 12)
    except struct.error:
        return False

    if flags not in (0, 1, 2, 3, 4):
        return False
    if not (4 <= tc <= 128 and 4 <= tr <= 128):
        return False
    if not (500 <= w <= 500_000 and 500 <= h <= 500_000):
        return False
    return True


def scan_pageplan_candidates(mm: mmap.mmap, min_records: int = 20) -> List[Tuple[int, int]]:
    size = len(mm)
    cands: List[Tuple[int, int]] = []
    step = 2
    rec_len = 16

    off = 0
    while off <= size - rec_len * min_records:
        if record_plausible(mm, off):
            cnt = 0
            j = off
            while j <= size - rec_len and record_plausible(mm, j):
                cnt += 1
                j += rec_len
            if cnt >= min_records:
                cands.append((off, cnt))
            off = max(off + step, j - rec_len + step)
        else:
            off += step
    return cands


def choose_best_pageplan(mm: mmap.mmap) -> Tuple[int, int]:
    cands = scan_pageplan_candidates(mm, min_records=20)
    if not cands:
        raise RuntimeError("Kein plausibler Seitenplan (16-Byte-Records) gefunden.")
    cands.sort(key=lambda t: (-t[1], t[0]))
    return cands[0]


def choose_best_plan(mm: mmap.mmap) -> Tuple[str, Dict[str, Any]]:
    """
    Ermittelt entweder:
      - pageplan: klassischer Seitenplan (Records à 16 Byte)
      - tileplan: 16-Byte-Header + nachfolgende Offsettabelle (tc*tr u32-Offets)

    Rückgabe: (kind, info_dict)
      kind = 'pageplan' | 'tileplan'
    """
    try:
        pp_off, pp_cnt = choose_best_pageplan(mm)
        return "pageplan", {"pp_off": pp_off, "pp_cnt": pp_cnt}
    except RuntimeError:
        pass

    # tileplan fallback (DTK1000 and similar)
    size = len(mm)
    for off in range(0, max(0, size - 16 - 16), 2):
        tc, tr = struct.unpack_from("<HH", mm, off)
        if not (4 <= tc <= 1024 and 4 <= tr <= 1024):
            continue
        w, h, flags = struct.unpack_from("<III", mm, off + 4)
        if flags not in (0, 1, 2, 3, 4):
            continue
        if not (128 <= w <= 2_000_000 and 128 <= h <= 2_000_000):
            continue
        n = tc * tr
        if n <= 0 or n > 500_000:
            continue
        ot_start = off + 16
        ot_end = ot_start + n * 4
        if ot_end > size:
            continue

        sample_n = min(n, 64)
        sample = struct.unpack_from(f"<{sample_n}I", mm, ot_start)
        nonzero = [x for x in sample if x not in (0, 0xFFFFFFFF)]
        if not nonzero:
            continue
        if min(nonzero) > 65536:
            continue
        if not all(0 <= x < off for x in nonzero):
            continue

        return "tileplan", {
            "pp_off": off,              # hier „Plan-Offset“ (Headerbeginn)
            "tile_cols": tc,
            "tile_rows": tr,
            "total_w": w,
            "total_h": h,
            "flags": flags,
            "ot_off": ot_start,
            "ot_bytes": n * 4,
        }

    raise RuntimeError("Kein plausibler Seiten- oder Kachelplan gefunden.")


def read_pageplan_records(mm: mmap.mmap, off: int, cnt: int) -> List[PageRecord]:
    out: List[PageRecord] = []
    for i in range(cnt):
        base = off + i * 16
        out.append(PageRecord(
            tile_cols=u16(mm, base),
            tile_rows=u16(mm, base + 2),
            page_w=u32(mm, base + 4),
            page_h=u32(mm, base + 8),
            flags=u32(mm, base + 12),
        ))
    return out


def factor_pairs(n: int) -> List[Tuple[int, int]]:
    pairs = []
    for a in range(1, int(n ** 0.5) + 1):
        if n % a == 0:
            b = n // a
            pairs.append((a, b))
            if a != b:
                pairs.append((b, a))
    return sorted(pairs)
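
# For example: factor_pairs(12) == [(1, 12), (2, 6), (3, 4), (4, 3), (6, 2), (12, 1)]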


def infer_grid(records: List[PageRecord]) -> Tuple[int, int]:
    """
    Leitet aus der Anzahl der Records eine plausible Rasterform (cols, rows) ab.

    Heuristik:
    - Kandidaten sind alle Faktorenpaare der Record-Anzahl.
    - Score bevorzugt Raster, in denen Randspalten/-zeilen häufiger abweichen (Randkacheln).
    """
    n = len(records)
    pairs = factor_pairs(n)

    sig0 = (records[0].tile_cols, records[0].tile_rows, records[0].page_w, records[0].page_h, records[0].flags)
    homogeneous = all((pr.tile_cols, pr.tile_rows, pr.page_w, pr.page_h, pr.flags) == sig0 for pr in records)

    def score(c: int, r: int) -> float:
        mat = [[records[rr * c + cc] for cc in range(c)] for rr in range(r)]

        def sig(pr: PageRecord) -> Tuple[int, int, int, int, int]:
            return (pr.tile_cols, pr.tile_rows, pr.page_w, pr.page_h, pr.flags)

        row_var = sum(len({sig(x) for x in mat[rr]}) for rr in range(r))
        col_var = sum(len({sig(mat[rr][cc]) for rr in range(r)}) for cc in range(c))
        asp = max(c / r, r / c)
        return 10 * (row_var + col_var) + 2 * asp

    best = None
    for c, r in pairs:
        if c * r != n:
            continue
        s = score(c, r)
        if best is None or s < best[0] - 1e-9:
            best = (s, c, r)
        elif abs(s - best[0]) <= 1e-9:
            bc, br = best[1], best[2]
            if homogeneous:
                # if homogeneous: "wider than tall" is often more plausible
                if c >= r and not (bc >= br):
                    best = (s, c, r)
                elif c >= r and bc >= br and c > bc:
                    best = (s, c, r)
            else:
                # otherwise: closer to square first, then wider
                if abs(c / r - 1) < abs(bc / br - 1) - 1e-9:
                    best = (s, c, r)
                elif c >= r and not (bc >= br):
                    best = (s, c, r)

    if best is None:
        raise RuntimeError("Konnte Rasterform nicht ableiten.")
    return best[1], best[2]


# ======================================================================================
# 4) Palette: search, decoding, optional output
# ======================================================================================

def decode_palette_planar(words48: List[int]) -> List[Tuple[int, int, int]]:
    if len(words48) != 48:
        raise ValueError("Palette erwartet 48 WORDs.")
    r = words48[0:16]
    g = words48[16:32]
    b = words48[32:48]
    rgb = []
    for i in range(16):
        rr = int(round(r[i] / 65535 * 255))
        gg = int(round(g[i] / 65535 * 255))
        bb = int(round(b[i] / 65535 * 255))
        rgb.append((rr, gg, bb))
    return rgb
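
# The 48 WORDs are planar rather than interleaved: words[0:16] = R,
# words[16:32] = G, words[32:48] = B, each channel scaled 0..65535. For example:
#   decode_palette_planar([65535] * 16 + [0] * 32) == [(255, 0, 0)] * 16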


def find_palette_near_pageplan(mm: mmap.mmap, pageplan_off: int, max_back: int = 256) -> Optional[Tuple[int, List[int], List[Tuple[int, int, int]]]]:
    """
    Sucht 96 Byte (48 WORDs) in den letzten max_back Bytes vor dem Seitenplan.
    Bevorzugt Kandidaten, deren Ende exakt am Seitenplan anliegt.
    """
    start = max(0, pageplan_off - max_back)
    best0 = None
    best = None
    for off in range(start, pageplan_off - 96 + 1, 2):
        try:
            words = list(struct.unpack_from("<48H", mm, off))
        except struct.error:
            continue
        if all(w == 0 for w in words):
            continue
        uniq = len(set(words))
        if uniq < 8:
            continue
        try:
            rgb = decode_palette_planar(words)
        except Exception:
            continue
        dist = pageplan_off - (off + 96)
        if dist < 0:
            continue
        nonzero = sum(1 for c in rgb if c != (0, 0, 0))
        if nonzero < 3:
            continue
        score = dist * 10 - uniq
        cand = (score, off, words, rgb, dist)
        if dist == 0:
            if best0 is None or score < best0[0]:
                best0 = cand
        if best is None or score < best[0]:
            best = cand
    chosen = best0 if best0 is not None else best
    if chosen is None:
        return None
    _, off, words, rgb, _ = chosen
    return off, words, rgb


def find_palette_in_header(mm: mmap.mmap, header_off: int, header_len: int = 512) -> Optional[Tuple[int, List[int], List[Tuple[int, int, int]]]]:
    end = header_off + header_len
    best = None
    for off in range(header_off, end - 96 + 1, 2):
        try:
            words = list(struct.unpack_from("<48H", mm, off))
        except struct.error:
            continue
        if all(w == 0 for w in words):
            continue
        uniq = len(set(words))
        if uniq < 8:
            continue
        try:
            rgb = decode_palette_planar(words)
        except Exception:
            continue
        nonzero = sum(1 for c in rgb if c != (0, 0, 0))
        if nonzero < 3:
            continue
        score = -uniq
        if best is None or score < best[0]:
            best = (score, off, words, rgb)
    if best is None:
        return None
    _, off, words, rgb = best
    return off, words, rgb


def render_palette_png(rgb16: List[Tuple[int, int, int]], out_path: Path, scale: int = 32) -> None:
    from PIL import Image  # local import so the dependency only bites when actually needed
    img = Image.new("RGB", (16 * scale, scale), (0, 0, 0))
    px = img.load()
    for i, (r, g, b) in enumerate(rgb16):
        for x in range(i * scale, (i + 1) * scale):
            for y in range(scale):
                px[x, y] = (r, g, b)
    img.save(out_path)


# ======================================================================================
# 5) Offset tables & occupancy
# ======================================================================================

def find_offset_table_start(mm: mmap.mmap, after_pageplan: int, expected_bytes: int) -> int:
    """
    Robust:
    - Start kann 0..31 Byte nach after_pageplan liegen (Nullpadding möglich)
    - keine harte 4-Byte-Ausrichtung erzwingen (sonst u.U. 2-Byte-Versatz)
    - es dürfen zusätzliche Daten nach der Offsettabelle folgen
    """
    size = len(mm)
    for delta in range(0, 32):
        start = after_pageplan + delta
        if start >= size:
            continue
        if (size - start) < expected_bytes:
            continue
        if delta > 0 and any(b != 0 for b in mm[after_pageplan:start]):
            continue
        try:
            struct.unpack_from("<I", mm, start)
        except struct.error:
            continue
        return start
    raise RuntimeError(
        f"Konnte Offsettabelle nicht lokalisieren: nach Seitenplan verbleiben {size - after_pageplan} Byte, "
        f"erwartet mindestens {expected_bytes} Byte."
    )


def read_offset_blocks(mm: mmap.mmap, off: int, records: List[PageRecord]) -> Tuple[List[List[int]], int]:
    blocks: List[List[int]] = []
    cur = off
    size = len(mm)
    for pr in records:
        n = pr.tiles_per_page
        need = n * 4
        if cur + need > size:
            raise RuntimeError(f"Offsettabelle läuft über Dateiende bei {fmt_addr(cur)} (benötigt {need} Byte).")
        vals = list(struct.unpack_from(f"<{n}I", mm, cur))
        blocks.append(vals)
        cur += need
    return blocks, cur


def block_is_empty(vals: List[int]) -> bool:
    if not vals:
        return True
    s = set(vals)
    return s == {0} or s == {0xFFFFFFFF}


def occupancy_to_bitrows(occ: List[bool], grid_cols: int, grid_rows: int) -> List[str]:
    rows = []
    for r in range(grid_rows):
        row = occ[r * grid_cols:(r + 1) * grid_cols]
        rows.append("".join("1" if x else "0" for x in row))
    return rows
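
# For example: occupancy_to_bitrows([True, False, True, True], 2, 2) == ["10", "11"]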


def page_start_offset(block: List[int]) -> Optional[int]:
    vals = [v for v in block if v not in (0, 0xFFFFFFFF)]
    if not vals:
        return None
    return min(vals)


# ======================================================================================
# 6) Page types (edge tiles) + overall map dimensions from the pageplan
# ======================================================================================

def describe_page_formats(records: List[PageRecord]) -> Dict[str, Any]:
    """
    Seitentypen und Randkacheln aus page_w/page_h als effektive Pixelmaße ableiten.
    """
    sig_counts: Dict[Tuple[int, int, int, int, int], int] = {}
    sig_to_rec: Dict[Tuple[int, int, int, int, int], PageRecord] = {}
    for pr in records:
        sig = (pr.tile_cols, pr.tile_rows, pr.page_w, pr.page_h, pr.flags)
        sig_counts[sig] = sig_counts.get(sig, 0) + 1
        sig_to_rec[sig] = pr

    std_sig = sorted(sig_counts.items(), key=lambda kv: (-kv[1], kv[0]))[0][0]
    std = sig_to_rec[std_sig]

    types = []
    for sig, cnt in sorted(sig_counts.items(), key=lambda kv: (-kv[1], kv[0])):
        pr = sig_to_rec[sig]
        raw_w = pr.tile_cols * 256
        raw_h = pr.tile_rows * 256
        eff_w = pr.page_w
        eff_h = pr.page_h
        right_w = eff_w - (pr.tile_cols - 1) * 256
        top_h = eff_h - (pr.tile_rows - 1) * 256

        right_ok = 1 <= right_w <= 256
        top_ok = 1 <= top_h <= 256

        corner = (right_w, top_h) if right_ok and top_ok and (right_w != 256 or top_h != 256) else None

        types.append({
            "tile_cols": pr.tile_cols,
            "tile_rows": pr.tile_rows,
            "flags": pr.flags,
            "count": cnt,
            "raw_page_px": (raw_w, raw_h),
            "effective_page_px": (eff_w, eff_h),
            "right_tile_px_w": right_w if right_ok else None,
            "top_tile_px_h": top_h if top_ok else None,
            "corner_px": corner,
        })

    return {
        "standard_type": {
            "tile_cols": std.tile_cols,
            "tile_rows": std.tile_rows,
            "raw_page_px": (std.tile_cols * 256, std.tile_rows * 256),
            "effective_page_px": (std.page_w, std.page_h),
        },
        "types": types,
    }
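
# Worked example (illustrative numbers): a page type with tile_cols=16 and an
# effective page_w of 4000 px yields right_tile_px_w = 4000 - 15 * 256 = 160,
# i.e. the rightmost tile column is only 160 px wide; top_tile_px_h analogously.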


def compute_pageplan_totals(records: List[PageRecord], grid_cols: int, grid_rows: int) -> Dict[str, Any]:
    """
    Gesamtbreite/-höhe (effektive Pixel) aus Seitenplan ableiten.
    Records sind zeilenweise (unten->oben) gespeichert.
    """
    if grid_cols * grid_rows != len(records):
        raise ValueError("Raster passt nicht zur Record-Anzahl.")
    mat = [[records[r * grid_cols + c] for c in range(grid_cols)] for r in range(grid_rows)]
    col_widths = [mat[0][c].page_w for c in range(grid_cols)]
    row_heights = [mat[r][0].page_h for r in range(grid_rows)]
    return {
        "col_widths": col_widths,
        "row_heights": row_heights,
        "total_w": sum(col_widths),
        "total_h": sum(row_heights),
    }


def find_header_u32_totals(mm: mmap.mmap, header_off: int, header_len: int, target_w: int, target_h: int) -> Optional[Dict[str, Any]]:
    """Sucht im Headerfenster nach einem uint32-Paar (w,h), das exakt target_w/target_h entspricht."""
    hits = []
    for rel in range(0, header_len - 8 + 1, 2):
        off = header_off + rel
        try:
            w = struct.unpack_from("<I", mm, off)[0]
            h = struct.unpack_from("<I", mm, off + 4)[0]
        except struct.error:
            continue
        if w == target_w and h == target_h:
            hits.append((off, w, h))
    if not hits:
        return None
    hits.sort(key=lambda t: t[0])
    off, w, h = hits[0]
    return {"offset": off, "w": w, "h": h, "all_hits": [{"offset": o, "w": ww, "h": hh} for (o, ww, hh) in hits]}


# ======================================================================================
# 7) DMB (optional): cross-check (analysis only)
# ======================================================================================

def dmb_iter_entries(dmb_bytes: bytes) -> List[Dict[str, Any]]:
    """
    Pragmatiker-Parser für Megatel-DMB.
    (Unverändert in der Logik; nur sauber zusammengefasst.)
    """
    entries: List[Dict[str, Any]] = []
    b = dmb_bytes
    pos = 0
    while True:
        p = b.find(b"PMAP=", pos)
        if p < 0:
            break
        line_end = b.find(b"\n", p)
        if line_end < 0:
            line_end = min(len(b), p + 200)
        line = b[p:line_end].decode("latin1", errors="ignore").strip()

        m = re.match(r"PMAP=\s*([A-Za-z0-9_.]+)\s+(\d+)", line)
        name = m.group(1) if m else None
        scale = int(m.group(2)) if m else None

        size_p = b.find(b"SIZE=", p)
        size_line_end = b.find(b"\n", size_p) if size_p >= 0 else -1
        size_w = size_h = None
        if size_p >= 0 and (size_line_end < 0 or size_line_end - size_p < 200):
            size_line = b[size_p:size_line_end if size_line_end > 0 else size_p + 200].decode("latin1", errors="ignore")
            m2 = re.search(r"SIZE=\s*(\d+)\s+(\d+)", size_line)
            if m2:
                size_w = int(m2.group(1))
                size_h = int(m2.group(2))

        bgau_p = b.find(b"BGAU=", p)
        bpol_p = b.find(b"BPOL=", p)
        bgau_blob = None
        if bgau_p >= 0 and bpol_p > bgau_p:
            bgau_blob = b[bgau_p + 5:bpol_p]

        px = x0 = y0 = None
        bbox = None
        if bgau_blob:
            doubles = []
            for off in range(0, len(bgau_blob) - 7, 8):
                v = struct.unpack_from("<d", bgau_blob, off)[0]
                if v == v and abs(v) != float("inf"):
                    doubles.append((off, v))

            if len(doubles) >= 8:
                v2 = doubles[2][1]
                v5 = doubles[5][1]
                v6 = doubles[6][1]
                v7 = doubles[7][1]
                if 0.05 <= v2 <= 1000 and 0.05 <= v5 <= 1000 and 1e6 <= v6 <= 1e7 and 1e6 <= v7 <= 1e7:
                    px = round((v2 + v5) / 2.0, 6)
                    x0, y0 = v6, v7

            if px is None:
                for _off, v in doubles:
                    if v > 0 and 0.1 <= v <= 1000:
                        rv = round(v, 3)
                        if abs(v - rv) <= 1e-3 and rv >= 0.5:
                            px = rv
                            break

            big = [(off, v) for (off, v) in doubles if 1e6 <= v <= 1e7]
            big.sort()
            if x0 is None or y0 is None:
                for i in range(len(big) - 1):
                    o1, v1 = big[i]
                    o2, v2 = big[i + 1]
                    if o2 - o1 == 8:
                        x0, y0 = v1, v2
                        break

            for i in range(len(big) - 3):
                o0, v0 = big[i]
                o1, v1 = big[i + 1]
                o2, v2 = big[i + 2]
                o3, v3 = big[i + 3]
                if o1 - o0 == 8 and o2 - o1 == 8 and o3 - o2 == 8:
                    minx, maxx = (v0, v2) if v0 <= v2 else (v2, v0)
                    miny, maxy = (v1, v3) if v1 <= v3 else (v3, v1)
                    if (maxx - minx) > 0 and (maxy - miny) > 0:
                        bbox = (minx, miny, maxx, maxy)
                        break

        entries.append({
            "pmap_offset": p,
            "name": name,
            "scale": scale,
            "size": (size_w, size_h),
            "bgau_len": len(bgau_blob) if bgau_blob else None,
            "x0y0": (x0, y0),
            "pixel_size": px,
            "bbox": bbox,
        })
        pos = line_end if line_end > 0 else p + 5
    return entries
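
# The parser above keys on plain-text markers embedded in the binary DMB, e.g.
# (hypothetical values):
#   PMAP= L7710 50000
#   SIZE= 12345 9876
# followed by a binary blob between "BGAU=" and "BPOL=" containing little-endian
# doubles (pixel size, origin and, where present, a bounding box).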


def dmb_find_entry(dmb_path: Path, akf_basename: str) -> Optional[Dict[str, Any]]:
    b = dmb_path.read_bytes()
    entries = dmb_iter_entries(b)
    target = akf_basename.upper()
    for e in entries:
        if e.get("name") and e["name"].upper() == target:
            return e
    return None


# ======================================================================================
# 8) Georeference: candidates from the header (heuristic, "header-first")
# ======================================================================================

def find_bbox_from_doubles(doubles: List[Tuple[int, float]]) -> Optional[Dict[str, Any]]:
    doubles.sort(key=lambda t: t[0])
    for i in range(len(doubles) - 3):
        o0, v0 = doubles[i]
        o1, v1 = doubles[i + 1]
        o2, v2 = doubles[i + 2]
        o3, v3 = doubles[i + 3]
        if not (o1 - o0 == 8 and o2 - o1 == 8 and o3 - o2 == 8):
            continue
        if not (1e6 <= v0 <= 1e7 and 1e6 <= v2 <= 1e7 and 1e6 <= v1 <= 1e7 and 1e6 <= v3 <= 1e7):
            continue
        minx, maxx = (v0, v2) if v0 <= v2 else (v2, v0)
        miny, maxy = (v1, v3) if v1 <= v3 else (v3, v1)
        if (maxx - minx) <= 0 or (maxy - miny) <= 0:
            continue
        return {"offset": o0, "minx": minx, "miny": miny, "maxx": maxx, "maxy": maxy}
    return None


def georef_extract(
    mm: mmap.mmap,
    header_off: int,
    header_len: int,
    scan_end: int | None = None,
    dmb_expect: Dict[str, Any] | None = None,
    *,
    deep_scan: bool = False,
) -> Dict[str, Any]:
    """
    Extrahiert Georeferenzierungs-Kandidaten aus dem Headerfenster.

    Strategie:
    - Primär nur Header (standardmäßig 512 Byte), weil dort erfahrungsgemäß alles Nötige liegt.
    - Optional (deep_scan=True) erweitertes Fenster bis max. 256 KiB, aber nur für Zusatzindikatoren (BBox).
    """
    scan_base = header_off
    if not deep_scan:
        scan_len = min(header_len, len(mm) - scan_base)
    else:
        if scan_end is None:
            scan_len = min(4096, len(mm) - scan_base)
        else:
            scan_len = min(max(0, scan_end - scan_base), 256 * 1024, len(mm) - scan_base)

    win = memoryview(mm)[scan_base:scan_base + scan_len]

    # 1) Strings
    strings: Dict[str, int] = {}
    win_bytes = win.tobytes()
    for s in (b"GK_S3", b"Rechts", b"Hoch", b"GK", b"S3"):
        i = win_bytes.find(s)
        if i != -1:
            strings[s.decode("ascii", "ignore")] = scan_base + i

    # 2) Collect numbers (header vs. extended window)
    doubles_h: List[Tuple[int, float]] = []
    f32_h: List[Tuple[int, float]] = []
    u32_h: List[Tuple[int, int]] = []
    doubles_ext: List[Tuple[int, float]] = []

    for rel in range(0, header_len - 8 + 1):
        off = header_off + rel
        try:
            dv = struct.unpack_from("<d", mm, off)[0]
        except struct.error:
            continue
        if math.isfinite(dv):
            doubles_h.append((off, float(dv)))

    for rel in range(0, header_len - 4 + 1):
        off = header_off + rel
        try:
            fv = struct.unpack_from("<f", mm, off)[0]
            iv = struct.unpack_from("<I", mm, off)[0]
        except struct.error:
            continue
        if math.isfinite(fv):
            f32_h.append((off, float(fv)))
        u32_h.append((off, int(iv)))

    for rel in range(0, scan_len - 8 + 1):
        off = scan_base + rel
        try:
            dv = struct.unpack_from("<d", mm, off)[0]
        except struct.error:
            continue
        if math.isfinite(dv):
            doubles_ext.append((off, float(dv)))

    def _read_f64_abs(off: int) -> Optional[float]:
        try:
            v = struct.unpack_from("<d", mm, off)[0]
            if math.isfinite(v):
                return float(v)
        except struct.error:
            pass
        return None

    def _is_plausible_pix(v: float) -> bool:
        return 0.01 <= v <= 10000

    def _is_plausible_coord(v: float) -> bool:
        return 1e5 <= v <= 1e8

    # 3) Check the fixed layout (+0x10/+0x18/+0x20)
    fixed: Dict[str, Any] = {}
    pix0 = _read_f64_abs(header_off + 0x10)
    x0_ = _read_f64_abs(header_off + 0x18)
    y0_ = _read_f64_abs(header_off + 0x20)
    if pix0 is not None and x0_ is not None and y0_ is not None:
        if _is_plausible_pix(pix0) and _is_plausible_coord(x0_) and _is_plausible_coord(y0_):
            fixed = {
                "pixel_size": {"offset": header_off + 0x10, "value": pix0, "type": "f64"},
                "x0y0": {
                    "kind": "fixed+0x10/+0x18/+0x20",
                    "x_offset": header_off + 0x18, "x": x0_,
                    "y_offset": header_off + 0x20, "y": y0_,
                },
            }

    # 4) Coordinate pair (header only)
    def _find_pair(seq: List[Tuple[int, float]], label: str) -> Optional[Dict[str, Any]]:
        cand = [(o, float(v)) for (o, v) in seq if _is_plausible_coord(float(v))]
        cand.sort(key=lambda t: t[0])
        for i in range(len(cand) - 1):
            o1, v1 = cand[i]
            o2, v2 = cand[i + 1]
            if 0 < (o2 - o1) <= 32 and abs(float(v1) - float(v2)) >= 100000:
                return {"kind": label, "x_offset": o1, "x": float(v1), "y_offset": o2, "y": float(v2)}
        return None

    coord_pair = None
    if fixed.get("x0y0"):
        coord_pair = fixed["x0y0"]
        if dmb_expect and dmb_expect.get("x0") is not None and dmb_expect.get("y0") is not None:
            dx = abs(coord_pair["x"] - float(dmb_expect["x0"]))
            dy = abs(coord_pair["y"] - float(dmb_expect["y0"]))
            if dx > 5000 or dy > 5000:
                coord_pair = None

    if coord_pair is None:
        coord_pair = (
            _find_pair(doubles_h, "f64")
            or _find_pair(f32_h, "f32")
            or _find_pair([(o, float(v)) for (o, v) in u32_h], "u32")
        )

    # 5) Pixel-size candidates (header f64/f32)
    px_candidates: List[Dict[str, Any]] = []
    for o, v in doubles_h:
        if _is_plausible_pix(v):
            px_candidates.append({"offset": o, "value": float(v), "type": "f64", "raw": float(v)})
    for o, v in f32_h:
        if _is_plausible_pix(v):
            px_candidates.append({"offset": o, "value": float(v), "type": "f32", "raw": float(v)})

    # dedup
    seen = set()
    px2 = []
    for c in px_candidates:
        k = (c["type"], c["offset"])
        if k in seen:
            continue
        seen.add(k)
        px2.append(c)
    px_candidates = px2

    px_sel = None
    if dmb_expect and dmb_expect.get("pixel_size") is not None and px_candidates:
        target = float(dmb_expect["pixel_size"])
        px_sel = min(px_candidates, key=lambda c: (abs(c["value"] - target), 0 if c["type"] == "f64" else 1))
    elif fixed.get("pixel_size"):
        px_sel = fixed["pixel_size"]
    elif px_candidates:
        def _score(c: Dict[str, Any]) -> Tuple[int, float, float, float, float]:
            v = float(c["value"])
            roundish = min(abs(v - round(v, 1)), abs(v - round(v)), abs(v - round(v * 2) / 2))
            return (0 if c["type"] == "f64" else 1, roundish, abs(v - 2.5), abs(v - 12.5), abs(v - 25.0))
        px_sel = sorted(px_candidates, key=_score)[0]

    # 6) Map scale (header)
    scale = None
    for o, v in u32_h:
        if v in (25000, 50000, 100000, 200000, 250000, 1000000):
            scale = {"offset": o, "value": int(v), "type": "u32"}
            break
    if scale is None:
        for o, v in doubles_h:
            if abs(v - round(v)) < 1e-9:
                iv = int(round(v))
                if iv in (25000, 50000, 100000, 200000, 250000, 1000000):
                    scale = {"offset": o, "value": iv, "type": "f64"}
                    break

    # 7) Bounding box (extended window)
    bbox = None
    if strings.get("GK_S3") is not None:
        big64 = [(o, v) for (o, v) in doubles_ext if _is_plausible_coord(v)]
        big64.sort(key=lambda t: t[0])
        bbox = find_bbox_from_doubles(big64)

    return {
        "strings": strings,
        "coord_pair": coord_pair,
        "bbox": bbox,
        "pixel_size_candidate": px_sel,
        "pixel_size_candidates": px_candidates,
        "scale_candidate": scale,
        "scan_window": {"offset": scan_base, "length": scan_len, "note": "Header-first; extended window only for bbox/hints"},
    }


# ======================================================================================
# 9) Analysis mode (report), consolidated from the original analyzer
# ======================================================================================

def run_analyze(
    akf_path: Path,
    *,
    text: bool,
    out_json: Optional[Path],
    palette_png: Optional[Path],
    header_len: int,
    dmb_path: Optional[Path],
    deep_georef: bool,
) -> int:
    if not akf_path.exists():
        print(f"Datei nicht gefunden: {akf_path}", file=sys.stderr)
        return 2

    # Pillow only if a palette PNG was requested
    if palette_png is not None:
        _ensure_python_modules([("PIL", "pillow")])

    with akf_path.open("rb") as f:
        mm = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)

        plan_kind, plan = choose_best_plan(mm)
        pp_off = int(plan["pp_off"])

        if plan_kind == "pageplan":
            pp_cnt = int(plan["pp_cnt"])
            records = read_pageplan_records(mm, pp_off, pp_cnt)
            grid_cols, grid_rows = infer_grid(records)
            order = "unten->oben"
            tileplan_meta = None
        else:
            tc = int(plan["tile_cols"])
            tr = int(plan["tile_rows"])
            total_w = int(plan["total_w"])
            total_h = int(plan["total_h"])
            pp_cnt = tc * tr

            col_w = [256] * tc
            col_w[-1] = max(1, total_w - 256 * (tc - 1))
            row_h = [256] * tr
            row_h[-1] = max(1, total_h - 256 * (tr - 1))

            records = []
            for r in range(tr):
                for c in range(tc):
                    records.append(PageRecord(1, 1, col_w[c], row_h[r], 0))
            grid_cols, grid_rows = tc, tr
            order = "unten->oben"
            tileplan_meta = plan

        header_off = max(0, pp_off - int(header_len))

        pal = find_palette_near_pageplan(mm, pp_off, 256)
        if pal is None:
            pal = find_palette_in_header(mm, header_off, int(header_len))
        pal_obj = None
        if pal is not None:
            pal_off, pal_words, pal_rgb = pal
            pal_obj = {"offset": pal_off, "words48": pal_words, "rgb16": pal_rgb}

        page_formats = describe_page_formats(records)
        totals = compute_pageplan_totals(records, grid_cols, grid_rows)
        header_totals = find_header_u32_totals(mm, header_off, int(header_len), totals["total_w"], totals["total_h"])

        # Offset table (for occupancy)
        if tileplan_meta is None:
            after_pp = pp_off + pp_cnt * 16
            expected_bytes = sum(pr.tiles_per_page * 4 for pr in records)
            ot_off = find_offset_table_start(mm, after_pp, expected_bytes)
        else:
            ot_off = int(tileplan_meta["ot_off"])
        blocks, ot_end = read_offset_blocks(mm, ot_off, records)
        occ = [not block_is_empty(b) for b in blocks]
        bitrows = occupancy_to_bitrows(occ, grid_cols, grid_rows)

        nonempty_first = [b[0] for b in blocks if b and b[0] not in (0, 0xFFFFFFFF)]
        monotonic = all(nonempty_first[i] < nonempty_first[i + 1] for i in range(len(nonempty_first) - 1)) if len(nonempty_first) > 1 else True

        dmb_entry = None
        if dmb_path is not None and dmb_path.exists():
            dmb_entry = dmb_find_entry(dmb_path, akf_path.name)

        georef = georef_extract(
            mm,
            header_off,
            int(header_len),
            scan_end=ot_off,
            dmb_expect=dmb_entry,
            deep_scan=bool(deep_georef),
        )

        # Page start offsets
        page_starts = [page_start_offset(b) for b in blocks]

        result: Dict[str, Any] = {
            "file": {"path": str(akf_path), "size": os.path.getsize(akf_path)},
            "plan_kind": plan_kind,
            "page_plan": {
                "offset": pp_off, "record_count": pp_cnt,
                "grid_cols": grid_cols, "grid_rows": grid_rows,
                "order": order,
                "records": [asdict(r) for r in records],
            },
            "header_window": {"offset": header_off, "length": int(header_len)},
            "palette": pal_obj,
            "page_formats": page_formats,
            "map_totals": {"from_pageplan": totals, "from_header_u32": header_totals},
            "offset_table": {
                "offset": ot_off, "end_offset": ot_end, "bytes": ot_end - ot_off,
                "pages": len(blocks),
                "occupied_pages": sum(1 for x in occ if x),
                "empty_pages": sum(1 for x in occ if not x),
                "first_offsets_monotonic": monotonic,
                "occupancy_bitrows": bitrows,
            },
            "georef_candidates": georef,
            "dmb": dmb_entry,
            "page_starts": page_starts,
        }

        if palette_png is not None:
            if pal_obj is None:
                mm.close()
                print("Palette-PNG angefordert, aber keine Palette gefunden.", file=sys.stderr)
                return 2
            render_palette_png(pal_obj["rgb16"], palette_png)

        if text or out_json is None:
            print("=" * 72)
            print("AKF-Analyse")
            print("=" * 72)
            print(f"Datei: {akf_path}")
            print(f"Dateigröße: {os.path.getsize(akf_path)} Byte\n")

            print(f"Planart: {plan_kind}")
            print(f"Plan-Offset: {fmt_addr(pp_off)}, Records {pp_cnt} (= {grid_cols}×{grid_rows}), Ordnung: {order}\n")

            print("Gesamtmaße der Karte (aus Seitenplan, effektive Pixel):")
            print(f"  Raster: {grid_cols} Spalten × {grid_rows} Zeilen")
            print(f"  Breite: {totals['total_w']} px")
            print(f"  Höhe:   {totals['total_h']} px")
            if header_totals is not None:
                print(f"  (Im Header als uint32-Paar wiedergefunden bei {fmt_addr(header_totals['offset'])})")
            print()

            print(f"Headerfenster: {fmt_addr(header_off)} .. {fmt_addr(header_off + int(header_len))} (Länge {int(header_len)})")
            if pal_obj is None:
                print("Palette: nicht gefunden.")
            else:
                print(f"Palette: Offset {fmt_addr(pal_obj['offset'])} (48 WORDs / 96 Byte)")
                if palette_png is not None:
                    print(f"  PNG: {palette_png}")
            print()

            print("Georeferenzierung (Kandidaten):")
            cp = georef.get("coord_pair")
            px = georef.get("pixel_size_candidate")
            if cp:
                print(f"  Koordinatenpaar: X={cp['x']:.3f} bei {fmt_addr(cp['x_offset'])}, Y={cp['y']:.3f} bei {fmt_addr(cp['y_offset'])}")
            if px:
                print(f"  Pixelgröße-Auswahl: {px.get('value')} ({px.get('type')}) bei {fmt_addr(px.get('offset', 0))}")
            sc = georef.get("scale_candidate")
            if sc:
                print(f"  Maßstab-Kandidat: {sc['value']} bei {fmt_addr(sc['offset'])}")
            bbox = georef.get("bbox")
            if bbox:
                print(f"  Bounding Box: minX={bbox['minx']:.3f}, minY={bbox['miny']:.3f}, maxX={bbox['maxx']:.3f}, maxY={bbox['maxy']:.3f}")
            print("=" * 72)

        if out_json is not None:
            out_json.write_text(json.dumps(result, indent=2, ensure_ascii=False), encoding="utf-8")

        mm.close()
    return 0


# ======================================================================================
# 10) Extraction: LZW variant + image assembly + georeferencing via GDAL
# ======================================================================================

CLEAR = 0x100
END = 0x101
MISSING = {0, 0xFFFFFFFF}


def collect_clear_candidates(stream: bytes) -> List[int]:
    import numpy as np
    b = np.frombuffer(stream, dtype=np.uint8)
    if len(b) < 2:
        return []
    first9 = ((b[:-1].astype(np.uint16) << 1) | (b[1:] >> 7)).astype(np.uint16)
    return np.where(first9 == CLEAR)[0].tolist()
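
# collect_clear_candidates() computes, for every byte boundary i, the 9-bit code
# first9[i] = (b[i] << 1) | (b[i+1] >> 7) that would start at bit 0 of byte i.
# CLEAR (0x100) therefore shows up exactly where b[i] == 0x80 and the top bit of
# b[i+1] is 0, e.g. collect_clear_candidates(b"\x80\x00") == [0].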


class AkfLzwState:
    """
    LZW-Decoder (AKF-Variante):
    - CLEAR setzt Wörterbuch zurück, aber ohne den Bitpuffer hart zu nullen.
    - Das ist hier entscheidend, weil AKF-Datenströme offenbar so „laufen“.
    """
    def __init__(self) -> None:
        self.reset()

    def reset(self) -> None:
        self.prev = [-1] * 4096
        self.val = [0] * 4096
        self.ln = [0] * 4096
        for i in range(256):
            self.val[i] = i
            self.ln[i] = 1
        self.next_code = 0x102
        self.code_bits = 9
        self.bitbuf = 0
        self.bitcount = 0
        self.prev_code = None
        self.prev_first = 0

    def _refill(self, data: bytes, pos: int) -> tuple[int, bool]:
        while self.bitcount < self.code_bits:
            if pos >= len(data):
                return pos, False
            self.bitbuf = ((self.bitbuf << 8) | data[pos]) & 0xFFFFFFFF
            pos += 1
            self.bitcount += 8
        return pos, True

    def _get_code(self, data: bytes, pos: int) -> tuple[int | None, int]:
        pos, ok = self._refill(data, pos)
        if not ok:
            return None, pos
        shift = self.bitcount - self.code_bits
        code = (self.bitbuf >> shift) & ((1 << self.code_bits) - 1)
        self.bitcount -= self.code_bits
        return int(code), pos

    def _expand(self, code: int) -> bytes:
        l = self.ln[code]
        out = bytearray(l)
        c = code
        for i in range(l - 1, -1, -1):
            out[i] = self.val[c]
            c = self.prev[c]
            if c == -1:
                break
        return bytes(out)

    def _grow(self) -> None:
        if self.code_bits < 12 and self.next_code >= ((1 << self.code_bits) - 1):
            self.code_bits += 1

    def feed(self, data: bytes, out_limit: int) -> tuple[bytes, int, bool]:
        out = bytearray()
        pos = 0
        ended = False

        while out_limit > 0:
            code, pos2 = self._get_code(data, pos)
            if code is None:
                break
            pos = pos2

            if code == END:
                ended = True
                break

            if code == CLEAR:
                self.next_code = 0x102
                self.code_bits = 9
                for i in range(256, 4096):
                    self.prev[i] = -1
                    self.val[i] = 0
                    self.ln[i] = 0
                self.prev_code = None
                self.prev_first = 0
                continue

            if self.prev_code is None:
                seq = self._expand(code)
                self.prev_code = code
                self.prev_first = seq[0] if seq else 0
                take = min(out_limit, len(seq))
                out.extend(seq[:take])
                out_limit -= take
                continue

            if code < self.next_code and self.ln[code] != 0:
                seq = self._expand(code)
            elif code == self.next_code:
                prev_seq = self._expand(self.prev_code)
                seq = prev_seq + bytes([self.prev_first])
            else:
                raise ValueError("Ungültiger Code (Strom nicht passend / falscher Startpunkt).")

            first = seq[0]
            take = min(out_limit, len(seq))
            out.extend(seq[:take])
            out_limit -= take

            if self.next_code < 4096:
                self.prev[self.next_code] = self.prev_code
                self.val[self.next_code] = first
                self.ln[self.next_code] = self.ln[self.prev_code] + 1
                self.next_code += 1
                self._grow()

            self.prev_code = code
            self.prev_first = first

        return bytes(out), pos, ended


def decode_tile_from_chunk(chunk: bytes, out_limit: int = 0x8000, lookahead: int = 2000) -> Optional[bytes]:
    cands = collect_clear_candidates(chunk)
    if not cands:
        return None
    cands.sort()

    for sp0 in cands[:lookahead]:
        st = AkfLzwState()
        out_buf = bytearray()
        pos = int(sp0)
        try:
            while len(out_buf) < out_limit and pos < len(chunk):
                out, consumed, ended = st.feed(chunk[pos:], out_limit=out_limit - len(out_buf))
                out_buf.extend(out)
                pos += consumed
                if ended or consumed == 0:
                    break
            if len(out_buf) >= 16:
                return bytes(out_buf)
        except Exception:
            continue
    return None


def bytes_to_idx(raw: bytes, width: int, height: int):
    import numpy as np
    row_bytes = (width + 1) // 2
    need = row_bytes * height
    if len(raw) < need:
        raise ValueError(f"Zu wenig Rohdaten: len={len(raw)} benötigt={need}")
    raw = raw[:need]
    arr = np.frombuffer(raw, dtype=np.uint8).reshape((height, row_bytes))
    hi = arr >> 4
    lo = arr & 0x0F
    idx = np.empty((height, width), dtype=np.uint8)
    idx[:, 0::2] = hi[:, : (width + 1) // 2]
    if width > 1:
        idx[:, 1::2] = lo[:, : width // 2]
    return idx
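
# Nibble-unpacking example: width=3, height=1 gives row_bytes=2, so
#   bytes_to_idx(b"\xAB\xC0", 3, 1)  ->  [[0x0A, 0x0B, 0x0C]]
# (high nibble first, then low nibble; the trailing low nibble is padding).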


def palette_to_pillow(rgb16: List[Tuple[int, int, int]]) -> List[int]:
    pal = []
    for r, g, b in rgb16:
        pal.extend([int(r), int(g), int(b)])
    pal.extend([0, 0, 0] * (256 - len(rgb16)))
    return pal


def write_tiff(path: Path, idx_img, palette_rgb16: List[Tuple[int, int, int]], compress: bool = True) -> None:
    from PIL import Image
    img = Image.fromarray(idx_img, mode="P")
    img.putpalette(palette_to_pillow(palette_rgb16))
    kwargs = {}
    if compress:
        kwargs["compression"] = "tiff_lzw"
    tmp = path.with_suffix(path.suffix + ".tmp")
    img.save(tmp, format="TIFF", **kwargs)
    tmp.replace(path)


@dataclass
class PageType:
    tile_cols: int
    tile_rows: int
    raw_w: int
    raw_h: int
    eff_w: int
    eff_h: int
    right_w: Optional[int]
    top_h: Optional[int]
    corner_w: Optional[int]
    corner_h: Optional[int]


@dataclass
class PageTask:
    slot: int
    row: int
    col: int
    pt: PageType
    tiles: List[Tuple[int, int]]
    offsets_in_table_order: List[int]


@dataclass
class Analysis:
    grid_cols: int
    grid_rows: int
    palette_rgb16: List[Tuple[int, int, int]]
    tasks: List[PageTask]
    global_offsets_sorted: List[int]
    plan_kind: str
    total_w: Optional[int]
    total_h: Optional[int]
    col_widths: Optional[List[int]]
    row_heights: Optional[List[int]]
    x0: Optional[float]
    y0: Optional[float]
    pixel_size: Optional[float]
    epsg: Optional[int]


def tile_dims(pt: PageType, tc: int, tr: int, base: int) -> Tuple[int, int]:
    """Pixel size (w, h) of the tile at column tc / row tr within page type pt (base = standard tile edge)."""
    is_top = (tr == pt.tile_rows - 1)
    is_right = (tc == pt.tile_cols - 1)
    w = base if not is_right else (pt.right_w if pt.right_w is not None else base)
    h = base if not is_top else (pt.top_h if pt.top_h is not None else base)
    if is_top and is_right and pt.corner_w is not None and pt.corner_h is not None:
        w, h = pt.corner_w, pt.corner_h
    return w, h


def page_bbox_from_grid(
    x0: float,
    y0: float,
    ps: float,
    col_widths: List[int],
    row_heights: List[int],
    row: int,
    col: int,
    w_px: int,
    h_px: int,
) -> Tuple[float, float, float, float]:
    """
    Bounding Box (minx, miny, maxx, maxy) für Seite (row, col).
    x0/y0 sind linke untere Ecke der Gesamtkarte. row=0 unten, col=0 links.
    """
    x_off_px = sum(col_widths[:col])
    y_off_px = sum(row_heights[:row])

    minx = x0 + x_off_px * ps
    miny = y0 + y_off_px * ps
    maxx = minx + w_px * ps
    maxy = miny + h_px * ps
    return (minx, miny, maxx, maxy)
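
# Worked example (illustrative numbers): x0=3500000, y0=5500000, ps=2.5,
# col_widths=[4000, 4000], row_heights=[3000, 3000], page row=1, col=1,
# w_px=4000, h_px=3000:
#   minx = 3500000 + 4000 * 2.5 = 3510000      maxx = 3510000 + 4000 * 2.5 = 3520000
#   miny = 5500000 + 3000 * 2.5 = 5507500      maxy = 5507500 + 3000 * 2.5 = 5515000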


def apply_georef_with_gdal_translate(
    tif_path: Path,
    minx: float, miny: float, maxx: float, maxy: float,
    epsg: Optional[int],
    *,
    compress_lzw: bool,
    gdal_translate: str,
) -> None:
    """
    Schreibt eine georeferenzierte GeoTIFF-Datei (palettiert) per gdal_translate
    in eine temporäre Datei und ersetzt dann die Originaldatei.
    """
    tmp = tif_path.with_suffix(tif_path.suffix + ".georef_tmp.tif")

    cmd = [
        gdal_translate,
        "-of", "GTiff",
        "-a_ullr", f"{minx}", f"{maxy}", f"{maxx}", f"{miny}",
    ]
    if epsg is not None:
        cmd += ["-a_srs", f"EPSG:{epsg}"]

    if compress_lzw:
        cmd += ["-co", "COMPRESS=LZW"]
    cmd += ["-co", "PHOTOMETRIC=PALETTE"]

    cmd += [str(tif_path), str(tmp)]
    subprocess.run(cmd, check=True)
    tmp.replace(tif_path)


def build_vrt(out_dir: Path, vrt_name: str, *, gdalbuildvrt: str) -> Path:
    """
    Erzeugt eine VRT-Datei (virtuelles Mosaik) aus den Seiten-GeoTIFFs im Ausgabeverzeichnis.
    """
    tifs = sorted(out_dir.glob("page_row*_col*.tif"))
    if not tifs:
        raise RuntimeError("Keine Seiten-Dateien (page_row*_col*.tif) gefunden – VRT kann nicht erstellt werden.")

    lst = out_dir / "_vrt_filelist.txt"
    lst.write_text("\n".join(p.name for p in tifs), encoding="utf-8")

    vrt_path = out_dir / vrt_name
    cmd = [
        gdalbuildvrt,
        "-overwrite",
        "-input_file_list", str(lst),
        str(vrt_path),
    ]
    subprocess.run(cmd, check=True, cwd=str(out_dir))
    return vrt_path


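# No explicit per-tile lengths are available, so each chunk is taken as the span
# from its offset to the next known offset (the last chunk runs to end of file);
# a chunk may therefore carry trailing bytes beyond the actual LZW stream.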
def build_chunk_map(akf_path: Path, offsets_sorted: List[int]) -> Dict[int, bytes]:
    with akf_path.open("rb") as f:
        f.seek(0, 2)
        file_end = f.tell()
        m: Dict[int, bytes] = {}
        for i, off in enumerate(offsets_sorted):
            nxt = offsets_sorted[i + 1] if i + 1 < len(offsets_sorted) else file_end
            ln = nxt - off
            if ln <= 0:
                m[off] = b""
                continue
            f.seek(off)
            m[off] = f.read(ln)
        return m


def analyze_for_extract(mm: mmap.mmap) -> Analysis:
    """
    Analyse für den Extraktor:
    - Planart bestimmen
    - Palette extrahieren
    - Aufgabenliste (PageTask) und globale Offsets erstellen
    - Georeferenz (wenn sicher genug) aus Header ermitteln
    """
    kind, info = choose_best_plan(mm)
    pp_off = int(info["pp_off"])

    # Palette (identical for both plan kinds)
    pal = find_palette_near_pageplan(mm, pp_off) or find_palette_in_header(mm, max(0, pp_off - 512), 512)
    if pal is None:
        raise RuntimeError("Keine Palette gefunden.")
    palette_rgb16 = [(int(r), int(g), int(b)) for (r, g, b) in pal[2]]

    header_len = 512
    header_off = max(0, pp_off - header_len)

    if kind == "pageplan":
        pp_cnt = int(info["pp_cnt"])
        page_records = read_pageplan_records(mm, pp_off, pp_cnt)
        grid_cols, grid_rows = infer_grid(page_records)
        after_pp = pp_off + pp_cnt * 16

        expected_bytes = sum(int(pr.tiles_per_page) * 4 for pr in page_records)
        ot_off = find_offset_table_start(mm, after_pp, expected_bytes)
        blocks, _end = read_offset_blocks(mm, ot_off, page_records)

        totals = compute_pageplan_totals(page_records, grid_cols, grid_rows)
        col_widths = list(map(int, totals["col_widths"]))
        row_heights = list(map(int, totals["row_heights"]))

        georef = georef_extract(
            mm,
            header_off,
            header_len,
            scan_end=ot_off,
            dmb_expect=None,
            deep_scan=False,
        )

        x0 = y0 = ps = None
        cp = georef.get("coord_pair")
        px = georef.get("pixel_size_candidate")
        if cp is not None and px is not None:
            x0 = float(cp["x"])
            y0 = float(cp["y"])
            ps = float(px["value"])

        epsg = None
        strings = georef.get("strings") or {}
        if "GK_S3" in strings:
            epsg = 31467

        offs = []
        occupancy = []
        for b in blocks:
            ok = any(int(v) not in MISSING for v in b)
            occupancy.append(ok)
            offs.extend([int(v) for v in b if int(v) not in MISSING])
        global_offsets_sorted = sorted(set(offs))

        page_formats = describe_page_formats(page_records)
        page_types: Dict[Tuple[int, int, int], PageType] = {}
        for t in page_formats["types"]:
            tc = int(t["tile_cols"])
            tr = int(t["tile_rows"])
            flags = int(t["flags"])
            raw_w, raw_h = map(int, t["raw_page_px"])
            eff_w, eff_h = map(int, t["effective_page_px"])
            right_w = t.get("right_tile_px_w", None)
            top_h = t.get("top_tile_px_h", None)
            corner_px = t.get("corner_px", None)
            if corner_px is None:
                cw = ch = None
            else:
                cw, ch = map(int, corner_px)
            page_types[(tc, tr, flags)] = PageType(
                tc, tr, raw_w, raw_h, eff_w, eff_h,
                None if right_w is None else int(right_w),
                None if top_h is None else int(top_h),
                cw, ch,
            )

        tasks: List[PageTask] = []
        total = int(grid_cols) * int(grid_rows)
        for slot in range(total):
            if not occupancy[slot]:
                continue
            pr = page_records[slot]
            key = (int(pr.tile_cols), int(pr.tile_rows), int(pr.flags))
            pt = page_types[key]
            base = pt.raw_w // pt.tile_cols

            tiles: List[Tuple[int, int]] = []
            for tr_ in range(pt.tile_rows):
                for tc_ in range(pt.tile_cols):
                    tiles.append(tile_dims(pt, tc_, tr_, base))

            offsets_in_table_order = list(map(int, blocks[slot]))
            if len(offsets_in_table_order) != len(tiles):
                n = min(len(offsets_in_table_order), len(tiles))
                offsets_in_table_order = offsets_in_table_order[:n]
                tiles = tiles[:n]

            row = slot // int(grid_cols)
            col = slot % int(grid_cols)
            tasks.append(PageTask(slot=slot, row=row, col=col, pt=pt, tiles=tiles, offsets_in_table_order=offsets_in_table_order))

        return Analysis(
            int(grid_cols), int(grid_rows), palette_rgb16, tasks, global_offsets_sorted,
            plan_kind="pageplan",
            total_w=int(totals["total_w"]),
            total_h=int(totals["total_h"]),
            col_widths=col_widths,
            row_heights=row_heights,
            x0=x0,
            y0=y0,
            pixel_size=ps,
            epsg=epsg,
        )

    if kind == "tileplan":
        tile_cols = int(info["tile_cols"])
        tile_rows = int(info["tile_rows"])
        total_w = int(info["total_w"])
        total_h = int(info["total_h"])
        ot_off = int(info["ot_off"])
        n = tile_cols * tile_rows

        offs_all = list(struct.unpack_from("<%dI" % n, mm, ot_off))

        georef = georef_extract(
            mm,
            header_off,
            header_len,
            scan_end=ot_off,
            dmb_expect=None,
            deep_scan=False,
        )
        x0 = y0 = ps = None
        cp = georef.get("coord_pair")
        px = georef.get("pixel_size_candidate")
        if cp is not None and px is not None:
            x0 = float(cp["x"])
            y0 = float(cp["y"])
            ps = float(px["value"])

        epsg = None
        strings = georef.get("strings") or {}
        if "GK_S3" in strings:
            epsg = 31467

        offs = [int(v) for v in offs_all if int(v) not in MISSING]
        global_offsets_sorted = sorted(set(offs))

        # Column/row sizes for the mosaic
        col_widths = [256] * tile_cols
        col_widths[-1] = max(1, total_w - 256 * (tile_cols - 1))
        row_heights = [256] * tile_rows
        row_heights[-1] = max(1, total_h - 256 * (tile_rows - 1))
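        # Example: total_w=1100 with tile_cols=5 → col_widths=[256, 256, 256, 256, 76].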

        tasks: List[PageTask] = []
        for slot in range(n):
            off0 = int(offs_all[slot])
            if off0 in MISSING:
                continue
            row = slot // tile_cols
            col = slot % tile_cols

            # Reuse the remainder sizes from col_widths/row_heights so tile and
            # mosaic geometry cannot drift apart on degenerate headers.
            w = col_widths[col]
            h = row_heights[row]

            pt = PageType(1, 1, w, h, w, h, None, None, None, None)
            tasks.append(PageTask(slot=slot, row=row, col=col, pt=pt, tiles=[(w, h)], offsets_in_table_order=[off0]))

        return Analysis(
            tile_cols, tile_rows, palette_rgb16, tasks, global_offsets_sorted,
            plan_kind="tileplan",
            total_w=total_w,
            total_h=total_h,
            col_widths=col_widths,
            row_heights=row_heights,
            x0=x0,
            y0=y0,
            pixel_size=ps,
            epsg=epsg,
        )

    raise RuntimeError(f"Unbekannte Planart: {kind!r}")


def run_extract(
    akf_path: Path,
    *,
    out_dir: Optional[Path],
    no_compress: bool,
    verbose: bool,
    no_georef: bool,
    no_vrt: bool,
    lookahead: int,
    resync_max: int,
    missing_fill: int,
    strict: bool,
    log_resync: bool,
) -> int:
    if not akf_path.exists():
        print(f"Datei nicht gefunden: {akf_path}", file=sys.stderr)
        return 2

    # Extractor dependencies
    _ensure_python_modules([("numpy", "numpy"), ("PIL", "pillow")])

    # GDAL is only needed if georeferencing or a VRT is requested at all
    gdal_translate = None
    gdalbuildvrt = None
    if not no_georef or (not no_vrt):
        gdal_translate, gdalbuildvrt = ensure_gdal_tools(interactive=_is_interactive())
        # If georeferencing was requested but GDAL is missing: give a clear notice
        if (not no_georef) and not gdal_translate:
            print("Note: GDAL not found → georeference will not be applied.", file=sys.stderr)
            no_georef = True
        if (not no_vrt) and not gdalbuildvrt:
            print("Note: GDAL not found → no VRT will be built.", file=sys.stderr)
            no_vrt = True

    out_dir2 = out_dir if out_dir is not None else (Path(".") / akf_path.stem)
    out_dir2.mkdir(parents=True, exist_ok=True)

    with akf_path.open("rb") as f:
        mm = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ)
        ana = analyze_for_extract(mm)
        mm.close()

    chunk_map = build_chunk_map(akf_path, ana.global_offsets_sorted)

    if verbose:
        print(f"Planart: {ana.plan_kind}")
        print(f"Aufgaben: {len(ana.tasks)}")
        print(f"Globale Offsets/Chunks: {len(chunk_map)}")

    row_digits = max(2, len(str(max(0, ana.grid_rows - 1))))
    col_digits = max(2, len(str(max(0, ana.grid_cols - 1))))

    resync_map: Dict[int, int] = {}
    resync_events: List[str] = []
    failures: List[str] = []
    bad_tiles = 0

    import numpy as np

    # ------------------------------------------------------------------
    # tileplan: full mosaic (a single output file)
    # ------------------------------------------------------------------
    if ana.plan_kind == "tileplan":
        if ana.total_w is None or ana.total_h is None:
            raise RuntimeError("tileplan: total_w/total_h fehlen (Analyse unvollständig).")

        total_w = int(ana.total_w)
        total_h = int(ana.total_h)

        mosaic = np.full((total_h, total_w), int(missing_fill), dtype=np.uint8)

        if ana.col_widths is None or ana.row_heights is None:
            raise RuntimeError("tileplan: col_widths/row_heights fehlen.")
        col_widths = ana.col_widths
        row_heights = ana.row_heights

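        # Prefix sums: left pixel edge of every column and bottom pixel edge of
        # every row within the mosaic.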
        col_x0 = [0] * len(col_widths)
        acc = 0
        for i, w in enumerate(col_widths):
            col_x0[i] = acc
            acc += w

        row_yb = [0] * len(row_heights)
        acc = 0
        for i, h in enumerate(row_heights):
            row_yb[i] = acc
            acc += h

        for idx, task in enumerate(ana.tasks, start=1):
            w, h = task.tiles[0]
            off0 = int(task.offsets_in_table_order[0])
            if off0 in MISSING:
                continue

            need = ((w + 1) // 2) * h
            off = resync_map.get(off0, off0)
            raw = None

            chunk = chunk_map.get(off)
            if chunk is not None and len(chunk) >= 2:
                raw0 = decode_tile_from_chunk(chunk, out_limit=0x8000, lookahead=lookahead)
                if raw0 is not None and len(raw0) >= need:
                    raw = raw0

            if raw is None:
                pos = bisect.bisect_left(ana.global_offsets_sorted, off0)
                found = None
                for j in range(pos, min(pos + resync_max, len(ana.global_offsets_sorted))):
                    cand_off = ana.global_offsets_sorted[j]
                    cand_chunk = chunk_map.get(cand_off)
                    if cand_chunk is None or len(cand_chunk) < 2:
                        continue
                    cand_raw = decode_tile_from_chunk(cand_chunk, out_limit=0x8000, lookahead=lookahead)
                    if cand_raw is None or len(cand_raw) < need:
                        continue
                    found = (cand_off, cand_raw)
                    break

                if found is not None:
                    cand_off, cand_raw = found
                    resync_map[off0] = cand_off
                    raw = cand_raw
                    if log_resync:
                        resync_events.append(f"tile row={task.row} col={task.col} off0={off0} -> off={cand_off}")
                else:
                    bad_tiles += 1
                    failures.append(f"FAIL tile row={task.row} col={task.col} off0={off0}")
                    if strict:
                        raise RuntimeError(f"Nicht dekodierbare Kachel: row={task.row} col={task.col} off0={off0}")
                    continue

            tile = bytes_to_idx(raw, w, h)

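            # AKF counts rows from the bottom, numpy from the top, so the
            # vertical offset is mirrored against the total height.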
            x0_px = col_x0[task.col]
            y_bottom_px = row_yb[task.row]
            y0_px = total_h - (y_bottom_px + h)
            mosaic[y0_px:y0_px + h, x0_px:x0_px + w] = tile

            if verbose and (idx % 200 == 0):
                print(f"[{idx:4d}/{len(ana.tasks)}] Kachel gesetzt (row={task.row}, col={task.col})")

        out_path = out_dir2 / f"{akf_path.stem}.tif"
        write_tiff(out_path, mosaic, ana.palette_rgb16, compress=not no_compress)

        if (not no_georef) and (ana.x0 is not None) and (ana.y0 is not None) and (ana.pixel_size is not None) and gdal_translate:
            minx = float(ana.x0)
            miny = float(ana.y0)
            maxx = minx + total_w * float(ana.pixel_size)
            maxy = miny + total_h * float(ana.pixel_size)
            apply_georef_with_gdal_translate(
                out_path, minx, miny, maxx, maxy,
                epsg=ana.epsg,
                compress_lzw=not no_compress,
                gdal_translate=gdal_translate,
            )
        elif verbose and (not no_georef):
            print("Hinweis: Georeferenz nicht verfügbar – x0/y0/pixel_size nicht sicher aus AKF-Header erkannt.")

        if log_resync:
            (out_dir2 / "resync_map.txt").write_text("\n".join(resync_events) if resync_events else "(keine Resync-Ereignisse)", encoding="utf-8")
            (out_dir2 / "resync_failures.txt").write_text("\n".join(failures) if failures else "(keine Fehlschläge)", encoding="utf-8")

        if bad_tiles:
            print(f"\nHinweis: {bad_tiles} Kacheln konnten nicht dekodiert werden und blieben gefüllt (missing-fill={missing_fill}).")
            if log_resync:
                print("Details: resync_failures.txt")

        print(f"\nFertig. Geschrieben: 1 Datei → {out_path}")
        return 0

    # ------------------------------------------------------------------
    # pageplan: one TIFF per page
    # ------------------------------------------------------------------
    for written, task in enumerate(ana.tasks, start=1):
        pt = task.pt
        base = pt.raw_w // pt.tile_cols if pt.tile_cols else pt.raw_w
        page = np.full((pt.eff_h, pt.eff_w), int(missing_fill), dtype=np.uint8)

        for t_i, (w, h) in enumerate(task.tiles):
            off0 = int(task.offsets_in_table_order[t_i])
            if off0 in MISSING:
                continue
            need = ((w + 1) // 2) * h

            off = resync_map.get(off0, off0)
            raw = None

            chunk = chunk_map.get(off)
            if chunk is not None and len(chunk) >= 2:
                raw0 = decode_tile_from_chunk(chunk, out_limit=0x8000, lookahead=lookahead)
                if raw0 is not None and len(raw0) >= need:
                    raw = raw0

            if raw is None:
                pos = bisect.bisect_left(ana.global_offsets_sorted, off0)
                found = None
                for j in range(pos, min(pos + resync_max, len(ana.global_offsets_sorted))):
                    cand_off = ana.global_offsets_sorted[j]
                    cand_chunk = chunk_map.get(cand_off)
                    if cand_chunk is None or len(cand_chunk) < 2:
                        continue
                    cand_raw = decode_tile_from_chunk(cand_chunk, out_limit=0x8000, lookahead=lookahead)
                    if cand_raw is None or len(cand_raw) < need:
                        continue
                    found = (cand_off, cand_raw)
                    break

                if found is not None:
                    cand_off, cand_raw = found
                    resync_map[off0] = cand_off
                    raw = cand_raw
                    if log_resync:
                        resync_events.append(
                            f"slot={task.slot} row={task.row} col={task.col} tile_index={t_i} off0={off0} -> off={cand_off}"
                        )
                else:
                    bad_tiles += 1
                    msg = f"FAIL slot={task.slot} row={task.row} col={task.col} tile_index={t_i} off0={off0}"
                    failures.append(msg)
                    if strict:
                        raise RuntimeError("Nicht dekodierbare Kachel: " + msg)
                    continue

            tile = bytes_to_idx(raw, w, h)

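            # Tiles are listed row-major from the bottom-left corner; vertical
            # stepping assumes square base tiles, and the y axis is mirrored for
            # numpy just like in the mosaic branch above.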
            tr = t_i // pt.tile_cols if pt.tile_cols else 0
            tc = t_i % pt.tile_cols if pt.tile_cols else 0
            x0 = tc * base
            y_bottom = tr * base
            y0 = pt.eff_h - (y_bottom + h)
            page[y0:y0 + h, x0:x0 + w] = tile

        name = f"page_row{task.row:0{row_digits}d}_col{task.col:0{col_digits}d}.tif"
        out_path = out_dir2 / name
        write_tiff(out_path, page, ana.palette_rgb16, compress=not no_compress)

        # Per-page georeference (only when detectable and GDAL is available)
        if (
            (not no_georef)
            and gdal_translate
            and (ana.x0 is not None) and (ana.y0 is not None) and (ana.pixel_size is not None)
            and (ana.col_widths is not None) and (ana.row_heights is not None)
        ):
            minx, miny, maxx, maxy = page_bbox_from_grid(
                ana.x0, ana.y0, ana.pixel_size,
                ana.col_widths, ana.row_heights,
                task.row, task.col,
                pt.eff_w, pt.eff_h
            )
            apply_georef_with_gdal_translate(
                out_path,
                minx, miny, maxx, maxy,
                epsg=ana.epsg,
                compress_lzw=not no_compress,
                gdal_translate=gdal_translate,
            )
        elif verbose and (not no_georef):
            print("  (Hinweis: Georeferenz nicht verfügbar – x0/y0/pixel_size nicht sicher erkannt oder GDAL fehlt.)")

        print(f"[{written:3d}] {name}  ({pt.eff_w}×{pt.eff_h} px)")

    # VRT (pageplan)
    if (not no_vrt) and (not no_georef) and gdalbuildvrt:
        try:
            vrt_path = build_vrt(out_dir2, vrt_name=f"{akf_path.stem}.vrt", gdalbuildvrt=gdalbuildvrt)
            print(f"VRT erzeugt: {vrt_path}")
        except Exception as e:
            if verbose:
                print(f"(Hinweis: VRT konnte nicht erzeugt werden: {e})", file=sys.stderr)

    if log_resync:
        (out_dir2 / "resync_map.txt").write_text("\n".join(resync_events) if resync_events else "(keine Resync-Ereignisse)", encoding="utf-8")
        (out_dir2 / "resync_failures.txt").write_text("\n".join(failures) if failures else "(keine Fehlschläge)", encoding="utf-8")

    if bad_tiles:
        print(f"\nHinweis: {bad_tiles} Kacheln konnten nicht dekodiert werden und blieben gefüllt (missing-fill={missing_fill}).")
        if log_resync:
            print("Details: resync_failures.txt")

    print(f"\nFertig. Geschrieben: {len(ana.tasks)} Dateien → {out_dir2}")
    if log_resync:
        print(f"Resync-Ereignisse: {len(resync_events)} (siehe resync_map.txt)")
    return 0


# ======================================================================================
# 11) Dialog mode – started without parameters
# ======================================================================================

_DIALOG_OPTIONS = [
    ("--verbose", "Ausführliche Fortschrittsausgaben"),
    ("--no-georef", "Keine Georeferenz schreiben (nur TIFFs)"),
    ("--no-vrt", "Keine VRT-Datei erzeugen (nur bei Seitenplänen relevant)"),
    ("--no-compress", "Keine TIFF-Kompression"),
    ("--log-resync", "Resync-Ereignisse/Fehler in Dateien protokollieren"),
    ("--strict", "Bei erster nicht dekodierbarer Kachel abbrechen"),
]


def run_dialog() -> List[str]:
    print("=" * 72)
    print("AKF-Konverter – geführter Start")
    print("=" * 72)

    while True:
        akf = _ask("Welche AKF-Datei möchten Sie umwandeln? Bitte vollen Pfad angeben: ")
        if not akf:
            raise SystemExit("Abbruch.")
        akf_path = Path(akf)
        if akf_path.exists() and akf_path.is_file():
            break
        print("Datei nicht gefunden. Bitte erneut versuchen.\n")

    while True:
        out_dir = _ask("Wo soll die umgewandelte Karte hin? Bitte Verzeichnis angeben (ohne Dateinamen): ")
        if not out_dir:
            raise SystemExit("Abbruch.")
        out_path = Path(out_dir)
        if out_path.exists() and out_path.is_dir():
            break
        if _ask_yes_no("Verzeichnis existiert nicht. Soll es angelegt werden?", default_yes=True):
            out_path.mkdir(parents=True, exist_ok=True)
            break
        print("Bitte erneut versuchen.\n")

    print("\nMöchten Sie eine der folgenden Optionen nutzen?")
    print("Bitte Nummern angeben (durch Komma getrennt) – oder leer lassen für nichts davon.\n")
    for i, (_opt, desc) in enumerate(_DIALOG_OPTIONS, start=1):
        print(f"  {i:2d}) {desc}")
    sel = _ask("\nAuswahl: ")

    chosen: List[str] = []
    if sel.strip():
        parts = [p.strip() for p in sel.split(",") if p.strip()]
        for p in parts:
            try:
                n = int(p)
            except ValueError:
                continue
            if 1 <= n <= len(_DIALOG_OPTIONS):
                chosen.append(_DIALOG_OPTIONS[n - 1][0])

    # The dialog always targets extraction
    argv = ["extract", str(akf_path), "--out-dir", str(out_path)] + chosen
    print("\nAufruf (intern):", " ".join(argv))
    print()
    return argv


# ======================================================================================
# 12) Arguments / main program
# ======================================================================================

def build_arg_parser() -> argparse.ArgumentParser:
    ap = argparse.ArgumentParser(
        description="AKF analysieren und extrahieren (pageplan + tileplan).",
        add_help=True,
    )

    sub = ap.add_subparsers(dest="cmd")

    # analyze
    ap_a = sub.add_parser("analyze", help="AKF analysieren (Bericht, Palette, Georeferenz-Kandidaten).")
    ap_a.add_argument("akf", type=Path)
    ap_a.add_argument("--text", action="store_true")
    ap_a.add_argument("--out", type=Path, default=None)
    ap_a.add_argument("--palette-png", type=Path, default=None)
    ap_a.add_argument("--header-len", type=int, default=512)
    ap_a.add_argument("--dmb", type=Path, default=None, help="Optional: DMB-Datei zur Gegenprüfung")
    ap_a.add_argument("--deep-georef", action="store_true", help="Erweitert die Georeferenzsuche (Debug).")

    # extract
    ap_e = sub.add_parser("extract", help="AKF extrahieren (TIFF, optional GeoTIFF/VRT).")
    ap_e.add_argument("akf", type=Path)
    ap_e.add_argument("--out-dir", type=Path, default=None)
    ap_e.add_argument("--no-compress", action="store_true")
    ap_e.add_argument("--verbose", action="store_true")
    ap_e.add_argument("--no-georef", action="store_true", help="Keine Georeferenz schreiben.")
    ap_e.add_argument("--no-vrt", action="store_true", help="Keine VRT-Datei erzeugen (pageplan).")
    ap_e.add_argument("--lookahead", type=int, default=2000)
    ap_e.add_argument("--resync-max", type=int, default=200)
    ap_e.add_argument("--missing-fill", type=int, default=0)
    ap_e.add_argument("--strict", action="store_true")
    ap_e.add_argument("--log-resync", action="store_true")

    # Backward compatibility ("no subcommand") is handled in main(): a leading
    # argument that is not a known subcommand is rewritten to `extract <file> ...`,
    # so all extract options keep working with the legacy call style. (A stray
    # positional here would collide with the optional subparsers during parsing.)

    return ap


def main(argv: Optional[List[str]] = None) -> int:
    if argv is None:
        argv = sys.argv[1:]

    interactive = _is_interactive()

    # Dialog mode: no parameters at all (the dialog always produces an extract call)
    if not argv and interactive:
        argv = run_dialog()

    # Backward compatibility: `python AKF-Konverter.py FILE.akf ...` is treated
    # like `extract FILE.akf ...`, so the legacy call style keeps all options.
    if argv and argv[0] not in ("analyze", "extract") and not argv[0].startswith("-"):
        argv = ["extract"] + argv

    # ------------------------------------------------------------
    # Pre-flight check:
    # - "extract" needs numpy and Pillow.
    # - "analyze" needs Pillow only when --palette-png is given.
    # ------------------------------------------------------------
    cmd = argv[0] if argv else None
    if cmd == "extract":
        _ensure_python_modules([("numpy", "numpy"), ("PIL", "pillow")])
    elif cmd == "analyze" and "--palette-png" in argv:
        _ensure_python_modules([("PIL", "pillow")])

    ap = build_arg_parser()
    args = ap.parse_args(argv)

    if args.cmd == "analyze":
        return run_analyze(
            args.akf,
            text=bool(args.text),
            out_json=args.out,
            palette_png=args.palette_png,
            header_len=int(args.header_len),
            dmb_path=args.dmb,
            deep_georef=bool(args.deep_georef),
        )

    if args.cmd == "extract":
        return run_extract(
            args.akf,
            out_dir=args.out_dir,
            no_compress=bool(args.no_compress),
            verbose=bool(args.verbose),
            no_georef=bool(args.no_georef),
            no_vrt=bool(args.no_vrt),
            lookahead=int(args.lookahead),
            resync_max=int(args.resync_max),
            missing_fill=int(args.missing_fill),
            strict=bool(args.strict),
            log_resync=bool(args.log_resync),
        )

    # If we get here, nothing meaningful was specified
    ap.print_help()
    return 2


if __name__ == "__main__":
    code = main()
    input("\nFertig. Zum Schließen bitte Eingabetaste drücken …")
    raise SystemExit(code)
