import os
import re
from get_xivapi_data import readJsonFile, IGNORELIST

# Directory that holds one "<sheet>.json" dump per sheet to validate.
BASE_DIR = "P:/extras/json/xivapi_data"
# Language codes used as key suffixes ("<leaf>_<lang>") for translated fields.
LANGS = ("de", "en", "fr", "ja")

# Same "do NOT language-expand" list as in get_language_fields().
# Field paths listed here are treated as not translated, i.e. they are not
# required to carry one "<leaf>_<lang>" key per language.
NO_LANG_EXPAND = {
    "TerritoryType.Map.Id", "SortKey", "PlaceNameIcon", "ScreenImage.Image",
    "IconObjective0", "IconObjective1", "PlaceNameRegionIcon", "Map.Id", "Index",
    "Icon1", "Name.Icon", "Icon2", "QuestRedoUIWide", "QuestRedoUILarge",
    "QuestRedoUISmall", "StampIcon", "Excellent", "FailImage", "Good", "Great",
    "Poor", "IconMain", "IconOff", "MapIcon", "UiIcon", "IconMap",
    "IconDutyFinder", "Id", "Image", "Icon", "IconReputation", "IconLarge",
    "IconSmall", "IconLarge2", "IconSmall2", "BottomImage", "TopImage"
}

# Matches keys of the form "<base>_<lang>"; group 1 is the base name,
# group 2 the language code.
LANG_SUFFIX_RE = re.compile(r"^(.*)_(de|en|fr|ja)$")


def strip_lang_suffix(key: str) -> str:
    """
    Return ``key`` without its trailing language suffix.

    A key that LANG_SUFFIX_RE does not match is returned unchanged.
    """
    matched = LANG_SUFFIX_RE.match(key)
    if matched is None:
        return key
    return matched.group(1)


def is_translated_field(sheet: str, field_path: str) -> bool:
    """
    Tell whether ``field_path`` of ``sheet`` is expected in per-language form
    (i.e. with _de/_en/_fr/_ja keys for the leaf).

    A field counts as translated unless it is listed in NO_LANG_EXPAND or in
    the IGNORELIST entry for ``sheet``.
    """
    excluded_globally = field_path in NO_LANG_EXPAND
    # The per-sheet ignore list is only consulted when the global list did not match.
    return not (excluded_globally or field_path in IGNORELIST.get(sheet, []))


def extract_expected_specs(sheet: str, fields: list) -> list[dict]:
    """
    Turn the ``fields`` list of a sheet (as found in translation_filenames.json)
    into a list of expected checks:
      { "path": str, "translated": bool }

    Accepted entry shapes:
      - str: the field path itself
      - dict (rename spec): the SECOND key is the final name; dicts with fewer
        than two keys are skipped
      - non-empty list: only its first element is considered, using the two
        rules above

    Anything that does not resolve to a non-empty string is dropped.
    """

    def final_name(entry):
        # A rename spec contributes its second key; everything else passes through as-is.
        if isinstance(entry, dict):
            names = list(entry)
            return names[1] if len(names) >= 2 else None
        return entry

    candidates = []
    for spec in fields:
        if isinstance(spec, (str, dict)):
            candidates.append(final_name(spec))
        elif isinstance(spec, list) and spec:
            candidates.append(final_name(spec[0]))

    return [
        {"path": name, "translated": is_translated_field(sheet, name)}
        for name in candidates
        if isinstance(name, str) and name
    ]


def check_translated_leaf(container: dict, leaf: str) -> list[str]:
    """
    Return the ``<leaf>_<lang>`` keys that ``container`` lacks.

    One key per entry of LANGS is expected; the result keeps LANGS order and is
    empty when every language variant is present.
    """
    expected = (f"{leaf}_{code}" for code in LANGS)
    return [name for name in expected if name not in container]


def get_value_for_langkey(container: dict, leaf: str):
    """
    Fetch the value stored for ``leaf`` in ``container``.

    The plain key wins when present. Otherwise the first ``<leaf>_<lang>`` key
    found (in LANGS order) is used. Returns None when no variant exists.
    """
    if leaf in container:
        return container[leaf]

    localized = (f"{leaf}_{code}" for code in LANGS)
    hit = next((name for name in localized if name in container), None)
    return None if hit is None else container[hit]


def traverse_path(row: dict, path: str):
    """
    Walk ``row`` along the dot-separated ``path`` up to (not including) the
    final segment. A segment ending in "[]" must hold a list, and the walk
    continues into that list's first element.

    Returns a 5-tuple:
      ok, container, leaf, errors, stopped_on_empty_array

    - ok: False when a parent segment is missing or is not a list where one
      was expected; container and leaf are then None and errors has one message
    - container: the object that should contain the leaf, or the empty list
      itself when the walk stopped early on it
    - leaf: last segment name with a trailing "[]" stripped
    - stopped_on_empty_array: True if an empty list was hit before the leaf
    """
    *parents, last = path.split(".")
    leaf = last[:-2] if last.endswith("[]") else last

    node = row
    for part in parents:
        wants_list = part.endswith("[]")
        key = part[:-2] if wants_list else part

        if not (isinstance(node, dict) and key in node):
            return False, None, None, [f"missing '{key}'"], False

        node = node[key]
        if not wants_list:
            continue

        if not isinstance(node, list):
            return False, None, None, [f"'{key}' expected array"], False
        if not node:
            # Empty list: nothing deeper to inspect, so report success and flag the early stop.
            return True, node, leaf, [], True
        node = node[0]

    return True, node, leaf, [], False


def validate_row_against_specs(sheet: str, row: dict, specs: list[dict]) -> list[str]:
    """
    Check one ``row`` against every expected spec and collect the problems.

    Each spec is a dict with "path" and "translated". The result holds at most
    one "<path>: <detail>" message per spec and is empty when the row satisfies
    all of them. ``sheet`` is accepted but not used by the checks.
    """

    def problem(path, translated):
        # Returns the detail text for this spec, or None when it is satisfied.
        ok, container, leaf, errs, stopped_early = traverse_path(row, path)
        if not ok:
            return ", ".join(errs)
        if stopped_early:
            # Walk ended on an empty array: nothing deeper to check for this row.
            return None
        if not isinstance(container, dict):
            return "parent is not an object"

        if path.split(".")[-1].endswith("[]"):
            # Array leaf: only presence and list type are checked.
            if leaf not in container:
                return f"missing '{leaf}'"
            if not isinstance(container[leaf], list):
                return f"'{leaf}' expected array"
            return None

        if translated:
            absent = check_translated_leaf(container, leaf)
            return f"missing {', '.join(absent)}" if absent else None

        if get_value_for_langkey(container, leaf) is None:
            return f"missing '{leaf}'"
        return None

    issues: list[str] = []
    for spec in specs:
        path = spec["path"]
        detail = problem(path, spec["translated"])
        if detail is not None:
            issues.append(f"{path}: {detail}")
    return issues


def validate_file(sheet: str, fields: list) -> tuple[bool, dict]:
    """
    Validate "<BASE_DIR>/<sheet>.json" against the expected ``fields``.

    Rows are checked in order and the first row without issues makes the file
    valid. Returns ``(ok, info)`` where ``info`` is one of:
      - {"valid_row_index": int} when a fully matching row was found
      - {"file": ["FILE_NOT_FOUND"]} or {"file": ["EMPTY_FILE"]}
      - {"best_row_index": ..., "missing_or_invalid": [...]} describing the
        earliest row with the fewest issues when no row matched
    """
    path = os.path.join(BASE_DIR, f"{sheet}.json")
    if not os.path.exists(path):
        return False, {"file": ["FILE_NOT_FOUND"]}

    data = readJsonFile(path)
    if not data:
        return False, {"file": ["EMPTY_FILE"]}

    specs = extract_expected_specs(sheet, fields)

    best = None  # (row index, issues) of the row with the fewest issues so far
    for position, row in enumerate(data.values()):
        found = validate_row_against_specs(sheet, row, specs)
        if not found:
            return True, {"valid_row_index": position}
        # Strict "<" keeps the earliest row among equally bad ones.
        if best is None or len(found) < len(best[1]):
            best = (position, found)

    best_index, best_found = (None, []) if best is None else best
    return False, {
        "best_row_index": best_index,
        "missing_or_invalid": best_found,
    }


def main():
    """
    Validate every sheet listed in ./translation_filenames.json and print a report.

    Prints a single success line when all sheets are valid. Otherwise prints,
    per broken sheet, either the file-level error or the de-duplicated issues
    of its best row.
    """
    config = readJsonFile("./translation_filenames.json")

    broken = {}
    for sheets in config.values():
        for sheet, fields in sheets.items():
            ok, info = validate_file(sheet, fields)
            if not ok:
                broken[sheet] = info

    if not broken:
        print("✔ All JSON files valid (found at least one matching row per sheet)")
        return

    print("✖ Invalid JSON files (no row matches all expected fields):\n")
    for sheet, info in broken.items():
        if "file" in info:
            print(f"{sheet}: {', '.join(info['file'])}")
            continue

        print(f"{sheet} (best row = {info.get('best_row_index')}):")
        # dict.fromkeys drops repeated messages while keeping first-seen order.
        for message in dict.fromkeys(info.get("missing_or_invalid", [])):
            print(f"  - {message}")
        print()


# Run the validation only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
