import os
import re
import time
import threading
import requests
#import requests_cache
from concurrent.futures import ThreadPoolExecutor, as_completed
from requests.exceptions import ConnectTimeout, ReadTimeout, ConnectionError, HTTPError
from ffxiv_aku import *
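
# Bulk-downloads every sheet exposed by xivapi.akurosia.de into one JSON
# file per sheet, processing sheets in parallel with a small thread pool.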

#requests_cache.install_cache(r"xivapi_cache2.db")

SOFT_HYPHEN = "\u00AD"
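# Page sizes to step down through when the API reports its row cap.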
_LIMIT_LEVELS = [5000, 2500, 1000, 500, 250, 150, 100, 75, 50, 25, 10]
MAX_WORKERS = 8

_thread_local = threading.local()


class RowLimitError(RuntimeError):
    """Raised when the API reports that a query hit its 20,000-row cap."""


def _session() -> requests.Session:
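    """Return a requests.Session owned by the calling thread.

    Sessions are not guaranteed to be thread-safe, so each worker keeps its
    own session (and with it, its own connection pool) in thread-local
    storage.
    """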
    s = getattr(_thread_local, "session", None)
    if s is None:
        s = requests.Session()
        _thread_local.session = s
    return s


def strip_soft_hyphen(text: str) -> str:
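    """Strip soft hyphens (U+00AD) and literal "[p]" markers from text."""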
    return text.replace(SOFT_HYPHEN, "").replace("[p]", "")


def normalize_key(key: str) -> str:
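    """Normalize an API field key: "Name@lang(en)" becomes "Name_en", and
    any "@as(html)" decorator is dropped."""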
    key = strip_soft_hyphen(key)
    match = re.match(r"^(.*?)@lang\((.*?)\)$", key)
    if match:
        return f"{match.group(1)}_{match.group(2)}".replace("@as(html)", "")
    return key.replace("@as(html)", "")


def normalize_dict(d):
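    """Recursively normalize an API row.

    Strings lose their soft hyphens, ints are stringified, and the nested
    "fields"/"transient" objects are flattened into one dict with normalized
    keys (transient values win on key collisions).
    """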
    if isinstance(d, str):
        return strip_soft_hyphen(d)
    if isinstance(d, int):
        return str(d)
    if isinstance(d, list):
        return [normalize_dict(item) for item in d]
    if not isinstance(d, dict):
        return d

    raw_fields = d.get("fields", {})
    raw_transient = d.get("transient", {})

    normalized_fields = {normalize_key(k): normalize_dict(v) for k, v in raw_fields.items()}
    normalized_transient = {normalize_key(k): normalize_dict(v) for k, v in raw_transient.items()}

    merged = {**normalized_fields, **normalized_transient}

    for k, v in d.items():
        if k not in ("fields", "transient"):
            merged[normalize_key(k)] = normalize_dict(v)

    return merged


def _looks_like_row_limit_message(text: str) -> bool:
    if not text:
        return False
    t = text.lower()
    # "20,000 row" also matches "20,000 rows", so two checks cover all variants.
    return ("20,000 row" in t) or ("20000 row" in t)


def _raise_if_row_limit_from_json(payload):
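    """Raise RowLimitError if a JSON payload carries a row-cap message."""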
    if isinstance(payload, dict):
        # common patterns: {"error": "..."} or {"message": "..."} or {"detail": "..."}
        for k in ("error", "message", "detail", "errors"):
            v = payload.get(k)
            if isinstance(v, str) and _looks_like_row_limit_message(v):
                raise RowLimitError(v)
            if isinstance(v, list):
                joined = " ".join([x for x in v if isinstance(x, str)])
                if _looks_like_row_limit_message(joined):
                    raise RowLimitError(joined)


def _get_json_with_retries(url: str, max_retries: int = 10, retry_delay: int = 1):
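    """GET `url` and return the parsed JSON payload.

    Transient network errors are retried up to `max_retries` times, sleeping
    `retry_delay` seconds in between. Row-cap messages (whether in a 200 JSON
    body or an HTTP error body) raise RowLimitError immediately so callers
    can shrink their page size.
    """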
    attempt = 0
    while True:
        try:
            resp = _session().get(url, timeout=120)

            # Try parse JSON regardless of status; some APIs return 200 with {"error": "..."}.
            payload = None
            ct = (resp.headers.get("content-type") or "").lower()
            if "application/json" in ct:
                payload = resp.json()
                _raise_if_row_limit_from_json(payload)

            # Non-2xx -> raise, but still allow row-limit detection from body.
            if resp.status_code >= 400:
                body = resp.text or ""
                if _looks_like_row_limit_message(body):
                    raise RowLimitError(body)
                resp.raise_for_status()

            # 2xx with a JSON content type: return the parsed payload.
            # Otherwise try to parse JSON anyway; some responses omit the header.
            if payload is not None:
                return payload

            payload = resp.json()
            _raise_if_row_limit_from_json(payload)
            return payload

        except RowLimitError:
            raise
        except (ConnectTimeout, ReadTimeout, ConnectionError):
            attempt += 1
            if attempt >= max_retries:
                raise
            time.sleep(retry_delay)
        except HTTPError as e:
            # Detect row-limit in HTTP error bodies too
            body = ""
            try:
                body = e.response.text or ""
            except Exception:
                body = ""
            if _looks_like_row_limit_message(body):
                raise RowLimitError(body)
            raise


def _handle_request(base_url, max_retries=10, retry_delay=1, limit_levels=None):
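    """Fetch every row behind a paginated sheet URL.

    Pages via the `after` cursor and steps down through `limit_levels`
    whenever the API reports its row cap. Returns a dict keyed by "row_id"
    (or "row_id.subrow_id" for sheets with subrows) of normalized rows.
    """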
    limit_levels = limit_levels or _LIMIT_LEVELS
    after = None
    prev_after = None
    all_rows = {}

    limit_idx = 0
    limit = limit_levels[limit_idx]

    while True:
        url = base_url + f"&limit={limit}" + (f"&after={after}" if after is not None else "")

        try:
            data = _get_json_with_retries(url, max_retries=max_retries, retry_delay=retry_delay)
        except RowLimitError:
            if limit_idx < len(limit_levels) - 1:
                limit_idx += 1
                limit = limit_levels[limit_idx]
                print(f"[LIMIT] row cap hit, switching to limit={limit} and retrying page (after={after})")
                continue
            raise

        rows = data.get("rows", [])
        if not rows:
            break

        has_subrows = any(r.get("subrow_id", 0) != 0 for r in rows)
        for row in rows:
            key = f'{row["row_id"]}.{row.get("subrow_id", 0)}' if has_subrows else str(row["row_id"])
            all_rows[key] = normalize_dict(row)

        # pagination cursor: use last returned row_id (works even if ids are sparse)
        after = rows[-1]["row_id"]

        if after == prev_after:  # small sheets with subrows can loop otherwise
            break
        prev_after = after

    return all_rows


def get_all_sheets(language: str = "de"):
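    """Return the API's sheet index: {"sheets": [{"name": ...}, ...]}."""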
    url = f"https://xivapi.akurosia.de/api/1/sheet?language={language}"
    return _get_json_with_retries(url)


def get_first_row_ref(sheet: str, language: str = "de") -> str:
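    """Return a reference to the sheet's first row ("row_id", or
    "row_id:subrow_id" for subrow sheets), used to probe the field layout."""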
    url = f"https://xivapi.akurosia.de/api/1/sheet/{sheet}?language={language}&limit=1"
    data = _get_json_with_retries(url)
    rows = data.get("rows", [])
    if not rows:
        raise ValueError(f"No rows returned for sheet={sheet} with limit=1")

    r0 = rows[0]
    row_id = r0.get("row_id")
    subrow_id = r0.get("subrow_id", 0)
    if row_id is None:
        raise ValueError(f"First row missing row_id for sheet={sheet}")

    return f"{row_id}:{subrow_id}" if subrow_id not in (None, 0) else str(row_id)


def get_fields_for_sheet(sheet: str, language: str = "de") -> str:
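    """Build the `fields` query value for a sheet by probing its first row.

    Dict-valued fields that expose a "value" member are requested as
    "Name.value"; everything else is requested by name.
    """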
    first_ref = get_first_row_ref(sheet, language=language)
    url = f"https://xivapi.akurosia.de/api/1/sheet/{sheet}/{first_ref}?language={language}"
    data = _get_json_with_retries(url)

    fields_obj = data.get("fields", {}) or {}
    out = []
    for k, v in fields_obj.items():
        if isinstance(v, dict) and "value" in v:
            out.append(f"{k}.value")
        else:
            out.append(k)

    return ",".join(out)


def get_sheet(sheet: str, language: str = "de", fields: str | None = None):
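    """Download a full sheet, optionally restricted to the given `fields`."""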
    base_url = f"https://xivapi.akurosia.de/api/1/sheet/{sheet}?language={language}"
    if fields:
        base_url += f"&fields={fields}"
    return _handle_request(base_url)


def process_one_sheet(name: str, out_dir: str, language: str = "de") -> str:
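    """Download one sheet into `<out_dir>/<name>.json`, skipping files that
    already exist."""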
    out_path = os.path.join(out_dir, f"{name}.json")
    if os.path.exists(out_path):
        return f"[SKIP] {name}"

    fields = get_fields_for_sheet(name, language=language)
    data = get_sheet(name, language=language, fields=fields)
    writeJsonFile(out_path, data, keys_per_line=True)
    return f"[OK] {name}"


if __name__ == "__main__":
    out_dir = r"../xivapi_data2"
    os.makedirs(out_dir, exist_ok=True)

    sheets = get_all_sheets(language="de")
    #sheets = {"sheets":[{"name":"Action"},{"name":"Quest"}]}
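    # Names containing "/" are nested sheets (e.g. per-quest text); skip
    # those, plus anything already downloaded.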
    names = [s["name"] for s in sheets["sheets"] if "/" not in s["name"]]
    names = [n for n in names if not os.path.exists(os.path.join(out_dir, f"{n}.json"))]

    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as ex:
        futures = {ex.submit(process_one_sheet, name, out_dir, "de"): name for name in names}
        for fut in as_completed(futures):
            name = futures[fut]
            try:
                print(fut.result())
            except Exception as e:
                print(f"[ERROR] {name}: {e}")
