from __future__ import annotations

from pathlib import Path
from typing import Any

from ..services.canonical_service import CanonicalService
from ..utils.json_utils import read_json, write_json
from ..utils.time_utils import now_iso


class ExtractionService:
    """Processes scanned documents into canonical JSON + markdown summaries.

    For one document directory the service reads the manifest, picks the
    original evidence file, asks ``CanonicalService`` to build the document,
    entities and quality payloads, writes them under ``extracted/``, writes a
    markdown summary under ``summary/`` and finally updates the manifest.
    """

    def __init__(self):
        self.canonical_service = CanonicalService()

    def process_document(self, manifest_path: Path) -> dict[str, Any]:
        """Run extraction for the document described by ``manifest_path``.

        The document directory is taken to be two levels above the manifest
        file (``manifest_path.parents[1]``). The first file (in sorted order)
        found in ``<doc_dir>/original`` is used as the evidence file.

        Args:
            manifest_path: Path to the document's manifest JSON file.

        Returns:
            A dict with ``document_id``, ``doc_id``, ``category`` and
            ``subcategory`` copied from the manifest, plus string paths for
            ``manifest``, ``original``, ``document_json``, ``entities_json``,
            ``quality_json`` and ``summary``.

        Raises:
            ValueError: If the manifest is missing/empty, or if no original
                file exists for the document.
        """
        manifest = read_json(manifest_path, default={}) or {}
        if not manifest:
            raise ValueError(f"Manifest not found or empty: {manifest_path}")

        doc_dir = manifest_path.parents[1]
        original_dir = doc_dir / "original"
        # is_dir() (rather than exists()) so that a stray file named
        # "original" yields the ValueError below instead of an OSError.
        original_files = (
            sorted(p for p in original_dir.iterdir() if p.is_file())
            if original_dir.is_dir()
            else []
        )
        if not original_files:
            raise ValueError(f"No original file found for document at {doc_dir}")

        original_path = original_files[0]
        text_preview = self._extract_text_preview(original_path)

        document_json = self.canonical_service.build_document_json(manifest=manifest, text_preview=text_preview)
        entities_json = self.canonical_service.build_entities_json(manifest=manifest, text_preview=text_preview)
        quality_json = self.canonical_service.build_quality_json(manifest=manifest, text_preview=text_preview)

        # One timestamp for the whole run so that document.json, the evidence
        # entry and the manifest all agree on when processing happened.
        timestamp = now_iso()
        auto_confirmed = bool(quality_json.get("auto_confirmed"))

        if auto_confirmed:
            document_json["review_status"] = "auto_confirmed"
            document_json["confirmed_at"] = timestamp
            document_json["confirmed_by"] = "system_auto_threshold"
            document_json["verified_evidence"] = list(document_json.get("verified_evidence") or []) + [
                {
                    "field": "quality.quality.confidence",
                    # "quality" may be present but None; treat that as empty.
                    "value": (quality_json.get("quality") or {}).get("confidence"),
                    "confirmed": True,
                    "confirmed_at": timestamp,
                    "confirmed_by": "system_auto_threshold",
                    "note": quality_json.get("auto_confirm_reason"),
                }
            ]

        extracted_dir = doc_dir / "extracted"
        summary_dir = doc_dir / "summary"
        document_path = extracted_dir / "document.json"
        entities_path = extracted_dir / "entities.json"
        quality_path = extracted_dir / "quality.json"
        summary_path = summary_dir / "summary.md"

        # Path.write_text does not create parent directories, and whether
        # write_json does is not visible here, so make sure both exist.
        extracted_dir.mkdir(parents=True, exist_ok=True)
        summary_dir.mkdir(parents=True, exist_ok=True)

        write_json(document_path, document_json)
        write_json(entities_path, entities_json)
        write_json(quality_path, quality_json)

        summary_md = self.canonical_service.build_summary_markdown(
            manifest=manifest,
            document_json=document_json,
            entities_json=entities_json,
            quality_json=quality_json,
        )
        summary_path.write_text(summary_md + "\n", encoding="utf-8")

        # The manifest is updated last, after all derived artifacts are on disk.
        if auto_confirmed:
            manifest["status"] = "confirmed"
            manifest["confirmed_at"] = timestamp
            manifest["confirmed_by"] = "system_auto_threshold"
        else:
            manifest["status"] = "processed"
        manifest["processed_at"] = timestamp
        manifest["updated_at"] = timestamp
        write_json(manifest_path, manifest)

        return {
            "document_id": manifest.get("document_id", manifest.get("doc_id")),
            "doc_id": manifest.get("doc_id"),
            "category": manifest.get("category"),
            "subcategory": manifest.get("subcategory"),
            "manifest": str(manifest_path),
            "original": str(original_path),
            "document_json": str(document_path),
            "entities_json": str(entities_path),
            "quality_json": str(quality_path),
            "summary": str(summary_path),
        }

    @staticmethod
    def _extract_text_preview(path: Path, max_chars: int = 1200) -> str:
        """Return up to ``max_chars`` characters describing ``path``'s content.

        Files with a ``.txt``, ``.md``, ``.json`` or ``.csv`` suffix
        (case-insensitive) are decoded as UTF-8, silently dropping undecodable
        bytes. Any other suffix yields a fixed "binary evidence" placeholder.

        Args:
            path: File to preview.
            max_chars: Maximum number of characters to return.

        Returns:
            The preview text, or ``""`` if a text file cannot be read.
        """
        suffix = path.suffix.lower()
        if suffix in {".txt", ".md", ".json", ".csv"}:
            try:
                with path.open(encoding="utf-8", errors="ignore") as handle:
                    # Read only what is needed instead of the whole file; a
                    # negative limit falls back to a full read so the slice
                    # below keeps its usual meaning.
                    text = handle.read(max_chars if max_chars >= 0 else -1)
            except (OSError, ValueError):
                # Best-effort preview: an unreadable file yields no preview.
                return ""
            return text[:max_chars]
        return f"Binary evidence file detected ({suffix or 'no extension'})."[:max_chars]
