from __future__ import annotations

from typing import Any

from ..config import AUTO_CONFIRM_THRESHOLD
from ..utils.time_utils import now_iso


class CanonicalService:
    """Builds canonical extraction payloads from low-level file observations."""

    def build_document_json(self, *, manifest: dict[str, Any], text_preview: str) -> dict[str, Any]:
        source_file = (manifest.get("files") or {}).get("original", manifest.get("filename"))
        return {
            "document_id": manifest.get("document_id", manifest.get("doc_id")),
            "doc_id": manifest.get("doc_id"),
            "applicant_id": manifest.get("applicant_id"),
            "document_type": manifest.get("document_type"),
            "category": manifest.get("category"),
            "subcategory": manifest.get("subcategory"),
            "filename": manifest.get("filename"),
            "extension": manifest.get("extension"),
            "source_type": self._infer_source_type(manifest.get("extension", "")),
            "source_file": source_file,
            "text_preview": text_preview,
            "owner": {
                "full_name": None,
                "given_names": None,
                "surname": None,
                "normalized_name": None,
                "nationality": None,
            },
            "identifiers": {
                "passport_number": None,
                "national_id": None,
                "personal_number": None,
            },
            "dates": {
                "date_of_birth": None,
                "issue_date": None,
                "expiry_date": None,
            },
            "relevance": {
                "identity": manifest.get("category") == "identity",
                "visa_482": True,
                "visa_186": True,
                "skill_assessment": False,
            },
            "review_status": "pending_review",
            "verified_evidence": [],
            "inferred_suggestions": [],
            "missing_evidence": [],
            "processed_at": now_iso(),
        }

    def build_entities_json(self, *, manifest: dict[str, Any], text_preview: str) -> dict[str, Any]:
        filename = str(manifest.get("filename", ""))
        hints = []

        for token in ["passport", "degree", "employment", "contract", "reference", "translation"]:
            if token in filename.lower() or token in text_preview.lower():
                hints.append(token)

        return {
            "document_id": manifest.get("document_id", manifest.get("doc_id")),
            "doc_id": manifest.get("doc_id"),
            "category": manifest.get("category"),
            "subcategory": manifest.get("subcategory"),
            "entities": {
                "detected_keywords": sorted(set(hints)),
                "people": [],
                "organizations": [],
                "dates": [],
                "occupations": [],
            },
            "verification_notes": [
                "V1 extraction is metadata + text-preview based. Add OCR/NER parsers in extraction_service for production."
            ],
            "processed_at": now_iso(),
        }

    def build_quality_json(self, *, manifest: dict[str, Any], text_preview: str) -> dict[str, Any]:
        has_text = bool(text_preview.strip())
        extension = str(manifest.get("extension") or "").lower()
        is_image = extension in {".jpg", ".jpeg", ".png", ".webp", ".tif", ".tiff"}
        confidence = self._estimate_confidence(extension=extension, has_text=has_text)
        auto_confirm = confidence >= AUTO_CONFIRM_THRESHOLD and has_text
        return {
            "document_id": manifest.get("document_id", manifest.get("doc_id")),
            "doc_id": manifest.get("doc_id"),
            "ocr_used": is_image,
            "image_quality": "unknown" if not is_image else "manual_review_required",
            "needs_rotation": is_image,
            "issues": [] if has_text else ["No text preview extracted; likely scanned image or binary file."],
            "needs_manual_review": not auto_confirm,
            "verified": auto_confirm,
            "auto_confirmed": auto_confirm,
            "auto_confirm_threshold": AUTO_CONFIRM_THRESHOLD,
            "auto_confirm_reason": f"confidence {confidence:.2f} >= threshold {AUTO_CONFIRM_THRESHOLD:.2f}"
            if auto_confirm
            else None,
            "quality": {
                "readable_text_detected": has_text,
                "confidence": confidence,
                "recommended_next_steps": [
                    "Validate key facts manually against original evidence."
                    if not auto_confirm
                    else "Auto-confirmed by confidence threshold; optional manual spot-check recommended.",
                    "Use verified_evidence only for sponsor submission drafts.",
                ],
            },
            "processed_at": now_iso(),
        }

    def build_summary_markdown(
        self,
        *,
        manifest: dict[str, Any],
        document_json: dict[str, Any],
        entities_json: dict[str, Any],
        quality_json: dict[str, Any],
    ) -> str:
        return "\n".join(
            [
                f"# Evidence Summary: {manifest.get('filename', 'unknown')}",
                "",
                "## Purpose",
                "This summary supports migration evidence preparation workflows only.",
                "It does not determine visa approval and does not replace legal advice.",
                "",
                "## Verified Evidence",
                "- None marked yet (manual review required).",
                "",
                "## Inferred Suggestions",
                "- Review detected keywords and map facts into verified evidence fields.",
                "",
                "## Missing Evidence",
                "- Confirm mandatory fields for subclass 482/186 package are complete.",
                "",
                "## Extracted Hints",
                f"- Keywords: {', '.join(entities_json.get('entities', {}).get('detected_keywords', [])) or 'None'}",
                "",
                "## Quality",
                f"- Confidence: {quality_json.get('quality', {}).get('confidence', 0)}",
                f"- Readable text detected: {quality_json.get('quality', {}).get('readable_text_detected', False)}",
                "",
                "## Canonical JSON",
                f"- doc_id: {document_json.get('doc_id')}",
                f"- category: {document_json.get('category')}",
                f"- subcategory: {document_json.get('subcategory')}",
                f"- document_type: {document_json.get('document_type')}",
            ]
        )

    @staticmethod
    def _infer_source_type(extension: str) -> str:
        mapping = {
            ".pdf": "pdf",
            ".jpg": "image",
            ".jpeg": "image",
            ".png": "image",
            ".txt": "text",
            ".md": "text",
            ".doc": "document",
            ".docx": "document",
        }
        return mapping.get(extension.lower(), "unknown")

    @staticmethod
    def _estimate_confidence(*, extension: str, has_text: bool) -> float:
        if not has_text:
            return 0.10
        if extension in {".pdf", ".txt", ".md", ".json", ".csv"}:
            return 0.93
        if extension in {".jpg", ".jpeg", ".png", ".webp", ".tif", ".tiff"}:
            return 0.89
        return 0.70
