from __future__ import annotations

from dataclasses import dataclass
from datetime import datetime
import logging
import re
from typing import Any

from bson import ObjectId
from fastapi.encoders import jsonable_encoder

from app.batch_processing.infrastructure.mongo_repositories import MongoBatchCaseRepository
from app.services.clinical_processing import (
    extraer_diagnosticos_quirurgicos,
    extraer_factura_json,
    extraer_nombre_paciente,
    extraer_procedimientos_quirurgicos,
    procesar_documento_generico,
    procesar_documento_quirurgico,
    procesar_factura,
    procesar_laboratorio,
    procesar_prescripcion,
    procesar_radiologia,
)
from app.services.patient_name_extraction import extract_patient_name_from_text
from app.services.soat_processing import _parse_cie10_lines
from modules.processing.resumen_google import HistoriaClinicaRequest


# Module-level logger used by every method of the service defined below.
logger = logging.getLogger(__name__)


# Recognises a diagnosis line that already starts with a code (CIE-10, going by
# the constant's name): one letter other than "U", two digits, an optional
# extra alphanumeric character and an optional ".X"/".XX" suffix. The code must
# be followed by a separator (hyphen, colon, en/em dash, or plain whitespace)
# and a non-empty description. Matching is case-insensitive; the two parts are
# exposed as the named groups "codigo" and "descripcion".
_CIE10_INLINE_PATTERN = re.compile(
    r"^\s*(?P<codigo>[A-TV-Z][0-9]{2}[0-9A-Z]?(?:\.[0-9A-Z]{1,2})?)\s*(?:[-:–—]\s*|\s+)(?P<descripcion>.+)$",
    flags=re.IGNORECASE,
)


@dataclass
class ClinicalDocumentRequest:
    """Input consumed by ``ClinicalDocumentService.process_and_persist``.

    The first four fields are required. Every other field defaults to an empty
    string, except ``ingestion_source`` which defaults to ``"manual"``.
    """
    # Document text; it is analysed and stored under ``descripcion``.
    raw_text: str
    # Document type used to choose the processor; blank falls back to "generico".
    detected_type: str
    # Owner of the document; stored under ``usuario``.
    username: str
    # Name of the uploaded file; stored under ``nombre_archivo``.
    original_name: str
    # Case identifier; also used to look up the patient name of a batch case.
    case_key: str = ""
    case_number: str = ""
    patient_id: str = ""
    batch_id: str = ""
    # When set, an existing analysis with the same user, batch file and
    # ingestion source is reused instead of creating a new one.
    batch_file_id: str = ""
    ingestion_source: str = "manual"
    # Takes precedence over any extracted name unless blank or "desconocido".
    provided_patient_name: str = ""


class ClinicalDocumentService:
    """Materializa documentos clinicos usando el pipeline existente."""

    def __init__(
        self,
        *,
        mongo_storage,
        mongo_analyses,
        client_groq,
        client_gemini,
        cie10_retriever,
        cups_retriever,
        colombia_tz,
    ) -> None:
        self.mongo_analyses = mongo_analyses
        self.mongo_storage = mongo_storage
        self.client_groq = client_groq
        self.client_gemini = client_gemini
        self.cie10_retriever = cie10_retriever
        self.cups_retriever = cups_retriever
        self.colombia_tz = colombia_tz
        self.batch_case_repository = MongoBatchCaseRepository()

    def process_and_persist(self, request: ClinicalDocumentRequest) -> dict[str, Any]:
        existing = self._find_existing_document(request)
        if existing:
            self._ensure_legacy_history(existing)
            return self._serialize(existing, reused=True)

        payload = self._build_payload(request)
        safe_payload = jsonable_encoder(payload)
        inserted = self.mongo_analyses.collection.insert_one(safe_payload)
        payload["_id"] = str(inserted.inserted_id)
        self._ensure_legacy_history(payload)
        return self._serialize(payload, reused=False)

    def repair_existing_documents_for_user(self, username: str, *, limit: int = 200) -> dict[str, int]:
        repaired_patient_names = 0
        repaired_legacy_histories = 0
        cursor = self.mongo_analyses.collection.find(
            {
                "usuario": username,
                "tipo_documento": {"$nin": ["epicrisis", "epicrisis_case_cache"]},
            }
        ).sort([("fecha_analisis", -1)]).limit(max(1, int(limit or 200)))

        for doc in cursor:
            payload_updates: dict[str, Any] = {}
            nombre_paciente = self._resolve_patient_name_from_sources(
                provided_patient_name=doc.get("nombre_paciente", ""),
                case_key=doc.get("case_key", ""),
                username=username,
                analisis_html=doc.get("analisis_html", ""),
                raw_text=doc.get("descripcion", ""),
            )
            if nombre_paciente and nombre_paciente != doc.get("nombre_paciente"):
                payload_updates["nombre_paciente"] = nombre_paciente
                repaired_patient_names += 1

            if payload_updates and doc.get("_id") is not None:
                self.mongo_analyses.collection.update_one(
                    {"_id": doc["_id"]},
                    {"$set": payload_updates},
                )
                doc = {**doc, **payload_updates}

            legacy_id = self._ensure_legacy_history(doc)
            if legacy_id and legacy_id != doc.get("legacy_historia_id"):
                repaired_legacy_histories += 1

        return {
            "repaired_patient_names": repaired_patient_names,
            "repaired_legacy_histories": repaired_legacy_histories,
        }

    def _find_existing_document(
        self, request: ClinicalDocumentRequest
    ) -> dict[str, Any] | None:
        if not request.batch_file_id:
            return None
        return self.mongo_analyses.collection.find_one(
            {
                "usuario": request.username,
                "batch_file_id": request.batch_file_id,
                "ingestion_source": request.ingestion_source,
            }
        )

    def _build_payload(self, request: ClinicalDocumentRequest) -> dict[str, Any]:
        detected_type = str(request.detected_type or "generico").strip() or "generico"
        raw_text = str(request.raw_text or "")
        now = datetime.now(self.colombia_tz).isoformat()
        analisis_html, error_analisis = self._generate_analysis(raw_text, detected_type)
        nombre_paciente = self._resolve_patient_name_from_sources(
            provided_patient_name=request.provided_patient_name,
            case_key=request.case_key,
            username=request.username,
            analisis_html=analisis_html,
            raw_text=raw_text,
        )

        payload: dict[str, Any] = {
            "mensaje": f"Documento {detected_type} procesado exitosamente",
            "nombre_archivo": request.original_name,
            "usuario": request.username,
            "nombre_paciente": nombre_paciente,
            "tipo_documento": detected_type,
            "fecha_analisis": now,
            "analisis_html": analisis_html,
            "descripcion": raw_text,
            "case_key": request.case_key,
            "case_number": request.case_number,
            "patient_id": request.patient_id,
            "batch_id": request.batch_id,
            "batch_file_id": request.batch_file_id,
            "ingestion_source": request.ingestion_source,
            "error_analisis": error_analisis,
            "legacy_historia_id": "",
        }

        if detected_type == "factura":
            self._enrich_factura(payload)
        elif detected_type == "quirurgico":
            self._enrich_quirurgico(payload)
        elif detected_type == "historia_clinica":
            self._enrich_historia(payload)

        if payload.get("error_analisis") or payload.get("error_cie10") or payload.get(
            "error_cups"
        ):
            payload["mensaje"] = "Procesamiento completado con advertencias"

        return payload

    def _generate_analysis(self, raw_text: str, detected_type: str) -> tuple[str, str | None]:
        try:
            if detected_type == "historia_clinica":
                try:
                    html = HistoriaClinicaRequest.analizar_historiaclinica(raw_text)
                    if not html:
                        raise RuntimeError("Respuesta vacia en historia clinica.")
                    return html, None
                except Exception as exc:
                    logger.exception("Fallo analisis principal de historia clinica")
                    html = procesar_documento_generico(
                        raw_text,
                        "historia_clinica",
                        self.client_groq,
                        self.client_gemini,
                        force_provider="groq",
                    )
                    return html, str(exc)

            if detected_type == "laboratorio":
                return (
                    procesar_laboratorio(raw_text, self.client_groq, self.client_gemini),
                    None,
                )
            if detected_type == "radiologia":
                return (
                    procesar_radiologia(raw_text, self.client_groq, self.client_gemini),
                    None,
                )
            if detected_type == "prescripcion":
                return (
                    procesar_prescripcion(raw_text, self.client_groq, self.client_gemini),
                    None,
                )
            if detected_type == "quirurgico":
                return (
                    procesar_documento_quirurgico(
                        raw_text, self.client_groq, self.client_gemini
                    ),
                    None,
                )
            if detected_type == "factura":
                return (
                    procesar_factura(raw_text, self.client_groq, self.client_gemini),
                    None,
                )
            return (
                procesar_documento_generico(
                    raw_text,
                    detected_type,
                    self.client_groq,
                    self.client_gemini,
                ),
                None,
            )
        except Exception as exc:
            logger.exception("Error generando analisis clinico para %s", detected_type)
            return (
                f"<p><b>Error procesando documento:</b> {str(exc)}</p>",
                str(exc),
            )

    def _enrich_factura(self, payload: dict[str, Any]) -> None:
        try:
            payload["factura_json"] = extraer_factura_json(payload.get("analisis_html", ""))
        except Exception as exc:
            logger.warning("Error extrayendo factura_json: %s", exc)
            payload["factura_json"] = {}
            payload["error_factura_json"] = str(exc)

    def _enrich_quirurgico(self, payload: dict[str, Any]) -> None:
        analisis_html = payload.get("analisis_html", "")
        try:
            procedimientos = extraer_procedimientos_quirurgicos(analisis_html)
            payload["procedimientos_extraidos"] = procedimientos
            payload["total_procedimientos"] = len(procedimientos)
            if procedimientos:
                df_cups = self.cups_retriever.asignar_codigos(procedimientos)
                payload["codigos_cups"] = df_cups.to_dict(orient="records")
            else:
                payload["codigos_cups"] = []
        except Exception as exc:
            logger.warning("Error al asignar codigos CUPS: %s", exc)
            payload["codigos_cups"] = []
            payload["error_cups"] = "No fue posible codificar CUPS automaticamente."

        try:
            diagnosticos = extraer_diagnosticos_quirurgicos(analisis_html)
            payload["diagnosticos_extraidos"] = diagnosticos
            payload["total_diagnosticos"] = len(diagnosticos)
            payload["codigos_cie10"] = self._assign_cie10_list(diagnosticos)
        except Exception as exc:
            logger.warning("Error al asignar codigos CIE-10: %s", exc)
            payload["codigos_cie10"] = []
            payload["error_cie10"] = "No fue posible codificar CIE-10 automaticamente."

    def _enrich_historia(self, payload: dict[str, Any]) -> None:
        analisis_html = payload.get("analisis_html", "")
        error_cie10 = None
        try:
            df_cie = self.cie10_retriever.asignar_codigos_batch(analisis_html)
            payload["codigos_cie10"] = df_cie.to_dict(orient="records")
        except Exception as exc:
            logger.exception("Error al asignar codigos CIE-10 para historia clinica")
            payload["codigos_cie10"] = []
            error_cie10 = str(exc)

        error_cups = None
        try:
            proc_match = re.search(
                r"<p><b>Procedimientos</b>\s*</p>\s*<ol>(.*?)</ol>",
                analisis_html,
                flags=re.DOTALL | re.IGNORECASE,
            )
            proc_html = proc_match.group(1) if proc_match else ""
            procedimientos = re.findall(r"<li>(.*?)</li>", proc_html, flags=re.DOTALL)
            procedimientos = [item.strip() for item in procedimientos if item.strip()]
            pattern = re.compile(r"^(?P<cup>\d{6})\s+(.+)$")
            procedimientos_filtrados = [
                item for item in procedimientos if pattern.match(item)
            ]
            if procedimientos_filtrados:
                df_cups = self.cups_retriever.asignar_codigos(procedimientos_filtrados)
                payload["codigos_cups"] = df_cups.to_dict(orient="records")
            else:
                payload["codigos_cups"] = []
        except Exception as exc:
            logger.exception("Error al asignar codigos CUPS para historia clinica")
            payload["codigos_cups"] = []
            error_cups = str(exc)

        if error_cie10:
            payload["error_cie10"] = error_cie10
        if error_cups:
            payload["error_cups"] = error_cups

    def _assign_cie10_list(self, diagnosticos: list[str]) -> list[dict[str, str]]:
        cie10_list: list[dict[str, str]] = []
        cie10_index_disponible = getattr(self.cie10_retriever, "db", None) is not None

        for diagnostico in diagnosticos:
            inline = self._extraer_cie10_inline(diagnostico)
            if inline:
                cie10_list.append(
                    {
                        "diagnostico": diagnostico,
                        "codigo": inline["codigo"],
                        "descripcion": inline["descripcion"],
                    }
                )
                continue

            if not cie10_index_disponible:
                cie10_list.append(
                    {
                        "diagnostico": diagnostico,
                        "codigo": "Pendiente",
                        "descripcion": "Codificacion automatica no disponible para este diagnostico.",
                    }
                )
                continue

            try:
                texto_cie10 = self.cie10_retriever.asignar_codigo_cie10(
                    diagnostico, temperature=0.0, k=30
                )
                codigos_parseados = _parse_cie10_lines(texto_cie10)
                if codigos_parseados:
                    codigo_principal = codigos_parseados[0]
                    cie10_list.append(
                        {
                            "diagnostico": diagnostico,
                            "codigo": codigo_principal["codigo"],
                            "descripcion": codigo_principal["descripcion"],
                        }
                    )
                else:
                    cie10_list.append(
                        {
                            "diagnostico": diagnostico,
                            "codigo": "Sin codigo",
                            "descripcion": "No se encontro codigo CIE-10 correspondiente",
                        }
                    )
            except Exception as exc:
                logger.warning(
                    "Error codificando CIE-10 para '%s...': %s",
                    diagnostico[:50],
                    exc,
                )
                cie10_list.append(
                    {
                        "diagnostico": diagnostico,
                        "codigo": "Pendiente",
                        "descripcion": "No se pudo codificar automaticamente este diagnostico.",
                    }
                )

        return cie10_list

    def _extract_patient_name(self, analisis_html: str) -> str:
        try:
            return extraer_nombre_paciente(analisis_html)
        except Exception:
            logger.exception("Error extrayendo nombre_paciente desde analisis")
            return ""

    def _resolve_patient_name_from_sources(
        self,
        *,
        provided_patient_name: str,
        case_key: str,
        username: str,
        analisis_html: str,
        raw_text: str,
    ) -> str:
        resolved = str(provided_patient_name or "").strip()
        if resolved and resolved.lower() != "desconocido":
            return resolved

        normalized_case_key = str(case_key or "").strip()
        if normalized_case_key:
            try:
                case = self.batch_case_repository.get_user_case(username, normalized_case_key) or {}
            except Exception:
                logger.exception("Error consultando caso batch para nombre_paciente")
                case = {}
            resolved = str(case.get("patient_name") or "").strip()
            if resolved and resolved.lower() != "desconocido":
                return resolved

        resolved = self._extract_patient_name(analisis_html)
        if resolved and resolved.lower() != "desconocido":
            return resolved

        try:
            resolved = extract_patient_name_from_text(raw_text)
        except Exception:
            logger.exception("Error extrayendo nombre_paciente desde texto crudo")
            resolved = ""
        return resolved or "desconocido"

    def _ensure_legacy_history(self, payload: dict[str, Any]) -> str:
        if str(payload.get("tipo_documento") or "").strip() != "historia_clinica":
            return ""

        existing_legacy_id = str(payload.get("legacy_historia_id") or "").strip()
        if existing_legacy_id:
            return existing_legacy_id

        analysis_document_id = payload.get("_id")
        existing_doc = None
        if analysis_document_id:
            existing_doc = self.mongo_storage.collection.find_one(
                {"analysis_document_id": str(analysis_document_id)}
            )
        if existing_doc and existing_doc.get("_id") is not None:
            legacy_id = str(existing_doc["_id"])
            self._set_legacy_history_id(payload, legacy_id)
            return legacy_id

        legacy_id = self.mongo_storage.guardar_analisis(
            descripcion=str(payload.get("descripcion") or ""),
            analisis_html=str(payload.get("analisis_html") or ""),
            codigos_cie10=list(payload.get("codigos_cie10") or []),
            codigos_cups=list(payload.get("codigos_cups") or []),
            usuario=str(payload.get("usuario") or ""),
            nombre_archivo=str(payload.get("nombre_archivo") or ""),
            nombre_paciente=str(payload.get("nombre_paciente") or "desconocido"),
            analysis_document_id=str(analysis_document_id or ""),
            case_key=str(payload.get("case_key") or ""),
            case_number=str(payload.get("case_number") or ""),
            batch_id=str(payload.get("batch_id") or ""),
            batch_file_id=str(payload.get("batch_file_id") or ""),
            ingestion_source=str(payload.get("ingestion_source") or "manual"),
        )
        self._set_legacy_history_id(payload, legacy_id)
        return legacy_id

    def _set_legacy_history_id(self, payload: dict[str, Any], legacy_id: str) -> None:
        payload["legacy_historia_id"] = legacy_id
        if payload.get("_id") is None:
            return
        analysis_id = payload["_id"]
        if not ObjectId.is_valid(str(analysis_id)):
            return
        self.mongo_analyses.collection.update_one(
            {"_id": ObjectId(str(analysis_id))},
            {"$set": {"legacy_historia_id": legacy_id}},
        )

    def _extraer_cie10_inline(self, diagnostico: str) -> dict[str, str] | None:
        if not diagnostico:
            return None
        match = _CIE10_INLINE_PATTERN.match(str(diagnostico).strip())
        if not match:
            return None
        return {
            "codigo": match.group("codigo").upper(),
            "descripcion": match.group("descripcion").strip(),
        }

    def _serialize(self, payload: dict[str, Any], *, reused: bool) -> dict[str, Any]:
        document_id = payload.get("_id")
        if document_id and not isinstance(document_id, str):
            document_id = str(document_id)
        return {
            "id_documento": document_id,
            "nombre_paciente": payload.get("nombre_paciente", "desconocido"),
            "tipo_documento": payload.get("tipo_documento", "generico"),
            "mensaje": payload.get("mensaje", ""),
            "analisis_html": payload.get("analisis_html", ""),
            "codigos_cie10": payload.get("codigos_cie10", []),
            "codigos_cups": payload.get("codigos_cups", []),
            "factura_json": payload.get("factura_json", {}),
            "reused": reused,
            "case_key": payload.get("case_key", ""),
            "case_number": payload.get("case_number", ""),
            "patient_id": payload.get("patient_id", ""),
            "batch_id": payload.get("batch_id", ""),
            "batch_file_id": payload.get("batch_file_id", ""),
            "ingestion_source": payload.get("ingestion_source", "manual"),
            "legacy_historia_id": payload.get("legacy_historia_id", ""),
            "error_analisis": payload.get("error_analisis"),
            "error_cie10": payload.get("error_cie10"),
            "error_cups": payload.get("error_cups"),
            "fecha_analisis": payload.get("fecha_analisis"),
        }
