import os
from pathlib import Path

import pandas as pd
from langchain_community.vectorstores import FAISS
from modules.processing.embeddings import get_hf_embeddings


# Embedding model name; overridable through the EMBEDDINGS_MODEL environment variable.
EMBEDDINGS_MODEL = os.getenv("EMBEDDINGS_MODEL", "intfloat/e5-small-v2")
# Directory three levels above the one containing this (symlink-resolved) file.
# NOTE(review): presumably the repository root -- confirm if this module is moved.
PROJECT_ROOT = Path(__file__).resolve().parents[3]
# Default input spreadsheet and default output index directory, both under PROJECT_ROOT.
DEFAULT_CUPS_EXCEL = PROJECT_ROOT / "TablaReferencia_CUPS__1.xlsx"
DEFAULT_CUPS_INDEX = PROJECT_ROOT / "cups_faiss"


def _resolve_path(env_var: str, default_path: Path) -> Path:
    """Return the path configured in *env_var*, or *default_path* as fallback.

    An unset or empty environment variable selects ``default_path``. A
    relative result (from either source) is anchored at ``PROJECT_ROOT``;
    an absolute one is returned unchanged.
    """
    override = os.getenv(env_var)
    candidate = default_path if not override else Path(override)
    return candidate if candidate.is_absolute() else PROJECT_ROOT / candidate


def _cell_to_text(value) -> str:
    """Render a single spreadsheet cell as clean text for indexing.

    Missing cells (``None`` / ``NaN``) become ``""`` rather than the literal
    string ``"nan"``, and integral floats such as ``890201.0`` are rendered
    as ``"890201"``. Any other value is converted with ``str`` and stripped.
    """
    if pd.isna(value):
        return ""
    # A numeric column that had blank cells is typically held as float64 by
    # pandas, which would otherwise turn a code like 890201 into "890201.0".
    if isinstance(value, float) and value.is_integer():
        return str(int(value))
    return str(value).strip()


def configurar_faiss_cups(ruta_excel: Path, indice_path: Path) -> "FAISS":
    """Build a FAISS index from the CUPS Excel table and save it to disk.

    Each row with a non-null ``Codigo`` and ``Nombre`` becomes one indexed
    text of the form ``"<code> <name> <description>"``, with metadata keys
    ``codigo``, ``nombre`` and ``descripcion``. The ``Descripcion`` column is
    optional; a missing column or an empty cell yields an empty description.

    Args:
        ruta_excel: Location of the Excel file (``str`` is also accepted).
        indice_path: Directory where the index is saved; created if needed
            (``str`` is also accepted).

    Returns:
        The FAISS vector store that was built and saved.

    Raises:
        FileNotFoundError: If ``ruta_excel`` does not exist.
        KeyError: Propagated from pandas when the ``Codigo`` or ``Nombre``
            column is absent from the sheet.
        ValueError: If no row survives the null filtering.
    """
    ruta_excel = Path(ruta_excel)
    indice_path = Path(indice_path)
    if not ruta_excel.exists():
        raise FileNotFoundError(f"No se encontro archivo Excel de CUPS: {ruta_excel}")

    df = pd.read_excel(ruta_excel)
    df = df.dropna(subset=["Codigo", "Nombre"])

    # The description column is optional; fall back to empty descriptions.
    if "Descripcion" in df.columns:
        descripciones = df["Descripcion"]
    else:
        descripciones = [""] * len(df)

    textos = []
    metadatos = []
    for raw_code, raw_name, raw_desc in zip(df["Codigo"], df["Nombre"], descripciones):
        code = _cell_to_text(raw_code)
        name = _cell_to_text(raw_name)
        desc = _cell_to_text(raw_desc)
        textos.append(f"{code} {name} {desc}".strip())
        metadatos.append({"codigo": code, "nombre": name, "descripcion": desc})

    if not textos:
        raise ValueError("No se encontraron filas validas para construir el indice CUPS")

    # Load the embedding model only once there is something to index.
    embeddings = get_hf_embeddings(EMBEDDINGS_MODEL)
    db = FAISS.from_texts(textos, embedding=embeddings, metadatas=metadatos)
    indice_path.mkdir(parents=True, exist_ok=True)
    db.save_local(str(indice_path))
    return db


def main() -> None:
    """Build the CUPS FAISS index and print where it was written.

    The Excel source and the index directory are taken from the
    ``CUPS_EXCEL_PATH`` and ``CUPS_FAISS_INDEX`` environment variables,
    falling back to the module defaults.
    """
    excel_source = _resolve_path("CUPS_EXCEL_PATH", DEFAULT_CUPS_EXCEL)
    index_target = _resolve_path("CUPS_FAISS_INDEX", DEFAULT_CUPS_INDEX)
    configurar_faiss_cups(ruta_excel=excel_source, indice_path=index_target)
    print(f"Indice FAISS CUPS generado en: {index_target}")


# Run the index build only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
