#!/usr/bin/env python3
from __future__ import annotations

import os
import re
import subprocess
from datetime import datetime, timezone

# Config file whose commit history defines the changelog "cuts": every commit
# that touches this file marks the end boundary of one changelog entry.
CONFIG_PATH = "app/config/config.yml"
# Generated changelog file; always excluded from the file lists it reports on.
CHANGELOG_PATH = "changelog.md"
# Hard caps that keep a single changelog entry readable.
MAX_FILES_IN_ENTRY = 60
MAX_COMMITS_IN_ENTRY = 60
MAX_IMPACT_ITEMS = 4
MAX_FEATURE_ITEMS = 4
# Limits for the diff-analysis pass: files inspected, and +/- lines kept per file.
MAX_ANALYSIS_FILES = 50
MAX_PATCH_LINES_PER_FILE = 120

# Display order for the per-area summary. The Spanish labels are user-facing
# text written into the changelog; do not translate them.
BUCKET_PRIORITY = [
    "Configuracion del sistema",
    "Base de datos",
    "Flujos de negocio",
    "Logica de negocio y datos",
    "Pantallas y experiencia visual",
    "Comportamiento de interfaz",
    "Recursos y documentos",
    "Codigo de aplicacion",
    "Otros cambios",
]

# Maps raw `git diff --name-status` codes to the symbol shown in the entry.
# Type changes ("T") are deliberately presented as plain modifications.
STATUS_SYMBOL = {
    "A": "A",
    "M": "M",
    "D": "D",
    "R": "R",
    "C": "C",
    "T": "M",
}

# Substrings that suggest a diff line is a condition/validation/filter.
# Entries with surrounding spaces rely on the padding added in
# line_has_condition_signal so they can also match at line boundaries.
CONDITION_KEYWORDS = (
    " where ",
    " if ",
    " elseif",
    "&&",
    "||",
    "==",
    "!=",
    "<>",
    " and ",
    " or ",
    " validar",
    " valid",
    " filtro",
    " nit",
    " estado",
    " activo",
    " coddirectorio",
)

# Commit-subject keywords (conventional-commit prefixes plus Spanish verbs)
# hinting that a commit introduces new functionality.
FEATURE_COMMIT_KEYWORDS = (
    "feat",
    "feature",
    "nueva",
    "nuevo",
    "agrega",
    "agregar",
    "implementa",
    "habilita",
    "soporte",
    "crear",
)

# Matches PHP function declarations, capturing the function name; used to
# detect newly added methods in diff lines.
METHOD_RE = re.compile(r"(?:public|protected|private)?\s*function\s+([A-Za-z_][A-Za-z0-9_]*)\s*\(")


def run_cmd(cmd: list[str], allow_fail: bool = False) -> str:
    """Run *cmd* and return its stripped stdout.

    On a non-zero exit code, return "" when *allow_fail* is set; otherwise
    raise RuntimeError carrying the exit code and captured stderr.
    """
    proc = subprocess.run(cmd, text=True, capture_output=True)
    if proc.returncode == 0:
        return (proc.stdout or "").strip()
    if allow_fail:
        return ""
    stderr = (proc.stderr or "").strip()
    raise RuntimeError(f"Command failed ({proc.returncode}): {' '.join(cmd)}\n{stderr}")


def git(*args: str, allow_fail: bool = False) -> str:
    """Run a git subcommand through run_cmd and return its stripped stdout."""
    command = ["git"]
    command.extend(args)
    return run_cmd(command, allow_fail=allow_fail)


def read_text(path: str) -> str:
    """Read a UTF-8 text file, mapping undecodable bytes to U+FFFD."""
    with open(path, encoding="utf-8", errors="replace") as handle:
        return handle.read()


def write_text(path: str, content: str) -> None:
    """Overwrite *path* with *content*, encoded as UTF-8."""
    with open(path, mode="w", encoding="utf-8") as handle:
        handle.write(content)


def normalize_spaces(text: str) -> str:
    """Collapse every whitespace run into a single space and trim the ends."""
    return " ".join(text.split())


def shorten_fragment(text: str, max_len: int = 120) -> str:
    """Normalize a diff fragment and cap its length at *max_len* characters.

    Whitespace runs collapse to single spaces and backticks become single
    quotes so the fragment can be embedded inside Markdown `code` spans
    without breaking them. Overlong fragments end with "..." and the result
    never exceeds *max_len* characters.
    """
    # Whitespace collapse inlined (same rule as normalize_spaces) so this
    # helper stands alone; then neutralize backticks for Markdown embedding.
    cleaned = re.sub(r"\s+", " ", text).strip().replace("`", "'")
    if len(cleaned) > max_len:
        # Bug fix: the previous slice kept max_len - 1 characters and then
        # appended "...", producing max_len + 2 characters. Reserve room for
        # the ellipsis so the contract implied by max_len actually holds.
        return cleaned[: max_len - 3] + "..."
    return cleaned


def clean_yaml_value(raw_value: str) -> str:
    """Strip an inline ' #' comment and one layer of matching outer quotes."""
    value = raw_value.split(" #", 1)[0].strip()
    if len(value) >= 2:
        first, last = value[0], value[-1]
        if first == last and first in ('"', "'"):
            value = value[1:-1]
    return value.strip()


def extract_version_from_config(config_text: str) -> tuple[str, str]:
    """Extract (version_numero, version_fecha) from YAML-like config text.

    Pass 1 scans only the top-level ``parameters:`` block, using leading
    spaces to detect when a later top-level key ends the block. If either
    value is still missing afterwards, pass 2 rescans the whole file for
    literal ``version_numero:`` / ``version_fecha:`` lines, skipping
    ``%...%`` placeholder indirections. Missing values come back as "".
    """
    version_number = ""
    version_date = ""

    in_parameters_block = False
    parameters_indent = 0

    for raw_line in config_text.splitlines():
        line = raw_line.rstrip()
        stripped = line.strip()
        if not stripped or stripped.startswith("#"):
            # Skip blanks and comment-only lines.
            continue

        # Leading-space count decides whether we are still inside parameters:.
        indent = len(line) - len(line.lstrip(" "))

        if stripped == "parameters:":
            in_parameters_block = True
            parameters_indent = indent
            continue

        # A mapping key at (or above) the parameters indent ends the block.
        if in_parameters_block and indent <= parameters_indent and re.match(r"^[A-Za-z0-9_]+\s*:", stripped):
            break

        if not in_parameters_block:
            continue

        match = re.match(r"^(version_numero|version_fecha)\s*:\s*(.+?)\s*$", stripped)
        if not match:
            continue

        key = match.group(1)
        value = clean_yaml_value(match.group(2))

        if key == "version_numero":
            version_number = value
        elif key == "version_fecha":
            version_date = value

    if version_number and version_date:
        return version_number, version_date

    # Fallback: naive whole-file scan. The %placeholder% guard avoids picking
    # up lines that merely reference the parameter instead of defining it.
    for raw_line in config_text.splitlines():
        stripped = raw_line.strip()
        if stripped.startswith("version_numero:") and "%version_numero%" not in stripped:
            version_number = clean_yaml_value(stripped.split(":", 1)[1])
        if stripped.startswith("version_fecha:") and "%version_fecha%" not in stripped:
            version_date = clean_yaml_value(stripped.split(":", 1)[1])

    return version_number, version_date


def short_sha(sha: str) -> str:
    """First 8 characters of a commit sha, or 'inicio' for an empty sha."""
    if not sha:
        return "inicio"
    return sha[:8]


def human_bucket(filename: str) -> str:
    """Classify a repo path into a human-readable change area.

    Rules are evaluated in order and the first match wins, so the config and
    database checks take precedence over the generic .php rule.
    """
    path = filename.lower().replace("\\", "/")

    asset_exts = (".png", ".jpg", ".jpeg", ".gif", ".svg", ".ico", ".pdf", ".xlsx", ".csv", ".mp3", ".wav")
    rules = (
        (
            lambda p: p == CONFIG_PATH.lower() or "/config/" in p or p.endswith((".yml", ".yaml", ".ini", ".env", ".xml", ".json")),
            "Configuracion del sistema",
        ),
        (lambda p: p.endswith(".sql"), "Base de datos"),
        (lambda p: "/controller/" in p, "Flujos de negocio"),
        (lambda p: "/entity/" in p or "/repository/" in p or "/clases/" in p, "Logica de negocio y datos"),
        (
            lambda p: "/resources/views/" in p or p.endswith((".twig", ".html", ".css", ".scss", ".sass")),
            "Pantallas y experiencia visual",
        ),
        (lambda p: p.endswith((".js", ".ts", ".jsx", ".tsx", ".vue")), "Comportamiento de interfaz"),
        (lambda p: p.startswith("web/") and p.endswith(asset_exts), "Recursos y documentos"),
        (lambda p: p.endswith(".php"), "Codigo de aplicacion"),
    )

    for predicate, bucket in rules:
        if predicate(path):
            return bucket
    return "Otros cambios"


def bucket_sort_key(name: str) -> tuple[int, int | str]:
    """Sort key: known buckets first (priority order), others alphabetically."""
    try:
        return (0, BUCKET_PRIORITY.index(name))
    except ValueError:
        return (1, name.lower())


def parse_name_status(line: str) -> tuple[str, str] | None:
    """Parse one `git diff --name-status` row into (status_code, filename).

    Returns None for malformed rows. Rename rows ("Rxxx<TAB>old<TAB>new")
    report the new path; every other status uses its single path column.
    """
    columns = line.split("\t")
    if len(columns) < 2:
        return None

    token = columns[0].strip()
    if not token:
        return None

    code = token[0].upper()
    if code == "R":
        # A rename without both old and new paths is malformed.
        if len(columns) < 3:
            return None
        name = columns[2].strip()
    else:
        name = columns[1].strip()

    return (code, name) if name else None


def list_config_history() -> list[str]:
    """Commit shas (newest first) that touched CONFIG_PATH."""
    raw = git("log", "--format=%H", "--", CONFIG_PATH, allow_fail=True)
    return [sha for sha in (row.strip() for row in raw.splitlines()) if sha]


def is_ancestor(ancestor_sha: str, descendant_sha: str) -> bool:
    """True when *ancestor_sha* is reachable from *descendant_sha*.

    Relies on `git merge-base --is-ancestor`, which communicates the answer
    purely through its exit code.
    """
    command = ["git", "merge-base", "--is-ancestor", ancestor_sha, descendant_sha]
    completed = subprocess.run(command, text=True, capture_output=True)
    return completed.returncode == 0


def resolve_range(previous_cut_sha: str, end_sha: str) -> tuple[str, str]:
    """Choose the diff base for this cut.

    Prefer the previous cut when it is an ancestor of *end_sha*; otherwise
    fall back to end_sha's first parent; finally (initial commit) use
    end_sha alone with an empty start.
    """
    if previous_cut_sha and is_ancestor(previous_cut_sha, end_sha):
        return previous_cut_sha, f"{previous_cut_sha}..{end_sha}"

    parent = git("rev-parse", f"{end_sha}^", allow_fail=True)
    if parent:
        return parent, f"{parent}..{end_sha}"
    return "", end_sha


def list_changed_files(start_sha: str, end_sha: str, range_spec: str) -> list[dict]:
    """Collect the cut's changed files as {"status", "filename", "bucket"} dicts.

    The changelog file itself is excluded; results are sorted by bucket
    priority and then by filename.
    """
    if start_sha:
        raw = git("diff", "--name-status", "--find-renames", range_spec, allow_fail=True)
    else:
        # Initial commit: there is no range, so inspect the single commit.
        raw = git("show", "--name-status", "--pretty=format:", "--find-renames", end_sha, allow_fail=True)

    entries: list[dict] = []
    for row in raw.splitlines():
        parsed = parse_name_status(row)
        if parsed is None:
            continue

        code, name = parsed
        clean_name = name.replace("\\", "/")
        if clean_name == CHANGELOG_PATH:
            continue  # never report the changelog itself

        entries.append(
            {
                "status": STATUS_SYMBOL.get(code, code),
                "filename": clean_name,
                "bucket": human_bucket(clean_name),
            }
        )

    entries.sort(key=lambda entry: (bucket_sort_key(entry["bucket"]), entry["filename"]))
    return entries


def is_noise_line(line: str) -> bool:
    """True for lines with no analyzable signal: blanks, lone punctuation,
    comments, and PHP use/namespace statements."""
    text = line.strip()
    if not text:
        return True
    if text in {"{", "}", "};", "(", ")", "[", "]"}:
        return True
    return text.startswith(("//", "#", "*", "/*", "*/", "use ", "namespace "))


def is_textual_for_analysis(path: str) -> bool:
    """True when the file extension marks a text format worth diff analysis."""
    analyzable_exts = {
        ".php",
        ".twig",
        ".yml",
        ".yaml",
        ".sql",
        ".js",
        ".ts",
        ".json",
        ".xml",
        ".ini",
        ".txt",
        ".md",
        ".html",
        ".css",
    }
    lowered = path.lower().replace("\\", "/")
    _, sep, ext = lowered.rpartition(".")
    if not sep or not ext:
        return False
    return f".{ext}" in analyzable_exts


def collect_patch(start_sha: str, end_sha: str, range_spec: str) -> str:
    """Fetch the zero-context unified diff for the cut (or the single commit)."""
    if not start_sha:
        # Initial commit: no range exists, show the lone commit's patch.
        return git("show", "--pretty=format:", "--unified=0", "--find-renames", end_sha, allow_fail=True)
    return git("diff", "--unified=0", "--find-renames", range_spec, allow_fail=True)


def parse_patch_by_file(patch_text: str) -> list[dict]:
    """Split a unified diff into per-file records of added/removed lines.

    Each record is {"file", "analyze", "added", "removed"}. Only files whose
    extension passes is_textual_for_analysis collect lines, both line lists
    are capped at MAX_PATCH_LINES_PER_FILE, files with no collected lines are
    dropped, and at most MAX_ANALYSIS_FILES records are returned.
    """
    diffs: list[dict] = []
    current: dict | None = None

    for raw_line in patch_text.splitlines():
        line = raw_line.rstrip("\n")

        if line.startswith("diff --git "):
            # Flush the previous file's record before starting a new one.
            if current and current.get("analyze") and current.get("file"):
                if current.get("added") or current.get("removed"):
                    diffs.append(current)

            match = re.match(r"^diff --git a/(.+?) b/(.+)$", line)
            file_path = match.group(2) if match else ""
            current = {
                "file": file_path,
                "analyze": is_textual_for_analysis(file_path),
                "added": [],
                "removed": [],
            }
            continue

        if current is None:
            continue

        # Prefer the "+++ b/" header path (reflects renames). Deleted files
        # show "+++ /dev/null" instead and keep the "diff --git" path.
        if line.startswith("+++ b/"):
            file_path = line[6:].strip()
            current["file"] = file_path
            current["analyze"] = is_textual_for_analysis(file_path)
            continue

        if not current.get("analyze"):
            continue

        # The "+++"/"---" guards exclude file headers; only change lines count.
        if line.startswith("+") and not line.startswith("+++"):
            value = line[1:].strip()
            if value and len(current["added"]) < MAX_PATCH_LINES_PER_FILE:
                current["added"].append(value)
        elif line.startswith("-") and not line.startswith("---"):
            value = line[1:].strip()
            if value and len(current["removed"]) < MAX_PATCH_LINES_PER_FILE:
                current["removed"].append(value)

    # Flush the trailing file, which has no following "diff --git" line.
    if current and current.get("analyze") and current.get("file"):
        if current.get("added") or current.get("removed"):
            diffs.append(current)

    return diffs[:MAX_ANALYSIS_FILES]


def line_has_condition_signal(line: str) -> bool:
    """True when the line contains any condition/filter keyword (case-insensitive)."""
    # Padding with spaces lets keywords that carry leading/trailing spaces
    # match even at the very start or end of the line.
    padded = f" {line.lower()} "
    for token in CONDITION_KEYWORDS:
        if token in padded:
            return True
    return False


def first_condition_line(lines: list[str]) -> str:
    """Return the first non-noise line carrying a condition signal, or ''."""
    meaningful = (line for line in lines if not is_noise_line(line))
    for line in meaningful:
        if line_has_condition_signal(line):
            return line
    return ""


def first_sql_line(lines: list[str]) -> str:
    """Return the first non-noise line that looks like SQL, or ''."""
    sql_markers = ("select ", " from ", " where ", " join ")
    for line in lines:
        if is_noise_line(line):
            continue
        lowered = line.lower()
        if any(marker in lowered for marker in sql_markers):
            return line
    return ""


def extract_method_names(lines: list[str]) -> set[str]:
    """Collect PHP function names declared on the given diff lines."""
    # Pattern inlined from METHOD_RE; the re module caches compiled patterns,
    # so repeated calls stay cheap.
    declaration = r"(?:public|protected|private)?\s*function\s+([A-Za-z_][A-Za-z0-9_]*)\s*\("
    found: set[str] = set()
    for line in lines:
        hit = re.search(declaration, line)
        if hit is None:
            continue
        name = hit.group(1)
        if name:
            found.add(name)
    return found


def subject_is_version_update(subject: str) -> bool:
    """True for commit subjects produced by the version-bump/changelog automation."""
    lowered = subject.lower()
    return (
        lowered.startswith("docs(changelog):")
        or "version_numero" in lowered
        or "version_fecha" in lowered
    )


def list_commits(start_sha: str, end_sha: str, range_spec: str) -> list[dict]:
    """List the cut's commits as dicts, oldest first.

    Filters out version-bump subjects and changelog-bot authors, prefers real
    commits over merges (merges are only kept when nothing else remains), and
    de-duplicates repeated subjects keeping the first occurrence.
    """
    fields = "%H%x1f%an%x1f%ae%x1f%ad%x1f%s"

    args = ["log", "--reverse"]
    if not start_sha:
        args.append("-1")  # initial commit: only the cut commit itself
    args.extend(["--date=format:%Y-%m-%d %H:%M", f"--pretty=format:{fields}"])
    args.append(range_spec if start_sha else end_sha)
    raw = git(*args, allow_fail=True)

    parsed: list[dict] = []
    for row in raw.splitlines():
        pieces = row.split("\x1f")
        if len(pieces) != 5:
            continue

        sha, author, email, date_text, subject = (piece.strip() for piece in pieces)
        if not subject:
            continue
        if subject_is_version_update(subject):
            continue
        if "changelog-bot" in email.lower():
            continue

        lowered = subject.lower()
        merge_commit = lowered.startswith(("merge pull request", "merge branch"))
        parsed.append(
            {
                "sha": sha,
                "author": author,
                "date": date_text,
                "subject": subject,
                "is_merge": merge_commit,
            }
        )

    real_commits = [item for item in parsed if not item["is_merge"]]
    candidates = real_commits if real_commits else parsed

    unique: list[dict] = []
    seen: set[str] = set()
    for item in candidates:
        subject_key = item["subject"].lower()
        if subject_key not in seen:
            seen.add(subject_key)
            unique.append(item)

    return unique


def build_bucket_map(files: list[dict]) -> dict[str, list[str]]:
    """Group filenames by their bucket label, preserving input order."""
    grouped: dict[str, list[str]] = {}
    for entry in files:
        bucket = entry["bucket"]
        if bucket not in grouped:
            grouped[bucket] = []
        grouped[bucket].append(entry["filename"])
    return grouped


def add_unique(lines: list[str], text: str) -> None:
    """Append *text* to *lines* unless an identical entry already exists."""
    if text in lines:
        return
    lines.append(text)


def build_impact_insights(files: list[dict], commits: list[dict], patch_diffs: list[dict]) -> list[str]:
    """Build up to MAX_IMPACT_ITEMS prose bullets describing likely impact.

    Insight sources, in priority order: bucket counts, a config-version cut
    note, changed condition/filter lines, changed SQL lines, and touched
    interface contracts; a generic fallback bullet is used when nothing was
    detected. NOTE(review): *commits* is currently unused here; it appears to
    be kept for signature symmetry with build_feature_highlights — confirm.
    """
    insights: list[str] = []
    bucket_map = build_bucket_map(files)

    # Lead with the buckets that concentrate the most changed files.
    if bucket_map:
        counts = sorted(((name, len(paths)) for name, paths in bucket_map.items()), key=lambda item: item[1], reverse=True)
        if len(counts) >= 2:
            add_unique(
                insights,
                f"El mayor impacto cae en {counts[0][0]} ({counts[0][1]} archivos) y {counts[1][0]} ({counts[1][1]} archivos).",
            )
        else:
            add_unique(insights, f"El impacto principal cae en {counts[0][0]} ({counts[0][1]} archivos).")

    # The config file changing is what defines a cut; call it out explicitly.
    if any(row["filename"] == CONFIG_PATH for row in files):
        add_unique(
            insights,
            "Se actualizo la version en app/config/config.yml; este corte consolida todos los cambios acumulados desde el corte anterior.",
        )

    # Pair a removed condition line with an added one in the same file to
    # surface changed validation/filter rules.
    for diff in patch_diffs:
        if len(insights) >= MAX_IMPACT_ITEMS:
            break

        removed_cond = first_condition_line(diff.get("removed", []))
        added_cond = first_condition_line(diff.get("added", []))

        if not removed_cond or not added_cond:
            continue

        # Skip pure reformatting where the condition text did not change.
        if normalize_spaces(removed_cond).lower() == normalize_spaces(added_cond).lower():
            continue

        old_fragment = shorten_fragment(removed_cond)
        new_fragment = shorten_fragment(added_cond)
        add_unique(
            insights,
            (
                f"En `{diff['file']}` se ajusto una regla de validacion/filtro: "
                f"`{old_fragment}` -> `{new_fragment}`. Esto cambia que casos pasan o se bloquean."
            ),
        )

    # Same pairing idea, but for SQL-looking lines.
    if len(insights) < MAX_IMPACT_ITEMS:
        for diff in patch_diffs:
            if len(insights) >= MAX_IMPACT_ITEMS:
                break
            removed_sql = first_sql_line(diff.get("removed", []))
            added_sql = first_sql_line(diff.get("added", []))
            if not removed_sql or not added_sql:
                continue
            if normalize_spaces(removed_sql).lower() == normalize_spaces(added_sql).lower():
                continue

            add_unique(
                insights,
                (
                    f"En `{diff['file']}` hubo ajuste de consulta de datos: "
                    f"`{shorten_fragment(removed_sql)}` -> `{shorten_fragment(added_sql)}`. "
                    "Esto puede cambiar resultados, validaciones o conciliaciones."
                ),
            )

    # Interface files imply contract changes; only the first one is reported.
    interface_files = [
        row["filename"] for row in files if "/interfaces/" in row["filename"].lower().replace("\\", "/")
    ]
    if interface_files and len(insights) < MAX_IMPACT_ITEMS:
        add_unique(
            insights,
            f"Se modifico el contrato tecnico `{interface_files[0]}`; revisar consistencia en modulos que lo implementan.",
        )

    if not insights:
        add_unique(
            insights,
            "No se detecta un cambio funcional critico en el diff; el corte parece centrado en mantenimiento o refactor.",
        )

    return insights[:MAX_IMPACT_ITEMS]


def build_feature_highlights(files: list[dict], commits: list[dict], patch_diffs: list[dict]) -> list[str]:
    """Derive up to MAX_FEATURE_ITEMS bullets about new functionality.

    Sources, in priority order: commit subjects carrying feature keywords,
    newly detected PHP functions in the diff, and the count of newly added
    files. A generic fallback bullet is used when nothing was detected.
    """
    found: list[str] = []

    for commit in commits:
        subject = commit["subject"]
        if any(keyword in subject.lower() for keyword in FEATURE_COMMIT_KEYWORDS):
            add_unique(found, f"Commit con foco funcional: {subject}.")
        if len(found) >= MAX_FEATURE_ITEMS:
            break

    if len(found) < MAX_FEATURE_ITEMS:
        for diff in patch_diffs:
            path = diff["file"].lower().replace("\\", "/")
            if not path.endswith(".php"):
                continue

            previous_methods = extract_method_names(diff.get("removed", []))
            current_methods = extract_method_names(diff.get("added", []))

            # A function present only on added lines suggests new behavior;
            # constructors are too generic to be worth reporting.
            fresh = [name for name in sorted(current_methods) if name not in previous_methods and name != "__construct"]
            if not fresh:
                continue

            detected = fresh[0]
            if "/controller/" in path:
                message = f"Posible nuevo flujo en `{diff['file']}`: se detecta la funcion `{detected}()` en el diff."
            else:
                message = f"Posible capacidad nueva en `{diff['file']}`: se detecta la funcion `{detected}()` en el diff."

            add_unique(found, message)
            if len(found) >= MAX_FEATURE_ITEMS:
                break

    if len(found) < MAX_FEATURE_ITEMS:
        added_rows = [row for row in files if row["status"] == "A" and row["filename"] != CHANGELOG_PATH]
        if added_rows:
            add_unique(
                found,
                f"Se agregaron {len(added_rows)} archivos nuevos en este corte, lo que sugiere ampliacion de funcionalidad.",
            )

    if not found:
        found.append(
            "No se observa una funcionalidad completamente nueva; predominan mejoras o correcciones sobre funciones existentes."
        )

    return found[:MAX_FEATURE_ITEMS]


def build_entry(
    marker: str,
    version_number: str,
    version_date: str,
    actor: str,
    start_sha: str,
    end_sha: str,
    compare_url: str,
    files: list[dict],
    commits: list[dict],
    impact_insights: list[str],
    feature_highlights: list[str],
) -> str:
    """Render one complete Markdown changelog entry as a single string.

    The entry starts with *marker*, the HTML comment main() later uses to
    detect that this cut was already written. File and commit lists are
    truncated to MAX_FILES_IN_ENTRY / MAX_COMMITS_IN_ENTRY with a "+N"
    overflow line. All user-facing text is Spanish by design.
    NOTE(review): the *compare_url* parameter shadows the module-level
    compare_url() function inside this body (harmless here, but confusing).
    """
    bucket_map = build_bucket_map(files)

    # Header: version, date, author, git range, and an optional compare link.
    lines: list[str] = [marker, f"## Version {version_number}", ""]
    lines.append(f"**Fecha:** {version_date}")
    lines.append(f"**Autor del corte:** @{actor}" if actor else "**Autor del corte:** N/D")
    lines.append(f"**RangoGit:** {short_sha(start_sha)}..{short_sha(end_sha)}")
    lines.append(f"**Commit corte:** {end_sha}")
    if compare_url:
        lines.append(f"**Comparacion:** {compare_url}")

    # High-level summary counts.
    lines.extend(
        [
            "",
            "**Resumen para el equipo:**",
            "- Este corte incluye todo lo integrado desde el ultimo cambio en app/config/config.yml.",
            f"- Commits considerados: {len(commits)}",
            f"- Archivos con cambios: {len(files)}",
            "",
            "**Impacto en el sistema (estimado):**",
        ]
    )

    for item in impact_insights:
        lines.append(f"- {item}")

    lines.extend(["", "**Nuevas funcionalidades o mejoras visibles:**"])
    for item in feature_highlights:
        lines.append(f"- {item}")

    # Per-area counts, ordered by BUCKET_PRIORITY via bucket_sort_key.
    lines.extend(["", "**Cambios por area:**"])

    if bucket_map:
        for bucket_name in sorted(bucket_map.keys(), key=bucket_sort_key):
            file_count = len(bucket_map[bucket_name])
            label = "archivo" if file_count == 1 else "archivos"
            lines.append(f"- {bucket_name}: {file_count} {label}.")
    else:
        lines.append("- No se detectaron archivos en el rango.")

    # Truncated file list with an overflow marker.
    lines.extend(["", "**Archivos destacados:**"])

    if files:
        displayed_files = files[:MAX_FILES_IN_ENTRY]
        for row in displayed_files:
            lines.append(f"- [{row['status']}] [{row['bucket']}] {row['filename']}")

        remaining_files = len(files) - len(displayed_files)
        if remaining_files > 0:
            lines.append(f"- (+{remaining_files} archivos adicionales)")
    else:
        lines.append("- Sin archivos para mostrar.")

    # Truncated commit list with an overflow marker.
    lines.extend(["", "**Commits incluidos (referencia):**"])

    if commits:
        displayed_commits = commits[:MAX_COMMITS_IN_ENTRY]
        for row in displayed_commits:
            lines.append(f"- {row['subject']} ({row['author']}, {row['date']})")

        remaining_commits = len(commits) - len(displayed_commits)
        if remaining_commits > 0:
            lines.append(f"- (+{remaining_commits} commits adicionales)")
    else:
        lines.append("- Sin commits para mostrar.")

    # Horizontal rule separating this entry from the previous newest entry.
    lines.extend(["", "---", ""])
    return "\n".join(lines)


def current_actor(end_sha: str) -> str:
    """GitHub actor from the environment, else the commit author of *end_sha*."""
    env_actor = os.environ.get("GITHUB_ACTOR", "").strip()
    # Short-circuit: git is only consulted when the env var is absent/blank.
    return env_actor or git("show", "-s", "--format=%an", end_sha, allow_fail=True)


def compare_url(start_sha: str, end_sha: str) -> str:
    """Build a GitHub compare (or single-commit) URL from Actions env vars.

    Returns "" when GITHUB_SERVER_URL or GITHUB_REPOSITORY is not set.
    """
    server = os.environ.get("GITHUB_SERVER_URL", "").strip()
    repo = os.environ.get("GITHUB_REPOSITORY", "").strip()
    if not (server and repo):
        return ""

    base = f"{server}/{repo}"
    if not start_sha:
        return f"{base}/commit/{end_sha}"
    return f"{base}/compare/{start_sha}...{end_sha}"


def main() -> None:
    """Entry point: generate and prepend one changelog entry for HEAD's cut.

    Honors GITHUB_SHA (falling back to HEAD) and DRY_RUN=1 (print the entry
    instead of writing it). Exits early without writing when the marker for
    this cut already exists in the changelog, making reruns idempotent.
    """
    end_sha_env = os.environ.get("GITHUB_SHA", "").strip()
    end_sha = git("rev-parse", end_sha_env or "HEAD")

    # Read the config as committed at the cut; fall back to the working tree.
    config_text = git("show", f"{end_sha}:{CONFIG_PATH}", allow_fail=True)
    if not config_text:
        config_text = read_text(CONFIG_PATH)

    version_number, version_date = extract_version_from_config(config_text)
    if not version_number:
        version_number = "version-no-detectada"
    if not version_date:
        version_date = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M UTC")

    # The previous cut is the second-newest commit that touched the config
    # file; the newest is assumed to be the current cut itself.
    config_history = list_config_history()
    previous_cut_sha = config_history[1] if len(config_history) > 1 else ""

    start_sha, range_spec = resolve_range(previous_cut_sha, end_sha)

    # Gather everything the entry needs from git.
    files = list_changed_files(start_sha, end_sha, range_spec)
    commits = list_commits(start_sha, end_sha, range_spec)
    patch_text = collect_patch(start_sha, end_sha, range_spec)
    patch_diffs = parse_patch_by_file(patch_text)

    actor = current_actor(end_sha)
    url = compare_url(start_sha, end_sha)
    # HTML-comment marker keyed by the cut sha; used for idempotency below.
    marker = f"<!-- changelog-cut:{end_sha} -->"

    impact_insights = build_impact_insights(files, commits, patch_diffs)
    feature_highlights = build_feature_highlights(files, commits, patch_diffs)

    try:
        existing = read_text(CHANGELOG_PATH)
    except FileNotFoundError:
        existing = ""

    if marker in existing:
        print(f"Entry already exists for cut {end_sha}.")
        return

    entry = build_entry(
        marker=marker,
        version_number=version_number,
        version_date=version_date,
        actor=actor,
        start_sha=start_sha,
        end_sha=end_sha,
        compare_url=url,
        files=files,
        commits=commits,
        impact_insights=impact_insights,
        feature_highlights=feature_highlights,
    )

    dry_run = os.environ.get("DRY_RUN", "").strip() == "1"
    if dry_run:
        print(entry)
        return

    # Prepend the new entry so the changelog stays newest-first.
    if existing:
        write_text(CHANGELOG_PATH, entry + existing.lstrip())
    else:
        write_text(CHANGELOG_PATH, entry)

    print(
        "Changelog actualizado "
        f"(version {version_number}, rango {short_sha(start_sha)}..{short_sha(end_sha)})."
    )


if __name__ == "__main__":
    main()
