import re
from typing import Any, Optional
from urllib.parse import urlsplit, urlunsplit

# URL schemes accepted by the normalizer; any other scheme is rejected.
_SCHEMES = {"http", "https"}
# One DNS label: 1-63 ASCII letters, digits or hyphens, never starting or
# ending with a hyphen. The end is anchored with \Z rather than $ because $
# also matches just before a trailing newline ("com\n" would be accepted).
_LABEL_RE = re.compile(r"^[A-Za-z0-9](?:[A-Za-z0-9-]{0,61}[A-Za-z0-9])?\Z")
# Top-level domain: 2-63 ASCII letters, or a punycode-encoded IDN label
# ("xn--" prefix, at most 63 characters in total, not ending with a hyphen).
_TLD_RE = re.compile(
    r"^(?:[A-Za-z]{2,63}|[Xx][Nn]--[A-Za-z0-9-]{0,58}[A-Za-z0-9])\Z"
)

def _is_valid_host(host: str) -> bool:
    """Return True if *host* is a syntactically valid multi-label DNS name.

    A single trailing dot (fully-qualified form) is tolerated. The name must
    have at least two dot-separated labels, every label must match
    ``_LABEL_RE`` and the last label must also match ``_TLD_RE``.
    """
    if not host:
        return False
    # Drop the root dot BEFORE measuring: the 253-character limit applies to
    # the name without it, so a maximum-length FQDN written with its trailing
    # dot must not be rejected.
    if host.endswith("."):
        host = host[:-1]
    if len(host) > 253:
        return False
    labels = host.split(".")
    if len(labels) < 2:
        return False
    # fullmatch() instead of match(): with a '$'-anchored pattern, match()
    # would accept a label followed by a trailing newline.
    if not all(_LABEL_RE.fullmatch(label) for label in labels):
        return False
    return _TLD_RE.fullmatch(labels[-1]) is not None

def _normalize_host(host: str) -> str:
    """Return *host* trimmed, lower-cased and with dot runs collapsed.

    Surrounding whitespace is removed first, then any leading/trailing dots;
    the result is lower-cased and every run of two or more dots becomes one.
    """
    trimmed = host.strip()
    without_edge_dots = trimmed.strip(".")
    lowered = without_edge_dots.lower()
    # Collapse '..' (or longer runs) into a single '.'.
    return re.sub(r"\.{2,}", ".", lowered)

def normalizar_url_para_insert(texto: Any) -> Optional[str]:
    """Return a normalized URL ready to be inserted into the database.

    The input is converted with ``str()``, all whitespace is removed, and
    ``http://`` is prepended when the text does not start with a scheme.
    The host is lower-cased, cleaned of stray/duplicate dots and has commas
    replaced by dots (a common typo); userinfo, port, path, query and
    fragment are preserved as given.

    Args:
        texto: Any value; ``None`` and blank strings are treated as missing.

    Returns:
        The normalized URL, or ``None`` (-> SQL NULL) when the input is
        missing, cannot be parsed, uses a scheme other than http/https, has
        an invalid host, or has a port outside 1-65535. Bracketed IPv6 hosts
        are not supported and yield ``None``.
    """
    if texto is None:
        return None

    s = str(texto).strip()
    if not s:
        return None

    # Whitespace is never valid inside a URL; remove it everywhere.
    s = re.sub(r"\s+", "", s)

    # Add the default scheme only when the text does not START with one.
    # A plain '"://" in s' check is fooled by a URL embedded in the query
    # string (e.g. "example.com/r?u=http://other.com").
    if re.match(r"[A-Za-z][A-Za-z0-9+.-]*://", s):
        candidate = s
    else:
        candidate = f"http://{s}"

    # urlsplit raises ValueError on malformed authorities (e.g. unbalanced
    # '['); that is simply an invalid URL for our purposes.
    try:
        sp = urlsplit(candidate)
    except ValueError:
        return None

    scheme = (sp.scheme or "http").lower()
    if scheme not in _SCHEMES:
        return None

    # Split "userinfo@host:port". `at` is "@" only when userinfo is present,
    # so re-joining userinfo + at reproduces the original prefix exactly.
    userinfo, at, hostport = sp.netloc.rpartition("@")
    host, port = hostport, ""
    head, colon, tail = hostport.rpartition(":")
    # Only ASCII digits form a port; str.isdigit() would also accept
    # non-ASCII digit characters and leak them into the stored URL.
    if colon and head and re.fullmatch(r"[0-9]+", tail):
        host, port = head, tail

    # The comma-for-dot typo fix is applied to the host only, so commas that
    # legitimately appear in the path, query or fragment are left untouched.
    host = _normalize_host(host.replace(",", "."))
    if not _is_valid_host(host):
        return None

    if port:
        # int() can still raise ValueError for absurdly long digit strings
        # (interpreter integer-conversion limit); treat that as invalid.
        try:
            port_number = int(port)
        except ValueError:
            return None
        if not 1 <= port_number <= 65535:
            return None

    # Rebuild the authority, keeping userinfo if there was any.
    new_netloc = f"{userinfo}{at}{host}"
    if port:
        new_netloc += f":{port}"

    return urlunsplit((scheme, new_netloc, sp.path, sp.query, sp.fragment))