"""
Scraper du site officiel de Bessèges.
Cherche associations, PV, actualités, élus.
"""

import logging
from urllib.parse import quote_plus, urljoin

import httpx
from bs4 import BeautifulSoup

BASE_URL = "https://www.besseges.fr"
TIMEOUT = 8
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (compatible; ICAC/1.0; "
        "mairie-besseges-bot)"
    )
}

logger = logging.getLogger("icac.scraper_besseges")


async def search_association(name: str) -> dict:
    """
    Search for an association on besseges.fr.

    Scans the known association listing pages plus the site search,
    and stops at the first page whose text mentions *name*. From that
    page it extracts the surrounding paragraph, a nearby photo and,
    when one exists, the association's dedicated page.

    Args:
        name: Association name to look for (matched case-insensitively).

    Returns:
        dict with keys: found (bool), name, text, url,
        photo_url (str | None) and source ("besseges.fr").
    """
    result = {
        "found":     False,
        "name":      name,
        "text":      "",
        "url":       "",
        "photo_url": None,
        "source":    "besseges.fr"
    }

    # Pages to scan. The search query must be percent-encoded:
    # a plain replace(' ', '+') leaves '&', '=', '?' and accented
    # characters unescaped and can corrupt the query string.
    urls_to_scan = [
        f"{BASE_URL}/associations/",
        f"{BASE_URL}/vie-associative/",
        f"{BASE_URL}/associations-et-loisirs/",
        f"{BASE_URL}/?s={quote_plus(name)}",
    ]

    async with httpx.AsyncClient(
        headers=HEADERS,
        timeout=TIMEOUT,
        follow_redirects=True
    ) as client:

        for url in urls_to_scan:
            try:
                resp = await client.get(url)
                if resp.status_code != 200:
                    continue
                soup = BeautifulSoup(resp.text, "html.parser")

                # Look for the association name anywhere in the page text.
                text_content = soup.get_text(" ", strip=True)
                name_lower = name.lower()

                if name_lower not in text_content.lower():
                    continue

                # Found on this page — extract the context.
                result["found"] = True
                result["url"] = url

                # Grab the first substantial paragraph/element that
                # mentions the name (short hits are likely menu items).
                for tag in soup.find_all(
                    ["p", "div", "article", "section"]
                ):
                    if name_lower in tag.get_text().lower():
                        text = tag.get_text(" ", strip=True)
                        if len(text) > 50:
                            result["text"] = text[:2000]
                            break

                # Look for an associated image.
                photo = _find_photo_near_text(soup, name_lower)
                if photo:
                    result["photo_url"] = photo

                # Follow the association's dedicated page when linked;
                # its content overrides what the listing page provided.
                asso_link = _find_association_link(
                    soup, name_lower, BASE_URL
                )
                if asso_link:
                    detail = await _fetch_detail_page(
                        client, asso_link, name_lower
                    )
                    if detail["text"]:
                        result["text"] = detail["text"]
                    if detail["photo_url"]:
                        result["photo_url"] = detail["photo_url"]
                    result["url"] = asso_link

                break  # stop at the first matching page

            except Exception as e:
                # Best-effort scraping: log and try the next page.
                logger.warning(f"Erreur scraping {url}: {e}")
                continue

    return result


async def _fetch_detail_page(
    client, url: str, name_lower: str
) -> dict:
    """Fetch an association's dedicated page; return its text and photo.

    Best-effort: any HTTP or parsing error is logged and whatever was
    gathered so far is returned ({"text": "", "photo_url": None} at worst).
    """
    detail = {"text": "", "photo_url": None}
    try:
        response = await client.get(url)
        if response.status_code != 200:
            return detail
        page = BeautifulSoup(response.text, "html.parser")
        detail["text"] = page.get_text(" ", strip=True)[:3000]
        detail["photo_url"] = _find_photo_near_text(page, name_lower)
    except Exception as exc:
        logger.warning(f"Erreur page détail {url}: {exc}")
    return detail


def _find_photo_near_text(soup, name_lower: str) -> str | None:
    """
    Find an image in the vicinity of the text containing the name.

    Strategy: for each element whose text mentions *name_lower*, scan
    <img> tags under its parent (article/section/div neighbourhood).

    Args:
        soup: Parsed page (BeautifulSoup document).
        name_lower: Association name, already lower-cased.

    Returns:
        Absolute image URL, or None if no plausible photo is found.
    """
    for tag in soup.find_all(
        ["p", "div", "article", "section", "li"]
    ):
        if name_lower not in tag.get_text().lower():
            continue

        # Search the parent too: the photo often sits beside the
        # paragraph rather than inside it.
        parent = tag.parent or tag
        for img in parent.find_all("img"):
            src = img.get("src", "")
            if not src:
                continue
            # Exclude logos, icons and other page furniture.
            src_lower = src.lower()
            if any(x in src_lower for x in [
                "logo", "icon", "sprite", "arrow",
                "btn", "bullet", "separator", "1x1"
            ]):
                continue
            # Reject tiny images, but only when both dimensions are
            # explicitly given. (The previous default of "0" made every
            # image WITHOUT width/height attributes fail the < 50 check
            # and get skipped — most real photos carry no such attrs.)
            width = img.get("width")
            height = img.get("height")
            if width is not None and height is not None:
                try:
                    if int(width) < 50 or int(height) < 50:
                        continue
                except (ValueError, TypeError):
                    # Non-numeric sizes ("100px", "auto"): no filtering.
                    pass
            # urljoin resolves absolute, root-relative, relative and
            # protocol-relative (//host/...) sources against BASE_URL,
            # replacing the previous hard-coded domain strings.
            return urljoin(BASE_URL, src)
    return None


def _find_association_link(
    soup, name_lower: str, base_url: str
) -> str | None:
    """
    Cherche un lien vers la page dédiée de l'association.
    """
    for a in soup.find_all("a", href=True):
        link_text = a.get_text().lower()
        href = a["href"]
        if name_lower in link_text or \
           name_lower.split()[0] in link_text:
            if href.startswith("http"):
                return href
            elif href.startswith("/"):
                return f"{base_url}{href}"
    return None


async def list_associations() -> list:
    """
    List the associations advertised on besseges.fr.

    Scans the known association listing pages and collects every link
    whose visible text looks like an association name.

    Returns:
        List of {"nom": <link text>, "url": <absolute url>} dicts,
        without duplicates across pages.
    """
    results = []
    seen = set()  # (nom, url) pairs already collected
    urls_to_scan = [
        f"{BASE_URL}/associations/",
        f"{BASE_URL}/vie-associative/",
        f"{BASE_URL}/associations-et-loisirs/",
    ]
    async with httpx.AsyncClient(
        headers=HEADERS,
        timeout=TIMEOUT,
        follow_redirects=True
    ) as client:
        for url in urls_to_scan:
            try:
                resp = await client.get(url)
                if resp.status_code != 200:
                    continue
                soup = BeautifulSoup(resp.text, "html.parser")
                for a in soup.find_all("a", href=True):
                    text = a.get_text(strip=True)
                    # Heuristic: names are neither tiny ("Lire") nor
                    # paragraph-length anchors.
                    if not 5 < len(text) < 80:
                        continue
                    # Resolve against the page URL; relative hrefs were
                    # previously stored as-is and unusable elsewhere.
                    link = urljoin(url, a["href"])
                    key = (text, link)
                    if key in seen:
                        continue  # same link listed on several pages
                    seen.add(key)
                    results.append({"nom": text, "url": link})
            except Exception as e:
                # Best-effort: log and carry on with the next page.
                logger.warning(f"list_associations {url}: {e}")
    return results


async def scrape_page(url: str) -> str:
    """
    Generic scrape of a single besseges.fr URL.

    Downloads the page, strips navigation and boilerplate elements and
    returns the cleaned main text, truncated to 4000 characters. Any
    failure (HTTP error, timeout, parse problem) yields "".
    """
    try:
        async with httpx.AsyncClient(
            headers=HEADERS,
            timeout=TIMEOUT,
            follow_redirects=True
        ) as client:
            response = await client.get(url)
            if response.status_code != 200:
                return ""
            page = BeautifulSoup(response.text, "html.parser")
            # Remove page chrome before extracting the text.
            noise = [
                "nav", "header", "footer",
                "script", "style", "aside",
            ]
            for element in page(noise):
                element.decompose()
            text = page.get_text(" ", strip=True)
            return text[:4000]
    except Exception as exc:
        logger.error(f"scrape_page {url}: {exc}")
        return ""
