#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
[CYBER-STRAT] Client Wikidata — donnees structurees
API gratuite, zero cle requise
Agent responsable : DATA_SCRAPER
"""

import urllib.request
import urllib.parse
import json


# Endpoint of the Wikidata MediaWiki action API queried by WikidataClient.
WIKIDATA_API = "https://www.wikidata.org/w/api.php"
# Timeout, in seconds, passed to urllib.request.urlopen for every request.
FETCH_TIMEOUT = 5


def _log(level, message):
    """Log prefixe pour le module Wikidata"""
    print(f"[CYBER-STRAT][WIKIDATA][{level}] {message}")


class WikidataClient:
    """Client for the Wikidata API (structured data).

    Looks up an entity by name and returns its label, description and a
    selection of claims (nationality, occupation, dates, ...) so that other
    sources of the pipeline can be enriched with them.
    """

    # Wikidata property IDs read from an entity's claims, mapped to the key
    # under which their values are exposed in the "properties" result dict.
    PROPS = {
        "P31": "instance_of",
        "P27": "nationality",
        "P106": "occupation",
        "P569": "birth_date",
        "P570": "death_date",
        "P19": "birth_place",
        "P20": "death_place",
        "P108": "employer",
        "P39": "position",
        "P17": "country",
        "P36": "capital",
        "P30": "continent",
        "P1082": "population",
        "P571": "inception",
        "P159": "headquarters",
    }

    def get_structured_data(self, query, lang="fr"):
        """Return structured data for the entity best matching *query*.

        This is the main entry point of the client.

        Args:
            query: Free-text name of the entity to look up.
            lang: Language code used for the search and preferred for the
                label/description (English is the fallback).

        Returns:
            The dict built by ``_get_entity``, or ``None`` when the query is
            blank, no entity matches, or a request fails.
        """
        # A blank query can never match; skip the network round-trip.
        if query is None or not str(query).strip():
            _log("WARN", f"Entite non trouvee: '{query}'")
            return None

        entity_id = self._search_entity(query, lang)
        if not entity_id:
            _log("WARN", f"Entite non trouvee: '{query}'")
            return None

        data = self._get_entity(entity_id, lang)
        if data:
            _log("INFO", f"Donnees recuperees pour '{query}' ({entity_id})")
        return data

    def _search_entity(self, query, lang="fr"):
        """Search Wikidata for *query* and return the first entity ID.

        Returns:
            The ID of the top search hit (e.g. ``"Q7747"``), or ``None`` when
            the request fails or yields no result.
        """
        params = urllib.parse.urlencode({
            "action": "wbsearchentities",
            "search": query,
            "language": lang,
            "format": "json",
            "limit": 1
        })
        url = f"{WIKIDATA_API}?{params}"

        result = self._fetch(url)
        if not result or not result.get("search"):
            return None

        return result["search"][0].get("id")

    def _get_entity(self, entity_id, lang="fr"):
        """Fetch label, description and selected claims of one entity.

        Args:
            entity_id: Wikidata entity ID to fetch.
            lang: Preferred language for label and description; English is
                used when no value exists in that language.

        Returns:
            A dict with keys ``wikidata_id``, ``label``, ``description`` and
            ``properties`` (property name -> list of extracted values), or
            ``None`` when the request fails or the entity does not exist.
        """
        params = urllib.parse.urlencode({
            "action": "wbgetentities",
            "ids": entity_id,
            "languages": f"{lang}|en",
            "format": "json",
            "props": "descriptions|claims|labels"
        })
        url = f"{WIKIDATA_API}?{params}"

        result = self._fetch(url)
        if not result or "entities" not in result:
            return None

        entity = result["entities"].get(entity_id)
        # The API flags unknown IDs with a "missing" key; treat that (and an
        # absent entry) as "not found" instead of returning an empty shell.
        if not entity or "missing" in entity:
            return None

        # Displayable name: requested language first, then English.
        labels = entity.get("labels", {})
        label = (
            labels.get(lang, {}).get("value")
            or labels.get("en", {}).get("value")
            or ""
        )

        # Description, with the same language fallback.
        descriptions = entity.get("descriptions", {})
        description = (
            descriptions.get(lang, {}).get("value")
            or descriptions.get("en", {}).get("value")
            or ""
        )

        # Keep only the properties listed in PROPS that yield a value.
        claims = entity.get("claims", {})
        structured = {}
        for prop_id, prop_name in self.PROPS.items():
            if prop_id in claims:
                values = self._extract_claim_values(claims[prop_id])
                if values:
                    structured[prop_name] = values

        return {
            "wikidata_id": entity_id,
            "label": label,
            "description": description,
            "properties": structured
        }

    @staticmethod
    def _year_from_time(time_val):
        """Return the year of a Wikidata time string, keeping a BCE sign.

        ``"+1952-10-07T00:00:00Z"`` gives ``"1952"`` and
        ``"-0500-01-01T00:00:00Z"`` gives ``"-0500"``. An empty or
        unparsable value gives ``""``.
        """
        if not time_val:
            return ""
        sign = "-" if time_val.startswith("-") else ""
        # Drop the leading sign before splitting, otherwise a BCE date would
        # split into an empty first field.
        year = time_val.lstrip("+-").split("-", 1)[0]
        return sign + year if year else ""

    def _extract_claim_values(self, claims):
        """Extract plain values from the claims of one property.

        Handles the datavalue types ``wikibase-entityid`` (returns the linked
        entity ID), ``string``, ``time`` (returns the year only) and
        ``quantity`` (amount without a leading ``+``). Only the first three
        claims are examined; claims without a usable value are skipped.

        Args:
            claims: List of claim dicts for a single property.

        Returns:
            A list of at most three non-empty values, or ``None`` when
            nothing could be extracted.
        """
        values = []
        for claim in claims[:3]:
            datavalue = claim.get("mainsnak", {}).get("datavalue") or {}
            vtype = datavalue.get("type", "")
            raw = datavalue.get("value")

            if vtype == "wikibase-entityid" and isinstance(raw, dict):
                # Linked entity: keep its ID (Q...).
                extracted = raw.get("id", "")
            elif vtype == "string":
                extracted = raw
            elif vtype == "time" and isinstance(raw, dict):
                extracted = self._year_from_time(raw.get("time", ""))
            elif vtype == "quantity" and isinstance(raw, dict):
                # Amounts are signed strings; drop only the redundant "+".
                extracted = raw.get("amount", "").lstrip("+")
            else:
                extracted = None

            # Never report empty placeholders as values.
            if extracted:
                values.append(extracted)

        return values if values else None

    def _fetch(self, url):
        """Send an HTTP GET to *url* and return the decoded JSON body.

        The request times out after ``FETCH_TIMEOUT`` seconds.

        Returns:
            The parsed JSON document, or ``None`` on any failure (the error
            is logged rather than raised).
        """
        try:
            req = urllib.request.Request(url, headers={
                "User-Agent": "CyberStrat/1.0 (console-intelligence-geopolitique)"
            })
            with urllib.request.urlopen(req, timeout=FETCH_TIMEOUT) as resp:
                return json.loads(resp.read().decode("utf-8"))
        except Exception as e:
            # Deliberately broad: this is the I/O boundary and callers rely
            # on a None result instead of an exception.
            _log("ERROR", f"Echec requete Wikidata: {e}")
            return None
