#!/usr/bin/env python3
"""Tests for CORRECTION 1: per-domain deduplication + compensation + media."""
import sys
import os
import unittest
from unittest.mock import patch, MagicMock

# Make the agents_python directory importable (media_fetcher lives there).
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "agents_python"))


class TestDeduplicateByDomain(unittest.TestCase):
    """Unit tests for the _deduplicate_by_domain helper."""

    def setUp(self):
        from media_fetcher import _deduplicate_by_domain
        self.dedup = _deduplicate_by_domain

    def test_no_duplicates(self):
        entries = [
            {"url": "https://lemonde.fr/article1", "title": "A"},
            {"url": "https://lefigaro.fr/article2", "title": "B"},
            {"url": "https://liberation.fr/article3", "title": "C"},
        ]
        kept, removed = self.dedup(entries)
        self.assertEqual(len(kept), 3)
        self.assertEqual(removed, 0)

    def test_five_same_domain(self):
        """Prompt test 1: five tv5monde.com results get capped at two."""
        entries = [
            {"url": "https://information.tv5monde.com/page%d" % i,
             "title": "T%d" % i}
            for i in range(5)
        ]
        entries.append(
            {"url": "https://pascalherard.fr/", "title": "Site perso"})
        kept, removed = self.dedup(entries)
        # Only two tv5monde entries survive the per-domain cap.
        tv5_kept = [r for r in kept if "tv5monde.com" in r["url"]]
        self.assertEqual(len(tv5_kept), 2)
        self.assertEqual(removed, 3)
        # The personal site is preserved.
        self.assertTrue(any("pascalherard" in r["url"] for r in kept))

    def test_preserves_order(self):
        entries = [
            {"url": "https://a.com/1", "title": "first"},
            {"url": "https://a.com/2", "title": "second"},
            {"url": "https://a.com/3", "title": "third"},
            {"url": "https://b.com/1", "title": "other"},
        ]
        kept, _ = self.dedup(entries)
        titles = [r["title"] for r in kept[:3]]
        self.assertEqual(titles, ["first", "second", "other"])

    def test_custom_max(self):
        entries = [
            {"url": "https://site.com/p%d" % i, "title": "T%d" % i}
            for i in range(5)
        ]
        kept, removed = self.dedup(entries, max_per_domain=3)
        self.assertEqual(len(kept), 3)
        self.assertEqual(removed, 2)

    def test_www_stripped(self):
        entries = [
            {"url": "https://www.lemonde.fr/a", "title": "A"},
            {"url": "https://lemonde.fr/b", "title": "B"},
            {"url": "https://www.lemonde.fr/c", "title": "C"},
        ]
        kept, removed = self.dedup(entries)
        self.assertEqual(len(kept), 2)
        self.assertEqual(removed, 1)


class TestGetDominantDomain(unittest.TestCase):
    """Unit tests for the _get_dominant_domain helper."""

    def setUp(self):
        from media_fetcher import _get_dominant_domain
        self.get_dom = _get_dominant_domain

    def test_clear_dominant(self):
        hits = [{"url": u} for u in (
            "https://tv5monde.com/p1",
            "https://tv5monde.com/p2",
            "https://tv5monde.com/p3",
            "https://lemonde.fr/a",
        )]
        self.assertEqual(self.get_dom(hits), "tv5monde.com")

    def test_empty(self):
        # An empty result list yields the empty string, not None.
        self.assertEqual(self.get_dom([]), "")


class TestSearchWithCompensation(unittest.TestCase):
    """Prompt tests 1+2: a compensation query fires when duplicates are removed.

    _search_with_compensation is expected to apply the per-domain cap to
    the initial results and, when entries were dropped, issue exactly one
    extra search to backfill with more varied sources.
    """

    @patch("media_fetcher._serper_search")
    def test_compensation_triggered(self, mock_search):
        from media_fetcher import _search_with_compensation

        # Five results from the same tv5monde.com domain + two unique ones.
        initial = [
            {"url": f"https://information.tv5monde.com/page{i}",
             "title": f"TV5 {i}", "snippet": f"S{i}"}
            for i in range(5)
        ] + [
            {"url": "https://pascalherard.fr/", "title": "Site perso",
             "snippet": "Formation"},
            {"url": "https://radiofrance.fr/epis", "title": "Radio",
             "snippet": "Interview"},
        ]

        # The compensation query returns varied sources.
        mock_search.return_value = [
            {"url": "https://franceculture.fr/pascal", "title": "FC",
             "snippet": "Interview"},
            {"url": "https://filsdu.net/pascal", "title": "Fils",
             "snippet": "Article"},
            {"url": "https://scani.fr/pascal", "title": "SCANI",
             "snippet": "Candidature"},
        ]

        results, dominant = _search_with_compensation(
            "Pascal Herard", initial)

        # The compensation search was triggered exactly once.
        mock_search.assert_called_once()
        # tv5monde is identified as the dominant domain.
        self.assertIn("tv5monde", dominant)
        # At most 2 results per domain after deduplication.
        domain_counts = {}
        for r in results:
            host = r["url"].split("/")[2]
            # Strip only a *leading* "www." -- a blanket str.replace("www.", "")
            # would also corrupt hostnames merely containing "www." inside.
            if host.startswith("www."):
                host = host[4:]
            domain_counts[host] = domain_counts.get(host, 0) + 1
        for d, c in domain_counts.items():
            self.assertLessEqual(c, 2, f"Domaine {d} a {c} resultats")
        # Varied sources were kept.
        self.assertGreaterEqual(len(results), 5)

    @patch("media_fetcher._serper_search")
    def test_no_compensation_needed(self, mock_search):
        from media_fetcher import _search_with_compensation

        # Eight results on eight distinct domains: nothing to deduplicate.
        initial = [
            {"url": f"https://site{i}.com/p", "title": f"T{i}",
             "snippet": f"S{i}"}
            for i in range(8)
        ]

        results, _ = _search_with_compensation("Test Person", initial)
        # No compensation query because zero duplicates were removed.
        mock_search.assert_not_called()
        self.assertEqual(len(results), 8)


class TestHasPressFootprint(unittest.TestCase):
    """Unit tests for the _has_press_footprint helper."""

    def setUp(self):
        from media_fetcher import _has_press_footprint
        self.has_press = _has_press_footprint

    def test_tv5monde_detected(self):
        hits = [{"url": "https://information.tv5monde.com/auteur/pascal"}]
        self.assertTrue(self.has_press(hits))

    def test_no_press(self):
        """Prompt test 4: Gil Charpenet with no national press coverage."""
        hits = [{"url": u} for u in (
            "https://wiza.co/gil-charpenet",
            "https://mobygames.com/credits",
            "https://demonwinner.free.fr/peintre",
        )]
        self.assertFalse(self.has_press(hits))

    def test_lemonde_detected(self):
        # One national outlet among non-press sites is enough.
        hits = [
            {"url": "https://www.lemonde.fr/article"},
            {"url": "https://wiza.co/person"},
        ]
        self.assertTrue(self.has_press(hits))


class TestSearchNationalMedia(unittest.TestCase):
    """Unit tests for _search_national_media (national press backfill)."""

    @patch("media_fetcher._serper_search")
    def test_keyword_different_from_dominant(self, mock_search):
        """Prompt test 5: lemonde dominant -> keyword must not be lemonde."""
        from media_fetcher import _search_national_media

        mock_search.return_value = [
            {"url": "https://radiofrance.fr/laurelli", "title": "RF",
             "snippet": "Interview"},
        ]

        initial = [
            {"url": "https://lemonde.fr/article1"},
            {"url": "https://lemonde.fr/article2"},
            {"url": "https://lefigaro.fr/article3"},
            {"url": "https://franceinfo.fr/article4"},
        ]
        existing = initial[:2]

        results = _search_national_media(
            "Olivier Laurelli", initial, existing, "lemonde.fr")

        # The query must NOT use "lemonde" as a keyword.  Strip the quoted
        # person name and inspect everything that remains: indexing
        # split('"')[2] would raise IndexError on an unquoted query and
        # would miss a forbidden keyword placed *before* the name.
        call_args = mock_search.call_args
        query_sent = call_args[0][0]
        keywords_only = query_sent.lower().replace('"olivier laurelli"', "")
        self.assertNotIn("lemonde", keywords_only)
        # Non-empty results.
        self.assertGreater(len(results), 0)

    @patch("media_fetcher._serper_search")
    def test_tv5monde_maps_to_radiofrance(self, mock_search):
        """Prompt test 3: Herard (tv5monde dominant) -> radiofrance keyword."""
        from media_fetcher import _search_national_media

        mock_search.return_value = [
            {"url": "https://franceculture.fr/herard", "title": "FC",
             "snippet": "Podcast"},
        ]

        initial = [
            {"url": "https://information.tv5monde.com/p1"},
            {"url": "https://information.tv5monde.com/p2"},
            {"url": "https://information.tv5monde.com/p3"},
        ]

        results = _search_national_media(
            "Pascal Herard", initial, [], "information.tv5monde.com")

        call_args = mock_search.call_args
        query_sent = call_args[0][0]
        # tv5monde maps onto the radiofrance media family, so the dominant
        # brand collides with the candidate keyword and the implementation
        # falls back to a generic query.  Whatever the chosen keyword, the
        # query must always carry the quoted person name.
        self.assertIn('"Pascal Herard"', query_sent)

    @patch("media_fetcher._serper_search")
    def test_no_duplicate_urls(self, mock_search):
        from media_fetcher import _search_national_media

        mock_search.return_value = [
            {"url": "https://radiofrance.fr/new", "title": "New"},
            {"url": "https://lemonde.fr/existing", "title": "Existing"},
        ]

        existing = [{"url": "https://lemonde.fr/existing"}]

        results = _search_national_media(
            "Test Person", [], existing, "lemonde.fr")

        # URLs already present in `existing` must be filtered out.
        urls = [r["url"] for r in results]
        self.assertNotIn("https://lemonde.fr/existing", urls)

    @patch("media_fetcher._serper_search")
    def test_quoted_name_in_query(self, mock_search):
        """Prompt test 6: the name is quoted in the query to avoid noise."""
        from media_fetcher import _search_national_media

        mock_search.return_value = []

        _search_national_media(
            "Antoine Champagne",
            [{"url": "https://lemonde.fr/a"}],
            [], "lemonde.fr")

        call_args = mock_search.call_args
        query_sent = call_args[0][0]
        self.assertIn('"Antoine Champagne"', query_sent)


if __name__ == "__main__":
    # Discover and run every TestCase in this module via the unittest CLI.
    unittest.main()
