
    si(                         d dl Z d dlZd dlZd dlmZ d dlmZmZmZm	Z	m
Z
 d dlZd dlmZmZmZ d dlmZ d dlmZ d dlmZ erd dlmZ  ej        e          Z G d	 d
e          ZdS )    N)nullcontext)TYPE_CHECKINGDictListOptionalUnion)paired_cosine_distancespaired_euclidean_distancespaired_manhattan_distances)SentenceEvaluator)InputExample)SimilarityFunction)SentenceTransformerc                        e Zd ZdZ	 	 	 	 	 	 ddee         dee         d	ee         d
eeeef                  dede	de
de
dee	         f fdZedee         fd            Z	 ddddede	de	deeef         f
dZ xZS )TripletEvaluatora  
    Evaluate a model based on a triplet: (sentence, positive_example, negative_example).
    Checks if distance(sentence, positive_example) < distance(sentence, negative_example).

    Example:
        ::

            from sentence_transformers import SentenceTransformer
            from sentence_transformers.evaluation import TripletEvaluator
            from datasets import load_dataset

            # Load a model
            model = SentenceTransformer('all-mpnet-base-v2')

            # Load a dataset with (anchor, positive, negative) triplets
            dataset = load_dataset("sentence-transformers/all-nli", "triplet", split="dev")

            # Initialize the TripletEvaluator using anchors, positives, and negatives
            triplet_evaluator = TripletEvaluator(
                anchors=dataset[:1000]["anchor"],
                positives=dataset[:1000]["positive"],
                negatives=dataset[:1000]["negative"],
                name="all-nli-dev",
            )
            results = triplet_evaluator(model)
            '''
            TripletEvaluator: Evaluating the model on the all-nli-dev dataset:
            Accuracy Cosine Distance:        95.60
            Accuracy Dot Product:            4.40
            Accuracy Manhattan Distance:     95.40
            Accuracy Euclidean Distance:     95.60
            '''
            print(triplet_evaluator.primary_metric)
            # => "all-nli-dev_max_accuracy"
            print(results[triplet_evaluator.primary_metric])
            # => 0.956
    N    FTanchors	positives	negativesmain_distance_functionname
batch_sizeshow_progress_bar	write_csvtruncate_dimc
                 r   t                                                       || _        || _        || _        || _        |	| _        t          | j                  t          | j                  k    sJ t          | j                  t          | j                  k    sJ |rt          |          nd| _	        || _
        |Nt                                          t          j        k    p&t                                          t          j        k    }|| _        d|rd|z   ndz   dz   | _        g d| _        || _        dS )a  
        Initializes a TripletEvaluator object.

        Args:
            anchors (List[str]): Sentences to check similarity to. (e.g. a query)
            positives (List[str]): List of positive sentences
            negatives (List[str]): List of negative sentences
            main_distance_function (Union[str, SimilarityFunction], optional):
                The distance function to use. If not specified, use cosine similarity,
                dot product, Euclidean, and Manhattan. Defaults to None.
            name (str): Name for the output. Defaults to "".
            batch_size (int): Batch size used to compute embeddings. Defaults to 16.
            show_progress_bar (bool): If true, prints a progress bar. Defaults to False.
            write_csv (bool): Write results to a CSV file. Defaults to True.
            truncate_dim (int, optional): The dimension to truncate sentence embeddings to.
                `None` uses the model's current truncation dimension. Defaults to None.
        Ntriplet_evaluation_r   z_results.csv)epochstepsaccuracy_cosinusaccuracy_manhattanaccuracy_euclidean)super__init__r   r   r   r   r   lenr   r   r   loggergetEffectiveLevelloggingINFODEBUGr   csv_filecsv_headersr   )selfr   r   r   r   r   r   r   r   r   	__class__s             d/var/www/icac/venv/lib/python3.11/site-packages/sentence_transformers/evaluation/TripletEvaluator.pyr&   zTripletEvaluator.__init__;   s*   : 	""	(4<  C$7$777774<  C$7$77777Tj&t&89O&P&P&Ppt#$$((**gl:if>V>V>X>X\c\i>i  "3145OS4ZZRPSaammm"    examplesc                     g }g }g }|D ]b}|                     |j        d                    |                     |j        d                    |                     |j        d                    c | |||fi |S )Nr         )appendtexts)clsr3   kwargsr   r   r   examples          r1   from_input_examplesz$TripletEvaluator.from_input_exampleso   s    		 	/ 	/GNN7=+,,,W]1-...W]1-....s7Iy;;F;;;r2   modelr   output_pathr    r!   returnc           	      |	   |dk    r|dk    rd| }nd| d| d}nd}| j         |d| j          dz  }t                              d	| j         d
| d           d}d\  }}}	}
| j         t	                      n|                    | j                   5  |                    | j        | j        | j	        d          }|                    | j
        | j        | j	        d          }|                    | j        | j        | j	        d          }d d d            n# 1 swxY w Y   t          ||          }t          ||          }t          j        ||z  d          }t          j        ||z  d          }t          ||          }t          ||          }t!          ||          }t!          ||          }t#          t%          |                    D ]c}|dz  }||         ||         k     r|dz  }||         ||         k     r|dz  }||         ||         k     r|	dz  }	||         ||         k     r|
dz  }
d||z  }||z  }|	|z  }|
|z  }t                              d                    |dz                       t                              d                    |dz                       t                              d                    |dz                       t                              d                    |dz                       || j        rt*          j                            || j                  }t*          j                            |          stt5          |ddd          5 }t7          j        |          }|                    | j                   |                    |||||g           d d d            n# 1 swxY w Y   nYt5          |ddd          5 }t7          j        |          }|                    |||||g           d d d            n# 1 swxY w Y   t>          j         dt>          j!        dt>          j"        dt>          j#        di$                    | j%        d          | _&        ||||tO          |||          d }| (                    || j                  }| )                    ||           |S )!Nr=   z after epoch z
 in epoch z after z stepsr   z (truncated to )z.TripletEvaluator: Evaluating the model on the z dataset:r   )r   r   r   r   T)r   r   convert_to_numpy)axisr5   z#Accuracy Cosine Distance:   	{:.2f}d   z#Accuracy Dot Product:       	{:.2f}z#Accuracy Manhattan Distance:	{:.2f}z$Accuracy Euclidean Distance:	{:.2f}
wzutf-8)newlinemodeencodingacosine_accuracydot_accuracyeuclidean_accuracymanhattan_accuracymax_accuracy)rL   rM   rO   rN   rP   )*r   r(   infor   r   truncate_sentence_embeddingsencoder   r   r   r   r   r	   npsumr   r
   ranger'   formatr   ospathjoinr-   isfileopencsvwriterwriterowr.   r   COSINEDOT_PRODUCT	EUCLIDEAN	MANHATTANgetr   primary_metricmaxprefix_name_to_metrics store_metrics_in_model_card_data)r/   r>   r?   r    r!   out_txtnum_tripletsnum_correct_cos_tripletsnum_correct_dot_tripletsnum_correct_manhattan_tripletsnum_correct_euclidean_tripletsembeddings_anchorsembeddings_positivesembeddings_negativespos_cos_distanceneg_cos_distancespos_dot_distanceneg_dot_distancespos_manhattan_distanceneg_manhattan_distancespos_euclidean_distanceneg_euclidean_distancesidxaccuracy_cosaccuracy_dotr#   r$   csv_pathfr^   metricss                                  r1   __call__zTripletEvaluator.__call__{   s    B;;{{1%11BuBBUBBBG(=):====GbTYbbX_bbbccc 	
$$** #/7[]]]U=_=_`d`q=r=r 	 	!&?"&"8!%	 ". " " $)<<?"&"8!%	 $0 $ $  $)<<?"&"8!%	 $0 $ $ 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	* 33EG[\\34FH\]] 6"47K"KRTUUUF#58L#LSUVVV "<<NPd!e!e"<=OQe"f"f "<<NPd!e!e"<=OQe"f"f-..// 	4 	4CAL$'8'===(A-($'8'===(A-(%c*-DS-III.!3.%c*-DS-III.!3./,>/,>;lJ;lJ:AA,QTBTUUVVV:AA,QTBTUUVVV:AABTWZBZ[[\\\<CCDVY\D\]]^^^"t~"w||K??H7>>(++ 	j(BS7KKK jq Z]]FOOD$4555OOUE<ASUg$hiiij j j j j j j j j j j j j j j (BS7KKK jq Z]]FOOUE<ASUg$hiiij j j j j j j j j j j j j j j
 %'8*N(*>(*>	

 #d)>
:
: 	  ,("4"4.@BTUU
 
 --gtyAA--eW===s8   A9DDD*A	N??OO/PP P)Nr   r   FTN)Nr=   r=   )__name__
__module____qualname____doc__r   strr   r   r   intboolr&   classmethodr   r<   r   floatr   __classcell__)r0   s   @r1   r   r      sg       $ $V LP"'&*2# 2#c2# 92# 9	2#
 !)s4F/F)G H2# 2# 2#  2# 2# sm2# 2# 2# 2# 2# 2#h 	<4+= 	< 	< 	< [	< dfp p*p9<pLOp]`p	c5j	p p p p p p p pr2   r   )r]   r*   rX   
contextlibr   typingr   r   r   r   r   numpyrT   sklearn.metrics.pairwiser	   r
   r   2sentence_transformers.evaluation.SentenceEvaluatorr   sentence_transformers.readersr   *sentence_transformers.similarity_functionsr   )sentence_transformers.SentenceTransformerr   	getLoggerr   r(   r    r2   r1   <module>r      s(   



  				 " " " " " " = = = = = = = = = = = = = =     t t t t t t t t t t P P P P P P 6 6 6 6 6 6 I I I I I I NMMMMMM		8	$	$W W W W W( W W W W Wr2   