
    si                         d dl Z d dlZd dlZd dlmZ d dlmZmZmZm	Z	 d dl
Zd dlZd dlmZ d dlmZ erd dlmZ  ej        e          Z G d de          ZdS )	    N)nullcontext)TYPE_CHECKINGDictListOptional)SentenceEvaluator)pytorch_cos_sim)SentenceTransformerc                        e Zd ZdZ	 	 	 	 	 	 ddee         dee         d	ed
ededededee         f fdZ		 ddddededede
eef         f
dZ xZS )TranslationEvaluatora  
    Given two sets of sentences in different languages, e.g. (en_1, en_2, en_3...) and (fr_1, fr_2, fr_3, ...),
    and assuming that fr_i is the translation of en_i.
    Checks if vec(en_i) has the highest similarity to vec(fr_i). Computes the accuracy in both directions

    Example:
        ::

            from sentence_transformers import SentenceTransformer
            from sentence_transformers.evaluation import TranslationEvaluator
            from datasets import load_dataset

            # Load a model
            model = SentenceTransformer('paraphrase-multilingual-mpnet-base-v2')

            # Load a parallel sentences dataset
            dataset = load_dataset("sentence-transformers/parallel-sentences-news-commentary", "en-nl", split="train[:1000]")

            # Initialize the TranslationEvaluator using the same texts from two languages
            translation_evaluator = TranslationEvaluator(
                source_sentences=dataset["english"],
                target_sentences=dataset["non_english"],
                name="news-commentary-en-nl",
            )
            results = translation_evaluator(model)
            '''
            Evaluating translation matching Accuracy of the model on the news-commentary-en-nl dataset:
            Accuracy src2trg: 90.80
            Accuracy trg2src: 90.40
            '''
            print(translation_evaluator.primary_metric)
            # => "news-commentary-en-nl_mean_accuracy"
            print(results[translation_evaluator.primary_metric])
            # => 0.906
    F    TNsource_sentencestarget_sentencesshow_progress_bar
batch_sizenameprint_wrong_matches	write_csvtruncate_dimc	                 X   t                                                       || _        || _        || _        || _        || _        || _        || _        t          | j                  t          | j                  k    sJ |rd|z   }d|z   dz   | _
        g d| _        || _        d| _        dS )a  
        Constructs an evaluator based for the dataset

        The labels need to indicate the similarity between the sentences.

        Args:
            source_sentences (List[str]): List of sentences in the source language.
            target_sentences (List[str]): List of sentences in the target language.
            show_progress_bar (bool): Whether to show a progress bar when computing embeddings. Defaults to False.
            batch_size (int): The batch size to compute sentence embeddings. Defaults to 16.
            name (str): The name of the evaluator. Defaults to an empty string.
            print_wrong_matches (bool): Whether to print incorrect matches. Defaults to False.
            write_csv (bool): Whether to write the evaluation results to a CSV file. Defaults to True.
            truncate_dim (int, optional): The dimension to truncate sentence embeddings to. If None, the model's
                current truncation dimension will be used. Defaults to None.
        _translation_evaluationz_results.csv)epochstepssrc2trgtrg2srcmean_accuracyN)super__init__r   r   r   r   r   r   r   lencsv_filecsv_headersr   primary_metric)
selfr   r   r   r   r   r   r   r   	__class__s
            h/var/www/icac/venv/lib/python3.11/site-packages/sentence_transformers/evaluation/TranslationEvaluator.pyr    zTranslationEvaluator.__init__8   s    6 	 0 0	$!2#6 (4())S1F-G-GGGGG 	:D047.HCCC"-    modelr
   output_pathr   r   returnc           	         |dk    r|dk    rd| }nd| d| d}nd}| j         |d| j          dz  }t                              d	| j         d
| d           | j         t	                      n|                    | j                   5  t          j        |                    | j	        | j
        | j        d                    }t          j        |                    | j        | j
        | j        d                    }d d d            n# 1 swxY w Y   t          ||                                                                                                          }d}	d}
t#          t%          |                    D ]}t'          j        ||                   }||k    r|	dz  }	)| j        rt-          d|d|d|           t-          d| j	        |                    t-          d| j        |         d||         |         dd           t-          d| j        |         d||         |         dd           t/          ||                   }t1          |d d          }|d d         D ]'\  }}t-          d|d|dd| j        |                    (|j        }t#          t%          |                    D ]'}t'          j        ||                   }||k    r|
dz  }
(|	t%          |          z  }|
t%          |          z  }t                              d                    |dz                       t                              d                    |dz                       || j        rt8          j                            || j                  }t8          j                             |          }tC          |d|rd nd!d"#          5 }tE          j#        |          }|s|$                    | j%                   |$                    ||||g           d d d            n# 1 swxY w Y   ||||z   d$z  d%}| &                    || j                  }| '                    ||           |S )&Nr)   z after epoch z
 in epoch z after z stepsr   z (truncated to )z=Evaluating translation matching Accuracy of the model on the z dataset:F)r   r   convert_to_numpyr      z
Incorrect  : Sourcezis most similar to targetzinstead of targetzSource     :zPred Target:z(Score: z.4fzTrue Target:c                     | d         S )Nr1    )xs    r'   <lambda>z/TranslationEvaluator.__call__.<locals>.<lambda>   s
    ! r(   T)keyreverse   	zAccuracy src2trg: {:.2f}d   zAccuracy trg2src: {:.2f}awzutf-8)newlinemodeencoding   )src2trg_accuracytrg2src_accuracyr   )(r   loggerinfor   r   truncate_sentence_embeddingstorchstackencoder   r   r   r   r	   detachcpunumpyranger!   npargmaxr   print	enumeratesortedTformatr   ospathjoinr"   isfileopencsvwriterwriterowr#   prefix_name_to_metrics store_metrics_in_model_card_data)r%   r*   r+   r   r   out_txtembeddings1embeddings2cos_simscorrect_src2trgcorrect_trg2srcimax_idxresultsidxscoreacc_src2trgacc_trg2srccsv_pathoutput_file_existsfrZ   metricss                          r'   __call__zTranslationEvaluator.__call__f   s8    B;;{{1%11BuBBUBBBG(=):====GqTXT]qqgnqqqrrr"/7[]]]U=_=_`d`q=r=r 	 	+)&*&<#%*	    K  +)&*&<#%*	    K	 	 	 	 	 	 	 	 	 	 	 	 	 	 	$ #;<<CCEEIIKKQQSSs8}}%% 	Z 	ZAi,,GG||1$) 	Z-q2MwXkmnooond&;A&>???nd&;G&DFlQYZ[Q\]dQeFlFlFlFlmmmnd&;A&>@`8TU;WX>@`@`@`@`aaa#HQK00 nndKKK")"1"+ Z ZJC$%<%<%<%<%<d>STW>XYYYY:s8}}%% 	% 	%Ai,,GG||1$%H5%H5.55kC6GHHIII.55kC6GHHIII"t~"w||K??H!#!9!9h8J1SPS^efff JjkA) 6OOD$4555{K HIIIJ J J J J J J J J J J J J J J !, +)K71<
 

 --gtyAA--eW===s&   	A5D

DDA
PP P)Fr   r   FTN)Nr)   r)   )__name__
__module____qualname____doc__r   strboolintr   r    r   floatro   __classcell__)r&   s   @r'   r   r      s       " "P #($)&*,. ,.s),. s),.  	,.
 ,. ,. ",. ,. sm,. ,. ,. ,. ,. ,.^ dfS S*S9<SLOS]`S	c5j	S S S S S S S Sr(   r   )rY   loggingrT   
contextlibr   typingr   r   r   r   rK   rM   rF   2sentence_transformers.evaluation.SentenceEvaluatorr   sentence_transformers.utilr	   )sentence_transformers.SentenceTransformerr
   	getLoggerrp   rC   r   r3   r(   r'   <module>r      s    



  				 " " " " " " 6 6 6 6 6 6 6 6 6 6 6 6      P P P P P P 6 6 6 6 6 6 NMMMMMM		8	$	$f f f f f, f f f f fr(   