
    si                         d dl Z d dlZd dlZd dlmZ d dlmZmZmZm	Z	 d dl
mZ erd dlmZ  ej        e          Z G d de          ZdS )    N)nullcontext)TYPE_CHECKINGDictListOptional)SentenceEvaluator)SentenceTransformerc                        e Zd ZdZ	 	 	 	 	 	 ddee         dee         d	ed
edededee         f fdZ	ddddede
eef         fdZedefd            Z xZS )MSEEvaluatora
  
    Computes the mean squared error (x100) between the computed sentence embedding
    and some target sentence embedding.

    The MSE is computed between ||teacher.encode(source_sentences) - student.encode(target_sentences)||.

    For multilingual knowledge distillation (https://arxiv.org/abs/2004.09813), source_sentences are in English
    and target_sentences are in a different language like German, Chinese, Spanish...

    Args:
        source_sentences (List[str]): Source sentences to embed with the teacher model.
        target_sentences (List[str]): Target sentences to embed with the student model.
        teacher_model (SentenceTransformer, optional): The teacher model to compute the source sentence embeddings.
        show_progress_bar (bool, optional): Show progress bar when computing embeddings. Defaults to False.
        batch_size (int, optional): Batch size to compute sentence embeddings. Defaults to 32.
        name (str, optional): Name of the evaluator. Defaults to "".
        write_csv (bool, optional): Write results to CSV file. Defaults to True.
        truncate_dim (int, optional): The dimension to truncate sentence embeddings to. `None` uses the model's current truncation
            dimension. Defaults to None.

    Example:
        ::

            from sentence_transformers import SentenceTransformer
            from sentence_transformers.evaluation import MSEEvaluator
            from datasets import load_dataset

            # Load a model
            student_model = SentenceTransformer('paraphrase-multilingual-mpnet-base-v2')
            teacher_model = SentenceTransformer('all-mpnet-base-v2')

            # Load any dataset with some texts
            dataset = load_dataset("sentence-transformers/stsb", split="validation")
            sentences = dataset["sentence1"] + dataset["sentence2"]

            # Given queries, a corpus and a mapping with relevant documents, the InformationRetrievalEvaluator computes different IR metrics.
            mse_evaluator = MSEEvaluator(
                source_sentences=sentences,
                target_sentences=sentences,
                teacher_model=teacher_model,
                name="stsb-dev",
            )
            results = mse_evaluator(student_model)
            '''
            MSE evaluation (lower = better) on the stsb-dev dataset:
            MSE (*100):  0.805045
            '''
            print(mse_evaluator.primary_metric)
            # => "stsb-dev_negative_mse"
            print(results[mse_evaluator.primary_metric])
            # => -0.8050452917814255
    NF     Tsource_sentencestarget_sentencesshow_progress_bar
batch_sizename	write_csvtruncate_dimc	                    t                                                       || _        | j        t                      n|                    | j                  5  |                    |||d          | _        d d d            n# 1 swxY w Y   || _        || _        || _	        || _
        d|z   dz   | _        g d| _        || _        d| _        d S )NTr   r   convert_to_numpymse_evaluation_z_results.csv)epochstepsMSEnegative_mse)super__init__r   r   truncate_sentence_embeddingsencodesource_embeddingsr   r   r   r   csv_filecsv_headersr   primary_metric)
selfr   r   teacher_modelr   r   r   r   r   	__class__s
            `/var/www/icac/venv/lib/python3.11/site-packages/sentence_transformers/evaluation/MSEEvaluator.pyr   zMSEEvaluator.__init__E   s#    	("/7[]]]]=g=g>
 >
 	 	 &3%9%9 4ER\os &: & &D"	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 !1!2$	)D0>A444",s   BB
Bmodelr	   output_pathreturnc                 ,   |dk    r|dk    rd| }nd| d| d}nd}| j         |d| j          dz  }| j         t                      n|                    | j                   5  |                    | j        | j        | j        d	
          }d d d            n# 1 swxY w Y   | j        |z
  dz                                  }|dz  }t          
                    d| j         d| d           t          
                    d                    |                     || j        rt          j                            || j                  }t          j                            |          }	t'          |d|	rdndd          5 }
t)          j        |
          }|	s|                    | j                   |                    |||g           d d d            n# 1 swxY w Y   d| i}|                     || j                  }|                     ||           |S )Nr)   z after epoch z
 in epoch z after z stepsr   z (truncated to )Tr      d   z'MSE evaluation (lower = better) on the z dataset:zMSE (*100):	{:4f}awzutf-8)newlinemodeencodingr   )r   r   r   r    r   r   r   r!   meanloggerinfor   formatr   ospathjoinr"   isfileopencsvwriterwriterowr#   prefix_name_to_metrics store_metrics_in_model_card_data)r%   r*   r+   r   r   out_txttarget_embeddingsmsecsv_pathoutput_file_existsfrA   metricss                r(   __call__zMSEEvaluator.__call__c   s   B;;{{1%11BuBBUBBBG(=):====G"/7[]]]U=_=_`d`q=r=r 	 	 %%"&"8?!%	 !- ! !	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 &)::q@FFHHs
[di[[QX[[[\\\(//44555"t~"w||K??H!#!9!9h8J1SPS^efff 5jkA) 6OOD$4555s 34445 5 5 5 5 5 5 5 5 5 5 5 5 5 5 "C4(--gtyAA--eW===s%   #)BBB>A	GGGc                     dS )NzKnowledge Distillation )r%   s    r(   descriptionzMSEEvaluator.description   s    ''    )NFr   r   TN)Nr)   r)   )__name__
__module____qualname____doc__r   strboolintr   r   r   floatrL   propertyrO   __classcell__)r'   s   @r(   r   r      s       3 3r "'&*- -s)- s)-
  - - - - sm- - - - - -<' '3 '# 'eijmotjteu ' ' ' 'R (S ( ( ( X( ( ( ( (rP   r   )r@   loggingr;   
contextlibr   typingr   r   r   r   2sentence_transformers.evaluation.SentenceEvaluatorr   )sentence_transformers.SentenceTransformerr	   	getLoggerrQ   r8   r   rN   rP   r(   <module>ra      s    



  				 " " " " " " 6 6 6 6 6 6 6 6 6 6 6 6 P P P P P P NMMMMMM		8	$	$( ( ( ( ($ ( ( ( ( (rP   