
    si(                     n   d dl Z d dlZd dlZd dlZd dlmZmZ d dlmZ d dlm	Z	m
Z
mZ d dlmZ d dlmZ d dlmZ d dlmZmZmZmZmZmZmZmZ d dlZd dlZd d	lmZmZ d d
lm Z! d dlm"Z# d dl$m%Z%m&Z& d dl'm(Z( d dlm)Z) d dl*m+Z+ d dlm,Z, d dl-m.Z. d dl/m0Z0 d dl1m2Z2m3Z3 d dl4m5Z6 d dl7m8Z8 d dl9m:Z: d dl;m<Z<m=Z=m>Z>  e>            r
d dl?m@Z@mAZAmBZB  ejC        eD          ZEerd dlFmGZG d dlHmIZI d dlJmKZK  G d de,          ZLg dZMg dZNd eeOef         fd!ZPe	 G d" d#e                      ZQd$d%d eOfd&ZRdS )'    N)Counterdefaultdict)copy)	dataclassfieldfields)Path)python_versionindent)TYPE_CHECKINGAnyDictListLiteralOptionalTupleUnion)CardData	ModelCard)dataset_info)
model_info)
EvalResulteval_results_to_model_index)	yaml_dump)nn)tqdm)TrainerCallback)CodeCarbonCallback)make_markdown_table)TrainerControlTrainerState__version__)Transformer)$SentenceTransformerTrainingArguments)fullnameis_accelerate_availableis_datasets_available)DatasetDatasetDictValue)SentenceEvaluator)SentenceTransformer)SentenceTransformerTrainerc                        e Zd Zdddeeef         ddf fdZdeded	e	d
dddf
dZ
deded	e	d
dddf
dZdeded	e	d
ddeeef         ddfdZdeded	e	d
ddeeef         ddfdZ xZS )ModelCardCallbacktrainerr/   default_args_dictreturnNc                 ,   t                                                       || _        || _        d | j        j        j        D             }|r|d         |j        j        _        ||j        j        _        |j        j        	                    d           d S )Nc                 <    g | ]}t          |t                    |S  )
isinstancer   ).0callbacks     S/var/www/icac/venv/lib/python3.11/site-packages/sentence_transformers/model_card.py
<listcomp>z.ModelCardCallback.__init__.<locals>.<listcomp>1   s9     
 
 
($677

 
 
    r   generated_from_trainer)
super__init__r2   r3   callback_handler	callbacksmodelmodel_card_datacode_carbon_callbackadd_tags)selfr2   r3   rB   	__class__s       r;   r@   zModelCardCallback.__init__,   s    !2
 
 L9C
 
 
	
  	NAJ1GM)>07%-%../GHHHHHr=   argsstatecontrolrC   r.   c                    ddl m}m}m} | j        j        r:|j                            | j        j        |j        j        d          |j        _        | j        j	        r:|j                            | j        j	        |j        j
        d          |j        _
        t          | j        j        t                    r,t          | j        j                                                  }	n| j        j        g}	d}
|
t!          |	          k     rf|	|
         }t          ||||f          r3t#          |d          r#|j        |	vr|	                    |j                   |
dz  }
|
t!          |	          k     f|j                            |	           d S )Nr   )AdaptiveLayerLossMatryoshka2dLossMatryoshkaLosstrainevalloss   )sentence_transformers.lossesrM   rN   rO   r2   train_datasetrD   extract_dataset_metadatatrain_datasetseval_dataseteval_datasetsr8   rR   dictlistvalueslenhasattrappend
set_losses)rG   rI   rJ   rK   rC   kwargsrM   rN   rO   lossesloss_idxrR   s               r;   on_init_endzModelCardCallback.on_init_end<   s    	eddddddddd <% 	383H3a3a*E,A,PRY4 4E!0 <$ 	272G2`2`)5+@+NPV3 3E!/ dl'.. 	)$,+224455FFl'(F V$$(#D4.2CEU!VWW)D&))) IV++di(((MH V$$ 	((00000r=   c                      h d|                                 }fd|                                D             |j        _         fd|                                D             |j        _        d S )N>   do_evaldo_testdo_trainrun_name	hub_token	report_to
eval_delay
eval_steps
output_dir
save_stepslogging_dirlogging_stepssave_strategylogging_strategysave_total_limitgreater_is_betterpush_to_hub_tokensamples_per_labelshow_progress_barlogging_first_stepevaluation_strategymetric_for_best_modelc                 $    i | ]\  }}|v	||S r7   r7   )r9   keyvalueignore_keyss      r;   
<dictcomp>z4ModelCardCallback.on_train_begin.<locals>.<dictcomp>   s0     5
 5
 5
%3c>T>TC>T>T>Tr=   c                 X    i | ]&\  }}|v	|j         v |j         |         k    #||'S r7   )r3   )r9   r}   r~   r   rG   s      r;   r   z4ModelCardCallback.on_train_begin.<locals>.<dictcomp>   sU     =
 =
 =
U+%%#1G*G*GEUYUkloUpLpLp LpLpLpr=   )to_dictitemsrD   all_hyperparametersnon_default_hyperparameters)rG   rI   rJ   rK   rC   ra   	args_dictr   s   `      @r;   on_train_beginz ModelCardCallback.on_train_begind   s    
 
 
0 LLNN	5
 5
 5
 5
)2):):5
 5
 5
1=
 =
 =
 =
 =
'oo//=
 =
 =
999r=   metricsc                 (   fdD             }|j         j        rH|j         j        d         d         |j        k    r'|j         j        d                             |           d S |j         j                            |j        |j        d|           d S )Nc                     i | ]N}|                     d           d                    |                    d          dd                   |         OS )_loss _rS   N)endswithjoinsplit)r9   r}   r   s     r;   r   z1ModelCardCallback.on_evaluate.<locals>.<dictcomp>   sS    lllCVYVbVbcjVkVklSXXciinnQRR01173<lllr=   StepEpochr   )rD   training_logsglobal_stepupdater_   epoch)rG   rI   rJ   rK   rC   r   ra   	loss_dicts        `  r;   on_evaluatezModelCardCallback.on_evaluate   s     mlll7lll	!/	%3B7?5CTTT!/3::9EEEEE!/66"[!-       r=   logsc                 v   dht          |          z  }|r|j        j        rP|j        j        d         d         |j        k    r/||                                         |j        j        d         d<   d S |j        j                            |j        |j        ||                                         d           d S d S )NrR   r   r   Training Loss)r   r   r   )setrD   r   r   popr_   r   )rG   rI   rJ   rK   rC   r   ra   keyss           r;   on_logzModelCardCallback.on_log   s     x#d))# 	%3)7;FCuGXXXKOPTPXPXPZPZK[%3B7HHH%3::!& % 1)-dhhjj)9     	 	r=   )__name__
__module____qualname__r   strr   r@   r&   r"   r!   rd   r   floatr   r   __classcell__)rH   s   @r;   r1   r1   +   s       I < IQUVY[^V^Q_ Idh I I I I I I &12&1 &1  	&1
 %&1 
&1 &1 &1 &1P)
2)
 )
  	)

 %)
 
)
 )
 )
 )
V2   	
 % c5j! 
   02   	
 % 3: 
       r=   r1   )languagelicenselibrary_nametagsdatasetsr   pipeline_tagwidgetmodel-indexco2_eq_emissions
base_model)rC   r2   eval_results_dictr4   c                      t                      t          t          j        t          j        d} t                      rddlm} || d<   t                      rddlm} || d<   ddl	m} || d<   | S )N)pythonsentence_transformerstransformerstorchr   r#   
accelerater   
tokenizers)
r
   sentence_transformers_versionr   r$   r   r(   r   r)   r   r   )versionsaccelerate_versiondatasets_versiontokenizers_versions       r;   get_versionsr      s     ""!>$0"	 H    4@@@@@@!3 0<<<<<</<<<<<</H\Or=   c            
       X   e Zd ZU dZ ee          Zeee	e
e	         f                  ed<   dZee	         ed<   dZee	         ed<   dZee	         ed<    ee          Ze
ee	e	f                  ed<    ee          Ze
ee	e	f                  ed	<   d
Ze	ed<    ed           Zee
e	                  ed<   dZed         ed<    edd          Zee	         ed<    edd          Zee	         ed<    eed          Zee	ef         ed<    eed          Zee	ef         ed<    eed          Zeedee	ef         f                  ed<    eed          Ze
ee	ef                  ed<    eed          Ze
ee	e	f                  ed<    edd          Z ee	         ed<    eed          Z!e
ee	e	f                  ed<    edd          Z"ee#         ed<    eed          Z$ee	e	f         ed<    edd          Z%ee&         ed<    eddd           Z'ed!         ed"<    eedd#          Z(e
e	         ed$<    ed%d          Z)e*ed&<    ed'd          Z+e&ed(<    ed)d          Z,e	ed*<    ed+d          Z-e	ed,<    ee.d          Z/ee	e	f         ed-<    eddd           Z0ed.         ed/<   dVd1Z1dWd2e*d0dfd3Z2d4e
e3j4                 d0dfd5Z5d6e&d0dfd7Z6d8ed9         d0dfd:Z7d;dd<ee	ef         d0dfd=Z8dXd?Z9	 dYd8ed9         d@ee	         d0e
ee	e	f                  fdAZ:d8ee	e	f         dBee	ef         dCeeee	e3j4        f         e3j4        f                  d0ee	e	f         fdDZ;d8ed9         dEedF         d0ee	ef         fdGZ<dZdHZ=de	d0dfdIZ>dYde	dJee	         d0dfdKZ?dee	e
e	         f         d0dfdLZ@de	d0dfdMZAdee	e
e	         f         d0dfdNZBdVdOZCd0ee	ef         fdPZDdQ ZEd0eedR         ee	ef         f         fdSZFd0ee	ef         fdTZGdYd0e	fdUZHdS )[ SentenceTransformerModelCardDataa  A dataclass storing data used in the model card.

    Args:
        language (`Optional[Union[str, List[str]]]`): The model language, either a string or a list,
            e.g. "en" or ["en", "de", "nl"]
        license (`Optional[str]`): The license of the model, e.g. "apache-2.0", "mit",
            or "cc-by-nc-sa-4.0"
        model_name (`Optional[str]`): The pretty name of the model, e.g. "SentenceTransformer based on microsoft/mpnet-base".
        model_id (`Optional[str]`): The model ID when pushing the model to the Hub,
            e.g. "tomaarsen/sbert-mpnet-base-allnli".
        train_datasets (`List[Dict[str, str]]`): A list of the names and/or Hugging Face dataset IDs of the training datasets.
            e.g. [{"name": "SNLI", "id": "stanfordnlp/snli"}, {"name": "MultiNLI", "id": "nyu-mll/multi_nli"}, {"name": "STSB"}]
        eval_datasets (`List[Dict[str, str]]`): A list of the names and/or Hugging Face dataset IDs of the evaluation datasets.
            e.g. [{"name": "SNLI", "id": "stanfordnlp/snli"}, {"id": "mteb/stsbenchmark-sts"}]
        task_name (`str`): The human-readable task the model is trained on,
            e.g. "semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more".
        tags (`Optional[List[str]]`): A list of tags for the model,
            e.g. ["sentence-transformers", "sentence-similarity", "feature-extraction"].

    .. tip::

        Install `codecarbon <https://github.com/mlco2/codecarbon>`_ to automatically track carbon emission usage and
        include it in your model cards.

    Example::

        >>> model = SentenceTransformer(
        ...     "microsoft/mpnet-base",
        ...     model_card_data=SentenceTransformerModelCardData(
        ...         model_id="tomaarsen/sbert-mpnet-base-allnli",
        ...         train_datasets=[{"name": "SNLI", "id": "stanfordnlp/snli"}, {"name": "MultiNLI", "id": "nyu-mll/multi_nli"}],
        ...         eval_datasets=[{"name": "SNLI", "id": "stanfordnlp/snli"}, {"name": "MultiNLI", "id": "nyu-mll/multi_nli"}],
        ...         license="apache-2.0",
        ...         language="en",
        ...     ),
        ... )
    )default_factoryr   Nr   
model_namemodel_idrW   rY   zjsemantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more	task_namec                  
    g dS )N)sentence-transformerssentence-similarityzfeature-extractionr7   r7   r=   r;   <lambda>z)SentenceTransformerModelCardData.<lambda>  s     !
 !
 !
 r=   r   
deprecatedgenerate_widget_examplesF)defaultinitr   base_model_revision)r   r   r   r   r-   r   r   r   predict_examplelabel_example_listrE   	citationsbest_model_step)r   r   reprr/   r2   )r   r   r   r   T
first_saver   widget_stepr   r   r   r   versionr.   rC   r4   c                    | j          }t          | j         t                    r| j         g| _         |                     | j        |          | _        |                     | j        |          | _        | j        rJ| j                            d          dk    r.t          	                    d| j        d           d | _        d S d S d S )N)infer_languages/rS   zThe provided z} model ID should include the organization or user, such as "tomaarsen/mpnet-base-nli-matryoshka". Setting `model_id` to None.)
r   r8   r   validate_datasetsrW   rY   r   countloggerwarning)rG   r   s     r;   __post_init__z.SentenceTransformerModelCardData.__post_init__>  s    "m+dmS)) 	,!]ODM"44T5HZi4jj!33D4FXg3hh= 	!T]0055::NN^ ^ ^ ^   !DMMM	! 	!::r=   r   c                 F   g }|D ]}d|vrd|v r|d         |d<   d|v r	 t          |d                   }|j        ri|rgd|j        v r^|j                            d          }| nt          |t                    r|g}|D ]%}|| j        vr| j                            |           &|j        | j        vr| j                            |j                   n7# t          $ r* t                              d|d         d           |d= Y nw xY w|                    |           |S )Nnameidr   zThe dataset `id` z5 does not exist on the Hub. Setting the `id` to None.)get_dataset_infocardDatagetr8   r   r   r_   r   r   	Exceptionr   r   )rG   dataset_listr   output_dataset_listdatasetinfodataset_languager   s           r;   r   z2SentenceTransformerModelCardData.validate_datasetsN  s    # 	0 	0GW$$7??&-dmGFOw6+GDM::D } ? ?Z4==X=X+/=+<+<Z+H+H(+3!E%&6<< B0@/A,(8 ? ?H't}<< $ 4 4X > > > wdm33,,TW555% ! & & &NNrGDMrrr    	&(  &&w////""s   C1DDrb   c                    ddi}|D ]'}	 |j         ||j        j        <   # t          $ r Y $w xY wt	          t
                    }|                                D ] \  }}||                             |           !dt          t                   dt          fdfd|                                D             | _
        |                     d d |D             D                        d S )	NzSentence Transformersa  
@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}
rb   r4   c                     t          |           dk    r)d                    | d d                   dz   | d         z   S | d         S )NrS   z, r   z and r   )r]   r   )rb   s    r;   	join_listz>SentenceTransformerModelCardData.set_losses.<locals>.join_list  sC    6{{Qyy--7&*DD!9r=   c                 .    i | ]\  }} |          |S r7   r7   )r9   citationrb   r   s      r;   r   z?SentenceTransformerModelCardData.set_losses.<locals>.<dictcomp>  s*    iii:J(F))F++Xiiir=   c                     g | ]}d | S )zloss:r7   r9   rR   s     r;   r<   z?SentenceTransformerModelCardData.set_losses.<locals>.<listcomp>  s    eee$~t~~eeer=   c                 (    i | ]}|j         j        |S r7   )rH   r   r   s     r;   r   z?SentenceTransformerModelCardData.set_losses.<locals>.<dictcomp>  s     2d2d2dUY4>3JD2d2d2dr=   )r   rH   r   r   r   r[   r   r_   r   r   r   rF   )rG   rb   r   rR   inverted_citationsr   r   s         @r;   r`   z+SentenceTransformerModelCardData.set_lossesp  s+   # 
&
	  	 	D59]	$.122   (..'oo// 	6 	6ND(x(//5555	d3i 	C 	 	 	 	
 jiiiN`NfNfNhNhiiiee2d2d]c2d2d2deeefffffs    
--stepc                     || _         d S N)r   )rG   r   s     r;   set_best_model_stepz4SentenceTransformerModelCardData.set_best_model_step  s    #r=   r   )r*   r+   c                 6   t          |t                    rt          |          }g | _        t	          t          j        t          |                                          d                    }d}t          |
                                ddd          D ]\  }}d	 ||         j        
                                D             }||                             |          }t          |          }i }	t          |                    t          j        t#          |          t%          ||                                        D ]3\  }
}t'          d
 |                                D                       |	|
<   4t+          t-          |	
                                d            \  }}|d |         t          ||d          d d d                   }}|D ]E}
t          ||
                                                   }t          |          dk     r|r|                                }t          ||                                                   }t          |          dk    r|                    |           n|                    |d                    t          |          dk     r|t          |          dk     r| j                            |d         t          j        |dd          t          |          dz
            d           |d d         | _        Gd S )N)r      )k  zComputing widget examplesexampleF)descunitleavec                 d    g | ]-\  }}t          |t                    r|j        d k    %|dk    +|.S )stringdataset_name)r8   r,   dtype)r9   columnfeatures      r;   r<   zHSentenceTransformerModelCardData.set_widget_examples.<locals>.<listcomp>  sS       #FGgu-- 3:-82K2KPVZhPhPh PhPhPhr=   c              3   4   K   | ]}t          |          V  d S r   r]   )r9   r~   s     r;   	<genexpr>zGSentenceTransformerModelCardData.set_widget_examples.<locals>.<genexpr>  s(      "K"K%3u::"K"K"K"K"K"Kr=   c                     | d         S )NrS   r7   )xs    r;   r   zFSentenceTransformerModelCardData.set_widget_examples.<locals>.<lambda>  s
    AaD r=   r}   r      rS   r   )source_sentence	sentences   )r8   r*   r+   r   r   randomchoicesr[   r   r   r   featuresselect_columnsr]   	enumerateselectsamplerangeminsumr\   zipsortedr   extendr_   r   )rG   r   dataset_namesnum_samples_to_checkr  num_samplescolumnsstr_datasetdataset_sizelengthsidxr  indicesr   target_indicesbackup_indicesr  
backup_idxbackup_samples                      r;   set_widget_examplesz4SentenceTransformerModelCardData.set_widget_examples  s.   gw'' 	3!'222GtGLLNN/C/Cq I I IJJ#)-!!(C)[`*
 *
 *
 )	5 )	5%L+
 '.|'<'E'K'K'M'M  G
 ",/>>wGGK{++LG(""6=|1D1DL`bnHoHo#p#p#pqq    L LV  #"K"K6==??"K"K"KKKfW]]__..IIIJJGQ-4\k\-BDQ\Q]Q]I^_c_cac_cIdDeDeNN & 5 5 S!1!8!8!:!:;;	)nnq((^(!/!3!3!5!5J$(Z)@)G)G)I)I$J$JM=))Q..!((7777 "((q)9::: )nnq((^( y>>A%%""(1!6=QZ[\[][]Q^befobpbpstbtCuCuCuvv   (1!}$$'5-)	5 )	5r=   	evaluatorr   c                 J   ddl m} t          |          | j        |<   t	          |d          r|j        xrt          ||          rd |j        D             nt          t                    rg| j	        d}d}n"| j	        j
        j        }| j	        j
        j        }fd|                                D             }| j        r9| j        d         d         |k    r"| j        d                             |           d S | j                            ||d|           d S d S d S )	Nr   )SequentialEvaluatorprimary_metricc                     g | ]	}|j         
S r7   )r.  )r9   sub_evaluators     r;   r<   zKSentenceTransformerModelCardData.set_evaluation_metrics.<locals>.<listcomp>  s    "j"j"jM=#?"j"j"jr=   c                 $    i | ]\  }}|v 	||S r7   r7   )r9   r}   r~   primary_metricss      r;   r   zKSentenceTransformerModelCardData.set_evaluation_metrics.<locals>.<dictcomp>  s+    #k#k#k:3TW[jTjTjCTjTjTjr=   r   r   r   ) sentence_transformers.evaluationr-  r   r   r^   r.  r8   
evaluatorsr   r2   rJ   r   r   r   r   r   r_   )rG   r+  r   r-  r   r   training_log_metricsr2  s          @r;   set_evaluation_metricsz7SentenceTransformerModelCardData.set_evaluation_metrics  sz   HHHHHH,0MMy) 9.// 		H`5`_ 	)%899 4"j"jU^Ui"j"j"jOS11 4#2"3|#|)5*0#k#k#k#k#k#k#k ! 	d&8&<V&D&L&L"2&--.BCCCCC"))!& $  /    #	 	 	 	r=   r*   c                     d}t          t                    }t                      }|D ]}|d         }|d         }||vrZ||                             dt	          |           d           t          ||                   |k    r|                    |           t          |           j        k    r n fd|                                D              _	        d S )Nr  textlabelz<li>z</li>c                     g | ]V\  }}j         j        r't          |t                    rj         j        |         n|d d                    |          z   dz   dWS )z<ul> z</ul>)LabelExamples)rC   labelsr8   intr   )r9   r9  example_setrG   s      r;   r<   zGSentenceTransformerModelCardData.set_label_examples.<locals>.<listcomp>  s}     #
 #
 #

 #{ 6:Z5Fl:V[]`KaKal*511gl"RWW[%9%99GC #
 #
 #
r=   )
r   r[   r   r_   r   r]   addnum_classesr   r   )rG   r   num_examples_per_labelexamplesfinished_labelsr  r8  r9  s   `       r;   set_label_examplesz3SentenceTransformerModelCardData.set_label_examples  s    !"t$$%% 	 	F&>D7OEO++&&'?d4jj'?'?'?@@@x''+AAA#''...?##t'777 8#
 #
 #
 #

 '/nn&6&6#
 #
 #
r=   r  c                     t          |t                    r  fd|                                D             S dt          t                   dt          t                   dt
          fd}|j        }i }|rt          j        d|          rd }|r||d<   |rd|d	         v rt          |d	         d                   j
        }d
} |||          }|dk    r|gS ||t          |          z            }	d|	v r#|	                    d          \  }
}|
 d| |d<   n-d }
|	}	 t          |          j        |d<   n# t          $ r Y nw xY wt          |d                   dk    r|d         |d<   |gS )Nc                 N    g | ]!\  }}                     ||           D ]}|"S ))r  )infer_datasets)r9   r  sub_datasetr   rG   s       r;   r<   zCSentenceTransformerModelCardData.infer_datasets.<locals>.<listcomp>  sW       -L+#22;\2ZZ      r=   tuplesubtupler4   c                     t          |           D ]3\  }}||d         k    r"| ||t          |          z            |k    r|c S 4dS )Nr   r   )r  r]   )rK  rL  ielements       r;   subtuple_finderzHSentenceTransformerModelCardData.infer_datasets.<locals>.subtuple_finder  sZ    '..  
7hqk))eACMM8I4I.Jh.V.VHHH2r=   z_dataset_\d+r   filenamer   )huggingfacer   r   ___r   r   (   revision)r8   r+   r   r   r   r?  cache_filesrematchr	   partsr]   r   r   r   r   )rG   r   r  rP  rW  dataset_outputcache_path_partsrL  indexcache_dataset_nameauthors   `          r;   rI  z/SentenceTransformerModelCardData.infer_datasets  s    g{++ 	   18   	5: 	s 	 	 	 	 	 ) 	 BH^\BB 	 L 	2%1N6" 	B:Q77#KN:$>??E2H#O$4h??E{{&'' "2%#h--2G!H***'9'?'?'F'F$*0'A'A<'A'At$$1+;L+I+I+LN4((     D #B'((B..-=b-Az*s   D. .
D;:D;r   rR   c           
      
   |si S t          |          |d<   d |j        D             |d<   i |d<   |j        D ]}|dd         |         d         }t          |t                    r| j                                      }t          |t                    r5d|v r1|d                             d	
                                          }d}nd D             }d}dt          t          |          d           d| t          t          |          t          |          z  d           d| t          t          |          d           d| dd|d         |<   &t          |t          t          f          r9t                    dfdt                    D             d|d         |<   {t          |t                     rdt          t          ||                   d          t          t          ||                   t          |          z  d          t          t          ||                   d          dd|d         |<   t          |t"                    rt          d D                       t                    d	k    r"ddt          |           did|d         |<   zdt                     dt                    t                    z  ddt                     ddd|d         |<   t%          |          i d|d         |<   dt          fdddid |d                                         D             ddifd |d                                         D             g}	t)          t+          |	                              d!d"          d#          |d$<   |dd%         |d&<   t          |d&         t#          |d&                   d                            }
g }t/          |
          D ]}i }|j        D ]}|d&         |         |         }t          |t"                    r5t          |          d'k    r"t          |dd'                   dd(         d)z   }t          |                              d*d+          }d,| d-||<   |                    |           t)          t+          |                              d!d"          d#          |d.<   d/t%          |          i|d0<   t3          |d1          rh|                                }	 t7          j        |d23          }n# t:          $ r t          |          }Y nw xY wt)          d4| d5d#          |d0         d6<   |S )7a  
        Given a dataset, compute the following:
        * Dataset Size
        * Dataset Columns
        * Dataset Stats
            - Strings: min, mean, max word count/token length
            - Integers: Counter() instance
            - Floats: min, mean, max range
            - List: number of elements or min, mean, max number of elements
        * 3 Example samples
        * Loss function name
            - Loss function config
        sizec                     g | ]}d | d	S )<code></code>r7   )r9   r  s     r;   r<   zLSentenceTransformerModelCardData.compute_dataset_metrics.<locals>.<listcomp>F  s$    "_"_"_#;F#;#;#;"_"_"_r=   r   statsNr   r   attention_maskrS   )dimtokensc                 ,    g | ]}t          |          S r7   r  )r9   sentences     r;   r<   zLSentenceTransformerModelCardData.compute_dataset_metrics.<locals>.<listcomp>Q  s    HHHs8}}HHHr=   
charactersr     r   )r  meanmax)r  datar?  c                 v    i | ]5}|t                    d k    rdnd |         t                    z  d6S )rS   ~r;  z.2%r  )r9   r}   counter
subsections     r;   r   zLSentenceTransformerModelCardData.compute_dataset_metrics.<locals>.<dictcomp>_  s`        s7||a'7'7RePST^P_P_A_eee  r=   r   c                 ,    g | ]}t          |          S r7   r  )r9   lsts     r;   r<   zLSentenceTransformerModelCardData.compute_dataset_metrics.<locals>.<listcomp>n  s    "B"B"B3s88"B"B"Br=   r[   z	 elementsz.2fro  c                 p    dd                     d |                                 D                       z   dz   S )Nz<ul><li>z	</li><li>c              3   *   K   | ]\  }}| d | V  dS )z: Nr7   r9   r}   r~   s      r;   r  zaSentenceTransformerModelCardData.compute_dataset_metrics.<locals>.to_html_list.<locals>.<genexpr>  s4      0b0bzsEC1B1B51B1B0b0b0b0b0b0br=   z
</li></ul>)r   r   ro  s    r;   to_html_listzNSentenceTransformerModelCardData.compute_dataset_metrics.<locals>.to_html_list  s=     0 00b0bUYU_U_UaUa0b0b0b b bbeqqqr=   r;  typec                 &    i | ]\  }}||d          S )r  r7   rx  s      r;   r   zLSentenceTransformerModelCardData.compute_dataset_metrics.<locals>.<dictcomp>  s"    ```JCCw```r=   detailsc                 :    i | ]\  }}| |d                    S ry  r7   )r9   r}   r~   rz  s      r;   r   zLSentenceTransformerModelCardData.compute_dataset_metrics.<locals>.<dictcomp>  s-    pppJCsLLv$?$?pppr=   -:|--|z  stats_tabler  rD  r   r   z, ...]
z<br>rc  rd  examples_tabler'   rR   get_config_dictr  r   z```json
z
```config_code)r]   column_namesr8   r   rC   tokenizerZ   r  tolistroundr  rn  r?  boolr   r  r   r[   r'   r   r   r    replacer  r_   r^   r  jsondumps	TypeError)rG   r   r   rR   r  first	tokenizedr#  suffixstats_linesr  examples_lines
sample_idxr   r~   config
str_configrr  rs  rz  s                    @@@r;   compute_dataset_metricsz8SentenceTransformerModelCardData.compute_dataset_metrics/  sN   &  	I"7||V"_"_'J^"_"_"_Y "W* 8	W 8	WF $/JqME%%% 5W J//
;;	i.. *3Cy3P3P'(89==!=DDKKMMG%FFHHZHHHG)F%"'Ga"8"8CC6CC#(WG)Da#H#H S S6 S S"'Ga"8"8CC6CC 1 1W%f-- EC;// %W!*--"    #)'??  1 1W%f-- E5)) W$$S%9%91== %c'&/&:&:S\\&I1 M M$S%9%91== 1 1W%f-- E4(( W!"B"Bz"B"B"BCCw<<1$$!'"s5zz$<$<$<!5 5L)&11 "(&)'ll#=#=#='*7||c'll'B$Q$Q$Q$Q&)'ll#=#=#=! !5 5L)&11 ;C5//SU0V0VW%f--	rt 	r 	r 	r 	r a``,wBWB]B]B_B_```aqppppR^_fRgRmRmRoRopppq
 '--@-M-M-U-UV[]b-c-cei&j&j]##*2A2;Z ,z24Z8P3Q3QRS3TUVV,, 
	+ 
	+JG!. : :$Z08DeT** ;s5zzA~~bqb	NN3B3/(:EE

**488"95"9"9"9!!'****)/0CN0S0S0[0[\ach0i0iko)p)p%&  
V 4*++ 	^))++F)!Zq999

 ) ) ) [[


)289VZ9V9V9VX\2]2]L /s   $T; ;UUdataset_type)rP   rQ   c           	          |r|rt          |t                    r t          |          t          |          k    s(t          |t                    r9t          |          dk    r&t                              d| d| d| d           g }|s                     |          }t          |t                    rB fdt          |                                |	                                |          D             }n( 
                    ||d          j        j                  g}|dk    r3t          d	 |D                       }|r                     d
|                                 |          S )NrS   zThe number of `z?_datasets` in the model card data does not match the number of z1 datasets in the Trainer. Removing the provided `z$_datasets` from the model card data.c           
          g | ]X\  }}}                     ||t          j        j        t                    rj        j        |         nj        j                  YS r7   )r  r8   r2   rR   rZ   )r9   r  dataset_valuer   rG   s       r;   r<   zMSentenceTransformerModelCardData.extract_dataset_metadata.<locals>.<listcomp>  sv     	$ 	$ 	$ Bm\ 00%$;EdlFWY];^;^u),77dhdpdu 	$ 	$ 	$r=   r   rP   c                 :    g | ]}|                     d d          S )ra  r   )r   )r9   metadatas     r;   r<   zMSentenceTransformerModelCardData.extract_dataset_metadata.<locals>.<listcomp>  s&    'a'a'aHVQ(?(?'a'a'ar=   zdataset_size:)r8   r+   r]   r*   r   r   rI  r  r   r\   r  r2   rR   r  rF   r   )rG   r   dataset_metadatar  num_training_sampless   `    r;   rV   z9SentenceTransformerModelCardData.extract_dataset_metadata  s     	s &G[11&69:J6K6KsSZ||6[6[w00 7\589I5J5Ja5O5Oal a as a a.:a a a   $& # @#'#6#6w#?#? ';// s	$ 	$ 	$ 	$ FI(8(8:JF F	$ 	$ 	$   %)$@$@JZ[\J]_c_k_p$q$q#r  7""#&'a'aP`'a'a'a#b#b # FD.BDDEEE%%&6777r=   c                     || _         d S r   )rC   )rG   rC   s     r;   register_modelz/SentenceTransformerModelCardData.register_model  s    


r=   c                     || _         d S r   )r   )rG   r   s     r;   set_model_idz-SentenceTransformerModelCardData.set_model_id  s     r=   rV  c                     	 t          |          }n# t          $ r Y dS w xY w|j        | _        ||dk    r|j        }|| _        dS )NFmainT)get_model_infor   r   r   shar   )rG   r   rV  r   s       r;   set_base_modelz/SentenceTransformerModelCardData.set_base_model  sg    	'11JJ 	 	 	55	 %-x611!~H#+ ts    
  c                 D    t          |t                    r|g}|| _        d S r   )r8   r   r   )rG   r   s     r;   set_languagez-SentenceTransformerModelCardData.set_language  s&    h$$ 	" zH r=   c                     || _         d S r   )r   )rG   r   s     r;   set_licensez,SentenceTransformerModelCardData.set_license  s    r=   c                     t          |t                    r|g}|D ]%}|| j        vr| j                            |           &d S r   )r8   r   r   r_   )rG   r   tags      r;   rF   z)SentenceTransformerModelCardData.add_tags  sX    dC   	6D 	& 	&C$)##	  %%%	& 	&r=   c                    t          | j        d         t                    r| j        d         j        j        j        }t          |          }d                    |j        dd                    g}|j	        
                    d          |fdt          dt                              D             z  }|D ]}|                     |          r d S d S d S )Nr   r   rT  r   c                     g | ]@}d                      d|                   dz   d                      |d                   z   AS )r   Nr   )r   )r9   r$  splitss     r;   r<   zJSentenceTransformerModelCardData.try_to_set_base_model.<locals>.<listcomp>  sX     $ $ $JM&&,sxxstt/E/EE$ $ $r=   rS   )r8   rC   r%   
auto_modelr  _name_or_pathr	   r   rZ  r   r   r  r]   r  )rG   r   base_model_pathcandidate_model_idsr   r  s        @r;   try_to_set_base_modelz6SentenceTransformerModelCardData.try_to_set_base_model  s   djm[11 	A18FJ":..O $'88O,A"##,F#G#G"H
 %)//44F $ $ $ $QVWXZ]^dZeZeQfQf$ $ $  0  &&x00 EE	 	 r=   c           
      
  	
 g }i }g }| j                                         D ]\  }}t          |dd          	t          |dd          
	rt          	fd|                                D                       rT	fd|                                D             }
r2
                    	dz             r
t          	          dz   d         
dt          d	t          fd
fd|                                D             }
fd|                                D             }|j        t          |dd          |	                    t          |          t          |                              dd          d           |                    fd|                                D                        |                    |           |t          |                                          t!          | j        |          dS )au  Format the evaluation metrics for the model card.

        The following keys will be returned:
        - eval_metrics: A list of dictionaries containing the class name, description, dataset name, and a markdown table
          This is used to display the evaluation metrics in the model card.
        - metrics: A list of all metric keys. This is used in the model card metadata.
        - model-index: A list of dictionaries containing the task name, task type, dataset type, dataset name, metric name,
          metric type, and metric value. This is used to display the evaluation metrics in the model card metadata.
        r   Nr.  c              3   H   K   | ]}|                     d z             V  dS )r   N)
startswith)r9   r}   r   s     r;   r  zGSentenceTransformerModelCardData.format_eval_metrics.<locals>.<genexpr>  s3      QQ3CNN4#:66QQQQQQr=   c                 L    i | ] \  }}|t                    d z   d         |!S )rS   Nr  )r9   r}   r~   r   s      r;   r   zHSentenceTransformerModelCardData.format_eval_metrics.<locals>.<dictcomp>  s2    YYY:33s4yy1}/YYYr=   r   rS   r~   r4   c                 r    	 t          | d          r|                                 S n# t          $ r Y nw xY w| S )z^Try to convert a value from a Numpy or Torch scalar to pure Python, if not already pure Pythonr  )r^   itemr   )r~   s    r;   try_to_pure_pythonzPSentenceTransformerModelCardData.format_eval_metrics.<locals>.try_to_pure_python  sQ    ug.. ,$zz||+,    Ds   #' 
44c                 .    i | ]\  }}| |          S r7   r7   )r9   r}   r~   r  s      r;   r   zHSentenceTransformerModelCardData.format_eval_metrics.<locals>.<dictcomp>   s+    XXX*#us..u55XXXr=   c                     g | ]>\  }}|k    rd | d n||k    rd t          |d           d nt          |d          d?S )**r  )Metricr,   )r  )r9   
metric_keymetric_valuer.  s      r;   r<   zHSentenceTransformerModelCardData.format_eval_metrics.<locals>.<listcomp>"  s        -J 6@>5Q5Q1:1111Wa!^33 =%a"8"8<<<<|Q//	   r=   r  r  )
class_namedescriptionr  tablec                 j   g | ]\  }}t          |t          t          f          !t                                                              d d          pd                    dd                               dd           pd|                    dd                                           ||          S )r   -unknownr   Unknown)r   	task_typer  r  metric_namemetric_typer  )r8   r?  r   r   lowerr  title)r9   r  r  r  r  s      r;   r<   zHSentenceTransformerModelCardData.format_eval_metrics.<locals>.<listcomp>8  s        1
L!,e=="-"-"3"3"5"5"="=c3"G"G%1%>Y%1%9%9#s%C%C%K%KCQT%U%U%bYb$.$6$6sC$@$@$F$F$H$H$.%1    r=   )eval_metricsr   r   )r   r   getattrallr   r  r]   r   r  r_   r'   r    r  r  r   r[   r   r   )rG   r  all_metricseval_resultsr+  r   table_linesr  r  r   r.  r  s          @@@@@r;   format_eval_metricsz4SentenceTransformerModelCardData.format_eval_metrics  ss    "&"8">">"@"@ 7	( 7	(Iw9fd33D$Y0@$GGN EQQQQ',,..QQQQQ EYYYYYYY! En&?&?s
&K&K E%3CIIMOO%DN# #     YXXXXXXG    18  K $/K"9fd;;L"*9"5"5#.$00==EEeUSS	         5<MMOO     w'''' )K,,..//6tUU
 
 	
r=   c                      d  j         D             }dt          dt          fd}t          ||           fd j         D             }t          |          }|d|v dS )	Nc                 @    h | ]}|                                 D ]}|S r7   )r   )r9   linesr}   s      r;   	<setcomp>zHSentenceTransformerModelCardData.format_training_logs.<locals>.<setcomp>P  s-    WWW5%**,,WW33WWWWr=   r}   r4   c                 d    | dk    rdS | dk    rdS | dk    rdS |                      d          rdS | S )	Nr   0r   1r   2rR   3)r   r  s    r;   sort_metricszKSentenceTransformerModelCardData.format_training_logs.<locals>.sort_metricsS  sL    g~~sf}}so%%s||F## sJr=   r  c                 0    g | ]fd D             S )c                     i | ]J}|d          j         k    r d|v rt          |         d          nd dn                    |d          KS )r   r  r  r  )r   r  r   )r9   r}   linerG   s     r;   r   zTSentenceTransformerModelCardData.format_training_logs.<locals>.<listcomp>.<dictcomp>`  s}         <4#777 J%S	1---#IIIIXXc3''  r=   r7   )r9   r  rG   sorted_eval_lines_keyss    @r;   r<   zISentenceTransformerModelCardData.format_training_logs.<locals>.<listcomp>_  sT     
 
 
      2	  
 
 
r=   r  )
eval_linesexplain_bold_in_eval)r   r   r  r    )rG   eval_lines_keysr  r   r  r  s   `    @r;   format_training_logsz5SentenceTransformerModelCardData.format_training_logsN  s    WWD,>WWW		c 		c 		 		 		 		 "(\!J!J!J
 
 
 
 
 *
 
 
 )77
$$(J$6
 
 	
r=   r   c                 *   | j         j                                        }dt          |j                  dz  t          |j                  dd|j        dk    |j        |j        t          |j
        dz  d          di}|j        r|j        |d         d	<   |S )
Nr   r   
codecarbonzfine-tuningYi  r  )	emissionsenergy_consumedsourcetraining_typeon_cloud	cpu_modelram_total_size
hours_usedhardware_used)rE   tracker_prepare_emissions_datar   r  r  r  r  r  r  duration	gpu_model)rG   emissions_dataresultss      r;   get_codecarbon_dataz4SentenceTransformerModelCardData.get_codecarbon_datan  s    2:RRTT">#;<<tC#()G#H#H&!.*3s:+5"0"?#N$;d$BAFF
! 
!
 # 	T;I;SG&'8r=   c                      j         s6 j        / j        j        p j        j        x}r                     |            j        r- j        s&	                                   n# t          $ r Y nw xY w j	        s j        rd j          _	        nd _	         fdt                     D             } j        rZ	 |                                                                n1# t          $ r$}t                              d|            |d }~ww xY w j        r]	 |                                                                n4# t          $ r'}t                              d|            Y d }~nd }~ww xY wt%           j                  dk    |d<    j        rD j        j        r8 j        j        j        '|                                                                 j                                        |d<    j                                        |d	<   t5           j                  |d
<    j        j        rYddddd                     j        j         j        j                            dd                                                    |d<   nd|d<   d _        t>          D ]}|                     |d            |S )NzSentenceTransformer based on r.   c                 F    i | ]}|j         t          |j                   S r7   )r   r  )r9   r   rG   s     r;   r   z<SentenceTransformerModelCardData.to_dict.<locals>.<dictcomp>  s)    VVVej'$
";";VVVr=   z+Error while formatting evaluation metrics: z&Error while formatting training logs: d   hide_eval_linesmodel_max_lengthoutput_dimensionalitymodel_stringzCosine SimilarityzDot ProductzEuclidean DistancezManhattan Distance)cosinedot	euclidean	manhattanr   r   similarity_fn_nameF)!r   r2   rX   rU   r*  r   r   r  r   r   r   r   r   r  r   r   r   r  r]   rE   r  _start_timer  rC   get_max_seq_length get_sentence_embedding_dimensionr   r  r   r  r  IGNORED_FIELDSr   )rG   r   
super_dictexcr}   s   `    r;   r   z(SentenceTransformerModelCardData.to_dict  s;    	.( L5S9SS ) $$W--- ? 	4? 	**,,,,     	8 8"S$/"S"S"7VVVVPTVVV
 ! 	!!$":":"<"<====   RSRRSSS	
  	OO!!$";";"="=>>>> O O OMMMNNNNNNNNO ),D,>(?(?#(E
$% %	:)1	: )1=Id6688999 *.)F)F)H)H
%&.2j.Y.Y.[.[
*+%(__
>":( 	C-$11	0 0
 c$*/1N1V1VWZ\_1`1`1f1f1h1hii +,, 0CJ+,! 	& 	&CNN3%%%%sB   A# #
A0/A0;'C# #
D-DD'E 
E5E00E5c                     t          d |                                                                 D             d|                                          S )Nc                 0    i | ]\  }}|t           v |||S r   )YAML_FIELDSrx  s      r;   r   z<SentenceTransformerModelCardData.to_yaml.<locals>.<dictcomp>  s.    mmmJCC;DVDV[`[lS%[l[l[lr=   F)	sort_keys
line_break)r   r   r   strip)rG   r  s     r;   to_yamlz(SentenceTransformerModelCardData.to_yaml  sO    mm$,,..*>*>*@*@mmm!
 
 
 %''		r=   )r4   N)T)r   r*   r4   Nr   )rC   r.   r4   N)Ir   r   r   __doc__r   r[   r   r   r   r   r   __annotations__r   r   r   rW   r   rY   r   r   r   r   r   r   rZ   r   r   r   r   r   r   r   r   r   rE   r   r   r   r?  r2   r   r   r  r   r   r   r   r   rC   r   r   r   Moduler`   r   r*  r6  rF  rI  r  rV   r  r  r  r  r  rF   r  r  r  r  r   r  r7   r=   r;   r   r      s        $ $N 16d0K0K0KHhuS$s)^,-KKK!GXc]!!! $J$$$"Hhsm"""+05+F+F+FNDc3h(FFF*/%*E*E*EM4S#X'EEEt s    !&
 
! ! !D(49
    7Cgl3BBB !&d ? ? ?J???).t%)H)H)H#HHH27%SX2Y2Y2Yc3hYYY*/%5*Q*Q*Qc3hQQQMRUcgnsMtMtMtx%8$sCx.%H IJttt,1E$U,S,S,SM4S%Z()SSS#(5E#J#J#JFDc3h JJJ%*U4e%D%D%DOXc]DDD/4uTPU/V/V/VT#s(^,VVV9>tRW9X9X9X(#56XXX %d G G GItCH~GGG%*U4e%D%D%DOXc]DDD6;eDu[`6a6a6aGX23aaa%5uMMMHd3iMMM uT666J666uRe444K444 &;%HHHL#HHH&=EJJJL#JJJ#eLuMMMGT#s(^MMM .3U4eRW-X-X-XE8)*XXX! ! ! !  #  #t  #t  #  #  #  #Dgbi gT g g g g>$ $ $ $ $ $1551I+J 15t 15 15 15 15f0C dSVX[S[n ae    >
 
 
 
, W[4  4 564 FNsm4 	d38n	4  4  4  4 lvc3hv 38nv uT#ry.129<=>	v
 
c3hv v v vp%856%8X_`oXp%8	c3h%8 %8 %8 %8N   !S !T ! ! ! !
 
s 
hsm 
t 
 
 
 
!U3S	>%: !t ! ! ! !
3 4    &U3S	>2 &t & & & &   &J
T#s(^ J
 J
 J
 J
X
 
 
@T'2D*EtCQTH~*U%V    &Dc3h D D D DL #      r=   r   rC   r.   c                     t          t                    j        dz  }t          j        | j        |d          }|j        S )Nzmodel_card_template.mdu   🤗)	card_datatemplate_pathhf_emoji)r	   __file__parentr   from_templaterD   content)rC   r  
model_cards      r;   generate_model_cardr!    s;    NN),DDM(53HXepvwwwJr=   )Sr  loggingr  rX  collectionsr   r   r   dataclassesr   r   r   pathlibr	   platformr
   textwrapr   typingr   r   r   r   r   r   r   r   r   r   huggingface_hubr   r   r   r   r   r  huggingface_hub.repocard_datar   r   huggingface_hub.utilsr   r   tqdm.autonotebookr   r   transformers.integrationsr   transformers.modelcardr    transformers.trainer_callbackr!   r"   r   r$   r   sentence_transformers.modelsr%   #sentence_transformers.training_argsr&   sentence_transformers.utilr'   r(   r)   r   r*   r+   r,   	getLoggerr   r   2sentence_transformers.evaluation.SentenceEvaluatorr-   )sentence_transformers.SentenceTransformerr.   sentence_transformers.trainerr/   r1   r  r  r   r   r   r!  r7   r=   r;   <module>r7     s      				 , , , , , , , ,       0 0 0 0 0 0 0 0 0 0       # # # # # #       R R R R R R R R R R R R R R R R R R R R      / / / / / / / / < < < < < < 8 8 8 8 8 8 Q Q Q Q Q Q Q Q + + + + + +       " " " " " " ( ( ( ( ( ( 8 8 8 8 8 8 6 6 6 6 6 6 F F F F F F F F N N N N N N 4 4 4 4 4 4 T T T T T T _ _ _ _ _ _ _ _ _ _ 54444444444		8	$	$ ITTTTTTMMMMMMHHHHHHS S S S S S S Sl   ;::d38n    , d d d d dx d d dN4       r=   