
    si                     `    d dl Z d dlZd dlZddlmZ  G d d          Z G d de          ZdS )    N   )InputExamplec                   <    e Zd ZdZddddej        dddfdZdd	Zd
S )STSDataReadera1  Reads in the STS dataset. Each line contains two sentences (s1_col_idx, s2_col_idx) and one label (score_col_idx)

    Default values expects a tab separated file with the first & second column the sentence pair and third column the score (0...1). Default config normalizes scores from 0...5 to 0...1
    r   r      	T   c
                     || _         || _        || _        || _        || _        || _        || _        || _        |	| _        d S )N)	dataset_folderscore_col_idx
s1_col_idx
s2_col_idx	delimiterquotingnormalize_scores	min_score	max_score)
selfr   r   r   r   r   r   r   r   r   s
             ^/var/www/icac/venv/lib/python3.11/site-packages/sentence_transformers/readers/STSDataReader.py__init__zSTSDataReader.__init__   sJ     -*$$" 0""    c           
         t           j                            | j        |          }|                    d          rt          j        |dd          nt          |d          5 }t          j        || j	        | j
                  }g }t          |          D ]\  }}t          || j                           }	| j        r|	| j        z
  | j        | j        z
  z  }	|| j                 }
|| j                 }|                    t)          |t+          |          z   |
|g|	                     |dk    rt-          |          |k    r nd	d	d	           n# 1 swxY w Y   |S )
zJfilename specified which data split to use (train.csv, dev.csv, test.csv).z.gzrtutf8)encodingzutf-8)r   r   )guidtextslabelr   N)ospathjoinr   endswithgzipopencsvreaderr   r   	enumeratefloatr   r   r   r   r   r   appendr   strlen)r   filenamemax_examplesfilepathfIndataexamplesidrowscores1s2s               r   get_exampleszSTSDataReader.get_examples$   s   7<< 3X>>;C;L;LU;S;S 
TYx7777Y]wZ
 Z
 Z
 	:cT^T\RRRDH$T?? 
 
Cc$"4566( Y"T^38WXE))(SWW2DRQSH\a b b bccc!##H(E(EE	 	 	 	 	 	 	 	 	 	 	 	 	 	 	" s   #C EEEN)r   )__name__
__module____qualname____doc__r%   
QUOTE_NONEr   r7    r   r   r   r      sb          # # # #,     r   r   c                   >     e Zd ZdZddddej        dddf fd	Z xZS )	STSBenchmarkDataReaderzReader especially for the STS benchmark dataset. There, the sentences are in column 5 and 6, the score is in column 4.
    Scores are normalized from 0...5 to 0...1
    r	         r   Tr   c
                 \    t                                          |||||||||		  	         d S )N)	r   r   r   r   r   r   r   r   r   )superr   )r   r   r   r   r   r   r   r   r   r   	__class__s             r   r   zSTSBenchmarkDataReader.__init__@   sK     	)!!'- 	 
	
 
	
 
	
 
	
 
	
r   )r8   r9   r:   r;   r%   r<   r   __classcell__)rD   s   @r   r?   r?   ;   sc          
 
 
 
 
 
 
 
 
 
r   r?   )r%   r#   r    r   r   r?   r=   r   r   <module>rG      s    



  				      0 0 0 0 0 0 0 0f
 
 
 
 
] 
 
 
 
 
r   