
    si                         d dl Z d dlZd dlZd dlmZmZ d dlZd dlmZmZ  ej	        e
          Z G d dej                  ZdS )    N)DictList)Tensornnc                        e Zd ZdZddee         deeef         def fdZdeee	f         fdZ
d	 Zd
 Zed             Z xZS )WordWeightszDThis model can weight word embeddings, for example, with idf-values.   vocabword_weightsunknown_word_weightc                    t          t          |                                            g d| _        || _        || _        || _        g }d}|D ]\}|}||v r	||         }n6|                                |v r||                                         }n|dz  }|                    |           ]t          
                    d                    |t          |          |                     t          j        t          |          d          | _        | j                            dt#          j        |                              d          i           dS )aZ  
        Initializes the WordWeights class.

        Args:
            vocab (List[str]): Vocabulary of the tokenizer.
            word_weights (Dict[str, float]): Mapping of tokens to a float weight value. Word embeddings are multiplied
                by this float value. Tokens in word_weights must not be equal to the vocab (can contain more or less values).
            unknown_word_weight (float, optional): Weight for words in vocab that do not appear in the word_weights lookup.
                These can be, for example, rare words in the vocab where no weight exists. Defaults to 1.
        )r
   r   r   r   r	   z:{} of {} words without a weighting value. Set weight to {}weightN)superr   __init__config_keysr
   r   r   lowerappendloggerinfoformatlenr   	Embedding	emb_layerload_state_dicttorchFloatTensor	unsqueeze)	selfr
   r   r   weightsnum_unknown_wordswordr   	__class__s	           [/var/www/icac/venv/lib/python3.11/site-packages/sentence_transformers/models/WordWeights.pyr   zWordWeights.__init__   sP    	k4  ))+++KKK
(#6  	# 	#D(F|##%d+--%djjll3!Q&!NN6""""HOO!3u::/B 	
 	
 	
 c%jj!44&&%2CG2L2L2V2VWX2Y2Y'Z[[[[[    featuresc                    |d         }|d         }|                      |d                                       d          }||                                z  }t          j        |d          }|                    d                              |                                          }||z  }|                    ||d           |S )Nattention_masktoken_embeddings	input_idsr	   )r(   token_weights_sum)	r   squeezefloatr   sumr   expandsizeupdate)r   r%   r'   r(   token_weights_rawtoken_weightsr+   token_weights_expandeds           r#   forwardzWordWeights.forward5   s    !"23#$67 !NN8K+@AAII"MM)N,@,@,B,BB!ImQ77 "/!8!8!<!<!C!CDTDYDYD[D[!\!\+.DD-=Teffgggr$   c                 *      fd j         D             S )Nc                 ,    i | ]}|j         |         S  )__dict__).0keyr   s     r#   
<dictcomp>z/WordWeights.get_config_dict.<locals>.<dictcomp>F   s"    DDDCT]3'DDDr$   )r   )r   s   `r#   get_config_dictzWordWeights.get_config_dictE   s     DDDD43CDDDDr$   c                     t          t          j                            |d          d          5 }t	          j        |                                 |d           d d d            d S # 1 swxY w Y   d S )Nconfig.jsonw   )indent)openospathjoinjsondumpr=   )r   output_pathfOuts      r#   savezWordWeights.saveH   s    "',,{M::C@@ 	>DId**,,d1====	> 	> 	> 	> 	> 	> 	> 	> 	> 	> 	> 	> 	> 	> 	> 	> 	> 	>s   *A&&A*-A*c                     t          t          j                            | d                    5 }t	          j        |          }d d d            n# 1 swxY w Y   t          di |S )Nr?   r8   )rC   rD   rE   rF   rG   loadr   )
input_pathfInconfigs      r#   rM   zWordWeights.loadL   s    "',,z=99:: 	$cYs^^F	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ 	$ $$V$$$s   AAA)r	   )__name__
__module____qualname____doc__r   strr   r-   r   r   r5   r=   rK   staticmethodrM   __classcell__)r"   s   @r#   r   r      s        NN$\ $\d3i $\tCJ7G $\^c $\ $\ $\ $\ $\ $\LS&[ 1     E E E> > > % % \% % % % %r$   r   )rG   loggingrD   typingr   r   r   r   r   	getLoggerrQ   r   Moduler   r8   r$   r#   <module>r\      s      				                 		8	$	$E% E% E% E% E%") E% E% E% E% E%r$   