
    si                     v    d dl mZmZ d dlZd dlmc mZ d dlmZmZ d dl	m
Z
mZ  G d dej                  ZdS )    )DictIterableN)Tensornn)SentenceTransformerutilc                        e Zd Z	 	 	 	 ddedededed	ed
df fdZdee	e
ef                  ded
efdZdee	e
ef                  ded
efdZed
e
fd            Z xZS )MegaBatchMarginLoss皙?333333?T2   modelpositive_marginnegative_marginuse_mini_batched_versionmini_batch_sizereturnNc                     t          t          |                                            || _        || _        || _        || _        |r| j        n| j        | _	        dS )a	  
        Given a large batch (like 500 or more examples) of (anchor_i, positive_i) pairs, find for each pair in the batch
        the hardest negative, i.e. find j != i such that cos_sim(anchor_i, positive_j) is maximal. Then create from this a
        triplet (anchor_i, positive_i, positive_j) where positive_j serves as the negative for this triplet.

        Then train as with the triplet loss.

        Args:
            model: SentenceTransformerModel
            positive_margin: Positive margin, cos(anchor, positive)
                should be > positive_margin
            negative_margin: Negative margin, cos(anchor, negative)
                should be < negative_margin
            use_mini_batched_version: As large batch sizes require a lot
                of memory, we can use a mini-batched version. We break
                down the large batch into smaller batches with fewer
                examples.
            mini_batch_size: Size for the mini-batches. Should be a
                devisor for the batch size in your data loader.

        References:
            - This loss function was inspired by the ParaNMT paper: https://www.aclweb.org/anthology/P18-1042/

        Requirements:
            1. (anchor, positive) pairs
            2. Large batches (500 or more examples)

        Input:
            +---------------------------------------+--------+
            | Texts                                 | Labels |
            +=======================================+========+
            | (anchor, positive) pairs              | none   |
            +---------------------------------------+--------+

        Example:
            ::

                from sentence_transformers import SentenceTransformer, InputExample, losses
                from torch.utils.data import DataLoader

                model = SentenceTransformer('all-MiniLM-L6-v2')

                total_examples = 500
                train_batch_size = 250
                train_mini_batch_size = 32

                train_examples = [
                    InputExample(texts=[f"This is sentence number {i}", f"This is sentence number {i+1}"]) for i in range(total_examples)
                ]
                train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=train_batch_size)
                train_loss = losses.MegaBatchMarginLoss(model=model, mini_batch_size=train_mini_batch_size)

                model.fit(
                    [(train_dataloader, train_loss)],
                    epochs=10,
                )
        N)
superr
   __init__r   r   r   r   forward_mini_batchedforward_non_mini_batchedforward)selfr   r   r   r   r   	__class__s         c/var/www/icac/venv/lib/python3.11/site-packages/sentence_transformers/losses/MegaBatchMarginLoss.pyr   zMegaBatchMarginLoss.__init__   s^    B 	!4((11333
...4Lot00RVRo    sentence_featureslabelsc                 ,   |\  t                                                    }t          j                    5  | j                                         |                               d                                         }| j                                         d d d            n# 1 swxY w Y   t          j        t          |          t          |          |j
                  }t          dt          |          | j                  D ]| j        z   |                     fd|D                       d         }d |D             }t          j                    5  t          j        ||          }|d|         z  z
  }	t          j        |	d          \  }
}d d d            n# 1 swxY w Y   |D ].}|D ])}||                             |         |                    */|D ]}t          j        ||                   ||<    |                     fd	|D                       d         }|                     |          d         }|j        |j        k    sJ |j        |j        k    sJ t'          j        ||          }t'          j        ||          }t'          j        | j        |z
            t'          j        || j        z
            z   }|                                }t          |          k     r|                                 |S )
Nsentence_embedding)devicer   c                 2    i | ]}||                  S  r$   ).0keyanchorend_idx	start_idxs     r   
<dictcomp>z<MegaBatchMarginLoss.forward_mini_batched.<locals>.<dictcomp>a   s)    $b$b$bSS&+i6G*H$b$b$br   c                     i | ]}|g S r$   r$   )r%   r&   s     r   r*   z<MegaBatchMarginLoss.forward_mini_batched.<locals>.<dictcomp>g   s    %G%G%G#c2%G%G%Gr         dimc                 2    i | ]}||                  S r$   r$   )r%   r&   r(   positiver)   s     r   r*   z<MegaBatchMarginLoss.forward_mini_batched.<locals>.<dictcomp>w   s*    &f&f&fQTsHSM)G:K,L&f&f&fr   )listkeystorchno_gradr   evaldetachtraineyelenr"   ranger   r   pytorch_cos_simmaxappendstackshapeFcosine_similarityrelur   r   meanbackward)r   r   r   feature_namesall_positive_embdiagonal_matrix
anchor_embhard_negative_features
cos_scoresnegative_scoresnegatives_maxnegatives_idshard_negative_idr&   positive_embnegative_emb
pos_cosine
neg_cosinelossesr'   r(   r1   r)   s                      @@@@r   r   z(MegaBatchMarginLoss.forward_mini_batchedS   s   ,V[[]]++]__ 	 	JOO#zz(334HIPPRRJ	 	 	 	 	 	 	 	 	 	 	 	 	 	 	
  )C(8$9$93?O;P;PYiYpqqq q#&6"7"79MNN (	" (	"I$"66G$b$b$b$b$b$bTa$b$b$bcc$J &H%G%G%G%G" Q Q!1*>NOO
_Yw5F%G!GG   05ya/P/P/P,}Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q %2 X X ( X XC*3/66x}EU7VWWWWX % W W.3k:PQT:U.V.V&s++  ::&f&f&f&f&f&fXe&f&f&fgg$L  ::&<==>RSL#|'99999#|'99999 ,ZFFJ,ZFFJVD0:=>>
UYUiHiAjAjjF[[]]F Z((!!!s$   A B**B.1B.#?F..F2	5F2	c                      fd|D             }|\  }}t          j        ||          }t          j        |          }|dt          j        |j        d|j        iz  z
  }t          j        |d          \  }	}
t          j	         j
        |z
            t          j	        |	 j        z
            z   }|                                S )Nc                 F    g | ]}                     |          d          S )r!   )r   )r%   sentence_featurer   s     r   
<listcomp>z@MegaBatchMarginLoss.forward_non_mini_batched.<locals>.<listcomp>   s-    mmmGW

+,,-ABmmmr   r,   r"   r-   r.   )r   r<   r4   diagonalr9   r@   r"   r=   rA   rC   r   r   rD   )r   r   r   repsembeddings_aembeddings_brK   positive_scoresrL   rM   _rT   s   `           r   r   z,MegaBatchMarginLoss.forward_non_mini_batched   s    mmmm[lmmm%)"l),EE
.44$	:+FJ4EFFF
 !9_!<<<q,>??!&Y]YmImBnBnn{{}}r   c                     dS )Na  
@inproceedings{wieting-gimpel-2018-paranmt,
    title = "{P}ara{NMT}-50{M}: Pushing the Limits of Paraphrastic Sentence Embeddings with Millions of Machine Translations",
    author = "Wieting, John and Gimpel, Kevin",
    editor = "Gurevych, Iryna and Miyao, Yusuke",
    booktitle = "Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
    month = jul,
    year = "2018",
    address = "Melbourne, Australia",
    publisher = "Association for Computational Linguistics",
    url = "https://aclanthology.org/P18-1042",
    doi = "10.18653/v1/P18-1042",
    pages = "451--462",
}
r$   )r   s    r   citationzMegaBatchMarginLoss.citation   s     r   )r   r   Tr   )__name__
__module____qualname__r   floatboolintr   r   r   strr   r   r   propertyr`   __classcell__)r   s   @r   r
   r
   
   s?        "%!$)-!Fp Fp"Fp Fp 	Fp
 #'Fp Fp 
Fp Fp Fp Fp Fp FpP6htCK?P6Q 6[a 6fl 6 6 6 6r(4VCT:U _e jp     #    X    r   r
   )typingr   r   r4   torch.nn.functionalr   
functionalrA   r   sentence_transformersr   r   Moduler
   r$   r   r   <module>ro      s    ! ! ! ! ! ! ! !                   ; ; ; ; ; ; ; ;_ _ _ _ _") _ _ _ _ _r   