
    si                     h    d dl mZmZ d dlmZmZ d dlmZ ddlmZm	Z	  G d dej
                  ZdS )	    )DictIterable)Tensornn)SentenceTransformer   )BatchHardTripletLoss$BatchHardTripletLossDistanceFunctionc                        e Zd Zej        dfdededdf fdZdee	e
ef                  dedefd	Zded
edefdZede
fd            Z xZS )BatchAllTripletLoss   modelmarginreturnNc                     t          t          |                                            || _        || _        || _        dS )a  
        BatchAllTripletLoss takes a batch with (sentence, label) pairs and computes the loss for all possible, valid
        triplets, i.e., anchor and positive must have the same label, anchor and negative a different label. The labels
        must be integers, with same label indicating sentences from the same class. Your train dataset
        must contain at least 2 examples per label class.

        Args:
            model: SentenceTransformer model
            distance_metric: Function that returns a distance between
                two embeddings. The class SiameseDistanceMetric contains
                pre-defined metrics that can be used.
            margin: Negative samples should be at least margin further
                apart from the anchor than the positive.

        References:
            * Source: https://github.com/NegatioN/OnlineMiningTripletLoss/blob/master/online_triplet_loss/losses.py
            * Paper: In Defense of the Triplet Loss for Person Re-Identification, https://arxiv.org/abs/1703.07737
            * Blog post: https://omoindrot.github.io/triplet-loss

        Requirements:
            1. Each sentence must be labeled with a class.
            2. Your dataset must contain at least 2 examples per labels class.

        Relations:
            * :class:`BatchHardTripletLoss` uses only the hardest positive and negative samples, rather than all possible, valid triplets.
            * :class:`BatchHardSoftMarginTripletLoss` uses only the hardest positive and negative samples, rather than all possible, valid triplets.
              Also, it does not require setting a margin.
            * :class:`BatchSemiHardTripletLoss` uses only semi-hard triplets, valid triplets, rather than all possible, valid triplets.

        Inputs:
            +------------------+--------+
            | Texts            | Labels |
            +==================+========+
            | single sentences | class  |
            +------------------+--------+

        Example:
            ::

                from sentence_transformers import SentenceTransformer, SentenceTransformerTrainer, losses
                from datasets import Dataset

                model = SentenceTransformer("microsoft/mpnet-base")
                # E.g. 0: sports, 1: economy, 2: politics
                train_dataset = Dataset.from_dict({
                    "sentence": [
                        "He played a great game.",
                        "The stock is up 20%",
                        "They won 2-1.",
                        "The last goal was amazing.",
                        "They all voted against the bill.",
                    ],
                    "label": [0, 1, 0, 0, 2],
                })
                loss = losses.BatchAllTripletLoss(model)

                trainer = SentenceTransformerTrainer(
                    model=model,
                    train_dataset=train_dataset,
                    loss=loss,
                )
                trainer.train()

        N)superr   __init__sentence_embeddertriplet_margindistance_metric)selfr   r   r   	__class__s       c/var/www/icac/venv/lib/python3.11/site-packages/sentence_transformers/losses/BatchAllTripletLoss.pyr   zBatchAllTripletLoss.__init__   sA    L 	!4((11333!&$.    sentence_featureslabelsc                 p    |                      |d                   d         }|                     ||          S )Nr   sentence_embedding)r   batch_all_triplet_loss)r   r   r   reps       r   forwardzBatchAllTripletLoss.forwardV   s7    $$%6q%9::;OP**63777r   
embeddingsc                 |   |                      |          }|                    d          }|                    d          }||z
  | j        z   }t          j        |          }|                                |z  }d||dk     <   ||dk             }|                    d          }	|                                |	dz   z  }|S )a]  Build the triplet loss over a batch of embeddings.
        We generate all the valid triplets and average the loss over the positive ones.
        Args:
            labels: labels of the batch, of size (batch_size,)
            embeddings: tensor of shape (batch_size, embed_dim)
            margin: margin for triplet loss
            squared: Boolean. If true, output is the pairwise squared euclidean distance matrix.
                     If false, output is the pairwise euclidean distance matrix.
        Returns:
            Label_Sentence_Triplet: scalar tensor containing the triplet loss
           r   r   gؗҜ<)r   	unsqueezer   r	   get_triplet_maskfloatsizesum)
r   r   r"   pairwise_distanchor_positive_distanchor_negative_disttriplet_lossmaskvalid_tripletsnum_positive_tripletss
             r   r   z*BatchAllTripletLoss.batch_all_triplet_lossZ   s     ,,Z88,66q99,66q99 ,.BBTEXX $4V<<zz||l2 *+\A%& &lU&:; . 3 3A 6 6
 $''))-BU-JKr   c                     dS )Na  
@misc{hermans2017defense,
    title={In Defense of the Triplet Loss for Person Re-Identification}, 
    author={Alexander Hermans and Lucas Beyer and Bastian Leibe},
    year={2017},
    eprint={1703.07737},
    archivePrefix={arXiv},
    primaryClass={cs.CV}
}
 )r   s    r   citationzBatchAllTripletLoss.citation   s    	 	r   )__name__
__module____qualname__r
   eucledian_distancer   r'   r   r   r   strr   r!   r   propertyr3   __classcell__)r   s   @r   r   r   
   s         =O	I/ I/"I/ 	I/
 
I/ I/ I/ I/ I/ I/V8$sF{2C)D 8f 8Y_ 8 8 8 8)V ) )F ) ) ) )V 
# 
 
 
 X
 
 
 
 
r   r   N)typingr   r   torchr   r   )sentence_transformers.SentenceTransformerr   r	   r
   Moduler   r2   r   r   <module>r?      s    ! ! ! ! ! ! ! !         I I I I I I \ \ \ \ \ \ \ \F F F F F") F F F F Fr   