
    si                        d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlmZ d dl	m
Z
mZmZmZmZmZmZmZmZ d dlZd dlZd dlZd dlmZmZ d dlmZmZ d dlmZ d dlmZ  ej         e!          Z"dee#ej$        ef         d	efd
Z%ded	efdZ&dee#ej$        ef         d	efdZ'deded	efdZ(dee#ej$        ef         dee#ej$        ef         d	efdZ)deded	efdZ*dee#ej$        ef         dee#ej$        ef         d	efdZ+deded	efdZ,dee#ej$        ef         dee#ej$        ef         d	efdZ-dee#ej$        ef         dee#ej$        ef         fdZ.dee#ej$        ef         dee#ej$        ef         d	efdZ/dee#ej$        ef         dee#ej$        ef         fdZ0deded	efdZ1ded	efdZ2edej$        dee3         d	ej$        fd            Z4edej        dee3         d	ej        fd            Z4deej$        ej        f         dee3         d	eej$        ej        f         fdZ4d d!d"d#d$d%e)fd&ee5         d'e6d(e3d)e3d*e3d+e3d,e3d-eeegef         d	eeee7e3f                           fd.Z8d"d#d$d%e)fded)e3d*e3d+e3d,e3d-eeegef         d	eeee7e3f                           fd/Z9d	eeee5ee3e7f         f                           fd0Z:d%d$d1e)fd2ed3ed)e3d*e3d,e3d-eeegef         d	eeee5ee3e7f         f                           fd4Z;d5e5d6e5d	dfd7Z<d8ee5e
f         d9ed	ee5e
f         fd:Z=d	e5fd;Z>d<e5d	efd=Z?	 	 	 	 dVdeej        ej$        f         d@e7dAe3d(e3d'e6d	eee3                  fdBZ@ G dC dDe          ZAeejB        fdE            ZC	 	 	 	 dWdFe5dGeee6e5f                  dHee5         dIee5         dJe6d	e6fdKZD	 	 dXdFe5dLe5dGeee6e5f                  dHee5         dIee5         dJe6d	ee5         fdMZE	 	 dXdFe5dNe5dGeee6e5f                  dHee5         dIee5         dJe6d	ee5         fdOZFdP ZGd	edQ         fdRZHd	e6fdSZId	e6fdTZJd	e6fdUZKdS )Y    N)contextmanager)	AnyCallableDictListLiteralOptionalTypeUnionoverload)hf_hub_downloadsnapshot_download)Tensordevice)tqdm)is_torch_npu_availableareturnc                 X    t          | t                    st          j        |           } | S )z
    Converts the input `a` to a PyTorch tensor if it is not already a tensor.

    Args:
        a (Union[list, np.ndarray, Tensor]): The input array or tensor.

    Returns:
        Tensor: The converted tensor.
    )
isinstancer   torchtensorr   s    M/var/www/icac/venv/lib/python3.11/site-packages/sentence_transformers/util.py_convert_to_tensorr      s(     a   LOOH    c                 `    |                                  dk    r|                     d          } | S )z
    If the tensor `a` is 1-dimensional, it is unsqueezed to add a batch dimension.

    Args:
        a (Tensor): The input tensor.

    Returns:
        Tensor: The tensor with a batch dimension.
       r   )dim	unsqueezer   s    r   _convert_to_batchr!   %   s)     	uuww!||KKNNHr   c                 B    t          |           } t          |           } | S )z
    Converts the input data to a tensor with a batch dimension.

    Args:
        a (Union[list, np.ndarray, Tensor]): The input data to be converted.

    Returns:
        Tensor: The converted tensor with a batch dimension.
    )r   r!   r   s    r   _convert_to_batch_tensorr#   4   s#     	1A!AHr   bc                 "    t          | |          S )  
    Computes the cosine similarity between two tensors.

    Args:
        a (Union[list, np.ndarray, Tensor]): The first tensor.
        b (Union[list, np.ndarray, Tensor]): The second tensor.

    Returns:
        Tensor: Matrix with res[i][j] = cos_sim(a[i], b[j])
    )cos_simr   r$   s     r   pytorch_cos_simr)   C   s     1a==r   c                     t          |           } t          |          }t          |           }t          |          }t          j        ||                    dd                    S )r&   r   r   )r#   normalize_embeddingsr   mm	transpose)r   r$   a_normb_norms       r   r'   r'   Q   sZ     	!##A ##A!!$$F!!$$F8FF,,Q22333r   c                     t          |           } t          |          }t          t          |           t          |                    S )a  
    Computes the pairwise cosine similarity cos_sim(a[i], b[i]).

    Args:
        a (Union[list, np.ndarray, Tensor]): The first tensor.
        b (Union[list, np.ndarray, Tensor]): The second tensor.

    Returns:
        Tensor: Vector with res[i] = cos_sim(a[i], b[i])
    )r   pairwise_dot_scorer+   r(   s     r   pairwise_cos_simr2   d   s@     	1A1A21557KA7N7NOOOr   c                     t          |           } t          |          }t          j        | |                    dd                    S )a  
    Computes the dot-product dot_prod(a[i], b[j]) for all i and j.

    Args:
        a (Union[list, np.ndarray, Tensor]): The first tensor.
        b (Union[list, np.ndarray, Tensor]): The second tensor.

    Returns:
        Tensor: Matrix with res[i][j] = dot_prod(a[i], b[j])
    r   r   )r#   r   r,   r-   r(   s     r   	dot_scorer4   u   s>     	!##A ##A8Aq{{1a(()))r   c                 p    t          |           } t          |          }| |z                      d          S )a  
    Computes the pairwise dot-product dot_prod(a[i], b[i]).

    Args:
        a (Union[list, np.ndarray, Tensor]): The first tensor.
        b (Union[list, np.ndarray, Tensor]): The second tensor.

    Returns:
        Tensor: Vector with res[i] = dot_prod(a[i], b[i])
    r   )r   sumr(   s     r   r1   r1      s6     	1A1AE;;2;r   c                 n    t          |           } t          |          }t          j        | |d           S )a:  
    Computes the manhattan similarity (i.e., negative distance) between two tensors.

    Args:
        a (Union[list, np.ndarray, Tensor]): The first tensor.
        b (Union[list, np.ndarray, Tensor]): The second tensor.

    Returns:
        Tensor: Matrix with res[i][j] = -manhattan_distance(a[i], b[j])
    g      ?pr#   r   cdistr(   s     r   manhattan_simr>      8     	!##A ##AK1$$$$$r   c                     t          |           } t          |          }t          j        t          j        | |z
            d           S )a<  
    Computes the manhattan similarity (i.e., negative distance) between pairs of tensors.

    Args:
        a (Union[list, np.ndarray, Tensor]): The first tensor.
        b (Union[list, np.ndarray, Tensor]): The second tensor.

    Returns:
        Tensor: Vector with res[i] = -manhattan_distance(a[i], b[i])
    r6   r7   )r   r   r8   absr(   s     r   pairwise_manhattan_simrB      sD     	1A1AIeiA&&B/////r   c                 n    t          |           } t          |          }t          j        | |d           S )a:  
    Computes the euclidean similarity (i.e., negative distance) between two tensors.

    Args:
        a (Union[list, np.ndarray, Tensor]): The first tensor.
        b (Union[list, np.ndarray, Tensor]): The second tensor.

    Returns:
        Tensor: Matrix with res[i][j] = -euclidean_distance(a[i], b[j])
    g       @r:   r<   r(   s     r   euclidean_simrD      r?   r   c                     t          |           } t          |          }t          j        t          j        | |z
  dz  d                     S )a:  
    Computes the euclidean distance (i.e., negative distance) between pairs of tensors.

    Args:
        a (Union[list, np.ndarray, Tensor]): The first tensor.
        b (Union[list, np.ndarray, Tensor]): The second tensor.

    Returns:
        Tensor: Vector with res[i] = -euclidean_distance(a[i], b[i])
       r6   r7   )r   r   sqrtr8   r(   s     r   pairwise_euclidean_simrH      sH     	1A1AJuy!a%A26667777r   xyc                 J   t          |           } t          |          }t          j        | dd          \  }}t          j        |dd          \  }}t          j        |dz  |dz  z   dd          }||z  ||z  z   |z  }||z  ||z  z
  |z  }t          j        |dz  |dz  z   dd          dz  }	t          j        |dz  |dz  z   dd          dz  }
||	|
z  z  }||	|
z  z  }t          j        t          j        ||fd          d          }t          j        |          S )aP  
    Computes the absolute normalized angle distance. See :class:`~sentence_transformers.losses.AnglELoss`
    or https://arxiv.org/abs/2309.12871v1 for more information.

    Args:
        x (Tensor): The first tensor.
        y (Tensor): The second tensor.

    Returns:
        Tensor: Vector with res[i] = angle_sim(a[i], b[i])
    rF   r   r7   T)r   keepdimg      ?)r   r   chunkr8   concatrA   )rI   rJ   r   r$   cdzreimdzdw
norm_angles               r   pairwise_angle_simrW      sA    	1A1A ;q!###DAq;q!###DAq	!Q$A+1d333A
a%!a%-1	B
a%!a%-1	B	1a4!Q$;At	4	4	4	;B	1a4!Q$;At	4	4	4	;B"r'MB"r'MB5<Ra888a@@@J9Z   r   
embeddingsc                 P    t           j        j                            | dd          S )z
    Normalizes the embeddings matrix, so that each sentence embedding has unit length.

    Args:
        embeddings (Tensor): The input embeddings matrix.

    Returns:
        Tensor: The normalized embeddings matrix.
    rF   r   )r;   r   )r   nn
functional	normalize)rX   s    r   r+   r+      s$     8((qa(@@@r   truncate_dimc                     d S N rX   r]   s     r   truncate_embeddingsrb   
  s    \_\_r   c                     d S r_   r`   ra   s     r   rb   rb     s    `c`cr   c                     | dd|f         S )a  
    Truncates the embeddings matrix.

    Args:
        embeddings (Union[np.ndarray, torch.Tensor]): Embeddings to truncate.
        truncate_dim (Optional[int]): The dimension to truncate sentence embeddings to. `None` does no truncation.

    Example:
        >>> from sentence_transformers import SentenceTransformer
        >>> from sentence_transformers.util import truncate_embeddings
        >>> model = SentenceTransformer("tomaarsen/mpnet-base-nli-matryoshka")
        >>> embeddings = model.encode(["It's so nice outside!", "Today is a beautiful day.", "He drove to work earlier"])
        >>> embeddings.shape
        (3, 768)
        >>> model.similarity(embeddings, embeddings)
        tensor([[1.0000, 0.8100, 0.1426],
                [0.8100, 1.0000, 0.2121],
                [0.1426, 0.2121, 1.0000]])
        >>> truncated_embeddings = truncate_embeddings(embeddings, 128)
        >>> truncated_embeddings.shape
        >>> model.similarity(truncated_embeddings, truncated_embeddings)
        tensor([[1.0000, 0.8092, 0.1987],
                [0.8092, 1.0000, 0.2716],
                [0.1987, 0.2716, 1.0000]])

    Returns:
        Union[np.ndarray, torch.Tensor]: Truncated embeddings.
    .Nr`   ra   s     r   rb   rb     s    > c=L=())r   F    i  i i  d   	sentencesshow_progress_bar
batch_sizequery_chunk_sizecorpus_chunk_size	max_pairstop_kscore_functionc	                 ^    |                      |||d          }	t          |	|||||          S )a  
    Given a list of sentences / texts, this function performs paraphrase mining. It compares all sentences against all
    other sentences and returns a list with the pairs that have the highest cosine similarity score.

    Args:
        model (SentenceTransformer): SentenceTransformer model for embedding computation
        sentences (List[str]): A list of strings (texts or sentences)
        show_progress_bar (bool, optional): Plotting of a progress bar. Defaults to False.
        batch_size (int, optional): Number of texts that are encoded simultaneously by the model. Defaults to 32.
        query_chunk_size (int, optional): Search for most similar pairs for #query_chunk_size at the same time. Decrease, to lower memory footprint (increases run-time). Defaults to 5000.
        corpus_chunk_size (int, optional): Compare a sentence simultaneously against #corpus_chunk_size other sentences. Decrease, to lower memory footprint (increases run-time). Defaults to 100000.
        max_pairs (int, optional): Maximal number of text pairs returned. Defaults to 500000.
        top_k (int, optional): For each sentence, we retrieve up to top_k other sentences. Defaults to 100.
        score_function (Callable[[Tensor, Tensor], Tensor], optional): Function for computing scores. By default, cosine similarity. Defaults to cos_sim.

    Returns:
        List[List[Union[float, int]]]: Returns a list of triplets with the format [score, id1, id2]
    T)rh   ri   convert_to_tensor)rj   rk   rl   rm   rn   )encodeparaphrase_mining_embeddings)
modelrg   rh   ri   rj   rk   rl   rm   rn   rX   s
             r   paraphrase_miningrt   4  sR    > %6:ae   J ()+%   r   c                    |dz  }t          j                    }d}d}t          dt          |           |          D ]x}	t          dt          |           |          D ]U}
 || |
|
|z            | |	|	|z                      }t	          j        |t          |t          |d                             ddd          \  }}|                                                                }|                                                                }t          t          |                    D ]}t          ||                   D ]r\  }}|
|z   }|	|z   }||k    r]||         |         |k    rK|
                    ||         |         ||f           |dz  }||k    r|                                }|d         }sWzt                      }g }|                                s{|                                \  }}}t          ||g          \  }}||k    r5||f|vr/|                    ||f           |                    |||g           |                                {t          |d d          }|S )	a  
    Given a list of sentences / texts, this function performs paraphrase mining. It compares all sentences against all
    other sentences and returns a list with the pairs that have the highest cosine similarity score.

    Args:
        embeddings (Tensor): A tensor with the embeddings
        query_chunk_size (int): Search for most similar pairs for #query_chunk_size at the same time. Decrease, to lower memory footprint (increases run-time).
        corpus_chunk_size (int): Compare a sentence simultaneously against #corpus_chunk_size other sentences. Decrease, to lower memory footprint (increases run-time).
        max_pairs (int): Maximal number of text pairs returned.
        top_k (int): For each sentence, we retrieve up to top_k other sentences
        score_function (Callable[[Tensor, Tensor], Tensor]): Function for computing scores. By default, cosine similarity.

    Returns:
        List[List[Union[float, int]]]: Returns a list of triplets with the format [score, id1, id2]
    r   r6   r   TFr   largestsortedc                     | d         S )Nr   r`   rI   s    r   <lambda>z.paraphrase_mining_embeddings.<locals>.<lambda>  s
    !A$ r   keyreverse)queuePriorityQueuerangelenr   topkmincputolist	enumerateputgetsetemptyrx   addappend)rX   rj   rk   rl   rm   rn   pairs	min_score	num_addedcorpus_start_idxquery_start_idxscoresscores_top_k_valuesscores_top_k_idx	query_itr	top_k_idx
corpus_itrijentryadded_pairs
pairs_listscoresorted_isorted_js                            r   rr   rr   a  s   0 
QJE !!EII!!S__6GHH 1 1$QJ9IJJ 	1 	1O#^?_?O-OOP+.>AR.RRS F
 5:JE3vay>>224PU5 5 51!1 #6"9"9";";"B"B"D"D/3355<<>>"3v;;// 1 1	-67G	7R-S-S 
1 
1)Iz')3A(:5AAvv"5i"@"Ki"W"W		#6y#A)#LaQR"STTT!Q	$	11$)IIKKE(-aI
11	14 %%KJkkmm ;iikkq!#QF^^(xXx$8$K$KOOXx0111uh9::: kkmm ; 
EEEJr   c                      t          | i |S )z8This function is deprecated. Use semantic_search instead)semantic_search)argskwargss     r   information_retrievalr     s    D+F+++r   
   query_embeddingscorpus_embeddingsc                    t          | t          j        t          j        f          rt	          j        |           } n)t          | t                    rt	          j        |           } t          | j	                  dk    r| 
                    d          } t          |t          j        t          j        f          rt	          j        |          }n)t          |t                    rt	          j        |          }|j        | j        k    r|                     |j                  } d t          t          |                     D             }t          dt          |           |          D ]p}t          dt          |          |          D ]M} || |||z            ||||z                      }	t	          j        |	t          |t          |	d                             ddd          \  }
}|
                                                                }
|                                                                }t          t          |	                    D ]}t%          ||         |
|                   D ]c\  }}||z   }||z   }t          ||                   |k     rt'          j        ||         ||f           Ft'          j        ||         ||f           dOrt          t          |                    D ]b}t          t          ||                             D ]!}||         |         \  }}||d||         |<   "t-          ||         d d	          ||<   c|S )
a  
    This function performs a cosine similarity search between a list of query embeddings  and a list of corpus embeddings.
    It can be used for Information Retrieval / Semantic Search for corpora up to about 1 Million entries.

    Args:
        query_embeddings (Tensor): A 2 dimensional tensor with the query embeddings.
        corpus_embeddings (Tensor): A 2 dimensional tensor with the corpus embeddings.
        query_chunk_size (int, optional): Process 100 queries simultaneously. Increasing that value increases the speed, but requires more memory. Defaults to 100.
        corpus_chunk_size (int, optional): Scans the corpus 100k entries at a time. Increasing that value increases the speed, but requires more memory. Defaults to 500000.
        top_k (int, optional): Retrieve top k matching entries. Defaults to 10.
        score_function (Callable[[Tensor, Tensor], Tensor], optional): Function for computing scores. By default, cosine similarity.

    Returns:
        List[List[Dict[str, Union[int, float]]]]: A list with one entry for each query. Each entry is a list of dictionaries with the keys 'corpus_id' and 'score', sorted by decreasing cosine similarity scores.
    r   r   c                     g | ]}g S r`   r`   ).0_s     r   
<listcomp>z#semantic_search.<locals>.<listcomp>  s    DDD!2DDDr   TFrv   )	corpus_idr   c                     | d         S )Nr   r`   rz   s    r   r{   z!semantic_search.<locals>.<lambda>  s    \]^e\f r   r|   )r   npndarraygenericr   
from_numpyliststackr   shaper    r   tor   r   r   r   r   zipheapqheappushheappushpoprx   )r   r   rj   rk   rm   rn   queries_result_listr   r   
cos_scorescos_scores_top_k_valuescos_scores_top_k_idxr   sub_corpus_idr   r   query_iddoc_itrs                     r   r   r     s   0 "RZ$<== 9 +,<==	$d	+	+ 9 ;'788
!""a''+55a88#bj"*%=>> ;!,->??	%t	,	, ;!K(9:: #3#:::+../@/GHHDDuS1A-B-B'C'CDDD C(8$9$9;KLL ] ] %a->)?)?AR S S 	] 	]' ?EU3U!UV!"25EHY5Y"YZ J =BJCs:a='9'9::4X]= = =9#%9 '>&A&A&C&C&J&J&L&L##7#;#;#=#=#D#D#F#F "3z??33 	] 	]	,/0DY0OQhirQs,t,t ] ](M5 0= @I.:H.x899EAA/9E9;M    )*=h*G%QZI[\\\\]	]	]4 #12233 v vS!4X!>??@@ 	^ 	^G28<WEE9CLW\5]5])'22(./B8/LRfRfpt(u(u(uH%%r   urlpathc                 X   t           j                            |          dk    r3t          j        t           j                            |          d           t	          j        | d          }|j        dk    rJt          d                    | |j                  t          j
                   |                                 dS |d	z   }t          |d
          5 }|j                            d          }|t          |          nd}t          d|d          }|                    d          D ];}|r7|                    t%          |                     |                    |           <	 ddd           n# 1 swxY w Y   t          j        ||           |                                 dS )a  
    Downloads a URL to a given path on disk.

    Args:
        url (str): The URL to download.
        path (str): The path to save the downloaded file.

    Raises:
        requests.HTTPError: If the HTTP request returns a non-200 status code.

    Returns:
        None
     T)exist_ok)stream   z1Exception when trying to download {}. Response {})fileN_partwbzContent-LengthB)unittotal
unit_scale   )
chunk_size)osr   dirnamemakedirsrequestsr   status_codeprintformatsysstderrraise_for_statusopenheadersintr   iter_contentupdater   writerenameclose)	r   r   reqdownload_filepathfile_binarycontent_lengthr   progressrM   s	            r   http_getr     s    
wt""
BGOOD))D9999
,s4
(
(
(C
#AHHco^^eheoppppw		&	& )+)9::'5'AN###tS$???%%%66 	) 	)E )E

+++!!%(((	)	) ) ) ) ) ) ) ) ) ) ) ) ) ) ) I&&&NNs   BE66E:=E:batchtarget_devicec                     | D ];}t          | |         t                    r| |                             |          | |<   <| S )au  
    Send a PyTorch batch (i.e., a dictionary of string keys to Tensors) to a device (e.g. "cpu", "cuda", "mps").

    Args:
        batch (Dict[str, Tensor]): The batch to send to the device.
        target_device (torch.device): The target device (e.g. "cpu", "cuda", "mps").

    Returns:
        Dict[str, Tensor]: The batch with tensors sent to the target device.
    )r   r   r   )r   r   r}   s      r   batch_to_devicer   $  sH      6 6eCj&)) 	6s}55E#JLr   c                     | j         j        }||t          j         j        k    r| j         j        S |dz   | j         j        z   S )a  
    Gives a full name (package_name.class_name) for a class / object in Python. Will
    be used to load the correct classes from JSON files

    Args:
        o: The object for which to get the full name.

    Returns:
        str: The full name of the object.

    Example:
        >>> from sentence_transformers.losses import MultipleNegativesRankingLoss
        >>> from sentence_transformers import SentenceTransformer
        >>> from sentence_transformers.util import fullname
        >>> model = SentenceTransformer('all-MiniLM-L6-v2')
        >>> loss = MultipleNegativesRankingLoss(model)
        >>> fullname(loss)
        'sentence_transformers.losses.MultipleNegativesRankingLoss.MultipleNegativesRankingLoss'
    N.)	__class__
__module__str__name__)omodules     r   fullnamer   5  sA    * [#F~3=#;;;{##|ak222r   dotted_pathc                 ^   	 |                      dd          \  }}n"# t          $ r d| z  }t          |          w xY w	 t          j        |           }n$# t
          $ r t          j        |          }Y nw xY w	 t          ||          S # t          $ r d|d|d}t          |          w xY w)a:  
    Import a dotted module path and return the attribute/class designated by the
    last name in the path. Raise ImportError if the import failed.

    Args:
        dotted_path (str): The dotted module path.

    Returns:
        Any: The attribute/class designated by the last name in the path.

    Raises:
        ImportError: If the import failed.

    Example:
        >>> import_from_string('sentence_transformers.losses.MultipleNegativesRankingLoss')
        <class 'sentence_transformers.losses.MultipleNegativesRankingLoss.MultipleNegativesRankingLoss'>
    r   r   z"%s doesn't look like a module pathzModule "z" does not define a "z" attribute/class)rsplit
ValueErrorImportError	importlibimport_module	ExceptiongetattrAttributeError)r   module_path
class_namemsgr   s        r   import_from_stringr  Q  s    $"-"4"4S!"<"<ZZ   2[@#6(55 6 6 6(556vz***    FQkkS]S]S]^#s%    ;A A54A59B	 	#B,      ?r   	thresholdmin_community_sizec                    t          | t          j                  st          j        |           } t          j        || j                  }t          |           } g }t          |t          |                     }t          t          d|z  d          t          |                     }t          t          dt          |           |          d|           D ]}| |||z            | j        z  }| j        j        dv r||k    }	|	                    d          }
|
|k    }|                                s\|
|         }
||         }|
                                }|                    |d	
          \  }}t!          |
|          D ]4\  }}|                    |d|                                                    5|                    |d	
          \  }}t          t          |                    D ]}||         d         |k    r||                             |d	
          \  }}|d         |k    rr|t          |           k     r_t          d|z  t          |                     }||                             |d	
          \  }}|d         |k    r|t          |           k     _|                    |||k                                                        t'          |d d	          }g }t)                      }t+          |          D ]b\  }}g }|D ]}||vr|                    |           t          |          |k    r*|                    |           |                    |           ct'          |d d	          }|S )a  
    Function for Fast Community Detection.

    Finds in the embeddings all communities, i.e. embeddings that are close (closer than threshold).
    Returns only communities that are larger than min_community_size. The communities are returned
    in decreasing order. The first element in each list is the central point in the community.

    Args:
        embeddings (torch.Tensor or numpy.ndarray): The input embeddings.
        threshold (float): The threshold for determining if two embeddings are close. Defaults to 0.75.
        min_community_size (int): The minimum size of a community to be considered. Defaults to 10.
        batch_size (int): The batch size for computing cosine similarity scores. Defaults to 1024.
        show_progress_bar (bool): Whether to show a progress bar during computation. Defaults to False.

    Returns:
        List[List[int]]: A list of communities, where each community is represented as a list of indices.
    )r   rF   2   r   zFinding clusters)descdisable)cudanpur   T)krw   Nr6   c                      t          |           S r_   r   rz   s    r   r{   z%community_detection.<locals>.<lambda>  s    A r   r|   c                      t          |           S r_   r  rz   s    r   r{   z%community_detection.<locals>.<lambda>  s    #a&& r   )r   r   r   r   r   r+   r   r   maxr   r   Ttyper8   anyr   r   r   r   rx   r   r   r   )rX   r  r  ri   rh   extracted_communitiessort_max_size	start_idxr   threshold_maskrow_wise_countlarge_enough_maskr  r   top_k_indicescountindicestop_k_valuesr   top_val_largetop_idx_largeunique_communitiesextracted_ids
cluster_id	communitynon_overlapped_communityidxs                              r   community_detectionr)  u  s   0 j%,// .\*--
Yz/@AAAI%j11J /ZAAA 22B77ZIIMaZ*--4FTePe   *e *e	  	I
,B BCjlR
 !_44'94N+//22N !/2D D$((** +,=>N#$56J ""$$A)DAAA} #&nm"D"D G Gw%,,WVeV_-C-C-E-EFFFFG )oo0BDoQQOL! 3|,,-- 
e 
e?2&)333=a=3E3E_c3E3d3d0M= (+i77MCPZOO<[<[(+A,=s:(O(O7A!}7I7IMcg7I7h7h4} (+i77MCPZOO<[<[ *00}PY?Y1Z1a1a1c1cddd
e ##8>N>NX\]]] EEM!*+@!A!A ; ;
I#%  	5 	5C-''(//444'((,>>>%%&>???  !9::: 28H8HRVWWWr   c                   6     e Zd ZdZ fdZdeddf fdZ xZS )disabled_tqdmz
    Class to override `disable` argument in case progress bars are globally disabled.

    Taken from https://github.com/tqdm/tqdm/issues/619#issuecomment-619639324.
    c                 D    d|d<    t                      j        |i | d S )NTr  )super__init__)selfr   r   r   s      r   r.  zdisabled_tqdm.__init__  s/     y$)&)))))r   attrr   Nc                 |    	 t                                          |           dS # t          $ r |dk    r Y dS w xY w)zBFix for https://github.com/huggingface/huggingface_hub/issues/1603_lockN)r-  __delattr__r  )r/  r0  r   s     r   r3  zdisabled_tqdm.__delattr__  sV    	GG%%%%% 	 	 	w 	s   !& ;;)r   r   __qualname____doc__r.  r   r3  __classcell__)r   s   @r   r+  r+    sn         * * * * *           r   r+  c              #      K   t           j        j        j        }t          j        |            	 dV  t          j        |           dS # t          j        |           w xY w)z
    A context manager that will prevent any logging messages
    triggered during the body from being processed.

    Args:
        highest_level: the maximum logging level allowed.
    N)loggingrootmanagerr  )highest_levelprevious_levels     r   disable_loggingr=    s_       \)1NOM"""('''''''''s   A Amodel_name_or_pathtokencache_folderrevisionlocal_files_onlyc           
      F    t          t          | d||||                    S )a}  
    Checks if the given model name or path corresponds to a SentenceTransformer model.

    Args:
        model_name_or_path (str): The name or path of the model.
        token (Optional[Union[bool, str]]): The token to be used for authentication. Defaults to None.
        cache_folder (Optional[str]): The folder to cache the model files. Defaults to None.
        revision (Optional[str]): The revision of the model. Defaults to None.
        local_files_only (bool): Whether to only use local files for the model. Defaults to False.

    Returns:
        bool: True if the model is a SentenceTransformer model, False otherwise.
    zmodules.json)rA  rB  )boolload_file_path)r>  r?  r@  rA  rB  s        r   is_sentence_transformer_modelrF    s:    ( -	
 	
 	
	 	 	r   filenamec           	          t           j                            | |          }t           j                            |          r|S 	 t	          | ||d|||          S # t
          $ r Y dS w xY w)a  
    Loads a file from a local or remote location.

    Args:
        model_name_or_path (str): The model name or path.
        filename (str): The name of the file to load.
        token (Optional[Union[bool, str]]): The token to access the remote file (if applicable).
        cache_folder (Optional[str]): The folder to cache the downloaded file (if applicable).
        revision (Optional[str], optional): The revision of the file (if applicable). Defaults to None.
        local_files_only (bool, optional): Whether to only consider local files. Defaults to False.

    Returns:
        Optional[str]: The path to the loaded file, or None if the file could not be found or loaded.
    sentence-transformers)rG  rA  library_namer?  	cache_dirrB  N)r   r   joinexistsr   r   )r>  rG  r?  r@  rA  rB  	file_paths          r   rE  rE  (  s    . /::I	w~~i   0"-
 
 
 	
    tts   A 
A'&A'	directoryc           	      H   t           j                            | |          }t           j                            |          r|S | || dd|||t          d}	 t          di |}n!# t          $ r d|d<   t          di |}Y nw xY wt           j                            ||          S )af  
    Loads the directory path for a given model name or path.

    Args:
        model_name_or_path (str): The name or path of the model.
        directory (str): The directory to load.
        token (Optional[Union[bool, str]]): The token for authentication.
        cache_folder (Optional[str]): The folder to cache the downloaded files.
        revision (Optional[str], optional): The revision of the model. Defaults to None.
        local_files_only (bool, optional): Whether to only use local files. Defaults to False.

    Returns:
        Optional[str]: The directory path if it exists, otherwise None.
    z/**rI  )repo_idrA  allow_patternsrJ  r?  rK  rB  
tqdm_classTrB  r`   )r   r   rL  rM  r+  r   r   )	r>  rO  r?  r@  rA  rB  dir_pathdownload_kwargs	repo_paths	            r   load_dir_pathrW  R  s    . w||.	::H	w~~h  &&+++/!,#	 	O9%8888		 9 9 9.2*+%8888			9 7<<	9---s   A# #B Bc                 F     t          j                    fd            }|S )Nc                     |                     dd           }|r#d|vrt                              d           ||d<   t          |          dk    rg |d d         d |dd          R } | g|R i |S )N	repo_namerQ  zfProviding a `repo_name` keyword argument to `save_to_hub` is deprecated, please use `repo_id` instead.rF   )poploggerwarningr   )r/  r   r   rZ  funcs       r   wrapperz+save_to_hub_args_decorator.<locals>.wrapper  s     JJ{D11	 	*&00NNx   !*F9 t99>>/T"1"X/t/d122h//DtD*4***6***r   )	functoolswraps)r^  r_  s   ` r   save_to_hub_args_decoratorrb    s8    _T+ + + + + Nr   )mpsr  r  hpur   c                  ,   t           j                                        rdS t           j        j                                        rdS t                      rdS t          j                            d          ddl	m c m
}  |                                 rdS dS )	aH  
    Returns the name of the device where this module is running on.

    It's a simple implementation that doesn't cover cases when more powerful GPUs are available and
    not a primary device ('cuda:0') or MPS device is available, but not configured properly.

    Returns:
        str: Device name, like 'cuda' or 'cpu'
    r  rc  r  habana_frameworksNr   rd  r   )r   r  is_availablebackendsrc  r   r   util	find_spechabana_frameworks.torch.hpurd  )hthpus    r   get_device_namerm    s     z   
v			(	(	*	* u		!	! u		!	!"5	6	6	B333333333 	55r   c                  D    t           j                            d          duS )z>
    Returns True if the accelerate library is available.
    
accelerateNr   ri  rj  r`   r   r   is_accelerate_availablerq    s     >##L11==r   c                  D    t           j                            d          duS )z<
    Returns True if the datasets library is available.
    datasetsNrp  r`   r   r   is_datasets_availablert    s     >##J//t;;r   c                  :    t                      ot                      S )zd
    Returns True if we have the required dependencies for training Sentence Transformer models
    )rq  rt  r`   r   r   is_training_availablerv    s     #$$@)>)@)@@r   )r  r   r   F)NNNF)NF)Lr`  r   r   r8  r   r   r   
contextlibr   typingr   r   r   r   r   r	   r
   r   r   numpyr   r   r   huggingface_hubr   r   r   r   tqdm.autonotebookr   transformersr   	getLoggerr   r\  r   r   r   r!   r#   r)   r'   r2   r4   r1   r>   rB   rD   rH   rW   r+   r   rb   r   rD  floatrt   rr   r   r   r   r   r   r  r)  r+  CRITICALr=  rF  rE  rW  rb  rm  rq  rt  rv  r`   r   r   <module>r     sY
             				  



 % % % % % % V V V V V V V V V V V V V V V V V V V V V V       > > > > > > > >                 " " " " " " / / / / / /		8	$	$%bj& 89 f     F    dBJ&> ? F    v & V    4uT2:v-. 45rz69Q3R 4W] 4 4 4 4&P P6 Pf P P P P"*tRZ/0 *U4V;S5T *Y_ * * * *"& V     "%U4V34 %tRZQW?W9X %]c % % % %"0eD"*f$<= 0%bjZ`H`Ba 0 0 0 0"%U4V34 %tRZQW?W9X %]c % % % %"8eD"*f$<= 8%bjZ`H`Ba 8 8 8 8"!& !V ! ! ! ! !D
AV 
A 
A 
A 
A 
A 
 _BJ _hsm _PRPZ _ _ _ 
 _ 
 cEL c cRWR^ c c c 
 c*bj%,./*?G}*
2:u|#$* * * *J $ #9@* *Cy* * 	*
 * * * * ff-v56* 
$uUCZ 
!"* * * *^ !#9@F FFF F 	F
 F ff-v56F 
$uUCZ 
!"F F F FR,d4S%U
BS=S8T3U.V , , , ,  #9@M MMM M 	M
 M ff-v56M 
$tCsEz**+
,-M M M M`"# "S "T " " " "J4S> & T#s(^    "33 3 3 3 38!C !D ! ! ! !L  #c celBJ./cc c 	c
 c 
$s)_c c c cV    D   ( ")"2 ( ( ( (* )-"&"" E$)$% 3- sm	
  
   J #"' ''' E$)$%' 3-	'
 sm' ' c]' ' ' '^ #",. ,.,.,. E$)$%,. 3-	,.
 sm,. ,. c],. ,. ,. ,.^  (!CD    0> > > > ><t < < < <At A A A A A Ar   