
    uiE                        d dl Z d dlZd dlmZ d dlmZmZmZmZ d dl	m
Z
mZmZmZmZmZmZmZmZ d dlmZ d dlmZ d dlZd dlZd dlZd dlmZmZmZmZmZm Z m!Z!m"Z" d dl#Z$d dl%mZ& d dl'Z'd dl(Z(d dl)Z)d dl*Z*d dl+Z+	 d dl,m-Z- n# e.$ r d	Z-Y nw xY werd d
l/m0Z0 d dl1m2Z2  ej3        e4          Z5de6de6de7fdZ8 G d dee                   Z9 G d dee                   Z: G d dee                   Z; G d dee                   Z< G d dee                   Z= G d dee                   Z> G d dee                   Z? G d dee                   Z@de ee                  fd ZA G d! d"ee                   ZB G d# d$ee                   ZC G d% d&ee                   ZD G d' d(ee!eef                            ZE G d) d*ee!eef                            ZF G d+ d,ee                   ZG G d- d.ee                   ZHd/efd0ZI G d1 d2ee                   ZJd3  e(jK        e*jL        e4         e(jM                  D             ZNdee6         fd4ZOdS )5    N)cached_property)stop_after_attemptwait_randomretryretry_if_exception)	Document	Documents	EmbeddingImageImagesEmbeddingFunction
Embeddingsis_imageis_document)BytesIO)Path)TYPE_CHECKINGAnyDictListMappingOptionalUnioncast)is_thin_clientF)InferenceSession)	Tokenizerfnameexpected_sha256returnc                    t          j                    }t          | d          5 t          fdd          D ]}|                    |           	 d d d            n# 1 swxY w Y   |                                |k    S )Nrbc                  .                          d          S )Ni   )read)fs   U/var/www/icac/venv/lib/python3.11/site-packages/chromadb/utils/embedding_functions.py<lambda>z _verify_sha256.<locals>.<lambda>1   s    qvvd||     r(   )hashlibsha256openiterupdate	hexdigest)r   r   sha256_hash
byte_blockr%   s       @r&   _verify_sha256r1   -   s    .""K	eT		 +a3333S99 	+ 	+Jz****	++ + + + + + + + + + + + + + +
   ""o55s   ,AA"%A"c            	       ^    e Zd ZU i Zeeef         ed<   	 	 	 ddedededefd	Z	d
e
defdZdS )$SentenceTransformerEmbeddingFunctionmodelsall-MiniLM-L6-v2cpuF
model_namedevicenormalize_embeddingskwargsc                     || j         vr8	 ddlm} n# t          $ r t	          d          w xY w ||fd|i|| j         |<   | j         |         | _        || _        dS )a  Initialize SentenceTransformerEmbeddingFunction.

        Args:
            model_name (str, optional): Identifier of the SentenceTransformer model, defaults to "all-MiniLM-L6-v2"
            device (str, optional): Device used for computation, defaults to "cpu"
            normalize_embeddings (bool, optional): Whether to normalize returned vectors, defaults to False
            **kwargs: Additional arguments to pass to the SentenceTransformer model.
        r   )SentenceTransformerzuThe sentence_transformers python package is not installed. Please install it with `pip install sentence_transformers`r8   N)r4   sentence_transformersr<   ImportError
ValueError_model_normalize_embeddings)selfr7   r8   r9   r:   r<   s         r&   __init__z-SentenceTransformerEmbeddingFunction.__init__=   s     T[((EEEEEEE     L   ':&9' '#)'-3' 'DK
# k*-%9"""s    ,inputr    c                     t          t          | j                            t	          |          d| j                                                            S )NT)convert_to_numpyr9   )r   r   r@   encodelistrA   tolistrB   rD   s     r&   __call__z-SentenceTransformerEmbeddingFunction.__call__Y   sN    KU!%%)%?    fhh
 
 	
r(   N)r5   r6   F)__name__
__module____qualname__r4   r   strr   __annotations__boolrC   r	   r   rK    r(   r&   r3   r3   7   s         FDcN -%*	: :: : #	:
 : : : :8
i 
J 
 
 
 
 
 
r(   r3   c                   ,    e Zd ZddefdZdedefdZdS )	Text2VecEmbeddingFunction shibing624/text2vec-base-chineser7   c                 r    	 ddl m} n# t          $ r t          d          w xY w ||          | _        d S )Nr   )SentenceModelz[The text2vec python package is not installed. Please install it with `pip install text2vec`)model_name_or_path)text2vecrW   r>   r?   r@   )rB   r7   rW   s      r&   rC   z"Text2VecEmbeddingFunction.__init__e   sf    	....... 	 	 	m  	 $mzBBB   	 #rD   r    c                     t          t          | j                            t	          |          d                                                    S )NT)rF   )r   r   r@   rG   rH   rI   rJ   s     r&   rK   z"Text2VecEmbeddingFunction.__call__n   s@    **4;;*NNUUWW
 
 	
r(   N)rU   rL   rM   rN   rO   rC   r	   r   rK   rR   r(   r&   rT   rT   d   s\        C C3 C C C C
i 
J 
 
 
 
 
 
r(   rT   c                       e Zd Z	 	 	 	 	 	 	 	 ddee         dedee         dee         dee         dee         d	ee         d
eeeef                  fdZdedefdZ	dS )OpenAIEmbeddingFunctionNtext-embedding-ada-002api_keyr7   organization_idapi_baseapi_typeapi_versiondeployment_iddefault_headersc	                    	 ddl }	n# t          $ r t          d          w xY w|||	_        n|	j        t          d          |||	_        |||	_        || _        |||	_        |||	_        |	j	        
                    d          | _        | j        rM|dk    r$|	                    ||||          j        | _        n/|	                    |||          j        | _        n|	j        | _        || _        || _        dS )	aV  
        Initialize the OpenAIEmbeddingFunction.
        Args:
            api_key (str, optional): Your API key for the OpenAI API. If not
                provided, it will raise an error to provide an OpenAI API key.
            organization_id(str, optional): The OpenAI organization ID if applicable
            model_name (str, optional): The name of the model to use for text
                embeddings. Defaults to "text-embedding-ada-002".
            api_base (str, optional): The base path for the API. If not provided,
                it will use the base path for the OpenAI API. This can be used to
                point to a different deployment, such as an Azure deployment.
            api_type (str, optional): The type of the API deployment. This can be
                used to specify a different deployment, such as 'azure'. If not
                provided, it will use the default OpenAI deployment.
            api_version (str, optional): The api version for the API. If not provided,
                it will use the api version for the OpenAI API. This can be used to
                point to a different deployment, such as an Azure deployment.
            deployment_id (str, optional): Deployment ID for Azure OpenAI.
            default_headers (Mapping, optional): A mapping of default headers to be sent with each API request.

        r   NzWThe openai python package is not installed. Please install it with `pip install openai`zaPlease provide an OpenAI API key. You can get one at https://platform.openai.com/account/api-keysz1.azure)r`   rd   azure_endpointrf   )r`   base_urlrf   )openair>   r?   r`   rb   rd   	_api_typerc   organization__version__
startswith_v1AzureOpenAI
embeddings_clientOpenAIr
   _model_name_deployment_id)
rB   r`   r7   ra   rb   rc   rd   re   rf   rk   s
             r&   rC   z OpenAIEmbeddingFunction.__init__u   s\   @	MMMM 	 	 	i  	
 $FNN^#s   &FO"!,F!&FO&"1F%00668 	,7""%11# +#+$3	  2    
    &}}#h  -      "+DL%+    !rD   r    c                    d |D             }| j         r^| j                            || j        p| j                  j        }t          |d           }t          t          d |D                       S | j	        dk    r/| j                            || j        p| j                  d         }n'| j                            || j                  d         }t          |d	           }t          t          d
 |D                       S )Nc                 :    g | ]}|                     d d          S )
 )replace).0ts     r&   
<listcomp>z4OpenAIEmbeddingFunction.__call__.<locals>.<listcomp>   s&    555!4%%555r(   rD   modelc                     | j         S N)indexes    r&   r'   z2OpenAIEmbeddingFunction.__call__.<locals>.<lambda>   s     r(   keyc                     g | ]	}|j         
S rR   	embeddingr}   results     r&   r   z4OpenAIEmbeddingFunction.__call__.<locals>.<listcomp>   s    $V$V$V&V%5$V$V$Vr(   rh   )rD   enginedatac                     | d         S Nr   rR   r   s    r&   r'   z2OpenAIEmbeddingFunction.__call__.<locals>.<lambda>   s
    7 r(   c                     g | ]
}|d          S r   rR   r   s     r&   r   z4OpenAIEmbeddingFunction.__call__.<locals>.<listcomp>   s    QQQVVK0QQQr(   )
rp   rs   createrv   ru   r   sortedr   r   rl   )rB   rD   rr   sorted_embeddingss       r&   rK   z OpenAIEmbeddingFunction.__call__   s:   55u555 8 	,,4#6#J$:J -   
 !'z7H7H I I I 
$V$VDU$V$V$VWWW~((!\00(;(Ot?O 1  

 "\00uDDT0UU

 !'z7K7K L L L QQ?PQQQ  r(   )Nr_   NNNNNN)
rL   rM   rN   r   rO   r   rC   r	   r   rK   rR   r(   r&   r^   r^   t   s         "&2)-"&"&%)'+7;L, L,#L, L, "#	L,
 3-L, 3-L, c]L,  }L, "'#s("34L, L, L, L,\i J      r(   r^   c                   0    e Zd Zd	dedefdZdedefdZdS )
CohereEmbeddingFunctionlarger`   r7   c                     	 dd l }n# t          $ r t          d          w xY w|                    |          | _        || _        d S )Nr   zWThe cohere python package is not installed. Please install it with `pip install cohere`)coherer>   r?   Clientrs   ru   )rB   r`   r7   r   s       r&   rC   z CohereEmbeddingFunction.__init__   se    	MMMM 	 	 	i  	
 }}W--%rw   rD   r    c                 Z    d | j                             || j        d          D             S )Nc                     g | ]}|S rR   rR   )r}   rr   s     r&   r   z4CohereEmbeddingFunction.__call__.<locals>.<listcomp>   s(     
 
 
 
 
 
r(   search_document)textsr   
input_type)rs   embedru   rJ   s     r&   rK   z CohereEmbeddingFunction.__call__   sE    
 
"l004#3@Q 1  
 
 
 	
r(   N)r   r\   rR   r(   r&   r   r      s]        	& 	& 	& 	& 	& 	& 	&
i 
J 
 
 
 
 
 
r(   r   c                   6    e Zd ZdZ	 d
dedefdZdedefdZd	S )HuggingFaceEmbeddingFunctionz
    This class is used to get embeddings for a list of texts using the HuggingFace API.
    It requires an API key and a model name. The default model name is "sentence-transformers/all-MiniLM-L6-v2".
    &sentence-transformers/all-MiniLM-L6-v2r`   r7   c                     d| | _         t          j                    | _        | j        j                            dd| i           dS )a  
        Initialize the HuggingFaceEmbeddingFunction.

        Args:
            api_key (str): Your API key for the HuggingFace API.
            model_name (str, optional): The name of the model to use for text embeddings. Defaults to "sentence-transformers/all-MiniLM-L6-v2".
        zAhttps://api-inference.huggingface.co/pipeline/feature-extraction/AuthorizationBearer N_api_urlrequestsSession_sessionheadersr-   rB   r`   r7   s      r&   rC   z%HuggingFaceEmbeddingFunction.__init__  sR     i\fhh (**$$o7J7J7J%KLLLLLr(   rD   r    c           	          t          t          | j                            | j        |ddid                                                    S )a  
        Get the embeddings for a list of texts.

        Args:
            texts (Documents): A list of texts to get embeddings for.

        Returns:
            Embeddings: The embeddings for the texts.

        Example:
            >>> hugging_face = HuggingFaceEmbeddingFunction(api_key="your_api_key")
            >>> texts = ["Hello, world!", "How are you?"]
            >>> embeddings = hugging_face(texts)
        wait_for_modelT)inputsoptionsjsonr   r   r   postr   r   rJ   s     r&   rK   z%HuggingFaceEmbeddingFunction.__call__  sT      M %3CT2JKK    dff
 
 	
r(   N)r   	rL   rM   rN   __doc__rO   rC   r	   r   rK   rR   r(   r&   r   r      sv          /WM MM(+M M M M
i 
J 
 
 
 
 
 
r(   r   c                   4    e Zd ZdZd
dedefdZdedefdZd	S )JinaEmbeddingFunctionz
    This class is used to get embeddings for a list of texts using the Jina AI API.
    It requires an API key and a model name. The default model name is "jina-embeddings-v2-base-en".
    jina-embeddings-v2-base-enr`   r7   c                     || _         d| _        t          j                    | _        | j        j                            d| dd           dS )a  
        Initialize the JinaEmbeddingFunction.

        Args:
            api_key (str): Your API key for the Jina AI API.
            model_name (str, optional): The name of the model to use for text embeddings. Defaults to "jina-embeddings-v2-base-en".
        z!https://api.jina.ai/v1/embeddingsr   identity)r   zAccept-EncodingN)ru   r   r   r   r   r   r-   r   s      r&   rC   zJinaEmbeddingFunction.__init__.  s]     &; (**$$111jQQ	
 	
 	
 	
 	
r(   rD   r    c                    | j                             | j        || j        d                                          }d|vrt          |d                   |d         }t          |d           }t          t          d |D                       S )a  
        Get the embeddings for a list of texts.

        Args:
            texts (Documents): A list of texts to get embeddings for.

        Returns:
            Embeddings: The embeddings for the texts.

        Example:
            >>> jina_ai_fn = JinaEmbeddingFunction(api_key="your_api_key")
            >>> input = ["Hello, world!", "How are you?"]
            >>> embeddings = jina_ai_fn(input)
        r   r   r   detailc                     | d         S r   rR   r   s    r&   r'   z0JinaEmbeddingFunction.__call__.<locals>.<lambda>V  s
    QwZ r(   r   c                     g | ]
}|d          S r   rR   r   s     r&   r   z2JinaEmbeddingFunction.__call__.<locals>.<listcomp>Y  s     U U U!4 U U Ur(   )	r   r   r   ru   r   RuntimeErrorr   r   r   )rB   rD   resprr   r   s        r&   rK   zJinaEmbeddingFunction.__call__=  s      }!!M%$:J K K " 
 

$&& 	 tH~...&\
 #:3G3GHHH J U UCT U U UVVVr(   N)r   r   rR   r(   r&   r   r   (  so         

 
 
 
 
 
 
Wi WJ W W W W W Wr(   r   c                   F    e Zd Z	 	 	 ddededee         fdZded	efd
ZdS )InstructorEmbeddingFunctionhkunlp/instructor-baser6   Nr7   r8   instructionc                     	 ddl m} n# t          $ r t          d          w xY w |||          | _        || _        d S )Nr   )
INSTRUCTORzqThe InstructorEmbedding python package is not installed. Please install it with `pip install InstructorEmbedding`)r8   )InstructorEmbeddingr   r>   r?   r@   _instruction)rB   r7   r8   r   r   s        r&   rC   z$InstructorEmbeddingFunction.__init___  su    	6666666 	 	 	 D  	 !jF;;;'rZ   rD   r    c                 *     j         ?t          t           j                            |                                                    S  fd|D             }t          t           j                            |                                                    S )Nc                 "    g | ]}j         |gS rR   )r   r}   textrB   s     r&   r   z8InstructorEmbeddingFunction.__call__.<locals>.<listcomp>r  s!    "O"O"OD$5t#<"O"O"Or(   )r   r   r   r@   rG   rI   )rB   rD   texts_with_instructionss   `  r&   rK   z$InstructorEmbeddingFunction.__call__n  s    $
DK$6$6u$=$=$D$D$F$FGGG"O"O"O"O"O"O"OJ 2 23J K K R R T TUUUr(   )r   r6   N)	rL   rM   rN   rO   r   rC   r	   r   rK   rR   r(   r&   r   r   \  s        
 3%)	( (( ( c]	( ( ( (Vi VJ V V V V V Vr(   r   c            
          e Zd ZdZ ej                    dz  dz  dz  ez  ZdZdZdZ	dZ
d%d
eee                  dd	fdZ ed ed           edd           ed                     d&dedededd	fd            Zdej        dej        fdZd'dee         dedej        fdZed(d            Zed)d!            Zd"edefd#Zd*d$Zd	S )+ONNXMiniLM_L6_V2r5   z.cachechromaonnx_modelsonnxzonnx.tar.gzzHhttps://chroma-onnx-models.s3.amazonaws.com/all-MiniLM-L6-v2/onnx.tar.gz@913d7300ceae3b2dbc2c50d1de4baacab4be7b9380491c27fab7418616a16ec3Npreferred_providersr    c                 F   |r(t          d |D                       st          d          |r<t          |          t          t          |                    k    rt          d          || _        	 t          j        d          | _        n# t          $ r t          d          w xY w	 t          j        d          j	        | _	        n# t          $ r t          d          w xY w	 t          j        d          j
        | _
        d S # t          $ r t          d	          w xY w)
Nc                 8    g | ]}t          |t                    S rR   )
isinstancerO   )r}   is     r&   r   z-ONNXMiniLM_L6_V2.__init__.<locals>.<listcomp>  s"    ===AZ3===r(   z-Preferred providers must be a list of stringsz"Preferred providers must be uniqueonnxruntimezaThe onnxruntime python package is not installed. Please install it with `pip install onnxruntime`
tokenizersz_The tokenizers python package is not installed. Please install it with `pip install tokenizers`tqdmzSThe tqdm python package is not installed. Please install it with `pip install tqdm`)allr?   lenset_preferred_providers	importlibimport_moduleortr>   r   r   )rB   r   s     r&   rC   zONNXMiniLM_L6_V2.__init__  sv     	Ns==)<===(
 (
 	N LMMM 	C3':#;#;s#$$@
 @
 $
 $
 ABBB$7!	 .}==DHH 	 	 	s  		&4\BBLDNN 	 	 	q  		!/77<DIII 	 	 	e  	s$   1B B%)C C"&D D T      )minmaxc                 $    dt          |           v S )Nzdoes not match expected SHA256)rO   r   s    r&   r'   zONNXMiniLM_L6_V2.<lambda>  s    +KsSTvv+U r(   )reraisestopwaitr      urlr   
chunk_sizec                 P   t          j        |d          }t          |j                            dd                    }t	          |d          5 }|                     t          |          |ddd          5 }|                    |	          D ],}|                    |          }	|	                    |	           -	 d d d            n# 1 swxY w Y   d d d            n# 1 swxY w Y   t          || j                  s't          j        |           t          d
| d          d S )NT)streamzcontent-lengthr   wbiBr   )desctotalunit
unit_scaleunit_divisor)r   zDownloaded file zK does not match expected SHA256 hash. Corrupted download or malicious file.)r   getintr   r+   r   rO   iter_contentwriter-   r1   _MODEL_SHA256osremover?   )
rB   r   r   r   r   r   filebarr   sizes
             r&   	_downloadzONNXMiniLM_L6_V2._download  s    |C---DL$$%5q99::% 		!$		U )2 )
 )
 		! ))Z)@@ ! !zz$''

4    !		! 		! 		! 		! 		! 		! 		! 		! 		! 		! 		! 		! 		! 		! 		! 		! 		! 		! 		! 		! 		! 		! 		! 		! 		! 		! 		! 		! 		! 		! eT%788 	Ieu5uuu  	 	s7   (C7AC<CC	CC	CC#&C#vc                     t           j                            |d          }d||dk    <   t          t          j        ||d d t           j        f         z            S )Nr   )axisg-q=r   )nplinalgnormr   nptNDArraynewaxis)rB   r  r
  s      r&   
_normalizezONNXMiniLM_L6_V2._normalize  sL    y~~aa~((TQYCKT!!!RZ-%8!8999r(       	documents
batch_sizec                 2    t           j         j                   _        t           j        j         j                   _        g }t          dt          |          |          D ]}||||z            } fd|D             }t          j	        d |D                       }t          j	        d |D                       }t          j	        |t          j
                  t          j	        |t          j
                  t          j	        d |D             t          j
                  d}	 j                            d |	          }
|
d         }t          j        t          j        |d          |j                  }t          j        ||z  d	          t          j        |                    d	          d
d           z  }                     |                              t          j                  }|                    |           t          j        |          S )Nr   c                 D    g | ]}j                             |          S rR   )	tokenizerrG   )r}   drB   s     r&   r   z-ONNXMiniLM_L6_V2._forward.<locals>.<listcomp>  s)    ???At~,,Q//???r(   c                     g | ]	}|j         
S rR   )idsr}   r   s     r&   r   z-ONNXMiniLM_L6_V2._forward.<locals>.<listcomp>  s    !9!9!9A!%!9!9!9r(   c                     g | ]	}|j         
S rR   )attention_maskr  s     r&   r   z-ONNXMiniLM_L6_V2._forward.<locals>.<listcomp>  s    &I&I&IAq'7&I&I&Ir(   dtypec                 h    g | ]/}t          j        t          |          t           j                   0S )r  )r  zerosr   int64r  s     r&   r   z-ONNXMiniLM_L6_V2._forward.<locals>.<listcomp>  s/    III!RXc!ffBH555IIIr(   )	input_idsr  token_type_idsr   g&.>)a_mina_max)r   r   r  r   r   r   ranger   r  arrayr  runbroadcast_toexpand_dimsshapesumclipr  astypefloat32appendconcatenate)rB   r  r  all_embeddingsr   batchencodedr   r  
onnx_inputmodel_outputlast_hidden_stateinput_mask_expandedrr   s   `             r&   _forwardzONNXMiniLM_L6_V2._forward  s   dndn==$(3TZ@@
q#i..*55 	. 	.Aa!j.01E???????G!9!9!9!9!9::IX&I&I&I&I&IJJNXirx@@@"$(>"J"J"J"$(IIyIII(# # # J  :>>$
;;L ,Q"$/~r224E4K# #  14G GKKbg#''**$dO O O J 44;;BJGGJ!!*----~n---r(   r   c                     | j                             t          j                            | j        | j        d                    }|                    d           |                    ddd           |S )Ntokenizer.json   )
max_lengthr   z[PAD])pad_id	pad_tokenlength)	r   	from_filer   pathjoinDOWNLOAD_PATHEXTRACTED_FOLDER_NAMEenable_truncationenable_padding)rB   r  s     r&   r  zONNXMiniLM_L6_V2.tokenizer  st    N,,GLL"D$>@P 
 
	 	##s#333  WS IIIr(   r   c                    | j         t          | j                   dk    r}t          | j                                                  dk    r4t                              d| j                                                    | j                                        | _         ntt          | j                                       t          | j                                                            s)t          d| j                                                   | j        	                                }d|_
        | j                            t          j                            | j        | j        d          | j         |          S )Nr   zHWARNING: No ONNX providers provided, defaulting to available providers: z;Preferred providers must be subset of available providers: r   
model.onnx)	providerssess_options)r   r   r   get_available_providersloggerdebugr   issubsetr?   SessionOptionslog_severity_levelr   r   rA  rB  rC  rD  )rB   sos     r&   r   zONNXMiniLM_L6_V2.model  s^   $,D4M0N0NRS0S0S48335566::<x7799< <   )-(H(H(J(JD%%T.//88002233
 
 	 rdhNnNnNpNprr  
 X$$&& !x((GLL+T-GVV / ) 
 
 	
r(   rD   c                     |                                   t          t          |                     |                                                    S r   )_download_model_if_not_existsr   r   r8  rI   rJ   s     r&   rK   zONNXMiniLM_L6_V2.__call__  s<    **,,,Je 4 4 ; ; = =>>>r(   c                    g d}t           j                            | j        | j                  }d}|D ]C}t           j                            t           j                            ||                    sd} nD|sWt          j        | j        d           t           j                            t           j                            | j        | j                            r=t          t           j                            | j        | j                  | j	                  sD| 
                    | j        t           j                            | j        | j                             t          j        t           j                            | j        | j                  d          5 }|                    | j                   d d d            d S # 1 swxY w Y   d S d S )	N)zconfig.jsonrH  zspecial_tokens_map.jsonztokenizer_config.jsonr:  z	vocab.txtTF)exist_ok)r   r   zr:gz)namemode)rA  )r   rA  rB  rC  rD  existsmakedirsARCHIVE_FILENAMEr1   r   r  MODEL_DOWNLOAD_URLtarfiler+   
extractall)rB   
onnx_filesextracted_folderonnx_files_existr%   tars         r&   rS  z.ONNXMiniLM_L6_V2._download_model_if_not_exists  s   
 
 

 7<<(:D<VWW 	 	A7>>"',,/?"C"CDD #(    	8K*T::::7>>T/1FGG  	#T/1FGG" 	 /',,t'94;PQQ     W\\$"4d6KLL   8 D$6777	8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8	8 	8s   GGGr   )r   )r  )r    r   )r    r   )r    N) rL   rM   rN   
MODEL_NAMEr   homerC  rD  rZ  r[  r   r   r   rO   rC   r   r   r   r   r   r  r  r  r  r8  r   r  r   r	   r   rK   rS  rR   r(   r&   r   r   |  s       #JDIKK(*X5E
RM"$R  WM" "HT#Y,? "4 " " " "L U""[QA&&&  !U!UVV	   S  #     ,:CK :CK : : : :
. .$s) . .ck . . . .> 
 
 
 _
 
 
 
 _
6?i ?J ? ? ? ?
 8  8  8  8  8  8r(   r   c                  0    t           rd S t                      S r   )r   r   rR   r(   r&   DefaultEmbeddingFunctionre  ?  s     "t!!!r(   c                   4    e Zd ZdZd
dedefdZdedefdZd	S )GooglePalmEmbeddingFunctionzvTo use this EmbeddingFunction, you must have the google.generativeai Python package installed and have a PaLM API key.models/embedding-gecko-001r`   r7   c                     |st          d          |st          d          	 dd lm} n# t          $ r t          d          w xY w|                    |           || _        || _        d S )NzPlease provide a PaLM API key.Please provide the model name.r   rThe Google Generative AI python package is not installed. Please install it with `pip install google-generativeai`r`   )r?   google.generativeaigenerativeair>   	configure_palmru   )rB   r`   r7   palms       r&   rC   z$GooglePalmEmbeddingFunction.__init__I  s     	?=>>> 	?=>>>	....... 	 	 	 E  	
 	w'''
%	   + ArD   r    c                        fd|D             S )Nc                 ^    g | ])}j                             j        |           d         *S ))r   r   r   )rp  generate_embeddingsru   r   s     r&   r   z8GooglePalmEmbeddingFunction.__call__.<locals>.<listcomp>\  sK     
 
 
  J**1A*MM
 
 
r(   rR   rJ   s   ` r&   rK   z$GooglePalmEmbeddingFunction.__call__[  s0    
 
 
 
 	
 
 
 	
r(   N)rh  r   rR   r(   r&   rg  rg  F  si         A  A& & & & & & &$
i 
J 
 
 
 
 
 
r(   rg  c                   >    e Zd ZdZ	 	 	 ddededefdZded	efd
ZdS )#GoogleGenerativeAiEmbeddingFunctionzxTo use this EmbeddingFunction, you must have the google.generativeai Python package installed and have a Google API key.models/embedding-001RETRIEVAL_DOCUMENTr`   r7   	task_typec                     |st          d          |st          d          	 dd lm} n# t          $ r t          d          w xY w|                    |           || _        || _        || _        d | _        | j        dk    r	d| _        d S d S )Nz Please provide a Google API key.rj  r   rk  rl  ry  zEmbedding of single string)	r?   rm  rn  r>   ro  _genairu   
_task_type_task_title)rB   r`   r7   rz  genais        r&   rC   z,GoogleGenerativeAiEmbeddingFunction.__init__i  s      	A?@@@ 	?=>>>	/////// 	 	 	 E  	
 	(((%#?222;D 32rr  rD   r    c                        fd|D             S )Nc                 v    g | ]5}j                             j        |j        j                   d         6S ))r   contentrz  titler   )r|  embed_contentru   r}  r~  r   s     r&   r   z@GoogleGenerativeAiEmbeddingFunction.__call__.<locals>.<listcomp>  s`     
 
 
  K%%&/&	 &  
 
 
 
r(   rR   rJ   s   ` r&   rK   z,GoogleGenerativeAiEmbeddingFunction.__call__  s0    
 
 
 
 
 
 
 	
r(   N)rx  ry  r   rR   r(   r&   rw  rw  d  s         C  Cv
 1-	< << < 	< < < <6	
i 	
J 	
 	
 	
 	
 	
 	
r(   rw  c            	       >    e Zd Z	 	 	 ddedededefdZd	ed
efdZdS )GoogleVertexEmbeddingFunctiontextembedding-geckocloud-large-language-modelsus-central1r`   r7   
project_idregionc           	          d| d| d| d| d	| _         t          j                    | _        | j        j                            dd| i           d S )Nzhttps://z'-aiplatform.googleapis.com/v1/projects/z/locations/z/publishers/goole/models/z:predictr   r   r   )rB   r`   r7   r  r  s        r&   rC   z&GoogleVertexEmbeddingFunction.__init__  s     _6  _  _R\  _  _io  _  _  KU  _  _  _ (**$$o7J7J7J%KLLLLLr(   rD   r    c                     g }|D ]e}| j                             | j        dd|igi                                          }d|v r'|                    |d         d         d                    f|S )N	instancesr  r   predictionsrr   values)r   r   r   r   r/  )rB   rD   rr   r   responses        r&   rK   z&GoogleVertexEmbeddingFunction.__call__  s    
 	S 	SD}))[It3D2E$F *  dff  ((!!(="9,"G"QRRRr(   N)r  r  r  r\   rR   r(   r&   r  r    s         07#	M 	M	M 	M 		M
 	M 	M 	M 	M
i 
J 
 
 
 
 
 
r(   r  c            	       z    e Zd Z	 	 	 ddededee         ddfd	Zd
edefdZde	defdZ
deeef         defdZdS )OpenCLIPEmbeddingFunctionViT-B-32laion2b_s34b_b79kr6   r7   
checkpointr8   r    Nc                    	 dd l }n# t          $ r t          d          w xY w	 t          j        d          | _        n# t          $ r t          d          w xY w	 t          j        d          | _        n# t          $ r t          d          w xY w|                    ||          \  }}}|| _        | j        	                    |           || _
        |                    |          | _        d S )	Nr   zThe open_clip python package is not installed. Please install it with `pip install open-clip-torch`. https://github.com/mlfoundations/open_cliptorchzUThe torch python package is not installed. Please install it with `pip install torch`	PIL.ImageTThe PIL python package is not installed. Please install it with `pip install pillow`)r7   
pretrained)r7   )	open_clipr>   r?   r   r   _torch	_PILImagecreate_model_and_transformsr@   to_preprocessget_tokenizer
_tokenizer)rB   r7   r  r8   r  r   _
preprocesss           r&   rC   z"OpenCLIPEmbeddingFunction.__init__  sE   	 	 	 	 b  		#1'::DKK 	 	 	g  	
	&4[AADNN 	 	 	f  	
  )DD!j  E  
  
q* v%#11Z1HHs    !? AA7 7Bimagec                    | j                             |          }| j                                        5  | j                            |                     |                              d                    }||                    dd          z  }t          t          |                                                                          cd d d            S # 1 swxY w Y   d S )Nr   r"  Tdimkeepdim)r  	fromarrayr  no_gradr@   encode_imager  	unsqueezer
  r   r
   squeezerI   )rB   r  	pil_imageimage_featuress       r&   _encode_imagez'OpenCLIPEmbeddingFunction._encode_image  s   N,,U33	[  "" 	F 	F![55  ++55a88 N n11b$1GGGN	>#9#9#;#;#B#B#D#DEE	F 	F 	F 	F 	F 	F 	F 	F 	F 	F 	F 	F 	F 	F 	F 	F 	F 	Fs   BCCCr   c                 h   | j                                         5  | j                            |                     |                    }||                    dd          z  }t          t          |                                	                                          cd d d            S # 1 swxY w Y   d S )Nr"  Tr  )
r  r  r@   encode_textr  r
  r   r
   r  rI   )rB   r   text_featuress      r&   _encode_textz&OpenCLIPEmbeddingFunction._encode_text  s    [  "" 	E 	E K33DOOD4I4IJJM]//B/EEEM	=#8#8#:#:#A#A#C#CDD	E 	E 	E 	E 	E 	E 	E 	E 	E 	E 	E 	E 	E 	E 	E 	E 	E 	Es   B B''B+.B+rD   c           	      >   g }|D ]}t          |          r<|                    |                     t          t          |                               Mt          |          r;|                    |                     t          t          |                               |S r   )r   r/  r  r   r   r   r  r   )rB   rD   rr   items       r&   rK   z"OpenCLIPEmbeddingFunction.__call__  s    !#
 	K 	KD~~ K!!$"4"4T%5F5F"G"GHHHHT"" K!!$"3"3D44H4H"I"IJJJr(   )r  r  r6   )rL   rM   rN   rO   r   rC   r   r
   r  r   r  r   r	   r   r   rK   rR   r(   r&   r  r    s         %- %	 I  I I  I 	 I
 
 I  I  I  IDF5 FY F F F FE Ei E E E EeIv$56 :      r(   r  c                   @    e Zd Zd	deddfdZdeeef         defdZ	dS )
RoboflowEmbeddingFunction https://infer.roboflow.comr`   r    Nc                     |st           j                            d          }|| _        || _        	 t          j        d          | _        dS # t          $ r t          d          w xY w)z
        Create a RoboflowEmbeddingFunction.

        Args:
            api_key (str): Your API key for the Roboflow API.
            api_url (str, optional): The URL of the Roboflow API. Defaults to "https://infer.roboflow.com".
        ROBOFLOW_API_KEYr  r  N)
r   environr   r   _api_keyr   r   r  r>   r?   )rB   r`   api_urls      r&   rC   z"RoboflowEmbeddingFunction.__init__  sy      	9jnn%788G	&4[AADNNN 	 	 	f  	s   A A&rD   c                    g }|D ]Y}t          |          r| j                            |          }t                      }|                    |d           t          j        |                                                              d          }dd|di}t          j
        | j         d| j         |          }|                                d	         }	|                    |	d
                    t          |          r^d|i}t          j
        | j         d| j         |          }|                                d	         }	|                    |	d
                    [|S )NJPEG)formatzutf-8r  base64)typevaluez/clip/embed_image?api_key=r   rr   r   r   z/clip/embed_text?api_key=)r   r  r  r   saver  	b64encodegetvaluedecoder   r   r   r  r   r/  r   )
rB   rD   rr   r  r  bufferbase64_imageinfer_clip_payloadresr   s
             r&   rK   z"RoboflowEmbeddingFunction.__call__  s|   
 $	- $	-D~~ #-0066 

6&
111%/0A0ABBII'RR  (!- &" m}OOOO+  
 L1!!&),,,,T"" -E&" m}NNt}NN+  
 L1!!&),,,r(   )r  r  )
rL   rM   rN   rO   rC   r   r	   r   r   rK   rR   r(   r&   r  r    sh          SW    *)eIv$56 ): ) ) ) ) ) )r(   r  c                   6    e Zd Z	 ddddedefdZdedefd	Zd
S )AmazonBedrockEmbeddingFunctionamazon.titan-embed-text-v1sessionzboto3.Sessionr7   r:   c                 <    || _          |j        dddi|| _        dS )ak  Initialize AmazonBedrockEmbeddingFunction.

        Args:
            session (boto3.Session): The boto3 session to use.
            model_name (str, optional): Identifier of the model, defaults to "amazon.titan-embed-text-v1"
            **kwargs: Additional arguments to pass to the boto3 client.

        Example:
            >>> import boto3
            >>> session = boto3.Session(profile_name="profile", region_name="us-east-1")
            >>> bedrock = AmazonBedrockEmbeddingFunction(session=session)
            >>> texts = ["Hello, world!", "How are you?"]
            >>> embeddings = bedrock(texts)
        service_namezbedrock-runtimeNrR   )ru   clientrs   )rB   r  r7   r:   s       r&   rC   z'AmazonBedrockEmbeddingFunction.__init__,  s:    * &%w~ 
 
*

 
r(   rD   r    c                 0   d}d}g }|D ]}d|i}t          j        |          }| j                            || j        ||          }t          j        |                    d                                        d          }	|                    |	           |S )Nzapplication/json	inputText)bodymodelIdacceptcontentTyper  r   )r   dumpsrs   invoke_modelru   loadr   r/  )
rB   rD   r  content_typerr   r   
input_bodyr  r  r   s
             r&   rK   z'AmazonBedrockEmbeddingFunction.__call__H  s    #)
 
	) 
	)D%t,J:j))D|00((	 1  H 	(,,v"6"677;;KHHIi((((r(   N)r  )	rL   rM   rN   rO   r   rC   r	   r   rK   rR   r(   r&   r  r  +  sp         7
 
 
 
 	
 
 
 
8i J      r(   r  c                   .    e Zd ZdZdefdZdedefdZdS )HuggingFaceEmbeddingServerz
    This class is used to get embeddings for a list of texts using the HuggingFace Embedding server (https://github.com/huggingface/text-embeddings-inference).
    The embedding model is configured in the server.
    r   c                     	 ddl }n# t          $ r t          d          w xY w| | _         |j                    | _        dS )z
        Initialize the HuggingFaceEmbeddingServer.

        Args:
            url (str): The URL of the HuggingFace Embedding Server.
        r   N[The requests python package is not installed. Please install it with `pip install requests`)r   r>   r?   r   r   r   )rB   r   r   s      r&   rC   z#HuggingFaceEmbeddingServer.__init__`  sc    	OOOO 	 	 	m  	 ((**rw   rD   r    c                     t          t          | j                            | j        d|i                                                    S )a  
        Get the embeddings for a list of texts.

        Args:
            texts (Documents): A list of texts to get embeddings for.

        Returns:
            Embeddings: The embeddings for the texts.

        Example:
            >>> hugging_face = HuggingFaceEmbeddingServer(url="http://localhost:8080/embed")
            >>> texts = ["Hello, world!", "How are you?"]
            >>> embeddings = hugging_face(texts)
        r   r   r   rJ   s     r&   rK   z#HuggingFaceEmbeddingServer.__call__p  sC      **4=%?P*QQVVXX
 
 	
r(   Nr   rR   r(   r&   r  r  Z  s[         
+C + + + + 
i 
J 
 
 
 
 
 
r(   r  langchain_embdding_fnc           	          	 ddl m n# t          $ r t          d          w xY w G fddt          t
          t          t          f                            } ||           S )Nr   )r   zgThe langchain_core python package is not installed. Please install it with `pip install langchain-core`c                       e Zd ZdZd ddfdZdedeee                  fdZde	dee         fd	Z
d
ee	         deee                  fdZdedefdZdS )Dcreate_langchain_embedding.<locals>.ChromaLangchainEmbeddingFunctionz{
        This class is used as bridge between langchain embedding functions and custom chroma embedding functions.
        embedding_functionr    Nc                     || _         dS )z
            Initialize the ChromaLangchainEmbeddingFunction

            Args:
                embedding_function : The embedding function implementing Embeddings from langchain_core.
            Nr  )rB   r  s     r&   rC   zMcreate_langchain_embedding.<locals>.ChromaLangchainEmbeddingFunction.__init__  s     '9D###r(   r  c                 6    | j                             |          S r   )r  embed_documents)rB   r  s     r&   r  zTcreate_langchain_embedding.<locals>.ChromaLangchainEmbeddingFunction.embed_documents  s    *::9EEEr(   queryc                 6    | j                             |          S r   )r  embed_query)rB   r  s     r&   r  zPcreate_langchain_embedding.<locals>.ChromaLangchainEmbeddingFunction.embed_query  s    *66u===r(   urisc                 ~    t          | j        d          r| j                            |          S t          d          )Nembed_imagezBThe provided embedding function does not support image embeddings.)hasattrr  r  r?   )rB   r  s     r&   r  zPcreate_langchain_embedding.<locals>.ChromaLangchainEmbeddingFunction.embed_image  sC    t.>> .::4@@@ X  r(   rD   c                     |d         dk    r(|                      t          |d                             S |                     t          |                    S )a  
            Get the embeddings for a list of texts or images.

            Args:
                input (Documents | Images): A list of texts or images to get embeddings for.
                Images should be provided as a list of URIs passed through the langchain data loader

            Returns:
                Embeddings: The embeddings for the texts or images.

            Example:
                >>> langchain_embedding = ChromaLangchainEmbeddingFunction(embedding_function=OpenAIEmbeddings(model="text-embedding-3-large"))
                >>> texts = ["Hello, world!", "How are you?"]
                >>> embeddings = langchain_embedding(texts)
            r   imagesr   )r  rH   r  rJ   s     r&   rK   zMcreate_langchain_embedding.<locals>.ChromaLangchainEmbeddingFunction.__call__  sK    " Qx8##''U1X777''U444r(   )rL   rM   rN   r   rC   r	   r   floatr  rO   r  r  r   rK   )LangchainEmbeddingss   r&    ChromaLangchainEmbeddingFunctionr    s        	 		9/B 	9t 	9 	9 	9 	9	FY 	F4U;L 	F 	F 	F 	F	>S 	>T%[ 	> 	> 	> 	>	DI 	$tE{2C 	 	 	 		5) 	5
 	5 	5 	5 	5 	5 	5r(   r  r  )langchain_core.embeddingsr   r>   r?   r   r   r	   r   )r  r  r  s     @r&   create_langchain_embeddingr    s    
OOOOOOO 
 
 
u
 
 	


25 25 25 25 25 25 25.uY5F/GH25 25 25h ,+?TUUUUs   
 $c                   6    e Zd ZdZdededdfdZdedefdZdS )	OllamaEmbeddingFunctionz
    This class is used to generate embeddings for a list of texts using the Ollama Embedding API (https://github.com/ollama/ollama/blob/main/docs/api.md#generate-embeddings).
    r   r7   r    Nc                     	 ddl }n# t          $ r t          d          w xY w| | _        || _         |j                    | _        dS )a  
        Initialize the Ollama Embedding Function.

        Args:
            url (str): The URL of the Ollama Server.
            model_name (str): The name of the model to use for text embeddings. E.g. "nomic-embed-text" (see https://ollama.com/library for available models).
        r   Nr  )r   r>   r?   r   ru   r   r   )rB   r   r7   r   s       r&   rC   z OllamaEmbeddingFunction.__init__  sk    	OOOO 	 	 	m  	 %((**rw   rD   c                      t          |t                    r|n|g} fd|D             }t          t          d |D                       S )a  
        Get the embeddings for a list of texts.

        Args:
            input (Documents): A list of texts to get embeddings for.

        Returns:
            Embeddings: The embeddings for the texts.

        Example:
            >>> ollama_ef = OllamaEmbeddingFunction(url="http://localhost:11434/api/embeddings", model_name="nomic-embed-text")
            >>> texts = ["Hello, world!", "How are you?"]
            >>> embeddings = ollama_ef(texts)
        c                     g | ]=}j                             j        j        |d                                           >S ))r   promptr   )r   r   r   ru   r   r   s     r&   r   z4OllamaEmbeddingFunction.__call__.<locals>.<listcomp>  s]     
 
 
  Md.>$$O$O   dff
 
 
r(   c                 &    g | ]}d |v |d          S r   rR   )r}   r   s     r&   r   z4OllamaEmbeddingFunction.__call__.<locals>.<listcomp>  s2       )++ +&+++r(   )r   rH   r   r   )rB   rD   r   rr   s   `   r&   rK   z OllamaEmbeddingFunction.__call__  s|      $E400=ug
 
 
 
 	
 
 

  !+  
 
 	
r(   r   rR   r(   r&   r  r    si         +C +S +T + + + +$
i 
J 
 
 
 
 
 
r(   r  c                 8    g | ]\  }}|j         t          k    |S rR   )rM   rL   )r}   rV  objs      r&   r   r     s3       c
~!! 	!!!r(   c                      t           S r   )_classesrR   r(   r&   get_builtinsr	    s    Or(   )Pr)   logging	functoolsr   tenacityr   r   r   r   chromadb.api.typesr   r	   r
   r   r   r   r   r   r   ior   pathlibr   r   r\  r   typingr   r   r   r   r   r   r   r   numpyr  numpy.typingr  r   inspectr   sysr  chromadb.is_thin_clientr   r>   r   r   r   r   	getLoggerrL   rL  rO   rQ   r1   r3   rT   r^   r   r   r   r   r   re  rg  rw  r  r  r  r  r  r  r  
getmembersmodulesisclassr  r	  rR   r(   r&   <module>r     s     % % % % % % O O O O O O O O O O O O
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
             				   Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q Q                 



 6666666   NNN  %,,,,,,$$$$$$		8	$	$6# 6 6 6 6 6 6*
 *
 *
 *
 *
+<Y+G *
 *
 *
Z
 
 
 
 
 1) < 
 
 
 n n n n n/	: n n nb
 
 
 
 
/	: 
 
 
,*
 *
 *
 *
 *
#4Y#? *
 *
 *
Z1W 1W 1W 1W 1W-i8 1W 1W 1WhV V V V V"3I"> V V V@@8 @8 @8 @8 @8(3 @8 @8 @8F"(+<Y+G"H " " " "
 
 
 
 
"3I"> 
 
 
<)
 )
 )
 )
 )
*;I*F )
 )
 )
X    $5i$@   :9 9 9 9 9 1%	68I2J K 9 9 9x? ? ? ? ? 1%	68I2J K ? ? ?D, , , , ,%6y%A , , ,^(
 (
 (
 (
 (
!29!= (
 (
 (
V<Vc <V <V <V <V~5
 5
 5
 5
 5
/	: 5
 5
 5
r 'W'H(=wOO  d3i      s   <B BB