
    i/                    D   U d dl mZ d dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d d	lmZ d
dlmZmZmZmZmZmZmZmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$  ee          dUd            Z% ee          dVd            Z& ee          dWd            Z' e(d  ej)                    D                       Z*de+d<   d e*D             Z,de+d<    ee          dXd            Z- ee          dVd            Z. ee          dVd            Z/ ee          dVd            Z0 ee          dVd             Z1 ee          dVd!            Z2 ee          dVd"            Z3 ee          dVd#            Z4 ee          dVd$            Z5 ee          dVd%            Z6 ee          dVd&            Z7 ee          dVd'            Z8 ee          dVd(            Z9 ee          dVd)            Z: ee          dVd*            Z; e e<e                    dYd,            Z= ee          dVd-            Z>dZd[d2Z? ed3          d\d5            Z@d]d7ZAd^d9ZBd_d`d=ZCdadAZDdbdBZEdCejF        dDfdcdHZG	 dddedTZHdS )f    )annotationsN)bisect_right)IncrementalDecoder)aliases)	lru_cache)findall)	Generator)MultibyteIncrementalDecoder   )ENCODING_MARKSIANA_SUPPORTED_SIMILARRE_POSSIBLE_ENCODING_INDICATIONUNICODE_RANGES_COMBINEDUNICODE_SECONDARY_RANGE_KEYWORDUTF8_MAXIMAL_ALLOCATIONCOMMON_CJK_CHARACTERS_LATIN_CJK_HANGUL	_KATAKANA	_HIRAGANA_THAI_ARABIC_ARABIC_ISOLATED_FORM_ACCENT_KEYWORDS_ACCENTUATED)maxsize	characterstrreturnintc                l   	 t          j        |           }n# t          $ r Y dS w xY wd}d|v r
|t          z  }d|v r
|t          z  }d|v r
|t
          z  }d|v r
|t          z  }d|v r
|t          z  }d|v r
|t          z  }d|v r|t          z  }d	|v r
|t          z  }t          D ]}||v r|t          z  } n|S )
zRCompute all name-based classification flags with a single unicodedata.name() call.r   LATINCJKHANGULKATAKANAHIRAGANATHAIARABICzISOLATED FORM)unicodedataname
ValueErrorr   r   r   r   r   r   r   r   r   r   )r   descflagskws       R/var/www/cyber-strat/venv/lib/python3.11/site-packages/charset_normalizer/utils.py_character_flagsr1   &   s   $Y//   qq E$}}4TT~~4d""**E  ::\!EE  Ls    
%%boolc                J    t          t          |           t          z            S N)r2   r1   r   r   s    r0   is_accentuatedr6   I   s     ++l:;;;    c                    t          j        |           }|s| S |                    d          }t          t	          |d         d                    S )N r      )r*   decompositionsplitchrr!   )r   
decomposedcodess      r0   remove_accentr@   N   sO    !/	::J !'',,Es58R  !!!r7   c              #  :   K   | ]\  }}|j         |j        |fV  d S r4   )startstop).0r+   	ord_ranges      r0   	<genexpr>rF   [   sE       < <i _ind+< < < < < <r7   zlist[tuple[int, int, str]]_UNICODE_RANGES_SORTEDc                    g | ]
}|d          S )r    )rD   es     r0   
<listcomp>rK   _   s    #I#I#IQAaD#I#I#Ir7   z	list[int]_UNICODE_RANGE_STARTS
str | Nonec                    t          |           }t          t          |          dz
  }|dk    rt          |         \  }}}||k     r|S dS )zK
    Retrieve the Unicode range official name from a single character.
    r   r   N)ordr   rL   rG   )r   character_ordidxrB   rC   r+   s         r0   unicode_rangerR   b   sU    
 YM ,m
<
<q
@C
axx237tT4K4r7   c                J    t          t          |           t          z            S r4   )r2   r1   r   r5   s    r0   is_latinrT   s   s     ++f4555r7   c                d    t          j        |           }d|v rdS t          |           }|dS d|v S )NPTFPunctuationr*   categoryrR   r   character_categorycharacter_ranges      r0   is_punctuationr]   x   sG    )29==
   t"/	":":OuO++r7   c                x    t          j        |           }d|v sd|v rdS t          |           }|dS d|v o|dk    S )NSNTFFormsLorX   rZ   s      r0   	is_symbolrc      s]    )29==
   C+=$=$=t"/	":":Ouo%D*<*DDr7   c                8    t          |           }|dS d|v pd|v S )NF	EmoticonsPictographs)rR   )r   r\   s     r0   is_emoticonrg      s/    "/	":":Ou/)M]o-MMr7   c                n    |                                  s| dv rdS t          j        |           }d|v p|dv S )N>      ｜+<>TZ>   PcPdPo)isspacer*   rY   )r   r[   s     r0   is_separatorrr      sO     i+AAAt)29==$$P(:>P(PPr7   c                V    |                                  |                                 k    S r4   )islowerisupperr5   s    r0   is_case_variablerv      s%    )"3"3"5"555r7   c                J    t          t          |           t          z            S r4   )r2   r1   r   r5   s    r0   is_cjkrx      s     ++d2333r7   c                J    t          t          |           t          z            S r4   )r2   r1   r   r5   s    r0   is_hiraganarz           ++i7888r7   c                J    t          t          |           t          z            S r4   )r2   r1   r   r5   s    r0   is_katakanar}      r{   r7   c                J    t          t          |           t          z            S r4   )r2   r1   r   r5   s    r0   	is_hangulr           ++g5666r7   c                J    t          t          |           t          z            S r4   )r2   r1   r   r5   s    r0   is_thair      s     ++e3444r7   c                J    t          t          |           t          z            S r4   )r2   r1   r   r5   s    r0   	is_arabicr      r   r7   c                J    t          t          |           t          z            S r4   )r2   r1   r   r5   s    r0   is_arabic_isolated_formr      s     ++.CCDDDr7   c                    | t           vS r4   )r   r5   s    r0   is_cjk_uncommonr      s    111r7   
range_namec                D     t           fdt          D                       S )Nc              3      K   | ]}|v V  	d S r4   rI   )rD   keywordr   s     r0   rF   z-is_unicode_range_secondary.<locals>.<genexpr>   s(      TTw*$TTTTTTr7   )anyr   )r   s   `r0   is_unicode_range_secondaryr      s'    TTTT4STTTTTTr7   c                r    |                                  du o!|                                 du o| dk    o| dk    S )NFu   ﻿)rq   isprintabler5   s    r0   is_unprintabler      sR     	u$ 	"!!##u,	"	" !	r7       sequencebytessearch_zonec           	        t          | t          t          f          st          t	          |           }t          t          | dt          ||                                       dd                    }t	          |          dk    rdS |D ][}|	                                
                    dd          }t          j                    D ]\  }}||k    r|c c S ||k    r|c c S \dS )zW
    Extract using ASCII-only decoder any specified encoding in the first n-bytes.
    Nasciiignoreerrorsr   -_)
isinstancer   	bytearray	TypeErrorlenr   r   mindecodelowerreplacer   items)r   r   seq_lenresultsspecified_encodingencoding_aliasencoding_ianas          r0   any_specified_encodingr      s    h	 233 x==G ',3w,,,-44WX4NN G
 7||qt% 
% 
%/5577??SII
 .5]__ 	% 	%)NM!333$$$$$$ 222$$$$$$ 3	% 4r7      r+   c                h    | dv p.t          t          j        d|            j        t                    S )zQ
    Verify is a specific encoding is a multi byte one based on it IANA name
    >	   utf_7utf_8utf_16utf_32	utf_16_be	utf_16_le	utf_32_be	utf_32_le	utf_8_sig
encodings.)
issubclass	importlibimport_moduler   r
   )r+   s    r0   is_multi_byte_encodingr     sG    
  
 
  
 3T 3 344G#
 
r7   tuple[str | None, bytes]c                    t           D ]I}t           |         }t          |t                    r|g}|D ]}|                     |          r||fc c S  JdS )z9
    Identify and extract SIG/BOM in given sequence.
    )Nr7   )r   r   r   
startswith)r   iana_encodingmarksmarks       r0   identify_sig_or_bomr     s    
 ( + +%3M%BeU## 	GE 	+ 	+D""4(( +$d******+	+ 9r7   r   c                
    | dvS )N>   r   r   rI   )r   s    r0   should_strip_sig_or_bomr   .  s     444r7   Tcp_namestrictc                    |                                                      dd          } t          j                    D ]\  }}| ||fv r|c S |rt	          d|  d          | S )zIReturns the Python normalized encoding name (Not the IANA official name).r   r   zUnable to retrieve IANA for '')r   r   r   r   r,   )r   r   r   r   s       r0   	iana_namer   2  s    mmoo%%c3//G
 *1 ! !%~}555     6  ECCCCDDDNr7   iana_name_aiana_name_bfloatc                   t          |           st          |          rdS t          j        d|            j        }t          j        d|           j        } |d          } |d          }d}t	          d          D ]C}t          |g          }|                    |          |                    |          k    r|dz  }D|dz  S )Ng        r   r   r   r      r   )r   r   r   r   ranger   r   )	r   r   	decoder_a	decoder_bid_aid_bcharacter_match_countito_be_decodeds	            r0   cp_similarityr   C  s    k** .D[.Q.Q s'(B[(B(BCCVI'(B[(B(BCCVI(y999D(y999D!"3ZZ ' '$aSzz;;}%%])C)CCC!Q&! 3&&r7   c                2    | t           v o|t           |          v S )z
    Determine if two code page are at least 80% similar. IANA_SUPPORTED_SIMILAR dict was generated using
    the function cp_similarity.
    )r   )r   r   s     r0   is_cp_similarr   W  s%     	-- 	?1+>>r7   charset_normalizerz)%(asctime)s | %(levelname)s | %(message)slevelformat_stringNonec                    t          j        |           }|                    |           t          j                    }|                    t          j        |                     |                    |           d S r4   )logging	getLoggersetLevelStreamHandlersetFormatter	Formatter
addHandler)r+   r   r   loggerhandlers        r0   set_logging_handlerr   b  sm    
 t$$F
OOE#%%G*=99:::
gr7   	sequencesr   offsetsr   
chunk_sizebom_or_sig_availablestrip_sig_or_bomsig_payloadis_multi_byte_decoderdecoded_payloadGenerator[str, None, None]c	              #    K   |r!|du r|D ]}	||	|	|z            }
|
s d S |
V  d S |D ]}	|	|z   }|t          |           dz   k    r| |	|	|z            }|r	|du r||z   }|                    ||rdnd          }
|ru|	dk    rot          |d          }|r]|
d |         |vrQt          |	|	dz
  d	          D ]<}| ||         }|r	|du r||z   }|                    |d          }
|
d |         |v r n=|
V  d S )
NF   r   r   r   r   r:      )r   r   r   r   )r   r   r   r   r   r   r   r   r   r   chunk	chunk_endcut_sequencechunk_partial_size_chkjs                  r0   cut_sequence_chunksr   o  s       *0E99 	 	A#AJ$67E KKKK		 	  #	 #	AJI3y>>A---$QZ%78L# :(8E(A(A*\9 ''#8Fxxh (  E % "Q.1*b.A.A& $"5556oMM"1a!eR00 	" 	"'09'=/ F4D4M4M+6+EL , 3 3M( 3 S S !8"8!89_LL!E M KKKKG#	 #	r7   )r   r   r    r!   )r   r   r    r2   )r   r   r    r   )r   r   r    rM   )r   r   r    r2   )r   )r   r   r   r!   r    rM   )r+   r   r    r2   )r   r   r    r   )r   r   r    r2   )T)r   r   r   r2   r    r   )r   r   r   r   r    r   )r   r   r   r   r    r2   )r+   r   r   r!   r   r   r    r   r4   )r   r   r   r   r   r   r   r!   r   r2   r   r2   r   r   r   r2   r   rM   r    r   )I
__future__r   r   r   r*   bisectr   codecsr   encodings.aliasesr   	functoolsr   rer   typingr	   _multibytecodecr
   constantr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r1   r6   r@   sortedr   rG   __annotations__rL   rR   rT   r]   rc   rg   rr   rv   rx   rz   r}   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   INFOr   r   rI   r7   r0   <module>r	     s   " " " " " " "                % % % % % % % % % % % %                                                            * *+++   ,+D *+++< < < ,+< *+++" " " ,+" 6<V < <828::< < < 6 6      $J#I2H#I#I#I  I I I I *+++   ,+  *+++6 6 6 ,+6 *+++, , , ,+, *+++E E E ,+E *+++N N N ,+N *+++Q Q Q ,+Q *+++6 6 6 ,+6 *+++4 4 4 ,+4 *+++9 9 9 ,+9 *+++9 9 9 ,+9 *+++7 7 7 ,+7 *+++5 5 5 ,+5 *+++7 7 7 ,+7 *+++E E E ,+E *+++2 2 2 ,+2 33.//000U U U 10U *+++   ,+    @ 3   (   $5 5 5 5    "' ' ' '(    %D
 
 
 
 
, #'5 5 5 5 5 5 5r7   