
    fXf9!                     J    d dl mZmZ ddlmZ ddlmZ  G d de          ZdS )    )ListUnion   )CharSetProber)ProbingStatec                   8    e Zd ZdZdZdZd fdZd fdZede	fd            Z
ede	fd	            Zdefd
ZdefdZdefdZdefdZdefdZdefdZdee         ddfdZdee         ddfdZdeeef         defdZedefd            ZdefdZ xZS )UTF1632Proberad  
    This class simply looks for occurrences of zero bytes, and infers
    whether the file is UTF16 or UTF32 (low-endian or big-endian)
    For instance, files looking like (       [nonzero] )+
    have a good probability to be UTF32BE.  Files looking like (   [nonzero] )+
    may be guessed to be UTF16BE, and inversely for little-endian varieties.
       gGz?returnNc                 2   t                                                       d| _        dgdz  | _        dgdz  | _        t
          j        | _        g d| _        d| _	        d| _
        d| _        d| _        d| _        d| _        |                                  d S )Nr      r   r   r   r   F)super__init__positionzeros_at_modnonzeros_at_modr   	DETECTING_statequadinvalid_utf16beinvalid_utf16leinvalid_utf32beinvalid_utf32le'first_half_surrogate_pair_detected_16be'first_half_surrogate_pair_detected_16leresetself	__class__s    Z/var/www/html/Qu*py/bism/lib/python3.11/site-packages/pip/_vendor/chardet/utf1632prober.pyr   zUTF1632Prober.__init__)   s    C!G !sQw", LL	$$$$7<47<4

    c                 
   t                                                       d| _        dgdz  | _        dgdz  | _        t
          j        | _        d| _        d| _	        d| _
        d| _        d| _        d| _        g d| _        d S )Nr   r   Fr   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   s    r!   r   zUTF1632Prober.reset8   s    C!G !sQw",$$$$7<47<4 LL			r"   c                     |                                  rdS |                                 rdS |                                 rdS |                                 rdS dS )Nzutf-32bezutf-32lezutf-16bezutf-16lezutf-16)is_likely_utf32beis_likely_utf32leis_likely_utf16beis_likely_utf16ler   s    r!   charset_namezUTF1632Prober.charset_nameF   sk    !!## 	:!!## 	:!!## 	:!!## 	:xr"   c                     dS )N  r)   s    r!   languagezUTF1632Prober.languageS   s    rr"   c                 2    t          d| j        dz            S )N      ?g      @maxr   r)   s    r!   approx_32bit_charsz UTF1632Prober.approx_32bit_charsW       3+,,,r"   c                 2    t          d| j        dz            S )Nr0   g       @r1   r)   s    r!   approx_16bit_charsz UTF1632Prober.approx_16bit_charsZ   r4   r"   c                    |                                  }|| j        k    ok| j        d         |z  | j        k    oR| j        d         |z  | j        k    o9| j        d         |z  | j        k    o | j        d         |z  | j        k    o| j         S Nr   r         )r3   MIN_CHARS_FOR_DETECTIONr   EXPECTED_RATIOr   r   r   approx_charss     r!   r%   zUTF1632Prober.is_likely_utf32be]   s    ..00t;; 
a </$2EE )!!$|3d6II)!!$|3d6II) $Q',69LL) ((	
r"   c                    |                                  }|| j        k    ok| j        d         |z  | j        k    oR| j        d         |z  | j        k    o9| j        d         |z  | j        k    o | j        d         |z  | j        k    o| j         S r8   )r3   r;   r   r<   r   r   r=   s     r!   r&   zUTF1632Prober.is_likely_utf32leg   s    ..00t;; 
 #l2T5HH )!!$|3d6II)!!$|3d6II) !!$|3d6II) ((	
r"   c                     |                                  }|| j        k    oU| j        d         | j        d         z   |z  | j        k    o.| j        d         | j        d         z   |z  | j        k    o| j         S )Nr   r:   r   r9   )r6   r;   r   r<   r   r   r=   s     r!   r'   zUTF1632Prober.is_likely_utf16beq       ..00t;; 
!!$t';A'>>,N!" )"1%(9!(<<L!") ((	
r"   c                     |                                  }|| j        k    oU| j        d         | j        d         z   |z  | j        k    o.| j        d         | j        d         z   |z  | j        k    o| j         S )Nr   r9   r   r:   )r6   r;   r   r<   r   r   r=   s     r!   r(   zUTF1632Prober.is_likely_utf16le{   rA   r"   r   c                 H   |d         dk    s:|d         dk    s.|d         dk    r)|d         dk    rd|d         cxk    rdk    r
n nd| _         |d         dk    s;|d         dk    s/|d         dk    r,|d         dk    r"d|d         cxk    rdk    rn d	S d| _        d	S d	S d	S d	S )
z
        Validate if the quad of bytes is valid UTF-32.

        UTF-32 is valid in the range 0x00000000 - 0x0010FFFF
        excluding 0x0000D800 - 0x0000DFFF

        https://en.wikipedia.org/wiki/UTF-32
        r   r         r9      Tr:   N)r   r   )r   r   s     r!   validate_utf32_charactersz'UTF1632Prober.validate_utf32_characters   s     GqLLAw~~Q1aA$$q'2I2I2I2IT2I2I2I2I2I#'D GqLLAw~~Q1aA$$q'2I2I2I2IT2I2I2I2I2I2I#'D    2I2Ir"   pairc                    | j         s<d|d         cxk    rdk    rn nd| _         nCd|d         cxk    rdk    r
n n-d| _        n%d|d         cxk    rdk    rn nd| _         nd| _        | j        sAd|d         cxk    rdk    rn n	d| _        d	S d|d         cxk    rdk    rn d	S d| _        d	S d	S d|d         cxk    rdk    rn n	d| _        d	S d| _        d	S )
a9  
        Validate if the pair of bytes is  valid UTF-16.

        UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF
        with an exception for surrogate pairs, which must be in the range
        0xD800-0xDBFF followed by 0xDC00-0xDFFF

        https://en.wikipedia.org/wiki/UTF-16
        rE   r      T   rF   Fr   N)r   r   r   r   )r   rH   s     r!   validate_utf16_charactersz'UTF1632Prober.validate_utf16_characters   sj    ; 		,tAw&&&&$&&&&&?C<<a((((D((((('+$tAw&&&&$&&&&&?D<<'+$; 		,tAw&&&&$&&&&&?C<<<a((((D(((((('+$$$ )( tAw&&&&$&&&&&?D<<<'+$$$r"   byte_strc                    |D ]}| j         dz  }|| j        |<   |dk    r^|                     | j                   |                     | j        dd                    |                     | j        dd                    |dk    r| j        |xx         dz  cc<   n| j        |xx         dz  cc<   | xj         dz  c_         | j        S )Nr   r:   r   r9   r   )r   r   rG   rL   r   r   state)r   rM   cmod4s       r!   feedzUTF1632Prober.feed   s     	 	A=1$DDIdOqyy..ty999..ty1~>>>..ty1~>>>Avv!$'''1,''''$T***a/***MMQMMMzr"   c                     | j         t          j        t          j        hv r| j         S |                                 dk    rt          j        | _         n| j        dk    rt          j        | _         | j         S )Ng?i   )r   r   NOT_MEFOUND_ITget_confidencer   r)   s    r!   rO   zUTF1632Prober.state   sf    ;<.0EFFF;  4''&/DKK]X%% '-DK{r"   c                     |                                  s<|                                 s(|                                 s|                                 rdndS )Ng333333?g        )r(   r'   r&   r%   r)   s    r!   rV   zUTF1632Prober.get_confidence   sh     &&(( ))++ ))++	
 ))++DD 		
r"   )r   N) __name__
__module____qualname____doc__r;   r<   r   r   propertystrr*   r.   floatr3   r6   boolr%   r&   r'   r(   r   intrG   rL   r   bytes	bytearrayr   rR   rO   rV   __classcell__)r    s   @r!   r	   r	      s         !N     ! ! ! ! ! ! 
c 
 
 
 X
 #    X-E - - - --E - - - -
4 
 
 
 

4 
 
 
 

4 
 
 
 

4 
 
 
 
(d3i (D ( ( ( (,,d3i ,D , , , ,@U5)#34      
| 
 
 
 X


 

 

 

 

 

 

 

 

r"   r	   N)typingr   r   charsetproberr   enumsr   r	   r-   r"   r!   <module>rg      s   *         ( ( ( ( ( (      F
 F
 F
 F
 F
M F
 F
 F
 F
 F
r"   