
    =i                         d dl mZ d dlZd dlmZ d dlZd dlZej                  j                  ej                  j                  e
            ZdZdZdZ ed      Z G d d	      Z G d
 d      Z G d d      Zy)    )defaultdictN)product            c                       e Zd ZdZej
                  j                  ed      Ze	d        Z
e	d        Ze	d        Ze	d        Zy)
Categoriesze
    Work with aliases from ISO 15924.
    https://en.wikipedia.org/wiki/ISO_15924#List_of_codes
    zcategories.jsonc              #     K   t        | j                        5 }t        j                  |      }ddd       |D ]#  }|d   vst	        dj                  |             d   D ]  }|d   |v s|dd   y# 1 sw Y   KxY ww)zL
        :return: iter: (start code, end code)
        :rtype: list
        NaliaseszInvalid category: {}pointsr   )openfpathjsonload
ValueErrorformat)cls
categoriesfdatacategorypoints         k/home/developers/rajanand/mypropertyqr-fmb-refixing-v2/venv/lib/python3.12/site-packages/homoglyphs/core.py_get_rangeszCategories._get_ranges   s      #))_ 	 99Q<D	  # 	JHtI. !7!>!>x!HII	J (^ 	 EQx:%BQi	 	  	 s'   BA8B*B.
B8B=Bc                     t               }| j                  |      D ],  \  }}d t        ||dz         D        }|j                  |       . |S )zZ
        :return: set of chars in alphabet by categories list
        :rtype: set
        c              3   2   K   | ]  }t        |        y wN)chr).0codes     r   	<genexpr>z*Categories.get_alphabet.<locals>.<genexpr>3   s     A4SYAs   r   )setr   rangeupdate)r   r   alphabetstartendcharss         r   get_alphabetzCategories.get_alphabet+   sO     5//*5 	#JE3A5a+@AEOOE"	#     c                 `   t        | j                        5 }t        j                  |      }ddd       	 t	        j
                  |      j                         d   }|d   v r|S t        |      }d   D ]  }|d   |cxk  r	|d   k  sn |d   c S  y# 1 sw Y   jxY w# t        $ r Y Hw xY w)z7
        :return: category
        :rtype: str
        Nr   r   r   r   r   )	r   r   r   r   unicodedatanamesplit	TypeErrorord)r   charr   r   r   r!   r   s          r   detectzCategories.detect7   s     #))_ 	 99Q<D	 	 "''-335a8H
 4	?* 4y(^ 	 EQx4+58+Qx	 	  	   		s   B&B! B!	B-,B-c                     t        | j                        5 }t        j                  |      }d d d        t	        d         S # 1 sw Y   xY w)Nr   )r   r   r   r   r#   r   r   r   s      r   get_allzCategories.get_allP   s@    #))_ 	 99Q<D	 4	?##	  	 s   AAN)__name__
__module____qualname____doc__ospathjoinCURRENT_DIRr   classmethodr   r*   r3   r6    r+   r   r
   r
      sl     GGLL&78E     	 	    0 $ $r+   r
   c                   t    e Zd Zej                  j                  ed      Zed        Z	ed        Z
ed        Zy)	Languageszlanguages.jsonc                    t        | j                  d      5 }t        j                  |      }ddd       t	               }|D ]4  }|vrt        dj                  |            |j                  ||          6 |S # 1 sw Y   NxY w)zY
        :return: set of chars in alphabet by languages list
        :rtype: set
        utf-8encodingNzInvalid language code: {})r   r   r   r   r#   r   r   r%   )r   	languagesr   r   r&   langs         r   r*   zLanguages.get_alphabetZ   s     #))g. 	 !99Q<D	 5 	(D4 !<!C!CD!IJJOODJ'	( 	  	 s   A;;Bc                     t        | j                        5 }t        j                  |      }ddd       t	               }j                         D ]  \  }}||v s|j                  |        |S # 1 sw Y   CxY w)zd
        :return: set of languages which alphabet contains passed char.
        :rtype: set
        N)r   r   r   r   r#   itemsadd)r   r2   r   r   rG   rH   r&   s          r   r3   zLanguages.detecti   so     #))_ 	 99Q<D	 E	"jjl 	$ND(xd#	$ 	  	 s   A..A7c                     t        | j                  d      5 }t        j                  |      }d d d        t	        j                               S # 1 sw Y   "xY w)NrD   rE   )r   r   r   r   r#   keysr5   s      r   r6   zLanguages.get_allw   sE    #))g. 	 !99Q<D	 499;	  	 s   AAN)r7   r8   r9   r;   r<   r=   r>   r   r?   r*   r3   r6   r@   r+   r   rB   rB   W   sQ    GGLL&67E       r+   rB   c                   f    e Zd ZdddeeefdZed        Zed        Zd Z	d Z
ddZd Zd	 Zd
 Zy)
HomoglyphsNc                 J   |t         t        t        fvrt        d      || _        || _        || _        |s|s|sd}t        |xs g       | _        t        |xs g       | _	        t        |xs g       | _
        | j                  r:t        j                  | j                        }| j                  j                  |       | j                  r:t        j                  | j                        }| j                  j                  |       | j                  | j                        | _        y )NzInvalid strategy)LATINCOMMON)STRATEGY_LOADSTRATEGY_IGNORESTRATEGY_REMOVEr   strategyascii_strategyascii_ranger#   r   rG   r&   r
   r*   r%   rB   	get_tabletable)selfr   rG   r&   rV   rW   rX   s          r   __init__zHomoglyphs.__init__   s     M?OLL/00 ,& )H,J j.B/Y_"- HN+??!..t?HMM  *>> --dnn=HMM  *^^DMM2
r+   c                 0   t        t              }t        t        j                  j                  t        d            5 }t        j                  |      }d d d        | D ]*  }|v s||   D ]  }|| v s||   j                  |        , |S # 1 sw Y   :xY w)Nzconfusables.json)
r   r#   r   r;   r<   r=   r>   r   r   rK   )r&   rZ   r   r   r2   	homoglyphs         r   rY   zHomoglyphs.get_table   s    C "'',,{,>?@ 	 A99Q<D	  	3Dt|!%d 3I H,d	23	3
 	  	 s   BBc                 T    t        t        |             }|j                  d        |S )Nc                     t        |        | fS r   )len)xs    r   <lambda>z*Homoglyphs.uniq_and_sort.<locals>.<lambda>   s    CF7A, r+   )key)listr#   sort)r   results     r   uniq_and_sortzHomoglyphs.uniq_and_sort   s#    c$i./r+   c                    t         j                  |      }|rL| j                  j                  |       t         j	                  |      }| j
                  j                  |       ndt        j                  |      }|y| j                  j                  |       t        j	                  |g      }| j
                  j                  |       | j                  | j
                        | _
        y)NFT)rB   r3   rG   r%   r*   r&   r
   r   rK   rY   rZ   )r[   r2   langsr&   r   s        r   _update_alphabetzHomoglyphs._update_alphabet   s      &NN!!%( --e4HMM  * "((.HOO)!..z:HMM  *^^DMM2
r+   c                    || j                   vrQ| j                  t        k(  r| j                  |      s-g S | j                  t        k(  r|gS | j                  t
        k(  rg S | j                  j                  |t                     }|r@|D cg c]&  }| j                  j                  |t                     ( }} |j                  |  |j                  |       | j                  |      S c c}w r   )r&   rV   rS   rk   rT   rU   rZ   getr#   r%   rK   rh   )r[   r2   	alt_charsalt_char
alt_chars2s        r   _get_char_variantszHomoglyphs._get_char_variants   s    t}}$}}-,,T2I/1v/1	 JJNN4/	JSTh$**..359TJTIj)d !!),, Us   
+C(c              #   8  K   g }|D ]g  }| j                  |      }|r>|D cg c]  }t        |      | j                  v s| }}|s| j                  t        k(  r y |sW|j                  |       i |r!t        | D ]  }dj                  |        y y c c}w w)N )rq   r1   rX   rW   rT   appendr   r=   )r[   textascii
variationsr2   rn   variants          r   _get_combinationszHomoglyphs._get_combinations   s     
 		-D//5I.7Yd3t9HXHX;XTY	Y T%8%8O%K!!),		- "J/ 'ggg&&'  Zs   BBBB<Bc                 6    t        | j                  |            S r   )re   ry   r[   ru   s     r   get_combinationszHomoglyphs.get_combinations   s    D**4011r+   c              #      K   | j                  |d      D ],  }t        t        t        |            | j                  v s)| . y w)NT)rv   )ry   maxmapr1   rX   )r[   ru   rx   s      r   	_to_asciizHomoglyphs._to_ascii   sD     --d$-? 	G3sG$%)9)99	s
   <AAc                 B    | j                  | j                  |            S r   )rh   r   r{   s     r   to_asciizHomoglyphs.to_ascii   s    !!$.."677r+   )F)r7   r8   r9   rT   ASCII_RANGEr\   staticmethodrY   rh   rk   rq   ry   r|   r   r   r@   r+   r   rO   rO   ~   s\    "&$)/(38 	 	  
&-.' 2
8r+   rO   )collectionsr   r   	itertoolsr   r;   r-   r<   dirnameabspath__file__r>   rS   rT   rU   r$   r   r
   rB   rO   r@   r+   r   <module>r      ss    #   	  ggoobggooh78 CjA$ A$H$  $ Nr8 r8r+   