Ë
    •iÉ<  ã                   óÄ   — d Z ddlZddlZddddddd	d
dddddddœZ ej                   dj
                  di e¤Žej                  «      Zdede	fd„Z
dede	fd„Zdedefd„Zy)ab  
`ftfy.badness` contains a heuristic that detects likely mojibake.

This heuristic signals to ftfy which segments of text need to be fixed, and
also indicates when the text can stop being fixed.

The design of this heuristic is that we categorize the approximately 400
Unicode characters that occur in UTF-8 mojibake, specifically the characters
that come from mixing up UTF-8 with the other encodings we support. We
identify sequences and contexts of these characters that are much more likely
to be mojibake than intended strings, such as lowercase accented letters
followed immediately by currency symbols.
é    Nu   Â Â­Â·Â´â€“â€”â€•â€¦â€™u   Â€-ÂŸu.   Â¦Â¤Â¨Â¬Â¯Â¸Æ’Ë†Ë‡Ë˜Ë›Ëœâ€ â€¡â€°âŒâ—Šï¿½ÂªÂºu   Â¶Â§u   Â¢Â£Â¥â‚§â‚¬u!   Â¡Â«Â¿Â©Î„Î…â€˜â€šâ€œâ€žâ€¢â€¹ï£¿u   Â®Â»Ëâ€â€ºâ„¢uA   Â²Â³Â¹Â±Â¼Â½Â¾Ã—ÂµÃ·â„âˆ‚âˆ†âˆâˆ‘âˆšâˆžâˆ©âˆ«â‰ˆâ‰ â‰¡â‰¤â‰¥â„–u   Ã’-Ã–Ã™-ÃœÃ²-Ã¶Ã¸-Ã¼ÅÅŒÅªÅ²Â°uU   Ã€-Ã‘Ã˜ÃœÃÄ‚Ä€Ä„Ä†ÄŒÄŽÄÄ˜ÄšÄ’Ä–ÄžÄ¢Ä°ÄªÄ¶Ä¹Ä½ÅÄ»ÅƒÅ‡Å…Å’Å˜ÅšÅžÅ Å¢Å¤Å®Å°Å¸Å¹Å»Å½ÒuK   ÃŸÃ -Ã±ÄƒÄ…ÄÄ‡ÄÄÄ‘Ä™Ä›Ä“Ä—ÄŸÄ£Ä¯Ä«Ä·ÄºÄ¾Å‚Ä¼Å“Å•Å›ÅŸÅ¡Å¥Ã¼ÅºÅ¼Å¾Ò‘ï¬ï¬‚u   ÃžÎ‘-Î©Î†ÎˆÎ‰ÎŠÎŒÎŽÎÎªÎ«Ð-Ð¯u   Î±-Ï‰Î¬Î­Î®Î¯Î°Ð°-ÑŸu7   â”‚â”Œâ”â”˜â”œâ”¤â”¬â”¼â•-â•¬â–€â–„â–ˆâ–Œâ–â–‘â–’â–“)ÚcommonÚc1ÚbadÚlawÚcurrencyÚstart_punctuationÚend_punctuationÚnumericÚkaomojiÚupper_accentedÚlower_accentedÚupper_commonÚlower_commonÚboxu   
    [{c1}]
    |
    [{bad}{lower_accented}{upper_accented}{box}{start_punctuation}{end_punctuation}{currency}{numeric}{law}] [{bad}]
    |
    [a-zA-Z] [{lower_common}{upper_common}] [{bad}]
    |
    [{bad}] [{lower_accented}{upper_accented}{box}{start_punctuation}{end_punctuation}{currency}{numeric}{law}]
    |
    [{lower_accented}{lower_common}{box}{end_punctuation}{currency}{numeric}] [{upper_accented}]
    |
    [{box}{end_punctuation}{currency}{numeric}] [{lower_accented}]
    |
    [{lower_accented}{box}{end_punctuation}] [{currency}]
    |
    \s [{upper_accented}] [{currency}]
    |
    [{upper_accented}{box}] [{numeric}{law}]
    |
    [{lower_accented}{upper_accented}{box}{currency}{end_punctuation}] [{start_punctuation}] [{numeric}]
    |
    [{lower_accented}{upper_accented}{currency}{numeric}{box}{law}] [{end_punctuation}] [{start_punctuation}]
    |
    [{currency}{numeric}{box}] [{start_punctuation}]
    |
    [a-z] [{upper_accented}] [{start_punctuation}{currency}]
    |
    [{box}] [{kaomoji}]
    |
    [{lower_accented}{upper_accented}{currency}{numeric}{start_punctuation}{end_punctuation}{law}] [{box}]
    |
    [{box}] [{end_punctuation}]
    |
    [{lower_accented}{upper_accented}] [{start_punctuation}{end_punctuation}] \w
    |

    # The ligature Å“ when not followed by an unaccented Latin letter
    [Å’Å“][^A-Za-z]
    |

    # Degree signs after capital letters
    [{upper_accented}]Â°
    |

    # Common Windows-1252 2-character mojibake that isn't covered by the cases above
    [Ã‚ÃƒÃŽÃ][â‚¬Å“Å Å¡Â¢Â£Å¸Å¾\xa0\xadÂ®Â©Â°Â·Â»{start_punctuation}{end_punctuation}â€“â€”Â´]
    |
    Ã— [Â²Â³]
    |
    # Windows-1252 mojibake of Arabic words needs to include the 'common' characters.
    # To compensate, we require four characters to be matched.
      [Ã˜Ã™] [{common}{currency}{bad}{numeric}{start_punctuation}Å¸Å Â®Â°ÂµÂ»]
      [Ã˜Ã™] [{common}{currency}{bad}{numeric}{start_punctuation}Å¸Å Â®Â°ÂµÂ»]
    |

    # Windows-1252 mojibake that starts 3-character sequences for some South Asian
    # alphabets
    Ã [Â²ÂµÂ¹Â¼Â½Â¾]
    |

    # MacRoman mojibake that isn't covered by the cases above
    âˆš[Â±âˆ‚â€ â‰ Â®â„¢Â´â‰¤â‰¥Â¥ÂµÃ¸]
    |
    â‰ˆ[Â°Â¢]
    |
    â€šÃ„[Ã¬Ã®Ã¯Ã²Ã´ÃºÃ¹Ã»â€ Â°Â¢Ï€]
    |
    â€š[Ã¢Ã³][Ã Ã¤Â°Ãª]
    |

    # Windows-1251 mojibake of characters in the U+2000 range
    Ð²Ð‚
    |

    # Windows-1251 mojibake of Latin-1 characters and/or the Cyrillic alphabet.
    # Because the 2-character sequences involved here may be common, we require
    # seeing a 3-character sequence.
    [Ð’Ð“Ð Ð¡][{c1}{bad}{start_punctuation}{end_punctuation}{currency}Â°Âµ][Ð’Ð“Ð Ð¡]
    |
    # A distinctive five-character sequence of Cyrillic letters, which can be
    # Windows-1251 mojibake on top of Latin-1 mojibake of Windows-1252 characters.
    # Require a Latin letter nearby.
    Ð“ÑžÐ’Ð‚Ð’.[A-Za-z ]
    |

    # Windows-1252 encodings of 'Ã ' and 'Ã¡', as well as \xa0 itself
    Ãƒ[\xa0Â¡]
    |
    [a-z]\s?[ÃƒÃ‚][ ]
    |
    ^[ÃƒÃ‚][ ]
    |

    # Cases where Ã‚ precedes a character as an encoding of exactly the same
    # character, and the character is common enough
    [a-z.,?!{end_punctuation}] Ã‚ [ {start_punctuation}{end_punctuation}]
    |

    # Windows-1253 mojibake of characters in the U+2000 range
    Î²â‚¬[â„¢\xa0Î†\xadÂ®Â°]
    |

    # Windows-1253 mojibake of Latin-1 characters and/or the Greek alphabet
    [Î’Î“ÎžÎŸ][{c1}{bad}{start_punctuation}{end_punctuation}{currency}Â°][Î’Î“ÎžÎŸ]
    |

    # Windows-1257 mojibake of characters in the U+2000 range
    Äâ‚¬
    ÚtextÚreturnc                 óB   — t        j                  d«       t        | «      S )zÚ
    This was the name of the heuristic used in ftfy 2.x through 5.x. As an
    attempt at compatibility with external code that calls the heuristic
    directly, we redirect to our new heuristic, :func:`badness`.
    zj`sequence_weirdness()` is an old heuristic, and the current closest equivalent is `ftfy.badness.badness()`)ÚwarningsÚwarnÚbadness©r   s    úh/home/developers/rajanand/mypropertyqr-fmb-refixing-v2/venv/lib/python3.12/site-packages/ftfy/badness.pyÚsequence_weirdnessr   †  s!   € ô ‡MMð	9ôô 4‹=Ðó    c                 ó>   — t        t        j                  | «      «      S )z¸
    Get the 'badness' of a sequence of text, counting the number of unlikely
    character sequences. A badness greater than 0 indicates that some of it
    seems to be mojibake.
    )ÚlenÚ
BADNESS_REÚfindallr   s    r   r   r   “  s   € ô Œz×!Ñ! $Ó'Ó(Ð(r   c                 ó>   — t        t        j                  | «      «      S )a6  
    Returns true iff the given text looks like it contains mojibake.

    This can be faster than `badness`, because it returns when the first match
    is found to a regex instead of counting matches. Note that as strings get
    longer, they have a higher chance of returning True for `is_bad(string)`.
    )Úboolr   Úsearchr   s    r   Úis_badr"   œ  s   € ô ”
×!Ñ! $Ó'Ó(Ð(r   © )Ú__doc__r   ÚreÚMOJIBAKE_CATEGORIESÚcompileÚformatÚVERBOSEr   ÚstrÚintr   r   r    r"   r#   r   r   ú<module>r,      sè   ðñó Û 	ð	*ð ð	*ð2	ð	ð	ð	ð	ðD	ð*	6ðZ$	&ðN	Hð$	Eð	#ñWoÐ ðx ˆRZ‰Zðlð l÷X 	‰ñYnðZ ñ[nð^ ‡JJóaq€
ðh
˜Sð 
 Só 
ð)#ð )˜#ó )ð)ð )˜ô )r   