Upload model files
Browse files
This view is limited to 50 files because it contains too many changes.
							See raw diff
- .gitattributes +11 -0
- .venv/.gitignore +1 -0
- .venv/bin/Activate.ps1 +248 -0
- .venv/bin/activate +76 -0
- .venv/bin/activate.csh +27 -0
- .venv/bin/activate.fish +69 -0
- .venv/bin/huggingface-cli +8 -0
- .venv/bin/normalizer +8 -0
- .venv/bin/pip +8 -0
- .venv/bin/pip3 +8 -0
- .venv/bin/pip3.13 +8 -0
- .venv/bin/python +0 -0
- .venv/bin/python3 +0 -0
- .venv/bin/python3.13 +0 -0
- .venv/bin/tqdm +8 -0
- .venv/lib/python3.13/site-packages/PyYAML-6.0.2.dist-info/INSTALLER +1 -0
- .venv/lib/python3.13/site-packages/PyYAML-6.0.2.dist-info/LICENSE +20 -0
- .venv/lib/python3.13/site-packages/PyYAML-6.0.2.dist-info/METADATA +46 -0
- .venv/lib/python3.13/site-packages/PyYAML-6.0.2.dist-info/RECORD +43 -0
- .venv/lib/python3.13/site-packages/PyYAML-6.0.2.dist-info/WHEEL +5 -0
- .venv/lib/python3.13/site-packages/PyYAML-6.0.2.dist-info/top_level.txt +2 -0
- .venv/lib/python3.13/site-packages/_yaml/__init__.py +33 -0
- .venv/lib/python3.13/site-packages/certifi-2025.1.31.dist-info/INSTALLER +1 -0
- .venv/lib/python3.13/site-packages/certifi-2025.1.31.dist-info/LICENSE +20 -0
- .venv/lib/python3.13/site-packages/certifi-2025.1.31.dist-info/METADATA +77 -0
- .venv/lib/python3.13/site-packages/certifi-2025.1.31.dist-info/RECORD +14 -0
- .venv/lib/python3.13/site-packages/certifi-2025.1.31.dist-info/WHEEL +5 -0
- .venv/lib/python3.13/site-packages/certifi-2025.1.31.dist-info/top_level.txt +1 -0
- .venv/lib/python3.13/site-packages/certifi/__init__.py +4 -0
- .venv/lib/python3.13/site-packages/certifi/__main__.py +12 -0
- .venv/lib/python3.13/site-packages/certifi/cacert.pem +0 -0
- .venv/lib/python3.13/site-packages/certifi/core.py +114 -0
- .venv/lib/python3.13/site-packages/certifi/py.typed +0 -0
- .venv/lib/python3.13/site-packages/charset_normalizer-3.4.1.dist-info/INSTALLER +1 -0
- .venv/lib/python3.13/site-packages/charset_normalizer-3.4.1.dist-info/LICENSE +21 -0
- .venv/lib/python3.13/site-packages/charset_normalizer-3.4.1.dist-info/METADATA +721 -0
- .venv/lib/python3.13/site-packages/charset_normalizer-3.4.1.dist-info/RECORD +35 -0
- .venv/lib/python3.13/site-packages/charset_normalizer-3.4.1.dist-info/WHEEL +5 -0
- .venv/lib/python3.13/site-packages/charset_normalizer-3.4.1.dist-info/entry_points.txt +2 -0
- .venv/lib/python3.13/site-packages/charset_normalizer-3.4.1.dist-info/top_level.txt +1 -0
- .venv/lib/python3.13/site-packages/charset_normalizer/__init__.py +48 -0
- .venv/lib/python3.13/site-packages/charset_normalizer/__main__.py +6 -0
- .venv/lib/python3.13/site-packages/charset_normalizer/api.py +668 -0
- .venv/lib/python3.13/site-packages/charset_normalizer/cd.py +395 -0
- .venv/lib/python3.13/site-packages/charset_normalizer/cli/__init__.py +8 -0
- .venv/lib/python3.13/site-packages/charset_normalizer/cli/__main__.py +321 -0
- .venv/lib/python3.13/site-packages/charset_normalizer/constant.py +1998 -0
- .venv/lib/python3.13/site-packages/charset_normalizer/legacy.py +66 -0
- .venv/lib/python3.13/site-packages/charset_normalizer/md.cpython-313-darwin.so +3 -0
- .venv/lib/python3.13/site-packages/charset_normalizer/md.py +630 -0
    	
        .gitattributes
    CHANGED
    
    | @@ -33,3 +33,14 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text | |
| 33 | 
             
            *.zip filter=lfs diff=lfs merge=lfs -text
         | 
| 34 | 
             
            *.zst filter=lfs diff=lfs merge=lfs -text
         | 
| 35 | 
             
            *tfevents* filter=lfs diff=lfs merge=lfs -text
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 33 | 
             
            *.zip filter=lfs diff=lfs merge=lfs -text
         | 
| 34 | 
             
            *.zst filter=lfs diff=lfs merge=lfs -text
         | 
| 35 | 
             
            *tfevents* filter=lfs diff=lfs merge=lfs -text
         | 
| 36 | 
            +
            <<<<<<< HEAD
         | 
| 37 | 
            +
            tokenizer.json filter=lfs diff=lfs merge=lfs -text
         | 
| 38 | 
            +
            =======
         | 
| 39 | 
            +
            >>>>>>> 4873013eddb1c2d779f664501b56e56d5e261341
         | 
| 40 | 
            +
            .venv/lib/python3.13/site-packages/charset_normalizer/md.cpython-313-darwin.so filter=lfs diff=lfs merge=lfs -text
         | 
| 41 | 
            +
            .venv/lib/python3.13/site-packages/charset_normalizer/md__mypyc.cpython-313-darwin.so filter=lfs diff=lfs merge=lfs -text
         | 
| 42 | 
            +
            .venv/lib/python3.13/site-packages/pip/_vendor/distlib/t64-arm.exe filter=lfs diff=lfs merge=lfs -text
         | 
| 43 | 
            +
            .venv/lib/python3.13/site-packages/pip/_vendor/distlib/t64.exe filter=lfs diff=lfs merge=lfs -text
         | 
| 44 | 
            +
            .venv/lib/python3.13/site-packages/pip/_vendor/distlib/w64-arm.exe filter=lfs diff=lfs merge=lfs -text
         | 
| 45 | 
            +
            .venv/lib/python3.13/site-packages/pip/_vendor/distlib/w64.exe filter=lfs diff=lfs merge=lfs -text
         | 
| 46 | 
            +
            .venv/lib/python3.13/site-packages/yaml/_yaml.cpython-313-darwin.so filter=lfs diff=lfs merge=lfs -text
         | 
    	
        .venv/.gitignore
    ADDED
    
    | @@ -0,0 +1 @@ | |
|  | 
|  | |
| 1 | 
            +
            *
         | 
    	
        .venv/bin/Activate.ps1
    ADDED
    
    | @@ -0,0 +1,248 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            <#
         | 
| 2 | 
            +
            .Synopsis
         | 
| 3 | 
            +
            Activate a Python virtual environment for the current PowerShell session.
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            .Description
         | 
| 6 | 
            +
            Pushes the python executable for a virtual environment to the front of the
         | 
| 7 | 
            +
            $Env:PATH environment variable and sets the prompt to signify that you are
         | 
| 8 | 
            +
            in a Python virtual environment. Makes use of the command line switches as
         | 
| 9 | 
            +
            well as the `pyvenv.cfg` file values present in the virtual environment.
         | 
| 10 | 
            +
             | 
| 11 | 
            +
            .Parameter VenvDir
         | 
| 12 | 
            +
            Path to the directory that contains the virtual environment to activate. The
         | 
| 13 | 
            +
            default value for this is the parent of the directory that the Activate.ps1
         | 
| 14 | 
            +
            script is located within.
         | 
| 15 | 
            +
             | 
| 16 | 
            +
            .Parameter Prompt
         | 
| 17 | 
            +
            The prompt prefix to display when this virtual environment is activated. By
         | 
| 18 | 
            +
            default, this prompt is the name of the virtual environment folder (VenvDir)
         | 
| 19 | 
            +
            surrounded by parentheses and followed by a single space (ie. '(.venv) ').
         | 
| 20 | 
            +
             | 
| 21 | 
            +
            .Example
         | 
| 22 | 
            +
            Activate.ps1
         | 
| 23 | 
            +
            Activates the Python virtual environment that contains the Activate.ps1 script.
         | 
| 24 | 
            +
             | 
| 25 | 
            +
            .Example
         | 
| 26 | 
            +
            Activate.ps1 -Verbose
         | 
| 27 | 
            +
            Activates the Python virtual environment that contains the Activate.ps1 script,
         | 
| 28 | 
            +
            and shows extra information about the activation as it executes.
         | 
| 29 | 
            +
             | 
| 30 | 
            +
            .Example
         | 
| 31 | 
            +
            Activate.ps1 -VenvDir C:\Users\MyUser\Common\.venv
         | 
| 32 | 
            +
            Activates the Python virtual environment located in the specified location.
         | 
| 33 | 
            +
             | 
| 34 | 
            +
            .Example
         | 
| 35 | 
            +
            Activate.ps1 -Prompt "MyPython"
         | 
| 36 | 
            +
            Activates the Python virtual environment that contains the Activate.ps1 script,
         | 
| 37 | 
            +
            and prefixes the current prompt with the specified string (surrounded in
         | 
| 38 | 
            +
            parentheses) while the virtual environment is active.
         | 
| 39 | 
            +
             | 
| 40 | 
            +
            .Notes
         | 
| 41 | 
            +
            On Windows, it may be required to enable this Activate.ps1 script by setting the
         | 
| 42 | 
            +
            execution policy for the user. You can do this by issuing the following PowerShell
         | 
| 43 | 
            +
            command:
         | 
| 44 | 
            +
             | 
| 45 | 
            +
            PS C:\> Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser
         | 
| 46 | 
            +
             | 
| 47 | 
            +
            For more information on Execution Policies: 
         | 
| 48 | 
            +
            https://go.microsoft.com/fwlink/?LinkID=135170
         | 
| 49 | 
            +
             | 
| 50 | 
            +
            #>
         | 
| 51 | 
            +
            Param(
         | 
| 52 | 
            +
                [Parameter(Mandatory = $false)]
         | 
| 53 | 
            +
                [String]
         | 
| 54 | 
            +
                $VenvDir,
         | 
| 55 | 
            +
                [Parameter(Mandatory = $false)]
         | 
| 56 | 
            +
                [String]
         | 
| 57 | 
            +
                $Prompt
         | 
| 58 | 
            +
            )
         | 
| 59 | 
            +
             | 
| 60 | 
            +
            <# Function declarations --------------------------------------------------- #>
         | 
| 61 | 
            +
             | 
| 62 | 
            +
            <#
         | 
| 63 | 
            +
            .Synopsis
         | 
| 64 | 
            +
            Remove all shell session elements added by the Activate script, including the
         | 
| 65 | 
            +
            addition of the virtual environment's Python executable from the beginning of
         | 
| 66 | 
            +
            the PATH variable.
         | 
| 67 | 
            +
             | 
| 68 | 
            +
            .Parameter NonDestructive
         | 
| 69 | 
            +
            If present, do not remove this function from the global namespace for the
         | 
| 70 | 
            +
            session.
         | 
| 71 | 
            +
             | 
| 72 | 
            +
            #>
         | 
| 73 | 
            +
            function global:deactivate ([switch]$NonDestructive) {
         | 
| 74 | 
            +
                # Revert to original values
         | 
| 75 | 
            +
             | 
| 76 | 
            +
                # The prior prompt:
         | 
| 77 | 
            +
                if (Test-Path -Path Function:_OLD_VIRTUAL_PROMPT) {
         | 
| 78 | 
            +
                    Copy-Item -Path Function:_OLD_VIRTUAL_PROMPT -Destination Function:prompt
         | 
| 79 | 
            +
                    Remove-Item -Path Function:_OLD_VIRTUAL_PROMPT
         | 
| 80 | 
            +
                }
         | 
| 81 | 
            +
             | 
| 82 | 
            +
                # The prior PYTHONHOME:
         | 
| 83 | 
            +
                if (Test-Path -Path Env:_OLD_VIRTUAL_PYTHONHOME) {
         | 
| 84 | 
            +
                    Copy-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME -Destination Env:PYTHONHOME
         | 
| 85 | 
            +
                    Remove-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME
         | 
| 86 | 
            +
                }
         | 
| 87 | 
            +
             | 
| 88 | 
            +
                # The prior PATH:
         | 
| 89 | 
            +
                if (Test-Path -Path Env:_OLD_VIRTUAL_PATH) {
         | 
| 90 | 
            +
                    Copy-Item -Path Env:_OLD_VIRTUAL_PATH -Destination Env:PATH
         | 
| 91 | 
            +
                    Remove-Item -Path Env:_OLD_VIRTUAL_PATH
         | 
| 92 | 
            +
                }
         | 
| 93 | 
            +
             | 
| 94 | 
            +
                # Just remove the VIRTUAL_ENV altogether:
         | 
| 95 | 
            +
                if (Test-Path -Path Env:VIRTUAL_ENV) {
         | 
| 96 | 
            +
                    Remove-Item -Path env:VIRTUAL_ENV
         | 
| 97 | 
            +
                }
         | 
| 98 | 
            +
             | 
| 99 | 
            +
                # Just remove VIRTUAL_ENV_PROMPT altogether.
         | 
| 100 | 
            +
                if (Test-Path -Path Env:VIRTUAL_ENV_PROMPT) {
         | 
| 101 | 
            +
                    Remove-Item -Path env:VIRTUAL_ENV_PROMPT
         | 
| 102 | 
            +
                }
         | 
| 103 | 
            +
             | 
| 104 | 
            +
                # Just remove the _PYTHON_VENV_PROMPT_PREFIX altogether:
         | 
| 105 | 
            +
                if (Get-Variable -Name "_PYTHON_VENV_PROMPT_PREFIX" -ErrorAction SilentlyContinue) {
         | 
| 106 | 
            +
                    Remove-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Scope Global -Force
         | 
| 107 | 
            +
                }
         | 
| 108 | 
            +
             | 
| 109 | 
            +
                # Leave deactivate function in the global namespace if requested:
         | 
| 110 | 
            +
                if (-not $NonDestructive) {
         | 
| 111 | 
            +
                    Remove-Item -Path function:deactivate
         | 
| 112 | 
            +
                }
         | 
| 113 | 
            +
            }
         | 
| 114 | 
            +
             | 
| 115 | 
            +
            <#
         | 
| 116 | 
            +
            .Description
         | 
| 117 | 
            +
            Get-PyVenvConfig parses the values from the pyvenv.cfg file located in the
         | 
| 118 | 
            +
            given folder, and returns them in a map.
         | 
| 119 | 
            +
             | 
| 120 | 
            +
            For each line in the pyvenv.cfg file, if that line can be parsed into exactly
         | 
| 121 | 
            +
            two strings separated by `=` (with any amount of whitespace surrounding the =)
         | 
| 122 | 
            +
            then it is considered a `key = value` line. The left hand string is the key,
         | 
| 123 | 
            +
            the right hand is the value.
         | 
| 124 | 
            +
             | 
| 125 | 
            +
            If the value starts with a `'` or a `"` then the first and last character is
         | 
| 126 | 
            +
            stripped from the value before being captured.
         | 
| 127 | 
            +
             | 
| 128 | 
            +
            .Parameter ConfigDir
         | 
| 129 | 
            +
            Path to the directory that contains the `pyvenv.cfg` file.
         | 
| 130 | 
            +
            #>
         | 
| 131 | 
            +
            function Get-PyVenvConfig(
         | 
| 132 | 
            +
                [String]
         | 
| 133 | 
            +
                $ConfigDir
         | 
| 134 | 
            +
            ) {
         | 
| 135 | 
            +
                Write-Verbose "Given ConfigDir=$ConfigDir, obtain values in pyvenv.cfg"
         | 
| 136 | 
            +
             | 
| 137 | 
            +
                # Ensure the file exists, and issue a warning if it doesn't (but still allow the function to continue).
         | 
| 138 | 
            +
                $pyvenvConfigPath = Join-Path -Resolve -Path $ConfigDir -ChildPath 'pyvenv.cfg' -ErrorAction Continue
         | 
| 139 | 
            +
             | 
| 140 | 
            +
                # An empty map will be returned if no config file is found.
         | 
| 141 | 
            +
                $pyvenvConfig = @{ }
         | 
| 142 | 
            +
             | 
| 143 | 
            +
                if ($pyvenvConfigPath) {
         | 
| 144 | 
            +
             | 
| 145 | 
            +
                    Write-Verbose "File exists, parse `key = value` lines"
         | 
| 146 | 
            +
                    $pyvenvConfigContent = Get-Content -Path $pyvenvConfigPath
         | 
| 147 | 
            +
             | 
| 148 | 
            +
                    $pyvenvConfigContent | ForEach-Object {
         | 
| 149 | 
            +
                        $keyval = $PSItem -split "\s*=\s*", 2
         | 
| 150 | 
            +
                        if ($keyval[0] -and $keyval[1]) {
         | 
| 151 | 
            +
                            $val = $keyval[1]
         | 
| 152 | 
            +
             | 
| 153 | 
            +
                            # Remove extraneous quotations around a string value.
         | 
| 154 | 
            +
                            if ("'""".Contains($val.Substring(0, 1))) {
         | 
| 155 | 
            +
                                $val = $val.Substring(1, $val.Length - 2)
         | 
| 156 | 
            +
                            }
         | 
| 157 | 
            +
             | 
| 158 | 
            +
                            $pyvenvConfig[$keyval[0]] = $val
         | 
| 159 | 
            +
                            Write-Verbose "Adding Key: '$($keyval[0])'='$val'"
         | 
| 160 | 
            +
                        }
         | 
| 161 | 
            +
                    }
         | 
| 162 | 
            +
                }
         | 
| 163 | 
            +
                return $pyvenvConfig
         | 
| 164 | 
            +
            }
         | 
| 165 | 
            +
             | 
| 166 | 
            +
             | 
| 167 | 
            +
            <# Begin Activate script --------------------------------------------------- #>
         | 
| 168 | 
            +
             | 
| 169 | 
            +
            # Determine the containing directory of this script
         | 
| 170 | 
            +
            $VenvExecPath = Split-Path -Parent $MyInvocation.MyCommand.Definition
         | 
| 171 | 
            +
            $VenvExecDir = Get-Item -Path $VenvExecPath
         | 
| 172 | 
            +
             | 
| 173 | 
            +
            Write-Verbose "Activation script is located in path: '$VenvExecPath'"
         | 
| 174 | 
            +
            Write-Verbose "VenvExecDir Fullname: '$($VenvExecDir.FullName)"
         | 
| 175 | 
            +
            Write-Verbose "VenvExecDir Name: '$($VenvExecDir.Name)"
         | 
| 176 | 
            +
             | 
| 177 | 
            +
            # Set values required in priority: CmdLine, ConfigFile, Default
         | 
| 178 | 
            +
            # First, get the location of the virtual environment, it might not be
         | 
| 179 | 
            +
            # VenvExecDir if specified on the command line.
         | 
| 180 | 
            +
            if ($VenvDir) {
         | 
| 181 | 
            +
                Write-Verbose "VenvDir given as parameter, using '$VenvDir' to determine values"
         | 
| 182 | 
            +
            }
         | 
| 183 | 
            +
            else {
         | 
| 184 | 
            +
                Write-Verbose "VenvDir not given as a parameter, using parent directory name as VenvDir."
         | 
| 185 | 
            +
                $VenvDir = $VenvExecDir.Parent.FullName.TrimEnd("\\/")
         | 
| 186 | 
            +
                Write-Verbose "VenvDir=$VenvDir"
         | 
| 187 | 
            +
            }
         | 
| 188 | 
            +
             | 
| 189 | 
            +
            # Next, read the `pyvenv.cfg` file to determine any required value such
         | 
| 190 | 
            +
            # as `prompt`.
         | 
| 191 | 
            +
            $pyvenvCfg = Get-PyVenvConfig -ConfigDir $VenvDir
         | 
| 192 | 
            +
             | 
| 193 | 
            +
            # Next, set the prompt from the command line, or the config file, or
         | 
| 194 | 
            +
            # just use the name of the virtual environment folder.
         | 
| 195 | 
            +
            if ($Prompt) {
         | 
| 196 | 
            +
                Write-Verbose "Prompt specified as argument, using '$Prompt'"
         | 
| 197 | 
            +
            }
         | 
| 198 | 
            +
            else {
         | 
| 199 | 
            +
                Write-Verbose "Prompt not specified as argument to script, checking pyvenv.cfg value"
         | 
| 200 | 
            +
                if ($pyvenvCfg -and $pyvenvCfg['prompt']) {
         | 
| 201 | 
            +
                    Write-Verbose "  Setting based on value in pyvenv.cfg='$($pyvenvCfg['prompt'])'"
         | 
| 202 | 
            +
                    $Prompt = $pyvenvCfg['prompt'];
         | 
| 203 | 
            +
                }
         | 
| 204 | 
            +
                else {
         | 
| 205 | 
            +
                    Write-Verbose "  Setting prompt based on parent's directory's name. (Is the directory name passed to venv module when creating the virtual environment)"
         | 
| 206 | 
            +
                    Write-Verbose "  Got leaf-name of $VenvDir='$(Split-Path -Path $venvDir -Leaf)'"
         | 
| 207 | 
            +
                    $Prompt = Split-Path -Path $venvDir -Leaf
         | 
| 208 | 
            +
                }
         | 
| 209 | 
            +
            }
         | 
| 210 | 
            +
             | 
| 211 | 
            +
            Write-Verbose "Prompt = '$Prompt'"
         | 
| 212 | 
            +
            Write-Verbose "VenvDir='$VenvDir'"
         | 
| 213 | 
            +
             | 
| 214 | 
            +
            # Deactivate any currently active virtual environment, but leave the
         | 
| 215 | 
            +
            # deactivate function in place.
         | 
| 216 | 
            +
            deactivate -nondestructive
         | 
| 217 | 
            +
             | 
| 218 | 
            +
            # Now set the environment variable VIRTUAL_ENV, used by many tools to determine
         | 
| 219 | 
            +
            # that there is an activated venv.
         | 
| 220 | 
            +
            $env:VIRTUAL_ENV = $VenvDir
         | 
| 221 | 
            +
             | 
| 222 | 
            +
            $env:VIRTUAL_ENV_PROMPT = $Prompt
         | 
| 223 | 
            +
             | 
| 224 | 
            +
            if (-not $Env:VIRTUAL_ENV_DISABLE_PROMPT) {
         | 
| 225 | 
            +
             | 
| 226 | 
            +
                Write-Verbose "Setting prompt to '$Prompt'"
         | 
| 227 | 
            +
             | 
| 228 | 
            +
                # Set the prompt to include the env name
         | 
| 229 | 
            +
                # Make sure _OLD_VIRTUAL_PROMPT is global
         | 
| 230 | 
            +
                function global:_OLD_VIRTUAL_PROMPT { "" }
         | 
| 231 | 
            +
                Copy-Item -Path function:prompt -Destination function:_OLD_VIRTUAL_PROMPT
         | 
| 232 | 
            +
                New-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Description "Python virtual environment prompt prefix" -Scope Global -Option ReadOnly -Visibility Public -Value $Prompt
         | 
| 233 | 
            +
             | 
| 234 | 
            +
                function global:prompt {
         | 
| 235 | 
            +
                    Write-Host -NoNewline -ForegroundColor Green "($_PYTHON_VENV_PROMPT_PREFIX) "
         | 
| 236 | 
            +
                    _OLD_VIRTUAL_PROMPT
         | 
| 237 | 
            +
                }
         | 
| 238 | 
            +
            }
         | 
| 239 | 
            +
             | 
| 240 | 
            +
            # Clear PYTHONHOME
         | 
| 241 | 
            +
            if (Test-Path -Path Env:PYTHONHOME) {
         | 
| 242 | 
            +
                Copy-Item -Path Env:PYTHONHOME -Destination Env:_OLD_VIRTUAL_PYTHONHOME
         | 
| 243 | 
            +
                Remove-Item -Path Env:PYTHONHOME
         | 
| 244 | 
            +
            }
         | 
| 245 | 
            +
             | 
| 246 | 
            +
            # Add the venv to the PATH
         | 
| 247 | 
            +
            Copy-Item -Path Env:PATH -Destination Env:_OLD_VIRTUAL_PATH
         | 
| 248 | 
            +
            $Env:PATH = "$VenvExecDir$([System.IO.Path]::PathSeparator)$Env:PATH"
         | 
    	
        .venv/bin/activate
    ADDED
    
    | @@ -0,0 +1,76 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            # This file must be used with "source bin/activate" *from bash*
         | 
| 2 | 
            +
            # You cannot run it directly
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            deactivate () {
         | 
| 5 | 
            +
                # reset old environment variables
         | 
| 6 | 
            +
                if [ -n "${_OLD_VIRTUAL_PATH:-}" ] ; then
         | 
| 7 | 
            +
                    PATH="${_OLD_VIRTUAL_PATH:-}"
         | 
| 8 | 
            +
                    export PATH
         | 
| 9 | 
            +
                    unset _OLD_VIRTUAL_PATH
         | 
| 10 | 
            +
                fi
         | 
| 11 | 
            +
                if [ -n "${_OLD_VIRTUAL_PYTHONHOME:-}" ] ; then
         | 
| 12 | 
            +
                    PYTHONHOME="${_OLD_VIRTUAL_PYTHONHOME:-}"
         | 
| 13 | 
            +
                    export PYTHONHOME
         | 
| 14 | 
            +
                    unset _OLD_VIRTUAL_PYTHONHOME
         | 
| 15 | 
            +
                fi
         | 
| 16 | 
            +
             | 
| 17 | 
            +
                # Call hash to forget past locations. Without forgetting
         | 
| 18 | 
            +
                # past locations the $PATH changes we made may not be respected.
         | 
| 19 | 
            +
                # See "man bash" for more details. hash is usually a builtin of your shell
         | 
| 20 | 
            +
                hash -r 2> /dev/null
         | 
| 21 | 
            +
             | 
| 22 | 
            +
                if [ -n "${_OLD_VIRTUAL_PS1:-}" ] ; then
         | 
| 23 | 
            +
                    PS1="${_OLD_VIRTUAL_PS1:-}"
         | 
| 24 | 
            +
                    export PS1
         | 
| 25 | 
            +
                    unset _OLD_VIRTUAL_PS1
         | 
| 26 | 
            +
                fi
         | 
| 27 | 
            +
             | 
| 28 | 
            +
                unset VIRTUAL_ENV
         | 
| 29 | 
            +
                unset VIRTUAL_ENV_PROMPT
         | 
| 30 | 
            +
                if [ ! "${1:-}" = "nondestructive" ] ; then
         | 
| 31 | 
            +
                # Self destruct!
         | 
| 32 | 
            +
                    unset -f deactivate
         | 
| 33 | 
            +
                fi
         | 
| 34 | 
            +
            }
         | 
| 35 | 
            +
             | 
| 36 | 
            +
            # unset irrelevant variables
         | 
| 37 | 
            +
            deactivate nondestructive
         | 
| 38 | 
            +
             | 
| 39 | 
            +
            # on Windows, a path can contain colons and backslashes and has to be converted:
         | 
| 40 | 
            +
            case "$(uname)" in
         | 
| 41 | 
            +
                CYGWIN*|MSYS*|MINGW*)
         | 
| 42 | 
            +
                    # transform D:\path\to\venv to /d/path/to/venv on MSYS and MINGW
         | 
| 43 | 
            +
                    # and to /cygdrive/d/path/to/venv on Cygwin
         | 
| 44 | 
            +
                    VIRTUAL_ENV=$(cygpath /Users/isaacdavid/Qwen2.5-3B-Instruct-Ori/.venv)
         | 
| 45 | 
            +
                    export VIRTUAL_ENV
         | 
| 46 | 
            +
                    ;;
         | 
| 47 | 
            +
                *)
         | 
| 48 | 
            +
                    # use the path as-is
         | 
| 49 | 
            +
                    export VIRTUAL_ENV=/Users/isaacdavid/Qwen2.5-3B-Instruct-Ori/.venv
         | 
| 50 | 
            +
                    ;;
         | 
| 51 | 
            +
            esac
         | 
| 52 | 
            +
             | 
| 53 | 
            +
            _OLD_VIRTUAL_PATH="$PATH"
         | 
| 54 | 
            +
            PATH="$VIRTUAL_ENV/"bin":$PATH"
         | 
| 55 | 
            +
            export PATH
         | 
| 56 | 
            +
             | 
| 57 | 
            +
            VIRTUAL_ENV_PROMPT=.venv
         | 
| 58 | 
            +
            export VIRTUAL_ENV_PROMPT
         | 
| 59 | 
            +
             | 
| 60 | 
            +
            # unset PYTHONHOME if set
         | 
| 61 | 
            +
            # this will fail if PYTHONHOME is set to the empty string (which is bad anyway)
         | 
| 62 | 
            +
            # could use `if (set -u; : $PYTHONHOME) ;` in bash
         | 
| 63 | 
            +
            if [ -n "${PYTHONHOME:-}" ] ; then
         | 
| 64 | 
            +
                _OLD_VIRTUAL_PYTHONHOME="${PYTHONHOME:-}"
         | 
| 65 | 
            +
                unset PYTHONHOME
         | 
| 66 | 
            +
            fi
         | 
| 67 | 
            +
             | 
| 68 | 
            +
            if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT:-}" ] ; then
         | 
| 69 | 
            +
                _OLD_VIRTUAL_PS1="${PS1:-}"
         | 
| 70 | 
            +
                PS1="(".venv") ${PS1:-}"
         | 
| 71 | 
            +
                export PS1
         | 
| 72 | 
            +
            fi
         | 
| 73 | 
            +
             | 
| 74 | 
            +
            # Call hash to forget past commands. Without forgetting
         | 
| 75 | 
            +
            # past commands the $PATH changes we made may not be respected
         | 
| 76 | 
            +
            hash -r 2> /dev/null
         | 
    	
        .venv/bin/activate.csh
    ADDED
    
    | @@ -0,0 +1,27 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            # This file must be used with "source bin/activate.csh" *from csh*.
         | 
| 2 | 
            +
            # You cannot run it directly.
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            # Created by Davide Di Blasi <[email protected]>.
         | 
| 5 | 
            +
            # Ported to Python 3.3 venv by Andrew Svetlov <[email protected]>
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            alias deactivate 'test $?_OLD_VIRTUAL_PATH != 0 && setenv PATH "$_OLD_VIRTUAL_PATH" && unset _OLD_VIRTUAL_PATH; rehash; test $?_OLD_VIRTUAL_PROMPT != 0 && set prompt="$_OLD_VIRTUAL_PROMPT" && unset _OLD_VIRTUAL_PROMPT; unsetenv VIRTUAL_ENV; unsetenv VIRTUAL_ENV_PROMPT; test "\!:*" != "nondestructive" && unalias deactivate'
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            # Unset irrelevant variables.
         | 
| 10 | 
            +
            deactivate nondestructive
         | 
| 11 | 
            +
             | 
| 12 | 
            +
            setenv VIRTUAL_ENV /Users/isaacdavid/Qwen2.5-3B-Instruct-Ori/.venv
         | 
| 13 | 
            +
             | 
| 14 | 
            +
            set _OLD_VIRTUAL_PATH="$PATH"
         | 
| 15 | 
            +
            setenv PATH "$VIRTUAL_ENV/"bin":$PATH"
         | 
| 16 | 
            +
            setenv VIRTUAL_ENV_PROMPT .venv
         | 
| 17 | 
            +
             | 
| 18 | 
            +
             | 
| 19 | 
            +
            set _OLD_VIRTUAL_PROMPT="$prompt"
         | 
| 20 | 
            +
             | 
| 21 | 
            +
            if (! "$?VIRTUAL_ENV_DISABLE_PROMPT") then
         | 
| 22 | 
            +
                set prompt = "(".venv") $prompt:q"
         | 
| 23 | 
            +
            endif
         | 
| 24 | 
            +
             | 
| 25 | 
            +
            alias pydoc python -m pydoc
         | 
| 26 | 
            +
             | 
| 27 | 
            +
            rehash
         | 
    	
        .venv/bin/activate.fish
    ADDED
    
    | @@ -0,0 +1,69 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            # This file must be used with "source <venv>/bin/activate.fish" *from fish*
         | 
| 2 | 
            +
            # (https://fishshell.com/). You cannot run it directly.
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            function deactivate  -d "Exit virtual environment and return to normal shell environment"
         | 
| 5 | 
            +
                # reset old environment variables
         | 
| 6 | 
            +
                if test -n "$_OLD_VIRTUAL_PATH"
         | 
| 7 | 
            +
                    set -gx PATH $_OLD_VIRTUAL_PATH
         | 
| 8 | 
            +
                    set -e _OLD_VIRTUAL_PATH
         | 
| 9 | 
            +
                end
         | 
| 10 | 
            +
                if test -n "$_OLD_VIRTUAL_PYTHONHOME"
         | 
| 11 | 
            +
                    set -gx PYTHONHOME $_OLD_VIRTUAL_PYTHONHOME
         | 
| 12 | 
            +
                    set -e _OLD_VIRTUAL_PYTHONHOME
         | 
| 13 | 
            +
                end
         | 
| 14 | 
            +
             | 
| 15 | 
            +
                if test -n "$_OLD_FISH_PROMPT_OVERRIDE"
         | 
| 16 | 
            +
                    set -e _OLD_FISH_PROMPT_OVERRIDE
         | 
| 17 | 
            +
                    # prevents error when using nested fish instances (Issue #93858)
         | 
| 18 | 
            +
                    if functions -q _old_fish_prompt
         | 
| 19 | 
            +
                        functions -e fish_prompt
         | 
| 20 | 
            +
                        functions -c _old_fish_prompt fish_prompt
         | 
| 21 | 
            +
                        functions -e _old_fish_prompt
         | 
| 22 | 
            +
                    end
         | 
| 23 | 
            +
                end
         | 
| 24 | 
            +
             | 
| 25 | 
            +
                set -e VIRTUAL_ENV
         | 
| 26 | 
            +
                set -e VIRTUAL_ENV_PROMPT
         | 
| 27 | 
            +
                if test "$argv[1]" != "nondestructive"
         | 
| 28 | 
            +
                    # Self-destruct!
         | 
| 29 | 
            +
                    functions -e deactivate
         | 
| 30 | 
            +
                end
         | 
| 31 | 
            +
            end
         | 
| 32 | 
            +
             | 
| 33 | 
            +
            # Unset irrelevant variables.
         | 
| 34 | 
            +
            deactivate nondestructive
         | 
| 35 | 
            +
             | 
| 36 | 
            +
            set -gx VIRTUAL_ENV /Users/isaacdavid/Qwen2.5-3B-Instruct-Ori/.venv
         | 
| 37 | 
            +
             | 
| 38 | 
            +
            set -gx _OLD_VIRTUAL_PATH $PATH
         | 
| 39 | 
            +
            set -gx PATH "$VIRTUAL_ENV/"bin $PATH
         | 
| 40 | 
            +
            set -gx VIRTUAL_ENV_PROMPT .venv
         | 
| 41 | 
            +
             | 
| 42 | 
            +
            # Unset PYTHONHOME if set.
         | 
| 43 | 
            +
            if set -q PYTHONHOME
         | 
| 44 | 
            +
                set -gx _OLD_VIRTUAL_PYTHONHOME $PYTHONHOME
         | 
| 45 | 
            +
                set -e PYTHONHOME
         | 
| 46 | 
            +
            end
         | 
| 47 | 
            +
             | 
| 48 | 
            +
            if test -z "$VIRTUAL_ENV_DISABLE_PROMPT"
         | 
| 49 | 
            +
                # fish uses a function instead of an env var to generate the prompt.
         | 
| 50 | 
            +
             | 
| 51 | 
            +
                # Save the current fish_prompt function as the function _old_fish_prompt.
         | 
| 52 | 
            +
                functions -c fish_prompt _old_fish_prompt
         | 
| 53 | 
            +
             | 
| 54 | 
            +
                # With the original prompt function renamed, we can override with our own.
         | 
| 55 | 
            +
                function fish_prompt
         | 
| 56 | 
            +
                    # Save the return status of the last command.
         | 
| 57 | 
            +
                    set -l old_status $status
         | 
| 58 | 
            +
             | 
| 59 | 
            +
                    # Output the venv prompt; color taken from the blue of the Python logo.
         | 
| 60 | 
            +
                    printf "%s(%s)%s " (set_color 4B8BBE) .venv (set_color normal)
         | 
| 61 | 
            +
             | 
| 62 | 
            +
                    # Restore the return status of the previous command.
         | 
| 63 | 
            +
                    echo "exit $old_status" | .
         | 
| 64 | 
            +
                    # Output the original/"old" prompt.
         | 
| 65 | 
            +
                    _old_fish_prompt
         | 
| 66 | 
            +
                end
         | 
| 67 | 
            +
             | 
| 68 | 
            +
                set -gx _OLD_FISH_PROMPT_OVERRIDE "$VIRTUAL_ENV"
         | 
| 69 | 
            +
            end
         | 
    	
        .venv/bin/huggingface-cli
    ADDED
    
    | @@ -0,0 +1,8 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            #!/Users/isaacdavid/Qwen2.5-3B-Instruct-Ori/.venv/bin/python
         | 
| 2 | 
            +
            # -*- coding: utf-8 -*-
         | 
| 3 | 
            +
            import re
         | 
| 4 | 
            +
            import sys
         | 
| 5 | 
            +
            from huggingface_hub.commands.huggingface_cli import main
         | 
| 6 | 
            +
            if __name__ == '__main__':
         | 
| 7 | 
            +
                sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
         | 
| 8 | 
            +
                sys.exit(main())
         | 
    	
        .venv/bin/normalizer
    ADDED
    
    | @@ -0,0 +1,8 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            #!/Users/isaacdavid/Qwen2.5-3B-Instruct-Ori/.venv/bin/python
         | 
| 2 | 
            +
            # -*- coding: utf-8 -*-
         | 
| 3 | 
            +
            import re
         | 
| 4 | 
            +
            import sys
         | 
| 5 | 
            +
            from charset_normalizer import cli
         | 
| 6 | 
            +
            if __name__ == '__main__':
         | 
| 7 | 
            +
                sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
         | 
| 8 | 
            +
                sys.exit(cli.cli_detect())
         | 
    	
        .venv/bin/pip
    ADDED
    
    | @@ -0,0 +1,8 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            #!/Users/isaacdavid/Qwen2.5-3B-Instruct-Ori/.venv/bin/python
         | 
| 2 | 
            +
            # -*- coding: utf-8 -*-
         | 
| 3 | 
            +
            import re
         | 
| 4 | 
            +
            import sys
         | 
| 5 | 
            +
            from pip._internal.cli.main import main
         | 
| 6 | 
            +
            if __name__ == '__main__':
         | 
| 7 | 
            +
                sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
         | 
| 8 | 
            +
                sys.exit(main())
         | 
    	
        .venv/bin/pip3
    ADDED
    
    | @@ -0,0 +1,8 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            #!/Users/isaacdavid/Qwen2.5-3B-Instruct-Ori/.venv/bin/python
         | 
| 2 | 
            +
            # -*- coding: utf-8 -*-
         | 
| 3 | 
            +
            import re
         | 
| 4 | 
            +
            import sys
         | 
| 5 | 
            +
            from pip._internal.cli.main import main
         | 
| 6 | 
            +
            if __name__ == '__main__':
         | 
| 7 | 
            +
                sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
         | 
| 8 | 
            +
                sys.exit(main())
         | 
    	
        .venv/bin/pip3.13
    ADDED
    
    | @@ -0,0 +1,8 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            #!/Users/isaacdavid/Qwen2.5-3B-Instruct-Ori/.venv/bin/python
         | 
| 2 | 
            +
            # -*- coding: utf-8 -*-
         | 
| 3 | 
            +
            import re
         | 
| 4 | 
            +
            import sys
         | 
| 5 | 
            +
            from pip._internal.cli.main import main
         | 
| 6 | 
            +
            if __name__ == '__main__':
         | 
| 7 | 
            +
                sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
         | 
| 8 | 
            +
                sys.exit(main())
         | 
    	
        .venv/bin/python
    ADDED
    
    | Binary file (52.6 kB). View file | 
|  | 
    	
        .venv/bin/python3
    ADDED
    
    | Binary file (52.6 kB). View file | 
|  | 
    	
        .venv/bin/python3.13
    ADDED
    
    | Binary file (52.6 kB). View file | 
|  | 
    	
        .venv/bin/tqdm
    ADDED
    
    | @@ -0,0 +1,8 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            #!/Users/isaacdavid/Qwen2.5-3B-Instruct-Ori/.venv/bin/python
         | 
| 2 | 
            +
            # -*- coding: utf-8 -*-
         | 
| 3 | 
            +
            import re
         | 
| 4 | 
            +
            import sys
         | 
| 5 | 
            +
            from tqdm.cli import main
         | 
| 6 | 
            +
            if __name__ == '__main__':
         | 
| 7 | 
            +
                sys.argv[0] = re.sub(r'(-script\.pyw|\.exe)?$', '', sys.argv[0])
         | 
| 8 | 
            +
                sys.exit(main())
         | 
    	
        .venv/lib/python3.13/site-packages/PyYAML-6.0.2.dist-info/INSTALLER
    ADDED
    
    | @@ -0,0 +1 @@ | |
|  | 
|  | |
| 1 | 
            +
            pip
         | 
    	
        .venv/lib/python3.13/site-packages/PyYAML-6.0.2.dist-info/LICENSE
    ADDED
    
    | @@ -0,0 +1,20 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            Copyright (c) 2017-2021 Ingy döt Net
         | 
| 2 | 
            +
            Copyright (c) 2006-2016 Kirill Simonov
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            Permission is hereby granted, free of charge, to any person obtaining a copy of
         | 
| 5 | 
            +
            this software and associated documentation files (the "Software"), to deal in
         | 
| 6 | 
            +
            the Software without restriction, including without limitation the rights to
         | 
| 7 | 
            +
            use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
         | 
| 8 | 
            +
            of the Software, and to permit persons to whom the Software is furnished to do
         | 
| 9 | 
            +
            so, subject to the following conditions:
         | 
| 10 | 
            +
             | 
| 11 | 
            +
            The above copyright notice and this permission notice shall be included in all
         | 
| 12 | 
            +
            copies or substantial portions of the Software.
         | 
| 13 | 
            +
             | 
| 14 | 
            +
            THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
         | 
| 15 | 
            +
            IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
         | 
| 16 | 
            +
            FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
         | 
| 17 | 
            +
            AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
         | 
| 18 | 
            +
            LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
         | 
| 19 | 
            +
            OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
         | 
| 20 | 
            +
            SOFTWARE.
         | 
    	
        .venv/lib/python3.13/site-packages/PyYAML-6.0.2.dist-info/METADATA
    ADDED
    
    | @@ -0,0 +1,46 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            Metadata-Version: 2.1
         | 
| 2 | 
            +
            Name: PyYAML
         | 
| 3 | 
            +
            Version: 6.0.2
         | 
| 4 | 
            +
            Summary: YAML parser and emitter for Python
         | 
| 5 | 
            +
            Home-page: https://pyyaml.org/
         | 
| 6 | 
            +
            Download-URL: https://pypi.org/project/PyYAML/
         | 
| 7 | 
            +
            Author: Kirill Simonov
         | 
| 8 | 
            +
            Author-email: [email protected]
         | 
| 9 | 
            +
            License: MIT
         | 
| 10 | 
            +
            Project-URL: Bug Tracker, https://github.com/yaml/pyyaml/issues
         | 
| 11 | 
            +
            Project-URL: CI, https://github.com/yaml/pyyaml/actions
         | 
| 12 | 
            +
            Project-URL: Documentation, https://pyyaml.org/wiki/PyYAMLDocumentation
         | 
| 13 | 
            +
            Project-URL: Mailing lists, http://lists.sourceforge.net/lists/listinfo/yaml-core
         | 
| 14 | 
            +
            Project-URL: Source Code, https://github.com/yaml/pyyaml
         | 
| 15 | 
            +
            Platform: Any
         | 
| 16 | 
            +
            Classifier: Development Status :: 5 - Production/Stable
         | 
| 17 | 
            +
            Classifier: Intended Audience :: Developers
         | 
| 18 | 
            +
            Classifier: License :: OSI Approved :: MIT License
         | 
| 19 | 
            +
            Classifier: Operating System :: OS Independent
         | 
| 20 | 
            +
            Classifier: Programming Language :: Cython
         | 
| 21 | 
            +
            Classifier: Programming Language :: Python
         | 
| 22 | 
            +
            Classifier: Programming Language :: Python :: 3
         | 
| 23 | 
            +
            Classifier: Programming Language :: Python :: 3.8
         | 
| 24 | 
            +
            Classifier: Programming Language :: Python :: 3.9
         | 
| 25 | 
            +
            Classifier: Programming Language :: Python :: 3.10
         | 
| 26 | 
            +
            Classifier: Programming Language :: Python :: 3.11
         | 
| 27 | 
            +
            Classifier: Programming Language :: Python :: 3.12
         | 
| 28 | 
            +
            Classifier: Programming Language :: Python :: 3.13
         | 
| 29 | 
            +
            Classifier: Programming Language :: Python :: Implementation :: CPython
         | 
| 30 | 
            +
            Classifier: Programming Language :: Python :: Implementation :: PyPy
         | 
| 31 | 
            +
            Classifier: Topic :: Software Development :: Libraries :: Python Modules
         | 
| 32 | 
            +
            Classifier: Topic :: Text Processing :: Markup
         | 
| 33 | 
            +
            Requires-Python: >=3.8
         | 
| 34 | 
            +
            License-File: LICENSE
         | 
| 35 | 
            +
             | 
| 36 | 
            +
            YAML is a data serialization format designed for human readability
         | 
| 37 | 
            +
            and interaction with scripting languages.  PyYAML is a YAML parser
         | 
| 38 | 
            +
            and emitter for Python.
         | 
| 39 | 
            +
             | 
| 40 | 
            +
            PyYAML features a complete YAML 1.1 parser, Unicode support, pickle
         | 
| 41 | 
            +
            support, capable extension API, and sensible error messages.  PyYAML
         | 
| 42 | 
            +
            supports standard YAML tags and provides Python-specific tags that
         | 
| 43 | 
            +
            allow representing an arbitrary Python object.
         | 
| 44 | 
            +
             | 
| 45 | 
            +
            PyYAML is applicable for a broad range of tasks from complex
         | 
| 46 | 
            +
            configuration files to object serialization and persistence.
         | 
    	
        .venv/lib/python3.13/site-packages/PyYAML-6.0.2.dist-info/RECORD
    ADDED
    
    | @@ -0,0 +1,43 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            PyYAML-6.0.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
         | 
| 2 | 
            +
            PyYAML-6.0.2.dist-info/LICENSE,sha256=jTko-dxEkP1jVwfLiOsmvXZBAqcoKVQwfT5RZ6V36KQ,1101
         | 
| 3 | 
            +
            PyYAML-6.0.2.dist-info/METADATA,sha256=9-odFB5seu4pGPcEv7E8iyxNF51_uKnaNGjLAhz2lto,2060
         | 
| 4 | 
            +
            PyYAML-6.0.2.dist-info/RECORD,,
         | 
| 5 | 
            +
            PyYAML-6.0.2.dist-info/WHEEL,sha256=9IiDymhRAZGpezdLosJoTs0FRVFmaCNfCbrLwpjM2to,110
         | 
| 6 | 
            +
            PyYAML-6.0.2.dist-info/top_level.txt,sha256=rpj0IVMTisAjh_1vG3Ccf9v5jpCQwAz6cD1IVU5ZdhQ,11
         | 
| 7 | 
            +
            _yaml/__init__.py,sha256=04Ae_5osxahpJHa3XBZUAf4wi6XX32gR8D6X6p64GEA,1402
         | 
| 8 | 
            +
            _yaml/__pycache__/__init__.cpython-313.pyc,,
         | 
| 9 | 
            +
            yaml/__init__.py,sha256=N35S01HMesFTe0aRRMWkPj0Pa8IEbHpE9FK7cr5Bdtw,12311
         | 
| 10 | 
            +
            yaml/__pycache__/__init__.cpython-313.pyc,,
         | 
| 11 | 
            +
            yaml/__pycache__/composer.cpython-313.pyc,,
         | 
| 12 | 
            +
            yaml/__pycache__/constructor.cpython-313.pyc,,
         | 
| 13 | 
            +
            yaml/__pycache__/cyaml.cpython-313.pyc,,
         | 
| 14 | 
            +
            yaml/__pycache__/dumper.cpython-313.pyc,,
         | 
| 15 | 
            +
            yaml/__pycache__/emitter.cpython-313.pyc,,
         | 
| 16 | 
            +
            yaml/__pycache__/error.cpython-313.pyc,,
         | 
| 17 | 
            +
            yaml/__pycache__/events.cpython-313.pyc,,
         | 
| 18 | 
            +
            yaml/__pycache__/loader.cpython-313.pyc,,
         | 
| 19 | 
            +
            yaml/__pycache__/nodes.cpython-313.pyc,,
         | 
| 20 | 
            +
            yaml/__pycache__/parser.cpython-313.pyc,,
         | 
| 21 | 
            +
            yaml/__pycache__/reader.cpython-313.pyc,,
         | 
| 22 | 
            +
            yaml/__pycache__/representer.cpython-313.pyc,,
         | 
| 23 | 
            +
            yaml/__pycache__/resolver.cpython-313.pyc,,
         | 
| 24 | 
            +
            yaml/__pycache__/scanner.cpython-313.pyc,,
         | 
| 25 | 
            +
            yaml/__pycache__/serializer.cpython-313.pyc,,
         | 
| 26 | 
            +
            yaml/__pycache__/tokens.cpython-313.pyc,,
         | 
| 27 | 
            +
            yaml/_yaml.cpython-313-darwin.so,sha256=BQ22sZBX0PgEZn-OJGbRD1MnakXV6RUiI8XbdJW6oK4,358728
         | 
| 28 | 
            +
            yaml/composer.py,sha256=_Ko30Wr6eDWUeUpauUGT3Lcg9QPBnOPVlTnIMRGJ9FM,4883
         | 
| 29 | 
            +
            yaml/constructor.py,sha256=kNgkfaeLUkwQYY_Q6Ff1Tz2XVw_pG1xVE9Ak7z-viLA,28639
         | 
| 30 | 
            +
            yaml/cyaml.py,sha256=6ZrAG9fAYvdVe2FK_w0hmXoG7ZYsoYUwapG8CiC72H0,3851
         | 
| 31 | 
            +
            yaml/dumper.py,sha256=PLctZlYwZLp7XmeUdwRuv4nYOZ2UBnDIUy8-lKfLF-o,2837
         | 
| 32 | 
            +
            yaml/emitter.py,sha256=jghtaU7eFwg31bG0B7RZea_29Adi9CKmXq_QjgQpCkQ,43006
         | 
| 33 | 
            +
            yaml/error.py,sha256=Ah9z-toHJUbE9j-M8YpxgSRM5CgLCcwVzJgLLRF2Fxo,2533
         | 
| 34 | 
            +
            yaml/events.py,sha256=50_TksgQiE4up-lKo_V-nBy-tAIxkIPQxY5qDhKCeHw,2445
         | 
| 35 | 
            +
            yaml/loader.py,sha256=UVa-zIqmkFSCIYq_PgSGm4NSJttHY2Rf_zQ4_b1fHN0,2061
         | 
| 36 | 
            +
            yaml/nodes.py,sha256=gPKNj8pKCdh2d4gr3gIYINnPOaOxGhJAUiYhGRnPE84,1440
         | 
| 37 | 
            +
            yaml/parser.py,sha256=ilWp5vvgoHFGzvOZDItFoGjD6D42nhlZrZyjAwa0oJo,25495
         | 
| 38 | 
            +
            yaml/reader.py,sha256=0dmzirOiDG4Xo41RnuQS7K9rkY3xjHiVasfDMNTqCNw,6794
         | 
| 39 | 
            +
            yaml/representer.py,sha256=IuWP-cAW9sHKEnS0gCqSa894k1Bg4cgTxaDwIcbRQ-Y,14190
         | 
| 40 | 
            +
            yaml/resolver.py,sha256=9L-VYfm4mWHxUD1Vg4X7rjDRK_7VZd6b92wzq7Y2IKY,9004
         | 
| 41 | 
            +
            yaml/scanner.py,sha256=YEM3iLZSaQwXcQRg2l2R4MdT0zGP2F9eHkKGKnHyWQY,51279
         | 
| 42 | 
            +
            yaml/serializer.py,sha256=ChuFgmhU01hj4xgI8GaKv6vfM2Bujwa9i7d2FAHj7cA,4165
         | 
| 43 | 
            +
            yaml/tokens.py,sha256=lTQIzSVw8Mg9wv459-TjiOQe6wVziqaRlqX2_89rp54,2573
         | 
    	
        .venv/lib/python3.13/site-packages/PyYAML-6.0.2.dist-info/WHEEL
    ADDED
    
    | @@ -0,0 +1,5 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            Wheel-Version: 1.0
         | 
| 2 | 
            +
            Generator: bdist_wheel (0.44.0)
         | 
| 3 | 
            +
            Root-Is-Purelib: false
         | 
| 4 | 
            +
            Tag: cp313-cp313-macosx_11_0_arm64
         | 
| 5 | 
            +
             | 
    	
        .venv/lib/python3.13/site-packages/PyYAML-6.0.2.dist-info/top_level.txt
    ADDED
    
    | @@ -0,0 +1,2 @@ | |
|  | |
|  | 
|  | |
| 1 | 
            +
            _yaml
         | 
| 2 | 
            +
            yaml
         | 
    	
        .venv/lib/python3.13/site-packages/_yaml/__init__.py
    ADDED
    
    | @@ -0,0 +1,33 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            # This is a stub package designed to roughly emulate the _yaml
         | 
| 2 | 
            +
            # extension module, which previously existed as a standalone module
         | 
| 3 | 
            +
            # and has been moved into the `yaml` package namespace.
         | 
| 4 | 
            +
            # It does not perfectly mimic its old counterpart, but should get
         | 
| 5 | 
            +
            # close enough for anyone who's relying on it even when they shouldn't.
         | 
| 6 | 
            +
            import yaml
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            # in some circumstances, the yaml module we imported may be from a different version, so we need
         | 
| 9 | 
            +
            # to tread carefully when poking at it here (it may not have the attributes we expect)
         | 
| 10 | 
            +
            if not getattr(yaml, '__with_libyaml__', False):
         | 
| 11 | 
            +
                from sys import version_info
         | 
| 12 | 
            +
             | 
| 13 | 
            +
                exc = ModuleNotFoundError if version_info >= (3, 6) else ImportError
         | 
| 14 | 
            +
                raise exc("No module named '_yaml'")
         | 
| 15 | 
            +
            else:
         | 
| 16 | 
            +
                from yaml._yaml import *
         | 
| 17 | 
            +
                import warnings
         | 
| 18 | 
            +
                warnings.warn(
         | 
| 19 | 
            +
                    'The _yaml extension module is now located at yaml._yaml'
         | 
| 20 | 
            +
                    ' and its location is subject to change.  To use the'
         | 
| 21 | 
            +
                    ' LibYAML-based parser and emitter, import from `yaml`:'
         | 
| 22 | 
            +
                    ' `from yaml import CLoader as Loader, CDumper as Dumper`.',
         | 
| 23 | 
            +
                    DeprecationWarning
         | 
| 24 | 
            +
                )
         | 
| 25 | 
            +
                del warnings
         | 
| 26 | 
            +
                # Don't `del yaml` here because yaml is actually an existing
         | 
| 27 | 
            +
                # namespace member of _yaml.
         | 
| 28 | 
            +
             | 
| 29 | 
            +
            __name__ = '_yaml'
         | 
| 30 | 
            +
            # If the module is top-level (i.e. not a part of any specific package)
         | 
| 31 | 
            +
            # then the attribute should be set to ''.
         | 
| 32 | 
            +
            # https://docs.python.org/3.8/library/types.html
         | 
| 33 | 
            +
            __package__ = ''
         | 
    	
        .venv/lib/python3.13/site-packages/certifi-2025.1.31.dist-info/INSTALLER
    ADDED
    
    | @@ -0,0 +1 @@ | |
|  | 
|  | |
| 1 | 
            +
            pip
         | 
    	
        .venv/lib/python3.13/site-packages/certifi-2025.1.31.dist-info/LICENSE
    ADDED
    
    | @@ -0,0 +1,20 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            This package contains a modified version of ca-bundle.crt:
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            ca-bundle.crt -- Bundle of CA Root Certificates
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            This is a bundle of X.509 certificates of public Certificate Authorities
         | 
| 6 | 
            +
            (CA). These were automatically extracted from Mozilla's root certificates
         | 
| 7 | 
            +
            file (certdata.txt).  This file can be found in the mozilla source tree:
         | 
| 8 | 
            +
            https://hg.mozilla.org/mozilla-central/file/tip/security/nss/lib/ckfw/builtins/certdata.txt
         | 
| 9 | 
            +
            It contains the certificates in PEM format and therefore
         | 
| 10 | 
            +
            can be directly used with curl / libcurl / php_curl, or with
         | 
| 11 | 
            +
            an Apache+mod_ssl webserver for SSL client authentication.
         | 
| 12 | 
            +
            Just configure this file as the SSLCACertificateFile.#
         | 
| 13 | 
            +
             | 
| 14 | 
            +
            ***** BEGIN LICENSE BLOCK *****
         | 
| 15 | 
            +
            This Source Code Form is subject to the terms of the Mozilla Public License,
         | 
| 16 | 
            +
            v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain
         | 
| 17 | 
            +
            one at http://mozilla.org/MPL/2.0/.
         | 
| 18 | 
            +
             | 
| 19 | 
            +
            ***** END LICENSE BLOCK *****
         | 
| 20 | 
            +
            @(#) $RCSfile: certdata.txt,v $ $Revision: 1.80 $ $Date: 2011/11/03 15:11:58 $
         | 
    	
        .venv/lib/python3.13/site-packages/certifi-2025.1.31.dist-info/METADATA
    ADDED
    
    | @@ -0,0 +1,77 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            Metadata-Version: 2.2
         | 
| 2 | 
            +
            Name: certifi
         | 
| 3 | 
            +
            Version: 2025.1.31
         | 
| 4 | 
            +
            Summary: Python package for providing Mozilla's CA Bundle.
         | 
| 5 | 
            +
            Home-page: https://github.com/certifi/python-certifi
         | 
| 6 | 
            +
            Author: Kenneth Reitz
         | 
| 7 | 
            +
            Author-email: [email protected]
         | 
| 8 | 
            +
            License: MPL-2.0
         | 
| 9 | 
            +
            Project-URL: Source, https://github.com/certifi/python-certifi
         | 
| 10 | 
            +
            Classifier: Development Status :: 5 - Production/Stable
         | 
| 11 | 
            +
            Classifier: Intended Audience :: Developers
         | 
| 12 | 
            +
            Classifier: License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)
         | 
| 13 | 
            +
            Classifier: Natural Language :: English
         | 
| 14 | 
            +
            Classifier: Programming Language :: Python
         | 
| 15 | 
            +
            Classifier: Programming Language :: Python :: 3
         | 
| 16 | 
            +
            Classifier: Programming Language :: Python :: 3 :: Only
         | 
| 17 | 
            +
            Classifier: Programming Language :: Python :: 3.6
         | 
| 18 | 
            +
            Classifier: Programming Language :: Python :: 3.7
         | 
| 19 | 
            +
            Classifier: Programming Language :: Python :: 3.8
         | 
| 20 | 
            +
            Classifier: Programming Language :: Python :: 3.9
         | 
| 21 | 
            +
            Classifier: Programming Language :: Python :: 3.10
         | 
| 22 | 
            +
            Classifier: Programming Language :: Python :: 3.11
         | 
| 23 | 
            +
            Classifier: Programming Language :: Python :: 3.12
         | 
| 24 | 
            +
            Classifier: Programming Language :: Python :: 3.13
         | 
| 25 | 
            +
            Requires-Python: >=3.6
         | 
| 26 | 
            +
            License-File: LICENSE
         | 
| 27 | 
            +
            Dynamic: author
         | 
| 28 | 
            +
            Dynamic: author-email
         | 
| 29 | 
            +
            Dynamic: classifier
         | 
| 30 | 
            +
            Dynamic: description
         | 
| 31 | 
            +
            Dynamic: home-page
         | 
| 32 | 
            +
            Dynamic: license
         | 
| 33 | 
            +
            Dynamic: project-url
         | 
| 34 | 
            +
            Dynamic: requires-python
         | 
| 35 | 
            +
            Dynamic: summary
         | 
| 36 | 
            +
             | 
| 37 | 
            +
            Certifi: Python SSL Certificates
         | 
| 38 | 
            +
            ================================
         | 
| 39 | 
            +
             | 
| 40 | 
            +
            Certifi provides Mozilla's carefully curated collection of Root Certificates for
         | 
| 41 | 
            +
            validating the trustworthiness of SSL certificates while verifying the identity
         | 
| 42 | 
            +
            of TLS hosts. It has been extracted from the `Requests`_ project.
         | 
| 43 | 
            +
             | 
| 44 | 
            +
            Installation
         | 
| 45 | 
            +
            ------------
         | 
| 46 | 
            +
             | 
| 47 | 
            +
            ``certifi`` is available on PyPI. Simply install it with ``pip``::
         | 
| 48 | 
            +
             | 
| 49 | 
            +
                $ pip install certifi
         | 
| 50 | 
            +
             | 
| 51 | 
            +
            Usage
         | 
| 52 | 
            +
            -----
         | 
| 53 | 
            +
             | 
| 54 | 
            +
            To reference the installed certificate authority (CA) bundle, you can use the
         | 
| 55 | 
            +
            built-in function::
         | 
| 56 | 
            +
             | 
| 57 | 
            +
                >>> import certifi
         | 
| 58 | 
            +
             | 
| 59 | 
            +
                >>> certifi.where()
         | 
| 60 | 
            +
                '/usr/local/lib/python3.7/site-packages/certifi/cacert.pem'
         | 
| 61 | 
            +
             | 
| 62 | 
            +
            Or from the command line::
         | 
| 63 | 
            +
             | 
| 64 | 
            +
                $ python -m certifi
         | 
| 65 | 
            +
                /usr/local/lib/python3.7/site-packages/certifi/cacert.pem
         | 
| 66 | 
            +
             | 
| 67 | 
            +
            Enjoy!
         | 
| 68 | 
            +
             | 
| 69 | 
            +
            .. _`Requests`: https://requests.readthedocs.io/en/master/
         | 
| 70 | 
            +
             | 
| 71 | 
            +
            Addition/Removal of Certificates
         | 
| 72 | 
            +
            --------------------------------
         | 
| 73 | 
            +
             | 
| 74 | 
            +
            Certifi does not support any addition/removal or other modification of the
         | 
| 75 | 
            +
            CA trust store content. This project is intended to provide a reliable and
         | 
| 76 | 
            +
            highly portable root of trust to python deployments. Look to upstream projects
         | 
| 77 | 
            +
            for methods to use alternate trust.
         | 
    	
        .venv/lib/python3.13/site-packages/certifi-2025.1.31.dist-info/RECORD
    ADDED
    
    | @@ -0,0 +1,14 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            certifi-2025.1.31.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
         | 
| 2 | 
            +
            certifi-2025.1.31.dist-info/LICENSE,sha256=6TcW2mucDVpKHfYP5pWzcPBpVgPSH2-D8FPkLPwQyvc,989
         | 
| 3 | 
            +
            certifi-2025.1.31.dist-info/METADATA,sha256=t5kcT5aGu0dQ6_psUNZYTqnC0uCRnponewm3uYjeHbg,2451
         | 
| 4 | 
            +
            certifi-2025.1.31.dist-info/RECORD,,
         | 
| 5 | 
            +
            certifi-2025.1.31.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
         | 
| 6 | 
            +
            certifi-2025.1.31.dist-info/top_level.txt,sha256=KMu4vUCfsjLrkPbSNdgdekS-pVJzBAJFO__nI8NF6-U,8
         | 
| 7 | 
            +
            certifi/__init__.py,sha256=neIaAf7BM36ygmQCmy-ZsSyjnvjWghFeu13wwEAnjj0,94
         | 
| 8 | 
            +
            certifi/__main__.py,sha256=xBBoj905TUWBLRGANOcf7oi6e-3dMP4cEoG9OyMs11g,243
         | 
| 9 | 
            +
            certifi/__pycache__/__init__.cpython-313.pyc,,
         | 
| 10 | 
            +
            certifi/__pycache__/__main__.cpython-313.pyc,,
         | 
| 11 | 
            +
            certifi/__pycache__/core.cpython-313.pyc,,
         | 
| 12 | 
            +
            certifi/cacert.pem,sha256=xVsh-Qf3-G1IrdCTVS-1ZRdJ_1-GBQjMu0I9bB-9gMc,297255
         | 
| 13 | 
            +
            certifi/core.py,sha256=qRDDFyXVJwTB_EmoGppaXU_R9qCZvhl-EzxPMuV3nTA,4426
         | 
| 14 | 
            +
            certifi/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
         | 
    	
        .venv/lib/python3.13/site-packages/certifi-2025.1.31.dist-info/WHEEL
    ADDED
    
    | @@ -0,0 +1,5 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            Wheel-Version: 1.0
         | 
| 2 | 
            +
            Generator: setuptools (75.8.0)
         | 
| 3 | 
            +
            Root-Is-Purelib: true
         | 
| 4 | 
            +
            Tag: py3-none-any
         | 
| 5 | 
            +
             | 
    	
        .venv/lib/python3.13/site-packages/certifi-2025.1.31.dist-info/top_level.txt
    ADDED
    
    | @@ -0,0 +1 @@ | |
|  | 
|  | |
| 1 | 
            +
            certifi
         | 
    	
        .venv/lib/python3.13/site-packages/certifi/__init__.py
    ADDED
    
    | @@ -0,0 +1,4 @@ | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            from .core import contents, where
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            __all__ = ["contents", "where"]
         | 
| 4 | 
            +
            __version__ = "2025.01.31"
         | 
    	
        .venv/lib/python3.13/site-packages/certifi/__main__.py
    ADDED
    
    | @@ -0,0 +1,12 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            import argparse
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            from certifi import contents, where
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            parser = argparse.ArgumentParser()
         | 
| 6 | 
            +
            parser.add_argument("-c", "--contents", action="store_true")
         | 
| 7 | 
            +
            args = parser.parse_args()
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            if args.contents:
         | 
| 10 | 
            +
                print(contents())
         | 
| 11 | 
            +
            else:
         | 
| 12 | 
            +
                print(where())
         | 
    	
        .venv/lib/python3.13/site-packages/certifi/cacert.pem
    ADDED
    
    | The diff for this file is too large to render. 
		See raw diff | 
|  | 
    	
        .venv/lib/python3.13/site-packages/certifi/core.py
    ADDED
    
    | @@ -0,0 +1,114 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            """
         | 
| 2 | 
            +
            certifi.py
         | 
| 3 | 
            +
            ~~~~~~~~~~
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            This module returns the installation location of cacert.pem or its contents.
         | 
| 6 | 
            +
            """
         | 
| 7 | 
            +
            import sys
         | 
| 8 | 
            +
            import atexit
         | 
| 9 | 
            +
             | 
| 10 | 
            +
            def exit_cacert_ctx() -> None:
         | 
| 11 | 
            +
                _CACERT_CTX.__exit__(None, None, None)  # type: ignore[union-attr]
         | 
| 12 | 
            +
             | 
| 13 | 
            +
             | 
| 14 | 
            +
            if sys.version_info >= (3, 11):
         | 
| 15 | 
            +
             | 
| 16 | 
            +
                from importlib.resources import as_file, files
         | 
| 17 | 
            +
             | 
| 18 | 
            +
                _CACERT_CTX = None
         | 
| 19 | 
            +
                _CACERT_PATH = None
         | 
| 20 | 
            +
             | 
| 21 | 
            +
                def where() -> str:
         | 
| 22 | 
            +
                    # This is slightly terrible, but we want to delay extracting the file
         | 
| 23 | 
            +
                    # in cases where we're inside of a zipimport situation until someone
         | 
| 24 | 
            +
                    # actually calls where(), but we don't want to re-extract the file
         | 
| 25 | 
            +
                    # on every call of where(), so we'll do it once then store it in a
         | 
| 26 | 
            +
                    # global variable.
         | 
| 27 | 
            +
                    global _CACERT_CTX
         | 
| 28 | 
            +
                    global _CACERT_PATH
         | 
| 29 | 
            +
                    if _CACERT_PATH is None:
         | 
| 30 | 
            +
                        # This is slightly janky, the importlib.resources API wants you to
         | 
| 31 | 
            +
                        # manage the cleanup of this file, so it doesn't actually return a
         | 
| 32 | 
            +
                        # path, it returns a context manager that will give you the path
         | 
| 33 | 
            +
                        # when you enter it and will do any cleanup when you leave it. In
         | 
| 34 | 
            +
                        # the common case of not needing a temporary file, it will just
         | 
| 35 | 
            +
                        # return the file system location and the __exit__() is a no-op.
         | 
| 36 | 
            +
                        #
         | 
| 37 | 
            +
                        # We also have to hold onto the actual context manager, because
         | 
| 38 | 
            +
                        # it will do the cleanup whenever it gets garbage collected, so
         | 
| 39 | 
            +
                        # we will also store that at the global level as well.
         | 
| 40 | 
            +
                        _CACERT_CTX = as_file(files("certifi").joinpath("cacert.pem"))
         | 
| 41 | 
            +
                        _CACERT_PATH = str(_CACERT_CTX.__enter__())
         | 
| 42 | 
            +
                        atexit.register(exit_cacert_ctx)
         | 
| 43 | 
            +
             | 
| 44 | 
            +
                    return _CACERT_PATH
         | 
| 45 | 
            +
             | 
| 46 | 
            +
                def contents() -> str:
         | 
| 47 | 
            +
                    return files("certifi").joinpath("cacert.pem").read_text(encoding="ascii")
         | 
| 48 | 
            +
             | 
| 49 | 
            +
            elif sys.version_info >= (3, 7):
         | 
| 50 | 
            +
             | 
| 51 | 
            +
                from importlib.resources import path as get_path, read_text
         | 
| 52 | 
            +
             | 
| 53 | 
            +
                _CACERT_CTX = None
         | 
| 54 | 
            +
                _CACERT_PATH = None
         | 
| 55 | 
            +
             | 
| 56 | 
            +
                def where() -> str:
         | 
| 57 | 
            +
                    # This is slightly terrible, but we want to delay extracting the
         | 
| 58 | 
            +
                    # file in cases where we're inside of a zipimport situation until
         | 
| 59 | 
            +
                    # someone actually calls where(), but we don't want to re-extract
         | 
| 60 | 
            +
                    # the file on every call of where(), so we'll do it once then store
         | 
| 61 | 
            +
                    # it in a global variable.
         | 
| 62 | 
            +
                    global _CACERT_CTX
         | 
| 63 | 
            +
                    global _CACERT_PATH
         | 
| 64 | 
            +
                    if _CACERT_PATH is None:
         | 
| 65 | 
            +
                        # This is slightly janky, the importlib.resources API wants you
         | 
| 66 | 
            +
                        # to manage the cleanup of this file, so it doesn't actually
         | 
| 67 | 
            +
                        # return a path, it returns a context manager that will give
         | 
| 68 | 
            +
                        # you the path when you enter it and will do any cleanup when
         | 
| 69 | 
            +
                        # you leave it. In the common case of not needing a temporary
         | 
| 70 | 
            +
                        # file, it will just return the file system location and the
         | 
| 71 | 
            +
                        # __exit__() is a no-op.
         | 
| 72 | 
            +
                        #
         | 
| 73 | 
            +
                        # We also have to hold onto the actual context manager, because
         | 
| 74 | 
            +
                        # it will do the cleanup whenever it gets garbage collected, so
         | 
| 75 | 
            +
                        # we will also store that at the global level as well.
         | 
| 76 | 
            +
                        _CACERT_CTX = get_path("certifi", "cacert.pem")
         | 
| 77 | 
            +
                        _CACERT_PATH = str(_CACERT_CTX.__enter__())
         | 
| 78 | 
            +
                        atexit.register(exit_cacert_ctx)
         | 
| 79 | 
            +
             | 
| 80 | 
            +
                    return _CACERT_PATH
         | 
| 81 | 
            +
             | 
| 82 | 
            +
                def contents() -> str:
         | 
| 83 | 
            +
                    return read_text("certifi", "cacert.pem", encoding="ascii")
         | 
| 84 | 
            +
             | 
| 85 | 
            +
            else:
         | 
| 86 | 
            +
                import os
         | 
| 87 | 
            +
                import types
         | 
| 88 | 
            +
                from typing import Union
         | 
| 89 | 
            +
             | 
| 90 | 
            +
                Package = Union[types.ModuleType, str]
         | 
| 91 | 
            +
                Resource = Union[str, "os.PathLike"]
         | 
| 92 | 
            +
             | 
| 93 | 
            +
                # This fallback will work for Python versions prior to 3.7 that lack the
         | 
| 94 | 
            +
                # importlib.resources module but relies on the existing `where` function
         | 
| 95 | 
            +
                # so won't address issues with environments like PyOxidizer that don't set
         | 
| 96 | 
            +
                # __file__ on modules.
         | 
| 97 | 
            +
                def read_text(
         | 
| 98 | 
            +
                    package: Package,
         | 
| 99 | 
            +
                    resource: Resource,
         | 
| 100 | 
            +
                    encoding: str = 'utf-8',
         | 
| 101 | 
            +
                    errors: str = 'strict'
         | 
| 102 | 
            +
                ) -> str:
         | 
| 103 | 
            +
                    with open(where(), encoding=encoding) as data:
         | 
| 104 | 
            +
                        return data.read()
         | 
| 105 | 
            +
             | 
| 106 | 
            +
                # If we don't have importlib.resources, then we will just do the old logic
         | 
| 107 | 
            +
                # of assuming we're on the filesystem and munge the path directly.
         | 
| 108 | 
            +
                def where() -> str:
         | 
| 109 | 
            +
                    f = os.path.dirname(__file__)
         | 
| 110 | 
            +
             | 
| 111 | 
            +
                    return os.path.join(f, "cacert.pem")
         | 
| 112 | 
            +
             | 
| 113 | 
            +
                def contents() -> str:
         | 
| 114 | 
            +
                    return read_text("certifi", "cacert.pem", encoding="ascii")
         | 
    	
        .venv/lib/python3.13/site-packages/certifi/py.typed
    ADDED
    
    | 
            File without changes
         | 
    	
        .venv/lib/python3.13/site-packages/charset_normalizer-3.4.1.dist-info/INSTALLER
    ADDED
    
    | @@ -0,0 +1 @@ | |
|  | 
|  | |
| 1 | 
            +
            pip
         | 
    	
        .venv/lib/python3.13/site-packages/charset_normalizer-3.4.1.dist-info/LICENSE
    ADDED
    
    | @@ -0,0 +1,21 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            MIT License
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            Copyright (c) 2025 TAHRI Ahmed R.
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            Permission is hereby granted, free of charge, to any person obtaining a copy
         | 
| 6 | 
            +
            of this software and associated documentation files (the "Software"), to deal
         | 
| 7 | 
            +
            in the Software without restriction, including without limitation the rights
         | 
| 8 | 
            +
            to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
         | 
| 9 | 
            +
            copies of the Software, and to permit persons to whom the Software is
         | 
| 10 | 
            +
            furnished to do so, subject to the following conditions:
         | 
| 11 | 
            +
             | 
| 12 | 
            +
            The above copyright notice and this permission notice shall be included in all
         | 
| 13 | 
            +
            copies or substantial portions of the Software.
         | 
| 14 | 
            +
             | 
| 15 | 
            +
            THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
         | 
| 16 | 
            +
            IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
         | 
| 17 | 
            +
            FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
         | 
| 18 | 
            +
            AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
         | 
| 19 | 
            +
            LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
         | 
| 20 | 
            +
            OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
         | 
| 21 | 
            +
            SOFTWARE.
         | 
    	
        .venv/lib/python3.13/site-packages/charset_normalizer-3.4.1.dist-info/METADATA
    ADDED
    
    | @@ -0,0 +1,721 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            Metadata-Version: 2.1
         | 
| 2 | 
            +
            Name: charset-normalizer
         | 
| 3 | 
            +
            Version: 3.4.1
         | 
| 4 | 
            +
            Summary: The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet.
         | 
| 5 | 
            +
            Author-email: "Ahmed R. TAHRI" <[email protected]>
         | 
| 6 | 
            +
            Maintainer-email: "Ahmed R. TAHRI" <[email protected]>
         | 
| 7 | 
            +
            License: MIT
         | 
| 8 | 
            +
            Project-URL: Changelog, https://github.com/jawah/charset_normalizer/blob/master/CHANGELOG.md
         | 
| 9 | 
            +
            Project-URL: Documentation, https://charset-normalizer.readthedocs.io/
         | 
| 10 | 
            +
            Project-URL: Code, https://github.com/jawah/charset_normalizer
         | 
| 11 | 
            +
            Project-URL: Issue tracker, https://github.com/jawah/charset_normalizer/issues
         | 
| 12 | 
            +
            Keywords: encoding,charset,charset-detector,detector,normalization,unicode,chardet,detect
         | 
| 13 | 
            +
            Classifier: Development Status :: 5 - Production/Stable
         | 
| 14 | 
            +
            Classifier: Intended Audience :: Developers
         | 
| 15 | 
            +
            Classifier: License :: OSI Approved :: MIT License
         | 
| 16 | 
            +
            Classifier: Operating System :: OS Independent
         | 
| 17 | 
            +
            Classifier: Programming Language :: Python
         | 
| 18 | 
            +
            Classifier: Programming Language :: Python :: 3
         | 
| 19 | 
            +
            Classifier: Programming Language :: Python :: 3.7
         | 
| 20 | 
            +
            Classifier: Programming Language :: Python :: 3.8
         | 
| 21 | 
            +
            Classifier: Programming Language :: Python :: 3.9
         | 
| 22 | 
            +
            Classifier: Programming Language :: Python :: 3.10
         | 
| 23 | 
            +
            Classifier: Programming Language :: Python :: 3.11
         | 
| 24 | 
            +
            Classifier: Programming Language :: Python :: 3.12
         | 
| 25 | 
            +
            Classifier: Programming Language :: Python :: 3.13
         | 
| 26 | 
            +
            Classifier: Programming Language :: Python :: 3 :: Only
         | 
| 27 | 
            +
            Classifier: Programming Language :: Python :: Implementation :: CPython
         | 
| 28 | 
            +
            Classifier: Programming Language :: Python :: Implementation :: PyPy
         | 
| 29 | 
            +
            Classifier: Topic :: Text Processing :: Linguistic
         | 
| 30 | 
            +
            Classifier: Topic :: Utilities
         | 
| 31 | 
            +
            Classifier: Typing :: Typed
         | 
| 32 | 
            +
            Requires-Python: >=3.7
         | 
| 33 | 
            +
            Description-Content-Type: text/markdown
         | 
| 34 | 
            +
            License-File: LICENSE
         | 
| 35 | 
            +
            Provides-Extra: unicode-backport
         | 
| 36 | 
            +
             | 
| 37 | 
            +
            <h1 align="center">Charset Detection, for Everyone 👋</h1>
         | 
| 38 | 
            +
             | 
| 39 | 
            +
            <p align="center">
         | 
| 40 | 
            +
              <sup>The Real First Universal Charset Detector</sup><br>
         | 
| 41 | 
            +
              <a href="https://pypi.org/project/charset-normalizer">
         | 
| 42 | 
            +
                <img src="https://img.shields.io/pypi/pyversions/charset_normalizer.svg?orange=blue" />
         | 
| 43 | 
            +
              </a>
         | 
| 44 | 
            +
              <a href="https://pepy.tech/project/charset-normalizer/">
         | 
| 45 | 
            +
                <img alt="Download Count Total" src="https://static.pepy.tech/badge/charset-normalizer/month" />
         | 
| 46 | 
            +
              </a>
         | 
| 47 | 
            +
              <a href="https://bestpractices.coreinfrastructure.org/projects/7297">
         | 
| 48 | 
            +
                <img src="https://bestpractices.coreinfrastructure.org/projects/7297/badge">
         | 
| 49 | 
            +
              </a>
         | 
| 50 | 
            +
            </p>
         | 
| 51 | 
            +
            <p align="center">
         | 
| 52 | 
            +
              <sup><i>Featured Packages</i></sup><br>
         | 
| 53 | 
            +
              <a href="https://github.com/jawah/niquests">
         | 
| 54 | 
            +
               <img alt="Static Badge" src="https://img.shields.io/badge/Niquests-Best_HTTP_Client-cyan">
         | 
| 55 | 
            +
              </a>
         | 
| 56 | 
            +
              <a href="https://github.com/jawah/wassima">
         | 
| 57 | 
            +
               <img alt="Static Badge" src="https://img.shields.io/badge/Wassima-Certifi_Killer-cyan">
         | 
| 58 | 
            +
              </a>
         | 
| 59 | 
            +
            </p>
         | 
| 60 | 
            +
            <p align="center">
         | 
| 61 | 
            +
              <sup><i>In other languages (unofficial ports - by the community)</i></sup><br>
         | 
| 62 | 
            +
              <a href="https://github.com/nickspring/charset-normalizer-rs">
         | 
| 63 | 
            +
               <img alt="Static Badge" src="https://img.shields.io/badge/Rust-red">
         | 
| 64 | 
            +
              </a>
         | 
| 65 | 
            +
            </p>
         | 
| 66 | 
            +
             | 
| 67 | 
            +
            > A library that helps you read text from an unknown charset encoding.<br /> Motivated by `chardet`,
         | 
| 68 | 
            +
            > I'm trying to resolve the issue by taking a new approach.
         | 
| 69 | 
            +
            > All IANA character set names for which the Python core library provides codecs are supported.
         | 
| 70 | 
            +
             | 
| 71 | 
            +
            <p align="center">
         | 
| 72 | 
            +
              >>>>> <a href="https://charsetnormalizerweb.ousret.now.sh" target="_blank">👉 Try Me Online Now, Then Adopt Me 👈 </a> <<<<<
         | 
| 73 | 
            +
            </p>
         | 
| 74 | 
            +
             | 
| 75 | 
            +
            This project offers you an alternative to **Universal Charset Encoding Detector**, also known as **Chardet**.
         | 
| 76 | 
            +
             | 
| 77 | 
            +
            | Feature                                          | [Chardet](https://github.com/chardet/chardet) |                                         Charset Normalizer                                         | [cChardet](https://github.com/PyYoshi/cChardet) |
         | 
| 78 | 
            +
            |--------------------------------------------------|:---------------------------------------------:|:--------------------------------------------------------------------------------------------------:|:-----------------------------------------------:|
         | 
| 79 | 
            +
            | `Fast`                                           |                       ❌                       |                                                 ✅                                                  |                        ✅                        |
         | 
| 80 | 
            +
            | `Universal**`                                    |                       ❌                       |                                                 ✅                                                  |                        ❌                        |
         | 
| 81 | 
            +
            | `Reliable` **without** distinguishable standards |                       ❌                       |                                                 ✅                                                  |                        ✅                        |
         | 
| 82 | 
            +
            | `Reliable` **with** distinguishable standards    |                       ✅                       |                                                 ✅                                                  |                        ✅                        |
         | 
| 83 | 
            +
            | `License`                                        |           LGPL-2.1<br>_restrictive_           |                                                MIT                                                 |            MPL-1.1<br>_restrictive_             |
         | 
| 84 | 
            +
            | `Native Python`                                  |                       ✅                       |                                                 ✅                                                  |                        ❌                        |
         | 
| 85 | 
            +
            | `Detect spoken language`                         |                       ❌                       |                                                 ✅                                                  |                       N/A                       |
         | 
| 86 | 
            +
            | `UnicodeDecodeError Safety`                      |                       ❌                       |                                                 ✅                                                  |                        ❌                        |
         | 
| 87 | 
            +
            | `Whl Size (min)`                                 |                   193.6 kB                    |                                               42 kB                                                |                     ~200 kB                     |
         | 
| 88 | 
            +
            | `Supported Encoding`                             |                      33                       | 🎉 [99](https://charset-normalizer.readthedocs.io/en/latest/user/support.html#supported-encodings) |                       40                        |
         | 
| 89 | 
            +
             | 
| 90 | 
            +
            <p align="center">
         | 
| 91 | 
            +
            <img src="https://i.imgflip.com/373iay.gif" alt="Reading Normalized Text" width="226"/><img src="https://media.tenor.com/images/c0180f70732a18b4965448d33adba3d0/tenor.gif" alt="Cat Reading Text" width="200"/>
         | 
| 92 | 
            +
            </p>
         | 
| 93 | 
            +
             | 
| 94 | 
            +
            *\*\* : They are clearly using specific code for a specific encoding, even if it covers most of the commonly used ones*<br>
         | 
| 95 | 
            +
             | 
| 96 | 
            +
            ## ⚡ Performance
         | 
| 97 | 
            +
             | 
| 98 | 
            +
            This package offers better performance than its counterpart Chardet. Here are some numbers.
         | 
| 99 | 
            +
             | 
| 100 | 
            +
            | Package                                       | Accuracy | Mean per file (ms) | File per sec (est) |
         | 
| 101 | 
            +
            |-----------------------------------------------|:--------:|:------------------:|:------------------:|
         | 
| 102 | 
            +
            | [chardet](https://github.com/chardet/chardet) |   86 %   |       63 ms        |    16 file/sec     |
         | 
| 103 | 
            +
            | charset-normalizer                            | **98 %** |     **10 ms**      |    100 file/sec    |
         | 
| 104 | 
            +
             | 
| 105 | 
            +
            | Package                                       | 99th percentile | 95th percentile | 50th percentile |
         | 
| 106 | 
            +
            |-----------------------------------------------|:---------------:|:---------------:|:---------------:|
         | 
| 107 | 
            +
            | [chardet](https://github.com/chardet/chardet) |     265 ms      |      71 ms      |      7 ms       |
         | 
| 108 | 
            +
            | charset-normalizer                            |     100 ms      |      50 ms      |      5 ms       |
         | 
| 109 | 
            +
             | 
| 110 | 
            +
            _updated as of december 2024 using CPython 3.12_
         | 
| 111 | 
            +
             | 
| 112 | 
            +
            Chardet's performance on larger files (1MB+) is very poor. Expect a huge difference on large payloads.
         | 
| 113 | 
            +
             | 
| 114 | 
            +
            > Stats are generated using 400+ files using default parameters. More details on used files, see GHA workflows.
         | 
| 115 | 
            +
            > And yes, these results might change at any time. The dataset can be updated to include more files.
         | 
| 116 | 
            +
            > The actual delays heavily depend on your CPU capabilities. The factors should remain the same.
         | 
| 117 | 
            +
            > Keep in mind that the stats are generous and that Chardet's accuracy vs ours is measured using Chardet's initial capability
         | 
| 118 | 
            +
            > (e.g. Supported Encoding) Challenge-them if you want.
         | 
| 119 | 
            +
             | 
| 120 | 
            +
            ## ✨ Installation
         | 
| 121 | 
            +
             | 
| 122 | 
            +
            Using pip:
         | 
| 123 | 
            +
             | 
| 124 | 
            +
            ```sh
         | 
| 125 | 
            +
            pip install charset-normalizer -U
         | 
| 126 | 
            +
            ```
         | 
| 127 | 
            +
             | 
| 128 | 
            +
            ## 🚀 Basic Usage
         | 
| 129 | 
            +
             | 
| 130 | 
            +
            ### CLI
         | 
| 131 | 
            +
            This package comes with a CLI.
         | 
| 132 | 
            +
             | 
| 133 | 
            +
            ```
         | 
| 134 | 
            +
            usage: normalizer [-h] [-v] [-a] [-n] [-m] [-r] [-f] [-t THRESHOLD]
         | 
| 135 | 
            +
                              file [file ...]
         | 
| 136 | 
            +
             | 
| 137 | 
            +
            The Real First Universal Charset Detector. Discover originating encoding used
         | 
| 138 | 
            +
            on text file. Normalize text to unicode.
         | 
| 139 | 
            +
             | 
| 140 | 
            +
            positional arguments:
         | 
| 141 | 
            +
              files                 File(s) to be analysed
         | 
| 142 | 
            +
             | 
| 143 | 
            +
            optional arguments:
         | 
| 144 | 
            +
              -h, --help            show this help message and exit
         | 
| 145 | 
            +
              -v, --verbose         Display complementary information about file if any.
         | 
| 146 | 
            +
                                    Stdout will contain logs about the detection process.
         | 
| 147 | 
            +
              -a, --with-alternative
         | 
| 148 | 
            +
                                    Output complementary possibilities if any. Top-level
         | 
| 149 | 
            +
                                    JSON WILL be a list.
         | 
| 150 | 
            +
              -n, --normalize       Permit to normalize input file. If not set, program
         | 
| 151 | 
            +
                                    does not write anything.
         | 
| 152 | 
            +
              -m, --minimal         Only output the charset detected to STDOUT. Disabling
         | 
| 153 | 
            +
                                    JSON output.
         | 
| 154 | 
            +
              -r, --replace         Replace file when trying to normalize it instead of
         | 
| 155 | 
            +
                                    creating a new one.
         | 
| 156 | 
            +
              -f, --force           Replace file without asking if you are sure, use this
         | 
| 157 | 
            +
                                    flag with caution.
         | 
| 158 | 
            +
              -t THRESHOLD, --threshold THRESHOLD
         | 
| 159 | 
            +
                                    Define a custom maximum amount of chaos allowed in
         | 
| 160 | 
            +
                                    decoded content. 0. <= chaos <= 1.
         | 
| 161 | 
            +
              --version             Show version information and exit.
         | 
| 162 | 
            +
            ```
         | 
| 163 | 
            +
             | 
| 164 | 
            +
            ```bash
         | 
| 165 | 
            +
            normalizer ./data/sample.1.fr.srt
         | 
| 166 | 
            +
            ```
         | 
| 167 | 
            +
             | 
| 168 | 
            +
            or
         | 
| 169 | 
            +
             | 
| 170 | 
            +
            ```bash
         | 
| 171 | 
            +
            python -m charset_normalizer ./data/sample.1.fr.srt
         | 
| 172 | 
            +
            ```
         | 
| 173 | 
            +
             | 
| 174 | 
            +
            🎉 Since version 1.4.0, the CLI produces an easily usable stdout result in JSON format.
         | 
| 175 | 
            +
             | 
| 176 | 
            +
            ```json
         | 
| 177 | 
            +
            {
         | 
| 178 | 
            +
                "path": "/home/default/projects/charset_normalizer/data/sample.1.fr.srt",
         | 
| 179 | 
            +
                "encoding": "cp1252",
         | 
| 180 | 
            +
                "encoding_aliases": [
         | 
| 181 | 
            +
                    "1252",
         | 
| 182 | 
            +
                    "windows_1252"
         | 
| 183 | 
            +
                ],
         | 
| 184 | 
            +
                "alternative_encodings": [
         | 
| 185 | 
            +
                    "cp1254",
         | 
| 186 | 
            +
                    "cp1256",
         | 
| 187 | 
            +
                    "cp1258",
         | 
| 188 | 
            +
                    "iso8859_14",
         | 
| 189 | 
            +
                    "iso8859_15",
         | 
| 190 | 
            +
                    "iso8859_16",
         | 
| 191 | 
            +
                    "iso8859_3",
         | 
| 192 | 
            +
                    "iso8859_9",
         | 
| 193 | 
            +
                    "latin_1",
         | 
| 194 | 
            +
                    "mbcs"
         | 
| 195 | 
            +
                ],
         | 
| 196 | 
            +
                "language": "French",
         | 
| 197 | 
            +
                "alphabets": [
         | 
| 198 | 
            +
                    "Basic Latin",
         | 
| 199 | 
            +
                    "Latin-1 Supplement"
         | 
| 200 | 
            +
                ],
         | 
| 201 | 
            +
                "has_sig_or_bom": false,
         | 
| 202 | 
            +
                "chaos": 0.149,
         | 
| 203 | 
            +
                "coherence": 97.152,
         | 
| 204 | 
            +
                "unicode_path": null,
         | 
| 205 | 
            +
                "is_preferred": true
         | 
| 206 | 
            +
            }
         | 
| 207 | 
            +
            ```
         | 
| 208 | 
            +
             | 
| 209 | 
            +
            ### Python
         | 
| 210 | 
            +
            *Just print out normalized text*
         | 
| 211 | 
            +
            ```python
         | 
| 212 | 
            +
            from charset_normalizer import from_path
         | 
| 213 | 
            +
             | 
| 214 | 
            +
            results = from_path('./my_subtitle.srt')
         | 
| 215 | 
            +
             | 
| 216 | 
            +
            print(str(results.best()))
         | 
| 217 | 
            +
            ```
         | 
| 218 | 
            +
             | 
| 219 | 
            +
            *Upgrade your code without effort*
         | 
| 220 | 
            +
            ```python
         | 
| 221 | 
            +
            from charset_normalizer import detect
         | 
| 222 | 
            +
            ```
         | 
| 223 | 
            +
             | 
| 224 | 
            +
            The above code will behave the same as **chardet**. We ensure that we offer the best (reasonable) BC result possible.
         | 
| 225 | 
            +
             | 
| 226 | 
            +
            See the docs for advanced usage : [readthedocs.io](https://charset-normalizer.readthedocs.io/en/latest/)
         | 
| 227 | 
            +
             | 
| 228 | 
            +
            ## 😇 Why
         | 
| 229 | 
            +
             | 
| 230 | 
            +
            When I started using Chardet, I noticed that it was not suited to my expectations, and I wanted to propose a
         | 
| 231 | 
            +
            reliable alternative using a completely different method. Also! I never back down on a good challenge!
         | 
| 232 | 
            +
             | 
| 233 | 
            +
            I **don't care** about the **originating charset** encoding, because **two different tables** can
         | 
| 234 | 
            +
            produce **two identical rendered strings.**
         | 
| 235 | 
            +
            What I want is to get readable text, the best I can.
         | 
| 236 | 
            +
             | 
| 237 | 
            +
            In a way, **I'm brute forcing text decoding.** How cool is that ? 😎
         | 
| 238 | 
            +
             | 
| 239 | 
            +
            Don't confuse package **ftfy** with charset-normalizer or chardet. ftfy's goal is to repair Unicode strings, whereas charset-normalizer's goal is to convert a raw file in an unknown encoding to Unicode.
         | 
| 240 | 
            +
             | 
| 241 | 
            +
            ## 🍰 How
         | 
| 242 | 
            +
             | 
| 243 | 
            +
              - Discard all charset encoding tables that could not fit the binary content.
         | 
| 244 | 
            +
              - Measure noise, or the mess once opened (by chunks) with a corresponding charset encoding.
         | 
| 245 | 
            +
              - Extract matches with the lowest mess detected.
         | 
| 246 | 
            +
              - Additionally, we measure coherence / probe for a language.
         | 
| 247 | 
            +
             | 
| 248 | 
            +
            **Wait a minute**, what is noise/mess and coherence according to **YOU ?**
         | 
| 249 | 
            +
             | 
| 250 | 
            +
            *Noise :* I opened hundreds of text files, **written by humans**, with the wrong encoding table. **I observed**, then
         | 
| 251 | 
            +
            **I established** some ground rules about **what is obvious** when **it seems like** a mess (aka. defining noise in rendered text).
         | 
| 252 | 
            +
             I know that my interpretation of what is noise is probably incomplete, feel free to contribute in order to
         | 
| 253 | 
            +
             improve or rewrite it.
         | 
| 254 | 
            +
             | 
| 255 | 
            +
            *Coherence :* For each language there is on earth, we have computed ranked letter appearance occurrences (the best we can). So I thought
         | 
| 256 | 
            +
            that intel is worth something here. So I use those records against decoded text to check if I can detect intelligent design.
         | 
| 257 | 
            +
             | 
| 258 | 
            +
            ## ⚡ Known limitations
         | 
| 259 | 
            +
             | 
| 260 | 
            +
              - Language detection is unreliable when text contains two or more languages sharing identical letters. (eg. HTML (english tags) + Turkish content (Sharing Latin characters))
         | 
| 261 | 
            +
              - Every charset detector heavily depends on sufficient content. In common cases, do not bother running detection on very tiny content.
         | 
| 262 | 
            +
             | 
| 263 | 
            +
            ## ⚠️ About Python EOLs
         | 
| 264 | 
            +
             | 
| 265 | 
            +
            **If you are running:**
         | 
| 266 | 
            +
             | 
| 267 | 
            +
            - Python >=2.7,<3.5: Unsupported
         | 
| 268 | 
            +
            - Python 3.5: charset-normalizer < 2.1
         | 
| 269 | 
            +
            - Python 3.6: charset-normalizer < 3.1
         | 
| 270 | 
            +
            - Python 3.7: charset-normalizer < 4.0
         | 
| 271 | 
            +
             | 
| 272 | 
            +
            Upgrade your Python interpreter as soon as possible.
         | 
| 273 | 
            +
             | 
| 274 | 
            +
            ## 👤 Contributing
         | 
| 275 | 
            +
             | 
| 276 | 
            +
            Contributions, issues and feature requests are very much welcome.<br />
         | 
| 277 | 
            +
            Feel free to check [issues page](https://github.com/ousret/charset_normalizer/issues) if you want to contribute.
         | 
| 278 | 
            +
             | 
| 279 | 
            +
            ## 📝 License
         | 
| 280 | 
            +
             | 
| 281 | 
            +
            Copyright © [Ahmed TAHRI @Ousret](https://github.com/Ousret).<br />
         | 
| 282 | 
            +
            This project is [MIT](https://github.com/Ousret/charset_normalizer/blob/master/LICENSE) licensed.
         | 
| 283 | 
            +
             | 
| 284 | 
            +
            Characters frequencies used in this project © 2012 [Denny Vrandečić](http://simia.net/letters/)
         | 
| 285 | 
            +
             | 
| 286 | 
            +
            ## 💼 For Enterprise
         | 
| 287 | 
            +
             | 
| 288 | 
            +
            Professional support for charset-normalizer is available as part of the [Tidelift
         | 
| 289 | 
            +
            Subscription][1]. Tidelift gives software development teams a single source for
         | 
| 290 | 
            +
            purchasing and maintaining their software, with professional grade assurances
         | 
| 291 | 
            +
            from the experts who know it best, while seamlessly integrating with existing
         | 
| 292 | 
            +
            tools.
         | 
| 293 | 
            +
             | 
| 294 | 
            +
            [1]: https://tidelift.com/subscription/pkg/pypi-charset-normalizer?utm_source=pypi-charset-normalizer&utm_medium=readme
         | 
| 295 | 
            +
             | 
| 296 | 
            +
            [![OpenSSF Best Practices](https://www.bestpractices.dev/projects/7297/badge)](https://www.bestpractices.dev/projects/7297)
         | 
| 297 | 
            +
             | 
| 298 | 
            +
            # Changelog
         | 
| 299 | 
            +
            All notable changes to charset-normalizer will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
         | 
| 300 | 
            +
            The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
         | 
| 301 | 
            +
             | 
| 302 | 
            +
            ## [3.4.1](https://github.com/Ousret/charset_normalizer/compare/3.4.0...3.4.1) (2024-12-24)
         | 
| 303 | 
            +
             | 
| 304 | 
            +
            ### Changed
         | 
| 305 | 
            +
            - Project metadata are now stored using `pyproject.toml` instead of `setup.cfg` using setuptools as the build backend.
         | 
| 306 | 
            +
            - Enforce annotation delayed loading for simpler and more consistent types in the project.
         | 
| 307 | 
            +
            - Optional mypyc compilation upgraded to version 1.14 for Python >= 3.8
         | 
| 308 | 
            +
             | 
| 309 | 
            +
            ### Added
         | 
| 310 | 
            +
            - pre-commit configuration.
         | 
| 311 | 
            +
            - noxfile.
         | 
| 312 | 
            +
             | 
| 313 | 
            +
            ### Removed
         | 
| 314 | 
            +
            - `build-requirements.txt` as per using `pyproject.toml` native build configuration.
         | 
| 315 | 
            +
            - `bin/integration.py` and `bin/serve.py` in favor of downstream integration test (see noxfile).
         | 
| 316 | 
            +
            - `setup.cfg` in favor of `pyproject.toml` metadata configuration.
         | 
| 317 | 
            +
            - Unused `utils.range_scan` function.
         | 
| 318 | 
            +
             | 
| 319 | 
            +
            ### Fixed
         | 
| 320 | 
            +
            - Converting content to Unicode bytes may insert `utf_8` instead of preferred `utf-8`. (#572)
         | 
| 321 | 
            +
            - Deprecation warning "'count' is passed as positional argument" when converting to Unicode bytes on Python 3.13+
         | 
| 322 | 
            +
             | 
| 323 | 
            +
            ## [3.4.0](https://github.com/Ousret/charset_normalizer/compare/3.3.2...3.4.0) (2024-10-08)
         | 
| 324 | 
            +
             | 
| 325 | 
            +
            ### Added
         | 
| 326 | 
            +
            - Argument `--no-preemptive` in the CLI to prevent the detector from searching for hints.
         | 
| 327 | 
            +
            - Support for Python 3.13 (#512)
         | 
| 328 | 
            +
             | 
| 329 | 
            +
            ### Fixed
         | 
| 330 | 
            +
            - Relax the TypeError exception thrown when trying to compare a CharsetMatch with anything else than a CharsetMatch.
         | 
| 331 | 
            +
            - Improved the general reliability of the detector based on user feedback. (#520) (#509) (#498) (#407) (#537)
         | 
| 332 | 
            +
            - Declared charset in content (preemptive detection) not changed when converting to utf-8 bytes. (#381)
         | 
| 333 | 
            +
             | 
| 334 | 
            +
            ## [3.3.2](https://github.com/Ousret/charset_normalizer/compare/3.3.1...3.3.2) (2023-10-31)
         | 
| 335 | 
            +
             | 
| 336 | 
            +
            ### Fixed
         | 
| 337 | 
            +
            - Unintentional memory usage regression when using large payload that match several encoding (#376)
         | 
| 338 | 
            +
            - Regression on some detection case showcased in the documentation (#371)
         | 
| 339 | 
            +
             | 
| 340 | 
            +
            ### Added
         | 
| 341 | 
            +
            - Noise (md) probe that identifies malformed Arabic representation due to the presence of letters in isolated form (credit to my wife)
         | 
| 342 | 
            +
             | 
| 343 | 
            +
            ## [3.3.1](https://github.com/Ousret/charset_normalizer/compare/3.3.0...3.3.1) (2023-10-22)
         | 
| 344 | 
            +
             | 
| 345 | 
            +
            ### Changed
         | 
| 346 | 
            +
            - Optional mypyc compilation upgraded to version 1.6.1 for Python >= 3.8
         | 
| 347 | 
            +
            - Improved the general detection reliability based on reports from the community
         | 
| 348 | 
            +
             | 
| 349 | 
            +
            ## [3.3.0](https://github.com/Ousret/charset_normalizer/compare/3.2.0...3.3.0) (2023-09-30)
         | 
| 350 | 
            +
             | 
| 351 | 
            +
            ### Added
         | 
| 352 | 
            +
            - Allow to execute the CLI (e.g. normalizer) through `python -m charset_normalizer.cli` or `python -m charset_normalizer`
         | 
| 353 | 
            +
            - Support for 9 forgotten encodings that are supported by Python but unlisted in `encoding.aliases` as they have no alias (#323)
         | 
| 354 | 
            +
             | 
| 355 | 
            +
            ### Removed
         | 
| 356 | 
            +
            - (internal) Redundant utils.is_ascii function and unused function is_private_use_only
         | 
| 357 | 
            +
            - (internal) charset_normalizer.assets is moved inside charset_normalizer.constant
         | 
| 358 | 
            +
             | 
| 359 | 
            +
            ### Changed
         | 
| 360 | 
            +
            - (internal) Unicode code blocks in constants are updated using the latest v15.0.0 definition to improve detection
         | 
| 361 | 
            +
            - Optional mypyc compilation upgraded to version 1.5.1 for Python >= 3.8
         | 
| 362 | 
            +
             | 
| 363 | 
            +
            ### Fixed
         | 
| 364 | 
            +
            - Unable to properly sort CharsetMatch when both chaos/noise and coherence were close due to an unreachable condition in \_\_lt\_\_ (#350)
         | 
| 365 | 
            +
             | 
| 366 | 
            +
            ## [3.2.0](https://github.com/Ousret/charset_normalizer/compare/3.1.0...3.2.0) (2023-06-07)
         | 
| 367 | 
            +
             | 
| 368 | 
            +
            ### Changed
         | 
| 369 | 
            +
            - Typehint for function `from_path` no longer enforces `PathLike` as its first argument
         | 
| 370 | 
            +
            - Minor improvement over the global detection reliability
         | 
| 371 | 
            +
             | 
| 372 | 
            +
            ### Added
         | 
| 373 | 
            +
            - Introduce function `is_binary` that relies on main capabilities, and optimized to detect binaries
         | 
| 374 | 
            +
            - Propagate `enable_fallback` argument throughout `from_bytes`, `from_path`, and `from_fp` that allow a deeper control over the detection (default True)
         | 
| 375 | 
            +
            - Explicit support for Python 3.12
         | 
| 376 | 
            +
             | 
| 377 | 
            +
            ### Fixed
         | 
| 378 | 
            +
            - Edge case detection failure where a file would contain 'very-long' camel cased word (Issue #289)
         | 
| 379 | 
            +
             | 
| 380 | 
            +
            ## [3.1.0](https://github.com/Ousret/charset_normalizer/compare/3.0.1...3.1.0) (2023-03-06)
         | 
| 381 | 
            +
             | 
| 382 | 
            +
            ### Added
         | 
| 383 | 
            +
            - Argument `should_rename_legacy` for legacy function `detect` and disregard any new arguments without errors (PR #262)
         | 
| 384 | 
            +
             | 
| 385 | 
            +
            ### Removed
         | 
| 386 | 
            +
            - Support for Python 3.6 (PR #260)
         | 
| 387 | 
            +
             | 
| 388 | 
            +
            ### Changed
         | 
| 389 | 
            +
            - Optional speedup provided by mypy/c 1.0.1
         | 
| 390 | 
            +
             | 
| 391 | 
            +
            ## [3.0.1](https://github.com/Ousret/charset_normalizer/compare/3.0.0...3.0.1) (2022-11-18)
         | 
| 392 | 
            +
             | 
| 393 | 
            +
            ### Fixed
         | 
| 394 | 
            +
            - Multi-bytes cutter/chunk generator did not always cut correctly (PR #233)
         | 
| 395 | 
            +
             | 
| 396 | 
            +
            ### Changed
         | 
| 397 | 
            +
            - Speedup provided by mypy/c 0.990 on Python >= 3.7
         | 
| 398 | 
            +
             | 
| 399 | 
            +
            ## [3.0.0](https://github.com/Ousret/charset_normalizer/compare/2.1.1...3.0.0) (2022-10-20)
         | 
| 400 | 
            +
             | 
| 401 | 
            +
            ### Added
         | 
| 402 | 
            +
            - Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results
         | 
| 403 | 
            +
            - Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES
         | 
| 404 | 
            +
            - Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio
         | 
| 405 | 
            +
            - `normalizer --version` now specifies whether the current version provides extra speedup (meaning mypyc compilation whl)
         | 
| 406 | 
            +
             | 
| 407 | 
            +
            ### Changed
         | 
| 408 | 
            +
            - Build with static metadata using 'build' frontend
         | 
| 409 | 
            +
            - Make the language detection stricter
         | 
| 410 | 
            +
            - Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1
         | 
| 411 | 
            +
             | 
| 412 | 
            +
            ### Fixed
         | 
| 413 | 
            +
            - CLI with opt --normalize fail when using full path for files
         | 
| 414 | 
            +
            - TooManyAccentuatedPlugin induce false positive on the mess detection when too few alpha character have been fed to it
         | 
| 415 | 
            +
            - Sphinx warnings when generating the documentation
         | 
| 416 | 
            +
             | 
| 417 | 
            +
            ### Removed
         | 
| 418 | 
            +
            - Coherence detector no longer return 'Simple English' instead return 'English'
         | 
| 419 | 
            +
            - Coherence detector no longer return 'Classical Chinese' instead return 'Chinese'
         | 
| 420 | 
            +
            - Breaking: Method `first()` and `best()` from CharsetMatch
         | 
| 421 | 
            +
            - UTF-7 will no longer appear as "detected" without a recognized SIG/mark (is unreliable/conflict with ASCII)
         | 
| 422 | 
            +
            - Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
         | 
| 423 | 
            +
            - Breaking: Top-level function `normalize`
         | 
| 424 | 
            +
            - Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
         | 
| 425 | 
            +
            - Support for the backport `unicodedata2`
         | 
| 426 | 
            +
             | 
| 427 | 
            +
            ## [3.0.0rc1](https://github.com/Ousret/charset_normalizer/compare/3.0.0b2...3.0.0rc1) (2022-10-18)
         | 
| 428 | 
            +
             | 
| 429 | 
            +
            ### Added
         | 
| 430 | 
            +
            - Extend the capability of explain=True when cp_isolation contains at most two entries (min one), will log in details of the Mess-detector results
         | 
| 431 | 
            +
            - Support for alternative language frequency set in charset_normalizer.assets.FREQUENCIES
         | 
| 432 | 
            +
            - Add parameter `language_threshold` in `from_bytes`, `from_path` and `from_fp` to adjust the minimum expected coherence ratio
         | 
| 433 | 
            +
             | 
| 434 | 
            +
            ### Changed
         | 
| 435 | 
            +
            - Build with static metadata using 'build' frontend
         | 
| 436 | 
            +
            - Make the language detection stricter
         | 
| 437 | 
            +
             | 
| 438 | 
            +
            ### Fixed
         | 
| 439 | 
            +
            - CLI with opt --normalize fail when using full path for files
         | 
| 440 | 
            +
            - TooManyAccentuatedPlugin induce false positive on the mess detection when too few alpha character have been fed to it
         | 
| 441 | 
            +
             | 
| 442 | 
            +
            ### Removed
         | 
| 443 | 
            +
            - Coherence detector no longer return 'Simple English' instead return 'English'
         | 
| 444 | 
            +
            - Coherence detector no longer return 'Classical Chinese' instead return 'Chinese'
         | 
| 445 | 
            +
             | 
| 446 | 
            +
            ## [3.0.0b2](https://github.com/Ousret/charset_normalizer/compare/3.0.0b1...3.0.0b2) (2022-08-21)
         | 
| 447 | 
            +
             | 
| 448 | 
            +
            ### Added
         | 
| 449 | 
            +
            - `normalizer --version` now specifies whether the current version provides extra speedup (meaning mypyc compilation whl)
         | 
| 450 | 
            +
             | 
| 451 | 
            +
            ### Removed
         | 
| 452 | 
            +
            - Breaking: Method `first()` and `best()` from CharsetMatch
         | 
| 453 | 
            +
            - UTF-7 will no longer appear as "detected" without a recognized SIG/mark (is unreliable/conflict with ASCII)
         | 
| 454 | 
            +
             | 
| 455 | 
            +
            ### Fixed
         | 
| 456 | 
            +
            - Sphinx warnings when generating the documentation
         | 
| 457 | 
            +
             | 
| 458 | 
            +
            ## [3.0.0b1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...3.0.0b1) (2022-08-15)
         | 
| 459 | 
            +
             | 
| 460 | 
            +
            ### Changed
         | 
| 461 | 
            +
            - Optional: Module `md.py` can be compiled using Mypyc to provide an extra speedup up to 4x faster than v2.1
         | 
| 462 | 
            +
             | 
| 463 | 
            +
            ### Removed
         | 
| 464 | 
            +
            - Breaking: Class aliases CharsetDetector, CharsetDoctor, CharsetNormalizerMatch and CharsetNormalizerMatches
         | 
| 465 | 
            +
            - Breaking: Top-level function `normalize`
         | 
| 466 | 
            +
            - Breaking: Properties `chaos_secondary_pass`, `coherence_non_latin` and `w_counter` from CharsetMatch
         | 
| 467 | 
            +
            - Support for the backport `unicodedata2`
         | 
| 468 | 
            +
             | 
| 469 | 
            +
            ## [2.1.1](https://github.com/Ousret/charset_normalizer/compare/2.1.0...2.1.1) (2022-08-19)
         | 
| 470 | 
            +
             | 
| 471 | 
            +
            ### Deprecated
         | 
| 472 | 
            +
            - Function `normalize` scheduled for removal in 3.0
         | 
| 473 | 
            +
             | 
| 474 | 
            +
            ### Changed
         | 
| 475 | 
            +
            - Removed useless call to decode in fn is_unprintable (#206)
         | 
| 476 | 
            +
             | 
| 477 | 
            +
            ### Fixed
         | 
| 478 | 
            +
            - Third-party library (i18n xgettext) crashing not recognizing utf_8 (PEP 263) with underscore from [@aleksandernovikov](https://github.com/aleksandernovikov) (#204)
         | 
| 479 | 
            +
             | 
| 480 | 
            +
            ## [2.1.0](https://github.com/Ousret/charset_normalizer/compare/2.0.12...2.1.0) (2022-06-19)
         | 
| 481 | 
            +
             | 
| 482 | 
            +
            ### Added
         | 
| 483 | 
            +
            - Output the Unicode table version when running the CLI with `--version` (PR #194)
         | 
| 484 | 
            +
             | 
| 485 | 
            +
            ### Changed
         | 
| 486 | 
            +
            - Re-use decoded buffer for single byte character sets from [@nijel](https://github.com/nijel) (PR #175)
         | 
| 487 | 
            +
            - Fixing some performance bottlenecks from [@deedy5](https://github.com/deedy5) (PR #183)
         | 
| 488 | 
            +
             | 
| 489 | 
            +
            ### Fixed
         | 
| 490 | 
            +
            - Workaround potential bug in cpython with Zero Width No-Break Space located in Arabic Presentation Forms-B, Unicode 1.1 not acknowledged as space (PR #175)
         | 
| 491 | 
            +
            - CLI default threshold aligned with the API threshold from [@oleksandr-kuzmenko](https://github.com/oleksandr-kuzmenko) (PR #181)
         | 
| 492 | 
            +
             | 
| 493 | 
            +
            ### Removed
         | 
| 494 | 
            +
            - Support for Python 3.5 (PR #192)
         | 
| 495 | 
            +
             | 
| 496 | 
            +
            ### Deprecated
         | 
| 497 | 
            +
            - Use of backport unicodedata from `unicodedata2` as Python is quickly catching up, scheduled for removal in 3.0 (PR #194)
         | 
| 498 | 
            +
             | 
| 499 | 
            +
            ## [2.0.12](https://github.com/Ousret/charset_normalizer/compare/2.0.11...2.0.12) (2022-02-12)
         | 
| 500 | 
            +
             | 
| 501 | 
            +
            ### Fixed
         | 
| 502 | 
            +
            - ASCII miss-detection on rare cases (PR #170)
         | 
| 503 | 
            +
             | 
| 504 | 
            +
            ## [2.0.11](https://github.com/Ousret/charset_normalizer/compare/2.0.10...2.0.11) (2022-01-30)
         | 
| 505 | 
            +
             | 
| 506 | 
            +
            ### Added
         | 
| 507 | 
            +
            - Explicit support for Python 3.11 (PR #164)
         | 
| 508 | 
            +
             | 
| 509 | 
            +
            ### Changed
         | 
| 510 | 
            +
            - The logging behavior has been completely reviewed, now using only TRACE and DEBUG levels (PR #163 #165)
         | 
| 511 | 
            +
             | 
| 512 | 
            +
            ## [2.0.10](https://github.com/Ousret/charset_normalizer/compare/2.0.9...2.0.10) (2022-01-04)
         | 
| 513 | 
            +
             | 
| 514 | 
            +
            ### Fixed
         | 
| 515 | 
            +
            - Fallback match entries might lead to UnicodeDecodeError for large bytes sequence (PR #154)
         | 
| 516 | 
            +
             | 
| 517 | 
            +
            ### Changed
         | 
| 518 | 
            +
            - Skipping the language-detection (CD) on ASCII (PR #155)
         | 
| 519 | 
            +
             | 
| 520 | 
            +
            ## [2.0.9](https://github.com/Ousret/charset_normalizer/compare/2.0.8...2.0.9) (2021-12-03)
         | 
| 521 | 
            +
             | 
| 522 | 
            +
            ### Changed
         | 
| 523 | 
            +
            - Moderating the logging impact (since 2.0.8) for specific environments (PR #147)
         | 
| 524 | 
            +
             | 
| 525 | 
            +
            ### Fixed
         | 
| 526 | 
            +
            - Wrong logging level applied when setting kwarg `explain` to True (PR #146)
         | 
| 527 | 
            +
             | 
| 528 | 
            +
            ## [2.0.8](https://github.com/Ousret/charset_normalizer/compare/2.0.7...2.0.8) (2021-11-24)
         | 
| 529 | 
            +
            ### Changed
         | 
| 530 | 
            +
            - Improvement over Vietnamese detection (PR #126)
         | 
| 531 | 
            +
            - MD improvement on trailing data and long foreign (non-pure latin) data (PR #124)
         | 
| 532 | 
            +
            - Efficiency improvements in cd/alphabet_languages from [@adbar](https://github.com/adbar) (PR #122)
         | 
| 533 | 
            +
            - call sum() without an intermediary list following PEP 289 recommendations from [@adbar](https://github.com/adbar) (PR #129)
         | 
| 534 | 
            +
            - Code style as refactored by Sourcery-AI (PR #131)
         | 
| 535 | 
            +
            - Minor adjustment on the MD around european words (PR #133)
         | 
| 536 | 
            +
            - Remove and replace SRTs from assets / tests (PR #139)
         | 
| 537 | 
            +
            - Initialize the library logger with a `NullHandler` by default from [@nmaynes](https://github.com/nmaynes) (PR #135)
         | 
| 538 | 
            +
            - Setting kwarg `explain` to True will add provisionally (bounded to function lifespan) a specific stream handler (PR #135)
         | 
| 539 | 
            +
             | 
| 540 | 
            +
            ### Fixed
         | 
| 541 | 
            +
            - Fix large (misleading) sequence giving UnicodeDecodeError (PR #137)
         | 
| 542 | 
            +
            - Avoid using too insignificant chunk (PR #137)
         | 
| 543 | 
            +
             | 
| 544 | 
            +
            ### Added
         | 
| 545 | 
            +
            - Add and expose function `set_logging_handler` to configure a specific StreamHandler from [@nmaynes](https://github.com/nmaynes) (PR #135)
         | 
| 546 | 
            +
            - Add `CHANGELOG.md` entries, format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) (PR #141)
         | 
| 547 | 
            +
             | 
| 548 | 
            +
            ## [2.0.7](https://github.com/Ousret/charset_normalizer/compare/2.0.6...2.0.7) (2021-10-11)
         | 
| 549 | 
            +
            ### Added
         | 
| 550 | 
            +
            - Add support for Kazakh (Cyrillic) language detection (PR #109)
         | 
| 551 | 
            +
             | 
| 552 | 
            +
            ### Changed
         | 
| 553 | 
            +
            - Further, improve inferring the language from a given single-byte code page (PR #112)
         | 
| 554 | 
            +
            - Vainly trying to leverage PEP263 when PEP3120 is not supported (PR #116)
         | 
| 555 | 
            +
            - Refactoring for potential performance improvements in loops from [@adbar](https://github.com/adbar) (PR #113)
         | 
| 556 | 
            +
            - Various detection improvement (MD+CD) (PR #117)
         | 
| 557 | 
            +
             | 
| 558 | 
            +
            ### Removed
         | 
| 559 | 
            +
            - Remove redundant logging entry about detected language(s) (PR #115)
         | 
| 560 | 
            +
             | 
| 561 | 
            +
            ### Fixed
         | 
| 562 | 
            +
            - Fix a minor inconsistency between Python 3.5 and other versions regarding language detection (PR #117 #102)
         | 
| 563 | 
            +
             | 
| 564 | 
            +
            ## [2.0.6](https://github.com/Ousret/charset_normalizer/compare/2.0.5...2.0.6) (2021-09-18)
         | 
| 565 | 
            +
            ### Fixed
         | 
| 566 | 
            +
            - Unforeseen regression with the loss of the backward-compatibility with some older minor of Python 3.5.x (PR #100)
         | 
| 567 | 
            +
            - Fix CLI crash when using --minimal output in certain cases (PR #103)
         | 
| 568 | 
            +
             | 
| 569 | 
            +
            ### Changed
         | 
| 570 | 
            +
            - Minor improvement to the detection efficiency (less than 1%) (PR #106 #101)
         | 
| 571 | 
            +
             | 
| 572 | 
            +
            ## [2.0.5](https://github.com/Ousret/charset_normalizer/compare/2.0.4...2.0.5) (2021-09-14)
         | 
| 573 | 
            +
            ### Changed
         | 
| 574 | 
            +
            - The project now complies with: flake8, mypy, isort and black to ensure a better overall quality (PR #81)
         | 
| 575 | 
            +
            - The BC-support with v1.x was improved, the old staticmethods are restored (PR #82)
         | 
| 576 | 
            +
            - The Unicode detection is slightly improved (PR #93)
         | 
| 577 | 
            +
            - Add syntax sugar \_\_bool\_\_ for results CharsetMatches list-container (PR #91)
         | 
| 578 | 
            +
             | 
| 579 | 
            +
            ### Removed
         | 
| 580 | 
            +
            - The project no longer raises a warning on tiny content given for detection; it is simply logged as a warning instead (PR #92)
         | 
| 581 | 
            +
             | 
| 582 | 
            +
            ### Fixed
         | 
| 583 | 
            +
            - In some rare case, the chunks extractor could cut in the middle of a multi-byte character and could mislead the mess detection (PR #95)
         | 
| 584 | 
            +
            - Some rare 'space' characters could trip up the UnprintablePlugin/Mess detection (PR #96)
         | 
| 585 | 
            +
            - The MANIFEST.in was not exhaustive (PR #78)
         | 
| 586 | 
            +
             | 
| 587 | 
            +
            ## [2.0.4](https://github.com/Ousret/charset_normalizer/compare/2.0.3...2.0.4) (2021-07-30)
         | 
| 588 | 
            +
            ### Fixed
         | 
| 589 | 
            +
            - The CLI no longer raise an unexpected exception when no encoding has been found (PR #70)
         | 
| 590 | 
            +
            - Fix accessing the 'alphabets' property when the payload contains surrogate characters (PR #68)
         | 
| 591 | 
            +
            - The logger could mislead (explain=True) on detected languages and the impact of one MBCS match (PR #72)
         | 
| 592 | 
            +
            - Submatch factoring could be wrong in rare edge cases (PR #72)
         | 
| 593 | 
            +
            - Multiple files given to the CLI were ignored when publishing results to STDOUT. (After the first path) (PR #72)
         | 
| 594 | 
            +
            - Fix line endings from CRLF to LF for certain project files (PR #67)
         | 
| 595 | 
            +
             | 
| 596 | 
            +
            ### Changed
         | 
| 597 | 
            +
            - Adjust the MD to lower the sensitivity, thus improving the global detection reliability (PR #69 #76)
         | 
| 598 | 
            +
            - Allow fallback on specified encoding if any (PR #71)
         | 
| 599 | 
            +
             | 
| 600 | 
            +
            ## [2.0.3](https://github.com/Ousret/charset_normalizer/compare/2.0.2...2.0.3) (2021-07-16)
         | 
| 601 | 
            +
            ### Changed
         | 
| 602 | 
            +
            - Part of the detection mechanism has been improved to be less sensitive, resulting in more accurate detection results. Especially ASCII. (PR #63)
         | 
| 603 | 
            +
            - According to the community wishes, the detection will fall back on ASCII or UTF-8 in a last-resort case. (PR #64)
         | 
| 604 | 
            +
             | 
| 605 | 
            +
            ## [2.0.2](https://github.com/Ousret/charset_normalizer/compare/2.0.1...2.0.2) (2021-07-15)
         | 
| 606 | 
            +
            ### Fixed
         | 
| 607 | 
            +
            - Empty/Too small JSON payload miss-detection fixed. Report from [@tseaver](https://github.com/tseaver) (PR #59)
         | 
| 608 | 
            +
             | 
| 609 | 
            +
            ### Changed
         | 
| 610 | 
            +
            - Don't inject unicodedata2 into sys.modules from [@akx](https://github.com/akx) (PR #57)
         | 
| 611 | 
            +
             | 
| 612 | 
            +
            ## [2.0.1](https://github.com/Ousret/charset_normalizer/compare/2.0.0...2.0.1) (2021-07-13)
         | 
| 613 | 
            +
            ### Fixed
         | 
| 614 | 
            +
            - Make it work where there isn't a filesystem available, dropping assets frequencies.json. Report from [@sethmlarson](https://github.com/sethmlarson). (PR #55)
         | 
| 615 | 
            +
            - Using explain=False permanently disable the verbose output in the current runtime (PR #47)
         | 
| 616 | 
            +
            - One log entry (language target preemptive) was not show in logs when using explain=True (PR #47)
         | 
| 617 | 
            +
            - Fix undesired exception (ValueError) on getitem of instance CharsetMatches (PR #52)
         | 
| 618 | 
            +
             | 
| 619 | 
            +
            ### Changed
         | 
| 620 | 
            +
            - Public function normalize default args values were not aligned with from_bytes (PR #53)
         | 
| 621 | 
            +
             | 
| 622 | 
            +
            ### Added
         | 
| 623 | 
            +
            - You may now use charset aliases in cp_isolation and cp_exclusion arguments (PR #47)
         | 
| 624 | 
            +
             | 
| 625 | 
            +
            ## [2.0.0](https://github.com/Ousret/charset_normalizer/compare/1.4.1...2.0.0) (2021-07-02)
         | 
| 626 | 
            +
            ### Changed
         | 
| 627 | 
            +
            - 4 to 5 times faster than the previous 1.4.0 release. At least 2x faster than Chardet.
         | 
| 628 | 
            +
            - Emphasis has been placed on UTF-8 detection, which should now be nearly instantaneous.
         | 
| 629 | 
            +
            - The backward compatibility with Chardet has been greatly improved. The legacy detect function returns an identical charset name whenever possible.
         | 
| 630 | 
            +
            - The detection mechanism has been slightly improved, now Turkish content is detected correctly (most of the time)
         | 
| 631 | 
            +
            - The program has been rewritten to ease the readability and maintainability. (+ Using static typing)
         | 
| 632 | 
            +
            - utf_7 detection has been reinstated.
         | 
| 633 | 
            +
             | 
| 634 | 
            +
            ### Removed
         | 
| 635 | 
            +
            - This package no longer requires anything when used with Python 3.5 (Dropped cached_property)
         | 
| 636 | 
            +
            - Removed support for these languages: Catalan, Esperanto, Kazakh, Basque, Volapük, Azeri, Galician, Nynorsk, Macedonian, and Serbocroatian.
         | 
| 637 | 
            +
            - The exception hook on UnicodeDecodeError has been removed.
         | 
| 638 | 
            +
             | 
| 639 | 
            +
            ### Deprecated
         | 
| 640 | 
            +
            - Methods coherence_non_latin, w_counter, chaos_secondary_pass of the class CharsetMatch are now deprecated and scheduled for removal in v3.0
         | 
| 641 | 
            +
             | 
| 642 | 
            +
            ### Fixed
         | 
| 643 | 
            +
            - The CLI output used the relative path of the file(s). Should be absolute.
         | 
| 644 | 
            +
             | 
| 645 | 
            +
            ## [1.4.1](https://github.com/Ousret/charset_normalizer/compare/1.4.0...1.4.1) (2021-05-28)
         | 
| 646 | 
            +
            ### Fixed
         | 
| 647 | 
            +
            - Logger configuration/usage no longer conflict with others (PR #44)
         | 
| 648 | 
            +
             | 
| 649 | 
            +
            ## [1.4.0](https://github.com/Ousret/charset_normalizer/compare/1.3.9...1.4.0) (2021-05-21)
         | 
| 650 | 
            +
            ### Removed
         | 
| 651 | 
            +
            - Using standard logging instead of using the package loguru.
         | 
| 652 | 
            +
            - Dropping nose test framework in favor of the maintained pytest.
         | 
| 653 | 
            +
            - Choose to not use dragonmapper package to help with gibberish Chinese/CJK text.
         | 
| 654 | 
            +
            - Require cached_property only for Python 3.5 due to constraint. Dropping for every other interpreter version.
         | 
| 655 | 
            +
            - Stop support for UTF-7 that does not contain a SIG.
         | 
| 656 | 
            +
            - Dropping PrettyTable, replaced with pure JSON output in CLI.
         | 
| 657 | 
            +
             | 
| 658 | 
            +
            ### Fixed
         | 
| 659 | 
            +
            - BOM marker in a CharsetNormalizerMatch instance could be False in rare cases even if obviously present. Due to the sub-match factoring process.
         | 
| 660 | 
            +
            - Not searching properly for the BOM when trying utf32/16 parent codec.
         | 
| 661 | 
            +
             | 
| 662 | 
            +
            ### Changed
         | 
| 663 | 
            +
            - Improving the package final size by compressing frequencies.json.
         | 
| 664 | 
            +
            - Huge improvement on the largest payloads.
         | 
| 665 | 
            +
             | 
| 666 | 
            +
            ### Added
         | 
| 667 | 
            +
            - CLI now produces JSON consumable output.
         | 
| 668 | 
            +
            - Return ASCII if given sequences fit. Given reasonable confidence.
         | 
| 669 | 
            +
             | 
| 670 | 
            +
            ## [1.3.9](https://github.com/Ousret/charset_normalizer/compare/1.3.8...1.3.9) (2021-05-13)
         | 
| 671 | 
            +
             | 
| 672 | 
            +
            ### Fixed
         | 
| 673 | 
            +
            - In some very rare cases, you may end up getting encode/decode errors due to a bad bytes payload (PR #40)
         | 
| 674 | 
            +
             | 
| 675 | 
            +
            ## [1.3.8](https://github.com/Ousret/charset_normalizer/compare/1.3.7...1.3.8) (2021-05-12)
         | 
| 676 | 
            +
             | 
| 677 | 
            +
            ### Fixed
         | 
| 678 | 
            +
            - Empty given payload for detection may cause an exception if trying to access the `alphabets` property. (PR #39)
         | 
| 679 | 
            +
             | 
| 680 | 
            +
            ## [1.3.7](https://github.com/Ousret/charset_normalizer/compare/1.3.6...1.3.7) (2021-05-12)
         | 
| 681 | 
            +
             | 
| 682 | 
            +
            ### Fixed
         | 
| 683 | 
            +
            - The legacy detect function should return UTF-8-SIG if sig is present in the payload. (PR #38)
         | 
| 684 | 
            +
             | 
| 685 | 
            +
            ## [1.3.6](https://github.com/Ousret/charset_normalizer/compare/1.3.5...1.3.6) (2021-02-09)
         | 
| 686 | 
            +
             | 
| 687 | 
            +
            ### Changed
         | 
| 688 | 
            +
            - Amend the previous release to allow prettytable 2.0 (PR #35)
         | 
| 689 | 
            +
             | 
| 690 | 
            +
            ## [1.3.5](https://github.com/Ousret/charset_normalizer/compare/1.3.4...1.3.5) (2021-02-08)
         | 
| 691 | 
            +
             | 
| 692 | 
            +
            ### Fixed
         | 
| 693 | 
            +
            - Fix error while using the package with a python pre-release interpreter (PR #33)
         | 
| 694 | 
            +
             | 
| 695 | 
            +
            ### Changed
         | 
| 696 | 
            +
            - Dependencies refactoring, constraints revised.
         | 
| 697 | 
            +
             | 
| 698 | 
            +
            ### Added
         | 
| 699 | 
            +
            - Add python 3.9 and 3.10 to the supported interpreters
         | 
| 700 | 
            +
             | 
| 701 | 
            +
            MIT License
         | 
| 702 | 
            +
             | 
| 703 | 
            +
            Copyright (c) 2025 TAHRI Ahmed R.
         | 
| 704 | 
            +
             | 
| 705 | 
            +
            Permission is hereby granted, free of charge, to any person obtaining a copy
         | 
| 706 | 
            +
            of this software and associated documentation files (the "Software"), to deal
         | 
| 707 | 
            +
            in the Software without restriction, including without limitation the rights
         | 
| 708 | 
            +
            to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
         | 
| 709 | 
            +
            copies of the Software, and to permit persons to whom the Software is
         | 
| 710 | 
            +
            furnished to do so, subject to the following conditions:
         | 
| 711 | 
            +
             | 
| 712 | 
            +
            The above copyright notice and this permission notice shall be included in all
         | 
| 713 | 
            +
            copies or substantial portions of the Software.
         | 
| 714 | 
            +
             | 
| 715 | 
            +
            THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
         | 
| 716 | 
            +
            IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
         | 
| 717 | 
            +
            FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
         | 
| 718 | 
            +
            AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
         | 
| 719 | 
            +
            LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
         | 
| 720 | 
            +
            OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
         | 
| 721 | 
            +
            SOFTWARE.
         | 
    	
        .venv/lib/python3.13/site-packages/charset_normalizer-3.4.1.dist-info/RECORD
    ADDED
    
    | @@ -0,0 +1,35 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            ../../../bin/normalizer,sha256=EEF2YGZQp06ajmmHxz5vp4FlVLcMI9diMu84EJnUtis,268
         | 
| 2 | 
            +
            charset_normalizer-3.4.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
         | 
| 3 | 
            +
            charset_normalizer-3.4.1.dist-info/LICENSE,sha256=bQ1Bv-FwrGx9wkjJpj4lTQ-0WmDVCoJX0K-SxuJJuIc,1071
         | 
| 4 | 
            +
            charset_normalizer-3.4.1.dist-info/METADATA,sha256=JbyHzhmqZh_ugEn1Y7TY7CDYZA9FoU6BP25hrCNDf50,35313
         | 
| 5 | 
            +
            charset_normalizer-3.4.1.dist-info/RECORD,,
         | 
| 6 | 
            +
            charset_normalizer-3.4.1.dist-info/WHEEL,sha256=8V5JjwATQfL0d9dd03DBSgMiHY8GVsHwXVusby9L_SY,115
         | 
| 7 | 
            +
            charset_normalizer-3.4.1.dist-info/entry_points.txt,sha256=8C-Y3iXIfyXQ83Tpir2B8t-XLJYpxF5xbb38d_js-h4,65
         | 
| 8 | 
            +
            charset_normalizer-3.4.1.dist-info/top_level.txt,sha256=7ASyzePr8_xuZWJsnqJjIBtyV8vhEo0wBCv1MPRRi3Q,19
         | 
| 9 | 
            +
            charset_normalizer/__init__.py,sha256=OKRxRv2Zhnqk00tqkN0c1BtJjm165fWXLydE52IKuHc,1590
         | 
| 10 | 
            +
            charset_normalizer/__main__.py,sha256=yzYxMR-IhKRHYwcSlavEv8oGdwxsR89mr2X09qXGdps,109
         | 
| 11 | 
            +
            charset_normalizer/__pycache__/__init__.cpython-313.pyc,,
         | 
| 12 | 
            +
            charset_normalizer/__pycache__/__main__.cpython-313.pyc,,
         | 
| 13 | 
            +
            charset_normalizer/__pycache__/api.cpython-313.pyc,,
         | 
| 14 | 
            +
            charset_normalizer/__pycache__/cd.cpython-313.pyc,,
         | 
| 15 | 
            +
            charset_normalizer/__pycache__/constant.cpython-313.pyc,,
         | 
| 16 | 
            +
            charset_normalizer/__pycache__/legacy.cpython-313.pyc,,
         | 
| 17 | 
            +
            charset_normalizer/__pycache__/md.cpython-313.pyc,,
         | 
| 18 | 
            +
            charset_normalizer/__pycache__/models.cpython-313.pyc,,
         | 
| 19 | 
            +
            charset_normalizer/__pycache__/utils.cpython-313.pyc,,
         | 
| 20 | 
            +
            charset_normalizer/__pycache__/version.cpython-313.pyc,,
         | 
| 21 | 
            +
            charset_normalizer/api.py,sha256=qBRz8mJ_R5E713R6TOyqHEdnmyxbEDnCSHvx32ubDGg,22617
         | 
| 22 | 
            +
            charset_normalizer/cd.py,sha256=WKTo1HDb-H9HfCDc3Bfwq5jzS25Ziy9SE2a74SgTq88,12522
         | 
| 23 | 
            +
            charset_normalizer/cli/__init__.py,sha256=D8I86lFk2-py45JvqxniTirSj_sFyE6sjaY_0-G1shc,136
         | 
| 24 | 
            +
            charset_normalizer/cli/__main__.py,sha256=VGC9klOoi6_R2z8rmyrc936kv7u2A1udjjHtlmNPDTM,10410
         | 
| 25 | 
            +
            charset_normalizer/cli/__pycache__/__init__.cpython-313.pyc,,
         | 
| 26 | 
            +
            charset_normalizer/cli/__pycache__/__main__.cpython-313.pyc,,
         | 
| 27 | 
            +
            charset_normalizer/constant.py,sha256=4VuTcZNLew1j_8ixA-Rt_VVqNWD4pwgHOHMCMlr0964,40477
         | 
| 28 | 
            +
            charset_normalizer/legacy.py,sha256=yhNXsPHkBfqPXKRb-sPXNj3Bscp9-mFGcYOkJ62tg9c,2328
         | 
| 29 | 
            +
            charset_normalizer/md.cpython-313-darwin.so,sha256=mUvSZMr6ty3_yLX68YZ5BJQr91RHfsfhiJCWQCFBMnE,115664
         | 
| 30 | 
            +
            charset_normalizer/md.py,sha256=iyXXQGWl54nnLQLueMWTmUtlivO0-rTBgVkmJxIIAGU,20036
         | 
| 31 | 
            +
            charset_normalizer/md__mypyc.cpython-313-darwin.so,sha256=yf7dzVSgLFK-NZBo4MXVDlEpN4VoXWXFuLSbUTd0XFw,482184
         | 
| 32 | 
            +
            charset_normalizer/models.py,sha256=lKXhOnIPtiakbK3i__J9wpOfzx3JDTKj7Dn3Rg0VaRI,12394
         | 
| 33 | 
            +
            charset_normalizer/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
         | 
| 34 | 
            +
            charset_normalizer/utils.py,sha256=T5UHo8AS7NVMmgruWoZyqEf0WrZVcQpgUNetRoborSk,12002
         | 
| 35 | 
            +
            charset_normalizer/version.py,sha256=Ambcj3O8FfvdLfDLc8dkaxZx97O1IM_R4_aKGD_TDdE,115
         | 
    	
        .venv/lib/python3.13/site-packages/charset_normalizer-3.4.1.dist-info/WHEEL
    ADDED
    
    | @@ -0,0 +1,5 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            Wheel-Version: 1.0
         | 
| 2 | 
            +
            Generator: setuptools (75.6.0)
         | 
| 3 | 
            +
            Root-Is-Purelib: false
         | 
| 4 | 
            +
            Tag: cp313-cp313-macosx_10_13_universal2
         | 
| 5 | 
            +
             | 
    	
        .venv/lib/python3.13/site-packages/charset_normalizer-3.4.1.dist-info/entry_points.txt
    ADDED
    
    | @@ -0,0 +1,2 @@ | |
|  | |
|  | 
|  | |
| 1 | 
            +
            [console_scripts]
         | 
| 2 | 
            +
            normalizer = charset_normalizer:cli.cli_detect
         | 
    	
        .venv/lib/python3.13/site-packages/charset_normalizer-3.4.1.dist-info/top_level.txt
    ADDED
    
    | @@ -0,0 +1 @@ | |
|  | 
|  | |
| 1 | 
            +
            charset_normalizer
         | 
    	
        .venv/lib/python3.13/site-packages/charset_normalizer/__init__.py
    ADDED
    
    | @@ -0,0 +1,48 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            """
         | 
| 2 | 
            +
            Charset-Normalizer
         | 
| 3 | 
            +
            ~~~~~~~~~~~~~~
         | 
| 4 | 
            +
            The Real First Universal Charset Detector.
         | 
| 5 | 
            +
            A library that helps you read text from an unknown charset encoding.
         | 
| 6 | 
            +
            Motivated by chardet, this package tries to resolve the issue by taking a new approach.
         | 
| 7 | 
            +
            All IANA character set names for which the Python core library provides codecs are supported.
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            Basic usage:
         | 
| 10 | 
            +
               >>> from charset_normalizer import from_bytes
         | 
| 11 | 
            +
               >>> results = from_bytes('Bсеки човек има право на образование. Oбразованието!'.encode('utf_8'))
         | 
| 12 | 
            +
               >>> best_guess = results.best()
         | 
| 13 | 
            +
               >>> str(best_guess)
         | 
| 14 | 
            +
               'Bсеки човек има право на образование. Oбразованието!'
         | 
| 15 | 
            +
             | 
| 16 | 
            +
            Other methods and usages are available - see the full documentation
         | 
| 17 | 
            +
            at <https://github.com/Ousret/charset_normalizer>.
         | 
| 18 | 
            +
            :copyright: (c) 2021 by Ahmed TAHRI
         | 
| 19 | 
            +
            :license: MIT, see LICENSE for more details.
         | 
| 20 | 
            +
            """
         | 
| 21 | 
            +
             | 
| 22 | 
            +
            from __future__ import annotations
         | 
| 23 | 
            +
             | 
| 24 | 
            +
            import logging
         | 
| 25 | 
            +
             | 
| 26 | 
            +
            from .api import from_bytes, from_fp, from_path, is_binary
         | 
| 27 | 
            +
            from .legacy import detect
         | 
| 28 | 
            +
            from .models import CharsetMatch, CharsetMatches
         | 
| 29 | 
            +
            from .utils import set_logging_handler
         | 
| 30 | 
            +
            from .version import VERSION, __version__
         | 
| 31 | 
            +
             | 
| 32 | 
            +
            __all__ = (
         | 
| 33 | 
            +
                "from_fp",
         | 
| 34 | 
            +
                "from_path",
         | 
| 35 | 
            +
                "from_bytes",
         | 
| 36 | 
            +
                "is_binary",
         | 
| 37 | 
            +
                "detect",
         | 
| 38 | 
            +
                "CharsetMatch",
         | 
| 39 | 
            +
                "CharsetMatches",
         | 
| 40 | 
            +
                "__version__",
         | 
| 41 | 
            +
                "VERSION",
         | 
| 42 | 
            +
                "set_logging_handler",
         | 
| 43 | 
            +
            )
         | 
| 44 | 
            +
             | 
| 45 | 
            +
            # Attach a NullHandler to the top level logger by default
         | 
| 46 | 
            +
            # https://docs.python.org/3.3/howto/logging.html#configuring-logging-for-a-library
         | 
| 47 | 
            +
             | 
| 48 | 
            +
            logging.getLogger("charset_normalizer").addHandler(logging.NullHandler())
         | 
    	
        .venv/lib/python3.13/site-packages/charset_normalizer/__main__.py
    ADDED
    
    | @@ -0,0 +1,6 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            from __future__ import annotations
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            from .cli import cli_detect
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            if __name__ == "__main__":
         | 
| 6 | 
            +
                cli_detect()
         | 
    	
        .venv/lib/python3.13/site-packages/charset_normalizer/api.py
    ADDED
    
    | @@ -0,0 +1,668 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            from __future__ import annotations
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            import logging
         | 
| 4 | 
            +
            from os import PathLike
         | 
| 5 | 
            +
            from typing import BinaryIO
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            from .cd import (
         | 
| 8 | 
            +
                coherence_ratio,
         | 
| 9 | 
            +
                encoding_languages,
         | 
| 10 | 
            +
                mb_encoding_languages,
         | 
| 11 | 
            +
                merge_coherence_ratios,
         | 
| 12 | 
            +
            )
         | 
| 13 | 
            +
            from .constant import IANA_SUPPORTED, TOO_BIG_SEQUENCE, TOO_SMALL_SEQUENCE, TRACE
         | 
| 14 | 
            +
            from .md import mess_ratio
         | 
| 15 | 
            +
            from .models import CharsetMatch, CharsetMatches
         | 
| 16 | 
            +
            from .utils import (
         | 
| 17 | 
            +
                any_specified_encoding,
         | 
| 18 | 
            +
                cut_sequence_chunks,
         | 
| 19 | 
            +
                iana_name,
         | 
| 20 | 
            +
                identify_sig_or_bom,
         | 
| 21 | 
            +
                is_cp_similar,
         | 
| 22 | 
            +
                is_multi_byte_encoding,
         | 
| 23 | 
            +
                should_strip_sig_or_bom,
         | 
| 24 | 
            +
            )
         | 
| 25 | 
            +
             | 
| 26 | 
            +
            logger = logging.getLogger("charset_normalizer")
         | 
| 27 | 
            +
            explain_handler = logging.StreamHandler()
         | 
| 28 | 
            +
            explain_handler.setFormatter(
         | 
| 29 | 
            +
                logging.Formatter("%(asctime)s | %(levelname)s | %(message)s")
         | 
| 30 | 
            +
            )
         | 
| 31 | 
            +
             | 
| 32 | 
            +
             | 
| 33 | 
            +
            def from_bytes(
         | 
| 34 | 
            +
                sequences: bytes | bytearray,
         | 
| 35 | 
            +
                steps: int = 5,
         | 
| 36 | 
            +
                chunk_size: int = 512,
         | 
| 37 | 
            +
                threshold: float = 0.2,
         | 
| 38 | 
            +
                cp_isolation: list[str] | None = None,
         | 
| 39 | 
            +
                cp_exclusion: list[str] | None = None,
         | 
| 40 | 
            +
                preemptive_behaviour: bool = True,
         | 
| 41 | 
            +
                explain: bool = False,
         | 
| 42 | 
            +
                language_threshold: float = 0.1,
         | 
| 43 | 
            +
                enable_fallback: bool = True,
         | 
| 44 | 
            +
            ) -> CharsetMatches:
         | 
| 45 | 
            +
                """
         | 
| 46 | 
            +
                Given a raw bytes sequence, return the best possibles charset usable to render str objects.
         | 
| 47 | 
            +
                If there is no results, it is a strong indicator that the source is binary/not text.
         | 
| 48 | 
            +
                By default, the process will extract 5 blocks of 512o each to assess the mess and coherence of a given sequence.
         | 
| 49 | 
            +
                And will give up a particular code page after 20% of measured mess. Those criteria are customizable at will.
         | 
| 50 | 
            +
             | 
| 51 | 
            +
                The preemptive behavior DOES NOT replace the traditional detection workflow, it prioritize a particular code page
         | 
| 52 | 
            +
                but never take it for granted. Can improve the performance.
         | 
| 53 | 
            +
             | 
| 54 | 
            +
                You may want to focus your attention to some code page or/and not others, use cp_isolation and cp_exclusion for that
         | 
| 55 | 
            +
                purpose.
         | 
| 56 | 
            +
             | 
| 57 | 
            +
                This function will strip the SIG in the payload/sequence every time except on UTF-16, UTF-32.
         | 
| 58 | 
            +
                By default the library does not setup any handler other than the NullHandler, if you choose to set the 'explain'
         | 
| 59 | 
            +
                toggle to True it will alter the logger configuration to add a StreamHandler that is suitable for debugging.
         | 
| 60 | 
            +
                Custom logging format and handler can be set manually.
         | 
| 61 | 
            +
                """
         | 
| 62 | 
            +
             | 
| 63 | 
            +
                if not isinstance(sequences, (bytearray, bytes)):
         | 
| 64 | 
            +
                    raise TypeError(
         | 
| 65 | 
            +
                        "Expected object of type bytes or bytearray, got: {}".format(
         | 
| 66 | 
            +
                            type(sequences)
         | 
| 67 | 
            +
                        )
         | 
| 68 | 
            +
                    )
         | 
| 69 | 
            +
             | 
| 70 | 
            +
                if explain:
         | 
| 71 | 
            +
                    previous_logger_level: int = logger.level
         | 
| 72 | 
            +
                    logger.addHandler(explain_handler)
         | 
| 73 | 
            +
                    logger.setLevel(TRACE)
         | 
| 74 | 
            +
             | 
| 75 | 
            +
                length: int = len(sequences)
         | 
| 76 | 
            +
             | 
| 77 | 
            +
                if length == 0:
         | 
| 78 | 
            +
                    logger.debug("Encoding detection on empty bytes, assuming utf_8 intention.")
         | 
| 79 | 
            +
                    if explain:  # Defensive: ensure exit path clean handler
         | 
| 80 | 
            +
                        logger.removeHandler(explain_handler)
         | 
| 81 | 
            +
                        logger.setLevel(previous_logger_level or logging.WARNING)
         | 
| 82 | 
            +
                    return CharsetMatches([CharsetMatch(sequences, "utf_8", 0.0, False, [], "")])
         | 
| 83 | 
            +
             | 
| 84 | 
            +
                if cp_isolation is not None:
         | 
| 85 | 
            +
                    logger.log(
         | 
| 86 | 
            +
                        TRACE,
         | 
| 87 | 
            +
                        "cp_isolation is set. use this flag for debugging purpose. "
         | 
| 88 | 
            +
                        "limited list of encoding allowed : %s.",
         | 
| 89 | 
            +
                        ", ".join(cp_isolation),
         | 
| 90 | 
            +
                    )
         | 
| 91 | 
            +
                    cp_isolation = [iana_name(cp, False) for cp in cp_isolation]
         | 
| 92 | 
            +
                else:
         | 
| 93 | 
            +
                    cp_isolation = []
         | 
| 94 | 
            +
             | 
| 95 | 
            +
                if cp_exclusion is not None:
         | 
| 96 | 
            +
                    logger.log(
         | 
| 97 | 
            +
                        TRACE,
         | 
| 98 | 
            +
                        "cp_exclusion is set. use this flag for debugging purpose. "
         | 
| 99 | 
            +
                        "limited list of encoding excluded : %s.",
         | 
| 100 | 
            +
                        ", ".join(cp_exclusion),
         | 
| 101 | 
            +
                    )
         | 
| 102 | 
            +
                    cp_exclusion = [iana_name(cp, False) for cp in cp_exclusion]
         | 
| 103 | 
            +
                else:
         | 
| 104 | 
            +
                    cp_exclusion = []
         | 
| 105 | 
            +
             | 
| 106 | 
            +
                if length <= (chunk_size * steps):
         | 
| 107 | 
            +
                    logger.log(
         | 
| 108 | 
            +
                        TRACE,
         | 
| 109 | 
            +
                        "override steps (%i) and chunk_size (%i) as content does not fit (%i byte(s) given) parameters.",
         | 
| 110 | 
            +
                        steps,
         | 
| 111 | 
            +
                        chunk_size,
         | 
| 112 | 
            +
                        length,
         | 
| 113 | 
            +
                    )
         | 
| 114 | 
            +
                    steps = 1
         | 
| 115 | 
            +
                    chunk_size = length
         | 
| 116 | 
            +
             | 
| 117 | 
            +
                if steps > 1 and length / steps < chunk_size:
         | 
| 118 | 
            +
                    chunk_size = int(length / steps)
         | 
| 119 | 
            +
             | 
| 120 | 
            +
                is_too_small_sequence: bool = len(sequences) < TOO_SMALL_SEQUENCE
         | 
| 121 | 
            +
                is_too_large_sequence: bool = len(sequences) >= TOO_BIG_SEQUENCE
         | 
| 122 | 
            +
             | 
| 123 | 
            +
                if is_too_small_sequence:
         | 
| 124 | 
            +
                    logger.log(
         | 
| 125 | 
            +
                        TRACE,
         | 
| 126 | 
            +
                        "Trying to detect encoding from a tiny portion of ({}) byte(s).".format(
         | 
| 127 | 
            +
                            length
         | 
| 128 | 
            +
                        ),
         | 
| 129 | 
            +
                    )
         | 
| 130 | 
            +
                elif is_too_large_sequence:
         | 
| 131 | 
            +
                    logger.log(
         | 
| 132 | 
            +
                        TRACE,
         | 
| 133 | 
            +
                        "Using lazy str decoding because the payload is quite large, ({}) byte(s).".format(
         | 
| 134 | 
            +
                            length
         | 
| 135 | 
            +
                        ),
         | 
| 136 | 
            +
                    )
         | 
| 137 | 
            +
             | 
| 138 | 
            +
                prioritized_encodings: list[str] = []
         | 
| 139 | 
            +
             | 
| 140 | 
            +
                specified_encoding: str | None = (
         | 
| 141 | 
            +
                    any_specified_encoding(sequences) if preemptive_behaviour else None
         | 
| 142 | 
            +
                )
         | 
| 143 | 
            +
             | 
| 144 | 
            +
                if specified_encoding is not None:
         | 
| 145 | 
            +
                    prioritized_encodings.append(specified_encoding)
         | 
| 146 | 
            +
                    logger.log(
         | 
| 147 | 
            +
                        TRACE,
         | 
| 148 | 
            +
                        "Detected declarative mark in sequence. Priority +1 given for %s.",
         | 
| 149 | 
            +
                        specified_encoding,
         | 
| 150 | 
            +
                    )
         | 
| 151 | 
            +
             | 
| 152 | 
            +
                tested: set[str] = set()
         | 
| 153 | 
            +
                tested_but_hard_failure: list[str] = []
         | 
| 154 | 
            +
                tested_but_soft_failure: list[str] = []
         | 
| 155 | 
            +
             | 
| 156 | 
            +
                fallback_ascii: CharsetMatch | None = None
         | 
| 157 | 
            +
                fallback_u8: CharsetMatch | None = None
         | 
| 158 | 
            +
                fallback_specified: CharsetMatch | None = None
         | 
| 159 | 
            +
             | 
| 160 | 
            +
                results: CharsetMatches = CharsetMatches()
         | 
| 161 | 
            +
             | 
| 162 | 
            +
                early_stop_results: CharsetMatches = CharsetMatches()
         | 
| 163 | 
            +
             | 
| 164 | 
            +
                sig_encoding, sig_payload = identify_sig_or_bom(sequences)
         | 
| 165 | 
            +
             | 
| 166 | 
            +
                if sig_encoding is not None:
         | 
| 167 | 
            +
                    prioritized_encodings.append(sig_encoding)
         | 
| 168 | 
            +
                    logger.log(
         | 
| 169 | 
            +
                        TRACE,
         | 
| 170 | 
            +
                        "Detected a SIG or BOM mark on first %i byte(s). Priority +1 given for %s.",
         | 
| 171 | 
            +
                        len(sig_payload),
         | 
| 172 | 
            +
                        sig_encoding,
         | 
| 173 | 
            +
                    )
         | 
| 174 | 
            +
             | 
| 175 | 
            +
                prioritized_encodings.append("ascii")
         | 
| 176 | 
            +
             | 
| 177 | 
            +
                if "utf_8" not in prioritized_encodings:
         | 
| 178 | 
            +
                    prioritized_encodings.append("utf_8")
         | 
| 179 | 
            +
             | 
| 180 | 
            +
                for encoding_iana in prioritized_encodings + IANA_SUPPORTED:
         | 
| 181 | 
            +
                    if cp_isolation and encoding_iana not in cp_isolation:
         | 
| 182 | 
            +
                        continue
         | 
| 183 | 
            +
             | 
| 184 | 
            +
                    if cp_exclusion and encoding_iana in cp_exclusion:
         | 
| 185 | 
            +
                        continue
         | 
| 186 | 
            +
             | 
| 187 | 
            +
                    if encoding_iana in tested:
         | 
| 188 | 
            +
                        continue
         | 
| 189 | 
            +
             | 
| 190 | 
            +
                    tested.add(encoding_iana)
         | 
| 191 | 
            +
             | 
| 192 | 
            +
                    decoded_payload: str | None = None
         | 
| 193 | 
            +
                    bom_or_sig_available: bool = sig_encoding == encoding_iana
         | 
| 194 | 
            +
                    strip_sig_or_bom: bool = bom_or_sig_available and should_strip_sig_or_bom(
         | 
| 195 | 
            +
                        encoding_iana
         | 
| 196 | 
            +
                    )
         | 
| 197 | 
            +
             | 
| 198 | 
            +
                    if encoding_iana in {"utf_16", "utf_32"} and not bom_or_sig_available:
         | 
| 199 | 
            +
                        logger.log(
         | 
| 200 | 
            +
                            TRACE,
         | 
| 201 | 
            +
                            "Encoding %s won't be tested as-is because it require a BOM. Will try some sub-encoder LE/BE.",
         | 
| 202 | 
            +
                            encoding_iana,
         | 
| 203 | 
            +
                        )
         | 
| 204 | 
            +
                        continue
         | 
| 205 | 
            +
                    if encoding_iana in {"utf_7"} and not bom_or_sig_available:
         | 
| 206 | 
            +
                        logger.log(
         | 
| 207 | 
            +
                            TRACE,
         | 
| 208 | 
            +
                            "Encoding %s won't be tested as-is because detection is unreliable without BOM/SIG.",
         | 
| 209 | 
            +
                            encoding_iana,
         | 
| 210 | 
            +
                        )
         | 
| 211 | 
            +
                        continue
         | 
| 212 | 
            +
             | 
| 213 | 
            +
                    try:
         | 
| 214 | 
            +
                        is_multi_byte_decoder: bool = is_multi_byte_encoding(encoding_iana)
         | 
| 215 | 
            +
                    except (ModuleNotFoundError, ImportError):
         | 
| 216 | 
            +
                        logger.log(
         | 
| 217 | 
            +
                            TRACE,
         | 
| 218 | 
            +
                            "Encoding %s does not provide an IncrementalDecoder",
         | 
| 219 | 
            +
                            encoding_iana,
         | 
| 220 | 
            +
                        )
         | 
| 221 | 
            +
                        continue
         | 
| 222 | 
            +
             | 
| 223 | 
            +
                    try:
         | 
| 224 | 
            +
                        if is_too_large_sequence and is_multi_byte_decoder is False:
         | 
| 225 | 
            +
                            str(
         | 
| 226 | 
            +
                                (
         | 
| 227 | 
            +
                                    sequences[: int(50e4)]
         | 
| 228 | 
            +
                                    if strip_sig_or_bom is False
         | 
| 229 | 
            +
                                    else sequences[len(sig_payload) : int(50e4)]
         | 
| 230 | 
            +
                                ),
         | 
| 231 | 
            +
                                encoding=encoding_iana,
         | 
| 232 | 
            +
                            )
         | 
| 233 | 
            +
                        else:
         | 
| 234 | 
            +
                            decoded_payload = str(
         | 
| 235 | 
            +
                                (
         | 
| 236 | 
            +
                                    sequences
         | 
| 237 | 
            +
                                    if strip_sig_or_bom is False
         | 
| 238 | 
            +
                                    else sequences[len(sig_payload) :]
         | 
| 239 | 
            +
                                ),
         | 
| 240 | 
            +
                                encoding=encoding_iana,
         | 
| 241 | 
            +
                            )
         | 
| 242 | 
            +
                    except (UnicodeDecodeError, LookupError) as e:
         | 
| 243 | 
            +
                        if not isinstance(e, LookupError):
         | 
| 244 | 
            +
                            logger.log(
         | 
| 245 | 
            +
                                TRACE,
         | 
| 246 | 
            +
                                "Code page %s does not fit given bytes sequence at ALL. %s",
         | 
| 247 | 
            +
                                encoding_iana,
         | 
| 248 | 
            +
                                str(e),
         | 
| 249 | 
            +
                            )
         | 
| 250 | 
            +
                        tested_but_hard_failure.append(encoding_iana)
         | 
| 251 | 
            +
                        continue
         | 
| 252 | 
            +
             | 
| 253 | 
            +
                    similar_soft_failure_test: bool = False
         | 
| 254 | 
            +
             | 
| 255 | 
            +
                    for encoding_soft_failed in tested_but_soft_failure:
         | 
| 256 | 
            +
                        if is_cp_similar(encoding_iana, encoding_soft_failed):
         | 
| 257 | 
            +
                            similar_soft_failure_test = True
         | 
| 258 | 
            +
                            break
         | 
| 259 | 
            +
             | 
| 260 | 
            +
                    if similar_soft_failure_test:
         | 
| 261 | 
            +
                        logger.log(
         | 
| 262 | 
            +
                            TRACE,
         | 
| 263 | 
            +
                            "%s is deemed too similar to code page %s and was consider unsuited already. Continuing!",
         | 
| 264 | 
            +
                            encoding_iana,
         | 
| 265 | 
            +
                            encoding_soft_failed,
         | 
| 266 | 
            +
                        )
         | 
| 267 | 
            +
                        continue
         | 
| 268 | 
            +
             | 
| 269 | 
            +
                    r_ = range(
         | 
| 270 | 
            +
                        0 if not bom_or_sig_available else len(sig_payload),
         | 
| 271 | 
            +
                        length,
         | 
| 272 | 
            +
                        int(length / steps),
         | 
| 273 | 
            +
                    )
         | 
| 274 | 
            +
             | 
| 275 | 
            +
                    multi_byte_bonus: bool = (
         | 
| 276 | 
            +
                        is_multi_byte_decoder
         | 
| 277 | 
            +
                        and decoded_payload is not None
         | 
| 278 | 
            +
                        and len(decoded_payload) < length
         | 
| 279 | 
            +
                    )
         | 
| 280 | 
            +
             | 
| 281 | 
            +
                    if multi_byte_bonus:
         | 
| 282 | 
            +
                        logger.log(
         | 
| 283 | 
            +
                            TRACE,
         | 
| 284 | 
            +
                            "Code page %s is a multi byte encoding table and it appear that at least one character "
         | 
| 285 | 
            +
                            "was encoded using n-bytes.",
         | 
| 286 | 
            +
                            encoding_iana,
         | 
| 287 | 
            +
                        )
         | 
| 288 | 
            +
             | 
| 289 | 
            +
                    max_chunk_gave_up: int = int(len(r_) / 4)
         | 
| 290 | 
            +
             | 
| 291 | 
            +
                    max_chunk_gave_up = max(max_chunk_gave_up, 2)
         | 
| 292 | 
            +
                    early_stop_count: int = 0
         | 
| 293 | 
            +
                    lazy_str_hard_failure = False
         | 
| 294 | 
            +
             | 
| 295 | 
            +
                    md_chunks: list[str] = []
         | 
| 296 | 
            +
                    md_ratios = []
         | 
| 297 | 
            +
             | 
| 298 | 
            +
                    try:
         | 
| 299 | 
            +
                        for chunk in cut_sequence_chunks(
         | 
| 300 | 
            +
                            sequences,
         | 
| 301 | 
            +
                            encoding_iana,
         | 
| 302 | 
            +
                            r_,
         | 
| 303 | 
            +
                            chunk_size,
         | 
| 304 | 
            +
                            bom_or_sig_available,
         | 
| 305 | 
            +
                            strip_sig_or_bom,
         | 
| 306 | 
            +
                            sig_payload,
         | 
| 307 | 
            +
                            is_multi_byte_decoder,
         | 
| 308 | 
            +
                            decoded_payload,
         | 
| 309 | 
            +
                        ):
         | 
| 310 | 
            +
                            md_chunks.append(chunk)
         | 
| 311 | 
            +
             | 
| 312 | 
            +
                            md_ratios.append(
         | 
| 313 | 
            +
                                mess_ratio(
         | 
| 314 | 
            +
                                    chunk,
         | 
| 315 | 
            +
                                    threshold,
         | 
| 316 | 
            +
                                    explain is True and 1 <= len(cp_isolation) <= 2,
         | 
| 317 | 
            +
                                )
         | 
| 318 | 
            +
                            )
         | 
| 319 | 
            +
             | 
| 320 | 
            +
                            if md_ratios[-1] >= threshold:
         | 
| 321 | 
            +
                                early_stop_count += 1
         | 
| 322 | 
            +
             | 
| 323 | 
            +
                            if (early_stop_count >= max_chunk_gave_up) or (
         | 
| 324 | 
            +
                                bom_or_sig_available and strip_sig_or_bom is False
         | 
| 325 | 
            +
                            ):
         | 
| 326 | 
            +
                                break
         | 
| 327 | 
            +
                    except (
         | 
| 328 | 
            +
                        UnicodeDecodeError
         | 
| 329 | 
            +
                    ) as e:  # Lazy str loading may have missed something there
         | 
| 330 | 
            +
                        logger.log(
         | 
| 331 | 
            +
                            TRACE,
         | 
| 332 | 
            +
                            "LazyStr Loading: After MD chunk decode, code page %s does not fit given bytes sequence at ALL. %s",
         | 
| 333 | 
            +
                            encoding_iana,
         | 
| 334 | 
            +
                            str(e),
         | 
| 335 | 
            +
                        )
         | 
| 336 | 
            +
                        early_stop_count = max_chunk_gave_up
         | 
| 337 | 
            +
                        lazy_str_hard_failure = True
         | 
| 338 | 
            +
             | 
| 339 | 
            +
                    # We might want to check the sequence again with the whole content
         | 
| 340 | 
            +
                    # Only if initial MD tests passes
         | 
| 341 | 
            +
                    if (
         | 
| 342 | 
            +
                        not lazy_str_hard_failure
         | 
| 343 | 
            +
                        and is_too_large_sequence
         | 
| 344 | 
            +
                        and not is_multi_byte_decoder
         | 
| 345 | 
            +
                    ):
         | 
| 346 | 
            +
                        try:
         | 
| 347 | 
            +
                            sequences[int(50e3) :].decode(encoding_iana, errors="strict")
         | 
| 348 | 
            +
                        except UnicodeDecodeError as e:
         | 
| 349 | 
            +
                            logger.log(
         | 
| 350 | 
            +
                                TRACE,
         | 
| 351 | 
            +
                                "LazyStr Loading: After final lookup, code page %s does not fit given bytes sequence at ALL. %s",
         | 
| 352 | 
            +
                                encoding_iana,
         | 
| 353 | 
            +
                                str(e),
         | 
| 354 | 
            +
                            )
         | 
| 355 | 
            +
                            tested_but_hard_failure.append(encoding_iana)
         | 
| 356 | 
            +
                            continue
         | 
| 357 | 
            +
             | 
| 358 | 
            +
                    mean_mess_ratio: float = sum(md_ratios) / len(md_ratios) if md_ratios else 0.0
         | 
| 359 | 
            +
                    if mean_mess_ratio >= threshold or early_stop_count >= max_chunk_gave_up:
         | 
| 360 | 
            +
                        tested_but_soft_failure.append(encoding_iana)
         | 
| 361 | 
            +
                        logger.log(
         | 
| 362 | 
            +
                            TRACE,
         | 
| 363 | 
            +
                            "%s was excluded because of initial chaos probing. Gave up %i time(s). "
         | 
| 364 | 
            +
                            "Computed mean chaos is %f %%.",
         | 
| 365 | 
            +
                            encoding_iana,
         | 
| 366 | 
            +
                            early_stop_count,
         | 
| 367 | 
            +
                            round(mean_mess_ratio * 100, ndigits=3),
         | 
| 368 | 
            +
                        )
         | 
| 369 | 
            +
                        # Preparing those fallbacks in case we got nothing.
         | 
| 370 | 
            +
                        if (
         | 
| 371 | 
            +
                            enable_fallback
         | 
| 372 | 
            +
                            and encoding_iana in ["ascii", "utf_8", specified_encoding]
         | 
| 373 | 
            +
                            and not lazy_str_hard_failure
         | 
| 374 | 
            +
                        ):
         | 
| 375 | 
            +
                            fallback_entry = CharsetMatch(
         | 
| 376 | 
            +
                                sequences,
         | 
| 377 | 
            +
                                encoding_iana,
         | 
| 378 | 
            +
                                threshold,
         | 
| 379 | 
            +
                                False,
         | 
| 380 | 
            +
                                [],
         | 
| 381 | 
            +
                                decoded_payload,
         | 
| 382 | 
            +
                                preemptive_declaration=specified_encoding,
         | 
| 383 | 
            +
                            )
         | 
| 384 | 
            +
                            if encoding_iana == specified_encoding:
         | 
| 385 | 
            +
                                fallback_specified = fallback_entry
         | 
| 386 | 
            +
                            elif encoding_iana == "ascii":
         | 
| 387 | 
            +
                                fallback_ascii = fallback_entry
         | 
| 388 | 
            +
                            else:
         | 
| 389 | 
            +
                                fallback_u8 = fallback_entry
         | 
| 390 | 
            +
                        continue
         | 
| 391 | 
            +
             | 
| 392 | 
            +
                    logger.log(
         | 
| 393 | 
            +
                        TRACE,
         | 
| 394 | 
            +
                        "%s passed initial chaos probing. Mean measured chaos is %f %%",
         | 
| 395 | 
            +
                        encoding_iana,
         | 
| 396 | 
            +
                        round(mean_mess_ratio * 100, ndigits=3),
         | 
| 397 | 
            +
                    )
         | 
| 398 | 
            +
             | 
| 399 | 
            +
                    if not is_multi_byte_decoder:
         | 
| 400 | 
            +
                        target_languages: list[str] = encoding_languages(encoding_iana)
         | 
| 401 | 
            +
                    else:
         | 
| 402 | 
            +
                        target_languages = mb_encoding_languages(encoding_iana)
         | 
| 403 | 
            +
             | 
| 404 | 
            +
                    if target_languages:
         | 
| 405 | 
            +
                        logger.log(
         | 
| 406 | 
            +
                            TRACE,
         | 
| 407 | 
            +
                            "{} should target any language(s) of {}".format(
         | 
| 408 | 
            +
                                encoding_iana, str(target_languages)
         | 
| 409 | 
            +
                            ),
         | 
| 410 | 
            +
                        )
         | 
| 411 | 
            +
             | 
| 412 | 
            +
                    cd_ratios = []
         | 
| 413 | 
            +
             | 
| 414 | 
            +
                    # We shall skip the CD when its about ASCII
         | 
| 415 | 
            +
                    # Most of the time its not relevant to run "language-detection" on it.
         | 
| 416 | 
            +
                    if encoding_iana != "ascii":
         | 
| 417 | 
            +
                        for chunk in md_chunks:
         | 
| 418 | 
            +
                            chunk_languages = coherence_ratio(
         | 
| 419 | 
            +
                                chunk,
         | 
| 420 | 
            +
                                language_threshold,
         | 
| 421 | 
            +
                                ",".join(target_languages) if target_languages else None,
         | 
| 422 | 
            +
                            )
         | 
| 423 | 
            +
             | 
| 424 | 
            +
                            cd_ratios.append(chunk_languages)
         | 
| 425 | 
            +
             | 
| 426 | 
            +
                    cd_ratios_merged = merge_coherence_ratios(cd_ratios)
         | 
| 427 | 
            +
             | 
| 428 | 
            +
                    if cd_ratios_merged:
         | 
| 429 | 
            +
                        logger.log(
         | 
| 430 | 
            +
                            TRACE,
         | 
| 431 | 
            +
                            "We detected language {} using {}".format(
         | 
| 432 | 
            +
                                cd_ratios_merged, encoding_iana
         | 
| 433 | 
            +
                            ),
         | 
| 434 | 
            +
                        )
         | 
| 435 | 
            +
             | 
| 436 | 
            +
                    current_match = CharsetMatch(
         | 
| 437 | 
            +
                        sequences,
         | 
| 438 | 
            +
                        encoding_iana,
         | 
| 439 | 
            +
                        mean_mess_ratio,
         | 
| 440 | 
            +
                        bom_or_sig_available,
         | 
| 441 | 
            +
                        cd_ratios_merged,
         | 
| 442 | 
            +
                        (
         | 
| 443 | 
            +
                            decoded_payload
         | 
| 444 | 
            +
                            if (
         | 
| 445 | 
            +
                                is_too_large_sequence is False
         | 
| 446 | 
            +
                                or encoding_iana in [specified_encoding, "ascii", "utf_8"]
         | 
| 447 | 
            +
                            )
         | 
| 448 | 
            +
                            else None
         | 
| 449 | 
            +
                        ),
         | 
| 450 | 
            +
                        preemptive_declaration=specified_encoding,
         | 
| 451 | 
            +
                    )
         | 
| 452 | 
            +
             | 
| 453 | 
            +
                    results.append(current_match)
         | 
| 454 | 
            +
             | 
| 455 | 
            +
                    if (
         | 
| 456 | 
            +
                        encoding_iana in [specified_encoding, "ascii", "utf_8"]
         | 
| 457 | 
            +
                        and mean_mess_ratio < 0.1
         | 
| 458 | 
            +
                    ):
         | 
| 459 | 
            +
                        # If md says nothing to worry about, then... stop immediately!
         | 
| 460 | 
            +
                        if mean_mess_ratio == 0.0:
         | 
| 461 | 
            +
                            logger.debug(
         | 
| 462 | 
            +
                                "Encoding detection: %s is most likely the one.",
         | 
| 463 | 
            +
                                current_match.encoding,
         | 
| 464 | 
            +
                            )
         | 
| 465 | 
            +
                            if explain:  # Defensive: ensure exit path clean handler
         | 
| 466 | 
            +
                                logger.removeHandler(explain_handler)
         | 
| 467 | 
            +
                                logger.setLevel(previous_logger_level)
         | 
| 468 | 
            +
                            return CharsetMatches([current_match])
         | 
| 469 | 
            +
             | 
| 470 | 
            +
                        early_stop_results.append(current_match)
         | 
| 471 | 
            +
             | 
| 472 | 
            +
                    if (
         | 
| 473 | 
            +
                        len(early_stop_results)
         | 
| 474 | 
            +
                        and (specified_encoding is None or specified_encoding in tested)
         | 
| 475 | 
            +
                        and "ascii" in tested
         | 
| 476 | 
            +
                        and "utf_8" in tested
         | 
| 477 | 
            +
                    ):
         | 
| 478 | 
            +
                        probable_result: CharsetMatch = early_stop_results.best()  # type: ignore[assignment]
         | 
| 479 | 
            +
                        logger.debug(
         | 
| 480 | 
            +
                            "Encoding detection: %s is most likely the one.",
         | 
| 481 | 
            +
                            probable_result.encoding,
         | 
| 482 | 
            +
                        )
         | 
| 483 | 
            +
                        if explain:  # Defensive: ensure exit path clean handler
         | 
| 484 | 
            +
                            logger.removeHandler(explain_handler)
         | 
| 485 | 
            +
                            logger.setLevel(previous_logger_level)
         | 
| 486 | 
            +
             | 
| 487 | 
            +
                        return CharsetMatches([probable_result])
         | 
| 488 | 
            +
             | 
| 489 | 
            +
                    if encoding_iana == sig_encoding:
         | 
| 490 | 
            +
                        logger.debug(
         | 
| 491 | 
            +
                            "Encoding detection: %s is most likely the one as we detected a BOM or SIG within "
         | 
| 492 | 
            +
                            "the beginning of the sequence.",
         | 
| 493 | 
            +
                            encoding_iana,
         | 
| 494 | 
            +
                        )
         | 
| 495 | 
            +
                        if explain:  # Defensive: ensure exit path clean handler
         | 
| 496 | 
            +
                            logger.removeHandler(explain_handler)
         | 
| 497 | 
            +
                            logger.setLevel(previous_logger_level)
         | 
| 498 | 
            +
                        return CharsetMatches([results[encoding_iana]])
         | 
| 499 | 
            +
             | 
| 500 | 
            +
                if len(results) == 0:
         | 
| 501 | 
            +
                    if fallback_u8 or fallback_ascii or fallback_specified:
         | 
| 502 | 
            +
                        logger.log(
         | 
| 503 | 
            +
                            TRACE,
         | 
| 504 | 
            +
                            "Nothing got out of the detection process. Using ASCII/UTF-8/Specified fallback.",
         | 
| 505 | 
            +
                        )
         | 
| 506 | 
            +
             | 
| 507 | 
            +
                    if fallback_specified:
         | 
| 508 | 
            +
                        logger.debug(
         | 
| 509 | 
            +
                            "Encoding detection: %s will be used as a fallback match",
         | 
| 510 | 
            +
                            fallback_specified.encoding,
         | 
| 511 | 
            +
                        )
         | 
| 512 | 
            +
                        results.append(fallback_specified)
         | 
| 513 | 
            +
                    elif (
         | 
| 514 | 
            +
                        (fallback_u8 and fallback_ascii is None)
         | 
| 515 | 
            +
                        or (
         | 
| 516 | 
            +
                            fallback_u8
         | 
| 517 | 
            +
                            and fallback_ascii
         | 
| 518 | 
            +
                            and fallback_u8.fingerprint != fallback_ascii.fingerprint
         | 
| 519 | 
            +
                        )
         | 
| 520 | 
            +
                        or (fallback_u8 is not None)
         | 
| 521 | 
            +
                    ):
         | 
| 522 | 
            +
                        logger.debug("Encoding detection: utf_8 will be used as a fallback match")
         | 
| 523 | 
            +
                        results.append(fallback_u8)
         | 
| 524 | 
            +
                    elif fallback_ascii:
         | 
| 525 | 
            +
                        logger.debug("Encoding detection: ascii will be used as a fallback match")
         | 
| 526 | 
            +
                        results.append(fallback_ascii)
         | 
| 527 | 
            +
             | 
| 528 | 
            +
                if results:
         | 
| 529 | 
            +
                    logger.debug(
         | 
| 530 | 
            +
                        "Encoding detection: Found %s as plausible (best-candidate) for content. With %i alternatives.",
         | 
| 531 | 
            +
                        results.best().encoding,  # type: ignore
         | 
| 532 | 
            +
                        len(results) - 1,
         | 
| 533 | 
            +
                    )
         | 
| 534 | 
            +
                else:
         | 
| 535 | 
            +
                    logger.debug("Encoding detection: Unable to determine any suitable charset.")
         | 
| 536 | 
            +
             | 
| 537 | 
            +
                if explain:
         | 
| 538 | 
            +
                    logger.removeHandler(explain_handler)
         | 
| 539 | 
            +
                    logger.setLevel(previous_logger_level)
         | 
| 540 | 
            +
             | 
| 541 | 
            +
                return results
         | 
| 542 | 
            +
             | 
| 543 | 
            +
             | 
| 544 | 
            +
            def from_fp(
                fp: BinaryIO,
                steps: int = 5,
                chunk_size: int = 512,
                threshold: float = 0.20,
                cp_isolation: list[str] | None = None,
                cp_exclusion: list[str] | None = None,
                preemptive_behaviour: bool = True,
                explain: bool = False,
                language_threshold: float = 0.1,
                enable_fallback: bool = True,
            ) -> CharsetMatches:
                """
                Run the from_bytes detection on the content of an already opened binary file object.

                The content is obtained with a single fp.read() call; all other arguments are handed
                over to from_bytes untouched, in the same order. The file object is never closed here,
                that remains the responsibility of the caller.
                """
                # Pull the raw bytes out of the file object before delegating.
                payload = fp.read()

                return from_bytes(
                    payload,
                    steps,
                    chunk_size,
                    threshold,
                    cp_isolation,
                    cp_exclusion,
                    preemptive_behaviour,
                    explain,
                    language_threshold,
                    enable_fallback,
                )
         | 
| 572 | 
            +
             | 
| 573 | 
            +
             | 
| 574 | 
            +
            def from_path(
                path: str | bytes | PathLike,  # type: ignore[type-arg]
                steps: int = 5,
                chunk_size: int = 512,
                threshold: float = 0.20,
                cp_isolation: list[str] | None = None,
                cp_exclusion: list[str] | None = None,
                preemptive_behaviour: bool = True,
                explain: bool = False,
                language_threshold: float = 0.1,
                enable_fallback: bool = True,
            ) -> CharsetMatches:
                """
                Open the file located at the given path in binary mode and run the detection on its content.

                This is from_fp with one extra step: the file is opened here and closed again once the
                detection returned. Every tuning argument is forwarded as-is to from_fp.
                Opening the file may raise OSError (IOError).
                """
                with open(path, "rb") as handle:
                    # The detection must run while the handle is still open, from_fp reads from it.
                    matches = from_fp(
                        handle,
                        steps=steps,
                        chunk_size=chunk_size,
                        threshold=threshold,
                        cp_isolation=cp_isolation,
                        cp_exclusion=cp_exclusion,
                        preemptive_behaviour=preemptive_behaviour,
                        explain=explain,
                        language_threshold=language_threshold,
                        enable_fallback=enable_fallback,
                    )

                return matches
         | 
| 603 | 
            +
             | 
| 604 | 
            +
             | 
| 605 | 
            +
            def is_binary(
                fp_or_path_or_payload: PathLike | str | BinaryIO | bytes,  # type: ignore[type-arg]
                steps: int = 5,
                chunk_size: int = 512,
                threshold: float = 0.20,
                cp_isolation: list[str] | None = None,
                cp_exclusion: list[str] | None = None,
                preemptive_behaviour: bool = True,
                explain: bool = False,
                language_threshold: float = 0.1,
                enable_fallback: bool = False,
            ) -> bool:
                """
                Tell whether the given input (a path, a bytes payload or an opened binary file) looks binary,
                i.e. cannot reasonably be interpreted as text.

                The input is handed to from_path (str / PathLike), from_bytes (bytes / bytearray) or
                from_fp (anything else) together with every tuning argument received here.
                Unlike those functions, enable_fallback defaults to False so that content which is merely
                ASCII-compatible, yet unlikely to be a string, is not reported as text.

                Return True when the selected detector yields an empty (falsy) result.
                """
                # Every detector accepts the same tuning options, so build them only once.
                detector_options = {
                    "steps": steps,
                    "chunk_size": chunk_size,
                    "threshold": threshold,
                    "cp_isolation": cp_isolation,
                    "cp_exclusion": cp_exclusion,
                    "preemptive_behaviour": preemptive_behaviour,
                    "explain": explain,
                    "language_threshold": language_threshold,
                    "enable_fallback": enable_fallback,
                }

                if isinstance(fp_or_path_or_payload, (str, PathLike)):
                    detect = from_path
                elif isinstance(fp_or_path_or_payload, (bytes, bytearray)):
                    detect = from_bytes
                else:
                    # Neither a path nor a raw payload: treat it as a file-like object.
                    detect = from_fp

                return not detect(fp_or_path_or_payload, **detector_options)
         | 
    	
        .venv/lib/python3.13/site-packages/charset_normalizer/cd.py
    ADDED
    
    | @@ -0,0 +1,395 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            from __future__ import annotations
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            import importlib
         | 
| 4 | 
            +
            from codecs import IncrementalDecoder
         | 
| 5 | 
            +
            from collections import Counter
         | 
| 6 | 
            +
            from functools import lru_cache
         | 
| 7 | 
            +
            from typing import Counter as TypeCounter
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            from .constant import (
         | 
| 10 | 
            +
                FREQUENCIES,
         | 
| 11 | 
            +
                KO_NAMES,
         | 
| 12 | 
            +
                LANGUAGE_SUPPORTED_COUNT,
         | 
| 13 | 
            +
                TOO_SMALL_SEQUENCE,
         | 
| 14 | 
            +
                ZH_NAMES,
         | 
| 15 | 
            +
            )
         | 
| 16 | 
            +
            from .md import is_suspiciously_successive_range
         | 
| 17 | 
            +
            from .models import CoherenceMatches
         | 
| 18 | 
            +
            from .utils import (
         | 
| 19 | 
            +
                is_accentuated,
         | 
| 20 | 
            +
                is_latin,
         | 
| 21 | 
            +
                is_multi_byte_encoding,
         | 
| 22 | 
            +
                is_unicode_range_secondary,
         | 
| 23 | 
            +
                unicode_range,
         | 
| 24 | 
            +
            )
         | 
| 25 | 
            +
             | 
| 26 | 
            +
             | 
| 27 | 
            +
            def encoding_unicode_range(iana_name: str) -> list[str]:
                """
                List the main unicode ranges covered by a single byte code page.

                Every byte value from 0x40 up to 0xFE is decoded on its own with the codec found in
                the "encodings" package; the unicode range of each decoded character is tallied, ranges
                flagged as secondary being left out. Only ranges that account for at least 15% of the
                tallied characters are returned, sorted alphabetically.

                Raise OSError when the given code page is a multi-byte one.
                """
                if is_multi_byte_encoding(iana_name):
                    raise OSError("Function not supported on multi-byte code page")

                codec_module = importlib.import_module(f"encodings.{iana_name}")
                # Undecodable bytes are dropped silently (empty output) instead of raising.
                incremental: IncrementalDecoder = codec_module.IncrementalDecoder(errors="ignore")

                range_hits: dict[str, int] = {}
                total_hits: int = 0

                for byte_value in range(0x40, 0xFF):
                    decoded: str = incremental.decode(bytes([byte_value]))

                    if not decoded:
                        continue

                    range_name: str | None = unicode_range(decoded)

                    if range_name is None:
                        continue

                    if is_unicode_range_secondary(range_name) is False:
                        range_hits[range_name] = range_hits.get(range_name, 0) + 1
                        total_hits += 1

                return sorted(
                    range_name
                    for range_name, hits in range_hits.items()
                    if hits / total_hits >= 0.15
                )
         | 
| 62 | 
            +
             | 
| 63 | 
            +
             | 
| 64 | 
            +
            def unicode_range_languages(primary_range: str) -> list[str]:
                """
                List the languages that may be written using the given unicode range.

                A language from FREQUENCIES is kept as soon as one of its characters belongs to
                primary_range. The order of FREQUENCIES is preserved.
                """
                return [
                    language
                    for language, characters in FREQUENCIES.items()
                    if any(unicode_range(character) == primary_range for character in characters)
                ]
         | 
| 77 | 
            +
             | 
| 78 | 
            +
             | 
| 79 | 
            +
            @lru_cache()
            def encoding_languages(iana_name: str) -> list[str]:
                """
                Associate a single-byte code page with the language(s) it is most likely used for.

                The first unicode range of the code page whose name does not contain "Latin" drives
                the lookup. When every range is a Latin one, the generic ["Latin Based"] is returned.
                """
                first_non_latin_range: str | None = next(
                    (
                        candidate
                        for candidate in encoding_unicode_range(iana_name)
                        if "Latin" not in candidate
                    ),
                    None,
                )

                if first_non_latin_range is None:
                    return ["Latin Based"]

                return unicode_range_languages(first_non_latin_range)
         | 
| 97 | 
            +
             | 
| 98 | 
            +
             | 
| 99 | 
            +
            @lru_cache()
            def mb_encoding_languages(iana_name: str) -> list[str]:
                """
                Associate a multi-byte code page with the language it is heavily linked to.

                Return ["Japanese"], ["Chinese"] or ["Korean"] depending on the code page name,
                or an empty list when the name matches none of the known families.
                """
                japanese_prefixes = ("shift_", "iso2022_jp", "euc_j")

                if iana_name.startswith(japanese_prefixes) or iana_name == "cp932":
                    return ["Japanese"]

                if iana_name.startswith("gb") or iana_name in ZH_NAMES:
                    return ["Chinese"]

                if iana_name.startswith("iso2022_kr") or iana_name in KO_NAMES:
                    return ["Korean"]

                return []
         | 
| 118 | 
            +
             | 
| 119 | 
            +
             | 
| 120 | 
            +
            @lru_cache(maxsize=LANGUAGE_SUPPORTED_COUNT)
            def get_target_features(language: str) -> tuple[bool, bool]:
                """
                Summarize the alphabet of a supported language.

                Return a pair (have_accents, pure_latin): the first flag is True when at least one
                character listed in FREQUENCIES for that language is accentuated, the second one is
                True when none of its characters is reported as non-Latin.
                """
                alphabet = FREQUENCIES[language]

                target_have_accents: bool = any(
                    is_accentuated(character) for character in alphabet
                )
                target_pure_latin: bool = not any(
                    is_latin(character) is False for character in alphabet
                )

                return target_have_accents, target_pure_latin
         | 
| 135 | 
            +
             | 
| 136 | 
            +
             | 
| 137 | 
            +
            def alphabet_languages(
                characters: list[str], ignore_non_latin: bool = False
            ) -> list[str]:
                """
                Return the languages compatible with the given characters, best candidate first.

                For each language of FREQUENCIES, the share of its frequent characters that are present
                in `characters` is computed; languages reaching a 20% share are kept and sorted by
                decreasing share.

                A language is discarded upfront when:
                  - ignore_non_latin is set and the language is not purely Latin, or
                  - the given characters contain accents while the language has none.
                """
                # Built once: membership is tested for every frequent character of every language.
                source_characters = set(characters)
                source_have_accents = any(is_accentuated(character) for character in characters)

                languages: list[tuple[str, float]] = []

                for language, language_characters in FREQUENCIES.items():
                    target_have_accents, target_pure_latin = get_target_features(language)

                    if ignore_non_latin and target_pure_latin is False:
                        continue

                    if target_have_accents is False and source_have_accents:
                        continue

                    character_match_count: int = sum(
                        1 for c in language_characters if c in source_characters
                    )
                    ratio: float = character_match_count / len(language_characters)

                    if ratio >= 0.2:
                        languages.append((language, ratio))

                # Stable sort: languages with an equal ratio keep their FREQUENCIES order.
                languages.sort(key=lambda x: x[1], reverse=True)

                return [language for language, _ in languages]
         | 
| 170 | 
            +
             | 
| 171 | 
            +
             | 
| 172 | 
            +
            def characters_popularity_compare(
                language: str, ordered_characters: list[str]
            ) -> float:
                """
                Determine if an ordered characters list (from most frequent to rarest) matches a particular language.

                The result is a ratio between 0. (absolutely no correspondence) and 1. (near perfect fit):
                the number of approved characters divided by the number of given characters.
                Beware that this function is not strict on the match in order to ease the detection. (Meaning close match is 1.)

                An empty ordered_characters list yields 0. as there is nothing to compare.

                Raise ValueError when the language is not available in FREQUENCIES.
                """
                if language not in FREQUENCIES:
                    raise ValueError(f"{language} not available")

                ordered_characters_count: int = len(ordered_characters)

                # Guard against a division by zero further down: nothing given, nothing matches.
                if ordered_characters_count == 0:
                    return 0.0

                language_characters: list[str] = FREQUENCIES[language]
                language_characters_set = set(language_characters)
                target_language_characters_count: int = len(language_characters)

                large_alphabet: bool = target_language_characters_count > 26

                # Scale factor used to project a rank in the given list onto the language list.
                # It does not depend on the character being inspected.
                expected_projection_ratio: float = (
                    target_language_characters_count / ordered_characters_count
                )

                character_approved_count: int = 0

                for character_rank, character in enumerate(ordered_characters):
                    if character not in language_characters_set:
                        continue

                    character_rank_in_language: int = language_characters.index(character)
                    character_rank_projection: int = int(character_rank * expected_projection_ratio)
                    rank_distance: int = abs(
                        character_rank_projection - character_rank_in_language
                    )

                    # Small alphabet: a character too far from its expected rank is rejected.
                    if large_alphabet is False and rank_distance > 4:
                        continue

                    # Large alphabet: being within a third of the alphabet size is good enough.
                    if large_alphabet is True and rank_distance < target_language_characters_count / 3:
                        character_approved_count += 1
                        continue

                    # Otherwise compare the neighbourhoods: which characters come before / after
                    # this one in the language list versus in the given list.
                    characters_before_source: list[str] = language_characters[
                        0:character_rank_in_language
                    ]
                    characters_after_source: list[str] = language_characters[
                        character_rank_in_language:
                    ]
                    characters_before: list[str] = ordered_characters[0:character_rank]
                    characters_after: list[str] = ordered_characters[character_rank:]

                    before_match_count: int = len(
                        set(characters_before) & set(characters_before_source)
                    )

                    after_match_count: int = len(
                        set(characters_after) & set(characters_after_source)
                    )

                    # The two checks below also protect the ratios computed afterwards
                    # from dividing by an empty slice.
                    if len(characters_before_source) == 0 and before_match_count <= 4:
                        character_approved_count += 1
                        continue

                    if len(characters_after_source) == 0 and after_match_count <= 4:
                        character_approved_count += 1
                        continue

                    if (
                        before_match_count / len(characters_before_source) >= 0.4
                        or after_match_count / len(characters_after_source) >= 0.4
                    ):
                        character_approved_count += 1
                        continue

                return character_approved_count / ordered_characters_count
         | 
| 250 | 
            +
             | 
| 251 | 
            +
             | 
| 252 | 
            +
            def alpha_unicode_split(decoded_sequence: str) -> list[str]:
                """
                Split a decoded text into one lower-cased string per alphabet (unicode range).

                Ex. a text containing English/Latin with a bit of Hebrew returns two items:
                one holding the latin letters and the other the hebrew ones.
                Non-alphabetic characters and characters without a known unicode range are dropped.
                """
                layers: dict[str, str] = {}

                for character in decoded_sequence:
                    if not character.isalpha():
                        continue

                    character_range: str | None = unicode_range(character)

                    if character_range is None:
                        continue

                    # Reuse the first already discovered layer whose range is not a suspicious
                    # neighbour of this character's range; otherwise open a layer for its own range.
                    layer_target_range: str = next(
                        (
                            discovered_range
                            for discovered_range in layers
                            if is_suspiciously_successive_range(discovered_range, character_range)
                            is False
                        ),
                        character_range,
                    )

                    layers[layer_target_range] = layers.get(layer_target_range, "") + character.lower()

                return list(layers.values())
         | 
| 289 | 
            +
             | 
| 290 | 
            +
             | 
| 291 | 
            +
            def merge_coherence_ratios(results: list[CoherenceMatches]) -> CoherenceMatches:
                """
                Merge several results previously produced by the function coherence_ratio.

                Ratios reported for the same language are averaged (rounded to 4 decimals) and the
                merged list is sorted by decreasing ratio. The return type is the same as coherence_ratio.
                """
                ratios_by_language: dict[str, list[float]] = {}

                for matches in results:
                    for language, ratio in matches:
                        ratios_by_language.setdefault(language, []).append(ratio)

                averaged = [
                    (language, round(sum(ratios) / len(ratios), 4))
                    for language, ratios in ratios_by_language.items()
                ]
                # Stable sort: languages sharing the same average keep their discovery order.
                averaged.sort(key=lambda entry: entry[1], reverse=True)

                return averaged
         | 
| 317 | 
            +
             | 
| 318 | 
            +
             | 
| 319 | 
            +
            def filter_alt_coherence_matches(results: CoherenceMatches) -> CoherenceMatches:
                """
                We shall NOT return "English—" in CoherenceMatches because it is an alternative
                of "English". Every entry is renamed to its em-dash-free language name and, when
                several entries share that name, only the best (highest) ratio is kept.

                The em-dash is removed even when the alternative is the only entry for its
                language. Languages are returned in the order they first appear in results;
                a new list is always returned and the input is left untouched.
                """
                best_ratio_by_language: dict[str, float] = {}

                for language, ratio in results:
                    base_name: str = language.replace("—", "")

                    if (
                        base_name not in best_ratio_by_language
                        or ratio > best_ratio_by_language[base_name]
                    ):
                        best_ratio_by_language[base_name] = ratio

                return list(best_ratio_by_language.items())
         | 
| 344 | 
            +
             | 
| 345 | 
            +
             | 
| 346 | 
            +
            @lru_cache(maxsize=2048)
            def coherence_ratio(
                decoded_sequence: str, threshold: float = 0.1, lg_inclusion: str | None = None
            ) -> CoherenceMatches:
                """
                Detect ANY language that can be identified in the given sequence.
                The sequence is analysed layer by layer, a layer being the characters
                extracted by alphabets/ranges.

                lg_inclusion is an optional comma separated list of languages to test; the
                special entry "Latin Based" is not a language but a switch forwarded to
                alphabet_languages. Matches scoring below threshold are discarded.
                """
                requested_languages = [] if lg_inclusion is None else lg_inclusion.split(",")

                latin_only: bool = "Latin Based" in requested_languages
                if latin_only:
                    requested_languages.remove("Latin Based")

                matches: list[tuple[str, float]] = []
                # Counts matches scoring >= 0.8; shared across every layer.
                strong_match_count: int = 0

                for layer in alpha_unicode_split(decoded_sequence):
                    ranked_characters = Counter(layer).most_common()

                    total_characters: int = sum(
                        occurrences for _, occurrences in ranked_characters
                    )
                    if total_characters <= TOO_SMALL_SEQUENCE:
                        continue

                    characters_by_popularity: list[str] = [
                        character for character, _ in ranked_characters
                    ]

                    # An explicit inclusion list takes precedence over alphabet based guessing.
                    candidates = requested_languages or alphabet_languages(
                        characters_by_popularity, latin_only
                    )

                    for language in candidates:
                        score: float = characters_popularity_compare(
                            language, characters_by_popularity
                        )

                        if score < threshold:
                            continue

                        if score >= 0.8:
                            strong_match_count += 1

                        matches.append((language, round(score, 4)))

                        # Enough strong matches: stop testing languages for this layer.
                        if strong_match_count >= 3:
                            break

                deduplicated = filter_alt_coherence_matches(matches)
                return sorted(deduplicated, key=lambda x: x[1], reverse=True)
         | 
    	
        .venv/lib/python3.13/site-packages/charset_normalizer/cli/__init__.py
    ADDED
    
    | @@ -0,0 +1,8 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            from __future__ import annotations
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            from .__main__ import cli_detect, query_yes_no
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            __all__ = (
         | 
| 6 | 
            +
                "cli_detect",
         | 
| 7 | 
            +
                "query_yes_no",
         | 
| 8 | 
            +
            )
         | 
    	
        .venv/lib/python3.13/site-packages/charset_normalizer/cli/__main__.py
    ADDED
    
    | @@ -0,0 +1,321 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            from __future__ import annotations
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            import argparse
         | 
| 4 | 
            +
            import sys
         | 
| 5 | 
            +
            from json import dumps
         | 
| 6 | 
            +
            from os.path import abspath, basename, dirname, join, realpath
         | 
| 7 | 
            +
            from platform import python_version
         | 
| 8 | 
            +
            from unicodedata import unidata_version
         | 
| 9 | 
            +
             | 
| 10 | 
            +
            import charset_normalizer.md as md_module
         | 
| 11 | 
            +
            from charset_normalizer import from_fp
         | 
| 12 | 
            +
            from charset_normalizer.models import CliDetectionResult
         | 
| 13 | 
            +
            from charset_normalizer.version import __version__
         | 
| 14 | 
            +
             | 
| 15 | 
            +
             | 
| 16 | 
            +
            def query_yes_no(question: str, default: str | None = "yes") -> bool:
                """Ask a yes/no question via input() and return their answer.

                "question" is a string that is presented to the user.
                "default" is the presumed answer if the user just hits <Enter>.
                    It must be "yes" (the default), "no" or None (meaning
                    an answer is required of the user).

                The "answer" return value is True for "yes" or False for "no".
                The answer is compared case-insensitively and surrounding whitespace is
                ignored. The question is asked again until a valid answer is given.

                Raises ValueError when "default" is not "yes", "no" or None.

                Credit goes to (c) https://stackoverflow.com/questions/3041986/apt-command-line-interface-like-yes-no-input
                """
                valid = {"yes": True, "y": True, "ye": True, "no": False, "n": False}

                # The capital letter in the prompt shows which answer <Enter> selects.
                if default is None:
                    prompt = " [y/n] "
                elif default == "yes":
                    prompt = " [Y/n] "
                elif default == "no":
                    prompt = " [y/N] "
                else:
                    raise ValueError(f"invalid default answer: '{default}'")

                while True:
                    sys.stdout.write(question + prompt)
                    choice = input().strip().lower()

                    if default is not None and not choice:
                        return valid[default]
                    if choice in valid:
                        return valid[choice]

                    sys.stdout.write("Please respond with 'yes' or 'no' (or 'y' or 'n').\n")
         | 
| 47 | 
            +
             | 
| 48 | 
            +
             | 
| 49 | 
            +
            def _close_files(files) -> None:
                """Close every file object of *files* that is still open."""
                for handle in files:
                    if handle.closed is False:
                        handle.close()


            def cli_detect(argv: list[str] | None = None) -> int:
                """
                CLI assistant using ARGV and ArgumentParser

                Every given file is analysed with from_fp. A JSON report is printed on STDOUT
                unless --minimal is set, in which case only the detected charset(s) are printed,
                one line per input file. With --normalize, the best guess output is written
                next to the input file (or over it with --replace).

                :param argv: Arguments to parse, forwarded to ArgumentParser.parse_args
                :return: 0 if everything is fine, anything else equal trouble
                    (1 = invalid combination/value of options, 2 = the normalized file could not be written)
                """
                parser = argparse.ArgumentParser(
                    description="The Real First Universal Charset Detector. "
                    "Discover originating encoding used on text file. "
                    "Normalize text to unicode."
                )

                parser.add_argument(
                    "files", type=argparse.FileType("rb"), nargs="+", help="File(s) to be analysed"
                )
                parser.add_argument(
                    "-v",
                    "--verbose",
                    action="store_true",
                    default=False,
                    dest="verbose",
                    help="Display complementary information about file if any. "
                    "Stdout will contain logs about the detection process.",
                )
                parser.add_argument(
                    "-a",
                    "--with-alternative",
                    action="store_true",
                    default=False,
                    dest="alternatives",
                    help="Output complementary possibilities if any. Top-level JSON WILL be a list.",
                )
                parser.add_argument(
                    "-n",
                    "--normalize",
                    action="store_true",
                    default=False,
                    dest="normalize",
                    help="Permit to normalize input file. If not set, program does not write anything.",
                )
                parser.add_argument(
                    "-m",
                    "--minimal",
                    action="store_true",
                    default=False,
                    dest="minimal",
                    help="Only output the charset detected to STDOUT. Disabling JSON output.",
                )
                parser.add_argument(
                    "-r",
                    "--replace",
                    action="store_true",
                    default=False,
                    dest="replace",
                    help="Replace file when trying to normalize it instead of creating a new one.",
                )
                parser.add_argument(
                    "-f",
                    "--force",
                    action="store_true",
                    default=False,
                    dest="force",
                    help="Replace file without asking if you are sure, use this flag with caution.",
                )
                parser.add_argument(
                    "-i",
                    "--no-preemptive",
                    action="store_true",
                    default=False,
                    dest="no_preemptive",
                    help="Disable looking at a charset declaration to hint the detector.",
                )
                parser.add_argument(
                    "-t",
                    "--threshold",
                    action="store",
                    default=0.2,
                    type=float,
                    dest="threshold",
                    help="Define a custom maximum amount of noise allowed in decoded content. 0. <= noise <= 1.",
                )
                parser.add_argument(
                    "--version",
                    action="version",
                    version="Charset-Normalizer {} - Python {} - Unicode {} - SpeedUp {}".format(
                        __version__,
                        python_version(),
                        unidata_version,
                        "OFF" if md_module.__file__.lower().endswith(".py") else "ON",
                    ),
                    help="Show version information and exit.",
                )

                args = parser.parse_args(argv)

                # Option sanity checks, evaluated in order; the first failure wins.
                usage_error: str | None = None

                if args.replace is True and args.normalize is False:
                    usage_error = "Use --replace in addition of --normalize only."
                elif args.force is True and args.replace is False:
                    usage_error = "Use --force in addition of --replace only."
                elif args.threshold < 0.0 or args.threshold > 1.0:
                    usage_error = "--threshold VALUE should be between 0. AND 1."

                if usage_error is not None:
                    # argparse already opened every input file; release them before leaving.
                    _close_files(args.files)
                    print(usage_error, file=sys.stderr)
                    return 1

                x_ = []

                for my_file in args.files:
                    matches = from_fp(
                        my_file,
                        threshold=args.threshold,
                        explain=args.verbose,
                        preemptive_behaviour=args.no_preemptive is False,
                    )

                    best_guess = matches.best()

                    if best_guess is None:
                        print(
                            'Unable to identify originating encoding for "{}". {}'.format(
                                my_file.name,
                                (
                                    "Maybe try increasing maximum amount of chaos."
                                    if args.threshold < 1.0
                                    else ""
                                ),
                            ),
                            file=sys.stderr,
                        )
                        x_.append(
                            CliDetectionResult(
                                abspath(my_file.name),
                                None,
                                [],
                                [],
                                "Unknown",
                                [],
                                False,
                                1.0,
                                0.0,
                                None,
                                True,
                            )
                        )
                    else:
                        preferred_result = CliDetectionResult(
                            abspath(my_file.name),
                            best_guess.encoding,
                            best_guess.encoding_aliases,
                            [
                                cp
                                for cp in best_guess.could_be_from_charset
                                if cp != best_guess.encoding
                            ],
                            best_guess.language,
                            best_guess.alphabets,
                            best_guess.bom,
                            best_guess.percent_chaos,
                            best_guess.percent_coherence,
                            None,
                            True,
                        )
                        x_.append(preferred_result)

                        if len(matches) > 1 and args.alternatives:
                            for el in matches:
                                if el != best_guess:
                                    x_.append(
                                        CliDetectionResult(
                                            abspath(my_file.name),
                                            el.encoding,
                                            el.encoding_aliases,
                                            [
                                                cp
                                                for cp in el.could_be_from_charset
                                                if cp != el.encoding
                                            ],
                                            el.language,
                                            el.alphabets,
                                            el.bom,
                                            el.percent_chaos,
                                            el.percent_coherence,
                                            None,
                                            False,
                                        )
                                    )

                        if args.normalize is True:
                            if best_guess.encoding.startswith("utf") is True:
                                print(
                                    '"{}" file does not need to be normalized, as it already came from unicode.'.format(
                                        my_file.name
                                    ),
                                    file=sys.stderr,
                                )
                                if my_file.closed is False:
                                    my_file.close()
                                continue

                            dir_path = dirname(realpath(my_file.name))
                            file_name = basename(realpath(my_file.name))

                            o_: list[str] = file_name.split(".")

                            if args.replace is False:
                                # New file: the detected encoding is inserted before the last
                                # dot-separated part of the file name.
                                o_.insert(-1, best_guess.encoding)
                                if my_file.closed is False:
                                    my_file.close()
                            elif (
                                args.force is False
                                and query_yes_no(
                                    'Are you sure to normalize "{}" by replacing it ?'.format(
                                        my_file.name
                                    ),
                                    "no",
                                )
                                is False
                            ):
                                if my_file.closed is False:
                                    my_file.close()
                                continue

                            try:
                                # The output path belongs to the result of the file being
                                # processed, so each input reports its own normalized copy.
                                preferred_result.unicode_path = join(dir_path, ".".join(o_))

                                with open(preferred_result.unicode_path, "wb") as fp:
                                    fp.write(best_guess.output())
                            except OSError as e:
                                print(str(e), file=sys.stderr)
                                # Leaving early: release this file and the ones not yet processed.
                                _close_files(args.files)
                                return 2

                    if my_file.closed is False:
                        my_file.close()

                if args.minimal is False:
                    # A single result is printed as an object, several as a list.
                    print(
                        dumps(
                            [el.__dict__ for el in x_] if len(x_) > 1 else x_[0].__dict__,
                            ensure_ascii=True,
                            indent=4,
                        )
                    )
                else:
                    for my_file in args.files:
                        print(
                            ", ".join(
                                [
                                    el.encoding or "undefined"
                                    for el in x_
                                    if el.path == abspath(my_file.name)
                                ]
                            )
                        )

                return 0
         | 
| 318 | 
            +
             | 
| 319 | 
            +
             | 
| 320 | 
            +
            if __name__ == "__main__":
         | 
| 321 | 
            +
                cli_detect()
         | 
    	
        .venv/lib/python3.13/site-packages/charset_normalizer/constant.py
    ADDED
    
    | @@ -0,0 +1,1998 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            from __future__ import annotations
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            from codecs import BOM_UTF8, BOM_UTF16_BE, BOM_UTF16_LE, BOM_UTF32_BE, BOM_UTF32_LE
         | 
| 4 | 
            +
            from encodings.aliases import aliases
         | 
| 5 | 
            +
            from re import IGNORECASE
         | 
| 6 | 
            +
            from re import compile as re_compile
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            # Contain for each eligible encoding a list of/item bytes SIG/BOM
         | 
| 9 | 
            +
            ENCODING_MARKS: dict[str, bytes | list[bytes]] = {
         | 
| 10 | 
            +
                "utf_8": BOM_UTF8,
         | 
| 11 | 
            +
                "utf_7": [
         | 
| 12 | 
            +
                    b"\x2b\x2f\x76\x38",
         | 
| 13 | 
            +
                    b"\x2b\x2f\x76\x39",
         | 
| 14 | 
            +
                    b"\x2b\x2f\x76\x2b",
         | 
| 15 | 
            +
                    b"\x2b\x2f\x76\x2f",
         | 
| 16 | 
            +
                    b"\x2b\x2f\x76\x38\x2d",
         | 
| 17 | 
            +
                ],
         | 
| 18 | 
            +
                "gb18030": b"\x84\x31\x95\x33",
         | 
| 19 | 
            +
                "utf_32": [BOM_UTF32_BE, BOM_UTF32_LE],
         | 
| 20 | 
            +
                "utf_16": [BOM_UTF16_BE, BOM_UTF16_LE],
         | 
| 21 | 
            +
            }
         | 
| 22 | 
            +
             | 
| 23 | 
            +
            TOO_SMALL_SEQUENCE: int = 32
         | 
| 24 | 
            +
            TOO_BIG_SEQUENCE: int = int(10e6)
         | 
| 25 | 
            +
             | 
| 26 | 
            +
            UTF8_MAXIMAL_ALLOCATION: int = 1_112_064
         | 
| 27 | 
            +
             | 
| 28 | 
            +
            # Up-to-date Unicode ucd/15.0.0
         | 
| 29 | 
            +
            UNICODE_RANGES_COMBINED: dict[str, range] = {
         | 
| 30 | 
            +
                "Control character": range(32),
         | 
| 31 | 
            +
                "Basic Latin": range(32, 128),
         | 
| 32 | 
            +
                "Latin-1 Supplement": range(128, 256),
         | 
| 33 | 
            +
                "Latin Extended-A": range(256, 384),
         | 
| 34 | 
            +
                "Latin Extended-B": range(384, 592),
         | 
| 35 | 
            +
                "IPA Extensions": range(592, 688),
         | 
| 36 | 
            +
                "Spacing Modifier Letters": range(688, 768),
         | 
| 37 | 
            +
                "Combining Diacritical Marks": range(768, 880),
         | 
| 38 | 
            +
                "Greek and Coptic": range(880, 1024),
         | 
| 39 | 
            +
                "Cyrillic": range(1024, 1280),
         | 
| 40 | 
            +
                "Cyrillic Supplement": range(1280, 1328),
         | 
| 41 | 
            +
                "Armenian": range(1328, 1424),
         | 
| 42 | 
            +
                "Hebrew": range(1424, 1536),
         | 
| 43 | 
            +
                "Arabic": range(1536, 1792),
         | 
| 44 | 
            +
                "Syriac": range(1792, 1872),
         | 
| 45 | 
            +
                "Arabic Supplement": range(1872, 1920),
         | 
| 46 | 
            +
                "Thaana": range(1920, 1984),
         | 
| 47 | 
            +
                "NKo": range(1984, 2048),
         | 
| 48 | 
            +
                "Samaritan": range(2048, 2112),
         | 
| 49 | 
            +
                "Mandaic": range(2112, 2144),
         | 
| 50 | 
            +
                "Syriac Supplement": range(2144, 2160),
         | 
| 51 | 
            +
                "Arabic Extended-B": range(2160, 2208),
         | 
| 52 | 
            +
                "Arabic Extended-A": range(2208, 2304),
         | 
| 53 | 
            +
                "Devanagari": range(2304, 2432),
         | 
| 54 | 
            +
                "Bengali": range(2432, 2560),
         | 
| 55 | 
            +
                "Gurmukhi": range(2560, 2688),
         | 
| 56 | 
            +
                "Gujarati": range(2688, 2816),
         | 
| 57 | 
            +
                "Oriya": range(2816, 2944),
         | 
| 58 | 
            +
                "Tamil": range(2944, 3072),
         | 
| 59 | 
            +
                "Telugu": range(3072, 3200),
         | 
| 60 | 
            +
                "Kannada": range(3200, 3328),
         | 
| 61 | 
            +
                "Malayalam": range(3328, 3456),
         | 
| 62 | 
            +
                "Sinhala": range(3456, 3584),
         | 
| 63 | 
            +
                "Thai": range(3584, 3712),
         | 
| 64 | 
            +
                "Lao": range(3712, 3840),
         | 
| 65 | 
            +
                "Tibetan": range(3840, 4096),
         | 
| 66 | 
            +
                "Myanmar": range(4096, 4256),
         | 
| 67 | 
            +
                "Georgian": range(4256, 4352),
         | 
| 68 | 
            +
                "Hangul Jamo": range(4352, 4608),
         | 
| 69 | 
            +
                "Ethiopic": range(4608, 4992),
         | 
| 70 | 
            +
                "Ethiopic Supplement": range(4992, 5024),
         | 
| 71 | 
            +
                "Cherokee": range(5024, 5120),
         | 
| 72 | 
            +
                "Unified Canadian Aboriginal Syllabics": range(5120, 5760),
         | 
| 73 | 
            +
                "Ogham": range(5760, 5792),
         | 
| 74 | 
            +
                "Runic": range(5792, 5888),
         | 
| 75 | 
            +
                "Tagalog": range(5888, 5920),
         | 
| 76 | 
            +
                "Hanunoo": range(5920, 5952),
         | 
| 77 | 
            +
                "Buhid": range(5952, 5984),
         | 
| 78 | 
            +
                "Tagbanwa": range(5984, 6016),
         | 
| 79 | 
            +
                "Khmer": range(6016, 6144),
         | 
| 80 | 
            +
                "Mongolian": range(6144, 6320),
         | 
| 81 | 
            +
                "Unified Canadian Aboriginal Syllabics Extended": range(6320, 6400),
         | 
| 82 | 
            +
                "Limbu": range(6400, 6480),
         | 
| 83 | 
            +
                "Tai Le": range(6480, 6528),
         | 
| 84 | 
            +
                "New Tai Lue": range(6528, 6624),
         | 
| 85 | 
            +
                "Khmer Symbols": range(6624, 6656),
         | 
| 86 | 
            +
                "Buginese": range(6656, 6688),
         | 
| 87 | 
            +
                "Tai Tham": range(6688, 6832),
         | 
| 88 | 
            +
                "Combining Diacritical Marks Extended": range(6832, 6912),
         | 
| 89 | 
            +
                "Balinese": range(6912, 7040),
         | 
| 90 | 
            +
                "Sundanese": range(7040, 7104),
         | 
| 91 | 
            +
                "Batak": range(7104, 7168),
         | 
| 92 | 
            +
                "Lepcha": range(7168, 7248),
         | 
| 93 | 
            +
                "Ol Chiki": range(7248, 7296),
         | 
| 94 | 
            +
                "Cyrillic Extended-C": range(7296, 7312),
         | 
| 95 | 
            +
                "Georgian Extended": range(7312, 7360),
         | 
| 96 | 
            +
                "Sundanese Supplement": range(7360, 7376),
         | 
| 97 | 
            +
                "Vedic Extensions": range(7376, 7424),
         | 
| 98 | 
            +
                "Phonetic Extensions": range(7424, 7552),
         | 
| 99 | 
            +
                "Phonetic Extensions Supplement": range(7552, 7616),
         | 
| 100 | 
            +
                "Combining Diacritical Marks Supplement": range(7616, 7680),
         | 
| 101 | 
            +
                "Latin Extended Additional": range(7680, 7936),
         | 
| 102 | 
            +
                "Greek Extended": range(7936, 8192),
         | 
| 103 | 
            +
                "General Punctuation": range(8192, 8304),
         | 
| 104 | 
            +
                "Superscripts and Subscripts": range(8304, 8352),
         | 
| 105 | 
            +
                "Currency Symbols": range(8352, 8400),
         | 
| 106 | 
            +
                "Combining Diacritical Marks for Symbols": range(8400, 8448),
         | 
| 107 | 
            +
                "Letterlike Symbols": range(8448, 8528),
         | 
| 108 | 
            +
                "Number Forms": range(8528, 8592),
         | 
| 109 | 
            +
                "Arrows": range(8592, 8704),
         | 
| 110 | 
            +
                "Mathematical Operators": range(8704, 8960),
         | 
| 111 | 
            +
                "Miscellaneous Technical": range(8960, 9216),
         | 
| 112 | 
            +
                "Control Pictures": range(9216, 9280),
         | 
| 113 | 
            +
                "Optical Character Recognition": range(9280, 9312),
         | 
| 114 | 
            +
                "Enclosed Alphanumerics": range(9312, 9472),
         | 
| 115 | 
            +
                "Box Drawing": range(9472, 9600),
         | 
| 116 | 
            +
                "Block Elements": range(9600, 9632),
         | 
| 117 | 
            +
                "Geometric Shapes": range(9632, 9728),
         | 
| 118 | 
            +
                "Miscellaneous Symbols": range(9728, 9984),
         | 
| 119 | 
            +
                "Dingbats": range(9984, 10176),
         | 
| 120 | 
            +
                "Miscellaneous Mathematical Symbols-A": range(10176, 10224),
         | 
| 121 | 
            +
                "Supplemental Arrows-A": range(10224, 10240),
         | 
| 122 | 
            +
                "Braille Patterns": range(10240, 10496),
         | 
| 123 | 
            +
                "Supplemental Arrows-B": range(10496, 10624),
         | 
| 124 | 
            +
                "Miscellaneous Mathematical Symbols-B": range(10624, 10752),
         | 
| 125 | 
            +
                "Supplemental Mathematical Operators": range(10752, 11008),
         | 
| 126 | 
            +
                "Miscellaneous Symbols and Arrows": range(11008, 11264),
         | 
| 127 | 
            +
                "Glagolitic": range(11264, 11360),
         | 
| 128 | 
            +
                "Latin Extended-C": range(11360, 11392),
         | 
| 129 | 
            +
                "Coptic": range(11392, 11520),
         | 
| 130 | 
            +
                "Georgian Supplement": range(11520, 11568),
         | 
| 131 | 
            +
                "Tifinagh": range(11568, 11648),
         | 
| 132 | 
            +
                "Ethiopic Extended": range(11648, 11744),
         | 
| 133 | 
            +
                "Cyrillic Extended-A": range(11744, 11776),
         | 
| 134 | 
            +
                "Supplemental Punctuation": range(11776, 11904),
         | 
| 135 | 
            +
                "CJK Radicals Supplement": range(11904, 12032),
         | 
| 136 | 
            +
                "Kangxi Radicals": range(12032, 12256),
         | 
| 137 | 
            +
                "Ideographic Description Characters": range(12272, 12288),
         | 
| 138 | 
            +
                "CJK Symbols and Punctuation": range(12288, 12352),
         | 
| 139 | 
            +
                "Hiragana": range(12352, 12448),
         | 
| 140 | 
            +
                "Katakana": range(12448, 12544),
         | 
| 141 | 
            +
                "Bopomofo": range(12544, 12592),
         | 
| 142 | 
            +
                "Hangul Compatibility Jamo": range(12592, 12688),
         | 
| 143 | 
            +
                "Kanbun": range(12688, 12704),
         | 
| 144 | 
            +
                "Bopomofo Extended": range(12704, 12736),
         | 
| 145 | 
            +
                "CJK Strokes": range(12736, 12784),
         | 
| 146 | 
            +
                "Katakana Phonetic Extensions": range(12784, 12800),
         | 
| 147 | 
            +
                "Enclosed CJK Letters and Months": range(12800, 13056),
         | 
| 148 | 
            +
                "CJK Compatibility": range(13056, 13312),
         | 
| 149 | 
            +
                "CJK Unified Ideographs Extension A": range(13312, 19904),
         | 
| 150 | 
            +
                "Yijing Hexagram Symbols": range(19904, 19968),
         | 
| 151 | 
            +
                "CJK Unified Ideographs": range(19968, 40960),
         | 
| 152 | 
            +
                "Yi Syllables": range(40960, 42128),
         | 
| 153 | 
            +
                "Yi Radicals": range(42128, 42192),
         | 
| 154 | 
            +
                "Lisu": range(42192, 42240),
         | 
| 155 | 
            +
                "Vai": range(42240, 42560),
         | 
| 156 | 
            +
                "Cyrillic Extended-B": range(42560, 42656),
         | 
| 157 | 
            +
                "Bamum": range(42656, 42752),
         | 
| 158 | 
            +
                "Modifier Tone Letters": range(42752, 42784),
         | 
| 159 | 
            +
                "Latin Extended-D": range(42784, 43008),
         | 
| 160 | 
            +
                "Syloti Nagri": range(43008, 43056),
         | 
| 161 | 
            +
                "Common Indic Number Forms": range(43056, 43072),
         | 
| 162 | 
            +
                "Phags-pa": range(43072, 43136),
         | 
| 163 | 
            +
                "Saurashtra": range(43136, 43232),
         | 
| 164 | 
            +
                "Devanagari Extended": range(43232, 43264),
         | 
| 165 | 
            +
                "Kayah Li": range(43264, 43312),
         | 
| 166 | 
            +
                "Rejang": range(43312, 43360),
         | 
| 167 | 
            +
                "Hangul Jamo Extended-A": range(43360, 43392),
         | 
| 168 | 
            +
                "Javanese": range(43392, 43488),
         | 
| 169 | 
            +
                "Myanmar Extended-B": range(43488, 43520),
         | 
| 170 | 
            +
                "Cham": range(43520, 43616),
         | 
| 171 | 
            +
                "Myanmar Extended-A": range(43616, 43648),
         | 
| 172 | 
            +
                "Tai Viet": range(43648, 43744),
         | 
| 173 | 
            +
                "Meetei Mayek Extensions": range(43744, 43776),
         | 
| 174 | 
            +
                "Ethiopic Extended-A": range(43776, 43824),
         | 
| 175 | 
            +
                "Latin Extended-E": range(43824, 43888),
         | 
| 176 | 
            +
                "Cherokee Supplement": range(43888, 43968),
         | 
| 177 | 
            +
                "Meetei Mayek": range(43968, 44032),
         | 
| 178 | 
            +
                "Hangul Syllables": range(44032, 55216),
         | 
| 179 | 
            +
                "Hangul Jamo Extended-B": range(55216, 55296),
         | 
| 180 | 
            +
                "High Surrogates": range(55296, 56192),
         | 
| 181 | 
            +
                "High Private Use Surrogates": range(56192, 56320),
         | 
| 182 | 
            +
                "Low Surrogates": range(56320, 57344),
         | 
| 183 | 
            +
                "Private Use Area": range(57344, 63744),
         | 
| 184 | 
            +
                "CJK Compatibility Ideographs": range(63744, 64256),
         | 
| 185 | 
            +
                "Alphabetic Presentation Forms": range(64256, 64336),
         | 
| 186 | 
            +
                "Arabic Presentation Forms-A": range(64336, 65024),
         | 
| 187 | 
            +
                "Variation Selectors": range(65024, 65040),
         | 
| 188 | 
            +
                "Vertical Forms": range(65040, 65056),
         | 
| 189 | 
            +
                "Combining Half Marks": range(65056, 65072),
         | 
| 190 | 
            +
                "CJK Compatibility Forms": range(65072, 65104),
         | 
| 191 | 
            +
                "Small Form Variants": range(65104, 65136),
         | 
| 192 | 
            +
                "Arabic Presentation Forms-B": range(65136, 65280),
         | 
| 193 | 
            +
                "Halfwidth and Fullwidth Forms": range(65280, 65520),
         | 
| 194 | 
            +
                "Specials": range(65520, 65536),
         | 
| 195 | 
            +
                "Linear B Syllabary": range(65536, 65664),
         | 
| 196 | 
            +
                "Linear B Ideograms": range(65664, 65792),
         | 
| 197 | 
            +
                "Aegean Numbers": range(65792, 65856),
         | 
| 198 | 
            +
                "Ancient Greek Numbers": range(65856, 65936),
         | 
| 199 | 
            +
                "Ancient Symbols": range(65936, 66000),
         | 
| 200 | 
            +
                "Phaistos Disc": range(66000, 66048),
         | 
| 201 | 
            +
                "Lycian": range(66176, 66208),
         | 
| 202 | 
            +
                "Carian": range(66208, 66272),
         | 
| 203 | 
            +
                "Coptic Epact Numbers": range(66272, 66304),
         | 
| 204 | 
            +
                "Old Italic": range(66304, 66352),
         | 
| 205 | 
            +
                "Gothic": range(66352, 66384),
         | 
| 206 | 
            +
                "Old Permic": range(66384, 66432),
         | 
| 207 | 
            +
                "Ugaritic": range(66432, 66464),
         | 
| 208 | 
            +
                "Old Persian": range(66464, 66528),
         | 
| 209 | 
            +
                "Deseret": range(66560, 66640),
         | 
| 210 | 
            +
                "Shavian": range(66640, 66688),
         | 
| 211 | 
            +
                "Osmanya": range(66688, 66736),
         | 
| 212 | 
            +
                "Osage": range(66736, 66816),
         | 
| 213 | 
            +
                "Elbasan": range(66816, 66864),
         | 
| 214 | 
            +
                "Caucasian Albanian": range(66864, 66928),
         | 
| 215 | 
            +
                "Vithkuqi": range(66928, 67008),
         | 
| 216 | 
            +
                "Linear A": range(67072, 67456),
         | 
| 217 | 
            +
                "Latin Extended-F": range(67456, 67520),
         | 
| 218 | 
            +
                "Cypriot Syllabary": range(67584, 67648),
         | 
| 219 | 
            +
                "Imperial Aramaic": range(67648, 67680),
         | 
| 220 | 
            +
                "Palmyrene": range(67680, 67712),
         | 
| 221 | 
            +
                "Nabataean": range(67712, 67760),
         | 
| 222 | 
            +
                "Hatran": range(67808, 67840),
         | 
| 223 | 
            +
                "Phoenician": range(67840, 67872),
         | 
| 224 | 
            +
                "Lydian": range(67872, 67904),
         | 
| 225 | 
            +
                "Meroitic Hieroglyphs": range(67968, 68000),
         | 
| 226 | 
            +
                "Meroitic Cursive": range(68000, 68096),
         | 
| 227 | 
            +
                "Kharoshthi": range(68096, 68192),
         | 
| 228 | 
            +
                "Old South Arabian": range(68192, 68224),
         | 
| 229 | 
            +
                "Old North Arabian": range(68224, 68256),
         | 
| 230 | 
            +
                "Manichaean": range(68288, 68352),
         | 
| 231 | 
            +
                "Avestan": range(68352, 68416),
         | 
| 232 | 
            +
                "Inscriptional Parthian": range(68416, 68448),
         | 
| 233 | 
            +
                "Inscriptional Pahlavi": range(68448, 68480),
         | 
| 234 | 
            +
                "Psalter Pahlavi": range(68480, 68528),
         | 
| 235 | 
            +
                "Old Turkic": range(68608, 68688),
         | 
| 236 | 
            +
                "Old Hungarian": range(68736, 68864),
         | 
| 237 | 
            +
                "Hanifi Rohingya": range(68864, 68928),
         | 
| 238 | 
            +
                "Rumi Numeral Symbols": range(69216, 69248),
         | 
| 239 | 
            +
                "Yezidi": range(69248, 69312),
         | 
| 240 | 
            +
                "Arabic Extended-C": range(69312, 69376),
         | 
| 241 | 
            +
                "Old Sogdian": range(69376, 69424),
         | 
| 242 | 
            +
                "Sogdian": range(69424, 69488),
         | 
| 243 | 
            +
                "Old Uyghur": range(69488, 69552),
         | 
| 244 | 
            +
                "Chorasmian": range(69552, 69600),
         | 
| 245 | 
            +
                "Elymaic": range(69600, 69632),
         | 
| 246 | 
            +
                "Brahmi": range(69632, 69760),
         | 
| 247 | 
            +
                "Kaithi": range(69760, 69840),
         | 
| 248 | 
            +
                "Sora Sompeng": range(69840, 69888),
         | 
| 249 | 
            +
                "Chakma": range(69888, 69968),
         | 
| 250 | 
            +
                "Mahajani": range(69968, 70016),
         | 
| 251 | 
            +
                "Sharada": range(70016, 70112),
         | 
| 252 | 
            +
                "Sinhala Archaic Numbers": range(70112, 70144),
         | 
| 253 | 
            +
                "Khojki": range(70144, 70224),
         | 
| 254 | 
            +
                "Multani": range(70272, 70320),
         | 
| 255 | 
            +
                "Khudawadi": range(70320, 70400),
         | 
| 256 | 
            +
                "Grantha": range(70400, 70528),
         | 
| 257 | 
            +
                "Newa": range(70656, 70784),
         | 
| 258 | 
            +
                "Tirhuta": range(70784, 70880),
         | 
| 259 | 
            +
                "Siddham": range(71040, 71168),
         | 
| 260 | 
            +
                "Modi": range(71168, 71264),
         | 
| 261 | 
            +
                "Mongolian Supplement": range(71264, 71296),
         | 
| 262 | 
            +
                "Takri": range(71296, 71376),
         | 
| 263 | 
            +
                "Ahom": range(71424, 71504),
         | 
| 264 | 
            +
                "Dogra": range(71680, 71760),
         | 
| 265 | 
            +
                "Warang Citi": range(71840, 71936),
         | 
| 266 | 
            +
                "Dives Akuru": range(71936, 72032),
         | 
| 267 | 
            +
                "Nandinagari": range(72096, 72192),
         | 
| 268 | 
            +
                "Zanabazar Square": range(72192, 72272),
         | 
| 269 | 
            +
                "Soyombo": range(72272, 72368),
         | 
| 270 | 
            +
                "Unified Canadian Aboriginal Syllabics Extended-A": range(72368, 72384),
         | 
| 271 | 
            +
                "Pau Cin Hau": range(72384, 72448),
         | 
| 272 | 
            +
                "Devanagari Extended-A": range(72448, 72544),
         | 
| 273 | 
            +
                "Bhaiksuki": range(72704, 72816),
         | 
| 274 | 
            +
                "Marchen": range(72816, 72896),
         | 
| 275 | 
            +
                "Masaram Gondi": range(72960, 73056),
         | 
| 276 | 
            +
                "Gunjala Gondi": range(73056, 73136),
         | 
| 277 | 
            +
                "Makasar": range(73440, 73472),
         | 
| 278 | 
            +
                "Kawi": range(73472, 73568),
         | 
| 279 | 
            +
                "Lisu Supplement": range(73648, 73664),
         | 
| 280 | 
            +
                "Tamil Supplement": range(73664, 73728),
         | 
| 281 | 
            +
                "Cuneiform": range(73728, 74752),
         | 
| 282 | 
            +
                "Cuneiform Numbers and Punctuation": range(74752, 74880),
         | 
| 283 | 
            +
                "Early Dynastic Cuneiform": range(74880, 75088),
         | 
| 284 | 
            +
                "Cypro-Minoan": range(77712, 77824),
         | 
| 285 | 
            +
                "Egyptian Hieroglyphs": range(77824, 78896),
         | 
| 286 | 
            +
                "Egyptian Hieroglyph Format Controls": range(78896, 78944),
         | 
| 287 | 
            +
                "Anatolian Hieroglyphs": range(82944, 83584),
         | 
| 288 | 
            +
                "Bamum Supplement": range(92160, 92736),
         | 
| 289 | 
            +
                "Mro": range(92736, 92784),
         | 
| 290 | 
            +
                "Tangsa": range(92784, 92880),
         | 
| 291 | 
            +
                "Bassa Vah": range(92880, 92928),
         | 
| 292 | 
            +
                "Pahawh Hmong": range(92928, 93072),
         | 
| 293 | 
            +
                "Medefaidrin": range(93760, 93856),
         | 
| 294 | 
            +
                "Miao": range(93952, 94112),
         | 
| 295 | 
            +
                "Ideographic Symbols and Punctuation": range(94176, 94208),
         | 
| 296 | 
            +
                "Tangut": range(94208, 100352),
         | 
| 297 | 
            +
                "Tangut Components": range(100352, 101120),
         | 
| 298 | 
            +
                "Khitan Small Script": range(101120, 101632),
         | 
| 299 | 
            +
                "Tangut Supplement": range(101632, 101760),
         | 
| 300 | 
            +
                "Kana Extended-B": range(110576, 110592),
         | 
| 301 | 
            +
                "Kana Supplement": range(110592, 110848),
         | 
| 302 | 
            +
                "Kana Extended-A": range(110848, 110896),
         | 
| 303 | 
            +
                "Small Kana Extension": range(110896, 110960),
         | 
| 304 | 
            +
                "Nushu": range(110960, 111360),
         | 
| 305 | 
            +
                "Duployan": range(113664, 113824),
         | 
| 306 | 
            +
                "Shorthand Format Controls": range(113824, 113840),
         | 
| 307 | 
            +
                "Znamenny Musical Notation": range(118528, 118736),
         | 
| 308 | 
            +
                "Byzantine Musical Symbols": range(118784, 119040),
         | 
| 309 | 
            +
                "Musical Symbols": range(119040, 119296),
         | 
| 310 | 
            +
                "Ancient Greek Musical Notation": range(119296, 119376),
         | 
| 311 | 
            +
                "Kaktovik Numerals": range(119488, 119520),
         | 
| 312 | 
            +
                "Mayan Numerals": range(119520, 119552),
         | 
| 313 | 
            +
                "Tai Xuan Jing Symbols": range(119552, 119648),
         | 
| 314 | 
            +
                "Counting Rod Numerals": range(119648, 119680),
         | 
| 315 | 
            +
                "Mathematical Alphanumeric Symbols": range(119808, 120832),
         | 
| 316 | 
            +
                "Sutton SignWriting": range(120832, 121520),
         | 
| 317 | 
            +
                "Latin Extended-G": range(122624, 122880),
         | 
| 318 | 
            +
                "Glagolitic Supplement": range(122880, 122928),
         | 
| 319 | 
            +
                "Cyrillic Extended-D": range(122928, 123024),
         | 
| 320 | 
            +
                "Nyiakeng Puachue Hmong": range(123136, 123216),
         | 
| 321 | 
            +
                "Toto": range(123536, 123584),
         | 
| 322 | 
            +
                "Wancho": range(123584, 123648),
         | 
| 323 | 
            +
                "Nag Mundari": range(124112, 124160),
         | 
| 324 | 
            +
                "Ethiopic Extended-B": range(124896, 124928),
         | 
| 325 | 
            +
                "Mende Kikakui": range(124928, 125152),
         | 
| 326 | 
            +
                "Adlam": range(125184, 125280),
         | 
| 327 | 
            +
                "Indic Siyaq Numbers": range(126064, 126144),
         | 
| 328 | 
            +
                "Ottoman Siyaq Numbers": range(126208, 126288),
         | 
| 329 | 
            +
                "Arabic Mathematical Alphabetic Symbols": range(126464, 126720),
         | 
| 330 | 
            +
                "Mahjong Tiles": range(126976, 127024),
         | 
| 331 | 
            +
                "Domino Tiles": range(127024, 127136),
         | 
| 332 | 
            +
                "Playing Cards": range(127136, 127232),
         | 
| 333 | 
            +
                "Enclosed Alphanumeric Supplement": range(127232, 127488),
         | 
| 334 | 
            +
                "Enclosed Ideographic Supplement": range(127488, 127744),
         | 
| 335 | 
            +
                "Miscellaneous Symbols and Pictographs": range(127744, 128512),
         | 
| 336 | 
            +
                "Emoticons range(Emoji)": range(128512, 128592),
         | 
| 337 | 
            +
                "Ornamental Dingbats": range(128592, 128640),
         | 
| 338 | 
            +
                "Transport and Map Symbols": range(128640, 128768),
         | 
| 339 | 
            +
                "Alchemical Symbols": range(128768, 128896),
         | 
| 340 | 
            +
                "Geometric Shapes Extended": range(128896, 129024),
         | 
| 341 | 
            +
                "Supplemental Arrows-C": range(129024, 129280),
         | 
| 342 | 
            +
                "Supplemental Symbols and Pictographs": range(129280, 129536),
         | 
| 343 | 
            +
                "Chess Symbols": range(129536, 129648),
         | 
| 344 | 
            +
                "Symbols and Pictographs Extended-A": range(129648, 129792),
         | 
| 345 | 
            +
                "Symbols for Legacy Computing": range(129792, 130048),
         | 
| 346 | 
            +
                "CJK Unified Ideographs Extension B": range(131072, 173792),
         | 
| 347 | 
            +
                "CJK Unified Ideographs Extension C": range(173824, 177984),
         | 
| 348 | 
            +
                "CJK Unified Ideographs Extension D": range(177984, 178208),
         | 
| 349 | 
            +
                "CJK Unified Ideographs Extension E": range(178208, 183984),
         | 
| 350 | 
            +
                "CJK Unified Ideographs Extension F": range(183984, 191472),
         | 
| 351 | 
            +
                "CJK Compatibility Ideographs Supplement": range(194560, 195104),
         | 
| 352 | 
            +
                "CJK Unified Ideographs Extension G": range(196608, 201552),
         | 
| 353 | 
            +
                "CJK Unified Ideographs Extension H": range(201552, 205744),
         | 
| 354 | 
            +
                "Tags": range(917504, 917632),
         | 
| 355 | 
            +
                "Variation Selectors Supplement": range(917760, 918000),
         | 
| 356 | 
            +
                "Supplementary Private Use Area-A": range(983040, 1048576),
         | 
| 357 | 
            +
                "Supplementary Private Use Area-B": range(1048576, 1114112),
         | 
| 358 | 
            +
            }
         | 
| 359 | 
            +
             | 
| 360 | 
            +
             | 
| 361 | 
            +
            # Words that occur in Unicode block names (e.g. "... Supplement",
            # "... Extended-A"). Presumably used to tell a secondary block apart
            # from a script's primary block -- confirm against the callers.
            UNICODE_SECONDARY_RANGE_KEYWORD: list[str] = [
                "Supplement",
                "Extended",
                "Extensions",
                "Modifier",
                "Marks",
                "Punctuation",
                "Symbols",
                "Forms",
                "Operators",
                "Miscellaneous",
                "Drawing",
                "Block",
                "Shapes",
                "Supplemental",
                "Tags",
            ]
         | 
| 378 | 
            +
             | 
| 379 | 
            +
            # Matches an encoding declaration such as ``encoding="utf-8"``,
            # ``charset=utf-8`` or ``coding: latin-1`` (case-insensitive) and
            # captures the declared name: letters, digits, "-" and "_" only,
            # optionally wrapped in single or double quotes.
            RE_POSSIBLE_ENCODING_INDICATION = re_compile(
                r"(?:(?:encoding)|(?:charset)|(?:coding))(?:[\:= ]{1,10})(?:[\"\']?)([a-zA-Z0-9\-_]+)(?:[\"\']?)",
                IGNORECASE,
            )
         | 
| 383 | 
            +
             | 
| 384 | 
            +
            # Encoding names added on top of the codec names found as values of
            # ``encodings.aliases.aliases``. A name listed here may also be present
            # there, which is why IANA_SUPPORTED is built from a set union.
            IANA_NO_ALIASES: list[str] = [
                "cp720",
                "cp737",
                "cp856",
                "cp874",
                "cp875",
                "cp1006",
                "koi8_r",
                "koi8_t",
                "koi8_u",
            ]

            # Sorted, duplicate-free list of candidate encoding names. Names ending
            # in "_codec" and the entries "rot_13", "tactis" and "mbcs" are excluded.
            IANA_SUPPORTED: list[str] = sorted(
                name
                for name in {*aliases.values(), *IANA_NO_ALIASES}
                if not name.endswith("_codec")
                and name not in {"rot_13", "tactis", "mbcs"}
            )

            IANA_SUPPORTED_COUNT: int = len(IANA_SUPPORTED)
         | 
| 405 | 
            +
             | 
| 406 | 
            +
            # Pre-computed table of code pages that are similar to one another,
            # generated with the function cp_similarity. Each key is a codec name
            # and its value is the list of codec names considered similar to it.
            IANA_SUPPORTED_SIMILAR: dict[str, list[str]] = {
                "cp037": ["cp1026", "cp1140", "cp273", "cp500"],
                "cp1026": ["cp037", "cp1140", "cp273", "cp500"],
                "cp1125": ["cp866"],
                "cp1140": ["cp037", "cp1026", "cp273", "cp500"],
                "cp1250": ["iso8859_2"],
                "cp1251": ["kz1048", "ptcp154"],
                "cp1252": ["iso8859_15", "iso8859_9", "latin_1"],
                "cp1253": ["iso8859_7"],
                "cp1254": ["iso8859_15", "iso8859_9", "latin_1"],
                "cp1257": ["iso8859_13"],
                "cp273": ["cp037", "cp1026", "cp1140", "cp500"],
                "cp437": ["cp850", "cp858", "cp860", "cp861", "cp862", "cp863", "cp865"],
                "cp500": ["cp037", "cp1026", "cp1140", "cp273"],
                "cp850": ["cp437", "cp857", "cp858", "cp865"],
                "cp857": ["cp850", "cp858", "cp865"],
                "cp858": ["cp437", "cp850", "cp857", "cp865"],
                "cp860": ["cp437", "cp861", "cp862", "cp863", "cp865"],
                "cp861": ["cp437", "cp860", "cp862", "cp863", "cp865"],
                "cp862": ["cp437", "cp860", "cp861", "cp863", "cp865"],
                "cp863": ["cp437", "cp860", "cp861", "cp862", "cp865"],
                "cp865": [
                    "cp437",
                    "cp850",
                    "cp857",
                    "cp858",
                    "cp860",
                    "cp861",
                    "cp862",
                    "cp863",
                ],
                "cp866": ["cp1125"],
                "iso8859_10": [
                    "iso8859_14",
                    "iso8859_15",
                    "iso8859_4",
                    "iso8859_9",
                    "latin_1",
                ],
                "iso8859_11": ["tis_620"],
                "iso8859_13": ["cp1257"],
                "iso8859_14": [
                    "iso8859_10",
                    "iso8859_15",
                    "iso8859_16",
                    "iso8859_3",
                    "iso8859_9",
                    "latin_1",
                ],
                "iso8859_15": [
                    "cp1252",
                    "cp1254",
                    "iso8859_10",
                    "iso8859_14",
                    "iso8859_16",
                    "iso8859_3",
                    "iso8859_9",
                    "latin_1",
                ],
                "iso8859_16": [
                    "iso8859_14",
                    "iso8859_15",
                    "iso8859_2",
                    "iso8859_3",
                    "iso8859_9",
                    "latin_1",
                ],
                "iso8859_2": ["cp1250", "iso8859_16", "iso8859_4"],
                "iso8859_3": [
                    "iso8859_14",
                    "iso8859_15",
                    "iso8859_16",
                    "iso8859_9",
                    "latin_1",
                ],
                "iso8859_4": ["iso8859_10", "iso8859_2", "iso8859_9", "latin_1"],
                "iso8859_7": ["cp1253"],
                "iso8859_9": [
                    "cp1252",
                    "cp1254",
                    "cp1258",
                    "iso8859_10",
                    "iso8859_14",
                    "iso8859_15",
                    "iso8859_16",
                    "iso8859_3",
                    "iso8859_4",
                    "latin_1",
                ],
                "kz1048": ["cp1251", "ptcp154"],
                "latin_1": [
                    "cp1252",
                    "cp1254",
                    "cp1258",
                    "iso8859_10",
                    "iso8859_14",
                    "iso8859_15",
                    "iso8859_16",
                    "iso8859_3",
                    "iso8859_4",
                    "iso8859_9",
                ],
                "mac_iceland": ["mac_roman", "mac_turkish"],
                "mac_roman": ["mac_iceland", "mac_turkish"],
                "mac_turkish": ["mac_iceland", "mac_roman"],
                "ptcp154": ["cp1251", "kz1048"],
                "tis_620": ["iso8859_11"],
            }
         | 
| 494 | 
            +
             | 
| 495 | 
            +
             | 
| 496 | 
            +
            # Maps a Python codec name to a display label. Presumably these are the
            # labels chardet reports for the same encodings -- confirm against the
            # caller. The mixed casing of the values (e.g. "utf-8", "Windows-1252",
            # "windows-1250") is reproduced as-is because the strings are runtime
            # data.
            CHARDET_CORRESPONDENCE: dict[str, str] = {
                "iso2022_kr": "ISO-2022-KR",
                "iso2022_jp": "ISO-2022-JP",
                "euc_kr": "EUC-KR",
                "tis_620": "TIS-620",
                "utf_32": "UTF-32",
                "euc_jp": "EUC-JP",
                "koi8_r": "KOI8-R",
                "iso8859_1": "ISO-8859-1",
                "iso8859_2": "ISO-8859-2",
                "iso8859_5": "ISO-8859-5",
                "iso8859_6": "ISO-8859-6",
                "iso8859_7": "ISO-8859-7",
                "iso8859_8": "ISO-8859-8",
                "utf_16": "UTF-16",
                "cp855": "IBM855",
                "mac_cyrillic": "MacCyrillic",
                "gb2312": "GB2312",
                "gb18030": "GB18030",
                "cp932": "CP932",
                "cp866": "IBM866",
                "utf_8": "utf-8",
                "utf_8_sig": "UTF-8-SIG",
                "shift_jis": "SHIFT_JIS",
                "big5": "Big5",
                "cp1250": "windows-1250",
                "cp1251": "windows-1251",
                "cp1252": "Windows-1252",
                "cp1253": "windows-1253",
                "cp1255": "windows-1255",
                "cp1256": "windows-1256",
                "cp1254": "Windows-1254",
                "cp949": "CP949",
            }
         | 
| 530 | 
            +
             | 
| 531 | 
            +
             | 
| 532 | 
            +
            # ASCII punctuation and bracket characters treated as "safe".
            # NOTE(review): presumably these are ignored when scoring decoded text
            # because they are common in markup and code -- confirm with the caller.
            COMMON_SAFE_ASCII_CHARACTERS: set[str] = {
                "<",
                ">",
                "=",
                ":",
                "/",
                "&",
                ";",
                "{",
                "}",
                "[",
                "]",
                ",",
                "|",
                '"',
                "-",
                "(",
                ")",
            }
         | 
| 551 | 
            +
             | 
| 552 | 
            +
             | 
| 553 | 
            +
            # Codec names grouped by language: KO_NAMES holds Korean encodings and
            # ZH_NAMES holds Chinese encodings.
            KO_NAMES: set[str] = {"johab", "cp949", "euc_kr"}
            ZH_NAMES: set[str] = {"big5", "cp950", "big5hkscs", "hz"}
         | 
| 555 | 
            +
             | 
| 556 | 
            +
            # Custom logging level that sits below DEBUG (logging.DEBUG is 10).
            TRACE: int = 5
         | 
| 558 | 
            +
             | 
| 559 | 
            +
             | 
| 560 | 
            +
            # Language labels that contain the em dash "—" character
            # are to be considered an alternative sequence to the original.
         | 
| 562 | 
            +
            FREQUENCIES: dict[str, list[str]] = {
         | 
| 563 | 
            +
                "English": [
         | 
| 564 | 
            +
                    "e",
         | 
| 565 | 
            +
                    "a",
         | 
| 566 | 
            +
                    "t",
         | 
| 567 | 
            +
                    "i",
         | 
| 568 | 
            +
                    "o",
         | 
| 569 | 
            +
                    "n",
         | 
| 570 | 
            +
                    "s",
         | 
| 571 | 
            +
                    "r",
         | 
| 572 | 
            +
                    "h",
         | 
| 573 | 
            +
                    "l",
         | 
| 574 | 
            +
                    "d",
         | 
| 575 | 
            +
                    "c",
         | 
| 576 | 
            +
                    "u",
         | 
| 577 | 
            +
                    "m",
         | 
| 578 | 
            +
                    "f",
         | 
| 579 | 
            +
                    "p",
         | 
| 580 | 
            +
                    "g",
         | 
| 581 | 
            +
                    "w",
         | 
| 582 | 
            +
                    "y",
         | 
| 583 | 
            +
                    "b",
         | 
| 584 | 
            +
                    "v",
         | 
| 585 | 
            +
                    "k",
         | 
| 586 | 
            +
                    "x",
         | 
| 587 | 
            +
                    "j",
         | 
| 588 | 
            +
                    "z",
         | 
| 589 | 
            +
                    "q",
         | 
| 590 | 
            +
                ],
         | 
| 591 | 
            +
                "English—": [
         | 
| 592 | 
            +
                    "e",
         | 
| 593 | 
            +
                    "a",
         | 
| 594 | 
            +
                    "t",
         | 
| 595 | 
            +
                    "i",
         | 
| 596 | 
            +
                    "o",
         | 
| 597 | 
            +
                    "n",
         | 
| 598 | 
            +
                    "s",
         | 
| 599 | 
            +
                    "r",
         | 
| 600 | 
            +
                    "h",
         | 
| 601 | 
            +
                    "l",
         | 
| 602 | 
            +
                    "d",
         | 
| 603 | 
            +
                    "c",
         | 
| 604 | 
            +
                    "m",
         | 
| 605 | 
            +
                    "u",
         | 
| 606 | 
            +
                    "f",
         | 
| 607 | 
            +
                    "p",
         | 
| 608 | 
            +
                    "g",
         | 
| 609 | 
            +
                    "w",
         | 
| 610 | 
            +
                    "b",
         | 
| 611 | 
            +
                    "y",
         | 
| 612 | 
            +
                    "v",
         | 
| 613 | 
            +
                    "k",
         | 
| 614 | 
            +
                    "j",
         | 
| 615 | 
            +
                    "x",
         | 
| 616 | 
            +
                    "z",
         | 
| 617 | 
            +
                    "q",
         | 
| 618 | 
            +
                ],
         | 
| 619 | 
            +
                "German": [
         | 
| 620 | 
            +
                    "e",
         | 
| 621 | 
            +
                    "n",
         | 
| 622 | 
            +
                    "i",
         | 
| 623 | 
            +
                    "r",
         | 
| 624 | 
            +
                    "s",
         | 
| 625 | 
            +
                    "t",
         | 
| 626 | 
            +
                    "a",
         | 
| 627 | 
            +
                    "d",
         | 
| 628 | 
            +
                    "h",
         | 
| 629 | 
            +
                    "u",
         | 
| 630 | 
            +
                    "l",
         | 
| 631 | 
            +
                    "g",
         | 
| 632 | 
            +
                    "o",
         | 
| 633 | 
            +
                    "c",
         | 
| 634 | 
            +
                    "m",
         | 
| 635 | 
            +
                    "b",
         | 
| 636 | 
            +
                    "f",
         | 
| 637 | 
            +
                    "k",
         | 
| 638 | 
            +
                    "w",
         | 
| 639 | 
            +
                    "z",
         | 
| 640 | 
            +
                    "p",
         | 
| 641 | 
            +
                    "v",
         | 
| 642 | 
            +
                    "ü",
         | 
| 643 | 
            +
                    "ä",
         | 
| 644 | 
            +
                    "ö",
         | 
| 645 | 
            +
                    "j",
         | 
| 646 | 
            +
                ],
         | 
| 647 | 
            +
                "French": [
         | 
| 648 | 
            +
                    "e",
         | 
| 649 | 
            +
                    "a",
         | 
| 650 | 
            +
                    "s",
         | 
| 651 | 
            +
                    "n",
         | 
| 652 | 
            +
                    "i",
         | 
| 653 | 
            +
                    "t",
         | 
| 654 | 
            +
                    "r",
         | 
| 655 | 
            +
                    "l",
         | 
| 656 | 
            +
                    "u",
         | 
| 657 | 
            +
                    "o",
         | 
| 658 | 
            +
                    "d",
         | 
| 659 | 
            +
                    "c",
         | 
| 660 | 
            +
                    "p",
         | 
| 661 | 
            +
                    "m",
         | 
| 662 | 
            +
                    "é",
         | 
| 663 | 
            +
                    "v",
         | 
| 664 | 
            +
                    "g",
         | 
| 665 | 
            +
                    "f",
         | 
| 666 | 
            +
                    "b",
         | 
| 667 | 
            +
                    "h",
         | 
| 668 | 
            +
                    "q",
         | 
| 669 | 
            +
                    "à",
         | 
| 670 | 
            +
                    "x",
         | 
| 671 | 
            +
                    "è",
         | 
| 672 | 
            +
                    "y",
         | 
| 673 | 
            +
                    "j",
         | 
| 674 | 
            +
                ],
         | 
| 675 | 
            +
                "Dutch": [
         | 
| 676 | 
            +
                    "e",
         | 
| 677 | 
            +
                    "n",
         | 
| 678 | 
            +
                    "a",
         | 
| 679 | 
            +
                    "i",
         | 
| 680 | 
            +
                    "r",
         | 
| 681 | 
            +
                    "t",
         | 
| 682 | 
            +
                    "o",
         | 
| 683 | 
            +
                    "d",
         | 
| 684 | 
            +
                    "s",
         | 
| 685 | 
            +
                    "l",
         | 
| 686 | 
            +
                    "g",
         | 
| 687 | 
            +
                    "h",
         | 
| 688 | 
            +
                    "v",
         | 
| 689 | 
            +
                    "m",
         | 
| 690 | 
            +
                    "u",
         | 
| 691 | 
            +
                    "k",
         | 
| 692 | 
            +
                    "c",
         | 
| 693 | 
            +
                    "p",
         | 
| 694 | 
            +
                    "b",
         | 
| 695 | 
            +
                    "w",
         | 
| 696 | 
            +
                    "j",
         | 
| 697 | 
            +
                    "z",
         | 
| 698 | 
            +
                    "f",
         | 
| 699 | 
            +
                    "y",
         | 
| 700 | 
            +
                    "x",
         | 
| 701 | 
            +
                    "ë",
         | 
| 702 | 
            +
                ],
         | 
| 703 | 
            +
                "Italian": [
         | 
| 704 | 
            +
                    "e",
         | 
| 705 | 
            +
                    "i",
         | 
| 706 | 
            +
                    "a",
         | 
| 707 | 
            +
                    "o",
         | 
| 708 | 
            +
                    "n",
         | 
| 709 | 
            +
                    "l",
         | 
| 710 | 
            +
                    "t",
         | 
| 711 | 
            +
                    "r",
         | 
| 712 | 
            +
                    "s",
         | 
| 713 | 
            +
                    "c",
         | 
| 714 | 
            +
                    "d",
         | 
| 715 | 
            +
                    "u",
         | 
| 716 | 
            +
                    "p",
         | 
| 717 | 
            +
                    "m",
         | 
| 718 | 
            +
                    "g",
         | 
| 719 | 
            +
                    "v",
         | 
| 720 | 
            +
                    "f",
         | 
| 721 | 
            +
                    "b",
         | 
| 722 | 
            +
                    "z",
         | 
| 723 | 
            +
                    "h",
         | 
| 724 | 
            +
                    "q",
         | 
| 725 | 
            +
                    "è",
         | 
| 726 | 
            +
                    "à",
         | 
| 727 | 
            +
                    "k",
         | 
| 728 | 
            +
                    "y",
         | 
| 729 | 
            +
                    "ò",
         | 
| 730 | 
            +
                ],
         | 
| 731 | 
            +
                "Polish": [
         | 
| 732 | 
            +
                    "a",
         | 
| 733 | 
            +
                    "i",
         | 
| 734 | 
            +
                    "o",
         | 
| 735 | 
            +
                    "e",
         | 
| 736 | 
            +
                    "n",
         | 
| 737 | 
            +
                    "r",
         | 
| 738 | 
            +
                    "z",
         | 
| 739 | 
            +
                    "w",
         | 
| 740 | 
            +
                    "s",
         | 
| 741 | 
            +
                    "c",
         | 
| 742 | 
            +
                    "t",
         | 
| 743 | 
            +
                    "k",
         | 
| 744 | 
            +
                    "y",
         | 
| 745 | 
            +
                    "d",
         | 
| 746 | 
            +
                    "p",
         | 
| 747 | 
            +
                    "m",
         | 
| 748 | 
            +
                    "u",
         | 
| 749 | 
            +
                    "l",
         | 
| 750 | 
            +
                    "j",
         | 
| 751 | 
            +
                    "ł",
         | 
| 752 | 
            +
                    "g",
         | 
| 753 | 
            +
                    "b",
         | 
| 754 | 
            +
                    "h",
         | 
| 755 | 
            +
                    "ą",
         | 
| 756 | 
            +
                    "ę",
         | 
| 757 | 
            +
                    "ó",
         | 
| 758 | 
            +
                ],
         | 
| 759 | 
            +
                "Spanish": [
         | 
| 760 | 
            +
                    "e",
         | 
| 761 | 
            +
                    "a",
         | 
| 762 | 
            +
                    "o",
         | 
| 763 | 
            +
                    "n",
         | 
| 764 | 
            +
                    "s",
         | 
| 765 | 
            +
                    "r",
         | 
| 766 | 
            +
                    "i",
         | 
| 767 | 
            +
                    "l",
         | 
| 768 | 
            +
                    "d",
         | 
| 769 | 
            +
                    "t",
         | 
| 770 | 
            +
                    "c",
         | 
| 771 | 
            +
                    "u",
         | 
| 772 | 
            +
                    "m",
         | 
| 773 | 
            +
                    "p",
         | 
| 774 | 
            +
                    "b",
         | 
| 775 | 
            +
                    "g",
         | 
| 776 | 
            +
                    "v",
         | 
| 777 | 
            +
                    "f",
         | 
| 778 | 
            +
                    "y",
         | 
| 779 | 
            +
                    "ó",
         | 
| 780 | 
            +
                    "h",
         | 
| 781 | 
            +
                    "q",
         | 
| 782 | 
            +
                    "í",
         | 
| 783 | 
            +
                    "j",
         | 
| 784 | 
            +
                    "z",
         | 
| 785 | 
            +
                    "á",
         | 
| 786 | 
            +
                ],
         | 
| 787 | 
            +
                "Russian": [
         | 
| 788 | 
            +
                    "о",
         | 
| 789 | 
            +
                    "а",
         | 
| 790 | 
            +
                    "е",
         | 
| 791 | 
            +
                    "и",
         | 
| 792 | 
            +
                    "н",
         | 
| 793 | 
            +
                    "с",
         | 
| 794 | 
            +
                    "т",
         | 
| 795 | 
            +
                    "р",
         | 
| 796 | 
            +
                    "в",
         | 
| 797 | 
            +
                    "л",
         | 
| 798 | 
            +
                    "к",
         | 
| 799 | 
            +
                    "м",
         | 
| 800 | 
            +
                    "д",
         | 
| 801 | 
            +
                    "п",
         | 
| 802 | 
            +
                    "у",
         | 
| 803 | 
            +
                    "г",
         | 
| 804 | 
            +
                    "я",
         | 
| 805 | 
            +
                    "ы",
         | 
| 806 | 
            +
                    "з",
         | 
| 807 | 
            +
                    "б",
         | 
| 808 | 
            +
                    "й",
         | 
| 809 | 
            +
                    "ь",
         | 
| 810 | 
            +
                    "ч",
         | 
| 811 | 
            +
                    "х",
         | 
| 812 | 
            +
                    "ж",
         | 
| 813 | 
            +
                    "ц",
         | 
| 814 | 
            +
                ],
         | 
| 815 | 
            +
                # Jap-Kanji
         | 
| 816 | 
            +
                "Japanese": [
         | 
| 817 | 
            +
                    "人",
         | 
| 818 | 
            +
                    "一",
         | 
| 819 | 
            +
                    "大",
         | 
| 820 | 
            +
                    "亅",
         | 
| 821 | 
            +
                    "丁",
         | 
| 822 | 
            +
                    "丨",
         | 
| 823 | 
            +
                    "竹",
         | 
| 824 | 
            +
                    "笑",
         | 
| 825 | 
            +
                    "口",
         | 
| 826 | 
            +
                    "日",
         | 
| 827 | 
            +
                    "今",
         | 
| 828 | 
            +
                    "二",
         | 
| 829 | 
            +
                    "彳",
         | 
| 830 | 
            +
                    "行",
         | 
| 831 | 
            +
                    "十",
         | 
| 832 | 
            +
                    "土",
         | 
| 833 | 
            +
                    "丶",
         | 
| 834 | 
            +
                    "寸",
         | 
| 835 | 
            +
                    "寺",
         | 
| 836 | 
            +
                    "時",
         | 
| 837 | 
            +
                    "乙",
         | 
| 838 | 
            +
                    "丿",
         | 
| 839 | 
            +
                    "乂",
         | 
| 840 | 
            +
                    "气",
         | 
| 841 | 
            +
                    "気",
         | 
| 842 | 
            +
                    "冂",
         | 
| 843 | 
            +
                    "巾",
         | 
| 844 | 
            +
                    "亠",
         | 
| 845 | 
            +
                    "市",
         | 
| 846 | 
            +
                    "目",
         | 
| 847 | 
            +
                    "儿",
         | 
| 848 | 
            +
                    "見",
         | 
| 849 | 
            +
                    "八",
         | 
| 850 | 
            +
                    "小",
         | 
| 851 | 
            +
                    "凵",
         | 
| 852 | 
            +
                    "県",
         | 
| 853 | 
            +
                    "月",
         | 
| 854 | 
            +
                    "彐",
         | 
| 855 | 
            +
                    "門",
         | 
| 856 | 
            +
                    "間",
         | 
| 857 | 
            +
                    "木",
         | 
| 858 | 
            +
                    "東",
         | 
| 859 | 
            +
                    "山",
         | 
| 860 | 
            +
                    "出",
         | 
| 861 | 
            +
                    "本",
         | 
| 862 | 
            +
                    "中",
         | 
| 863 | 
            +
                    "刀",
         | 
| 864 | 
            +
                    "分",
         | 
| 865 | 
            +
                    "耳",
         | 
| 866 | 
            +
                    "又",
         | 
| 867 | 
            +
                    "取",
         | 
| 868 | 
            +
                    "最",
         | 
| 869 | 
            +
                    "言",
         | 
| 870 | 
            +
                    "田",
         | 
| 871 | 
            +
                    "心",
         | 
| 872 | 
            +
                    "思",
         | 
| 873 | 
            +
                    "刂",
         | 
| 874 | 
            +
                    "前",
         | 
| 875 | 
            +
                    "京",
         | 
| 876 | 
            +
                    "尹",
         | 
| 877 | 
            +
                    "事",
         | 
| 878 | 
            +
                    "生",
         | 
| 879 | 
            +
                    "厶",
         | 
| 880 | 
            +
                    "云",
         | 
| 881 | 
            +
                    "会",
         | 
| 882 | 
            +
                    "未",
         | 
| 883 | 
            +
                    "来",
         | 
| 884 | 
            +
                    "白",
         | 
| 885 | 
            +
                    "冫",
         | 
| 886 | 
            +
                    "楽",
         | 
| 887 | 
            +
                    "灬",
         | 
| 888 | 
            +
                    "馬",
         | 
| 889 | 
            +
                    "尸",
         | 
| 890 | 
            +
                    "尺",
         | 
| 891 | 
            +
                    "駅",
         | 
| 892 | 
            +
                    "明",
         | 
| 893 | 
            +
                    "耂",
         | 
| 894 | 
            +
                    "者",
         | 
| 895 | 
            +
                    "了",
         | 
| 896 | 
            +
                    "阝",
         | 
| 897 | 
            +
                    "都",
         | 
| 898 | 
            +
                    "高",
         | 
| 899 | 
            +
                    "卜",
         | 
| 900 | 
            +
                    "占",
         | 
| 901 | 
            +
                    "厂",
         | 
| 902 | 
            +
                    "广",
         | 
| 903 | 
            +
                    "店",
         | 
| 904 | 
            +
                    "子",
         | 
| 905 | 
            +
                    "申",
         | 
| 906 | 
            +
                    "奄",
         | 
| 907 | 
            +
                    "亻",
         | 
| 908 | 
            +
                    "俺",
         | 
| 909 | 
            +
                    "上",
         | 
| 910 | 
            +
                    "方",
         | 
| 911 | 
            +
                    "冖",
         | 
| 912 | 
            +
                    "学",
         | 
| 913 | 
            +
                    "衣",
         | 
| 914 | 
            +
                    "艮",
         | 
| 915 | 
            +
                    "食",
         | 
| 916 | 
            +
                    "自",
         | 
| 917 | 
            +
                ],
         | 
| 918 | 
            +
                # Jap-Katakana
         | 
| 919 | 
            +
                "Japanese—": [
         | 
| 920 | 
            +
                    "ー",
         | 
| 921 | 
            +
                    "ン",
         | 
| 922 | 
            +
                    "ス",
         | 
| 923 | 
            +
                    "・",
         | 
| 924 | 
            +
                    "ル",
         | 
| 925 | 
            +
                    "ト",
         | 
| 926 | 
            +
                    "リ",
         | 
| 927 | 
            +
                    "イ",
         | 
| 928 | 
            +
                    "ア",
         | 
| 929 | 
            +
                    "ラ",
         | 
| 930 | 
            +
                    "ッ",
         | 
| 931 | 
            +
                    "ク",
         | 
| 932 | 
            +
                    "ド",
         | 
| 933 | 
            +
                    "シ",
         | 
| 934 | 
            +
                    "レ",
         | 
| 935 | 
            +
                    "ジ",
         | 
| 936 | 
            +
                    "タ",
         | 
| 937 | 
            +
                    "フ",
         | 
| 938 | 
            +
                    "ロ",
         | 
| 939 | 
            +
                    "カ",
         | 
| 940 | 
            +
                    "テ",
         | 
| 941 | 
            +
                    "マ",
         | 
| 942 | 
            +
                    "ィ",
         | 
| 943 | 
            +
                    "グ",
         | 
| 944 | 
            +
                    "バ",
         | 
| 945 | 
            +
                    "ム",
         | 
| 946 | 
            +
                    "プ",
         | 
| 947 | 
            +
                    "オ",
         | 
| 948 | 
            +
                    "コ",
         | 
| 949 | 
            +
                    "デ",
         | 
| 950 | 
            +
                    "ニ",
         | 
| 951 | 
            +
                    "ウ",
         | 
| 952 | 
            +
                    "メ",
         | 
| 953 | 
            +
                    "サ",
         | 
| 954 | 
            +
                    "ビ",
         | 
| 955 | 
            +
                    "ナ",
         | 
| 956 | 
            +
                    "ブ",
         | 
| 957 | 
            +
                    "ャ",
         | 
| 958 | 
            +
                    "エ",
         | 
| 959 | 
            +
                    "ュ",
         | 
| 960 | 
            +
                    "チ",
         | 
| 961 | 
            +
                    "キ",
         | 
| 962 | 
            +
                    "ズ",
         | 
| 963 | 
            +
                    "ダ",
         | 
| 964 | 
            +
                    "パ",
         | 
| 965 | 
            +
                    "ミ",
         | 
| 966 | 
            +
                    "ェ",
         | 
| 967 | 
            +
                    "ョ",
         | 
| 968 | 
            +
                    "ハ",
         | 
| 969 | 
            +
                    "セ",
         | 
| 970 | 
            +
                    "ベ",
         | 
| 971 | 
            +
                    "ガ",
         | 
| 972 | 
            +
                    "モ",
         | 
| 973 | 
            +
                    "ツ",
         | 
| 974 | 
            +
                    "ネ",
         | 
| 975 | 
            +
                    "ボ",
         | 
| 976 | 
            +
                    "ソ",
         | 
| 977 | 
            +
                    "ノ",
         | 
| 978 | 
            +
                    "ァ",
         | 
| 979 | 
            +
                    "ヴ",
         | 
| 980 | 
            +
                    "ワ",
         | 
| 981 | 
            +
                    "ポ",
         | 
| 982 | 
            +
                    "ペ",
         | 
| 983 | 
            +
                    "ピ",
         | 
| 984 | 
            +
                    "ケ",
         | 
| 985 | 
            +
                    "ゴ",
         | 
| 986 | 
            +
                    "ギ",
         | 
| 987 | 
            +
                    "ザ",
         | 
| 988 | 
            +
                    "ホ",
         | 
| 989 | 
            +
                    "ゲ",
         | 
| 990 | 
            +
                    "ォ",
         | 
| 991 | 
            +
                    "ヤ",
         | 
| 992 | 
            +
                    "ヒ",
         | 
| 993 | 
            +
                    "ユ",
         | 
| 994 | 
            +
                    "ヨ",
         | 
| 995 | 
            +
                    "ヘ",
         | 
| 996 | 
            +
                    "ゼ",
         | 
| 997 | 
            +
                    "ヌ",
         | 
| 998 | 
            +
                    "ゥ",
         | 
| 999 | 
            +
                    "ゾ",
         | 
| 1000 | 
            +
                    "ヶ",
         | 
| 1001 | 
            +
                    "ヂ",
         | 
| 1002 | 
            +
                    "ヲ",
         | 
| 1003 | 
            +
                    "ヅ",
         | 
| 1004 | 
            +
                    "ヵ",
         | 
| 1005 | 
            +
                    "ヱ",
         | 
| 1006 | 
            +
                    "ヰ",
         | 
| 1007 | 
            +
                    "ヮ",
         | 
| 1008 | 
            +
                    "ヽ",
         | 
| 1009 | 
            +
                    "゠",
         | 
| 1010 | 
            +
                    "ヾ",
         | 
| 1011 | 
            +
                    "ヷ",
         | 
| 1012 | 
            +
                    "ヿ",
         | 
| 1013 | 
            +
                    "ヸ",
         | 
| 1014 | 
            +
                    "ヹ",
         | 
| 1015 | 
            +
                    "ヺ",
         | 
| 1016 | 
            +
                ],
         | 
| 1017 | 
            +
                # Jap-Hiragana
         | 
| 1018 | 
            +
                "Japanese——": [
         | 
| 1019 | 
            +
                    "の",
         | 
| 1020 | 
            +
                    "に",
         | 
| 1021 | 
            +
                    "る",
         | 
| 1022 | 
            +
                    "た",
         | 
| 1023 | 
            +
                    "と",
         | 
| 1024 | 
            +
                    "は",
         | 
| 1025 | 
            +
                    "し",
         | 
| 1026 | 
            +
                    "い",
         | 
| 1027 | 
            +
                    "を",
         | 
| 1028 | 
            +
                    "で",
         | 
| 1029 | 
            +
                    "て",
         | 
| 1030 | 
            +
                    "が",
         | 
| 1031 | 
            +
                    "な",
         | 
| 1032 | 
            +
                    "れ",
         | 
| 1033 | 
            +
                    "か",
         | 
| 1034 | 
            +
                    "ら",
         | 
| 1035 | 
            +
                    "さ",
         | 
| 1036 | 
            +
                    "っ",
         | 
| 1037 | 
            +
                    "り",
         | 
| 1038 | 
            +
                    "す",
         | 
| 1039 | 
            +
                    "あ",
         | 
| 1040 | 
            +
                    "も",
         | 
| 1041 | 
            +
                    "こ",
         | 
| 1042 | 
            +
                    "ま",
         | 
| 1043 | 
            +
                    "う",
         | 
| 1044 | 
            +
                    "く",
         | 
| 1045 | 
            +
                    "よ",
         | 
| 1046 | 
            +
                    "き",
         | 
| 1047 | 
            +
                    "ん",
         | 
| 1048 | 
            +
                    "め",
         | 
| 1049 | 
            +
                    "お",
         | 
| 1050 | 
            +
                    "け",
         | 
| 1051 | 
            +
                    "そ",
         | 
| 1052 | 
            +
                    "つ",
         | 
| 1053 | 
            +
                    "だ",
         | 
| 1054 | 
            +
                    "や",
         | 
| 1055 | 
            +
                    "え",
         | 
| 1056 | 
            +
                    "ど",
         | 
| 1057 | 
            +
                    "わ",
         | 
| 1058 | 
            +
                    "ち",
         | 
| 1059 | 
            +
                    "み",
         | 
| 1060 | 
            +
                    "せ",
         | 
| 1061 | 
            +
                    "じ",
         | 
| 1062 | 
            +
                    "ば",
         | 
| 1063 | 
            +
                    "へ",
         | 
| 1064 | 
            +
                    "び",
         | 
| 1065 | 
            +
                    "ず",
         | 
| 1066 | 
            +
                    "ろ",
         | 
| 1067 | 
            +
                    "ほ",
         | 
| 1068 | 
            +
                    "げ",
         | 
| 1069 | 
            +
                    "む",
         | 
| 1070 | 
            +
                    "べ",
         | 
| 1071 | 
            +
                    "ひ",
         | 
| 1072 | 
            +
                    "ょ",
         | 
| 1073 | 
            +
                    "ゆ",
         | 
| 1074 | 
            +
                    "ぶ",
         | 
| 1075 | 
            +
                    "ご",
         | 
| 1076 | 
            +
                    "ゃ",
         | 
| 1077 | 
            +
                    "ね",
         | 
| 1078 | 
            +
                    "ふ",
         | 
| 1079 | 
            +
                    "ぐ",
         | 
| 1080 | 
            +
                    "ぎ",
         | 
| 1081 | 
            +
                    "ぼ",
         | 
| 1082 | 
            +
                    "ゅ",
         | 
| 1083 | 
            +
                    "づ",
         | 
| 1084 | 
            +
                    "ざ",
         | 
| 1085 | 
            +
                    "ぞ",
         | 
| 1086 | 
            +
                    "ぬ",
         | 
| 1087 | 
            +
                    "ぜ",
         | 
| 1088 | 
            +
                    "ぱ",
         | 
| 1089 | 
            +
                    "ぽ",
         | 
| 1090 | 
            +
                    "ぷ",
         | 
| 1091 | 
            +
                    "ぴ",
         | 
| 1092 | 
            +
                    "ぃ",
         | 
| 1093 | 
            +
                    "ぁ",
         | 
| 1094 | 
            +
                    "ぇ",
         | 
| 1095 | 
            +
                    "ぺ",
         | 
| 1096 | 
            +
                    "ゞ",
         | 
| 1097 | 
            +
                    "ぢ",
         | 
| 1098 | 
            +
                    "ぉ",
         | 
| 1099 | 
            +
                    "ぅ",
         | 
| 1100 | 
            +
                    "ゐ",
         | 
| 1101 | 
            +
                    "ゝ",
         | 
| 1102 | 
            +
                    "ゑ",
         | 
| 1103 | 
            +
                    "゛",
         | 
| 1104 | 
            +
                    "゜",
         | 
| 1105 | 
            +
                    "ゎ",
         | 
| 1106 | 
            +
                    "ゔ",
         | 
| 1107 | 
            +
                    "゚",
         | 
| 1108 | 
            +
                    "ゟ",
         | 
| 1109 | 
            +
                    "゙",
         | 
| 1110 | 
            +
                    "ゕ",
         | 
| 1111 | 
            +
                    "ゖ",
         | 
| 1112 | 
            +
                ],
         | 
| 1113 | 
            +
                "Portuguese": [
         | 
| 1114 | 
            +
                    "a",
         | 
| 1115 | 
            +
                    "e",
         | 
| 1116 | 
            +
                    "o",
         | 
| 1117 | 
            +
                    "s",
         | 
| 1118 | 
            +
                    "i",
         | 
| 1119 | 
            +
                    "r",
         | 
| 1120 | 
            +
                    "d",
         | 
| 1121 | 
            +
                    "n",
         | 
| 1122 | 
            +
                    "t",
         | 
| 1123 | 
            +
                    "m",
         | 
| 1124 | 
            +
                    "u",
         | 
| 1125 | 
            +
                    "c",
         | 
| 1126 | 
            +
                    "l",
         | 
| 1127 | 
            +
                    "p",
         | 
| 1128 | 
            +
                    "g",
         | 
| 1129 | 
            +
                    "v",
         | 
| 1130 | 
            +
                    "b",
         | 
| 1131 | 
            +
                    "f",
         | 
| 1132 | 
            +
                    "h",
         | 
| 1133 | 
            +
                    "ã",
         | 
| 1134 | 
            +
                    "q",
         | 
| 1135 | 
            +
                    "é",
         | 
| 1136 | 
            +
                    "ç",
         | 
| 1137 | 
            +
                    "á",
         | 
| 1138 | 
            +
                    "z",
         | 
| 1139 | 
            +
                    "í",
         | 
| 1140 | 
            +
                ],
         | 
| 1141 | 
            +
                "Swedish": [
         | 
| 1142 | 
            +
                    "e",
         | 
| 1143 | 
            +
                    "a",
         | 
| 1144 | 
            +
                    "n",
         | 
| 1145 | 
            +
                    "r",
         | 
| 1146 | 
            +
                    "t",
         | 
| 1147 | 
            +
                    "s",
         | 
| 1148 | 
            +
                    "i",
         | 
| 1149 | 
            +
                    "l",
         | 
| 1150 | 
            +
                    "d",
         | 
| 1151 | 
            +
                    "o",
         | 
| 1152 | 
            +
                    "m",
         | 
| 1153 | 
            +
                    "k",
         | 
| 1154 | 
            +
                    "g",
         | 
| 1155 | 
            +
                    "v",
         | 
| 1156 | 
            +
                    "h",
         | 
| 1157 | 
            +
                    "f",
         | 
| 1158 | 
            +
                    "u",
         | 
| 1159 | 
            +
                    "p",
         | 
| 1160 | 
            +
                    "ä",
         | 
| 1161 | 
            +
                    "c",
         | 
| 1162 | 
            +
                    "b",
         | 
| 1163 | 
            +
                    "ö",
         | 
| 1164 | 
            +
                    "å",
         | 
| 1165 | 
            +
                    "y",
         | 
| 1166 | 
            +
                    "j",
         | 
| 1167 | 
            +
                    "x",
         | 
| 1168 | 
            +
                ],
         | 
| 1169 | 
            +
                "Chinese": [
         | 
| 1170 | 
            +
                    "的",
         | 
| 1171 | 
            +
                    "一",
         | 
| 1172 | 
            +
                    "是",
         | 
| 1173 | 
            +
                    "不",
         | 
| 1174 | 
            +
                    "了",
         | 
| 1175 | 
            +
                    "在",
         | 
| 1176 | 
            +
                    "人",
         | 
| 1177 | 
            +
                    "有",
         | 
| 1178 | 
            +
                    "我",
         | 
| 1179 | 
            +
                    "他",
         | 
| 1180 | 
            +
                    "这",
         | 
| 1181 | 
            +
                    "个",
         | 
| 1182 | 
            +
                    "们",
         | 
| 1183 | 
            +
                    "中",
         | 
| 1184 | 
            +
                    "来",
         | 
| 1185 | 
            +
                    "上",
         | 
| 1186 | 
            +
                    "大",
         | 
| 1187 | 
            +
                    "为",
         | 
| 1188 | 
            +
                    "和",
         | 
| 1189 | 
            +
                    "国",
         | 
| 1190 | 
            +
                    "地",
         | 
| 1191 | 
            +
                    "到",
         | 
| 1192 | 
            +
                    "以",
         | 
| 1193 | 
            +
                    "说",
         | 
| 1194 | 
            +
                    "时",
         | 
| 1195 | 
            +
                    "要",
         | 
| 1196 | 
            +
                    "就",
         | 
| 1197 | 
            +
                    "出",
         | 
| 1198 | 
            +
                    "会",
         | 
| 1199 | 
            +
                    "可",
         | 
| 1200 | 
            +
                    "也",
         | 
| 1201 | 
            +
                    "你",
         | 
| 1202 | 
            +
                    "对",
         | 
| 1203 | 
            +
                    "生",
         | 
| 1204 | 
            +
                    "能",
         | 
| 1205 | 
            +
                    "而",
         | 
| 1206 | 
            +
                    "子",
         | 
| 1207 | 
            +
                    "那",
         | 
| 1208 | 
            +
                    "得",
         | 
| 1209 | 
            +
                    "于",
         | 
| 1210 | 
            +
                    "着",
         | 
| 1211 | 
            +
                    "下",
         | 
| 1212 | 
            +
                    "自",
         | 
| 1213 | 
            +
                    "之",
         | 
| 1214 | 
            +
                    "年",
         | 
| 1215 | 
            +
                    "过",
         | 
| 1216 | 
            +
                    "发",
         | 
| 1217 | 
            +
                    "后",
         | 
| 1218 | 
            +
                    "作",
         | 
| 1219 | 
            +
                    "里",
         | 
| 1220 | 
            +
                    "用",
         | 
| 1221 | 
            +
                    "道",
         | 
| 1222 | 
            +
                    "行",
         | 
| 1223 | 
            +
                    "所",
         | 
| 1224 | 
            +
                    "然",
         | 
| 1225 | 
            +
                    "家",
         | 
| 1226 | 
            +
                    "种",
         | 
| 1227 | 
            +
                    "事",
         | 
| 1228 | 
            +
                    "成",
         | 
| 1229 | 
            +
                    "方",
         | 
| 1230 | 
            +
                    "多",
         | 
| 1231 | 
            +
                    "经",
         | 
| 1232 | 
            +
                    "么",
         | 
| 1233 | 
            +
                    "去",
         | 
| 1234 | 
            +
                    "法",
         | 
| 1235 | 
            +
                    "学",
         | 
| 1236 | 
            +
                    "如",
         | 
| 1237 | 
            +
                    "都",
         | 
| 1238 | 
            +
                    "同",
         | 
| 1239 | 
            +
                    "现",
         | 
| 1240 | 
            +
                    "当",
         | 
| 1241 | 
            +
                    "没",
         | 
| 1242 | 
            +
                    "动",
         | 
| 1243 | 
            +
                    "面",
         | 
| 1244 | 
            +
                    "起",
         | 
| 1245 | 
            +
                    "看",
         | 
| 1246 | 
            +
                    "定",
         | 
| 1247 | 
            +
                    "天",
         | 
| 1248 | 
            +
                    "分",
         | 
| 1249 | 
            +
                    "还",
         | 
| 1250 | 
            +
                    "进",
         | 
| 1251 | 
            +
                    "好",
         | 
| 1252 | 
            +
                    "小",
         | 
| 1253 | 
            +
                    "部",
         | 
| 1254 | 
            +
                    "其",
         | 
| 1255 | 
            +
                    "些",
         | 
| 1256 | 
            +
                    "主",
         | 
| 1257 | 
            +
                    "样",
         | 
| 1258 | 
            +
                    "理",
         | 
| 1259 | 
            +
                    "心",
         | 
| 1260 | 
            +
                    "她",
         | 
| 1261 | 
            +
                    "本",
         | 
| 1262 | 
            +
                    "前",
         | 
| 1263 | 
            +
                    "开",
         | 
| 1264 | 
            +
                    "但",
         | 
| 1265 | 
            +
                    "因",
         | 
| 1266 | 
            +
                    "只",
         | 
| 1267 | 
            +
                    "从",
         | 
| 1268 | 
            +
                    "想",
         | 
| 1269 | 
            +
                    "实",
         | 
| 1270 | 
            +
                ],
         | 
| 1271 | 
            +
                "Ukrainian": [
         | 
| 1272 | 
            +
                    "о",
         | 
| 1273 | 
            +
                    "а",
         | 
| 1274 | 
            +
                    "н",
         | 
| 1275 | 
            +
                    "і",
         | 
| 1276 | 
            +
                    "и",
         | 
| 1277 | 
            +
                    "р",
         | 
| 1278 | 
            +
                    "в",
         | 
| 1279 | 
            +
                    "т",
         | 
| 1280 | 
            +
                    "е",
         | 
| 1281 | 
            +
                    "с",
         | 
| 1282 | 
            +
                    "к",
         | 
| 1283 | 
            +
                    "л",
         | 
| 1284 | 
            +
                    "у",
         | 
| 1285 | 
            +
                    "д",
         | 
| 1286 | 
            +
                    "м",
         | 
| 1287 | 
            +
                    "п",
         | 
| 1288 | 
            +
                    "з",
         | 
| 1289 | 
            +
                    "я",
         | 
| 1290 | 
            +
                    "ь",
         | 
| 1291 | 
            +
                    "б",
         | 
| 1292 | 
            +
                    "г",
         | 
| 1293 | 
            +
                    "й",
         | 
| 1294 | 
            +
                    "ч",
         | 
| 1295 | 
            +
                    "х",
         | 
| 1296 | 
            +
                    "ц",
         | 
| 1297 | 
            +
                    "ї",
         | 
| 1298 | 
            +
                ],
         | 
| 1299 | 
            +
                "Norwegian": [
         | 
| 1300 | 
            +
                    "e",
         | 
| 1301 | 
            +
                    "r",
         | 
| 1302 | 
            +
                    "n",
         | 
| 1303 | 
            +
                    "t",
         | 
| 1304 | 
            +
                    "a",
         | 
| 1305 | 
            +
                    "s",
         | 
| 1306 | 
            +
                    "i",
         | 
| 1307 | 
            +
                    "o",
         | 
| 1308 | 
            +
                    "l",
         | 
| 1309 | 
            +
                    "d",
         | 
| 1310 | 
            +
                    "g",
         | 
| 1311 | 
            +
                    "k",
         | 
| 1312 | 
            +
                    "m",
         | 
| 1313 | 
            +
                    "v",
         | 
| 1314 | 
            +
                    "f",
         | 
| 1315 | 
            +
                    "p",
         | 
| 1316 | 
            +
                    "u",
         | 
| 1317 | 
            +
                    "b",
         | 
| 1318 | 
            +
                    "h",
         | 
| 1319 | 
            +
                    "å",
         | 
| 1320 | 
            +
                    "y",
         | 
| 1321 | 
            +
                    "j",
         | 
| 1322 | 
            +
                    "ø",
         | 
| 1323 | 
            +
                    "c",
         | 
| 1324 | 
            +
                    "æ",
         | 
| 1325 | 
            +
                    "w",
         | 
| 1326 | 
            +
                ],
         | 
| 1327 | 
            +
                "Finnish": [
         | 
| 1328 | 
            +
                    "a",
         | 
| 1329 | 
            +
                    "i",
         | 
| 1330 | 
            +
                    "n",
         | 
| 1331 | 
            +
                    "t",
         | 
| 1332 | 
            +
                    "e",
         | 
| 1333 | 
            +
                    "s",
         | 
| 1334 | 
            +
                    "l",
         | 
| 1335 | 
            +
                    "o",
         | 
| 1336 | 
            +
                    "u",
         | 
| 1337 | 
            +
                    "k",
         | 
| 1338 | 
            +
                    "ä",
         | 
| 1339 | 
            +
                    "m",
         | 
| 1340 | 
            +
                    "r",
         | 
| 1341 | 
            +
                    "v",
         | 
| 1342 | 
            +
                    "j",
         | 
| 1343 | 
            +
                    "h",
         | 
| 1344 | 
            +
                    "p",
         | 
| 1345 | 
            +
                    "y",
         | 
| 1346 | 
            +
                    "d",
         | 
| 1347 | 
            +
                    "ö",
         | 
| 1348 | 
            +
                    "g",
         | 
| 1349 | 
            +
                    "c",
         | 
| 1350 | 
            +
                    "b",
         | 
| 1351 | 
            +
                    "f",
         | 
| 1352 | 
            +
                    "w",
         | 
| 1353 | 
            +
                    "z",
         | 
| 1354 | 
            +
                ],
         | 
| 1355 | 
            +
                "Vietnamese": [
         | 
| 1356 | 
            +
                    "n",
         | 
| 1357 | 
            +
                    "h",
         | 
| 1358 | 
            +
                    "t",
         | 
| 1359 | 
            +
                    "i",
         | 
| 1360 | 
            +
                    "c",
         | 
| 1361 | 
            +
                    "g",
         | 
| 1362 | 
            +
                    "a",
         | 
| 1363 | 
            +
                    "o",
         | 
| 1364 | 
            +
                    "u",
         | 
| 1365 | 
            +
                    "m",
         | 
| 1366 | 
            +
                    "l",
         | 
| 1367 | 
            +
                    "r",
         | 
| 1368 | 
            +
                    "à",
         | 
| 1369 | 
            +
                    "đ",
         | 
| 1370 | 
            +
                    "s",
         | 
| 1371 | 
            +
                    "e",
         | 
| 1372 | 
            +
                    "v",
         | 
| 1373 | 
            +
                    "p",
         | 
| 1374 | 
            +
                    "b",
         | 
| 1375 | 
            +
                    "y",
         | 
| 1376 | 
            +
                    "ư",
         | 
| 1377 | 
            +
                    "d",
         | 
| 1378 | 
            +
                    "á",
         | 
| 1379 | 
            +
                    "k",
         | 
| 1380 | 
            +
                    "ộ",
         | 
| 1381 | 
            +
                    "ế",
         | 
| 1382 | 
            +
                ],
         | 
| 1383 | 
            +
                "Czech": [
         | 
| 1384 | 
            +
                    "o",
         | 
| 1385 | 
            +
                    "e",
         | 
| 1386 | 
            +
                    "a",
         | 
| 1387 | 
            +
                    "n",
         | 
| 1388 | 
            +
                    "t",
         | 
| 1389 | 
            +
                    "s",
         | 
| 1390 | 
            +
                    "i",
         | 
| 1391 | 
            +
                    "l",
         | 
| 1392 | 
            +
                    "v",
         | 
| 1393 | 
            +
                    "r",
         | 
| 1394 | 
            +
                    "k",
         | 
| 1395 | 
            +
                    "d",
         | 
| 1396 | 
            +
                    "u",
         | 
| 1397 | 
            +
                    "m",
         | 
| 1398 | 
            +
                    "p",
         | 
| 1399 | 
            +
                    "í",
         | 
| 1400 | 
            +
                    "c",
         | 
| 1401 | 
            +
                    "h",
         | 
| 1402 | 
            +
                    "z",
         | 
| 1403 | 
            +
                    "á",
         | 
| 1404 | 
            +
                    "y",
         | 
| 1405 | 
            +
                    "j",
         | 
| 1406 | 
            +
                    "b",
         | 
| 1407 | 
            +
                    "ě",
         | 
| 1408 | 
            +
                    "é",
         | 
| 1409 | 
            +
                    "ř",
         | 
| 1410 | 
            +
                ],
         | 
| 1411 | 
            +
                "Hungarian": [
         | 
| 1412 | 
            +
                    "e",
         | 
| 1413 | 
            +
                    "a",
         | 
| 1414 | 
            +
                    "t",
         | 
| 1415 | 
            +
                    "l",
         | 
| 1416 | 
            +
                    "s",
         | 
| 1417 | 
            +
                    "n",
         | 
| 1418 | 
            +
                    "k",
         | 
| 1419 | 
            +
                    "r",
         | 
| 1420 | 
            +
                    "i",
         | 
| 1421 | 
            +
                    "o",
         | 
| 1422 | 
            +
                    "z",
         | 
| 1423 | 
            +
                    "á",
         | 
| 1424 | 
            +
                    "é",
         | 
| 1425 | 
            +
                    "g",
         | 
| 1426 | 
            +
                    "m",
         | 
| 1427 | 
            +
                    "b",
         | 
| 1428 | 
            +
                    "y",
         | 
| 1429 | 
            +
                    "v",
         | 
| 1430 | 
            +
                    "d",
         | 
| 1431 | 
            +
                    "h",
         | 
| 1432 | 
            +
                    "u",
         | 
| 1433 | 
            +
                    "p",
         | 
| 1434 | 
            +
                    "j",
         | 
| 1435 | 
            +
                    "ö",
         | 
| 1436 | 
            +
                    "f",
         | 
| 1437 | 
            +
                    "c",
         | 
| 1438 | 
            +
                ],
         | 
| 1439 | 
            +
                "Korean": [
         | 
| 1440 | 
            +
                    "이",
         | 
| 1441 | 
            +
                    "다",
         | 
| 1442 | 
            +
                    "에",
         | 
| 1443 | 
            +
                    "의",
         | 
| 1444 | 
            +
                    "는",
         | 
| 1445 | 
            +
                    "로",
         | 
| 1446 | 
            +
                    "하",
         | 
| 1447 | 
            +
                    "을",
         | 
| 1448 | 
            +
                    "가",
         | 
| 1449 | 
            +
                    "고",
         | 
| 1450 | 
            +
                    "지",
         | 
| 1451 | 
            +
                    "서",
         | 
| 1452 | 
            +
                    "한",
         | 
| 1453 | 
            +
                    "은",
         | 
| 1454 | 
            +
                    "기",
         | 
| 1455 | 
            +
                    "으",
         | 
| 1456 | 
            +
                    "년",
         | 
| 1457 | 
            +
                    "대",
         | 
| 1458 | 
            +
                    "사",
         | 
| 1459 | 
            +
                    "시",
         | 
| 1460 | 
            +
                    "를",
         | 
| 1461 | 
            +
                    "리",
         | 
| 1462 | 
            +
                    "도",
         | 
| 1463 | 
            +
                    "인",
         | 
| 1464 | 
            +
                    "스",
         | 
| 1465 | 
            +
                    "일",
         | 
| 1466 | 
            +
                ],
         | 
| 1467 | 
            +
                "Indonesian": [
         | 
| 1468 | 
            +
                    "a",
         | 
| 1469 | 
            +
                    "n",
         | 
| 1470 | 
            +
                    "e",
         | 
| 1471 | 
            +
                    "i",
         | 
| 1472 | 
            +
                    "r",
         | 
| 1473 | 
            +
                    "t",
         | 
| 1474 | 
            +
                    "u",
         | 
| 1475 | 
            +
                    "s",
         | 
| 1476 | 
            +
                    "d",
         | 
| 1477 | 
            +
                    "k",
         | 
| 1478 | 
            +
                    "m",
         | 
| 1479 | 
            +
                    "l",
         | 
| 1480 | 
            +
                    "g",
         | 
| 1481 | 
            +
                    "p",
         | 
| 1482 | 
            +
                    "b",
         | 
| 1483 | 
            +
                    "o",
         | 
| 1484 | 
            +
                    "h",
         | 
| 1485 | 
            +
                    "y",
         | 
| 1486 | 
            +
                    "j",
         | 
| 1487 | 
            +
                    "c",
         | 
| 1488 | 
            +
                    "w",
         | 
| 1489 | 
            +
                    "f",
         | 
| 1490 | 
            +
                    "v",
         | 
| 1491 | 
            +
                    "z",
         | 
| 1492 | 
            +
                    "x",
         | 
| 1493 | 
            +
                    "q",
         | 
| 1494 | 
            +
                ],
         | 
| 1495 | 
            +
                "Turkish": [
         | 
| 1496 | 
            +
                    "a",
         | 
| 1497 | 
            +
                    "e",
         | 
| 1498 | 
            +
                    "i",
         | 
| 1499 | 
            +
                    "n",
         | 
| 1500 | 
            +
                    "r",
         | 
| 1501 | 
            +
                    "l",
         | 
| 1502 | 
            +
                    "ı",
         | 
| 1503 | 
            +
                    "k",
         | 
| 1504 | 
            +
                    "d",
         | 
| 1505 | 
            +
                    "t",
         | 
| 1506 | 
            +
                    "s",
         | 
| 1507 | 
            +
                    "m",
         | 
| 1508 | 
            +
                    "y",
         | 
| 1509 | 
            +
                    "u",
         | 
| 1510 | 
            +
                    "o",
         | 
| 1511 | 
            +
                    "b",
         | 
| 1512 | 
            +
                    "ü",
         | 
| 1513 | 
            +
                    "ş",
         | 
| 1514 | 
            +
                    "v",
         | 
| 1515 | 
            +
                    "g",
         | 
| 1516 | 
            +
                    "z",
         | 
| 1517 | 
            +
                    "h",
         | 
| 1518 | 
            +
                    "c",
         | 
| 1519 | 
            +
                    "p",
         | 
| 1520 | 
            +
                    "ç",
         | 
| 1521 | 
            +
                    "ğ",
         | 
| 1522 | 
            +
                ],
         | 
| 1523 | 
            +
                "Romanian": [
         | 
| 1524 | 
            +
                    "e",
         | 
| 1525 | 
            +
                    "i",
         | 
| 1526 | 
            +
                    "a",
         | 
| 1527 | 
            +
                    "r",
         | 
| 1528 | 
            +
                    "n",
         | 
| 1529 | 
            +
                    "t",
         | 
| 1530 | 
            +
                    "u",
         | 
| 1531 | 
            +
                    "l",
         | 
| 1532 | 
            +
                    "o",
         | 
| 1533 | 
            +
                    "c",
         | 
| 1534 | 
            +
                    "s",
         | 
| 1535 | 
            +
                    "d",
         | 
| 1536 | 
            +
                    "p",
         | 
| 1537 | 
            +
                    "m",
         | 
| 1538 | 
            +
                    "ă",
         | 
| 1539 | 
            +
                    "f",
         | 
| 1540 | 
            +
                    "v",
         | 
| 1541 | 
            +
                    "î",
         | 
| 1542 | 
            +
                    "g",
         | 
| 1543 | 
            +
                    "b",
         | 
| 1544 | 
            +
                    "ș",
         | 
| 1545 | 
            +
                    "ț",
         | 
| 1546 | 
            +
                    "z",
         | 
| 1547 | 
            +
                    "h",
         | 
| 1548 | 
            +
                    "â",
         | 
| 1549 | 
            +
                    "j",
         | 
| 1550 | 
            +
                ],
         | 
| 1551 | 
            +
                "Farsi": [
         | 
| 1552 | 
            +
                    "ا",
         | 
| 1553 | 
            +
                    "ی",
         | 
| 1554 | 
            +
                    "ر",
         | 
| 1555 | 
            +
                    "د",
         | 
| 1556 | 
            +
                    "ن",
         | 
| 1557 | 
            +
                    "ه",
         | 
| 1558 | 
            +
                    "و",
         | 
| 1559 | 
            +
                    "م",
         | 
| 1560 | 
            +
                    "ت",
         | 
| 1561 | 
            +
                    "ب",
         | 
| 1562 | 
            +
                    "س",
         | 
| 1563 | 
            +
                    "ل",
         | 
| 1564 | 
            +
                    "ک",
         | 
| 1565 | 
            +
                    "ش",
         | 
| 1566 | 
            +
                    "ز",
         | 
| 1567 | 
            +
                    "ف",
         | 
| 1568 | 
            +
                    "گ",
         | 
| 1569 | 
            +
                    "ع",
         | 
| 1570 | 
            +
                    "خ",
         | 
| 1571 | 
            +
                    "ق",
         | 
| 1572 | 
            +
                    "ج",
         | 
| 1573 | 
            +
                    "آ",
         | 
| 1574 | 
            +
                    "پ",
         | 
| 1575 | 
            +
                    "ح",
         | 
| 1576 | 
            +
                    "ط",
         | 
| 1577 | 
            +
                    "ص",
         | 
| 1578 | 
            +
                ],
         | 
| 1579 | 
            +
                "Arabic": [
         | 
| 1580 | 
            +
                    "ا",
         | 
| 1581 | 
            +
                    "ل",
         | 
| 1582 | 
            +
                    "ي",
         | 
| 1583 | 
            +
                    "م",
         | 
| 1584 | 
            +
                    "و",
         | 
| 1585 | 
            +
                    "ن",
         | 
| 1586 | 
            +
                    "ر",
         | 
| 1587 | 
            +
                    "ت",
         | 
| 1588 | 
            +
                    "ب",
         | 
| 1589 | 
            +
                    "ة",
         | 
| 1590 | 
            +
                    "ع",
         | 
| 1591 | 
            +
                    "د",
         | 
| 1592 | 
            +
                    "س",
         | 
| 1593 | 
            +
                    "ف",
         | 
| 1594 | 
            +
                    "ه",
         | 
| 1595 | 
            +
                    "ك",
         | 
| 1596 | 
            +
                    "ق",
         | 
| 1597 | 
            +
                    "أ",
         | 
| 1598 | 
            +
                    "ح",
         | 
| 1599 | 
            +
                    "ج",
         | 
| 1600 | 
            +
                    "ش",
         | 
| 1601 | 
            +
                    "ط",
         | 
| 1602 | 
            +
                    "ص",
         | 
| 1603 | 
            +
                    "ى",
         | 
| 1604 | 
            +
                    "خ",
         | 
| 1605 | 
            +
                    "إ",
         | 
| 1606 | 
            +
                ],
         | 
| 1607 | 
            +
                "Danish": [
         | 
| 1608 | 
            +
                    "e",
         | 
| 1609 | 
            +
                    "r",
         | 
| 1610 | 
            +
                    "n",
         | 
| 1611 | 
            +
                    "t",
         | 
| 1612 | 
            +
                    "a",
         | 
| 1613 | 
            +
                    "i",
         | 
| 1614 | 
            +
                    "s",
         | 
| 1615 | 
            +
                    "d",
         | 
| 1616 | 
            +
                    "l",
         | 
| 1617 | 
            +
                    "o",
         | 
| 1618 | 
            +
                    "g",
         | 
| 1619 | 
            +
                    "m",
         | 
| 1620 | 
            +
                    "k",
         | 
| 1621 | 
            +
                    "f",
         | 
| 1622 | 
            +
                    "v",
         | 
| 1623 | 
            +
                    "u",
         | 
| 1624 | 
            +
                    "b",
         | 
| 1625 | 
            +
                    "h",
         | 
| 1626 | 
            +
                    "p",
         | 
| 1627 | 
            +
                    "å",
         | 
| 1628 | 
            +
                    "y",
         | 
| 1629 | 
            +
                    "ø",
         | 
| 1630 | 
            +
                    "æ",
         | 
| 1631 | 
            +
                    "c",
         | 
| 1632 | 
            +
                    "j",
         | 
| 1633 | 
            +
                    "w",
         | 
| 1634 | 
            +
                ],
         | 
| 1635 | 
            +
                "Serbian": [
         | 
| 1636 | 
            +
                    "а",
         | 
| 1637 | 
            +
                    "и",
         | 
| 1638 | 
            +
                    "о",
         | 
| 1639 | 
            +
                    "е",
         | 
| 1640 | 
            +
                    "н",
         | 
| 1641 | 
            +
                    "р",
         | 
| 1642 | 
            +
                    "с",
         | 
| 1643 | 
            +
                    "у",
         | 
| 1644 | 
            +
                    "т",
         | 
| 1645 | 
            +
                    "к",
         | 
| 1646 | 
            +
                    "ј",
         | 
| 1647 | 
            +
                    "в",
         | 
| 1648 | 
            +
                    "д",
         | 
| 1649 | 
            +
                    "м",
         | 
| 1650 | 
            +
                    "п",
         | 
| 1651 | 
            +
                    "л",
         | 
| 1652 | 
            +
                    "г",
         | 
| 1653 | 
            +
                    "з",
         | 
| 1654 | 
            +
                    "б",
         | 
| 1655 | 
            +
                    "a",
         | 
| 1656 | 
            +
                    "i",
         | 
| 1657 | 
            +
                    "e",
         | 
| 1658 | 
            +
                    "o",
         | 
| 1659 | 
            +
                    "n",
         | 
| 1660 | 
            +
                    "ц",
         | 
| 1661 | 
            +
                    "ш",
         | 
| 1662 | 
            +
                ],
         | 
| 1663 | 
            +
                "Lithuanian": [
         | 
| 1664 | 
            +
                    "i",
         | 
| 1665 | 
            +
                    "a",
         | 
| 1666 | 
            +
                    "s",
         | 
| 1667 | 
            +
                    "o",
         | 
| 1668 | 
            +
                    "r",
         | 
| 1669 | 
            +
                    "e",
         | 
| 1670 | 
            +
                    "t",
         | 
| 1671 | 
            +
                    "n",
         | 
| 1672 | 
            +
                    "u",
         | 
| 1673 | 
            +
                    "k",
         | 
| 1674 | 
            +
                    "m",
         | 
| 1675 | 
            +
                    "l",
         | 
| 1676 | 
            +
                    "p",
         | 
| 1677 | 
            +
                    "v",
         | 
| 1678 | 
            +
                    "d",
         | 
| 1679 | 
            +
                    "j",
         | 
| 1680 | 
            +
                    "g",
         | 
| 1681 | 
            +
                    "ė",
         | 
| 1682 | 
            +
                    "b",
         | 
| 1683 | 
            +
                    "y",
         | 
| 1684 | 
            +
                    "ų",
         | 
| 1685 | 
            +
                    "š",
         | 
| 1686 | 
            +
                    "ž",
         | 
| 1687 | 
            +
                    "c",
         | 
| 1688 | 
            +
                    "ą",
         | 
| 1689 | 
            +
                    "į",
         | 
| 1690 | 
            +
                ],
         | 
| 1691 | 
            +
                "Slovene": [
         | 
| 1692 | 
            +
                    "e",
         | 
| 1693 | 
            +
                    "a",
         | 
| 1694 | 
            +
                    "i",
         | 
| 1695 | 
            +
                    "o",
         | 
| 1696 | 
            +
                    "n",
         | 
| 1697 | 
            +
                    "r",
         | 
| 1698 | 
            +
                    "s",
         | 
| 1699 | 
            +
                    "l",
         | 
| 1700 | 
            +
                    "t",
         | 
| 1701 | 
            +
                    "j",
         | 
| 1702 | 
            +
                    "v",
         | 
| 1703 | 
            +
                    "k",
         | 
| 1704 | 
            +
                    "d",
         | 
| 1705 | 
            +
                    "p",
         | 
| 1706 | 
            +
                    "m",
         | 
| 1707 | 
            +
                    "u",
         | 
| 1708 | 
            +
                    "z",
         | 
| 1709 | 
            +
                    "b",
         | 
| 1710 | 
            +
                    "g",
         | 
| 1711 | 
            +
                    "h",
         | 
| 1712 | 
            +
                    "č",
         | 
| 1713 | 
            +
                    "c",
         | 
| 1714 | 
            +
                    "š",
         | 
| 1715 | 
            +
                    "ž",
         | 
| 1716 | 
            +
                    "f",
         | 
| 1717 | 
            +
                    "y",
         | 
| 1718 | 
            +
                ],
         | 
| 1719 | 
            +
                "Slovak": [
         | 
| 1720 | 
            +
                    "o",
         | 
| 1721 | 
            +
                    "a",
         | 
| 1722 | 
            +
                    "e",
         | 
| 1723 | 
            +
                    "n",
         | 
| 1724 | 
            +
                    "i",
         | 
| 1725 | 
            +
                    "r",
         | 
| 1726 | 
            +
                    "v",
         | 
| 1727 | 
            +
                    "t",
         | 
| 1728 | 
            +
                    "s",
         | 
| 1729 | 
            +
                    "l",
         | 
| 1730 | 
            +
                    "k",
         | 
| 1731 | 
            +
                    "d",
         | 
| 1732 | 
            +
                    "m",
         | 
| 1733 | 
            +
                    "p",
         | 
| 1734 | 
            +
                    "u",
         | 
| 1735 | 
            +
                    "c",
         | 
| 1736 | 
            +
                    "h",
         | 
| 1737 | 
            +
                    "j",
         | 
| 1738 | 
            +
                    "b",
         | 
| 1739 | 
            +
                    "z",
         | 
| 1740 | 
            +
                    "á",
         | 
| 1741 | 
            +
                    "y",
         | 
| 1742 | 
            +
                    "ý",
         | 
| 1743 | 
            +
                    "í",
         | 
| 1744 | 
            +
                    "č",
         | 
| 1745 | 
            +
                    "é",
         | 
| 1746 | 
            +
                ],
         | 
| 1747 | 
            +
                "Hebrew": [
         | 
| 1748 | 
            +
                    "י",
         | 
| 1749 | 
            +
                    "ו",
         | 
| 1750 | 
            +
                    "ה",
         | 
| 1751 | 
            +
                    "ל",
         | 
| 1752 | 
            +
                    "ר",
         | 
| 1753 | 
            +
                    "ב",
         | 
| 1754 | 
            +
                    "ת",
         | 
| 1755 | 
            +
                    "מ",
         | 
| 1756 | 
            +
                    "א",
         | 
| 1757 | 
            +
                    "ש",
         | 
| 1758 | 
            +
                    "נ",
         | 
| 1759 | 
            +
                    "ע",
         | 
| 1760 | 
            +
                    "ם",
         | 
| 1761 | 
            +
                    "ד",
         | 
| 1762 | 
            +
                    "ק",
         | 
| 1763 | 
            +
                    "ח",
         | 
| 1764 | 
            +
                    "פ",
         | 
| 1765 | 
            +
                    "ס",
         | 
| 1766 | 
            +
                    "כ",
         | 
| 1767 | 
            +
                    "ג",
         | 
| 1768 | 
            +
                    "ט",
         | 
| 1769 | 
            +
                    "צ",
         | 
| 1770 | 
            +
                    "ן",
         | 
| 1771 | 
            +
                    "ז",
         | 
| 1772 | 
            +
                    "ך",
         | 
| 1773 | 
            +
                ],
         | 
| 1774 | 
            +
                "Bulgarian": [
         | 
| 1775 | 
            +
                    "а",
         | 
| 1776 | 
            +
                    "и",
         | 
| 1777 | 
            +
                    "о",
         | 
| 1778 | 
            +
                    "е",
         | 
| 1779 | 
            +
                    "н",
         | 
| 1780 | 
            +
                    "т",
         | 
| 1781 | 
            +
                    "р",
         | 
| 1782 | 
            +
                    "с",
         | 
| 1783 | 
            +
                    "в",
         | 
| 1784 | 
            +
                    "л",
         | 
| 1785 | 
            +
                    "к",
         | 
| 1786 | 
            +
                    "д",
         | 
| 1787 | 
            +
                    "п",
         | 
| 1788 | 
            +
                    "м",
         | 
| 1789 | 
            +
                    "з",
         | 
| 1790 | 
            +
                    "г",
         | 
| 1791 | 
            +
                    "я",
         | 
| 1792 | 
            +
                    "ъ",
         | 
| 1793 | 
            +
                    "у",
         | 
| 1794 | 
            +
                    "б",
         | 
| 1795 | 
            +
                    "ч",
         | 
| 1796 | 
            +
                    "ц",
         | 
| 1797 | 
            +
                    "й",
         | 
| 1798 | 
            +
                    "ж",
         | 
| 1799 | 
            +
                    "щ",
         | 
| 1800 | 
            +
                    "х",
         | 
| 1801 | 
            +
                ],
         | 
| 1802 | 
            +
                "Croatian": [
         | 
| 1803 | 
            +
                    "a",
         | 
| 1804 | 
            +
                    "i",
         | 
| 1805 | 
            +
                    "o",
         | 
| 1806 | 
            +
                    "e",
         | 
| 1807 | 
            +
                    "n",
         | 
| 1808 | 
            +
                    "r",
         | 
| 1809 | 
            +
                    "j",
         | 
| 1810 | 
            +
                    "s",
         | 
| 1811 | 
            +
                    "t",
         | 
| 1812 | 
            +
                    "u",
         | 
| 1813 | 
            +
                    "k",
         | 
| 1814 | 
            +
                    "l",
         | 
| 1815 | 
            +
                    "v",
         | 
| 1816 | 
            +
                    "d",
         | 
| 1817 | 
            +
                    "m",
         | 
| 1818 | 
            +
                    "p",
         | 
| 1819 | 
            +
                    "g",
         | 
| 1820 | 
            +
                    "z",
         | 
| 1821 | 
            +
                    "b",
         | 
| 1822 | 
            +
                    "c",
         | 
| 1823 | 
            +
                    "č",
         | 
| 1824 | 
            +
                    "h",
         | 
| 1825 | 
            +
                    "š",
         | 
| 1826 | 
            +
                    "ž",
         | 
| 1827 | 
            +
                    "ć",
         | 
| 1828 | 
            +
                    "f",
         | 
| 1829 | 
            +
                ],
         | 
| 1830 | 
            +
                "Hindi": [
         | 
| 1831 | 
            +
                    "क",
         | 
| 1832 | 
            +
                    "र",
         | 
| 1833 | 
            +
                    "स",
         | 
| 1834 | 
            +
                    "न",
         | 
| 1835 | 
            +
                    "त",
         | 
| 1836 | 
            +
                    "म",
         | 
| 1837 | 
            +
                    "ह",
         | 
| 1838 | 
            +
                    "प",
         | 
| 1839 | 
            +
                    "य",
         | 
| 1840 | 
            +
                    "ल",
         | 
| 1841 | 
            +
                    "व",
         | 
| 1842 | 
            +
                    "ज",
         | 
| 1843 | 
            +
                    "द",
         | 
| 1844 | 
            +
                    "ग",
         | 
| 1845 | 
            +
                    "ब",
         | 
| 1846 | 
            +
                    "श",
         | 
| 1847 | 
            +
                    "ट",
         | 
| 1848 | 
            +
                    "अ",
         | 
| 1849 | 
            +
                    "ए",
         | 
| 1850 | 
            +
                    "थ",
         | 
| 1851 | 
            +
                    "भ",
         | 
| 1852 | 
            +
                    "ड",
         | 
| 1853 | 
            +
                    "च",
         | 
| 1854 | 
            +
                    "ध",
         | 
| 1855 | 
            +
                    "ष",
         | 
| 1856 | 
            +
                    "इ",
         | 
| 1857 | 
            +
                ],
         | 
| 1858 | 
            +
                "Estonian": [
         | 
| 1859 | 
            +
                    "a",
         | 
| 1860 | 
            +
                    "i",
         | 
| 1861 | 
            +
                    "e",
         | 
| 1862 | 
            +
                    "s",
         | 
| 1863 | 
            +
                    "t",
         | 
| 1864 | 
            +
                    "l",
         | 
| 1865 | 
            +
                    "u",
         | 
| 1866 | 
            +
                    "n",
         | 
| 1867 | 
            +
                    "o",
         | 
| 1868 | 
            +
                    "k",
         | 
| 1869 | 
            +
                    "r",
         | 
| 1870 | 
            +
                    "d",
         | 
| 1871 | 
            +
                    "m",
         | 
| 1872 | 
            +
                    "v",
         | 
| 1873 | 
            +
                    "g",
         | 
| 1874 | 
            +
                    "p",
         | 
| 1875 | 
            +
                    "j",
         | 
| 1876 | 
            +
                    "h",
         | 
| 1877 | 
            +
                    "ä",
         | 
| 1878 | 
            +
                    "b",
         | 
| 1879 | 
            +
                    "õ",
         | 
| 1880 | 
            +
                    "ü",
         | 
| 1881 | 
            +
                    "f",
         | 
| 1882 | 
            +
                    "c",
         | 
| 1883 | 
            +
                    "ö",
         | 
| 1884 | 
            +
                    "y",
         | 
| 1885 | 
            +
                ],
         | 
| 1886 | 
            +
                "Thai": [
         | 
| 1887 | 
            +
                    "า",
         | 
| 1888 | 
            +
                    "น",
         | 
| 1889 | 
            +
                    "ร",
         | 
| 1890 | 
            +
                    "อ",
         | 
| 1891 | 
            +
                    "ก",
         | 
| 1892 | 
            +
                    "เ",
         | 
| 1893 | 
            +
                    "ง",
         | 
| 1894 | 
            +
                    "ม",
         | 
| 1895 | 
            +
                    "ย",
         | 
| 1896 | 
            +
                    "ล",
         | 
| 1897 | 
            +
                    "ว",
         | 
| 1898 | 
            +
                    "ด",
         | 
| 1899 | 
            +
                    "ท",
         | 
| 1900 | 
            +
                    "ส",
         | 
| 1901 | 
            +
                    "ต",
         | 
| 1902 | 
            +
                    "ะ",
         | 
| 1903 | 
            +
                    "ป",
         | 
| 1904 | 
            +
                    "บ",
         | 
| 1905 | 
            +
                    "ค",
         | 
| 1906 | 
            +
                    "ห",
         | 
| 1907 | 
            +
                    "แ",
         | 
| 1908 | 
            +
                    "จ",
         | 
| 1909 | 
            +
                    "พ",
         | 
| 1910 | 
            +
                    "ช",
         | 
| 1911 | 
            +
                    "ข",
         | 
| 1912 | 
            +
                    "ใ",
         | 
| 1913 | 
            +
                ],
         | 
| 1914 | 
            +
                "Greek": [
         | 
| 1915 | 
            +
                    "α",
         | 
| 1916 | 
            +
                    "τ",
         | 
| 1917 | 
            +
                    "ο",
         | 
| 1918 | 
            +
                    "ι",
         | 
| 1919 | 
            +
                    "ε",
         | 
| 1920 | 
            +
                    "ν",
         | 
| 1921 | 
            +
                    "ρ",
         | 
| 1922 | 
            +
                    "σ",
         | 
| 1923 | 
            +
                    "κ",
         | 
| 1924 | 
            +
                    "η",
         | 
| 1925 | 
            +
                    "π",
         | 
| 1926 | 
            +
                    "ς",
         | 
| 1927 | 
            +
                    "υ",
         | 
| 1928 | 
            +
                    "μ",
         | 
| 1929 | 
            +
                    "λ",
         | 
| 1930 | 
            +
                    "ί",
         | 
| 1931 | 
            +
                    "ό",
         | 
| 1932 | 
            +
                    "ά",
         | 
| 1933 | 
            +
                    "γ",
         | 
| 1934 | 
            +
                    "έ",
         | 
| 1935 | 
            +
                    "δ",
         | 
| 1936 | 
            +
                    "ή",
         | 
| 1937 | 
            +
                    "ω",
         | 
| 1938 | 
            +
                    "χ",
         | 
| 1939 | 
            +
                    "θ",
         | 
| 1940 | 
            +
                    "ύ",
         | 
| 1941 | 
            +
                ],
         | 
| 1942 | 
            +
                "Tamil": [
         | 
| 1943 | 
            +
                    "க",
         | 
| 1944 | 
            +
                    "த",
         | 
| 1945 | 
            +
                    "ப",
         | 
| 1946 | 
            +
                    "ட",
         | 
| 1947 | 
            +
                    "ர",
         | 
| 1948 | 
            +
                    "ம",
         | 
| 1949 | 
            +
                    "ல",
         | 
| 1950 | 
            +
                    "ன",
         | 
| 1951 | 
            +
                    "வ",
         | 
| 1952 | 
            +
                    "ற",
         | 
| 1953 | 
            +
                    "ய",
         | 
| 1954 | 
            +
                    "ள",
         | 
| 1955 | 
            +
                    "ச",
         | 
| 1956 | 
            +
                    "ந",
         | 
| 1957 | 
            +
                    "இ",
         | 
| 1958 | 
            +
                    "ண",
         | 
| 1959 | 
            +
                    "அ",
         | 
| 1960 | 
            +
                    "ஆ",
         | 
| 1961 | 
            +
                    "ழ",
         | 
| 1962 | 
            +
                    "ங",
         | 
| 1963 | 
            +
                    "எ",
         | 
| 1964 | 
            +
                    "உ",
         | 
| 1965 | 
            +
                    "ஒ",
         | 
| 1966 | 
            +
                    "ஸ",
         | 
| 1967 | 
            +
                ],
         | 
| 1968 | 
            +
                "Kazakh": [
         | 
| 1969 | 
            +
                    "а",
         | 
| 1970 | 
            +
                    "ы",
         | 
| 1971 | 
            +
                    "е",
         | 
| 1972 | 
            +
                    "н",
         | 
| 1973 | 
            +
                    "т",
         | 
| 1974 | 
            +
                    "р",
         | 
| 1975 | 
            +
                    "л",
         | 
| 1976 | 
            +
                    "і",
         | 
| 1977 | 
            +
                    "д",
         | 
| 1978 | 
            +
                    "с",
         | 
| 1979 | 
            +
                    "м",
         | 
| 1980 | 
            +
                    "қ",
         | 
| 1981 | 
            +
                    "к",
         | 
| 1982 | 
            +
                    "о",
         | 
| 1983 | 
            +
                    "б",
         | 
| 1984 | 
            +
                    "и",
         | 
| 1985 | 
            +
                    "у",
         | 
| 1986 | 
            +
                    "ғ",
         | 
| 1987 | 
            +
                    "ж",
         | 
| 1988 | 
            +
                    "ң",
         | 
| 1989 | 
            +
                    "з",
         | 
| 1990 | 
            +
                    "ш",
         | 
| 1991 | 
            +
                    "й",
         | 
| 1992 | 
            +
                    "п",
         | 
| 1993 | 
            +
                    "г",
         | 
| 1994 | 
            +
                    "ө",
         | 
| 1995 | 
            +
                ],
         | 
| 1996 | 
            +
            }
         | 
| 1997 | 
            +
             | 
| 1998 | 
            +
            LANGUAGE_SUPPORTED_COUNT: int = len(FREQUENCIES)
         | 
    	
        .venv/lib/python3.13/site-packages/charset_normalizer/legacy.py
    ADDED
    
    | @@ -0,0 +1,66 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            from __future__ import annotations
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            from typing import TYPE_CHECKING, Any
         | 
| 4 | 
            +
            from warnings import warn
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            from .api import from_bytes
         | 
| 7 | 
            +
            from .constant import CHARDET_CORRESPONDENCE
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            # TODO: remove this check when dropping Python 3.7 support
         | 
| 10 | 
            +
            if TYPE_CHECKING:
         | 
| 11 | 
            +
                from typing_extensions import TypedDict
         | 
| 12 | 
            +
             | 
| 13 | 
            +
                class ResultDict(TypedDict):
         | 
| 14 | 
            +
                    encoding: str | None
         | 
| 15 | 
            +
                    language: str
         | 
| 16 | 
            +
                    confidence: float | None
         | 
| 17 | 
            +
             | 
| 18 | 
            +
             | 
| 19 | 
            +
            def detect(
         | 
| 20 | 
            +
                byte_str: bytes, should_rename_legacy: bool = False, **kwargs: Any
         | 
| 21 | 
            +
            ) -> ResultDict:
         | 
| 22 | 
            +
                """
         | 
| 23 | 
            +
                chardet legacy method
         | 
| 24 | 
            +
                Detect the encoding of the given byte string. It should be mostly backward-compatible.
         | 
| 25 | 
            +
                Encoding name will match Chardet own writing whenever possible. (Not on encoding name unsupported by it)
         | 
| 26 | 
            +
                This function is deprecated and should be used to migrate your project easily, consult the documentation for
         | 
| 27 | 
            +
                further information. Not planned for removal.
         | 
| 28 | 
            +
             | 
| 29 | 
            +
                :param byte_str:     The byte sequence to examine.
         | 
| 30 | 
            +
                :param should_rename_legacy:  Should we rename legacy encodings
         | 
| 31 | 
            +
                                              to their more modern equivalents?
         | 
| 32 | 
            +
                """
         | 
| 33 | 
            +
                if len(kwargs):
         | 
| 34 | 
            +
                    warn(
         | 
| 35 | 
            +
                        f"charset-normalizer disregard arguments '{','.join(list(kwargs.keys()))}' in legacy function detect()"
         | 
| 36 | 
            +
                    )
         | 
| 37 | 
            +
             | 
| 38 | 
            +
                if not isinstance(byte_str, (bytearray, bytes)):
         | 
| 39 | 
            +
                    raise TypeError(  # pragma: nocover
         | 
| 40 | 
            +
                        "Expected object of type bytes or bytearray, got: " "{}".format(
         | 
| 41 | 
            +
                            type(byte_str)
         | 
| 42 | 
            +
                        )
         | 
| 43 | 
            +
                    )
         | 
| 44 | 
            +
             | 
| 45 | 
            +
                if isinstance(byte_str, bytearray):
         | 
| 46 | 
            +
                    byte_str = bytes(byte_str)
         | 
| 47 | 
            +
             | 
| 48 | 
            +
                r = from_bytes(byte_str).best()
         | 
| 49 | 
            +
             | 
| 50 | 
            +
                encoding = r.encoding if r is not None else None
         | 
| 51 | 
            +
                language = r.language if r is not None and r.language != "Unknown" else ""
         | 
| 52 | 
            +
                confidence = 1.0 - r.chaos if r is not None else None
         | 
| 53 | 
            +
             | 
| 54 | 
            +
                # Note: CharsetNormalizer does not return 'UTF-8-SIG' as the sig get stripped in the detection/normalization process
         | 
| 55 | 
            +
                # but chardet does return 'utf-8-sig' and it is a valid codec name.
         | 
| 56 | 
            +
                if r is not None and encoding == "utf_8" and r.bom:
         | 
| 57 | 
            +
                    encoding += "_sig"
         | 
| 58 | 
            +
             | 
| 59 | 
            +
                if should_rename_legacy is False and encoding in CHARDET_CORRESPONDENCE:
         | 
| 60 | 
            +
                    encoding = CHARDET_CORRESPONDENCE[encoding]
         | 
| 61 | 
            +
             | 
| 62 | 
            +
                return {
         | 
| 63 | 
            +
                    "encoding": encoding,
         | 
| 64 | 
            +
                    "language": language,
         | 
| 65 | 
            +
                    "confidence": confidence,
         | 
| 66 | 
            +
                }
         | 
    	
        .venv/lib/python3.13/site-packages/charset_normalizer/md.cpython-313-darwin.so
    ADDED
    
    | @@ -0,0 +1,3 @@ | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:994bd264cafab72dffc8b5faf1867904942bf754477ec7e18890964021413271
         | 
| 3 | 
            +
            size 115664
         | 
    	
        .venv/lib/python3.13/site-packages/charset_normalizer/md.py
    ADDED
    
    | @@ -0,0 +1,630 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            from __future__ import annotations
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            from functools import lru_cache
         | 
| 4 | 
            +
            from logging import getLogger
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            from .constant import (
         | 
| 7 | 
            +
                COMMON_SAFE_ASCII_CHARACTERS,
         | 
| 8 | 
            +
                TRACE,
         | 
| 9 | 
            +
                UNICODE_SECONDARY_RANGE_KEYWORD,
         | 
| 10 | 
            +
            )
         | 
| 11 | 
            +
            from .utils import (
         | 
| 12 | 
            +
                is_accentuated,
         | 
| 13 | 
            +
                is_arabic,
         | 
| 14 | 
            +
                is_arabic_isolated_form,
         | 
| 15 | 
            +
                is_case_variable,
         | 
| 16 | 
            +
                is_cjk,
         | 
| 17 | 
            +
                is_emoticon,
         | 
| 18 | 
            +
                is_hangul,
         | 
| 19 | 
            +
                is_hiragana,
         | 
| 20 | 
            +
                is_katakana,
         | 
| 21 | 
            +
                is_latin,
         | 
| 22 | 
            +
                is_punctuation,
         | 
| 23 | 
            +
                is_separator,
         | 
| 24 | 
            +
                is_symbol,
         | 
| 25 | 
            +
                is_thai,
         | 
| 26 | 
            +
                is_unprintable,
         | 
| 27 | 
            +
                remove_accent,
         | 
| 28 | 
            +
                unicode_range,
         | 
| 29 | 
            +
            )
         | 
| 30 | 
            +
             | 
| 31 | 
            +
             | 
class MessDetectorPlugin:
    """
    Common interface shared by every mess detection plugin.
    Concrete detectors MUST subclass it and override each member below.
    """

    def eligible(self, character: str) -> bool:
        """
        Tell whether the given character should be handed over to feed().
        """
        raise NotImplementedError()  # pragma: nocover

    def feed(self, character: str) -> None:
        """
        Main routine, executed for one character.
        Put here the logic in which the text would be considered chaotic.
        """
        raise NotImplementedError()  # pragma: nocover

    def reset(self) -> None:  # pragma: no cover
        """
        Bring the plugin back to its initial state.
        """
        raise NotImplementedError()

    @property
    def ratio(self) -> float:
        """
        Chaos ratio computed from what feed() has seen so far.
        Must NOT be lower than 0.; there is no upper bound.
        """
        raise NotImplementedError()  # pragma: nocover
| 64 | 
            +
             | 
| 65 | 
            +
             | 
class TooManySymbolOrPunctuationPlugin(MessDetectorPlugin):
    """
    Measure how much of the printable text is made of punctuation and symbols.

    A symbol adds 2 to the score, a punctuation mark adds 1. A character that is
    identical to the one fed just before, or that belongs to
    COMMON_SAFE_ASCII_CHARACTERS, is never scored.
    """

    def __init__(self) -> None:
        self._punctuation_count: int = 0
        self._symbol_count: int = 0
        self._character_count: int = 0

        self._last_printable_char: str | None = None
        # Not read anywhere in this class; kept so the attribute stays available.
        self._frenzy_symbol_in_word: bool = False

    def eligible(self, character: str) -> bool:
        """Only printable characters are fed to this plugin."""
        return character.isprintable()

    def feed(self, character: str) -> None:
        """Count the character and score it when it is punctuation or a symbol."""
        self._character_count += 1

        if (
            character != self._last_printable_char
            and character not in COMMON_SAFE_ASCII_CHARACTERS
        ):
            if is_punctuation(character):
                self._punctuation_count += 1
            elif (
                not character.isdigit()
                and is_symbol(character)
                and is_emoticon(character) is False
            ):
                # Symbols weigh twice as much as punctuation marks.
                self._symbol_count += 2

        self._last_printable_char = character

    def reset(self) -> None:  # Abstract
        """Restore every attribute to the value set by __init__."""
        self._punctuation_count = 0
        self._character_count = 0
        self._symbol_count = 0
        # Also forget the previous character, otherwise the first character of the
        # next run could be skipped as a "repetition" of the last one of this run.
        self._last_printable_char = None
        self._frenzy_symbol_in_word = False

    @property
    def ratio(self) -> float:
        """Score divided by the character count, or 0.0 when that is below 0.3."""
        if self._character_count == 0:
            return 0.0

        ratio_of_punctuation: float = (
            self._punctuation_count + self._symbol_count
        ) / self._character_count

        return ratio_of_punctuation if ratio_of_punctuation >= 0.3 else 0.0
| 111 | 
            +
             | 
| 112 | 
            +
             | 
class TooManyAccentuatedPlugin(MessDetectorPlugin):
    """
    Report the share of accentuated letters among the alphabetic characters fed.
    """

    def __init__(self) -> None:
        self._accentuated_count: int = 0
        self._character_count: int = 0

    def eligible(self, character: str) -> bool:
        """Only alphabetic characters are fed to this plugin."""
        return character.isalpha()

    def feed(self, character: str) -> None:
        """Count the letter, and count it again separately when it is accentuated."""
        self._character_count += 1

        if not is_accentuated(character):
            return

        self._accentuated_count += 1

    def reset(self) -> None:  # Abstract
        """Zero both counters."""
        self._accentuated_count = 0
        self._character_count = 0

    @property
    def ratio(self) -> float:
        """Accentuated share, or 0.0 below 8 letters or below a 0.35 share."""
        letters_seen = self._character_count

        if letters_seen < 8:
            return 0.0

        accentuated_share: float = self._accentuated_count / letters_seen

        if accentuated_share < 0.35:
            return 0.0
        return accentuated_share
| 138 | 
            +
             | 
| 139 | 
            +
             | 
class UnprintablePlugin(MessDetectorPlugin):
    """
    Measure how many of the fed characters are unprintable.
    """

    def __init__(self) -> None:
        self._unprintable_count: int = 0
        self._character_count: int = 0

    def eligible(self, character: str) -> bool:
        """Every character is fed to this plugin."""
        return True

    def feed(self, character: str) -> None:
        """Count the character, and count it separately when it is unprintable."""
        if is_unprintable(character):
            self._unprintable_count += 1
        self._character_count += 1

    def reset(self) -> None:  # Abstract
        """Zero both counters."""
        self._unprintable_count = 0
        # The total must be cleared too; otherwise the ratio computed after a
        # reset is diluted by characters that belonged to the previous run.
        self._character_count = 0

    @property
    def ratio(self) -> float:
        """Unprintable count weighted by 8, divided by the total character count."""
        if self._character_count == 0:
            return 0.0

        return (self._unprintable_count * 8) / self._character_count
| 162 | 
            +
             | 
| 163 | 
            +
             | 
class SuspiciousDuplicateAccentPlugin(MessDetectorPlugin):
    """
    Look for two accentuated Latin letters fed one right after the other.
    """

    def __init__(self) -> None:
        self._character_count: int = 0
        self._successive_count: int = 0

        self._last_latin_character: str | None = None

    def eligible(self, character: str) -> bool:
        """Only alphabetic Latin characters are fed to this plugin."""
        return character.isalpha() and is_latin(character)

    def feed(self, character: str) -> None:
        """Compare the letter with the previously fed one and score suspicious pairs."""
        self._character_count += 1

        previous = self._last_latin_character

        if (
            previous is not None
            and is_accentuated(character)
            and is_accentuated(previous)
        ):
            both_upper_case = character.isupper() and previous.isupper()
            if both_upper_case:
                self._successive_count += 1

            # Even worse when the same base letter shows up twice with different accents.
            same_base_letter = remove_accent(character) == remove_accent(previous)
            if same_base_letter:
                self._successive_count += 1

        self._last_latin_character = character

    def reset(self) -> None:  # Abstract
        """Zero the counters and forget the previously fed letter."""
        self._last_latin_character = None
        self._character_count = 0
        self._successive_count = 0

    @property
    def ratio(self) -> float:
        """Suspicious pair score weighted by 2, divided by the letter count."""
        if self._character_count == 0:
            return 0.0

        weighted_score = self._successive_count * 2
        return weighted_score / self._character_count
| 199 | 
            +
             | 
| 200 | 
            +
             | 
class SuspiciousRange(MessDetectorPlugin):
    """
    Count adjacent characters whose unicode ranges form a suspicious succession.
    """

    def __init__(self) -> None:
        self._character_count: int = 0
        self._suspicious_successive_range_count: int = 0
        self._last_printable_seen: str | None = None

    def eligible(self, character: str) -> bool:
        """Only printable characters are fed to this plugin."""
        return character.isprintable()

    def feed(self, character: str) -> None:
        """Compare the unicode range of the character with the previous one's."""
        self._character_count += 1

        breaks_the_pair = (
            character.isspace()
            or is_punctuation(character)
            or character in COMMON_SAFE_ASCII_CHARACTERS
        )

        if breaks_the_pair:
            # Whitespace, punctuation and safe ASCII never take part in a comparison.
            self._last_printable_seen = None
            return

        previous = self._last_printable_seen

        if previous is not None:
            previous_range: str | None = unicode_range(previous)
            current_range: str | None = unicode_range(character)

            if is_suspiciously_successive_range(previous_range, current_range):
                self._suspicious_successive_range_count += 1

        self._last_printable_seen = character

    def reset(self) -> None:  # Abstract
        """Zero the counters and forget the previously seen character."""
        self._last_printable_seen = None
        self._suspicious_successive_range_count = 0
        self._character_count = 0

    @property
    def ratio(self) -> float:
        """Suspicious pair count weighted by 2 over the character count; 0.0 up to 13 characters."""
        if self._character_count <= 13:
            return 0.0

        weighted_count = self._suspicious_successive_range_count * 2
        ratio_of_suspicious_range_usage: float = weighted_count / self._character_count

        return ratio_of_suspicious_range_usage
| 248 | 
            +
             | 
| 249 | 
            +
             | 
class SuperWeirdWordPlugin(MessDetectorPlugin):
    """
    Accumulate characters into words and measure how many characters belong to "bad" words.

    Alphabetic characters (and some symbols) are buffered. When whitespace,
    punctuation or a separator arrives, the buffered word is evaluated and the
    buffer is cleared.
    """

    def __init__(self) -> None:
        self._word_count: int = 0
        self._bad_word_count: int = 0
        self._foreign_long_count: int = 0

        self._is_current_word_bad: bool = False
        self._foreign_long_watch: bool = False

        self._character_count: int = 0
        self._bad_character_count: int = 0

        self._buffer: str = ""
        self._buffer_accent_count: int = 0
        self._buffer_glyph_count: int = 0

    def eligible(self, character: str) -> bool:
        """Every character is fed to this plugin."""
        return True

    def feed(self, character: str) -> None:
        """Buffer the character, or evaluate the buffered word when it ends."""
        if character.isalpha():
            self._buffer += character
            if is_accentuated(character):
                self._buffer_accent_count += 1
            if (
                self._foreign_long_watch is False
                and (is_latin(character) is False or is_accentuated(character))
                and is_cjk(character) is False
                and is_hangul(character) is False
                and is_katakana(character) is False
                and is_hiragana(character) is False
                and is_thai(character) is False
            ):
                self._foreign_long_watch = True
            if (
                is_cjk(character)
                or is_hangul(character)
                or is_katakana(character)
                or is_hiragana(character)
                or is_thai(character)
            ):
                self._buffer_glyph_count += 1
            return

        # Nothing buffered: a non-alphabetic character cannot end or taint a word.
        if not self._buffer:
            return

        if character.isspace() or is_punctuation(character) or is_separator(character):
            self._word_count += 1
            buffer_length: int = len(self._buffer)

            self._character_count += buffer_length

            if buffer_length >= 4:
                if self._buffer_accent_count / buffer_length >= 0.5:
                    self._is_current_word_bad = True
                # Word/Buffer ending with an upper case accentuated letter are so rare,
                # that we will consider them all as suspicious. Same weight as foreign_long suspicious.
                elif (
                    is_accentuated(self._buffer[-1])
                    and self._buffer[-1].isupper()
                    and not all(letter.isupper() for letter in self._buffer)
                ):
                    self._foreign_long_count += 1
                    self._is_current_word_bad = True
                elif self._buffer_glyph_count == 1:
                    self._is_current_word_bad = True
                    self._foreign_long_count += 1

            if buffer_length >= 24 and self._foreign_long_watch:
                # Only the number of upper case characters matters here, not their positions.
                upper_case_count: int = sum(
                    1 for letter in self._buffer if letter.isupper()
                )
                probable_camel_cased: bool = (
                    upper_case_count > 0 and upper_case_count / buffer_length <= 0.3
                )

                if not probable_camel_cased:
                    self._foreign_long_count += 1
                    self._is_current_word_bad = True

            if self._is_current_word_bad:
                self._bad_word_count += 1
                self._bad_character_count += buffer_length
                self._is_current_word_bad = False

            self._foreign_long_watch = False
            self._buffer = ""
            self._buffer_accent_count = 0
            self._buffer_glyph_count = 0
        elif (
            character not in {"<", ">", "-", "=", "~", "|", "_"}
            and not character.isdigit()
            and is_symbol(character)
        ):
            # A symbol in the middle of a word taints it and becomes part of the buffer.
            self._is_current_word_bad = True
            self._buffer += character

    def reset(self) -> None:  # Abstract
        """Restore every attribute to the value set by __init__."""
        self._buffer = ""
        # The per-buffer counters describe the buffer just cleared; leaving them
        # untouched would carry accents/glyphs of an unfinished word into the next run.
        self._buffer_accent_count = 0
        self._buffer_glyph_count = 0
        self._is_current_word_bad = False
        self._foreign_long_watch = False
        self._bad_word_count = 0
        self._word_count = 0
        self._character_count = 0
        self._bad_character_count = 0
        self._foreign_long_count = 0

    @property
    def ratio(self) -> float:
        """Share of characters that belong to bad words.

        Returns 0.0 while at most 10 words were seen and none was flagged as
        foreign/long.
        """
        if self._word_count <= 10 and self._foreign_long_count == 0:
            return 0.0

        # Defensive: never divide by zero, whatever state the counters are in.
        if self._character_count == 0:
            return 0.0

        return self._bad_character_count / self._character_count
| 366 | 
            +
             | 
| 367 | 
            +
             | 
class CjkInvalidStopPlugin(MessDetectorPlugin):
    """
    GB(Chinese) based encoding often render the stop incorrectly when the content does not fit,
    which is easy to detect. This plugin looks for the overuse of '丅' and '丄'.
    """

    def __init__(self) -> None:
        self._cjk_character_count: int = 0
        self._wrong_stop_count: int = 0

    def eligible(self, character: str) -> bool:
        """Every character is fed to this plugin."""
        return True

    def feed(self, character: str) -> None:
        """Count wrong stops, and count the other CJK characters separately."""
        if character == "丅" or character == "丄":
            self._wrong_stop_count += 1
        elif is_cjk(character):
            self._cjk_character_count += 1

    def reset(self) -> None:  # Abstract
        """Zero both counters."""
        self._cjk_character_count = 0
        self._wrong_stop_count = 0

    @property
    def ratio(self) -> float:
        """Wrong stops per CJK character, or 0.0 below 16 CJK characters."""
        cjk_seen = self._cjk_character_count

        if cjk_seen < 16:
            return 0.0

        return self._wrong_stop_count / cjk_seen
| 397 | 
            +
             | 
| 398 | 
            +
             | 
| 399 | 
            +
            class ArchaicUpperLowerPlugin(MessDetectorPlugin):
                """
                Count rapid upper/lower case alternations inside chunks of characters.

                A chunk is closed by a character that is not both alphabetic and
                case-variable (per ``is_case_variable``). When a chunk closes, its
                alternation count is added to the final tally only if the chunk is at
                most 64 characters long, the closing character is not a digit, and the
                chunk contained at least one non-ASCII character.
                """

                def __init__(self) -> None:
                    # True when the previous character flipped case and is awaiting a
                    # second consecutive flip to be counted.
                    self._buf: bool = False

                    self._character_count_since_last_sep: int = 0

                    self._successive_upper_lower_count: int = 0
                    self._successive_upper_lower_count_final: int = 0

                    self._character_count: int = 0

                    self._last_alpha_seen: str | None = None
                    self._current_ascii_only: bool = True

                def eligible(self, character: str) -> bool:
                    """Every character is fed to this detector."""
                    return True

                def feed(self, character: str) -> None:
                    """Update the chunk state and alternation counters with one character."""
                    is_concerned = character.isalpha() and is_case_variable(character)

                    # A separator only closes a chunk that already holds characters;
                    # otherwise it falls through and is processed like any other one.
                    if is_concerned is False and self._character_count_since_last_sep > 0:
                        self._close_chunk(character)
                        return

                    if not character.isascii():
                        self._current_ascii_only = False

                    previous = self._last_alpha_seen

                    if previous is not None:
                        case_flipped = (character.isupper() and previous.islower()) or (
                            character.islower() and previous.isupper()
                        )

                        if case_flipped and self._buf:
                            # Second flip in a row: account for both characters.
                            self._successive_upper_lower_count += 2
                            self._buf = False
                        else:
                            # First flip arms the buffer, anything else disarms it.
                            self._buf = case_flipped

                    self._character_count += 1
                    self._character_count_since_last_sep += 1
                    self._last_alpha_seen = character

                def _close_chunk(self, separator: str) -> None:
                    """Commit the current chunk's count if it qualifies, then start a new chunk."""
                    chunk_qualifies = (
                        self._character_count_since_last_sep <= 64
                        and not separator.isdigit()
                        and not self._current_ascii_only
                    )

                    if chunk_qualifies:
                        self._successive_upper_lower_count_final += (
                            self._successive_upper_lower_count
                        )

                    self._successive_upper_lower_count = 0
                    self._character_count_since_last_sep = 0
                    self._last_alpha_seen = None
                    self._buf = False
                    self._current_ascii_only = True
                    # The separator itself still counts toward the total.
                    self._character_count += 1

                def reset(self) -> None:  # Abstract
                    """Restore every counter and flag to its initial value."""
                    self._character_count = 0
                    self._character_count_since_last_sep = 0
                    self._successive_upper_lower_count = 0
                    self._successive_upper_lower_count_final = 0
                    self._last_alpha_seen = None
                    self._buf = False
                    self._current_ascii_only = True

                @property
                def ratio(self) -> float:
                    """Committed alternation count divided by all characters seen (0.0 if none)."""
                    total = self._character_count
                    return self._successive_upper_lower_count_final / total if total else 0.0
         | 
| 473 | 
            +
             | 
| 474 | 
            +
             | 
| 475 | 
            +
            class ArabicIsolatedFormPlugin(MessDetectorPlugin):
                """
                Measure how many of the characters accepted by ``is_arabic`` are also
                flagged by ``is_arabic_isolated_form``.
                """

                def __init__(self) -> None:
                    self._character_count: int = 0
                    self._isolated_form_count: int = 0

                def reset(self) -> None:  # Abstract
                    """Set both counters back to zero."""
                    self._character_count = self._isolated_form_count = 0

                def eligible(self, character: str) -> bool:
                    """Only characters accepted by ``is_arabic`` are fed to this detector."""
                    return is_arabic(character)

                def feed(self, character: str) -> None:
                    """Count the character, and count it again as isolated form when flagged."""
                    self._character_count += 1
                    self._isolated_form_count += 1 if is_arabic_isolated_form(character) else 0

                @property
                def ratio(self) -> float:
                    """Isolated-form share of the fed characters; 0.0 below 8 characters."""
                    fed = self._character_count
                    return self._isolated_form_count / fed if fed >= 8 else 0.0
         | 
| 501 | 
            +
             | 
| 502 | 
            +
             | 
| 503 | 
            +
            @lru_cache(maxsize=1024)
         | 
| 504 | 
            +
            def is_suspiciously_successive_range(
         | 
| 505 | 
            +
                unicode_range_a: str | None, unicode_range_b: str | None
         | 
| 506 | 
            +
            ) -> bool:
         | 
| 507 | 
            +
                """
         | 
| 508 | 
            +
                Determine if two Unicode range seen next to each other can be considered as suspicious.
         | 
| 509 | 
            +
                """
         | 
| 510 | 
            +
                if unicode_range_a is None or unicode_range_b is None:
         | 
| 511 | 
            +
                    return True
         | 
| 512 | 
            +
             | 
| 513 | 
            +
                if unicode_range_a == unicode_range_b:
         | 
| 514 | 
            +
                    return False
         | 
| 515 | 
            +
             | 
| 516 | 
            +
                if "Latin" in unicode_range_a and "Latin" in unicode_range_b:
         | 
| 517 | 
            +
                    return False
         | 
| 518 | 
            +
             | 
| 519 | 
            +
                if "Emoticons" in unicode_range_a or "Emoticons" in unicode_range_b:
         | 
| 520 | 
            +
                    return False
         | 
| 521 | 
            +
             | 
| 522 | 
            +
                # Latin characters can be accompanied with a combining diacritical mark
         | 
| 523 | 
            +
                # eg. Vietnamese.
         | 
| 524 | 
            +
                if ("Latin" in unicode_range_a or "Latin" in unicode_range_b) and (
         | 
| 525 | 
            +
                    "Combining" in unicode_range_a or "Combining" in unicode_range_b
         | 
| 526 | 
            +
                ):
         | 
| 527 | 
            +
                    return False
         | 
| 528 | 
            +
             | 
| 529 | 
            +
                keywords_range_a, keywords_range_b = (
         | 
| 530 | 
            +
                    unicode_range_a.split(" "),
         | 
| 531 | 
            +
                    unicode_range_b.split(" "),
         | 
| 532 | 
            +
                )
         | 
| 533 | 
            +
             | 
| 534 | 
            +
                for el in keywords_range_a:
         | 
| 535 | 
            +
                    if el in UNICODE_SECONDARY_RANGE_KEYWORD:
         | 
| 536 | 
            +
                        continue
         | 
| 537 | 
            +
                    if el in keywords_range_b:
         | 
| 538 | 
            +
                        return False
         | 
| 539 | 
            +
             | 
| 540 | 
            +
                # Japanese Exception
         | 
| 541 | 
            +
                range_a_jp_chars, range_b_jp_chars = (
         | 
| 542 | 
            +
                    unicode_range_a
         | 
| 543 | 
            +
                    in (
         | 
| 544 | 
            +
                        "Hiragana",
         | 
| 545 | 
            +
                        "Katakana",
         | 
| 546 | 
            +
                    ),
         | 
| 547 | 
            +
                    unicode_range_b in ("Hiragana", "Katakana"),
         | 
| 548 | 
            +
                )
         | 
| 549 | 
            +
                if (range_a_jp_chars or range_b_jp_chars) and (
         | 
| 550 | 
            +
                    "CJK" in unicode_range_a or "CJK" in unicode_range_b
         | 
| 551 | 
            +
                ):
         | 
| 552 | 
            +
                    return False
         | 
| 553 | 
            +
                if range_a_jp_chars and range_b_jp_chars:
         | 
| 554 | 
            +
                    return False
         | 
| 555 | 
            +
             | 
| 556 | 
            +
                if "Hangul" in unicode_range_a or "Hangul" in unicode_range_b:
         | 
| 557 | 
            +
                    if "CJK" in unicode_range_a or "CJK" in unicode_range_b:
         | 
| 558 | 
            +
                        return False
         | 
| 559 | 
            +
                    if unicode_range_a == "Basic Latin" or unicode_range_b == "Basic Latin":
         | 
| 560 | 
            +
                        return False
         | 
| 561 | 
            +
             | 
| 562 | 
            +
                # Chinese/Japanese use dedicated range for punctuation and/or separators.
         | 
| 563 | 
            +
                if ("CJK" in unicode_range_a or "CJK" in unicode_range_b) or (
         | 
| 564 | 
            +
                    unicode_range_a in ["Katakana", "Hiragana"]
         | 
| 565 | 
            +
                    and unicode_range_b in ["Katakana", "Hiragana"]
         | 
| 566 | 
            +
                ):
         | 
| 567 | 
            +
                    if "Punctuation" in unicode_range_a or "Punctuation" in unicode_range_b:
         | 
| 568 | 
            +
                        return False
         | 
| 569 | 
            +
                    if "Forms" in unicode_range_a or "Forms" in unicode_range_b:
         | 
| 570 | 
            +
                        return False
         | 
| 571 | 
            +
                    if unicode_range_a == "Basic Latin" or unicode_range_b == "Basic Latin":
         | 
| 572 | 
            +
                        return False
         | 
| 573 | 
            +
             | 
| 574 | 
            +
                return True
         | 
| 575 | 
            +
             | 
| 576 | 
            +
             | 
| 577 | 
            +
            @lru_cache(maxsize=2048)
            def mess_ratio(
                decoded_sequence: str, maximum_threshold: float = 0.2, debug: bool = False
            ) -> float:
                """
                Compute a mess ratio for a decoded sequence.

                One instance of every direct subclass of MessDetectorPlugin is fed each
                character of the sequence (plus a trailing newline) it declares eligible.
                The detector ratios are summed at regular intervals and on the last
                character; the scan stops as soon as that sum reaches maximum_threshold.
                The result is rounded to 3 decimals. With debug enabled, details are
                logged at TRACE level on the "charset_normalizer" logger.
                """

                detectors: list[MessDetectorPlugin] = [
                    plugin_class() for plugin_class in MessDetectorPlugin.__subclasses__()
                ]

                # +1 accounts for the newline appended to the sequence below.
                length: int = len(decoded_sequence) + 1
                last_index: int = length - 1

                mean_mess_ratio: float = 0.0

                # Longer inputs are evaluated less often.
                intermediary_mean_mess_ratio_calc: int = (
                    32 if length < 512 else (64 if length <= 1024 else 128)
                )

                for index, character in enumerate(decoded_sequence + "\n"):
                    for detector in detectors:
                        if detector.eligible(character):
                            detector.feed(character)

                    at_checkpoint = (
                        index > 0 and index % intermediary_mean_mess_ratio_calc == 0
                    )

                    if at_checkpoint or index == last_index:
                        mean_mess_ratio = sum(detector.ratio for detector in detectors)

                        if mean_mess_ratio >= maximum_threshold:
                            break

                if debug:
                    logger = getLogger("charset_normalizer")

                    logger.log(
                        TRACE,
                        "Mess-detector extended-analysis start. "
                        f"intermediary_mean_mess_ratio_calc={intermediary_mean_mess_ratio_calc} mean_mess_ratio={mean_mess_ratio} "
                        f"maximum_threshold={maximum_threshold}",
                    )

                    if len(decoded_sequence) > 16:
                        logger.log(TRACE, f"Starting with: {decoded_sequence[:16]}")
                        logger.log(TRACE, f"Ending with: {decoded_sequence[-16::]}")

                    for dt in detectors:
                        logger.log(TRACE, f"{dt.__class__}: {dt.ratio}")

                return round(mean_mess_ratio, 3)
         | 
