diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml index c691a0058fe..e28e5408208 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.yml +++ b/.github/ISSUE_TEMPLATE/bug-report.yml @@ -60,15 +60,12 @@ body: label: Output description: >- Provide the output of the steps above, including the commands - themselves and pip's output/traceback etc. If you're familiar with - Markdown, this block will have triple backticks added automatically - around it -- you don't have to add them. + themselves and pip's output/traceback etc. - If you want to present output from multiple commands, please present - that as a shell session (commands you run get prefixed with `$ `). - Please also ensure that the "How to reproduce" section contains matching - instructions for reproducing this. - render: sh-session + If you want to present output from multiple commands, please prefix + the line containing the command with `$ `. Please also ensure that + the "How to reproduce" section contains matching instructions for + reproducing this. - type: checkboxes attributes: diff --git a/.github/ISSUE_TEMPLATE/~good-first-issue.yml b/.github/ISSUE_TEMPLATE/~good-first-issue.yml deleted file mode 100644 index 81e206a35f9..00000000000 --- a/.github/ISSUE_TEMPLATE/~good-first-issue.yml +++ /dev/null @@ -1,38 +0,0 @@ -name: Good first issue -description: If you're a pip maintainer, use this to create a "good first issue" for new contributors. -labels: "good first issue" - -body: - - type: textarea - attributes: - label: Description - description: >- - A clear and concise description of what the task is. - validations: - required: true - - - type: textarea - attributes: - label: What needs to be done - description: >- - Describe what the contributor would need to do, describing the change. - See https://github.com/pypa/pip/issues/7661 for example. - validations: - required: true - - - type: textarea - attributes: - label: Guidance for potential contributors - description: >- - Usually, you don't have to modify the content here. - value: >- - This issue is a good starting point for first time contributors -- the - process of fixing this should be a good introduction to pip's - development workflow. If there is not a corresponding pull request for - this issue, it is up for grabs. For directions for getting set up, see our - [Getting Started Guide](https://pip.pypa.io/en/latest/development/getting-started/). - If you are working on this issue and have questions, feel free to ask - them here. If you've contributed code to pip before, we encourage you to - pick up an issue without this label. - validations: - required: true diff --git a/.github/chronographer.yml b/.github/chronographer.yml new file mode 100644 index 00000000000..b42883f8f4a --- /dev/null +++ b/.github/chronographer.yml @@ -0,0 +1,30 @@ +--- + +action-hints: + # check-title-prefix: chng # default: `{{ branch-protection-check-name }}: ` + external-docs-url: https://pip.pypa.io/how-to-changelog + inline-markdown: > + Check out https://pip.pypa.io/how-to-changelog + +branch-protection-check-name: Timeline protection + +enforce-name: + # suffix: .md + suffix: .rst + +exclude: + bots: + - dependabot-preview + - dependabot + - patchback + humans: + - pyup-bot + +labels: + skip-changelog: skip news + +paths: # relative modified file paths that do or don't need changelog mention + exclude: [] + include: [] + +... diff --git a/.github/no-response.yml b/.github/no-response.yml deleted file mode 100644 index 47afde800e0..00000000000 --- a/.github/no-response.yml +++ /dev/null @@ -1,11 +0,0 @@ -# Number of days of inactivity before issue is closed for lack of response -daysUntilClose: 15 -# Label requiring a response -responseRequiredLabel: "S: awaiting response" -# Comment to post when closing an Issue for lack of response. Set to `false` to disable -closeComment: > - This issue has been automatically closed because there has been no response - to our request for more information from the original author. With only the - information that is currently in the issue, we don't have enough information - to take action. Please reach out if you have or find the answers we need so - that we can investigate further. diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 51f7b8f9bbb..3b35e93b21f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -11,14 +11,20 @@ on: schedule: - cron: 0 0 * * MON # Run every Monday at 00:00 UTC +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }} + cancel-in-progress: true + jobs: docs: name: docs runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: "3.x" - run: pip install nox - run: nox -s docs @@ -37,31 +43,24 @@ jobs: # Anything that's touching "vendored code" - "src/pip/_vendor/**" - "pyproject.toml" + - "noxfile.py" tests: - # Anything that's touching testable stuff + # Anything that's touching code-related stuff - ".github/workflows/ci.yml" - "src/**" - "tests/**" + - "noxfile.py" if: github.event_name == 'pull_request' - pre-commit: - name: pre-commit - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 - - uses: pre-commit/action@v2.0.0 - with: - extra_args: --all-files --hook-stage=manual - packaging: name: packaging runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: "3.x" - name: Set up git credentials run: | git config --global user.email "pypa-dev@googlegroups.com" @@ -70,6 +69,7 @@ jobs: - run: pip install nox - run: nox -s prepare-release -- 99.9 - run: nox -s build-release -- 99.9 + - run: pipx run check-manifest vendoring: name: vendoring @@ -81,18 +81,20 @@ jobs: github.event_name != 'pull_request' steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: "3.x" - - run: pip install vendoring - - run: vendoring sync . --verbose + - run: pip install nox + - run: nox -s vendoring - run: git diff --exit-code tests-unix: name: tests / ${{ matrix.python }} / ${{ matrix.os }} runs-on: ${{ matrix.os }}-latest - needs: [pre-commit, packaging, determine-changes] + needs: [packaging, determine-changes] if: >- needs.determine-changes.outputs.tests == 'true' || github.event_name != 'pull_request' @@ -102,15 +104,15 @@ jobs: matrix: os: [Ubuntu, MacOS] python: - - 3.6 - - 3.7 - - 3.8 - - 3.9 - - "3.10.0-alpha - 3.10" + - "3.7" + - "3.8" + - "3.9" + - "3.10" + - "3.11" steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 with: python-version: ${{ matrix.python }} @@ -120,19 +122,19 @@ jobs: - name: Install MacOS dependencies if: matrix.os == 'MacOS' - run: brew install bzr + run: brew install breezy - - run: pip install tox 'virtualenv<20' + - run: pip install nox # Main check - name: Run unit tests run: >- - tox -e py -- + nox -s test-${{ matrix.python }} -- -m unit --verbose --numprocesses auto --showlocals - name: Run integration tests run: >- - tox -e py -- + nox -s test-${{ matrix.python }} -- -m integration --verbose --numprocesses auto --showlocals --durations=5 @@ -141,7 +143,7 @@ jobs: name: tests / ${{ matrix.python }} / ${{ matrix.os }} / ${{ matrix.group }} runs-on: ${{ matrix.os }}-latest - needs: [pre-commit, packaging, determine-changes] + needs: [packaging, determine-changes] if: >- needs.determine-changes.outputs.tests == 'true' || github.event_name != 'pull_request' @@ -151,17 +153,17 @@ jobs: matrix: os: [Windows] python: - - 3.6 + - "3.7" # Commented out, since Windows tests are expensively slow. - # - 3.7 - # - 3.8 - - 3.9 - - "3.10.0-alpha - 3.10" + # - "3.8" + # - "3.9" + # - "3.10" + - "3.11" group: [1, 2] steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v2 + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 with: python-version: ${{ matrix.python }} @@ -180,7 +182,7 @@ jobs: $acl.AddAccessRule($rule) Set-Acl "R:\Temp" $acl - - run: pip install tox 'virtualenv<20' + - run: pip install nox env: TEMP: "R:\\Temp" @@ -188,7 +190,7 @@ jobs: - name: Run unit tests if: matrix.group == 1 run: >- - tox -e py -- + nox -s test-${{ matrix.python }} -- -m unit --verbose --numprocesses auto --showlocals env: @@ -197,7 +199,7 @@ jobs: - name: Run integration tests (group 1) if: matrix.group == 1 run: >- - tox -e py -- + nox -s test-${{ matrix.python }} -- -m integration -k "not test_install" --verbose --numprocesses auto --showlocals env: @@ -206,8 +208,80 @@ jobs: - name: Run integration tests (group 2) if: matrix.group == 2 run: >- - tox -e py -- + nox -s test-${{ matrix.python }} -- -m integration -k "test_install" --verbose --numprocesses auto --showlocals env: TEMP: "R:\\Temp" + + tests-zipapp: + name: tests / zipapp + runs-on: ubuntu-latest + + needs: [packaging, determine-changes] + if: >- + needs.determine-changes.outputs.tests == 'true' || + github.event_name != 'pull_request' + + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: "3.10" + + - name: Install Ubuntu dependencies + run: sudo apt-get install bzr + + - run: pip install nox 'virtualenv<20' 'setuptools != 60.6.0' + + # Main check + - name: Run integration tests + run: >- + nox -s test-3.10 -- + -m integration + --verbose --numprocesses auto --showlocals + --durations=5 + --use-zipapp + + check: # This job does nothing and is only used for the branch protection + if: always() + + needs: + - determine-changes + - docs + - packaging + - tests-unix + - tests-windows + - tests-zipapp + - vendoring + + runs-on: ubuntu-latest + + steps: + - name: Decide whether the needed jobs succeeded or failed + uses: re-actors/alls-green@release/v1 + with: + allowed-skips: >- + ${{ + ( + needs.determine-changes.outputs.vendoring != 'true' + && github.event_name == 'pull_request' + ) + && 'vendoring' + || '' + }} + , + ${{ + ( + needs.determine-changes.outputs.tests != 'true' + && github.event_name == 'pull_request' + ) + && ' + tests-unix, + tests-windows, + tests-zipapp, + tests-importlib-metadata, + ' + || '' + }} + jobs: ${{ toJSON(needs) }} diff --git a/.github/workflows/lock-threads.yml b/.github/workflows/lock-threads.yml index 1841ce1f7af..990440dd6c8 100644 --- a/.github/workflows/lock-threads.yml +++ b/.github/workflows/lock-threads.yml @@ -14,6 +14,7 @@ concurrency: jobs: action: + if: github.repository_owner == 'pypa' runs-on: ubuntu-latest steps: - uses: dessant/lock-threads@v3 diff --git a/.github/workflows/news-file.yml b/.github/workflows/news-file.yml index 2c7680d0b68..371e12fd755 100644 --- a/.github/workflows/news-file.yml +++ b/.github/workflows/news-file.yml @@ -10,14 +10,14 @@ jobs: runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 with: # `towncrier check` runs `git diff --name-only origin/main...`, which # needs a non-shallow clone. fetch-depth: 0 - name: Check news entry - if: "!contains(github.event.pull_request.labels.*.name, 'trivial')" + if: "!contains(github.event.pull_request.labels.*.name, 'skip news')" run: | if ! pipx run towncrier check --compare-with origin/${{ github.base_ref }}; then echo "Please see https://pip.pypa.io/dev/news-entry-failure for guidance." diff --git a/.github/workflows/no-response.yml b/.github/workflows/no-response.yml new file mode 100644 index 00000000000..939290b93e5 --- /dev/null +++ b/.github/workflows/no-response.yml @@ -0,0 +1,19 @@ +name: No Response + +# Both `issue_comment` and `scheduled` event types are required for this Action +# to work properly. +on: + issue_comment: + types: [created] + schedule: + # Schedule for five minutes after the hour, every hour + - cron: '5 * * * *' + +jobs: + noResponse: + runs-on: ubuntu-latest + steps: + - uses: lee-dohm/no-response@v0.5.0 + with: + token: ${{ github.token }} + responseRequiredLabel: "S: awaiting response" diff --git a/.github/workflows/update-rtd-redirects.yml b/.github/workflows/update-rtd-redirects.yml new file mode 100644 index 00000000000..8259b6c0b6a --- /dev/null +++ b/.github/workflows/update-rtd-redirects.yml @@ -0,0 +1,28 @@ +name: Update documentation redirects + +on: + push: + branches: [main] + schedule: + - cron: 0 0 * * MON # Run every Monday at 00:00 UTC + +env: + FORCE_COLOR: "1" + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }} + cancel-in-progress: true + +jobs: + update-rtd-redirects: + runs-on: ubuntu-latest + environment: RTD Deploys + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: "3.11" + - run: pip install httpx pyyaml rich + - run: python tools/update-rtd-redirects.py + env: + RTD_API_TOKEN: ${{ secrets.RTD_API_TOKEN }} diff --git a/.mailmap b/.mailmap index 4206a022a9d..d0c64300fd2 100644 --- a/.mailmap +++ b/.mailmap @@ -34,6 +34,7 @@ Ludovic Gasc Markus Hametner Masklinn Matthew Iversen +Ofek Lev Pi Delport Pradyun Gedam diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a2a147be0bc..7a37d18c51d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,7 +2,7 @@ exclude: 'src/pip/_vendor/' repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v3.4.0 + rev: v4.3.0 hooks: - id: check-builtin-literals - id: check-added-large-files @@ -17,29 +17,29 @@ repos: exclude: .patch - repo: https://github.com/psf/black - rev: 21.7b0 + rev: 22.6.0 hooks: - id: black -- repo: https://gitlab.com/pycqa/flake8 - rev: 3.8.4 +- repo: https://github.com/PyCQA/flake8 + rev: 4.0.1 hooks: - id: flake8 additional_dependencies: [ - 'flake8-bugbear==20.1.4', - 'flake8-logging-format==0.6.0', - 'flake8-implicit-str-concat==0.2.0', + 'flake8-bugbear==22.10.27', + 'flake8-logging-format==0.9.0', + 'flake8-implicit-str-concat==0.3.0', ] exclude: tests/data - repo: https://github.com/PyCQA/isort - rev: 5.7.0 + rev: 5.12.0 hooks: - id: isort files: \.py$ - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.910 + rev: v0.961 hooks: - id: mypy exclude: tests/data @@ -47,14 +47,16 @@ repos: additional_dependencies: [ 'keyring==23.0.1', 'nox==2021.6.12', - 'pytest==6.2.5', - 'types-docutils==0.1.8', - 'types-setuptools==57.0.2', - 'types-six==0.1.9', + 'pytest', + 'types-docutils==0.18.3', + 'types-setuptools==57.4.14', + 'types-freezegun==1.1.9', + 'types-six==1.16.15', + 'types-pyyaml==6.0.12.2', ] - repo: https://github.com/pre-commit/pygrep-hooks - rev: v1.7.0 + rev: v1.9.0 hooks: - id: python-no-log-warn - id: python-no-eval @@ -72,8 +74,7 @@ repos: exclude: ^news/(.gitignore|.*\.(process|removal|feature|bugfix|vendor|doc|trivial).rst) files: ^news/ -- repo: https://github.com/mgedmin/check-manifest - rev: '0.46' - hooks: - - id: check-manifest - stages: [manual] +ci: + autofix_prs: false + autoupdate_commit_msg: 'pre-commit autoupdate' + autoupdate_schedule: monthly diff --git a/.readthedocs-custom-redirects.yml b/.readthedocs-custom-redirects.yml new file mode 100644 index 00000000000..d0c072fbf96 --- /dev/null +++ b/.readthedocs-custom-redirects.yml @@ -0,0 +1,15 @@ +# This file is read by tools/update-rtd-redirects.py. +# It is related to Read the Docs, but is not a file processed by the platform. + +/dev/news-entry-failure: >- + https://pip.pypa.io/en/latest/development/contributing/#news-entries +/errors/resolution-impossible: >- + https://pip.pypa.io/en/stable/topics/dependency-resolution/#dealing-with-dependency-conflicts +/surveys/backtracking: >- + https://forms.gle/LkZP95S4CfqBAU1N6 +/warnings/backtracking: >- + https://pip.pypa.io/en/stable/topics/dependency-resolution/#possible-ways-to-reduce-backtracking +/warnings/enable-long-paths: >- + https://docs.microsoft.com/en-us/windows/win32/fileio/maximum-file-path-limitation?tabs=cmd#enable-long-paths-in-windows-10-version-1607-and-later +/warnings/venv: >- + https://docs.python.org/3/tutorial/venv.html diff --git a/AUTHORS.txt b/AUTHORS.txt index a65fc021997..0f0fb3caf98 100644 --- a/AUTHORS.txt +++ b/AUTHORS.txt @@ -18,6 +18,7 @@ Alan Yee Albert Tugushev Albert-Guan albertg +Alberto Sottile Aleks Bunin Alethea Flowers Alex Gaynor @@ -38,6 +39,7 @@ Andre Aguiar Andreas Lutro Andrei Geacar Andrew Gaul +Andrew Shymanel Andrey Bienkowski Andrey Bulgakov Andrés Delfino @@ -98,6 +100,7 @@ Bradley Ayers Brandon L. Reiss Brandt Bucher Brett Randall +Brett Rosen Brian Cristante Brian Rosner briantracy @@ -124,6 +127,8 @@ Chris Brinker Chris Hunt Chris Jerdonek Chris McDonough +Chris Pawley +Chris Pryer Chris Wolfe Christian Clauss Christian Heimes @@ -133,6 +138,7 @@ Christopher Hunt Christopher Snyder cjc7373 Clark Boylan +Claudio Jolowicz Clay McClure Cody Cody Soyland @@ -155,6 +161,7 @@ Damian Shaw Dan Black Dan Savilonis Dan Sully +Dane Hillard daniel Daniel Collins Daniel Hahler @@ -163,7 +170,9 @@ Daniel Jost Daniel Katz Daniel Shaulov Daniele Esposti +Daniele Nicolodi Daniele Procida +Daniil Konovalenko Danny Hermes Danny McClanahan Darren Kavanagh @@ -180,6 +189,7 @@ David Hewitt David Linke David Poggi David Pursehouse +David Runge David Tucker David Wales Davidovich @@ -193,17 +203,22 @@ Diego Caraballo Diego Ramirez DiegoCaraballo Dimitri Merejkowsky +Dimitri Papadopoulos Dirk Stolle Dmitry Gladkov +Dmitry Volodin Domen Kožar Dominic Davis-Foster Donald Stufft Dongweiming +doron zarhi Douglas Thor DrFeathers Dustin Ingram Dwayne Bailey Ed Morley +Edgar Ramírez +Ee Durbin Eitan Adler ekristina elainechan @@ -222,15 +237,17 @@ Eric Hanchrow Eric Hopper Erik M. Bray Erik Rose -Ernest W Durbin III -Ernest W. Durbin III Erwin Janssen Eugene Vereshchagin everdimension +Federico +Felipe Peter Felix Yan fiber-space Filip Kokosiński Filipe Laíns +Finn Womack +finnagin Florian Briand Florian Rathgeber Francesco @@ -239,16 +256,20 @@ Frost Ming Gabriel Curio Gabriel de Perthuis Garry Polley +gavin gdanielson Geoffrey Sneddon George Song Georgi Valkov +Georgy Pchelkin ghost Giftlin Rajaiah gizmoguy1 gkdoc +Godefroid Chapelle Gopinath M GOTO Hayato +gousaiyang gpiks Greg Roodt Greg Ward @@ -262,11 +283,15 @@ Hari Charan Harsh Vardhan harupy Harutaka Kawamura +hauntsaninja +Henrich Hartzer Henry Schreiner Herbert Pfennig +Holly Stotelmyer Hsiaoming Yang Hugo Lopes Tavares Hugo van Kemenade +Hugues Bruant Hynek Schlawack Ian Bicking Ian Cordasco @@ -284,17 +309,22 @@ Ionel Maries Cristian Ivan Pozdeev Jacob Kim Jacob Walls +Jaime Sanz jakirkham +Jakub Kuczys Jakub Stasiak Jakub Vysoky Jakub Wilk James Cleveland James Curtin James Firth +James Gerity James Polley Jan Pokorný Jannis Leidel +Jarek Potiuk jarondl +Jason Curtis Jason R. Coombs Jay Graves Jean-Christophe Fillion-Robin @@ -308,6 +338,7 @@ Jesse Rittner Jiashuo Li Jim Fisher Jim Garrison +Jiun Bae Jivan Amara Joe Michelini John Paton @@ -319,6 +350,7 @@ Jon Dufresne Jon Parise Jonas Nockert Jonathan Herbert +Joonatan Partanen Joost Molenaar Jorge Niedbalski Joseph Bylund @@ -327,6 +359,7 @@ Josh Bronson Josh Hansen Josh Schneier Juanjo Bazán +Judah Rand Julian Berman Julian Gethmann Julien Demoor @@ -334,7 +367,9 @@ Jussi Kukkonen jwg4 Jyrki Pulliainen Kai Chen +Kai Mueller Kamal Bin Mustafa +kasium kaustav haldar keanemind Keith Maxwell @@ -359,6 +394,7 @@ Laurent Bristiel Laurent LAPORTE Laurie O Laurie Opperman +layday Leon Sasson Lev Givon Lincoln de Sousa @@ -366,11 +402,13 @@ Lipis Loren Carvalho Lucas Cimon Ludovic Gasc +Lukas Juhrich Luke Macken Luo Jiebin luojiebin luz.paz László Kiss Kollár +M00nL1ght Marc Abramowitz Marc Tamlyn Marcus Smith @@ -378,6 +416,8 @@ Mariatta Mark Kohler Mark Williams Markus Hametner +Martey Dodoo +Martin Fischer Martin Häcker Martin Pavlasek Masaki @@ -385,6 +425,8 @@ Masklinn Matej Stuchlik Mathew Jennings Mathieu Bridon +Mathieu Kniewallner +Matt Bacchi Matt Good Matt Maker Matt Robenolt @@ -396,6 +438,7 @@ Matthew Trumbell Matthew Willson Matthias Bussonnier mattip +Maurits van Rees Max W Chase Maxim Kurnikov Maxime Rouyrre @@ -410,6 +453,7 @@ Michael E. Karpeles Michael Klich Michael Williamson michaelpacer +Michał Górny Mickaël Schoentgen Miguel Araujo Perez Mihir Singh @@ -421,7 +465,12 @@ Miro Hrončok Monica Baluna montefra Monty Taylor +Muha Ajjan‮ +Nadav Wexler +Nahuel Ambrosini Nate Coraor +Nate Prewitt +Nathan Houghton Nathaniel J. Smith Nehal J Wani Neil Botelho @@ -444,8 +493,9 @@ Nowell Strite NtaleGrey nvdv OBITORASU -Ofekmeister +Ofek Lev ofrinevo +Oliver Freund Oliver Jeeves Oliver Mannion Oliver Tonnhofer @@ -473,6 +523,7 @@ Paulus Schoutsen Pavel Safronov Pavithra Eswaramoorthy Pawel Jasinski +Paweł Szramowski Pekka Klärck Peter Gessler Peter Lisák @@ -499,6 +550,7 @@ Preet Thakkar Preston Holmes Przemek Wrzos Pulkit Goyal +q0w Qiangning Hong Quentin Lee Quentin Pradet @@ -511,9 +563,11 @@ Reece Dunham Remi Rampin Rene Dudfield Riccardo Magliocchetti +Riccardo Schirone Richard Jones Richard Si Ricky Ng-Adam +Rishi RobberPhex Robert Collins Robert McGibbon @@ -523,15 +577,18 @@ Roey Berman Rohan Jain Roman Bogorodskiy Romuald Brunet +ronaudinho Ronny Pfannschmidt Rory McCann Ross Brattain Roy Wellington Ⅳ Ruairidh MacLeod +Russell Keith-Magee Ryan Wooden ryneeverett Sachi King Salvatore Rinchiera +sandeepkiran-js Savio Jomton schlamar Scott Kitterman @@ -542,8 +599,10 @@ Sebastian Schaetz Segev Finer SeongSoo Cho Sergey Vasilyev +Seth Michael Larson Seth Woodworth shireenrao +Shivansh-007 Shlomi Fish Shovan Maity Simeon Visser @@ -559,6 +618,7 @@ Stavros Korokithakis Stefan Scherfke Stefano Rivera Stephan Erb +Stephen Rosen stepshal Steve (Gadget) Barnes Steve Barnes @@ -577,6 +637,7 @@ Sylvain Takayuki SHIMIZUKAWA Taneli Hukkinen tbeswick +Thiago Thijs Triemstra Thomas Fenzl Thomas Grainger @@ -584,6 +645,7 @@ Thomas Guettler Thomas Johansson Thomas Kluyver Thomas Smith +Thomas VINCENT Tim D. Smith Tim Gates Tim Harder @@ -596,13 +658,17 @@ Tom V Tomas Hrnciar Tomas Orsava Tomer Chachamu +Tommi Enenkel | AnB +Tomáš Hrnčiar Tony Beswick +Tony Narlock Tony Zhaocheng Tan TonyBeswick toonarmycaptain Toshio Kuratomi toxinu Travis Swicegood +Tushar Sadhwani Tzu-ping Chung Valentin Haenel Victor Stinner @@ -621,6 +687,7 @@ Wil Tan Wilfred Hughes William ML Leslie William T Olson +William Woodruff Wilson Mo wim glenn Winson Luk @@ -635,10 +702,12 @@ Yeray Diaz Diaz Yoval P Yu Jian Yuan Jing Vincent Yan +Yusuke Hayashi Zearin Zhiping Deng ziebam Zvezdan Petkovic Łukasz Langa +Роман Донченко Семён Марьясин ‮rekcäH nitraM‮ diff --git a/LICENSE.txt b/LICENSE.txt index 00addc2725a..8e7b65eaf62 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,4 +1,4 @@ -Copyright (c) 2008-2021 The pip developers (see AUTHORS.txt file) +Copyright (c) 2008-present The pip developers (see AUTHORS.txt file) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the diff --git a/MANIFEST.in b/MANIFEST.in index 59c6cdb22be..4716f415730 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -2,10 +2,12 @@ include AUTHORS.txt include LICENSE.txt include NEWS.rst include README.rst +include SECURITY.md include pyproject.toml include src/pip/_vendor/README.rst include src/pip/_vendor/vendor.txt +include src/pip/_vendor/pyparsing/diagram/template.jinja2 recursive-include src/pip/_vendor *LICENSE* recursive-include src/pip/_vendor *COPYING* @@ -17,12 +19,14 @@ exclude .mailmap exclude .appveyor.yml exclude .readthedocs.yml exclude .pre-commit-config.yaml +exclude .readthedocs-custom-redirects.yml exclude tox.ini exclude noxfile.py recursive-include src/pip/_vendor *.pem recursive-include src/pip/_vendor py.typed recursive-include docs *.css *.py *.rst *.md +recursive-include docs *.dot *.png exclude src/pip/_vendor/six exclude src/pip/_vendor/six/moves diff --git a/NEWS.rst b/NEWS.rst index 866b5c3c0c7..5169dad1d43 100644 --- a/NEWS.rst +++ b/NEWS.rst @@ -9,6 +9,504 @@ .. towncrier release notes start +23.0.1 (2023-02-17) +=================== + +Features +-------- + +- Ignore PIP_REQUIRE_VIRTUALENV for ``pip index`` (`#11671 `_) +- Implement ``--break-system-packages`` to permit installing packages into + ``EXTERNALLY-MANAGED`` Python installations. (`#11780 `_) + +Bug Fixes +--------- + +- Improve handling of isolated build environments on platforms that + customize the Python's installation schemes, such as Debian and + Homebrew. (`#11740 `_) +- Do not crash in presence of misformatted hash field in ``direct_url.json``. (`#11773 `_) + + +23.0 (2023-01-30) +================= + +Features +-------- + +- Change the hashes in the installation report to be a mapping. Emit the + ``archive_info.hashes`` dictionary in ``direct_url.json``. (`#11312 `_) +- Implement logic to read the ``EXTERNALLY-MANAGED`` file as specified in PEP 668. + This allows a downstream Python distributor to prevent users from using pip to + modify the externally managed environment. (`#11381 `_) +- Enable the use of ``keyring`` found on ``PATH``. This allows ``keyring`` + installed using ``pipx`` to be used by ``pip``. (`#11589 `_) +- The inspect and installation report formats are now declared stable, and their version + has been bumped from ``0`` to ``1``. (`#11757 `_) + +Bug Fixes +--------- + +- Wheel cache behavior is restored to match previous versions, allowing the + cache to find existing entries. (`#11527 `_) +- Use the "venv" scheme if available to obtain prefixed lib paths. (`#11598 `_) +- Deprecated a historical ambiguity in how ``egg`` fragments in URL-style + requirements are formatted and handled. ``egg`` fragments that do not look + like PEP 508 names now produce a deprecation warning. (`#11617 `_) +- Fix scripts path in isolated build environment on Debian. (`#11623 `_) +- Make ``pip show`` show the editable location if package is editable (`#11638 `_) +- Stop checking that ``wheel`` is present when ``build-system.requires`` + is provided without ``build-system.build-backend`` as ``setuptools`` + (which we still check for) will inject it anyway. (`#11673 `_) +- Fix an issue when an already existing in-memory distribution would cause + exceptions in ``pip install`` (`#11704 `_) + +Vendored Libraries +------------------ + +- Upgrade certifi to 2022.12.7 +- Upgrade chardet to 5.1.0 +- Upgrade colorama to 0.4.6 +- Upgrade distro to 1.8.0 +- Remove pep517 from vendored packages +- Upgrade platformdirs to 2.6.2 +- Add pyproject-hooks 1.0.0 +- Upgrade requests to 2.28.2 +- Upgrade rich to 12.6.0 +- Upgrade urllib3 to 1.26.14 + +Improved Documentation +---------------------- + +- Fixed the description of the option "--install-options" in the documentation (`#10265 `_) +- Remove mention that editable installs are necessary for pip freeze to report the VCS + URL. (`#11675 `_) +- Clarify that the egg URL fragment is only necessary for editable VCS installs, and + otherwise not necessary anymore. (`#11676 `_) + + +22.3.1 (2022-11-05) +=================== + +Bug Fixes +--------- + +- Fix entry point generation of ``pip.X``, ``pipX.Y``, and ``easy_install-X.Y`` + to correctly account for multi-digit Python version segments (e.g. the "11" + part of 3.11). (`#11547 `_) + + +22.3 (2022-10-15) +================= + +Deprecations and Removals +------------------------- + +- Deprecate ``--install-options`` which forces pip to use the deprecated ``install`` + command of ``setuptools``. (`#11358 `_) +- Deprecate installation with 'setup.py install' when no-binary is enabled for + source distributions without 'pyproject.toml'. (`#11452 `_) +- Deprecate ```--no-binary`` disabling the wheel cache. (`#11454 `_) +- Remove ``--use-feature=2020-resolver`` opt-in flag. This was supposed to be removed in 21.0, but missed during that release cycle. (`#11493 `_) +- Deprecate installation with 'setup.py install' when the 'wheel' package is absent for + source distributions without 'pyproject.toml'. (`#8559 `_) +- Remove the ability to use ``pip list --outdated`` in combination with ``--format=freeze``. (`#9789 `_) + +Features +-------- + +- Use ``shell=True`` for opening the editor with ``pip config edit``. (`#10716 `_) +- Use the ``data-dist-info-metadata`` attribute from :pep:`658` to resolve distribution metadata without downloading the dist yet. (`#11111 `_) +- Add an option to run the test suite with pip built as a zipapp. (`#11250 `_) +- Add a ``--python`` option to allow pip to manage Python environments other + than the one pip is installed in. (`#11320 `_) +- Document the new (experimental) zipapp distribution of pip. (`#11459 `_) +- Use the much faster 'bzr co --lightweight' to obtain a copy of a Bazaar tree. (`#5444 `_) + +Bug Fixes +--------- + +- Fix ``--no-index`` when ``--index-url`` or ``--extra-index-url`` is specified + inside a requirements file. (`#11276 `_) +- Ensure that the candidate ``pip`` executable exists, when checking for a new version of pip. (`#11309 `_) +- Ignore distributions with invalid ``Name`` in metadata instead of crashing, when + using the ``importlib.metadata`` backend. (`#11352 `_) +- Raise RequirementsFileParseError when parsing malformed requirements options that can't be successfully parsed by shlex. (`#11491 `_) +- Fix build environment isolation on some system Pythons. (`#6264 `_) + +Vendored Libraries +------------------ + +- Upgrade certifi to 2022.9.24 +- Upgrade distlib to 0.3.6 +- Upgrade idna to 3.4 +- Upgrade pep517 to 0.13.0 +- Upgrade pygments to 2.13.0 +- Upgrade tenacity to 8.1.0 +- Upgrade typing_extensions to 4.4.0 +- Upgrade urllib3 to 1.26.12 + +Improved Documentation +---------------------- + +- Mention that --quiet must be used when writing the installation report to stdout. (`#11357 `_) + + +22.2.2 (2022-08-03) +=================== + +Bug Fixes +--------- + +- Avoid ``AttributeError`` when removing the setuptools-provided ``_distutils_hack`` and it is missing its implementation. (`#11314 `_) +- Fix import error when reinstalling pip in user site. (`#11319 `_) +- Show pip deprecation warnings by default. (`#11330 `_) + + +22.2.1 (2022-07-27) +=================== + +Bug Fixes +--------- + +- Send the pip upgrade prompt to stderr. (`#11282 `_) +- Ensure that things work correctly in environments where setuptools-injected + ``distutils`` is available by default. This is done by cooperating with + setuptools' injection logic to ensure that pip uses the ``distutils`` from the + Python standard library instead. (`#11298 `_) +- Clarify that ``pip cache``'s wheels-related output is about locally built wheels only. (`#11300 `_) + + +22.2 (2022-07-21) +================= + +Deprecations and Removals +------------------------- + +- Remove the ``html5lib`` deprecated feature flag. (`#10825 `_) +- Remove ``--use-deprecated=backtrack-on-build-failures``. (`#11241 `_) + +Features +-------- + +- Add support to use `truststore `_ as an + alternative SSL certificate verification backend. The backend can be enabled on Python + 3.10 and later by installing ``truststore`` into the environment, and adding the + ``--use-feature=truststore`` flag to various pip commands. + + ``truststore`` differs from the current default verification backend (provided by + ``certifi``) in it uses the operating system’s trust store, which can be better + controlled and augmented to better support non-standard certificates. Depending on + feedback, pip may switch to this as the default certificate verification backend in + the future. (`#11082 `_) +- Add ``--dry-run`` option to ``pip install``, to let it print what it would install but + not actually change anything in the target environment. (`#11096 `_) +- Record in wheel cache entries the URL of the original artifact that was downloaded + to build the cached wheels. The record is named ``origin.json`` and uses the PEP 610 + Direct URL format. (`#11137 `_) +- Support `PEP 691 `_. (`#11158 `_) +- pip's deprecation warnings now subclass the built-in ``DeprecationWarning``, and + can be suppressed by running the Python interpreter with + ``-W ignore::DeprecationWarning``. (`#11225 `_) +- Add ``pip inspect`` command to obtain the list of installed distributions and other + information about the Python environment, in JSON format. (`#11245 `_) +- Significantly speed up isolated environment creation, by using the same + sources for pip instead of creating a standalone installation for each + environment. (`#11257 `_) +- Add an experimental ``--report`` option to the install command to generate a JSON report + of what was installed. In combination with ``--dry-run`` and ``--ignore-installed`` it + can be used to resolve the requirements. (`#53 `_) + +Bug Fixes +--------- + +- Fix ``pip install --pre`` for packages with pre-release build dependencies defined + both in ``pyproject.toml``'s ``build-system.requires`` and ``setup.py``'s + ``setup_requires``. (`#10222 `_) +- When pip rewrites the shebang line in a script during wheel installation, + update the hash and size in the corresponding ``RECORD`` file entry. (`#10744 `_) +- Do not consider a ``.dist-info`` directory found inside a wheel-like zip file + as metadata for an installed distribution. A package in a wheel is (by + definition) not installed, and is not guaranteed to work due to how a wheel is + structured. (`#11217 `_) +- Use ``importlib.resources`` to read the ``vendor.txt`` file in ``pip debug``. + This makes the command safe for use from a zipapp. (`#11248 `_) +- Make the ``--use-pep517`` option of the ``download`` command apply not just + to the requirements specified on the command line, but to their dependencies, + as well. (`#9523 `_) + +Process +------- + +- Remove reliance on the stdlib cgi module, which is deprecated in Python 3.11. + +Vendored Libraries +------------------ + +- Remove html5lib. +- Upgrade certifi to 2022.6.15 +- Upgrade chardet to 5.0.0 +- Upgrade colorama to 0.4.5 +- Upgrade distlib to 0.3.5 +- Upgrade msgpack to 1.0.4 +- Upgrade pygments to 2.12.0 +- Upgrade pyparsing to 3.0.9 +- Upgrade requests to 2.28.1 +- Upgrade rich to 12.5.1 +- Upgrade typing_extensions to 4.3.0 +- Upgrade urllib3 to 1.26.10 + + +22.1.2 (2022-05-31) +=================== + +Bug Fixes +--------- + +- Revert `#10979 `_ since it introduced a regression in certain edge cases. (`#10979 `_) +- Fix an incorrect assertion in the logging logic, that prevented the upgrade prompt from being presented. (`#11136 `_) + + +22.1.1 (2022-05-20) +=================== + +Bug Fixes +--------- + +- Properly filter out optional dependencies (i.e. extras) when checking build environment distributions. (`#11112 `_) +- Change the build environment dependency checking to be opt-in. (`#11116 `_) +- Allow using a pre-release version to satisfy a build requirement. This helps + manually populated build environments to more accurately detect build-time + requirement conflicts. (`#11123 `_) + + +22.1 (2022-05-11) +================= + +Process +------- + +- Enable the ``importlib.metadata`` metadata implementation by default on + Python 3.11 (or later). The environment variable ``_PIP_USE_IMPORTLIB_METADATA`` + can still be used to enable the implementation on 3.10 and earlier, or disable + it on 3.11 (by setting it to ``0`` or ``false``). + +Bug Fixes +--------- + +- Revert `#9243 `_ since it introduced a regression in certain edge cases. (`#10962 `_) +- Fix missing ``REQUESTED`` metadata when using URL constraints. (`#11079 `_) +- ``pip config`` now normalizes names by converting underscores into dashes. (`#9330 `_) + + +22.1b1 (2022-04-30) +=================== + +Process +------- + +- Start migration of distribution metadata implementation from ``pkg_resources`` + to ``importlib.metadata``. The new implementation is currently not exposed in + any user-facing way, but included in the code base for easier development. + +Deprecations and Removals +------------------------- + +- Drop ``--use-deprecated=out-of-tree-build``, according to deprecation message. (`#11001 `_) + +Features +-------- + +- Add option to install and uninstall commands to opt-out from running-as-root warning. (`#10556 `_) +- Include Project-URLs in ``pip show`` output. (`#10799 `_) +- Improve error message when ``pip config edit`` is provided an editor that + doesn't exist. (`#10812 `_) +- Add a user interface for supplying config settings to build backends. (`#11059 `_) +- Add support for Powershell autocompletion. (`#9024 `_) +- Explains why specified version cannot be retrieved when *Requires-Python* is not satisfied. (`#9615 `_) +- Validate build dependencies when using ``--no-build-isolation``. (`#9794 `_) + +Bug Fixes +--------- + +- Fix conditional checks to prevent ``pip.exe`` from trying to modify itself, on Windows. (`#10560 `_) +- Fix uninstall editable from Windows junction link. (`#10696 `_) +- Fallback to pyproject.toml-based builds if ``setup.py`` is present in a project, but ``setuptools`` cannot be imported. (`#10717 `_) +- When checking for conflicts in the build environment, correctly skip requirements + containing markers that do not match the current environment. (`#10883 `_) +- Disable brotli import in vendored urllib3 so brotli could be uninstalled/upgraded by pip. (`#10950 `_) +- Prioritize URL credentials over netrc. (`#10979 `_) +- Filter available distributions using hash declarations from constraints files. (`#9243 `_) +- Fix an error when trying to uninstall packages installed as editable from a network drive. (`#9452 `_) +- Fix pip install issues using a proxy due to an inconsistency in how Requests is currently handling variable precedence in session. (`#9691 `_) + +Vendored Libraries +------------------ + +- Upgrade CacheControl to 0.12.11 +- Upgrade distro to 1.7.0 +- Upgrade platformdirs to 2.5.2 +- Remove ``progress`` from vendored dependencies. +- Upgrade ``pyparsing`` to 3.0.8 for startup performance improvements. +- Upgrade rich to 12.2.0 +- Upgrade tomli to 2.0.1 +- Upgrade typing_extensions to 4.2.0 + +Improved Documentation +---------------------- + +- Add more dedicated topic and reference pages to the documentation. (`#10899 `_) +- Capitalise Y as the default for "Proceed (y/n)?" when uninstalling. (`#10936 `_) +- Add ``scheme://`` requirement to ``--proxy`` option's description (`#10951 `_) +- The wheel command now references the build interface section instead of stating the legacy + setuptools behavior as the default. (`#10972 `_) +- Improved usefulness of ``pip config --help`` output. (`#11074 `_) + + +22.0.4 (2022-03-06) +=================== + +Deprecations and Removals +------------------------- + +- Drop the doctype check, that presented a warning for index pages that use non-compliant HTML 5. (`#10903 `_) + +Vendored Libraries +------------------ + +- Downgrade distlib to 0.3.3. + + +22.0.3 (2022-02-03) +=================== + +Features +-------- + +- Print the exception via ``rich.traceback``, when running with ``--debug``. (`#10791 `_) + +Bug Fixes +--------- + +- Only calculate topological installation order, for packages that are going to be installed/upgraded. + + This fixes an `AssertionError` that occurred when determining installation order, for a very specific combination of upgrading-already-installed-package + change of dependencies + fetching some packages from a package index. This combination was especially common in Read the Docs' builds. (`#10851 `_) +- Use ``html.parser`` by default, instead of falling back to ``html5lib`` when ``--use-deprecated=html5lib`` is not passed. (`#10869 `_) + +Improved Documentation +---------------------- + +- Clarify that using per-requirement overrides disables the usage of wheels. (`#9674 `_) + + +22.0.2 (2022-01-30) +=================== + +Deprecations and Removals +------------------------- + +- Instead of failing on index pages that use non-compliant HTML 5, print a deprecation warning and fall back to ``html5lib``-based parsing for now. This simplifies the migration for non-compliant index pages, by letting such indexes function with a warning. (`#10847 `_) + + +22.0.1 (2022-01-30) +=================== + +Bug Fixes +--------- + +- Accept lowercase ```` on index pages. (`#10844 `_) +- Properly handle links parsed by html5lib, when using ``--use-deprecated=html5lib``. (`#10846 `_) + + +22.0 (2022-01-29) +================= + +Process +------- + +- Completely replace :pypi:`tox` in our development workflow, with :pypi:`nox`. + +Deprecations and Removals +------------------------- + +- Deprecate alternative progress bar styles, leaving only ``on`` and ``off`` as available choices. (`#10462 `_) +- Drop support for Python 3.6. (`#10641 `_) +- Disable location mismatch warnings on Python versions prior to 3.10. + + These warnings were helping identify potential issues as part of the sysconfig -> distutils transition, and we no longer need to rely on reports from older Python versions for information on the transition. (`#10840 `_) + +Features +-------- + +- Changed ``PackageFinder`` to parse HTML documents using the stdlib :class:`html.parser.HTMLParser` class instead of the ``html5lib`` package. + + For now, the deprecated ``html5lib`` code remains and can be used with the ``--use-deprecated=html5lib`` command line option. However, it will be removed in a future pip release. (`#10291 `_) +- Utilise ``rich`` for presenting pip's default download progress bar. (`#10462 `_) +- Present a better error message when an invalid wheel file is encountered, providing more context where the invalid wheel file is. (`#10535 `_) +- Documents the ``--require-virtualenv`` flag for ``pip install``. (`#10588 `_) +- ``pip install `` autocompletes paths. (`#10646 `_) +- Allow Python distributors to opt-out from or opt-in to the ``sysconfig`` installation scheme backend by setting ``sysconfig._PIP_USE_SYSCONFIG`` to ``True`` or ``False``. (`#10647 `_) +- Make it possible to deselect tests requiring cryptography package on systems where it cannot be installed. (`#10686 `_) +- Start using Rich for presenting error messages in a consistent format. (`#10703 `_) +- Improve presentation of errors from subprocesses. (`#10705 `_) +- Forward pip's verbosity configuration to VCS tools to control their output accordingly. (`#8819 `_) + +Bug Fixes +--------- + +- Optimize installation order calculation to improve performance when installing requirements that form a complex dependency graph with a large amount of edges. (`#10557 `_) +- When a package is requested by the user for upgrade, correctly identify that the extra-ed variant of that same package depended by another user-requested package is requesting the same package, and upgrade it accordingly. (`#10613 `_) +- Prevent pip from installing yanked releases unless explicitly pinned via the ``==`` or ``===`` operators. (`#10617 `_) +- Stop backtracking on build failures, by instead surfacing them to the user and aborting immediately. This behaviour provides more immediate feedback when a package cannot be built due to missing build dependencies or platform incompatibility. (`#10655 `_) +- Silence ``Value for does not match`` warning caused by an erroneous patch in Slackware-distributed Python 3.9. (`#10668 `_) +- Fix an issue where pip did not consider dependencies with and without extras to be equal (`#9644 `_) + +Vendored Libraries +------------------ + +- Upgrade CacheControl to 0.12.10 +- Upgrade certifi to 2021.10.8 +- Upgrade distlib to 0.3.4 +- Upgrade idna to 3.3 +- Upgrade msgpack to 1.0.3 +- Upgrade packaging to 21.3 +- Upgrade platformdirs to 2.4.1 +- Add pygments 2.11.2 as a vendored dependency. +- Tree-trim unused portions of vendored pygments, to reduce the distribution size. +- Upgrade pyparsing to 3.0.7 +- Upgrade Requests to 2.27.1 +- Upgrade resolvelib to 0.8.1 +- Add rich 11.0.0 as a vendored dependency. +- Tree-trim unused portions of vendored rich, to reduce the distribution size. +- Add typing_extensions 4.0.1 as a vendored dependency. +- Upgrade urllib3 to 1.26.8 + + +21.3.1 (2021-10-22) +=================== + + +Bug Fixes +--------- + + +- Always refuse installing or building projects that have no ``pyproject.toml`` nor + ``setup.py``. (`#10531 `_) +- Tweak running-as-root detection, to check ``os.getuid`` if it exists, on Unix-y and non-Linux/non-MacOS machines. (`#10565 `_) +- When installing projects with a ``pyproject.toml`` in editable mode, and the build + backend does not support :pep:`660`, prepare metadata using + ``prepare_metadata_for_build_wheel`` instead of ``setup.py egg_info``. Also, refuse + installing projects that only have a ``setup.cfg`` and no ``setup.py`` nor + ``pyproject.toml``. These restore the pre-21.3 behaviour. (`#10573 `_) +- Restore compatibility of where configuration files are loaded from on MacOS (back to ``Library/Application Support/pip``, instead of ``Preferences/pip``). (`#10585 `_) + +Vendored Libraries +------------------ + + +- Upgrade pep517 to 0.12.0 + + 21.3 (2021-10-11) ================= diff --git a/README.rst b/README.rst index 6810315526b..7e08f857c4c 100644 --- a/README.rst +++ b/README.rst @@ -33,7 +33,6 @@ If you want to get involved head over to GitHub to get the source code, look at * `GitHub page`_ * `Development documentation`_ -* `Development mailing list`_ * `Development IRC`_ Code of Conduct @@ -56,7 +55,6 @@ rooms, and mailing lists is expected to follow the `PSF Code of Conduct`_. .. _Python 2 support policy: https://pip.pypa.io/en/latest/development/release-process/#python-2-support .. _Issue tracking: https://github.com/pypa/pip/issues .. _Discourse channel: https://discuss.python.org/c/packaging -.. _Development mailing list: https://mail.python.org/mailman3/lists/distutils-sig.python.org/ .. _User IRC: https://kiwiirc.com/nextclient/#ircs://irc.libera.chat:+6697/pypa .. _Development IRC: https://kiwiirc.com/nextclient/#ircs://irc.libera.chat:+6697/pypa-dev .. _PSF Code of Conduct: https://github.com/pypa/.github/blob/main/CODE_OF_CONDUCT.md diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 00000000000..4e423805aee --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,3 @@ +# Security and Vulnerability Reporting + +If you find any security issues, please report to [security@python.org](mailto:security@python.org) diff --git a/docs/html/cli/index.md b/docs/html/cli/index.md index a3497c308c2..4b56dbeb4cb 100644 --- a/docs/html/cli/index.md +++ b/docs/html/cli/index.md @@ -16,6 +16,7 @@ pip pip_install pip_uninstall +pip_inspect pip_list pip_show pip_freeze diff --git a/docs/html/cli/pip.rst b/docs/html/cli/pip.rst index 77aac009fa1..88ef33e1163 100644 --- a/docs/html/cli/pip.rst +++ b/docs/html/cli/pip.rst @@ -71,12 +71,11 @@ when decision is needed. Rename the file or checkout to ``{name}{'.bak' * n}``, where n is some number of ``.bak`` extensions, such that the file didn't exist at some point. So the most recent backup will be the one with the largest number after ``.bak``. -*(a)abort* +*(a)bort* Abort pip and return non-zero exit status. - -Build System Interface -====================== +.. _`2-build-system-interface`: +.. rubric:: Build System Interface This is now covered in :doc:`../reference/build-system/index`. diff --git a/docs/html/cli/pip_download.rst b/docs/html/cli/pip_download.rst index 4f15314d765..d247c51ccfb 100644 --- a/docs/html/cli/pip_download.rst +++ b/docs/html/cli/pip_download.rst @@ -43,7 +43,9 @@ match the constraint of the current interpreter (but not your target one), it is recommended to specify all of these options if you are specifying one of them. Generic dependencies (e.g. universal wheels, or dependencies with no platform, abi, or implementation constraints) will still match an over- -constrained download requirement. +constrained download requirement. If some of your dependencies are not +available as binaries, you can build them manually for your target platform +and let pip download know where to find them using ``--find-links``. @@ -79,7 +81,7 @@ Examples #. Download a package and all of its dependencies with OSX specific interpreter constraints. This forces OSX 10.10 or lower compatibility. Since OSX deps are forward compatible, - this will also match ``macosx-10_9_x86_64``, ``macosx-10_8_x86_64``, ``macosx-10_8_intel``, + this will also match ``macosx_10_9_x86_64``, ``macosx_10_8_x86_64``, ``macosx_10_8_intel``, etc. It will also match deps with platform ``any``. Also force the interpreter version to ``27`` (or more generic, i.e. ``2``) and implementation to ``cp`` (or more generic, i.e. ``py``). @@ -90,7 +92,7 @@ Examples python -m pip download \ --only-binary=:all: \ - --platform macosx-10_10_x86_64 \ + --platform macosx_10_10_x86_64 \ --python-version 27 \ --implementation cp \ SomePackage @@ -101,7 +103,7 @@ Examples py -m pip download ^ --only-binary=:all: ^ - --platform macosx-10_10_x86_64 ^ + --platform macosx_10_10_x86_64 ^ --python-version 27 ^ --implementation cp ^ SomePackage diff --git a/docs/html/cli/pip_freeze.rst b/docs/html/cli/pip_freeze.rst index 3533db7930c..b90bcad70fb 100644 --- a/docs/html/cli/pip_freeze.rst +++ b/docs/html/cli/pip_freeze.rst @@ -84,7 +84,7 @@ This error occurs, for instance, when the command is installed only for another user, and the current user doesn't have the permission to execute the other user's command. -To solve that issue, you can try one of the followings: +To solve that issue, you can try one of the following: - Install the command for yourself (e.g. in your home directory). - Ask the system admin to allow this command for all users. diff --git a/docs/html/cli/pip_inspect.rst b/docs/html/cli/pip_inspect.rst new file mode 100644 index 00000000000..2aa8bd3b846 --- /dev/null +++ b/docs/html/cli/pip_inspect.rst @@ -0,0 +1,33 @@ +.. _`pip inspect`: + +=========== +pip inspect +=========== + +.. versionadded:: 22.2 + + +Usage +===== + +.. tab:: Unix/macOS + + .. pip-command-usage:: inspect "python -m pip" + +.. tab:: Windows + + .. pip-command-usage:: inspect "py -m pip" + + +Description +=========== + +.. pip-command-description:: inspect + +The format of the JSON output is described in :doc:`../reference/inspect-report`. + + +Options +======= + +.. pip-command-options:: inspect diff --git a/docs/html/cli/pip_install.rst b/docs/html/cli/pip_install.rst index 8f5a3191439..951dc2705a3 100644 --- a/docs/html/cli/pip_install.rst +++ b/docs/html/cli/pip_install.rst @@ -11,7 +11,7 @@ Usage .. tab:: Unix/macOS - .. pip-command-usage:: install "python -m pip" + .. pip-command-usage:: install 'python -m pip' .. tab:: Windows @@ -79,6 +79,18 @@ for an exception regarding pre-release versions). Where more than one source of the chosen version is available, it is assumed that any source is acceptable (as otherwise the versions would differ). +Obtaining information about what was installed +---------------------------------------------- + +The install command has a ``--report`` option that will generate a JSON report of what +pip has installed. In combination with the ``--dry-run`` and ``--ignore-installed`` it +can be used to *resolve* a set of requirements without actually installing them. + +The report can be written to a file, or to standard output (using ``--report -`` in +combination with ``--quiet``). + +The format of the JSON report is described in :doc:`../reference/installation-report`. + Installation Order ------------------ @@ -148,94 +160,20 @@ profile: 3. For whatever reason, they don't or won't declare their build dependencies using ``setup_requires``. - -Requirements File Format ------------------------- +.. _`0-requirements-file-format`: +.. rubric:: Requirements File Format This section has been moved to :doc:`../reference/requirements-file-format`. -.. _`Requirement Specifiers`: - -Requirement Specifiers ----------------------- - -pip supports installing from a package index using a :term:`requirement -specifier `. Generally speaking, a requirement -specifier is composed of a project name followed by optional :term:`version -specifiers `. :pep:`508` contains a full specification -of the format of a requirement. Since version 18.1 pip supports the -``url_req``-form specification. - -Some examples: - - :: - - SomeProject - SomeProject == 1.3 - SomeProject >=1.2,<2.0 - SomeProject[foo, bar] - SomeProject~=1.4.2 - -Since version 6.0, pip also supports specifiers containing `environment markers -`__ like so: - - :: - - SomeProject ==5.4 ; python_version < '3.8' - SomeProject; sys_platform == 'win32' - -Since version 19.1, pip also supports `direct references -`__ like so: - - :: - - SomeProject @ file:///somewhere/... - -Environment markers are supported in the command line and in requirements files. - -.. note:: - - Use quotes around specifiers in the shell when using ``>``, ``<``, or when - using environment markers. Don't use quotes in requirement files. [1]_ - - -.. _`Per-requirement Overrides`: - -Per-requirement Overrides -------------------------- - -Since version 7.0 pip supports controlling the command line options given to -``setup.py`` via requirements files. This disables the use of wheels (cached or -otherwise) for that package, as ``setup.py`` does not exist for wheels. - -The ``--global-option`` and ``--install-option`` options are used to pass -options to ``setup.py``. For example: - - :: - - FooProject >= 1.2 --global-option="--no-user-cfg" \ - --install-option="--prefix='/usr/local'" \ - --install-option="--no-compile" - -The above translates roughly into running FooProject's ``setup.py`` -script as: +.. _`0-requirement-specifiers`: +.. rubric:: Requirement Specifiers - :: +This section has been moved to :doc:`../reference/requirement-specifiers`. - python setup.py --no-user-cfg install --prefix='/usr/local' --no-compile - -Note that the only way of giving more than one option to ``setup.py`` -is through multiple ``--global-option`` and ``--install-option`` -options, as shown in the example above. The value of each option is -passed as a single argument to the ``setup.py`` script. Therefore, a -line such as the following is invalid and would result in an -installation error. - -:: - - # Invalid. Please use '--install-option' twice as shown above. - FooProject >= 1.2 --install-option="--prefix=/usr/local --no-compile" +.. _`0-per-requirement-overrides`: +.. rubric:: Per-requirement Overrides +This is now covered in :doc:`../reference/requirements-file-format`. .. _`Pre Release Versions`: @@ -256,11 +194,8 @@ that enables installation of pre-releases and development releases. .. _pre-releases: https://www.python.org/dev/peps/pep-0440/#handling-of-pre-releases - -.. _`VCS Support`: - -VCS Support ------------ +.. _`0-vcs-support`: +.. rubric:: VCS Support This is now covered in :doc:`../topics/vcs-support`. @@ -284,270 +219,43 @@ details) is selected. See the :ref:`pip install Examples`. +.. _`0-ssl certificate verification`: +.. rubric:: SSL Certificate Verification -.. _`SSL Certificate Verification`: - -SSL Certificate Verification ----------------------------- - -Starting with v1.3, pip provides SSL certificate verification over HTTP, to -prevent man-in-the-middle attacks against PyPI downloads. This does not use -the system certificate store but instead uses a bundled CA certificate -store. The default bundled CA certificate store certificate store may be -overridden by using ``--cert`` option or by using ``PIP_CERT``, -``REQUESTS_CA_BUNDLE``, or ``CURL_CA_BUNDLE`` environment variables. +This is now covered in :doc:`../topics/https-certificates`. - -.. _`Caching`: - -Caching -------- +.. _`0-caching`: +.. rubric:: Caching This is now covered in :doc:`../topics/caching`. -.. _`Wheel cache`: - -Wheel Cache -^^^^^^^^^^^ +.. _`0-wheel-cache`: +.. rubric:: Wheel Cache This is now covered in :doc:`../topics/caching`. -.. _`hash-checking mode`: +.. _`0-hash-checking-mode`: +.. rubric:: Hash checking mode -Hash-Checking Mode ------------------- +This is now covered in :doc:`../topics/secure-installs`. -Since version 8.0, pip can check downloaded package archives against local -hashes to protect against remote tampering. To verify a package against one or -more hashes, add them to the end of the line:: - - FooProject == 1.2 --hash=sha256:2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824 \ - --hash=sha256:486ea46224d1bb4fb680f34f7c9ad96a8f24ec88be73ea8e5a6c65260e9cb8a7 - -(The ability to use multiple hashes is important when a package has both -binary and source distributions or when it offers binary distributions for a -variety of platforms.) - -The recommended hash algorithm at the moment is sha256, but stronger ones are -allowed, including all those supported by ``hashlib``. However, weaker ones -such as md5, sha1, and sha224 are excluded to avoid giving a false sense of -security. - -Hash verification is an all-or-nothing proposition. Specifying a ``--hash`` -against any requirement not only checks that hash but also activates a global -*hash-checking mode*, which imposes several other security restrictions: - -* Hashes are required for all requirements. This is because a partially-hashed - requirements file is of little use and thus likely an error: a malicious - actor could slip bad code into the installation via one of the unhashed - requirements. Note that hashes embedded in URL-style requirements via the - ``#md5=...`` syntax suffice to satisfy this rule (regardless of hash - strength, for legacy reasons), though you should use a stronger - hash like sha256 whenever possible. -* Hashes are required for all dependencies. An error results if there is a - dependency that is not spelled out and hashed in the requirements file. -* Requirements that take the form of project names (rather than URLs or local - filesystem paths) must be pinned to a specific version using ``==``. This - prevents a surprising hash mismatch upon the release of a new version - that matches the requirement specifier. -* ``--egg`` is disallowed, because it delegates installation of dependencies - to setuptools, giving up pip's ability to enforce any of the above. - -.. _`--require-hashes`: - -Hash-checking mode can be forced on with the ``--require-hashes`` command-line -option: +.. _`0-local-project-installs`: +.. rubric:: Local Project Installs -.. tab:: Unix/macOS +This is now covered in :doc:`../topics/local-project-installs`. - .. code-block:: console +.. _`0-editable-installs`: +.. rubric:: Editable installs - $ python -m pip install --require-hashes -r requirements.txt - ... - Hashes are required in --require-hashes mode (implicitly on when a hash is - specified for any package). These requirements were missing hashes, - leaving them open to tampering. These are the hashes the downloaded - archives actually had. You can add lines like these to your requirements - files to prevent tampering. - pyelasticsearch==1.0 --hash=sha256:44ddfb1225054d7d6b1d02e9338e7d4809be94edbe9929a2ec0807d38df993fa - more-itertools==2.2 --hash=sha256:93e62e05c7ad3da1a233def6731e8285156701e3419a5fe279017c429ec67ce0 +This is now covered in :doc:`../topics/local-project-installs`. -.. tab:: Windows - - .. code-block:: console - - C:\> py -m pip install --require-hashes -r requirements.txt - ... - Hashes are required in --require-hashes mode (implicitly on when a hash is - specified for any package). These requirements were missing hashes, - leaving them open to tampering. These are the hashes the downloaded - archives actually had. You can add lines like these to your requirements - files to prevent tampering. - pyelasticsearch==1.0 --hash=sha256:44ddfb1225054d7d6b1d02e9338e7d4809be94edbe9929a2ec0807d38df993fa - more-itertools==2.2 --hash=sha256:93e62e05c7ad3da1a233def6731e8285156701e3419a5fe279017c429ec67ce0 - - -This can be useful in deploy scripts, to ensure that the author of the -requirements file provided hashes. It is also a convenient way to bootstrap -your list of hashes, since it shows the hashes of the packages it fetched. It -fetches only the preferred archive for each package, so you may still need to -add hashes for alternatives archives using :ref:`pip hash`: for instance if -there is both a binary and a source distribution. - -The :ref:`wheel cache ` is disabled in hash-checking mode to -prevent spurious hash mismatch errors. These would otherwise occur while -installing sdists that had already been automatically built into cached wheels: -those wheels would be selected for installation, but their hashes would not -match the sdist ones from the requirements file. A further complication is that -locally built wheels are nondeterministic: contemporary modification times make -their way into the archive, making hashes unpredictable across machines and -cache flushes. Compilation of C code adds further nondeterminism, as many -compilers include random-seeded values in their output. However, wheels fetched -from index servers are the same every time. They land in pip's HTTP cache, not -its wheel cache, and are used normally in hash-checking mode. The only downside -of having the wheel cache disabled is thus extra build time for sdists, and -this can be solved by making sure pre-built wheels are available from the index -server. - -Hash-checking mode also works with :ref:`pip download` and :ref:`pip wheel`. -See :doc:`../topics/repeatable-installs` for a comparison of hash-checking mode -with other repeatability strategies. - -.. warning:: - - Beware of the ``setup_requires`` keyword arg in :file:`setup.py`. The - (rare) packages that use it will cause those dependencies to be downloaded - by setuptools directly, skipping pip's hash-checking. If you need to use - such a package, see :ref:`Controlling - setup_requires `. - -.. warning:: - - Be careful not to nullify all your security work when you install your - actual project by using setuptools directly: for example, by calling - ``python setup.py install``, ``python setup.py develop``, or - ``easy_install``. Setuptools will happily go out and download, unchecked, - anything you missed in your requirements file—and it’s easy to miss things - as your project evolves. To be safe, install your project using pip and - :ref:`--no-deps `. - - Instead of ``python setup.py develop``, use... - - .. tab:: Unix/macOS - - .. code-block:: shell - - python -m pip install --no-deps -e . - - .. tab:: Windows - - .. code-block:: shell - - py -m pip install --no-deps -e . - - - Instead of ``python setup.py install``, use... - - .. tab:: Unix/macOS - - .. code-block:: shell - - python -m pip install --no-deps . - - .. tab:: Windows - - .. code-block:: shell - - py -m pip install --no-deps . - -Hashes from PyPI -^^^^^^^^^^^^^^^^ - -PyPI provides an MD5 hash in the fragment portion of each package download URL, -like ``#md5=123...``, which pip checks as a protection against download -corruption. Other hash algorithms that have guaranteed support from ``hashlib`` -are also supported here: sha1, sha224, sha384, sha256, and sha512. Since this -hash originates remotely, it is not a useful guard against tampering and thus -does not satisfy the ``--require-hashes`` demand that every package have a -local hash. - - -Local project installs ----------------------- - -pip supports installing local project in both regular mode and editable mode. -You can install local projects by specifying the project path to pip: - -.. tab:: Unix/macOS - - .. code-block:: shell - - python -m pip install path/to/SomeProject - -.. tab:: Windows - - .. code-block:: shell - - py -m pip install path/to/SomeProject - -.. note:: - - Depending on the build backend used by the project, this may generate - secondary build artifacts in the project directory, such as the - ``.egg-info`` and ``build`` directories in the case of the setuptools - backend. - - Pip has a legacy behaviour that copies the entire project directory to a - temporary location and installs from there. This approach was the cause of - several performance and correctness issues, so it is now disabled by - default, and it is planned that pip 22.1 will remove it. - - To opt in to the legacy behavior, specify the - ``--use-deprecated=out-of-tree-build`` option in pip's command line. - - -.. _`editable-installs`: - -"Editable" Installs -^^^^^^^^^^^^^^^^^^^ - -"Editable" installs are fundamentally `"setuptools develop mode" -`_ -installs. - -You can install local projects or VCS projects in "editable" mode: - -.. tab:: Unix/macOS - - .. code-block:: shell - - python -m pip install -e path/to/SomeProject - python -m pip install -e git+http://repo/my_project.git#egg=SomeProject - -.. tab:: Windows - - .. code-block:: shell - - py -m pip install -e path/to/SomeProject - py -m pip install -e git+http://repo/my_project.git#egg=SomeProject - - -(See the :doc:`../topics/vcs-support` section above for more information on VCS-related syntax.) - -For local projects, the "SomeProject.egg-info" directory is created relative to -the project path. This is one advantage over just using ``setup.py develop``, -which creates the "egg-info" directly relative the current working directory. - - -Build System Interface ----------------------- +.. _`0-build-system-interface`: +.. rubric:: Build System Interface This is now covered in :doc:`../reference/build-system/index`. - .. _`pip install Options`: - Options ======= @@ -569,7 +277,7 @@ Examples .. code-block:: shell python -m pip install SomePackage # latest version - python -m pip install SomePackage==1.0.4 # specific version + python -m pip install 'SomePackage==1.0.4' # specific version python -m pip install 'SomePackage>=1.0.4' # minimum version .. tab:: Windows @@ -577,8 +285,8 @@ Examples .. code-block:: shell py -m pip install SomePackage # latest version - py -m pip install SomePackage==1.0.4 # specific version - py -m pip install 'SomePackage>=1.0.4' # minimum version + py -m pip install "SomePackage==1.0.4" # specific version + py -m pip install "SomePackage>=1.0.4" # minimum version #. Install a list of requirements specified in a file. See the :ref:`Requirements files `. @@ -641,13 +349,13 @@ Examples .. code-block:: shell - python -m pip install SomeProject@git+https://git.repo/some_pkg.git@1.3.1 + python -m pip install 'SomeProject@git+https://git.repo/some_pkg.git@1.3.1' .. tab:: Windows .. code-block:: shell - py -m pip install SomeProject@git+https://git.repo/some_pkg.git@1.3.1 + py -m pip install "SomeProject@git+https://git.repo/some_pkg.git@1.3.1" #. Install a project from VCS in "editable" mode. See the sections on :doc:`../topics/vcs-support` and :ref:`Editable Installs `. @@ -656,43 +364,43 @@ Examples .. code-block:: shell - python -m pip install -e git+https://git.repo/some_pkg.git#egg=SomePackage # from git - python -m pip install -e hg+https://hg.repo/some_pkg.git#egg=SomePackage # from mercurial - python -m pip install -e svn+svn://svn.repo/some_pkg/trunk/#egg=SomePackage # from svn - python -m pip install -e git+https://git.repo/some_pkg.git@feature#egg=SomePackage # from 'feature' branch - python -m pip install -e "git+https://git.repo/some_repo.git#egg=subdir&subdirectory=subdir_path" # install a python package from a repo subdirectory + python -m pip install -e 'git+https://git.repo/some_pkg.git#egg=SomePackage' # from git + python -m pip install -e 'hg+https://hg.repo/some_pkg.git#egg=SomePackage' # from mercurial + python -m pip install -e 'svn+svn://svn.repo/some_pkg/trunk/#egg=SomePackage' # from svn + python -m pip install -e 'git+https://git.repo/some_pkg.git@feature#egg=SomePackage' # from 'feature' branch + python -m pip install -e 'git+https://git.repo/some_repo.git#egg=subdir&subdirectory=subdir_path' # install a python package from a repo subdirectory .. tab:: Windows .. code-block:: shell - py -m pip install -e git+https://git.repo/some_pkg.git#egg=SomePackage # from git - py -m pip install -e hg+https://hg.repo/some_pkg.git#egg=SomePackage # from mercurial - py -m pip install -e svn+svn://svn.repo/some_pkg/trunk/#egg=SomePackage # from svn - py -m pip install -e git+https://git.repo/some_pkg.git@feature#egg=SomePackage # from 'feature' branch + py -m pip install -e "git+https://git.repo/some_pkg.git#egg=SomePackage" # from git + py -m pip install -e "hg+https://hg.repo/some_pkg.git#egg=SomePackage" # from mercurial + py -m pip install -e "svn+svn://svn.repo/some_pkg/trunk/#egg=SomePackage" # from svn + py -m pip install -e "git+https://git.repo/some_pkg.git@feature#egg=SomePackage" # from 'feature' branch py -m pip install -e "git+https://git.repo/some_repo.git#egg=subdir&subdirectory=subdir_path" # install a python package from a repo subdirectory -#. Install a package with `setuptools extras`_. +#. Install a package with `extras`_. .. tab:: Unix/macOS .. code-block:: shell - python -m pip install SomePackage[PDF] - python -m pip install "SomePackage[PDF] @ git+https://git.repo/SomePackage@main#subdirectory=subdir_path" - python -m pip install .[PDF] # project in current directory - python -m pip install SomePackage[PDF]==3.0 - python -m pip install SomePackage[PDF,EPUB] # multiple extras + python -m pip install 'SomePackage[PDF]' + python -m pip install 'SomePackage[PDF] @ git+https://git.repo/SomePackage@main#subdirectory=subdir_path' + python -m pip install '.[PDF]' # project in current directory + python -m pip install 'SomePackage[PDF]==3.0' + python -m pip install 'SomePackage[PDF,EPUB]' # multiple extras .. tab:: Windows .. code-block:: shell - py -m pip install SomePackage[PDF] + py -m pip install "SomePackage[PDF]" py -m pip install "SomePackage[PDF] @ git+https://git.repo/SomePackage@main#subdirectory=subdir_path" - py -m pip install .[PDF] # project in current directory - py -m pip install SomePackage[PDF]==3.0 - py -m pip install SomePackage[PDF,EPUB] # multiple extras + py -m pip install ".[PDF]" # project in current directory + py -m pip install "SomePackage[PDF]==3.0" + py -m pip install "SomePackage[PDF,EPUB]" # multiple extras #. Install a particular source archive file. @@ -700,15 +408,15 @@ Examples .. code-block:: shell - python -m pip install ./downloads/SomePackage-1.0.4.tar.gz - python -m pip install http://my.package.repo/SomePackage-1.0.4.zip + python -m pip install './downloads/SomePackage-1.0.4.tar.gz' + python -m pip install 'http://my.package.repo/SomePackage-1.0.4.zip' .. tab:: Windows .. code-block:: shell - py -m pip install ./downloads/SomePackage-1.0.4.tar.gz - py -m pip install http://my.package.repo/SomePackage-1.0.4.zip + py -m pip install "./downloads/SomePackage-1.0.4.tar.gz" + py -m pip install "http://my.package.repo/SomePackage-1.0.4.zip" #. Install a particular source archive file following :pep:`440` direct references. @@ -716,17 +424,17 @@ Examples .. code-block:: shell - python -m pip install SomeProject@http://my.package.repo/SomeProject-1.2.3-py33-none-any.whl - python -m pip install "SomeProject @ http://my.package.repo/SomeProject-1.2.3-py33-none-any.whl" - python -m pip install SomeProject@http://my.package.repo/1.2.3.tar.gz + python -m pip install 'SomeProject@http://my.package.repo/SomeProject-1.2.3-py33-none-any.whl' + python -m pip install 'SomeProject @ http://my.package.repo/SomeProject-1.2.3-py33-none-any.whl' + python -m pip install 'SomeProject@http://my.package.repo/1.2.3.tar.gz' .. tab:: Windows .. code-block:: shell - py -m pip install SomeProject@http://my.package.repo/SomeProject-1.2.3-py33-none-any.whl + py -m pip install "SomeProject@http://my.package.repo/SomeProject-1.2.3-py33-none-any.whl" py -m pip install "SomeProject @ http://my.package.repo/SomeProject-1.2.3-py33-none-any.whl" - py -m pip install SomeProject@http://my.package.repo/1.2.3.tar.gz + py -m pip install "SomeProject@http://my.package.repo/1.2.3.tar.gz" #. Install from alternative package repositories. @@ -831,10 +539,5 @@ Examples py -m pip install SomePackage1 SomePackage2 --no-binary SomePackage1 ----- - -.. [1] This is true with the exception that pip v7.0 and v7.0.1 required quotes - around specifiers containing environment markers in requirement files. - -.. _setuptools extras: https://setuptools.readthedocs.io/en/latest/userguide/dependency_management.html#optional-dependencies +.. _extras: https://www.python.org/dev/peps/pep-0508/#extras .. _PyPI: https://pypi.org/ diff --git a/docs/html/cli/pip_list.rst b/docs/html/cli/pip_list.rst index c84cb8c094d..739435981c3 100644 --- a/docs/html/cli/pip_list.rst +++ b/docs/html/cli/pip_list.rst @@ -226,6 +226,6 @@ The json format outputs an additional ``editable_project_location`` field. .. note:: - Contrary to the ``freeze`` comand, ``pip list --format=freeze`` will not + Contrary to the ``freeze`` command, ``pip list --format=freeze`` will not report editable install information, but the version of the package at the time it was installed. diff --git a/docs/html/cli/pip_uninstall.rst b/docs/html/cli/pip_uninstall.rst index e6eeb5ebf6a..0dd52619d80 100644 --- a/docs/html/cli/pip_uninstall.rst +++ b/docs/html/cli/pip_uninstall.rst @@ -43,7 +43,7 @@ Examples Uninstalling simplejson: /home/me/env/lib/python3.9/site-packages/simplejson /home/me/env/lib/python3.9/site-packages/simplejson-2.2.1-py3.9.egg-info - Proceed (y/n)? y + Proceed (Y/n)? y Successfully uninstalled simplejson .. tab:: Windows @@ -54,5 +54,5 @@ Examples Uninstalling simplejson: /home/me/env/lib/python3.9/site-packages/simplejson /home/me/env/lib/python3.9/site-packages/simplejson-2.2.1-py3.9.egg-info - Proceed (y/n)? y + Proceed (Y/n)? y Successfully uninstalled simplejson diff --git a/docs/html/cli/pip_wheel.rst b/docs/html/cli/pip_wheel.rst index bf371f28506..bfd19a0ccb1 100644 --- a/docs/html/cli/pip_wheel.rst +++ b/docs/html/cli/pip_wheel.rst @@ -25,11 +25,19 @@ Description .. pip-command-description:: wheel -Build System Interface ----------------------- +.. _`1-build-system-interface`: +.. rubric:: Build System Interface This is now covered in :doc:`../reference/build-system/index`. +Differences to ``build`` +------------------------ + +`build `_ is a simple tool which can among other things build +wheels for projects using PEP 517. It is comparable to the execution of ``pip wheel --no-deps .``. +It can also build source distributions which is not possible with ``pip``. +``pip wheel`` covers the wheel scope of ``build`` but offers many additional features. + Options ======= diff --git a/docs/html/conf.py b/docs/html/conf.py index 64fddeffc3e..cc967e0ba3c 100644 --- a/docs/html/conf.py +++ b/docs/html/conf.py @@ -55,6 +55,7 @@ # -- Options for myst-parser ---------------------------------------------------------- myst_enable_extensions = ["deflist"] +myst_heading_anchors = 3 # -- Options for smartquotes ---------------------------------------------------------- diff --git a/docs/html/development/architecture/anatomy.rst b/docs/html/development/architecture/anatomy.rst index 2f1ab0fe8bb..98708f2afeb 100644 --- a/docs/html/development/architecture/anatomy.rst +++ b/docs/html/development/architecture/anatomy.rst @@ -24,24 +24,23 @@ The ``README``, license, ``pyproject.toml``, ``setup.py``, and so on are in the * ``README.rst`` * ``setup.cfg`` * ``setup.py`` -* ``tox.ini`` -- ``pip`` uses Tox, an automation tool, configured by this `tox.ini`_ file. ``tox.ini`` describes a few environments ``pip`` uses during development for simplifying how tests are run (complicated situation there). Example: ``tox -e -py36``. We can run tests for different versions of Python by changing “36” to “27” or similar. -* ``.coveragerc`` +* ``noxfile.py`` -- ``pip`` uses Nox, an automation tool, configured by this file. ``noxfile.py`` describes a few environments ``pip`` uses during development for simplifying how tests are run (complicated situation there). Example: ``nox -s lint``, ``nox -s test-3.10``. We can run tests for different versions of Python by changing “3.10” to “3.7” or similar. * ``.gitattributes`` * ``.gitignore`` * ``.mailmap`` -* ``.readthedocs.yml`` * ``docs/`` *[documentation, built with Sphinx]* * ``html/`` *[sources to HTML documentation avail. online]* * ``man/`` has man pages the distros can use by running ``man pip`` * ``pip_sphinxext.py`` *[an extension -- pip-specific plugins to Sphinx that do not apply to other packages]* + * ``requirements.txt`` * ``news/`` *[pip stores news fragments… Every time pip makes a user-facing change, a file is added to this directory (usually a short note referring to a GitHub issue) with the right extension & name so it gets included in changelog…. So every release the maintainers will be deleting old files in this directory? Yes - we use the towncrier automation to generate a NEWS file, and auto-delete old stuff. There’s more about this in the contributor documentation!]* * ``template.rst`` *[template for changelog -- this is a file towncrier uses…. Is this jinja? I don’t know, check towncrier docs]* * ``src/`` *[source; see below]* -* ``tasks/`` *[invoke is a PyPI library which uses files in this directory to define automation commands that are used in pip’s development processes -- not discussing further right now. For instance, automating the release.]* +* ``tools/`` *[misc development workflow tools, like requirements files & CI files & helpers. For instance, automating the release.]* * ``tests/`` -- contains tests you can run. There are instructions in :doc:`../getting-started`. * ``__init__.py`` @@ -51,10 +50,7 @@ The ``README``, license, ``pyproject.toml``, ``setup.py``, and so on are in the * ``lib/`` *[helpers for tests]* * ``unit/`` *[unit tests -- fast and small and nice!]* -* ``tools`` *[misc development workflow tools, like requirements files & CI files & helpers for tox]* * ``.github`` -* ``.tox`` - src directory @@ -67,24 +63,23 @@ dependencies (code from other packages). Within ``src/``: -* ``pip.egg-info/`` *[ignore the contents for now]* * ``pip/`` * ``__init__.py`` * ``__main__.py`` - * ``__pycache__/`` *[not discussing contents right now]* * ``_internal/`` *[where all the pip code lives that’s written by pip maintainers -- underscore means private. pip is not a library -- it’s a command line tool! A very important distinction! People who want to install stuff with pip should not use the internals -- they should use the CLI. There’s a note on this in the docs.]* * ``__init__.py`` - * ``build_env.py`` [not discussing now] + * ``build_env.py`` * ``cache.py`` *[has all the info for how to handle caching within pip -- cache-handling stuff. Uses cachecontrol from PyPI, vendored into pip]* * ``cli/`` *[subpackage containing helpers & additional code for managing the command line interface. Uses argparse from stdlib]* * ``commands/`` *[literally - each file is the name of the command on the pip CLI. Each has a class that defines what’s needed to set it up, what happens]* * ``configuration.py`` * ``download.py`` * ``exceptions.py`` - * ``index.py`` - * ``locations.py`` + * ``index/`` + * ``locations/`` + * ``main.py`` *[legacy entry point]* * ``models/`` *[in-process refactoring! Goal: improve how pip internally models representations it has for data -- data representation. General overall cleanup. Data reps are spread throughout codebase….link is defined in a class in 1 file, and then another file imports Link from that file. Sometimes cyclic dependency?!?! To prevent future situations like this, etc., Pradyun started moving these into a models directory.]* * ``operations/`` -- a bit of a weird directory….. ``Freeze.py`` used to be in there. Freeze is an operation -- there was an operations.freeze. Then “prepare” got added (the operation of preparing a pkg). Then “check” got added for checking the state of an env.] [what’s a command vs an operation? Command is on CLI; an operation would be an internal bit of code that actually does some subset of the operation the command says. ``install`` command uses bits of ``check`` and ``prepare``, for instance. In the long run, Pradyun’s goal: ``prepare.py`` goes away (gets refactored into other files) such that ``operations`` is just ``check`` and ``freeze``..... … Pradyun plans to refactor this. [how does this compare to ``utils``?] @@ -102,5 +97,4 @@ Within ``src/``: .. _`tracking issue`: https://github.com/pypa/pip/issues/6831 .. _GitHub repository: https://github.com/pypa/pip/ -.. _tox.ini: https://github.com/pypa/pip/blob/main/tox.ini .. _improving the pip dependency resolver: https://github.com/pypa/pip/issues/988 diff --git a/docs/html/development/architecture/overview.rst b/docs/html/development/architecture/overview.rst index f9bcfb8731e..63445754a76 100644 --- a/docs/html/development/architecture/overview.rst +++ b/docs/html/development/architecture/overview.rst @@ -24,9 +24,9 @@ Things pip does: backwards compatibility reasons. But thing with setuptools: has a ``setup.py`` file that it invokes to …… get info? -2. Decides where to install stuff. Once the package is built, resulting - artifact is then installed into system in appropriate place. :pep:`517` - defines interface between build backend & installer. +2. Decides where to install stuff. Once the package is built, the resulting + artifact is then installed to the system in its appropriate place. :pep:`517` + defines the interface between the build backend & installer. Broad overview of flow ====================== @@ -111,24 +111,24 @@ The package index gives pip a list of files for that package (via the existing P pip chooses from the list a single file to download. -It may go back and choose another file to download +It may go back and choose another file to download. When pip looks at the package index, the place where it looks has -basically a link. The link’s text is the name of the file +basically a link. The link’s text is the name of the file. This is the `PyPI Simple API`_ (PyPI has several APIs, some are being deprecated). pip looks at Simple API, documented initially at :pep:`503` -- packaging.python.org has PyPA specifications with more details for -Simple Repository API +Simple Repository API. -For this package name -- this is the list of files available +For this package name -- this is the list of files available. Looks there for: * The list of filenames * Other info -Once it has those, selects one file, downloads it +Once it has those, it selects one file and downloads it. (Question: If I want to ``pip install flask``, I think the whole list of filenames cannot….should not be …. ? I want only the Flask …. Why am I getting the diff --git a/docs/html/development/ci.rst b/docs/html/development/ci.rst index 39b9cdc8e74..ac65f816594 100644 --- a/docs/html/development/ci.rst +++ b/docs/html/development/ci.rst @@ -18,17 +18,17 @@ Supported interpreters pip support a variety of Python interpreters: -- CPython 3.6 - CPython 3.7 - CPython 3.8 - CPython 3.9 +- CPython 3.10 - Latest PyPy3 on different operating systems: - Linux - Windows -- MacOS +- macOS and on different architectures: @@ -77,9 +77,9 @@ Developer tasks ======== =============== ================ ================== ============= OS docs lint vendoring packaging ======== =============== ================ ================== ============= -Linux Github Github Github Github -Windows Github Github Github Github -MacOS Github Github Github Github +Linux GitHub GitHub GitHub GitHub +Windows GitHub GitHub GitHub GitHub +macOS GitHub GitHub GitHub GitHub ======== =============== ================ ================== ============= Actual testing @@ -88,28 +88,26 @@ Actual testing +------------------------------+---------------+-----------------+ | **interpreter** | **unit** | **integration** | +-----------+----------+-------+---------------+-----------------+ -| | | CP3.6 | | | -| | +-------+---------------+-----------------+ | | x86 | CP3.7 | | | | | +-------+---------------+-----------------+ | | | CP3.8 | | | | | +-------+---------------+-----------------+ | | | CP3.9 | | | | | +-------+---------------+-----------------+ +| | | CP3.10| | | +| | +-------+---------------+-----------------+ | | | PyPy3 | | | | Windows +----------+-------+---------------+-----------------+ -| | | CP3.6 | Github | Github | -| | +-------+---------------+-----------------+ -| | x64 | CP3.7 | | | +| | x64 | CP3.7 | GitHub | GitHub | | | +-------+---------------+-----------------+ | | | CP3.8 | | | | | +-------+---------------+-----------------+ -| | | CP3.9 | Github | Github | +| | | CP3.9 | | | +| | +-------+---------------+-----------------+ +| | | CP3.10| GitHub | GitHub | | | +-------+---------------+-----------------+ | | | PyPy3 | | | +-----------+----------+-------+---------------+-----------------+ -| | | CP3.6 | | | -| | +-------+---------------+-----------------+ | | x86 | CP3.7 | | | | | +-------+---------------+-----------------+ | | | CP3.8 | | | @@ -118,33 +116,33 @@ Actual testing | | +-------+---------------+-----------------+ | | | PyPy3 | | | | Linux +----------+-------+---------------+-----------------+ -| | | CP3.6 | Github | Github | +| | x64 | CP3.7 | GitHub | GitHub | | | +-------+---------------+-----------------+ -| | x64 | CP3.7 | Github | Github | +| | | CP3.8 | GitHub | GitHub | | | +-------+---------------+-----------------+ -| | | CP3.8 | Github | Github | +| | | CP3.9 | GitHub | GitHub | | | +-------+---------------+-----------------+ -| | | CP3.9 | Github | Github | +| | | CP3.10| GitHub | GitHub | | | +-------+---------------+-----------------+ | | | PyPy3 | | | +-----------+----------+-------+---------------+-----------------+ -| | | CP3.6 | | | -| | +-------+---------------+-----------------+ -| | x86 | CP3.7 | | | +| | arm64 | CP3.7 | | | | | +-------+---------------+-----------------+ | | | CP3.8 | | | | | +-------+---------------+-----------------+ | | | CP3.9 | | | | | +-------+---------------+-----------------+ +| | | CP3.10| | | +| | +-------+---------------+-----------------+ | | | PyPy3 | | | -| MacOS +----------+-------+---------------+-----------------+ -| | | CP3.6 | Github | Github | +| macOS +----------+-------+---------------+-----------------+ +| | x64 | CP3.7 | GitHub | GitHub | | | +-------+---------------+-----------------+ -| | x64 | CP3.7 | Github | Github | +| | | CP3.8 | GitHub | GitHub | | | +-------+---------------+-----------------+ -| | | CP3.8 | Github | Github | +| | | CP3.9 | GitHub | GitHub | | | +-------+---------------+-----------------+ -| | | CP3.9 | Github | Github | +| | | CP3.10| GitHub | GitHub | | | +-------+---------------+-----------------+ | | | PyPy3 | | | +-----------+----------+-------+---------------+-----------------+ diff --git a/docs/html/development/contributing.rst b/docs/html/development/contributing.rst index e443bfa57cd..87734ee4d55 100644 --- a/docs/html/development/contributing.rst +++ b/docs/html/development/contributing.rst @@ -88,7 +88,7 @@ The contents of this file are reStructuredText formatted text that will be used as the content of the news file entry. You do not need to reference the issue or PR numbers in the entry, since ``towncrier`` will automatically add a reference to all of the affected issues when -rendering the NEWS file. +rendering the NEWS file. There must be a newline at the end of the file. In order to maintain a consistent style in the ``NEWS.rst`` file, it is preferred to keep the news entry to the point, in sentence case, shorter than diff --git a/docs/html/development/getting-started.rst b/docs/html/development/getting-started.rst index 2c40daa1e78..730f5ece08f 100644 --- a/docs/html/development/getting-started.rst +++ b/docs/html/development/getting-started.rst @@ -27,8 +27,8 @@ Development Environment pip is a command line application written in Python. For developing pip, you should `install Python`_ on your computer. -For developing pip, you need to install :pypi:`tox`. Often, you can run -``python -m pip install tox`` to install and use it. +For developing pip, you need to install :pypi:`nox`. Often, you can run +``python -m pip install nox`` to install and use it. Running pip From Source Tree @@ -42,7 +42,7 @@ You can then invoke your local source tree pip normally. .. code-block:: shell - virtualenv .venv # You can also use "python -m venv .venv" + python -m venv .venv source .venv/bin/activate python -m pip install -e . python -m pip --version @@ -51,7 +51,7 @@ You can then invoke your local source tree pip normally. .. code-block:: shell - virtualenv .venv # You can also use "py -m venv .venv" + py -m venv .venv .venv\Scripts\activate py -m pip install -e . py -m pip --version @@ -60,7 +60,7 @@ Running Tests ============= pip's tests are written using the :pypi:`pytest` test framework and -:mod:`unittest.mock`. :pypi:`tox` is used to automate the setup and execution +:mod:`unittest.mock`. :pypi:`nox` is used to automate the setup and execution of pip's tests. It is preferable to run the tests in parallel for better experience during development, @@ -70,38 +70,39 @@ To run tests: .. code-block:: console - $ tox -e py36 -- -n auto + $ nox -s test-3.10 -- -n auto To run tests without parallelization, run: .. code-block:: console - $ tox -e py36 + $ nox -s test-3.10 -The example above runs tests against Python 3.6. You can also use other -versions like ``py39`` and ``pypy3``. +The example above runs tests against Python 3.10. You can also use other +versions like ``3.9`` and ``pypy3``. -``tox`` has been configured to forward any additional arguments it is given to +``nox`` has been configured to forward any additional arguments it is given to ``pytest``. This enables the use of pytest's `rich CLI`_. As an example, you can select tests using the various ways that pytest provides: .. code-block:: console $ # Using file name - $ tox -e py36 -- tests/functional/test_install.py + $ nox -s test-3.10 -- tests/functional/test_install.py $ # Using markers - $ tox -e py36 -- -m unit + $ nox -s test-3.10 -- -m unit $ # Using keywords - $ tox -e py36 -- -k "install and not wheel" + $ nox -s test-3.10 -- -k "install and not wheel" -Running pip's test suite requires supported version control tools (subversion, -bazaar, git, and mercurial) to be installed. If you are missing one of the VCS -tools, you can tell pip to skip those tests: +Running pip's entire test suite requires supported version control tools +(subversion, bazaar, git, and mercurial) to be installed. If you are missing +any of these VCS, those tests should be skipped automatically. You can also +explicitly tell pytest to skip those tests: .. code-block:: console - $ tox -e py36 -- -k "not svn" - $ tox -e py36 -- -k "not (svn or git)" + $ nox -s test-3.10 -- -k "not svn" + $ nox -s test-3.10 -- -k "not (svn or git)" Running Linters @@ -115,7 +116,7 @@ To use linters locally, run: .. code-block:: console - $ tox -e lint + $ nox -s lint .. note:: @@ -154,7 +155,7 @@ To build it locally, run: .. code-block:: console - $ tox -e docs + $ nox -s docs The built documentation can be found in the ``docs/build`` folder. diff --git a/docs/html/development/issue-triage.md b/docs/html/development/issue-triage.md new file mode 100644 index 00000000000..6861df7d8ae --- /dev/null +++ b/docs/html/development/issue-triage.md @@ -0,0 +1,346 @@ +```{note} +This section of the documentation is currently being written. pip +developers welcome your help to complete this documentation. If +you're interested in helping out, please let us know in the +[tracking issue](https://github.com/pypa/pip/issues/6583), or +just submit a pull request and mention it in that tracking issue. +``` + +# Issue Triage + +This serves as an introduction to issue tracking in pip as well as +how to help triage reported issues. + +## Issue Tracker + +The [pip issue tracker](https://github.com/pypa/pip/issues) is hosted on +GitHub alongside the project. + +Currently, the issue tracker is used for bugs, feature requests, and general +user support. + +In the pip issue tracker, we make use of labels and milestones to organize and +track work. + +### Labels + +Issue labels are used to: + +1. Categorize issues +2. Provide status information for contributors and reporters +3. Help contributors find tasks to work on + +The current set of labels are divided into several categories identified by +prefix: + +**C - Category** +: which area of `pip` functionality a feature request or issue is related to + +**K - Kind** +**O - Operating System** +: for issues that are OS-specific + +**P - Project/Platform** +: related to something external to `pip` + +**R - Resolution** +: no more discussion is really needed, an action has been identified and the +issue is waiting or closed + +**S - State** +: for some automatic labels and other indicators that work is needed + +**type** +: the role or flavor of an issue + +The specific labels falling into each category have a description that can be +seen on the [Labels](https://github.com/pypa/pip/labels) page. + +In addition, there are several standalone labels: + +**good first issue** +: this label marks an issue as beginner-friendly and shows up in banners that +GitHub displays for first-time visitors to the repository + +**triage** +: default label given to issues when they are created + +**trivial** +: special label for pull requests that removes the +{ref}`news file requirement ` + +**needs rebase or merge** + +: this is a special label used by BrownTruck to mark PRs that have merge +conflicts + +### Automation + +There are several helpers to manage issues and pull requests. + +Issues created on the issue tracker are automatically given the +`triage` label by the +[triage-new-issues](https://github.com/apps/triage-new-issues) +bot. The label is automatically removed when another label is added. + +When an issue needs feedback from the author we can label it with +`S: awaiting response`. When the author responds, the +[no-response](https://github.com/apps/no-response) bot removes the label. + +After an issue has been closed for 30 days, the +[lock](https://github.com/apps/lock) bot locks the issue and adds the +`S: auto-locked` label. This allows us to avoid monitoring existing closed +issues, but unfortunately prevents and references to issues from showing up as +links on the closed issue. + +## Triage Issues + +Users can make issues for a number of reasons: + +1. Suggestions about pip features that could be added or improved +2. Problems using pip +3. Concerns about pip usability +4. General packaging problems to be solved with pip +5. Problems installing or using Python packages +6. Problems managing virtual environments +7. Problems managing Python installations + +To triage issues means to identify what kind of issue is happening and + +- confirm bugs +- provide support +- discuss and design around the uses of the tool + +Specifically, to address an issue: + +1. Read issue title +2. Scan issue description +3. Ask questions +4. If time is available, try to reproduce +5. Search for or remember related issues and link to them +6. Identify an appropriate area of concern (if applicable) + +Keep in mind that all communication is happening with other people and +should be done with respect per the +[Code of Conduct](https://www.pypa.io/en/latest/code-of-conduct/). + +The lifecycle of an issue (bug or support) generally looks like: + +1. waiting for triage (marked with label `triage`) + +2. confirming issue - some discussion with the user, gathering + details, trying to reproduce the issue (may be marked with a specific + category, `S: awaiting-response`, `S: discussion-needed`, or + `S: need-repro`) + +3. confirmed - the issue is pretty consistently reproducible in a + straightforward way, or a mechanism that could be causing the issue has been + identified + +4. awaiting fix - the fix is identified and no real discussion on the issue + is needed, should be marked `R: awaiting PR` + +5. closed - can be for several reasons + + - fixed + - could not be reproduced, no more details could be obtained, and no + progress can be made + - actual issue was with another project or related to system + configuration and pip cannot (or will not) be adapted for it + +### Requesting information + +Requesting more information to better understand the context and environment +that led to the issue. Examples of specific information that may be useful +depending on the situation: + +- pip debug: `pip debug` +- pip version: `pip -V` +- Python version: `python -VV` +- Python path: `python -c 'import sys; print(sys.executable)'` +- `python` on `PATH`: Unix: `which python`; Windows: `where python` +- Python as resolved by the shell: `type python` +- Origin of pip (get-pip.py, OS-level package manager, ensurepip, manual + installation) +- Using a virtual environment (with `--system-site-packages`?) +- Using a conda environment +- `PATH` environment variable +- Network situation (e.g. airgapped environment, firewalls) +- `--verbose` output of a failing command +- (Unix) `strace` output from a failing command (be careful not to output + into the same directory as a package that's being installed, otherwise pip + will loop forever copying the log file...) +- (Windows) + [procmon](https://docs.microsoft.com/en-us/sysinternals/downloads/procmon) + output during a failing command + ([example request](https://github.com/pypa/pip/issues/6814#issuecomment-516611389)) +- Listing of files relevant to the issue (e.g. `ls -l venv/lib/pythonX.Y/problem-package.dist-info/`) +- whether the unexpected behavior ever worked as expected - if so then what + were the details of the setup (same information as above) + +Generally, information is good to request if it can help confirm or rule out +possible sources of error. We shouldn't request information that does not +improve our understanding of the situation. + +### Reproducing issues + +Whenever an issue happens and the cause isn't obvious, it is important +that we be able to reproduce it independently. This serves several purposes: + +1. If it is a pip bug, then any fix will need tests - a good reproducer + is most of the way towards that. +2. If it is not reproducible using the provided instructions, that helps + rule out a lot of possible causes. +3. A clear set of instructions is an easy way to get on the same page as + someone reporting an issue. + +The best way to reproduce an issue is with a script. + +A script can be copied into a file and executed, whereas shell output +has to be manually copied a line at a time. + +Scripts to reproduce issues should be: + +- portable (few/no assumptions about the system, other that it being Unix or Windows as applicable) +- non-destructive +- convenient +- require little/no setup on the part of the runner + +Examples: + +- creating and installing multiple wheels with different versions + ([link](https://github.com/pypa/pip/issues/4331#issuecomment-520156471)) +- using a small web server for authentication errors + ([link](https://github.com/pypa/pip/issues/2920#issuecomment-508953118)) +- using docker to test system or global configuration-related issues + ([link](https://github.com/pypa/pip/issues/5533#issuecomment-520159896)) +- using docker to test special filesystem permission/configurations + ([link](https://github.com/pypa/pip/issues/6364#issuecomment-507074729)) +- using docker for global installation with get-pip + ([link](https://github.com/pypa/pip/issues/6498#issuecomment-513501112)) +- get-pip on system with no `/usr/lib64` + ([link](https://github.com/pypa/pip/issues/5379#issuecomment-515270576)) +- reproducing with `pip` from current development branch + ([link](https://github.com/pypa/pip/issues/6707#issue-467770959)) + +### Reaching resolution + +Some user support questions are more related to system configuration than pip. +It's important to treat these issues with the same care and attention as +others, specifically: + +1. Unless the issue is very old and the user doesn't seem active, wait for + confirmation before closing the issue + +2. Direct the user to the most appropriate forum for their questions: + + - For Ubuntu, [askubuntu](https://askubuntu.com/) + - For Other linuxes/unixes, [serverfault](https://serverfault.com/) + - For network connectivity issues, + [serverfault](https://serverfault.com/) + +3. Just because a user support question is best solved using some other forum + doesn't mean that we can't make things easier. Try to extract and + understand from the user query how things could have been made easier for + them or you, for example with better warning or error messages. If an issue + does not exist covering that case then create one. If an issue does exist then + make sure to reference that issue before closing this one. + +4. A user may be having trouble installing a package, where the package + `setup.py` or build-backend configuration is non-trivial. In these cases we + can help to troubleshoot but the best advice is going to be to direct them + to the support channels for the related projects. + +5. Do not be hasty to assume it is one cause or another. What looks like + someone else's problem may still be an issue in pip or at least something + that could be improved. + +6. For general discussion on Python packaging: + + - [pypa/packaging](https://github.com/pypa/packaging-problems) + - [discuss.python.org/packaging](https://discuss.python.org/c/packaging) + +### Closing issues + +An issue may be considered resolved and closed when: + +- for each possible improvement or problem represented in the issue + discussion: + + - Consensus has been reached on a specific action and the actions + appear to be external to the project, with no follow up needed + in the project afterwards. + + - PEP updates (with a corresponding issue in + [python/peps](https://github.com/python/peps)) + - already tracked by another issue + + - A project-specific issue has been identified and the issue no + longer occurs as of the latest commit on the main branch. + +- An enhancement or feature request no longer has a proponent and the maintainers + don't think it's worth keeping open. + +- An issue has been identified as a duplicate, and it is clearly a duplicate (i.e. the + original report was very good and points directly to the issue) + +- The issue has been fixed, and can be independently validated as no longer being an + issue. If this is with code then the specific change/PR that led to it should be + identified and posted for tracking. + +## Common issues + +1. network-related issues - any issue involving retries, address lookup, or + anything like that are typically network issues. + +2. issues related to having multiple Python versions, or an OS package + manager-managed pip/python installation (specifically with Debian/Ubuntu). + These typically present themselves as: + + 1. Not being able to find installed packages + 2. basic libraries not able to be found, fundamental OS components missing + 3. In these situations you will want to make sure that we know how they got + their Python and pip. Knowing the relevant package manager commands can + help, e.g. `dpkg -S`. + +## For issues caused by changes by redistributors + +Certain issues are caused by patches that redistributors of Python/pip +make to Python/pip. + +Certain redistributors have shared preferred wording to redirect users +to their issue trackers. + +Fedora, RHEL, CentOS (and probably other derivatives – Rocky, Scientific, CloudLinux): + +``` +This issue looks like it's caused by changes that Fedora or Red Hat +made in their pip packaging. Please file a Fedora bug at +https://bugzilla.redhat.com/enter_bug.cgi?product=Fedora&component=python-pip + +cc @encukou @hroncok +``` + +Debian: + +``` +This issue looks like it's caused by changes that Debian made in +their pip packaging. Please file a bug with Debian, with +`reportbug python3-pip` [Docs](https://www.debian.org/Bugs/Reporting). +You can link to this issue in your bug report. + +In the meantime, you may be able to work-around your issue by upgrading +pip inside your virtualenv: `python -m pip install -U pip` +``` + +Ubuntu: + +``` +This issue looks like it's caused by changes that Ubuntu made in +their pip packaging. Please file a bug with Ubuntu, with +`ubuntu-bug python3-pip` [Docs](https://help.ubuntu.com/community/ReportingBugs). +You can link to this issue in your bug report. + +In the meantime, you may be able to work-around your issue by upgrading +pip inside your virtualenv: `python -m pip install -U pip` +``` diff --git a/docs/html/development/issue-triage.rst b/docs/html/development/issue-triage.rst deleted file mode 100644 index 7a67a0a89aa..00000000000 --- a/docs/html/development/issue-triage.rst +++ /dev/null @@ -1,312 +0,0 @@ -.. note:: - - This section of the documentation is currently being written. pip - developers welcome your help to complete this documentation. If - you're interested in helping out, please let us know in the - `tracking issue `__, or - just submit a pull request and mention it in that tracking issue. - -============ -Issue Triage -============ - -This serves as an introduction to issue tracking in pip as well as -how to help triage reported issues. - - -Issue Tracker -============= - -The `pip issue tracker `__ is hosted on -GitHub alongside the project. - -Currently, the issue tracker is used for bugs, feature requests, and general -user support. - -In the pip issue tracker, we make use of labels and milestones to organize and -track work. - -Labels ------- - -Issue labels are used to: - -#. Categorize issues -#. Provide status information for contributors and reporters -#. Help contributors find tasks to work on - -The current set of labels are divided into several categories identified by -prefix: - -**C - Category** - which area of ``pip`` functionality a feature request or issue is related to - -**K - Kind** - -**O - Operating System** - for issues that are OS-specific - -**P - Project/Platform** - related to something external to ``pip`` - -**R - Resolution** - no more discussion is really needed, an action has been identified and the - issue is waiting or closed - -**S - State** - for some automatic labels and other indicators that work is needed - -**type** - the role or flavor of an issue - -The specific labels falling into each category have a description that can be -seen on the `Labels `__ page. - -In addition, there are several standalone labels: - -**good first issue** - this label marks an issue as beginner-friendly and shows up in banners that - GitHub displays for first-time visitors to the repository - -**triage** - default label given to issues when they are created - -**trivial** - special label for pull requests that removes the - :ref:`news file requirement ` - -**needs rebase or merge** - this is a special label used by BrownTruck to mark PRs that have merge - conflicts - -Automation ----------- - -There are several helpers to manage issues and pull requests. - -Issues created on the issue tracker are automatically given the -``triage`` label by the -`triage-new-issues `__ -bot. The label is automatically removed when another label is added. - -When an issue needs feedback from the author we can label it with -``S: awaiting response``. When the author responds, the -`no-response `__ bot removes the label. - -After an issue has been closed for 30 days, the -`lock `__ bot locks the issue and adds the -``S: auto-locked`` label. This allows us to avoid monitoring existing closed -issues, but unfortunately prevents and references to issues from showing up as -links on the closed issue. - - -Triage Issues -============= - -Users can make issues for a number of reasons: - -#. Suggestions about pip features that could be added or improved -#. Problems using pip -#. Concerns about pip usability -#. General packaging problems to be solved with pip -#. Problems installing or using Python packages -#. Problems managing virtual environments -#. Problems managing Python installations - -To triage issues means to identify what kind of issue is happening and - -* confirm bugs -* provide support -* discuss and design around the uses of the tool - -Specifically, to address an issue: - -#. Read issue title -#. Scan issue description -#. Ask questions -#. If time is available, try to reproduce -#. Search for or remember related issues and link to them -#. Identify an appropriate area of concern (if applicable) - -Keep in mind that all communication is happening with other people and -should be done with respect per the -`Code of Conduct `__. - -The lifecycle of an issue (bug or support) generally looks like: - -#. waiting for triage (marked with label ``triage``) -#. confirming issue - some discussion with the user, gathering - details, trying to reproduce the issue (may be marked with a specific - category, ``S: awaiting-response``, ``S: discussion-needed``, or - ``S: need-repro``) -#. confirmed - the issue is pretty consistently reproducible in a - straightforward way, or a mechanism that could be causing the issue has been - identified -#. awaiting fix - the fix is identified and no real discussion on the issue - is needed, should be marked ``R: awaiting PR`` -#. closed - can be for several reasons - - * fixed - * could not be reproduced, no more details could be obtained, and no - progress can be made - * actual issue was with another project or related to system - configuration and pip cannot (or will not) be adapted for it - - -Requesting information ----------------------- - -Requesting more information to better understand the context and environment -that led to the issue. Examples of specific information that may be useful -depending on the situation: - -* pip debug: ``pip debug`` -* pip version: ``pip -V`` -* Python version: ``python -VV`` -* Python path: ``python -c 'import sys; print(sys.executable)'`` -* ``python`` on ``PATH``: Unix: ``which python``; Windows: ``where python`` -* Python as resolved by the shell: ``type python`` -* Origin of pip (get-pip.py, OS-level package manager, ensurepip, manual - installation) -* Using a virtual environment (with ``--system-site-packages``?) -* Using a conda environment -* ``PATH`` environment variable -* Network situation (e.g. airgapped environment, firewalls) -* ``--verbose`` output of a failing command -* (Unix) ``strace`` output from a failing command (be careful not to output - into the same directory as a package that's being installed, otherwise pip - will loop forever copying the log file...) -* (Windows) - `procmon `__ - output during a failing command - (`example request `__) -* Listing of files relevant to the issue (e.g. ``ls -l venv/lib/pythonX.Y/problem-package.dist-info/``) -* whether the unexpected behavior ever worked as expected - if so then what - were the details of the setup (same information as above) - - -Generally, information is good to request if it can help confirm or rule out -possible sources of error. We shouldn't request information that does not -improve our understanding of the situation. - - -Reproducing issues ------------------- - -Whenever an issue happens and the cause isn't obvious, it is important -that we be able to reproduce it independently. This serves several purposes: - -#. If it is a pip bug, then any fix will need tests - a good reproducer - is most of the way towards that. -#. If it is not reproducible using the provided instructions, that helps - rule out a lot of possible causes. -#. A clear set of instructions is an easy way to get on the same page as - someone reporting an issue. - -The best way to reproduce an issue is with a script. - -A script can be copied into a file and executed, whereas shell output -has to be manually copied a line at a time. - -Scripts to reproduce issues should be: - -- portable (few/no assumptions about the system, other that it being Unix or Windows as applicable) -- non-destructive -- convenient -- require little/no setup on the part of the runner - -Examples: - -- creating and installing multiple wheels with different versions - (`link `__) -- using a small web server for authentication errors - (`link `__) -- using docker to test system or global configuration-related issues - (`link `__) -- using docker to test special filesystem permission/configurations - (`link `__) -- using docker for global installation with get-pip - (`link `__) -- get-pip on system with no ``/usr/lib64`` - (`link `__) -- reproducing with ``pip`` from current development branch - (`link `__) - - -Reaching resolution -------------------- - -Some user support questions are more related to system configuration than pip. -It's important to treat these issues with the same care and attention as -others, specifically: - -#. Unless the issue is very old and the user doesn't seem active, wait for - confirmation before closing the issue -#. Direct the user to the most appropriate forum for their questions: - - * For Ubuntu, `askubuntu `__ - * For Other linuxes/unixes, `serverfault `__ - * For network connectivity issues, - `serverfault `__ - -#. Just because a user support question is best solved using some other forum - doesn't mean that we can't make things easier. Try to extract and - understand from the user query how things could have been made easier for - them or you, for example with better warning or error messages. If an issue - does not exist covering that case then create one. If an issue does exist then - make sure to reference that issue before closing this one. -#. A user may be having trouble installing a package, where the package - ``setup.py`` or build-backend configuration is non-trivial. In these cases we - can help to troubleshoot but the best advice is going to be to direct them - to the support channels for the related projects. -#. Do not be hasty to assume it is one cause or another. What looks like - someone else's problem may still be an issue in pip or at least something - that could be improved. -#. For general discussion on Python packaging: - - * `pypa/packaging `__ - * `discuss.python.org/packaging `__ - - -Closing issues --------------- - -An issue may be considered resolved and closed when: - -- for each possible improvement or problem represented in the issue - discussion: - - - Consensus has been reached on a specific action and the actions - appear to be external to the project, with no follow up needed - in the project afterwards. - - - PEP updates (with a corresponding issue in - `python/peps `__) - - already tracked by another issue - - - A project-specific issue has been identified and the issue no - longer occurs as of the latest commit on the main branch. - -- An enhancement or feature request no longer has a proponent and the maintainers - don't think it's worth keeping open. -- An issue has been identified as a duplicate, and it is clearly a duplicate (i.e. the - original report was very good and points directly to the issue) -- The issue has been fixed, and can be independently validated as no longer being an - issue. If this is with code then the specific change/PR that led to it should be - identified and posted for tracking. - - -Common issues -============= - -#. network-related issues - any issue involving retries, address lookup, or - anything like that are typically network issues. -#. issues related to having multiple Python versions, or an OS package - manager-managed pip/python installation (specifically with Debian/Ubuntu). - These typically present themselves as: - - #. Not being able to find installed packages - #. basic libraries not able to be found, fundamental OS components missing - #. In these situations you will want to make sure that we know how they got - their Python and pip. Knowing the relevant package manager commands can - help, e.g. ``dpkg -S``. diff --git a/docs/html/development/release-process.rst b/docs/html/development/release-process.rst index 188d3e87bec..b71e2820bd2 100644 --- a/docs/html/development/release-process.rst +++ b/docs/html/development/release-process.rst @@ -31,6 +31,15 @@ to need extra work before being released, the release manager always has the option to back out the partial change prior to a release. The PR can then be reworked and resubmitted for the next release. +Vendoring updates will be picked up from the ``main`` branch, as for any other +update. Ideally, vendoring updates should be merged between releases, just like +any other change. If there are outstanding updates to vendored packages, the +release manager *may* at their discretion choose to do a vendoring update +before the release. However this is *not* a requirement and in particular, +updates to vendored packages that fix issues in pip should be merged +proactively, to ensure that they will be present in the next release. + + .. _`Deprecation Policy`: Deprecation Policy @@ -65,6 +74,13 @@ their merits. ``pip._internal.utils.deprecation.deprecated``. The function is not a part of pip's public API. +Supported Versions +================== + +The latest version of the pip is the only supported version, previous +versions should be considered unsupported. Users are encouraged to make +regular updates to their version of pip in order to remain supported. + .. _`Python 2 Support`: Python 2 Support @@ -159,6 +175,11 @@ Sometimes we need to release a bugfix release of the form ``YY.N.Z+1``. In order to create one of these the changes should already be merged into the ``main`` branch. +Note that this process is only needed when there are changes on the main branch +that you do *not* want to include in the bugfix release. For a bugfix release +that will include everything that is on the ``main`` branch, the above process +for creating a new release can be used, simply changing the version number. + #. Create a new ``release/YY.N.Z+1`` branch off of the ``YY.N`` tag using the command ``git checkout -b release/YY.N.Z+1 YY.N``. #. Cherry pick the fixed commits off of the ``main`` branch, fixing any diff --git a/docs/html/getting-started.md b/docs/html/getting-started.md index 5c22d1abebc..0967b0eb99f 100644 --- a/docs/html/getting-started.md +++ b/docs/html/getting-started.md @@ -81,10 +81,8 @@ Successfully installed sampleproject ```{pip-cli} $ pip install --upgrade sampleproject -Uninstalling sampleproject: [...] -Proceed (y/n)? y -Successfully uninstalled sampleproject +Successfully installed sampleproject ``` ### Uninstall a package @@ -93,7 +91,7 @@ Successfully uninstalled sampleproject $ pip uninstall sampleproject Uninstalling sampleproject: [...] -Proceed (y/n)? y +Proceed (Y/n)? y Successfully uninstalled sampleproject ``` diff --git a/docs/html/index.md b/docs/html/index.md index 34a01744996..ab0b40dc180 100644 --- a/docs/html/index.md +++ b/docs/html/index.md @@ -48,3 +48,5 @@ lists or chat rooms: [packaging-discourse]: https://discuss.python.org/c/packaging/14 [irc-pypa]: https://kiwiirc.com/nextclient/#ircs://irc.libera.chat:+6697/pypa [irc-pypa-dev]: https://kiwiirc.com/nextclient/#ircs://irc.libera.chat:+6697/pypa-dev + +If you find any security issues, please report to [security@python.org](mailto:security@python.org) diff --git a/docs/html/installation.md b/docs/html/installation.md index 293750ff6ed..036a91397a5 100644 --- a/docs/html/installation.md +++ b/docs/html/installation.md @@ -14,7 +14,7 @@ If your Python environment does not have pip installed, there are 2 mechanisms to install pip supported directly by pip's maintainers: - [`ensurepip`](#ensurepip) -- [`get-pip.py`](#get-pip-py) +- [`get-pip.py`](#get-pippy) ### `ensurepip` @@ -45,6 +45,34 @@ More details about this script can be found in [pypa/get-pip]'s README. [pypa/get-pip]: https://github.com/pypa/get-pip +### Standalone zip application + +```{note} +The zip application is currently experimental. We test that pip runs correctly +in this form, but it is possible that there could be issues in some situations. +We will accept bug reports in such cases, but for now the zip application should +not be used in production environments. +``` + +In addition to installing pip in your environment, pip is available as a +standalone [zip application](https://docs.python.org/3.11/library/zipapp.html). +This can be downloaded from . There are +also zip applications for specific pip versions, named `pip-X.Y.Z.pyz`. + +The zip application can be run using any supported version of Python: + +```{pip-cli} +$ python pip.pyz --help +``` + +If run directly: + +```{pip-cli} +$ pip.pyz --help +``` + +then the currently active Python interpreter will be used. + ## Alternative Methods Depending on how you installed Python, there might be other mechanisms @@ -62,7 +90,7 @@ distro community, cloud provider support channels, etc). ## Upgrading `pip` -Upgrading your `pip` by running: +Upgrade your `pip` by running: ```{pip-cli} $ pip install --upgrade pip @@ -75,14 +103,15 @@ $ pip install --upgrade pip The current version of pip works on: - Windows, Linux and MacOS. -- CPython 3.6, 3.7, 3.8, 3.9, 3.10 and latest PyPy3. +- CPython 3.7, 3.8, 3.9, 3.10 and latest PyPy3. pip is tested to work on the latest patch version of the Python interpreter, for each of the minor versions listed above. Previous patch versions are supported on a best effort approach. -pip's maintainers do not provide support for users on older versions of Python, -and these users should request for support from the relevant provider -(eg: Linux distro community, cloud provider support channels, etc). +Other operating systems and Python versions are not supported by pip's +maintainers. + +Users who are on unsupported platforms should be aware that if they hit issues, they may have to resolve them for themselves. If they received pip from a source which provides support for their platform, they should request pip support from that source. [^python]: The `ensurepip` module was added to the Python standard library in Python 3.4. diff --git a/docs/html/reference/build-system/pyproject-toml.md b/docs/html/reference/build-system/pyproject-toml.md index ee93df034a7..d2ec0323e6a 100644 --- a/docs/html/reference/build-system/pyproject-toml.md +++ b/docs/html/reference/build-system/pyproject-toml.md @@ -49,6 +49,14 @@ hook will be called by pip, and dependencies it describes will also be installed in the build environment. For example, newer versions of setuptools expose the contents of `setup_requires` to pip via this hook. +Build-time requirement specifiers follow {pep}`508`, so it's possible to +reference packages with URLs. For example: + +```toml +[build-system] +requires = ["setuptools @ git+https://github.com/pypa/setuptools.git@main"] +``` + ### Metadata Generation ```{versionadded} 19.0 @@ -98,6 +106,16 @@ This is considered a stopgap solution until setuptools adds support for regular {ref}`deprecation policy `. ``` +### Backend Configuration + +Build backends have the ability to accept configuration settings, which can +change the way the build is handled. These settings take the form of a +series of `key=value` pairs. The user can supply configuration settings +using the `--config-settings` command line option (which can be supplied +multiple times, in order to specify multiple settings). + +The supplied configuration settings are passed to every backend hook call. + ## Build output It is the responsibility of the build backend to ensure that the output is diff --git a/docs/html/reference/index.md b/docs/html/reference/index.md index 8d690188d7a..63ce1ca4dbf 100644 --- a/docs/html/reference/index.md +++ b/docs/html/reference/index.md @@ -7,5 +7,8 @@ interoperability standards that pip utilises/implements. :titlesonly: build-system/index +requirement-specifiers requirements-file-format +installation-report +inspect-report ``` diff --git a/docs/html/reference/inspect-report.md b/docs/html/reference/inspect-report.md new file mode 100644 index 00000000000..1355e5d4274 --- /dev/null +++ b/docs/html/reference/inspect-report.md @@ -0,0 +1,219 @@ +# `pip inspect` JSON output specification + +```{versionadded} 22.2 +``` + +```{versionchanged} 23.0 +``version`` has been bumped to ``1`` and the format declared stable. +``` + +The `pip inspect` command produces a detailed JSON report of the Python +environment, including installed distributions. + +## Specification + +The report is a JSON object with the following properties: + +- `version`: the string `1`. It will change only if + and when backward incompatible changes are introduced, such as removing mandatory + fields or changing the semantics or data type of existing fields. The introduction of + backward incompatible changes will follow the usual pip processes such as the + deprecation cycle or feature flags. Tools must check this field to ensure they support + the corresponding version. + +- `pip_version`: a string with the version of pip used to produce the report. + +- `installed`: an array of [`InspectReportItem`](InspectReportItem) representing the + distribution packages that are installed. + +- `environment`: an object describing the environment where the installation report was + generated. See [PEP 508 environment + markers](https://peps.python.org/pep-0508/#environment-markers) for more information. + Values have a string type. + +(InspectReportItem)= + +An `InspectReportItem` is an object describing an installed distribution package with +the following properties: + +- `metadata`: the metadata of the distribution, converted to a JSON object according to + the [PEP 566 + transformation](https://www.python.org/dev/peps/pep-0566/#json-compatible-metadata). + +- `metadata_location`: the location of the metadata of the installed distribution. Most + of the time this is the `.dist-info` directory. For legacy installs it is the + `.egg-info` directory. + + ```{warning} + This field may not necessarily point to a directory, for instance, in the case of older + `.egg` installs. + ``` + +- `direct_url`: Information about the direct URL that was used for installation, if any, + using the [direct URL data + structure](https://packaging.python.org/en/latest/specifications/direct-url-data-structure/). + In most case, this field corresponds to the + [`direct_url.json`](https://packaging.python.org/en/latest/specifications/direct-url) + metadata, except for legacy editable installs, where it is emulated. + +- `requested`: `true` if the `REQUESTED` metadata is present, `false` otherwise. This + field is only present for modern `.dist-info` installations. + + ```{note} + The `REQUESTED` metadata may not be generated by all installers. + It is generated by pip since version 20.2. + ``` + +- `installer`: the content of the `INSTALLER` metadata, if present and not empty. + +## Example + +Running the ``pip inspect`` command, in an environment where `pip` is installed in +editable mode and `packaging` is installed as well, will produce an output similar to +this (metadata abriged for brevity): + +```json +{ + "version": "1", + "pip_version": "22.2.dev0", + "installed": [ + { + "metadata": { + "metadata_version": "2.1", + "name": "pyparsing", + "version": "3.0.9", + "summary": "pyparsing module - Classes and methods to define and execute parsing grammars", + "description_content_type": "text/x-rst", + "author_email": "Paul McGuire ", + "classifier": [ + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "Intended Audience :: Information Technology", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", + "Typing :: Typed" + ], + "requires_dist": [ + "railroad-diagrams ; extra == \"diagrams\"", + "jinja2 ; extra == \"diagrams\"" + ], + "requires_python": ">=3.6.8", + "project_url": [ + "Homepage, https://github.com/pyparsing/pyparsing/" + ], + "provides_extra": [ + "diagrams" + ], + "description": "..." + }, + "metadata_location": "/home/me/.virtualenvs/demoenv/lib/python3.8/site-packages/pyparsing-3.0.9.dist-info", + "installer": "pip", + "requested": false + }, + { + "metadata": { + "metadata_version": "2.1", + "name": "packaging", + "version": "21.3", + "platform": [ + "UNKNOWN" + ], + "summary": "Core utilities for Python packages", + "description_content_type": "text/x-rst", + "home_page": "https://github.com/pypa/packaging", + "author": "Donald Stufft and individual contributors", + "author_email": "donald@stufft.io", + "license": "BSD-2-Clause or Apache-2.0", + "classifier": [ + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + "License :: OSI Approved :: BSD License", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy" + ], + "requires_dist": [ + "pyparsing (!=3.0.5,>=2.0.2)" + ], + "requires_python": ">=3.6", + "description": "..." + }, + "metadata_location": "/home/me/.virtualenvs/demoenv/lib/python3.8/site-packages/packaging-21.3.dist-info", + "installer": "pip", + "requested": true + }, + { + "metadata": { + "metadata_version": "2.1", + "name": "pip", + "version": "22.2.dev0", + "summary": "The PyPA recommended tool for installing Python packages.", + "home_page": "https://pip.pypa.io/", + "author": "The pip developers", + "author_email": "distutils-sig@python.org", + "license": "MIT", + "classifier": [ + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Topic :: Software Development :: Build Tools", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy" + ], + "requires_python": ">=3.7", + "project_url": [ + "Documentation, https://pip.pypa.io", + "Source, https://github.com/pypa/pip", + "Changelog, https://pip.pypa.io/en/stable/news/" + ], + "description": "..." + }, + "metadata_location": "/home/me/pip/src/pip.egg-info", + "direct_url": { + "url": "file:///home/me/pip/src", + "dir_info": { + "editable": true + } + } + } + ], + "environment": { + "implementation_name": "cpython", + "implementation_version": "3.8.10", + "os_name": "posix", + "platform_machine": "x86_64", + "platform_release": "5.13-generic", + "platform_system": "Linux", + "platform_version": "...", + "python_full_version": "3.8.10", + "platform_python_implementation": "CPython", + "python_version": "3.8", + "sys_platform": "linux" + } +} +``` diff --git a/docs/html/reference/installation-report.md b/docs/html/reference/installation-report.md new file mode 100644 index 00000000000..cc2e23b2a20 --- /dev/null +++ b/docs/html/reference/installation-report.md @@ -0,0 +1,200 @@ +# Installation Report + +```{versionadded} 22.2 +``` + +```{versionchanged} 23.0 +``version`` has been bumped to ``1`` and the format declared stable. +``` + +The `--report` option of the pip install command produces a detailed JSON report of what +it did install (or what it would have installed, if used with the `--dry-run` option). + +```{note} +When considering use cases, please bear in mind that + +- while the `--report` option may be used to implement requirement locking tools (among + other use cases), this format is *not* meant to be a lock file format as such; +- there is no plan for pip to accept an installation report as input for the `install`, + `download` or `wheel` commands; +- the `--report` option and this format is intended to become a supported pip feature + (when the format is stabilized to version 1); +- it is however *not* a PyPA interoperability standard and as such its evolution will be + governed by the pip processes and not the PyPA standardization processes. +``` + +## Specification + +The report is a JSON object with the following properties: + +- `version`: the string `1`. It will change only if + and when backward incompatible changes are introduced, such as removing mandatory + fields or changing the semantics or data type of existing fields. The introduction of + backward incompatible changes will follow the usual pip processes such as the + deprecation cycle or feature flags. Tools must check this field to ensure they support + the corresponding version. + +- `pip_version`: a string with the version of pip used to produce the report. + +- `install`: an array of [`InstallationReportItem`](InstallationReportItem) representing + the distribution packages (to be) installed. + +- `environment`: an object describing the environment where the installation report was + generated. See [PEP 508 environment + markers](https://peps.python.org/pep-0508/#environment-markers) for more information. + Values have a string type. + +(InstallationReportItem)= + +An `InstallationReportItem` is an object describing a (to be) installed distribution +package with the following properties: + +- `metadata`: the metadata of the distribution, converted to a JSON object according to + the [PEP 566 + transformation](https://www.python.org/dev/peps/pep-0566/#json-compatible-metadata). + +- `is_direct`: `true` if the requirement was provided as, or constrained to, a direct + URL reference. `false` if the requirements was provided as a name and version + specifier. + +- `download_info`: Information about the artifact (to be) downloaded for installation, + using the [direct URL data + structure](https://packaging.python.org/en/latest/specifications/direct-url-data-structure/). + When `is_direct` is `true`, this field is the same as the + [`direct_url.json`](https://packaging.python.org/en/latest/specifications/direct-url) + metadata, otherwise it represents the URL of the artifact obtained from the index or + `--find-links`. + + ```{note} + For source archives, `download_info.archive_info.hashes` may + be absent when the requirement was installed from the wheel cache + and the cache entry was populated by an older pip version that did not + record the origin URL of the downloaded artifact. + ``` + +- `requested`: `true` if the requirement was explicitly provided by the user, either + directly via a command line argument or indirectly via a requirements file. `false` + if the requirement was installed as a dependency of another requirement. + +- `requested_extras`: extras requested by the user. This field is only present when the + `requested` field is true. + +## Example + +The following command: + +```console +pip install \ + --ignore-installed --dry-run --quiet \ + --report - \ + "pydantic>=1.9" git+https://github.com/pypa/packaging@main +``` + +will produce an output similar to this (metadata abriged for brevity): + +```json +{ + "version": "1", + "pip_version": "22.2", + "install": [ + { + "download_info": { + "url": "https://files.pythonhosted.org/packages/a4/0c/fbaa7319dcb5eecd3484686eb5a5c5702a6445adb566f01aee6de3369bc4/pydantic-1.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", + "archive_info": { + "hashes": { + "sha256": "18f3e912f9ad1bdec27fb06b8198a2ccc32f201e24174cec1b3424dda605a310" + } + } + }, + "is_direct": false, + "requested": true, + "metadata": { + "name": "pydantic", + "version": "1.9.1", + "requires_dist": [ + "typing-extensions (>=3.7.4.3)", + "dataclasses (>=0.6) ; python_version < \"3.7\"", + "python-dotenv (>=0.10.4) ; extra == 'dotenv'", + "email-validator (>=1.0.3) ; extra == 'email'" + ], + "requires_python": ">=3.6.1", + "provides_extra": [ + "dotenv", + "email" + ] + } + }, + { + "download_info": { + "url": "https://github.com/pypa/packaging", + "vcs_info": { + "vcs": "git", + "requested_revision": "main", + "commit_id": "4f42225e91a0be634625c09e84dd29ea82b85e27" + } + }, + "is_direct": true, + "requested": true, + "metadata": { + "name": "packaging", + "version": "21.4.dev0", + "requires_dist": [ + "pyparsing (!=3.0.5,>=2.0.2)" + ], + "requires_python": ">=3.7" + } + }, + { + "download_info": { + "url": "https://files.pythonhosted.org/packages/6c/10/a7d0fa5baea8fe7b50f448ab742f26f52b80bfca85ac2be9d35cdd9a3246/pyparsing-3.0.9-py3-none-any.whl", + "archive_info": { + "hashes": { + "sha256": "5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc" + } + } + }, + "is_direct": false, + "requested": false, + "metadata": { + "name": "pyparsing", + "version": "3.0.9", + "requires_dist": [ + "railroad-diagrams ; extra == \"diagrams\"", + "jinja2 ; extra == \"diagrams\"" + ], + "requires_python": ">=3.6.8" + } + }, + { + "download_info": { + "url": "https://files.pythonhosted.org/packages/75/e1/932e06004039dd670c9d5e1df0cd606bf46e29a28e65d5bb28e894ea29c9/typing_extensions-4.2.0-py3-none-any.whl", + "archive_info": { + "hashes": { + "sha256": "6657594ee297170d19f67d55c05852a874e7eb634f4f753dbd667855e07c1708" + } + } + }, + "is_direct": false, + "requested": false, + "metadata": { + "name": "typing_extensions", + "version": "4.2.0", + "requires_python": ">=3.7" + } + } + ], + "environment": { + "implementation_name": "cpython", + "implementation_version": "3.10.5", + "os_name": "posix", + "platform_machine": "x86_64", + "platform_release": "5.13-generic", + "platform_system": "Linux", + "platform_version": "...", + "python_full_version": "3.10.5", + "platform_python_implementation": "CPython", + "python_version": "3.10", + "sys_platform": "linux" + } +} +``` diff --git a/docs/html/reference/requirement-specifiers.md b/docs/html/reference/requirement-specifiers.md new file mode 100644 index 00000000000..d1449e5ef3a --- /dev/null +++ b/docs/html/reference/requirement-specifiers.md @@ -0,0 +1,61 @@ +(Requirement Specifiers)= + +# Requirement Specifiers + +pip supports installing from a package index using a {term}`requirement specifier `. Generally speaking, a requirement specifier is composed of a project name followed by optional {term}`version specifiers `. + +{pep}`508` contains a full specification of the format of a requirement. + +```{versionadded} 6.0 +Support for environment markers. +``` + +```{versionadded} 19.1 +Support for the direct URL reference form. +``` + +## Overview + +A requirement specifier comes in two forms: + +- name-based, which is composed of: + + - a package name (eg: `requests`) + - optionally, a set of "extras" that serve to install optional dependencies (eg: `security`) + - optionally, constraints to apply on the version of the package + - optionally, environment markers + +- URL-based, which is composed of: + + - a package name (eg: `requests`) + - optionally, a set of "extras" that serve to install optional dependencies (eg: `security`) + - a URL for the package + - optionally, environment markers + +## Examples + +A few example name-based requirement specifiers: + +``` +SomeProject +SomeProject == 1.3 +SomeProject >= 1.2, < 2.0 +SomeProject[foo, bar] +SomeProject ~= 1.4.2 +SomeProject == 5.4 ; python_version < '3.8' +SomeProject ; sys_platform == 'win32' +requests [security] >= 2.8.1, == 2.8.* ; python_version < "2.7" +``` + +```{note} +Use quotes around specifiers in the shell when using `>`, `<`, or when using environment markers. + +Do _not_ use quotes in requirement files. There is only one exception: pip v7.0 and v7.0.1 (from May 2015) required quotes around specifiers containing environment markers in requirement files. +``` + +A few example URL-based requirement specifiers: + +```none +pip @ https://github.com/pypa/pip/archive/22.0.2.zip +requests [security] @ https://github.com/psf/requests/archive/refs/heads/main.zip ; python_version >= "3.11" +``` diff --git a/docs/html/reference/requirements-file-format.md b/docs/html/reference/requirements-file-format.md index d05f1acca10..01047587161 100644 --- a/docs/html/reference/requirements-file-format.md +++ b/docs/html/reference/requirements-file-format.md @@ -15,13 +15,38 @@ consumption by pip, and other tools should take that into account before using it for their own purposes. ``` +## Example + +``` +# This is a comment, to show how #-prefixed lines are ignored. +# It is possible to specify requirements as plain names. +pytest +pytest-cov +beautifulsoup4 + +# The syntax supported here is the same as that of requirement specifiers. +docopt == 0.6.1 +requests [security] >= 2.8.1, == 2.8.* ; python_version < "2.7" +urllib3 @ https://github.com/urllib3/urllib3/archive/refs/tags/1.26.8.zip + +# It is possible to refer to other requirement files or constraints files. +-r other-requirements.txt +-c constraints.txt + +# It is possible to refer to specific local distribution paths. +./downloads/numpy-1.9.2-cp34-none-win32.whl + +# It is possible to refer to URLs. +http://wxpython.org/Phoenix/snapshot-builds/wxPython_Phoenix-3.0.3.dev1820+49a8884-cp34-none-win_amd64.whl +``` + ## Structure Each line of the requirements file indicates something to be installed, or arguments to {ref}`pip install`. The following forms are supported: - `[[--option]...]` -- ` [; markers] [[--option]...]` +- `` - `` - `[-e] ` - `[-e] ` @@ -74,13 +99,21 @@ and two {ref}`--find-links ` locations: ``` ```` +(per-requirement-options)= + ### Per-requirement options +```{versionadded} 7.0 + +``` + The options which can be applied to individual requirements are: -- {ref}`--install-option ` - {ref}`--global-option ` -- `--hash` (for {ref}`Hash-Checking mode`) +- {ref}`--config-settings ` +- `--hash` (for {ref}`Hash-checking mode`) + +## Referring to other requirements files If you wish, you can refer to other requirements files, like this: @@ -118,30 +151,29 @@ and only specify the variable name for your requirements, letting pip lookup the value at runtime. This approach aligns with the commonly used [12-factor configuration pattern](https://12factor.net/config). -## Example -``` -###### Requirements without Version Specifiers ###### -pytest -pytest-cov -beautifulsoup4 +## Influencing the build system -###### Requirements with Version Specifiers ###### -# See https://www.python.org/dev/peps/pep-0440/#version-specifiers -docopt == 0.6.1 # Version Matching. Must be version 0.6.1 -keyring >= 4.1.1 # Minimum version 4.1.1 -coverage != 3.5 # Version Exclusion. Anything except version 3.5 -Mopidy-Dirble ~= 1.1 # Compatible release. Same as >= 1.1, == 1.* +```{danger} +This disables the use of wheels (cached or otherwise). This could mean that builds will be slower, less deterministic, less reliable and may not behave correctly upon installation. -###### Refer to other requirements files ###### --r other-requirements.txt +This mechanism is only preserved for backwards compatibility and should be considered deprecated. A future release of pip may drop these options. +``` -###### A particular file ###### -./downloads/numpy-1.9.2-cp34-none-win32.whl -http://wxpython.org/Phoenix/snapshot-builds/wxPython_Phoenix-3.0.3.dev1820+49a8884-cp34-none-win_amd64.whl +The `--global-option` option is used to pass options to `setup.py`. -###### Additional Requirements without Version Specifiers ###### -# Same as 1st section, just here to show that you can put things in any order. -rejected -green +```{attention} +These options are highly coupled with how pip invokes setuptools using the {doc}`../reference/build-system/setup-py` build system interface. It is not compatible with newer {doc}`../reference/build-system/pyproject-toml` build system interface. + +This is will not work with other build-backends or newer setup.cfg-only projects. ``` + +If you have a declaration like: + + FooProject >= 1.2 --global-option="--no-user-cfg" + +The above translates roughly into running FooProject's `setup.py` script as: + + python setup.py --no-user-cfg install + +Note that the only way of giving more than one option to `setup.py` is through multiple `--global-option` options. diff --git a/docs/html/topics/authentication.md b/docs/html/topics/authentication.md index 981aab5abd7..966ac3e7a0d 100644 --- a/docs/html/topics/authentication.md +++ b/docs/html/topics/authentication.md @@ -66,12 +66,120 @@ man pages][netrc-docs]. ## Keyring Support pip supports loading credentials stored in your keyring using the -{pypi}`keyring` library. +{pypi}`keyring` library, which can be enabled py passing `--keyring-provider` +with a value of `auto`, `disabled`, `import`, or `subprocess`. The default +value `auto` respects `--no-input` and not query keyring at all if the option +is used; otherwise it tries the `import`, `subprocess`, and `disabled` +providers (in this order) and uses the first one that works. + +### Configuring pip's keyring usage + +Since the keyring configuration is likely system-wide, a more common way to +configure its usage would be to use a configuration instead: + +```{seealso} +{doc}`./configuration` describes how pip configuration works. +``` + +```bash +$ pip config set --global global.keyring-provider subprocess + +# A different user on the same system which has PYTHONPATH configured and and +# wanting to use keyring installed that way could then run +$ pip config set --user global.keyring-provider import + +# For a specific virtual environment you might want to use disable it again +# because you will only be using PyPI and the private repo (and mirror) +# requires 2FA with a keycard and a pincode +$ pip config set --site global.index https://pypi.org/simple +$ pip config set --site global.keyring-provider disabled + +# configuring it via environment variable is also possible +$ export PIP_KEYRING_PROVIDER=disabled +``` + +### Using keyring's Python module + +Setting `keyring-provider` to `import` makes pip communicate with `keyring` via +its Python interface. ```bash -$ pip install keyring # install keyring from PyPI +# install keyring from PyPI +$ pip install keyring --index-url https://pypi.org/simple $ echo "your-password" | keyring set pypi.company.com your-username -$ pip install your-package --index-url https://pypi.company.com/ +$ pip install your-package --keyring-provider import --index-url https://pypi.company.com/ +``` + +### Using keyring as a command line application + +Setting `keyring-provider` to `subprocess` makes pip look for and use the +`keyring` command found on `PATH`. + +For this use case, a username *must* be included in the URL, since it is +required by `keyring`'s command line interface. See the example below or the +basic HTTP authentication section at the top of this page. + +```bash +# Install keyring from PyPI using pipx, which we assume is installed properly +# you can also create a venv somewhere and add it to the PATH yourself instead +$ pipx install keyring --index-url https://pypi.org/simple + +# For Azure DevOps, also install its keyring backend. +$ pipx inject keyring artifacts-keyring --index-url https://pypi.org/simple + +# For Google Artifact Registry, also install and initialize its keyring backend. +$ pipx inject keyring keyrings.google-artifactregistry-auth --index-url https://pypi.org/simple +$ gcloud auth login + +# Note that a username is required in the index URL. +$ pip install your-package --keyring-provider subprocess --index-url https://username@pypi.example.com/ +``` + +### Here be dragons + +The `auto` provider is conservative and does not query keyring at all when +`--no-input` is used because the keyring might require user interaction such as +prompting the user on the console. Third party tools frequently call Pip for +you and do indeed pass `--no-input` as they are well-behaved and don't have +much information to work with. (Keyring does have an api to request a backend +that does not require user input.) You have more information about your system, +however! + +You can force keyring usage by requesting a keyring provider other than `auto` +(or `disabled`). Leaving `import` and `subprocess`. You do this by passing +`--keyring-provider import` or one of the following methods: + +```bash +# via config file, possibly with --user, --global or --site +$ pip config set global.keyring-provider subprocess +# or via environment variable +$ export PIP_KEYRING_PROVIDER=import +``` + +```{warning} +Be careful when doing this since it could cause tools such as pipx and Pipenv +to appear to hang. They show their own progress indicator while hiding output +from the subprocess in which they run Pip. You won't know whether the keyring +backend is waiting the user input or not in such situations. +``` + +pip is conservative and does not query keyring at all when `--no-input` is used +because the keyring might require user interaction such as prompting the user +on the console. You can force keyring usage by passing `--force-keyring` or one +of the following: + +```bash +# possibly with --user, --global or --site +$ pip config set global.force-keyring true +# or +$ export PIP_FORCE_KEYRING=1 +``` + +```{warning} +Be careful when doing this since it could cause tools such as pipx and Pipenv +to appear to hang. They show their own progress indicator while hiding output +from the subprocess in which they run Pip. You won't know whether the keyring +backend is waiting the user input or not in such situations. ``` Note that `keyring` (the Python package) needs to be installed separately from @@ -79,5 +187,4 @@ pip. This can create a bootstrapping issue if you need the credentials stored in the keyring to download and install keyring. It is, thus, expected that users that wish to use pip's keyring support have -some mechanism for downloading and installing {pypi}`keyring` in their Python -environment. +some mechanism for downloading and installing {pypi}`keyring`. diff --git a/docs/html/topics/caching.md b/docs/html/topics/caching.md index d584859c334..954cebe402d 100644 --- a/docs/html/topics/caching.md +++ b/docs/html/topics/caching.md @@ -50,6 +50,43 @@ pip now caches wheels when building from an immutable Git reference (i.e. a commit hash). ``` +## Where is the cache stored + +```{caution} +The exact filesystem structure of pip's cache's contents is considered to be +an implementation detail and may change between any two versions of pip. +``` + +### `pip cache dir` + +```{versionadded} 20.1 + +``` + +You can use `pip cache dir` to get the cache directory that pip is currently configured to use. + +### Default paths + +````{tab} Linux +``` +~/.cache/pip +``` + +pip will also respect `XDG_CACHE_HOME`. +```` + +````{tab} MacOS +``` +~/Library/Caches/pip +``` +```` + +````{tab} Windows +``` +%LocalAppData%\pip\Cache +``` +```` + ## Avoiding caching pip tries to use its cache whenever possible, and it is designed do the right @@ -59,7 +96,7 @@ In some cases, pip's caching behaviour can be undesirable. As an example, if you have package with optional C extensions, that generates a pure Python wheel when the C extension can’t be built, pip will use that cached wheel even when you later invoke it from an environment that could have built those optional C -extensions. This is because pip is seeing a cached wheel for that matches the +extensions. This is because pip is seeing a cached wheel that matches the package being built, and pip assumes that the result of building a package from a package index is deterministic. @@ -81,13 +118,28 @@ It is also a good idea to remove the offending cached wheel using the The {ref}`pip cache` command can be used to manage pip's cache. -The exact filesystem structure of pip's cache is considered to be an -implementation detail and may change between any two versions of pip. +### General overview + +`pip cache info` provides an overview of the contents of pip's cache, such as the total size and location of various parts of it. + +### Removing a single package + +`pip cache remove setuptools` removes all wheel files related to setuptools from pip's cache. + +### Removing the cache + +`pip cache purge` will clear all wheel files from pip's cache. + +### Listing cached files + +`pip cache list` will list all wheel files from pip's cache. + +`pip cache list setuptools` will list all setuptools-related wheel files from pip's cache. ## Disabling caching pip's caching behaviour is disabled by passing the `--no-cache-dir` option. -It is, however, recommended to **NOT** disable pip's caching. Doing so can +It is, however, recommended to **NOT** disable pip's caching unless you have caching at a higher level (eg: layered caches in container builds). Doing so can significantly slow down pip (due to repeated operations and package builds) and result in significantly more network usage. diff --git a/docs/html/topics/configuration.md b/docs/html/topics/configuration.md index 2d63ac1413f..521bc9af4b9 100644 --- a/docs/html/topics/configuration.md +++ b/docs/html/topics/configuration.md @@ -1,3 +1,5 @@ +(configuration)= + # Configuration pip allows a user to change its behaviour via 3 mechanisms: @@ -9,15 +11,22 @@ pip allows a user to change its behaviour via 3 mechanisms: This page explains how the configuration files and environment variables work, and how they are related to pip's various command line options. +```{seealso} +{doc}`../cli/pip_config` command, which helps manage pip's configuration. +``` + +(config-file)= + ## Configuration Files -Configuration files can change the default values for command line option. -They are written using a standard INI style configuration files. +Configuration files can change the default values for command line options. +They are written using standard INI style configuration files. -pip has 3 "levels" of configuration files: +pip has 4 "levels" of configuration files: -- `global`: system-wide configuration file, shared across users. -- `user`: per-user configuration file. +- `global`: system-wide configuration file, shared across all users. +- `user`: per-user configuration file, shared across all environments. +- `base` : per-base environment configuration file, shared across all virtualenvs with the same base. (available since pip 23.0) - `site`: per-environment configuration file; i.e. per-virtualenv. ### Location @@ -39,6 +48,9 @@ User The legacy "per-user" configuration file is also loaded, if it exists: {file}`$HOME/.pip/pip.conf`. +Base +: {file}`\{sys.base_prefix\}/pip.conf` + Site : {file}`$VIRTUAL_ENV/pip.conf` ``` @@ -55,6 +67,9 @@ User The legacy "per-user" configuration file is also loaded, if it exists: {file}`$HOME/.pip/pip.conf`. +Base +: {file}`\{sys.base_prefix\}/pip.conf` + Site : {file}`$VIRTUAL_ENV/pip.conf` ``` @@ -73,6 +88,9 @@ User The legacy "per-user" configuration file is also loaded, if it exists: {file}`%HOME%\\pip\\pip.ini` +Base +: {file}`\{sys.base_prefix\}\\pip.ini` + Site : {file}`%VIRTUAL_ENV%\\pip.ini` ``` @@ -84,6 +102,8 @@ a configuration file that's loaded first, and whose values are overridden by the values set in the aforementioned files. Setting this to {any}`os.devnull` disables the loading of _all_ configuration files. +(config-precedence)= + ### Loading order When multiple configuration files are found, pip combines them in the following @@ -92,6 +112,7 @@ order: - `PIP_CONFIG_FILE`, if given. - Global - User +- Base - Site Each file read overrides any values read from previous files, so if the @@ -213,9 +234,9 @@ Use `no`, `false` or `0` instead. ## Precedence / Override order -Command line options have override environment variables, which override the +Command line options override environment variables, which override the values in a configuration file. Within the configuration file, values in -command-specific sections over values in the global section. +command-specific sections override values in the global section. Examples: diff --git a/docs/html/topics/dependency-resolution.md b/docs/html/topics/dependency-resolution.md index a8a4d69a7ce..03f276baa2f 100644 --- a/docs/html/topics/dependency-resolution.md +++ b/docs/html/topics/dependency-resolution.md @@ -155,12 +155,14 @@ how to inspect: - their release notes and changelogs from past versions During deployment, you can create a lockfile stating the exact package and -version number for for each dependency of that package. You can create this -with `pip-tools `\_\_. +version number for each dependency of that package. You can create this +with [pip-tools](https://github.com/jazzband/pip-tools/). This means the "work" is done once during development process, and thus will avoid performing dependency resolution during deployment. +(Fixing conflicting dependencies)= + ## Dealing with dependency conflicts This section provides practical suggestions to pip users who encounter @@ -277,10 +279,10 @@ your _dependency_ by: - Requesting that the package maintainers loosen _their_ dependencies - Forking the package and loosening the dependencies yourself -:::{warning} +```{warning} If you choose to fork the package yourself, you are _opting out_ of any support provided by the package maintainers. Proceed at your own risk! -::: +``` #### All requirements are appropriate, but a solution does not exist diff --git a/docs/html/topics/deps.dot b/docs/html/topics/deps.dot new file mode 100644 index 00000000000..8c7ec10c9b2 --- /dev/null +++ b/docs/html/topics/deps.dot @@ -0,0 +1,19 @@ +digraph G { + graph [fontname = "Handlee"]; + node [fontname = "Handlee"]; + edge [fontname = "Handlee"]; + + bgcolor=transparent; + + A [color=blue fontcolor=blue]; + A -> B [color=red]; + A -> C [color=red]; + node [color=lightgrey fontcolor=lightgrey]; + edge [color=lightgrey]; + node [color=lightgrey]; + B -> B1; + B -> B2; + C -> C1; + C -> C2; + +} diff --git a/docs/html/topics/deps.png b/docs/html/topics/deps.png new file mode 100644 index 00000000000..cf9e0ca6683 Binary files /dev/null and b/docs/html/topics/deps.png differ diff --git a/docs/html/topics/https-certificates.md b/docs/html/topics/https-certificates.md new file mode 100644 index 00000000000..b42c463e6cc --- /dev/null +++ b/docs/html/topics/https-certificates.md @@ -0,0 +1,71 @@ +(SSL Certificate Verification)= + +# HTTPS Certificates + +```{versionadded} 1.3 + +``` + +By default, pip will perform SSL certificate verification for network +connections it makes over HTTPS. These serve to prevent man-in-the-middle +attacks against package downloads. This does not use the system certificate +store but, instead, uses a bundled CA certificate store from {pypi}`certifi`. + +## Using a specific certificate store + +The `--cert` option (and the corresponding `PIP_CERT` environment variable) +allow users to specify a different certificate store/bundle for pip to use. It +is also possible to use `REQUESTS_CA_BUNDLE` or `CURL_CA_BUNDLE` environment +variables. + +## Using system certificate stores + +```{versionadded} 22.2 +Experimental support, behind `--use-feature=truststore`. +``` + +It is possible to use the system trust store, instead of the bundled certifi +certificates for verifying HTTPS certificates. This approach will typically +support corporate proxy certificates without additional configuration. + +In order to use system trust stores, you need to: + +- Use Python 3.10 or newer. +- Install the {pypi}`truststore` package, in the Python environment you're + running pip in. + + This is typically done by installing this package using a system package + manager or by using pip in {ref}`Hash-checking mode` for this package and + trusting the network using the `--trusted-host` flag. + + ```{pip-cli} + $ python -m pip install truststore + [...] + $ python -m pip install SomePackage --use-feature=truststore + [...] + Successfully installed SomePackage + ``` + +### When to use + +You should try using system trust stores when there is a custom certificate +chain configured for your system that pip isn't aware of. Typically, this +situation will manifest with an `SSLCertVerificationError` with the message +"certificate verify failed: unable to get local issuer certificate": + +```{pip-cli} +$ pip install -U SomePackage +[...] + SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: unable to get local issuer certificate (\_ssl.c:997)'))) - skipping +``` + +This error means that OpenSSL wasn't able to find a trust anchor to verify the +chain against. Using system trust stores instead of certifi will likely solve +this issue. + +If you encounter a TLS/SSL error when using the `truststore` feature you should +open an issue on the [truststore GitHub issue tracker] instead of pip's issue +tracker. The maintainers of truststore will help diagnose and fix the issue. + +[truststore github issue tracker]: + https://github.com/sethmlarson/truststore/issues diff --git a/docs/html/topics/index.md b/docs/html/topics/index.md index 10eb5dddaee..ad467615090 100644 --- a/docs/html/topics/index.md +++ b/docs/html/topics/index.md @@ -14,6 +14,11 @@ authentication caching configuration dependency-resolution +more-dependency-resolution +https-certificates +local-project-installs repeatable-installs +secure-installs vcs-support +python-option ``` diff --git a/docs/html/topics/local-project-installs.md b/docs/html/topics/local-project-installs.md new file mode 100644 index 00000000000..bd70dbe2f78 --- /dev/null +++ b/docs/html/topics/local-project-installs.md @@ -0,0 +1,67 @@ +# Local project installs + +It is extremely common to have a project, available in a folder/directory on your computer [^1] that you wish to install. + +With pip, depending on your usecase, there are two ways to do this: + +- A regular install +- An editable install + +## Regular installs + +You can install local projects by specifying the project path to pip: + +```{pip-cli} +$ pip install path/to/SomeProject +``` + +This will install the project into the Python that pip is associated with, in a manner similar to how it would actually be installed. + +This is what should be used in CI system and for deployments, since it most closely mirrors how a package would get installed if you build a distribution and installed from it (because that's _exactly_ what it does). + +(editable-installs)= + +## Editable installs + +You can install local projects in "editable" mode: + +```{pip-cli} +$ pip install -e path/to/SomeProject +``` + +Editable installs allow you to install your project without copying any files. Instead, the files in the development directory are added to Python's import path. This approach is well suited for development and is also known as a "development installation". + +With an editable install, you only need to perform a re-installation if you change the project metadata (eg: version, what scripts need to be generated etc). You will still need to run build commands when you need to perform a compilation for non-Python code in the project (eg: C extensions). + +```{caution} +It is possible to see behaviour differences between regular installs vs editable installs. These differences depend on the build-backend, and you should check the build-backend documentation for the details. In case you distribute the project as a "distribution package", users will see the behaviour of regular installs -- thus, it is important to ensure that regular installs work correctly. +``` + +```{note} +This is functionally the same as [setuptools' develop mode], and that's precisely the mechanism used for setuptools-based projects. + +There are two advantages over using `setup.py develop` directly: + +- This works with non-setuptools build-backends as well. +- The ".egg-info" directory is created relative to the project path, when using pip. This is generally a better location than setuptools, which dumps it in the current working directory. +``` + +[setuptools' develop mode]: https://setuptools.readthedocs.io/en/latest/userguide/development_mode.html + +## Build artifacts + +```{versionchanged} 21.3 +The project being installed is no longer copied to a temporary directory before invoking the build system, by default. A `--use-deprecated=out-of-tree-build` option is provided as a temporary fallback to aid user migrations. +``` + +```{versionchanged} 22.1 +The `--use-deprecated=out-of-tree-build` option has been removed. +``` + +When provided with a project that's in a local directory, pip will invoke the build system "in place". This behaviour has several consequences: + +- Local project builds will now be significantly faster, for certain kinds of projects and on systems with slow I/O (eg: via network attached storage or overly aggressive antivirus software). +- Certain build backends (eg: `setuptools`) will litter the project directory with secondary build artifacts (eg: `.egg-info` directories). +- Certain build backends (eg: `setuptools`) may not be able to perform with parallel builds anymore, since they previously relied on the fact that pip invoked them in a separate directory for each build. + +[^1]: Specifically, the current machine's filesystem. diff --git a/docs/html/topics/more-dependency-resolution.md b/docs/html/topics/more-dependency-resolution.md new file mode 100644 index 00000000000..31967a6a920 --- /dev/null +++ b/docs/html/topics/more-dependency-resolution.md @@ -0,0 +1,168 @@ +# More on Dependency Resolution + +This article goes into more detail about pip's dependency resolution algorithm. +In certain situations, pip can take a long time to determine what to install, +and this article is intended to help readers understand what is happening +"behind the scenes" during that process. + +```{note} +This document is a work in progress. The details included are accurate (at the +time of writing), but there is additional information, in particular around +pip's interface with resolvelib, which have not yet been included. + +Contributions to improve this document are welcome. +``` + +## The dependency resolution problem + +The process of finding a set of packages to install, given a set of dependencies +between them, is known to be an [NP-hard](https://en.wikipedia.org/wiki/NP-hardness) +problem. What this means in practice is roughly that the process scales +*extremely* badly as the size of the problem increases. So when you have a lot +of dependencies, working out what to install will, in the worst case, take a +very long time. + +The practical implication of that is that there will always be some situations +where pip cannot determine what to install in a reasonable length of time. We +make every effort to ensure that such situations happen rarely, but eliminating +them altogether isn't even theoretically possible. We'll discuss what options +yopu have if you hit a problem situation like this a little later. + +## Python specific issues + +Many algorithms for handling dependency resolution assume that you know the +full details of the problem at the start - that is, you know all of the +dependencies up front. Unfortunately, that is not the case for Python packages. +With the current package index structure, dependency metadata is only available +by downloading the package file, and extracting the data from it. And in the +case of source distributions, the situation is even worse as the project must +be built after being downloaded in order to determine the dependencies. + +Work is ongoing to try to make metadata more readily available at lower cost, +but at the time of writing, this has not been completed. + +As downloading projects is a costly operation, pip cannot pre-compute the full +dependency tree. This means that we are unable to use a number of techniques +for solving the dependency resolution problem. In practice, we have to use a +*backtracking algorithm*. + +## Dependency metadata + +It is worth discussing precisely what metadata is needed in order to drive the +package resolution process. There are essentially three key pieces of +information: + +* The project name +* The release version +* The dependencies themselves + +There are other pieces of data (e.g., extras, python version restrictions, wheel +compatibility tags) which are used as well, but they do not fundamentally +alter the process, so we will ignore them here. + +The most important information is the project name and version. Those two pieces +of information identify an individual "candidate" for installation, and must +uniquely identify such a candidate. Name and version must be available from the +moment the candidate object is created. This is not an issue for distribution +files (sdists and wheels) as that data is available from the filename, but for +unpackaged source trees, pip needs to call the build backend to ask for that +data. This is done before resolution proper starts. + +The dependency data is *not* requested in advance (as noted above, doing so +would be prohibitively costly, and for a backtracking algorithm it isn't +needed). Instead, pip requests dependency data "on demand", as the algorithm +starts to check that particular candidate. + +One particular implication of the lazy fetching of dependency data is that +often, pip *does not know* things that might be obvious to a human looking at +the dependency tree as a whole. For example, if package A depends on version +1.0 of package B, it's obvious to a human that there's no point in looking at +other versions of package B. But if pip starts looking at B before it has +considered A, it doesn't have access to A's dependency data, and so has no way +of knowing that looking at other versions of B is wasted work. And worse still, +pip cannot even know that there's vital information in A's dependencies. + +This latter point is a common theme with many cases where pip takes a long time +to complete a resolution - there's information pip doesn't know at the point +where it makes a "wrong" choice. Most of the heuristics added to the resolver +to guide the algorithm are designed to guess correctly in the face of that +lack of knowledge. + +## The resolver and the finder + +So far, we have been talking about the "resolver" as a single entity. While that +is mostly true, the process of getting package data from an index is handled +by another component of pip, the "finder". The finder is responsible for +feeding candidates to the resolver, and has a key role to play in selecting +suitable candidates. + +Note that the resolver is *only* relevant for packages fetched from an index. +Candidates coming from other sources (local source directories, PEP 508 +direct URL references) do *not* go through the finder, and are merged with the +candidates provided by the finder as part of the resolver's "provider" +implementation. + +As well as determining what versions exist in the index for a given project, +the finder selects the best distribution file to use for that candidate. This +may be a wheel or a source distribution, and precisely what is selected is +controlled by wheel compatibility tags, pip's options (whether to prefer binary +or source) and metadata supplied by the index. In particular, if a file is +marked as only being for specific Python versions, the file will be ignored by +the finder (and the resolver may never even see that version). + +The finder also provides candidates for a project to the resolver in order of +preference - the provider implements the rule that later versions are preferred +over older versions, for example. + +## The resolver algorithm + +The resolver itself is based on a separate package, [resolvelib](https://pypi.org/project/resolvelib/). +This implements an abstract backtracking resolution algorithm, in a way that is +independent of the specifics of Python packages - those specifics are abstracted +away by pip before calling the resolver. + +Pip's interface to resolvelib is in the form of a "provider", which is the +interface between pip's model of packages and the resolution algorithm. The +provider deals in "candidates" and "requirements" and implements the following +operations: + +* `identify` - implements identity for candidates and requirements. It is this + operation that implements the rule that candidates are identified by their + name and version, for example. +* `get_preference` - this provides information to the resolver to help it choose + which requirement to look at "next" when working through the resolution + process. +* `find_matches` - given a set of constraints, determine what candidates exist + that satisfy them. This is essentially where the finder interacts with the + resolver. +* `is_satisfied_by` - checks if a candidate satisfies a requirement. This is + basically the implementation of what a requirement meams. +* `get_dependencies` - get the dependency metadata for a candidate. This is + the implementation of the process of getting and reading package metadata. + +Of these methods, the only non-trivial one is the `get_preference` method. This +implements the heuristics used to guide the resolution, telling it which +requirement to try to satisfy next. It's this method that is responsible for +trying to guess which route through the dependency tree will be most productive. +As noted above, it's doing this with limited information. See the following +diagram + +![](deps.png) + +When the provider is asked to choose between the red requirements (A->B and +A->C) it doesn't know anything about the dependencies of B or C (i.e., the +grey parts of the graph). + +Pip's current implementation of the provider implements `get_preference` as +follows: + +* Prefer if any of the known requirements is "direct", e.g. points to an + explicit URL. +* If equal, prefer if any requirement is "pinned", i.e. contains + operator ``===`` or ``==``. +* If equal, calculate an approximate "depth" and resolve requirements + closer to the user-specified requirements first. +* Order user-specified requirements by the order they are specified. +* If equal, prefers "non-free" requirements, i.e. contains at least one + operator, such as ``>=`` or ``<``. +* If equal, order alphabetically for consistency (helps debuggability). diff --git a/docs/html/topics/python-option.md b/docs/html/topics/python-option.md new file mode 100644 index 00000000000..5ad46e7af9c --- /dev/null +++ b/docs/html/topics/python-option.md @@ -0,0 +1,29 @@ +# Managing a different Python interpreter + +```{versionadded} 22.3 +``` + +Occasionally, you may want to use pip to manage a Python installation other than +the one pip is installed into. In this case, you can use the `--python` option +to specify the interpreter you want to manage. This option can take one of two +values: + +1. The path to a Python executable. +2. The path to a virtual environment. + +In both cases, pip will run exactly as if it had been invoked from that Python +environment. + +One example of where this might be useful is to manage a virtual environment +that does not have pip installed. + +```{pip-cli} +$ python -m venv .venv --without-pip +$ pip --python .venv install SomePackage +[...] +Successfully installed SomePackage +``` + +You could also use `--python .venv/bin/python` (or on Windows, +`--python .venv\Scripts\python.exe`) if you wanted to be explicit, but the +virtual environment name is shorter and works exactly the same. diff --git a/docs/html/topics/repeatable-installs.md b/docs/html/topics/repeatable-installs.md index c6e8f9689e4..d3f06679587 100644 --- a/docs/html/topics/repeatable-installs.md +++ b/docs/html/topics/repeatable-installs.md @@ -1,3 +1,4 @@ +(repeatability)= # Repeatable Installs pip can be used to achieve various levels of repeatable environments. This page @@ -19,7 +20,7 @@ specific version. ``` A requirements file, containing pinned package versions can be generated using -{ref}`pip freeze`. This would not only the top-level packages, but also all of +{ref}`pip freeze`. This would pin not only the top-level packages, but also all of their transitive dependencies. Performing the installation using {ref}`--no-deps ` would provide an extra dose of insurance against installing anything not explicitly listed. diff --git a/docs/html/topics/secure-installs.md b/docs/html/topics/secure-installs.md new file mode 100644 index 00000000000..f012842b2ac --- /dev/null +++ b/docs/html/topics/secure-installs.md @@ -0,0 +1,100 @@ +# Secure installs + +By default, pip does not perform any checks to protect against remote tampering and involves running arbitrary code from distributions. It is, however, possible to use pip in a manner that changes these behaviours, to provide a more secure installation mechanism. + +This can be achieved by doing the following: + +- Enable {ref}`Hash-checking mode`, by passing {any}`--require-hashes` +- Disallow source distributions, by passing {any}`--only-binary :all: <--only-binary>` + +(Hash-checking mode)= + +## Hash-checking Mode + +```{versionadded} 8.0 + +``` + +This mode uses local hashes, embedded in a requirements.txt file, to protect against remote tampering and network issues. These hashes are specified using a `--hash` [per requirement option](per-requirement-options). + +Note that hash-checking is an all-or-nothing proposition. Specifying `--hash` against _any_ requirement will activate this mode globally. + +To add hashes for a package, add them to line as follows: + +``` +FooProject == 1.2 \ + --hash=sha256:2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824 \ + --hash=sha256:486ea46224d1bb4fb680f34f7c9ad96a8f24ec88be73ea8e5a6c65260e9cb8a7 +``` + +### Additional restrictions + +- Hashes are required for _all_ requirements. + + This is because a partially-hashed requirements file is of little use and thus likely an error: a malicious actor could slip bad code into the installation via one of the unhashed requirements. + + Note that hashes embedded in URL-style requirements via the `#md5=...` syntax suffice to satisfy this rule (regardless of hash strength, for legacy reasons), though you should use a stronger hash like sha256 whenever possible. + +- Hashes are required for _all_ dependencies. + + If there is a dependency that is not spelled out and hashed in the requirements file, it will result in an error. + +- Requirements must be pinned (either to a URL, filesystem path or using `==`). + + This prevents a surprising hash mismatch upon the release of a new version that matches the requirement specifier. + +### Forcing Hash-checking mode + +It is possible to force the hash checking mode to be enabled, by passing `--require-hashes` command-line option. + +This can be useful in deploy scripts, to ensure that the author of the requirements file provided hashes. It is also a convenient way to bootstrap your list of hashes, since it shows the hashes of the packages it fetched. It fetches only the preferred archive for each package, so you may still need to add hashes for alternatives archives using {ref}`pip hash`: for instance if there is both a binary and a source distribution. + +### Hash algorithms + +The recommended hash algorithm at the moment is sha256, but stronger ones are allowed, including all those supported by `hashlib`. However, weaker ones such as md5, sha1, and sha224 are excluded to avoid giving a false sense of security. + +### Multiple hashes per package + +It is possible to use multiple hashes for each package. This is important when a package offers binary distributions for a variety of platforms or when it is important to allow both binary and source distributions. + +### Interaction with caching + +The {ref}`locally-built wheel cache ` is disabled in hash-checking mode to prevent spurious hash mismatch errors. + +These would otherwise occur while installing sdists that had already been automatically built into cached wheels: those wheels would be selected for installation, but their hashes would not match the sdist ones from the requirements file. + +A further complication is that locally built wheels are nondeterministic: contemporary modification times make their way into the archive, making hashes unpredictable across machines and cache flushes. Compilation of C code adds further nondeterminism, as many compilers include random-seeded values in their output. + +However, wheels fetched from index servers are required to be the same every time. They land in pip's HTTP cache, not its wheel cache, and are used normally in hash-checking mode. The only downside of having the wheel cache disabled is thus extra build time for sdists, and this can be solved by making sure pre-built wheels are available from the index server. + +### Using hashes from PyPI (or other index servers) + +PyPI (and certain other index servers) provides a hash for the distribution, in the fragment portion of each download URL, like `#sha256=123...`, which pip checks as a protection against download corruption. + +Other hash algorithms that have guaranteed support from `hashlib` are also supported here: sha1, sha224, sha384, sha256, and sha512. Since this hash originates remotely, it is not a useful guard against tampering and thus does not satisfy the `--require-hashes` demand that every package have a local hash. + +## Repeatable installs + +Hash-checking mode also works with {ref}`pip download` and {ref}`pip wheel`. See {doc}`../topics/repeatable-installs` for a comparison of hash-checking mode with other repeatability strategies. + +```{warning} +Beware of the `setup_requires` keyword arg in {file}`setup.py`. The (rare) packages that use it will cause those dependencies to be downloaded by setuptools directly, skipping pip's hash-checking. If you need to use such a package, see {ref}`controlling setup_requires `. +``` + +## Do not use setuptools directly + +Be careful not to nullify all your security work by installing your actual project by using setuptools' deprecated interfaces directly: for example, by calling `python setup.py install`, `python setup.py develop`, or `easy_install`. + +These will happily go out and download, unchecked, anything you missed in your requirements file and it’s easy to miss things as your project evolves. To be safe, install your project using pip and {any}`--no-deps`. + +Instead of `python setup.py install`, use: + +```{pip-cli} +$ pip install --no-deps . +``` + +Instead of `python setup.py develop`, use: + +```{pip-cli} +$ pip install --no-deps -e . +``` diff --git a/docs/html/topics/vcs-support.md b/docs/html/topics/vcs-support.md index 30bb579300a..465d5ecb78c 100644 --- a/docs/html/topics/vcs-support.md +++ b/docs/html/topics/vcs-support.md @@ -1,3 +1,4 @@ +(vcs support)= # VCS Support pip supports installing from various version control systems (VCS). @@ -17,9 +18,9 @@ The supported schemes are `git+file`, `git+https`, `git+ssh`, `git+http`, `git+git` and `git`. Here are some of the supported forms: ```none -git+ssh://git.example.com/MyProject#egg=MyProject -git+file:///home/user/projects/MyProject#egg=MyProject -git+https://git.example.com/MyProject#egg=MyProject +MyProject @ git+ssh://git@git.example.com/MyProject +MyProject @ git+file:///home/user/projects/MyProject +MyProject @ git+https://git.example.com/MyProject ``` ```{warning} @@ -34,10 +35,10 @@ It is also possible to specify a "git ref" such as branch name, a commit hash or a tag name: ```none -git+https://git.example.com/MyProject.git@master#egg=MyProject -git+https://git.example.com/MyProject.git@v1.0#egg=MyProject -git+https://git.example.com/MyProject.git@da39a3ee5e6b4b0d3255bfef95601890afd80709#egg=MyProject -git+https://git.example.com/MyProject.git@refs/pull/123/head#egg=MyProject +MyProject @ git+https://git.example.com/MyProject.git@master +MyProject @ git+https://git.example.com/MyProject.git@v1.0 +MyProject @ git+https://git.example.com/MyProject.git@da39a3ee5e6b4b0d3255bfef95601890afd80709 +MyProject @ git+https://git.example.com/MyProject.git@refs/pull/123/head ``` When passing a commit hash, specifying a full hash is preferable to a partial @@ -50,20 +51,20 @@ The supported schemes are `hg+file`, `hg+http`, `hg+https`, `hg+ssh` and `hg+static-http`. Here are some of the supported forms: ``` -hg+http://hg.myproject.org/MyProject#egg=MyProject -hg+https://hg.myproject.org/MyProject#egg=MyProject -hg+ssh://hg.myproject.org/MyProject#egg=MyProject -hg+file:///home/user/projects/MyProject#egg=MyProject +MyProject @ hg+http://hg.myproject.org/MyProject +MyProject @ hg+https://hg.myproject.org/MyProject +MyProject @ hg+ssh://hg.myproject.org/MyProject +MyProject @ hg+file:///home/user/projects/MyProject ``` It is also possible to specify a revision number, a revision hash, a tag name or a local branch name: ```none -hg+http://hg.example.com/MyProject@da39a3ee5e6b#egg=MyProject -hg+http://hg.example.com/MyProject@2019#egg=MyProject -hg+http://hg.example.com/MyProject@v1.0#egg=MyProject -hg+http://hg.example.com/MyProject@special_feature#egg=MyProject +MyProject @ hg+http://hg.example.com/MyProject@da39a3ee5e6b +MyProject @ hg+http://hg.example.com/MyProject@2019 +MyProject @ hg+http://hg.example.com/MyProject@v1.0 +MyProject @ hg+http://hg.example.com/MyProject@special_feature ``` ### Subversion @@ -72,9 +73,9 @@ The supported schemes are `svn`, `svn+svn`, `svn+http`, `svn+https` and `svn+ssh`. Here are some of the supported forms: ```none -svn+https://svn.example.com/MyProject#egg=MyProject -svn+ssh://svn.example.com/MyProject#egg=MyProject -svn+ssh://user@svn.example.com/MyProject#egg=MyProject +MyProject @ svn+https://svn.example.com/MyProject +MyProject @ svn+ssh://svn.example.com/MyProject +MyProject @ svn+ssh://user@svn.example.com/MyProject ``` You can also give specific revisions to an SVN URL, like so: @@ -93,18 +94,18 @@ The supported schemes are `bzr+http`, `bzr+https`, `bzr+ssh`, `bzr+sftp`, `bzr+ftp` and `bzr+lp`. Here are the supported forms: ```none -bzr+http://bzr.example.com/MyProject/trunk#egg=MyProject -bzr+sftp://user@example.com/MyProject/trunk#egg=MyProject -bzr+ssh://user@example.com/MyProject/trunk#egg=MyProject -bzr+ftp://user@example.com/MyProject/trunk#egg=MyProject -bzr+lp:MyProject#egg=MyProject +MyProject @ bzr+http://bzr.example.com/MyProject/trunk +MyProject @ bzr+sftp://user@example.com/MyProject/trunk +MyProject @ bzr+ssh://user@example.com/MyProject/trunk +MyProject @ bzr+ftp://user@example.com/MyProject/trunk +MyProject @ bzr+lp:MyProject ``` Tags or revisions can be installed like so: ```none -bzr+https://bzr.example.com/MyProject/trunk@2019#egg=MyProject -bzr+http://bzr.example.com/MyProject/trunk@v1.0#egg=MyProject +MyProject @ bzr+https://bzr.example.com/MyProject/trunk@2019 +MyProject @ bzr+http://bzr.example.com/MyProject/trunk@v1.0 ``` (editable-vcs-installs)= @@ -129,18 +130,19 @@ VCS source will not overwrite it without an `--upgrade` flag. Further, pip looks at the package version, at the target revision to determine what action to take on the VCS requirement (not the commit itself). -The {ref}`pip freeze` subcommand will record the VCS requirement specifier -(referencing a specific commit) only if the install is done with the editable -option. - ## URL fragments -pip looks at 2 fragments for VCS URLs: +pip looks at the `subdirectory` fragments of VCS URLs for specifying the path to the +Python package, when it is not in the root of the VCS directory. eg: `pkg_dir`. + +pip also looks at the `egg` fragment specifying the "project name". In practice the +`egg` fragment is only required to help pip determine the VCS clone location in editable +mode. In all other circumstances, the `egg` fragment is not necessary and its use is +discouraged. -- `egg`: For specifying the "project name" for use in pip's dependency - resolution logic. eg: `egg=project_name` -- `subdirectory`: For specifying the path to the Python package, when it is not - in the root of the VCS directory. eg: `pkg_dir` +The `egg` fragment **should** be a bare +[PEP 508](https://peps.python.org/pep-0508/) project name. Anything else +is not guaranteed to work. ````{admonition} Example If your repository layout is: @@ -156,6 +158,12 @@ some_other_file Then, to install from this repository, the syntax would be: +```{pip-cli} +$ pip install "pkg @ vcs+protocol://repo_url/#subdirectory=pkg_dir" +``` + +or: + ```{pip-cli} $ pip install -e "vcs+protocol://repo_url/#egg=pkg&subdirectory=pkg_dir" ``` diff --git a/docs/html/user_guide.rst b/docs/html/user_guide.rst index 059fd7cdc35..966b200f4f5 100644 --- a/docs/html/user_guide.rst +++ b/docs/html/user_guide.rst @@ -2,6 +2,15 @@ User Guide ========== +.. Hello there! + + If you're thinking of adding content to this page... please take a moment + to consider if this content can live on its own, within a topic guide or a + reference page. + + There is active effort being put toward *reducing* the amount of content on + this specific page (https://github.com/pypa/pip/issues/9475) and moving it + into more focused single-page documents that cover that specific topic. Running pip =========== @@ -17,7 +26,7 @@ to your system, which can be run from the command prompt as follows: ``python -m pip`` executes pip using the Python interpreter you specified as python. So ``/usr/bin/python3.7 -m pip`` means - you are executing pip for your interpreter located at /usr/bin/python3.7. + you are executing pip for your interpreter located at ``/usr/bin/python3.7``. .. tab:: Windows @@ -59,19 +68,18 @@ For more information and examples, see the :ref:`pip install` reference. .. _PyPI: https://pypi.org/ - -Basic Authentication Credentials -================================ +.. _`0-basic-authentication-credentials`: +.. rubric:: Basic Authentication Credentials This is now covered in :doc:`topics/authentication`. -netrc Support -------------- +.. _`0-netrc-support`: +.. rubric:: netrc Support This is now covered in :doc:`topics/authentication`. -Keyring Support ---------------- +.. _`0-keyring-support`: +.. rubric:: Keyring Support This is now covered in :doc:`topics/authentication`. @@ -84,7 +92,7 @@ in many corporate environments requires an outbound HTTP proxy server. pip can be configured to connect through a proxy server in various ways: * using the ``--proxy`` command-line option to specify a proxy in the form - ``[user:passwd@]proxy.server:port`` + ``scheme://[user:passwd@]proxy.server:port`` * using ``proxy`` in a :ref:`config-file` * by setting the standard environment-variables ``http_proxy``, ``https_proxy`` and ``no_proxy``. @@ -451,34 +459,26 @@ packages. For more information and examples, see the :ref:`pip search` reference. -.. _`Configuration`: - - -Configuration -============= +.. _`0-configuration`: +.. rubric:: Configuration This is now covered in :doc:`topics/configuration`. -.. _config-file: - -Config file ------------ +.. _`0-config-file`: +.. rubric:: Config file This is now covered in :doc:`topics/configuration`. -Environment Variables ---------------------- +.. _`0-environment-variables`: +.. rubric:: Environment Variables This is now covered in :doc:`topics/configuration`. -.. _config-precedence: - -Config Precedence ------------------ +.. _`0-config-precedence`: +.. rubric:: Config Precedence This is now covered in :doc:`topics/configuration`. - Command Completion ================== @@ -496,6 +496,10 @@ To setup for fish:: python -m pip completion --fish > ~/.config/fish/completions/pip.fish +To setup for powershell:: + + python -m pip completion --powershell | Out-File -Encoding default -Append $PROFILE + Alternatively, you can use the result of the ``completion`` command directly with the eval function of your shell, e.g. by adding the following to your startup file:: @@ -629,7 +633,7 @@ Moreover, the "user scheme" can be customized by setting the ``PYTHONUSERBASE`` environment variable, which updates the value of ``site.USER_BASE``. -To install "SomePackage" into an environment with site.USER_BASE customized to +To install "SomePackage" into an environment with ``site.USER_BASE`` customized to '/myappenv', do the following: .. tab:: Unix/macOS @@ -776,18 +780,14 @@ is the latest version: [...] Successfully installed SomePackage -.. _`Repeatability`: - - -Ensuring Repeatability -====================== +.. _`0-repeatability`: +.. _`0-ensuring-repeatability`: +.. rubric:: Ensuring Repeatability This is now covered in :doc:`../topics/repeatable-installs`. -.. _`Fixing conflicting dependencies`: - -Fixing conflicting dependencies -=============================== +.. _`0-fixing-conflicting-dependencies`: +.. rubric:: Fixing conflicting dependencies This is now covered in :doc:`../topics/dependency-resolution`. @@ -800,7 +800,7 @@ As noted previously, pip is a command line program. While it is implemented in Python, and so is available from your Python code via ``import pip``, you must not use pip's internal APIs in this way. There are a number of reasons for this: -#. The pip code assumes that is in sole control of the global state of the +#. The pip code assumes that it is in sole control of the global state of the program. pip manages things like the logging system configuration, or the values of the standard IO streams, without considering the possibility that user code @@ -1146,3 +1146,8 @@ announcements on the `low-traffic packaging announcements list`_ and .. _our survey on upgrades that create conflicts: https://docs.google.com/forms/d/e/1FAIpQLSeBkbhuIlSofXqCyhi3kGkLmtrpPOEBwr6iJA6SzHdxWKfqdA/viewform .. _the official Python blog: https://blog.python.org/ .. _Python Windows launcher: https://docs.python.org/3/using/windows.html#launcher + +.. _`0-using-system-trust-stores-for-verifying-https`: +.. rubric:: Using system trust stores for verifying HTTPS + +This is now covered in :doc:`topics/https-certificates`. diff --git a/docs/requirements.txt b/docs/requirements.txt index e311562e669..fa3a7390c15 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,4 +1,4 @@ -sphinx ~= 4.1.0 +sphinx ~= 4.2, != 4.4.0 towncrier furo myst_parser diff --git a/news/10531.bugfix.rst b/news/10531.bugfix.rst deleted file mode 100644 index 0a33944a10d..00000000000 --- a/news/10531.bugfix.rst +++ /dev/null @@ -1,2 +0,0 @@ -Always refuse installing or building projects that have no ``pyproject.toml`` nor -``setup.py``. diff --git a/news/10565.bugfix.rst b/news/10565.bugfix.rst deleted file mode 100644 index 2f0141ffd85..00000000000 --- a/news/10565.bugfix.rst +++ /dev/null @@ -1 +0,0 @@ -Tweak running-as-root detection, to check ``os.getuid`` if it exists, on Unix-y and non-Linux/non-MacOS machines. diff --git a/news/10573.bugfix.rst b/news/10573.bugfix.rst deleted file mode 100644 index 5b69ab6d7e7..00000000000 --- a/news/10573.bugfix.rst +++ /dev/null @@ -1,5 +0,0 @@ -When installing projects with a ``pyproject.toml`` in editable mode, and the build -backend does not support :pep:`660`, prepare metadata using -``prepare_metadata_for_build_wheel`` instead of ``setup.py egg_info``. Also, refuse -installing projects that only have a ``setup.cfg`` and no ``setup.py`` nor -``pyproject.toml``. These restore the pre-21.3 behaviour. diff --git a/news/10585.bugfix.rst b/news/10585.bugfix.rst deleted file mode 100644 index 9e78a4609cf..00000000000 --- a/news/10585.bugfix.rst +++ /dev/null @@ -1 +0,0 @@ -Restore compatibility of where configuration files are loaded from on MacOS (back to ``Library/Application Support/pip``, instead of ``Preferences/pip``). diff --git a/news/10588.feature.rst b/news/10588.feature.rst deleted file mode 100644 index c8985b7a05a..00000000000 --- a/news/10588.feature.rst +++ /dev/null @@ -1 +0,0 @@ -Documents the ``--require-virtualenv`` flag for ``pip install``. diff --git a/news/10937.feature.rst b/news/10937.feature.rst new file mode 100644 index 00000000000..2974c577a10 --- /dev/null +++ b/news/10937.feature.rst @@ -0,0 +1 @@ +Present conflict information during installation after each choice that is rejected (pass ``-vv`` to ``pip install`` to show it) diff --git a/news/11169.feature.rst b/news/11169.feature.rst new file mode 100644 index 00000000000..54cc6637bc6 --- /dev/null +++ b/news/11169.feature.rst @@ -0,0 +1 @@ +Display dependency chain on each Collecting/Processing log line. diff --git a/news/11358.removal.rst b/news/11358.removal.rst new file mode 100644 index 00000000000..23e388a9a39 --- /dev/null +++ b/news/11358.removal.rst @@ -0,0 +1 @@ +Remove support for the deprecated ``--install-options``. diff --git a/news/11451.removal.rst b/news/11451.removal.rst new file mode 100644 index 00000000000..c0d1100ed92 --- /dev/null +++ b/news/11451.removal.rst @@ -0,0 +1,2 @@ +``--no-binary`` does not imply ``setup.py install`` anymore. Instead a wheel will be +built locally and installed. diff --git a/news/11529.bugfix.rst b/news/11529.bugfix.rst new file mode 100644 index 00000000000..d05e404602e --- /dev/null +++ b/news/11529.bugfix.rst @@ -0,0 +1 @@ +Fix grammar by changing "A new release of pip available:" to "A new release of pip is available:" in the notice used for indicating that. diff --git a/news/11681.feature.rst b/news/11681.feature.rst new file mode 100644 index 00000000000..00cd05ee18d --- /dev/null +++ b/news/11681.feature.rst @@ -0,0 +1,4 @@ +The ``--config-settings``/``-C`` option now supports using the same key multiple +times. When the same key is specified multiple times, all values are passed to +the build backend as a list, as opposed to the previous behavior, where pip would +only pass the last value if the same key was used multiple times. diff --git a/news/11774.bugfix.rst b/news/11774.bugfix.rst new file mode 100644 index 00000000000..771246b0b54 --- /dev/null +++ b/news/11774.bugfix.rst @@ -0,0 +1 @@ +Correct the way to decide if keyring is available. diff --git a/news/11775.doc.rst b/news/11775.doc.rst new file mode 100644 index 00000000000..18274b7692a --- /dev/null +++ b/news/11775.doc.rst @@ -0,0 +1,2 @@ +Cross-reference the ``--python`` flag from the ``--prefix`` flag, +and mention limitations of ``--prefix`` regarding script installation. diff --git a/news/11809.doc.rst b/news/11809.doc.rst new file mode 100644 index 00000000000..68c49ea50d5 --- /dev/null +++ b/news/11809.doc.rst @@ -0,0 +1 @@ +Add SECURITY.md to make the policy offical. diff --git a/news/11837.bugfix.rst b/news/11837.bugfix.rst new file mode 100644 index 00000000000..6d33ed6800c --- /dev/null +++ b/news/11837.bugfix.rst @@ -0,0 +1 @@ +More consistent resolution backtracking by removing legacy hack related to setuptools resolution diff --git a/news/11838.doc.rst b/news/11838.doc.rst new file mode 100644 index 00000000000..9630aa59885 --- /dev/null +++ b/news/11838.doc.rst @@ -0,0 +1 @@ +Add username to Git over SSH example. diff --git a/news/11842.doc.rst b/news/11842.doc.rst new file mode 100644 index 00000000000..bd063996f54 --- /dev/null +++ b/news/11842.doc.rst @@ -0,0 +1,2 @@ +Quote extras in the pip install docs to guard shells with default glob +qualifiers, like zsh. diff --git a/news/11882.bugfix.rst b/news/11882.bugfix.rst new file mode 100644 index 00000000000..5373487b188 --- /dev/null +++ b/news/11882.bugfix.rst @@ -0,0 +1 @@ +Include ``AUTHORS.txt`` in pip's wheels. diff --git a/news/8719.feature.rst b/news/8719.feature.rst new file mode 100644 index 00000000000..3f3caf2db89 --- /dev/null +++ b/news/8719.feature.rst @@ -0,0 +1 @@ +Add ``--keyring-provider`` flag. See the Authentication page in the documentation for more info. diff --git a/news/9752.feature.rst b/news/9752.feature.rst new file mode 100644 index 00000000000..d515267be21 --- /dev/null +++ b/news/9752.feature.rst @@ -0,0 +1 @@ +In the case of virtual environments, configuration files are now also included from the base installation. diff --git a/news/pep517.vendor.rst b/news/pep517.vendor.rst deleted file mode 100644 index 2f9070b9cdd..00000000000 --- a/news/pep517.vendor.rst +++ /dev/null @@ -1 +0,0 @@ -Upgrade pep517 to 0.12.0 diff --git a/news/pkg_resources.vendor.rst b/news/pkg_resources.vendor.rst new file mode 100644 index 00000000000..a20817dfb24 --- /dev/null +++ b/news/pkg_resources.vendor.rst @@ -0,0 +1 @@ +Patch pkg_resources to remove dependency on ``jaraco.text``. diff --git a/news/resolvelib.vendor.rst b/news/resolvelib.vendor.rst index 50f702cc923..c8b5c928d19 100644 --- a/news/resolvelib.vendor.rst +++ b/news/resolvelib.vendor.rst @@ -1 +1 @@ -Upgrade resolvelib to 0.8.1 +Upgrade resolvelib to 0.9.0 diff --git a/news/setuptools.vendor.rst b/news/setuptools.vendor.rst new file mode 100644 index 00000000000..f86cecbca92 --- /dev/null +++ b/news/setuptools.vendor.rst @@ -0,0 +1 @@ +Update pkg_resources (via setuptools) to 65.6.3 diff --git a/news/short-config-settings-option.feature.rst b/news/short-config-settings-option.feature.rst new file mode 100644 index 00000000000..0da7f86373e --- /dev/null +++ b/news/short-config-settings-option.feature.rst @@ -0,0 +1 @@ +Add ``-C`` as a short version of the ``--config-settings`` option. diff --git a/noxfile.py b/noxfile.py index 5b5a66d5307..5c4683b7d79 100644 --- a/noxfile.py +++ b/noxfile.py @@ -1,6 +1,7 @@ """Automation using nox. """ +import argparse import glob import os import shutil @@ -21,7 +22,7 @@ LOCATIONS = { "common-wheels": "tests/data/common_wheels", - "protected-pip": "tools/tox_pip.py", + "protected-pip": "tools/protected_pip.py", } REQUIREMENTS = { "docs": "docs/requirements.txt", @@ -65,11 +66,8 @@ def should_update_common_wheels() -> bool: # ----------------------------------------------------------------------------- # Development Commands -# These are currently prototypes to evaluate whether we want to switch over -# completely to nox for all our automation. Contributors should prefer using -# `tox -e ...` until this note is removed. # ----------------------------------------------------------------------------- -@nox.session(python=["3.6", "3.7", "3.8", "3.9", "3.10", "pypy3"]) +@nox.session(python=["3.7", "3.8", "3.9", "3.10", "3.11", "pypy3"]) def test(session: nox.Session) -> None: # Get the common wheels. if should_update_common_wheels(): @@ -113,7 +111,13 @@ def test(session: nox.Session) -> None: # Run the tests # LC_CTYPE is set to get UTF-8 output inside of the subprocesses that our # tests use. - session.run("pytest", *arguments, env={"LC_CTYPE": "en_US.UTF-8"}) + session.run( + "pytest", + *arguments, + env={ + "LC_CTYPE": "en_US.UTF-8", + }, + ) @nox.session @@ -169,11 +173,24 @@ def lint(session: nox.Session) -> None: session.run("pre-commit", "run", *args) +# NOTE: This session will COMMIT upgrades to vendored libraries. +# You should therefore not run it directly against `main`. If you +# do (assuming you started with a clean main), you can run: +# +# git checkout -b vendoring-updates +# git checkout main +# git reset --hard origin/main @nox.session def vendoring(session: nox.Session) -> None: session.install("vendoring~=1.2.0") - if "--upgrade" not in session.posargs: + parser = argparse.ArgumentParser(prog="nox -s vendoring") + parser.add_argument("--upgrade-all", action="store_true") + parser.add_argument("--upgrade", action="append", default=[]) + parser.add_argument("--skip", action="append", default=[]) + args = parser.parse_args(session.posargs) + + if not (args.upgrade or args.upgrade_all): session.run("vendoring", "sync", "-v") return @@ -189,7 +206,9 @@ def pinned_requirements(path: Path) -> Iterator[Tuple[str, str]]: vendor_txt = Path("src/pip/_vendor/vendor.txt") for name, old_version in pinned_requirements(vendor_txt): - if name == "setuptools": + if name in args.skip: + continue + if args.upgrade and name not in args.upgrade: continue # update requirements.txt @@ -223,6 +242,28 @@ def pinned_requirements(path: Path) -> Iterator[Tuple[str, str]]: release.commit_file(session, ".", message=message) +@nox.session +def coverage(session: nox.Session) -> None: + # Install source distribution + run_with_protected_pip(session, "install", ".") + + # Install test dependencies + run_with_protected_pip(session, "install", "-r", REQUIREMENTS["tests"]) + + if not os.path.exists(".coverage-output"): + os.mkdir(".coverage-output") + session.run( + "pytest", + "--cov=pip", + "--cov-config=./setup.cfg", + *session.posargs, + env={ + "COVERAGE_OUTPUT_DIR": "./.coverage-output", + "COVERAGE_PROCESS_START": os.fsdecode(Path("setup.cfg").resolve()), + }, + ) + + # ----------------------------------------------------------------------------- # Release Commands # ----------------------------------------------------------------------------- diff --git a/pyproject.toml b/pyproject.toml index 53343bfced0..139c37e18d7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,6 +39,7 @@ substitute = [ # pkg_resource's vendored packages are directly vendored in pip. { match='pkg_resources\.extern', replace="pip._vendor" }, { match='from \.extern', replace="from pip._vendor" }, + { match='''\('pygments\.lexers\.''', replace="('pip._vendor.pygments.lexers." }, ] drop = [ # contains unnecessary scripts @@ -49,7 +50,14 @@ drop = [ "easy_install.py", "setuptools", "pkg_resources/_vendor/", + "_distutils_hack", + "distutils-precedence.pth", "pkg_resources/extern/", + # trim vendored pygments styles and lexers + "pygments/styles/[!_]*.py", + '^pygments/lexers/(?!python|__init__|_mapping).*\.py$', + # trim rich's markdown support + "rich/markdown.py", ] [tool.vendoring.typing-stubs] diff --git a/setup.cfg b/setup.cfg index f8f069ef722..2e35be30dd6 100644 --- a/setup.cfg +++ b/setup.cfg @@ -24,6 +24,10 @@ extend-ignore = G200, G202, # black adds spaces around ':' E203, + # using a cache + B019, + # reassigning variables in a loop + B020, per-file-ignores = # G: The plugin logging-format treats every .log and .error as logging. noxfile.py: G @@ -31,10 +35,15 @@ per-file-ignores = tests/*: B011 [mypy] +mypy_path = $MYPY_CONFIG_FILE_DIR/src ignore_missing_imports = True disallow_untyped_defs = True disallow_any_generics = True warn_unused_ignores = True +no_implicit_optional = True + +[mypy-pip._internal.utils._jaraco_text] +ignore_errors = True [mypy-pip._vendor.*] ignore_errors = True @@ -51,17 +60,12 @@ follow_imports = skip [mypy-pip._vendor.requests.*] follow_imports = skip -# TODO: The following option should be removed at some point in the future. -[mypy-tests.functional.*] -allow_untyped_defs = True - [tool:pytest] addopts = --ignore src/pip/_vendor --ignore tests/tests_cache -r aR --color=yes xfail_strict = True markers = network: tests that need network incompatible_with_sysconfig - incompatible_with_test_venv incompatible_with_venv no_auto_tempdir_manager unit: unit tests @@ -107,8 +111,3 @@ exclude_lines = pragma: no cover # This excludes typing-specific code, which will be validated by mypy anyway. if TYPE_CHECKING - # Can be set to exclude e.g. `if PY2:` on Python 3 - ${PIP_CI_COVERAGE_EXCLUDES} - -[metadata] -license_file = LICENSE.txt diff --git a/setup.py b/setup.py index 4e5ce301526..2179d34d2bf 100644 --- a/setup.py +++ b/setup.py @@ -37,11 +37,11 @@ def get_version(rel_path: str) -> str: "Programming Language :: Python", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", ], @@ -81,5 +81,7 @@ def get_version(rel_path: str) -> str: ], }, zip_safe=False, - python_requires=">=3.6", + # NOTE: python_requires is duplicated in __pip-runner__.py. + # When changing this value, please change the other copy as well. + python_requires=">=3.7", ) diff --git a/src/pip/__init__.py b/src/pip/__init__.py index 09e946fbe1f..ce90d06bfd4 100644 --- a/src/pip/__init__.py +++ b/src/pip/__init__.py @@ -1,6 +1,6 @@ from typing import List, Optional -__version__ = "22.0.dev0" +__version__ = "23.1.dev0" def main(args: Optional[List[str]] = None) -> int: diff --git a/src/pip/__pip-runner__.py b/src/pip/__pip-runner__.py new file mode 100644 index 00000000000..49a148a097e --- /dev/null +++ b/src/pip/__pip-runner__.py @@ -0,0 +1,50 @@ +"""Execute exactly this copy of pip, within a different environment. + +This file is named as it is, to ensure that this module can't be imported via +an import statement. +""" + +# /!\ This version compatibility check section must be Python 2 compatible. /!\ + +import sys + +# Copied from setup.py +PYTHON_REQUIRES = (3, 7) + + +def version_str(version): # type: ignore + return ".".join(str(v) for v in version) + + +if sys.version_info[:2] < PYTHON_REQUIRES: + raise SystemExit( + "This version of pip does not support python {} (requires >={}).".format( + version_str(sys.version_info[:2]), version_str(PYTHON_REQUIRES) + ) + ) + +# From here on, we can use Python 3 features, but the syntax must remain +# Python 2 compatible. + +import runpy # noqa: E402 +from importlib.machinery import PathFinder # noqa: E402 +from os.path import dirname # noqa: E402 + +PIP_SOURCES_ROOT = dirname(dirname(__file__)) + + +class PipImportRedirectingFinder: + @classmethod + def find_spec(self, fullname, path=None, target=None): # type: ignore + if fullname != "pip": + return None + + spec = PathFinder.find_spec(fullname, [PIP_SOURCES_ROOT], target) + assert spec, (PIP_SOURCES_ROOT, fullname) + return spec + + +sys.meta_path.insert(0, PipImportRedirectingFinder()) + +assert __name__ == "__main__", "Cannot run __pip-runner__.py as a non-main module" +runpy.run_module("pip", run_name="__main__", alter_sys=True) diff --git a/src/pip/_internal/build_env.py b/src/pip/_internal/build_env.py index a30b45826a9..4f704a3547d 100644 --- a/src/pip/_internal/build_env.py +++ b/src/pip/_internal/build_env.py @@ -1,17 +1,15 @@ """Build Environment used for isolation during sdist building """ -import contextlib import logging import os import pathlib +import site import sys import textwrap -import zipfile from collections import OrderedDict -from sysconfig import get_paths from types import TracebackType -from typing import TYPE_CHECKING, Iterable, Iterator, List, Optional, Set, Tuple, Type +from typing import TYPE_CHECKING, Iterable, List, Optional, Set, Tuple, Type, Union from pip._vendor.certifi import where from pip._vendor.packaging.requirements import Requirement @@ -19,8 +17,8 @@ from pip import __file__ as pip_location from pip._internal.cli.spinners import open_spinner -from pip._internal.locations import get_platlib, get_prefixed_libs, get_purelib -from pip._internal.metadata import get_environment +from pip._internal.locations import get_platlib, get_purelib, get_scheme +from pip._internal.metadata import get_default_environment, get_environment from pip._internal.utils.subprocess import call_subprocess from pip._internal.utils.temp_dir import TempDirectory, tempdir_kinds @@ -30,41 +28,53 @@ logger = logging.getLogger(__name__) +def _dedup(a: str, b: str) -> Union[Tuple[str], Tuple[str, str]]: + return (a, b) if a != b else (a,) + + class _Prefix: def __init__(self, path: str) -> None: self.path = path self.setup = False - self.bin_dir = get_paths( - "nt" if os.name == "nt" else "posix_prefix", - vars={"base": path, "platbase": path}, - )["scripts"] - self.lib_dirs = get_prefixed_libs(path) + scheme = get_scheme("", prefix=path) + self.bin_dir = scheme.scripts + self.lib_dirs = _dedup(scheme.purelib, scheme.platlib) -@contextlib.contextmanager -def _create_standalone_pip() -> Iterator[str]: - """Create a "standalone pip" zip file. +def get_runnable_pip() -> str: + """Get a file to pass to a Python executable, to run the currently-running pip. - The zip file's content is identical to the currently-running pip. - It will be used to install requirements into the build environment. + This is used to run a pip subprocess, for installing requirements into the build + environment. """ source = pathlib.Path(pip_location).resolve().parent - # Return the current instance if `source` is not a directory. We can't build - # a zip from this, and it likely means the instance is already standalone. if not source.is_dir(): - yield str(source) - return + # This would happen if someone is using pip from inside a zip file. In that + # case, we can use that directly. + return str(source) + + return os.fsdecode(source / "__pip-runner__.py") + - with TempDirectory(kind="standalone-pip") as tmp_dir: - pip_zip = os.path.join(tmp_dir.path, "__env_pip__.zip") - kwargs = {} - if sys.version_info >= (3, 8): - kwargs["strict_timestamps"] = False - with zipfile.ZipFile(pip_zip, "w", **kwargs) as zf: - for child in source.rglob("*"): - zf.write(child, child.relative_to(source.parent).as_posix()) - yield os.path.join(pip_zip, "pip") +def _get_system_sitepackages() -> Set[str]: + """Get system site packages + + Usually from site.getsitepackages, + but fallback on `get_purelib()/get_platlib()` if unavailable + (e.g. in a virtualenv created by virtualenv<20) + + Returns normalized set of strings. + """ + if hasattr(site, "getsitepackages"): + system_sites = site.getsitepackages() + else: + # virtualenv < 20 overwrites site.py without getsitepackages + # fallback on get_purelib/get_platlib. + # this is known to miss things, but shouldn't in the cases + # where getsitepackages() has been removed (inside a virtualenv) + system_sites = [get_purelib(), get_platlib()] + return {os.path.normcase(path) for path in system_sites} class BuildEnvironment: @@ -87,9 +97,8 @@ def __init__(self) -> None: # Customize site to: # - ensure .pth files are honored # - prevent access to system site packages - system_sites = { - os.path.normcase(site) for site in (get_purelib(), get_platlib()) - } + system_sites = _get_system_sitepackages() + self._site_dir = os.path.join(temp_dir.path, "site") if not os.path.exists(self._site_dir): os.mkdir(self._site_dir) @@ -168,9 +177,17 @@ def check_requirements( missing = set() conflicting = set() if reqs: - env = get_environment(self._lib_dirs) + env = ( + get_environment(self._lib_dirs) + if hasattr(self, "_lib_dirs") + else get_default_environment() + ) for req_str in reqs: req = Requirement(req_str) + # We're explicitly evaluating with an empty extra value, since build + # environments are not provided any mechanism to select specific extras. + if req.marker is not None and not req.marker.evaluate({"extra": ""}): + continue dist = env.get_distribution(req.name) if not dist: missing.add(req_str) @@ -179,7 +196,7 @@ def check_requirements( installed_req_str = f"{req.name}=={dist.version}" else: installed_req_str = f"{req.name}==={dist.version}" - if dist.version not in req.specifier: + if not req.specifier.contains(dist.version, prereleases=True): conflicting.add((installed_req_str, req_str)) # FIXME: Consider direct URL? return conflicting, missing @@ -189,29 +206,21 @@ def install_requirements( finder: "PackageFinder", requirements: Iterable[str], prefix_as_string: str, - message: str, + *, + kind: str, ) -> None: prefix = self._prefixes[prefix_as_string] assert not prefix.setup prefix.setup = True if not requirements: return - with contextlib.ExitStack() as ctx: - # TODO: Remove this block when dropping 3.6 support. Python 3.6 - # lacks importlib.resources and pep517 has issues loading files in - # a zip, so we fallback to the "old" method by adding the current - # pip directory to the child process's sys.path. - if sys.version_info < (3, 7): - pip_runnable = os.path.dirname(pip_location) - else: - pip_runnable = ctx.enter_context(_create_standalone_pip()) - self._install_requirements( - pip_runnable, - finder, - requirements, - prefix, - message, - ) + self._install_requirements( + get_runnable_pip(), + finder, + requirements, + prefix, + kind=kind, + ) @staticmethod def _install_requirements( @@ -219,7 +228,8 @@ def _install_requirements( finder: "PackageFinder", requirements: Iterable[str], prefix: _Prefix, - message: str, + *, + kind: str, ) -> None: args: List[str] = [ sys.executable, @@ -261,8 +271,13 @@ def _install_requirements( args.append("--") args.extend(requirements) extra_environ = {"_PIP_STANDALONE_CERT": where()} - with open_spinner(message) as spinner: - call_subprocess(args, spinner=spinner, extra_environ=extra_environ) + with open_spinner(f"Installing {kind}") as spinner: + call_subprocess( + args, + command_desc=f"pip subprocess to install {kind}", + spinner=spinner, + extra_environ=extra_environ, + ) class NoOpBuildEnvironment(BuildEnvironment): @@ -290,6 +305,7 @@ def install_requirements( finder: "PackageFinder", requirements: Iterable[str], prefix_as_string: str, - message: str, + *, + kind: str, ) -> None: raise NotImplementedError() diff --git a/src/pip/_internal/cache.py b/src/pip/_internal/cache.py index 1d6df220118..c53b7f023a1 100644 --- a/src/pip/_internal/cache.py +++ b/src/pip/_internal/cache.py @@ -5,12 +5,14 @@ import json import logging import os +from pathlib import Path from typing import Any, Dict, List, Optional, Set from pip._vendor.packaging.tags import Tag, interpreter_name, interpreter_version from pip._vendor.packaging.utils import canonicalize_name from pip._internal.exceptions import InvalidWheelFilename +from pip._internal.models.direct_url import DirectUrl from pip._internal.models.format_control import FormatControl from pip._internal.models.link import Link from pip._internal.models.wheel import Wheel @@ -19,6 +21,8 @@ logger = logging.getLogger(__name__) +ORIGIN_JSON_NAME = "origin.json" + def _hash_dict(d: Dict[str, str]) -> str: """Return a stable sha224 of a dictionary.""" @@ -204,6 +208,10 @@ def __init__( ): self.link = link self.persistent = persistent + self.origin: Optional[DirectUrl] = None + origin_direct_url_path = Path(self.link.file_path).parent / ORIGIN_JSON_NAME + if origin_direct_url_path.exists(): + self.origin = DirectUrl.from_json(origin_direct_url_path.read_text()) class WheelCache(Cache): @@ -213,7 +221,11 @@ class WheelCache(Cache): when a certain link is not found in the simple wheel cache first. """ - def __init__(self, cache_dir: str, format_control: FormatControl) -> None: + def __init__( + self, cache_dir: str, format_control: Optional[FormatControl] = None + ) -> None: + if format_control is None: + format_control = FormatControl() super().__init__(cache_dir, format_control, {"binary"}) self._wheel_cache = SimpleWheelCache(cache_dir, format_control) self._ephem_cache = EphemWheelCache(format_control) @@ -262,3 +274,20 @@ def get_cache_entry( return CacheEntry(retval, persistent=False) return None + + @staticmethod + def record_download_origin(cache_dir: str, download_info: DirectUrl) -> None: + origin_path = Path(cache_dir) / ORIGIN_JSON_NAME + if origin_path.is_file(): + origin = DirectUrl.from_json(origin_path.read_text()) + # TODO: use DirectUrl.equivalent when https://github.com/pypa/pip/pull/10564 + # is merged. + if origin.url != download_info.url: + logger.warning( + "Origin URL %s in cache entry %s does not match download URL %s. " + "This is likely a pip bug or a cache corruption issue.", + origin.url, + cache_dir, + download_info.url, + ) + origin_path.write_text(download_info.to_json(), encoding="utf-8") diff --git a/src/pip/_internal/cli/autocompletion.py b/src/pip/_internal/cli/autocompletion.py index 3cad1486071..226fe84dc0d 100644 --- a/src/pip/_internal/cli/autocompletion.py +++ b/src/pip/_internal/cli/autocompletion.py @@ -59,6 +59,14 @@ def autocomplete() -> None: print(dist) sys.exit(1) + should_list_installables = ( + not current.startswith("-") and subcommand_name == "install" + ) + if should_list_installables: + for path in auto_complete_paths(current, "path"): + print(path) + sys.exit(1) + subcommand = create_command(subcommand_name) for opt in subcommand.parser.option_list_all: @@ -138,7 +146,7 @@ def auto_complete_paths(current: str, completion_type: str) -> Iterable[str]: starting with ``current``. :param current: The word to be completed - :param completion_type: path completion type(`file`, `path` or `dir`)i + :param completion_type: path completion type(``file``, ``path`` or ``dir``) :return: A generator of regular files and/or directories """ directory, filename = os.path.split(current) diff --git a/src/pip/_internal/cli/base_command.py b/src/pip/_internal/cli/base_command.py index 0afe7e75d46..5bd7e67e649 100644 --- a/src/pip/_internal/cli/base_command.py +++ b/src/pip/_internal/cli/base_command.py @@ -10,6 +10,8 @@ from optparse import Values from typing import Any, Callable, List, Optional, Tuple +from pip._vendor.rich import traceback as rich_traceback + from pip._internal.cli import cmdoptions from pip._internal.cli.command_context import CommandContextMixIn from pip._internal.cli.parser import ConfigOptionParser, UpdatingDefaultsHelpFormatter @@ -22,6 +24,7 @@ from pip._internal.exceptions import ( BadCommand, CommandError, + DiagnosticPipError, InstallationError, NetworkConnectionError, PreviousBuildDirError, @@ -148,13 +151,6 @@ def _main(self, args: List[str]) -> int: ) options.cache_dir = None - if "2020-resolver" in options.features_enabled: - logger.warning( - "--use-feature=2020-resolver no longer has any effect, " - "since it is now the default dependency resolver in pip. " - "This will become an error in pip 21.0." - ) - def intercepts_unhandled_exc( run_func: Callable[..., int] ) -> Callable[..., int]: @@ -164,6 +160,11 @@ def exc_logging_wrapper(*args: Any) -> int: status = run_func(*args) assert isinstance(status, int) return status + except DiagnosticPipError as exc: + logger.error("[present-rich] %s", exc) + logger.debug("Exception information:", exc_info=True) + + return ERROR except PreviousBuildDirError as exc: logger.critical(str(exc)) logger.debug("Exception information:", exc_info=True) @@ -209,6 +210,7 @@ def exc_logging_wrapper(*args: Any) -> int: run = intercepts_unhandled_exc(self.run) else: run = self.run + rich_traceback.install(show_locals=True) return run(options, args) finally: self.handle_pip_version_check(options) diff --git a/src/pip/_internal/cli/cmdoptions.py b/src/pip/_internal/cli/cmdoptions.py index 11ddc610c14..2bbff2d4dc1 100644 --- a/src/pip/_internal/cli/cmdoptions.py +++ b/src/pip/_internal/cli/cmdoptions.py @@ -10,9 +10,10 @@ # The following comment should be removed at some point in the future. # mypy: strict-optional=False +import importlib.util +import logging import os import textwrap -import warnings from functools import partial from optparse import SUPPRESS_HELP, Option, OptionGroup, OptionParser, Values from textwrap import dedent @@ -21,7 +22,6 @@ from pip._vendor.packaging.utils import canonicalize_name from pip._internal.cli.parser import ConfigOptionParser -from pip._internal.cli.progress_bars import BAR_TYPES from pip._internal.exceptions import CommandError from pip._internal.locations import USER_CACHE_DIR, get_src_prefix from pip._internal.models.format_control import FormatControl @@ -30,6 +30,8 @@ from pip._internal.utils.hashes import STRONG_HASHES from pip._internal.utils.misc import strtobool +logger = logging.getLogger(__name__) + def raise_option_error(parser: OptionParser, option: Option, msg: str) -> None: """ @@ -57,32 +59,6 @@ def make_option_group(group: Dict[str, Any], parser: ConfigOptionParser) -> Opti return option_group -def check_install_build_global( - options: Values, check_options: Optional[Values] = None -) -> None: - """Disable wheels if per-setup.py call options are set. - - :param options: The OptionParser options to update. - :param check_options: The options to check, if not supplied defaults to - options. - """ - if check_options is None: - check_options = options - - def getname(n: str) -> Optional[Any]: - return getattr(check_options, n, None) - - names = ["build_options", "global_options", "install_options"] - if any(map(getname, names)): - control = options.format_control - control.disallow_binaries() - warnings.warn( - "Disabling all use of wheels due to the use of --build-option " - "/ --global-option / --install-option.", - stacklevel=2, - ) - - def check_dist_restriction(options: Values, check_target: bool = False) -> None: """Function for determining if custom platform options are allowed. @@ -188,6 +164,21 @@ class PipOption(Option): ), ) +override_externally_managed: Callable[..., Option] = partial( + Option, + "--break-system-packages", + dest="override_externally_managed", + action="store_true", + help="Allow pip to modify an EXTERNALLY-MANAGED Python installation", +) + +python: Callable[..., Option] = partial( + Option, + "--python", + dest="python", + help="Run pip with the specified Python interpreter.", +) + verbose: Callable[..., Option] = partial( Option, "-v", @@ -235,13 +226,9 @@ class PipOption(Option): "--progress-bar", dest="progress_bar", type="choice", - choices=list(BAR_TYPES.keys()), + choices=["on", "off"], default="on", - help=( - "Specify type of progress to be displayed [" - + "|".join(BAR_TYPES.keys()) - + "] (default: %default)" - ), + help="Specify whether the progress bar should be used [on, off] (default: on)", ) log: Callable[..., Option] = partial( @@ -265,13 +252,26 @@ class PipOption(Option): help="Disable prompting for input.", ) +keyring_provider: Callable[..., Option] = partial( + Option, + "--keyring-provider", + dest="keyring_provider", + choices=["auto", "disabled", "import", "subprocess"], + default="disabled", + help=( + "Enable the credential lookup via the keyring library if user input is allowed." + " Specify which mechanism to use [disabled, import, subprocess]." + " (default: disabled)" + ), +) + proxy: Callable[..., Option] = partial( Option, "--proxy", dest="proxy", type="str", default="", - help="Specify a proxy in the form [user:passwd@]proxy.server:port.", + help="Specify a proxy in the form scheme://[user:passwd@]proxy.server:port.", ) retries: Callable[..., Option] = partial( @@ -752,6 +752,15 @@ def _handle_no_cache_dir( "if this option is used.", ) +check_build_deps: Callable[..., Option] = partial( + Option, + "--check-build-dependencies", + dest="check_build_deps", + action="store_true", + default=False, + help="Check the build dependencies when PEP517 is used.", +) + def _handle_no_use_pep517( option: Option, opt: str, value: str, parser: OptionParser @@ -774,6 +783,12 @@ def _handle_no_use_pep517( """ raise_option_error(parser, option=option, msg=msg) + # If user doesn't wish to use pep517, we check if setuptools is installed + # and raise error if it is not. + if not importlib.util.find_spec("setuptools"): + msg = "It is not possible to use --no-use-pep517 without setuptools installed." + raise_option_error(parser, option=option, msg=msg) + # Otherwise, --no-use-pep517 was passed via the command-line. parser.values.use_pep517 = False @@ -798,17 +813,38 @@ def _handle_no_use_pep517( help=SUPPRESS_HELP, ) -install_options: Callable[..., Option] = partial( + +def _handle_config_settings( + option: Option, opt_str: str, value: str, parser: OptionParser +) -> None: + key, sep, val = value.partition("=") + if sep != "=": + parser.error(f"Arguments to {opt_str} must be of the form KEY=VAL") # noqa + dest = getattr(parser.values, option.dest) + if dest is None: + dest = {} + setattr(parser.values, option.dest, dest) + if key in dest: + if isinstance(dest[key], list): + dest[key].append(val) + else: + dest[key] = [dest[key], val] + else: + dest[key] = val + + +config_settings: Callable[..., Option] = partial( Option, - "--install-option", - dest="install_options", - action="append", - metavar="options", - help="Extra arguments to be supplied to the setup.py install " - 'command (use like --install-option="--install-scripts=/usr/local/' - 'bin"). Use multiple --install-option options to pass multiple ' - "options to setup.py install. If you are using an option with a " - "directory path, be sure to use absolute path.", + "-C", + "--config-settings", + dest="config_settings", + type=str, + action="callback", + callback=_handle_config_settings, + metavar="settings", + help="Configuration settings to be passed to the PEP 517 build backend. " + "Settings take the form KEY=VALUE. Use multiple --config-settings options " + "to pass multiple keys to the backend.", ) build_options: Callable[..., Option] = partial( @@ -857,6 +893,15 @@ def _handle_no_use_pep517( "of pip is available for download. Implied with --no-index.", ) +root_user_action: Callable[..., Option] = partial( + Option, + "--root-user-action", + dest="root_user_action", + default="warn", + choices=["warn", "ignore"], + help="Action if pip is run as a root user. By default, a warning message is shown.", +) + def _handle_merge_hash( option: Option, opt_str: str, value: str, parser: OptionParser @@ -952,7 +997,11 @@ def check_list_path_option(options: Values) -> None: metavar="feature", action="append", default=[], - choices=["2020-resolver", "fast-deps", "in-tree-build"], + choices=[ + "fast-deps", + "truststore", + "no-binary-enable-wheel-cache", + ], help="Enable new functionality, that may be backward incompatible.", ) @@ -963,7 +1012,9 @@ def check_list_path_option(options: Values) -> None: metavar="feature", action="append", default=[], - choices=["legacy-resolver", "out-of-tree-build"], + choices=[ + "legacy-resolver", + ], help=("Enable deprecated functionality, that will be removed in the future."), ) @@ -979,11 +1030,13 @@ def check_list_path_option(options: Values) -> None: debug_mode, isolated_mode, require_virtualenv, + python, verbose, version, quiet, log, no_input, + keyring_provider, proxy, retries, timeout, diff --git a/src/pip/_internal/cli/command_context.py b/src/pip/_internal/cli/command_context.py index ed68322376d..139995ac3f1 100644 --- a/src/pip/_internal/cli/command_context.py +++ b/src/pip/_internal/cli/command_context.py @@ -1,5 +1,5 @@ from contextlib import ExitStack, contextmanager -from typing import ContextManager, Iterator, TypeVar +from typing import ContextManager, Generator, TypeVar _T = TypeVar("_T", covariant=True) @@ -11,7 +11,7 @@ def __init__(self) -> None: self._main_context = ExitStack() @contextmanager - def main_context(self) -> Iterator[None]: + def main_context(self) -> Generator[None, None, None]: assert not self._in_main_context self._in_main_context = True diff --git a/src/pip/_internal/cli/main_parser.py b/src/pip/_internal/cli/main_parser.py index 3666ab04ca6..5ade356b9c2 100644 --- a/src/pip/_internal/cli/main_parser.py +++ b/src/pip/_internal/cli/main_parser.py @@ -2,9 +2,11 @@ """ import os +import subprocess import sys -from typing import List, Tuple +from typing import List, Optional, Tuple +from pip._internal.build_env import get_runnable_pip from pip._internal.cli import cmdoptions from pip._internal.cli.parser import ConfigOptionParser, UpdatingDefaultsHelpFormatter from pip._internal.commands import commands_dict, get_similar_commands @@ -45,6 +47,25 @@ def create_main_parser() -> ConfigOptionParser: return parser +def identify_python_interpreter(python: str) -> Optional[str]: + # If the named file exists, use it. + # If it's a directory, assume it's a virtual environment and + # look for the environment's Python executable. + if os.path.exists(python): + if os.path.isdir(python): + # bin/python for Unix, Scripts/python.exe for Windows + # Try both in case of odd cases like cygwin. + for exe in ("bin/python", "Scripts/python.exe"): + py = os.path.join(python, exe) + if os.path.exists(py): + return py + else: + return python + + # Could not find the interpreter specified + return None + + def parse_command(args: List[str]) -> Tuple[str, List[str]]: parser = create_main_parser() @@ -57,6 +78,32 @@ def parse_command(args: List[str]) -> Tuple[str, List[str]]: # args_else: ['install', '--user', 'INITools'] general_options, args_else = parser.parse_args(args) + # --python + if general_options.python and "_PIP_RUNNING_IN_SUBPROCESS" not in os.environ: + # Re-invoke pip using the specified Python interpreter + interpreter = identify_python_interpreter(general_options.python) + if interpreter is None: + raise CommandError( + f"Could not locate Python interpreter {general_options.python}" + ) + + pip_cmd = [ + interpreter, + get_runnable_pip(), + ] + pip_cmd.extend(args) + + # Set a flag so the child doesn't re-invoke itself, causing + # an infinite loop. + os.environ["_PIP_RUNNING_IN_SUBPROCESS"] = "1" + returncode = 0 + try: + proc = subprocess.run(pip_cmd) + returncode = proc.returncode + except (subprocess.SubprocessError, OSError) as exc: + raise CommandError(f"Failed to run pip under {interpreter}: {exc}") + sys.exit(returncode) + # --version if general_options.version: sys.stdout.write(parser.version) diff --git a/src/pip/_internal/cli/parser.py b/src/pip/_internal/cli/parser.py index a1c99a8cb30..c762cf2781d 100644 --- a/src/pip/_internal/cli/parser.py +++ b/src/pip/_internal/cli/parser.py @@ -6,7 +6,7 @@ import sys import textwrap from contextlib import suppress -from typing import Any, Dict, Iterator, List, Tuple +from typing import Any, Dict, Generator, List, Tuple from pip._internal.cli.status_codes import UNKNOWN_ERROR from pip._internal.configuration import Configuration, ConfigurationError @@ -175,7 +175,9 @@ def check_default(self, option: optparse.Option, key: str, val: Any) -> Any: print(f"An error occurred during configuration: {exc}") sys.exit(3) - def _get_ordered_configuration_items(self) -> Iterator[Tuple[str, Any]]: + def _get_ordered_configuration_items( + self, + ) -> Generator[Tuple[str, Any], None, None]: # Configuration gives keys in an unordered manner. Order them. override_order = ["global", self.name, ":env:"] diff --git a/src/pip/_internal/cli/progress_bars.py b/src/pip/_internal/cli/progress_bars.py index f3db2951986..0ad14031ca5 100644 --- a/src/pip/_internal/cli/progress_bars.py +++ b/src/pip/_internal/cli/progress_bars.py @@ -1,250 +1,68 @@ -import itertools -import sys -from signal import SIGINT, default_int_handler, signal -from typing import Any +import functools +from typing import Callable, Generator, Iterable, Iterator, Optional, Tuple + +from pip._vendor.rich.progress import ( + BarColumn, + DownloadColumn, + FileSizeColumn, + Progress, + ProgressColumn, + SpinnerColumn, + TextColumn, + TimeElapsedColumn, + TimeRemainingColumn, + TransferSpeedColumn, +) -from pip._vendor.progress.bar import Bar, FillingCirclesBar, IncrementalBar -from pip._vendor.progress.spinner import Spinner - -from pip._internal.utils.compat import WINDOWS from pip._internal.utils.logging import get_indentation -from pip._internal.utils.misc import format_size - -try: - from pip._vendor import colorama -# Lots of different errors can come from this, including SystemError and -# ImportError. -except Exception: - colorama = None - - -def _select_progress_class(preferred: Bar, fallback: Bar) -> Bar: - encoding = getattr(preferred.file, "encoding", None) - # If we don't know what encoding this file is in, then we'll just assume - # that it doesn't support unicode and use the ASCII bar. - if not encoding: - return fallback - - # Collect all of the possible characters we want to use with the preferred - # bar. - characters = [ - getattr(preferred, "empty_fill", ""), - getattr(preferred, "fill", ""), - ] - characters += list(getattr(preferred, "phases", [])) - - # Try to decode the characters we're using for the bar using the encoding - # of the given file, if this works then we'll assume that we can use the - # fancier bar and if not we'll fall back to the plaintext bar. - try: - "".join(characters).encode(encoding) - except UnicodeEncodeError: - return fallback +DownloadProgressRenderer = Callable[[Iterable[bytes]], Iterator[bytes]] + + +def _rich_progress_bar( + iterable: Iterable[bytes], + *, + bar_type: str, + size: int, +) -> Generator[bytes, None, None]: + assert bar_type == "on", "This should only be used in the default mode." + + if not size: + total = float("inf") + columns: Tuple[ProgressColumn, ...] = ( + TextColumn("[progress.description]{task.description}"), + SpinnerColumn("line", speed=1.5), + FileSizeColumn(), + TransferSpeedColumn(), + TimeElapsedColumn(), + ) else: - return preferred - - -_BaseBar: Any = _select_progress_class(IncrementalBar, Bar) - - -class InterruptibleMixin: - """ - Helper to ensure that self.finish() gets called on keyboard interrupt. - - This allows downloads to be interrupted without leaving temporary state - (like hidden cursors) behind. - - This class is similar to the progress library's existing SigIntMixin - helper, but as of version 1.2, that helper has the following problems: - - 1. It calls sys.exit(). - 2. It discards the existing SIGINT handler completely. - 3. It leaves its own handler in place even after an uninterrupted finish, - which will have unexpected delayed effects if the user triggers an - unrelated keyboard interrupt some time after a progress-displaying - download has already completed, for example. - """ - - def __init__(self, *args: Any, **kwargs: Any) -> None: - """ - Save the original SIGINT handler for later. - """ - # https://github.com/python/mypy/issues/5887 - super().__init__(*args, **kwargs) # type: ignore - - self.original_handler = signal(SIGINT, self.handle_sigint) - - # If signal() returns None, the previous handler was not installed from - # Python, and we cannot restore it. This probably should not happen, - # but if it does, we must restore something sensible instead, at least. - # The least bad option should be Python's default SIGINT handler, which - # just raises KeyboardInterrupt. - if self.original_handler is None: - self.original_handler = default_int_handler - - def finish(self) -> None: - """ - Restore the original SIGINT handler after finishing. - - This should happen regardless of whether the progress display finishes - normally, or gets interrupted. - """ - super().finish() # type: ignore - signal(SIGINT, self.original_handler) - - def handle_sigint(self, signum, frame): # type: ignore - """ - Call self.finish() before delegating to the original SIGINT handler. - - This handler should only be in place while the progress display is - active. - """ - self.finish() - self.original_handler(signum, frame) - - -class SilentBar(Bar): - def update(self) -> None: - pass - - -class BlueEmojiBar(IncrementalBar): - - suffix = "%(percent)d%%" - bar_prefix = " " - bar_suffix = " " - phases = ("\U0001F539", "\U0001F537", "\U0001F535") - - -class DownloadProgressMixin: - def __init__(self, *args: Any, **kwargs: Any) -> None: - # https://github.com/python/mypy/issues/5887 - super().__init__(*args, **kwargs) # type: ignore - self.message: str = (" " * (get_indentation() + 2)) + self.message - - @property - def downloaded(self) -> str: - return format_size(self.index) # type: ignore - - @property - def download_speed(self) -> str: - # Avoid zero division errors... - if self.avg == 0.0: # type: ignore - return "..." - return format_size(1 / self.avg) + "/s" # type: ignore - - @property - def pretty_eta(self) -> str: - if self.eta: # type: ignore - return f"eta {self.eta_td}" # type: ignore - return "" - - def iter(self, it): # type: ignore - for x in it: - yield x - # B305 is incorrectly raised here - # https://github.com/PyCQA/flake8-bugbear/issues/59 - self.next(len(x)) # noqa: B305 - self.finish() - - -class WindowsMixin: - def __init__(self, *args: Any, **kwargs: Any) -> None: - # The Windows terminal does not support the hide/show cursor ANSI codes - # even with colorama. So we'll ensure that hide_cursor is False on - # Windows. - # This call needs to go before the super() call, so that hide_cursor - # is set in time. The base progress bar class writes the "hide cursor" - # code to the terminal in its init, so if we don't set this soon - # enough, we get a "hide" with no corresponding "show"... - if WINDOWS and self.hide_cursor: # type: ignore - self.hide_cursor = False - - # https://github.com/python/mypy/issues/5887 - super().__init__(*args, **kwargs) # type: ignore - - # Check if we are running on Windows and we have the colorama module, - # if we do then wrap our file with it. - if WINDOWS and colorama: - self.file = colorama.AnsiToWin32(self.file) # type: ignore - # The progress code expects to be able to call self.file.isatty() - # but the colorama.AnsiToWin32() object doesn't have that, so we'll - # add it. - self.file.isatty = lambda: self.file.wrapped.isatty() - # The progress code expects to be able to call self.file.flush() - # but the colorama.AnsiToWin32() object doesn't have that, so we'll - # add it. - self.file.flush = lambda: self.file.wrapped.flush() - - -class BaseDownloadProgressBar(WindowsMixin, InterruptibleMixin, DownloadProgressMixin): - - file = sys.stdout - message = "%(percent)d%%" - suffix = "%(downloaded)s %(download_speed)s %(pretty_eta)s" - - -class DefaultDownloadProgressBar(BaseDownloadProgressBar, _BaseBar): - pass - - -class DownloadSilentBar(BaseDownloadProgressBar, SilentBar): - pass - - -class DownloadBar(BaseDownloadProgressBar, Bar): - pass - - -class DownloadFillingCirclesBar(BaseDownloadProgressBar, FillingCirclesBar): - pass - - -class DownloadBlueEmojiProgressBar(BaseDownloadProgressBar, BlueEmojiBar): - pass - - -class DownloadProgressSpinner( - WindowsMixin, InterruptibleMixin, DownloadProgressMixin, Spinner -): - - file = sys.stdout - suffix = "%(downloaded)s %(download_speed)s" - - def next_phase(self) -> str: - if not hasattr(self, "_phaser"): - self._phaser = itertools.cycle(self.phases) - return next(self._phaser) - - def update(self) -> None: - message = self.message % self - phase = self.next_phase() - suffix = self.suffix % self - line = "".join( - [ - message, - " " if message else "", - phase, - " " if suffix else "", - suffix, - ] + total = size + columns = ( + TextColumn("[progress.description]{task.description}"), + BarColumn(), + DownloadColumn(), + TransferSpeedColumn(), + TextColumn("eta"), + TimeRemainingColumn(), ) - self.writeln(line) - + progress = Progress(*columns, refresh_per_second=30) + task_id = progress.add_task(" " * (get_indentation() + 2), total=total) + with progress: + for chunk in iterable: + yield chunk + progress.update(task_id, advance=len(chunk)) -BAR_TYPES = { - "off": (DownloadSilentBar, DownloadSilentBar), - "on": (DefaultDownloadProgressBar, DownloadProgressSpinner), - "ascii": (DownloadBar, DownloadProgressSpinner), - "pretty": (DownloadFillingCirclesBar, DownloadProgressSpinner), - "emoji": (DownloadBlueEmojiProgressBar, DownloadProgressSpinner), -} +def get_download_progress_renderer( + *, bar_type: str, size: Optional[int] = None +) -> DownloadProgressRenderer: + """Get an object that can be used to render the download progress. -def DownloadProgressProvider(progress_bar, max=None): # type: ignore - if max is None or max == 0: - return BAR_TYPES[progress_bar][1]().iter + Returns a callable, that takes an iterable to "wrap". + """ + if bar_type == "on": + return functools.partial(_rich_progress_bar, bar_type=bar_type, size=size) else: - return BAR_TYPES[progress_bar][0](max=max).iter + return iter # no-op, when passed an iterator diff --git a/src/pip/_internal/cli/req_command.py b/src/pip/_internal/cli/req_command.py index dbd15cbce63..048b9c99e41 100644 --- a/src/pip/_internal/cli/req_command.py +++ b/src/pip/_internal/cli/req_command.py @@ -10,7 +10,7 @@ import sys from functools import partial from optparse import Values -from typing import Any, List, Optional, Tuple +from typing import TYPE_CHECKING, Any, List, Optional, Tuple from pip._internal.cache import WheelCache from pip._internal.cli import cmdoptions @@ -22,6 +22,7 @@ from pip._internal.models.selection_prefs import SelectionPreferences from pip._internal.models.target_python import TargetPython from pip._internal.network.session import PipSession +from pip._internal.operations.build.build_tracker import BuildTracker from pip._internal.operations.prepare import RequirementPreparer from pip._internal.req.constructors import ( install_req_from_editable, @@ -31,10 +32,8 @@ ) from pip._internal.req.req_file import parse_requirements from pip._internal.req.req_install import InstallRequirement -from pip._internal.req.req_tracker import RequirementTracker from pip._internal.resolution.base import BaseResolver from pip._internal.self_outdated_check import pip_self_version_check -from pip._internal.utils.deprecation import deprecated from pip._internal.utils.temp_dir import ( TempDirectory, TempDirectoryTypeRegistry, @@ -42,9 +41,33 @@ ) from pip._internal.utils.virtualenv import running_under_virtualenv +if TYPE_CHECKING: + from ssl import SSLContext + logger = logging.getLogger(__name__) +def _create_truststore_ssl_context() -> Optional["SSLContext"]: + if sys.version_info < (3, 10): + raise CommandError("The truststore feature is only available for Python 3.10+") + + try: + import ssl + except ImportError: + logger.warning("Disabling truststore since ssl support is missing") + return None + + try: + import truststore + except ImportError: + raise CommandError( + "To use the truststore feature, 'truststore' must be installed into " + "pip's current environment." + ) + + return truststore.SSLContext(ssl.PROTOCOL_TLS_CLIENT) + + class SessionCommandMixin(CommandContextMixIn): """ @@ -84,15 +107,27 @@ def _build_session( options: Values, retries: Optional[int] = None, timeout: Optional[int] = None, + fallback_to_certifi: bool = False, ) -> PipSession: - assert not options.cache_dir or os.path.isabs(options.cache_dir) + cache_dir = options.cache_dir + assert not cache_dir or os.path.isabs(cache_dir) + + if "truststore" in options.features_enabled: + try: + ssl_context = _create_truststore_ssl_context() + except Exception: + if not fallback_to_certifi: + raise + ssl_context = None + else: + ssl_context = None + session = PipSession( - cache=( - os.path.join(options.cache_dir, "http") if options.cache_dir else None - ), + cache=os.path.join(cache_dir, "http") if cache_dir else None, retries=retries if retries is not None else options.retries, trusted_hosts=options.trusted_hosts, index_urls=self._get_index_urls(options), + ssl_context=ssl_context, ) # Handle custom ca-bundles from the user @@ -116,6 +151,7 @@ def _build_session( # Determine if we can prompt the user for authentication or not session.auth.prompting = not options.no_input + session.auth.keyring_provider = options.keyring_provider return session @@ -142,7 +178,14 @@ def handle_pip_version_check(self, options: Values) -> None: # Otherwise, check if we're using the latest version of pip available. session = self._build_session( - options, retries=0, timeout=min(5, options.timeout) + options, + retries=0, + timeout=min(5, options.timeout), + # This is set to ensure the function does not fail when truststore is + # specified in use-feature but cannot be loaded. This usually raises a + # CommandError and shows a nice user-facing error, but this function is not + # called in that try-except block. + fallback_to_certifi=True, ) with session: pip_self_version_check(session, options) @@ -232,11 +275,12 @@ def make_requirement_preparer( cls, temp_build_dir: TempDirectory, options: Values, - req_tracker: RequirementTracker, + build_tracker: BuildTracker, session: PipSession, finder: PackageFinder, use_user_site: bool, download_dir: Optional[str] = None, + verbosity: int = 0, ) -> RequirementPreparer: """ Create a RequirementPreparer instance for the given parameters. @@ -262,33 +306,20 @@ def make_requirement_preparer( "fast-deps has no effect when used with the legacy resolver." ) - in_tree_build = "out-of-tree-build" not in options.deprecated_features_enabled - if "in-tree-build" in options.features_enabled: - deprecated( - reason="In-tree builds are now the default.", - replacement="to remove the --use-feature=in-tree-build flag", - gone_in="22.1", - ) - if "out-of-tree-build" in options.deprecated_features_enabled: - deprecated( - reason="Out-of-tree builds are deprecated.", - replacement=None, - gone_in="22.1", - ) - return RequirementPreparer( build_dir=temp_build_dir_path, src_dir=options.src_dir, download_dir=download_dir, build_isolation=options.build_isolation, - req_tracker=req_tracker, + check_build_deps=options.check_build_deps, + build_tracker=build_tracker, session=session, progress_bar=options.progress_bar, finder=finder, require_hashes=options.require_hashes, use_user_site=use_user_site, lazy_wheel=lazy_wheel, - in_tree_build=in_tree_build, + verbosity=verbosity, ) @classmethod @@ -313,6 +344,7 @@ def make_resolver( install_req_from_req_string, isolated=options.isolated_mode, use_pep517=use_pep517, + config_settings=getattr(options, "config_settings", None), ) resolver_variant = cls.determine_resolver_variant(options) # The long import name and duplicated invocation is needed to convince @@ -383,6 +415,7 @@ def get_requirements( isolated=options.isolated_mode, use_pep517=options.use_pep517, user_supplied=True, + config_settings=getattr(options, "config_settings", None), ) requirements.append(req_to_add) @@ -392,6 +425,7 @@ def get_requirements( user_supplied=True, isolated=options.isolated_mode, use_pep517=options.use_pep517, + config_settings=getattr(options, "config_settings", None), ) requirements.append(req_to_add) diff --git a/src/pip/_internal/cli/spinners.py b/src/pip/_internal/cli/spinners.py index 1e313e1090a..cf2b976f377 100644 --- a/src/pip/_internal/cli/spinners.py +++ b/src/pip/_internal/cli/spinners.py @@ -3,9 +3,7 @@ import logging import sys import time -from typing import IO, Iterator - -from pip._vendor.progress import HIDE_CURSOR, SHOW_CURSOR +from typing import IO, Generator, Optional from pip._internal.utils.compat import WINDOWS from pip._internal.utils.logging import get_indentation @@ -25,7 +23,7 @@ class InteractiveSpinner(SpinnerInterface): def __init__( self, message: str, - file: IO[str] = None, + file: Optional[IO[str]] = None, spin_chars: str = "-\\|/", # Empirically, 8 updates/second looks nice min_update_interval_seconds: float = 0.125, @@ -115,7 +113,7 @@ def reset(self) -> None: @contextlib.contextmanager -def open_spinner(message: str) -> Iterator[SpinnerInterface]: +def open_spinner(message: str) -> Generator[SpinnerInterface, None, None]: # Interactive spinner goes directly to sys.stdout rather than being routed # through the logging system, but it acts like it has level INFO, # i.e. it's only displayed if we're at level INFO or better. @@ -138,8 +136,12 @@ def open_spinner(message: str) -> Iterator[SpinnerInterface]: spinner.finish("done") +HIDE_CURSOR = "\x1b[?25l" +SHOW_CURSOR = "\x1b[?25h" + + @contextlib.contextmanager -def hidden_cursor(file: IO[str]) -> Iterator[None]: +def hidden_cursor(file: IO[str]) -> Generator[None, None, None]: # The Windows terminal does not support the hide/show cursor ANSI codes, # even via colorama. So don't even try. if WINDOWS: diff --git a/src/pip/_internal/commands/__init__.py b/src/pip/_internal/commands/__init__.py index c72f24f30e2..858a4101416 100644 --- a/src/pip/_internal/commands/__init__.py +++ b/src/pip/_internal/commands/__init__.py @@ -38,6 +38,11 @@ "FreezeCommand", "Output installed packages in requirements format.", ), + "inspect": CommandInfo( + "pip._internal.commands.inspect", + "InspectCommand", + "Inspect the python environment.", + ), "list": CommandInfo( "pip._internal.commands.list", "ListCommand", diff --git a/src/pip/_internal/commands/cache.py b/src/pip/_internal/commands/cache.py index f1a489d324f..c5f03302d6b 100644 --- a/src/pip/_internal/commands/cache.py +++ b/src/pip/_internal/commands/cache.py @@ -105,9 +105,9 @@ def get_cache_info(self, options: Values, args: List[Any]) -> None: Package index page cache location: {http_cache_location} Package index page cache size: {http_cache_size} Number of HTTP files: {num_http_files} - Wheels location: {wheels_cache_location} - Wheels size: {wheels_cache_size} - Number of wheels: {package_count} + Locally built wheels location: {wheels_cache_location} + Locally built wheels size: {wheels_cache_size} + Number of locally built wheels: {package_count} """ ) .format( @@ -140,7 +140,7 @@ def list_cache_items(self, options: Values, args: List[Any]) -> None: def format_for_human(self, files: List[str]) -> None: if not files: - logger.info("Nothing cached.") + logger.info("No locally built wheels cached.") return results = [] diff --git a/src/pip/_internal/commands/completion.py b/src/pip/_internal/commands/completion.py index c0fb4caf8b2..deaa30899e6 100644 --- a/src/pip/_internal/commands/completion.py +++ b/src/pip/_internal/commands/completion.py @@ -43,6 +43,28 @@ end complete -fa "(__fish_complete_pip)" -c {prog} """, + "powershell": """ + if ((Test-Path Function:\\TabExpansion) -and -not ` + (Test-Path Function:\\_pip_completeBackup)) {{ + Rename-Item Function:\\TabExpansion _pip_completeBackup + }} + function TabExpansion($line, $lastWord) {{ + $lastBlock = [regex]::Split($line, '[|;]')[-1].TrimStart() + if ($lastBlock.StartsWith("{prog} ")) {{ + $Env:COMP_WORDS=$lastBlock + $Env:COMP_CWORD=$lastBlock.Split().Length - 1 + $Env:PIP_AUTO_COMPLETE=1 + (& {prog}).Split() + Remove-Item Env:COMP_WORDS + Remove-Item Env:COMP_CWORD + Remove-Item Env:PIP_AUTO_COMPLETE + }} + elseif (Test-Path Function:\\_pip_completeBackup) {{ + # Fall back on existing tab expansion + _pip_completeBackup $line $lastWord + }} + }} + """, } @@ -76,6 +98,14 @@ def add_options(self) -> None: dest="shell", help="Emit completion code for fish", ) + self.cmd_opts.add_option( + "--powershell", + "-p", + action="store_const", + const="powershell", + dest="shell", + help="Emit completion code for powershell", + ) self.parser.insert_option_group(0, self.cmd_opts) diff --git a/src/pip/_internal/commands/configuration.py b/src/pip/_internal/commands/configuration.py index c6c74ed50ba..84b134e490b 100644 --- a/src/pip/_internal/commands/configuration.py +++ b/src/pip/_internal/commands/configuration.py @@ -27,11 +27,17 @@ class ConfigurationCommand(Command): - list: List the active configuration (or from the file specified) - edit: Edit the configuration file in an editor - - get: Get the value associated with name - - set: Set the name=value - - unset: Unset the value associated with name + - get: Get the value associated with command.option + - set: Set the command.option=value + - unset: Unset the value associated with command.option - debug: List the configuration files and values defined under them + Configuration keys should be dot separated command and option name, + with the special prefix "global" affecting any command. For example, + "pip config set global.index-url https://example.org/" would configure + the index url for all commands, but "pip config set download.timeout 10" + would configure a 10 second timeout only for "pip download" commands. + If none of --user, --global and --site are passed, a virtual environment configuration file is used if one is active and the file exists. Otherwise, all modifications happen to the user file by @@ -43,9 +49,9 @@ class ConfigurationCommand(Command): %prog [] list %prog [] [--editor ] edit - %prog [] get name - %prog [] set name value - %prog [] unset name + %prog [] get command.option + %prog [] set command.option value + %prog [] unset command.option %prog [] debug """ @@ -222,9 +228,19 @@ def open_in_editor(self, options: Values, args: List[str]) -> None: fname = self.configuration.get_file_to_edit() if fname is None: raise PipError("Could not determine appropriate file.") + elif '"' in fname: + # This shouldn't happen, unless we see a username like that. + # If that happens, we'd appreciate a pull request fixing this. + raise PipError( + f'Can not open an editor for a file name containing "\n{fname}' + ) try: - subprocess.check_call([editor, fname]) + subprocess.check_call(f'{editor} "{fname}"', shell=True) + except FileNotFoundError as e: + if not e.filename: + e.filename = editor + raise except subprocess.CalledProcessError as e: raise PipError( "Editor Subprocess exited with exit code {}".format(e.returncode) diff --git a/src/pip/_internal/commands/debug.py b/src/pip/_internal/commands/debug.py index d3f1f28de4c..2a3e7d298f3 100644 --- a/src/pip/_internal/commands/debug.py +++ b/src/pip/_internal/commands/debug.py @@ -1,3 +1,4 @@ +import importlib.resources import locale import logging import os @@ -10,7 +11,6 @@ from pip._vendor.certifi import where from pip._vendor.packaging.version import parse as parse_version -from pip import __file__ as pip_location from pip._internal.cli import cmdoptions from pip._internal.cli.base_command import Command from pip._internal.cli.cmdoptions import make_target_python @@ -35,11 +35,7 @@ def show_sys_implementation() -> None: def create_vendor_txt_map() -> Dict[str, str]: - vendor_txt_path = os.path.join( - os.path.dirname(pip_location), "_vendor", "vendor.txt" - ) - - with open(vendor_txt_path) as f: + with importlib.resources.open_text("pip._vendor", "vendor.txt") as f: # Purge non version specifying lines. # Also, remove any space prefix or suffixes (including comments). lines = [ @@ -47,12 +43,12 @@ def create_vendor_txt_map() -> Dict[str, str]: ] # Transform into "module" -> version dict. - return dict(line.split("==", 1) for line in lines) # type: ignore + return dict(line.split("==", 1) for line in lines) def get_module_from_module_name(module_name: str) -> ModuleType: # Module name can be uppercase in vendor.txt for some reason... - module_name = module_name.lower() + module_name = module_name.lower().replace("-", "_") # PATCH: setuptools is actually only pkg_resources. if module_name == "setuptools": module_name = "pkg_resources" @@ -67,6 +63,7 @@ def get_vendor_version_from_module(module_name: str) -> Optional[str]: if not version: # Try to find version in debundled module info. + assert module.__file__ is not None env = get_environment([os.path.dirname(module.__file__)]) dist = env.get_distribution(module_name) if dist: diff --git a/src/pip/_internal/commands/download.py b/src/pip/_internal/commands/download.py index 7de207f1365..90388d11857 100644 --- a/src/pip/_internal/commands/download.py +++ b/src/pip/_internal/commands/download.py @@ -7,7 +7,8 @@ from pip._internal.cli.cmdoptions import make_target_python from pip._internal.cli.req_command import RequirementCommand, with_cleanup from pip._internal.cli.status_codes import SUCCESS -from pip._internal.req.req_tracker import get_requirement_tracker +from pip._internal.operations.build.build_tracker import get_build_tracker +from pip._internal.req.req_install import check_legacy_setup_py_options from pip._internal.utils.misc import ensure_dir, normalize_path, write_output from pip._internal.utils.temp_dir import TempDirectory @@ -49,6 +50,7 @@ def add_options(self) -> None: self.cmd_opts.add_option(cmdoptions.no_build_isolation()) self.cmd_opts.add_option(cmdoptions.use_pep517()) self.cmd_opts.add_option(cmdoptions.no_use_pep517()) + self.cmd_opts.add_option(cmdoptions.check_build_deps()) self.cmd_opts.add_option(cmdoptions.ignore_requires_python()) self.cmd_opts.add_option( @@ -95,7 +97,7 @@ def run(self, options: Values, args: List[str]) -> int: ignore_requires_python=options.ignore_requires_python, ) - req_tracker = self.enter_context(get_requirement_tracker()) + build_tracker = self.enter_context(get_build_tracker()) directory = TempDirectory( delete=not options.no_clean, @@ -104,15 +106,17 @@ def run(self, options: Values, args: List[str]) -> int: ) reqs = self.get_requirements(args, options, finder, session) + check_legacy_setup_py_options(options, reqs) preparer = self.make_requirement_preparer( temp_build_dir=directory, options=options, - req_tracker=req_tracker, + build_tracker=build_tracker, session=session, finder=finder, download_dir=options.download_dir, use_user_site=False, + verbosity=self.verbosity, ) resolver = self.make_resolver( @@ -120,6 +124,7 @@ def run(self, options: Values, args: List[str]) -> int: finder=finder, options=options, ignore_requires_python=options.ignore_requires_python, + use_pep517=options.use_pep517, py_version_info=options.python_version, ) diff --git a/src/pip/_internal/commands/index.py b/src/pip/_internal/commands/index.py index b4bf0ac06e1..7267effed24 100644 --- a/src/pip/_internal/commands/index.py +++ b/src/pip/_internal/commands/index.py @@ -24,6 +24,7 @@ class IndexCommand(IndexGroupCommand): Inspect information available from package indexes. """ + ignore_require_venv = True usage = """ %prog versions """ diff --git a/src/pip/_internal/commands/inspect.py b/src/pip/_internal/commands/inspect.py new file mode 100644 index 00000000000..27c8fa3d5b6 --- /dev/null +++ b/src/pip/_internal/commands/inspect.py @@ -0,0 +1,92 @@ +import logging +from optparse import Values +from typing import Any, Dict, List + +from pip._vendor.packaging.markers import default_environment +from pip._vendor.rich import print_json + +from pip import __version__ +from pip._internal.cli import cmdoptions +from pip._internal.cli.req_command import Command +from pip._internal.cli.status_codes import SUCCESS +from pip._internal.metadata import BaseDistribution, get_environment +from pip._internal.utils.compat import stdlib_pkgs +from pip._internal.utils.urls import path_to_url + +logger = logging.getLogger(__name__) + + +class InspectCommand(Command): + """ + Inspect the content of a Python environment and produce a report in JSON format. + """ + + ignore_require_venv = True + usage = """ + %prog [options]""" + + def add_options(self) -> None: + self.cmd_opts.add_option( + "--local", + action="store_true", + default=False, + help=( + "If in a virtualenv that has global access, do not list " + "globally-installed packages." + ), + ) + self.cmd_opts.add_option( + "--user", + dest="user", + action="store_true", + default=False, + help="Only output packages installed in user-site.", + ) + self.cmd_opts.add_option(cmdoptions.list_path()) + self.parser.insert_option_group(0, self.cmd_opts) + + def run(self, options: Values, args: List[str]) -> int: + cmdoptions.check_list_path_option(options) + dists = get_environment(options.path).iter_installed_distributions( + local_only=options.local, + user_only=options.user, + skip=set(stdlib_pkgs), + ) + output = { + "version": "1", + "pip_version": __version__, + "installed": [self._dist_to_dict(dist) for dist in dists], + "environment": default_environment(), + # TODO tags? scheme? + } + print_json(data=output) + return SUCCESS + + def _dist_to_dict(self, dist: BaseDistribution) -> Dict[str, Any]: + res: Dict[str, Any] = { + "metadata": dist.metadata_dict, + "metadata_location": dist.info_location, + } + # direct_url. Note that we don't have download_info (as in the installation + # report) since it is not recorded in installed metadata. + direct_url = dist.direct_url + if direct_url is not None: + res["direct_url"] = direct_url.to_dict() + else: + # Emulate direct_url for legacy editable installs. + editable_project_location = dist.editable_project_location + if editable_project_location is not None: + res["direct_url"] = { + "url": path_to_url(editable_project_location), + "dir_info": { + "editable": True, + }, + } + # installer + installer = dist.installer + if dist.installer: + res["installer"] = installer + # requested + if dist.installed_with_dist_info: + res["requested"] = dist.requested + return res diff --git a/src/pip/_internal/commands/install.py b/src/pip/_internal/commands/install.py index eedb1ff5d64..4783f807fca 100644 --- a/src/pip/_internal/commands/install.py +++ b/src/pip/_internal/commands/install.py @@ -1,12 +1,13 @@ import errno +import json import operator import os import shutil import site from optparse import SUPPRESS_HELP, Values -from typing import Iterable, List, Optional +from typing import List, Optional -from pip._vendor.packaging.utils import canonicalize_name +from pip._vendor.rich import print_json from pip._internal.cache import WheelCache from pip._internal.cli import cmdoptions @@ -20,16 +21,23 @@ from pip._internal.exceptions import CommandError, InstallationError from pip._internal.locations import get_scheme from pip._internal.metadata import get_environment -from pip._internal.models.format_control import FormatControl +from pip._internal.models.installation_report import InstallationReport +from pip._internal.operations.build.build_tracker import get_build_tracker from pip._internal.operations.check import ConflictDetails, check_install_conflicts from pip._internal.req import install_given_reqs -from pip._internal.req.req_install import InstallRequirement -from pip._internal.req.req_tracker import get_requirement_tracker +from pip._internal.req.req_install import ( + InstallRequirement, + check_legacy_setup_py_options, +) from pip._internal.utils.compat import WINDOWS -from pip._internal.utils.distutils_args import parse_distutils_args +from pip._internal.utils.deprecation import ( + LegacyInstallReasonFailedBdistWheel, + deprecated, +) from pip._internal.utils.filesystem import test_writable_dir from pip._internal.utils.logging import getLogger from pip._internal.utils.misc import ( + check_externally_managed, ensure_dir, get_pip_version, protect_pip_from_modification_on_windows, @@ -40,24 +48,11 @@ running_under_virtualenv, virtualenv_no_global, ) -from pip._internal.wheel_builder import ( - BinaryAllowedPredicate, - build, - should_build_for_install_command, -) +from pip._internal.wheel_builder import build, should_build_for_install_command logger = getLogger(__name__) -def get_check_binary_allowed(format_control: FormatControl) -> BinaryAllowedPredicate: - def check_binary_allowed(req: InstallRequirement) -> bool: - canonical_name = canonicalize_name(req.name or "") - allowed_formats = format_control.get_allowed_formats(canonical_name) - return "binary" in allowed_formats - - return check_binary_allowed - - class InstallCommand(RequirementCommand): """ Install packages from: @@ -85,6 +80,17 @@ def add_options(self) -> None: self.cmd_opts.add_option(cmdoptions.pre()) self.cmd_opts.add_option(cmdoptions.editable()) + self.cmd_opts.add_option( + "--dry-run", + action="store_true", + dest="dry_run", + default=False, + help=( + "Don't actually install anything, just print what would be. " + "Can be used in combination with --ignore-installed " + "to 'resolve' the requirements." + ), + ) self.cmd_opts.add_option( "-t", "--target", @@ -131,7 +137,12 @@ def add_options(self) -> None: default=None, help=( "Installation prefix where lib, bin and other top-level " - "folders are placed" + "folders are placed. Note that the resulting installation may " + "contain scripts and other resources which reference the " + "Python interpreter of pip, and not that of ``--prefix``. " + "See also the ``--python`` option if the intention is to " + "install packages into another (possibly pip-free) " + "environment." ), ) @@ -189,8 +200,10 @@ def add_options(self) -> None: self.cmd_opts.add_option(cmdoptions.no_build_isolation()) self.cmd_opts.add_option(cmdoptions.use_pep517()) self.cmd_opts.add_option(cmdoptions.no_use_pep517()) + self.cmd_opts.add_option(cmdoptions.check_build_deps()) + self.cmd_opts.add_option(cmdoptions.override_externally_managed()) - self.cmd_opts.add_option(cmdoptions.install_options()) + self.cmd_opts.add_option(cmdoptions.config_settings()) self.cmd_opts.add_option(cmdoptions.global_options()) self.cmd_opts.add_option( @@ -222,12 +235,12 @@ def add_options(self) -> None: default=True, help="Do not warn about broken dependencies", ) - self.cmd_opts.add_option(cmdoptions.no_binary()) self.cmd_opts.add_option(cmdoptions.only_binary()) self.cmd_opts.add_option(cmdoptions.prefer_binary()) self.cmd_opts.add_option(cmdoptions.require_hashes()) self.cmd_opts.add_option(cmdoptions.progress_bar()) + self.cmd_opts.add_option(cmdoptions.root_user_action()) index_opts = cmdoptions.make_option_group( cmdoptions.index_group, @@ -237,20 +250,50 @@ def add_options(self) -> None: self.parser.insert_option_group(0, index_opts) self.parser.insert_option_group(0, self.cmd_opts) + self.cmd_opts.add_option( + "--report", + dest="json_report_file", + metavar="file", + default=None, + help=( + "Generate a JSON file describing what pip did to install " + "the provided requirements. " + "Can be used in combination with --dry-run and --ignore-installed " + "to 'resolve' the requirements. " + "When - is used as file name it writes to stdout. " + "When writing to stdout, please combine with the --quiet option " + "to avoid mixing pip logging output with JSON output." + ), + ) + @with_cleanup def run(self, options: Values, args: List[str]) -> int: if options.use_user_site and options.target_dir is not None: raise CommandError("Can not combine '--user' and '--target'") - cmdoptions.check_install_build_global(options) + # Check whether the environment we're installing into is externally + # managed, as specified in PEP 668. Specifying --root, --target, or + # --prefix disables the check, since there's no reliable way to locate + # the EXTERNALLY-MANAGED file for those cases. An exception is also + # made specifically for "--dry-run --report" for convenience. + installing_into_current_environment = ( + not (options.dry_run and options.json_report_file) + and options.root_path is None + and options.target_dir is None + and options.prefix_path is None + ) + if ( + installing_into_current_environment + and not options.override_externally_managed + ): + check_externally_managed() + upgrade_strategy = "to-satisfy-only" if options.upgrade: upgrade_strategy = options.upgrade_strategy cmdoptions.check_dist_restriction(options, check_target=True) - install_options = options.install_options or [] - logger.verbose("Using %s", get_pip_version()) options.use_user_site = decide_user_install( options.use_user_site, @@ -291,9 +334,7 @@ def run(self, options: Values, args: List[str]) -> int: target_python=target_python, ignore_requires_python=options.ignore_requires_python, ) - wheel_cache = WheelCache(options.cache_dir, options.format_control) - - req_tracker = self.enter_context(get_requirement_tracker()) + build_tracker = self.enter_context(get_build_tracker()) directory = TempDirectory( delete=not options.no_clean, @@ -303,6 +344,26 @@ def run(self, options: Values, args: List[str]) -> int: try: reqs = self.get_requirements(args, options, finder, session) + check_legacy_setup_py_options(options, reqs) + + if "no-binary-enable-wheel-cache" in options.features_enabled: + # TODO: remove format_control from WheelCache when the deprecation cycle + # is over + wheel_cache = WheelCache(options.cache_dir) + else: + if options.format_control.no_binary: + deprecated( + reason=( + "--no-binary currently disables reading from " + "the cache of locally built wheels. In the future " + "--no-binary will not influence the wheel cache." + ), + replacement="to use the --no-cache-dir option", + feature_flag="no-binary-enable-wheel-cache", + issue=11453, + gone_in="23.1", + ) + wheel_cache = WheelCache(options.cache_dir, options.format_control) # Only when installing is it permitted to use PEP 660. # In other circumstances (pip wheel, pip download) we generate @@ -310,15 +371,14 @@ def run(self, options: Values, args: List[str]) -> int: for req in reqs: req.permit_editable_wheels = True - reject_location_related_install_options(reqs, options.install_options) - preparer = self.make_requirement_preparer( temp_build_dir=directory, options=options, - req_tracker=req_tracker, + build_tracker=build_tracker, session=session, finder=finder, use_user_site=options.use_user_site, + verbosity=self.verbosity, ) resolver = self.make_resolver( preparer=preparer, @@ -339,6 +399,26 @@ def run(self, options: Values, args: List[str]) -> int: reqs, check_supported_wheels=not options.target_dir ) + if options.json_report_file: + report = InstallationReport(requirement_set.requirements_to_install) + if options.json_report_file == "-": + print_json(data=report.to_dict()) + else: + with open(options.json_report_file, "w", encoding="utf-8") as f: + json.dump(report.to_dict(), f, indent=2, ensure_ascii=False) + + if options.dry_run: + would_install_items = sorted( + (r.metadata["name"], r.metadata["version"]) + for r in requirement_set.requirements_to_install + ) + if would_install_items: + write_output( + "Would install %s", + " ".join("-".join(item) for item in would_install_items), + ) + return SUCCESS + try: pip_req = requirement_set.get_requirement("pip") except KeyError: @@ -349,12 +429,10 @@ def run(self, options: Values, args: List[str]) -> int: modifying_pip = pip_req.satisfied_by is None protect_pip_from_modification_on_windows(modifying_pip=modifying_pip) - check_binary_allowed = get_check_binary_allowed(finder.format_control) - reqs_to_build = [ r for r in requirement_set.requirements.values() - if should_build_for_install_command(r, check_binary_allowed) + if should_build_for_install_command(r) ] _, build_failures = build( @@ -362,7 +440,7 @@ def run(self, options: Values, args: List[str]) -> int: wheel_cache=wheel_cache, verify=True, build_options=[], - global_options=[], + global_options=global_options, ) # If we're using PEP 517, we cannot do a legacy setup.py install @@ -383,7 +461,7 @@ def run(self, options: Values, args: List[str]) -> int: # those. for r in build_failures: if not r.use_pep517: - r.legacy_install_reason = 8368 + r.legacy_install_reason = LegacyInstallReasonFailedBdistWheel to_install = resolver.get_installation_order(requirement_set) @@ -403,7 +481,6 @@ def run(self, options: Values, args: List[str]) -> int: installed = install_given_reqs( to_install, - install_options, global_options, root=options.root_path, home=target_temp_dir_path, @@ -463,8 +540,8 @@ def run(self, options: Values, args: List[str]) -> int: self._handle_target_dir( options.target_dir, target_temp_dir, options.upgrade ) - - warn_if_run_as_root() + if options.root_user_action == "warn": + warn_if_run_as_root() return SUCCESS def _handle_target_dir( @@ -674,45 +751,6 @@ def decide_user_install( return True -def reject_location_related_install_options( - requirements: List[InstallRequirement], options: Optional[List[str]] -) -> None: - """If any location-changing --install-option arguments were passed for - requirements or on the command-line, then show a deprecation warning. - """ - - def format_options(option_names: Iterable[str]) -> List[str]: - return ["--{}".format(name.replace("_", "-")) for name in option_names] - - offenders = [] - - for requirement in requirements: - install_options = requirement.install_options - location_options = parse_distutils_args(install_options) - if location_options: - offenders.append( - "{!r} from {}".format( - format_options(location_options.keys()), requirement - ) - ) - - if options: - location_options = parse_distutils_args(options) - if location_options: - offenders.append( - "{!r} from command line".format(format_options(location_options.keys())) - ) - - if not offenders: - return - - raise CommandError( - "Location-changing options found in --install-option: {}." - " This is unsupported, use pip-level options like --user," - " --prefix, --root, and --target instead.".format("; ".join(offenders)) - ) - - def create_os_error_message( error: OSError, show_traceback: bool, using_user_site: bool ) -> str: diff --git a/src/pip/_internal/commands/list.py b/src/pip/_internal/commands/list.py index 75d8dd465ba..8e1426dbb6c 100644 --- a/src/pip/_internal/commands/list.py +++ b/src/pip/_internal/commands/list.py @@ -1,7 +1,7 @@ import json import logging from optparse import Values -from typing import TYPE_CHECKING, Iterator, List, Optional, Sequence, Tuple, cast +from typing import TYPE_CHECKING, Generator, List, Optional, Sequence, Tuple, cast from pip._vendor.packaging.utils import canonicalize_name @@ -16,7 +16,6 @@ from pip._internal.network.session import PipSession from pip._internal.utils.compat import stdlib_pkgs from pip._internal.utils.misc import tabulate, write_output -from pip._internal.utils.parallel import map_multithread if TYPE_CHECKING: from pip._internal.metadata.base import DistributionVersion @@ -156,6 +155,11 @@ def run(self, options: Values, args: List[str]) -> int: if options.outdated and options.uptodate: raise CommandError("Options --outdated and --uptodate cannot be combined.") + if options.outdated and options.list_format == "freeze": + raise CommandError( + "List format 'freeze' can not be used with the --outdated option." + ) + cmdoptions.check_list_path_option(options) skip = set(stdlib_pkgs) @@ -222,7 +226,7 @@ def get_not_required( def iter_packages_latest_infos( self, packages: "_ProcessedDists", options: Values - ) -> Iterator["_DistWithLatestInfo"]: + ) -> Generator["_DistWithLatestInfo", None, None]: with self._build_session(options) as session: finder = self._build_package_finder(options, session) @@ -254,7 +258,7 @@ def latest_info( dist.latest_filetype = typ return dist - for dist in map_multithread(latest_info, packages): + for dist in map(latest_info, packages): if dist is not None: yield dist diff --git a/src/pip/_internal/commands/show.py b/src/pip/_internal/commands/show.py index 872292a2b6a..3f10701f6b2 100644 --- a/src/pip/_internal/commands/show.py +++ b/src/pip/_internal/commands/show.py @@ -1,8 +1,6 @@ -import csv import logging -import pathlib from optparse import Values -from typing import Iterator, List, NamedTuple, Optional, Tuple +from typing import Generator, Iterable, Iterator, List, NamedTuple, Optional from pip._vendor.packaging.utils import canonicalize_name @@ -55,6 +53,7 @@ class _PackageInfo(NamedTuple): name: str version: str location: str + editable_project_location: Optional[str] requires: List[str] required_by: List[str] installer: str @@ -62,6 +61,7 @@ class _PackageInfo(NamedTuple): classifiers: List[str] summary: str homepage: str + project_urls: List[str] author: str author_email: str license: str @@ -69,34 +69,7 @@ class _PackageInfo(NamedTuple): files: Optional[List[str]] -def _convert_legacy_entry(entry: Tuple[str, ...], info: Tuple[str, ...]) -> str: - """Convert a legacy installed-files.txt path into modern RECORD path. - - The legacy format stores paths relative to the info directory, while the - modern format stores paths relative to the package root, e.g. the - site-packages directory. - - :param entry: Path parts of the installed-files.txt entry. - :param info: Path parts of the egg-info directory relative to package root. - :returns: The converted entry. - - For best compatibility with symlinks, this does not use ``abspath()`` or - ``Path.resolve()``, but tries to work with path parts: - - 1. While ``entry`` starts with ``..``, remove the equal amounts of parts - from ``info``; if ``info`` is empty, start appending ``..`` instead. - 2. Join the two directly. - """ - while entry and entry[0] == "..": - if not info or info[-1] == "..": - info += ("..",) - else: - info = info[:-1] - entry = entry[1:] - return str(pathlib.Path(*info, *entry)) - - -def search_packages_info(query: List[str]) -> Iterator[_PackageInfo]: +def search_packages_info(query: List[str]) -> Generator[_PackageInfo, None, None]: """ Gather details from installed distributions. Print distribution name, version, location, and installed files. Installed files requires a @@ -105,7 +78,7 @@ def search_packages_info(query: List[str]) -> Iterator[_PackageInfo]: """ env = get_default_environment() - installed = {dist.canonical_name: dist for dist in env.iter_distributions()} + installed = {dist.canonical_name: dist for dist in env.iter_all_distributions()} query_names = [canonicalize_name(name) for name in query] missing = sorted( [name for name, pkg in zip(query, query_names) if pkg not in installed] @@ -121,34 +94,6 @@ def _get_requiring_packages(current_dist: BaseDistribution) -> Iterator[str]: in {canonicalize_name(d.name) for d in dist.iter_dependencies()} ) - def _files_from_record(dist: BaseDistribution) -> Optional[Iterator[str]]: - try: - text = dist.read_text("RECORD") - except FileNotFoundError: - return None - # This extra Path-str cast normalizes entries. - return (str(pathlib.Path(row[0])) for row in csv.reader(text.splitlines())) - - def _files_from_legacy(dist: BaseDistribution) -> Optional[Iterator[str]]: - try: - text = dist.read_text("installed-files.txt") - except FileNotFoundError: - return None - paths = (p for p in text.splitlines(keepends=False) if p) - root = dist.location - info = dist.info_directory - if root is None or info is None: - return paths - try: - info_rel = pathlib.Path(info).relative_to(root) - except ValueError: # info is not relative to root. - return paths - if not info_rel.parts: # info *is* root. - return paths - return ( - _convert_legacy_entry(pathlib.Path(p).parts, info_rel.parts) for p in paths - ) - for query_name in query_names: try: dist = installed[query_name] @@ -164,7 +109,7 @@ def _files_from_legacy(dist: BaseDistribution) -> Optional[Iterator[str]]: except FileNotFoundError: entry_points = [] - files_iter = _files_from_record(dist) or _files_from_legacy(dist) + files_iter = dist.iter_declared_entries() if files_iter is None: files: Optional[List[str]] = None else: @@ -176,6 +121,7 @@ def _files_from_legacy(dist: BaseDistribution) -> Optional[Iterator[str]]: name=dist.raw_name, version=str(dist.version), location=dist.location or "", + editable_project_location=dist.editable_project_location, requires=requires, required_by=required_by, installer=dist.installer, @@ -183,6 +129,7 @@ def _files_from_legacy(dist: BaseDistribution) -> Optional[Iterator[str]]: classifiers=metadata.get_all("Classifier", []), summary=metadata.get("Summary", ""), homepage=metadata.get("Home-page", ""), + project_urls=metadata.get_all("Project-URL", []), author=metadata.get("Author", ""), author_email=metadata.get("Author-email", ""), license=metadata.get("License", ""), @@ -192,7 +139,7 @@ def _files_from_legacy(dist: BaseDistribution) -> Optional[Iterator[str]]: def print_results( - distributions: Iterator[_PackageInfo], + distributions: Iterable[_PackageInfo], list_files: bool, verbose: bool, ) -> bool: @@ -213,6 +160,10 @@ def print_results( write_output("Author-email: %s", dist.author_email) write_output("License: %s", dist.license) write_output("Location: %s", dist.location) + if dist.editable_project_location is not None: + write_output( + "Editable project location: %s", dist.editable_project_location + ) write_output("Requires: %s", ", ".join(dist.requires)) write_output("Required-by: %s", ", ".join(dist.required_by)) @@ -225,6 +176,9 @@ def print_results( write_output("Entry-points:") for entry in dist.entry_points: write_output(" %s", entry.strip()) + write_output("Project-URLs:") + for project_url in dist.project_urls: + write_output(" %s", project_url) if list_files: write_output("Files:") if dist.files is None: diff --git a/src/pip/_internal/commands/uninstall.py b/src/pip/_internal/commands/uninstall.py index bb9e8e6a380..f198fc313ff 100644 --- a/src/pip/_internal/commands/uninstall.py +++ b/src/pip/_internal/commands/uninstall.py @@ -4,6 +4,7 @@ from pip._vendor.packaging.utils import canonicalize_name +from pip._internal.cli import cmdoptions from pip._internal.cli.base_command import Command from pip._internal.cli.req_command import SessionCommandMixin, warn_if_run_as_root from pip._internal.cli.status_codes import SUCCESS @@ -13,7 +14,10 @@ install_req_from_line, install_req_from_parsed_requirement, ) -from pip._internal.utils.misc import protect_pip_from_modification_on_windows +from pip._internal.utils.misc import ( + check_externally_managed, + protect_pip_from_modification_on_windows, +) logger = logging.getLogger(__name__) @@ -53,7 +57,8 @@ def add_options(self) -> None: action="store_true", help="Don't ask for confirmation of uninstall deletions.", ) - + self.cmd_opts.add_option(cmdoptions.root_user_action()) + self.cmd_opts.add_option(cmdoptions.override_externally_managed()) self.parser.insert_option_group(0, self.cmd_opts) def run(self, options: Values, args: List[str]) -> int: @@ -89,6 +94,9 @@ def run(self, options: Values, args: List[str]) -> int: f'"pip help {self.name}")' ) + if not options.override_externally_managed: + check_externally_managed() + protect_pip_from_modification_on_windows( modifying_pip="pip" in reqs_to_uninstall ) @@ -100,6 +108,6 @@ def run(self, options: Values, args: List[str]) -> int: ) if uninstall_pathset: uninstall_pathset.commit() - - warn_if_run_as_root() + if options.root_user_action == "warn": + warn_if_run_as_root() return SUCCESS diff --git a/src/pip/_internal/commands/wheel.py b/src/pip/_internal/commands/wheel.py index cea81ee5195..a8483559c19 100644 --- a/src/pip/_internal/commands/wheel.py +++ b/src/pip/_internal/commands/wheel.py @@ -9,8 +9,12 @@ from pip._internal.cli.req_command import RequirementCommand, with_cleanup from pip._internal.cli.status_codes import SUCCESS from pip._internal.exceptions import CommandError -from pip._internal.req.req_install import InstallRequirement -from pip._internal.req.req_tracker import get_requirement_tracker +from pip._internal.operations.build.build_tracker import get_build_tracker +from pip._internal.req.req_install import ( + InstallRequirement, + check_legacy_setup_py_options, +) +from pip._internal.utils.deprecation import deprecated from pip._internal.utils.misc import ensure_dir, normalize_path from pip._internal.utils.temp_dir import TempDirectory from pip._internal.wheel_builder import build, should_build_for_wheel_command @@ -26,10 +30,8 @@ class WheelCommand(RequirementCommand): recompiling your software during every install. For more details, see the wheel docs: https://wheel.readthedocs.io/en/latest/ - Requirements: setuptools>=0.8, and wheel. - - 'pip wheel' uses the bdist_wheel setuptools extension from the wheel - package to build individual wheels. + 'pip wheel' uses the build system interface as described here: + https://pip.pypa.io/en/stable/reference/build-system/ """ @@ -59,6 +61,7 @@ def add_options(self) -> None: self.cmd_opts.add_option(cmdoptions.no_build_isolation()) self.cmd_opts.add_option(cmdoptions.use_pep517()) self.cmd_opts.add_option(cmdoptions.no_use_pep517()) + self.cmd_opts.add_option(cmdoptions.check_build_deps()) self.cmd_opts.add_option(cmdoptions.constraints()) self.cmd_opts.add_option(cmdoptions.editable()) self.cmd_opts.add_option(cmdoptions.requirements()) @@ -75,6 +78,7 @@ def add_options(self) -> None: help="Don't verify if built wheel is valid.", ) + self.cmd_opts.add_option(cmdoptions.config_settings()) self.cmd_opts.add_option(cmdoptions.build_options()) self.cmd_opts.add_option(cmdoptions.global_options()) @@ -100,8 +104,6 @@ def add_options(self) -> None: @with_cleanup def run(self, options: Values, args: List[str]) -> int: - cmdoptions.check_install_build_global(options) - session = self.get_default_session(options) finder = self._build_package_finder(options, session) @@ -110,7 +112,7 @@ def run(self, options: Values, args: List[str]) -> int: options.wheel_dir = normalize_path(options.wheel_dir) ensure_dir(options.wheel_dir) - req_tracker = self.enter_context(get_requirement_tracker()) + build_tracker = self.enter_context(get_build_tracker()) directory = TempDirectory( delete=not options.no_clean, @@ -119,15 +121,36 @@ def run(self, options: Values, args: List[str]) -> int: ) reqs = self.get_requirements(args, options, finder, session) + check_legacy_setup_py_options(options, reqs) + + if "no-binary-enable-wheel-cache" in options.features_enabled: + # TODO: remove format_control from WheelCache when the deprecation cycle + # is over + wheel_cache = WheelCache(options.cache_dir) + else: + if options.format_control.no_binary: + deprecated( + reason=( + "--no-binary currently disables reading from " + "the cache of locally built wheels. In the future " + "--no-binary will not influence the wheel cache." + ), + replacement="to use the --no-cache-dir option", + feature_flag="no-binary-enable-wheel-cache", + issue=11453, + gone_in="23.1", + ) + wheel_cache = WheelCache(options.cache_dir, options.format_control) preparer = self.make_requirement_preparer( temp_build_dir=directory, options=options, - req_tracker=req_tracker, + build_tracker=build_tracker, session=session, finder=finder, download_dir=options.wheel_dir, use_user_site=False, + verbosity=self.verbosity, ) resolver = self.make_resolver( diff --git a/src/pip/_internal/configuration.py b/src/pip/_internal/configuration.py index a8092d1ae06..6cce8bcbcce 100644 --- a/src/pip/_internal/configuration.py +++ b/src/pip/_internal/configuration.py @@ -36,12 +36,20 @@ kinds = enum( USER="user", # User Specific GLOBAL="global", # System Wide - SITE="site", # [Virtual] Environment Specific + BASE="base", # Base environment specific (e.g. for all venvs with the same base) + SITE="site", # Environment Specific (e.g. per venv) ENV="env", # from PIP_CONFIG_FILE ENV_VAR="env-var", # from Environment Variables ) -OVERRIDE_ORDER = kinds.GLOBAL, kinds.USER, kinds.SITE, kinds.ENV, kinds.ENV_VAR -VALID_LOAD_ONLY = kinds.USER, kinds.GLOBAL, kinds.SITE +OVERRIDE_ORDER = ( + kinds.GLOBAL, + kinds.USER, + kinds.BASE, + kinds.SITE, + kinds.ENV, + kinds.ENV_VAR, +) +VALID_LOAD_ONLY = kinds.USER, kinds.GLOBAL, kinds.BASE, kinds.SITE logger = getLogger(__name__) @@ -70,6 +78,7 @@ def get_configuration_files() -> Dict[Kind, List[str]]: os.path.join(path, CONFIG_BASENAME) for path in appdirs.site_config_dirs("pip") ] + base_config_file = os.path.join(sys.base_prefix, CONFIG_BASENAME) site_config_file = os.path.join(sys.prefix, CONFIG_BASENAME) legacy_config_file = os.path.join( os.path.expanduser("~"), @@ -78,6 +87,7 @@ def get_configuration_files() -> Dict[Kind, List[str]]: ) new_config_file = os.path.join(appdirs.user_config_dir("pip"), CONFIG_BASENAME) return { + kinds.BASE: [base_config_file], kinds.GLOBAL: global_config_files, kinds.SITE: [site_config_file], kinds.USER: [legacy_config_file, new_config_file], @@ -142,13 +152,19 @@ def items(self) -> Iterable[Tuple[str, Any]]: def get_value(self, key: str) -> Any: """Get a value from the configuration.""" + orig_key = key + key = _normalize_name(key) try: return self._dictionary[key] except KeyError: - raise ConfigurationError(f"No such key - {key}") + # disassembling triggers a more useful error message than simply + # "No such key" in the case that the key isn't in the form command.option + _disassemble_key(key) + raise ConfigurationError(f"No such key - {orig_key}") def set_value(self, key: str, value: Any) -> None: """Modify a value in the configuration.""" + key = _normalize_name(key) self._ensure_have_load_only() assert self.load_only @@ -167,11 +183,13 @@ def set_value(self, key: str, value: Any) -> None: def unset_value(self, key: str) -> None: """Unset a value in the configuration.""" + orig_key = key + key = _normalize_name(key) self._ensure_have_load_only() assert self.load_only if key not in self._config[self.load_only]: - raise ConfigurationError(f"No such key - {key}") + raise ConfigurationError(f"No such key - {orig_key}") fname, parser = self._get_parser_to_modify() @@ -336,6 +354,8 @@ def iter_config_files(self) -> Iterable[Tuple[Kind, List[str]]]: # The legacy config file is overridden by the new config file yield kinds.USER, config_files[kinds.USER] + yield kinds.BASE, config_files[kinds.BASE] + # finally virtualenv configuration first trumping others yield kinds.SITE, config_files[kinds.SITE] diff --git a/src/pip/_internal/distributions/base.py b/src/pip/_internal/distributions/base.py index 149fff55dab..75ce2dc9057 100644 --- a/src/pip/_internal/distributions/base.py +++ b/src/pip/_internal/distributions/base.py @@ -31,6 +31,9 @@ def get_metadata_distribution(self) -> BaseDistribution: @abc.abstractmethod def prepare_distribution_metadata( - self, finder: PackageFinder, build_isolation: bool + self, + finder: PackageFinder, + build_isolation: bool, + check_build_deps: bool, ) -> None: raise NotImplementedError() diff --git a/src/pip/_internal/distributions/installed.py b/src/pip/_internal/distributions/installed.py index 6c8c179299b..edb38aa1a6c 100644 --- a/src/pip/_internal/distributions/installed.py +++ b/src/pip/_internal/distributions/installed.py @@ -11,12 +11,13 @@ class InstalledDistribution(AbstractDistribution): """ def get_metadata_distribution(self) -> BaseDistribution: - from pip._internal.metadata.pkg_resources import Distribution as _Dist - assert self.req.satisfied_by is not None, "not actually installed" - return _Dist(self.req.satisfied_by) + return self.req.satisfied_by def prepare_distribution_metadata( - self, finder: PackageFinder, build_isolation: bool + self, + finder: PackageFinder, + build_isolation: bool, + check_build_deps: bool, ) -> None: pass diff --git a/src/pip/_internal/distributions/sdist.py b/src/pip/_internal/distributions/sdist.py index cd85ac5c439..4c25647930c 100644 --- a/src/pip/_internal/distributions/sdist.py +++ b/src/pip/_internal/distributions/sdist.py @@ -19,12 +19,13 @@ class SourceDistribution(AbstractDistribution): """ def get_metadata_distribution(self) -> BaseDistribution: - from pip._internal.metadata.pkg_resources import Distribution as _Dist - - return _Dist(self.req.get_dist()) + return self.req.get_dist() def prepare_distribution_metadata( - self, finder: PackageFinder, build_isolation: bool + self, + finder: PackageFinder, + build_isolation: bool, + check_build_deps: bool, ) -> None: # Load pyproject.toml, to determine whether PEP 517 is to be used self.req.load_pyproject_toml() @@ -45,7 +46,18 @@ def prepare_distribution_metadata( self.req.isolated_editable_sanity_check() # Install the dynamic build requirements. self._install_build_reqs(finder) - + # Check if the current environment provides build dependencies + should_check_deps = self.req.use_pep517 and check_build_deps + if should_check_deps: + pyproject_requires = self.req.pyproject_requires + assert pyproject_requires is not None + conflicting, missing = self.req.build_env.check_requirements( + pyproject_requires + ) + if conflicting: + self._raise_conflicts("the backend dependencies", conflicting) + if missing: + self._raise_missing_reqs(missing) self.req.prepare_metadata() def _prepare_build_backend(self, finder: PackageFinder) -> None: @@ -56,7 +68,7 @@ def _prepare_build_backend(self, finder: PackageFinder) -> None: self.req.build_env = BuildEnvironment() self.req.build_env.install_requirements( - finder, pyproject_requires, "overlay", "Installing build dependencies" + finder, pyproject_requires, "overlay", kind="build dependencies" ) conflicting, missing = self.req.build_env.check_requirements( self.req.requirements_to_check @@ -108,7 +120,7 @@ def _install_build_reqs(self, finder: PackageFinder) -> None: if conflicting: self._raise_conflicts("the backend dependencies", conflicting) self.req.build_env.install_requirements( - finder, missing, "normal", "Installing backend dependencies" + finder, missing, "normal", kind="backend dependencies" ) def _raise_conflicts( @@ -127,3 +139,12 @@ def _raise_conflicts( ), ) raise InstallationError(error_message) + + def _raise_missing_reqs(self, missing: Set[str]) -> None: + format_string = ( + "Some build dependencies for {requirement} are missing: {missing}." + ) + error_message = format_string.format( + requirement=self.req, missing=", ".join(map(repr, sorted(missing))) + ) + raise InstallationError(error_message) diff --git a/src/pip/_internal/distributions/wheel.py b/src/pip/_internal/distributions/wheel.py index 340b0f3c5c7..03aac775b53 100644 --- a/src/pip/_internal/distributions/wheel.py +++ b/src/pip/_internal/distributions/wheel.py @@ -26,6 +26,9 @@ def get_metadata_distribution(self) -> BaseDistribution: return get_wheel_distribution(wheel, canonicalize_name(self.req.name)) def prepare_distribution_metadata( - self, finder: PackageFinder, build_isolation: bool + self, + finder: PackageFinder, + build_isolation: bool, + check_build_deps: bool, ) -> None: pass diff --git a/src/pip/_internal/exceptions.py b/src/pip/_internal/exceptions.py index ef5bc75118b..d4527295da3 100644 --- a/src/pip/_internal/exceptions.py +++ b/src/pip/_internal/exceptions.py @@ -1,23 +1,181 @@ -"""Exceptions used throughout package""" +"""Exceptions used throughout package. + +This module MUST NOT try to import from anything within `pip._internal` to +operate. This is expected to be importable from any/all files within the +subpackage and, thus, should not depend on them. +""" import configparser +import contextlib +import locale +import logging +import pathlib +import re +import sys from itertools import chain, groupby, repeat -from typing import TYPE_CHECKING, Dict, List, Optional, Union +from typing import TYPE_CHECKING, Dict, Iterator, List, Optional, Union -from pip._vendor.pkg_resources import Distribution from pip._vendor.requests.models import Request, Response +from pip._vendor.rich.console import Console, ConsoleOptions, RenderResult +from pip._vendor.rich.markup import escape +from pip._vendor.rich.text import Text if TYPE_CHECKING: from hashlib import _Hash + from typing import Literal from pip._internal.metadata import BaseDistribution from pip._internal.req.req_install import InstallRequirement +logger = logging.getLogger(__name__) + + +# +# Scaffolding +# +def _is_kebab_case(s: str) -> bool: + return re.match(r"^[a-z]+(-[a-z]+)*$", s) is not None + + +def _prefix_with_indent( + s: Union[Text, str], + console: Console, + *, + prefix: str, + indent: str, +) -> Text: + if isinstance(s, Text): + text = s + else: + text = console.render_str(s) + + return console.render_str(prefix, overflow="ignore") + console.render_str( + f"\n{indent}", overflow="ignore" + ).join(text.split(allow_blank=True)) + class PipError(Exception): - """Base pip exception""" + """The base pip error.""" + + +class DiagnosticPipError(PipError): + """An error, that presents diagnostic information to the user. + + This contains a bunch of logic, to enable pretty presentation of our error + messages. Each error gets a unique reference. Each error can also include + additional context, a hint and/or a note -- which are presented with the + main error message in a consistent style. + + This is adapted from the error output styling in `sphinx-theme-builder`. + """ + + reference: str + + def __init__( + self, + *, + kind: 'Literal["error", "warning"]' = "error", + reference: Optional[str] = None, + message: Union[str, Text], + context: Optional[Union[str, Text]], + hint_stmt: Optional[Union[str, Text]], + note_stmt: Optional[Union[str, Text]] = None, + link: Optional[str] = None, + ) -> None: + # Ensure a proper reference is provided. + if reference is None: + assert hasattr(self, "reference"), "error reference not provided!" + reference = self.reference + assert _is_kebab_case(reference), "error reference must be kebab-case!" + + self.kind = kind + self.reference = reference + + self.message = message + self.context = context + + self.note_stmt = note_stmt + self.hint_stmt = hint_stmt + + self.link = link + + super().__init__(f"<{self.__class__.__name__}: {self.reference}>") + def __repr__(self) -> str: + return ( + f"<{self.__class__.__name__}(" + f"reference={self.reference!r}, " + f"message={self.message!r}, " + f"context={self.context!r}, " + f"note_stmt={self.note_stmt!r}, " + f"hint_stmt={self.hint_stmt!r}" + ")>" + ) + def __rich_console__( + self, + console: Console, + options: ConsoleOptions, + ) -> RenderResult: + colour = "red" if self.kind == "error" else "yellow" + + yield f"[{colour} bold]{self.kind}[/]: [bold]{self.reference}[/]" + yield "" + + if not options.ascii_only: + # Present the main message, with relevant context indented. + if self.context is not None: + yield _prefix_with_indent( + self.message, + console, + prefix=f"[{colour}]×[/] ", + indent=f"[{colour}]│[/] ", + ) + yield _prefix_with_indent( + self.context, + console, + prefix=f"[{colour}]╰─>[/] ", + indent=f"[{colour}] [/] ", + ) + else: + yield _prefix_with_indent( + self.message, + console, + prefix="[red]×[/] ", + indent=" ", + ) + else: + yield self.message + if self.context is not None: + yield "" + yield self.context + + if self.note_stmt is not None or self.hint_stmt is not None: + yield "" + + if self.note_stmt is not None: + yield _prefix_with_indent( + self.note_stmt, + console, + prefix="[magenta bold]note[/]: ", + indent=" ", + ) + if self.hint_stmt is not None: + yield _prefix_with_indent( + self.hint_stmt, + console, + prefix="[cyan bold]hint[/]: ", + indent=" ", + ) + + if self.link is not None: + yield "" + yield f"Link: {self.link}" + + +# +# Actual Errors +# class ConfigurationError(PipError): """General exception in configuration""" @@ -30,18 +188,52 @@ class UninstallationError(PipError): """General exception during uninstallation""" +class MissingPyProjectBuildRequires(DiagnosticPipError): + """Raised when pyproject.toml has `build-system`, but no `build-system.requires`.""" + + reference = "missing-pyproject-build-system-requires" + + def __init__(self, *, package: str) -> None: + super().__init__( + message=f"Can not process {escape(package)}", + context=Text( + "This package has an invalid pyproject.toml file.\n" + "The [build-system] table is missing the mandatory `requires` key." + ), + note_stmt="This is an issue with the package mentioned above, not pip.", + hint_stmt=Text("See PEP 518 for the detailed specification."), + ) + + +class InvalidPyProjectBuildRequires(DiagnosticPipError): + """Raised when pyproject.toml an invalid `build-system.requires`.""" + + reference = "invalid-pyproject-build-system-requires" + + def __init__(self, *, package: str, reason: str) -> None: + super().__init__( + message=f"Can not process {escape(package)}", + context=Text( + "This package has an invalid `build-system.requires` key in " + f"pyproject.toml.\n{reason}" + ), + note_stmt="This is an issue with the package mentioned above, not pip.", + hint_stmt=Text("See PEP 518 for the detailed specification."), + ) + + class NoneMetadataError(PipError): - """ - Raised when accessing "METADATA" or "PKG-INFO" metadata for a - pip._vendor.pkg_resources.Distribution object and - `dist.has_metadata('METADATA')` returns True but - `dist.get_metadata('METADATA')` returns None (and similarly for - "PKG-INFO"). + """Raised when accessing a Distribution's "METADATA" or "PKG-INFO". + + This signifies an inconsistency, when the Distribution claims to have + the metadata file (if not, raise ``FileNotFoundError`` instead), but is + not actually able to produce its content. This may be due to permission + errors. """ def __init__( self, - dist: Union[Distribution, "BaseDistribution"], + dist: "BaseDistribution", metadata_name: str, ) -> None: """ @@ -103,7 +295,10 @@ class NetworkConnectionError(PipError): """HTTP connection error""" def __init__( - self, error_msg: str, response: Response = None, request: Request = None + self, + error_msg: str, + response: Optional[Response] = None, + request: Optional[Request] = None, ) -> None: """ Initialize NetworkConnectionError with `request` and `response` @@ -132,12 +327,23 @@ class UnsupportedWheel(InstallationError): """Unsupported wheel.""" +class InvalidWheel(InstallationError): + """Invalid (e.g. corrupt) wheel.""" + + def __init__(self, location: str, name: str): + self.location = location + self.name = name + + def __str__(self) -> str: + return f"Wheel '{self.name}' located at {self.location} is invalid." + + class MetadataInconsistent(InstallationError): """Built metadata contains inconsistent information. This is raised when the metadata contains values (e.g. name and version) - that do not match the information previously obtained from sdist filename - or user-supplied ``#egg=`` value. + that do not match the information previously obtained from sdist filename, + user-supplied ``#egg=`` value, or an install requirement name. """ def __init__( @@ -149,25 +355,84 @@ def __init__( self.m_val = m_val def __str__(self) -> str: - template = ( - "Requested {} has inconsistent {}: " - "filename has {!r}, but metadata has {!r}" + return ( + f"Requested {self.ireq} has inconsistent {self.field}: " + f"expected {self.f_val!r}, but metadata has {self.m_val!r}" + ) + + +class LegacyInstallFailure(DiagnosticPipError): + """Error occurred while executing `setup.py install`""" + + reference = "legacy-install-failure" + + def __init__(self, package_details: str) -> None: + super().__init__( + message="Encountered error while trying to install package.", + context=package_details, + hint_stmt="See above for output from the failure.", + note_stmt="This is an issue with the package mentioned above, not pip.", ) - return template.format(self.ireq, self.field, self.f_val, self.m_val) -class InstallationSubprocessError(InstallationError): - """A subprocess call failed during installation.""" +class InstallationSubprocessError(DiagnosticPipError, InstallationError): + """A subprocess call failed.""" - def __init__(self, returncode: int, description: str) -> None: - self.returncode = returncode - self.description = description + reference = "subprocess-exited-with-error" + + def __init__( + self, + *, + command_description: str, + exit_code: int, + output_lines: Optional[List[str]], + ) -> None: + if output_lines is None: + output_prompt = Text("See above for output.") + else: + output_prompt = ( + Text.from_markup(f"[red][{len(output_lines)} lines of output][/]\n") + + Text("".join(output_lines)) + + Text.from_markup(R"[red]\[end of output][/]") + ) + + super().__init__( + message=( + f"[green]{escape(command_description)}[/] did not run successfully.\n" + f"exit code: {exit_code}" + ), + context=output_prompt, + hint_stmt=None, + note_stmt=( + "This error originates from a subprocess, and is likely not a " + "problem with pip." + ), + ) + + self.command_description = command_description + self.exit_code = exit_code def __str__(self) -> str: - return ( - "Command errored out with exit status {}: {} " - "Check the logs for full command output." - ).format(self.returncode, self.description) + return f"{self.command_description} exited with {self.exit_code}" + + +class MetadataGenerationFailed(InstallationSubprocessError, InstallationError): + reference = "metadata-generation-failed" + + def __init__( + self, + *, + package_details: str, + ) -> None: + super(InstallationSubprocessError, self).__init__( + message="Encountered error while generating package metadata.", + context=escape(package_details), + hint_stmt="See above for details.", + note_stmt="This is an issue with the package mentioned above, not pip.", + ) + + def __str__(self) -> str: + return "metadata generation failed" class HashErrors(InstallationError): @@ -400,3 +665,83 @@ def __str__(self) -> str: assert self.error is not None message_part = f".\n{self.error}\n" return f"Configuration file {self.reason}{message_part}" + + +_DEFAULT_EXTERNALLY_MANAGED_ERROR = f"""\ +The Python environment under {sys.prefix} is managed externally, and may not be +manipulated by the user. Please use specific tooling from the distributor of +the Python installation to interact with this environment instead. +""" + + +class ExternallyManagedEnvironment(DiagnosticPipError): + """The current environment is externally managed. + + This is raised when the current environment is externally managed, as + defined by `PEP 668`_. The ``EXTERNALLY-MANAGED`` configuration is checked + and displayed when the error is bubbled up to the user. + + :param error: The error message read from ``EXTERNALLY-MANAGED``. + """ + + reference = "externally-managed-environment" + + def __init__(self, error: Optional[str]) -> None: + if error is None: + context = Text(_DEFAULT_EXTERNALLY_MANAGED_ERROR) + else: + context = Text(error) + super().__init__( + message="This environment is externally managed", + context=context, + note_stmt=( + "If you believe this is a mistake, please contact your " + "Python installation or OS distribution provider. " + "You can override this, at the risk of breaking your Python " + "installation or OS, by passing --break-system-packages." + ), + hint_stmt=Text("See PEP 668 for the detailed specification."), + ) + + @staticmethod + def _iter_externally_managed_error_keys() -> Iterator[str]: + # LC_MESSAGES is in POSIX, but not the C standard. The most common + # platform that does not implement this category is Windows, where + # using other categories for console message localization is equally + # unreliable, so we fall back to the locale-less vendor message. This + # can always be re-evaluated when a vendor proposes a new alternative. + try: + category = locale.LC_MESSAGES + except AttributeError: + lang: Optional[str] = None + else: + lang, _ = locale.getlocale(category) + if lang is not None: + yield f"Error-{lang}" + for sep in ("-", "_"): + before, found, _ = lang.partition(sep) + if not found: + continue + yield f"Error-{before}" + yield "Error" + + @classmethod + def from_config( + cls, + config: Union[pathlib.Path, str], + ) -> "ExternallyManagedEnvironment": + parser = configparser.ConfigParser(interpolation=None) + try: + parser.read(config, encoding="utf-8") + section = parser["externally-managed"] + for key in cls._iter_externally_managed_error_keys(): + with contextlib.suppress(KeyError): + return cls(section[key]) + except KeyError: + pass + except (OSError, UnicodeDecodeError, configparser.ParsingError): + from pip._internal.utils._log import VERBOSE + + exc_info = logger.isEnabledFor(VERBOSE) + logger.warning("Failed to read %s", config, exc_info=exc_info) + return cls(None) diff --git a/src/pip/_internal/index/collector.py b/src/pip/_internal/index/collector.py index d9412234eed..b3e293ea3a5 100644 --- a/src/pip/_internal/index/collector.py +++ b/src/pip/_internal/index/collector.py @@ -2,29 +2,32 @@ The main purpose of this module is to expose LinkCollector.collect_sources(). """ -import cgi import collections +import email.message import functools import itertools +import json import logging import os -import re import urllib.parse import urllib.request -import xml.etree.ElementTree +from html.parser import HTMLParser from optparse import Values from typing import ( + TYPE_CHECKING, Callable, + Dict, Iterable, List, MutableMapping, NamedTuple, Optional, Sequence, + Tuple, Union, ) -from pip._vendor import html5lib, requests +from pip._vendor import requests from pip._vendor.requests import Response from pip._vendor.requests.exceptions import RetryError, SSLError @@ -34,14 +37,18 @@ from pip._internal.network.session import PipSession from pip._internal.network.utils import raise_for_status from pip._internal.utils.filetypes import is_archive_file -from pip._internal.utils.misc import pairwise, redact_auth_from_url +from pip._internal.utils.misc import redact_auth_from_url from pip._internal.vcs import vcs from .sources import CandidatesFromPage, LinkSource, build_source +if TYPE_CHECKING: + from typing import Protocol +else: + Protocol = object + logger = logging.getLogger(__name__) -HTMLElement = xml.etree.ElementTree.Element ResponseHeaders = MutableMapping[str, str] @@ -56,32 +63,46 @@ def _match_vcs_scheme(url: str) -> Optional[str]: return None -class _NotHTML(Exception): +class _NotAPIContent(Exception): def __init__(self, content_type: str, request_desc: str) -> None: super().__init__(content_type, request_desc) self.content_type = content_type self.request_desc = request_desc -def _ensure_html_header(response: Response) -> None: - """Check the Content-Type header to ensure the response contains HTML. +def _ensure_api_header(response: Response) -> None: + """ + Check the Content-Type header to ensure the response contains a Simple + API Response. - Raises `_NotHTML` if the content type is not text/html. + Raises `_NotAPIContent` if the content type is not a valid content-type. """ - content_type = response.headers.get("Content-Type", "") - if not content_type.lower().startswith("text/html"): - raise _NotHTML(content_type, response.request.method) + content_type = response.headers.get("Content-Type", "Unknown") + + content_type_l = content_type.lower() + if content_type_l.startswith( + ( + "text/html", + "application/vnd.pypi.simple.v1+html", + "application/vnd.pypi.simple.v1+json", + ) + ): + return + + raise _NotAPIContent(content_type, response.request.method) class _NotHTTP(Exception): pass -def _ensure_html_response(url: str, session: PipSession) -> None: - """Send a HEAD request to the URL, and ensure the response contains HTML. +def _ensure_api_response(url: str, session: PipSession) -> None: + """ + Send a HEAD request to the URL, and ensure the response contains a simple + API Response. Raises `_NotHTTP` if the URL is not available for a HEAD request, or - `_NotHTML` if the content type is not text/html. + `_NotAPIContent` if the content type is not a valid content type. """ scheme, netloc, path, query, fragment = urllib.parse.urlsplit(url) if scheme not in {"http", "https"}: @@ -90,31 +111,37 @@ def _ensure_html_response(url: str, session: PipSession) -> None: resp = session.head(url, allow_redirects=True) raise_for_status(resp) - _ensure_html_header(resp) + _ensure_api_header(resp) -def _get_html_response(url: str, session: PipSession) -> Response: - """Access an HTML page with GET, and return the response. +def _get_simple_response(url: str, session: PipSession) -> Response: + """Access an Simple API response with GET, and return the response. This consists of three parts: 1. If the URL looks suspiciously like an archive, send a HEAD first to - check the Content-Type is HTML, to avoid downloading a large file. - Raise `_NotHTTP` if the content type cannot be determined, or - `_NotHTML` if it is not HTML. + check the Content-Type is HTML or Simple API, to avoid downloading a + large file. Raise `_NotHTTP` if the content type cannot be determined, or + `_NotAPIContent` if it is not HTML or a Simple API. 2. Actually perform the request. Raise HTTP exceptions on network failures. - 3. Check the Content-Type header to make sure we got HTML, and raise - `_NotHTML` otherwise. + 3. Check the Content-Type header to make sure we got a Simple API response, + and raise `_NotAPIContent` otherwise. """ if is_archive_file(Link(url).filename): - _ensure_html_response(url, session=session) + _ensure_api_response(url, session=session) logger.debug("Getting page %s", redact_auth_from_url(url)) resp = session.get( url, headers={ - "Accept": "text/html", + "Accept": ", ".join( + [ + "application/vnd.pypi.simple.v1+json", + "application/vnd.pypi.simple.v1+html; q=0.1", + "text/html; q=0.01", + ] + ), # We don't want to blindly returned cached data for # /simple/, because authors generally expecting that # twine upload && pip install will function, but if @@ -136,9 +163,16 @@ def _get_html_response(url: str, session: PipSession) -> Response: # The check for archives above only works if the url ends with # something that looks like an archive. However that is not a # requirement of an url. Unless we issue a HEAD request on every - # url we cannot know ahead of time for sure if something is HTML - # or not. However we can check after we've downloaded it. - _ensure_html_header(resp) + # url we cannot know ahead of time for sure if something is a + # Simple API response or not. However we can check after we've + # downloaded it. + _ensure_api_header(resp) + + logger.debug( + "Fetched page %s as %s", + redact_auth_from_url(url), + resp.headers.get("Content-Type", "Unknown"), + ) return resp @@ -146,121 +180,16 @@ def _get_html_response(url: str, session: PipSession) -> Response: def _get_encoding_from_headers(headers: ResponseHeaders) -> Optional[str]: """Determine if we have any encoding information in our headers.""" if headers and "Content-Type" in headers: - content_type, params = cgi.parse_header(headers["Content-Type"]) - if "charset" in params: - return params["charset"] + m = email.message.Message() + m["content-type"] = headers["Content-Type"] + charset = m.get_param("charset") + if charset: + return str(charset) return None -def _determine_base_url(document: HTMLElement, page_url: str) -> str: - """Determine the HTML document's base URL. - - This looks for a ```` tag in the HTML document. If present, its href - attribute denotes the base URL of anchor tags in the document. If there is - no such tag (or if it does not have a valid href attribute), the HTML - file's URL is used as the base URL. - - :param document: An HTML document representation. The current - implementation expects the result of ``html5lib.parse()``. - :param page_url: The URL of the HTML document. - """ - for base in document.findall(".//base"): - href = base.get("href") - if href is not None: - return href - return page_url - - -def _clean_url_path_part(part: str) -> str: - """ - Clean a "part" of a URL path (i.e. after splitting on "@" characters). - """ - # We unquote prior to quoting to make sure nothing is double quoted. - return urllib.parse.quote(urllib.parse.unquote(part)) - - -def _clean_file_url_path(part: str) -> str: - """ - Clean the first part of a URL path that corresponds to a local - filesystem path (i.e. the first part after splitting on "@" characters). - """ - # We unquote prior to quoting to make sure nothing is double quoted. - # Also, on Windows the path part might contain a drive letter which - # should not be quoted. On Linux where drive letters do not - # exist, the colon should be quoted. We rely on urllib.request - # to do the right thing here. - return urllib.request.pathname2url(urllib.request.url2pathname(part)) - - -# percent-encoded: / -_reserved_chars_re = re.compile("(@|%2F)", re.IGNORECASE) - - -def _clean_url_path(path: str, is_local_path: bool) -> str: - """ - Clean the path portion of a URL. - """ - if is_local_path: - clean_func = _clean_file_url_path - else: - clean_func = _clean_url_path_part - - # Split on the reserved characters prior to cleaning so that - # revision strings in VCS URLs are properly preserved. - parts = _reserved_chars_re.split(path) - - cleaned_parts = [] - for to_clean, reserved in pairwise(itertools.chain(parts, [""])): - cleaned_parts.append(clean_func(to_clean)) - # Normalize %xx escapes (e.g. %2f -> %2F) - cleaned_parts.append(reserved.upper()) - - return "".join(cleaned_parts) - - -def _clean_link(url: str) -> str: - """ - Make sure a link is fully quoted. - For example, if ' ' occurs in the URL, it will be replaced with "%20", - and without double-quoting other characters. - """ - # Split the URL into parts according to the general structure - # `scheme://netloc/path;parameters?query#fragment`. - result = urllib.parse.urlparse(url) - # If the netloc is empty, then the URL refers to a local filesystem path. - is_local_path = not result.netloc - path = _clean_url_path(result.path, is_local_path=is_local_path) - return urllib.parse.urlunparse(result._replace(path=path)) - - -def _create_link_from_element( - anchor: HTMLElement, - page_url: str, - base_url: str, -) -> Optional[Link]: - """ - Convert an anchor element in a simple repository page to a Link. - """ - href = anchor.get("href") - if not href: - return None - - url = _clean_link(urllib.parse.urljoin(base_url, href)) - pyrequire = anchor.get("data-requires-python") - yanked_reason = anchor.get("data-yanked") - - link = Link( - url, - comes_from=page_url, - requires_python=pyrequire, - yanked_reason=yanked_reason, - ) - - return link - - class CacheablePageContent: - def __init__(self, page: "HTMLPage") -> None: + def __init__(self, page: "IndexContent") -> None: assert page.cache_link_parsing self.page = page @@ -271,12 +200,15 @@ def __hash__(self) -> int: return hash(self.page.url) -def with_cached_html_pages( - fn: Callable[["HTMLPage"], Iterable[Link]], -) -> Callable[["HTMLPage"], List[Link]]: +class ParseLinks(Protocol): + def __call__(self, page: "IndexContent") -> Iterable[Link]: + ... + + +def with_cached_index_content(fn: ParseLinks) -> ParseLinks: """ - Given a function that parses an Iterable[Link] from an HTMLPage, cache the - function's result (keyed by CacheablePageContent), unless the HTMLPage + Given a function that parses an Iterable[Link] from an IndexContent, cache the + function's result (keyed by CacheablePageContent), unless the IndexContent `page` has `page.cache_link_parsing == False`. """ @@ -285,7 +217,7 @@ def wrapper(cacheable_page: CacheablePageContent) -> List[Link]: return list(fn(cacheable_page.page)) @functools.wraps(fn) - def wrapper_wrapper(page: "HTMLPage") -> List[Link]: + def wrapper_wrapper(page: "IndexContent") -> List[Link]: if page.cache_link_parsing: return wrapper(CacheablePageContent(page)) return list(fn(page)) @@ -293,36 +225,42 @@ def wrapper_wrapper(page: "HTMLPage") -> List[Link]: return wrapper_wrapper -@with_cached_html_pages -def parse_links(page: "HTMLPage") -> Iterable[Link]: +@with_cached_index_content +def parse_links(page: "IndexContent") -> Iterable[Link]: """ - Parse an HTML document, and yield its anchor elements as Link objects. + Parse a Simple API's Index Content, and yield its anchor elements as Link objects. """ - document = html5lib.parse( - page.content, - transport_encoding=page.encoding, - namespaceHTMLElements=False, - ) + + content_type_l = page.content_type.lower() + if content_type_l.startswith("application/vnd.pypi.simple.v1+json"): + data = json.loads(page.content) + for file in data.get("files", []): + link = Link.from_json(file, page.url) + if link is None: + continue + yield link + return + + parser = HTMLLinkParser(page.url) + encoding = page.encoding or "utf-8" + parser.feed(page.content.decode(encoding)) url = page.url - base_url = _determine_base_url(document, url) - for anchor in document.findall(".//a"): - link = _create_link_from_element( - anchor, - page_url=url, - base_url=base_url, - ) + base_url = parser.base_url or url + for anchor in parser.anchors: + link = Link.from_element(anchor, page_url=url, base_url=base_url) if link is None: continue yield link -class HTMLPage: - """Represents one page, along with its URL""" +class IndexContent: + """Represents one response (or page), along with its URL""" def __init__( self, content: bytes, + content_type: str, encoding: Optional[str], url: str, cache_link_parsing: bool = True, @@ -335,6 +273,7 @@ def __init__( have this set to False, for example. """ self.content = content + self.content_type = content_type self.encoding = encoding self.url = url self.cache_link_parsing = cache_link_parsing @@ -343,7 +282,35 @@ def __str__(self) -> str: return redact_auth_from_url(self.url) -def _handle_get_page_fail( +class HTMLLinkParser(HTMLParser): + """ + HTMLParser that keeps the first base HREF and a list of all anchor + elements' attributes. + """ + + def __init__(self, url: str) -> None: + super().__init__(convert_charrefs=True) + + self.url: str = url + self.base_url: Optional[str] = None + self.anchors: List[Dict[str, Optional[str]]] = [] + + def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]) -> None: + if tag == "base" and self.base_url is None: + href = self.get_href(attrs) + if href is not None: + self.base_url = href + elif tag == "a": + self.anchors.append(dict(attrs)) + + def get_href(self, attrs: List[Tuple[str, Optional[str]]]) -> Optional[str]: + for name, value in attrs: + if name == "href": + return value + return None + + +def _handle_get_simple_fail( link: Link, reason: Union[str, Exception], meth: Optional[Callable[..., None]] = None, @@ -353,24 +320,20 @@ def _handle_get_page_fail( meth("Could not fetch URL %s: %s - skipping", link, reason) -def _make_html_page(response: Response, cache_link_parsing: bool = True) -> HTMLPage: +def _make_index_content( + response: Response, cache_link_parsing: bool = True +) -> IndexContent: encoding = _get_encoding_from_headers(response.headers) - return HTMLPage( + return IndexContent( response.content, + response.headers["Content-Type"], encoding=encoding, url=response.url, cache_link_parsing=cache_link_parsing, ) -def _get_html_page( - link: Link, session: Optional[PipSession] = None -) -> Optional["HTMLPage"]: - if session is None: - raise TypeError( - "_get_html_page() missing 1 required keyword argument: 'session'" - ) - +def _get_index_content(link: Link, *, session: PipSession) -> Optional["IndexContent"]: url = link.url.split("#", 1)[0] # Check for VCS schemes that do not support lookup as web pages. @@ -390,39 +353,44 @@ def _get_html_page( # final segment if not url.endswith("/"): url += "/" + # TODO: In the future, it would be nice if pip supported PEP 691 + # style responses in the file:// URLs, however there's no + # standard file extension for application/vnd.pypi.simple.v1+json + # so we'll need to come up with something on our own. url = urllib.parse.urljoin(url, "index.html") logger.debug(" file: URL is directory, getting %s", url) try: - resp = _get_html_response(url, session=session) + resp = _get_simple_response(url, session=session) except _NotHTTP: logger.warning( "Skipping page %s because it looks like an archive, and cannot " "be checked by a HTTP HEAD request.", link, ) - except _NotHTML as exc: + except _NotAPIContent as exc: logger.warning( - "Skipping page %s because the %s request got Content-Type: %s." - "The only supported Content-Type is text/html", + "Skipping page %s because the %s request got Content-Type: %s. " + "The only supported Content-Types are application/vnd.pypi.simple.v1+json, " + "application/vnd.pypi.simple.v1+html, and text/html", link, exc.request_desc, exc.content_type, ) except NetworkConnectionError as exc: - _handle_get_page_fail(link, exc) + _handle_get_simple_fail(link, exc) except RetryError as exc: - _handle_get_page_fail(link, exc) + _handle_get_simple_fail(link, exc) except SSLError as exc: reason = "There was a problem confirming the ssl certificate: " reason += str(exc) - _handle_get_page_fail(link, reason, meth=logger.info) + _handle_get_simple_fail(link, reason, meth=logger.info) except requests.ConnectionError as exc: - _handle_get_page_fail(link, f"connection error: {exc}") + _handle_get_simple_fail(link, f"connection error: {exc}") except requests.Timeout: - _handle_get_page_fail(link, "timed out") + _handle_get_simple_fail(link, "timed out") else: - return _make_html_page(resp, cache_link_parsing=link.cache_link_parsing) + return _make_index_content(resp, cache_link_parsing=link.cache_link_parsing) return None @@ -474,6 +442,7 @@ def create( search_scope = SearchScope.create( find_links=find_links, index_urls=index_urls, + no_index=options.no_index, ) link_collector = LinkCollector( session=session, @@ -485,11 +454,11 @@ def create( def find_links(self) -> List[str]: return self.search_scope.find_links - def fetch_page(self, location: Link) -> Optional[HTMLPage]: + def fetch_response(self, location: Link) -> Optional[IndexContent]: """ Fetch an HTML page containing package links. """ - return _get_html_page(location, session=self.session) + return _get_index_content(location, session=self.session) def collect_sources( self, diff --git a/src/pip/_internal/index/package_finder.py b/src/pip/_internal/index/package_finder.py index a2702db7d35..b6f8d57e854 100644 --- a/src/pip/_internal/index/package_finder.py +++ b/src/pip/_internal/index/package_finder.py @@ -1,13 +1,11 @@ """Routines related to PyPI, indexes""" -# The following comment should be removed at some point in the future. -# mypy: strict-optional=False - +import enum import functools import itertools import logging import re -from typing import FrozenSet, Iterable, List, Optional, Set, Tuple, Union +from typing import TYPE_CHECKING, FrozenSet, Iterable, List, Optional, Set, Tuple, Union from pip._vendor.packaging import specifiers from pip._vendor.packaging.tags import Tag @@ -37,7 +35,9 @@ from pip._internal.utils.misc import build_netloc from pip._internal.utils.packaging import check_requires_python from pip._internal.utils.unpacking import SUPPORTED_EXTENSIONS -from pip._internal.utils.urls import url_to_path + +if TYPE_CHECKING: + from pip._vendor.typing_extensions import TypeGuard __all__ = ["FormatControl", "BestCandidateResult", "PackageFinder"] @@ -95,6 +95,16 @@ def _check_link_requires_python( return True +class LinkType(enum.Enum): + candidate = enum.auto() + different_project = enum.auto() + yanked = enum.auto() + format_unsupported = enum.auto() + format_invalid = enum.auto() + platform_mismatch = enum.auto() + requires_python_mismatch = enum.auto() + + class LinkEvaluator: """ @@ -144,19 +154,20 @@ def __init__( self.project_name = project_name - def evaluate_link(self, link: Link) -> Tuple[bool, Optional[str]]: + def evaluate_link(self, link: Link) -> Tuple[LinkType, str]: """ Determine whether a link is a candidate for installation. - :return: A tuple (is_candidate, result), where `result` is (1) a - version string if `is_candidate` is True, and (2) if - `is_candidate` is False, an optional string to log the reason - the link fails to qualify. + :return: A tuple (result, detail), where *result* is an enum + representing whether the evaluation found a candidate, or the reason + why one is not found. If a candidate is found, *detail* will be the + candidate's version string; if one is not found, it contains the + reason the link fails to qualify. """ version = None if link.is_yanked and not self._allow_yanked: reason = link.yanked_reason or "" - return (False, f"yanked for reason: {reason}") + return (LinkType.yanked, f"yanked for reason: {reason}") if link.egg_fragment: egg_info = link.egg_fragment @@ -164,42 +175,46 @@ def evaluate_link(self, link: Link) -> Tuple[bool, Optional[str]]: else: egg_info, ext = link.splitext() if not ext: - return (False, "not a file") + return (LinkType.format_unsupported, "not a file") if ext not in SUPPORTED_EXTENSIONS: - return (False, f"unsupported archive format: {ext}") + return ( + LinkType.format_unsupported, + f"unsupported archive format: {ext}", + ) if "binary" not in self._formats and ext == WHEEL_EXTENSION: - reason = "No binaries permitted for {}".format(self.project_name) - return (False, reason) + reason = f"No binaries permitted for {self.project_name}" + return (LinkType.format_unsupported, reason) if "macosx10" in link.path and ext == ".zip": - return (False, "macosx10 one") + return (LinkType.format_unsupported, "macosx10 one") if ext == WHEEL_EXTENSION: try: wheel = Wheel(link.filename) except InvalidWheelFilename: - return (False, "invalid wheel filename") + return ( + LinkType.format_invalid, + "invalid wheel filename", + ) if canonicalize_name(wheel.name) != self._canonical_name: - reason = "wrong project name (not {})".format(self.project_name) - return (False, reason) + reason = f"wrong project name (not {self.project_name})" + return (LinkType.different_project, reason) supported_tags = self._target_python.get_tags() if not wheel.supported(supported_tags): # Include the wheel's tags in the reason string to # simplify troubleshooting compatibility issues. - file_tags = wheel.get_formatted_file_tags() + file_tags = ", ".join(wheel.get_formatted_file_tags()) reason = ( - "none of the wheel's tags ({}) are compatible " - "(run pip debug --verbose to show compatible tags)".format( - ", ".join(file_tags) - ) + f"none of the wheel's tags ({file_tags}) are compatible " + f"(run pip debug --verbose to show compatible tags)" ) - return (False, reason) + return (LinkType.platform_mismatch, reason) version = wheel.version # This should be up by the self.ok_binary check, but see issue 2700. if "source" not in self._formats and ext != WHEEL_EXTENSION: reason = f"No sources permitted for {self.project_name}" - return (False, reason) + return (LinkType.format_unsupported, reason) if not version: version = _extract_version_from_fragment( @@ -208,14 +223,17 @@ def evaluate_link(self, link: Link) -> Tuple[bool, Optional[str]]: ) if not version: reason = f"Missing project version for {self.project_name}" - return (False, reason) + return (LinkType.format_invalid, reason) match = self._py_version_re.search(version) if match: version = version[: match.start()] py_version = match.group(1) if py_version != self._target_python.py_version: - return (False, "Python version is incorrect") + return ( + LinkType.platform_mismatch, + "Python version is incorrect", + ) supports_python = _check_link_requires_python( link, @@ -223,18 +241,17 @@ def evaluate_link(self, link: Link) -> Tuple[bool, Optional[str]]: ignore_requires_python=self._ignore_requires_python, ) if not supports_python: - # Return None for the reason text to suppress calling - # _log_skipped_link(). - return (False, None) + reason = f"{version} Requires-Python {link.requires_python}" + return (LinkType.requires_python_mismatch, reason) logger.debug("Found link %s, version: %s", link, version) - return (True, version) + return (LinkType.candidate, version) def filter_unallowed_hashes( candidates: List[InstallationCandidate], - hashes: Hashes, + hashes: Optional[Hashes], project_name: str, ) -> List[InstallationCandidate]: """ @@ -523,6 +540,7 @@ def _sort_key(self, candidate: InstallationCandidate) -> CandidateSortingKey: binary_preference = 1 if wheel.build_tag is not None: match = re.match(r"^(\d+)(.*)$", wheel.build_tag) + assert match is not None, "guaranteed by filename validation" build_tag_groups = match.groups() build_tag = (int(build_tag_groups[0]), build_tag_groups[1]) else: # sdist @@ -609,7 +627,7 @@ def __init__( self.format_control = format_control # These are boring links that have already been logged somehow. - self._logged_links: Set[Link] = set() + self._logged_links: Set[Tuple[Link, LinkType, str]] = set() # Don't include an allow_yanked default value to make sure each call # site considers whether yanked releases are allowed. This also causes @@ -686,6 +704,14 @@ def prefer_binary(self) -> bool: def set_prefer_binary(self) -> None: self._candidate_prefs.prefer_binary = True + def requires_python_skipped_reasons(self) -> List[str]: + reasons = { + detail + for _, result, detail in self._logged_links + if result == LinkType.requires_python_mismatch + } + return sorted(reasons) + def make_link_evaluator(self, project_name: str) -> LinkEvaluator: canonical_name = canonicalize_name(project_name) formats = self.format_control.get_allowed_formats(canonical_name) @@ -715,12 +741,13 @@ def _sort_links(self, links: Iterable[Link]) -> List[Link]: no_eggs.append(link) return no_eggs + eggs - def _log_skipped_link(self, link: Link, reason: str) -> None: - if link not in self._logged_links: + def _log_skipped_link(self, link: Link, result: LinkType, detail: str) -> None: + entry = (link, result, detail) + if entry not in self._logged_links: # Put the link at the end so the reason is more visible and because # the link string is usually very long. - logger.debug("Skipping link: %s: %s", reason, link) - self._logged_links.add(link) + logger.debug("Skipping link: %s: %s", detail, link) + self._logged_links.add(entry) def get_install_candidate( self, link_evaluator: LinkEvaluator, link: Link @@ -729,16 +756,15 @@ def get_install_candidate( If the link is a candidate for install, convert it to an InstallationCandidate and return it. Otherwise, return None. """ - is_candidate, result = link_evaluator.evaluate_link(link) - if not is_candidate: - if result: - self._log_skipped_link(link, reason=result) + result, detail = link_evaluator.evaluate_link(link) + if result != LinkType.candidate: + self._log_skipped_link(link, result, detail) return None return InstallationCandidate( name=link_evaluator.project_name, link=link, - version=result, + version=detail, ) def evaluate_links( @@ -762,11 +788,11 @@ def process_project_url( "Fetching project page and analyzing links: %s", project_url, ) - html_page = self._link_collector.fetch_page(project_url) - if html_page is None: + index_response = self._link_collector.fetch_response(project_url) + if index_response is None: return [] - page_links = list(parse_links(html_page)) + page_links = list(parse_links(index_response)) with indent_log(): package_links = self.evaluate_links( @@ -816,7 +842,14 @@ def find_all_candidates(self, project_name: str) -> List[InstallationCandidate]: ) if logger.isEnabledFor(logging.DEBUG) and file_candidates: - paths = [url_to_path(c.link.url) for c in file_candidates] + paths = [] + for candidate in file_candidates: + assert candidate.link.url # we need to have a URL + try: + paths.append(candidate.link.file_path) + except Exception: + paths.append(candidate.link.url) # it's not a local file + logger.debug("Local files found: %s", ", ".join(paths)) # This is an intentional priority ordering @@ -881,7 +914,7 @@ def find_requirement( installed_version: Optional[_BaseVersion] = None if req.satisfied_by is not None: - installed_version = parse_version(req.satisfied_by.version) + installed_version = req.satisfied_by.version def _format_versions(cand_iter: Iterable[InstallationCandidate]) -> str: # This repeated parse_version and str() conversion is needed to @@ -910,43 +943,46 @@ def _format_versions(cand_iter: Iterable[InstallationCandidate]) -> str: "No matching distribution found for {}".format(req) ) - best_installed = False - if installed_version and ( - best_candidate is None or best_candidate.version <= installed_version - ): - best_installed = True + def _should_install_candidate( + candidate: Optional[InstallationCandidate], + ) -> "TypeGuard[InstallationCandidate]": + if installed_version is None: + return True + if best_candidate is None: + return False + return best_candidate.version > installed_version if not upgrade and installed_version is not None: - if best_installed: + if _should_install_candidate(best_candidate): logger.debug( - "Existing installed version (%s) is most up-to-date and " - "satisfies requirement", + "Existing installed version (%s) satisfies requirement " + "(most up-to-date version is %s)", installed_version, + best_candidate.version, ) else: logger.debug( - "Existing installed version (%s) satisfies requirement " - "(most up-to-date version is %s)", + "Existing installed version (%s) is most up-to-date and " + "satisfies requirement", installed_version, - best_candidate.version, ) return None - if best_installed: - # We have an existing version, and its the best version + if _should_install_candidate(best_candidate): logger.debug( - "Installed version (%s) is most up-to-date (past versions: %s)", - installed_version, + "Using version %s (newest of versions: %s)", + best_candidate.version, _format_versions(best_candidate_result.iter_applicable()), ) - raise BestVersionAlreadyInstalled + return best_candidate + # We have an existing version, and its the best version logger.debug( - "Using version %s (newest of versions: %s)", - best_candidate.version, + "Installed version (%s) is most up-to-date (past versions: %s)", + installed_version, _format_versions(best_candidate_result.iter_applicable()), ) - return best_candidate + raise BestVersionAlreadyInstalled def _find_name_version_sep(fragment: str, canonical_name: str) -> int: diff --git a/src/pip/_internal/locations/__init__.py b/src/pip/_internal/locations/__init__.py index dba182d7578..d54bc63eba3 100644 --- a/src/pip/_internal/locations/__init__.py +++ b/src/pip/_internal/locations/__init__.py @@ -4,14 +4,14 @@ import pathlib import sys import sysconfig -from typing import Any, Dict, Iterator, List, Optional, Tuple +from typing import Any, Dict, Generator, Optional, Tuple from pip._internal.models.scheme import SCHEME_KEYS, Scheme from pip._internal.utils.compat import WINDOWS from pip._internal.utils.deprecation import deprecated from pip._internal.utils.virtualenv import running_under_virtualenv -from . import _distutils, _sysconfig +from . import _sysconfig from .base import ( USER_CACHE_DIR, get_major_minor_version, @@ -27,7 +27,6 @@ "get_bin_user", "get_major_minor_version", "get_platlib", - "get_prefixed_libs", "get_purelib", "get_scheme", "get_src_prefix", @@ -38,14 +37,40 @@ logger = logging.getLogger(__name__) -if os.environ.get("_PIP_LOCATIONS_NO_WARN_ON_MISMATCH"): - _MISMATCH_LEVEL = logging.DEBUG -else: - _MISMATCH_LEVEL = logging.WARNING _PLATLIBDIR: str = getattr(sys, "platlibdir", "lib") -_USE_SYSCONFIG = sys.version_info >= (3, 10) +_USE_SYSCONFIG_DEFAULT = sys.version_info >= (3, 10) + + +def _should_use_sysconfig() -> bool: + """This function determines the value of _USE_SYSCONFIG. + + By default, pip uses sysconfig on Python 3.10+. + But Python distributors can override this decision by setting: + sysconfig._PIP_USE_SYSCONFIG = True / False + Rationale in https://github.com/pypa/pip/issues/10647 + + This is a function for testability, but should be constant during any one + run. + """ + return bool(getattr(sysconfig, "_PIP_USE_SYSCONFIG", _USE_SYSCONFIG_DEFAULT)) + + +_USE_SYSCONFIG = _should_use_sysconfig() + +if not _USE_SYSCONFIG: + # Import distutils lazily to avoid deprecation warnings, + # but import it soon enough that it is in memory and available during + # a pip reinstall. + from . import _distutils + +# Be noisy about incompatibilities if this platforms "should" be using +# sysconfig, but is explicitly opting out and using distutils instead. +if _USE_SYSCONFIG_DEFAULT and not _USE_SYSCONFIG: + _MISMATCH_LEVEL = logging.WARNING +else: + _MISMATCH_LEVEL = logging.DEBUG def _looks_like_bpo_44860() -> bool: @@ -53,7 +78,7 @@ def _looks_like_bpo_44860() -> bool: See . """ - from distutils.command.install import INSTALL_SCHEMES # type: ignore + from distutils.command.install import INSTALL_SCHEMES try: unix_user_platlib = INSTALL_SCHEMES["unix_user"]["platlib"] @@ -64,8 +89,8 @@ def _looks_like_bpo_44860() -> bool: def _looks_like_red_hat_patched_platlib_purelib(scheme: Dict[str, str]) -> bool: platlib = scheme["platlib"] - if "/$platlibdir/" in platlib and hasattr(sys, "platlibdir"): - platlib = platlib.replace("/$platlibdir/", f"/{sys.platlibdir}/") + if "/$platlibdir/" in platlib: + platlib = platlib.replace("/$platlibdir/", f"/{_PLATLIBDIR}/") if "/lib64/" not in platlib: return False unpatched = platlib.replace("/lib64/", "/lib/") @@ -78,7 +103,7 @@ def _looks_like_red_hat_lib() -> bool: This is the only way I can see to tell a Red Hat-patched Python. """ - from distutils.command.install import INSTALL_SCHEMES # type: ignore + from distutils.command.install import INSTALL_SCHEMES return all( k in INSTALL_SCHEMES @@ -90,7 +115,7 @@ def _looks_like_red_hat_lib() -> bool: @functools.lru_cache(maxsize=None) def _looks_like_debian_scheme() -> bool: """Debian adds two additional schemes.""" - from distutils.command.install import INSTALL_SCHEMES # type: ignore + from distutils.command.install import INSTALL_SCHEMES return "deb_system" in INSTALL_SCHEMES and "unix_local" in INSTALL_SCHEMES @@ -115,6 +140,22 @@ def _looks_like_red_hat_scheme() -> bool: ) +@functools.lru_cache(maxsize=None) +def _looks_like_slackware_scheme() -> bool: + """Slackware patches sysconfig but fails to patch distutils and site. + + Slackware changes sysconfig's user scheme to use ``"lib64"`` for the lib + path, but does not do the same to the site module. + """ + if user_site is None: # User-site not available. + return False + try: + paths = sysconfig.get_paths(scheme="posix_user", expand=False) + except KeyError: # User-site not available. + return False + return "/lib64/" in paths["purelib"] and "/lib64/" not in user_site + + @functools.lru_cache(maxsize=None) def _looks_like_msys2_mingw_scheme() -> bool: """MSYS2 patches distutils and sysconfig to use a UNIX-like scheme. @@ -133,9 +174,9 @@ def _looks_like_msys2_mingw_scheme() -> bool: ) -def _fix_abiflags(parts: Tuple[str]) -> Iterator[str]: +def _fix_abiflags(parts: Tuple[str]) -> Generator[str, None, None]: ldversion = sysconfig.get_config_var("LDVERSION") - abiflags: str = getattr(sys, "abiflags", None) + abiflags = getattr(sys, "abiflags", None) # LDVERSION does not end with sys.abiflags. Just return the path unchanged. if not ldversion or not abiflags or not ldversion.endswith(abiflags): @@ -270,6 +311,17 @@ def get_scheme( if skip_bpo_44860: continue + # Slackware incorrectly patches posix_user to use lib64 instead of lib, + # but not usersite to match the location. + skip_slackware_user_scheme = ( + user + and k in ("platlib", "purelib") + and not WINDOWS + and _looks_like_slackware_scheme() + ) + if skip_slackware_user_scheme: + continue + # Both Debian and Red Hat patch Python to place the system site under # /usr/local instead of /usr. Debian also places lib in dist-packages # instead of site-packages, but the /usr/local check should cover it. @@ -405,42 +457,11 @@ def get_platlib() -> str: if _USE_SYSCONFIG: return new + from . import _distutils + old = _distutils.get_platlib() if _looks_like_deb_system_dist_packages(old): return old if _warn_if_mismatch(pathlib.Path(old), pathlib.Path(new), key="platlib"): _log_context() return old - - -def _deduplicated(v1: str, v2: str) -> List[str]: - """Deduplicate values from a list.""" - if v1 == v2: - return [v1] - return [v1, v2] - - -def get_prefixed_libs(prefix: str) -> List[str]: - """Return the lib locations under ``prefix``.""" - new_pure, new_plat = _sysconfig.get_prefixed_libs(prefix) - if _USE_SYSCONFIG: - return _deduplicated(new_pure, new_plat) - - old_pure, old_plat = _distutils.get_prefixed_libs(prefix) - - warned = [ - _warn_if_mismatch( - pathlib.Path(old_pure), - pathlib.Path(new_pure), - key="prefixed-purelib", - ), - _warn_if_mismatch( - pathlib.Path(old_plat), - pathlib.Path(new_plat), - key="prefixed-platlib", - ), - ] - if any(warned): - _log_context(prefix=prefix) - - return _deduplicated(old_pure, old_plat) diff --git a/src/pip/_internal/locations/_distutils.py b/src/pip/_internal/locations/_distutils.py index 2ec79e65bea..92bd93179c5 100644 --- a/src/pip/_internal/locations/_distutils.py +++ b/src/pip/_internal/locations/_distutils.py @@ -3,6 +3,17 @@ # The following comment should be removed at some point in the future. # mypy: strict-optional=False +# If pip's going to use distutils, it should not be using the copy that setuptools +# might have injected into the environment. This is done by removing the injected +# shim, if it's injected. +# +# See https://github.com/pypa/pip/issues/8761 for the original discussion and +# rationale for why this is done within pip. +try: + __import__("_distutils_hack").remove_shim() +except (ImportError, AttributeError): + pass + import logging import os import sys @@ -10,7 +21,7 @@ from distutils.command.install import SCHEME_KEYS from distutils.command.install import install as distutils_install_command from distutils.sysconfig import get_python_lib -from typing import Dict, List, Optional, Tuple, Union, cast +from typing import Dict, List, Optional, Union, cast from pip._internal.models.scheme import Scheme from pip._internal.utils.compat import WINDOWS @@ -24,10 +35,10 @@ def distutils_scheme( dist_name: str, user: bool = False, - home: str = None, - root: str = None, + home: Optional[str] = None, + root: Optional[str] = None, isolated: bool = False, - prefix: str = None, + prefix: Optional[str] = None, *, ignore_config_files: bool = False, ) -> Dict[str, str]: @@ -84,7 +95,7 @@ def distutils_scheme( if home: prefix = home elif user: - prefix = i.install_userbase # type: ignore + prefix = i.install_userbase else: prefix = i.prefix scheme["headers"] = os.path.join( @@ -160,10 +171,3 @@ def get_purelib() -> str: def get_platlib() -> str: return get_python_lib(plat_specific=True) - - -def get_prefixed_libs(prefix: str) -> Tuple[str, str]: - return ( - get_python_lib(plat_specific=False, prefix=prefix), - get_python_lib(plat_specific=True, prefix=prefix), - ) diff --git a/src/pip/_internal/locations/_sysconfig.py b/src/pip/_internal/locations/_sysconfig.py index 5e141aa1be7..97aef1f1ac2 100644 --- a/src/pip/_internal/locations/_sysconfig.py +++ b/src/pip/_internal/locations/_sysconfig.py @@ -1,4 +1,3 @@ -import distutils.util # FIXME: For change_root. import logging import os import sys @@ -9,7 +8,7 @@ from pip._internal.models.scheme import SCHEME_KEYS, Scheme from pip._internal.utils.virtualenv import running_under_virtualenv -from .base import get_major_minor_version, is_osx_framework +from .base import change_root, get_major_minor_version, is_osx_framework logger = logging.getLogger(__name__) @@ -194,7 +193,7 @@ def get_scheme( ) if root is not None: for key in SCHEME_KEYS: - value = distutils.util.change_root(root, getattr(scheme, key)) + value = change_root(root, getattr(scheme, key)) setattr(scheme, key, value) return scheme @@ -212,8 +211,3 @@ def get_purelib() -> str: def get_platlib() -> str: return sysconfig.get_paths()["platlib"] - - -def get_prefixed_libs(prefix: str) -> typing.Tuple[str, str]: - paths = sysconfig.get_paths(vars={"base": prefix, "platbase": prefix}) - return (paths["purelib"], paths["platlib"]) diff --git a/src/pip/_internal/locations/base.py b/src/pip/_internal/locations/base.py index 86dad4a3a84..3f9f896e632 100644 --- a/src/pip/_internal/locations/base.py +++ b/src/pip/_internal/locations/base.py @@ -5,6 +5,7 @@ import sysconfig import typing +from pip._internal.exceptions import InstallationError from pip._internal.utils import appdirs from pip._internal.utils.virtualenv import running_under_virtualenv @@ -12,7 +13,7 @@ USER_CACHE_DIR = appdirs.user_cache_dir("pip") # FIXME doesn't account for venv linked to global site-packages -site_packages: typing.Optional[str] = sysconfig.get_path("purelib") +site_packages: str = sysconfig.get_path("purelib") def get_major_minor_version() -> str: @@ -23,6 +24,34 @@ def get_major_minor_version() -> str: return "{}.{}".format(*sys.version_info) +def change_root(new_root: str, pathname: str) -> str: + """Return 'pathname' with 'new_root' prepended. + + If 'pathname' is relative, this is equivalent to os.path.join(new_root, pathname). + Otherwise, it requires making 'pathname' relative and then joining the + two, which is tricky on DOS/Windows and Mac OS. + + This is borrowed from Python's standard library's distutils module. + """ + if os.name == "posix": + if not os.path.isabs(pathname): + return os.path.join(new_root, pathname) + else: + return os.path.join(new_root, pathname[1:]) + + elif os.name == "nt": + (drive, path) = os.path.splitdrive(pathname) + if path[0] == "\\": + path = path[1:] + return os.path.join(new_root, path) + + else: + raise InstallationError( + f"Unknown platform: {os.name}\n" + "Can not change root path prefix on unknown platform." + ) + + def get_src_prefix() -> str: if running_under_virtualenv(): src_prefix = os.path.join(sys.prefix, "src") diff --git a/src/pip/_internal/metadata/__init__.py b/src/pip/_internal/metadata/__init__.py index f4f2a4f6cdb..9f73ca7105f 100644 --- a/src/pip/_internal/metadata/__init__.py +++ b/src/pip/_internal/metadata/__init__.py @@ -1,7 +1,18 @@ -from typing import List, Optional +import contextlib +import functools +import os +import sys +from typing import TYPE_CHECKING, List, Optional, Type, cast + +from pip._internal.utils.misc import strtobool from .base import BaseDistribution, BaseEnvironment, FilesystemWheel, MemoryWheel, Wheel +if TYPE_CHECKING: + from typing import Protocol +else: + Protocol = object + __all__ = [ "BaseDistribution", "BaseEnvironment", @@ -11,9 +22,49 @@ "get_default_environment", "get_environment", "get_wheel_distribution", + "select_backend", ] +def _should_use_importlib_metadata() -> bool: + """Whether to use the ``importlib.metadata`` or ``pkg_resources`` backend. + + By default, pip uses ``importlib.metadata`` on Python 3.11+, and + ``pkg_resourcess`` otherwise. This can be overridden by a couple of ways: + + * If environment variable ``_PIP_USE_IMPORTLIB_METADATA`` is set, it + dictates whether ``importlib.metadata`` is used, regardless of Python + version. + * On Python 3.11+, Python distributors can patch ``importlib.metadata`` + to add a global constant ``_PIP_USE_IMPORTLIB_METADATA = False``. This + makes pip use ``pkg_resources`` (unless the user set the aforementioned + environment variable to *True*). + """ + with contextlib.suppress(KeyError, ValueError): + return bool(strtobool(os.environ["_PIP_USE_IMPORTLIB_METADATA"])) + if sys.version_info < (3, 11): + return False + import importlib.metadata + + return bool(getattr(importlib.metadata, "_PIP_USE_IMPORTLIB_METADATA", True)) + + +class Backend(Protocol): + Distribution: Type[BaseDistribution] + Environment: Type[BaseEnvironment] + + +@functools.lru_cache(maxsize=None) +def select_backend() -> Backend: + if _should_use_importlib_metadata(): + from . import importlib + + return cast(Backend, importlib) + from . import pkg_resources + + return cast(Backend, pkg_resources) + + def get_default_environment() -> BaseEnvironment: """Get the default representation for the current environment. @@ -21,9 +72,7 @@ def get_default_environment() -> BaseEnvironment: Environment instance should be built from ``sys.path`` and may use caching to share instance state accorss calls. """ - from .pkg_resources import Environment - - return Environment.default() + return select_backend().Environment.default() def get_environment(paths: Optional[List[str]]) -> BaseEnvironment: @@ -33,9 +82,16 @@ def get_environment(paths: Optional[List[str]]) -> BaseEnvironment: given import paths. The backend must build a fresh instance representing the state of installed distributions when this function is called. """ - from .pkg_resources import Environment + return select_backend().Environment.from_paths(paths) - return Environment.from_paths(paths) + +def get_directory_distribution(directory: str) -> BaseDistribution: + """Get the distribution metadata representation in the specified directory. + + This returns a Distribution instance from the chosen backend based on + the given on-disk ``.dist-info`` directory. + """ + return select_backend().Distribution.from_directory(directory) def get_wheel_distribution(wheel: Wheel, canonical_name: str) -> BaseDistribution: @@ -46,6 +102,26 @@ def get_wheel_distribution(wheel: Wheel, canonical_name: str) -> BaseDistributio :param canonical_name: Normalized project name of the given wheel. """ - from .pkg_resources import Distribution + return select_backend().Distribution.from_wheel(wheel, canonical_name) + + +def get_metadata_distribution( + metadata_contents: bytes, + filename: str, + canonical_name: str, +) -> BaseDistribution: + """Get the dist representation of the specified METADATA file contents. - return Distribution.from_wheel(wheel, canonical_name) + This returns a Distribution instance from the chosen backend sourced from the data + in `metadata_contents`. + + :param metadata_contents: Contents of a METADATA file within a dist, or one served + via PEP 658. + :param filename: Filename for the dist this metadata represents. + :param canonical_name: Normalized project name of the given dist. + """ + return select_backend().Distribution.from_metadata_file_contents( + metadata_contents, + filename, + canonical_name, + ) diff --git a/src/pip/_internal/metadata/_json.py b/src/pip/_internal/metadata/_json.py new file mode 100644 index 00000000000..336b52f1efd --- /dev/null +++ b/src/pip/_internal/metadata/_json.py @@ -0,0 +1,84 @@ +# Extracted from https://github.com/pfmoore/pkg_metadata + +from email.header import Header, decode_header, make_header +from email.message import Message +from typing import Any, Dict, List, Union + +METADATA_FIELDS = [ + # Name, Multiple-Use + ("Metadata-Version", False), + ("Name", False), + ("Version", False), + ("Dynamic", True), + ("Platform", True), + ("Supported-Platform", True), + ("Summary", False), + ("Description", False), + ("Description-Content-Type", False), + ("Keywords", False), + ("Home-page", False), + ("Download-URL", False), + ("Author", False), + ("Author-email", False), + ("Maintainer", False), + ("Maintainer-email", False), + ("License", False), + ("Classifier", True), + ("Requires-Dist", True), + ("Requires-Python", False), + ("Requires-External", True), + ("Project-URL", True), + ("Provides-Extra", True), + ("Provides-Dist", True), + ("Obsoletes-Dist", True), +] + + +def json_name(field: str) -> str: + return field.lower().replace("-", "_") + + +def msg_to_json(msg: Message) -> Dict[str, Any]: + """Convert a Message object into a JSON-compatible dictionary.""" + + def sanitise_header(h: Union[Header, str]) -> str: + if isinstance(h, Header): + chunks = [] + for bytes, encoding in decode_header(h): + if encoding == "unknown-8bit": + try: + # See if UTF-8 works + bytes.decode("utf-8") + encoding = "utf-8" + except UnicodeDecodeError: + # If not, latin1 at least won't fail + encoding = "latin1" + chunks.append((bytes, encoding)) + return str(make_header(chunks)) + return str(h) + + result = {} + for field, multi in METADATA_FIELDS: + if field not in msg: + continue + key = json_name(field) + if multi: + value: Union[str, List[str]] = [ + sanitise_header(v) for v in msg.get_all(field) + ] + else: + value = sanitise_header(msg.get(field)) + if key == "keywords": + # Accept both comma-separated and space-separated + # forms, for better compatibility with old data. + if "," in value: + value = [v.strip() for v in value.split(",")] + else: + value = value.split() + result[key] = value + + payload = msg.get_payload() + if payload: + result["description"] = payload + + return result diff --git a/src/pip/_internal/metadata/base.py b/src/pip/_internal/metadata/base.py index 4788360a009..cafb79fb3dc 100644 --- a/src/pip/_internal/metadata/base.py +++ b/src/pip/_internal/metadata/base.py @@ -1,17 +1,24 @@ +import csv import email.message +import functools import json import logging +import pathlib import re import zipfile from typing import ( IO, TYPE_CHECKING, + Any, Collection, Container, + Dict, Iterable, Iterator, List, + NamedTuple, Optional, + Tuple, Union, ) @@ -20,6 +27,8 @@ from pip._vendor.packaging.utils import NormalizedName from pip._vendor.packaging.version import LegacyVersion, Version +from pip._internal.exceptions import NoneMetadataError +from pip._internal.locations import site_packages, user_site from pip._internal.models.direct_url import ( DIRECT_URL_METADATA_NAME, DirectUrl, @@ -27,8 +36,12 @@ ) from pip._internal.utils.compat import stdlib_pkgs # TODO: Move definition here. from pip._internal.utils.egg_link import egg_link_path_from_sys_path +from pip._internal.utils.misc import is_local, normalize_path +from pip._internal.utils.packaging import safe_extra from pip._internal.utils.urls import url_to_path +from ._json import msg_to_json + if TYPE_CHECKING: from typing import Protocol else: @@ -36,6 +49,8 @@ DistributionVersion = Union[LegacyVersion, Version] +InfoPath = Union[str, pathlib.PurePath] + logger = logging.getLogger(__name__) @@ -53,7 +68,83 @@ def group(self) -> str: raise NotImplementedError() +def _convert_installed_files_path( + entry: Tuple[str, ...], + info: Tuple[str, ...], +) -> str: + """Convert a legacy installed-files.txt path into modern RECORD path. + + The legacy format stores paths relative to the info directory, while the + modern format stores paths relative to the package root, e.g. the + site-packages directory. + + :param entry: Path parts of the installed-files.txt entry. + :param info: Path parts of the egg-info directory relative to package root. + :returns: The converted entry. + + For best compatibility with symlinks, this does not use ``abspath()`` or + ``Path.resolve()``, but tries to work with path parts: + + 1. While ``entry`` starts with ``..``, remove the equal amounts of parts + from ``info``; if ``info`` is empty, start appending ``..`` instead. + 2. Join the two directly. + """ + while entry and entry[0] == "..": + if not info or info[-1] == "..": + info += ("..",) + else: + info = info[:-1] + entry = entry[1:] + return str(pathlib.Path(*info, *entry)) + + +class RequiresEntry(NamedTuple): + requirement: str + extra: str + marker: str + + class BaseDistribution(Protocol): + @classmethod + def from_directory(cls, directory: str) -> "BaseDistribution": + """Load the distribution from a metadata directory. + + :param directory: Path to a metadata directory, e.g. ``.dist-info``. + """ + raise NotImplementedError() + + @classmethod + def from_metadata_file_contents( + cls, + metadata_contents: bytes, + filename: str, + project_name: str, + ) -> "BaseDistribution": + """Load the distribution from the contents of a METADATA file. + + This is used to implement PEP 658 by generating a "shallow" dist object that can + be used for resolution without downloading or building the actual dist yet. + + :param metadata_contents: The contents of a METADATA file. + :param filename: File name for the dist with this metadata. + :param project_name: Name of the project this dist represents. + """ + raise NotImplementedError() + + @classmethod + def from_wheel(cls, wheel: "Wheel", name: str) -> "BaseDistribution": + """Load the distribution from a given wheel. + + :param wheel: A concrete wheel definition. + :param name: File name of the wheel. + + :raises InvalidWheel: Whenever loading of the wheel causes a + :py:exc:`zipfile.BadZipFile` exception to be thrown. + :raises UnsupportedWheel: If the wheel is a valid zip, but malformed + internally. + """ + raise NotImplementedError() + def __repr__(self) -> str: return f"{self.raw_name} {self.version} ({self.location})" @@ -97,8 +188,21 @@ def editable_project_location(self) -> Optional[str]: return None @property - def info_directory(self) -> Optional[str]: - """Location of the .[egg|dist]-info directory. + def installed_location(self) -> Optional[str]: + """The distribution's "installed" location. + + This should generally be a ``site-packages`` directory. This is + usually ``dist.location``, except for legacy develop-installed packages, + where ``dist.location`` is the source code location, and this is where + the ``.egg-link`` file is. + + The returned location is normalized (in particular, with symlinks removed). + """ + raise NotImplementedError() + + @property + def info_location(self) -> Optional[str]: + """Location of the .[egg|dist]-info directory or file. Similarly to ``location``, a string value is not necessarily a filesystem path. ``None`` means the distribution is created in-memory. @@ -112,6 +216,65 @@ def info_directory(self) -> Optional[str]: """ raise NotImplementedError() + @property + def installed_by_distutils(self) -> bool: + """Whether this distribution is installed with legacy distutils format. + + A distribution installed with "raw" distutils not patched by setuptools + uses one single file at ``info_location`` to store metadata. We need to + treat this specially on uninstallation. + """ + info_location = self.info_location + if not info_location: + return False + return pathlib.Path(info_location).is_file() + + @property + def installed_as_egg(self) -> bool: + """Whether this distribution is installed as an egg. + + This usually indicates the distribution was installed by (older versions + of) easy_install. + """ + location = self.location + if not location: + return False + return location.endswith(".egg") + + @property + def installed_with_setuptools_egg_info(self) -> bool: + """Whether this distribution is installed with the ``.egg-info`` format. + + This usually indicates the distribution was installed with setuptools + with an old pip version or with ``single-version-externally-managed``. + + Note that this ensure the metadata store is a directory. distutils can + also installs an ``.egg-info``, but as a file, not a directory. This + property is *False* for that case. Also see ``installed_by_distutils``. + """ + info_location = self.info_location + if not info_location: + return False + if not info_location.endswith(".egg-info"): + return False + return pathlib.Path(info_location).is_dir() + + @property + def installed_with_dist_info(self) -> bool: + """Whether this distribution is installed with the "modern format". + + This indicates a "modern" installation, e.g. storing metadata in the + ``.dist-info`` directory. This applies to installations made by + setuptools (but through pip, not directly), or anything using the + standardized build backend interface (PEP 517). + """ + info_location = self.info_location + if not info_location: + return False + if not info_location.endswith(".dist-info"): + return False + return pathlib.Path(info_location).is_dir() + @property def canonical_name(self) -> NormalizedName: raise NotImplementedError() @@ -120,6 +283,14 @@ def canonical_name(self) -> NormalizedName: def version(self) -> DistributionVersion: raise NotImplementedError() + @property + def setuptools_filename(self) -> str: + """Convert a project name to its setuptools-compatible filename. + + This is a copy of ``pkg_resources.to_filename()`` for compatibility. + """ + return self.raw_name.replace("-", "_") + @property def direct_url(self) -> Optional[DirectUrl]: """Obtain a DirectUrl from this distribution. @@ -148,7 +319,19 @@ def direct_url(self) -> Optional[DirectUrl]: @property def installer(self) -> str: - raise NotImplementedError() + try: + installer_text = self.read_text("INSTALLER") + except (OSError, ValueError, NoneMetadataError): + return "" # Fail silently if the installer file cannot be read. + for line in installer_text.splitlines(): + cleaned_line = line.strip() + if cleaned_line: + return cleaned_line + return "" + + @property + def requested(self) -> bool: + return self.is_file("REQUESTED") @property def editable(self) -> bool: @@ -156,31 +339,82 @@ def editable(self) -> bool: @property def local(self) -> bool: - raise NotImplementedError() + """If distribution is installed in the current virtual environment. + + Always True if we're not in a virtualenv. + """ + if self.installed_location is None: + return False + return is_local(self.installed_location) @property def in_usersite(self) -> bool: - raise NotImplementedError() + if self.installed_location is None or user_site is None: + return False + return self.installed_location.startswith(normalize_path(user_site)) @property def in_site_packages(self) -> bool: + if self.installed_location is None or site_packages is None: + return False + return self.installed_location.startswith(normalize_path(site_packages)) + + def is_file(self, path: InfoPath) -> bool: + """Check whether an entry in the info directory is a file.""" + raise NotImplementedError() + + def iter_distutils_script_names(self) -> Iterator[str]: + """Find distutils 'scripts' entries metadata. + + If 'scripts' is supplied in ``setup.py``, distutils records those in the + installed distribution's ``scripts`` directory, a file for each script. + """ raise NotImplementedError() - def read_text(self, name: str) -> str: - """Read a file in the .dist-info (or .egg-info) directory. + def read_text(self, path: InfoPath) -> str: + """Read a file in the info directory. - Should raise ``FileNotFoundError`` if ``name`` does not exist in the - metadata directory. + :raise FileNotFoundError: If ``path`` does not exist in the directory. + :raise NoneMetadataError: If ``path`` exists in the info directory, but + cannot be read. """ raise NotImplementedError() def iter_entry_points(self) -> Iterable[BaseEntryPoint]: raise NotImplementedError() + def _metadata_impl(self) -> email.message.Message: + raise NotImplementedError() + + @functools.lru_cache(maxsize=1) + def _metadata_cached(self) -> email.message.Message: + # When we drop python 3.7 support, move this to the metadata property and use + # functools.cached_property instead of lru_cache. + metadata = self._metadata_impl() + self._add_egg_info_requires(metadata) + return metadata + @property def metadata(self) -> email.message.Message: - """Metadata of distribution parsed from e.g. METADATA or PKG-INFO.""" - raise NotImplementedError() + """Metadata of distribution parsed from e.g. METADATA or PKG-INFO. + + This should return an empty message if the metadata file is unavailable. + + :raises NoneMetadataError: If the metadata file is available, but does + not contain valid metadata. + """ + return self._metadata_cached() + + @property + def metadata_dict(self) -> Dict[str, Any]: + """PEP 566 compliant JSON-serializable representation of METADATA or PKG-INFO. + + This should return an empty dict if the metadata file is unavailable. + + :raises NoneMetadataError: If the metadata file is available, but does + not contain valid metadata. + """ + return msg_to_json(self.metadata) @property def metadata_version(self) -> Optional[str]: @@ -229,6 +463,121 @@ def iter_provided_extras(self) -> Iterable[str]: """ raise NotImplementedError() + def _iter_declared_entries_from_record(self) -> Optional[Iterator[str]]: + try: + text = self.read_text("RECORD") + except FileNotFoundError: + return None + # This extra Path-str cast normalizes entries. + return (str(pathlib.Path(row[0])) for row in csv.reader(text.splitlines())) + + def _iter_declared_entries_from_legacy(self) -> Optional[Iterator[str]]: + try: + text = self.read_text("installed-files.txt") + except FileNotFoundError: + return None + paths = (p for p in text.splitlines(keepends=False) if p) + root = self.location + info = self.info_location + if root is None or info is None: + return paths + try: + info_rel = pathlib.Path(info).relative_to(root) + except ValueError: # info is not relative to root. + return paths + if not info_rel.parts: # info *is* root. + return paths + return ( + _convert_installed_files_path(pathlib.Path(p).parts, info_rel.parts) + for p in paths + ) + + def iter_declared_entries(self) -> Optional[Iterator[str]]: + """Iterate through file entries declared in this distribution. + + For modern .dist-info distributions, this is the files listed in the + ``RECORD`` metadata file. For legacy setuptools distributions, this + comes from ``installed-files.txt``, with entries normalized to be + compatible with the format used by ``RECORD``. + + :return: An iterator for listed entries, or None if the distribution + contains neither ``RECORD`` nor ``installed-files.txt``. + """ + return ( + self._iter_declared_entries_from_record() + or self._iter_declared_entries_from_legacy() + ) + + def _iter_requires_txt_entries(self) -> Iterator[RequiresEntry]: + """Parse a ``requires.txt`` in an egg-info directory. + + This is an INI-ish format where an egg-info stores dependencies. A + section name describes extra other environment markers, while each entry + is an arbitrary string (not a key-value pair) representing a dependency + as a requirement string (no markers). + + There is a construct in ``importlib.metadata`` called ``Sectioned`` that + does mostly the same, but the format is currently considered private. + """ + try: + content = self.read_text("requires.txt") + except FileNotFoundError: + return + extra = marker = "" # Section-less entries don't have markers. + for line in content.splitlines(): + line = line.strip() + if not line or line.startswith("#"): # Comment; ignored. + continue + if line.startswith("[") and line.endswith("]"): # A section header. + extra, _, marker = line.strip("[]").partition(":") + continue + yield RequiresEntry(requirement=line, extra=extra, marker=marker) + + def _iter_egg_info_extras(self) -> Iterable[str]: + """Get extras from the egg-info directory.""" + known_extras = {""} + for entry in self._iter_requires_txt_entries(): + if entry.extra in known_extras: + continue + known_extras.add(entry.extra) + yield entry.extra + + def _iter_egg_info_dependencies(self) -> Iterable[str]: + """Get distribution dependencies from the egg-info directory. + + To ease parsing, this converts a legacy dependency entry into a PEP 508 + requirement string. Like ``_iter_requires_txt_entries()``, there is code + in ``importlib.metadata`` that does mostly the same, but not do exactly + what we need. + + Namely, ``importlib.metadata`` does not normalize the extra name before + putting it into the requirement string, which causes marker comparison + to fail because the dist-info format do normalize. This is consistent in + all currently available PEP 517 backends, although not standardized. + """ + for entry in self._iter_requires_txt_entries(): + if entry.extra and entry.marker: + marker = f'({entry.marker}) and extra == "{safe_extra(entry.extra)}"' + elif entry.extra: + marker = f'extra == "{safe_extra(entry.extra)}"' + elif entry.marker: + marker = entry.marker + else: + marker = "" + if marker: + yield f"{entry.requirement} ; {marker}" + else: + yield entry.requirement + + def _add_egg_info_requires(self, metadata: email.message.Message) -> None: + """Add egg-info requires.txt information to the metadata.""" + if not metadata.get_all("Requires-Dist"): + for dep in self._iter_egg_info_dependencies(): + metadata["Requires-Dist"] = dep + if not metadata.get_all("Provides-Extra"): + for extra in self._iter_egg_info_extras(): + metadata["Provides-Extra"] = extra + class BaseEnvironment: """An environment containing distributions to introspect.""" @@ -242,7 +591,11 @@ def from_paths(cls, paths: Optional[List[str]]) -> "BaseEnvironment": raise NotImplementedError() def get_distribution(self, name: str) -> Optional["BaseDistribution"]: - """Given a requirement name, return the installed distributions.""" + """Given a requirement name, return the installed distributions. + + The name may not be normalized. The implementation must canonicalize + it for lookup. + """ raise NotImplementedError() def _iter_distributions(self) -> Iterator["BaseDistribution"]: @@ -254,8 +607,8 @@ def _iter_distributions(self) -> Iterator["BaseDistribution"]: """ raise NotImplementedError() - def iter_distributions(self) -> Iterator["BaseDistribution"]: - """Iterate through installed distributions.""" + def iter_all_distributions(self) -> Iterator[BaseDistribution]: + """Iterate through all installed distributions without any filtering.""" for dist in self._iter_distributions(): # Make sure the distribution actually comes from a valid Python # packaging distribution. Pip's AdjacentTempDirectory leaves folders @@ -285,6 +638,11 @@ def iter_installed_distributions( ) -> Iterator[BaseDistribution]: """Return a list of installed distributions. + This is based on ``iter_all_distributions()`` with additional filtering + options. Note that ``iter_installed_distributions()`` without arguments + is *not* equal to ``iter_all_distributions()``, since some of the + configurations exclude packages by default. + :param local_only: If True (default), only return installations local to the current virtualenv, if in a virtualenv. :param skip: An iterable of canonicalized project names to ignore; @@ -294,7 +652,7 @@ def iter_installed_distributions( :param user_only: If True, only report installations in the user site directory. """ - it = self.iter_distributions() + it = self.iter_all_distributions() if local_only: it = (d for d in it if d.local) if not include_editables: diff --git a/src/pip/_internal/metadata/importlib/__init__.py b/src/pip/_internal/metadata/importlib/__init__.py new file mode 100644 index 00000000000..5e7af9fe521 --- /dev/null +++ b/src/pip/_internal/metadata/importlib/__init__.py @@ -0,0 +1,4 @@ +from ._dists import Distribution +from ._envs import Environment + +__all__ = ["Distribution", "Environment"] diff --git a/src/pip/_internal/metadata/importlib/_compat.py b/src/pip/_internal/metadata/importlib/_compat.py new file mode 100644 index 00000000000..593bff23ede --- /dev/null +++ b/src/pip/_internal/metadata/importlib/_compat.py @@ -0,0 +1,55 @@ +import importlib.metadata +from typing import Any, Optional, Protocol, cast + + +class BadMetadata(ValueError): + def __init__(self, dist: importlib.metadata.Distribution, *, reason: str) -> None: + self.dist = dist + self.reason = reason + + def __str__(self) -> str: + return f"Bad metadata in {self.dist} ({self.reason})" + + +class BasePath(Protocol): + """A protocol that various path objects conform. + + This exists because importlib.metadata uses both ``pathlib.Path`` and + ``zipfile.Path``, and we need a common base for type hints (Union does not + work well since ``zipfile.Path`` is too new for our linter setup). + + This does not mean to be exhaustive, but only contains things that present + in both classes *that we need*. + """ + + @property + def name(self) -> str: + raise NotImplementedError() + + @property + def parent(self) -> "BasePath": + raise NotImplementedError() + + +def get_info_location(d: importlib.metadata.Distribution) -> Optional[BasePath]: + """Find the path to the distribution's metadata directory. + + HACK: This relies on importlib.metadata's private ``_path`` attribute. Not + all distributions exist on disk, so importlib.metadata is correct to not + expose the attribute as public. But pip's code base is old and not as clean, + so we do this to avoid having to rewrite too many things. Hopefully we can + eliminate this some day. + """ + return getattr(d, "_path", None) + + +def get_dist_name(dist: importlib.metadata.Distribution) -> str: + """Get the distribution's project name. + + The ``name`` attribute is only available in Python 3.10 or later. We are + targeting exactly that, but Mypy does not know this. + """ + name = cast(Any, dist).name + if not isinstance(name, str): + raise BadMetadata(dist, reason="invalid metadata entry 'name'") + return name diff --git a/src/pip/_internal/metadata/importlib/_dists.py b/src/pip/_internal/metadata/importlib/_dists.py new file mode 100644 index 00000000000..65c043c87ef --- /dev/null +++ b/src/pip/_internal/metadata/importlib/_dists.py @@ -0,0 +1,224 @@ +import email.message +import importlib.metadata +import os +import pathlib +import zipfile +from typing import ( + Collection, + Dict, + Iterable, + Iterator, + Mapping, + Optional, + Sequence, + cast, +) + +from pip._vendor.packaging.requirements import Requirement +from pip._vendor.packaging.utils import NormalizedName, canonicalize_name +from pip._vendor.packaging.version import parse as parse_version + +from pip._internal.exceptions import InvalidWheel, UnsupportedWheel +from pip._internal.metadata.base import ( + BaseDistribution, + BaseEntryPoint, + DistributionVersion, + InfoPath, + Wheel, +) +from pip._internal.utils.misc import normalize_path +from pip._internal.utils.packaging import safe_extra +from pip._internal.utils.temp_dir import TempDirectory +from pip._internal.utils.wheel import parse_wheel, read_wheel_metadata_file + +from ._compat import BasePath, get_dist_name + + +class WheelDistribution(importlib.metadata.Distribution): + """An ``importlib.metadata.Distribution`` read from a wheel. + + Although ``importlib.metadata.PathDistribution`` accepts ``zipfile.Path``, + its implementation is too "lazy" for pip's needs (we can't keep the ZipFile + handle open for the entire lifetime of the distribution object). + + This implementation eagerly reads the entire metadata directory into the + memory instead, and operates from that. + """ + + def __init__( + self, + files: Mapping[pathlib.PurePosixPath, bytes], + info_location: pathlib.PurePosixPath, + ) -> None: + self._files = files + self.info_location = info_location + + @classmethod + def from_zipfile( + cls, + zf: zipfile.ZipFile, + name: str, + location: str, + ) -> "WheelDistribution": + info_dir, _ = parse_wheel(zf, name) + paths = ( + (name, pathlib.PurePosixPath(name.split("/", 1)[-1])) + for name in zf.namelist() + if name.startswith(f"{info_dir}/") + ) + files = { + relpath: read_wheel_metadata_file(zf, fullpath) + for fullpath, relpath in paths + } + info_location = pathlib.PurePosixPath(location, info_dir) + return cls(files, info_location) + + def iterdir(self, path: InfoPath) -> Iterator[pathlib.PurePosixPath]: + # Only allow iterating through the metadata directory. + if pathlib.PurePosixPath(str(path)) in self._files: + return iter(self._files) + raise FileNotFoundError(path) + + def read_text(self, filename: str) -> Optional[str]: + try: + data = self._files[pathlib.PurePosixPath(filename)] + except KeyError: + return None + try: + text = data.decode("utf-8") + except UnicodeDecodeError as e: + wheel = self.info_location.parent + error = f"Error decoding metadata for {wheel}: {e} in {filename} file" + raise UnsupportedWheel(error) + return text + + +class Distribution(BaseDistribution): + def __init__( + self, + dist: importlib.metadata.Distribution, + info_location: Optional[BasePath], + installed_location: Optional[BasePath], + ) -> None: + self._dist = dist + self._info_location = info_location + self._installed_location = installed_location + + @classmethod + def from_directory(cls, directory: str) -> BaseDistribution: + info_location = pathlib.Path(directory) + dist = importlib.metadata.Distribution.at(info_location) + return cls(dist, info_location, info_location.parent) + + @classmethod + def from_metadata_file_contents( + cls, + metadata_contents: bytes, + filename: str, + project_name: str, + ) -> BaseDistribution: + # Generate temp dir to contain the metadata file, and write the file contents. + temp_dir = pathlib.Path( + TempDirectory(kind="metadata", globally_managed=True).path + ) + metadata_path = temp_dir / "METADATA" + metadata_path.write_bytes(metadata_contents) + # Construct dist pointing to the newly created directory. + dist = importlib.metadata.Distribution.at(metadata_path.parent) + return cls(dist, metadata_path.parent, None) + + @classmethod + def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution: + try: + with wheel.as_zipfile() as zf: + dist = WheelDistribution.from_zipfile(zf, name, wheel.location) + except zipfile.BadZipFile as e: + raise InvalidWheel(wheel.location, name) from e + except UnsupportedWheel as e: + raise UnsupportedWheel(f"{name} has an invalid wheel, {e}") + return cls(dist, dist.info_location, pathlib.PurePosixPath(wheel.location)) + + @property + def location(self) -> Optional[str]: + if self._info_location is None: + return None + return str(self._info_location.parent) + + @property + def info_location(self) -> Optional[str]: + if self._info_location is None: + return None + return str(self._info_location) + + @property + def installed_location(self) -> Optional[str]: + if self._installed_location is None: + return None + return normalize_path(str(self._installed_location)) + + def _get_dist_name_from_location(self) -> Optional[str]: + """Try to get the name from the metadata directory name. + + This is much faster than reading metadata. + """ + if self._info_location is None: + return None + stem, suffix = os.path.splitext(self._info_location.name) + if suffix not in (".dist-info", ".egg-info"): + return None + return stem.split("-", 1)[0] + + @property + def canonical_name(self) -> NormalizedName: + name = self._get_dist_name_from_location() or get_dist_name(self._dist) + return canonicalize_name(name) + + @property + def version(self) -> DistributionVersion: + return parse_version(self._dist.version) + + def is_file(self, path: InfoPath) -> bool: + return self._dist.read_text(str(path)) is not None + + def iter_distutils_script_names(self) -> Iterator[str]: + # A distutils installation is always "flat" (not in e.g. egg form), so + # if this distribution's info location is NOT a pathlib.Path (but e.g. + # zipfile.Path), it can never contain any distutils scripts. + if not isinstance(self._info_location, pathlib.Path): + return + for child in self._info_location.joinpath("scripts").iterdir(): + yield child.name + + def read_text(self, path: InfoPath) -> str: + content = self._dist.read_text(str(path)) + if content is None: + raise FileNotFoundError(path) + return content + + def iter_entry_points(self) -> Iterable[BaseEntryPoint]: + # importlib.metadata's EntryPoint structure sasitfies BaseEntryPoint. + return self._dist.entry_points + + def _metadata_impl(self) -> email.message.Message: + # From Python 3.10+, importlib.metadata declares PackageMetadata as the + # return type. This protocol is unfortunately a disaster now and misses + # a ton of fields that we need, including get() and get_payload(). We + # rely on the implementation that the object is actually a Message now, + # until upstream can improve the protocol. (python/cpython#94952) + return cast(email.message.Message, self._dist.metadata) + + def iter_provided_extras(self) -> Iterable[str]: + return ( + safe_extra(extra) for extra in self.metadata.get_all("Provides-Extra", []) + ) + + def iter_dependencies(self, extras: Collection[str] = ()) -> Iterable[Requirement]: + contexts: Sequence[Dict[str, str]] = [{"extra": safe_extra(e)} for e in extras] + for req_string in self.metadata.get_all("Requires-Dist", []): + req = Requirement(req_string) + if not req.marker: + yield req + elif not extras and req.marker.evaluate({"extra": ""}): + yield req + elif any(req.marker.evaluate(context) for context in contexts): + yield req diff --git a/src/pip/_internal/metadata/importlib/_envs.py b/src/pip/_internal/metadata/importlib/_envs.py new file mode 100644 index 00000000000..cbec59e2c6d --- /dev/null +++ b/src/pip/_internal/metadata/importlib/_envs.py @@ -0,0 +1,188 @@ +import functools +import importlib.metadata +import logging +import os +import pathlib +import sys +import zipfile +import zipimport +from typing import Iterator, List, Optional, Sequence, Set, Tuple + +from pip._vendor.packaging.utils import NormalizedName, canonicalize_name + +from pip._internal.metadata.base import BaseDistribution, BaseEnvironment +from pip._internal.models.wheel import Wheel +from pip._internal.utils.deprecation import deprecated +from pip._internal.utils.filetypes import WHEEL_EXTENSION + +from ._compat import BadMetadata, BasePath, get_dist_name, get_info_location +from ._dists import Distribution + +logger = logging.getLogger(__name__) + + +def _looks_like_wheel(location: str) -> bool: + if not location.endswith(WHEEL_EXTENSION): + return False + if not os.path.isfile(location): + return False + if not Wheel.wheel_file_re.match(os.path.basename(location)): + return False + return zipfile.is_zipfile(location) + + +class _DistributionFinder: + """Finder to locate distributions. + + The main purpose of this class is to memoize found distributions' names, so + only one distribution is returned for each package name. At lot of pip code + assumes this (because it is setuptools's behavior), and not doing the same + can potentially cause a distribution in lower precedence path to override a + higher precedence one if the caller is not careful. + + Eventually we probably want to make it possible to see lower precedence + installations as well. It's useful feature, after all. + """ + + FoundResult = Tuple[importlib.metadata.Distribution, Optional[BasePath]] + + def __init__(self) -> None: + self._found_names: Set[NormalizedName] = set() + + def _find_impl(self, location: str) -> Iterator[FoundResult]: + """Find distributions in a location.""" + # Skip looking inside a wheel. Since a package inside a wheel is not + # always valid (due to .data directories etc.), its .dist-info entry + # should not be considered an installed distribution. + if _looks_like_wheel(location): + return + # To know exactly where we find a distribution, we have to feed in the + # paths one by one, instead of dumping the list to importlib.metadata. + for dist in importlib.metadata.distributions(path=[location]): + info_location = get_info_location(dist) + try: + raw_name = get_dist_name(dist) + except BadMetadata as e: + logger.warning("Skipping %s due to %s", info_location, e.reason) + continue + normalized_name = canonicalize_name(raw_name) + if normalized_name in self._found_names: + continue + self._found_names.add(normalized_name) + yield dist, info_location + + def find(self, location: str) -> Iterator[BaseDistribution]: + """Find distributions in a location. + + The path can be either a directory, or a ZIP archive. + """ + for dist, info_location in self._find_impl(location): + if info_location is None: + installed_location: Optional[BasePath] = None + else: + installed_location = info_location.parent + yield Distribution(dist, info_location, installed_location) + + def find_linked(self, location: str) -> Iterator[BaseDistribution]: + """Read location in egg-link files and return distributions in there. + + The path should be a directory; otherwise this returns nothing. This + follows how setuptools does this for compatibility. The first non-empty + line in the egg-link is read as a path (resolved against the egg-link's + containing directory if relative). Distributions found at that linked + location are returned. + """ + path = pathlib.Path(location) + if not path.is_dir(): + return + for child in path.iterdir(): + if child.suffix != ".egg-link": + continue + with child.open() as f: + lines = (line.strip() for line in f) + target_rel = next((line for line in lines if line), "") + if not target_rel: + continue + target_location = str(path.joinpath(target_rel)) + for dist, info_location in self._find_impl(target_location): + yield Distribution(dist, info_location, path) + + def _find_eggs_in_dir(self, location: str) -> Iterator[BaseDistribution]: + from pip._vendor.pkg_resources import find_distributions + + from pip._internal.metadata import pkg_resources as legacy + + with os.scandir(location) as it: + for entry in it: + if not entry.name.endswith(".egg"): + continue + for dist in find_distributions(entry.path): + yield legacy.Distribution(dist) + + def _find_eggs_in_zip(self, location: str) -> Iterator[BaseDistribution]: + from pip._vendor.pkg_resources import find_eggs_in_zip + + from pip._internal.metadata import pkg_resources as legacy + + try: + importer = zipimport.zipimporter(location) + except zipimport.ZipImportError: + return + for dist in find_eggs_in_zip(importer, location): + yield legacy.Distribution(dist) + + def find_eggs(self, location: str) -> Iterator[BaseDistribution]: + """Find eggs in a location. + + This actually uses the old *pkg_resources* backend. We likely want to + deprecate this so we can eventually remove the *pkg_resources* + dependency entirely. Before that, this should first emit a deprecation + warning for some versions when using the fallback since importing + *pkg_resources* is slow for those who don't need it. + """ + if os.path.isdir(location): + yield from self._find_eggs_in_dir(location) + if zipfile.is_zipfile(location): + yield from self._find_eggs_in_zip(location) + + +@functools.lru_cache(maxsize=None) # Warn a distribution exactly once. +def _emit_egg_deprecation(location: Optional[str]) -> None: + deprecated( + reason=f"Loading egg at {location} is deprecated.", + replacement="to use pip for package installation.", + gone_in=None, + ) + + +class Environment(BaseEnvironment): + def __init__(self, paths: Sequence[str]) -> None: + self._paths = paths + + @classmethod + def default(cls) -> BaseEnvironment: + return cls(sys.path) + + @classmethod + def from_paths(cls, paths: Optional[List[str]]) -> BaseEnvironment: + if paths is None: + return cls(sys.path) + return cls(paths) + + def _iter_distributions(self) -> Iterator[BaseDistribution]: + finder = _DistributionFinder() + for location in self._paths: + yield from finder.find(location) + for dist in finder.find_eggs(location): + # _emit_egg_deprecation(dist.location) # TODO: Enable this. + yield dist + # This must go last because that's how pkg_resources tie-breaks. + yield from finder.find_linked(location) + + def get_distribution(self, name: str) -> Optional[BaseDistribution]: + matches = ( + distribution + for distribution in self.iter_all_distributions() + if distribution.canonical_name == canonicalize_name(name) + ) + return next(matches, None) diff --git a/src/pip/_internal/metadata/pkg_resources.py b/src/pip/_internal/metadata/pkg_resources.py index e8a8a38076a..f330ef12a2c 100644 --- a/src/pip/_internal/metadata/pkg_resources.py +++ b/src/pip/_internal/metadata/pkg_resources.py @@ -1,21 +1,26 @@ import email.message +import email.parser import logging -from typing import Collection, Iterable, Iterator, List, NamedTuple, Optional +import os +import zipfile +from typing import Collection, Iterable, Iterator, List, Mapping, NamedTuple, Optional from pip._vendor import pkg_resources from pip._vendor.packaging.requirements import Requirement from pip._vendor.packaging.utils import NormalizedName, canonicalize_name from pip._vendor.packaging.version import parse as parse_version -from pip._internal.utils import misc # TODO: Move definition here. -from pip._internal.utils.packaging import get_installer, get_metadata -from pip._internal.utils.wheel import pkg_resources_distribution_for_wheel +from pip._internal.exceptions import InvalidWheel, NoneMetadataError, UnsupportedWheel +from pip._internal.utils.egg_link import egg_link_path_from_location +from pip._internal.utils.misc import display_path, normalize_path +from pip._internal.utils.wheel import parse_wheel, read_wheel_metadata_file from .base import ( BaseDistribution, BaseEntryPoint, BaseEnvironment, DistributionVersion, + InfoPath, Wheel, ) @@ -28,14 +33,101 @@ class EntryPoint(NamedTuple): group: str +class InMemoryMetadata: + """IMetadataProvider that reads metadata files from a dictionary. + + This also maps metadata decoding exceptions to our internal exception type. + """ + + def __init__(self, metadata: Mapping[str, bytes], wheel_name: str) -> None: + self._metadata = metadata + self._wheel_name = wheel_name + + def has_metadata(self, name: str) -> bool: + return name in self._metadata + + def get_metadata(self, name: str) -> str: + try: + return self._metadata[name].decode() + except UnicodeDecodeError as e: + # Augment the default error with the origin of the file. + raise UnsupportedWheel( + f"Error decoding metadata for {self._wheel_name}: {e} in {name} file" + ) + + def get_metadata_lines(self, name: str) -> Iterable[str]: + return pkg_resources.yield_lines(self.get_metadata(name)) + + def metadata_isdir(self, name: str) -> bool: + return False + + def metadata_listdir(self, name: str) -> List[str]: + return [] + + def run_script(self, script_name: str, namespace: str) -> None: + pass + + class Distribution(BaseDistribution): def __init__(self, dist: pkg_resources.Distribution) -> None: self._dist = dist @classmethod - def from_wheel(cls, wheel: Wheel, name: str) -> "Distribution": - with wheel.as_zipfile() as zf: - dist = pkg_resources_distribution_for_wheel(zf, name, wheel.location) + def from_directory(cls, directory: str) -> BaseDistribution: + dist_dir = directory.rstrip(os.sep) + + # Build a PathMetadata object, from path to metadata. :wink: + base_dir, dist_dir_name = os.path.split(dist_dir) + metadata = pkg_resources.PathMetadata(base_dir, dist_dir) + + # Determine the correct Distribution object type. + if dist_dir.endswith(".egg-info"): + dist_cls = pkg_resources.Distribution + dist_name = os.path.splitext(dist_dir_name)[0] + else: + assert dist_dir.endswith(".dist-info") + dist_cls = pkg_resources.DistInfoDistribution + dist_name = os.path.splitext(dist_dir_name)[0].split("-")[0] + + dist = dist_cls(base_dir, project_name=dist_name, metadata=metadata) + return cls(dist) + + @classmethod + def from_metadata_file_contents( + cls, + metadata_contents: bytes, + filename: str, + project_name: str, + ) -> BaseDistribution: + metadata_dict = { + "METADATA": metadata_contents, + } + dist = pkg_resources.DistInfoDistribution( + location=filename, + metadata=InMemoryMetadata(metadata_dict, filename), + project_name=project_name, + ) + return cls(dist) + + @classmethod + def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution: + try: + with wheel.as_zipfile() as zf: + info_dir, _ = parse_wheel(zf, name) + metadata_dict = { + path.split("/", 1)[-1]: read_wheel_metadata_file(zf, path) + for path in zf.namelist() + if path.startswith(f"{info_dir}/") + } + except zipfile.BadZipFile as e: + raise InvalidWheel(wheel.location, name) from e + except UnsupportedWheel as e: + raise UnsupportedWheel(f"{name} has an invalid wheel, {e}") + dist = pkg_resources.DistInfoDistribution( + location=wheel.location, + metadata=InMemoryMetadata(metadata_dict, wheel.location), + project_name=name, + ) return cls(dist) @property @@ -43,9 +135,30 @@ def location(self) -> Optional[str]: return self._dist.location @property - def info_directory(self) -> Optional[str]: + def installed_location(self) -> Optional[str]: + egg_link = egg_link_path_from_location(self.raw_name) + if egg_link: + location = egg_link + elif self.location: + location = self.location + else: + return None + return normalize_path(location) + + @property + def info_location(self) -> Optional[str]: return self._dist.egg_info + @property + def installed_by_distutils(self) -> bool: + # A distutils-installed distribution is provided by FileMetadata. This + # provider has a "path" attribute not present anywhere else. Not the + # best introspection logic, but pip has been doing this for a long time. + try: + return bool(self._dist._provider.path) + except AttributeError: + return False + @property def canonical_name(self) -> NormalizedName: return canonicalize_name(self._dist.project_name) @@ -54,26 +167,20 @@ def canonical_name(self) -> NormalizedName: def version(self) -> DistributionVersion: return parse_version(self._dist.version) - @property - def installer(self) -> str: - return get_installer(self._dist) - - @property - def local(self) -> bool: - return misc.dist_is_local(self._dist) + def is_file(self, path: InfoPath) -> bool: + return self._dist.has_metadata(str(path)) - @property - def in_usersite(self) -> bool: - return misc.dist_in_usersite(self._dist) + def iter_distutils_script_names(self) -> Iterator[str]: + yield from self._dist.metadata_listdir("scripts") - @property - def in_site_packages(self) -> bool: - return misc.dist_in_site_packages(self._dist) - - def read_text(self, name: str) -> str: + def read_text(self, path: InfoPath) -> str: + name = str(path) if not self._dist.has_metadata(name): raise FileNotFoundError(name) - return self._dist.get_metadata(name) + content = self._dist.get_metadata(name) + if content is None: + raise NoneMetadataError(self, name) + return content def iter_entry_points(self) -> Iterable[BaseEntryPoint]: for group, entries in self._dist.get_entry_map().items(): @@ -81,9 +188,27 @@ def iter_entry_points(self) -> Iterable[BaseEntryPoint]: name, _, value = str(entry_point).partition("=") yield EntryPoint(name=name.strip(), value=value.strip(), group=group) - @property - def metadata(self) -> email.message.Message: - return get_metadata(self._dist) + def _metadata_impl(self) -> email.message.Message: + """ + :raises NoneMetadataError: if the distribution reports `has_metadata()` + True but `get_metadata()` returns None. + """ + if isinstance(self._dist, pkg_resources.DistInfoDistribution): + metadata_name = "METADATA" + else: + metadata_name = "PKG-INFO" + try: + metadata = self.read_text(metadata_name) + except FileNotFoundError: + if self.location: + displaying_path = display_path(self.location) + else: + displaying_path = repr(self.location) + logger.warning("No metadata found in %s", displaying_path) + metadata = "" + feed_parser = email.parser.FeedParser() + feed_parser.feed(metadata) + return feed_parser.close() def iter_dependencies(self, extras: Collection[str] = ()) -> Iterable[Requirement]: if extras: # pkg_resources raises on invalid extras, so we sanitize. @@ -106,6 +231,10 @@ def default(cls) -> BaseEnvironment: def from_paths(cls, paths: Optional[List[str]]) -> BaseEnvironment: return cls(pkg_resources.WorkingSet(paths)) + def _iter_distributions(self) -> Iterator[BaseDistribution]: + for dist in self._ws: + yield Distribution(dist) + def _search_distribution(self, name: str) -> Optional[BaseDistribution]: """Find a distribution matching the ``name`` in the environment. @@ -113,13 +242,12 @@ def _search_distribution(self, name: str) -> Optional[BaseDistribution]: match the behavior of ``pkg_resources.get_distribution()``. """ canonical_name = canonicalize_name(name) - for dist in self.iter_distributions(): + for dist in self.iter_all_distributions(): if dist.canonical_name == canonical_name: return dist return None def get_distribution(self, name: str) -> Optional[BaseDistribution]: - # Search the distribution by looking through the working set. dist = self._search_distribution(name) if dist: @@ -140,7 +268,3 @@ def get_distribution(self, name: str) -> Optional[BaseDistribution]: except pkg_resources.DistributionNotFound: return None return self._search_distribution(name) - - def _iter_distributions(self) -> Iterator[BaseDistribution]: - for dist in self._ws: - yield Distribution(dist) diff --git a/src/pip/_internal/models/direct_url.py b/src/pip/_internal/models/direct_url.py index 92060d45db8..c3de70a749c 100644 --- a/src/pip/_internal/models/direct_url.py +++ b/src/pip/_internal/models/direct_url.py @@ -74,14 +74,10 @@ def __init__( vcs: str, commit_id: str, requested_revision: Optional[str] = None, - resolved_revision: Optional[str] = None, - resolved_revision_type: Optional[str] = None, ) -> None: self.vcs = vcs self.requested_revision = requested_revision self.commit_id = commit_id - self.resolved_revision = resolved_revision - self.resolved_revision_type = resolved_revision_type @classmethod def _from_dict(cls, d: Optional[Dict[str, Any]]) -> Optional["VcsInfo"]: @@ -91,8 +87,6 @@ def _from_dict(cls, d: Optional[Dict[str, Any]]) -> Optional["VcsInfo"]: vcs=_get_required(d, str, "vcs"), commit_id=_get_required(d, str, "commit_id"), requested_revision=_get(d, str, "requested_revision"), - resolved_revision=_get(d, str, "resolved_revision"), - resolved_revision_type=_get(d, str, "resolved_revision_type"), ) def _to_dict(self) -> Dict[str, Any]: @@ -100,8 +94,6 @@ def _to_dict(self) -> Dict[str, Any]: vcs=self.vcs, requested_revision=self.requested_revision, commit_id=self.commit_id, - resolved_revision=self.resolved_revision, - resolved_revision_type=self.resolved_revision_type, ) @@ -111,17 +103,33 @@ class ArchiveInfo: def __init__( self, hash: Optional[str] = None, + hashes: Optional[Dict[str, str]] = None, ) -> None: + if hash is not None: + # Auto-populate the hashes key to upgrade to the new format automatically. + # We don't back-populate the legacy hash key. + try: + hash_name, hash_value = hash.split("=", 1) + except ValueError: + raise DirectUrlValidationError( + f"invalid archive_info.hash format: {hash!r}" + ) + if hashes is None: + hashes = {hash_name: hash_value} + elif hash_name not in hash: + hashes = hashes.copy() + hashes[hash_name] = hash_value self.hash = hash + self.hashes = hashes @classmethod def _from_dict(cls, d: Optional[Dict[str, Any]]) -> Optional["ArchiveInfo"]: if d is None: return None - return cls(hash=_get(d, str, "hash")) + return cls(hash=_get(d, str, "hash"), hashes=_get(d, dict, "hashes")) def _to_dict(self) -> Dict[str, Any]: - return _filter_none(hash=self.hash) + return _filter_none(hash=self.hash, hashes=self.hashes) class DirInfo: diff --git a/src/pip/_internal/models/installation_report.py b/src/pip/_internal/models/installation_report.py new file mode 100644 index 00000000000..b54afb109b4 --- /dev/null +++ b/src/pip/_internal/models/installation_report.py @@ -0,0 +1,53 @@ +from typing import Any, Dict, Sequence + +from pip._vendor.packaging.markers import default_environment + +from pip import __version__ +from pip._internal.req.req_install import InstallRequirement + + +class InstallationReport: + def __init__(self, install_requirements: Sequence[InstallRequirement]): + self._install_requirements = install_requirements + + @classmethod + def _install_req_to_dict(cls, ireq: InstallRequirement) -> Dict[str, Any]: + assert ireq.download_info, f"No download_info for {ireq}" + res = { + # PEP 610 json for the download URL. download_info.archive_info.hash may + # be absent when the requirement was installed from the wheel cache + # and the cache entry was populated by an older pip version that did not + # record origin.json. + "download_info": ireq.download_info.to_dict(), + # is_direct is true if the requirement was a direct URL reference (which + # includes editable requirements), and false if the requirement was + # downloaded from a PEP 503 index or --find-links. + "is_direct": bool(ireq.original_link), + # requested is true if the requirement was specified by the user (aka + # top level requirement), and false if it was installed as a dependency of a + # requirement. https://peps.python.org/pep-0376/#requested + "requested": ireq.user_supplied, + # PEP 566 json encoding for metadata + # https://www.python.org/dev/peps/pep-0566/#json-compatible-metadata + "metadata": ireq.get_dist().metadata_dict, + } + if ireq.user_supplied and ireq.extras: + # For top level requirements, the list of requested extras, if any. + res["requested_extras"] = list(sorted(ireq.extras)) + return res + + def to_dict(self) -> Dict[str, Any]: + return { + "version": "1", + "pip_version": __version__, + "install": [ + self._install_req_to_dict(ireq) for ireq in self._install_requirements + ], + # https://peps.python.org/pep-0508/#environment-markers + # TODO: currently, the resolver uses the default environment to evaluate + # environment markers, so that is what we report here. In the future, it + # should also take into account options such as --python-version or + # --platform, perhaps under the form of an environment_override field? + # https://github.com/pypa/pip/issues/11198 + "environment": default_environment(), + } diff --git a/src/pip/_internal/models/link.py b/src/pip/_internal/models/link.py index 6069b278b9b..a1e4d5a08df 100644 --- a/src/pip/_internal/models/link.py +++ b/src/pip/_internal/models/link.py @@ -1,14 +1,28 @@ import functools +import itertools import logging import os import posixpath import re import urllib.parse -from typing import TYPE_CHECKING, Dict, List, NamedTuple, Optional, Tuple, Union +from dataclasses import dataclass +from typing import ( + TYPE_CHECKING, + Any, + Dict, + List, + Mapping, + NamedTuple, + Optional, + Tuple, + Union, +) +from pip._internal.utils.deprecation import deprecated from pip._internal.utils.filetypes import WHEEL_EXTENSION from pip._internal.utils.hashes import Hashes from pip._internal.utils.misc import ( + pairwise, redact_auth_from_url, split_auth_from_netloc, splitext, @@ -17,12 +31,130 @@ from pip._internal.utils.urls import path_to_url, url_to_path if TYPE_CHECKING: - from pip._internal.index.collector import HTMLPage + from pip._internal.index.collector import IndexContent logger = logging.getLogger(__name__) -_SUPPORTED_HASHES = ("sha1", "sha224", "sha384", "sha256", "sha512", "md5") +# Order matters, earlier hashes have a precedence over later hashes for what +# we will pick to use. +_SUPPORTED_HASHES = ("sha512", "sha384", "sha256", "sha224", "sha1", "md5") + + +@dataclass(frozen=True) +class LinkHash: + """Links to content may have embedded hash values. This class parses those. + + `name` must be any member of `_SUPPORTED_HASHES`. + + This class can be converted to and from `ArchiveInfo`. While ArchiveInfo intends to + be JSON-serializable to conform to PEP 610, this class contains the logic for + parsing a hash name and value for correctness, and then checking whether that hash + conforms to a schema with `.is_hash_allowed()`.""" + + name: str + value: str + + _hash_re = re.compile( + # NB: we do not validate that the second group (.*) is a valid hex + # digest. Instead, we simply keep that string in this class, and then check it + # against Hashes when hash-checking is needed. This is easier to debug than + # proactively discarding an invalid hex digest, as we handle incorrect hashes + # and malformed hashes in the same place. + r"({choices})=(.*)".format( + choices="|".join(re.escape(hash_name) for hash_name in _SUPPORTED_HASHES) + ), + ) + + def __post_init__(self) -> None: + assert self._hash_re.match(f"{self.name}={self.value}") + + @classmethod + @functools.lru_cache(maxsize=None) + def split_hash_name_and_value(cls, url: str) -> Optional["LinkHash"]: + """Search a string for a checksum algorithm name and encoded output value.""" + match = cls._hash_re.search(url) + if match is None: + return None + name, value = match.groups() + return cls(name=name, value=value) + + def as_dict(self) -> Dict[str, str]: + return {self.name: self.value} + + def as_hashes(self) -> Hashes: + """Return a Hashes instance which checks only for the current hash.""" + return Hashes({self.name: [self.value]}) + + def is_hash_allowed(self, hashes: Optional[Hashes]) -> bool: + """ + Return True if the current hash is allowed by `hashes`. + """ + if hashes is None: + return False + return hashes.is_hash_allowed(self.name, hex_digest=self.value) + + +def _clean_url_path_part(part: str) -> str: + """ + Clean a "part" of a URL path (i.e. after splitting on "@" characters). + """ + # We unquote prior to quoting to make sure nothing is double quoted. + return urllib.parse.quote(urllib.parse.unquote(part)) + + +def _clean_file_url_path(part: str) -> str: + """ + Clean the first part of a URL path that corresponds to a local + filesystem path (i.e. the first part after splitting on "@" characters). + """ + # We unquote prior to quoting to make sure nothing is double quoted. + # Also, on Windows the path part might contain a drive letter which + # should not be quoted. On Linux where drive letters do not + # exist, the colon should be quoted. We rely on urllib.request + # to do the right thing here. + return urllib.request.pathname2url(urllib.request.url2pathname(part)) + + +# percent-encoded: / +_reserved_chars_re = re.compile("(@|%2F)", re.IGNORECASE) + + +def _clean_url_path(path: str, is_local_path: bool) -> str: + """ + Clean the path portion of a URL. + """ + if is_local_path: + clean_func = _clean_file_url_path + else: + clean_func = _clean_url_path_part + + # Split on the reserved characters prior to cleaning so that + # revision strings in VCS URLs are properly preserved. + parts = _reserved_chars_re.split(path) + + cleaned_parts = [] + for to_clean, reserved in pairwise(itertools.chain(parts, [""])): + cleaned_parts.append(clean_func(to_clean)) + # Normalize %xx escapes (e.g. %2f -> %2F) + cleaned_parts.append(reserved.upper()) + + return "".join(cleaned_parts) + + +def _ensure_quoted_url(url: str) -> str: + """ + Make sure a link is fully quoted. + For example, if ' ' occurs in the URL, it will be replaced with "%20", + and without double-quoting other characters. + """ + # Split the URL into parts according to the general structure + # `scheme://netloc/path;parameters?query#fragment`. + result = urllib.parse.urlparse(url) + # If the netloc is empty, then the URL refers to a local filesystem path. + is_local_path = not result.netloc + path = _clean_url_path(result.path, is_local_path=is_local_path) + return urllib.parse.urlunparse(result._replace(path=path)) class Link(KeyBasedCompareMixin): @@ -31,23 +163,28 @@ class Link(KeyBasedCompareMixin): __slots__ = [ "_parsed_url", "_url", + "_hashes", "comes_from", "requires_python", "yanked_reason", + "dist_info_metadata", "cache_link_parsing", + "egg_fragment", ] def __init__( self, url: str, - comes_from: Optional[Union[str, "HTMLPage"]] = None, + comes_from: Optional[Union[str, "IndexContent"]] = None, requires_python: Optional[str] = None, yanked_reason: Optional[str] = None, + dist_info_metadata: Optional[str] = None, cache_link_parsing: bool = True, + hashes: Optional[Mapping[str, str]] = None, ) -> None: """ :param url: url of the resource pointed to (href of the link) - :param comes_from: instance of HTMLPage where the link was found, + :param comes_from: instance of IndexContent where the link was found, or string. :param requires_python: String containing the `Requires-Python` metadata field, specified in PEP 345. This may be specified by @@ -59,11 +196,16 @@ def __init__( a simple repository HTML link. If the file has been yanked but no reason was provided, this should be the empty string. See PEP 592 for more information and the specification. + :param dist_info_metadata: the metadata attached to the file, or None if no such + metadata is provided. This is the value of the "data-dist-info-metadata" + attribute, if present, in a simple repository HTML link. This may be parsed + into its own `Link` by `self.metadata_link()`. See PEP 658 for more + information and the specification. :param cache_link_parsing: A flag that is used elsewhere to determine - whether resources retrieved from this link - should be cached. PyPI index urls should - generally have this set to False, for - example. + whether resources retrieved from this link should be cached. PyPI + URLs should generally have this set to False, for example. + :param hashes: A mapping of hash names to digests to allow us to + determine the validity of a download. """ # url can be a UNC windows share @@ -75,13 +217,84 @@ def __init__( # trying to set a new value. self._url = url + link_hash = LinkHash.split_hash_name_and_value(url) + hashes_from_link = {} if link_hash is None else link_hash.as_dict() + if hashes is None: + self._hashes = hashes_from_link + else: + self._hashes = {**hashes, **hashes_from_link} + self.comes_from = comes_from self.requires_python = requires_python if requires_python else None self.yanked_reason = yanked_reason + self.dist_info_metadata = dist_info_metadata super().__init__(key=url, defining_class=Link) self.cache_link_parsing = cache_link_parsing + self.egg_fragment = self._egg_fragment() + + @classmethod + def from_json( + cls, + file_data: Dict[str, Any], + page_url: str, + ) -> Optional["Link"]: + """ + Convert an pypi json document from a simple repository page into a Link. + """ + file_url = file_data.get("url") + if file_url is None: + return None + + url = _ensure_quoted_url(urllib.parse.urljoin(page_url, file_url)) + pyrequire = file_data.get("requires-python") + yanked_reason = file_data.get("yanked") + dist_info_metadata = file_data.get("dist-info-metadata") + hashes = file_data.get("hashes", {}) + + # The Link.yanked_reason expects an empty string instead of a boolean. + if yanked_reason and not isinstance(yanked_reason, str): + yanked_reason = "" + # The Link.yanked_reason expects None instead of False. + elif not yanked_reason: + yanked_reason = None + + return cls( + url, + comes_from=page_url, + requires_python=pyrequire, + yanked_reason=yanked_reason, + hashes=hashes, + dist_info_metadata=dist_info_metadata, + ) + + @classmethod + def from_element( + cls, + anchor_attribs: Dict[str, Optional[str]], + page_url: str, + base_url: str, + ) -> Optional["Link"]: + """ + Convert an anchor element's attributes in a simple repository page to a Link. + """ + href = anchor_attribs.get("href") + if not href: + return None + + url = _ensure_quoted_url(urllib.parse.urljoin(base_url, href)) + pyrequire = anchor_attribs.get("data-requires-python") + yanked_reason = anchor_attribs.get("data-yanked") + dist_info_metadata = anchor_attribs.get("data-dist-info-metadata") + + return cls( + url, + comes_from=page_url, + requires_python=pyrequire, + yanked_reason=yanked_reason, + dist_info_metadata=dist_info_metadata, + ) def __str__(self) -> str: if self.requires_python: @@ -149,12 +362,28 @@ def url_without_fragment(self) -> str: _egg_fragment_re = re.compile(r"[#&]egg=([^&]*)") - @property - def egg_fragment(self) -> Optional[str]: + # Per PEP 508. + _project_name_re = re.compile( + r"^([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])$", re.IGNORECASE + ) + + def _egg_fragment(self) -> Optional[str]: match = self._egg_fragment_re.search(self._url) if not match: return None - return match.group(1) + + # An egg fragment looks like a PEP 508 project name, along with + # an optional extras specifier. Anything else is invalid. + project_name = match.group(1) + if not self._project_name_re.match(project_name): + deprecated( + reason=f"{self} contains an egg fragment with a non-PEP 508 name", + replacement="to use the req @ url syntax, and remove the egg fragment", + gone_in="25.0", + issue=11617, + ) + + return project_name _subdirectory_fragment_re = re.compile(r"[#&]subdirectory=([^&]*)") @@ -165,23 +394,34 @@ def subdirectory_fragment(self) -> Optional[str]: return None return match.group(1) - _hash_re = re.compile( - r"({choices})=([a-f0-9]+)".format(choices="|".join(_SUPPORTED_HASHES)) - ) + def metadata_link(self) -> Optional["Link"]: + """Implementation of PEP 658 parsing.""" + # Note that Link.from_element() parsing the "data-dist-info-metadata" attribute + # from an HTML anchor tag is typically how the Link.dist_info_metadata attribute + # gets set. + if self.dist_info_metadata is None: + return None + metadata_url = f"{self.url_without_fragment}.metadata" + # If data-dist-info-metadata="true" is set, then the metadata file exists, + # but there is no information about its checksum or anything else. + if self.dist_info_metadata != "true": + link_hash = LinkHash.split_hash_name_and_value(self.dist_info_metadata) + else: + link_hash = None + if link_hash is None: + return Link(metadata_url) + return Link(metadata_url, hashes=link_hash.as_dict()) + + def as_hashes(self) -> Hashes: + return Hashes({k: [v] for k, v in self._hashes.items()}) @property def hash(self) -> Optional[str]: - match = self._hash_re.search(self._url) - if match: - return match.group(2) - return None + return next(iter(self._hashes.values()), None) @property def hash_name(self) -> Optional[str]: - match = self._hash_re.search(self._url) - if match: - return match.group(1) - return None + return next(iter(self._hashes), None) @property def show_url(self) -> str: @@ -210,19 +450,15 @@ def is_yanked(self) -> bool: @property def has_hash(self) -> bool: - return self.hash_name is not None + return bool(self._hashes) def is_hash_allowed(self, hashes: Optional[Hashes]) -> bool: """ - Return True if the link has a hash and it is allowed. + Return True if the link has a hash and it is allowed by `hashes`. """ - if hashes is None or not self.has_hash: + if hashes is None: return False - # Assert non-None so mypy knows self.hash_name and self.hash are str. - assert self.hash_name is not None - assert self.hash is not None - - return hashes.is_hash_allowed(self.hash_name, hex_digest=self.hash) + return any(hashes.is_hash_allowed(k, v) for k, v in self._hashes.items()) class _CleanResult(NamedTuple): diff --git a/src/pip/_internal/models/search_scope.py b/src/pip/_internal/models/search_scope.py index e4e54c2f4c6..a64af73899d 100644 --- a/src/pip/_internal/models/search_scope.py +++ b/src/pip/_internal/models/search_scope.py @@ -20,13 +20,14 @@ class SearchScope: Encapsulates the locations that pip is configured to search. """ - __slots__ = ["find_links", "index_urls"] + __slots__ = ["find_links", "index_urls", "no_index"] @classmethod def create( cls, find_links: List[str], index_urls: List[str], + no_index: bool, ) -> "SearchScope": """ Create a SearchScope object after normalizing the `find_links`. @@ -60,15 +61,18 @@ def create( return cls( find_links=built_find_links, index_urls=index_urls, + no_index=no_index, ) def __init__( self, find_links: List[str], index_urls: List[str], + no_index: bool, ) -> None: self.find_links = find_links self.index_urls = index_urls + self.no_index = no_index def get_formatted_locations(self) -> str: lines = [] diff --git a/src/pip/_internal/models/wheel.py b/src/pip/_internal/models/wheel.py index e0916122763..a5dc12bdd63 100644 --- a/src/pip/_internal/models/wheel.py +++ b/src/pip/_internal/models/wheel.py @@ -13,8 +13,8 @@ class Wheel: """A wheel file""" wheel_file_re = re.compile( - r"""^(?P(?P.+?)-(?P.*?)) - ((-(?P\d[^-]*?))?-(?P.+?)-(?P.+?)-(?P.+?) + r"""^(?P(?P[^\s-]+?)-(?P[^\s-]*?)) + ((-(?P\d[^-]*?))?-(?P[^\s-]+?)-(?P[^\s-]+?)-(?P[^\s-]+?) \.whl|\.dist-info)$""", re.VERBOSE, ) @@ -58,7 +58,10 @@ def support_index_min(self, tags: List[Tag]) -> int: :raises ValueError: If none of the wheel's file tags match one of the supported tags. """ - return min(tags.index(tag) for tag in self.file_tags if tag in tags) + try: + return next(i for i, t in enumerate(tags) if t in self.file_tags) + except StopIteration: + raise ValueError() def find_most_preferred_tag( self, tags: List[Tag], tag_to_priority: Dict[Tag, int] diff --git a/src/pip/_internal/network/auth.py b/src/pip/_internal/network/auth.py index ca42798bd95..c0efa765c85 100644 --- a/src/pip/_internal/network/auth.py +++ b/src/pip/_internal/network/auth.py @@ -3,9 +3,18 @@ Contains interface (MultiDomainBasicAuth) and associated glue code for providing credentials in the context of network requests. """ - +import logging +import os +import shutil +import subprocess +import sysconfig +import typing import urllib.parse -from typing import Any, Dict, List, Optional, Tuple +from abc import ABC, abstractmethod +from functools import lru_cache +from os.path import commonprefix +from pathlib import Path +from typing import Any, Dict, List, NamedTuple, Optional, Tuple from pip._vendor.requests.auth import AuthBase, HTTPBasicAuth from pip._vendor.requests.models import Request, Response @@ -23,59 +32,204 @@ logger = getLogger(__name__) -Credentials = Tuple[str, str, str] +KEYRING_DISABLED = False + + +class Credentials(NamedTuple): + url: str + username: str + password: str + + +class KeyRingBaseProvider(ABC): + """Keyring base provider interface""" + + has_keyring: bool + + @abstractmethod + def get_auth_info(self, url: str, username: Optional[str]) -> Optional[AuthInfo]: + ... + + @abstractmethod + def save_auth_info(self, url: str, username: str, password: str) -> None: + ... + -try: - import keyring -except ImportError: - keyring = None # type: ignore[assignment] -except Exception as exc: - logger.warning( - "Keyring is skipped due to an exception: %s", - str(exc), - ) - keyring = None # type: ignore[assignment] +class KeyRingNullProvider(KeyRingBaseProvider): + """Keyring null provider""" + has_keyring = False -def get_keyring_auth(url: Optional[str], username: Optional[str]) -> Optional[AuthInfo]: - """Return the tuple auth for a given url from keyring.""" - global keyring - if not url or not keyring: + def get_auth_info(self, url: str, username: Optional[str]) -> Optional[AuthInfo]: return None - try: - try: - get_credential = keyring.get_credential - except AttributeError: - pass - else: + def save_auth_info(self, url: str, username: str, password: str) -> None: + return None + + +class KeyRingPythonProvider(KeyRingBaseProvider): + """Keyring interface which uses locally imported `keyring`""" + + has_keyring = True + + def __init__(self) -> None: + import keyring + + self.keyring = keyring + + def get_auth_info(self, url: str, username: Optional[str]) -> Optional[AuthInfo]: + # Support keyring's get_credential interface which supports getting + # credentials without a username. This is only available for + # keyring>=15.2.0. + if hasattr(self.keyring, "get_credential"): logger.debug("Getting credentials from keyring for %s", url) - cred = get_credential(url, username) + cred = self.keyring.get_credential(url, username) if cred is not None: return cred.username, cred.password return None - if username: + if username is not None: logger.debug("Getting password from keyring for %s", url) - password = keyring.get_password(url, username) + password = self.keyring.get_password(url, username) if password: return username, password + return None + + def save_auth_info(self, url: str, username: str, password: str) -> None: + self.keyring.set_password(url, username, password) + + +class KeyRingCliProvider(KeyRingBaseProvider): + """Provider which uses `keyring` cli + + Instead of calling the keyring package installed alongside pip + we call keyring on the command line which will enable pip to + use which ever installation of keyring is available first in + PATH. + """ + + has_keyring = True + + def __init__(self, cmd: str) -> None: + self.keyring = cmd + + def get_auth_info(self, url: str, username: Optional[str]) -> Optional[AuthInfo]: + # This is the default implementation of keyring.get_credential + # https://github.com/jaraco/keyring/blob/97689324abcf01bd1793d49063e7ca01e03d7d07/keyring/backend.py#L134-L139 + if username is not None: + password = self._get_password(url, username) + if password is not None: + return username, password + return None + + def save_auth_info(self, url: str, username: str, password: str) -> None: + return self._set_password(url, username, password) + + def _get_password(self, service_name: str, username: str) -> Optional[str]: + """Mirror the implementation of keyring.get_password using cli""" + if self.keyring is None: + return None + + cmd = [self.keyring, "get", service_name, username] + env = os.environ.copy() + env["PYTHONIOENCODING"] = "utf-8" + res = subprocess.run( + cmd, + stdin=subprocess.DEVNULL, + stdout=subprocess.PIPE, + env=env, + ) + if res.returncode: + return None + return res.stdout.decode("utf-8").strip(os.linesep) - except Exception as exc: - logger.warning( - "Keyring is skipped due to an exception: %s", - str(exc), + def _set_password(self, service_name: str, username: str, password: str) -> None: + """Mirror the implementation of keyring.set_password using cli""" + if self.keyring is None: + return None + env = os.environ.copy() + env["PYTHONIOENCODING"] = "utf-8" + subprocess.run( + [self.keyring, "set", service_name, username], + input=f"{password}{os.linesep}".encode("utf-8"), + env=env, + check=True, ) - keyring = None # type: ignore[assignment] - return None + return None + + +@lru_cache(maxsize=None) +def get_keyring_provider(provider: str) -> KeyRingBaseProvider: + logger.verbose("Keyring provider requested: %s", provider) + + # keyring has previously failed and been disabled + if KEYRING_DISABLED: + provider = "disabled" + if provider in ["import", "auto"]: + try: + impl = KeyRingPythonProvider() + logger.verbose("Keyring provider set: import") + return impl + except ImportError: + pass + except Exception as exc: + # In the event of an unexpected exception + # we should warn the user + msg = "Installed copy of keyring fails with exception %s" + if provider == "auto": + msg = msg + ", trying to find a keyring executable as a fallback" + logger.warning(msg, exc, exc_info=logger.isEnabledFor(logging.DEBUG)) + if provider in ["subprocess", "auto"]: + cli = shutil.which("keyring") + if cli and cli.startswith(sysconfig.get_path("scripts")): + # all code within this function is stolen from shutil.which implementation + @typing.no_type_check + def PATH_as_shutil_which_determines_it() -> str: + path = os.environ.get("PATH", None) + if path is None: + try: + path = os.confstr("CS_PATH") + except (AttributeError, ValueError): + # os.confstr() or CS_PATH is not available + path = os.defpath + # bpo-35755: Don't use os.defpath if the PATH environment variable is + # set to an empty string + + return path + + scripts = Path(sysconfig.get_path("scripts")) + + paths = [] + for path in PATH_as_shutil_which_determines_it().split(os.pathsep): + p = Path(path) + try: + if not p.samefile(scripts): + paths.append(path) + except FileNotFoundError: + pass + + path = os.pathsep.join(paths) + + cli = shutil.which("keyring", path=path) + + if cli: + logger.verbose("Keyring provider set: subprocess with executable %s", cli) + return KeyRingCliProvider(cli) + + logger.verbose("Keyring provider set: disabled") + return KeyRingNullProvider() class MultiDomainBasicAuth(AuthBase): def __init__( - self, prompting: bool = True, index_urls: Optional[List[str]] = None + self, + prompting: bool = True, + index_urls: Optional[List[str]] = None, + keyring_provider: str = "auto", ) -> None: self.prompting = prompting self.index_urls = index_urls + self.keyring_provider = keyring_provider # type: ignore[assignment] self.passwords: Dict[str, AuthInfo] = {} # When the user is prompted to enter credentials and keyring is # available, we will offer to save them. If the user accepts, @@ -84,6 +238,47 @@ def __init__( # ``save_credentials`` to save these. self._credentials_to_save: Optional[Credentials] = None + @property + def keyring_provider(self) -> KeyRingBaseProvider: + return get_keyring_provider(self._keyring_provider) + + @keyring_provider.setter + def keyring_provider(self, provider: str) -> None: + # The free function get_keyring_provider has been decorated with + # functools.cache. If an exception occurs in get_keyring_auth that + # cache will be cleared and keyring disabled, take that into account + # if you want to remove this indirection. + self._keyring_provider = provider + + @property + def use_keyring(self) -> bool: + # We won't use keyring when --no-input is passed unless + # a specific provider is requested because it might require + # user interaction + return self.prompting or self._keyring_provider not in ["auto", "disabled"] + + def _get_keyring_auth( + self, + url: Optional[str], + username: Optional[str], + ) -> Optional[AuthInfo]: + """Return the tuple auth for a given url from keyring.""" + # Do nothing if no url was provided + if not url: + return None + + try: + return self.keyring_provider.get_auth_info(url, username) + except Exception as exc: + logger.warning( + "Keyring is skipped due to an exception: %s", + str(exc), + ) + global KEYRING_DISABLED + KEYRING_DISABLED = True + get_keyring_provider.cache_clear() + return None + def _get_index_url(self, url: str) -> Optional[str]: """Return the original index URL matching the requested URL. @@ -100,15 +295,42 @@ def _get_index_url(self, url: str) -> Optional[str]: if not url or not self.index_urls: return None - for u in self.index_urls: - prefix = remove_auth_from_url(u).rstrip("/") + "/" - if url.startswith(prefix): - return u - return None + url = remove_auth_from_url(url).rstrip("/") + "/" + parsed_url = urllib.parse.urlsplit(url) + + candidates = [] + + for index in self.index_urls: + index = index.rstrip("/") + "/" + parsed_index = urllib.parse.urlsplit(remove_auth_from_url(index)) + if parsed_url == parsed_index: + return index + + if parsed_url.netloc != parsed_index.netloc: + continue + + candidate = urllib.parse.urlsplit(index) + candidates.append(candidate) + + if not candidates: + return None + + candidates.sort( + reverse=True, + key=lambda candidate: commonprefix( + [ + parsed_url.path, + candidate.path, + ] + ).rfind("/"), + ) + + return urllib.parse.urlunsplit(candidates[0]) def _get_new_credentials( self, original_url: str, + *, allow_netrc: bool = True, allow_keyring: bool = False, ) -> AuthInfo: @@ -152,8 +374,8 @@ def _get_new_credentials( # The index url is more specific than the netloc, so try it first # fmt: off kr_auth = ( - get_keyring_auth(index_url, username) or - get_keyring_auth(netloc, username) + self._get_keyring_auth(index_url, username) or + self._get_keyring_auth(netloc, username) ) # fmt: on if kr_auth: @@ -230,18 +452,23 @@ def __call__(self, req: Request) -> Request: def _prompt_for_password( self, netloc: str ) -> Tuple[Optional[str], Optional[str], bool]: - username = ask_input(f"User for {netloc}: ") + username = ask_input(f"User for {netloc}: ") if self.prompting else None if not username: return None, None, False - auth = get_keyring_auth(netloc, username) - if auth and auth[0] is not None and auth[1] is not None: - return auth[0], auth[1], False + if self.use_keyring: + auth = self._get_keyring_auth(netloc, username) + if auth and auth[0] is not None and auth[1] is not None: + return auth[0], auth[1], False password = ask_password("Password: ") return username, password, True # Factored out to allow for easy patching in tests def _should_save_password_to_keyring(self) -> bool: - if not keyring: + if ( + not self.prompting + or not self.use_keyring + or not self.keyring_provider.has_keyring + ): return False return ask("Save credentials to keyring [y/N]: ", ["y", "n"]) == "y" @@ -251,19 +478,22 @@ def handle_401(self, resp: Response, **kwargs: Any) -> Response: if resp.status_code != 401: return resp + username, password = None, None + + # Query the keyring for credentials: + if self.use_keyring: + username, password = self._get_new_credentials( + resp.url, + allow_netrc=False, + allow_keyring=True, + ) + # We are not able to prompt the user so simply return the response - if not self.prompting: + if not self.prompting and not username and not password: return resp parsed = urllib.parse.urlparse(resp.url) - # Query the keyring for credentials: - username, password = self._get_new_credentials( - resp.url, - allow_netrc=False, - allow_keyring=True, - ) - # Prompt the user for a new username and password save = False if not username and not password: @@ -276,7 +506,11 @@ def handle_401(self, resp: Response, **kwargs: Any) -> Response: # Prompt to save the password to keyring if save and self._should_save_password_to_keyring(): - self._credentials_to_save = (parsed.netloc, username, password) + self._credentials_to_save = Credentials( + url=parsed.netloc, + username=username, + password=password, + ) # Consume content and release the original connection to allow our new # request to reuse the same one. @@ -309,15 +543,17 @@ def warn_on_401(self, resp: Response, **kwargs: Any) -> None: def save_credentials(self, resp: Response, **kwargs: Any) -> None: """Response callback to save credentials on success.""" - assert keyring is not None, "should never reach here without keyring" - if not keyring: - return + assert ( + self.keyring_provider.has_keyring + ), "should never reach here without keyring" creds = self._credentials_to_save self._credentials_to_save = None if creds and resp.status_code < 400: try: logger.info("Saving credentials to keyring") - keyring.set_password(*creds) + self.keyring_provider.save_auth_info( + creds.url, creds.username, creds.password + ) except Exception: logger.exception("Failed to save credentials") diff --git a/src/pip/_internal/network/cache.py b/src/pip/_internal/network/cache.py index 2d915e6fcec..a81a2398519 100644 --- a/src/pip/_internal/network/cache.py +++ b/src/pip/_internal/network/cache.py @@ -3,7 +3,7 @@ import os from contextlib import contextmanager -from typing import Iterator, Optional +from typing import Generator, Optional from pip._vendor.cachecontrol.cache import BaseCache from pip._vendor.cachecontrol.caches import FileCache @@ -18,7 +18,7 @@ def is_from_cache(response: Response) -> bool: @contextmanager -def suppressed_cache_errors() -> Iterator[None]: +def suppressed_cache_errors() -> Generator[None, None, None]: """If we can't access the cache then we can just skip caching and process requests as if caching wasn't enabled. """ @@ -53,7 +53,7 @@ def get(self, key: str) -> Optional[bytes]: with open(path, "rb") as f: return f.read() - def set(self, key: str, value: bytes) -> None: + def set(self, key: str, value: bytes, expires: Optional[int] = None) -> None: path = self._get_cache_path(key) with suppressed_cache_errors(): ensure_dir(os.path.dirname(path)) diff --git a/src/pip/_internal/network/download.py b/src/pip/_internal/network/download.py index 47af547d6fc..79b82a570e5 100644 --- a/src/pip/_internal/network/download.py +++ b/src/pip/_internal/network/download.py @@ -1,6 +1,6 @@ """Download files with progress indicators. """ -import cgi +import email.message import logging import mimetypes import os @@ -8,7 +8,7 @@ from pip._vendor.requests.models import CONTENT_CHUNK_SIZE, Response -from pip._internal.cli.progress_bars import DownloadProgressProvider +from pip._internal.cli.progress_bars import get_download_progress_renderer from pip._internal.exceptions import NetworkConnectionError from pip._internal.models.index import PyPI from pip._internal.models.link import Link @@ -65,7 +65,8 @@ def _prepare_download( if not show_progress: return chunks - return DownloadProgressProvider(progress_bar, max=total_length)(chunks) + renderer = get_download_progress_renderer(bar_type=progress_bar, size=total_length) + return renderer(chunks) def sanitize_content_filename(filename: str) -> str: @@ -80,12 +81,13 @@ def parse_content_disposition(content_disposition: str, default_filename: str) - Parse the "filename" value from a Content-Disposition header, and return the default filename if the result is empty. """ - _type, params = cgi.parse_header(content_disposition) - filename = params.get("filename") + m = email.message.Message() + m["content-type"] = content_disposition + filename = m.get_param("filename") if filename: # We need to sanitize the filename to prevent directory traversal # in case the filename contains ".." path parts. - filename = sanitize_content_filename(filename) + filename = sanitize_content_filename(str(filename)) return filename or default_filename diff --git a/src/pip/_internal/network/lazy_wheel.py b/src/pip/_internal/network/lazy_wheel.py index c9e44d5be58..82ec50d5106 100644 --- a/src/pip/_internal/network/lazy_wheel.py +++ b/src/pip/_internal/network/lazy_wheel.py @@ -5,8 +5,8 @@ from bisect import bisect_left, bisect_right from contextlib import contextmanager from tempfile import NamedTemporaryFile -from typing import Any, Dict, Iterator, List, Optional, Tuple -from zipfile import BadZipfile, ZipFile +from typing import Any, Dict, Generator, List, Optional, Tuple +from zipfile import BadZipFile, ZipFile from pip._vendor.packaging.utils import canonicalize_name from pip._vendor.requests.models import CONTENT_CHUNK_SIZE, Response @@ -23,7 +23,7 @@ class HTTPRangeRequestUnsupported(Exception): def dist_from_wheel_url(name: str, url: str, session: PipSession) -> BaseDistribution: """Return a distribution object from the given wheel URL. - This uses HTTP range requests to only fetch the potion of the wheel + This uses HTTP range requests to only fetch the portion of the wheel containing metadata, just enough for the object to be constructed. If such requests are not supported, HTTPRangeRequestUnsupported is raised. @@ -135,11 +135,11 @@ def __enter__(self) -> "LazyZipOverHTTP": self._file.__enter__() return self - def __exit__(self, *exc: Any) -> Optional[bool]: - return self._file.__exit__(*exc) + def __exit__(self, *exc: Any) -> None: + self._file.__exit__(*exc) @contextmanager - def _stay(self) -> Iterator[None]: + def _stay(self) -> Generator[None, None, None]: """Return a context manager keeping the position. At the end of the block, seek back to original position. @@ -160,7 +160,7 @@ def _check_zip(self) -> None: # For read-only ZIP files, ZipFile only needs # methods read, seek, seekable and tell. ZipFile(self) # type: ignore - except BadZipfile: + except BadZipFile: pass else: break @@ -177,8 +177,8 @@ def _stream_response( def _merge( self, start: int, end: int, left: int, right: int - ) -> Iterator[Tuple[int, int]]: - """Return an iterator of intervals to be fetched. + ) -> Generator[Tuple[int, int], None, None]: + """Return a generator of intervals to be fetched. Args: start (int): Start of needed interval diff --git a/src/pip/_internal/network/session.py b/src/pip/_internal/network/session.py index cbe743ba6a1..e512ac78464 100644 --- a/src/pip/_internal/network/session.py +++ b/src/pip/_internal/network/session.py @@ -15,11 +15,23 @@ import sys import urllib.parse import warnings -from typing import Any, Dict, Iterator, List, Mapping, Optional, Sequence, Tuple, Union +from typing import ( + TYPE_CHECKING, + Any, + Dict, + Generator, + List, + Mapping, + Optional, + Sequence, + Tuple, + Union, +) from pip._vendor import requests, urllib3 -from pip._vendor.cachecontrol import CacheControlAdapter -from pip._vendor.requests.adapters import BaseAdapter, HTTPAdapter +from pip._vendor.cachecontrol import CacheControlAdapter as _BaseCacheControlAdapter +from pip._vendor.requests.adapters import DEFAULT_POOLBLOCK, BaseAdapter +from pip._vendor.requests.adapters import HTTPAdapter as _BaseHTTPAdapter from pip._vendor.requests.models import PreparedRequest, Response from pip._vendor.requests.structures import CaseInsensitiveDict from pip._vendor.urllib3.connectionpool import ConnectionPool @@ -37,6 +49,12 @@ from pip._internal.utils.misc import build_url_from_netloc, parse_netloc from pip._internal.utils.urls import url_to_path +if TYPE_CHECKING: + from ssl import SSLContext + + from pip._vendor.urllib3.poolmanager import PoolManager + + logger = logging.getLogger(__name__) SecureOrigin = Tuple[str, str, Optional[Union[int, str]]] @@ -233,6 +251,48 @@ def close(self) -> None: pass +class _SSLContextAdapterMixin: + """Mixin to add the ``ssl_context`` constructor argument to HTTP adapters. + + The additional argument is forwarded directly to the pool manager. This allows us + to dynamically decide what SSL store to use at runtime, which is used to implement + the optional ``truststore`` backend. + """ + + def __init__( + self, + *, + ssl_context: Optional["SSLContext"] = None, + **kwargs: Any, + ) -> None: + self._ssl_context = ssl_context + super().__init__(**kwargs) + + def init_poolmanager( + self, + connections: int, + maxsize: int, + block: bool = DEFAULT_POOLBLOCK, + **pool_kwargs: Any, + ) -> "PoolManager": + if self._ssl_context is not None: + pool_kwargs.setdefault("ssl_context", self._ssl_context) + return super().init_poolmanager( # type: ignore[misc] + connections=connections, + maxsize=maxsize, + block=block, + **pool_kwargs, + ) + + +class HTTPAdapter(_SSLContextAdapterMixin, _BaseHTTPAdapter): + pass + + +class CacheControlAdapter(_SSLContextAdapterMixin, _BaseCacheControlAdapter): + pass + + class InsecureHTTPAdapter(HTTPAdapter): def cert_verify( self, @@ -266,6 +326,7 @@ def __init__( cache: Optional[str] = None, trusted_hosts: Sequence[str] = (), index_urls: Optional[List[str]] = None, + ssl_context: Optional["SSLContext"] = None, **kwargs: Any, ) -> None: """ @@ -318,13 +379,14 @@ def __init__( secure_adapter = CacheControlAdapter( cache=SafeFileCache(cache), max_retries=retries, + ssl_context=ssl_context, ) self._trusted_host_adapter = InsecureCacheControlAdapter( cache=SafeFileCache(cache), max_retries=retries, ) else: - secure_adapter = HTTPAdapter(max_retries=retries) + secure_adapter = HTTPAdapter(max_retries=retries, ssl_context=ssl_context) self._trusted_host_adapter = insecure_adapter self.mount("https://", secure_adapter) @@ -374,7 +436,7 @@ def add_trusted_host( # Mount wildcard ports for the same host. self.mount(build_url_from_netloc(host) + ":", self._trusted_host_adapter) - def iter_secure_origins(self) -> Iterator[SecureOrigin]: + def iter_secure_origins(self) -> Generator[SecureOrigin, None, None]: yield from SECURE_ORIGINS for host, port in self.pip_trusted_origins: yield ("*", host, "*" if port is None else port) @@ -403,7 +465,7 @@ def is_secure_origin(self, location: Link) -> bool: continue try: - addr = ipaddress.ip_address(origin_host) + addr = ipaddress.ip_address(origin_host or "") network = ipaddress.ip_network(secure_host) except ValueError: # We don't have both a valid address or a valid network, so @@ -449,6 +511,8 @@ def is_secure_origin(self, location: Link) -> bool: def request(self, method: str, url: str, *args: Any, **kwargs: Any) -> Response: # Allow setting a default timeout on a session kwargs.setdefault("timeout", self.timeout) + # Allow setting a default proxies on a session + kwargs.setdefault("proxies", self.proxies) # Dispatch the actual request return super().request(method, url, *args, **kwargs) diff --git a/src/pip/_internal/network/utils.py b/src/pip/_internal/network/utils.py index 094cf1b4a97..134848ae526 100644 --- a/src/pip/_internal/network/utils.py +++ b/src/pip/_internal/network/utils.py @@ -1,4 +1,4 @@ -from typing import Dict, Iterator +from typing import Dict, Generator from pip._vendor.requests.models import CONTENT_CHUNK_SIZE, Response @@ -56,7 +56,7 @@ def raise_for_status(resp: Response) -> None: def response_chunks( response: Response, chunk_size: int = CONTENT_CHUNK_SIZE -) -> Iterator[bytes]: +) -> Generator[bytes, None, None]: """Given a requests Response, provide the data chunks.""" try: # Special case for urllib3. diff --git a/src/pip/_internal/req/req_tracker.py b/src/pip/_internal/operations/build/build_tracker.py similarity index 85% rename from src/pip/_internal/req/req_tracker.py rename to src/pip/_internal/operations/build/build_tracker.py index 24d3c530335..6621549b844 100644 --- a/src/pip/_internal/req/req_tracker.py +++ b/src/pip/_internal/operations/build/build_tracker.py @@ -3,7 +3,7 @@ import logging import os from types import TracebackType -from typing import Dict, Iterator, Optional, Set, Type, Union +from typing import Dict, Generator, Optional, Set, Type, Union from pip._internal.models.link import Link from pip._internal.req.req_install import InstallRequirement @@ -13,7 +13,7 @@ @contextlib.contextmanager -def update_env_context_manager(**changes: str) -> Iterator[None]: +def update_env_context_manager(**changes: str) -> Generator[None, None, None]: target = os.environ # Save values from the target and change them. @@ -39,25 +39,25 @@ def update_env_context_manager(**changes: str) -> Iterator[None]: @contextlib.contextmanager -def get_requirement_tracker() -> Iterator["RequirementTracker"]: - root = os.environ.get("PIP_REQ_TRACKER") +def get_build_tracker() -> Generator["BuildTracker", None, None]: + root = os.environ.get("PIP_BUILD_TRACKER") with contextlib.ExitStack() as ctx: if root is None: - root = ctx.enter_context(TempDirectory(kind="req-tracker")).path - ctx.enter_context(update_env_context_manager(PIP_REQ_TRACKER=root)) + root = ctx.enter_context(TempDirectory(kind="build-tracker")).path + ctx.enter_context(update_env_context_manager(PIP_BUILD_TRACKER=root)) logger.debug("Initialized build tracking at %s", root) - with RequirementTracker(root) as tracker: + with BuildTracker(root) as tracker: yield tracker -class RequirementTracker: +class BuildTracker: def __init__(self, root: str) -> None: self._root = root self._entries: Set[InstallRequirement] = set() logger.debug("Created build tracker: %s", self._root) - def __enter__(self) -> "RequirementTracker": + def __enter__(self) -> "BuildTracker": logger.debug("Entered build tracker: %s", self._root) return self @@ -118,7 +118,7 @@ def cleanup(self) -> None: logger.debug("Removed build tracker: %r", self._root) @contextlib.contextmanager - def track(self, req: InstallRequirement) -> Iterator[None]: + def track(self, req: InstallRequirement) -> Generator[None, None, None]: self.add(req) yield self.remove(req) diff --git a/src/pip/_internal/operations/build/metadata.py b/src/pip/_internal/operations/build/metadata.py index 7d12438d6ed..c66ac354deb 100644 --- a/src/pip/_internal/operations/build/metadata.py +++ b/src/pip/_internal/operations/build/metadata.py @@ -3,14 +3,20 @@ import os -from pip._vendor.pep517.wrappers import Pep517HookCaller +from pip._vendor.pyproject_hooks import BuildBackendHookCaller from pip._internal.build_env import BuildEnvironment +from pip._internal.exceptions import ( + InstallationSubprocessError, + MetadataGenerationFailed, +) from pip._internal.utils.subprocess import runner_with_spinner_message from pip._internal.utils.temp_dir import TempDirectory -def generate_metadata(build_env: BuildEnvironment, backend: Pep517HookCaller) -> str: +def generate_metadata( + build_env: BuildEnvironment, backend: BuildBackendHookCaller, details: str +) -> str: """Generate metadata using mechanisms described in PEP 517. Returns the generated metadata directory. @@ -20,11 +26,14 @@ def generate_metadata(build_env: BuildEnvironment, backend: Pep517HookCaller) -> metadata_dir = metadata_tmpdir.path with build_env: - # Note that Pep517HookCaller implements a fallback for + # Note that BuildBackendHookCaller implements a fallback for # prepare_metadata_for_build_wheel, so we don't have to # consider the possibility that this hook doesn't exist. runner = runner_with_spinner_message("Preparing metadata (pyproject.toml)") with backend.subprocess_runner(runner): - distinfo_dir = backend.prepare_metadata_for_build_wheel(metadata_dir) + try: + distinfo_dir = backend.prepare_metadata_for_build_wheel(metadata_dir) + except InstallationSubprocessError as error: + raise MetadataGenerationFailed(package_details=details) from error return os.path.join(metadata_dir, distinfo_dir) diff --git a/src/pip/_internal/operations/build/metadata_editable.py b/src/pip/_internal/operations/build/metadata_editable.py index 13de75f6c98..27c69f0d1ea 100644 --- a/src/pip/_internal/operations/build/metadata_editable.py +++ b/src/pip/_internal/operations/build/metadata_editable.py @@ -3,15 +3,19 @@ import os -from pip._vendor.pep517.wrappers import Pep517HookCaller +from pip._vendor.pyproject_hooks import BuildBackendHookCaller from pip._internal.build_env import BuildEnvironment +from pip._internal.exceptions import ( + InstallationSubprocessError, + MetadataGenerationFailed, +) from pip._internal.utils.subprocess import runner_with_spinner_message from pip._internal.utils.temp_dir import TempDirectory def generate_editable_metadata( - build_env: BuildEnvironment, backend: Pep517HookCaller + build_env: BuildEnvironment, backend: BuildBackendHookCaller, details: str ) -> str: """Generate metadata using mechanisms described in PEP 660. @@ -22,13 +26,16 @@ def generate_editable_metadata( metadata_dir = metadata_tmpdir.path with build_env: - # Note that Pep517HookCaller implements a fallback for + # Note that BuildBackendHookCaller implements a fallback for # prepare_metadata_for_build_wheel/editable, so we don't have to # consider the possibility that this hook doesn't exist. runner = runner_with_spinner_message( "Preparing editable metadata (pyproject.toml)" ) with backend.subprocess_runner(runner): - distinfo_dir = backend.prepare_metadata_for_build_editable(metadata_dir) + try: + distinfo_dir = backend.prepare_metadata_for_build_editable(metadata_dir) + except InstallationSubprocessError as error: + raise MetadataGenerationFailed(package_details=details) from error return os.path.join(metadata_dir, distinfo_dir) diff --git a/src/pip/_internal/operations/build/metadata_legacy.py b/src/pip/_internal/operations/build/metadata_legacy.py index ff52de9c4cf..e60988d643e 100644 --- a/src/pip/_internal/operations/build/metadata_legacy.py +++ b/src/pip/_internal/operations/build/metadata_legacy.py @@ -6,7 +6,11 @@ from pip._internal.build_env import BuildEnvironment from pip._internal.cli.spinners import open_spinner -from pip._internal.exceptions import InstallationError +from pip._internal.exceptions import ( + InstallationError, + InstallationSubprocessError, + MetadataGenerationFailed, +) from pip._internal.utils.setuptools_build import make_setuptools_egg_info_args from pip._internal.utils.subprocess import call_subprocess from pip._internal.utils.temp_dir import TempDirectory @@ -56,12 +60,15 @@ def generate_metadata( with build_env: with open_spinner("Preparing metadata (setup.py)") as spinner: - call_subprocess( - args, - cwd=source_dir, - command_desc="python setup.py egg_info", - spinner=spinner, - ) + try: + call_subprocess( + args, + cwd=source_dir, + command_desc="python setup.py egg_info", + spinner=spinner, + ) + except InstallationSubprocessError as error: + raise MetadataGenerationFailed(package_details=details) from error # Return the .egg-info directory. return _find_egg_info(egg_info_dir) diff --git a/src/pip/_internal/operations/build/wheel.py b/src/pip/_internal/operations/build/wheel.py index b0d2fc9eadb..064811ad11b 100644 --- a/src/pip/_internal/operations/build/wheel.py +++ b/src/pip/_internal/operations/build/wheel.py @@ -2,7 +2,7 @@ import os from typing import Optional -from pip._vendor.pep517.wrappers import Pep517HookCaller +from pip._vendor.pyproject_hooks import BuildBackendHookCaller from pip._internal.utils.subprocess import runner_with_spinner_message @@ -11,7 +11,7 @@ def build_wheel_pep517( name: str, - backend: Pep517HookCaller, + backend: BuildBackendHookCaller, metadata_directory: str, tempd: str, ) -> Optional[str]: diff --git a/src/pip/_internal/operations/build/wheel_editable.py b/src/pip/_internal/operations/build/wheel_editable.py index cf7b01aed5a..719d69dd801 100644 --- a/src/pip/_internal/operations/build/wheel_editable.py +++ b/src/pip/_internal/operations/build/wheel_editable.py @@ -2,7 +2,7 @@ import os from typing import Optional -from pip._vendor.pep517.wrappers import HookMissing, Pep517HookCaller +from pip._vendor.pyproject_hooks import BuildBackendHookCaller, HookMissing from pip._internal.utils.subprocess import runner_with_spinner_message @@ -11,7 +11,7 @@ def build_wheel_editable( name: str, - backend: Pep517HookCaller, + backend: BuildBackendHookCaller, metadata_directory: str, tempd: str, ) -> Optional[str]: diff --git a/src/pip/_internal/operations/build/wheel_legacy.py b/src/pip/_internal/operations/build/wheel_legacy.py index 2d5cb264ee6..c5f0492ccbe 100644 --- a/src/pip/_internal/operations/build/wheel_legacy.py +++ b/src/pip/_internal/operations/build/wheel_legacy.py @@ -4,11 +4,7 @@ from pip._internal.cli.spinners import open_spinner from pip._internal.utils.setuptools_build import make_setuptools_bdist_wheel_args -from pip._internal.utils.subprocess import ( - LOG_DIVIDER, - call_subprocess, - format_command_args, -) +from pip._internal.utils.subprocess import call_subprocess, format_command_args logger = logging.getLogger(__name__) @@ -28,7 +24,7 @@ def format_command_result( else: if not command_output.endswith("\n"): command_output += "\n" - text += f"Command output:\n{command_output}{LOG_DIVIDER}" + text += f"Command output:\n{command_output}" return text @@ -86,6 +82,7 @@ def build_wheel_legacy( try: output = call_subprocess( wheel_args, + command_desc="python setup.py bdist_wheel", cwd=source_dir, spinner=spinner, ) diff --git a/src/pip/_internal/operations/check.py b/src/pip/_internal/operations/check.py index fb3ac8b9c9e..e3bce69b204 100644 --- a/src/pip/_internal/operations/check.py +++ b/src/pip/_internal/operations/check.py @@ -75,7 +75,7 @@ def check_package_set( if name not in package_set: missed = True if req.marker is not None: - missed = req.marker.evaluate() + missed = req.marker.evaluate({"extra": ""}) if missed: missing_deps.add((name, req)) continue diff --git a/src/pip/_internal/operations/freeze.py b/src/pip/_internal/operations/freeze.py index 456554085df..930d4c6005e 100644 --- a/src/pip/_internal/operations/freeze.py +++ b/src/pip/_internal/operations/freeze.py @@ -1,7 +1,7 @@ import collections import logging import os -from typing import Container, Dict, Iterable, Iterator, List, NamedTuple, Optional, Set +from typing import Container, Dict, Generator, Iterable, List, NamedTuple, Optional, Set from pip._vendor.packaging.utils import canonicalize_name from pip._vendor.packaging.version import Version @@ -31,7 +31,7 @@ def freeze( isolated: bool = False, exclude_editable: bool = False, skip: Container[str] = (), -) -> Iterator[str]: +) -> Generator[str, None, None]: installations: Dict[str, FrozenRequirement] = {} dists = get_environment(paths).iter_installed_distributions( diff --git a/src/pip/_internal/operations/install/editable_legacy.py b/src/pip/_internal/operations/install/editable_legacy.py index 5bd72ca198d..bebe24e6d3a 100644 --- a/src/pip/_internal/operations/install/editable_legacy.py +++ b/src/pip/_internal/operations/install/editable_legacy.py @@ -1,7 +1,7 @@ """Legacy editable installation process, i.e. `setup.py develop`. """ import logging -from typing import List, Optional, Sequence +from typing import Optional, Sequence from pip._internal.build_env import BuildEnvironment from pip._internal.utils.logging import indent_log @@ -12,7 +12,7 @@ def install_editable( - install_options: List[str], + *, global_options: Sequence[str], prefix: Optional[str], home: Optional[str], @@ -31,7 +31,6 @@ def install_editable( args = make_setuptools_develop_args( setup_py_path, global_options=global_options, - install_options=install_options, no_user_config=isolated, prefix=prefix, home=home, @@ -42,5 +41,6 @@ def install_editable( with build_env: call_subprocess( args, + command_desc="python setup.py develop", cwd=unpacked_source_directory, ) diff --git a/src/pip/_internal/operations/install/legacy.py b/src/pip/_internal/operations/install/legacy.py index 2206c930913..0b108d0ca71 100644 --- a/src/pip/_internal/operations/install/legacy.py +++ b/src/pip/_internal/operations/install/legacy.py @@ -3,13 +3,12 @@ import logging import os -from distutils.util import change_root from typing import List, Optional, Sequence from pip._internal.build_env import BuildEnvironment -from pip._internal.exceptions import InstallationError +from pip._internal.exceptions import InstallationError, LegacyInstallFailure +from pip._internal.locations.base import change_root from pip._internal.models.scheme import Scheme -from pip._internal.utils.logging import indent_log from pip._internal.utils.misc import ensure_dir from pip._internal.utils.setuptools_build import make_setuptools_install_args from pip._internal.utils.subprocess import runner_with_spinner_message @@ -18,10 +17,6 @@ logger = logging.getLogger(__name__) -class LegacyInstallFailure(Exception): - pass - - def write_installed_files_from_setuptools_record( record_lines: List[str], root: Optional[str], @@ -60,7 +55,6 @@ def prepend_root(path: str) -> str: def install( - install_options: List[str], global_options: Sequence[str], root: Optional[str], home: Optional[str], @@ -84,7 +78,6 @@ def install( install_args = make_setuptools_install_args( setup_py_path, global_options=global_options, - install_options=install_options, record_filename=record_filename, root=root, prefix=prefix, @@ -98,7 +91,7 @@ def install( runner = runner_with_spinner_message( f"Running setup.py install for {req_name}" ) - with indent_log(), build_env: + with build_env: runner( cmd=install_args, cwd=unpacked_source_directory, @@ -111,7 +104,7 @@ def install( except Exception as e: # Signal to the caller that we didn't install the new package - raise LegacyInstallFailure from e + raise LegacyInstallFailure(package_details=req_name) from e # At this point, we have successfully installed the requirement. diff --git a/src/pip/_internal/operations/install/wheel.py b/src/pip/_internal/operations/install/wheel.py index e191b13431d..c79941398a2 100644 --- a/src/pip/_internal/operations/install/wheel.py +++ b/src/pip/_internal/operations/install/wheel.py @@ -22,6 +22,7 @@ BinaryIO, Callable, Dict, + Generator, Iterable, Iterator, List, @@ -223,19 +224,16 @@ def _normalized_outrows( ) -def _record_to_fs_path(record_path: RecordPath) -> str: - return record_path +def _record_to_fs_path(record_path: RecordPath, lib_dir: str) -> str: + return os.path.join(lib_dir, record_path) -def _fs_to_record_path(path: str, relative_to: Optional[str] = None) -> RecordPath: - if relative_to is not None: - # On Windows, do not handle relative paths if they belong to different - # logical disks - if ( - os.path.splitdrive(path)[0].lower() - == os.path.splitdrive(relative_to)[0].lower() - ): - path = os.path.relpath(path, relative_to) +def _fs_to_record_path(path: str, lib_dir: str) -> RecordPath: + # On Windows, do not handle relative paths if they belong to different + # logical disks + if os.path.splitdrive(path)[0].lower() == os.path.splitdrive(lib_dir)[0].lower(): + path = os.path.relpath(path, lib_dir) + path = path.replace(os.path.sep, "/") return cast("RecordPath", path) @@ -258,7 +256,7 @@ def get_csv_rows_for_installed( old_record_path = cast("RecordPath", row[0]) new_record_path = installed.pop(old_record_path, old_record_path) if new_record_path in changed: - digest, length = rehash(_record_to_fs_path(new_record_path)) + digest, length = rehash(_record_to_fs_path(new_record_path, lib_dir)) else: digest = row[1] if len(row) > 1 else "" length = row[2] if len(row) > 2 else "" @@ -327,7 +325,7 @@ def get_console_script_specs(console: Dict[str, str]) -> List[str]: scripts_to_generate.append(f"pip{get_major_minor_version()} = {pip_script}") # Delete any other versioned pip entry points - pip_ep = [k for k in console if re.match(r"pip(\d(\.\d)?)?$", k)] + pip_ep = [k for k in console if re.match(r"pip(\d+(\.\d+)?)?$", k)] for k in pip_ep: del console[k] easy_install_script = console.pop("easy_install", None) @@ -342,7 +340,7 @@ def get_console_script_specs(console: Dict[str, str]) -> List[str]: ) # Delete any other versioned easy_install entry points easy_install_ep = [ - k for k in console if re.match(r"easy_install(-\d\.\d)?$", k) + k for k in console if re.match(r"easy_install(-\d+\.\d+)?$", k) ] for k in easy_install_ep: del console[k] @@ -422,7 +420,9 @@ def _raise_for_invalid_entrypoint(specification: str) -> None: class PipScriptMaker(ScriptMaker): - def make(self, specification: str, options: Dict[str, Any] = None) -> List[str]: + def make( + self, specification: str, options: Optional[Dict[str, Any]] = None + ) -> List[str]: _raise_for_invalid_entrypoint(specification) return super().make(specification, options) @@ -474,7 +474,7 @@ def record_installed( newpath = _fs_to_record_path(destfile, lib_dir) installed[srcfile] = newpath if modified: - changed.add(_fs_to_record_path(destfile)) + changed.add(newpath) def is_dir_path(path: RecordPath) -> bool: return path.endswith("/") @@ -589,7 +589,7 @@ def is_entrypoint_wrapper(file: "File") -> bool: file.save() record_installed(file.src_record_path, file.dest_path, file.changed) - def pyc_source_file_paths() -> Iterator[str]: + def pyc_source_file_paths() -> Generator[str, None, None]: # We de-duplicate installation paths, since there can be overlap (e.g. # file in .data maps to same location as file in wheel root). # Sorting installation paths makes it easier to reproduce and debug @@ -656,7 +656,7 @@ def pyc_output_path(path: str) -> str: generated_file_mode = 0o666 & ~current_umask() @contextlib.contextmanager - def _generate_file(path: str, **kwargs: Any) -> Iterator[BinaryIO]: + def _generate_file(path: str, **kwargs: Any) -> Generator[BinaryIO, None, None]: with adjacent_tmp_file(path, **kwargs) as f: yield f os.chmod(f.name, generated_file_mode) @@ -706,7 +706,7 @@ def _generate_file(path: str, **kwargs: Any) -> Iterator[BinaryIO]: @contextlib.contextmanager -def req_error_context(req_description: str) -> Iterator[None]: +def req_error_context(req_description: str) -> Generator[None, None, None]: try: yield except InstallationError as e: diff --git a/src/pip/_internal/operations/prepare.py b/src/pip/_internal/operations/prepare.py index 34cf9a51b6e..343a01bef4b 100644 --- a/src/pip/_internal/operations/prepare.py +++ b/src/pip/_internal/operations/prepare.py @@ -19,12 +19,14 @@ HashMismatch, HashUnpinned, InstallationError, + MetadataInconsistent, NetworkConnectionError, PreviousBuildDirError, VcsHashUnsupported, ) from pip._internal.index.package_finder import PackageFinder -from pip._internal.metadata import BaseDistribution +from pip._internal.metadata import BaseDistribution, get_metadata_distribution +from pip._internal.models.direct_url import ArchiveInfo from pip._internal.models.link import Link from pip._internal.models.wheel import Wheel from pip._internal.network.download import BatchDownloader, Downloader @@ -33,12 +35,20 @@ dist_from_wheel_url, ) from pip._internal.network.session import PipSession +from pip._internal.operations.build.build_tracker import BuildTracker from pip._internal.req.req_install import InstallRequirement -from pip._internal.req.req_tracker import RequirementTracker -from pip._internal.utils.filesystem import copy2_fixed +from pip._internal.utils.direct_url_helpers import ( + direct_url_for_editable, + direct_url_from_link, +) from pip._internal.utils.hashes import Hashes, MissingHashes from pip._internal.utils.logging import indent_log -from pip._internal.utils.misc import display_path, hide_url, is_installable_dir, rmtree +from pip._internal.utils.misc import ( + display_path, + hash_file, + hide_url, + is_installable_dir, +) from pip._internal.utils.temp_dir import TempDirectory from pip._internal.utils.unpacking import unpack_file from pip._internal.vcs import vcs @@ -48,21 +58,24 @@ def _get_prepared_distribution( req: InstallRequirement, - req_tracker: RequirementTracker, + build_tracker: BuildTracker, finder: PackageFinder, build_isolation: bool, + check_build_deps: bool, ) -> BaseDistribution: """Prepare a distribution for installation.""" abstract_dist = make_distribution_for_install_requirement(req) - with req_tracker.track(req): - abstract_dist.prepare_distribution_metadata(finder, build_isolation) + with build_tracker.track(req): + abstract_dist.prepare_distribution_metadata( + finder, build_isolation, check_build_deps + ) return abstract_dist.get_metadata_distribution() -def unpack_vcs_link(link: Link, location: str) -> None: +def unpack_vcs_link(link: Link, location: str, verbosity: int) -> None: vcs_backend = vcs.get_backend_for_scheme(link.scheme) assert vcs_backend is not None - vcs_backend.unpack(location, url=hide_url(link.url)) + vcs_backend.unpack(location, url=hide_url(link.url), verbosity=verbosity) class File: @@ -98,55 +111,6 @@ def get_http_url( return File(from_path, content_type) -def _copy2_ignoring_special_files(src: str, dest: str) -> None: - """Copying special files is not supported, but as a convenience to users - we skip errors copying them. This supports tools that may create e.g. - socket files in the project source directory. - """ - try: - copy2_fixed(src, dest) - except shutil.SpecialFileError as e: - # SpecialFileError may be raised due to either the source or - # destination. If the destination was the cause then we would actually - # care, but since the destination directory is deleted prior to - # copy we ignore all of them assuming it is caused by the source. - logger.warning( - "Ignoring special file error '%s' encountered copying %s to %s.", - str(e), - src, - dest, - ) - - -def _copy_source_tree(source: str, target: str) -> None: - target_abspath = os.path.abspath(target) - target_basename = os.path.basename(target_abspath) - target_dirname = os.path.dirname(target_abspath) - - def ignore(d: str, names: List[str]) -> List[str]: - skipped: List[str] = [] - if d == source: - # Pulling in those directories can potentially be very slow, - # exclude the following directories if they appear in the top - # level dir (and only it). - # See discussion at https://github.com/pypa/pip/pull/6770 - skipped += [".tox", ".nox"] - if os.path.abspath(d) == target_dirname: - # Prevent an infinite recursion if the target is in source. - # This can happen when TMPDIR is set to ${PWD}/... - # and we copy PWD to TMPDIR. - skipped += [target_basename] - return skipped - - shutil.copytree( - source, - target, - ignore=ignore, - symlinks=True, - copy_function=_copy2_ignoring_special_files, - ) - - def get_file_url( link: Link, download_dir: Optional[str] = None, hashes: Optional[Hashes] = None ) -> File: @@ -175,6 +139,7 @@ def unpack_url( link: Link, location: str, download: Downloader, + verbosity: int, download_dir: Optional[str] = None, hashes: Optional[Hashes] = None, ) -> Optional[File]: @@ -187,22 +152,10 @@ def unpack_url( """ # non-editable vcs urls if link.is_vcs: - unpack_vcs_link(link, location) + unpack_vcs_link(link, location, verbosity=verbosity) return None - # Once out-of-tree-builds are no longer supported, could potentially - # replace the below condition with `assert not link.is_existing_dir` - # - unpack_url does not need to be called for in-tree-builds. - # - # As further cleanup, _copy_source_tree and accompanying tests can - # be removed. - # - # TODO when use-deprecated=out-of-tree-build is removed - if link.is_existing_dir(): - if os.path.isdir(location): - rmtree(location) - _copy_source_tree(link.file_path, location) - return None + assert not link.is_existing_dir() # file urls if link.is_file: @@ -260,20 +213,21 @@ def __init__( download_dir: Optional[str], src_dir: str, build_isolation: bool, - req_tracker: RequirementTracker, + check_build_deps: bool, + build_tracker: BuildTracker, session: PipSession, progress_bar: str, finder: PackageFinder, require_hashes: bool, use_user_site: bool, lazy_wheel: bool, - in_tree_build: bool, + verbosity: int, ) -> None: super().__init__() self.src_dir = src_dir self.build_dir = build_dir - self.req_tracker = req_tracker + self.build_tracker = build_tracker self._session = session self._download = Downloader(session, progress_bar) self._batch_download = BatchDownloader(session, progress_bar) @@ -286,6 +240,9 @@ def __init__( # Is build isolation allowed? self.build_isolation = build_isolation + # Should check build dependencies? + self.check_build_deps = check_build_deps + # Should hash-checking be required? self.require_hashes = require_hashes @@ -295,8 +252,8 @@ def __init__( # Should wheels be downloaded lazily? self.use_lazy_wheel = lazy_wheel - # Should in-tree builds be used for local paths? - self.in_tree_build = in_tree_build + # How verbose should underlying tooling be? + self.verbosity = verbosity # Memoized downloaded files, as mapping of url: path. self._downloaded: Dict[str, str] = {} @@ -313,6 +270,16 @@ def _log_preparing_link(self, req: InstallRequirement) -> None: message = "Collecting %s" information = str(req.req or req) + # If we used req.req, inject requirement source if available (this + # would already be included if we used req directly) + if req.req and req.comes_from: + if isinstance(req.comes_from, str): + comes_from: Optional[str] = req.comes_from + else: + comes_from = req.comes_from.from_path() + if comes_from: + information += f" (from {comes_from})" + if (message, information) != self._previous_requirement_header: self._previous_requirement_header = (message, information) logger.info(message, information) @@ -331,7 +298,7 @@ def _ensure_link_req_src_dir( # directory. return assert req.source_dir is None - if req.link.is_existing_dir() and self.in_tree_build: + if req.link.is_existing_dir(): # build local directories in-tree req.source_dir = req.link.file_path return @@ -390,19 +357,72 @@ def _get_linked_req_hashes(self, req: InstallRequirement) -> Hashes: # showing the user what the hash should be. return req.hashes(trust_internet=False) or MissingHashes() + def _fetch_metadata_only( + self, + req: InstallRequirement, + ) -> Optional[BaseDistribution]: + if self.require_hashes: + logger.debug( + "Metadata-only fetching is not used as hash checking is required", + ) + return None + # Try PEP 658 metadata first, then fall back to lazy wheel if unavailable. + return self._fetch_metadata_using_link_data_attr( + req + ) or self._fetch_metadata_using_lazy_wheel(req.link) + + def _fetch_metadata_using_link_data_attr( + self, + req: InstallRequirement, + ) -> Optional[BaseDistribution]: + """Fetch metadata from the data-dist-info-metadata attribute, if possible.""" + # (1) Get the link to the metadata file, if provided by the backend. + metadata_link = req.link.metadata_link() + if metadata_link is None: + return None + assert req.req is not None + logger.info( + "Obtaining dependency information for %s from %s", + req.req, + metadata_link, + ) + # (2) Download the contents of the METADATA file, separate from the dist itself. + metadata_file = get_http_url( + metadata_link, + self._download, + hashes=metadata_link.as_hashes(), + ) + with open(metadata_file.path, "rb") as f: + metadata_contents = f.read() + # (3) Generate a dist just from those file contents. + metadata_dist = get_metadata_distribution( + metadata_contents, + req.link.filename, + req.req.name, + ) + # (4) Ensure the Name: field from the METADATA file matches the name from the + # install requirement. + # + # NB: raw_name will fall back to the name from the install requirement if + # the Name: field is not present, but it's noted in the raw_name docstring + # that that should NEVER happen anyway. + if metadata_dist.raw_name != req.req.name: + raise MetadataInconsistent( + req, "Name", req.req.name, metadata_dist.raw_name + ) + return metadata_dist + def _fetch_metadata_using_lazy_wheel( self, link: Link, ) -> Optional[BaseDistribution]: """Fetch metadata using lazy wheel, if possible.""" + # --use-feature=fast-deps must be provided. if not self.use_lazy_wheel: return None - if self.require_hashes: - logger.debug("Lazy wheel is not used as hash checking is required") - return None if link.is_file or not link.is_wheel: logger.debug( - "Lazy wheel is not used as %r does not points to a remote wheel", + "Lazy wheel is not used as %r does not point to a remote wheel", link, ) return None @@ -458,13 +478,12 @@ def prepare_linked_requirement( ) -> BaseDistribution: """Prepare a requirement to be obtained from req.link.""" assert req.link - link = req.link self._log_preparing_link(req) with indent_log(): # Check if the relevant file is already available # in the download directory file_path = None - if self.download_dir is not None and link.is_wheel: + if self.download_dir is not None and req.link.is_wheel: hashes = self._get_linked_req_hashes(req) file_path = _check_download_dir(req.link, self.download_dir, hashes) @@ -473,10 +492,10 @@ def prepare_linked_requirement( self._downloaded[req.link.url] = file_path else: # The file is not available, attempt to fetch only metadata - wheel_dist = self._fetch_metadata_using_lazy_wheel(link) - if wheel_dist is not None: + metadata_dist = self._fetch_metadata_only(req) + if metadata_dist is not None: req.needs_more_preparation = True - return wheel_dist + return metadata_dist # None of the optimizations worked, fully prepare the requirement return self._prepare_linked_requirement(req, parallel_builds) @@ -520,12 +539,17 @@ def _prepare_linked_requirement( self._ensure_link_req_src_dir(req, parallel_builds) hashes = self._get_linked_req_hashes(req) - if link.is_existing_dir() and self.in_tree_build: + if link.is_existing_dir(): local_file = None elif link.url not in self._downloaded: try: local_file = unpack_url( - link, req.source_dir, self._download, self.download_dir, hashes + link, + req.source_dir, + self._download, + self.verbosity, + self.download_dir, + hashes, ) except NetworkConnectionError as exc: raise InstallationError( @@ -538,6 +562,23 @@ def _prepare_linked_requirement( hashes.check_against_path(file_path) local_file = File(file_path, content_type=None) + # If download_info is set, we got it from the wheel cache. + if req.download_info is None: + # Editables don't go through this function (see + # prepare_editable_requirement). + assert not req.editable + req.download_info = direct_url_from_link(link, req.source_dir) + # Make sure we have a hash in download_info. If we got it as part of the + # URL, it will have been verified and we can rely on it. Otherwise we + # compute it from the downloaded file. + if ( + isinstance(req.download_info.info, ArchiveInfo) + and not req.download_info.info.hash + and local_file + ): + hash = hash_file(local_file.path)[0].hexdigest() + req.download_info.info.hash = f"sha256={hash}" + # For use in later processing, # preserve the file path on the requirement. if local_file: @@ -545,9 +586,10 @@ def _prepare_linked_requirement( dist = _get_prepared_distribution( req, - self.req_tracker, + self.build_tracker, self.finder, self.build_isolation, + self.check_build_deps, ) return dist @@ -595,12 +637,15 @@ def prepare_editable_requirement( ) req.ensure_has_source_dir(self.src_dir) req.update_editable() + assert req.source_dir + req.download_info = direct_url_for_editable(req.unpacked_source_directory) dist = _get_prepared_distribution( req, - self.req_tracker, + self.build_tracker, self.finder, self.build_isolation, + self.check_build_deps, ) req.check_if_exists(self.use_user_site) diff --git a/src/pip/_internal/pyproject.py b/src/pip/_internal/pyproject.py index 31534a3a9d3..1de9f0fde5d 100644 --- a/src/pip/_internal/pyproject.py +++ b/src/pip/_internal/pyproject.py @@ -1,3 +1,4 @@ +import importlib.util import os from collections import namedtuple from typing import Any, List, Optional @@ -5,7 +6,11 @@ from pip._vendor import tomli from pip._vendor.packaging.requirements import InvalidRequirement, Requirement -from pip._internal.exceptions import InstallationError +from pip._internal.exceptions import ( + InstallationError, + InvalidPyProjectBuildRequires, + MissingPyProjectBuildRequires, +) def _is_list_of_str(obj: Any) -> bool: @@ -56,7 +61,7 @@ def load_pyproject_toml( if has_pyproject: with open(pyproject_toml, encoding="utf-8") as f: - pp_toml = tomli.load(f) + pp_toml = tomli.loads(f.read()) build_system = pp_toml.get("build-system") else: build_system = None @@ -85,9 +90,15 @@ def load_pyproject_toml( # If we haven't worked out whether to use PEP 517 yet, # and the user hasn't explicitly stated a preference, - # we do so if the project has a pyproject.toml file. + # we do so if the project has a pyproject.toml file + # or if we cannot import setuptools. + + # We fallback to PEP 517 when without setuptools, + # so setuptools can be installed as a default build backend. + # For more info see: + # https://discuss.python.org/t/pip-without-setuptools-could-the-experience-be-improved/11810/9 elif use_pep517 is None: - use_pep517 = has_pyproject + use_pep517 = has_pyproject or not importlib.util.find_spec("setuptools") # At this point, we know whether we're going to use PEP 517. assert use_pep517 is not None @@ -119,47 +130,28 @@ def load_pyproject_toml( # Ensure that the build-system section in pyproject.toml conforms # to PEP 518. - error_template = ( - "{package} has a pyproject.toml file that does not comply " - "with PEP 518: {reason}" - ) # Specifying the build-system table but not the requires key is invalid if "requires" not in build_system: - raise InstallationError( - error_template.format( - package=req_name, - reason=( - "it has a 'build-system' table but not " - "'build-system.requires' which is mandatory in the table" - ), - ) - ) + raise MissingPyProjectBuildRequires(package=req_name) # Error out if requires is not a list of strings requires = build_system["requires"] if not _is_list_of_str(requires): - raise InstallationError( - error_template.format( - package=req_name, - reason="'build-system.requires' is not a list of strings.", - ) + raise InvalidPyProjectBuildRequires( + package=req_name, + reason="It is not a list of strings.", ) # Each requirement must be valid as per PEP 508 for requirement in requires: try: Requirement(requirement) - except InvalidRequirement: - raise InstallationError( - error_template.format( - package=req_name, - reason=( - "'build-system.requires' contains an invalid " - "requirement: {!r}".format(requirement) - ), - ) - ) + except InvalidRequirement as error: + raise InvalidPyProjectBuildRequires( + package=req_name, + reason=f"It contains an invalid requirement: {requirement!r}", + ) from error backend = build_system.get("build-backend") backend_path = build_system.get("backend-path", []) @@ -167,9 +159,8 @@ def load_pyproject_toml( if backend is None: # If the user didn't specify a backend, we assume they want to use # the setuptools backend. But we can't be sure they have included - # a version of setuptools which supplies the backend, or wheel - # (which is needed by the backend) in their requirements. So we - # make a note to check that those requirements are present once + # a version of setuptools which supplies the backend. So we + # make a note to check that this requirement is present once # we have set up the environment. # This is quite a lot of work to check for a very specific case. But # the problem is, that case is potentially quite common - projects that @@ -178,6 +169,6 @@ def load_pyproject_toml( # tools themselves. The original PEP 518 code had a similar check (but # implemented in a different way). backend = "setuptools.build_meta:__legacy__" - check = ["setuptools>=40.8.0", "wheel"] + check = ["setuptools>=40.8.0"] return BuildSystemDetails(requires, backend, check, backend_path) diff --git a/src/pip/_internal/req/__init__.py b/src/pip/_internal/req/__init__.py index 70dea27a6a8..16de903a44c 100644 --- a/src/pip/_internal/req/__init__.py +++ b/src/pip/_internal/req/__init__.py @@ -1,6 +1,6 @@ import collections import logging -from typing import Iterator, List, Optional, Sequence, Tuple +from typing import Generator, List, Optional, Sequence, Tuple from pip._internal.utils.logging import indent_log @@ -28,7 +28,7 @@ def __repr__(self) -> str: def _validate_requirements( requirements: List[InstallRequirement], -) -> Iterator[Tuple[str, InstallRequirement]]: +) -> Generator[Tuple[str, InstallRequirement], None, None]: for req in requirements: assert req.name, f"invalid to-be-installed requirement: {req}" yield req.name, req @@ -36,7 +36,6 @@ def _validate_requirements( def install_given_reqs( requirements: List[InstallRequirement], - install_options: List[str], global_options: Sequence[str], root: Optional[str], home: Optional[str], @@ -71,7 +70,6 @@ def install_given_reqs( try: requirement.install( - install_options, global_options, root=root, home=home, diff --git a/src/pip/_internal/req/constructors.py b/src/pip/_internal/req/constructors.py index 4a594037fd1..854b1b058d8 100644 --- a/src/pip/_internal/req/constructors.py +++ b/src/pip/_internal/req/constructors.py @@ -16,7 +16,6 @@ from pip._vendor.packaging.markers import Marker from pip._vendor.packaging.requirements import InvalidRequirement, Requirement from pip._vendor.packaging.specifiers import Specifier -from pip._vendor.pkg_resources import RequirementParseError, parse_requirements from pip._internal.exceptions import InstallationError from pip._internal.models.index import PyPI, TestPyPI @@ -113,31 +112,56 @@ def parse_editable(editable_req: str) -> Tuple[Optional[str], str, Set[str]]: return package_name, url, set() +def check_first_requirement_in_file(filename: str) -> None: + """Check if file is parsable as a requirements file. + + This is heavily based on ``pkg_resources.parse_requirements``, but + simplified to just check the first meaningful line. + + :raises InvalidRequirement: If the first meaningful line cannot be parsed + as an requirement. + """ + with open(filename, encoding="utf-8", errors="ignore") as f: + # Create a steppable iterator, so we can handle \-continuations. + lines = ( + line + for line in (line.strip() for line in f) + if line and not line.startswith("#") # Skip blank lines/comments. + ) + + for line in lines: + # Drop comments -- a hash without a space may be in a URL. + if " #" in line: + line = line[: line.find(" #")] + # If there is a line continuation, drop it, and append the next line. + if line.endswith("\\"): + line = line[:-2].strip() + next(lines, "") + Requirement(line) + return + + def deduce_helpful_msg(req: str) -> str: """Returns helpful msg in case requirements file does not exist, or cannot be parsed. :params req: Requirements file path """ - msg = "" - if os.path.exists(req): - msg = " The path does exist. " - # Try to parse and check if it is a requirements file. - try: - with open(req) as fp: - # parse first line only - next(parse_requirements(fp.read())) - msg += ( - "The argument you provided " - "({}) appears to be a" - " requirements file. If that is the" - " case, use the '-r' flag to install" - " the packages specified within it." - ).format(req) - except RequirementParseError: - logger.debug("Cannot parse '%s' as requirements file", req, exc_info=True) + if not os.path.exists(req): + return f" File '{req}' does not exist." + msg = " The path does exist. " + # Try to parse and check if it is a requirements file. + try: + check_first_requirement_in_file(req) + except InvalidRequirement: + logger.debug("Cannot parse '%s' as requirements file", req) else: - msg += f" File '{req}' does not exist." + msg += ( + f"The argument you provided " + f"({req}) appears to be a" + f" requirements file. If that is the" + f" case, use the '-r' flag to install" + f" the packages specified within it." + ) return msg @@ -183,6 +207,7 @@ def install_req_from_editable( constraint: bool = False, user_supplied: bool = False, permit_editable_wheels: bool = False, + config_settings: Optional[Dict[str, str]] = None, ) -> InstallRequirement: parts = parse_req_from_editable(editable_req) @@ -197,9 +222,9 @@ def install_req_from_editable( constraint=constraint, use_pep517=use_pep517, isolated=isolated, - install_options=options.get("install_options", []) if options else [], global_options=options.get("global_options", []) if options else [], hash_options=options.get("hashes", {}) if options else {}, + config_settings=config_settings, extras=parts.extras, ) @@ -356,6 +381,7 @@ def install_req_from_line( constraint: bool = False, line_source: Optional[str] = None, user_supplied: bool = False, + config_settings: Optional[Dict[str, str]] = None, ) -> InstallRequirement: """Creates an InstallRequirement from a name, which might be a requirement, directory containing 'setup.py', filename, or URL. @@ -372,9 +398,9 @@ def install_req_from_line( markers=parts.markers, use_pep517=use_pep517, isolated=isolated, - install_options=options.get("install_options", []) if options else [], global_options=options.get("global_options", []) if options else [], hash_options=options.get("hashes", {}) if options else {}, + config_settings=config_settings, constraint=constraint, extras=parts.extras, user_supplied=user_supplied, @@ -387,6 +413,7 @@ def install_req_from_req_string( isolated: bool = False, use_pep517: Optional[bool] = None, user_supplied: bool = False, + config_settings: Optional[Dict[str, str]] = None, ) -> InstallRequirement: try: req = get_requirement(req_string) @@ -416,6 +443,7 @@ def install_req_from_req_string( isolated=isolated, use_pep517=use_pep517, user_supplied=user_supplied, + config_settings=config_settings, ) @@ -424,6 +452,7 @@ def install_req_from_parsed_requirement( isolated: bool = False, use_pep517: Optional[bool] = None, user_supplied: bool = False, + config_settings: Optional[Dict[str, str]] = None, ) -> InstallRequirement: if parsed_req.is_editable: req = install_req_from_editable( @@ -433,6 +462,7 @@ def install_req_from_parsed_requirement( constraint=parsed_req.constraint, isolated=isolated, user_supplied=user_supplied, + config_settings=config_settings, ) else: @@ -445,6 +475,7 @@ def install_req_from_parsed_requirement( constraint=parsed_req.constraint, line_source=parsed_req.line_source, user_supplied=user_supplied, + config_settings=config_settings, ) return req @@ -460,7 +491,8 @@ def install_req_from_link_and_ireq( markers=ireq.markers, use_pep517=ireq.use_pep517, isolated=ireq.isolated, - install_options=ireq.install_options, global_options=ireq.global_options, hash_options=ireq.hash_options, + config_settings=ireq.config_settings, + user_supplied=ireq.user_supplied, ) diff --git a/src/pip/_internal/req/req_file.py b/src/pip/_internal/req/req_file.py index 03ae50492c5..f8f07b0cd96 100644 --- a/src/pip/_internal/req/req_file.py +++ b/src/pip/_internal/req/req_file.py @@ -13,8 +13,8 @@ Any, Callable, Dict, + Generator, Iterable, - Iterator, List, Optional, Tuple, @@ -69,7 +69,6 @@ # options to be passed to requirements SUPPORTED_OPTIONS_REQ: List[Callable[..., optparse.Option]] = [ - cmdoptions.install_options, cmdoptions.global_options, cmdoptions.hash, ] @@ -129,7 +128,7 @@ def parse_requirements( finder: Optional["PackageFinder"] = None, options: Optional[optparse.Values] = None, constraint: bool = False, -) -> Iterator[ParsedRequirement]: +) -> Generator[ParsedRequirement, None, None]: """Parse a requirements file and yield ParsedRequirement instances. :param filename: Path or url of requirements file. @@ -186,10 +185,6 @@ def handle_requirement_line( constraint=line.constraint, ) else: - if options: - # Disable wheels if the user has specified build options - cmdoptions.check_install_build_global(options, line.opts) - # get the options that apply to requirements req_options = {} for dest in SUPPORTED_OPTIONS_REQ_DEST: @@ -229,11 +224,13 @@ def handle_option_line( if finder: find_links = finder.find_links index_urls = finder.index_urls - if opts.index_url: - index_urls = [opts.index_url] + no_index = finder.search_scope.no_index if opts.no_index is True: + no_index = True index_urls = [] - if opts.extra_index_urls: + if opts.index_url and not no_index: + index_urls = [opts.index_url] + if opts.extra_index_urls and not no_index: index_urls.extend(opts.extra_index_urls) if opts.find_links: # FIXME: it would be nice to keep track of the source @@ -253,6 +250,7 @@ def handle_option_line( search_scope = SearchScope( find_links=find_links, index_urls=index_urls, + no_index=no_index, ) finder.search_scope = search_scope @@ -321,13 +319,15 @@ def __init__( self._session = session self._line_parser = line_parser - def parse(self, filename: str, constraint: bool) -> Iterator[ParsedLine]: + def parse( + self, filename: str, constraint: bool + ) -> Generator[ParsedLine, None, None]: """Parse a given file, yielding parsed lines.""" yield from self._parse_and_recurse(filename, constraint) def _parse_and_recurse( self, filename: str, constraint: bool - ) -> Iterator[ParsedLine]: + ) -> Generator[ParsedLine, None, None]: for line in self._parse_file(filename, constraint): if not line.is_requirement and ( line.opts.requirements or line.opts.constraints @@ -356,7 +356,9 @@ def _parse_and_recurse( else: yield line - def _parse_file(self, filename: str, constraint: bool) -> Iterator[ParsedLine]: + def _parse_file( + self, filename: str, constraint: bool + ) -> Generator[ParsedLine, None, None]: _, content = get_file_content(filename, self._session) lines_enum = preprocess(content) @@ -390,7 +392,12 @@ def parse_line(line: str) -> Tuple[str, Values]: args_str, options_str = break_args_options(line) - opts, _ = parser.parse_args(shlex.split(options_str), defaults) + try: + options = shlex.split(options_str) + except ValueError as e: + raise OptionParsingError(f"Could not split options: {options_str}") from e + + opts, _ = parser.parse_args(options, defaults) return args_str, opts diff --git a/src/pip/_internal/req/req_install.py b/src/pip/_internal/req/req_install.py index 19f1904af67..9807f690f37 100644 --- a/src/pip/_internal/req/req_install.py +++ b/src/pip/_internal/req/req_install.py @@ -8,21 +8,28 @@ import sys import uuid import zipfile +from optparse import Values from typing import Any, Collection, Dict, Iterable, List, Optional, Sequence, Union -from pip._vendor import pkg_resources from pip._vendor.packaging.markers import Marker from pip._vendor.packaging.requirements import Requirement from pip._vendor.packaging.specifiers import SpecifierSet from pip._vendor.packaging.utils import canonicalize_name from pip._vendor.packaging.version import Version from pip._vendor.packaging.version import parse as parse_version -from pip._vendor.pep517.wrappers import Pep517HookCaller -from pip._vendor.pkg_resources import Distribution +from pip._vendor.pyproject_hooks import BuildBackendHookCaller from pip._internal.build_env import BuildEnvironment, NoOpBuildEnvironment -from pip._internal.exceptions import InstallationError +from pip._internal.exceptions import InstallationError, LegacyInstallFailure from pip._internal.locations import get_scheme +from pip._internal.metadata import ( + BaseDistribution, + get_default_environment, + get_directory_distribution, + get_wheel_distribution, +) +from pip._internal.metadata.base import FilesystemWheel +from pip._internal.models.direct_url import DirectUrl from pip._internal.models.link import Link from pip._internal.operations.build.metadata import generate_metadata from pip._internal.operations.build.metadata_editable import generate_editable_metadata @@ -32,28 +39,25 @@ from pip._internal.operations.install.editable_legacy import ( install_editable as install_editable_legacy, ) -from pip._internal.operations.install.legacy import LegacyInstallFailure from pip._internal.operations.install.legacy import install as install_legacy from pip._internal.operations.install.wheel import install_wheel from pip._internal.pyproject import load_pyproject_toml, make_pyproject_path from pip._internal.req.req_uninstall import UninstallPathSet -from pip._internal.utils.deprecation import deprecated +from pip._internal.utils.deprecation import LegacyInstallReason, deprecated from pip._internal.utils.direct_url_helpers import ( direct_url_for_editable, direct_url_from_link, ) from pip._internal.utils.hashes import Hashes from pip._internal.utils.misc import ( + ConfiguredBuildBackendHookCaller, ask_path_exists, backup_dir, display_path, - dist_in_site_packages, - dist_in_usersite, - get_distribution, hide_url, redact_auth_from_url, ) -from pip._internal.utils.packaging import get_metadata +from pip._internal.utils.packaging import safe_extra from pip._internal.utils.subprocess import runner_with_spinner_message from pip._internal.utils.temp_dir import TempDirectory, tempdir_kinds from pip._internal.utils.virtualenv import running_under_virtualenv @@ -62,32 +66,6 @@ logger = logging.getLogger(__name__) -def _get_dist(metadata_directory: str) -> Distribution: - """Return a pkg_resources.Distribution for the provided - metadata directory. - """ - dist_dir = metadata_directory.rstrip(os.sep) - - # Build a PathMetadata object, from path to metadata. :wink: - base_dir, dist_dir_name = os.path.split(dist_dir) - metadata = pkg_resources.PathMetadata(base_dir, dist_dir) - - # Determine the correct Distribution object type. - if dist_dir.endswith(".egg-info"): - dist_cls = pkg_resources.Distribution - dist_name = os.path.splitext(dist_dir_name)[0] - else: - assert dist_dir.endswith(".dist-info") - dist_cls = pkg_resources.DistInfoDistribution - dist_name = os.path.splitext(dist_dir_name)[0].split("-")[0] - - return dist_cls( - base_dir, - project_name=dist_name, - metadata=metadata, - ) - - class InstallRequirement: """ Represents something that may be installed later on, may have information @@ -104,9 +82,10 @@ def __init__( markers: Optional[Marker] = None, use_pep517: Optional[bool] = None, isolated: bool = False, - install_options: Optional[List[str]] = None, + *, global_options: Optional[List[str]] = None, hash_options: Optional[Dict[str, List[str]]] = None, + config_settings: Optional[Dict[str, str]] = None, constraint: bool = False, extras: Collection[str] = (), user_supplied: bool = False, @@ -118,7 +97,7 @@ def __init__( self.constraint = constraint self.editable = editable self.permit_editable_wheels = permit_editable_wheels - self.legacy_install_reason: Optional[int] = None + self.legacy_install_reason: Optional[LegacyInstallReason] = None # source_dir is the local directory where the linked requirement is # located, or unpacked. In case unpacking is needed, creating and @@ -137,6 +116,10 @@ def __init__( self.link = self.original_link = link self.original_link_is_in_wheel_cache = False + # Information about the location of the artifact that was downloaded . This + # property is guaranteed to be set in resolver results. + self.download_info: Optional[DirectUrl] = None + # Path to any downloaded or already-existing package. self.local_file_path: Optional[str] = None if self.link and self.link.is_file: @@ -145,16 +128,15 @@ def __init__( if extras: self.extras = extras elif req: - self.extras = {pkg_resources.safe_extra(extra) for extra in req.extras} + self.extras = {safe_extra(extra) for extra in req.extras} else: self.extras = set() if markers is None and req: markers = req.marker self.markers = markers - # This holds the pkg_resources.Distribution object if this requirement - # is already available: - self.satisfied_by: Optional[Distribution] = None + # This holds the Distribution object if this requirement is already installed. + self.satisfied_by: Optional[BaseDistribution] = None # Whether the installation process should try to uninstall an existing # distribution before installing this requirement. self.should_reinstall = False @@ -163,9 +145,9 @@ def __init__( # Set to True after successful installation self.install_succeeded: Optional[bool] = None # Supplied options - self.install_options = install_options if install_options else [] self.global_options = global_options if global_options else [] self.hash_options = hash_options if hash_options else {} + self.config_settings = config_settings # Set to True after successful preparation of this requirement self.prepared = False # User supplied requirement are explicitly requested for installation @@ -189,7 +171,7 @@ def __init__( self.requirements_to_check: List[str] = [] # The PEP 517 backend we should use to build the project - self.pep517_backend: Optional[Pep517HookCaller] = None + self.pep517_backend: Optional[BuildBackendHookCaller] = None # Are we using PEP 517 for this requirement? # After pyproject.toml has been loaded, the only valid values are True @@ -211,7 +193,11 @@ def __str__(self) -> str: else: s = "" if self.satisfied_by is not None: - s += " in {}".format(display_path(self.satisfied_by.location)) + if self.satisfied_by.location is not None: + location = display_path(self.satisfied_by.location) + else: + location = "" + s += f" in {location}" if self.comes_from: if isinstance(self.comes_from, str): comes_from: Optional[str] = self.comes_from @@ -242,7 +228,7 @@ def format_debug(self) -> str: def name(self) -> Optional[str]: if self.req is None: return None - return pkg_resources.safe_name(self.req.name) + return self.req.name @functools.lru_cache() # use cached_property in python 3.8+ def supports_pyproject_editable(self) -> bool: @@ -415,32 +401,24 @@ def check_if_exists(self, use_user_site: bool) -> None: """ if self.req is None: return - existing_dist = get_distribution(self.req.name) + existing_dist = get_default_environment().get_distribution(self.req.name) if not existing_dist: return - # pkg_resources may contain a different copy of packaging.version from - # pip in if the downstream distributor does a poor job debundling pip. - # We avoid existing_dist.parsed_version and let SpecifierSet.contains - # parses the version instead. - existing_version = existing_dist.version - version_compatible = ( - existing_version is not None - and self.req.specifier.contains(existing_version, prereleases=True) + version_compatible = self.req.specifier.contains( + existing_dist.version, + prereleases=True, ) if not version_compatible: self.satisfied_by = None if use_user_site: - if dist_in_usersite(existing_dist): + if existing_dist.in_usersite: self.should_reinstall = True - elif running_under_virtualenv() and dist_in_site_packages( - existing_dist - ): + elif running_under_virtualenv() and existing_dist.in_site_packages: raise InstallationError( - "Will not install to the user site because it will " - "lack sys.path precedence to {} in {}".format( - existing_dist.project_name, existing_dist.location - ) + f"Will not install to the user site because it will " + f"lack sys.path precedence to {existing_dist.raw_name} " + f"in {existing_dist.location}" ) else: self.should_reinstall = True @@ -506,7 +484,8 @@ def load_pyproject_toml(self) -> None: requires, backend, check, backend_path = pyproject_toml_data self.requirements_to_check = check self.pyproject_requires = requires - self.pep517_backend = Pep517HookCaller( + self.pep517_backend = ConfiguredBuildBackendHookCaller( + self, self.unpacked_source_directory, backend, backend_path=backend_path, @@ -540,6 +519,7 @@ def prepare_metadata(self) -> None: Under legacy processing, call setup.py egg-info. """ assert self.source_dir + details = self.name or f"from {self.link}" if self.use_pep517: assert self.pep517_backend is not None @@ -551,11 +531,13 @@ def prepare_metadata(self) -> None: self.metadata_directory = generate_editable_metadata( build_env=self.build_env, backend=self.pep517_backend, + details=details, ) else: self.metadata_directory = generate_metadata( build_env=self.build_env, backend=self.pep517_backend, + details=details, ) else: self.metadata_directory = generate_metadata_legacy( @@ -563,7 +545,7 @@ def prepare_metadata(self) -> None: setup_py_path=self.setup_py_path, source_dir=self.unpacked_source_directory, isolated=self.isolated, - details=self.name or f"from {self.link}", + details=details, ) # Act on the newly generated metadata, based on the name and version. @@ -577,12 +559,21 @@ def prepare_metadata(self) -> None: @property def metadata(self) -> Any: if not hasattr(self, "_metadata"): - self._metadata = get_metadata(self.get_dist()) + self._metadata = self.get_dist().metadata return self._metadata - def get_dist(self) -> Distribution: - return _get_dist(self.metadata_directory) + def get_dist(self) -> BaseDistribution: + if self.metadata_directory: + return get_directory_distribution(self.metadata_directory) + elif self.local_file_path and self.is_wheel: + return get_wheel_distribution( + FilesystemWheel(self.local_file_path), canonicalize_name(self.name) + ) + raise AssertionError( + f"InstallRequirement {self} has no metadata directory and no wheel: " + f"can't make a distribution." + ) def assert_source_matches_version(self) -> None: assert self.source_dir @@ -642,7 +633,7 @@ def update_editable(self) -> None: # So here, if it's neither a path nor a valid VCS URL, it's a bug. assert vcs_backend, f"Unsupported VCS URL {self.link.url}" hidden_url = hide_url(self.link.url) - vcs_backend.obtain(self.source_dir, url=hidden_url) + vcs_backend.obtain(self.source_dir, url=hidden_url, verbosity=0) # Top-level Actions def uninstall( @@ -661,7 +652,7 @@ def uninstall( """ assert self.req - dist = get_distribution(self.req.name) + dist = get_default_environment().get_distribution(self.req.name) if not dist: logger.warning("Skipping %s as it is not installed.", self.name) return None @@ -753,7 +744,6 @@ def archive(self, build_dir: Optional[str]) -> None: def install( self, - install_options: List[str], global_options: Optional[Sequence[str]] = None, root: Optional[str] = None, home: Optional[str] = None, @@ -774,8 +764,7 @@ def install( global_options = global_options if global_options is not None else [] if self.editable and not self.is_wheel: install_editable_legacy( - install_options, - global_options, + global_options=global_options, prefix=prefix, home=home, use_user_site=use_user_site, @@ -791,6 +780,7 @@ def install( if self.is_wheel: assert self.local_file_path direct_url = None + # TODO this can be refactored to direct_url = self.download_info if self.editable: direct_url = direct_url_for_editable(self.unpacked_source_directory) elif self.original_link: @@ -814,17 +804,20 @@ def install( # TODO: Why don't we do this for editable installs? - # Extend the list of global and install options passed on to + # Extend the list of global options passed on to # the setup.py call with the ones from the requirements file. # Options specified in requirements file override those # specified on the command line, since the last option given # to setup.py is the one that is used. global_options = list(global_options) + self.global_options - install_options = list(install_options) + self.install_options try: + if ( + self.legacy_install_reason is not None + and self.legacy_install_reason.emit_before_install + ): + self.legacy_install_reason.emit_deprecation(self.name) success = install_legacy( - install_options=install_options, global_options=global_options, root=root, home=home, @@ -841,25 +834,19 @@ def install( ) except LegacyInstallFailure as exc: self.install_succeeded = False - raise exc.__cause__ + raise exc except Exception: self.install_succeeded = True raise self.install_succeeded = success - if success and self.legacy_install_reason == 8368: - deprecated( - reason=( - "{} was installed using the legacy 'setup.py install' " - "method, because a wheel could not be built for it.".format( - self.name - ) - ), - replacement="to fix the wheel build issue reported above", - gone_in=None, - issue=8368, - ) + if ( + success + and self.legacy_install_reason is not None + and self.legacy_install_reason.emit_after_success + ): + self.legacy_install_reason.emit_deprecation(self.name) def check_invalid_constraint_type(req: InstallRequirement) -> str: @@ -889,3 +876,27 @@ def check_invalid_constraint_type(req: InstallRequirement) -> str: ) return problem + + +def _has_option(options: Values, reqs: List[InstallRequirement], option: str) -> bool: + if getattr(options, option, None): + return True + for req in reqs: + if getattr(req, option, None): + return True + return False + + +def check_legacy_setup_py_options( + options: Values, + reqs: List[InstallRequirement], +) -> None: + has_build_options = _has_option(options, reqs, "build_options") + has_global_options = _has_option(options, reqs, "global_options") + if has_build_options or has_global_options: + logger.warning( + "Implying --no-binary=:all: due to the presence of " + "--build-option / --global-option. " + "Consider using --config-settings for more flexibility.", + ) + options.format_control.disallow_binaries() diff --git a/src/pip/_internal/req/req_set.py b/src/pip/_internal/req/req_set.py index 6626c37e2e1..ec7a6e07a25 100644 --- a/src/pip/_internal/req/req_set.py +++ b/src/pip/_internal/req/req_set.py @@ -1,13 +1,10 @@ import logging from collections import OrderedDict -from typing import Dict, Iterable, List, Optional, Tuple +from typing import Dict, List from pip._vendor.packaging.utils import canonicalize_name -from pip._internal.exceptions import InstallationError -from pip._internal.models.wheel import Wheel from pip._internal.req.req_install import InstallRequirement -from pip._internal.utils import compatibility_tags logger = logging.getLogger(__name__) @@ -51,123 +48,6 @@ def add_named_requirement(self, install_req: InstallRequirement) -> None: project_name = canonicalize_name(install_req.name) self.requirements[project_name] = install_req - def add_requirement( - self, - install_req: InstallRequirement, - parent_req_name: Optional[str] = None, - extras_requested: Optional[Iterable[str]] = None, - ) -> Tuple[List[InstallRequirement], Optional[InstallRequirement]]: - """Add install_req as a requirement to install. - - :param parent_req_name: The name of the requirement that needed this - added. The name is used because when multiple unnamed requirements - resolve to the same name, we could otherwise end up with dependency - links that point outside the Requirements set. parent_req must - already be added. Note that None implies that this is a user - supplied requirement, vs an inferred one. - :param extras_requested: an iterable of extras used to evaluate the - environment markers. - :return: Additional requirements to scan. That is either [] if - the requirement is not applicable, or [install_req] if the - requirement is applicable and has just been added. - """ - # If the markers do not match, ignore this requirement. - if not install_req.match_markers(extras_requested): - logger.info( - "Ignoring %s: markers '%s' don't match your environment", - install_req.name, - install_req.markers, - ) - return [], None - - # If the wheel is not supported, raise an error. - # Should check this after filtering out based on environment markers to - # allow specifying different wheels based on the environment/OS, in a - # single requirements file. - if install_req.link and install_req.link.is_wheel: - wheel = Wheel(install_req.link.filename) - tags = compatibility_tags.get_supported() - if self.check_supported_wheels and not wheel.supported(tags): - raise InstallationError( - "{} is not a supported wheel on this platform.".format( - wheel.filename - ) - ) - - # This next bit is really a sanity check. - assert ( - not install_req.user_supplied or parent_req_name is None - ), "a user supplied req shouldn't have a parent" - - # Unnamed requirements are scanned again and the requirement won't be - # added as a dependency until after scanning. - if not install_req.name: - self.add_unnamed_requirement(install_req) - return [install_req], None - - try: - existing_req: Optional[InstallRequirement] = self.get_requirement( - install_req.name - ) - except KeyError: - existing_req = None - - has_conflicting_requirement = ( - parent_req_name is None - and existing_req - and not existing_req.constraint - and existing_req.extras == install_req.extras - and existing_req.req - and install_req.req - and existing_req.req.specifier != install_req.req.specifier - ) - if has_conflicting_requirement: - raise InstallationError( - "Double requirement given: {} (already in {}, name={!r})".format( - install_req, existing_req, install_req.name - ) - ) - - # When no existing requirement exists, add the requirement as a - # dependency and it will be scanned again after. - if not existing_req: - self.add_named_requirement(install_req) - # We'd want to rescan this requirement later - return [install_req], install_req - - # Assume there's no need to scan, and that we've already - # encountered this for scanning. - if install_req.constraint or not existing_req.constraint: - return [], existing_req - - does_not_satisfy_constraint = install_req.link and not ( - existing_req.link and install_req.link.path == existing_req.link.path - ) - if does_not_satisfy_constraint: - raise InstallationError( - "Could not satisfy constraints for '{}': " - "installation from path or url cannot be " - "constrained to a version".format(install_req.name) - ) - # If we're now installing a constraint, mark the existing - # object for real installation. - existing_req.constraint = False - # If we're now installing a user supplied requirement, - # mark the existing object as such. - if install_req.user_supplied: - existing_req.user_supplied = True - existing_req.extras = tuple( - sorted(set(existing_req.extras) | set(install_req.extras)) - ) - logger.debug( - "Setting %s extras to: %s", - existing_req, - existing_req.extras, - ) - # Return the existing requirement for addition to the parent and - # scanning again. - return [existing_req], existing_req - def has_requirement(self, name: str) -> bool: project_name = canonicalize_name(name) @@ -187,3 +67,16 @@ def get_requirement(self, name: str) -> InstallRequirement: @property def all_requirements(self) -> List[InstallRequirement]: return self.unnamed_requirements + list(self.requirements.values()) + + @property + def requirements_to_install(self) -> List[InstallRequirement]: + """Return the list of requirements that need to be installed. + + TODO remove this property together with the legacy resolver, since the new + resolver only returns requirements that need to be installed. + """ + return [ + install_req + for install_req in self.all_requirements + if not install_req.constraint and not install_req.satisfied_by + ] diff --git a/src/pip/_internal/req/req_uninstall.py b/src/pip/_internal/req/req_uninstall.py index b135017907b..641aaad76bd 100644 --- a/src/pip/_internal/req/req_uninstall.py +++ b/src/pip/_internal/req/req_uninstall.py @@ -1,57 +1,46 @@ -import csv import functools import os import sys import sysconfig from importlib.util import cache_from_source -from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Set, Tuple - -from pip._vendor import pkg_resources -from pip._vendor.pkg_resources import Distribution +from typing import Any, Callable, Dict, Generator, Iterable, List, Optional, Set, Tuple from pip._internal.exceptions import UninstallationError from pip._internal.locations import get_bin_prefix, get_bin_user +from pip._internal.metadata import BaseDistribution from pip._internal.utils.compat import WINDOWS from pip._internal.utils.egg_link import egg_link_path_from_location from pip._internal.utils.logging import getLogger, indent_log -from pip._internal.utils.misc import ( - ask, - dist_in_usersite, - dist_is_local, - is_local, - normalize_path, - renames, - rmtree, -) +from pip._internal.utils.misc import ask, is_local, normalize_path, renames, rmtree from pip._internal.utils.temp_dir import AdjacentTempDirectory, TempDirectory logger = getLogger(__name__) -def _script_names(dist: Distribution, script_name: str, is_gui: bool) -> List[str]: +def _script_names( + bin_dir: str, script_name: str, is_gui: bool +) -> Generator[str, None, None]: """Create the fully qualified name of the files created by {console,gui}_scripts for the given ``dist``. Returns the list of file names """ - if dist_in_usersite(dist): - bin_dir = get_bin_user() - else: - bin_dir = get_bin_prefix() exe_name = os.path.join(bin_dir, script_name) - paths_to_remove = [exe_name] - if WINDOWS: - paths_to_remove.append(exe_name + ".exe") - paths_to_remove.append(exe_name + ".exe.manifest") - if is_gui: - paths_to_remove.append(exe_name + "-script.pyw") - else: - paths_to_remove.append(exe_name + "-script.py") - return paths_to_remove + yield exe_name + if not WINDOWS: + return + yield f"{exe_name}.exe" + yield f"{exe_name}.exe.manifest" + if is_gui: + yield f"{exe_name}-script.pyw" + else: + yield f"{exe_name}-script.py" -def _unique(fn: Callable[..., Iterator[Any]]) -> Callable[..., Iterator[Any]]: +def _unique( + fn: Callable[..., Generator[Any, None, None]] +) -> Callable[..., Generator[Any, None, None]]: @functools.wraps(fn) - def unique(*args: Any, **kw: Any) -> Iterator[Any]: + def unique(*args: Any, **kw: Any) -> Generator[Any, None, None]: seen: Set[Any] = set() for item in fn(*args, **kw): if item not in seen: @@ -62,7 +51,7 @@ def unique(*args: Any, **kw: Any) -> Iterator[Any]: @_unique -def uninstallation_paths(dist: Distribution) -> Iterator[str]: +def uninstallation_paths(dist: BaseDistribution) -> Generator[str, None, None]: """ Yield all the uninstallation paths for dist based on RECORD-without-.py[co] @@ -76,25 +65,25 @@ def uninstallation_paths(dist: Distribution) -> Iterator[str]: https://packaging.python.org/specifications/recording-installed-packages/ """ - try: - r = csv.reader(dist.get_metadata_lines("RECORD")) - except FileNotFoundError as missing_record_exception: + location = dist.location + assert location is not None, "not installed" + + entries = dist.iter_declared_entries() + if entries is None: msg = "Cannot uninstall {dist}, RECORD file not found.".format(dist=dist) - try: - installer = next(dist.get_metadata_lines("INSTALLER")) - if not installer or installer == "pip": - raise ValueError() - except (OSError, StopIteration, ValueError): - dep = "{}=={}".format(dist.project_name, dist.version) + installer = dist.installer + if not installer or installer == "pip": + dep = "{}=={}".format(dist.raw_name, dist.version) msg += ( " You might be able to recover from this via: " "'pip install --force-reinstall --no-deps {}'.".format(dep) ) else: msg += " Hint: The package was installed by {}.".format(installer) - raise UninstallationError(msg) from missing_record_exception - for row in r: - path = os.path.join(dist.location, row[0]) + raise UninstallationError(msg) + + for entry in entries: + path = os.path.join(location, entry) yield path if path.endswith(".py"): dn, fn = os.path.split(path) @@ -317,11 +306,11 @@ class UninstallPathSet: """A set of file paths to be removed in the uninstallation of a requirement.""" - def __init__(self, dist: Distribution) -> None: - self.paths: Set[str] = set() + def __init__(self, dist: BaseDistribution) -> None: + self._paths: Set[str] = set() self._refuse: Set[str] = set() - self.pth: Dict[str, UninstallPthEntries] = {} - self.dist = dist + self._pth: Dict[str, UninstallPthEntries] = {} + self._dist = dist self._moved_paths = StashedUninstallPathSet() def _permitted(self, path: str) -> bool: @@ -342,7 +331,7 @@ def add(self, path: str) -> None: if not os.path.exists(path): return if self._permitted(path): - self.paths.add(path) + self._paths.add(path) else: self._refuse.add(path) @@ -354,37 +343,37 @@ def add(self, path: str) -> None: def add_pth(self, pth_file: str, entry: str) -> None: pth_file = normalize_path(pth_file) if self._permitted(pth_file): - if pth_file not in self.pth: - self.pth[pth_file] = UninstallPthEntries(pth_file) - self.pth[pth_file].add(entry) + if pth_file not in self._pth: + self._pth[pth_file] = UninstallPthEntries(pth_file) + self._pth[pth_file].add(entry) else: self._refuse.add(pth_file) def remove(self, auto_confirm: bool = False, verbose: bool = False) -> None: - """Remove paths in ``self.paths`` with confirmation (unless + """Remove paths in ``self._paths`` with confirmation (unless ``auto_confirm`` is True).""" - if not self.paths: + if not self._paths: logger.info( "Can't uninstall '%s'. No files were found to uninstall.", - self.dist.project_name, + self._dist.raw_name, ) return - dist_name_version = self.dist.project_name + "-" + self.dist.version + dist_name_version = f"{self._dist.raw_name}-{self._dist.version}" logger.info("Uninstalling %s:", dist_name_version) with indent_log(): if auto_confirm or self._allowed_to_proceed(verbose): moved = self._moved_paths - for_rename = compress_for_rename(self.paths) + for_rename = compress_for_rename(self._paths) for path in sorted(compact(for_rename)): moved.stash(path) logger.verbose("Removing file or directory %s", path) - for pth in self.pth.values(): + for pth in self._pth.values(): pth.remove() logger.info("Successfully uninstalled %s", dist_name_version) @@ -402,18 +391,18 @@ def _display(msg: str, paths: Iterable[str]) -> None: logger.info(path) if not verbose: - will_remove, will_skip = compress_for_output_listing(self.paths) + will_remove, will_skip = compress_for_output_listing(self._paths) else: # In verbose mode, display all the files that are going to be # deleted. - will_remove = set(self.paths) + will_remove = set(self._paths) will_skip = set() _display("Would remove:", will_remove) _display("Would not remove (might be manually added):", will_skip) _display("Would not remove (outside of prefix):", self._refuse) if verbose: - _display("Will actually move:", compress_for_rename(self.paths)) + _display("Will actually move:", compress_for_rename(self._paths)) return ask("Proceed (Y/n)? ", ("y", "n", "")) != "n" @@ -422,12 +411,12 @@ def rollback(self) -> None: if not self._moved_paths.can_rollback: logger.error( "Can't roll back %s; was not uninstalled", - self.dist.project_name, + self._dist.raw_name, ) return - logger.info("Rolling back uninstall of %s", self.dist.project_name) + logger.info("Rolling back uninstall of %s", self._dist.raw_name) self._moved_paths.rollback() - for pth in self.pth.values(): + for pth in self._pth.values(): pth.rollback() def commit(self) -> None: @@ -435,142 +424,159 @@ def commit(self) -> None: self._moved_paths.commit() @classmethod - def from_dist(cls, dist: Distribution) -> "UninstallPathSet": - dist_path = normalize_path(dist.location) - if not dist_is_local(dist): + def from_dist(cls, dist: BaseDistribution) -> "UninstallPathSet": + dist_location = dist.location + info_location = dist.info_location + if dist_location is None: + logger.info( + "Not uninstalling %s since it is not installed", + dist.canonical_name, + ) + return cls(dist) + + normalized_dist_location = normalize_path(dist_location) + if not dist.local: logger.info( "Not uninstalling %s at %s, outside environment %s", - dist.key, - dist_path, + dist.canonical_name, + normalized_dist_location, sys.prefix, ) return cls(dist) - if dist_path in { + if normalized_dist_location in { p for p in {sysconfig.get_path("stdlib"), sysconfig.get_path("platstdlib")} if p }: logger.info( "Not uninstalling %s at %s, as it is in the standard library.", - dist.key, - dist_path, + dist.canonical_name, + normalized_dist_location, ) return cls(dist) paths_to_remove = cls(dist) - develop_egg_link = egg_link_path_from_location(dist.project_name) - develop_egg_link_egg_info = "{}.egg-info".format( - pkg_resources.to_filename(dist.project_name) + develop_egg_link = egg_link_path_from_location(dist.raw_name) + + # Distribution is installed with metadata in a "flat" .egg-info + # directory. This means it is not a modern .dist-info installation, an + # egg, or legacy editable. + setuptools_flat_installation = ( + dist.installed_with_setuptools_egg_info + and info_location is not None + and os.path.exists(info_location) + # If dist is editable and the location points to a ``.egg-info``, + # we are in fact in the legacy editable case. + and not info_location.endswith(f"{dist.setuptools_filename}.egg-info") ) - egg_info_exists = dist.egg_info and os.path.exists(dist.egg_info) - # Special case for distutils installed package - distutils_egg_info = getattr(dist._provider, "path", None) # Uninstall cases order do matter as in the case of 2 installs of the # same package, pip needs to uninstall the currently detected version - if ( - egg_info_exists - and dist.egg_info.endswith(".egg-info") - and not dist.egg_info.endswith(develop_egg_link_egg_info) - ): - # if dist.egg_info.endswith(develop_egg_link_egg_info), we - # are in fact in the develop_egg_link case - paths_to_remove.add(dist.egg_info) - if dist.has_metadata("installed-files.txt"): - for installed_file in dist.get_metadata( - "installed-files.txt" - ).splitlines(): - path = os.path.normpath(os.path.join(dist.egg_info, installed_file)) - paths_to_remove.add(path) + if setuptools_flat_installation: + if info_location is not None: + paths_to_remove.add(info_location) + installed_files = dist.iter_declared_entries() + if installed_files is not None: + for installed_file in installed_files: + paths_to_remove.add(os.path.join(dist_location, installed_file)) # FIXME: need a test for this elif block # occurs with --single-version-externally-managed/--record outside # of pip - elif dist.has_metadata("top_level.txt"): - if dist.has_metadata("namespace_packages.txt"): - namespaces = dist.get_metadata("namespace_packages.txt") - else: + elif dist.is_file("top_level.txt"): + try: + namespace_packages = dist.read_text("namespace_packages.txt") + except FileNotFoundError: namespaces = [] + else: + namespaces = namespace_packages.splitlines(keepends=False) for top_level_pkg in [ p - for p in dist.get_metadata("top_level.txt").splitlines() + for p in dist.read_text("top_level.txt").splitlines() if p and p not in namespaces ]: - path = os.path.join(dist.location, top_level_pkg) + path = os.path.join(dist_location, top_level_pkg) paths_to_remove.add(path) - paths_to_remove.add(path + ".py") - paths_to_remove.add(path + ".pyc") - paths_to_remove.add(path + ".pyo") + paths_to_remove.add(f"{path}.py") + paths_to_remove.add(f"{path}.pyc") + paths_to_remove.add(f"{path}.pyo") - elif distutils_egg_info: + elif dist.installed_by_distutils: raise UninstallationError( "Cannot uninstall {!r}. It is a distutils installed project " "and thus we cannot accurately determine which files belong " "to it which would lead to only a partial uninstall.".format( - dist.project_name, + dist.raw_name, ) ) - elif dist.location.endswith(".egg"): + elif dist.installed_as_egg: # package installed by easy_install # We cannot match on dist.egg_name because it can slightly vary # i.e. setuptools-0.6c11-py2.6.egg vs setuptools-0.6rc11-py2.6.egg - paths_to_remove.add(dist.location) - easy_install_egg = os.path.split(dist.location)[1] + paths_to_remove.add(dist_location) + easy_install_egg = os.path.split(dist_location)[1] easy_install_pth = os.path.join( - os.path.dirname(dist.location), "easy-install.pth" + os.path.dirname(dist_location), + "easy-install.pth", ) paths_to_remove.add_pth(easy_install_pth, "./" + easy_install_egg) - elif egg_info_exists and dist.egg_info.endswith(".dist-info"): + elif dist.installed_with_dist_info: for path in uninstallation_paths(dist): paths_to_remove.add(path) elif develop_egg_link: - # develop egg + # PEP 660 modern editable is handled in the ``.dist-info`` case + # above, so this only covers the setuptools-style editable. with open(develop_egg_link) as fh: link_pointer = os.path.normcase(fh.readline().strip()) - assert link_pointer == dist.location, ( - "Egg-link located at {} and pointing to {} does not match " - "installed location of {} at {}".format( - develop_egg_link, link_pointer, dist.project_name, dist.location - ) + normalized_link_pointer = normalize_path(link_pointer) + assert os.path.samefile( + normalized_link_pointer, normalized_dist_location + ), ( + f"Egg-link {develop_egg_link} (to {link_pointer}) does not match " + f"installed location of {dist.raw_name} (at {dist_location})" ) paths_to_remove.add(develop_egg_link) easy_install_pth = os.path.join( os.path.dirname(develop_egg_link), "easy-install.pth" ) - paths_to_remove.add_pth(easy_install_pth, dist.location) + paths_to_remove.add_pth(easy_install_pth, dist_location) else: logger.debug( "Not sure how to uninstall: %s - Check: %s", dist, - dist.location, + dist_location, ) + if dist.in_usersite: + bin_dir = get_bin_user() + else: + bin_dir = get_bin_prefix() + # find distutils scripts= scripts - if dist.has_metadata("scripts") and dist.metadata_isdir("scripts"): - for script in dist.metadata_listdir("scripts"): - if dist_in_usersite(dist): - bin_dir = get_bin_user() - else: - bin_dir = get_bin_prefix() + try: + for script in dist.iter_distutils_script_names(): paths_to_remove.add(os.path.join(bin_dir, script)) if WINDOWS: - paths_to_remove.add(os.path.join(bin_dir, script) + ".bat") - - # find console_scripts - _scripts_to_remove = [] - console_scripts = dist.get_entry_map(group="console_scripts") - for name in console_scripts.keys(): - _scripts_to_remove.extend(_script_names(dist, name, False)) - # find gui_scripts - gui_scripts = dist.get_entry_map(group="gui_scripts") - for name in gui_scripts.keys(): - _scripts_to_remove.extend(_script_names(dist, name, True)) - - for s in _scripts_to_remove: + paths_to_remove.add(os.path.join(bin_dir, f"{script}.bat")) + except (FileNotFoundError, NotADirectoryError): + pass + + # find console_scripts and gui_scripts + def iter_scripts_to_remove( + dist: BaseDistribution, + bin_dir: str, + ) -> Generator[str, None, None]: + for entry_point in dist.iter_entry_points(): + if entry_point.group == "console_scripts": + yield from _script_names(bin_dir, entry_point.name, False) + elif entry_point.group == "gui_scripts": + yield from _script_names(bin_dir, entry_point.name, True) + + for s in iter_scripts_to_remove(dist, bin_dir): paths_to_remove.add(s) return paths_to_remove diff --git a/src/pip/_internal/resolution/legacy/resolver.py b/src/pip/_internal/resolution/legacy/resolver.py index 09caaa6d534..fb49d41695f 100644 --- a/src/pip/_internal/resolution/legacy/resolver.py +++ b/src/pip/_internal/resolution/legacy/resolver.py @@ -28,12 +28,14 @@ DistributionNotFound, HashError, HashErrors, + InstallationError, NoneMetadataError, UnsupportedPythonVersion, ) from pip._internal.index.package_finder import PackageFinder from pip._internal.metadata import BaseDistribution from pip._internal.models.link import Link +from pip._internal.models.wheel import Wheel from pip._internal.operations.prepare import RequirementPreparer from pip._internal.req.req_install import ( InstallRequirement, @@ -41,9 +43,11 @@ ) from pip._internal.req.req_set import RequirementSet from pip._internal.resolution.base import BaseResolver, InstallRequirementProvider +from pip._internal.utils import compatibility_tags from pip._internal.utils.compatibility_tags import get_supported +from pip._internal.utils.direct_url_helpers import direct_url_from_link from pip._internal.utils.logging import indent_log -from pip._internal.utils.misc import dist_in_usersite, normalize_version_info +from pip._internal.utils.misc import normalize_version_info from pip._internal.utils.packaging import check_requires_python logger = logging.getLogger(__name__) @@ -168,7 +172,7 @@ def resolve( for req in root_reqs: if req.constraint: check_invalid_constraint_type(req) - requirement_set.add_requirement(req) + self._add_requirement_to_set(requirement_set, req) # Actually prepare the files, and collect any exceptions. Most hash # exceptions cannot be checked ahead of time, because @@ -188,6 +192,124 @@ def resolve( return requirement_set + def _add_requirement_to_set( + self, + requirement_set: RequirementSet, + install_req: InstallRequirement, + parent_req_name: Optional[str] = None, + extras_requested: Optional[Iterable[str]] = None, + ) -> Tuple[List[InstallRequirement], Optional[InstallRequirement]]: + """Add install_req as a requirement to install. + + :param parent_req_name: The name of the requirement that needed this + added. The name is used because when multiple unnamed requirements + resolve to the same name, we could otherwise end up with dependency + links that point outside the Requirements set. parent_req must + already be added. Note that None implies that this is a user + supplied requirement, vs an inferred one. + :param extras_requested: an iterable of extras used to evaluate the + environment markers. + :return: Additional requirements to scan. That is either [] if + the requirement is not applicable, or [install_req] if the + requirement is applicable and has just been added. + """ + # If the markers do not match, ignore this requirement. + if not install_req.match_markers(extras_requested): + logger.info( + "Ignoring %s: markers '%s' don't match your environment", + install_req.name, + install_req.markers, + ) + return [], None + + # If the wheel is not supported, raise an error. + # Should check this after filtering out based on environment markers to + # allow specifying different wheels based on the environment/OS, in a + # single requirements file. + if install_req.link and install_req.link.is_wheel: + wheel = Wheel(install_req.link.filename) + tags = compatibility_tags.get_supported() + if requirement_set.check_supported_wheels and not wheel.supported(tags): + raise InstallationError( + "{} is not a supported wheel on this platform.".format( + wheel.filename + ) + ) + + # This next bit is really a sanity check. + assert ( + not install_req.user_supplied or parent_req_name is None + ), "a user supplied req shouldn't have a parent" + + # Unnamed requirements are scanned again and the requirement won't be + # added as a dependency until after scanning. + if not install_req.name: + requirement_set.add_unnamed_requirement(install_req) + return [install_req], None + + try: + existing_req: Optional[ + InstallRequirement + ] = requirement_set.get_requirement(install_req.name) + except KeyError: + existing_req = None + + has_conflicting_requirement = ( + parent_req_name is None + and existing_req + and not existing_req.constraint + and existing_req.extras == install_req.extras + and existing_req.req + and install_req.req + and existing_req.req.specifier != install_req.req.specifier + ) + if has_conflicting_requirement: + raise InstallationError( + "Double requirement given: {} (already in {}, name={!r})".format( + install_req, existing_req, install_req.name + ) + ) + + # When no existing requirement exists, add the requirement as a + # dependency and it will be scanned again after. + if not existing_req: + requirement_set.add_named_requirement(install_req) + # We'd want to rescan this requirement later + return [install_req], install_req + + # Assume there's no need to scan, and that we've already + # encountered this for scanning. + if install_req.constraint or not existing_req.constraint: + return [], existing_req + + does_not_satisfy_constraint = install_req.link and not ( + existing_req.link and install_req.link.path == existing_req.link.path + ) + if does_not_satisfy_constraint: + raise InstallationError( + "Could not satisfy constraints for '{}': " + "installation from path or url cannot be " + "constrained to a version".format(install_req.name) + ) + # If we're now installing a constraint, mark the existing + # object for real installation. + existing_req.constraint = False + # If we're now installing a user supplied requirement, + # mark the existing object as such. + if install_req.user_supplied: + existing_req.user_supplied = True + existing_req.extras = tuple( + sorted(set(existing_req.extras) | set(install_req.extras)) + ) + logger.debug( + "Setting %s extras to: %s", + existing_req, + existing_req.extras, + ) + # Return the existing requirement for addition to the parent and + # scanning again. + return [existing_req], existing_req + def _is_upgrade_allowed(self, req: InstallRequirement) -> bool: if self.upgrade_strategy == "to-satisfy-only": return False @@ -203,7 +325,7 @@ def _set_req_to_reinstall(self, req: InstallRequirement) -> None: """ # Don't uninstall the conflict if doing a user install and the # conflict is not a user install. - if not self.use_user_site or dist_in_usersite(req.satisfied_by): + if not self.use_user_site or req.satisfied_by.in_usersite: req.should_reinstall = True req.satisfied_by = None @@ -310,6 +432,14 @@ def _populate_link(self, req: InstallRequirement) -> None: logger.debug("Using cached wheel link: %s", cache_entry.link) if req.link is req.original_link and cache_entry.persistent: req.original_link_is_in_wheel_cache = True + if cache_entry.origin is not None: + req.download_info = cache_entry.origin + else: + # Legacy cache entry that does not have origin.json. + # download_info may miss the archive_info.hash field. + req.download_info = direct_url_from_link( + req.link, link_is_in_wheel_cache=cache_entry.persistent + ) req.link = cache_entry.link def _get_dist_for(self, req: InstallRequirement) -> BaseDistribution: @@ -393,7 +523,8 @@ def add_req(subreq: Requirement, extras_requested: Iterable[str]) -> None: # the legacy resolver so I'm just not going to bother refactoring. sub_install_req = self._make_install_req(str(subreq), req_to_install) parent_req_name = req_to_install.name - to_scan_again, add_to_parent = requirement_set.add_requirement( + to_scan_again, add_to_parent = self._add_requirement_to_set( + requirement_set, sub_install_req, parent_req_name=parent_req_name, extras_requested=extras_requested, @@ -410,7 +541,9 @@ def add_req(subreq: Requirement, extras_requested: Iterable[str]) -> None: # 'unnamed' requirements can only come from being directly # provided by the user. assert req_to_install.user_supplied - requirement_set.add_requirement(req_to_install, parent_req_name=None) + self._add_requirement_to_set( + requirement_set, req_to_install, parent_req_name=None + ) if not self.ignore_dependencies: if req_to_install.extras: diff --git a/src/pip/_internal/resolution/resolvelib/candidates.py b/src/pip/_internal/resolution/resolvelib/candidates.py index 60fad55db00..7f09efc1539 100644 --- a/src/pip/_internal/resolution/resolvelib/candidates.py +++ b/src/pip/_internal/resolution/resolvelib/candidates.py @@ -5,7 +5,11 @@ from pip._vendor.packaging.utils import NormalizedName, canonicalize_name from pip._vendor.packaging.version import Version -from pip._internal.exceptions import HashError, MetadataInconsistent +from pip._internal.exceptions import ( + HashError, + InstallationSubprocessError, + MetadataInconsistent, +) from pip._internal.metadata import BaseDistribution from pip._internal.models.link import Link, links_equivalent from pip._internal.models.wheel import Wheel @@ -14,6 +18,7 @@ install_req_from_line, ) from pip._internal.req.req_install import InstallRequirement +from pip._internal.utils.direct_url_helpers import direct_url_from_link from pip._internal.utils.misc import normalize_version_info from .base import Candidate, CandidateVersion, Requirement, format_name @@ -61,10 +66,10 @@ def make_install_req_from_link( isolated=template.isolated, constraint=template.constraint, options=dict( - install_options=template.install_options, global_options=template.global_options, hashes=template.hash_options, ), + config_settings=template.config_settings, ) ireq.original_link = template.original_link ireq.link = link @@ -84,18 +89,16 @@ def make_install_req_from_editable( constraint=template.constraint, permit_editable_wheels=template.permit_editable_wheels, options=dict( - install_options=template.install_options, global_options=template.global_options, hashes=template.hash_options, ), + config_settings=template.config_settings, ) def _make_install_req_from_dist( dist: BaseDistribution, template: InstallRequirement ) -> InstallRequirement: - from pip._internal.metadata.pkg_resources import Distribution as _Dist - if template.req: line = str(template.req) elif template.link: @@ -110,12 +113,12 @@ def _make_install_req_from_dist( isolated=template.isolated, constraint=template.constraint, options=dict( - install_options=template.install_options, global_options=template.global_options, hashes=template.hash_options, ), + config_settings=template.config_settings, ) - ireq.satisfied_by = cast(_Dist, dist)._dist + ireq.satisfied_by = dist return ireq @@ -229,6 +232,11 @@ def _prepare(self) -> BaseDistribution: # offending line to the user. e.req = self._ireq raise + except InstallationSubprocessError as exc: + # The output has been presented already, so don't duplicate it. + exc.context = "See above for output." + raise + self._check_metadata_consistency(dist) return dist @@ -271,12 +279,17 @@ def __init__( version, wheel_version, name ) - if ( - cache_entry is not None - and cache_entry.persistent - and template.link is template.original_link - ): - ireq.original_link_is_in_wheel_cache = True + if cache_entry is not None: + if cache_entry.persistent and template.link is template.original_link: + ireq.original_link_is_in_wheel_cache = True + if cache_entry.origin is not None: + ireq.download_info = cache_entry.origin + else: + # Legacy cache entry that does not have origin.json. + # download_info may miss the archive_info.hash field. + ireq.download_info = direct_url_from_link( + source_link, link_is_in_wheel_cache=cache_entry.persistent + ) super().__init__( link=link, diff --git a/src/pip/_internal/resolution/resolvelib/factory.py b/src/pip/_internal/resolution/resolvelib/factory.py index 6c784761c54..a4c24b52a1b 100644 --- a/src/pip/_internal/resolution/resolvelib/factory.py +++ b/src/pip/_internal/resolution/resolvelib/factory.py @@ -27,7 +27,6 @@ from pip._internal.exceptions import ( DistributionNotFound, InstallationError, - InstallationSubprocessError, MetadataInconsistent, UnsupportedPythonVersion, UnsupportedWheel, @@ -190,10 +189,16 @@ def _make_candidate_from_link( name=name, version=version, ) - except (InstallationSubprocessError, MetadataInconsistent) as e: - logger.warning("Discarding %s. %s", link, e) + except MetadataInconsistent as e: + logger.info( + "Discarding [blue underline]%s[/]: [yellow]%s[reset]", + link, + e, + extra={"markup": True}, + ) self._build_failures[link] = e return None + base: BaseCandidate = self._editable_candidate_cache[link] else: if link not in self._link_candidate_cache: @@ -205,8 +210,13 @@ def _make_candidate_from_link( name=name, version=version, ) - except (InstallationSubprocessError, MetadataInconsistent) as e: - logger.warning("Discarding %s. %s", link, e) + except MetadataInconsistent as e: + logger.info( + "Discarding [blue underline]%s[/]: [yellow]%s[reset]", + link, + e, + extra={"markup": True}, + ) self._build_failures[link] = e return None base = self._link_candidate_cache[link] @@ -273,14 +283,27 @@ def iter_index_candidate_infos() -> Iterator[IndexCandidateInfo]: ) icans = list(result.iter_applicable()) - # PEP 592: Yanked releases must be ignored unless only yanked - # releases can satisfy the version range. So if this is false, - # all yanked icans need to be skipped. + # PEP 592: Yanked releases are ignored unless the specifier + # explicitly pins a version (via '==' or '===') that can be + # solely satisfied by a yanked release. all_yanked = all(ican.link.is_yanked for ican in icans) + def is_pinned(specifier: SpecifierSet) -> bool: + for sp in specifier: + if sp.operator == "===": + return True + if sp.operator != "==": + continue + if sp.version.endswith(".*"): + continue + return True + return False + + pinned = is_pinned(specifier) + # PackageFinder returns earlier versions first, so we reverse. for ican in reversed(icans): - if not all_yanked and ican.link.is_yanked: + if not (all_yanked and pinned) and ican.link.is_yanked: continue func = functools.partial( self._make_candidate_from_link, @@ -579,8 +602,15 @@ def _report_single_requirement_conflict( req_disp = f"{req} (from {parent.name})" cands = self._finder.find_all_candidates(req.project_name) + skipped_by_requires_python = self._finder.requires_python_skipped_reasons() versions = [str(v) for v in sorted({c.version for c in cands})] + if skipped_by_requires_python: + logger.critical( + "Ignored the following versions that require a different python " + "version: %s", + "; ".join(skipped_by_requires_python) or "none", + ) logger.critical( "Could not find a version that satisfies the requirement %s " "(from versions: %s)", @@ -696,6 +726,6 @@ def describe_trigger(parent: Candidate) -> str: return DistributionNotFound( "ResolutionImpossible: for help visit " - "https://pip.pypa.io/en/latest/user_guide/" - "#fixing-conflicting-dependencies" + "https://pip.pypa.io/en/latest/topics/dependency-resolution/" + "#dealing-with-dependency-conflicts" ) diff --git a/src/pip/_internal/resolution/resolvelib/provider.py b/src/pip/_internal/resolution/resolvelib/provider.py index 579958dddc9..315fb9c8902 100644 --- a/src/pip/_internal/resolution/resolvelib/provider.py +++ b/src/pip/_internal/resolution/resolvelib/provider.py @@ -1,6 +1,15 @@ import collections import math -from typing import TYPE_CHECKING, Dict, Iterable, Iterator, Mapping, Sequence, Union +from typing import ( + TYPE_CHECKING, + Dict, + Iterable, + Iterator, + Mapping, + Sequence, + TypeVar, + Union, +) from pip._vendor.resolvelib.providers import AbstractProvider @@ -37,6 +46,35 @@ # services to those objects (access to pip's finder and preparer). +D = TypeVar("D") +V = TypeVar("V") + + +def _get_with_identifier( + mapping: Mapping[str, V], + identifier: str, + default: D, +) -> Union[D, V]: + """Get item from a package name lookup mapping with a resolver identifier. + + This extra logic is needed when the target mapping is keyed by package + name, which cannot be directly looked up with an identifier (which may + contain requested extras). Additional logic is added to also look up a value + by "cleaning up" the extras from the identifier. + """ + if identifier in mapping: + return mapping[identifier] + # HACK: Theoretically we should check whether this identifier is a valid + # "NAME[EXTRAS]" format, and parse out the name part with packaging or + # some regular expression. But since pip's resolver only spits out three + # kinds of identifiers: normalized PEP 503 names, normalized names plus + # extras, and Requires-Python, we can cheat a bit here. + name, open_bracket, _ = identifier.partition("[") + if open_bracket and name in mapping: + return mapping[name] + return default + + class PipProvider(_ProviderBase): """Pip's provider implementation for resolvelib. @@ -66,7 +104,7 @@ def __init__( def identify(self, requirement_or_candidate: Union[Requirement, Candidate]) -> str: return requirement_or_candidate.name - def get_preference( # type: ignore + def get_preference( self, identifier: str, resolutions: Mapping[str, Candidate], @@ -79,21 +117,36 @@ def get_preference( # type: ignore The lower the return value is, the more preferred this group of arguments is. - Currently pip considers the followings in order: + Currently pip considers the following in order: * Prefer if any of the known requirements is "direct", e.g. points to an explicit URL. * If equal, prefer if any requirement is "pinned", i.e. contains operator ``===`` or ``==``. * If equal, calculate an approximate "depth" and resolve requirements - closer to the user-specified requirements first. + closer to the user-specified requirements first. If the depth cannot + by determined (eg: due to no matching parents), it is considered + infinite. * Order user-specified requirements by the order they are specified. * If equal, prefers "non-free" requirements, i.e. contains at least one operator, such as ``>=`` or ``<``. * If equal, order alphabetically for consistency (helps debuggability). """ - lookups = (r.get_candidate_lookup() for r, _ in information[identifier]) - candidate, ireqs = zip(*lookups) + try: + next(iter(information[identifier])) + except StopIteration: + # There is no information for this identifier, so there's no known + # candidates. + has_information = False + else: + has_information = True + + if has_information: + lookups = (r.get_candidate_lookup() for r, _ in information[identifier]) + candidate, ireqs = zip(*lookups) + else: + candidate, ireqs = None, () + operators = [ specifier.operator for specifier_set in (ireq.specifier for ireq in ireqs if ireq) @@ -108,11 +161,14 @@ def get_preference( # type: ignore requested_order: Union[int, float] = self._user_requested[identifier] except KeyError: requested_order = math.inf - parent_depths = ( - self._known_depths[parent.name] if parent is not None else 0.0 - for _, parent in information[identifier] - ) - inferred_depth = min(d for d in parent_depths) + 1.0 + if has_information: + parent_depths = ( + self._known_depths[parent.name] if parent is not None else 0.0 + for _, parent in information[identifier] + ) + inferred_depth = min(d for d in parent_depths) + 1.0 + else: + inferred_depth = math.inf else: inferred_depth = 1.0 self._known_depths[identifier] = inferred_depth @@ -123,16 +179,6 @@ def get_preference( # type: ignore # free, so we always do it first to avoid needless work if it fails. requires_python = identifier == REQUIRES_PYTHON_IDENTIFIER - # HACK: Setuptools have a very long and solid backward compatibility - # track record, and extremely few projects would request a narrow, - # non-recent version range of it since that would break a lot things. - # (Most projects specify it only to request for an installer feature, - # which does not work, but that's another topic.) Intentionally - # delaying Setuptools helps reduce branches the resolver has to check. - # This serves as a temporary fix for issues like "apache-airflow[all]" - # while we work on "proper" branch pruning techniques. - delay_this = identifier == "setuptools" - # Prefer the causes of backtracking on the assumption that the problem # resolving the dependency tree is related to the failures that caused # the backtracking @@ -140,7 +186,6 @@ def get_preference( # type: ignore return ( not requires_python, - delay_this, not direct, not pinned, not backtrack_cause, @@ -150,28 +195,13 @@ def get_preference( # type: ignore identifier, ) - def _get_constraint(self, identifier: str) -> Constraint: - if identifier in self._constraints: - return self._constraints[identifier] - - # HACK: Theoretically we should check whether this identifier is a valid - # "NAME[EXTRAS]" format, and parse out the name part with packaging or - # some regular expression. But since pip's resolver only spits out - # three kinds of identifiers: normalized PEP 503 names, normalized names - # plus extras, and Requires-Python, we can cheat a bit here. - name, open_bracket, _ = identifier.partition("[") - if open_bracket and name in self._constraints: - return self._constraints[name] - - return Constraint.empty() - def find_matches( self, identifier: str, requirements: Mapping[str, Iterator[Requirement]], incompatibilities: Mapping[str, Iterator[Candidate]], ) -> Iterable[Candidate]: - def _eligible_for_upgrade(name: str) -> bool: + def _eligible_for_upgrade(identifier: str) -> bool: """Are upgrades allowed for this project? This checks the upgrade strategy, and whether the project was one @@ -185,13 +215,23 @@ def _eligible_for_upgrade(name: str) -> bool: if self._upgrade_strategy == "eager": return True elif self._upgrade_strategy == "only-if-needed": - return name in self._user_requested + user_order = _get_with_identifier( + self._user_requested, + identifier, + default=None, + ) + return user_order is not None return False + constraint = _get_with_identifier( + self._constraints, + identifier, + default=Constraint.empty(), + ) return self._factory.find_candidates( identifier=identifier, requirements=requirements, - constraint=self._get_constraint(identifier), + constraint=constraint, prefers_installed=(not _eligible_for_upgrade(identifier)), incompatibilities=incompatibilities, ) diff --git a/src/pip/_internal/resolution/resolvelib/reporter.py b/src/pip/_internal/resolution/resolvelib/reporter.py index 6ced5329b81..3c724238a1e 100644 --- a/src/pip/_internal/resolution/resolvelib/reporter.py +++ b/src/pip/_internal/resolution/resolvelib/reporter.py @@ -11,9 +11,9 @@ class PipReporter(BaseReporter): def __init__(self) -> None: - self.backtracks_by_package: DefaultDict[str, int] = defaultdict(int) + self.reject_count_by_package: DefaultDict[str, int] = defaultdict(int) - self._messages_at_backtrack = { + self._messages_at_reject_count = { 1: ( "pip is looking at multiple versions of {package_name} to " "determine which version is compatible with other " @@ -32,16 +32,28 @@ def __init__(self) -> None: ), } - def backtracking(self, candidate: Candidate) -> None: - self.backtracks_by_package[candidate.name] += 1 + def rejecting_candidate(self, criterion: Any, candidate: Candidate) -> None: + self.reject_count_by_package[candidate.name] += 1 - count = self.backtracks_by_package[candidate.name] - if count not in self._messages_at_backtrack: + count = self.reject_count_by_package[candidate.name] + if count not in self._messages_at_reject_count: return - message = self._messages_at_backtrack[count] + message = self._messages_at_reject_count[count] logger.info("INFO: %s", message.format(package_name=candidate.name)) + msg = "Will try a different candidate, due to conflict:" + for req_info in criterion.information: + req, parent = req_info.requirement, req_info.parent + # Inspired by Factory.get_installation_error + msg += "\n " + if parent: + msg += f"{parent.name} {parent.version} depends on " + else: + msg += "The user requested " + msg += req.format_for_error() + logger.debug(msg) + class PipDebuggingReporter(BaseReporter): """A reporter that does an info log for every event it sees.""" @@ -61,8 +73,8 @@ def ending(self, state: Any) -> None: def adding_requirement(self, requirement: Requirement, parent: Candidate) -> None: logger.info("Reporter.adding_requirement(%r, %r)", requirement, parent) - def backtracking(self, candidate: Candidate) -> None: - logger.info("Reporter.backtracking(%r)", candidate) + def rejecting_candidate(self, criterion: Any, candidate: Candidate) -> None: + logger.info("Reporter.rejecting_candidate(%r, %r)", criterion, candidate) def pinning(self, candidate: Candidate) -> None: logger.info("Reporter.pinning(%r)", candidate) diff --git a/src/pip/_internal/resolution/resolvelib/resolver.py b/src/pip/_internal/resolution/resolvelib/resolver.py index 12f96702024..a605d6c254f 100644 --- a/src/pip/_internal/resolution/resolvelib/resolver.py +++ b/src/pip/_internal/resolution/resolvelib/resolver.py @@ -171,17 +171,19 @@ def get_installation_order( get installed one-by-one. The current implementation creates a topological ordering of the - dependency graph, while breaking any cycles in the graph at arbitrary - points. We make no guarantees about where the cycle would be broken, - other than they would be broken. + dependency graph, giving more weight to packages with less + or no dependencies, while breaking any cycles in the graph at + arbitrary points. We make no guarantees about where the cycle + would be broken, other than it *would* be broken. """ assert self._result is not None, "must call resolve() first" + if not req_set.requirements: + # Nothing is left to install, so we do not need an order. + return [] + graph = self._result.graph - weights = get_topological_weights( - graph, - expected_node_count=len(self._result.mapping) + 1, - ) + weights = get_topological_weights(graph, set(req_set.requirements.keys())) sorted_items = sorted( req_set.requirements.items(), @@ -192,23 +194,32 @@ def get_installation_order( def get_topological_weights( - graph: "DirectedGraph[Optional[str]]", expected_node_count: int + graph: "DirectedGraph[Optional[str]]", requirement_keys: Set[str] ) -> Dict[Optional[str], int]: """Assign weights to each node based on how "deep" they are. This implementation may change at any point in the future without prior notice. - We take the length for the longest path to any node from root, ignoring any - paths that contain a single node twice (i.e. cycles). This is done through - a depth-first search through the graph, while keeping track of the path to - the node. + We first simplify the dependency graph by pruning any leaves and giving them + the highest weight: a package without any dependencies should be installed + first. This is done again and again in the same way, giving ever less weight + to the newly found leaves. The loop stops when no leaves are left: all + remaining packages have at least one dependency left in the graph. + + Then we continue with the remaining graph, by taking the length for the + longest path to any node from root, ignoring any paths that contain a single + node twice (i.e. cycles). This is done through a depth-first search through + the graph, while keeping track of the path to the node. Cycles in the graph result would result in node being revisited while also - being it's own path. In this case, take no action. This helps ensure we + being on its own path. In this case, take no action. This helps ensure we don't get stuck in a cycle. When assigning weight, the longer path (i.e. larger length) is preferred. + + We are only interested in the weights of packages that are in the + requirement_keys. """ path: Set[Optional[str]] = set() weights: Dict[Optional[str], int] = {} @@ -224,15 +235,49 @@ def visit(node: Optional[str]) -> None: visit(child) path.remove(node) + if node not in requirement_keys: + return + last_known_parent_count = weights.get(node, 0) weights[node] = max(last_known_parent_count, len(path)) + # Simplify the graph, pruning leaves that have no dependencies. + # This is needed for large graphs (say over 200 packages) because the + # `visit` function is exponentially slower then, taking minutes. + # See https://github.com/pypa/pip/issues/10557 + # We will loop until we explicitly break the loop. + while True: + leaves = set() + for key in graph: + if key is None: + continue + for _child in graph.iter_children(key): + # This means we have at least one child + break + else: + # No child. + leaves.add(key) + if not leaves: + # We are done simplifying. + break + # Calculate the weight for the leaves. + weight = len(graph) - 1 + for leaf in leaves: + if leaf not in requirement_keys: + continue + weights[leaf] = weight + # Remove the leaves from the graph, making it simpler. + for leaf in leaves: + graph.remove(leaf) + + # Visit the remaining graph. # `None` is guaranteed to be the root node by resolvelib. visit(None) - # Sanity checks - assert weights[None] == 0 - assert len(weights) == expected_node_count + # Sanity check: all requirement keys should be in the weights, + # and no other keys should be in the weights. + difference = set(weights.keys()).difference(requirement_keys) + assert not difference, difference return weights diff --git a/src/pip/_internal/self_outdated_check.py b/src/pip/_internal/self_outdated_check.py index 72f70fc2597..41cc42c5677 100644 --- a/src/pip/_internal/self_outdated_check.py +++ b/src/pip/_internal/self_outdated_check.py @@ -1,23 +1,34 @@ import datetime +import functools import hashlib import json import logging import optparse import os.path import sys -from typing import Any, Dict +from dataclasses import dataclass +from typing import Any, Callable, Dict, Optional from pip._vendor.packaging.version import parse as parse_version +from pip._vendor.rich.console import Group +from pip._vendor.rich.markup import escape +from pip._vendor.rich.text import Text from pip._internal.index.collector import LinkCollector from pip._internal.index.package_finder import PackageFinder from pip._internal.metadata import get_default_environment +from pip._internal.metadata.base import DistributionVersion from pip._internal.models.selection_prefs import SelectionPreferences from pip._internal.network.session import PipSession +from pip._internal.utils.compat import WINDOWS +from pip._internal.utils.entrypoints import ( + get_best_invocation_for_this_pip, + get_best_invocation_for_this_python, +) from pip._internal.utils.filesystem import adjacent_tmp_file, check_path_owner, replace from pip._internal.utils.misc import ensure_dir -SELFCHECK_DATE_FMT = "%Y-%m-%dT%H:%M:%SZ" +_DATE_FMT = "%Y-%m-%dT%H:%M:%SZ" logger = logging.getLogger(__name__) @@ -31,17 +42,17 @@ def _get_statefile_name(key: str) -> str: class SelfCheckState: def __init__(self, cache_dir: str) -> None: - self.state: Dict[str, Any] = {} - self.statefile_path = None + self._state: Dict[str, Any] = {} + self._statefile_path = None # Try to load the existing state if cache_dir: - self.statefile_path = os.path.join( + self._statefile_path = os.path.join( cache_dir, "selfcheck", _get_statefile_name(self.key) ) try: - with open(self.statefile_path, encoding="utf-8") as statefile: - self.state = json.load(statefile) + with open(self._statefile_path, encoding="utf-8") as statefile: + self._state = json.load(statefile) except (OSError, ValueError, KeyError): # Explicitly suppressing exceptions, since we don't want to # error out if the cache file is invalid. @@ -51,41 +62,87 @@ def __init__(self, cache_dir: str) -> None: def key(self) -> str: return sys.prefix - def save(self, pypi_version: str, current_time: datetime.datetime) -> None: + def get(self, current_time: datetime.datetime) -> Optional[str]: + """Check if we have a not-outdated version loaded already.""" + if not self._state: + return None + + if "last_check" not in self._state: + return None + + if "pypi_version" not in self._state: + return None + + seven_days_in_seconds = 7 * 24 * 60 * 60 + + # Determine if we need to refresh the state + last_check = datetime.datetime.strptime(self._state["last_check"], _DATE_FMT) + seconds_since_last_check = (current_time - last_check).total_seconds() + if seconds_since_last_check > seven_days_in_seconds: + return None + + return self._state["pypi_version"] + + def set(self, pypi_version: str, current_time: datetime.datetime) -> None: # If we do not have a path to cache in, don't bother saving. - if not self.statefile_path: + if not self._statefile_path: return # Check to make sure that we own the directory - if not check_path_owner(os.path.dirname(self.statefile_path)): + if not check_path_owner(os.path.dirname(self._statefile_path)): return # Now that we've ensured the directory is owned by this user, we'll go # ahead and make sure that all our directories are created. - ensure_dir(os.path.dirname(self.statefile_path)) + ensure_dir(os.path.dirname(self._statefile_path)) state = { # Include the key so it's easy to tell which pip wrote the # file. "key": self.key, - "last_check": current_time.strftime(SELFCHECK_DATE_FMT), + "last_check": current_time.strftime(_DATE_FMT), "pypi_version": pypi_version, } text = json.dumps(state, sort_keys=True, separators=(",", ":")) - with adjacent_tmp_file(self.statefile_path) as f: + with adjacent_tmp_file(self._statefile_path) as f: f.write(text.encode()) try: # Since we have a prefix-specific state file, we can just # overwrite whatever is there, no need to check. - replace(f.name, self.statefile_path) + replace(f.name, self._statefile_path) except OSError: # Best effort. pass +@dataclass +class UpgradePrompt: + old: str + new: str + + def __rich__(self) -> Group: + if WINDOWS: + pip_cmd = f"{get_best_invocation_for_this_python()} -m pip" + else: + pip_cmd = get_best_invocation_for_this_pip() + + notice = "[bold][[reset][blue]notice[reset][bold]][reset]" + return Group( + Text(), + Text.from_markup( + f"{notice} A new release of pip is available: " + f"[red]{self.old}[reset] -> [green]{self.new}[reset]" + ), + Text.from_markup( + f"{notice} To update, run: " + f"[green]{escape(pip_cmd)} install --upgrade pip" + ), + ) + + def was_installed_by_pip(pkg: str) -> bool: """Checks whether pkg was installed by pip @@ -96,6 +153,68 @@ def was_installed_by_pip(pkg: str) -> bool: return dist is not None and "pip" == dist.installer +def _get_current_remote_pip_version( + session: PipSession, options: optparse.Values +) -> Optional[str]: + # Lets use PackageFinder to see what the latest pip version is + link_collector = LinkCollector.create( + session, + options=options, + suppress_no_index=True, + ) + + # Pass allow_yanked=False so we don't suggest upgrading to a + # yanked version. + selection_prefs = SelectionPreferences( + allow_yanked=False, + allow_all_prereleases=False, # Explicitly set to False + ) + + finder = PackageFinder.create( + link_collector=link_collector, + selection_prefs=selection_prefs, + ) + best_candidate = finder.find_best_candidate("pip").best_candidate + if best_candidate is None: + return None + + return str(best_candidate.version) + + +def _self_version_check_logic( + *, + state: SelfCheckState, + current_time: datetime.datetime, + local_version: DistributionVersion, + get_remote_version: Callable[[], Optional[str]], +) -> Optional[UpgradePrompt]: + remote_version_str = state.get(current_time) + if remote_version_str is None: + remote_version_str = get_remote_version() + if remote_version_str is None: + logger.debug("No remote pip version found") + return None + state.set(remote_version_str, current_time) + + remote_version = parse_version(remote_version_str) + logger.debug("Remote version of pip: %s", remote_version) + logger.debug("Local version of pip: %s", local_version) + + pip_installed_by_pip = was_installed_by_pip("pip") + logger.debug("Was pip installed by pip? %s", pip_installed_by_pip) + if not pip_installed_by_pip: + return None # Only suggest upgrade if pip is installed by pip. + + local_version_is_older = ( + local_version < remote_version + and local_version.base_version != remote_version.base_version + ) + if local_version_is_older: + return UpgradePrompt(old=str(local_version), new=remote_version_str) + + return None + + def pip_self_version_check(session: PipSession, options: optparse.Values) -> None: """Check for an update for pip. @@ -107,76 +226,17 @@ def pip_self_version_check(session: PipSession, options: optparse.Values) -> Non if not installed_dist: return - pip_version = installed_dist.version - pypi_version = None - try: - state = SelfCheckState(cache_dir=options.cache_dir) - - current_time = datetime.datetime.utcnow() - # Determine if we need to refresh the state - if "last_check" in state.state and "pypi_version" in state.state: - last_check = datetime.datetime.strptime( - state.state["last_check"], SELFCHECK_DATE_FMT - ) - if (current_time - last_check).total_seconds() < 7 * 24 * 60 * 60: - pypi_version = state.state["pypi_version"] - - # Refresh the version if we need to or just see if we need to warn - if pypi_version is None: - # Lets use PackageFinder to see what the latest pip version is - link_collector = LinkCollector.create( - session, - options=options, - suppress_no_index=True, - ) - - # Pass allow_yanked=False so we don't suggest upgrading to a - # yanked version. - selection_prefs = SelectionPreferences( - allow_yanked=False, - allow_all_prereleases=False, # Explicitly set to False - ) - - finder = PackageFinder.create( - link_collector=link_collector, - selection_prefs=selection_prefs, - ) - best_candidate = finder.find_best_candidate("pip").best_candidate - if best_candidate is None: - return - pypi_version = str(best_candidate.version) - - # save that we've performed a check - state.save(pypi_version, current_time) - - remote_version = parse_version(pypi_version) - - local_version_is_older = ( - pip_version < remote_version - and pip_version.base_version != remote_version.base_version - and was_installed_by_pip("pip") - ) - - # Determine if our pypi_version is older - if not local_version_is_older: - return - - # We cannot tell how the current pip is available in the current - # command context, so be pragmatic here and suggest the command - # that's always available. This does not accommodate spaces in - # `sys.executable`. - pip_cmd = f"{sys.executable} -m pip" - logger.warning( - "You are using pip version %s; however, version %s is " - "available.\nYou should consider upgrading via the " - "'%s install --upgrade pip' command.", - pip_version, - pypi_version, - pip_cmd, + upgrade_prompt = _self_version_check_logic( + state=SelfCheckState(cache_dir=options.cache_dir), + current_time=datetime.datetime.utcnow(), + local_version=installed_dist.version, + get_remote_version=functools.partial( + _get_current_remote_pip_version, session, options + ), ) + if upgrade_prompt is not None: + logger.warning("[present-rich] %s", upgrade_prompt) except Exception: - logger.debug( - "There was an error checking the latest version of pip", - exc_info=True, - ) + logger.warning("There was an error checking the latest version of pip.") + logger.debug("See below for error", exc_info=True) diff --git a/src/pip/_internal/utils/_jaraco_text.py b/src/pip/_internal/utils/_jaraco_text.py new file mode 100644 index 00000000000..e06947c051a --- /dev/null +++ b/src/pip/_internal/utils/_jaraco_text.py @@ -0,0 +1,109 @@ +"""Functions brought over from jaraco.text. + +These functions are not supposed to be used within `pip._internal`. These are +helper functions brought over from `jaraco.text` to enable vendoring newer +copies of `pkg_resources` without having to vendor `jaraco.text` and its entire +dependency cone; something that our vendoring setup is not currently capable of +handling. + +License reproduced from original source below: + +Copyright Jason R. Coombs + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to +deal in the Software without restriction, including without limitation the +rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +sell copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +IN THE SOFTWARE. +""" + +import functools +import itertools + + +def _nonblank(str): + return str and not str.startswith("#") + + +@functools.singledispatch +def yield_lines(iterable): + r""" + Yield valid lines of a string or iterable. + + >>> list(yield_lines('')) + [] + >>> list(yield_lines(['foo', 'bar'])) + ['foo', 'bar'] + >>> list(yield_lines('foo\nbar')) + ['foo', 'bar'] + >>> list(yield_lines('\nfoo\n#bar\nbaz #comment')) + ['foo', 'baz #comment'] + >>> list(yield_lines(['foo\nbar', 'baz', 'bing\n\n\n'])) + ['foo', 'bar', 'baz', 'bing'] + """ + return itertools.chain.from_iterable(map(yield_lines, iterable)) + + +@yield_lines.register(str) +def _(text): + return filter(_nonblank, map(str.strip, text.splitlines())) + + +def drop_comment(line): + """ + Drop comments. + + >>> drop_comment('foo # bar') + 'foo' + + A hash without a space may be in a URL. + + >>> drop_comment('http://example.com/foo#bar') + 'http://example.com/foo#bar' + """ + return line.partition(" #")[0] + + +def join_continuation(lines): + r""" + Join lines continued by a trailing backslash. + + >>> list(join_continuation(['foo \\', 'bar', 'baz'])) + ['foobar', 'baz'] + >>> list(join_continuation(['foo \\', 'bar', 'baz'])) + ['foobar', 'baz'] + >>> list(join_continuation(['foo \\', 'bar \\', 'baz'])) + ['foobarbaz'] + + Not sure why, but... + The character preceeding the backslash is also elided. + + >>> list(join_continuation(['goo\\', 'dly'])) + ['godly'] + + A terrible idea, but... + If no line is available to continue, suppress the lines. + + >>> list(join_continuation(['foo', 'bar\\', 'baz\\'])) + ['foo'] + """ + lines = iter(lines) + for item in lines: + while item.endswith("\\"): + try: + item = item[:-2].strip() + next(lines) + except StopIteration: + return + yield item diff --git a/src/pip/_internal/utils/deprecation.py b/src/pip/_internal/utils/deprecation.py index 72bd6f25a55..18d68f3ef94 100644 --- a/src/pip/_internal/utils/deprecation.py +++ b/src/pip/_internal/utils/deprecation.py @@ -118,3 +118,58 @@ def deprecated( raise PipDeprecationWarning(message) warnings.warn(message, category=PipDeprecationWarning, stacklevel=2) + + +class LegacyInstallReason: + def __init__( + self, + reason: str, + replacement: Optional[str] = None, + gone_in: Optional[str] = None, + feature_flag: Optional[str] = None, + issue: Optional[int] = None, + emit_after_success: bool = False, + emit_before_install: bool = False, + ): + self._reason = reason + self._replacement = replacement + self._gone_in = gone_in + self._feature_flag = feature_flag + self._issue = issue + self.emit_after_success = emit_after_success + self.emit_before_install = emit_before_install + + def emit_deprecation(self, name: str) -> None: + deprecated( + reason=self._reason.format(name=name), + replacement=self._replacement, + gone_in=self._gone_in, + feature_flag=self._feature_flag, + issue=self._issue, + ) + + +LegacyInstallReasonFailedBdistWheel = LegacyInstallReason( + reason=( + "{name} was installed using the legacy 'setup.py install' " + "method, because a wheel could not be built for it." + ), + replacement="to fix the wheel build issue reported above", + gone_in="23.1", + issue=8368, + emit_after_success=True, +) + + +LegacyInstallReasonMissingWheelPackage = LegacyInstallReason( + reason=( + "{name} is being installed using the legacy " + "'setup.py install' method, because it does not have a " + "'pyproject.toml' and the 'wheel' package " + "is not installed." + ), + replacement="to enable the '--use-pep517' option", + gone_in="23.1", + issue=8559, + emit_before_install=True, +) diff --git a/src/pip/_internal/utils/distutils_args.py b/src/pip/_internal/utils/distutils_args.py deleted file mode 100644 index e4aa5b827f6..00000000000 --- a/src/pip/_internal/utils/distutils_args.py +++ /dev/null @@ -1,42 +0,0 @@ -from distutils.errors import DistutilsArgError -from distutils.fancy_getopt import FancyGetopt -from typing import Dict, List - -_options = [ - ("exec-prefix=", None, ""), - ("home=", None, ""), - ("install-base=", None, ""), - ("install-data=", None, ""), - ("install-headers=", None, ""), - ("install-lib=", None, ""), - ("install-platlib=", None, ""), - ("install-purelib=", None, ""), - ("install-scripts=", None, ""), - ("prefix=", None, ""), - ("root=", None, ""), - ("user", None, ""), -] - - -# typeshed doesn't permit Tuple[str, None, str], see python/typeshed#3469. -_distutils_getopt = FancyGetopt(_options) # type: ignore - - -def parse_distutils_args(args: List[str]) -> Dict[str, str]: - """Parse provided arguments, returning an object that has the - matched arguments. - - Any unknown arguments are ignored. - """ - result = {} - for arg in args: - try: - _, match = _distutils_getopt.getopt(args=[arg]) - except DistutilsArgError: - # We don't care about any other options, which here may be - # considered unrecognized since our option list is not - # exhaustive. - pass - else: - result.update(match.__dict__) - return result diff --git a/src/pip/_internal/utils/egg_link.py b/src/pip/_internal/utils/egg_link.py index 9e0da8d2d29..eb57ed1519f 100644 --- a/src/pip/_internal/utils/egg_link.py +++ b/src/pip/_internal/utils/egg_link.py @@ -1,10 +1,7 @@ -# The following comment should be removed at some point in the future. -# mypy: strict-optional=False - import os import re import sys -from typing import Optional +from typing import List, Optional from pip._internal.locations import site_packages, user_site from pip._internal.utils.virtualenv import ( @@ -57,7 +54,7 @@ def egg_link_path_from_location(raw_name: str) -> Optional[str]: This method will just return the first one found. """ - sites = [] + sites: List[str] = [] if running_under_virtualenv(): sites.append(site_packages) if not virtualenv_no_global() and user_site: diff --git a/src/pip/_internal/utils/encoding.py b/src/pip/_internal/utils/encoding.py index 1c73f6c9a5d..008f06a79bf 100644 --- a/src/pip/_internal/utils/encoding.py +++ b/src/pip/_internal/utils/encoding.py @@ -14,7 +14,7 @@ (codecs.BOM_UTF32_LE, "utf-32-le"), ] -ENCODING_RE = re.compile(br"coding[:=]\s*([-\w.]+)") +ENCODING_RE = re.compile(rb"coding[:=]\s*([-\w.]+)") def auto_decode(data: bytes) -> str: diff --git a/src/pip/_internal/utils/entrypoints.py b/src/pip/_internal/utils/entrypoints.py index 1504a12916b..15013693854 100644 --- a/src/pip/_internal/utils/entrypoints.py +++ b/src/pip/_internal/utils/entrypoints.py @@ -1,7 +1,23 @@ +import itertools +import os +import shutil import sys from typing import List, Optional from pip._internal.cli.main import main +from pip._internal.utils.compat import WINDOWS + +_EXECUTABLE_NAMES = [ + "pip", + f"pip{sys.version_info.major}", + f"pip{sys.version_info.major}.{sys.version_info.minor}", +] +if WINDOWS: + _allowed_extensions = {"", ".exe"} + _EXECUTABLE_NAMES = [ + "".join(parts) + for parts in itertools.product(_EXECUTABLE_NAMES, _allowed_extensions) + ] def _wrapper(args: Optional[List[str]] = None) -> int: @@ -25,3 +41,44 @@ def _wrapper(args: Optional[List[str]] = None) -> int: "running pip directly.\n" ) return main(args) + + +def get_best_invocation_for_this_pip() -> str: + """Try to figure out the best way to invoke pip in the current environment.""" + binary_directory = "Scripts" if WINDOWS else "bin" + binary_prefix = os.path.join(sys.prefix, binary_directory) + + # Try to use pip[X[.Y]] names, if those executables for this environment are + # the first on PATH with that name. + path_parts = os.path.normcase(os.environ.get("PATH", "")).split(os.pathsep) + exe_are_in_PATH = os.path.normcase(binary_prefix) in path_parts + if exe_are_in_PATH: + for exe_name in _EXECUTABLE_NAMES: + found_executable = shutil.which(exe_name) + binary_executable = os.path.join(binary_prefix, exe_name) + if ( + found_executable + and os.path.exists(binary_executable) + and os.path.samefile( + found_executable, + binary_executable, + ) + ): + return exe_name + + # Use the `-m` invocation, if there's no "nice" invocation. + return f"{get_best_invocation_for_this_python()} -m pip" + + +def get_best_invocation_for_this_python() -> str: + """Try to figure out the best way to invoke the current Python.""" + exe = sys.executable + exe_name = os.path.basename(exe) + + # Try to use the basename, if it's the first executable. + found_executable = shutil.which(exe_name) + if found_executable and os.path.samefile(found_executable, exe): + return exe_name + + # Use the full executable name, because we couldn't find something simpler. + return exe diff --git a/src/pip/_internal/utils/filesystem.py b/src/pip/_internal/utils/filesystem.py index b7e6191abe6..83c2df75b96 100644 --- a/src/pip/_internal/utils/filesystem.py +++ b/src/pip/_internal/utils/filesystem.py @@ -2,12 +2,10 @@ import os import os.path import random -import shutil -import stat import sys from contextlib import contextmanager from tempfile import NamedTemporaryFile -from typing import Any, BinaryIO, Iterator, List, Union, cast +from typing import Any, BinaryIO, Generator, List, Union, cast from pip._vendor.tenacity import retry, stop_after_delay, wait_fixed @@ -42,35 +40,8 @@ def check_path_owner(path: str) -> bool: return False # assume we don't own the path -def copy2_fixed(src: str, dest: str) -> None: - """Wrap shutil.copy2() but map errors copying socket files to - SpecialFileError as expected. - - See also https://bugs.python.org/issue37700. - """ - try: - shutil.copy2(src, dest) - except OSError: - for f in [src, dest]: - try: - is_socket_file = is_socket(f) - except OSError: - # An error has already occurred. Another error here is not - # a problem and we can ignore it. - pass - else: - if is_socket_file: - raise shutil.SpecialFileError(f"`{f}` is a socket") - - raise - - -def is_socket(path: str) -> bool: - return stat.S_ISSOCK(os.lstat(path).st_mode) - - @contextmanager -def adjacent_tmp_file(path: str, **kwargs: Any) -> Iterator[BinaryIO]: +def adjacent_tmp_file(path: str, **kwargs: Any) -> Generator[BinaryIO, None, None]: """Return a file-like object pointing to a tmp file next to path. The file is created securely and is ensured to be written to disk diff --git a/src/pip/_internal/utils/hashes.py b/src/pip/_internal/utils/hashes.py index 82eb035a06e..76727306a4c 100644 --- a/src/pip/_internal/utils/hashes.py +++ b/src/pip/_internal/utils/hashes.py @@ -1,5 +1,5 @@ import hashlib -from typing import TYPE_CHECKING, BinaryIO, Dict, Iterator, List +from typing import TYPE_CHECKING, BinaryIO, Dict, Iterable, List, Optional from pip._internal.exceptions import HashMismatch, HashMissing, InstallationError from pip._internal.utils.misc import read_chunks @@ -28,7 +28,7 @@ class Hashes: """ - def __init__(self, hashes: Dict[str, List[str]] = None) -> None: + def __init__(self, hashes: Optional[Dict[str, List[str]]] = None) -> None: """ :param hashes: A dict of algorithm names pointing to lists of allowed hex digests @@ -67,7 +67,7 @@ def is_hash_allowed(self, hash_name: str, hex_digest: str) -> bool: """Return whether the given hex digest is allowed.""" return hex_digest in self._allowed.get(hash_name, []) - def check_against_chunks(self, chunks: Iterator[bytes]) -> None: + def check_against_chunks(self, chunks: Iterable[bytes]) -> None: """Check good hashes against ones built from iterable of chunks of data. diff --git a/src/pip/_internal/utils/logging.py b/src/pip/_internal/utils/logging.py index a4b828a84e1..c10e1f4ced6 100644 --- a/src/pip/_internal/utils/logging.py +++ b/src/pip/_internal/utils/logging.py @@ -4,28 +4,30 @@ import logging.handlers import os import sys +import threading +from dataclasses import dataclass +from io import TextIOWrapper from logging import Filter -from typing import IO, Any, Callable, Iterator, Optional, TextIO, Type, cast +from typing import Any, ClassVar, Generator, List, Optional, TextIO, Type + +from pip._vendor.rich.console import ( + Console, + ConsoleOptions, + ConsoleRenderable, + RenderableType, + RenderResult, + RichCast, +) +from pip._vendor.rich.highlighter import NullHighlighter +from pip._vendor.rich.logging import RichHandler +from pip._vendor.rich.segment import Segment +from pip._vendor.rich.style import Style from pip._internal.utils._log import VERBOSE, getLogger from pip._internal.utils.compat import WINDOWS from pip._internal.utils.deprecation import DEPRECATION_MSG_PREFIX from pip._internal.utils.misc import ensure_dir -try: - import threading -except ImportError: - import dummy_threading as threading # type: ignore - - -try: - from pip._vendor import colorama -# Lots of different errors can come from this, including SystemError and -# ImportError. -except Exception: - colorama = None - - _log_state = threading.local() subprocess_logger = getLogger("pip.subprocessor") @@ -50,7 +52,7 @@ def _is_broken_pipe_error(exc_class: Type[BaseException], exc: BaseException) -> @contextlib.contextmanager -def indent_log(num: int = 2) -> Iterator[None]: +def indent_log(num: int = 2) -> Generator[None, None, None]: """ A context manager which will cause the log output to be indented for any log messages emitted inside it. @@ -119,78 +121,66 @@ def format(self, record: logging.LogRecord) -> str: return formatted -def _color_wrap(*colors: str) -> Callable[[str], str]: - def wrapped(inp: str) -> str: - return "".join(list(colors) + [inp, colorama.Style.RESET_ALL]) - - return wrapped - - -class ColorizedStreamHandler(logging.StreamHandler): - - # Don't build up a list of colors if we don't have colorama - if colorama: - COLORS = [ - # This needs to be in order from highest logging level to lowest. - (logging.ERROR, _color_wrap(colorama.Fore.RED)), - (logging.WARNING, _color_wrap(colorama.Fore.YELLOW)), - ] - else: - COLORS = [] - - def __init__(self, stream: Optional[TextIO] = None, no_color: bool = None) -> None: - super().__init__(stream) - self._no_color = no_color - - if WINDOWS and colorama: - self.stream = colorama.AnsiToWin32(self.stream) - - def _using_stdout(self) -> bool: - """ - Return whether the handler is using sys.stdout. - """ - if WINDOWS and colorama: - # Then self.stream is an AnsiToWin32 object. - stream = cast(colorama.AnsiToWin32, self.stream) - return stream.wrapped is sys.stdout - - return self.stream is sys.stdout - - def should_color(self) -> bool: - # Don't colorize things if we do not have colorama or if told not to - if not colorama or self._no_color: - return False - - real_stream = ( - self.stream - if not isinstance(self.stream, colorama.AnsiToWin32) - else self.stream.wrapped +@dataclass +class IndentedRenderable: + renderable: RenderableType + indent: int + + def __rich_console__( + self, console: Console, options: ConsoleOptions + ) -> RenderResult: + segments = console.render(self.renderable, options) + lines = Segment.split_lines(segments) + for line in lines: + yield Segment(" " * self.indent) + yield from line + yield Segment("\n") + + +class RichPipStreamHandler(RichHandler): + KEYWORDS: ClassVar[Optional[List[str]]] = [] + + def __init__(self, stream: Optional[TextIO], no_color: bool) -> None: + super().__init__( + console=Console(file=stream, no_color=no_color, soft_wrap=True), + show_time=False, + show_level=False, + show_path=False, + highlighter=NullHighlighter(), ) - # If the stream is a tty we should color it - if hasattr(real_stream, "isatty") and real_stream.isatty(): - return True - - # If we have an ANSI term we should color it - if os.environ.get("TERM") == "ANSI": - return True - - # If anything else we should not color it - return False - - def format(self, record: logging.LogRecord) -> str: - msg = super().format(record) - - if self.should_color(): - for level, color in self.COLORS: - if record.levelno >= level: - msg = color(msg) - break - - return msg + # Our custom override on Rich's logger, to make things work as we need them to. + def emit(self, record: logging.LogRecord) -> None: + style: Optional[Style] = None + + # If we are given a diagnostic error to present, present it with indentation. + assert isinstance(record.args, tuple) + if record.msg == "[present-rich] %s" and len(record.args) == 1: + rich_renderable = record.args[0] + assert isinstance( + rich_renderable, (ConsoleRenderable, RichCast, str) + ), f"{rich_renderable} is not rich-console-renderable" + + renderable: RenderableType = IndentedRenderable( + rich_renderable, indent=get_indentation() + ) + else: + message = self.format(record) + renderable = self.render_message(record, message) + if record.levelno is not None: + if record.levelno >= logging.ERROR: + style = Style(color="red") + elif record.levelno >= logging.WARNING: + style = Style(color="yellow") + + try: + self.console.print(renderable, overflow="ignore", crop=False, style=style) + except Exception: + self.handleError(record) - # The logging module says handleError() can be customized. def handleError(self, record: logging.LogRecord) -> None: + """Called when logging is unable to log some output.""" + exc_class, exc = sys.exc_info()[:2] # If a broken pipe occurred while calling write() or flush() on the # stdout stream in logging's Handler.emit(), then raise our special @@ -199,7 +189,7 @@ def handleError(self, record: logging.LogRecord) -> None: if ( exc_class and exc - and self._using_stdout() + and self.console.file is sys.stdout and _is_broken_pipe_error(exc_class, exc) ): raise BrokenStdoutLoggingError() @@ -208,7 +198,7 @@ def handleError(self, record: logging.LogRecord) -> None: class BetterRotatingFileHandler(logging.handlers.RotatingFileHandler): - def _open(self) -> IO[Any]: + def _open(self) -> TextIOWrapper: ensure_dir(os.path.dirname(self.baseFilename)) return super()._open() @@ -275,7 +265,7 @@ def setup_logging(verbosity: int, no_color: bool, user_log_file: Optional[str]) "stderr": "ext://sys.stderr", } handler_classes = { - "stream": "pip._internal.utils.logging.ColorizedStreamHandler", + "stream": "pip._internal.utils.logging.RichPipStreamHandler", "file": "pip._internal.utils.logging.BetterRotatingFileHandler", } handlers = ["console", "console_errors", "console_subprocess"] + ( @@ -333,8 +323,8 @@ def setup_logging(verbosity: int, no_color: bool, user_log_file: Optional[str]) "console_subprocess": { "level": level, "class": handler_classes["stream"], - "no_color": no_color, "stream": log_streams["stderr"], + "no_color": no_color, "filters": ["restrict_to_subprocess"], "formatter": "indent", }, diff --git a/src/pip/_internal/utils/misc.py b/src/pip/_internal/utils/misc.py index d3e9053efd7..baa1ba7eac2 100644 --- a/src/pip/_internal/utils/misc.py +++ b/src/pip/_internal/utils/misc.py @@ -12,6 +12,7 @@ import shutil import stat import sys +import sysconfig import urllib.parse from io import StringIO from itertools import filterfalse, tee, zip_longest @@ -21,6 +22,8 @@ BinaryIO, Callable, ContextManager, + Dict, + Generator, Iterable, Iterator, List, @@ -32,14 +35,13 @@ cast, ) -from pip._vendor.pkg_resources import Distribution +from pip._vendor.pyproject_hooks import BuildBackendHookCaller from pip._vendor.tenacity import retry, stop_after_delay, wait_fixed from pip import __version__ -from pip._internal.exceptions import CommandError -from pip._internal.locations import get_major_minor_version, site_packages, user_site +from pip._internal.exceptions import CommandError, ExternallyManagedEnvironment +from pip._internal.locations import get_major_minor_version from pip._internal.utils.compat import WINDOWS -from pip._internal.utils.egg_link import egg_link_path_from_location from pip._internal.utils.virtualenv import running_under_virtualenv __all__ = [ @@ -56,9 +58,10 @@ "captured_stdout", "ensure_dir", "remove_auth_from_url", + "check_externally_managed", + "ConfiguredBuildBackendHookCaller", ] - logger = logging.getLogger(__name__) T = TypeVar("T") @@ -266,7 +269,9 @@ def is_installable_dir(path: str) -> bool: return False -def read_chunks(file: BinaryIO, size: int = io.DEFAULT_BUFFER_SIZE) -> Iterator[bytes]: +def read_chunks( + file: BinaryIO, size: int = io.DEFAULT_BUFFER_SIZE +) -> Generator[bytes, None, None]: """Yield pieces of data from a file-like object until EOF.""" while True: chunk = file.read(size) @@ -328,64 +333,6 @@ def is_local(path: str) -> bool: return path.startswith(normalize_path(sys.prefix)) -def dist_is_local(dist: Distribution) -> bool: - """ - Return True if given Distribution object is installed locally - (i.e. within current virtualenv). - - Always True if we're not in a virtualenv. - - """ - return is_local(dist_location(dist)) - - -def dist_in_usersite(dist: Distribution) -> bool: - """ - Return True if given Distribution is installed in user site. - """ - return dist_location(dist).startswith(normalize_path(user_site)) - - -def dist_in_site_packages(dist: Distribution) -> bool: - """ - Return True if given Distribution is installed in - sysconfig.get_python_lib(). - """ - return dist_location(dist).startswith(normalize_path(site_packages)) - - -def get_distribution(req_name: str) -> Optional[Distribution]: - """Given a requirement name, return the installed Distribution object. - - This searches from *all* distributions available in the environment, to - match the behavior of ``pkg_resources.get_distribution()``. - - Left for compatibility until direct pkg_resources uses are refactored out. - """ - from pip._internal.metadata import get_default_environment - from pip._internal.metadata.pkg_resources import Distribution as _Dist - - dist = get_default_environment().get_distribution(req_name) - if dist is None: - return None - return cast(_Dist, dist)._dist - - -def dist_location(dist: Distribution) -> str: - """ - Get the site-packages location of this distribution. Generally - this is dist.location, except in the case of develop-installed - packages, where dist.location is the source code location, and we - want to know where the egg-link file is. - - The returned location is normalized (in particular, with symlinks removed). - """ - egg_link = egg_link_path_from_location(dist.project_name) - if egg_link: - return normalize_path(egg_link) - return normalize_path(dist.location) - - def write_output(msg: Any, *args: Any) -> None: logger.info(msg, *args) @@ -406,7 +353,7 @@ def encoding(self): # type: ignore @contextlib.contextmanager -def captured_output(stream_name: str) -> Iterator[StreamWrapper]: +def captured_output(stream_name: str) -> Generator[StreamWrapper, None, None]: """Return a context manager used by captured_stdout/stdin/stderr that temporarily replaces the sys stream *stream_name* with a StringIO. @@ -616,9 +563,9 @@ def protect_pip_from_modification_on_windows(modifying_pip: bool) -> None: python -m pip ... """ pip_names = [ - "pip.exe", - "pip{}.exe".format(sys.version_info[0]), - "pip{}.{}.exe".format(*sys.version_info[:2]), + "pip", + f"pip{sys.version_info.major}", + f"pip{sys.version_info.major}.{sys.version_info.minor}", ] # See https://github.com/pypa/pip/issues/1299 for more discussion @@ -635,6 +582,21 @@ def protect_pip_from_modification_on_windows(modifying_pip: bool) -> None: ) +def check_externally_managed() -> None: + """Check whether the current environment is externally managed. + + If the ``EXTERNALLY-MANAGED`` config file is found, the current environment + is considered externally managed, and an ExternallyManagedEnvironment is + raised. + """ + if running_under_virtualenv(): + return + marker = os.path.join(sysconfig.get_path("stdlib"), "EXTERNALLY-MANAGED") + if not os.path.isfile(marker): + return + raise ExternallyManagedEnvironment.from_config(marker) + + def is_console_interactive() -> bool: """Is this console interactive?""" return sys.stdin is not None and sys.stdin.isatty() @@ -687,3 +649,91 @@ def partition( """ t1, t2 = tee(iterable) return filterfalse(pred, t1), filter(pred, t2) + + +class ConfiguredBuildBackendHookCaller(BuildBackendHookCaller): + def __init__( + self, + config_holder: Any, + source_dir: str, + build_backend: str, + backend_path: Optional[str] = None, + runner: Optional[Callable[..., None]] = None, + python_executable: Optional[str] = None, + ): + super().__init__( + source_dir, build_backend, backend_path, runner, python_executable + ) + self.config_holder = config_holder + + def build_wheel( + self, + wheel_directory: str, + config_settings: Optional[Dict[str, str]] = None, + metadata_directory: Optional[str] = None, + ) -> str: + cs = self.config_holder.config_settings + return super().build_wheel( + wheel_directory, config_settings=cs, metadata_directory=metadata_directory + ) + + def build_sdist( + self, sdist_directory: str, config_settings: Optional[Dict[str, str]] = None + ) -> str: + cs = self.config_holder.config_settings + return super().build_sdist(sdist_directory, config_settings=cs) + + def build_editable( + self, + wheel_directory: str, + config_settings: Optional[Dict[str, str]] = None, + metadata_directory: Optional[str] = None, + ) -> str: + cs = self.config_holder.config_settings + return super().build_editable( + wheel_directory, config_settings=cs, metadata_directory=metadata_directory + ) + + def get_requires_for_build_wheel( + self, config_settings: Optional[Dict[str, str]] = None + ) -> List[str]: + cs = self.config_holder.config_settings + return super().get_requires_for_build_wheel(config_settings=cs) + + def get_requires_for_build_sdist( + self, config_settings: Optional[Dict[str, str]] = None + ) -> List[str]: + cs = self.config_holder.config_settings + return super().get_requires_for_build_sdist(config_settings=cs) + + def get_requires_for_build_editable( + self, config_settings: Optional[Dict[str, str]] = None + ) -> List[str]: + cs = self.config_holder.config_settings + return super().get_requires_for_build_editable(config_settings=cs) + + def prepare_metadata_for_build_wheel( + self, + metadata_directory: str, + config_settings: Optional[Dict[str, str]] = None, + _allow_fallback: bool = True, + ) -> str: + cs = self.config_holder.config_settings + return super().prepare_metadata_for_build_wheel( + metadata_directory=metadata_directory, + config_settings=cs, + _allow_fallback=_allow_fallback, + ) + + def prepare_metadata_for_build_editable( + self, + metadata_directory: str, + config_settings: Optional[Dict[str, str]] = None, + _allow_fallback: bool = True, + ) -> str: + cs = self.config_holder.config_settings + return super().prepare_metadata_for_build_editable( + metadata_directory=metadata_directory, + config_settings=cs, + _allow_fallback=_allow_fallback, + ) diff --git a/src/pip/_internal/utils/packaging.py b/src/pip/_internal/utils/packaging.py index f100473e647..b9f6af4d174 100644 --- a/src/pip/_internal/utils/packaging.py +++ b/src/pip/_internal/utils/packaging.py @@ -1,16 +1,12 @@ import functools import logging -from email.message import Message -from email.parser import FeedParser -from typing import Optional, Tuple +import re +from typing import NewType, Optional, Tuple, cast -from pip._vendor import pkg_resources from pip._vendor.packaging import specifiers, version from pip._vendor.packaging.requirements import Requirement -from pip._vendor.pkg_resources import Distribution -from pip._internal.exceptions import NoneMetadataError -from pip._internal.utils.misc import display_path +NormalizedExtra = NewType("NormalizedExtra", str) logger = logging.getLogger(__name__) @@ -38,41 +34,6 @@ def check_requires_python( return python_version in requires_python_specifier -def get_metadata(dist: Distribution) -> Message: - """ - :raises NoneMetadataError: if the distribution reports `has_metadata()` - True but `get_metadata()` returns None. - """ - metadata_name = "METADATA" - if isinstance(dist, pkg_resources.DistInfoDistribution) and dist.has_metadata( - metadata_name - ): - metadata = dist.get_metadata(metadata_name) - elif dist.has_metadata("PKG-INFO"): - metadata_name = "PKG-INFO" - metadata = dist.get_metadata(metadata_name) - else: - logger.warning("No metadata found in %s", display_path(dist.location)) - metadata = "" - - if metadata is None: - raise NoneMetadataError(dist, metadata_name) - - feed_parser = FeedParser() - # The following line errors out if with a "NoneType" TypeError if - # passed metadata=None. - feed_parser.feed(metadata) - return feed_parser.close() - - -def get_installer(dist: Distribution) -> str: - if dist.has_metadata("INSTALLER"): - for line in dist.get_metadata_lines("INSTALLER"): - if line.strip(): - return line.strip() - return "" - - @functools.lru_cache(maxsize=512) def get_requirement(req_string: str) -> Requirement: """Construct a packaging.Requirement object with caching""" @@ -82,3 +43,15 @@ def get_requirement(req_string: str) -> Requirement: # minimize repeated parsing of the same string to construct equivalent # Requirement objects. return Requirement(req_string) + + +def safe_extra(extra: str) -> NormalizedExtra: + """Convert an arbitrary string to a standard 'extra' name + + Any runs of non-alphanumeric characters are replaced with a single '_', + and the result is always lowercased. + + This function is duplicated from ``pkg_resources``. Note that this is not + the same to either ``canonicalize_name`` or ``_egg_link_name``. + """ + return cast(NormalizedExtra, re.sub("[^A-Za-z0-9.-]+", "_", extra).lower()) diff --git a/src/pip/_internal/utils/parallel.py b/src/pip/_internal/utils/parallel.py deleted file mode 100644 index e31857732bf..00000000000 --- a/src/pip/_internal/utils/parallel.py +++ /dev/null @@ -1,103 +0,0 @@ -"""Convenient parallelization of higher order functions. - -This module provides two helper functions, with appropriate fallbacks on -Python 2 and on systems lacking support for synchronization mechanisms: - -- map_multiprocess -- map_multithread - -These helpers work like Python 3's map, with two differences: - -- They don't guarantee the order of processing of - the elements of the iterable. -- The underlying process/thread pools chop the iterable into - a number of chunks, so that for very long iterables using - a large value for chunksize can make the job complete much faster - than using the default value of 1. -""" - -__all__ = ["map_multiprocess", "map_multithread"] - -from contextlib import contextmanager -from multiprocessing import Pool as ProcessPool -from multiprocessing import pool -from multiprocessing.dummy import Pool as ThreadPool -from typing import Callable, Iterable, Iterator, TypeVar, Union - -from pip._vendor.requests.adapters import DEFAULT_POOLSIZE - -Pool = Union[pool.Pool, pool.ThreadPool] -S = TypeVar("S") -T = TypeVar("T") - -# On platforms without sem_open, multiprocessing[.dummy] Pool -# cannot be created. -try: - import multiprocessing.synchronize # noqa -except ImportError: - LACK_SEM_OPEN = True -else: - LACK_SEM_OPEN = False - -# Incredibly large timeout to work around bpo-8296 on Python 2. -TIMEOUT = 2000000 - - -@contextmanager -def closing(pool: Pool) -> Iterator[Pool]: - """Return a context manager making sure the pool closes properly.""" - try: - yield pool - finally: - # For Pool.imap*, close and join are needed - # for the returned iterator to begin yielding. - pool.close() - pool.join() - pool.terminate() - - -def _map_fallback( - func: Callable[[S], T], iterable: Iterable[S], chunksize: int = 1 -) -> Iterator[T]: - """Make an iterator applying func to each element in iterable. - - This function is the sequential fallback either on Python 2 - where Pool.imap* doesn't react to KeyboardInterrupt - or when sem_open is unavailable. - """ - return map(func, iterable) - - -def _map_multiprocess( - func: Callable[[S], T], iterable: Iterable[S], chunksize: int = 1 -) -> Iterator[T]: - """Chop iterable into chunks and submit them to a process pool. - - For very long iterables using a large value for chunksize can make - the job complete much faster than using the default value of 1. - - Return an unordered iterator of the results. - """ - with closing(ProcessPool()) as pool: - return pool.imap_unordered(func, iterable, chunksize) - - -def _map_multithread( - func: Callable[[S], T], iterable: Iterable[S], chunksize: int = 1 -) -> Iterator[T]: - """Chop iterable into chunks and submit them to a thread pool. - - For very long iterables using a large value for chunksize can make - the job complete much faster than using the default value of 1. - - Return an unordered iterator of the results. - """ - with closing(ThreadPool(DEFAULT_POOLSIZE)) as pool: - return pool.imap_unordered(func, iterable, chunksize) - - -if LACK_SEM_OPEN: - map_multiprocess = map_multithread = _map_fallback -else: - map_multiprocess = _map_multiprocess - map_multithread = _map_multithread diff --git a/src/pip/_internal/utils/pkg_resources.py b/src/pip/_internal/utils/pkg_resources.py deleted file mode 100644 index bd846aa97bc..00000000000 --- a/src/pip/_internal/utils/pkg_resources.py +++ /dev/null @@ -1,33 +0,0 @@ -from typing import Dict, Iterable, List - -from pip._vendor.pkg_resources import yield_lines - - -class DictMetadata: - """IMetadataProvider that reads metadata files from a dictionary.""" - - def __init__(self, metadata: Dict[str, bytes]) -> None: - self._metadata = metadata - - def has_metadata(self, name: str) -> bool: - return name in self._metadata - - def get_metadata(self, name: str) -> str: - try: - return self._metadata[name].decode() - except UnicodeDecodeError as e: - # Mirrors handling done in pkg_resources.NullProvider. - e.reason += f" in {name} file" - raise - - def get_metadata_lines(self, name: str) -> Iterable[str]: - return yield_lines(self.get_metadata(name)) - - def metadata_isdir(self, name: str) -> bool: - return False - - def metadata_listdir(self, name: str) -> List[str]: - return [] - - def run_script(self, script_name: str, namespace: str) -> None: - pass diff --git a/src/pip/_internal/utils/setuptools_build.py b/src/pip/_internal/utils/setuptools_build.py index 9d65ceba4ab..0662915cb05 100644 --- a/src/pip/_internal/utils/setuptools_build.py +++ b/src/pip/_internal/utils/setuptools_build.py @@ -1,26 +1,54 @@ import sys +import textwrap from typing import List, Optional, Sequence # Shim to wrap setup.py invocation with setuptools -# -# We set sys.argv[0] to the path to the underlying setup.py file so -# setuptools / distutils don't take the path to the setup.py to be "-c" when -# invoking via the shim. This avoids e.g. the following manifest_maker -# warning: "warning: manifest_maker: standard file '-c' not found". -_SETUPTOOLS_SHIM = ( - "import io, os, sys, setuptools, tokenize; sys.argv[0] = {0!r}; __file__={0!r};" - "f = getattr(tokenize, 'open', open)(__file__) " - "if os.path.exists(__file__) " - "else io.StringIO('from setuptools import setup; setup()');" - "code = f.read().replace('\\r\\n', '\\n');" - "f.close();" - "exec(compile(code, __file__, 'exec'))" -) +# Note that __file__ is handled via two {!r} *and* %r, to ensure that paths on +# Windows are correctly handled (it should be "C:\\Users" not "C:\Users"). +_SETUPTOOLS_SHIM = textwrap.dedent( + """ + exec(compile(''' + # This is -- a caller that pip uses to run setup.py + # + # - It imports setuptools before invoking setup.py, to enable projects that directly + # import from `distutils.core` to work with newer packaging standards. + # - It provides a clear error message when setuptools is not installed. + # - It sets `sys.argv[0]` to the underlying `setup.py`, when invoking `setup.py` so + # setuptools doesn't think the script is `-c`. This avoids the following warning: + # manifest_maker: standard file '-c' not found". + # - It generates a shim setup.py, for handling setup.cfg-only projects. + import os, sys, tokenize + + try: + import setuptools + except ImportError as error: + print( + "ERROR: Can not execute `setup.py` since setuptools is not available in " + "the build environment.", + file=sys.stderr, + ) + sys.exit(1) + + __file__ = %r + sys.argv[0] = __file__ + + if os.path.exists(__file__): + filename = __file__ + with tokenize.open(__file__) as f: + setup_py_code = f.read() + else: + filename = "" + setup_py_code = "from setuptools import setup; setup()" + + exec(compile(setup_py_code, filename, "exec")) + ''' % ({!r},), "", "exec")) + """ +).rstrip() def make_setuptools_shim_args( setup_py_path: str, - global_options: Sequence[str] = None, + global_options: Optional[Sequence[str]] = None, no_user_config: bool = False, unbuffered_output: bool = False, ) -> List[str]: @@ -75,8 +103,8 @@ def make_setuptools_clean_args( def make_setuptools_develop_args( setup_py_path: str, + *, global_options: Sequence[str], - install_options: Sequence[str], no_user_config: bool, prefix: Optional[str], home: Optional[str], @@ -92,8 +120,6 @@ def make_setuptools_develop_args( args += ["develop", "--no-deps"] - args += install_options - if prefix: args += ["--prefix", prefix] if home is not None: @@ -122,8 +148,8 @@ def make_setuptools_egg_info_args( def make_setuptools_install_args( setup_py_path: str, + *, global_options: Sequence[str], - install_options: Sequence[str], record_filename: str, root: Optional[str], prefix: Optional[str], @@ -162,6 +188,4 @@ def make_setuptools_install_args( if header_dir: args += ["--install-headers", header_dir] - args += install_options - return args diff --git a/src/pip/_internal/utils/subprocess.py b/src/pip/_internal/utils/subprocess.py index f6e8b219a6f..1e8ff50edfb 100644 --- a/src/pip/_internal/utils/subprocess.py +++ b/src/pip/_internal/utils/subprocess.py @@ -13,6 +13,8 @@ Union, ) +from pip._vendor.rich.markup import escape + from pip._internal.cli.spinners import SpinnerInterface, open_spinner from pip._internal.exceptions import InstallationSubprocessError from pip._internal.utils.logging import VERBOSE, subprocess_logger @@ -27,9 +29,6 @@ CommandArgs = List[Union[str, HiddenText]] -LOG_DIVIDER = "----------------------------------------" - - def make_command(*args: Union[str, HiddenText, CommandArgs]) -> CommandArgs: """ Create a CommandArgs object. @@ -69,53 +68,19 @@ def reveal_command_args(args: Union[List[str], CommandArgs]) -> List[str]: return [arg.secret if isinstance(arg, HiddenText) else arg for arg in args] -def make_subprocess_output_error( - cmd_args: Union[List[str], CommandArgs], - cwd: Optional[str], - lines: List[str], - exit_status: int, -) -> str: - """ - Create and return the error message to use to log a subprocess error - with command output. - - :param lines: A list of lines, each ending with a newline. - """ - command = format_command_args(cmd_args) - - # We know the joined output value ends in a newline. - output = "".join(lines) - msg = ( - # Use a unicode string to avoid "UnicodeEncodeError: 'ascii' - # codec can't encode character ..." in Python 2 when a format - # argument (e.g. `output`) has a non-ascii character. - "Command errored out with exit status {exit_status}:\n" - " command: {command_display}\n" - " cwd: {cwd_display}\n" - "Complete output ({line_count} lines):\n{output}{divider}" - ).format( - exit_status=exit_status, - command_display=command, - cwd_display=cwd, - line_count=len(lines), - output=output, - divider=LOG_DIVIDER, - ) - return msg - - def call_subprocess( cmd: Union[List[str], CommandArgs], show_stdout: bool = False, cwd: Optional[str] = None, on_returncode: 'Literal["raise", "warn", "ignore"]' = "raise", extra_ok_returncodes: Optional[Iterable[int]] = None, - command_desc: Optional[str] = None, extra_environ: Optional[Mapping[str, Any]] = None, unset_environ: Optional[Iterable[str]] = None, spinner: Optional[SpinnerInterface] = None, log_failed_cmd: Optional[bool] = True, stdout_only: Optional[bool] = False, + *, + command_desc: str, ) -> str: """ Args: @@ -151,7 +116,7 @@ def call_subprocess( # replaced by INFO. if show_stdout: # Then log the subprocess output at INFO level. - log_subprocess = subprocess_logger.info + log_subprocess: Callable[..., None] = subprocess_logger.info used_level = logging.INFO else: # Then log the subprocess output using VERBOSE. This also ensures @@ -166,9 +131,6 @@ def call_subprocess( # and we have a spinner. use_spinner = not showing_subprocess and spinner is not None - if command_desc is None: - command_desc = format_command_args(cmd) - log_subprocess("Running command %s", command_desc) env = os.environ.copy() if extra_environ: @@ -241,17 +203,25 @@ def call_subprocess( spinner.finish("done") if proc_had_error: if on_returncode == "raise": - if not showing_subprocess and log_failed_cmd: - # Then the subprocess streams haven't been logged to the - # console yet. - msg = make_subprocess_output_error( - cmd_args=cmd, - cwd=cwd, - lines=all_output, - exit_status=proc.returncode, + error = InstallationSubprocessError( + command_description=command_desc, + exit_code=proc.returncode, + output_lines=all_output if not showing_subprocess else None, + ) + if log_failed_cmd: + subprocess_logger.error("[present-rich] %s", error) + subprocess_logger.verbose( + "[bold magenta]full command[/]: [blue]%s[/]", + escape(format_command_args(cmd)), + extra={"markup": True}, + ) + subprocess_logger.verbose( + "[bold magenta]cwd[/]: %s", + escape(cwd or "[inherit]"), + extra={"markup": True}, ) - subprocess_logger.error(msg) - raise InstallationSubprocessError(proc.returncode, command_desc) + + raise error elif on_returncode == "warn": subprocess_logger.warning( 'Command "%s" had error code %s in %s', @@ -269,8 +239,8 @@ def call_subprocess( def runner_with_spinner_message(message: str) -> Callable[..., None]: """Provide a subprocess_runner that shows a spinner message. - Intended for use with for pep517's Pep517HookCaller. Thus, the runner has - an API that matches what's expected by Pep517HookCaller.subprocess_runner. + Intended for use with for BuildBackendHookCaller. Thus, the runner has + an API that matches what's expected by BuildBackendHookCaller.subprocess_runner. """ def runner( @@ -281,6 +251,7 @@ def runner( with open_spinner(message) as spinner: call_subprocess( cmd, + command_desc=message, cwd=cwd, extra_environ=extra_environ, spinner=spinner, diff --git a/src/pip/_internal/utils/temp_dir.py b/src/pip/_internal/utils/temp_dir.py index 442679a758b..8ee8a1cb180 100644 --- a/src/pip/_internal/utils/temp_dir.py +++ b/src/pip/_internal/utils/temp_dir.py @@ -4,7 +4,7 @@ import os.path import tempfile from contextlib import ExitStack, contextmanager -from typing import Any, Dict, Iterator, Optional, TypeVar, Union +from typing import Any, Dict, Generator, Optional, TypeVar, Union from pip._internal.utils.misc import enum, rmtree @@ -26,7 +26,7 @@ @contextmanager -def global_tempdir_manager() -> Iterator[None]: +def global_tempdir_manager() -> Generator[None, None, None]: global _tempdir_manager with ExitStack() as stack: old_tempdir_manager, _tempdir_manager = _tempdir_manager, stack @@ -59,7 +59,7 @@ def get_delete(self, kind: str) -> bool: @contextmanager -def tempdir_registry() -> Iterator[TempDirectoryTypeRegistry]: +def tempdir_registry() -> Generator[TempDirectoryTypeRegistry, None, None]: """Provides a scoped global tempdir registry that can be used to dictate whether directories should be deleted. """ @@ -200,7 +200,7 @@ def __init__(self, original: str, delete: Optional[bool] = None) -> None: super().__init__(delete=delete) @classmethod - def _generate_names(cls, name: str) -> Iterator[str]: + def _generate_names(cls, name: str) -> Generator[str, None, None]: """Generates a series of temporary names. The algorithm replaces the leading characters in the name diff --git a/src/pip/_internal/utils/unpacking.py b/src/pip/_internal/utils/unpacking.py index 5f63f974b95..78b5c13ced3 100644 --- a/src/pip/_internal/utils/unpacking.py +++ b/src/pip/_internal/utils/unpacking.py @@ -188,8 +188,7 @@ def untar_file(filename: str, location: str) -> None: ensure_dir(path) elif member.issym(): try: - # https://github.com/python/typeshed/issues/2673 - tar._extract_member(member, path) # type: ignore + tar._extract_member(member, path) except Exception as exc: # Some corrupt tar files seem to produce this # (specifically bad symlinks) diff --git a/src/pip/_internal/utils/virtualenv.py b/src/pip/_internal/utils/virtualenv.py index c926db4c332..882e36f5c1d 100644 --- a/src/pip/_internal/utils/virtualenv.py +++ b/src/pip/_internal/utils/virtualenv.py @@ -19,7 +19,7 @@ def _running_under_venv() -> bool: return sys.prefix != getattr(sys, "base_prefix", sys.prefix) -def _running_under_regular_virtualenv() -> bool: +def _running_under_legacy_virtualenv() -> bool: """Checks if sys.real_prefix is set. This handles virtual environments created with pypa's virtualenv. @@ -29,8 +29,8 @@ def _running_under_regular_virtualenv() -> bool: def running_under_virtualenv() -> bool: - """Return True if we're running inside a virtualenv, False otherwise.""" - return _running_under_venv() or _running_under_regular_virtualenv() + """True if we're running inside a virtual environment, False otherwise.""" + return _running_under_venv() or _running_under_legacy_virtualenv() def _get_pyvenv_cfg_lines() -> Optional[List[str]]: @@ -77,7 +77,7 @@ def _no_global_under_venv() -> bool: return False -def _no_global_under_regular_virtualenv() -> bool: +def _no_global_under_legacy_virtualenv() -> bool: """Check if "no-global-site-packages.txt" exists beside site.py This mirrors logic in pypa/virtualenv for determining whether system @@ -98,7 +98,7 @@ def virtualenv_no_global() -> bool: if _running_under_venv(): return _no_global_under_venv() - if _running_under_regular_virtualenv(): - return _no_global_under_regular_virtualenv() + if _running_under_legacy_virtualenv(): + return _no_global_under_legacy_virtualenv() return False diff --git a/src/pip/_internal/utils/wheel.py b/src/pip/_internal/utils/wheel.py index 03f00e40939..e5e3f34ed81 100644 --- a/src/pip/_internal/utils/wheel.py +++ b/src/pip/_internal/utils/wheel.py @@ -4,14 +4,12 @@ import logging from email.message import Message from email.parser import Parser -from typing import Dict, Tuple +from typing import Tuple from zipfile import BadZipFile, ZipFile from pip._vendor.packaging.utils import canonicalize_name -from pip._vendor.pkg_resources import DistInfoDistribution, Distribution from pip._internal.exceptions import UnsupportedWheel -from pip._internal.utils.pkg_resources import DictMetadata VERSION_COMPATIBLE = (1, 0) @@ -19,50 +17,6 @@ logger = logging.getLogger(__name__) -class WheelMetadata(DictMetadata): - """Metadata provider that maps metadata decoding exceptions to our - internal exception type. - """ - - def __init__(self, metadata: Dict[str, bytes], wheel_name: str) -> None: - super().__init__(metadata) - self._wheel_name = wheel_name - - def get_metadata(self, name: str) -> str: - try: - return super().get_metadata(name) - except UnicodeDecodeError as e: - # Augment the default error with the origin of the file. - raise UnsupportedWheel( - f"Error decoding metadata for {self._wheel_name}: {e}" - ) - - -def pkg_resources_distribution_for_wheel( - wheel_zip: ZipFile, name: str, location: str -) -> Distribution: - """Get a pkg_resources distribution given a wheel. - - :raises UnsupportedWheel: on any errors - """ - info_dir, _ = parse_wheel(wheel_zip, name) - - metadata_files = [p for p in wheel_zip.namelist() if p.startswith(f"{info_dir}/")] - - metadata_text: Dict[str, bytes] = {} - for path in metadata_files: - _, metadata_name = path.split("/", 1) - - try: - metadata_text[metadata_name] = read_wheel_metadata_file(wheel_zip, path) - except UnsupportedWheel as e: - raise UnsupportedWheel("{} has an invalid wheel, {}".format(name, str(e))) - - metadata = WheelMetadata(metadata_text, location) - - return DistInfoDistribution(location=location, metadata=metadata, project_name=name) - - def parse_wheel(wheel_zip: ZipFile, name: str) -> Tuple[str, Message]: """Extract information from the provided wheel, ensuring it meets basic standards. diff --git a/src/pip/_internal/vcs/bazaar.py b/src/pip/_internal/vcs/bazaar.py index 82e75954a08..20a17ed0927 100644 --- a/src/pip/_internal/vcs/bazaar.py +++ b/src/pip/_internal/vcs/bazaar.py @@ -33,7 +33,9 @@ class Bazaar(VersionControl): def get_base_rev_args(rev: str) -> List[str]: return ["-r", rev] - def fetch_new(self, dest: str, url: HiddenText, rev_options: RevOptions) -> None: + def fetch_new( + self, dest: str, url: HiddenText, rev_options: RevOptions, verbosity: int + ) -> None: rev_display = rev_options.to_display() logger.info( "Checking out %s%s to %s", @@ -41,19 +43,36 @@ def fetch_new(self, dest: str, url: HiddenText, rev_options: RevOptions) -> None rev_display, display_path(dest), ) - cmd_args = make_command("branch", "-q", rev_options.to_args(), url, dest) + if verbosity <= 0: + flag = "--quiet" + elif verbosity == 1: + flag = "" + else: + flag = f"-{'v'*verbosity}" + cmd_args = make_command( + "checkout", "--lightweight", flag, rev_options.to_args(), url, dest + ) self.run_command(cmd_args) def switch(self, dest: str, url: HiddenText, rev_options: RevOptions) -> None: self.run_command(make_command("switch", url), cwd=dest) def update(self, dest: str, url: HiddenText, rev_options: RevOptions) -> None: - cmd_args = make_command("pull", "-q", rev_options.to_args()) + output = self.run_command( + make_command("info"), show_stdout=False, stdout_only=True, cwd=dest + ) + if output.startswith("Standalone "): + # Older versions of pip used to create standalone branches. + # Convert the standalone branch to a checkout by calling "bzr bind". + cmd_args = make_command("bind", "-q", url) + self.run_command(cmd_args, cwd=dest) + + cmd_args = make_command("update", "-q", rev_options.to_args()) self.run_command(cmd_args, cwd=dest) @classmethod def get_url_rev_and_auth(cls, url: str) -> Tuple[str, Optional[str], AuthInfo]: - # hotfix the URL scheme after removing bzr+ from bzr+ssh:// readd it + # hotfix the URL scheme after removing bzr+ from bzr+ssh:// re-add it url, rev, user_pass = super().get_url_rev_and_auth(url) if url.startswith("ssh://"): url = "bzr+" + url diff --git a/src/pip/_internal/vcs/git.py b/src/pip/_internal/vcs/git.py index 7a78ad12dd5..8d1d4993767 100644 --- a/src/pip/_internal/vcs/git.py +++ b/src/pip/_internal/vcs/git.py @@ -91,7 +91,12 @@ def is_immutable_rev_checkout(self, url: str, dest: str) -> bool: return not is_tag_or_branch def get_git_version(self) -> Tuple[int, ...]: - version = self.run_command(["version"], show_stdout=False, stdout_only=True) + version = self.run_command( + ["version"], + command_desc="git version", + show_stdout=False, + stdout_only=True, + ) match = GIT_VERSION_REGEX.match(version) if not match: logger.warning("Can't parse git version: %s", version) @@ -253,9 +258,17 @@ def is_commit_id_equal(cls, dest: str, name: Optional[str]) -> bool: return cls.get_revision(dest) == name - def fetch_new(self, dest: str, url: HiddenText, rev_options: RevOptions) -> None: + def fetch_new( + self, dest: str, url: HiddenText, rev_options: RevOptions, verbosity: int + ) -> None: rev_display = rev_options.to_display() logger.info("Cloning %s%s to %s", url, rev_display, display_path(dest)) + if verbosity <= 0: + flags: Tuple[str, ...] = ("--quiet",) + elif verbosity == 1: + flags = () + else: + flags = ("--verbose", "--progress") if self.get_git_version() >= (2, 17): # Git added support for partial clone in 2.17 # https://git-scm.com/docs/partial-clone @@ -264,13 +277,13 @@ def fetch_new(self, dest: str, url: HiddenText, rev_options: RevOptions) -> None make_command( "clone", "--filter=blob:none", - "-q", + *flags, url, dest, ) ) else: - self.run_command(make_command("clone", "-q", url, dest)) + self.run_command(make_command("clone", *flags, url, dest)) if rev_options.rev: # Then a specific revision was requested. diff --git a/src/pip/_internal/vcs/mercurial.py b/src/pip/_internal/vcs/mercurial.py index 410c79d903a..2a005e0aff2 100644 --- a/src/pip/_internal/vcs/mercurial.py +++ b/src/pip/_internal/vcs/mercurial.py @@ -1,7 +1,7 @@ import configparser import logging import os -from typing import List, Optional +from typing import List, Optional, Tuple from pip._internal.exceptions import BadCommand, InstallationError from pip._internal.utils.misc import HiddenText, display_path @@ -33,7 +33,9 @@ class Mercurial(VersionControl): def get_base_rev_args(rev: str) -> List[str]: return [rev] - def fetch_new(self, dest: str, url: HiddenText, rev_options: RevOptions) -> None: + def fetch_new( + self, dest: str, url: HiddenText, rev_options: RevOptions, verbosity: int + ) -> None: rev_display = rev_options.to_display() logger.info( "Cloning hg %s%s to %s", @@ -41,9 +43,17 @@ def fetch_new(self, dest: str, url: HiddenText, rev_options: RevOptions) -> None rev_display, display_path(dest), ) - self.run_command(make_command("clone", "--noupdate", "-q", url, dest)) + if verbosity <= 0: + flags: Tuple[str, ...] = ("--quiet",) + elif verbosity == 1: + flags = () + elif verbosity == 2: + flags = ("--verbose",) + else: + flags = ("--verbose", "--debug") + self.run_command(make_command("clone", "--noupdate", *flags, url, dest)) self.run_command( - make_command("update", "-q", rev_options.to_args()), + make_command("update", *flags, rev_options.to_args()), cwd=dest, ) diff --git a/src/pip/_internal/vcs/subversion.py b/src/pip/_internal/vcs/subversion.py index b5b6fd5c09f..16d93a67b7b 100644 --- a/src/pip/_internal/vcs/subversion.py +++ b/src/pip/_internal/vcs/subversion.py @@ -87,7 +87,7 @@ def get_netloc_and_auth( @classmethod def get_url_rev_and_auth(cls, url: str) -> Tuple[str, Optional[str], AuthInfo]: - # hotfix the URL scheme after removing svn+ from svn+ssh:// readd it + # hotfix the URL scheme after removing svn+ from svn+ssh:// re-add it url, rev, user_pass = super().get_url_rev_and_auth(url) if url.startswith("ssh://"): url = "svn+" + url @@ -184,7 +184,7 @@ def is_commit_id_equal(cls, dest: str, name: Optional[str]) -> bool: """Always assume the versions don't match""" return False - def __init__(self, use_interactive: bool = None) -> None: + def __init__(self, use_interactive: Optional[bool] = None) -> None: if use_interactive is None: use_interactive = is_console_interactive() self.use_interactive = use_interactive @@ -277,7 +277,9 @@ def get_remote_call_options(self) -> CommandArgs: return [] - def fetch_new(self, dest: str, url: HiddenText, rev_options: RevOptions) -> None: + def fetch_new( + self, dest: str, url: HiddenText, rev_options: RevOptions, verbosity: int + ) -> None: rev_display = rev_options.to_display() logger.info( "Checking out %s%s to %s", @@ -285,9 +287,13 @@ def fetch_new(self, dest: str, url: HiddenText, rev_options: RevOptions) -> None rev_display, display_path(dest), ) + if verbosity <= 0: + flag = "--quiet" + else: + flag = "" cmd_args = make_command( "checkout", - "-q", + flag, self.get_remote_call_options(), rev_options.to_args(), url, diff --git a/src/pip/_internal/vcs/versioncontrol.py b/src/pip/_internal/vcs/versioncontrol.py index 1139051f3e1..02bbf68e7ad 100644 --- a/src/pip/_internal/vcs/versioncontrol.py +++ b/src/pip/_internal/vcs/versioncontrol.py @@ -31,7 +31,12 @@ is_installable_dir, rmtree, ) -from pip._internal.utils.subprocess import CommandArgs, call_subprocess, make_command +from pip._internal.utils.subprocess import ( + CommandArgs, + call_subprocess, + format_command_args, + make_command, +) from pip._internal.utils.urls import get_url_scheme if TYPE_CHECKING: @@ -458,7 +463,9 @@ def compare_urls(cls, url1: str, url2: str) -> bool: """ return cls.normalize_url(url1) == cls.normalize_url(url2) - def fetch_new(self, dest: str, url: HiddenText, rev_options: RevOptions) -> None: + def fetch_new( + self, dest: str, url: HiddenText, rev_options: RevOptions, verbosity: int + ) -> None: """ Fetch a revision from a repository, in the case that this is the first fetch from the repository. @@ -466,6 +473,7 @@ def fetch_new(self, dest: str, url: HiddenText, rev_options: RevOptions) -> None Args: dest: the directory to fetch the repository to. rev_options: a RevOptions object. + verbosity: verbosity level. """ raise NotImplementedError @@ -498,18 +506,19 @@ def is_commit_id_equal(cls, dest: str, name: Optional[str]) -> bool: """ raise NotImplementedError - def obtain(self, dest: str, url: HiddenText) -> None: + def obtain(self, dest: str, url: HiddenText, verbosity: int) -> None: """ Install or update in editable mode the package represented by this VersionControl object. :param dest: the repository directory in which to install or update. :param url: the repository URL starting with a vcs prefix. + :param verbosity: verbosity level. """ url, rev_options = self.get_url_rev_options(url) if not os.path.exists(dest): - self.fetch_new(dest, url, rev_options) + self.fetch_new(dest, url, rev_options, verbosity=verbosity) return rev_display = rev_options.to_display() @@ -565,14 +574,14 @@ def obtain(self, dest: str, url: HiddenText) -> None: if response == "w": logger.warning("Deleting %s", display_path(dest)) rmtree(dest) - self.fetch_new(dest, url, rev_options) + self.fetch_new(dest, url, rev_options, verbosity=verbosity) return if response == "b": dest_dir = backup_dir(dest) logger.warning("Backing up %s to %s", display_path(dest), dest_dir) shutil.move(dest, dest_dir) - self.fetch_new(dest, url, rev_options) + self.fetch_new(dest, url, rev_options, verbosity=verbosity) return # Do nothing if the response is "i". @@ -586,16 +595,17 @@ def obtain(self, dest: str, url: HiddenText) -> None: ) self.switch(dest, url, rev_options) - def unpack(self, location: str, url: HiddenText) -> None: + def unpack(self, location: str, url: HiddenText, verbosity: int) -> None: """ Clean up current location and download the url repository (and vcs infos) into location :param url: the repository URL starting with a vcs prefix. + :param verbosity: verbosity level. """ if os.path.exists(location): rmtree(location) - self.obtain(location, url=url) + self.obtain(location, url=url, verbosity=verbosity) @classmethod def get_remote_url(cls, location: str) -> str: @@ -634,6 +644,8 @@ def run_command( command name, and checks that the VCS is available """ cmd = make_command(cls.name, *cmd) + if command_desc is None: + command_desc = format_command_args(cmd) try: return call_subprocess( cmd, diff --git a/src/pip/_internal/wheel_builder.py b/src/pip/_internal/wheel_builder.py index a9123a0f1f6..612c91ba317 100644 --- a/src/pip/_internal/wheel_builder.py +++ b/src/pip/_internal/wheel_builder.py @@ -5,7 +5,7 @@ import os.path import re import shutil -from typing import Any, Callable, Iterable, List, Optional, Tuple +from typing import Iterable, List, Optional, Tuple from pip._vendor.packaging.utils import canonicalize_name, canonicalize_version from pip._vendor.packaging.version import InvalidVersion, Version @@ -19,6 +19,7 @@ from pip._internal.operations.build.wheel_editable import build_wheel_editable from pip._internal.operations.build.wheel_legacy import build_wheel_legacy from pip._internal.req.req_install import InstallRequirement +from pip._internal.utils.deprecation import LegacyInstallReasonMissingWheelPackage from pip._internal.utils.logging import indent_log from pip._internal.utils.misc import ensure_dir, hash_file, is_wheel_installed from pip._internal.utils.setuptools_build import make_setuptools_clean_args @@ -31,7 +32,6 @@ _egg_info_re = re.compile(r"([a-z0-9_.]+)-([a-z0-9_.!+-]+)", re.IGNORECASE) -BinaryAllowedPredicate = Callable[[InstallRequirement], bool] BuildResult = Tuple[List[InstallRequirement], List[InstallRequirement]] @@ -46,7 +46,6 @@ def _contains_egg_info(s: str) -> bool: def _should_build( req: InstallRequirement, need_wheel: bool, - check_binary_allowed: BinaryAllowedPredicate, ) -> bool: """Return whether an InstallRequirement should be built into a wheel.""" if req.constraint: @@ -77,20 +76,9 @@ def _should_build( if req.use_pep517: return True - if not check_binary_allowed(req): - logger.info( - "Skipping wheel build for %s, due to binaries being disabled for it.", - req.name, - ) - return False - if not is_wheel_installed(): # we don't build legacy requirements if wheel is not installed - logger.info( - "Using legacy 'setup.py install' for %s, " - "since package 'wheel' is not installed.", - req.name, - ) + req.legacy_install_reason = LegacyInstallReasonMissingWheelPackage return False return True @@ -99,16 +87,13 @@ def _should_build( def should_build_for_wheel_command( req: InstallRequirement, ) -> bool: - return _should_build(req, need_wheel=True, check_binary_allowed=_always_true) + return _should_build(req, need_wheel=True) def should_build_for_install_command( req: InstallRequirement, - check_binary_allowed: BinaryAllowedPredicate, ) -> bool: - return _should_build( - req, need_wheel=False, check_binary_allowed=check_binary_allowed - ) + return _should_build(req, need_wheel=False) def _should_cache( @@ -159,10 +144,6 @@ def _get_cache_dir( return cache_dir -def _always_true(_: Any) -> bool: - return True - - def _verify_one(req: InstallRequirement, wheel_path: str) -> None: canonical_name = canonicalize_name(req.name or "") w = Wheel(os.path.basename(wheel_path)) @@ -310,7 +291,9 @@ def _clean_one_legacy(req: InstallRequirement, global_options: List[str]) -> boo logger.info("Running setup.py clean for %s", req.name) try: - call_subprocess(clean_args, cwd=req.source_dir) + call_subprocess( + clean_args, command_desc="python setup.py clean", cwd=req.source_dir + ) return True except Exception: logger.error("Failed cleaning build dir for %s", req.name) @@ -352,6 +335,12 @@ def build( req.editable and req.permit_editable_wheels, ) if wheel_file: + # Record the download origin in the cache + if req.download_info is not None: + # download_info is guaranteed to be set because when we build an + # InstallRequirement it has been through the preparer before, but + # let's be cautious. + wheel_cache.record_download_origin(cache_dir, req.download_info) # Update the link for this. req.link = Link(path_to_url(wheel_file)) req.local_file_path = req.link.file_path diff --git a/src/pip/_vendor/README.rst b/src/pip/_vendor/README.rst index 26904ca251a..077f1abf773 100644 --- a/src/pip/_vendor/README.rst +++ b/src/pip/_vendor/README.rst @@ -104,9 +104,6 @@ Modifications rather than ``appdirs``. * ``packaging`` has been modified to import its dependencies from ``pip._vendor``. -* ``html5lib`` has been modified to import six from ``pip._vendor``, to prefer - importing from ``collections.abc`` instead of ``collections`` and does not - import ``xml.etree.cElementTree`` on Python 3. * ``CacheControl`` has been modified to import its dependencies from ``pip._vendor``. * ``requests`` has been modified to import its other dependencies from diff --git a/src/pip/_vendor/__init__.py b/src/pip/_vendor/__init__.py index 3843cb09955..b22f7abb93b 100644 --- a/src/pip/_vendor/__init__.py +++ b/src/pip/_vendor/__init__.py @@ -63,7 +63,6 @@ def vendored(modulename): vendored("colorama") vendored("distlib") vendored("distro") - vendored("html5lib") vendored("six") vendored("six.moves") vendored("six.moves.urllib") @@ -106,6 +105,16 @@ def vendored(modulename): vendored("requests.packages.urllib3.util.timeout") vendored("requests.packages.urllib3.util.url") vendored("resolvelib") + vendored("rich") + vendored("rich.console") + vendored("rich.highlighter") + vendored("rich.logging") + vendored("rich.markup") + vendored("rich.progress") + vendored("rich.segment") + vendored("rich.style") + vendored("rich.text") + vendored("rich.traceback") vendored("tenacity") vendored("tomli") vendored("urllib3") diff --git a/src/pip/_vendor/cachecontrol/LICENSE.txt b/src/pip/_vendor/cachecontrol/LICENSE.txt index 1ed31ac3688..d8b3b56d361 100644 --- a/src/pip/_vendor/cachecontrol/LICENSE.txt +++ b/src/pip/_vendor/cachecontrol/LICENSE.txt @@ -1,4 +1,4 @@ -Copyright 2015 Eric Larson +Copyright 2012-2021 Eric Larson Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -8,8 +8,6 @@ You may obtain a copy of the License at Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or -implied. - +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. diff --git a/src/pip/_vendor/cachecontrol/__init__.py b/src/pip/_vendor/cachecontrol/__init__.py index a1bbbbe3bff..f631ae6df47 100644 --- a/src/pip/_vendor/cachecontrol/__init__.py +++ b/src/pip/_vendor/cachecontrol/__init__.py @@ -1,11 +1,18 @@ +# SPDX-FileCopyrightText: 2015 Eric Larson +# +# SPDX-License-Identifier: Apache-2.0 + """CacheControl import Interface. Make it easy to import from cachecontrol without long namespaces. """ __author__ = "Eric Larson" __email__ = "eric@ionrock.org" -__version__ = "0.12.6" +__version__ = "0.12.11" from .wrapper import CacheControl from .adapter import CacheControlAdapter from .controller import CacheController + +import logging +logging.getLogger(__name__).addHandler(logging.NullHandler()) diff --git a/src/pip/_vendor/cachecontrol/_cmd.py b/src/pip/_vendor/cachecontrol/_cmd.py index f1e0ad94a1c..4266b5ee92a 100644 --- a/src/pip/_vendor/cachecontrol/_cmd.py +++ b/src/pip/_vendor/cachecontrol/_cmd.py @@ -1,3 +1,7 @@ +# SPDX-FileCopyrightText: 2015 Eric Larson +# +# SPDX-License-Identifier: Apache-2.0 + import logging from pip._vendor import requests diff --git a/src/pip/_vendor/cachecontrol/adapter.py b/src/pip/_vendor/cachecontrol/adapter.py index 815650e81fe..94c75e1a05b 100644 --- a/src/pip/_vendor/cachecontrol/adapter.py +++ b/src/pip/_vendor/cachecontrol/adapter.py @@ -1,16 +1,20 @@ +# SPDX-FileCopyrightText: 2015 Eric Larson +# +# SPDX-License-Identifier: Apache-2.0 + import types import functools import zlib from pip._vendor.requests.adapters import HTTPAdapter -from .controller import CacheController +from .controller import CacheController, PERMANENT_REDIRECT_STATUSES from .cache import DictCache from .filewrapper import CallbackFileWrapper class CacheControlAdapter(HTTPAdapter): - invalidating_methods = {"PUT", "DELETE"} + invalidating_methods = {"PUT", "PATCH", "DELETE"} def __init__( self, @@ -93,7 +97,7 @@ def build_response( response = cached_response # We always cache the 301 responses - elif response.status == 301: + elif int(response.status) in PERMANENT_REDIRECT_STATUSES: self.controller.cache_response(request, response) else: # Wrap the response file with a wrapper that will cache the diff --git a/src/pip/_vendor/cachecontrol/cache.py b/src/pip/_vendor/cachecontrol/cache.py index 94e07732d91..2a965f595ff 100644 --- a/src/pip/_vendor/cachecontrol/cache.py +++ b/src/pip/_vendor/cachecontrol/cache.py @@ -1,3 +1,7 @@ +# SPDX-FileCopyrightText: 2015 Eric Larson +# +# SPDX-License-Identifier: Apache-2.0 + """ The cache object API for implementing caches. The default is a thread safe in-memory dictionary. @@ -10,7 +14,7 @@ class BaseCache(object): def get(self, key): raise NotImplementedError() - def set(self, key, value): + def set(self, key, value, expires=None): raise NotImplementedError() def delete(self, key): @@ -29,7 +33,7 @@ def __init__(self, init_dict=None): def get(self, key): return self.data.get(key, None) - def set(self, key, value): + def set(self, key, value, expires=None): with self.lock: self.data.update({key: value}) @@ -37,3 +41,25 @@ def delete(self, key): with self.lock: if key in self.data: self.data.pop(key) + + +class SeparateBodyBaseCache(BaseCache): + """ + In this variant, the body is not stored mixed in with the metadata, but is + passed in (as a bytes-like object) in a separate call to ``set_body()``. + + That is, the expected interaction pattern is:: + + cache.set(key, serialized_metadata) + cache.set_body(key) + + Similarly, the body should be loaded separately via ``get_body()``. + """ + def set_body(self, key, body): + raise NotImplementedError() + + def get_body(self, key): + """ + Return the body as file-like object. + """ + raise NotImplementedError() diff --git a/src/pip/_vendor/cachecontrol/caches/__init__.py b/src/pip/_vendor/cachecontrol/caches/__init__.py index 0e1658fa5e5..37827291fb5 100644 --- a/src/pip/_vendor/cachecontrol/caches/__init__.py +++ b/src/pip/_vendor/cachecontrol/caches/__init__.py @@ -1,2 +1,9 @@ -from .file_cache import FileCache # noqa -from .redis_cache import RedisCache # noqa +# SPDX-FileCopyrightText: 2015 Eric Larson +# +# SPDX-License-Identifier: Apache-2.0 + +from .file_cache import FileCache, SeparateBodyFileCache +from .redis_cache import RedisCache + + +__all__ = ["FileCache", "SeparateBodyFileCache", "RedisCache"] diff --git a/src/pip/_vendor/cachecontrol/caches/file_cache.py b/src/pip/_vendor/cachecontrol/caches/file_cache.py index 607b9452428..f1ddb2ebdf9 100644 --- a/src/pip/_vendor/cachecontrol/caches/file_cache.py +++ b/src/pip/_vendor/cachecontrol/caches/file_cache.py @@ -1,8 +1,12 @@ +# SPDX-FileCopyrightText: 2015 Eric Larson +# +# SPDX-License-Identifier: Apache-2.0 + import hashlib import os from textwrap import dedent -from ..cache import BaseCache +from ..cache import BaseCache, SeparateBodyBaseCache from ..controller import CacheController try: @@ -53,7 +57,8 @@ def _secure_open_write(filename, fmode): raise -class FileCache(BaseCache): +class _FileCacheMixin: + """Shared implementation for both FileCache variants.""" def __init__( self, @@ -114,22 +119,27 @@ def get(self, key): except FileNotFoundError: return None - def set(self, key, value): + def set(self, key, value, expires=None): name = self._fn(key) + self._write(name, value) + def _write(self, path, data: bytes): + """ + Safely write the data to the given path. + """ # Make sure the directory exists try: - os.makedirs(os.path.dirname(name), self.dirmode) + os.makedirs(os.path.dirname(path), self.dirmode) except (IOError, OSError): pass - with self.lock_class(name) as lock: + with self.lock_class(path) as lock: # Write our actual file with _secure_open_write(lock.path, self.filemode) as fh: - fh.write(value) + fh.write(data) - def delete(self, key): - name = self._fn(key) + def _delete(self, key, suffix): + name = self._fn(key) + suffix if not self.forever: try: os.remove(name) @@ -137,6 +147,38 @@ def delete(self, key): pass +class FileCache(_FileCacheMixin, BaseCache): + """ + Traditional FileCache: body is stored in memory, so not suitable for large + downloads. + """ + + def delete(self, key): + self._delete(key, "") + + +class SeparateBodyFileCache(_FileCacheMixin, SeparateBodyBaseCache): + """ + Memory-efficient FileCache: body is stored in a separate file, reducing + peak memory usage. + """ + + def get_body(self, key): + name = self._fn(key) + ".body" + try: + return open(name, "rb") + except FileNotFoundError: + return None + + def set_body(self, key, body): + name = self._fn(key) + ".body" + self._write(name, body) + + def delete(self, key): + self._delete(key, "") + self._delete(key, ".body") + + def url_to_file_path(url, filecache): """Return the file cache path based on the URL. diff --git a/src/pip/_vendor/cachecontrol/caches/redis_cache.py b/src/pip/_vendor/cachecontrol/caches/redis_cache.py index ed705ce7df6..2cba4b07080 100644 --- a/src/pip/_vendor/cachecontrol/caches/redis_cache.py +++ b/src/pip/_vendor/cachecontrol/caches/redis_cache.py @@ -1,3 +1,7 @@ +# SPDX-FileCopyrightText: 2015 Eric Larson +# +# SPDX-License-Identifier: Apache-2.0 + from __future__ import division from datetime import datetime @@ -15,9 +19,11 @@ def get(self, key): def set(self, key, value, expires=None): if not expires: self.conn.set(key, value) - else: + elif isinstance(expires, datetime): expires = expires - datetime.utcnow() self.conn.setex(key, int(expires.total_seconds()), value) + else: + self.conn.setex(key, expires, value) def delete(self, key): self.conn.delete(key) diff --git a/src/pip/_vendor/cachecontrol/compat.py b/src/pip/_vendor/cachecontrol/compat.py index 33b5aed0a32..ccec9379dba 100644 --- a/src/pip/_vendor/cachecontrol/compat.py +++ b/src/pip/_vendor/cachecontrol/compat.py @@ -1,3 +1,7 @@ +# SPDX-FileCopyrightText: 2015 Eric Larson +# +# SPDX-License-Identifier: Apache-2.0 + try: from urllib.parse import urljoin except ImportError: @@ -9,7 +13,6 @@ except ImportError: import pickle - # Handle the case where the requests module has been patched to not have # urllib3 bundled as part of its source. try: diff --git a/src/pip/_vendor/cachecontrol/controller.py b/src/pip/_vendor/cachecontrol/controller.py index dafe55ca70c..7f23529f115 100644 --- a/src/pip/_vendor/cachecontrol/controller.py +++ b/src/pip/_vendor/cachecontrol/controller.py @@ -1,3 +1,7 @@ +# SPDX-FileCopyrightText: 2015 Eric Larson +# +# SPDX-License-Identifier: Apache-2.0 + """ The httplib2 algorithms ported for use with requests. """ @@ -9,7 +13,7 @@ from pip._vendor.requests.structures import CaseInsensitiveDict -from .cache import DictCache +from .cache import DictCache, SeparateBodyBaseCache from .serialize import Serializer @@ -17,19 +21,20 @@ URI = re.compile(r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?") +PERMANENT_REDIRECT_STATUSES = (301, 308) + def parse_uri(uri): """Parses a URI using the regex given in Appendix B of RFC 3986. - (scheme, authority, path, query, fragment) = parse_uri(uri) + (scheme, authority, path, query, fragment) = parse_uri(uri) """ groups = URI.match(uri).groups() return (groups[1], groups[3], groups[4], groups[6], groups[8]) class CacheController(object): - """An interface to see if request should cached or not. - """ + """An interface to see if request should cached or not.""" def __init__( self, cache=None, cache_etags=True, serializer=None, status_codes=None @@ -37,7 +42,7 @@ def __init__( self.cache = DictCache() if cache is None else cache self.cache_etags = cache_etags self.serializer = serializer or Serializer() - self.cacheable_status_codes = status_codes or (200, 203, 300, 301) + self.cacheable_status_codes = status_codes or (200, 203, 300, 301, 308) @classmethod def _urlnorm(cls, uri): @@ -141,23 +146,29 @@ def cached_request(self, request): logger.debug("No cache entry available") return False + if isinstance(self.cache, SeparateBodyBaseCache): + body_file = self.cache.get_body(cache_url) + else: + body_file = None + # Check whether it can be deserialized - resp = self.serializer.loads(request, cache_data) + resp = self.serializer.loads(request, cache_data, body_file) if not resp: logger.warning("Cache entry deserialization failed, entry ignored") return False - # If we have a cached 301, return it immediately. We don't - # need to test our response for other headers b/c it is + # If we have a cached permanent redirect, return it immediately. We + # don't need to test our response for other headers b/c it is # intrinsically "cacheable" as it is Permanent. + # # See: # https://tools.ietf.org/html/rfc7231#section-6.4.2 # # Client can try to refresh the value by repeating the request # with cache busting headers as usual (ie no-cache). - if resp.status == 301: + if int(resp.status) in PERMANENT_REDIRECT_STATUSES: msg = ( - 'Returning cached "301 Moved Permanently" response ' + "Returning cached permanent redirect response " "(ignoring date and etag information)" ) logger.debug(msg) @@ -244,6 +255,26 @@ def conditional_headers(self, request): return new_headers + def _cache_set(self, cache_url, request, response, body=None, expires_time=None): + """ + Store the data in the cache. + """ + if isinstance(self.cache, SeparateBodyBaseCache): + # We pass in the body separately; just put a placeholder empty + # string in the metadata. + self.cache.set( + cache_url, + self.serializer.dumps(request, response, b""), + expires=expires_time, + ) + self.cache.set_body(cache_url, body) + else: + self.cache.set( + cache_url, + self.serializer.dumps(request, response, body), + expires=expires_time, + ) + def cache_response(self, request, response, body=None, status_codes=None): """ Algorithm for caching requests. @@ -261,6 +292,11 @@ def cache_response(self, request, response, body=None, status_codes=None): response_headers = CaseInsensitiveDict(response.headers) + if "date" in response_headers: + date = calendar.timegm(parsedate_tz(response_headers["date"])) + else: + date = 0 + # If we've been given a body, our response has a Content-Length, that # Content-Length is valid then we can check to see if the body we've # been given matches the expected size, and if it doesn't we'll just @@ -304,35 +340,62 @@ def cache_response(self, request, response, body=None, status_codes=None): # If we've been given an etag, then keep the response if self.cache_etags and "etag" in response_headers: + expires_time = 0 + if response_headers.get("expires"): + expires = parsedate_tz(response_headers["expires"]) + if expires is not None: + expires_time = calendar.timegm(expires) - date + + expires_time = max(expires_time, 14 * 86400) + + logger.debug("etag object cached for {0} seconds".format(expires_time)) logger.debug("Caching due to etag") - self.cache.set( - cache_url, self.serializer.dumps(request, response, body=body) - ) + self._cache_set(cache_url, request, response, body, expires_time) - # Add to the cache any 301s. We do this before looking that - # the Date headers. - elif response.status == 301: - logger.debug("Caching permanant redirect") - self.cache.set(cache_url, self.serializer.dumps(request, response)) + # Add to the cache any permanent redirects. We do this before looking + # that the Date headers. + elif int(response.status) in PERMANENT_REDIRECT_STATUSES: + logger.debug("Caching permanent redirect") + self._cache_set(cache_url, request, response, b"") # Add to the cache if the response headers demand it. If there # is no date header then we can't do anything about expiring # the cache. elif "date" in response_headers: + date = calendar.timegm(parsedate_tz(response_headers["date"])) # cache when there is a max-age > 0 if "max-age" in cc and cc["max-age"] > 0: logger.debug("Caching b/c date exists and max-age > 0") - self.cache.set( - cache_url, self.serializer.dumps(request, response, body=body) + expires_time = cc["max-age"] + self._cache_set( + cache_url, + request, + response, + body, + expires_time, ) # If the request can expire, it means we should cache it # in the meantime. elif "expires" in response_headers: if response_headers["expires"]: - logger.debug("Caching b/c of expires header") - self.cache.set( - cache_url, self.serializer.dumps(request, response, body=body) + expires = parsedate_tz(response_headers["expires"]) + if expires is not None: + expires_time = calendar.timegm(expires) - date + else: + expires_time = None + + logger.debug( + "Caching b/c of expires header. expires in {0} seconds".format( + expires_time + ) + ) + self._cache_set( + cache_url, + request, + response, + body, + expires_time, ) def update_cached_response(self, request, response): @@ -371,6 +434,6 @@ def update_cached_response(self, request, response): cached_response.status = 200 # update our cache - self.cache.set(cache_url, self.serializer.dumps(request, cached_response)) + self._cache_set(cache_url, request, cached_response) return cached_response diff --git a/src/pip/_vendor/cachecontrol/filewrapper.py b/src/pip/_vendor/cachecontrol/filewrapper.py index 30ed4c5a62a..f5ed5f6f6ec 100644 --- a/src/pip/_vendor/cachecontrol/filewrapper.py +++ b/src/pip/_vendor/cachecontrol/filewrapper.py @@ -1,4 +1,9 @@ -from io import BytesIO +# SPDX-FileCopyrightText: 2015 Eric Larson +# +# SPDX-License-Identifier: Apache-2.0 + +from tempfile import NamedTemporaryFile +import mmap class CallbackFileWrapper(object): @@ -11,10 +16,17 @@ class CallbackFileWrapper(object): This class uses members with a double underscore (__) leading prefix so as not to accidentally shadow an attribute. + + The data is stored in a temporary file until it is all available. As long + as the temporary files directory is disk-based (sometimes it's a + memory-backed-``tmpfs`` on Linux), data will be unloaded to disk if memory + pressure is high. For small files the disk usually won't be used at all, + it'll all be in the filesystem memory cache, so there should be no + performance impact. """ def __init__(self, fp, callback): - self.__buf = BytesIO() + self.__buf = NamedTemporaryFile("rb+", delete=True) self.__fp = fp self.__callback = callback @@ -49,7 +61,19 @@ def __is_fp_closed(self): def _close(self): if self.__callback: - self.__callback(self.__buf.getvalue()) + if self.__buf.tell() == 0: + # Empty file: + result = b"" + else: + # Return the data without actually loading it into memory, + # relying on Python's buffer API and mmap(). mmap() just gives + # a view directly into the filesystem's memory cache, so it + # doesn't result in duplicate memory use. + self.__buf.seek(0, 0) + result = memoryview( + mmap.mmap(self.__buf.fileno(), 0, access=mmap.ACCESS_READ) + ) + self.__callback(result) # We assign this to None here, because otherwise we can get into # really tricky problems where the CPython interpreter dead locks @@ -58,9 +82,16 @@ def _close(self): # and allows the garbage collector to do it's thing normally. self.__callback = None + # Closing the temporary file releases memory and frees disk space. + # Important when caching big files. + self.__buf.close() + def read(self, amt=None): data = self.__fp.read(amt) - self.__buf.write(data) + if data: + # We may be dealing with b'', a sign that things are over: + # it's passed e.g. after we've already closed self.__buf. + self.__buf.write(data) if self.__is_fp_closed(): self._close() diff --git a/src/pip/_vendor/cachecontrol/heuristics.py b/src/pip/_vendor/cachecontrol/heuristics.py index 6c0e9790d5d..ebe4a96f589 100644 --- a/src/pip/_vendor/cachecontrol/heuristics.py +++ b/src/pip/_vendor/cachecontrol/heuristics.py @@ -1,3 +1,7 @@ +# SPDX-FileCopyrightText: 2015 Eric Larson +# +# SPDX-License-Identifier: Apache-2.0 + import calendar import time diff --git a/src/pip/_vendor/cachecontrol/serialize.py b/src/pip/_vendor/cachecontrol/serialize.py index 3b6ec2de1c1..7fe1a3e33a3 100644 --- a/src/pip/_vendor/cachecontrol/serialize.py +++ b/src/pip/_vendor/cachecontrol/serialize.py @@ -1,3 +1,7 @@ +# SPDX-FileCopyrightText: 2015 Eric Larson +# +# SPDX-License-Identifier: Apache-2.0 + import base64 import io import json @@ -17,24 +21,18 @@ def _b64_decode_str(s): return _b64_decode_bytes(s).decode("utf8") -class Serializer(object): +_default_body_read = object() + +class Serializer(object): def dumps(self, request, response, body=None): response_headers = CaseInsensitiveDict(response.headers) if body is None: + # When a body isn't passed in, we'll read the response. We + # also update the response with a new file handler to be + # sure it acts as though it was never read. body = response.read(decode_content=False) - - # NOTE: 99% sure this is dead code. I'm only leaving it - # here b/c I don't have a test yet to prove - # it. Basically, before using - # `cachecontrol.filewrapper.CallbackFileWrapper`, - # this made an effort to reset the file handle. The - # `CallbackFileWrapper` short circuits this code by - # setting the body as the content is consumed, the - # result being a `body` argument is *always* passed - # into cache_response, and in turn, - # `Serializer.dump`. response._fp = io.BytesIO(body) # NOTE: This is all a bit weird, but it's really important that on @@ -46,7 +44,7 @@ def dumps(self, request, response, body=None): # enough to have msgpack know the difference. data = { u"response": { - u"body": body, + u"body": body, # Empty bytestring if body is stored separately u"headers": dict( (text_type(k), text_type(v)) for k, v in response.headers.items() ), @@ -71,7 +69,7 @@ def dumps(self, request, response, body=None): return b",".join([b"cc=4", msgpack.dumps(data, use_bin_type=True)]) - def loads(self, request, data): + def loads(self, request, data, body_file=None): # Short circuit if we've been given an empty set of data if not data: return @@ -94,14 +92,14 @@ def loads(self, request, data): # Dispatch to the actual load method for the given version try: - return getattr(self, "_loads_v{}".format(ver))(request, data) + return getattr(self, "_loads_v{}".format(ver))(request, data, body_file) except AttributeError: # This is a version we don't have a loads function for, so we'll # just treat it as a miss and return None return - def prepare_response(self, request, cached): + def prepare_response(self, request, cached, body_file=None): """Verify our vary headers match and construct a real urllib3 HTTPResponse object. """ @@ -127,7 +125,10 @@ def prepare_response(self, request, cached): cached["response"]["headers"] = headers try: - body = io.BytesIO(body_raw) + if body_file is None: + body = io.BytesIO(body_raw) + else: + body = body_file except TypeError: # This can happen if cachecontrol serialized to v1 format (pickle) # using Python 2. A Python 2 str(byte string) will be unpickled as @@ -139,21 +140,22 @@ def prepare_response(self, request, cached): return HTTPResponse(body=body, preload_content=False, **cached["response"]) - def _loads_v0(self, request, data): + def _loads_v0(self, request, data, body_file=None): # The original legacy cache data. This doesn't contain enough # information to construct everything we need, so we'll treat this as # a miss. return - def _loads_v1(self, request, data): + def _loads_v1(self, request, data, body_file=None): try: cached = pickle.loads(data) except ValueError: return - return self.prepare_response(request, cached) + return self.prepare_response(request, cached, body_file) - def _loads_v2(self, request, data): + def _loads_v2(self, request, data, body_file=None): + assert body_file is None try: cached = json.loads(zlib.decompress(data).decode("utf8")) except (ValueError, zlib.error): @@ -171,18 +173,18 @@ def _loads_v2(self, request, data): for k, v in cached["vary"].items() ) - return self.prepare_response(request, cached) + return self.prepare_response(request, cached, body_file) - def _loads_v3(self, request, data): + def _loads_v3(self, request, data, body_file): # Due to Python 2 encoding issues, it's impossible to know for sure # exactly how to load v3 entries, thus we'll treat these as a miss so # that they get rewritten out as v4 entries. return - def _loads_v4(self, request, data): + def _loads_v4(self, request, data, body_file=None): try: cached = msgpack.loads(data, raw=False) except ValueError: return - return self.prepare_response(request, cached) + return self.prepare_response(request, cached, body_file) diff --git a/src/pip/_vendor/cachecontrol/wrapper.py b/src/pip/_vendor/cachecontrol/wrapper.py index d8e6fc6a9e3..b6ee7f20398 100644 --- a/src/pip/_vendor/cachecontrol/wrapper.py +++ b/src/pip/_vendor/cachecontrol/wrapper.py @@ -1,3 +1,7 @@ +# SPDX-FileCopyrightText: 2015 Eric Larson +# +# SPDX-License-Identifier: Apache-2.0 + from .adapter import CacheControlAdapter from .cache import DictCache diff --git a/src/pip/_vendor/certifi.pyi b/src/pip/_vendor/certifi.pyi deleted file mode 100644 index e5c4d3d2af3..00000000000 --- a/src/pip/_vendor/certifi.pyi +++ /dev/null @@ -1 +0,0 @@ -from certifi import * \ No newline at end of file diff --git a/src/pip/_vendor/certifi/LICENSE b/src/pip/_vendor/certifi/LICENSE index c2fda9a2642..0a64774eabe 100644 --- a/src/pip/_vendor/certifi/LICENSE +++ b/src/pip/_vendor/certifi/LICENSE @@ -6,7 +6,7 @@ Certificate data from Mozilla as of: Thu Nov 3 19:04:19 2011# This is a bundle of X.509 certificates of public Certificate Authorities (CA). These were automatically extracted from Mozilla's root certificates file (certdata.txt). This file can be found in the mozilla source tree: -http://mxr.mozilla.org/mozilla/source/security/nss/lib/ckfw/builtins/certdata.txt?raw=1# +https://hg.mozilla.org/mozilla-central/file/tip/security/nss/lib/ckfw/builtins/certdata.txt It contains the certificates in PEM format and therefore can be directly used with curl / libcurl / php_curl, or with an Apache+mod_ssl webserver for SSL client authentication. diff --git a/src/pip/_vendor/certifi/__init__.py b/src/pip/_vendor/certifi/__init__.py index eebdf88867d..a3546f12555 100644 --- a/src/pip/_vendor/certifi/__init__.py +++ b/src/pip/_vendor/certifi/__init__.py @@ -1,3 +1,4 @@ from .core import contents, where -__version__ = "2021.05.30" +__all__ = ["contents", "where"] +__version__ = "2022.12.07" diff --git a/src/pip/_vendor/certifi/cacert.pem b/src/pip/_vendor/certifi/cacert.pem index 96e2fc65a81..df9e4e3c755 100644 --- a/src/pip/_vendor/certifi/cacert.pem +++ b/src/pip/_vendor/certifi/cacert.pem @@ -28,36 +28,6 @@ DKqC5JlR3XC321Y9YeRq4VzW9v493kHMB65jUr9TU/Qr6cf9tveCX4XSQRjbgbME HMUfpIBvFSDJ3gyICh3WZlXi/EjJKSZp4A== -----END CERTIFICATE----- -# Issuer: CN=GlobalSign O=GlobalSign OU=GlobalSign Root CA - R2 -# Subject: CN=GlobalSign O=GlobalSign OU=GlobalSign Root CA - R2 -# Label: "GlobalSign Root CA - R2" -# Serial: 4835703278459682885658125 -# MD5 Fingerprint: 94:14:77:7e:3e:5e:fd:8f:30:bd:41:b0:cf:e7:d0:30 -# SHA1 Fingerprint: 75:e0:ab:b6:13:85:12:27:1c:04:f8:5f:dd:de:38:e4:b7:24:2e:fe -# SHA256 Fingerprint: ca:42:dd:41:74:5f:d0:b8:1e:b9:02:36:2c:f9:d8:bf:71:9d:a1:bd:1b:1e:fc:94:6f:5b:4c:99:f4:2c:1b:9e ------BEGIN CERTIFICATE----- -MIIDujCCAqKgAwIBAgILBAAAAAABD4Ym5g0wDQYJKoZIhvcNAQEFBQAwTDEgMB4G -A1UECxMXR2xvYmFsU2lnbiBSb290IENBIC0gUjIxEzARBgNVBAoTCkdsb2JhbFNp -Z24xEzARBgNVBAMTCkdsb2JhbFNpZ24wHhcNMDYxMjE1MDgwMDAwWhcNMjExMjE1 -MDgwMDAwWjBMMSAwHgYDVQQLExdHbG9iYWxTaWduIFJvb3QgQ0EgLSBSMjETMBEG -A1UEChMKR2xvYmFsU2lnbjETMBEGA1UEAxMKR2xvYmFsU2lnbjCCASIwDQYJKoZI -hvcNAQEBBQADggEPADCCAQoCggEBAKbPJA6+Lm8omUVCxKs+IVSbC9N/hHD6ErPL -v4dfxn+G07IwXNb9rfF73OX4YJYJkhD10FPe+3t+c4isUoh7SqbKSaZeqKeMWhG8 -eoLrvozps6yWJQeXSpkqBy+0Hne/ig+1AnwblrjFuTosvNYSuetZfeLQBoZfXklq -tTleiDTsvHgMCJiEbKjNS7SgfQx5TfC4LcshytVsW33hoCmEofnTlEnLJGKRILzd -C9XZzPnqJworc5HGnRusyMvo4KD0L5CLTfuwNhv2GXqF4G3yYROIXJ/gkwpRl4pa -zq+r1feqCapgvdzZX99yqWATXgAByUr6P6TqBwMhAo6CygPCm48CAwEAAaOBnDCB -mTAOBgNVHQ8BAf8EBAMCAQYwDwYDVR0TAQH/BAUwAwEB/zAdBgNVHQ4EFgQUm+IH -V2ccHsBqBt5ZtJot39wZhi4wNgYDVR0fBC8wLTAroCmgJ4YlaHR0cDovL2NybC5n -bG9iYWxzaWduLm5ldC9yb290LXIyLmNybDAfBgNVHSMEGDAWgBSb4gdXZxwewGoG -3lm0mi3f3BmGLjANBgkqhkiG9w0BAQUFAAOCAQEAmYFThxxol4aR7OBKuEQLq4Gs -J0/WwbgcQ3izDJr86iw8bmEbTUsp9Z8FHSbBuOmDAGJFtqkIk7mpM0sYmsL4h4hO -291xNBrBVNpGP+DTKqttVCL1OmLNIG+6KYnX3ZHu01yiPqFbQfXf5WRDLenVOavS -ot+3i9DAgBkcRcAtjOj4LaR0VknFBbVPFd5uRHg5h6h+u/N5GJG79G+dwfCMNYxd -AfvDbbnvRG15RjF+Cv6pgsH/76tuIMRQyV+dTZsXjAzlAcmgQWpzU/qlULRuJQ/7 -TBj0/VLZjmmx6BEP3ojY+x1J96relc8geMJgEtslQIxq/H5COEBkEveegeGTLg== ------END CERTIFICATE----- - # Issuer: CN=Entrust.net Certification Authority (2048) O=Entrust.net OU=www.entrust.net/CPS_2048 incorp. by ref. (limits liab.)/(c) 1999 Entrust.net Limited # Subject: CN=Entrust.net Certification Authority (2048) O=Entrust.net OU=www.entrust.net/CPS_2048 incorp. by ref. (limits liab.)/(c) 1999 Entrust.net Limited # Label: "Entrust.net Premium 2048 Secure Server CA" @@ -491,34 +461,6 @@ vEsXCS+0yx5DaMkHJ8HSXPfqIbloEpw8nL+e/IBcm2PN7EeqJSdnoDfzAIJ9VNep +OkuE6N36B9K -----END CERTIFICATE----- -# Issuer: CN=DST Root CA X3 O=Digital Signature Trust Co. -# Subject: CN=DST Root CA X3 O=Digital Signature Trust Co. -# Label: "DST Root CA X3" -# Serial: 91299735575339953335919266965803778155 -# MD5 Fingerprint: 41:03:52:dc:0f:f7:50:1b:16:f0:02:8e:ba:6f:45:c5 -# SHA1 Fingerprint: da:c9:02:4f:54:d8:f6:df:94:93:5f:b1:73:26:38:ca:6a:d7:7c:13 -# SHA256 Fingerprint: 06:87:26:03:31:a7:24:03:d9:09:f1:05:e6:9b:cf:0d:32:e1:bd:24:93:ff:c6:d9:20:6d:11:bc:d6:77:07:39 ------BEGIN CERTIFICATE----- -MIIDSjCCAjKgAwIBAgIQRK+wgNajJ7qJMDmGLvhAazANBgkqhkiG9w0BAQUFADA/ -MSQwIgYDVQQKExtEaWdpdGFsIFNpZ25hdHVyZSBUcnVzdCBDby4xFzAVBgNVBAMT -DkRTVCBSb290IENBIFgzMB4XDTAwMDkzMDIxMTIxOVoXDTIxMDkzMDE0MDExNVow -PzEkMCIGA1UEChMbRGlnaXRhbCBTaWduYXR1cmUgVHJ1c3QgQ28uMRcwFQYDVQQD -Ew5EU1QgUm9vdCBDQSBYMzCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEB -AN+v6ZdQCINXtMxiZfaQguzH0yxrMMpb7NnDfcdAwRgUi+DoM3ZJKuM/IUmTrE4O -rz5Iy2Xu/NMhD2XSKtkyj4zl93ewEnu1lcCJo6m67XMuegwGMoOifooUMM0RoOEq -OLl5CjH9UL2AZd+3UWODyOKIYepLYYHsUmu5ouJLGiifSKOeDNoJjj4XLh7dIN9b -xiqKqy69cK3FCxolkHRyxXtqqzTWMIn/5WgTe1QLyNau7Fqckh49ZLOMxt+/yUFw -7BZy1SbsOFU5Q9D8/RhcQPGX69Wam40dutolucbY38EVAjqr2m7xPi71XAicPNaD -aeQQmxkqtilX4+U9m5/wAl0CAwEAAaNCMEAwDwYDVR0TAQH/BAUwAwEB/zAOBgNV -HQ8BAf8EBAMCAQYwHQYDVR0OBBYEFMSnsaR7LHH62+FLkHX/xBVghYkQMA0GCSqG -SIb3DQEBBQUAA4IBAQCjGiybFwBcqR7uKGY3Or+Dxz9LwwmglSBd49lZRNI+DT69 -ikugdB/OEIKcdBodfpga3csTS7MgROSR6cz8faXbauX+5v3gTt23ADq1cEmv8uXr -AvHRAosZy5Q6XkjEGB5YGV8eAlrwDPGxrancWYaLbumR9YbK+rlmM6pZW87ipxZz -R8srzJmwN0jP41ZL9c8PDHIyh8bwRLtTcm1D9SZImlJnt1ir/md2cXjbDaJWFBM5 -JDGFoqgCWjBH4d1QB7wCCZAA62RjYJsWvIjJEubSfZGL+T0yjWW06XyxV3bqxbYo -Ob8VZRzI9neWagqNdwvYkQsEjgfbKbYK7p2CNTUQ ------END CERTIFICATE----- - # Issuer: CN=SwissSign Gold CA - G2 O=SwissSign AG # Subject: CN=SwissSign Gold CA - G2 O=SwissSign AG # Label: "SwissSign Gold CA - G2" @@ -694,37 +636,6 @@ BA6+C4OmF4O5MBKgxTMVBbkN+8cFduPYSo38NBejxiEovjBFMR7HeL5YYTisO+IB ZQ== -----END CERTIFICATE----- -# Issuer: CN=Network Solutions Certificate Authority O=Network Solutions L.L.C. -# Subject: CN=Network Solutions Certificate Authority O=Network Solutions L.L.C. -# Label: "Network Solutions Certificate Authority" -# Serial: 116697915152937497490437556386812487904 -# MD5 Fingerprint: d3:f3:a6:16:c0:fa:6b:1d:59:b1:2d:96:4d:0e:11:2e -# SHA1 Fingerprint: 74:f8:a3:c3:ef:e7:b3:90:06:4b:83:90:3c:21:64:60:20:e5:df:ce -# SHA256 Fingerprint: 15:f0:ba:00:a3:ac:7a:f3:ac:88:4c:07:2b:10:11:a0:77:bd:77:c0:97:f4:01:64:b2:f8:59:8a:bd:83:86:0c ------BEGIN CERTIFICATE----- -MIID5jCCAs6gAwIBAgIQV8szb8JcFuZHFhfjkDFo4DANBgkqhkiG9w0BAQUFADBi -MQswCQYDVQQGEwJVUzEhMB8GA1UEChMYTmV0d29yayBTb2x1dGlvbnMgTC5MLkMu -MTAwLgYDVQQDEydOZXR3b3JrIFNvbHV0aW9ucyBDZXJ0aWZpY2F0ZSBBdXRob3Jp -dHkwHhcNMDYxMjAxMDAwMDAwWhcNMjkxMjMxMjM1OTU5WjBiMQswCQYDVQQGEwJV -UzEhMB8GA1UEChMYTmV0d29yayBTb2x1dGlvbnMgTC5MLkMuMTAwLgYDVQQDEydO -ZXR3b3JrIFNvbHV0aW9ucyBDZXJ0aWZpY2F0ZSBBdXRob3JpdHkwggEiMA0GCSqG -SIb3DQEBAQUAA4IBDwAwggEKAoIBAQDkvH6SMG3G2I4rC7xGzuAnlt7e+foS0zwz -c7MEL7xxjOWftiJgPl9dzgn/ggwbmlFQGiaJ3dVhXRncEg8tCqJDXRfQNJIg6nPP -OCwGJgl6cvf6UDL4wpPTaaIjzkGxzOTVHzbRijr4jGPiFFlp7Q3Tf2vouAPlT2rl -mGNpSAW+Lv8ztumXWWn4Zxmuk2GWRBXTcrA/vGp97Eh/jcOrqnErU2lBUzS1sLnF -BgrEsEX1QV1uiUV7PTsmjHTC5dLRfbIR1PtYMiKagMnc/Qzpf14Dl847ABSHJ3A4 -qY5usyd2mFHgBeMhqxrVhSI8KbWaFsWAqPS7azCPL0YCorEMIuDTAgMBAAGjgZcw -gZQwHQYDVR0OBBYEFCEwyfsA106Y2oeqKtCnLrFAMadMMA4GA1UdDwEB/wQEAwIB -BjAPBgNVHRMBAf8EBTADAQH/MFIGA1UdHwRLMEkwR6BFoEOGQWh0dHA6Ly9jcmwu -bmV0c29sc3NsLmNvbS9OZXR3b3JrU29sdXRpb25zQ2VydGlmaWNhdGVBdXRob3Jp -dHkuY3JsMA0GCSqGSIb3DQEBBQUAA4IBAQC7rkvnt1frf6ott3NHhWrB5KUd5Oc8 -6fRZZXe1eltajSU24HqXLjjAV2CDmAaDn7l2em5Q4LqILPxFzBiwmZVRDuwduIj/ -h1AcgsLj4DKAv6ALR8jDMe+ZZzKATxcheQxpXN5eNK4CtSbqUN9/GGUsyfJj4akH -/nxxH2szJGoeBfcFaMBqEssuXmHLrijTfsK0ZpEmXzwuJF/LWA/rKOyvEZbz3Htv -wKeI8lN3s2Berq4o2jUsbzRF0ybh3uxbTydrFny9RAQYgrOJeRcQcT16ohZO9QHN -pGxlaKFJdlxDydi8NmdspZS11My5vWo1ViHe2MPr+8ukYEywVaCge1ey ------END CERTIFICATE----- - # Issuer: CN=COMODO ECC Certification Authority O=COMODO CA Limited # Subject: CN=COMODO ECC Certification Authority O=COMODO CA Limited # Label: "COMODO ECC Certification Authority" @@ -779,36 +690,6 @@ t0QmwCbAr1UwnjvVNioZBPRcHv/PLLf/0P2HQBHVESO7SMAhqaQoLf0V+LBOK/Qw WyH8EZE0vkHve52Xdf+XlcCWWC/qu0bXu+TZLg== -----END CERTIFICATE----- -# Issuer: CN=Cybertrust Global Root O=Cybertrust, Inc -# Subject: CN=Cybertrust Global Root O=Cybertrust, Inc -# Label: "Cybertrust Global Root" -# Serial: 4835703278459682877484360 -# MD5 Fingerprint: 72:e4:4a:87:e3:69:40:80:77:ea:bc:e3:f4:ff:f0:e1 -# SHA1 Fingerprint: 5f:43:e5:b1:bf:f8:78:8c:ac:1c:c7:ca:4a:9a:c6:22:2b:cc:34:c6 -# SHA256 Fingerprint: 96:0a:df:00:63:e9:63:56:75:0c:29:65:dd:0a:08:67:da:0b:9c:bd:6e:77:71:4a:ea:fb:23:49:ab:39:3d:a3 ------BEGIN CERTIFICATE----- -MIIDoTCCAomgAwIBAgILBAAAAAABD4WqLUgwDQYJKoZIhvcNAQEFBQAwOzEYMBYG -A1UEChMPQ3liZXJ0cnVzdCwgSW5jMR8wHQYDVQQDExZDeWJlcnRydXN0IEdsb2Jh -bCBSb290MB4XDTA2MTIxNTA4MDAwMFoXDTIxMTIxNTA4MDAwMFowOzEYMBYGA1UE -ChMPQ3liZXJ0cnVzdCwgSW5jMR8wHQYDVQQDExZDeWJlcnRydXN0IEdsb2JhbCBS -b290MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEA+Mi8vRRQZhP/8NN5 -7CPytxrHjoXxEnOmGaoQ25yiZXRadz5RfVb23CO21O1fWLE3TdVJDm71aofW0ozS -J8bi/zafmGWgE07GKmSb1ZASzxQG9Dvj1Ci+6A74q05IlG2OlTEQXO2iLb3VOm2y -HLtgwEZLAfVJrn5GitB0jaEMAs7u/OePuGtm839EAL9mJRQr3RAwHQeWP032a7iP -t3sMpTjr3kfb1V05/Iin89cqdPHoWqI7n1C6poxFNcJQZZXcY4Lv3b93TZxiyWNz -FtApD0mpSPCzqrdsxacwOUBdrsTiXSZT8M4cIwhhqJQZugRiQOwfOHB3EgZxpzAY -XSUnpQIDAQABo4GlMIGiMA4GA1UdDwEB/wQEAwIBBjAPBgNVHRMBAf8EBTADAQH/ -MB0GA1UdDgQWBBS2CHsNesysIEyGVjJez6tuhS1wVzA/BgNVHR8EODA2MDSgMqAw -hi5odHRwOi8vd3d3Mi5wdWJsaWMtdHJ1c3QuY29tL2NybC9jdC9jdHJvb3QuY3Js -MB8GA1UdIwQYMBaAFLYIew16zKwgTIZWMl7Pq26FLXBXMA0GCSqGSIb3DQEBBQUA -A4IBAQBW7wojoFROlZfJ+InaRcHUowAl9B8Tq7ejhVhpwjCt2BWKLePJzYFa+HMj -Wqd8BfP9IjsO0QbE2zZMcwSO5bAi5MXzLqXZI+O4Tkogp24CJJ8iYGd7ix1yCcUx -XOl5n4BHPa2hCwcUPUf/A2kaDAtE52Mlp3+yybh2hO0j9n0Hq0V+09+zv+mKts2o -omcrUtW3ZfA5TGOgkXmTUg9U3YO7n9GPp1Nzw8v/MOx8BLjYRB+TX3EJIrduPuoc -A06dGiBh+4E37F78CkWr1+cXVdCg6mCbpvbjjFspwgZgFJ0tl0ypkxWdYcQBX0jW -WL1WMRJOEcgh4LMRkWXbtKaIOM5V ------END CERTIFICATE----- - # Issuer: O=Chunghwa Telecom Co., Ltd. OU=ePKI Root Certification Authority # Subject: O=Chunghwa Telecom Co., Ltd. OU=ePKI Root Certification Authority # Label: "ePKI Root Certification Authority" @@ -1411,78 +1292,6 @@ t/2jioSgrGK+KwmHNPBqAbubKVY8/gA3zyNs8U6qtnRGEmyR7jTV7JqR50S+kDFy SjnRBUkLp7Y3gaVdjKozXoEofKd9J+sAro03 -----END CERTIFICATE----- -# Issuer: CN=EC-ACC O=Agencia Catalana de Certificacio (NIF Q-0801176-I) OU=Serveis Publics de Certificacio/Vegeu https://www.catcert.net/verarrel (c)03/Jerarquia Entitats de Certificacio Catalanes -# Subject: CN=EC-ACC O=Agencia Catalana de Certificacio (NIF Q-0801176-I) OU=Serveis Publics de Certificacio/Vegeu https://www.catcert.net/verarrel (c)03/Jerarquia Entitats de Certificacio Catalanes -# Label: "EC-ACC" -# Serial: -23701579247955709139626555126524820479 -# MD5 Fingerprint: eb:f5:9d:29:0d:61:f9:42:1f:7c:c2:ba:6d:e3:15:09 -# SHA1 Fingerprint: 28:90:3a:63:5b:52:80:fa:e6:77:4c:0b:6d:a7:d6:ba:a6:4a:f2:e8 -# SHA256 Fingerprint: 88:49:7f:01:60:2f:31:54:24:6a:e2:8c:4d:5a:ef:10:f1:d8:7e:bb:76:62:6f:4a:e0:b7:f9:5b:a7:96:87:99 ------BEGIN CERTIFICATE----- -MIIFVjCCBD6gAwIBAgIQ7is969Qh3hSoYqwE893EATANBgkqhkiG9w0BAQUFADCB -8zELMAkGA1UEBhMCRVMxOzA5BgNVBAoTMkFnZW5jaWEgQ2F0YWxhbmEgZGUgQ2Vy -dGlmaWNhY2lvIChOSUYgUS0wODAxMTc2LUkpMSgwJgYDVQQLEx9TZXJ2ZWlzIFB1 -YmxpY3MgZGUgQ2VydGlmaWNhY2lvMTUwMwYDVQQLEyxWZWdldSBodHRwczovL3d3 -dy5jYXRjZXJ0Lm5ldC92ZXJhcnJlbCAoYykwMzE1MDMGA1UECxMsSmVyYXJxdWlh -IEVudGl0YXRzIGRlIENlcnRpZmljYWNpbyBDYXRhbGFuZXMxDzANBgNVBAMTBkVD -LUFDQzAeFw0wMzAxMDcyMzAwMDBaFw0zMTAxMDcyMjU5NTlaMIHzMQswCQYDVQQG -EwJFUzE7MDkGA1UEChMyQWdlbmNpYSBDYXRhbGFuYSBkZSBDZXJ0aWZpY2FjaW8g -KE5JRiBRLTA4MDExNzYtSSkxKDAmBgNVBAsTH1NlcnZlaXMgUHVibGljcyBkZSBD -ZXJ0aWZpY2FjaW8xNTAzBgNVBAsTLFZlZ2V1IGh0dHBzOi8vd3d3LmNhdGNlcnQu -bmV0L3ZlcmFycmVsIChjKTAzMTUwMwYDVQQLEyxKZXJhcnF1aWEgRW50aXRhdHMg -ZGUgQ2VydGlmaWNhY2lvIENhdGFsYW5lczEPMA0GA1UEAxMGRUMtQUNDMIIBIjAN -BgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAsyLHT+KXQpWIR4NA9h0X84NzJB5R -85iKw5K4/0CQBXCHYMkAqbWUZRkiFRfCQ2xmRJoNBD45b6VLeqpjt4pEndljkYRm -4CgPukLjbo73FCeTae6RDqNfDrHrZqJyTxIThmV6PttPB/SnCWDaOkKZx7J/sxaV -HMf5NLWUhdWZXqBIoH7nF2W4onW4HvPlQn2v7fOKSGRdghST2MDk/7NQcvJ29rNd -QlB50JQ+awwAvthrDk4q7D7SzIKiGGUzE3eeml0aE9jD2z3Il3rucO2n5nzbcc8t -lGLfbdb1OL4/pYUKGbio2Al1QnDE6u/LDsg0qBIimAy4E5S2S+zw0JDnJwIDAQAB -o4HjMIHgMB0GA1UdEQQWMBSBEmVjX2FjY0BjYXRjZXJ0Lm5ldDAPBgNVHRMBAf8E -BTADAQH/MA4GA1UdDwEB/wQEAwIBBjAdBgNVHQ4EFgQUoMOLRKo3pUW/l4Ba0fF4 -opvpXY0wfwYDVR0gBHgwdjB0BgsrBgEEAfV4AQMBCjBlMCwGCCsGAQUFBwIBFiBo -dHRwczovL3d3dy5jYXRjZXJ0Lm5ldC92ZXJhcnJlbDA1BggrBgEFBQcCAjApGidW -ZWdldSBodHRwczovL3d3dy5jYXRjZXJ0Lm5ldC92ZXJhcnJlbCAwDQYJKoZIhvcN -AQEFBQADggEBAKBIW4IB9k1IuDlVNZyAelOZ1Vr/sXE7zDkJlF7W2u++AVtd0x7Y -/X1PzaBB4DSTv8vihpw3kpBWHNzrKQXlxJ7HNd+KDM3FIUPpqojlNcAZQmNaAl6k -SBg6hW/cnbw/nZzBh7h6YQjpdwt/cKt63dmXLGQehb+8dJahw3oS7AwaboMMPOhy -Rp/7SNVel+axofjk70YllJyJ22k4vuxcDlbHZVHlUIiIv0LVKz3l+bqeLrPK9HOS -Agu+TGbrIP65y7WZf+a2E/rKS03Z7lNGBjvGTq2TWoF+bCpLagVFjPIhpDGQh2xl -nJ2lYJU6Un/10asIbvPuW/mIPX64b24D5EI= ------END CERTIFICATE----- - -# Issuer: CN=Hellenic Academic and Research Institutions RootCA 2011 O=Hellenic Academic and Research Institutions Cert. Authority -# Subject: CN=Hellenic Academic and Research Institutions RootCA 2011 O=Hellenic Academic and Research Institutions Cert. Authority -# Label: "Hellenic Academic and Research Institutions RootCA 2011" -# Serial: 0 -# MD5 Fingerprint: 73:9f:4c:4b:73:5b:79:e9:fa:ba:1c:ef:6e:cb:d5:c9 -# SHA1 Fingerprint: fe:45:65:9b:79:03:5b:98:a1:61:b5:51:2e:ac:da:58:09:48:22:4d -# SHA256 Fingerprint: bc:10:4f:15:a4:8b:e7:09:dc:a5:42:a7:e1:d4:b9:df:6f:05:45:27:e8:02:ea:a9:2d:59:54:44:25:8a:fe:71 ------BEGIN CERTIFICATE----- -MIIEMTCCAxmgAwIBAgIBADANBgkqhkiG9w0BAQUFADCBlTELMAkGA1UEBhMCR1Ix -RDBCBgNVBAoTO0hlbGxlbmljIEFjYWRlbWljIGFuZCBSZXNlYXJjaCBJbnN0aXR1 -dGlvbnMgQ2VydC4gQXV0aG9yaXR5MUAwPgYDVQQDEzdIZWxsZW5pYyBBY2FkZW1p -YyBhbmQgUmVzZWFyY2ggSW5zdGl0dXRpb25zIFJvb3RDQSAyMDExMB4XDTExMTIw -NjEzNDk1MloXDTMxMTIwMTEzNDk1MlowgZUxCzAJBgNVBAYTAkdSMUQwQgYDVQQK -EztIZWxsZW5pYyBBY2FkZW1pYyBhbmQgUmVzZWFyY2ggSW5zdGl0dXRpb25zIENl -cnQuIEF1dGhvcml0eTFAMD4GA1UEAxM3SGVsbGVuaWMgQWNhZGVtaWMgYW5kIFJl -c2VhcmNoIEluc3RpdHV0aW9ucyBSb290Q0EgMjAxMTCCASIwDQYJKoZIhvcNAQEB -BQADggEPADCCAQoCggEBAKlTAOMupvaO+mDYLZU++CwqVE7NuYRhlFhPjz2L5EPz -dYmNUeTDN9KKiE15HrcS3UN4SoqS5tdI1Q+kOilENbgH9mgdVc04UfCMJDGFr4PJ -fel3r+0ae50X+bOdOFAPplp5kYCvN66m0zH7tSYJnTxa71HFK9+WXesyHgLacEns -bgzImjeN9/E2YEsmLIKe0HjzDQ9jpFEw4fkrJxIH2Oq9GGKYsFk3fb7u8yBRQlqD -75O6aRXxYp2fmTmCobd0LovUxQt7L/DICto9eQqakxylKHJzkUOap9FNhYS5qXSP -FEDH3N6sQWRstBmbAmNtJGSPRLIl6s5ddAxjMlyNh+UCAwEAAaOBiTCBhjAPBgNV -HRMBAf8EBTADAQH/MAsGA1UdDwQEAwIBBjAdBgNVHQ4EFgQUppFC/RNhSiOeCKQp -5dgTBCPuQSUwRwYDVR0eBEAwPqA8MAWCAy5ncjAFggMuZXUwBoIELmVkdTAGggQu -b3JnMAWBAy5ncjAFgQMuZXUwBoEELmVkdTAGgQQub3JnMA0GCSqGSIb3DQEBBQUA -A4IBAQAf73lB4XtuP7KMhjdCSk4cNx6NZrokgclPEg8hwAOXhiVtXdMiKahsog2p -6z0GW5k6x8zDmjR/qw7IThzh+uTczQ2+vyT+bOdrwg3IBp5OjWEopmr95fZi6hg8 -TqBTnbI6nOulnJEWtk2C4AwFSKls9cz4y51JtPACpf1wA+2KIaWuE4ZJwzNzvoc7 -dIsXRSZMFpGD/md9zU1jZ/rzAxKWeAaNsWftjj++n08C9bMJL/NMh98qy5V8Acys -Nnq/onN694/BtZqhFLKPM58N7yLcZnuEvUUXBj08yrl3NI/K6s8/MT7jiOOASSXI -l7WdmplNsDz4SgCbZN2fOUvRJ9e4 ------END CERTIFICATE----- - # Issuer: CN=Actalis Authentication Root CA O=Actalis S.p.A./03358520967 # Subject: CN=Actalis Authentication Root CA O=Actalis S.p.A./03358520967 # Label: "Actalis Authentication Root CA" @@ -2342,27 +2151,6 @@ zzuqQhFkoJ2UOQIReVx7Hfpkue4WQrO/isIJxOzksU0CMQDpKmFHjFJKS04YcPbW RNZu9YO6bVi9JNlWSOrvxKJGgYhqOkbRqZtNyWHa0V1Xahg= -----END CERTIFICATE----- -# Issuer: CN=GlobalSign O=GlobalSign OU=GlobalSign ECC Root CA - R4 -# Subject: CN=GlobalSign O=GlobalSign OU=GlobalSign ECC Root CA - R4 -# Label: "GlobalSign ECC Root CA - R4" -# Serial: 14367148294922964480859022125800977897474 -# MD5 Fingerprint: 20:f0:27:68:d1:7e:a0:9d:0e:e6:2a:ca:df:5c:89:8e -# SHA1 Fingerprint: 69:69:56:2e:40:80:f4:24:a1:e7:19:9f:14:ba:f3:ee:58:ab:6a:bb -# SHA256 Fingerprint: be:c9:49:11:c2:95:56:76:db:6c:0a:55:09:86:d7:6e:3b:a0:05:66:7c:44:2c:97:62:b4:fb:b7:73:de:22:8c ------BEGIN CERTIFICATE----- -MIIB4TCCAYegAwIBAgIRKjikHJYKBN5CsiilC+g0mAIwCgYIKoZIzj0EAwIwUDEk -MCIGA1UECxMbR2xvYmFsU2lnbiBFQ0MgUm9vdCBDQSAtIFI0MRMwEQYDVQQKEwpH -bG9iYWxTaWduMRMwEQYDVQQDEwpHbG9iYWxTaWduMB4XDTEyMTExMzAwMDAwMFoX -DTM4MDExOTAzMTQwN1owUDEkMCIGA1UECxMbR2xvYmFsU2lnbiBFQ0MgUm9vdCBD -QSAtIFI0MRMwEQYDVQQKEwpHbG9iYWxTaWduMRMwEQYDVQQDEwpHbG9iYWxTaWdu -MFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAEuMZ5049sJQ6fLjkZHAOkrprlOQcJ -FspjsbmG+IpXwVfOQvpzofdlQv8ewQCybnMO/8ch5RikqtlxP6jUuc6MHaNCMEAw -DgYDVR0PAQH/BAQDAgEGMA8GA1UdEwEB/wQFMAMBAf8wHQYDVR0OBBYEFFSwe61F -uOJAf/sKbvu+M8k8o4TVMAoGCCqGSM49BAMCA0gAMEUCIQDckqGgE6bPA7DmxCGX -kPoUVy0D7O48027KqGx2vKLeuwIgJ6iFJzWbVsaj8kfSt24bAgAXqmemFZHe+pTs -ewv4n4Q= ------END CERTIFICATE----- - # Issuer: CN=GlobalSign O=GlobalSign OU=GlobalSign ECC Root CA - R5 # Subject: CN=GlobalSign O=GlobalSign OU=GlobalSign ECC Root CA - R5 # Label: "GlobalSign ECC Root CA - R5" @@ -2385,46 +2173,6 @@ KoZIzj0EAwMDaAAwZQIxAOVpEslu28YxuglB4Zf4+/2a4n0Sye18ZNPLBSWLVtmg xwy8p2Fp8fc74SrL+SvzZpA3 -----END CERTIFICATE----- -# Issuer: CN=Staat der Nederlanden EV Root CA O=Staat der Nederlanden -# Subject: CN=Staat der Nederlanden EV Root CA O=Staat der Nederlanden -# Label: "Staat der Nederlanden EV Root CA" -# Serial: 10000013 -# MD5 Fingerprint: fc:06:af:7b:e8:1a:f1:9a:b4:e8:d2:70:1f:c0:f5:ba -# SHA1 Fingerprint: 76:e2:7e:c1:4f:db:82:c1:c0:a6:75:b5:05:be:3d:29:b4:ed:db:bb -# SHA256 Fingerprint: 4d:24:91:41:4c:fe:95:67:46:ec:4c:ef:a6:cf:6f:72:e2:8a:13:29:43:2f:9d:8a:90:7a:c4:cb:5d:ad:c1:5a ------BEGIN CERTIFICATE----- -MIIFcDCCA1igAwIBAgIEAJiWjTANBgkqhkiG9w0BAQsFADBYMQswCQYDVQQGEwJO -TDEeMBwGA1UECgwVU3RhYXQgZGVyIE5lZGVybGFuZGVuMSkwJwYDVQQDDCBTdGFh -dCBkZXIgTmVkZXJsYW5kZW4gRVYgUm9vdCBDQTAeFw0xMDEyMDgxMTE5MjlaFw0y -MjEyMDgxMTEwMjhaMFgxCzAJBgNVBAYTAk5MMR4wHAYDVQQKDBVTdGFhdCBkZXIg -TmVkZXJsYW5kZW4xKTAnBgNVBAMMIFN0YWF0IGRlciBOZWRlcmxhbmRlbiBFViBS -b290IENBMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEA48d+ifkkSzrS -M4M1LGns3Amk41GoJSt5uAg94JG6hIXGhaTK5skuU6TJJB79VWZxXSzFYGgEt9nC -UiY4iKTWO0Cmws0/zZiTs1QUWJZV1VD+hq2kY39ch/aO5ieSZxeSAgMs3NZmdO3d -Z//BYY1jTw+bbRcwJu+r0h8QoPnFfxZpgQNH7R5ojXKhTbImxrpsX23Wr9GxE46p -rfNeaXUmGD5BKyF/7otdBwadQ8QpCiv8Kj6GyzyDOvnJDdrFmeK8eEEzduG/L13l -pJhQDBXd4Pqcfzho0LKmeqfRMb1+ilgnQ7O6M5HTp5gVXJrm0w912fxBmJc+qiXb -j5IusHsMX/FjqTf5m3VpTCgmJdrV8hJwRVXj33NeN/UhbJCONVrJ0yPr08C+eKxC -KFhmpUZtcALXEPlLVPxdhkqHz3/KRawRWrUgUY0viEeXOcDPusBCAUCZSCELa6fS -/ZbV0b5GnUngC6agIk440ME8MLxwjyx1zNDFjFE7PZQIZCZhfbnDZY8UnCHQqv0X -cgOPvZuM5l5Tnrmd74K74bzickFbIZTTRTeU0d8JOV3nI6qaHcptqAqGhYqCvkIH -1vI4gnPah1vlPNOePqc7nvQDs/nxfRN0Av+7oeX6AHkcpmZBiFxgV6YuCcS6/ZrP -px9Aw7vMWgpVSzs4dlG4Y4uElBbmVvMCAwEAAaNCMEAwDwYDVR0TAQH/BAUwAwEB -/zAOBgNVHQ8BAf8EBAMCAQYwHQYDVR0OBBYEFP6rAJCYniT8qcwaivsnuL8wbqg7 -MA0GCSqGSIb3DQEBCwUAA4ICAQDPdyxuVr5Os7aEAJSrR8kN0nbHhp8dB9O2tLsI -eK9p0gtJ3jPFrK3CiAJ9Brc1AsFgyb/E6JTe1NOpEyVa/m6irn0F3H3zbPB+po3u -2dfOWBfoqSmuc0iH55vKbimhZF8ZE/euBhD/UcabTVUlT5OZEAFTdfETzsemQUHS -v4ilf0X8rLiltTMMgsT7B/Zq5SWEXwbKwYY5EdtYzXc7LMJMD16a4/CrPmEbUCTC -wPTxGfARKbalGAKb12NMcIxHowNDXLldRqANb/9Zjr7dn3LDWyvfjFvO5QxGbJKy -CqNMVEIYFRIYvdr8unRu/8G2oGTYqV9Vrp9canaW2HNnh/tNf1zuacpzEPuKqf2e -vTY4SUmH9A4U8OmHuD+nT3pajnnUk+S7aFKErGzp85hwVXIy+TSrK0m1zSBi5Dp6 -Z2Orltxtrpfs/J92VoguZs9btsmksNcFuuEnL5O7Jiqik7Ab846+HUCjuTaPPoIa -Gl6I6lD4WeKDRikL40Rc4ZW2aZCaFG+XroHPaO+Zmr615+F/+PoTRxZMzG0IQOeL -eG9QgkRQP2YGiqtDhFZKDyAthg710tvSeopLzaXoTvFeJiUBWSOgftL2fiFX1ye8 -FVdMpEbB4IMeDExNH08GGeL5qPQ6gqGyeUN51q1veieQA6TqJIc/2b3Z6fJfUEkc -7uzXLg== ------END CERTIFICATE----- - # Issuer: CN=IdenTrust Commercial Root CA 1 O=IdenTrust # Subject: CN=IdenTrust Commercial Root CA 1 O=IdenTrust # Label: "IdenTrust Commercial Root CA 1" @@ -3032,116 +2780,6 @@ T8p+ck0LcIymSLumoRT2+1hEmRSuqguTaaApJUqlyyvdimYHFngVV3Eb7PVHhPOe MTd61X8kreS8/f3MboPoDKi3QWwH3b08hpcv0g== -----END CERTIFICATE----- -# Issuer: CN=TrustCor RootCert CA-1 O=TrustCor Systems S. de R.L. OU=TrustCor Certificate Authority -# Subject: CN=TrustCor RootCert CA-1 O=TrustCor Systems S. de R.L. OU=TrustCor Certificate Authority -# Label: "TrustCor RootCert CA-1" -# Serial: 15752444095811006489 -# MD5 Fingerprint: 6e:85:f1:dc:1a:00:d3:22:d5:b2:b2:ac:6b:37:05:45 -# SHA1 Fingerprint: ff:bd:cd:e7:82:c8:43:5e:3c:6f:26:86:5c:ca:a8:3a:45:5b:c3:0a -# SHA256 Fingerprint: d4:0e:9c:86:cd:8f:e4:68:c1:77:69:59:f4:9e:a7:74:fa:54:86:84:b6:c4:06:f3:90:92:61:f4:dc:e2:57:5c ------BEGIN CERTIFICATE----- -MIIEMDCCAxigAwIBAgIJANqb7HHzA7AZMA0GCSqGSIb3DQEBCwUAMIGkMQswCQYD -VQQGEwJQQTEPMA0GA1UECAwGUGFuYW1hMRQwEgYDVQQHDAtQYW5hbWEgQ2l0eTEk -MCIGA1UECgwbVHJ1c3RDb3IgU3lzdGVtcyBTLiBkZSBSLkwuMScwJQYDVQQLDB5U -cnVzdENvciBDZXJ0aWZpY2F0ZSBBdXRob3JpdHkxHzAdBgNVBAMMFlRydXN0Q29y -IFJvb3RDZXJ0IENBLTEwHhcNMTYwMjA0MTIzMjE2WhcNMjkxMjMxMTcyMzE2WjCB -pDELMAkGA1UEBhMCUEExDzANBgNVBAgMBlBhbmFtYTEUMBIGA1UEBwwLUGFuYW1h -IENpdHkxJDAiBgNVBAoMG1RydXN0Q29yIFN5c3RlbXMgUy4gZGUgUi5MLjEnMCUG -A1UECwweVHJ1c3RDb3IgQ2VydGlmaWNhdGUgQXV0aG9yaXR5MR8wHQYDVQQDDBZU -cnVzdENvciBSb290Q2VydCBDQS0xMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIB -CgKCAQEAv463leLCJhJrMxnHQFgKq1mqjQCj/IDHUHuO1CAmujIS2CNUSSUQIpid -RtLByZ5OGy4sDjjzGiVoHKZaBeYei0i/mJZ0PmnK6bV4pQa81QBeCQryJ3pS/C3V -seq0iWEk8xoT26nPUu0MJLq5nux+AHT6k61sKZKuUbS701e/s/OojZz0JEsq1pme -9J7+wH5COucLlVPat2gOkEz7cD+PSiyU8ybdY2mplNgQTsVHCJCZGxdNuWxu72CV -EY4hgLW9oHPY0LJ3xEXqWib7ZnZ2+AYfYW0PVcWDtxBWcgYHpfOxGgMFZA6dWorW -hnAbJN7+KIor0Gqw/Hqi3LJ5DotlDwIDAQABo2MwYTAdBgNVHQ4EFgQU7mtJPHo/ -DeOxCbeKyKsZn3MzUOcwHwYDVR0jBBgwFoAU7mtJPHo/DeOxCbeKyKsZn3MzUOcw -DwYDVR0TAQH/BAUwAwEB/zAOBgNVHQ8BAf8EBAMCAYYwDQYJKoZIhvcNAQELBQAD -ggEBACUY1JGPE+6PHh0RU9otRCkZoB5rMZ5NDp6tPVxBb5UrJKF5mDo4Nvu7Zp5I -/5CQ7z3UuJu0h3U/IJvOcs+hVcFNZKIZBqEHMwwLKeXx6quj7LUKdJDHfXLy11yf -ke+Ri7fc7Waiz45mO7yfOgLgJ90WmMCV1Aqk5IGadZQ1nJBfiDcGrVmVCrDRZ9MZ -yonnMlo2HD6CqFqTvsbQZJG2z9m2GM/bftJlo6bEjhcxwft+dtvTheNYsnd6djts -L1Ac59v2Z3kf9YKVmgenFK+P3CghZwnS1k1aHBkcjndcw5QkPTJrS37UeJSDvjdN -zl/HHk484IkzlQsPpTLWPFp5LBk= ------END CERTIFICATE----- - -# Issuer: CN=TrustCor RootCert CA-2 O=TrustCor Systems S. de R.L. OU=TrustCor Certificate Authority -# Subject: CN=TrustCor RootCert CA-2 O=TrustCor Systems S. de R.L. OU=TrustCor Certificate Authority -# Label: "TrustCor RootCert CA-2" -# Serial: 2711694510199101698 -# MD5 Fingerprint: a2:e1:f8:18:0b:ba:45:d5:c7:41:2a:bb:37:52:45:64 -# SHA1 Fingerprint: b8:be:6d:cb:56:f1:55:b9:63:d4:12:ca:4e:06:34:c7:94:b2:1c:c0 -# SHA256 Fingerprint: 07:53:e9:40:37:8c:1b:d5:e3:83:6e:39:5d:ae:a5:cb:83:9e:50:46:f1:bd:0e:ae:19:51:cf:10:fe:c7:c9:65 ------BEGIN CERTIFICATE----- -MIIGLzCCBBegAwIBAgIIJaHfyjPLWQIwDQYJKoZIhvcNAQELBQAwgaQxCzAJBgNV -BAYTAlBBMQ8wDQYDVQQIDAZQYW5hbWExFDASBgNVBAcMC1BhbmFtYSBDaXR5MSQw -IgYDVQQKDBtUcnVzdENvciBTeXN0ZW1zIFMuIGRlIFIuTC4xJzAlBgNVBAsMHlRy -dXN0Q29yIENlcnRpZmljYXRlIEF1dGhvcml0eTEfMB0GA1UEAwwWVHJ1c3RDb3Ig -Um9vdENlcnQgQ0EtMjAeFw0xNjAyMDQxMjMyMjNaFw0zNDEyMzExNzI2MzlaMIGk -MQswCQYDVQQGEwJQQTEPMA0GA1UECAwGUGFuYW1hMRQwEgYDVQQHDAtQYW5hbWEg -Q2l0eTEkMCIGA1UECgwbVHJ1c3RDb3IgU3lzdGVtcyBTLiBkZSBSLkwuMScwJQYD -VQQLDB5UcnVzdENvciBDZXJ0aWZpY2F0ZSBBdXRob3JpdHkxHzAdBgNVBAMMFlRy -dXN0Q29yIFJvb3RDZXJ0IENBLTIwggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIK -AoICAQCnIG7CKqJiJJWQdsg4foDSq8GbZQWU9MEKENUCrO2fk8eHyLAnK0IMPQo+ -QVqedd2NyuCb7GgypGmSaIwLgQ5WoD4a3SwlFIIvl9NkRvRUqdw6VC0xK5mC8tkq -1+9xALgxpL56JAfDQiDyitSSBBtlVkxs1Pu2YVpHI7TYabS3OtB0PAx1oYxOdqHp -2yqlO/rOsP9+aij9JxzIsekp8VduZLTQwRVtDr4uDkbIXvRR/u8OYzo7cbrPb1nK -DOObXUm4TOJXsZiKQlecdu/vvdFoqNL0Cbt3Nb4lggjEFixEIFapRBF37120Hape -az6LMvYHL1cEksr1/p3C6eizjkxLAjHZ5DxIgif3GIJ2SDpxsROhOdUuxTTCHWKF -3wP+TfSvPd9cW436cOGlfifHhi5qjxLGhF5DUVCcGZt45vz27Ud+ez1m7xMTiF88 -oWP7+ayHNZ/zgp6kPwqcMWmLmaSISo5uZk3vFsQPeSghYA2FFn3XVDjxklb9tTNM -g9zXEJ9L/cb4Qr26fHMC4P99zVvh1Kxhe1fVSntb1IVYJ12/+CtgrKAmrhQhJ8Z3 -mjOAPF5GP/fDsaOGM8boXg25NSyqRsGFAnWAoOsk+xWq5Gd/bnc/9ASKL3x74xdh -8N0JqSDIvgmk0H5Ew7IwSjiqqewYmgeCK9u4nBit2uBGF6zPXQIDAQABo2MwYTAd -BgNVHQ4EFgQU2f4hQG6UnrybPZx9mCAZ5YwwYrIwHwYDVR0jBBgwFoAU2f4hQG6U -nrybPZx9mCAZ5YwwYrIwDwYDVR0TAQH/BAUwAwEB/zAOBgNVHQ8BAf8EBAMCAYYw -DQYJKoZIhvcNAQELBQADggIBAJ5Fngw7tu/hOsh80QA9z+LqBrWyOrsGS2h60COX -dKcs8AjYeVrXWoSK2BKaG9l9XE1wxaX5q+WjiYndAfrs3fnpkpfbsEZC89NiqpX+ -MWcUaViQCqoL7jcjx1BRtPV+nuN79+TMQjItSQzL/0kMmx40/W5ulop5A7Zv2wnL -/V9lFDfhOPXzYRZY5LVtDQsEGz9QLX+zx3oaFoBg+Iof6Rsqxvm6ARppv9JYx1RX -CI/hOWB3S6xZhBqI8d3LT3jX5+EzLfzuQfogsL7L9ziUwOHQhQ+77Sxzq+3+knYa -ZH9bDTMJBzN7Bj8RpFxwPIXAz+OQqIN3+tvmxYxoZxBnpVIt8MSZj3+/0WvitUfW -2dCFmU2Umw9Lje4AWkcdEQOsQRivh7dvDDqPys/cA8GiCcjl/YBeyGBCARsaU1q7 -N6a3vLqE6R5sGtRk2tRD/pOLS/IseRYQ1JMLiI+h2IYURpFHmygk71dSTlxCnKr3 -Sewn6EAes6aJInKc9Q0ztFijMDvd1GpUk74aTfOTlPf8hAs/hCBcNANExdqtvArB -As8e5ZTZ845b2EzwnexhF7sUMlQMAimTHpKG9n/v55IFDlndmQguLvqcAFLTxWYp -5KeXRKQOKIETNcX2b2TmQcTVL8w0RSXPQQCWPUouwpaYT05KnJe32x+SMsj/D1Fu -1uwJ ------END CERTIFICATE----- - -# Issuer: CN=TrustCor ECA-1 O=TrustCor Systems S. de R.L. OU=TrustCor Certificate Authority -# Subject: CN=TrustCor ECA-1 O=TrustCor Systems S. de R.L. OU=TrustCor Certificate Authority -# Label: "TrustCor ECA-1" -# Serial: 9548242946988625984 -# MD5 Fingerprint: 27:92:23:1d:0a:f5:40:7c:e9:e6:6b:9d:d8:f5:e7:6c -# SHA1 Fingerprint: 58:d1:df:95:95:67:6b:63:c0:f0:5b:1c:17:4d:8b:84:0b:c8:78:bd -# SHA256 Fingerprint: 5a:88:5d:b1:9c:01:d9:12:c5:75:93:88:93:8c:af:bb:df:03:1a:b2:d4:8e:91:ee:15:58:9b:42:97:1d:03:9c ------BEGIN CERTIFICATE----- -MIIEIDCCAwigAwIBAgIJAISCLF8cYtBAMA0GCSqGSIb3DQEBCwUAMIGcMQswCQYD -VQQGEwJQQTEPMA0GA1UECAwGUGFuYW1hMRQwEgYDVQQHDAtQYW5hbWEgQ2l0eTEk -MCIGA1UECgwbVHJ1c3RDb3IgU3lzdGVtcyBTLiBkZSBSLkwuMScwJQYDVQQLDB5U -cnVzdENvciBDZXJ0aWZpY2F0ZSBBdXRob3JpdHkxFzAVBgNVBAMMDlRydXN0Q29y -IEVDQS0xMB4XDTE2MDIwNDEyMzIzM1oXDTI5MTIzMTE3MjgwN1owgZwxCzAJBgNV -BAYTAlBBMQ8wDQYDVQQIDAZQYW5hbWExFDASBgNVBAcMC1BhbmFtYSBDaXR5MSQw -IgYDVQQKDBtUcnVzdENvciBTeXN0ZW1zIFMuIGRlIFIuTC4xJzAlBgNVBAsMHlRy -dXN0Q29yIENlcnRpZmljYXRlIEF1dGhvcml0eTEXMBUGA1UEAwwOVHJ1c3RDb3Ig -RUNBLTEwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQDPj+ARtZ+odnbb -3w9U73NjKYKtR8aja+3+XzP4Q1HpGjORMRegdMTUpwHmspI+ap3tDvl0mEDTPwOA -BoJA6LHip1GnHYMma6ve+heRK9jGrB6xnhkB1Zem6g23xFUfJ3zSCNV2HykVh0A5 -3ThFEXXQmqc04L/NyFIduUd+Dbi7xgz2c1cWWn5DkR9VOsZtRASqnKmcp0yJF4Ou -owReUoCLHhIlERnXDH19MURB6tuvsBzvgdAsxZohmz3tQjtQJvLsznFhBmIhVE5/ -wZ0+fyCMgMsq2JdiyIMzkX2woloPV+g7zPIlstR8L+xNxqE6FXrntl019fZISjZF -ZtS6mFjBAgMBAAGjYzBhMB0GA1UdDgQWBBREnkj1zG1I1KBLf/5ZJC+Dl5mahjAf -BgNVHSMEGDAWgBREnkj1zG1I1KBLf/5ZJC+Dl5mahjAPBgNVHRMBAf8EBTADAQH/ -MA4GA1UdDwEB/wQEAwIBhjANBgkqhkiG9w0BAQsFAAOCAQEABT41XBVwm8nHc2Fv -civUwo/yQ10CzsSUuZQRg2dd4mdsdXa/uwyqNsatR5Nj3B5+1t4u/ukZMjgDfxT2 -AHMsWbEhBuH7rBiVDKP/mZb3Kyeb1STMHd3BOuCYRLDE5D53sXOpZCz2HAF8P11F -hcCF5yWPldwX8zyfGm6wyuMdKulMY/okYWLW2n62HGz1Ah3UKt1VkOsqEUc8Ll50 -soIipX1TH0XsJ5F95yIW6MBoNtjG8U+ARDL54dHRHareqKucBK+tIA5kmE2la8BI -WJZpTdwHjFGTot+fDz2LYLSCjaoITmJF4PkL0uDgPFveXHEnJcLmA4GLEFPjx1Wi -tJ/X5g== ------END CERTIFICATE----- - # Issuer: CN=SSL.com Root Certification Authority RSA O=SSL Corporation # Subject: CN=SSL.com Root Certification Authority RSA O=SSL Corporation # Label: "SSL.com Root Certification Authority RSA" @@ -3337,126 +2975,6 @@ rYy0UGYwEAYJKwYBBAGCNxUBBAMCAQAwCgYIKoZIzj0EAwMDaAAwZQIwJsdpW9zV Mgj/mkkCtojeFK9dbJlxjRo/i9fgojaGHAeCOnZT/cKi7e97sIBPWA9LUzm9 -----END CERTIFICATE----- -# Issuer: CN=GTS Root R1 O=Google Trust Services LLC -# Subject: CN=GTS Root R1 O=Google Trust Services LLC -# Label: "GTS Root R1" -# Serial: 146587175971765017618439757810265552097 -# MD5 Fingerprint: 82:1a:ef:d4:d2:4a:f2:9f:e2:3d:97:06:14:70:72:85 -# SHA1 Fingerprint: e1:c9:50:e6:ef:22:f8:4c:56:45:72:8b:92:20:60:d7:d5:a7:a3:e8 -# SHA256 Fingerprint: 2a:57:54:71:e3:13:40:bc:21:58:1c:bd:2c:f1:3e:15:84:63:20:3e:ce:94:bc:f9:d3:cc:19:6b:f0:9a:54:72 ------BEGIN CERTIFICATE----- -MIIFWjCCA0KgAwIBAgIQbkepxUtHDA3sM9CJuRz04TANBgkqhkiG9w0BAQwFADBH -MQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExM -QzEUMBIGA1UEAxMLR1RTIFJvb3QgUjEwHhcNMTYwNjIyMDAwMDAwWhcNMzYwNjIy -MDAwMDAwWjBHMQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNl -cnZpY2VzIExMQzEUMBIGA1UEAxMLR1RTIFJvb3QgUjEwggIiMA0GCSqGSIb3DQEB -AQUAA4ICDwAwggIKAoICAQC2EQKLHuOhd5s73L+UPreVp0A8of2C+X0yBoJx9vaM -f/vo27xqLpeXo4xL+Sv2sfnOhB2x+cWX3u+58qPpvBKJXqeqUqv4IyfLpLGcY9vX -mX7wCl7raKb0xlpHDU0QM+NOsROjyBhsS+z8CZDfnWQpJSMHobTSPS5g4M/SCYe7 -zUjwTcLCeoiKu7rPWRnWr4+wB7CeMfGCwcDfLqZtbBkOtdh+JhpFAz2weaSUKK0P -fyblqAj+lug8aJRT7oM6iCsVlgmy4HqMLnXWnOunVmSPlk9orj2XwoSPwLxAwAtc -vfaHszVsrBhQf4TgTM2S0yDpM7xSma8ytSmzJSq0SPly4cpk9+aCEI3oncKKiPo4 -Zor8Y/kB+Xj9e1x3+naH+uzfsQ55lVe0vSbv1gHR6xYKu44LtcXFilWr06zqkUsp -zBmkMiVOKvFlRNACzqrOSbTqn3yDsEB750Orp2yjj32JgfpMpf/VjsPOS+C12LOO -Rc92wO1AK/1TD7Cn1TsNsYqiA94xrcx36m97PtbfkSIS5r762DL8EGMUUXLeXdYW -k70paDPvOmbsB4om3xPXV2V4J95eSRQAogB/mqghtqmxlbCluQ0WEdrHbEg8QOB+ -DVrNVjzRlwW5y0vtOUucxD/SVRNuJLDWcfr0wbrM7Rv1/oFB2ACYPTrIrnqYNxgF -lQIDAQABo0IwQDAOBgNVHQ8BAf8EBAMCAQYwDwYDVR0TAQH/BAUwAwEB/zAdBgNV -HQ4EFgQU5K8rJnEaK0gnhS9SZizv8IkTcT4wDQYJKoZIhvcNAQEMBQADggIBADiW -Cu49tJYeX++dnAsznyvgyv3SjgofQXSlfKqE1OXyHuY3UjKcC9FhHb8owbZEKTV1 -d5iyfNm9dKyKaOOpMQkpAWBz40d8U6iQSifvS9efk+eCNs6aaAyC58/UEBZvXw6Z -XPYfcX3v73svfuo21pdwCxXu11xWajOl40k4DLh9+42FpLFZXvRq4d2h9mREruZR -gyFmxhE+885H7pwoHyXa/6xmld01D1zvICxi/ZG6qcz8WpyTgYMpl0p8WnK0OdC3 -d8t5/Wk6kjftbjhlRn7pYL15iJdfOBL07q9bgsiG1eGZbYwE8na6SfZu6W0eX6Dv -J4J2QPim01hcDyxC2kLGe4g0x8HYRZvBPsVhHdljUEn2NIVq4BjFbkerQUIpm/Zg -DdIx02OYI5NaAIFItO/Nis3Jz5nu2Z6qNuFoS3FJFDYoOj0dzpqPJeaAcWErtXvM -+SUWgeExX6GjfhaknBZqlxi9dnKlC54dNuYvoS++cJEPqOba+MSSQGwlfnuzCdyy -F62ARPBopY+Udf90WuioAnwMCeKpSwughQtiue+hMZL77/ZRBIls6Kl0obsXs7X9 -SQ98POyDGCBDTtWTurQ0sR8WNh8M5mQ5Fkzc4P4dyKliPUDqysU0ArSuiYgzNdws -E3PYJ/HQcu51OyLemGhmW/HGY0dVHLqlCFF1pkgl ------END CERTIFICATE----- - -# Issuer: CN=GTS Root R2 O=Google Trust Services LLC -# Subject: CN=GTS Root R2 O=Google Trust Services LLC -# Label: "GTS Root R2" -# Serial: 146587176055767053814479386953112547951 -# MD5 Fingerprint: 44:ed:9a:0e:a4:09:3b:00:f2:ae:4c:a3:c6:61:b0:8b -# SHA1 Fingerprint: d2:73:96:2a:2a:5e:39:9f:73:3f:e1:c7:1e:64:3f:03:38:34:fc:4d -# SHA256 Fingerprint: c4:5d:7b:b0:8e:6d:67:e6:2e:42:35:11:0b:56:4e:5f:78:fd:92:ef:05:8c:84:0a:ea:4e:64:55:d7:58:5c:60 ------BEGIN CERTIFICATE----- -MIIFWjCCA0KgAwIBAgIQbkepxlqz5yDFMJo/aFLybzANBgkqhkiG9w0BAQwFADBH -MQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExM -QzEUMBIGA1UEAxMLR1RTIFJvb3QgUjIwHhcNMTYwNjIyMDAwMDAwWhcNMzYwNjIy -MDAwMDAwWjBHMQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNl -cnZpY2VzIExMQzEUMBIGA1UEAxMLR1RTIFJvb3QgUjIwggIiMA0GCSqGSIb3DQEB -AQUAA4ICDwAwggIKAoICAQDO3v2m++zsFDQ8BwZabFn3GTXd98GdVarTzTukk3Lv -CvptnfbwhYBboUhSnznFt+4orO/LdmgUud+tAWyZH8QiHZ/+cnfgLFuv5AS/T3Kg -GjSY6Dlo7JUle3ah5mm5hRm9iYz+re026nO8/4Piy33B0s5Ks40FnotJk9/BW9Bu -XvAuMC6C/Pq8tBcKSOWIm8Wba96wyrQD8Nr0kLhlZPdcTK3ofmZemde4wj7I0BOd -re7kRXuJVfeKH2JShBKzwkCX44ofR5GmdFrS+LFjKBC4swm4VndAoiaYecb+3yXu -PuWgf9RhD1FLPD+M2uFwdNjCaKH5wQzpoeJ/u1U8dgbuak7MkogwTZq9TwtImoS1 -mKPV+3PBV2HdKFZ1E66HjucMUQkQdYhMvI35ezzUIkgfKtzra7tEscszcTJGr61K -8YzodDqs5xoic4DSMPclQsciOzsSrZYuxsN2B6ogtzVJV+mSSeh2FnIxZyuWfoqj -x5RWIr9qS34BIbIjMt/kmkRtWVtd9QCgHJvGeJeNkP+byKq0rxFROV7Z+2et1VsR -nTKaG73VululycslaVNVJ1zgyjbLiGH7HrfQy+4W+9OmTN6SpdTi3/UGVN4unUu0 -kzCqgc7dGtxRcw1PcOnlthYhGXmy5okLdWTK1au8CcEYof/UVKGFPP0UJAOyh9Ok -twIDAQABo0IwQDAOBgNVHQ8BAf8EBAMCAQYwDwYDVR0TAQH/BAUwAwEB/zAdBgNV -HQ4EFgQUu//KjiOfT5nK2+JopqUVJxce2Q4wDQYJKoZIhvcNAQEMBQADggIBALZp -8KZ3/p7uC4Gt4cCpx/k1HUCCq+YEtN/L9x0Pg/B+E02NjO7jMyLDOfxA325BS0JT -vhaI8dI4XsRomRyYUpOM52jtG2pzegVATX9lO9ZY8c6DR2Dj/5epnGB3GFW1fgiT -z9D2PGcDFWEJ+YF59exTpJ/JjwGLc8R3dtyDovUMSRqodt6Sm2T4syzFJ9MHwAiA -pJiS4wGWAqoC7o87xdFtCjMwc3i5T1QWvwsHoaRc5svJXISPD+AVdyx+Jn7axEvb -pxZ3B7DNdehyQtaVhJ2Gg/LkkM0JR9SLA3DaWsYDQvTtN6LwG1BUSw7YhN4ZKJmB -R64JGz9I0cNv4rBgF/XuIwKl2gBbbZCr7qLpGzvpx0QnRY5rn/WkhLx3+WuXrD5R -RaIRpsyF7gpo8j5QOHokYh4XIDdtak23CZvJ/KRY9bb7nE4Yu5UC56GtmwfuNmsk -0jmGwZODUNKBRqhfYlcsu2xkiAhu7xNUX90txGdj08+JN7+dIPT7eoOboB6BAFDC -5AwiWVIQ7UNWhwD4FFKnHYuTjKJNRn8nxnGbJN7k2oaLDX5rIMHAnuFl2GqjpuiF -izoHCBy69Y9Vmhh1fuXsgWbRIXOhNUQLgD1bnF5vKheW0YMjiGZt5obicDIvUiLn -yOd/xCxgXS/Dr55FBcOEArf9LAhST4Ldo/DUhgkC ------END CERTIFICATE----- - -# Issuer: CN=GTS Root R3 O=Google Trust Services LLC -# Subject: CN=GTS Root R3 O=Google Trust Services LLC -# Label: "GTS Root R3" -# Serial: 146587176140553309517047991083707763997 -# MD5 Fingerprint: 1a:79:5b:6b:04:52:9c:5d:c7:74:33:1b:25:9a:f9:25 -# SHA1 Fingerprint: 30:d4:24:6f:07:ff:db:91:89:8a:0b:e9:49:66:11:eb:8c:5e:46:e5 -# SHA256 Fingerprint: 15:d5:b8:77:46:19:ea:7d:54:ce:1c:a6:d0:b0:c4:03:e0:37:a9:17:f1:31:e8:a0:4e:1e:6b:7a:71:ba:bc:e5 ------BEGIN CERTIFICATE----- -MIICDDCCAZGgAwIBAgIQbkepx2ypcyRAiQ8DVd2NHTAKBggqhkjOPQQDAzBHMQsw -CQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExMQzEU -MBIGA1UEAxMLR1RTIFJvb3QgUjMwHhcNMTYwNjIyMDAwMDAwWhcNMzYwNjIyMDAw -MDAwWjBHMQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZp -Y2VzIExMQzEUMBIGA1UEAxMLR1RTIFJvb3QgUjMwdjAQBgcqhkjOPQIBBgUrgQQA -IgNiAAQfTzOHMymKoYTey8chWEGJ6ladK0uFxh1MJ7x/JlFyb+Kf1qPKzEUURout -736GjOyxfi//qXGdGIRFBEFVbivqJn+7kAHjSxm65FSWRQmx1WyRRK2EE46ajA2A -DDL24CejQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNVHRMBAf8EBTADAQH/MB0GA1Ud -DgQWBBTB8Sa6oC2uhYHP0/EqEr24Cmf9vDAKBggqhkjOPQQDAwNpADBmAjEAgFuk -fCPAlaUs3L6JbyO5o91lAFJekazInXJ0glMLfalAvWhgxeG4VDvBNhcl2MG9AjEA -njWSdIUlUfUk7GRSJFClH9voy8l27OyCbvWFGFPouOOaKaqW04MjyaR7YbPMAuhd ------END CERTIFICATE----- - -# Issuer: CN=GTS Root R4 O=Google Trust Services LLC -# Subject: CN=GTS Root R4 O=Google Trust Services LLC -# Label: "GTS Root R4" -# Serial: 146587176229350439916519468929765261721 -# MD5 Fingerprint: 5d:b6:6a:c4:60:17:24:6a:1a:99:a8:4b:ee:5e:b4:26 -# SHA1 Fingerprint: 2a:1d:60:27:d9:4a:b1:0a:1c:4d:91:5c:cd:33:a0:cb:3e:2d:54:cb -# SHA256 Fingerprint: 71:cc:a5:39:1f:9e:79:4b:04:80:25:30:b3:63:e1:21:da:8a:30:43:bb:26:66:2f:ea:4d:ca:7f:c9:51:a4:bd ------BEGIN CERTIFICATE----- -MIICCjCCAZGgAwIBAgIQbkepyIuUtui7OyrYorLBmTAKBggqhkjOPQQDAzBHMQsw -CQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExMQzEU -MBIGA1UEAxMLR1RTIFJvb3QgUjQwHhcNMTYwNjIyMDAwMDAwWhcNMzYwNjIyMDAw -MDAwWjBHMQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZp -Y2VzIExMQzEUMBIGA1UEAxMLR1RTIFJvb3QgUjQwdjAQBgcqhkjOPQIBBgUrgQQA -IgNiAATzdHOnaItgrkO4NcWBMHtLSZ37wWHO5t5GvWvVYRg1rkDdc/eJkTBa6zzu -hXyiQHY7qca4R9gq55KRanPpsXI5nymfopjTX15YhmUPoYRlBtHci8nHc8iMai/l -xKvRHYqjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNVHRMBAf8EBTADAQH/MB0GA1Ud -DgQWBBSATNbrdP9JNqPV2Py1PsVq8JQdjDAKBggqhkjOPQQDAwNnADBkAjBqUFJ0 -CMRw3J5QdCHojXohw0+WbhXRIjVhLfoIN+4Zba3bssx9BzT1YBkstTTZbyACMANx -sbqjYAuG7ZoIapVon+Kz4ZNkfF6Tpt95LY2F45TPI11xzPKwTdb+mciUqXWi4w== ------END CERTIFICATE----- - # Issuer: CN=UCA Global G2 Root O=UniTrust # Subject: CN=UCA Global G2 Root O=UniTrust # Label: "UCA Global G2 Root" @@ -4255,3 +3773,755 @@ qJZ9ZPskWkoDbGs4xugDQ5r3V7mzKWmTOPQD8rv7gmsHINFSH5pkAnuYZttcTVoP 0ISVoDwUQwbKytu4QTbaakRnh6+v40URFWkIsr4WOZckbxJF0WddCajJFdr60qZf E2Efv4WstK2tBZQIgx51F9NxO5NQI1mg7TyRVJ12AMXDuDjb -----END CERTIFICATE----- + +# Issuer: CN=TunTrust Root CA O=Agence Nationale de Certification Electronique +# Subject: CN=TunTrust Root CA O=Agence Nationale de Certification Electronique +# Label: "TunTrust Root CA" +# Serial: 108534058042236574382096126452369648152337120275 +# MD5 Fingerprint: 85:13:b9:90:5b:36:5c:b6:5e:b8:5a:f8:e0:31:57:b4 +# SHA1 Fingerprint: cf:e9:70:84:0f:e0:73:0f:9d:f6:0c:7f:2c:4b:ee:20:46:34:9c:bb +# SHA256 Fingerprint: 2e:44:10:2a:b5:8c:b8:54:19:45:1c:8e:19:d9:ac:f3:66:2c:af:bc:61:4b:6a:53:96:0a:30:f7:d0:e2:eb:41 +-----BEGIN CERTIFICATE----- +MIIFszCCA5ugAwIBAgIUEwLV4kBMkkaGFmddtLu7sms+/BMwDQYJKoZIhvcNAQEL +BQAwYTELMAkGA1UEBhMCVE4xNzA1BgNVBAoMLkFnZW5jZSBOYXRpb25hbGUgZGUg +Q2VydGlmaWNhdGlvbiBFbGVjdHJvbmlxdWUxGTAXBgNVBAMMEFR1blRydXN0IFJv +b3QgQ0EwHhcNMTkwNDI2MDg1NzU2WhcNNDQwNDI2MDg1NzU2WjBhMQswCQYDVQQG +EwJUTjE3MDUGA1UECgwuQWdlbmNlIE5hdGlvbmFsZSBkZSBDZXJ0aWZpY2F0aW9u +IEVsZWN0cm9uaXF1ZTEZMBcGA1UEAwwQVHVuVHJ1c3QgUm9vdCBDQTCCAiIwDQYJ +KoZIhvcNAQEBBQADggIPADCCAgoCggIBAMPN0/y9BFPdDCA61YguBUtB9YOCfvdZ +n56eY+hz2vYGqU8ftPkLHzmMmiDQfgbU7DTZhrx1W4eI8NLZ1KMKsmwb60ksPqxd +2JQDoOw05TDENX37Jk0bbjBU2PWARZw5rZzJJQRNmpA+TkBuimvNKWfGzC3gdOgF +VwpIUPp6Q9p+7FuaDmJ2/uqdHYVy7BG7NegfJ7/Boce7SBbdVtfMTqDhuazb1YMZ +GoXRlJfXyqNlC/M4+QKu3fZnz8k/9YosRxqZbwUN/dAdgjH8KcwAWJeRTIAAHDOF +li/LQcKLEITDCSSJH7UP2dl3RxiSlGBcx5kDPP73lad9UKGAwqmDrViWVSHbhlnU +r8a83YFuB9tgYv7sEG7aaAH0gxupPqJbI9dkxt/con3YS7qC0lH4Zr8GRuR5KiY2 +eY8fTpkdso8MDhz/yV3A/ZAQprE38806JG60hZC/gLkMjNWb1sjxVj8agIl6qeIb +MlEsPvLfe/ZdeikZjuXIvTZxi11Mwh0/rViizz1wTaZQmCXcI/m4WEEIcb9PuISg +jwBUFfyRbVinljvrS5YnzWuioYasDXxU5mZMZl+QviGaAkYt5IPCgLnPSz7ofzwB +7I9ezX/SKEIBlYrilz0QIX32nRzFNKHsLA4KUiwSVXAkPcvCFDVDXSdOvsC9qnyW +5/yeYa1E0wCXAgMBAAGjYzBhMB0GA1UdDgQWBBQGmpsfU33x9aTI04Y+oXNZtPdE +ITAPBgNVHRMBAf8EBTADAQH/MB8GA1UdIwQYMBaAFAaamx9TffH1pMjThj6hc1m0 +90QhMA4GA1UdDwEB/wQEAwIBBjANBgkqhkiG9w0BAQsFAAOCAgEAqgVutt0Vyb+z +xiD2BkewhpMl0425yAA/l/VSJ4hxyXT968pk21vvHl26v9Hr7lxpuhbI87mP0zYu +QEkHDVneixCwSQXi/5E/S7fdAo74gShczNxtr18UnH1YeA32gAm56Q6XKRm4t+v4 +FstVEuTGfbvE7Pi1HE4+Z7/FXxttbUcoqgRYYdZ2vyJ/0Adqp2RT8JeNnYA/u8EH +22Wv5psymsNUk8QcCMNE+3tjEUPRahphanltkE8pjkcFwRJpadbGNjHh/PqAulxP +xOu3Mqz4dWEX1xAZufHSCe96Qp1bWgvUxpVOKs7/B9dPfhgGiPEZtdmYu65xxBzn +dFlY7wyJz4sfdZMaBBSSSFCp61cpABbjNhzI+L/wM9VBD8TMPN3pM0MBkRArHtG5 +Xc0yGYuPjCB31yLEQtyEFpslbei0VXF/sHyz03FJuc9SpAQ/3D2gu68zngowYI7b +nV2UqL1g52KAdoGDDIzMMEZJ4gzSqK/rYXHv5yJiqfdcZGyfFoxnNidF9Ql7v/YQ +CvGwjVRDjAS6oz/v4jXH+XTgbzRB0L9zZVcg+ZtnemZoJE6AZb0QmQZZ8mWvuMZH +u/2QeItBcy6vVR/cO5JyboTT0GFMDcx2V+IthSIVNg3rAZ3r2OvEhJn7wAzMMujj +d9qDRIueVSjAi1jTkD5OGwDxFa2DK5o= +-----END CERTIFICATE----- + +# Issuer: CN=HARICA TLS RSA Root CA 2021 O=Hellenic Academic and Research Institutions CA +# Subject: CN=HARICA TLS RSA Root CA 2021 O=Hellenic Academic and Research Institutions CA +# Label: "HARICA TLS RSA Root CA 2021" +# Serial: 76817823531813593706434026085292783742 +# MD5 Fingerprint: 65:47:9b:58:86:dd:2c:f0:fc:a2:84:1f:1e:96:c4:91 +# SHA1 Fingerprint: 02:2d:05:82:fa:88:ce:14:0c:06:79:de:7f:14:10:e9:45:d7:a5:6d +# SHA256 Fingerprint: d9:5d:0e:8e:da:79:52:5b:f9:be:b1:1b:14:d2:10:0d:32:94:98:5f:0c:62:d9:fa:bd:9c:d9:99:ec:cb:7b:1d +-----BEGIN CERTIFICATE----- +MIIFpDCCA4ygAwIBAgIQOcqTHO9D88aOk8f0ZIk4fjANBgkqhkiG9w0BAQsFADBs +MQswCQYDVQQGEwJHUjE3MDUGA1UECgwuSGVsbGVuaWMgQWNhZGVtaWMgYW5kIFJl +c2VhcmNoIEluc3RpdHV0aW9ucyBDQTEkMCIGA1UEAwwbSEFSSUNBIFRMUyBSU0Eg +Um9vdCBDQSAyMDIxMB4XDTIxMDIxOTEwNTUzOFoXDTQ1MDIxMzEwNTUzN1owbDEL +MAkGA1UEBhMCR1IxNzA1BgNVBAoMLkhlbGxlbmljIEFjYWRlbWljIGFuZCBSZXNl +YXJjaCBJbnN0aXR1dGlvbnMgQ0ExJDAiBgNVBAMMG0hBUklDQSBUTFMgUlNBIFJv +b3QgQ0EgMjAyMTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAIvC569l +mwVnlskNJLnQDmT8zuIkGCyEf3dRywQRNrhe7Wlxp57kJQmXZ8FHws+RFjZiPTgE +4VGC/6zStGndLuwRo0Xua2s7TL+MjaQenRG56Tj5eg4MmOIjHdFOY9TnuEFE+2uv +a9of08WRiFukiZLRgeaMOVig1mlDqa2YUlhu2wr7a89o+uOkXjpFc5gH6l8Cct4M +pbOfrqkdtx2z/IpZ525yZa31MJQjB/OCFks1mJxTuy/K5FrZx40d/JiZ+yykgmvw +Kh+OC19xXFyuQnspiYHLA6OZyoieC0AJQTPb5lh6/a6ZcMBaD9YThnEvdmn8kN3b +LW7R8pv1GmuebxWMevBLKKAiOIAkbDakO/IwkfN4E8/BPzWr8R0RI7VDIp4BkrcY +AuUR0YLbFQDMYTfBKnya4dC6s1BG7oKsnTH4+yPiAwBIcKMJJnkVU2DzOFytOOqB +AGMUuTNe3QvboEUHGjMJ+E20pwKmafTCWQWIZYVWrkvL4N48fS0ayOn7H6NhStYq +E613TBoYm5EPWNgGVMWX+Ko/IIqmhaZ39qb8HOLubpQzKoNQhArlT4b4UEV4AIHr +W2jjJo3Me1xR9BQsQL4aYB16cmEdH2MtiKrOokWQCPxrvrNQKlr9qEgYRtaQQJKQ +CoReaDH46+0N0x3GfZkYVVYnZS6NRcUk7M7jAgMBAAGjQjBAMA8GA1UdEwEB/wQF +MAMBAf8wHQYDVR0OBBYEFApII6ZgpJIKM+qTW8VX6iVNvRLuMA4GA1UdDwEB/wQE +AwIBhjANBgkqhkiG9w0BAQsFAAOCAgEAPpBIqm5iFSVmewzVjIuJndftTgfvnNAU +X15QvWiWkKQUEapobQk1OUAJ2vQJLDSle1mESSmXdMgHHkdt8s4cUCbjnj1AUz/3 +f5Z2EMVGpdAgS1D0NTsY9FVqQRtHBmg8uwkIYtlfVUKqrFOFrJVWNlar5AWMxaja +H6NpvVMPxP/cyuN+8kyIhkdGGvMA9YCRotxDQpSbIPDRzbLrLFPCU3hKTwSUQZqP +JzLB5UkZv/HywouoCjkxKLR9YjYsTewfM7Z+d21+UPCfDtcRj88YxeMn/ibvBZ3P +zzfF0HvaO7AWhAw6k9a+F9sPPg4ZeAnHqQJyIkv3N3a6dcSFA1pj1bF1BcK5vZSt +jBWZp5N99sXzqnTPBIWUmAD04vnKJGW/4GKvyMX6ssmeVkjaef2WdhW+o45WxLM0 +/L5H9MG0qPzVMIho7suuyWPEdr6sOBjhXlzPrjoiUevRi7PzKzMHVIf6tLITe7pT +BGIBnfHAT+7hOtSLIBD6Alfm78ELt5BGnBkpjNxvoEppaZS3JGWg/6w/zgH7IS79 +aPib8qXPMThcFarmlwDB31qlpzmq6YR/PFGoOtmUW4y/Twhx5duoXNTSpv4Ao8YW +xw/ogM4cKGR0GQjTQuPOAF1/sdwTsOEFy9EgqoZ0njnnkf3/W9b3raYvAwtt41dU +63ZTGI0RmLo= +-----END CERTIFICATE----- + +# Issuer: CN=HARICA TLS ECC Root CA 2021 O=Hellenic Academic and Research Institutions CA +# Subject: CN=HARICA TLS ECC Root CA 2021 O=Hellenic Academic and Research Institutions CA +# Label: "HARICA TLS ECC Root CA 2021" +# Serial: 137515985548005187474074462014555733966 +# MD5 Fingerprint: ae:f7:4c:e5:66:35:d1:b7:9b:8c:22:93:74:d3:4b:b0 +# SHA1 Fingerprint: bc:b0:c1:9d:e9:98:92:70:19:38:57:e9:8d:a7:b4:5d:6e:ee:01:48 +# SHA256 Fingerprint: 3f:99:cc:47:4a:cf:ce:4d:fe:d5:87:94:66:5e:47:8d:15:47:73:9f:2e:78:0f:1b:b4:ca:9b:13:30:97:d4:01 +-----BEGIN CERTIFICATE----- +MIICVDCCAdugAwIBAgIQZ3SdjXfYO2rbIvT/WeK/zjAKBggqhkjOPQQDAzBsMQsw +CQYDVQQGEwJHUjE3MDUGA1UECgwuSGVsbGVuaWMgQWNhZGVtaWMgYW5kIFJlc2Vh +cmNoIEluc3RpdHV0aW9ucyBDQTEkMCIGA1UEAwwbSEFSSUNBIFRMUyBFQ0MgUm9v +dCBDQSAyMDIxMB4XDTIxMDIxOTExMDExMFoXDTQ1MDIxMzExMDEwOVowbDELMAkG +A1UEBhMCR1IxNzA1BgNVBAoMLkhlbGxlbmljIEFjYWRlbWljIGFuZCBSZXNlYXJj +aCBJbnN0aXR1dGlvbnMgQ0ExJDAiBgNVBAMMG0hBUklDQSBUTFMgRUNDIFJvb3Qg +Q0EgMjAyMTB2MBAGByqGSM49AgEGBSuBBAAiA2IABDgI/rGgltJ6rK9JOtDA4MM7 +KKrxcm1lAEeIhPyaJmuqS7psBAqIXhfyVYf8MLA04jRYVxqEU+kw2anylnTDUR9Y +STHMmE5gEYd103KUkE+bECUqqHgtvpBBWJAVcqeht6NCMEAwDwYDVR0TAQH/BAUw +AwEB/zAdBgNVHQ4EFgQUyRtTgRL+BNUW0aq8mm+3oJUZbsowDgYDVR0PAQH/BAQD +AgGGMAoGCCqGSM49BAMDA2cAMGQCMBHervjcToiwqfAircJRQO9gcS3ujwLEXQNw +SaSS6sUUiHCm0w2wqsosQJz76YJumgIwK0eaB8bRwoF8yguWGEEbo/QwCZ61IygN +nxS2PFOiTAZpffpskcYqSUXm7LcT4Tps +-----END CERTIFICATE----- + +# Issuer: CN=Autoridad de Certificacion Firmaprofesional CIF A62634068 +# Subject: CN=Autoridad de Certificacion Firmaprofesional CIF A62634068 +# Label: "Autoridad de Certificacion Firmaprofesional CIF A62634068" +# Serial: 1977337328857672817 +# MD5 Fingerprint: 4e:6e:9b:54:4c:ca:b7:fa:48:e4:90:b1:15:4b:1c:a3 +# SHA1 Fingerprint: 0b:be:c2:27:22:49:cb:39:aa:db:35:5c:53:e3:8c:ae:78:ff:b6:fe +# SHA256 Fingerprint: 57:de:05:83:ef:d2:b2:6e:03:61:da:99:da:9d:f4:64:8d:ef:7e:e8:44:1c:3b:72:8a:fa:9b:cd:e0:f9:b2:6a +-----BEGIN CERTIFICATE----- +MIIGFDCCA/ygAwIBAgIIG3Dp0v+ubHEwDQYJKoZIhvcNAQELBQAwUTELMAkGA1UE +BhMCRVMxQjBABgNVBAMMOUF1dG9yaWRhZCBkZSBDZXJ0aWZpY2FjaW9uIEZpcm1h +cHJvZmVzaW9uYWwgQ0lGIEE2MjYzNDA2ODAeFw0xNDA5MjMxNTIyMDdaFw0zNjA1 +MDUxNTIyMDdaMFExCzAJBgNVBAYTAkVTMUIwQAYDVQQDDDlBdXRvcmlkYWQgZGUg +Q2VydGlmaWNhY2lvbiBGaXJtYXByb2Zlc2lvbmFsIENJRiBBNjI2MzQwNjgwggIi +MA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQDKlmuO6vj78aI14H9M2uDDUtd9 +thDIAl6zQyrET2qyyhxdKJp4ERppWVevtSBC5IsP5t9bpgOSL/UR5GLXMnE42QQM +cas9UX4PB99jBVzpv5RvwSmCwLTaUbDBPLutN0pcyvFLNg4kq7/DhHf9qFD0sefG +L9ItWY16Ck6WaVICqjaY7Pz6FIMMNx/Jkjd/14Et5cS54D40/mf0PmbR0/RAz15i +NA9wBj4gGFrO93IbJWyTdBSTo3OxDqqHECNZXyAFGUftaI6SEspd/NYrspI8IM/h +X68gvqB2f3bl7BqGYTM+53u0P6APjqK5am+5hyZvQWyIplD9amML9ZMWGxmPsu2b +m8mQ9QEM3xk9Dz44I8kvjwzRAv4bVdZO0I08r0+k8/6vKtMFnXkIoctXMbScyJCy +Z/QYFpM6/EfY0XiWMR+6KwxfXZmtY4laJCB22N/9q06mIqqdXuYnin1oKaPnirja +EbsXLZmdEyRG98Xi2J+Of8ePdG1asuhy9azuJBCtLxTa/y2aRnFHvkLfuwHb9H/T +KI8xWVvTyQKmtFLKbpf7Q8UIJm+K9Lv9nyiqDdVF8xM6HdjAeI9BZzwelGSuewvF +6NkBiDkal4ZkQdU7hwxu+g/GvUgUvzlN1J5Bto+WHWOWk9mVBngxaJ43BjuAiUVh +OSPHG0SjFeUc+JIwuwIDAQABo4HvMIHsMB0GA1UdDgQWBBRlzeurNR4APn7VdMAc +tHNHDhpkLzASBgNVHRMBAf8ECDAGAQH/AgEBMIGmBgNVHSAEgZ4wgZswgZgGBFUd +IAAwgY8wLwYIKwYBBQUHAgEWI2h0dHA6Ly93d3cuZmlybWFwcm9mZXNpb25hbC5j +b20vY3BzMFwGCCsGAQUFBwICMFAeTgBQAGEAcwBlAG8AIABkAGUAIABsAGEAIABC +AG8AbgBhAG4AbwB2AGEAIAA0ADcAIABCAGEAcgBjAGUAbABvAG4AYQAgADAAOAAw +ADEANzAOBgNVHQ8BAf8EBAMCAQYwDQYJKoZIhvcNAQELBQADggIBAHSHKAIrdx9m +iWTtj3QuRhy7qPj4Cx2Dtjqn6EWKB7fgPiDL4QjbEwj4KKE1soCzC1HA01aajTNF +Sa9J8OA9B3pFE1r/yJfY0xgsfZb43aJlQ3CTkBW6kN/oGbDbLIpgD7dvlAceHabJ +hfa9NPhAeGIQcDq+fUs5gakQ1JZBu/hfHAsdCPKxsIl68veg4MSPi3i1O1ilI45P +Vf42O+AMt8oqMEEgtIDNrvx2ZnOorm7hfNoD6JQg5iKj0B+QXSBTFCZX2lSX3xZE +EAEeiGaPcjiT3SC3NL7X8e5jjkd5KAb881lFJWAiMxujX6i6KtoaPc1A6ozuBRWV +1aUsIC+nmCjuRfzxuIgALI9C2lHVnOUTaHFFQ4ueCyE8S1wF3BqfmI7avSKecs2t +CsvMo2ebKHTEm9caPARYpoKdrcd7b/+Alun4jWq9GJAd/0kakFI3ky88Al2CdgtR +5xbHV/g4+afNmyJU72OwFW1TZQNKXkqgsqeOSQBZONXH9IBk9W6VULgRfhVwOEqw +f9DEMnDAGf/JOC0ULGb0QkTmVXYbgBVX/8Cnp6o5qtjTcNAuuuuUavpfNIbnYrX9 +ivAwhZTJryQCL2/W3Wf+47BVTwSYT6RBVuKT0Gro1vP7ZeDOdcQxWQzugsgMYDNK +GbqEZycPvEJdvSRUDewdcAZfpLz6IHxV +-----END CERTIFICATE----- + +# Issuer: CN=vTrus ECC Root CA O=iTrusChina Co.,Ltd. +# Subject: CN=vTrus ECC Root CA O=iTrusChina Co.,Ltd. +# Label: "vTrus ECC Root CA" +# Serial: 630369271402956006249506845124680065938238527194 +# MD5 Fingerprint: de:4b:c1:f5:52:8c:9b:43:e1:3e:8f:55:54:17:8d:85 +# SHA1 Fingerprint: f6:9c:db:b0:fc:f6:02:13:b6:52:32:a6:a3:91:3f:16:70:da:c3:e1 +# SHA256 Fingerprint: 30:fb:ba:2c:32:23:8e:2a:98:54:7a:f9:79:31:e5:50:42:8b:9b:3f:1c:8e:eb:66:33:dc:fa:86:c5:b2:7d:d3 +-----BEGIN CERTIFICATE----- +MIICDzCCAZWgAwIBAgIUbmq8WapTvpg5Z6LSa6Q75m0c1towCgYIKoZIzj0EAwMw +RzELMAkGA1UEBhMCQ04xHDAaBgNVBAoTE2lUcnVzQ2hpbmEgQ28uLEx0ZC4xGjAY +BgNVBAMTEXZUcnVzIEVDQyBSb290IENBMB4XDTE4MDczMTA3MjY0NFoXDTQzMDcz +MTA3MjY0NFowRzELMAkGA1UEBhMCQ04xHDAaBgNVBAoTE2lUcnVzQ2hpbmEgQ28u +LEx0ZC4xGjAYBgNVBAMTEXZUcnVzIEVDQyBSb290IENBMHYwEAYHKoZIzj0CAQYF +K4EEACIDYgAEZVBKrox5lkqqHAjDo6LN/llWQXf9JpRCux3NCNtzslt188+cToL0 +v/hhJoVs1oVbcnDS/dtitN9Ti72xRFhiQgnH+n9bEOf+QP3A2MMrMudwpremIFUd +e4BdS49nTPEQo0IwQDAdBgNVHQ4EFgQUmDnNvtiyjPeyq+GtJK97fKHbH88wDwYD +VR0TAQH/BAUwAwEB/zAOBgNVHQ8BAf8EBAMCAQYwCgYIKoZIzj0EAwMDaAAwZQIw +V53dVvHH4+m4SVBrm2nDb+zDfSXkV5UTQJtS0zvzQBm8JsctBp61ezaf9SXUY2sA +AjEA6dPGnlaaKsyh2j/IZivTWJwghfqrkYpwcBE4YGQLYgmRWAD5Tfs0aNoJrSEG +GJTO +-----END CERTIFICATE----- + +# Issuer: CN=vTrus Root CA O=iTrusChina Co.,Ltd. +# Subject: CN=vTrus Root CA O=iTrusChina Co.,Ltd. +# Label: "vTrus Root CA" +# Serial: 387574501246983434957692974888460947164905180485 +# MD5 Fingerprint: b8:c9:37:df:fa:6b:31:84:64:c5:ea:11:6a:1b:75:fc +# SHA1 Fingerprint: 84:1a:69:fb:f5:cd:1a:25:34:13:3d:e3:f8:fc:b8:99:d0:c9:14:b7 +# SHA256 Fingerprint: 8a:71:de:65:59:33:6f:42:6c:26:e5:38:80:d0:0d:88:a1:8d:a4:c6:a9:1f:0d:cb:61:94:e2:06:c5:c9:63:87 +-----BEGIN CERTIFICATE----- +MIIFVjCCAz6gAwIBAgIUQ+NxE9izWRRdt86M/TX9b7wFjUUwDQYJKoZIhvcNAQEL +BQAwQzELMAkGA1UEBhMCQ04xHDAaBgNVBAoTE2lUcnVzQ2hpbmEgQ28uLEx0ZC4x +FjAUBgNVBAMTDXZUcnVzIFJvb3QgQ0EwHhcNMTgwNzMxMDcyNDA1WhcNNDMwNzMx +MDcyNDA1WjBDMQswCQYDVQQGEwJDTjEcMBoGA1UEChMTaVRydXNDaGluYSBDby4s +THRkLjEWMBQGA1UEAxMNdlRydXMgUm9vdCBDQTCCAiIwDQYJKoZIhvcNAQEBBQAD +ggIPADCCAgoCggIBAL1VfGHTuB0EYgWgrmy3cLRB6ksDXhA/kFocizuwZotsSKYc +IrrVQJLuM7IjWcmOvFjai57QGfIvWcaMY1q6n6MLsLOaXLoRuBLpDLvPbmyAhykU +AyyNJJrIZIO1aqwTLDPxn9wsYTwaP3BVm60AUn/PBLn+NvqcwBauYv6WTEN+VRS+ +GrPSbcKvdmaVayqwlHeFXgQPYh1jdfdr58tbmnDsPmcF8P4HCIDPKNsFxhQnL4Z9 +8Cfe/+Z+M0jnCx5Y0ScrUw5XSmXX+6KAYPxMvDVTAWqXcoKv8R1w6Jz1717CbMdH +flqUhSZNO7rrTOiwCcJlwp2dCZtOtZcFrPUGoPc2BX70kLJrxLT5ZOrpGgrIDajt +J8nU57O5q4IikCc9Kuh8kO+8T/3iCiSn3mUkpF3qwHYw03dQ+A0Em5Q2AXPKBlim +0zvc+gRGE1WKyURHuFE5Gi7oNOJ5y1lKCn+8pu8fA2dqWSslYpPZUxlmPCdiKYZN +pGvu/9ROutW04o5IWgAZCfEF2c6Rsffr6TlP9m8EQ5pV9T4FFL2/s1m02I4zhKOQ +UqqzApVg+QxMaPnu1RcN+HFXtSXkKe5lXa/R7jwXC1pDxaWG6iSe4gUH3DRCEpHW +OXSuTEGC2/KmSNGzm/MzqvOmwMVO9fSddmPmAsYiS8GVP1BkLFTltvA8Kc9XAgMB +AAGjQjBAMB0GA1UdDgQWBBRUYnBj8XWEQ1iO0RYgscasGrz2iTAPBgNVHRMBAf8E +BTADAQH/MA4GA1UdDwEB/wQEAwIBBjANBgkqhkiG9w0BAQsFAAOCAgEAKbqSSaet +8PFww+SX8J+pJdVrnjT+5hpk9jprUrIQeBqfTNqK2uwcN1LgQkv7bHbKJAs5EhWd +nxEt/Hlk3ODg9d3gV8mlsnZwUKT+twpw1aA08XXXTUm6EdGz2OyC/+sOxL9kLX1j +bhd47F18iMjrjld22VkE+rxSH0Ws8HqA7Oxvdq6R2xCOBNyS36D25q5J08FsEhvM +Kar5CKXiNxTKsbhm7xqC5PD48acWabfbqWE8n/Uxy+QARsIvdLGx14HuqCaVvIiv +TDUHKgLKeBRtRytAVunLKmChZwOgzoy8sHJnxDHO2zTlJQNgJXtxmOTAGytfdELS +S8VZCAeHvsXDf+eW2eHcKJfWjwXj9ZtOyh1QRwVTsMo554WgicEFOwE30z9J4nfr +I8iIZjs9OXYhRvHsXyO466JmdXTBQPfYaJqT4i2pLr0cox7IdMakLXogqzu4sEb9 +b91fUlV1YvCXoHzXOP0l382gmxDPi7g4Xl7FtKYCNqEeXxzP4padKar9mK5S4fNB +UvupLnKWnyfjqnN9+BojZns7q2WwMgFLFT49ok8MKzWixtlnEjUwzXYuFrOZnk1P +Ti07NEPhmg4NpGaXutIcSkwsKouLgU9xGqndXHt7CMUADTdA43x7VF8vhV929ven +sBxXVsFy6K2ir40zSbofitzmdHxghm+Hl3s= +-----END CERTIFICATE----- + +# Issuer: CN=ISRG Root X2 O=Internet Security Research Group +# Subject: CN=ISRG Root X2 O=Internet Security Research Group +# Label: "ISRG Root X2" +# Serial: 87493402998870891108772069816698636114 +# MD5 Fingerprint: d3:9e:c4:1e:23:3c:a6:df:cf:a3:7e:6d:e0:14:e6:e5 +# SHA1 Fingerprint: bd:b1:b9:3c:d5:97:8d:45:c6:26:14:55:f8:db:95:c7:5a:d1:53:af +# SHA256 Fingerprint: 69:72:9b:8e:15:a8:6e:fc:17:7a:57:af:b7:17:1d:fc:64:ad:d2:8c:2f:ca:8c:f1:50:7e:34:45:3c:cb:14:70 +-----BEGIN CERTIFICATE----- +MIICGzCCAaGgAwIBAgIQQdKd0XLq7qeAwSxs6S+HUjAKBggqhkjOPQQDAzBPMQsw +CQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJuZXQgU2VjdXJpdHkgUmVzZWFyY2gg +R3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBYMjAeFw0yMDA5MDQwMDAwMDBaFw00 +MDA5MTcxNjAwMDBaME8xCzAJBgNVBAYTAlVTMSkwJwYDVQQKEyBJbnRlcm5ldCBT +ZWN1cml0eSBSZXNlYXJjaCBHcm91cDEVMBMGA1UEAxMMSVNSRyBSb290IFgyMHYw +EAYHKoZIzj0CAQYFK4EEACIDYgAEzZvVn4CDCuwJSvMWSj5cz3es3mcFDR0HttwW ++1qLFNvicWDEukWVEYmO6gbf9yoWHKS5xcUy4APgHoIYOIvXRdgKam7mAHf7AlF9 +ItgKbppbd9/w+kHsOdx1ymgHDB/qo0IwQDAOBgNVHQ8BAf8EBAMCAQYwDwYDVR0T +AQH/BAUwAwEB/zAdBgNVHQ4EFgQUfEKWrt5LSDv6kviejM9ti6lyN5UwCgYIKoZI +zj0EAwMDaAAwZQIwe3lORlCEwkSHRhtFcP9Ymd70/aTSVaYgLXTWNLxBo1BfASdW +tL4ndQavEi51mI38AjEAi/V3bNTIZargCyzuFJ0nN6T5U6VR5CmD1/iQMVtCnwr1 +/q4AaOeMSQ+2b1tbFfLn +-----END CERTIFICATE----- + +# Issuer: CN=HiPKI Root CA - G1 O=Chunghwa Telecom Co., Ltd. +# Subject: CN=HiPKI Root CA - G1 O=Chunghwa Telecom Co., Ltd. +# Label: "HiPKI Root CA - G1" +# Serial: 60966262342023497858655262305426234976 +# MD5 Fingerprint: 69:45:df:16:65:4b:e8:68:9a:8f:76:5f:ff:80:9e:d3 +# SHA1 Fingerprint: 6a:92:e4:a8:ee:1b:ec:96:45:37:e3:29:57:49:cd:96:e3:e5:d2:60 +# SHA256 Fingerprint: f0:15:ce:3c:c2:39:bf:ef:06:4b:e9:f1:d2:c4:17:e1:a0:26:4a:0a:94:be:1f:0c:8d:12:18:64:eb:69:49:cc +-----BEGIN CERTIFICATE----- +MIIFajCCA1KgAwIBAgIQLd2szmKXlKFD6LDNdmpeYDANBgkqhkiG9w0BAQsFADBP +MQswCQYDVQQGEwJUVzEjMCEGA1UECgwaQ2h1bmdod2EgVGVsZWNvbSBDby4sIEx0 +ZC4xGzAZBgNVBAMMEkhpUEtJIFJvb3QgQ0EgLSBHMTAeFw0xOTAyMjIwOTQ2MDRa +Fw0zNzEyMzExNTU5NTlaME8xCzAJBgNVBAYTAlRXMSMwIQYDVQQKDBpDaHVuZ2h3 +YSBUZWxlY29tIENvLiwgTHRkLjEbMBkGA1UEAwwSSGlQS0kgUm9vdCBDQSAtIEcx +MIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEA9B5/UnMyDHPkvRN0o9Qw +qNCuS9i233VHZvR85zkEHmpwINJaR3JnVfSl6J3VHiGh8Ge6zCFovkRTv4354twv +Vcg3Px+kwJyz5HdcoEb+d/oaoDjq7Zpy3iu9lFc6uux55199QmQ5eiY29yTw1S+6 +lZgRZq2XNdZ1AYDgr/SEYYwNHl98h5ZeQa/rh+r4XfEuiAU+TCK72h8q3VJGZDnz +Qs7ZngyzsHeXZJzA9KMuH5UHsBffMNsAGJZMoYFL3QRtU6M9/Aes1MU3guvklQgZ +KILSQjqj2FPseYlgSGDIcpJQ3AOPgz+yQlda22rpEZfdhSi8MEyr48KxRURHH+CK +FgeW0iEPU8DtqX7UTuybCeyvQqww1r/REEXgphaypcXTT3OUM3ECoWqj1jOXTyFj +HluP2cFeRXF3D4FdXyGarYPM+l7WjSNfGz1BryB1ZlpK9p/7qxj3ccC2HTHsOyDr +y+K49a6SsvfhhEvyovKTmiKe0xRvNlS9H15ZFblzqMF8b3ti6RZsR1pl8w4Rm0bZ +/W3c1pzAtH2lsN0/Vm+h+fbkEkj9Bn8SV7apI09bA8PgcSojt/ewsTu8mL3WmKgM +a/aOEmem8rJY5AIJEzypuxC00jBF8ez3ABHfZfjcK0NVvxaXxA/VLGGEqnKG/uY6 +fsI/fe78LxQ+5oXdUG+3Se0CAwEAAaNCMEAwDwYDVR0TAQH/BAUwAwEB/zAdBgNV +HQ4EFgQU8ncX+l6o/vY9cdVouslGDDjYr7AwDgYDVR0PAQH/BAQDAgGGMA0GCSqG +SIb3DQEBCwUAA4ICAQBQUfB13HAE4/+qddRxosuej6ip0691x1TPOhwEmSKsxBHi +7zNKpiMdDg1H2DfHb680f0+BazVP6XKlMeJ45/dOlBhbQH3PayFUhuaVevvGyuqc +SE5XCV0vrPSltJczWNWseanMX/mF+lLFjfiRFOs6DRfQUsJ748JzjkZ4Bjgs6Fza +ZsT0pPBWGTMpWmWSBUdGSquEwx4noR8RkpkndZMPvDY7l1ePJlsMu5wP1G4wB9Tc +XzZoZjmDlicmisjEOf6aIW/Vcobpf2Lll07QJNBAsNB1CI69aO4I1258EHBGG3zg +iLKecoaZAeO/n0kZtCW+VmWuF2PlHt/o/0elv+EmBYTksMCv5wiZqAxeJoBF1Pho +L5aPruJKHJwWDBNvOIf2u8g0X5IDUXlwpt/L9ZlNec1OvFefQ05rLisY+GpzjLrF +Ne85akEez3GoorKGB1s6yeHvP2UEgEcyRHCVTjFnanRbEEV16rCf0OY1/k6fi8wr +kkVbbiVghUbN0aqwdmaTd5a+g744tiROJgvM7XpWGuDpWsZkrUx6AEhEL7lAuxM+ +vhV4nYWBSipX3tUZQ9rbyltHhoMLP7YNdnhzeSJesYAfz77RP1YQmCuVh6EfnWQU +YDksswBVLuT1sw5XxJFBAJw/6KXf6vb/yPCtbVKoF6ubYfwSUTXkJf2vqmqGOQ== +-----END CERTIFICATE----- + +# Issuer: CN=GlobalSign O=GlobalSign OU=GlobalSign ECC Root CA - R4 +# Subject: CN=GlobalSign O=GlobalSign OU=GlobalSign ECC Root CA - R4 +# Label: "GlobalSign ECC Root CA - R4" +# Serial: 159662223612894884239637590694 +# MD5 Fingerprint: 26:29:f8:6d:e1:88:bf:a2:65:7f:aa:c4:cd:0f:7f:fc +# SHA1 Fingerprint: 6b:a0:b0:98:e1:71:ef:5a:ad:fe:48:15:80:77:10:f4:bd:6f:0b:28 +# SHA256 Fingerprint: b0:85:d7:0b:96:4f:19:1a:73:e4:af:0d:54:ae:7a:0e:07:aa:fd:af:9b:71:dd:08:62:13:8a:b7:32:5a:24:a2 +-----BEGIN CERTIFICATE----- +MIIB3DCCAYOgAwIBAgINAgPlfvU/k/2lCSGypjAKBggqhkjOPQQDAjBQMSQwIgYD +VQQLExtHbG9iYWxTaWduIEVDQyBSb290IENBIC0gUjQxEzARBgNVBAoTCkdsb2Jh +bFNpZ24xEzARBgNVBAMTCkdsb2JhbFNpZ24wHhcNMTIxMTEzMDAwMDAwWhcNMzgw +MTE5MDMxNDA3WjBQMSQwIgYDVQQLExtHbG9iYWxTaWduIEVDQyBSb290IENBIC0g +UjQxEzARBgNVBAoTCkdsb2JhbFNpZ24xEzARBgNVBAMTCkdsb2JhbFNpZ24wWTAT +BgcqhkjOPQIBBggqhkjOPQMBBwNCAAS4xnnTj2wlDp8uORkcA6SumuU5BwkWymOx +uYb4ilfBV85C+nOh92VC/x7BALJucw7/xyHlGKSq2XE/qNS5zowdo0IwQDAOBgNV +HQ8BAf8EBAMCAYYwDwYDVR0TAQH/BAUwAwEB/zAdBgNVHQ4EFgQUVLB7rUW44kB/ ++wpu+74zyTyjhNUwCgYIKoZIzj0EAwIDRwAwRAIgIk90crlgr/HmnKAWBVBfw147 +bmF0774BxL4YSFlhgjICICadVGNA3jdgUM/I2O2dgq43mLyjj0xMqTQrbO/7lZsm +-----END CERTIFICATE----- + +# Issuer: CN=GTS Root R1 O=Google Trust Services LLC +# Subject: CN=GTS Root R1 O=Google Trust Services LLC +# Label: "GTS Root R1" +# Serial: 159662320309726417404178440727 +# MD5 Fingerprint: 05:fe:d0:bf:71:a8:a3:76:63:da:01:e0:d8:52:dc:40 +# SHA1 Fingerprint: e5:8c:1c:c4:91:3b:38:63:4b:e9:10:6e:e3:ad:8e:6b:9d:d9:81:4a +# SHA256 Fingerprint: d9:47:43:2a:bd:e7:b7:fa:90:fc:2e:6b:59:10:1b:12:80:e0:e1:c7:e4:e4:0f:a3:c6:88:7f:ff:57:a7:f4:cf +-----BEGIN CERTIFICATE----- +MIIFVzCCAz+gAwIBAgINAgPlk28xsBNJiGuiFzANBgkqhkiG9w0BAQwFADBHMQsw +CQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExMQzEU +MBIGA1UEAxMLR1RTIFJvb3QgUjEwHhcNMTYwNjIyMDAwMDAwWhcNMzYwNjIyMDAw +MDAwWjBHMQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZp +Y2VzIExMQzEUMBIGA1UEAxMLR1RTIFJvb3QgUjEwggIiMA0GCSqGSIb3DQEBAQUA +A4ICDwAwggIKAoICAQC2EQKLHuOhd5s73L+UPreVp0A8of2C+X0yBoJx9vaMf/vo +27xqLpeXo4xL+Sv2sfnOhB2x+cWX3u+58qPpvBKJXqeqUqv4IyfLpLGcY9vXmX7w +Cl7raKb0xlpHDU0QM+NOsROjyBhsS+z8CZDfnWQpJSMHobTSPS5g4M/SCYe7zUjw +TcLCeoiKu7rPWRnWr4+wB7CeMfGCwcDfLqZtbBkOtdh+JhpFAz2weaSUKK0Pfybl +qAj+lug8aJRT7oM6iCsVlgmy4HqMLnXWnOunVmSPlk9orj2XwoSPwLxAwAtcvfaH +szVsrBhQf4TgTM2S0yDpM7xSma8ytSmzJSq0SPly4cpk9+aCEI3oncKKiPo4Zor8 +Y/kB+Xj9e1x3+naH+uzfsQ55lVe0vSbv1gHR6xYKu44LtcXFilWr06zqkUspzBmk +MiVOKvFlRNACzqrOSbTqn3yDsEB750Orp2yjj32JgfpMpf/VjsPOS+C12LOORc92 +wO1AK/1TD7Cn1TsNsYqiA94xrcx36m97PtbfkSIS5r762DL8EGMUUXLeXdYWk70p +aDPvOmbsB4om3xPXV2V4J95eSRQAogB/mqghtqmxlbCluQ0WEdrHbEg8QOB+DVrN +VjzRlwW5y0vtOUucxD/SVRNuJLDWcfr0wbrM7Rv1/oFB2ACYPTrIrnqYNxgFlQID +AQABo0IwQDAOBgNVHQ8BAf8EBAMCAYYwDwYDVR0TAQH/BAUwAwEB/zAdBgNVHQ4E +FgQU5K8rJnEaK0gnhS9SZizv8IkTcT4wDQYJKoZIhvcNAQEMBQADggIBAJ+qQibb +C5u+/x6Wki4+omVKapi6Ist9wTrYggoGxval3sBOh2Z5ofmmWJyq+bXmYOfg6LEe +QkEzCzc9zolwFcq1JKjPa7XSQCGYzyI0zzvFIoTgxQ6KfF2I5DUkzps+GlQebtuy +h6f88/qBVRRiClmpIgUxPoLW7ttXNLwzldMXG+gnoot7TiYaelpkttGsN/H9oPM4 +7HLwEXWdyzRSjeZ2axfG34arJ45JK3VmgRAhpuo+9K4l/3wV3s6MJT/KYnAK9y8J +ZgfIPxz88NtFMN9iiMG1D53Dn0reWVlHxYciNuaCp+0KueIHoI17eko8cdLiA6Ef +MgfdG+RCzgwARWGAtQsgWSl4vflVy2PFPEz0tv/bal8xa5meLMFrUKTX5hgUvYU/ +Z6tGn6D/Qqc6f1zLXbBwHSs09dR2CQzreExZBfMzQsNhFRAbd03OIozUhfJFfbdT +6u9AWpQKXCBfTkBdYiJ23//OYb2MI3jSNwLgjt7RETeJ9r/tSQdirpLsQBqvFAnZ +0E6yove+7u7Y/9waLd64NnHi/Hm3lCXRSHNboTXns5lndcEZOitHTtNCjv0xyBZm +2tIMPNuzjsmhDYAPexZ3FL//2wmUspO8IFgV6dtxQ/PeEMMA3KgqlbbC1j+Qa3bb +bP6MvPJwNQzcmRk13NfIRmPVNnGuV/u3gm3c +-----END CERTIFICATE----- + +# Issuer: CN=GTS Root R2 O=Google Trust Services LLC +# Subject: CN=GTS Root R2 O=Google Trust Services LLC +# Label: "GTS Root R2" +# Serial: 159662449406622349769042896298 +# MD5 Fingerprint: 1e:39:c0:53:e6:1e:29:82:0b:ca:52:55:36:5d:57:dc +# SHA1 Fingerprint: 9a:44:49:76:32:db:de:fa:d0:bc:fb:5a:7b:17:bd:9e:56:09:24:94 +# SHA256 Fingerprint: 8d:25:cd:97:22:9d:bf:70:35:6b:da:4e:b3:cc:73:40:31:e2:4c:f0:0f:af:cf:d3:2d:c7:6e:b5:84:1c:7e:a8 +-----BEGIN CERTIFICATE----- +MIIFVzCCAz+gAwIBAgINAgPlrsWNBCUaqxElqjANBgkqhkiG9w0BAQwFADBHMQsw +CQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExMQzEU +MBIGA1UEAxMLR1RTIFJvb3QgUjIwHhcNMTYwNjIyMDAwMDAwWhcNMzYwNjIyMDAw +MDAwWjBHMQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZp +Y2VzIExMQzEUMBIGA1UEAxMLR1RTIFJvb3QgUjIwggIiMA0GCSqGSIb3DQEBAQUA +A4ICDwAwggIKAoICAQDO3v2m++zsFDQ8BwZabFn3GTXd98GdVarTzTukk3LvCvpt +nfbwhYBboUhSnznFt+4orO/LdmgUud+tAWyZH8QiHZ/+cnfgLFuv5AS/T3KgGjSY +6Dlo7JUle3ah5mm5hRm9iYz+re026nO8/4Piy33B0s5Ks40FnotJk9/BW9BuXvAu +MC6C/Pq8tBcKSOWIm8Wba96wyrQD8Nr0kLhlZPdcTK3ofmZemde4wj7I0BOdre7k +RXuJVfeKH2JShBKzwkCX44ofR5GmdFrS+LFjKBC4swm4VndAoiaYecb+3yXuPuWg +f9RhD1FLPD+M2uFwdNjCaKH5wQzpoeJ/u1U8dgbuak7MkogwTZq9TwtImoS1mKPV ++3PBV2HdKFZ1E66HjucMUQkQdYhMvI35ezzUIkgfKtzra7tEscszcTJGr61K8Yzo +dDqs5xoic4DSMPclQsciOzsSrZYuxsN2B6ogtzVJV+mSSeh2FnIxZyuWfoqjx5RW +Ir9qS34BIbIjMt/kmkRtWVtd9QCgHJvGeJeNkP+byKq0rxFROV7Z+2et1VsRnTKa +G73VululycslaVNVJ1zgyjbLiGH7HrfQy+4W+9OmTN6SpdTi3/UGVN4unUu0kzCq +gc7dGtxRcw1PcOnlthYhGXmy5okLdWTK1au8CcEYof/UVKGFPP0UJAOyh9OktwID +AQABo0IwQDAOBgNVHQ8BAf8EBAMCAYYwDwYDVR0TAQH/BAUwAwEB/zAdBgNVHQ4E +FgQUu//KjiOfT5nK2+JopqUVJxce2Q4wDQYJKoZIhvcNAQEMBQADggIBAB/Kzt3H +vqGf2SdMC9wXmBFqiN495nFWcrKeGk6c1SuYJF2ba3uwM4IJvd8lRuqYnrYb/oM8 +0mJhwQTtzuDFycgTE1XnqGOtjHsB/ncw4c5omwX4Eu55MaBBRTUoCnGkJE+M3DyC +B19m3H0Q/gxhswWV7uGugQ+o+MePTagjAiZrHYNSVc61LwDKgEDg4XSsYPWHgJ2u +NmSRXbBoGOqKYcl3qJfEycel/FVL8/B/uWU9J2jQzGv6U53hkRrJXRqWbTKH7QMg +yALOWr7Z6v2yTcQvG99fevX4i8buMTolUVVnjWQye+mew4K6Ki3pHrTgSAai/Gev +HyICc/sgCq+dVEuhzf9gR7A/Xe8bVr2XIZYtCtFenTgCR2y59PYjJbigapordwj6 +xLEokCZYCDzifqrXPW+6MYgKBesntaFJ7qBFVHvmJ2WZICGoo7z7GJa7Um8M7YNR +TOlZ4iBgxcJlkoKM8xAfDoqXvneCbT+PHV28SSe9zE8P4c52hgQjxcCMElv924Sg +JPFI/2R80L5cFtHvma3AH/vLrrw4IgYmZNralw4/KBVEqE8AyvCazM90arQ+POuV +7LXTWtiBmelDGDfrs7vRWGJB82bSj6p4lVQgw1oudCvV0b4YacCs1aTPObpRhANl +6WLAYv7YTVWW4tAR+kg0Eeye7QUd5MjWHYbL +-----END CERTIFICATE----- + +# Issuer: CN=GTS Root R3 O=Google Trust Services LLC +# Subject: CN=GTS Root R3 O=Google Trust Services LLC +# Label: "GTS Root R3" +# Serial: 159662495401136852707857743206 +# MD5 Fingerprint: 3e:e7:9d:58:02:94:46:51:94:e5:e0:22:4a:8b:e7:73 +# SHA1 Fingerprint: ed:e5:71:80:2b:c8:92:b9:5b:83:3c:d2:32:68:3f:09:cd:a0:1e:46 +# SHA256 Fingerprint: 34:d8:a7:3e:e2:08:d9:bc:db:0d:95:65:20:93:4b:4e:40:e6:94:82:59:6e:8b:6f:73:c8:42:6b:01:0a:6f:48 +-----BEGIN CERTIFICATE----- +MIICCTCCAY6gAwIBAgINAgPluILrIPglJ209ZjAKBggqhkjOPQQDAzBHMQswCQYD +VQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExMQzEUMBIG +A1UEAxMLR1RTIFJvb3QgUjMwHhcNMTYwNjIyMDAwMDAwWhcNMzYwNjIyMDAwMDAw +WjBHMQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2Vz +IExMQzEUMBIGA1UEAxMLR1RTIFJvb3QgUjMwdjAQBgcqhkjOPQIBBgUrgQQAIgNi +AAQfTzOHMymKoYTey8chWEGJ6ladK0uFxh1MJ7x/JlFyb+Kf1qPKzEUURout736G +jOyxfi//qXGdGIRFBEFVbivqJn+7kAHjSxm65FSWRQmx1WyRRK2EE46ajA2ADDL2 +4CejQjBAMA4GA1UdDwEB/wQEAwIBhjAPBgNVHRMBAf8EBTADAQH/MB0GA1UdDgQW +BBTB8Sa6oC2uhYHP0/EqEr24Cmf9vDAKBggqhkjOPQQDAwNpADBmAjEA9uEglRR7 +VKOQFhG/hMjqb2sXnh5GmCCbn9MN2azTL818+FsuVbu/3ZL3pAzcMeGiAjEA/Jdm +ZuVDFhOD3cffL74UOO0BzrEXGhF16b0DjyZ+hOXJYKaV11RZt+cRLInUue4X +-----END CERTIFICATE----- + +# Issuer: CN=GTS Root R4 O=Google Trust Services LLC +# Subject: CN=GTS Root R4 O=Google Trust Services LLC +# Label: "GTS Root R4" +# Serial: 159662532700760215368942768210 +# MD5 Fingerprint: 43:96:83:77:19:4d:76:b3:9d:65:52:e4:1d:22:a5:e8 +# SHA1 Fingerprint: 77:d3:03:67:b5:e0:0c:15:f6:0c:38:61:df:7c:e1:3b:92:46:4d:47 +# SHA256 Fingerprint: 34:9d:fa:40:58:c5:e2:63:12:3b:39:8a:e7:95:57:3c:4e:13:13:c8:3f:e6:8f:93:55:6c:d5:e8:03:1b:3c:7d +-----BEGIN CERTIFICATE----- +MIICCTCCAY6gAwIBAgINAgPlwGjvYxqccpBQUjAKBggqhkjOPQQDAzBHMQswCQYD +VQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2VzIExMQzEUMBIG +A1UEAxMLR1RTIFJvb3QgUjQwHhcNMTYwNjIyMDAwMDAwWhcNMzYwNjIyMDAwMDAw +WjBHMQswCQYDVQQGEwJVUzEiMCAGA1UEChMZR29vZ2xlIFRydXN0IFNlcnZpY2Vz +IExMQzEUMBIGA1UEAxMLR1RTIFJvb3QgUjQwdjAQBgcqhkjOPQIBBgUrgQQAIgNi +AATzdHOnaItgrkO4NcWBMHtLSZ37wWHO5t5GvWvVYRg1rkDdc/eJkTBa6zzuhXyi +QHY7qca4R9gq55KRanPpsXI5nymfopjTX15YhmUPoYRlBtHci8nHc8iMai/lxKvR +HYqjQjBAMA4GA1UdDwEB/wQEAwIBhjAPBgNVHRMBAf8EBTADAQH/MB0GA1UdDgQW +BBSATNbrdP9JNqPV2Py1PsVq8JQdjDAKBggqhkjOPQQDAwNpADBmAjEA6ED/g94D +9J+uHXqnLrmvT/aDHQ4thQEd0dlq7A/Cr8deVl5c1RxYIigL9zC2L7F8AjEA8GE8 +p/SgguMh1YQdc4acLa/KNJvxn7kjNuK8YAOdgLOaVsjh4rsUecrNIdSUtUlD +-----END CERTIFICATE----- + +# Issuer: CN=Telia Root CA v2 O=Telia Finland Oyj +# Subject: CN=Telia Root CA v2 O=Telia Finland Oyj +# Label: "Telia Root CA v2" +# Serial: 7288924052977061235122729490515358 +# MD5 Fingerprint: 0e:8f:ac:aa:82:df:85:b1:f4:dc:10:1c:fc:99:d9:48 +# SHA1 Fingerprint: b9:99:cd:d1:73:50:8a:c4:47:05:08:9c:8c:88:fb:be:a0:2b:40:cd +# SHA256 Fingerprint: 24:2b:69:74:2f:cb:1e:5b:2a:bf:98:89:8b:94:57:21:87:54:4e:5b:4d:99:11:78:65:73:62:1f:6a:74:b8:2c +-----BEGIN CERTIFICATE----- +MIIFdDCCA1ygAwIBAgIPAWdfJ9b+euPkrL4JWwWeMA0GCSqGSIb3DQEBCwUAMEQx +CzAJBgNVBAYTAkZJMRowGAYDVQQKDBFUZWxpYSBGaW5sYW5kIE95ajEZMBcGA1UE +AwwQVGVsaWEgUm9vdCBDQSB2MjAeFw0xODExMjkxMTU1NTRaFw00MzExMjkxMTU1 +NTRaMEQxCzAJBgNVBAYTAkZJMRowGAYDVQQKDBFUZWxpYSBGaW5sYW5kIE95ajEZ +MBcGA1UEAwwQVGVsaWEgUm9vdCBDQSB2MjCCAiIwDQYJKoZIhvcNAQEBBQADggIP +ADCCAgoCggIBALLQPwe84nvQa5n44ndp586dpAO8gm2h/oFlH0wnrI4AuhZ76zBq +AMCzdGh+sq/H1WKzej9Qyow2RCRj0jbpDIX2Q3bVTKFgcmfiKDOlyzG4OiIjNLh9 +vVYiQJ3q9HsDrWj8soFPmNB06o3lfc1jw6P23pLCWBnglrvFxKk9pXSW/q/5iaq9 +lRdU2HhE8Qx3FZLgmEKnpNaqIJLNwaCzlrI6hEKNfdWV5Nbb6WLEWLN5xYzTNTOD +n3WhUidhOPFZPY5Q4L15POdslv5e2QJltI5c0BE0312/UqeBAMN/mUWZFdUXyApT +7GPzmX3MaRKGwhfwAZ6/hLzRUssbkmbOpFPlob/E2wnW5olWK8jjfN7j/4nlNW4o +6GwLI1GpJQXrSPjdscr6bAhR77cYbETKJuFzxokGgeWKrLDiKca5JLNrRBH0pUPC +TEPlcDaMtjNXepUugqD0XBCzYYP2AgWGLnwtbNwDRm41k9V6lS/eINhbfpSQBGq6 +WT0EBXWdN6IOLj3rwaRSg/7Qa9RmjtzG6RJOHSpXqhC8fF6CfaamyfItufUXJ63R +DolUK5X6wK0dmBR4M0KGCqlztft0DbcbMBnEWg4cJ7faGND/isgFuvGqHKI3t+ZI +pEYslOqodmJHixBTB0hXbOKSTbauBcvcwUpej6w9GU7C7WB1K9vBykLVAgMBAAGj +YzBhMB8GA1UdIwQYMBaAFHKs5DN5qkWH9v2sHZ7Wxy+G2CQ5MB0GA1UdDgQWBBRy +rOQzeapFh/b9rB2e1scvhtgkOTAOBgNVHQ8BAf8EBAMCAQYwDwYDVR0TAQH/BAUw +AwEB/zANBgkqhkiG9w0BAQsFAAOCAgEAoDtZpwmUPjaE0n4vOaWWl/oRrfxn83EJ +8rKJhGdEr7nv7ZbsnGTbMjBvZ5qsfl+yqwE2foH65IRe0qw24GtixX1LDoJt0nZi +0f6X+J8wfBj5tFJ3gh1229MdqfDBmgC9bXXYfef6xzijnHDoRnkDry5023X4blMM +A8iZGok1GTzTyVR8qPAs5m4HeW9q4ebqkYJpCh3DflminmtGFZhb069GHWLIzoBS +SRE/yQQSwxN8PzuKlts8oB4KtItUsiRnDe+Cy748fdHif64W1lZYudogsYMVoe+K +TTJvQS8TUoKU1xrBeKJR3Stwbbca+few4GeXVtt8YVMJAygCQMez2P2ccGrGKMOF +6eLtGpOg3kuYooQ+BXcBlj37tCAPnHICehIv1aO6UXivKitEZU61/Qrowc15h2Er +3oBXRb9n8ZuRXqWk7FlIEA04x7D6w0RtBPV4UBySllva9bguulvP5fBqnUsvWHMt +Ty3EHD70sz+rFQ47GUGKpMFXEmZxTPpT41frYpUJnlTd0cI8Vzy9OK2YZLe4A5pT +VmBds9hCG1xLEooc6+t9xnppxyd/pPiL8uSUZodL6ZQHCRJ5irLrdATczvREWeAW +ysUsWNc8e89ihmpQfTU2Zqf7N+cox9jQraVplI/owd8k+BsHMYeB2F326CjYSlKA +rBPuUBQemMc= +-----END CERTIFICATE----- + +# Issuer: CN=D-TRUST BR Root CA 1 2020 O=D-Trust GmbH +# Subject: CN=D-TRUST BR Root CA 1 2020 O=D-Trust GmbH +# Label: "D-TRUST BR Root CA 1 2020" +# Serial: 165870826978392376648679885835942448534 +# MD5 Fingerprint: b5:aa:4b:d5:ed:f7:e3:55:2e:8f:72:0a:f3:75:b8:ed +# SHA1 Fingerprint: 1f:5b:98:f0:e3:b5:f7:74:3c:ed:e6:b0:36:7d:32:cd:f4:09:41:67 +# SHA256 Fingerprint: e5:9a:aa:81:60:09:c2:2b:ff:5b:25:ba:d3:7d:f3:06:f0:49:79:7c:1f:81:d8:5a:b0:89:e6:57:bd:8f:00:44 +-----BEGIN CERTIFICATE----- +MIIC2zCCAmCgAwIBAgIQfMmPK4TX3+oPyWWa00tNljAKBggqhkjOPQQDAzBIMQsw +CQYDVQQGEwJERTEVMBMGA1UEChMMRC1UcnVzdCBHbWJIMSIwIAYDVQQDExlELVRS +VVNUIEJSIFJvb3QgQ0EgMSAyMDIwMB4XDTIwMDIxMTA5NDUwMFoXDTM1MDIxMTA5 +NDQ1OVowSDELMAkGA1UEBhMCREUxFTATBgNVBAoTDEQtVHJ1c3QgR21iSDEiMCAG +A1UEAxMZRC1UUlVTVCBCUiBSb290IENBIDEgMjAyMDB2MBAGByqGSM49AgEGBSuB +BAAiA2IABMbLxyjR+4T1mu9CFCDhQ2tuda38KwOE1HaTJddZO0Flax7mNCq7dPYS +zuht56vkPE4/RAiLzRZxy7+SmfSk1zxQVFKQhYN4lGdnoxwJGT11NIXe7WB9xwy0 +QVK5buXuQqOCAQ0wggEJMA8GA1UdEwEB/wQFMAMBAf8wHQYDVR0OBBYEFHOREKv/ +VbNafAkl1bK6CKBrqx9tMA4GA1UdDwEB/wQEAwIBBjCBxgYDVR0fBIG+MIG7MD6g +PKA6hjhodHRwOi8vY3JsLmQtdHJ1c3QubmV0L2NybC9kLXRydXN0X2JyX3Jvb3Rf +Y2FfMV8yMDIwLmNybDB5oHegdYZzbGRhcDovL2RpcmVjdG9yeS5kLXRydXN0Lm5l +dC9DTj1ELVRSVVNUJTIwQlIlMjBSb290JTIwQ0ElMjAxJTIwMjAyMCxPPUQtVHJ1 +c3QlMjBHbWJILEM9REU/Y2VydGlmaWNhdGVyZXZvY2F0aW9ubGlzdDAKBggqhkjO +PQQDAwNpADBmAjEAlJAtE/rhY/hhY+ithXhUkZy4kzg+GkHaQBZTQgjKL47xPoFW +wKrY7RjEsK70PvomAjEA8yjixtsrmfu3Ubgko6SUeho/5jbiA1czijDLgsfWFBHV +dWNbFJWcHwHP2NVypw87 +-----END CERTIFICATE----- + +# Issuer: CN=D-TRUST EV Root CA 1 2020 O=D-Trust GmbH +# Subject: CN=D-TRUST EV Root CA 1 2020 O=D-Trust GmbH +# Label: "D-TRUST EV Root CA 1 2020" +# Serial: 126288379621884218666039612629459926992 +# MD5 Fingerprint: 8c:2d:9d:70:9f:48:99:11:06:11:fb:e9:cb:30:c0:6e +# SHA1 Fingerprint: 61:db:8c:21:59:69:03:90:d8:7c:9c:12:86:54:cf:9d:3d:f4:dd:07 +# SHA256 Fingerprint: 08:17:0d:1a:a3:64:53:90:1a:2f:95:92:45:e3:47:db:0c:8d:37:ab:aa:bc:56:b8:1a:a1:00:dc:95:89:70:db +-----BEGIN CERTIFICATE----- +MIIC2zCCAmCgAwIBAgIQXwJB13qHfEwDo6yWjfv/0DAKBggqhkjOPQQDAzBIMQsw +CQYDVQQGEwJERTEVMBMGA1UEChMMRC1UcnVzdCBHbWJIMSIwIAYDVQQDExlELVRS +VVNUIEVWIFJvb3QgQ0EgMSAyMDIwMB4XDTIwMDIxMTEwMDAwMFoXDTM1MDIxMTA5 +NTk1OVowSDELMAkGA1UEBhMCREUxFTATBgNVBAoTDEQtVHJ1c3QgR21iSDEiMCAG +A1UEAxMZRC1UUlVTVCBFViBSb290IENBIDEgMjAyMDB2MBAGByqGSM49AgEGBSuB +BAAiA2IABPEL3YZDIBnfl4XoIkqbz52Yv7QFJsnL46bSj8WeeHsxiamJrSc8ZRCC +/N/DnU7wMyPE0jL1HLDfMxddxfCxivnvubcUyilKwg+pf3VlSSowZ/Rk99Yad9rD +wpdhQntJraOCAQ0wggEJMA8GA1UdEwEB/wQFMAMBAf8wHQYDVR0OBBYEFH8QARY3 +OqQo5FD4pPfsazK2/umLMA4GA1UdDwEB/wQEAwIBBjCBxgYDVR0fBIG+MIG7MD6g +PKA6hjhodHRwOi8vY3JsLmQtdHJ1c3QubmV0L2NybC9kLXRydXN0X2V2X3Jvb3Rf +Y2FfMV8yMDIwLmNybDB5oHegdYZzbGRhcDovL2RpcmVjdG9yeS5kLXRydXN0Lm5l +dC9DTj1ELVRSVVNUJTIwRVYlMjBSb290JTIwQ0ElMjAxJTIwMjAyMCxPPUQtVHJ1 +c3QlMjBHbWJILEM9REU/Y2VydGlmaWNhdGVyZXZvY2F0aW9ubGlzdDAKBggqhkjO +PQQDAwNpADBmAjEAyjzGKnXCXnViOTYAYFqLwZOZzNnbQTs7h5kXO9XMT8oi96CA +y/m0sRtW9XLS/BnRAjEAkfcwkz8QRitxpNA7RJvAKQIFskF3UfN5Wp6OFKBOQtJb +gfM0agPnIjhQW+0ZT0MW +-----END CERTIFICATE----- + +# Issuer: CN=DigiCert TLS ECC P384 Root G5 O=DigiCert, Inc. +# Subject: CN=DigiCert TLS ECC P384 Root G5 O=DigiCert, Inc. +# Label: "DigiCert TLS ECC P384 Root G5" +# Serial: 13129116028163249804115411775095713523 +# MD5 Fingerprint: d3:71:04:6a:43:1c:db:a6:59:e1:a8:a3:aa:c5:71:ed +# SHA1 Fingerprint: 17:f3:de:5e:9f:0f:19:e9:8e:f6:1f:32:26:6e:20:c4:07:ae:30:ee +# SHA256 Fingerprint: 01:8e:13:f0:77:25:32:cf:80:9b:d1:b1:72:81:86:72:83:fc:48:c6:e1:3b:e9:c6:98:12:85:4a:49:0c:1b:05 +-----BEGIN CERTIFICATE----- +MIICGTCCAZ+gAwIBAgIQCeCTZaz32ci5PhwLBCou8zAKBggqhkjOPQQDAzBOMQsw +CQYDVQQGEwJVUzEXMBUGA1UEChMORGlnaUNlcnQsIEluYy4xJjAkBgNVBAMTHURp +Z2lDZXJ0IFRMUyBFQ0MgUDM4NCBSb290IEc1MB4XDTIxMDExNTAwMDAwMFoXDTQ2 +MDExNDIzNTk1OVowTjELMAkGA1UEBhMCVVMxFzAVBgNVBAoTDkRpZ2lDZXJ0LCBJ +bmMuMSYwJAYDVQQDEx1EaWdpQ2VydCBUTFMgRUNDIFAzODQgUm9vdCBHNTB2MBAG +ByqGSM49AgEGBSuBBAAiA2IABMFEoc8Rl1Ca3iOCNQfN0MsYndLxf3c1TzvdlHJS +7cI7+Oz6e2tYIOyZrsn8aLN1udsJ7MgT9U7GCh1mMEy7H0cKPGEQQil8pQgO4CLp +0zVozptjn4S1mU1YoI71VOeVyaNCMEAwHQYDVR0OBBYEFMFRRVBZqz7nLFr6ICIS +B4CIfBFqMA4GA1UdDwEB/wQEAwIBhjAPBgNVHRMBAf8EBTADAQH/MAoGCCqGSM49 +BAMDA2gAMGUCMQCJao1H5+z8blUD2WdsJk6Dxv3J+ysTvLd6jLRl0mlpYxNjOyZQ +LgGheQaRnUi/wr4CMEfDFXuxoJGZSZOoPHzoRgaLLPIxAJSdYsiJvRmEFOml+wG4 +DXZDjC5Ty3zfDBeWUA== +-----END CERTIFICATE----- + +# Issuer: CN=DigiCert TLS RSA4096 Root G5 O=DigiCert, Inc. +# Subject: CN=DigiCert TLS RSA4096 Root G5 O=DigiCert, Inc. +# Label: "DigiCert TLS RSA4096 Root G5" +# Serial: 11930366277458970227240571539258396554 +# MD5 Fingerprint: ac:fe:f7:34:96:a9:f2:b3:b4:12:4b:e4:27:41:6f:e1 +# SHA1 Fingerprint: a7:88:49:dc:5d:7c:75:8c:8c:de:39:98:56:b3:aa:d0:b2:a5:71:35 +# SHA256 Fingerprint: 37:1a:00:dc:05:33:b3:72:1a:7e:eb:40:e8:41:9e:70:79:9d:2b:0a:0f:2c:1d:80:69:31:65:f7:ce:c4:ad:75 +-----BEGIN CERTIFICATE----- +MIIFZjCCA06gAwIBAgIQCPm0eKj6ftpqMzeJ3nzPijANBgkqhkiG9w0BAQwFADBN +MQswCQYDVQQGEwJVUzEXMBUGA1UEChMORGlnaUNlcnQsIEluYy4xJTAjBgNVBAMT +HERpZ2lDZXJ0IFRMUyBSU0E0MDk2IFJvb3QgRzUwHhcNMjEwMTE1MDAwMDAwWhcN +NDYwMTE0MjM1OTU5WjBNMQswCQYDVQQGEwJVUzEXMBUGA1UEChMORGlnaUNlcnQs +IEluYy4xJTAjBgNVBAMTHERpZ2lDZXJ0IFRMUyBSU0E0MDk2IFJvb3QgRzUwggIi +MA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQCz0PTJeRGd/fxmgefM1eS87IE+ +ajWOLrfn3q/5B03PMJ3qCQuZvWxX2hhKuHisOjmopkisLnLlvevxGs3npAOpPxG0 +2C+JFvuUAT27L/gTBaF4HI4o4EXgg/RZG5Wzrn4DReW+wkL+7vI8toUTmDKdFqgp +wgscONyfMXdcvyej/Cestyu9dJsXLfKB2l2w4SMXPohKEiPQ6s+d3gMXsUJKoBZM +pG2T6T867jp8nVid9E6P/DsjyG244gXazOvswzH016cpVIDPRFtMbzCe88zdH5RD +nU1/cHAN1DrRN/BsnZvAFJNY781BOHW8EwOVfH/jXOnVDdXifBBiqmvwPXbzP6Po +sMH976pXTayGpxi0KcEsDr9kvimM2AItzVwv8n/vFfQMFawKsPHTDU9qTXeXAaDx +Zre3zu/O7Oyldcqs4+Fj97ihBMi8ez9dLRYiVu1ISf6nL3kwJZu6ay0/nTvEF+cd +Lvvyz6b84xQslpghjLSR6Rlgg/IwKwZzUNWYOwbpx4oMYIwo+FKbbuH2TbsGJJvX +KyY//SovcfXWJL5/MZ4PbeiPT02jP/816t9JXkGPhvnxd3lLG7SjXi/7RgLQZhNe +XoVPzthwiHvOAbWWl9fNff2C+MIkwcoBOU+NosEUQB+cZtUMCUbW8tDRSHZWOkPL +tgoRObqME2wGtZ7P6wIDAQABo0IwQDAdBgNVHQ4EFgQUUTMc7TZArxfTJc1paPKv +TiM+s0EwDgYDVR0PAQH/BAQDAgGGMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcN +AQEMBQADggIBAGCmr1tfV9qJ20tQqcQjNSH/0GEwhJG3PxDPJY7Jv0Y02cEhJhxw +GXIeo8mH/qlDZJY6yFMECrZBu8RHANmfGBg7sg7zNOok992vIGCukihfNudd5N7H +PNtQOa27PShNlnx2xlv0wdsUpasZYgcYQF+Xkdycx6u1UQ3maVNVzDl92sURVXLF +O4uJ+DQtpBflF+aZfTCIITfNMBc9uPK8qHWgQ9w+iUuQrm0D4ByjoJYJu32jtyoQ +REtGBzRj7TG5BO6jm5qu5jF49OokYTurWGT/u4cnYiWB39yhL/btp/96j1EuMPik +AdKFOV8BmZZvWltwGUb+hmA+rYAQCd05JS9Yf7vSdPD3Rh9GOUrYU9DzLjtxpdRv +/PNn5AeP3SYZ4Y1b+qOTEZvpyDrDVWiakuFSdjjo4bq9+0/V77PnSIMx8IIh47a+ +p6tv75/fTM8BuGJqIz3nCU2AG3swpMPdB380vqQmsvZB6Akd4yCYqjdP//fx4ilw +MUc/dNAUFvohigLVigmUdy7yWSiLfFCSCmZ4OIN1xLVaqBHG5cGdZlXPU8Sv13WF +qUITVuwhd4GTWgzqltlJyqEI8pc7bZsEGCREjnwB8twl2F6GmrE52/WRMmrRpnCK +ovfepEWFJqgejF0pW8hL2JpqA15w8oVPbEtoL8pU9ozaMv7Da4M/OMZ+ +-----END CERTIFICATE----- + +# Issuer: CN=Certainly Root R1 O=Certainly +# Subject: CN=Certainly Root R1 O=Certainly +# Label: "Certainly Root R1" +# Serial: 188833316161142517227353805653483829216 +# MD5 Fingerprint: 07:70:d4:3e:82:87:a0:fa:33:36:13:f4:fa:33:e7:12 +# SHA1 Fingerprint: a0:50:ee:0f:28:71:f4:27:b2:12:6d:6f:50:96:25:ba:cc:86:42:af +# SHA256 Fingerprint: 77:b8:2c:d8:64:4c:43:05:f7:ac:c5:cb:15:6b:45:67:50:04:03:3d:51:c6:0c:62:02:a8:e0:c3:34:67:d3:a0 +-----BEGIN CERTIFICATE----- +MIIFRzCCAy+gAwIBAgIRAI4P+UuQcWhlM1T01EQ5t+AwDQYJKoZIhvcNAQELBQAw +PTELMAkGA1UEBhMCVVMxEjAQBgNVBAoTCUNlcnRhaW5seTEaMBgGA1UEAxMRQ2Vy +dGFpbmx5IFJvb3QgUjEwHhcNMjEwNDAxMDAwMDAwWhcNNDYwNDAxMDAwMDAwWjA9 +MQswCQYDVQQGEwJVUzESMBAGA1UEChMJQ2VydGFpbmx5MRowGAYDVQQDExFDZXJ0 +YWlubHkgUm9vdCBSMTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBANA2 +1B/q3avk0bbm+yLA3RMNansiExyXPGhjZjKcA7WNpIGD2ngwEc/csiu+kr+O5MQT +vqRoTNoCaBZ0vrLdBORrKt03H2As2/X3oXyVtwxwhi7xOu9S98zTm/mLvg7fMbed +aFySpvXl8wo0tf97ouSHocavFwDvA5HtqRxOcT3Si2yJ9HiG5mpJoM610rCrm/b0 +1C7jcvk2xusVtyWMOvwlDbMicyF0yEqWYZL1LwsYpfSt4u5BvQF5+paMjRcCMLT5 +r3gajLQ2EBAHBXDQ9DGQilHFhiZ5shGIXsXwClTNSaa/ApzSRKft43jvRl5tcdF5 +cBxGX1HpyTfcX35pe0HfNEXgO4T0oYoKNp43zGJS4YkNKPl6I7ENPT2a/Z2B7yyQ +wHtETrtJ4A5KVpK8y7XdeReJkd5hiXSSqOMyhb5OhaRLWcsrxXiOcVTQAjeZjOVJ +6uBUcqQRBi8LjMFbvrWhsFNunLhgkR9Za/kt9JQKl7XsxXYDVBtlUrpMklZRNaBA +2CnbrlJ2Oy0wQJuK0EJWtLeIAaSHO1OWzaMWj/Nmqhexx2DgwUMFDO6bW2BvBlyH +Wyf5QBGenDPBt+U1VwV/J84XIIwc/PH72jEpSe31C4SnT8H2TsIonPru4K8H+zMR +eiFPCyEQtkA6qyI6BJyLm4SGcprSp6XEtHWRqSsjAgMBAAGjQjBAMA4GA1UdDwEB +/wQEAwIBBjAPBgNVHRMBAf8EBTADAQH/MB0GA1UdDgQWBBTgqj8ljZ9EXME66C6u +d0yEPmcM9DANBgkqhkiG9w0BAQsFAAOCAgEAuVevuBLaV4OPaAszHQNTVfSVcOQr +PbA56/qJYv331hgELyE03fFo8NWWWt7CgKPBjcZq91l3rhVkz1t5BXdm6ozTaw3d +8VkswTOlMIAVRQdFGjEitpIAq5lNOo93r6kiyi9jyhXWx8bwPWz8HA2YEGGeEaIi +1wrykXprOQ4vMMM2SZ/g6Q8CRFA3lFV96p/2O7qUpUzpvD5RtOjKkjZUbVwlKNrd +rRT90+7iIgXr0PK3aBLXWopBGsaSpVo7Y0VPv+E6dyIvXL9G+VoDhRNCX8reU9di +taY1BMJH/5n9hN9czulegChB8n3nHpDYT3Y+gjwN/KUD+nsa2UUeYNrEjvn8K8l7 +lcUq/6qJ34IxD3L/DCfXCh5WAFAeDJDBlrXYFIW7pw0WwfgHJBu6haEaBQmAupVj +yTrsJZ9/nbqkRxWbRHDxakvWOF5D8xh+UG7pWijmZeZ3Gzr9Hb4DJqPb1OG7fpYn +Kx3upPvaJVQTA945xsMfTZDsjxtK0hzthZU4UHlG1sGQUDGpXJpuHfUzVounmdLy +yCwzk5Iwx06MZTMQZBf9JBeW0Y3COmor6xOLRPIh80oat3df1+2IpHLlOR+Vnb5n +wXARPbv0+Em34yaXOp/SX3z7wJl8OSngex2/DaeP0ik0biQVy96QXr8axGbqwua6 +OV+KmalBWQewLK8= +-----END CERTIFICATE----- + +# Issuer: CN=Certainly Root E1 O=Certainly +# Subject: CN=Certainly Root E1 O=Certainly +# Label: "Certainly Root E1" +# Serial: 8168531406727139161245376702891150584 +# MD5 Fingerprint: 0a:9e:ca:cd:3e:52:50:c6:36:f3:4b:a3:ed:a7:53:e9 +# SHA1 Fingerprint: f9:e1:6d:dc:01:89:cf:d5:82:45:63:3e:c5:37:7d:c2:eb:93:6f:2b +# SHA256 Fingerprint: b4:58:5f:22:e4:ac:75:6a:4e:86:12:a1:36:1c:5d:9d:03:1a:93:fd:84:fe:bb:77:8f:a3:06:8b:0f:c4:2d:c2 +-----BEGIN CERTIFICATE----- +MIIB9zCCAX2gAwIBAgIQBiUzsUcDMydc+Y2aub/M+DAKBggqhkjOPQQDAzA9MQsw +CQYDVQQGEwJVUzESMBAGA1UEChMJQ2VydGFpbmx5MRowGAYDVQQDExFDZXJ0YWlu +bHkgUm9vdCBFMTAeFw0yMTA0MDEwMDAwMDBaFw00NjA0MDEwMDAwMDBaMD0xCzAJ +BgNVBAYTAlVTMRIwEAYDVQQKEwlDZXJ0YWlubHkxGjAYBgNVBAMTEUNlcnRhaW5s +eSBSb290IEUxMHYwEAYHKoZIzj0CAQYFK4EEACIDYgAE3m/4fxzf7flHh4axpMCK ++IKXgOqPyEpeKn2IaKcBYhSRJHpcnqMXfYqGITQYUBsQ3tA3SybHGWCA6TS9YBk2 +QNYphwk8kXr2vBMj3VlOBF7PyAIcGFPBMdjaIOlEjeR2o0IwQDAOBgNVHQ8BAf8E +BAMCAQYwDwYDVR0TAQH/BAUwAwEB/zAdBgNVHQ4EFgQU8ygYy2R17ikq6+2uI1g4 +hevIIgcwCgYIKoZIzj0EAwMDaAAwZQIxALGOWiDDshliTd6wT99u0nCK8Z9+aozm +ut6Dacpps6kFtZaSF4fC0urQe87YQVt8rgIwRt7qy12a7DLCZRawTDBcMPPaTnOG +BtjOiQRINzf43TNRnXCve1XYAS59BWQOhriR +-----END CERTIFICATE----- + +# Issuer: CN=E-Tugra Global Root CA RSA v3 O=E-Tugra EBG A.S. OU=E-Tugra Trust Center +# Subject: CN=E-Tugra Global Root CA RSA v3 O=E-Tugra EBG A.S. OU=E-Tugra Trust Center +# Label: "E-Tugra Global Root CA RSA v3" +# Serial: 75951268308633135324246244059508261641472512052 +# MD5 Fingerprint: 22:be:10:f6:c2:f8:03:88:73:5f:33:29:47:28:47:a4 +# SHA1 Fingerprint: e9:a8:5d:22:14:52:1c:5b:aa:0a:b4:be:24:6a:23:8a:c9:ba:e2:a9 +# SHA256 Fingerprint: ef:66:b0:b1:0a:3c:db:9f:2e:36:48:c7:6b:d2:af:18:ea:d2:bf:e6:f1:17:65:5e:28:c4:06:0d:a1:a3:f4:c2 +-----BEGIN CERTIFICATE----- +MIIF8zCCA9ugAwIBAgIUDU3FzRYilZYIfrgLfxUGNPt5EDQwDQYJKoZIhvcNAQEL +BQAwgYAxCzAJBgNVBAYTAlRSMQ8wDQYDVQQHEwZBbmthcmExGTAXBgNVBAoTEEUt +VHVncmEgRUJHIEEuUy4xHTAbBgNVBAsTFEUtVHVncmEgVHJ1c3QgQ2VudGVyMSYw +JAYDVQQDEx1FLVR1Z3JhIEdsb2JhbCBSb290IENBIFJTQSB2MzAeFw0yMDAzMTgw +OTA3MTdaFw00NTAzMTIwOTA3MTdaMIGAMQswCQYDVQQGEwJUUjEPMA0GA1UEBxMG +QW5rYXJhMRkwFwYDVQQKExBFLVR1Z3JhIEVCRyBBLlMuMR0wGwYDVQQLExRFLVR1 +Z3JhIFRydXN0IENlbnRlcjEmMCQGA1UEAxMdRS1UdWdyYSBHbG9iYWwgUm9vdCBD +QSBSU0EgdjMwggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQCiZvCJt3J7 +7gnJY9LTQ91ew6aEOErxjYG7FL1H6EAX8z3DeEVypi6Q3po61CBxyryfHUuXCscx +uj7X/iWpKo429NEvx7epXTPcMHD4QGxLsqYxYdE0PD0xesevxKenhOGXpOhL9hd8 +7jwH7eKKV9y2+/hDJVDqJ4GohryPUkqWOmAalrv9c/SF/YP9f4RtNGx/ardLAQO/ +rWm31zLZ9Vdq6YaCPqVmMbMWPcLzJmAy01IesGykNz709a/r4d+ABs8qQedmCeFL +l+d3vSFtKbZnwy1+7dZ5ZdHPOrbRsV5WYVB6Ws5OUDGAA5hH5+QYfERaxqSzO8bG +wzrwbMOLyKSRBfP12baqBqG3q+Sx6iEUXIOk/P+2UNOMEiaZdnDpwA+mdPy70Bt4 +znKS4iicvObpCdg604nmvi533wEKb5b25Y08TVJ2Glbhc34XrD2tbKNSEhhw5oBO +M/J+JjKsBY04pOZ2PJ8QaQ5tndLBeSBrW88zjdGUdjXnXVXHt6woq0bM5zshtQoK +5EpZ3IE1S0SVEgpnpaH/WwAH0sDM+T/8nzPyAPiMbIedBi3x7+PmBvrFZhNb/FAH +nnGGstpvdDDPk1Po3CLW3iAfYY2jLqN4MpBs3KwytQXk9TwzDdbgh3cXTJ2w2Amo +DVf3RIXwyAS+XF1a4xeOVGNpf0l0ZAWMowIDAQABo2MwYTAPBgNVHRMBAf8EBTAD +AQH/MB8GA1UdIwQYMBaAFLK0ruYt9ybVqnUtdkvAG1Mh0EjvMB0GA1UdDgQWBBSy +tK7mLfcm1ap1LXZLwBtTIdBI7zAOBgNVHQ8BAf8EBAMCAQYwDQYJKoZIhvcNAQEL +BQADggIBAImocn+M684uGMQQgC0QDP/7FM0E4BQ8Tpr7nym/Ip5XuYJzEmMmtcyQ +6dIqKe6cLcwsmb5FJ+Sxce3kOJUxQfJ9emN438o2Fi+CiJ+8EUdPdk3ILY7r3y18 +Tjvarvbj2l0Upq7ohUSdBm6O++96SmotKygY/r+QLHUWnw/qln0F7psTpURs+APQ +3SPh/QMSEgj0GDSz4DcLdxEBSL9htLX4GdnLTeqjjO/98Aa1bZL0SmFQhO3sSdPk +vmjmLuMxC1QLGpLWgti2omU8ZgT5Vdps+9u1FGZNlIM7zR6mK7L+d0CGq+ffCsn9 +9t2HVhjYsCxVYJb6CH5SkPVLpi6HfMsg2wY+oF0Dd32iPBMbKaITVaA9FCKvb7jQ +mhty3QUBjYZgv6Rn7rWlDdF/5horYmbDB7rnoEgcOMPpRfunf/ztAmgayncSd6YA +VSgU7NbHEqIbZULpkejLPoeJVF3Zr52XnGnnCv8PWniLYypMfUeUP95L6VPQMPHF +9p5J3zugkaOj/s1YzOrfr28oO6Bpm4/srK4rVJ2bBLFHIK+WEj5jlB0E5y67hscM +moi/dkfv97ALl2bSRM9gUgfh1SxKOidhd8rXj+eHDjD/DLsE4mHDosiXYY60MGo8 +bcIHX0pzLz/5FooBZu+6kcpSV3uu1OYP3Qt6f4ueJiDPO++BcYNZ +-----END CERTIFICATE----- + +# Issuer: CN=E-Tugra Global Root CA ECC v3 O=E-Tugra EBG A.S. OU=E-Tugra Trust Center +# Subject: CN=E-Tugra Global Root CA ECC v3 O=E-Tugra EBG A.S. OU=E-Tugra Trust Center +# Label: "E-Tugra Global Root CA ECC v3" +# Serial: 218504919822255052842371958738296604628416471745 +# MD5 Fingerprint: 46:bc:81:bb:f1:b5:1e:f7:4b:96:bc:14:e2:e7:27:64 +# SHA1 Fingerprint: 8a:2f:af:57:53:b1:b0:e6:a1:04:ec:5b:6a:69:71:6d:f6:1c:e2:84 +# SHA256 Fingerprint: 87:3f:46:85:fa:7f:56:36:25:25:2e:6d:36:bc:d7:f1:6f:c2:49:51:f2:64:e4:7e:1b:95:4f:49:08:cd:ca:13 +-----BEGIN CERTIFICATE----- +MIICpTCCAiqgAwIBAgIUJkYZdzHhT28oNt45UYbm1JeIIsEwCgYIKoZIzj0EAwMw +gYAxCzAJBgNVBAYTAlRSMQ8wDQYDVQQHEwZBbmthcmExGTAXBgNVBAoTEEUtVHVn +cmEgRUJHIEEuUy4xHTAbBgNVBAsTFEUtVHVncmEgVHJ1c3QgQ2VudGVyMSYwJAYD +VQQDEx1FLVR1Z3JhIEdsb2JhbCBSb290IENBIEVDQyB2MzAeFw0yMDAzMTgwOTQ2 +NThaFw00NTAzMTIwOTQ2NThaMIGAMQswCQYDVQQGEwJUUjEPMA0GA1UEBxMGQW5r +YXJhMRkwFwYDVQQKExBFLVR1Z3JhIEVCRyBBLlMuMR0wGwYDVQQLExRFLVR1Z3Jh +IFRydXN0IENlbnRlcjEmMCQGA1UEAxMdRS1UdWdyYSBHbG9iYWwgUm9vdCBDQSBF +Q0MgdjMwdjAQBgcqhkjOPQIBBgUrgQQAIgNiAASOmCm/xxAeJ9urA8woLNheSBkQ +KczLWYHMjLiSF4mDKpL2w6QdTGLVn9agRtwcvHbB40fQWxPa56WzZkjnIZpKT4YK +fWzqTTKACrJ6CZtpS5iB4i7sAnCWH/31Rs7K3IKjYzBhMA8GA1UdEwEB/wQFMAMB +Af8wHwYDVR0jBBgwFoAU/4Ixcj75xGZsrTie0bBRiKWQzPUwHQYDVR0OBBYEFP+C +MXI++cRmbK04ntGwUYilkMz1MA4GA1UdDwEB/wQEAwIBBjAKBggqhkjOPQQDAwNp +ADBmAjEA5gVYaWHlLcoNy/EZCL3W/VGSGn5jVASQkZo1kTmZ+gepZpO6yGjUij/6 +7W4WAie3AjEA3VoXK3YdZUKWpqxdinlW2Iob35reX8dQj7FbcQwm32pAAOwzkSFx +vmjkI6TZraE3 +-----END CERTIFICATE----- + +# Issuer: CN=Security Communication RootCA3 O=SECOM Trust Systems CO.,LTD. +# Subject: CN=Security Communication RootCA3 O=SECOM Trust Systems CO.,LTD. +# Label: "Security Communication RootCA3" +# Serial: 16247922307909811815 +# MD5 Fingerprint: 1c:9a:16:ff:9e:5c:e0:4d:8a:14:01:f4:35:5d:29:26 +# SHA1 Fingerprint: c3:03:c8:22:74:92:e5:61:a2:9c:5f:79:91:2b:1e:44:13:91:30:3a +# SHA256 Fingerprint: 24:a5:5c:2a:b0:51:44:2d:06:17:76:65:41:23:9a:4a:d0:32:d7:c5:51:75:aa:34:ff:de:2f:bc:4f:5c:52:94 +-----BEGIN CERTIFICATE----- +MIIFfzCCA2egAwIBAgIJAOF8N0D9G/5nMA0GCSqGSIb3DQEBDAUAMF0xCzAJBgNV +BAYTAkpQMSUwIwYDVQQKExxTRUNPTSBUcnVzdCBTeXN0ZW1zIENPLixMVEQuMScw +JQYDVQQDEx5TZWN1cml0eSBDb21tdW5pY2F0aW9uIFJvb3RDQTMwHhcNMTYwNjE2 +MDYxNzE2WhcNMzgwMTE4MDYxNzE2WjBdMQswCQYDVQQGEwJKUDElMCMGA1UEChMc +U0VDT00gVHJ1c3QgU3lzdGVtcyBDTy4sTFRELjEnMCUGA1UEAxMeU2VjdXJpdHkg +Q29tbXVuaWNhdGlvbiBSb290Q0EzMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIIC +CgKCAgEA48lySfcw3gl8qUCBWNO0Ot26YQ+TUG5pPDXC7ltzkBtnTCHsXzW7OT4r +CmDvu20rhvtxosis5FaU+cmvsXLUIKx00rgVrVH+hXShuRD+BYD5UpOzQD11EKzA +lrenfna84xtSGc4RHwsENPXY9Wk8d/Nk9A2qhd7gCVAEF5aEt8iKvE1y/By7z/MG +TfmfZPd+pmaGNXHIEYBMwXFAWB6+oHP2/D5Q4eAvJj1+XCO1eXDe+uDRpdYMQXF7 +9+qMHIjH7Iv10S9VlkZ8WjtYO/u62C21Jdp6Ts9EriGmnpjKIG58u4iFW/vAEGK7 +8vknR+/RiTlDxN/e4UG/VHMgly1s2vPUB6PmudhvrvyMGS7TZ2crldtYXLVqAvO4 +g160a75BflcJdURQVc1aEWEhCmHCqYj9E7wtiS/NYeCVvsq1e+F7NGcLH7YMx3we +GVPKp7FKFSBWFHA9K4IsD50VHUeAR/94mQ4xr28+j+2GaR57GIgUssL8gjMunEst ++3A7caoreyYn8xrC3PsXuKHqy6C0rtOUfnrQq8PsOC0RLoi/1D+tEjtCrI8Cbn3M +0V9hvqG8OmpI6iZVIhZdXw3/JzOfGAN0iltSIEdrRU0id4xVJ/CvHozJgyJUt5rQ +T9nO/NkuHJYosQLTA70lUhw0Zk8jq/R3gpYd0VcwCBEF/VfR2ccCAwEAAaNCMEAw +HQYDVR0OBBYEFGQUfPxYchamCik0FW8qy7z8r6irMA4GA1UdDwEB/wQEAwIBBjAP +BgNVHRMBAf8EBTADAQH/MA0GCSqGSIb3DQEBDAUAA4ICAQDcAiMI4u8hOscNtybS +YpOnpSNyByCCYN8Y11StaSWSntkUz5m5UoHPrmyKO1o5yGwBQ8IibQLwYs1OY0PA +FNr0Y/Dq9HHuTofjcan0yVflLl8cebsjqodEV+m9NU1Bu0soo5iyG9kLFwfl9+qd +9XbXv8S2gVj/yP9kaWJ5rW4OH3/uHWnlt3Jxs/6lATWUVCvAUm2PVcTJ0rjLyjQI +UYWg9by0F1jqClx6vWPGOi//lkkZhOpn2ASxYfQAW0q3nHE3GYV5v4GwxxMOdnE+ +OoAGrgYWp421wsTL/0ClXI2lyTrtcoHKXJg80jQDdwj98ClZXSEIx2C/pHF7uNke +gr4Jr2VvKKu/S7XuPghHJ6APbw+LP6yVGPO5DtxnVW5inkYO0QR4ynKudtml+LLf +iAlhi+8kTtFZP1rUPcmTPCtk9YENFpb3ksP+MW/oKjJ0DvRMmEoYDjBU1cXrvMUV +nuiZIesnKwkK2/HmcBhWuwzkvvnoEKQTkrgc4NtnHVMDpCKn3F2SEDzq//wbEBrD +2NCcnWXL0CsnMQMeNuE9dnUM/0Umud1RvCPHX9jYhxBAEg09ODfnRDwYwFMJZI// +1ZqmfHAuc1Uh6N//g7kdPjIe1qZ9LPFm6Vwdp6POXiUyK+OVrCoHzrQoeIY8Laad +TdJ0MN1kURXbg4NR16/9M51NZg== +-----END CERTIFICATE----- + +# Issuer: CN=Security Communication ECC RootCA1 O=SECOM Trust Systems CO.,LTD. +# Subject: CN=Security Communication ECC RootCA1 O=SECOM Trust Systems CO.,LTD. +# Label: "Security Communication ECC RootCA1" +# Serial: 15446673492073852651 +# MD5 Fingerprint: 7e:43:b0:92:68:ec:05:43:4c:98:ab:5d:35:2e:7e:86 +# SHA1 Fingerprint: b8:0e:26:a9:bf:d2:b2:3b:c0:ef:46:c9:ba:c7:bb:f6:1d:0d:41:41 +# SHA256 Fingerprint: e7:4f:bd:a5:5b:d5:64:c4:73:a3:6b:44:1a:a7:99:c8:a6:8e:07:74:40:e8:28:8b:9f:a1:e5:0e:4b:ba:ca:11 +-----BEGIN CERTIFICATE----- +MIICODCCAb6gAwIBAgIJANZdm7N4gS7rMAoGCCqGSM49BAMDMGExCzAJBgNVBAYT +AkpQMSUwIwYDVQQKExxTRUNPTSBUcnVzdCBTeXN0ZW1zIENPLixMVEQuMSswKQYD +VQQDEyJTZWN1cml0eSBDb21tdW5pY2F0aW9uIEVDQyBSb290Q0ExMB4XDTE2MDYx +NjA1MTUyOFoXDTM4MDExODA1MTUyOFowYTELMAkGA1UEBhMCSlAxJTAjBgNVBAoT +HFNFQ09NIFRydXN0IFN5c3RlbXMgQ08uLExURC4xKzApBgNVBAMTIlNlY3VyaXR5 +IENvbW11bmljYXRpb24gRUNDIFJvb3RDQTEwdjAQBgcqhkjOPQIBBgUrgQQAIgNi +AASkpW9gAwPDvTH00xecK4R1rOX9PVdu12O/5gSJko6BnOPpR27KkBLIE+Cnnfdl +dB9sELLo5OnvbYUymUSxXv3MdhDYW72ixvnWQuRXdtyQwjWpS4g8EkdtXP9JTxpK +ULGjQjBAMB0GA1UdDgQWBBSGHOf+LaVKiwj+KBH6vqNm+GBZLzAOBgNVHQ8BAf8E +BAMCAQYwDwYDVR0TAQH/BAUwAwEB/zAKBggqhkjOPQQDAwNoADBlAjAVXUI9/Lbu +9zuxNuie9sRGKEkz0FhDKmMpzE2xtHqiuQ04pV1IKv3LsnNdo4gIxwwCMQDAqy0O +be0YottT6SXbVQjgUMzfRGEWgqtJsLKB7HOHeLRMsmIbEvoWTSVLY70eN9k= +-----END CERTIFICATE----- diff --git a/src/pip/_vendor/certifi/core.py b/src/pip/_vendor/certifi/core.py index b8140cf1ae7..c3e546604c8 100644 --- a/src/pip/_vendor/certifi/core.py +++ b/src/pip/_vendor/certifi/core.py @@ -1,34 +1,20 @@ -# -*- coding: utf-8 -*- - """ certifi.py ~~~~~~~~~~ This module returns the installation location of cacert.pem or its contents. """ -import os - +import sys -class _PipPatchedCertificate(Exception): - pass +if sys.version_info >= (3, 11): -try: - # Return a certificate file on disk for a standalone pip zipapp running in - # an isolated build environment to use. Passing --cert to the standalone - # pip does not work since requests calls where() unconditionally on import. - _PIP_STANDALONE_CERT = os.environ.get("_PIP_STANDALONE_CERT") - if _PIP_STANDALONE_CERT: - def where(): - return _PIP_STANDALONE_CERT - raise _PipPatchedCertificate() - - from importlib.resources import path as get_path, read_text + from importlib.resources import as_file, files _CACERT_CTX = None _CACERT_PATH = None - def where(): + def where() -> str: # This is slightly terrible, but we want to delay extracting the file # in cases where we're inside of a zipimport situation until someone # actually calls where(), but we don't want to re-extract the file @@ -47,30 +33,76 @@ def where(): # We also have to hold onto the actual context manager, because # it will do the cleanup whenever it gets garbage collected, so # we will also store that at the global level as well. + _CACERT_CTX = as_file(files("pip._vendor.certifi").joinpath("cacert.pem")) + _CACERT_PATH = str(_CACERT_CTX.__enter__()) + + return _CACERT_PATH + + def contents() -> str: + return files("pip._vendor.certifi").joinpath("cacert.pem").read_text(encoding="ascii") + +elif sys.version_info >= (3, 7): + + from importlib.resources import path as get_path, read_text + + _CACERT_CTX = None + _CACERT_PATH = None + + def where() -> str: + # This is slightly terrible, but we want to delay extracting the + # file in cases where we're inside of a zipimport situation until + # someone actually calls where(), but we don't want to re-extract + # the file on every call of where(), so we'll do it once then store + # it in a global variable. + global _CACERT_CTX + global _CACERT_PATH + if _CACERT_PATH is None: + # This is slightly janky, the importlib.resources API wants you + # to manage the cleanup of this file, so it doesn't actually + # return a path, it returns a context manager that will give + # you the path when you enter it and will do any cleanup when + # you leave it. In the common case of not needing a temporary + # file, it will just return the file system location and the + # __exit__() is a no-op. + # + # We also have to hold onto the actual context manager, because + # it will do the cleanup whenever it gets garbage collected, so + # we will also store that at the global level as well. _CACERT_CTX = get_path("pip._vendor.certifi", "cacert.pem") _CACERT_PATH = str(_CACERT_CTX.__enter__()) return _CACERT_PATH -except _PipPatchedCertificate: - pass + def contents() -> str: + return read_text("pip._vendor.certifi", "cacert.pem", encoding="ascii") + +else: + import os + import types + from typing import Union + + Package = Union[types.ModuleType, str] + Resource = Union[str, "os.PathLike"] -except ImportError: # This fallback will work for Python versions prior to 3.7 that lack the # importlib.resources module but relies on the existing `where` function # so won't address issues with environments like PyOxidizer that don't set # __file__ on modules. - def read_text(_module, _path, encoding="ascii"): - with open(where(), "r", encoding=encoding) as data: + def read_text( + package: Package, + resource: Resource, + encoding: str = 'utf-8', + errors: str = 'strict' + ) -> str: + with open(where(), encoding=encoding) as data: return data.read() # If we don't have importlib.resources, then we will just do the old logic # of assuming we're on the filesystem and munge the path directly. - def where(): + def where() -> str: f = os.path.dirname(__file__) return os.path.join(f, "cacert.pem") - -def contents(): - return read_text("certifi", "cacert.pem", encoding="ascii") + def contents() -> str: + return read_text("pip._vendor.certifi", "cacert.pem", encoding="ascii") diff --git a/news/10593.trivial.rst b/src/pip/_vendor/certifi/py.typed similarity index 100% rename from news/10593.trivial.rst rename to src/pip/_vendor/certifi/py.typed diff --git a/src/pip/_vendor/chardet.pyi b/src/pip/_vendor/chardet.pyi deleted file mode 100644 index 29e87e33157..00000000000 --- a/src/pip/_vendor/chardet.pyi +++ /dev/null @@ -1 +0,0 @@ -from chardet import * \ No newline at end of file diff --git a/src/pip/_vendor/chardet/LICENSE b/src/pip/_vendor/chardet/LICENSE index 8add30ad590..4362b49151d 100644 --- a/src/pip/_vendor/chardet/LICENSE +++ b/src/pip/_vendor/chardet/LICENSE @@ -1,8 +1,8 @@ - GNU LESSER GENERAL PUBLIC LICENSE - Version 2.1, February 1999 + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 Copyright (C) 1991, 1999 Free Software Foundation, Inc. - 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. @@ -10,7 +10,7 @@ as the successor of the GNU Library Public License, version 2, hence the version number 2.1.] - Preamble + Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public @@ -112,7 +112,7 @@ modification follow. Pay close attention to the difference between a former contains code derived from the library, whereas the latter must be combined with the library in order to run. - GNU LESSER GENERAL PUBLIC LICENSE + GNU LESSER GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License Agreement applies to any software library or other @@ -146,7 +146,7 @@ such a program is covered only if its contents constitute a work based on the Library (independent of the use of the Library in a tool for writing it). Whether that is true depends on what the Library does and what the program that uses the Library does. - + 1. You may copy and distribute verbatim copies of the Library's complete source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an @@ -432,7 +432,7 @@ decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. - NO WARRANTY + NO WARRANTY 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. @@ -455,7 +455,7 @@ FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. - END OF TERMS AND CONDITIONS + END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Libraries @@ -485,7 +485,7 @@ convey the exclusion of warranty; and each file should have at least the You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Also add information on how to contact you by electronic and paper mail. @@ -500,5 +500,3 @@ necessary. Here is a sample; alter the names: Ty Coon, President of Vice That's all there is to it! - - diff --git a/src/pip/_vendor/chardet/__init__.py b/src/pip/_vendor/chardet/__init__.py index 80ad2546d79..fe581623d89 100644 --- a/src/pip/_vendor/chardet/__init__.py +++ b/src/pip/_vendor/chardet/__init__.py @@ -15,69 +15,101 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### +from typing import List, Union -from .universaldetector import UniversalDetector +from .charsetgroupprober import CharSetGroupProber +from .charsetprober import CharSetProber from .enums import InputState -from .version import __version__, VERSION - +from .resultdict import ResultDict +from .universaldetector import UniversalDetector +from .version import VERSION, __version__ -__all__ = ['UniversalDetector', 'detect', 'detect_all', '__version__', 'VERSION'] +__all__ = ["UniversalDetector", "detect", "detect_all", "__version__", "VERSION"] -def detect(byte_str): +def detect( + byte_str: Union[bytes, bytearray], should_rename_legacy: bool = False +) -> ResultDict: """ Detect the encoding of the given byte string. :param byte_str: The byte sequence to examine. :type byte_str: ``bytes`` or ``bytearray`` + :param should_rename_legacy: Should we rename legacy encodings + to their more modern equivalents? + :type should_rename_legacy: ``bool`` """ if not isinstance(byte_str, bytearray): if not isinstance(byte_str, bytes): - raise TypeError('Expected object of type bytes or bytearray, got: ' - '{}'.format(type(byte_str))) - else: - byte_str = bytearray(byte_str) - detector = UniversalDetector() + raise TypeError( + f"Expected object of type bytes or bytearray, got: {type(byte_str)}" + ) + byte_str = bytearray(byte_str) + detector = UniversalDetector(should_rename_legacy=should_rename_legacy) detector.feed(byte_str) return detector.close() -def detect_all(byte_str): +def detect_all( + byte_str: Union[bytes, bytearray], + ignore_threshold: bool = False, + should_rename_legacy: bool = False, +) -> List[ResultDict]: """ Detect all the possible encodings of the given byte string. - :param byte_str: The byte sequence to examine. - :type byte_str: ``bytes`` or ``bytearray`` + :param byte_str: The byte sequence to examine. + :type byte_str: ``bytes`` or ``bytearray`` + :param ignore_threshold: Include encodings that are below + ``UniversalDetector.MINIMUM_THRESHOLD`` + in results. + :type ignore_threshold: ``bool`` + :param should_rename_legacy: Should we rename legacy encodings + to their more modern equivalents? + :type should_rename_legacy: ``bool`` """ if not isinstance(byte_str, bytearray): if not isinstance(byte_str, bytes): - raise TypeError('Expected object of type bytes or bytearray, got: ' - '{}'.format(type(byte_str))) - else: - byte_str = bytearray(byte_str) + raise TypeError( + f"Expected object of type bytes or bytearray, got: {type(byte_str)}" + ) + byte_str = bytearray(byte_str) - detector = UniversalDetector() + detector = UniversalDetector(should_rename_legacy=should_rename_legacy) detector.feed(byte_str) detector.close() - if detector._input_state == InputState.HIGH_BYTE: - results = [] - for prober in detector._charset_probers: - if prober.get_confidence() > detector.MINIMUM_THRESHOLD: - charset_name = prober.charset_name - lower_charset_name = prober.charset_name.lower() + if detector.input_state == InputState.HIGH_BYTE: + results: List[ResultDict] = [] + probers: List[CharSetProber] = [] + for prober in detector.charset_probers: + if isinstance(prober, CharSetGroupProber): + probers.extend(p for p in prober.probers) + else: + probers.append(prober) + for prober in probers: + if ignore_threshold or prober.get_confidence() > detector.MINIMUM_THRESHOLD: + charset_name = prober.charset_name or "" + lower_charset_name = charset_name.lower() # Use Windows encoding name instead of ISO-8859 if we saw any # extra Windows-specific bytes - if lower_charset_name.startswith('iso-8859'): - if detector._has_win_bytes: - charset_name = detector.ISO_WIN_MAP.get(lower_charset_name, - charset_name) - results.append({ - 'encoding': charset_name, - 'confidence': prober.get_confidence(), - 'language': prober.language, - }) + if lower_charset_name.startswith("iso-8859") and detector.has_win_bytes: + charset_name = detector.ISO_WIN_MAP.get( + lower_charset_name, charset_name + ) + # Rename legacy encodings with superset encodings if asked + if should_rename_legacy: + charset_name = detector.LEGACY_MAP.get( + charset_name.lower(), charset_name + ) + results.append( + { + "encoding": charset_name, + "confidence": prober.get_confidence(), + "language": prober.language, + } + ) if len(results) > 0: - return sorted(results, key=lambda result: -result['confidence']) + return sorted(results, key=lambda result: -result["confidence"]) return [detector.result] diff --git a/src/pip/_vendor/chardet/big5freq.py b/src/pip/_vendor/chardet/big5freq.py index 38f32517aa8..87d9f972edd 100644 --- a/src/pip/_vendor/chardet/big5freq.py +++ b/src/pip/_vendor/chardet/big5freq.py @@ -42,9 +42,9 @@ BIG5_TYPICAL_DISTRIBUTION_RATIO = 0.75 -#Char to FreqOrder table +# Char to FreqOrder table BIG5_TABLE_SIZE = 5376 - +# fmt: off BIG5_CHAR_TO_FREQ_ORDER = ( 1,1801,1506, 255,1431, 198, 9, 82, 6,5008, 177, 202,3681,1256,2821, 110, # 16 3814, 33,3274, 261, 76, 44,2114, 16,2946,2187,1176, 659,3971, 26,3451,2653, # 32 @@ -383,4 +383,4 @@ 890,3669,3943,5791,1878,3798,3439,5792,2186,2358,3440,1652,5793,5794,5795, 941, # 5360 2299, 208,3546,4161,2020, 330,4438,3944,2906,2499,3799,4439,4811,5796,5797,5798, # 5376 ) - +# fmt: on diff --git a/src/pip/_vendor/chardet/big5prober.py b/src/pip/_vendor/chardet/big5prober.py index 98f99701220..ef09c60e327 100644 --- a/src/pip/_vendor/chardet/big5prober.py +++ b/src/pip/_vendor/chardet/big5prober.py @@ -25,23 +25,23 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### -from .mbcharsetprober import MultiByteCharSetProber -from .codingstatemachine import CodingStateMachine from .chardistribution import Big5DistributionAnalysis +from .codingstatemachine import CodingStateMachine +from .mbcharsetprober import MultiByteCharSetProber from .mbcssm import BIG5_SM_MODEL class Big5Prober(MultiByteCharSetProber): - def __init__(self): - super(Big5Prober, self).__init__() + def __init__(self) -> None: + super().__init__() self.coding_sm = CodingStateMachine(BIG5_SM_MODEL) self.distribution_analyzer = Big5DistributionAnalysis() self.reset() @property - def charset_name(self): + def charset_name(self) -> str: return "Big5" @property - def language(self): + def language(self) -> str: return "Chinese" diff --git a/src/pip/_vendor/chardet/chardistribution.py b/src/pip/_vendor/chardet/chardistribution.py index c0395f4a45a..176cb996408 100644 --- a/src/pip/_vendor/chardet/chardistribution.py +++ b/src/pip/_vendor/chardet/chardistribution.py @@ -25,40 +25,58 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### -from .euctwfreq import (EUCTW_CHAR_TO_FREQ_ORDER, EUCTW_TABLE_SIZE, - EUCTW_TYPICAL_DISTRIBUTION_RATIO) -from .euckrfreq import (EUCKR_CHAR_TO_FREQ_ORDER, EUCKR_TABLE_SIZE, - EUCKR_TYPICAL_DISTRIBUTION_RATIO) -from .gb2312freq import (GB2312_CHAR_TO_FREQ_ORDER, GB2312_TABLE_SIZE, - GB2312_TYPICAL_DISTRIBUTION_RATIO) -from .big5freq import (BIG5_CHAR_TO_FREQ_ORDER, BIG5_TABLE_SIZE, - BIG5_TYPICAL_DISTRIBUTION_RATIO) -from .jisfreq import (JIS_CHAR_TO_FREQ_ORDER, JIS_TABLE_SIZE, - JIS_TYPICAL_DISTRIBUTION_RATIO) - - -class CharDistributionAnalysis(object): +from typing import Tuple, Union + +from .big5freq import ( + BIG5_CHAR_TO_FREQ_ORDER, + BIG5_TABLE_SIZE, + BIG5_TYPICAL_DISTRIBUTION_RATIO, +) +from .euckrfreq import ( + EUCKR_CHAR_TO_FREQ_ORDER, + EUCKR_TABLE_SIZE, + EUCKR_TYPICAL_DISTRIBUTION_RATIO, +) +from .euctwfreq import ( + EUCTW_CHAR_TO_FREQ_ORDER, + EUCTW_TABLE_SIZE, + EUCTW_TYPICAL_DISTRIBUTION_RATIO, +) +from .gb2312freq import ( + GB2312_CHAR_TO_FREQ_ORDER, + GB2312_TABLE_SIZE, + GB2312_TYPICAL_DISTRIBUTION_RATIO, +) +from .jisfreq import ( + JIS_CHAR_TO_FREQ_ORDER, + JIS_TABLE_SIZE, + JIS_TYPICAL_DISTRIBUTION_RATIO, +) +from .johabfreq import JOHAB_TO_EUCKR_ORDER_TABLE + + +class CharDistributionAnalysis: ENOUGH_DATA_THRESHOLD = 1024 SURE_YES = 0.99 SURE_NO = 0.01 MINIMUM_DATA_THRESHOLD = 3 - def __init__(self): + def __init__(self) -> None: # Mapping table to get frequency order from char order (get from # GetOrder()) - self._char_to_freq_order = None - self._table_size = None # Size of above table + self._char_to_freq_order: Tuple[int, ...] = tuple() + self._table_size = 0 # Size of above table # This is a constant value which varies from language to language, # used in calculating confidence. See # http://www.mozilla.org/projects/intl/UniversalCharsetDetection.html # for further detail. - self.typical_distribution_ratio = None - self._done = None - self._total_chars = None - self._freq_chars = None + self.typical_distribution_ratio = 0.0 + self._done = False + self._total_chars = 0 + self._freq_chars = 0 self.reset() - def reset(self): + def reset(self) -> None: """reset analyser, clear any state""" # If this flag is set to True, detection is done and conclusion has # been made @@ -67,7 +85,7 @@ def reset(self): # The number of characters whose frequency order is less than 512 self._freq_chars = 0 - def feed(self, char, char_len): + def feed(self, char: Union[bytes, bytearray], char_len: int) -> None: """feed a character with known length""" if char_len == 2: # we only care about 2-bytes character in our distribution analysis @@ -81,7 +99,7 @@ def feed(self, char, char_len): if 512 > self._char_to_freq_order[order]: self._freq_chars += 1 - def get_confidence(self): + def get_confidence(self) -> float: """return confidence based on existing data""" # if we didn't receive any character in our consideration range, # return negative answer @@ -89,20 +107,21 @@ def get_confidence(self): return self.SURE_NO if self._total_chars != self._freq_chars: - r = (self._freq_chars / ((self._total_chars - self._freq_chars) - * self.typical_distribution_ratio)) + r = self._freq_chars / ( + (self._total_chars - self._freq_chars) * self.typical_distribution_ratio + ) if r < self.SURE_YES: return r # normalize confidence (we don't want to be 100% sure) return self.SURE_YES - def got_enough_data(self): + def got_enough_data(self) -> bool: # It is not necessary to receive all data to draw conclusion. # For charset detection, certain amount of data is enough return self._total_chars > self.ENOUGH_DATA_THRESHOLD - def get_order(self, byte_str): + def get_order(self, _: Union[bytes, bytearray]) -> int: # We do not handle characters based on the original encoding string, # but convert this encoding string to a number, here called order. # This allows multiple encodings of a language to share one frequency @@ -111,13 +130,13 @@ def get_order(self, byte_str): class EUCTWDistributionAnalysis(CharDistributionAnalysis): - def __init__(self): - super(EUCTWDistributionAnalysis, self).__init__() + def __init__(self) -> None: + super().__init__() self._char_to_freq_order = EUCTW_CHAR_TO_FREQ_ORDER self._table_size = EUCTW_TABLE_SIZE self.typical_distribution_ratio = EUCTW_TYPICAL_DISTRIBUTION_RATIO - def get_order(self, byte_str): + def get_order(self, byte_str: Union[bytes, bytearray]) -> int: # for euc-TW encoding, we are interested # first byte range: 0xc4 -- 0xfe # second byte range: 0xa1 -- 0xfe @@ -125,18 +144,17 @@ def get_order(self, byte_str): first_char = byte_str[0] if first_char >= 0xC4: return 94 * (first_char - 0xC4) + byte_str[1] - 0xA1 - else: - return -1 + return -1 class EUCKRDistributionAnalysis(CharDistributionAnalysis): - def __init__(self): - super(EUCKRDistributionAnalysis, self).__init__() + def __init__(self) -> None: + super().__init__() self._char_to_freq_order = EUCKR_CHAR_TO_FREQ_ORDER self._table_size = EUCKR_TABLE_SIZE self.typical_distribution_ratio = EUCKR_TYPICAL_DISTRIBUTION_RATIO - def get_order(self, byte_str): + def get_order(self, byte_str: Union[bytes, bytearray]) -> int: # for euc-KR encoding, we are interested # first byte range: 0xb0 -- 0xfe # second byte range: 0xa1 -- 0xfe @@ -144,18 +162,32 @@ def get_order(self, byte_str): first_char = byte_str[0] if first_char >= 0xB0: return 94 * (first_char - 0xB0) + byte_str[1] - 0xA1 - else: - return -1 + return -1 + + +class JOHABDistributionAnalysis(CharDistributionAnalysis): + def __init__(self) -> None: + super().__init__() + self._char_to_freq_order = EUCKR_CHAR_TO_FREQ_ORDER + self._table_size = EUCKR_TABLE_SIZE + self.typical_distribution_ratio = EUCKR_TYPICAL_DISTRIBUTION_RATIO + + def get_order(self, byte_str: Union[bytes, bytearray]) -> int: + first_char = byte_str[0] + if 0x88 <= first_char < 0xD4: + code = first_char * 256 + byte_str[1] + return JOHAB_TO_EUCKR_ORDER_TABLE.get(code, -1) + return -1 class GB2312DistributionAnalysis(CharDistributionAnalysis): - def __init__(self): - super(GB2312DistributionAnalysis, self).__init__() + def __init__(self) -> None: + super().__init__() self._char_to_freq_order = GB2312_CHAR_TO_FREQ_ORDER self._table_size = GB2312_TABLE_SIZE self.typical_distribution_ratio = GB2312_TYPICAL_DISTRIBUTION_RATIO - def get_order(self, byte_str): + def get_order(self, byte_str: Union[bytes, bytearray]) -> int: # for GB2312 encoding, we are interested # first byte range: 0xb0 -- 0xfe # second byte range: 0xa1 -- 0xfe @@ -163,18 +195,17 @@ def get_order(self, byte_str): first_char, second_char = byte_str[0], byte_str[1] if (first_char >= 0xB0) and (second_char >= 0xA1): return 94 * (first_char - 0xB0) + second_char - 0xA1 - else: - return -1 + return -1 class Big5DistributionAnalysis(CharDistributionAnalysis): - def __init__(self): - super(Big5DistributionAnalysis, self).__init__() + def __init__(self) -> None: + super().__init__() self._char_to_freq_order = BIG5_CHAR_TO_FREQ_ORDER self._table_size = BIG5_TABLE_SIZE self.typical_distribution_ratio = BIG5_TYPICAL_DISTRIBUTION_RATIO - def get_order(self, byte_str): + def get_order(self, byte_str: Union[bytes, bytearray]) -> int: # for big5 encoding, we are interested # first byte range: 0xa4 -- 0xfe # second byte range: 0x40 -- 0x7e , 0xa1 -- 0xfe @@ -183,28 +214,26 @@ def get_order(self, byte_str): if first_char >= 0xA4: if second_char >= 0xA1: return 157 * (first_char - 0xA4) + second_char - 0xA1 + 63 - else: - return 157 * (first_char - 0xA4) + second_char - 0x40 - else: - return -1 + return 157 * (first_char - 0xA4) + second_char - 0x40 + return -1 class SJISDistributionAnalysis(CharDistributionAnalysis): - def __init__(self): - super(SJISDistributionAnalysis, self).__init__() + def __init__(self) -> None: + super().__init__() self._char_to_freq_order = JIS_CHAR_TO_FREQ_ORDER self._table_size = JIS_TABLE_SIZE self.typical_distribution_ratio = JIS_TYPICAL_DISTRIBUTION_RATIO - def get_order(self, byte_str): + def get_order(self, byte_str: Union[bytes, bytearray]) -> int: # for sjis encoding, we are interested # first byte range: 0x81 -- 0x9f , 0xe0 -- 0xfe # second byte range: 0x40 -- 0x7e, 0x81 -- oxfe # no validation needed here. State machine has done that first_char, second_char = byte_str[0], byte_str[1] - if (first_char >= 0x81) and (first_char <= 0x9F): + if 0x81 <= first_char <= 0x9F: order = 188 * (first_char - 0x81) - elif (first_char >= 0xE0) and (first_char <= 0xEF): + elif 0xE0 <= first_char <= 0xEF: order = 188 * (first_char - 0xE0 + 31) else: return -1 @@ -215,19 +244,18 @@ def get_order(self, byte_str): class EUCJPDistributionAnalysis(CharDistributionAnalysis): - def __init__(self): - super(EUCJPDistributionAnalysis, self).__init__() + def __init__(self) -> None: + super().__init__() self._char_to_freq_order = JIS_CHAR_TO_FREQ_ORDER self._table_size = JIS_TABLE_SIZE self.typical_distribution_ratio = JIS_TYPICAL_DISTRIBUTION_RATIO - def get_order(self, byte_str): + def get_order(self, byte_str: Union[bytes, bytearray]) -> int: # for euc-JP encoding, we are interested # first byte range: 0xa0 -- 0xfe # second byte range: 0xa1 -- 0xfe # no validation needed here. State machine has done that char = byte_str[0] if char >= 0xA0: - return 94 * (char - 0xA1) + byte_str[1] - 0xa1 - else: - return -1 + return 94 * (char - 0xA1) + byte_str[1] - 0xA1 + return -1 diff --git a/src/pip/_vendor/chardet/charsetgroupprober.py b/src/pip/_vendor/chardet/charsetgroupprober.py index 5812cef0b59..6def56b4a75 100644 --- a/src/pip/_vendor/chardet/charsetgroupprober.py +++ b/src/pip/_vendor/chardet/charsetgroupprober.py @@ -25,29 +25,30 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### -from .enums import ProbingState +from typing import List, Optional, Union + from .charsetprober import CharSetProber +from .enums import LanguageFilter, ProbingState class CharSetGroupProber(CharSetProber): - def __init__(self, lang_filter=None): - super(CharSetGroupProber, self).__init__(lang_filter=lang_filter) + def __init__(self, lang_filter: LanguageFilter = LanguageFilter.NONE) -> None: + super().__init__(lang_filter=lang_filter) self._active_num = 0 - self.probers = [] - self._best_guess_prober = None + self.probers: List[CharSetProber] = [] + self._best_guess_prober: Optional[CharSetProber] = None - def reset(self): - super(CharSetGroupProber, self).reset() + def reset(self) -> None: + super().reset() self._active_num = 0 for prober in self.probers: - if prober: - prober.reset() - prober.active = True - self._active_num += 1 + prober.reset() + prober.active = True + self._active_num += 1 self._best_guess_prober = None @property - def charset_name(self): + def charset_name(self) -> Optional[str]: if not self._best_guess_prober: self.get_confidence() if not self._best_guess_prober: @@ -55,17 +56,15 @@ def charset_name(self): return self._best_guess_prober.charset_name @property - def language(self): + def language(self) -> Optional[str]: if not self._best_guess_prober: self.get_confidence() if not self._best_guess_prober: return None return self._best_guess_prober.language - def feed(self, byte_str): + def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState: for prober in self.probers: - if not prober: - continue if not prober.active: continue state = prober.feed(byte_str) @@ -75,7 +74,7 @@ def feed(self, byte_str): self._best_guess_prober = prober self._state = ProbingState.FOUND_IT return self.state - elif state == ProbingState.NOT_ME: + if state == ProbingState.NOT_ME: prober.active = False self._active_num -= 1 if self._active_num <= 0: @@ -83,22 +82,22 @@ def feed(self, byte_str): return self.state return self.state - def get_confidence(self): + def get_confidence(self) -> float: state = self.state if state == ProbingState.FOUND_IT: return 0.99 - elif state == ProbingState.NOT_ME: + if state == ProbingState.NOT_ME: return 0.01 best_conf = 0.0 self._best_guess_prober = None for prober in self.probers: - if not prober: - continue if not prober.active: - self.logger.debug('%s not active', prober.charset_name) + self.logger.debug("%s not active", prober.charset_name) continue conf = prober.get_confidence() - self.logger.debug('%s %s confidence = %s', prober.charset_name, prober.language, conf) + self.logger.debug( + "%s %s confidence = %s", prober.charset_name, prober.language, conf + ) if best_conf < conf: best_conf = conf self._best_guess_prober = prober diff --git a/src/pip/_vendor/chardet/charsetprober.py b/src/pip/_vendor/chardet/charsetprober.py index eac4e598657..a103ca11356 100644 --- a/src/pip/_vendor/chardet/charsetprober.py +++ b/src/pip/_vendor/chardet/charsetprober.py @@ -28,54 +28,62 @@ import logging import re +from typing import Optional, Union -from .enums import ProbingState +from .enums import LanguageFilter, ProbingState +INTERNATIONAL_WORDS_PATTERN = re.compile( + b"[a-zA-Z]*[\x80-\xFF]+[a-zA-Z]*[^a-zA-Z\x80-\xFF]?" +) -class CharSetProber(object): + +class CharSetProber: SHORTCUT_THRESHOLD = 0.95 - def __init__(self, lang_filter=None): - self._state = None + def __init__(self, lang_filter: LanguageFilter = LanguageFilter.NONE) -> None: + self._state = ProbingState.DETECTING + self.active = True self.lang_filter = lang_filter self.logger = logging.getLogger(__name__) - def reset(self): + def reset(self) -> None: self._state = ProbingState.DETECTING @property - def charset_name(self): + def charset_name(self) -> Optional[str]: return None - def feed(self, buf): - pass + @property + def language(self) -> Optional[str]: + raise NotImplementedError + + def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState: + raise NotImplementedError @property - def state(self): + def state(self) -> ProbingState: return self._state - def get_confidence(self): + def get_confidence(self) -> float: return 0.0 @staticmethod - def filter_high_byte_only(buf): - buf = re.sub(b'([\x00-\x7F])+', b' ', buf) + def filter_high_byte_only(buf: Union[bytes, bytearray]) -> bytes: + buf = re.sub(b"([\x00-\x7F])+", b" ", buf) return buf @staticmethod - def filter_international_words(buf): + def filter_international_words(buf: Union[bytes, bytearray]) -> bytearray: """ We define three types of bytes: alphabet: english alphabets [a-zA-Z] international: international characters [\x80-\xFF] marker: everything else [^a-zA-Z\x80-\xFF] - The input buffer can be thought to contain a series of words delimited by markers. This function works to filter all words that contain at least one international character. All contiguous sequences of markers are replaced by a single space ascii character. - This filter applies to all scripts which do not use English characters. """ filtered = bytearray() @@ -83,8 +91,7 @@ def filter_international_words(buf): # This regex expression filters out only words that have at-least one # international character. The word may include one marker character at # the end. - words = re.findall(b'[a-zA-Z]*[\x80-\xFF]+[a-zA-Z]*[^a-zA-Z\x80-\xFF]?', - buf) + words = INTERNATIONAL_WORDS_PATTERN.findall(buf) for word in words: filtered.extend(word[:-1]) @@ -94,20 +101,17 @@ def filter_international_words(buf): # similarly across all languages and may thus have similar # frequencies). last_char = word[-1:] - if not last_char.isalpha() and last_char < b'\x80': - last_char = b' ' + if not last_char.isalpha() and last_char < b"\x80": + last_char = b" " filtered.extend(last_char) return filtered @staticmethod - def filter_with_english_letters(buf): + def remove_xml_tags(buf: Union[bytes, bytearray]) -> bytes: """ Returns a copy of ``buf`` that retains only the sequences of English alphabet and high byte characters that are not between <> characters. - Also retains English alphabet and high byte characters immediately - before occurrences of >. - This filter can be applied to all scripts which contain both English characters and extended ASCII characters, but is currently only used by ``Latin1Prober``. @@ -115,26 +119,24 @@ def filter_with_english_letters(buf): filtered = bytearray() in_tag = False prev = 0 + buf = memoryview(buf).cast("c") - for curr in range(len(buf)): - # Slice here to get bytes instead of an int with Python 3 - buf_char = buf[curr:curr + 1] - # Check if we're coming out of or entering an HTML tag - if buf_char == b'>': - in_tag = False - elif buf_char == b'<': - in_tag = True + for curr, buf_char in enumerate(buf): + # Check if we're coming out of or entering an XML tag - # If current character is not extended-ASCII and not alphabetic... - if buf_char < b'\x80' and not buf_char.isalpha(): - # ...and we're not in a tag + # https://github.com/python/typeshed/issues/8182 + if buf_char == b">": # type: ignore[comparison-overlap] + prev = curr + 1 + in_tag = False + # https://github.com/python/typeshed/issues/8182 + elif buf_char == b"<": # type: ignore[comparison-overlap] if curr > prev and not in_tag: # Keep everything after last non-extended-ASCII, # non-alphabetic character filtered.extend(buf[prev:curr]) # Output a space to delimit stretch we kept - filtered.extend(b' ') - prev = curr + 1 + filtered.extend(b" ") + in_tag = True # If we're not in a tag... if not in_tag: diff --git a/src/pip/_vendor/chardet/cli/__init__.py b/src/pip/_vendor/chardet/cli/__init__.py index 8b137891791..e69de29bb2d 100644 --- a/src/pip/_vendor/chardet/cli/__init__.py +++ b/src/pip/_vendor/chardet/cli/__init__.py @@ -1 +0,0 @@ - diff --git a/src/pip/_vendor/chardet/cli/chardetect.py b/src/pip/_vendor/chardet/cli/chardetect.py index 6d6f93aabd4..43f6e144f67 100644 --- a/src/pip/_vendor/chardet/cli/chardetect.py +++ b/src/pip/_vendor/chardet/cli/chardetect.py @@ -12,17 +12,21 @@ """ -from __future__ import absolute_import, print_function, unicode_literals import argparse import sys +from typing import Iterable, List, Optional -from pip._vendor.chardet import __version__ -from pip._vendor.chardet.compat import PY2 -from pip._vendor.chardet.universaldetector import UniversalDetector +from .. import __version__ +from ..universaldetector import UniversalDetector -def description_of(lines, name='stdin'): +def description_of( + lines: Iterable[bytes], + name: str = "stdin", + minimal: bool = False, + should_rename_legacy: bool = False, +) -> Optional[str]: """ Return a string describing the probable encoding of a file or list of strings. @@ -31,8 +35,11 @@ def description_of(lines, name='stdin'): :type lines: Iterable of bytes :param name: Name of file or collection of lines :type name: str + :param should_rename_legacy: Should we rename legacy encodings to + their more modern equivalents? + :type should_rename_legacy: ``bool`` """ - u = UniversalDetector() + u = UniversalDetector(should_rename_legacy=should_rename_legacy) for line in lines: line = bytearray(line) u.feed(line) @@ -41,16 +48,14 @@ def description_of(lines, name='stdin'): break u.close() result = u.result - if PY2: - name = name.decode(sys.getfilesystemencoding(), 'ignore') - if result['encoding']: - return '{}: {} with confidence {}'.format(name, result['encoding'], - result['confidence']) - else: - return '{}: no result'.format(name) + if minimal: + return result["encoding"] + if result["encoding"]: + return f'{name}: {result["encoding"]} with confidence {result["confidence"]}' + return f"{name}: no result" -def main(argv=None): +def main(argv: Optional[List[str]] = None) -> None: """ Handles command line arguments and gets things started. @@ -60,25 +65,48 @@ def main(argv=None): """ # Get command line arguments parser = argparse.ArgumentParser( - description="Takes one or more file paths and reports their detected \ - encodings") - parser.add_argument('input', - help='File whose encoding we would like to determine. \ - (default: stdin)', - type=argparse.FileType('rb'), nargs='*', - default=[sys.stdin if PY2 else sys.stdin.buffer]) - parser.add_argument('--version', action='version', - version='%(prog)s {}'.format(__version__)) + description=( + "Takes one or more file paths and reports their detected encodings" + ) + ) + parser.add_argument( + "input", + help="File whose encoding we would like to determine. (default: stdin)", + type=argparse.FileType("rb"), + nargs="*", + default=[sys.stdin.buffer], + ) + parser.add_argument( + "--minimal", + help="Print only the encoding to standard output", + action="store_true", + ) + parser.add_argument( + "-l", + "--legacy", + help="Rename legacy encodings to more modern ones.", + action="store_true", + ) + parser.add_argument( + "--version", action="version", version=f"%(prog)s {__version__}" + ) args = parser.parse_args(argv) for f in args.input: if f.isatty(): - print("You are running chardetect interactively. Press " + - "CTRL-D twice at the start of a blank line to signal the " + - "end of your input. If you want help, run chardetect " + - "--help\n", file=sys.stderr) - print(description_of(f, f.name)) - - -if __name__ == '__main__': + print( + "You are running chardetect interactively. Press " + "CTRL-D twice at the start of a blank line to signal the " + "end of your input. If you want help, run chardetect " + "--help\n", + file=sys.stderr, + ) + print( + description_of( + f, f.name, minimal=args.minimal, should_rename_legacy=args.legacy + ) + ) + + +if __name__ == "__main__": main() diff --git a/src/pip/_vendor/chardet/codingstatemachine.py b/src/pip/_vendor/chardet/codingstatemachine.py index 68fba44f143..8ed4a8773b8 100644 --- a/src/pip/_vendor/chardet/codingstatemachine.py +++ b/src/pip/_vendor/chardet/codingstatemachine.py @@ -27,10 +27,11 @@ import logging +from .codingstatemachinedict import CodingStateMachineDict from .enums import MachineState -class CodingStateMachine(object): +class CodingStateMachine: """ A state machine to verify a byte sequence for a particular encoding. For each byte the detector receives, it will feed that byte to every active @@ -52,37 +53,38 @@ class CodingStateMachine(object): negative answer for this encoding. Detector will exclude this encoding from consideration from here on. """ - def __init__(self, sm): + + def __init__(self, sm: CodingStateMachineDict) -> None: self._model = sm self._curr_byte_pos = 0 self._curr_char_len = 0 - self._curr_state = None + self._curr_state = MachineState.START + self.active = True self.logger = logging.getLogger(__name__) self.reset() - def reset(self): + def reset(self) -> None: self._curr_state = MachineState.START - def next_state(self, c): + def next_state(self, c: int) -> int: # for each byte we get its class # if it is first byte, we also get byte length - byte_class = self._model['class_table'][c] + byte_class = self._model["class_table"][c] if self._curr_state == MachineState.START: self._curr_byte_pos = 0 - self._curr_char_len = self._model['char_len_table'][byte_class] + self._curr_char_len = self._model["char_len_table"][byte_class] # from byte's class and state_table, we get its next state - curr_state = (self._curr_state * self._model['class_factor'] - + byte_class) - self._curr_state = self._model['state_table'][curr_state] + curr_state = self._curr_state * self._model["class_factor"] + byte_class + self._curr_state = self._model["state_table"][curr_state] self._curr_byte_pos += 1 return self._curr_state - def get_current_charlen(self): + def get_current_charlen(self) -> int: return self._curr_char_len - def get_coding_state_machine(self): - return self._model['name'] + def get_coding_state_machine(self) -> str: + return self._model["name"] @property - def language(self): - return self._model['language'] + def language(self) -> str: + return self._model["language"] diff --git a/src/pip/_vendor/chardet/codingstatemachinedict.py b/src/pip/_vendor/chardet/codingstatemachinedict.py new file mode 100644 index 00000000000..7a3c4c7e3fe --- /dev/null +++ b/src/pip/_vendor/chardet/codingstatemachinedict.py @@ -0,0 +1,19 @@ +from typing import TYPE_CHECKING, Tuple + +if TYPE_CHECKING: + # TypedDict was introduced in Python 3.8. + # + # TODO: Remove the else block and TYPE_CHECKING check when dropping support + # for Python 3.7. + from typing import TypedDict + + class CodingStateMachineDict(TypedDict, total=False): + class_table: Tuple[int, ...] + class_factor: int + state_table: Tuple[int, ...] + char_len_table: Tuple[int, ...] + name: str + language: str # Optional key + +else: + CodingStateMachineDict = dict diff --git a/src/pip/_vendor/chardet/cp949prober.py b/src/pip/_vendor/chardet/cp949prober.py index efd793abca4..fa7307ed898 100644 --- a/src/pip/_vendor/chardet/cp949prober.py +++ b/src/pip/_vendor/chardet/cp949prober.py @@ -32,8 +32,8 @@ class CP949Prober(MultiByteCharSetProber): - def __init__(self): - super(CP949Prober, self).__init__() + def __init__(self) -> None: + super().__init__() self.coding_sm = CodingStateMachine(CP949_SM_MODEL) # NOTE: CP949 is a superset of EUC-KR, so the distribution should be # not different. @@ -41,9 +41,9 @@ def __init__(self): self.reset() @property - def charset_name(self): + def charset_name(self) -> str: return "CP949" @property - def language(self): + def language(self) -> str: return "Korean" diff --git a/src/pip/_vendor/chardet/enums.py b/src/pip/_vendor/chardet/enums.py index 04512072251..5e3e1982336 100644 --- a/src/pip/_vendor/chardet/enums.py +++ b/src/pip/_vendor/chardet/enums.py @@ -4,21 +4,26 @@ :author: Dan Blanchard (dan.blanchard@gmail.com) """ +from enum import Enum, Flag -class InputState(object): + +class InputState: """ This enum represents the different states a universal detector can be in. """ + PURE_ASCII = 0 ESC_ASCII = 1 HIGH_BYTE = 2 -class LanguageFilter(object): +class LanguageFilter(Flag): """ This enum represents the different language filters we can apply to a ``UniversalDetector``. """ + + NONE = 0x00 CHINESE_SIMPLIFIED = 0x01 CHINESE_TRADITIONAL = 0x02 JAPANESE = 0x04 @@ -29,46 +34,50 @@ class LanguageFilter(object): CJK = CHINESE | JAPANESE | KOREAN -class ProbingState(object): +class ProbingState(Enum): """ This enum represents the different states a prober can be in. """ + DETECTING = 0 FOUND_IT = 1 NOT_ME = 2 -class MachineState(object): +class MachineState: """ This enum represents the different states a state machine can be in. """ + START = 0 ERROR = 1 ITS_ME = 2 -class SequenceLikelihood(object): +class SequenceLikelihood: """ This enum represents the likelihood of a character following the previous one. """ + NEGATIVE = 0 UNLIKELY = 1 LIKELY = 2 POSITIVE = 3 @classmethod - def get_num_categories(cls): + def get_num_categories(cls) -> int: """:returns: The number of likelihood categories in the enum.""" return 4 -class CharacterCategory(object): +class CharacterCategory: """ This enum represents the different categories language models for ``SingleByteCharsetProber`` put characters into. Anything less than CONTROL is considered a letter. """ + UNDEFINED = 255 LINE_BREAK = 254 SYMBOL = 253 diff --git a/src/pip/_vendor/chardet/escprober.py b/src/pip/_vendor/chardet/escprober.py index c70493f2b13..fd713830d36 100644 --- a/src/pip/_vendor/chardet/escprober.py +++ b/src/pip/_vendor/chardet/escprober.py @@ -25,11 +25,17 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### +from typing import Optional, Union + from .charsetprober import CharSetProber from .codingstatemachine import CodingStateMachine -from .enums import LanguageFilter, ProbingState, MachineState -from .escsm import (HZ_SM_MODEL, ISO2022CN_SM_MODEL, ISO2022JP_SM_MODEL, - ISO2022KR_SM_MODEL) +from .enums import LanguageFilter, MachineState, ProbingState +from .escsm import ( + HZ_SM_MODEL, + ISO2022CN_SM_MODEL, + ISO2022JP_SM_MODEL, + ISO2022KR_SM_MODEL, +) class EscCharSetProber(CharSetProber): @@ -39,8 +45,8 @@ class EscCharSetProber(CharSetProber): identify these encodings. """ - def __init__(self, lang_filter=None): - super(EscCharSetProber, self).__init__(lang_filter=lang_filter) + def __init__(self, lang_filter: LanguageFilter = LanguageFilter.NONE) -> None: + super().__init__(lang_filter=lang_filter) self.coding_sm = [] if self.lang_filter & LanguageFilter.CHINESE_SIMPLIFIED: self.coding_sm.append(CodingStateMachine(HZ_SM_MODEL)) @@ -49,17 +55,15 @@ def __init__(self, lang_filter=None): self.coding_sm.append(CodingStateMachine(ISO2022JP_SM_MODEL)) if self.lang_filter & LanguageFilter.KOREAN: self.coding_sm.append(CodingStateMachine(ISO2022KR_SM_MODEL)) - self.active_sm_count = None - self._detected_charset = None - self._detected_language = None - self._state = None + self.active_sm_count = 0 + self._detected_charset: Optional[str] = None + self._detected_language: Optional[str] = None + self._state = ProbingState.DETECTING self.reset() - def reset(self): - super(EscCharSetProber, self).reset() + def reset(self) -> None: + super().reset() for coding_sm in self.coding_sm: - if not coding_sm: - continue coding_sm.active = True coding_sm.reset() self.active_sm_count = len(self.coding_sm) @@ -67,23 +71,20 @@ def reset(self): self._detected_language = None @property - def charset_name(self): + def charset_name(self) -> Optional[str]: return self._detected_charset @property - def language(self): + def language(self) -> Optional[str]: return self._detected_language - def get_confidence(self): - if self._detected_charset: - return 0.99 - else: - return 0.00 + def get_confidence(self) -> float: + return 0.99 if self._detected_charset else 0.00 - def feed(self, byte_str): + def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState: for c in byte_str: for coding_sm in self.coding_sm: - if not coding_sm or not coding_sm.active: + if not coding_sm.active: continue coding_state = coding_sm.next_state(c) if coding_state == MachineState.ERROR: diff --git a/src/pip/_vendor/chardet/escsm.py b/src/pip/_vendor/chardet/escsm.py index 0069523a049..11d4adf771f 100644 --- a/src/pip/_vendor/chardet/escsm.py +++ b/src/pip/_vendor/chardet/escsm.py @@ -12,7 +12,7 @@ # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either -# version 2.1 of the License, or (at your option) any later version. +# version 2.1 of the License, or (at your option) any later version. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -20,227 +20,242 @@ # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA # 02110-1301 USA ######################### END LICENSE BLOCK ######################### +from .codingstatemachinedict import CodingStateMachineDict from .enums import MachineState +# fmt: off HZ_CLS = ( -1,0,0,0,0,0,0,0, # 00 - 07 -0,0,0,0,0,0,0,0, # 08 - 0f -0,0,0,0,0,0,0,0, # 10 - 17 -0,0,0,1,0,0,0,0, # 18 - 1f -0,0,0,0,0,0,0,0, # 20 - 27 -0,0,0,0,0,0,0,0, # 28 - 2f -0,0,0,0,0,0,0,0, # 30 - 37 -0,0,0,0,0,0,0,0, # 38 - 3f -0,0,0,0,0,0,0,0, # 40 - 47 -0,0,0,0,0,0,0,0, # 48 - 4f -0,0,0,0,0,0,0,0, # 50 - 57 -0,0,0,0,0,0,0,0, # 58 - 5f -0,0,0,0,0,0,0,0, # 60 - 67 -0,0,0,0,0,0,0,0, # 68 - 6f -0,0,0,0,0,0,0,0, # 70 - 77 -0,0,0,4,0,5,2,0, # 78 - 7f -1,1,1,1,1,1,1,1, # 80 - 87 -1,1,1,1,1,1,1,1, # 88 - 8f -1,1,1,1,1,1,1,1, # 90 - 97 -1,1,1,1,1,1,1,1, # 98 - 9f -1,1,1,1,1,1,1,1, # a0 - a7 -1,1,1,1,1,1,1,1, # a8 - af -1,1,1,1,1,1,1,1, # b0 - b7 -1,1,1,1,1,1,1,1, # b8 - bf -1,1,1,1,1,1,1,1, # c0 - c7 -1,1,1,1,1,1,1,1, # c8 - cf -1,1,1,1,1,1,1,1, # d0 - d7 -1,1,1,1,1,1,1,1, # d8 - df -1,1,1,1,1,1,1,1, # e0 - e7 -1,1,1,1,1,1,1,1, # e8 - ef -1,1,1,1,1,1,1,1, # f0 - f7 -1,1,1,1,1,1,1,1, # f8 - ff + 1, 0, 0, 0, 0, 0, 0, 0, # 00 - 07 + 0, 0, 0, 0, 0, 0, 0, 0, # 08 - 0f + 0, 0, 0, 0, 0, 0, 0, 0, # 10 - 17 + 0, 0, 0, 1, 0, 0, 0, 0, # 18 - 1f + 0, 0, 0, 0, 0, 0, 0, 0, # 20 - 27 + 0, 0, 0, 0, 0, 0, 0, 0, # 28 - 2f + 0, 0, 0, 0, 0, 0, 0, 0, # 30 - 37 + 0, 0, 0, 0, 0, 0, 0, 0, # 38 - 3f + 0, 0, 0, 0, 0, 0, 0, 0, # 40 - 47 + 0, 0, 0, 0, 0, 0, 0, 0, # 48 - 4f + 0, 0, 0, 0, 0, 0, 0, 0, # 50 - 57 + 0, 0, 0, 0, 0, 0, 0, 0, # 58 - 5f + 0, 0, 0, 0, 0, 0, 0, 0, # 60 - 67 + 0, 0, 0, 0, 0, 0, 0, 0, # 68 - 6f + 0, 0, 0, 0, 0, 0, 0, 0, # 70 - 77 + 0, 0, 0, 4, 0, 5, 2, 0, # 78 - 7f + 1, 1, 1, 1, 1, 1, 1, 1, # 80 - 87 + 1, 1, 1, 1, 1, 1, 1, 1, # 88 - 8f + 1, 1, 1, 1, 1, 1, 1, 1, # 90 - 97 + 1, 1, 1, 1, 1, 1, 1, 1, # 98 - 9f + 1, 1, 1, 1, 1, 1, 1, 1, # a0 - a7 + 1, 1, 1, 1, 1, 1, 1, 1, # a8 - af + 1, 1, 1, 1, 1, 1, 1, 1, # b0 - b7 + 1, 1, 1, 1, 1, 1, 1, 1, # b8 - bf + 1, 1, 1, 1, 1, 1, 1, 1, # c0 - c7 + 1, 1, 1, 1, 1, 1, 1, 1, # c8 - cf + 1, 1, 1, 1, 1, 1, 1, 1, # d0 - d7 + 1, 1, 1, 1, 1, 1, 1, 1, # d8 - df + 1, 1, 1, 1, 1, 1, 1, 1, # e0 - e7 + 1, 1, 1, 1, 1, 1, 1, 1, # e8 - ef + 1, 1, 1, 1, 1, 1, 1, 1, # f0 - f7 + 1, 1, 1, 1, 1, 1, 1, 1, # f8 - ff ) HZ_ST = ( -MachineState.START,MachineState.ERROR, 3,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,# 00-07 -MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 08-0f -MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START, 4,MachineState.ERROR,# 10-17 - 5,MachineState.ERROR, 6,MachineState.ERROR, 5, 5, 4,MachineState.ERROR,# 18-1f - 4,MachineState.ERROR, 4, 4, 4,MachineState.ERROR, 4,MachineState.ERROR,# 20-27 - 4,MachineState.ITS_ME,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 28-2f +MachineState.START, MachineState.ERROR, 3, MachineState.START, MachineState.START, MachineState.START, MachineState.ERROR, MachineState.ERROR, # 00-07 +MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, # 08-0f +MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ERROR, MachineState.ERROR, MachineState.START, MachineState.START, 4, MachineState.ERROR, # 10-17 + 5, MachineState.ERROR, 6, MachineState.ERROR, 5, 5, 4, MachineState.ERROR, # 18-1f + 4, MachineState.ERROR, 4, 4, 4, MachineState.ERROR, 4, MachineState.ERROR, # 20-27 + 4, MachineState.ITS_ME, MachineState.START, MachineState.START, MachineState.START, MachineState.START, MachineState.START, MachineState.START, # 28-2f ) +# fmt: on HZ_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0) -HZ_SM_MODEL = {'class_table': HZ_CLS, - 'class_factor': 6, - 'state_table': HZ_ST, - 'char_len_table': HZ_CHAR_LEN_TABLE, - 'name': "HZ-GB-2312", - 'language': 'Chinese'} +HZ_SM_MODEL: CodingStateMachineDict = { + "class_table": HZ_CLS, + "class_factor": 6, + "state_table": HZ_ST, + "char_len_table": HZ_CHAR_LEN_TABLE, + "name": "HZ-GB-2312", + "language": "Chinese", +} +# fmt: off ISO2022CN_CLS = ( -2,0,0,0,0,0,0,0, # 00 - 07 -0,0,0,0,0,0,0,0, # 08 - 0f -0,0,0,0,0,0,0,0, # 10 - 17 -0,0,0,1,0,0,0,0, # 18 - 1f -0,0,0,0,0,0,0,0, # 20 - 27 -0,3,0,0,0,0,0,0, # 28 - 2f -0,0,0,0,0,0,0,0, # 30 - 37 -0,0,0,0,0,0,0,0, # 38 - 3f -0,0,0,4,0,0,0,0, # 40 - 47 -0,0,0,0,0,0,0,0, # 48 - 4f -0,0,0,0,0,0,0,0, # 50 - 57 -0,0,0,0,0,0,0,0, # 58 - 5f -0,0,0,0,0,0,0,0, # 60 - 67 -0,0,0,0,0,0,0,0, # 68 - 6f -0,0,0,0,0,0,0,0, # 70 - 77 -0,0,0,0,0,0,0,0, # 78 - 7f -2,2,2,2,2,2,2,2, # 80 - 87 -2,2,2,2,2,2,2,2, # 88 - 8f -2,2,2,2,2,2,2,2, # 90 - 97 -2,2,2,2,2,2,2,2, # 98 - 9f -2,2,2,2,2,2,2,2, # a0 - a7 -2,2,2,2,2,2,2,2, # a8 - af -2,2,2,2,2,2,2,2, # b0 - b7 -2,2,2,2,2,2,2,2, # b8 - bf -2,2,2,2,2,2,2,2, # c0 - c7 -2,2,2,2,2,2,2,2, # c8 - cf -2,2,2,2,2,2,2,2, # d0 - d7 -2,2,2,2,2,2,2,2, # d8 - df -2,2,2,2,2,2,2,2, # e0 - e7 -2,2,2,2,2,2,2,2, # e8 - ef -2,2,2,2,2,2,2,2, # f0 - f7 -2,2,2,2,2,2,2,2, # f8 - ff + 2, 0, 0, 0, 0, 0, 0, 0, # 00 - 07 + 0, 0, 0, 0, 0, 0, 0, 0, # 08 - 0f + 0, 0, 0, 0, 0, 0, 0, 0, # 10 - 17 + 0, 0, 0, 1, 0, 0, 0, 0, # 18 - 1f + 0, 0, 0, 0, 0, 0, 0, 0, # 20 - 27 + 0, 3, 0, 0, 0, 0, 0, 0, # 28 - 2f + 0, 0, 0, 0, 0, 0, 0, 0, # 30 - 37 + 0, 0, 0, 0, 0, 0, 0, 0, # 38 - 3f + 0, 0, 0, 4, 0, 0, 0, 0, # 40 - 47 + 0, 0, 0, 0, 0, 0, 0, 0, # 48 - 4f + 0, 0, 0, 0, 0, 0, 0, 0, # 50 - 57 + 0, 0, 0, 0, 0, 0, 0, 0, # 58 - 5f + 0, 0, 0, 0, 0, 0, 0, 0, # 60 - 67 + 0, 0, 0, 0, 0, 0, 0, 0, # 68 - 6f + 0, 0, 0, 0, 0, 0, 0, 0, # 70 - 77 + 0, 0, 0, 0, 0, 0, 0, 0, # 78 - 7f + 2, 2, 2, 2, 2, 2, 2, 2, # 80 - 87 + 2, 2, 2, 2, 2, 2, 2, 2, # 88 - 8f + 2, 2, 2, 2, 2, 2, 2, 2, # 90 - 97 + 2, 2, 2, 2, 2, 2, 2, 2, # 98 - 9f + 2, 2, 2, 2, 2, 2, 2, 2, # a0 - a7 + 2, 2, 2, 2, 2, 2, 2, 2, # a8 - af + 2, 2, 2, 2, 2, 2, 2, 2, # b0 - b7 + 2, 2, 2, 2, 2, 2, 2, 2, # b8 - bf + 2, 2, 2, 2, 2, 2, 2, 2, # c0 - c7 + 2, 2, 2, 2, 2, 2, 2, 2, # c8 - cf + 2, 2, 2, 2, 2, 2, 2, 2, # d0 - d7 + 2, 2, 2, 2, 2, 2, 2, 2, # d8 - df + 2, 2, 2, 2, 2, 2, 2, 2, # e0 - e7 + 2, 2, 2, 2, 2, 2, 2, 2, # e8 - ef + 2, 2, 2, 2, 2, 2, 2, 2, # f0 - f7 + 2, 2, 2, 2, 2, 2, 2, 2, # f8 - ff ) ISO2022CN_ST = ( -MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 00-07 -MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 08-0f -MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 10-17 -MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 4,MachineState.ERROR,# 18-1f -MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 20-27 - 5, 6,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 28-2f -MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 30-37 -MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.START,# 38-3f + MachineState.START, 3, MachineState.ERROR, MachineState.START, MachineState.START, MachineState.START, MachineState.START, MachineState.START, # 00-07 + MachineState.START, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, # 08-0f + MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, # 10-17 + MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, 4, MachineState.ERROR, # 18-1f + MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, # 20-27 + 5, 6, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, # 28-2f + MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, # 30-37 + MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ERROR, MachineState.START, # 38-3f ) +# fmt: on ISO2022CN_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0, 0, 0, 0) -ISO2022CN_SM_MODEL = {'class_table': ISO2022CN_CLS, - 'class_factor': 9, - 'state_table': ISO2022CN_ST, - 'char_len_table': ISO2022CN_CHAR_LEN_TABLE, - 'name': "ISO-2022-CN", - 'language': 'Chinese'} +ISO2022CN_SM_MODEL: CodingStateMachineDict = { + "class_table": ISO2022CN_CLS, + "class_factor": 9, + "state_table": ISO2022CN_ST, + "char_len_table": ISO2022CN_CHAR_LEN_TABLE, + "name": "ISO-2022-CN", + "language": "Chinese", +} +# fmt: off ISO2022JP_CLS = ( -2,0,0,0,0,0,0,0, # 00 - 07 -0,0,0,0,0,0,2,2, # 08 - 0f -0,0,0,0,0,0,0,0, # 10 - 17 -0,0,0,1,0,0,0,0, # 18 - 1f -0,0,0,0,7,0,0,0, # 20 - 27 -3,0,0,0,0,0,0,0, # 28 - 2f -0,0,0,0,0,0,0,0, # 30 - 37 -0,0,0,0,0,0,0,0, # 38 - 3f -6,0,4,0,8,0,0,0, # 40 - 47 -0,9,5,0,0,0,0,0, # 48 - 4f -0,0,0,0,0,0,0,0, # 50 - 57 -0,0,0,0,0,0,0,0, # 58 - 5f -0,0,0,0,0,0,0,0, # 60 - 67 -0,0,0,0,0,0,0,0, # 68 - 6f -0,0,0,0,0,0,0,0, # 70 - 77 -0,0,0,0,0,0,0,0, # 78 - 7f -2,2,2,2,2,2,2,2, # 80 - 87 -2,2,2,2,2,2,2,2, # 88 - 8f -2,2,2,2,2,2,2,2, # 90 - 97 -2,2,2,2,2,2,2,2, # 98 - 9f -2,2,2,2,2,2,2,2, # a0 - a7 -2,2,2,2,2,2,2,2, # a8 - af -2,2,2,2,2,2,2,2, # b0 - b7 -2,2,2,2,2,2,2,2, # b8 - bf -2,2,2,2,2,2,2,2, # c0 - c7 -2,2,2,2,2,2,2,2, # c8 - cf -2,2,2,2,2,2,2,2, # d0 - d7 -2,2,2,2,2,2,2,2, # d8 - df -2,2,2,2,2,2,2,2, # e0 - e7 -2,2,2,2,2,2,2,2, # e8 - ef -2,2,2,2,2,2,2,2, # f0 - f7 -2,2,2,2,2,2,2,2, # f8 - ff + 2, 0, 0, 0, 0, 0, 0, 0, # 00 - 07 + 0, 0, 0, 0, 0, 0, 2, 2, # 08 - 0f + 0, 0, 0, 0, 0, 0, 0, 0, # 10 - 17 + 0, 0, 0, 1, 0, 0, 0, 0, # 18 - 1f + 0, 0, 0, 0, 7, 0, 0, 0, # 20 - 27 + 3, 0, 0, 0, 0, 0, 0, 0, # 28 - 2f + 0, 0, 0, 0, 0, 0, 0, 0, # 30 - 37 + 0, 0, 0, 0, 0, 0, 0, 0, # 38 - 3f + 6, 0, 4, 0, 8, 0, 0, 0, # 40 - 47 + 0, 9, 5, 0, 0, 0, 0, 0, # 48 - 4f + 0, 0, 0, 0, 0, 0, 0, 0, # 50 - 57 + 0, 0, 0, 0, 0, 0, 0, 0, # 58 - 5f + 0, 0, 0, 0, 0, 0, 0, 0, # 60 - 67 + 0, 0, 0, 0, 0, 0, 0, 0, # 68 - 6f + 0, 0, 0, 0, 0, 0, 0, 0, # 70 - 77 + 0, 0, 0, 0, 0, 0, 0, 0, # 78 - 7f + 2, 2, 2, 2, 2, 2, 2, 2, # 80 - 87 + 2, 2, 2, 2, 2, 2, 2, 2, # 88 - 8f + 2, 2, 2, 2, 2, 2, 2, 2, # 90 - 97 + 2, 2, 2, 2, 2, 2, 2, 2, # 98 - 9f + 2, 2, 2, 2, 2, 2, 2, 2, # a0 - a7 + 2, 2, 2, 2, 2, 2, 2, 2, # a8 - af + 2, 2, 2, 2, 2, 2, 2, 2, # b0 - b7 + 2, 2, 2, 2, 2, 2, 2, 2, # b8 - bf + 2, 2, 2, 2, 2, 2, 2, 2, # c0 - c7 + 2, 2, 2, 2, 2, 2, 2, 2, # c8 - cf + 2, 2, 2, 2, 2, 2, 2, 2, # d0 - d7 + 2, 2, 2, 2, 2, 2, 2, 2, # d8 - df + 2, 2, 2, 2, 2, 2, 2, 2, # e0 - e7 + 2, 2, 2, 2, 2, 2, 2, 2, # e8 - ef + 2, 2, 2, 2, 2, 2, 2, 2, # f0 - f7 + 2, 2, 2, 2, 2, 2, 2, 2, # f8 - ff ) ISO2022JP_ST = ( -MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 00-07 -MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 08-0f -MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 10-17 -MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,# 18-1f -MachineState.ERROR, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 4,MachineState.ERROR,MachineState.ERROR,# 20-27 -MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 6,MachineState.ITS_ME,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,# 28-2f -MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,# 30-37 -MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 38-3f -MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,MachineState.START,MachineState.START,# 40-47 + MachineState.START, 3, MachineState.ERROR, MachineState.START, MachineState.START, MachineState.START, MachineState.START, MachineState.START, # 00-07 + MachineState.START, MachineState.START, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, # 08-0f + MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, # 10-17 + MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ERROR, MachineState.ERROR, # 18-1f + MachineState.ERROR, 5, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, 4, MachineState.ERROR, MachineState.ERROR, # 20-27 + MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, 6, MachineState.ITS_ME, MachineState.ERROR, MachineState.ITS_ME, MachineState.ERROR, # 28-2f + MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ITS_ME, # 30-37 + MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, # 38-3f + MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ERROR, MachineState.START, MachineState.START, # 40-47 ) +# fmt: on ISO2022JP_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0) -ISO2022JP_SM_MODEL = {'class_table': ISO2022JP_CLS, - 'class_factor': 10, - 'state_table': ISO2022JP_ST, - 'char_len_table': ISO2022JP_CHAR_LEN_TABLE, - 'name': "ISO-2022-JP", - 'language': 'Japanese'} +ISO2022JP_SM_MODEL: CodingStateMachineDict = { + "class_table": ISO2022JP_CLS, + "class_factor": 10, + "state_table": ISO2022JP_ST, + "char_len_table": ISO2022JP_CHAR_LEN_TABLE, + "name": "ISO-2022-JP", + "language": "Japanese", +} +# fmt: off ISO2022KR_CLS = ( -2,0,0,0,0,0,0,0, # 00 - 07 -0,0,0,0,0,0,0,0, # 08 - 0f -0,0,0,0,0,0,0,0, # 10 - 17 -0,0,0,1,0,0,0,0, # 18 - 1f -0,0,0,0,3,0,0,0, # 20 - 27 -0,4,0,0,0,0,0,0, # 28 - 2f -0,0,0,0,0,0,0,0, # 30 - 37 -0,0,0,0,0,0,0,0, # 38 - 3f -0,0,0,5,0,0,0,0, # 40 - 47 -0,0,0,0,0,0,0,0, # 48 - 4f -0,0,0,0,0,0,0,0, # 50 - 57 -0,0,0,0,0,0,0,0, # 58 - 5f -0,0,0,0,0,0,0,0, # 60 - 67 -0,0,0,0,0,0,0,0, # 68 - 6f -0,0,0,0,0,0,0,0, # 70 - 77 -0,0,0,0,0,0,0,0, # 78 - 7f -2,2,2,2,2,2,2,2, # 80 - 87 -2,2,2,2,2,2,2,2, # 88 - 8f -2,2,2,2,2,2,2,2, # 90 - 97 -2,2,2,2,2,2,2,2, # 98 - 9f -2,2,2,2,2,2,2,2, # a0 - a7 -2,2,2,2,2,2,2,2, # a8 - af -2,2,2,2,2,2,2,2, # b0 - b7 -2,2,2,2,2,2,2,2, # b8 - bf -2,2,2,2,2,2,2,2, # c0 - c7 -2,2,2,2,2,2,2,2, # c8 - cf -2,2,2,2,2,2,2,2, # d0 - d7 -2,2,2,2,2,2,2,2, # d8 - df -2,2,2,2,2,2,2,2, # e0 - e7 -2,2,2,2,2,2,2,2, # e8 - ef -2,2,2,2,2,2,2,2, # f0 - f7 -2,2,2,2,2,2,2,2, # f8 - ff + 2, 0, 0, 0, 0, 0, 0, 0, # 00 - 07 + 0, 0, 0, 0, 0, 0, 0, 0, # 08 - 0f + 0, 0, 0, 0, 0, 0, 0, 0, # 10 - 17 + 0, 0, 0, 1, 0, 0, 0, 0, # 18 - 1f + 0, 0, 0, 0, 3, 0, 0, 0, # 20 - 27 + 0, 4, 0, 0, 0, 0, 0, 0, # 28 - 2f + 0, 0, 0, 0, 0, 0, 0, 0, # 30 - 37 + 0, 0, 0, 0, 0, 0, 0, 0, # 38 - 3f + 0, 0, 0, 5, 0, 0, 0, 0, # 40 - 47 + 0, 0, 0, 0, 0, 0, 0, 0, # 48 - 4f + 0, 0, 0, 0, 0, 0, 0, 0, # 50 - 57 + 0, 0, 0, 0, 0, 0, 0, 0, # 58 - 5f + 0, 0, 0, 0, 0, 0, 0, 0, # 60 - 67 + 0, 0, 0, 0, 0, 0, 0, 0, # 68 - 6f + 0, 0, 0, 0, 0, 0, 0, 0, # 70 - 77 + 0, 0, 0, 0, 0, 0, 0, 0, # 78 - 7f + 2, 2, 2, 2, 2, 2, 2, 2, # 80 - 87 + 2, 2, 2, 2, 2, 2, 2, 2, # 88 - 8f + 2, 2, 2, 2, 2, 2, 2, 2, # 90 - 97 + 2, 2, 2, 2, 2, 2, 2, 2, # 98 - 9f + 2, 2, 2, 2, 2, 2, 2, 2, # a0 - a7 + 2, 2, 2, 2, 2, 2, 2, 2, # a8 - af + 2, 2, 2, 2, 2, 2, 2, 2, # b0 - b7 + 2, 2, 2, 2, 2, 2, 2, 2, # b8 - bf + 2, 2, 2, 2, 2, 2, 2, 2, # c0 - c7 + 2, 2, 2, 2, 2, 2, 2, 2, # c8 - cf + 2, 2, 2, 2, 2, 2, 2, 2, # d0 - d7 + 2, 2, 2, 2, 2, 2, 2, 2, # d8 - df + 2, 2, 2, 2, 2, 2, 2, 2, # e0 - e7 + 2, 2, 2, 2, 2, 2, 2, 2, # e8 - ef + 2, 2, 2, 2, 2, 2, 2, 2, # f0 - f7 + 2, 2, 2, 2, 2, 2, 2, 2, # f8 - ff ) ISO2022KR_ST = ( -MachineState.START, 3,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,# 00-07 -MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,# 08-0f -MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 4,MachineState.ERROR,MachineState.ERROR,# 10-17 -MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,# 18-1f -MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.START,MachineState.START,MachineState.START,MachineState.START,# 20-27 + MachineState.START, 3, MachineState.ERROR, MachineState.START, MachineState.START, MachineState.START, MachineState.ERROR, MachineState.ERROR, # 00-07 + MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, # 08-0f + MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, 4, MachineState.ERROR, MachineState.ERROR, # 10-17 + MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, 5, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, # 18-1f + MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.START, MachineState.START, MachineState.START, MachineState.START, # 20-27 ) +# fmt: on ISO2022KR_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0) -ISO2022KR_SM_MODEL = {'class_table': ISO2022KR_CLS, - 'class_factor': 6, - 'state_table': ISO2022KR_ST, - 'char_len_table': ISO2022KR_CHAR_LEN_TABLE, - 'name': "ISO-2022-KR", - 'language': 'Korean'} - - +ISO2022KR_SM_MODEL: CodingStateMachineDict = { + "class_table": ISO2022KR_CLS, + "class_factor": 6, + "state_table": ISO2022KR_ST, + "char_len_table": ISO2022KR_CHAR_LEN_TABLE, + "name": "ISO-2022-KR", + "language": "Korean", +} diff --git a/src/pip/_vendor/chardet/eucjpprober.py b/src/pip/_vendor/chardet/eucjpprober.py index 20ce8f7d15b..39487f4098d 100644 --- a/src/pip/_vendor/chardet/eucjpprober.py +++ b/src/pip/_vendor/chardet/eucjpprober.py @@ -25,68 +25,78 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### -from .enums import ProbingState, MachineState -from .mbcharsetprober import MultiByteCharSetProber -from .codingstatemachine import CodingStateMachine +from typing import Union + from .chardistribution import EUCJPDistributionAnalysis +from .codingstatemachine import CodingStateMachine +from .enums import MachineState, ProbingState from .jpcntx import EUCJPContextAnalysis +from .mbcharsetprober import MultiByteCharSetProber from .mbcssm import EUCJP_SM_MODEL class EUCJPProber(MultiByteCharSetProber): - def __init__(self): - super(EUCJPProber, self).__init__() + def __init__(self) -> None: + super().__init__() self.coding_sm = CodingStateMachine(EUCJP_SM_MODEL) self.distribution_analyzer = EUCJPDistributionAnalysis() self.context_analyzer = EUCJPContextAnalysis() self.reset() - def reset(self): - super(EUCJPProber, self).reset() + def reset(self) -> None: + super().reset() self.context_analyzer.reset() @property - def charset_name(self): + def charset_name(self) -> str: return "EUC-JP" @property - def language(self): + def language(self) -> str: return "Japanese" - def feed(self, byte_str): - for i in range(len(byte_str)): - # PY3K: byte_str is a byte array, so byte_str[i] is an int, not a byte - coding_state = self.coding_sm.next_state(byte_str[i]) + def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState: + assert self.coding_sm is not None + assert self.distribution_analyzer is not None + + for i, byte in enumerate(byte_str): + # PY3K: byte_str is a byte array, so byte is an int, not a byte + coding_state = self.coding_sm.next_state(byte) if coding_state == MachineState.ERROR: - self.logger.debug('%s %s prober hit error at byte %s', - self.charset_name, self.language, i) + self.logger.debug( + "%s %s prober hit error at byte %s", + self.charset_name, + self.language, + i, + ) self._state = ProbingState.NOT_ME break - elif coding_state == MachineState.ITS_ME: + if coding_state == MachineState.ITS_ME: self._state = ProbingState.FOUND_IT break - elif coding_state == MachineState.START: + if coding_state == MachineState.START: char_len = self.coding_sm.get_current_charlen() if i == 0: - self._last_char[1] = byte_str[0] + self._last_char[1] = byte self.context_analyzer.feed(self._last_char, char_len) self.distribution_analyzer.feed(self._last_char, char_len) else: - self.context_analyzer.feed(byte_str[i - 1:i + 1], - char_len) - self.distribution_analyzer.feed(byte_str[i - 1:i + 1], - char_len) + self.context_analyzer.feed(byte_str[i - 1 : i + 1], char_len) + self.distribution_analyzer.feed(byte_str[i - 1 : i + 1], char_len) self._last_char[0] = byte_str[-1] if self.state == ProbingState.DETECTING: - if (self.context_analyzer.got_enough_data() and - (self.get_confidence() > self.SHORTCUT_THRESHOLD)): + if self.context_analyzer.got_enough_data() and ( + self.get_confidence() > self.SHORTCUT_THRESHOLD + ): self._state = ProbingState.FOUND_IT return self.state - def get_confidence(self): + def get_confidence(self) -> float: + assert self.distribution_analyzer is not None + context_conf = self.context_analyzer.get_confidence() distrib_conf = self.distribution_analyzer.get_confidence() return max(context_conf, distrib_conf) diff --git a/src/pip/_vendor/chardet/euckrfreq.py b/src/pip/_vendor/chardet/euckrfreq.py index b68078cb968..7dc3b10387d 100644 --- a/src/pip/_vendor/chardet/euckrfreq.py +++ b/src/pip/_vendor/chardet/euckrfreq.py @@ -43,6 +43,7 @@ EUCKR_TABLE_SIZE = 2352 # Char to FreqOrder table , +# fmt: off EUCKR_CHAR_TO_FREQ_ORDER = ( 13, 130, 120,1396, 481,1719,1720, 328, 609, 212,1721, 707, 400, 299,1722, 87, 1397,1723, 104, 536,1117,1203,1724,1267, 685,1268, 508,1725,1726,1727,1728,1398, @@ -192,4 +193,4 @@ 2629,2630,2631, 924, 648, 863, 603,2632,2633, 934,1540, 864, 865,2634, 642,1042, 670,1190,2635,2636,2637,2638, 168,2639, 652, 873, 542,1054,1541,2640,2641,2642, # 512, 256 ) - +# fmt: on diff --git a/src/pip/_vendor/chardet/euckrprober.py b/src/pip/_vendor/chardet/euckrprober.py index 345a060d023..1fc5de0462c 100644 --- a/src/pip/_vendor/chardet/euckrprober.py +++ b/src/pip/_vendor/chardet/euckrprober.py @@ -25,23 +25,23 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### -from .mbcharsetprober import MultiByteCharSetProber -from .codingstatemachine import CodingStateMachine from .chardistribution import EUCKRDistributionAnalysis +from .codingstatemachine import CodingStateMachine +from .mbcharsetprober import MultiByteCharSetProber from .mbcssm import EUCKR_SM_MODEL class EUCKRProber(MultiByteCharSetProber): - def __init__(self): - super(EUCKRProber, self).__init__() + def __init__(self) -> None: + super().__init__() self.coding_sm = CodingStateMachine(EUCKR_SM_MODEL) self.distribution_analyzer = EUCKRDistributionAnalysis() self.reset() @property - def charset_name(self): + def charset_name(self) -> str: return "EUC-KR" @property - def language(self): + def language(self) -> str: return "Korean" diff --git a/src/pip/_vendor/chardet/euctwfreq.py b/src/pip/_vendor/chardet/euctwfreq.py index ed7a995a3aa..4900ccc160a 100644 --- a/src/pip/_vendor/chardet/euctwfreq.py +++ b/src/pip/_vendor/chardet/euctwfreq.py @@ -43,345 +43,346 @@ EUCTW_TYPICAL_DISTRIBUTION_RATIO = 0.75 -# Char to FreqOrder table , +# Char to FreqOrder table EUCTW_TABLE_SIZE = 5376 +# fmt: off EUCTW_CHAR_TO_FREQ_ORDER = ( - 1,1800,1506, 255,1431, 198, 9, 82, 6,7310, 177, 202,3615,1256,2808, 110, # 2742 -3735, 33,3241, 261, 76, 44,2113, 16,2931,2184,1176, 659,3868, 26,3404,2643, # 2758 -1198,3869,3313,4060, 410,2211, 302, 590, 361,1963, 8, 204, 58,4296,7311,1931, # 2774 - 63,7312,7313, 317,1614, 75, 222, 159,4061,2412,1480,7314,3500,3068, 224,2809, # 2790 -3616, 3, 10,3870,1471, 29,2774,1135,2852,1939, 873, 130,3242,1123, 312,7315, # 2806 -4297,2051, 507, 252, 682,7316, 142,1914, 124, 206,2932, 34,3501,3173, 64, 604, # 2822 -7317,2494,1976,1977, 155,1990, 645, 641,1606,7318,3405, 337, 72, 406,7319, 80, # 2838 - 630, 238,3174,1509, 263, 939,1092,2644, 756,1440,1094,3406, 449, 69,2969, 591, # 2854 - 179,2095, 471, 115,2034,1843, 60, 50,2970, 134, 806,1868, 734,2035,3407, 180, # 2870 - 995,1607, 156, 537,2893, 688,7320, 319,1305, 779,2144, 514,2374, 298,4298, 359, # 2886 -2495, 90,2707,1338, 663, 11, 906,1099,2545, 20,2436, 182, 532,1716,7321, 732, # 2902 -1376,4062,1311,1420,3175, 25,2312,1056, 113, 399, 382,1949, 242,3408,2467, 529, # 2918 -3243, 475,1447,3617,7322, 117, 21, 656, 810,1297,2295,2329,3502,7323, 126,4063, # 2934 - 706, 456, 150, 613,4299, 71,1118,2036,4064, 145,3069, 85, 835, 486,2114,1246, # 2950 -1426, 428, 727,1285,1015, 800, 106, 623, 303,1281,7324,2127,2354, 347,3736, 221, # 2966 -3503,3110,7325,1955,1153,4065, 83, 296,1199,3070, 192, 624, 93,7326, 822,1897, # 2982 -2810,3111, 795,2064, 991,1554,1542,1592, 27, 43,2853, 859, 139,1456, 860,4300, # 2998 - 437, 712,3871, 164,2392,3112, 695, 211,3017,2096, 195,3872,1608,3504,3505,3618, # 3014 -3873, 234, 811,2971,2097,3874,2229,1441,3506,1615,2375, 668,2076,1638, 305, 228, # 3030 -1664,4301, 467, 415,7327, 262,2098,1593, 239, 108, 300, 200,1033, 512,1247,2077, # 3046 -7328,7329,2173,3176,3619,2673, 593, 845,1062,3244, 88,1723,2037,3875,1950, 212, # 3062 - 266, 152, 149, 468,1898,4066,4302, 77, 187,7330,3018, 37, 5,2972,7331,3876, # 3078 -7332,7333, 39,2517,4303,2894,3177,2078, 55, 148, 74,4304, 545, 483,1474,1029, # 3094 -1665, 217,1869,1531,3113,1104,2645,4067, 24, 172,3507, 900,3877,3508,3509,4305, # 3110 - 32,1408,2811,1312, 329, 487,2355,2247,2708, 784,2674, 4,3019,3314,1427,1788, # 3126 - 188, 109, 499,7334,3620,1717,1789, 888,1217,3020,4306,7335,3510,7336,3315,1520, # 3142 -3621,3878, 196,1034, 775,7337,7338, 929,1815, 249, 439, 38,7339,1063,7340, 794, # 3158 -3879,1435,2296, 46, 178,3245,2065,7341,2376,7342, 214,1709,4307, 804, 35, 707, # 3174 - 324,3622,1601,2546, 140, 459,4068,7343,7344,1365, 839, 272, 978,2257,2572,3409, # 3190 -2128,1363,3623,1423, 697, 100,3071, 48, 70,1231, 495,3114,2193,7345,1294,7346, # 3206 -2079, 462, 586,1042,3246, 853, 256, 988, 185,2377,3410,1698, 434,1084,7347,3411, # 3222 - 314,2615,2775,4308,2330,2331, 569,2280, 637,1816,2518, 757,1162,1878,1616,3412, # 3238 - 287,1577,2115, 768,4309,1671,2854,3511,2519,1321,3737, 909,2413,7348,4069, 933, # 3254 -3738,7349,2052,2356,1222,4310, 765,2414,1322, 786,4311,7350,1919,1462,1677,2895, # 3270 -1699,7351,4312,1424,2437,3115,3624,2590,3316,1774,1940,3413,3880,4070, 309,1369, # 3286 -1130,2812, 364,2230,1653,1299,3881,3512,3882,3883,2646, 525,1085,3021, 902,2000, # 3302 -1475, 964,4313, 421,1844,1415,1057,2281, 940,1364,3116, 376,4314,4315,1381, 7, # 3318 -2520, 983,2378, 336,1710,2675,1845, 321,3414, 559,1131,3022,2742,1808,1132,1313, # 3334 - 265,1481,1857,7352, 352,1203,2813,3247, 167,1089, 420,2814, 776, 792,1724,3513, # 3350 -4071,2438,3248,7353,4072,7354, 446, 229, 333,2743, 901,3739,1200,1557,4316,2647, # 3366 -1920, 395,2744,2676,3740,4073,1835, 125, 916,3178,2616,4317,7355,7356,3741,7357, # 3382 -7358,7359,4318,3117,3625,1133,2547,1757,3415,1510,2313,1409,3514,7360,2145, 438, # 3398 -2591,2896,2379,3317,1068, 958,3023, 461, 311,2855,2677,4074,1915,3179,4075,1978, # 3414 - 383, 750,2745,2617,4076, 274, 539, 385,1278,1442,7361,1154,1964, 384, 561, 210, # 3430 - 98,1295,2548,3515,7362,1711,2415,1482,3416,3884,2897,1257, 129,7363,3742, 642, # 3446 - 523,2776,2777,2648,7364, 141,2231,1333, 68, 176, 441, 876, 907,4077, 603,2592, # 3462 - 710, 171,3417, 404, 549, 18,3118,2393,1410,3626,1666,7365,3516,4319,2898,4320, # 3478 -7366,2973, 368,7367, 146, 366, 99, 871,3627,1543, 748, 807,1586,1185, 22,2258, # 3494 - 379,3743,3180,7368,3181, 505,1941,2618,1991,1382,2314,7369, 380,2357, 218, 702, # 3510 -1817,1248,3418,3024,3517,3318,3249,7370,2974,3628, 930,3250,3744,7371, 59,7372, # 3526 - 585, 601,4078, 497,3419,1112,1314,4321,1801,7373,1223,1472,2174,7374, 749,1836, # 3542 - 690,1899,3745,1772,3885,1476, 429,1043,1790,2232,2116, 917,4079, 447,1086,1629, # 3558 -7375, 556,7376,7377,2020,1654, 844,1090, 105, 550, 966,1758,2815,1008,1782, 686, # 3574 -1095,7378,2282, 793,1602,7379,3518,2593,4322,4080,2933,2297,4323,3746, 980,2496, # 3590 - 544, 353, 527,4324, 908,2678,2899,7380, 381,2619,1942,1348,7381,1341,1252, 560, # 3606 -3072,7382,3420,2856,7383,2053, 973, 886,2080, 143,4325,7384,7385, 157,3886, 496, # 3622 -4081, 57, 840, 540,2038,4326,4327,3421,2117,1445, 970,2259,1748,1965,2081,4082, # 3638 -3119,1234,1775,3251,2816,3629, 773,1206,2129,1066,2039,1326,3887,1738,1725,4083, # 3654 - 279,3120, 51,1544,2594, 423,1578,2130,2066, 173,4328,1879,7386,7387,1583, 264, # 3670 - 610,3630,4329,2439, 280, 154,7388,7389,7390,1739, 338,1282,3073, 693,2857,1411, # 3686 -1074,3747,2440,7391,4330,7392,7393,1240, 952,2394,7394,2900,1538,2679, 685,1483, # 3702 -4084,2468,1436, 953,4085,2054,4331, 671,2395, 79,4086,2441,3252, 608, 567,2680, # 3718 -3422,4087,4088,1691, 393,1261,1791,2396,7395,4332,7396,7397,7398,7399,1383,1672, # 3734 -3748,3182,1464, 522,1119, 661,1150, 216, 675,4333,3888,1432,3519, 609,4334,2681, # 3750 -2397,7400,7401,7402,4089,3025, 0,7403,2469, 315, 231,2442, 301,3319,4335,2380, # 3766 -7404, 233,4090,3631,1818,4336,4337,7405, 96,1776,1315,2082,7406, 257,7407,1809, # 3782 -3632,2709,1139,1819,4091,2021,1124,2163,2778,1777,2649,7408,3074, 363,1655,3183, # 3798 -7409,2975,7410,7411,7412,3889,1567,3890, 718, 103,3184, 849,1443, 341,3320,2934, # 3814 -1484,7413,1712, 127, 67, 339,4092,2398, 679,1412, 821,7414,7415, 834, 738, 351, # 3830 -2976,2146, 846, 235,1497,1880, 418,1992,3749,2710, 186,1100,2147,2746,3520,1545, # 3846 -1355,2935,2858,1377, 583,3891,4093,2573,2977,7416,1298,3633,1078,2549,3634,2358, # 3862 - 78,3750,3751, 267,1289,2099,2001,1594,4094, 348, 369,1274,2194,2175,1837,4338, # 3878 -1820,2817,3635,2747,2283,2002,4339,2936,2748, 144,3321, 882,4340,3892,2749,3423, # 3894 -4341,2901,7417,4095,1726, 320,7418,3893,3026, 788,2978,7419,2818,1773,1327,2859, # 3910 -3894,2819,7420,1306,4342,2003,1700,3752,3521,2359,2650, 787,2022, 506, 824,3636, # 3926 - 534, 323,4343,1044,3322,2023,1900, 946,3424,7421,1778,1500,1678,7422,1881,4344, # 3942 - 165, 243,4345,3637,2521, 123, 683,4096, 764,4346, 36,3895,1792, 589,2902, 816, # 3958 - 626,1667,3027,2233,1639,1555,1622,3753,3896,7423,3897,2860,1370,1228,1932, 891, # 3974 -2083,2903, 304,4097,7424, 292,2979,2711,3522, 691,2100,4098,1115,4347, 118, 662, # 3990 -7425, 611,1156, 854,2381,1316,2861, 2, 386, 515,2904,7426,7427,3253, 868,2234, # 4006 -1486, 855,2651, 785,2212,3028,7428,1040,3185,3523,7429,3121, 448,7430,1525,7431, # 4022 -2164,4348,7432,3754,7433,4099,2820,3524,3122, 503, 818,3898,3123,1568, 814, 676, # 4038 -1444, 306,1749,7434,3755,1416,1030, 197,1428, 805,2821,1501,4349,7435,7436,7437, # 4054 -1993,7438,4350,7439,7440,2195, 13,2779,3638,2980,3124,1229,1916,7441,3756,2131, # 4070 -7442,4100,4351,2399,3525,7443,2213,1511,1727,1120,7444,7445, 646,3757,2443, 307, # 4086 -7446,7447,1595,3186,7448,7449,7450,3639,1113,1356,3899,1465,2522,2523,7451, 519, # 4102 -7452, 128,2132, 92,2284,1979,7453,3900,1512, 342,3125,2196,7454,2780,2214,1980, # 4118 -3323,7455, 290,1656,1317, 789, 827,2360,7456,3758,4352, 562, 581,3901,7457, 401, # 4134 -4353,2248, 94,4354,1399,2781,7458,1463,2024,4355,3187,1943,7459, 828,1105,4101, # 4150 -1262,1394,7460,4102, 605,4356,7461,1783,2862,7462,2822, 819,2101, 578,2197,2937, # 4166 -7463,1502, 436,3254,4103,3255,2823,3902,2905,3425,3426,7464,2712,2315,7465,7466, # 4182 -2332,2067, 23,4357, 193, 826,3759,2102, 699,1630,4104,3075, 390,1793,1064,3526, # 4198 -7467,1579,3076,3077,1400,7468,4105,1838,1640,2863,7469,4358,4359, 137,4106, 598, # 4214 -3078,1966, 780, 104, 974,2938,7470, 278, 899, 253, 402, 572, 504, 493,1339,7471, # 4230 -3903,1275,4360,2574,2550,7472,3640,3029,3079,2249, 565,1334,2713, 863, 41,7473, # 4246 -7474,4361,7475,1657,2333, 19, 463,2750,4107, 606,7476,2981,3256,1087,2084,1323, # 4262 -2652,2982,7477,1631,1623,1750,4108,2682,7478,2864, 791,2714,2653,2334, 232,2416, # 4278 -7479,2983,1498,7480,2654,2620, 755,1366,3641,3257,3126,2025,1609, 119,1917,3427, # 4294 - 862,1026,4109,7481,3904,3760,4362,3905,4363,2260,1951,2470,7482,1125, 817,4110, # 4310 -4111,3906,1513,1766,2040,1487,4112,3030,3258,2824,3761,3127,7483,7484,1507,7485, # 4326 -2683, 733, 40,1632,1106,2865, 345,4113, 841,2524, 230,4364,2984,1846,3259,3428, # 4342 -7486,1263, 986,3429,7487, 735, 879, 254,1137, 857, 622,1300,1180,1388,1562,3907, # 4358 -3908,2939, 967,2751,2655,1349, 592,2133,1692,3324,2985,1994,4114,1679,3909,1901, # 4374 -2185,7488, 739,3642,2715,1296,1290,7489,4115,2198,2199,1921,1563,2595,2551,1870, # 4390 -2752,2986,7490, 435,7491, 343,1108, 596, 17,1751,4365,2235,3430,3643,7492,4366, # 4406 - 294,3527,2940,1693, 477, 979, 281,2041,3528, 643,2042,3644,2621,2782,2261,1031, # 4422 -2335,2134,2298,3529,4367, 367,1249,2552,7493,3530,7494,4368,1283,3325,2004, 240, # 4438 -1762,3326,4369,4370, 836,1069,3128, 474,7495,2148,2525, 268,3531,7496,3188,1521, # 4454 -1284,7497,1658,1546,4116,7498,3532,3533,7499,4117,3327,2684,1685,4118, 961,1673, # 4470 -2622, 190,2005,2200,3762,4371,4372,7500, 570,2497,3645,1490,7501,4373,2623,3260, # 4486 -1956,4374, 584,1514, 396,1045,1944,7502,4375,1967,2444,7503,7504,4376,3910, 619, # 4502 -7505,3129,3261, 215,2006,2783,2553,3189,4377,3190,4378, 763,4119,3763,4379,7506, # 4518 -7507,1957,1767,2941,3328,3646,1174, 452,1477,4380,3329,3130,7508,2825,1253,2382, # 4534 -2186,1091,2285,4120, 492,7509, 638,1169,1824,2135,1752,3911, 648, 926,1021,1324, # 4550 -4381, 520,4382, 997, 847,1007, 892,4383,3764,2262,1871,3647,7510,2400,1784,4384, # 4566 -1952,2942,3080,3191,1728,4121,2043,3648,4385,2007,1701,3131,1551, 30,2263,4122, # 4582 -7511,2026,4386,3534,7512, 501,7513,4123, 594,3431,2165,1821,3535,3432,3536,3192, # 4598 - 829,2826,4124,7514,1680,3132,1225,4125,7515,3262,4387,4126,3133,2336,7516,4388, # 4614 -4127,7517,3912,3913,7518,1847,2383,2596,3330,7519,4389, 374,3914, 652,4128,4129, # 4630 - 375,1140, 798,7520,7521,7522,2361,4390,2264, 546,1659, 138,3031,2445,4391,7523, # 4646 -2250, 612,1848, 910, 796,3765,1740,1371, 825,3766,3767,7524,2906,2554,7525, 692, # 4662 - 444,3032,2624, 801,4392,4130,7526,1491, 244,1053,3033,4131,4132, 340,7527,3915, # 4678 -1041,2987, 293,1168, 87,1357,7528,1539, 959,7529,2236, 721, 694,4133,3768, 219, # 4694 -1478, 644,1417,3331,2656,1413,1401,1335,1389,3916,7530,7531,2988,2362,3134,1825, # 4710 - 730,1515, 184,2827, 66,4393,7532,1660,2943, 246,3332, 378,1457, 226,3433, 975, # 4726 -3917,2944,1264,3537, 674, 696,7533, 163,7534,1141,2417,2166, 713,3538,3333,4394, # 4742 -3918,7535,7536,1186, 15,7537,1079,1070,7538,1522,3193,3539, 276,1050,2716, 758, # 4758 -1126, 653,2945,3263,7539,2337, 889,3540,3919,3081,2989, 903,1250,4395,3920,3434, # 4774 -3541,1342,1681,1718, 766,3264, 286, 89,2946,3649,7540,1713,7541,2597,3334,2990, # 4790 -7542,2947,2215,3194,2866,7543,4396,2498,2526, 181, 387,1075,3921, 731,2187,3335, # 4806 -7544,3265, 310, 313,3435,2299, 770,4134, 54,3034, 189,4397,3082,3769,3922,7545, # 4822 -1230,1617,1849, 355,3542,4135,4398,3336, 111,4136,3650,1350,3135,3436,3035,4137, # 4838 -2149,3266,3543,7546,2784,3923,3924,2991, 722,2008,7547,1071, 247,1207,2338,2471, # 4854 -1378,4399,2009, 864,1437,1214,4400, 373,3770,1142,2216, 667,4401, 442,2753,2555, # 4870 -3771,3925,1968,4138,3267,1839, 837, 170,1107, 934,1336,1882,7548,7549,2118,4139, # 4886 -2828, 743,1569,7550,4402,4140, 582,2384,1418,3437,7551,1802,7552, 357,1395,1729, # 4902 -3651,3268,2418,1564,2237,7553,3083,3772,1633,4403,1114,2085,4141,1532,7554, 482, # 4918 -2446,4404,7555,7556,1492, 833,1466,7557,2717,3544,1641,2829,7558,1526,1272,3652, # 4934 -4142,1686,1794, 416,2556,1902,1953,1803,7559,3773,2785,3774,1159,2316,7560,2867, # 4950 -4405,1610,1584,3036,2419,2754, 443,3269,1163,3136,7561,7562,3926,7563,4143,2499, # 4966 -3037,4406,3927,3137,2103,1647,3545,2010,1872,4144,7564,4145, 431,3438,7565, 250, # 4982 - 97, 81,4146,7566,1648,1850,1558, 160, 848,7567, 866, 740,1694,7568,2201,2830, # 4998 -3195,4147,4407,3653,1687, 950,2472, 426, 469,3196,3654,3655,3928,7569,7570,1188, # 5014 - 424,1995, 861,3546,4148,3775,2202,2685, 168,1235,3547,4149,7571,2086,1674,4408, # 5030 -3337,3270, 220,2557,1009,7572,3776, 670,2992, 332,1208, 717,7573,7574,3548,2447, # 5046 -3929,3338,7575, 513,7576,1209,2868,3339,3138,4409,1080,7577,7578,7579,7580,2527, # 5062 -3656,3549, 815,1587,3930,3931,7581,3550,3439,3777,1254,4410,1328,3038,1390,3932, # 5078 -1741,3933,3778,3934,7582, 236,3779,2448,3271,7583,7584,3657,3780,1273,3781,4411, # 5094 -7585, 308,7586,4412, 245,4413,1851,2473,1307,2575, 430, 715,2136,2449,7587, 270, # 5110 - 199,2869,3935,7588,3551,2718,1753, 761,1754, 725,1661,1840,4414,3440,3658,7589, # 5126 -7590, 587, 14,3272, 227,2598, 326, 480,2265, 943,2755,3552, 291, 650,1883,7591, # 5142 -1702,1226, 102,1547, 62,3441, 904,4415,3442,1164,4150,7592,7593,1224,1548,2756, # 5158 - 391, 498,1493,7594,1386,1419,7595,2055,1177,4416, 813, 880,1081,2363, 566,1145, # 5174 -4417,2286,1001,1035,2558,2599,2238, 394,1286,7596,7597,2068,7598, 86,1494,1730, # 5190 -3936, 491,1588, 745, 897,2948, 843,3340,3937,2757,2870,3273,1768, 998,2217,2069, # 5206 - 397,1826,1195,1969,3659,2993,3341, 284,7599,3782,2500,2137,2119,1903,7600,3938, # 5222 -2150,3939,4151,1036,3443,1904, 114,2559,4152, 209,1527,7601,7602,2949,2831,2625, # 5238 -2385,2719,3139, 812,2560,7603,3274,7604,1559, 737,1884,3660,1210, 885, 28,2686, # 5254 -3553,3783,7605,4153,1004,1779,4418,7606, 346,1981,2218,2687,4419,3784,1742, 797, # 5270 -1642,3940,1933,1072,1384,2151, 896,3941,3275,3661,3197,2871,3554,7607,2561,1958, # 5286 -4420,2450,1785,7608,7609,7610,3942,4154,1005,1308,3662,4155,2720,4421,4422,1528, # 5302 -2600, 161,1178,4156,1982, 987,4423,1101,4157, 631,3943,1157,3198,2420,1343,1241, # 5318 -1016,2239,2562, 372, 877,2339,2501,1160, 555,1934, 911,3944,7611, 466,1170, 169, # 5334 -1051,2907,2688,3663,2474,2994,1182,2011,2563,1251,2626,7612, 992,2340,3444,1540, # 5350 -2721,1201,2070,2401,1996,2475,7613,4424, 528,1922,2188,1503,1873,1570,2364,3342, # 5366 -3276,7614, 557,1073,7615,1827,3445,2087,2266,3140,3039,3084, 767,3085,2786,4425, # 5382 -1006,4158,4426,2341,1267,2176,3664,3199, 778,3945,3200,2722,1597,2657,7616,4427, # 5398 -7617,3446,7618,7619,7620,3277,2689,1433,3278, 131, 95,1504,3946, 723,4159,3141, # 5414 -1841,3555,2758,2189,3947,2027,2104,3665,7621,2995,3948,1218,7622,3343,3201,3949, # 5430 -4160,2576, 248,1634,3785, 912,7623,2832,3666,3040,3786, 654, 53,7624,2996,7625, # 5446 -1688,4428, 777,3447,1032,3950,1425,7626, 191, 820,2120,2833, 971,4429, 931,3202, # 5462 - 135, 664, 783,3787,1997, 772,2908,1935,3951,3788,4430,2909,3203, 282,2723, 640, # 5478 -1372,3448,1127, 922, 325,3344,7627,7628, 711,2044,7629,7630,3952,2219,2787,1936, # 5494 -3953,3345,2220,2251,3789,2300,7631,4431,3790,1258,3279,3954,3204,2138,2950,3955, # 5510 -3956,7632,2221, 258,3205,4432, 101,1227,7633,3280,1755,7634,1391,3281,7635,2910, # 5526 -2056, 893,7636,7637,7638,1402,4161,2342,7639,7640,3206,3556,7641,7642, 878,1325, # 5542 -1780,2788,4433, 259,1385,2577, 744,1183,2267,4434,7643,3957,2502,7644, 684,1024, # 5558 -4162,7645, 472,3557,3449,1165,3282,3958,3959, 322,2152, 881, 455,1695,1152,1340, # 5574 - 660, 554,2153,4435,1058,4436,4163, 830,1065,3346,3960,4437,1923,7646,1703,1918, # 5590 -7647, 932,2268, 122,7648,4438, 947, 677,7649,3791,2627, 297,1905,1924,2269,4439, # 5606 -2317,3283,7650,7651,4164,7652,4165, 84,4166, 112, 989,7653, 547,1059,3961, 701, # 5622 -3558,1019,7654,4167,7655,3450, 942, 639, 457,2301,2451, 993,2951, 407, 851, 494, # 5638 -4440,3347, 927,7656,1237,7657,2421,3348, 573,4168, 680, 921,2911,1279,1874, 285, # 5654 - 790,1448,1983, 719,2167,7658,7659,4441,3962,3963,1649,7660,1541, 563,7661,1077, # 5670 -7662,3349,3041,3451, 511,2997,3964,3965,3667,3966,1268,2564,3350,3207,4442,4443, # 5686 -7663, 535,1048,1276,1189,2912,2028,3142,1438,1373,2834,2952,1134,2012,7664,4169, # 5702 -1238,2578,3086,1259,7665, 700,7666,2953,3143,3668,4170,7667,4171,1146,1875,1906, # 5718 -4444,2601,3967, 781,2422, 132,1589, 203, 147, 273,2789,2402, 898,1786,2154,3968, # 5734 -3969,7668,3792,2790,7669,7670,4445,4446,7671,3208,7672,1635,3793, 965,7673,1804, # 5750 -2690,1516,3559,1121,1082,1329,3284,3970,1449,3794, 65,1128,2835,2913,2759,1590, # 5766 -3795,7674,7675, 12,2658, 45, 976,2579,3144,4447, 517,2528,1013,1037,3209,7676, # 5782 -3796,2836,7677,3797,7678,3452,7679,2602, 614,1998,2318,3798,3087,2724,2628,7680, # 5798 -2580,4172, 599,1269,7681,1810,3669,7682,2691,3088, 759,1060, 489,1805,3351,3285, # 5814 -1358,7683,7684,2386,1387,1215,2629,2252, 490,7685,7686,4173,1759,2387,2343,7687, # 5830 -4448,3799,1907,3971,2630,1806,3210,4449,3453,3286,2760,2344, 874,7688,7689,3454, # 5846 -3670,1858, 91,2914,3671,3042,3800,4450,7690,3145,3972,2659,7691,3455,1202,1403, # 5862 -3801,2954,2529,1517,2503,4451,3456,2504,7692,4452,7693,2692,1885,1495,1731,3973, # 5878 -2365,4453,7694,2029,7695,7696,3974,2693,1216, 237,2581,4174,2319,3975,3802,4454, # 5894 -4455,2694,3560,3457, 445,4456,7697,7698,7699,7700,2761, 61,3976,3672,1822,3977, # 5910 -7701, 687,2045, 935, 925, 405,2660, 703,1096,1859,2725,4457,3978,1876,1367,2695, # 5926 -3352, 918,2105,1781,2476, 334,3287,1611,1093,4458, 564,3146,3458,3673,3353, 945, # 5942 -2631,2057,4459,7702,1925, 872,4175,7703,3459,2696,3089, 349,4176,3674,3979,4460, # 5958 -3803,4177,3675,2155,3980,4461,4462,4178,4463,2403,2046, 782,3981, 400, 251,4179, # 5974 -1624,7704,7705, 277,3676, 299,1265, 476,1191,3804,2121,4180,4181,1109, 205,7706, # 5990 -2582,1000,2156,3561,1860,7707,7708,7709,4464,7710,4465,2565, 107,2477,2157,3982, # 6006 -3460,3147,7711,1533, 541,1301, 158, 753,4182,2872,3562,7712,1696, 370,1088,4183, # 6022 -4466,3563, 579, 327, 440, 162,2240, 269,1937,1374,3461, 968,3043, 56,1396,3090, # 6038 -2106,3288,3354,7713,1926,2158,4467,2998,7714,3564,7715,7716,3677,4468,2478,7717, # 6054 -2791,7718,1650,4469,7719,2603,7720,7721,3983,2661,3355,1149,3356,3984,3805,3985, # 6070 -7722,1076, 49,7723, 951,3211,3289,3290, 450,2837, 920,7724,1811,2792,2366,4184, # 6086 -1908,1138,2367,3806,3462,7725,3212,4470,1909,1147,1518,2423,4471,3807,7726,4472, # 6102 -2388,2604, 260,1795,3213,7727,7728,3808,3291, 708,7729,3565,1704,7730,3566,1351, # 6118 -1618,3357,2999,1886, 944,4185,3358,4186,3044,3359,4187,7731,3678, 422, 413,1714, # 6134 -3292, 500,2058,2345,4188,2479,7732,1344,1910, 954,7733,1668,7734,7735,3986,2404, # 6150 -4189,3567,3809,4190,7736,2302,1318,2505,3091, 133,3092,2873,4473, 629, 31,2838, # 6166 -2697,3810,4474, 850, 949,4475,3987,2955,1732,2088,4191,1496,1852,7737,3988, 620, # 6182 -3214, 981,1242,3679,3360,1619,3680,1643,3293,2139,2452,1970,1719,3463,2168,7738, # 6198 -3215,7739,7740,3361,1828,7741,1277,4476,1565,2047,7742,1636,3568,3093,7743, 869, # 6214 -2839, 655,3811,3812,3094,3989,3000,3813,1310,3569,4477,7744,7745,7746,1733, 558, # 6230 -4478,3681, 335,1549,3045,1756,4192,3682,1945,3464,1829,1291,1192, 470,2726,2107, # 6246 -2793, 913,1054,3990,7747,1027,7748,3046,3991,4479, 982,2662,3362,3148,3465,3216, # 6262 -3217,1946,2794,7749, 571,4480,7750,1830,7751,3570,2583,1523,2424,7752,2089, 984, # 6278 -4481,3683,1959,7753,3684, 852, 923,2795,3466,3685, 969,1519, 999,2048,2320,1705, # 6294 -7754,3095, 615,1662, 151, 597,3992,2405,2321,1049, 275,4482,3686,4193, 568,3687, # 6310 -3571,2480,4194,3688,7755,2425,2270, 409,3218,7756,1566,2874,3467,1002, 769,2840, # 6326 - 194,2090,3149,3689,2222,3294,4195, 628,1505,7757,7758,1763,2177,3001,3993, 521, # 6342 -1161,2584,1787,2203,2406,4483,3994,1625,4196,4197, 412, 42,3096, 464,7759,2632, # 6358 -4484,3363,1760,1571,2875,3468,2530,1219,2204,3814,2633,2140,2368,4485,4486,3295, # 6374 -1651,3364,3572,7760,7761,3573,2481,3469,7762,3690,7763,7764,2271,2091, 460,7765, # 6390 -4487,7766,3002, 962, 588,3574, 289,3219,2634,1116, 52,7767,3047,1796,7768,7769, # 6406 -7770,1467,7771,1598,1143,3691,4198,1984,1734,1067,4488,1280,3365, 465,4489,1572, # 6422 - 510,7772,1927,2241,1812,1644,3575,7773,4490,3692,7774,7775,2663,1573,1534,7776, # 6438 -7777,4199, 536,1807,1761,3470,3815,3150,2635,7778,7779,7780,4491,3471,2915,1911, # 6454 -2796,7781,3296,1122, 377,3220,7782, 360,7783,7784,4200,1529, 551,7785,2059,3693, # 6470 -1769,2426,7786,2916,4201,3297,3097,2322,2108,2030,4492,1404, 136,1468,1479, 672, # 6486 -1171,3221,2303, 271,3151,7787,2762,7788,2049, 678,2727, 865,1947,4493,7789,2013, # 6502 -3995,2956,7790,2728,2223,1397,3048,3694,4494,4495,1735,2917,3366,3576,7791,3816, # 6518 - 509,2841,2453,2876,3817,7792,7793,3152,3153,4496,4202,2531,4497,2304,1166,1010, # 6534 - 552, 681,1887,7794,7795,2957,2958,3996,1287,1596,1861,3154, 358, 453, 736, 175, # 6550 - 478,1117, 905,1167,1097,7796,1853,1530,7797,1706,7798,2178,3472,2287,3695,3473, # 6566 -3577,4203,2092,4204,7799,3367,1193,2482,4205,1458,2190,2205,1862,1888,1421,3298, # 6582 -2918,3049,2179,3474, 595,2122,7800,3997,7801,7802,4206,1707,2636, 223,3696,1359, # 6598 - 751,3098, 183,3475,7803,2797,3003, 419,2369, 633, 704,3818,2389, 241,7804,7805, # 6614 -7806, 838,3004,3697,2272,2763,2454,3819,1938,2050,3998,1309,3099,2242,1181,7807, # 6630 -1136,2206,3820,2370,1446,4207,2305,4498,7808,7809,4208,1055,2605, 484,3698,7810, # 6646 -3999, 625,4209,2273,3368,1499,4210,4000,7811,4001,4211,3222,2274,2275,3476,7812, # 6662 -7813,2764, 808,2606,3699,3369,4002,4212,3100,2532, 526,3370,3821,4213, 955,7814, # 6678 -1620,4214,2637,2427,7815,1429,3700,1669,1831, 994, 928,7816,3578,1260,7817,7818, # 6694 -7819,1948,2288, 741,2919,1626,4215,2729,2455, 867,1184, 362,3371,1392,7820,7821, # 6710 -4003,4216,1770,1736,3223,2920,4499,4500,1928,2698,1459,1158,7822,3050,3372,2877, # 6726 -1292,1929,2506,2842,3701,1985,1187,2071,2014,2607,4217,7823,2566,2507,2169,3702, # 6742 -2483,3299,7824,3703,4501,7825,7826, 666,1003,3005,1022,3579,4218,7827,4502,1813, # 6758 -2253, 574,3822,1603, 295,1535, 705,3823,4219, 283, 858, 417,7828,7829,3224,4503, # 6774 -4504,3051,1220,1889,1046,2276,2456,4004,1393,1599, 689,2567, 388,4220,7830,2484, # 6790 - 802,7831,2798,3824,2060,1405,2254,7832,4505,3825,2109,1052,1345,3225,1585,7833, # 6806 - 809,7834,7835,7836, 575,2730,3477, 956,1552,1469,1144,2323,7837,2324,1560,2457, # 6822 -3580,3226,4005, 616,2207,3155,2180,2289,7838,1832,7839,3478,4506,7840,1319,3704, # 6838 -3705,1211,3581,1023,3227,1293,2799,7841,7842,7843,3826, 607,2306,3827, 762,2878, # 6854 -1439,4221,1360,7844,1485,3052,7845,4507,1038,4222,1450,2061,2638,4223,1379,4508, # 6870 -2585,7846,7847,4224,1352,1414,2325,2921,1172,7848,7849,3828,3829,7850,1797,1451, # 6886 -7851,7852,7853,7854,2922,4006,4007,2485,2346, 411,4008,4009,3582,3300,3101,4509, # 6902 -1561,2664,1452,4010,1375,7855,7856, 47,2959, 316,7857,1406,1591,2923,3156,7858, # 6918 -1025,2141,3102,3157, 354,2731, 884,2224,4225,2407, 508,3706, 726,3583, 996,2428, # 6934 -3584, 729,7859, 392,2191,1453,4011,4510,3707,7860,7861,2458,3585,2608,1675,2800, # 6950 - 919,2347,2960,2348,1270,4511,4012, 73,7862,7863, 647,7864,3228,2843,2255,1550, # 6966 -1346,3006,7865,1332, 883,3479,7866,7867,7868,7869,3301,2765,7870,1212, 831,1347, # 6982 -4226,4512,2326,3830,1863,3053, 720,3831,4513,4514,3832,7871,4227,7872,7873,4515, # 6998 -7874,7875,1798,4516,3708,2609,4517,3586,1645,2371,7876,7877,2924, 669,2208,2665, # 7014 -2429,7878,2879,7879,7880,1028,3229,7881,4228,2408,7882,2256,1353,7883,7884,4518, # 7030 -3158, 518,7885,4013,7886,4229,1960,7887,2142,4230,7888,7889,3007,2349,2350,3833, # 7046 - 516,1833,1454,4014,2699,4231,4519,2225,2610,1971,1129,3587,7890,2766,7891,2961, # 7062 -1422, 577,1470,3008,1524,3373,7892,7893, 432,4232,3054,3480,7894,2586,1455,2508, # 7078 -2226,1972,1175,7895,1020,2732,4015,3481,4520,7896,2733,7897,1743,1361,3055,3482, # 7094 -2639,4016,4233,4521,2290, 895, 924,4234,2170, 331,2243,3056, 166,1627,3057,1098, # 7110 -7898,1232,2880,2227,3374,4522, 657, 403,1196,2372, 542,3709,3375,1600,4235,3483, # 7126 -7899,4523,2767,3230, 576, 530,1362,7900,4524,2533,2666,3710,4017,7901, 842,3834, # 7142 -7902,2801,2031,1014,4018, 213,2700,3376, 665, 621,4236,7903,3711,2925,2430,7904, # 7158 -2431,3302,3588,3377,7905,4237,2534,4238,4525,3589,1682,4239,3484,1380,7906, 724, # 7174 -2277, 600,1670,7907,1337,1233,4526,3103,2244,7908,1621,4527,7909, 651,4240,7910, # 7190 -1612,4241,2611,7911,2844,7912,2734,2307,3058,7913, 716,2459,3059, 174,1255,2701, # 7206 -4019,3590, 548,1320,1398, 728,4020,1574,7914,1890,1197,3060,4021,7915,3061,3062, # 7222 -3712,3591,3713, 747,7916, 635,4242,4528,7917,7918,7919,4243,7920,7921,4529,7922, # 7238 -3378,4530,2432, 451,7923,3714,2535,2072,4244,2735,4245,4022,7924,1764,4531,7925, # 7254 -4246, 350,7926,2278,2390,2486,7927,4247,4023,2245,1434,4024, 488,4532, 458,4248, # 7270 -4025,3715, 771,1330,2391,3835,2568,3159,2159,2409,1553,2667,3160,4249,7928,2487, # 7286 -2881,2612,1720,2702,4250,3379,4533,7929,2536,4251,7930,3231,4252,2768,7931,2015, # 7302 -2736,7932,1155,1017,3716,3836,7933,3303,2308, 201,1864,4253,1430,7934,4026,7935, # 7318 -7936,7937,7938,7939,4254,1604,7940, 414,1865, 371,2587,4534,4535,3485,2016,3104, # 7334 -4536,1708, 960,4255, 887, 389,2171,1536,1663,1721,7941,2228,4027,2351,2926,1580, # 7350 -7942,7943,7944,1744,7945,2537,4537,4538,7946,4539,7947,2073,7948,7949,3592,3380, # 7366 -2882,4256,7950,4257,2640,3381,2802, 673,2703,2460, 709,3486,4028,3593,4258,7951, # 7382 -1148, 502, 634,7952,7953,1204,4540,3594,1575,4541,2613,3717,7954,3718,3105, 948, # 7398 -3232, 121,1745,3837,1110,7955,4259,3063,2509,3009,4029,3719,1151,1771,3838,1488, # 7414 -4030,1986,7956,2433,3487,7957,7958,2093,7959,4260,3839,1213,1407,2803, 531,2737, # 7430 -2538,3233,1011,1537,7960,2769,4261,3106,1061,7961,3720,3721,1866,2883,7962,2017, # 7446 - 120,4262,4263,2062,3595,3234,2309,3840,2668,3382,1954,4542,7963,7964,3488,1047, # 7462 -2704,1266,7965,1368,4543,2845, 649,3383,3841,2539,2738,1102,2846,2669,7966,7967, # 7478 -1999,7968,1111,3596,2962,7969,2488,3842,3597,2804,1854,3384,3722,7970,7971,3385, # 7494 -2410,2884,3304,3235,3598,7972,2569,7973,3599,2805,4031,1460, 856,7974,3600,7975, # 7510 -2885,2963,7976,2886,3843,7977,4264, 632,2510, 875,3844,1697,3845,2291,7978,7979, # 7526 -4544,3010,1239, 580,4545,4265,7980, 914, 936,2074,1190,4032,1039,2123,7981,7982, # 7542 -7983,3386,1473,7984,1354,4266,3846,7985,2172,3064,4033, 915,3305,4267,4268,3306, # 7558 -1605,1834,7986,2739, 398,3601,4269,3847,4034, 328,1912,2847,4035,3848,1331,4270, # 7574 -3011, 937,4271,7987,3602,4036,4037,3387,2160,4546,3388, 524, 742, 538,3065,1012, # 7590 -7988,7989,3849,2461,7990, 658,1103, 225,3850,7991,7992,4547,7993,4548,7994,3236, # 7606 -1243,7995,4038, 963,2246,4549,7996,2705,3603,3161,7997,7998,2588,2327,7999,4550, # 7622 -8000,8001,8002,3489,3307, 957,3389,2540,2032,1930,2927,2462, 870,2018,3604,1746, # 7638 -2770,2771,2434,2463,8003,3851,8004,3723,3107,3724,3490,3390,3725,8005,1179,3066, # 7654 -8006,3162,2373,4272,3726,2541,3163,3108,2740,4039,8007,3391,1556,2542,2292, 977, # 7670 -2887,2033,4040,1205,3392,8008,1765,3393,3164,2124,1271,1689, 714,4551,3491,8009, # 7686 -2328,3852, 533,4273,3605,2181, 617,8010,2464,3308,3492,2310,8011,8012,3165,8013, # 7702 -8014,3853,1987, 618, 427,2641,3493,3394,8015,8016,1244,1690,8017,2806,4274,4552, # 7718 -8018,3494,8019,8020,2279,1576, 473,3606,4275,3395, 972,8021,3607,8022,3067,8023, # 7734 -8024,4553,4554,8025,3727,4041,4042,8026, 153,4555, 356,8027,1891,2888,4276,2143, # 7750 - 408, 803,2352,8028,3854,8029,4277,1646,2570,2511,4556,4557,3855,8030,3856,4278, # 7766 -8031,2411,3396, 752,8032,8033,1961,2964,8034, 746,3012,2465,8035,4279,3728, 698, # 7782 -4558,1892,4280,3608,2543,4559,3609,3857,8036,3166,3397,8037,1823,1302,4043,2706, # 7798 -3858,1973,4281,8038,4282,3167, 823,1303,1288,1236,2848,3495,4044,3398, 774,3859, # 7814 -8039,1581,4560,1304,2849,3860,4561,8040,2435,2161,1083,3237,4283,4045,4284, 344, # 7830 -1173, 288,2311, 454,1683,8041,8042,1461,4562,4046,2589,8043,8044,4563, 985, 894, # 7846 -8045,3399,3168,8046,1913,2928,3729,1988,8047,2110,1974,8048,4047,8049,2571,1194, # 7862 - 425,8050,4564,3169,1245,3730,4285,8051,8052,2850,8053, 636,4565,1855,3861, 760, # 7878 -1799,8054,4286,2209,1508,4566,4048,1893,1684,2293,8055,8056,8057,4287,4288,2210, # 7894 - 479,8058,8059, 832,8060,4049,2489,8061,2965,2490,3731, 990,3109, 627,1814,2642, # 7910 -4289,1582,4290,2125,2111,3496,4567,8062, 799,4291,3170,8063,4568,2112,1737,3013, # 7926 -1018, 543, 754,4292,3309,1676,4569,4570,4050,8064,1489,8065,3497,8066,2614,2889, # 7942 -4051,8067,8068,2966,8069,8070,8071,8072,3171,4571,4572,2182,1722,8073,3238,3239, # 7958 -1842,3610,1715, 481, 365,1975,1856,8074,8075,1962,2491,4573,8076,2126,3611,3240, # 7974 - 433,1894,2063,2075,8077, 602,2741,8078,8079,8080,8081,8082,3014,1628,3400,8083, # 7990 -3172,4574,4052,2890,4575,2512,8084,2544,2772,8085,8086,8087,3310,4576,2891,8088, # 8006 -4577,8089,2851,4578,4579,1221,2967,4053,2513,8090,8091,8092,1867,1989,8093,8094, # 8022 -8095,1895,8096,8097,4580,1896,4054, 318,8098,2094,4055,4293,8099,8100, 485,8101, # 8038 - 938,3862, 553,2670, 116,8102,3863,3612,8103,3498,2671,2773,3401,3311,2807,8104, # 8054 -3613,2929,4056,1747,2930,2968,8105,8106, 207,8107,8108,2672,4581,2514,8109,3015, # 8070 - 890,3614,3864,8110,1877,3732,3402,8111,2183,2353,3403,1652,8112,8113,8114, 941, # 8086 -2294, 208,3499,4057,2019, 330,4294,3865,2892,2492,3733,4295,8115,8116,8117,8118, # 8102 + 1, 1800, 1506, 255, 1431, 198, 9, 82, 6, 7310, 177, 202, 3615, 1256, 2808, 110, # 2742 + 3735, 33, 3241, 261, 76, 44, 2113, 16, 2931, 2184, 1176, 659, 3868, 26, 3404, 2643, # 2758 + 1198, 3869, 3313, 4060, 410, 2211, 302, 590, 361, 1963, 8, 204, 58, 4296, 7311, 1931, # 2774 + 63, 7312, 7313, 317, 1614, 75, 222, 159, 4061, 2412, 1480, 7314, 3500, 3068, 224, 2809, # 2790 + 3616, 3, 10, 3870, 1471, 29, 2774, 1135, 2852, 1939, 873, 130, 3242, 1123, 312, 7315, # 2806 + 4297, 2051, 507, 252, 682, 7316, 142, 1914, 124, 206, 2932, 34, 3501, 3173, 64, 604, # 2822 + 7317, 2494, 1976, 1977, 155, 1990, 645, 641, 1606, 7318, 3405, 337, 72, 406, 7319, 80, # 2838 + 630, 238, 3174, 1509, 263, 939, 1092, 2644, 756, 1440, 1094, 3406, 449, 69, 2969, 591, # 2854 + 179, 2095, 471, 115, 2034, 1843, 60, 50, 2970, 134, 806, 1868, 734, 2035, 3407, 180, # 2870 + 995, 1607, 156, 537, 2893, 688, 7320, 319, 1305, 779, 2144, 514, 2374, 298, 4298, 359, # 2886 + 2495, 90, 2707, 1338, 663, 11, 906, 1099, 2545, 20, 2436, 182, 532, 1716, 7321, 732, # 2902 + 1376, 4062, 1311, 1420, 3175, 25, 2312, 1056, 113, 399, 382, 1949, 242, 3408, 2467, 529, # 2918 + 3243, 475, 1447, 3617, 7322, 117, 21, 656, 810, 1297, 2295, 2329, 3502, 7323, 126, 4063, # 2934 + 706, 456, 150, 613, 4299, 71, 1118, 2036, 4064, 145, 3069, 85, 835, 486, 2114, 1246, # 2950 + 1426, 428, 727, 1285, 1015, 800, 106, 623, 303, 1281, 7324, 2127, 2354, 347, 3736, 221, # 2966 + 3503, 3110, 7325, 1955, 1153, 4065, 83, 296, 1199, 3070, 192, 624, 93, 7326, 822, 1897, # 2982 + 2810, 3111, 795, 2064, 991, 1554, 1542, 1592, 27, 43, 2853, 859, 139, 1456, 860, 4300, # 2998 + 437, 712, 3871, 164, 2392, 3112, 695, 211, 3017, 2096, 195, 3872, 1608, 3504, 3505, 3618, # 3014 + 3873, 234, 811, 2971, 2097, 3874, 2229, 1441, 3506, 1615, 2375, 668, 2076, 1638, 305, 228, # 3030 + 1664, 4301, 467, 415, 7327, 262, 2098, 1593, 239, 108, 300, 200, 1033, 512, 1247, 2077, # 3046 + 7328, 7329, 2173, 3176, 3619, 2673, 593, 845, 1062, 3244, 88, 1723, 2037, 3875, 1950, 212, # 3062 + 266, 152, 149, 468, 1898, 4066, 4302, 77, 187, 7330, 3018, 37, 5, 2972, 7331, 3876, # 3078 + 7332, 7333, 39, 2517, 4303, 2894, 3177, 2078, 55, 148, 74, 4304, 545, 483, 1474, 1029, # 3094 + 1665, 217, 1869, 1531, 3113, 1104, 2645, 4067, 24, 172, 3507, 900, 3877, 3508, 3509, 4305, # 3110 + 32, 1408, 2811, 1312, 329, 487, 2355, 2247, 2708, 784, 2674, 4, 3019, 3314, 1427, 1788, # 3126 + 188, 109, 499, 7334, 3620, 1717, 1789, 888, 1217, 3020, 4306, 7335, 3510, 7336, 3315, 1520, # 3142 + 3621, 3878, 196, 1034, 775, 7337, 7338, 929, 1815, 249, 439, 38, 7339, 1063, 7340, 794, # 3158 + 3879, 1435, 2296, 46, 178, 3245, 2065, 7341, 2376, 7342, 214, 1709, 4307, 804, 35, 707, # 3174 + 324, 3622, 1601, 2546, 140, 459, 4068, 7343, 7344, 1365, 839, 272, 978, 2257, 2572, 3409, # 3190 + 2128, 1363, 3623, 1423, 697, 100, 3071, 48, 70, 1231, 495, 3114, 2193, 7345, 1294, 7346, # 3206 + 2079, 462, 586, 1042, 3246, 853, 256, 988, 185, 2377, 3410, 1698, 434, 1084, 7347, 3411, # 3222 + 314, 2615, 2775, 4308, 2330, 2331, 569, 2280, 637, 1816, 2518, 757, 1162, 1878, 1616, 3412, # 3238 + 287, 1577, 2115, 768, 4309, 1671, 2854, 3511, 2519, 1321, 3737, 909, 2413, 7348, 4069, 933, # 3254 + 3738, 7349, 2052, 2356, 1222, 4310, 765, 2414, 1322, 786, 4311, 7350, 1919, 1462, 1677, 2895, # 3270 + 1699, 7351, 4312, 1424, 2437, 3115, 3624, 2590, 3316, 1774, 1940, 3413, 3880, 4070, 309, 1369, # 3286 + 1130, 2812, 364, 2230, 1653, 1299, 3881, 3512, 3882, 3883, 2646, 525, 1085, 3021, 902, 2000, # 3302 + 1475, 964, 4313, 421, 1844, 1415, 1057, 2281, 940, 1364, 3116, 376, 4314, 4315, 1381, 7, # 3318 + 2520, 983, 2378, 336, 1710, 2675, 1845, 321, 3414, 559, 1131, 3022, 2742, 1808, 1132, 1313, # 3334 + 265, 1481, 1857, 7352, 352, 1203, 2813, 3247, 167, 1089, 420, 2814, 776, 792, 1724, 3513, # 3350 + 4071, 2438, 3248, 7353, 4072, 7354, 446, 229, 333, 2743, 901, 3739, 1200, 1557, 4316, 2647, # 3366 + 1920, 395, 2744, 2676, 3740, 4073, 1835, 125, 916, 3178, 2616, 4317, 7355, 7356, 3741, 7357, # 3382 + 7358, 7359, 4318, 3117, 3625, 1133, 2547, 1757, 3415, 1510, 2313, 1409, 3514, 7360, 2145, 438, # 3398 + 2591, 2896, 2379, 3317, 1068, 958, 3023, 461, 311, 2855, 2677, 4074, 1915, 3179, 4075, 1978, # 3414 + 383, 750, 2745, 2617, 4076, 274, 539, 385, 1278, 1442, 7361, 1154, 1964, 384, 561, 210, # 3430 + 98, 1295, 2548, 3515, 7362, 1711, 2415, 1482, 3416, 3884, 2897, 1257, 129, 7363, 3742, 642, # 3446 + 523, 2776, 2777, 2648, 7364, 141, 2231, 1333, 68, 176, 441, 876, 907, 4077, 603, 2592, # 3462 + 710, 171, 3417, 404, 549, 18, 3118, 2393, 1410, 3626, 1666, 7365, 3516, 4319, 2898, 4320, # 3478 + 7366, 2973, 368, 7367, 146, 366, 99, 871, 3627, 1543, 748, 807, 1586, 1185, 22, 2258, # 3494 + 379, 3743, 3180, 7368, 3181, 505, 1941, 2618, 1991, 1382, 2314, 7369, 380, 2357, 218, 702, # 3510 + 1817, 1248, 3418, 3024, 3517, 3318, 3249, 7370, 2974, 3628, 930, 3250, 3744, 7371, 59, 7372, # 3526 + 585, 601, 4078, 497, 3419, 1112, 1314, 4321, 1801, 7373, 1223, 1472, 2174, 7374, 749, 1836, # 3542 + 690, 1899, 3745, 1772, 3885, 1476, 429, 1043, 1790, 2232, 2116, 917, 4079, 447, 1086, 1629, # 3558 + 7375, 556, 7376, 7377, 2020, 1654, 844, 1090, 105, 550, 966, 1758, 2815, 1008, 1782, 686, # 3574 + 1095, 7378, 2282, 793, 1602, 7379, 3518, 2593, 4322, 4080, 2933, 2297, 4323, 3746, 980, 2496, # 3590 + 544, 353, 527, 4324, 908, 2678, 2899, 7380, 381, 2619, 1942, 1348, 7381, 1341, 1252, 560, # 3606 + 3072, 7382, 3420, 2856, 7383, 2053, 973, 886, 2080, 143, 4325, 7384, 7385, 157, 3886, 496, # 3622 + 4081, 57, 840, 540, 2038, 4326, 4327, 3421, 2117, 1445, 970, 2259, 1748, 1965, 2081, 4082, # 3638 + 3119, 1234, 1775, 3251, 2816, 3629, 773, 1206, 2129, 1066, 2039, 1326, 3887, 1738, 1725, 4083, # 3654 + 279, 3120, 51, 1544, 2594, 423, 1578, 2130, 2066, 173, 4328, 1879, 7386, 7387, 1583, 264, # 3670 + 610, 3630, 4329, 2439, 280, 154, 7388, 7389, 7390, 1739, 338, 1282, 3073, 693, 2857, 1411, # 3686 + 1074, 3747, 2440, 7391, 4330, 7392, 7393, 1240, 952, 2394, 7394, 2900, 1538, 2679, 685, 1483, # 3702 + 4084, 2468, 1436, 953, 4085, 2054, 4331, 671, 2395, 79, 4086, 2441, 3252, 608, 567, 2680, # 3718 + 3422, 4087, 4088, 1691, 393, 1261, 1791, 2396, 7395, 4332, 7396, 7397, 7398, 7399, 1383, 1672, # 3734 + 3748, 3182, 1464, 522, 1119, 661, 1150, 216, 675, 4333, 3888, 1432, 3519, 609, 4334, 2681, # 3750 + 2397, 7400, 7401, 7402, 4089, 3025, 0, 7403, 2469, 315, 231, 2442, 301, 3319, 4335, 2380, # 3766 + 7404, 233, 4090, 3631, 1818, 4336, 4337, 7405, 96, 1776, 1315, 2082, 7406, 257, 7407, 1809, # 3782 + 3632, 2709, 1139, 1819, 4091, 2021, 1124, 2163, 2778, 1777, 2649, 7408, 3074, 363, 1655, 3183, # 3798 + 7409, 2975, 7410, 7411, 7412, 3889, 1567, 3890, 718, 103, 3184, 849, 1443, 341, 3320, 2934, # 3814 + 1484, 7413, 1712, 127, 67, 339, 4092, 2398, 679, 1412, 821, 7414, 7415, 834, 738, 351, # 3830 + 2976, 2146, 846, 235, 1497, 1880, 418, 1992, 3749, 2710, 186, 1100, 2147, 2746, 3520, 1545, # 3846 + 1355, 2935, 2858, 1377, 583, 3891, 4093, 2573, 2977, 7416, 1298, 3633, 1078, 2549, 3634, 2358, # 3862 + 78, 3750, 3751, 267, 1289, 2099, 2001, 1594, 4094, 348, 369, 1274, 2194, 2175, 1837, 4338, # 3878 + 1820, 2817, 3635, 2747, 2283, 2002, 4339, 2936, 2748, 144, 3321, 882, 4340, 3892, 2749, 3423, # 3894 + 4341, 2901, 7417, 4095, 1726, 320, 7418, 3893, 3026, 788, 2978, 7419, 2818, 1773, 1327, 2859, # 3910 + 3894, 2819, 7420, 1306, 4342, 2003, 1700, 3752, 3521, 2359, 2650, 787, 2022, 506, 824, 3636, # 3926 + 534, 323, 4343, 1044, 3322, 2023, 1900, 946, 3424, 7421, 1778, 1500, 1678, 7422, 1881, 4344, # 3942 + 165, 243, 4345, 3637, 2521, 123, 683, 4096, 764, 4346, 36, 3895, 1792, 589, 2902, 816, # 3958 + 626, 1667, 3027, 2233, 1639, 1555, 1622, 3753, 3896, 7423, 3897, 2860, 1370, 1228, 1932, 891, # 3974 + 2083, 2903, 304, 4097, 7424, 292, 2979, 2711, 3522, 691, 2100, 4098, 1115, 4347, 118, 662, # 3990 + 7425, 611, 1156, 854, 2381, 1316, 2861, 2, 386, 515, 2904, 7426, 7427, 3253, 868, 2234, # 4006 + 1486, 855, 2651, 785, 2212, 3028, 7428, 1040, 3185, 3523, 7429, 3121, 448, 7430, 1525, 7431, # 4022 + 2164, 4348, 7432, 3754, 7433, 4099, 2820, 3524, 3122, 503, 818, 3898, 3123, 1568, 814, 676, # 4038 + 1444, 306, 1749, 7434, 3755, 1416, 1030, 197, 1428, 805, 2821, 1501, 4349, 7435, 7436, 7437, # 4054 + 1993, 7438, 4350, 7439, 7440, 2195, 13, 2779, 3638, 2980, 3124, 1229, 1916, 7441, 3756, 2131, # 4070 + 7442, 4100, 4351, 2399, 3525, 7443, 2213, 1511, 1727, 1120, 7444, 7445, 646, 3757, 2443, 307, # 4086 + 7446, 7447, 1595, 3186, 7448, 7449, 7450, 3639, 1113, 1356, 3899, 1465, 2522, 2523, 7451, 519, # 4102 + 7452, 128, 2132, 92, 2284, 1979, 7453, 3900, 1512, 342, 3125, 2196, 7454, 2780, 2214, 1980, # 4118 + 3323, 7455, 290, 1656, 1317, 789, 827, 2360, 7456, 3758, 4352, 562, 581, 3901, 7457, 401, # 4134 + 4353, 2248, 94, 4354, 1399, 2781, 7458, 1463, 2024, 4355, 3187, 1943, 7459, 828, 1105, 4101, # 4150 + 1262, 1394, 7460, 4102, 605, 4356, 7461, 1783, 2862, 7462, 2822, 819, 2101, 578, 2197, 2937, # 4166 + 7463, 1502, 436, 3254, 4103, 3255, 2823, 3902, 2905, 3425, 3426, 7464, 2712, 2315, 7465, 7466, # 4182 + 2332, 2067, 23, 4357, 193, 826, 3759, 2102, 699, 1630, 4104, 3075, 390, 1793, 1064, 3526, # 4198 + 7467, 1579, 3076, 3077, 1400, 7468, 4105, 1838, 1640, 2863, 7469, 4358, 4359, 137, 4106, 598, # 4214 + 3078, 1966, 780, 104, 974, 2938, 7470, 278, 899, 253, 402, 572, 504, 493, 1339, 7471, # 4230 + 3903, 1275, 4360, 2574, 2550, 7472, 3640, 3029, 3079, 2249, 565, 1334, 2713, 863, 41, 7473, # 4246 + 7474, 4361, 7475, 1657, 2333, 19, 463, 2750, 4107, 606, 7476, 2981, 3256, 1087, 2084, 1323, # 4262 + 2652, 2982, 7477, 1631, 1623, 1750, 4108, 2682, 7478, 2864, 791, 2714, 2653, 2334, 232, 2416, # 4278 + 7479, 2983, 1498, 7480, 2654, 2620, 755, 1366, 3641, 3257, 3126, 2025, 1609, 119, 1917, 3427, # 4294 + 862, 1026, 4109, 7481, 3904, 3760, 4362, 3905, 4363, 2260, 1951, 2470, 7482, 1125, 817, 4110, # 4310 + 4111, 3906, 1513, 1766, 2040, 1487, 4112, 3030, 3258, 2824, 3761, 3127, 7483, 7484, 1507, 7485, # 4326 + 2683, 733, 40, 1632, 1106, 2865, 345, 4113, 841, 2524, 230, 4364, 2984, 1846, 3259, 3428, # 4342 + 7486, 1263, 986, 3429, 7487, 735, 879, 254, 1137, 857, 622, 1300, 1180, 1388, 1562, 3907, # 4358 + 3908, 2939, 967, 2751, 2655, 1349, 592, 2133, 1692, 3324, 2985, 1994, 4114, 1679, 3909, 1901, # 4374 + 2185, 7488, 739, 3642, 2715, 1296, 1290, 7489, 4115, 2198, 2199, 1921, 1563, 2595, 2551, 1870, # 4390 + 2752, 2986, 7490, 435, 7491, 343, 1108, 596, 17, 1751, 4365, 2235, 3430, 3643, 7492, 4366, # 4406 + 294, 3527, 2940, 1693, 477, 979, 281, 2041, 3528, 643, 2042, 3644, 2621, 2782, 2261, 1031, # 4422 + 2335, 2134, 2298, 3529, 4367, 367, 1249, 2552, 7493, 3530, 7494, 4368, 1283, 3325, 2004, 240, # 4438 + 1762, 3326, 4369, 4370, 836, 1069, 3128, 474, 7495, 2148, 2525, 268, 3531, 7496, 3188, 1521, # 4454 + 1284, 7497, 1658, 1546, 4116, 7498, 3532, 3533, 7499, 4117, 3327, 2684, 1685, 4118, 961, 1673, # 4470 + 2622, 190, 2005, 2200, 3762, 4371, 4372, 7500, 570, 2497, 3645, 1490, 7501, 4373, 2623, 3260, # 4486 + 1956, 4374, 584, 1514, 396, 1045, 1944, 7502, 4375, 1967, 2444, 7503, 7504, 4376, 3910, 619, # 4502 + 7505, 3129, 3261, 215, 2006, 2783, 2553, 3189, 4377, 3190, 4378, 763, 4119, 3763, 4379, 7506, # 4518 + 7507, 1957, 1767, 2941, 3328, 3646, 1174, 452, 1477, 4380, 3329, 3130, 7508, 2825, 1253, 2382, # 4534 + 2186, 1091, 2285, 4120, 492, 7509, 638, 1169, 1824, 2135, 1752, 3911, 648, 926, 1021, 1324, # 4550 + 4381, 520, 4382, 997, 847, 1007, 892, 4383, 3764, 2262, 1871, 3647, 7510, 2400, 1784, 4384, # 4566 + 1952, 2942, 3080, 3191, 1728, 4121, 2043, 3648, 4385, 2007, 1701, 3131, 1551, 30, 2263, 4122, # 4582 + 7511, 2026, 4386, 3534, 7512, 501, 7513, 4123, 594, 3431, 2165, 1821, 3535, 3432, 3536, 3192, # 4598 + 829, 2826, 4124, 7514, 1680, 3132, 1225, 4125, 7515, 3262, 4387, 4126, 3133, 2336, 7516, 4388, # 4614 + 4127, 7517, 3912, 3913, 7518, 1847, 2383, 2596, 3330, 7519, 4389, 374, 3914, 652, 4128, 4129, # 4630 + 375, 1140, 798, 7520, 7521, 7522, 2361, 4390, 2264, 546, 1659, 138, 3031, 2445, 4391, 7523, # 4646 + 2250, 612, 1848, 910, 796, 3765, 1740, 1371, 825, 3766, 3767, 7524, 2906, 2554, 7525, 692, # 4662 + 444, 3032, 2624, 801, 4392, 4130, 7526, 1491, 244, 1053, 3033, 4131, 4132, 340, 7527, 3915, # 4678 + 1041, 2987, 293, 1168, 87, 1357, 7528, 1539, 959, 7529, 2236, 721, 694, 4133, 3768, 219, # 4694 + 1478, 644, 1417, 3331, 2656, 1413, 1401, 1335, 1389, 3916, 7530, 7531, 2988, 2362, 3134, 1825, # 4710 + 730, 1515, 184, 2827, 66, 4393, 7532, 1660, 2943, 246, 3332, 378, 1457, 226, 3433, 975, # 4726 + 3917, 2944, 1264, 3537, 674, 696, 7533, 163, 7534, 1141, 2417, 2166, 713, 3538, 3333, 4394, # 4742 + 3918, 7535, 7536, 1186, 15, 7537, 1079, 1070, 7538, 1522, 3193, 3539, 276, 1050, 2716, 758, # 4758 + 1126, 653, 2945, 3263, 7539, 2337, 889, 3540, 3919, 3081, 2989, 903, 1250, 4395, 3920, 3434, # 4774 + 3541, 1342, 1681, 1718, 766, 3264, 286, 89, 2946, 3649, 7540, 1713, 7541, 2597, 3334, 2990, # 4790 + 7542, 2947, 2215, 3194, 2866, 7543, 4396, 2498, 2526, 181, 387, 1075, 3921, 731, 2187, 3335, # 4806 + 7544, 3265, 310, 313, 3435, 2299, 770, 4134, 54, 3034, 189, 4397, 3082, 3769, 3922, 7545, # 4822 + 1230, 1617, 1849, 355, 3542, 4135, 4398, 3336, 111, 4136, 3650, 1350, 3135, 3436, 3035, 4137, # 4838 + 2149, 3266, 3543, 7546, 2784, 3923, 3924, 2991, 722, 2008, 7547, 1071, 247, 1207, 2338, 2471, # 4854 + 1378, 4399, 2009, 864, 1437, 1214, 4400, 373, 3770, 1142, 2216, 667, 4401, 442, 2753, 2555, # 4870 + 3771, 3925, 1968, 4138, 3267, 1839, 837, 170, 1107, 934, 1336, 1882, 7548, 7549, 2118, 4139, # 4886 + 2828, 743, 1569, 7550, 4402, 4140, 582, 2384, 1418, 3437, 7551, 1802, 7552, 357, 1395, 1729, # 4902 + 3651, 3268, 2418, 1564, 2237, 7553, 3083, 3772, 1633, 4403, 1114, 2085, 4141, 1532, 7554, 482, # 4918 + 2446, 4404, 7555, 7556, 1492, 833, 1466, 7557, 2717, 3544, 1641, 2829, 7558, 1526, 1272, 3652, # 4934 + 4142, 1686, 1794, 416, 2556, 1902, 1953, 1803, 7559, 3773, 2785, 3774, 1159, 2316, 7560, 2867, # 4950 + 4405, 1610, 1584, 3036, 2419, 2754, 443, 3269, 1163, 3136, 7561, 7562, 3926, 7563, 4143, 2499, # 4966 + 3037, 4406, 3927, 3137, 2103, 1647, 3545, 2010, 1872, 4144, 7564, 4145, 431, 3438, 7565, 250, # 4982 + 97, 81, 4146, 7566, 1648, 1850, 1558, 160, 848, 7567, 866, 740, 1694, 7568, 2201, 2830, # 4998 + 3195, 4147, 4407, 3653, 1687, 950, 2472, 426, 469, 3196, 3654, 3655, 3928, 7569, 7570, 1188, # 5014 + 424, 1995, 861, 3546, 4148, 3775, 2202, 2685, 168, 1235, 3547, 4149, 7571, 2086, 1674, 4408, # 5030 + 3337, 3270, 220, 2557, 1009, 7572, 3776, 670, 2992, 332, 1208, 717, 7573, 7574, 3548, 2447, # 5046 + 3929, 3338, 7575, 513, 7576, 1209, 2868, 3339, 3138, 4409, 1080, 7577, 7578, 7579, 7580, 2527, # 5062 + 3656, 3549, 815, 1587, 3930, 3931, 7581, 3550, 3439, 3777, 1254, 4410, 1328, 3038, 1390, 3932, # 5078 + 1741, 3933, 3778, 3934, 7582, 236, 3779, 2448, 3271, 7583, 7584, 3657, 3780, 1273, 3781, 4411, # 5094 + 7585, 308, 7586, 4412, 245, 4413, 1851, 2473, 1307, 2575, 430, 715, 2136, 2449, 7587, 270, # 5110 + 199, 2869, 3935, 7588, 3551, 2718, 1753, 761, 1754, 725, 1661, 1840, 4414, 3440, 3658, 7589, # 5126 + 7590, 587, 14, 3272, 227, 2598, 326, 480, 2265, 943, 2755, 3552, 291, 650, 1883, 7591, # 5142 + 1702, 1226, 102, 1547, 62, 3441, 904, 4415, 3442, 1164, 4150, 7592, 7593, 1224, 1548, 2756, # 5158 + 391, 498, 1493, 7594, 1386, 1419, 7595, 2055, 1177, 4416, 813, 880, 1081, 2363, 566, 1145, # 5174 + 4417, 2286, 1001, 1035, 2558, 2599, 2238, 394, 1286, 7596, 7597, 2068, 7598, 86, 1494, 1730, # 5190 + 3936, 491, 1588, 745, 897, 2948, 843, 3340, 3937, 2757, 2870, 3273, 1768, 998, 2217, 2069, # 5206 + 397, 1826, 1195, 1969, 3659, 2993, 3341, 284, 7599, 3782, 2500, 2137, 2119, 1903, 7600, 3938, # 5222 + 2150, 3939, 4151, 1036, 3443, 1904, 114, 2559, 4152, 209, 1527, 7601, 7602, 2949, 2831, 2625, # 5238 + 2385, 2719, 3139, 812, 2560, 7603, 3274, 7604, 1559, 737, 1884, 3660, 1210, 885, 28, 2686, # 5254 + 3553, 3783, 7605, 4153, 1004, 1779, 4418, 7606, 346, 1981, 2218, 2687, 4419, 3784, 1742, 797, # 5270 + 1642, 3940, 1933, 1072, 1384, 2151, 896, 3941, 3275, 3661, 3197, 2871, 3554, 7607, 2561, 1958, # 5286 + 4420, 2450, 1785, 7608, 7609, 7610, 3942, 4154, 1005, 1308, 3662, 4155, 2720, 4421, 4422, 1528, # 5302 + 2600, 161, 1178, 4156, 1982, 987, 4423, 1101, 4157, 631, 3943, 1157, 3198, 2420, 1343, 1241, # 5318 + 1016, 2239, 2562, 372, 877, 2339, 2501, 1160, 555, 1934, 911, 3944, 7611, 466, 1170, 169, # 5334 + 1051, 2907, 2688, 3663, 2474, 2994, 1182, 2011, 2563, 1251, 2626, 7612, 992, 2340, 3444, 1540, # 5350 + 2721, 1201, 2070, 2401, 1996, 2475, 7613, 4424, 528, 1922, 2188, 1503, 1873, 1570, 2364, 3342, # 5366 + 3276, 7614, 557, 1073, 7615, 1827, 3445, 2087, 2266, 3140, 3039, 3084, 767, 3085, 2786, 4425, # 5382 + 1006, 4158, 4426, 2341, 1267, 2176, 3664, 3199, 778, 3945, 3200, 2722, 1597, 2657, 7616, 4427, # 5398 + 7617, 3446, 7618, 7619, 7620, 3277, 2689, 1433, 3278, 131, 95, 1504, 3946, 723, 4159, 3141, # 5414 + 1841, 3555, 2758, 2189, 3947, 2027, 2104, 3665, 7621, 2995, 3948, 1218, 7622, 3343, 3201, 3949, # 5430 + 4160, 2576, 248, 1634, 3785, 912, 7623, 2832, 3666, 3040, 3786, 654, 53, 7624, 2996, 7625, # 5446 + 1688, 4428, 777, 3447, 1032, 3950, 1425, 7626, 191, 820, 2120, 2833, 971, 4429, 931, 3202, # 5462 + 135, 664, 783, 3787, 1997, 772, 2908, 1935, 3951, 3788, 4430, 2909, 3203, 282, 2723, 640, # 5478 + 1372, 3448, 1127, 922, 325, 3344, 7627, 7628, 711, 2044, 7629, 7630, 3952, 2219, 2787, 1936, # 5494 + 3953, 3345, 2220, 2251, 3789, 2300, 7631, 4431, 3790, 1258, 3279, 3954, 3204, 2138, 2950, 3955, # 5510 + 3956, 7632, 2221, 258, 3205, 4432, 101, 1227, 7633, 3280, 1755, 7634, 1391, 3281, 7635, 2910, # 5526 + 2056, 893, 7636, 7637, 7638, 1402, 4161, 2342, 7639, 7640, 3206, 3556, 7641, 7642, 878, 1325, # 5542 + 1780, 2788, 4433, 259, 1385, 2577, 744, 1183, 2267, 4434, 7643, 3957, 2502, 7644, 684, 1024, # 5558 + 4162, 7645, 472, 3557, 3449, 1165, 3282, 3958, 3959, 322, 2152, 881, 455, 1695, 1152, 1340, # 5574 + 660, 554, 2153, 4435, 1058, 4436, 4163, 830, 1065, 3346, 3960, 4437, 1923, 7646, 1703, 1918, # 5590 + 7647, 932, 2268, 122, 7648, 4438, 947, 677, 7649, 3791, 2627, 297, 1905, 1924, 2269, 4439, # 5606 + 2317, 3283, 7650, 7651, 4164, 7652, 4165, 84, 4166, 112, 989, 7653, 547, 1059, 3961, 701, # 5622 + 3558, 1019, 7654, 4167, 7655, 3450, 942, 639, 457, 2301, 2451, 993, 2951, 407, 851, 494, # 5638 + 4440, 3347, 927, 7656, 1237, 7657, 2421, 3348, 573, 4168, 680, 921, 2911, 1279, 1874, 285, # 5654 + 790, 1448, 1983, 719, 2167, 7658, 7659, 4441, 3962, 3963, 1649, 7660, 1541, 563, 7661, 1077, # 5670 + 7662, 3349, 3041, 3451, 511, 2997, 3964, 3965, 3667, 3966, 1268, 2564, 3350, 3207, 4442, 4443, # 5686 + 7663, 535, 1048, 1276, 1189, 2912, 2028, 3142, 1438, 1373, 2834, 2952, 1134, 2012, 7664, 4169, # 5702 + 1238, 2578, 3086, 1259, 7665, 700, 7666, 2953, 3143, 3668, 4170, 7667, 4171, 1146, 1875, 1906, # 5718 + 4444, 2601, 3967, 781, 2422, 132, 1589, 203, 147, 273, 2789, 2402, 898, 1786, 2154, 3968, # 5734 + 3969, 7668, 3792, 2790, 7669, 7670, 4445, 4446, 7671, 3208, 7672, 1635, 3793, 965, 7673, 1804, # 5750 + 2690, 1516, 3559, 1121, 1082, 1329, 3284, 3970, 1449, 3794, 65, 1128, 2835, 2913, 2759, 1590, # 5766 + 3795, 7674, 7675, 12, 2658, 45, 976, 2579, 3144, 4447, 517, 2528, 1013, 1037, 3209, 7676, # 5782 + 3796, 2836, 7677, 3797, 7678, 3452, 7679, 2602, 614, 1998, 2318, 3798, 3087, 2724, 2628, 7680, # 5798 + 2580, 4172, 599, 1269, 7681, 1810, 3669, 7682, 2691, 3088, 759, 1060, 489, 1805, 3351, 3285, # 5814 + 1358, 7683, 7684, 2386, 1387, 1215, 2629, 2252, 490, 7685, 7686, 4173, 1759, 2387, 2343, 7687, # 5830 + 4448, 3799, 1907, 3971, 2630, 1806, 3210, 4449, 3453, 3286, 2760, 2344, 874, 7688, 7689, 3454, # 5846 + 3670, 1858, 91, 2914, 3671, 3042, 3800, 4450, 7690, 3145, 3972, 2659, 7691, 3455, 1202, 1403, # 5862 + 3801, 2954, 2529, 1517, 2503, 4451, 3456, 2504, 7692, 4452, 7693, 2692, 1885, 1495, 1731, 3973, # 5878 + 2365, 4453, 7694, 2029, 7695, 7696, 3974, 2693, 1216, 237, 2581, 4174, 2319, 3975, 3802, 4454, # 5894 + 4455, 2694, 3560, 3457, 445, 4456, 7697, 7698, 7699, 7700, 2761, 61, 3976, 3672, 1822, 3977, # 5910 + 7701, 687, 2045, 935, 925, 405, 2660, 703, 1096, 1859, 2725, 4457, 3978, 1876, 1367, 2695, # 5926 + 3352, 918, 2105, 1781, 2476, 334, 3287, 1611, 1093, 4458, 564, 3146, 3458, 3673, 3353, 945, # 5942 + 2631, 2057, 4459, 7702, 1925, 872, 4175, 7703, 3459, 2696, 3089, 349, 4176, 3674, 3979, 4460, # 5958 + 3803, 4177, 3675, 2155, 3980, 4461, 4462, 4178, 4463, 2403, 2046, 782, 3981, 400, 251, 4179, # 5974 + 1624, 7704, 7705, 277, 3676, 299, 1265, 476, 1191, 3804, 2121, 4180, 4181, 1109, 205, 7706, # 5990 + 2582, 1000, 2156, 3561, 1860, 7707, 7708, 7709, 4464, 7710, 4465, 2565, 107, 2477, 2157, 3982, # 6006 + 3460, 3147, 7711, 1533, 541, 1301, 158, 753, 4182, 2872, 3562, 7712, 1696, 370, 1088, 4183, # 6022 + 4466, 3563, 579, 327, 440, 162, 2240, 269, 1937, 1374, 3461, 968, 3043, 56, 1396, 3090, # 6038 + 2106, 3288, 3354, 7713, 1926, 2158, 4467, 2998, 7714, 3564, 7715, 7716, 3677, 4468, 2478, 7717, # 6054 + 2791, 7718, 1650, 4469, 7719, 2603, 7720, 7721, 3983, 2661, 3355, 1149, 3356, 3984, 3805, 3985, # 6070 + 7722, 1076, 49, 7723, 951, 3211, 3289, 3290, 450, 2837, 920, 7724, 1811, 2792, 2366, 4184, # 6086 + 1908, 1138, 2367, 3806, 3462, 7725, 3212, 4470, 1909, 1147, 1518, 2423, 4471, 3807, 7726, 4472, # 6102 + 2388, 2604, 260, 1795, 3213, 7727, 7728, 3808, 3291, 708, 7729, 3565, 1704, 7730, 3566, 1351, # 6118 + 1618, 3357, 2999, 1886, 944, 4185, 3358, 4186, 3044, 3359, 4187, 7731, 3678, 422, 413, 1714, # 6134 + 3292, 500, 2058, 2345, 4188, 2479, 7732, 1344, 1910, 954, 7733, 1668, 7734, 7735, 3986, 2404, # 6150 + 4189, 3567, 3809, 4190, 7736, 2302, 1318, 2505, 3091, 133, 3092, 2873, 4473, 629, 31, 2838, # 6166 + 2697, 3810, 4474, 850, 949, 4475, 3987, 2955, 1732, 2088, 4191, 1496, 1852, 7737, 3988, 620, # 6182 + 3214, 981, 1242, 3679, 3360, 1619, 3680, 1643, 3293, 2139, 2452, 1970, 1719, 3463, 2168, 7738, # 6198 + 3215, 7739, 7740, 3361, 1828, 7741, 1277, 4476, 1565, 2047, 7742, 1636, 3568, 3093, 7743, 869, # 6214 + 2839, 655, 3811, 3812, 3094, 3989, 3000, 3813, 1310, 3569, 4477, 7744, 7745, 7746, 1733, 558, # 6230 + 4478, 3681, 335, 1549, 3045, 1756, 4192, 3682, 1945, 3464, 1829, 1291, 1192, 470, 2726, 2107, # 6246 + 2793, 913, 1054, 3990, 7747, 1027, 7748, 3046, 3991, 4479, 982, 2662, 3362, 3148, 3465, 3216, # 6262 + 3217, 1946, 2794, 7749, 571, 4480, 7750, 1830, 7751, 3570, 2583, 1523, 2424, 7752, 2089, 984, # 6278 + 4481, 3683, 1959, 7753, 3684, 852, 923, 2795, 3466, 3685, 969, 1519, 999, 2048, 2320, 1705, # 6294 + 7754, 3095, 615, 1662, 151, 597, 3992, 2405, 2321, 1049, 275, 4482, 3686, 4193, 568, 3687, # 6310 + 3571, 2480, 4194, 3688, 7755, 2425, 2270, 409, 3218, 7756, 1566, 2874, 3467, 1002, 769, 2840, # 6326 + 194, 2090, 3149, 3689, 2222, 3294, 4195, 628, 1505, 7757, 7758, 1763, 2177, 3001, 3993, 521, # 6342 + 1161, 2584, 1787, 2203, 2406, 4483, 3994, 1625, 4196, 4197, 412, 42, 3096, 464, 7759, 2632, # 6358 + 4484, 3363, 1760, 1571, 2875, 3468, 2530, 1219, 2204, 3814, 2633, 2140, 2368, 4485, 4486, 3295, # 6374 + 1651, 3364, 3572, 7760, 7761, 3573, 2481, 3469, 7762, 3690, 7763, 7764, 2271, 2091, 460, 7765, # 6390 + 4487, 7766, 3002, 962, 588, 3574, 289, 3219, 2634, 1116, 52, 7767, 3047, 1796, 7768, 7769, # 6406 + 7770, 1467, 7771, 1598, 1143, 3691, 4198, 1984, 1734, 1067, 4488, 1280, 3365, 465, 4489, 1572, # 6422 + 510, 7772, 1927, 2241, 1812, 1644, 3575, 7773, 4490, 3692, 7774, 7775, 2663, 1573, 1534, 7776, # 6438 + 7777, 4199, 536, 1807, 1761, 3470, 3815, 3150, 2635, 7778, 7779, 7780, 4491, 3471, 2915, 1911, # 6454 + 2796, 7781, 3296, 1122, 377, 3220, 7782, 360, 7783, 7784, 4200, 1529, 551, 7785, 2059, 3693, # 6470 + 1769, 2426, 7786, 2916, 4201, 3297, 3097, 2322, 2108, 2030, 4492, 1404, 136, 1468, 1479, 672, # 6486 + 1171, 3221, 2303, 271, 3151, 7787, 2762, 7788, 2049, 678, 2727, 865, 1947, 4493, 7789, 2013, # 6502 + 3995, 2956, 7790, 2728, 2223, 1397, 3048, 3694, 4494, 4495, 1735, 2917, 3366, 3576, 7791, 3816, # 6518 + 509, 2841, 2453, 2876, 3817, 7792, 7793, 3152, 3153, 4496, 4202, 2531, 4497, 2304, 1166, 1010, # 6534 + 552, 681, 1887, 7794, 7795, 2957, 2958, 3996, 1287, 1596, 1861, 3154, 358, 453, 736, 175, # 6550 + 478, 1117, 905, 1167, 1097, 7796, 1853, 1530, 7797, 1706, 7798, 2178, 3472, 2287, 3695, 3473, # 6566 + 3577, 4203, 2092, 4204, 7799, 3367, 1193, 2482, 4205, 1458, 2190, 2205, 1862, 1888, 1421, 3298, # 6582 + 2918, 3049, 2179, 3474, 595, 2122, 7800, 3997, 7801, 7802, 4206, 1707, 2636, 223, 3696, 1359, # 6598 + 751, 3098, 183, 3475, 7803, 2797, 3003, 419, 2369, 633, 704, 3818, 2389, 241, 7804, 7805, # 6614 + 7806, 838, 3004, 3697, 2272, 2763, 2454, 3819, 1938, 2050, 3998, 1309, 3099, 2242, 1181, 7807, # 6630 + 1136, 2206, 3820, 2370, 1446, 4207, 2305, 4498, 7808, 7809, 4208, 1055, 2605, 484, 3698, 7810, # 6646 + 3999, 625, 4209, 2273, 3368, 1499, 4210, 4000, 7811, 4001, 4211, 3222, 2274, 2275, 3476, 7812, # 6662 + 7813, 2764, 808, 2606, 3699, 3369, 4002, 4212, 3100, 2532, 526, 3370, 3821, 4213, 955, 7814, # 6678 + 1620, 4214, 2637, 2427, 7815, 1429, 3700, 1669, 1831, 994, 928, 7816, 3578, 1260, 7817, 7818, # 6694 + 7819, 1948, 2288, 741, 2919, 1626, 4215, 2729, 2455, 867, 1184, 362, 3371, 1392, 7820, 7821, # 6710 + 4003, 4216, 1770, 1736, 3223, 2920, 4499, 4500, 1928, 2698, 1459, 1158, 7822, 3050, 3372, 2877, # 6726 + 1292, 1929, 2506, 2842, 3701, 1985, 1187, 2071, 2014, 2607, 4217, 7823, 2566, 2507, 2169, 3702, # 6742 + 2483, 3299, 7824, 3703, 4501, 7825, 7826, 666, 1003, 3005, 1022, 3579, 4218, 7827, 4502, 1813, # 6758 + 2253, 574, 3822, 1603, 295, 1535, 705, 3823, 4219, 283, 858, 417, 7828, 7829, 3224, 4503, # 6774 + 4504, 3051, 1220, 1889, 1046, 2276, 2456, 4004, 1393, 1599, 689, 2567, 388, 4220, 7830, 2484, # 6790 + 802, 7831, 2798, 3824, 2060, 1405, 2254, 7832, 4505, 3825, 2109, 1052, 1345, 3225, 1585, 7833, # 6806 + 809, 7834, 7835, 7836, 575, 2730, 3477, 956, 1552, 1469, 1144, 2323, 7837, 2324, 1560, 2457, # 6822 + 3580, 3226, 4005, 616, 2207, 3155, 2180, 2289, 7838, 1832, 7839, 3478, 4506, 7840, 1319, 3704, # 6838 + 3705, 1211, 3581, 1023, 3227, 1293, 2799, 7841, 7842, 7843, 3826, 607, 2306, 3827, 762, 2878, # 6854 + 1439, 4221, 1360, 7844, 1485, 3052, 7845, 4507, 1038, 4222, 1450, 2061, 2638, 4223, 1379, 4508, # 6870 + 2585, 7846, 7847, 4224, 1352, 1414, 2325, 2921, 1172, 7848, 7849, 3828, 3829, 7850, 1797, 1451, # 6886 + 7851, 7852, 7853, 7854, 2922, 4006, 4007, 2485, 2346, 411, 4008, 4009, 3582, 3300, 3101, 4509, # 6902 + 1561, 2664, 1452, 4010, 1375, 7855, 7856, 47, 2959, 316, 7857, 1406, 1591, 2923, 3156, 7858, # 6918 + 1025, 2141, 3102, 3157, 354, 2731, 884, 2224, 4225, 2407, 508, 3706, 726, 3583, 996, 2428, # 6934 + 3584, 729, 7859, 392, 2191, 1453, 4011, 4510, 3707, 7860, 7861, 2458, 3585, 2608, 1675, 2800, # 6950 + 919, 2347, 2960, 2348, 1270, 4511, 4012, 73, 7862, 7863, 647, 7864, 3228, 2843, 2255, 1550, # 6966 + 1346, 3006, 7865, 1332, 883, 3479, 7866, 7867, 7868, 7869, 3301, 2765, 7870, 1212, 831, 1347, # 6982 + 4226, 4512, 2326, 3830, 1863, 3053, 720, 3831, 4513, 4514, 3832, 7871, 4227, 7872, 7873, 4515, # 6998 + 7874, 7875, 1798, 4516, 3708, 2609, 4517, 3586, 1645, 2371, 7876, 7877, 2924, 669, 2208, 2665, # 7014 + 2429, 7878, 2879, 7879, 7880, 1028, 3229, 7881, 4228, 2408, 7882, 2256, 1353, 7883, 7884, 4518, # 7030 + 3158, 518, 7885, 4013, 7886, 4229, 1960, 7887, 2142, 4230, 7888, 7889, 3007, 2349, 2350, 3833, # 7046 + 516, 1833, 1454, 4014, 2699, 4231, 4519, 2225, 2610, 1971, 1129, 3587, 7890, 2766, 7891, 2961, # 7062 + 1422, 577, 1470, 3008, 1524, 3373, 7892, 7893, 432, 4232, 3054, 3480, 7894, 2586, 1455, 2508, # 7078 + 2226, 1972, 1175, 7895, 1020, 2732, 4015, 3481, 4520, 7896, 2733, 7897, 1743, 1361, 3055, 3482, # 7094 + 2639, 4016, 4233, 4521, 2290, 895, 924, 4234, 2170, 331, 2243, 3056, 166, 1627, 3057, 1098, # 7110 + 7898, 1232, 2880, 2227, 3374, 4522, 657, 403, 1196, 2372, 542, 3709, 3375, 1600, 4235, 3483, # 7126 + 7899, 4523, 2767, 3230, 576, 530, 1362, 7900, 4524, 2533, 2666, 3710, 4017, 7901, 842, 3834, # 7142 + 7902, 2801, 2031, 1014, 4018, 213, 2700, 3376, 665, 621, 4236, 7903, 3711, 2925, 2430, 7904, # 7158 + 2431, 3302, 3588, 3377, 7905, 4237, 2534, 4238, 4525, 3589, 1682, 4239, 3484, 1380, 7906, 724, # 7174 + 2277, 600, 1670, 7907, 1337, 1233, 4526, 3103, 2244, 7908, 1621, 4527, 7909, 651, 4240, 7910, # 7190 + 1612, 4241, 2611, 7911, 2844, 7912, 2734, 2307, 3058, 7913, 716, 2459, 3059, 174, 1255, 2701, # 7206 + 4019, 3590, 548, 1320, 1398, 728, 4020, 1574, 7914, 1890, 1197, 3060, 4021, 7915, 3061, 3062, # 7222 + 3712, 3591, 3713, 747, 7916, 635, 4242, 4528, 7917, 7918, 7919, 4243, 7920, 7921, 4529, 7922, # 7238 + 3378, 4530, 2432, 451, 7923, 3714, 2535, 2072, 4244, 2735, 4245, 4022, 7924, 1764, 4531, 7925, # 7254 + 4246, 350, 7926, 2278, 2390, 2486, 7927, 4247, 4023, 2245, 1434, 4024, 488, 4532, 458, 4248, # 7270 + 4025, 3715, 771, 1330, 2391, 3835, 2568, 3159, 2159, 2409, 1553, 2667, 3160, 4249, 7928, 2487, # 7286 + 2881, 2612, 1720, 2702, 4250, 3379, 4533, 7929, 2536, 4251, 7930, 3231, 4252, 2768, 7931, 2015, # 7302 + 2736, 7932, 1155, 1017, 3716, 3836, 7933, 3303, 2308, 201, 1864, 4253, 1430, 7934, 4026, 7935, # 7318 + 7936, 7937, 7938, 7939, 4254, 1604, 7940, 414, 1865, 371, 2587, 4534, 4535, 3485, 2016, 3104, # 7334 + 4536, 1708, 960, 4255, 887, 389, 2171, 1536, 1663, 1721, 7941, 2228, 4027, 2351, 2926, 1580, # 7350 + 7942, 7943, 7944, 1744, 7945, 2537, 4537, 4538, 7946, 4539, 7947, 2073, 7948, 7949, 3592, 3380, # 7366 + 2882, 4256, 7950, 4257, 2640, 3381, 2802, 673, 2703, 2460, 709, 3486, 4028, 3593, 4258, 7951, # 7382 + 1148, 502, 634, 7952, 7953, 1204, 4540, 3594, 1575, 4541, 2613, 3717, 7954, 3718, 3105, 948, # 7398 + 3232, 121, 1745, 3837, 1110, 7955, 4259, 3063, 2509, 3009, 4029, 3719, 1151, 1771, 3838, 1488, # 7414 + 4030, 1986, 7956, 2433, 3487, 7957, 7958, 2093, 7959, 4260, 3839, 1213, 1407, 2803, 531, 2737, # 7430 + 2538, 3233, 1011, 1537, 7960, 2769, 4261, 3106, 1061, 7961, 3720, 3721, 1866, 2883, 7962, 2017, # 7446 + 120, 4262, 4263, 2062, 3595, 3234, 2309, 3840, 2668, 3382, 1954, 4542, 7963, 7964, 3488, 1047, # 7462 + 2704, 1266, 7965, 1368, 4543, 2845, 649, 3383, 3841, 2539, 2738, 1102, 2846, 2669, 7966, 7967, # 7478 + 1999, 7968, 1111, 3596, 2962, 7969, 2488, 3842, 3597, 2804, 1854, 3384, 3722, 7970, 7971, 3385, # 7494 + 2410, 2884, 3304, 3235, 3598, 7972, 2569, 7973, 3599, 2805, 4031, 1460, 856, 7974, 3600, 7975, # 7510 + 2885, 2963, 7976, 2886, 3843, 7977, 4264, 632, 2510, 875, 3844, 1697, 3845, 2291, 7978, 7979, # 7526 + 4544, 3010, 1239, 580, 4545, 4265, 7980, 914, 936, 2074, 1190, 4032, 1039, 2123, 7981, 7982, # 7542 + 7983, 3386, 1473, 7984, 1354, 4266, 3846, 7985, 2172, 3064, 4033, 915, 3305, 4267, 4268, 3306, # 7558 + 1605, 1834, 7986, 2739, 398, 3601, 4269, 3847, 4034, 328, 1912, 2847, 4035, 3848, 1331, 4270, # 7574 + 3011, 937, 4271, 7987, 3602, 4036, 4037, 3387, 2160, 4546, 3388, 524, 742, 538, 3065, 1012, # 7590 + 7988, 7989, 3849, 2461, 7990, 658, 1103, 225, 3850, 7991, 7992, 4547, 7993, 4548, 7994, 3236, # 7606 + 1243, 7995, 4038, 963, 2246, 4549, 7996, 2705, 3603, 3161, 7997, 7998, 2588, 2327, 7999, 4550, # 7622 + 8000, 8001, 8002, 3489, 3307, 957, 3389, 2540, 2032, 1930, 2927, 2462, 870, 2018, 3604, 1746, # 7638 + 2770, 2771, 2434, 2463, 8003, 3851, 8004, 3723, 3107, 3724, 3490, 3390, 3725, 8005, 1179, 3066, # 7654 + 8006, 3162, 2373, 4272, 3726, 2541, 3163, 3108, 2740, 4039, 8007, 3391, 1556, 2542, 2292, 977, # 7670 + 2887, 2033, 4040, 1205, 3392, 8008, 1765, 3393, 3164, 2124, 1271, 1689, 714, 4551, 3491, 8009, # 7686 + 2328, 3852, 533, 4273, 3605, 2181, 617, 8010, 2464, 3308, 3492, 2310, 8011, 8012, 3165, 8013, # 7702 + 8014, 3853, 1987, 618, 427, 2641, 3493, 3394, 8015, 8016, 1244, 1690, 8017, 2806, 4274, 4552, # 7718 + 8018, 3494, 8019, 8020, 2279, 1576, 473, 3606, 4275, 3395, 972, 8021, 3607, 8022, 3067, 8023, # 7734 + 8024, 4553, 4554, 8025, 3727, 4041, 4042, 8026, 153, 4555, 356, 8027, 1891, 2888, 4276, 2143, # 7750 + 408, 803, 2352, 8028, 3854, 8029, 4277, 1646, 2570, 2511, 4556, 4557, 3855, 8030, 3856, 4278, # 7766 + 8031, 2411, 3396, 752, 8032, 8033, 1961, 2964, 8034, 746, 3012, 2465, 8035, 4279, 3728, 698, # 7782 + 4558, 1892, 4280, 3608, 2543, 4559, 3609, 3857, 8036, 3166, 3397, 8037, 1823, 1302, 4043, 2706, # 7798 + 3858, 1973, 4281, 8038, 4282, 3167, 823, 1303, 1288, 1236, 2848, 3495, 4044, 3398, 774, 3859, # 7814 + 8039, 1581, 4560, 1304, 2849, 3860, 4561, 8040, 2435, 2161, 1083, 3237, 4283, 4045, 4284, 344, # 7830 + 1173, 288, 2311, 454, 1683, 8041, 8042, 1461, 4562, 4046, 2589, 8043, 8044, 4563, 985, 894, # 7846 + 8045, 3399, 3168, 8046, 1913, 2928, 3729, 1988, 8047, 2110, 1974, 8048, 4047, 8049, 2571, 1194, # 7862 + 425, 8050, 4564, 3169, 1245, 3730, 4285, 8051, 8052, 2850, 8053, 636, 4565, 1855, 3861, 760, # 7878 + 1799, 8054, 4286, 2209, 1508, 4566, 4048, 1893, 1684, 2293, 8055, 8056, 8057, 4287, 4288, 2210, # 7894 + 479, 8058, 8059, 832, 8060, 4049, 2489, 8061, 2965, 2490, 3731, 990, 3109, 627, 1814, 2642, # 7910 + 4289, 1582, 4290, 2125, 2111, 3496, 4567, 8062, 799, 4291, 3170, 8063, 4568, 2112, 1737, 3013, # 7926 + 1018, 543, 754, 4292, 3309, 1676, 4569, 4570, 4050, 8064, 1489, 8065, 3497, 8066, 2614, 2889, # 7942 + 4051, 8067, 8068, 2966, 8069, 8070, 8071, 8072, 3171, 4571, 4572, 2182, 1722, 8073, 3238, 3239, # 7958 + 1842, 3610, 1715, 481, 365, 1975, 1856, 8074, 8075, 1962, 2491, 4573, 8076, 2126, 3611, 3240, # 7974 + 433, 1894, 2063, 2075, 8077, 602, 2741, 8078, 8079, 8080, 8081, 8082, 3014, 1628, 3400, 8083, # 7990 + 3172, 4574, 4052, 2890, 4575, 2512, 8084, 2544, 2772, 8085, 8086, 8087, 3310, 4576, 2891, 8088, # 8006 + 4577, 8089, 2851, 4578, 4579, 1221, 2967, 4053, 2513, 8090, 8091, 8092, 1867, 1989, 8093, 8094, # 8022 + 8095, 1895, 8096, 8097, 4580, 1896, 4054, 318, 8098, 2094, 4055, 4293, 8099, 8100, 485, 8101, # 8038 + 938, 3862, 553, 2670, 116, 8102, 3863, 3612, 8103, 3498, 2671, 2773, 3401, 3311, 2807, 8104, # 8054 + 3613, 2929, 4056, 1747, 2930, 2968, 8105, 8106, 207, 8107, 8108, 2672, 4581, 2514, 8109, 3015, # 8070 + 890, 3614, 3864, 8110, 1877, 3732, 3402, 8111, 2183, 2353, 3403, 1652, 8112, 8113, 8114, 941, # 8086 + 2294, 208, 3499, 4057, 2019, 330, 4294, 3865, 2892, 2492, 3733, 4295, 8115, 8116, 8117, 8118, # 8102 ) - +# fmt: on diff --git a/src/pip/_vendor/chardet/euctwprober.py b/src/pip/_vendor/chardet/euctwprober.py index 35669cc4dd8..a37ab189958 100644 --- a/src/pip/_vendor/chardet/euctwprober.py +++ b/src/pip/_vendor/chardet/euctwprober.py @@ -25,22 +25,23 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### -from .mbcharsetprober import MultiByteCharSetProber -from .codingstatemachine import CodingStateMachine from .chardistribution import EUCTWDistributionAnalysis +from .codingstatemachine import CodingStateMachine +from .mbcharsetprober import MultiByteCharSetProber from .mbcssm import EUCTW_SM_MODEL + class EUCTWProber(MultiByteCharSetProber): - def __init__(self): - super(EUCTWProber, self).__init__() + def __init__(self) -> None: + super().__init__() self.coding_sm = CodingStateMachine(EUCTW_SM_MODEL) self.distribution_analyzer = EUCTWDistributionAnalysis() self.reset() @property - def charset_name(self): + def charset_name(self) -> str: return "EUC-TW" @property - def language(self): + def language(self) -> str: return "Taiwan" diff --git a/src/pip/_vendor/chardet/gb2312freq.py b/src/pip/_vendor/chardet/gb2312freq.py index 697837bd9a8..b32bfc74213 100644 --- a/src/pip/_vendor/chardet/gb2312freq.py +++ b/src/pip/_vendor/chardet/gb2312freq.py @@ -43,6 +43,7 @@ GB2312_TABLE_SIZE = 3760 +# fmt: off GB2312_CHAR_TO_FREQ_ORDER = ( 1671, 749,1443,2364,3924,3807,2330,3921,1704,3463,2691,1511,1515, 572,3191,2205, 2361, 224,2558, 479,1711, 963,3162, 440,4060,1905,2966,2947,3580,2647,3961,3842, @@ -280,4 +281,4 @@ 381,1638,4592,1020, 516,3214, 458, 947,4575,1432, 211,1514,2926,1865,2142, 189, 852,1221,1400,1486, 882,2299,4036, 351, 28,1122, 700,6479,6480,6481,6482,6483, #last 512 ) - +# fmt: on diff --git a/src/pip/_vendor/chardet/gb2312prober.py b/src/pip/_vendor/chardet/gb2312prober.py index 8446d2dd959..d423e7311e2 100644 --- a/src/pip/_vendor/chardet/gb2312prober.py +++ b/src/pip/_vendor/chardet/gb2312prober.py @@ -25,22 +25,23 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### -from .mbcharsetprober import MultiByteCharSetProber -from .codingstatemachine import CodingStateMachine from .chardistribution import GB2312DistributionAnalysis +from .codingstatemachine import CodingStateMachine +from .mbcharsetprober import MultiByteCharSetProber from .mbcssm import GB2312_SM_MODEL + class GB2312Prober(MultiByteCharSetProber): - def __init__(self): - super(GB2312Prober, self).__init__() + def __init__(self) -> None: + super().__init__() self.coding_sm = CodingStateMachine(GB2312_SM_MODEL) self.distribution_analyzer = GB2312DistributionAnalysis() self.reset() @property - def charset_name(self): + def charset_name(self) -> str: return "GB2312" @property - def language(self): + def language(self) -> str: return "Chinese" diff --git a/src/pip/_vendor/chardet/hebrewprober.py b/src/pip/_vendor/chardet/hebrewprober.py index b0e1bf49268..785d0057bcc 100644 --- a/src/pip/_vendor/chardet/hebrewprober.py +++ b/src/pip/_vendor/chardet/hebrewprober.py @@ -25,8 +25,11 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### +from typing import Optional, Union + from .charsetprober import CharSetProber from .enums import ProbingState +from .sbcharsetprober import SingleByteCharSetProber # This prober doesn't actually recognize a language or a charset. # It is a helper prober for the use of the Hebrew model probers @@ -125,18 +128,20 @@ # model probers scores. The answer is returned in the form of the name of the # charset identified, either "windows-1255" or "ISO-8859-8". + class HebrewProber(CharSetProber): + SPACE = 0x20 # windows-1255 / ISO-8859-8 code points of interest - FINAL_KAF = 0xea - NORMAL_KAF = 0xeb - FINAL_MEM = 0xed - NORMAL_MEM = 0xee - FINAL_NUN = 0xef - NORMAL_NUN = 0xf0 - FINAL_PE = 0xf3 - NORMAL_PE = 0xf4 - FINAL_TSADI = 0xf5 - NORMAL_TSADI = 0xf6 + FINAL_KAF = 0xEA + NORMAL_KAF = 0xEB + FINAL_MEM = 0xED + NORMAL_MEM = 0xEE + FINAL_NUN = 0xEF + NORMAL_NUN = 0xF0 + FINAL_PE = 0xF3 + NORMAL_PE = 0xF4 + FINAL_TSADI = 0xF5 + NORMAL_TSADI = 0xF6 # Minimum Visual vs Logical final letter score difference. # If the difference is below this, don't rely solely on the final letter score @@ -151,35 +156,44 @@ class HebrewProber(CharSetProber): VISUAL_HEBREW_NAME = "ISO-8859-8" LOGICAL_HEBREW_NAME = "windows-1255" - def __init__(self): - super(HebrewProber, self).__init__() - self._final_char_logical_score = None - self._final_char_visual_score = None - self._prev = None - self._before_prev = None - self._logical_prober = None - self._visual_prober = None + def __init__(self) -> None: + super().__init__() + self._final_char_logical_score = 0 + self._final_char_visual_score = 0 + self._prev = self.SPACE + self._before_prev = self.SPACE + self._logical_prober: Optional[SingleByteCharSetProber] = None + self._visual_prober: Optional[SingleByteCharSetProber] = None self.reset() - def reset(self): + def reset(self) -> None: self._final_char_logical_score = 0 self._final_char_visual_score = 0 # The two last characters seen in the previous buffer, # mPrev and mBeforePrev are initialized to space in order to simulate # a word delimiter at the beginning of the data - self._prev = ' ' - self._before_prev = ' ' + self._prev = self.SPACE + self._before_prev = self.SPACE # These probers are owned by the group prober. - def set_model_probers(self, logicalProber, visualProber): - self._logical_prober = logicalProber - self._visual_prober = visualProber + def set_model_probers( + self, + logical_prober: SingleByteCharSetProber, + visual_prober: SingleByteCharSetProber, + ) -> None: + self._logical_prober = logical_prober + self._visual_prober = visual_prober - def is_final(self, c): - return c in [self.FINAL_KAF, self.FINAL_MEM, self.FINAL_NUN, - self.FINAL_PE, self.FINAL_TSADI] + def is_final(self, c: int) -> bool: + return c in [ + self.FINAL_KAF, + self.FINAL_MEM, + self.FINAL_NUN, + self.FINAL_PE, + self.FINAL_TSADI, + ] - def is_non_final(self, c): + def is_non_final(self, c: int) -> bool: # The normal Tsadi is not a good Non-Final letter due to words like # 'lechotet' (to chat) containing an apostrophe after the tsadi. This # apostrophe is converted to a space in FilterWithoutEnglishLetters @@ -190,10 +204,9 @@ def is_non_final(self, c): # for example legally end with a Non-Final Pe or Kaf. However, the # benefit of these letters as Non-Final letters outweighs the damage # since these words are quite rare. - return c in [self.NORMAL_KAF, self.NORMAL_MEM, - self.NORMAL_NUN, self.NORMAL_PE] + return c in [self.NORMAL_KAF, self.NORMAL_MEM, self.NORMAL_NUN, self.NORMAL_PE] - def feed(self, byte_str): + def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState: # Final letter analysis for logical-visual decision. # Look for evidence that the received buffer is either logical Hebrew # or visual Hebrew. @@ -227,9 +240,9 @@ def feed(self, byte_str): byte_str = self.filter_high_byte_only(byte_str) for cur in byte_str: - if cur == ' ': + if cur == self.SPACE: # We stand on a space - a word just ended - if self._before_prev != ' ': + if self._before_prev != self.SPACE: # next-to-last char was not a space so self._prev is not a # 1 letter word if self.is_final(self._prev): @@ -241,8 +254,11 @@ def feed(self, byte_str): self._final_char_visual_score += 1 else: # Not standing on a space - if ((self._before_prev == ' ') and - (self.is_final(self._prev)) and (cur != ' ')): + if ( + (self._before_prev == self.SPACE) + and (self.is_final(self._prev)) + and (cur != self.SPACE) + ): # case (3) [-2:space][-1:final letter][cur:not space] self._final_char_visual_score += 1 self._before_prev = self._prev @@ -253,7 +269,10 @@ def feed(self, byte_str): return ProbingState.DETECTING @property - def charset_name(self): + def charset_name(self) -> str: + assert self._logical_prober is not None + assert self._visual_prober is not None + # Make the decision: is it Logical or Visual? # If the final letter score distance is dominant enough, rely on it. finalsub = self._final_char_logical_score - self._final_char_visual_score @@ -263,8 +282,9 @@ def charset_name(self): return self.VISUAL_HEBREW_NAME # It's not dominant enough, try to rely on the model scores instead. - modelsub = (self._logical_prober.get_confidence() - - self._visual_prober.get_confidence()) + modelsub = ( + self._logical_prober.get_confidence() - self._visual_prober.get_confidence() + ) if modelsub > self.MIN_MODEL_DISTANCE: return self.LOGICAL_HEBREW_NAME if modelsub < -self.MIN_MODEL_DISTANCE: @@ -280,13 +300,17 @@ def charset_name(self): return self.LOGICAL_HEBREW_NAME @property - def language(self): - return 'Hebrew' + def language(self) -> str: + return "Hebrew" @property - def state(self): + def state(self) -> ProbingState: + assert self._logical_prober is not None + assert self._visual_prober is not None + # Remain active as long as any of the model probers are active. - if (self._logical_prober.state == ProbingState.NOT_ME) and \ - (self._visual_prober.state == ProbingState.NOT_ME): + if (self._logical_prober.state == ProbingState.NOT_ME) and ( + self._visual_prober.state == ProbingState.NOT_ME + ): return ProbingState.NOT_ME return ProbingState.DETECTING diff --git a/src/pip/_vendor/chardet/jisfreq.py b/src/pip/_vendor/chardet/jisfreq.py index 83fc082b545..3293576e012 100644 --- a/src/pip/_vendor/chardet/jisfreq.py +++ b/src/pip/_vendor/chardet/jisfreq.py @@ -46,6 +46,7 @@ # Char to FreqOrder table , JIS_TABLE_SIZE = 4368 +# fmt: off JIS_CHAR_TO_FREQ_ORDER = ( 40, 1, 6, 182, 152, 180, 295,2127, 285, 381,3295,4304,3068,4606,3165,3510, # 16 3511,1822,2785,4607,1193,2226,5070,4608, 171,2996,1247, 18, 179,5071, 856,1661, # 32 @@ -321,5 +322,4 @@ 1444,1698,2385,2251,3729,1365,2281,2235,1717,6188, 864,3841,2515, 444, 527,2767, # 4352 2922,3625, 544, 461,6189, 566, 209,2437,3398,2098,1065,2068,3331,3626,3257,2137, # 4368 #last 512 ) - - +# fmt: on diff --git a/src/pip/_vendor/chardet/johabfreq.py b/src/pip/_vendor/chardet/johabfreq.py new file mode 100644 index 00000000000..c12969990d7 --- /dev/null +++ b/src/pip/_vendor/chardet/johabfreq.py @@ -0,0 +1,2382 @@ +######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is Mozilla Communicator client code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Mark Pilgrim - port to Python +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +# The frequency data itself is the same as euc-kr. +# This is just a mapping table to euc-kr. + +JOHAB_TO_EUCKR_ORDER_TABLE = { + 0x8861: 0, + 0x8862: 1, + 0x8865: 2, + 0x8868: 3, + 0x8869: 4, + 0x886A: 5, + 0x886B: 6, + 0x8871: 7, + 0x8873: 8, + 0x8874: 9, + 0x8875: 10, + 0x8876: 11, + 0x8877: 12, + 0x8878: 13, + 0x8879: 14, + 0x887B: 15, + 0x887C: 16, + 0x887D: 17, + 0x8881: 18, + 0x8882: 19, + 0x8885: 20, + 0x8889: 21, + 0x8891: 22, + 0x8893: 23, + 0x8895: 24, + 0x8896: 25, + 0x8897: 26, + 0x88A1: 27, + 0x88A2: 28, + 0x88A5: 29, + 0x88A9: 30, + 0x88B5: 31, + 0x88B7: 32, + 0x88C1: 33, + 0x88C5: 34, + 0x88C9: 35, + 0x88E1: 36, + 0x88E2: 37, + 0x88E5: 38, + 0x88E8: 39, + 0x88E9: 40, + 0x88EB: 41, + 0x88F1: 42, + 0x88F3: 43, + 0x88F5: 44, + 0x88F6: 45, + 0x88F7: 46, + 0x88F8: 47, + 0x88FB: 48, + 0x88FC: 49, + 0x88FD: 50, + 0x8941: 51, + 0x8945: 52, + 0x8949: 53, + 0x8951: 54, + 0x8953: 55, + 0x8955: 56, + 0x8956: 57, + 0x8957: 58, + 0x8961: 59, + 0x8962: 60, + 0x8963: 61, + 0x8965: 62, + 0x8968: 63, + 0x8969: 64, + 0x8971: 65, + 0x8973: 66, + 0x8975: 67, + 0x8976: 68, + 0x8977: 69, + 0x897B: 70, + 0x8981: 71, + 0x8985: 72, + 0x8989: 73, + 0x8993: 74, + 0x8995: 75, + 0x89A1: 76, + 0x89A2: 77, + 0x89A5: 78, + 0x89A8: 79, + 0x89A9: 80, + 0x89AB: 81, + 0x89AD: 82, + 0x89B0: 83, + 0x89B1: 84, + 0x89B3: 85, + 0x89B5: 86, + 0x89B7: 87, + 0x89B8: 88, + 0x89C1: 89, + 0x89C2: 90, + 0x89C5: 91, + 0x89C9: 92, + 0x89CB: 93, + 0x89D1: 94, + 0x89D3: 95, + 0x89D5: 96, + 0x89D7: 97, + 0x89E1: 98, + 0x89E5: 99, + 0x89E9: 100, + 0x89F3: 101, + 0x89F6: 102, + 0x89F7: 103, + 0x8A41: 104, + 0x8A42: 105, + 0x8A45: 106, + 0x8A49: 107, + 0x8A51: 108, + 0x8A53: 109, + 0x8A55: 110, + 0x8A57: 111, + 0x8A61: 112, + 0x8A65: 113, + 0x8A69: 114, + 0x8A73: 115, + 0x8A75: 116, + 0x8A81: 117, + 0x8A82: 118, + 0x8A85: 119, + 0x8A88: 120, + 0x8A89: 121, + 0x8A8A: 122, + 0x8A8B: 123, + 0x8A90: 124, + 0x8A91: 125, + 0x8A93: 126, + 0x8A95: 127, + 0x8A97: 128, + 0x8A98: 129, + 0x8AA1: 130, + 0x8AA2: 131, + 0x8AA5: 132, + 0x8AA9: 133, + 0x8AB6: 134, + 0x8AB7: 135, + 0x8AC1: 136, + 0x8AD5: 137, + 0x8AE1: 138, + 0x8AE2: 139, + 0x8AE5: 140, + 0x8AE9: 141, + 0x8AF1: 142, + 0x8AF3: 143, + 0x8AF5: 144, + 0x8B41: 145, + 0x8B45: 146, + 0x8B49: 147, + 0x8B61: 148, + 0x8B62: 149, + 0x8B65: 150, + 0x8B68: 151, + 0x8B69: 152, + 0x8B6A: 153, + 0x8B71: 154, + 0x8B73: 155, + 0x8B75: 156, + 0x8B77: 157, + 0x8B81: 158, + 0x8BA1: 159, + 0x8BA2: 160, + 0x8BA5: 161, + 0x8BA8: 162, + 0x8BA9: 163, + 0x8BAB: 164, + 0x8BB1: 165, + 0x8BB3: 166, + 0x8BB5: 167, + 0x8BB7: 168, + 0x8BB8: 169, + 0x8BBC: 170, + 0x8C61: 171, + 0x8C62: 172, + 0x8C63: 173, + 0x8C65: 174, + 0x8C69: 175, + 0x8C6B: 176, + 0x8C71: 177, + 0x8C73: 178, + 0x8C75: 179, + 0x8C76: 180, + 0x8C77: 181, + 0x8C7B: 182, + 0x8C81: 183, + 0x8C82: 184, + 0x8C85: 185, + 0x8C89: 186, + 0x8C91: 187, + 0x8C93: 188, + 0x8C95: 189, + 0x8C96: 190, + 0x8C97: 191, + 0x8CA1: 192, + 0x8CA2: 193, + 0x8CA9: 194, + 0x8CE1: 195, + 0x8CE2: 196, + 0x8CE3: 197, + 0x8CE5: 198, + 0x8CE9: 199, + 0x8CF1: 200, + 0x8CF3: 201, + 0x8CF5: 202, + 0x8CF6: 203, + 0x8CF7: 204, + 0x8D41: 205, + 0x8D42: 206, + 0x8D45: 207, + 0x8D51: 208, + 0x8D55: 209, + 0x8D57: 210, + 0x8D61: 211, + 0x8D65: 212, + 0x8D69: 213, + 0x8D75: 214, + 0x8D76: 215, + 0x8D7B: 216, + 0x8D81: 217, + 0x8DA1: 218, + 0x8DA2: 219, + 0x8DA5: 220, + 0x8DA7: 221, + 0x8DA9: 222, + 0x8DB1: 223, + 0x8DB3: 224, + 0x8DB5: 225, + 0x8DB7: 226, + 0x8DB8: 227, + 0x8DB9: 228, + 0x8DC1: 229, + 0x8DC2: 230, + 0x8DC9: 231, + 0x8DD6: 232, + 0x8DD7: 233, + 0x8DE1: 234, + 0x8DE2: 235, + 0x8DF7: 236, + 0x8E41: 237, + 0x8E45: 238, + 0x8E49: 239, + 0x8E51: 240, + 0x8E53: 241, + 0x8E57: 242, + 0x8E61: 243, + 0x8E81: 244, + 0x8E82: 245, + 0x8E85: 246, + 0x8E89: 247, + 0x8E90: 248, + 0x8E91: 249, + 0x8E93: 250, + 0x8E95: 251, + 0x8E97: 252, + 0x8E98: 253, + 0x8EA1: 254, + 0x8EA9: 255, + 0x8EB6: 256, + 0x8EB7: 257, + 0x8EC1: 258, + 0x8EC2: 259, + 0x8EC5: 260, + 0x8EC9: 261, + 0x8ED1: 262, + 0x8ED3: 263, + 0x8ED6: 264, + 0x8EE1: 265, + 0x8EE5: 266, + 0x8EE9: 267, + 0x8EF1: 268, + 0x8EF3: 269, + 0x8F41: 270, + 0x8F61: 271, + 0x8F62: 272, + 0x8F65: 273, + 0x8F67: 274, + 0x8F69: 275, + 0x8F6B: 276, + 0x8F70: 277, + 0x8F71: 278, + 0x8F73: 279, + 0x8F75: 280, + 0x8F77: 281, + 0x8F7B: 282, + 0x8FA1: 283, + 0x8FA2: 284, + 0x8FA5: 285, + 0x8FA9: 286, + 0x8FB1: 287, + 0x8FB3: 288, + 0x8FB5: 289, + 0x8FB7: 290, + 0x9061: 291, + 0x9062: 292, + 0x9063: 293, + 0x9065: 294, + 0x9068: 295, + 0x9069: 296, + 0x906A: 297, + 0x906B: 298, + 0x9071: 299, + 0x9073: 300, + 0x9075: 301, + 0x9076: 302, + 0x9077: 303, + 0x9078: 304, + 0x9079: 305, + 0x907B: 306, + 0x907D: 307, + 0x9081: 308, + 0x9082: 309, + 0x9085: 310, + 0x9089: 311, + 0x9091: 312, + 0x9093: 313, + 0x9095: 314, + 0x9096: 315, + 0x9097: 316, + 0x90A1: 317, + 0x90A2: 318, + 0x90A5: 319, + 0x90A9: 320, + 0x90B1: 321, + 0x90B7: 322, + 0x90E1: 323, + 0x90E2: 324, + 0x90E4: 325, + 0x90E5: 326, + 0x90E9: 327, + 0x90EB: 328, + 0x90EC: 329, + 0x90F1: 330, + 0x90F3: 331, + 0x90F5: 332, + 0x90F6: 333, + 0x90F7: 334, + 0x90FD: 335, + 0x9141: 336, + 0x9142: 337, + 0x9145: 338, + 0x9149: 339, + 0x9151: 340, + 0x9153: 341, + 0x9155: 342, + 0x9156: 343, + 0x9157: 344, + 0x9161: 345, + 0x9162: 346, + 0x9165: 347, + 0x9169: 348, + 0x9171: 349, + 0x9173: 350, + 0x9176: 351, + 0x9177: 352, + 0x917A: 353, + 0x9181: 354, + 0x9185: 355, + 0x91A1: 356, + 0x91A2: 357, + 0x91A5: 358, + 0x91A9: 359, + 0x91AB: 360, + 0x91B1: 361, + 0x91B3: 362, + 0x91B5: 363, + 0x91B7: 364, + 0x91BC: 365, + 0x91BD: 366, + 0x91C1: 367, + 0x91C5: 368, + 0x91C9: 369, + 0x91D6: 370, + 0x9241: 371, + 0x9245: 372, + 0x9249: 373, + 0x9251: 374, + 0x9253: 375, + 0x9255: 376, + 0x9261: 377, + 0x9262: 378, + 0x9265: 379, + 0x9269: 380, + 0x9273: 381, + 0x9275: 382, + 0x9277: 383, + 0x9281: 384, + 0x9282: 385, + 0x9285: 386, + 0x9288: 387, + 0x9289: 388, + 0x9291: 389, + 0x9293: 390, + 0x9295: 391, + 0x9297: 392, + 0x92A1: 393, + 0x92B6: 394, + 0x92C1: 395, + 0x92E1: 396, + 0x92E5: 397, + 0x92E9: 398, + 0x92F1: 399, + 0x92F3: 400, + 0x9341: 401, + 0x9342: 402, + 0x9349: 403, + 0x9351: 404, + 0x9353: 405, + 0x9357: 406, + 0x9361: 407, + 0x9362: 408, + 0x9365: 409, + 0x9369: 410, + 0x936A: 411, + 0x936B: 412, + 0x9371: 413, + 0x9373: 414, + 0x9375: 415, + 0x9377: 416, + 0x9378: 417, + 0x937C: 418, + 0x9381: 419, + 0x9385: 420, + 0x9389: 421, + 0x93A1: 422, + 0x93A2: 423, + 0x93A5: 424, + 0x93A9: 425, + 0x93AB: 426, + 0x93B1: 427, + 0x93B3: 428, + 0x93B5: 429, + 0x93B7: 430, + 0x93BC: 431, + 0x9461: 432, + 0x9462: 433, + 0x9463: 434, + 0x9465: 435, + 0x9468: 436, + 0x9469: 437, + 0x946A: 438, + 0x946B: 439, + 0x946C: 440, + 0x9470: 441, + 0x9471: 442, + 0x9473: 443, + 0x9475: 444, + 0x9476: 445, + 0x9477: 446, + 0x9478: 447, + 0x9479: 448, + 0x947D: 449, + 0x9481: 450, + 0x9482: 451, + 0x9485: 452, + 0x9489: 453, + 0x9491: 454, + 0x9493: 455, + 0x9495: 456, + 0x9496: 457, + 0x9497: 458, + 0x94A1: 459, + 0x94E1: 460, + 0x94E2: 461, + 0x94E3: 462, + 0x94E5: 463, + 0x94E8: 464, + 0x94E9: 465, + 0x94EB: 466, + 0x94EC: 467, + 0x94F1: 468, + 0x94F3: 469, + 0x94F5: 470, + 0x94F7: 471, + 0x94F9: 472, + 0x94FC: 473, + 0x9541: 474, + 0x9542: 475, + 0x9545: 476, + 0x9549: 477, + 0x9551: 478, + 0x9553: 479, + 0x9555: 480, + 0x9556: 481, + 0x9557: 482, + 0x9561: 483, + 0x9565: 484, + 0x9569: 485, + 0x9576: 486, + 0x9577: 487, + 0x9581: 488, + 0x9585: 489, + 0x95A1: 490, + 0x95A2: 491, + 0x95A5: 492, + 0x95A8: 493, + 0x95A9: 494, + 0x95AB: 495, + 0x95AD: 496, + 0x95B1: 497, + 0x95B3: 498, + 0x95B5: 499, + 0x95B7: 500, + 0x95B9: 501, + 0x95BB: 502, + 0x95C1: 503, + 0x95C5: 504, + 0x95C9: 505, + 0x95E1: 506, + 0x95F6: 507, + 0x9641: 508, + 0x9645: 509, + 0x9649: 510, + 0x9651: 511, + 0x9653: 512, + 0x9655: 513, + 0x9661: 514, + 0x9681: 515, + 0x9682: 516, + 0x9685: 517, + 0x9689: 518, + 0x9691: 519, + 0x9693: 520, + 0x9695: 521, + 0x9697: 522, + 0x96A1: 523, + 0x96B6: 524, + 0x96C1: 525, + 0x96D7: 526, + 0x96E1: 527, + 0x96E5: 528, + 0x96E9: 529, + 0x96F3: 530, + 0x96F5: 531, + 0x96F7: 532, + 0x9741: 533, + 0x9745: 534, + 0x9749: 535, + 0x9751: 536, + 0x9757: 537, + 0x9761: 538, + 0x9762: 539, + 0x9765: 540, + 0x9768: 541, + 0x9769: 542, + 0x976B: 543, + 0x9771: 544, + 0x9773: 545, + 0x9775: 546, + 0x9777: 547, + 0x9781: 548, + 0x97A1: 549, + 0x97A2: 550, + 0x97A5: 551, + 0x97A8: 552, + 0x97A9: 553, + 0x97B1: 554, + 0x97B3: 555, + 0x97B5: 556, + 0x97B6: 557, + 0x97B7: 558, + 0x97B8: 559, + 0x9861: 560, + 0x9862: 561, + 0x9865: 562, + 0x9869: 563, + 0x9871: 564, + 0x9873: 565, + 0x9875: 566, + 0x9876: 567, + 0x9877: 568, + 0x987D: 569, + 0x9881: 570, + 0x9882: 571, + 0x9885: 572, + 0x9889: 573, + 0x9891: 574, + 0x9893: 575, + 0x9895: 576, + 0x9896: 577, + 0x9897: 578, + 0x98E1: 579, + 0x98E2: 580, + 0x98E5: 581, + 0x98E9: 582, + 0x98EB: 583, + 0x98EC: 584, + 0x98F1: 585, + 0x98F3: 586, + 0x98F5: 587, + 0x98F6: 588, + 0x98F7: 589, + 0x98FD: 590, + 0x9941: 591, + 0x9942: 592, + 0x9945: 593, + 0x9949: 594, + 0x9951: 595, + 0x9953: 596, + 0x9955: 597, + 0x9956: 598, + 0x9957: 599, + 0x9961: 600, + 0x9976: 601, + 0x99A1: 602, + 0x99A2: 603, + 0x99A5: 604, + 0x99A9: 605, + 0x99B7: 606, + 0x99C1: 607, + 0x99C9: 608, + 0x99E1: 609, + 0x9A41: 610, + 0x9A45: 611, + 0x9A81: 612, + 0x9A82: 613, + 0x9A85: 614, + 0x9A89: 615, + 0x9A90: 616, + 0x9A91: 617, + 0x9A97: 618, + 0x9AC1: 619, + 0x9AE1: 620, + 0x9AE5: 621, + 0x9AE9: 622, + 0x9AF1: 623, + 0x9AF3: 624, + 0x9AF7: 625, + 0x9B61: 626, + 0x9B62: 627, + 0x9B65: 628, + 0x9B68: 629, + 0x9B69: 630, + 0x9B71: 631, + 0x9B73: 632, + 0x9B75: 633, + 0x9B81: 634, + 0x9B85: 635, + 0x9B89: 636, + 0x9B91: 637, + 0x9B93: 638, + 0x9BA1: 639, + 0x9BA5: 640, + 0x9BA9: 641, + 0x9BB1: 642, + 0x9BB3: 643, + 0x9BB5: 644, + 0x9BB7: 645, + 0x9C61: 646, + 0x9C62: 647, + 0x9C65: 648, + 0x9C69: 649, + 0x9C71: 650, + 0x9C73: 651, + 0x9C75: 652, + 0x9C76: 653, + 0x9C77: 654, + 0x9C78: 655, + 0x9C7C: 656, + 0x9C7D: 657, + 0x9C81: 658, + 0x9C82: 659, + 0x9C85: 660, + 0x9C89: 661, + 0x9C91: 662, + 0x9C93: 663, + 0x9C95: 664, + 0x9C96: 665, + 0x9C97: 666, + 0x9CA1: 667, + 0x9CA2: 668, + 0x9CA5: 669, + 0x9CB5: 670, + 0x9CB7: 671, + 0x9CE1: 672, + 0x9CE2: 673, + 0x9CE5: 674, + 0x9CE9: 675, + 0x9CF1: 676, + 0x9CF3: 677, + 0x9CF5: 678, + 0x9CF6: 679, + 0x9CF7: 680, + 0x9CFD: 681, + 0x9D41: 682, + 0x9D42: 683, + 0x9D45: 684, + 0x9D49: 685, + 0x9D51: 686, + 0x9D53: 687, + 0x9D55: 688, + 0x9D57: 689, + 0x9D61: 690, + 0x9D62: 691, + 0x9D65: 692, + 0x9D69: 693, + 0x9D71: 694, + 0x9D73: 695, + 0x9D75: 696, + 0x9D76: 697, + 0x9D77: 698, + 0x9D81: 699, + 0x9D85: 700, + 0x9D93: 701, + 0x9D95: 702, + 0x9DA1: 703, + 0x9DA2: 704, + 0x9DA5: 705, + 0x9DA9: 706, + 0x9DB1: 707, + 0x9DB3: 708, + 0x9DB5: 709, + 0x9DB7: 710, + 0x9DC1: 711, + 0x9DC5: 712, + 0x9DD7: 713, + 0x9DF6: 714, + 0x9E41: 715, + 0x9E45: 716, + 0x9E49: 717, + 0x9E51: 718, + 0x9E53: 719, + 0x9E55: 720, + 0x9E57: 721, + 0x9E61: 722, + 0x9E65: 723, + 0x9E69: 724, + 0x9E73: 725, + 0x9E75: 726, + 0x9E77: 727, + 0x9E81: 728, + 0x9E82: 729, + 0x9E85: 730, + 0x9E89: 731, + 0x9E91: 732, + 0x9E93: 733, + 0x9E95: 734, + 0x9E97: 735, + 0x9EA1: 736, + 0x9EB6: 737, + 0x9EC1: 738, + 0x9EE1: 739, + 0x9EE2: 740, + 0x9EE5: 741, + 0x9EE9: 742, + 0x9EF1: 743, + 0x9EF5: 744, + 0x9EF7: 745, + 0x9F41: 746, + 0x9F42: 747, + 0x9F45: 748, + 0x9F49: 749, + 0x9F51: 750, + 0x9F53: 751, + 0x9F55: 752, + 0x9F57: 753, + 0x9F61: 754, + 0x9F62: 755, + 0x9F65: 756, + 0x9F69: 757, + 0x9F71: 758, + 0x9F73: 759, + 0x9F75: 760, + 0x9F77: 761, + 0x9F78: 762, + 0x9F7B: 763, + 0x9F7C: 764, + 0x9FA1: 765, + 0x9FA2: 766, + 0x9FA5: 767, + 0x9FA9: 768, + 0x9FB1: 769, + 0x9FB3: 770, + 0x9FB5: 771, + 0x9FB7: 772, + 0xA061: 773, + 0xA062: 774, + 0xA065: 775, + 0xA067: 776, + 0xA068: 777, + 0xA069: 778, + 0xA06A: 779, + 0xA06B: 780, + 0xA071: 781, + 0xA073: 782, + 0xA075: 783, + 0xA077: 784, + 0xA078: 785, + 0xA07B: 786, + 0xA07D: 787, + 0xA081: 788, + 0xA082: 789, + 0xA085: 790, + 0xA089: 791, + 0xA091: 792, + 0xA093: 793, + 0xA095: 794, + 0xA096: 795, + 0xA097: 796, + 0xA098: 797, + 0xA0A1: 798, + 0xA0A2: 799, + 0xA0A9: 800, + 0xA0B7: 801, + 0xA0E1: 802, + 0xA0E2: 803, + 0xA0E5: 804, + 0xA0E9: 805, + 0xA0EB: 806, + 0xA0F1: 807, + 0xA0F3: 808, + 0xA0F5: 809, + 0xA0F7: 810, + 0xA0F8: 811, + 0xA0FD: 812, + 0xA141: 813, + 0xA142: 814, + 0xA145: 815, + 0xA149: 816, + 0xA151: 817, + 0xA153: 818, + 0xA155: 819, + 0xA156: 820, + 0xA157: 821, + 0xA161: 822, + 0xA162: 823, + 0xA165: 824, + 0xA169: 825, + 0xA175: 826, + 0xA176: 827, + 0xA177: 828, + 0xA179: 829, + 0xA181: 830, + 0xA1A1: 831, + 0xA1A2: 832, + 0xA1A4: 833, + 0xA1A5: 834, + 0xA1A9: 835, + 0xA1AB: 836, + 0xA1B1: 837, + 0xA1B3: 838, + 0xA1B5: 839, + 0xA1B7: 840, + 0xA1C1: 841, + 0xA1C5: 842, + 0xA1D6: 843, + 0xA1D7: 844, + 0xA241: 845, + 0xA245: 846, + 0xA249: 847, + 0xA253: 848, + 0xA255: 849, + 0xA257: 850, + 0xA261: 851, + 0xA265: 852, + 0xA269: 853, + 0xA273: 854, + 0xA275: 855, + 0xA281: 856, + 0xA282: 857, + 0xA283: 858, + 0xA285: 859, + 0xA288: 860, + 0xA289: 861, + 0xA28A: 862, + 0xA28B: 863, + 0xA291: 864, + 0xA293: 865, + 0xA295: 866, + 0xA297: 867, + 0xA29B: 868, + 0xA29D: 869, + 0xA2A1: 870, + 0xA2A5: 871, + 0xA2A9: 872, + 0xA2B3: 873, + 0xA2B5: 874, + 0xA2C1: 875, + 0xA2E1: 876, + 0xA2E5: 877, + 0xA2E9: 878, + 0xA341: 879, + 0xA345: 880, + 0xA349: 881, + 0xA351: 882, + 0xA355: 883, + 0xA361: 884, + 0xA365: 885, + 0xA369: 886, + 0xA371: 887, + 0xA375: 888, + 0xA3A1: 889, + 0xA3A2: 890, + 0xA3A5: 891, + 0xA3A8: 892, + 0xA3A9: 893, + 0xA3AB: 894, + 0xA3B1: 895, + 0xA3B3: 896, + 0xA3B5: 897, + 0xA3B6: 898, + 0xA3B7: 899, + 0xA3B9: 900, + 0xA3BB: 901, + 0xA461: 902, + 0xA462: 903, + 0xA463: 904, + 0xA464: 905, + 0xA465: 906, + 0xA468: 907, + 0xA469: 908, + 0xA46A: 909, + 0xA46B: 910, + 0xA46C: 911, + 0xA471: 912, + 0xA473: 913, + 0xA475: 914, + 0xA477: 915, + 0xA47B: 916, + 0xA481: 917, + 0xA482: 918, + 0xA485: 919, + 0xA489: 920, + 0xA491: 921, + 0xA493: 922, + 0xA495: 923, + 0xA496: 924, + 0xA497: 925, + 0xA49B: 926, + 0xA4A1: 927, + 0xA4A2: 928, + 0xA4A5: 929, + 0xA4B3: 930, + 0xA4E1: 931, + 0xA4E2: 932, + 0xA4E5: 933, + 0xA4E8: 934, + 0xA4E9: 935, + 0xA4EB: 936, + 0xA4F1: 937, + 0xA4F3: 938, + 0xA4F5: 939, + 0xA4F7: 940, + 0xA4F8: 941, + 0xA541: 942, + 0xA542: 943, + 0xA545: 944, + 0xA548: 945, + 0xA549: 946, + 0xA551: 947, + 0xA553: 948, + 0xA555: 949, + 0xA556: 950, + 0xA557: 951, + 0xA561: 952, + 0xA562: 953, + 0xA565: 954, + 0xA569: 955, + 0xA573: 956, + 0xA575: 957, + 0xA576: 958, + 0xA577: 959, + 0xA57B: 960, + 0xA581: 961, + 0xA585: 962, + 0xA5A1: 963, + 0xA5A2: 964, + 0xA5A3: 965, + 0xA5A5: 966, + 0xA5A9: 967, + 0xA5B1: 968, + 0xA5B3: 969, + 0xA5B5: 970, + 0xA5B7: 971, + 0xA5C1: 972, + 0xA5C5: 973, + 0xA5D6: 974, + 0xA5E1: 975, + 0xA5F6: 976, + 0xA641: 977, + 0xA642: 978, + 0xA645: 979, + 0xA649: 980, + 0xA651: 981, + 0xA653: 982, + 0xA661: 983, + 0xA665: 984, + 0xA681: 985, + 0xA682: 986, + 0xA685: 987, + 0xA688: 988, + 0xA689: 989, + 0xA68A: 990, + 0xA68B: 991, + 0xA691: 992, + 0xA693: 993, + 0xA695: 994, + 0xA697: 995, + 0xA69B: 996, + 0xA69C: 997, + 0xA6A1: 998, + 0xA6A9: 999, + 0xA6B6: 1000, + 0xA6C1: 1001, + 0xA6E1: 1002, + 0xA6E2: 1003, + 0xA6E5: 1004, + 0xA6E9: 1005, + 0xA6F7: 1006, + 0xA741: 1007, + 0xA745: 1008, + 0xA749: 1009, + 0xA751: 1010, + 0xA755: 1011, + 0xA757: 1012, + 0xA761: 1013, + 0xA762: 1014, + 0xA765: 1015, + 0xA769: 1016, + 0xA771: 1017, + 0xA773: 1018, + 0xA775: 1019, + 0xA7A1: 1020, + 0xA7A2: 1021, + 0xA7A5: 1022, + 0xA7A9: 1023, + 0xA7AB: 1024, + 0xA7B1: 1025, + 0xA7B3: 1026, + 0xA7B5: 1027, + 0xA7B7: 1028, + 0xA7B8: 1029, + 0xA7B9: 1030, + 0xA861: 1031, + 0xA862: 1032, + 0xA865: 1033, + 0xA869: 1034, + 0xA86B: 1035, + 0xA871: 1036, + 0xA873: 1037, + 0xA875: 1038, + 0xA876: 1039, + 0xA877: 1040, + 0xA87D: 1041, + 0xA881: 1042, + 0xA882: 1043, + 0xA885: 1044, + 0xA889: 1045, + 0xA891: 1046, + 0xA893: 1047, + 0xA895: 1048, + 0xA896: 1049, + 0xA897: 1050, + 0xA8A1: 1051, + 0xA8A2: 1052, + 0xA8B1: 1053, + 0xA8E1: 1054, + 0xA8E2: 1055, + 0xA8E5: 1056, + 0xA8E8: 1057, + 0xA8E9: 1058, + 0xA8F1: 1059, + 0xA8F5: 1060, + 0xA8F6: 1061, + 0xA8F7: 1062, + 0xA941: 1063, + 0xA957: 1064, + 0xA961: 1065, + 0xA962: 1066, + 0xA971: 1067, + 0xA973: 1068, + 0xA975: 1069, + 0xA976: 1070, + 0xA977: 1071, + 0xA9A1: 1072, + 0xA9A2: 1073, + 0xA9A5: 1074, + 0xA9A9: 1075, + 0xA9B1: 1076, + 0xA9B3: 1077, + 0xA9B7: 1078, + 0xAA41: 1079, + 0xAA61: 1080, + 0xAA77: 1081, + 0xAA81: 1082, + 0xAA82: 1083, + 0xAA85: 1084, + 0xAA89: 1085, + 0xAA91: 1086, + 0xAA95: 1087, + 0xAA97: 1088, + 0xAB41: 1089, + 0xAB57: 1090, + 0xAB61: 1091, + 0xAB65: 1092, + 0xAB69: 1093, + 0xAB71: 1094, + 0xAB73: 1095, + 0xABA1: 1096, + 0xABA2: 1097, + 0xABA5: 1098, + 0xABA9: 1099, + 0xABB1: 1100, + 0xABB3: 1101, + 0xABB5: 1102, + 0xABB7: 1103, + 0xAC61: 1104, + 0xAC62: 1105, + 0xAC64: 1106, + 0xAC65: 1107, + 0xAC68: 1108, + 0xAC69: 1109, + 0xAC6A: 1110, + 0xAC6B: 1111, + 0xAC71: 1112, + 0xAC73: 1113, + 0xAC75: 1114, + 0xAC76: 1115, + 0xAC77: 1116, + 0xAC7B: 1117, + 0xAC81: 1118, + 0xAC82: 1119, + 0xAC85: 1120, + 0xAC89: 1121, + 0xAC91: 1122, + 0xAC93: 1123, + 0xAC95: 1124, + 0xAC96: 1125, + 0xAC97: 1126, + 0xACA1: 1127, + 0xACA2: 1128, + 0xACA5: 1129, + 0xACA9: 1130, + 0xACB1: 1131, + 0xACB3: 1132, + 0xACB5: 1133, + 0xACB7: 1134, + 0xACC1: 1135, + 0xACC5: 1136, + 0xACC9: 1137, + 0xACD1: 1138, + 0xACD7: 1139, + 0xACE1: 1140, + 0xACE2: 1141, + 0xACE3: 1142, + 0xACE4: 1143, + 0xACE5: 1144, + 0xACE8: 1145, + 0xACE9: 1146, + 0xACEB: 1147, + 0xACEC: 1148, + 0xACF1: 1149, + 0xACF3: 1150, + 0xACF5: 1151, + 0xACF6: 1152, + 0xACF7: 1153, + 0xACFC: 1154, + 0xAD41: 1155, + 0xAD42: 1156, + 0xAD45: 1157, + 0xAD49: 1158, + 0xAD51: 1159, + 0xAD53: 1160, + 0xAD55: 1161, + 0xAD56: 1162, + 0xAD57: 1163, + 0xAD61: 1164, + 0xAD62: 1165, + 0xAD65: 1166, + 0xAD69: 1167, + 0xAD71: 1168, + 0xAD73: 1169, + 0xAD75: 1170, + 0xAD76: 1171, + 0xAD77: 1172, + 0xAD81: 1173, + 0xAD85: 1174, + 0xAD89: 1175, + 0xAD97: 1176, + 0xADA1: 1177, + 0xADA2: 1178, + 0xADA3: 1179, + 0xADA5: 1180, + 0xADA9: 1181, + 0xADAB: 1182, + 0xADB1: 1183, + 0xADB3: 1184, + 0xADB5: 1185, + 0xADB7: 1186, + 0xADBB: 1187, + 0xADC1: 1188, + 0xADC2: 1189, + 0xADC5: 1190, + 0xADC9: 1191, + 0xADD7: 1192, + 0xADE1: 1193, + 0xADE5: 1194, + 0xADE9: 1195, + 0xADF1: 1196, + 0xADF5: 1197, + 0xADF6: 1198, + 0xAE41: 1199, + 0xAE45: 1200, + 0xAE49: 1201, + 0xAE51: 1202, + 0xAE53: 1203, + 0xAE55: 1204, + 0xAE61: 1205, + 0xAE62: 1206, + 0xAE65: 1207, + 0xAE69: 1208, + 0xAE71: 1209, + 0xAE73: 1210, + 0xAE75: 1211, + 0xAE77: 1212, + 0xAE81: 1213, + 0xAE82: 1214, + 0xAE85: 1215, + 0xAE88: 1216, + 0xAE89: 1217, + 0xAE91: 1218, + 0xAE93: 1219, + 0xAE95: 1220, + 0xAE97: 1221, + 0xAE99: 1222, + 0xAE9B: 1223, + 0xAE9C: 1224, + 0xAEA1: 1225, + 0xAEB6: 1226, + 0xAEC1: 1227, + 0xAEC2: 1228, + 0xAEC5: 1229, + 0xAEC9: 1230, + 0xAED1: 1231, + 0xAED7: 1232, + 0xAEE1: 1233, + 0xAEE2: 1234, + 0xAEE5: 1235, + 0xAEE9: 1236, + 0xAEF1: 1237, + 0xAEF3: 1238, + 0xAEF5: 1239, + 0xAEF7: 1240, + 0xAF41: 1241, + 0xAF42: 1242, + 0xAF49: 1243, + 0xAF51: 1244, + 0xAF55: 1245, + 0xAF57: 1246, + 0xAF61: 1247, + 0xAF62: 1248, + 0xAF65: 1249, + 0xAF69: 1250, + 0xAF6A: 1251, + 0xAF71: 1252, + 0xAF73: 1253, + 0xAF75: 1254, + 0xAF77: 1255, + 0xAFA1: 1256, + 0xAFA2: 1257, + 0xAFA5: 1258, + 0xAFA8: 1259, + 0xAFA9: 1260, + 0xAFB0: 1261, + 0xAFB1: 1262, + 0xAFB3: 1263, + 0xAFB5: 1264, + 0xAFB7: 1265, + 0xAFBC: 1266, + 0xB061: 1267, + 0xB062: 1268, + 0xB064: 1269, + 0xB065: 1270, + 0xB069: 1271, + 0xB071: 1272, + 0xB073: 1273, + 0xB076: 1274, + 0xB077: 1275, + 0xB07D: 1276, + 0xB081: 1277, + 0xB082: 1278, + 0xB085: 1279, + 0xB089: 1280, + 0xB091: 1281, + 0xB093: 1282, + 0xB096: 1283, + 0xB097: 1284, + 0xB0B7: 1285, + 0xB0E1: 1286, + 0xB0E2: 1287, + 0xB0E5: 1288, + 0xB0E9: 1289, + 0xB0EB: 1290, + 0xB0F1: 1291, + 0xB0F3: 1292, + 0xB0F6: 1293, + 0xB0F7: 1294, + 0xB141: 1295, + 0xB145: 1296, + 0xB149: 1297, + 0xB185: 1298, + 0xB1A1: 1299, + 0xB1A2: 1300, + 0xB1A5: 1301, + 0xB1A8: 1302, + 0xB1A9: 1303, + 0xB1AB: 1304, + 0xB1B1: 1305, + 0xB1B3: 1306, + 0xB1B7: 1307, + 0xB1C1: 1308, + 0xB1C2: 1309, + 0xB1C5: 1310, + 0xB1D6: 1311, + 0xB1E1: 1312, + 0xB1F6: 1313, + 0xB241: 1314, + 0xB245: 1315, + 0xB249: 1316, + 0xB251: 1317, + 0xB253: 1318, + 0xB261: 1319, + 0xB281: 1320, + 0xB282: 1321, + 0xB285: 1322, + 0xB289: 1323, + 0xB291: 1324, + 0xB293: 1325, + 0xB297: 1326, + 0xB2A1: 1327, + 0xB2B6: 1328, + 0xB2C1: 1329, + 0xB2E1: 1330, + 0xB2E5: 1331, + 0xB357: 1332, + 0xB361: 1333, + 0xB362: 1334, + 0xB365: 1335, + 0xB369: 1336, + 0xB36B: 1337, + 0xB370: 1338, + 0xB371: 1339, + 0xB373: 1340, + 0xB381: 1341, + 0xB385: 1342, + 0xB389: 1343, + 0xB391: 1344, + 0xB3A1: 1345, + 0xB3A2: 1346, + 0xB3A5: 1347, + 0xB3A9: 1348, + 0xB3B1: 1349, + 0xB3B3: 1350, + 0xB3B5: 1351, + 0xB3B7: 1352, + 0xB461: 1353, + 0xB462: 1354, + 0xB465: 1355, + 0xB466: 1356, + 0xB467: 1357, + 0xB469: 1358, + 0xB46A: 1359, + 0xB46B: 1360, + 0xB470: 1361, + 0xB471: 1362, + 0xB473: 1363, + 0xB475: 1364, + 0xB476: 1365, + 0xB477: 1366, + 0xB47B: 1367, + 0xB47C: 1368, + 0xB481: 1369, + 0xB482: 1370, + 0xB485: 1371, + 0xB489: 1372, + 0xB491: 1373, + 0xB493: 1374, + 0xB495: 1375, + 0xB496: 1376, + 0xB497: 1377, + 0xB4A1: 1378, + 0xB4A2: 1379, + 0xB4A5: 1380, + 0xB4A9: 1381, + 0xB4AC: 1382, + 0xB4B1: 1383, + 0xB4B3: 1384, + 0xB4B5: 1385, + 0xB4B7: 1386, + 0xB4BB: 1387, + 0xB4BD: 1388, + 0xB4C1: 1389, + 0xB4C5: 1390, + 0xB4C9: 1391, + 0xB4D3: 1392, + 0xB4E1: 1393, + 0xB4E2: 1394, + 0xB4E5: 1395, + 0xB4E6: 1396, + 0xB4E8: 1397, + 0xB4E9: 1398, + 0xB4EA: 1399, + 0xB4EB: 1400, + 0xB4F1: 1401, + 0xB4F3: 1402, + 0xB4F4: 1403, + 0xB4F5: 1404, + 0xB4F6: 1405, + 0xB4F7: 1406, + 0xB4F8: 1407, + 0xB4FA: 1408, + 0xB4FC: 1409, + 0xB541: 1410, + 0xB542: 1411, + 0xB545: 1412, + 0xB549: 1413, + 0xB551: 1414, + 0xB553: 1415, + 0xB555: 1416, + 0xB557: 1417, + 0xB561: 1418, + 0xB562: 1419, + 0xB563: 1420, + 0xB565: 1421, + 0xB569: 1422, + 0xB56B: 1423, + 0xB56C: 1424, + 0xB571: 1425, + 0xB573: 1426, + 0xB574: 1427, + 0xB575: 1428, + 0xB576: 1429, + 0xB577: 1430, + 0xB57B: 1431, + 0xB57C: 1432, + 0xB57D: 1433, + 0xB581: 1434, + 0xB585: 1435, + 0xB589: 1436, + 0xB591: 1437, + 0xB593: 1438, + 0xB595: 1439, + 0xB596: 1440, + 0xB5A1: 1441, + 0xB5A2: 1442, + 0xB5A5: 1443, + 0xB5A9: 1444, + 0xB5AA: 1445, + 0xB5AB: 1446, + 0xB5AD: 1447, + 0xB5B0: 1448, + 0xB5B1: 1449, + 0xB5B3: 1450, + 0xB5B5: 1451, + 0xB5B7: 1452, + 0xB5B9: 1453, + 0xB5C1: 1454, + 0xB5C2: 1455, + 0xB5C5: 1456, + 0xB5C9: 1457, + 0xB5D1: 1458, + 0xB5D3: 1459, + 0xB5D5: 1460, + 0xB5D6: 1461, + 0xB5D7: 1462, + 0xB5E1: 1463, + 0xB5E2: 1464, + 0xB5E5: 1465, + 0xB5F1: 1466, + 0xB5F5: 1467, + 0xB5F7: 1468, + 0xB641: 1469, + 0xB642: 1470, + 0xB645: 1471, + 0xB649: 1472, + 0xB651: 1473, + 0xB653: 1474, + 0xB655: 1475, + 0xB657: 1476, + 0xB661: 1477, + 0xB662: 1478, + 0xB665: 1479, + 0xB669: 1480, + 0xB671: 1481, + 0xB673: 1482, + 0xB675: 1483, + 0xB677: 1484, + 0xB681: 1485, + 0xB682: 1486, + 0xB685: 1487, + 0xB689: 1488, + 0xB68A: 1489, + 0xB68B: 1490, + 0xB691: 1491, + 0xB693: 1492, + 0xB695: 1493, + 0xB697: 1494, + 0xB6A1: 1495, + 0xB6A2: 1496, + 0xB6A5: 1497, + 0xB6A9: 1498, + 0xB6B1: 1499, + 0xB6B3: 1500, + 0xB6B6: 1501, + 0xB6B7: 1502, + 0xB6C1: 1503, + 0xB6C2: 1504, + 0xB6C5: 1505, + 0xB6C9: 1506, + 0xB6D1: 1507, + 0xB6D3: 1508, + 0xB6D7: 1509, + 0xB6E1: 1510, + 0xB6E2: 1511, + 0xB6E5: 1512, + 0xB6E9: 1513, + 0xB6F1: 1514, + 0xB6F3: 1515, + 0xB6F5: 1516, + 0xB6F7: 1517, + 0xB741: 1518, + 0xB742: 1519, + 0xB745: 1520, + 0xB749: 1521, + 0xB751: 1522, + 0xB753: 1523, + 0xB755: 1524, + 0xB757: 1525, + 0xB759: 1526, + 0xB761: 1527, + 0xB762: 1528, + 0xB765: 1529, + 0xB769: 1530, + 0xB76F: 1531, + 0xB771: 1532, + 0xB773: 1533, + 0xB775: 1534, + 0xB777: 1535, + 0xB778: 1536, + 0xB779: 1537, + 0xB77A: 1538, + 0xB77B: 1539, + 0xB77C: 1540, + 0xB77D: 1541, + 0xB781: 1542, + 0xB785: 1543, + 0xB789: 1544, + 0xB791: 1545, + 0xB795: 1546, + 0xB7A1: 1547, + 0xB7A2: 1548, + 0xB7A5: 1549, + 0xB7A9: 1550, + 0xB7AA: 1551, + 0xB7AB: 1552, + 0xB7B0: 1553, + 0xB7B1: 1554, + 0xB7B3: 1555, + 0xB7B5: 1556, + 0xB7B6: 1557, + 0xB7B7: 1558, + 0xB7B8: 1559, + 0xB7BC: 1560, + 0xB861: 1561, + 0xB862: 1562, + 0xB865: 1563, + 0xB867: 1564, + 0xB868: 1565, + 0xB869: 1566, + 0xB86B: 1567, + 0xB871: 1568, + 0xB873: 1569, + 0xB875: 1570, + 0xB876: 1571, + 0xB877: 1572, + 0xB878: 1573, + 0xB881: 1574, + 0xB882: 1575, + 0xB885: 1576, + 0xB889: 1577, + 0xB891: 1578, + 0xB893: 1579, + 0xB895: 1580, + 0xB896: 1581, + 0xB897: 1582, + 0xB8A1: 1583, + 0xB8A2: 1584, + 0xB8A5: 1585, + 0xB8A7: 1586, + 0xB8A9: 1587, + 0xB8B1: 1588, + 0xB8B7: 1589, + 0xB8C1: 1590, + 0xB8C5: 1591, + 0xB8C9: 1592, + 0xB8E1: 1593, + 0xB8E2: 1594, + 0xB8E5: 1595, + 0xB8E9: 1596, + 0xB8EB: 1597, + 0xB8F1: 1598, + 0xB8F3: 1599, + 0xB8F5: 1600, + 0xB8F7: 1601, + 0xB8F8: 1602, + 0xB941: 1603, + 0xB942: 1604, + 0xB945: 1605, + 0xB949: 1606, + 0xB951: 1607, + 0xB953: 1608, + 0xB955: 1609, + 0xB957: 1610, + 0xB961: 1611, + 0xB965: 1612, + 0xB969: 1613, + 0xB971: 1614, + 0xB973: 1615, + 0xB976: 1616, + 0xB977: 1617, + 0xB981: 1618, + 0xB9A1: 1619, + 0xB9A2: 1620, + 0xB9A5: 1621, + 0xB9A9: 1622, + 0xB9AB: 1623, + 0xB9B1: 1624, + 0xB9B3: 1625, + 0xB9B5: 1626, + 0xB9B7: 1627, + 0xB9B8: 1628, + 0xB9B9: 1629, + 0xB9BD: 1630, + 0xB9C1: 1631, + 0xB9C2: 1632, + 0xB9C9: 1633, + 0xB9D3: 1634, + 0xB9D5: 1635, + 0xB9D7: 1636, + 0xB9E1: 1637, + 0xB9F6: 1638, + 0xB9F7: 1639, + 0xBA41: 1640, + 0xBA45: 1641, + 0xBA49: 1642, + 0xBA51: 1643, + 0xBA53: 1644, + 0xBA55: 1645, + 0xBA57: 1646, + 0xBA61: 1647, + 0xBA62: 1648, + 0xBA65: 1649, + 0xBA77: 1650, + 0xBA81: 1651, + 0xBA82: 1652, + 0xBA85: 1653, + 0xBA89: 1654, + 0xBA8A: 1655, + 0xBA8B: 1656, + 0xBA91: 1657, + 0xBA93: 1658, + 0xBA95: 1659, + 0xBA97: 1660, + 0xBAA1: 1661, + 0xBAB6: 1662, + 0xBAC1: 1663, + 0xBAE1: 1664, + 0xBAE2: 1665, + 0xBAE5: 1666, + 0xBAE9: 1667, + 0xBAF1: 1668, + 0xBAF3: 1669, + 0xBAF5: 1670, + 0xBB41: 1671, + 0xBB45: 1672, + 0xBB49: 1673, + 0xBB51: 1674, + 0xBB61: 1675, + 0xBB62: 1676, + 0xBB65: 1677, + 0xBB69: 1678, + 0xBB71: 1679, + 0xBB73: 1680, + 0xBB75: 1681, + 0xBB77: 1682, + 0xBBA1: 1683, + 0xBBA2: 1684, + 0xBBA5: 1685, + 0xBBA8: 1686, + 0xBBA9: 1687, + 0xBBAB: 1688, + 0xBBB1: 1689, + 0xBBB3: 1690, + 0xBBB5: 1691, + 0xBBB7: 1692, + 0xBBB8: 1693, + 0xBBBB: 1694, + 0xBBBC: 1695, + 0xBC61: 1696, + 0xBC62: 1697, + 0xBC65: 1698, + 0xBC67: 1699, + 0xBC69: 1700, + 0xBC6C: 1701, + 0xBC71: 1702, + 0xBC73: 1703, + 0xBC75: 1704, + 0xBC76: 1705, + 0xBC77: 1706, + 0xBC81: 1707, + 0xBC82: 1708, + 0xBC85: 1709, + 0xBC89: 1710, + 0xBC91: 1711, + 0xBC93: 1712, + 0xBC95: 1713, + 0xBC96: 1714, + 0xBC97: 1715, + 0xBCA1: 1716, + 0xBCA5: 1717, + 0xBCB7: 1718, + 0xBCE1: 1719, + 0xBCE2: 1720, + 0xBCE5: 1721, + 0xBCE9: 1722, + 0xBCF1: 1723, + 0xBCF3: 1724, + 0xBCF5: 1725, + 0xBCF6: 1726, + 0xBCF7: 1727, + 0xBD41: 1728, + 0xBD57: 1729, + 0xBD61: 1730, + 0xBD76: 1731, + 0xBDA1: 1732, + 0xBDA2: 1733, + 0xBDA5: 1734, + 0xBDA9: 1735, + 0xBDB1: 1736, + 0xBDB3: 1737, + 0xBDB5: 1738, + 0xBDB7: 1739, + 0xBDB9: 1740, + 0xBDC1: 1741, + 0xBDC2: 1742, + 0xBDC9: 1743, + 0xBDD6: 1744, + 0xBDE1: 1745, + 0xBDF6: 1746, + 0xBE41: 1747, + 0xBE45: 1748, + 0xBE49: 1749, + 0xBE51: 1750, + 0xBE53: 1751, + 0xBE77: 1752, + 0xBE81: 1753, + 0xBE82: 1754, + 0xBE85: 1755, + 0xBE89: 1756, + 0xBE91: 1757, + 0xBE93: 1758, + 0xBE97: 1759, + 0xBEA1: 1760, + 0xBEB6: 1761, + 0xBEB7: 1762, + 0xBEE1: 1763, + 0xBF41: 1764, + 0xBF61: 1765, + 0xBF71: 1766, + 0xBF75: 1767, + 0xBF77: 1768, + 0xBFA1: 1769, + 0xBFA2: 1770, + 0xBFA5: 1771, + 0xBFA9: 1772, + 0xBFB1: 1773, + 0xBFB3: 1774, + 0xBFB7: 1775, + 0xBFB8: 1776, + 0xBFBD: 1777, + 0xC061: 1778, + 0xC062: 1779, + 0xC065: 1780, + 0xC067: 1781, + 0xC069: 1782, + 0xC071: 1783, + 0xC073: 1784, + 0xC075: 1785, + 0xC076: 1786, + 0xC077: 1787, + 0xC078: 1788, + 0xC081: 1789, + 0xC082: 1790, + 0xC085: 1791, + 0xC089: 1792, + 0xC091: 1793, + 0xC093: 1794, + 0xC095: 1795, + 0xC096: 1796, + 0xC097: 1797, + 0xC0A1: 1798, + 0xC0A5: 1799, + 0xC0A7: 1800, + 0xC0A9: 1801, + 0xC0B1: 1802, + 0xC0B7: 1803, + 0xC0E1: 1804, + 0xC0E2: 1805, + 0xC0E5: 1806, + 0xC0E9: 1807, + 0xC0F1: 1808, + 0xC0F3: 1809, + 0xC0F5: 1810, + 0xC0F6: 1811, + 0xC0F7: 1812, + 0xC141: 1813, + 0xC142: 1814, + 0xC145: 1815, + 0xC149: 1816, + 0xC151: 1817, + 0xC153: 1818, + 0xC155: 1819, + 0xC157: 1820, + 0xC161: 1821, + 0xC165: 1822, + 0xC176: 1823, + 0xC181: 1824, + 0xC185: 1825, + 0xC197: 1826, + 0xC1A1: 1827, + 0xC1A2: 1828, + 0xC1A5: 1829, + 0xC1A9: 1830, + 0xC1B1: 1831, + 0xC1B3: 1832, + 0xC1B5: 1833, + 0xC1B7: 1834, + 0xC1C1: 1835, + 0xC1C5: 1836, + 0xC1C9: 1837, + 0xC1D7: 1838, + 0xC241: 1839, + 0xC245: 1840, + 0xC249: 1841, + 0xC251: 1842, + 0xC253: 1843, + 0xC255: 1844, + 0xC257: 1845, + 0xC261: 1846, + 0xC271: 1847, + 0xC281: 1848, + 0xC282: 1849, + 0xC285: 1850, + 0xC289: 1851, + 0xC291: 1852, + 0xC293: 1853, + 0xC295: 1854, + 0xC297: 1855, + 0xC2A1: 1856, + 0xC2B6: 1857, + 0xC2C1: 1858, + 0xC2C5: 1859, + 0xC2E1: 1860, + 0xC2E5: 1861, + 0xC2E9: 1862, + 0xC2F1: 1863, + 0xC2F3: 1864, + 0xC2F5: 1865, + 0xC2F7: 1866, + 0xC341: 1867, + 0xC345: 1868, + 0xC349: 1869, + 0xC351: 1870, + 0xC357: 1871, + 0xC361: 1872, + 0xC362: 1873, + 0xC365: 1874, + 0xC369: 1875, + 0xC371: 1876, + 0xC373: 1877, + 0xC375: 1878, + 0xC377: 1879, + 0xC3A1: 1880, + 0xC3A2: 1881, + 0xC3A5: 1882, + 0xC3A8: 1883, + 0xC3A9: 1884, + 0xC3AA: 1885, + 0xC3B1: 1886, + 0xC3B3: 1887, + 0xC3B5: 1888, + 0xC3B7: 1889, + 0xC461: 1890, + 0xC462: 1891, + 0xC465: 1892, + 0xC469: 1893, + 0xC471: 1894, + 0xC473: 1895, + 0xC475: 1896, + 0xC477: 1897, + 0xC481: 1898, + 0xC482: 1899, + 0xC485: 1900, + 0xC489: 1901, + 0xC491: 1902, + 0xC493: 1903, + 0xC495: 1904, + 0xC496: 1905, + 0xC497: 1906, + 0xC4A1: 1907, + 0xC4A2: 1908, + 0xC4B7: 1909, + 0xC4E1: 1910, + 0xC4E2: 1911, + 0xC4E5: 1912, + 0xC4E8: 1913, + 0xC4E9: 1914, + 0xC4F1: 1915, + 0xC4F3: 1916, + 0xC4F5: 1917, + 0xC4F6: 1918, + 0xC4F7: 1919, + 0xC541: 1920, + 0xC542: 1921, + 0xC545: 1922, + 0xC549: 1923, + 0xC551: 1924, + 0xC553: 1925, + 0xC555: 1926, + 0xC557: 1927, + 0xC561: 1928, + 0xC565: 1929, + 0xC569: 1930, + 0xC571: 1931, + 0xC573: 1932, + 0xC575: 1933, + 0xC576: 1934, + 0xC577: 1935, + 0xC581: 1936, + 0xC5A1: 1937, + 0xC5A2: 1938, + 0xC5A5: 1939, + 0xC5A9: 1940, + 0xC5B1: 1941, + 0xC5B3: 1942, + 0xC5B5: 1943, + 0xC5B7: 1944, + 0xC5C1: 1945, + 0xC5C2: 1946, + 0xC5C5: 1947, + 0xC5C9: 1948, + 0xC5D1: 1949, + 0xC5D7: 1950, + 0xC5E1: 1951, + 0xC5F7: 1952, + 0xC641: 1953, + 0xC649: 1954, + 0xC661: 1955, + 0xC681: 1956, + 0xC682: 1957, + 0xC685: 1958, + 0xC689: 1959, + 0xC691: 1960, + 0xC693: 1961, + 0xC695: 1962, + 0xC697: 1963, + 0xC6A1: 1964, + 0xC6A5: 1965, + 0xC6A9: 1966, + 0xC6B7: 1967, + 0xC6C1: 1968, + 0xC6D7: 1969, + 0xC6E1: 1970, + 0xC6E2: 1971, + 0xC6E5: 1972, + 0xC6E9: 1973, + 0xC6F1: 1974, + 0xC6F3: 1975, + 0xC6F5: 1976, + 0xC6F7: 1977, + 0xC741: 1978, + 0xC745: 1979, + 0xC749: 1980, + 0xC751: 1981, + 0xC761: 1982, + 0xC762: 1983, + 0xC765: 1984, + 0xC769: 1985, + 0xC771: 1986, + 0xC773: 1987, + 0xC777: 1988, + 0xC7A1: 1989, + 0xC7A2: 1990, + 0xC7A5: 1991, + 0xC7A9: 1992, + 0xC7B1: 1993, + 0xC7B3: 1994, + 0xC7B5: 1995, + 0xC7B7: 1996, + 0xC861: 1997, + 0xC862: 1998, + 0xC865: 1999, + 0xC869: 2000, + 0xC86A: 2001, + 0xC871: 2002, + 0xC873: 2003, + 0xC875: 2004, + 0xC876: 2005, + 0xC877: 2006, + 0xC881: 2007, + 0xC882: 2008, + 0xC885: 2009, + 0xC889: 2010, + 0xC891: 2011, + 0xC893: 2012, + 0xC895: 2013, + 0xC896: 2014, + 0xC897: 2015, + 0xC8A1: 2016, + 0xC8B7: 2017, + 0xC8E1: 2018, + 0xC8E2: 2019, + 0xC8E5: 2020, + 0xC8E9: 2021, + 0xC8EB: 2022, + 0xC8F1: 2023, + 0xC8F3: 2024, + 0xC8F5: 2025, + 0xC8F6: 2026, + 0xC8F7: 2027, + 0xC941: 2028, + 0xC942: 2029, + 0xC945: 2030, + 0xC949: 2031, + 0xC951: 2032, + 0xC953: 2033, + 0xC955: 2034, + 0xC957: 2035, + 0xC961: 2036, + 0xC965: 2037, + 0xC976: 2038, + 0xC981: 2039, + 0xC985: 2040, + 0xC9A1: 2041, + 0xC9A2: 2042, + 0xC9A5: 2043, + 0xC9A9: 2044, + 0xC9B1: 2045, + 0xC9B3: 2046, + 0xC9B5: 2047, + 0xC9B7: 2048, + 0xC9BC: 2049, + 0xC9C1: 2050, + 0xC9C5: 2051, + 0xC9E1: 2052, + 0xCA41: 2053, + 0xCA45: 2054, + 0xCA55: 2055, + 0xCA57: 2056, + 0xCA61: 2057, + 0xCA81: 2058, + 0xCA82: 2059, + 0xCA85: 2060, + 0xCA89: 2061, + 0xCA91: 2062, + 0xCA93: 2063, + 0xCA95: 2064, + 0xCA97: 2065, + 0xCAA1: 2066, + 0xCAB6: 2067, + 0xCAC1: 2068, + 0xCAE1: 2069, + 0xCAE2: 2070, + 0xCAE5: 2071, + 0xCAE9: 2072, + 0xCAF1: 2073, + 0xCAF3: 2074, + 0xCAF7: 2075, + 0xCB41: 2076, + 0xCB45: 2077, + 0xCB49: 2078, + 0xCB51: 2079, + 0xCB57: 2080, + 0xCB61: 2081, + 0xCB62: 2082, + 0xCB65: 2083, + 0xCB68: 2084, + 0xCB69: 2085, + 0xCB6B: 2086, + 0xCB71: 2087, + 0xCB73: 2088, + 0xCB75: 2089, + 0xCB81: 2090, + 0xCB85: 2091, + 0xCB89: 2092, + 0xCB91: 2093, + 0xCB93: 2094, + 0xCBA1: 2095, + 0xCBA2: 2096, + 0xCBA5: 2097, + 0xCBA9: 2098, + 0xCBB1: 2099, + 0xCBB3: 2100, + 0xCBB5: 2101, + 0xCBB7: 2102, + 0xCC61: 2103, + 0xCC62: 2104, + 0xCC63: 2105, + 0xCC65: 2106, + 0xCC69: 2107, + 0xCC6B: 2108, + 0xCC71: 2109, + 0xCC73: 2110, + 0xCC75: 2111, + 0xCC76: 2112, + 0xCC77: 2113, + 0xCC7B: 2114, + 0xCC81: 2115, + 0xCC82: 2116, + 0xCC85: 2117, + 0xCC89: 2118, + 0xCC91: 2119, + 0xCC93: 2120, + 0xCC95: 2121, + 0xCC96: 2122, + 0xCC97: 2123, + 0xCCA1: 2124, + 0xCCA2: 2125, + 0xCCE1: 2126, + 0xCCE2: 2127, + 0xCCE5: 2128, + 0xCCE9: 2129, + 0xCCF1: 2130, + 0xCCF3: 2131, + 0xCCF5: 2132, + 0xCCF6: 2133, + 0xCCF7: 2134, + 0xCD41: 2135, + 0xCD42: 2136, + 0xCD45: 2137, + 0xCD49: 2138, + 0xCD51: 2139, + 0xCD53: 2140, + 0xCD55: 2141, + 0xCD57: 2142, + 0xCD61: 2143, + 0xCD65: 2144, + 0xCD69: 2145, + 0xCD71: 2146, + 0xCD73: 2147, + 0xCD76: 2148, + 0xCD77: 2149, + 0xCD81: 2150, + 0xCD89: 2151, + 0xCD93: 2152, + 0xCD95: 2153, + 0xCDA1: 2154, + 0xCDA2: 2155, + 0xCDA5: 2156, + 0xCDA9: 2157, + 0xCDB1: 2158, + 0xCDB3: 2159, + 0xCDB5: 2160, + 0xCDB7: 2161, + 0xCDC1: 2162, + 0xCDD7: 2163, + 0xCE41: 2164, + 0xCE45: 2165, + 0xCE61: 2166, + 0xCE65: 2167, + 0xCE69: 2168, + 0xCE73: 2169, + 0xCE75: 2170, + 0xCE81: 2171, + 0xCE82: 2172, + 0xCE85: 2173, + 0xCE88: 2174, + 0xCE89: 2175, + 0xCE8B: 2176, + 0xCE91: 2177, + 0xCE93: 2178, + 0xCE95: 2179, + 0xCE97: 2180, + 0xCEA1: 2181, + 0xCEB7: 2182, + 0xCEE1: 2183, + 0xCEE5: 2184, + 0xCEE9: 2185, + 0xCEF1: 2186, + 0xCEF5: 2187, + 0xCF41: 2188, + 0xCF45: 2189, + 0xCF49: 2190, + 0xCF51: 2191, + 0xCF55: 2192, + 0xCF57: 2193, + 0xCF61: 2194, + 0xCF65: 2195, + 0xCF69: 2196, + 0xCF71: 2197, + 0xCF73: 2198, + 0xCF75: 2199, + 0xCFA1: 2200, + 0xCFA2: 2201, + 0xCFA5: 2202, + 0xCFA9: 2203, + 0xCFB1: 2204, + 0xCFB3: 2205, + 0xCFB5: 2206, + 0xCFB7: 2207, + 0xD061: 2208, + 0xD062: 2209, + 0xD065: 2210, + 0xD069: 2211, + 0xD06E: 2212, + 0xD071: 2213, + 0xD073: 2214, + 0xD075: 2215, + 0xD077: 2216, + 0xD081: 2217, + 0xD082: 2218, + 0xD085: 2219, + 0xD089: 2220, + 0xD091: 2221, + 0xD093: 2222, + 0xD095: 2223, + 0xD096: 2224, + 0xD097: 2225, + 0xD0A1: 2226, + 0xD0B7: 2227, + 0xD0E1: 2228, + 0xD0E2: 2229, + 0xD0E5: 2230, + 0xD0E9: 2231, + 0xD0EB: 2232, + 0xD0F1: 2233, + 0xD0F3: 2234, + 0xD0F5: 2235, + 0xD0F7: 2236, + 0xD141: 2237, + 0xD142: 2238, + 0xD145: 2239, + 0xD149: 2240, + 0xD151: 2241, + 0xD153: 2242, + 0xD155: 2243, + 0xD157: 2244, + 0xD161: 2245, + 0xD162: 2246, + 0xD165: 2247, + 0xD169: 2248, + 0xD171: 2249, + 0xD173: 2250, + 0xD175: 2251, + 0xD176: 2252, + 0xD177: 2253, + 0xD181: 2254, + 0xD185: 2255, + 0xD189: 2256, + 0xD193: 2257, + 0xD1A1: 2258, + 0xD1A2: 2259, + 0xD1A5: 2260, + 0xD1A9: 2261, + 0xD1AE: 2262, + 0xD1B1: 2263, + 0xD1B3: 2264, + 0xD1B5: 2265, + 0xD1B7: 2266, + 0xD1BB: 2267, + 0xD1C1: 2268, + 0xD1C2: 2269, + 0xD1C5: 2270, + 0xD1C9: 2271, + 0xD1D5: 2272, + 0xD1D7: 2273, + 0xD1E1: 2274, + 0xD1E2: 2275, + 0xD1E5: 2276, + 0xD1F5: 2277, + 0xD1F7: 2278, + 0xD241: 2279, + 0xD242: 2280, + 0xD245: 2281, + 0xD249: 2282, + 0xD253: 2283, + 0xD255: 2284, + 0xD257: 2285, + 0xD261: 2286, + 0xD265: 2287, + 0xD269: 2288, + 0xD273: 2289, + 0xD275: 2290, + 0xD281: 2291, + 0xD282: 2292, + 0xD285: 2293, + 0xD289: 2294, + 0xD28E: 2295, + 0xD291: 2296, + 0xD295: 2297, + 0xD297: 2298, + 0xD2A1: 2299, + 0xD2A5: 2300, + 0xD2A9: 2301, + 0xD2B1: 2302, + 0xD2B7: 2303, + 0xD2C1: 2304, + 0xD2C2: 2305, + 0xD2C5: 2306, + 0xD2C9: 2307, + 0xD2D7: 2308, + 0xD2E1: 2309, + 0xD2E2: 2310, + 0xD2E5: 2311, + 0xD2E9: 2312, + 0xD2F1: 2313, + 0xD2F3: 2314, + 0xD2F5: 2315, + 0xD2F7: 2316, + 0xD341: 2317, + 0xD342: 2318, + 0xD345: 2319, + 0xD349: 2320, + 0xD351: 2321, + 0xD355: 2322, + 0xD357: 2323, + 0xD361: 2324, + 0xD362: 2325, + 0xD365: 2326, + 0xD367: 2327, + 0xD368: 2328, + 0xD369: 2329, + 0xD36A: 2330, + 0xD371: 2331, + 0xD373: 2332, + 0xD375: 2333, + 0xD377: 2334, + 0xD37B: 2335, + 0xD381: 2336, + 0xD385: 2337, + 0xD389: 2338, + 0xD391: 2339, + 0xD393: 2340, + 0xD397: 2341, + 0xD3A1: 2342, + 0xD3A2: 2343, + 0xD3A5: 2344, + 0xD3A9: 2345, + 0xD3B1: 2346, + 0xD3B3: 2347, + 0xD3B5: 2348, + 0xD3B7: 2349, +} diff --git a/src/pip/_vendor/chardet/compat.py b/src/pip/_vendor/chardet/johabprober.py similarity index 50% rename from src/pip/_vendor/chardet/compat.py rename to src/pip/_vendor/chardet/johabprober.py index 8941572b3e6..d7364ba61ec 100644 --- a/src/pip/_vendor/chardet/compat.py +++ b/src/pip/_vendor/chardet/johabprober.py @@ -1,7 +1,13 @@ ######################## BEGIN LICENSE BLOCK ######################## +# The Original Code is mozilla.org code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 1998 +# the Initial Developer. All Rights Reserved. +# # Contributor(s): -# Dan Blanchard -# Ian Cordasco +# Mark Pilgrim - port to Python # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public @@ -19,18 +25,23 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### -import sys +from .chardistribution import JOHABDistributionAnalysis +from .codingstatemachine import CodingStateMachine +from .mbcharsetprober import MultiByteCharSetProber +from .mbcssm import JOHAB_SM_MODEL + + +class JOHABProber(MultiByteCharSetProber): + def __init__(self) -> None: + super().__init__() + self.coding_sm = CodingStateMachine(JOHAB_SM_MODEL) + self.distribution_analyzer = JOHABDistributionAnalysis() + self.reset() + @property + def charset_name(self) -> str: + return "Johab" -if sys.version_info < (3, 0): - PY2 = True - PY3 = False - string_types = (str, unicode) - text_type = unicode - iteritems = dict.iteritems -else: - PY2 = False - PY3 = True - string_types = (bytes, str) - text_type = str - iteritems = dict.items + @property + def language(self) -> str: + return "Korean" diff --git a/src/pip/_vendor/chardet/jpcntx.py b/src/pip/_vendor/chardet/jpcntx.py index 20044e4bc8f..2f53bdda09e 100644 --- a/src/pip/_vendor/chardet/jpcntx.py +++ b/src/pip/_vendor/chardet/jpcntx.py @@ -25,110 +25,114 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### +from typing import List, Tuple, Union # This is hiragana 2-char sequence table, the number in each cell represents its frequency category -jp2CharContext = ( -(0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1), -(2,4,0,4,0,3,0,4,0,3,4,4,4,2,4,3,3,4,3,2,3,3,4,2,3,3,3,2,4,1,4,3,3,1,5,4,3,4,3,4,3,5,3,0,3,5,4,2,0,3,1,0,3,3,0,3,3,0,1,1,0,4,3,0,3,3,0,4,0,2,0,3,5,5,5,5,4,0,4,1,0,3,4), -(0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2), -(0,4,0,5,0,5,0,4,0,4,5,4,4,3,5,3,5,1,5,3,4,3,4,4,3,4,3,3,4,3,5,4,4,3,5,5,3,5,5,5,3,5,5,3,4,5,5,3,1,3,2,0,3,4,0,4,2,0,4,2,1,5,3,2,3,5,0,4,0,2,0,5,4,4,5,4,5,0,4,0,0,4,4), -(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), -(0,3,0,4,0,3,0,3,0,4,5,4,3,3,3,3,4,3,5,4,4,3,5,4,4,3,4,3,4,4,4,4,5,3,4,4,3,4,5,5,4,5,5,1,4,5,4,3,0,3,3,1,3,3,0,4,4,0,3,3,1,5,3,3,3,5,0,4,0,3,0,4,4,3,4,3,3,0,4,1,1,3,4), -(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), -(0,4,0,3,0,3,0,4,0,3,4,4,3,2,2,1,2,1,3,1,3,3,3,3,3,4,3,1,3,3,5,3,3,0,4,3,0,5,4,3,3,5,4,4,3,4,4,5,0,1,2,0,1,2,0,2,2,0,1,0,0,5,2,2,1,4,0,3,0,1,0,4,4,3,5,4,3,0,2,1,0,4,3), -(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), -(0,3,0,5,0,4,0,2,1,4,4,2,4,1,4,2,4,2,4,3,3,3,4,3,3,3,3,1,4,2,3,3,3,1,4,4,1,1,1,4,3,3,2,0,2,4,3,2,0,3,3,0,3,1,1,0,0,0,3,3,0,4,2,2,3,4,0,4,0,3,0,4,4,5,3,4,4,0,3,0,0,1,4), -(1,4,0,4,0,4,0,4,0,3,5,4,4,3,4,3,5,4,3,3,4,3,5,4,4,4,4,3,4,2,4,3,3,1,5,4,3,2,4,5,4,5,5,4,4,5,4,4,0,3,2,2,3,3,0,4,3,1,3,2,1,4,3,3,4,5,0,3,0,2,0,4,5,5,4,5,4,0,4,0,0,5,4), -(0,5,0,5,0,4,0,3,0,4,4,3,4,3,3,3,4,0,4,4,4,3,4,3,4,3,3,1,4,2,4,3,4,0,5,4,1,4,5,4,4,5,3,2,4,3,4,3,2,4,1,3,3,3,2,3,2,0,4,3,3,4,3,3,3,4,0,4,0,3,0,4,5,4,4,4,3,0,4,1,0,1,3), -(0,3,1,4,0,3,0,2,0,3,4,4,3,1,4,2,3,3,4,3,4,3,4,3,4,4,3,2,3,1,5,4,4,1,4,4,3,5,4,4,3,5,5,4,3,4,4,3,1,2,3,1,2,2,0,3,2,0,3,1,0,5,3,3,3,4,3,3,3,3,4,4,4,4,5,4,2,0,3,3,2,4,3), -(0,2,0,3,0,1,0,1,0,0,3,2,0,0,2,0,1,0,2,1,3,3,3,1,2,3,1,0,1,0,4,2,1,1,3,3,0,4,3,3,1,4,3,3,0,3,3,2,0,0,0,0,1,0,0,2,0,0,0,0,0,4,1,0,2,3,2,2,2,1,3,3,3,4,4,3,2,0,3,1,0,3,3), -(0,4,0,4,0,3,0,3,0,4,4,4,3,3,3,3,3,3,4,3,4,2,4,3,4,3,3,2,4,3,4,5,4,1,4,5,3,5,4,5,3,5,4,0,3,5,5,3,1,3,3,2,2,3,0,3,4,1,3,3,2,4,3,3,3,4,0,4,0,3,0,4,5,4,4,5,3,0,4,1,0,3,4), -(0,2,0,3,0,3,0,0,0,2,2,2,1,0,1,0,0,0,3,0,3,0,3,0,1,3,1,0,3,1,3,3,3,1,3,3,3,0,1,3,1,3,4,0,0,3,1,1,0,3,2,0,0,0,0,1,3,0,1,0,0,3,3,2,0,3,0,0,0,0,0,3,4,3,4,3,3,0,3,0,0,2,3), -(2,3,0,3,0,2,0,1,0,3,3,4,3,1,3,1,1,1,3,1,4,3,4,3,3,3,0,0,3,1,5,4,3,1,4,3,2,5,5,4,4,4,4,3,3,4,4,4,0,2,1,1,3,2,0,1,2,0,0,1,0,4,1,3,3,3,0,3,0,1,0,4,4,4,5,5,3,0,2,0,0,4,4), -(0,2,0,1,0,3,1,3,0,2,3,3,3,0,3,1,0,0,3,0,3,2,3,1,3,2,1,1,0,0,4,2,1,0,2,3,1,4,3,2,0,4,4,3,1,3,1,3,0,1,0,0,1,0,0,0,1,0,0,0,0,4,1,1,1,2,0,3,0,0,0,3,4,2,4,3,2,0,1,0,0,3,3), -(0,1,0,4,0,5,0,4,0,2,4,4,2,3,3,2,3,3,5,3,3,3,4,3,4,2,3,0,4,3,3,3,4,1,4,3,2,1,5,5,3,4,5,1,3,5,4,2,0,3,3,0,1,3,0,4,2,0,1,3,1,4,3,3,3,3,0,3,0,1,0,3,4,4,4,5,5,0,3,0,1,4,5), -(0,2,0,3,0,3,0,0,0,2,3,1,3,0,4,0,1,1,3,0,3,4,3,2,3,1,0,3,3,2,3,1,3,0,2,3,0,2,1,4,1,2,2,0,0,3,3,0,0,2,0,0,0,1,0,0,0,0,2,2,0,3,2,1,3,3,0,2,0,2,0,0,3,3,1,2,4,0,3,0,2,2,3), -(2,4,0,5,0,4,0,4,0,2,4,4,4,3,4,3,3,3,1,2,4,3,4,3,4,4,5,0,3,3,3,3,2,0,4,3,1,4,3,4,1,4,4,3,3,4,4,3,1,2,3,0,4,2,0,4,1,0,3,3,0,4,3,3,3,4,0,4,0,2,0,3,5,3,4,5,2,0,3,0,0,4,5), -(0,3,0,4,0,1,0,1,0,1,3,2,2,1,3,0,3,0,2,0,2,0,3,0,2,0,0,0,1,0,1,1,0,0,3,1,0,0,0,4,0,3,1,0,2,1,3,0,0,0,0,0,0,3,0,0,0,0,0,0,0,4,2,2,3,1,0,3,0,0,0,1,4,4,4,3,0,0,4,0,0,1,4), -(1,4,1,5,0,3,0,3,0,4,5,4,4,3,5,3,3,4,4,3,4,1,3,3,3,3,2,1,4,1,5,4,3,1,4,4,3,5,4,4,3,5,4,3,3,4,4,4,0,3,3,1,2,3,0,3,1,0,3,3,0,5,4,4,4,4,4,4,3,3,5,4,4,3,3,5,4,0,3,2,0,4,4), -(0,2,0,3,0,1,0,0,0,1,3,3,3,2,4,1,3,0,3,1,3,0,2,2,1,1,0,0,2,0,4,3,1,0,4,3,0,4,4,4,1,4,3,1,1,3,3,1,0,2,0,0,1,3,0,0,0,0,2,0,0,4,3,2,4,3,5,4,3,3,3,4,3,3,4,3,3,0,2,1,0,3,3), -(0,2,0,4,0,3,0,2,0,2,5,5,3,4,4,4,4,1,4,3,3,0,4,3,4,3,1,3,3,2,4,3,0,3,4,3,0,3,4,4,2,4,4,0,4,5,3,3,2,2,1,1,1,2,0,1,5,0,3,3,2,4,3,3,3,4,0,3,0,2,0,4,4,3,5,5,0,0,3,0,2,3,3), -(0,3,0,4,0,3,0,1,0,3,4,3,3,1,3,3,3,0,3,1,3,0,4,3,3,1,1,0,3,0,3,3,0,0,4,4,0,1,5,4,3,3,5,0,3,3,4,3,0,2,0,1,1,1,0,1,3,0,1,2,1,3,3,2,3,3,0,3,0,1,0,1,3,3,4,4,1,0,1,2,2,1,3), -(0,1,0,4,0,4,0,3,0,1,3,3,3,2,3,1,1,0,3,0,3,3,4,3,2,4,2,0,1,0,4,3,2,0,4,3,0,5,3,3,2,4,4,4,3,3,3,4,0,1,3,0,0,1,0,0,1,0,0,0,0,4,2,3,3,3,0,3,0,0,0,4,4,4,5,3,2,0,3,3,0,3,5), -(0,2,0,3,0,0,0,3,0,1,3,0,2,0,0,0,1,0,3,1,1,3,3,0,0,3,0,0,3,0,2,3,1,0,3,1,0,3,3,2,0,4,2,2,0,2,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,2,1,2,0,1,0,1,0,0,0,1,3,1,2,0,0,0,1,0,0,1,4), -(0,3,0,3,0,5,0,1,0,2,4,3,1,3,3,2,1,1,5,2,1,0,5,1,2,0,0,0,3,3,2,2,3,2,4,3,0,0,3,3,1,3,3,0,2,5,3,4,0,3,3,0,1,2,0,2,2,0,3,2,0,2,2,3,3,3,0,2,0,1,0,3,4,4,2,5,4,0,3,0,0,3,5), -(0,3,0,3,0,3,0,1,0,3,3,3,3,0,3,0,2,0,2,1,1,0,2,0,1,0,0,0,2,1,0,0,1,0,3,2,0,0,3,3,1,2,3,1,0,3,3,0,0,1,0,0,0,0,0,2,0,0,0,0,0,2,3,1,2,3,0,3,0,1,0,3,2,1,0,4,3,0,1,1,0,3,3), -(0,4,0,5,0,3,0,3,0,4,5,5,4,3,5,3,4,3,5,3,3,2,5,3,4,4,4,3,4,3,4,5,5,3,4,4,3,4,4,5,4,4,4,3,4,5,5,4,2,3,4,2,3,4,0,3,3,1,4,3,2,4,3,3,5,5,0,3,0,3,0,5,5,5,5,4,4,0,4,0,1,4,4), -(0,4,0,4,0,3,0,3,0,3,5,4,4,2,3,2,5,1,3,2,5,1,4,2,3,2,3,3,4,3,3,3,3,2,5,4,1,3,3,5,3,4,4,0,4,4,3,1,1,3,1,0,2,3,0,2,3,0,3,0,0,4,3,1,3,4,0,3,0,2,0,4,4,4,3,4,5,0,4,0,0,3,4), -(0,3,0,3,0,3,1,2,0,3,4,4,3,3,3,0,2,2,4,3,3,1,3,3,3,1,1,0,3,1,4,3,2,3,4,4,2,4,4,4,3,4,4,3,2,4,4,3,1,3,3,1,3,3,0,4,1,0,2,2,1,4,3,2,3,3,5,4,3,3,5,4,4,3,3,0,4,0,3,2,2,4,4), -(0,2,0,1,0,0,0,0,0,1,2,1,3,0,0,0,0,0,2,0,1,2,1,0,0,1,0,0,0,0,3,0,0,1,0,1,1,3,1,0,0,0,1,1,0,1,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,1,2,2,0,3,4,0,0,0,1,1,0,0,1,0,0,0,0,0,1,1), -(0,1,0,0,0,1,0,0,0,0,4,0,4,1,4,0,3,0,4,0,3,0,4,0,3,0,3,0,4,1,5,1,4,0,0,3,0,5,0,5,2,0,1,0,0,0,2,1,4,0,1,3,0,0,3,0,0,3,1,1,4,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0), -(1,4,0,5,0,3,0,2,0,3,5,4,4,3,4,3,5,3,4,3,3,0,4,3,3,3,3,3,3,2,4,4,3,1,3,4,4,5,4,4,3,4,4,1,3,5,4,3,3,3,1,2,2,3,3,1,3,1,3,3,3,5,3,3,4,5,0,3,0,3,0,3,4,3,4,4,3,0,3,0,2,4,3), -(0,1,0,4,0,0,0,0,0,1,4,0,4,1,4,2,4,0,3,0,1,0,1,0,0,0,0,0,2,0,3,1,1,1,0,3,0,0,0,1,2,1,0,0,1,1,1,1,0,1,0,0,0,1,0,0,3,0,0,0,0,3,2,0,2,2,0,1,0,0,0,2,3,2,3,3,0,0,0,0,2,1,0), -(0,5,1,5,0,3,0,3,0,5,4,4,5,1,5,3,3,0,4,3,4,3,5,3,4,3,3,2,4,3,4,3,3,0,3,3,1,4,4,3,4,4,4,3,4,5,5,3,2,3,1,1,3,3,1,3,1,1,3,3,2,4,5,3,3,5,0,4,0,3,0,4,4,3,5,3,3,0,3,4,0,4,3), -(0,5,0,5,0,3,0,2,0,4,4,3,5,2,4,3,3,3,4,4,4,3,5,3,5,3,3,1,4,0,4,3,3,0,3,3,0,4,4,4,4,5,4,3,3,5,5,3,2,3,1,2,3,2,0,1,0,0,3,2,2,4,4,3,1,5,0,4,0,3,0,4,3,1,3,2,1,0,3,3,0,3,3), -(0,4,0,5,0,5,0,4,0,4,5,5,5,3,4,3,3,2,5,4,4,3,5,3,5,3,4,0,4,3,4,4,3,2,4,4,3,4,5,4,4,5,5,0,3,5,5,4,1,3,3,2,3,3,1,3,1,0,4,3,1,4,4,3,4,5,0,4,0,2,0,4,3,4,4,3,3,0,4,0,0,5,5), -(0,4,0,4,0,5,0,1,1,3,3,4,4,3,4,1,3,0,5,1,3,0,3,1,3,1,1,0,3,0,3,3,4,0,4,3,0,4,4,4,3,4,4,0,3,5,4,1,0,3,0,0,2,3,0,3,1,0,3,1,0,3,2,1,3,5,0,3,0,1,0,3,2,3,3,4,4,0,2,2,0,4,4), -(2,4,0,5,0,4,0,3,0,4,5,5,4,3,5,3,5,3,5,3,5,2,5,3,4,3,3,4,3,4,5,3,2,1,5,4,3,2,3,4,5,3,4,1,2,5,4,3,0,3,3,0,3,2,0,2,3,0,4,1,0,3,4,3,3,5,0,3,0,1,0,4,5,5,5,4,3,0,4,2,0,3,5), -(0,5,0,4,0,4,0,2,0,5,4,3,4,3,4,3,3,3,4,3,4,2,5,3,5,3,4,1,4,3,4,4,4,0,3,5,0,4,4,4,4,5,3,1,3,4,5,3,3,3,3,3,3,3,0,2,2,0,3,3,2,4,3,3,3,5,3,4,1,3,3,5,3,2,0,0,0,0,4,3,1,3,3), -(0,1,0,3,0,3,0,1,0,1,3,3,3,2,3,3,3,0,3,0,0,0,3,1,3,0,0,0,2,2,2,3,0,0,3,2,0,1,2,4,1,3,3,0,0,3,3,3,0,1,0,0,2,1,0,0,3,0,3,1,0,3,0,0,1,3,0,2,0,1,0,3,3,1,3,3,0,0,1,1,0,3,3), -(0,2,0,3,0,2,1,4,0,2,2,3,1,1,3,1,1,0,2,0,3,1,2,3,1,3,0,0,1,0,4,3,2,3,3,3,1,4,2,3,3,3,3,1,0,3,1,4,0,1,1,0,1,2,0,1,1,0,1,1,0,3,1,3,2,2,0,1,0,0,0,2,3,3,3,1,0,0,0,0,0,2,3), -(0,5,0,4,0,5,0,2,0,4,5,5,3,3,4,3,3,1,5,4,4,2,4,4,4,3,4,2,4,3,5,5,4,3,3,4,3,3,5,5,4,5,5,1,3,4,5,3,1,4,3,1,3,3,0,3,3,1,4,3,1,4,5,3,3,5,0,4,0,3,0,5,3,3,1,4,3,0,4,0,1,5,3), -(0,5,0,5,0,4,0,2,0,4,4,3,4,3,3,3,3,3,5,4,4,4,4,4,4,5,3,3,5,2,4,4,4,3,4,4,3,3,4,4,5,5,3,3,4,3,4,3,3,4,3,3,3,3,1,2,2,1,4,3,3,5,4,4,3,4,0,4,0,3,0,4,4,4,4,4,1,0,4,2,0,2,4), -(0,4,0,4,0,3,0,1,0,3,5,2,3,0,3,0,2,1,4,2,3,3,4,1,4,3,3,2,4,1,3,3,3,0,3,3,0,0,3,3,3,5,3,3,3,3,3,2,0,2,0,0,2,0,0,2,0,0,1,0,0,3,1,2,2,3,0,3,0,2,0,4,4,3,3,4,1,0,3,0,0,2,4), -(0,0,0,4,0,0,0,0,0,0,1,0,1,0,2,0,0,0,0,0,1,0,2,0,1,0,0,0,0,0,3,1,3,0,3,2,0,0,0,1,0,3,2,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,4,0,2,0,0,0,0,0,0,2), -(0,2,1,3,0,2,0,2,0,3,3,3,3,1,3,1,3,3,3,3,3,3,4,2,2,1,2,1,4,0,4,3,1,3,3,3,2,4,3,5,4,3,3,3,3,3,3,3,0,1,3,0,2,0,0,1,0,0,1,0,0,4,2,0,2,3,0,3,3,0,3,3,4,2,3,1,4,0,1,2,0,2,3), -(0,3,0,3,0,1,0,3,0,2,3,3,3,0,3,1,2,0,3,3,2,3,3,2,3,2,3,1,3,0,4,3,2,0,3,3,1,4,3,3,2,3,4,3,1,3,3,1,1,0,1,1,0,1,0,1,0,1,0,0,0,4,1,1,0,3,0,3,1,0,2,3,3,3,3,3,1,0,0,2,0,3,3), -(0,0,0,0,0,0,0,0,0,0,3,0,2,0,3,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,3,0,3,1,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,2,0,2,3,0,0,0,0,0,0,0,0,3), -(0,2,0,3,1,3,0,3,0,2,3,3,3,1,3,1,3,1,3,1,3,3,3,1,3,0,2,3,1,1,4,3,3,2,3,3,1,2,2,4,1,3,3,0,1,4,2,3,0,1,3,0,3,0,0,1,3,0,2,0,0,3,3,2,1,3,0,3,0,2,0,3,4,4,4,3,1,0,3,0,0,3,3), -(0,2,0,1,0,2,0,0,0,1,3,2,2,1,3,0,1,1,3,0,3,2,3,1,2,0,2,0,1,1,3,3,3,0,3,3,1,1,2,3,2,3,3,1,2,3,2,0,0,1,0,0,0,0,0,0,3,0,1,0,0,2,1,2,1,3,0,3,0,0,0,3,4,4,4,3,2,0,2,0,0,2,4), -(0,0,0,1,0,1,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,2,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,3,1,0,0,0,0,0,0,0,3), -(0,3,0,3,0,2,0,3,0,3,3,3,2,3,2,2,2,0,3,1,3,3,3,2,3,3,0,0,3,0,3,2,2,0,2,3,1,4,3,4,3,3,2,3,1,5,4,4,0,3,1,2,1,3,0,3,1,1,2,0,2,3,1,3,1,3,0,3,0,1,0,3,3,4,4,2,1,0,2,1,0,2,4), -(0,1,0,3,0,1,0,2,0,1,4,2,5,1,4,0,2,0,2,1,3,1,4,0,2,1,0,0,2,1,4,1,1,0,3,3,0,5,1,3,2,3,3,1,0,3,2,3,0,1,0,0,0,0,0,0,1,0,0,0,0,4,0,1,0,3,0,2,0,1,0,3,3,3,4,3,3,0,0,0,0,2,3), -(0,0,0,1,0,0,0,0,0,0,2,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,1,0,0,0,0,0,3), -(0,1,0,3,0,4,0,3,0,2,4,3,1,0,3,2,2,1,3,1,2,2,3,1,1,1,2,1,3,0,1,2,0,1,3,2,1,3,0,5,5,1,0,0,1,3,2,1,0,3,0,0,1,0,0,0,0,0,3,4,0,1,1,1,3,2,0,2,0,1,0,2,3,3,1,2,3,0,1,0,1,0,4), -(0,0,0,1,0,3,0,3,0,2,2,1,0,0,4,0,3,0,3,1,3,0,3,0,3,0,1,0,3,0,3,1,3,0,3,3,0,0,1,2,1,1,1,0,1,2,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,2,2,1,2,0,0,2,0,0,0,0,2,3,3,3,3,0,0,0,0,1,4), -(0,0,0,3,0,3,0,0,0,0,3,1,1,0,3,0,1,0,2,0,1,0,0,0,0,0,0,0,1,0,3,0,2,0,2,3,0,0,2,2,3,1,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,2,3), -(2,4,0,5,0,5,0,4,0,3,4,3,3,3,4,3,3,3,4,3,4,4,5,4,5,5,5,2,3,0,5,5,4,1,5,4,3,1,5,4,3,4,4,3,3,4,3,3,0,3,2,0,2,3,0,3,0,0,3,3,0,5,3,2,3,3,0,3,0,3,0,3,4,5,4,5,3,0,4,3,0,3,4), -(0,3,0,3,0,3,0,3,0,3,3,4,3,2,3,2,3,0,4,3,3,3,3,3,3,3,3,0,3,2,4,3,3,1,3,4,3,4,4,4,3,4,4,3,2,4,4,1,0,2,0,0,1,1,0,2,0,0,3,1,0,5,3,2,1,3,0,3,0,1,2,4,3,2,4,3,3,0,3,2,0,4,4), -(0,3,0,3,0,1,0,0,0,1,4,3,3,2,3,1,3,1,4,2,3,2,4,2,3,4,3,0,2,2,3,3,3,0,3,3,3,0,3,4,1,3,3,0,3,4,3,3,0,1,1,0,1,0,0,0,4,0,3,0,0,3,1,2,1,3,0,4,0,1,0,4,3,3,4,3,3,0,2,0,0,3,3), -(0,3,0,4,0,1,0,3,0,3,4,3,3,0,3,3,3,1,3,1,3,3,4,3,3,3,0,0,3,1,5,3,3,1,3,3,2,5,4,3,3,4,5,3,2,5,3,4,0,1,0,0,0,0,0,2,0,0,1,1,0,4,2,2,1,3,0,3,0,2,0,4,4,3,5,3,2,0,1,1,0,3,4), -(0,5,0,4,0,5,0,2,0,4,4,3,3,2,3,3,3,1,4,3,4,1,5,3,4,3,4,0,4,2,4,3,4,1,5,4,0,4,4,4,4,5,4,1,3,5,4,2,1,4,1,1,3,2,0,3,1,0,3,2,1,4,3,3,3,4,0,4,0,3,0,4,4,4,3,3,3,0,4,2,0,3,4), -(1,4,0,4,0,3,0,1,0,3,3,3,1,1,3,3,2,2,3,3,1,0,3,2,2,1,2,0,3,1,2,1,2,0,3,2,0,2,2,3,3,4,3,0,3,3,1,2,0,1,1,3,1,2,0,0,3,0,1,1,0,3,2,2,3,3,0,3,0,0,0,2,3,3,4,3,3,0,1,0,0,1,4), -(0,4,0,4,0,4,0,0,0,3,4,4,3,1,4,2,3,2,3,3,3,1,4,3,4,0,3,0,4,2,3,3,2,2,5,4,2,1,3,4,3,4,3,1,3,3,4,2,0,2,1,0,3,3,0,0,2,0,3,1,0,4,4,3,4,3,0,4,0,1,0,2,4,4,4,4,4,0,3,2,0,3,3), -(0,0,0,1,0,4,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,3,2,0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2), -(0,2,0,3,0,4,0,4,0,1,3,3,3,0,4,0,2,1,2,1,1,1,2,0,3,1,1,0,1,0,3,1,0,0,3,3,2,0,1,1,0,0,0,0,0,1,0,2,0,2,2,0,3,1,0,0,1,0,1,1,0,1,2,0,3,0,0,0,0,1,0,0,3,3,4,3,1,0,1,0,3,0,2), -(0,0,0,3,0,5,0,0,0,0,1,0,2,0,3,1,0,1,3,0,0,0,2,0,0,0,1,0,0,0,1,1,0,0,4,0,0,0,2,3,0,1,4,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,1,0,0,0,0,0,0,0,2,0,0,3,0,0,0,0,0,3), -(0,2,0,5,0,5,0,1,0,2,4,3,3,2,5,1,3,2,3,3,3,0,4,1,2,0,3,0,4,0,2,2,1,1,5,3,0,0,1,4,2,3,2,0,3,3,3,2,0,2,4,1,1,2,0,1,1,0,3,1,0,1,3,1,2,3,0,2,0,0,0,1,3,5,4,4,4,0,3,0,0,1,3), -(0,4,0,5,0,4,0,4,0,4,5,4,3,3,4,3,3,3,4,3,4,4,5,3,4,5,4,2,4,2,3,4,3,1,4,4,1,3,5,4,4,5,5,4,4,5,5,5,2,3,3,1,4,3,1,3,3,0,3,3,1,4,3,4,4,4,0,3,0,4,0,3,3,4,4,5,0,0,4,3,0,4,5), -(0,4,0,4,0,3,0,3,0,3,4,4,4,3,3,2,4,3,4,3,4,3,5,3,4,3,2,1,4,2,4,4,3,1,3,4,2,4,5,5,3,4,5,4,1,5,4,3,0,3,2,2,3,2,1,3,1,0,3,3,3,5,3,3,3,5,4,4,2,3,3,4,3,3,3,2,1,0,3,2,1,4,3), -(0,4,0,5,0,4,0,3,0,3,5,5,3,2,4,3,4,0,5,4,4,1,4,4,4,3,3,3,4,3,5,5,2,3,3,4,1,2,5,5,3,5,5,2,3,5,5,4,0,3,2,0,3,3,1,1,5,1,4,1,0,4,3,2,3,5,0,4,0,3,0,5,4,3,4,3,0,0,4,1,0,4,4), -(1,3,0,4,0,2,0,2,0,2,5,5,3,3,3,3,3,0,4,2,3,4,4,4,3,4,0,0,3,4,5,4,3,3,3,3,2,5,5,4,5,5,5,4,3,5,5,5,1,3,1,0,1,0,0,3,2,0,4,2,0,5,2,3,2,4,1,3,0,3,0,4,5,4,5,4,3,0,4,2,0,5,4), -(0,3,0,4,0,5,0,3,0,3,4,4,3,2,3,2,3,3,3,3,3,2,4,3,3,2,2,0,3,3,3,3,3,1,3,3,3,0,4,4,3,4,4,1,1,4,4,2,0,3,1,0,1,1,0,4,1,0,2,3,1,3,3,1,3,4,0,3,0,1,0,3,1,3,0,0,1,0,2,0,0,4,4), -(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), -(0,3,0,3,0,2,0,3,0,1,5,4,3,3,3,1,4,2,1,2,3,4,4,2,4,4,5,0,3,1,4,3,4,0,4,3,3,3,2,3,2,5,3,4,3,2,2,3,0,0,3,0,2,1,0,1,2,0,0,0,0,2,1,1,3,1,0,2,0,4,0,3,4,4,4,5,2,0,2,0,0,1,3), -(0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,1,0,0,1,1,0,0,0,4,2,1,1,0,1,0,3,2,0,0,3,1,1,1,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,1,0,0,0,2,0,0,0,1,4,0,4,2,1,0,0,0,0,0,1), -(0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,0,0,3,1,0,0,0,2,0,2,1,0,0,1,2,1,0,1,1,0,0,3,0,0,0,0,0,0,0,0,0,0,0,1,3,1,0,0,0,0,0,1,0,0,2,1,0,0,0,0,0,0,0,0,2), -(0,4,0,4,0,4,0,3,0,4,4,3,4,2,4,3,2,0,4,4,4,3,5,3,5,3,3,2,4,2,4,3,4,3,1,4,0,2,3,4,4,4,3,3,3,4,4,4,3,4,1,3,4,3,2,1,2,1,3,3,3,4,4,3,3,5,0,4,0,3,0,4,3,3,3,2,1,0,3,0,0,3,3), -(0,4,0,3,0,3,0,3,0,3,5,5,3,3,3,3,4,3,4,3,3,3,4,4,4,3,3,3,3,4,3,5,3,3,1,3,2,4,5,5,5,5,4,3,4,5,5,3,2,2,3,3,3,3,2,3,3,1,2,3,2,4,3,3,3,4,0,4,0,2,0,4,3,2,2,1,2,0,3,0,0,4,1), +# fmt: off +jp2_char_context = ( + (0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1), + (2, 4, 0, 4, 0, 3, 0, 4, 0, 3, 4, 4, 4, 2, 4, 3, 3, 4, 3, 2, 3, 3, 4, 2, 3, 3, 3, 2, 4, 1, 4, 3, 3, 1, 5, 4, 3, 4, 3, 4, 3, 5, 3, 0, 3, 5, 4, 2, 0, 3, 1, 0, 3, 3, 0, 3, 3, 0, 1, 1, 0, 4, 3, 0, 3, 3, 0, 4, 0, 2, 0, 3, 5, 5, 5, 5, 4, 0, 4, 1, 0, 3, 4), + (0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2), + (0, 4, 0, 5, 0, 5, 0, 4, 0, 4, 5, 4, 4, 3, 5, 3, 5, 1, 5, 3, 4, 3, 4, 4, 3, 4, 3, 3, 4, 3, 5, 4, 4, 3, 5, 5, 3, 5, 5, 5, 3, 5, 5, 3, 4, 5, 5, 3, 1, 3, 2, 0, 3, 4, 0, 4, 2, 0, 4, 2, 1, 5, 3, 2, 3, 5, 0, 4, 0, 2, 0, 5, 4, 4, 5, 4, 5, 0, 4, 0, 0, 4, 4), + (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + (0, 3, 0, 4, 0, 3, 0, 3, 0, 4, 5, 4, 3, 3, 3, 3, 4, 3, 5, 4, 4, 3, 5, 4, 4, 3, 4, 3, 4, 4, 4, 4, 5, 3, 4, 4, 3, 4, 5, 5, 4, 5, 5, 1, 4, 5, 4, 3, 0, 3, 3, 1, 3, 3, 0, 4, 4, 0, 3, 3, 1, 5, 3, 3, 3, 5, 0, 4, 0, 3, 0, 4, 4, 3, 4, 3, 3, 0, 4, 1, 1, 3, 4), + (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + (0, 4, 0, 3, 0, 3, 0, 4, 0, 3, 4, 4, 3, 2, 2, 1, 2, 1, 3, 1, 3, 3, 3, 3, 3, 4, 3, 1, 3, 3, 5, 3, 3, 0, 4, 3, 0, 5, 4, 3, 3, 5, 4, 4, 3, 4, 4, 5, 0, 1, 2, 0, 1, 2, 0, 2, 2, 0, 1, 0, 0, 5, 2, 2, 1, 4, 0, 3, 0, 1, 0, 4, 4, 3, 5, 4, 3, 0, 2, 1, 0, 4, 3), + (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + (0, 3, 0, 5, 0, 4, 0, 2, 1, 4, 4, 2, 4, 1, 4, 2, 4, 2, 4, 3, 3, 3, 4, 3, 3, 3, 3, 1, 4, 2, 3, 3, 3, 1, 4, 4, 1, 1, 1, 4, 3, 3, 2, 0, 2, 4, 3, 2, 0, 3, 3, 0, 3, 1, 1, 0, 0, 0, 3, 3, 0, 4, 2, 2, 3, 4, 0, 4, 0, 3, 0, 4, 4, 5, 3, 4, 4, 0, 3, 0, 0, 1, 4), + (1, 4, 0, 4, 0, 4, 0, 4, 0, 3, 5, 4, 4, 3, 4, 3, 5, 4, 3, 3, 4, 3, 5, 4, 4, 4, 4, 3, 4, 2, 4, 3, 3, 1, 5, 4, 3, 2, 4, 5, 4, 5, 5, 4, 4, 5, 4, 4, 0, 3, 2, 2, 3, 3, 0, 4, 3, 1, 3, 2, 1, 4, 3, 3, 4, 5, 0, 3, 0, 2, 0, 4, 5, 5, 4, 5, 4, 0, 4, 0, 0, 5, 4), + (0, 5, 0, 5, 0, 4, 0, 3, 0, 4, 4, 3, 4, 3, 3, 3, 4, 0, 4, 4, 4, 3, 4, 3, 4, 3, 3, 1, 4, 2, 4, 3, 4, 0, 5, 4, 1, 4, 5, 4, 4, 5, 3, 2, 4, 3, 4, 3, 2, 4, 1, 3, 3, 3, 2, 3, 2, 0, 4, 3, 3, 4, 3, 3, 3, 4, 0, 4, 0, 3, 0, 4, 5, 4, 4, 4, 3, 0, 4, 1, 0, 1, 3), + (0, 3, 1, 4, 0, 3, 0, 2, 0, 3, 4, 4, 3, 1, 4, 2, 3, 3, 4, 3, 4, 3, 4, 3, 4, 4, 3, 2, 3, 1, 5, 4, 4, 1, 4, 4, 3, 5, 4, 4, 3, 5, 5, 4, 3, 4, 4, 3, 1, 2, 3, 1, 2, 2, 0, 3, 2, 0, 3, 1, 0, 5, 3, 3, 3, 4, 3, 3, 3, 3, 4, 4, 4, 4, 5, 4, 2, 0, 3, 3, 2, 4, 3), + (0, 2, 0, 3, 0, 1, 0, 1, 0, 0, 3, 2, 0, 0, 2, 0, 1, 0, 2, 1, 3, 3, 3, 1, 2, 3, 1, 0, 1, 0, 4, 2, 1, 1, 3, 3, 0, 4, 3, 3, 1, 4, 3, 3, 0, 3, 3, 2, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0, 0, 4, 1, 0, 2, 3, 2, 2, 2, 1, 3, 3, 3, 4, 4, 3, 2, 0, 3, 1, 0, 3, 3), + (0, 4, 0, 4, 0, 3, 0, 3, 0, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 3, 4, 2, 4, 3, 4, 3, 3, 2, 4, 3, 4, 5, 4, 1, 4, 5, 3, 5, 4, 5, 3, 5, 4, 0, 3, 5, 5, 3, 1, 3, 3, 2, 2, 3, 0, 3, 4, 1, 3, 3, 2, 4, 3, 3, 3, 4, 0, 4, 0, 3, 0, 4, 5, 4, 4, 5, 3, 0, 4, 1, 0, 3, 4), + (0, 2, 0, 3, 0, 3, 0, 0, 0, 2, 2, 2, 1, 0, 1, 0, 0, 0, 3, 0, 3, 0, 3, 0, 1, 3, 1, 0, 3, 1, 3, 3, 3, 1, 3, 3, 3, 0, 1, 3, 1, 3, 4, 0, 0, 3, 1, 1, 0, 3, 2, 0, 0, 0, 0, 1, 3, 0, 1, 0, 0, 3, 3, 2, 0, 3, 0, 0, 0, 0, 0, 3, 4, 3, 4, 3, 3, 0, 3, 0, 0, 2, 3), + (2, 3, 0, 3, 0, 2, 0, 1, 0, 3, 3, 4, 3, 1, 3, 1, 1, 1, 3, 1, 4, 3, 4, 3, 3, 3, 0, 0, 3, 1, 5, 4, 3, 1, 4, 3, 2, 5, 5, 4, 4, 4, 4, 3, 3, 4, 4, 4, 0, 2, 1, 1, 3, 2, 0, 1, 2, 0, 0, 1, 0, 4, 1, 3, 3, 3, 0, 3, 0, 1, 0, 4, 4, 4, 5, 5, 3, 0, 2, 0, 0, 4, 4), + (0, 2, 0, 1, 0, 3, 1, 3, 0, 2, 3, 3, 3, 0, 3, 1, 0, 0, 3, 0, 3, 2, 3, 1, 3, 2, 1, 1, 0, 0, 4, 2, 1, 0, 2, 3, 1, 4, 3, 2, 0, 4, 4, 3, 1, 3, 1, 3, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 4, 1, 1, 1, 2, 0, 3, 0, 0, 0, 3, 4, 2, 4, 3, 2, 0, 1, 0, 0, 3, 3), + (0, 1, 0, 4, 0, 5, 0, 4, 0, 2, 4, 4, 2, 3, 3, 2, 3, 3, 5, 3, 3, 3, 4, 3, 4, 2, 3, 0, 4, 3, 3, 3, 4, 1, 4, 3, 2, 1, 5, 5, 3, 4, 5, 1, 3, 5, 4, 2, 0, 3, 3, 0, 1, 3, 0, 4, 2, 0, 1, 3, 1, 4, 3, 3, 3, 3, 0, 3, 0, 1, 0, 3, 4, 4, 4, 5, 5, 0, 3, 0, 1, 4, 5), + (0, 2, 0, 3, 0, 3, 0, 0, 0, 2, 3, 1, 3, 0, 4, 0, 1, 1, 3, 0, 3, 4, 3, 2, 3, 1, 0, 3, 3, 2, 3, 1, 3, 0, 2, 3, 0, 2, 1, 4, 1, 2, 2, 0, 0, 3, 3, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 0, 2, 2, 0, 3, 2, 1, 3, 3, 0, 2, 0, 2, 0, 0, 3, 3, 1, 2, 4, 0, 3, 0, 2, 2, 3), + (2, 4, 0, 5, 0, 4, 0, 4, 0, 2, 4, 4, 4, 3, 4, 3, 3, 3, 1, 2, 4, 3, 4, 3, 4, 4, 5, 0, 3, 3, 3, 3, 2, 0, 4, 3, 1, 4, 3, 4, 1, 4, 4, 3, 3, 4, 4, 3, 1, 2, 3, 0, 4, 2, 0, 4, 1, 0, 3, 3, 0, 4, 3, 3, 3, 4, 0, 4, 0, 2, 0, 3, 5, 3, 4, 5, 2, 0, 3, 0, 0, 4, 5), + (0, 3, 0, 4, 0, 1, 0, 1, 0, 1, 3, 2, 2, 1, 3, 0, 3, 0, 2, 0, 2, 0, 3, 0, 2, 0, 0, 0, 1, 0, 1, 1, 0, 0, 3, 1, 0, 0, 0, 4, 0, 3, 1, 0, 2, 1, 3, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4, 2, 2, 3, 1, 0, 3, 0, 0, 0, 1, 4, 4, 4, 3, 0, 0, 4, 0, 0, 1, 4), + (1, 4, 1, 5, 0, 3, 0, 3, 0, 4, 5, 4, 4, 3, 5, 3, 3, 4, 4, 3, 4, 1, 3, 3, 3, 3, 2, 1, 4, 1, 5, 4, 3, 1, 4, 4, 3, 5, 4, 4, 3, 5, 4, 3, 3, 4, 4, 4, 0, 3, 3, 1, 2, 3, 0, 3, 1, 0, 3, 3, 0, 5, 4, 4, 4, 4, 4, 4, 3, 3, 5, 4, 4, 3, 3, 5, 4, 0, 3, 2, 0, 4, 4), + (0, 2, 0, 3, 0, 1, 0, 0, 0, 1, 3, 3, 3, 2, 4, 1, 3, 0, 3, 1, 3, 0, 2, 2, 1, 1, 0, 0, 2, 0, 4, 3, 1, 0, 4, 3, 0, 4, 4, 4, 1, 4, 3, 1, 1, 3, 3, 1, 0, 2, 0, 0, 1, 3, 0, 0, 0, 0, 2, 0, 0, 4, 3, 2, 4, 3, 5, 4, 3, 3, 3, 4, 3, 3, 4, 3, 3, 0, 2, 1, 0, 3, 3), + (0, 2, 0, 4, 0, 3, 0, 2, 0, 2, 5, 5, 3, 4, 4, 4, 4, 1, 4, 3, 3, 0, 4, 3, 4, 3, 1, 3, 3, 2, 4, 3, 0, 3, 4, 3, 0, 3, 4, 4, 2, 4, 4, 0, 4, 5, 3, 3, 2, 2, 1, 1, 1, 2, 0, 1, 5, 0, 3, 3, 2, 4, 3, 3, 3, 4, 0, 3, 0, 2, 0, 4, 4, 3, 5, 5, 0, 0, 3, 0, 2, 3, 3), + (0, 3, 0, 4, 0, 3, 0, 1, 0, 3, 4, 3, 3, 1, 3, 3, 3, 0, 3, 1, 3, 0, 4, 3, 3, 1, 1, 0, 3, 0, 3, 3, 0, 0, 4, 4, 0, 1, 5, 4, 3, 3, 5, 0, 3, 3, 4, 3, 0, 2, 0, 1, 1, 1, 0, 1, 3, 0, 1, 2, 1, 3, 3, 2, 3, 3, 0, 3, 0, 1, 0, 1, 3, 3, 4, 4, 1, 0, 1, 2, 2, 1, 3), + (0, 1, 0, 4, 0, 4, 0, 3, 0, 1, 3, 3, 3, 2, 3, 1, 1, 0, 3, 0, 3, 3, 4, 3, 2, 4, 2, 0, 1, 0, 4, 3, 2, 0, 4, 3, 0, 5, 3, 3, 2, 4, 4, 4, 3, 3, 3, 4, 0, 1, 3, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 4, 2, 3, 3, 3, 0, 3, 0, 0, 0, 4, 4, 4, 5, 3, 2, 0, 3, 3, 0, 3, 5), + (0, 2, 0, 3, 0, 0, 0, 3, 0, 1, 3, 0, 2, 0, 0, 0, 1, 0, 3, 1, 1, 3, 3, 0, 0, 3, 0, 0, 3, 0, 2, 3, 1, 0, 3, 1, 0, 3, 3, 2, 0, 4, 2, 2, 0, 2, 0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 2, 0, 1, 0, 1, 0, 0, 0, 1, 3, 1, 2, 0, 0, 0, 1, 0, 0, 1, 4), + (0, 3, 0, 3, 0, 5, 0, 1, 0, 2, 4, 3, 1, 3, 3, 2, 1, 1, 5, 2, 1, 0, 5, 1, 2, 0, 0, 0, 3, 3, 2, 2, 3, 2, 4, 3, 0, 0, 3, 3, 1, 3, 3, 0, 2, 5, 3, 4, 0, 3, 3, 0, 1, 2, 0, 2, 2, 0, 3, 2, 0, 2, 2, 3, 3, 3, 0, 2, 0, 1, 0, 3, 4, 4, 2, 5, 4, 0, 3, 0, 0, 3, 5), + (0, 3, 0, 3, 0, 3, 0, 1, 0, 3, 3, 3, 3, 0, 3, 0, 2, 0, 2, 1, 1, 0, 2, 0, 1, 0, 0, 0, 2, 1, 0, 0, 1, 0, 3, 2, 0, 0, 3, 3, 1, 2, 3, 1, 0, 3, 3, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 2, 3, 1, 2, 3, 0, 3, 0, 1, 0, 3, 2, 1, 0, 4, 3, 0, 1, 1, 0, 3, 3), + (0, 4, 0, 5, 0, 3, 0, 3, 0, 4, 5, 5, 4, 3, 5, 3, 4, 3, 5, 3, 3, 2, 5, 3, 4, 4, 4, 3, 4, 3, 4, 5, 5, 3, 4, 4, 3, 4, 4, 5, 4, 4, 4, 3, 4, 5, 5, 4, 2, 3, 4, 2, 3, 4, 0, 3, 3, 1, 4, 3, 2, 4, 3, 3, 5, 5, 0, 3, 0, 3, 0, 5, 5, 5, 5, 4, 4, 0, 4, 0, 1, 4, 4), + (0, 4, 0, 4, 0, 3, 0, 3, 0, 3, 5, 4, 4, 2, 3, 2, 5, 1, 3, 2, 5, 1, 4, 2, 3, 2, 3, 3, 4, 3, 3, 3, 3, 2, 5, 4, 1, 3, 3, 5, 3, 4, 4, 0, 4, 4, 3, 1, 1, 3, 1, 0, 2, 3, 0, 2, 3, 0, 3, 0, 0, 4, 3, 1, 3, 4, 0, 3, 0, 2, 0, 4, 4, 4, 3, 4, 5, 0, 4, 0, 0, 3, 4), + (0, 3, 0, 3, 0, 3, 1, 2, 0, 3, 4, 4, 3, 3, 3, 0, 2, 2, 4, 3, 3, 1, 3, 3, 3, 1, 1, 0, 3, 1, 4, 3, 2, 3, 4, 4, 2, 4, 4, 4, 3, 4, 4, 3, 2, 4, 4, 3, 1, 3, 3, 1, 3, 3, 0, 4, 1, 0, 2, 2, 1, 4, 3, 2, 3, 3, 5, 4, 3, 3, 5, 4, 4, 3, 3, 0, 4, 0, 3, 2, 2, 4, 4), + (0, 2, 0, 1, 0, 0, 0, 0, 0, 1, 2, 1, 3, 0, 0, 0, 0, 0, 2, 0, 1, 2, 1, 0, 0, 1, 0, 0, 0, 0, 3, 0, 0, 1, 0, 1, 1, 3, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 0, 3, 4, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1), + (0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 4, 0, 4, 1, 4, 0, 3, 0, 4, 0, 3, 0, 4, 0, 3, 0, 3, 0, 4, 1, 5, 1, 4, 0, 0, 3, 0, 5, 0, 5, 2, 0, 1, 0, 0, 0, 2, 1, 4, 0, 1, 3, 0, 0, 3, 0, 0, 3, 1, 1, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0), + (1, 4, 0, 5, 0, 3, 0, 2, 0, 3, 5, 4, 4, 3, 4, 3, 5, 3, 4, 3, 3, 0, 4, 3, 3, 3, 3, 3, 3, 2, 4, 4, 3, 1, 3, 4, 4, 5, 4, 4, 3, 4, 4, 1, 3, 5, 4, 3, 3, 3, 1, 2, 2, 3, 3, 1, 3, 1, 3, 3, 3, 5, 3, 3, 4, 5, 0, 3, 0, 3, 0, 3, 4, 3, 4, 4, 3, 0, 3, 0, 2, 4, 3), + (0, 1, 0, 4, 0, 0, 0, 0, 0, 1, 4, 0, 4, 1, 4, 2, 4, 0, 3, 0, 1, 0, 1, 0, 0, 0, 0, 0, 2, 0, 3, 1, 1, 1, 0, 3, 0, 0, 0, 1, 2, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 3, 0, 0, 0, 0, 3, 2, 0, 2, 2, 0, 1, 0, 0, 0, 2, 3, 2, 3, 3, 0, 0, 0, 0, 2, 1, 0), + (0, 5, 1, 5, 0, 3, 0, 3, 0, 5, 4, 4, 5, 1, 5, 3, 3, 0, 4, 3, 4, 3, 5, 3, 4, 3, 3, 2, 4, 3, 4, 3, 3, 0, 3, 3, 1, 4, 4, 3, 4, 4, 4, 3, 4, 5, 5, 3, 2, 3, 1, 1, 3, 3, 1, 3, 1, 1, 3, 3, 2, 4, 5, 3, 3, 5, 0, 4, 0, 3, 0, 4, 4, 3, 5, 3, 3, 0, 3, 4, 0, 4, 3), + (0, 5, 0, 5, 0, 3, 0, 2, 0, 4, 4, 3, 5, 2, 4, 3, 3, 3, 4, 4, 4, 3, 5, 3, 5, 3, 3, 1, 4, 0, 4, 3, 3, 0, 3, 3, 0, 4, 4, 4, 4, 5, 4, 3, 3, 5, 5, 3, 2, 3, 1, 2, 3, 2, 0, 1, 0, 0, 3, 2, 2, 4, 4, 3, 1, 5, 0, 4, 0, 3, 0, 4, 3, 1, 3, 2, 1, 0, 3, 3, 0, 3, 3), + (0, 4, 0, 5, 0, 5, 0, 4, 0, 4, 5, 5, 5, 3, 4, 3, 3, 2, 5, 4, 4, 3, 5, 3, 5, 3, 4, 0, 4, 3, 4, 4, 3, 2, 4, 4, 3, 4, 5, 4, 4, 5, 5, 0, 3, 5, 5, 4, 1, 3, 3, 2, 3, 3, 1, 3, 1, 0, 4, 3, 1, 4, 4, 3, 4, 5, 0, 4, 0, 2, 0, 4, 3, 4, 4, 3, 3, 0, 4, 0, 0, 5, 5), + (0, 4, 0, 4, 0, 5, 0, 1, 1, 3, 3, 4, 4, 3, 4, 1, 3, 0, 5, 1, 3, 0, 3, 1, 3, 1, 1, 0, 3, 0, 3, 3, 4, 0, 4, 3, 0, 4, 4, 4, 3, 4, 4, 0, 3, 5, 4, 1, 0, 3, 0, 0, 2, 3, 0, 3, 1, 0, 3, 1, 0, 3, 2, 1, 3, 5, 0, 3, 0, 1, 0, 3, 2, 3, 3, 4, 4, 0, 2, 2, 0, 4, 4), + (2, 4, 0, 5, 0, 4, 0, 3, 0, 4, 5, 5, 4, 3, 5, 3, 5, 3, 5, 3, 5, 2, 5, 3, 4, 3, 3, 4, 3, 4, 5, 3, 2, 1, 5, 4, 3, 2, 3, 4, 5, 3, 4, 1, 2, 5, 4, 3, 0, 3, 3, 0, 3, 2, 0, 2, 3, 0, 4, 1, 0, 3, 4, 3, 3, 5, 0, 3, 0, 1, 0, 4, 5, 5, 5, 4, 3, 0, 4, 2, 0, 3, 5), + (0, 5, 0, 4, 0, 4, 0, 2, 0, 5, 4, 3, 4, 3, 4, 3, 3, 3, 4, 3, 4, 2, 5, 3, 5, 3, 4, 1, 4, 3, 4, 4, 4, 0, 3, 5, 0, 4, 4, 4, 4, 5, 3, 1, 3, 4, 5, 3, 3, 3, 3, 3, 3, 3, 0, 2, 2, 0, 3, 3, 2, 4, 3, 3, 3, 5, 3, 4, 1, 3, 3, 5, 3, 2, 0, 0, 0, 0, 4, 3, 1, 3, 3), + (0, 1, 0, 3, 0, 3, 0, 1, 0, 1, 3, 3, 3, 2, 3, 3, 3, 0, 3, 0, 0, 0, 3, 1, 3, 0, 0, 0, 2, 2, 2, 3, 0, 0, 3, 2, 0, 1, 2, 4, 1, 3, 3, 0, 0, 3, 3, 3, 0, 1, 0, 0, 2, 1, 0, 0, 3, 0, 3, 1, 0, 3, 0, 0, 1, 3, 0, 2, 0, 1, 0, 3, 3, 1, 3, 3, 0, 0, 1, 1, 0, 3, 3), + (0, 2, 0, 3, 0, 2, 1, 4, 0, 2, 2, 3, 1, 1, 3, 1, 1, 0, 2, 0, 3, 1, 2, 3, 1, 3, 0, 0, 1, 0, 4, 3, 2, 3, 3, 3, 1, 4, 2, 3, 3, 3, 3, 1, 0, 3, 1, 4, 0, 1, 1, 0, 1, 2, 0, 1, 1, 0, 1, 1, 0, 3, 1, 3, 2, 2, 0, 1, 0, 0, 0, 2, 3, 3, 3, 1, 0, 0, 0, 0, 0, 2, 3), + (0, 5, 0, 4, 0, 5, 0, 2, 0, 4, 5, 5, 3, 3, 4, 3, 3, 1, 5, 4, 4, 2, 4, 4, 4, 3, 4, 2, 4, 3, 5, 5, 4, 3, 3, 4, 3, 3, 5, 5, 4, 5, 5, 1, 3, 4, 5, 3, 1, 4, 3, 1, 3, 3, 0, 3, 3, 1, 4, 3, 1, 4, 5, 3, 3, 5, 0, 4, 0, 3, 0, 5, 3, 3, 1, 4, 3, 0, 4, 0, 1, 5, 3), + (0, 5, 0, 5, 0, 4, 0, 2, 0, 4, 4, 3, 4, 3, 3, 3, 3, 3, 5, 4, 4, 4, 4, 4, 4, 5, 3, 3, 5, 2, 4, 4, 4, 3, 4, 4, 3, 3, 4, 4, 5, 5, 3, 3, 4, 3, 4, 3, 3, 4, 3, 3, 3, 3, 1, 2, 2, 1, 4, 3, 3, 5, 4, 4, 3, 4, 0, 4, 0, 3, 0, 4, 4, 4, 4, 4, 1, 0, 4, 2, 0, 2, 4), + (0, 4, 0, 4, 0, 3, 0, 1, 0, 3, 5, 2, 3, 0, 3, 0, 2, 1, 4, 2, 3, 3, 4, 1, 4, 3, 3, 2, 4, 1, 3, 3, 3, 0, 3, 3, 0, 0, 3, 3, 3, 5, 3, 3, 3, 3, 3, 2, 0, 2, 0, 0, 2, 0, 0, 2, 0, 0, 1, 0, 0, 3, 1, 2, 2, 3, 0, 3, 0, 2, 0, 4, 4, 3, 3, 4, 1, 0, 3, 0, 0, 2, 4), + (0, 0, 0, 4, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 2, 0, 0, 0, 0, 0, 1, 0, 2, 0, 1, 0, 0, 0, 0, 0, 3, 1, 3, 0, 3, 2, 0, 0, 0, 1, 0, 3, 2, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 4, 0, 2, 0, 0, 0, 0, 0, 0, 2), + (0, 2, 1, 3, 0, 2, 0, 2, 0, 3, 3, 3, 3, 1, 3, 1, 3, 3, 3, 3, 3, 3, 4, 2, 2, 1, 2, 1, 4, 0, 4, 3, 1, 3, 3, 3, 2, 4, 3, 5, 4, 3, 3, 3, 3, 3, 3, 3, 0, 1, 3, 0, 2, 0, 0, 1, 0, 0, 1, 0, 0, 4, 2, 0, 2, 3, 0, 3, 3, 0, 3, 3, 4, 2, 3, 1, 4, 0, 1, 2, 0, 2, 3), + (0, 3, 0, 3, 0, 1, 0, 3, 0, 2, 3, 3, 3, 0, 3, 1, 2, 0, 3, 3, 2, 3, 3, 2, 3, 2, 3, 1, 3, 0, 4, 3, 2, 0, 3, 3, 1, 4, 3, 3, 2, 3, 4, 3, 1, 3, 3, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 4, 1, 1, 0, 3, 0, 3, 1, 0, 2, 3, 3, 3, 3, 3, 1, 0, 0, 2, 0, 3, 3), + (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 2, 0, 3, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3, 0, 3, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 2, 0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 3), + (0, 2, 0, 3, 1, 3, 0, 3, 0, 2, 3, 3, 3, 1, 3, 1, 3, 1, 3, 1, 3, 3, 3, 1, 3, 0, 2, 3, 1, 1, 4, 3, 3, 2, 3, 3, 1, 2, 2, 4, 1, 3, 3, 0, 1, 4, 2, 3, 0, 1, 3, 0, 3, 0, 0, 1, 3, 0, 2, 0, 0, 3, 3, 2, 1, 3, 0, 3, 0, 2, 0, 3, 4, 4, 4, 3, 1, 0, 3, 0, 0, 3, 3), + (0, 2, 0, 1, 0, 2, 0, 0, 0, 1, 3, 2, 2, 1, 3, 0, 1, 1, 3, 0, 3, 2, 3, 1, 2, 0, 2, 0, 1, 1, 3, 3, 3, 0, 3, 3, 1, 1, 2, 3, 2, 3, 3, 1, 2, 3, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 3, 0, 1, 0, 0, 2, 1, 2, 1, 3, 0, 3, 0, 0, 0, 3, 4, 4, 4, 3, 2, 0, 2, 0, 0, 2, 4), + (0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 3, 1, 0, 0, 0, 0, 0, 0, 0, 3), + (0, 3, 0, 3, 0, 2, 0, 3, 0, 3, 3, 3, 2, 3, 2, 2, 2, 0, 3, 1, 3, 3, 3, 2, 3, 3, 0, 0, 3, 0, 3, 2, 2, 0, 2, 3, 1, 4, 3, 4, 3, 3, 2, 3, 1, 5, 4, 4, 0, 3, 1, 2, 1, 3, 0, 3, 1, 1, 2, 0, 2, 3, 1, 3, 1, 3, 0, 3, 0, 1, 0, 3, 3, 4, 4, 2, 1, 0, 2, 1, 0, 2, 4), + (0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 4, 2, 5, 1, 4, 0, 2, 0, 2, 1, 3, 1, 4, 0, 2, 1, 0, 0, 2, 1, 4, 1, 1, 0, 3, 3, 0, 5, 1, 3, 2, 3, 3, 1, 0, 3, 2, 3, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 4, 0, 1, 0, 3, 0, 2, 0, 1, 0, 3, 3, 3, 4, 3, 3, 0, 0, 0, 0, 2, 3), + (0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 2, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 1, 0, 0, 1, 0, 0, 0, 0, 0, 3), + (0, 1, 0, 3, 0, 4, 0, 3, 0, 2, 4, 3, 1, 0, 3, 2, 2, 1, 3, 1, 2, 2, 3, 1, 1, 1, 2, 1, 3, 0, 1, 2, 0, 1, 3, 2, 1, 3, 0, 5, 5, 1, 0, 0, 1, 3, 2, 1, 0, 3, 0, 0, 1, 0, 0, 0, 0, 0, 3, 4, 0, 1, 1, 1, 3, 2, 0, 2, 0, 1, 0, 2, 3, 3, 1, 2, 3, 0, 1, 0, 1, 0, 4), + (0, 0, 0, 1, 0, 3, 0, 3, 0, 2, 2, 1, 0, 0, 4, 0, 3, 0, 3, 1, 3, 0, 3, 0, 3, 0, 1, 0, 3, 0, 3, 1, 3, 0, 3, 3, 0, 0, 1, 2, 1, 1, 1, 0, 1, 2, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 1, 2, 0, 0, 2, 0, 0, 0, 0, 2, 3, 3, 3, 3, 0, 0, 0, 0, 1, 4), + (0, 0, 0, 3, 0, 3, 0, 0, 0, 0, 3, 1, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 0, 2, 0, 2, 3, 0, 0, 2, 2, 3, 1, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 2, 0, 0, 0, 0, 2, 3), + (2, 4, 0, 5, 0, 5, 0, 4, 0, 3, 4, 3, 3, 3, 4, 3, 3, 3, 4, 3, 4, 4, 5, 4, 5, 5, 5, 2, 3, 0, 5, 5, 4, 1, 5, 4, 3, 1, 5, 4, 3, 4, 4, 3, 3, 4, 3, 3, 0, 3, 2, 0, 2, 3, 0, 3, 0, 0, 3, 3, 0, 5, 3, 2, 3, 3, 0, 3, 0, 3, 0, 3, 4, 5, 4, 5, 3, 0, 4, 3, 0, 3, 4), + (0, 3, 0, 3, 0, 3, 0, 3, 0, 3, 3, 4, 3, 2, 3, 2, 3, 0, 4, 3, 3, 3, 3, 3, 3, 3, 3, 0, 3, 2, 4, 3, 3, 1, 3, 4, 3, 4, 4, 4, 3, 4, 4, 3, 2, 4, 4, 1, 0, 2, 0, 0, 1, 1, 0, 2, 0, 0, 3, 1, 0, 5, 3, 2, 1, 3, 0, 3, 0, 1, 2, 4, 3, 2, 4, 3, 3, 0, 3, 2, 0, 4, 4), + (0, 3, 0, 3, 0, 1, 0, 0, 0, 1, 4, 3, 3, 2, 3, 1, 3, 1, 4, 2, 3, 2, 4, 2, 3, 4, 3, 0, 2, 2, 3, 3, 3, 0, 3, 3, 3, 0, 3, 4, 1, 3, 3, 0, 3, 4, 3, 3, 0, 1, 1, 0, 1, 0, 0, 0, 4, 0, 3, 0, 0, 3, 1, 2, 1, 3, 0, 4, 0, 1, 0, 4, 3, 3, 4, 3, 3, 0, 2, 0, 0, 3, 3), + (0, 3, 0, 4, 0, 1, 0, 3, 0, 3, 4, 3, 3, 0, 3, 3, 3, 1, 3, 1, 3, 3, 4, 3, 3, 3, 0, 0, 3, 1, 5, 3, 3, 1, 3, 3, 2, 5, 4, 3, 3, 4, 5, 3, 2, 5, 3, 4, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 1, 1, 0, 4, 2, 2, 1, 3, 0, 3, 0, 2, 0, 4, 4, 3, 5, 3, 2, 0, 1, 1, 0, 3, 4), + (0, 5, 0, 4, 0, 5, 0, 2, 0, 4, 4, 3, 3, 2, 3, 3, 3, 1, 4, 3, 4, 1, 5, 3, 4, 3, 4, 0, 4, 2, 4, 3, 4, 1, 5, 4, 0, 4, 4, 4, 4, 5, 4, 1, 3, 5, 4, 2, 1, 4, 1, 1, 3, 2, 0, 3, 1, 0, 3, 2, 1, 4, 3, 3, 3, 4, 0, 4, 0, 3, 0, 4, 4, 4, 3, 3, 3, 0, 4, 2, 0, 3, 4), + (1, 4, 0, 4, 0, 3, 0, 1, 0, 3, 3, 3, 1, 1, 3, 3, 2, 2, 3, 3, 1, 0, 3, 2, 2, 1, 2, 0, 3, 1, 2, 1, 2, 0, 3, 2, 0, 2, 2, 3, 3, 4, 3, 0, 3, 3, 1, 2, 0, 1, 1, 3, 1, 2, 0, 0, 3, 0, 1, 1, 0, 3, 2, 2, 3, 3, 0, 3, 0, 0, 0, 2, 3, 3, 4, 3, 3, 0, 1, 0, 0, 1, 4), + (0, 4, 0, 4, 0, 4, 0, 0, 0, 3, 4, 4, 3, 1, 4, 2, 3, 2, 3, 3, 3, 1, 4, 3, 4, 0, 3, 0, 4, 2, 3, 3, 2, 2, 5, 4, 2, 1, 3, 4, 3, 4, 3, 1, 3, 3, 4, 2, 0, 2, 1, 0, 3, 3, 0, 0, 2, 0, 3, 1, 0, 4, 4, 3, 4, 3, 0, 4, 0, 1, 0, 2, 4, 4, 4, 4, 4, 0, 3, 2, 0, 3, 3), + (0, 0, 0, 1, 0, 4, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 3, 2, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 2), + (0, 2, 0, 3, 0, 4, 0, 4, 0, 1, 3, 3, 3, 0, 4, 0, 2, 1, 2, 1, 1, 1, 2, 0, 3, 1, 1, 0, 1, 0, 3, 1, 0, 0, 3, 3, 2, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 2, 0, 2, 2, 0, 3, 1, 0, 0, 1, 0, 1, 1, 0, 1, 2, 0, 3, 0, 0, 0, 0, 1, 0, 0, 3, 3, 4, 3, 1, 0, 1, 0, 3, 0, 2), + (0, 0, 0, 3, 0, 5, 0, 0, 0, 0, 1, 0, 2, 0, 3, 1, 0, 1, 3, 0, 0, 0, 2, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 4, 0, 0, 0, 2, 3, 0, 1, 4, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 3, 0, 0, 0, 0, 0, 3), + (0, 2, 0, 5, 0, 5, 0, 1, 0, 2, 4, 3, 3, 2, 5, 1, 3, 2, 3, 3, 3, 0, 4, 1, 2, 0, 3, 0, 4, 0, 2, 2, 1, 1, 5, 3, 0, 0, 1, 4, 2, 3, 2, 0, 3, 3, 3, 2, 0, 2, 4, 1, 1, 2, 0, 1, 1, 0, 3, 1, 0, 1, 3, 1, 2, 3, 0, 2, 0, 0, 0, 1, 3, 5, 4, 4, 4, 0, 3, 0, 0, 1, 3), + (0, 4, 0, 5, 0, 4, 0, 4, 0, 4, 5, 4, 3, 3, 4, 3, 3, 3, 4, 3, 4, 4, 5, 3, 4, 5, 4, 2, 4, 2, 3, 4, 3, 1, 4, 4, 1, 3, 5, 4, 4, 5, 5, 4, 4, 5, 5, 5, 2, 3, 3, 1, 4, 3, 1, 3, 3, 0, 3, 3, 1, 4, 3, 4, 4, 4, 0, 3, 0, 4, 0, 3, 3, 4, 4, 5, 0, 0, 4, 3, 0, 4, 5), + (0, 4, 0, 4, 0, 3, 0, 3, 0, 3, 4, 4, 4, 3, 3, 2, 4, 3, 4, 3, 4, 3, 5, 3, 4, 3, 2, 1, 4, 2, 4, 4, 3, 1, 3, 4, 2, 4, 5, 5, 3, 4, 5, 4, 1, 5, 4, 3, 0, 3, 2, 2, 3, 2, 1, 3, 1, 0, 3, 3, 3, 5, 3, 3, 3, 5, 4, 4, 2, 3, 3, 4, 3, 3, 3, 2, 1, 0, 3, 2, 1, 4, 3), + (0, 4, 0, 5, 0, 4, 0, 3, 0, 3, 5, 5, 3, 2, 4, 3, 4, 0, 5, 4, 4, 1, 4, 4, 4, 3, 3, 3, 4, 3, 5, 5, 2, 3, 3, 4, 1, 2, 5, 5, 3, 5, 5, 2, 3, 5, 5, 4, 0, 3, 2, 0, 3, 3, 1, 1, 5, 1, 4, 1, 0, 4, 3, 2, 3, 5, 0, 4, 0, 3, 0, 5, 4, 3, 4, 3, 0, 0, 4, 1, 0, 4, 4), + (1, 3, 0, 4, 0, 2, 0, 2, 0, 2, 5, 5, 3, 3, 3, 3, 3, 0, 4, 2, 3, 4, 4, 4, 3, 4, 0, 0, 3, 4, 5, 4, 3, 3, 3, 3, 2, 5, 5, 4, 5, 5, 5, 4, 3, 5, 5, 5, 1, 3, 1, 0, 1, 0, 0, 3, 2, 0, 4, 2, 0, 5, 2, 3, 2, 4, 1, 3, 0, 3, 0, 4, 5, 4, 5, 4, 3, 0, 4, 2, 0, 5, 4), + (0, 3, 0, 4, 0, 5, 0, 3, 0, 3, 4, 4, 3, 2, 3, 2, 3, 3, 3, 3, 3, 2, 4, 3, 3, 2, 2, 0, 3, 3, 3, 3, 3, 1, 3, 3, 3, 0, 4, 4, 3, 4, 4, 1, 1, 4, 4, 2, 0, 3, 1, 0, 1, 1, 0, 4, 1, 0, 2, 3, 1, 3, 3, 1, 3, 4, 0, 3, 0, 1, 0, 3, 1, 3, 0, 0, 1, 0, 2, 0, 0, 4, 4), + (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + (0, 3, 0, 3, 0, 2, 0, 3, 0, 1, 5, 4, 3, 3, 3, 1, 4, 2, 1, 2, 3, 4, 4, 2, 4, 4, 5, 0, 3, 1, 4, 3, 4, 0, 4, 3, 3, 3, 2, 3, 2, 5, 3, 4, 3, 2, 2, 3, 0, 0, 3, 0, 2, 1, 0, 1, 2, 0, 0, 0, 0, 2, 1, 1, 3, 1, 0, 2, 0, 4, 0, 3, 4, 4, 4, 5, 2, 0, 2, 0, 0, 1, 3), + (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 4, 2, 1, 1, 0, 1, 0, 3, 2, 0, 0, 3, 1, 1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 1, 0, 0, 0, 2, 0, 0, 0, 1, 4, 0, 4, 2, 1, 0, 0, 0, 0, 0, 1), + (0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 3, 1, 0, 0, 0, 2, 0, 2, 1, 0, 0, 1, 2, 1, 0, 1, 1, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 1, 0, 0, 0, 0, 0, 1, 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 2), + (0, 4, 0, 4, 0, 4, 0, 3, 0, 4, 4, 3, 4, 2, 4, 3, 2, 0, 4, 4, 4, 3, 5, 3, 5, 3, 3, 2, 4, 2, 4, 3, 4, 3, 1, 4, 0, 2, 3, 4, 4, 4, 3, 3, 3, 4, 4, 4, 3, 4, 1, 3, 4, 3, 2, 1, 2, 1, 3, 3, 3, 4, 4, 3, 3, 5, 0, 4, 0, 3, 0, 4, 3, 3, 3, 2, 1, 0, 3, 0, 0, 3, 3), + (0, 4, 0, 3, 0, 3, 0, 3, 0, 3, 5, 5, 3, 3, 3, 3, 4, 3, 4, 3, 3, 3, 4, 4, 4, 3, 3, 3, 3, 4, 3, 5, 3, 3, 1, 3, 2, 4, 5, 5, 5, 5, 4, 3, 4, 5, 5, 3, 2, 2, 3, 3, 3, 3, 2, 3, 3, 1, 2, 3, 2, 4, 3, 3, 3, 4, 0, 4, 0, 2, 0, 4, 3, 2, 2, 1, 2, 0, 3, 0, 0, 4, 1), ) +# fmt: on -class JapaneseContextAnalysis(object): + +class JapaneseContextAnalysis: NUM_OF_CATEGORY = 6 DONT_KNOW = -1 ENOUGH_REL_THRESHOLD = 100 MAX_REL_THRESHOLD = 1000 MINIMUM_DATA_THRESHOLD = 4 - def __init__(self): - self._total_rel = None - self._rel_sample = None - self._need_to_skip_char_num = None - self._last_char_order = None - self._done = None + def __init__(self) -> None: + self._total_rel = 0 + self._rel_sample: List[int] = [] + self._need_to_skip_char_num = 0 + self._last_char_order = -1 + self._done = False self.reset() - def reset(self): + def reset(self) -> None: self._total_rel = 0 # total sequence received # category counters, each integer counts sequence in its category self._rel_sample = [0] * self.NUM_OF_CATEGORY @@ -140,7 +144,7 @@ def reset(self): # been made self._done = False - def feed(self, byte_str, num_bytes): + def feed(self, byte_str: Union[bytes, bytearray], num_bytes: int) -> None: if self._done: return @@ -153,7 +157,7 @@ def feed(self, byte_str, num_bytes): # this character will simply our logic and improve performance. i = self._need_to_skip_char_num while i < num_bytes: - order, char_len = self.get_order(byte_str[i:i + 2]) + order, char_len = self.get_order(byte_str[i : i + 2]) i += char_len if i > num_bytes: self._need_to_skip_char_num = i - num_bytes @@ -164,32 +168,34 @@ def feed(self, byte_str, num_bytes): if self._total_rel > self.MAX_REL_THRESHOLD: self._done = True break - self._rel_sample[jp2CharContext[self._last_char_order][order]] += 1 + self._rel_sample[ + jp2_char_context[self._last_char_order][order] + ] += 1 self._last_char_order = order - def got_enough_data(self): + def got_enough_data(self) -> bool: return self._total_rel > self.ENOUGH_REL_THRESHOLD - def get_confidence(self): + def get_confidence(self) -> float: # This is just one way to calculate confidence. It works well for me. if self._total_rel > self.MINIMUM_DATA_THRESHOLD: return (self._total_rel - self._rel_sample[0]) / self._total_rel - else: - return self.DONT_KNOW + return self.DONT_KNOW - def get_order(self, byte_str): + def get_order(self, _: Union[bytes, bytearray]) -> Tuple[int, int]: return -1, 1 + class SJISContextAnalysis(JapaneseContextAnalysis): - def __init__(self): - super(SJISContextAnalysis, self).__init__() + def __init__(self) -> None: + super().__init__() self._charset_name = "SHIFT_JIS" @property - def charset_name(self): + def charset_name(self) -> str: return self._charset_name - def get_order(self, byte_str): + def get_order(self, byte_str: Union[bytes, bytearray]) -> Tuple[int, int]: if not byte_str: return -1, 1 # find out current char's byte length @@ -209,8 +215,9 @@ def get_order(self, byte_str): return -1, char_len + class EUCJPContextAnalysis(JapaneseContextAnalysis): - def get_order(self, byte_str): + def get_order(self, byte_str: Union[bytes, bytearray]) -> Tuple[int, int]: if not byte_str: return -1, 1 # find out current char's byte length @@ -229,5 +236,3 @@ def get_order(self, byte_str): return second_char - 0xA1, char_len return -1, char_len - - diff --git a/src/pip/_vendor/chardet/langbulgarianmodel.py b/src/pip/_vendor/chardet/langbulgarianmodel.py index e963a50979a..994668219dd 100644 --- a/src/pip/_vendor/chardet/langbulgarianmodel.py +++ b/src/pip/_vendor/chardet/langbulgarianmodel.py @@ -1,9 +1,5 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - from pip._vendor.chardet.sbcharsetprober import SingleByteCharSetModel - # 3: Positive # 2: Likely # 1: Unlikely @@ -4115,536 +4111,539 @@ # Character Mapping Table(s): ISO_8859_5_BULGARIAN_CHAR_TO_ORDER = { - 0: 255, # '\x00' - 1: 255, # '\x01' - 2: 255, # '\x02' - 3: 255, # '\x03' - 4: 255, # '\x04' - 5: 255, # '\x05' - 6: 255, # '\x06' - 7: 255, # '\x07' - 8: 255, # '\x08' - 9: 255, # '\t' - 10: 254, # '\n' - 11: 255, # '\x0b' - 12: 255, # '\x0c' - 13: 254, # '\r' - 14: 255, # '\x0e' - 15: 255, # '\x0f' - 16: 255, # '\x10' - 17: 255, # '\x11' - 18: 255, # '\x12' - 19: 255, # '\x13' - 20: 255, # '\x14' - 21: 255, # '\x15' - 22: 255, # '\x16' - 23: 255, # '\x17' - 24: 255, # '\x18' - 25: 255, # '\x19' - 26: 255, # '\x1a' - 27: 255, # '\x1b' - 28: 255, # '\x1c' - 29: 255, # '\x1d' - 30: 255, # '\x1e' - 31: 255, # '\x1f' - 32: 253, # ' ' - 33: 253, # '!' - 34: 253, # '"' - 35: 253, # '#' - 36: 253, # '$' - 37: 253, # '%' - 38: 253, # '&' - 39: 253, # "'" - 40: 253, # '(' - 41: 253, # ')' - 42: 253, # '*' - 43: 253, # '+' - 44: 253, # ',' - 45: 253, # '-' - 46: 253, # '.' - 47: 253, # '/' - 48: 252, # '0' - 49: 252, # '1' - 50: 252, # '2' - 51: 252, # '3' - 52: 252, # '4' - 53: 252, # '5' - 54: 252, # '6' - 55: 252, # '7' - 56: 252, # '8' - 57: 252, # '9' - 58: 253, # ':' - 59: 253, # ';' - 60: 253, # '<' - 61: 253, # '=' - 62: 253, # '>' - 63: 253, # '?' - 64: 253, # '@' - 65: 77, # 'A' - 66: 90, # 'B' - 67: 99, # 'C' - 68: 100, # 'D' - 69: 72, # 'E' - 70: 109, # 'F' - 71: 107, # 'G' - 72: 101, # 'H' - 73: 79, # 'I' - 74: 185, # 'J' - 75: 81, # 'K' - 76: 102, # 'L' - 77: 76, # 'M' - 78: 94, # 'N' - 79: 82, # 'O' - 80: 110, # 'P' - 81: 186, # 'Q' - 82: 108, # 'R' - 83: 91, # 'S' - 84: 74, # 'T' - 85: 119, # 'U' - 86: 84, # 'V' - 87: 96, # 'W' - 88: 111, # 'X' - 89: 187, # 'Y' - 90: 115, # 'Z' - 91: 253, # '[' - 92: 253, # '\\' - 93: 253, # ']' - 94: 253, # '^' - 95: 253, # '_' - 96: 253, # '`' - 97: 65, # 'a' - 98: 69, # 'b' - 99: 70, # 'c' - 100: 66, # 'd' - 101: 63, # 'e' - 102: 68, # 'f' - 103: 112, # 'g' - 104: 103, # 'h' - 105: 92, # 'i' - 106: 194, # 'j' - 107: 104, # 'k' - 108: 95, # 'l' - 109: 86, # 'm' - 110: 87, # 'n' - 111: 71, # 'o' - 112: 116, # 'p' - 113: 195, # 'q' - 114: 85, # 'r' - 115: 93, # 's' - 116: 97, # 't' - 117: 113, # 'u' - 118: 196, # 'v' - 119: 197, # 'w' - 120: 198, # 'x' - 121: 199, # 'y' - 122: 200, # 'z' - 123: 253, # '{' - 124: 253, # '|' - 125: 253, # '}' - 126: 253, # '~' - 127: 253, # '\x7f' - 128: 194, # '\x80' - 129: 195, # '\x81' - 130: 196, # '\x82' - 131: 197, # '\x83' - 132: 198, # '\x84' - 133: 199, # '\x85' - 134: 200, # '\x86' - 135: 201, # '\x87' - 136: 202, # '\x88' - 137: 203, # '\x89' - 138: 204, # '\x8a' - 139: 205, # '\x8b' - 140: 206, # '\x8c' - 141: 207, # '\x8d' - 142: 208, # '\x8e' - 143: 209, # '\x8f' - 144: 210, # '\x90' - 145: 211, # '\x91' - 146: 212, # '\x92' - 147: 213, # '\x93' - 148: 214, # '\x94' - 149: 215, # '\x95' - 150: 216, # '\x96' - 151: 217, # '\x97' - 152: 218, # '\x98' - 153: 219, # '\x99' - 154: 220, # '\x9a' - 155: 221, # '\x9b' - 156: 222, # '\x9c' - 157: 223, # '\x9d' - 158: 224, # '\x9e' - 159: 225, # '\x9f' - 160: 81, # '\xa0' - 161: 226, # 'Ё' - 162: 227, # 'Ђ' - 163: 228, # 'Ѓ' - 164: 229, # 'Є' - 165: 230, # 'Ѕ' - 166: 105, # 'І' - 167: 231, # 'Ї' - 168: 232, # 'Ј' - 169: 233, # 'Љ' - 170: 234, # 'Њ' - 171: 235, # 'Ћ' - 172: 236, # 'Ќ' - 173: 45, # '\xad' - 174: 237, # 'Ў' - 175: 238, # 'Џ' - 176: 31, # 'А' - 177: 32, # 'Б' - 178: 35, # 'В' - 179: 43, # 'Г' - 180: 37, # 'Д' - 181: 44, # 'Е' - 182: 55, # 'Ж' - 183: 47, # 'З' - 184: 40, # 'И' - 185: 59, # 'Й' - 186: 33, # 'К' - 187: 46, # 'Л' - 188: 38, # 'М' - 189: 36, # 'Н' - 190: 41, # 'О' - 191: 30, # 'П' - 192: 39, # 'Р' - 193: 28, # 'С' - 194: 34, # 'Т' - 195: 51, # 'У' - 196: 48, # 'Ф' - 197: 49, # 'Х' - 198: 53, # 'Ц' - 199: 50, # 'Ч' - 200: 54, # 'Ш' - 201: 57, # 'Щ' - 202: 61, # 'Ъ' - 203: 239, # 'Ы' - 204: 67, # 'Ь' - 205: 240, # 'Э' - 206: 60, # 'Ю' - 207: 56, # 'Я' - 208: 1, # 'а' - 209: 18, # 'б' - 210: 9, # 'в' - 211: 20, # 'г' - 212: 11, # 'д' - 213: 3, # 'е' - 214: 23, # 'ж' - 215: 15, # 'з' - 216: 2, # 'и' - 217: 26, # 'й' - 218: 12, # 'к' - 219: 10, # 'л' - 220: 14, # 'м' - 221: 6, # 'н' - 222: 4, # 'о' - 223: 13, # 'п' - 224: 7, # 'р' - 225: 8, # 'с' - 226: 5, # 'т' - 227: 19, # 'у' - 228: 29, # 'ф' - 229: 25, # 'х' - 230: 22, # 'ц' - 231: 21, # 'ч' - 232: 27, # 'ш' - 233: 24, # 'щ' - 234: 17, # 'ъ' - 235: 75, # 'ы' - 236: 52, # 'ь' - 237: 241, # 'э' - 238: 42, # 'ю' - 239: 16, # 'я' - 240: 62, # '№' - 241: 242, # 'ё' - 242: 243, # 'ђ' - 243: 244, # 'ѓ' - 244: 58, # 'є' - 245: 245, # 'ѕ' - 246: 98, # 'і' - 247: 246, # 'ї' - 248: 247, # 'ј' - 249: 248, # 'љ' - 250: 249, # 'њ' - 251: 250, # 'ћ' - 252: 251, # 'ќ' - 253: 91, # '§' - 254: 252, # 'ў' - 255: 253, # 'џ' + 0: 255, # '\x00' + 1: 255, # '\x01' + 2: 255, # '\x02' + 3: 255, # '\x03' + 4: 255, # '\x04' + 5: 255, # '\x05' + 6: 255, # '\x06' + 7: 255, # '\x07' + 8: 255, # '\x08' + 9: 255, # '\t' + 10: 254, # '\n' + 11: 255, # '\x0b' + 12: 255, # '\x0c' + 13: 254, # '\r' + 14: 255, # '\x0e' + 15: 255, # '\x0f' + 16: 255, # '\x10' + 17: 255, # '\x11' + 18: 255, # '\x12' + 19: 255, # '\x13' + 20: 255, # '\x14' + 21: 255, # '\x15' + 22: 255, # '\x16' + 23: 255, # '\x17' + 24: 255, # '\x18' + 25: 255, # '\x19' + 26: 255, # '\x1a' + 27: 255, # '\x1b' + 28: 255, # '\x1c' + 29: 255, # '\x1d' + 30: 255, # '\x1e' + 31: 255, # '\x1f' + 32: 253, # ' ' + 33: 253, # '!' + 34: 253, # '"' + 35: 253, # '#' + 36: 253, # '$' + 37: 253, # '%' + 38: 253, # '&' + 39: 253, # "'" + 40: 253, # '(' + 41: 253, # ')' + 42: 253, # '*' + 43: 253, # '+' + 44: 253, # ',' + 45: 253, # '-' + 46: 253, # '.' + 47: 253, # '/' + 48: 252, # '0' + 49: 252, # '1' + 50: 252, # '2' + 51: 252, # '3' + 52: 252, # '4' + 53: 252, # '5' + 54: 252, # '6' + 55: 252, # '7' + 56: 252, # '8' + 57: 252, # '9' + 58: 253, # ':' + 59: 253, # ';' + 60: 253, # '<' + 61: 253, # '=' + 62: 253, # '>' + 63: 253, # '?' + 64: 253, # '@' + 65: 77, # 'A' + 66: 90, # 'B' + 67: 99, # 'C' + 68: 100, # 'D' + 69: 72, # 'E' + 70: 109, # 'F' + 71: 107, # 'G' + 72: 101, # 'H' + 73: 79, # 'I' + 74: 185, # 'J' + 75: 81, # 'K' + 76: 102, # 'L' + 77: 76, # 'M' + 78: 94, # 'N' + 79: 82, # 'O' + 80: 110, # 'P' + 81: 186, # 'Q' + 82: 108, # 'R' + 83: 91, # 'S' + 84: 74, # 'T' + 85: 119, # 'U' + 86: 84, # 'V' + 87: 96, # 'W' + 88: 111, # 'X' + 89: 187, # 'Y' + 90: 115, # 'Z' + 91: 253, # '[' + 92: 253, # '\\' + 93: 253, # ']' + 94: 253, # '^' + 95: 253, # '_' + 96: 253, # '`' + 97: 65, # 'a' + 98: 69, # 'b' + 99: 70, # 'c' + 100: 66, # 'd' + 101: 63, # 'e' + 102: 68, # 'f' + 103: 112, # 'g' + 104: 103, # 'h' + 105: 92, # 'i' + 106: 194, # 'j' + 107: 104, # 'k' + 108: 95, # 'l' + 109: 86, # 'm' + 110: 87, # 'n' + 111: 71, # 'o' + 112: 116, # 'p' + 113: 195, # 'q' + 114: 85, # 'r' + 115: 93, # 's' + 116: 97, # 't' + 117: 113, # 'u' + 118: 196, # 'v' + 119: 197, # 'w' + 120: 198, # 'x' + 121: 199, # 'y' + 122: 200, # 'z' + 123: 253, # '{' + 124: 253, # '|' + 125: 253, # '}' + 126: 253, # '~' + 127: 253, # '\x7f' + 128: 194, # '\x80' + 129: 195, # '\x81' + 130: 196, # '\x82' + 131: 197, # '\x83' + 132: 198, # '\x84' + 133: 199, # '\x85' + 134: 200, # '\x86' + 135: 201, # '\x87' + 136: 202, # '\x88' + 137: 203, # '\x89' + 138: 204, # '\x8a' + 139: 205, # '\x8b' + 140: 206, # '\x8c' + 141: 207, # '\x8d' + 142: 208, # '\x8e' + 143: 209, # '\x8f' + 144: 210, # '\x90' + 145: 211, # '\x91' + 146: 212, # '\x92' + 147: 213, # '\x93' + 148: 214, # '\x94' + 149: 215, # '\x95' + 150: 216, # '\x96' + 151: 217, # '\x97' + 152: 218, # '\x98' + 153: 219, # '\x99' + 154: 220, # '\x9a' + 155: 221, # '\x9b' + 156: 222, # '\x9c' + 157: 223, # '\x9d' + 158: 224, # '\x9e' + 159: 225, # '\x9f' + 160: 81, # '\xa0' + 161: 226, # 'Ё' + 162: 227, # 'Ђ' + 163: 228, # 'Ѓ' + 164: 229, # 'Є' + 165: 230, # 'Ѕ' + 166: 105, # 'І' + 167: 231, # 'Ї' + 168: 232, # 'Ј' + 169: 233, # 'Љ' + 170: 234, # 'Њ' + 171: 235, # 'Ћ' + 172: 236, # 'Ќ' + 173: 45, # '\xad' + 174: 237, # 'Ў' + 175: 238, # 'Џ' + 176: 31, # 'А' + 177: 32, # 'Б' + 178: 35, # 'В' + 179: 43, # 'Г' + 180: 37, # 'Д' + 181: 44, # 'Е' + 182: 55, # 'Ж' + 183: 47, # 'З' + 184: 40, # 'И' + 185: 59, # 'Й' + 186: 33, # 'К' + 187: 46, # 'Л' + 188: 38, # 'М' + 189: 36, # 'Н' + 190: 41, # 'О' + 191: 30, # 'П' + 192: 39, # 'Р' + 193: 28, # 'С' + 194: 34, # 'Т' + 195: 51, # 'У' + 196: 48, # 'Ф' + 197: 49, # 'Х' + 198: 53, # 'Ц' + 199: 50, # 'Ч' + 200: 54, # 'Ш' + 201: 57, # 'Щ' + 202: 61, # 'Ъ' + 203: 239, # 'Ы' + 204: 67, # 'Ь' + 205: 240, # 'Э' + 206: 60, # 'Ю' + 207: 56, # 'Я' + 208: 1, # 'а' + 209: 18, # 'б' + 210: 9, # 'в' + 211: 20, # 'г' + 212: 11, # 'д' + 213: 3, # 'е' + 214: 23, # 'ж' + 215: 15, # 'з' + 216: 2, # 'и' + 217: 26, # 'й' + 218: 12, # 'к' + 219: 10, # 'л' + 220: 14, # 'м' + 221: 6, # 'н' + 222: 4, # 'о' + 223: 13, # 'п' + 224: 7, # 'р' + 225: 8, # 'с' + 226: 5, # 'т' + 227: 19, # 'у' + 228: 29, # 'ф' + 229: 25, # 'х' + 230: 22, # 'ц' + 231: 21, # 'ч' + 232: 27, # 'ш' + 233: 24, # 'щ' + 234: 17, # 'ъ' + 235: 75, # 'ы' + 236: 52, # 'ь' + 237: 241, # 'э' + 238: 42, # 'ю' + 239: 16, # 'я' + 240: 62, # '№' + 241: 242, # 'ё' + 242: 243, # 'ђ' + 243: 244, # 'ѓ' + 244: 58, # 'є' + 245: 245, # 'ѕ' + 246: 98, # 'і' + 247: 246, # 'ї' + 248: 247, # 'ј' + 249: 248, # 'љ' + 250: 249, # 'њ' + 251: 250, # 'ћ' + 252: 251, # 'ќ' + 253: 91, # '§' + 254: 252, # 'ў' + 255: 253, # 'џ' } -ISO_8859_5_BULGARIAN_MODEL = SingleByteCharSetModel(charset_name='ISO-8859-5', - language='Bulgarian', - char_to_order_map=ISO_8859_5_BULGARIAN_CHAR_TO_ORDER, - language_model=BULGARIAN_LANG_MODEL, - typical_positive_ratio=0.969392, - keep_ascii_letters=False, - alphabet='АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЬЮЯабвгдежзийклмнопрстуфхцчшщъьюя') +ISO_8859_5_BULGARIAN_MODEL = SingleByteCharSetModel( + charset_name="ISO-8859-5", + language="Bulgarian", + char_to_order_map=ISO_8859_5_BULGARIAN_CHAR_TO_ORDER, + language_model=BULGARIAN_LANG_MODEL, + typical_positive_ratio=0.969392, + keep_ascii_letters=False, + alphabet="АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЬЮЯабвгдежзийклмнопрстуфхцчшщъьюя", +) WINDOWS_1251_BULGARIAN_CHAR_TO_ORDER = { - 0: 255, # '\x00' - 1: 255, # '\x01' - 2: 255, # '\x02' - 3: 255, # '\x03' - 4: 255, # '\x04' - 5: 255, # '\x05' - 6: 255, # '\x06' - 7: 255, # '\x07' - 8: 255, # '\x08' - 9: 255, # '\t' - 10: 254, # '\n' - 11: 255, # '\x0b' - 12: 255, # '\x0c' - 13: 254, # '\r' - 14: 255, # '\x0e' - 15: 255, # '\x0f' - 16: 255, # '\x10' - 17: 255, # '\x11' - 18: 255, # '\x12' - 19: 255, # '\x13' - 20: 255, # '\x14' - 21: 255, # '\x15' - 22: 255, # '\x16' - 23: 255, # '\x17' - 24: 255, # '\x18' - 25: 255, # '\x19' - 26: 255, # '\x1a' - 27: 255, # '\x1b' - 28: 255, # '\x1c' - 29: 255, # '\x1d' - 30: 255, # '\x1e' - 31: 255, # '\x1f' - 32: 253, # ' ' - 33: 253, # '!' - 34: 253, # '"' - 35: 253, # '#' - 36: 253, # '$' - 37: 253, # '%' - 38: 253, # '&' - 39: 253, # "'" - 40: 253, # '(' - 41: 253, # ')' - 42: 253, # '*' - 43: 253, # '+' - 44: 253, # ',' - 45: 253, # '-' - 46: 253, # '.' - 47: 253, # '/' - 48: 252, # '0' - 49: 252, # '1' - 50: 252, # '2' - 51: 252, # '3' - 52: 252, # '4' - 53: 252, # '5' - 54: 252, # '6' - 55: 252, # '7' - 56: 252, # '8' - 57: 252, # '9' - 58: 253, # ':' - 59: 253, # ';' - 60: 253, # '<' - 61: 253, # '=' - 62: 253, # '>' - 63: 253, # '?' - 64: 253, # '@' - 65: 77, # 'A' - 66: 90, # 'B' - 67: 99, # 'C' - 68: 100, # 'D' - 69: 72, # 'E' - 70: 109, # 'F' - 71: 107, # 'G' - 72: 101, # 'H' - 73: 79, # 'I' - 74: 185, # 'J' - 75: 81, # 'K' - 76: 102, # 'L' - 77: 76, # 'M' - 78: 94, # 'N' - 79: 82, # 'O' - 80: 110, # 'P' - 81: 186, # 'Q' - 82: 108, # 'R' - 83: 91, # 'S' - 84: 74, # 'T' - 85: 119, # 'U' - 86: 84, # 'V' - 87: 96, # 'W' - 88: 111, # 'X' - 89: 187, # 'Y' - 90: 115, # 'Z' - 91: 253, # '[' - 92: 253, # '\\' - 93: 253, # ']' - 94: 253, # '^' - 95: 253, # '_' - 96: 253, # '`' - 97: 65, # 'a' - 98: 69, # 'b' - 99: 70, # 'c' - 100: 66, # 'd' - 101: 63, # 'e' - 102: 68, # 'f' - 103: 112, # 'g' - 104: 103, # 'h' - 105: 92, # 'i' - 106: 194, # 'j' - 107: 104, # 'k' - 108: 95, # 'l' - 109: 86, # 'm' - 110: 87, # 'n' - 111: 71, # 'o' - 112: 116, # 'p' - 113: 195, # 'q' - 114: 85, # 'r' - 115: 93, # 's' - 116: 97, # 't' - 117: 113, # 'u' - 118: 196, # 'v' - 119: 197, # 'w' - 120: 198, # 'x' - 121: 199, # 'y' - 122: 200, # 'z' - 123: 253, # '{' - 124: 253, # '|' - 125: 253, # '}' - 126: 253, # '~' - 127: 253, # '\x7f' - 128: 206, # 'Ђ' - 129: 207, # 'Ѓ' - 130: 208, # '‚' - 131: 209, # 'ѓ' - 132: 210, # '„' - 133: 211, # '…' - 134: 212, # '†' - 135: 213, # '‡' - 136: 120, # '€' - 137: 214, # '‰' - 138: 215, # 'Љ' - 139: 216, # '‹' - 140: 217, # 'Њ' - 141: 218, # 'Ќ' - 142: 219, # 'Ћ' - 143: 220, # 'Џ' - 144: 221, # 'ђ' - 145: 78, # '‘' - 146: 64, # '’' - 147: 83, # '“' - 148: 121, # '”' - 149: 98, # '•' - 150: 117, # '–' - 151: 105, # '—' - 152: 222, # None - 153: 223, # '™' - 154: 224, # 'љ' - 155: 225, # '›' - 156: 226, # 'њ' - 157: 227, # 'ќ' - 158: 228, # 'ћ' - 159: 229, # 'џ' - 160: 88, # '\xa0' - 161: 230, # 'Ў' - 162: 231, # 'ў' - 163: 232, # 'Ј' - 164: 233, # '¤' - 165: 122, # 'Ґ' - 166: 89, # '¦' - 167: 106, # '§' - 168: 234, # 'Ё' - 169: 235, # '©' - 170: 236, # 'Є' - 171: 237, # '«' - 172: 238, # '¬' - 173: 45, # '\xad' - 174: 239, # '®' - 175: 240, # 'Ї' - 176: 73, # '°' - 177: 80, # '±' - 178: 118, # 'І' - 179: 114, # 'і' - 180: 241, # 'ґ' - 181: 242, # 'µ' - 182: 243, # '¶' - 183: 244, # '·' - 184: 245, # 'ё' - 185: 62, # '№' - 186: 58, # 'є' - 187: 246, # '»' - 188: 247, # 'ј' - 189: 248, # 'Ѕ' - 190: 249, # 'ѕ' - 191: 250, # 'ї' - 192: 31, # 'А' - 193: 32, # 'Б' - 194: 35, # 'В' - 195: 43, # 'Г' - 196: 37, # 'Д' - 197: 44, # 'Е' - 198: 55, # 'Ж' - 199: 47, # 'З' - 200: 40, # 'И' - 201: 59, # 'Й' - 202: 33, # 'К' - 203: 46, # 'Л' - 204: 38, # 'М' - 205: 36, # 'Н' - 206: 41, # 'О' - 207: 30, # 'П' - 208: 39, # 'Р' - 209: 28, # 'С' - 210: 34, # 'Т' - 211: 51, # 'У' - 212: 48, # 'Ф' - 213: 49, # 'Х' - 214: 53, # 'Ц' - 215: 50, # 'Ч' - 216: 54, # 'Ш' - 217: 57, # 'Щ' - 218: 61, # 'Ъ' - 219: 251, # 'Ы' - 220: 67, # 'Ь' - 221: 252, # 'Э' - 222: 60, # 'Ю' - 223: 56, # 'Я' - 224: 1, # 'а' - 225: 18, # 'б' - 226: 9, # 'в' - 227: 20, # 'г' - 228: 11, # 'д' - 229: 3, # 'е' - 230: 23, # 'ж' - 231: 15, # 'з' - 232: 2, # 'и' - 233: 26, # 'й' - 234: 12, # 'к' - 235: 10, # 'л' - 236: 14, # 'м' - 237: 6, # 'н' - 238: 4, # 'о' - 239: 13, # 'п' - 240: 7, # 'р' - 241: 8, # 'с' - 242: 5, # 'т' - 243: 19, # 'у' - 244: 29, # 'ф' - 245: 25, # 'х' - 246: 22, # 'ц' - 247: 21, # 'ч' - 248: 27, # 'ш' - 249: 24, # 'щ' - 250: 17, # 'ъ' - 251: 75, # 'ы' - 252: 52, # 'ь' - 253: 253, # 'э' - 254: 42, # 'ю' - 255: 16, # 'я' + 0: 255, # '\x00' + 1: 255, # '\x01' + 2: 255, # '\x02' + 3: 255, # '\x03' + 4: 255, # '\x04' + 5: 255, # '\x05' + 6: 255, # '\x06' + 7: 255, # '\x07' + 8: 255, # '\x08' + 9: 255, # '\t' + 10: 254, # '\n' + 11: 255, # '\x0b' + 12: 255, # '\x0c' + 13: 254, # '\r' + 14: 255, # '\x0e' + 15: 255, # '\x0f' + 16: 255, # '\x10' + 17: 255, # '\x11' + 18: 255, # '\x12' + 19: 255, # '\x13' + 20: 255, # '\x14' + 21: 255, # '\x15' + 22: 255, # '\x16' + 23: 255, # '\x17' + 24: 255, # '\x18' + 25: 255, # '\x19' + 26: 255, # '\x1a' + 27: 255, # '\x1b' + 28: 255, # '\x1c' + 29: 255, # '\x1d' + 30: 255, # '\x1e' + 31: 255, # '\x1f' + 32: 253, # ' ' + 33: 253, # '!' + 34: 253, # '"' + 35: 253, # '#' + 36: 253, # '$' + 37: 253, # '%' + 38: 253, # '&' + 39: 253, # "'" + 40: 253, # '(' + 41: 253, # ')' + 42: 253, # '*' + 43: 253, # '+' + 44: 253, # ',' + 45: 253, # '-' + 46: 253, # '.' + 47: 253, # '/' + 48: 252, # '0' + 49: 252, # '1' + 50: 252, # '2' + 51: 252, # '3' + 52: 252, # '4' + 53: 252, # '5' + 54: 252, # '6' + 55: 252, # '7' + 56: 252, # '8' + 57: 252, # '9' + 58: 253, # ':' + 59: 253, # ';' + 60: 253, # '<' + 61: 253, # '=' + 62: 253, # '>' + 63: 253, # '?' + 64: 253, # '@' + 65: 77, # 'A' + 66: 90, # 'B' + 67: 99, # 'C' + 68: 100, # 'D' + 69: 72, # 'E' + 70: 109, # 'F' + 71: 107, # 'G' + 72: 101, # 'H' + 73: 79, # 'I' + 74: 185, # 'J' + 75: 81, # 'K' + 76: 102, # 'L' + 77: 76, # 'M' + 78: 94, # 'N' + 79: 82, # 'O' + 80: 110, # 'P' + 81: 186, # 'Q' + 82: 108, # 'R' + 83: 91, # 'S' + 84: 74, # 'T' + 85: 119, # 'U' + 86: 84, # 'V' + 87: 96, # 'W' + 88: 111, # 'X' + 89: 187, # 'Y' + 90: 115, # 'Z' + 91: 253, # '[' + 92: 253, # '\\' + 93: 253, # ']' + 94: 253, # '^' + 95: 253, # '_' + 96: 253, # '`' + 97: 65, # 'a' + 98: 69, # 'b' + 99: 70, # 'c' + 100: 66, # 'd' + 101: 63, # 'e' + 102: 68, # 'f' + 103: 112, # 'g' + 104: 103, # 'h' + 105: 92, # 'i' + 106: 194, # 'j' + 107: 104, # 'k' + 108: 95, # 'l' + 109: 86, # 'm' + 110: 87, # 'n' + 111: 71, # 'o' + 112: 116, # 'p' + 113: 195, # 'q' + 114: 85, # 'r' + 115: 93, # 's' + 116: 97, # 't' + 117: 113, # 'u' + 118: 196, # 'v' + 119: 197, # 'w' + 120: 198, # 'x' + 121: 199, # 'y' + 122: 200, # 'z' + 123: 253, # '{' + 124: 253, # '|' + 125: 253, # '}' + 126: 253, # '~' + 127: 253, # '\x7f' + 128: 206, # 'Ђ' + 129: 207, # 'Ѓ' + 130: 208, # '‚' + 131: 209, # 'ѓ' + 132: 210, # '„' + 133: 211, # '…' + 134: 212, # '†' + 135: 213, # '‡' + 136: 120, # '€' + 137: 214, # '‰' + 138: 215, # 'Љ' + 139: 216, # '‹' + 140: 217, # 'Њ' + 141: 218, # 'Ќ' + 142: 219, # 'Ћ' + 143: 220, # 'Џ' + 144: 221, # 'ђ' + 145: 78, # '‘' + 146: 64, # '’' + 147: 83, # '“' + 148: 121, # '”' + 149: 98, # '•' + 150: 117, # '–' + 151: 105, # '—' + 152: 222, # None + 153: 223, # '™' + 154: 224, # 'љ' + 155: 225, # '›' + 156: 226, # 'њ' + 157: 227, # 'ќ' + 158: 228, # 'ћ' + 159: 229, # 'џ' + 160: 88, # '\xa0' + 161: 230, # 'Ў' + 162: 231, # 'ў' + 163: 232, # 'Ј' + 164: 233, # '¤' + 165: 122, # 'Ґ' + 166: 89, # '¦' + 167: 106, # '§' + 168: 234, # 'Ё' + 169: 235, # '©' + 170: 236, # 'Є' + 171: 237, # '«' + 172: 238, # '¬' + 173: 45, # '\xad' + 174: 239, # '®' + 175: 240, # 'Ї' + 176: 73, # '°' + 177: 80, # '±' + 178: 118, # 'І' + 179: 114, # 'і' + 180: 241, # 'ґ' + 181: 242, # 'µ' + 182: 243, # '¶' + 183: 244, # '·' + 184: 245, # 'ё' + 185: 62, # '№' + 186: 58, # 'є' + 187: 246, # '»' + 188: 247, # 'ј' + 189: 248, # 'Ѕ' + 190: 249, # 'ѕ' + 191: 250, # 'ї' + 192: 31, # 'А' + 193: 32, # 'Б' + 194: 35, # 'В' + 195: 43, # 'Г' + 196: 37, # 'Д' + 197: 44, # 'Е' + 198: 55, # 'Ж' + 199: 47, # 'З' + 200: 40, # 'И' + 201: 59, # 'Й' + 202: 33, # 'К' + 203: 46, # 'Л' + 204: 38, # 'М' + 205: 36, # 'Н' + 206: 41, # 'О' + 207: 30, # 'П' + 208: 39, # 'Р' + 209: 28, # 'С' + 210: 34, # 'Т' + 211: 51, # 'У' + 212: 48, # 'Ф' + 213: 49, # 'Х' + 214: 53, # 'Ц' + 215: 50, # 'Ч' + 216: 54, # 'Ш' + 217: 57, # 'Щ' + 218: 61, # 'Ъ' + 219: 251, # 'Ы' + 220: 67, # 'Ь' + 221: 252, # 'Э' + 222: 60, # 'Ю' + 223: 56, # 'Я' + 224: 1, # 'а' + 225: 18, # 'б' + 226: 9, # 'в' + 227: 20, # 'г' + 228: 11, # 'д' + 229: 3, # 'е' + 230: 23, # 'ж' + 231: 15, # 'з' + 232: 2, # 'и' + 233: 26, # 'й' + 234: 12, # 'к' + 235: 10, # 'л' + 236: 14, # 'м' + 237: 6, # 'н' + 238: 4, # 'о' + 239: 13, # 'п' + 240: 7, # 'р' + 241: 8, # 'с' + 242: 5, # 'т' + 243: 19, # 'у' + 244: 29, # 'ф' + 245: 25, # 'х' + 246: 22, # 'ц' + 247: 21, # 'ч' + 248: 27, # 'ш' + 249: 24, # 'щ' + 250: 17, # 'ъ' + 251: 75, # 'ы' + 252: 52, # 'ь' + 253: 253, # 'э' + 254: 42, # 'ю' + 255: 16, # 'я' } -WINDOWS_1251_BULGARIAN_MODEL = SingleByteCharSetModel(charset_name='windows-1251', - language='Bulgarian', - char_to_order_map=WINDOWS_1251_BULGARIAN_CHAR_TO_ORDER, - language_model=BULGARIAN_LANG_MODEL, - typical_positive_ratio=0.969392, - keep_ascii_letters=False, - alphabet='АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЬЮЯабвгдежзийклмнопрстуфхцчшщъьюя') - +WINDOWS_1251_BULGARIAN_MODEL = SingleByteCharSetModel( + charset_name="windows-1251", + language="Bulgarian", + char_to_order_map=WINDOWS_1251_BULGARIAN_CHAR_TO_ORDER, + language_model=BULGARIAN_LANG_MODEL, + typical_positive_ratio=0.969392, + keep_ascii_letters=False, + alphabet="АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЬЮЯабвгдежзийклмнопрстуфхцчшщъьюя", +) diff --git a/src/pip/_vendor/chardet/langgreekmodel.py b/src/pip/_vendor/chardet/langgreekmodel.py index d99528ede73..cfb8639e560 100644 --- a/src/pip/_vendor/chardet/langgreekmodel.py +++ b/src/pip/_vendor/chardet/langgreekmodel.py @@ -1,9 +1,5 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - from pip._vendor.chardet.sbcharsetprober import SingleByteCharSetModel - # 3: Positive # 2: Likely # 1: Unlikely @@ -3863,536 +3859,539 @@ # Character Mapping Table(s): WINDOWS_1253_GREEK_CHAR_TO_ORDER = { - 0: 255, # '\x00' - 1: 255, # '\x01' - 2: 255, # '\x02' - 3: 255, # '\x03' - 4: 255, # '\x04' - 5: 255, # '\x05' - 6: 255, # '\x06' - 7: 255, # '\x07' - 8: 255, # '\x08' - 9: 255, # '\t' - 10: 254, # '\n' - 11: 255, # '\x0b' - 12: 255, # '\x0c' - 13: 254, # '\r' - 14: 255, # '\x0e' - 15: 255, # '\x0f' - 16: 255, # '\x10' - 17: 255, # '\x11' - 18: 255, # '\x12' - 19: 255, # '\x13' - 20: 255, # '\x14' - 21: 255, # '\x15' - 22: 255, # '\x16' - 23: 255, # '\x17' - 24: 255, # '\x18' - 25: 255, # '\x19' - 26: 255, # '\x1a' - 27: 255, # '\x1b' - 28: 255, # '\x1c' - 29: 255, # '\x1d' - 30: 255, # '\x1e' - 31: 255, # '\x1f' - 32: 253, # ' ' - 33: 253, # '!' - 34: 253, # '"' - 35: 253, # '#' - 36: 253, # '$' - 37: 253, # '%' - 38: 253, # '&' - 39: 253, # "'" - 40: 253, # '(' - 41: 253, # ')' - 42: 253, # '*' - 43: 253, # '+' - 44: 253, # ',' - 45: 253, # '-' - 46: 253, # '.' - 47: 253, # '/' - 48: 252, # '0' - 49: 252, # '1' - 50: 252, # '2' - 51: 252, # '3' - 52: 252, # '4' - 53: 252, # '5' - 54: 252, # '6' - 55: 252, # '7' - 56: 252, # '8' - 57: 252, # '9' - 58: 253, # ':' - 59: 253, # ';' - 60: 253, # '<' - 61: 253, # '=' - 62: 253, # '>' - 63: 253, # '?' - 64: 253, # '@' - 65: 82, # 'A' - 66: 100, # 'B' - 67: 104, # 'C' - 68: 94, # 'D' - 69: 98, # 'E' - 70: 101, # 'F' - 71: 116, # 'G' - 72: 102, # 'H' - 73: 111, # 'I' - 74: 187, # 'J' - 75: 117, # 'K' - 76: 92, # 'L' - 77: 88, # 'M' - 78: 113, # 'N' - 79: 85, # 'O' - 80: 79, # 'P' - 81: 118, # 'Q' - 82: 105, # 'R' - 83: 83, # 'S' - 84: 67, # 'T' - 85: 114, # 'U' - 86: 119, # 'V' - 87: 95, # 'W' - 88: 99, # 'X' - 89: 109, # 'Y' - 90: 188, # 'Z' - 91: 253, # '[' - 92: 253, # '\\' - 93: 253, # ']' - 94: 253, # '^' - 95: 253, # '_' - 96: 253, # '`' - 97: 72, # 'a' - 98: 70, # 'b' - 99: 80, # 'c' - 100: 81, # 'd' - 101: 60, # 'e' - 102: 96, # 'f' - 103: 93, # 'g' - 104: 89, # 'h' - 105: 68, # 'i' - 106: 120, # 'j' - 107: 97, # 'k' - 108: 77, # 'l' - 109: 86, # 'm' - 110: 69, # 'n' - 111: 55, # 'o' - 112: 78, # 'p' - 113: 115, # 'q' - 114: 65, # 'r' - 115: 66, # 's' - 116: 58, # 't' - 117: 76, # 'u' - 118: 106, # 'v' - 119: 103, # 'w' - 120: 87, # 'x' - 121: 107, # 'y' - 122: 112, # 'z' - 123: 253, # '{' - 124: 253, # '|' - 125: 253, # '}' - 126: 253, # '~' - 127: 253, # '\x7f' - 128: 255, # '€' - 129: 255, # None - 130: 255, # '‚' - 131: 255, # 'ƒ' - 132: 255, # '„' - 133: 255, # '…' - 134: 255, # '†' - 135: 255, # '‡' - 136: 255, # None - 137: 255, # '‰' - 138: 255, # None - 139: 255, # '‹' - 140: 255, # None - 141: 255, # None - 142: 255, # None - 143: 255, # None - 144: 255, # None - 145: 255, # '‘' - 146: 255, # '’' - 147: 255, # '“' - 148: 255, # '”' - 149: 255, # '•' - 150: 255, # '–' - 151: 255, # '—' - 152: 255, # None - 153: 255, # '™' - 154: 255, # None - 155: 255, # '›' - 156: 255, # None - 157: 255, # None - 158: 255, # None - 159: 255, # None - 160: 253, # '\xa0' - 161: 233, # '΅' - 162: 61, # 'Ά' - 163: 253, # '£' - 164: 253, # '¤' - 165: 253, # '¥' - 166: 253, # '¦' - 167: 253, # '§' - 168: 253, # '¨' - 169: 253, # '©' - 170: 253, # None - 171: 253, # '«' - 172: 253, # '¬' - 173: 74, # '\xad' - 174: 253, # '®' - 175: 253, # '―' - 176: 253, # '°' - 177: 253, # '±' - 178: 253, # '²' - 179: 253, # '³' - 180: 247, # '΄' - 181: 253, # 'µ' - 182: 253, # '¶' - 183: 36, # '·' - 184: 46, # 'Έ' - 185: 71, # 'Ή' - 186: 73, # 'Ί' - 187: 253, # '»' - 188: 54, # 'Ό' - 189: 253, # '½' - 190: 108, # 'Ύ' - 191: 123, # 'Ώ' - 192: 110, # 'ΐ' - 193: 31, # 'Α' - 194: 51, # 'Β' - 195: 43, # 'Γ' - 196: 41, # 'Δ' - 197: 34, # 'Ε' - 198: 91, # 'Ζ' - 199: 40, # 'Η' - 200: 52, # 'Θ' - 201: 47, # 'Ι' - 202: 44, # 'Κ' - 203: 53, # 'Λ' - 204: 38, # 'Μ' - 205: 49, # 'Ν' - 206: 59, # 'Ξ' - 207: 39, # 'Ο' - 208: 35, # 'Π' - 209: 48, # 'Ρ' - 210: 250, # None - 211: 37, # 'Σ' - 212: 33, # 'Τ' - 213: 45, # 'Υ' - 214: 56, # 'Φ' - 215: 50, # 'Χ' - 216: 84, # 'Ψ' - 217: 57, # 'Ω' - 218: 120, # 'Ϊ' - 219: 121, # 'Ϋ' - 220: 17, # 'ά' - 221: 18, # 'έ' - 222: 22, # 'ή' - 223: 15, # 'ί' - 224: 124, # 'ΰ' - 225: 1, # 'α' - 226: 29, # 'β' - 227: 20, # 'γ' - 228: 21, # 'δ' - 229: 3, # 'ε' - 230: 32, # 'ζ' - 231: 13, # 'η' - 232: 25, # 'θ' - 233: 5, # 'ι' - 234: 11, # 'κ' - 235: 16, # 'λ' - 236: 10, # 'μ' - 237: 6, # 'ν' - 238: 30, # 'ξ' - 239: 4, # 'ο' - 240: 9, # 'π' - 241: 8, # 'ρ' - 242: 14, # 'ς' - 243: 7, # 'σ' - 244: 2, # 'τ' - 245: 12, # 'υ' - 246: 28, # 'φ' - 247: 23, # 'χ' - 248: 42, # 'ψ' - 249: 24, # 'ω' - 250: 64, # 'ϊ' - 251: 75, # 'ϋ' - 252: 19, # 'ό' - 253: 26, # 'ύ' - 254: 27, # 'ώ' - 255: 253, # None + 0: 255, # '\x00' + 1: 255, # '\x01' + 2: 255, # '\x02' + 3: 255, # '\x03' + 4: 255, # '\x04' + 5: 255, # '\x05' + 6: 255, # '\x06' + 7: 255, # '\x07' + 8: 255, # '\x08' + 9: 255, # '\t' + 10: 254, # '\n' + 11: 255, # '\x0b' + 12: 255, # '\x0c' + 13: 254, # '\r' + 14: 255, # '\x0e' + 15: 255, # '\x0f' + 16: 255, # '\x10' + 17: 255, # '\x11' + 18: 255, # '\x12' + 19: 255, # '\x13' + 20: 255, # '\x14' + 21: 255, # '\x15' + 22: 255, # '\x16' + 23: 255, # '\x17' + 24: 255, # '\x18' + 25: 255, # '\x19' + 26: 255, # '\x1a' + 27: 255, # '\x1b' + 28: 255, # '\x1c' + 29: 255, # '\x1d' + 30: 255, # '\x1e' + 31: 255, # '\x1f' + 32: 253, # ' ' + 33: 253, # '!' + 34: 253, # '"' + 35: 253, # '#' + 36: 253, # '$' + 37: 253, # '%' + 38: 253, # '&' + 39: 253, # "'" + 40: 253, # '(' + 41: 253, # ')' + 42: 253, # '*' + 43: 253, # '+' + 44: 253, # ',' + 45: 253, # '-' + 46: 253, # '.' + 47: 253, # '/' + 48: 252, # '0' + 49: 252, # '1' + 50: 252, # '2' + 51: 252, # '3' + 52: 252, # '4' + 53: 252, # '5' + 54: 252, # '6' + 55: 252, # '7' + 56: 252, # '8' + 57: 252, # '9' + 58: 253, # ':' + 59: 253, # ';' + 60: 253, # '<' + 61: 253, # '=' + 62: 253, # '>' + 63: 253, # '?' + 64: 253, # '@' + 65: 82, # 'A' + 66: 100, # 'B' + 67: 104, # 'C' + 68: 94, # 'D' + 69: 98, # 'E' + 70: 101, # 'F' + 71: 116, # 'G' + 72: 102, # 'H' + 73: 111, # 'I' + 74: 187, # 'J' + 75: 117, # 'K' + 76: 92, # 'L' + 77: 88, # 'M' + 78: 113, # 'N' + 79: 85, # 'O' + 80: 79, # 'P' + 81: 118, # 'Q' + 82: 105, # 'R' + 83: 83, # 'S' + 84: 67, # 'T' + 85: 114, # 'U' + 86: 119, # 'V' + 87: 95, # 'W' + 88: 99, # 'X' + 89: 109, # 'Y' + 90: 188, # 'Z' + 91: 253, # '[' + 92: 253, # '\\' + 93: 253, # ']' + 94: 253, # '^' + 95: 253, # '_' + 96: 253, # '`' + 97: 72, # 'a' + 98: 70, # 'b' + 99: 80, # 'c' + 100: 81, # 'd' + 101: 60, # 'e' + 102: 96, # 'f' + 103: 93, # 'g' + 104: 89, # 'h' + 105: 68, # 'i' + 106: 120, # 'j' + 107: 97, # 'k' + 108: 77, # 'l' + 109: 86, # 'm' + 110: 69, # 'n' + 111: 55, # 'o' + 112: 78, # 'p' + 113: 115, # 'q' + 114: 65, # 'r' + 115: 66, # 's' + 116: 58, # 't' + 117: 76, # 'u' + 118: 106, # 'v' + 119: 103, # 'w' + 120: 87, # 'x' + 121: 107, # 'y' + 122: 112, # 'z' + 123: 253, # '{' + 124: 253, # '|' + 125: 253, # '}' + 126: 253, # '~' + 127: 253, # '\x7f' + 128: 255, # '€' + 129: 255, # None + 130: 255, # '‚' + 131: 255, # 'ƒ' + 132: 255, # '„' + 133: 255, # '…' + 134: 255, # '†' + 135: 255, # '‡' + 136: 255, # None + 137: 255, # '‰' + 138: 255, # None + 139: 255, # '‹' + 140: 255, # None + 141: 255, # None + 142: 255, # None + 143: 255, # None + 144: 255, # None + 145: 255, # '‘' + 146: 255, # '’' + 147: 255, # '“' + 148: 255, # '”' + 149: 255, # '•' + 150: 255, # '–' + 151: 255, # '—' + 152: 255, # None + 153: 255, # '™' + 154: 255, # None + 155: 255, # '›' + 156: 255, # None + 157: 255, # None + 158: 255, # None + 159: 255, # None + 160: 253, # '\xa0' + 161: 233, # '΅' + 162: 61, # 'Ά' + 163: 253, # '£' + 164: 253, # '¤' + 165: 253, # '¥' + 166: 253, # '¦' + 167: 253, # '§' + 168: 253, # '¨' + 169: 253, # '©' + 170: 253, # None + 171: 253, # '«' + 172: 253, # '¬' + 173: 74, # '\xad' + 174: 253, # '®' + 175: 253, # '―' + 176: 253, # '°' + 177: 253, # '±' + 178: 253, # '²' + 179: 253, # '³' + 180: 247, # '΄' + 181: 253, # 'µ' + 182: 253, # '¶' + 183: 36, # '·' + 184: 46, # 'Έ' + 185: 71, # 'Ή' + 186: 73, # 'Ί' + 187: 253, # '»' + 188: 54, # 'Ό' + 189: 253, # '½' + 190: 108, # 'Ύ' + 191: 123, # 'Ώ' + 192: 110, # 'ΐ' + 193: 31, # 'Α' + 194: 51, # 'Β' + 195: 43, # 'Γ' + 196: 41, # 'Δ' + 197: 34, # 'Ε' + 198: 91, # 'Ζ' + 199: 40, # 'Η' + 200: 52, # 'Θ' + 201: 47, # 'Ι' + 202: 44, # 'Κ' + 203: 53, # 'Λ' + 204: 38, # 'Μ' + 205: 49, # 'Ν' + 206: 59, # 'Ξ' + 207: 39, # 'Ο' + 208: 35, # 'Π' + 209: 48, # 'Ρ' + 210: 250, # None + 211: 37, # 'Σ' + 212: 33, # 'Τ' + 213: 45, # 'Υ' + 214: 56, # 'Φ' + 215: 50, # 'Χ' + 216: 84, # 'Ψ' + 217: 57, # 'Ω' + 218: 120, # 'Ϊ' + 219: 121, # 'Ϋ' + 220: 17, # 'ά' + 221: 18, # 'έ' + 222: 22, # 'ή' + 223: 15, # 'ί' + 224: 124, # 'ΰ' + 225: 1, # 'α' + 226: 29, # 'β' + 227: 20, # 'γ' + 228: 21, # 'δ' + 229: 3, # 'ε' + 230: 32, # 'ζ' + 231: 13, # 'η' + 232: 25, # 'θ' + 233: 5, # 'ι' + 234: 11, # 'κ' + 235: 16, # 'λ' + 236: 10, # 'μ' + 237: 6, # 'ν' + 238: 30, # 'ξ' + 239: 4, # 'ο' + 240: 9, # 'π' + 241: 8, # 'ρ' + 242: 14, # 'ς' + 243: 7, # 'σ' + 244: 2, # 'τ' + 245: 12, # 'υ' + 246: 28, # 'φ' + 247: 23, # 'χ' + 248: 42, # 'ψ' + 249: 24, # 'ω' + 250: 64, # 'ϊ' + 251: 75, # 'ϋ' + 252: 19, # 'ό' + 253: 26, # 'ύ' + 254: 27, # 'ώ' + 255: 253, # None } -WINDOWS_1253_GREEK_MODEL = SingleByteCharSetModel(charset_name='windows-1253', - language='Greek', - char_to_order_map=WINDOWS_1253_GREEK_CHAR_TO_ORDER, - language_model=GREEK_LANG_MODEL, - typical_positive_ratio=0.982851, - keep_ascii_letters=False, - alphabet='ΆΈΉΊΌΎΏΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩάέήίαβγδεζηθικλμνξοπρςστυφχψωόύώ') +WINDOWS_1253_GREEK_MODEL = SingleByteCharSetModel( + charset_name="windows-1253", + language="Greek", + char_to_order_map=WINDOWS_1253_GREEK_CHAR_TO_ORDER, + language_model=GREEK_LANG_MODEL, + typical_positive_ratio=0.982851, + keep_ascii_letters=False, + alphabet="ΆΈΉΊΌΎΏΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩάέήίαβγδεζηθικλμνξοπρςστυφχψωόύώ", +) ISO_8859_7_GREEK_CHAR_TO_ORDER = { - 0: 255, # '\x00' - 1: 255, # '\x01' - 2: 255, # '\x02' - 3: 255, # '\x03' - 4: 255, # '\x04' - 5: 255, # '\x05' - 6: 255, # '\x06' - 7: 255, # '\x07' - 8: 255, # '\x08' - 9: 255, # '\t' - 10: 254, # '\n' - 11: 255, # '\x0b' - 12: 255, # '\x0c' - 13: 254, # '\r' - 14: 255, # '\x0e' - 15: 255, # '\x0f' - 16: 255, # '\x10' - 17: 255, # '\x11' - 18: 255, # '\x12' - 19: 255, # '\x13' - 20: 255, # '\x14' - 21: 255, # '\x15' - 22: 255, # '\x16' - 23: 255, # '\x17' - 24: 255, # '\x18' - 25: 255, # '\x19' - 26: 255, # '\x1a' - 27: 255, # '\x1b' - 28: 255, # '\x1c' - 29: 255, # '\x1d' - 30: 255, # '\x1e' - 31: 255, # '\x1f' - 32: 253, # ' ' - 33: 253, # '!' - 34: 253, # '"' - 35: 253, # '#' - 36: 253, # '$' - 37: 253, # '%' - 38: 253, # '&' - 39: 253, # "'" - 40: 253, # '(' - 41: 253, # ')' - 42: 253, # '*' - 43: 253, # '+' - 44: 253, # ',' - 45: 253, # '-' - 46: 253, # '.' - 47: 253, # '/' - 48: 252, # '0' - 49: 252, # '1' - 50: 252, # '2' - 51: 252, # '3' - 52: 252, # '4' - 53: 252, # '5' - 54: 252, # '6' - 55: 252, # '7' - 56: 252, # '8' - 57: 252, # '9' - 58: 253, # ':' - 59: 253, # ';' - 60: 253, # '<' - 61: 253, # '=' - 62: 253, # '>' - 63: 253, # '?' - 64: 253, # '@' - 65: 82, # 'A' - 66: 100, # 'B' - 67: 104, # 'C' - 68: 94, # 'D' - 69: 98, # 'E' - 70: 101, # 'F' - 71: 116, # 'G' - 72: 102, # 'H' - 73: 111, # 'I' - 74: 187, # 'J' - 75: 117, # 'K' - 76: 92, # 'L' - 77: 88, # 'M' - 78: 113, # 'N' - 79: 85, # 'O' - 80: 79, # 'P' - 81: 118, # 'Q' - 82: 105, # 'R' - 83: 83, # 'S' - 84: 67, # 'T' - 85: 114, # 'U' - 86: 119, # 'V' - 87: 95, # 'W' - 88: 99, # 'X' - 89: 109, # 'Y' - 90: 188, # 'Z' - 91: 253, # '[' - 92: 253, # '\\' - 93: 253, # ']' - 94: 253, # '^' - 95: 253, # '_' - 96: 253, # '`' - 97: 72, # 'a' - 98: 70, # 'b' - 99: 80, # 'c' - 100: 81, # 'd' - 101: 60, # 'e' - 102: 96, # 'f' - 103: 93, # 'g' - 104: 89, # 'h' - 105: 68, # 'i' - 106: 120, # 'j' - 107: 97, # 'k' - 108: 77, # 'l' - 109: 86, # 'm' - 110: 69, # 'n' - 111: 55, # 'o' - 112: 78, # 'p' - 113: 115, # 'q' - 114: 65, # 'r' - 115: 66, # 's' - 116: 58, # 't' - 117: 76, # 'u' - 118: 106, # 'v' - 119: 103, # 'w' - 120: 87, # 'x' - 121: 107, # 'y' - 122: 112, # 'z' - 123: 253, # '{' - 124: 253, # '|' - 125: 253, # '}' - 126: 253, # '~' - 127: 253, # '\x7f' - 128: 255, # '\x80' - 129: 255, # '\x81' - 130: 255, # '\x82' - 131: 255, # '\x83' - 132: 255, # '\x84' - 133: 255, # '\x85' - 134: 255, # '\x86' - 135: 255, # '\x87' - 136: 255, # '\x88' - 137: 255, # '\x89' - 138: 255, # '\x8a' - 139: 255, # '\x8b' - 140: 255, # '\x8c' - 141: 255, # '\x8d' - 142: 255, # '\x8e' - 143: 255, # '\x8f' - 144: 255, # '\x90' - 145: 255, # '\x91' - 146: 255, # '\x92' - 147: 255, # '\x93' - 148: 255, # '\x94' - 149: 255, # '\x95' - 150: 255, # '\x96' - 151: 255, # '\x97' - 152: 255, # '\x98' - 153: 255, # '\x99' - 154: 255, # '\x9a' - 155: 255, # '\x9b' - 156: 255, # '\x9c' - 157: 255, # '\x9d' - 158: 255, # '\x9e' - 159: 255, # '\x9f' - 160: 253, # '\xa0' - 161: 233, # '‘' - 162: 90, # '’' - 163: 253, # '£' - 164: 253, # '€' - 165: 253, # '₯' - 166: 253, # '¦' - 167: 253, # '§' - 168: 253, # '¨' - 169: 253, # '©' - 170: 253, # 'ͺ' - 171: 253, # '«' - 172: 253, # '¬' - 173: 74, # '\xad' - 174: 253, # None - 175: 253, # '―' - 176: 253, # '°' - 177: 253, # '±' - 178: 253, # '²' - 179: 253, # '³' - 180: 247, # '΄' - 181: 248, # '΅' - 182: 61, # 'Ά' - 183: 36, # '·' - 184: 46, # 'Έ' - 185: 71, # 'Ή' - 186: 73, # 'Ί' - 187: 253, # '»' - 188: 54, # 'Ό' - 189: 253, # '½' - 190: 108, # 'Ύ' - 191: 123, # 'Ώ' - 192: 110, # 'ΐ' - 193: 31, # 'Α' - 194: 51, # 'Β' - 195: 43, # 'Γ' - 196: 41, # 'Δ' - 197: 34, # 'Ε' - 198: 91, # 'Ζ' - 199: 40, # 'Η' - 200: 52, # 'Θ' - 201: 47, # 'Ι' - 202: 44, # 'Κ' - 203: 53, # 'Λ' - 204: 38, # 'Μ' - 205: 49, # 'Ν' - 206: 59, # 'Ξ' - 207: 39, # 'Ο' - 208: 35, # 'Π' - 209: 48, # 'Ρ' - 210: 250, # None - 211: 37, # 'Σ' - 212: 33, # 'Τ' - 213: 45, # 'Υ' - 214: 56, # 'Φ' - 215: 50, # 'Χ' - 216: 84, # 'Ψ' - 217: 57, # 'Ω' - 218: 120, # 'Ϊ' - 219: 121, # 'Ϋ' - 220: 17, # 'ά' - 221: 18, # 'έ' - 222: 22, # 'ή' - 223: 15, # 'ί' - 224: 124, # 'ΰ' - 225: 1, # 'α' - 226: 29, # 'β' - 227: 20, # 'γ' - 228: 21, # 'δ' - 229: 3, # 'ε' - 230: 32, # 'ζ' - 231: 13, # 'η' - 232: 25, # 'θ' - 233: 5, # 'ι' - 234: 11, # 'κ' - 235: 16, # 'λ' - 236: 10, # 'μ' - 237: 6, # 'ν' - 238: 30, # 'ξ' - 239: 4, # 'ο' - 240: 9, # 'π' - 241: 8, # 'ρ' - 242: 14, # 'ς' - 243: 7, # 'σ' - 244: 2, # 'τ' - 245: 12, # 'υ' - 246: 28, # 'φ' - 247: 23, # 'χ' - 248: 42, # 'ψ' - 249: 24, # 'ω' - 250: 64, # 'ϊ' - 251: 75, # 'ϋ' - 252: 19, # 'ό' - 253: 26, # 'ύ' - 254: 27, # 'ώ' - 255: 253, # None + 0: 255, # '\x00' + 1: 255, # '\x01' + 2: 255, # '\x02' + 3: 255, # '\x03' + 4: 255, # '\x04' + 5: 255, # '\x05' + 6: 255, # '\x06' + 7: 255, # '\x07' + 8: 255, # '\x08' + 9: 255, # '\t' + 10: 254, # '\n' + 11: 255, # '\x0b' + 12: 255, # '\x0c' + 13: 254, # '\r' + 14: 255, # '\x0e' + 15: 255, # '\x0f' + 16: 255, # '\x10' + 17: 255, # '\x11' + 18: 255, # '\x12' + 19: 255, # '\x13' + 20: 255, # '\x14' + 21: 255, # '\x15' + 22: 255, # '\x16' + 23: 255, # '\x17' + 24: 255, # '\x18' + 25: 255, # '\x19' + 26: 255, # '\x1a' + 27: 255, # '\x1b' + 28: 255, # '\x1c' + 29: 255, # '\x1d' + 30: 255, # '\x1e' + 31: 255, # '\x1f' + 32: 253, # ' ' + 33: 253, # '!' + 34: 253, # '"' + 35: 253, # '#' + 36: 253, # '$' + 37: 253, # '%' + 38: 253, # '&' + 39: 253, # "'" + 40: 253, # '(' + 41: 253, # ')' + 42: 253, # '*' + 43: 253, # '+' + 44: 253, # ',' + 45: 253, # '-' + 46: 253, # '.' + 47: 253, # '/' + 48: 252, # '0' + 49: 252, # '1' + 50: 252, # '2' + 51: 252, # '3' + 52: 252, # '4' + 53: 252, # '5' + 54: 252, # '6' + 55: 252, # '7' + 56: 252, # '8' + 57: 252, # '9' + 58: 253, # ':' + 59: 253, # ';' + 60: 253, # '<' + 61: 253, # '=' + 62: 253, # '>' + 63: 253, # '?' + 64: 253, # '@' + 65: 82, # 'A' + 66: 100, # 'B' + 67: 104, # 'C' + 68: 94, # 'D' + 69: 98, # 'E' + 70: 101, # 'F' + 71: 116, # 'G' + 72: 102, # 'H' + 73: 111, # 'I' + 74: 187, # 'J' + 75: 117, # 'K' + 76: 92, # 'L' + 77: 88, # 'M' + 78: 113, # 'N' + 79: 85, # 'O' + 80: 79, # 'P' + 81: 118, # 'Q' + 82: 105, # 'R' + 83: 83, # 'S' + 84: 67, # 'T' + 85: 114, # 'U' + 86: 119, # 'V' + 87: 95, # 'W' + 88: 99, # 'X' + 89: 109, # 'Y' + 90: 188, # 'Z' + 91: 253, # '[' + 92: 253, # '\\' + 93: 253, # ']' + 94: 253, # '^' + 95: 253, # '_' + 96: 253, # '`' + 97: 72, # 'a' + 98: 70, # 'b' + 99: 80, # 'c' + 100: 81, # 'd' + 101: 60, # 'e' + 102: 96, # 'f' + 103: 93, # 'g' + 104: 89, # 'h' + 105: 68, # 'i' + 106: 120, # 'j' + 107: 97, # 'k' + 108: 77, # 'l' + 109: 86, # 'm' + 110: 69, # 'n' + 111: 55, # 'o' + 112: 78, # 'p' + 113: 115, # 'q' + 114: 65, # 'r' + 115: 66, # 's' + 116: 58, # 't' + 117: 76, # 'u' + 118: 106, # 'v' + 119: 103, # 'w' + 120: 87, # 'x' + 121: 107, # 'y' + 122: 112, # 'z' + 123: 253, # '{' + 124: 253, # '|' + 125: 253, # '}' + 126: 253, # '~' + 127: 253, # '\x7f' + 128: 255, # '\x80' + 129: 255, # '\x81' + 130: 255, # '\x82' + 131: 255, # '\x83' + 132: 255, # '\x84' + 133: 255, # '\x85' + 134: 255, # '\x86' + 135: 255, # '\x87' + 136: 255, # '\x88' + 137: 255, # '\x89' + 138: 255, # '\x8a' + 139: 255, # '\x8b' + 140: 255, # '\x8c' + 141: 255, # '\x8d' + 142: 255, # '\x8e' + 143: 255, # '\x8f' + 144: 255, # '\x90' + 145: 255, # '\x91' + 146: 255, # '\x92' + 147: 255, # '\x93' + 148: 255, # '\x94' + 149: 255, # '\x95' + 150: 255, # '\x96' + 151: 255, # '\x97' + 152: 255, # '\x98' + 153: 255, # '\x99' + 154: 255, # '\x9a' + 155: 255, # '\x9b' + 156: 255, # '\x9c' + 157: 255, # '\x9d' + 158: 255, # '\x9e' + 159: 255, # '\x9f' + 160: 253, # '\xa0' + 161: 233, # '‘' + 162: 90, # '’' + 163: 253, # '£' + 164: 253, # '€' + 165: 253, # '₯' + 166: 253, # '¦' + 167: 253, # '§' + 168: 253, # '¨' + 169: 253, # '©' + 170: 253, # 'ͺ' + 171: 253, # '«' + 172: 253, # '¬' + 173: 74, # '\xad' + 174: 253, # None + 175: 253, # '―' + 176: 253, # '°' + 177: 253, # '±' + 178: 253, # '²' + 179: 253, # '³' + 180: 247, # '΄' + 181: 248, # '΅' + 182: 61, # 'Ά' + 183: 36, # '·' + 184: 46, # 'Έ' + 185: 71, # 'Ή' + 186: 73, # 'Ί' + 187: 253, # '»' + 188: 54, # 'Ό' + 189: 253, # '½' + 190: 108, # 'Ύ' + 191: 123, # 'Ώ' + 192: 110, # 'ΐ' + 193: 31, # 'Α' + 194: 51, # 'Β' + 195: 43, # 'Γ' + 196: 41, # 'Δ' + 197: 34, # 'Ε' + 198: 91, # 'Ζ' + 199: 40, # 'Η' + 200: 52, # 'Θ' + 201: 47, # 'Ι' + 202: 44, # 'Κ' + 203: 53, # 'Λ' + 204: 38, # 'Μ' + 205: 49, # 'Ν' + 206: 59, # 'Ξ' + 207: 39, # 'Ο' + 208: 35, # 'Π' + 209: 48, # 'Ρ' + 210: 250, # None + 211: 37, # 'Σ' + 212: 33, # 'Τ' + 213: 45, # 'Υ' + 214: 56, # 'Φ' + 215: 50, # 'Χ' + 216: 84, # 'Ψ' + 217: 57, # 'Ω' + 218: 120, # 'Ϊ' + 219: 121, # 'Ϋ' + 220: 17, # 'ά' + 221: 18, # 'έ' + 222: 22, # 'ή' + 223: 15, # 'ί' + 224: 124, # 'ΰ' + 225: 1, # 'α' + 226: 29, # 'β' + 227: 20, # 'γ' + 228: 21, # 'δ' + 229: 3, # 'ε' + 230: 32, # 'ζ' + 231: 13, # 'η' + 232: 25, # 'θ' + 233: 5, # 'ι' + 234: 11, # 'κ' + 235: 16, # 'λ' + 236: 10, # 'μ' + 237: 6, # 'ν' + 238: 30, # 'ξ' + 239: 4, # 'ο' + 240: 9, # 'π' + 241: 8, # 'ρ' + 242: 14, # 'ς' + 243: 7, # 'σ' + 244: 2, # 'τ' + 245: 12, # 'υ' + 246: 28, # 'φ' + 247: 23, # 'χ' + 248: 42, # 'ψ' + 249: 24, # 'ω' + 250: 64, # 'ϊ' + 251: 75, # 'ϋ' + 252: 19, # 'ό' + 253: 26, # 'ύ' + 254: 27, # 'ώ' + 255: 253, # None } -ISO_8859_7_GREEK_MODEL = SingleByteCharSetModel(charset_name='ISO-8859-7', - language='Greek', - char_to_order_map=ISO_8859_7_GREEK_CHAR_TO_ORDER, - language_model=GREEK_LANG_MODEL, - typical_positive_ratio=0.982851, - keep_ascii_letters=False, - alphabet='ΆΈΉΊΌΎΏΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩάέήίαβγδεζηθικλμνξοπρςστυφχψωόύώ') - +ISO_8859_7_GREEK_MODEL = SingleByteCharSetModel( + charset_name="ISO-8859-7", + language="Greek", + char_to_order_map=ISO_8859_7_GREEK_CHAR_TO_ORDER, + language_model=GREEK_LANG_MODEL, + typical_positive_ratio=0.982851, + keep_ascii_letters=False, + alphabet="ΆΈΉΊΌΎΏΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩάέήίαβγδεζηθικλμνξοπρςστυφχψωόύώ", +) diff --git a/src/pip/_vendor/chardet/langhebrewmodel.py b/src/pip/_vendor/chardet/langhebrewmodel.py index 484c652a48e..56d2975877f 100644 --- a/src/pip/_vendor/chardet/langhebrewmodel.py +++ b/src/pip/_vendor/chardet/langhebrewmodel.py @@ -1,9 +1,5 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - from pip._vendor.chardet.sbcharsetprober import SingleByteCharSetModel - # 3: Positive # 2: Likely # 1: Unlikely @@ -4115,269 +4111,270 @@ # Character Mapping Table(s): WINDOWS_1255_HEBREW_CHAR_TO_ORDER = { - 0: 255, # '\x00' - 1: 255, # '\x01' - 2: 255, # '\x02' - 3: 255, # '\x03' - 4: 255, # '\x04' - 5: 255, # '\x05' - 6: 255, # '\x06' - 7: 255, # '\x07' - 8: 255, # '\x08' - 9: 255, # '\t' - 10: 254, # '\n' - 11: 255, # '\x0b' - 12: 255, # '\x0c' - 13: 254, # '\r' - 14: 255, # '\x0e' - 15: 255, # '\x0f' - 16: 255, # '\x10' - 17: 255, # '\x11' - 18: 255, # '\x12' - 19: 255, # '\x13' - 20: 255, # '\x14' - 21: 255, # '\x15' - 22: 255, # '\x16' - 23: 255, # '\x17' - 24: 255, # '\x18' - 25: 255, # '\x19' - 26: 255, # '\x1a' - 27: 255, # '\x1b' - 28: 255, # '\x1c' - 29: 255, # '\x1d' - 30: 255, # '\x1e' - 31: 255, # '\x1f' - 32: 253, # ' ' - 33: 253, # '!' - 34: 253, # '"' - 35: 253, # '#' - 36: 253, # '$' - 37: 253, # '%' - 38: 253, # '&' - 39: 253, # "'" - 40: 253, # '(' - 41: 253, # ')' - 42: 253, # '*' - 43: 253, # '+' - 44: 253, # ',' - 45: 253, # '-' - 46: 253, # '.' - 47: 253, # '/' - 48: 252, # '0' - 49: 252, # '1' - 50: 252, # '2' - 51: 252, # '3' - 52: 252, # '4' - 53: 252, # '5' - 54: 252, # '6' - 55: 252, # '7' - 56: 252, # '8' - 57: 252, # '9' - 58: 253, # ':' - 59: 253, # ';' - 60: 253, # '<' - 61: 253, # '=' - 62: 253, # '>' - 63: 253, # '?' - 64: 253, # '@' - 65: 69, # 'A' - 66: 91, # 'B' - 67: 79, # 'C' - 68: 80, # 'D' - 69: 92, # 'E' - 70: 89, # 'F' - 71: 97, # 'G' - 72: 90, # 'H' - 73: 68, # 'I' - 74: 111, # 'J' - 75: 112, # 'K' - 76: 82, # 'L' - 77: 73, # 'M' - 78: 95, # 'N' - 79: 85, # 'O' - 80: 78, # 'P' - 81: 121, # 'Q' - 82: 86, # 'R' - 83: 71, # 'S' - 84: 67, # 'T' - 85: 102, # 'U' - 86: 107, # 'V' - 87: 84, # 'W' - 88: 114, # 'X' - 89: 103, # 'Y' - 90: 115, # 'Z' - 91: 253, # '[' - 92: 253, # '\\' - 93: 253, # ']' - 94: 253, # '^' - 95: 253, # '_' - 96: 253, # '`' - 97: 50, # 'a' - 98: 74, # 'b' - 99: 60, # 'c' - 100: 61, # 'd' - 101: 42, # 'e' - 102: 76, # 'f' - 103: 70, # 'g' - 104: 64, # 'h' - 105: 53, # 'i' - 106: 105, # 'j' - 107: 93, # 'k' - 108: 56, # 'l' - 109: 65, # 'm' - 110: 54, # 'n' - 111: 49, # 'o' - 112: 66, # 'p' - 113: 110, # 'q' - 114: 51, # 'r' - 115: 43, # 's' - 116: 44, # 't' - 117: 63, # 'u' - 118: 81, # 'v' - 119: 77, # 'w' - 120: 98, # 'x' - 121: 75, # 'y' - 122: 108, # 'z' - 123: 253, # '{' - 124: 253, # '|' - 125: 253, # '}' - 126: 253, # '~' - 127: 253, # '\x7f' - 128: 124, # '€' - 129: 202, # None - 130: 203, # '‚' - 131: 204, # 'ƒ' - 132: 205, # '„' - 133: 40, # '…' - 134: 58, # '†' - 135: 206, # '‡' - 136: 207, # 'ˆ' - 137: 208, # '‰' - 138: 209, # None - 139: 210, # '‹' - 140: 211, # None - 141: 212, # None - 142: 213, # None - 143: 214, # None - 144: 215, # None - 145: 83, # '‘' - 146: 52, # '’' - 147: 47, # '“' - 148: 46, # '”' - 149: 72, # '•' - 150: 32, # '–' - 151: 94, # '—' - 152: 216, # '˜' - 153: 113, # '™' - 154: 217, # None - 155: 109, # '›' - 156: 218, # None - 157: 219, # None - 158: 220, # None - 159: 221, # None - 160: 34, # '\xa0' - 161: 116, # '¡' - 162: 222, # '¢' - 163: 118, # '£' - 164: 100, # '₪' - 165: 223, # '¥' - 166: 224, # '¦' - 167: 117, # '§' - 168: 119, # '¨' - 169: 104, # '©' - 170: 125, # '×' - 171: 225, # '«' - 172: 226, # '¬' - 173: 87, # '\xad' - 174: 99, # '®' - 175: 227, # '¯' - 176: 106, # '°' - 177: 122, # '±' - 178: 123, # '²' - 179: 228, # '³' - 180: 55, # '´' - 181: 229, # 'µ' - 182: 230, # '¶' - 183: 101, # '·' - 184: 231, # '¸' - 185: 232, # '¹' - 186: 120, # '÷' - 187: 233, # '»' - 188: 48, # '¼' - 189: 39, # '½' - 190: 57, # '¾' - 191: 234, # '¿' - 192: 30, # 'ְ' - 193: 59, # 'ֱ' - 194: 41, # 'ֲ' - 195: 88, # 'ֳ' - 196: 33, # 'ִ' - 197: 37, # 'ֵ' - 198: 36, # 'ֶ' - 199: 31, # 'ַ' - 200: 29, # 'ָ' - 201: 35, # 'ֹ' - 202: 235, # None - 203: 62, # 'ֻ' - 204: 28, # 'ּ' - 205: 236, # 'ֽ' - 206: 126, # '־' - 207: 237, # 'ֿ' - 208: 238, # '׀' - 209: 38, # 'ׁ' - 210: 45, # 'ׂ' - 211: 239, # '׃' - 212: 240, # 'װ' - 213: 241, # 'ױ' - 214: 242, # 'ײ' - 215: 243, # '׳' - 216: 127, # '״' - 217: 244, # None - 218: 245, # None - 219: 246, # None - 220: 247, # None - 221: 248, # None - 222: 249, # None - 223: 250, # None - 224: 9, # 'א' - 225: 8, # 'ב' - 226: 20, # 'ג' - 227: 16, # 'ד' - 228: 3, # 'ה' - 229: 2, # 'ו' - 230: 24, # 'ז' - 231: 14, # 'ח' - 232: 22, # 'ט' - 233: 1, # 'י' - 234: 25, # 'ך' - 235: 15, # 'כ' - 236: 4, # 'ל' - 237: 11, # 'ם' - 238: 6, # 'מ' - 239: 23, # 'ן' - 240: 12, # 'נ' - 241: 19, # 'ס' - 242: 13, # 'ע' - 243: 26, # 'ף' - 244: 18, # 'פ' - 245: 27, # 'ץ' - 246: 21, # 'צ' - 247: 17, # 'ק' - 248: 7, # 'ר' - 249: 10, # 'ש' - 250: 5, # 'ת' - 251: 251, # None - 252: 252, # None - 253: 128, # '\u200e' - 254: 96, # '\u200f' - 255: 253, # None + 0: 255, # '\x00' + 1: 255, # '\x01' + 2: 255, # '\x02' + 3: 255, # '\x03' + 4: 255, # '\x04' + 5: 255, # '\x05' + 6: 255, # '\x06' + 7: 255, # '\x07' + 8: 255, # '\x08' + 9: 255, # '\t' + 10: 254, # '\n' + 11: 255, # '\x0b' + 12: 255, # '\x0c' + 13: 254, # '\r' + 14: 255, # '\x0e' + 15: 255, # '\x0f' + 16: 255, # '\x10' + 17: 255, # '\x11' + 18: 255, # '\x12' + 19: 255, # '\x13' + 20: 255, # '\x14' + 21: 255, # '\x15' + 22: 255, # '\x16' + 23: 255, # '\x17' + 24: 255, # '\x18' + 25: 255, # '\x19' + 26: 255, # '\x1a' + 27: 255, # '\x1b' + 28: 255, # '\x1c' + 29: 255, # '\x1d' + 30: 255, # '\x1e' + 31: 255, # '\x1f' + 32: 253, # ' ' + 33: 253, # '!' + 34: 253, # '"' + 35: 253, # '#' + 36: 253, # '$' + 37: 253, # '%' + 38: 253, # '&' + 39: 253, # "'" + 40: 253, # '(' + 41: 253, # ')' + 42: 253, # '*' + 43: 253, # '+' + 44: 253, # ',' + 45: 253, # '-' + 46: 253, # '.' + 47: 253, # '/' + 48: 252, # '0' + 49: 252, # '1' + 50: 252, # '2' + 51: 252, # '3' + 52: 252, # '4' + 53: 252, # '5' + 54: 252, # '6' + 55: 252, # '7' + 56: 252, # '8' + 57: 252, # '9' + 58: 253, # ':' + 59: 253, # ';' + 60: 253, # '<' + 61: 253, # '=' + 62: 253, # '>' + 63: 253, # '?' + 64: 253, # '@' + 65: 69, # 'A' + 66: 91, # 'B' + 67: 79, # 'C' + 68: 80, # 'D' + 69: 92, # 'E' + 70: 89, # 'F' + 71: 97, # 'G' + 72: 90, # 'H' + 73: 68, # 'I' + 74: 111, # 'J' + 75: 112, # 'K' + 76: 82, # 'L' + 77: 73, # 'M' + 78: 95, # 'N' + 79: 85, # 'O' + 80: 78, # 'P' + 81: 121, # 'Q' + 82: 86, # 'R' + 83: 71, # 'S' + 84: 67, # 'T' + 85: 102, # 'U' + 86: 107, # 'V' + 87: 84, # 'W' + 88: 114, # 'X' + 89: 103, # 'Y' + 90: 115, # 'Z' + 91: 253, # '[' + 92: 253, # '\\' + 93: 253, # ']' + 94: 253, # '^' + 95: 253, # '_' + 96: 253, # '`' + 97: 50, # 'a' + 98: 74, # 'b' + 99: 60, # 'c' + 100: 61, # 'd' + 101: 42, # 'e' + 102: 76, # 'f' + 103: 70, # 'g' + 104: 64, # 'h' + 105: 53, # 'i' + 106: 105, # 'j' + 107: 93, # 'k' + 108: 56, # 'l' + 109: 65, # 'm' + 110: 54, # 'n' + 111: 49, # 'o' + 112: 66, # 'p' + 113: 110, # 'q' + 114: 51, # 'r' + 115: 43, # 's' + 116: 44, # 't' + 117: 63, # 'u' + 118: 81, # 'v' + 119: 77, # 'w' + 120: 98, # 'x' + 121: 75, # 'y' + 122: 108, # 'z' + 123: 253, # '{' + 124: 253, # '|' + 125: 253, # '}' + 126: 253, # '~' + 127: 253, # '\x7f' + 128: 124, # '€' + 129: 202, # None + 130: 203, # '‚' + 131: 204, # 'ƒ' + 132: 205, # '„' + 133: 40, # '…' + 134: 58, # '†' + 135: 206, # '‡' + 136: 207, # 'ˆ' + 137: 208, # '‰' + 138: 209, # None + 139: 210, # '‹' + 140: 211, # None + 141: 212, # None + 142: 213, # None + 143: 214, # None + 144: 215, # None + 145: 83, # '‘' + 146: 52, # '’' + 147: 47, # '“' + 148: 46, # '”' + 149: 72, # '•' + 150: 32, # '–' + 151: 94, # '—' + 152: 216, # '˜' + 153: 113, # '™' + 154: 217, # None + 155: 109, # '›' + 156: 218, # None + 157: 219, # None + 158: 220, # None + 159: 221, # None + 160: 34, # '\xa0' + 161: 116, # '¡' + 162: 222, # '¢' + 163: 118, # '£' + 164: 100, # '₪' + 165: 223, # '¥' + 166: 224, # '¦' + 167: 117, # '§' + 168: 119, # '¨' + 169: 104, # '©' + 170: 125, # '×' + 171: 225, # '«' + 172: 226, # '¬' + 173: 87, # '\xad' + 174: 99, # '®' + 175: 227, # '¯' + 176: 106, # '°' + 177: 122, # '±' + 178: 123, # '²' + 179: 228, # '³' + 180: 55, # '´' + 181: 229, # 'µ' + 182: 230, # '¶' + 183: 101, # '·' + 184: 231, # '¸' + 185: 232, # '¹' + 186: 120, # '÷' + 187: 233, # '»' + 188: 48, # '¼' + 189: 39, # '½' + 190: 57, # '¾' + 191: 234, # '¿' + 192: 30, # 'ְ' + 193: 59, # 'ֱ' + 194: 41, # 'ֲ' + 195: 88, # 'ֳ' + 196: 33, # 'ִ' + 197: 37, # 'ֵ' + 198: 36, # 'ֶ' + 199: 31, # 'ַ' + 200: 29, # 'ָ' + 201: 35, # 'ֹ' + 202: 235, # None + 203: 62, # 'ֻ' + 204: 28, # 'ּ' + 205: 236, # 'ֽ' + 206: 126, # '־' + 207: 237, # 'ֿ' + 208: 238, # '׀' + 209: 38, # 'ׁ' + 210: 45, # 'ׂ' + 211: 239, # '׃' + 212: 240, # 'װ' + 213: 241, # 'ױ' + 214: 242, # 'ײ' + 215: 243, # '׳' + 216: 127, # '״' + 217: 244, # None + 218: 245, # None + 219: 246, # None + 220: 247, # None + 221: 248, # None + 222: 249, # None + 223: 250, # None + 224: 9, # 'א' + 225: 8, # 'ב' + 226: 20, # 'ג' + 227: 16, # 'ד' + 228: 3, # 'ה' + 229: 2, # 'ו' + 230: 24, # 'ז' + 231: 14, # 'ח' + 232: 22, # 'ט' + 233: 1, # 'י' + 234: 25, # 'ך' + 235: 15, # 'כ' + 236: 4, # 'ל' + 237: 11, # 'ם' + 238: 6, # 'מ' + 239: 23, # 'ן' + 240: 12, # 'נ' + 241: 19, # 'ס' + 242: 13, # 'ע' + 243: 26, # 'ף' + 244: 18, # 'פ' + 245: 27, # 'ץ' + 246: 21, # 'צ' + 247: 17, # 'ק' + 248: 7, # 'ר' + 249: 10, # 'ש' + 250: 5, # 'ת' + 251: 251, # None + 252: 252, # None + 253: 128, # '\u200e' + 254: 96, # '\u200f' + 255: 253, # None } -WINDOWS_1255_HEBREW_MODEL = SingleByteCharSetModel(charset_name='windows-1255', - language='Hebrew', - char_to_order_map=WINDOWS_1255_HEBREW_CHAR_TO_ORDER, - language_model=HEBREW_LANG_MODEL, - typical_positive_ratio=0.984004, - keep_ascii_letters=False, - alphabet='אבגדהוזחטיךכלםמןנסעףפץצקרשתװױײ') - +WINDOWS_1255_HEBREW_MODEL = SingleByteCharSetModel( + charset_name="windows-1255", + language="Hebrew", + char_to_order_map=WINDOWS_1255_HEBREW_CHAR_TO_ORDER, + language_model=HEBREW_LANG_MODEL, + typical_positive_ratio=0.984004, + keep_ascii_letters=False, + alphabet="אבגדהוזחטיךכלםמןנסעףפץצקרשתװױײ", +) diff --git a/src/pip/_vendor/chardet/langhungarianmodel.py b/src/pip/_vendor/chardet/langhungarianmodel.py index bbc5cda6441..09a0d326b98 100644 --- a/src/pip/_vendor/chardet/langhungarianmodel.py +++ b/src/pip/_vendor/chardet/langhungarianmodel.py @@ -1,9 +1,5 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - from pip._vendor.chardet.sbcharsetprober import SingleByteCharSetModel - # 3: Positive # 2: Likely # 1: Unlikely @@ -4115,536 +4111,539 @@ # Character Mapping Table(s): WINDOWS_1250_HUNGARIAN_CHAR_TO_ORDER = { - 0: 255, # '\x00' - 1: 255, # '\x01' - 2: 255, # '\x02' - 3: 255, # '\x03' - 4: 255, # '\x04' - 5: 255, # '\x05' - 6: 255, # '\x06' - 7: 255, # '\x07' - 8: 255, # '\x08' - 9: 255, # '\t' - 10: 254, # '\n' - 11: 255, # '\x0b' - 12: 255, # '\x0c' - 13: 254, # '\r' - 14: 255, # '\x0e' - 15: 255, # '\x0f' - 16: 255, # '\x10' - 17: 255, # '\x11' - 18: 255, # '\x12' - 19: 255, # '\x13' - 20: 255, # '\x14' - 21: 255, # '\x15' - 22: 255, # '\x16' - 23: 255, # '\x17' - 24: 255, # '\x18' - 25: 255, # '\x19' - 26: 255, # '\x1a' - 27: 255, # '\x1b' - 28: 255, # '\x1c' - 29: 255, # '\x1d' - 30: 255, # '\x1e' - 31: 255, # '\x1f' - 32: 253, # ' ' - 33: 253, # '!' - 34: 253, # '"' - 35: 253, # '#' - 36: 253, # '$' - 37: 253, # '%' - 38: 253, # '&' - 39: 253, # "'" - 40: 253, # '(' - 41: 253, # ')' - 42: 253, # '*' - 43: 253, # '+' - 44: 253, # ',' - 45: 253, # '-' - 46: 253, # '.' - 47: 253, # '/' - 48: 252, # '0' - 49: 252, # '1' - 50: 252, # '2' - 51: 252, # '3' - 52: 252, # '4' - 53: 252, # '5' - 54: 252, # '6' - 55: 252, # '7' - 56: 252, # '8' - 57: 252, # '9' - 58: 253, # ':' - 59: 253, # ';' - 60: 253, # '<' - 61: 253, # '=' - 62: 253, # '>' - 63: 253, # '?' - 64: 253, # '@' - 65: 28, # 'A' - 66: 40, # 'B' - 67: 54, # 'C' - 68: 45, # 'D' - 69: 32, # 'E' - 70: 50, # 'F' - 71: 49, # 'G' - 72: 38, # 'H' - 73: 39, # 'I' - 74: 53, # 'J' - 75: 36, # 'K' - 76: 41, # 'L' - 77: 34, # 'M' - 78: 35, # 'N' - 79: 47, # 'O' - 80: 46, # 'P' - 81: 72, # 'Q' - 82: 43, # 'R' - 83: 33, # 'S' - 84: 37, # 'T' - 85: 57, # 'U' - 86: 48, # 'V' - 87: 64, # 'W' - 88: 68, # 'X' - 89: 55, # 'Y' - 90: 52, # 'Z' - 91: 253, # '[' - 92: 253, # '\\' - 93: 253, # ']' - 94: 253, # '^' - 95: 253, # '_' - 96: 253, # '`' - 97: 2, # 'a' - 98: 18, # 'b' - 99: 26, # 'c' - 100: 17, # 'd' - 101: 1, # 'e' - 102: 27, # 'f' - 103: 12, # 'g' - 104: 20, # 'h' - 105: 9, # 'i' - 106: 22, # 'j' - 107: 7, # 'k' - 108: 6, # 'l' - 109: 13, # 'm' - 110: 4, # 'n' - 111: 8, # 'o' - 112: 23, # 'p' - 113: 67, # 'q' - 114: 10, # 'r' - 115: 5, # 's' - 116: 3, # 't' - 117: 21, # 'u' - 118: 19, # 'v' - 119: 65, # 'w' - 120: 62, # 'x' - 121: 16, # 'y' - 122: 11, # 'z' - 123: 253, # '{' - 124: 253, # '|' - 125: 253, # '}' - 126: 253, # '~' - 127: 253, # '\x7f' - 128: 161, # '€' - 129: 162, # None - 130: 163, # '‚' - 131: 164, # None - 132: 165, # '„' - 133: 166, # '…' - 134: 167, # '†' - 135: 168, # '‡' - 136: 169, # None - 137: 170, # '‰' - 138: 171, # 'Š' - 139: 172, # '‹' - 140: 173, # 'Ś' - 141: 174, # 'Ť' - 142: 175, # 'Ž' - 143: 176, # 'Ź' - 144: 177, # None - 145: 178, # '‘' - 146: 179, # '’' - 147: 180, # '“' - 148: 78, # '”' - 149: 181, # '•' - 150: 69, # '–' - 151: 182, # '—' - 152: 183, # None - 153: 184, # '™' - 154: 185, # 'š' - 155: 186, # '›' - 156: 187, # 'ś' - 157: 188, # 'ť' - 158: 189, # 'ž' - 159: 190, # 'ź' - 160: 191, # '\xa0' - 161: 192, # 'ˇ' - 162: 193, # '˘' - 163: 194, # 'Ł' - 164: 195, # '¤' - 165: 196, # 'Ą' - 166: 197, # '¦' - 167: 76, # '§' - 168: 198, # '¨' - 169: 199, # '©' - 170: 200, # 'Ş' - 171: 201, # '«' - 172: 202, # '¬' - 173: 203, # '\xad' - 174: 204, # '®' - 175: 205, # 'Ż' - 176: 81, # '°' - 177: 206, # '±' - 178: 207, # '˛' - 179: 208, # 'ł' - 180: 209, # '´' - 181: 210, # 'µ' - 182: 211, # '¶' - 183: 212, # '·' - 184: 213, # '¸' - 185: 214, # 'ą' - 186: 215, # 'ş' - 187: 216, # '»' - 188: 217, # 'Ľ' - 189: 218, # '˝' - 190: 219, # 'ľ' - 191: 220, # 'ż' - 192: 221, # 'Ŕ' - 193: 51, # 'Á' - 194: 83, # 'Â' - 195: 222, # 'Ă' - 196: 80, # 'Ä' - 197: 223, # 'Ĺ' - 198: 224, # 'Ć' - 199: 225, # 'Ç' - 200: 226, # 'Č' - 201: 44, # 'É' - 202: 227, # 'Ę' - 203: 228, # 'Ë' - 204: 229, # 'Ě' - 205: 61, # 'Í' - 206: 230, # 'Î' - 207: 231, # 'Ď' - 208: 232, # 'Đ' - 209: 233, # 'Ń' - 210: 234, # 'Ň' - 211: 58, # 'Ó' - 212: 235, # 'Ô' - 213: 66, # 'Ő' - 214: 59, # 'Ö' - 215: 236, # '×' - 216: 237, # 'Ř' - 217: 238, # 'Ů' - 218: 60, # 'Ú' - 219: 70, # 'Ű' - 220: 63, # 'Ü' - 221: 239, # 'Ý' - 222: 240, # 'Ţ' - 223: 241, # 'ß' - 224: 84, # 'ŕ' - 225: 14, # 'á' - 226: 75, # 'â' - 227: 242, # 'ă' - 228: 71, # 'ä' - 229: 82, # 'ĺ' - 230: 243, # 'ć' - 231: 73, # 'ç' - 232: 244, # 'č' - 233: 15, # 'é' - 234: 85, # 'ę' - 235: 79, # 'ë' - 236: 86, # 'ě' - 237: 30, # 'í' - 238: 77, # 'î' - 239: 87, # 'ď' - 240: 245, # 'đ' - 241: 246, # 'ń' - 242: 247, # 'ň' - 243: 25, # 'ó' - 244: 74, # 'ô' - 245: 42, # 'ő' - 246: 24, # 'ö' - 247: 248, # '÷' - 248: 249, # 'ř' - 249: 250, # 'ů' - 250: 31, # 'ú' - 251: 56, # 'ű' - 252: 29, # 'ü' - 253: 251, # 'ý' - 254: 252, # 'ţ' - 255: 253, # '˙' + 0: 255, # '\x00' + 1: 255, # '\x01' + 2: 255, # '\x02' + 3: 255, # '\x03' + 4: 255, # '\x04' + 5: 255, # '\x05' + 6: 255, # '\x06' + 7: 255, # '\x07' + 8: 255, # '\x08' + 9: 255, # '\t' + 10: 254, # '\n' + 11: 255, # '\x0b' + 12: 255, # '\x0c' + 13: 254, # '\r' + 14: 255, # '\x0e' + 15: 255, # '\x0f' + 16: 255, # '\x10' + 17: 255, # '\x11' + 18: 255, # '\x12' + 19: 255, # '\x13' + 20: 255, # '\x14' + 21: 255, # '\x15' + 22: 255, # '\x16' + 23: 255, # '\x17' + 24: 255, # '\x18' + 25: 255, # '\x19' + 26: 255, # '\x1a' + 27: 255, # '\x1b' + 28: 255, # '\x1c' + 29: 255, # '\x1d' + 30: 255, # '\x1e' + 31: 255, # '\x1f' + 32: 253, # ' ' + 33: 253, # '!' + 34: 253, # '"' + 35: 253, # '#' + 36: 253, # '$' + 37: 253, # '%' + 38: 253, # '&' + 39: 253, # "'" + 40: 253, # '(' + 41: 253, # ')' + 42: 253, # '*' + 43: 253, # '+' + 44: 253, # ',' + 45: 253, # '-' + 46: 253, # '.' + 47: 253, # '/' + 48: 252, # '0' + 49: 252, # '1' + 50: 252, # '2' + 51: 252, # '3' + 52: 252, # '4' + 53: 252, # '5' + 54: 252, # '6' + 55: 252, # '7' + 56: 252, # '8' + 57: 252, # '9' + 58: 253, # ':' + 59: 253, # ';' + 60: 253, # '<' + 61: 253, # '=' + 62: 253, # '>' + 63: 253, # '?' + 64: 253, # '@' + 65: 28, # 'A' + 66: 40, # 'B' + 67: 54, # 'C' + 68: 45, # 'D' + 69: 32, # 'E' + 70: 50, # 'F' + 71: 49, # 'G' + 72: 38, # 'H' + 73: 39, # 'I' + 74: 53, # 'J' + 75: 36, # 'K' + 76: 41, # 'L' + 77: 34, # 'M' + 78: 35, # 'N' + 79: 47, # 'O' + 80: 46, # 'P' + 81: 72, # 'Q' + 82: 43, # 'R' + 83: 33, # 'S' + 84: 37, # 'T' + 85: 57, # 'U' + 86: 48, # 'V' + 87: 64, # 'W' + 88: 68, # 'X' + 89: 55, # 'Y' + 90: 52, # 'Z' + 91: 253, # '[' + 92: 253, # '\\' + 93: 253, # ']' + 94: 253, # '^' + 95: 253, # '_' + 96: 253, # '`' + 97: 2, # 'a' + 98: 18, # 'b' + 99: 26, # 'c' + 100: 17, # 'd' + 101: 1, # 'e' + 102: 27, # 'f' + 103: 12, # 'g' + 104: 20, # 'h' + 105: 9, # 'i' + 106: 22, # 'j' + 107: 7, # 'k' + 108: 6, # 'l' + 109: 13, # 'm' + 110: 4, # 'n' + 111: 8, # 'o' + 112: 23, # 'p' + 113: 67, # 'q' + 114: 10, # 'r' + 115: 5, # 's' + 116: 3, # 't' + 117: 21, # 'u' + 118: 19, # 'v' + 119: 65, # 'w' + 120: 62, # 'x' + 121: 16, # 'y' + 122: 11, # 'z' + 123: 253, # '{' + 124: 253, # '|' + 125: 253, # '}' + 126: 253, # '~' + 127: 253, # '\x7f' + 128: 161, # '€' + 129: 162, # None + 130: 163, # '‚' + 131: 164, # None + 132: 165, # '„' + 133: 166, # '…' + 134: 167, # '†' + 135: 168, # '‡' + 136: 169, # None + 137: 170, # '‰' + 138: 171, # 'Š' + 139: 172, # '‹' + 140: 173, # 'Ś' + 141: 174, # 'Ť' + 142: 175, # 'Ž' + 143: 176, # 'Ź' + 144: 177, # None + 145: 178, # '‘' + 146: 179, # '’' + 147: 180, # '“' + 148: 78, # '”' + 149: 181, # '•' + 150: 69, # '–' + 151: 182, # '—' + 152: 183, # None + 153: 184, # '™' + 154: 185, # 'š' + 155: 186, # '›' + 156: 187, # 'ś' + 157: 188, # 'ť' + 158: 189, # 'ž' + 159: 190, # 'ź' + 160: 191, # '\xa0' + 161: 192, # 'ˇ' + 162: 193, # '˘' + 163: 194, # 'Ł' + 164: 195, # '¤' + 165: 196, # 'Ą' + 166: 197, # '¦' + 167: 76, # '§' + 168: 198, # '¨' + 169: 199, # '©' + 170: 200, # 'Ş' + 171: 201, # '«' + 172: 202, # '¬' + 173: 203, # '\xad' + 174: 204, # '®' + 175: 205, # 'Ż' + 176: 81, # '°' + 177: 206, # '±' + 178: 207, # '˛' + 179: 208, # 'ł' + 180: 209, # '´' + 181: 210, # 'µ' + 182: 211, # '¶' + 183: 212, # '·' + 184: 213, # '¸' + 185: 214, # 'ą' + 186: 215, # 'ş' + 187: 216, # '»' + 188: 217, # 'Ľ' + 189: 218, # '˝' + 190: 219, # 'ľ' + 191: 220, # 'ż' + 192: 221, # 'Ŕ' + 193: 51, # 'Á' + 194: 83, # 'Â' + 195: 222, # 'Ă' + 196: 80, # 'Ä' + 197: 223, # 'Ĺ' + 198: 224, # 'Ć' + 199: 225, # 'Ç' + 200: 226, # 'Č' + 201: 44, # 'É' + 202: 227, # 'Ę' + 203: 228, # 'Ë' + 204: 229, # 'Ě' + 205: 61, # 'Í' + 206: 230, # 'Î' + 207: 231, # 'Ď' + 208: 232, # 'Đ' + 209: 233, # 'Ń' + 210: 234, # 'Ň' + 211: 58, # 'Ó' + 212: 235, # 'Ô' + 213: 66, # 'Ő' + 214: 59, # 'Ö' + 215: 236, # '×' + 216: 237, # 'Ř' + 217: 238, # 'Ů' + 218: 60, # 'Ú' + 219: 70, # 'Ű' + 220: 63, # 'Ü' + 221: 239, # 'Ý' + 222: 240, # 'Ţ' + 223: 241, # 'ß' + 224: 84, # 'ŕ' + 225: 14, # 'á' + 226: 75, # 'â' + 227: 242, # 'ă' + 228: 71, # 'ä' + 229: 82, # 'ĺ' + 230: 243, # 'ć' + 231: 73, # 'ç' + 232: 244, # 'č' + 233: 15, # 'é' + 234: 85, # 'ę' + 235: 79, # 'ë' + 236: 86, # 'ě' + 237: 30, # 'í' + 238: 77, # 'î' + 239: 87, # 'ď' + 240: 245, # 'đ' + 241: 246, # 'ń' + 242: 247, # 'ň' + 243: 25, # 'ó' + 244: 74, # 'ô' + 245: 42, # 'ő' + 246: 24, # 'ö' + 247: 248, # '÷' + 248: 249, # 'ř' + 249: 250, # 'ů' + 250: 31, # 'ú' + 251: 56, # 'ű' + 252: 29, # 'ü' + 253: 251, # 'ý' + 254: 252, # 'ţ' + 255: 253, # '˙' } -WINDOWS_1250_HUNGARIAN_MODEL = SingleByteCharSetModel(charset_name='windows-1250', - language='Hungarian', - char_to_order_map=WINDOWS_1250_HUNGARIAN_CHAR_TO_ORDER, - language_model=HUNGARIAN_LANG_MODEL, - typical_positive_ratio=0.947368, - keep_ascii_letters=True, - alphabet='ABCDEFGHIJKLMNOPRSTUVZabcdefghijklmnoprstuvzÁÉÍÓÖÚÜáéíóöúüŐőŰű') +WINDOWS_1250_HUNGARIAN_MODEL = SingleByteCharSetModel( + charset_name="windows-1250", + language="Hungarian", + char_to_order_map=WINDOWS_1250_HUNGARIAN_CHAR_TO_ORDER, + language_model=HUNGARIAN_LANG_MODEL, + typical_positive_ratio=0.947368, + keep_ascii_letters=True, + alphabet="ABCDEFGHIJKLMNOPRSTUVZabcdefghijklmnoprstuvzÁÉÍÓÖÚÜáéíóöúüŐőŰű", +) ISO_8859_2_HUNGARIAN_CHAR_TO_ORDER = { - 0: 255, # '\x00' - 1: 255, # '\x01' - 2: 255, # '\x02' - 3: 255, # '\x03' - 4: 255, # '\x04' - 5: 255, # '\x05' - 6: 255, # '\x06' - 7: 255, # '\x07' - 8: 255, # '\x08' - 9: 255, # '\t' - 10: 254, # '\n' - 11: 255, # '\x0b' - 12: 255, # '\x0c' - 13: 254, # '\r' - 14: 255, # '\x0e' - 15: 255, # '\x0f' - 16: 255, # '\x10' - 17: 255, # '\x11' - 18: 255, # '\x12' - 19: 255, # '\x13' - 20: 255, # '\x14' - 21: 255, # '\x15' - 22: 255, # '\x16' - 23: 255, # '\x17' - 24: 255, # '\x18' - 25: 255, # '\x19' - 26: 255, # '\x1a' - 27: 255, # '\x1b' - 28: 255, # '\x1c' - 29: 255, # '\x1d' - 30: 255, # '\x1e' - 31: 255, # '\x1f' - 32: 253, # ' ' - 33: 253, # '!' - 34: 253, # '"' - 35: 253, # '#' - 36: 253, # '$' - 37: 253, # '%' - 38: 253, # '&' - 39: 253, # "'" - 40: 253, # '(' - 41: 253, # ')' - 42: 253, # '*' - 43: 253, # '+' - 44: 253, # ',' - 45: 253, # '-' - 46: 253, # '.' - 47: 253, # '/' - 48: 252, # '0' - 49: 252, # '1' - 50: 252, # '2' - 51: 252, # '3' - 52: 252, # '4' - 53: 252, # '5' - 54: 252, # '6' - 55: 252, # '7' - 56: 252, # '8' - 57: 252, # '9' - 58: 253, # ':' - 59: 253, # ';' - 60: 253, # '<' - 61: 253, # '=' - 62: 253, # '>' - 63: 253, # '?' - 64: 253, # '@' - 65: 28, # 'A' - 66: 40, # 'B' - 67: 54, # 'C' - 68: 45, # 'D' - 69: 32, # 'E' - 70: 50, # 'F' - 71: 49, # 'G' - 72: 38, # 'H' - 73: 39, # 'I' - 74: 53, # 'J' - 75: 36, # 'K' - 76: 41, # 'L' - 77: 34, # 'M' - 78: 35, # 'N' - 79: 47, # 'O' - 80: 46, # 'P' - 81: 71, # 'Q' - 82: 43, # 'R' - 83: 33, # 'S' - 84: 37, # 'T' - 85: 57, # 'U' - 86: 48, # 'V' - 87: 64, # 'W' - 88: 68, # 'X' - 89: 55, # 'Y' - 90: 52, # 'Z' - 91: 253, # '[' - 92: 253, # '\\' - 93: 253, # ']' - 94: 253, # '^' - 95: 253, # '_' - 96: 253, # '`' - 97: 2, # 'a' - 98: 18, # 'b' - 99: 26, # 'c' - 100: 17, # 'd' - 101: 1, # 'e' - 102: 27, # 'f' - 103: 12, # 'g' - 104: 20, # 'h' - 105: 9, # 'i' - 106: 22, # 'j' - 107: 7, # 'k' - 108: 6, # 'l' - 109: 13, # 'm' - 110: 4, # 'n' - 111: 8, # 'o' - 112: 23, # 'p' - 113: 67, # 'q' - 114: 10, # 'r' - 115: 5, # 's' - 116: 3, # 't' - 117: 21, # 'u' - 118: 19, # 'v' - 119: 65, # 'w' - 120: 62, # 'x' - 121: 16, # 'y' - 122: 11, # 'z' - 123: 253, # '{' - 124: 253, # '|' - 125: 253, # '}' - 126: 253, # '~' - 127: 253, # '\x7f' - 128: 159, # '\x80' - 129: 160, # '\x81' - 130: 161, # '\x82' - 131: 162, # '\x83' - 132: 163, # '\x84' - 133: 164, # '\x85' - 134: 165, # '\x86' - 135: 166, # '\x87' - 136: 167, # '\x88' - 137: 168, # '\x89' - 138: 169, # '\x8a' - 139: 170, # '\x8b' - 140: 171, # '\x8c' - 141: 172, # '\x8d' - 142: 173, # '\x8e' - 143: 174, # '\x8f' - 144: 175, # '\x90' - 145: 176, # '\x91' - 146: 177, # '\x92' - 147: 178, # '\x93' - 148: 179, # '\x94' - 149: 180, # '\x95' - 150: 181, # '\x96' - 151: 182, # '\x97' - 152: 183, # '\x98' - 153: 184, # '\x99' - 154: 185, # '\x9a' - 155: 186, # '\x9b' - 156: 187, # '\x9c' - 157: 188, # '\x9d' - 158: 189, # '\x9e' - 159: 190, # '\x9f' - 160: 191, # '\xa0' - 161: 192, # 'Ą' - 162: 193, # '˘' - 163: 194, # 'Ł' - 164: 195, # '¤' - 165: 196, # 'Ľ' - 166: 197, # 'Ś' - 167: 75, # '§' - 168: 198, # '¨' - 169: 199, # 'Š' - 170: 200, # 'Ş' - 171: 201, # 'Ť' - 172: 202, # 'Ź' - 173: 203, # '\xad' - 174: 204, # 'Ž' - 175: 205, # 'Ż' - 176: 79, # '°' - 177: 206, # 'ą' - 178: 207, # '˛' - 179: 208, # 'ł' - 180: 209, # '´' - 181: 210, # 'ľ' - 182: 211, # 'ś' - 183: 212, # 'ˇ' - 184: 213, # '¸' - 185: 214, # 'š' - 186: 215, # 'ş' - 187: 216, # 'ť' - 188: 217, # 'ź' - 189: 218, # '˝' - 190: 219, # 'ž' - 191: 220, # 'ż' - 192: 221, # 'Ŕ' - 193: 51, # 'Á' - 194: 81, # 'Â' - 195: 222, # 'Ă' - 196: 78, # 'Ä' - 197: 223, # 'Ĺ' - 198: 224, # 'Ć' - 199: 225, # 'Ç' - 200: 226, # 'Č' - 201: 44, # 'É' - 202: 227, # 'Ę' - 203: 228, # 'Ë' - 204: 229, # 'Ě' - 205: 61, # 'Í' - 206: 230, # 'Î' - 207: 231, # 'Ď' - 208: 232, # 'Đ' - 209: 233, # 'Ń' - 210: 234, # 'Ň' - 211: 58, # 'Ó' - 212: 235, # 'Ô' - 213: 66, # 'Ő' - 214: 59, # 'Ö' - 215: 236, # '×' - 216: 237, # 'Ř' - 217: 238, # 'Ů' - 218: 60, # 'Ú' - 219: 69, # 'Ű' - 220: 63, # 'Ü' - 221: 239, # 'Ý' - 222: 240, # 'Ţ' - 223: 241, # 'ß' - 224: 82, # 'ŕ' - 225: 14, # 'á' - 226: 74, # 'â' - 227: 242, # 'ă' - 228: 70, # 'ä' - 229: 80, # 'ĺ' - 230: 243, # 'ć' - 231: 72, # 'ç' - 232: 244, # 'č' - 233: 15, # 'é' - 234: 83, # 'ę' - 235: 77, # 'ë' - 236: 84, # 'ě' - 237: 30, # 'í' - 238: 76, # 'î' - 239: 85, # 'ď' - 240: 245, # 'đ' - 241: 246, # 'ń' - 242: 247, # 'ň' - 243: 25, # 'ó' - 244: 73, # 'ô' - 245: 42, # 'ő' - 246: 24, # 'ö' - 247: 248, # '÷' - 248: 249, # 'ř' - 249: 250, # 'ů' - 250: 31, # 'ú' - 251: 56, # 'ű' - 252: 29, # 'ü' - 253: 251, # 'ý' - 254: 252, # 'ţ' - 255: 253, # '˙' + 0: 255, # '\x00' + 1: 255, # '\x01' + 2: 255, # '\x02' + 3: 255, # '\x03' + 4: 255, # '\x04' + 5: 255, # '\x05' + 6: 255, # '\x06' + 7: 255, # '\x07' + 8: 255, # '\x08' + 9: 255, # '\t' + 10: 254, # '\n' + 11: 255, # '\x0b' + 12: 255, # '\x0c' + 13: 254, # '\r' + 14: 255, # '\x0e' + 15: 255, # '\x0f' + 16: 255, # '\x10' + 17: 255, # '\x11' + 18: 255, # '\x12' + 19: 255, # '\x13' + 20: 255, # '\x14' + 21: 255, # '\x15' + 22: 255, # '\x16' + 23: 255, # '\x17' + 24: 255, # '\x18' + 25: 255, # '\x19' + 26: 255, # '\x1a' + 27: 255, # '\x1b' + 28: 255, # '\x1c' + 29: 255, # '\x1d' + 30: 255, # '\x1e' + 31: 255, # '\x1f' + 32: 253, # ' ' + 33: 253, # '!' + 34: 253, # '"' + 35: 253, # '#' + 36: 253, # '$' + 37: 253, # '%' + 38: 253, # '&' + 39: 253, # "'" + 40: 253, # '(' + 41: 253, # ')' + 42: 253, # '*' + 43: 253, # '+' + 44: 253, # ',' + 45: 253, # '-' + 46: 253, # '.' + 47: 253, # '/' + 48: 252, # '0' + 49: 252, # '1' + 50: 252, # '2' + 51: 252, # '3' + 52: 252, # '4' + 53: 252, # '5' + 54: 252, # '6' + 55: 252, # '7' + 56: 252, # '8' + 57: 252, # '9' + 58: 253, # ':' + 59: 253, # ';' + 60: 253, # '<' + 61: 253, # '=' + 62: 253, # '>' + 63: 253, # '?' + 64: 253, # '@' + 65: 28, # 'A' + 66: 40, # 'B' + 67: 54, # 'C' + 68: 45, # 'D' + 69: 32, # 'E' + 70: 50, # 'F' + 71: 49, # 'G' + 72: 38, # 'H' + 73: 39, # 'I' + 74: 53, # 'J' + 75: 36, # 'K' + 76: 41, # 'L' + 77: 34, # 'M' + 78: 35, # 'N' + 79: 47, # 'O' + 80: 46, # 'P' + 81: 71, # 'Q' + 82: 43, # 'R' + 83: 33, # 'S' + 84: 37, # 'T' + 85: 57, # 'U' + 86: 48, # 'V' + 87: 64, # 'W' + 88: 68, # 'X' + 89: 55, # 'Y' + 90: 52, # 'Z' + 91: 253, # '[' + 92: 253, # '\\' + 93: 253, # ']' + 94: 253, # '^' + 95: 253, # '_' + 96: 253, # '`' + 97: 2, # 'a' + 98: 18, # 'b' + 99: 26, # 'c' + 100: 17, # 'd' + 101: 1, # 'e' + 102: 27, # 'f' + 103: 12, # 'g' + 104: 20, # 'h' + 105: 9, # 'i' + 106: 22, # 'j' + 107: 7, # 'k' + 108: 6, # 'l' + 109: 13, # 'm' + 110: 4, # 'n' + 111: 8, # 'o' + 112: 23, # 'p' + 113: 67, # 'q' + 114: 10, # 'r' + 115: 5, # 's' + 116: 3, # 't' + 117: 21, # 'u' + 118: 19, # 'v' + 119: 65, # 'w' + 120: 62, # 'x' + 121: 16, # 'y' + 122: 11, # 'z' + 123: 253, # '{' + 124: 253, # '|' + 125: 253, # '}' + 126: 253, # '~' + 127: 253, # '\x7f' + 128: 159, # '\x80' + 129: 160, # '\x81' + 130: 161, # '\x82' + 131: 162, # '\x83' + 132: 163, # '\x84' + 133: 164, # '\x85' + 134: 165, # '\x86' + 135: 166, # '\x87' + 136: 167, # '\x88' + 137: 168, # '\x89' + 138: 169, # '\x8a' + 139: 170, # '\x8b' + 140: 171, # '\x8c' + 141: 172, # '\x8d' + 142: 173, # '\x8e' + 143: 174, # '\x8f' + 144: 175, # '\x90' + 145: 176, # '\x91' + 146: 177, # '\x92' + 147: 178, # '\x93' + 148: 179, # '\x94' + 149: 180, # '\x95' + 150: 181, # '\x96' + 151: 182, # '\x97' + 152: 183, # '\x98' + 153: 184, # '\x99' + 154: 185, # '\x9a' + 155: 186, # '\x9b' + 156: 187, # '\x9c' + 157: 188, # '\x9d' + 158: 189, # '\x9e' + 159: 190, # '\x9f' + 160: 191, # '\xa0' + 161: 192, # 'Ą' + 162: 193, # '˘' + 163: 194, # 'Ł' + 164: 195, # '¤' + 165: 196, # 'Ľ' + 166: 197, # 'Ś' + 167: 75, # '§' + 168: 198, # '¨' + 169: 199, # 'Š' + 170: 200, # 'Ş' + 171: 201, # 'Ť' + 172: 202, # 'Ź' + 173: 203, # '\xad' + 174: 204, # 'Ž' + 175: 205, # 'Ż' + 176: 79, # '°' + 177: 206, # 'ą' + 178: 207, # '˛' + 179: 208, # 'ł' + 180: 209, # '´' + 181: 210, # 'ľ' + 182: 211, # 'ś' + 183: 212, # 'ˇ' + 184: 213, # '¸' + 185: 214, # 'š' + 186: 215, # 'ş' + 187: 216, # 'ť' + 188: 217, # 'ź' + 189: 218, # '˝' + 190: 219, # 'ž' + 191: 220, # 'ż' + 192: 221, # 'Ŕ' + 193: 51, # 'Á' + 194: 81, # 'Â' + 195: 222, # 'Ă' + 196: 78, # 'Ä' + 197: 223, # 'Ĺ' + 198: 224, # 'Ć' + 199: 225, # 'Ç' + 200: 226, # 'Č' + 201: 44, # 'É' + 202: 227, # 'Ę' + 203: 228, # 'Ë' + 204: 229, # 'Ě' + 205: 61, # 'Í' + 206: 230, # 'Î' + 207: 231, # 'Ď' + 208: 232, # 'Đ' + 209: 233, # 'Ń' + 210: 234, # 'Ň' + 211: 58, # 'Ó' + 212: 235, # 'Ô' + 213: 66, # 'Ő' + 214: 59, # 'Ö' + 215: 236, # '×' + 216: 237, # 'Ř' + 217: 238, # 'Ů' + 218: 60, # 'Ú' + 219: 69, # 'Ű' + 220: 63, # 'Ü' + 221: 239, # 'Ý' + 222: 240, # 'Ţ' + 223: 241, # 'ß' + 224: 82, # 'ŕ' + 225: 14, # 'á' + 226: 74, # 'â' + 227: 242, # 'ă' + 228: 70, # 'ä' + 229: 80, # 'ĺ' + 230: 243, # 'ć' + 231: 72, # 'ç' + 232: 244, # 'č' + 233: 15, # 'é' + 234: 83, # 'ę' + 235: 77, # 'ë' + 236: 84, # 'ě' + 237: 30, # 'í' + 238: 76, # 'î' + 239: 85, # 'ď' + 240: 245, # 'đ' + 241: 246, # 'ń' + 242: 247, # 'ň' + 243: 25, # 'ó' + 244: 73, # 'ô' + 245: 42, # 'ő' + 246: 24, # 'ö' + 247: 248, # '÷' + 248: 249, # 'ř' + 249: 250, # 'ů' + 250: 31, # 'ú' + 251: 56, # 'ű' + 252: 29, # 'ü' + 253: 251, # 'ý' + 254: 252, # 'ţ' + 255: 253, # '˙' } -ISO_8859_2_HUNGARIAN_MODEL = SingleByteCharSetModel(charset_name='ISO-8859-2', - language='Hungarian', - char_to_order_map=ISO_8859_2_HUNGARIAN_CHAR_TO_ORDER, - language_model=HUNGARIAN_LANG_MODEL, - typical_positive_ratio=0.947368, - keep_ascii_letters=True, - alphabet='ABCDEFGHIJKLMNOPRSTUVZabcdefghijklmnoprstuvzÁÉÍÓÖÚÜáéíóöúüŐőŰű') - +ISO_8859_2_HUNGARIAN_MODEL = SingleByteCharSetModel( + charset_name="ISO-8859-2", + language="Hungarian", + char_to_order_map=ISO_8859_2_HUNGARIAN_CHAR_TO_ORDER, + language_model=HUNGARIAN_LANG_MODEL, + typical_positive_ratio=0.947368, + keep_ascii_letters=True, + alphabet="ABCDEFGHIJKLMNOPRSTUVZabcdefghijklmnoprstuvzÁÉÍÓÖÚÜáéíóöúüŐőŰű", +) diff --git a/src/pip/_vendor/chardet/langrussianmodel.py b/src/pip/_vendor/chardet/langrussianmodel.py index 5594452b55b..39a5388948e 100644 --- a/src/pip/_vendor/chardet/langrussianmodel.py +++ b/src/pip/_vendor/chardet/langrussianmodel.py @@ -1,9 +1,5 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - from pip._vendor.chardet.sbcharsetprober import SingleByteCharSetModel - # 3: Positive # 2: Likely # 1: Unlikely @@ -4115,1604 +4111,1615 @@ # Character Mapping Table(s): IBM866_RUSSIAN_CHAR_TO_ORDER = { - 0: 255, # '\x00' - 1: 255, # '\x01' - 2: 255, # '\x02' - 3: 255, # '\x03' - 4: 255, # '\x04' - 5: 255, # '\x05' - 6: 255, # '\x06' - 7: 255, # '\x07' - 8: 255, # '\x08' - 9: 255, # '\t' - 10: 254, # '\n' - 11: 255, # '\x0b' - 12: 255, # '\x0c' - 13: 254, # '\r' - 14: 255, # '\x0e' - 15: 255, # '\x0f' - 16: 255, # '\x10' - 17: 255, # '\x11' - 18: 255, # '\x12' - 19: 255, # '\x13' - 20: 255, # '\x14' - 21: 255, # '\x15' - 22: 255, # '\x16' - 23: 255, # '\x17' - 24: 255, # '\x18' - 25: 255, # '\x19' - 26: 255, # '\x1a' - 27: 255, # '\x1b' - 28: 255, # '\x1c' - 29: 255, # '\x1d' - 30: 255, # '\x1e' - 31: 255, # '\x1f' - 32: 253, # ' ' - 33: 253, # '!' - 34: 253, # '"' - 35: 253, # '#' - 36: 253, # '$' - 37: 253, # '%' - 38: 253, # '&' - 39: 253, # "'" - 40: 253, # '(' - 41: 253, # ')' - 42: 253, # '*' - 43: 253, # '+' - 44: 253, # ',' - 45: 253, # '-' - 46: 253, # '.' - 47: 253, # '/' - 48: 252, # '0' - 49: 252, # '1' - 50: 252, # '2' - 51: 252, # '3' - 52: 252, # '4' - 53: 252, # '5' - 54: 252, # '6' - 55: 252, # '7' - 56: 252, # '8' - 57: 252, # '9' - 58: 253, # ':' - 59: 253, # ';' - 60: 253, # '<' - 61: 253, # '=' - 62: 253, # '>' - 63: 253, # '?' - 64: 253, # '@' - 65: 142, # 'A' - 66: 143, # 'B' - 67: 144, # 'C' - 68: 145, # 'D' - 69: 146, # 'E' - 70: 147, # 'F' - 71: 148, # 'G' - 72: 149, # 'H' - 73: 150, # 'I' - 74: 151, # 'J' - 75: 152, # 'K' - 76: 74, # 'L' - 77: 153, # 'M' - 78: 75, # 'N' - 79: 154, # 'O' - 80: 155, # 'P' - 81: 156, # 'Q' - 82: 157, # 'R' - 83: 158, # 'S' - 84: 159, # 'T' - 85: 160, # 'U' - 86: 161, # 'V' - 87: 162, # 'W' - 88: 163, # 'X' - 89: 164, # 'Y' - 90: 165, # 'Z' - 91: 253, # '[' - 92: 253, # '\\' - 93: 253, # ']' - 94: 253, # '^' - 95: 253, # '_' - 96: 253, # '`' - 97: 71, # 'a' - 98: 172, # 'b' - 99: 66, # 'c' - 100: 173, # 'd' - 101: 65, # 'e' - 102: 174, # 'f' - 103: 76, # 'g' - 104: 175, # 'h' - 105: 64, # 'i' - 106: 176, # 'j' - 107: 177, # 'k' - 108: 77, # 'l' - 109: 72, # 'm' - 110: 178, # 'n' - 111: 69, # 'o' - 112: 67, # 'p' - 113: 179, # 'q' - 114: 78, # 'r' - 115: 73, # 's' - 116: 180, # 't' - 117: 181, # 'u' - 118: 79, # 'v' - 119: 182, # 'w' - 120: 183, # 'x' - 121: 184, # 'y' - 122: 185, # 'z' - 123: 253, # '{' - 124: 253, # '|' - 125: 253, # '}' - 126: 253, # '~' - 127: 253, # '\x7f' - 128: 37, # 'А' - 129: 44, # 'Б' - 130: 33, # 'В' - 131: 46, # 'Г' - 132: 41, # 'Д' - 133: 48, # 'Е' - 134: 56, # 'Ж' - 135: 51, # 'З' - 136: 42, # 'И' - 137: 60, # 'Й' - 138: 36, # 'К' - 139: 49, # 'Л' - 140: 38, # 'М' - 141: 31, # 'Н' - 142: 34, # 'О' - 143: 35, # 'П' - 144: 45, # 'Р' - 145: 32, # 'С' - 146: 40, # 'Т' - 147: 52, # 'У' - 148: 53, # 'Ф' - 149: 55, # 'Х' - 150: 58, # 'Ц' - 151: 50, # 'Ч' - 152: 57, # 'Ш' - 153: 63, # 'Щ' - 154: 70, # 'Ъ' - 155: 62, # 'Ы' - 156: 61, # 'Ь' - 157: 47, # 'Э' - 158: 59, # 'Ю' - 159: 43, # 'Я' - 160: 3, # 'а' - 161: 21, # 'б' - 162: 10, # 'в' - 163: 19, # 'г' - 164: 13, # 'д' - 165: 2, # 'е' - 166: 24, # 'ж' - 167: 20, # 'з' - 168: 4, # 'и' - 169: 23, # 'й' - 170: 11, # 'к' - 171: 8, # 'л' - 172: 12, # 'м' - 173: 5, # 'н' - 174: 1, # 'о' - 175: 15, # 'п' - 176: 191, # '░' - 177: 192, # '▒' - 178: 193, # '▓' - 179: 194, # '│' - 180: 195, # '┤' - 181: 196, # '╡' - 182: 197, # '╢' - 183: 198, # '╖' - 184: 199, # '╕' - 185: 200, # '╣' - 186: 201, # '║' - 187: 202, # '╗' - 188: 203, # '╝' - 189: 204, # '╜' - 190: 205, # '╛' - 191: 206, # '┐' - 192: 207, # '└' - 193: 208, # '┴' - 194: 209, # '┬' - 195: 210, # '├' - 196: 211, # '─' - 197: 212, # '┼' - 198: 213, # '╞' - 199: 214, # '╟' - 200: 215, # '╚' - 201: 216, # '╔' - 202: 217, # '╩' - 203: 218, # '╦' - 204: 219, # '╠' - 205: 220, # '═' - 206: 221, # '╬' - 207: 222, # '╧' - 208: 223, # '╨' - 209: 224, # '╤' - 210: 225, # '╥' - 211: 226, # '╙' - 212: 227, # '╘' - 213: 228, # '╒' - 214: 229, # '╓' - 215: 230, # '╫' - 216: 231, # '╪' - 217: 232, # '┘' - 218: 233, # '┌' - 219: 234, # '█' - 220: 235, # '▄' - 221: 236, # '▌' - 222: 237, # '▐' - 223: 238, # '▀' - 224: 9, # 'р' - 225: 7, # 'с' - 226: 6, # 'т' - 227: 14, # 'у' - 228: 39, # 'ф' - 229: 26, # 'х' - 230: 28, # 'ц' - 231: 22, # 'ч' - 232: 25, # 'ш' - 233: 29, # 'щ' - 234: 54, # 'ъ' - 235: 18, # 'ы' - 236: 17, # 'ь' - 237: 30, # 'э' - 238: 27, # 'ю' - 239: 16, # 'я' - 240: 239, # 'Ё' - 241: 68, # 'ё' - 242: 240, # 'Є' - 243: 241, # 'є' - 244: 242, # 'Ї' - 245: 243, # 'ї' - 246: 244, # 'Ў' - 247: 245, # 'ў' - 248: 246, # '°' - 249: 247, # '∙' - 250: 248, # '·' - 251: 249, # '√' - 252: 250, # '№' - 253: 251, # '¤' - 254: 252, # '■' - 255: 255, # '\xa0' + 0: 255, # '\x00' + 1: 255, # '\x01' + 2: 255, # '\x02' + 3: 255, # '\x03' + 4: 255, # '\x04' + 5: 255, # '\x05' + 6: 255, # '\x06' + 7: 255, # '\x07' + 8: 255, # '\x08' + 9: 255, # '\t' + 10: 254, # '\n' + 11: 255, # '\x0b' + 12: 255, # '\x0c' + 13: 254, # '\r' + 14: 255, # '\x0e' + 15: 255, # '\x0f' + 16: 255, # '\x10' + 17: 255, # '\x11' + 18: 255, # '\x12' + 19: 255, # '\x13' + 20: 255, # '\x14' + 21: 255, # '\x15' + 22: 255, # '\x16' + 23: 255, # '\x17' + 24: 255, # '\x18' + 25: 255, # '\x19' + 26: 255, # '\x1a' + 27: 255, # '\x1b' + 28: 255, # '\x1c' + 29: 255, # '\x1d' + 30: 255, # '\x1e' + 31: 255, # '\x1f' + 32: 253, # ' ' + 33: 253, # '!' + 34: 253, # '"' + 35: 253, # '#' + 36: 253, # '$' + 37: 253, # '%' + 38: 253, # '&' + 39: 253, # "'" + 40: 253, # '(' + 41: 253, # ')' + 42: 253, # '*' + 43: 253, # '+' + 44: 253, # ',' + 45: 253, # '-' + 46: 253, # '.' + 47: 253, # '/' + 48: 252, # '0' + 49: 252, # '1' + 50: 252, # '2' + 51: 252, # '3' + 52: 252, # '4' + 53: 252, # '5' + 54: 252, # '6' + 55: 252, # '7' + 56: 252, # '8' + 57: 252, # '9' + 58: 253, # ':' + 59: 253, # ';' + 60: 253, # '<' + 61: 253, # '=' + 62: 253, # '>' + 63: 253, # '?' + 64: 253, # '@' + 65: 142, # 'A' + 66: 143, # 'B' + 67: 144, # 'C' + 68: 145, # 'D' + 69: 146, # 'E' + 70: 147, # 'F' + 71: 148, # 'G' + 72: 149, # 'H' + 73: 150, # 'I' + 74: 151, # 'J' + 75: 152, # 'K' + 76: 74, # 'L' + 77: 153, # 'M' + 78: 75, # 'N' + 79: 154, # 'O' + 80: 155, # 'P' + 81: 156, # 'Q' + 82: 157, # 'R' + 83: 158, # 'S' + 84: 159, # 'T' + 85: 160, # 'U' + 86: 161, # 'V' + 87: 162, # 'W' + 88: 163, # 'X' + 89: 164, # 'Y' + 90: 165, # 'Z' + 91: 253, # '[' + 92: 253, # '\\' + 93: 253, # ']' + 94: 253, # '^' + 95: 253, # '_' + 96: 253, # '`' + 97: 71, # 'a' + 98: 172, # 'b' + 99: 66, # 'c' + 100: 173, # 'd' + 101: 65, # 'e' + 102: 174, # 'f' + 103: 76, # 'g' + 104: 175, # 'h' + 105: 64, # 'i' + 106: 176, # 'j' + 107: 177, # 'k' + 108: 77, # 'l' + 109: 72, # 'm' + 110: 178, # 'n' + 111: 69, # 'o' + 112: 67, # 'p' + 113: 179, # 'q' + 114: 78, # 'r' + 115: 73, # 's' + 116: 180, # 't' + 117: 181, # 'u' + 118: 79, # 'v' + 119: 182, # 'w' + 120: 183, # 'x' + 121: 184, # 'y' + 122: 185, # 'z' + 123: 253, # '{' + 124: 253, # '|' + 125: 253, # '}' + 126: 253, # '~' + 127: 253, # '\x7f' + 128: 37, # 'А' + 129: 44, # 'Б' + 130: 33, # 'В' + 131: 46, # 'Г' + 132: 41, # 'Д' + 133: 48, # 'Е' + 134: 56, # 'Ж' + 135: 51, # 'З' + 136: 42, # 'И' + 137: 60, # 'Й' + 138: 36, # 'К' + 139: 49, # 'Л' + 140: 38, # 'М' + 141: 31, # 'Н' + 142: 34, # 'О' + 143: 35, # 'П' + 144: 45, # 'Р' + 145: 32, # 'С' + 146: 40, # 'Т' + 147: 52, # 'У' + 148: 53, # 'Ф' + 149: 55, # 'Х' + 150: 58, # 'Ц' + 151: 50, # 'Ч' + 152: 57, # 'Ш' + 153: 63, # 'Щ' + 154: 70, # 'Ъ' + 155: 62, # 'Ы' + 156: 61, # 'Ь' + 157: 47, # 'Э' + 158: 59, # 'Ю' + 159: 43, # 'Я' + 160: 3, # 'а' + 161: 21, # 'б' + 162: 10, # 'в' + 163: 19, # 'г' + 164: 13, # 'д' + 165: 2, # 'е' + 166: 24, # 'ж' + 167: 20, # 'з' + 168: 4, # 'и' + 169: 23, # 'й' + 170: 11, # 'к' + 171: 8, # 'л' + 172: 12, # 'м' + 173: 5, # 'н' + 174: 1, # 'о' + 175: 15, # 'п' + 176: 191, # '░' + 177: 192, # '▒' + 178: 193, # '▓' + 179: 194, # '│' + 180: 195, # '┤' + 181: 196, # '╡' + 182: 197, # '╢' + 183: 198, # '╖' + 184: 199, # '╕' + 185: 200, # '╣' + 186: 201, # '║' + 187: 202, # '╗' + 188: 203, # '╝' + 189: 204, # '╜' + 190: 205, # '╛' + 191: 206, # '┐' + 192: 207, # '└' + 193: 208, # '┴' + 194: 209, # '┬' + 195: 210, # '├' + 196: 211, # '─' + 197: 212, # '┼' + 198: 213, # '╞' + 199: 214, # '╟' + 200: 215, # '╚' + 201: 216, # '╔' + 202: 217, # '╩' + 203: 218, # '╦' + 204: 219, # '╠' + 205: 220, # '═' + 206: 221, # '╬' + 207: 222, # '╧' + 208: 223, # '╨' + 209: 224, # '╤' + 210: 225, # '╥' + 211: 226, # '╙' + 212: 227, # '╘' + 213: 228, # '╒' + 214: 229, # '╓' + 215: 230, # '╫' + 216: 231, # '╪' + 217: 232, # '┘' + 218: 233, # '┌' + 219: 234, # '█' + 220: 235, # '▄' + 221: 236, # '▌' + 222: 237, # '▐' + 223: 238, # '▀' + 224: 9, # 'р' + 225: 7, # 'с' + 226: 6, # 'т' + 227: 14, # 'у' + 228: 39, # 'ф' + 229: 26, # 'х' + 230: 28, # 'ц' + 231: 22, # 'ч' + 232: 25, # 'ш' + 233: 29, # 'щ' + 234: 54, # 'ъ' + 235: 18, # 'ы' + 236: 17, # 'ь' + 237: 30, # 'э' + 238: 27, # 'ю' + 239: 16, # 'я' + 240: 239, # 'Ё' + 241: 68, # 'ё' + 242: 240, # 'Є' + 243: 241, # 'є' + 244: 242, # 'Ї' + 245: 243, # 'ї' + 246: 244, # 'Ў' + 247: 245, # 'ў' + 248: 246, # '°' + 249: 247, # '∙' + 250: 248, # '·' + 251: 249, # '√' + 252: 250, # '№' + 253: 251, # '¤' + 254: 252, # '■' + 255: 255, # '\xa0' } -IBM866_RUSSIAN_MODEL = SingleByteCharSetModel(charset_name='IBM866', - language='Russian', - char_to_order_map=IBM866_RUSSIAN_CHAR_TO_ORDER, - language_model=RUSSIAN_LANG_MODEL, - typical_positive_ratio=0.976601, - keep_ascii_letters=False, - alphabet='ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё') +IBM866_RUSSIAN_MODEL = SingleByteCharSetModel( + charset_name="IBM866", + language="Russian", + char_to_order_map=IBM866_RUSSIAN_CHAR_TO_ORDER, + language_model=RUSSIAN_LANG_MODEL, + typical_positive_ratio=0.976601, + keep_ascii_letters=False, + alphabet="ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё", +) WINDOWS_1251_RUSSIAN_CHAR_TO_ORDER = { - 0: 255, # '\x00' - 1: 255, # '\x01' - 2: 255, # '\x02' - 3: 255, # '\x03' - 4: 255, # '\x04' - 5: 255, # '\x05' - 6: 255, # '\x06' - 7: 255, # '\x07' - 8: 255, # '\x08' - 9: 255, # '\t' - 10: 254, # '\n' - 11: 255, # '\x0b' - 12: 255, # '\x0c' - 13: 254, # '\r' - 14: 255, # '\x0e' - 15: 255, # '\x0f' - 16: 255, # '\x10' - 17: 255, # '\x11' - 18: 255, # '\x12' - 19: 255, # '\x13' - 20: 255, # '\x14' - 21: 255, # '\x15' - 22: 255, # '\x16' - 23: 255, # '\x17' - 24: 255, # '\x18' - 25: 255, # '\x19' - 26: 255, # '\x1a' - 27: 255, # '\x1b' - 28: 255, # '\x1c' - 29: 255, # '\x1d' - 30: 255, # '\x1e' - 31: 255, # '\x1f' - 32: 253, # ' ' - 33: 253, # '!' - 34: 253, # '"' - 35: 253, # '#' - 36: 253, # '$' - 37: 253, # '%' - 38: 253, # '&' - 39: 253, # "'" - 40: 253, # '(' - 41: 253, # ')' - 42: 253, # '*' - 43: 253, # '+' - 44: 253, # ',' - 45: 253, # '-' - 46: 253, # '.' - 47: 253, # '/' - 48: 252, # '0' - 49: 252, # '1' - 50: 252, # '2' - 51: 252, # '3' - 52: 252, # '4' - 53: 252, # '5' - 54: 252, # '6' - 55: 252, # '7' - 56: 252, # '8' - 57: 252, # '9' - 58: 253, # ':' - 59: 253, # ';' - 60: 253, # '<' - 61: 253, # '=' - 62: 253, # '>' - 63: 253, # '?' - 64: 253, # '@' - 65: 142, # 'A' - 66: 143, # 'B' - 67: 144, # 'C' - 68: 145, # 'D' - 69: 146, # 'E' - 70: 147, # 'F' - 71: 148, # 'G' - 72: 149, # 'H' - 73: 150, # 'I' - 74: 151, # 'J' - 75: 152, # 'K' - 76: 74, # 'L' - 77: 153, # 'M' - 78: 75, # 'N' - 79: 154, # 'O' - 80: 155, # 'P' - 81: 156, # 'Q' - 82: 157, # 'R' - 83: 158, # 'S' - 84: 159, # 'T' - 85: 160, # 'U' - 86: 161, # 'V' - 87: 162, # 'W' - 88: 163, # 'X' - 89: 164, # 'Y' - 90: 165, # 'Z' - 91: 253, # '[' - 92: 253, # '\\' - 93: 253, # ']' - 94: 253, # '^' - 95: 253, # '_' - 96: 253, # '`' - 97: 71, # 'a' - 98: 172, # 'b' - 99: 66, # 'c' - 100: 173, # 'd' - 101: 65, # 'e' - 102: 174, # 'f' - 103: 76, # 'g' - 104: 175, # 'h' - 105: 64, # 'i' - 106: 176, # 'j' - 107: 177, # 'k' - 108: 77, # 'l' - 109: 72, # 'm' - 110: 178, # 'n' - 111: 69, # 'o' - 112: 67, # 'p' - 113: 179, # 'q' - 114: 78, # 'r' - 115: 73, # 's' - 116: 180, # 't' - 117: 181, # 'u' - 118: 79, # 'v' - 119: 182, # 'w' - 120: 183, # 'x' - 121: 184, # 'y' - 122: 185, # 'z' - 123: 253, # '{' - 124: 253, # '|' - 125: 253, # '}' - 126: 253, # '~' - 127: 253, # '\x7f' - 128: 191, # 'Ђ' - 129: 192, # 'Ѓ' - 130: 193, # '‚' - 131: 194, # 'ѓ' - 132: 195, # '„' - 133: 196, # '…' - 134: 197, # '†' - 135: 198, # '‡' - 136: 199, # '€' - 137: 200, # '‰' - 138: 201, # 'Љ' - 139: 202, # '‹' - 140: 203, # 'Њ' - 141: 204, # 'Ќ' - 142: 205, # 'Ћ' - 143: 206, # 'Џ' - 144: 207, # 'ђ' - 145: 208, # '‘' - 146: 209, # '’' - 147: 210, # '“' - 148: 211, # '”' - 149: 212, # '•' - 150: 213, # '–' - 151: 214, # '—' - 152: 215, # None - 153: 216, # '™' - 154: 217, # 'љ' - 155: 218, # '›' - 156: 219, # 'њ' - 157: 220, # 'ќ' - 158: 221, # 'ћ' - 159: 222, # 'џ' - 160: 223, # '\xa0' - 161: 224, # 'Ў' - 162: 225, # 'ў' - 163: 226, # 'Ј' - 164: 227, # '¤' - 165: 228, # 'Ґ' - 166: 229, # '¦' - 167: 230, # '§' - 168: 231, # 'Ё' - 169: 232, # '©' - 170: 233, # 'Є' - 171: 234, # '«' - 172: 235, # '¬' - 173: 236, # '\xad' - 174: 237, # '®' - 175: 238, # 'Ї' - 176: 239, # '°' - 177: 240, # '±' - 178: 241, # 'І' - 179: 242, # 'і' - 180: 243, # 'ґ' - 181: 244, # 'µ' - 182: 245, # '¶' - 183: 246, # '·' - 184: 68, # 'ё' - 185: 247, # '№' - 186: 248, # 'є' - 187: 249, # '»' - 188: 250, # 'ј' - 189: 251, # 'Ѕ' - 190: 252, # 'ѕ' - 191: 253, # 'ї' - 192: 37, # 'А' - 193: 44, # 'Б' - 194: 33, # 'В' - 195: 46, # 'Г' - 196: 41, # 'Д' - 197: 48, # 'Е' - 198: 56, # 'Ж' - 199: 51, # 'З' - 200: 42, # 'И' - 201: 60, # 'Й' - 202: 36, # 'К' - 203: 49, # 'Л' - 204: 38, # 'М' - 205: 31, # 'Н' - 206: 34, # 'О' - 207: 35, # 'П' - 208: 45, # 'Р' - 209: 32, # 'С' - 210: 40, # 'Т' - 211: 52, # 'У' - 212: 53, # 'Ф' - 213: 55, # 'Х' - 214: 58, # 'Ц' - 215: 50, # 'Ч' - 216: 57, # 'Ш' - 217: 63, # 'Щ' - 218: 70, # 'Ъ' - 219: 62, # 'Ы' - 220: 61, # 'Ь' - 221: 47, # 'Э' - 222: 59, # 'Ю' - 223: 43, # 'Я' - 224: 3, # 'а' - 225: 21, # 'б' - 226: 10, # 'в' - 227: 19, # 'г' - 228: 13, # 'д' - 229: 2, # 'е' - 230: 24, # 'ж' - 231: 20, # 'з' - 232: 4, # 'и' - 233: 23, # 'й' - 234: 11, # 'к' - 235: 8, # 'л' - 236: 12, # 'м' - 237: 5, # 'н' - 238: 1, # 'о' - 239: 15, # 'п' - 240: 9, # 'р' - 241: 7, # 'с' - 242: 6, # 'т' - 243: 14, # 'у' - 244: 39, # 'ф' - 245: 26, # 'х' - 246: 28, # 'ц' - 247: 22, # 'ч' - 248: 25, # 'ш' - 249: 29, # 'щ' - 250: 54, # 'ъ' - 251: 18, # 'ы' - 252: 17, # 'ь' - 253: 30, # 'э' - 254: 27, # 'ю' - 255: 16, # 'я' + 0: 255, # '\x00' + 1: 255, # '\x01' + 2: 255, # '\x02' + 3: 255, # '\x03' + 4: 255, # '\x04' + 5: 255, # '\x05' + 6: 255, # '\x06' + 7: 255, # '\x07' + 8: 255, # '\x08' + 9: 255, # '\t' + 10: 254, # '\n' + 11: 255, # '\x0b' + 12: 255, # '\x0c' + 13: 254, # '\r' + 14: 255, # '\x0e' + 15: 255, # '\x0f' + 16: 255, # '\x10' + 17: 255, # '\x11' + 18: 255, # '\x12' + 19: 255, # '\x13' + 20: 255, # '\x14' + 21: 255, # '\x15' + 22: 255, # '\x16' + 23: 255, # '\x17' + 24: 255, # '\x18' + 25: 255, # '\x19' + 26: 255, # '\x1a' + 27: 255, # '\x1b' + 28: 255, # '\x1c' + 29: 255, # '\x1d' + 30: 255, # '\x1e' + 31: 255, # '\x1f' + 32: 253, # ' ' + 33: 253, # '!' + 34: 253, # '"' + 35: 253, # '#' + 36: 253, # '$' + 37: 253, # '%' + 38: 253, # '&' + 39: 253, # "'" + 40: 253, # '(' + 41: 253, # ')' + 42: 253, # '*' + 43: 253, # '+' + 44: 253, # ',' + 45: 253, # '-' + 46: 253, # '.' + 47: 253, # '/' + 48: 252, # '0' + 49: 252, # '1' + 50: 252, # '2' + 51: 252, # '3' + 52: 252, # '4' + 53: 252, # '5' + 54: 252, # '6' + 55: 252, # '7' + 56: 252, # '8' + 57: 252, # '9' + 58: 253, # ':' + 59: 253, # ';' + 60: 253, # '<' + 61: 253, # '=' + 62: 253, # '>' + 63: 253, # '?' + 64: 253, # '@' + 65: 142, # 'A' + 66: 143, # 'B' + 67: 144, # 'C' + 68: 145, # 'D' + 69: 146, # 'E' + 70: 147, # 'F' + 71: 148, # 'G' + 72: 149, # 'H' + 73: 150, # 'I' + 74: 151, # 'J' + 75: 152, # 'K' + 76: 74, # 'L' + 77: 153, # 'M' + 78: 75, # 'N' + 79: 154, # 'O' + 80: 155, # 'P' + 81: 156, # 'Q' + 82: 157, # 'R' + 83: 158, # 'S' + 84: 159, # 'T' + 85: 160, # 'U' + 86: 161, # 'V' + 87: 162, # 'W' + 88: 163, # 'X' + 89: 164, # 'Y' + 90: 165, # 'Z' + 91: 253, # '[' + 92: 253, # '\\' + 93: 253, # ']' + 94: 253, # '^' + 95: 253, # '_' + 96: 253, # '`' + 97: 71, # 'a' + 98: 172, # 'b' + 99: 66, # 'c' + 100: 173, # 'd' + 101: 65, # 'e' + 102: 174, # 'f' + 103: 76, # 'g' + 104: 175, # 'h' + 105: 64, # 'i' + 106: 176, # 'j' + 107: 177, # 'k' + 108: 77, # 'l' + 109: 72, # 'm' + 110: 178, # 'n' + 111: 69, # 'o' + 112: 67, # 'p' + 113: 179, # 'q' + 114: 78, # 'r' + 115: 73, # 's' + 116: 180, # 't' + 117: 181, # 'u' + 118: 79, # 'v' + 119: 182, # 'w' + 120: 183, # 'x' + 121: 184, # 'y' + 122: 185, # 'z' + 123: 253, # '{' + 124: 253, # '|' + 125: 253, # '}' + 126: 253, # '~' + 127: 253, # '\x7f' + 128: 191, # 'Ђ' + 129: 192, # 'Ѓ' + 130: 193, # '‚' + 131: 194, # 'ѓ' + 132: 195, # '„' + 133: 196, # '…' + 134: 197, # '†' + 135: 198, # '‡' + 136: 199, # '€' + 137: 200, # '‰' + 138: 201, # 'Љ' + 139: 202, # '‹' + 140: 203, # 'Њ' + 141: 204, # 'Ќ' + 142: 205, # 'Ћ' + 143: 206, # 'Џ' + 144: 207, # 'ђ' + 145: 208, # '‘' + 146: 209, # '’' + 147: 210, # '“' + 148: 211, # '”' + 149: 212, # '•' + 150: 213, # '–' + 151: 214, # '—' + 152: 215, # None + 153: 216, # '™' + 154: 217, # 'љ' + 155: 218, # '›' + 156: 219, # 'њ' + 157: 220, # 'ќ' + 158: 221, # 'ћ' + 159: 222, # 'џ' + 160: 223, # '\xa0' + 161: 224, # 'Ў' + 162: 225, # 'ў' + 163: 226, # 'Ј' + 164: 227, # '¤' + 165: 228, # 'Ґ' + 166: 229, # '¦' + 167: 230, # '§' + 168: 231, # 'Ё' + 169: 232, # '©' + 170: 233, # 'Є' + 171: 234, # '«' + 172: 235, # '¬' + 173: 236, # '\xad' + 174: 237, # '®' + 175: 238, # 'Ї' + 176: 239, # '°' + 177: 240, # '±' + 178: 241, # 'І' + 179: 242, # 'і' + 180: 243, # 'ґ' + 181: 244, # 'µ' + 182: 245, # '¶' + 183: 246, # '·' + 184: 68, # 'ё' + 185: 247, # '№' + 186: 248, # 'є' + 187: 249, # '»' + 188: 250, # 'ј' + 189: 251, # 'Ѕ' + 190: 252, # 'ѕ' + 191: 253, # 'ї' + 192: 37, # 'А' + 193: 44, # 'Б' + 194: 33, # 'В' + 195: 46, # 'Г' + 196: 41, # 'Д' + 197: 48, # 'Е' + 198: 56, # 'Ж' + 199: 51, # 'З' + 200: 42, # 'И' + 201: 60, # 'Й' + 202: 36, # 'К' + 203: 49, # 'Л' + 204: 38, # 'М' + 205: 31, # 'Н' + 206: 34, # 'О' + 207: 35, # 'П' + 208: 45, # 'Р' + 209: 32, # 'С' + 210: 40, # 'Т' + 211: 52, # 'У' + 212: 53, # 'Ф' + 213: 55, # 'Х' + 214: 58, # 'Ц' + 215: 50, # 'Ч' + 216: 57, # 'Ш' + 217: 63, # 'Щ' + 218: 70, # 'Ъ' + 219: 62, # 'Ы' + 220: 61, # 'Ь' + 221: 47, # 'Э' + 222: 59, # 'Ю' + 223: 43, # 'Я' + 224: 3, # 'а' + 225: 21, # 'б' + 226: 10, # 'в' + 227: 19, # 'г' + 228: 13, # 'д' + 229: 2, # 'е' + 230: 24, # 'ж' + 231: 20, # 'з' + 232: 4, # 'и' + 233: 23, # 'й' + 234: 11, # 'к' + 235: 8, # 'л' + 236: 12, # 'м' + 237: 5, # 'н' + 238: 1, # 'о' + 239: 15, # 'п' + 240: 9, # 'р' + 241: 7, # 'с' + 242: 6, # 'т' + 243: 14, # 'у' + 244: 39, # 'ф' + 245: 26, # 'х' + 246: 28, # 'ц' + 247: 22, # 'ч' + 248: 25, # 'ш' + 249: 29, # 'щ' + 250: 54, # 'ъ' + 251: 18, # 'ы' + 252: 17, # 'ь' + 253: 30, # 'э' + 254: 27, # 'ю' + 255: 16, # 'я' } -WINDOWS_1251_RUSSIAN_MODEL = SingleByteCharSetModel(charset_name='windows-1251', - language='Russian', - char_to_order_map=WINDOWS_1251_RUSSIAN_CHAR_TO_ORDER, - language_model=RUSSIAN_LANG_MODEL, - typical_positive_ratio=0.976601, - keep_ascii_letters=False, - alphabet='ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё') +WINDOWS_1251_RUSSIAN_MODEL = SingleByteCharSetModel( + charset_name="windows-1251", + language="Russian", + char_to_order_map=WINDOWS_1251_RUSSIAN_CHAR_TO_ORDER, + language_model=RUSSIAN_LANG_MODEL, + typical_positive_ratio=0.976601, + keep_ascii_letters=False, + alphabet="ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё", +) IBM855_RUSSIAN_CHAR_TO_ORDER = { - 0: 255, # '\x00' - 1: 255, # '\x01' - 2: 255, # '\x02' - 3: 255, # '\x03' - 4: 255, # '\x04' - 5: 255, # '\x05' - 6: 255, # '\x06' - 7: 255, # '\x07' - 8: 255, # '\x08' - 9: 255, # '\t' - 10: 254, # '\n' - 11: 255, # '\x0b' - 12: 255, # '\x0c' - 13: 254, # '\r' - 14: 255, # '\x0e' - 15: 255, # '\x0f' - 16: 255, # '\x10' - 17: 255, # '\x11' - 18: 255, # '\x12' - 19: 255, # '\x13' - 20: 255, # '\x14' - 21: 255, # '\x15' - 22: 255, # '\x16' - 23: 255, # '\x17' - 24: 255, # '\x18' - 25: 255, # '\x19' - 26: 255, # '\x1a' - 27: 255, # '\x1b' - 28: 255, # '\x1c' - 29: 255, # '\x1d' - 30: 255, # '\x1e' - 31: 255, # '\x1f' - 32: 253, # ' ' - 33: 253, # '!' - 34: 253, # '"' - 35: 253, # '#' - 36: 253, # '$' - 37: 253, # '%' - 38: 253, # '&' - 39: 253, # "'" - 40: 253, # '(' - 41: 253, # ')' - 42: 253, # '*' - 43: 253, # '+' - 44: 253, # ',' - 45: 253, # '-' - 46: 253, # '.' - 47: 253, # '/' - 48: 252, # '0' - 49: 252, # '1' - 50: 252, # '2' - 51: 252, # '3' - 52: 252, # '4' - 53: 252, # '5' - 54: 252, # '6' - 55: 252, # '7' - 56: 252, # '8' - 57: 252, # '9' - 58: 253, # ':' - 59: 253, # ';' - 60: 253, # '<' - 61: 253, # '=' - 62: 253, # '>' - 63: 253, # '?' - 64: 253, # '@' - 65: 142, # 'A' - 66: 143, # 'B' - 67: 144, # 'C' - 68: 145, # 'D' - 69: 146, # 'E' - 70: 147, # 'F' - 71: 148, # 'G' - 72: 149, # 'H' - 73: 150, # 'I' - 74: 151, # 'J' - 75: 152, # 'K' - 76: 74, # 'L' - 77: 153, # 'M' - 78: 75, # 'N' - 79: 154, # 'O' - 80: 155, # 'P' - 81: 156, # 'Q' - 82: 157, # 'R' - 83: 158, # 'S' - 84: 159, # 'T' - 85: 160, # 'U' - 86: 161, # 'V' - 87: 162, # 'W' - 88: 163, # 'X' - 89: 164, # 'Y' - 90: 165, # 'Z' - 91: 253, # '[' - 92: 253, # '\\' - 93: 253, # ']' - 94: 253, # '^' - 95: 253, # '_' - 96: 253, # '`' - 97: 71, # 'a' - 98: 172, # 'b' - 99: 66, # 'c' - 100: 173, # 'd' - 101: 65, # 'e' - 102: 174, # 'f' - 103: 76, # 'g' - 104: 175, # 'h' - 105: 64, # 'i' - 106: 176, # 'j' - 107: 177, # 'k' - 108: 77, # 'l' - 109: 72, # 'm' - 110: 178, # 'n' - 111: 69, # 'o' - 112: 67, # 'p' - 113: 179, # 'q' - 114: 78, # 'r' - 115: 73, # 's' - 116: 180, # 't' - 117: 181, # 'u' - 118: 79, # 'v' - 119: 182, # 'w' - 120: 183, # 'x' - 121: 184, # 'y' - 122: 185, # 'z' - 123: 253, # '{' - 124: 253, # '|' - 125: 253, # '}' - 126: 253, # '~' - 127: 253, # '\x7f' - 128: 191, # 'ђ' - 129: 192, # 'Ђ' - 130: 193, # 'ѓ' - 131: 194, # 'Ѓ' - 132: 68, # 'ё' - 133: 195, # 'Ё' - 134: 196, # 'є' - 135: 197, # 'Є' - 136: 198, # 'ѕ' - 137: 199, # 'Ѕ' - 138: 200, # 'і' - 139: 201, # 'І' - 140: 202, # 'ї' - 141: 203, # 'Ї' - 142: 204, # 'ј' - 143: 205, # 'Ј' - 144: 206, # 'љ' - 145: 207, # 'Љ' - 146: 208, # 'њ' - 147: 209, # 'Њ' - 148: 210, # 'ћ' - 149: 211, # 'Ћ' - 150: 212, # 'ќ' - 151: 213, # 'Ќ' - 152: 214, # 'ў' - 153: 215, # 'Ў' - 154: 216, # 'џ' - 155: 217, # 'Џ' - 156: 27, # 'ю' - 157: 59, # 'Ю' - 158: 54, # 'ъ' - 159: 70, # 'Ъ' - 160: 3, # 'а' - 161: 37, # 'А' - 162: 21, # 'б' - 163: 44, # 'Б' - 164: 28, # 'ц' - 165: 58, # 'Ц' - 166: 13, # 'д' - 167: 41, # 'Д' - 168: 2, # 'е' - 169: 48, # 'Е' - 170: 39, # 'ф' - 171: 53, # 'Ф' - 172: 19, # 'г' - 173: 46, # 'Г' - 174: 218, # '«' - 175: 219, # '»' - 176: 220, # '░' - 177: 221, # '▒' - 178: 222, # '▓' - 179: 223, # '│' - 180: 224, # '┤' - 181: 26, # 'х' - 182: 55, # 'Х' - 183: 4, # 'и' - 184: 42, # 'И' - 185: 225, # '╣' - 186: 226, # '║' - 187: 227, # '╗' - 188: 228, # '╝' - 189: 23, # 'й' - 190: 60, # 'Й' - 191: 229, # '┐' - 192: 230, # '└' - 193: 231, # '┴' - 194: 232, # '┬' - 195: 233, # '├' - 196: 234, # '─' - 197: 235, # '┼' - 198: 11, # 'к' - 199: 36, # 'К' - 200: 236, # '╚' - 201: 237, # '╔' - 202: 238, # '╩' - 203: 239, # '╦' - 204: 240, # '╠' - 205: 241, # '═' - 206: 242, # '╬' - 207: 243, # '¤' - 208: 8, # 'л' - 209: 49, # 'Л' - 210: 12, # 'м' - 211: 38, # 'М' - 212: 5, # 'н' - 213: 31, # 'Н' - 214: 1, # 'о' - 215: 34, # 'О' - 216: 15, # 'п' - 217: 244, # '┘' - 218: 245, # '┌' - 219: 246, # '█' - 220: 247, # '▄' - 221: 35, # 'П' - 222: 16, # 'я' - 223: 248, # '▀' - 224: 43, # 'Я' - 225: 9, # 'р' - 226: 45, # 'Р' - 227: 7, # 'с' - 228: 32, # 'С' - 229: 6, # 'т' - 230: 40, # 'Т' - 231: 14, # 'у' - 232: 52, # 'У' - 233: 24, # 'ж' - 234: 56, # 'Ж' - 235: 10, # 'в' - 236: 33, # 'В' - 237: 17, # 'ь' - 238: 61, # 'Ь' - 239: 249, # '№' - 240: 250, # '\xad' - 241: 18, # 'ы' - 242: 62, # 'Ы' - 243: 20, # 'з' - 244: 51, # 'З' - 245: 25, # 'ш' - 246: 57, # 'Ш' - 247: 30, # 'э' - 248: 47, # 'Э' - 249: 29, # 'щ' - 250: 63, # 'Щ' - 251: 22, # 'ч' - 252: 50, # 'Ч' - 253: 251, # '§' - 254: 252, # '■' - 255: 255, # '\xa0' + 0: 255, # '\x00' + 1: 255, # '\x01' + 2: 255, # '\x02' + 3: 255, # '\x03' + 4: 255, # '\x04' + 5: 255, # '\x05' + 6: 255, # '\x06' + 7: 255, # '\x07' + 8: 255, # '\x08' + 9: 255, # '\t' + 10: 254, # '\n' + 11: 255, # '\x0b' + 12: 255, # '\x0c' + 13: 254, # '\r' + 14: 255, # '\x0e' + 15: 255, # '\x0f' + 16: 255, # '\x10' + 17: 255, # '\x11' + 18: 255, # '\x12' + 19: 255, # '\x13' + 20: 255, # '\x14' + 21: 255, # '\x15' + 22: 255, # '\x16' + 23: 255, # '\x17' + 24: 255, # '\x18' + 25: 255, # '\x19' + 26: 255, # '\x1a' + 27: 255, # '\x1b' + 28: 255, # '\x1c' + 29: 255, # '\x1d' + 30: 255, # '\x1e' + 31: 255, # '\x1f' + 32: 253, # ' ' + 33: 253, # '!' + 34: 253, # '"' + 35: 253, # '#' + 36: 253, # '$' + 37: 253, # '%' + 38: 253, # '&' + 39: 253, # "'" + 40: 253, # '(' + 41: 253, # ')' + 42: 253, # '*' + 43: 253, # '+' + 44: 253, # ',' + 45: 253, # '-' + 46: 253, # '.' + 47: 253, # '/' + 48: 252, # '0' + 49: 252, # '1' + 50: 252, # '2' + 51: 252, # '3' + 52: 252, # '4' + 53: 252, # '5' + 54: 252, # '6' + 55: 252, # '7' + 56: 252, # '8' + 57: 252, # '9' + 58: 253, # ':' + 59: 253, # ';' + 60: 253, # '<' + 61: 253, # '=' + 62: 253, # '>' + 63: 253, # '?' + 64: 253, # '@' + 65: 142, # 'A' + 66: 143, # 'B' + 67: 144, # 'C' + 68: 145, # 'D' + 69: 146, # 'E' + 70: 147, # 'F' + 71: 148, # 'G' + 72: 149, # 'H' + 73: 150, # 'I' + 74: 151, # 'J' + 75: 152, # 'K' + 76: 74, # 'L' + 77: 153, # 'M' + 78: 75, # 'N' + 79: 154, # 'O' + 80: 155, # 'P' + 81: 156, # 'Q' + 82: 157, # 'R' + 83: 158, # 'S' + 84: 159, # 'T' + 85: 160, # 'U' + 86: 161, # 'V' + 87: 162, # 'W' + 88: 163, # 'X' + 89: 164, # 'Y' + 90: 165, # 'Z' + 91: 253, # '[' + 92: 253, # '\\' + 93: 253, # ']' + 94: 253, # '^' + 95: 253, # '_' + 96: 253, # '`' + 97: 71, # 'a' + 98: 172, # 'b' + 99: 66, # 'c' + 100: 173, # 'd' + 101: 65, # 'e' + 102: 174, # 'f' + 103: 76, # 'g' + 104: 175, # 'h' + 105: 64, # 'i' + 106: 176, # 'j' + 107: 177, # 'k' + 108: 77, # 'l' + 109: 72, # 'm' + 110: 178, # 'n' + 111: 69, # 'o' + 112: 67, # 'p' + 113: 179, # 'q' + 114: 78, # 'r' + 115: 73, # 's' + 116: 180, # 't' + 117: 181, # 'u' + 118: 79, # 'v' + 119: 182, # 'w' + 120: 183, # 'x' + 121: 184, # 'y' + 122: 185, # 'z' + 123: 253, # '{' + 124: 253, # '|' + 125: 253, # '}' + 126: 253, # '~' + 127: 253, # '\x7f' + 128: 191, # 'ђ' + 129: 192, # 'Ђ' + 130: 193, # 'ѓ' + 131: 194, # 'Ѓ' + 132: 68, # 'ё' + 133: 195, # 'Ё' + 134: 196, # 'є' + 135: 197, # 'Є' + 136: 198, # 'ѕ' + 137: 199, # 'Ѕ' + 138: 200, # 'і' + 139: 201, # 'І' + 140: 202, # 'ї' + 141: 203, # 'Ї' + 142: 204, # 'ј' + 143: 205, # 'Ј' + 144: 206, # 'љ' + 145: 207, # 'Љ' + 146: 208, # 'њ' + 147: 209, # 'Њ' + 148: 210, # 'ћ' + 149: 211, # 'Ћ' + 150: 212, # 'ќ' + 151: 213, # 'Ќ' + 152: 214, # 'ў' + 153: 215, # 'Ў' + 154: 216, # 'џ' + 155: 217, # 'Џ' + 156: 27, # 'ю' + 157: 59, # 'Ю' + 158: 54, # 'ъ' + 159: 70, # 'Ъ' + 160: 3, # 'а' + 161: 37, # 'А' + 162: 21, # 'б' + 163: 44, # 'Б' + 164: 28, # 'ц' + 165: 58, # 'Ц' + 166: 13, # 'д' + 167: 41, # 'Д' + 168: 2, # 'е' + 169: 48, # 'Е' + 170: 39, # 'ф' + 171: 53, # 'Ф' + 172: 19, # 'г' + 173: 46, # 'Г' + 174: 218, # '«' + 175: 219, # '»' + 176: 220, # '░' + 177: 221, # '▒' + 178: 222, # '▓' + 179: 223, # '│' + 180: 224, # '┤' + 181: 26, # 'х' + 182: 55, # 'Х' + 183: 4, # 'и' + 184: 42, # 'И' + 185: 225, # '╣' + 186: 226, # '║' + 187: 227, # '╗' + 188: 228, # '╝' + 189: 23, # 'й' + 190: 60, # 'Й' + 191: 229, # '┐' + 192: 230, # '└' + 193: 231, # '┴' + 194: 232, # '┬' + 195: 233, # '├' + 196: 234, # '─' + 197: 235, # '┼' + 198: 11, # 'к' + 199: 36, # 'К' + 200: 236, # '╚' + 201: 237, # '╔' + 202: 238, # '╩' + 203: 239, # '╦' + 204: 240, # '╠' + 205: 241, # '═' + 206: 242, # '╬' + 207: 243, # '¤' + 208: 8, # 'л' + 209: 49, # 'Л' + 210: 12, # 'м' + 211: 38, # 'М' + 212: 5, # 'н' + 213: 31, # 'Н' + 214: 1, # 'о' + 215: 34, # 'О' + 216: 15, # 'п' + 217: 244, # '┘' + 218: 245, # '┌' + 219: 246, # '█' + 220: 247, # '▄' + 221: 35, # 'П' + 222: 16, # 'я' + 223: 248, # '▀' + 224: 43, # 'Я' + 225: 9, # 'р' + 226: 45, # 'Р' + 227: 7, # 'с' + 228: 32, # 'С' + 229: 6, # 'т' + 230: 40, # 'Т' + 231: 14, # 'у' + 232: 52, # 'У' + 233: 24, # 'ж' + 234: 56, # 'Ж' + 235: 10, # 'в' + 236: 33, # 'В' + 237: 17, # 'ь' + 238: 61, # 'Ь' + 239: 249, # '№' + 240: 250, # '\xad' + 241: 18, # 'ы' + 242: 62, # 'Ы' + 243: 20, # 'з' + 244: 51, # 'З' + 245: 25, # 'ш' + 246: 57, # 'Ш' + 247: 30, # 'э' + 248: 47, # 'Э' + 249: 29, # 'щ' + 250: 63, # 'Щ' + 251: 22, # 'ч' + 252: 50, # 'Ч' + 253: 251, # '§' + 254: 252, # '■' + 255: 255, # '\xa0' } -IBM855_RUSSIAN_MODEL = SingleByteCharSetModel(charset_name='IBM855', - language='Russian', - char_to_order_map=IBM855_RUSSIAN_CHAR_TO_ORDER, - language_model=RUSSIAN_LANG_MODEL, - typical_positive_ratio=0.976601, - keep_ascii_letters=False, - alphabet='ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё') +IBM855_RUSSIAN_MODEL = SingleByteCharSetModel( + charset_name="IBM855", + language="Russian", + char_to_order_map=IBM855_RUSSIAN_CHAR_TO_ORDER, + language_model=RUSSIAN_LANG_MODEL, + typical_positive_ratio=0.976601, + keep_ascii_letters=False, + alphabet="ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё", +) KOI8_R_RUSSIAN_CHAR_TO_ORDER = { - 0: 255, # '\x00' - 1: 255, # '\x01' - 2: 255, # '\x02' - 3: 255, # '\x03' - 4: 255, # '\x04' - 5: 255, # '\x05' - 6: 255, # '\x06' - 7: 255, # '\x07' - 8: 255, # '\x08' - 9: 255, # '\t' - 10: 254, # '\n' - 11: 255, # '\x0b' - 12: 255, # '\x0c' - 13: 254, # '\r' - 14: 255, # '\x0e' - 15: 255, # '\x0f' - 16: 255, # '\x10' - 17: 255, # '\x11' - 18: 255, # '\x12' - 19: 255, # '\x13' - 20: 255, # '\x14' - 21: 255, # '\x15' - 22: 255, # '\x16' - 23: 255, # '\x17' - 24: 255, # '\x18' - 25: 255, # '\x19' - 26: 255, # '\x1a' - 27: 255, # '\x1b' - 28: 255, # '\x1c' - 29: 255, # '\x1d' - 30: 255, # '\x1e' - 31: 255, # '\x1f' - 32: 253, # ' ' - 33: 253, # '!' - 34: 253, # '"' - 35: 253, # '#' - 36: 253, # '$' - 37: 253, # '%' - 38: 253, # '&' - 39: 253, # "'" - 40: 253, # '(' - 41: 253, # ')' - 42: 253, # '*' - 43: 253, # '+' - 44: 253, # ',' - 45: 253, # '-' - 46: 253, # '.' - 47: 253, # '/' - 48: 252, # '0' - 49: 252, # '1' - 50: 252, # '2' - 51: 252, # '3' - 52: 252, # '4' - 53: 252, # '5' - 54: 252, # '6' - 55: 252, # '7' - 56: 252, # '8' - 57: 252, # '9' - 58: 253, # ':' - 59: 253, # ';' - 60: 253, # '<' - 61: 253, # '=' - 62: 253, # '>' - 63: 253, # '?' - 64: 253, # '@' - 65: 142, # 'A' - 66: 143, # 'B' - 67: 144, # 'C' - 68: 145, # 'D' - 69: 146, # 'E' - 70: 147, # 'F' - 71: 148, # 'G' - 72: 149, # 'H' - 73: 150, # 'I' - 74: 151, # 'J' - 75: 152, # 'K' - 76: 74, # 'L' - 77: 153, # 'M' - 78: 75, # 'N' - 79: 154, # 'O' - 80: 155, # 'P' - 81: 156, # 'Q' - 82: 157, # 'R' - 83: 158, # 'S' - 84: 159, # 'T' - 85: 160, # 'U' - 86: 161, # 'V' - 87: 162, # 'W' - 88: 163, # 'X' - 89: 164, # 'Y' - 90: 165, # 'Z' - 91: 253, # '[' - 92: 253, # '\\' - 93: 253, # ']' - 94: 253, # '^' - 95: 253, # '_' - 96: 253, # '`' - 97: 71, # 'a' - 98: 172, # 'b' - 99: 66, # 'c' - 100: 173, # 'd' - 101: 65, # 'e' - 102: 174, # 'f' - 103: 76, # 'g' - 104: 175, # 'h' - 105: 64, # 'i' - 106: 176, # 'j' - 107: 177, # 'k' - 108: 77, # 'l' - 109: 72, # 'm' - 110: 178, # 'n' - 111: 69, # 'o' - 112: 67, # 'p' - 113: 179, # 'q' - 114: 78, # 'r' - 115: 73, # 's' - 116: 180, # 't' - 117: 181, # 'u' - 118: 79, # 'v' - 119: 182, # 'w' - 120: 183, # 'x' - 121: 184, # 'y' - 122: 185, # 'z' - 123: 253, # '{' - 124: 253, # '|' - 125: 253, # '}' - 126: 253, # '~' - 127: 253, # '\x7f' - 128: 191, # '─' - 129: 192, # '│' - 130: 193, # '┌' - 131: 194, # '┐' - 132: 195, # '└' - 133: 196, # '┘' - 134: 197, # '├' - 135: 198, # '┤' - 136: 199, # '┬' - 137: 200, # '┴' - 138: 201, # '┼' - 139: 202, # '▀' - 140: 203, # '▄' - 141: 204, # '█' - 142: 205, # '▌' - 143: 206, # '▐' - 144: 207, # '░' - 145: 208, # '▒' - 146: 209, # '▓' - 147: 210, # '⌠' - 148: 211, # '■' - 149: 212, # '∙' - 150: 213, # '√' - 151: 214, # '≈' - 152: 215, # '≤' - 153: 216, # '≥' - 154: 217, # '\xa0' - 155: 218, # '⌡' - 156: 219, # '°' - 157: 220, # '²' - 158: 221, # '·' - 159: 222, # '÷' - 160: 223, # '═' - 161: 224, # '║' - 162: 225, # '╒' - 163: 68, # 'ё' - 164: 226, # '╓' - 165: 227, # '╔' - 166: 228, # '╕' - 167: 229, # '╖' - 168: 230, # '╗' - 169: 231, # '╘' - 170: 232, # '╙' - 171: 233, # '╚' - 172: 234, # '╛' - 173: 235, # '╜' - 174: 236, # '╝' - 175: 237, # '╞' - 176: 238, # '╟' - 177: 239, # '╠' - 178: 240, # '╡' - 179: 241, # 'Ё' - 180: 242, # '╢' - 181: 243, # '╣' - 182: 244, # '╤' - 183: 245, # '╥' - 184: 246, # '╦' - 185: 247, # '╧' - 186: 248, # '╨' - 187: 249, # '╩' - 188: 250, # '╪' - 189: 251, # '╫' - 190: 252, # '╬' - 191: 253, # '©' - 192: 27, # 'ю' - 193: 3, # 'а' - 194: 21, # 'б' - 195: 28, # 'ц' - 196: 13, # 'д' - 197: 2, # 'е' - 198: 39, # 'ф' - 199: 19, # 'г' - 200: 26, # 'х' - 201: 4, # 'и' - 202: 23, # 'й' - 203: 11, # 'к' - 204: 8, # 'л' - 205: 12, # 'м' - 206: 5, # 'н' - 207: 1, # 'о' - 208: 15, # 'п' - 209: 16, # 'я' - 210: 9, # 'р' - 211: 7, # 'с' - 212: 6, # 'т' - 213: 14, # 'у' - 214: 24, # 'ж' - 215: 10, # 'в' - 216: 17, # 'ь' - 217: 18, # 'ы' - 218: 20, # 'з' - 219: 25, # 'ш' - 220: 30, # 'э' - 221: 29, # 'щ' - 222: 22, # 'ч' - 223: 54, # 'ъ' - 224: 59, # 'Ю' - 225: 37, # 'А' - 226: 44, # 'Б' - 227: 58, # 'Ц' - 228: 41, # 'Д' - 229: 48, # 'Е' - 230: 53, # 'Ф' - 231: 46, # 'Г' - 232: 55, # 'Х' - 233: 42, # 'И' - 234: 60, # 'Й' - 235: 36, # 'К' - 236: 49, # 'Л' - 237: 38, # 'М' - 238: 31, # 'Н' - 239: 34, # 'О' - 240: 35, # 'П' - 241: 43, # 'Я' - 242: 45, # 'Р' - 243: 32, # 'С' - 244: 40, # 'Т' - 245: 52, # 'У' - 246: 56, # 'Ж' - 247: 33, # 'В' - 248: 61, # 'Ь' - 249: 62, # 'Ы' - 250: 51, # 'З' - 251: 57, # 'Ш' - 252: 47, # 'Э' - 253: 63, # 'Щ' - 254: 50, # 'Ч' - 255: 70, # 'Ъ' + 0: 255, # '\x00' + 1: 255, # '\x01' + 2: 255, # '\x02' + 3: 255, # '\x03' + 4: 255, # '\x04' + 5: 255, # '\x05' + 6: 255, # '\x06' + 7: 255, # '\x07' + 8: 255, # '\x08' + 9: 255, # '\t' + 10: 254, # '\n' + 11: 255, # '\x0b' + 12: 255, # '\x0c' + 13: 254, # '\r' + 14: 255, # '\x0e' + 15: 255, # '\x0f' + 16: 255, # '\x10' + 17: 255, # '\x11' + 18: 255, # '\x12' + 19: 255, # '\x13' + 20: 255, # '\x14' + 21: 255, # '\x15' + 22: 255, # '\x16' + 23: 255, # '\x17' + 24: 255, # '\x18' + 25: 255, # '\x19' + 26: 255, # '\x1a' + 27: 255, # '\x1b' + 28: 255, # '\x1c' + 29: 255, # '\x1d' + 30: 255, # '\x1e' + 31: 255, # '\x1f' + 32: 253, # ' ' + 33: 253, # '!' + 34: 253, # '"' + 35: 253, # '#' + 36: 253, # '$' + 37: 253, # '%' + 38: 253, # '&' + 39: 253, # "'" + 40: 253, # '(' + 41: 253, # ')' + 42: 253, # '*' + 43: 253, # '+' + 44: 253, # ',' + 45: 253, # '-' + 46: 253, # '.' + 47: 253, # '/' + 48: 252, # '0' + 49: 252, # '1' + 50: 252, # '2' + 51: 252, # '3' + 52: 252, # '4' + 53: 252, # '5' + 54: 252, # '6' + 55: 252, # '7' + 56: 252, # '8' + 57: 252, # '9' + 58: 253, # ':' + 59: 253, # ';' + 60: 253, # '<' + 61: 253, # '=' + 62: 253, # '>' + 63: 253, # '?' + 64: 253, # '@' + 65: 142, # 'A' + 66: 143, # 'B' + 67: 144, # 'C' + 68: 145, # 'D' + 69: 146, # 'E' + 70: 147, # 'F' + 71: 148, # 'G' + 72: 149, # 'H' + 73: 150, # 'I' + 74: 151, # 'J' + 75: 152, # 'K' + 76: 74, # 'L' + 77: 153, # 'M' + 78: 75, # 'N' + 79: 154, # 'O' + 80: 155, # 'P' + 81: 156, # 'Q' + 82: 157, # 'R' + 83: 158, # 'S' + 84: 159, # 'T' + 85: 160, # 'U' + 86: 161, # 'V' + 87: 162, # 'W' + 88: 163, # 'X' + 89: 164, # 'Y' + 90: 165, # 'Z' + 91: 253, # '[' + 92: 253, # '\\' + 93: 253, # ']' + 94: 253, # '^' + 95: 253, # '_' + 96: 253, # '`' + 97: 71, # 'a' + 98: 172, # 'b' + 99: 66, # 'c' + 100: 173, # 'd' + 101: 65, # 'e' + 102: 174, # 'f' + 103: 76, # 'g' + 104: 175, # 'h' + 105: 64, # 'i' + 106: 176, # 'j' + 107: 177, # 'k' + 108: 77, # 'l' + 109: 72, # 'm' + 110: 178, # 'n' + 111: 69, # 'o' + 112: 67, # 'p' + 113: 179, # 'q' + 114: 78, # 'r' + 115: 73, # 's' + 116: 180, # 't' + 117: 181, # 'u' + 118: 79, # 'v' + 119: 182, # 'w' + 120: 183, # 'x' + 121: 184, # 'y' + 122: 185, # 'z' + 123: 253, # '{' + 124: 253, # '|' + 125: 253, # '}' + 126: 253, # '~' + 127: 253, # '\x7f' + 128: 191, # '─' + 129: 192, # '│' + 130: 193, # '┌' + 131: 194, # '┐' + 132: 195, # '└' + 133: 196, # '┘' + 134: 197, # '├' + 135: 198, # '┤' + 136: 199, # '┬' + 137: 200, # '┴' + 138: 201, # '┼' + 139: 202, # '▀' + 140: 203, # '▄' + 141: 204, # '█' + 142: 205, # '▌' + 143: 206, # '▐' + 144: 207, # '░' + 145: 208, # '▒' + 146: 209, # '▓' + 147: 210, # '⌠' + 148: 211, # '■' + 149: 212, # '∙' + 150: 213, # '√' + 151: 214, # '≈' + 152: 215, # '≤' + 153: 216, # '≥' + 154: 217, # '\xa0' + 155: 218, # '⌡' + 156: 219, # '°' + 157: 220, # '²' + 158: 221, # '·' + 159: 222, # '÷' + 160: 223, # '═' + 161: 224, # '║' + 162: 225, # '╒' + 163: 68, # 'ё' + 164: 226, # '╓' + 165: 227, # '╔' + 166: 228, # '╕' + 167: 229, # '╖' + 168: 230, # '╗' + 169: 231, # '╘' + 170: 232, # '╙' + 171: 233, # '╚' + 172: 234, # '╛' + 173: 235, # '╜' + 174: 236, # '╝' + 175: 237, # '╞' + 176: 238, # '╟' + 177: 239, # '╠' + 178: 240, # '╡' + 179: 241, # 'Ё' + 180: 242, # '╢' + 181: 243, # '╣' + 182: 244, # '╤' + 183: 245, # '╥' + 184: 246, # '╦' + 185: 247, # '╧' + 186: 248, # '╨' + 187: 249, # '╩' + 188: 250, # '╪' + 189: 251, # '╫' + 190: 252, # '╬' + 191: 253, # '©' + 192: 27, # 'ю' + 193: 3, # 'а' + 194: 21, # 'б' + 195: 28, # 'ц' + 196: 13, # 'д' + 197: 2, # 'е' + 198: 39, # 'ф' + 199: 19, # 'г' + 200: 26, # 'х' + 201: 4, # 'и' + 202: 23, # 'й' + 203: 11, # 'к' + 204: 8, # 'л' + 205: 12, # 'м' + 206: 5, # 'н' + 207: 1, # 'о' + 208: 15, # 'п' + 209: 16, # 'я' + 210: 9, # 'р' + 211: 7, # 'с' + 212: 6, # 'т' + 213: 14, # 'у' + 214: 24, # 'ж' + 215: 10, # 'в' + 216: 17, # 'ь' + 217: 18, # 'ы' + 218: 20, # 'з' + 219: 25, # 'ш' + 220: 30, # 'э' + 221: 29, # 'щ' + 222: 22, # 'ч' + 223: 54, # 'ъ' + 224: 59, # 'Ю' + 225: 37, # 'А' + 226: 44, # 'Б' + 227: 58, # 'Ц' + 228: 41, # 'Д' + 229: 48, # 'Е' + 230: 53, # 'Ф' + 231: 46, # 'Г' + 232: 55, # 'Х' + 233: 42, # 'И' + 234: 60, # 'Й' + 235: 36, # 'К' + 236: 49, # 'Л' + 237: 38, # 'М' + 238: 31, # 'Н' + 239: 34, # 'О' + 240: 35, # 'П' + 241: 43, # 'Я' + 242: 45, # 'Р' + 243: 32, # 'С' + 244: 40, # 'Т' + 245: 52, # 'У' + 246: 56, # 'Ж' + 247: 33, # 'В' + 248: 61, # 'Ь' + 249: 62, # 'Ы' + 250: 51, # 'З' + 251: 57, # 'Ш' + 252: 47, # 'Э' + 253: 63, # 'Щ' + 254: 50, # 'Ч' + 255: 70, # 'Ъ' } -KOI8_R_RUSSIAN_MODEL = SingleByteCharSetModel(charset_name='KOI8-R', - language='Russian', - char_to_order_map=KOI8_R_RUSSIAN_CHAR_TO_ORDER, - language_model=RUSSIAN_LANG_MODEL, - typical_positive_ratio=0.976601, - keep_ascii_letters=False, - alphabet='ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё') +KOI8_R_RUSSIAN_MODEL = SingleByteCharSetModel( + charset_name="KOI8-R", + language="Russian", + char_to_order_map=KOI8_R_RUSSIAN_CHAR_TO_ORDER, + language_model=RUSSIAN_LANG_MODEL, + typical_positive_ratio=0.976601, + keep_ascii_letters=False, + alphabet="ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё", +) MACCYRILLIC_RUSSIAN_CHAR_TO_ORDER = { - 0: 255, # '\x00' - 1: 255, # '\x01' - 2: 255, # '\x02' - 3: 255, # '\x03' - 4: 255, # '\x04' - 5: 255, # '\x05' - 6: 255, # '\x06' - 7: 255, # '\x07' - 8: 255, # '\x08' - 9: 255, # '\t' - 10: 254, # '\n' - 11: 255, # '\x0b' - 12: 255, # '\x0c' - 13: 254, # '\r' - 14: 255, # '\x0e' - 15: 255, # '\x0f' - 16: 255, # '\x10' - 17: 255, # '\x11' - 18: 255, # '\x12' - 19: 255, # '\x13' - 20: 255, # '\x14' - 21: 255, # '\x15' - 22: 255, # '\x16' - 23: 255, # '\x17' - 24: 255, # '\x18' - 25: 255, # '\x19' - 26: 255, # '\x1a' - 27: 255, # '\x1b' - 28: 255, # '\x1c' - 29: 255, # '\x1d' - 30: 255, # '\x1e' - 31: 255, # '\x1f' - 32: 253, # ' ' - 33: 253, # '!' - 34: 253, # '"' - 35: 253, # '#' - 36: 253, # '$' - 37: 253, # '%' - 38: 253, # '&' - 39: 253, # "'" - 40: 253, # '(' - 41: 253, # ')' - 42: 253, # '*' - 43: 253, # '+' - 44: 253, # ',' - 45: 253, # '-' - 46: 253, # '.' - 47: 253, # '/' - 48: 252, # '0' - 49: 252, # '1' - 50: 252, # '2' - 51: 252, # '3' - 52: 252, # '4' - 53: 252, # '5' - 54: 252, # '6' - 55: 252, # '7' - 56: 252, # '8' - 57: 252, # '9' - 58: 253, # ':' - 59: 253, # ';' - 60: 253, # '<' - 61: 253, # '=' - 62: 253, # '>' - 63: 253, # '?' - 64: 253, # '@' - 65: 142, # 'A' - 66: 143, # 'B' - 67: 144, # 'C' - 68: 145, # 'D' - 69: 146, # 'E' - 70: 147, # 'F' - 71: 148, # 'G' - 72: 149, # 'H' - 73: 150, # 'I' - 74: 151, # 'J' - 75: 152, # 'K' - 76: 74, # 'L' - 77: 153, # 'M' - 78: 75, # 'N' - 79: 154, # 'O' - 80: 155, # 'P' - 81: 156, # 'Q' - 82: 157, # 'R' - 83: 158, # 'S' - 84: 159, # 'T' - 85: 160, # 'U' - 86: 161, # 'V' - 87: 162, # 'W' - 88: 163, # 'X' - 89: 164, # 'Y' - 90: 165, # 'Z' - 91: 253, # '[' - 92: 253, # '\\' - 93: 253, # ']' - 94: 253, # '^' - 95: 253, # '_' - 96: 253, # '`' - 97: 71, # 'a' - 98: 172, # 'b' - 99: 66, # 'c' - 100: 173, # 'd' - 101: 65, # 'e' - 102: 174, # 'f' - 103: 76, # 'g' - 104: 175, # 'h' - 105: 64, # 'i' - 106: 176, # 'j' - 107: 177, # 'k' - 108: 77, # 'l' - 109: 72, # 'm' - 110: 178, # 'n' - 111: 69, # 'o' - 112: 67, # 'p' - 113: 179, # 'q' - 114: 78, # 'r' - 115: 73, # 's' - 116: 180, # 't' - 117: 181, # 'u' - 118: 79, # 'v' - 119: 182, # 'w' - 120: 183, # 'x' - 121: 184, # 'y' - 122: 185, # 'z' - 123: 253, # '{' - 124: 253, # '|' - 125: 253, # '}' - 126: 253, # '~' - 127: 253, # '\x7f' - 128: 37, # 'А' - 129: 44, # 'Б' - 130: 33, # 'В' - 131: 46, # 'Г' - 132: 41, # 'Д' - 133: 48, # 'Е' - 134: 56, # 'Ж' - 135: 51, # 'З' - 136: 42, # 'И' - 137: 60, # 'Й' - 138: 36, # 'К' - 139: 49, # 'Л' - 140: 38, # 'М' - 141: 31, # 'Н' - 142: 34, # 'О' - 143: 35, # 'П' - 144: 45, # 'Р' - 145: 32, # 'С' - 146: 40, # 'Т' - 147: 52, # 'У' - 148: 53, # 'Ф' - 149: 55, # 'Х' - 150: 58, # 'Ц' - 151: 50, # 'Ч' - 152: 57, # 'Ш' - 153: 63, # 'Щ' - 154: 70, # 'Ъ' - 155: 62, # 'Ы' - 156: 61, # 'Ь' - 157: 47, # 'Э' - 158: 59, # 'Ю' - 159: 43, # 'Я' - 160: 191, # '†' - 161: 192, # '°' - 162: 193, # 'Ґ' - 163: 194, # '£' - 164: 195, # '§' - 165: 196, # '•' - 166: 197, # '¶' - 167: 198, # 'І' - 168: 199, # '®' - 169: 200, # '©' - 170: 201, # '™' - 171: 202, # 'Ђ' - 172: 203, # 'ђ' - 173: 204, # '≠' - 174: 205, # 'Ѓ' - 175: 206, # 'ѓ' - 176: 207, # '∞' - 177: 208, # '±' - 178: 209, # '≤' - 179: 210, # '≥' - 180: 211, # 'і' - 181: 212, # 'µ' - 182: 213, # 'ґ' - 183: 214, # 'Ј' - 184: 215, # 'Є' - 185: 216, # 'є' - 186: 217, # 'Ї' - 187: 218, # 'ї' - 188: 219, # 'Љ' - 189: 220, # 'љ' - 190: 221, # 'Њ' - 191: 222, # 'њ' - 192: 223, # 'ј' - 193: 224, # 'Ѕ' - 194: 225, # '¬' - 195: 226, # '√' - 196: 227, # 'ƒ' - 197: 228, # '≈' - 198: 229, # '∆' - 199: 230, # '«' - 200: 231, # '»' - 201: 232, # '…' - 202: 233, # '\xa0' - 203: 234, # 'Ћ' - 204: 235, # 'ћ' - 205: 236, # 'Ќ' - 206: 237, # 'ќ' - 207: 238, # 'ѕ' - 208: 239, # '–' - 209: 240, # '—' - 210: 241, # '“' - 211: 242, # '”' - 212: 243, # '‘' - 213: 244, # '’' - 214: 245, # '÷' - 215: 246, # '„' - 216: 247, # 'Ў' - 217: 248, # 'ў' - 218: 249, # 'Џ' - 219: 250, # 'џ' - 220: 251, # '№' - 221: 252, # 'Ё' - 222: 68, # 'ё' - 223: 16, # 'я' - 224: 3, # 'а' - 225: 21, # 'б' - 226: 10, # 'в' - 227: 19, # 'г' - 228: 13, # 'д' - 229: 2, # 'е' - 230: 24, # 'ж' - 231: 20, # 'з' - 232: 4, # 'и' - 233: 23, # 'й' - 234: 11, # 'к' - 235: 8, # 'л' - 236: 12, # 'м' - 237: 5, # 'н' - 238: 1, # 'о' - 239: 15, # 'п' - 240: 9, # 'р' - 241: 7, # 'с' - 242: 6, # 'т' - 243: 14, # 'у' - 244: 39, # 'ф' - 245: 26, # 'х' - 246: 28, # 'ц' - 247: 22, # 'ч' - 248: 25, # 'ш' - 249: 29, # 'щ' - 250: 54, # 'ъ' - 251: 18, # 'ы' - 252: 17, # 'ь' - 253: 30, # 'э' - 254: 27, # 'ю' - 255: 255, # '€' + 0: 255, # '\x00' + 1: 255, # '\x01' + 2: 255, # '\x02' + 3: 255, # '\x03' + 4: 255, # '\x04' + 5: 255, # '\x05' + 6: 255, # '\x06' + 7: 255, # '\x07' + 8: 255, # '\x08' + 9: 255, # '\t' + 10: 254, # '\n' + 11: 255, # '\x0b' + 12: 255, # '\x0c' + 13: 254, # '\r' + 14: 255, # '\x0e' + 15: 255, # '\x0f' + 16: 255, # '\x10' + 17: 255, # '\x11' + 18: 255, # '\x12' + 19: 255, # '\x13' + 20: 255, # '\x14' + 21: 255, # '\x15' + 22: 255, # '\x16' + 23: 255, # '\x17' + 24: 255, # '\x18' + 25: 255, # '\x19' + 26: 255, # '\x1a' + 27: 255, # '\x1b' + 28: 255, # '\x1c' + 29: 255, # '\x1d' + 30: 255, # '\x1e' + 31: 255, # '\x1f' + 32: 253, # ' ' + 33: 253, # '!' + 34: 253, # '"' + 35: 253, # '#' + 36: 253, # '$' + 37: 253, # '%' + 38: 253, # '&' + 39: 253, # "'" + 40: 253, # '(' + 41: 253, # ')' + 42: 253, # '*' + 43: 253, # '+' + 44: 253, # ',' + 45: 253, # '-' + 46: 253, # '.' + 47: 253, # '/' + 48: 252, # '0' + 49: 252, # '1' + 50: 252, # '2' + 51: 252, # '3' + 52: 252, # '4' + 53: 252, # '5' + 54: 252, # '6' + 55: 252, # '7' + 56: 252, # '8' + 57: 252, # '9' + 58: 253, # ':' + 59: 253, # ';' + 60: 253, # '<' + 61: 253, # '=' + 62: 253, # '>' + 63: 253, # '?' + 64: 253, # '@' + 65: 142, # 'A' + 66: 143, # 'B' + 67: 144, # 'C' + 68: 145, # 'D' + 69: 146, # 'E' + 70: 147, # 'F' + 71: 148, # 'G' + 72: 149, # 'H' + 73: 150, # 'I' + 74: 151, # 'J' + 75: 152, # 'K' + 76: 74, # 'L' + 77: 153, # 'M' + 78: 75, # 'N' + 79: 154, # 'O' + 80: 155, # 'P' + 81: 156, # 'Q' + 82: 157, # 'R' + 83: 158, # 'S' + 84: 159, # 'T' + 85: 160, # 'U' + 86: 161, # 'V' + 87: 162, # 'W' + 88: 163, # 'X' + 89: 164, # 'Y' + 90: 165, # 'Z' + 91: 253, # '[' + 92: 253, # '\\' + 93: 253, # ']' + 94: 253, # '^' + 95: 253, # '_' + 96: 253, # '`' + 97: 71, # 'a' + 98: 172, # 'b' + 99: 66, # 'c' + 100: 173, # 'd' + 101: 65, # 'e' + 102: 174, # 'f' + 103: 76, # 'g' + 104: 175, # 'h' + 105: 64, # 'i' + 106: 176, # 'j' + 107: 177, # 'k' + 108: 77, # 'l' + 109: 72, # 'm' + 110: 178, # 'n' + 111: 69, # 'o' + 112: 67, # 'p' + 113: 179, # 'q' + 114: 78, # 'r' + 115: 73, # 's' + 116: 180, # 't' + 117: 181, # 'u' + 118: 79, # 'v' + 119: 182, # 'w' + 120: 183, # 'x' + 121: 184, # 'y' + 122: 185, # 'z' + 123: 253, # '{' + 124: 253, # '|' + 125: 253, # '}' + 126: 253, # '~' + 127: 253, # '\x7f' + 128: 37, # 'А' + 129: 44, # 'Б' + 130: 33, # 'В' + 131: 46, # 'Г' + 132: 41, # 'Д' + 133: 48, # 'Е' + 134: 56, # 'Ж' + 135: 51, # 'З' + 136: 42, # 'И' + 137: 60, # 'Й' + 138: 36, # 'К' + 139: 49, # 'Л' + 140: 38, # 'М' + 141: 31, # 'Н' + 142: 34, # 'О' + 143: 35, # 'П' + 144: 45, # 'Р' + 145: 32, # 'С' + 146: 40, # 'Т' + 147: 52, # 'У' + 148: 53, # 'Ф' + 149: 55, # 'Х' + 150: 58, # 'Ц' + 151: 50, # 'Ч' + 152: 57, # 'Ш' + 153: 63, # 'Щ' + 154: 70, # 'Ъ' + 155: 62, # 'Ы' + 156: 61, # 'Ь' + 157: 47, # 'Э' + 158: 59, # 'Ю' + 159: 43, # 'Я' + 160: 191, # '†' + 161: 192, # '°' + 162: 193, # 'Ґ' + 163: 194, # '£' + 164: 195, # '§' + 165: 196, # '•' + 166: 197, # '¶' + 167: 198, # 'І' + 168: 199, # '®' + 169: 200, # '©' + 170: 201, # '™' + 171: 202, # 'Ђ' + 172: 203, # 'ђ' + 173: 204, # '≠' + 174: 205, # 'Ѓ' + 175: 206, # 'ѓ' + 176: 207, # '∞' + 177: 208, # '±' + 178: 209, # '≤' + 179: 210, # '≥' + 180: 211, # 'і' + 181: 212, # 'µ' + 182: 213, # 'ґ' + 183: 214, # 'Ј' + 184: 215, # 'Є' + 185: 216, # 'є' + 186: 217, # 'Ї' + 187: 218, # 'ї' + 188: 219, # 'Љ' + 189: 220, # 'љ' + 190: 221, # 'Њ' + 191: 222, # 'њ' + 192: 223, # 'ј' + 193: 224, # 'Ѕ' + 194: 225, # '¬' + 195: 226, # '√' + 196: 227, # 'ƒ' + 197: 228, # '≈' + 198: 229, # '∆' + 199: 230, # '«' + 200: 231, # '»' + 201: 232, # '…' + 202: 233, # '\xa0' + 203: 234, # 'Ћ' + 204: 235, # 'ћ' + 205: 236, # 'Ќ' + 206: 237, # 'ќ' + 207: 238, # 'ѕ' + 208: 239, # '–' + 209: 240, # '—' + 210: 241, # '“' + 211: 242, # '”' + 212: 243, # '‘' + 213: 244, # '’' + 214: 245, # '÷' + 215: 246, # '„' + 216: 247, # 'Ў' + 217: 248, # 'ў' + 218: 249, # 'Џ' + 219: 250, # 'џ' + 220: 251, # '№' + 221: 252, # 'Ё' + 222: 68, # 'ё' + 223: 16, # 'я' + 224: 3, # 'а' + 225: 21, # 'б' + 226: 10, # 'в' + 227: 19, # 'г' + 228: 13, # 'д' + 229: 2, # 'е' + 230: 24, # 'ж' + 231: 20, # 'з' + 232: 4, # 'и' + 233: 23, # 'й' + 234: 11, # 'к' + 235: 8, # 'л' + 236: 12, # 'м' + 237: 5, # 'н' + 238: 1, # 'о' + 239: 15, # 'п' + 240: 9, # 'р' + 241: 7, # 'с' + 242: 6, # 'т' + 243: 14, # 'у' + 244: 39, # 'ф' + 245: 26, # 'х' + 246: 28, # 'ц' + 247: 22, # 'ч' + 248: 25, # 'ш' + 249: 29, # 'щ' + 250: 54, # 'ъ' + 251: 18, # 'ы' + 252: 17, # 'ь' + 253: 30, # 'э' + 254: 27, # 'ю' + 255: 255, # '€' } -MACCYRILLIC_RUSSIAN_MODEL = SingleByteCharSetModel(charset_name='MacCyrillic', - language='Russian', - char_to_order_map=MACCYRILLIC_RUSSIAN_CHAR_TO_ORDER, - language_model=RUSSIAN_LANG_MODEL, - typical_positive_ratio=0.976601, - keep_ascii_letters=False, - alphabet='ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё') +MACCYRILLIC_RUSSIAN_MODEL = SingleByteCharSetModel( + charset_name="MacCyrillic", + language="Russian", + char_to_order_map=MACCYRILLIC_RUSSIAN_CHAR_TO_ORDER, + language_model=RUSSIAN_LANG_MODEL, + typical_positive_ratio=0.976601, + keep_ascii_letters=False, + alphabet="ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё", +) ISO_8859_5_RUSSIAN_CHAR_TO_ORDER = { - 0: 255, # '\x00' - 1: 255, # '\x01' - 2: 255, # '\x02' - 3: 255, # '\x03' - 4: 255, # '\x04' - 5: 255, # '\x05' - 6: 255, # '\x06' - 7: 255, # '\x07' - 8: 255, # '\x08' - 9: 255, # '\t' - 10: 254, # '\n' - 11: 255, # '\x0b' - 12: 255, # '\x0c' - 13: 254, # '\r' - 14: 255, # '\x0e' - 15: 255, # '\x0f' - 16: 255, # '\x10' - 17: 255, # '\x11' - 18: 255, # '\x12' - 19: 255, # '\x13' - 20: 255, # '\x14' - 21: 255, # '\x15' - 22: 255, # '\x16' - 23: 255, # '\x17' - 24: 255, # '\x18' - 25: 255, # '\x19' - 26: 255, # '\x1a' - 27: 255, # '\x1b' - 28: 255, # '\x1c' - 29: 255, # '\x1d' - 30: 255, # '\x1e' - 31: 255, # '\x1f' - 32: 253, # ' ' - 33: 253, # '!' - 34: 253, # '"' - 35: 253, # '#' - 36: 253, # '$' - 37: 253, # '%' - 38: 253, # '&' - 39: 253, # "'" - 40: 253, # '(' - 41: 253, # ')' - 42: 253, # '*' - 43: 253, # '+' - 44: 253, # ',' - 45: 253, # '-' - 46: 253, # '.' - 47: 253, # '/' - 48: 252, # '0' - 49: 252, # '1' - 50: 252, # '2' - 51: 252, # '3' - 52: 252, # '4' - 53: 252, # '5' - 54: 252, # '6' - 55: 252, # '7' - 56: 252, # '8' - 57: 252, # '9' - 58: 253, # ':' - 59: 253, # ';' - 60: 253, # '<' - 61: 253, # '=' - 62: 253, # '>' - 63: 253, # '?' - 64: 253, # '@' - 65: 142, # 'A' - 66: 143, # 'B' - 67: 144, # 'C' - 68: 145, # 'D' - 69: 146, # 'E' - 70: 147, # 'F' - 71: 148, # 'G' - 72: 149, # 'H' - 73: 150, # 'I' - 74: 151, # 'J' - 75: 152, # 'K' - 76: 74, # 'L' - 77: 153, # 'M' - 78: 75, # 'N' - 79: 154, # 'O' - 80: 155, # 'P' - 81: 156, # 'Q' - 82: 157, # 'R' - 83: 158, # 'S' - 84: 159, # 'T' - 85: 160, # 'U' - 86: 161, # 'V' - 87: 162, # 'W' - 88: 163, # 'X' - 89: 164, # 'Y' - 90: 165, # 'Z' - 91: 253, # '[' - 92: 253, # '\\' - 93: 253, # ']' - 94: 253, # '^' - 95: 253, # '_' - 96: 253, # '`' - 97: 71, # 'a' - 98: 172, # 'b' - 99: 66, # 'c' - 100: 173, # 'd' - 101: 65, # 'e' - 102: 174, # 'f' - 103: 76, # 'g' - 104: 175, # 'h' - 105: 64, # 'i' - 106: 176, # 'j' - 107: 177, # 'k' - 108: 77, # 'l' - 109: 72, # 'm' - 110: 178, # 'n' - 111: 69, # 'o' - 112: 67, # 'p' - 113: 179, # 'q' - 114: 78, # 'r' - 115: 73, # 's' - 116: 180, # 't' - 117: 181, # 'u' - 118: 79, # 'v' - 119: 182, # 'w' - 120: 183, # 'x' - 121: 184, # 'y' - 122: 185, # 'z' - 123: 253, # '{' - 124: 253, # '|' - 125: 253, # '}' - 126: 253, # '~' - 127: 253, # '\x7f' - 128: 191, # '\x80' - 129: 192, # '\x81' - 130: 193, # '\x82' - 131: 194, # '\x83' - 132: 195, # '\x84' - 133: 196, # '\x85' - 134: 197, # '\x86' - 135: 198, # '\x87' - 136: 199, # '\x88' - 137: 200, # '\x89' - 138: 201, # '\x8a' - 139: 202, # '\x8b' - 140: 203, # '\x8c' - 141: 204, # '\x8d' - 142: 205, # '\x8e' - 143: 206, # '\x8f' - 144: 207, # '\x90' - 145: 208, # '\x91' - 146: 209, # '\x92' - 147: 210, # '\x93' - 148: 211, # '\x94' - 149: 212, # '\x95' - 150: 213, # '\x96' - 151: 214, # '\x97' - 152: 215, # '\x98' - 153: 216, # '\x99' - 154: 217, # '\x9a' - 155: 218, # '\x9b' - 156: 219, # '\x9c' - 157: 220, # '\x9d' - 158: 221, # '\x9e' - 159: 222, # '\x9f' - 160: 223, # '\xa0' - 161: 224, # 'Ё' - 162: 225, # 'Ђ' - 163: 226, # 'Ѓ' - 164: 227, # 'Є' - 165: 228, # 'Ѕ' - 166: 229, # 'І' - 167: 230, # 'Ї' - 168: 231, # 'Ј' - 169: 232, # 'Љ' - 170: 233, # 'Њ' - 171: 234, # 'Ћ' - 172: 235, # 'Ќ' - 173: 236, # '\xad' - 174: 237, # 'Ў' - 175: 238, # 'Џ' - 176: 37, # 'А' - 177: 44, # 'Б' - 178: 33, # 'В' - 179: 46, # 'Г' - 180: 41, # 'Д' - 181: 48, # 'Е' - 182: 56, # 'Ж' - 183: 51, # 'З' - 184: 42, # 'И' - 185: 60, # 'Й' - 186: 36, # 'К' - 187: 49, # 'Л' - 188: 38, # 'М' - 189: 31, # 'Н' - 190: 34, # 'О' - 191: 35, # 'П' - 192: 45, # 'Р' - 193: 32, # 'С' - 194: 40, # 'Т' - 195: 52, # 'У' - 196: 53, # 'Ф' - 197: 55, # 'Х' - 198: 58, # 'Ц' - 199: 50, # 'Ч' - 200: 57, # 'Ш' - 201: 63, # 'Щ' - 202: 70, # 'Ъ' - 203: 62, # 'Ы' - 204: 61, # 'Ь' - 205: 47, # 'Э' - 206: 59, # 'Ю' - 207: 43, # 'Я' - 208: 3, # 'а' - 209: 21, # 'б' - 210: 10, # 'в' - 211: 19, # 'г' - 212: 13, # 'д' - 213: 2, # 'е' - 214: 24, # 'ж' - 215: 20, # 'з' - 216: 4, # 'и' - 217: 23, # 'й' - 218: 11, # 'к' - 219: 8, # 'л' - 220: 12, # 'м' - 221: 5, # 'н' - 222: 1, # 'о' - 223: 15, # 'п' - 224: 9, # 'р' - 225: 7, # 'с' - 226: 6, # 'т' - 227: 14, # 'у' - 228: 39, # 'ф' - 229: 26, # 'х' - 230: 28, # 'ц' - 231: 22, # 'ч' - 232: 25, # 'ш' - 233: 29, # 'щ' - 234: 54, # 'ъ' - 235: 18, # 'ы' - 236: 17, # 'ь' - 237: 30, # 'э' - 238: 27, # 'ю' - 239: 16, # 'я' - 240: 239, # '№' - 241: 68, # 'ё' - 242: 240, # 'ђ' - 243: 241, # 'ѓ' - 244: 242, # 'є' - 245: 243, # 'ѕ' - 246: 244, # 'і' - 247: 245, # 'ї' - 248: 246, # 'ј' - 249: 247, # 'љ' - 250: 248, # 'њ' - 251: 249, # 'ћ' - 252: 250, # 'ќ' - 253: 251, # '§' - 254: 252, # 'ў' - 255: 255, # 'џ' + 0: 255, # '\x00' + 1: 255, # '\x01' + 2: 255, # '\x02' + 3: 255, # '\x03' + 4: 255, # '\x04' + 5: 255, # '\x05' + 6: 255, # '\x06' + 7: 255, # '\x07' + 8: 255, # '\x08' + 9: 255, # '\t' + 10: 254, # '\n' + 11: 255, # '\x0b' + 12: 255, # '\x0c' + 13: 254, # '\r' + 14: 255, # '\x0e' + 15: 255, # '\x0f' + 16: 255, # '\x10' + 17: 255, # '\x11' + 18: 255, # '\x12' + 19: 255, # '\x13' + 20: 255, # '\x14' + 21: 255, # '\x15' + 22: 255, # '\x16' + 23: 255, # '\x17' + 24: 255, # '\x18' + 25: 255, # '\x19' + 26: 255, # '\x1a' + 27: 255, # '\x1b' + 28: 255, # '\x1c' + 29: 255, # '\x1d' + 30: 255, # '\x1e' + 31: 255, # '\x1f' + 32: 253, # ' ' + 33: 253, # '!' + 34: 253, # '"' + 35: 253, # '#' + 36: 253, # '$' + 37: 253, # '%' + 38: 253, # '&' + 39: 253, # "'" + 40: 253, # '(' + 41: 253, # ')' + 42: 253, # '*' + 43: 253, # '+' + 44: 253, # ',' + 45: 253, # '-' + 46: 253, # '.' + 47: 253, # '/' + 48: 252, # '0' + 49: 252, # '1' + 50: 252, # '2' + 51: 252, # '3' + 52: 252, # '4' + 53: 252, # '5' + 54: 252, # '6' + 55: 252, # '7' + 56: 252, # '8' + 57: 252, # '9' + 58: 253, # ':' + 59: 253, # ';' + 60: 253, # '<' + 61: 253, # '=' + 62: 253, # '>' + 63: 253, # '?' + 64: 253, # '@' + 65: 142, # 'A' + 66: 143, # 'B' + 67: 144, # 'C' + 68: 145, # 'D' + 69: 146, # 'E' + 70: 147, # 'F' + 71: 148, # 'G' + 72: 149, # 'H' + 73: 150, # 'I' + 74: 151, # 'J' + 75: 152, # 'K' + 76: 74, # 'L' + 77: 153, # 'M' + 78: 75, # 'N' + 79: 154, # 'O' + 80: 155, # 'P' + 81: 156, # 'Q' + 82: 157, # 'R' + 83: 158, # 'S' + 84: 159, # 'T' + 85: 160, # 'U' + 86: 161, # 'V' + 87: 162, # 'W' + 88: 163, # 'X' + 89: 164, # 'Y' + 90: 165, # 'Z' + 91: 253, # '[' + 92: 253, # '\\' + 93: 253, # ']' + 94: 253, # '^' + 95: 253, # '_' + 96: 253, # '`' + 97: 71, # 'a' + 98: 172, # 'b' + 99: 66, # 'c' + 100: 173, # 'd' + 101: 65, # 'e' + 102: 174, # 'f' + 103: 76, # 'g' + 104: 175, # 'h' + 105: 64, # 'i' + 106: 176, # 'j' + 107: 177, # 'k' + 108: 77, # 'l' + 109: 72, # 'm' + 110: 178, # 'n' + 111: 69, # 'o' + 112: 67, # 'p' + 113: 179, # 'q' + 114: 78, # 'r' + 115: 73, # 's' + 116: 180, # 't' + 117: 181, # 'u' + 118: 79, # 'v' + 119: 182, # 'w' + 120: 183, # 'x' + 121: 184, # 'y' + 122: 185, # 'z' + 123: 253, # '{' + 124: 253, # '|' + 125: 253, # '}' + 126: 253, # '~' + 127: 253, # '\x7f' + 128: 191, # '\x80' + 129: 192, # '\x81' + 130: 193, # '\x82' + 131: 194, # '\x83' + 132: 195, # '\x84' + 133: 196, # '\x85' + 134: 197, # '\x86' + 135: 198, # '\x87' + 136: 199, # '\x88' + 137: 200, # '\x89' + 138: 201, # '\x8a' + 139: 202, # '\x8b' + 140: 203, # '\x8c' + 141: 204, # '\x8d' + 142: 205, # '\x8e' + 143: 206, # '\x8f' + 144: 207, # '\x90' + 145: 208, # '\x91' + 146: 209, # '\x92' + 147: 210, # '\x93' + 148: 211, # '\x94' + 149: 212, # '\x95' + 150: 213, # '\x96' + 151: 214, # '\x97' + 152: 215, # '\x98' + 153: 216, # '\x99' + 154: 217, # '\x9a' + 155: 218, # '\x9b' + 156: 219, # '\x9c' + 157: 220, # '\x9d' + 158: 221, # '\x9e' + 159: 222, # '\x9f' + 160: 223, # '\xa0' + 161: 224, # 'Ё' + 162: 225, # 'Ђ' + 163: 226, # 'Ѓ' + 164: 227, # 'Є' + 165: 228, # 'Ѕ' + 166: 229, # 'І' + 167: 230, # 'Ї' + 168: 231, # 'Ј' + 169: 232, # 'Љ' + 170: 233, # 'Њ' + 171: 234, # 'Ћ' + 172: 235, # 'Ќ' + 173: 236, # '\xad' + 174: 237, # 'Ў' + 175: 238, # 'Џ' + 176: 37, # 'А' + 177: 44, # 'Б' + 178: 33, # 'В' + 179: 46, # 'Г' + 180: 41, # 'Д' + 181: 48, # 'Е' + 182: 56, # 'Ж' + 183: 51, # 'З' + 184: 42, # 'И' + 185: 60, # 'Й' + 186: 36, # 'К' + 187: 49, # 'Л' + 188: 38, # 'М' + 189: 31, # 'Н' + 190: 34, # 'О' + 191: 35, # 'П' + 192: 45, # 'Р' + 193: 32, # 'С' + 194: 40, # 'Т' + 195: 52, # 'У' + 196: 53, # 'Ф' + 197: 55, # 'Х' + 198: 58, # 'Ц' + 199: 50, # 'Ч' + 200: 57, # 'Ш' + 201: 63, # 'Щ' + 202: 70, # 'Ъ' + 203: 62, # 'Ы' + 204: 61, # 'Ь' + 205: 47, # 'Э' + 206: 59, # 'Ю' + 207: 43, # 'Я' + 208: 3, # 'а' + 209: 21, # 'б' + 210: 10, # 'в' + 211: 19, # 'г' + 212: 13, # 'д' + 213: 2, # 'е' + 214: 24, # 'ж' + 215: 20, # 'з' + 216: 4, # 'и' + 217: 23, # 'й' + 218: 11, # 'к' + 219: 8, # 'л' + 220: 12, # 'м' + 221: 5, # 'н' + 222: 1, # 'о' + 223: 15, # 'п' + 224: 9, # 'р' + 225: 7, # 'с' + 226: 6, # 'т' + 227: 14, # 'у' + 228: 39, # 'ф' + 229: 26, # 'х' + 230: 28, # 'ц' + 231: 22, # 'ч' + 232: 25, # 'ш' + 233: 29, # 'щ' + 234: 54, # 'ъ' + 235: 18, # 'ы' + 236: 17, # 'ь' + 237: 30, # 'э' + 238: 27, # 'ю' + 239: 16, # 'я' + 240: 239, # '№' + 241: 68, # 'ё' + 242: 240, # 'ђ' + 243: 241, # 'ѓ' + 244: 242, # 'є' + 245: 243, # 'ѕ' + 246: 244, # 'і' + 247: 245, # 'ї' + 248: 246, # 'ј' + 249: 247, # 'љ' + 250: 248, # 'њ' + 251: 249, # 'ћ' + 252: 250, # 'ќ' + 253: 251, # '§' + 254: 252, # 'ў' + 255: 255, # 'џ' } -ISO_8859_5_RUSSIAN_MODEL = SingleByteCharSetModel(charset_name='ISO-8859-5', - language='Russian', - char_to_order_map=ISO_8859_5_RUSSIAN_CHAR_TO_ORDER, - language_model=RUSSIAN_LANG_MODEL, - typical_positive_ratio=0.976601, - keep_ascii_letters=False, - alphabet='ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё') - +ISO_8859_5_RUSSIAN_MODEL = SingleByteCharSetModel( + charset_name="ISO-8859-5", + language="Russian", + char_to_order_map=ISO_8859_5_RUSSIAN_CHAR_TO_ORDER, + language_model=RUSSIAN_LANG_MODEL, + typical_positive_ratio=0.976601, + keep_ascii_letters=False, + alphabet="ЁАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяё", +) diff --git a/src/pip/_vendor/chardet/langthaimodel.py b/src/pip/_vendor/chardet/langthaimodel.py index 9a37db57388..489cad930e0 100644 --- a/src/pip/_vendor/chardet/langthaimodel.py +++ b/src/pip/_vendor/chardet/langthaimodel.py @@ -1,9 +1,5 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - from pip._vendor.chardet.sbcharsetprober import SingleByteCharSetModel - # 3: Positive # 2: Likely # 1: Unlikely @@ -4115,269 +4111,270 @@ # Character Mapping Table(s): TIS_620_THAI_CHAR_TO_ORDER = { - 0: 255, # '\x00' - 1: 255, # '\x01' - 2: 255, # '\x02' - 3: 255, # '\x03' - 4: 255, # '\x04' - 5: 255, # '\x05' - 6: 255, # '\x06' - 7: 255, # '\x07' - 8: 255, # '\x08' - 9: 255, # '\t' - 10: 254, # '\n' - 11: 255, # '\x0b' - 12: 255, # '\x0c' - 13: 254, # '\r' - 14: 255, # '\x0e' - 15: 255, # '\x0f' - 16: 255, # '\x10' - 17: 255, # '\x11' - 18: 255, # '\x12' - 19: 255, # '\x13' - 20: 255, # '\x14' - 21: 255, # '\x15' - 22: 255, # '\x16' - 23: 255, # '\x17' - 24: 255, # '\x18' - 25: 255, # '\x19' - 26: 255, # '\x1a' - 27: 255, # '\x1b' - 28: 255, # '\x1c' - 29: 255, # '\x1d' - 30: 255, # '\x1e' - 31: 255, # '\x1f' - 32: 253, # ' ' - 33: 253, # '!' - 34: 253, # '"' - 35: 253, # '#' - 36: 253, # '$' - 37: 253, # '%' - 38: 253, # '&' - 39: 253, # "'" - 40: 253, # '(' - 41: 253, # ')' - 42: 253, # '*' - 43: 253, # '+' - 44: 253, # ',' - 45: 253, # '-' - 46: 253, # '.' - 47: 253, # '/' - 48: 252, # '0' - 49: 252, # '1' - 50: 252, # '2' - 51: 252, # '3' - 52: 252, # '4' - 53: 252, # '5' - 54: 252, # '6' - 55: 252, # '7' - 56: 252, # '8' - 57: 252, # '9' - 58: 253, # ':' - 59: 253, # ';' - 60: 253, # '<' - 61: 253, # '=' - 62: 253, # '>' - 63: 253, # '?' - 64: 253, # '@' - 65: 182, # 'A' - 66: 106, # 'B' - 67: 107, # 'C' - 68: 100, # 'D' - 69: 183, # 'E' - 70: 184, # 'F' - 71: 185, # 'G' - 72: 101, # 'H' - 73: 94, # 'I' - 74: 186, # 'J' - 75: 187, # 'K' - 76: 108, # 'L' - 77: 109, # 'M' - 78: 110, # 'N' - 79: 111, # 'O' - 80: 188, # 'P' - 81: 189, # 'Q' - 82: 190, # 'R' - 83: 89, # 'S' - 84: 95, # 'T' - 85: 112, # 'U' - 86: 113, # 'V' - 87: 191, # 'W' - 88: 192, # 'X' - 89: 193, # 'Y' - 90: 194, # 'Z' - 91: 253, # '[' - 92: 253, # '\\' - 93: 253, # ']' - 94: 253, # '^' - 95: 253, # '_' - 96: 253, # '`' - 97: 64, # 'a' - 98: 72, # 'b' - 99: 73, # 'c' - 100: 114, # 'd' - 101: 74, # 'e' - 102: 115, # 'f' - 103: 116, # 'g' - 104: 102, # 'h' - 105: 81, # 'i' - 106: 201, # 'j' - 107: 117, # 'k' - 108: 90, # 'l' - 109: 103, # 'm' - 110: 78, # 'n' - 111: 82, # 'o' - 112: 96, # 'p' - 113: 202, # 'q' - 114: 91, # 'r' - 115: 79, # 's' - 116: 84, # 't' - 117: 104, # 'u' - 118: 105, # 'v' - 119: 97, # 'w' - 120: 98, # 'x' - 121: 92, # 'y' - 122: 203, # 'z' - 123: 253, # '{' - 124: 253, # '|' - 125: 253, # '}' - 126: 253, # '~' - 127: 253, # '\x7f' - 128: 209, # '\x80' - 129: 210, # '\x81' - 130: 211, # '\x82' - 131: 212, # '\x83' - 132: 213, # '\x84' - 133: 88, # '\x85' - 134: 214, # '\x86' - 135: 215, # '\x87' - 136: 216, # '\x88' - 137: 217, # '\x89' - 138: 218, # '\x8a' - 139: 219, # '\x8b' - 140: 220, # '\x8c' - 141: 118, # '\x8d' - 142: 221, # '\x8e' - 143: 222, # '\x8f' - 144: 223, # '\x90' - 145: 224, # '\x91' - 146: 99, # '\x92' - 147: 85, # '\x93' - 148: 83, # '\x94' - 149: 225, # '\x95' - 150: 226, # '\x96' - 151: 227, # '\x97' - 152: 228, # '\x98' - 153: 229, # '\x99' - 154: 230, # '\x9a' - 155: 231, # '\x9b' - 156: 232, # '\x9c' - 157: 233, # '\x9d' - 158: 234, # '\x9e' - 159: 235, # '\x9f' - 160: 236, # None - 161: 5, # 'ก' - 162: 30, # 'ข' - 163: 237, # 'ฃ' - 164: 24, # 'ค' - 165: 238, # 'ฅ' - 166: 75, # 'ฆ' - 167: 8, # 'ง' - 168: 26, # 'จ' - 169: 52, # 'ฉ' - 170: 34, # 'ช' - 171: 51, # 'ซ' - 172: 119, # 'ฌ' - 173: 47, # 'ญ' - 174: 58, # 'ฎ' - 175: 57, # 'ฏ' - 176: 49, # 'ฐ' - 177: 53, # 'ฑ' - 178: 55, # 'ฒ' - 179: 43, # 'ณ' - 180: 20, # 'ด' - 181: 19, # 'ต' - 182: 44, # 'ถ' - 183: 14, # 'ท' - 184: 48, # 'ธ' - 185: 3, # 'น' - 186: 17, # 'บ' - 187: 25, # 'ป' - 188: 39, # 'ผ' - 189: 62, # 'ฝ' - 190: 31, # 'พ' - 191: 54, # 'ฟ' - 192: 45, # 'ภ' - 193: 9, # 'ม' - 194: 16, # 'ย' - 195: 2, # 'ร' - 196: 61, # 'ฤ' - 197: 15, # 'ล' - 198: 239, # 'ฦ' - 199: 12, # 'ว' - 200: 42, # 'ศ' - 201: 46, # 'ษ' - 202: 18, # 'ส' - 203: 21, # 'ห' - 204: 76, # 'ฬ' - 205: 4, # 'อ' - 206: 66, # 'ฮ' - 207: 63, # 'ฯ' - 208: 22, # 'ะ' - 209: 10, # 'ั' - 210: 1, # 'า' - 211: 36, # 'ำ' - 212: 23, # 'ิ' - 213: 13, # 'ี' - 214: 40, # 'ึ' - 215: 27, # 'ื' - 216: 32, # 'ุ' - 217: 35, # 'ู' - 218: 86, # 'ฺ' - 219: 240, # None - 220: 241, # None - 221: 242, # None - 222: 243, # None - 223: 244, # '฿' - 224: 11, # 'เ' - 225: 28, # 'แ' - 226: 41, # 'โ' - 227: 29, # 'ใ' - 228: 33, # 'ไ' - 229: 245, # 'ๅ' - 230: 50, # 'ๆ' - 231: 37, # '็' - 232: 6, # '่' - 233: 7, # '้' - 234: 67, # '๊' - 235: 77, # '๋' - 236: 38, # '์' - 237: 93, # 'ํ' - 238: 246, # '๎' - 239: 247, # '๏' - 240: 68, # '๐' - 241: 56, # '๑' - 242: 59, # '๒' - 243: 65, # '๓' - 244: 69, # '๔' - 245: 60, # '๕' - 246: 70, # '๖' - 247: 80, # '๗' - 248: 71, # '๘' - 249: 87, # '๙' - 250: 248, # '๚' - 251: 249, # '๛' - 252: 250, # None - 253: 251, # None - 254: 252, # None - 255: 253, # None + 0: 255, # '\x00' + 1: 255, # '\x01' + 2: 255, # '\x02' + 3: 255, # '\x03' + 4: 255, # '\x04' + 5: 255, # '\x05' + 6: 255, # '\x06' + 7: 255, # '\x07' + 8: 255, # '\x08' + 9: 255, # '\t' + 10: 254, # '\n' + 11: 255, # '\x0b' + 12: 255, # '\x0c' + 13: 254, # '\r' + 14: 255, # '\x0e' + 15: 255, # '\x0f' + 16: 255, # '\x10' + 17: 255, # '\x11' + 18: 255, # '\x12' + 19: 255, # '\x13' + 20: 255, # '\x14' + 21: 255, # '\x15' + 22: 255, # '\x16' + 23: 255, # '\x17' + 24: 255, # '\x18' + 25: 255, # '\x19' + 26: 255, # '\x1a' + 27: 255, # '\x1b' + 28: 255, # '\x1c' + 29: 255, # '\x1d' + 30: 255, # '\x1e' + 31: 255, # '\x1f' + 32: 253, # ' ' + 33: 253, # '!' + 34: 253, # '"' + 35: 253, # '#' + 36: 253, # '$' + 37: 253, # '%' + 38: 253, # '&' + 39: 253, # "'" + 40: 253, # '(' + 41: 253, # ')' + 42: 253, # '*' + 43: 253, # '+' + 44: 253, # ',' + 45: 253, # '-' + 46: 253, # '.' + 47: 253, # '/' + 48: 252, # '0' + 49: 252, # '1' + 50: 252, # '2' + 51: 252, # '3' + 52: 252, # '4' + 53: 252, # '5' + 54: 252, # '6' + 55: 252, # '7' + 56: 252, # '8' + 57: 252, # '9' + 58: 253, # ':' + 59: 253, # ';' + 60: 253, # '<' + 61: 253, # '=' + 62: 253, # '>' + 63: 253, # '?' + 64: 253, # '@' + 65: 182, # 'A' + 66: 106, # 'B' + 67: 107, # 'C' + 68: 100, # 'D' + 69: 183, # 'E' + 70: 184, # 'F' + 71: 185, # 'G' + 72: 101, # 'H' + 73: 94, # 'I' + 74: 186, # 'J' + 75: 187, # 'K' + 76: 108, # 'L' + 77: 109, # 'M' + 78: 110, # 'N' + 79: 111, # 'O' + 80: 188, # 'P' + 81: 189, # 'Q' + 82: 190, # 'R' + 83: 89, # 'S' + 84: 95, # 'T' + 85: 112, # 'U' + 86: 113, # 'V' + 87: 191, # 'W' + 88: 192, # 'X' + 89: 193, # 'Y' + 90: 194, # 'Z' + 91: 253, # '[' + 92: 253, # '\\' + 93: 253, # ']' + 94: 253, # '^' + 95: 253, # '_' + 96: 253, # '`' + 97: 64, # 'a' + 98: 72, # 'b' + 99: 73, # 'c' + 100: 114, # 'd' + 101: 74, # 'e' + 102: 115, # 'f' + 103: 116, # 'g' + 104: 102, # 'h' + 105: 81, # 'i' + 106: 201, # 'j' + 107: 117, # 'k' + 108: 90, # 'l' + 109: 103, # 'm' + 110: 78, # 'n' + 111: 82, # 'o' + 112: 96, # 'p' + 113: 202, # 'q' + 114: 91, # 'r' + 115: 79, # 's' + 116: 84, # 't' + 117: 104, # 'u' + 118: 105, # 'v' + 119: 97, # 'w' + 120: 98, # 'x' + 121: 92, # 'y' + 122: 203, # 'z' + 123: 253, # '{' + 124: 253, # '|' + 125: 253, # '}' + 126: 253, # '~' + 127: 253, # '\x7f' + 128: 209, # '\x80' + 129: 210, # '\x81' + 130: 211, # '\x82' + 131: 212, # '\x83' + 132: 213, # '\x84' + 133: 88, # '\x85' + 134: 214, # '\x86' + 135: 215, # '\x87' + 136: 216, # '\x88' + 137: 217, # '\x89' + 138: 218, # '\x8a' + 139: 219, # '\x8b' + 140: 220, # '\x8c' + 141: 118, # '\x8d' + 142: 221, # '\x8e' + 143: 222, # '\x8f' + 144: 223, # '\x90' + 145: 224, # '\x91' + 146: 99, # '\x92' + 147: 85, # '\x93' + 148: 83, # '\x94' + 149: 225, # '\x95' + 150: 226, # '\x96' + 151: 227, # '\x97' + 152: 228, # '\x98' + 153: 229, # '\x99' + 154: 230, # '\x9a' + 155: 231, # '\x9b' + 156: 232, # '\x9c' + 157: 233, # '\x9d' + 158: 234, # '\x9e' + 159: 235, # '\x9f' + 160: 236, # None + 161: 5, # 'ก' + 162: 30, # 'ข' + 163: 237, # 'ฃ' + 164: 24, # 'ค' + 165: 238, # 'ฅ' + 166: 75, # 'ฆ' + 167: 8, # 'ง' + 168: 26, # 'จ' + 169: 52, # 'ฉ' + 170: 34, # 'ช' + 171: 51, # 'ซ' + 172: 119, # 'ฌ' + 173: 47, # 'ญ' + 174: 58, # 'ฎ' + 175: 57, # 'ฏ' + 176: 49, # 'ฐ' + 177: 53, # 'ฑ' + 178: 55, # 'ฒ' + 179: 43, # 'ณ' + 180: 20, # 'ด' + 181: 19, # 'ต' + 182: 44, # 'ถ' + 183: 14, # 'ท' + 184: 48, # 'ธ' + 185: 3, # 'น' + 186: 17, # 'บ' + 187: 25, # 'ป' + 188: 39, # 'ผ' + 189: 62, # 'ฝ' + 190: 31, # 'พ' + 191: 54, # 'ฟ' + 192: 45, # 'ภ' + 193: 9, # 'ม' + 194: 16, # 'ย' + 195: 2, # 'ร' + 196: 61, # 'ฤ' + 197: 15, # 'ล' + 198: 239, # 'ฦ' + 199: 12, # 'ว' + 200: 42, # 'ศ' + 201: 46, # 'ษ' + 202: 18, # 'ส' + 203: 21, # 'ห' + 204: 76, # 'ฬ' + 205: 4, # 'อ' + 206: 66, # 'ฮ' + 207: 63, # 'ฯ' + 208: 22, # 'ะ' + 209: 10, # 'ั' + 210: 1, # 'า' + 211: 36, # 'ำ' + 212: 23, # 'ิ' + 213: 13, # 'ี' + 214: 40, # 'ึ' + 215: 27, # 'ื' + 216: 32, # 'ุ' + 217: 35, # 'ู' + 218: 86, # 'ฺ' + 219: 240, # None + 220: 241, # None + 221: 242, # None + 222: 243, # None + 223: 244, # '฿' + 224: 11, # 'เ' + 225: 28, # 'แ' + 226: 41, # 'โ' + 227: 29, # 'ใ' + 228: 33, # 'ไ' + 229: 245, # 'ๅ' + 230: 50, # 'ๆ' + 231: 37, # '็' + 232: 6, # '่' + 233: 7, # '้' + 234: 67, # '๊' + 235: 77, # '๋' + 236: 38, # '์' + 237: 93, # 'ํ' + 238: 246, # '๎' + 239: 247, # '๏' + 240: 68, # '๐' + 241: 56, # '๑' + 242: 59, # '๒' + 243: 65, # '๓' + 244: 69, # '๔' + 245: 60, # '๕' + 246: 70, # '๖' + 247: 80, # '๗' + 248: 71, # '๘' + 249: 87, # '๙' + 250: 248, # '๚' + 251: 249, # '๛' + 252: 250, # None + 253: 251, # None + 254: 252, # None + 255: 253, # None } -TIS_620_THAI_MODEL = SingleByteCharSetModel(charset_name='TIS-620', - language='Thai', - char_to_order_map=TIS_620_THAI_CHAR_TO_ORDER, - language_model=THAI_LANG_MODEL, - typical_positive_ratio=0.926386, - keep_ascii_letters=False, - alphabet='กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรฤลฦวศษสหฬอฮฯะัาำิีึืฺุู฿เแโใไๅๆ็่้๊๋์ํ๎๏๐๑๒๓๔๕๖๗๘๙๚๛') - +TIS_620_THAI_MODEL = SingleByteCharSetModel( + charset_name="TIS-620", + language="Thai", + char_to_order_map=TIS_620_THAI_CHAR_TO_ORDER, + language_model=THAI_LANG_MODEL, + typical_positive_ratio=0.926386, + keep_ascii_letters=False, + alphabet="กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรฤลฦวศษสหฬอฮฯะัาำิีึืฺุู฿เแโใไๅๆ็่้๊๋์ํ๎๏๐๑๒๓๔๕๖๗๘๙๚๛", +) diff --git a/src/pip/_vendor/chardet/langturkishmodel.py b/src/pip/_vendor/chardet/langturkishmodel.py index 43f4230aead..291857c25c8 100644 --- a/src/pip/_vendor/chardet/langturkishmodel.py +++ b/src/pip/_vendor/chardet/langturkishmodel.py @@ -1,9 +1,5 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - from pip._vendor.chardet.sbcharsetprober import SingleByteCharSetModel - # 3: Positive # 2: Likely # 1: Unlikely @@ -4115,269 +4111,270 @@ # Character Mapping Table(s): ISO_8859_9_TURKISH_CHAR_TO_ORDER = { - 0: 255, # '\x00' - 1: 255, # '\x01' - 2: 255, # '\x02' - 3: 255, # '\x03' - 4: 255, # '\x04' - 5: 255, # '\x05' - 6: 255, # '\x06' - 7: 255, # '\x07' - 8: 255, # '\x08' - 9: 255, # '\t' - 10: 255, # '\n' - 11: 255, # '\x0b' - 12: 255, # '\x0c' - 13: 255, # '\r' - 14: 255, # '\x0e' - 15: 255, # '\x0f' - 16: 255, # '\x10' - 17: 255, # '\x11' - 18: 255, # '\x12' - 19: 255, # '\x13' - 20: 255, # '\x14' - 21: 255, # '\x15' - 22: 255, # '\x16' - 23: 255, # '\x17' - 24: 255, # '\x18' - 25: 255, # '\x19' - 26: 255, # '\x1a' - 27: 255, # '\x1b' - 28: 255, # '\x1c' - 29: 255, # '\x1d' - 30: 255, # '\x1e' - 31: 255, # '\x1f' - 32: 255, # ' ' - 33: 255, # '!' - 34: 255, # '"' - 35: 255, # '#' - 36: 255, # '$' - 37: 255, # '%' - 38: 255, # '&' - 39: 255, # "'" - 40: 255, # '(' - 41: 255, # ')' - 42: 255, # '*' - 43: 255, # '+' - 44: 255, # ',' - 45: 255, # '-' - 46: 255, # '.' - 47: 255, # '/' - 48: 255, # '0' - 49: 255, # '1' - 50: 255, # '2' - 51: 255, # '3' - 52: 255, # '4' - 53: 255, # '5' - 54: 255, # '6' - 55: 255, # '7' - 56: 255, # '8' - 57: 255, # '9' - 58: 255, # ':' - 59: 255, # ';' - 60: 255, # '<' - 61: 255, # '=' - 62: 255, # '>' - 63: 255, # '?' - 64: 255, # '@' - 65: 23, # 'A' - 66: 37, # 'B' - 67: 47, # 'C' - 68: 39, # 'D' - 69: 29, # 'E' - 70: 52, # 'F' - 71: 36, # 'G' - 72: 45, # 'H' - 73: 53, # 'I' - 74: 60, # 'J' - 75: 16, # 'K' - 76: 49, # 'L' - 77: 20, # 'M' - 78: 46, # 'N' - 79: 42, # 'O' - 80: 48, # 'P' - 81: 69, # 'Q' - 82: 44, # 'R' - 83: 35, # 'S' - 84: 31, # 'T' - 85: 51, # 'U' - 86: 38, # 'V' - 87: 62, # 'W' - 88: 65, # 'X' - 89: 43, # 'Y' - 90: 56, # 'Z' - 91: 255, # '[' - 92: 255, # '\\' - 93: 255, # ']' - 94: 255, # '^' - 95: 255, # '_' - 96: 255, # '`' - 97: 1, # 'a' - 98: 21, # 'b' - 99: 28, # 'c' - 100: 12, # 'd' - 101: 2, # 'e' - 102: 18, # 'f' - 103: 27, # 'g' - 104: 25, # 'h' - 105: 3, # 'i' - 106: 24, # 'j' - 107: 10, # 'k' - 108: 5, # 'l' - 109: 13, # 'm' - 110: 4, # 'n' - 111: 15, # 'o' - 112: 26, # 'p' - 113: 64, # 'q' - 114: 7, # 'r' - 115: 8, # 's' - 116: 9, # 't' - 117: 14, # 'u' - 118: 32, # 'v' - 119: 57, # 'w' - 120: 58, # 'x' - 121: 11, # 'y' - 122: 22, # 'z' - 123: 255, # '{' - 124: 255, # '|' - 125: 255, # '}' - 126: 255, # '~' - 127: 255, # '\x7f' - 128: 180, # '\x80' - 129: 179, # '\x81' - 130: 178, # '\x82' - 131: 177, # '\x83' - 132: 176, # '\x84' - 133: 175, # '\x85' - 134: 174, # '\x86' - 135: 173, # '\x87' - 136: 172, # '\x88' - 137: 171, # '\x89' - 138: 170, # '\x8a' - 139: 169, # '\x8b' - 140: 168, # '\x8c' - 141: 167, # '\x8d' - 142: 166, # '\x8e' - 143: 165, # '\x8f' - 144: 164, # '\x90' - 145: 163, # '\x91' - 146: 162, # '\x92' - 147: 161, # '\x93' - 148: 160, # '\x94' - 149: 159, # '\x95' - 150: 101, # '\x96' - 151: 158, # '\x97' - 152: 157, # '\x98' - 153: 156, # '\x99' - 154: 155, # '\x9a' - 155: 154, # '\x9b' - 156: 153, # '\x9c' - 157: 152, # '\x9d' - 158: 151, # '\x9e' - 159: 106, # '\x9f' - 160: 150, # '\xa0' - 161: 149, # '¡' - 162: 148, # '¢' - 163: 147, # '£' - 164: 146, # '¤' - 165: 145, # '¥' - 166: 144, # '¦' - 167: 100, # '§' - 168: 143, # '¨' - 169: 142, # '©' - 170: 141, # 'ª' - 171: 140, # '«' - 172: 139, # '¬' - 173: 138, # '\xad' - 174: 137, # '®' - 175: 136, # '¯' - 176: 94, # '°' - 177: 80, # '±' - 178: 93, # '²' - 179: 135, # '³' - 180: 105, # '´' - 181: 134, # 'µ' - 182: 133, # '¶' - 183: 63, # '·' - 184: 132, # '¸' - 185: 131, # '¹' - 186: 130, # 'º' - 187: 129, # '»' - 188: 128, # '¼' - 189: 127, # '½' - 190: 126, # '¾' - 191: 125, # '¿' - 192: 124, # 'À' - 193: 104, # 'Á' - 194: 73, # 'Â' - 195: 99, # 'Ã' - 196: 79, # 'Ä' - 197: 85, # 'Å' - 198: 123, # 'Æ' - 199: 54, # 'Ç' - 200: 122, # 'È' - 201: 98, # 'É' - 202: 92, # 'Ê' - 203: 121, # 'Ë' - 204: 120, # 'Ì' - 205: 91, # 'Í' - 206: 103, # 'Î' - 207: 119, # 'Ï' - 208: 68, # 'Ğ' - 209: 118, # 'Ñ' - 210: 117, # 'Ò' - 211: 97, # 'Ó' - 212: 116, # 'Ô' - 213: 115, # 'Õ' - 214: 50, # 'Ö' - 215: 90, # '×' - 216: 114, # 'Ø' - 217: 113, # 'Ù' - 218: 112, # 'Ú' - 219: 111, # 'Û' - 220: 55, # 'Ü' - 221: 41, # 'İ' - 222: 40, # 'Ş' - 223: 86, # 'ß' - 224: 89, # 'à' - 225: 70, # 'á' - 226: 59, # 'â' - 227: 78, # 'ã' - 228: 71, # 'ä' - 229: 82, # 'å' - 230: 88, # 'æ' - 231: 33, # 'ç' - 232: 77, # 'è' - 233: 66, # 'é' - 234: 84, # 'ê' - 235: 83, # 'ë' - 236: 110, # 'ì' - 237: 75, # 'í' - 238: 61, # 'î' - 239: 96, # 'ï' - 240: 30, # 'ğ' - 241: 67, # 'ñ' - 242: 109, # 'ò' - 243: 74, # 'ó' - 244: 87, # 'ô' - 245: 102, # 'õ' - 246: 34, # 'ö' - 247: 95, # '÷' - 248: 81, # 'ø' - 249: 108, # 'ù' - 250: 76, # 'ú' - 251: 72, # 'û' - 252: 17, # 'ü' - 253: 6, # 'ı' - 254: 19, # 'ş' - 255: 107, # 'ÿ' + 0: 255, # '\x00' + 1: 255, # '\x01' + 2: 255, # '\x02' + 3: 255, # '\x03' + 4: 255, # '\x04' + 5: 255, # '\x05' + 6: 255, # '\x06' + 7: 255, # '\x07' + 8: 255, # '\x08' + 9: 255, # '\t' + 10: 255, # '\n' + 11: 255, # '\x0b' + 12: 255, # '\x0c' + 13: 255, # '\r' + 14: 255, # '\x0e' + 15: 255, # '\x0f' + 16: 255, # '\x10' + 17: 255, # '\x11' + 18: 255, # '\x12' + 19: 255, # '\x13' + 20: 255, # '\x14' + 21: 255, # '\x15' + 22: 255, # '\x16' + 23: 255, # '\x17' + 24: 255, # '\x18' + 25: 255, # '\x19' + 26: 255, # '\x1a' + 27: 255, # '\x1b' + 28: 255, # '\x1c' + 29: 255, # '\x1d' + 30: 255, # '\x1e' + 31: 255, # '\x1f' + 32: 255, # ' ' + 33: 255, # '!' + 34: 255, # '"' + 35: 255, # '#' + 36: 255, # '$' + 37: 255, # '%' + 38: 255, # '&' + 39: 255, # "'" + 40: 255, # '(' + 41: 255, # ')' + 42: 255, # '*' + 43: 255, # '+' + 44: 255, # ',' + 45: 255, # '-' + 46: 255, # '.' + 47: 255, # '/' + 48: 255, # '0' + 49: 255, # '1' + 50: 255, # '2' + 51: 255, # '3' + 52: 255, # '4' + 53: 255, # '5' + 54: 255, # '6' + 55: 255, # '7' + 56: 255, # '8' + 57: 255, # '9' + 58: 255, # ':' + 59: 255, # ';' + 60: 255, # '<' + 61: 255, # '=' + 62: 255, # '>' + 63: 255, # '?' + 64: 255, # '@' + 65: 23, # 'A' + 66: 37, # 'B' + 67: 47, # 'C' + 68: 39, # 'D' + 69: 29, # 'E' + 70: 52, # 'F' + 71: 36, # 'G' + 72: 45, # 'H' + 73: 53, # 'I' + 74: 60, # 'J' + 75: 16, # 'K' + 76: 49, # 'L' + 77: 20, # 'M' + 78: 46, # 'N' + 79: 42, # 'O' + 80: 48, # 'P' + 81: 69, # 'Q' + 82: 44, # 'R' + 83: 35, # 'S' + 84: 31, # 'T' + 85: 51, # 'U' + 86: 38, # 'V' + 87: 62, # 'W' + 88: 65, # 'X' + 89: 43, # 'Y' + 90: 56, # 'Z' + 91: 255, # '[' + 92: 255, # '\\' + 93: 255, # ']' + 94: 255, # '^' + 95: 255, # '_' + 96: 255, # '`' + 97: 1, # 'a' + 98: 21, # 'b' + 99: 28, # 'c' + 100: 12, # 'd' + 101: 2, # 'e' + 102: 18, # 'f' + 103: 27, # 'g' + 104: 25, # 'h' + 105: 3, # 'i' + 106: 24, # 'j' + 107: 10, # 'k' + 108: 5, # 'l' + 109: 13, # 'm' + 110: 4, # 'n' + 111: 15, # 'o' + 112: 26, # 'p' + 113: 64, # 'q' + 114: 7, # 'r' + 115: 8, # 's' + 116: 9, # 't' + 117: 14, # 'u' + 118: 32, # 'v' + 119: 57, # 'w' + 120: 58, # 'x' + 121: 11, # 'y' + 122: 22, # 'z' + 123: 255, # '{' + 124: 255, # '|' + 125: 255, # '}' + 126: 255, # '~' + 127: 255, # '\x7f' + 128: 180, # '\x80' + 129: 179, # '\x81' + 130: 178, # '\x82' + 131: 177, # '\x83' + 132: 176, # '\x84' + 133: 175, # '\x85' + 134: 174, # '\x86' + 135: 173, # '\x87' + 136: 172, # '\x88' + 137: 171, # '\x89' + 138: 170, # '\x8a' + 139: 169, # '\x8b' + 140: 168, # '\x8c' + 141: 167, # '\x8d' + 142: 166, # '\x8e' + 143: 165, # '\x8f' + 144: 164, # '\x90' + 145: 163, # '\x91' + 146: 162, # '\x92' + 147: 161, # '\x93' + 148: 160, # '\x94' + 149: 159, # '\x95' + 150: 101, # '\x96' + 151: 158, # '\x97' + 152: 157, # '\x98' + 153: 156, # '\x99' + 154: 155, # '\x9a' + 155: 154, # '\x9b' + 156: 153, # '\x9c' + 157: 152, # '\x9d' + 158: 151, # '\x9e' + 159: 106, # '\x9f' + 160: 150, # '\xa0' + 161: 149, # '¡' + 162: 148, # '¢' + 163: 147, # '£' + 164: 146, # '¤' + 165: 145, # '¥' + 166: 144, # '¦' + 167: 100, # '§' + 168: 143, # '¨' + 169: 142, # '©' + 170: 141, # 'ª' + 171: 140, # '«' + 172: 139, # '¬' + 173: 138, # '\xad' + 174: 137, # '®' + 175: 136, # '¯' + 176: 94, # '°' + 177: 80, # '±' + 178: 93, # '²' + 179: 135, # '³' + 180: 105, # '´' + 181: 134, # 'µ' + 182: 133, # '¶' + 183: 63, # '·' + 184: 132, # '¸' + 185: 131, # '¹' + 186: 130, # 'º' + 187: 129, # '»' + 188: 128, # '¼' + 189: 127, # '½' + 190: 126, # '¾' + 191: 125, # '¿' + 192: 124, # 'À' + 193: 104, # 'Á' + 194: 73, # 'Â' + 195: 99, # 'Ã' + 196: 79, # 'Ä' + 197: 85, # 'Å' + 198: 123, # 'Æ' + 199: 54, # 'Ç' + 200: 122, # 'È' + 201: 98, # 'É' + 202: 92, # 'Ê' + 203: 121, # 'Ë' + 204: 120, # 'Ì' + 205: 91, # 'Í' + 206: 103, # 'Î' + 207: 119, # 'Ï' + 208: 68, # 'Ğ' + 209: 118, # 'Ñ' + 210: 117, # 'Ò' + 211: 97, # 'Ó' + 212: 116, # 'Ô' + 213: 115, # 'Õ' + 214: 50, # 'Ö' + 215: 90, # '×' + 216: 114, # 'Ø' + 217: 113, # 'Ù' + 218: 112, # 'Ú' + 219: 111, # 'Û' + 220: 55, # 'Ü' + 221: 41, # 'İ' + 222: 40, # 'Ş' + 223: 86, # 'ß' + 224: 89, # 'à' + 225: 70, # 'á' + 226: 59, # 'â' + 227: 78, # 'ã' + 228: 71, # 'ä' + 229: 82, # 'å' + 230: 88, # 'æ' + 231: 33, # 'ç' + 232: 77, # 'è' + 233: 66, # 'é' + 234: 84, # 'ê' + 235: 83, # 'ë' + 236: 110, # 'ì' + 237: 75, # 'í' + 238: 61, # 'î' + 239: 96, # 'ï' + 240: 30, # 'ğ' + 241: 67, # 'ñ' + 242: 109, # 'ò' + 243: 74, # 'ó' + 244: 87, # 'ô' + 245: 102, # 'õ' + 246: 34, # 'ö' + 247: 95, # '÷' + 248: 81, # 'ø' + 249: 108, # 'ù' + 250: 76, # 'ú' + 251: 72, # 'û' + 252: 17, # 'ü' + 253: 6, # 'ı' + 254: 19, # 'ş' + 255: 107, # 'ÿ' } -ISO_8859_9_TURKISH_MODEL = SingleByteCharSetModel(charset_name='ISO-8859-9', - language='Turkish', - char_to_order_map=ISO_8859_9_TURKISH_CHAR_TO_ORDER, - language_model=TURKISH_LANG_MODEL, - typical_positive_ratio=0.97029, - keep_ascii_letters=True, - alphabet='ABCDEFGHIJKLMNOPRSTUVYZabcdefghijklmnoprstuvyzÂÇÎÖÛÜâçîöûüĞğİıŞş') - +ISO_8859_9_TURKISH_MODEL = SingleByteCharSetModel( + charset_name="ISO-8859-9", + language="Turkish", + char_to_order_map=ISO_8859_9_TURKISH_CHAR_TO_ORDER, + language_model=TURKISH_LANG_MODEL, + typical_positive_ratio=0.97029, + keep_ascii_letters=True, + alphabet="ABCDEFGHIJKLMNOPRSTUVYZabcdefghijklmnoprstuvyzÂÇÎÖÛÜâçîöûüĞğİıŞş", +) diff --git a/src/pip/_vendor/chardet/latin1prober.py b/src/pip/_vendor/chardet/latin1prober.py index 7d1e8c20fb0..59a01d91b87 100644 --- a/src/pip/_vendor/chardet/latin1prober.py +++ b/src/pip/_vendor/chardet/latin1prober.py @@ -26,6 +26,8 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### +from typing import List, Union + from .charsetprober import CharSetProber from .enums import ProbingState @@ -41,6 +43,7 @@ ASO = 7 # accent small other CLASS_NUM = 8 # total classes +# fmt: off Latin1_CharToClass = ( OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 00 - 07 OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 08 - 0F @@ -91,34 +94,34 @@ 0, 3, 1, 3, 1, 1, 1, 3, # ASV 0, 3, 1, 3, 1, 1, 3, 3, # ASO ) +# fmt: on class Latin1Prober(CharSetProber): - def __init__(self): - super(Latin1Prober, self).__init__() - self._last_char_class = None - self._freq_counter = None + def __init__(self) -> None: + super().__init__() + self._last_char_class = OTH + self._freq_counter: List[int] = [] self.reset() - def reset(self): + def reset(self) -> None: self._last_char_class = OTH self._freq_counter = [0] * FREQ_CAT_NUM - CharSetProber.reset(self) + super().reset() @property - def charset_name(self): + def charset_name(self) -> str: return "ISO-8859-1" @property - def language(self): + def language(self) -> str: return "" - def feed(self, byte_str): - byte_str = self.filter_with_english_letters(byte_str) + def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState: + byte_str = self.remove_xml_tags(byte_str) for c in byte_str: char_class = Latin1_CharToClass[c] - freq = Latin1ClassModel[(self._last_char_class * CLASS_NUM) - + char_class] + freq = Latin1ClassModel[(self._last_char_class * CLASS_NUM) + char_class] if freq == 0: self._state = ProbingState.NOT_ME break @@ -127,19 +130,18 @@ def feed(self, byte_str): return self.state - def get_confidence(self): + def get_confidence(self) -> float: if self.state == ProbingState.NOT_ME: return 0.01 total = sum(self._freq_counter) - if total < 0.01: - confidence = 0.0 - else: - confidence = ((self._freq_counter[3] - self._freq_counter[1] * 20.0) - / total) - if confidence < 0.0: - confidence = 0.0 + confidence = ( + 0.0 + if total < 0.01 + else (self._freq_counter[3] - self._freq_counter[1] * 20.0) / total + ) + confidence = max(confidence, 0.0) # lower the confidence of latin1 so that other more accurate # detector can take priority. - confidence = confidence * 0.73 + confidence *= 0.73 return confidence diff --git a/src/pip/_vendor/chardet/macromanprober.py b/src/pip/_vendor/chardet/macromanprober.py new file mode 100644 index 00000000000..1425d10ecaa --- /dev/null +++ b/src/pip/_vendor/chardet/macromanprober.py @@ -0,0 +1,162 @@ +######################## BEGIN LICENSE BLOCK ######################## +# This code was modified from latin1prober.py by Rob Speer . +# The Original Code is Mozilla Universal charset detector code. +# +# The Initial Developer of the Original Code is +# Netscape Communications Corporation. +# Portions created by the Initial Developer are Copyright (C) 2001 +# the Initial Developer. All Rights Reserved. +# +# Contributor(s): +# Rob Speer - adapt to MacRoman encoding +# Mark Pilgrim - port to Python +# Shy Shalom - original C code +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### + +from typing import List, Union + +from .charsetprober import CharSetProber +from .enums import ProbingState + +FREQ_CAT_NUM = 4 + +UDF = 0 # undefined +OTH = 1 # other +ASC = 2 # ascii capital letter +ASS = 3 # ascii small letter +ACV = 4 # accent capital vowel +ACO = 5 # accent capital other +ASV = 6 # accent small vowel +ASO = 7 # accent small other +ODD = 8 # character that is unlikely to appear +CLASS_NUM = 9 # total classes + +# The change from Latin1 is that we explicitly look for extended characters +# that are infrequently-occurring symbols, and consider them to always be +# improbable. This should let MacRoman get out of the way of more likely +# encodings in most situations. + +# fmt: off +MacRoman_CharToClass = ( + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 00 - 07 + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 08 - 0F + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 10 - 17 + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 18 - 1F + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 20 - 27 + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 28 - 2F + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 30 - 37 + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # 38 - 3F + OTH, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 40 - 47 + ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 48 - 4F + ASC, ASC, ASC, ASC, ASC, ASC, ASC, ASC, # 50 - 57 + ASC, ASC, ASC, OTH, OTH, OTH, OTH, OTH, # 58 - 5F + OTH, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 60 - 67 + ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 68 - 6F + ASS, ASS, ASS, ASS, ASS, ASS, ASS, ASS, # 70 - 77 + ASS, ASS, ASS, OTH, OTH, OTH, OTH, OTH, # 78 - 7F + ACV, ACV, ACO, ACV, ACO, ACV, ACV, ASV, # 80 - 87 + ASV, ASV, ASV, ASV, ASV, ASO, ASV, ASV, # 88 - 8F + ASV, ASV, ASV, ASV, ASV, ASV, ASO, ASV, # 90 - 97 + ASV, ASV, ASV, ASV, ASV, ASV, ASV, ASV, # 98 - 9F + OTH, OTH, OTH, OTH, OTH, OTH, OTH, ASO, # A0 - A7 + OTH, OTH, ODD, ODD, OTH, OTH, ACV, ACV, # A8 - AF + OTH, OTH, OTH, OTH, OTH, OTH, OTH, OTH, # B0 - B7 + OTH, OTH, OTH, OTH, OTH, OTH, ASV, ASV, # B8 - BF + OTH, OTH, ODD, OTH, ODD, OTH, OTH, OTH, # C0 - C7 + OTH, OTH, OTH, ACV, ACV, ACV, ACV, ASV, # C8 - CF + OTH, OTH, OTH, OTH, OTH, OTH, OTH, ODD, # D0 - D7 + ASV, ACV, ODD, OTH, OTH, OTH, OTH, OTH, # D8 - DF + OTH, OTH, OTH, OTH, OTH, ACV, ACV, ACV, # E0 - E7 + ACV, ACV, ACV, ACV, ACV, ACV, ACV, ACV, # E8 - EF + ODD, ACV, ACV, ACV, ACV, ASV, ODD, ODD, # F0 - F7 + ODD, ODD, ODD, ODD, ODD, ODD, ODD, ODD, # F8 - FF +) + +# 0 : illegal +# 1 : very unlikely +# 2 : normal +# 3 : very likely +MacRomanClassModel = ( +# UDF OTH ASC ASS ACV ACO ASV ASO ODD + 0, 0, 0, 0, 0, 0, 0, 0, 0, # UDF + 0, 3, 3, 3, 3, 3, 3, 3, 1, # OTH + 0, 3, 3, 3, 3, 3, 3, 3, 1, # ASC + 0, 3, 3, 3, 1, 1, 3, 3, 1, # ASS + 0, 3, 3, 3, 1, 2, 1, 2, 1, # ACV + 0, 3, 3, 3, 3, 3, 3, 3, 1, # ACO + 0, 3, 1, 3, 1, 1, 1, 3, 1, # ASV + 0, 3, 1, 3, 1, 1, 3, 3, 1, # ASO + 0, 1, 1, 1, 1, 1, 1, 1, 1, # ODD +) +# fmt: on + + +class MacRomanProber(CharSetProber): + def __init__(self) -> None: + super().__init__() + self._last_char_class = OTH + self._freq_counter: List[int] = [] + self.reset() + + def reset(self) -> None: + self._last_char_class = OTH + self._freq_counter = [0] * FREQ_CAT_NUM + + # express the prior that MacRoman is a somewhat rare encoding; + # this can be done by starting out in a slightly improbable state + # that must be overcome + self._freq_counter[2] = 10 + + super().reset() + + @property + def charset_name(self) -> str: + return "MacRoman" + + @property + def language(self) -> str: + return "" + + def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState: + byte_str = self.remove_xml_tags(byte_str) + for c in byte_str: + char_class = MacRoman_CharToClass[c] + freq = MacRomanClassModel[(self._last_char_class * CLASS_NUM) + char_class] + if freq == 0: + self._state = ProbingState.NOT_ME + break + self._freq_counter[freq] += 1 + self._last_char_class = char_class + + return self.state + + def get_confidence(self) -> float: + if self.state == ProbingState.NOT_ME: + return 0.01 + + total = sum(self._freq_counter) + confidence = ( + 0.0 + if total < 0.01 + else (self._freq_counter[3] - self._freq_counter[1] * 20.0) / total + ) + confidence = max(confidence, 0.0) + # lower the confidence of MacRoman so that other more accurate + # detector can take priority. + confidence *= 0.73 + return confidence diff --git a/src/pip/_vendor/chardet/mbcharsetprober.py b/src/pip/_vendor/chardet/mbcharsetprober.py index 6256ecfd1e2..666307e8fe0 100644 --- a/src/pip/_vendor/chardet/mbcharsetprober.py +++ b/src/pip/_vendor/chardet/mbcharsetprober.py @@ -27,8 +27,12 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### +from typing import Optional, Union + +from .chardistribution import CharDistributionAnalysis from .charsetprober import CharSetProber -from .enums import ProbingState, MachineState +from .codingstatemachine import CodingStateMachine +from .enums import LanguageFilter, MachineState, ProbingState class MultiByteCharSetProber(CharSetProber): @@ -36,56 +40,56 @@ class MultiByteCharSetProber(CharSetProber): MultiByteCharSetProber """ - def __init__(self, lang_filter=None): - super(MultiByteCharSetProber, self).__init__(lang_filter=lang_filter) - self.distribution_analyzer = None - self.coding_sm = None - self._last_char = [0, 0] + def __init__(self, lang_filter: LanguageFilter = LanguageFilter.NONE) -> None: + super().__init__(lang_filter=lang_filter) + self.distribution_analyzer: Optional[CharDistributionAnalysis] = None + self.coding_sm: Optional[CodingStateMachine] = None + self._last_char = bytearray(b"\0\0") - def reset(self): - super(MultiByteCharSetProber, self).reset() + def reset(self) -> None: + super().reset() if self.coding_sm: self.coding_sm.reset() if self.distribution_analyzer: self.distribution_analyzer.reset() - self._last_char = [0, 0] - - @property - def charset_name(self): - raise NotImplementedError + self._last_char = bytearray(b"\0\0") - @property - def language(self): - raise NotImplementedError + def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState: + assert self.coding_sm is not None + assert self.distribution_analyzer is not None - def feed(self, byte_str): - for i in range(len(byte_str)): - coding_state = self.coding_sm.next_state(byte_str[i]) + for i, byte in enumerate(byte_str): + coding_state = self.coding_sm.next_state(byte) if coding_state == MachineState.ERROR: - self.logger.debug('%s %s prober hit error at byte %s', - self.charset_name, self.language, i) + self.logger.debug( + "%s %s prober hit error at byte %s", + self.charset_name, + self.language, + i, + ) self._state = ProbingState.NOT_ME break - elif coding_state == MachineState.ITS_ME: + if coding_state == MachineState.ITS_ME: self._state = ProbingState.FOUND_IT break - elif coding_state == MachineState.START: + if coding_state == MachineState.START: char_len = self.coding_sm.get_current_charlen() if i == 0: - self._last_char[1] = byte_str[0] + self._last_char[1] = byte self.distribution_analyzer.feed(self._last_char, char_len) else: - self.distribution_analyzer.feed(byte_str[i - 1:i + 1], - char_len) + self.distribution_analyzer.feed(byte_str[i - 1 : i + 1], char_len) self._last_char[0] = byte_str[-1] if self.state == ProbingState.DETECTING: - if (self.distribution_analyzer.got_enough_data() and - (self.get_confidence() > self.SHORTCUT_THRESHOLD)): + if self.distribution_analyzer.got_enough_data() and ( + self.get_confidence() > self.SHORTCUT_THRESHOLD + ): self._state = ProbingState.FOUND_IT return self.state - def get_confidence(self): + def get_confidence(self) -> float: + assert self.distribution_analyzer is not None return self.distribution_analyzer.get_confidence() diff --git a/src/pip/_vendor/chardet/mbcsgroupprober.py b/src/pip/_vendor/chardet/mbcsgroupprober.py index 530abe75e0c..6cb9cc7b3bc 100644 --- a/src/pip/_vendor/chardet/mbcsgroupprober.py +++ b/src/pip/_vendor/chardet/mbcsgroupprober.py @@ -27,20 +27,22 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### +from .big5prober import Big5Prober from .charsetgroupprober import CharSetGroupProber -from .utf8prober import UTF8Prober -from .sjisprober import SJISProber +from .cp949prober import CP949Prober +from .enums import LanguageFilter from .eucjpprober import EUCJPProber -from .gb2312prober import GB2312Prober from .euckrprober import EUCKRProber -from .cp949prober import CP949Prober -from .big5prober import Big5Prober from .euctwprober import EUCTWProber +from .gb2312prober import GB2312Prober +from .johabprober import JOHABProber +from .sjisprober import SJISProber +from .utf8prober import UTF8Prober class MBCSGroupProber(CharSetGroupProber): - def __init__(self, lang_filter=None): - super(MBCSGroupProber, self).__init__(lang_filter=lang_filter) + def __init__(self, lang_filter: LanguageFilter = LanguageFilter.NONE) -> None: + super().__init__(lang_filter=lang_filter) self.probers = [ UTF8Prober(), SJISProber(), @@ -49,6 +51,7 @@ def __init__(self, lang_filter=None): EUCKRProber(), CP949Prober(), Big5Prober(), - EUCTWProber() + EUCTWProber(), + JOHABProber(), ] self.reset() diff --git a/src/pip/_vendor/chardet/mbcssm.py b/src/pip/_vendor/chardet/mbcssm.py index 8360d0f284e..7bbe97e6665 100644 --- a/src/pip/_vendor/chardet/mbcssm.py +++ b/src/pip/_vendor/chardet/mbcssm.py @@ -25,43 +25,45 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### +from .codingstatemachinedict import CodingStateMachineDict from .enums import MachineState # BIG5 +# fmt: off BIG5_CLS = ( - 1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as legal value - 1,1,1,1,1,1,0,0, # 08 - 0f - 1,1,1,1,1,1,1,1, # 10 - 17 - 1,1,1,0,1,1,1,1, # 18 - 1f - 1,1,1,1,1,1,1,1, # 20 - 27 - 1,1,1,1,1,1,1,1, # 28 - 2f - 1,1,1,1,1,1,1,1, # 30 - 37 - 1,1,1,1,1,1,1,1, # 38 - 3f - 2,2,2,2,2,2,2,2, # 40 - 47 - 2,2,2,2,2,2,2,2, # 48 - 4f - 2,2,2,2,2,2,2,2, # 50 - 57 - 2,2,2,2,2,2,2,2, # 58 - 5f - 2,2,2,2,2,2,2,2, # 60 - 67 - 2,2,2,2,2,2,2,2, # 68 - 6f - 2,2,2,2,2,2,2,2, # 70 - 77 - 2,2,2,2,2,2,2,1, # 78 - 7f - 4,4,4,4,4,4,4,4, # 80 - 87 - 4,4,4,4,4,4,4,4, # 88 - 8f - 4,4,4,4,4,4,4,4, # 90 - 97 - 4,4,4,4,4,4,4,4, # 98 - 9f - 4,3,3,3,3,3,3,3, # a0 - a7 - 3,3,3,3,3,3,3,3, # a8 - af - 3,3,3,3,3,3,3,3, # b0 - b7 - 3,3,3,3,3,3,3,3, # b8 - bf - 3,3,3,3,3,3,3,3, # c0 - c7 - 3,3,3,3,3,3,3,3, # c8 - cf - 3,3,3,3,3,3,3,3, # d0 - d7 - 3,3,3,3,3,3,3,3, # d8 - df - 3,3,3,3,3,3,3,3, # e0 - e7 - 3,3,3,3,3,3,3,3, # e8 - ef - 3,3,3,3,3,3,3,3, # f0 - f7 - 3,3,3,3,3,3,3,0 # f8 - ff + 1, 1, 1, 1, 1, 1, 1, 1, # 00 - 07 #allow 0x00 as legal value + 1, 1, 1, 1, 1, 1, 0, 0, # 08 - 0f + 1, 1, 1, 1, 1, 1, 1, 1, # 10 - 17 + 1, 1, 1, 0, 1, 1, 1, 1, # 18 - 1f + 1, 1, 1, 1, 1, 1, 1, 1, # 20 - 27 + 1, 1, 1, 1, 1, 1, 1, 1, # 28 - 2f + 1, 1, 1, 1, 1, 1, 1, 1, # 30 - 37 + 1, 1, 1, 1, 1, 1, 1, 1, # 38 - 3f + 2, 2, 2, 2, 2, 2, 2, 2, # 40 - 47 + 2, 2, 2, 2, 2, 2, 2, 2, # 48 - 4f + 2, 2, 2, 2, 2, 2, 2, 2, # 50 - 57 + 2, 2, 2, 2, 2, 2, 2, 2, # 58 - 5f + 2, 2, 2, 2, 2, 2, 2, 2, # 60 - 67 + 2, 2, 2, 2, 2, 2, 2, 2, # 68 - 6f + 2, 2, 2, 2, 2, 2, 2, 2, # 70 - 77 + 2, 2, 2, 2, 2, 2, 2, 1, # 78 - 7f + 4, 4, 4, 4, 4, 4, 4, 4, # 80 - 87 + 4, 4, 4, 4, 4, 4, 4, 4, # 88 - 8f + 4, 4, 4, 4, 4, 4, 4, 4, # 90 - 97 + 4, 4, 4, 4, 4, 4, 4, 4, # 98 - 9f + 4, 3, 3, 3, 3, 3, 3, 3, # a0 - a7 + 3, 3, 3, 3, 3, 3, 3, 3, # a8 - af + 3, 3, 3, 3, 3, 3, 3, 3, # b0 - b7 + 3, 3, 3, 3, 3, 3, 3, 3, # b8 - bf + 3, 3, 3, 3, 3, 3, 3, 3, # c0 - c7 + 3, 3, 3, 3, 3, 3, 3, 3, # c8 - cf + 3, 3, 3, 3, 3, 3, 3, 3, # d0 - d7 + 3, 3, 3, 3, 3, 3, 3, 3, # d8 - df + 3, 3, 3, 3, 3, 3, 3, 3, # e0 - e7 + 3, 3, 3, 3, 3, 3, 3, 3, # e8 - ef + 3, 3, 3, 3, 3, 3, 3, 3, # f0 - f7 + 3, 3, 3, 3, 3, 3, 3, 0 # f8 - ff ) BIG5_ST = ( @@ -69,34 +71,37 @@ MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,#08-0f MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START#10-17 ) +# fmt: on BIG5_CHAR_LEN_TABLE = (0, 1, 1, 2, 0) -BIG5_SM_MODEL = {'class_table': BIG5_CLS, - 'class_factor': 5, - 'state_table': BIG5_ST, - 'char_len_table': BIG5_CHAR_LEN_TABLE, - 'name': 'Big5'} +BIG5_SM_MODEL: CodingStateMachineDict = { + "class_table": BIG5_CLS, + "class_factor": 5, + "state_table": BIG5_ST, + "char_len_table": BIG5_CHAR_LEN_TABLE, + "name": "Big5", +} # CP949 - +# fmt: off CP949_CLS = ( - 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,0,0, # 00 - 0f - 1,1,1,1,1,1,1,1, 1,1,1,0,1,1,1,1, # 10 - 1f - 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 20 - 2f - 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, # 30 - 3f - 1,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4, # 40 - 4f - 4,4,5,5,5,5,5,5, 5,5,5,1,1,1,1,1, # 50 - 5f - 1,5,5,5,5,5,5,5, 5,5,5,5,5,5,5,5, # 60 - 6f - 5,5,5,5,5,5,5,5, 5,5,5,1,1,1,1,1, # 70 - 7f - 0,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, # 80 - 8f - 6,6,6,6,6,6,6,6, 6,6,6,6,6,6,6,6, # 90 - 9f - 6,7,7,7,7,7,7,7, 7,7,7,7,7,8,8,8, # a0 - af - 7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7, # b0 - bf - 7,7,7,7,7,7,9,2, 2,3,2,2,2,2,2,2, # c0 - cf - 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, # d0 - df - 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, # e0 - ef - 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,0, # f0 - ff + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, # 00 - 0f + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, # 10 - 1f + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, # 20 - 2f + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, # 30 - 3f + 1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, # 40 - 4f + 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 1, 1, 1, # 50 - 5f + 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, # 60 - 6f + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 1, 1, 1, # 70 - 7f + 0, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, # 80 - 8f + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, # 90 - 9f + 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, # a0 - af + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, # b0 - bf + 7, 7, 7, 7, 7, 7, 9, 2, 2, 3, 2, 2, 2, 2, 2, 2, # c0 - cf + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, # d0 - df + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, # e0 - ef + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, # f0 - ff ) CP949_ST = ( @@ -109,50 +114,53 @@ MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START, # 5 MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START, # 6 ) +# fmt: on CP949_CHAR_LEN_TABLE = (0, 1, 2, 0, 1, 1, 2, 2, 0, 2) -CP949_SM_MODEL = {'class_table': CP949_CLS, - 'class_factor': 10, - 'state_table': CP949_ST, - 'char_len_table': CP949_CHAR_LEN_TABLE, - 'name': 'CP949'} +CP949_SM_MODEL: CodingStateMachineDict = { + "class_table": CP949_CLS, + "class_factor": 10, + "state_table": CP949_ST, + "char_len_table": CP949_CHAR_LEN_TABLE, + "name": "CP949", +} # EUC-JP - +# fmt: off EUCJP_CLS = ( - 4,4,4,4,4,4,4,4, # 00 - 07 - 4,4,4,4,4,4,5,5, # 08 - 0f - 4,4,4,4,4,4,4,4, # 10 - 17 - 4,4,4,5,4,4,4,4, # 18 - 1f - 4,4,4,4,4,4,4,4, # 20 - 27 - 4,4,4,4,4,4,4,4, # 28 - 2f - 4,4,4,4,4,4,4,4, # 30 - 37 - 4,4,4,4,4,4,4,4, # 38 - 3f - 4,4,4,4,4,4,4,4, # 40 - 47 - 4,4,4,4,4,4,4,4, # 48 - 4f - 4,4,4,4,4,4,4,4, # 50 - 57 - 4,4,4,4,4,4,4,4, # 58 - 5f - 4,4,4,4,4,4,4,4, # 60 - 67 - 4,4,4,4,4,4,4,4, # 68 - 6f - 4,4,4,4,4,4,4,4, # 70 - 77 - 4,4,4,4,4,4,4,4, # 78 - 7f - 5,5,5,5,5,5,5,5, # 80 - 87 - 5,5,5,5,5,5,1,3, # 88 - 8f - 5,5,5,5,5,5,5,5, # 90 - 97 - 5,5,5,5,5,5,5,5, # 98 - 9f - 5,2,2,2,2,2,2,2, # a0 - a7 - 2,2,2,2,2,2,2,2, # a8 - af - 2,2,2,2,2,2,2,2, # b0 - b7 - 2,2,2,2,2,2,2,2, # b8 - bf - 2,2,2,2,2,2,2,2, # c0 - c7 - 2,2,2,2,2,2,2,2, # c8 - cf - 2,2,2,2,2,2,2,2, # d0 - d7 - 2,2,2,2,2,2,2,2, # d8 - df - 0,0,0,0,0,0,0,0, # e0 - e7 - 0,0,0,0,0,0,0,0, # e8 - ef - 0,0,0,0,0,0,0,0, # f0 - f7 - 0,0,0,0,0,0,0,5 # f8 - ff + 4, 4, 4, 4, 4, 4, 4, 4, # 00 - 07 + 4, 4, 4, 4, 4, 4, 5, 5, # 08 - 0f + 4, 4, 4, 4, 4, 4, 4, 4, # 10 - 17 + 4, 4, 4, 5, 4, 4, 4, 4, # 18 - 1f + 4, 4, 4, 4, 4, 4, 4, 4, # 20 - 27 + 4, 4, 4, 4, 4, 4, 4, 4, # 28 - 2f + 4, 4, 4, 4, 4, 4, 4, 4, # 30 - 37 + 4, 4, 4, 4, 4, 4, 4, 4, # 38 - 3f + 4, 4, 4, 4, 4, 4, 4, 4, # 40 - 47 + 4, 4, 4, 4, 4, 4, 4, 4, # 48 - 4f + 4, 4, 4, 4, 4, 4, 4, 4, # 50 - 57 + 4, 4, 4, 4, 4, 4, 4, 4, # 58 - 5f + 4, 4, 4, 4, 4, 4, 4, 4, # 60 - 67 + 4, 4, 4, 4, 4, 4, 4, 4, # 68 - 6f + 4, 4, 4, 4, 4, 4, 4, 4, # 70 - 77 + 4, 4, 4, 4, 4, 4, 4, 4, # 78 - 7f + 5, 5, 5, 5, 5, 5, 5, 5, # 80 - 87 + 5, 5, 5, 5, 5, 5, 1, 3, # 88 - 8f + 5, 5, 5, 5, 5, 5, 5, 5, # 90 - 97 + 5, 5, 5, 5, 5, 5, 5, 5, # 98 - 9f + 5, 2, 2, 2, 2, 2, 2, 2, # a0 - a7 + 2, 2, 2, 2, 2, 2, 2, 2, # a8 - af + 2, 2, 2, 2, 2, 2, 2, 2, # b0 - b7 + 2, 2, 2, 2, 2, 2, 2, 2, # b8 - bf + 2, 2, 2, 2, 2, 2, 2, 2, # c0 - c7 + 2, 2, 2, 2, 2, 2, 2, 2, # c8 - cf + 2, 2, 2, 2, 2, 2, 2, 2, # d0 - d7 + 2, 2, 2, 2, 2, 2, 2, 2, # d8 - df + 0, 0, 0, 0, 0, 0, 0, 0, # e0 - e7 + 0, 0, 0, 0, 0, 0, 0, 0, # e8 - ef + 0, 0, 0, 0, 0, 0, 0, 0, # f0 - f7 + 0, 0, 0, 0, 0, 0, 0, 5 # f8 - ff ) EUCJP_ST = ( @@ -162,100 +170,163 @@ MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 3,MachineState.ERROR,#18-1f 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START#20-27 ) +# fmt: on EUCJP_CHAR_LEN_TABLE = (2, 2, 2, 3, 1, 0) -EUCJP_SM_MODEL = {'class_table': EUCJP_CLS, - 'class_factor': 6, - 'state_table': EUCJP_ST, - 'char_len_table': EUCJP_CHAR_LEN_TABLE, - 'name': 'EUC-JP'} +EUCJP_SM_MODEL: CodingStateMachineDict = { + "class_table": EUCJP_CLS, + "class_factor": 6, + "state_table": EUCJP_ST, + "char_len_table": EUCJP_CHAR_LEN_TABLE, + "name": "EUC-JP", +} # EUC-KR - +# fmt: off EUCKR_CLS = ( - 1,1,1,1,1,1,1,1, # 00 - 07 - 1,1,1,1,1,1,0,0, # 08 - 0f - 1,1,1,1,1,1,1,1, # 10 - 17 - 1,1,1,0,1,1,1,1, # 18 - 1f - 1,1,1,1,1,1,1,1, # 20 - 27 - 1,1,1,1,1,1,1,1, # 28 - 2f - 1,1,1,1,1,1,1,1, # 30 - 37 - 1,1,1,1,1,1,1,1, # 38 - 3f - 1,1,1,1,1,1,1,1, # 40 - 47 + 1, 1, 1, 1, 1, 1, 1, 1, # 00 - 07 + 1, 1, 1, 1, 1, 1, 0, 0, # 08 - 0f + 1, 1, 1, 1, 1, 1, 1, 1, # 10 - 17 + 1, 1, 1, 0, 1, 1, 1, 1, # 18 - 1f + 1, 1, 1, 1, 1, 1, 1, 1, # 20 - 27 + 1, 1, 1, 1, 1, 1, 1, 1, # 28 - 2f + 1, 1, 1, 1, 1, 1, 1, 1, # 30 - 37 + 1, 1, 1, 1, 1, 1, 1, 1, # 38 - 3f + 1, 1, 1, 1, 1, 1, 1, 1, # 40 - 47 + 1, 1, 1, 1, 1, 1, 1, 1, # 48 - 4f + 1, 1, 1, 1, 1, 1, 1, 1, # 50 - 57 + 1, 1, 1, 1, 1, 1, 1, 1, # 58 - 5f + 1, 1, 1, 1, 1, 1, 1, 1, # 60 - 67 + 1, 1, 1, 1, 1, 1, 1, 1, # 68 - 6f + 1, 1, 1, 1, 1, 1, 1, 1, # 70 - 77 + 1, 1, 1, 1, 1, 1, 1, 1, # 78 - 7f + 0, 0, 0, 0, 0, 0, 0, 0, # 80 - 87 + 0, 0, 0, 0, 0, 0, 0, 0, # 88 - 8f + 0, 0, 0, 0, 0, 0, 0, 0, # 90 - 97 + 0, 0, 0, 0, 0, 0, 0, 0, # 98 - 9f + 0, 2, 2, 2, 2, 2, 2, 2, # a0 - a7 + 2, 2, 2, 2, 2, 3, 3, 3, # a8 - af + 2, 2, 2, 2, 2, 2, 2, 2, # b0 - b7 + 2, 2, 2, 2, 2, 2, 2, 2, # b8 - bf + 2, 2, 2, 2, 2, 2, 2, 2, # c0 - c7 + 2, 3, 2, 2, 2, 2, 2, 2, # c8 - cf + 2, 2, 2, 2, 2, 2, 2, 2, # d0 - d7 + 2, 2, 2, 2, 2, 2, 2, 2, # d8 - df + 2, 2, 2, 2, 2, 2, 2, 2, # e0 - e7 + 2, 2, 2, 2, 2, 2, 2, 2, # e8 - ef + 2, 2, 2, 2, 2, 2, 2, 2, # f0 - f7 + 2, 2, 2, 2, 2, 2, 2, 0 # f8 - ff +) + +EUCKR_ST = ( + MachineState.ERROR,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07 + MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START #08-0f +) +# fmt: on + +EUCKR_CHAR_LEN_TABLE = (0, 1, 2, 0) + +EUCKR_SM_MODEL: CodingStateMachineDict = { + "class_table": EUCKR_CLS, + "class_factor": 4, + "state_table": EUCKR_ST, + "char_len_table": EUCKR_CHAR_LEN_TABLE, + "name": "EUC-KR", +} + +# JOHAB +# fmt: off +JOHAB_CLS = ( + 4,4,4,4,4,4,4,4, # 00 - 07 + 4,4,4,4,4,4,0,0, # 08 - 0f + 4,4,4,4,4,4,4,4, # 10 - 17 + 4,4,4,0,4,4,4,4, # 18 - 1f + 4,4,4,4,4,4,4,4, # 20 - 27 + 4,4,4,4,4,4,4,4, # 28 - 2f + 4,3,3,3,3,3,3,3, # 30 - 37 + 3,3,3,3,3,3,3,3, # 38 - 3f + 3,1,1,1,1,1,1,1, # 40 - 47 1,1,1,1,1,1,1,1, # 48 - 4f 1,1,1,1,1,1,1,1, # 50 - 57 1,1,1,1,1,1,1,1, # 58 - 5f 1,1,1,1,1,1,1,1, # 60 - 67 1,1,1,1,1,1,1,1, # 68 - 6f 1,1,1,1,1,1,1,1, # 70 - 77 - 1,1,1,1,1,1,1,1, # 78 - 7f - 0,0,0,0,0,0,0,0, # 80 - 87 - 0,0,0,0,0,0,0,0, # 88 - 8f - 0,0,0,0,0,0,0,0, # 90 - 97 - 0,0,0,0,0,0,0,0, # 98 - 9f - 0,2,2,2,2,2,2,2, # a0 - a7 - 2,2,2,2,2,3,3,3, # a8 - af - 2,2,2,2,2,2,2,2, # b0 - b7 - 2,2,2,2,2,2,2,2, # b8 - bf - 2,2,2,2,2,2,2,2, # c0 - c7 - 2,3,2,2,2,2,2,2, # c8 - cf - 2,2,2,2,2,2,2,2, # d0 - d7 - 2,2,2,2,2,2,2,2, # d8 - df - 2,2,2,2,2,2,2,2, # e0 - e7 - 2,2,2,2,2,2,2,2, # e8 - ef - 2,2,2,2,2,2,2,2, # f0 - f7 - 2,2,2,2,2,2,2,0 # f8 - ff + 1,1,1,1,1,1,1,2, # 78 - 7f + 6,6,6,6,8,8,8,8, # 80 - 87 + 8,8,8,8,8,8,8,8, # 88 - 8f + 8,7,7,7,7,7,7,7, # 90 - 97 + 7,7,7,7,7,7,7,7, # 98 - 9f + 7,7,7,7,7,7,7,7, # a0 - a7 + 7,7,7,7,7,7,7,7, # a8 - af + 7,7,7,7,7,7,7,7, # b0 - b7 + 7,7,7,7,7,7,7,7, # b8 - bf + 7,7,7,7,7,7,7,7, # c0 - c7 + 7,7,7,7,7,7,7,7, # c8 - cf + 7,7,7,7,5,5,5,5, # d0 - d7 + 5,9,9,9,9,9,9,5, # d8 - df + 9,9,9,9,9,9,9,9, # e0 - e7 + 9,9,9,9,9,9,9,9, # e8 - ef + 9,9,9,9,9,9,9,9, # f0 - f7 + 9,9,5,5,5,5,5,0 # f8 - ff ) -EUCKR_ST = ( - MachineState.ERROR,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07 - MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START #08-0f +JOHAB_ST = ( +# cls = 0 1 2 3 4 5 6 7 8 9 + MachineState.ERROR ,MachineState.START ,MachineState.START ,MachineState.START ,MachineState.START ,MachineState.ERROR ,MachineState.ERROR ,3 ,3 ,4 , # MachineState.START + MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME, # MachineState.ITS_ME + MachineState.ERROR ,MachineState.ERROR ,MachineState.ERROR ,MachineState.ERROR ,MachineState.ERROR ,MachineState.ERROR ,MachineState.ERROR ,MachineState.ERROR ,MachineState.ERROR ,MachineState.ERROR , # MachineState.ERROR + MachineState.ERROR ,MachineState.START ,MachineState.START ,MachineState.ERROR ,MachineState.ERROR ,MachineState.START ,MachineState.START ,MachineState.START ,MachineState.START ,MachineState.START , # 3 + MachineState.ERROR ,MachineState.START ,MachineState.ERROR ,MachineState.START ,MachineState.ERROR ,MachineState.START ,MachineState.ERROR ,MachineState.START ,MachineState.ERROR ,MachineState.START , # 4 ) +# fmt: on -EUCKR_CHAR_LEN_TABLE = (0, 1, 2, 0) +JOHAB_CHAR_LEN_TABLE = (0, 1, 1, 1, 1, 0, 0, 2, 2, 2) -EUCKR_SM_MODEL = {'class_table': EUCKR_CLS, - 'class_factor': 4, - 'state_table': EUCKR_ST, - 'char_len_table': EUCKR_CHAR_LEN_TABLE, - 'name': 'EUC-KR'} +JOHAB_SM_MODEL: CodingStateMachineDict = { + "class_table": JOHAB_CLS, + "class_factor": 10, + "state_table": JOHAB_ST, + "char_len_table": JOHAB_CHAR_LEN_TABLE, + "name": "Johab", +} # EUC-TW - +# fmt: off EUCTW_CLS = ( - 2,2,2,2,2,2,2,2, # 00 - 07 - 2,2,2,2,2,2,0,0, # 08 - 0f - 2,2,2,2,2,2,2,2, # 10 - 17 - 2,2,2,0,2,2,2,2, # 18 - 1f - 2,2,2,2,2,2,2,2, # 20 - 27 - 2,2,2,2,2,2,2,2, # 28 - 2f - 2,2,2,2,2,2,2,2, # 30 - 37 - 2,2,2,2,2,2,2,2, # 38 - 3f - 2,2,2,2,2,2,2,2, # 40 - 47 - 2,2,2,2,2,2,2,2, # 48 - 4f - 2,2,2,2,2,2,2,2, # 50 - 57 - 2,2,2,2,2,2,2,2, # 58 - 5f - 2,2,2,2,2,2,2,2, # 60 - 67 - 2,2,2,2,2,2,2,2, # 68 - 6f - 2,2,2,2,2,2,2,2, # 70 - 77 - 2,2,2,2,2,2,2,2, # 78 - 7f - 0,0,0,0,0,0,0,0, # 80 - 87 - 0,0,0,0,0,0,6,0, # 88 - 8f - 0,0,0,0,0,0,0,0, # 90 - 97 - 0,0,0,0,0,0,0,0, # 98 - 9f - 0,3,4,4,4,4,4,4, # a0 - a7 - 5,5,1,1,1,1,1,1, # a8 - af - 1,1,1,1,1,1,1,1, # b0 - b7 - 1,1,1,1,1,1,1,1, # b8 - bf - 1,1,3,1,3,3,3,3, # c0 - c7 - 3,3,3,3,3,3,3,3, # c8 - cf - 3,3,3,3,3,3,3,3, # d0 - d7 - 3,3,3,3,3,3,3,3, # d8 - df - 3,3,3,3,3,3,3,3, # e0 - e7 - 3,3,3,3,3,3,3,3, # e8 - ef - 3,3,3,3,3,3,3,3, # f0 - f7 - 3,3,3,3,3,3,3,0 # f8 - ff + 2, 2, 2, 2, 2, 2, 2, 2, # 00 - 07 + 2, 2, 2, 2, 2, 2, 0, 0, # 08 - 0f + 2, 2, 2, 2, 2, 2, 2, 2, # 10 - 17 + 2, 2, 2, 0, 2, 2, 2, 2, # 18 - 1f + 2, 2, 2, 2, 2, 2, 2, 2, # 20 - 27 + 2, 2, 2, 2, 2, 2, 2, 2, # 28 - 2f + 2, 2, 2, 2, 2, 2, 2, 2, # 30 - 37 + 2, 2, 2, 2, 2, 2, 2, 2, # 38 - 3f + 2, 2, 2, 2, 2, 2, 2, 2, # 40 - 47 + 2, 2, 2, 2, 2, 2, 2, 2, # 48 - 4f + 2, 2, 2, 2, 2, 2, 2, 2, # 50 - 57 + 2, 2, 2, 2, 2, 2, 2, 2, # 58 - 5f + 2, 2, 2, 2, 2, 2, 2, 2, # 60 - 67 + 2, 2, 2, 2, 2, 2, 2, 2, # 68 - 6f + 2, 2, 2, 2, 2, 2, 2, 2, # 70 - 77 + 2, 2, 2, 2, 2, 2, 2, 2, # 78 - 7f + 0, 0, 0, 0, 0, 0, 0, 0, # 80 - 87 + 0, 0, 0, 0, 0, 0, 6, 0, # 88 - 8f + 0, 0, 0, 0, 0, 0, 0, 0, # 90 - 97 + 0, 0, 0, 0, 0, 0, 0, 0, # 98 - 9f + 0, 3, 4, 4, 4, 4, 4, 4, # a0 - a7 + 5, 5, 1, 1, 1, 1, 1, 1, # a8 - af + 1, 1, 1, 1, 1, 1, 1, 1, # b0 - b7 + 1, 1, 1, 1, 1, 1, 1, 1, # b8 - bf + 1, 1, 3, 1, 3, 3, 3, 3, # c0 - c7 + 3, 3, 3, 3, 3, 3, 3, 3, # c8 - cf + 3, 3, 3, 3, 3, 3, 3, 3, # d0 - d7 + 3, 3, 3, 3, 3, 3, 3, 3, # d8 - df + 3, 3, 3, 3, 3, 3, 3, 3, # e0 - e7 + 3, 3, 3, 3, 3, 3, 3, 3, # e8 - ef + 3, 3, 3, 3, 3, 3, 3, 3, # f0 - f7 + 3, 3, 3, 3, 3, 3, 3, 0 # f8 - ff ) EUCTW_ST = ( @@ -266,50 +337,53 @@ 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.ERROR,MachineState.START,MachineState.START,#20-27 MachineState.START,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START #28-2f ) +# fmt: on EUCTW_CHAR_LEN_TABLE = (0, 0, 1, 2, 2, 2, 3) -EUCTW_SM_MODEL = {'class_table': EUCTW_CLS, - 'class_factor': 7, - 'state_table': EUCTW_ST, - 'char_len_table': EUCTW_CHAR_LEN_TABLE, - 'name': 'x-euc-tw'} +EUCTW_SM_MODEL: CodingStateMachineDict = { + "class_table": EUCTW_CLS, + "class_factor": 7, + "state_table": EUCTW_ST, + "char_len_table": EUCTW_CHAR_LEN_TABLE, + "name": "x-euc-tw", +} # GB2312 - +# fmt: off GB2312_CLS = ( - 1,1,1,1,1,1,1,1, # 00 - 07 - 1,1,1,1,1,1,0,0, # 08 - 0f - 1,1,1,1,1,1,1,1, # 10 - 17 - 1,1,1,0,1,1,1,1, # 18 - 1f - 1,1,1,1,1,1,1,1, # 20 - 27 - 1,1,1,1,1,1,1,1, # 28 - 2f - 3,3,3,3,3,3,3,3, # 30 - 37 - 3,3,1,1,1,1,1,1, # 38 - 3f - 2,2,2,2,2,2,2,2, # 40 - 47 - 2,2,2,2,2,2,2,2, # 48 - 4f - 2,2,2,2,2,2,2,2, # 50 - 57 - 2,2,2,2,2,2,2,2, # 58 - 5f - 2,2,2,2,2,2,2,2, # 60 - 67 - 2,2,2,2,2,2,2,2, # 68 - 6f - 2,2,2,2,2,2,2,2, # 70 - 77 - 2,2,2,2,2,2,2,4, # 78 - 7f - 5,6,6,6,6,6,6,6, # 80 - 87 - 6,6,6,6,6,6,6,6, # 88 - 8f - 6,6,6,6,6,6,6,6, # 90 - 97 - 6,6,6,6,6,6,6,6, # 98 - 9f - 6,6,6,6,6,6,6,6, # a0 - a7 - 6,6,6,6,6,6,6,6, # a8 - af - 6,6,6,6,6,6,6,6, # b0 - b7 - 6,6,6,6,6,6,6,6, # b8 - bf - 6,6,6,6,6,6,6,6, # c0 - c7 - 6,6,6,6,6,6,6,6, # c8 - cf - 6,6,6,6,6,6,6,6, # d0 - d7 - 6,6,6,6,6,6,6,6, # d8 - df - 6,6,6,6,6,6,6,6, # e0 - e7 - 6,6,6,6,6,6,6,6, # e8 - ef - 6,6,6,6,6,6,6,6, # f0 - f7 - 6,6,6,6,6,6,6,0 # f8 - ff + 1, 1, 1, 1, 1, 1, 1, 1, # 00 - 07 + 1, 1, 1, 1, 1, 1, 0, 0, # 08 - 0f + 1, 1, 1, 1, 1, 1, 1, 1, # 10 - 17 + 1, 1, 1, 0, 1, 1, 1, 1, # 18 - 1f + 1, 1, 1, 1, 1, 1, 1, 1, # 20 - 27 + 1, 1, 1, 1, 1, 1, 1, 1, # 28 - 2f + 3, 3, 3, 3, 3, 3, 3, 3, # 30 - 37 + 3, 3, 1, 1, 1, 1, 1, 1, # 38 - 3f + 2, 2, 2, 2, 2, 2, 2, 2, # 40 - 47 + 2, 2, 2, 2, 2, 2, 2, 2, # 48 - 4f + 2, 2, 2, 2, 2, 2, 2, 2, # 50 - 57 + 2, 2, 2, 2, 2, 2, 2, 2, # 58 - 5f + 2, 2, 2, 2, 2, 2, 2, 2, # 60 - 67 + 2, 2, 2, 2, 2, 2, 2, 2, # 68 - 6f + 2, 2, 2, 2, 2, 2, 2, 2, # 70 - 77 + 2, 2, 2, 2, 2, 2, 2, 4, # 78 - 7f + 5, 6, 6, 6, 6, 6, 6, 6, # 80 - 87 + 6, 6, 6, 6, 6, 6, 6, 6, # 88 - 8f + 6, 6, 6, 6, 6, 6, 6, 6, # 90 - 97 + 6, 6, 6, 6, 6, 6, 6, 6, # 98 - 9f + 6, 6, 6, 6, 6, 6, 6, 6, # a0 - a7 + 6, 6, 6, 6, 6, 6, 6, 6, # a8 - af + 6, 6, 6, 6, 6, 6, 6, 6, # b0 - b7 + 6, 6, 6, 6, 6, 6, 6, 6, # b8 - bf + 6, 6, 6, 6, 6, 6, 6, 6, # c0 - c7 + 6, 6, 6, 6, 6, 6, 6, 6, # c8 - cf + 6, 6, 6, 6, 6, 6, 6, 6, # d0 - d7 + 6, 6, 6, 6, 6, 6, 6, 6, # d8 - df + 6, 6, 6, 6, 6, 6, 6, 6, # e0 - e7 + 6, 6, 6, 6, 6, 6, 6, 6, # e8 - ef + 6, 6, 6, 6, 6, 6, 6, 6, # f0 - f7 + 6, 6, 6, 6, 6, 6, 6, 0 # f8 - ff ) GB2312_ST = ( @@ -320,6 +394,7 @@ MachineState.ERROR,MachineState.ERROR, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ERROR,#20-27 MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.START #28-2f ) +# fmt: on # To be accurate, the length of class 6 can be either 2 or 4. # But it is not necessary to discriminate between the two since @@ -328,100 +403,105 @@ # 2 here. GB2312_CHAR_LEN_TABLE = (0, 1, 1, 1, 1, 1, 2) -GB2312_SM_MODEL = {'class_table': GB2312_CLS, - 'class_factor': 7, - 'state_table': GB2312_ST, - 'char_len_table': GB2312_CHAR_LEN_TABLE, - 'name': 'GB2312'} +GB2312_SM_MODEL: CodingStateMachineDict = { + "class_table": GB2312_CLS, + "class_factor": 7, + "state_table": GB2312_ST, + "char_len_table": GB2312_CHAR_LEN_TABLE, + "name": "GB2312", +} # Shift_JIS - +# fmt: off SJIS_CLS = ( - 1,1,1,1,1,1,1,1, # 00 - 07 - 1,1,1,1,1,1,0,0, # 08 - 0f - 1,1,1,1,1,1,1,1, # 10 - 17 - 1,1,1,0,1,1,1,1, # 18 - 1f - 1,1,1,1,1,1,1,1, # 20 - 27 - 1,1,1,1,1,1,1,1, # 28 - 2f - 1,1,1,1,1,1,1,1, # 30 - 37 - 1,1,1,1,1,1,1,1, # 38 - 3f - 2,2,2,2,2,2,2,2, # 40 - 47 - 2,2,2,2,2,2,2,2, # 48 - 4f - 2,2,2,2,2,2,2,2, # 50 - 57 - 2,2,2,2,2,2,2,2, # 58 - 5f - 2,2,2,2,2,2,2,2, # 60 - 67 - 2,2,2,2,2,2,2,2, # 68 - 6f - 2,2,2,2,2,2,2,2, # 70 - 77 - 2,2,2,2,2,2,2,1, # 78 - 7f - 3,3,3,3,3,2,2,3, # 80 - 87 - 3,3,3,3,3,3,3,3, # 88 - 8f - 3,3,3,3,3,3,3,3, # 90 - 97 - 3,3,3,3,3,3,3,3, # 98 - 9f + 1, 1, 1, 1, 1, 1, 1, 1, # 00 - 07 + 1, 1, 1, 1, 1, 1, 0, 0, # 08 - 0f + 1, 1, 1, 1, 1, 1, 1, 1, # 10 - 17 + 1, 1, 1, 0, 1, 1, 1, 1, # 18 - 1f + 1, 1, 1, 1, 1, 1, 1, 1, # 20 - 27 + 1, 1, 1, 1, 1, 1, 1, 1, # 28 - 2f + 1, 1, 1, 1, 1, 1, 1, 1, # 30 - 37 + 1, 1, 1, 1, 1, 1, 1, 1, # 38 - 3f + 2, 2, 2, 2, 2, 2, 2, 2, # 40 - 47 + 2, 2, 2, 2, 2, 2, 2, 2, # 48 - 4f + 2, 2, 2, 2, 2, 2, 2, 2, # 50 - 57 + 2, 2, 2, 2, 2, 2, 2, 2, # 58 - 5f + 2, 2, 2, 2, 2, 2, 2, 2, # 60 - 67 + 2, 2, 2, 2, 2, 2, 2, 2, # 68 - 6f + 2, 2, 2, 2, 2, 2, 2, 2, # 70 - 77 + 2, 2, 2, 2, 2, 2, 2, 1, # 78 - 7f + 3, 3, 3, 3, 3, 2, 2, 3, # 80 - 87 + 3, 3, 3, 3, 3, 3, 3, 3, # 88 - 8f + 3, 3, 3, 3, 3, 3, 3, 3, # 90 - 97 + 3, 3, 3, 3, 3, 3, 3, 3, # 98 - 9f #0xa0 is illegal in sjis encoding, but some pages does #contain such byte. We need to be more error forgiven. - 2,2,2,2,2,2,2,2, # a0 - a7 - 2,2,2,2,2,2,2,2, # a8 - af - 2,2,2,2,2,2,2,2, # b0 - b7 - 2,2,2,2,2,2,2,2, # b8 - bf - 2,2,2,2,2,2,2,2, # c0 - c7 - 2,2,2,2,2,2,2,2, # c8 - cf - 2,2,2,2,2,2,2,2, # d0 - d7 - 2,2,2,2,2,2,2,2, # d8 - df - 3,3,3,3,3,3,3,3, # e0 - e7 - 3,3,3,3,3,4,4,4, # e8 - ef - 3,3,3,3,3,3,3,3, # f0 - f7 - 3,3,3,3,3,0,0,0) # f8 - ff - + 2, 2, 2, 2, 2, 2, 2, 2, # a0 - a7 + 2, 2, 2, 2, 2, 2, 2, 2, # a8 - af + 2, 2, 2, 2, 2, 2, 2, 2, # b0 - b7 + 2, 2, 2, 2, 2, 2, 2, 2, # b8 - bf + 2, 2, 2, 2, 2, 2, 2, 2, # c0 - c7 + 2, 2, 2, 2, 2, 2, 2, 2, # c8 - cf + 2, 2, 2, 2, 2, 2, 2, 2, # d0 - d7 + 2, 2, 2, 2, 2, 2, 2, 2, # d8 - df + 3, 3, 3, 3, 3, 3, 3, 3, # e0 - e7 + 3, 3, 3, 3, 3, 4, 4, 4, # e8 - ef + 3, 3, 3, 3, 3, 3, 3, 3, # f0 - f7 + 3, 3, 3, 3, 3, 0, 0, 0, # f8 - ff +) SJIS_ST = ( MachineState.ERROR,MachineState.START,MachineState.START, 3,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,#00-07 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ITS_ME,#08-0f MachineState.ITS_ME,MachineState.ITS_ME,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START #10-17 ) +# fmt: on SJIS_CHAR_LEN_TABLE = (0, 1, 1, 2, 0, 0) -SJIS_SM_MODEL = {'class_table': SJIS_CLS, - 'class_factor': 6, - 'state_table': SJIS_ST, - 'char_len_table': SJIS_CHAR_LEN_TABLE, - 'name': 'Shift_JIS'} +SJIS_SM_MODEL: CodingStateMachineDict = { + "class_table": SJIS_CLS, + "class_factor": 6, + "state_table": SJIS_ST, + "char_len_table": SJIS_CHAR_LEN_TABLE, + "name": "Shift_JIS", +} # UCS2-BE - +# fmt: off UCS2BE_CLS = ( - 0,0,0,0,0,0,0,0, # 00 - 07 - 0,0,1,0,0,2,0,0, # 08 - 0f - 0,0,0,0,0,0,0,0, # 10 - 17 - 0,0,0,3,0,0,0,0, # 18 - 1f - 0,0,0,0,0,0,0,0, # 20 - 27 - 0,3,3,3,3,3,0,0, # 28 - 2f - 0,0,0,0,0,0,0,0, # 30 - 37 - 0,0,0,0,0,0,0,0, # 38 - 3f - 0,0,0,0,0,0,0,0, # 40 - 47 - 0,0,0,0,0,0,0,0, # 48 - 4f - 0,0,0,0,0,0,0,0, # 50 - 57 - 0,0,0,0,0,0,0,0, # 58 - 5f - 0,0,0,0,0,0,0,0, # 60 - 67 - 0,0,0,0,0,0,0,0, # 68 - 6f - 0,0,0,0,0,0,0,0, # 70 - 77 - 0,0,0,0,0,0,0,0, # 78 - 7f - 0,0,0,0,0,0,0,0, # 80 - 87 - 0,0,0,0,0,0,0,0, # 88 - 8f - 0,0,0,0,0,0,0,0, # 90 - 97 - 0,0,0,0,0,0,0,0, # 98 - 9f - 0,0,0,0,0,0,0,0, # a0 - a7 - 0,0,0,0,0,0,0,0, # a8 - af - 0,0,0,0,0,0,0,0, # b0 - b7 - 0,0,0,0,0,0,0,0, # b8 - bf - 0,0,0,0,0,0,0,0, # c0 - c7 - 0,0,0,0,0,0,0,0, # c8 - cf - 0,0,0,0,0,0,0,0, # d0 - d7 - 0,0,0,0,0,0,0,0, # d8 - df - 0,0,0,0,0,0,0,0, # e0 - e7 - 0,0,0,0,0,0,0,0, # e8 - ef - 0,0,0,0,0,0,0,0, # f0 - f7 - 0,0,0,0,0,0,4,5 # f8 - ff + 0, 0, 0, 0, 0, 0, 0, 0, # 00 - 07 + 0, 0, 1, 0, 0, 2, 0, 0, # 08 - 0f + 0, 0, 0, 0, 0, 0, 0, 0, # 10 - 17 + 0, 0, 0, 3, 0, 0, 0, 0, # 18 - 1f + 0, 0, 0, 0, 0, 0, 0, 0, # 20 - 27 + 0, 3, 3, 3, 3, 3, 0, 0, # 28 - 2f + 0, 0, 0, 0, 0, 0, 0, 0, # 30 - 37 + 0, 0, 0, 0, 0, 0, 0, 0, # 38 - 3f + 0, 0, 0, 0, 0, 0, 0, 0, # 40 - 47 + 0, 0, 0, 0, 0, 0, 0, 0, # 48 - 4f + 0, 0, 0, 0, 0, 0, 0, 0, # 50 - 57 + 0, 0, 0, 0, 0, 0, 0, 0, # 58 - 5f + 0, 0, 0, 0, 0, 0, 0, 0, # 60 - 67 + 0, 0, 0, 0, 0, 0, 0, 0, # 68 - 6f + 0, 0, 0, 0, 0, 0, 0, 0, # 70 - 77 + 0, 0, 0, 0, 0, 0, 0, 0, # 78 - 7f + 0, 0, 0, 0, 0, 0, 0, 0, # 80 - 87 + 0, 0, 0, 0, 0, 0, 0, 0, # 88 - 8f + 0, 0, 0, 0, 0, 0, 0, 0, # 90 - 97 + 0, 0, 0, 0, 0, 0, 0, 0, # 98 - 9f + 0, 0, 0, 0, 0, 0, 0, 0, # a0 - a7 + 0, 0, 0, 0, 0, 0, 0, 0, # a8 - af + 0, 0, 0, 0, 0, 0, 0, 0, # b0 - b7 + 0, 0, 0, 0, 0, 0, 0, 0, # b8 - bf + 0, 0, 0, 0, 0, 0, 0, 0, # c0 - c7 + 0, 0, 0, 0, 0, 0, 0, 0, # c8 - cf + 0, 0, 0, 0, 0, 0, 0, 0, # d0 - d7 + 0, 0, 0, 0, 0, 0, 0, 0, # d8 - df + 0, 0, 0, 0, 0, 0, 0, 0, # e0 - e7 + 0, 0, 0, 0, 0, 0, 0, 0, # e8 - ef + 0, 0, 0, 0, 0, 0, 0, 0, # f0 - f7 + 0, 0, 0, 0, 0, 0, 4, 5 # f8 - ff ) UCS2BE_ST = ( @@ -433,50 +513,53 @@ 5, 8, 6, 6,MachineState.ERROR, 6, 6, 6,#28-2f 6, 6, 6, 6,MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START #30-37 ) +# fmt: on UCS2BE_CHAR_LEN_TABLE = (2, 2, 2, 0, 2, 2) -UCS2BE_SM_MODEL = {'class_table': UCS2BE_CLS, - 'class_factor': 6, - 'state_table': UCS2BE_ST, - 'char_len_table': UCS2BE_CHAR_LEN_TABLE, - 'name': 'UTF-16BE'} +UCS2BE_SM_MODEL: CodingStateMachineDict = { + "class_table": UCS2BE_CLS, + "class_factor": 6, + "state_table": UCS2BE_ST, + "char_len_table": UCS2BE_CHAR_LEN_TABLE, + "name": "UTF-16BE", +} # UCS2-LE - +# fmt: off UCS2LE_CLS = ( - 0,0,0,0,0,0,0,0, # 00 - 07 - 0,0,1,0,0,2,0,0, # 08 - 0f - 0,0,0,0,0,0,0,0, # 10 - 17 - 0,0,0,3,0,0,0,0, # 18 - 1f - 0,0,0,0,0,0,0,0, # 20 - 27 - 0,3,3,3,3,3,0,0, # 28 - 2f - 0,0,0,0,0,0,0,0, # 30 - 37 - 0,0,0,0,0,0,0,0, # 38 - 3f - 0,0,0,0,0,0,0,0, # 40 - 47 - 0,0,0,0,0,0,0,0, # 48 - 4f - 0,0,0,0,0,0,0,0, # 50 - 57 - 0,0,0,0,0,0,0,0, # 58 - 5f - 0,0,0,0,0,0,0,0, # 60 - 67 - 0,0,0,0,0,0,0,0, # 68 - 6f - 0,0,0,0,0,0,0,0, # 70 - 77 - 0,0,0,0,0,0,0,0, # 78 - 7f - 0,0,0,0,0,0,0,0, # 80 - 87 - 0,0,0,0,0,0,0,0, # 88 - 8f - 0,0,0,0,0,0,0,0, # 90 - 97 - 0,0,0,0,0,0,0,0, # 98 - 9f - 0,0,0,0,0,0,0,0, # a0 - a7 - 0,0,0,0,0,0,0,0, # a8 - af - 0,0,0,0,0,0,0,0, # b0 - b7 - 0,0,0,0,0,0,0,0, # b8 - bf - 0,0,0,0,0,0,0,0, # c0 - c7 - 0,0,0,0,0,0,0,0, # c8 - cf - 0,0,0,0,0,0,0,0, # d0 - d7 - 0,0,0,0,0,0,0,0, # d8 - df - 0,0,0,0,0,0,0,0, # e0 - e7 - 0,0,0,0,0,0,0,0, # e8 - ef - 0,0,0,0,0,0,0,0, # f0 - f7 - 0,0,0,0,0,0,4,5 # f8 - ff + 0, 0, 0, 0, 0, 0, 0, 0, # 00 - 07 + 0, 0, 1, 0, 0, 2, 0, 0, # 08 - 0f + 0, 0, 0, 0, 0, 0, 0, 0, # 10 - 17 + 0, 0, 0, 3, 0, 0, 0, 0, # 18 - 1f + 0, 0, 0, 0, 0, 0, 0, 0, # 20 - 27 + 0, 3, 3, 3, 3, 3, 0, 0, # 28 - 2f + 0, 0, 0, 0, 0, 0, 0, 0, # 30 - 37 + 0, 0, 0, 0, 0, 0, 0, 0, # 38 - 3f + 0, 0, 0, 0, 0, 0, 0, 0, # 40 - 47 + 0, 0, 0, 0, 0, 0, 0, 0, # 48 - 4f + 0, 0, 0, 0, 0, 0, 0, 0, # 50 - 57 + 0, 0, 0, 0, 0, 0, 0, 0, # 58 - 5f + 0, 0, 0, 0, 0, 0, 0, 0, # 60 - 67 + 0, 0, 0, 0, 0, 0, 0, 0, # 68 - 6f + 0, 0, 0, 0, 0, 0, 0, 0, # 70 - 77 + 0, 0, 0, 0, 0, 0, 0, 0, # 78 - 7f + 0, 0, 0, 0, 0, 0, 0, 0, # 80 - 87 + 0, 0, 0, 0, 0, 0, 0, 0, # 88 - 8f + 0, 0, 0, 0, 0, 0, 0, 0, # 90 - 97 + 0, 0, 0, 0, 0, 0, 0, 0, # 98 - 9f + 0, 0, 0, 0, 0, 0, 0, 0, # a0 - a7 + 0, 0, 0, 0, 0, 0, 0, 0, # a8 - af + 0, 0, 0, 0, 0, 0, 0, 0, # b0 - b7 + 0, 0, 0, 0, 0, 0, 0, 0, # b8 - bf + 0, 0, 0, 0, 0, 0, 0, 0, # c0 - c7 + 0, 0, 0, 0, 0, 0, 0, 0, # c8 - cf + 0, 0, 0, 0, 0, 0, 0, 0, # d0 - d7 + 0, 0, 0, 0, 0, 0, 0, 0, # d8 - df + 0, 0, 0, 0, 0, 0, 0, 0, # e0 - e7 + 0, 0, 0, 0, 0, 0, 0, 0, # e8 - ef + 0, 0, 0, 0, 0, 0, 0, 0, # f0 - f7 + 0, 0, 0, 0, 0, 0, 4, 5 # f8 - ff ) UCS2LE_ST = ( @@ -488,50 +571,53 @@ 5, 5, 5,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR, 5, 5,#28-2f 5, 5, 5,MachineState.ERROR, 5,MachineState.ERROR,MachineState.START,MachineState.START #30-37 ) +# fmt: on UCS2LE_CHAR_LEN_TABLE = (2, 2, 2, 2, 2, 2) -UCS2LE_SM_MODEL = {'class_table': UCS2LE_CLS, - 'class_factor': 6, - 'state_table': UCS2LE_ST, - 'char_len_table': UCS2LE_CHAR_LEN_TABLE, - 'name': 'UTF-16LE'} +UCS2LE_SM_MODEL: CodingStateMachineDict = { + "class_table": UCS2LE_CLS, + "class_factor": 6, + "state_table": UCS2LE_ST, + "char_len_table": UCS2LE_CHAR_LEN_TABLE, + "name": "UTF-16LE", +} # UTF-8 - +# fmt: off UTF8_CLS = ( - 1,1,1,1,1,1,1,1, # 00 - 07 #allow 0x00 as a legal value - 1,1,1,1,1,1,0,0, # 08 - 0f - 1,1,1,1,1,1,1,1, # 10 - 17 - 1,1,1,0,1,1,1,1, # 18 - 1f - 1,1,1,1,1,1,1,1, # 20 - 27 - 1,1,1,1,1,1,1,1, # 28 - 2f - 1,1,1,1,1,1,1,1, # 30 - 37 - 1,1,1,1,1,1,1,1, # 38 - 3f - 1,1,1,1,1,1,1,1, # 40 - 47 - 1,1,1,1,1,1,1,1, # 48 - 4f - 1,1,1,1,1,1,1,1, # 50 - 57 - 1,1,1,1,1,1,1,1, # 58 - 5f - 1,1,1,1,1,1,1,1, # 60 - 67 - 1,1,1,1,1,1,1,1, # 68 - 6f - 1,1,1,1,1,1,1,1, # 70 - 77 - 1,1,1,1,1,1,1,1, # 78 - 7f - 2,2,2,2,3,3,3,3, # 80 - 87 - 4,4,4,4,4,4,4,4, # 88 - 8f - 4,4,4,4,4,4,4,4, # 90 - 97 - 4,4,4,4,4,4,4,4, # 98 - 9f - 5,5,5,5,5,5,5,5, # a0 - a7 - 5,5,5,5,5,5,5,5, # a8 - af - 5,5,5,5,5,5,5,5, # b0 - b7 - 5,5,5,5,5,5,5,5, # b8 - bf - 0,0,6,6,6,6,6,6, # c0 - c7 - 6,6,6,6,6,6,6,6, # c8 - cf - 6,6,6,6,6,6,6,6, # d0 - d7 - 6,6,6,6,6,6,6,6, # d8 - df - 7,8,8,8,8,8,8,8, # e0 - e7 - 8,8,8,8,8,9,8,8, # e8 - ef - 10,11,11,11,11,11,11,11, # f0 - f7 - 12,13,13,13,14,15,0,0 # f8 - ff + 1, 1, 1, 1, 1, 1, 1, 1, # 00 - 07 #allow 0x00 as a legal value + 1, 1, 1, 1, 1, 1, 0, 0, # 08 - 0f + 1, 1, 1, 1, 1, 1, 1, 1, # 10 - 17 + 1, 1, 1, 0, 1, 1, 1, 1, # 18 - 1f + 1, 1, 1, 1, 1, 1, 1, 1, # 20 - 27 + 1, 1, 1, 1, 1, 1, 1, 1, # 28 - 2f + 1, 1, 1, 1, 1, 1, 1, 1, # 30 - 37 + 1, 1, 1, 1, 1, 1, 1, 1, # 38 - 3f + 1, 1, 1, 1, 1, 1, 1, 1, # 40 - 47 + 1, 1, 1, 1, 1, 1, 1, 1, # 48 - 4f + 1, 1, 1, 1, 1, 1, 1, 1, # 50 - 57 + 1, 1, 1, 1, 1, 1, 1, 1, # 58 - 5f + 1, 1, 1, 1, 1, 1, 1, 1, # 60 - 67 + 1, 1, 1, 1, 1, 1, 1, 1, # 68 - 6f + 1, 1, 1, 1, 1, 1, 1, 1, # 70 - 77 + 1, 1, 1, 1, 1, 1, 1, 1, # 78 - 7f + 2, 2, 2, 2, 3, 3, 3, 3, # 80 - 87 + 4, 4, 4, 4, 4, 4, 4, 4, # 88 - 8f + 4, 4, 4, 4, 4, 4, 4, 4, # 90 - 97 + 4, 4, 4, 4, 4, 4, 4, 4, # 98 - 9f + 5, 5, 5, 5, 5, 5, 5, 5, # a0 - a7 + 5, 5, 5, 5, 5, 5, 5, 5, # a8 - af + 5, 5, 5, 5, 5, 5, 5, 5, # b0 - b7 + 5, 5, 5, 5, 5, 5, 5, 5, # b8 - bf + 0, 0, 6, 6, 6, 6, 6, 6, # c0 - c7 + 6, 6, 6, 6, 6, 6, 6, 6, # c8 - cf + 6, 6, 6, 6, 6, 6, 6, 6, # d0 - d7 + 6, 6, 6, 6, 6, 6, 6, 6, # d8 - df + 7, 8, 8, 8, 8, 8, 8, 8, # e0 - e7 + 8, 8, 8, 8, 8, 9, 8, 8, # e8 - ef + 10, 11, 11, 11, 11, 11, 11, 11, # f0 - f7 + 12, 13, 13, 13, 14, 15, 0, 0 # f8 - ff ) UTF8_ST = ( @@ -562,11 +648,14 @@ MachineState.ERROR,MachineState.ERROR,MachineState.START,MachineState.START,MachineState.START,MachineState.START,MachineState.ERROR,MachineState.ERROR,#c0-c7 MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR,MachineState.ERROR #c8-cf ) +# fmt: on UTF8_CHAR_LEN_TABLE = (0, 1, 0, 0, 0, 0, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6) -UTF8_SM_MODEL = {'class_table': UTF8_CLS, - 'class_factor': 16, - 'state_table': UTF8_ST, - 'char_len_table': UTF8_CHAR_LEN_TABLE, - 'name': 'UTF-8'} +UTF8_SM_MODEL: CodingStateMachineDict = { + "class_table": UTF8_CLS, + "class_factor": 16, + "state_table": UTF8_ST, + "char_len_table": UTF8_CHAR_LEN_TABLE, + "name": "UTF-8", +} diff --git a/src/pip/_vendor/chardet/metadata/languages.py b/src/pip/_vendor/chardet/metadata/languages.py index 3237d5abf60..eb40c5f0c85 100644 --- a/src/pip/_vendor/chardet/metadata/languages.py +++ b/src/pip/_vendor/chardet/metadata/languages.py @@ -1,19 +1,17 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- """ Metadata about languages used by our model training code for our SingleByteCharSetProbers. Could be used for other things in the future. This code is based on the language metadata from the uchardet project. """ -from __future__ import absolute_import, print_function from string import ascii_letters +from typing import List, Optional +# TODO: Add Ukrainian (KOI8-U) -# TODO: Add Ukranian (KOI8-U) -class Language(object): +class Language: """Metadata about a language useful for training models :ivar name: The human name for the language, in English. @@ -33,9 +31,17 @@ class Language(object): Wikipedia for training data. :type wiki_start_pages: list of str """ - def __init__(self, name=None, iso_code=None, use_ascii=True, charsets=None, - alphabet=None, wiki_start_pages=None): - super(Language, self).__init__() + + def __init__( + self, + name: Optional[str] = None, + iso_code: Optional[str] = None, + use_ascii: bool = True, + charsets: Optional[List[str]] = None, + alphabet: Optional[str] = None, + wiki_start_pages: Optional[List[str]] = None, + ) -> None: + super().__init__() self.name = name self.iso_code = iso_code self.use_ascii = use_ascii @@ -46,265 +52,301 @@ def __init__(self, name=None, iso_code=None, use_ascii=True, charsets=None, else: alphabet = ascii_letters elif not alphabet: - raise ValueError('Must supply alphabet if use_ascii is False') - self.alphabet = ''.join(sorted(set(alphabet))) if alphabet else None + raise ValueError("Must supply alphabet if use_ascii is False") + self.alphabet = "".join(sorted(set(alphabet))) if alphabet else None self.wiki_start_pages = wiki_start_pages - def __repr__(self): - return '{}({})'.format(self.__class__.__name__, - ', '.join('{}={!r}'.format(k, v) - for k, v in self.__dict__.items() - if not k.startswith('_'))) + def __repr__(self) -> str: + param_str = ", ".join( + f"{k}={v!r}" for k, v in self.__dict__.items() if not k.startswith("_") + ) + return f"{self.__class__.__name__}({param_str})" -LANGUAGES = {'Arabic': Language(name='Arabic', - iso_code='ar', - use_ascii=False, - # We only support encodings that use isolated - # forms, because the current recommendation is - # that the rendering system handles presentation - # forms. This means we purposefully skip IBM864. - charsets=['ISO-8859-6', 'WINDOWS-1256', - 'CP720', 'CP864'], - alphabet=u'ءآأؤإئابةتثجحخدذرزسشصضطظعغػؼؽؾؿـفقكلمنهوىيًٌٍَُِّ', - wiki_start_pages=[u'الصفحة_الرئيسية']), - 'Belarusian': Language(name='Belarusian', - iso_code='be', - use_ascii=False, - charsets=['ISO-8859-5', 'WINDOWS-1251', - 'IBM866', 'MacCyrillic'], - alphabet=(u'АБВГДЕЁЖЗІЙКЛМНОПРСТУЎФХЦЧШЫЬЭЮЯ' - u'абвгдеёжзійклмнопрстуўфхцчшыьэюяʼ'), - wiki_start_pages=[u'Галоўная_старонка']), - 'Bulgarian': Language(name='Bulgarian', - iso_code='bg', - use_ascii=False, - charsets=['ISO-8859-5', 'WINDOWS-1251', - 'IBM855'], - alphabet=(u'АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЬЮЯ' - u'абвгдежзийклмнопрстуфхцчшщъьюя'), - wiki_start_pages=[u'Начална_страница']), - 'Czech': Language(name='Czech', - iso_code='cz', - use_ascii=True, - charsets=['ISO-8859-2', 'WINDOWS-1250'], - alphabet=u'áčďéěíňóřšťúůýžÁČĎÉĚÍŇÓŘŠŤÚŮÝŽ', - wiki_start_pages=[u'Hlavní_strana']), - 'Danish': Language(name='Danish', - iso_code='da', - use_ascii=True, - charsets=['ISO-8859-1', 'ISO-8859-15', - 'WINDOWS-1252'], - alphabet=u'æøåÆØÅ', - wiki_start_pages=[u'Forside']), - 'German': Language(name='German', - iso_code='de', - use_ascii=True, - charsets=['ISO-8859-1', 'WINDOWS-1252'], - alphabet=u'äöüßÄÖÜ', - wiki_start_pages=[u'Wikipedia:Hauptseite']), - 'Greek': Language(name='Greek', - iso_code='el', - use_ascii=False, - charsets=['ISO-8859-7', 'WINDOWS-1253'], - alphabet=(u'αβγδεζηθικλμνξοπρσςτυφχψωάέήίόύώ' - u'ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΣΤΥΦΧΨΩΆΈΉΊΌΎΏ'), - wiki_start_pages=[u'Πύλη:Κύρια']), - 'English': Language(name='English', - iso_code='en', - use_ascii=True, - charsets=['ISO-8859-1', 'WINDOWS-1252'], - wiki_start_pages=[u'Main_Page']), - 'Esperanto': Language(name='Esperanto', - iso_code='eo', - # Q, W, X, and Y not used at all - use_ascii=False, - charsets=['ISO-8859-3'], - alphabet=(u'abcĉdefgĝhĥijĵklmnoprsŝtuŭvz' - u'ABCĈDEFGĜHĤIJĴKLMNOPRSŜTUŬVZ'), - wiki_start_pages=[u'Vikipedio:Ĉefpaĝo']), - 'Spanish': Language(name='Spanish', - iso_code='es', - use_ascii=True, - charsets=['ISO-8859-1', 'ISO-8859-15', - 'WINDOWS-1252'], - alphabet=u'ñáéíóúüÑÁÉÍÓÚÜ', - wiki_start_pages=[u'Wikipedia:Portada']), - 'Estonian': Language(name='Estonian', - iso_code='et', - use_ascii=False, - charsets=['ISO-8859-4', 'ISO-8859-13', - 'WINDOWS-1257'], - # C, F, Š, Q, W, X, Y, Z, Ž are only for - # loanwords - alphabet=(u'ABDEGHIJKLMNOPRSTUVÕÄÖÜ' - u'abdeghijklmnoprstuvõäöü'), - wiki_start_pages=[u'Esileht']), - 'Finnish': Language(name='Finnish', - iso_code='fi', - use_ascii=True, - charsets=['ISO-8859-1', 'ISO-8859-15', - 'WINDOWS-1252'], - alphabet=u'ÅÄÖŠŽåäöšž', - wiki_start_pages=[u'Wikipedia:Etusivu']), - 'French': Language(name='French', - iso_code='fr', - use_ascii=True, - charsets=['ISO-8859-1', 'ISO-8859-15', - 'WINDOWS-1252'], - alphabet=u'œàâçèéîïùûêŒÀÂÇÈÉÎÏÙÛÊ', - wiki_start_pages=[u'Wikipédia:Accueil_principal', - u'Bœuf (animal)']), - 'Hebrew': Language(name='Hebrew', - iso_code='he', - use_ascii=False, - charsets=['ISO-8859-8', 'WINDOWS-1255'], - alphabet=u'אבגדהוזחטיךכלםמןנסעףפץצקרשתװױײ', - wiki_start_pages=[u'עמוד_ראשי']), - 'Croatian': Language(name='Croatian', - iso_code='hr', - # Q, W, X, Y are only used for foreign words. - use_ascii=False, - charsets=['ISO-8859-2', 'WINDOWS-1250'], - alphabet=(u'abcčćdđefghijklmnoprsštuvzž' - u'ABCČĆDĐEFGHIJKLMNOPRSŠTUVZŽ'), - wiki_start_pages=[u'Glavna_stranica']), - 'Hungarian': Language(name='Hungarian', - iso_code='hu', - # Q, W, X, Y are only used for foreign words. - use_ascii=False, - charsets=['ISO-8859-2', 'WINDOWS-1250'], - alphabet=(u'abcdefghijklmnoprstuvzáéíóöőúüű' - u'ABCDEFGHIJKLMNOPRSTUVZÁÉÍÓÖŐÚÜŰ'), - wiki_start_pages=[u'Kezdőlap']), - 'Italian': Language(name='Italian', - iso_code='it', - use_ascii=True, - charsets=['ISO-8859-1', 'ISO-8859-15', - 'WINDOWS-1252'], - alphabet=u'ÀÈÉÌÒÓÙàèéìòóù', - wiki_start_pages=[u'Pagina_principale']), - 'Lithuanian': Language(name='Lithuanian', - iso_code='lt', - use_ascii=False, - charsets=['ISO-8859-13', 'WINDOWS-1257', - 'ISO-8859-4'], - # Q, W, and X not used at all - alphabet=(u'AĄBCČDEĘĖFGHIĮYJKLMNOPRSŠTUŲŪVZŽ' - u'aąbcčdeęėfghiįyjklmnoprsštuųūvzž'), - wiki_start_pages=[u'Pagrindinis_puslapis']), - 'Latvian': Language(name='Latvian', - iso_code='lv', - use_ascii=False, - charsets=['ISO-8859-13', 'WINDOWS-1257', - 'ISO-8859-4'], - # Q, W, X, Y are only for loanwords - alphabet=(u'AĀBCČDEĒFGĢHIĪJKĶLĻMNŅOPRSŠTUŪVZŽ' - u'aābcčdeēfgģhiījkķlļmnņoprsštuūvzž'), - wiki_start_pages=[u'Sākumlapa']), - 'Macedonian': Language(name='Macedonian', - iso_code='mk', - use_ascii=False, - charsets=['ISO-8859-5', 'WINDOWS-1251', - 'MacCyrillic', 'IBM855'], - alphabet=(u'АБВГДЃЕЖЗЅИЈКЛЉМНЊОПРСТЌУФХЦЧЏШ' - u'абвгдѓежзѕијклљмнњопрстќуфхцчџш'), - wiki_start_pages=[u'Главна_страница']), - 'Dutch': Language(name='Dutch', - iso_code='nl', - use_ascii=True, - charsets=['ISO-8859-1', 'WINDOWS-1252'], - wiki_start_pages=[u'Hoofdpagina']), - 'Polish': Language(name='Polish', - iso_code='pl', - # Q and X are only used for foreign words. - use_ascii=False, - charsets=['ISO-8859-2', 'WINDOWS-1250'], - alphabet=(u'AĄBCĆDEĘFGHIJKLŁMNŃOÓPRSŚTUWYZŹŻ' - u'aąbcćdeęfghijklłmnńoóprsśtuwyzźż'), - wiki_start_pages=[u'Wikipedia:Strona_główna']), - 'Portuguese': Language(name='Portuguese', - iso_code='pt', - use_ascii=True, - charsets=['ISO-8859-1', 'ISO-8859-15', - 'WINDOWS-1252'], - alphabet=u'ÁÂÃÀÇÉÊÍÓÔÕÚáâãàçéêíóôõú', - wiki_start_pages=[u'Wikipédia:Página_principal']), - 'Romanian': Language(name='Romanian', - iso_code='ro', - use_ascii=True, - charsets=['ISO-8859-2', 'WINDOWS-1250'], - alphabet=u'ăâîșțĂÂÎȘȚ', - wiki_start_pages=[u'Pagina_principală']), - 'Russian': Language(name='Russian', - iso_code='ru', - use_ascii=False, - charsets=['ISO-8859-5', 'WINDOWS-1251', - 'KOI8-R', 'MacCyrillic', 'IBM866', - 'IBM855'], - alphabet=(u'абвгдеёжзийклмнопрстуфхцчшщъыьэюя' - u'АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ'), - wiki_start_pages=[u'Заглавная_страница']), - 'Slovak': Language(name='Slovak', - iso_code='sk', - use_ascii=True, - charsets=['ISO-8859-2', 'WINDOWS-1250'], - alphabet=u'áäčďéíĺľňóôŕšťúýžÁÄČĎÉÍĹĽŇÓÔŔŠŤÚÝŽ', - wiki_start_pages=[u'Hlavná_stránka']), - 'Slovene': Language(name='Slovene', - iso_code='sl', - # Q, W, X, Y are only used for foreign words. - use_ascii=False, - charsets=['ISO-8859-2', 'WINDOWS-1250'], - alphabet=(u'abcčdefghijklmnoprsštuvzž' - u'ABCČDEFGHIJKLMNOPRSŠTUVZŽ'), - wiki_start_pages=[u'Glavna_stran']), - # Serbian can be written in both Latin and Cyrillic, but there's no - # simple way to get the Latin alphabet pages from Wikipedia through - # the API, so for now we just support Cyrillic. - 'Serbian': Language(name='Serbian', - iso_code='sr', - alphabet=(u'АБВГДЂЕЖЗИЈКЛЉМНЊОПРСТЋУФХЦЧЏШ' - u'абвгдђежзијклљмнњопрстћуфхцчџш'), - charsets=['ISO-8859-5', 'WINDOWS-1251', - 'MacCyrillic', 'IBM855'], - wiki_start_pages=[u'Главна_страна']), - 'Thai': Language(name='Thai', - iso_code='th', - use_ascii=False, - charsets=['ISO-8859-11', 'TIS-620', 'CP874'], - alphabet=u'กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรฤลฦวศษสหฬอฮฯะัาำิีึืฺุู฿เแโใไๅๆ็่้๊๋์ํ๎๏๐๑๒๓๔๕๖๗๘๙๚๛', - wiki_start_pages=[u'หน้าหลัก']), - 'Turkish': Language(name='Turkish', - iso_code='tr', - # Q, W, and X are not used by Turkish - use_ascii=False, - charsets=['ISO-8859-3', 'ISO-8859-9', - 'WINDOWS-1254'], - alphabet=(u'abcçdefgğhıijklmnoöprsştuüvyzâîû' - u'ABCÇDEFGĞHIİJKLMNOÖPRSŞTUÜVYZÂÎÛ'), - wiki_start_pages=[u'Ana_Sayfa']), - 'Vietnamese': Language(name='Vietnamese', - iso_code='vi', - use_ascii=False, - # Windows-1258 is the only common 8-bit - # Vietnamese encoding supported by Python. - # From Wikipedia: - # For systems that lack support for Unicode, - # dozens of 8-bit Vietnamese code pages are - # available.[1] The most common are VISCII - # (TCVN 5712:1993), VPS, and Windows-1258.[3] - # Where ASCII is required, such as when - # ensuring readability in plain text e-mail, - # Vietnamese letters are often encoded - # according to Vietnamese Quoted-Readable - # (VIQR) or VSCII Mnemonic (VSCII-MNEM),[4] - # though usage of either variable-width - # scheme has declined dramatically following - # the adoption of Unicode on the World Wide - # Web. - charsets=['WINDOWS-1258'], - alphabet=(u'aăâbcdđeêghiklmnoôơpqrstuưvxy' - u'AĂÂBCDĐEÊGHIKLMNOÔƠPQRSTUƯVXY'), - wiki_start_pages=[u'Chữ_Quốc_ngữ']), - } +LANGUAGES = { + "Arabic": Language( + name="Arabic", + iso_code="ar", + use_ascii=False, + # We only support encodings that use isolated + # forms, because the current recommendation is + # that the rendering system handles presentation + # forms. This means we purposefully skip IBM864. + charsets=["ISO-8859-6", "WINDOWS-1256", "CP720", "CP864"], + alphabet="ءآأؤإئابةتثجحخدذرزسشصضطظعغػؼؽؾؿـفقكلمنهوىيًٌٍَُِّ", + wiki_start_pages=["الصفحة_الرئيسية"], + ), + "Belarusian": Language( + name="Belarusian", + iso_code="be", + use_ascii=False, + charsets=["ISO-8859-5", "WINDOWS-1251", "IBM866", "MacCyrillic"], + alphabet="АБВГДЕЁЖЗІЙКЛМНОПРСТУЎФХЦЧШЫЬЭЮЯабвгдеёжзійклмнопрстуўфхцчшыьэюяʼ", + wiki_start_pages=["Галоўная_старонка"], + ), + "Bulgarian": Language( + name="Bulgarian", + iso_code="bg", + use_ascii=False, + charsets=["ISO-8859-5", "WINDOWS-1251", "IBM855"], + alphabet="АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЬЮЯабвгдежзийклмнопрстуфхцчшщъьюя", + wiki_start_pages=["Начална_страница"], + ), + "Czech": Language( + name="Czech", + iso_code="cz", + use_ascii=True, + charsets=["ISO-8859-2", "WINDOWS-1250"], + alphabet="áčďéěíňóřšťúůýžÁČĎÉĚÍŇÓŘŠŤÚŮÝŽ", + wiki_start_pages=["Hlavní_strana"], + ), + "Danish": Language( + name="Danish", + iso_code="da", + use_ascii=True, + charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"], + alphabet="æøåÆØÅ", + wiki_start_pages=["Forside"], + ), + "German": Language( + name="German", + iso_code="de", + use_ascii=True, + charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"], + alphabet="äöüßẞÄÖÜ", + wiki_start_pages=["Wikipedia:Hauptseite"], + ), + "Greek": Language( + name="Greek", + iso_code="el", + use_ascii=False, + charsets=["ISO-8859-7", "WINDOWS-1253"], + alphabet="αβγδεζηθικλμνξοπρσςτυφχψωάέήίόύώΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΣΤΥΦΧΨΩΆΈΉΊΌΎΏ", + wiki_start_pages=["Πύλη:Κύρια"], + ), + "English": Language( + name="English", + iso_code="en", + use_ascii=True, + charsets=["ISO-8859-1", "WINDOWS-1252", "MacRoman"], + wiki_start_pages=["Main_Page"], + ), + "Esperanto": Language( + name="Esperanto", + iso_code="eo", + # Q, W, X, and Y not used at all + use_ascii=False, + charsets=["ISO-8859-3"], + alphabet="abcĉdefgĝhĥijĵklmnoprsŝtuŭvzABCĈDEFGĜHĤIJĴKLMNOPRSŜTUŬVZ", + wiki_start_pages=["Vikipedio:Ĉefpaĝo"], + ), + "Spanish": Language( + name="Spanish", + iso_code="es", + use_ascii=True, + charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"], + alphabet="ñáéíóúüÑÁÉÍÓÚÜ", + wiki_start_pages=["Wikipedia:Portada"], + ), + "Estonian": Language( + name="Estonian", + iso_code="et", + use_ascii=False, + charsets=["ISO-8859-4", "ISO-8859-13", "WINDOWS-1257"], + # C, F, Š, Q, W, X, Y, Z, Ž are only for + # loanwords + alphabet="ABDEGHIJKLMNOPRSTUVÕÄÖÜabdeghijklmnoprstuvõäöü", + wiki_start_pages=["Esileht"], + ), + "Finnish": Language( + name="Finnish", + iso_code="fi", + use_ascii=True, + charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"], + alphabet="ÅÄÖŠŽåäöšž", + wiki_start_pages=["Wikipedia:Etusivu"], + ), + "French": Language( + name="French", + iso_code="fr", + use_ascii=True, + charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"], + alphabet="œàâçèéîïùûêŒÀÂÇÈÉÎÏÙÛÊ", + wiki_start_pages=["Wikipédia:Accueil_principal", "Bœuf (animal)"], + ), + "Hebrew": Language( + name="Hebrew", + iso_code="he", + use_ascii=False, + charsets=["ISO-8859-8", "WINDOWS-1255"], + alphabet="אבגדהוזחטיךכלםמןנסעףפץצקרשתװױײ", + wiki_start_pages=["עמוד_ראשי"], + ), + "Croatian": Language( + name="Croatian", + iso_code="hr", + # Q, W, X, Y are only used for foreign words. + use_ascii=False, + charsets=["ISO-8859-2", "WINDOWS-1250"], + alphabet="abcčćdđefghijklmnoprsštuvzžABCČĆDĐEFGHIJKLMNOPRSŠTUVZŽ", + wiki_start_pages=["Glavna_stranica"], + ), + "Hungarian": Language( + name="Hungarian", + iso_code="hu", + # Q, W, X, Y are only used for foreign words. + use_ascii=False, + charsets=["ISO-8859-2", "WINDOWS-1250"], + alphabet="abcdefghijklmnoprstuvzáéíóöőúüűABCDEFGHIJKLMNOPRSTUVZÁÉÍÓÖŐÚÜŰ", + wiki_start_pages=["Kezdőlap"], + ), + "Italian": Language( + name="Italian", + iso_code="it", + use_ascii=True, + charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"], + alphabet="ÀÈÉÌÒÓÙàèéìòóù", + wiki_start_pages=["Pagina_principale"], + ), + "Lithuanian": Language( + name="Lithuanian", + iso_code="lt", + use_ascii=False, + charsets=["ISO-8859-13", "WINDOWS-1257", "ISO-8859-4"], + # Q, W, and X not used at all + alphabet="AĄBCČDEĘĖFGHIĮYJKLMNOPRSŠTUŲŪVZŽaąbcčdeęėfghiįyjklmnoprsštuųūvzž", + wiki_start_pages=["Pagrindinis_puslapis"], + ), + "Latvian": Language( + name="Latvian", + iso_code="lv", + use_ascii=False, + charsets=["ISO-8859-13", "WINDOWS-1257", "ISO-8859-4"], + # Q, W, X, Y are only for loanwords + alphabet="AĀBCČDEĒFGĢHIĪJKĶLĻMNŅOPRSŠTUŪVZŽaābcčdeēfgģhiījkķlļmnņoprsštuūvzž", + wiki_start_pages=["Sākumlapa"], + ), + "Macedonian": Language( + name="Macedonian", + iso_code="mk", + use_ascii=False, + charsets=["ISO-8859-5", "WINDOWS-1251", "MacCyrillic", "IBM855"], + alphabet="АБВГДЃЕЖЗЅИЈКЛЉМНЊОПРСТЌУФХЦЧЏШабвгдѓежзѕијклљмнњопрстќуфхцчџш", + wiki_start_pages=["Главна_страница"], + ), + "Dutch": Language( + name="Dutch", + iso_code="nl", + use_ascii=True, + charsets=["ISO-8859-1", "WINDOWS-1252", "MacRoman"], + wiki_start_pages=["Hoofdpagina"], + ), + "Polish": Language( + name="Polish", + iso_code="pl", + # Q and X are only used for foreign words. + use_ascii=False, + charsets=["ISO-8859-2", "WINDOWS-1250"], + alphabet="AĄBCĆDEĘFGHIJKLŁMNŃOÓPRSŚTUWYZŹŻaąbcćdeęfghijklłmnńoóprsśtuwyzźż", + wiki_start_pages=["Wikipedia:Strona_główna"], + ), + "Portuguese": Language( + name="Portuguese", + iso_code="pt", + use_ascii=True, + charsets=["ISO-8859-1", "ISO-8859-15", "WINDOWS-1252", "MacRoman"], + alphabet="ÁÂÃÀÇÉÊÍÓÔÕÚáâãàçéêíóôõú", + wiki_start_pages=["Wikipédia:Página_principal"], + ), + "Romanian": Language( + name="Romanian", + iso_code="ro", + use_ascii=True, + charsets=["ISO-8859-2", "WINDOWS-1250"], + alphabet="ăâîșțĂÂÎȘȚ", + wiki_start_pages=["Pagina_principală"], + ), + "Russian": Language( + name="Russian", + iso_code="ru", + use_ascii=False, + charsets=[ + "ISO-8859-5", + "WINDOWS-1251", + "KOI8-R", + "MacCyrillic", + "IBM866", + "IBM855", + ], + alphabet="абвгдеёжзийклмнопрстуфхцчшщъыьэюяАБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ", + wiki_start_pages=["Заглавная_страница"], + ), + "Slovak": Language( + name="Slovak", + iso_code="sk", + use_ascii=True, + charsets=["ISO-8859-2", "WINDOWS-1250"], + alphabet="áäčďéíĺľňóôŕšťúýžÁÄČĎÉÍĹĽŇÓÔŔŠŤÚÝŽ", + wiki_start_pages=["Hlavná_stránka"], + ), + "Slovene": Language( + name="Slovene", + iso_code="sl", + # Q, W, X, Y are only used for foreign words. + use_ascii=False, + charsets=["ISO-8859-2", "WINDOWS-1250"], + alphabet="abcčdefghijklmnoprsštuvzžABCČDEFGHIJKLMNOPRSŠTUVZŽ", + wiki_start_pages=["Glavna_stran"], + ), + # Serbian can be written in both Latin and Cyrillic, but there's no + # simple way to get the Latin alphabet pages from Wikipedia through + # the API, so for now we just support Cyrillic. + "Serbian": Language( + name="Serbian", + iso_code="sr", + alphabet="АБВГДЂЕЖЗИЈКЛЉМНЊОПРСТЋУФХЦЧЏШабвгдђежзијклљмнњопрстћуфхцчџш", + charsets=["ISO-8859-5", "WINDOWS-1251", "MacCyrillic", "IBM855"], + wiki_start_pages=["Главна_страна"], + ), + "Thai": Language( + name="Thai", + iso_code="th", + use_ascii=False, + charsets=["ISO-8859-11", "TIS-620", "CP874"], + alphabet="กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรฤลฦวศษสหฬอฮฯะัาำิีึืฺุู฿เแโใไๅๆ็่้๊๋์ํ๎๏๐๑๒๓๔๕๖๗๘๙๚๛", + wiki_start_pages=["หน้าหลัก"], + ), + "Turkish": Language( + name="Turkish", + iso_code="tr", + # Q, W, and X are not used by Turkish + use_ascii=False, + charsets=["ISO-8859-3", "ISO-8859-9", "WINDOWS-1254"], + alphabet="abcçdefgğhıijklmnoöprsştuüvyzâîûABCÇDEFGĞHIİJKLMNOÖPRSŞTUÜVYZÂÎÛ", + wiki_start_pages=["Ana_Sayfa"], + ), + "Vietnamese": Language( + name="Vietnamese", + iso_code="vi", + use_ascii=False, + # Windows-1258 is the only common 8-bit + # Vietnamese encoding supported by Python. + # From Wikipedia: + # For systems that lack support for Unicode, + # dozens of 8-bit Vietnamese code pages are + # available.[1] The most common are VISCII + # (TCVN 5712:1993), VPS, and Windows-1258.[3] + # Where ASCII is required, such as when + # ensuring readability in plain text e-mail, + # Vietnamese letters are often encoded + # according to Vietnamese Quoted-Readable + # (VIQR) or VSCII Mnemonic (VSCII-MNEM),[4] + # though usage of either variable-width + # scheme has declined dramatically following + # the adoption of Unicode on the World Wide + # Web. + charsets=["WINDOWS-1258"], + alphabet="aăâbcdđeêghiklmnoôơpqrstuưvxyAĂÂBCDĐEÊGHIKLMNOÔƠPQRSTUƯVXY", + wiki_start_pages=["Chữ_Quốc_ngữ"], + ), +} diff --git a/news/FD80732D-5DC1-4B68-BDB5-0283EF9EBC3A.trivial.rst b/src/pip/_vendor/chardet/py.typed similarity index 100% rename from news/FD80732D-5DC1-4B68-BDB5-0283EF9EBC3A.trivial.rst rename to src/pip/_vendor/chardet/py.typed diff --git a/src/pip/_vendor/chardet/resultdict.py b/src/pip/_vendor/chardet/resultdict.py new file mode 100644 index 00000000000..7d36e64c467 --- /dev/null +++ b/src/pip/_vendor/chardet/resultdict.py @@ -0,0 +1,16 @@ +from typing import TYPE_CHECKING, Optional + +if TYPE_CHECKING: + # TypedDict was introduced in Python 3.8. + # + # TODO: Remove the else block and TYPE_CHECKING check when dropping support + # for Python 3.7. + from typing import TypedDict + + class ResultDict(TypedDict): + encoding: Optional[str] + confidence: float + language: Optional[str] + +else: + ResultDict = dict diff --git a/src/pip/_vendor/chardet/sbcharsetprober.py b/src/pip/_vendor/chardet/sbcharsetprober.py index 46ba835c66c..0ffbcdd2c3e 100644 --- a/src/pip/_vendor/chardet/sbcharsetprober.py +++ b/src/pip/_vendor/chardet/sbcharsetprober.py @@ -26,70 +26,77 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### -from collections import namedtuple +from typing import Dict, List, NamedTuple, Optional, Union from .charsetprober import CharSetProber from .enums import CharacterCategory, ProbingState, SequenceLikelihood -SingleByteCharSetModel = namedtuple('SingleByteCharSetModel', - ['charset_name', - 'language', - 'char_to_order_map', - 'language_model', - 'typical_positive_ratio', - 'keep_ascii_letters', - 'alphabet']) +class SingleByteCharSetModel(NamedTuple): + charset_name: str + language: str + char_to_order_map: Dict[int, int] + language_model: Dict[int, Dict[int, int]] + typical_positive_ratio: float + keep_ascii_letters: bool + alphabet: str class SingleByteCharSetProber(CharSetProber): SAMPLE_SIZE = 64 - SB_ENOUGH_REL_THRESHOLD = 1024 # 0.25 * SAMPLE_SIZE^2 + SB_ENOUGH_REL_THRESHOLD = 1024 # 0.25 * SAMPLE_SIZE^2 POSITIVE_SHORTCUT_THRESHOLD = 0.95 NEGATIVE_SHORTCUT_THRESHOLD = 0.05 - def __init__(self, model, reversed=False, name_prober=None): - super(SingleByteCharSetProber, self).__init__() + def __init__( + self, + model: SingleByteCharSetModel, + is_reversed: bool = False, + name_prober: Optional[CharSetProber] = None, + ) -> None: + super().__init__() self._model = model # TRUE if we need to reverse every pair in the model lookup - self._reversed = reversed + self._reversed = is_reversed # Optional auxiliary prober for name decision self._name_prober = name_prober - self._last_order = None - self._seq_counters = None - self._total_seqs = None - self._total_char = None - self._freq_char = None + self._last_order = 255 + self._seq_counters: List[int] = [] + self._total_seqs = 0 + self._total_char = 0 + self._control_char = 0 + self._freq_char = 0 self.reset() - def reset(self): - super(SingleByteCharSetProber, self).reset() + def reset(self) -> None: + super().reset() # char order of last character self._last_order = 255 self._seq_counters = [0] * SequenceLikelihood.get_num_categories() self._total_seqs = 0 self._total_char = 0 + self._control_char = 0 # characters that fall in our sampling range self._freq_char = 0 @property - def charset_name(self): + def charset_name(self) -> Optional[str]: if self._name_prober: return self._name_prober.charset_name - else: - return self._model.charset_name + return self._model.charset_name @property - def language(self): + def language(self) -> Optional[str]: if self._name_prober: return self._name_prober.language - else: - return self._model.language + return self._model.language - def feed(self, byte_str): + def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState: # TODO: Make filter_international_words keep things in self.alphabet if not self._model.keep_ascii_letters: byte_str = self.filter_international_words(byte_str) + else: + byte_str = self.remove_xml_tags(byte_str) if not byte_str: return self.state char_to_order_map = self._model.char_to_order_map @@ -103,9 +110,6 @@ def feed(self, byte_str): # _total_char purposes. if order < CharacterCategory.CONTROL: self._total_char += 1 - # TODO: Follow uchardet's lead and discount confidence for frequent - # control characters. - # See https://github.com/BYVoid/uchardet/commit/55b4f23971db61 if order < self.SAMPLE_SIZE: self._freq_char += 1 if self._last_order < self.SAMPLE_SIZE: @@ -122,23 +126,36 @@ def feed(self, byte_str): if self._total_seqs > self.SB_ENOUGH_REL_THRESHOLD: confidence = self.get_confidence() if confidence > self.POSITIVE_SHORTCUT_THRESHOLD: - self.logger.debug('%s confidence = %s, we have a winner', - charset_name, confidence) + self.logger.debug( + "%s confidence = %s, we have a winner", charset_name, confidence + ) self._state = ProbingState.FOUND_IT elif confidence < self.NEGATIVE_SHORTCUT_THRESHOLD: - self.logger.debug('%s confidence = %s, below negative ' - 'shortcut threshhold %s', charset_name, - confidence, - self.NEGATIVE_SHORTCUT_THRESHOLD) + self.logger.debug( + "%s confidence = %s, below negative shortcut threshold %s", + charset_name, + confidence, + self.NEGATIVE_SHORTCUT_THRESHOLD, + ) self._state = ProbingState.NOT_ME return self.state - def get_confidence(self): + def get_confidence(self) -> float: r = 0.01 if self._total_seqs > 0: - r = ((1.0 * self._seq_counters[SequenceLikelihood.POSITIVE]) / - self._total_seqs / self._model.typical_positive_ratio) + r = ( + ( + self._seq_counters[SequenceLikelihood.POSITIVE] + + 0.25 * self._seq_counters[SequenceLikelihood.LIKELY] + ) + / self._total_seqs + / self._model.typical_positive_ratio + ) + # The more control characters (proportionnaly to the size + # of the text), the less confident we become in the current + # charset. + r = r * (self._total_char - self._control_char) / self._total_char r = r * self._freq_char / self._total_char if r >= 1.0: r = 0.99 diff --git a/src/pip/_vendor/chardet/sbcsgroupprober.py b/src/pip/_vendor/chardet/sbcsgroupprober.py index bdeef4e15b0..890ae8465c5 100644 --- a/src/pip/_vendor/chardet/sbcsgroupprober.py +++ b/src/pip/_vendor/chardet/sbcsgroupprober.py @@ -28,33 +28,38 @@ from .charsetgroupprober import CharSetGroupProber from .hebrewprober import HebrewProber -from .langbulgarianmodel import (ISO_8859_5_BULGARIAN_MODEL, - WINDOWS_1251_BULGARIAN_MODEL) +from .langbulgarianmodel import ISO_8859_5_BULGARIAN_MODEL, WINDOWS_1251_BULGARIAN_MODEL from .langgreekmodel import ISO_8859_7_GREEK_MODEL, WINDOWS_1253_GREEK_MODEL from .langhebrewmodel import WINDOWS_1255_HEBREW_MODEL + # from .langhungarianmodel import (ISO_8859_2_HUNGARIAN_MODEL, # WINDOWS_1250_HUNGARIAN_MODEL) -from .langrussianmodel import (IBM855_RUSSIAN_MODEL, IBM866_RUSSIAN_MODEL, - ISO_8859_5_RUSSIAN_MODEL, KOI8_R_RUSSIAN_MODEL, - MACCYRILLIC_RUSSIAN_MODEL, - WINDOWS_1251_RUSSIAN_MODEL) +from .langrussianmodel import ( + IBM855_RUSSIAN_MODEL, + IBM866_RUSSIAN_MODEL, + ISO_8859_5_RUSSIAN_MODEL, + KOI8_R_RUSSIAN_MODEL, + MACCYRILLIC_RUSSIAN_MODEL, + WINDOWS_1251_RUSSIAN_MODEL, +) from .langthaimodel import TIS_620_THAI_MODEL from .langturkishmodel import ISO_8859_9_TURKISH_MODEL from .sbcharsetprober import SingleByteCharSetProber class SBCSGroupProber(CharSetGroupProber): - def __init__(self): - super(SBCSGroupProber, self).__init__() + def __init__(self) -> None: + super().__init__() hebrew_prober = HebrewProber() - logical_hebrew_prober = SingleByteCharSetProber(WINDOWS_1255_HEBREW_MODEL, - False, hebrew_prober) + logical_hebrew_prober = SingleByteCharSetProber( + WINDOWS_1255_HEBREW_MODEL, is_reversed=False, name_prober=hebrew_prober + ) # TODO: See if using ISO-8859-8 Hebrew model works better here, since # it's actually the visual one - visual_hebrew_prober = SingleByteCharSetProber(WINDOWS_1255_HEBREW_MODEL, - True, hebrew_prober) - hebrew_prober.set_model_probers(logical_hebrew_prober, - visual_hebrew_prober) + visual_hebrew_prober = SingleByteCharSetProber( + WINDOWS_1255_HEBREW_MODEL, is_reversed=True, name_prober=hebrew_prober + ) + hebrew_prober.set_model_probers(logical_hebrew_prober, visual_hebrew_prober) # TODO: ORDER MATTERS HERE. I changed the order vs what was in master # and several tests failed that did not before. Some thought # should be put into the ordering, and we should consider making diff --git a/src/pip/_vendor/chardet/sjisprober.py b/src/pip/_vendor/chardet/sjisprober.py index 9e29623bdc5..91df077961b 100644 --- a/src/pip/_vendor/chardet/sjisprober.py +++ b/src/pip/_vendor/chardet/sjisprober.py @@ -25,68 +25,81 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### -from .mbcharsetprober import MultiByteCharSetProber -from .codingstatemachine import CodingStateMachine +from typing import Union + from .chardistribution import SJISDistributionAnalysis +from .codingstatemachine import CodingStateMachine +from .enums import MachineState, ProbingState from .jpcntx import SJISContextAnalysis +from .mbcharsetprober import MultiByteCharSetProber from .mbcssm import SJIS_SM_MODEL -from .enums import ProbingState, MachineState class SJISProber(MultiByteCharSetProber): - def __init__(self): - super(SJISProber, self).__init__() + def __init__(self) -> None: + super().__init__() self.coding_sm = CodingStateMachine(SJIS_SM_MODEL) self.distribution_analyzer = SJISDistributionAnalysis() self.context_analyzer = SJISContextAnalysis() self.reset() - def reset(self): - super(SJISProber, self).reset() + def reset(self) -> None: + super().reset() self.context_analyzer.reset() @property - def charset_name(self): + def charset_name(self) -> str: return self.context_analyzer.charset_name @property - def language(self): + def language(self) -> str: return "Japanese" - def feed(self, byte_str): - for i in range(len(byte_str)): - coding_state = self.coding_sm.next_state(byte_str[i]) + def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState: + assert self.coding_sm is not None + assert self.distribution_analyzer is not None + + for i, byte in enumerate(byte_str): + coding_state = self.coding_sm.next_state(byte) if coding_state == MachineState.ERROR: - self.logger.debug('%s %s prober hit error at byte %s', - self.charset_name, self.language, i) + self.logger.debug( + "%s %s prober hit error at byte %s", + self.charset_name, + self.language, + i, + ) self._state = ProbingState.NOT_ME break - elif coding_state == MachineState.ITS_ME: + if coding_state == MachineState.ITS_ME: self._state = ProbingState.FOUND_IT break - elif coding_state == MachineState.START: + if coding_state == MachineState.START: char_len = self.coding_sm.get_current_charlen() if i == 0: - self._last_char[1] = byte_str[0] - self.context_analyzer.feed(self._last_char[2 - char_len:], - char_len) + self._last_char[1] = byte + self.context_analyzer.feed( + self._last_char[2 - char_len :], char_len + ) self.distribution_analyzer.feed(self._last_char, char_len) else: - self.context_analyzer.feed(byte_str[i + 1 - char_len:i + 3 - - char_len], char_len) - self.distribution_analyzer.feed(byte_str[i - 1:i + 1], - char_len) + self.context_analyzer.feed( + byte_str[i + 1 - char_len : i + 3 - char_len], char_len + ) + self.distribution_analyzer.feed(byte_str[i - 1 : i + 1], char_len) self._last_char[0] = byte_str[-1] if self.state == ProbingState.DETECTING: - if (self.context_analyzer.got_enough_data() and - (self.get_confidence() > self.SHORTCUT_THRESHOLD)): + if self.context_analyzer.got_enough_data() and ( + self.get_confidence() > self.SHORTCUT_THRESHOLD + ): self._state = ProbingState.FOUND_IT return self.state - def get_confidence(self): + def get_confidence(self) -> float: + assert self.distribution_analyzer is not None + context_conf = self.context_analyzer.get_confidence() distrib_conf = self.distribution_analyzer.get_confidence() return max(context_conf, distrib_conf) diff --git a/src/pip/_vendor/chardet/universaldetector.py b/src/pip/_vendor/chardet/universaldetector.py index 055a8ac1b1d..30c441dc28e 100644 --- a/src/pip/_vendor/chardet/universaldetector.py +++ b/src/pip/_vendor/chardet/universaldetector.py @@ -39,16 +39,21 @@ class a user of ``chardet`` should use. import codecs import logging import re +from typing import List, Optional, Union from .charsetgroupprober import CharSetGroupProber +from .charsetprober import CharSetProber from .enums import InputState, LanguageFilter, ProbingState from .escprober import EscCharSetProber from .latin1prober import Latin1Prober +from .macromanprober import MacRomanProber from .mbcsgroupprober import MBCSGroupProber +from .resultdict import ResultDict from .sbcsgroupprober import SBCSGroupProber +from .utf1632prober import UTF1632Prober -class UniversalDetector(object): +class UniversalDetector: """ The ``UniversalDetector`` class underlies the ``chardet.detect`` function and coordinates all of the different charset probers. @@ -66,49 +71,87 @@ class UniversalDetector(object): """ MINIMUM_THRESHOLD = 0.20 - HIGH_BYTE_DETECTOR = re.compile(b'[\x80-\xFF]') - ESC_DETECTOR = re.compile(b'(\033|~{)') - WIN_BYTE_DETECTOR = re.compile(b'[\x80-\x9F]') - ISO_WIN_MAP = {'iso-8859-1': 'Windows-1252', - 'iso-8859-2': 'Windows-1250', - 'iso-8859-5': 'Windows-1251', - 'iso-8859-6': 'Windows-1256', - 'iso-8859-7': 'Windows-1253', - 'iso-8859-8': 'Windows-1255', - 'iso-8859-9': 'Windows-1254', - 'iso-8859-13': 'Windows-1257'} - - def __init__(self, lang_filter=LanguageFilter.ALL): - self._esc_charset_prober = None - self._charset_probers = [] - self.result = None - self.done = None - self._got_data = None - self._input_state = None - self._last_char = None + HIGH_BYTE_DETECTOR = re.compile(b"[\x80-\xFF]") + ESC_DETECTOR = re.compile(b"(\033|~{)") + WIN_BYTE_DETECTOR = re.compile(b"[\x80-\x9F]") + ISO_WIN_MAP = { + "iso-8859-1": "Windows-1252", + "iso-8859-2": "Windows-1250", + "iso-8859-5": "Windows-1251", + "iso-8859-6": "Windows-1256", + "iso-8859-7": "Windows-1253", + "iso-8859-8": "Windows-1255", + "iso-8859-9": "Windows-1254", + "iso-8859-13": "Windows-1257", + } + # Based on https://encoding.spec.whatwg.org/#names-and-labels + # but altered to match Python names for encodings and remove mappings + # that break tests. + LEGACY_MAP = { + "ascii": "Windows-1252", + "iso-8859-1": "Windows-1252", + "tis-620": "ISO-8859-11", + "iso-8859-9": "Windows-1254", + "gb2312": "GB18030", + "euc-kr": "CP949", + "utf-16le": "UTF-16", + } + + def __init__( + self, + lang_filter: LanguageFilter = LanguageFilter.ALL, + should_rename_legacy: bool = False, + ) -> None: + self._esc_charset_prober: Optional[EscCharSetProber] = None + self._utf1632_prober: Optional[UTF1632Prober] = None + self._charset_probers: List[CharSetProber] = [] + self.result: ResultDict = { + "encoding": None, + "confidence": 0.0, + "language": None, + } + self.done = False + self._got_data = False + self._input_state = InputState.PURE_ASCII + self._last_char = b"" self.lang_filter = lang_filter self.logger = logging.getLogger(__name__) - self._has_win_bytes = None + self._has_win_bytes = False + self.should_rename_legacy = should_rename_legacy self.reset() - def reset(self): + @property + def input_state(self) -> int: + return self._input_state + + @property + def has_win_bytes(self) -> bool: + return self._has_win_bytes + + @property + def charset_probers(self) -> List[CharSetProber]: + return self._charset_probers + + def reset(self) -> None: """ Reset the UniversalDetector and all of its probers back to their initial states. This is called by ``__init__``, so you only need to call this directly in between analyses of different documents. """ - self.result = {'encoding': None, 'confidence': 0.0, 'language': None} + self.result = {"encoding": None, "confidence": 0.0, "language": None} self.done = False self._got_data = False self._has_win_bytes = False self._input_state = InputState.PURE_ASCII - self._last_char = b'' + self._last_char = b"" if self._esc_charset_prober: self._esc_charset_prober.reset() + if self._utf1632_prober: + self._utf1632_prober.reset() for prober in self._charset_probers: prober.reset() - def feed(self, byte_str): + def feed(self, byte_str: Union[bytes, bytearray]) -> None: """ Takes a chunk of a document and feeds it through all of the relevant charset probers. @@ -125,7 +168,7 @@ def feed(self, byte_str): if self.done: return - if not len(byte_str): + if not byte_str: return if not isinstance(byte_str, bytearray): @@ -136,35 +179,38 @@ def feed(self, byte_str): # If the data starts with BOM, we know it is UTF if byte_str.startswith(codecs.BOM_UTF8): # EF BB BF UTF-8 with BOM - self.result = {'encoding': "UTF-8-SIG", - 'confidence': 1.0, - 'language': ''} - elif byte_str.startswith((codecs.BOM_UTF32_LE, - codecs.BOM_UTF32_BE)): + self.result = { + "encoding": "UTF-8-SIG", + "confidence": 1.0, + "language": "", + } + elif byte_str.startswith((codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE)): # FF FE 00 00 UTF-32, little-endian BOM # 00 00 FE FF UTF-32, big-endian BOM - self.result = {'encoding': "UTF-32", - 'confidence': 1.0, - 'language': ''} - elif byte_str.startswith(b'\xFE\xFF\x00\x00'): + self.result = {"encoding": "UTF-32", "confidence": 1.0, "language": ""} + elif byte_str.startswith(b"\xFE\xFF\x00\x00"): # FE FF 00 00 UCS-4, unusual octet order BOM (3412) - self.result = {'encoding': "X-ISO-10646-UCS-4-3412", - 'confidence': 1.0, - 'language': ''} - elif byte_str.startswith(b'\x00\x00\xFF\xFE'): + self.result = { + # TODO: This encoding is not supported by Python. Should remove? + "encoding": "X-ISO-10646-UCS-4-3412", + "confidence": 1.0, + "language": "", + } + elif byte_str.startswith(b"\x00\x00\xFF\xFE"): # 00 00 FF FE UCS-4, unusual octet order BOM (2143) - self.result = {'encoding': "X-ISO-10646-UCS-4-2143", - 'confidence': 1.0, - 'language': ''} + self.result = { + # TODO: This encoding is not supported by Python. Should remove? + "encoding": "X-ISO-10646-UCS-4-2143", + "confidence": 1.0, + "language": "", + } elif byte_str.startswith((codecs.BOM_LE, codecs.BOM_BE)): # FF FE UTF-16, little endian BOM # FE FF UTF-16, big endian BOM - self.result = {'encoding': "UTF-16", - 'confidence': 1.0, - 'language': ''} + self.result = {"encoding": "UTF-16", "confidence": 1.0, "language": ""} self._got_data = True - if self.result['encoding'] is not None: + if self.result["encoding"] is not None: self.done = True return @@ -173,12 +219,29 @@ def feed(self, byte_str): if self._input_state == InputState.PURE_ASCII: if self.HIGH_BYTE_DETECTOR.search(byte_str): self._input_state = InputState.HIGH_BYTE - elif self._input_state == InputState.PURE_ASCII and \ - self.ESC_DETECTOR.search(self._last_char + byte_str): + elif ( + self._input_state == InputState.PURE_ASCII + and self.ESC_DETECTOR.search(self._last_char + byte_str) + ): self._input_state = InputState.ESC_ASCII self._last_char = byte_str[-1:] + # next we will look to see if it is appears to be either a UTF-16 or + # UTF-32 encoding + if not self._utf1632_prober: + self._utf1632_prober = UTF1632Prober() + + if self._utf1632_prober.state == ProbingState.DETECTING: + if self._utf1632_prober.feed(byte_str) == ProbingState.FOUND_IT: + self.result = { + "encoding": self._utf1632_prober.charset_name, + "confidence": self._utf1632_prober.get_confidence(), + "language": "", + } + self.done = True + return + # If we've seen escape sequences, use the EscCharSetProber, which # uses a simple state machine to check for known escape sequences in # HZ and ISO-2022 encodings, since those are the only encodings that @@ -187,12 +250,11 @@ def feed(self, byte_str): if not self._esc_charset_prober: self._esc_charset_prober = EscCharSetProber(self.lang_filter) if self._esc_charset_prober.feed(byte_str) == ProbingState.FOUND_IT: - self.result = {'encoding': - self._esc_charset_prober.charset_name, - 'confidence': - self._esc_charset_prober.get_confidence(), - 'language': - self._esc_charset_prober.language} + self.result = { + "encoding": self._esc_charset_prober.charset_name, + "confidence": self._esc_charset_prober.get_confidence(), + "language": self._esc_charset_prober.language, + } self.done = True # If we've seen high bytes (i.e., those with values greater than 127), # we need to do more complicated checks using all our multi-byte and @@ -207,17 +269,20 @@ def feed(self, byte_str): if self.lang_filter & LanguageFilter.NON_CJK: self._charset_probers.append(SBCSGroupProber()) self._charset_probers.append(Latin1Prober()) + self._charset_probers.append(MacRomanProber()) for prober in self._charset_probers: if prober.feed(byte_str) == ProbingState.FOUND_IT: - self.result = {'encoding': prober.charset_name, - 'confidence': prober.get_confidence(), - 'language': prober.language} + self.result = { + "encoding": prober.charset_name, + "confidence": prober.get_confidence(), + "language": prober.language, + } self.done = True break if self.WIN_BYTE_DETECTOR.search(byte_str): self._has_win_bytes = True - def close(self): + def close(self) -> ResultDict: """ Stop analyzing the current document and come up with a final prediction. @@ -231,13 +296,11 @@ def close(self): self.done = True if not self._got_data: - self.logger.debug('no data received!') + self.logger.debug("no data received!") # Default to ASCII if it is all we've seen so far elif self._input_state == InputState.PURE_ASCII: - self.result = {'encoding': 'ascii', - 'confidence': 1.0, - 'language': ''} + self.result = {"encoding": "ascii", "confidence": 1.0, "language": ""} # If we have seen non-ASCII, return the best that met MINIMUM_THRESHOLD elif self._input_state == InputState.HIGH_BYTE: @@ -253,34 +316,47 @@ def close(self): max_prober = prober if max_prober and (max_prober_confidence > self.MINIMUM_THRESHOLD): charset_name = max_prober.charset_name - lower_charset_name = max_prober.charset_name.lower() + assert charset_name is not None + lower_charset_name = charset_name.lower() confidence = max_prober.get_confidence() # Use Windows encoding name instead of ISO-8859 if we saw any # extra Windows-specific bytes - if lower_charset_name.startswith('iso-8859'): + if lower_charset_name.startswith("iso-8859"): if self._has_win_bytes: - charset_name = self.ISO_WIN_MAP.get(lower_charset_name, - charset_name) - self.result = {'encoding': charset_name, - 'confidence': confidence, - 'language': max_prober.language} + charset_name = self.ISO_WIN_MAP.get( + lower_charset_name, charset_name + ) + # Rename legacy encodings with superset encodings if asked + if self.should_rename_legacy: + charset_name = self.LEGACY_MAP.get( + (charset_name or "").lower(), charset_name + ) + self.result = { + "encoding": charset_name, + "confidence": confidence, + "language": max_prober.language, + } # Log all prober confidences if none met MINIMUM_THRESHOLD if self.logger.getEffectiveLevel() <= logging.DEBUG: - if self.result['encoding'] is None: - self.logger.debug('no probers hit minimum threshold') + if self.result["encoding"] is None: + self.logger.debug("no probers hit minimum threshold") for group_prober in self._charset_probers: if not group_prober: continue if isinstance(group_prober, CharSetGroupProber): for prober in group_prober.probers: - self.logger.debug('%s %s confidence = %s', - prober.charset_name, - prober.language, - prober.get_confidence()) + self.logger.debug( + "%s %s confidence = %s", + prober.charset_name, + prober.language, + prober.get_confidence(), + ) else: - self.logger.debug('%s %s confidence = %s', - group_prober.charset_name, - group_prober.language, - group_prober.get_confidence()) + self.logger.debug( + "%s %s confidence = %s", + group_prober.charset_name, + group_prober.language, + group_prober.get_confidence(), + ) return self.result diff --git a/src/pip/_vendor/chardet/utf1632prober.py b/src/pip/_vendor/chardet/utf1632prober.py new file mode 100644 index 00000000000..6bdec63d686 --- /dev/null +++ b/src/pip/_vendor/chardet/utf1632prober.py @@ -0,0 +1,225 @@ +######################## BEGIN LICENSE BLOCK ######################## +# +# Contributor(s): +# Jason Zavaglia +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or (at your option) any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA +# 02110-1301 USA +######################### END LICENSE BLOCK ######################### +from typing import List, Union + +from .charsetprober import CharSetProber +from .enums import ProbingState + + +class UTF1632Prober(CharSetProber): + """ + This class simply looks for occurrences of zero bytes, and infers + whether the file is UTF16 or UTF32 (low-endian or big-endian) + For instance, files looking like ( \0 \0 \0 [nonzero] )+ + have a good probability to be UTF32BE. Files looking like ( \0 [nonzero] )+ + may be guessed to be UTF16BE, and inversely for little-endian varieties. + """ + + # how many logical characters to scan before feeling confident of prediction + MIN_CHARS_FOR_DETECTION = 20 + # a fixed constant ratio of expected zeros or non-zeros in modulo-position. + EXPECTED_RATIO = 0.94 + + def __init__(self) -> None: + super().__init__() + self.position = 0 + self.zeros_at_mod = [0] * 4 + self.nonzeros_at_mod = [0] * 4 + self._state = ProbingState.DETECTING + self.quad = [0, 0, 0, 0] + self.invalid_utf16be = False + self.invalid_utf16le = False + self.invalid_utf32be = False + self.invalid_utf32le = False + self.first_half_surrogate_pair_detected_16be = False + self.first_half_surrogate_pair_detected_16le = False + self.reset() + + def reset(self) -> None: + super().reset() + self.position = 0 + self.zeros_at_mod = [0] * 4 + self.nonzeros_at_mod = [0] * 4 + self._state = ProbingState.DETECTING + self.invalid_utf16be = False + self.invalid_utf16le = False + self.invalid_utf32be = False + self.invalid_utf32le = False + self.first_half_surrogate_pair_detected_16be = False + self.first_half_surrogate_pair_detected_16le = False + self.quad = [0, 0, 0, 0] + + @property + def charset_name(self) -> str: + if self.is_likely_utf32be(): + return "utf-32be" + if self.is_likely_utf32le(): + return "utf-32le" + if self.is_likely_utf16be(): + return "utf-16be" + if self.is_likely_utf16le(): + return "utf-16le" + # default to something valid + return "utf-16" + + @property + def language(self) -> str: + return "" + + def approx_32bit_chars(self) -> float: + return max(1.0, self.position / 4.0) + + def approx_16bit_chars(self) -> float: + return max(1.0, self.position / 2.0) + + def is_likely_utf32be(self) -> bool: + approx_chars = self.approx_32bit_chars() + return approx_chars >= self.MIN_CHARS_FOR_DETECTION and ( + self.zeros_at_mod[0] / approx_chars > self.EXPECTED_RATIO + and self.zeros_at_mod[1] / approx_chars > self.EXPECTED_RATIO + and self.zeros_at_mod[2] / approx_chars > self.EXPECTED_RATIO + and self.nonzeros_at_mod[3] / approx_chars > self.EXPECTED_RATIO + and not self.invalid_utf32be + ) + + def is_likely_utf32le(self) -> bool: + approx_chars = self.approx_32bit_chars() + return approx_chars >= self.MIN_CHARS_FOR_DETECTION and ( + self.nonzeros_at_mod[0] / approx_chars > self.EXPECTED_RATIO + and self.zeros_at_mod[1] / approx_chars > self.EXPECTED_RATIO + and self.zeros_at_mod[2] / approx_chars > self.EXPECTED_RATIO + and self.zeros_at_mod[3] / approx_chars > self.EXPECTED_RATIO + and not self.invalid_utf32le + ) + + def is_likely_utf16be(self) -> bool: + approx_chars = self.approx_16bit_chars() + return approx_chars >= self.MIN_CHARS_FOR_DETECTION and ( + (self.nonzeros_at_mod[1] + self.nonzeros_at_mod[3]) / approx_chars + > self.EXPECTED_RATIO + and (self.zeros_at_mod[0] + self.zeros_at_mod[2]) / approx_chars + > self.EXPECTED_RATIO + and not self.invalid_utf16be + ) + + def is_likely_utf16le(self) -> bool: + approx_chars = self.approx_16bit_chars() + return approx_chars >= self.MIN_CHARS_FOR_DETECTION and ( + (self.nonzeros_at_mod[0] + self.nonzeros_at_mod[2]) / approx_chars + > self.EXPECTED_RATIO + and (self.zeros_at_mod[1] + self.zeros_at_mod[3]) / approx_chars + > self.EXPECTED_RATIO + and not self.invalid_utf16le + ) + + def validate_utf32_characters(self, quad: List[int]) -> None: + """ + Validate if the quad of bytes is valid UTF-32. + + UTF-32 is valid in the range 0x00000000 - 0x0010FFFF + excluding 0x0000D800 - 0x0000DFFF + + https://en.wikipedia.org/wiki/UTF-32 + """ + if ( + quad[0] != 0 + or quad[1] > 0x10 + or (quad[0] == 0 and quad[1] == 0 and 0xD8 <= quad[2] <= 0xDF) + ): + self.invalid_utf32be = True + if ( + quad[3] != 0 + or quad[2] > 0x10 + or (quad[3] == 0 and quad[2] == 0 and 0xD8 <= quad[1] <= 0xDF) + ): + self.invalid_utf32le = True + + def validate_utf16_characters(self, pair: List[int]) -> None: + """ + Validate if the pair of bytes is valid UTF-16. + + UTF-16 is valid in the range 0x0000 - 0xFFFF excluding 0xD800 - 0xFFFF + with an exception for surrogate pairs, which must be in the range + 0xD800-0xDBFF followed by 0xDC00-0xDFFF + + https://en.wikipedia.org/wiki/UTF-16 + """ + if not self.first_half_surrogate_pair_detected_16be: + if 0xD8 <= pair[0] <= 0xDB: + self.first_half_surrogate_pair_detected_16be = True + elif 0xDC <= pair[0] <= 0xDF: + self.invalid_utf16be = True + else: + if 0xDC <= pair[0] <= 0xDF: + self.first_half_surrogate_pair_detected_16be = False + else: + self.invalid_utf16be = True + + if not self.first_half_surrogate_pair_detected_16le: + if 0xD8 <= pair[1] <= 0xDB: + self.first_half_surrogate_pair_detected_16le = True + elif 0xDC <= pair[1] <= 0xDF: + self.invalid_utf16le = True + else: + if 0xDC <= pair[1] <= 0xDF: + self.first_half_surrogate_pair_detected_16le = False + else: + self.invalid_utf16le = True + + def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState: + for c in byte_str: + mod4 = self.position % 4 + self.quad[mod4] = c + if mod4 == 3: + self.validate_utf32_characters(self.quad) + self.validate_utf16_characters(self.quad[0:2]) + self.validate_utf16_characters(self.quad[2:4]) + if c == 0: + self.zeros_at_mod[mod4] += 1 + else: + self.nonzeros_at_mod[mod4] += 1 + self.position += 1 + return self.state + + @property + def state(self) -> ProbingState: + if self._state in {ProbingState.NOT_ME, ProbingState.FOUND_IT}: + # terminal, decided states + return self._state + if self.get_confidence() > 0.80: + self._state = ProbingState.FOUND_IT + elif self.position > 4 * 1024: + # if we get to 4kb into the file, and we can't conclude it's UTF, + # let's give up + self._state = ProbingState.NOT_ME + return self._state + + def get_confidence(self) -> float: + return ( + 0.85 + if ( + self.is_likely_utf16le() + or self.is_likely_utf16be() + or self.is_likely_utf32le() + or self.is_likely_utf32be() + ) + else 0.00 + ) diff --git a/src/pip/_vendor/chardet/utf8prober.py b/src/pip/_vendor/chardet/utf8prober.py index 6c3196cc2d7..d96354d97c2 100644 --- a/src/pip/_vendor/chardet/utf8prober.py +++ b/src/pip/_vendor/chardet/utf8prober.py @@ -25,45 +25,46 @@ # 02110-1301 USA ######################### END LICENSE BLOCK ######################### +from typing import Union + from .charsetprober import CharSetProber -from .enums import ProbingState, MachineState from .codingstatemachine import CodingStateMachine +from .enums import MachineState, ProbingState from .mbcssm import UTF8_SM_MODEL - class UTF8Prober(CharSetProber): ONE_CHAR_PROB = 0.5 - def __init__(self): - super(UTF8Prober, self).__init__() + def __init__(self) -> None: + super().__init__() self.coding_sm = CodingStateMachine(UTF8_SM_MODEL) - self._num_mb_chars = None + self._num_mb_chars = 0 self.reset() - def reset(self): - super(UTF8Prober, self).reset() + def reset(self) -> None: + super().reset() self.coding_sm.reset() self._num_mb_chars = 0 @property - def charset_name(self): + def charset_name(self) -> str: return "utf-8" @property - def language(self): + def language(self) -> str: return "" - def feed(self, byte_str): + def feed(self, byte_str: Union[bytes, bytearray]) -> ProbingState: for c in byte_str: coding_state = self.coding_sm.next_state(c) if coding_state == MachineState.ERROR: self._state = ProbingState.NOT_ME break - elif coding_state == MachineState.ITS_ME: + if coding_state == MachineState.ITS_ME: self._state = ProbingState.FOUND_IT break - elif coding_state == MachineState.START: + if coding_state == MachineState.START: if self.coding_sm.get_current_charlen() >= 2: self._num_mb_chars += 1 @@ -73,10 +74,9 @@ def feed(self, byte_str): return self.state - def get_confidence(self): + def get_confidence(self) -> float: unlike = 0.99 if self._num_mb_chars < 6: - unlike *= self.ONE_CHAR_PROB ** self._num_mb_chars + unlike *= self.ONE_CHAR_PROB**self._num_mb_chars return 1.0 - unlike - else: - return unlike + return unlike diff --git a/src/pip/_vendor/chardet/version.py b/src/pip/_vendor/chardet/version.py index 70369b9d663..c5e9d85cd75 100644 --- a/src/pip/_vendor/chardet/version.py +++ b/src/pip/_vendor/chardet/version.py @@ -1,9 +1,9 @@ """ This module exists only to simplify retrieving the version number of chardet -from within setup.py and from chardet subpackages. +from within setuptools and from chardet subpackages. :author: Dan Blanchard (dan.blanchard@gmail.com) """ -__version__ = "4.0.0" -VERSION = __version__.split('.') +__version__ = "5.1.0" +VERSION = __version__.split(".") diff --git a/src/pip/_vendor/colorama/__init__.py b/src/pip/_vendor/colorama/__init__.py index b149ed79b0a..383101cdb38 100644 --- a/src/pip/_vendor/colorama/__init__.py +++ b/src/pip/_vendor/colorama/__init__.py @@ -1,6 +1,7 @@ # Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file. -from .initialise import init, deinit, reinit, colorama_text +from .initialise import init, deinit, reinit, colorama_text, just_fix_windows_console from .ansi import Fore, Back, Style, Cursor from .ansitowin32 import AnsiToWin32 -__version__ = '0.4.4' +__version__ = '0.4.6' + diff --git a/src/pip/_vendor/colorama/ansitowin32.py b/src/pip/_vendor/colorama/ansitowin32.py index 6039a054320..abf209e60c7 100644 --- a/src/pip/_vendor/colorama/ansitowin32.py +++ b/src/pip/_vendor/colorama/ansitowin32.py @@ -4,7 +4,7 @@ import os from .ansi import AnsiFore, AnsiBack, AnsiStyle, Style, BEL -from .winterm import WinTerm, WinColor, WinStyle +from .winterm import enable_vt_processing, WinTerm, WinColor, WinStyle from .win32 import windll, winapi_test @@ -37,6 +37,12 @@ def __enter__(self, *args, **kwargs): def __exit__(self, *args, **kwargs): return self.__wrapped.__exit__(*args, **kwargs) + def __setstate__(self, state): + self.__dict__ = state + + def __getstate__(self): + return self.__dict__ + def write(self, text): self.__convertor.write(text) @@ -57,7 +63,9 @@ def closed(self): stream = self.__wrapped try: return stream.closed - except AttributeError: + # AttributeError in the case that the stream doesn't support being closed + # ValueError for the case that the stream has already been detached when atexit runs + except (AttributeError, ValueError): return True @@ -86,15 +94,22 @@ def __init__(self, wrapped, convert=None, strip=None, autoreset=False): # (e.g. Cygwin Terminal). In this case it's up to the terminal # to support the ANSI codes. conversion_supported = on_windows and winapi_test() + try: + fd = wrapped.fileno() + except Exception: + fd = -1 + system_has_native_ansi = not on_windows or enable_vt_processing(fd) + have_tty = not self.stream.closed and self.stream.isatty() + need_conversion = conversion_supported and not system_has_native_ansi # should we strip ANSI sequences from our output? if strip is None: - strip = conversion_supported or (not self.stream.closed and not self.stream.isatty()) + strip = need_conversion or not have_tty self.strip = strip # should we should convert ANSI sequences into win32 calls? if convert is None: - convert = conversion_supported and not self.stream.closed and self.stream.isatty() + convert = need_conversion and have_tty self.convert = convert # dict of ansi codes to win32 functions and parameters @@ -256,3 +271,7 @@ def convert_osc(self, text): if params[0] in '02': winterm.set_title(params[1]) return text + + + def flush(self): + self.wrapped.flush() diff --git a/src/pip/_vendor/colorama/initialise.py b/src/pip/_vendor/colorama/initialise.py index 430d0668727..d5fd4b71fed 100644 --- a/src/pip/_vendor/colorama/initialise.py +++ b/src/pip/_vendor/colorama/initialise.py @@ -6,13 +6,27 @@ from .ansitowin32 import AnsiToWin32 -orig_stdout = None -orig_stderr = None +def _wipe_internal_state_for_tests(): + global orig_stdout, orig_stderr + orig_stdout = None + orig_stderr = None + + global wrapped_stdout, wrapped_stderr + wrapped_stdout = None + wrapped_stderr = None -wrapped_stdout = None -wrapped_stderr = None + global atexit_done + atexit_done = False + + global fixed_windows_console + fixed_windows_console = False -atexit_done = False + try: + # no-op if it wasn't registered + atexit.unregister(reset_all) + except AttributeError: + # python 2: no atexit.unregister. Oh well, we did our best. + pass def reset_all(): @@ -55,6 +69,29 @@ def deinit(): sys.stderr = orig_stderr +def just_fix_windows_console(): + global fixed_windows_console + + if sys.platform != "win32": + return + if fixed_windows_console: + return + if wrapped_stdout is not None or wrapped_stderr is not None: + # Someone already ran init() and it did stuff, so we won't second-guess them + return + + # On newer versions of Windows, AnsiToWin32.__init__ will implicitly enable the + # native ANSI support in the console as a side-effect. We only need to actually + # replace sys.stdout/stderr if we're in the old-style conversion mode. + new_stdout = AnsiToWin32(sys.stdout, convert=None, strip=None, autoreset=False) + if new_stdout.convert: + sys.stdout = new_stdout + new_stderr = AnsiToWin32(sys.stderr, convert=None, strip=None, autoreset=False) + if new_stderr.convert: + sys.stderr = new_stderr + + fixed_windows_console = True + @contextlib.contextmanager def colorama_text(*args, **kwargs): init(*args, **kwargs) @@ -78,3 +115,7 @@ def wrap_stream(stream, convert, strip, autoreset, wrap): if wrapper.should_wrap(): stream = wrapper.stream return stream + + +# Use this for initial setup as well, to reduce code duplication +_wipe_internal_state_for_tests() diff --git a/src/pip/_vendor/colorama/tests/__init__.py b/src/pip/_vendor/colorama/tests/__init__.py new file mode 100644 index 00000000000..8c5661e93a2 --- /dev/null +++ b/src/pip/_vendor/colorama/tests/__init__.py @@ -0,0 +1 @@ +# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file. diff --git a/src/pip/_vendor/colorama/tests/ansi_test.py b/src/pip/_vendor/colorama/tests/ansi_test.py new file mode 100644 index 00000000000..0a20c80f882 --- /dev/null +++ b/src/pip/_vendor/colorama/tests/ansi_test.py @@ -0,0 +1,76 @@ +# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file. +import sys +from unittest import TestCase, main + +from ..ansi import Back, Fore, Style +from ..ansitowin32 import AnsiToWin32 + +stdout_orig = sys.stdout +stderr_orig = sys.stderr + + +class AnsiTest(TestCase): + + def setUp(self): + # sanity check: stdout should be a file or StringIO object. + # It will only be AnsiToWin32 if init() has previously wrapped it + self.assertNotEqual(type(sys.stdout), AnsiToWin32) + self.assertNotEqual(type(sys.stderr), AnsiToWin32) + + def tearDown(self): + sys.stdout = stdout_orig + sys.stderr = stderr_orig + + + def testForeAttributes(self): + self.assertEqual(Fore.BLACK, '\033[30m') + self.assertEqual(Fore.RED, '\033[31m') + self.assertEqual(Fore.GREEN, '\033[32m') + self.assertEqual(Fore.YELLOW, '\033[33m') + self.assertEqual(Fore.BLUE, '\033[34m') + self.assertEqual(Fore.MAGENTA, '\033[35m') + self.assertEqual(Fore.CYAN, '\033[36m') + self.assertEqual(Fore.WHITE, '\033[37m') + self.assertEqual(Fore.RESET, '\033[39m') + + # Check the light, extended versions. + self.assertEqual(Fore.LIGHTBLACK_EX, '\033[90m') + self.assertEqual(Fore.LIGHTRED_EX, '\033[91m') + self.assertEqual(Fore.LIGHTGREEN_EX, '\033[92m') + self.assertEqual(Fore.LIGHTYELLOW_EX, '\033[93m') + self.assertEqual(Fore.LIGHTBLUE_EX, '\033[94m') + self.assertEqual(Fore.LIGHTMAGENTA_EX, '\033[95m') + self.assertEqual(Fore.LIGHTCYAN_EX, '\033[96m') + self.assertEqual(Fore.LIGHTWHITE_EX, '\033[97m') + + + def testBackAttributes(self): + self.assertEqual(Back.BLACK, '\033[40m') + self.assertEqual(Back.RED, '\033[41m') + self.assertEqual(Back.GREEN, '\033[42m') + self.assertEqual(Back.YELLOW, '\033[43m') + self.assertEqual(Back.BLUE, '\033[44m') + self.assertEqual(Back.MAGENTA, '\033[45m') + self.assertEqual(Back.CYAN, '\033[46m') + self.assertEqual(Back.WHITE, '\033[47m') + self.assertEqual(Back.RESET, '\033[49m') + + # Check the light, extended versions. + self.assertEqual(Back.LIGHTBLACK_EX, '\033[100m') + self.assertEqual(Back.LIGHTRED_EX, '\033[101m') + self.assertEqual(Back.LIGHTGREEN_EX, '\033[102m') + self.assertEqual(Back.LIGHTYELLOW_EX, '\033[103m') + self.assertEqual(Back.LIGHTBLUE_EX, '\033[104m') + self.assertEqual(Back.LIGHTMAGENTA_EX, '\033[105m') + self.assertEqual(Back.LIGHTCYAN_EX, '\033[106m') + self.assertEqual(Back.LIGHTWHITE_EX, '\033[107m') + + + def testStyleAttributes(self): + self.assertEqual(Style.DIM, '\033[2m') + self.assertEqual(Style.NORMAL, '\033[22m') + self.assertEqual(Style.BRIGHT, '\033[1m') + + +if __name__ == '__main__': + main() diff --git a/src/pip/_vendor/colorama/tests/ansitowin32_test.py b/src/pip/_vendor/colorama/tests/ansitowin32_test.py new file mode 100644 index 00000000000..91ca551f97b --- /dev/null +++ b/src/pip/_vendor/colorama/tests/ansitowin32_test.py @@ -0,0 +1,294 @@ +# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file. +from io import StringIO, TextIOWrapper +from unittest import TestCase, main +try: + from contextlib import ExitStack +except ImportError: + # python 2 + from contextlib2 import ExitStack + +try: + from unittest.mock import MagicMock, Mock, patch +except ImportError: + from mock import MagicMock, Mock, patch + +from ..ansitowin32 import AnsiToWin32, StreamWrapper +from ..win32 import ENABLE_VIRTUAL_TERMINAL_PROCESSING +from .utils import osname + + +class StreamWrapperTest(TestCase): + + def testIsAProxy(self): + mockStream = Mock() + wrapper = StreamWrapper(mockStream, None) + self.assertTrue( wrapper.random_attr is mockStream.random_attr ) + + def testDelegatesWrite(self): + mockStream = Mock() + mockConverter = Mock() + wrapper = StreamWrapper(mockStream, mockConverter) + wrapper.write('hello') + self.assertTrue(mockConverter.write.call_args, (('hello',), {})) + + def testDelegatesContext(self): + mockConverter = Mock() + s = StringIO() + with StreamWrapper(s, mockConverter) as fp: + fp.write(u'hello') + self.assertTrue(s.closed) + + def testProxyNoContextManager(self): + mockStream = MagicMock() + mockStream.__enter__.side_effect = AttributeError() + mockConverter = Mock() + with self.assertRaises(AttributeError) as excinfo: + with StreamWrapper(mockStream, mockConverter) as wrapper: + wrapper.write('hello') + + def test_closed_shouldnt_raise_on_closed_stream(self): + stream = StringIO() + stream.close() + wrapper = StreamWrapper(stream, None) + self.assertEqual(wrapper.closed, True) + + def test_closed_shouldnt_raise_on_detached_stream(self): + stream = TextIOWrapper(StringIO()) + stream.detach() + wrapper = StreamWrapper(stream, None) + self.assertEqual(wrapper.closed, True) + +class AnsiToWin32Test(TestCase): + + def testInit(self): + mockStdout = Mock() + auto = Mock() + stream = AnsiToWin32(mockStdout, autoreset=auto) + self.assertEqual(stream.wrapped, mockStdout) + self.assertEqual(stream.autoreset, auto) + + @patch('colorama.ansitowin32.winterm', None) + @patch('colorama.ansitowin32.winapi_test', lambda *_: True) + def testStripIsTrueOnWindows(self): + with osname('nt'): + mockStdout = Mock() + stream = AnsiToWin32(mockStdout) + self.assertTrue(stream.strip) + + def testStripIsFalseOffWindows(self): + with osname('posix'): + mockStdout = Mock(closed=False) + stream = AnsiToWin32(mockStdout) + self.assertFalse(stream.strip) + + def testWriteStripsAnsi(self): + mockStdout = Mock() + stream = AnsiToWin32(mockStdout) + stream.wrapped = Mock() + stream.write_and_convert = Mock() + stream.strip = True + + stream.write('abc') + + self.assertFalse(stream.wrapped.write.called) + self.assertEqual(stream.write_and_convert.call_args, (('abc',), {})) + + def testWriteDoesNotStripAnsi(self): + mockStdout = Mock() + stream = AnsiToWin32(mockStdout) + stream.wrapped = Mock() + stream.write_and_convert = Mock() + stream.strip = False + stream.convert = False + + stream.write('abc') + + self.assertFalse(stream.write_and_convert.called) + self.assertEqual(stream.wrapped.write.call_args, (('abc',), {})) + + def assert_autoresets(self, convert, autoreset=True): + stream = AnsiToWin32(Mock()) + stream.convert = convert + stream.reset_all = Mock() + stream.autoreset = autoreset + stream.winterm = Mock() + + stream.write('abc') + + self.assertEqual(stream.reset_all.called, autoreset) + + def testWriteAutoresets(self): + self.assert_autoresets(convert=True) + self.assert_autoresets(convert=False) + self.assert_autoresets(convert=True, autoreset=False) + self.assert_autoresets(convert=False, autoreset=False) + + def testWriteAndConvertWritesPlainText(self): + stream = AnsiToWin32(Mock()) + stream.write_and_convert( 'abc' ) + self.assertEqual( stream.wrapped.write.call_args, (('abc',), {}) ) + + def testWriteAndConvertStripsAllValidAnsi(self): + stream = AnsiToWin32(Mock()) + stream.call_win32 = Mock() + data = [ + 'abc\033[mdef', + 'abc\033[0mdef', + 'abc\033[2mdef', + 'abc\033[02mdef', + 'abc\033[002mdef', + 'abc\033[40mdef', + 'abc\033[040mdef', + 'abc\033[0;1mdef', + 'abc\033[40;50mdef', + 'abc\033[50;30;40mdef', + 'abc\033[Adef', + 'abc\033[0Gdef', + 'abc\033[1;20;128Hdef', + ] + for datum in data: + stream.wrapped.write.reset_mock() + stream.write_and_convert( datum ) + self.assertEqual( + [args[0] for args in stream.wrapped.write.call_args_list], + [ ('abc',), ('def',) ] + ) + + def testWriteAndConvertSkipsEmptySnippets(self): + stream = AnsiToWin32(Mock()) + stream.call_win32 = Mock() + stream.write_and_convert( '\033[40m\033[41m' ) + self.assertFalse( stream.wrapped.write.called ) + + def testWriteAndConvertCallsWin32WithParamsAndCommand(self): + stream = AnsiToWin32(Mock()) + stream.convert = True + stream.call_win32 = Mock() + stream.extract_params = Mock(return_value='params') + data = { + 'abc\033[adef': ('a', 'params'), + 'abc\033[;;bdef': ('b', 'params'), + 'abc\033[0cdef': ('c', 'params'), + 'abc\033[;;0;;Gdef': ('G', 'params'), + 'abc\033[1;20;128Hdef': ('H', 'params'), + } + for datum, expected in data.items(): + stream.call_win32.reset_mock() + stream.write_and_convert( datum ) + self.assertEqual( stream.call_win32.call_args[0], expected ) + + def test_reset_all_shouldnt_raise_on_closed_orig_stdout(self): + stream = StringIO() + converter = AnsiToWin32(stream) + stream.close() + + converter.reset_all() + + def test_wrap_shouldnt_raise_on_closed_orig_stdout(self): + stream = StringIO() + stream.close() + with \ + patch("colorama.ansitowin32.os.name", "nt"), \ + patch("colorama.ansitowin32.winapi_test", lambda: True): + converter = AnsiToWin32(stream) + self.assertTrue(converter.strip) + self.assertFalse(converter.convert) + + def test_wrap_shouldnt_raise_on_missing_closed_attr(self): + with \ + patch("colorama.ansitowin32.os.name", "nt"), \ + patch("colorama.ansitowin32.winapi_test", lambda: True): + converter = AnsiToWin32(object()) + self.assertTrue(converter.strip) + self.assertFalse(converter.convert) + + def testExtractParams(self): + stream = AnsiToWin32(Mock()) + data = { + '': (0,), + ';;': (0,), + '2': (2,), + ';;002;;': (2,), + '0;1': (0, 1), + ';;003;;456;;': (3, 456), + '11;22;33;44;55': (11, 22, 33, 44, 55), + } + for datum, expected in data.items(): + self.assertEqual(stream.extract_params('m', datum), expected) + + def testCallWin32UsesLookup(self): + listener = Mock() + stream = AnsiToWin32(listener) + stream.win32_calls = { + 1: (lambda *_, **__: listener(11),), + 2: (lambda *_, **__: listener(22),), + 3: (lambda *_, **__: listener(33),), + } + stream.call_win32('m', (3, 1, 99, 2)) + self.assertEqual( + [a[0][0] for a in listener.call_args_list], + [33, 11, 22] ) + + def test_osc_codes(self): + mockStdout = Mock() + stream = AnsiToWin32(mockStdout, convert=True) + with patch('colorama.ansitowin32.winterm') as winterm: + data = [ + '\033]0\x07', # missing arguments + '\033]0;foo\x08', # wrong OSC command + '\033]0;colorama_test_title\x07', # should work + '\033]1;colorama_test_title\x07', # wrong set command + '\033]2;colorama_test_title\x07', # should work + '\033]' + ';' * 64 + '\x08', # see issue #247 + ] + for code in data: + stream.write(code) + self.assertEqual(winterm.set_title.call_count, 2) + + def test_native_windows_ansi(self): + with ExitStack() as stack: + def p(a, b): + stack.enter_context(patch(a, b, create=True)) + # Pretend to be on Windows + p("colorama.ansitowin32.os.name", "nt") + p("colorama.ansitowin32.winapi_test", lambda: True) + p("colorama.win32.winapi_test", lambda: True) + p("colorama.winterm.win32.windll", "non-None") + p("colorama.winterm.get_osfhandle", lambda _: 1234) + + # Pretend that our mock stream has native ANSI support + p( + "colorama.winterm.win32.GetConsoleMode", + lambda _: ENABLE_VIRTUAL_TERMINAL_PROCESSING, + ) + SetConsoleMode = Mock() + p("colorama.winterm.win32.SetConsoleMode", SetConsoleMode) + + stdout = Mock() + stdout.closed = False + stdout.isatty.return_value = True + stdout.fileno.return_value = 1 + + # Our fake console says it has native vt support, so AnsiToWin32 should + # enable that support and do nothing else. + stream = AnsiToWin32(stdout) + SetConsoleMode.assert_called_with(1234, ENABLE_VIRTUAL_TERMINAL_PROCESSING) + self.assertFalse(stream.strip) + self.assertFalse(stream.convert) + self.assertFalse(stream.should_wrap()) + + # Now let's pretend we're on an old Windows console, that doesn't have + # native ANSI support. + p("colorama.winterm.win32.GetConsoleMode", lambda _: 0) + SetConsoleMode = Mock() + p("colorama.winterm.win32.SetConsoleMode", SetConsoleMode) + + stream = AnsiToWin32(stdout) + SetConsoleMode.assert_called_with(1234, ENABLE_VIRTUAL_TERMINAL_PROCESSING) + self.assertTrue(stream.strip) + self.assertTrue(stream.convert) + self.assertTrue(stream.should_wrap()) + + +if __name__ == '__main__': + main() diff --git a/src/pip/_vendor/colorama/tests/initialise_test.py b/src/pip/_vendor/colorama/tests/initialise_test.py new file mode 100644 index 00000000000..89f9b07511c --- /dev/null +++ b/src/pip/_vendor/colorama/tests/initialise_test.py @@ -0,0 +1,189 @@ +# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file. +import sys +from unittest import TestCase, main, skipUnless + +try: + from unittest.mock import patch, Mock +except ImportError: + from mock import patch, Mock + +from ..ansitowin32 import StreamWrapper +from ..initialise import init, just_fix_windows_console, _wipe_internal_state_for_tests +from .utils import osname, replace_by + +orig_stdout = sys.stdout +orig_stderr = sys.stderr + + +class InitTest(TestCase): + + @skipUnless(sys.stdout.isatty(), "sys.stdout is not a tty") + def setUp(self): + # sanity check + self.assertNotWrapped() + + def tearDown(self): + _wipe_internal_state_for_tests() + sys.stdout = orig_stdout + sys.stderr = orig_stderr + + def assertWrapped(self): + self.assertIsNot(sys.stdout, orig_stdout, 'stdout should be wrapped') + self.assertIsNot(sys.stderr, orig_stderr, 'stderr should be wrapped') + self.assertTrue(isinstance(sys.stdout, StreamWrapper), + 'bad stdout wrapper') + self.assertTrue(isinstance(sys.stderr, StreamWrapper), + 'bad stderr wrapper') + + def assertNotWrapped(self): + self.assertIs(sys.stdout, orig_stdout, 'stdout should not be wrapped') + self.assertIs(sys.stderr, orig_stderr, 'stderr should not be wrapped') + + @patch('colorama.initialise.reset_all') + @patch('colorama.ansitowin32.winapi_test', lambda *_: True) + @patch('colorama.ansitowin32.enable_vt_processing', lambda *_: False) + def testInitWrapsOnWindows(self, _): + with osname("nt"): + init() + self.assertWrapped() + + @patch('colorama.initialise.reset_all') + @patch('colorama.ansitowin32.winapi_test', lambda *_: False) + def testInitDoesntWrapOnEmulatedWindows(self, _): + with osname("nt"): + init() + self.assertNotWrapped() + + def testInitDoesntWrapOnNonWindows(self): + with osname("posix"): + init() + self.assertNotWrapped() + + def testInitDoesntWrapIfNone(self): + with replace_by(None): + init() + # We can't use assertNotWrapped here because replace_by(None) + # changes stdout/stderr already. + self.assertIsNone(sys.stdout) + self.assertIsNone(sys.stderr) + + def testInitAutoresetOnWrapsOnAllPlatforms(self): + with osname("posix"): + init(autoreset=True) + self.assertWrapped() + + def testInitWrapOffDoesntWrapOnWindows(self): + with osname("nt"): + init(wrap=False) + self.assertNotWrapped() + + def testInitWrapOffIncompatibleWithAutoresetOn(self): + self.assertRaises(ValueError, lambda: init(autoreset=True, wrap=False)) + + @patch('colorama.win32.SetConsoleTextAttribute') + @patch('colorama.initialise.AnsiToWin32') + def testAutoResetPassedOn(self, mockATW32, _): + with osname("nt"): + init(autoreset=True) + self.assertEqual(len(mockATW32.call_args_list), 2) + self.assertEqual(mockATW32.call_args_list[1][1]['autoreset'], True) + self.assertEqual(mockATW32.call_args_list[0][1]['autoreset'], True) + + @patch('colorama.initialise.AnsiToWin32') + def testAutoResetChangeable(self, mockATW32): + with osname("nt"): + init() + + init(autoreset=True) + self.assertEqual(len(mockATW32.call_args_list), 4) + self.assertEqual(mockATW32.call_args_list[2][1]['autoreset'], True) + self.assertEqual(mockATW32.call_args_list[3][1]['autoreset'], True) + + init() + self.assertEqual(len(mockATW32.call_args_list), 6) + self.assertEqual( + mockATW32.call_args_list[4][1]['autoreset'], False) + self.assertEqual( + mockATW32.call_args_list[5][1]['autoreset'], False) + + + @patch('colorama.initialise.atexit.register') + def testAtexitRegisteredOnlyOnce(self, mockRegister): + init() + self.assertTrue(mockRegister.called) + mockRegister.reset_mock() + init() + self.assertFalse(mockRegister.called) + + +class JustFixWindowsConsoleTest(TestCase): + def _reset(self): + _wipe_internal_state_for_tests() + sys.stdout = orig_stdout + sys.stderr = orig_stderr + + def tearDown(self): + self._reset() + + @patch("colorama.ansitowin32.winapi_test", lambda: True) + def testJustFixWindowsConsole(self): + if sys.platform != "win32": + # just_fix_windows_console should be a no-op + just_fix_windows_console() + self.assertIs(sys.stdout, orig_stdout) + self.assertIs(sys.stderr, orig_stderr) + else: + def fake_std(): + # Emulate stdout=not a tty, stderr=tty + # to check that we handle both cases correctly + stdout = Mock() + stdout.closed = False + stdout.isatty.return_value = False + stdout.fileno.return_value = 1 + sys.stdout = stdout + + stderr = Mock() + stderr.closed = False + stderr.isatty.return_value = True + stderr.fileno.return_value = 2 + sys.stderr = stderr + + for native_ansi in [False, True]: + with patch( + 'colorama.ansitowin32.enable_vt_processing', + lambda *_: native_ansi + ): + self._reset() + fake_std() + + # Regular single-call test + prev_stdout = sys.stdout + prev_stderr = sys.stderr + just_fix_windows_console() + self.assertIs(sys.stdout, prev_stdout) + if native_ansi: + self.assertIs(sys.stderr, prev_stderr) + else: + self.assertIsNot(sys.stderr, prev_stderr) + + # second call without resetting is always a no-op + prev_stdout = sys.stdout + prev_stderr = sys.stderr + just_fix_windows_console() + self.assertIs(sys.stdout, prev_stdout) + self.assertIs(sys.stderr, prev_stderr) + + self._reset() + fake_std() + + # If init() runs first, just_fix_windows_console should be a no-op + init() + prev_stdout = sys.stdout + prev_stderr = sys.stderr + just_fix_windows_console() + self.assertIs(prev_stdout, sys.stdout) + self.assertIs(prev_stderr, sys.stderr) + + +if __name__ == '__main__': + main() diff --git a/src/pip/_vendor/colorama/tests/isatty_test.py b/src/pip/_vendor/colorama/tests/isatty_test.py new file mode 100644 index 00000000000..0f84e4befe5 --- /dev/null +++ b/src/pip/_vendor/colorama/tests/isatty_test.py @@ -0,0 +1,57 @@ +# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file. +import sys +from unittest import TestCase, main + +from ..ansitowin32 import StreamWrapper, AnsiToWin32 +from .utils import pycharm, replace_by, replace_original_by, StreamTTY, StreamNonTTY + + +def is_a_tty(stream): + return StreamWrapper(stream, None).isatty() + +class IsattyTest(TestCase): + + def test_TTY(self): + tty = StreamTTY() + self.assertTrue(is_a_tty(tty)) + with pycharm(): + self.assertTrue(is_a_tty(tty)) + + def test_nonTTY(self): + non_tty = StreamNonTTY() + self.assertFalse(is_a_tty(non_tty)) + with pycharm(): + self.assertFalse(is_a_tty(non_tty)) + + def test_withPycharm(self): + with pycharm(): + self.assertTrue(is_a_tty(sys.stderr)) + self.assertTrue(is_a_tty(sys.stdout)) + + def test_withPycharmTTYOverride(self): + tty = StreamTTY() + with pycharm(), replace_by(tty): + self.assertTrue(is_a_tty(tty)) + + def test_withPycharmNonTTYOverride(self): + non_tty = StreamNonTTY() + with pycharm(), replace_by(non_tty): + self.assertFalse(is_a_tty(non_tty)) + + def test_withPycharmNoneOverride(self): + with pycharm(): + with replace_by(None), replace_original_by(None): + self.assertFalse(is_a_tty(None)) + self.assertFalse(is_a_tty(StreamNonTTY())) + self.assertTrue(is_a_tty(StreamTTY())) + + def test_withPycharmStreamWrapped(self): + with pycharm(): + self.assertTrue(AnsiToWin32(StreamTTY()).stream.isatty()) + self.assertFalse(AnsiToWin32(StreamNonTTY()).stream.isatty()) + self.assertTrue(AnsiToWin32(sys.stdout).stream.isatty()) + self.assertTrue(AnsiToWin32(sys.stderr).stream.isatty()) + + +if __name__ == '__main__': + main() diff --git a/src/pip/_vendor/colorama/tests/utils.py b/src/pip/_vendor/colorama/tests/utils.py new file mode 100644 index 00000000000..472fafb4403 --- /dev/null +++ b/src/pip/_vendor/colorama/tests/utils.py @@ -0,0 +1,49 @@ +# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file. +from contextlib import contextmanager +from io import StringIO +import sys +import os + + +class StreamTTY(StringIO): + def isatty(self): + return True + +class StreamNonTTY(StringIO): + def isatty(self): + return False + +@contextmanager +def osname(name): + orig = os.name + os.name = name + yield + os.name = orig + +@contextmanager +def replace_by(stream): + orig_stdout = sys.stdout + orig_stderr = sys.stderr + sys.stdout = stream + sys.stderr = stream + yield + sys.stdout = orig_stdout + sys.stderr = orig_stderr + +@contextmanager +def replace_original_by(stream): + orig_stdout = sys.__stdout__ + orig_stderr = sys.__stderr__ + sys.__stdout__ = stream + sys.__stderr__ = stream + yield + sys.__stdout__ = orig_stdout + sys.__stderr__ = orig_stderr + +@contextmanager +def pycharm(): + os.environ["PYCHARM_HOSTED"] = "1" + non_tty = StreamNonTTY() + with replace_by(non_tty), replace_original_by(non_tty): + yield + del os.environ["PYCHARM_HOSTED"] diff --git a/src/pip/_vendor/colorama/tests/winterm_test.py b/src/pip/_vendor/colorama/tests/winterm_test.py new file mode 100644 index 00000000000..d0955f9e608 --- /dev/null +++ b/src/pip/_vendor/colorama/tests/winterm_test.py @@ -0,0 +1,131 @@ +# Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file. +import sys +from unittest import TestCase, main, skipUnless + +try: + from unittest.mock import Mock, patch +except ImportError: + from mock import Mock, patch + +from ..winterm import WinColor, WinStyle, WinTerm + + +class WinTermTest(TestCase): + + @patch('colorama.winterm.win32') + def testInit(self, mockWin32): + mockAttr = Mock() + mockAttr.wAttributes = 7 + 6 * 16 + 8 + mockWin32.GetConsoleScreenBufferInfo.return_value = mockAttr + term = WinTerm() + self.assertEqual(term._fore, 7) + self.assertEqual(term._back, 6) + self.assertEqual(term._style, 8) + + @skipUnless(sys.platform.startswith("win"), "requires Windows") + def testGetAttrs(self): + term = WinTerm() + + term._fore = 0 + term._back = 0 + term._style = 0 + self.assertEqual(term.get_attrs(), 0) + + term._fore = WinColor.YELLOW + self.assertEqual(term.get_attrs(), WinColor.YELLOW) + + term._back = WinColor.MAGENTA + self.assertEqual( + term.get_attrs(), + WinColor.YELLOW + WinColor.MAGENTA * 16) + + term._style = WinStyle.BRIGHT + self.assertEqual( + term.get_attrs(), + WinColor.YELLOW + WinColor.MAGENTA * 16 + WinStyle.BRIGHT) + + @patch('colorama.winterm.win32') + def testResetAll(self, mockWin32): + mockAttr = Mock() + mockAttr.wAttributes = 1 + 2 * 16 + 8 + mockWin32.GetConsoleScreenBufferInfo.return_value = mockAttr + term = WinTerm() + + term.set_console = Mock() + term._fore = -1 + term._back = -1 + term._style = -1 + + term.reset_all() + + self.assertEqual(term._fore, 1) + self.assertEqual(term._back, 2) + self.assertEqual(term._style, 8) + self.assertEqual(term.set_console.called, True) + + @skipUnless(sys.platform.startswith("win"), "requires Windows") + def testFore(self): + term = WinTerm() + term.set_console = Mock() + term._fore = 0 + + term.fore(5) + + self.assertEqual(term._fore, 5) + self.assertEqual(term.set_console.called, True) + + @skipUnless(sys.platform.startswith("win"), "requires Windows") + def testBack(self): + term = WinTerm() + term.set_console = Mock() + term._back = 0 + + term.back(5) + + self.assertEqual(term._back, 5) + self.assertEqual(term.set_console.called, True) + + @skipUnless(sys.platform.startswith("win"), "requires Windows") + def testStyle(self): + term = WinTerm() + term.set_console = Mock() + term._style = 0 + + term.style(22) + + self.assertEqual(term._style, 22) + self.assertEqual(term.set_console.called, True) + + @patch('colorama.winterm.win32') + def testSetConsole(self, mockWin32): + mockAttr = Mock() + mockAttr.wAttributes = 0 + mockWin32.GetConsoleScreenBufferInfo.return_value = mockAttr + term = WinTerm() + term.windll = Mock() + + term.set_console() + + self.assertEqual( + mockWin32.SetConsoleTextAttribute.call_args, + ((mockWin32.STDOUT, term.get_attrs()), {}) + ) + + @patch('colorama.winterm.win32') + def testSetConsoleOnStderr(self, mockWin32): + mockAttr = Mock() + mockAttr.wAttributes = 0 + mockWin32.GetConsoleScreenBufferInfo.return_value = mockAttr + term = WinTerm() + term.windll = Mock() + + term.set_console(on_stderr=True) + + self.assertEqual( + mockWin32.SetConsoleTextAttribute.call_args, + ((mockWin32.STDERR, term.get_attrs()), {}) + ) + + +if __name__ == '__main__': + main() diff --git a/src/pip/_vendor/colorama/win32.py b/src/pip/_vendor/colorama/win32.py index c2d83603367..841b0e270a3 100644 --- a/src/pip/_vendor/colorama/win32.py +++ b/src/pip/_vendor/colorama/win32.py @@ -4,6 +4,8 @@ STDOUT = -11 STDERR = -12 +ENABLE_VIRTUAL_TERMINAL_PROCESSING = 0x0004 + try: import ctypes from ctypes import LibraryLoader @@ -89,6 +91,20 @@ def __str__(self): ] _SetConsoleTitleW.restype = wintypes.BOOL + _GetConsoleMode = windll.kernel32.GetConsoleMode + _GetConsoleMode.argtypes = [ + wintypes.HANDLE, + POINTER(wintypes.DWORD) + ] + _GetConsoleMode.restype = wintypes.BOOL + + _SetConsoleMode = windll.kernel32.SetConsoleMode + _SetConsoleMode.argtypes = [ + wintypes.HANDLE, + wintypes.DWORD + ] + _SetConsoleMode.restype = wintypes.BOOL + def _winapi_test(handle): csbi = CONSOLE_SCREEN_BUFFER_INFO() success = _GetConsoleScreenBufferInfo( @@ -150,3 +166,15 @@ def FillConsoleOutputAttribute(stream_id, attr, length, start): def SetConsoleTitle(title): return _SetConsoleTitleW(title) + + def GetConsoleMode(handle): + mode = wintypes.DWORD() + success = _GetConsoleMode(handle, byref(mode)) + if not success: + raise ctypes.WinError() + return mode.value + + def SetConsoleMode(handle, mode): + success = _SetConsoleMode(handle, mode) + if not success: + raise ctypes.WinError() diff --git a/src/pip/_vendor/colorama/winterm.py b/src/pip/_vendor/colorama/winterm.py index 0fdb4ec4e91..aad867e8c80 100644 --- a/src/pip/_vendor/colorama/winterm.py +++ b/src/pip/_vendor/colorama/winterm.py @@ -1,6 +1,12 @@ # Copyright Jonathan Hartley 2013. BSD 3-Clause license, see LICENSE file. -from . import win32 +try: + from msvcrt import get_osfhandle +except ImportError: + def get_osfhandle(_): + raise OSError("This isn't windows!") + +from . import win32 # from wincon.h class WinColor(object): @@ -167,3 +173,23 @@ def erase_line(self, mode=0, on_stderr=False): def set_title(self, title): win32.SetConsoleTitle(title) + + +def enable_vt_processing(fd): + if win32.windll is None or not win32.winapi_test(): + return False + + try: + handle = get_osfhandle(fd) + mode = win32.GetConsoleMode(handle) + win32.SetConsoleMode( + handle, + mode | win32.ENABLE_VIRTUAL_TERMINAL_PROCESSING, + ) + + mode = win32.GetConsoleMode(handle) + if mode & win32.ENABLE_VIRTUAL_TERMINAL_PROCESSING: + return True + # Can get TypeError in testsuite where 'fd' is a Mock() + except (OSError, TypeError): + return False diff --git a/src/pip/_vendor/distlib/__init__.py b/src/pip/_vendor/distlib/__init__.py index 11549481074..962173c8d0a 100644 --- a/src/pip/_vendor/distlib/__init__.py +++ b/src/pip/_vendor/distlib/__init__.py @@ -1,12 +1,12 @@ # -*- coding: utf-8 -*- # -# Copyright (C) 2012-2019 Vinay Sajip. +# Copyright (C) 2012-2022 Vinay Sajip. # Licensed to the Python Software Foundation under a contributor agreement. # See LICENSE.txt and CONTRIBUTORS.txt. # import logging -__version__ = '0.3.3' +__version__ = '0.3.6' class DistlibException(Exception): pass diff --git a/src/pip/_vendor/distlib/_backport/__init__.py b/src/pip/_vendor/distlib/_backport/__init__.py deleted file mode 100644 index f7dbf4c9aa8..00000000000 --- a/src/pip/_vendor/distlib/_backport/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -"""Modules copied from Python 3 standard libraries, for internal use only. - -Individual classes and functions are found in d2._backport.misc. Intended -usage is to always import things missing from 3.1 from that module: the -built-in/stdlib objects will be used if found. -""" diff --git a/src/pip/_vendor/distlib/_backport/misc.py b/src/pip/_vendor/distlib/_backport/misc.py deleted file mode 100644 index cfb318d34f7..00000000000 --- a/src/pip/_vendor/distlib/_backport/misc.py +++ /dev/null @@ -1,41 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2012 The Python Software Foundation. -# See LICENSE.txt and CONTRIBUTORS.txt. -# -"""Backports for individual classes and functions.""" - -import os -import sys - -__all__ = ['cache_from_source', 'callable', 'fsencode'] - - -try: - from imp import cache_from_source -except ImportError: - def cache_from_source(py_file, debug=__debug__): - ext = debug and 'c' or 'o' - return py_file + ext - - -try: - callable = callable -except NameError: - from collections import Callable - - def callable(obj): - return isinstance(obj, Callable) - - -try: - fsencode = os.fsencode -except AttributeError: - def fsencode(filename): - if isinstance(filename, bytes): - return filename - elif isinstance(filename, str): - return filename.encode(sys.getfilesystemencoding()) - else: - raise TypeError("expect bytes or str, not %s" % - type(filename).__name__) diff --git a/src/pip/_vendor/distlib/_backport/shutil.py b/src/pip/_vendor/distlib/_backport/shutil.py deleted file mode 100644 index 10ed3625397..00000000000 --- a/src/pip/_vendor/distlib/_backport/shutil.py +++ /dev/null @@ -1,764 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2012 The Python Software Foundation. -# See LICENSE.txt and CONTRIBUTORS.txt. -# -"""Utility functions for copying and archiving files and directory trees. - -XXX The functions here don't copy the resource fork or other metadata on Mac. - -""" - -import os -import sys -import stat -from os.path import abspath -import fnmatch -try: - from collections.abc import Callable -except ImportError: - from collections import Callable -import errno -from . import tarfile - -try: - import bz2 - _BZ2_SUPPORTED = True -except ImportError: - _BZ2_SUPPORTED = False - -try: - from pwd import getpwnam -except ImportError: - getpwnam = None - -try: - from grp import getgrnam -except ImportError: - getgrnam = None - -__all__ = ["copyfileobj", "copyfile", "copymode", "copystat", "copy", "copy2", - "copytree", "move", "rmtree", "Error", "SpecialFileError", - "ExecError", "make_archive", "get_archive_formats", - "register_archive_format", "unregister_archive_format", - "get_unpack_formats", "register_unpack_format", - "unregister_unpack_format", "unpack_archive", "ignore_patterns"] - -class Error(EnvironmentError): - pass - -class SpecialFileError(EnvironmentError): - """Raised when trying to do a kind of operation (e.g. copying) which is - not supported on a special file (e.g. a named pipe)""" - -class ExecError(EnvironmentError): - """Raised when a command could not be executed""" - -class ReadError(EnvironmentError): - """Raised when an archive cannot be read""" - -class RegistryError(Exception): - """Raised when a registry operation with the archiving - and unpacking registries fails""" - - -try: - WindowsError -except NameError: - WindowsError = None - -def copyfileobj(fsrc, fdst, length=16*1024): - """copy data from file-like object fsrc to file-like object fdst""" - while 1: - buf = fsrc.read(length) - if not buf: - break - fdst.write(buf) - -def _samefile(src, dst): - # Macintosh, Unix. - if hasattr(os.path, 'samefile'): - try: - return os.path.samefile(src, dst) - except OSError: - return False - - # All other platforms: check for same pathname. - return (os.path.normcase(os.path.abspath(src)) == - os.path.normcase(os.path.abspath(dst))) - -def copyfile(src, dst): - """Copy data from src to dst""" - if _samefile(src, dst): - raise Error("`%s` and `%s` are the same file" % (src, dst)) - - for fn in [src, dst]: - try: - st = os.stat(fn) - except OSError: - # File most likely does not exist - pass - else: - # XXX What about other special files? (sockets, devices...) - if stat.S_ISFIFO(st.st_mode): - raise SpecialFileError("`%s` is a named pipe" % fn) - - with open(src, 'rb') as fsrc: - with open(dst, 'wb') as fdst: - copyfileobj(fsrc, fdst) - -def copymode(src, dst): - """Copy mode bits from src to dst""" - if hasattr(os, 'chmod'): - st = os.stat(src) - mode = stat.S_IMODE(st.st_mode) - os.chmod(dst, mode) - -def copystat(src, dst): - """Copy all stat info (mode bits, atime, mtime, flags) from src to dst""" - st = os.stat(src) - mode = stat.S_IMODE(st.st_mode) - if hasattr(os, 'utime'): - os.utime(dst, (st.st_atime, st.st_mtime)) - if hasattr(os, 'chmod'): - os.chmod(dst, mode) - if hasattr(os, 'chflags') and hasattr(st, 'st_flags'): - try: - os.chflags(dst, st.st_flags) - except OSError as why: - if (not hasattr(errno, 'EOPNOTSUPP') or - why.errno != errno.EOPNOTSUPP): - raise - -def copy(src, dst): - """Copy data and mode bits ("cp src dst"). - - The destination may be a directory. - - """ - if os.path.isdir(dst): - dst = os.path.join(dst, os.path.basename(src)) - copyfile(src, dst) - copymode(src, dst) - -def copy2(src, dst): - """Copy data and all stat info ("cp -p src dst"). - - The destination may be a directory. - - """ - if os.path.isdir(dst): - dst = os.path.join(dst, os.path.basename(src)) - copyfile(src, dst) - copystat(src, dst) - -def ignore_patterns(*patterns): - """Function that can be used as copytree() ignore parameter. - - Patterns is a sequence of glob-style patterns - that are used to exclude files""" - def _ignore_patterns(path, names): - ignored_names = [] - for pattern in patterns: - ignored_names.extend(fnmatch.filter(names, pattern)) - return set(ignored_names) - return _ignore_patterns - -def copytree(src, dst, symlinks=False, ignore=None, copy_function=copy2, - ignore_dangling_symlinks=False): - """Recursively copy a directory tree. - - The destination directory must not already exist. - If exception(s) occur, an Error is raised with a list of reasons. - - If the optional symlinks flag is true, symbolic links in the - source tree result in symbolic links in the destination tree; if - it is false, the contents of the files pointed to by symbolic - links are copied. If the file pointed by the symlink doesn't - exist, an exception will be added in the list of errors raised in - an Error exception at the end of the copy process. - - You can set the optional ignore_dangling_symlinks flag to true if you - want to silence this exception. Notice that this has no effect on - platforms that don't support os.symlink. - - The optional ignore argument is a callable. If given, it - is called with the `src` parameter, which is the directory - being visited by copytree(), and `names` which is the list of - `src` contents, as returned by os.listdir(): - - callable(src, names) -> ignored_names - - Since copytree() is called recursively, the callable will be - called once for each directory that is copied. It returns a - list of names relative to the `src` directory that should - not be copied. - - The optional copy_function argument is a callable that will be used - to copy each file. It will be called with the source path and the - destination path as arguments. By default, copy2() is used, but any - function that supports the same signature (like copy()) can be used. - - """ - names = os.listdir(src) - if ignore is not None: - ignored_names = ignore(src, names) - else: - ignored_names = set() - - os.makedirs(dst) - errors = [] - for name in names: - if name in ignored_names: - continue - srcname = os.path.join(src, name) - dstname = os.path.join(dst, name) - try: - if os.path.islink(srcname): - linkto = os.readlink(srcname) - if symlinks: - os.symlink(linkto, dstname) - else: - # ignore dangling symlink if the flag is on - if not os.path.exists(linkto) and ignore_dangling_symlinks: - continue - # otherwise let the copy occurs. copy2 will raise an error - copy_function(srcname, dstname) - elif os.path.isdir(srcname): - copytree(srcname, dstname, symlinks, ignore, copy_function) - else: - # Will raise a SpecialFileError for unsupported file types - copy_function(srcname, dstname) - # catch the Error from the recursive copytree so that we can - # continue with other files - except Error as err: - errors.extend(err.args[0]) - except EnvironmentError as why: - errors.append((srcname, dstname, str(why))) - try: - copystat(src, dst) - except OSError as why: - if WindowsError is not None and isinstance(why, WindowsError): - # Copying file access times may fail on Windows - pass - else: - errors.extend((src, dst, str(why))) - if errors: - raise Error(errors) - -def rmtree(path, ignore_errors=False, onerror=None): - """Recursively delete a directory tree. - - If ignore_errors is set, errors are ignored; otherwise, if onerror - is set, it is called to handle the error with arguments (func, - path, exc_info) where func is os.listdir, os.remove, or os.rmdir; - path is the argument to that function that caused it to fail; and - exc_info is a tuple returned by sys.exc_info(). If ignore_errors - is false and onerror is None, an exception is raised. - - """ - if ignore_errors: - def onerror(*args): - pass - elif onerror is None: - def onerror(*args): - raise - try: - if os.path.islink(path): - # symlinks to directories are forbidden, see bug #1669 - raise OSError("Cannot call rmtree on a symbolic link") - except OSError: - onerror(os.path.islink, path, sys.exc_info()) - # can't continue even if onerror hook returns - return - names = [] - try: - names = os.listdir(path) - except os.error: - onerror(os.listdir, path, sys.exc_info()) - for name in names: - fullname = os.path.join(path, name) - try: - mode = os.lstat(fullname).st_mode - except os.error: - mode = 0 - if stat.S_ISDIR(mode): - rmtree(fullname, ignore_errors, onerror) - else: - try: - os.remove(fullname) - except os.error: - onerror(os.remove, fullname, sys.exc_info()) - try: - os.rmdir(path) - except os.error: - onerror(os.rmdir, path, sys.exc_info()) - - -def _basename(path): - # A basename() variant which first strips the trailing slash, if present. - # Thus we always get the last component of the path, even for directories. - return os.path.basename(path.rstrip(os.path.sep)) - -def move(src, dst): - """Recursively move a file or directory to another location. This is - similar to the Unix "mv" command. - - If the destination is a directory or a symlink to a directory, the source - is moved inside the directory. The destination path must not already - exist. - - If the destination already exists but is not a directory, it may be - overwritten depending on os.rename() semantics. - - If the destination is on our current filesystem, then rename() is used. - Otherwise, src is copied to the destination and then removed. - A lot more could be done here... A look at a mv.c shows a lot of - the issues this implementation glosses over. - - """ - real_dst = dst - if os.path.isdir(dst): - if _samefile(src, dst): - # We might be on a case insensitive filesystem, - # perform the rename anyway. - os.rename(src, dst) - return - - real_dst = os.path.join(dst, _basename(src)) - if os.path.exists(real_dst): - raise Error("Destination path '%s' already exists" % real_dst) - try: - os.rename(src, real_dst) - except OSError: - if os.path.isdir(src): - if _destinsrc(src, dst): - raise Error("Cannot move a directory '%s' into itself '%s'." % (src, dst)) - copytree(src, real_dst, symlinks=True) - rmtree(src) - else: - copy2(src, real_dst) - os.unlink(src) - -def _destinsrc(src, dst): - src = abspath(src) - dst = abspath(dst) - if not src.endswith(os.path.sep): - src += os.path.sep - if not dst.endswith(os.path.sep): - dst += os.path.sep - return dst.startswith(src) - -def _get_gid(name): - """Returns a gid, given a group name.""" - if getgrnam is None or name is None: - return None - try: - result = getgrnam(name) - except KeyError: - result = None - if result is not None: - return result[2] - return None - -def _get_uid(name): - """Returns an uid, given a user name.""" - if getpwnam is None or name is None: - return None - try: - result = getpwnam(name) - except KeyError: - result = None - if result is not None: - return result[2] - return None - -def _make_tarball(base_name, base_dir, compress="gzip", verbose=0, dry_run=0, - owner=None, group=None, logger=None): - """Create a (possibly compressed) tar file from all the files under - 'base_dir'. - - 'compress' must be "gzip" (the default), "bzip2", or None. - - 'owner' and 'group' can be used to define an owner and a group for the - archive that is being built. If not provided, the current owner and group - will be used. - - The output tar file will be named 'base_name' + ".tar", possibly plus - the appropriate compression extension (".gz", or ".bz2"). - - Returns the output filename. - """ - tar_compression = {'gzip': 'gz', None: ''} - compress_ext = {'gzip': '.gz'} - - if _BZ2_SUPPORTED: - tar_compression['bzip2'] = 'bz2' - compress_ext['bzip2'] = '.bz2' - - # flags for compression program, each element of list will be an argument - if compress is not None and compress not in compress_ext: - raise ValueError("bad value for 'compress', or compression format not " - "supported : {0}".format(compress)) - - archive_name = base_name + '.tar' + compress_ext.get(compress, '') - archive_dir = os.path.dirname(archive_name) - - if not os.path.exists(archive_dir): - if logger is not None: - logger.info("creating %s", archive_dir) - if not dry_run: - os.makedirs(archive_dir) - - # creating the tarball - if logger is not None: - logger.info('Creating tar archive') - - uid = _get_uid(owner) - gid = _get_gid(group) - - def _set_uid_gid(tarinfo): - if gid is not None: - tarinfo.gid = gid - tarinfo.gname = group - if uid is not None: - tarinfo.uid = uid - tarinfo.uname = owner - return tarinfo - - if not dry_run: - tar = tarfile.open(archive_name, 'w|%s' % tar_compression[compress]) - try: - tar.add(base_dir, filter=_set_uid_gid) - finally: - tar.close() - - return archive_name - -def _call_external_zip(base_dir, zip_filename, verbose=False, dry_run=False): - # XXX see if we want to keep an external call here - if verbose: - zipoptions = "-r" - else: - zipoptions = "-rq" - from distutils.errors import DistutilsExecError - from distutils.spawn import spawn - try: - spawn(["zip", zipoptions, zip_filename, base_dir], dry_run=dry_run) - except DistutilsExecError: - # XXX really should distinguish between "couldn't find - # external 'zip' command" and "zip failed". - raise ExecError("unable to create zip file '%s': " - "could neither import the 'zipfile' module nor " - "find a standalone zip utility") % zip_filename - -def _make_zipfile(base_name, base_dir, verbose=0, dry_run=0, logger=None): - """Create a zip file from all the files under 'base_dir'. - - The output zip file will be named 'base_name' + ".zip". Uses either the - "zipfile" Python module (if available) or the InfoZIP "zip" utility - (if installed and found on the default search path). If neither tool is - available, raises ExecError. Returns the name of the output zip - file. - """ - zip_filename = base_name + ".zip" - archive_dir = os.path.dirname(base_name) - - if not os.path.exists(archive_dir): - if logger is not None: - logger.info("creating %s", archive_dir) - if not dry_run: - os.makedirs(archive_dir) - - # If zipfile module is not available, try spawning an external 'zip' - # command. - try: - import zipfile - except ImportError: - zipfile = None - - if zipfile is None: - _call_external_zip(base_dir, zip_filename, verbose, dry_run) - else: - if logger is not None: - logger.info("creating '%s' and adding '%s' to it", - zip_filename, base_dir) - - if not dry_run: - zip = zipfile.ZipFile(zip_filename, "w", - compression=zipfile.ZIP_DEFLATED) - - for dirpath, dirnames, filenames in os.walk(base_dir): - for name in filenames: - path = os.path.normpath(os.path.join(dirpath, name)) - if os.path.isfile(path): - zip.write(path, path) - if logger is not None: - logger.info("adding '%s'", path) - zip.close() - - return zip_filename - -_ARCHIVE_FORMATS = { - 'gztar': (_make_tarball, [('compress', 'gzip')], "gzip'ed tar-file"), - 'bztar': (_make_tarball, [('compress', 'bzip2')], "bzip2'ed tar-file"), - 'tar': (_make_tarball, [('compress', None)], "uncompressed tar file"), - 'zip': (_make_zipfile, [], "ZIP file"), - } - -if _BZ2_SUPPORTED: - _ARCHIVE_FORMATS['bztar'] = (_make_tarball, [('compress', 'bzip2')], - "bzip2'ed tar-file") - -def get_archive_formats(): - """Returns a list of supported formats for archiving and unarchiving. - - Each element of the returned sequence is a tuple (name, description) - """ - formats = [(name, registry[2]) for name, registry in - _ARCHIVE_FORMATS.items()] - formats.sort() - return formats - -def register_archive_format(name, function, extra_args=None, description=''): - """Registers an archive format. - - name is the name of the format. function is the callable that will be - used to create archives. If provided, extra_args is a sequence of - (name, value) tuples that will be passed as arguments to the callable. - description can be provided to describe the format, and will be returned - by the get_archive_formats() function. - """ - if extra_args is None: - extra_args = [] - if not isinstance(function, Callable): - raise TypeError('The %s object is not callable' % function) - if not isinstance(extra_args, (tuple, list)): - raise TypeError('extra_args needs to be a sequence') - for element in extra_args: - if not isinstance(element, (tuple, list)) or len(element) !=2: - raise TypeError('extra_args elements are : (arg_name, value)') - - _ARCHIVE_FORMATS[name] = (function, extra_args, description) - -def unregister_archive_format(name): - del _ARCHIVE_FORMATS[name] - -def make_archive(base_name, format, root_dir=None, base_dir=None, verbose=0, - dry_run=0, owner=None, group=None, logger=None): - """Create an archive file (eg. zip or tar). - - 'base_name' is the name of the file to create, minus any format-specific - extension; 'format' is the archive format: one of "zip", "tar", "bztar" - or "gztar". - - 'root_dir' is a directory that will be the root directory of the - archive; ie. we typically chdir into 'root_dir' before creating the - archive. 'base_dir' is the directory where we start archiving from; - ie. 'base_dir' will be the common prefix of all files and - directories in the archive. 'root_dir' and 'base_dir' both default - to the current directory. Returns the name of the archive file. - - 'owner' and 'group' are used when creating a tar archive. By default, - uses the current owner and group. - """ - save_cwd = os.getcwd() - if root_dir is not None: - if logger is not None: - logger.debug("changing into '%s'", root_dir) - base_name = os.path.abspath(base_name) - if not dry_run: - os.chdir(root_dir) - - if base_dir is None: - base_dir = os.curdir - - kwargs = {'dry_run': dry_run, 'logger': logger} - - try: - format_info = _ARCHIVE_FORMATS[format] - except KeyError: - raise ValueError("unknown archive format '%s'" % format) - - func = format_info[0] - for arg, val in format_info[1]: - kwargs[arg] = val - - if format != 'zip': - kwargs['owner'] = owner - kwargs['group'] = group - - try: - filename = func(base_name, base_dir, **kwargs) - finally: - if root_dir is not None: - if logger is not None: - logger.debug("changing back to '%s'", save_cwd) - os.chdir(save_cwd) - - return filename - - -def get_unpack_formats(): - """Returns a list of supported formats for unpacking. - - Each element of the returned sequence is a tuple - (name, extensions, description) - """ - formats = [(name, info[0], info[3]) for name, info in - _UNPACK_FORMATS.items()] - formats.sort() - return formats - -def _check_unpack_options(extensions, function, extra_args): - """Checks what gets registered as an unpacker.""" - # first make sure no other unpacker is registered for this extension - existing_extensions = {} - for name, info in _UNPACK_FORMATS.items(): - for ext in info[0]: - existing_extensions[ext] = name - - for extension in extensions: - if extension in existing_extensions: - msg = '%s is already registered for "%s"' - raise RegistryError(msg % (extension, - existing_extensions[extension])) - - if not isinstance(function, Callable): - raise TypeError('The registered function must be a callable') - - -def register_unpack_format(name, extensions, function, extra_args=None, - description=''): - """Registers an unpack format. - - `name` is the name of the format. `extensions` is a list of extensions - corresponding to the format. - - `function` is the callable that will be - used to unpack archives. The callable will receive archives to unpack. - If it's unable to handle an archive, it needs to raise a ReadError - exception. - - If provided, `extra_args` is a sequence of - (name, value) tuples that will be passed as arguments to the callable. - description can be provided to describe the format, and will be returned - by the get_unpack_formats() function. - """ - if extra_args is None: - extra_args = [] - _check_unpack_options(extensions, function, extra_args) - _UNPACK_FORMATS[name] = extensions, function, extra_args, description - -def unregister_unpack_format(name): - """Removes the pack format from the registry.""" - del _UNPACK_FORMATS[name] - -def _ensure_directory(path): - """Ensure that the parent directory of `path` exists""" - dirname = os.path.dirname(path) - if not os.path.isdir(dirname): - os.makedirs(dirname) - -def _unpack_zipfile(filename, extract_dir): - """Unpack zip `filename` to `extract_dir` - """ - try: - import zipfile - except ImportError: - raise ReadError('zlib not supported, cannot unpack this archive.') - - if not zipfile.is_zipfile(filename): - raise ReadError("%s is not a zip file" % filename) - - zip = zipfile.ZipFile(filename) - try: - for info in zip.infolist(): - name = info.filename - - # don't extract absolute paths or ones with .. in them - if name.startswith('/') or '..' in name: - continue - - target = os.path.join(extract_dir, *name.split('/')) - if not target: - continue - - _ensure_directory(target) - if not name.endswith('/'): - # file - data = zip.read(info.filename) - f = open(target, 'wb') - try: - f.write(data) - finally: - f.close() - del data - finally: - zip.close() - -def _unpack_tarfile(filename, extract_dir): - """Unpack tar/tar.gz/tar.bz2 `filename` to `extract_dir` - """ - try: - tarobj = tarfile.open(filename) - except tarfile.TarError: - raise ReadError( - "%s is not a compressed or uncompressed tar file" % filename) - try: - tarobj.extractall(extract_dir) - finally: - tarobj.close() - -_UNPACK_FORMATS = { - 'gztar': (['.tar.gz', '.tgz'], _unpack_tarfile, [], "gzip'ed tar-file"), - 'tar': (['.tar'], _unpack_tarfile, [], "uncompressed tar file"), - 'zip': (['.zip'], _unpack_zipfile, [], "ZIP file") - } - -if _BZ2_SUPPORTED: - _UNPACK_FORMATS['bztar'] = (['.bz2'], _unpack_tarfile, [], - "bzip2'ed tar-file") - -def _find_unpack_format(filename): - for name, info in _UNPACK_FORMATS.items(): - for extension in info[0]: - if filename.endswith(extension): - return name - return None - -def unpack_archive(filename, extract_dir=None, format=None): - """Unpack an archive. - - `filename` is the name of the archive. - - `extract_dir` is the name of the target directory, where the archive - is unpacked. If not provided, the current working directory is used. - - `format` is the archive format: one of "zip", "tar", or "gztar". Or any - other registered format. If not provided, unpack_archive will use the - filename extension and see if an unpacker was registered for that - extension. - - In case none is found, a ValueError is raised. - """ - if extract_dir is None: - extract_dir = os.getcwd() - - if format is not None: - try: - format_info = _UNPACK_FORMATS[format] - except KeyError: - raise ValueError("Unknown unpack format '{0}'".format(format)) - - func = format_info[1] - func(filename, extract_dir, **dict(format_info[2])) - else: - # we need to look at the registered unpackers supported extensions - format = _find_unpack_format(filename) - if format is None: - raise ReadError("Unknown archive format '{0}'".format(filename)) - - func = _UNPACK_FORMATS[format][1] - kwargs = dict(_UNPACK_FORMATS[format][2]) - func(filename, extract_dir, **kwargs) diff --git a/src/pip/_vendor/distlib/_backport/sysconfig.cfg b/src/pip/_vendor/distlib/_backport/sysconfig.cfg deleted file mode 100644 index 1746bd01c1a..00000000000 --- a/src/pip/_vendor/distlib/_backport/sysconfig.cfg +++ /dev/null @@ -1,84 +0,0 @@ -[posix_prefix] -# Configuration directories. Some of these come straight out of the -# configure script. They are for implementing the other variables, not to -# be used directly in [resource_locations]. -confdir = /etc -datadir = /usr/share -libdir = /usr/lib -statedir = /var -# User resource directory -local = ~/.local/{distribution.name} - -stdlib = {base}/lib/python{py_version_short} -platstdlib = {platbase}/lib/python{py_version_short} -purelib = {base}/lib/python{py_version_short}/site-packages -platlib = {platbase}/lib/python{py_version_short}/site-packages -include = {base}/include/python{py_version_short}{abiflags} -platinclude = {platbase}/include/python{py_version_short}{abiflags} -data = {base} - -[posix_home] -stdlib = {base}/lib/python -platstdlib = {base}/lib/python -purelib = {base}/lib/python -platlib = {base}/lib/python -include = {base}/include/python -platinclude = {base}/include/python -scripts = {base}/bin -data = {base} - -[nt] -stdlib = {base}/Lib -platstdlib = {base}/Lib -purelib = {base}/Lib/site-packages -platlib = {base}/Lib/site-packages -include = {base}/Include -platinclude = {base}/Include -scripts = {base}/Scripts -data = {base} - -[os2] -stdlib = {base}/Lib -platstdlib = {base}/Lib -purelib = {base}/Lib/site-packages -platlib = {base}/Lib/site-packages -include = {base}/Include -platinclude = {base}/Include -scripts = {base}/Scripts -data = {base} - -[os2_home] -stdlib = {userbase}/lib/python{py_version_short} -platstdlib = {userbase}/lib/python{py_version_short} -purelib = {userbase}/lib/python{py_version_short}/site-packages -platlib = {userbase}/lib/python{py_version_short}/site-packages -include = {userbase}/include/python{py_version_short} -scripts = {userbase}/bin -data = {userbase} - -[nt_user] -stdlib = {userbase}/Python{py_version_nodot} -platstdlib = {userbase}/Python{py_version_nodot} -purelib = {userbase}/Python{py_version_nodot}/site-packages -platlib = {userbase}/Python{py_version_nodot}/site-packages -include = {userbase}/Python{py_version_nodot}/Include -scripts = {userbase}/Scripts -data = {userbase} - -[posix_user] -stdlib = {userbase}/lib/python{py_version_short} -platstdlib = {userbase}/lib/python{py_version_short} -purelib = {userbase}/lib/python{py_version_short}/site-packages -platlib = {userbase}/lib/python{py_version_short}/site-packages -include = {userbase}/include/python{py_version_short} -scripts = {userbase}/bin -data = {userbase} - -[osx_framework_user] -stdlib = {userbase}/lib/python -platstdlib = {userbase}/lib/python -purelib = {userbase}/lib/python/site-packages -platlib = {userbase}/lib/python/site-packages -include = {userbase}/include -scripts = {userbase}/bin -data = {userbase} diff --git a/src/pip/_vendor/distlib/_backport/sysconfig.py b/src/pip/_vendor/distlib/_backport/sysconfig.py deleted file mode 100644 index b470a373c86..00000000000 --- a/src/pip/_vendor/distlib/_backport/sysconfig.py +++ /dev/null @@ -1,786 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (C) 2012 The Python Software Foundation. -# See LICENSE.txt and CONTRIBUTORS.txt. -# -"""Access to Python's configuration information.""" - -import codecs -import os -import re -import sys -from os.path import pardir, realpath -try: - import configparser -except ImportError: - import ConfigParser as configparser - - -__all__ = [ - 'get_config_h_filename', - 'get_config_var', - 'get_config_vars', - 'get_makefile_filename', - 'get_path', - 'get_path_names', - 'get_paths', - 'get_platform', - 'get_python_version', - 'get_scheme_names', - 'parse_config_h', -] - - -def _safe_realpath(path): - try: - return realpath(path) - except OSError: - return path - - -if sys.executable: - _PROJECT_BASE = os.path.dirname(_safe_realpath(sys.executable)) -else: - # sys.executable can be empty if argv[0] has been changed and Python is - # unable to retrieve the real program name - _PROJECT_BASE = _safe_realpath(os.getcwd()) - -if os.name == "nt" and "pcbuild" in _PROJECT_BASE[-8:].lower(): - _PROJECT_BASE = _safe_realpath(os.path.join(_PROJECT_BASE, pardir)) -# PC/VS7.1 -if os.name == "nt" and "\\pc\\v" in _PROJECT_BASE[-10:].lower(): - _PROJECT_BASE = _safe_realpath(os.path.join(_PROJECT_BASE, pardir, pardir)) -# PC/AMD64 -if os.name == "nt" and "\\pcbuild\\amd64" in _PROJECT_BASE[-14:].lower(): - _PROJECT_BASE = _safe_realpath(os.path.join(_PROJECT_BASE, pardir, pardir)) - - -def is_python_build(): - for fn in ("Setup.dist", "Setup.local"): - if os.path.isfile(os.path.join(_PROJECT_BASE, "Modules", fn)): - return True - return False - -_PYTHON_BUILD = is_python_build() - -_cfg_read = False - -def _ensure_cfg_read(): - global _cfg_read - if not _cfg_read: - from ..resources import finder - backport_package = __name__.rsplit('.', 1)[0] - _finder = finder(backport_package) - _cfgfile = _finder.find('sysconfig.cfg') - assert _cfgfile, 'sysconfig.cfg exists' - with _cfgfile.as_stream() as s: - _SCHEMES.readfp(s) - if _PYTHON_BUILD: - for scheme in ('posix_prefix', 'posix_home'): - _SCHEMES.set(scheme, 'include', '{srcdir}/Include') - _SCHEMES.set(scheme, 'platinclude', '{projectbase}/.') - - _cfg_read = True - - -_SCHEMES = configparser.RawConfigParser() -_VAR_REPL = re.compile(r'\{([^{]*?)\}') - -def _expand_globals(config): - _ensure_cfg_read() - if config.has_section('globals'): - globals = config.items('globals') - else: - globals = tuple() - - sections = config.sections() - for section in sections: - if section == 'globals': - continue - for option, value in globals: - if config.has_option(section, option): - continue - config.set(section, option, value) - config.remove_section('globals') - - # now expanding local variables defined in the cfg file - # - for section in config.sections(): - variables = dict(config.items(section)) - - def _replacer(matchobj): - name = matchobj.group(1) - if name in variables: - return variables[name] - return matchobj.group(0) - - for option, value in config.items(section): - config.set(section, option, _VAR_REPL.sub(_replacer, value)) - -#_expand_globals(_SCHEMES) - -_PY_VERSION = '%s.%s.%s' % sys.version_info[:3] -_PY_VERSION_SHORT = '%s.%s' % sys.version_info[:2] -_PY_VERSION_SHORT_NO_DOT = '%s%s' % sys.version_info[:2] -_PREFIX = os.path.normpath(sys.prefix) -_EXEC_PREFIX = os.path.normpath(sys.exec_prefix) -_CONFIG_VARS = None -_USER_BASE = None - - -def _subst_vars(path, local_vars): - """In the string `path`, replace tokens like {some.thing} with the - corresponding value from the map `local_vars`. - - If there is no corresponding value, leave the token unchanged. - """ - def _replacer(matchobj): - name = matchobj.group(1) - if name in local_vars: - return local_vars[name] - elif name in os.environ: - return os.environ[name] - return matchobj.group(0) - return _VAR_REPL.sub(_replacer, path) - - -def _extend_dict(target_dict, other_dict): - target_keys = target_dict.keys() - for key, value in other_dict.items(): - if key in target_keys: - continue - target_dict[key] = value - - -def _expand_vars(scheme, vars): - res = {} - if vars is None: - vars = {} - _extend_dict(vars, get_config_vars()) - - for key, value in _SCHEMES.items(scheme): - if os.name in ('posix', 'nt'): - value = os.path.expanduser(value) - res[key] = os.path.normpath(_subst_vars(value, vars)) - return res - - -def format_value(value, vars): - def _replacer(matchobj): - name = matchobj.group(1) - if name in vars: - return vars[name] - return matchobj.group(0) - return _VAR_REPL.sub(_replacer, value) - - -def _get_default_scheme(): - if os.name == 'posix': - # the default scheme for posix is posix_prefix - return 'posix_prefix' - return os.name - - -def _getuserbase(): - env_base = os.environ.get("PYTHONUSERBASE", None) - - def joinuser(*args): - return os.path.expanduser(os.path.join(*args)) - - # what about 'os2emx', 'riscos' ? - if os.name == "nt": - base = os.environ.get("APPDATA") or "~" - if env_base: - return env_base - else: - return joinuser(base, "Python") - - if sys.platform == "darwin": - framework = get_config_var("PYTHONFRAMEWORK") - if framework: - if env_base: - return env_base - else: - return joinuser("~", "Library", framework, "%d.%d" % - sys.version_info[:2]) - - if env_base: - return env_base - else: - return joinuser("~", ".local") - - -def _parse_makefile(filename, vars=None): - """Parse a Makefile-style file. - - A dictionary containing name/value pairs is returned. If an - optional dictionary is passed in as the second argument, it is - used instead of a new dictionary. - """ - # Regexes needed for parsing Makefile (and similar syntaxes, - # like old-style Setup files). - _variable_rx = re.compile(r"([a-zA-Z][a-zA-Z0-9_]+)\s*=\s*(.*)") - _findvar1_rx = re.compile(r"\$\(([A-Za-z][A-Za-z0-9_]*)\)") - _findvar2_rx = re.compile(r"\${([A-Za-z][A-Za-z0-9_]*)}") - - if vars is None: - vars = {} - done = {} - notdone = {} - - with codecs.open(filename, encoding='utf-8', errors="surrogateescape") as f: - lines = f.readlines() - - for line in lines: - if line.startswith('#') or line.strip() == '': - continue - m = _variable_rx.match(line) - if m: - n, v = m.group(1, 2) - v = v.strip() - # `$$' is a literal `$' in make - tmpv = v.replace('$$', '') - - if "$" in tmpv: - notdone[n] = v - else: - try: - v = int(v) - except ValueError: - # insert literal `$' - done[n] = v.replace('$$', '$') - else: - done[n] = v - - # do variable interpolation here - variables = list(notdone.keys()) - - # Variables with a 'PY_' prefix in the makefile. These need to - # be made available without that prefix through sysconfig. - # Special care is needed to ensure that variable expansion works, even - # if the expansion uses the name without a prefix. - renamed_variables = ('CFLAGS', 'LDFLAGS', 'CPPFLAGS') - - while len(variables) > 0: - for name in tuple(variables): - value = notdone[name] - m = _findvar1_rx.search(value) or _findvar2_rx.search(value) - if m is not None: - n = m.group(1) - found = True - if n in done: - item = str(done[n]) - elif n in notdone: - # get it on a subsequent round - found = False - elif n in os.environ: - # do it like make: fall back to environment - item = os.environ[n] - - elif n in renamed_variables: - if (name.startswith('PY_') and - name[3:] in renamed_variables): - item = "" - - elif 'PY_' + n in notdone: - found = False - - else: - item = str(done['PY_' + n]) - - else: - done[n] = item = "" - - if found: - after = value[m.end():] - value = value[:m.start()] + item + after - if "$" in after: - notdone[name] = value - else: - try: - value = int(value) - except ValueError: - done[name] = value.strip() - else: - done[name] = value - variables.remove(name) - - if (name.startswith('PY_') and - name[3:] in renamed_variables): - - name = name[3:] - if name not in done: - done[name] = value - - else: - # bogus variable reference (e.g. "prefix=$/opt/python"); - # just drop it since we can't deal - done[name] = value - variables.remove(name) - - # strip spurious spaces - for k, v in done.items(): - if isinstance(v, str): - done[k] = v.strip() - - # save the results in the global dictionary - vars.update(done) - return vars - - -def get_makefile_filename(): - """Return the path of the Makefile.""" - if _PYTHON_BUILD: - return os.path.join(_PROJECT_BASE, "Makefile") - if hasattr(sys, 'abiflags'): - config_dir_name = 'config-%s%s' % (_PY_VERSION_SHORT, sys.abiflags) - else: - config_dir_name = 'config' - return os.path.join(get_path('stdlib'), config_dir_name, 'Makefile') - - -def _init_posix(vars): - """Initialize the module as appropriate for POSIX systems.""" - # load the installed Makefile: - makefile = get_makefile_filename() - try: - _parse_makefile(makefile, vars) - except IOError as e: - msg = "invalid Python installation: unable to open %s" % makefile - if hasattr(e, "strerror"): - msg = msg + " (%s)" % e.strerror - raise IOError(msg) - # load the installed pyconfig.h: - config_h = get_config_h_filename() - try: - with open(config_h) as f: - parse_config_h(f, vars) - except IOError as e: - msg = "invalid Python installation: unable to open %s" % config_h - if hasattr(e, "strerror"): - msg = msg + " (%s)" % e.strerror - raise IOError(msg) - # On AIX, there are wrong paths to the linker scripts in the Makefile - # -- these paths are relative to the Python source, but when installed - # the scripts are in another directory. - if _PYTHON_BUILD: - vars['LDSHARED'] = vars['BLDSHARED'] - - -def _init_non_posix(vars): - """Initialize the module as appropriate for NT""" - # set basic install directories - vars['LIBDEST'] = get_path('stdlib') - vars['BINLIBDEST'] = get_path('platstdlib') - vars['INCLUDEPY'] = get_path('include') - vars['SO'] = '.pyd' - vars['EXE'] = '.exe' - vars['VERSION'] = _PY_VERSION_SHORT_NO_DOT - vars['BINDIR'] = os.path.dirname(_safe_realpath(sys.executable)) - -# -# public APIs -# - - -def parse_config_h(fp, vars=None): - """Parse a config.h-style file. - - A dictionary containing name/value pairs is returned. If an - optional dictionary is passed in as the second argument, it is - used instead of a new dictionary. - """ - if vars is None: - vars = {} - define_rx = re.compile("#define ([A-Z][A-Za-z0-9_]+) (.*)\n") - undef_rx = re.compile("/[*] #undef ([A-Z][A-Za-z0-9_]+) [*]/\n") - - while True: - line = fp.readline() - if not line: - break - m = define_rx.match(line) - if m: - n, v = m.group(1, 2) - try: - v = int(v) - except ValueError: - pass - vars[n] = v - else: - m = undef_rx.match(line) - if m: - vars[m.group(1)] = 0 - return vars - - -def get_config_h_filename(): - """Return the path of pyconfig.h.""" - if _PYTHON_BUILD: - if os.name == "nt": - inc_dir = os.path.join(_PROJECT_BASE, "PC") - else: - inc_dir = _PROJECT_BASE - else: - inc_dir = get_path('platinclude') - return os.path.join(inc_dir, 'pyconfig.h') - - -def get_scheme_names(): - """Return a tuple containing the schemes names.""" - return tuple(sorted(_SCHEMES.sections())) - - -def get_path_names(): - """Return a tuple containing the paths names.""" - # xxx see if we want a static list - return _SCHEMES.options('posix_prefix') - - -def get_paths(scheme=_get_default_scheme(), vars=None, expand=True): - """Return a mapping containing an install scheme. - - ``scheme`` is the install scheme name. If not provided, it will - return the default scheme for the current platform. - """ - _ensure_cfg_read() - if expand: - return _expand_vars(scheme, vars) - else: - return dict(_SCHEMES.items(scheme)) - - -def get_path(name, scheme=_get_default_scheme(), vars=None, expand=True): - """Return a path corresponding to the scheme. - - ``scheme`` is the install scheme name. - """ - return get_paths(scheme, vars, expand)[name] - - -def get_config_vars(*args): - """With no arguments, return a dictionary of all configuration - variables relevant for the current platform. - - On Unix, this means every variable defined in Python's installed Makefile; - On Windows and Mac OS it's a much smaller set. - - With arguments, return a list of values that result from looking up - each argument in the configuration variable dictionary. - """ - global _CONFIG_VARS - if _CONFIG_VARS is None: - _CONFIG_VARS = {} - # Normalized versions of prefix and exec_prefix are handy to have; - # in fact, these are the standard versions used most places in the - # distutils2 module. - _CONFIG_VARS['prefix'] = _PREFIX - _CONFIG_VARS['exec_prefix'] = _EXEC_PREFIX - _CONFIG_VARS['py_version'] = _PY_VERSION - _CONFIG_VARS['py_version_short'] = _PY_VERSION_SHORT - _CONFIG_VARS['py_version_nodot'] = _PY_VERSION[0] + _PY_VERSION[2] - _CONFIG_VARS['base'] = _PREFIX - _CONFIG_VARS['platbase'] = _EXEC_PREFIX - _CONFIG_VARS['projectbase'] = _PROJECT_BASE - try: - _CONFIG_VARS['abiflags'] = sys.abiflags - except AttributeError: - # sys.abiflags may not be defined on all platforms. - _CONFIG_VARS['abiflags'] = '' - - if os.name in ('nt', 'os2'): - _init_non_posix(_CONFIG_VARS) - if os.name == 'posix': - _init_posix(_CONFIG_VARS) - # Setting 'userbase' is done below the call to the - # init function to enable using 'get_config_var' in - # the init-function. - if sys.version >= '2.6': - _CONFIG_VARS['userbase'] = _getuserbase() - - if 'srcdir' not in _CONFIG_VARS: - _CONFIG_VARS['srcdir'] = _PROJECT_BASE - else: - _CONFIG_VARS['srcdir'] = _safe_realpath(_CONFIG_VARS['srcdir']) - - # Convert srcdir into an absolute path if it appears necessary. - # Normally it is relative to the build directory. However, during - # testing, for example, we might be running a non-installed python - # from a different directory. - if _PYTHON_BUILD and os.name == "posix": - base = _PROJECT_BASE - try: - cwd = os.getcwd() - except OSError: - cwd = None - if (not os.path.isabs(_CONFIG_VARS['srcdir']) and - base != cwd): - # srcdir is relative and we are not in the same directory - # as the executable. Assume executable is in the build - # directory and make srcdir absolute. - srcdir = os.path.join(base, _CONFIG_VARS['srcdir']) - _CONFIG_VARS['srcdir'] = os.path.normpath(srcdir) - - if sys.platform == 'darwin': - kernel_version = os.uname()[2] # Kernel version (8.4.3) - major_version = int(kernel_version.split('.')[0]) - - if major_version < 8: - # On Mac OS X before 10.4, check if -arch and -isysroot - # are in CFLAGS or LDFLAGS and remove them if they are. - # This is needed when building extensions on a 10.3 system - # using a universal build of python. - for key in ('LDFLAGS', 'BASECFLAGS', - # a number of derived variables. These need to be - # patched up as well. - 'CFLAGS', 'PY_CFLAGS', 'BLDSHARED'): - flags = _CONFIG_VARS[key] - flags = re.sub(r'-arch\s+\w+\s', ' ', flags) - flags = re.sub('-isysroot [^ \t]*', ' ', flags) - _CONFIG_VARS[key] = flags - else: - # Allow the user to override the architecture flags using - # an environment variable. - # NOTE: This name was introduced by Apple in OSX 10.5 and - # is used by several scripting languages distributed with - # that OS release. - if 'ARCHFLAGS' in os.environ: - arch = os.environ['ARCHFLAGS'] - for key in ('LDFLAGS', 'BASECFLAGS', - # a number of derived variables. These need to be - # patched up as well. - 'CFLAGS', 'PY_CFLAGS', 'BLDSHARED'): - - flags = _CONFIG_VARS[key] - flags = re.sub(r'-arch\s+\w+\s', ' ', flags) - flags = flags + ' ' + arch - _CONFIG_VARS[key] = flags - - # If we're on OSX 10.5 or later and the user tries to - # compiles an extension using an SDK that is not present - # on the current machine it is better to not use an SDK - # than to fail. - # - # The major usecase for this is users using a Python.org - # binary installer on OSX 10.6: that installer uses - # the 10.4u SDK, but that SDK is not installed by default - # when you install Xcode. - # - CFLAGS = _CONFIG_VARS.get('CFLAGS', '') - m = re.search(r'-isysroot\s+(\S+)', CFLAGS) - if m is not None: - sdk = m.group(1) - if not os.path.exists(sdk): - for key in ('LDFLAGS', 'BASECFLAGS', - # a number of derived variables. These need to be - # patched up as well. - 'CFLAGS', 'PY_CFLAGS', 'BLDSHARED'): - - flags = _CONFIG_VARS[key] - flags = re.sub(r'-isysroot\s+\S+(\s|$)', ' ', flags) - _CONFIG_VARS[key] = flags - - if args: - vals = [] - for name in args: - vals.append(_CONFIG_VARS.get(name)) - return vals - else: - return _CONFIG_VARS - - -def get_config_var(name): - """Return the value of a single variable using the dictionary returned by - 'get_config_vars()'. - - Equivalent to get_config_vars().get(name) - """ - return get_config_vars().get(name) - - -def get_platform(): - """Return a string that identifies the current platform. - - This is used mainly to distinguish platform-specific build directories and - platform-specific built distributions. Typically includes the OS name - and version and the architecture (as supplied by 'os.uname()'), - although the exact information included depends on the OS; eg. for IRIX - the architecture isn't particularly important (IRIX only runs on SGI - hardware), but for Linux the kernel version isn't particularly - important. - - Examples of returned values: - linux-i586 - linux-alpha (?) - solaris-2.6-sun4u - irix-5.3 - irix64-6.2 - - Windows will return one of: - win-amd64 (64bit Windows on AMD64 (aka x86_64, Intel64, EM64T, etc) - win-ia64 (64bit Windows on Itanium) - win32 (all others - specifically, sys.platform is returned) - - For other non-POSIX platforms, currently just returns 'sys.platform'. - """ - if os.name == 'nt': - # sniff sys.version for architecture. - prefix = " bit (" - i = sys.version.find(prefix) - if i == -1: - return sys.platform - j = sys.version.find(")", i) - look = sys.version[i+len(prefix):j].lower() - if look == 'amd64': - return 'win-amd64' - if look == 'itanium': - return 'win-ia64' - return sys.platform - - if os.name != "posix" or not hasattr(os, 'uname'): - # XXX what about the architecture? NT is Intel or Alpha, - # Mac OS is M68k or PPC, etc. - return sys.platform - - # Try to distinguish various flavours of Unix - osname, host, release, version, machine = os.uname() - - # Convert the OS name to lowercase, remove '/' characters - # (to accommodate BSD/OS), and translate spaces (for "Power Macintosh") - osname = osname.lower().replace('/', '') - machine = machine.replace(' ', '_') - machine = machine.replace('/', '-') - - if osname[:5] == "linux": - # At least on Linux/Intel, 'machine' is the processor -- - # i386, etc. - # XXX what about Alpha, SPARC, etc? - return "%s-%s" % (osname, machine) - elif osname[:5] == "sunos": - if release[0] >= "5": # SunOS 5 == Solaris 2 - osname = "solaris" - release = "%d.%s" % (int(release[0]) - 3, release[2:]) - # fall through to standard osname-release-machine representation - elif osname[:4] == "irix": # could be "irix64"! - return "%s-%s" % (osname, release) - elif osname[:3] == "aix": - return "%s-%s.%s" % (osname, version, release) - elif osname[:6] == "cygwin": - osname = "cygwin" - rel_re = re.compile(r'[\d.]+') - m = rel_re.match(release) - if m: - release = m.group() - elif osname[:6] == "darwin": - # - # For our purposes, we'll assume that the system version from - # distutils' perspective is what MACOSX_DEPLOYMENT_TARGET is set - # to. This makes the compatibility story a bit more sane because the - # machine is going to compile and link as if it were - # MACOSX_DEPLOYMENT_TARGET. - cfgvars = get_config_vars() - macver = cfgvars.get('MACOSX_DEPLOYMENT_TARGET') - - if True: - # Always calculate the release of the running machine, - # needed to determine if we can build fat binaries or not. - - macrelease = macver - # Get the system version. Reading this plist is a documented - # way to get the system version (see the documentation for - # the Gestalt Manager) - try: - f = open('/System/Library/CoreServices/SystemVersion.plist') - except IOError: - # We're on a plain darwin box, fall back to the default - # behaviour. - pass - else: - try: - m = re.search(r'ProductUserVisibleVersion\s*' - r'(.*?)', f.read()) - finally: - f.close() - if m is not None: - macrelease = '.'.join(m.group(1).split('.')[:2]) - # else: fall back to the default behaviour - - if not macver: - macver = macrelease - - if macver: - release = macver - osname = "macosx" - - if ((macrelease + '.') >= '10.4.' and - '-arch' in get_config_vars().get('CFLAGS', '').strip()): - # The universal build will build fat binaries, but not on - # systems before 10.4 - # - # Try to detect 4-way universal builds, those have machine-type - # 'universal' instead of 'fat'. - - machine = 'fat' - cflags = get_config_vars().get('CFLAGS') - - archs = re.findall(r'-arch\s+(\S+)', cflags) - archs = tuple(sorted(set(archs))) - - if len(archs) == 1: - machine = archs[0] - elif archs == ('i386', 'ppc'): - machine = 'fat' - elif archs == ('i386', 'x86_64'): - machine = 'intel' - elif archs == ('i386', 'ppc', 'x86_64'): - machine = 'fat3' - elif archs == ('ppc64', 'x86_64'): - machine = 'fat64' - elif archs == ('i386', 'ppc', 'ppc64', 'x86_64'): - machine = 'universal' - else: - raise ValueError( - "Don't know machine value for archs=%r" % (archs,)) - - elif machine == 'i386': - # On OSX the machine type returned by uname is always the - # 32-bit variant, even if the executable architecture is - # the 64-bit variant - if sys.maxsize >= 2**32: - machine = 'x86_64' - - elif machine in ('PowerPC', 'Power_Macintosh'): - # Pick a sane name for the PPC architecture. - # See 'i386' case - if sys.maxsize >= 2**32: - machine = 'ppc64' - else: - machine = 'ppc' - - return "%s-%s-%s" % (osname, release, machine) - - -def get_python_version(): - return _PY_VERSION_SHORT - - -def _print_dict(title, data): - for index, (key, value) in enumerate(sorted(data.items())): - if index == 0: - print('%s: ' % (title)) - print('\t%s = "%s"' % (key, value)) - - -def _main(): - """Display all information sysconfig detains.""" - print('Platform: "%s"' % get_platform()) - print('Python version: "%s"' % get_python_version()) - print('Current installation scheme: "%s"' % _get_default_scheme()) - print() - _print_dict('Paths', get_paths()) - print() - _print_dict('Variables', get_config_vars()) - - -if __name__ == '__main__': - _main() diff --git a/src/pip/_vendor/distlib/_backport/tarfile.py b/src/pip/_vendor/distlib/_backport/tarfile.py deleted file mode 100644 index d66d8566374..00000000000 --- a/src/pip/_vendor/distlib/_backport/tarfile.py +++ /dev/null @@ -1,2607 +0,0 @@ -#------------------------------------------------------------------- -# tarfile.py -#------------------------------------------------------------------- -# Copyright (C) 2002 Lars Gustaebel -# All rights reserved. -# -# Permission is hereby granted, free of charge, to any person -# obtaining a copy of this software and associated documentation -# files (the "Software"), to deal in the Software without -# restriction, including without limitation the rights to use, -# copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following -# conditions: -# -# The above copyright notice and this permission notice shall be -# included in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. -# -from __future__ import print_function - -"""Read from and write to tar format archives. -""" - -__version__ = "$Revision$" - -version = "0.9.0" -__author__ = "Lars Gust\u00e4bel (lars@gustaebel.de)" -__date__ = "$Date: 2011-02-25 17:42:01 +0200 (Fri, 25 Feb 2011) $" -__cvsid__ = "$Id: tarfile.py 88586 2011-02-25 15:42:01Z marc-andre.lemburg $" -__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend." - -#--------- -# Imports -#--------- -import sys -import os -import stat -import errno -import time -import struct -import copy -import re - -try: - import grp, pwd -except ImportError: - grp = pwd = None - -# os.symlink on Windows prior to 6.0 raises NotImplementedError -symlink_exception = (AttributeError, NotImplementedError) -try: - # WindowsError (1314) will be raised if the caller does not hold the - # SeCreateSymbolicLinkPrivilege privilege - symlink_exception += (WindowsError,) -except NameError: - pass - -# from tarfile import * -__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"] - -if sys.version_info[0] < 3: - import __builtin__ as builtins -else: - import builtins - -_open = builtins.open # Since 'open' is TarFile.open - -#--------------------------------------------------------- -# tar constants -#--------------------------------------------------------- -NUL = b"\0" # the null character -BLOCKSIZE = 512 # length of processing blocks -RECORDSIZE = BLOCKSIZE * 20 # length of records -GNU_MAGIC = b"ustar \0" # magic gnu tar string -POSIX_MAGIC = b"ustar\x0000" # magic posix tar string - -LENGTH_NAME = 100 # maximum length of a filename -LENGTH_LINK = 100 # maximum length of a linkname -LENGTH_PREFIX = 155 # maximum length of the prefix field - -REGTYPE = b"0" # regular file -AREGTYPE = b"\0" # regular file -LNKTYPE = b"1" # link (inside tarfile) -SYMTYPE = b"2" # symbolic link -CHRTYPE = b"3" # character special device -BLKTYPE = b"4" # block special device -DIRTYPE = b"5" # directory -FIFOTYPE = b"6" # fifo special device -CONTTYPE = b"7" # contiguous file - -GNUTYPE_LONGNAME = b"L" # GNU tar longname -GNUTYPE_LONGLINK = b"K" # GNU tar longlink -GNUTYPE_SPARSE = b"S" # GNU tar sparse file - -XHDTYPE = b"x" # POSIX.1-2001 extended header -XGLTYPE = b"g" # POSIX.1-2001 global header -SOLARIS_XHDTYPE = b"X" # Solaris extended header - -USTAR_FORMAT = 0 # POSIX.1-1988 (ustar) format -GNU_FORMAT = 1 # GNU tar format -PAX_FORMAT = 2 # POSIX.1-2001 (pax) format -DEFAULT_FORMAT = GNU_FORMAT - -#--------------------------------------------------------- -# tarfile constants -#--------------------------------------------------------- -# File types that tarfile supports: -SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE, - SYMTYPE, DIRTYPE, FIFOTYPE, - CONTTYPE, CHRTYPE, BLKTYPE, - GNUTYPE_LONGNAME, GNUTYPE_LONGLINK, - GNUTYPE_SPARSE) - -# File types that will be treated as a regular file. -REGULAR_TYPES = (REGTYPE, AREGTYPE, - CONTTYPE, GNUTYPE_SPARSE) - -# File types that are part of the GNU tar format. -GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK, - GNUTYPE_SPARSE) - -# Fields from a pax header that override a TarInfo attribute. -PAX_FIELDS = ("path", "linkpath", "size", "mtime", - "uid", "gid", "uname", "gname") - -# Fields from a pax header that are affected by hdrcharset. -PAX_NAME_FIELDS = set(("path", "linkpath", "uname", "gname")) - -# Fields in a pax header that are numbers, all other fields -# are treated as strings. -PAX_NUMBER_FIELDS = { - "atime": float, - "ctime": float, - "mtime": float, - "uid": int, - "gid": int, - "size": int -} - -#--------------------------------------------------------- -# Bits used in the mode field, values in octal. -#--------------------------------------------------------- -S_IFLNK = 0o120000 # symbolic link -S_IFREG = 0o100000 # regular file -S_IFBLK = 0o060000 # block device -S_IFDIR = 0o040000 # directory -S_IFCHR = 0o020000 # character device -S_IFIFO = 0o010000 # fifo - -TSUID = 0o4000 # set UID on execution -TSGID = 0o2000 # set GID on execution -TSVTX = 0o1000 # reserved - -TUREAD = 0o400 # read by owner -TUWRITE = 0o200 # write by owner -TUEXEC = 0o100 # execute/search by owner -TGREAD = 0o040 # read by group -TGWRITE = 0o020 # write by group -TGEXEC = 0o010 # execute/search by group -TOREAD = 0o004 # read by other -TOWRITE = 0o002 # write by other -TOEXEC = 0o001 # execute/search by other - -#--------------------------------------------------------- -# initialization -#--------------------------------------------------------- -if os.name in ("nt", "ce"): - ENCODING = "utf-8" -else: - ENCODING = sys.getfilesystemencoding() - -#--------------------------------------------------------- -# Some useful functions -#--------------------------------------------------------- - -def stn(s, length, encoding, errors): - """Convert a string to a null-terminated bytes object. - """ - s = s.encode(encoding, errors) - return s[:length] + (length - len(s)) * NUL - -def nts(s, encoding, errors): - """Convert a null-terminated bytes object to a string. - """ - p = s.find(b"\0") - if p != -1: - s = s[:p] - return s.decode(encoding, errors) - -def nti(s): - """Convert a number field to a python number. - """ - # There are two possible encodings for a number field, see - # itn() below. - if s[0] != chr(0o200): - try: - n = int(nts(s, "ascii", "strict") or "0", 8) - except ValueError: - raise InvalidHeaderError("invalid header") - else: - n = 0 - for i in range(len(s) - 1): - n <<= 8 - n += ord(s[i + 1]) - return n - -def itn(n, digits=8, format=DEFAULT_FORMAT): - """Convert a python number to a number field. - """ - # POSIX 1003.1-1988 requires numbers to be encoded as a string of - # octal digits followed by a null-byte, this allows values up to - # (8**(digits-1))-1. GNU tar allows storing numbers greater than - # that if necessary. A leading 0o200 byte indicates this particular - # encoding, the following digits-1 bytes are a big-endian - # representation. This allows values up to (256**(digits-1))-1. - if 0 <= n < 8 ** (digits - 1): - s = ("%0*o" % (digits - 1, n)).encode("ascii") + NUL - else: - if format != GNU_FORMAT or n >= 256 ** (digits - 1): - raise ValueError("overflow in number field") - - if n < 0: - # XXX We mimic GNU tar's behaviour with negative numbers, - # this could raise OverflowError. - n = struct.unpack("L", struct.pack("l", n))[0] - - s = bytearray() - for i in range(digits - 1): - s.insert(0, n & 0o377) - n >>= 8 - s.insert(0, 0o200) - return s - -def calc_chksums(buf): - """Calculate the checksum for a member's header by summing up all - characters except for the chksum field which is treated as if - it was filled with spaces. According to the GNU tar sources, - some tars (Sun and NeXT) calculate chksum with signed char, - which will be different if there are chars in the buffer with - the high bit set. So we calculate two checksums, unsigned and - signed. - """ - unsigned_chksum = 256 + sum(struct.unpack("148B", buf[:148]) + struct.unpack("356B", buf[156:512])) - signed_chksum = 256 + sum(struct.unpack("148b", buf[:148]) + struct.unpack("356b", buf[156:512])) - return unsigned_chksum, signed_chksum - -def copyfileobj(src, dst, length=None): - """Copy length bytes from fileobj src to fileobj dst. - If length is None, copy the entire content. - """ - if length == 0: - return - if length is None: - while True: - buf = src.read(16*1024) - if not buf: - break - dst.write(buf) - return - - BUFSIZE = 16 * 1024 - blocks, remainder = divmod(length, BUFSIZE) - for b in range(blocks): - buf = src.read(BUFSIZE) - if len(buf) < BUFSIZE: - raise IOError("end of file reached") - dst.write(buf) - - if remainder != 0: - buf = src.read(remainder) - if len(buf) < remainder: - raise IOError("end of file reached") - dst.write(buf) - return - -filemode_table = ( - ((S_IFLNK, "l"), - (S_IFREG, "-"), - (S_IFBLK, "b"), - (S_IFDIR, "d"), - (S_IFCHR, "c"), - (S_IFIFO, "p")), - - ((TUREAD, "r"),), - ((TUWRITE, "w"),), - ((TUEXEC|TSUID, "s"), - (TSUID, "S"), - (TUEXEC, "x")), - - ((TGREAD, "r"),), - ((TGWRITE, "w"),), - ((TGEXEC|TSGID, "s"), - (TSGID, "S"), - (TGEXEC, "x")), - - ((TOREAD, "r"),), - ((TOWRITE, "w"),), - ((TOEXEC|TSVTX, "t"), - (TSVTX, "T"), - (TOEXEC, "x")) -) - -def filemode(mode): - """Convert a file's mode to a string of the form - -rwxrwxrwx. - Used by TarFile.list() - """ - perm = [] - for table in filemode_table: - for bit, char in table: - if mode & bit == bit: - perm.append(char) - break - else: - perm.append("-") - return "".join(perm) - -class TarError(Exception): - """Base exception.""" - pass -class ExtractError(TarError): - """General exception for extract errors.""" - pass -class ReadError(TarError): - """Exception for unreadable tar archives.""" - pass -class CompressionError(TarError): - """Exception for unavailable compression methods.""" - pass -class StreamError(TarError): - """Exception for unsupported operations on stream-like TarFiles.""" - pass -class HeaderError(TarError): - """Base exception for header errors.""" - pass -class EmptyHeaderError(HeaderError): - """Exception for empty headers.""" - pass -class TruncatedHeaderError(HeaderError): - """Exception for truncated headers.""" - pass -class EOFHeaderError(HeaderError): - """Exception for end of file headers.""" - pass -class InvalidHeaderError(HeaderError): - """Exception for invalid headers.""" - pass -class SubsequentHeaderError(HeaderError): - """Exception for missing and invalid extended headers.""" - pass - -#--------------------------- -# internal stream interface -#--------------------------- -class _LowLevelFile(object): - """Low-level file object. Supports reading and writing. - It is used instead of a regular file object for streaming - access. - """ - - def __init__(self, name, mode): - mode = { - "r": os.O_RDONLY, - "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC, - }[mode] - if hasattr(os, "O_BINARY"): - mode |= os.O_BINARY - self.fd = os.open(name, mode, 0o666) - - def close(self): - os.close(self.fd) - - def read(self, size): - return os.read(self.fd, size) - - def write(self, s): - os.write(self.fd, s) - -class _Stream(object): - """Class that serves as an adapter between TarFile and - a stream-like object. The stream-like object only - needs to have a read() or write() method and is accessed - blockwise. Use of gzip or bzip2 compression is possible. - A stream-like object could be for example: sys.stdin, - sys.stdout, a socket, a tape device etc. - - _Stream is intended to be used only internally. - """ - - def __init__(self, name, mode, comptype, fileobj, bufsize): - """Construct a _Stream object. - """ - self._extfileobj = True - if fileobj is None: - fileobj = _LowLevelFile(name, mode) - self._extfileobj = False - - if comptype == '*': - # Enable transparent compression detection for the - # stream interface - fileobj = _StreamProxy(fileobj) - comptype = fileobj.getcomptype() - - self.name = name or "" - self.mode = mode - self.comptype = comptype - self.fileobj = fileobj - self.bufsize = bufsize - self.buf = b"" - self.pos = 0 - self.closed = False - - try: - if comptype == "gz": - try: - import zlib - except ImportError: - raise CompressionError("zlib module is not available") - self.zlib = zlib - self.crc = zlib.crc32(b"") - if mode == "r": - self._init_read_gz() - else: - self._init_write_gz() - - if comptype == "bz2": - try: - import bz2 - except ImportError: - raise CompressionError("bz2 module is not available") - if mode == "r": - self.dbuf = b"" - self.cmp = bz2.BZ2Decompressor() - else: - self.cmp = bz2.BZ2Compressor() - except: - if not self._extfileobj: - self.fileobj.close() - self.closed = True - raise - - def __del__(self): - if hasattr(self, "closed") and not self.closed: - self.close() - - def _init_write_gz(self): - """Initialize for writing with gzip compression. - """ - self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED, - -self.zlib.MAX_WBITS, - self.zlib.DEF_MEM_LEVEL, - 0) - timestamp = struct.pack(" self.bufsize: - self.fileobj.write(self.buf[:self.bufsize]) - self.buf = self.buf[self.bufsize:] - - def close(self): - """Close the _Stream object. No operation should be - done on it afterwards. - """ - if self.closed: - return - - if self.mode == "w" and self.comptype != "tar": - self.buf += self.cmp.flush() - - if self.mode == "w" and self.buf: - self.fileobj.write(self.buf) - self.buf = b"" - if self.comptype == "gz": - # The native zlib crc is an unsigned 32-bit integer, but - # the Python wrapper implicitly casts that to a signed C - # long. So, on a 32-bit box self.crc may "look negative", - # while the same crc on a 64-bit box may "look positive". - # To avoid irksome warnings from the `struct` module, force - # it to look positive on all boxes. - self.fileobj.write(struct.pack("= 0: - blocks, remainder = divmod(pos - self.pos, self.bufsize) - for i in range(blocks): - self.read(self.bufsize) - self.read(remainder) - else: - raise StreamError("seeking backwards is not allowed") - return self.pos - - def read(self, size=None): - """Return the next size number of bytes from the stream. - If size is not defined, return all bytes of the stream - up to EOF. - """ - if size is None: - t = [] - while True: - buf = self._read(self.bufsize) - if not buf: - break - t.append(buf) - buf = "".join(t) - else: - buf = self._read(size) - self.pos += len(buf) - return buf - - def _read(self, size): - """Return size bytes from the stream. - """ - if self.comptype == "tar": - return self.__read(size) - - c = len(self.dbuf) - while c < size: - buf = self.__read(self.bufsize) - if not buf: - break - try: - buf = self.cmp.decompress(buf) - except IOError: - raise ReadError("invalid compressed data") - self.dbuf += buf - c += len(buf) - buf = self.dbuf[:size] - self.dbuf = self.dbuf[size:] - return buf - - def __read(self, size): - """Return size bytes from stream. If internal buffer is empty, - read another block from the stream. - """ - c = len(self.buf) - while c < size: - buf = self.fileobj.read(self.bufsize) - if not buf: - break - self.buf += buf - c += len(buf) - buf = self.buf[:size] - self.buf = self.buf[size:] - return buf -# class _Stream - -class _StreamProxy(object): - """Small proxy class that enables transparent compression - detection for the Stream interface (mode 'r|*'). - """ - - def __init__(self, fileobj): - self.fileobj = fileobj - self.buf = self.fileobj.read(BLOCKSIZE) - - def read(self, size): - self.read = self.fileobj.read - return self.buf - - def getcomptype(self): - if self.buf.startswith(b"\037\213\010"): - return "gz" - if self.buf.startswith(b"BZh91"): - return "bz2" - return "tar" - - def close(self): - self.fileobj.close() -# class StreamProxy - -class _BZ2Proxy(object): - """Small proxy class that enables external file object - support for "r:bz2" and "w:bz2" modes. This is actually - a workaround for a limitation in bz2 module's BZ2File - class which (unlike gzip.GzipFile) has no support for - a file object argument. - """ - - blocksize = 16 * 1024 - - def __init__(self, fileobj, mode): - self.fileobj = fileobj - self.mode = mode - self.name = getattr(self.fileobj, "name", None) - self.init() - - def init(self): - import bz2 - self.pos = 0 - if self.mode == "r": - self.bz2obj = bz2.BZ2Decompressor() - self.fileobj.seek(0) - self.buf = b"" - else: - self.bz2obj = bz2.BZ2Compressor() - - def read(self, size): - x = len(self.buf) - while x < size: - raw = self.fileobj.read(self.blocksize) - if not raw: - break - data = self.bz2obj.decompress(raw) - self.buf += data - x += len(data) - - buf = self.buf[:size] - self.buf = self.buf[size:] - self.pos += len(buf) - return buf - - def seek(self, pos): - if pos < self.pos: - self.init() - self.read(pos - self.pos) - - def tell(self): - return self.pos - - def write(self, data): - self.pos += len(data) - raw = self.bz2obj.compress(data) - self.fileobj.write(raw) - - def close(self): - if self.mode == "w": - raw = self.bz2obj.flush() - self.fileobj.write(raw) -# class _BZ2Proxy - -#------------------------ -# Extraction file object -#------------------------ -class _FileInFile(object): - """A thin wrapper around an existing file object that - provides a part of its data as an individual file - object. - """ - - def __init__(self, fileobj, offset, size, blockinfo=None): - self.fileobj = fileobj - self.offset = offset - self.size = size - self.position = 0 - - if blockinfo is None: - blockinfo = [(0, size)] - - # Construct a map with data and zero blocks. - self.map_index = 0 - self.map = [] - lastpos = 0 - realpos = self.offset - for offset, size in blockinfo: - if offset > lastpos: - self.map.append((False, lastpos, offset, None)) - self.map.append((True, offset, offset + size, realpos)) - realpos += size - lastpos = offset + size - if lastpos < self.size: - self.map.append((False, lastpos, self.size, None)) - - def seekable(self): - if not hasattr(self.fileobj, "seekable"): - # XXX gzip.GzipFile and bz2.BZ2File - return True - return self.fileobj.seekable() - - def tell(self): - """Return the current file position. - """ - return self.position - - def seek(self, position): - """Seek to a position in the file. - """ - self.position = position - - def read(self, size=None): - """Read data from the file. - """ - if size is None: - size = self.size - self.position - else: - size = min(size, self.size - self.position) - - buf = b"" - while size > 0: - while True: - data, start, stop, offset = self.map[self.map_index] - if start <= self.position < stop: - break - else: - self.map_index += 1 - if self.map_index == len(self.map): - self.map_index = 0 - length = min(size, stop - self.position) - if data: - self.fileobj.seek(offset + (self.position - start)) - buf += self.fileobj.read(length) - else: - buf += NUL * length - size -= length - self.position += length - return buf -#class _FileInFile - - -class ExFileObject(object): - """File-like object for reading an archive member. - Is returned by TarFile.extractfile(). - """ - blocksize = 1024 - - def __init__(self, tarfile, tarinfo): - self.fileobj = _FileInFile(tarfile.fileobj, - tarinfo.offset_data, - tarinfo.size, - tarinfo.sparse) - self.name = tarinfo.name - self.mode = "r" - self.closed = False - self.size = tarinfo.size - - self.position = 0 - self.buffer = b"" - - def readable(self): - return True - - def writable(self): - return False - - def seekable(self): - return self.fileobj.seekable() - - def read(self, size=None): - """Read at most size bytes from the file. If size is not - present or None, read all data until EOF is reached. - """ - if self.closed: - raise ValueError("I/O operation on closed file") - - buf = b"" - if self.buffer: - if size is None: - buf = self.buffer - self.buffer = b"" - else: - buf = self.buffer[:size] - self.buffer = self.buffer[size:] - - if size is None: - buf += self.fileobj.read() - else: - buf += self.fileobj.read(size - len(buf)) - - self.position += len(buf) - return buf - - # XXX TextIOWrapper uses the read1() method. - read1 = read - - def readline(self, size=-1): - """Read one entire line from the file. If size is present - and non-negative, return a string with at most that - size, which may be an incomplete line. - """ - if self.closed: - raise ValueError("I/O operation on closed file") - - pos = self.buffer.find(b"\n") + 1 - if pos == 0: - # no newline found. - while True: - buf = self.fileobj.read(self.blocksize) - self.buffer += buf - if not buf or b"\n" in buf: - pos = self.buffer.find(b"\n") + 1 - if pos == 0: - # no newline found. - pos = len(self.buffer) - break - - if size != -1: - pos = min(size, pos) - - buf = self.buffer[:pos] - self.buffer = self.buffer[pos:] - self.position += len(buf) - return buf - - def readlines(self): - """Return a list with all remaining lines. - """ - result = [] - while True: - line = self.readline() - if not line: break - result.append(line) - return result - - def tell(self): - """Return the current file position. - """ - if self.closed: - raise ValueError("I/O operation on closed file") - - return self.position - - def seek(self, pos, whence=os.SEEK_SET): - """Seek to a position in the file. - """ - if self.closed: - raise ValueError("I/O operation on closed file") - - if whence == os.SEEK_SET: - self.position = min(max(pos, 0), self.size) - elif whence == os.SEEK_CUR: - if pos < 0: - self.position = max(self.position + pos, 0) - else: - self.position = min(self.position + pos, self.size) - elif whence == os.SEEK_END: - self.position = max(min(self.size + pos, self.size), 0) - else: - raise ValueError("Invalid argument") - - self.buffer = b"" - self.fileobj.seek(self.position) - - def close(self): - """Close the file object. - """ - self.closed = True - - def __iter__(self): - """Get an iterator over the file's lines. - """ - while True: - line = self.readline() - if not line: - break - yield line -#class ExFileObject - -#------------------ -# Exported Classes -#------------------ -class TarInfo(object): - """Informational class which holds the details about an - archive member given by a tar header block. - TarInfo objects are returned by TarFile.getmember(), - TarFile.getmembers() and TarFile.gettarinfo() and are - usually created internally. - """ - - __slots__ = ("name", "mode", "uid", "gid", "size", "mtime", - "chksum", "type", "linkname", "uname", "gname", - "devmajor", "devminor", - "offset", "offset_data", "pax_headers", "sparse", - "tarfile", "_sparse_structs", "_link_target") - - def __init__(self, name=""): - """Construct a TarInfo object. name is the optional name - of the member. - """ - self.name = name # member name - self.mode = 0o644 # file permissions - self.uid = 0 # user id - self.gid = 0 # group id - self.size = 0 # file size - self.mtime = 0 # modification time - self.chksum = 0 # header checksum - self.type = REGTYPE # member type - self.linkname = "" # link name - self.uname = "" # user name - self.gname = "" # group name - self.devmajor = 0 # device major number - self.devminor = 0 # device minor number - - self.offset = 0 # the tar header starts here - self.offset_data = 0 # the file's data starts here - - self.sparse = None # sparse member information - self.pax_headers = {} # pax header information - - # In pax headers the "name" and "linkname" field are called - # "path" and "linkpath". - def _getpath(self): - return self.name - def _setpath(self, name): - self.name = name - path = property(_getpath, _setpath) - - def _getlinkpath(self): - return self.linkname - def _setlinkpath(self, linkname): - self.linkname = linkname - linkpath = property(_getlinkpath, _setlinkpath) - - def __repr__(self): - return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self)) - - def get_info(self): - """Return the TarInfo's attributes as a dictionary. - """ - info = { - "name": self.name, - "mode": self.mode & 0o7777, - "uid": self.uid, - "gid": self.gid, - "size": self.size, - "mtime": self.mtime, - "chksum": self.chksum, - "type": self.type, - "linkname": self.linkname, - "uname": self.uname, - "gname": self.gname, - "devmajor": self.devmajor, - "devminor": self.devminor - } - - if info["type"] == DIRTYPE and not info["name"].endswith("/"): - info["name"] += "/" - - return info - - def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"): - """Return a tar header as a string of 512 byte blocks. - """ - info = self.get_info() - - if format == USTAR_FORMAT: - return self.create_ustar_header(info, encoding, errors) - elif format == GNU_FORMAT: - return self.create_gnu_header(info, encoding, errors) - elif format == PAX_FORMAT: - return self.create_pax_header(info, encoding) - else: - raise ValueError("invalid format") - - def create_ustar_header(self, info, encoding, errors): - """Return the object as a ustar header block. - """ - info["magic"] = POSIX_MAGIC - - if len(info["linkname"]) > LENGTH_LINK: - raise ValueError("linkname is too long") - - if len(info["name"]) > LENGTH_NAME: - info["prefix"], info["name"] = self._posix_split_name(info["name"]) - - return self._create_header(info, USTAR_FORMAT, encoding, errors) - - def create_gnu_header(self, info, encoding, errors): - """Return the object as a GNU header block sequence. - """ - info["magic"] = GNU_MAGIC - - buf = b"" - if len(info["linkname"]) > LENGTH_LINK: - buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors) - - if len(info["name"]) > LENGTH_NAME: - buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors) - - return buf + self._create_header(info, GNU_FORMAT, encoding, errors) - - def create_pax_header(self, info, encoding): - """Return the object as a ustar header block. If it cannot be - represented this way, prepend a pax extended header sequence - with supplement information. - """ - info["magic"] = POSIX_MAGIC - pax_headers = self.pax_headers.copy() - - # Test string fields for values that exceed the field length or cannot - # be represented in ASCII encoding. - for name, hname, length in ( - ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK), - ("uname", "uname", 32), ("gname", "gname", 32)): - - if hname in pax_headers: - # The pax header has priority. - continue - - # Try to encode the string as ASCII. - try: - info[name].encode("ascii", "strict") - except UnicodeEncodeError: - pax_headers[hname] = info[name] - continue - - if len(info[name]) > length: - pax_headers[hname] = info[name] - - # Test number fields for values that exceed the field limit or values - # that like to be stored as float. - for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)): - if name in pax_headers: - # The pax header has priority. Avoid overflow. - info[name] = 0 - continue - - val = info[name] - if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float): - pax_headers[name] = str(val) - info[name] = 0 - - # Create a pax extended header if necessary. - if pax_headers: - buf = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding) - else: - buf = b"" - - return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace") - - @classmethod - def create_pax_global_header(cls, pax_headers): - """Return the object as a pax global header block sequence. - """ - return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf8") - - def _posix_split_name(self, name): - """Split a name longer than 100 chars into a prefix - and a name part. - """ - prefix = name[:LENGTH_PREFIX + 1] - while prefix and prefix[-1] != "/": - prefix = prefix[:-1] - - name = name[len(prefix):] - prefix = prefix[:-1] - - if not prefix or len(name) > LENGTH_NAME: - raise ValueError("name is too long") - return prefix, name - - @staticmethod - def _create_header(info, format, encoding, errors): - """Return a header block. info is a dictionary with file - information, format must be one of the *_FORMAT constants. - """ - parts = [ - stn(info.get("name", ""), 100, encoding, errors), - itn(info.get("mode", 0) & 0o7777, 8, format), - itn(info.get("uid", 0), 8, format), - itn(info.get("gid", 0), 8, format), - itn(info.get("size", 0), 12, format), - itn(info.get("mtime", 0), 12, format), - b" ", # checksum field - info.get("type", REGTYPE), - stn(info.get("linkname", ""), 100, encoding, errors), - info.get("magic", POSIX_MAGIC), - stn(info.get("uname", ""), 32, encoding, errors), - stn(info.get("gname", ""), 32, encoding, errors), - itn(info.get("devmajor", 0), 8, format), - itn(info.get("devminor", 0), 8, format), - stn(info.get("prefix", ""), 155, encoding, errors) - ] - - buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts)) - chksum = calc_chksums(buf[-BLOCKSIZE:])[0] - buf = buf[:-364] + ("%06o\0" % chksum).encode("ascii") + buf[-357:] - return buf - - @staticmethod - def _create_payload(payload): - """Return the string payload filled with zero bytes - up to the next 512 byte border. - """ - blocks, remainder = divmod(len(payload), BLOCKSIZE) - if remainder > 0: - payload += (BLOCKSIZE - remainder) * NUL - return payload - - @classmethod - def _create_gnu_long_header(cls, name, type, encoding, errors): - """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence - for name. - """ - name = name.encode(encoding, errors) + NUL - - info = {} - info["name"] = "././@LongLink" - info["type"] = type - info["size"] = len(name) - info["magic"] = GNU_MAGIC - - # create extended header + name blocks. - return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \ - cls._create_payload(name) - - @classmethod - def _create_pax_generic_header(cls, pax_headers, type, encoding): - """Return a POSIX.1-2008 extended or global header sequence - that contains a list of keyword, value pairs. The values - must be strings. - """ - # Check if one of the fields contains surrogate characters and thereby - # forces hdrcharset=BINARY, see _proc_pax() for more information. - binary = False - for keyword, value in pax_headers.items(): - try: - value.encode("utf8", "strict") - except UnicodeEncodeError: - binary = True - break - - records = b"" - if binary: - # Put the hdrcharset field at the beginning of the header. - records += b"21 hdrcharset=BINARY\n" - - for keyword, value in pax_headers.items(): - keyword = keyword.encode("utf8") - if binary: - # Try to restore the original byte representation of `value'. - # Needless to say, that the encoding must match the string. - value = value.encode(encoding, "surrogateescape") - else: - value = value.encode("utf8") - - l = len(keyword) + len(value) + 3 # ' ' + '=' + '\n' - n = p = 0 - while True: - n = l + len(str(p)) - if n == p: - break - p = n - records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n" - - # We use a hardcoded "././@PaxHeader" name like star does - # instead of the one that POSIX recommends. - info = {} - info["name"] = "././@PaxHeader" - info["type"] = type - info["size"] = len(records) - info["magic"] = POSIX_MAGIC - - # Create pax header + record blocks. - return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \ - cls._create_payload(records) - - @classmethod - def frombuf(cls, buf, encoding, errors): - """Construct a TarInfo object from a 512 byte bytes object. - """ - if len(buf) == 0: - raise EmptyHeaderError("empty header") - if len(buf) != BLOCKSIZE: - raise TruncatedHeaderError("truncated header") - if buf.count(NUL) == BLOCKSIZE: - raise EOFHeaderError("end of file header") - - chksum = nti(buf[148:156]) - if chksum not in calc_chksums(buf): - raise InvalidHeaderError("bad checksum") - - obj = cls() - obj.name = nts(buf[0:100], encoding, errors) - obj.mode = nti(buf[100:108]) - obj.uid = nti(buf[108:116]) - obj.gid = nti(buf[116:124]) - obj.size = nti(buf[124:136]) - obj.mtime = nti(buf[136:148]) - obj.chksum = chksum - obj.type = buf[156:157] - obj.linkname = nts(buf[157:257], encoding, errors) - obj.uname = nts(buf[265:297], encoding, errors) - obj.gname = nts(buf[297:329], encoding, errors) - obj.devmajor = nti(buf[329:337]) - obj.devminor = nti(buf[337:345]) - prefix = nts(buf[345:500], encoding, errors) - - # Old V7 tar format represents a directory as a regular - # file with a trailing slash. - if obj.type == AREGTYPE and obj.name.endswith("/"): - obj.type = DIRTYPE - - # The old GNU sparse format occupies some of the unused - # space in the buffer for up to 4 sparse structures. - # Save the them for later processing in _proc_sparse(). - if obj.type == GNUTYPE_SPARSE: - pos = 386 - structs = [] - for i in range(4): - try: - offset = nti(buf[pos:pos + 12]) - numbytes = nti(buf[pos + 12:pos + 24]) - except ValueError: - break - structs.append((offset, numbytes)) - pos += 24 - isextended = bool(buf[482]) - origsize = nti(buf[483:495]) - obj._sparse_structs = (structs, isextended, origsize) - - # Remove redundant slashes from directories. - if obj.isdir(): - obj.name = obj.name.rstrip("/") - - # Reconstruct a ustar longname. - if prefix and obj.type not in GNU_TYPES: - obj.name = prefix + "/" + obj.name - return obj - - @classmethod - def fromtarfile(cls, tarfile): - """Return the next TarInfo object from TarFile object - tarfile. - """ - buf = tarfile.fileobj.read(BLOCKSIZE) - obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors) - obj.offset = tarfile.fileobj.tell() - BLOCKSIZE - return obj._proc_member(tarfile) - - #-------------------------------------------------------------------------- - # The following are methods that are called depending on the type of a - # member. The entry point is _proc_member() which can be overridden in a - # subclass to add custom _proc_*() methods. A _proc_*() method MUST - # implement the following - # operations: - # 1. Set self.offset_data to the position where the data blocks begin, - # if there is data that follows. - # 2. Set tarfile.offset to the position where the next member's header will - # begin. - # 3. Return self or another valid TarInfo object. - def _proc_member(self, tarfile): - """Choose the right processing method depending on - the type and call it. - """ - if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK): - return self._proc_gnulong(tarfile) - elif self.type == GNUTYPE_SPARSE: - return self._proc_sparse(tarfile) - elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE): - return self._proc_pax(tarfile) - else: - return self._proc_builtin(tarfile) - - def _proc_builtin(self, tarfile): - """Process a builtin type or an unknown type which - will be treated as a regular file. - """ - self.offset_data = tarfile.fileobj.tell() - offset = self.offset_data - if self.isreg() or self.type not in SUPPORTED_TYPES: - # Skip the following data blocks. - offset += self._block(self.size) - tarfile.offset = offset - - # Patch the TarInfo object with saved global - # header information. - self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors) - - return self - - def _proc_gnulong(self, tarfile): - """Process the blocks that hold a GNU longname - or longlink member. - """ - buf = tarfile.fileobj.read(self._block(self.size)) - - # Fetch the next header and process it. - try: - next = self.fromtarfile(tarfile) - except HeaderError: - raise SubsequentHeaderError("missing or bad subsequent header") - - # Patch the TarInfo object from the next header with - # the longname information. - next.offset = self.offset - if self.type == GNUTYPE_LONGNAME: - next.name = nts(buf, tarfile.encoding, tarfile.errors) - elif self.type == GNUTYPE_LONGLINK: - next.linkname = nts(buf, tarfile.encoding, tarfile.errors) - - return next - - def _proc_sparse(self, tarfile): - """Process a GNU sparse header plus extra headers. - """ - # We already collected some sparse structures in frombuf(). - structs, isextended, origsize = self._sparse_structs - del self._sparse_structs - - # Collect sparse structures from extended header blocks. - while isextended: - buf = tarfile.fileobj.read(BLOCKSIZE) - pos = 0 - for i in range(21): - try: - offset = nti(buf[pos:pos + 12]) - numbytes = nti(buf[pos + 12:pos + 24]) - except ValueError: - break - if offset and numbytes: - structs.append((offset, numbytes)) - pos += 24 - isextended = bool(buf[504]) - self.sparse = structs - - self.offset_data = tarfile.fileobj.tell() - tarfile.offset = self.offset_data + self._block(self.size) - self.size = origsize - return self - - def _proc_pax(self, tarfile): - """Process an extended or global header as described in - POSIX.1-2008. - """ - # Read the header information. - buf = tarfile.fileobj.read(self._block(self.size)) - - # A pax header stores supplemental information for either - # the following file (extended) or all following files - # (global). - if self.type == XGLTYPE: - pax_headers = tarfile.pax_headers - else: - pax_headers = tarfile.pax_headers.copy() - - # Check if the pax header contains a hdrcharset field. This tells us - # the encoding of the path, linkpath, uname and gname fields. Normally, - # these fields are UTF-8 encoded but since POSIX.1-2008 tar - # implementations are allowed to store them as raw binary strings if - # the translation to UTF-8 fails. - match = re.search(br"\d+ hdrcharset=([^\n]+)\n", buf) - if match is not None: - pax_headers["hdrcharset"] = match.group(1).decode("utf8") - - # For the time being, we don't care about anything other than "BINARY". - # The only other value that is currently allowed by the standard is - # "ISO-IR 10646 2000 UTF-8" in other words UTF-8. - hdrcharset = pax_headers.get("hdrcharset") - if hdrcharset == "BINARY": - encoding = tarfile.encoding - else: - encoding = "utf8" - - # Parse pax header information. A record looks like that: - # "%d %s=%s\n" % (length, keyword, value). length is the size - # of the complete record including the length field itself and - # the newline. keyword and value are both UTF-8 encoded strings. - regex = re.compile(br"(\d+) ([^=]+)=") - pos = 0 - while True: - match = regex.match(buf, pos) - if not match: - break - - length, keyword = match.groups() - length = int(length) - value = buf[match.end(2) + 1:match.start(1) + length - 1] - - # Normally, we could just use "utf8" as the encoding and "strict" - # as the error handler, but we better not take the risk. For - # example, GNU tar <= 1.23 is known to store filenames it cannot - # translate to UTF-8 as raw strings (unfortunately without a - # hdrcharset=BINARY header). - # We first try the strict standard encoding, and if that fails we - # fall back on the user's encoding and error handler. - keyword = self._decode_pax_field(keyword, "utf8", "utf8", - tarfile.errors) - if keyword in PAX_NAME_FIELDS: - value = self._decode_pax_field(value, encoding, tarfile.encoding, - tarfile.errors) - else: - value = self._decode_pax_field(value, "utf8", "utf8", - tarfile.errors) - - pax_headers[keyword] = value - pos += length - - # Fetch the next header. - try: - next = self.fromtarfile(tarfile) - except HeaderError: - raise SubsequentHeaderError("missing or bad subsequent header") - - # Process GNU sparse information. - if "GNU.sparse.map" in pax_headers: - # GNU extended sparse format version 0.1. - self._proc_gnusparse_01(next, pax_headers) - - elif "GNU.sparse.size" in pax_headers: - # GNU extended sparse format version 0.0. - self._proc_gnusparse_00(next, pax_headers, buf) - - elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0": - # GNU extended sparse format version 1.0. - self._proc_gnusparse_10(next, pax_headers, tarfile) - - if self.type in (XHDTYPE, SOLARIS_XHDTYPE): - # Patch the TarInfo object with the extended header info. - next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors) - next.offset = self.offset - - if "size" in pax_headers: - # If the extended header replaces the size field, - # we need to recalculate the offset where the next - # header starts. - offset = next.offset_data - if next.isreg() or next.type not in SUPPORTED_TYPES: - offset += next._block(next.size) - tarfile.offset = offset - - return next - - def _proc_gnusparse_00(self, next, pax_headers, buf): - """Process a GNU tar extended sparse header, version 0.0. - """ - offsets = [] - for match in re.finditer(br"\d+ GNU.sparse.offset=(\d+)\n", buf): - offsets.append(int(match.group(1))) - numbytes = [] - for match in re.finditer(br"\d+ GNU.sparse.numbytes=(\d+)\n", buf): - numbytes.append(int(match.group(1))) - next.sparse = list(zip(offsets, numbytes)) - - def _proc_gnusparse_01(self, next, pax_headers): - """Process a GNU tar extended sparse header, version 0.1. - """ - sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")] - next.sparse = list(zip(sparse[::2], sparse[1::2])) - - def _proc_gnusparse_10(self, next, pax_headers, tarfile): - """Process a GNU tar extended sparse header, version 1.0. - """ - fields = None - sparse = [] - buf = tarfile.fileobj.read(BLOCKSIZE) - fields, buf = buf.split(b"\n", 1) - fields = int(fields) - while len(sparse) < fields * 2: - if b"\n" not in buf: - buf += tarfile.fileobj.read(BLOCKSIZE) - number, buf = buf.split(b"\n", 1) - sparse.append(int(number)) - next.offset_data = tarfile.fileobj.tell() - next.sparse = list(zip(sparse[::2], sparse[1::2])) - - def _apply_pax_info(self, pax_headers, encoding, errors): - """Replace fields with supplemental information from a previous - pax extended or global header. - """ - for keyword, value in pax_headers.items(): - if keyword == "GNU.sparse.name": - setattr(self, "path", value) - elif keyword == "GNU.sparse.size": - setattr(self, "size", int(value)) - elif keyword == "GNU.sparse.realsize": - setattr(self, "size", int(value)) - elif keyword in PAX_FIELDS: - if keyword in PAX_NUMBER_FIELDS: - try: - value = PAX_NUMBER_FIELDS[keyword](value) - except ValueError: - value = 0 - if keyword == "path": - value = value.rstrip("/") - setattr(self, keyword, value) - - self.pax_headers = pax_headers.copy() - - def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors): - """Decode a single field from a pax record. - """ - try: - return value.decode(encoding, "strict") - except UnicodeDecodeError: - return value.decode(fallback_encoding, fallback_errors) - - def _block(self, count): - """Round up a byte count by BLOCKSIZE and return it, - e.g. _block(834) => 1024. - """ - blocks, remainder = divmod(count, BLOCKSIZE) - if remainder: - blocks += 1 - return blocks * BLOCKSIZE - - def isreg(self): - return self.type in REGULAR_TYPES - def isfile(self): - return self.isreg() - def isdir(self): - return self.type == DIRTYPE - def issym(self): - return self.type == SYMTYPE - def islnk(self): - return self.type == LNKTYPE - def ischr(self): - return self.type == CHRTYPE - def isblk(self): - return self.type == BLKTYPE - def isfifo(self): - return self.type == FIFOTYPE - def issparse(self): - return self.sparse is not None - def isdev(self): - return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE) -# class TarInfo - -class TarFile(object): - """The TarFile Class provides an interface to tar archives. - """ - - debug = 0 # May be set from 0 (no msgs) to 3 (all msgs) - - dereference = False # If true, add content of linked file to the - # tar file, else the link. - - ignore_zeros = False # If true, skips empty or invalid blocks and - # continues processing. - - errorlevel = 1 # If 0, fatal errors only appear in debug - # messages (if debug >= 0). If > 0, errors - # are passed to the caller as exceptions. - - format = DEFAULT_FORMAT # The format to use when creating an archive. - - encoding = ENCODING # Encoding for 8-bit character strings. - - errors = None # Error handler for unicode conversion. - - tarinfo = TarInfo # The default TarInfo class to use. - - fileobject = ExFileObject # The default ExFileObject class to use. - - def __init__(self, name=None, mode="r", fileobj=None, format=None, - tarinfo=None, dereference=None, ignore_zeros=None, encoding=None, - errors="surrogateescape", pax_headers=None, debug=None, errorlevel=None): - """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to - read from an existing archive, 'a' to append data to an existing - file or 'w' to create a new file overwriting an existing one. `mode' - defaults to 'r'. - If `fileobj' is given, it is used for reading or writing data. If it - can be determined, `mode' is overridden by `fileobj's mode. - `fileobj' is not closed, when TarFile is closed. - """ - if len(mode) > 1 or mode not in "raw": - raise ValueError("mode must be 'r', 'a' or 'w'") - self.mode = mode - self._mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode] - - if not fileobj: - if self.mode == "a" and not os.path.exists(name): - # Create nonexistent files in append mode. - self.mode = "w" - self._mode = "wb" - fileobj = bltn_open(name, self._mode) - self._extfileobj = False - else: - if name is None and hasattr(fileobj, "name"): - name = fileobj.name - if hasattr(fileobj, "mode"): - self._mode = fileobj.mode - self._extfileobj = True - self.name = os.path.abspath(name) if name else None - self.fileobj = fileobj - - # Init attributes. - if format is not None: - self.format = format - if tarinfo is not None: - self.tarinfo = tarinfo - if dereference is not None: - self.dereference = dereference - if ignore_zeros is not None: - self.ignore_zeros = ignore_zeros - if encoding is not None: - self.encoding = encoding - self.errors = errors - - if pax_headers is not None and self.format == PAX_FORMAT: - self.pax_headers = pax_headers - else: - self.pax_headers = {} - - if debug is not None: - self.debug = debug - if errorlevel is not None: - self.errorlevel = errorlevel - - # Init datastructures. - self.closed = False - self.members = [] # list of members as TarInfo objects - self._loaded = False # flag if all members have been read - self.offset = self.fileobj.tell() - # current position in the archive file - self.inodes = {} # dictionary caching the inodes of - # archive members already added - - try: - if self.mode == "r": - self.firstmember = None - self.firstmember = self.next() - - if self.mode == "a": - # Move to the end of the archive, - # before the first empty block. - while True: - self.fileobj.seek(self.offset) - try: - tarinfo = self.tarinfo.fromtarfile(self) - self.members.append(tarinfo) - except EOFHeaderError: - self.fileobj.seek(self.offset) - break - except HeaderError as e: - raise ReadError(str(e)) - - if self.mode in "aw": - self._loaded = True - - if self.pax_headers: - buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy()) - self.fileobj.write(buf) - self.offset += len(buf) - except: - if not self._extfileobj: - self.fileobj.close() - self.closed = True - raise - - #-------------------------------------------------------------------------- - # Below are the classmethods which act as alternate constructors to the - # TarFile class. The open() method is the only one that is needed for - # public use; it is the "super"-constructor and is able to select an - # adequate "sub"-constructor for a particular compression using the mapping - # from OPEN_METH. - # - # This concept allows one to subclass TarFile without losing the comfort of - # the super-constructor. A sub-constructor is registered and made available - # by adding it to the mapping in OPEN_METH. - - @classmethod - def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs): - """Open a tar archive for reading, writing or appending. Return - an appropriate TarFile class. - - mode: - 'r' or 'r:*' open for reading with transparent compression - 'r:' open for reading exclusively uncompressed - 'r:gz' open for reading with gzip compression - 'r:bz2' open for reading with bzip2 compression - 'a' or 'a:' open for appending, creating the file if necessary - 'w' or 'w:' open for writing without compression - 'w:gz' open for writing with gzip compression - 'w:bz2' open for writing with bzip2 compression - - 'r|*' open a stream of tar blocks with transparent compression - 'r|' open an uncompressed stream of tar blocks for reading - 'r|gz' open a gzip compressed stream of tar blocks - 'r|bz2' open a bzip2 compressed stream of tar blocks - 'w|' open an uncompressed stream for writing - 'w|gz' open a gzip compressed stream for writing - 'w|bz2' open a bzip2 compressed stream for writing - """ - - if not name and not fileobj: - raise ValueError("nothing to open") - - if mode in ("r", "r:*"): - # Find out which *open() is appropriate for opening the file. - for comptype in cls.OPEN_METH: - func = getattr(cls, cls.OPEN_METH[comptype]) - if fileobj is not None: - saved_pos = fileobj.tell() - try: - return func(name, "r", fileobj, **kwargs) - except (ReadError, CompressionError) as e: - if fileobj is not None: - fileobj.seek(saved_pos) - continue - raise ReadError("file could not be opened successfully") - - elif ":" in mode: - filemode, comptype = mode.split(":", 1) - filemode = filemode or "r" - comptype = comptype or "tar" - - # Select the *open() function according to - # given compression. - if comptype in cls.OPEN_METH: - func = getattr(cls, cls.OPEN_METH[comptype]) - else: - raise CompressionError("unknown compression type %r" % comptype) - return func(name, filemode, fileobj, **kwargs) - - elif "|" in mode: - filemode, comptype = mode.split("|", 1) - filemode = filemode or "r" - comptype = comptype or "tar" - - if filemode not in "rw": - raise ValueError("mode must be 'r' or 'w'") - - stream = _Stream(name, filemode, comptype, fileobj, bufsize) - try: - t = cls(name, filemode, stream, **kwargs) - except: - stream.close() - raise - t._extfileobj = False - return t - - elif mode in "aw": - return cls.taropen(name, mode, fileobj, **kwargs) - - raise ValueError("undiscernible mode") - - @classmethod - def taropen(cls, name, mode="r", fileobj=None, **kwargs): - """Open uncompressed tar archive name for reading or writing. - """ - if len(mode) > 1 or mode not in "raw": - raise ValueError("mode must be 'r', 'a' or 'w'") - return cls(name, mode, fileobj, **kwargs) - - @classmethod - def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs): - """Open gzip compressed tar archive name for reading or writing. - Appending is not allowed. - """ - if len(mode) > 1 or mode not in "rw": - raise ValueError("mode must be 'r' or 'w'") - - try: - import gzip - gzip.GzipFile - except (ImportError, AttributeError): - raise CompressionError("gzip module is not available") - - extfileobj = fileobj is not None - try: - fileobj = gzip.GzipFile(name, mode + "b", compresslevel, fileobj) - t = cls.taropen(name, mode, fileobj, **kwargs) - except IOError: - if not extfileobj and fileobj is not None: - fileobj.close() - if fileobj is None: - raise - raise ReadError("not a gzip file") - except: - if not extfileobj and fileobj is not None: - fileobj.close() - raise - t._extfileobj = extfileobj - return t - - @classmethod - def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs): - """Open bzip2 compressed tar archive name for reading or writing. - Appending is not allowed. - """ - if len(mode) > 1 or mode not in "rw": - raise ValueError("mode must be 'r' or 'w'.") - - try: - import bz2 - except ImportError: - raise CompressionError("bz2 module is not available") - - if fileobj is not None: - fileobj = _BZ2Proxy(fileobj, mode) - else: - fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel) - - try: - t = cls.taropen(name, mode, fileobj, **kwargs) - except (IOError, EOFError): - fileobj.close() - raise ReadError("not a bzip2 file") - t._extfileobj = False - return t - - # All *open() methods are registered here. - OPEN_METH = { - "tar": "taropen", # uncompressed tar - "gz": "gzopen", # gzip compressed tar - "bz2": "bz2open" # bzip2 compressed tar - } - - #-------------------------------------------------------------------------- - # The public methods which TarFile provides: - - def close(self): - """Close the TarFile. In write-mode, two finishing zero blocks are - appended to the archive. - """ - if self.closed: - return - - if self.mode in "aw": - self.fileobj.write(NUL * (BLOCKSIZE * 2)) - self.offset += (BLOCKSIZE * 2) - # fill up the end with zero-blocks - # (like option -b20 for tar does) - blocks, remainder = divmod(self.offset, RECORDSIZE) - if remainder > 0: - self.fileobj.write(NUL * (RECORDSIZE - remainder)) - - if not self._extfileobj: - self.fileobj.close() - self.closed = True - - def getmember(self, name): - """Return a TarInfo object for member `name'. If `name' can not be - found in the archive, KeyError is raised. If a member occurs more - than once in the archive, its last occurrence is assumed to be the - most up-to-date version. - """ - tarinfo = self._getmember(name) - if tarinfo is None: - raise KeyError("filename %r not found" % name) - return tarinfo - - def getmembers(self): - """Return the members of the archive as a list of TarInfo objects. The - list has the same order as the members in the archive. - """ - self._check() - if not self._loaded: # if we want to obtain a list of - self._load() # all members, we first have to - # scan the whole archive. - return self.members - - def getnames(self): - """Return the members of the archive as a list of their names. It has - the same order as the list returned by getmembers(). - """ - return [tarinfo.name for tarinfo in self.getmembers()] - - def gettarinfo(self, name=None, arcname=None, fileobj=None): - """Create a TarInfo object for either the file `name' or the file - object `fileobj' (using os.fstat on its file descriptor). You can - modify some of the TarInfo's attributes before you add it using - addfile(). If given, `arcname' specifies an alternative name for the - file in the archive. - """ - self._check("aw") - - # When fileobj is given, replace name by - # fileobj's real name. - if fileobj is not None: - name = fileobj.name - - # Building the name of the member in the archive. - # Backward slashes are converted to forward slashes, - # Absolute paths are turned to relative paths. - if arcname is None: - arcname = name - drv, arcname = os.path.splitdrive(arcname) - arcname = arcname.replace(os.sep, "/") - arcname = arcname.lstrip("/") - - # Now, fill the TarInfo object with - # information specific for the file. - tarinfo = self.tarinfo() - tarinfo.tarfile = self - - # Use os.stat or os.lstat, depending on platform - # and if symlinks shall be resolved. - if fileobj is None: - if hasattr(os, "lstat") and not self.dereference: - statres = os.lstat(name) - else: - statres = os.stat(name) - else: - statres = os.fstat(fileobj.fileno()) - linkname = "" - - stmd = statres.st_mode - if stat.S_ISREG(stmd): - inode = (statres.st_ino, statres.st_dev) - if not self.dereference and statres.st_nlink > 1 and \ - inode in self.inodes and arcname != self.inodes[inode]: - # Is it a hardlink to an already - # archived file? - type = LNKTYPE - linkname = self.inodes[inode] - else: - # The inode is added only if its valid. - # For win32 it is always 0. - type = REGTYPE - if inode[0]: - self.inodes[inode] = arcname - elif stat.S_ISDIR(stmd): - type = DIRTYPE - elif stat.S_ISFIFO(stmd): - type = FIFOTYPE - elif stat.S_ISLNK(stmd): - type = SYMTYPE - linkname = os.readlink(name) - elif stat.S_ISCHR(stmd): - type = CHRTYPE - elif stat.S_ISBLK(stmd): - type = BLKTYPE - else: - return None - - # Fill the TarInfo object with all - # information we can get. - tarinfo.name = arcname - tarinfo.mode = stmd - tarinfo.uid = statres.st_uid - tarinfo.gid = statres.st_gid - if type == REGTYPE: - tarinfo.size = statres.st_size - else: - tarinfo.size = 0 - tarinfo.mtime = statres.st_mtime - tarinfo.type = type - tarinfo.linkname = linkname - if pwd: - try: - tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0] - except KeyError: - pass - if grp: - try: - tarinfo.gname = grp.getgrgid(tarinfo.gid)[0] - except KeyError: - pass - - if type in (CHRTYPE, BLKTYPE): - if hasattr(os, "major") and hasattr(os, "minor"): - tarinfo.devmajor = os.major(statres.st_rdev) - tarinfo.devminor = os.minor(statres.st_rdev) - return tarinfo - - def list(self, verbose=True): - """Print a table of contents to sys.stdout. If `verbose' is False, only - the names of the members are printed. If it is True, an `ls -l'-like - output is produced. - """ - self._check() - - for tarinfo in self: - if verbose: - print(filemode(tarinfo.mode), end=' ') - print("%s/%s" % (tarinfo.uname or tarinfo.uid, - tarinfo.gname or tarinfo.gid), end=' ') - if tarinfo.ischr() or tarinfo.isblk(): - print("%10s" % ("%d,%d" \ - % (tarinfo.devmajor, tarinfo.devminor)), end=' ') - else: - print("%10d" % tarinfo.size, end=' ') - print("%d-%02d-%02d %02d:%02d:%02d" \ - % time.localtime(tarinfo.mtime)[:6], end=' ') - - print(tarinfo.name + ("/" if tarinfo.isdir() else ""), end=' ') - - if verbose: - if tarinfo.issym(): - print("->", tarinfo.linkname, end=' ') - if tarinfo.islnk(): - print("link to", tarinfo.linkname, end=' ') - print() - - def add(self, name, arcname=None, recursive=True, exclude=None, filter=None): - """Add the file `name' to the archive. `name' may be any type of file - (directory, fifo, symbolic link, etc.). If given, `arcname' - specifies an alternative name for the file in the archive. - Directories are added recursively by default. This can be avoided by - setting `recursive' to False. `exclude' is a function that should - return True for each filename to be excluded. `filter' is a function - that expects a TarInfo object argument and returns the changed - TarInfo object, if it returns None the TarInfo object will be - excluded from the archive. - """ - self._check("aw") - - if arcname is None: - arcname = name - - # Exclude pathnames. - if exclude is not None: - import warnings - warnings.warn("use the filter argument instead", - DeprecationWarning, 2) - if exclude(name): - self._dbg(2, "tarfile: Excluded %r" % name) - return - - # Skip if somebody tries to archive the archive... - if self.name is not None and os.path.abspath(name) == self.name: - self._dbg(2, "tarfile: Skipped %r" % name) - return - - self._dbg(1, name) - - # Create a TarInfo object from the file. - tarinfo = self.gettarinfo(name, arcname) - - if tarinfo is None: - self._dbg(1, "tarfile: Unsupported type %r" % name) - return - - # Change or exclude the TarInfo object. - if filter is not None: - tarinfo = filter(tarinfo) - if tarinfo is None: - self._dbg(2, "tarfile: Excluded %r" % name) - return - - # Append the tar header and data to the archive. - if tarinfo.isreg(): - f = bltn_open(name, "rb") - self.addfile(tarinfo, f) - f.close() - - elif tarinfo.isdir(): - self.addfile(tarinfo) - if recursive: - for f in os.listdir(name): - self.add(os.path.join(name, f), os.path.join(arcname, f), - recursive, exclude, filter=filter) - - else: - self.addfile(tarinfo) - - def addfile(self, tarinfo, fileobj=None): - """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is - given, tarinfo.size bytes are read from it and added to the archive. - You can create TarInfo objects using gettarinfo(). - On Windows platforms, `fileobj' should always be opened with mode - 'rb' to avoid irritation about the file size. - """ - self._check("aw") - - tarinfo = copy.copy(tarinfo) - - buf = tarinfo.tobuf(self.format, self.encoding, self.errors) - self.fileobj.write(buf) - self.offset += len(buf) - - # If there's data to follow, append it. - if fileobj is not None: - copyfileobj(fileobj, self.fileobj, tarinfo.size) - blocks, remainder = divmod(tarinfo.size, BLOCKSIZE) - if remainder > 0: - self.fileobj.write(NUL * (BLOCKSIZE - remainder)) - blocks += 1 - self.offset += blocks * BLOCKSIZE - - self.members.append(tarinfo) - - def extractall(self, path=".", members=None): - """Extract all members from the archive to the current working - directory and set owner, modification time and permissions on - directories afterwards. `path' specifies a different directory - to extract to. `members' is optional and must be a subset of the - list returned by getmembers(). - """ - directories = [] - - if members is None: - members = self - - for tarinfo in members: - if tarinfo.isdir(): - # Extract directories with a safe mode. - directories.append(tarinfo) - tarinfo = copy.copy(tarinfo) - tarinfo.mode = 0o700 - # Do not set_attrs directories, as we will do that further down - self.extract(tarinfo, path, set_attrs=not tarinfo.isdir()) - - # Reverse sort directories. - directories.sort(key=lambda a: a.name) - directories.reverse() - - # Set correct owner, mtime and filemode on directories. - for tarinfo in directories: - dirpath = os.path.join(path, tarinfo.name) - try: - self.chown(tarinfo, dirpath) - self.utime(tarinfo, dirpath) - self.chmod(tarinfo, dirpath) - except ExtractError as e: - if self.errorlevel > 1: - raise - else: - self._dbg(1, "tarfile: %s" % e) - - def extract(self, member, path="", set_attrs=True): - """Extract a member from the archive to the current working directory, - using its full name. Its file information is extracted as accurately - as possible. `member' may be a filename or a TarInfo object. You can - specify a different directory using `path'. File attributes (owner, - mtime, mode) are set unless `set_attrs' is False. - """ - self._check("r") - - if isinstance(member, str): - tarinfo = self.getmember(member) - else: - tarinfo = member - - # Prepare the link target for makelink(). - if tarinfo.islnk(): - tarinfo._link_target = os.path.join(path, tarinfo.linkname) - - try: - self._extract_member(tarinfo, os.path.join(path, tarinfo.name), - set_attrs=set_attrs) - except EnvironmentError as e: - if self.errorlevel > 0: - raise - else: - if e.filename is None: - self._dbg(1, "tarfile: %s" % e.strerror) - else: - self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename)) - except ExtractError as e: - if self.errorlevel > 1: - raise - else: - self._dbg(1, "tarfile: %s" % e) - - def extractfile(self, member): - """Extract a member from the archive as a file object. `member' may be - a filename or a TarInfo object. If `member' is a regular file, a - file-like object is returned. If `member' is a link, a file-like - object is constructed from the link's target. If `member' is none of - the above, None is returned. - The file-like object is read-only and provides the following - methods: read(), readline(), readlines(), seek() and tell() - """ - self._check("r") - - if isinstance(member, str): - tarinfo = self.getmember(member) - else: - tarinfo = member - - if tarinfo.isreg(): - return self.fileobject(self, tarinfo) - - elif tarinfo.type not in SUPPORTED_TYPES: - # If a member's type is unknown, it is treated as a - # regular file. - return self.fileobject(self, tarinfo) - - elif tarinfo.islnk() or tarinfo.issym(): - if isinstance(self.fileobj, _Stream): - # A small but ugly workaround for the case that someone tries - # to extract a (sym)link as a file-object from a non-seekable - # stream of tar blocks. - raise StreamError("cannot extract (sym)link as file object") - else: - # A (sym)link's file object is its target's file object. - return self.extractfile(self._find_link_target(tarinfo)) - else: - # If there's no data associated with the member (directory, chrdev, - # blkdev, etc.), return None instead of a file object. - return None - - def _extract_member(self, tarinfo, targetpath, set_attrs=True): - """Extract the TarInfo object tarinfo to a physical - file called targetpath. - """ - # Fetch the TarInfo object for the given name - # and build the destination pathname, replacing - # forward slashes to platform specific separators. - targetpath = targetpath.rstrip("/") - targetpath = targetpath.replace("/", os.sep) - - # Create all upper directories. - upperdirs = os.path.dirname(targetpath) - if upperdirs and not os.path.exists(upperdirs): - # Create directories that are not part of the archive with - # default permissions. - os.makedirs(upperdirs) - - if tarinfo.islnk() or tarinfo.issym(): - self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname)) - else: - self._dbg(1, tarinfo.name) - - if tarinfo.isreg(): - self.makefile(tarinfo, targetpath) - elif tarinfo.isdir(): - self.makedir(tarinfo, targetpath) - elif tarinfo.isfifo(): - self.makefifo(tarinfo, targetpath) - elif tarinfo.ischr() or tarinfo.isblk(): - self.makedev(tarinfo, targetpath) - elif tarinfo.islnk() or tarinfo.issym(): - self.makelink(tarinfo, targetpath) - elif tarinfo.type not in SUPPORTED_TYPES: - self.makeunknown(tarinfo, targetpath) - else: - self.makefile(tarinfo, targetpath) - - if set_attrs: - self.chown(tarinfo, targetpath) - if not tarinfo.issym(): - self.chmod(tarinfo, targetpath) - self.utime(tarinfo, targetpath) - - #-------------------------------------------------------------------------- - # Below are the different file methods. They are called via - # _extract_member() when extract() is called. They can be replaced in a - # subclass to implement other functionality. - - def makedir(self, tarinfo, targetpath): - """Make a directory called targetpath. - """ - try: - # Use a safe mode for the directory, the real mode is set - # later in _extract_member(). - os.mkdir(targetpath, 0o700) - except EnvironmentError as e: - if e.errno != errno.EEXIST: - raise - - def makefile(self, tarinfo, targetpath): - """Make a file called targetpath. - """ - source = self.fileobj - source.seek(tarinfo.offset_data) - target = bltn_open(targetpath, "wb") - if tarinfo.sparse is not None: - for offset, size in tarinfo.sparse: - target.seek(offset) - copyfileobj(source, target, size) - else: - copyfileobj(source, target, tarinfo.size) - target.seek(tarinfo.size) - target.truncate() - target.close() - - def makeunknown(self, tarinfo, targetpath): - """Make a file from a TarInfo object with an unknown type - at targetpath. - """ - self.makefile(tarinfo, targetpath) - self._dbg(1, "tarfile: Unknown file type %r, " \ - "extracted as regular file." % tarinfo.type) - - def makefifo(self, tarinfo, targetpath): - """Make a fifo called targetpath. - """ - if hasattr(os, "mkfifo"): - os.mkfifo(targetpath) - else: - raise ExtractError("fifo not supported by system") - - def makedev(self, tarinfo, targetpath): - """Make a character or block device called targetpath. - """ - if not hasattr(os, "mknod") or not hasattr(os, "makedev"): - raise ExtractError("special devices not supported by system") - - mode = tarinfo.mode - if tarinfo.isblk(): - mode |= stat.S_IFBLK - else: - mode |= stat.S_IFCHR - - os.mknod(targetpath, mode, - os.makedev(tarinfo.devmajor, tarinfo.devminor)) - - def makelink(self, tarinfo, targetpath): - """Make a (symbolic) link called targetpath. If it cannot be created - (platform limitation), we try to make a copy of the referenced file - instead of a link. - """ - try: - # For systems that support symbolic and hard links. - if tarinfo.issym(): - os.symlink(tarinfo.linkname, targetpath) - else: - # See extract(). - if os.path.exists(tarinfo._link_target): - os.link(tarinfo._link_target, targetpath) - else: - self._extract_member(self._find_link_target(tarinfo), - targetpath) - except symlink_exception: - if tarinfo.issym(): - linkpath = os.path.join(os.path.dirname(tarinfo.name), - tarinfo.linkname) - else: - linkpath = tarinfo.linkname - else: - try: - self._extract_member(self._find_link_target(tarinfo), - targetpath) - except KeyError: - raise ExtractError("unable to resolve link inside archive") - - def chown(self, tarinfo, targetpath): - """Set owner of targetpath according to tarinfo. - """ - if pwd and hasattr(os, "geteuid") and os.geteuid() == 0: - # We have to be root to do so. - try: - g = grp.getgrnam(tarinfo.gname)[2] - except KeyError: - g = tarinfo.gid - try: - u = pwd.getpwnam(tarinfo.uname)[2] - except KeyError: - u = tarinfo.uid - try: - if tarinfo.issym() and hasattr(os, "lchown"): - os.lchown(targetpath, u, g) - else: - if sys.platform != "os2emx": - os.chown(targetpath, u, g) - except EnvironmentError as e: - raise ExtractError("could not change owner") - - def chmod(self, tarinfo, targetpath): - """Set file permissions of targetpath according to tarinfo. - """ - if hasattr(os, 'chmod'): - try: - os.chmod(targetpath, tarinfo.mode) - except EnvironmentError as e: - raise ExtractError("could not change mode") - - def utime(self, tarinfo, targetpath): - """Set modification time of targetpath according to tarinfo. - """ - if not hasattr(os, 'utime'): - return - try: - os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime)) - except EnvironmentError as e: - raise ExtractError("could not change modification time") - - #-------------------------------------------------------------------------- - def next(self): - """Return the next member of the archive as a TarInfo object, when - TarFile is opened for reading. Return None if there is no more - available. - """ - self._check("ra") - if self.firstmember is not None: - m = self.firstmember - self.firstmember = None - return m - - # Read the next block. - self.fileobj.seek(self.offset) - tarinfo = None - while True: - try: - tarinfo = self.tarinfo.fromtarfile(self) - except EOFHeaderError as e: - if self.ignore_zeros: - self._dbg(2, "0x%X: %s" % (self.offset, e)) - self.offset += BLOCKSIZE - continue - except InvalidHeaderError as e: - if self.ignore_zeros: - self._dbg(2, "0x%X: %s" % (self.offset, e)) - self.offset += BLOCKSIZE - continue - elif self.offset == 0: - raise ReadError(str(e)) - except EmptyHeaderError: - if self.offset == 0: - raise ReadError("empty file") - except TruncatedHeaderError as e: - if self.offset == 0: - raise ReadError(str(e)) - except SubsequentHeaderError as e: - raise ReadError(str(e)) - break - - if tarinfo is not None: - self.members.append(tarinfo) - else: - self._loaded = True - - return tarinfo - - #-------------------------------------------------------------------------- - # Little helper methods: - - def _getmember(self, name, tarinfo=None, normalize=False): - """Find an archive member by name from bottom to top. - If tarinfo is given, it is used as the starting point. - """ - # Ensure that all members have been loaded. - members = self.getmembers() - - # Limit the member search list up to tarinfo. - if tarinfo is not None: - members = members[:members.index(tarinfo)] - - if normalize: - name = os.path.normpath(name) - - for member in reversed(members): - if normalize: - member_name = os.path.normpath(member.name) - else: - member_name = member.name - - if name == member_name: - return member - - def _load(self): - """Read through the entire archive file and look for readable - members. - """ - while True: - tarinfo = self.next() - if tarinfo is None: - break - self._loaded = True - - def _check(self, mode=None): - """Check if TarFile is still open, and if the operation's mode - corresponds to TarFile's mode. - """ - if self.closed: - raise IOError("%s is closed" % self.__class__.__name__) - if mode is not None and self.mode not in mode: - raise IOError("bad operation for mode %r" % self.mode) - - def _find_link_target(self, tarinfo): - """Find the target member of a symlink or hardlink member in the - archive. - """ - if tarinfo.issym(): - # Always search the entire archive. - linkname = os.path.dirname(tarinfo.name) + "/" + tarinfo.linkname - limit = None - else: - # Search the archive before the link, because a hard link is - # just a reference to an already archived file. - linkname = tarinfo.linkname - limit = tarinfo - - member = self._getmember(linkname, tarinfo=limit, normalize=True) - if member is None: - raise KeyError("linkname %r not found" % linkname) - return member - - def __iter__(self): - """Provide an iterator object. - """ - if self._loaded: - return iter(self.members) - else: - return TarIter(self) - - def _dbg(self, level, msg): - """Write debugging output to sys.stderr. - """ - if level <= self.debug: - print(msg, file=sys.stderr) - - def __enter__(self): - self._check() - return self - - def __exit__(self, type, value, traceback): - if type is None: - self.close() - else: - # An exception occurred. We must not call close() because - # it would try to write end-of-archive blocks and padding. - if not self._extfileobj: - self.fileobj.close() - self.closed = True -# class TarFile - -class TarIter(object): - """Iterator Class. - - for tarinfo in TarFile(...): - suite... - """ - - def __init__(self, tarfile): - """Construct a TarIter object. - """ - self.tarfile = tarfile - self.index = 0 - def __iter__(self): - """Return iterator object. - """ - return self - - def __next__(self): - """Return the next item using TarFile's next() method. - When all members have been read, set TarFile as _loaded. - """ - # Fix for SF #1100429: Under rare circumstances it can - # happen that getmembers() is called during iteration, - # which will cause TarIter to stop prematurely. - if not self.tarfile._loaded: - tarinfo = self.tarfile.next() - if not tarinfo: - self.tarfile._loaded = True - raise StopIteration - else: - try: - tarinfo = self.tarfile.members[self.index] - except IndexError: - raise StopIteration - self.index += 1 - return tarinfo - - next = __next__ # for Python 2.x - -#-------------------- -# exported functions -#-------------------- -def is_tarfile(name): - """Return True if name points to a tar archive that we - are able to handle, else return False. - """ - try: - t = open(name) - t.close() - return True - except TarError: - return False - -bltn_open = open -open = TarFile.open diff --git a/src/pip/_vendor/distlib/compat.py b/src/pip/_vendor/distlib/compat.py index e594106956f..1fe3d225acb 100644 --- a/src/pip/_vendor/distlib/compat.py +++ b/src/pip/_vendor/distlib/compat.py @@ -22,7 +22,6 @@ from types import FileType as file_type import __builtin__ as builtins import ConfigParser as configparser - from ._backport import shutil from urlparse import urlparse, urlunparse, urljoin, urlsplit, urlunsplit from urllib import (urlretrieve, quote as _quote, unquote, url2pathname, pathname2url, ContentTooShortError, splittype) @@ -313,10 +312,8 @@ def python_implementation(): return 'IronPython' return 'CPython' -try: - import sysconfig -except ImportError: # pragma: no cover - from ._backport import sysconfig +import shutil +import sysconfig try: callable = callable @@ -618,18 +615,15 @@ def clear(self): try: from importlib.util import cache_from_source # Python >= 3.4 except ImportError: # pragma: no cover - try: - from imp import cache_from_source - except ImportError: # pragma: no cover - def cache_from_source(path, debug_override=None): - assert path.endswith('.py') - if debug_override is None: - debug_override = __debug__ - if debug_override: - suffix = 'c' - else: - suffix = 'o' - return path + suffix + def cache_from_source(path, debug_override=None): + assert path.endswith('.py') + if debug_override is None: + debug_override = __debug__ + if debug_override: + suffix = 'c' + else: + suffix = 'o' + return path + suffix try: from collections import OrderedDict diff --git a/src/pip/_vendor/distlib/database.py b/src/pip/_vendor/distlib/database.py index 0a90c300ba8..5db5d7f507c 100644 --- a/src/pip/_vendor/distlib/database.py +++ b/src/pip/_vendor/distlib/database.py @@ -132,29 +132,35 @@ def _yield_distributions(self): r = finder.find(entry) if not r or r.path in seen: continue - if self._include_dist and entry.endswith(DISTINFO_EXT): - possible_filenames = [METADATA_FILENAME, - WHEEL_METADATA_FILENAME, - LEGACY_METADATA_FILENAME] - for metadata_filename in possible_filenames: - metadata_path = posixpath.join(entry, metadata_filename) - pydist = finder.find(metadata_path) - if pydist: - break - else: - continue + try: + if self._include_dist and entry.endswith(DISTINFO_EXT): + possible_filenames = [METADATA_FILENAME, + WHEEL_METADATA_FILENAME, + LEGACY_METADATA_FILENAME] + for metadata_filename in possible_filenames: + metadata_path = posixpath.join(entry, metadata_filename) + pydist = finder.find(metadata_path) + if pydist: + break + else: + continue - with contextlib.closing(pydist.as_stream()) as stream: - metadata = Metadata(fileobj=stream, scheme='legacy') - logger.debug('Found %s', r.path) - seen.add(r.path) - yield new_dist_class(r.path, metadata=metadata, - env=self) - elif self._include_egg and entry.endswith(('.egg-info', - '.egg')): - logger.debug('Found %s', r.path) - seen.add(r.path) - yield old_dist_class(r.path, self) + with contextlib.closing(pydist.as_stream()) as stream: + metadata = Metadata(fileobj=stream, scheme='legacy') + logger.debug('Found %s', r.path) + seen.add(r.path) + yield new_dist_class(r.path, metadata=metadata, + env=self) + elif self._include_egg and entry.endswith(('.egg-info', + '.egg')): + logger.debug('Found %s', r.path) + seen.add(r.path) + yield old_dist_class(r.path, self) + except Exception as e: + msg = 'Unable to read distribution at %s, perhaps due to bad metadata: %s' + logger.warning(msg, r.path, e) + import warnings + warnings.warn(msg % (r.path, e), stacklevel=2) def _generate_cache(self): """ @@ -379,8 +385,9 @@ def provides(self): def _get_requirements(self, req_attr): md = self.metadata - logger.debug('Getting requirements from metadata %r', md.todict()) reqts = getattr(md, req_attr) + logger.debug('%s: got requirements %r from metadata: %r', self.name, req_attr, + reqts) return set(md.get_requirements(reqts, extras=self.extras, env=self.context)) @@ -1308,22 +1315,26 @@ def get_required_dists(dists, dist): :param dists: a list of distributions :param dist: a distribution, member of *dists* for which we are interested + in finding the dependencies. """ if dist not in dists: raise DistlibException('given distribution %r is not a member ' 'of the list' % dist.name) graph = make_graph(dists) - req = [] # required distributions + req = set() # required distributions todo = graph.adjacency_list[dist] # list of nodes we should inspect + seen = set(t[0] for t in todo) # already added to todo while todo: d = todo.pop()[0] - req.append(d) - for pred in graph.adjacency_list[d]: - if pred not in req: + req.add(d) + pred_list = graph.adjacency_list[d] + for pred in pred_list: + d = pred[0] + if d not in req and d not in seen: + seen.add(d) todo.append(pred) - return req diff --git a/src/pip/_vendor/distlib/index.py b/src/pip/_vendor/distlib/index.py index b1fbbf8e8d2..9b6d129ed69 100644 --- a/src/pip/_vendor/distlib/index.py +++ b/src/pip/_vendor/distlib/index.py @@ -12,7 +12,7 @@ import tempfile try: from threading import Thread -except ImportError: +except ImportError: # pragma: no cover from dummy_threading import Thread from . import DistlibException @@ -104,7 +104,7 @@ def check_credentials(self): pm.add_password(self.realm, netloc, self.username, self.password) self.password_handler = HTTPBasicAuthHandler(pm) - def register(self, metadata): + def register(self, metadata): # pragma: no cover """ Register a distribution on PyPI, using the provided metadata. @@ -142,8 +142,7 @@ def _reader(self, name, stream, outbuf): logger.debug('%s: %s' % (name, s)) stream.close() - def get_sign_command(self, filename, signer, sign_password, - keystore=None): + def get_sign_command(self, filename, signer, sign_password, keystore=None): # pragma: no cover """ Return a suitable command for signing a file. @@ -206,7 +205,7 @@ def run_command(self, cmd, input_data=None): t2.join() return p.returncode, stdout, stderr - def sign_file(self, filename, signer, sign_password, keystore=None): + def sign_file(self, filename, signer, sign_password, keystore=None): # pragma: no cover """ Sign a file. @@ -286,7 +285,7 @@ def upload_file(self, metadata, filename, signer=None, sign_password=None, request = self.encode_request(d.items(), files) return self.send_request(request) - def upload_documentation(self, metadata, doc_dir): + def upload_documentation(self, metadata, doc_dir): # pragma: no cover """ Upload documentation to the index. @@ -499,7 +498,7 @@ def encode_request(self, fields, files): } return Request(self.url, body, headers) - def search(self, terms, operator=None): + def search(self, terms, operator=None): # pragma: no cover if isinstance(terms, string_types): terms = {'name': terms} rpc_proxy = ServerProxy(self.url, timeout=3.0) diff --git a/src/pip/_vendor/distlib/locators.py b/src/pip/_vendor/distlib/locators.py index 0c7d6391438..966ebc0e37d 100644 --- a/src/pip/_vendor/distlib/locators.py +++ b/src/pip/_vendor/distlib/locators.py @@ -633,7 +633,7 @@ def _prepare_threads(self): self._threads = [] for i in range(self.num_workers): t = threading.Thread(target=self._fetch) - t.setDaemon(True) + t.daemon = True t.start() self._threads.append(t) @@ -1053,9 +1053,9 @@ def get_distribution_names(self): # We use a legacy scheme simply because most of the dists on PyPI use legacy -# versions which don't conform to PEP 426 / PEP 440. +# versions which don't conform to PEP 440. default_locator = AggregatingLocator( - JSONLocator(), + # JSONLocator(), # don't use as PEP 426 is withdrawn SimpleScrapingLocator('https://pypi.org/simple/', timeout=3.0), scheme='legacy') diff --git a/src/pip/_vendor/distlib/markers.py b/src/pip/_vendor/distlib/markers.py index b43136fa11e..9dc68410337 100644 --- a/src/pip/_vendor/distlib/markers.py +++ b/src/pip/_vendor/distlib/markers.py @@ -90,6 +90,8 @@ def evaluate(self, expr, context): result = self.operations[op](lhs, rhs) return result +_DIGITS = re.compile(r'\d+\.\d+') + def default_context(): def format_full_version(info): version = '%s.%s.%s' % (info.major, info.minor, info.micro) @@ -105,6 +107,9 @@ def format_full_version(info): implementation_version = '0' implementation_name = '' + ppv = platform.python_version() + m = _DIGITS.match(ppv) + pv = m.group(0) result = { 'implementation_name': implementation_name, 'implementation_version': implementation_version, @@ -115,8 +120,8 @@ def format_full_version(info): 'platform_system': platform.system(), 'platform_version': platform.version(), 'platform_in_venv': str(in_venv()), - 'python_full_version': platform.python_version(), - 'python_version': platform.python_version()[:3], + 'python_full_version': ppv, + 'python_version': pv, 'sys_platform': sys.platform, } return result diff --git a/src/pip/_vendor/distlib/metadata.py b/src/pip/_vendor/distlib/metadata.py index 6a26b0ab232..c329e1977fd 100644 --- a/src/pip/_vendor/distlib/metadata.py +++ b/src/pip/_vendor/distlib/metadata.py @@ -5,7 +5,7 @@ # """Implementation of the Metadata for Python packages PEPs. -Supports all metadata formats (1.0, 1.1, 1.2, 1.3/2.1 and withdrawn 2.0). +Supports all metadata formats (1.0, 1.1, 1.2, 1.3/2.1 and 2.2). """ from __future__ import unicode_literals @@ -100,12 +100,17 @@ class MetadataInvalidError(DistlibException): _566_MARKERS = ('Description-Content-Type',) +_643_MARKERS = ('Dynamic', 'License-File') + +_643_FIELDS = _566_FIELDS + _643_MARKERS + _ALL_FIELDS = set() _ALL_FIELDS.update(_241_FIELDS) _ALL_FIELDS.update(_314_FIELDS) _ALL_FIELDS.update(_345_FIELDS) _ALL_FIELDS.update(_426_FIELDS) _ALL_FIELDS.update(_566_FIELDS) +_ALL_FIELDS.update(_643_FIELDS) EXTRA_RE = re.compile(r'''extra\s*==\s*("([^"]+)"|'([^']+)')''') @@ -121,7 +126,10 @@ def _version2fieldlist(version): # avoid adding field names if already there return _345_FIELDS + tuple(f for f in _566_FIELDS if f not in _345_FIELDS) elif version == '2.0': - return _426_FIELDS + raise ValueError('Metadata 2.0 is withdrawn and not supported') + # return _426_FIELDS + elif version == '2.2': + return _643_FIELDS raise MetadataUnrecognizedVersionError(version) @@ -139,7 +147,7 @@ def _has_marker(keys, markers): continue keys.append(key) - possible_versions = ['1.0', '1.1', '1.2', '1.3', '2.0', '2.1'] + possible_versions = ['1.0', '1.1', '1.2', '1.3', '2.1', '2.2'] # 2.0 removed # first let's try to see if a field is not part of one of the version for key in keys: @@ -159,9 +167,12 @@ def _has_marker(keys, markers): if key != 'Description': # In 2.1, description allowed after headers possible_versions.remove('2.1') logger.debug('Removed 2.1 due to %s', key) - if key not in _426_FIELDS and '2.0' in possible_versions: - possible_versions.remove('2.0') - logger.debug('Removed 2.0 due to %s', key) + if key not in _643_FIELDS and '2.2' in possible_versions: + possible_versions.remove('2.2') + logger.debug('Removed 2.2 due to %s', key) + # if key not in _426_FIELDS and '2.0' in possible_versions: + # possible_versions.remove('2.0') + # logger.debug('Removed 2.0 due to %s', key) # possible_version contains qualified versions if len(possible_versions) == 1: @@ -174,16 +185,18 @@ def _has_marker(keys, markers): is_1_1 = '1.1' in possible_versions and _has_marker(keys, _314_MARKERS) is_1_2 = '1.2' in possible_versions and _has_marker(keys, _345_MARKERS) is_2_1 = '2.1' in possible_versions and _has_marker(keys, _566_MARKERS) - is_2_0 = '2.0' in possible_versions and _has_marker(keys, _426_MARKERS) - if int(is_1_1) + int(is_1_2) + int(is_2_1) + int(is_2_0) > 1: - raise MetadataConflictError('You used incompatible 1.1/1.2/2.0/2.1 fields') + # is_2_0 = '2.0' in possible_versions and _has_marker(keys, _426_MARKERS) + is_2_2 = '2.2' in possible_versions and _has_marker(keys, _643_MARKERS) + if int(is_1_1) + int(is_1_2) + int(is_2_1) + int(is_2_2) > 1: + raise MetadataConflictError('You used incompatible 1.1/1.2/2.1/2.2 fields') - # we have the choice, 1.0, or 1.2, or 2.0 + # we have the choice, 1.0, or 1.2, 2.1 or 2.2 # - 1.0 has a broken Summary field but works with all tools # - 1.1 is to avoid # - 1.2 fixes Summary but has little adoption - # - 2.0 adds more features and is very new - if not is_1_1 and not is_1_2 and not is_2_1 and not is_2_0: + # - 2.1 adds more features + # - 2.2 is the latest + if not is_1_1 and not is_1_2 and not is_2_1 and not is_2_2: # we couldn't find any specific marker if PKG_INFO_PREFERRED_VERSION in possible_versions: return PKG_INFO_PREFERRED_VERSION @@ -193,8 +206,10 @@ def _has_marker(keys, markers): return '1.2' if is_2_1: return '2.1' + # if is_2_2: + # return '2.2' - return '2.0' + return '2.2' # This follows the rules about transforming keys as described in # https://www.python.org/dev/peps/pep-0566/#id17 @@ -210,7 +225,7 @@ def _has_marker(keys, markers): 'Requires', 'Provides', 'Obsoletes-Dist', 'Provides-Dist', 'Requires-Dist', 'Requires-External', 'Project-URL', 'Supported-Platform', 'Setup-Requires-Dist', - 'Provides-Extra', 'Extension') + 'Provides-Extra', 'Extension', 'License-File') _LISTTUPLEFIELDS = ('Project-URL',) _ELEMENTSFIELD = ('Keywords',) @@ -602,7 +617,7 @@ def __repr__(self): class Metadata(object): """ - The metadata of a release. This implementation uses 2.0 (JSON) + The metadata of a release. This implementation uses 2.1 metadata where possible. If not possible, it wraps a LegacyMetadata instance which handles the key-value metadata format. """ @@ -611,6 +626,8 @@ class Metadata(object): NAME_MATCHER = re.compile('^[0-9A-Z]([0-9A-Z_.-]*[0-9A-Z])?$', re.I) + FIELDNAME_MATCHER = re.compile('^[A-Z]([0-9A-Z-]*[0-9A-Z])?$', re.I) + VERSION_MATCHER = PEP440_VERSION_RE SUMMARY_MATCHER = re.compile('.{1,2047}') @@ -638,6 +655,7 @@ class Metadata(object): 'name': (NAME_MATCHER, ('legacy',)), 'version': (VERSION_MATCHER, ('legacy',)), 'summary': (SUMMARY_MATCHER, ('legacy',)), + 'dynamic': (FIELDNAME_MATCHER, ('legacy',)), } __slots__ = ('_legacy', '_data', 'scheme') diff --git a/src/pip/_vendor/distlib/scripts.py b/src/pip/_vendor/distlib/scripts.py index 913912c7b8e..d2706242b8a 100644 --- a/src/pip/_vendor/distlib/scripts.py +++ b/src/pip/_vendor/distlib/scripts.py @@ -10,6 +10,8 @@ import re import struct import sys +import time +from zipfile import ZipInfo from .compat import sysconfig, detect_encoding, ZipFile from .resources import finder @@ -249,7 +251,13 @@ def _write_script(self, names, shebang, script_bytes, filenames, ext): launcher = self._get_launcher('w') stream = BytesIO() with ZipFile(stream, 'w') as zf: - zf.writestr('__main__.py', script_bytes) + source_date_epoch = os.environ.get('SOURCE_DATE_EPOCH') + if source_date_epoch: + date_time = time.gmtime(int(source_date_epoch))[:6] + zinfo = ZipInfo(filename='__main__.py', date_time=date_time) + zf.writestr(zinfo, script_bytes) + else: + zf.writestr('__main__.py', script_bytes) zip_data = stream.getvalue() script_bytes = launcher + shebang + zip_data for name in names: diff --git a/src/pip/_vendor/distlib/t32.exe b/src/pip/_vendor/distlib/t32.exe index 8932a18e459..52154f0be32 100644 Binary files a/src/pip/_vendor/distlib/t32.exe and b/src/pip/_vendor/distlib/t32.exe differ diff --git a/src/pip/_vendor/distlib/t64-arm.exe b/src/pip/_vendor/distlib/t64-arm.exe index c5df4869da5..e1ab8f8f589 100644 Binary files a/src/pip/_vendor/distlib/t64-arm.exe and b/src/pip/_vendor/distlib/t64-arm.exe differ diff --git a/src/pip/_vendor/distlib/t64.exe b/src/pip/_vendor/distlib/t64.exe index 325b8057c08..e8bebdba6d8 100644 Binary files a/src/pip/_vendor/distlib/t64.exe and b/src/pip/_vendor/distlib/t64.exe differ diff --git a/src/pip/_vendor/distlib/util.py b/src/pip/_vendor/distlib/util.py index 80bfc864bcb..dd01849d997 100644 --- a/src/pip/_vendor/distlib/util.py +++ b/src/pip/_vendor/distlib/util.py @@ -1432,29 +1432,19 @@ def connect(self): self.sock = sock self._tunnel() - if not hasattr(ssl, 'SSLContext'): - # For 2.x - if self.ca_certs: - cert_reqs = ssl.CERT_REQUIRED - else: - cert_reqs = ssl.CERT_NONE - self.sock = ssl.wrap_socket(sock, self.key_file, self.cert_file, - cert_reqs=cert_reqs, - ssl_version=ssl.PROTOCOL_SSLv23, - ca_certs=self.ca_certs) - else: # pragma: no cover - context = ssl.SSLContext(ssl.PROTOCOL_SSLv23) - if hasattr(ssl, 'OP_NO_SSLv2'): - context.options |= ssl.OP_NO_SSLv2 - if self.cert_file: - context.load_cert_chain(self.cert_file, self.key_file) - kwargs = {} - if self.ca_certs: - context.verify_mode = ssl.CERT_REQUIRED - context.load_verify_locations(cafile=self.ca_certs) - if getattr(ssl, 'HAS_SNI', False): - kwargs['server_hostname'] = self.host - self.sock = context.wrap_socket(sock, **kwargs) + context = ssl.SSLContext(ssl.PROTOCOL_SSLv23) + if hasattr(ssl, 'OP_NO_SSLv2'): + context.options |= ssl.OP_NO_SSLv2 + if self.cert_file: + context.load_cert_chain(self.cert_file, self.key_file) + kwargs = {} + if self.ca_certs: + context.verify_mode = ssl.CERT_REQUIRED + context.load_verify_locations(cafile=self.ca_certs) + if getattr(ssl, 'HAS_SNI', False): + kwargs['server_hostname'] = self.host + + self.sock = context.wrap_socket(sock, **kwargs) if self.ca_certs and self.check_domain: try: match_hostname(self.sock.getpeercert(), self.host) @@ -1513,25 +1503,6 @@ def http_open(self, req): # # XML-RPC with timeouts # - -_ver_info = sys.version_info[:2] - -if _ver_info == (2, 6): - class HTTP(httplib.HTTP): - def __init__(self, host='', port=None, **kwargs): - if port == 0: # 0 means use port 0, not the default port - port = None - self._setup(self._connection_class(host, port, **kwargs)) - - - if ssl: - class HTTPS(httplib.HTTPS): - def __init__(self, host='', port=None, **kwargs): - if port == 0: # 0 means use port 0, not the default port - port = None - self._setup(self._connection_class(host, port, **kwargs)) - - class Transport(xmlrpclib.Transport): def __init__(self, timeout, use_datetime=0): self.timeout = timeout @@ -1539,14 +1510,10 @@ def __init__(self, timeout, use_datetime=0): def make_connection(self, host): h, eh, x509 = self.get_host_info(host) - if _ver_info == (2, 6): - result = HTTP(h, timeout=self.timeout) - else: - if not self._connection or host != self._connection[0]: - self._extra_headers = eh - self._connection = host, httplib.HTTPConnection(h) - result = self._connection[1] - return result + if not self._connection or host != self._connection[0]: + self._extra_headers = eh + self._connection = host, httplib.HTTPConnection(h) + return self._connection[1] if ssl: class SafeTransport(xmlrpclib.SafeTransport): @@ -1559,15 +1526,11 @@ def make_connection(self, host): if not kwargs: kwargs = {} kwargs['timeout'] = self.timeout - if _ver_info == (2, 6): - result = HTTPS(host, None, **kwargs) - else: - if not self._connection or host != self._connection[0]: - self._extra_headers = eh - self._connection = host, httplib.HTTPSConnection(h, None, - **kwargs) - result = self._connection[1] - return result + if not self._connection or host != self._connection[0]: + self._extra_headers = eh + self._connection = host, httplib.HTTPSConnection(h, None, + **kwargs) + return self._connection[1] class ServerProxy(xmlrpclib.ServerProxy): diff --git a/src/pip/_vendor/distlib/w32.exe b/src/pip/_vendor/distlib/w32.exe index e6439e9e458..4ee2d3a31b5 100644 Binary files a/src/pip/_vendor/distlib/w32.exe and b/src/pip/_vendor/distlib/w32.exe differ diff --git a/src/pip/_vendor/distlib/w64-arm.exe b/src/pip/_vendor/distlib/w64-arm.exe index 70a2ec26852..951d5817c9e 100644 Binary files a/src/pip/_vendor/distlib/w64-arm.exe and b/src/pip/_vendor/distlib/w64-arm.exe differ diff --git a/src/pip/_vendor/distlib/w64.exe b/src/pip/_vendor/distlib/w64.exe index 46139dbf940..5763076d287 100644 Binary files a/src/pip/_vendor/distlib/w64.exe and b/src/pip/_vendor/distlib/w64.exe differ diff --git a/src/pip/_vendor/distlib/wheel.py b/src/pip/_vendor/distlib/wheel.py index 48abfde5b52..028c2d99b57 100644 --- a/src/pip/_vendor/distlib/wheel.py +++ b/src/pip/_vendor/distlib/wheel.py @@ -11,7 +11,6 @@ import datetime from email import message_from_file import hashlib -import imp import json import logging import os @@ -61,10 +60,18 @@ def _derive_abi(): parts = ['cp', VER_SUFFIX] if sysconfig.get_config_var('Py_DEBUG'): parts.append('d') - if sysconfig.get_config_var('WITH_PYMALLOC'): - parts.append('m') - if sysconfig.get_config_var('Py_UNICODE_SIZE') == 4: - parts.append('u') + if IMP_PREFIX == 'cp': + vi = sys.version_info[:2] + if vi < (3, 8): + wpm = sysconfig.get_config_var('WITH_PYMALLOC') + if wpm is None: + wpm = True + if wpm: + parts.append('m') + if vi < (3, 3): + us = sysconfig.get_config_var('Py_UNICODE_SIZE') + if us == 4 or (us is None and sys.maxunicode == 0x10FFFF): + parts.append('u') return ''.join(parts) ABI = _derive_abi() del _derive_abi @@ -95,6 +102,29 @@ def _derive_abi(): else: to_posix = lambda o: o.replace(os.sep, '/') +if sys.version_info[0] < 3: + import imp +else: + imp = None + import importlib.machinery + import importlib.util + +def _get_suffixes(): + if imp: + return [s[0] for s in imp.get_suffixes()] + else: + return importlib.machinery.EXTENSION_SUFFIXES + +def _load_dynamic(name, path): + # https://docs.python.org/3/library/importlib.html#importing-a-source-file-directly + if imp: + return imp.load_dynamic(name, path) + else: + spec = importlib.util.spec_from_file_location(name, path) + module = importlib.util.module_from_spec(spec) + sys.modules[name] = module + spec.loader.exec_module(module) + return module class Mounter(object): def __init__(self): @@ -124,7 +154,7 @@ def load_module(self, fullname): else: if fullname not in self.libs: raise ImportError('unable to find extension for %s' % fullname) - result = imp.load_dynamic(fullname, self.libs[fullname]) + result = _load_dynamic(fullname, self.libs[fullname]) result.__loader__ = self parts = fullname.rsplit('.', 1) if len(parts) > 1: @@ -301,10 +331,9 @@ def get_hash(self, data, hash_kind=None): result = base64.urlsafe_b64encode(result).rstrip(b'=').decode('ascii') return hash_kind, result - def write_record(self, records, record_path, base): + def write_record(self, records, record_path, archive_record_path): records = list(records) # make a copy, as mutated - p = to_posix(os.path.relpath(record_path, base)) - records.append((p, '', '')) + records.append((archive_record_path, '', '')) with CSVWriter(record_path) as writer: for row in records: writer.writerow(row) @@ -321,8 +350,8 @@ def write_records(self, info, libdir, archive_paths): records.append((ap, digest, size)) p = os.path.join(distinfo, 'RECORD') - self.write_record(records, p, libdir) ap = to_posix(os.path.join(info_dir, 'RECORD')) + self.write_record(records, p, ap) archive_paths.append((ap, p)) def build_zip(self, pathname, archive_paths): @@ -965,7 +994,7 @@ def compatible_tags(): versions.append(''.join([major, str(minor)])) abis = [] - for suffix, _, _ in imp.get_suffixes(): + for suffix in _get_suffixes(): if suffix.startswith('.abi'): abis.append(suffix.split('.', 2)[1]) abis.sort() diff --git a/src/pip/_vendor/distro.LICENSE b/src/pip/_vendor/distro/LICENSE similarity index 100% rename from src/pip/_vendor/distro.LICENSE rename to src/pip/_vendor/distro/LICENSE diff --git a/src/pip/_vendor/distro/__init__.py b/src/pip/_vendor/distro/__init__.py new file mode 100644 index 00000000000..7686fe85a7c --- /dev/null +++ b/src/pip/_vendor/distro/__init__.py @@ -0,0 +1,54 @@ +from .distro import ( + NORMALIZED_DISTRO_ID, + NORMALIZED_LSB_ID, + NORMALIZED_OS_ID, + LinuxDistribution, + __version__, + build_number, + codename, + distro_release_attr, + distro_release_info, + id, + info, + like, + linux_distribution, + lsb_release_attr, + lsb_release_info, + major_version, + minor_version, + name, + os_release_attr, + os_release_info, + uname_attr, + uname_info, + version, + version_parts, +) + +__all__ = [ + "NORMALIZED_DISTRO_ID", + "NORMALIZED_LSB_ID", + "NORMALIZED_OS_ID", + "LinuxDistribution", + "build_number", + "codename", + "distro_release_attr", + "distro_release_info", + "id", + "info", + "like", + "linux_distribution", + "lsb_release_attr", + "lsb_release_info", + "major_version", + "minor_version", + "name", + "os_release_attr", + "os_release_info", + "uname_attr", + "uname_info", + "version", + "version_parts", +] + +__version__ = __version__ diff --git a/src/pip/_vendor/distro/__main__.py b/src/pip/_vendor/distro/__main__.py new file mode 100644 index 00000000000..0c01d5b08b6 --- /dev/null +++ b/src/pip/_vendor/distro/__main__.py @@ -0,0 +1,4 @@ +from .distro import main + +if __name__ == "__main__": + main() diff --git a/src/pip/_vendor/distro.py b/src/pip/_vendor/distro/distro.py similarity index 79% rename from src/pip/_vendor/distro.py rename to src/pip/_vendor/distro/distro.py index 7892741347d..89e18680472 100644 --- a/src/pip/_vendor/distro.py +++ b/src/pip/_vendor/distro/distro.py @@ -1,3 +1,4 @@ +#!/usr/bin/env python # Copyright 2015,2016,2017 Nir Cohen # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -36,40 +37,39 @@ import subprocess import sys import warnings +from typing import ( + Any, + Callable, + Dict, + Iterable, + Optional, + Sequence, + TextIO, + Tuple, + Type, +) -__version__ = "1.6.0" - -# Use `if False` to avoid an ImportError on Python 2. After dropping Python 2 -# support, can use typing.TYPE_CHECKING instead. See: -# https://docs.python.org/3/library/typing.html#typing.TYPE_CHECKING -if False: # pragma: nocover - from typing import ( - Any, - Callable, - Dict, - Iterable, - Optional, - Sequence, - TextIO, - Tuple, - Type, - TypedDict, - Union, - ) +try: + from typing import TypedDict +except ImportError: + # Python 3.7 + TypedDict = dict - VersionDict = TypedDict( - "VersionDict", {"major": str, "minor": str, "build_number": str} - ) - InfoDict = TypedDict( - "InfoDict", - { - "id": str, - "version": str, - "version_parts": VersionDict, - "like": str, - "codename": str, - }, - ) +__version__ = "1.8.0" + + +class VersionDict(TypedDict): + major: str + minor: str + build_number: str + + +class InfoDict(TypedDict): + id: str + version: str + version_parts: VersionDict + like: str + codename: str _UNIXCONFDIR = os.environ.get("UNIXCONFDIR", "/etc") @@ -85,6 +85,7 @@ #: * Value: Normalized value. NORMALIZED_OS_ID = { "ol": "oracle", # Oracle Linux + "opensuse-leap": "opensuse", # Newer versions of OpenSuSE report as opensuse-leap } #: Translation table for normalizing the "Distributor ID" attribute returned by @@ -121,6 +122,26 @@ # Pattern for base file name of distro release file _DISTRO_RELEASE_BASENAME_PATTERN = re.compile(r"(\w+)[-_](release|version)$") +# Base file names to be looked up for if _UNIXCONFDIR is not readable. +_DISTRO_RELEASE_BASENAMES = [ + "SuSE-release", + "arch-release", + "base-release", + "centos-release", + "fedora-release", + "gentoo-release", + "mageia-release", + "mandrake-release", + "mandriva-release", + "mandrivalinux-release", + "manjaro-release", + "oracle-release", + "redhat-release", + "rocky-release", + "sl-release", + "slackware-version", +] + # Base file names to be ignored when searching for distro release file _DISTRO_RELEASE_IGNORE_BASENAMES = ( "debian_version", @@ -133,8 +154,7 @@ ) -def linux_distribution(full_distribution_name=True): - # type: (bool) -> Tuple[str, str, str] +def linux_distribution(full_distribution_name: bool = True) -> Tuple[str, str, str]: """ .. deprecated:: 1.6.0 @@ -151,7 +171,8 @@ def linux_distribution(full_distribution_name=True): * ``version``: The result of :func:`distro.version`. - * ``codename``: The result of :func:`distro.codename`. + * ``codename``: The extra item (usually in parentheses) after the + os-release version number, or the result of :func:`distro.codename`. The interface of this function is compatible with the original :py:func:`platform.linux_distribution` function, supporting a subset of @@ -176,8 +197,7 @@ def linux_distribution(full_distribution_name=True): return _distro.linux_distribution(full_distribution_name) -def id(): - # type: () -> str +def id() -> str: """ Return the distro ID of the current distribution, as a machine-readable string. @@ -198,8 +218,9 @@ def id(): "fedora" Fedora "sles" SUSE Linux Enterprise Server "opensuse" openSUSE - "amazon" Amazon Linux + "amzn" Amazon Linux "arch" Arch Linux + "buildroot" Buildroot "cloudlinux" CloudLinux OS "exherbo" Exherbo Linux "gentoo" GenToo Linux @@ -219,6 +240,9 @@ def id(): "netbsd" NetBSD "freebsd" FreeBSD "midnightbsd" MidnightBSD + "rocky" Rocky Linux + "aix" AIX + "guix" Guix System ============== ========================================= If you have a need to get distros for reliable IDs added into this set, @@ -256,8 +280,7 @@ def id(): return _distro.id() -def name(pretty=False): - # type: (bool) -> str +def name(pretty: bool = False) -> str: """ Return the name of the current OS distribution, as a human-readable string. @@ -296,8 +319,7 @@ def name(pretty=False): return _distro.name(pretty) -def version(pretty=False, best=False): - # type: (bool, bool) -> str +def version(pretty: bool = False, best: bool = False) -> str: """ Return the version of the current OS distribution, as a human-readable string. @@ -313,6 +335,10 @@ def version(pretty=False, best=False): sources in a fixed priority order does not always yield the most precise version (e.g. for Debian 8.2, or CentOS 7.1). + Some other distributions may not provide this kind of information. In these + cases, an empty string would be returned. This behavior can be observed + with rolling releases distributions (e.g. Arch Linux). + The *best* parameter can be used to control the approach for the returned version: @@ -341,8 +367,7 @@ def version(pretty=False, best=False): return _distro.version(pretty, best) -def version_parts(best=False): - # type: (bool) -> Tuple[str, str, str] +def version_parts(best: bool = False) -> Tuple[str, str, str]: """ Return the version of the current OS distribution as a tuple ``(major, minor, build_number)`` with items as follows: @@ -359,8 +384,7 @@ def version_parts(best=False): return _distro.version_parts(best) -def major_version(best=False): - # type: (bool) -> str +def major_version(best: bool = False) -> str: """ Return the major version of the current OS distribution, as a string, if provided. @@ -373,8 +397,7 @@ def major_version(best=False): return _distro.major_version(best) -def minor_version(best=False): - # type: (bool) -> str +def minor_version(best: bool = False) -> str: """ Return the minor version of the current OS distribution, as a string, if provided. @@ -387,8 +410,7 @@ def minor_version(best=False): return _distro.minor_version(best) -def build_number(best=False): - # type: (bool) -> str +def build_number(best: bool = False) -> str: """ Return the build number of the current OS distribution, as a string, if provided. @@ -401,8 +423,7 @@ def build_number(best=False): return _distro.build_number(best) -def like(): - # type: () -> str +def like() -> str: """ Return a space-separated list of distro IDs of distributions that are closely related to the current OS distribution in regards to packaging @@ -419,8 +440,7 @@ def like(): return _distro.like() -def codename(): - # type: () -> str +def codename() -> str: """ Return the codename for the release of the current OS distribution, as a string. @@ -444,8 +464,7 @@ def codename(): return _distro.codename() -def info(pretty=False, best=False): - # type: (bool, bool) -> InfoDict +def info(pretty: bool = False, best: bool = False) -> InfoDict: """ Return certain machine-readable information items about the current OS distribution in a dictionary, as shown in the following example: @@ -489,8 +508,7 @@ def info(pretty=False, best=False): return _distro.info(pretty, best) -def os_release_info(): - # type: () -> Dict[str, str] +def os_release_info() -> Dict[str, str]: """ Return a dictionary containing key-value pairs for the information items from the os-release file data source of the current OS distribution. @@ -500,8 +518,7 @@ def os_release_info(): return _distro.os_release_info() -def lsb_release_info(): - # type: () -> Dict[str, str] +def lsb_release_info() -> Dict[str, str]: """ Return a dictionary containing key-value pairs for the information items from the lsb_release command data source of the current OS distribution. @@ -512,8 +529,7 @@ def lsb_release_info(): return _distro.lsb_release_info() -def distro_release_info(): - # type: () -> Dict[str, str] +def distro_release_info() -> Dict[str, str]: """ Return a dictionary containing key-value pairs for the information items from the distro release file data source of the current OS distribution. @@ -523,8 +539,7 @@ def distro_release_info(): return _distro.distro_release_info() -def uname_info(): - # type: () -> Dict[str, str] +def uname_info() -> Dict[str, str]: """ Return a dictionary containing key-value pairs for the information items from the distro release file data source of the current OS distribution. @@ -532,8 +547,7 @@ def uname_info(): return _distro.uname_info() -def os_release_attr(attribute): - # type: (str) -> str +def os_release_attr(attribute: str) -> str: """ Return a single named information item from the os-release file data source of the current OS distribution. @@ -552,8 +566,7 @@ def os_release_attr(attribute): return _distro.os_release_attr(attribute) -def lsb_release_attr(attribute): - # type: (str) -> str +def lsb_release_attr(attribute: str) -> str: """ Return a single named information item from the lsb_release command output data source of the current OS distribution. @@ -573,8 +586,7 @@ def lsb_release_attr(attribute): return _distro.lsb_release_attr(attribute) -def distro_release_attr(attribute): - # type: (str) -> str +def distro_release_attr(attribute: str) -> str: """ Return a single named information item from the distro release file data source of the current OS distribution. @@ -593,8 +605,7 @@ def distro_release_attr(attribute): return _distro.distro_release_attr(attribute) -def uname_attr(attribute): - # type: (str) -> str +def uname_attr(attribute: str) -> str: """ Return a single named information item from the distro release file data source of the current OS distribution. @@ -615,25 +626,23 @@ def uname_attr(attribute): from functools import cached_property except ImportError: # Python < 3.8 - class cached_property(object): # type: ignore + class cached_property: # type: ignore """A version of @property which caches the value. On access, it calls the underlying function and sets the value in `__dict__` so future accesses will not re-call the property. """ - def __init__(self, f): - # type: (Callable[[Any], Any]) -> None + def __init__(self, f: Callable[[Any], Any]) -> None: self._fname = f.__name__ self._f = f - def __get__(self, obj, owner): - # type: (Any, Type[Any]) -> Any - assert obj is not None, "call {} on an instance".format(self._fname) + def __get__(self, obj: Any, owner: Type[Any]) -> Any: + assert obj is not None, f"call {self._fname} on an instance" ret = obj.__dict__[self._fname] = self._f(obj) return ret -class LinuxDistribution(object): +class LinuxDistribution: """ Provides information about a OS distribution. @@ -653,13 +662,13 @@ class LinuxDistribution(object): def __init__( self, - include_lsb=True, - os_release_file="", - distro_release_file="", - include_uname=True, - root_dir=None, - ): - # type: (bool, str, str, bool, Optional[str]) -> None + include_lsb: Optional[bool] = None, + os_release_file: str = "", + distro_release_file: str = "", + include_uname: Optional[bool] = None, + root_dir: Optional[str] = None, + include_oslevel: Optional[bool] = None, + ) -> None: """ The initialization method of this class gathers information from the available data sources, and stores that in private instance attributes. @@ -699,7 +708,13 @@ def __init__( be empty. * ``root_dir`` (string): The absolute path to the root directory to use - to find distro-related information files. + to find distro-related information files. Note that ``include_*`` + parameters must not be enabled in combination with ``root_dir``. + + * ``include_oslevel`` (bool): Controls whether (AIX) oslevel command + output is included as a data source. If the oslevel command is not + available in the program execution path the data source will be + empty. Public instance attributes: @@ -718,14 +733,21 @@ def __init__( parameter. This controls whether the uname information will be loaded. + * ``include_oslevel`` (bool): The result of the ``include_oslevel`` + parameter. This controls whether (AIX) oslevel information will be + loaded. + + * ``root_dir`` (string): The result of the ``root_dir`` parameter. + The absolute path to the root directory to use to find distro-related + information files. + Raises: - * :py:exc:`IOError`: Some I/O issue with an os-release file or distro - release file. + * :py:exc:`ValueError`: Initialization parameters combination is not + supported. - * :py:exc:`subprocess.CalledProcessError`: The lsb_release command had - some issue (other than not being available in the program execution - path). + * :py:exc:`OSError`: Some I/O issue with an os-release file or distro + release file. * :py:exc:`UnicodeError`: A data source has unexpected characters or uses an unexpected encoding. @@ -754,11 +776,24 @@ def __init__( self.os_release_file = usr_lib_os_release_file self.distro_release_file = distro_release_file or "" # updated later - self.include_lsb = include_lsb - self.include_uname = include_uname - def __repr__(self): - # type: () -> str + is_root_dir_defined = root_dir is not None + if is_root_dir_defined and (include_lsb or include_uname or include_oslevel): + raise ValueError( + "Including subprocess data sources from specific root_dir is disallowed" + " to prevent false information" + ) + self.include_lsb = ( + include_lsb if include_lsb is not None else not is_root_dir_defined + ) + self.include_uname = ( + include_uname if include_uname is not None else not is_root_dir_defined + ) + self.include_oslevel = ( + include_oslevel if include_oslevel is not None else not is_root_dir_defined + ) + + def __repr__(self) -> str: """Return repr of all info""" return ( "LinuxDistribution(" @@ -766,14 +801,18 @@ def __repr__(self): "distro_release_file={self.distro_release_file!r}, " "include_lsb={self.include_lsb!r}, " "include_uname={self.include_uname!r}, " + "include_oslevel={self.include_oslevel!r}, " + "root_dir={self.root_dir!r}, " "_os_release_info={self._os_release_info!r}, " "_lsb_release_info={self._lsb_release_info!r}, " "_distro_release_info={self._distro_release_info!r}, " - "_uname_info={self._uname_info!r})".format(self=self) + "_uname_info={self._uname_info!r}, " + "_oslevel_info={self._oslevel_info!r})".format(self=self) ) - def linux_distribution(self, full_distribution_name=True): - # type: (bool) -> Tuple[str, str, str] + def linux_distribution( + self, full_distribution_name: bool = True + ) -> Tuple[str, str, str]: """ Return information about the OS distribution that is compatible with Python's :func:`platform.linux_distribution`, supporting a subset @@ -784,18 +823,16 @@ def linux_distribution(self, full_distribution_name=True): return ( self.name() if full_distribution_name else self.id(), self.version(), - self.codename(), + self._os_release_info.get("release_codename") or self.codename(), ) - def id(self): - # type: () -> str + def id(self) -> str: """Return the distro ID of the OS distribution, as a string. For details, see :func:`distro.id`. """ - def normalize(distro_id, table): - # type: (str, Dict[str, str]) -> str + def normalize(distro_id: str, table: Dict[str, str]) -> str: distro_id = distro_id.lower().replace(" ", "_") return table.get(distro_id, distro_id) @@ -817,8 +854,7 @@ def normalize(distro_id, table): return "" - def name(self, pretty=False): - # type: (bool) -> str + def name(self, pretty: bool = False) -> str: """ Return the name of the OS distribution, as a string. @@ -838,11 +874,10 @@ def name(self, pretty=False): name = self.distro_release_attr("name") or self.uname_attr("name") version = self.version(pretty=True) if version: - name = name + " " + version + name = f"{name} {version}" return name or "" - def version(self, pretty=False, best=False): - # type: (bool, bool) -> str + def version(self, pretty: bool = False, best: bool = False) -> str: """ Return the version of the OS distribution, as a string. @@ -860,6 +895,12 @@ def version(self, pretty=False, best=False): ).get("version_id", ""), self.uname_attr("release"), ] + if self.uname_attr("id").startswith("aix"): + # On AIX platforms, prefer oslevel command output. + versions.insert(0, self.oslevel_info()) + elif self.id() == "debian" or "debian" in self.like().split(): + # On Debian-like, add debian_version file content to candidates list. + versions.append(self._debian_version) version = "" if best: # This algorithm uses the last version in priority order that has @@ -875,11 +916,10 @@ def version(self, pretty=False, best=False): version = v break if pretty and version and self.codename(): - version = "{0} ({1})".format(version, self.codename()) + version = f"{version} ({self.codename()})" return version - def version_parts(self, best=False): - # type: (bool) -> Tuple[str, str, str] + def version_parts(self, best: bool = False) -> Tuple[str, str, str]: """ Return the version of the OS distribution, as a tuple of version numbers. @@ -895,8 +935,7 @@ def version_parts(self, best=False): return major, minor or "", build_number or "" return "", "", "" - def major_version(self, best=False): - # type: (bool) -> str + def major_version(self, best: bool = False) -> str: """ Return the major version number of the current distribution. @@ -904,8 +943,7 @@ def major_version(self, best=False): """ return self.version_parts(best)[0] - def minor_version(self, best=False): - # type: (bool) -> str + def minor_version(self, best: bool = False) -> str: """ Return the minor version number of the current distribution. @@ -913,8 +951,7 @@ def minor_version(self, best=False): """ return self.version_parts(best)[1] - def build_number(self, best=False): - # type: (bool) -> str + def build_number(self, best: bool = False) -> str: """ Return the build number of the current distribution. @@ -922,8 +959,7 @@ def build_number(self, best=False): """ return self.version_parts(best)[2] - def like(self): - # type: () -> str + def like(self) -> str: """ Return the IDs of distributions that are like the OS distribution. @@ -931,8 +967,7 @@ def like(self): """ return self.os_release_attr("id_like") or "" - def codename(self): - # type: () -> str + def codename(self) -> str: """ Return the codename of the OS distribution. @@ -949,8 +984,7 @@ def codename(self): or "" ) - def info(self, pretty=False, best=False): - # type: (bool, bool) -> InfoDict + def info(self, pretty: bool = False, best: bool = False) -> InfoDict: """ Return certain machine-readable information about the OS distribution. @@ -969,8 +1003,7 @@ def info(self, pretty=False, best=False): codename=self.codename(), ) - def os_release_info(self): - # type: () -> Dict[str, str] + def os_release_info(self) -> Dict[str, str]: """ Return a dictionary containing key-value pairs for the information items from the os-release file data source of the OS distribution. @@ -979,8 +1012,7 @@ def os_release_info(self): """ return self._os_release_info - def lsb_release_info(self): - # type: () -> Dict[str, str] + def lsb_release_info(self) -> Dict[str, str]: """ Return a dictionary containing key-value pairs for the information items from the lsb_release command data source of the OS @@ -990,8 +1022,7 @@ def lsb_release_info(self): """ return self._lsb_release_info - def distro_release_info(self): - # type: () -> Dict[str, str] + def distro_release_info(self) -> Dict[str, str]: """ Return a dictionary containing key-value pairs for the information items from the distro release file data source of the OS @@ -1001,8 +1032,7 @@ def distro_release_info(self): """ return self._distro_release_info - def uname_info(self): - # type: () -> Dict[str, str] + def uname_info(self) -> Dict[str, str]: """ Return a dictionary containing key-value pairs for the information items from the uname command data source of the OS distribution. @@ -1011,8 +1041,13 @@ def uname_info(self): """ return self._uname_info - def os_release_attr(self, attribute): - # type: (str) -> str + def oslevel_info(self) -> str: + """ + Return AIX' oslevel command output. + """ + return self._oslevel_info + + def os_release_attr(self, attribute: str) -> str: """ Return a single named information item from the os-release file data source of the OS distribution. @@ -1021,8 +1056,7 @@ def os_release_attr(self, attribute): """ return self._os_release_info.get(attribute, "") - def lsb_release_attr(self, attribute): - # type: (str) -> str + def lsb_release_attr(self, attribute: str) -> str: """ Return a single named information item from the lsb_release command output data source of the OS distribution. @@ -1031,8 +1065,7 @@ def lsb_release_attr(self, attribute): """ return self._lsb_release_info.get(attribute, "") - def distro_release_attr(self, attribute): - # type: (str) -> str + def distro_release_attr(self, attribute: str) -> str: """ Return a single named information item from the distro release file data source of the OS distribution. @@ -1041,8 +1074,7 @@ def distro_release_attr(self, attribute): """ return self._distro_release_info.get(attribute, "") - def uname_attr(self, attribute): - # type: (str) -> str + def uname_attr(self, attribute: str) -> str: """ Return a single named information item from the uname command output data source of the OS distribution. @@ -1052,8 +1084,7 @@ def uname_attr(self, attribute): return self._uname_info.get(attribute, "") @cached_property - def _os_release_info(self): - # type: () -> Dict[str, str] + def _os_release_info(self) -> Dict[str, str]: """ Get the information items from the specified os-release file. @@ -1061,13 +1092,12 @@ def _os_release_info(self): A dictionary containing all information items. """ if os.path.isfile(self.os_release_file): - with open(self.os_release_file) as release_file: + with open(self.os_release_file, encoding="utf-8") as release_file: return self._parse_os_release_content(release_file) return {} @staticmethod - def _parse_os_release_content(lines): - # type: (TextIO) -> Dict[str, str] + def _parse_os_release_content(lines: TextIO) -> Dict[str, str]: """ Parse the lines of an os-release file. @@ -1084,16 +1114,6 @@ def _parse_os_release_content(lines): lexer = shlex.shlex(lines, posix=True) lexer.whitespace_split = True - # The shlex module defines its `wordchars` variable using literals, - # making it dependent on the encoding of the Python source file. - # In Python 2.6 and 2.7, the shlex source file is encoded in - # 'iso-8859-1', and the `wordchars` variable is defined as a byte - # string. This causes a UnicodeDecodeError to be raised when the - # parsed content is a unicode object. The following fix resolves that - # (... but it should be fixed in shlex...): - if sys.version_info[0] == 2 and isinstance(lexer.wordchars, bytes): - lexer.wordchars = lexer.wordchars.decode("iso-8859-1") - tokens = list(lexer) for token in tokens: # At this point, all shell-like parsing has been done (i.e. @@ -1102,12 +1122,17 @@ def _parse_os_release_content(lines): # stripped, etc.), so the tokens are now either: # * variable assignments: var=value # * commands or their arguments (not allowed in os-release) + # Ignore any tokens that are not variable assignments if "=" in token: k, v = token.split("=", 1) props[k.lower()] = v - else: - # Ignore any tokens that are not variable assignments - pass + + if "version" in props: + # extract release codename (if any) from version attribute + match = re.search(r"\((\D+)\)|,\s*(\D+)", props["version"]) + if match: + release_codename = match.group(1) or match.group(2) + props["codename"] = props["release_codename"] = release_codename if "version_codename" in props: # os-release added a version_codename field. Use that in @@ -1118,22 +1143,11 @@ def _parse_os_release_content(lines): elif "ubuntu_codename" in props: # Same as above but a non-standard field name used on older Ubuntus props["codename"] = props["ubuntu_codename"] - elif "version" in props: - # If there is no version_codename, parse it from the version - match = re.search(r"(\(\D+\))|,(\s+)?\D+", props["version"]) - if match: - codename = match.group() - codename = codename.strip("()") - codename = codename.strip(",") - codename = codename.strip() - # codename appears within paranthese. - props["codename"] = codename return props @cached_property - def _lsb_release_info(self): - # type: () -> Dict[str, str] + def _lsb_release_info(self) -> Dict[str, str]: """ Get the information items from the lsb_release command output. @@ -1142,19 +1156,17 @@ def _lsb_release_info(self): """ if not self.include_lsb: return {} - with open(os.devnull, "wb") as devnull: - try: - cmd = ("lsb_release", "-a") - stdout = subprocess.check_output(cmd, stderr=devnull) - # Command not found or lsb_release returned error - except (OSError, subprocess.CalledProcessError): - return {} + try: + cmd = ("lsb_release", "-a") + stdout = subprocess.check_output(cmd, stderr=subprocess.DEVNULL) + # Command not found or lsb_release returned error + except (OSError, subprocess.CalledProcessError): + return {} content = self._to_str(stdout).splitlines() return self._parse_lsb_release_content(content) @staticmethod - def _parse_lsb_release_content(lines): - # type: (Iterable[str]) -> Dict[str, str] + def _parse_lsb_release_content(lines: Iterable[str]) -> Dict[str, str]: """ Parse the output of the lsb_release command. @@ -1178,20 +1190,41 @@ def _parse_lsb_release_content(lines): return props @cached_property - def _uname_info(self): - # type: () -> Dict[str, str] - with open(os.devnull, "wb") as devnull: - try: - cmd = ("uname", "-rs") - stdout = subprocess.check_output(cmd, stderr=devnull) - except OSError: - return {} + def _uname_info(self) -> Dict[str, str]: + if not self.include_uname: + return {} + try: + cmd = ("uname", "-rs") + stdout = subprocess.check_output(cmd, stderr=subprocess.DEVNULL) + except OSError: + return {} content = self._to_str(stdout).splitlines() return self._parse_uname_content(content) + @cached_property + def _oslevel_info(self) -> str: + if not self.include_oslevel: + return "" + try: + stdout = subprocess.check_output("oslevel", stderr=subprocess.DEVNULL) + except (OSError, subprocess.CalledProcessError): + return "" + return self._to_str(stdout).strip() + + @cached_property + def _debian_version(self) -> str: + try: + with open( + os.path.join(self.etc_dir, "debian_version"), encoding="ascii" + ) as fp: + return fp.readline().rstrip() + except FileNotFoundError: + return "" + @staticmethod - def _parse_uname_content(lines): - # type: (Sequence[str]) -> Dict[str, str] + def _parse_uname_content(lines: Sequence[str]) -> Dict[str, str]: + if not lines: + return {} props = {} match = re.search(r"^([^\s]+)\s+([\d\.]+)", lines[0].strip()) if match: @@ -1208,23 +1241,12 @@ def _parse_uname_content(lines): return props @staticmethod - def _to_str(text): - # type: (Union[bytes, str]) -> str + def _to_str(bytestring: bytes) -> str: encoding = sys.getfilesystemencoding() - encoding = "utf-8" if encoding == "ascii" else encoding - - if sys.version_info[0] >= 3: - if isinstance(text, bytes): - return text.decode(encoding) - else: - if isinstance(text, unicode): # noqa - return text.encode(encoding) - - return text + return bytestring.decode(encoding) @cached_property - def _distro_release_info(self): - # type: () -> Dict[str, str] + def _distro_release_info(self) -> Dict[str, str]: """ Get the information items from the specified distro release file. @@ -1241,14 +1263,14 @@ def _distro_release_info(self): # file), because we want to use what was specified as best as # possible. match = _DISTRO_RELEASE_BASENAME_PATTERN.match(basename) - if "name" in distro_info and "cloudlinux" in distro_info["name"].lower(): - distro_info["id"] = "cloudlinux" - elif match: - distro_info["id"] = match.group(1) - return distro_info else: try: - basenames = os.listdir(self.etc_dir) + basenames = [ + basename + for basename in os.listdir(self.etc_dir) + if basename not in _DISTRO_RELEASE_IGNORE_BASENAMES + and os.path.isfile(os.path.join(self.etc_dir, basename)) + ] # We sort for repeatability in cases where there are multiple # distro specific files; e.g. CentOS, Oracle, Enterprise all # containing `redhat-release` on top of their own. @@ -1258,41 +1280,31 @@ def _distro_release_info(self): # sure about the *-release files. Check common entries of # /etc for information. If they turn out to not be there the # error is handled in `_parse_distro_release_file()`. - basenames = [ - "SuSE-release", - "arch-release", - "base-release", - "centos-release", - "fedora-release", - "gentoo-release", - "mageia-release", - "mandrake-release", - "mandriva-release", - "mandrivalinux-release", - "manjaro-release", - "oracle-release", - "redhat-release", - "sl-release", - "slackware-version", - ] + basenames = _DISTRO_RELEASE_BASENAMES for basename in basenames: - if basename in _DISTRO_RELEASE_IGNORE_BASENAMES: - continue match = _DISTRO_RELEASE_BASENAME_PATTERN.match(basename) - if match: - filepath = os.path.join(self.etc_dir, basename) - distro_info = self._parse_distro_release_file(filepath) - if "name" in distro_info: - # The name is always present if the pattern matches - self.distro_release_file = filepath - distro_info["id"] = match.group(1) - if "cloudlinux" in distro_info["name"].lower(): - distro_info["id"] = "cloudlinux" - return distro_info - return {} + if match is None: + continue + filepath = os.path.join(self.etc_dir, basename) + distro_info = self._parse_distro_release_file(filepath) + # The name is always present if the pattern matches. + if "name" not in distro_info: + continue + self.distro_release_file = filepath + break + else: # the loop didn't "break": no candidate. + return {} + + if match is not None: + distro_info["id"] = match.group(1) + + # CloudLinux < 7: manually enrich info with proper id. + if "cloudlinux" in distro_info.get("name", "").lower(): + distro_info["id"] = "cloudlinux" + + return distro_info - def _parse_distro_release_file(self, filepath): - # type: (str) -> Dict[str, str] + def _parse_distro_release_file(self, filepath: str) -> Dict[str, str]: """ Parse a distro release file. @@ -1304,19 +1316,18 @@ def _parse_distro_release_file(self, filepath): A dictionary containing all information items. """ try: - with open(filepath) as fp: + with open(filepath, encoding="utf-8") as fp: # Only parse the first line. For instance, on SLES there # are multiple lines. We don't want them... return self._parse_distro_release_content(fp.readline()) - except (OSError, IOError): + except OSError: # Ignore not being able to read a specific, seemingly version # related file. # See https://github.com/python-distro/distro/issues/162 return {} @staticmethod - def _parse_distro_release_content(line): - # type: (str) -> Dict[str, str] + def _parse_distro_release_content(line: str) -> Dict[str, str]: """ Parse a line from a distro release file. @@ -1344,8 +1355,7 @@ def _parse_distro_release_content(line): _distro = LinuxDistribution() -def main(): - # type: () -> None +def main() -> None: logger = logging.getLogger(__name__) logger.setLevel(logging.DEBUG) logger.addHandler(logging.StreamHandler(sys.stdout)) @@ -1367,7 +1377,10 @@ def main(): if args.root_dir: dist = LinuxDistribution( - include_lsb=False, include_uname=False, root_dir=args.root_dir + include_lsb=False, + include_uname=False, + include_oslevel=False, + root_dir=args.root_dir, ) else: dist = _distro diff --git a/src/pip/_vendor/html5lib/filters/__init__.py b/src/pip/_vendor/distro/py.typed similarity index 100% rename from src/pip/_vendor/html5lib/filters/__init__.py rename to src/pip/_vendor/distro/py.typed diff --git a/src/pip/_vendor/html5lib.pyi b/src/pip/_vendor/html5lib.pyi deleted file mode 100644 index 9bc9af95e3e..00000000000 --- a/src/pip/_vendor/html5lib.pyi +++ /dev/null @@ -1 +0,0 @@ -from html5lib import * \ No newline at end of file diff --git a/src/pip/_vendor/html5lib/LICENSE b/src/pip/_vendor/html5lib/LICENSE deleted file mode 100644 index c87fa7a0006..00000000000 --- a/src/pip/_vendor/html5lib/LICENSE +++ /dev/null @@ -1,20 +0,0 @@ -Copyright (c) 2006-2013 James Graham and other contributors - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/src/pip/_vendor/html5lib/__init__.py b/src/pip/_vendor/html5lib/__init__.py deleted file mode 100644 index d1d82f157f8..00000000000 --- a/src/pip/_vendor/html5lib/__init__.py +++ /dev/null @@ -1,35 +0,0 @@ -""" -HTML parsing library based on the `WHATWG HTML specification -`_. The parser is designed to be compatible with -existing HTML found in the wild and implements well-defined error recovery that -is largely compatible with modern desktop web browsers. - -Example usage:: - - from pip._vendor import html5lib - with open("my_document.html", "rb") as f: - tree = html5lib.parse(f) - -For convenience, this module re-exports the following names: - -* :func:`~.html5parser.parse` -* :func:`~.html5parser.parseFragment` -* :class:`~.html5parser.HTMLParser` -* :func:`~.treebuilders.getTreeBuilder` -* :func:`~.treewalkers.getTreeWalker` -* :func:`~.serializer.serialize` -""" - -from __future__ import absolute_import, division, unicode_literals - -from .html5parser import HTMLParser, parse, parseFragment -from .treebuilders import getTreeBuilder -from .treewalkers import getTreeWalker -from .serializer import serialize - -__all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder", - "getTreeWalker", "serialize"] - -# this has to be at the top level, see how setup.py parses this -#: Distribution version number. -__version__ = "1.1" diff --git a/src/pip/_vendor/html5lib/_ihatexml.py b/src/pip/_vendor/html5lib/_ihatexml.py deleted file mode 100644 index 3ff803c1952..00000000000 --- a/src/pip/_vendor/html5lib/_ihatexml.py +++ /dev/null @@ -1,289 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -import re -import warnings - -from .constants import DataLossWarning - -baseChar = """ -[#x0041-#x005A] | [#x0061-#x007A] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | -[#x00F8-#x00FF] | [#x0100-#x0131] | [#x0134-#x013E] | [#x0141-#x0148] | -[#x014A-#x017E] | [#x0180-#x01C3] | [#x01CD-#x01F0] | [#x01F4-#x01F5] | -[#x01FA-#x0217] | [#x0250-#x02A8] | [#x02BB-#x02C1] | #x0386 | -[#x0388-#x038A] | #x038C | [#x038E-#x03A1] | [#x03A3-#x03CE] | -[#x03D0-#x03D6] | #x03DA | #x03DC | #x03DE | #x03E0 | [#x03E2-#x03F3] | -[#x0401-#x040C] | [#x040E-#x044F] | [#x0451-#x045C] | [#x045E-#x0481] | -[#x0490-#x04C4] | [#x04C7-#x04C8] | [#x04CB-#x04CC] | [#x04D0-#x04EB] | -[#x04EE-#x04F5] | [#x04F8-#x04F9] | [#x0531-#x0556] | #x0559 | -[#x0561-#x0586] | [#x05D0-#x05EA] | [#x05F0-#x05F2] | [#x0621-#x063A] | -[#x0641-#x064A] | [#x0671-#x06B7] | [#x06BA-#x06BE] | [#x06C0-#x06CE] | -[#x06D0-#x06D3] | #x06D5 | [#x06E5-#x06E6] | [#x0905-#x0939] | #x093D | -[#x0958-#x0961] | [#x0985-#x098C] | [#x098F-#x0990] | [#x0993-#x09A8] | -[#x09AA-#x09B0] | #x09B2 | [#x09B6-#x09B9] | [#x09DC-#x09DD] | -[#x09DF-#x09E1] | [#x09F0-#x09F1] | [#x0A05-#x0A0A] | [#x0A0F-#x0A10] | -[#x0A13-#x0A28] | [#x0A2A-#x0A30] | [#x0A32-#x0A33] | [#x0A35-#x0A36] | -[#x0A38-#x0A39] | [#x0A59-#x0A5C] | #x0A5E | [#x0A72-#x0A74] | -[#x0A85-#x0A8B] | #x0A8D | [#x0A8F-#x0A91] | [#x0A93-#x0AA8] | -[#x0AAA-#x0AB0] | [#x0AB2-#x0AB3] | [#x0AB5-#x0AB9] | #x0ABD | #x0AE0 | -[#x0B05-#x0B0C] | [#x0B0F-#x0B10] | [#x0B13-#x0B28] | [#x0B2A-#x0B30] | -[#x0B32-#x0B33] | [#x0B36-#x0B39] | #x0B3D | [#x0B5C-#x0B5D] | -[#x0B5F-#x0B61] | [#x0B85-#x0B8A] | [#x0B8E-#x0B90] | [#x0B92-#x0B95] | -[#x0B99-#x0B9A] | #x0B9C | [#x0B9E-#x0B9F] | [#x0BA3-#x0BA4] | -[#x0BA8-#x0BAA] | [#x0BAE-#x0BB5] | [#x0BB7-#x0BB9] | [#x0C05-#x0C0C] | -[#x0C0E-#x0C10] | [#x0C12-#x0C28] | [#x0C2A-#x0C33] | [#x0C35-#x0C39] | -[#x0C60-#x0C61] | [#x0C85-#x0C8C] | [#x0C8E-#x0C90] | [#x0C92-#x0CA8] | -[#x0CAA-#x0CB3] | [#x0CB5-#x0CB9] | #x0CDE | [#x0CE0-#x0CE1] | -[#x0D05-#x0D0C] | [#x0D0E-#x0D10] | [#x0D12-#x0D28] | [#x0D2A-#x0D39] | -[#x0D60-#x0D61] | [#x0E01-#x0E2E] | #x0E30 | [#x0E32-#x0E33] | -[#x0E40-#x0E45] | [#x0E81-#x0E82] | #x0E84 | [#x0E87-#x0E88] | #x0E8A | -#x0E8D | [#x0E94-#x0E97] | [#x0E99-#x0E9F] | [#x0EA1-#x0EA3] | #x0EA5 | -#x0EA7 | [#x0EAA-#x0EAB] | [#x0EAD-#x0EAE] | #x0EB0 | [#x0EB2-#x0EB3] | -#x0EBD | [#x0EC0-#x0EC4] | [#x0F40-#x0F47] | [#x0F49-#x0F69] | -[#x10A0-#x10C5] | [#x10D0-#x10F6] | #x1100 | [#x1102-#x1103] | -[#x1105-#x1107] | #x1109 | [#x110B-#x110C] | [#x110E-#x1112] | #x113C | -#x113E | #x1140 | #x114C | #x114E | #x1150 | [#x1154-#x1155] | #x1159 | -[#x115F-#x1161] | #x1163 | #x1165 | #x1167 | #x1169 | [#x116D-#x116E] | -[#x1172-#x1173] | #x1175 | #x119E | #x11A8 | #x11AB | [#x11AE-#x11AF] | -[#x11B7-#x11B8] | #x11BA | [#x11BC-#x11C2] | #x11EB | #x11F0 | #x11F9 | -[#x1E00-#x1E9B] | [#x1EA0-#x1EF9] | [#x1F00-#x1F15] | [#x1F18-#x1F1D] | -[#x1F20-#x1F45] | [#x1F48-#x1F4D] | [#x1F50-#x1F57] | #x1F59 | #x1F5B | -#x1F5D | [#x1F5F-#x1F7D] | [#x1F80-#x1FB4] | [#x1FB6-#x1FBC] | #x1FBE | -[#x1FC2-#x1FC4] | [#x1FC6-#x1FCC] | [#x1FD0-#x1FD3] | [#x1FD6-#x1FDB] | -[#x1FE0-#x1FEC] | [#x1FF2-#x1FF4] | [#x1FF6-#x1FFC] | #x2126 | -[#x212A-#x212B] | #x212E | [#x2180-#x2182] | [#x3041-#x3094] | -[#x30A1-#x30FA] | [#x3105-#x312C] | [#xAC00-#xD7A3]""" - -ideographic = """[#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]""" - -combiningCharacter = """ -[#x0300-#x0345] | [#x0360-#x0361] | [#x0483-#x0486] | [#x0591-#x05A1] | -[#x05A3-#x05B9] | [#x05BB-#x05BD] | #x05BF | [#x05C1-#x05C2] | #x05C4 | -[#x064B-#x0652] | #x0670 | [#x06D6-#x06DC] | [#x06DD-#x06DF] | -[#x06E0-#x06E4] | [#x06E7-#x06E8] | [#x06EA-#x06ED] | [#x0901-#x0903] | -#x093C | [#x093E-#x094C] | #x094D | [#x0951-#x0954] | [#x0962-#x0963] | -[#x0981-#x0983] | #x09BC | #x09BE | #x09BF | [#x09C0-#x09C4] | -[#x09C7-#x09C8] | [#x09CB-#x09CD] | #x09D7 | [#x09E2-#x09E3] | #x0A02 | -#x0A3C | #x0A3E | #x0A3F | [#x0A40-#x0A42] | [#x0A47-#x0A48] | -[#x0A4B-#x0A4D] | [#x0A70-#x0A71] | [#x0A81-#x0A83] | #x0ABC | -[#x0ABE-#x0AC5] | [#x0AC7-#x0AC9] | [#x0ACB-#x0ACD] | [#x0B01-#x0B03] | -#x0B3C | [#x0B3E-#x0B43] | [#x0B47-#x0B48] | [#x0B4B-#x0B4D] | -[#x0B56-#x0B57] | [#x0B82-#x0B83] | [#x0BBE-#x0BC2] | [#x0BC6-#x0BC8] | -[#x0BCA-#x0BCD] | #x0BD7 | [#x0C01-#x0C03] | [#x0C3E-#x0C44] | -[#x0C46-#x0C48] | [#x0C4A-#x0C4D] | [#x0C55-#x0C56] | [#x0C82-#x0C83] | -[#x0CBE-#x0CC4] | [#x0CC6-#x0CC8] | [#x0CCA-#x0CCD] | [#x0CD5-#x0CD6] | -[#x0D02-#x0D03] | [#x0D3E-#x0D43] | [#x0D46-#x0D48] | [#x0D4A-#x0D4D] | -#x0D57 | #x0E31 | [#x0E34-#x0E3A] | [#x0E47-#x0E4E] | #x0EB1 | -[#x0EB4-#x0EB9] | [#x0EBB-#x0EBC] | [#x0EC8-#x0ECD] | [#x0F18-#x0F19] | -#x0F35 | #x0F37 | #x0F39 | #x0F3E | #x0F3F | [#x0F71-#x0F84] | -[#x0F86-#x0F8B] | [#x0F90-#x0F95] | #x0F97 | [#x0F99-#x0FAD] | -[#x0FB1-#x0FB7] | #x0FB9 | [#x20D0-#x20DC] | #x20E1 | [#x302A-#x302F] | -#x3099 | #x309A""" - -digit = """ -[#x0030-#x0039] | [#x0660-#x0669] | [#x06F0-#x06F9] | [#x0966-#x096F] | -[#x09E6-#x09EF] | [#x0A66-#x0A6F] | [#x0AE6-#x0AEF] | [#x0B66-#x0B6F] | -[#x0BE7-#x0BEF] | [#x0C66-#x0C6F] | [#x0CE6-#x0CEF] | [#x0D66-#x0D6F] | -[#x0E50-#x0E59] | [#x0ED0-#x0ED9] | [#x0F20-#x0F29]""" - -extender = """ -#x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | #x0E46 | #x0EC6 | #x3005 | -#[#x3031-#x3035] | [#x309D-#x309E] | [#x30FC-#x30FE]""" - -letter = " | ".join([baseChar, ideographic]) - -# Without the -name = " | ".join([letter, digit, ".", "-", "_", combiningCharacter, - extender]) -nameFirst = " | ".join([letter, "_"]) - -reChar = re.compile(r"#x([\d|A-F]{4,4})") -reCharRange = re.compile(r"\[#x([\d|A-F]{4,4})-#x([\d|A-F]{4,4})\]") - - -def charStringToList(chars): - charRanges = [item.strip() for item in chars.split(" | ")] - rv = [] - for item in charRanges: - foundMatch = False - for regexp in (reChar, reCharRange): - match = regexp.match(item) - if match is not None: - rv.append([hexToInt(item) for item in match.groups()]) - if len(rv[-1]) == 1: - rv[-1] = rv[-1] * 2 - foundMatch = True - break - if not foundMatch: - assert len(item) == 1 - - rv.append([ord(item)] * 2) - rv = normaliseCharList(rv) - return rv - - -def normaliseCharList(charList): - charList = sorted(charList) - for item in charList: - assert item[1] >= item[0] - rv = [] - i = 0 - while i < len(charList): - j = 1 - rv.append(charList[i]) - while i + j < len(charList) and charList[i + j][0] <= rv[-1][1] + 1: - rv[-1][1] = charList[i + j][1] - j += 1 - i += j - return rv - - -# We don't really support characters above the BMP :( -max_unicode = int("FFFF", 16) - - -def missingRanges(charList): - rv = [] - if charList[0] != 0: - rv.append([0, charList[0][0] - 1]) - for i, item in enumerate(charList[:-1]): - rv.append([item[1] + 1, charList[i + 1][0] - 1]) - if charList[-1][1] != max_unicode: - rv.append([charList[-1][1] + 1, max_unicode]) - return rv - - -def listToRegexpStr(charList): - rv = [] - for item in charList: - if item[0] == item[1]: - rv.append(escapeRegexp(chr(item[0]))) - else: - rv.append(escapeRegexp(chr(item[0])) + "-" + - escapeRegexp(chr(item[1]))) - return "[%s]" % "".join(rv) - - -def hexToInt(hex_str): - return int(hex_str, 16) - - -def escapeRegexp(string): - specialCharacters = (".", "^", "$", "*", "+", "?", "{", "}", - "[", "]", "|", "(", ")", "-") - for char in specialCharacters: - string = string.replace(char, "\\" + char) - - return string - -# output from the above -nonXmlNameBMPRegexp = re.compile('[\x00-,/:-@\\[-\\^`\\{-\xb6\xb8-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u02cf\u02d2-\u02ff\u0346-\u035f\u0362-\u0385\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482\u0487-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u0590\u05a2\u05ba\u05be\u05c0\u05c3\u05c5-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u063f\u0653-\u065f\u066a-\u066f\u06b8-\u06b9\u06bf\u06cf\u06d4\u06e9\u06ee-\u06ef\u06fa-\u0900\u0904\u093a-\u093b\u094e-\u0950\u0955-\u0957\u0964-\u0965\u0970-\u0980\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09bb\u09bd\u09c5-\u09c6\u09c9-\u09ca\u09ce-\u09d6\u09d8-\u09db\u09de\u09e4-\u09e5\u09f2-\u0a01\u0a03-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a3b\u0a3d\u0a43-\u0a46\u0a49-\u0a4a\u0a4e-\u0a58\u0a5d\u0a5f-\u0a65\u0a75-\u0a80\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abb\u0ac6\u0aca\u0ace-\u0adf\u0ae1-\u0ae5\u0af0-\u0b00\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3b\u0b44-\u0b46\u0b49-\u0b4a\u0b4e-\u0b55\u0b58-\u0b5b\u0b5e\u0b62-\u0b65\u0b70-\u0b81\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0bbd\u0bc3-\u0bc5\u0bc9\u0bce-\u0bd6\u0bd8-\u0be6\u0bf0-\u0c00\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c3d\u0c45\u0c49\u0c4e-\u0c54\u0c57-\u0c5f\u0c62-\u0c65\u0c70-\u0c81\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cbd\u0cc5\u0cc9\u0cce-\u0cd4\u0cd7-\u0cdd\u0cdf\u0ce2-\u0ce5\u0cf0-\u0d01\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d3d\u0d44-\u0d45\u0d49\u0d4e-\u0d56\u0d58-\u0d5f\u0d62-\u0d65\u0d70-\u0e00\u0e2f\u0e3b-\u0e3f\u0e4f\u0e5a-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eba\u0ebe-\u0ebf\u0ec5\u0ec7\u0ece-\u0ecf\u0eda-\u0f17\u0f1a-\u0f1f\u0f2a-\u0f34\u0f36\u0f38\u0f3a-\u0f3d\u0f48\u0f6a-\u0f70\u0f85\u0f8c-\u0f8f\u0f96\u0f98\u0fae-\u0fb0\u0fb8\u0fba-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u20cf\u20dd-\u20e0\u20e2-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3004\u3006\u3008-\u3020\u3030\u3036-\u3040\u3095-\u3098\u309b-\u309c\u309f-\u30a0\u30fb\u30ff-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') # noqa - -nonXmlNameFirstBMPRegexp = re.compile('[\x00-@\\[-\\^`\\{-\xbf\xd7\xf7\u0132-\u0133\u013f-\u0140\u0149\u017f\u01c4-\u01cc\u01f1-\u01f3\u01f6-\u01f9\u0218-\u024f\u02a9-\u02ba\u02c2-\u0385\u0387\u038b\u038d\u03a2\u03cf\u03d7-\u03d9\u03db\u03dd\u03df\u03e1\u03f4-\u0400\u040d\u0450\u045d\u0482-\u048f\u04c5-\u04c6\u04c9-\u04ca\u04cd-\u04cf\u04ec-\u04ed\u04f6-\u04f7\u04fa-\u0530\u0557-\u0558\u055a-\u0560\u0587-\u05cf\u05eb-\u05ef\u05f3-\u0620\u063b-\u0640\u064b-\u0670\u06b8-\u06b9\u06bf\u06cf\u06d4\u06d6-\u06e4\u06e7-\u0904\u093a-\u093c\u093e-\u0957\u0962-\u0984\u098d-\u098e\u0991-\u0992\u09a9\u09b1\u09b3-\u09b5\u09ba-\u09db\u09de\u09e2-\u09ef\u09f2-\u0a04\u0a0b-\u0a0e\u0a11-\u0a12\u0a29\u0a31\u0a34\u0a37\u0a3a-\u0a58\u0a5d\u0a5f-\u0a71\u0a75-\u0a84\u0a8c\u0a8e\u0a92\u0aa9\u0ab1\u0ab4\u0aba-\u0abc\u0abe-\u0adf\u0ae1-\u0b04\u0b0d-\u0b0e\u0b11-\u0b12\u0b29\u0b31\u0b34-\u0b35\u0b3a-\u0b3c\u0b3e-\u0b5b\u0b5e\u0b62-\u0b84\u0b8b-\u0b8d\u0b91\u0b96-\u0b98\u0b9b\u0b9d\u0ba0-\u0ba2\u0ba5-\u0ba7\u0bab-\u0bad\u0bb6\u0bba-\u0c04\u0c0d\u0c11\u0c29\u0c34\u0c3a-\u0c5f\u0c62-\u0c84\u0c8d\u0c91\u0ca9\u0cb4\u0cba-\u0cdd\u0cdf\u0ce2-\u0d04\u0d0d\u0d11\u0d29\u0d3a-\u0d5f\u0d62-\u0e00\u0e2f\u0e31\u0e34-\u0e3f\u0e46-\u0e80\u0e83\u0e85-\u0e86\u0e89\u0e8b-\u0e8c\u0e8e-\u0e93\u0e98\u0ea0\u0ea4\u0ea6\u0ea8-\u0ea9\u0eac\u0eaf\u0eb1\u0eb4-\u0ebc\u0ebe-\u0ebf\u0ec5-\u0f3f\u0f48\u0f6a-\u109f\u10c6-\u10cf\u10f7-\u10ff\u1101\u1104\u1108\u110a\u110d\u1113-\u113b\u113d\u113f\u1141-\u114b\u114d\u114f\u1151-\u1153\u1156-\u1158\u115a-\u115e\u1162\u1164\u1166\u1168\u116a-\u116c\u116f-\u1171\u1174\u1176-\u119d\u119f-\u11a7\u11a9-\u11aa\u11ac-\u11ad\u11b0-\u11b6\u11b9\u11bb\u11c3-\u11ea\u11ec-\u11ef\u11f1-\u11f8\u11fa-\u1dff\u1e9c-\u1e9f\u1efa-\u1eff\u1f16-\u1f17\u1f1e-\u1f1f\u1f46-\u1f47\u1f4e-\u1f4f\u1f58\u1f5a\u1f5c\u1f5e\u1f7e-\u1f7f\u1fb5\u1fbd\u1fbf-\u1fc1\u1fc5\u1fcd-\u1fcf\u1fd4-\u1fd5\u1fdc-\u1fdf\u1fed-\u1ff1\u1ff5\u1ffd-\u2125\u2127-\u2129\u212c-\u212d\u212f-\u217f\u2183-\u3006\u3008-\u3020\u302a-\u3040\u3095-\u30a0\u30fb-\u3104\u312d-\u4dff\u9fa6-\uabff\ud7a4-\uffff]') # noqa - -# Simpler things -nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\\-'()+,./:=?;!*#@$_%]") - - -class InfosetFilter(object): - replacementRegexp = re.compile(r"U[\dA-F]{5,5}") - - def __init__(self, - dropXmlnsLocalName=False, - dropXmlnsAttrNs=False, - preventDoubleDashComments=False, - preventDashAtCommentEnd=False, - replaceFormFeedCharacters=True, - preventSingleQuotePubid=False): - - self.dropXmlnsLocalName = dropXmlnsLocalName - self.dropXmlnsAttrNs = dropXmlnsAttrNs - - self.preventDoubleDashComments = preventDoubleDashComments - self.preventDashAtCommentEnd = preventDashAtCommentEnd - - self.replaceFormFeedCharacters = replaceFormFeedCharacters - - self.preventSingleQuotePubid = preventSingleQuotePubid - - self.replaceCache = {} - - def coerceAttribute(self, name, namespace=None): - if self.dropXmlnsLocalName and name.startswith("xmlns:"): - warnings.warn("Attributes cannot begin with xmlns", DataLossWarning) - return None - elif (self.dropXmlnsAttrNs and - namespace == "http://www.w3.org/2000/xmlns/"): - warnings.warn("Attributes cannot be in the xml namespace", DataLossWarning) - return None - else: - return self.toXmlName(name) - - def coerceElement(self, name): - return self.toXmlName(name) - - def coerceComment(self, data): - if self.preventDoubleDashComments: - while "--" in data: - warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning) - data = data.replace("--", "- -") - if data.endswith("-"): - warnings.warn("Comments cannot end in a dash", DataLossWarning) - data += " " - return data - - def coerceCharacters(self, data): - if self.replaceFormFeedCharacters: - for _ in range(data.count("\x0C")): - warnings.warn("Text cannot contain U+000C", DataLossWarning) - data = data.replace("\x0C", " ") - # Other non-xml characters - return data - - def coercePubid(self, data): - dataOutput = data - for char in nonPubidCharRegexp.findall(data): - warnings.warn("Coercing non-XML pubid", DataLossWarning) - replacement = self.getReplacementCharacter(char) - dataOutput = dataOutput.replace(char, replacement) - if self.preventSingleQuotePubid and dataOutput.find("'") >= 0: - warnings.warn("Pubid cannot contain single quote", DataLossWarning) - dataOutput = dataOutput.replace("'", self.getReplacementCharacter("'")) - return dataOutput - - def toXmlName(self, name): - nameFirst = name[0] - nameRest = name[1:] - m = nonXmlNameFirstBMPRegexp.match(nameFirst) - if m: - warnings.warn("Coercing non-XML name: %s" % name, DataLossWarning) - nameFirstOutput = self.getReplacementCharacter(nameFirst) - else: - nameFirstOutput = nameFirst - - nameRestOutput = nameRest - replaceChars = set(nonXmlNameBMPRegexp.findall(nameRest)) - for char in replaceChars: - warnings.warn("Coercing non-XML name: %s" % name, DataLossWarning) - replacement = self.getReplacementCharacter(char) - nameRestOutput = nameRestOutput.replace(char, replacement) - return nameFirstOutput + nameRestOutput - - def getReplacementCharacter(self, char): - if char in self.replaceCache: - replacement = self.replaceCache[char] - else: - replacement = self.escapeChar(char) - return replacement - - def fromXmlName(self, name): - for item in set(self.replacementRegexp.findall(name)): - name = name.replace(item, self.unescapeChar(item)) - return name - - def escapeChar(self, char): - replacement = "U%05X" % ord(char) - self.replaceCache[char] = replacement - return replacement - - def unescapeChar(self, charcode): - return chr(int(charcode[1:], 16)) diff --git a/src/pip/_vendor/html5lib/_inputstream.py b/src/pip/_vendor/html5lib/_inputstream.py deleted file mode 100644 index e0bb37602c8..00000000000 --- a/src/pip/_vendor/html5lib/_inputstream.py +++ /dev/null @@ -1,918 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from pip._vendor.six import text_type -from pip._vendor.six.moves import http_client, urllib - -import codecs -import re -from io import BytesIO, StringIO - -from pip._vendor import webencodings - -from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase -from .constants import _ReparseException -from . import _utils - -# Non-unicode versions of constants for use in the pre-parser -spaceCharactersBytes = frozenset([item.encode("ascii") for item in spaceCharacters]) -asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters]) -asciiUppercaseBytes = frozenset([item.encode("ascii") for item in asciiUppercase]) -spacesAngleBrackets = spaceCharactersBytes | frozenset([b">", b"<"]) - - -invalid_unicode_no_surrogate = "[\u0001-\u0008\u000B\u000E-\u001F\u007F-\u009F\uFDD0-\uFDEF\uFFFE\uFFFF\U0001FFFE\U0001FFFF\U0002FFFE\U0002FFFF\U0003FFFE\U0003FFFF\U0004FFFE\U0004FFFF\U0005FFFE\U0005FFFF\U0006FFFE\U0006FFFF\U0007FFFE\U0007FFFF\U0008FFFE\U0008FFFF\U0009FFFE\U0009FFFF\U000AFFFE\U000AFFFF\U000BFFFE\U000BFFFF\U000CFFFE\U000CFFFF\U000DFFFE\U000DFFFF\U000EFFFE\U000EFFFF\U000FFFFE\U000FFFFF\U0010FFFE\U0010FFFF]" # noqa - -if _utils.supports_lone_surrogates: - # Use one extra step of indirection and create surrogates with - # eval. Not using this indirection would introduce an illegal - # unicode literal on platforms not supporting such lone - # surrogates. - assert invalid_unicode_no_surrogate[-1] == "]" and invalid_unicode_no_surrogate.count("]") == 1 - invalid_unicode_re = re.compile(invalid_unicode_no_surrogate[:-1] + - eval('"\\uD800-\\uDFFF"') + # pylint:disable=eval-used - "]") -else: - invalid_unicode_re = re.compile(invalid_unicode_no_surrogate) - -non_bmp_invalid_codepoints = {0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, - 0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF, - 0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE, - 0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF, - 0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE, - 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF, - 0x10FFFE, 0x10FFFF} - -ascii_punctuation_re = re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005C\u005B-\u0060\u007B-\u007E]") - -# Cache for charsUntil() -charsUntilRegEx = {} - - -class BufferedStream(object): - """Buffering for streams that do not have buffering of their own - - The buffer is implemented as a list of chunks on the assumption that - joining many strings will be slow since it is O(n**2) - """ - - def __init__(self, stream): - self.stream = stream - self.buffer = [] - self.position = [-1, 0] # chunk number, offset - - def tell(self): - pos = 0 - for chunk in self.buffer[:self.position[0]]: - pos += len(chunk) - pos += self.position[1] - return pos - - def seek(self, pos): - assert pos <= self._bufferedBytes() - offset = pos - i = 0 - while len(self.buffer[i]) < offset: - offset -= len(self.buffer[i]) - i += 1 - self.position = [i, offset] - - def read(self, bytes): - if not self.buffer: - return self._readStream(bytes) - elif (self.position[0] == len(self.buffer) and - self.position[1] == len(self.buffer[-1])): - return self._readStream(bytes) - else: - return self._readFromBuffer(bytes) - - def _bufferedBytes(self): - return sum([len(item) for item in self.buffer]) - - def _readStream(self, bytes): - data = self.stream.read(bytes) - self.buffer.append(data) - self.position[0] += 1 - self.position[1] = len(data) - return data - - def _readFromBuffer(self, bytes): - remainingBytes = bytes - rv = [] - bufferIndex = self.position[0] - bufferOffset = self.position[1] - while bufferIndex < len(self.buffer) and remainingBytes != 0: - assert remainingBytes > 0 - bufferedData = self.buffer[bufferIndex] - - if remainingBytes <= len(bufferedData) - bufferOffset: - bytesToRead = remainingBytes - self.position = [bufferIndex, bufferOffset + bytesToRead] - else: - bytesToRead = len(bufferedData) - bufferOffset - self.position = [bufferIndex, len(bufferedData)] - bufferIndex += 1 - rv.append(bufferedData[bufferOffset:bufferOffset + bytesToRead]) - remainingBytes -= bytesToRead - - bufferOffset = 0 - - if remainingBytes: - rv.append(self._readStream(remainingBytes)) - - return b"".join(rv) - - -def HTMLInputStream(source, **kwargs): - # Work around Python bug #20007: read(0) closes the connection. - # http://bugs.python.org/issue20007 - if (isinstance(source, http_client.HTTPResponse) or - # Also check for addinfourl wrapping HTTPResponse - (isinstance(source, urllib.response.addbase) and - isinstance(source.fp, http_client.HTTPResponse))): - isUnicode = False - elif hasattr(source, "read"): - isUnicode = isinstance(source.read(0), text_type) - else: - isUnicode = isinstance(source, text_type) - - if isUnicode: - encodings = [x for x in kwargs if x.endswith("_encoding")] - if encodings: - raise TypeError("Cannot set an encoding with a unicode input, set %r" % encodings) - - return HTMLUnicodeInputStream(source, **kwargs) - else: - return HTMLBinaryInputStream(source, **kwargs) - - -class HTMLUnicodeInputStream(object): - """Provides a unicode stream of characters to the HTMLTokenizer. - - This class takes care of character encoding and removing or replacing - incorrect byte-sequences and also provides column and line tracking. - - """ - - _defaultChunkSize = 10240 - - def __init__(self, source): - """Initialises the HTMLInputStream. - - HTMLInputStream(source, [encoding]) -> Normalized stream from source - for use by html5lib. - - source can be either a file-object, local filename or a string. - - The optional encoding parameter must be a string that indicates - the encoding. If specified, that encoding will be used, - regardless of any BOM or later declaration (such as in a meta - element) - - """ - - if not _utils.supports_lone_surrogates: - # Such platforms will have already checked for such - # surrogate errors, so no need to do this checking. - self.reportCharacterErrors = None - elif len("\U0010FFFF") == 1: - self.reportCharacterErrors = self.characterErrorsUCS4 - else: - self.reportCharacterErrors = self.characterErrorsUCS2 - - # List of where new lines occur - self.newLines = [0] - - self.charEncoding = (lookupEncoding("utf-8"), "certain") - self.dataStream = self.openStream(source) - - self.reset() - - def reset(self): - self.chunk = "" - self.chunkSize = 0 - self.chunkOffset = 0 - self.errors = [] - - # number of (complete) lines in previous chunks - self.prevNumLines = 0 - # number of columns in the last line of the previous chunk - self.prevNumCols = 0 - - # Deal with CR LF and surrogates split over chunk boundaries - self._bufferedCharacter = None - - def openStream(self, source): - """Produces a file object from source. - - source can be either a file object, local filename or a string. - - """ - # Already a file object - if hasattr(source, 'read'): - stream = source - else: - stream = StringIO(source) - - return stream - - def _position(self, offset): - chunk = self.chunk - nLines = chunk.count('\n', 0, offset) - positionLine = self.prevNumLines + nLines - lastLinePos = chunk.rfind('\n', 0, offset) - if lastLinePos == -1: - positionColumn = self.prevNumCols + offset - else: - positionColumn = offset - (lastLinePos + 1) - return (positionLine, positionColumn) - - def position(self): - """Returns (line, col) of the current position in the stream.""" - line, col = self._position(self.chunkOffset) - return (line + 1, col) - - def char(self): - """ Read one character from the stream or queue if available. Return - EOF when EOF is reached. - """ - # Read a new chunk from the input stream if necessary - if self.chunkOffset >= self.chunkSize: - if not self.readChunk(): - return EOF - - chunkOffset = self.chunkOffset - char = self.chunk[chunkOffset] - self.chunkOffset = chunkOffset + 1 - - return char - - def readChunk(self, chunkSize=None): - if chunkSize is None: - chunkSize = self._defaultChunkSize - - self.prevNumLines, self.prevNumCols = self._position(self.chunkSize) - - self.chunk = "" - self.chunkSize = 0 - self.chunkOffset = 0 - - data = self.dataStream.read(chunkSize) - - # Deal with CR LF and surrogates broken across chunks - if self._bufferedCharacter: - data = self._bufferedCharacter + data - self._bufferedCharacter = None - elif not data: - # We have no more data, bye-bye stream - return False - - if len(data) > 1: - lastv = ord(data[-1]) - if lastv == 0x0D or 0xD800 <= lastv <= 0xDBFF: - self._bufferedCharacter = data[-1] - data = data[:-1] - - if self.reportCharacterErrors: - self.reportCharacterErrors(data) - - # Replace invalid characters - data = data.replace("\r\n", "\n") - data = data.replace("\r", "\n") - - self.chunk = data - self.chunkSize = len(data) - - return True - - def characterErrorsUCS4(self, data): - for _ in range(len(invalid_unicode_re.findall(data))): - self.errors.append("invalid-codepoint") - - def characterErrorsUCS2(self, data): - # Someone picked the wrong compile option - # You lose - skip = False - for match in invalid_unicode_re.finditer(data): - if skip: - continue - codepoint = ord(match.group()) - pos = match.start() - # Pretty sure there should be endianness issues here - if _utils.isSurrogatePair(data[pos:pos + 2]): - # We have a surrogate pair! - char_val = _utils.surrogatePairToCodepoint(data[pos:pos + 2]) - if char_val in non_bmp_invalid_codepoints: - self.errors.append("invalid-codepoint") - skip = True - elif (codepoint >= 0xD800 and codepoint <= 0xDFFF and - pos == len(data) - 1): - self.errors.append("invalid-codepoint") - else: - skip = False - self.errors.append("invalid-codepoint") - - def charsUntil(self, characters, opposite=False): - """ Returns a string of characters from the stream up to but not - including any character in 'characters' or EOF. 'characters' must be - a container that supports the 'in' method and iteration over its - characters. - """ - - # Use a cache of regexps to find the required characters - try: - chars = charsUntilRegEx[(characters, opposite)] - except KeyError: - if __debug__: - for c in characters: - assert(ord(c) < 128) - regex = "".join(["\\x%02x" % ord(c) for c in characters]) - if not opposite: - regex = "^%s" % regex - chars = charsUntilRegEx[(characters, opposite)] = re.compile("[%s]+" % regex) - - rv = [] - - while True: - # Find the longest matching prefix - m = chars.match(self.chunk, self.chunkOffset) - if m is None: - # If nothing matched, and it wasn't because we ran out of chunk, - # then stop - if self.chunkOffset != self.chunkSize: - break - else: - end = m.end() - # If not the whole chunk matched, return everything - # up to the part that didn't match - if end != self.chunkSize: - rv.append(self.chunk[self.chunkOffset:end]) - self.chunkOffset = end - break - # If the whole remainder of the chunk matched, - # use it all and read the next chunk - rv.append(self.chunk[self.chunkOffset:]) - if not self.readChunk(): - # Reached EOF - break - - r = "".join(rv) - return r - - def unget(self, char): - # Only one character is allowed to be ungotten at once - it must - # be consumed again before any further call to unget - if char is not EOF: - if self.chunkOffset == 0: - # unget is called quite rarely, so it's a good idea to do - # more work here if it saves a bit of work in the frequently - # called char and charsUntil. - # So, just prepend the ungotten character onto the current - # chunk: - self.chunk = char + self.chunk - self.chunkSize += 1 - else: - self.chunkOffset -= 1 - assert self.chunk[self.chunkOffset] == char - - -class HTMLBinaryInputStream(HTMLUnicodeInputStream): - """Provides a unicode stream of characters to the HTMLTokenizer. - - This class takes care of character encoding and removing or replacing - incorrect byte-sequences and also provides column and line tracking. - - """ - - def __init__(self, source, override_encoding=None, transport_encoding=None, - same_origin_parent_encoding=None, likely_encoding=None, - default_encoding="windows-1252", useChardet=True): - """Initialises the HTMLInputStream. - - HTMLInputStream(source, [encoding]) -> Normalized stream from source - for use by html5lib. - - source can be either a file-object, local filename or a string. - - The optional encoding parameter must be a string that indicates - the encoding. If specified, that encoding will be used, - regardless of any BOM or later declaration (such as in a meta - element) - - """ - # Raw Stream - for unicode objects this will encode to utf-8 and set - # self.charEncoding as appropriate - self.rawStream = self.openStream(source) - - HTMLUnicodeInputStream.__init__(self, self.rawStream) - - # Encoding Information - # Number of bytes to use when looking for a meta element with - # encoding information - self.numBytesMeta = 1024 - # Number of bytes to use when using detecting encoding using chardet - self.numBytesChardet = 100 - # Things from args - self.override_encoding = override_encoding - self.transport_encoding = transport_encoding - self.same_origin_parent_encoding = same_origin_parent_encoding - self.likely_encoding = likely_encoding - self.default_encoding = default_encoding - - # Determine encoding - self.charEncoding = self.determineEncoding(useChardet) - assert self.charEncoding[0] is not None - - # Call superclass - self.reset() - - def reset(self): - self.dataStream = self.charEncoding[0].codec_info.streamreader(self.rawStream, 'replace') - HTMLUnicodeInputStream.reset(self) - - def openStream(self, source): - """Produces a file object from source. - - source can be either a file object, local filename or a string. - - """ - # Already a file object - if hasattr(source, 'read'): - stream = source - else: - stream = BytesIO(source) - - try: - stream.seek(stream.tell()) - except Exception: - stream = BufferedStream(stream) - - return stream - - def determineEncoding(self, chardet=True): - # BOMs take precedence over everything - # This will also read past the BOM if present - charEncoding = self.detectBOM(), "certain" - if charEncoding[0] is not None: - return charEncoding - - # If we've been overridden, we've been overridden - charEncoding = lookupEncoding(self.override_encoding), "certain" - if charEncoding[0] is not None: - return charEncoding - - # Now check the transport layer - charEncoding = lookupEncoding(self.transport_encoding), "certain" - if charEncoding[0] is not None: - return charEncoding - - # Look for meta elements with encoding information - charEncoding = self.detectEncodingMeta(), "tentative" - if charEncoding[0] is not None: - return charEncoding - - # Parent document encoding - charEncoding = lookupEncoding(self.same_origin_parent_encoding), "tentative" - if charEncoding[0] is not None and not charEncoding[0].name.startswith("utf-16"): - return charEncoding - - # "likely" encoding - charEncoding = lookupEncoding(self.likely_encoding), "tentative" - if charEncoding[0] is not None: - return charEncoding - - # Guess with chardet, if available - if chardet: - try: - from pip._vendor.chardet.universaldetector import UniversalDetector - except ImportError: - pass - else: - buffers = [] - detector = UniversalDetector() - while not detector.done: - buffer = self.rawStream.read(self.numBytesChardet) - assert isinstance(buffer, bytes) - if not buffer: - break - buffers.append(buffer) - detector.feed(buffer) - detector.close() - encoding = lookupEncoding(detector.result['encoding']) - self.rawStream.seek(0) - if encoding is not None: - return encoding, "tentative" - - # Try the default encoding - charEncoding = lookupEncoding(self.default_encoding), "tentative" - if charEncoding[0] is not None: - return charEncoding - - # Fallback to html5lib's default if even that hasn't worked - return lookupEncoding("windows-1252"), "tentative" - - def changeEncoding(self, newEncoding): - assert self.charEncoding[1] != "certain" - newEncoding = lookupEncoding(newEncoding) - if newEncoding is None: - return - if newEncoding.name in ("utf-16be", "utf-16le"): - newEncoding = lookupEncoding("utf-8") - assert newEncoding is not None - elif newEncoding == self.charEncoding[0]: - self.charEncoding = (self.charEncoding[0], "certain") - else: - self.rawStream.seek(0) - self.charEncoding = (newEncoding, "certain") - self.reset() - raise _ReparseException("Encoding changed from %s to %s" % (self.charEncoding[0], newEncoding)) - - def detectBOM(self): - """Attempts to detect at BOM at the start of the stream. If - an encoding can be determined from the BOM return the name of the - encoding otherwise return None""" - bomDict = { - codecs.BOM_UTF8: 'utf-8', - codecs.BOM_UTF16_LE: 'utf-16le', codecs.BOM_UTF16_BE: 'utf-16be', - codecs.BOM_UTF32_LE: 'utf-32le', codecs.BOM_UTF32_BE: 'utf-32be' - } - - # Go to beginning of file and read in 4 bytes - string = self.rawStream.read(4) - assert isinstance(string, bytes) - - # Try detecting the BOM using bytes from the string - encoding = bomDict.get(string[:3]) # UTF-8 - seek = 3 - if not encoding: - # Need to detect UTF-32 before UTF-16 - encoding = bomDict.get(string) # UTF-32 - seek = 4 - if not encoding: - encoding = bomDict.get(string[:2]) # UTF-16 - seek = 2 - - # Set the read position past the BOM if one was found, otherwise - # set it to the start of the stream - if encoding: - self.rawStream.seek(seek) - return lookupEncoding(encoding) - else: - self.rawStream.seek(0) - return None - - def detectEncodingMeta(self): - """Report the encoding declared by the meta element - """ - buffer = self.rawStream.read(self.numBytesMeta) - assert isinstance(buffer, bytes) - parser = EncodingParser(buffer) - self.rawStream.seek(0) - encoding = parser.getEncoding() - - if encoding is not None and encoding.name in ("utf-16be", "utf-16le"): - encoding = lookupEncoding("utf-8") - - return encoding - - -class EncodingBytes(bytes): - """String-like object with an associated position and various extra methods - If the position is ever greater than the string length then an exception is - raised""" - def __new__(self, value): - assert isinstance(value, bytes) - return bytes.__new__(self, value.lower()) - - def __init__(self, value): - # pylint:disable=unused-argument - self._position = -1 - - def __iter__(self): - return self - - def __next__(self): - p = self._position = self._position + 1 - if p >= len(self): - raise StopIteration - elif p < 0: - raise TypeError - return self[p:p + 1] - - def next(self): - # Py2 compat - return self.__next__() - - def previous(self): - p = self._position - if p >= len(self): - raise StopIteration - elif p < 0: - raise TypeError - self._position = p = p - 1 - return self[p:p + 1] - - def setPosition(self, position): - if self._position >= len(self): - raise StopIteration - self._position = position - - def getPosition(self): - if self._position >= len(self): - raise StopIteration - if self._position >= 0: - return self._position - else: - return None - - position = property(getPosition, setPosition) - - def getCurrentByte(self): - return self[self.position:self.position + 1] - - currentByte = property(getCurrentByte) - - def skip(self, chars=spaceCharactersBytes): - """Skip past a list of characters""" - p = self.position # use property for the error-checking - while p < len(self): - c = self[p:p + 1] - if c not in chars: - self._position = p - return c - p += 1 - self._position = p - return None - - def skipUntil(self, chars): - p = self.position - while p < len(self): - c = self[p:p + 1] - if c in chars: - self._position = p - return c - p += 1 - self._position = p - return None - - def matchBytes(self, bytes): - """Look for a sequence of bytes at the start of a string. If the bytes - are found return True and advance the position to the byte after the - match. Otherwise return False and leave the position alone""" - rv = self.startswith(bytes, self.position) - if rv: - self.position += len(bytes) - return rv - - def jumpTo(self, bytes): - """Look for the next sequence of bytes matching a given sequence. If - a match is found advance the position to the last byte of the match""" - try: - self._position = self.index(bytes, self.position) + len(bytes) - 1 - except ValueError: - raise StopIteration - return True - - -class EncodingParser(object): - """Mini parser for detecting character encoding from meta elements""" - - def __init__(self, data): - """string - the data to work on for encoding detection""" - self.data = EncodingBytes(data) - self.encoding = None - - def getEncoding(self): - if b"") - - def handleMeta(self): - if self.data.currentByte not in spaceCharactersBytes: - # if we have ") - - def getAttribute(self): - """Return a name,value pair for the next attribute in the stream, - if one is found, or None""" - data = self.data - # Step 1 (skip chars) - c = data.skip(spaceCharactersBytes | frozenset([b"/"])) - assert c is None or len(c) == 1 - # Step 2 - if c in (b">", None): - return None - # Step 3 - attrName = [] - attrValue = [] - # Step 4 attribute name - while True: - if c == b"=" and attrName: - break - elif c in spaceCharactersBytes: - # Step 6! - c = data.skip() - break - elif c in (b"/", b">"): - return b"".join(attrName), b"" - elif c in asciiUppercaseBytes: - attrName.append(c.lower()) - elif c is None: - return None - else: - attrName.append(c) - # Step 5 - c = next(data) - # Step 7 - if c != b"=": - data.previous() - return b"".join(attrName), b"" - # Step 8 - next(data) - # Step 9 - c = data.skip() - # Step 10 - if c in (b"'", b'"'): - # 10.1 - quoteChar = c - while True: - # 10.2 - c = next(data) - # 10.3 - if c == quoteChar: - next(data) - return b"".join(attrName), b"".join(attrValue) - # 10.4 - elif c in asciiUppercaseBytes: - attrValue.append(c.lower()) - # 10.5 - else: - attrValue.append(c) - elif c == b">": - return b"".join(attrName), b"" - elif c in asciiUppercaseBytes: - attrValue.append(c.lower()) - elif c is None: - return None - else: - attrValue.append(c) - # Step 11 - while True: - c = next(data) - if c in spacesAngleBrackets: - return b"".join(attrName), b"".join(attrValue) - elif c in asciiUppercaseBytes: - attrValue.append(c.lower()) - elif c is None: - return None - else: - attrValue.append(c) - - -class ContentAttrParser(object): - def __init__(self, data): - assert isinstance(data, bytes) - self.data = data - - def parse(self): - try: - # Check if the attr name is charset - # otherwise return - self.data.jumpTo(b"charset") - self.data.position += 1 - self.data.skip() - if not self.data.currentByte == b"=": - # If there is no = sign keep looking for attrs - return None - self.data.position += 1 - self.data.skip() - # Look for an encoding between matching quote marks - if self.data.currentByte in (b'"', b"'"): - quoteMark = self.data.currentByte - self.data.position += 1 - oldPosition = self.data.position - if self.data.jumpTo(quoteMark): - return self.data[oldPosition:self.data.position] - else: - return None - else: - # Unquoted value - oldPosition = self.data.position - try: - self.data.skipUntil(spaceCharactersBytes) - return self.data[oldPosition:self.data.position] - except StopIteration: - # Return the whole remaining value - return self.data[oldPosition:] - except StopIteration: - return None - - -def lookupEncoding(encoding): - """Return the python codec name corresponding to an encoding or None if the - string doesn't correspond to a valid encoding.""" - if isinstance(encoding, bytes): - try: - encoding = encoding.decode("ascii") - except UnicodeDecodeError: - return None - - if encoding is not None: - try: - return webencodings.lookup(encoding) - except AttributeError: - return None - else: - return None diff --git a/src/pip/_vendor/html5lib/_tokenizer.py b/src/pip/_vendor/html5lib/_tokenizer.py deleted file mode 100644 index 5f00253e2f6..00000000000 --- a/src/pip/_vendor/html5lib/_tokenizer.py +++ /dev/null @@ -1,1735 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from pip._vendor.six import unichr as chr - -from collections import deque, OrderedDict -from sys import version_info - -from .constants import spaceCharacters -from .constants import entities -from .constants import asciiLetters, asciiUpper2Lower -from .constants import digits, hexDigits, EOF -from .constants import tokenTypes, tagTokenTypes -from .constants import replacementCharacters - -from ._inputstream import HTMLInputStream - -from ._trie import Trie - -entitiesTrie = Trie(entities) - -if version_info >= (3, 7): - attributeMap = dict -else: - attributeMap = OrderedDict - - -class HTMLTokenizer(object): - """ This class takes care of tokenizing HTML. - - * self.currentToken - Holds the token that is currently being processed. - - * self.state - Holds a reference to the method to be invoked... XXX - - * self.stream - Points to HTMLInputStream object. - """ - - def __init__(self, stream, parser=None, **kwargs): - - self.stream = HTMLInputStream(stream, **kwargs) - self.parser = parser - - # Setup the initial tokenizer state - self.escapeFlag = False - self.lastFourChars = [] - self.state = self.dataState - self.escape = False - - # The current token being created - self.currentToken = None - super(HTMLTokenizer, self).__init__() - - def __iter__(self): - """ This is where the magic happens. - - We do our usually processing through the states and when we have a token - to return we yield the token which pauses processing until the next token - is requested. - """ - self.tokenQueue = deque([]) - # Start processing. When EOF is reached self.state will return False - # instead of True and the loop will terminate. - while self.state(): - while self.stream.errors: - yield {"type": tokenTypes["ParseError"], "data": self.stream.errors.pop(0)} - while self.tokenQueue: - yield self.tokenQueue.popleft() - - def consumeNumberEntity(self, isHex): - """This function returns either U+FFFD or the character based on the - decimal or hexadecimal representation. It also discards ";" if present. - If not present self.tokenQueue.append({"type": tokenTypes["ParseError"]}) is invoked. - """ - - allowed = digits - radix = 10 - if isHex: - allowed = hexDigits - radix = 16 - - charStack = [] - - # Consume all the characters that are in range while making sure we - # don't hit an EOF. - c = self.stream.char() - while c in allowed and c is not EOF: - charStack.append(c) - c = self.stream.char() - - # Convert the set of characters consumed to an int. - charAsInt = int("".join(charStack), radix) - - # Certain characters get replaced with others - if charAsInt in replacementCharacters: - char = replacementCharacters[charAsInt] - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "illegal-codepoint-for-numeric-entity", - "datavars": {"charAsInt": charAsInt}}) - elif ((0xD800 <= charAsInt <= 0xDFFF) or - (charAsInt > 0x10FFFF)): - char = "\uFFFD" - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "illegal-codepoint-for-numeric-entity", - "datavars": {"charAsInt": charAsInt}}) - else: - # Should speed up this check somehow (e.g. move the set to a constant) - if ((0x0001 <= charAsInt <= 0x0008) or - (0x000E <= charAsInt <= 0x001F) or - (0x007F <= charAsInt <= 0x009F) or - (0xFDD0 <= charAsInt <= 0xFDEF) or - charAsInt in frozenset([0x000B, 0xFFFE, 0xFFFF, 0x1FFFE, - 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, - 0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, - 0x5FFFF, 0x6FFFE, 0x6FFFF, 0x7FFFE, - 0x7FFFF, 0x8FFFE, 0x8FFFF, 0x9FFFE, - 0x9FFFF, 0xAFFFE, 0xAFFFF, 0xBFFFE, - 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE, - 0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, - 0xFFFFF, 0x10FFFE, 0x10FFFF])): - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": - "illegal-codepoint-for-numeric-entity", - "datavars": {"charAsInt": charAsInt}}) - try: - # Try/except needed as UCS-2 Python builds' unichar only works - # within the BMP. - char = chr(charAsInt) - except ValueError: - v = charAsInt - 0x10000 - char = chr(0xD800 | (v >> 10)) + chr(0xDC00 | (v & 0x3FF)) - - # Discard the ; if present. Otherwise, put it back on the queue and - # invoke parseError on parser. - if c != ";": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "numeric-entity-without-semicolon"}) - self.stream.unget(c) - - return char - - def consumeEntity(self, allowedChar=None, fromAttribute=False): - # Initialise to the default output for when no entity is matched - output = "&" - - charStack = [self.stream.char()] - if (charStack[0] in spaceCharacters or charStack[0] in (EOF, "<", "&") or - (allowedChar is not None and allowedChar == charStack[0])): - self.stream.unget(charStack[0]) - - elif charStack[0] == "#": - # Read the next character to see if it's hex or decimal - hex = False - charStack.append(self.stream.char()) - if charStack[-1] in ("x", "X"): - hex = True - charStack.append(self.stream.char()) - - # charStack[-1] should be the first digit - if (hex and charStack[-1] in hexDigits) \ - or (not hex and charStack[-1] in digits): - # At least one digit found, so consume the whole number - self.stream.unget(charStack[-1]) - output = self.consumeNumberEntity(hex) - else: - # No digits found - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "expected-numeric-entity"}) - self.stream.unget(charStack.pop()) - output = "&" + "".join(charStack) - - else: - # At this point in the process might have named entity. Entities - # are stored in the global variable "entities". - # - # Consume characters and compare to these to a substring of the - # entity names in the list until the substring no longer matches. - while (charStack[-1] is not EOF): - if not entitiesTrie.has_keys_with_prefix("".join(charStack)): - break - charStack.append(self.stream.char()) - - # At this point we have a string that starts with some characters - # that may match an entity - # Try to find the longest entity the string will match to take care - # of ¬i for instance. - try: - entityName = entitiesTrie.longest_prefix("".join(charStack[:-1])) - entityLength = len(entityName) - except KeyError: - entityName = None - - if entityName is not None: - if entityName[-1] != ";": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "named-entity-without-semicolon"}) - if (entityName[-1] != ";" and fromAttribute and - (charStack[entityLength] in asciiLetters or - charStack[entityLength] in digits or - charStack[entityLength] == "=")): - self.stream.unget(charStack.pop()) - output = "&" + "".join(charStack) - else: - output = entities[entityName] - self.stream.unget(charStack.pop()) - output += "".join(charStack[entityLength:]) - else: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-named-entity"}) - self.stream.unget(charStack.pop()) - output = "&" + "".join(charStack) - - if fromAttribute: - self.currentToken["data"][-1][1] += output - else: - if output in spaceCharacters: - tokenType = "SpaceCharacters" - else: - tokenType = "Characters" - self.tokenQueue.append({"type": tokenTypes[tokenType], "data": output}) - - def processEntityInAttribute(self, allowedChar): - """This method replaces the need for "entityInAttributeValueState". - """ - self.consumeEntity(allowedChar=allowedChar, fromAttribute=True) - - def emitCurrentToken(self): - """This method is a generic handler for emitting the tags. It also sets - the state to "data" because that's what's needed after a token has been - emitted. - """ - token = self.currentToken - # Add token to the queue to be yielded - if (token["type"] in tagTokenTypes): - token["name"] = token["name"].translate(asciiUpper2Lower) - if token["type"] == tokenTypes["StartTag"]: - raw = token["data"] - data = attributeMap(raw) - if len(raw) > len(data): - # we had some duplicated attribute, fix so first wins - data.update(raw[::-1]) - token["data"] = data - - if token["type"] == tokenTypes["EndTag"]: - if token["data"]: - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "attributes-in-end-tag"}) - if token["selfClosing"]: - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "self-closing-flag-on-end-tag"}) - self.tokenQueue.append(token) - self.state = self.dataState - - # Below are the various tokenizer states worked out. - def dataState(self): - data = self.stream.char() - if data == "&": - self.state = self.entityDataState - elif data == "<": - self.state = self.tagOpenState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": "\u0000"}) - elif data is EOF: - # Tokenization ends. - return False - elif data in spaceCharacters: - # Directly after emitting a token you switch back to the "data - # state". At that point spaceCharacters are important so they are - # emitted separately. - self.tokenQueue.append({"type": tokenTypes["SpaceCharacters"], "data": - data + self.stream.charsUntil(spaceCharacters, True)}) - # No need to update lastFourChars here, since the first space will - # have already been appended to lastFourChars and will have broken - # any sequences - else: - chars = self.stream.charsUntil(("&", "<", "\u0000")) - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": - data + chars}) - return True - - def entityDataState(self): - self.consumeEntity() - self.state = self.dataState - return True - - def rcdataState(self): - data = self.stream.char() - if data == "&": - self.state = self.characterReferenceInRcdata - elif data == "<": - self.state = self.rcdataLessThanSignState - elif data == EOF: - # Tokenization ends. - return False - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": "\uFFFD"}) - elif data in spaceCharacters: - # Directly after emitting a token you switch back to the "data - # state". At that point spaceCharacters are important so they are - # emitted separately. - self.tokenQueue.append({"type": tokenTypes["SpaceCharacters"], "data": - data + self.stream.charsUntil(spaceCharacters, True)}) - # No need to update lastFourChars here, since the first space will - # have already been appended to lastFourChars and will have broken - # any sequences - else: - chars = self.stream.charsUntil(("&", "<", "\u0000")) - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": - data + chars}) - return True - - def characterReferenceInRcdata(self): - self.consumeEntity() - self.state = self.rcdataState - return True - - def rawtextState(self): - data = self.stream.char() - if data == "<": - self.state = self.rawtextLessThanSignState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": "\uFFFD"}) - elif data == EOF: - # Tokenization ends. - return False - else: - chars = self.stream.charsUntil(("<", "\u0000")) - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": - data + chars}) - return True - - def scriptDataState(self): - data = self.stream.char() - if data == "<": - self.state = self.scriptDataLessThanSignState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": "\uFFFD"}) - elif data == EOF: - # Tokenization ends. - return False - else: - chars = self.stream.charsUntil(("<", "\u0000")) - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": - data + chars}) - return True - - def plaintextState(self): - data = self.stream.char() - if data == EOF: - # Tokenization ends. - return False - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": "\uFFFD"}) - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": - data + self.stream.charsUntil("\u0000")}) - return True - - def tagOpenState(self): - data = self.stream.char() - if data == "!": - self.state = self.markupDeclarationOpenState - elif data == "/": - self.state = self.closeTagOpenState - elif data in asciiLetters: - self.currentToken = {"type": tokenTypes["StartTag"], - "name": data, "data": [], - "selfClosing": False, - "selfClosingAcknowledged": False} - self.state = self.tagNameState - elif data == ">": - # XXX In theory it could be something besides a tag name. But - # do we really care? - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-tag-name-but-got-right-bracket"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<>"}) - self.state = self.dataState - elif data == "?": - # XXX In theory it could be something besides a tag name. But - # do we really care? - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-tag-name-but-got-question-mark"}) - self.stream.unget(data) - self.state = self.bogusCommentState - else: - # XXX - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-tag-name"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) - self.stream.unget(data) - self.state = self.dataState - return True - - def closeTagOpenState(self): - data = self.stream.char() - if data in asciiLetters: - self.currentToken = {"type": tokenTypes["EndTag"], "name": data, - "data": [], "selfClosing": False} - self.state = self.tagNameState - elif data == ">": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-closing-tag-but-got-right-bracket"}) - self.state = self.dataState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-closing-tag-but-got-eof"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "": - self.emitCurrentToken() - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-tag-name"}) - self.state = self.dataState - elif data == "/": - self.state = self.selfClosingStartTagState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["name"] += "\uFFFD" - else: - self.currentToken["name"] += data - # (Don't use charsUntil here, because tag names are - # very short and it's faster to not do anything fancy) - return True - - def rcdataLessThanSignState(self): - data = self.stream.char() - if data == "/": - self.temporaryBuffer = "" - self.state = self.rcdataEndTagOpenState - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) - self.stream.unget(data) - self.state = self.rcdataState - return True - - def rcdataEndTagOpenState(self): - data = self.stream.char() - if data in asciiLetters: - self.temporaryBuffer += data - self.state = self.rcdataEndTagNameState - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "" and appropriate: - self.currentToken = {"type": tokenTypes["EndTag"], - "name": self.temporaryBuffer, - "data": [], "selfClosing": False} - self.emitCurrentToken() - self.state = self.dataState - elif data in asciiLetters: - self.temporaryBuffer += data - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": "" and appropriate: - self.currentToken = {"type": tokenTypes["EndTag"], - "name": self.temporaryBuffer, - "data": [], "selfClosing": False} - self.emitCurrentToken() - self.state = self.dataState - elif data in asciiLetters: - self.temporaryBuffer += data - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": "" and appropriate: - self.currentToken = {"type": tokenTypes["EndTag"], - "name": self.temporaryBuffer, - "data": [], "selfClosing": False} - self.emitCurrentToken() - self.state = self.dataState - elif data in asciiLetters: - self.temporaryBuffer += data - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": "": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": ">"}) - self.state = self.scriptDataState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": "\uFFFD"}) - self.state = self.scriptDataEscapedState - elif data == EOF: - self.state = self.dataState - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) - self.state = self.scriptDataEscapedState - return True - - def scriptDataEscapedLessThanSignState(self): - data = self.stream.char() - if data == "/": - self.temporaryBuffer = "" - self.state = self.scriptDataEscapedEndTagOpenState - elif data in asciiLetters: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<" + data}) - self.temporaryBuffer = data - self.state = self.scriptDataDoubleEscapeStartState - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) - self.stream.unget(data) - self.state = self.scriptDataEscapedState - return True - - def scriptDataEscapedEndTagOpenState(self): - data = self.stream.char() - if data in asciiLetters: - self.temporaryBuffer = data - self.state = self.scriptDataEscapedEndTagNameState - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "" and appropriate: - self.currentToken = {"type": tokenTypes["EndTag"], - "name": self.temporaryBuffer, - "data": [], "selfClosing": False} - self.emitCurrentToken() - self.state = self.dataState - elif data in asciiLetters: - self.temporaryBuffer += data - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": ""))): - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) - if self.temporaryBuffer.lower() == "script": - self.state = self.scriptDataDoubleEscapedState - else: - self.state = self.scriptDataEscapedState - elif data in asciiLetters: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) - self.temporaryBuffer += data - else: - self.stream.unget(data) - self.state = self.scriptDataEscapedState - return True - - def scriptDataDoubleEscapedState(self): - data = self.stream.char() - if data == "-": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) - self.state = self.scriptDataDoubleEscapedDashState - elif data == "<": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) - self.state = self.scriptDataDoubleEscapedLessThanSignState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": "\uFFFD"}) - elif data == EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-script-in-script"}) - self.state = self.dataState - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) - return True - - def scriptDataDoubleEscapedDashState(self): - data = self.stream.char() - if data == "-": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) - self.state = self.scriptDataDoubleEscapedDashDashState - elif data == "<": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) - self.state = self.scriptDataDoubleEscapedLessThanSignState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": "\uFFFD"}) - self.state = self.scriptDataDoubleEscapedState - elif data == EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-script-in-script"}) - self.state = self.dataState - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) - self.state = self.scriptDataDoubleEscapedState - return True - - def scriptDataDoubleEscapedDashDashState(self): - data = self.stream.char() - if data == "-": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "-"}) - elif data == "<": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "<"}) - self.state = self.scriptDataDoubleEscapedLessThanSignState - elif data == ">": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": ">"}) - self.state = self.scriptDataState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": "\uFFFD"}) - self.state = self.scriptDataDoubleEscapedState - elif data == EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-script-in-script"}) - self.state = self.dataState - else: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) - self.state = self.scriptDataDoubleEscapedState - return True - - def scriptDataDoubleEscapedLessThanSignState(self): - data = self.stream.char() - if data == "/": - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": "/"}) - self.temporaryBuffer = "" - self.state = self.scriptDataDoubleEscapeEndState - else: - self.stream.unget(data) - self.state = self.scriptDataDoubleEscapedState - return True - - def scriptDataDoubleEscapeEndState(self): - data = self.stream.char() - if data in (spaceCharacters | frozenset(("/", ">"))): - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) - if self.temporaryBuffer.lower() == "script": - self.state = self.scriptDataEscapedState - else: - self.state = self.scriptDataDoubleEscapedState - elif data in asciiLetters: - self.tokenQueue.append({"type": tokenTypes["Characters"], "data": data}) - self.temporaryBuffer += data - else: - self.stream.unget(data) - self.state = self.scriptDataDoubleEscapedState - return True - - def beforeAttributeNameState(self): - data = self.stream.char() - if data in spaceCharacters: - self.stream.charsUntil(spaceCharacters, True) - elif data in asciiLetters: - self.currentToken["data"].append([data, ""]) - self.state = self.attributeNameState - elif data == ">": - self.emitCurrentToken() - elif data == "/": - self.state = self.selfClosingStartTagState - elif data in ("'", '"', "=", "<"): - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "invalid-character-in-attribute-name"}) - self.currentToken["data"].append([data, ""]) - self.state = self.attributeNameState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"].append(["\uFFFD", ""]) - self.state = self.attributeNameState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-attribute-name-but-got-eof"}) - self.state = self.dataState - else: - self.currentToken["data"].append([data, ""]) - self.state = self.attributeNameState - return True - - def attributeNameState(self): - data = self.stream.char() - leavingThisState = True - emitToken = False - if data == "=": - self.state = self.beforeAttributeValueState - elif data in asciiLetters: - self.currentToken["data"][-1][0] += data +\ - self.stream.charsUntil(asciiLetters, True) - leavingThisState = False - elif data == ">": - # XXX If we emit here the attributes are converted to a dict - # without being checked and when the code below runs we error - # because data is a dict not a list - emitToken = True - elif data in spaceCharacters: - self.state = self.afterAttributeNameState - elif data == "/": - self.state = self.selfClosingStartTagState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"][-1][0] += "\uFFFD" - leavingThisState = False - elif data in ("'", '"', "<"): - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": - "invalid-character-in-attribute-name"}) - self.currentToken["data"][-1][0] += data - leavingThisState = False - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "eof-in-attribute-name"}) - self.state = self.dataState - else: - self.currentToken["data"][-1][0] += data - leavingThisState = False - - if leavingThisState: - # Attributes are not dropped at this stage. That happens when the - # start tag token is emitted so values can still be safely appended - # to attributes, but we do want to report the parse error in time. - self.currentToken["data"][-1][0] = ( - self.currentToken["data"][-1][0].translate(asciiUpper2Lower)) - for name, _ in self.currentToken["data"][:-1]: - if self.currentToken["data"][-1][0] == name: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "duplicate-attribute"}) - break - # XXX Fix for above XXX - if emitToken: - self.emitCurrentToken() - return True - - def afterAttributeNameState(self): - data = self.stream.char() - if data in spaceCharacters: - self.stream.charsUntil(spaceCharacters, True) - elif data == "=": - self.state = self.beforeAttributeValueState - elif data == ">": - self.emitCurrentToken() - elif data in asciiLetters: - self.currentToken["data"].append([data, ""]) - self.state = self.attributeNameState - elif data == "/": - self.state = self.selfClosingStartTagState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"].append(["\uFFFD", ""]) - self.state = self.attributeNameState - elif data in ("'", '"', "<"): - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "invalid-character-after-attribute-name"}) - self.currentToken["data"].append([data, ""]) - self.state = self.attributeNameState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-end-of-tag-but-got-eof"}) - self.state = self.dataState - else: - self.currentToken["data"].append([data, ""]) - self.state = self.attributeNameState - return True - - def beforeAttributeValueState(self): - data = self.stream.char() - if data in spaceCharacters: - self.stream.charsUntil(spaceCharacters, True) - elif data == "\"": - self.state = self.attributeValueDoubleQuotedState - elif data == "&": - self.state = self.attributeValueUnQuotedState - self.stream.unget(data) - elif data == "'": - self.state = self.attributeValueSingleQuotedState - elif data == ">": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-attribute-value-but-got-right-bracket"}) - self.emitCurrentToken() - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"][-1][1] += "\uFFFD" - self.state = self.attributeValueUnQuotedState - elif data in ("=", "<", "`"): - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "equals-in-unquoted-attribute-value"}) - self.currentToken["data"][-1][1] += data - self.state = self.attributeValueUnQuotedState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-attribute-value-but-got-eof"}) - self.state = self.dataState - else: - self.currentToken["data"][-1][1] += data - self.state = self.attributeValueUnQuotedState - return True - - def attributeValueDoubleQuotedState(self): - data = self.stream.char() - if data == "\"": - self.state = self.afterAttributeValueState - elif data == "&": - self.processEntityInAttribute('"') - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"][-1][1] += "\uFFFD" - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-attribute-value-double-quote"}) - self.state = self.dataState - else: - self.currentToken["data"][-1][1] += data +\ - self.stream.charsUntil(("\"", "&", "\u0000")) - return True - - def attributeValueSingleQuotedState(self): - data = self.stream.char() - if data == "'": - self.state = self.afterAttributeValueState - elif data == "&": - self.processEntityInAttribute("'") - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"][-1][1] += "\uFFFD" - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-attribute-value-single-quote"}) - self.state = self.dataState - else: - self.currentToken["data"][-1][1] += data +\ - self.stream.charsUntil(("'", "&", "\u0000")) - return True - - def attributeValueUnQuotedState(self): - data = self.stream.char() - if data in spaceCharacters: - self.state = self.beforeAttributeNameState - elif data == "&": - self.processEntityInAttribute(">") - elif data == ">": - self.emitCurrentToken() - elif data in ('"', "'", "=", "<", "`"): - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-character-in-unquoted-attribute-value"}) - self.currentToken["data"][-1][1] += data - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"][-1][1] += "\uFFFD" - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-attribute-value-no-quotes"}) - self.state = self.dataState - else: - self.currentToken["data"][-1][1] += data + self.stream.charsUntil( - frozenset(("&", ">", '"', "'", "=", "<", "`", "\u0000")) | spaceCharacters) - return True - - def afterAttributeValueState(self): - data = self.stream.char() - if data in spaceCharacters: - self.state = self.beforeAttributeNameState - elif data == ">": - self.emitCurrentToken() - elif data == "/": - self.state = self.selfClosingStartTagState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-EOF-after-attribute-value"}) - self.stream.unget(data) - self.state = self.dataState - else: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-character-after-attribute-value"}) - self.stream.unget(data) - self.state = self.beforeAttributeNameState - return True - - def selfClosingStartTagState(self): - data = self.stream.char() - if data == ">": - self.currentToken["selfClosing"] = True - self.emitCurrentToken() - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": - "unexpected-EOF-after-solidus-in-tag"}) - self.stream.unget(data) - self.state = self.dataState - else: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-character-after-solidus-in-tag"}) - self.stream.unget(data) - self.state = self.beforeAttributeNameState - return True - - def bogusCommentState(self): - # Make a new comment token and give it as value all the characters - # until the first > or EOF (charsUntil checks for EOF automatically) - # and emit it. - data = self.stream.charsUntil(">") - data = data.replace("\u0000", "\uFFFD") - self.tokenQueue.append( - {"type": tokenTypes["Comment"], "data": data}) - - # Eat the character directly after the bogus comment which is either a - # ">" or an EOF. - self.stream.char() - self.state = self.dataState - return True - - def markupDeclarationOpenState(self): - charStack = [self.stream.char()] - if charStack[-1] == "-": - charStack.append(self.stream.char()) - if charStack[-1] == "-": - self.currentToken = {"type": tokenTypes["Comment"], "data": ""} - self.state = self.commentStartState - return True - elif charStack[-1] in ('d', 'D'): - matched = True - for expected in (('o', 'O'), ('c', 'C'), ('t', 'T'), - ('y', 'Y'), ('p', 'P'), ('e', 'E')): - charStack.append(self.stream.char()) - if charStack[-1] not in expected: - matched = False - break - if matched: - self.currentToken = {"type": tokenTypes["Doctype"], - "name": "", - "publicId": None, "systemId": None, - "correct": True} - self.state = self.doctypeState - return True - elif (charStack[-1] == "[" and - self.parser is not None and - self.parser.tree.openElements and - self.parser.tree.openElements[-1].namespace != self.parser.tree.defaultNamespace): - matched = True - for expected in ["C", "D", "A", "T", "A", "["]: - charStack.append(self.stream.char()) - if charStack[-1] != expected: - matched = False - break - if matched: - self.state = self.cdataSectionState - return True - - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-dashes-or-doctype"}) - - while charStack: - self.stream.unget(charStack.pop()) - self.state = self.bogusCommentState - return True - - def commentStartState(self): - data = self.stream.char() - if data == "-": - self.state = self.commentStartDashState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"] += "\uFFFD" - elif data == ">": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "incorrect-comment"}) - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-comment"}) - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.currentToken["data"] += data - self.state = self.commentState - return True - - def commentStartDashState(self): - data = self.stream.char() - if data == "-": - self.state = self.commentEndState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"] += "-\uFFFD" - elif data == ">": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "incorrect-comment"}) - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-comment"}) - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.currentToken["data"] += "-" + data - self.state = self.commentState - return True - - def commentState(self): - data = self.stream.char() - if data == "-": - self.state = self.commentEndDashState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"] += "\uFFFD" - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "eof-in-comment"}) - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.currentToken["data"] += data + \ - self.stream.charsUntil(("-", "\u0000")) - return True - - def commentEndDashState(self): - data = self.stream.char() - if data == "-": - self.state = self.commentEndState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"] += "-\uFFFD" - self.state = self.commentState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-comment-end-dash"}) - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.currentToken["data"] += "-" + data - self.state = self.commentState - return True - - def commentEndState(self): - data = self.stream.char() - if data == ">": - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"] += "--\uFFFD" - self.state = self.commentState - elif data == "!": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-bang-after-double-dash-in-comment"}) - self.state = self.commentEndBangState - elif data == "-": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-dash-after-double-dash-in-comment"}) - self.currentToken["data"] += data - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-comment-double-dash"}) - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - # XXX - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-comment"}) - self.currentToken["data"] += "--" + data - self.state = self.commentState - return True - - def commentEndBangState(self): - data = self.stream.char() - if data == ">": - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data == "-": - self.currentToken["data"] += "--!" - self.state = self.commentEndDashState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["data"] += "--!\uFFFD" - self.state = self.commentState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-comment-end-bang-state"}) - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.currentToken["data"] += "--!" + data - self.state = self.commentState - return True - - def doctypeState(self): - data = self.stream.char() - if data in spaceCharacters: - self.state = self.beforeDoctypeNameState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-doctype-name-but-got-eof"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "need-space-after-doctype"}) - self.stream.unget(data) - self.state = self.beforeDoctypeNameState - return True - - def beforeDoctypeNameState(self): - data = self.stream.char() - if data in spaceCharacters: - pass - elif data == ">": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-doctype-name-but-got-right-bracket"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["name"] = "\uFFFD" - self.state = self.doctypeNameState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-doctype-name-but-got-eof"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.currentToken["name"] = data - self.state = self.doctypeNameState - return True - - def doctypeNameState(self): - data = self.stream.char() - if data in spaceCharacters: - self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower) - self.state = self.afterDoctypeNameState - elif data == ">": - self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower) - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["name"] += "\uFFFD" - self.state = self.doctypeNameState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype-name"}) - self.currentToken["correct"] = False - self.currentToken["name"] = self.currentToken["name"].translate(asciiUpper2Lower) - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.currentToken["name"] += data - return True - - def afterDoctypeNameState(self): - data = self.stream.char() - if data in spaceCharacters: - pass - elif data == ">": - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data is EOF: - self.currentToken["correct"] = False - self.stream.unget(data) - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - if data in ("p", "P"): - matched = True - for expected in (("u", "U"), ("b", "B"), ("l", "L"), - ("i", "I"), ("c", "C")): - data = self.stream.char() - if data not in expected: - matched = False - break - if matched: - self.state = self.afterDoctypePublicKeywordState - return True - elif data in ("s", "S"): - matched = True - for expected in (("y", "Y"), ("s", "S"), ("t", "T"), - ("e", "E"), ("m", "M")): - data = self.stream.char() - if data not in expected: - matched = False - break - if matched: - self.state = self.afterDoctypeSystemKeywordState - return True - - # All the characters read before the current 'data' will be - # [a-zA-Z], so they're garbage in the bogus doctype and can be - # discarded; only the latest character might be '>' or EOF - # and needs to be ungetted - self.stream.unget(data) - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "expected-space-or-right-bracket-in-doctype", "datavars": - {"data": data}}) - self.currentToken["correct"] = False - self.state = self.bogusDoctypeState - - return True - - def afterDoctypePublicKeywordState(self): - data = self.stream.char() - if data in spaceCharacters: - self.state = self.beforeDoctypePublicIdentifierState - elif data in ("'", '"'): - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) - self.stream.unget(data) - self.state = self.beforeDoctypePublicIdentifierState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.stream.unget(data) - self.state = self.beforeDoctypePublicIdentifierState - return True - - def beforeDoctypePublicIdentifierState(self): - data = self.stream.char() - if data in spaceCharacters: - pass - elif data == "\"": - self.currentToken["publicId"] = "" - self.state = self.doctypePublicIdentifierDoubleQuotedState - elif data == "'": - self.currentToken["publicId"] = "" - self.state = self.doctypePublicIdentifierSingleQuotedState - elif data == ">": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-end-of-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) - self.currentToken["correct"] = False - self.state = self.bogusDoctypeState - return True - - def doctypePublicIdentifierDoubleQuotedState(self): - data = self.stream.char() - if data == "\"": - self.state = self.afterDoctypePublicIdentifierState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["publicId"] += "\uFFFD" - elif data == ">": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-end-of-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.currentToken["publicId"] += data - return True - - def doctypePublicIdentifierSingleQuotedState(self): - data = self.stream.char() - if data == "'": - self.state = self.afterDoctypePublicIdentifierState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["publicId"] += "\uFFFD" - elif data == ">": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-end-of-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.currentToken["publicId"] += data - return True - - def afterDoctypePublicIdentifierState(self): - data = self.stream.char() - if data in spaceCharacters: - self.state = self.betweenDoctypePublicAndSystemIdentifiersState - elif data == ">": - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data == '"': - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) - self.currentToken["systemId"] = "" - self.state = self.doctypeSystemIdentifierDoubleQuotedState - elif data == "'": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) - self.currentToken["systemId"] = "" - self.state = self.doctypeSystemIdentifierSingleQuotedState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) - self.currentToken["correct"] = False - self.state = self.bogusDoctypeState - return True - - def betweenDoctypePublicAndSystemIdentifiersState(self): - data = self.stream.char() - if data in spaceCharacters: - pass - elif data == ">": - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data == '"': - self.currentToken["systemId"] = "" - self.state = self.doctypeSystemIdentifierDoubleQuotedState - elif data == "'": - self.currentToken["systemId"] = "" - self.state = self.doctypeSystemIdentifierSingleQuotedState - elif data == EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) - self.currentToken["correct"] = False - self.state = self.bogusDoctypeState - return True - - def afterDoctypeSystemKeywordState(self): - data = self.stream.char() - if data in spaceCharacters: - self.state = self.beforeDoctypeSystemIdentifierState - elif data in ("'", '"'): - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) - self.stream.unget(data) - self.state = self.beforeDoctypeSystemIdentifierState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.stream.unget(data) - self.state = self.beforeDoctypeSystemIdentifierState - return True - - def beforeDoctypeSystemIdentifierState(self): - data = self.stream.char() - if data in spaceCharacters: - pass - elif data == "\"": - self.currentToken["systemId"] = "" - self.state = self.doctypeSystemIdentifierDoubleQuotedState - elif data == "'": - self.currentToken["systemId"] = "" - self.state = self.doctypeSystemIdentifierSingleQuotedState - elif data == ">": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) - self.currentToken["correct"] = False - self.state = self.bogusDoctypeState - return True - - def doctypeSystemIdentifierDoubleQuotedState(self): - data = self.stream.char() - if data == "\"": - self.state = self.afterDoctypeSystemIdentifierState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["systemId"] += "\uFFFD" - elif data == ">": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-end-of-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.currentToken["systemId"] += data - return True - - def doctypeSystemIdentifierSingleQuotedState(self): - data = self.stream.char() - if data == "'": - self.state = self.afterDoctypeSystemIdentifierState - elif data == "\u0000": - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - self.currentToken["systemId"] += "\uFFFD" - elif data == ">": - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-end-of-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.currentToken["systemId"] += data - return True - - def afterDoctypeSystemIdentifierState(self): - data = self.stream.char() - if data in spaceCharacters: - pass - elif data == ">": - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data is EOF: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "eof-in-doctype"}) - self.currentToken["correct"] = False - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - self.tokenQueue.append({"type": tokenTypes["ParseError"], "data": - "unexpected-char-in-doctype"}) - self.state = self.bogusDoctypeState - return True - - def bogusDoctypeState(self): - data = self.stream.char() - if data == ">": - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - elif data is EOF: - # XXX EMIT - self.stream.unget(data) - self.tokenQueue.append(self.currentToken) - self.state = self.dataState - else: - pass - return True - - def cdataSectionState(self): - data = [] - while True: - data.append(self.stream.charsUntil("]")) - data.append(self.stream.charsUntil(">")) - char = self.stream.char() - if char == EOF: - break - else: - assert char == ">" - if data[-1][-2:] == "]]": - data[-1] = data[-1][:-2] - break - else: - data.append(char) - - data = "".join(data) # pylint:disable=redefined-variable-type - # Deal with null here rather than in the parser - nullCount = data.count("\u0000") - if nullCount > 0: - for _ in range(nullCount): - self.tokenQueue.append({"type": tokenTypes["ParseError"], - "data": "invalid-codepoint"}) - data = data.replace("\u0000", "\uFFFD") - if data: - self.tokenQueue.append({"type": tokenTypes["Characters"], - "data": data}) - self.state = self.dataState - return True diff --git a/src/pip/_vendor/html5lib/_trie/__init__.py b/src/pip/_vendor/html5lib/_trie/__init__.py deleted file mode 100644 index 07bad5d31c1..00000000000 --- a/src/pip/_vendor/html5lib/_trie/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from .py import Trie - -__all__ = ["Trie"] diff --git a/src/pip/_vendor/html5lib/_trie/_base.py b/src/pip/_vendor/html5lib/_trie/_base.py deleted file mode 100644 index 6b71975f085..00000000000 --- a/src/pip/_vendor/html5lib/_trie/_base.py +++ /dev/null @@ -1,40 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -try: - from collections.abc import Mapping -except ImportError: # Python 2.7 - from collections import Mapping - - -class Trie(Mapping): - """Abstract base class for tries""" - - def keys(self, prefix=None): - # pylint:disable=arguments-differ - keys = super(Trie, self).keys() - - if prefix is None: - return set(keys) - - return {x for x in keys if x.startswith(prefix)} - - def has_keys_with_prefix(self, prefix): - for key in self.keys(): - if key.startswith(prefix): - return True - - return False - - def longest_prefix(self, prefix): - if prefix in self: - return prefix - - for i in range(1, len(prefix) + 1): - if prefix[:-i] in self: - return prefix[:-i] - - raise KeyError(prefix) - - def longest_prefix_item(self, prefix): - lprefix = self.longest_prefix(prefix) - return (lprefix, self[lprefix]) diff --git a/src/pip/_vendor/html5lib/_trie/py.py b/src/pip/_vendor/html5lib/_trie/py.py deleted file mode 100644 index c178b219def..00000000000 --- a/src/pip/_vendor/html5lib/_trie/py.py +++ /dev/null @@ -1,67 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals -from pip._vendor.six import text_type - -from bisect import bisect_left - -from ._base import Trie as ABCTrie - - -class Trie(ABCTrie): - def __init__(self, data): - if not all(isinstance(x, text_type) for x in data.keys()): - raise TypeError("All keys must be strings") - - self._data = data - self._keys = sorted(data.keys()) - self._cachestr = "" - self._cachepoints = (0, len(data)) - - def __contains__(self, key): - return key in self._data - - def __len__(self): - return len(self._data) - - def __iter__(self): - return iter(self._data) - - def __getitem__(self, key): - return self._data[key] - - def keys(self, prefix=None): - if prefix is None or prefix == "" or not self._keys: - return set(self._keys) - - if prefix.startswith(self._cachestr): - lo, hi = self._cachepoints - start = i = bisect_left(self._keys, prefix, lo, hi) - else: - start = i = bisect_left(self._keys, prefix) - - keys = set() - if start == len(self._keys): - return keys - - while self._keys[i].startswith(prefix): - keys.add(self._keys[i]) - i += 1 - - self._cachestr = prefix - self._cachepoints = (start, i) - - return keys - - def has_keys_with_prefix(self, prefix): - if prefix in self._data: - return True - - if prefix.startswith(self._cachestr): - lo, hi = self._cachepoints - i = bisect_left(self._keys, prefix, lo, hi) - else: - i = bisect_left(self._keys, prefix) - - if i == len(self._keys): - return False - - return self._keys[i].startswith(prefix) diff --git a/src/pip/_vendor/html5lib/_utils.py b/src/pip/_vendor/html5lib/_utils.py deleted file mode 100644 index d7c4926afce..00000000000 --- a/src/pip/_vendor/html5lib/_utils.py +++ /dev/null @@ -1,159 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from types import ModuleType - -try: - from collections.abc import Mapping -except ImportError: - from collections import Mapping - -from pip._vendor.six import text_type, PY3 - -if PY3: - import xml.etree.ElementTree as default_etree -else: - try: - import xml.etree.cElementTree as default_etree - except ImportError: - import xml.etree.ElementTree as default_etree - - -__all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair", - "surrogatePairToCodepoint", "moduleFactoryFactory", - "supports_lone_surrogates"] - - -# Platforms not supporting lone surrogates (\uD800-\uDFFF) should be -# caught by the below test. In general this would be any platform -# using UTF-16 as its encoding of unicode strings, such as -# Jython. This is because UTF-16 itself is based on the use of such -# surrogates, and there is no mechanism to further escape such -# escapes. -try: - _x = eval('"\\uD800"') # pylint:disable=eval-used - if not isinstance(_x, text_type): - # We need this with u"" because of http://bugs.jython.org/issue2039 - _x = eval('u"\\uD800"') # pylint:disable=eval-used - assert isinstance(_x, text_type) -except Exception: - supports_lone_surrogates = False -else: - supports_lone_surrogates = True - - -class MethodDispatcher(dict): - """Dict with 2 special properties: - - On initiation, keys that are lists, sets or tuples are converted to - multiple keys so accessing any one of the items in the original - list-like object returns the matching value - - md = MethodDispatcher({("foo", "bar"):"baz"}) - md["foo"] == "baz" - - A default value which can be set through the default attribute. - """ - - def __init__(self, items=()): - _dictEntries = [] - for name, value in items: - if isinstance(name, (list, tuple, frozenset, set)): - for item in name: - _dictEntries.append((item, value)) - else: - _dictEntries.append((name, value)) - dict.__init__(self, _dictEntries) - assert len(self) == len(_dictEntries) - self.default = None - - def __getitem__(self, key): - return dict.get(self, key, self.default) - - def __get__(self, instance, owner=None): - return BoundMethodDispatcher(instance, self) - - -class BoundMethodDispatcher(Mapping): - """Wraps a MethodDispatcher, binding its return values to `instance`""" - def __init__(self, instance, dispatcher): - self.instance = instance - self.dispatcher = dispatcher - - def __getitem__(self, key): - # see https://docs.python.org/3/reference/datamodel.html#object.__get__ - # on a function, __get__ is used to bind a function to an instance as a bound method - return self.dispatcher[key].__get__(self.instance) - - def get(self, key, default): - if key in self.dispatcher: - return self[key] - else: - return default - - def __iter__(self): - return iter(self.dispatcher) - - def __len__(self): - return len(self.dispatcher) - - def __contains__(self, key): - return key in self.dispatcher - - -# Some utility functions to deal with weirdness around UCS2 vs UCS4 -# python builds - -def isSurrogatePair(data): - return (len(data) == 2 and - ord(data[0]) >= 0xD800 and ord(data[0]) <= 0xDBFF and - ord(data[1]) >= 0xDC00 and ord(data[1]) <= 0xDFFF) - - -def surrogatePairToCodepoint(data): - char_val = (0x10000 + (ord(data[0]) - 0xD800) * 0x400 + - (ord(data[1]) - 0xDC00)) - return char_val - -# Module Factory Factory (no, this isn't Java, I know) -# Here to stop this being duplicated all over the place. - - -def moduleFactoryFactory(factory): - moduleCache = {} - - def moduleFactory(baseModule, *args, **kwargs): - if isinstance(ModuleType.__name__, type("")): - name = "_%s_factory" % baseModule.__name__ - else: - name = b"_%s_factory" % baseModule.__name__ - - kwargs_tuple = tuple(kwargs.items()) - - try: - return moduleCache[name][args][kwargs_tuple] - except KeyError: - mod = ModuleType(name) - objs = factory(baseModule, *args, **kwargs) - mod.__dict__.update(objs) - if "name" not in moduleCache: - moduleCache[name] = {} - if "args" not in moduleCache[name]: - moduleCache[name][args] = {} - if "kwargs" not in moduleCache[name][args]: - moduleCache[name][args][kwargs_tuple] = {} - moduleCache[name][args][kwargs_tuple] = mod - return mod - - return moduleFactory - - -def memoize(func): - cache = {} - - def wrapped(*args, **kwargs): - key = (tuple(args), tuple(kwargs.items())) - if key not in cache: - cache[key] = func(*args, **kwargs) - return cache[key] - - return wrapped diff --git a/src/pip/_vendor/html5lib/constants.py b/src/pip/_vendor/html5lib/constants.py deleted file mode 100644 index fe3e237cd8a..00000000000 --- a/src/pip/_vendor/html5lib/constants.py +++ /dev/null @@ -1,2946 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -import string - -EOF = None - -E = { - "null-character": - "Null character in input stream, replaced with U+FFFD.", - "invalid-codepoint": - "Invalid codepoint in stream.", - "incorrectly-placed-solidus": - "Solidus (/) incorrectly placed in tag.", - "incorrect-cr-newline-entity": - "Incorrect CR newline entity, replaced with LF.", - "illegal-windows-1252-entity": - "Entity used with illegal number (windows-1252 reference).", - "cant-convert-numeric-entity": - "Numeric entity couldn't be converted to character " - "(codepoint U+%(charAsInt)08x).", - "illegal-codepoint-for-numeric-entity": - "Numeric entity represents an illegal codepoint: " - "U+%(charAsInt)08x.", - "numeric-entity-without-semicolon": - "Numeric entity didn't end with ';'.", - "expected-numeric-entity-but-got-eof": - "Numeric entity expected. Got end of file instead.", - "expected-numeric-entity": - "Numeric entity expected but none found.", - "named-entity-without-semicolon": - "Named entity didn't end with ';'.", - "expected-named-entity": - "Named entity expected. Got none.", - "attributes-in-end-tag": - "End tag contains unexpected attributes.", - 'self-closing-flag-on-end-tag': - "End tag contains unexpected self-closing flag.", - "expected-tag-name-but-got-right-bracket": - "Expected tag name. Got '>' instead.", - "expected-tag-name-but-got-question-mark": - "Expected tag name. Got '?' instead. (HTML doesn't " - "support processing instructions.)", - "expected-tag-name": - "Expected tag name. Got something else instead", - "expected-closing-tag-but-got-right-bracket": - "Expected closing tag. Got '>' instead. Ignoring ''.", - "expected-closing-tag-but-got-eof": - "Expected closing tag. Unexpected end of file.", - "expected-closing-tag-but-got-char": - "Expected closing tag. Unexpected character '%(data)s' found.", - "eof-in-tag-name": - "Unexpected end of file in the tag name.", - "expected-attribute-name-but-got-eof": - "Unexpected end of file. Expected attribute name instead.", - "eof-in-attribute-name": - "Unexpected end of file in attribute name.", - "invalid-character-in-attribute-name": - "Invalid character in attribute name", - "duplicate-attribute": - "Dropped duplicate attribute on tag.", - "expected-end-of-tag-name-but-got-eof": - "Unexpected end of file. Expected = or end of tag.", - "expected-attribute-value-but-got-eof": - "Unexpected end of file. Expected attribute value.", - "expected-attribute-value-but-got-right-bracket": - "Expected attribute value. Got '>' instead.", - 'equals-in-unquoted-attribute-value': - "Unexpected = in unquoted attribute", - 'unexpected-character-in-unquoted-attribute-value': - "Unexpected character in unquoted attribute", - "invalid-character-after-attribute-name": - "Unexpected character after attribute name.", - "unexpected-character-after-attribute-value": - "Unexpected character after attribute value.", - "eof-in-attribute-value-double-quote": - "Unexpected end of file in attribute value (\").", - "eof-in-attribute-value-single-quote": - "Unexpected end of file in attribute value (').", - "eof-in-attribute-value-no-quotes": - "Unexpected end of file in attribute value.", - "unexpected-EOF-after-solidus-in-tag": - "Unexpected end of file in tag. Expected >", - "unexpected-character-after-solidus-in-tag": - "Unexpected character after / in tag. Expected >", - "expected-dashes-or-doctype": - "Expected '--' or 'DOCTYPE'. Not found.", - "unexpected-bang-after-double-dash-in-comment": - "Unexpected ! after -- in comment", - "unexpected-space-after-double-dash-in-comment": - "Unexpected space after -- in comment", - "incorrect-comment": - "Incorrect comment.", - "eof-in-comment": - "Unexpected end of file in comment.", - "eof-in-comment-end-dash": - "Unexpected end of file in comment (-)", - "unexpected-dash-after-double-dash-in-comment": - "Unexpected '-' after '--' found in comment.", - "eof-in-comment-double-dash": - "Unexpected end of file in comment (--).", - "eof-in-comment-end-space-state": - "Unexpected end of file in comment.", - "eof-in-comment-end-bang-state": - "Unexpected end of file in comment.", - "unexpected-char-in-comment": - "Unexpected character in comment found.", - "need-space-after-doctype": - "No space after literal string 'DOCTYPE'.", - "expected-doctype-name-but-got-right-bracket": - "Unexpected > character. Expected DOCTYPE name.", - "expected-doctype-name-but-got-eof": - "Unexpected end of file. Expected DOCTYPE name.", - "eof-in-doctype-name": - "Unexpected end of file in DOCTYPE name.", - "eof-in-doctype": - "Unexpected end of file in DOCTYPE.", - "expected-space-or-right-bracket-in-doctype": - "Expected space or '>'. Got '%(data)s'", - "unexpected-end-of-doctype": - "Unexpected end of DOCTYPE.", - "unexpected-char-in-doctype": - "Unexpected character in DOCTYPE.", - "eof-in-innerhtml": - "XXX innerHTML EOF", - "unexpected-doctype": - "Unexpected DOCTYPE. Ignored.", - "non-html-root": - "html needs to be the first start tag.", - "expected-doctype-but-got-eof": - "Unexpected End of file. Expected DOCTYPE.", - "unknown-doctype": - "Erroneous DOCTYPE.", - "expected-doctype-but-got-chars": - "Unexpected non-space characters. Expected DOCTYPE.", - "expected-doctype-but-got-start-tag": - "Unexpected start tag (%(name)s). Expected DOCTYPE.", - "expected-doctype-but-got-end-tag": - "Unexpected end tag (%(name)s). Expected DOCTYPE.", - "end-tag-after-implied-root": - "Unexpected end tag (%(name)s) after the (implied) root element.", - "expected-named-closing-tag-but-got-eof": - "Unexpected end of file. Expected end tag (%(name)s).", - "two-heads-are-not-better-than-one": - "Unexpected start tag head in existing head. Ignored.", - "unexpected-end-tag": - "Unexpected end tag (%(name)s). Ignored.", - "unexpected-start-tag-out-of-my-head": - "Unexpected start tag (%(name)s) that can be in head. Moved.", - "unexpected-start-tag": - "Unexpected start tag (%(name)s).", - "missing-end-tag": - "Missing end tag (%(name)s).", - "missing-end-tags": - "Missing end tags (%(name)s).", - "unexpected-start-tag-implies-end-tag": - "Unexpected start tag (%(startName)s) " - "implies end tag (%(endName)s).", - "unexpected-start-tag-treated-as": - "Unexpected start tag (%(originalName)s). Treated as %(newName)s.", - "deprecated-tag": - "Unexpected start tag %(name)s. Don't use it!", - "unexpected-start-tag-ignored": - "Unexpected start tag %(name)s. Ignored.", - "expected-one-end-tag-but-got-another": - "Unexpected end tag (%(gotName)s). " - "Missing end tag (%(expectedName)s).", - "end-tag-too-early": - "End tag (%(name)s) seen too early. Expected other end tag.", - "end-tag-too-early-named": - "Unexpected end tag (%(gotName)s). Expected end tag (%(expectedName)s).", - "end-tag-too-early-ignored": - "End tag (%(name)s) seen too early. Ignored.", - "adoption-agency-1.1": - "End tag (%(name)s) violates step 1, " - "paragraph 1 of the adoption agency algorithm.", - "adoption-agency-1.2": - "End tag (%(name)s) violates step 1, " - "paragraph 2 of the adoption agency algorithm.", - "adoption-agency-1.3": - "End tag (%(name)s) violates step 1, " - "paragraph 3 of the adoption agency algorithm.", - "adoption-agency-4.4": - "End tag (%(name)s) violates step 4, " - "paragraph 4 of the adoption agency algorithm.", - "unexpected-end-tag-treated-as": - "Unexpected end tag (%(originalName)s). Treated as %(newName)s.", - "no-end-tag": - "This element (%(name)s) has no end tag.", - "unexpected-implied-end-tag-in-table": - "Unexpected implied end tag (%(name)s) in the table phase.", - "unexpected-implied-end-tag-in-table-body": - "Unexpected implied end tag (%(name)s) in the table body phase.", - "unexpected-char-implies-table-voodoo": - "Unexpected non-space characters in " - "table context caused voodoo mode.", - "unexpected-hidden-input-in-table": - "Unexpected input with type hidden in table context.", - "unexpected-form-in-table": - "Unexpected form in table context.", - "unexpected-start-tag-implies-table-voodoo": - "Unexpected start tag (%(name)s) in " - "table context caused voodoo mode.", - "unexpected-end-tag-implies-table-voodoo": - "Unexpected end tag (%(name)s) in " - "table context caused voodoo mode.", - "unexpected-cell-in-table-body": - "Unexpected table cell start tag (%(name)s) " - "in the table body phase.", - "unexpected-cell-end-tag": - "Got table cell end tag (%(name)s) " - "while required end tags are missing.", - "unexpected-end-tag-in-table-body": - "Unexpected end tag (%(name)s) in the table body phase. Ignored.", - "unexpected-implied-end-tag-in-table-row": - "Unexpected implied end tag (%(name)s) in the table row phase.", - "unexpected-end-tag-in-table-row": - "Unexpected end tag (%(name)s) in the table row phase. Ignored.", - "unexpected-select-in-select": - "Unexpected select start tag in the select phase " - "treated as select end tag.", - "unexpected-input-in-select": - "Unexpected input start tag in the select phase.", - "unexpected-start-tag-in-select": - "Unexpected start tag token (%(name)s in the select phase. " - "Ignored.", - "unexpected-end-tag-in-select": - "Unexpected end tag (%(name)s) in the select phase. Ignored.", - "unexpected-table-element-start-tag-in-select-in-table": - "Unexpected table element start tag (%(name)s) in the select in table phase.", - "unexpected-table-element-end-tag-in-select-in-table": - "Unexpected table element end tag (%(name)s) in the select in table phase.", - "unexpected-char-after-body": - "Unexpected non-space characters in the after body phase.", - "unexpected-start-tag-after-body": - "Unexpected start tag token (%(name)s)" - " in the after body phase.", - "unexpected-end-tag-after-body": - "Unexpected end tag token (%(name)s)" - " in the after body phase.", - "unexpected-char-in-frameset": - "Unexpected characters in the frameset phase. Characters ignored.", - "unexpected-start-tag-in-frameset": - "Unexpected start tag token (%(name)s)" - " in the frameset phase. Ignored.", - "unexpected-frameset-in-frameset-innerhtml": - "Unexpected end tag token (frameset) " - "in the frameset phase (innerHTML).", - "unexpected-end-tag-in-frameset": - "Unexpected end tag token (%(name)s)" - " in the frameset phase. Ignored.", - "unexpected-char-after-frameset": - "Unexpected non-space characters in the " - "after frameset phase. Ignored.", - "unexpected-start-tag-after-frameset": - "Unexpected start tag (%(name)s)" - " in the after frameset phase. Ignored.", - "unexpected-end-tag-after-frameset": - "Unexpected end tag (%(name)s)" - " in the after frameset phase. Ignored.", - "unexpected-end-tag-after-body-innerhtml": - "Unexpected end tag after body(innerHtml)", - "expected-eof-but-got-char": - "Unexpected non-space characters. Expected end of file.", - "expected-eof-but-got-start-tag": - "Unexpected start tag (%(name)s)" - ". Expected end of file.", - "expected-eof-but-got-end-tag": - "Unexpected end tag (%(name)s)" - ". Expected end of file.", - "eof-in-table": - "Unexpected end of file. Expected table content.", - "eof-in-select": - "Unexpected end of file. Expected select content.", - "eof-in-frameset": - "Unexpected end of file. Expected frameset content.", - "eof-in-script-in-script": - "Unexpected end of file. Expected script content.", - "eof-in-foreign-lands": - "Unexpected end of file. Expected foreign content", - "non-void-element-with-trailing-solidus": - "Trailing solidus not allowed on element %(name)s", - "unexpected-html-element-in-foreign-content": - "Element %(name)s not allowed in a non-html context", - "unexpected-end-tag-before-html": - "Unexpected end tag (%(name)s) before html.", - "unexpected-inhead-noscript-tag": - "Element %(name)s not allowed in a inhead-noscript context", - "eof-in-head-noscript": - "Unexpected end of file. Expected inhead-noscript content", - "char-in-head-noscript": - "Unexpected non-space character. Expected inhead-noscript content", - "XXX-undefined-error": - "Undefined error (this sucks and should be fixed)", -} - -namespaces = { - "html": "http://www.w3.org/1999/xhtml", - "mathml": "http://www.w3.org/1998/Math/MathML", - "svg": "http://www.w3.org/2000/svg", - "xlink": "http://www.w3.org/1999/xlink", - "xml": "http://www.w3.org/XML/1998/namespace", - "xmlns": "http://www.w3.org/2000/xmlns/" -} - -scopingElements = frozenset([ - (namespaces["html"], "applet"), - (namespaces["html"], "caption"), - (namespaces["html"], "html"), - (namespaces["html"], "marquee"), - (namespaces["html"], "object"), - (namespaces["html"], "table"), - (namespaces["html"], "td"), - (namespaces["html"], "th"), - (namespaces["mathml"], "mi"), - (namespaces["mathml"], "mo"), - (namespaces["mathml"], "mn"), - (namespaces["mathml"], "ms"), - (namespaces["mathml"], "mtext"), - (namespaces["mathml"], "annotation-xml"), - (namespaces["svg"], "foreignObject"), - (namespaces["svg"], "desc"), - (namespaces["svg"], "title"), -]) - -formattingElements = frozenset([ - (namespaces["html"], "a"), - (namespaces["html"], "b"), - (namespaces["html"], "big"), - (namespaces["html"], "code"), - (namespaces["html"], "em"), - (namespaces["html"], "font"), - (namespaces["html"], "i"), - (namespaces["html"], "nobr"), - (namespaces["html"], "s"), - (namespaces["html"], "small"), - (namespaces["html"], "strike"), - (namespaces["html"], "strong"), - (namespaces["html"], "tt"), - (namespaces["html"], "u") -]) - -specialElements = frozenset([ - (namespaces["html"], "address"), - (namespaces["html"], "applet"), - (namespaces["html"], "area"), - (namespaces["html"], "article"), - (namespaces["html"], "aside"), - (namespaces["html"], "base"), - (namespaces["html"], "basefont"), - (namespaces["html"], "bgsound"), - (namespaces["html"], "blockquote"), - (namespaces["html"], "body"), - (namespaces["html"], "br"), - (namespaces["html"], "button"), - (namespaces["html"], "caption"), - (namespaces["html"], "center"), - (namespaces["html"], "col"), - (namespaces["html"], "colgroup"), - (namespaces["html"], "command"), - (namespaces["html"], "dd"), - (namespaces["html"], "details"), - (namespaces["html"], "dir"), - (namespaces["html"], "div"), - (namespaces["html"], "dl"), - (namespaces["html"], "dt"), - (namespaces["html"], "embed"), - (namespaces["html"], "fieldset"), - (namespaces["html"], "figure"), - (namespaces["html"], "footer"), - (namespaces["html"], "form"), - (namespaces["html"], "frame"), - (namespaces["html"], "frameset"), - (namespaces["html"], "h1"), - (namespaces["html"], "h2"), - (namespaces["html"], "h3"), - (namespaces["html"], "h4"), - (namespaces["html"], "h5"), - (namespaces["html"], "h6"), - (namespaces["html"], "head"), - (namespaces["html"], "header"), - (namespaces["html"], "hr"), - (namespaces["html"], "html"), - (namespaces["html"], "iframe"), - # Note that image is commented out in the spec as "this isn't an - # element that can end up on the stack, so it doesn't matter," - (namespaces["html"], "image"), - (namespaces["html"], "img"), - (namespaces["html"], "input"), - (namespaces["html"], "isindex"), - (namespaces["html"], "li"), - (namespaces["html"], "link"), - (namespaces["html"], "listing"), - (namespaces["html"], "marquee"), - (namespaces["html"], "menu"), - (namespaces["html"], "meta"), - (namespaces["html"], "nav"), - (namespaces["html"], "noembed"), - (namespaces["html"], "noframes"), - (namespaces["html"], "noscript"), - (namespaces["html"], "object"), - (namespaces["html"], "ol"), - (namespaces["html"], "p"), - (namespaces["html"], "param"), - (namespaces["html"], "plaintext"), - (namespaces["html"], "pre"), - (namespaces["html"], "script"), - (namespaces["html"], "section"), - (namespaces["html"], "select"), - (namespaces["html"], "style"), - (namespaces["html"], "table"), - (namespaces["html"], "tbody"), - (namespaces["html"], "td"), - (namespaces["html"], "textarea"), - (namespaces["html"], "tfoot"), - (namespaces["html"], "th"), - (namespaces["html"], "thead"), - (namespaces["html"], "title"), - (namespaces["html"], "tr"), - (namespaces["html"], "ul"), - (namespaces["html"], "wbr"), - (namespaces["html"], "xmp"), - (namespaces["svg"], "foreignObject") -]) - -htmlIntegrationPointElements = frozenset([ - (namespaces["mathml"], "annotation-xml"), - (namespaces["svg"], "foreignObject"), - (namespaces["svg"], "desc"), - (namespaces["svg"], "title") -]) - -mathmlTextIntegrationPointElements = frozenset([ - (namespaces["mathml"], "mi"), - (namespaces["mathml"], "mo"), - (namespaces["mathml"], "mn"), - (namespaces["mathml"], "ms"), - (namespaces["mathml"], "mtext") -]) - -adjustSVGAttributes = { - "attributename": "attributeName", - "attributetype": "attributeType", - "basefrequency": "baseFrequency", - "baseprofile": "baseProfile", - "calcmode": "calcMode", - "clippathunits": "clipPathUnits", - "contentscripttype": "contentScriptType", - "contentstyletype": "contentStyleType", - "diffuseconstant": "diffuseConstant", - "edgemode": "edgeMode", - "externalresourcesrequired": "externalResourcesRequired", - "filterres": "filterRes", - "filterunits": "filterUnits", - "glyphref": "glyphRef", - "gradienttransform": "gradientTransform", - "gradientunits": "gradientUnits", - "kernelmatrix": "kernelMatrix", - "kernelunitlength": "kernelUnitLength", - "keypoints": "keyPoints", - "keysplines": "keySplines", - "keytimes": "keyTimes", - "lengthadjust": "lengthAdjust", - "limitingconeangle": "limitingConeAngle", - "markerheight": "markerHeight", - "markerunits": "markerUnits", - "markerwidth": "markerWidth", - "maskcontentunits": "maskContentUnits", - "maskunits": "maskUnits", - "numoctaves": "numOctaves", - "pathlength": "pathLength", - "patterncontentunits": "patternContentUnits", - "patterntransform": "patternTransform", - "patternunits": "patternUnits", - "pointsatx": "pointsAtX", - "pointsaty": "pointsAtY", - "pointsatz": "pointsAtZ", - "preservealpha": "preserveAlpha", - "preserveaspectratio": "preserveAspectRatio", - "primitiveunits": "primitiveUnits", - "refx": "refX", - "refy": "refY", - "repeatcount": "repeatCount", - "repeatdur": "repeatDur", - "requiredextensions": "requiredExtensions", - "requiredfeatures": "requiredFeatures", - "specularconstant": "specularConstant", - "specularexponent": "specularExponent", - "spreadmethod": "spreadMethod", - "startoffset": "startOffset", - "stddeviation": "stdDeviation", - "stitchtiles": "stitchTiles", - "surfacescale": "surfaceScale", - "systemlanguage": "systemLanguage", - "tablevalues": "tableValues", - "targetx": "targetX", - "targety": "targetY", - "textlength": "textLength", - "viewbox": "viewBox", - "viewtarget": "viewTarget", - "xchannelselector": "xChannelSelector", - "ychannelselector": "yChannelSelector", - "zoomandpan": "zoomAndPan" -} - -adjustMathMLAttributes = {"definitionurl": "definitionURL"} - -adjustForeignAttributes = { - "xlink:actuate": ("xlink", "actuate", namespaces["xlink"]), - "xlink:arcrole": ("xlink", "arcrole", namespaces["xlink"]), - "xlink:href": ("xlink", "href", namespaces["xlink"]), - "xlink:role": ("xlink", "role", namespaces["xlink"]), - "xlink:show": ("xlink", "show", namespaces["xlink"]), - "xlink:title": ("xlink", "title", namespaces["xlink"]), - "xlink:type": ("xlink", "type", namespaces["xlink"]), - "xml:base": ("xml", "base", namespaces["xml"]), - "xml:lang": ("xml", "lang", namespaces["xml"]), - "xml:space": ("xml", "space", namespaces["xml"]), - "xmlns": (None, "xmlns", namespaces["xmlns"]), - "xmlns:xlink": ("xmlns", "xlink", namespaces["xmlns"]) -} - -unadjustForeignAttributes = {(ns, local): qname for qname, (prefix, local, ns) in - adjustForeignAttributes.items()} - -spaceCharacters = frozenset([ - "\t", - "\n", - "\u000C", - " ", - "\r" -]) - -tableInsertModeElements = frozenset([ - "table", - "tbody", - "tfoot", - "thead", - "tr" -]) - -asciiLowercase = frozenset(string.ascii_lowercase) -asciiUppercase = frozenset(string.ascii_uppercase) -asciiLetters = frozenset(string.ascii_letters) -digits = frozenset(string.digits) -hexDigits = frozenset(string.hexdigits) - -asciiUpper2Lower = {ord(c): ord(c.lower()) for c in string.ascii_uppercase} - -# Heading elements need to be ordered -headingElements = ( - "h1", - "h2", - "h3", - "h4", - "h5", - "h6" -) - -voidElements = frozenset([ - "base", - "command", - "event-source", - "link", - "meta", - "hr", - "br", - "img", - "embed", - "param", - "area", - "col", - "input", - "source", - "track" -]) - -cdataElements = frozenset(['title', 'textarea']) - -rcdataElements = frozenset([ - 'style', - 'script', - 'xmp', - 'iframe', - 'noembed', - 'noframes', - 'noscript' -]) - -booleanAttributes = { - "": frozenset(["irrelevant", "itemscope"]), - "style": frozenset(["scoped"]), - "img": frozenset(["ismap"]), - "audio": frozenset(["autoplay", "controls"]), - "video": frozenset(["autoplay", "controls"]), - "script": frozenset(["defer", "async"]), - "details": frozenset(["open"]), - "datagrid": frozenset(["multiple", "disabled"]), - "command": frozenset(["hidden", "disabled", "checked", "default"]), - "hr": frozenset(["noshade"]), - "menu": frozenset(["autosubmit"]), - "fieldset": frozenset(["disabled", "readonly"]), - "option": frozenset(["disabled", "readonly", "selected"]), - "optgroup": frozenset(["disabled", "readonly"]), - "button": frozenset(["disabled", "autofocus"]), - "input": frozenset(["disabled", "readonly", "required", "autofocus", "checked", "ismap"]), - "select": frozenset(["disabled", "readonly", "autofocus", "multiple"]), - "output": frozenset(["disabled", "readonly"]), - "iframe": frozenset(["seamless"]), -} - -# entitiesWindows1252 has to be _ordered_ and needs to have an index. It -# therefore can't be a frozenset. -entitiesWindows1252 = ( - 8364, # 0x80 0x20AC EURO SIGN - 65533, # 0x81 UNDEFINED - 8218, # 0x82 0x201A SINGLE LOW-9 QUOTATION MARK - 402, # 0x83 0x0192 LATIN SMALL LETTER F WITH HOOK - 8222, # 0x84 0x201E DOUBLE LOW-9 QUOTATION MARK - 8230, # 0x85 0x2026 HORIZONTAL ELLIPSIS - 8224, # 0x86 0x2020 DAGGER - 8225, # 0x87 0x2021 DOUBLE DAGGER - 710, # 0x88 0x02C6 MODIFIER LETTER CIRCUMFLEX ACCENT - 8240, # 0x89 0x2030 PER MILLE SIGN - 352, # 0x8A 0x0160 LATIN CAPITAL LETTER S WITH CARON - 8249, # 0x8B 0x2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK - 338, # 0x8C 0x0152 LATIN CAPITAL LIGATURE OE - 65533, # 0x8D UNDEFINED - 381, # 0x8E 0x017D LATIN CAPITAL LETTER Z WITH CARON - 65533, # 0x8F UNDEFINED - 65533, # 0x90 UNDEFINED - 8216, # 0x91 0x2018 LEFT SINGLE QUOTATION MARK - 8217, # 0x92 0x2019 RIGHT SINGLE QUOTATION MARK - 8220, # 0x93 0x201C LEFT DOUBLE QUOTATION MARK - 8221, # 0x94 0x201D RIGHT DOUBLE QUOTATION MARK - 8226, # 0x95 0x2022 BULLET - 8211, # 0x96 0x2013 EN DASH - 8212, # 0x97 0x2014 EM DASH - 732, # 0x98 0x02DC SMALL TILDE - 8482, # 0x99 0x2122 TRADE MARK SIGN - 353, # 0x9A 0x0161 LATIN SMALL LETTER S WITH CARON - 8250, # 0x9B 0x203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 339, # 0x9C 0x0153 LATIN SMALL LIGATURE OE - 65533, # 0x9D UNDEFINED - 382, # 0x9E 0x017E LATIN SMALL LETTER Z WITH CARON - 376 # 0x9F 0x0178 LATIN CAPITAL LETTER Y WITH DIAERESIS -) - -xmlEntities = frozenset(['lt;', 'gt;', 'amp;', 'apos;', 'quot;']) - -entities = { - "AElig": "\xc6", - "AElig;": "\xc6", - "AMP": "&", - "AMP;": "&", - "Aacute": "\xc1", - "Aacute;": "\xc1", - "Abreve;": "\u0102", - "Acirc": "\xc2", - "Acirc;": "\xc2", - "Acy;": "\u0410", - "Afr;": "\U0001d504", - "Agrave": "\xc0", - "Agrave;": "\xc0", - "Alpha;": "\u0391", - "Amacr;": "\u0100", - "And;": "\u2a53", - "Aogon;": "\u0104", - "Aopf;": "\U0001d538", - "ApplyFunction;": "\u2061", - "Aring": "\xc5", - "Aring;": "\xc5", - "Ascr;": "\U0001d49c", - "Assign;": "\u2254", - "Atilde": "\xc3", - "Atilde;": "\xc3", - "Auml": "\xc4", - "Auml;": "\xc4", - "Backslash;": "\u2216", - "Barv;": "\u2ae7", - "Barwed;": "\u2306", - "Bcy;": "\u0411", - "Because;": "\u2235", - "Bernoullis;": "\u212c", - "Beta;": "\u0392", - "Bfr;": "\U0001d505", - "Bopf;": "\U0001d539", - "Breve;": "\u02d8", - "Bscr;": "\u212c", - "Bumpeq;": "\u224e", - "CHcy;": "\u0427", - "COPY": "\xa9", - "COPY;": "\xa9", - "Cacute;": "\u0106", - "Cap;": "\u22d2", - "CapitalDifferentialD;": "\u2145", - "Cayleys;": "\u212d", - "Ccaron;": "\u010c", - "Ccedil": "\xc7", - "Ccedil;": "\xc7", - "Ccirc;": "\u0108", - "Cconint;": "\u2230", - "Cdot;": "\u010a", - "Cedilla;": "\xb8", - "CenterDot;": "\xb7", - "Cfr;": "\u212d", - "Chi;": "\u03a7", - "CircleDot;": "\u2299", - "CircleMinus;": "\u2296", - "CirclePlus;": "\u2295", - "CircleTimes;": "\u2297", - "ClockwiseContourIntegral;": "\u2232", - "CloseCurlyDoubleQuote;": "\u201d", - "CloseCurlyQuote;": "\u2019", - "Colon;": "\u2237", - "Colone;": "\u2a74", - "Congruent;": "\u2261", - "Conint;": "\u222f", - "ContourIntegral;": "\u222e", - "Copf;": "\u2102", - "Coproduct;": "\u2210", - "CounterClockwiseContourIntegral;": "\u2233", - "Cross;": "\u2a2f", - "Cscr;": "\U0001d49e", - "Cup;": "\u22d3", - "CupCap;": "\u224d", - "DD;": "\u2145", - "DDotrahd;": "\u2911", - "DJcy;": "\u0402", - "DScy;": "\u0405", - "DZcy;": "\u040f", - "Dagger;": "\u2021", - "Darr;": "\u21a1", - "Dashv;": "\u2ae4", - "Dcaron;": "\u010e", - "Dcy;": "\u0414", - "Del;": "\u2207", - "Delta;": "\u0394", - "Dfr;": "\U0001d507", - "DiacriticalAcute;": "\xb4", - "DiacriticalDot;": "\u02d9", - "DiacriticalDoubleAcute;": "\u02dd", - "DiacriticalGrave;": "`", - "DiacriticalTilde;": "\u02dc", - "Diamond;": "\u22c4", - "DifferentialD;": "\u2146", - "Dopf;": "\U0001d53b", - "Dot;": "\xa8", - "DotDot;": "\u20dc", - "DotEqual;": "\u2250", - "DoubleContourIntegral;": "\u222f", - "DoubleDot;": "\xa8", - "DoubleDownArrow;": "\u21d3", - "DoubleLeftArrow;": "\u21d0", - "DoubleLeftRightArrow;": "\u21d4", - "DoubleLeftTee;": "\u2ae4", - "DoubleLongLeftArrow;": "\u27f8", - "DoubleLongLeftRightArrow;": "\u27fa", - "DoubleLongRightArrow;": "\u27f9", - "DoubleRightArrow;": "\u21d2", - "DoubleRightTee;": "\u22a8", - "DoubleUpArrow;": "\u21d1", - "DoubleUpDownArrow;": "\u21d5", - "DoubleVerticalBar;": "\u2225", - "DownArrow;": "\u2193", - "DownArrowBar;": "\u2913", - "DownArrowUpArrow;": "\u21f5", - "DownBreve;": "\u0311", - "DownLeftRightVector;": "\u2950", - "DownLeftTeeVector;": "\u295e", - "DownLeftVector;": "\u21bd", - "DownLeftVectorBar;": "\u2956", - "DownRightTeeVector;": "\u295f", - "DownRightVector;": "\u21c1", - "DownRightVectorBar;": "\u2957", - "DownTee;": "\u22a4", - "DownTeeArrow;": "\u21a7", - "Downarrow;": "\u21d3", - "Dscr;": "\U0001d49f", - "Dstrok;": "\u0110", - "ENG;": "\u014a", - "ETH": "\xd0", - "ETH;": "\xd0", - "Eacute": "\xc9", - "Eacute;": "\xc9", - "Ecaron;": "\u011a", - "Ecirc": "\xca", - "Ecirc;": "\xca", - "Ecy;": "\u042d", - "Edot;": "\u0116", - "Efr;": "\U0001d508", - "Egrave": "\xc8", - "Egrave;": "\xc8", - "Element;": "\u2208", - "Emacr;": "\u0112", - "EmptySmallSquare;": "\u25fb", - "EmptyVerySmallSquare;": "\u25ab", - "Eogon;": "\u0118", - "Eopf;": "\U0001d53c", - "Epsilon;": "\u0395", - "Equal;": "\u2a75", - "EqualTilde;": "\u2242", - "Equilibrium;": "\u21cc", - "Escr;": "\u2130", - "Esim;": "\u2a73", - "Eta;": "\u0397", - "Euml": "\xcb", - "Euml;": "\xcb", - "Exists;": "\u2203", - "ExponentialE;": "\u2147", - "Fcy;": "\u0424", - "Ffr;": "\U0001d509", - "FilledSmallSquare;": "\u25fc", - "FilledVerySmallSquare;": "\u25aa", - "Fopf;": "\U0001d53d", - "ForAll;": "\u2200", - "Fouriertrf;": "\u2131", - "Fscr;": "\u2131", - "GJcy;": "\u0403", - "GT": ">", - "GT;": ">", - "Gamma;": "\u0393", - "Gammad;": "\u03dc", - "Gbreve;": "\u011e", - "Gcedil;": "\u0122", - "Gcirc;": "\u011c", - "Gcy;": "\u0413", - "Gdot;": "\u0120", - "Gfr;": "\U0001d50a", - "Gg;": "\u22d9", - "Gopf;": "\U0001d53e", - "GreaterEqual;": "\u2265", - "GreaterEqualLess;": "\u22db", - "GreaterFullEqual;": "\u2267", - "GreaterGreater;": "\u2aa2", - "GreaterLess;": "\u2277", - "GreaterSlantEqual;": "\u2a7e", - "GreaterTilde;": "\u2273", - "Gscr;": "\U0001d4a2", - "Gt;": "\u226b", - "HARDcy;": "\u042a", - "Hacek;": "\u02c7", - "Hat;": "^", - "Hcirc;": "\u0124", - "Hfr;": "\u210c", - "HilbertSpace;": "\u210b", - "Hopf;": "\u210d", - "HorizontalLine;": "\u2500", - "Hscr;": "\u210b", - "Hstrok;": "\u0126", - "HumpDownHump;": "\u224e", - "HumpEqual;": "\u224f", - "IEcy;": "\u0415", - "IJlig;": "\u0132", - "IOcy;": "\u0401", - "Iacute": "\xcd", - "Iacute;": "\xcd", - "Icirc": "\xce", - "Icirc;": "\xce", - "Icy;": "\u0418", - "Idot;": "\u0130", - "Ifr;": "\u2111", - "Igrave": "\xcc", - "Igrave;": "\xcc", - "Im;": "\u2111", - "Imacr;": "\u012a", - "ImaginaryI;": "\u2148", - "Implies;": "\u21d2", - "Int;": "\u222c", - "Integral;": "\u222b", - "Intersection;": "\u22c2", - "InvisibleComma;": "\u2063", - "InvisibleTimes;": "\u2062", - "Iogon;": "\u012e", - "Iopf;": "\U0001d540", - "Iota;": "\u0399", - "Iscr;": "\u2110", - "Itilde;": "\u0128", - "Iukcy;": "\u0406", - "Iuml": "\xcf", - "Iuml;": "\xcf", - "Jcirc;": "\u0134", - "Jcy;": "\u0419", - "Jfr;": "\U0001d50d", - "Jopf;": "\U0001d541", - "Jscr;": "\U0001d4a5", - "Jsercy;": "\u0408", - "Jukcy;": "\u0404", - "KHcy;": "\u0425", - "KJcy;": "\u040c", - "Kappa;": "\u039a", - "Kcedil;": "\u0136", - "Kcy;": "\u041a", - "Kfr;": "\U0001d50e", - "Kopf;": "\U0001d542", - "Kscr;": "\U0001d4a6", - "LJcy;": "\u0409", - "LT": "<", - "LT;": "<", - "Lacute;": "\u0139", - "Lambda;": "\u039b", - "Lang;": "\u27ea", - "Laplacetrf;": "\u2112", - "Larr;": "\u219e", - "Lcaron;": "\u013d", - "Lcedil;": "\u013b", - "Lcy;": "\u041b", - "LeftAngleBracket;": "\u27e8", - "LeftArrow;": "\u2190", - "LeftArrowBar;": "\u21e4", - "LeftArrowRightArrow;": "\u21c6", - "LeftCeiling;": "\u2308", - "LeftDoubleBracket;": "\u27e6", - "LeftDownTeeVector;": "\u2961", - "LeftDownVector;": "\u21c3", - "LeftDownVectorBar;": "\u2959", - "LeftFloor;": "\u230a", - "LeftRightArrow;": "\u2194", - "LeftRightVector;": "\u294e", - "LeftTee;": "\u22a3", - "LeftTeeArrow;": "\u21a4", - "LeftTeeVector;": "\u295a", - "LeftTriangle;": "\u22b2", - "LeftTriangleBar;": "\u29cf", - "LeftTriangleEqual;": "\u22b4", - "LeftUpDownVector;": "\u2951", - "LeftUpTeeVector;": "\u2960", - "LeftUpVector;": "\u21bf", - "LeftUpVectorBar;": "\u2958", - "LeftVector;": "\u21bc", - "LeftVectorBar;": "\u2952", - "Leftarrow;": "\u21d0", - "Leftrightarrow;": "\u21d4", - "LessEqualGreater;": "\u22da", - "LessFullEqual;": "\u2266", - "LessGreater;": "\u2276", - "LessLess;": "\u2aa1", - "LessSlantEqual;": "\u2a7d", - "LessTilde;": "\u2272", - "Lfr;": "\U0001d50f", - "Ll;": "\u22d8", - "Lleftarrow;": "\u21da", - "Lmidot;": "\u013f", - "LongLeftArrow;": "\u27f5", - "LongLeftRightArrow;": "\u27f7", - "LongRightArrow;": "\u27f6", - "Longleftarrow;": "\u27f8", - "Longleftrightarrow;": "\u27fa", - "Longrightarrow;": "\u27f9", - "Lopf;": "\U0001d543", - "LowerLeftArrow;": "\u2199", - "LowerRightArrow;": "\u2198", - "Lscr;": "\u2112", - "Lsh;": "\u21b0", - "Lstrok;": "\u0141", - "Lt;": "\u226a", - "Map;": "\u2905", - "Mcy;": "\u041c", - "MediumSpace;": "\u205f", - "Mellintrf;": "\u2133", - "Mfr;": "\U0001d510", - "MinusPlus;": "\u2213", - "Mopf;": "\U0001d544", - "Mscr;": "\u2133", - "Mu;": "\u039c", - "NJcy;": "\u040a", - "Nacute;": "\u0143", - "Ncaron;": "\u0147", - "Ncedil;": "\u0145", - "Ncy;": "\u041d", - "NegativeMediumSpace;": "\u200b", - "NegativeThickSpace;": "\u200b", - "NegativeThinSpace;": "\u200b", - "NegativeVeryThinSpace;": "\u200b", - "NestedGreaterGreater;": "\u226b", - "NestedLessLess;": "\u226a", - "NewLine;": "\n", - "Nfr;": "\U0001d511", - "NoBreak;": "\u2060", - "NonBreakingSpace;": "\xa0", - "Nopf;": "\u2115", - "Not;": "\u2aec", - "NotCongruent;": "\u2262", - "NotCupCap;": "\u226d", - "NotDoubleVerticalBar;": "\u2226", - "NotElement;": "\u2209", - "NotEqual;": "\u2260", - "NotEqualTilde;": "\u2242\u0338", - "NotExists;": "\u2204", - "NotGreater;": "\u226f", - "NotGreaterEqual;": "\u2271", - "NotGreaterFullEqual;": "\u2267\u0338", - "NotGreaterGreater;": "\u226b\u0338", - "NotGreaterLess;": "\u2279", - "NotGreaterSlantEqual;": "\u2a7e\u0338", - "NotGreaterTilde;": "\u2275", - "NotHumpDownHump;": "\u224e\u0338", - "NotHumpEqual;": "\u224f\u0338", - "NotLeftTriangle;": "\u22ea", - "NotLeftTriangleBar;": "\u29cf\u0338", - "NotLeftTriangleEqual;": "\u22ec", - "NotLess;": "\u226e", - "NotLessEqual;": "\u2270", - "NotLessGreater;": "\u2278", - "NotLessLess;": "\u226a\u0338", - "NotLessSlantEqual;": "\u2a7d\u0338", - "NotLessTilde;": "\u2274", - "NotNestedGreaterGreater;": "\u2aa2\u0338", - "NotNestedLessLess;": "\u2aa1\u0338", - "NotPrecedes;": "\u2280", - "NotPrecedesEqual;": "\u2aaf\u0338", - "NotPrecedesSlantEqual;": "\u22e0", - "NotReverseElement;": "\u220c", - "NotRightTriangle;": "\u22eb", - "NotRightTriangleBar;": "\u29d0\u0338", - "NotRightTriangleEqual;": "\u22ed", - "NotSquareSubset;": "\u228f\u0338", - "NotSquareSubsetEqual;": "\u22e2", - "NotSquareSuperset;": "\u2290\u0338", - "NotSquareSupersetEqual;": "\u22e3", - "NotSubset;": "\u2282\u20d2", - "NotSubsetEqual;": "\u2288", - "NotSucceeds;": "\u2281", - "NotSucceedsEqual;": "\u2ab0\u0338", - "NotSucceedsSlantEqual;": "\u22e1", - "NotSucceedsTilde;": "\u227f\u0338", - "NotSuperset;": "\u2283\u20d2", - "NotSupersetEqual;": "\u2289", - "NotTilde;": "\u2241", - "NotTildeEqual;": "\u2244", - "NotTildeFullEqual;": "\u2247", - "NotTildeTilde;": "\u2249", - "NotVerticalBar;": "\u2224", - "Nscr;": "\U0001d4a9", - "Ntilde": "\xd1", - "Ntilde;": "\xd1", - "Nu;": "\u039d", - "OElig;": "\u0152", - "Oacute": "\xd3", - "Oacute;": "\xd3", - "Ocirc": "\xd4", - "Ocirc;": "\xd4", - "Ocy;": "\u041e", - "Odblac;": "\u0150", - "Ofr;": "\U0001d512", - "Ograve": "\xd2", - "Ograve;": "\xd2", - "Omacr;": "\u014c", - "Omega;": "\u03a9", - "Omicron;": "\u039f", - "Oopf;": "\U0001d546", - "OpenCurlyDoubleQuote;": "\u201c", - "OpenCurlyQuote;": "\u2018", - "Or;": "\u2a54", - "Oscr;": "\U0001d4aa", - "Oslash": "\xd8", - "Oslash;": "\xd8", - "Otilde": "\xd5", - "Otilde;": "\xd5", - "Otimes;": "\u2a37", - "Ouml": "\xd6", - "Ouml;": "\xd6", - "OverBar;": "\u203e", - "OverBrace;": "\u23de", - "OverBracket;": "\u23b4", - "OverParenthesis;": "\u23dc", - "PartialD;": "\u2202", - "Pcy;": "\u041f", - "Pfr;": "\U0001d513", - "Phi;": "\u03a6", - "Pi;": "\u03a0", - "PlusMinus;": "\xb1", - "Poincareplane;": "\u210c", - "Popf;": "\u2119", - "Pr;": "\u2abb", - "Precedes;": "\u227a", - "PrecedesEqual;": "\u2aaf", - "PrecedesSlantEqual;": "\u227c", - "PrecedesTilde;": "\u227e", - "Prime;": "\u2033", - "Product;": "\u220f", - "Proportion;": "\u2237", - "Proportional;": "\u221d", - "Pscr;": "\U0001d4ab", - "Psi;": "\u03a8", - "QUOT": "\"", - "QUOT;": "\"", - "Qfr;": "\U0001d514", - "Qopf;": "\u211a", - "Qscr;": "\U0001d4ac", - "RBarr;": "\u2910", - "REG": "\xae", - "REG;": "\xae", - "Racute;": "\u0154", - "Rang;": "\u27eb", - "Rarr;": "\u21a0", - "Rarrtl;": "\u2916", - "Rcaron;": "\u0158", - "Rcedil;": "\u0156", - "Rcy;": "\u0420", - "Re;": "\u211c", - "ReverseElement;": "\u220b", - "ReverseEquilibrium;": "\u21cb", - "ReverseUpEquilibrium;": "\u296f", - "Rfr;": "\u211c", - "Rho;": "\u03a1", - "RightAngleBracket;": "\u27e9", - "RightArrow;": "\u2192", - "RightArrowBar;": "\u21e5", - "RightArrowLeftArrow;": "\u21c4", - "RightCeiling;": "\u2309", - "RightDoubleBracket;": "\u27e7", - "RightDownTeeVector;": "\u295d", - "RightDownVector;": "\u21c2", - "RightDownVectorBar;": "\u2955", - "RightFloor;": "\u230b", - "RightTee;": "\u22a2", - "RightTeeArrow;": "\u21a6", - "RightTeeVector;": "\u295b", - "RightTriangle;": "\u22b3", - "RightTriangleBar;": "\u29d0", - "RightTriangleEqual;": "\u22b5", - "RightUpDownVector;": "\u294f", - "RightUpTeeVector;": "\u295c", - "RightUpVector;": "\u21be", - "RightUpVectorBar;": "\u2954", - "RightVector;": "\u21c0", - "RightVectorBar;": "\u2953", - "Rightarrow;": "\u21d2", - "Ropf;": "\u211d", - "RoundImplies;": "\u2970", - "Rrightarrow;": "\u21db", - "Rscr;": "\u211b", - "Rsh;": "\u21b1", - "RuleDelayed;": "\u29f4", - "SHCHcy;": "\u0429", - "SHcy;": "\u0428", - "SOFTcy;": "\u042c", - "Sacute;": "\u015a", - "Sc;": "\u2abc", - "Scaron;": "\u0160", - "Scedil;": "\u015e", - "Scirc;": "\u015c", - "Scy;": "\u0421", - "Sfr;": "\U0001d516", - "ShortDownArrow;": "\u2193", - "ShortLeftArrow;": "\u2190", - "ShortRightArrow;": "\u2192", - "ShortUpArrow;": "\u2191", - "Sigma;": "\u03a3", - "SmallCircle;": "\u2218", - "Sopf;": "\U0001d54a", - "Sqrt;": "\u221a", - "Square;": "\u25a1", - "SquareIntersection;": "\u2293", - "SquareSubset;": "\u228f", - "SquareSubsetEqual;": "\u2291", - "SquareSuperset;": "\u2290", - "SquareSupersetEqual;": "\u2292", - "SquareUnion;": "\u2294", - "Sscr;": "\U0001d4ae", - "Star;": "\u22c6", - "Sub;": "\u22d0", - "Subset;": "\u22d0", - "SubsetEqual;": "\u2286", - "Succeeds;": "\u227b", - "SucceedsEqual;": "\u2ab0", - "SucceedsSlantEqual;": "\u227d", - "SucceedsTilde;": "\u227f", - "SuchThat;": "\u220b", - "Sum;": "\u2211", - "Sup;": "\u22d1", - "Superset;": "\u2283", - "SupersetEqual;": "\u2287", - "Supset;": "\u22d1", - "THORN": "\xde", - "THORN;": "\xde", - "TRADE;": "\u2122", - "TSHcy;": "\u040b", - "TScy;": "\u0426", - "Tab;": "\t", - "Tau;": "\u03a4", - "Tcaron;": "\u0164", - "Tcedil;": "\u0162", - "Tcy;": "\u0422", - "Tfr;": "\U0001d517", - "Therefore;": "\u2234", - "Theta;": "\u0398", - "ThickSpace;": "\u205f\u200a", - "ThinSpace;": "\u2009", - "Tilde;": "\u223c", - "TildeEqual;": "\u2243", - "TildeFullEqual;": "\u2245", - "TildeTilde;": "\u2248", - "Topf;": "\U0001d54b", - "TripleDot;": "\u20db", - "Tscr;": "\U0001d4af", - "Tstrok;": "\u0166", - "Uacute": "\xda", - "Uacute;": "\xda", - "Uarr;": "\u219f", - "Uarrocir;": "\u2949", - "Ubrcy;": "\u040e", - "Ubreve;": "\u016c", - "Ucirc": "\xdb", - "Ucirc;": "\xdb", - "Ucy;": "\u0423", - "Udblac;": "\u0170", - "Ufr;": "\U0001d518", - "Ugrave": "\xd9", - "Ugrave;": "\xd9", - "Umacr;": "\u016a", - "UnderBar;": "_", - "UnderBrace;": "\u23df", - "UnderBracket;": "\u23b5", - "UnderParenthesis;": "\u23dd", - "Union;": "\u22c3", - "UnionPlus;": "\u228e", - "Uogon;": "\u0172", - "Uopf;": "\U0001d54c", - "UpArrow;": "\u2191", - "UpArrowBar;": "\u2912", - "UpArrowDownArrow;": "\u21c5", - "UpDownArrow;": "\u2195", - "UpEquilibrium;": "\u296e", - "UpTee;": "\u22a5", - "UpTeeArrow;": "\u21a5", - "Uparrow;": "\u21d1", - "Updownarrow;": "\u21d5", - "UpperLeftArrow;": "\u2196", - "UpperRightArrow;": "\u2197", - "Upsi;": "\u03d2", - "Upsilon;": "\u03a5", - "Uring;": "\u016e", - "Uscr;": "\U0001d4b0", - "Utilde;": "\u0168", - "Uuml": "\xdc", - "Uuml;": "\xdc", - "VDash;": "\u22ab", - "Vbar;": "\u2aeb", - "Vcy;": "\u0412", - "Vdash;": "\u22a9", - "Vdashl;": "\u2ae6", - "Vee;": "\u22c1", - "Verbar;": "\u2016", - "Vert;": "\u2016", - "VerticalBar;": "\u2223", - "VerticalLine;": "|", - "VerticalSeparator;": "\u2758", - "VerticalTilde;": "\u2240", - "VeryThinSpace;": "\u200a", - "Vfr;": "\U0001d519", - "Vopf;": "\U0001d54d", - "Vscr;": "\U0001d4b1", - "Vvdash;": "\u22aa", - "Wcirc;": "\u0174", - "Wedge;": "\u22c0", - "Wfr;": "\U0001d51a", - "Wopf;": "\U0001d54e", - "Wscr;": "\U0001d4b2", - "Xfr;": "\U0001d51b", - "Xi;": "\u039e", - "Xopf;": "\U0001d54f", - "Xscr;": "\U0001d4b3", - "YAcy;": "\u042f", - "YIcy;": "\u0407", - "YUcy;": "\u042e", - "Yacute": "\xdd", - "Yacute;": "\xdd", - "Ycirc;": "\u0176", - "Ycy;": "\u042b", - "Yfr;": "\U0001d51c", - "Yopf;": "\U0001d550", - "Yscr;": "\U0001d4b4", - "Yuml;": "\u0178", - "ZHcy;": "\u0416", - "Zacute;": "\u0179", - "Zcaron;": "\u017d", - "Zcy;": "\u0417", - "Zdot;": "\u017b", - "ZeroWidthSpace;": "\u200b", - "Zeta;": "\u0396", - "Zfr;": "\u2128", - "Zopf;": "\u2124", - "Zscr;": "\U0001d4b5", - "aacute": "\xe1", - "aacute;": "\xe1", - "abreve;": "\u0103", - "ac;": "\u223e", - "acE;": "\u223e\u0333", - "acd;": "\u223f", - "acirc": "\xe2", - "acirc;": "\xe2", - "acute": "\xb4", - "acute;": "\xb4", - "acy;": "\u0430", - "aelig": "\xe6", - "aelig;": "\xe6", - "af;": "\u2061", - "afr;": "\U0001d51e", - "agrave": "\xe0", - "agrave;": "\xe0", - "alefsym;": "\u2135", - "aleph;": "\u2135", - "alpha;": "\u03b1", - "amacr;": "\u0101", - "amalg;": "\u2a3f", - "amp": "&", - "amp;": "&", - "and;": "\u2227", - "andand;": "\u2a55", - "andd;": "\u2a5c", - "andslope;": "\u2a58", - "andv;": "\u2a5a", - "ang;": "\u2220", - "ange;": "\u29a4", - "angle;": "\u2220", - "angmsd;": "\u2221", - "angmsdaa;": "\u29a8", - "angmsdab;": "\u29a9", - "angmsdac;": "\u29aa", - "angmsdad;": "\u29ab", - "angmsdae;": "\u29ac", - "angmsdaf;": "\u29ad", - "angmsdag;": "\u29ae", - "angmsdah;": "\u29af", - "angrt;": "\u221f", - "angrtvb;": "\u22be", - "angrtvbd;": "\u299d", - "angsph;": "\u2222", - "angst;": "\xc5", - "angzarr;": "\u237c", - "aogon;": "\u0105", - "aopf;": "\U0001d552", - "ap;": "\u2248", - "apE;": "\u2a70", - "apacir;": "\u2a6f", - "ape;": "\u224a", - "apid;": "\u224b", - "apos;": "'", - "approx;": "\u2248", - "approxeq;": "\u224a", - "aring": "\xe5", - "aring;": "\xe5", - "ascr;": "\U0001d4b6", - "ast;": "*", - "asymp;": "\u2248", - "asympeq;": "\u224d", - "atilde": "\xe3", - "atilde;": "\xe3", - "auml": "\xe4", - "auml;": "\xe4", - "awconint;": "\u2233", - "awint;": "\u2a11", - "bNot;": "\u2aed", - "backcong;": "\u224c", - "backepsilon;": "\u03f6", - "backprime;": "\u2035", - "backsim;": "\u223d", - "backsimeq;": "\u22cd", - "barvee;": "\u22bd", - "barwed;": "\u2305", - "barwedge;": "\u2305", - "bbrk;": "\u23b5", - "bbrktbrk;": "\u23b6", - "bcong;": "\u224c", - "bcy;": "\u0431", - "bdquo;": "\u201e", - "becaus;": "\u2235", - "because;": "\u2235", - "bemptyv;": "\u29b0", - "bepsi;": "\u03f6", - "bernou;": "\u212c", - "beta;": "\u03b2", - "beth;": "\u2136", - "between;": "\u226c", - "bfr;": "\U0001d51f", - "bigcap;": "\u22c2", - "bigcirc;": "\u25ef", - "bigcup;": "\u22c3", - "bigodot;": "\u2a00", - "bigoplus;": "\u2a01", - "bigotimes;": "\u2a02", - "bigsqcup;": "\u2a06", - "bigstar;": "\u2605", - "bigtriangledown;": "\u25bd", - "bigtriangleup;": "\u25b3", - "biguplus;": "\u2a04", - "bigvee;": "\u22c1", - "bigwedge;": "\u22c0", - "bkarow;": "\u290d", - "blacklozenge;": "\u29eb", - "blacksquare;": "\u25aa", - "blacktriangle;": "\u25b4", - "blacktriangledown;": "\u25be", - "blacktriangleleft;": "\u25c2", - "blacktriangleright;": "\u25b8", - "blank;": "\u2423", - "blk12;": "\u2592", - "blk14;": "\u2591", - "blk34;": "\u2593", - "block;": "\u2588", - "bne;": "=\u20e5", - "bnequiv;": "\u2261\u20e5", - "bnot;": "\u2310", - "bopf;": "\U0001d553", - "bot;": "\u22a5", - "bottom;": "\u22a5", - "bowtie;": "\u22c8", - "boxDL;": "\u2557", - "boxDR;": "\u2554", - "boxDl;": "\u2556", - "boxDr;": "\u2553", - "boxH;": "\u2550", - "boxHD;": "\u2566", - "boxHU;": "\u2569", - "boxHd;": "\u2564", - "boxHu;": "\u2567", - "boxUL;": "\u255d", - "boxUR;": "\u255a", - "boxUl;": "\u255c", - "boxUr;": "\u2559", - "boxV;": "\u2551", - "boxVH;": "\u256c", - "boxVL;": "\u2563", - "boxVR;": "\u2560", - "boxVh;": "\u256b", - "boxVl;": "\u2562", - "boxVr;": "\u255f", - "boxbox;": "\u29c9", - "boxdL;": "\u2555", - "boxdR;": "\u2552", - "boxdl;": "\u2510", - "boxdr;": "\u250c", - "boxh;": "\u2500", - "boxhD;": "\u2565", - "boxhU;": "\u2568", - "boxhd;": "\u252c", - "boxhu;": "\u2534", - "boxminus;": "\u229f", - "boxplus;": "\u229e", - "boxtimes;": "\u22a0", - "boxuL;": "\u255b", - "boxuR;": "\u2558", - "boxul;": "\u2518", - "boxur;": "\u2514", - "boxv;": "\u2502", - "boxvH;": "\u256a", - "boxvL;": "\u2561", - "boxvR;": "\u255e", - "boxvh;": "\u253c", - "boxvl;": "\u2524", - "boxvr;": "\u251c", - "bprime;": "\u2035", - "breve;": "\u02d8", - "brvbar": "\xa6", - "brvbar;": "\xa6", - "bscr;": "\U0001d4b7", - "bsemi;": "\u204f", - "bsim;": "\u223d", - "bsime;": "\u22cd", - "bsol;": "\\", - "bsolb;": "\u29c5", - "bsolhsub;": "\u27c8", - "bull;": "\u2022", - "bullet;": "\u2022", - "bump;": "\u224e", - "bumpE;": "\u2aae", - "bumpe;": "\u224f", - "bumpeq;": "\u224f", - "cacute;": "\u0107", - "cap;": "\u2229", - "capand;": "\u2a44", - "capbrcup;": "\u2a49", - "capcap;": "\u2a4b", - "capcup;": "\u2a47", - "capdot;": "\u2a40", - "caps;": "\u2229\ufe00", - "caret;": "\u2041", - "caron;": "\u02c7", - "ccaps;": "\u2a4d", - "ccaron;": "\u010d", - "ccedil": "\xe7", - "ccedil;": "\xe7", - "ccirc;": "\u0109", - "ccups;": "\u2a4c", - "ccupssm;": "\u2a50", - "cdot;": "\u010b", - "cedil": "\xb8", - "cedil;": "\xb8", - "cemptyv;": "\u29b2", - "cent": "\xa2", - "cent;": "\xa2", - "centerdot;": "\xb7", - "cfr;": "\U0001d520", - "chcy;": "\u0447", - "check;": "\u2713", - "checkmark;": "\u2713", - "chi;": "\u03c7", - "cir;": "\u25cb", - "cirE;": "\u29c3", - "circ;": "\u02c6", - "circeq;": "\u2257", - "circlearrowleft;": "\u21ba", - "circlearrowright;": "\u21bb", - "circledR;": "\xae", - "circledS;": "\u24c8", - "circledast;": "\u229b", - "circledcirc;": "\u229a", - "circleddash;": "\u229d", - "cire;": "\u2257", - "cirfnint;": "\u2a10", - "cirmid;": "\u2aef", - "cirscir;": "\u29c2", - "clubs;": "\u2663", - "clubsuit;": "\u2663", - "colon;": ":", - "colone;": "\u2254", - "coloneq;": "\u2254", - "comma;": ",", - "commat;": "@", - "comp;": "\u2201", - "compfn;": "\u2218", - "complement;": "\u2201", - "complexes;": "\u2102", - "cong;": "\u2245", - "congdot;": "\u2a6d", - "conint;": "\u222e", - "copf;": "\U0001d554", - "coprod;": "\u2210", - "copy": "\xa9", - "copy;": "\xa9", - "copysr;": "\u2117", - "crarr;": "\u21b5", - "cross;": "\u2717", - "cscr;": "\U0001d4b8", - "csub;": "\u2acf", - "csube;": "\u2ad1", - "csup;": "\u2ad0", - "csupe;": "\u2ad2", - "ctdot;": "\u22ef", - "cudarrl;": "\u2938", - "cudarrr;": "\u2935", - "cuepr;": "\u22de", - "cuesc;": "\u22df", - "cularr;": "\u21b6", - "cularrp;": "\u293d", - "cup;": "\u222a", - "cupbrcap;": "\u2a48", - "cupcap;": "\u2a46", - "cupcup;": "\u2a4a", - "cupdot;": "\u228d", - "cupor;": "\u2a45", - "cups;": "\u222a\ufe00", - "curarr;": "\u21b7", - "curarrm;": "\u293c", - "curlyeqprec;": "\u22de", - "curlyeqsucc;": "\u22df", - "curlyvee;": "\u22ce", - "curlywedge;": "\u22cf", - "curren": "\xa4", - "curren;": "\xa4", - "curvearrowleft;": "\u21b6", - "curvearrowright;": "\u21b7", - "cuvee;": "\u22ce", - "cuwed;": "\u22cf", - "cwconint;": "\u2232", - "cwint;": "\u2231", - "cylcty;": "\u232d", - "dArr;": "\u21d3", - "dHar;": "\u2965", - "dagger;": "\u2020", - "daleth;": "\u2138", - "darr;": "\u2193", - "dash;": "\u2010", - "dashv;": "\u22a3", - "dbkarow;": "\u290f", - "dblac;": "\u02dd", - "dcaron;": "\u010f", - "dcy;": "\u0434", - "dd;": "\u2146", - "ddagger;": "\u2021", - "ddarr;": "\u21ca", - "ddotseq;": "\u2a77", - "deg": "\xb0", - "deg;": "\xb0", - "delta;": "\u03b4", - "demptyv;": "\u29b1", - "dfisht;": "\u297f", - "dfr;": "\U0001d521", - "dharl;": "\u21c3", - "dharr;": "\u21c2", - "diam;": "\u22c4", - "diamond;": "\u22c4", - "diamondsuit;": "\u2666", - "diams;": "\u2666", - "die;": "\xa8", - "digamma;": "\u03dd", - "disin;": "\u22f2", - "div;": "\xf7", - "divide": "\xf7", - "divide;": "\xf7", - "divideontimes;": "\u22c7", - "divonx;": "\u22c7", - "djcy;": "\u0452", - "dlcorn;": "\u231e", - "dlcrop;": "\u230d", - "dollar;": "$", - "dopf;": "\U0001d555", - "dot;": "\u02d9", - "doteq;": "\u2250", - "doteqdot;": "\u2251", - "dotminus;": "\u2238", - "dotplus;": "\u2214", - "dotsquare;": "\u22a1", - "doublebarwedge;": "\u2306", - "downarrow;": "\u2193", - "downdownarrows;": "\u21ca", - "downharpoonleft;": "\u21c3", - "downharpoonright;": "\u21c2", - "drbkarow;": "\u2910", - "drcorn;": "\u231f", - "drcrop;": "\u230c", - "dscr;": "\U0001d4b9", - "dscy;": "\u0455", - "dsol;": "\u29f6", - "dstrok;": "\u0111", - "dtdot;": "\u22f1", - "dtri;": "\u25bf", - "dtrif;": "\u25be", - "duarr;": "\u21f5", - "duhar;": "\u296f", - "dwangle;": "\u29a6", - "dzcy;": "\u045f", - "dzigrarr;": "\u27ff", - "eDDot;": "\u2a77", - "eDot;": "\u2251", - "eacute": "\xe9", - "eacute;": "\xe9", - "easter;": "\u2a6e", - "ecaron;": "\u011b", - "ecir;": "\u2256", - "ecirc": "\xea", - "ecirc;": "\xea", - "ecolon;": "\u2255", - "ecy;": "\u044d", - "edot;": "\u0117", - "ee;": "\u2147", - "efDot;": "\u2252", - "efr;": "\U0001d522", - "eg;": "\u2a9a", - "egrave": "\xe8", - "egrave;": "\xe8", - "egs;": "\u2a96", - "egsdot;": "\u2a98", - "el;": "\u2a99", - "elinters;": "\u23e7", - "ell;": "\u2113", - "els;": "\u2a95", - "elsdot;": "\u2a97", - "emacr;": "\u0113", - "empty;": "\u2205", - "emptyset;": "\u2205", - "emptyv;": "\u2205", - "emsp13;": "\u2004", - "emsp14;": "\u2005", - "emsp;": "\u2003", - "eng;": "\u014b", - "ensp;": "\u2002", - "eogon;": "\u0119", - "eopf;": "\U0001d556", - "epar;": "\u22d5", - "eparsl;": "\u29e3", - "eplus;": "\u2a71", - "epsi;": "\u03b5", - "epsilon;": "\u03b5", - "epsiv;": "\u03f5", - "eqcirc;": "\u2256", - "eqcolon;": "\u2255", - "eqsim;": "\u2242", - "eqslantgtr;": "\u2a96", - "eqslantless;": "\u2a95", - "equals;": "=", - "equest;": "\u225f", - "equiv;": "\u2261", - "equivDD;": "\u2a78", - "eqvparsl;": "\u29e5", - "erDot;": "\u2253", - "erarr;": "\u2971", - "escr;": "\u212f", - "esdot;": "\u2250", - "esim;": "\u2242", - "eta;": "\u03b7", - "eth": "\xf0", - "eth;": "\xf0", - "euml": "\xeb", - "euml;": "\xeb", - "euro;": "\u20ac", - "excl;": "!", - "exist;": "\u2203", - "expectation;": "\u2130", - "exponentiale;": "\u2147", - "fallingdotseq;": "\u2252", - "fcy;": "\u0444", - "female;": "\u2640", - "ffilig;": "\ufb03", - "fflig;": "\ufb00", - "ffllig;": "\ufb04", - "ffr;": "\U0001d523", - "filig;": "\ufb01", - "fjlig;": "fj", - "flat;": "\u266d", - "fllig;": "\ufb02", - "fltns;": "\u25b1", - "fnof;": "\u0192", - "fopf;": "\U0001d557", - "forall;": "\u2200", - "fork;": "\u22d4", - "forkv;": "\u2ad9", - "fpartint;": "\u2a0d", - "frac12": "\xbd", - "frac12;": "\xbd", - "frac13;": "\u2153", - "frac14": "\xbc", - "frac14;": "\xbc", - "frac15;": "\u2155", - "frac16;": "\u2159", - "frac18;": "\u215b", - "frac23;": "\u2154", - "frac25;": "\u2156", - "frac34": "\xbe", - "frac34;": "\xbe", - "frac35;": "\u2157", - "frac38;": "\u215c", - "frac45;": "\u2158", - "frac56;": "\u215a", - "frac58;": "\u215d", - "frac78;": "\u215e", - "frasl;": "\u2044", - "frown;": "\u2322", - "fscr;": "\U0001d4bb", - "gE;": "\u2267", - "gEl;": "\u2a8c", - "gacute;": "\u01f5", - "gamma;": "\u03b3", - "gammad;": "\u03dd", - "gap;": "\u2a86", - "gbreve;": "\u011f", - "gcirc;": "\u011d", - "gcy;": "\u0433", - "gdot;": "\u0121", - "ge;": "\u2265", - "gel;": "\u22db", - "geq;": "\u2265", - "geqq;": "\u2267", - "geqslant;": "\u2a7e", - "ges;": "\u2a7e", - "gescc;": "\u2aa9", - "gesdot;": "\u2a80", - "gesdoto;": "\u2a82", - "gesdotol;": "\u2a84", - "gesl;": "\u22db\ufe00", - "gesles;": "\u2a94", - "gfr;": "\U0001d524", - "gg;": "\u226b", - "ggg;": "\u22d9", - "gimel;": "\u2137", - "gjcy;": "\u0453", - "gl;": "\u2277", - "glE;": "\u2a92", - "gla;": "\u2aa5", - "glj;": "\u2aa4", - "gnE;": "\u2269", - "gnap;": "\u2a8a", - "gnapprox;": "\u2a8a", - "gne;": "\u2a88", - "gneq;": "\u2a88", - "gneqq;": "\u2269", - "gnsim;": "\u22e7", - "gopf;": "\U0001d558", - "grave;": "`", - "gscr;": "\u210a", - "gsim;": "\u2273", - "gsime;": "\u2a8e", - "gsiml;": "\u2a90", - "gt": ">", - "gt;": ">", - "gtcc;": "\u2aa7", - "gtcir;": "\u2a7a", - "gtdot;": "\u22d7", - "gtlPar;": "\u2995", - "gtquest;": "\u2a7c", - "gtrapprox;": "\u2a86", - "gtrarr;": "\u2978", - "gtrdot;": "\u22d7", - "gtreqless;": "\u22db", - "gtreqqless;": "\u2a8c", - "gtrless;": "\u2277", - "gtrsim;": "\u2273", - "gvertneqq;": "\u2269\ufe00", - "gvnE;": "\u2269\ufe00", - "hArr;": "\u21d4", - "hairsp;": "\u200a", - "half;": "\xbd", - "hamilt;": "\u210b", - "hardcy;": "\u044a", - "harr;": "\u2194", - "harrcir;": "\u2948", - "harrw;": "\u21ad", - "hbar;": "\u210f", - "hcirc;": "\u0125", - "hearts;": "\u2665", - "heartsuit;": "\u2665", - "hellip;": "\u2026", - "hercon;": "\u22b9", - "hfr;": "\U0001d525", - "hksearow;": "\u2925", - "hkswarow;": "\u2926", - "hoarr;": "\u21ff", - "homtht;": "\u223b", - "hookleftarrow;": "\u21a9", - "hookrightarrow;": "\u21aa", - "hopf;": "\U0001d559", - "horbar;": "\u2015", - "hscr;": "\U0001d4bd", - "hslash;": "\u210f", - "hstrok;": "\u0127", - "hybull;": "\u2043", - "hyphen;": "\u2010", - "iacute": "\xed", - "iacute;": "\xed", - "ic;": "\u2063", - "icirc": "\xee", - "icirc;": "\xee", - "icy;": "\u0438", - "iecy;": "\u0435", - "iexcl": "\xa1", - "iexcl;": "\xa1", - "iff;": "\u21d4", - "ifr;": "\U0001d526", - "igrave": "\xec", - "igrave;": "\xec", - "ii;": "\u2148", - "iiiint;": "\u2a0c", - "iiint;": "\u222d", - "iinfin;": "\u29dc", - "iiota;": "\u2129", - "ijlig;": "\u0133", - "imacr;": "\u012b", - "image;": "\u2111", - "imagline;": "\u2110", - "imagpart;": "\u2111", - "imath;": "\u0131", - "imof;": "\u22b7", - "imped;": "\u01b5", - "in;": "\u2208", - "incare;": "\u2105", - "infin;": "\u221e", - "infintie;": "\u29dd", - "inodot;": "\u0131", - "int;": "\u222b", - "intcal;": "\u22ba", - "integers;": "\u2124", - "intercal;": "\u22ba", - "intlarhk;": "\u2a17", - "intprod;": "\u2a3c", - "iocy;": "\u0451", - "iogon;": "\u012f", - "iopf;": "\U0001d55a", - "iota;": "\u03b9", - "iprod;": "\u2a3c", - "iquest": "\xbf", - "iquest;": "\xbf", - "iscr;": "\U0001d4be", - "isin;": "\u2208", - "isinE;": "\u22f9", - "isindot;": "\u22f5", - "isins;": "\u22f4", - "isinsv;": "\u22f3", - "isinv;": "\u2208", - "it;": "\u2062", - "itilde;": "\u0129", - "iukcy;": "\u0456", - "iuml": "\xef", - "iuml;": "\xef", - "jcirc;": "\u0135", - "jcy;": "\u0439", - "jfr;": "\U0001d527", - "jmath;": "\u0237", - "jopf;": "\U0001d55b", - "jscr;": "\U0001d4bf", - "jsercy;": "\u0458", - "jukcy;": "\u0454", - "kappa;": "\u03ba", - "kappav;": "\u03f0", - "kcedil;": "\u0137", - "kcy;": "\u043a", - "kfr;": "\U0001d528", - "kgreen;": "\u0138", - "khcy;": "\u0445", - "kjcy;": "\u045c", - "kopf;": "\U0001d55c", - "kscr;": "\U0001d4c0", - "lAarr;": "\u21da", - "lArr;": "\u21d0", - "lAtail;": "\u291b", - "lBarr;": "\u290e", - "lE;": "\u2266", - "lEg;": "\u2a8b", - "lHar;": "\u2962", - "lacute;": "\u013a", - "laemptyv;": "\u29b4", - "lagran;": "\u2112", - "lambda;": "\u03bb", - "lang;": "\u27e8", - "langd;": "\u2991", - "langle;": "\u27e8", - "lap;": "\u2a85", - "laquo": "\xab", - "laquo;": "\xab", - "larr;": "\u2190", - "larrb;": "\u21e4", - "larrbfs;": "\u291f", - "larrfs;": "\u291d", - "larrhk;": "\u21a9", - "larrlp;": "\u21ab", - "larrpl;": "\u2939", - "larrsim;": "\u2973", - "larrtl;": "\u21a2", - "lat;": "\u2aab", - "latail;": "\u2919", - "late;": "\u2aad", - "lates;": "\u2aad\ufe00", - "lbarr;": "\u290c", - "lbbrk;": "\u2772", - "lbrace;": "{", - "lbrack;": "[", - "lbrke;": "\u298b", - "lbrksld;": "\u298f", - "lbrkslu;": "\u298d", - "lcaron;": "\u013e", - "lcedil;": "\u013c", - "lceil;": "\u2308", - "lcub;": "{", - "lcy;": "\u043b", - "ldca;": "\u2936", - "ldquo;": "\u201c", - "ldquor;": "\u201e", - "ldrdhar;": "\u2967", - "ldrushar;": "\u294b", - "ldsh;": "\u21b2", - "le;": "\u2264", - "leftarrow;": "\u2190", - "leftarrowtail;": "\u21a2", - "leftharpoondown;": "\u21bd", - "leftharpoonup;": "\u21bc", - "leftleftarrows;": "\u21c7", - "leftrightarrow;": "\u2194", - "leftrightarrows;": "\u21c6", - "leftrightharpoons;": "\u21cb", - "leftrightsquigarrow;": "\u21ad", - "leftthreetimes;": "\u22cb", - "leg;": "\u22da", - "leq;": "\u2264", - "leqq;": "\u2266", - "leqslant;": "\u2a7d", - "les;": "\u2a7d", - "lescc;": "\u2aa8", - "lesdot;": "\u2a7f", - "lesdoto;": "\u2a81", - "lesdotor;": "\u2a83", - "lesg;": "\u22da\ufe00", - "lesges;": "\u2a93", - "lessapprox;": "\u2a85", - "lessdot;": "\u22d6", - "lesseqgtr;": "\u22da", - "lesseqqgtr;": "\u2a8b", - "lessgtr;": "\u2276", - "lesssim;": "\u2272", - "lfisht;": "\u297c", - "lfloor;": "\u230a", - "lfr;": "\U0001d529", - "lg;": "\u2276", - "lgE;": "\u2a91", - "lhard;": "\u21bd", - "lharu;": "\u21bc", - "lharul;": "\u296a", - "lhblk;": "\u2584", - "ljcy;": "\u0459", - "ll;": "\u226a", - "llarr;": "\u21c7", - "llcorner;": "\u231e", - "llhard;": "\u296b", - "lltri;": "\u25fa", - "lmidot;": "\u0140", - "lmoust;": "\u23b0", - "lmoustache;": "\u23b0", - "lnE;": "\u2268", - "lnap;": "\u2a89", - "lnapprox;": "\u2a89", - "lne;": "\u2a87", - "lneq;": "\u2a87", - "lneqq;": "\u2268", - "lnsim;": "\u22e6", - "loang;": "\u27ec", - "loarr;": "\u21fd", - "lobrk;": "\u27e6", - "longleftarrow;": "\u27f5", - "longleftrightarrow;": "\u27f7", - "longmapsto;": "\u27fc", - "longrightarrow;": "\u27f6", - "looparrowleft;": "\u21ab", - "looparrowright;": "\u21ac", - "lopar;": "\u2985", - "lopf;": "\U0001d55d", - "loplus;": "\u2a2d", - "lotimes;": "\u2a34", - "lowast;": "\u2217", - "lowbar;": "_", - "loz;": "\u25ca", - "lozenge;": "\u25ca", - "lozf;": "\u29eb", - "lpar;": "(", - "lparlt;": "\u2993", - "lrarr;": "\u21c6", - "lrcorner;": "\u231f", - "lrhar;": "\u21cb", - "lrhard;": "\u296d", - "lrm;": "\u200e", - "lrtri;": "\u22bf", - "lsaquo;": "\u2039", - "lscr;": "\U0001d4c1", - "lsh;": "\u21b0", - "lsim;": "\u2272", - "lsime;": "\u2a8d", - "lsimg;": "\u2a8f", - "lsqb;": "[", - "lsquo;": "\u2018", - "lsquor;": "\u201a", - "lstrok;": "\u0142", - "lt": "<", - "lt;": "<", - "ltcc;": "\u2aa6", - "ltcir;": "\u2a79", - "ltdot;": "\u22d6", - "lthree;": "\u22cb", - "ltimes;": "\u22c9", - "ltlarr;": "\u2976", - "ltquest;": "\u2a7b", - "ltrPar;": "\u2996", - "ltri;": "\u25c3", - "ltrie;": "\u22b4", - "ltrif;": "\u25c2", - "lurdshar;": "\u294a", - "luruhar;": "\u2966", - "lvertneqq;": "\u2268\ufe00", - "lvnE;": "\u2268\ufe00", - "mDDot;": "\u223a", - "macr": "\xaf", - "macr;": "\xaf", - "male;": "\u2642", - "malt;": "\u2720", - "maltese;": "\u2720", - "map;": "\u21a6", - "mapsto;": "\u21a6", - "mapstodown;": "\u21a7", - "mapstoleft;": "\u21a4", - "mapstoup;": "\u21a5", - "marker;": "\u25ae", - "mcomma;": "\u2a29", - "mcy;": "\u043c", - "mdash;": "\u2014", - "measuredangle;": "\u2221", - "mfr;": "\U0001d52a", - "mho;": "\u2127", - "micro": "\xb5", - "micro;": "\xb5", - "mid;": "\u2223", - "midast;": "*", - "midcir;": "\u2af0", - "middot": "\xb7", - "middot;": "\xb7", - "minus;": "\u2212", - "minusb;": "\u229f", - "minusd;": "\u2238", - "minusdu;": "\u2a2a", - "mlcp;": "\u2adb", - "mldr;": "\u2026", - "mnplus;": "\u2213", - "models;": "\u22a7", - "mopf;": "\U0001d55e", - "mp;": "\u2213", - "mscr;": "\U0001d4c2", - "mstpos;": "\u223e", - "mu;": "\u03bc", - "multimap;": "\u22b8", - "mumap;": "\u22b8", - "nGg;": "\u22d9\u0338", - "nGt;": "\u226b\u20d2", - "nGtv;": "\u226b\u0338", - "nLeftarrow;": "\u21cd", - "nLeftrightarrow;": "\u21ce", - "nLl;": "\u22d8\u0338", - "nLt;": "\u226a\u20d2", - "nLtv;": "\u226a\u0338", - "nRightarrow;": "\u21cf", - "nVDash;": "\u22af", - "nVdash;": "\u22ae", - "nabla;": "\u2207", - "nacute;": "\u0144", - "nang;": "\u2220\u20d2", - "nap;": "\u2249", - "napE;": "\u2a70\u0338", - "napid;": "\u224b\u0338", - "napos;": "\u0149", - "napprox;": "\u2249", - "natur;": "\u266e", - "natural;": "\u266e", - "naturals;": "\u2115", - "nbsp": "\xa0", - "nbsp;": "\xa0", - "nbump;": "\u224e\u0338", - "nbumpe;": "\u224f\u0338", - "ncap;": "\u2a43", - "ncaron;": "\u0148", - "ncedil;": "\u0146", - "ncong;": "\u2247", - "ncongdot;": "\u2a6d\u0338", - "ncup;": "\u2a42", - "ncy;": "\u043d", - "ndash;": "\u2013", - "ne;": "\u2260", - "neArr;": "\u21d7", - "nearhk;": "\u2924", - "nearr;": "\u2197", - "nearrow;": "\u2197", - "nedot;": "\u2250\u0338", - "nequiv;": "\u2262", - "nesear;": "\u2928", - "nesim;": "\u2242\u0338", - "nexist;": "\u2204", - "nexists;": "\u2204", - "nfr;": "\U0001d52b", - "ngE;": "\u2267\u0338", - "nge;": "\u2271", - "ngeq;": "\u2271", - "ngeqq;": "\u2267\u0338", - "ngeqslant;": "\u2a7e\u0338", - "nges;": "\u2a7e\u0338", - "ngsim;": "\u2275", - "ngt;": "\u226f", - "ngtr;": "\u226f", - "nhArr;": "\u21ce", - "nharr;": "\u21ae", - "nhpar;": "\u2af2", - "ni;": "\u220b", - "nis;": "\u22fc", - "nisd;": "\u22fa", - "niv;": "\u220b", - "njcy;": "\u045a", - "nlArr;": "\u21cd", - "nlE;": "\u2266\u0338", - "nlarr;": "\u219a", - "nldr;": "\u2025", - "nle;": "\u2270", - "nleftarrow;": "\u219a", - "nleftrightarrow;": "\u21ae", - "nleq;": "\u2270", - "nleqq;": "\u2266\u0338", - "nleqslant;": "\u2a7d\u0338", - "nles;": "\u2a7d\u0338", - "nless;": "\u226e", - "nlsim;": "\u2274", - "nlt;": "\u226e", - "nltri;": "\u22ea", - "nltrie;": "\u22ec", - "nmid;": "\u2224", - "nopf;": "\U0001d55f", - "not": "\xac", - "not;": "\xac", - "notin;": "\u2209", - "notinE;": "\u22f9\u0338", - "notindot;": "\u22f5\u0338", - "notinva;": "\u2209", - "notinvb;": "\u22f7", - "notinvc;": "\u22f6", - "notni;": "\u220c", - "notniva;": "\u220c", - "notnivb;": "\u22fe", - "notnivc;": "\u22fd", - "npar;": "\u2226", - "nparallel;": "\u2226", - "nparsl;": "\u2afd\u20e5", - "npart;": "\u2202\u0338", - "npolint;": "\u2a14", - "npr;": "\u2280", - "nprcue;": "\u22e0", - "npre;": "\u2aaf\u0338", - "nprec;": "\u2280", - "npreceq;": "\u2aaf\u0338", - "nrArr;": "\u21cf", - "nrarr;": "\u219b", - "nrarrc;": "\u2933\u0338", - "nrarrw;": "\u219d\u0338", - "nrightarrow;": "\u219b", - "nrtri;": "\u22eb", - "nrtrie;": "\u22ed", - "nsc;": "\u2281", - "nsccue;": "\u22e1", - "nsce;": "\u2ab0\u0338", - "nscr;": "\U0001d4c3", - "nshortmid;": "\u2224", - "nshortparallel;": "\u2226", - "nsim;": "\u2241", - "nsime;": "\u2244", - "nsimeq;": "\u2244", - "nsmid;": "\u2224", - "nspar;": "\u2226", - "nsqsube;": "\u22e2", - "nsqsupe;": "\u22e3", - "nsub;": "\u2284", - "nsubE;": "\u2ac5\u0338", - "nsube;": "\u2288", - "nsubset;": "\u2282\u20d2", - "nsubseteq;": "\u2288", - "nsubseteqq;": "\u2ac5\u0338", - "nsucc;": "\u2281", - "nsucceq;": "\u2ab0\u0338", - "nsup;": "\u2285", - "nsupE;": "\u2ac6\u0338", - "nsupe;": "\u2289", - "nsupset;": "\u2283\u20d2", - "nsupseteq;": "\u2289", - "nsupseteqq;": "\u2ac6\u0338", - "ntgl;": "\u2279", - "ntilde": "\xf1", - "ntilde;": "\xf1", - "ntlg;": "\u2278", - "ntriangleleft;": "\u22ea", - "ntrianglelefteq;": "\u22ec", - "ntriangleright;": "\u22eb", - "ntrianglerighteq;": "\u22ed", - "nu;": "\u03bd", - "num;": "#", - "numero;": "\u2116", - "numsp;": "\u2007", - "nvDash;": "\u22ad", - "nvHarr;": "\u2904", - "nvap;": "\u224d\u20d2", - "nvdash;": "\u22ac", - "nvge;": "\u2265\u20d2", - "nvgt;": ">\u20d2", - "nvinfin;": "\u29de", - "nvlArr;": "\u2902", - "nvle;": "\u2264\u20d2", - "nvlt;": "<\u20d2", - "nvltrie;": "\u22b4\u20d2", - "nvrArr;": "\u2903", - "nvrtrie;": "\u22b5\u20d2", - "nvsim;": "\u223c\u20d2", - "nwArr;": "\u21d6", - "nwarhk;": "\u2923", - "nwarr;": "\u2196", - "nwarrow;": "\u2196", - "nwnear;": "\u2927", - "oS;": "\u24c8", - "oacute": "\xf3", - "oacute;": "\xf3", - "oast;": "\u229b", - "ocir;": "\u229a", - "ocirc": "\xf4", - "ocirc;": "\xf4", - "ocy;": "\u043e", - "odash;": "\u229d", - "odblac;": "\u0151", - "odiv;": "\u2a38", - "odot;": "\u2299", - "odsold;": "\u29bc", - "oelig;": "\u0153", - "ofcir;": "\u29bf", - "ofr;": "\U0001d52c", - "ogon;": "\u02db", - "ograve": "\xf2", - "ograve;": "\xf2", - "ogt;": "\u29c1", - "ohbar;": "\u29b5", - "ohm;": "\u03a9", - "oint;": "\u222e", - "olarr;": "\u21ba", - "olcir;": "\u29be", - "olcross;": "\u29bb", - "oline;": "\u203e", - "olt;": "\u29c0", - "omacr;": "\u014d", - "omega;": "\u03c9", - "omicron;": "\u03bf", - "omid;": "\u29b6", - "ominus;": "\u2296", - "oopf;": "\U0001d560", - "opar;": "\u29b7", - "operp;": "\u29b9", - "oplus;": "\u2295", - "or;": "\u2228", - "orarr;": "\u21bb", - "ord;": "\u2a5d", - "order;": "\u2134", - "orderof;": "\u2134", - "ordf": "\xaa", - "ordf;": "\xaa", - "ordm": "\xba", - "ordm;": "\xba", - "origof;": "\u22b6", - "oror;": "\u2a56", - "orslope;": "\u2a57", - "orv;": "\u2a5b", - "oscr;": "\u2134", - "oslash": "\xf8", - "oslash;": "\xf8", - "osol;": "\u2298", - "otilde": "\xf5", - "otilde;": "\xf5", - "otimes;": "\u2297", - "otimesas;": "\u2a36", - "ouml": "\xf6", - "ouml;": "\xf6", - "ovbar;": "\u233d", - "par;": "\u2225", - "para": "\xb6", - "para;": "\xb6", - "parallel;": "\u2225", - "parsim;": "\u2af3", - "parsl;": "\u2afd", - "part;": "\u2202", - "pcy;": "\u043f", - "percnt;": "%", - "period;": ".", - "permil;": "\u2030", - "perp;": "\u22a5", - "pertenk;": "\u2031", - "pfr;": "\U0001d52d", - "phi;": "\u03c6", - "phiv;": "\u03d5", - "phmmat;": "\u2133", - "phone;": "\u260e", - "pi;": "\u03c0", - "pitchfork;": "\u22d4", - "piv;": "\u03d6", - "planck;": "\u210f", - "planckh;": "\u210e", - "plankv;": "\u210f", - "plus;": "+", - "plusacir;": "\u2a23", - "plusb;": "\u229e", - "pluscir;": "\u2a22", - "plusdo;": "\u2214", - "plusdu;": "\u2a25", - "pluse;": "\u2a72", - "plusmn": "\xb1", - "plusmn;": "\xb1", - "plussim;": "\u2a26", - "plustwo;": "\u2a27", - "pm;": "\xb1", - "pointint;": "\u2a15", - "popf;": "\U0001d561", - "pound": "\xa3", - "pound;": "\xa3", - "pr;": "\u227a", - "prE;": "\u2ab3", - "prap;": "\u2ab7", - "prcue;": "\u227c", - "pre;": "\u2aaf", - "prec;": "\u227a", - "precapprox;": "\u2ab7", - "preccurlyeq;": "\u227c", - "preceq;": "\u2aaf", - "precnapprox;": "\u2ab9", - "precneqq;": "\u2ab5", - "precnsim;": "\u22e8", - "precsim;": "\u227e", - "prime;": "\u2032", - "primes;": "\u2119", - "prnE;": "\u2ab5", - "prnap;": "\u2ab9", - "prnsim;": "\u22e8", - "prod;": "\u220f", - "profalar;": "\u232e", - "profline;": "\u2312", - "profsurf;": "\u2313", - "prop;": "\u221d", - "propto;": "\u221d", - "prsim;": "\u227e", - "prurel;": "\u22b0", - "pscr;": "\U0001d4c5", - "psi;": "\u03c8", - "puncsp;": "\u2008", - "qfr;": "\U0001d52e", - "qint;": "\u2a0c", - "qopf;": "\U0001d562", - "qprime;": "\u2057", - "qscr;": "\U0001d4c6", - "quaternions;": "\u210d", - "quatint;": "\u2a16", - "quest;": "?", - "questeq;": "\u225f", - "quot": "\"", - "quot;": "\"", - "rAarr;": "\u21db", - "rArr;": "\u21d2", - "rAtail;": "\u291c", - "rBarr;": "\u290f", - "rHar;": "\u2964", - "race;": "\u223d\u0331", - "racute;": "\u0155", - "radic;": "\u221a", - "raemptyv;": "\u29b3", - "rang;": "\u27e9", - "rangd;": "\u2992", - "range;": "\u29a5", - "rangle;": "\u27e9", - "raquo": "\xbb", - "raquo;": "\xbb", - "rarr;": "\u2192", - "rarrap;": "\u2975", - "rarrb;": "\u21e5", - "rarrbfs;": "\u2920", - "rarrc;": "\u2933", - "rarrfs;": "\u291e", - "rarrhk;": "\u21aa", - "rarrlp;": "\u21ac", - "rarrpl;": "\u2945", - "rarrsim;": "\u2974", - "rarrtl;": "\u21a3", - "rarrw;": "\u219d", - "ratail;": "\u291a", - "ratio;": "\u2236", - "rationals;": "\u211a", - "rbarr;": "\u290d", - "rbbrk;": "\u2773", - "rbrace;": "}", - "rbrack;": "]", - "rbrke;": "\u298c", - "rbrksld;": "\u298e", - "rbrkslu;": "\u2990", - "rcaron;": "\u0159", - "rcedil;": "\u0157", - "rceil;": "\u2309", - "rcub;": "}", - "rcy;": "\u0440", - "rdca;": "\u2937", - "rdldhar;": "\u2969", - "rdquo;": "\u201d", - "rdquor;": "\u201d", - "rdsh;": "\u21b3", - "real;": "\u211c", - "realine;": "\u211b", - "realpart;": "\u211c", - "reals;": "\u211d", - "rect;": "\u25ad", - "reg": "\xae", - "reg;": "\xae", - "rfisht;": "\u297d", - "rfloor;": "\u230b", - "rfr;": "\U0001d52f", - "rhard;": "\u21c1", - "rharu;": "\u21c0", - "rharul;": "\u296c", - "rho;": "\u03c1", - "rhov;": "\u03f1", - "rightarrow;": "\u2192", - "rightarrowtail;": "\u21a3", - "rightharpoondown;": "\u21c1", - "rightharpoonup;": "\u21c0", - "rightleftarrows;": "\u21c4", - "rightleftharpoons;": "\u21cc", - "rightrightarrows;": "\u21c9", - "rightsquigarrow;": "\u219d", - "rightthreetimes;": "\u22cc", - "ring;": "\u02da", - "risingdotseq;": "\u2253", - "rlarr;": "\u21c4", - "rlhar;": "\u21cc", - "rlm;": "\u200f", - "rmoust;": "\u23b1", - "rmoustache;": "\u23b1", - "rnmid;": "\u2aee", - "roang;": "\u27ed", - "roarr;": "\u21fe", - "robrk;": "\u27e7", - "ropar;": "\u2986", - "ropf;": "\U0001d563", - "roplus;": "\u2a2e", - "rotimes;": "\u2a35", - "rpar;": ")", - "rpargt;": "\u2994", - "rppolint;": "\u2a12", - "rrarr;": "\u21c9", - "rsaquo;": "\u203a", - "rscr;": "\U0001d4c7", - "rsh;": "\u21b1", - "rsqb;": "]", - "rsquo;": "\u2019", - "rsquor;": "\u2019", - "rthree;": "\u22cc", - "rtimes;": "\u22ca", - "rtri;": "\u25b9", - "rtrie;": "\u22b5", - "rtrif;": "\u25b8", - "rtriltri;": "\u29ce", - "ruluhar;": "\u2968", - "rx;": "\u211e", - "sacute;": "\u015b", - "sbquo;": "\u201a", - "sc;": "\u227b", - "scE;": "\u2ab4", - "scap;": "\u2ab8", - "scaron;": "\u0161", - "sccue;": "\u227d", - "sce;": "\u2ab0", - "scedil;": "\u015f", - "scirc;": "\u015d", - "scnE;": "\u2ab6", - "scnap;": "\u2aba", - "scnsim;": "\u22e9", - "scpolint;": "\u2a13", - "scsim;": "\u227f", - "scy;": "\u0441", - "sdot;": "\u22c5", - "sdotb;": "\u22a1", - "sdote;": "\u2a66", - "seArr;": "\u21d8", - "searhk;": "\u2925", - "searr;": "\u2198", - "searrow;": "\u2198", - "sect": "\xa7", - "sect;": "\xa7", - "semi;": ";", - "seswar;": "\u2929", - "setminus;": "\u2216", - "setmn;": "\u2216", - "sext;": "\u2736", - "sfr;": "\U0001d530", - "sfrown;": "\u2322", - "sharp;": "\u266f", - "shchcy;": "\u0449", - "shcy;": "\u0448", - "shortmid;": "\u2223", - "shortparallel;": "\u2225", - "shy": "\xad", - "shy;": "\xad", - "sigma;": "\u03c3", - "sigmaf;": "\u03c2", - "sigmav;": "\u03c2", - "sim;": "\u223c", - "simdot;": "\u2a6a", - "sime;": "\u2243", - "simeq;": "\u2243", - "simg;": "\u2a9e", - "simgE;": "\u2aa0", - "siml;": "\u2a9d", - "simlE;": "\u2a9f", - "simne;": "\u2246", - "simplus;": "\u2a24", - "simrarr;": "\u2972", - "slarr;": "\u2190", - "smallsetminus;": "\u2216", - "smashp;": "\u2a33", - "smeparsl;": "\u29e4", - "smid;": "\u2223", - "smile;": "\u2323", - "smt;": "\u2aaa", - "smte;": "\u2aac", - "smtes;": "\u2aac\ufe00", - "softcy;": "\u044c", - "sol;": "/", - "solb;": "\u29c4", - "solbar;": "\u233f", - "sopf;": "\U0001d564", - "spades;": "\u2660", - "spadesuit;": "\u2660", - "spar;": "\u2225", - "sqcap;": "\u2293", - "sqcaps;": "\u2293\ufe00", - "sqcup;": "\u2294", - "sqcups;": "\u2294\ufe00", - "sqsub;": "\u228f", - "sqsube;": "\u2291", - "sqsubset;": "\u228f", - "sqsubseteq;": "\u2291", - "sqsup;": "\u2290", - "sqsupe;": "\u2292", - "sqsupset;": "\u2290", - "sqsupseteq;": "\u2292", - "squ;": "\u25a1", - "square;": "\u25a1", - "squarf;": "\u25aa", - "squf;": "\u25aa", - "srarr;": "\u2192", - "sscr;": "\U0001d4c8", - "ssetmn;": "\u2216", - "ssmile;": "\u2323", - "sstarf;": "\u22c6", - "star;": "\u2606", - "starf;": "\u2605", - "straightepsilon;": "\u03f5", - "straightphi;": "\u03d5", - "strns;": "\xaf", - "sub;": "\u2282", - "subE;": "\u2ac5", - "subdot;": "\u2abd", - "sube;": "\u2286", - "subedot;": "\u2ac3", - "submult;": "\u2ac1", - "subnE;": "\u2acb", - "subne;": "\u228a", - "subplus;": "\u2abf", - "subrarr;": "\u2979", - "subset;": "\u2282", - "subseteq;": "\u2286", - "subseteqq;": "\u2ac5", - "subsetneq;": "\u228a", - "subsetneqq;": "\u2acb", - "subsim;": "\u2ac7", - "subsub;": "\u2ad5", - "subsup;": "\u2ad3", - "succ;": "\u227b", - "succapprox;": "\u2ab8", - "succcurlyeq;": "\u227d", - "succeq;": "\u2ab0", - "succnapprox;": "\u2aba", - "succneqq;": "\u2ab6", - "succnsim;": "\u22e9", - "succsim;": "\u227f", - "sum;": "\u2211", - "sung;": "\u266a", - "sup1": "\xb9", - "sup1;": "\xb9", - "sup2": "\xb2", - "sup2;": "\xb2", - "sup3": "\xb3", - "sup3;": "\xb3", - "sup;": "\u2283", - "supE;": "\u2ac6", - "supdot;": "\u2abe", - "supdsub;": "\u2ad8", - "supe;": "\u2287", - "supedot;": "\u2ac4", - "suphsol;": "\u27c9", - "suphsub;": "\u2ad7", - "suplarr;": "\u297b", - "supmult;": "\u2ac2", - "supnE;": "\u2acc", - "supne;": "\u228b", - "supplus;": "\u2ac0", - "supset;": "\u2283", - "supseteq;": "\u2287", - "supseteqq;": "\u2ac6", - "supsetneq;": "\u228b", - "supsetneqq;": "\u2acc", - "supsim;": "\u2ac8", - "supsub;": "\u2ad4", - "supsup;": "\u2ad6", - "swArr;": "\u21d9", - "swarhk;": "\u2926", - "swarr;": "\u2199", - "swarrow;": "\u2199", - "swnwar;": "\u292a", - "szlig": "\xdf", - "szlig;": "\xdf", - "target;": "\u2316", - "tau;": "\u03c4", - "tbrk;": "\u23b4", - "tcaron;": "\u0165", - "tcedil;": "\u0163", - "tcy;": "\u0442", - "tdot;": "\u20db", - "telrec;": "\u2315", - "tfr;": "\U0001d531", - "there4;": "\u2234", - "therefore;": "\u2234", - "theta;": "\u03b8", - "thetasym;": "\u03d1", - "thetav;": "\u03d1", - "thickapprox;": "\u2248", - "thicksim;": "\u223c", - "thinsp;": "\u2009", - "thkap;": "\u2248", - "thksim;": "\u223c", - "thorn": "\xfe", - "thorn;": "\xfe", - "tilde;": "\u02dc", - "times": "\xd7", - "times;": "\xd7", - "timesb;": "\u22a0", - "timesbar;": "\u2a31", - "timesd;": "\u2a30", - "tint;": "\u222d", - "toea;": "\u2928", - "top;": "\u22a4", - "topbot;": "\u2336", - "topcir;": "\u2af1", - "topf;": "\U0001d565", - "topfork;": "\u2ada", - "tosa;": "\u2929", - "tprime;": "\u2034", - "trade;": "\u2122", - "triangle;": "\u25b5", - "triangledown;": "\u25bf", - "triangleleft;": "\u25c3", - "trianglelefteq;": "\u22b4", - "triangleq;": "\u225c", - "triangleright;": "\u25b9", - "trianglerighteq;": "\u22b5", - "tridot;": "\u25ec", - "trie;": "\u225c", - "triminus;": "\u2a3a", - "triplus;": "\u2a39", - "trisb;": "\u29cd", - "tritime;": "\u2a3b", - "trpezium;": "\u23e2", - "tscr;": "\U0001d4c9", - "tscy;": "\u0446", - "tshcy;": "\u045b", - "tstrok;": "\u0167", - "twixt;": "\u226c", - "twoheadleftarrow;": "\u219e", - "twoheadrightarrow;": "\u21a0", - "uArr;": "\u21d1", - "uHar;": "\u2963", - "uacute": "\xfa", - "uacute;": "\xfa", - "uarr;": "\u2191", - "ubrcy;": "\u045e", - "ubreve;": "\u016d", - "ucirc": "\xfb", - "ucirc;": "\xfb", - "ucy;": "\u0443", - "udarr;": "\u21c5", - "udblac;": "\u0171", - "udhar;": "\u296e", - "ufisht;": "\u297e", - "ufr;": "\U0001d532", - "ugrave": "\xf9", - "ugrave;": "\xf9", - "uharl;": "\u21bf", - "uharr;": "\u21be", - "uhblk;": "\u2580", - "ulcorn;": "\u231c", - "ulcorner;": "\u231c", - "ulcrop;": "\u230f", - "ultri;": "\u25f8", - "umacr;": "\u016b", - "uml": "\xa8", - "uml;": "\xa8", - "uogon;": "\u0173", - "uopf;": "\U0001d566", - "uparrow;": "\u2191", - "updownarrow;": "\u2195", - "upharpoonleft;": "\u21bf", - "upharpoonright;": "\u21be", - "uplus;": "\u228e", - "upsi;": "\u03c5", - "upsih;": "\u03d2", - "upsilon;": "\u03c5", - "upuparrows;": "\u21c8", - "urcorn;": "\u231d", - "urcorner;": "\u231d", - "urcrop;": "\u230e", - "uring;": "\u016f", - "urtri;": "\u25f9", - "uscr;": "\U0001d4ca", - "utdot;": "\u22f0", - "utilde;": "\u0169", - "utri;": "\u25b5", - "utrif;": "\u25b4", - "uuarr;": "\u21c8", - "uuml": "\xfc", - "uuml;": "\xfc", - "uwangle;": "\u29a7", - "vArr;": "\u21d5", - "vBar;": "\u2ae8", - "vBarv;": "\u2ae9", - "vDash;": "\u22a8", - "vangrt;": "\u299c", - "varepsilon;": "\u03f5", - "varkappa;": "\u03f0", - "varnothing;": "\u2205", - "varphi;": "\u03d5", - "varpi;": "\u03d6", - "varpropto;": "\u221d", - "varr;": "\u2195", - "varrho;": "\u03f1", - "varsigma;": "\u03c2", - "varsubsetneq;": "\u228a\ufe00", - "varsubsetneqq;": "\u2acb\ufe00", - "varsupsetneq;": "\u228b\ufe00", - "varsupsetneqq;": "\u2acc\ufe00", - "vartheta;": "\u03d1", - "vartriangleleft;": "\u22b2", - "vartriangleright;": "\u22b3", - "vcy;": "\u0432", - "vdash;": "\u22a2", - "vee;": "\u2228", - "veebar;": "\u22bb", - "veeeq;": "\u225a", - "vellip;": "\u22ee", - "verbar;": "|", - "vert;": "|", - "vfr;": "\U0001d533", - "vltri;": "\u22b2", - "vnsub;": "\u2282\u20d2", - "vnsup;": "\u2283\u20d2", - "vopf;": "\U0001d567", - "vprop;": "\u221d", - "vrtri;": "\u22b3", - "vscr;": "\U0001d4cb", - "vsubnE;": "\u2acb\ufe00", - "vsubne;": "\u228a\ufe00", - "vsupnE;": "\u2acc\ufe00", - "vsupne;": "\u228b\ufe00", - "vzigzag;": "\u299a", - "wcirc;": "\u0175", - "wedbar;": "\u2a5f", - "wedge;": "\u2227", - "wedgeq;": "\u2259", - "weierp;": "\u2118", - "wfr;": "\U0001d534", - "wopf;": "\U0001d568", - "wp;": "\u2118", - "wr;": "\u2240", - "wreath;": "\u2240", - "wscr;": "\U0001d4cc", - "xcap;": "\u22c2", - "xcirc;": "\u25ef", - "xcup;": "\u22c3", - "xdtri;": "\u25bd", - "xfr;": "\U0001d535", - "xhArr;": "\u27fa", - "xharr;": "\u27f7", - "xi;": "\u03be", - "xlArr;": "\u27f8", - "xlarr;": "\u27f5", - "xmap;": "\u27fc", - "xnis;": "\u22fb", - "xodot;": "\u2a00", - "xopf;": "\U0001d569", - "xoplus;": "\u2a01", - "xotime;": "\u2a02", - "xrArr;": "\u27f9", - "xrarr;": "\u27f6", - "xscr;": "\U0001d4cd", - "xsqcup;": "\u2a06", - "xuplus;": "\u2a04", - "xutri;": "\u25b3", - "xvee;": "\u22c1", - "xwedge;": "\u22c0", - "yacute": "\xfd", - "yacute;": "\xfd", - "yacy;": "\u044f", - "ycirc;": "\u0177", - "ycy;": "\u044b", - "yen": "\xa5", - "yen;": "\xa5", - "yfr;": "\U0001d536", - "yicy;": "\u0457", - "yopf;": "\U0001d56a", - "yscr;": "\U0001d4ce", - "yucy;": "\u044e", - "yuml": "\xff", - "yuml;": "\xff", - "zacute;": "\u017a", - "zcaron;": "\u017e", - "zcy;": "\u0437", - "zdot;": "\u017c", - "zeetrf;": "\u2128", - "zeta;": "\u03b6", - "zfr;": "\U0001d537", - "zhcy;": "\u0436", - "zigrarr;": "\u21dd", - "zopf;": "\U0001d56b", - "zscr;": "\U0001d4cf", - "zwj;": "\u200d", - "zwnj;": "\u200c", -} - -replacementCharacters = { - 0x0: "\uFFFD", - 0x0d: "\u000D", - 0x80: "\u20AC", - 0x81: "\u0081", - 0x82: "\u201A", - 0x83: "\u0192", - 0x84: "\u201E", - 0x85: "\u2026", - 0x86: "\u2020", - 0x87: "\u2021", - 0x88: "\u02C6", - 0x89: "\u2030", - 0x8A: "\u0160", - 0x8B: "\u2039", - 0x8C: "\u0152", - 0x8D: "\u008D", - 0x8E: "\u017D", - 0x8F: "\u008F", - 0x90: "\u0090", - 0x91: "\u2018", - 0x92: "\u2019", - 0x93: "\u201C", - 0x94: "\u201D", - 0x95: "\u2022", - 0x96: "\u2013", - 0x97: "\u2014", - 0x98: "\u02DC", - 0x99: "\u2122", - 0x9A: "\u0161", - 0x9B: "\u203A", - 0x9C: "\u0153", - 0x9D: "\u009D", - 0x9E: "\u017E", - 0x9F: "\u0178", -} - -tokenTypes = { - "Doctype": 0, - "Characters": 1, - "SpaceCharacters": 2, - "StartTag": 3, - "EndTag": 4, - "EmptyTag": 5, - "Comment": 6, - "ParseError": 7 -} - -tagTokenTypes = frozenset([tokenTypes["StartTag"], tokenTypes["EndTag"], - tokenTypes["EmptyTag"]]) - - -prefixes = {v: k for k, v in namespaces.items()} -prefixes["http://www.w3.org/1998/Math/MathML"] = "math" - - -class DataLossWarning(UserWarning): - """Raised when the current tree is unable to represent the input data""" - pass - - -class _ReparseException(Exception): - pass diff --git a/src/pip/_vendor/html5lib/filters/alphabeticalattributes.py b/src/pip/_vendor/html5lib/filters/alphabeticalattributes.py deleted file mode 100644 index 5ba926e3b09..00000000000 --- a/src/pip/_vendor/html5lib/filters/alphabeticalattributes.py +++ /dev/null @@ -1,29 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from . import base - -from collections import OrderedDict - - -def _attr_key(attr): - """Return an appropriate key for an attribute for sorting - - Attributes have a namespace that can be either ``None`` or a string. We - can't compare the two because they're different types, so we convert - ``None`` to an empty string first. - - """ - return (attr[0][0] or ''), attr[0][1] - - -class Filter(base.Filter): - """Alphabetizes attributes for elements""" - def __iter__(self): - for token in base.Filter.__iter__(self): - if token["type"] in ("StartTag", "EmptyTag"): - attrs = OrderedDict() - for name, value in sorted(token["data"].items(), - key=_attr_key): - attrs[name] = value - token["data"] = attrs - yield token diff --git a/src/pip/_vendor/html5lib/filters/base.py b/src/pip/_vendor/html5lib/filters/base.py deleted file mode 100644 index c7dbaed0fab..00000000000 --- a/src/pip/_vendor/html5lib/filters/base.py +++ /dev/null @@ -1,12 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - - -class Filter(object): - def __init__(self, source): - self.source = source - - def __iter__(self): - return iter(self.source) - - def __getattr__(self, name): - return getattr(self.source, name) diff --git a/src/pip/_vendor/html5lib/filters/inject_meta_charset.py b/src/pip/_vendor/html5lib/filters/inject_meta_charset.py deleted file mode 100644 index aefb5c842c2..00000000000 --- a/src/pip/_vendor/html5lib/filters/inject_meta_charset.py +++ /dev/null @@ -1,73 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from . import base - - -class Filter(base.Filter): - """Injects ```` tag into head of document""" - def __init__(self, source, encoding): - """Creates a Filter - - :arg source: the source token stream - - :arg encoding: the encoding to set - - """ - base.Filter.__init__(self, source) - self.encoding = encoding - - def __iter__(self): - state = "pre_head" - meta_found = (self.encoding is None) - pending = [] - - for token in base.Filter.__iter__(self): - type = token["type"] - if type == "StartTag": - if token["name"].lower() == "head": - state = "in_head" - - elif type == "EmptyTag": - if token["name"].lower() == "meta": - # replace charset with actual encoding - has_http_equiv_content_type = False - for (namespace, name), value in token["data"].items(): - if namespace is not None: - continue - elif name.lower() == 'charset': - token["data"][(namespace, name)] = self.encoding - meta_found = True - break - elif name == 'http-equiv' and value.lower() == 'content-type': - has_http_equiv_content_type = True - else: - if has_http_equiv_content_type and (None, "content") in token["data"]: - token["data"][(None, "content")] = 'text/html; charset=%s' % self.encoding - meta_found = True - - elif token["name"].lower() == "head" and not meta_found: - # insert meta into empty head - yield {"type": "StartTag", "name": "head", - "data": token["data"]} - yield {"type": "EmptyTag", "name": "meta", - "data": {(None, "charset"): self.encoding}} - yield {"type": "EndTag", "name": "head"} - meta_found = True - continue - - elif type == "EndTag": - if token["name"].lower() == "head" and pending: - # insert meta into head (if necessary) and flush pending queue - yield pending.pop(0) - if not meta_found: - yield {"type": "EmptyTag", "name": "meta", - "data": {(None, "charset"): self.encoding}} - while pending: - yield pending.pop(0) - meta_found = True - state = "post_head" - - if state == "in_head": - pending.append(token) - else: - yield token diff --git a/src/pip/_vendor/html5lib/filters/lint.py b/src/pip/_vendor/html5lib/filters/lint.py deleted file mode 100644 index fcc07eec5b2..00000000000 --- a/src/pip/_vendor/html5lib/filters/lint.py +++ /dev/null @@ -1,93 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from pip._vendor.six import text_type - -from . import base -from ..constants import namespaces, voidElements - -from ..constants import spaceCharacters -spaceCharacters = "".join(spaceCharacters) - - -class Filter(base.Filter): - """Lints the token stream for errors - - If it finds any errors, it'll raise an ``AssertionError``. - - """ - def __init__(self, source, require_matching_tags=True): - """Creates a Filter - - :arg source: the source token stream - - :arg require_matching_tags: whether or not to require matching tags - - """ - super(Filter, self).__init__(source) - self.require_matching_tags = require_matching_tags - - def __iter__(self): - open_elements = [] - for token in base.Filter.__iter__(self): - type = token["type"] - if type in ("StartTag", "EmptyTag"): - namespace = token["namespace"] - name = token["name"] - assert namespace is None or isinstance(namespace, text_type) - assert namespace != "" - assert isinstance(name, text_type) - assert name != "" - assert isinstance(token["data"], dict) - if (not namespace or namespace == namespaces["html"]) and name in voidElements: - assert type == "EmptyTag" - else: - assert type == "StartTag" - if type == "StartTag" and self.require_matching_tags: - open_elements.append((namespace, name)) - for (namespace, name), value in token["data"].items(): - assert namespace is None or isinstance(namespace, text_type) - assert namespace != "" - assert isinstance(name, text_type) - assert name != "" - assert isinstance(value, text_type) - - elif type == "EndTag": - namespace = token["namespace"] - name = token["name"] - assert namespace is None or isinstance(namespace, text_type) - assert namespace != "" - assert isinstance(name, text_type) - assert name != "" - if (not namespace or namespace == namespaces["html"]) and name in voidElements: - assert False, "Void element reported as EndTag token: %(tag)s" % {"tag": name} - elif self.require_matching_tags: - start = open_elements.pop() - assert start == (namespace, name) - - elif type == "Comment": - data = token["data"] - assert isinstance(data, text_type) - - elif type in ("Characters", "SpaceCharacters"): - data = token["data"] - assert isinstance(data, text_type) - assert data != "" - if type == "SpaceCharacters": - assert data.strip(spaceCharacters) == "" - - elif type == "Doctype": - name = token["name"] - assert name is None or isinstance(name, text_type) - assert token["publicId"] is None or isinstance(name, text_type) - assert token["systemId"] is None or isinstance(name, text_type) - - elif type == "Entity": - assert isinstance(token["name"], text_type) - - elif type == "SerializerError": - assert isinstance(token["data"], text_type) - - else: - assert False, "Unknown token type: %(type)s" % {"type": type} - - yield token diff --git a/src/pip/_vendor/html5lib/filters/optionaltags.py b/src/pip/_vendor/html5lib/filters/optionaltags.py deleted file mode 100644 index 4a865012c16..00000000000 --- a/src/pip/_vendor/html5lib/filters/optionaltags.py +++ /dev/null @@ -1,207 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -from . import base - - -class Filter(base.Filter): - """Removes optional tags from the token stream""" - def slider(self): - previous1 = previous2 = None - for token in self.source: - if previous1 is not None: - yield previous2, previous1, token - previous2 = previous1 - previous1 = token - if previous1 is not None: - yield previous2, previous1, None - - def __iter__(self): - for previous, token, next in self.slider(): - type = token["type"] - if type == "StartTag": - if (token["data"] or - not self.is_optional_start(token["name"], previous, next)): - yield token - elif type == "EndTag": - if not self.is_optional_end(token["name"], next): - yield token - else: - yield token - - def is_optional_start(self, tagname, previous, next): - type = next and next["type"] or None - if tagname in 'html': - # An html element's start tag may be omitted if the first thing - # inside the html element is not a space character or a comment. - return type not in ("Comment", "SpaceCharacters") - elif tagname == 'head': - # A head element's start tag may be omitted if the first thing - # inside the head element is an element. - # XXX: we also omit the start tag if the head element is empty - if type in ("StartTag", "EmptyTag"): - return True - elif type == "EndTag": - return next["name"] == "head" - elif tagname == 'body': - # A body element's start tag may be omitted if the first thing - # inside the body element is not a space character or a comment, - # except if the first thing inside the body element is a script - # or style element and the node immediately preceding the body - # element is a head element whose end tag has been omitted. - if type in ("Comment", "SpaceCharacters"): - return False - elif type == "StartTag": - # XXX: we do not look at the preceding event, so we never omit - # the body element's start tag if it's followed by a script or - # a style element. - return next["name"] not in ('script', 'style') - else: - return True - elif tagname == 'colgroup': - # A colgroup element's start tag may be omitted if the first thing - # inside the colgroup element is a col element, and if the element - # is not immediately preceded by another colgroup element whose - # end tag has been omitted. - if type in ("StartTag", "EmptyTag"): - # XXX: we do not look at the preceding event, so instead we never - # omit the colgroup element's end tag when it is immediately - # followed by another colgroup element. See is_optional_end. - return next["name"] == "col" - else: - return False - elif tagname == 'tbody': - # A tbody element's start tag may be omitted if the first thing - # inside the tbody element is a tr element, and if the element is - # not immediately preceded by a tbody, thead, or tfoot element - # whose end tag has been omitted. - if type == "StartTag": - # omit the thead and tfoot elements' end tag when they are - # immediately followed by a tbody element. See is_optional_end. - if previous and previous['type'] == 'EndTag' and \ - previous['name'] in ('tbody', 'thead', 'tfoot'): - return False - return next["name"] == 'tr' - else: - return False - return False - - def is_optional_end(self, tagname, next): - type = next and next["type"] or None - if tagname in ('html', 'head', 'body'): - # An html element's end tag may be omitted if the html element - # is not immediately followed by a space character or a comment. - return type not in ("Comment", "SpaceCharacters") - elif tagname in ('li', 'optgroup', 'tr'): - # A li element's end tag may be omitted if the li element is - # immediately followed by another li element or if there is - # no more content in the parent element. - # An optgroup element's end tag may be omitted if the optgroup - # element is immediately followed by another optgroup element, - # or if there is no more content in the parent element. - # A tr element's end tag may be omitted if the tr element is - # immediately followed by another tr element, or if there is - # no more content in the parent element. - if type == "StartTag": - return next["name"] == tagname - else: - return type == "EndTag" or type is None - elif tagname in ('dt', 'dd'): - # A dt element's end tag may be omitted if the dt element is - # immediately followed by another dt element or a dd element. - # A dd element's end tag may be omitted if the dd element is - # immediately followed by another dd element or a dt element, - # or if there is no more content in the parent element. - if type == "StartTag": - return next["name"] in ('dt', 'dd') - elif tagname == 'dd': - return type == "EndTag" or type is None - else: - return False - elif tagname == 'p': - # A p element's end tag may be omitted if the p element is - # immediately followed by an address, article, aside, - # blockquote, datagrid, dialog, dir, div, dl, fieldset, - # footer, form, h1, h2, h3, h4, h5, h6, header, hr, menu, - # nav, ol, p, pre, section, table, or ul, element, or if - # there is no more content in the parent element. - if type in ("StartTag", "EmptyTag"): - return next["name"] in ('address', 'article', 'aside', - 'blockquote', 'datagrid', 'dialog', - 'dir', 'div', 'dl', 'fieldset', 'footer', - 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', - 'header', 'hr', 'menu', 'nav', 'ol', - 'p', 'pre', 'section', 'table', 'ul') - else: - return type == "EndTag" or type is None - elif tagname == 'option': - # An option element's end tag may be omitted if the option - # element is immediately followed by another option element, - # or if it is immediately followed by an optgroup - # element, or if there is no more content in the parent - # element. - if type == "StartTag": - return next["name"] in ('option', 'optgroup') - else: - return type == "EndTag" or type is None - elif tagname in ('rt', 'rp'): - # An rt element's end tag may be omitted if the rt element is - # immediately followed by an rt or rp element, or if there is - # no more content in the parent element. - # An rp element's end tag may be omitted if the rp element is - # immediately followed by an rt or rp element, or if there is - # no more content in the parent element. - if type == "StartTag": - return next["name"] in ('rt', 'rp') - else: - return type == "EndTag" or type is None - elif tagname == 'colgroup': - # A colgroup element's end tag may be omitted if the colgroup - # element is not immediately followed by a space character or - # a comment. - if type in ("Comment", "SpaceCharacters"): - return False - elif type == "StartTag": - # XXX: we also look for an immediately following colgroup - # element. See is_optional_start. - return next["name"] != 'colgroup' - else: - return True - elif tagname in ('thead', 'tbody'): - # A thead element's end tag may be omitted if the thead element - # is immediately followed by a tbody or tfoot element. - # A tbody element's end tag may be omitted if the tbody element - # is immediately followed by a tbody or tfoot element, or if - # there is no more content in the parent element. - # A tfoot element's end tag may be omitted if the tfoot element - # is immediately followed by a tbody element, or if there is no - # more content in the parent element. - # XXX: we never omit the end tag when the following element is - # a tbody. See is_optional_start. - if type == "StartTag": - return next["name"] in ['tbody', 'tfoot'] - elif tagname == 'tbody': - return type == "EndTag" or type is None - else: - return False - elif tagname == 'tfoot': - # A tfoot element's end tag may be omitted if the tfoot element - # is immediately followed by a tbody element, or if there is no - # more content in the parent element. - # XXX: we never omit the end tag when the following element is - # a tbody. See is_optional_start. - if type == "StartTag": - return next["name"] == 'tbody' - else: - return type == "EndTag" or type is None - elif tagname in ('td', 'th'): - # A td element's end tag may be omitted if the td element is - # immediately followed by a td or th element, or if there is - # no more content in the parent element. - # A th element's end tag may be omitted if the th element is - # immediately followed by a td or th element, or if there is - # no more content in the parent element. - if type == "StartTag": - return next["name"] in ('td', 'th') - else: - return type == "EndTag" or type is None - return False diff --git a/src/pip/_vendor/html5lib/filters/sanitizer.py b/src/pip/_vendor/html5lib/filters/sanitizer.py deleted file mode 100644 index aa7431d1312..00000000000 --- a/src/pip/_vendor/html5lib/filters/sanitizer.py +++ /dev/null @@ -1,916 +0,0 @@ -"""Deprecated from html5lib 1.1. - -See `here `_ for -information about its deprecation; `Bleach `_ -is recommended as a replacement. Please let us know in the aforementioned issue -if Bleach is unsuitable for your needs. - -""" -from __future__ import absolute_import, division, unicode_literals - -import re -import warnings -from xml.sax.saxutils import escape, unescape - -from pip._vendor.six.moves import urllib_parse as urlparse - -from . import base -from ..constants import namespaces, prefixes - -__all__ = ["Filter"] - - -_deprecation_msg = ( - "html5lib's sanitizer is deprecated; see " + - "https://github.com/html5lib/html5lib-python/issues/443 and please let " + - "us know if Bleach is unsuitable for your needs" -) - -warnings.warn(_deprecation_msg, DeprecationWarning) - -allowed_elements = frozenset(( - (namespaces['html'], 'a'), - (namespaces['html'], 'abbr'), - (namespaces['html'], 'acronym'), - (namespaces['html'], 'address'), - (namespaces['html'], 'area'), - (namespaces['html'], 'article'), - (namespaces['html'], 'aside'), - (namespaces['html'], 'audio'), - (namespaces['html'], 'b'), - (namespaces['html'], 'big'), - (namespaces['html'], 'blockquote'), - (namespaces['html'], 'br'), - (namespaces['html'], 'button'), - (namespaces['html'], 'canvas'), - (namespaces['html'], 'caption'), - (namespaces['html'], 'center'), - (namespaces['html'], 'cite'), - (namespaces['html'], 'code'), - (namespaces['html'], 'col'), - (namespaces['html'], 'colgroup'), - (namespaces['html'], 'command'), - (namespaces['html'], 'datagrid'), - (namespaces['html'], 'datalist'), - (namespaces['html'], 'dd'), - (namespaces['html'], 'del'), - (namespaces['html'], 'details'), - (namespaces['html'], 'dfn'), - (namespaces['html'], 'dialog'), - (namespaces['html'], 'dir'), - (namespaces['html'], 'div'), - (namespaces['html'], 'dl'), - (namespaces['html'], 'dt'), - (namespaces['html'], 'em'), - (namespaces['html'], 'event-source'), - (namespaces['html'], 'fieldset'), - (namespaces['html'], 'figcaption'), - (namespaces['html'], 'figure'), - (namespaces['html'], 'footer'), - (namespaces['html'], 'font'), - (namespaces['html'], 'form'), - (namespaces['html'], 'header'), - (namespaces['html'], 'h1'), - (namespaces['html'], 'h2'), - (namespaces['html'], 'h3'), - (namespaces['html'], 'h4'), - (namespaces['html'], 'h5'), - (namespaces['html'], 'h6'), - (namespaces['html'], 'hr'), - (namespaces['html'], 'i'), - (namespaces['html'], 'img'), - (namespaces['html'], 'input'), - (namespaces['html'], 'ins'), - (namespaces['html'], 'keygen'), - (namespaces['html'], 'kbd'), - (namespaces['html'], 'label'), - (namespaces['html'], 'legend'), - (namespaces['html'], 'li'), - (namespaces['html'], 'm'), - (namespaces['html'], 'map'), - (namespaces['html'], 'menu'), - (namespaces['html'], 'meter'), - (namespaces['html'], 'multicol'), - (namespaces['html'], 'nav'), - (namespaces['html'], 'nextid'), - (namespaces['html'], 'ol'), - (namespaces['html'], 'output'), - (namespaces['html'], 'optgroup'), - (namespaces['html'], 'option'), - (namespaces['html'], 'p'), - (namespaces['html'], 'pre'), - (namespaces['html'], 'progress'), - (namespaces['html'], 'q'), - (namespaces['html'], 's'), - (namespaces['html'], 'samp'), - (namespaces['html'], 'section'), - (namespaces['html'], 'select'), - (namespaces['html'], 'small'), - (namespaces['html'], 'sound'), - (namespaces['html'], 'source'), - (namespaces['html'], 'spacer'), - (namespaces['html'], 'span'), - (namespaces['html'], 'strike'), - (namespaces['html'], 'strong'), - (namespaces['html'], 'sub'), - (namespaces['html'], 'sup'), - (namespaces['html'], 'table'), - (namespaces['html'], 'tbody'), - (namespaces['html'], 'td'), - (namespaces['html'], 'textarea'), - (namespaces['html'], 'time'), - (namespaces['html'], 'tfoot'), - (namespaces['html'], 'th'), - (namespaces['html'], 'thead'), - (namespaces['html'], 'tr'), - (namespaces['html'], 'tt'), - (namespaces['html'], 'u'), - (namespaces['html'], 'ul'), - (namespaces['html'], 'var'), - (namespaces['html'], 'video'), - (namespaces['mathml'], 'maction'), - (namespaces['mathml'], 'math'), - (namespaces['mathml'], 'merror'), - (namespaces['mathml'], 'mfrac'), - (namespaces['mathml'], 'mi'), - (namespaces['mathml'], 'mmultiscripts'), - (namespaces['mathml'], 'mn'), - (namespaces['mathml'], 'mo'), - (namespaces['mathml'], 'mover'), - (namespaces['mathml'], 'mpadded'), - (namespaces['mathml'], 'mphantom'), - (namespaces['mathml'], 'mprescripts'), - (namespaces['mathml'], 'mroot'), - (namespaces['mathml'], 'mrow'), - (namespaces['mathml'], 'mspace'), - (namespaces['mathml'], 'msqrt'), - (namespaces['mathml'], 'mstyle'), - (namespaces['mathml'], 'msub'), - (namespaces['mathml'], 'msubsup'), - (namespaces['mathml'], 'msup'), - (namespaces['mathml'], 'mtable'), - (namespaces['mathml'], 'mtd'), - (namespaces['mathml'], 'mtext'), - (namespaces['mathml'], 'mtr'), - (namespaces['mathml'], 'munder'), - (namespaces['mathml'], 'munderover'), - (namespaces['mathml'], 'none'), - (namespaces['svg'], 'a'), - (namespaces['svg'], 'animate'), - (namespaces['svg'], 'animateColor'), - (namespaces['svg'], 'animateMotion'), - (namespaces['svg'], 'animateTransform'), - (namespaces['svg'], 'clipPath'), - (namespaces['svg'], 'circle'), - (namespaces['svg'], 'defs'), - (namespaces['svg'], 'desc'), - (namespaces['svg'], 'ellipse'), - (namespaces['svg'], 'font-face'), - (namespaces['svg'], 'font-face-name'), - (namespaces['svg'], 'font-face-src'), - (namespaces['svg'], 'g'), - (namespaces['svg'], 'glyph'), - (namespaces['svg'], 'hkern'), - (namespaces['svg'], 'linearGradient'), - (namespaces['svg'], 'line'), - (namespaces['svg'], 'marker'), - (namespaces['svg'], 'metadata'), - (namespaces['svg'], 'missing-glyph'), - (namespaces['svg'], 'mpath'), - (namespaces['svg'], 'path'), - (namespaces['svg'], 'polygon'), - (namespaces['svg'], 'polyline'), - (namespaces['svg'], 'radialGradient'), - (namespaces['svg'], 'rect'), - (namespaces['svg'], 'set'), - (namespaces['svg'], 'stop'), - (namespaces['svg'], 'svg'), - (namespaces['svg'], 'switch'), - (namespaces['svg'], 'text'), - (namespaces['svg'], 'title'), - (namespaces['svg'], 'tspan'), - (namespaces['svg'], 'use'), -)) - -allowed_attributes = frozenset(( - # HTML attributes - (None, 'abbr'), - (None, 'accept'), - (None, 'accept-charset'), - (None, 'accesskey'), - (None, 'action'), - (None, 'align'), - (None, 'alt'), - (None, 'autocomplete'), - (None, 'autofocus'), - (None, 'axis'), - (None, 'background'), - (None, 'balance'), - (None, 'bgcolor'), - (None, 'bgproperties'), - (None, 'border'), - (None, 'bordercolor'), - (None, 'bordercolordark'), - (None, 'bordercolorlight'), - (None, 'bottompadding'), - (None, 'cellpadding'), - (None, 'cellspacing'), - (None, 'ch'), - (None, 'challenge'), - (None, 'char'), - (None, 'charoff'), - (None, 'choff'), - (None, 'charset'), - (None, 'checked'), - (None, 'cite'), - (None, 'class'), - (None, 'clear'), - (None, 'color'), - (None, 'cols'), - (None, 'colspan'), - (None, 'compact'), - (None, 'contenteditable'), - (None, 'controls'), - (None, 'coords'), - (None, 'data'), - (None, 'datafld'), - (None, 'datapagesize'), - (None, 'datasrc'), - (None, 'datetime'), - (None, 'default'), - (None, 'delay'), - (None, 'dir'), - (None, 'disabled'), - (None, 'draggable'), - (None, 'dynsrc'), - (None, 'enctype'), - (None, 'end'), - (None, 'face'), - (None, 'for'), - (None, 'form'), - (None, 'frame'), - (None, 'galleryimg'), - (None, 'gutter'), - (None, 'headers'), - (None, 'height'), - (None, 'hidefocus'), - (None, 'hidden'), - (None, 'high'), - (None, 'href'), - (None, 'hreflang'), - (None, 'hspace'), - (None, 'icon'), - (None, 'id'), - (None, 'inputmode'), - (None, 'ismap'), - (None, 'keytype'), - (None, 'label'), - (None, 'leftspacing'), - (None, 'lang'), - (None, 'list'), - (None, 'longdesc'), - (None, 'loop'), - (None, 'loopcount'), - (None, 'loopend'), - (None, 'loopstart'), - (None, 'low'), - (None, 'lowsrc'), - (None, 'max'), - (None, 'maxlength'), - (None, 'media'), - (None, 'method'), - (None, 'min'), - (None, 'multiple'), - (None, 'name'), - (None, 'nohref'), - (None, 'noshade'), - (None, 'nowrap'), - (None, 'open'), - (None, 'optimum'), - (None, 'pattern'), - (None, 'ping'), - (None, 'point-size'), - (None, 'poster'), - (None, 'pqg'), - (None, 'preload'), - (None, 'prompt'), - (None, 'radiogroup'), - (None, 'readonly'), - (None, 'rel'), - (None, 'repeat-max'), - (None, 'repeat-min'), - (None, 'replace'), - (None, 'required'), - (None, 'rev'), - (None, 'rightspacing'), - (None, 'rows'), - (None, 'rowspan'), - (None, 'rules'), - (None, 'scope'), - (None, 'selected'), - (None, 'shape'), - (None, 'size'), - (None, 'span'), - (None, 'src'), - (None, 'start'), - (None, 'step'), - (None, 'style'), - (None, 'summary'), - (None, 'suppress'), - (None, 'tabindex'), - (None, 'target'), - (None, 'template'), - (None, 'title'), - (None, 'toppadding'), - (None, 'type'), - (None, 'unselectable'), - (None, 'usemap'), - (None, 'urn'), - (None, 'valign'), - (None, 'value'), - (None, 'variable'), - (None, 'volume'), - (None, 'vspace'), - (None, 'vrml'), - (None, 'width'), - (None, 'wrap'), - (namespaces['xml'], 'lang'), - # MathML attributes - (None, 'actiontype'), - (None, 'align'), - (None, 'columnalign'), - (None, 'columnalign'), - (None, 'columnalign'), - (None, 'columnlines'), - (None, 'columnspacing'), - (None, 'columnspan'), - (None, 'depth'), - (None, 'display'), - (None, 'displaystyle'), - (None, 'equalcolumns'), - (None, 'equalrows'), - (None, 'fence'), - (None, 'fontstyle'), - (None, 'fontweight'), - (None, 'frame'), - (None, 'height'), - (None, 'linethickness'), - (None, 'lspace'), - (None, 'mathbackground'), - (None, 'mathcolor'), - (None, 'mathvariant'), - (None, 'mathvariant'), - (None, 'maxsize'), - (None, 'minsize'), - (None, 'other'), - (None, 'rowalign'), - (None, 'rowalign'), - (None, 'rowalign'), - (None, 'rowlines'), - (None, 'rowspacing'), - (None, 'rowspan'), - (None, 'rspace'), - (None, 'scriptlevel'), - (None, 'selection'), - (None, 'separator'), - (None, 'stretchy'), - (None, 'width'), - (None, 'width'), - (namespaces['xlink'], 'href'), - (namespaces['xlink'], 'show'), - (namespaces['xlink'], 'type'), - # SVG attributes - (None, 'accent-height'), - (None, 'accumulate'), - (None, 'additive'), - (None, 'alphabetic'), - (None, 'arabic-form'), - (None, 'ascent'), - (None, 'attributeName'), - (None, 'attributeType'), - (None, 'baseProfile'), - (None, 'bbox'), - (None, 'begin'), - (None, 'by'), - (None, 'calcMode'), - (None, 'cap-height'), - (None, 'class'), - (None, 'clip-path'), - (None, 'color'), - (None, 'color-rendering'), - (None, 'content'), - (None, 'cx'), - (None, 'cy'), - (None, 'd'), - (None, 'dx'), - (None, 'dy'), - (None, 'descent'), - (None, 'display'), - (None, 'dur'), - (None, 'end'), - (None, 'fill'), - (None, 'fill-opacity'), - (None, 'fill-rule'), - (None, 'font-family'), - (None, 'font-size'), - (None, 'font-stretch'), - (None, 'font-style'), - (None, 'font-variant'), - (None, 'font-weight'), - (None, 'from'), - (None, 'fx'), - (None, 'fy'), - (None, 'g1'), - (None, 'g2'), - (None, 'glyph-name'), - (None, 'gradientUnits'), - (None, 'hanging'), - (None, 'height'), - (None, 'horiz-adv-x'), - (None, 'horiz-origin-x'), - (None, 'id'), - (None, 'ideographic'), - (None, 'k'), - (None, 'keyPoints'), - (None, 'keySplines'), - (None, 'keyTimes'), - (None, 'lang'), - (None, 'marker-end'), - (None, 'marker-mid'), - (None, 'marker-start'), - (None, 'markerHeight'), - (None, 'markerUnits'), - (None, 'markerWidth'), - (None, 'mathematical'), - (None, 'max'), - (None, 'min'), - (None, 'name'), - (None, 'offset'), - (None, 'opacity'), - (None, 'orient'), - (None, 'origin'), - (None, 'overline-position'), - (None, 'overline-thickness'), - (None, 'panose-1'), - (None, 'path'), - (None, 'pathLength'), - (None, 'points'), - (None, 'preserveAspectRatio'), - (None, 'r'), - (None, 'refX'), - (None, 'refY'), - (None, 'repeatCount'), - (None, 'repeatDur'), - (None, 'requiredExtensions'), - (None, 'requiredFeatures'), - (None, 'restart'), - (None, 'rotate'), - (None, 'rx'), - (None, 'ry'), - (None, 'slope'), - (None, 'stemh'), - (None, 'stemv'), - (None, 'stop-color'), - (None, 'stop-opacity'), - (None, 'strikethrough-position'), - (None, 'strikethrough-thickness'), - (None, 'stroke'), - (None, 'stroke-dasharray'), - (None, 'stroke-dashoffset'), - (None, 'stroke-linecap'), - (None, 'stroke-linejoin'), - (None, 'stroke-miterlimit'), - (None, 'stroke-opacity'), - (None, 'stroke-width'), - (None, 'systemLanguage'), - (None, 'target'), - (None, 'text-anchor'), - (None, 'to'), - (None, 'transform'), - (None, 'type'), - (None, 'u1'), - (None, 'u2'), - (None, 'underline-position'), - (None, 'underline-thickness'), - (None, 'unicode'), - (None, 'unicode-range'), - (None, 'units-per-em'), - (None, 'values'), - (None, 'version'), - (None, 'viewBox'), - (None, 'visibility'), - (None, 'width'), - (None, 'widths'), - (None, 'x'), - (None, 'x-height'), - (None, 'x1'), - (None, 'x2'), - (namespaces['xlink'], 'actuate'), - (namespaces['xlink'], 'arcrole'), - (namespaces['xlink'], 'href'), - (namespaces['xlink'], 'role'), - (namespaces['xlink'], 'show'), - (namespaces['xlink'], 'title'), - (namespaces['xlink'], 'type'), - (namespaces['xml'], 'base'), - (namespaces['xml'], 'lang'), - (namespaces['xml'], 'space'), - (None, 'y'), - (None, 'y1'), - (None, 'y2'), - (None, 'zoomAndPan'), -)) - -attr_val_is_uri = frozenset(( - (None, 'href'), - (None, 'src'), - (None, 'cite'), - (None, 'action'), - (None, 'longdesc'), - (None, 'poster'), - (None, 'background'), - (None, 'datasrc'), - (None, 'dynsrc'), - (None, 'lowsrc'), - (None, 'ping'), - (namespaces['xlink'], 'href'), - (namespaces['xml'], 'base'), -)) - -svg_attr_val_allows_ref = frozenset(( - (None, 'clip-path'), - (None, 'color-profile'), - (None, 'cursor'), - (None, 'fill'), - (None, 'filter'), - (None, 'marker'), - (None, 'marker-start'), - (None, 'marker-mid'), - (None, 'marker-end'), - (None, 'mask'), - (None, 'stroke'), -)) - -svg_allow_local_href = frozenset(( - (None, 'altGlyph'), - (None, 'animate'), - (None, 'animateColor'), - (None, 'animateMotion'), - (None, 'animateTransform'), - (None, 'cursor'), - (None, 'feImage'), - (None, 'filter'), - (None, 'linearGradient'), - (None, 'pattern'), - (None, 'radialGradient'), - (None, 'textpath'), - (None, 'tref'), - (None, 'set'), - (None, 'use') -)) - -allowed_css_properties = frozenset(( - 'azimuth', - 'background-color', - 'border-bottom-color', - 'border-collapse', - 'border-color', - 'border-left-color', - 'border-right-color', - 'border-top-color', - 'clear', - 'color', - 'cursor', - 'direction', - 'display', - 'elevation', - 'float', - 'font', - 'font-family', - 'font-size', - 'font-style', - 'font-variant', - 'font-weight', - 'height', - 'letter-spacing', - 'line-height', - 'overflow', - 'pause', - 'pause-after', - 'pause-before', - 'pitch', - 'pitch-range', - 'richness', - 'speak', - 'speak-header', - 'speak-numeral', - 'speak-punctuation', - 'speech-rate', - 'stress', - 'text-align', - 'text-decoration', - 'text-indent', - 'unicode-bidi', - 'vertical-align', - 'voice-family', - 'volume', - 'white-space', - 'width', -)) - -allowed_css_keywords = frozenset(( - 'auto', - 'aqua', - 'black', - 'block', - 'blue', - 'bold', - 'both', - 'bottom', - 'brown', - 'center', - 'collapse', - 'dashed', - 'dotted', - 'fuchsia', - 'gray', - 'green', - '!important', - 'italic', - 'left', - 'lime', - 'maroon', - 'medium', - 'none', - 'navy', - 'normal', - 'nowrap', - 'olive', - 'pointer', - 'purple', - 'red', - 'right', - 'solid', - 'silver', - 'teal', - 'top', - 'transparent', - 'underline', - 'white', - 'yellow', -)) - -allowed_svg_properties = frozenset(( - 'fill', - 'fill-opacity', - 'fill-rule', - 'stroke', - 'stroke-width', - 'stroke-linecap', - 'stroke-linejoin', - 'stroke-opacity', -)) - -allowed_protocols = frozenset(( - 'ed2k', - 'ftp', - 'http', - 'https', - 'irc', - 'mailto', - 'news', - 'gopher', - 'nntp', - 'telnet', - 'webcal', - 'xmpp', - 'callto', - 'feed', - 'urn', - 'aim', - 'rsync', - 'tag', - 'ssh', - 'sftp', - 'rtsp', - 'afs', - 'data', -)) - -allowed_content_types = frozenset(( - 'image/png', - 'image/jpeg', - 'image/gif', - 'image/webp', - 'image/bmp', - 'text/plain', -)) - - -data_content_type = re.compile(r''' - ^ - # Match a content type / - (?P[-a-zA-Z0-9.]+/[-a-zA-Z0-9.]+) - # Match any character set and encoding - (?:(?:;charset=(?:[-a-zA-Z0-9]+)(?:;(?:base64))?) - |(?:;(?:base64))?(?:;charset=(?:[-a-zA-Z0-9]+))?) - # Assume the rest is data - ,.* - $ - ''', - re.VERBOSE) - - -class Filter(base.Filter): - """Sanitizes token stream of XHTML+MathML+SVG and of inline style attributes""" - def __init__(self, - source, - allowed_elements=allowed_elements, - allowed_attributes=allowed_attributes, - allowed_css_properties=allowed_css_properties, - allowed_css_keywords=allowed_css_keywords, - allowed_svg_properties=allowed_svg_properties, - allowed_protocols=allowed_protocols, - allowed_content_types=allowed_content_types, - attr_val_is_uri=attr_val_is_uri, - svg_attr_val_allows_ref=svg_attr_val_allows_ref, - svg_allow_local_href=svg_allow_local_href): - """Creates a Filter - - :arg allowed_elements: set of elements to allow--everything else will - be escaped - - :arg allowed_attributes: set of attributes to allow in - elements--everything else will be stripped - - :arg allowed_css_properties: set of CSS properties to allow--everything - else will be stripped - - :arg allowed_css_keywords: set of CSS keywords to allow--everything - else will be stripped - - :arg allowed_svg_properties: set of SVG properties to allow--everything - else will be removed - - :arg allowed_protocols: set of allowed protocols for URIs - - :arg allowed_content_types: set of allowed content types for ``data`` URIs. - - :arg attr_val_is_uri: set of attributes that have URI values--values - that have a scheme not listed in ``allowed_protocols`` are removed - - :arg svg_attr_val_allows_ref: set of SVG attributes that can have - references - - :arg svg_allow_local_href: set of SVG elements that can have local - hrefs--these are removed - - """ - super(Filter, self).__init__(source) - - warnings.warn(_deprecation_msg, DeprecationWarning) - - self.allowed_elements = allowed_elements - self.allowed_attributes = allowed_attributes - self.allowed_css_properties = allowed_css_properties - self.allowed_css_keywords = allowed_css_keywords - self.allowed_svg_properties = allowed_svg_properties - self.allowed_protocols = allowed_protocols - self.allowed_content_types = allowed_content_types - self.attr_val_is_uri = attr_val_is_uri - self.svg_attr_val_allows_ref = svg_attr_val_allows_ref - self.svg_allow_local_href = svg_allow_local_href - - def __iter__(self): - for token in base.Filter.__iter__(self): - token = self.sanitize_token(token) - if token: - yield token - - # Sanitize the +html+, escaping all elements not in ALLOWED_ELEMENTS, and - # stripping out all attributes not in ALLOWED_ATTRIBUTES. Style attributes - # are parsed, and a restricted set, specified by ALLOWED_CSS_PROPERTIES and - # ALLOWED_CSS_KEYWORDS, are allowed through. attributes in ATTR_VAL_IS_URI - # are scanned, and only URI schemes specified in ALLOWED_PROTOCOLS are - # allowed. - # - # sanitize_html('') - # => <script> do_nasty_stuff() </script> - # sanitize_html('Click here for $100') - # => Click here for $100 - def sanitize_token(self, token): - - # accommodate filters which use token_type differently - token_type = token["type"] - if token_type in ("StartTag", "EndTag", "EmptyTag"): - name = token["name"] - namespace = token["namespace"] - if ((namespace, name) in self.allowed_elements or - (namespace is None and - (namespaces["html"], name) in self.allowed_elements)): - return self.allowed_token(token) - else: - return self.disallowed_token(token) - elif token_type == "Comment": - pass - else: - return token - - def allowed_token(self, token): - if "data" in token: - attrs = token["data"] - attr_names = set(attrs.keys()) - - # Remove forbidden attributes - for to_remove in (attr_names - self.allowed_attributes): - del token["data"][to_remove] - attr_names.remove(to_remove) - - # Remove attributes with disallowed URL values - for attr in (attr_names & self.attr_val_is_uri): - assert attr in attrs - # I don't have a clue where this regexp comes from or why it matches those - # characters, nor why we call unescape. I just know it's always been here. - # Should you be worried by this comment in a sanitizer? Yes. On the other hand, all - # this will do is remove *more* than it otherwise would. - val_unescaped = re.sub("[`\x00-\x20\x7f-\xa0\\s]+", '', - unescape(attrs[attr])).lower() - # remove replacement characters from unescaped characters - val_unescaped = val_unescaped.replace("\ufffd", "") - try: - uri = urlparse.urlparse(val_unescaped) - except ValueError: - uri = None - del attrs[attr] - if uri and uri.scheme: - if uri.scheme not in self.allowed_protocols: - del attrs[attr] - if uri.scheme == 'data': - m = data_content_type.match(uri.path) - if not m: - del attrs[attr] - elif m.group('content_type') not in self.allowed_content_types: - del attrs[attr] - - for attr in self.svg_attr_val_allows_ref: - if attr in attrs: - attrs[attr] = re.sub(r'url\s*\(\s*[^#\s][^)]+?\)', - ' ', - unescape(attrs[attr])) - if (token["name"] in self.svg_allow_local_href and - (namespaces['xlink'], 'href') in attrs and re.search(r'^\s*[^#\s].*', - attrs[(namespaces['xlink'], 'href')])): - del attrs[(namespaces['xlink'], 'href')] - if (None, 'style') in attrs: - attrs[(None, 'style')] = self.sanitize_css(attrs[(None, 'style')]) - token["data"] = attrs - return token - - def disallowed_token(self, token): - token_type = token["type"] - if token_type == "EndTag": - token["data"] = "" % token["name"] - elif token["data"]: - assert token_type in ("StartTag", "EmptyTag") - attrs = [] - for (ns, name), v in token["data"].items(): - attrs.append(' %s="%s"' % (name if ns is None else "%s:%s" % (prefixes[ns], name), escape(v))) - token["data"] = "<%s%s>" % (token["name"], ''.join(attrs)) - else: - token["data"] = "<%s>" % token["name"] - if token.get("selfClosing"): - token["data"] = token["data"][:-1] + "/>" - - token["type"] = "Characters" - - del token["name"] - return token - - def sanitize_css(self, style): - # disallow urls - style = re.compile(r'url\s*\(\s*[^\s)]+?\s*\)\s*').sub(' ', style) - - # gauntlet - if not re.match(r"""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style): - return '' - if not re.match(r"^\s*([-\w]+\s*:[^:;]*(;\s*|$))*$", style): - return '' - - clean = [] - for prop, value in re.findall(r"([-\w]+)\s*:\s*([^:;]*)", style): - if not value: - continue - if prop.lower() in self.allowed_css_properties: - clean.append(prop + ': ' + value + ';') - elif prop.split('-')[0].lower() in ['background', 'border', 'margin', - 'padding']: - for keyword in value.split(): - if keyword not in self.allowed_css_keywords and \ - not re.match(r"^(#[0-9a-fA-F]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$", keyword): # noqa - break - else: - clean.append(prop + ': ' + value + ';') - elif prop.lower() in self.allowed_svg_properties: - clean.append(prop + ': ' + value + ';') - - return ' '.join(clean) diff --git a/src/pip/_vendor/html5lib/filters/whitespace.py b/src/pip/_vendor/html5lib/filters/whitespace.py deleted file mode 100644 index 0d12584b459..00000000000 --- a/src/pip/_vendor/html5lib/filters/whitespace.py +++ /dev/null @@ -1,38 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals - -import re - -from . import base -from ..constants import rcdataElements, spaceCharacters -spaceCharacters = "".join(spaceCharacters) - -SPACES_REGEX = re.compile("[%s]+" % spaceCharacters) - - -class Filter(base.Filter): - """Collapses whitespace except in pre, textarea, and script elements""" - spacePreserveElements = frozenset(["pre", "textarea"] + list(rcdataElements)) - - def __iter__(self): - preserve = 0 - for token in base.Filter.__iter__(self): - type = token["type"] - if type == "StartTag" \ - and (preserve or token["name"] in self.spacePreserveElements): - preserve += 1 - - elif type == "EndTag" and preserve: - preserve -= 1 - - elif not preserve and type == "SpaceCharacters" and token["data"]: - # Test on token["data"] above to not introduce spaces where there were not - token["data"] = " " - - elif not preserve and type == "Characters": - token["data"] = collapse_spaces(token["data"]) - - yield token - - -def collapse_spaces(text): - return SPACES_REGEX.sub(' ', text) diff --git a/src/pip/_vendor/html5lib/html5parser.py b/src/pip/_vendor/html5lib/html5parser.py deleted file mode 100644 index d06784f3d25..00000000000 --- a/src/pip/_vendor/html5lib/html5parser.py +++ /dev/null @@ -1,2795 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals -from pip._vendor.six import with_metaclass, viewkeys - -import types - -from . import _inputstream -from . import _tokenizer - -from . import treebuilders -from .treebuilders.base import Marker - -from . import _utils -from .constants import ( - spaceCharacters, asciiUpper2Lower, - specialElements, headingElements, cdataElements, rcdataElements, - tokenTypes, tagTokenTypes, - namespaces, - htmlIntegrationPointElements, mathmlTextIntegrationPointElements, - adjustForeignAttributes as adjustForeignAttributesMap, - adjustMathMLAttributes, adjustSVGAttributes, - E, - _ReparseException -) - - -def parse(doc, treebuilder="etree", namespaceHTMLElements=True, **kwargs): - """Parse an HTML document as a string or file-like object into a tree - - :arg doc: the document to parse as a string or file-like object - - :arg treebuilder: the treebuilder to use when parsing - - :arg namespaceHTMLElements: whether or not to namespace HTML elements - - :returns: parsed tree - - Example: - - >>> from html5lib.html5parser import parse - >>> parse('

This is a doc

') - - - """ - tb = treebuilders.getTreeBuilder(treebuilder) - p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements) - return p.parse(doc, **kwargs) - - -def parseFragment(doc, container="div", treebuilder="etree", namespaceHTMLElements=True, **kwargs): - """Parse an HTML fragment as a string or file-like object into a tree - - :arg doc: the fragment to parse as a string or file-like object - - :arg container: the container context to parse the fragment in - - :arg treebuilder: the treebuilder to use when parsing - - :arg namespaceHTMLElements: whether or not to namespace HTML elements - - :returns: parsed tree - - Example: - - >>> from html5lib.html5libparser import parseFragment - >>> parseFragment('this is a fragment') - - - """ - tb = treebuilders.getTreeBuilder(treebuilder) - p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements) - return p.parseFragment(doc, container=container, **kwargs) - - -def method_decorator_metaclass(function): - class Decorated(type): - def __new__(meta, classname, bases, classDict): - for attributeName, attribute in classDict.items(): - if isinstance(attribute, types.FunctionType): - attribute = function(attribute) - - classDict[attributeName] = attribute - return type.__new__(meta, classname, bases, classDict) - return Decorated - - -class HTMLParser(object): - """HTML parser - - Generates a tree structure from a stream of (possibly malformed) HTML. - - """ - - def __init__(self, tree=None, strict=False, namespaceHTMLElements=True, debug=False): - """ - :arg tree: a treebuilder class controlling the type of tree that will be - returned. Built in treebuilders can be accessed through - html5lib.treebuilders.getTreeBuilder(treeType) - - :arg strict: raise an exception when a parse error is encountered - - :arg namespaceHTMLElements: whether or not to namespace HTML elements - - :arg debug: whether or not to enable debug mode which logs things - - Example: - - >>> from html5lib.html5parser import HTMLParser - >>> parser = HTMLParser() # generates parser with etree builder - >>> parser = HTMLParser('lxml', strict=True) # generates parser with lxml builder which is strict - - """ - - # Raise an exception on the first error encountered - self.strict = strict - - if tree is None: - tree = treebuilders.getTreeBuilder("etree") - self.tree = tree(namespaceHTMLElements) - self.errors = [] - - self.phases = {name: cls(self, self.tree) for name, cls in - getPhases(debug).items()} - - def _parse(self, stream, innerHTML=False, container="div", scripting=False, **kwargs): - - self.innerHTMLMode = innerHTML - self.container = container - self.scripting = scripting - self.tokenizer = _tokenizer.HTMLTokenizer(stream, parser=self, **kwargs) - self.reset() - - try: - self.mainLoop() - except _ReparseException: - self.reset() - self.mainLoop() - - def reset(self): - self.tree.reset() - self.firstStartTag = False - self.errors = [] - self.log = [] # only used with debug mode - # "quirks" / "limited quirks" / "no quirks" - self.compatMode = "no quirks" - - if self.innerHTMLMode: - self.innerHTML = self.container.lower() - - if self.innerHTML in cdataElements: - self.tokenizer.state = self.tokenizer.rcdataState - elif self.innerHTML in rcdataElements: - self.tokenizer.state = self.tokenizer.rawtextState - elif self.innerHTML == 'plaintext': - self.tokenizer.state = self.tokenizer.plaintextState - else: - # state already is data state - # self.tokenizer.state = self.tokenizer.dataState - pass - self.phase = self.phases["beforeHtml"] - self.phase.insertHtmlElement() - self.resetInsertionMode() - else: - self.innerHTML = False # pylint:disable=redefined-variable-type - self.phase = self.phases["initial"] - - self.lastPhase = None - - self.beforeRCDataPhase = None - - self.framesetOK = True - - @property - def documentEncoding(self): - """Name of the character encoding that was used to decode the input stream, or - :obj:`None` if that is not determined yet - - """ - if not hasattr(self, 'tokenizer'): - return None - return self.tokenizer.stream.charEncoding[0].name - - def isHTMLIntegrationPoint(self, element): - if (element.name == "annotation-xml" and - element.namespace == namespaces["mathml"]): - return ("encoding" in element.attributes and - element.attributes["encoding"].translate( - asciiUpper2Lower) in - ("text/html", "application/xhtml+xml")) - else: - return (element.namespace, element.name) in htmlIntegrationPointElements - - def isMathMLTextIntegrationPoint(self, element): - return (element.namespace, element.name) in mathmlTextIntegrationPointElements - - def mainLoop(self): - CharactersToken = tokenTypes["Characters"] - SpaceCharactersToken = tokenTypes["SpaceCharacters"] - StartTagToken = tokenTypes["StartTag"] - EndTagToken = tokenTypes["EndTag"] - CommentToken = tokenTypes["Comment"] - DoctypeToken = tokenTypes["Doctype"] - ParseErrorToken = tokenTypes["ParseError"] - - for token in self.tokenizer: - prev_token = None - new_token = token - while new_token is not None: - prev_token = new_token - currentNode = self.tree.openElements[-1] if self.tree.openElements else None - currentNodeNamespace = currentNode.namespace if currentNode else None - currentNodeName = currentNode.name if currentNode else None - - type = new_token["type"] - - if type == ParseErrorToken: - self.parseError(new_token["data"], new_token.get("datavars", {})) - new_token = None - else: - if (len(self.tree.openElements) == 0 or - currentNodeNamespace == self.tree.defaultNamespace or - (self.isMathMLTextIntegrationPoint(currentNode) and - ((type == StartTagToken and - token["name"] not in frozenset(["mglyph", "malignmark"])) or - type in (CharactersToken, SpaceCharactersToken))) or - (currentNodeNamespace == namespaces["mathml"] and - currentNodeName == "annotation-xml" and - type == StartTagToken and - token["name"] == "svg") or - (self.isHTMLIntegrationPoint(currentNode) and - type in (StartTagToken, CharactersToken, SpaceCharactersToken))): - phase = self.phase - else: - phase = self.phases["inForeignContent"] - - if type == CharactersToken: - new_token = phase.processCharacters(new_token) - elif type == SpaceCharactersToken: - new_token = phase.processSpaceCharacters(new_token) - elif type == StartTagToken: - new_token = phase.processStartTag(new_token) - elif type == EndTagToken: - new_token = phase.processEndTag(new_token) - elif type == CommentToken: - new_token = phase.processComment(new_token) - elif type == DoctypeToken: - new_token = phase.processDoctype(new_token) - - if (type == StartTagToken and prev_token["selfClosing"] and - not prev_token["selfClosingAcknowledged"]): - self.parseError("non-void-element-with-trailing-solidus", - {"name": prev_token["name"]}) - - # When the loop finishes it's EOF - reprocess = True - phases = [] - while reprocess: - phases.append(self.phase) - reprocess = self.phase.processEOF() - if reprocess: - assert self.phase not in phases - - def parse(self, stream, *args, **kwargs): - """Parse a HTML document into a well-formed tree - - :arg stream: a file-like object or string containing the HTML to be parsed - - The optional encoding parameter must be a string that indicates - the encoding. If specified, that encoding will be used, - regardless of any BOM or later declaration (such as in a meta - element). - - :arg scripting: treat noscript elements as if JavaScript was turned on - - :returns: parsed tree - - Example: - - >>> from html5lib.html5parser import HTMLParser - >>> parser = HTMLParser() - >>> parser.parse('

This is a doc

') - - - """ - self._parse(stream, False, None, *args, **kwargs) - return self.tree.getDocument() - - def parseFragment(self, stream, *args, **kwargs): - """Parse a HTML fragment into a well-formed tree fragment - - :arg container: name of the element we're setting the innerHTML - property if set to None, default to 'div' - - :arg stream: a file-like object or string containing the HTML to be parsed - - The optional encoding parameter must be a string that indicates - the encoding. If specified, that encoding will be used, - regardless of any BOM or later declaration (such as in a meta - element) - - :arg scripting: treat noscript elements as if JavaScript was turned on - - :returns: parsed tree - - Example: - - >>> from html5lib.html5libparser import HTMLParser - >>> parser = HTMLParser() - >>> parser.parseFragment('this is a fragment') - - - """ - self._parse(stream, True, *args, **kwargs) - return self.tree.getFragment() - - def parseError(self, errorcode="XXX-undefined-error", datavars=None): - # XXX The idea is to make errorcode mandatory. - if datavars is None: - datavars = {} - self.errors.append((self.tokenizer.stream.position(), errorcode, datavars)) - if self.strict: - raise ParseError(E[errorcode] % datavars) - - def adjustMathMLAttributes(self, token): - adjust_attributes(token, adjustMathMLAttributes) - - def adjustSVGAttributes(self, token): - adjust_attributes(token, adjustSVGAttributes) - - def adjustForeignAttributes(self, token): - adjust_attributes(token, adjustForeignAttributesMap) - - def reparseTokenNormal(self, token): - # pylint:disable=unused-argument - self.parser.phase() - - def resetInsertionMode(self): - # The name of this method is mostly historical. (It's also used in the - # specification.) - last = False - newModes = { - "select": "inSelect", - "td": "inCell", - "th": "inCell", - "tr": "inRow", - "tbody": "inTableBody", - "thead": "inTableBody", - "tfoot": "inTableBody", - "caption": "inCaption", - "colgroup": "inColumnGroup", - "table": "inTable", - "head": "inBody", - "body": "inBody", - "frameset": "inFrameset", - "html": "beforeHead" - } - for node in self.tree.openElements[::-1]: - nodeName = node.name - new_phase = None - if node == self.tree.openElements[0]: - assert self.innerHTML - last = True - nodeName = self.innerHTML - # Check for conditions that should only happen in the innerHTML - # case - if nodeName in ("select", "colgroup", "head", "html"): - assert self.innerHTML - - if not last and node.namespace != self.tree.defaultNamespace: - continue - - if nodeName in newModes: - new_phase = self.phases[newModes[nodeName]] - break - elif last: - new_phase = self.phases["inBody"] - break - - self.phase = new_phase - - def parseRCDataRawtext(self, token, contentType): - # Generic RCDATA/RAWTEXT Parsing algorithm - assert contentType in ("RAWTEXT", "RCDATA") - - self.tree.insertElement(token) - - if contentType == "RAWTEXT": - self.tokenizer.state = self.tokenizer.rawtextState - else: - self.tokenizer.state = self.tokenizer.rcdataState - - self.originalPhase = self.phase - - self.phase = self.phases["text"] - - -@_utils.memoize -def getPhases(debug): - def log(function): - """Logger that records which phase processes each token""" - type_names = {value: key for key, value in tokenTypes.items()} - - def wrapped(self, *args, **kwargs): - if function.__name__.startswith("process") and len(args) > 0: - token = args[0] - info = {"type": type_names[token['type']]} - if token['type'] in tagTokenTypes: - info["name"] = token['name'] - - self.parser.log.append((self.parser.tokenizer.state.__name__, - self.parser.phase.__class__.__name__, - self.__class__.__name__, - function.__name__, - info)) - return function(self, *args, **kwargs) - else: - return function(self, *args, **kwargs) - return wrapped - - def getMetaclass(use_metaclass, metaclass_func): - if use_metaclass: - return method_decorator_metaclass(metaclass_func) - else: - return type - - # pylint:disable=unused-argument - class Phase(with_metaclass(getMetaclass(debug, log))): - """Base class for helper object that implements each phase of processing - """ - __slots__ = ("parser", "tree", "__startTagCache", "__endTagCache") - - def __init__(self, parser, tree): - self.parser = parser - self.tree = tree - self.__startTagCache = {} - self.__endTagCache = {} - - def processEOF(self): - raise NotImplementedError - - def processComment(self, token): - # For most phases the following is correct. Where it's not it will be - # overridden. - self.tree.insertComment(token, self.tree.openElements[-1]) - - def processDoctype(self, token): - self.parser.parseError("unexpected-doctype") - - def processCharacters(self, token): - self.tree.insertText(token["data"]) - - def processSpaceCharacters(self, token): - self.tree.insertText(token["data"]) - - def processStartTag(self, token): - # Note the caching is done here rather than BoundMethodDispatcher as doing it there - # requires a circular reference to the Phase, and this ends up with a significant - # (CPython 2.7, 3.8) GC cost when parsing many short inputs - name = token["name"] - # In Py2, using `in` is quicker in general than try/except KeyError - # In Py3, `in` is quicker when there are few cache hits (typically short inputs) - if name in self.__startTagCache: - func = self.__startTagCache[name] - else: - func = self.__startTagCache[name] = self.startTagHandler[name] - # bound the cache size in case we get loads of unknown tags - while len(self.__startTagCache) > len(self.startTagHandler) * 1.1: - # this makes the eviction policy random on Py < 3.7 and FIFO >= 3.7 - self.__startTagCache.pop(next(iter(self.__startTagCache))) - return func(token) - - def startTagHtml(self, token): - if not self.parser.firstStartTag and token["name"] == "html": - self.parser.parseError("non-html-root") - # XXX Need a check here to see if the first start tag token emitted is - # this token... If it's not, invoke self.parser.parseError(). - for attr, value in token["data"].items(): - if attr not in self.tree.openElements[0].attributes: - self.tree.openElements[0].attributes[attr] = value - self.parser.firstStartTag = False - - def processEndTag(self, token): - # Note the caching is done here rather than BoundMethodDispatcher as doing it there - # requires a circular reference to the Phase, and this ends up with a significant - # (CPython 2.7, 3.8) GC cost when parsing many short inputs - name = token["name"] - # In Py2, using `in` is quicker in general than try/except KeyError - # In Py3, `in` is quicker when there are few cache hits (typically short inputs) - if name in self.__endTagCache: - func = self.__endTagCache[name] - else: - func = self.__endTagCache[name] = self.endTagHandler[name] - # bound the cache size in case we get loads of unknown tags - while len(self.__endTagCache) > len(self.endTagHandler) * 1.1: - # this makes the eviction policy random on Py < 3.7 and FIFO >= 3.7 - self.__endTagCache.pop(next(iter(self.__endTagCache))) - return func(token) - - class InitialPhase(Phase): - __slots__ = tuple() - - def processSpaceCharacters(self, token): - pass - - def processComment(self, token): - self.tree.insertComment(token, self.tree.document) - - def processDoctype(self, token): - name = token["name"] - publicId = token["publicId"] - systemId = token["systemId"] - correct = token["correct"] - - if (name != "html" or publicId is not None or - systemId is not None and systemId != "about:legacy-compat"): - self.parser.parseError("unknown-doctype") - - if publicId is None: - publicId = "" - - self.tree.insertDoctype(token) - - if publicId != "": - publicId = publicId.translate(asciiUpper2Lower) - - if (not correct or token["name"] != "html" or - publicId.startswith( - ("+//silmaril//dtd html pro v0r11 19970101//", - "-//advasoft ltd//dtd html 3.0 aswedit + extensions//", - "-//as//dtd html 3.0 aswedit + extensions//", - "-//ietf//dtd html 2.0 level 1//", - "-//ietf//dtd html 2.0 level 2//", - "-//ietf//dtd html 2.0 strict level 1//", - "-//ietf//dtd html 2.0 strict level 2//", - "-//ietf//dtd html 2.0 strict//", - "-//ietf//dtd html 2.0//", - "-//ietf//dtd html 2.1e//", - "-//ietf//dtd html 3.0//", - "-//ietf//dtd html 3.2 final//", - "-//ietf//dtd html 3.2//", - "-//ietf//dtd html 3//", - "-//ietf//dtd html level 0//", - "-//ietf//dtd html level 1//", - "-//ietf//dtd html level 2//", - "-//ietf//dtd html level 3//", - "-//ietf//dtd html strict level 0//", - "-//ietf//dtd html strict level 1//", - "-//ietf//dtd html strict level 2//", - "-//ietf//dtd html strict level 3//", - "-//ietf//dtd html strict//", - "-//ietf//dtd html//", - "-//metrius//dtd metrius presentational//", - "-//microsoft//dtd internet explorer 2.0 html strict//", - "-//microsoft//dtd internet explorer 2.0 html//", - "-//microsoft//dtd internet explorer 2.0 tables//", - "-//microsoft//dtd internet explorer 3.0 html strict//", - "-//microsoft//dtd internet explorer 3.0 html//", - "-//microsoft//dtd internet explorer 3.0 tables//", - "-//netscape comm. corp.//dtd html//", - "-//netscape comm. corp.//dtd strict html//", - "-//o'reilly and associates//dtd html 2.0//", - "-//o'reilly and associates//dtd html extended 1.0//", - "-//o'reilly and associates//dtd html extended relaxed 1.0//", - "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//", - "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//", - "-//spyglass//dtd html 2.0 extended//", - "-//sq//dtd html 2.0 hotmetal + extensions//", - "-//sun microsystems corp.//dtd hotjava html//", - "-//sun microsystems corp.//dtd hotjava strict html//", - "-//w3c//dtd html 3 1995-03-24//", - "-//w3c//dtd html 3.2 draft//", - "-//w3c//dtd html 3.2 final//", - "-//w3c//dtd html 3.2//", - "-//w3c//dtd html 3.2s draft//", - "-//w3c//dtd html 4.0 frameset//", - "-//w3c//dtd html 4.0 transitional//", - "-//w3c//dtd html experimental 19960712//", - "-//w3c//dtd html experimental 970421//", - "-//w3c//dtd w3 html//", - "-//w3o//dtd w3 html 3.0//", - "-//webtechs//dtd mozilla html 2.0//", - "-//webtechs//dtd mozilla html//")) or - publicId in ("-//w3o//dtd w3 html strict 3.0//en//", - "-/w3c/dtd html 4.0 transitional/en", - "html") or - publicId.startswith( - ("-//w3c//dtd html 4.01 frameset//", - "-//w3c//dtd html 4.01 transitional//")) and - systemId is None or - systemId and systemId.lower() == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"): - self.parser.compatMode = "quirks" - elif (publicId.startswith( - ("-//w3c//dtd xhtml 1.0 frameset//", - "-//w3c//dtd xhtml 1.0 transitional//")) or - publicId.startswith( - ("-//w3c//dtd html 4.01 frameset//", - "-//w3c//dtd html 4.01 transitional//")) and - systemId is not None): - self.parser.compatMode = "limited quirks" - - self.parser.phase = self.parser.phases["beforeHtml"] - - def anythingElse(self): - self.parser.compatMode = "quirks" - self.parser.phase = self.parser.phases["beforeHtml"] - - def processCharacters(self, token): - self.parser.parseError("expected-doctype-but-got-chars") - self.anythingElse() - return token - - def processStartTag(self, token): - self.parser.parseError("expected-doctype-but-got-start-tag", - {"name": token["name"]}) - self.anythingElse() - return token - - def processEndTag(self, token): - self.parser.parseError("expected-doctype-but-got-end-tag", - {"name": token["name"]}) - self.anythingElse() - return token - - def processEOF(self): - self.parser.parseError("expected-doctype-but-got-eof") - self.anythingElse() - return True - - class BeforeHtmlPhase(Phase): - __slots__ = tuple() - - # helper methods - def insertHtmlElement(self): - self.tree.insertRoot(impliedTagToken("html", "StartTag")) - self.parser.phase = self.parser.phases["beforeHead"] - - # other - def processEOF(self): - self.insertHtmlElement() - return True - - def processComment(self, token): - self.tree.insertComment(token, self.tree.document) - - def processSpaceCharacters(self, token): - pass - - def processCharacters(self, token): - self.insertHtmlElement() - return token - - def processStartTag(self, token): - if token["name"] == "html": - self.parser.firstStartTag = True - self.insertHtmlElement() - return token - - def processEndTag(self, token): - if token["name"] not in ("head", "body", "html", "br"): - self.parser.parseError("unexpected-end-tag-before-html", - {"name": token["name"]}) - else: - self.insertHtmlElement() - return token - - class BeforeHeadPhase(Phase): - __slots__ = tuple() - - def processEOF(self): - self.startTagHead(impliedTagToken("head", "StartTag")) - return True - - def processSpaceCharacters(self, token): - pass - - def processCharacters(self, token): - self.startTagHead(impliedTagToken("head", "StartTag")) - return token - - def startTagHtml(self, token): - return self.parser.phases["inBody"].processStartTag(token) - - def startTagHead(self, token): - self.tree.insertElement(token) - self.tree.headPointer = self.tree.openElements[-1] - self.parser.phase = self.parser.phases["inHead"] - - def startTagOther(self, token): - self.startTagHead(impliedTagToken("head", "StartTag")) - return token - - def endTagImplyHead(self, token): - self.startTagHead(impliedTagToken("head", "StartTag")) - return token - - def endTagOther(self, token): - self.parser.parseError("end-tag-after-implied-root", - {"name": token["name"]}) - - startTagHandler = _utils.MethodDispatcher([ - ("html", startTagHtml), - ("head", startTagHead) - ]) - startTagHandler.default = startTagOther - - endTagHandler = _utils.MethodDispatcher([ - (("head", "body", "html", "br"), endTagImplyHead) - ]) - endTagHandler.default = endTagOther - - class InHeadPhase(Phase): - __slots__ = tuple() - - # the real thing - def processEOF(self): - self.anythingElse() - return True - - def processCharacters(self, token): - self.anythingElse() - return token - - def startTagHtml(self, token): - return self.parser.phases["inBody"].processStartTag(token) - - def startTagHead(self, token): - self.parser.parseError("two-heads-are-not-better-than-one") - - def startTagBaseLinkCommand(self, token): - self.tree.insertElement(token) - self.tree.openElements.pop() - token["selfClosingAcknowledged"] = True - - def startTagMeta(self, token): - self.tree.insertElement(token) - self.tree.openElements.pop() - token["selfClosingAcknowledged"] = True - - attributes = token["data"] - if self.parser.tokenizer.stream.charEncoding[1] == "tentative": - if "charset" in attributes: - self.parser.tokenizer.stream.changeEncoding(attributes["charset"]) - elif ("content" in attributes and - "http-equiv" in attributes and - attributes["http-equiv"].lower() == "content-type"): - # Encoding it as UTF-8 here is a hack, as really we should pass - # the abstract Unicode string, and just use the - # ContentAttrParser on that, but using UTF-8 allows all chars - # to be encoded and as a ASCII-superset works. - data = _inputstream.EncodingBytes(attributes["content"].encode("utf-8")) - parser = _inputstream.ContentAttrParser(data) - codec = parser.parse() - self.parser.tokenizer.stream.changeEncoding(codec) - - def startTagTitle(self, token): - self.parser.parseRCDataRawtext(token, "RCDATA") - - def startTagNoFramesStyle(self, token): - # Need to decide whether to implement the scripting-disabled case - self.parser.parseRCDataRawtext(token, "RAWTEXT") - - def startTagNoscript(self, token): - if self.parser.scripting: - self.parser.parseRCDataRawtext(token, "RAWTEXT") - else: - self.tree.insertElement(token) - self.parser.phase = self.parser.phases["inHeadNoscript"] - - def startTagScript(self, token): - self.tree.insertElement(token) - self.parser.tokenizer.state = self.parser.tokenizer.scriptDataState - self.parser.originalPhase = self.parser.phase - self.parser.phase = self.parser.phases["text"] - - def startTagOther(self, token): - self.anythingElse() - return token - - def endTagHead(self, token): - node = self.parser.tree.openElements.pop() - assert node.name == "head", "Expected head got %s" % node.name - self.parser.phase = self.parser.phases["afterHead"] - - def endTagHtmlBodyBr(self, token): - self.anythingElse() - return token - - def endTagOther(self, token): - self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) - - def anythingElse(self): - self.endTagHead(impliedTagToken("head")) - - startTagHandler = _utils.MethodDispatcher([ - ("html", startTagHtml), - ("title", startTagTitle), - (("noframes", "style"), startTagNoFramesStyle), - ("noscript", startTagNoscript), - ("script", startTagScript), - (("base", "basefont", "bgsound", "command", "link"), - startTagBaseLinkCommand), - ("meta", startTagMeta), - ("head", startTagHead) - ]) - startTagHandler.default = startTagOther - - endTagHandler = _utils.MethodDispatcher([ - ("head", endTagHead), - (("br", "html", "body"), endTagHtmlBodyBr) - ]) - endTagHandler.default = endTagOther - - class InHeadNoscriptPhase(Phase): - __slots__ = tuple() - - def processEOF(self): - self.parser.parseError("eof-in-head-noscript") - self.anythingElse() - return True - - def processComment(self, token): - return self.parser.phases["inHead"].processComment(token) - - def processCharacters(self, token): - self.parser.parseError("char-in-head-noscript") - self.anythingElse() - return token - - def processSpaceCharacters(self, token): - return self.parser.phases["inHead"].processSpaceCharacters(token) - - def startTagHtml(self, token): - return self.parser.phases["inBody"].processStartTag(token) - - def startTagBaseLinkCommand(self, token): - return self.parser.phases["inHead"].processStartTag(token) - - def startTagHeadNoscript(self, token): - self.parser.parseError("unexpected-start-tag", {"name": token["name"]}) - - def startTagOther(self, token): - self.parser.parseError("unexpected-inhead-noscript-tag", {"name": token["name"]}) - self.anythingElse() - return token - - def endTagNoscript(self, token): - node = self.parser.tree.openElements.pop() - assert node.name == "noscript", "Expected noscript got %s" % node.name - self.parser.phase = self.parser.phases["inHead"] - - def endTagBr(self, token): - self.parser.parseError("unexpected-inhead-noscript-tag", {"name": token["name"]}) - self.anythingElse() - return token - - def endTagOther(self, token): - self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) - - def anythingElse(self): - # Caller must raise parse error first! - self.endTagNoscript(impliedTagToken("noscript")) - - startTagHandler = _utils.MethodDispatcher([ - ("html", startTagHtml), - (("basefont", "bgsound", "link", "meta", "noframes", "style"), startTagBaseLinkCommand), - (("head", "noscript"), startTagHeadNoscript), - ]) - startTagHandler.default = startTagOther - - endTagHandler = _utils.MethodDispatcher([ - ("noscript", endTagNoscript), - ("br", endTagBr), - ]) - endTagHandler.default = endTagOther - - class AfterHeadPhase(Phase): - __slots__ = tuple() - - def processEOF(self): - self.anythingElse() - return True - - def processCharacters(self, token): - self.anythingElse() - return token - - def startTagHtml(self, token): - return self.parser.phases["inBody"].processStartTag(token) - - def startTagBody(self, token): - self.parser.framesetOK = False - self.tree.insertElement(token) - self.parser.phase = self.parser.phases["inBody"] - - def startTagFrameset(self, token): - self.tree.insertElement(token) - self.parser.phase = self.parser.phases["inFrameset"] - - def startTagFromHead(self, token): - self.parser.parseError("unexpected-start-tag-out-of-my-head", - {"name": token["name"]}) - self.tree.openElements.append(self.tree.headPointer) - self.parser.phases["inHead"].processStartTag(token) - for node in self.tree.openElements[::-1]: - if node.name == "head": - self.tree.openElements.remove(node) - break - - def startTagHead(self, token): - self.parser.parseError("unexpected-start-tag", {"name": token["name"]}) - - def startTagOther(self, token): - self.anythingElse() - return token - - def endTagHtmlBodyBr(self, token): - self.anythingElse() - return token - - def endTagOther(self, token): - self.parser.parseError("unexpected-end-tag", {"name": token["name"]}) - - def anythingElse(self): - self.tree.insertElement(impliedTagToken("body", "StartTag")) - self.parser.phase = self.parser.phases["inBody"] - self.parser.framesetOK = True - - startTagHandler = _utils.MethodDispatcher([ - ("html", startTagHtml), - ("body", startTagBody), - ("frameset", startTagFrameset), - (("base", "basefont", "bgsound", "link", "meta", "noframes", "script", - "style", "title"), - startTagFromHead), - ("head", startTagHead) - ]) - startTagHandler.default = startTagOther - endTagHandler = _utils.MethodDispatcher([(("body", "html", "br"), - endTagHtmlBodyBr)]) - endTagHandler.default = endTagOther - - class InBodyPhase(Phase): - # http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody - # the really-really-really-very crazy mode - __slots__ = ("processSpaceCharacters",) - - def __init__(self, *args, **kwargs): - super(InBodyPhase, self).__init__(*args, **kwargs) - # Set this to the default handler - self.processSpaceCharacters = self.processSpaceCharactersNonPre - - def isMatchingFormattingElement(self, node1, node2): - return (node1.name == node2.name and - node1.namespace == node2.namespace and - node1.attributes == node2.attributes) - - # helper - def addFormattingElement(self, token): - self.tree.insertElement(token) - element = self.tree.openElements[-1] - - matchingElements = [] - for node in self.tree.activeFormattingElements[::-1]: - if node is Marker: - break - elif self.isMatchingFormattingElement(node, element): - matchingElements.append(node) - - assert len(matchingElements) <= 3 - if len(matchingElements) == 3: - self.tree.activeFormattingElements.remove(matchingElements[-1]) - self.tree.activeFormattingElements.append(element) - - # the real deal - def processEOF(self): - allowed_elements = frozenset(("dd", "dt", "li", "p", "tbody", "td", - "tfoot", "th", "thead", "tr", "body", - "html")) - for node in self.tree.openElements[::-1]: - if node.name not in allowed_elements: - self.parser.parseError("expected-closing-tag-but-got-eof") - break - # Stop parsing - - def processSpaceCharactersDropNewline(self, token): - # Sometimes (start of
, , and