Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +1 -0
- .venv/lib/python3.11/site-packages/cryptography-43.0.3.dist-info/INSTALLER +1 -0
- .venv/lib/python3.11/site-packages/cryptography-43.0.3.dist-info/METADATA +138 -0
- .venv/lib/python3.11/site-packages/cryptography-43.0.3.dist-info/RECORD +173 -0
- .venv/lib/python3.11/site-packages/cryptography-43.0.3.dist-info/WHEEL +5 -0
- .venv/lib/python3.11/site-packages/cryptography-43.0.3.dist-info/license_files/LICENSE +3 -0
- .venv/lib/python3.11/site-packages/cryptography-43.0.3.dist-info/license_files/LICENSE.APACHE +202 -0
- .venv/lib/python3.11/site-packages/cryptography-43.0.3.dist-info/license_files/LICENSE.BSD +27 -0
- .venv/lib/python3.11/site-packages/jsonschema-4.23.0.dist-info/INSTALLER +1 -0
- .venv/lib/python3.11/site-packages/jsonschema-4.23.0.dist-info/METADATA +176 -0
- .venv/lib/python3.11/site-packages/jsonschema-4.23.0.dist-info/RECORD +76 -0
- .venv/lib/python3.11/site-packages/jsonschema-4.23.0.dist-info/WHEEL +4 -0
- .venv/lib/python3.11/site-packages/jsonschema-4.23.0.dist-info/entry_points.txt +2 -0
- .venv/lib/python3.11/site-packages/jsonschema-4.23.0.dist-info/licenses/COPYING +19 -0
- .venv/lib/python3.11/site-packages/mistral_common/__pycache__/__init__.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/mistral_common/__pycache__/base.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/mistral_common/__pycache__/exceptions.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/mistral_common/__pycache__/multimodal.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/mistral_common/data/mistral_instruct_tokenizer_240323.model.v3 +3 -0
- .venv/lib/python3.11/site-packages/mistral_common/protocol/embedding/__init__.py +0 -0
- .venv/lib/python3.11/site-packages/mistral_common/protocol/embedding/__pycache__/__init__.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/mistral_common/protocol/embedding/__pycache__/request.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/mistral_common/protocol/embedding/__pycache__/response.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/mistral_common/protocol/embedding/request.py +11 -0
- .venv/lib/python3.11/site-packages/mistral_common/protocol/embedding/response.py +21 -0
- .venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/__pycache__/__init__.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/__pycache__/messages.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/__pycache__/response.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/mistral_common/tokens/__init__.py +0 -0
- .venv/lib/python3.11/site-packages/mistral_common/tokens/__pycache__/__init__.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/mistral_common/tokens/instruct/__init__.py +0 -0
- .venv/lib/python3.11/site-packages/mistral_common/tokens/instruct/__pycache__/__init__.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/mistral_common/tokens/instruct/__pycache__/request.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/mistral_common/tokens/instruct/request.py +25 -0
- .venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/__init__.py +0 -0
- .venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/__pycache__/__init__.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/__pycache__/base.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/__pycache__/mistral.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/__pycache__/multimodal.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/__pycache__/sentencepiece.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/__pycache__/tekken.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/__pycache__/utils.cpython-311.pyc +0 -0
- .venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/base.py +200 -0
- .venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/mistral.py +251 -0
- .venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/multimodal.py +172 -0
- .venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/sentencepiece.py +672 -0
- .venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/tekken.py +312 -0
- .venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/utils.py +6 -0
- .venv/lib/python3.11/site-packages/numpy/ma/tests/__init__.py +0 -0
- .venv/lib/python3.11/site-packages/numpy/ma/tests/__pycache__/test_deprecations.cpython-311.pyc +0 -0
.gitattributes
CHANGED
@@ -391,3 +391,4 @@ tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cudnn/lib/
|
|
391 |
.venv/lib/python3.11/site-packages/numpy/lib/__pycache__/function_base.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
|
392 |
.venv/lib/python3.11/site-packages/numpy/lib/__pycache__/npyio.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
|
393 |
.venv/lib/python3.11/site-packages/numpy/lib/tests/__pycache__/test_function_base.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
|
|
|
|
391 |
.venv/lib/python3.11/site-packages/numpy/lib/__pycache__/function_base.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
|
392 |
.venv/lib/python3.11/site-packages/numpy/lib/__pycache__/npyio.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
|
393 |
.venv/lib/python3.11/site-packages/numpy/lib/tests/__pycache__/test_function_base.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
|
394 |
+
.venv/lib/python3.11/site-packages/mistral_common/data/mistral_instruct_tokenizer_240323.model.v3 filter=lfs diff=lfs merge=lfs -text
|
.venv/lib/python3.11/site-packages/cryptography-43.0.3.dist-info/INSTALLER
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
pip
|
.venv/lib/python3.11/site-packages/cryptography-43.0.3.dist-info/METADATA
ADDED
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Metadata-Version: 2.3
|
2 |
+
Name: cryptography
|
3 |
+
Version: 43.0.3
|
4 |
+
Classifier: Development Status :: 5 - Production/Stable
|
5 |
+
Classifier: Intended Audience :: Developers
|
6 |
+
Classifier: License :: OSI Approved :: Apache Software License
|
7 |
+
Classifier: License :: OSI Approved :: BSD License
|
8 |
+
Classifier: Natural Language :: English
|
9 |
+
Classifier: Operating System :: MacOS :: MacOS X
|
10 |
+
Classifier: Operating System :: POSIX
|
11 |
+
Classifier: Operating System :: POSIX :: BSD
|
12 |
+
Classifier: Operating System :: POSIX :: Linux
|
13 |
+
Classifier: Operating System :: Microsoft :: Windows
|
14 |
+
Classifier: Programming Language :: Python
|
15 |
+
Classifier: Programming Language :: Python :: 3
|
16 |
+
Classifier: Programming Language :: Python :: 3 :: Only
|
17 |
+
Classifier: Programming Language :: Python :: 3.7
|
18 |
+
Classifier: Programming Language :: Python :: 3.8
|
19 |
+
Classifier: Programming Language :: Python :: 3.9
|
20 |
+
Classifier: Programming Language :: Python :: 3.10
|
21 |
+
Classifier: Programming Language :: Python :: 3.11
|
22 |
+
Classifier: Programming Language :: Python :: 3.12
|
23 |
+
Classifier: Programming Language :: Python :: Implementation :: CPython
|
24 |
+
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
25 |
+
Classifier: Topic :: Security :: Cryptography
|
26 |
+
Requires-Dist: cffi >=1.12 ; platform_python_implementation != 'PyPy'
|
27 |
+
Requires-Dist: bcrypt >=3.1.5 ; extra == 'ssh'
|
28 |
+
Requires-Dist: nox ; extra == 'nox'
|
29 |
+
Requires-Dist: cryptography-vectors ==43.0.3 ; extra == 'test'
|
30 |
+
Requires-Dist: pytest >=6.2.0 ; extra == 'test'
|
31 |
+
Requires-Dist: pytest-benchmark ; extra == 'test'
|
32 |
+
Requires-Dist: pytest-cov ; extra == 'test'
|
33 |
+
Requires-Dist: pytest-xdist ; extra == 'test'
|
34 |
+
Requires-Dist: pretend ; extra == 'test'
|
35 |
+
Requires-Dist: certifi ; extra == 'test'
|
36 |
+
Requires-Dist: pytest-randomly ; extra == 'test-randomorder'
|
37 |
+
Requires-Dist: sphinx >=5.3.0 ; extra == 'docs'
|
38 |
+
Requires-Dist: sphinx-rtd-theme >=1.1.1 ; extra == 'docs'
|
39 |
+
Requires-Dist: pyenchant >=1.6.11 ; extra == 'docstest'
|
40 |
+
Requires-Dist: readme-renderer ; extra == 'docstest'
|
41 |
+
Requires-Dist: sphinxcontrib-spelling >=4.0.1 ; extra == 'docstest'
|
42 |
+
Requires-Dist: build ; extra == 'sdist'
|
43 |
+
Requires-Dist: ruff ; extra == 'pep8test'
|
44 |
+
Requires-Dist: mypy ; extra == 'pep8test'
|
45 |
+
Requires-Dist: check-sdist ; extra == 'pep8test'
|
46 |
+
Requires-Dist: click ; extra == 'pep8test'
|
47 |
+
Provides-Extra: ssh
|
48 |
+
Provides-Extra: nox
|
49 |
+
Provides-Extra: test
|
50 |
+
Provides-Extra: test-randomorder
|
51 |
+
Provides-Extra: docs
|
52 |
+
Provides-Extra: docstest
|
53 |
+
Provides-Extra: sdist
|
54 |
+
Provides-Extra: pep8test
|
55 |
+
License-File: LICENSE
|
56 |
+
License-File: LICENSE.APACHE
|
57 |
+
License-File: LICENSE.BSD
|
58 |
+
Summary: cryptography is a package which provides cryptographic recipes and primitives to Python developers.
|
59 |
+
Author: The cryptography developers <[email protected]>
|
60 |
+
Author-email: The Python Cryptographic Authority and individual contributors <[email protected]>
|
61 |
+
License: Apache-2.0 OR BSD-3-Clause
|
62 |
+
Requires-Python: >=3.7
|
63 |
+
Description-Content-Type: text/x-rst; charset=UTF-8
|
64 |
+
Project-URL: homepage, https://github.com/pyca/cryptography
|
65 |
+
Project-URL: documentation, https://cryptography.io/
|
66 |
+
Project-URL: source, https://github.com/pyca/cryptography/
|
67 |
+
Project-URL: issues, https://github.com/pyca/cryptography/issues
|
68 |
+
Project-URL: changelog, https://cryptography.io/en/latest/changelog/
|
69 |
+
|
70 |
+
pyca/cryptography
|
71 |
+
=================
|
72 |
+
|
73 |
+
.. image:: https://img.shields.io/pypi/v/cryptography.svg
|
74 |
+
:target: https://pypi.org/project/cryptography/
|
75 |
+
:alt: Latest Version
|
76 |
+
|
77 |
+
.. image:: https://readthedocs.org/projects/cryptography/badge/?version=latest
|
78 |
+
:target: https://cryptography.io
|
79 |
+
:alt: Latest Docs
|
80 |
+
|
81 |
+
.. image:: https://github.com/pyca/cryptography/workflows/CI/badge.svg?branch=main
|
82 |
+
:target: https://github.com/pyca/cryptography/actions?query=workflow%3ACI+branch%3Amain
|
83 |
+
|
84 |
+
|
85 |
+
``cryptography`` is a package which provides cryptographic recipes and
|
86 |
+
primitives to Python developers. Our goal is for it to be your "cryptographic
|
87 |
+
standard library". It supports Python 3.7+ and PyPy3 7.3.11+.
|
88 |
+
|
89 |
+
``cryptography`` includes both high level recipes and low level interfaces to
|
90 |
+
common cryptographic algorithms such as symmetric ciphers, message digests, and
|
91 |
+
key derivation functions. For example, to encrypt something with
|
92 |
+
``cryptography``'s high level symmetric encryption recipe:
|
93 |
+
|
94 |
+
.. code-block:: pycon
|
95 |
+
|
96 |
+
>>> from cryptography.fernet import Fernet
|
97 |
+
>>> # Put this somewhere safe!
|
98 |
+
>>> key = Fernet.generate_key()
|
99 |
+
>>> f = Fernet(key)
|
100 |
+
>>> token = f.encrypt(b"A really secret message. Not for prying eyes.")
|
101 |
+
>>> token
|
102 |
+
b'...'
|
103 |
+
>>> f.decrypt(token)
|
104 |
+
b'A really secret message. Not for prying eyes.'
|
105 |
+
|
106 |
+
You can find more information in the `documentation`_.
|
107 |
+
|
108 |
+
You can install ``cryptography`` with:
|
109 |
+
|
110 |
+
.. code-block:: console
|
111 |
+
|
112 |
+
$ pip install cryptography
|
113 |
+
|
114 |
+
For full details see `the installation documentation`_.
|
115 |
+
|
116 |
+
Discussion
|
117 |
+
~~~~~~~~~~
|
118 |
+
|
119 |
+
If you run into bugs, you can file them in our `issue tracker`_.
|
120 |
+
|
121 |
+
We maintain a `cryptography-dev`_ mailing list for development discussion.
|
122 |
+
|
123 |
+
You can also join ``#pyca`` on ``irc.libera.chat`` to ask questions or get
|
124 |
+
involved.
|
125 |
+
|
126 |
+
Security
|
127 |
+
~~~~~~~~
|
128 |
+
|
129 |
+
Need to report a security issue? Please consult our `security reporting`_
|
130 |
+
documentation.
|
131 |
+
|
132 |
+
|
133 |
+
.. _`documentation`: https://cryptography.io/
|
134 |
+
.. _`the installation documentation`: https://cryptography.io/en/latest/installation/
|
135 |
+
.. _`issue tracker`: https://github.com/pyca/cryptography/issues
|
136 |
+
.. _`cryptography-dev`: https://mail.python.org/mailman/listinfo/cryptography-dev
|
137 |
+
.. _`security reporting`: https://cryptography.io/en/latest/security/
|
138 |
+
|
.venv/lib/python3.11/site-packages/cryptography-43.0.3.dist-info/RECORD
ADDED
@@ -0,0 +1,173 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
cryptography-43.0.3.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
|
2 |
+
cryptography-43.0.3.dist-info/METADATA,sha256=6zbg5CUehHnvNpZEQHVe8ivt1BG6h6k_cm-o5bsOZLA,5440
|
3 |
+
cryptography-43.0.3.dist-info/RECORD,,
|
4 |
+
cryptography-43.0.3.dist-info/WHEEL,sha256=5SNCVD9cb88a-xAIrDHIo1CvpgNriOYcNgb4b8rPcOw,107
|
5 |
+
cryptography-43.0.3.dist-info/license_files/LICENSE,sha256=Pgx8CRqUi4JTO6mP18u0BDLW8amsv4X1ki0vmak65rs,197
|
6 |
+
cryptography-43.0.3.dist-info/license_files/LICENSE.APACHE,sha256=qsc7MUj20dcRHbyjIJn2jSbGRMaBOuHk8F9leaomY_4,11360
|
7 |
+
cryptography-43.0.3.dist-info/license_files/LICENSE.BSD,sha256=YCxMdILeZHndLpeTzaJ15eY9dz2s0eymiSMqtwCPtPs,1532
|
8 |
+
cryptography/__about__.py,sha256=-FkHKD9mSuEfH37wsSKnQzJZmL5zUAUTpB5OeUQjPE0,445
|
9 |
+
cryptography/__init__.py,sha256=mthuUrTd4FROCpUYrTIqhjz6s6T9djAZrV7nZ1oMm2o,364
|
10 |
+
cryptography/__pycache__/__about__.cpython-311.pyc,,
|
11 |
+
cryptography/__pycache__/__init__.cpython-311.pyc,,
|
12 |
+
cryptography/__pycache__/exceptions.cpython-311.pyc,,
|
13 |
+
cryptography/__pycache__/fernet.cpython-311.pyc,,
|
14 |
+
cryptography/__pycache__/utils.cpython-311.pyc,,
|
15 |
+
cryptography/exceptions.py,sha256=835EWILc2fwxw-gyFMriciC2SqhViETB10LBSytnDIc,1087
|
16 |
+
cryptography/fernet.py,sha256=aPj82w-Z_1GBXUtWRUsZdVbMwRo5Mbjj0wkA9wG4rkw,6696
|
17 |
+
cryptography/hazmat/__init__.py,sha256=5IwrLWrVp0AjEr_4FdWG_V057NSJGY_W4egNNsuct0g,455
|
18 |
+
cryptography/hazmat/__pycache__/__init__.cpython-311.pyc,,
|
19 |
+
cryptography/hazmat/__pycache__/_oid.cpython-311.pyc,,
|
20 |
+
cryptography/hazmat/_oid.py,sha256=e9yLmxtdQtuL94ztQv3SGtt_ea1Mx6aUwGftJsP6EXk,15201
|
21 |
+
cryptography/hazmat/backends/__init__.py,sha256=O5jvKFQdZnXhKeqJ-HtulaEL9Ni7mr1mDzZY5kHlYhI,361
|
22 |
+
cryptography/hazmat/backends/__pycache__/__init__.cpython-311.pyc,,
|
23 |
+
cryptography/hazmat/backends/openssl/__init__.py,sha256=p3jmJfnCag9iE5sdMrN6VvVEu55u46xaS_IjoI0SrmA,305
|
24 |
+
cryptography/hazmat/backends/openssl/__pycache__/__init__.cpython-311.pyc,,
|
25 |
+
cryptography/hazmat/backends/openssl/__pycache__/backend.cpython-311.pyc,,
|
26 |
+
cryptography/hazmat/backends/openssl/backend.py,sha256=pUXUbugLwMm2Gls-h5U5fw2RvepaNjEvnao6CTmL1xQ,9648
|
27 |
+
cryptography/hazmat/bindings/__init__.py,sha256=s9oKCQ2ycFdXoERdS1imafueSkBsL9kvbyfghaauZ9Y,180
|
28 |
+
cryptography/hazmat/bindings/__pycache__/__init__.cpython-311.pyc,,
|
29 |
+
cryptography/hazmat/bindings/_rust.abi3.so,sha256=QrghdFa6x-vG5lFvGVfv-slqoy0UA9a8eHmtp9hzGCk,10862344
|
30 |
+
cryptography/hazmat/bindings/_rust/__init__.pyi,sha256=wb1OT76lG19vjq97_q2MM3qdJlQhyloXfVbKFDmRse4,737
|
31 |
+
cryptography/hazmat/bindings/_rust/_openssl.pyi,sha256=mpNJLuYLbCVrd5i33FBTmWwL_55Dw7JPkSLlSX9Q7oI,230
|
32 |
+
cryptography/hazmat/bindings/_rust/asn1.pyi,sha256=BrGjC8J6nwuS-r3EVcdXJB8ndotfY9mbQYOfpbPG0HA,354
|
33 |
+
cryptography/hazmat/bindings/_rust/exceptions.pyi,sha256=exXr2xw_0pB1kk93cYbM3MohbzoUkjOms1ZMUi0uQZE,640
|
34 |
+
cryptography/hazmat/bindings/_rust/ocsp.pyi,sha256=R-xJ-XmJZ1lOk-fWHHvRnP3QNTCFnKv-l3xlNWfLVt4,868
|
35 |
+
cryptography/hazmat/bindings/_rust/openssl/__init__.pyi,sha256=Lvn250QMdPyeF-hoBF6rkQgHLBJxVauXCb8i8uYTomQ,1368
|
36 |
+
cryptography/hazmat/bindings/_rust/openssl/aead.pyi,sha256=i0gA3jUQ4rkJXTGGZrq-AuY-VQLN31lyDeWuDZ0zJYw,2553
|
37 |
+
cryptography/hazmat/bindings/_rust/openssl/ciphers.pyi,sha256=iK0ZhQ-WyCQbjaraaFgK6q4PpD-7Rf5RDHkFD3YEW_g,1301
|
38 |
+
cryptography/hazmat/bindings/_rust/openssl/cmac.pyi,sha256=nPH0X57RYpsAkRowVpjQiHE566ThUTx7YXrsadmrmHk,564
|
39 |
+
cryptography/hazmat/bindings/_rust/openssl/dh.pyi,sha256=Z3TC-G04-THtSdAOPLM1h2G7ml5bda1ElZUcn5wpuhk,1564
|
40 |
+
cryptography/hazmat/bindings/_rust/openssl/dsa.pyi,sha256=qBtkgj2albt2qFcnZ9UDrhzoNhCVO7HTby5VSf1EXMI,1299
|
41 |
+
cryptography/hazmat/bindings/_rust/openssl/ec.pyi,sha256=zJy0pRa5n-_p2dm45PxECB_-B6SVZyNKfjxFDpPqT38,1691
|
42 |
+
cryptography/hazmat/bindings/_rust/openssl/ed25519.pyi,sha256=OJsrblS2nHptZctva-pAKFL5q8yPEAkhmjPZpJ6TA94,493
|
43 |
+
cryptography/hazmat/bindings/_rust/openssl/ed448.pyi,sha256=SkPHK2HdbYN02TVQEUOgW3iTdiEY7HBE4DijpdkAzmk,475
|
44 |
+
cryptography/hazmat/bindings/_rust/openssl/hashes.pyi,sha256=J8HoN0GdtPcjRAfNHr5Elva_nkmQfq63L75_z9dd8Uc,573
|
45 |
+
cryptography/hazmat/bindings/_rust/openssl/hmac.pyi,sha256=ZmLJ73pmxcZFC1XosWEiXMRYtvJJor3ZLdCQOJu85Cw,662
|
46 |
+
cryptography/hazmat/bindings/_rust/openssl/kdf.pyi,sha256=wPS5c7NLspM2632II0I4iH1RSxZvSRtBOVqmpyQATfk,544
|
47 |
+
cryptography/hazmat/bindings/_rust/openssl/keys.pyi,sha256=JSrlGNaW49ZCZ1hcb-YJdS1EAbsMwRbVEcLL0P9OApA,872
|
48 |
+
cryptography/hazmat/bindings/_rust/openssl/poly1305.pyi,sha256=9iogF7Q4i81IkOS-IMXp6HvxFF_3cNy_ucrAjVQnn14,540
|
49 |
+
cryptography/hazmat/bindings/_rust/openssl/rsa.pyi,sha256=2OQCNSXkxgc-3uw1xiCCloIQTV6p9_kK79Yu0rhZgPc,1364
|
50 |
+
cryptography/hazmat/bindings/_rust/openssl/x25519.pyi,sha256=2BKdbrddM_9SMUpdvHKGhb9MNjURCarPxccbUDzHeoA,484
|
51 |
+
cryptography/hazmat/bindings/_rust/openssl/x448.pyi,sha256=AoRMWNvCJTiH5L-lkIkCdPlrPLUdJvvfXpIvf1GmxpM,466
|
52 |
+
cryptography/hazmat/bindings/_rust/pkcs12.pyi,sha256=afhB_6M8xI1MIE5vxkaDF1jSxA48ib1--NiOxtf6boM,1394
|
53 |
+
cryptography/hazmat/bindings/_rust/pkcs7.pyi,sha256=QCmuA0IgDr4iOecUOXgUUeh3BAjJx8ubjz__EnNbyGY,972
|
54 |
+
cryptography/hazmat/bindings/_rust/test_support.pyi,sha256=Xo1Gd7bh9rU4HuIS4pm9UwCY6IS1gInvFwmhABLOVO4,936
|
55 |
+
cryptography/hazmat/bindings/_rust/x509.pyi,sha256=WLrGmqmFss8dXKhlG_J9nVhoCcodR72xJdCoxEuBtjY,3551
|
56 |
+
cryptography/hazmat/bindings/openssl/__init__.py,sha256=s9oKCQ2ycFdXoERdS1imafueSkBsL9kvbyfghaauZ9Y,180
|
57 |
+
cryptography/hazmat/bindings/openssl/__pycache__/__init__.cpython-311.pyc,,
|
58 |
+
cryptography/hazmat/bindings/openssl/__pycache__/_conditional.cpython-311.pyc,,
|
59 |
+
cryptography/hazmat/bindings/openssl/__pycache__/binding.cpython-311.pyc,,
|
60 |
+
cryptography/hazmat/bindings/openssl/_conditional.py,sha256=dkGKGU-22uR2ZKeOOwaSxEJCGaafgUjb2romWcu03QE,5163
|
61 |
+
cryptography/hazmat/bindings/openssl/binding.py,sha256=e1gnFAZBPrkJ3CsiZV-ug6kaPdNTAEROaUFiFrUh71M,4042
|
62 |
+
cryptography/hazmat/decrepit/__init__.py,sha256=wHCbWfaefa-fk6THSw9th9fJUsStJo7245wfFBqmduA,216
|
63 |
+
cryptography/hazmat/decrepit/__pycache__/__init__.cpython-311.pyc,,
|
64 |
+
cryptography/hazmat/decrepit/ciphers/__init__.py,sha256=wHCbWfaefa-fk6THSw9th9fJUsStJo7245wfFBqmduA,216
|
65 |
+
cryptography/hazmat/decrepit/ciphers/__pycache__/__init__.cpython-311.pyc,,
|
66 |
+
cryptography/hazmat/decrepit/ciphers/__pycache__/algorithms.cpython-311.pyc,,
|
67 |
+
cryptography/hazmat/decrepit/ciphers/algorithms.py,sha256=HWA4PKDS2w4D2dQoRerpLRU7Kntt5vJeJC7j--AlZVU,2520
|
68 |
+
cryptography/hazmat/primitives/__init__.py,sha256=s9oKCQ2ycFdXoERdS1imafueSkBsL9kvbyfghaauZ9Y,180
|
69 |
+
cryptography/hazmat/primitives/__pycache__/__init__.cpython-311.pyc,,
|
70 |
+
cryptography/hazmat/primitives/__pycache__/_asymmetric.cpython-311.pyc,,
|
71 |
+
cryptography/hazmat/primitives/__pycache__/_cipheralgorithm.cpython-311.pyc,,
|
72 |
+
cryptography/hazmat/primitives/__pycache__/_serialization.cpython-311.pyc,,
|
73 |
+
cryptography/hazmat/primitives/__pycache__/cmac.cpython-311.pyc,,
|
74 |
+
cryptography/hazmat/primitives/__pycache__/constant_time.cpython-311.pyc,,
|
75 |
+
cryptography/hazmat/primitives/__pycache__/hashes.cpython-311.pyc,,
|
76 |
+
cryptography/hazmat/primitives/__pycache__/hmac.cpython-311.pyc,,
|
77 |
+
cryptography/hazmat/primitives/__pycache__/keywrap.cpython-311.pyc,,
|
78 |
+
cryptography/hazmat/primitives/__pycache__/padding.cpython-311.pyc,,
|
79 |
+
cryptography/hazmat/primitives/__pycache__/poly1305.cpython-311.pyc,,
|
80 |
+
cryptography/hazmat/primitives/_asymmetric.py,sha256=RhgcouUB6HTiFDBrR1LxqkMjpUxIiNvQ1r_zJjRG6qQ,532
|
81 |
+
cryptography/hazmat/primitives/_cipheralgorithm.py,sha256=gKa0WrLz6K4fqhnGbfBYKDSxgLxsPU0uj_EK2UT47W4,1495
|
82 |
+
cryptography/hazmat/primitives/_serialization.py,sha256=qrozc8fw2WZSbjk3DAlSl3ResxpauwJ74ZgGoUL-mj0,5142
|
83 |
+
cryptography/hazmat/primitives/asymmetric/__init__.py,sha256=s9oKCQ2ycFdXoERdS1imafueSkBsL9kvbyfghaauZ9Y,180
|
84 |
+
cryptography/hazmat/primitives/asymmetric/__pycache__/__init__.cpython-311.pyc,,
|
85 |
+
cryptography/hazmat/primitives/asymmetric/__pycache__/dh.cpython-311.pyc,,
|
86 |
+
cryptography/hazmat/primitives/asymmetric/__pycache__/dsa.cpython-311.pyc,,
|
87 |
+
cryptography/hazmat/primitives/asymmetric/__pycache__/ec.cpython-311.pyc,,
|
88 |
+
cryptography/hazmat/primitives/asymmetric/__pycache__/ed25519.cpython-311.pyc,,
|
89 |
+
cryptography/hazmat/primitives/asymmetric/__pycache__/ed448.cpython-311.pyc,,
|
90 |
+
cryptography/hazmat/primitives/asymmetric/__pycache__/padding.cpython-311.pyc,,
|
91 |
+
cryptography/hazmat/primitives/asymmetric/__pycache__/rsa.cpython-311.pyc,,
|
92 |
+
cryptography/hazmat/primitives/asymmetric/__pycache__/types.cpython-311.pyc,,
|
93 |
+
cryptography/hazmat/primitives/asymmetric/__pycache__/utils.cpython-311.pyc,,
|
94 |
+
cryptography/hazmat/primitives/asymmetric/__pycache__/x25519.cpython-311.pyc,,
|
95 |
+
cryptography/hazmat/primitives/asymmetric/__pycache__/x448.cpython-311.pyc,,
|
96 |
+
cryptography/hazmat/primitives/asymmetric/dh.py,sha256=OOCjMClH1Bf14Sy7jAdwzEeCxFPb8XUe2qePbExvXwc,3420
|
97 |
+
cryptography/hazmat/primitives/asymmetric/dsa.py,sha256=xBwdf0pZOgvqjUKcO7Q0L3NxwalYj0SJDUqThemhSmI,3945
|
98 |
+
cryptography/hazmat/primitives/asymmetric/ec.py,sha256=lwZmtAwi3PM8lsY1MsNaby_bVi--49OCxwE_1yqKC-A,10428
|
99 |
+
cryptography/hazmat/primitives/asymmetric/ed25519.py,sha256=kl63fg7myuMjNTmMoVFeH6iVr0x5FkjNmggxIRTloJk,3423
|
100 |
+
cryptography/hazmat/primitives/asymmetric/ed448.py,sha256=2UzEDzzfkPn83UFVFlMZfIMbAixxY09WmQyrwinWTn8,3456
|
101 |
+
cryptography/hazmat/primitives/asymmetric/padding.py,sha256=eZcvUqVLbe3u48SunLdeniaPlV4-k6pwBl67OW4jSy8,2885
|
102 |
+
cryptography/hazmat/primitives/asymmetric/rsa.py,sha256=nW_Ko7PID9UBJF10GVJOc_1L00ymFsfZDUJYtM5kfGQ,7637
|
103 |
+
cryptography/hazmat/primitives/asymmetric/types.py,sha256=LnsOJym-wmPUJ7Knu_7bCNU3kIiELCd6krOaW_JU08I,2996
|
104 |
+
cryptography/hazmat/primitives/asymmetric/utils.py,sha256=DPTs6T4F-UhwzFQTh-1fSEpQzazH2jf2xpIro3ItF4o,790
|
105 |
+
cryptography/hazmat/primitives/asymmetric/x25519.py,sha256=VGYuRdIYuVBtizpFdNWd2bTrT10JRa1admQdBr08xz8,3341
|
106 |
+
cryptography/hazmat/primitives/asymmetric/x448.py,sha256=GKKJBqYLr03VewMF18bXIM941aaWcZIQ4rC02GLLEmw,3374
|
107 |
+
cryptography/hazmat/primitives/ciphers/__init__.py,sha256=eyEXmjk6_CZXaOPYDr7vAYGXr29QvzgWL2-4CSolLFs,680
|
108 |
+
cryptography/hazmat/primitives/ciphers/__pycache__/__init__.cpython-311.pyc,,
|
109 |
+
cryptography/hazmat/primitives/ciphers/__pycache__/aead.cpython-311.pyc,,
|
110 |
+
cryptography/hazmat/primitives/ciphers/__pycache__/algorithms.cpython-311.pyc,,
|
111 |
+
cryptography/hazmat/primitives/ciphers/__pycache__/base.cpython-311.pyc,,
|
112 |
+
cryptography/hazmat/primitives/ciphers/__pycache__/modes.cpython-311.pyc,,
|
113 |
+
cryptography/hazmat/primitives/ciphers/aead.py,sha256=Fzlyx7w8KYQakzDp1zWgJnIr62zgZrgVh1u2h4exB54,634
|
114 |
+
cryptography/hazmat/primitives/ciphers/algorithms.py,sha256=QvBMDmphRZfNmykij58L5eDkd_2NnCzIpJpyX2QwMxc,4223
|
115 |
+
cryptography/hazmat/primitives/ciphers/base.py,sha256=tg-XNaKUyETBi7ounGDEL1_ICn-s4FF9LR7moV58blI,4211
|
116 |
+
cryptography/hazmat/primitives/ciphers/modes.py,sha256=BFpxEGSaxoeZjrQ4sqpyPDvKClrqfDKIBv7kYtFURhE,8192
|
117 |
+
cryptography/hazmat/primitives/cmac.py,sha256=sz_s6H_cYnOvx-VNWdIKhRhe3Ymp8z8J0D3CBqOX3gg,338
|
118 |
+
cryptography/hazmat/primitives/constant_time.py,sha256=xdunWT0nf8OvKdcqUhhlFKayGp4_PgVJRU2W1wLSr_A,422
|
119 |
+
cryptography/hazmat/primitives/hashes.py,sha256=EvDIJBhj83Z7f-oHbsA0TzZLFSDV_Yv8hQRdM4o8FD0,5091
|
120 |
+
cryptography/hazmat/primitives/hmac.py,sha256=RpB3z9z5skirCQrm7zQbtnp9pLMnAjrlTUvKqF5aDDc,423
|
121 |
+
cryptography/hazmat/primitives/kdf/__init__.py,sha256=4XibZnrYq4hh5xBjWiIXzaYW6FKx8hPbVaa_cB9zS64,750
|
122 |
+
cryptography/hazmat/primitives/kdf/__pycache__/__init__.cpython-311.pyc,,
|
123 |
+
cryptography/hazmat/primitives/kdf/__pycache__/concatkdf.cpython-311.pyc,,
|
124 |
+
cryptography/hazmat/primitives/kdf/__pycache__/hkdf.cpython-311.pyc,,
|
125 |
+
cryptography/hazmat/primitives/kdf/__pycache__/kbkdf.cpython-311.pyc,,
|
126 |
+
cryptography/hazmat/primitives/kdf/__pycache__/pbkdf2.cpython-311.pyc,,
|
127 |
+
cryptography/hazmat/primitives/kdf/__pycache__/scrypt.cpython-311.pyc,,
|
128 |
+
cryptography/hazmat/primitives/kdf/__pycache__/x963kdf.cpython-311.pyc,,
|
129 |
+
cryptography/hazmat/primitives/kdf/concatkdf.py,sha256=bcn4NGXse-EsFl7nlU83e5ilop7TSHcX-CJJS107W80,3686
|
130 |
+
cryptography/hazmat/primitives/kdf/hkdf.py,sha256=uhN5L87w4JvtAqQcPh_Ji2TPSc18IDThpaYJiHOWy3A,3015
|
131 |
+
cryptography/hazmat/primitives/kdf/kbkdf.py,sha256=eSuLK1sATkamgCAit794jLr7sDNlu5X0USdcWhwJdmk,9146
|
132 |
+
cryptography/hazmat/primitives/kdf/pbkdf2.py,sha256=Xj3YIeX30h2BUaoJAtOo1RMXV_em0-eCG0PU_0FHJzM,1950
|
133 |
+
cryptography/hazmat/primitives/kdf/scrypt.py,sha256=4QONhjxA_ZtuQtQ7QV3FnbB8ftrFnM52B4HPfV7hFys,2354
|
134 |
+
cryptography/hazmat/primitives/kdf/x963kdf.py,sha256=wCpWmwQjZ2vAu2rlk3R_PX0nINl8WGXYBmlyMOC5iPw,1992
|
135 |
+
cryptography/hazmat/primitives/keywrap.py,sha256=XV4Pj2fqSeD-RqZVvY2cA3j5_7RwJSFygYuLfk2ujCo,5650
|
136 |
+
cryptography/hazmat/primitives/padding.py,sha256=QUq0n-EAgEan9aQzuTsiJYGKbWiK1nSHkcYjDF1L1ok,5518
|
137 |
+
cryptography/hazmat/primitives/poly1305.py,sha256=P5EPQV-RB_FJPahpg01u0Ts4S_PnAmsroxIGXbGeRRo,355
|
138 |
+
cryptography/hazmat/primitives/serialization/__init__.py,sha256=jyNx_7NcOEbVRBY4nP9ks0IVXBafbcYnTK27vafPLW8,1653
|
139 |
+
cryptography/hazmat/primitives/serialization/__pycache__/__init__.cpython-311.pyc,,
|
140 |
+
cryptography/hazmat/primitives/serialization/__pycache__/base.cpython-311.pyc,,
|
141 |
+
cryptography/hazmat/primitives/serialization/__pycache__/pkcs12.cpython-311.pyc,,
|
142 |
+
cryptography/hazmat/primitives/serialization/__pycache__/pkcs7.cpython-311.pyc,,
|
143 |
+
cryptography/hazmat/primitives/serialization/__pycache__/ssh.cpython-311.pyc,,
|
144 |
+
cryptography/hazmat/primitives/serialization/base.py,sha256=ikq5MJIwp_oUnjiaBco_PmQwOTYuGi-XkYUYHKy8Vo0,615
|
145 |
+
cryptography/hazmat/primitives/serialization/pkcs12.py,sha256=7vVXbiP7qhhvKAHJT_M8-LBZdbpOwrpWRHWxNrNqzXE,4492
|
146 |
+
cryptography/hazmat/primitives/serialization/pkcs7.py,sha256=CNzcsuDMyEFMe3EUii4NfJlQzmakB2hLlfRFYObnHRs,11141
|
147 |
+
cryptography/hazmat/primitives/serialization/ssh.py,sha256=VKscMrVdYK5B9PQISjjdRMglRvqa_L3sDNm5vdjVHJY,51915
|
148 |
+
cryptography/hazmat/primitives/twofactor/__init__.py,sha256=tmMZGB-g4IU1r7lIFqASU019zr0uPp_wEBYcwdDCKCA,258
|
149 |
+
cryptography/hazmat/primitives/twofactor/__pycache__/__init__.cpython-311.pyc,,
|
150 |
+
cryptography/hazmat/primitives/twofactor/__pycache__/hotp.cpython-311.pyc,,
|
151 |
+
cryptography/hazmat/primitives/twofactor/__pycache__/totp.cpython-311.pyc,,
|
152 |
+
cryptography/hazmat/primitives/twofactor/hotp.py,sha256=l1YdRMIhfPIuHKkA66keBDHhNbnBAlh6-O44P-OHIK8,2976
|
153 |
+
cryptography/hazmat/primitives/twofactor/totp.py,sha256=v0y0xKwtYrP83ypOo5Ofd441RJLOkaFfjmp554jo5F0,1450
|
154 |
+
cryptography/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
155 |
+
cryptography/utils.py,sha256=Rp7ppg4XIBVVzNQ6XngGndwkICJoYp6FoFOOgTWLJ7g,3925
|
156 |
+
cryptography/x509/__init__.py,sha256=uGdiViR7KFnWGoJFVUStt-e_ufomWc87RQBGAZ7dT-4,7980
|
157 |
+
cryptography/x509/__pycache__/__init__.cpython-311.pyc,,
|
158 |
+
cryptography/x509/__pycache__/base.cpython-311.pyc,,
|
159 |
+
cryptography/x509/__pycache__/certificate_transparency.cpython-311.pyc,,
|
160 |
+
cryptography/x509/__pycache__/extensions.cpython-311.pyc,,
|
161 |
+
cryptography/x509/__pycache__/general_name.cpython-311.pyc,,
|
162 |
+
cryptography/x509/__pycache__/name.cpython-311.pyc,,
|
163 |
+
cryptography/x509/__pycache__/ocsp.cpython-311.pyc,,
|
164 |
+
cryptography/x509/__pycache__/oid.cpython-311.pyc,,
|
165 |
+
cryptography/x509/__pycache__/verification.cpython-311.pyc,,
|
166 |
+
cryptography/x509/base.py,sha256=3NbbUn9wPruhmoPO7Cl3trc3SrqV2OFIBBE0P2l05mg,37081
|
167 |
+
cryptography/x509/certificate_transparency.py,sha256=6HvzAD0dlSQVxy6tnDhGj0-pisp1MaJ9bxQNRr92inI,2261
|
168 |
+
cryptography/x509/extensions.py,sha256=R70KkJ_c5NQ6Kx7Rho0sGJ0Rh-bOuBHjVOFSQGRAFCs,67370
|
169 |
+
cryptography/x509/general_name.py,sha256=sP_rV11Qlpsk4x3XXGJY_Mv0Q_s9dtjeLckHsjpLQoQ,7836
|
170 |
+
cryptography/x509/name.py,sha256=MYCxCSTQTpzhjxFPZaANqJ9fGrhESH73vPkoay8HSWM,14830
|
171 |
+
cryptography/x509/ocsp.py,sha256=P6A02msz5pe-IkUFpvxezHvnEHGvPdXiD3S0wsuf4-I,20003
|
172 |
+
cryptography/x509/oid.py,sha256=X8EbhkRTLrGuv9vHZSGqPd9zpvRVsonU_joWAL5LLY8,885
|
173 |
+
cryptography/x509/verification.py,sha256=alfx3VaTSb2bMz7_7s788oL90vzgHwBjVINssdz0Gv0,796
|
.venv/lib/python3.11/site-packages/cryptography-43.0.3.dist-info/WHEEL
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Wheel-Version: 1.0
|
2 |
+
Generator: maturin (1.7.0)
|
3 |
+
Root-Is-Purelib: false
|
4 |
+
Tag: cp39-abi3-manylinux_2_28_x86_64
|
5 |
+
|
.venv/lib/python3.11/site-packages/cryptography-43.0.3.dist-info/license_files/LICENSE
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
This software is made available under the terms of *either* of the licenses
|
2 |
+
found in LICENSE.APACHE or LICENSE.BSD. Contributions to cryptography are made
|
3 |
+
under the terms of *both* these licenses.
|
.venv/lib/python3.11/site-packages/cryptography-43.0.3.dist-info/license_files/LICENSE.APACHE
ADDED
@@ -0,0 +1,202 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
Apache License
|
3 |
+
Version 2.0, January 2004
|
4 |
+
https://www.apache.org/licenses/
|
5 |
+
|
6 |
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
7 |
+
|
8 |
+
1. Definitions.
|
9 |
+
|
10 |
+
"License" shall mean the terms and conditions for use, reproduction,
|
11 |
+
and distribution as defined by Sections 1 through 9 of this document.
|
12 |
+
|
13 |
+
"Licensor" shall mean the copyright owner or entity authorized by
|
14 |
+
the copyright owner that is granting the License.
|
15 |
+
|
16 |
+
"Legal Entity" shall mean the union of the acting entity and all
|
17 |
+
other entities that control, are controlled by, or are under common
|
18 |
+
control with that entity. For the purposes of this definition,
|
19 |
+
"control" means (i) the power, direct or indirect, to cause the
|
20 |
+
direction or management of such entity, whether by contract or
|
21 |
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
22 |
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
23 |
+
|
24 |
+
"You" (or "Your") shall mean an individual or Legal Entity
|
25 |
+
exercising permissions granted by this License.
|
26 |
+
|
27 |
+
"Source" form shall mean the preferred form for making modifications,
|
28 |
+
including but not limited to software source code, documentation
|
29 |
+
source, and configuration files.
|
30 |
+
|
31 |
+
"Object" form shall mean any form resulting from mechanical
|
32 |
+
transformation or translation of a Source form, including but
|
33 |
+
not limited to compiled object code, generated documentation,
|
34 |
+
and conversions to other media types.
|
35 |
+
|
36 |
+
"Work" shall mean the work of authorship, whether in Source or
|
37 |
+
Object form, made available under the License, as indicated by a
|
38 |
+
copyright notice that is included in or attached to the work
|
39 |
+
(an example is provided in the Appendix below).
|
40 |
+
|
41 |
+
"Derivative Works" shall mean any work, whether in Source or Object
|
42 |
+
form, that is based on (or derived from) the Work and for which the
|
43 |
+
editorial revisions, annotations, elaborations, or other modifications
|
44 |
+
represent, as a whole, an original work of authorship. For the purposes
|
45 |
+
of this License, Derivative Works shall not include works that remain
|
46 |
+
separable from, or merely link (or bind by name) to the interfaces of,
|
47 |
+
the Work and Derivative Works thereof.
|
48 |
+
|
49 |
+
"Contribution" shall mean any work of authorship, including
|
50 |
+
the original version of the Work and any modifications or additions
|
51 |
+
to that Work or Derivative Works thereof, that is intentionally
|
52 |
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
53 |
+
or by an individual or Legal Entity authorized to submit on behalf of
|
54 |
+
the copyright owner. For the purposes of this definition, "submitted"
|
55 |
+
means any form of electronic, verbal, or written communication sent
|
56 |
+
to the Licensor or its representatives, including but not limited to
|
57 |
+
communication on electronic mailing lists, source code control systems,
|
58 |
+
and issue tracking systems that are managed by, or on behalf of, the
|
59 |
+
Licensor for the purpose of discussing and improving the Work, but
|
60 |
+
excluding communication that is conspicuously marked or otherwise
|
61 |
+
designated in writing by the copyright owner as "Not a Contribution."
|
62 |
+
|
63 |
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
64 |
+
on behalf of whom a Contribution has been received by Licensor and
|
65 |
+
subsequently incorporated within the Work.
|
66 |
+
|
67 |
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
68 |
+
this License, each Contributor hereby grants to You a perpetual,
|
69 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
70 |
+
copyright license to reproduce, prepare Derivative Works of,
|
71 |
+
publicly display, publicly perform, sublicense, and distribute the
|
72 |
+
Work and such Derivative Works in Source or Object form.
|
73 |
+
|
74 |
+
3. Grant of Patent License. Subject to the terms and conditions of
|
75 |
+
this License, each Contributor hereby grants to You a perpetual,
|
76 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
77 |
+
(except as stated in this section) patent license to make, have made,
|
78 |
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
79 |
+
where such license applies only to those patent claims licensable
|
80 |
+
by such Contributor that are necessarily infringed by their
|
81 |
+
Contribution(s) alone or by combination of their Contribution(s)
|
82 |
+
with the Work to which such Contribution(s) was submitted. If You
|
83 |
+
institute patent litigation against any entity (including a
|
84 |
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
85 |
+
or a Contribution incorporated within the Work constitutes direct
|
86 |
+
or contributory patent infringement, then any patent licenses
|
87 |
+
granted to You under this License for that Work shall terminate
|
88 |
+
as of the date such litigation is filed.
|
89 |
+
|
90 |
+
4. Redistribution. You may reproduce and distribute copies of the
|
91 |
+
Work or Derivative Works thereof in any medium, with or without
|
92 |
+
modifications, and in Source or Object form, provided that You
|
93 |
+
meet the following conditions:
|
94 |
+
|
95 |
+
(a) You must give any other recipients of the Work or
|
96 |
+
Derivative Works a copy of this License; and
|
97 |
+
|
98 |
+
(b) You must cause any modified files to carry prominent notices
|
99 |
+
stating that You changed the files; and
|
100 |
+
|
101 |
+
(c) You must retain, in the Source form of any Derivative Works
|
102 |
+
that You distribute, all copyright, patent, trademark, and
|
103 |
+
attribution notices from the Source form of the Work,
|
104 |
+
excluding those notices that do not pertain to any part of
|
105 |
+
the Derivative Works; and
|
106 |
+
|
107 |
+
(d) If the Work includes a "NOTICE" text file as part of its
|
108 |
+
distribution, then any Derivative Works that You distribute must
|
109 |
+
include a readable copy of the attribution notices contained
|
110 |
+
within such NOTICE file, excluding those notices that do not
|
111 |
+
pertain to any part of the Derivative Works, in at least one
|
112 |
+
of the following places: within a NOTICE text file distributed
|
113 |
+
as part of the Derivative Works; within the Source form or
|
114 |
+
documentation, if provided along with the Derivative Works; or,
|
115 |
+
within a display generated by the Derivative Works, if and
|
116 |
+
wherever such third-party notices normally appear. The contents
|
117 |
+
of the NOTICE file are for informational purposes only and
|
118 |
+
do not modify the License. You may add Your own attribution
|
119 |
+
notices within Derivative Works that You distribute, alongside
|
120 |
+
or as an addendum to the NOTICE text from the Work, provided
|
121 |
+
that such additional attribution notices cannot be construed
|
122 |
+
as modifying the License.
|
123 |
+
|
124 |
+
You may add Your own copyright statement to Your modifications and
|
125 |
+
may provide additional or different license terms and conditions
|
126 |
+
for use, reproduction, or distribution of Your modifications, or
|
127 |
+
for any such Derivative Works as a whole, provided Your use,
|
128 |
+
reproduction, and distribution of the Work otherwise complies with
|
129 |
+
the conditions stated in this License.
|
130 |
+
|
131 |
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
132 |
+
any Contribution intentionally submitted for inclusion in the Work
|
133 |
+
by You to the Licensor shall be under the terms and conditions of
|
134 |
+
this License, without any additional terms or conditions.
|
135 |
+
Notwithstanding the above, nothing herein shall supersede or modify
|
136 |
+
the terms of any separate license agreement you may have executed
|
137 |
+
with Licensor regarding such Contributions.
|
138 |
+
|
139 |
+
6. Trademarks. This License does not grant permission to use the trade
|
140 |
+
names, trademarks, service marks, or product names of the Licensor,
|
141 |
+
except as required for reasonable and customary use in describing the
|
142 |
+
origin of the Work and reproducing the content of the NOTICE file.
|
143 |
+
|
144 |
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
145 |
+
agreed to in writing, Licensor provides the Work (and each
|
146 |
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
147 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
148 |
+
implied, including, without limitation, any warranties or conditions
|
149 |
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
150 |
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
151 |
+
appropriateness of using or redistributing the Work and assume any
|
152 |
+
risks associated with Your exercise of permissions under this License.
|
153 |
+
|
154 |
+
8. Limitation of Liability. In no event and under no legal theory,
|
155 |
+
whether in tort (including negligence), contract, or otherwise,
|
156 |
+
unless required by applicable law (such as deliberate and grossly
|
157 |
+
negligent acts) or agreed to in writing, shall any Contributor be
|
158 |
+
liable to You for damages, including any direct, indirect, special,
|
159 |
+
incidental, or consequential damages of any character arising as a
|
160 |
+
result of this License or out of the use or inability to use the
|
161 |
+
Work (including but not limited to damages for loss of goodwill,
|
162 |
+
work stoppage, computer failure or malfunction, or any and all
|
163 |
+
other commercial damages or losses), even if such Contributor
|
164 |
+
has been advised of the possibility of such damages.
|
165 |
+
|
166 |
+
9. Accepting Warranty or Additional Liability. While redistributing
|
167 |
+
the Work or Derivative Works thereof, You may choose to offer,
|
168 |
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
169 |
+
or other liability obligations and/or rights consistent with this
|
170 |
+
License. However, in accepting such obligations, You may act only
|
171 |
+
on Your own behalf and on Your sole responsibility, not on behalf
|
172 |
+
of any other Contributor, and only if You agree to indemnify,
|
173 |
+
defend, and hold each Contributor harmless for any liability
|
174 |
+
incurred by, or claims asserted against, such Contributor by reason
|
175 |
+
of your accepting any such warranty or additional liability.
|
176 |
+
|
177 |
+
END OF TERMS AND CONDITIONS
|
178 |
+
|
179 |
+
APPENDIX: How to apply the Apache License to your work.
|
180 |
+
|
181 |
+
To apply the Apache License to your work, attach the following
|
182 |
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
183 |
+
replaced with your own identifying information. (Don't include
|
184 |
+
the brackets!) The text should be enclosed in the appropriate
|
185 |
+
comment syntax for the file format. We also recommend that a
|
186 |
+
file or class name and description of purpose be included on the
|
187 |
+
same "printed page" as the copyright notice for easier
|
188 |
+
identification within third-party archives.
|
189 |
+
|
190 |
+
Copyright [yyyy] [name of copyright owner]
|
191 |
+
|
192 |
+
Licensed under the Apache License, Version 2.0 (the "License");
|
193 |
+
you may not use this file except in compliance with the License.
|
194 |
+
You may obtain a copy of the License at
|
195 |
+
|
196 |
+
https://www.apache.org/licenses/LICENSE-2.0
|
197 |
+
|
198 |
+
Unless required by applicable law or agreed to in writing, software
|
199 |
+
distributed under the License is distributed on an "AS IS" BASIS,
|
200 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
201 |
+
See the License for the specific language governing permissions and
|
202 |
+
limitations under the License.
|
.venv/lib/python3.11/site-packages/cryptography-43.0.3.dist-info/license_files/LICENSE.BSD
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Copyright (c) Individual contributors.
|
2 |
+
All rights reserved.
|
3 |
+
|
4 |
+
Redistribution and use in source and binary forms, with or without
|
5 |
+
modification, are permitted provided that the following conditions are met:
|
6 |
+
|
7 |
+
1. Redistributions of source code must retain the above copyright notice,
|
8 |
+
this list of conditions and the following disclaimer.
|
9 |
+
|
10 |
+
2. Redistributions in binary form must reproduce the above copyright
|
11 |
+
notice, this list of conditions and the following disclaimer in the
|
12 |
+
documentation and/or other materials provided with the distribution.
|
13 |
+
|
14 |
+
3. Neither the name of PyCA Cryptography nor the names of its contributors
|
15 |
+
may be used to endorse or promote products derived from this software
|
16 |
+
without specific prior written permission.
|
17 |
+
|
18 |
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
19 |
+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
20 |
+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
21 |
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
22 |
+
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
23 |
+
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
24 |
+
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
25 |
+
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
26 |
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
27 |
+
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
.venv/lib/python3.11/site-packages/jsonschema-4.23.0.dist-info/INSTALLER
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
pip
|
.venv/lib/python3.11/site-packages/jsonschema-4.23.0.dist-info/METADATA
ADDED
@@ -0,0 +1,176 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Metadata-Version: 2.3
|
2 |
+
Name: jsonschema
|
3 |
+
Version: 4.23.0
|
4 |
+
Summary: An implementation of JSON Schema validation for Python
|
5 |
+
Project-URL: Homepage, https://github.com/python-jsonschema/jsonschema
|
6 |
+
Project-URL: Documentation, https://python-jsonschema.readthedocs.io/
|
7 |
+
Project-URL: Issues, https://github.com/python-jsonschema/jsonschema/issues/
|
8 |
+
Project-URL: Funding, https://github.com/sponsors/Julian
|
9 |
+
Project-URL: Tidelift, https://tidelift.com/subscription/pkg/pypi-jsonschema?utm_source=pypi-jsonschema&utm_medium=referral&utm_campaign=pypi-link
|
10 |
+
Project-URL: Changelog, https://github.com/python-jsonschema/jsonschema/blob/main/CHANGELOG.rst
|
11 |
+
Project-URL: Source, https://github.com/python-jsonschema/jsonschema
|
12 |
+
Author-email: Julian Berman <[email protected]>
|
13 |
+
License: MIT
|
14 |
+
License-File: COPYING
|
15 |
+
Keywords: data validation,json,json schema,jsonschema,validation
|
16 |
+
Classifier: Development Status :: 5 - Production/Stable
|
17 |
+
Classifier: Intended Audience :: Developers
|
18 |
+
Classifier: License :: OSI Approved :: MIT License
|
19 |
+
Classifier: Operating System :: OS Independent
|
20 |
+
Classifier: Programming Language :: Python
|
21 |
+
Classifier: Programming Language :: Python :: 3.8
|
22 |
+
Classifier: Programming Language :: Python :: 3.9
|
23 |
+
Classifier: Programming Language :: Python :: 3.10
|
24 |
+
Classifier: Programming Language :: Python :: 3.11
|
25 |
+
Classifier: Programming Language :: Python :: 3.12
|
26 |
+
Classifier: Programming Language :: Python :: 3.13
|
27 |
+
Classifier: Programming Language :: Python :: Implementation :: CPython
|
28 |
+
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
29 |
+
Classifier: Topic :: File Formats :: JSON
|
30 |
+
Classifier: Topic :: File Formats :: JSON :: JSON Schema
|
31 |
+
Requires-Python: >=3.8
|
32 |
+
Requires-Dist: attrs>=22.2.0
|
33 |
+
Requires-Dist: importlib-resources>=1.4.0; python_version < '3.9'
|
34 |
+
Requires-Dist: jsonschema-specifications>=2023.03.6
|
35 |
+
Requires-Dist: pkgutil-resolve-name>=1.3.10; python_version < '3.9'
|
36 |
+
Requires-Dist: referencing>=0.28.4
|
37 |
+
Requires-Dist: rpds-py>=0.7.1
|
38 |
+
Provides-Extra: format
|
39 |
+
Requires-Dist: fqdn; extra == 'format'
|
40 |
+
Requires-Dist: idna; extra == 'format'
|
41 |
+
Requires-Dist: isoduration; extra == 'format'
|
42 |
+
Requires-Dist: jsonpointer>1.13; extra == 'format'
|
43 |
+
Requires-Dist: rfc3339-validator; extra == 'format'
|
44 |
+
Requires-Dist: rfc3987; extra == 'format'
|
45 |
+
Requires-Dist: uri-template; extra == 'format'
|
46 |
+
Requires-Dist: webcolors>=1.11; extra == 'format'
|
47 |
+
Provides-Extra: format-nongpl
|
48 |
+
Requires-Dist: fqdn; extra == 'format-nongpl'
|
49 |
+
Requires-Dist: idna; extra == 'format-nongpl'
|
50 |
+
Requires-Dist: isoduration; extra == 'format-nongpl'
|
51 |
+
Requires-Dist: jsonpointer>1.13; extra == 'format-nongpl'
|
52 |
+
Requires-Dist: rfc3339-validator; extra == 'format-nongpl'
|
53 |
+
Requires-Dist: rfc3986-validator>0.1.0; extra == 'format-nongpl'
|
54 |
+
Requires-Dist: uri-template; extra == 'format-nongpl'
|
55 |
+
Requires-Dist: webcolors>=24.6.0; extra == 'format-nongpl'
|
56 |
+
Description-Content-Type: text/x-rst
|
57 |
+
|
58 |
+
==========
|
59 |
+
jsonschema
|
60 |
+
==========
|
61 |
+
|
62 |
+
|PyPI| |Pythons| |CI| |ReadTheDocs| |Precommit| |Zenodo|
|
63 |
+
|
64 |
+
.. |PyPI| image:: https://img.shields.io/pypi/v/jsonschema.svg
|
65 |
+
:alt: PyPI version
|
66 |
+
:target: https://pypi.org/project/jsonschema/
|
67 |
+
|
68 |
+
.. |Pythons| image:: https://img.shields.io/pypi/pyversions/jsonschema.svg
|
69 |
+
:alt: Supported Python versions
|
70 |
+
:target: https://pypi.org/project/jsonschema/
|
71 |
+
|
72 |
+
.. |CI| image:: https://github.com/python-jsonschema/jsonschema/workflows/CI/badge.svg
|
73 |
+
:alt: Build status
|
74 |
+
:target: https://github.com/python-jsonschema/jsonschema/actions?query=workflow%3ACI
|
75 |
+
|
76 |
+
.. |ReadTheDocs| image:: https://readthedocs.org/projects/python-jsonschema/badge/?version=stable&style=flat
|
77 |
+
:alt: ReadTheDocs status
|
78 |
+
:target: https://python-jsonschema.readthedocs.io/en/stable/
|
79 |
+
|
80 |
+
.. |Precommit| image:: https://results.pre-commit.ci/badge/github/python-jsonschema/jsonschema/main.svg
|
81 |
+
:alt: pre-commit.ci status
|
82 |
+
:target: https://results.pre-commit.ci/latest/github/python-jsonschema/jsonschema/main
|
83 |
+
|
84 |
+
.. |Zenodo| image:: https://zenodo.org/badge/3072629.svg
|
85 |
+
:alt: Zenodo DOI
|
86 |
+
:target: https://zenodo.org/badge/latestdoi/3072629
|
87 |
+
|
88 |
+
|
89 |
+
``jsonschema`` is an implementation of the `JSON Schema <https://json-schema.org>`_ specification for Python.
|
90 |
+
|
91 |
+
.. code:: python
|
92 |
+
|
93 |
+
>>> from jsonschema import validate
|
94 |
+
|
95 |
+
>>> # A sample schema, like what we'd get from json.load()
|
96 |
+
>>> schema = {
|
97 |
+
... "type" : "object",
|
98 |
+
... "properties" : {
|
99 |
+
... "price" : {"type" : "number"},
|
100 |
+
... "name" : {"type" : "string"},
|
101 |
+
... },
|
102 |
+
... }
|
103 |
+
|
104 |
+
>>> # If no exception is raised by validate(), the instance is valid.
|
105 |
+
>>> validate(instance={"name" : "Eggs", "price" : 34.99}, schema=schema)
|
106 |
+
|
107 |
+
>>> validate(
|
108 |
+
... instance={"name" : "Eggs", "price" : "Invalid"}, schema=schema,
|
109 |
+
... ) # doctest: +IGNORE_EXCEPTION_DETAIL
|
110 |
+
Traceback (most recent call last):
|
111 |
+
...
|
112 |
+
ValidationError: 'Invalid' is not of type 'number'
|
113 |
+
|
114 |
+
It can also be used from the command line by installing `check-jsonschema <https://github.com/python-jsonschema/check-jsonschema>`_.
|
115 |
+
|
116 |
+
Features
|
117 |
+
--------
|
118 |
+
|
119 |
+
* Full support for `Draft 2020-12 <https://python-jsonschema.readthedocs.io/en/latest/api/jsonschema/validators/#jsonschema.validators.Draft202012Validator>`_, `Draft 2019-09 <https://python-jsonschema.readthedocs.io/en/latest/api/jsonschema/validators/#jsonschema.validators.Draft201909Validator>`_, `Draft 7 <https://python-jsonschema.readthedocs.io/en/latest/api/jsonschema/validators/#jsonschema.validators.Draft7Validator>`_, `Draft 6 <https://python-jsonschema.readthedocs.io/en/latest/api/jsonschema/validators/#jsonschema.validators.Draft6Validator>`_, `Draft 4 <https://python-jsonschema.readthedocs.io/en/latest/api/jsonschema/validators/#jsonschema.validators.Draft4Validator>`_ and `Draft 3 <https://python-jsonschema.readthedocs.io/en/latest/api/jsonschema/validators/#jsonschema.validators.Draft3Validator>`_
|
120 |
+
|
121 |
+
* `Lazy validation <https://python-jsonschema.readthedocs.io/en/latest/api/jsonschema/protocols/#jsonschema.protocols.Validator.iter_errors>`_ that can iteratively report *all* validation errors.
|
122 |
+
|
123 |
+
* `Programmatic querying <https://python-jsonschema.readthedocs.io/en/latest/errors/>`_ of which properties or items failed validation.
|
124 |
+
|
125 |
+
|
126 |
+
Installation
|
127 |
+
------------
|
128 |
+
|
129 |
+
``jsonschema`` is available on `PyPI <https://pypi.org/project/jsonschema/>`_. You can install using `pip <https://pip.pypa.io/en/stable/>`_:
|
130 |
+
|
131 |
+
.. code:: bash
|
132 |
+
|
133 |
+
$ pip install jsonschema
|
134 |
+
|
135 |
+
|
136 |
+
Extras
|
137 |
+
======
|
138 |
+
|
139 |
+
Two extras are available when installing the package, both currently related to ``format`` validation:
|
140 |
+
|
141 |
+
* ``format``
|
142 |
+
* ``format-nongpl``
|
143 |
+
|
144 |
+
They can be used when installing in order to include additional dependencies, e.g.:
|
145 |
+
|
146 |
+
.. code:: bash
|
147 |
+
|
148 |
+
$ pip install jsonschema'[format]'
|
149 |
+
|
150 |
+
Be aware that the mere presence of these dependencies – or even the specification of ``format`` checks in a schema – do *not* activate format checks (as per the specification).
|
151 |
+
Please read the `format validation documentation <https://python-jsonschema.readthedocs.io/en/latest/validate/#validating-formats>`_ for further details.
|
152 |
+
|
153 |
+
About
|
154 |
+
-----
|
155 |
+
|
156 |
+
I'm Julian Berman.
|
157 |
+
|
158 |
+
``jsonschema`` is on `GitHub <https://github.com/python-jsonschema/jsonschema>`_.
|
159 |
+
|
160 |
+
Get in touch, via GitHub or otherwise, if you've got something to contribute, it'd be most welcome!
|
161 |
+
|
162 |
+
You can also generally find me on Libera (nick: ``Julian``) in various channels, including ``#python``.
|
163 |
+
|
164 |
+
If you feel overwhelmingly grateful, you can also `sponsor me <https://github.com/sponsors/Julian/>`_.
|
165 |
+
|
166 |
+
And for companies who appreciate ``jsonschema`` and its continued support and growth, ``jsonschema`` is also now supportable via `TideLift <https://tidelift.com/subscription/pkg/pypi-jsonschema?utm_source=pypi-jsonschema&utm_medium=referral&utm_campaign=readme>`_.
|
167 |
+
|
168 |
+
|
169 |
+
Release Information
|
170 |
+
-------------------
|
171 |
+
|
172 |
+
v4.23.0
|
173 |
+
=======
|
174 |
+
|
175 |
+
* Do not reorder dictionaries (schemas, instances) that are printed as part of validation errors.
|
176 |
+
* Declare support for Py3.13
|
.venv/lib/python3.11/site-packages/jsonschema-4.23.0.dist-info/RECORD
ADDED
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
../../../bin/jsonschema,sha256=XHo009U0gdoLuMe818hXZdVGUS_4zdLM86D9zqPvDvA,231
|
2 |
+
jsonschema-4.23.0.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
|
3 |
+
jsonschema-4.23.0.dist-info/METADATA,sha256=Hd96gAfdO0v5RpFeT25qjyo7PvhASy56F4Jw3FUUTlo,7906
|
4 |
+
jsonschema-4.23.0.dist-info/RECORD,,
|
5 |
+
jsonschema-4.23.0.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
6 |
+
jsonschema-4.23.0.dist-info/entry_points.txt,sha256=vO7rX4Fs_xIVJy2pnAtKgTSxfpnozAVQ0DjCmpMxnWE,51
|
7 |
+
jsonschema-4.23.0.dist-info/licenses/COPYING,sha256=T5KgFaE8TRoEC-8BiqE0MLTxvHO0Gxa7hGw0Z2bedDk,1057
|
8 |
+
jsonschema/__init__.py,sha256=LkPwscySlJ9lTOp7ZB1M7jQ8mbG7-bYG41iBwbZ-o9s,3941
|
9 |
+
jsonschema/__main__.py,sha256=iLsZf2upUB3ilBKTlMnyK-HHt2Cnnfkwwxi_c6gLvSA,115
|
10 |
+
jsonschema/__pycache__/__init__.cpython-311.pyc,,
|
11 |
+
jsonschema/__pycache__/__main__.cpython-311.pyc,,
|
12 |
+
jsonschema/__pycache__/_format.cpython-311.pyc,,
|
13 |
+
jsonschema/__pycache__/_keywords.cpython-311.pyc,,
|
14 |
+
jsonschema/__pycache__/_legacy_keywords.cpython-311.pyc,,
|
15 |
+
jsonschema/__pycache__/_types.cpython-311.pyc,,
|
16 |
+
jsonschema/__pycache__/_typing.cpython-311.pyc,,
|
17 |
+
jsonschema/__pycache__/_utils.cpython-311.pyc,,
|
18 |
+
jsonschema/__pycache__/cli.cpython-311.pyc,,
|
19 |
+
jsonschema/__pycache__/exceptions.cpython-311.pyc,,
|
20 |
+
jsonschema/__pycache__/protocols.cpython-311.pyc,,
|
21 |
+
jsonschema/__pycache__/validators.cpython-311.pyc,,
|
22 |
+
jsonschema/_format.py,sha256=F_MA52IkrhOIxDqD8x-01bH37mG5nh0kyNrWUSLtWb8,14591
|
23 |
+
jsonschema/_keywords.py,sha256=r8_DrqAfn6QLwQnmXEggveiSU-UaIL2p2nuPINelfFc,14949
|
24 |
+
jsonschema/_legacy_keywords.py,sha256=2tWuwRPWbYS7EAl8wBIC_rabGuv1J4dfYLqNEPpShhA,15191
|
25 |
+
jsonschema/_types.py,sha256=HQ5QD_oL85zF1FSW2v-5rvfYF0967HJdxSR88kzw2mY,5367
|
26 |
+
jsonschema/_typing.py,sha256=NZhPhkBOn9INYZk8G69rDeuRamztgXCMLh10z9cfT6g,610
|
27 |
+
jsonschema/_utils.py,sha256=ODga3vrJ6K2wMGxerpgn4ipc9q7ZSqBsvwKU4embLEE,10642
|
28 |
+
jsonschema/benchmarks/__init__.py,sha256=A0sQrxDBVHSyQ-8ru3L11hMXf3q9gVuB9x_YgHb4R9M,70
|
29 |
+
jsonschema/benchmarks/__pycache__/__init__.cpython-311.pyc,,
|
30 |
+
jsonschema/benchmarks/__pycache__/const_vs_enum.cpython-311.pyc,,
|
31 |
+
jsonschema/benchmarks/__pycache__/contains.cpython-311.pyc,,
|
32 |
+
jsonschema/benchmarks/__pycache__/issue232.cpython-311.pyc,,
|
33 |
+
jsonschema/benchmarks/__pycache__/json_schema_test_suite.cpython-311.pyc,,
|
34 |
+
jsonschema/benchmarks/__pycache__/nested_schemas.cpython-311.pyc,,
|
35 |
+
jsonschema/benchmarks/__pycache__/subcomponents.cpython-311.pyc,,
|
36 |
+
jsonschema/benchmarks/__pycache__/unused_registry.cpython-311.pyc,,
|
37 |
+
jsonschema/benchmarks/__pycache__/useless_applicator_schemas.cpython-311.pyc,,
|
38 |
+
jsonschema/benchmarks/__pycache__/useless_keywords.cpython-311.pyc,,
|
39 |
+
jsonschema/benchmarks/__pycache__/validator_creation.cpython-311.pyc,,
|
40 |
+
jsonschema/benchmarks/const_vs_enum.py,sha256=DVFi3WDqBalZFOibnjpX1uTSr3Rxa2cPgFcowd7Ukrs,830
|
41 |
+
jsonschema/benchmarks/contains.py,sha256=gexQoUrCOwECofbt19BeosQZ7WFL6PDdkX49DWwBlOg,786
|
42 |
+
jsonschema/benchmarks/issue232.py,sha256=3LLYLIlBGQnVuyyo2iAv-xky5P6PRFHANx4-zIIQOoE,521
|
43 |
+
jsonschema/benchmarks/issue232/issue.json,sha256=eaPOZjMRu5u8RpKrsA9uk7ucPZS5tkKG4D_hkOTQ3Hk,117105
|
44 |
+
jsonschema/benchmarks/json_schema_test_suite.py,sha256=PvfabpUYcF4_7csYDTcTauED8rnFEGYbdY5RqTXD08s,320
|
45 |
+
jsonschema/benchmarks/nested_schemas.py,sha256=mo07dx-CIgmSOI62CNs4g5xu1FzHklLBpkQoDxWYcKs,1892
|
46 |
+
jsonschema/benchmarks/subcomponents.py,sha256=fEyiMzsWeK2pd7DEGCuuY-vzGunwhHczRBWEnBRLKIo,1113
|
47 |
+
jsonschema/benchmarks/unused_registry.py,sha256=hwRwONc9cefPtYzkoX_TYRO3GyUojriv0-YQaK3vnj0,940
|
48 |
+
jsonschema/benchmarks/useless_applicator_schemas.py,sha256=EVm5-EtOEFoLP_Vt2j4SrCwlx05NhPqNuZQ6LIMP1Dc,3342
|
49 |
+
jsonschema/benchmarks/useless_keywords.py,sha256=bj_zKr1oVctFlqyZaObCsYTgFjiiNgPzC0hr1Y868mE,867
|
50 |
+
jsonschema/benchmarks/validator_creation.py,sha256=UkUQlLAnussnr_KdCIdad6xx2pXxQLmYtsXoiirKeWQ,285
|
51 |
+
jsonschema/cli.py,sha256=SGy9JPg02mgXhNxugU8iXhYNivfSjBhKTNAgV90ty-M,8551
|
52 |
+
jsonschema/exceptions.py,sha256=RxE2T5xxgg_B6ttR8a3lCbZyh29RUtFe4oZKMoHPBAE,15035
|
53 |
+
jsonschema/protocols.py,sha256=7mpZxO1gfRNMCGXwldwsSN3nEugVfIVyKZ_HZgN1vSw,7174
|
54 |
+
jsonschema/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
55 |
+
jsonschema/tests/__pycache__/__init__.cpython-311.pyc,,
|
56 |
+
jsonschema/tests/__pycache__/_suite.cpython-311.pyc,,
|
57 |
+
jsonschema/tests/__pycache__/fuzz_validate.cpython-311.pyc,,
|
58 |
+
jsonschema/tests/__pycache__/test_cli.cpython-311.pyc,,
|
59 |
+
jsonschema/tests/__pycache__/test_deprecations.cpython-311.pyc,,
|
60 |
+
jsonschema/tests/__pycache__/test_exceptions.cpython-311.pyc,,
|
61 |
+
jsonschema/tests/__pycache__/test_format.cpython-311.pyc,,
|
62 |
+
jsonschema/tests/__pycache__/test_jsonschema_test_suite.cpython-311.pyc,,
|
63 |
+
jsonschema/tests/__pycache__/test_types.cpython-311.pyc,,
|
64 |
+
jsonschema/tests/__pycache__/test_utils.cpython-311.pyc,,
|
65 |
+
jsonschema/tests/__pycache__/test_validators.cpython-311.pyc,,
|
66 |
+
jsonschema/tests/_suite.py,sha256=QAfBj34zMbJQ5_JJ2ogpiTlw9hQ6Is43dvo_bpS0EdM,8156
|
67 |
+
jsonschema/tests/fuzz_validate.py,sha256=fUA7yTJIihaCwJplkUehZeyB84HcXEcqtY5oPJXIO7I,1114
|
68 |
+
jsonschema/tests/test_cli.py,sha256=uFMu2YbIfbSDCnykhLL4-VR3-jg1tvQLJn2Bliwp_Bw,28587
|
69 |
+
jsonschema/tests/test_deprecations.py,sha256=9VxOCfWzMG1Tg4OD8riU_Znd6HDOQZkepzVgxsdUdU8,15760
|
70 |
+
jsonschema/tests/test_exceptions.py,sha256=JgC-E1ZFZK2puVBp35WFRnG8CNOiSWLYtyLjh9IvFKI,22591
|
71 |
+
jsonschema/tests/test_format.py,sha256=eVm5SMaWF2lOPO28bPAwNvkiQvHCQKy-MnuAgEchfEc,3188
|
72 |
+
jsonschema/tests/test_jsonschema_test_suite.py,sha256=a2saPs2Cwwg0sdRdu-uJ8goSXLbqrS-pC48QJy0K4DE,8674
|
73 |
+
jsonschema/tests/test_types.py,sha256=cF51KTDmdsx06MrIc4fXKt0X9fIsVgw5uhT8CamVa8U,6977
|
74 |
+
jsonschema/tests/test_utils.py,sha256=sao74o1PyYMxBfqweokQN48CFSS6yhJk5FkCfMJ5PsI,4163
|
75 |
+
jsonschema/tests/test_validators.py,sha256=eiaigsZMzHYYsniQ1UPygaS56a1d-_7-9NC4wVXAhzs,87975
|
76 |
+
jsonschema/validators.py,sha256=H31FwHdyB7LP5eunxdBrZ9E57hpvozfnRlZaOYy45jU,47045
|
.venv/lib/python3.11/site-packages/jsonschema-4.23.0.dist-info/WHEEL
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Wheel-Version: 1.0
|
2 |
+
Generator: hatchling 1.25.0
|
3 |
+
Root-Is-Purelib: true
|
4 |
+
Tag: py3-none-any
|
.venv/lib/python3.11/site-packages/jsonschema-4.23.0.dist-info/entry_points.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
[console_scripts]
|
2 |
+
jsonschema = jsonschema.cli:main
|
.venv/lib/python3.11/site-packages/jsonschema-4.23.0.dist-info/licenses/COPYING
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Copyright (c) 2013 Julian Berman
|
2 |
+
|
3 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
4 |
+
of this software and associated documentation files (the "Software"), to deal
|
5 |
+
in the Software without restriction, including without limitation the rights
|
6 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
7 |
+
copies of the Software, and to permit persons to whom the Software is
|
8 |
+
furnished to do so, subject to the following conditions:
|
9 |
+
|
10 |
+
The above copyright notice and this permission notice shall be included in
|
11 |
+
all copies or substantial portions of the Software.
|
12 |
+
|
13 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
14 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
15 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
16 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
17 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
18 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
19 |
+
THE SOFTWARE.
|
.venv/lib/python3.11/site-packages/mistral_common/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (207 Bytes). View file
|
|
.venv/lib/python3.11/site-packages/mistral_common/__pycache__/base.cpython-311.pyc
ADDED
Binary file (710 Bytes). View file
|
|
.venv/lib/python3.11/site-packages/mistral_common/__pycache__/exceptions.cpython-311.pyc
ADDED
Binary file (5.66 kB). View file
|
|
.venv/lib/python3.11/site-packages/mistral_common/__pycache__/multimodal.cpython-311.pyc
ADDED
Binary file (4.03 kB). View file
|
|
.venv/lib/python3.11/site-packages/mistral_common/data/mistral_instruct_tokenizer_240323.model.v3
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9addc8bdce5988448ae81b729336f43a81262160ae8da760674badab9d4c7d33
|
3 |
+
size 587591
|
.venv/lib/python3.11/site-packages/mistral_common/protocol/embedding/__init__.py
ADDED
File without changes
|
.venv/lib/python3.11/site-packages/mistral_common/protocol/embedding/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (206 Bytes). View file
|
|
.venv/lib/python3.11/site-packages/mistral_common/protocol/embedding/__pycache__/request.cpython-311.pyc
ADDED
Binary file (1.14 kB). View file
|
|
.venv/lib/python3.11/site-packages/mistral_common/protocol/embedding/__pycache__/response.cpython-311.pyc
ADDED
Binary file (2.07 kB). View file
|
|
.venv/lib/python3.11/site-packages/mistral_common/protocol/embedding/request.py
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import List, Optional, Union
|
2 |
+
|
3 |
+
from pydantic import Field
|
4 |
+
|
5 |
+
from mistral_common.base import MistralBase
|
6 |
+
|
7 |
+
|
8 |
+
class EmbeddingRequest(MistralBase):
|
9 |
+
input: Union[str, List[str]] = Field(description="Text to embed.")
|
10 |
+
model: str = Field(description="ID of the model to use.")
|
11 |
+
encoding_format: Optional[str] = Field(default="float", description="The format to return the embeddings in.")
|
.venv/lib/python3.11/site-packages/mistral_common/protocol/embedding/response.py
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import List
|
2 |
+
|
3 |
+
from pydantic import Field
|
4 |
+
|
5 |
+
from mistral_common.base import MistralBase
|
6 |
+
from mistral_common.protocol.base import UsageInfo
|
7 |
+
from mistral_common.protocol.utils import random_uuid
|
8 |
+
|
9 |
+
|
10 |
+
class EmbeddingObject(MistralBase):
|
11 |
+
object: str = Field(default="embedding", description="The type of the object returned.")
|
12 |
+
embedding: List[float] = Field(description="The type of the object returned.")
|
13 |
+
index: int = Field(description="The index of the embedding in the input text.")
|
14 |
+
|
15 |
+
|
16 |
+
class EmbeddingResponse(MistralBase):
|
17 |
+
id: str = Field(default_factory=lambda: f"embd-{random_uuid()}")
|
18 |
+
object: str = Field(default="list", description="The type of the object returned.")
|
19 |
+
data: List[EmbeddingObject] = Field(description="List of embeddings.")
|
20 |
+
model: str = Field(description="The model used to generate the embeddings.")
|
21 |
+
usage: UsageInfo
|
.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (205 Bytes). View file
|
|
.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/__pycache__/messages.cpython-311.pyc
ADDED
Binary file (7.04 kB). View file
|
|
.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/__pycache__/response.cpython-311.pyc
ADDED
Binary file (5.06 kB). View file
|
|
.venv/lib/python3.11/site-packages/mistral_common/tokens/__init__.py
ADDED
File without changes
|
.venv/lib/python3.11/site-packages/mistral_common/tokens/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (194 Bytes). View file
|
|
.venv/lib/python3.11/site-packages/mistral_common/tokens/instruct/__init__.py
ADDED
File without changes
|
.venv/lib/python3.11/site-packages/mistral_common/tokens/instruct/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (203 Bytes). View file
|
|
.venv/lib/python3.11/site-packages/mistral_common/tokens/instruct/__pycache__/request.cpython-311.pyc
ADDED
Binary file (1.63 kB). View file
|
|
.venv/lib/python3.11/site-packages/mistral_common/tokens/instruct/request.py
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Generic, List, Optional
|
2 |
+
|
3 |
+
from mistral_common.base import MistralBase
|
4 |
+
from mistral_common.protocol.instruct.messages import ChatMessageType
|
5 |
+
from mistral_common.protocol.instruct.tool_calls import ToolType
|
6 |
+
|
7 |
+
|
8 |
+
class FIMRequest(MistralBase):
|
9 |
+
"""
|
10 |
+
A valid Fill in the Middle completion request to be tokenized
|
11 |
+
"""
|
12 |
+
|
13 |
+
prompt: str
|
14 |
+
suffix: Optional[str] = None
|
15 |
+
|
16 |
+
|
17 |
+
class InstructRequest(MistralBase, Generic[ChatMessageType, ToolType]):
|
18 |
+
"""
|
19 |
+
A valid request to be tokenized
|
20 |
+
"""
|
21 |
+
|
22 |
+
messages: List[ChatMessageType]
|
23 |
+
system_prompt: Optional[str] = None
|
24 |
+
available_tools: Optional[List[ToolType]] = None
|
25 |
+
truncate_at_max_tokens: Optional[int] = None
|
.venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/__init__.py
ADDED
File without changes
|
.venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/__pycache__/__init__.cpython-311.pyc
ADDED
Binary file (205 Bytes). View file
|
|
.venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/__pycache__/base.cpython-311.pyc
ADDED
Binary file (10.8 kB). View file
|
|
.venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/__pycache__/mistral.cpython-311.pyc
ADDED
Binary file (14.5 kB). View file
|
|
.venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/__pycache__/multimodal.cpython-311.pyc
ADDED
Binary file (9.45 kB). View file
|
|
.venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/__pycache__/sentencepiece.cpython-311.pyc
ADDED
Binary file (40.2 kB). View file
|
|
.venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/__pycache__/tekken.cpython-311.pyc
ADDED
Binary file (18.6 kB). View file
|
|
.venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/__pycache__/utils.cpython-311.pyc
ADDED
Binary file (717 Bytes). View file
|
|
.venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/base.py
ADDED
@@ -0,0 +1,200 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from abc import ABC, abstractmethod
|
2 |
+
from dataclasses import dataclass
|
3 |
+
from enum import Enum
|
4 |
+
from typing import Generic, List, Optional, Protocol, Tuple, TypeVar, Union
|
5 |
+
|
6 |
+
import numpy as np
|
7 |
+
from pydantic import ConfigDict
|
8 |
+
|
9 |
+
from mistral_common.base import MistralBase
|
10 |
+
from mistral_common.protocol.instruct.messages import (
|
11 |
+
AssistantMessageType,
|
12 |
+
ContentChunk,
|
13 |
+
ImageChunk,
|
14 |
+
ImageURLChunk,
|
15 |
+
UserMessage,
|
16 |
+
)
|
17 |
+
from mistral_common.protocol.instruct.tool_calls import Tool
|
18 |
+
from mistral_common.tokens.instruct.request import FIMRequest, InstructRequest
|
19 |
+
|
20 |
+
|
21 |
+
class SpecialTokens(str, Enum):
|
22 |
+
bos = "<s>"
|
23 |
+
eos = "</s>"
|
24 |
+
begin_inst = "[INST]"
|
25 |
+
end_inst = "[/INST]"
|
26 |
+
begin_tools = "[AVAILABLE_TOOLS]"
|
27 |
+
end_tools = "[/AVAILABLE_TOOLS]"
|
28 |
+
begin_tool_results = "[TOOL_RESULTS]"
|
29 |
+
end_tool_results = "[/TOOL_RESULTS]"
|
30 |
+
tool_calls = "[TOOL_CALLS]"
|
31 |
+
img = "[IMG]"
|
32 |
+
img_break = "[IMG_BREAK]"
|
33 |
+
img_end = "[IMG_END]"
|
34 |
+
prefix = "[PREFIX]"
|
35 |
+
middle = "[MIDDLE]"
|
36 |
+
suffix = "[SUFFIX]"
|
37 |
+
begin_system = "[SYSTEM_PROMPT]"
|
38 |
+
end_system = "[/SYSTEM_PROMPT]"
|
39 |
+
begin_tool_content = "[TOOL_CONTENT]"
|
40 |
+
|
41 |
+
|
42 |
+
class TokenizerVersion(str, Enum):
|
43 |
+
v1 = "v1" # vocab_size = 32000
|
44 |
+
v2 = "v2" # vocab_size = 32768 with special control tokens [INST], [\INST]
|
45 |
+
v3 = "v3" # vocab_size = 32768 (spm) OR 128000 (tekken) with improved function calling
|
46 |
+
v7 = "v7" # vocab_size = 32768 (spm) or 128000 (tekken) with improved system prompt and function calling
|
47 |
+
|
48 |
+
|
49 |
+
class Tokenized(MistralBase):
|
50 |
+
"""
|
51 |
+
A tokenized InstructRequest
|
52 |
+
"""
|
53 |
+
|
54 |
+
model_config = ConfigDict(arbitrary_types_allowed=True)
|
55 |
+
tokens: List[int]
|
56 |
+
text: Optional[str] = None
|
57 |
+
prefix_ids: Optional[List[int]] = None
|
58 |
+
images: List[np.ndarray] = []
|
59 |
+
|
60 |
+
|
61 |
+
class Tokenizer(ABC):
|
62 |
+
@property
|
63 |
+
@abstractmethod
|
64 |
+
def n_words(self) -> int:
|
65 |
+
"""Vocabulary size"""
|
66 |
+
|
67 |
+
@abstractmethod
|
68 |
+
def vocab(self) -> List[str]:
|
69 |
+
"""All tokens in the vocabulary as strings"""
|
70 |
+
|
71 |
+
@abstractmethod
|
72 |
+
def id_to_piece(self, token_id: int) -> str:
|
73 |
+
"""Convert a token id to the token str"""
|
74 |
+
|
75 |
+
@property
|
76 |
+
@abstractmethod
|
77 |
+
def bos_id(self) -> int:
|
78 |
+
"""id of the Beginning of String token"""
|
79 |
+
|
80 |
+
@property
|
81 |
+
@abstractmethod
|
82 |
+
def eos_id(self) -> int:
|
83 |
+
"""id of the End of String token"""
|
84 |
+
|
85 |
+
@property
|
86 |
+
@abstractmethod
|
87 |
+
def pad_id(self) -> int:
|
88 |
+
"""id of the Pad token"""
|
89 |
+
|
90 |
+
@property
|
91 |
+
@abstractmethod
|
92 |
+
def unk_id(self) -> int:
|
93 |
+
"""id of the Unk token"""
|
94 |
+
|
95 |
+
@abstractmethod
|
96 |
+
def encode(self, s: str, bos: bool, eos: bool) -> List[int]:
|
97 |
+
"""String to token ids"""
|
98 |
+
|
99 |
+
@abstractmethod
|
100 |
+
def decode(self, t: List[int]) -> str:
|
101 |
+
"""Token ids to string"""
|
102 |
+
|
103 |
+
@abstractmethod
|
104 |
+
def get_control_token(self, s: str) -> int:
|
105 |
+
"""Get the id of a control token"""
|
106 |
+
|
107 |
+
@property
|
108 |
+
@abstractmethod
|
109 |
+
def version(self) -> TokenizerVersion:
|
110 |
+
"""Get the version of the tokenizer"""
|
111 |
+
|
112 |
+
@abstractmethod
|
113 |
+
def to_string(self, tokens: List[int]) -> str:
|
114 |
+
"""Convert token ids to string"""
|
115 |
+
|
116 |
+
|
117 |
+
InstructRequestType = TypeVar("InstructRequestType", bound=InstructRequest)
|
118 |
+
FIMRequestType = TypeVar("FIMRequestType", bound=FIMRequest)
|
119 |
+
TokenizedType = TypeVar("TokenizedType", bound=Tokenized)
|
120 |
+
|
121 |
+
|
122 |
+
@dataclass
|
123 |
+
class ImageEncoding:
|
124 |
+
tokens: List[int]
|
125 |
+
image: np.ndarray
|
126 |
+
|
127 |
+
|
128 |
+
@dataclass
|
129 |
+
class SpecialImageIDs:
|
130 |
+
img: int
|
131 |
+
img_break: int
|
132 |
+
img_end: int
|
133 |
+
|
134 |
+
@staticmethod
|
135 |
+
def from_tokenizer(tokenizer: "Tokenizer") -> "SpecialImageIDs":
|
136 |
+
return SpecialImageIDs(
|
137 |
+
img=tokenizer.get_control_token(SpecialTokens.img.value),
|
138 |
+
img_break=tokenizer.get_control_token(SpecialTokens.img_break.value),
|
139 |
+
img_end=tokenizer.get_control_token(SpecialTokens.img_end.value),
|
140 |
+
)
|
141 |
+
|
142 |
+
|
143 |
+
class MultiModalEncoder(Protocol):
|
144 |
+
def __call__(self, content: Union[ImageChunk, ImageURLChunk]) -> ImageEncoding:
|
145 |
+
"""
|
146 |
+
Encode the given content.
|
147 |
+
|
148 |
+
Args:
|
149 |
+
content (ChunkContent): The content to be encoded.
|
150 |
+
|
151 |
+
Returns:
|
152 |
+
ImageEncoding: The encoded image content.
|
153 |
+
"""
|
154 |
+
...
|
155 |
+
|
156 |
+
@property
|
157 |
+
def image_token(self) -> int:
|
158 |
+
...
|
159 |
+
|
160 |
+
|
161 |
+
class InstructTokenizer(Generic[InstructRequestType, FIMRequestType, TokenizedType, AssistantMessageType]):
|
162 |
+
tokenizer: Tokenizer
|
163 |
+
mm_encoder: Optional[MultiModalEncoder]
|
164 |
+
|
165 |
+
def __init__(self, tokenizer: Tokenizer, mm_encoder: Optional[MultiModalEncoder]) -> None:
|
166 |
+
"""Init from tokenizer"""
|
167 |
+
|
168 |
+
@abstractmethod
|
169 |
+
def encode_instruct(self, request: InstructRequestType) -> TokenizedType:
|
170 |
+
"""Instruct request to Tokenized object"""
|
171 |
+
|
172 |
+
@abstractmethod
|
173 |
+
def decode(self, tokens: List[int]) -> str:
|
174 |
+
"""Convert token ids to string"""
|
175 |
+
|
176 |
+
@abstractmethod
|
177 |
+
def encode_fim(self, request: FIMRequestType) -> TokenizedType:
|
178 |
+
"""FIM request to Tokenized object"""
|
179 |
+
|
180 |
+
@abstractmethod
|
181 |
+
def encode_user_message(
|
182 |
+
self,
|
183 |
+
message: UserMessage,
|
184 |
+
available_tools: Optional[List[Tool]],
|
185 |
+
is_last: bool,
|
186 |
+
is_first: bool,
|
187 |
+
system_prompt: Optional[str] = None,
|
188 |
+
force_img_first: bool = False,
|
189 |
+
) -> Tuple[List[int], List[np.ndarray]]:
|
190 |
+
...
|
191 |
+
|
192 |
+
@abstractmethod
|
193 |
+
def encode_user_content(
|
194 |
+
self,
|
195 |
+
content: Union[str, List[ContentChunk]],
|
196 |
+
is_last: bool,
|
197 |
+
system_prompt: Optional[str] = None,
|
198 |
+
force_img_first: bool = False,
|
199 |
+
) -> Tuple[List[int], List[np.ndarray]]:
|
200 |
+
...
|
.venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/mistral.py
ADDED
@@ -0,0 +1,251 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import warnings
|
2 |
+
from pathlib import Path
|
3 |
+
from typing import Callable, Dict, Generic, List, Optional, Union
|
4 |
+
|
5 |
+
from mistral_common.exceptions import (
|
6 |
+
TokenizerException,
|
7 |
+
)
|
8 |
+
from mistral_common.protocol.instruct.messages import (
|
9 |
+
UATS,
|
10 |
+
AssistantMessageType,
|
11 |
+
SystemMessageType,
|
12 |
+
ToolMessageType,
|
13 |
+
UserMessageType,
|
14 |
+
)
|
15 |
+
from mistral_common.protocol.instruct.normalize import InstructRequestNormalizer, normalizer_for_tokenizer_version
|
16 |
+
from mistral_common.protocol.instruct.request import ChatCompletionRequest
|
17 |
+
from mistral_common.protocol.instruct.validator import (
|
18 |
+
MistralRequestValidator,
|
19 |
+
MistralRequestValidatorV3,
|
20 |
+
ValidationMode,
|
21 |
+
)
|
22 |
+
from mistral_common.tokens.instruct.request import FIMRequest
|
23 |
+
from mistral_common.tokens.tokenizers.base import (
|
24 |
+
InstructRequest,
|
25 |
+
InstructRequestType,
|
26 |
+
InstructTokenizer,
|
27 |
+
SpecialTokens,
|
28 |
+
TokenizedType,
|
29 |
+
TokenizerVersion,
|
30 |
+
)
|
31 |
+
from mistral_common.tokens.tokenizers.multimodal import (
|
32 |
+
ImageEncoder,
|
33 |
+
MultimodalConfig,
|
34 |
+
MultiModalEncoder,
|
35 |
+
SpecialImageIDs,
|
36 |
+
)
|
37 |
+
from mistral_common.tokens.tokenizers.sentencepiece import (
|
38 |
+
InstructTokenizerV1,
|
39 |
+
InstructTokenizerV2,
|
40 |
+
InstructTokenizerV3,
|
41 |
+
InstructTokenizerV7,
|
42 |
+
SentencePieceTokenizer,
|
43 |
+
get_mm_config,
|
44 |
+
is_sentencepiece,
|
45 |
+
)
|
46 |
+
from mistral_common.tokens.tokenizers.tekken import Tekkenizer, is_tekken
|
47 |
+
|
48 |
+
|
49 |
+
def load_mm_encoder(
|
50 |
+
mm_config: MultimodalConfig, tokenizer: Union[Tekkenizer, SentencePieceTokenizer]
|
51 |
+
) -> MultiModalEncoder:
|
52 |
+
special_ids = SpecialImageIDs(
|
53 |
+
img=tokenizer.get_control_token(SpecialTokens.img.value),
|
54 |
+
img_break=tokenizer.get_control_token(SpecialTokens.img_break.value),
|
55 |
+
img_end=tokenizer.get_control_token(SpecialTokens.img_end.value),
|
56 |
+
)
|
57 |
+
return ImageEncoder(mm_config, special_ids)
|
58 |
+
|
59 |
+
|
60 |
+
class MistralTokenizer(
|
61 |
+
Generic[UserMessageType, AssistantMessageType, ToolMessageType, SystemMessageType, TokenizedType]
|
62 |
+
):
|
63 |
+
def __init__(
|
64 |
+
self,
|
65 |
+
instruct_tokenizer: InstructTokenizer[InstructRequest, FIMRequest, TokenizedType, AssistantMessageType],
|
66 |
+
validator: MistralRequestValidator[UserMessageType, AssistantMessageType, ToolMessageType, SystemMessageType],
|
67 |
+
request_normalizer: InstructRequestNormalizer[
|
68 |
+
UserMessageType, AssistantMessageType, ToolMessageType, SystemMessageType, InstructRequestType
|
69 |
+
],
|
70 |
+
):
|
71 |
+
self._chat_completion_request_validator = validator
|
72 |
+
self._instruct_request_normalizer = request_normalizer
|
73 |
+
self.instruct_tokenizer = instruct_tokenizer
|
74 |
+
|
75 |
+
@classmethod
|
76 |
+
def _data_path(cls) -> Path:
|
77 |
+
return Path(__file__).parents[2] / "data"
|
78 |
+
|
79 |
+
@classmethod
|
80 |
+
def v1(cls) -> "MistralTokenizer":
|
81 |
+
"""open 7B x 8x7B + embed"""
|
82 |
+
return cls.from_file(str(cls._data_path() / "tokenizer.model.v1"), mode=ValidationMode.test)
|
83 |
+
|
84 |
+
@classmethod
|
85 |
+
def v2(cls) -> "MistralTokenizer":
|
86 |
+
"""mistral-small // mistral-large"""
|
87 |
+
return cls.from_file(
|
88 |
+
str(cls._data_path() / "mistral_instruct_tokenizer_240216.model.v2"), mode=ValidationMode.test
|
89 |
+
)
|
90 |
+
|
91 |
+
@classmethod
|
92 |
+
def v3(cls, is_tekken: bool = False, is_mm: bool = False) -> "MistralTokenizer":
|
93 |
+
"""open-mixtral-8x22B"""
|
94 |
+
if is_tekken and is_mm:
|
95 |
+
tokenizer_name = "tekken_240911.json"
|
96 |
+
elif is_tekken and not is_mm:
|
97 |
+
tokenizer_name = "tekken_240718.json"
|
98 |
+
elif not is_tekken and is_mm:
|
99 |
+
raise ValueError("Multimodal tokenizer is currently only supported for tekken")
|
100 |
+
else:
|
101 |
+
tokenizer_name = "mistral_instruct_tokenizer_240323.model.v3"
|
102 |
+
|
103 |
+
return cls.from_file(str(cls._data_path() / tokenizer_name), mode=ValidationMode.test)
|
104 |
+
|
105 |
+
@classmethod
|
106 |
+
def v7(cls, is_mm: bool = False) -> "MistralTokenizer":
|
107 |
+
"""mistral-large 2.1"""
|
108 |
+
if is_mm:
|
109 |
+
return cls.from_file(
|
110 |
+
str(cls._data_path() / "mistral_instruct_tokenizer_241114.model.v7m1"), mode=ValidationMode.test
|
111 |
+
)
|
112 |
+
else:
|
113 |
+
return cls.from_file(
|
114 |
+
str(cls._data_path() / "mistral_instruct_tokenizer_241114.model.v7"), mode=ValidationMode.test
|
115 |
+
)
|
116 |
+
|
117 |
+
@classmethod
|
118 |
+
def from_model(cls, model: str, strict: bool = False) -> "MistralTokenizer":
|
119 |
+
model_name_to_tokenizer_cls: Dict[str, Callable[[], MistralTokenizer]] = {
|
120 |
+
"ministral-8b-2410": lambda: MistralTokenizer.v3(is_tekken=True),
|
121 |
+
"mistral-tiny-2312": MistralTokenizer.v2,
|
122 |
+
"open-mistral-nemo-2407": lambda: MistralTokenizer.v3(is_tekken=True),
|
123 |
+
"mistral-tiny-2407": MistralTokenizer.v3,
|
124 |
+
"mistral-small-2312": MistralTokenizer.v2,
|
125 |
+
"open-mixtral-8x22b-2404": MistralTokenizer.v3,
|
126 |
+
"mistral-small-2402": MistralTokenizer.v2,
|
127 |
+
"mistral-small-2409": lambda: MistralTokenizer.v3(is_tekken=True),
|
128 |
+
"mistral-medium-2312": MistralTokenizer.v1,
|
129 |
+
"mistral-large-2402": MistralTokenizer.v2,
|
130 |
+
"mistral-large-2407": MistralTokenizer.v3,
|
131 |
+
"mistral-large-2411": MistralTokenizer.v7,
|
132 |
+
"pixtral-large-2411": lambda: MistralTokenizer.v7(is_mm=True),
|
133 |
+
"codestral-2405": MistralTokenizer.v3,
|
134 |
+
"codestral-mamba-2407": MistralTokenizer.v3,
|
135 |
+
"pixtral-12b-2409": lambda: MistralTokenizer.v3(is_tekken=True, is_mm=True),
|
136 |
+
# The following are deprecated - only left for backward comp. Delete in >= 1.6.0
|
137 |
+
"open-mistral-7b": MistralTokenizer.v1,
|
138 |
+
"open-mixtral-8x7b": MistralTokenizer.v1,
|
139 |
+
"mistral-embed": MistralTokenizer.v1,
|
140 |
+
"mistral-small-v1": MistralTokenizer.v2,
|
141 |
+
"mistral-large-v1": MistralTokenizer.v2,
|
142 |
+
"mistral-small": MistralTokenizer.v3,
|
143 |
+
"mistral-large": MistralTokenizer.v3,
|
144 |
+
"open-mixtral-8x22b": MistralTokenizer.v3,
|
145 |
+
"codestral-22b": MistralTokenizer.v3,
|
146 |
+
"mistral-nemo": lambda: MistralTokenizer.v3(is_tekken=True),
|
147 |
+
"pixtral": lambda: MistralTokenizer.v3(is_tekken=True, is_mm=True),
|
148 |
+
"pixtral-large": lambda: MistralTokenizer.v7(is_mm=True),
|
149 |
+
}
|
150 |
+
|
151 |
+
if not strict:
|
152 |
+
warnings.warn(
|
153 |
+
"Calling `MistralTokenizer.from_model(..., strict=False)` is deprecated as it can lead to incorrect "
|
154 |
+
"tokenizers. It is strongly recommended to use MistralTokenizer.from_model(..., strict=True)` "
|
155 |
+
"which will become the default in `mistral_common=1.6.0`."
|
156 |
+
"If you are using `mistral_common` for open-sourced model weights, we recommend using "
|
157 |
+
"`MistralTokenizer.from_file('<path/to/tokenizer/file>')` instead.",
|
158 |
+
FutureWarning,
|
159 |
+
)
|
160 |
+
|
161 |
+
# TODO(Delete this code in mistral_common >= 1.6.0
|
162 |
+
# Prefix search the model name mapping
|
163 |
+
for model_name, tokenizer_cls in model_name_to_tokenizer_cls.items():
|
164 |
+
if model_name in model.lower():
|
165 |
+
return tokenizer_cls()
|
166 |
+
|
167 |
+
if model not in model_name_to_tokenizer_cls:
|
168 |
+
raise TokenizerException(f"Unrecognized model: {model}")
|
169 |
+
|
170 |
+
return model_name_to_tokenizer_cls[model]()
|
171 |
+
|
172 |
+
@classmethod
|
173 |
+
def from_file(
|
174 |
+
cls,
|
175 |
+
tokenizer_filename: str,
|
176 |
+
mode: ValidationMode = ValidationMode.test,
|
177 |
+
) -> "MistralTokenizer":
|
178 |
+
"""
|
179 |
+
Depending on which model we are loading, tokenization and validation might be different. 💩
|
180 |
+
"""
|
181 |
+
tokenizer: Union[SentencePieceTokenizer, Tekkenizer]
|
182 |
+
|
183 |
+
if is_tekken(tokenizer_filename):
|
184 |
+
tokenizer = Tekkenizer.from_file(tokenizer_filename)
|
185 |
+
mm_config = tokenizer.multimodal
|
186 |
+
elif is_sentencepiece(tokenizer_filename):
|
187 |
+
tokenizer = SentencePieceTokenizer(tokenizer_filename)
|
188 |
+
mm_config = get_mm_config(tokenizer_filename)
|
189 |
+
else:
|
190 |
+
raise TokenizerException(f"Unrecognized tokenizer file: {tokenizer_filename}")
|
191 |
+
|
192 |
+
mm_encoder = load_mm_encoder(mm_config, tokenizer) if mm_config is not None else None
|
193 |
+
|
194 |
+
request_normalizer = normalizer_for_tokenizer_version(tokenizer.version)
|
195 |
+
|
196 |
+
if tokenizer.version == TokenizerVersion.v1:
|
197 |
+
assert mm_encoder is None, "Tokenizer version needs to be >= v3"
|
198 |
+
return MistralTokenizer(
|
199 |
+
InstructTokenizerV1(tokenizer),
|
200 |
+
validator=MistralRequestValidator(mode=mode),
|
201 |
+
request_normalizer=request_normalizer,
|
202 |
+
)
|
203 |
+
elif tokenizer.version == TokenizerVersion.v2:
|
204 |
+
assert mm_encoder is None, "Tokenizer version needs to be >= v3"
|
205 |
+
return MistralTokenizer(
|
206 |
+
InstructTokenizerV2(tokenizer),
|
207 |
+
validator=MistralRequestValidator(mode=mode),
|
208 |
+
request_normalizer=request_normalizer,
|
209 |
+
)
|
210 |
+
elif tokenizer.version == TokenizerVersion.v3:
|
211 |
+
return MistralTokenizer(
|
212 |
+
InstructTokenizerV3(tokenizer, mm_encoder=mm_encoder),
|
213 |
+
validator=MistralRequestValidatorV3(mode=mode),
|
214 |
+
request_normalizer=request_normalizer,
|
215 |
+
)
|
216 |
+
elif tokenizer.version == TokenizerVersion.v7:
|
217 |
+
return MistralTokenizer(
|
218 |
+
InstructTokenizerV7(tokenizer, mm_encoder=mm_encoder),
|
219 |
+
validator=MistralRequestValidatorV3(mode=mode),
|
220 |
+
request_normalizer=request_normalizer,
|
221 |
+
)
|
222 |
+
else:
|
223 |
+
raise TokenizerException(f"Unrecognized tokenizer filename: {tokenizer_filename}")
|
224 |
+
|
225 |
+
raise TokenizerException(f"Unrecognized tokenizer version: {tokenizer.version}")
|
226 |
+
|
227 |
+
def encode_chat_completion(
|
228 |
+
self, request: ChatCompletionRequest[UATS], max_model_input_len: Optional[int] = None
|
229 |
+
) -> TokenizedType:
|
230 |
+
validated_request = self._chat_completion_request_validator.validate_request(request)
|
231 |
+
|
232 |
+
if max_model_input_len is None and request.truncate_for_context_length:
|
233 |
+
# the max_model_input_len arg should not be optionnal ;
|
234 |
+
# but this function is used in many small scripts that have no use
|
235 |
+
# for truncation, and don't provide the max model len
|
236 |
+
raise TokenizerException(
|
237 |
+
"encoding a chat completion request with truncation, but no max model len was provided",
|
238 |
+
)
|
239 |
+
|
240 |
+
instruct_request = self._instruct_request_normalizer.from_chat_completion_request(validated_request)
|
241 |
+
|
242 |
+
if request.truncate_for_context_length:
|
243 |
+
instruct_request.truncate_at_max_tokens = max_model_input_len
|
244 |
+
|
245 |
+
return self.instruct_tokenizer.encode_instruct(instruct_request)
|
246 |
+
|
247 |
+
def encode_fim(self, request: FIMRequest) -> TokenizedType:
|
248 |
+
return self.instruct_tokenizer.encode_fim(request)
|
249 |
+
|
250 |
+
def decode(self, tokens: List[int]) -> str:
|
251 |
+
return self.instruct_tokenizer.decode(tokens)
|
.venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/multimodal.py
ADDED
@@ -0,0 +1,172 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import base64
|
2 |
+
import logging
|
3 |
+
from dataclasses import dataclass
|
4 |
+
from enum import Enum
|
5 |
+
from io import BytesIO
|
6 |
+
from typing import Tuple, Union
|
7 |
+
|
8 |
+
import numpy as np
|
9 |
+
from PIL import Image
|
10 |
+
|
11 |
+
from mistral_common.multimodal import SerializableImage, download_image
|
12 |
+
from mistral_common.protocol.instruct.messages import ImageChunk, ImageURLChunk
|
13 |
+
from mistral_common.tokens.tokenizers.base import (
|
14 |
+
ImageEncoding,
|
15 |
+
MultiModalEncoder,
|
16 |
+
SpecialImageIDs,
|
17 |
+
)
|
18 |
+
|
19 |
+
logger = logging.getLogger(__name__)
|
20 |
+
|
21 |
+
|
22 |
+
_cv2_installed: bool
|
23 |
+
try:
|
24 |
+
import cv2
|
25 |
+
|
26 |
+
_cv2_installed = True
|
27 |
+
except ImportError:
|
28 |
+
_cv2_installed = False
|
29 |
+
except Exception as e:
|
30 |
+
# cv2 has lots of import problems: https://github.com/opencv/opencv-python/issues/884
|
31 |
+
# for better UX, let's simply skip all errors that might arise from import for now
|
32 |
+
logger.warning(
|
33 |
+
f"Warning: Your installation of OpenCV appears to be broken: {e}."
|
34 |
+
"Please follow the instructions at https://github.com/opencv/opencv-python/issues/884 "
|
35 |
+
"to correct your environment. The import of cv2 has been skipped."
|
36 |
+
)
|
37 |
+
|
38 |
+
|
39 |
+
def is_cv2_installed() -> bool:
|
40 |
+
return _cv2_installed
|
41 |
+
|
42 |
+
|
43 |
+
def image_from_chunk(chunk: Union[ImageURLChunk, ImageChunk]) -> SerializableImage:
|
44 |
+
"""Get a serializable image from a chunk."""
|
45 |
+
if isinstance(chunk, ImageChunk):
|
46 |
+
return chunk.image
|
47 |
+
if chunk.get_url().startswith("data:image"):
|
48 |
+
data = chunk.get_url().split(",")[1]
|
49 |
+
image_data = base64.b64decode(data)
|
50 |
+
return Image.open(BytesIO(image_data))
|
51 |
+
if chunk.get_url().startswith("http"):
|
52 |
+
return download_image(chunk.get_url())
|
53 |
+
|
54 |
+
raise RuntimeError(f"Unsupported image url scheme {chunk.get_url()}")
|
55 |
+
|
56 |
+
|
57 |
+
DATASET_MEAN = (0.48145466, 0.4578275, 0.40821073) # RGB
|
58 |
+
DATASET_STD = (0.26862954, 0.26130258, 0.27577711) # RGB
|
59 |
+
|
60 |
+
|
61 |
+
# only relevant for spm
|
62 |
+
class MultiModalVersion(str, Enum):
|
63 |
+
m1 = "m1"
|
64 |
+
|
65 |
+
@property
|
66 |
+
def config(self) -> "MultimodalConfig":
|
67 |
+
if self.name == "m1":
|
68 |
+
return MultimodalConfig(16, 1024)
|
69 |
+
|
70 |
+
raise NotImplementedError(f"{self.name}")
|
71 |
+
|
72 |
+
|
73 |
+
@dataclass
|
74 |
+
class MultimodalConfig:
|
75 |
+
image_patch_size: int
|
76 |
+
max_image_size: int
|
77 |
+
|
78 |
+
|
79 |
+
def _convert_to_rgb(image: Image.Image) -> Image.Image:
    """
    Convert a PIL image to RGB, flattening any transparency onto white.

    Images already in RGB mode are returned unchanged. Every other mode is
    first converted to RGBA so its alpha channel can be used as a paste mask
    over a white canvas.
    """
    if image.mode == "RGB":
        return image
    rgba = image if image.mode == "RGBA" else image.convert("RGBA")
    canvas: Image.Image = Image.new("RGBA", rgba.size, "WHITE")
    # The third argument uses the image's own alpha as the paste mask.
    canvas.paste(rgba, (0, 0), rgba)
    return canvas.convert("RGB")
|
91 |
+
|
92 |
+
|
93 |
+
def normalize(
    np_image: np.ndarray,
    mean: Tuple[float, float, float],
    std: Tuple[float, float, float],
) -> np.ndarray:
    """
    Normalize an image array with per-channel mean and standard deviation.

    Args:
        np_image (np.ndarray): HWC image with values in [0, 255].
        mean (tuple[float, float, float]): Mean for each channel.
        std (tuple[float, float, float]): Standard deviation for each channel.

    Returns:
        np.ndarray: Normalized image with shape (C, H, W).
    """
    # Scale from [0, 255] to [0, 1] before standardizing.
    scaled = np_image / 255.0

    assert scaled.ndim == 3, f"{scaled.shape=}"
    assert scaled.shape[-1] == len(mean) == len(std), f"{scaled.shape=}, {mean=}, {std=}"

    # Broadcasting applies the per-channel statistics along the last (C) axis.
    standardized = (scaled - np.asarray(mean)) / np.asarray(std)

    # HWC -> CHW.
    return standardized.transpose(2, 0, 1)
|
117 |
+
|
118 |
+
|
119 |
+
def transform_image(image: Image.Image, new_size: Tuple[int, int]) -> np.ndarray:
    """Resize ``image`` to ``new_size`` (bicubic) and normalize it to a CHW float array.

    Requires the optional OpenCV dependency; raises ImportError when cv2 failed to import.
    """
    if not is_cv2_installed():
        raise ImportError("OpenCV is required for this function. Install it with 'pip install mistral_common[opencv]'")

    # Flatten transparency to white, resize with bicubic interpolation, then
    # standardize with the module-level dataset statistics.
    np_image = cv2.resize(np.array(_convert_to_rgb(image), dtype=np.float32), new_size, interpolation=cv2.INTER_CUBIC)
    return normalize(np_image, DATASET_MEAN, DATASET_STD)
|
125 |
+
|
126 |
+
|
127 |
+
class ImageEncoder(MultiModalEncoder):
    """Encodes image chunks into image token ids plus a preprocessed pixel array."""

    def __init__(self, mm_config: MultimodalConfig, special_ids: SpecialImageIDs) -> None:
        # Patch size / max size configuration and the special image token ids.
        self.mm_config = mm_config
        self.special_ids = special_ids

    def _image_to_num_tokens(self, img: Image.Image) -> Tuple[int, int]:
        """Return (width_tokens, height_tokens) for ``img`` after downscaling.

        Images larger than ``max_image_size`` on either side are scaled down
        proportionally; each ``image_patch_size`` pixels (ceil) costs one token.
        """
        w: Union[int, float]
        h: Union[int, float]

        w, h = img.size
        ratio = max(h / self.mm_config.max_image_size, w / self.mm_config.max_image_size)
        if ratio > 1:
            # Downscale so the longest side fits within max_image_size.
            w = round(w / ratio)
            h = round(h / ratio)

        # Ceil-divide by the patch size: (n - 1) // p + 1 == ceil(n / p) for n >= 1.
        width_tokens = (w - 1) // self.mm_config.image_patch_size + 1
        height_tokens = (h - 1) // self.mm_config.image_patch_size + 1

        return width_tokens, height_tokens

    def __call__(self, content: Union[ImageChunk, ImageURLChunk]) -> ImageEncoding:
        """
        Converts ImageChunks to numpy image arrays and image token ids

        Args:
            content (ImageChunk, ImageURLChunk): chunk to be converted

        Returns:
            ImageEncoding containing image token ids and processed image in numpy format
        """
        image = image_from_chunk(content)
        w, h = self._image_to_num_tokens(image)
        assert w > 0
        assert h > 0
        # Each row of patches is `w` img tokens followed by an img_break token;
        # the very last break is replaced by img_end.
        image_tokens = ([self.special_ids.img] * w + [self.special_ids.img_break]) * h
        image_tokens[-1] = self.special_ids.img_end
        # Resize target is the token grid scaled back to pixels.
        new_image_size = (
            w * self.mm_config.image_patch_size,
            h * self.mm_config.image_patch_size,
        )
        processed_image = transform_image(image, new_image_size)
        return ImageEncoding(tokens=image_tokens, image=processed_image)

    @property
    def image_token(self) -> int:
        # The plain image-patch token id.
        return self.special_ids.img
|
.venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/sentencepiece.py
ADDED
@@ -0,0 +1,672 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import logging
|
3 |
+
import os
|
4 |
+
from abc import abstractmethod
|
5 |
+
from functools import cached_property
|
6 |
+
from pathlib import Path
|
7 |
+
from typing import Any, Dict, Generic, List, Optional, Set, Tuple, Union
|
8 |
+
|
9 |
+
import numpy as np
|
10 |
+
from sentencepiece import SentencePieceProcessor
|
11 |
+
|
12 |
+
from mistral_common.exceptions import TokenizerException
|
13 |
+
from mistral_common.protocol.instruct.messages import (
|
14 |
+
AssistantMessage,
|
15 |
+
AssistantMessageType,
|
16 |
+
ContentChunk,
|
17 |
+
SystemMessage,
|
18 |
+
TextChunk,
|
19 |
+
ToolMessage,
|
20 |
+
UserMessage,
|
21 |
+
)
|
22 |
+
from mistral_common.protocol.instruct.tool_calls import Tool, ToolCall
|
23 |
+
from mistral_common.tokens.instruct.request import FIMRequest, InstructRequest
|
24 |
+
from mistral_common.tokens.tokenizers.base import (
|
25 |
+
FIMRequestType,
|
26 |
+
InstructRequestType,
|
27 |
+
InstructTokenizer,
|
28 |
+
SpecialTokens,
|
29 |
+
Tokenized,
|
30 |
+
TokenizedType,
|
31 |
+
Tokenizer,
|
32 |
+
TokenizerVersion,
|
33 |
+
)
|
34 |
+
from mistral_common.tokens.tokenizers.multimodal import MultimodalConfig, MultiModalEncoder, MultiModalVersion
|
35 |
+
|
36 |
+
|
37 |
+
def is_sentencepiece(path: Union[str, Path]) -> bool:
    """Return True if ``path`` is an existing file named like a SentencePiece tokenizer.

    Accepted names end in ``.model`` or ``.model.<version>[<mm_version>]`` for any
    known tokenizer / multimodal version.
    """
    model_path = Path(path) if isinstance(path, str) else path
    if not model_path.is_file():
        return False

    candidate_suffixes = [".model"]
    for version in TokenizerVersion.__members__:
        for mm_version in list(MultiModalVersion.__members__) + [""]:  # "" -> allow no mm version
            candidate_suffixes.append(f".model.{version}{mm_version}")

    return any(model_path.name.endswith(suffix) for suffix in candidate_suffixes)
|
46 |
+
|
47 |
+
|
48 |
+
def get_spm_version(tokenizer_filename: str, raise_deprecated: bool = False) -> TokenizerVersion:
    """Infer the tokenizer version from a SentencePiece model filename.

    e.g. ``tokenizer.model.v3m1`` -> last suffix ``v3m1`` -> version part ``v3``.
    A bare ``.model`` suffix is treated as the deprecated v1 naming.
    """
    version_part = tokenizer_filename.split(".")[-1].split("m")[0]

    if version_part == "model":
        # Legacy name without an explicit version suffix.
        if raise_deprecated:
            raise TokenizerException(f"Make sure to rename your tokenizer file to end with {tokenizer_filename}.v1.")

        # tokenizer.model => tokenizer.model.v1
        return TokenizerVersion("v1")

    if version_part not in TokenizerVersion.__members__:
        raise TokenizerException(f"Unrecognized tokenizer filename: {tokenizer_filename}")

    return TokenizerVersion(version_part)
|
61 |
+
|
62 |
+
|
63 |
+
def get_mm_config(tokenizer_filename: str) -> Optional[MultimodalConfig]:
    """Extract the multimodal config encoded in the filename suffix, if any.

    Returns None when the last suffix carries no ``m<...>`` marker; raises
    TokenizerException when the marker does not match a known version.
    """
    last_suffix = tokenizer_filename.split(".")[-1]
    if "m" not in last_suffix:
        return None

    mm_version_str = "m" + last_suffix.split("m")[-1]

    if mm_version_str not in MultiModalVersion.__members__:
        raise TokenizerException(f"Unrecognized tokenizer filename: {tokenizer_filename}")

    return MultiModalVersion(mm_version_str).config
|
74 |
+
|
75 |
+
|
76 |
+
class SentencePieceTokenizer(Tokenizer):
    """Tokenizer backed by a SentencePiece model file."""

    def __init__(self, model_path: str, tokenizer_version: Optional[TokenizerVersion] = None) -> None:
        self._logger = logging.getLogger(self.__class__.__name__)
        # reload tokenizer
        assert os.path.isfile(model_path), model_path
        self._model = SentencePieceProcessor(model_file=model_path)

        # Sanity check: piece count and vocab size must agree for a valid model.
        assert self._model.vocab_size() == self._model.get_piece_size()
        # Materialize the full vocabulary once for fast `vocab()` access.
        self._vocab = [self._model.id_to_piece(i) for i in range(self.n_words)]

        # Explicit version wins; otherwise infer it from the filename suffix.
        self._version: TokenizerVersion = tokenizer_version or get_spm_version(model_path, raise_deprecated=False)

        super().__init__()

    @property
    def version(self) -> TokenizerVersion:
        return self._version

    def get_control_token(self, s: str) -> int:
        # Map a special-token string to its id.
        return self._model.piece_to_id(s)  # type: ignore

    @property
    def n_words(self) -> int:
        # Total vocabulary size.
        return self._model.vocab_size()  # type: ignore

    def vocab(self) -> List[str]:
        return self._vocab

    @property
    def bos_id(self) -> int:
        return self._model.bos_id()  # type: ignore

    @property
    def eos_id(self) -> int:
        return self._model.eos_id()  # type: ignore

    @cached_property
    def _control_tokens(self) -> Set[int]:
        # Ids SentencePiece marks as control tokens; computed once (cached_property).
        return {tok for tok in range(self.n_words) if self._model.IsControl(tok)}

    def encode(self, s: str, bos: bool, eos: bool) -> List[int]:
        """Encode ``s`` to ids, optionally wrapping with BOS/EOS."""
        assert isinstance(s, str)
        t: List[int] = self._model.encode(s)
        if bos:
            t = [self.bos_id, *t]
        if eos:
            t = [*t, self.eos_id]
        return t

    def decode(self, t: List[int]) -> str:
        return self._model.decode(t)  # type: ignore

    def id_to_piece(self, token_id: int) -> str:
        return self._model.id_to_piece(token_id)  # type: ignore

    def to_string(self, tokens: List[int]) -> str:
        """
        Converts tokens into a string for debugging purposes
        """
        text = ""
        curr_tokens: List[int] = []
        for tok in tokens:
            if tok in self._control_tokens:
                # Flush the pending run of regular tokens before the control token.
                if curr_tokens:
                    text += "".join([self.id_to_piece(tok) for tok in curr_tokens])
                    curr_tokens = []

                text += self.id_to_piece(tok)

            else:
                curr_tokens.append(tok)

        # Flush any trailing regular tokens.
        if curr_tokens:
            text += "".join([self.id_to_piece(tok) for tok in curr_tokens])

        return text

    @property
    def pad_id(self) -> int:
        return self._model.pad_id()  # type: ignore

    @property
    def unk_id(self) -> int:
        return self._model.unk_id()  # type: ignore
|
160 |
+
|
161 |
+
|
162 |
+
class InstructTokenizerBase(
    InstructTokenizer, Generic[InstructRequestType, FIMRequestType, TokenizedType, AssistantMessageType]
):
    """Shared plumbing for all instruct tokenizer versions: walks the message
    list, delegates per-role encoding to subclasses, and assembles the result."""

    def __init__(self, tokenizer: Tokenizer, mm_encoder: Optional[MultiModalEncoder] = None):
        self.tokenizer = tokenizer
        self.mm_encoder = mm_encoder
        super().__init__(tokenizer, mm_encoder)

    def start(self) -> List[int]:
        # Every encoded request begins with BOS.
        return [self.tokenizer.bos_id]

    @staticmethod
    def find_first_last_user(request: InstructRequest) -> Tuple[int, int]:
        """Return (first, last) indices of UserMessages; -1 when none exist."""
        # find last user message
        last_user_idx = -1
        first_user_idx = -1
        for i, msg in list(enumerate(request.messages)):
            if isinstance(msg, UserMessage):
                if first_user_idx == -1:
                    first_user_idx = i
                last_user_idx = i
        return first_user_idx, last_user_idx

    @abstractmethod
    def encode_tool_message(self, message: ToolMessage, is_before_last_user_message: bool) -> List[int]:
        raise NotImplementedError("Tool message not implemented")

    @abstractmethod
    def encode_assistant_message(self, message: AssistantMessageType, is_before_last_user_message: bool) -> List[int]:
        raise NotImplementedError("Assistant message not implemented")

    def _truncate_for_max_tokens(
        self,
        tokenized: List[Optional[List[int]]],
        messages: List[AssistantMessageType],
        max_tokens: int,
        last_user_message_index: int,
    ) -> None:
        # Tokenizer ⩽ V3 does not support truncation (V7 overrides this).
        return

    def encode_instruct(
        self,
        request: InstructRequest[AssistantMessageType, Tool],
    ) -> Tokenized:
        """Encode a full instruct request into tokens, text, prefix ids and images."""
        # init at bos
        images: List[np.ndarray] = []
        prefix_ids: Optional[List[int]] = None
        tokens_list: List[Optional[List[int]]] = []

        # find last user message
        first_user_idx, last_user_idx = self.find_first_last_user(request)
        for msg_idx, msg in enumerate(request.messages):
            if isinstance(msg, UserMessage):
                new_tokens, new_images = self.encode_user_message(
                    msg,
                    request.available_tools,
                    msg_idx == last_user_idx,
                    msg_idx == first_user_idx,
                    system_prompt=request.system_prompt,
                    force_img_first=True,  # img is always first when providing text/img chunk pair
                )
                images.extend(new_images)
            elif isinstance(msg, ToolMessage):
                new_tokens = self.encode_tool_message(msg, msg_idx < last_user_idx)
            elif isinstance(msg, AssistantMessage):
                new_tokens = self.encode_assistant_message(msg, msg_idx < last_user_idx)
                if msg_idx == len(request.messages) - 1:
                    # A trailing assistant message acts as a generation prefix.
                    prefix_ids = new_tokens
            elif isinstance(msg, SystemMessage):
                new_tokens = self.encode_system_message(msg)

            tokens_list.append(new_tokens)

        if request.truncate_at_max_tokens is not None:
            # Mutates tokens_list in place (entries may become None).
            self._truncate_for_max_tokens(
                tokens_list,
                request.messages,
                request.truncate_at_max_tokens,
                last_user_idx,
            )
        tokens = self.start()

        for tok in tokens_list:
            if tok is not None:
                tokens.extend(tok)

        return Tokenized(
            tokens=tokens,
            text=self.tokenizer.to_string(tokens),
            prefix_ids=prefix_ids,
            images=images,
        )

    def decode(self, tokens: List[int]) -> str:
        return self.tokenizer.decode(tokens)
|
258 |
+
|
259 |
+
|
260 |
+
class InstructTokenizerV1(
    InstructTokenizerBase, Generic[InstructRequestType, FIMRequestType, TokenizedType, AssistantMessageType]
):
    """V1 format: plain-text [INST] ... [/INST] markers, no tools, no images, no FIM."""

    def encode_user_message(
        self,
        message: UserMessage,
        available_tools: Optional[List[Tool]],
        is_last: bool,
        is_first: bool,
        system_prompt: Optional[str] = None,
        force_img_first: bool = False,
    ) -> Tuple[List[int], List[np.ndarray]]:
        assert message.content is not None
        assert isinstance(message.content, str), "Message content must be normalized"
        assert self.mm_encoder is None, "InstructTokenizerV1 cannot encode images"

        content = ""
        if is_first and system_prompt:
            # V1 prepends the system prompt to the first user message.
            content = system_prompt + "\n\n" + message.content
        else:
            content = message.content

        # System prompt is already merged above, so pass system_prompt=None here.
        message_txt = f"[INST] {content} [/INST]"
        curr_tokens, image_tokens = self.encode_user_content(content=message_txt, is_last=False, system_prompt=None)
        return curr_tokens, image_tokens

    def encode_user_content(
        self,
        content: Union[str, List[ContentChunk]],
        is_last: bool,
        system_prompt: Optional[str] = None,
        force_img_first: bool = False,
    ) -> Tuple[List[int], List[np.ndarray]]:
        # V1 only supports plain-string content (no chunks, hence no images).
        assert isinstance(content, str)

        if is_last and system_prompt:
            content = system_prompt + "\n\n" + content

        tokens = self.tokenizer.encode(content, bos=False, eos=False)
        return tokens, []

    def encode_tool_message(self, message: ToolMessage, is_before_last_user_message: bool) -> List[int]:
        raise TokenizerException("Tools not implemented for tokenizer V1")

    def encode_assistant_message(self, message: AssistantMessageType, is_before_last_user_message: bool) -> List[int]:
        assert isinstance(message, AssistantMessage), message
        if message.tool_calls is not None and len(message.tool_calls) > 0:
            raise TokenizerException("Tools not implemented for tokenizer V1")
        elif message.content:
            curr_tokens = self.tokenizer.encode(message.content, bos=False, eos=False)
        else:
            raise TokenizerException(f"{message.content} // {message.tool_calls}")
        if not message.prefix:
            # Only completed (non-prefix) assistant turns are closed with EOS.
            curr_tokens.append(self.tokenizer.eos_id)
        return curr_tokens

    def encode_fim(self, request: FIMRequest) -> Tokenized:
        raise TokenizerException("FIM not available for tokenizer V1")
|
318 |
+
|
319 |
+
|
320 |
+
class InstructTokenizerV2(
    InstructTokenizerV1, Generic[InstructRequestType, FIMRequestType, TokenizedType, AssistantMessageType]
):
    """V2 format: control tokens for [INST], tools, tool results and FIM."""

    def __init__(self, tokenizer: Tokenizer, mm_encoder: Optional[MultiModalEncoder] = None):
        super().__init__(tokenizer, mm_encoder)
        # Resolve all special control-token ids once at construction time.
        self.BEGIN_INST = self.tokenizer.get_control_token(SpecialTokens.begin_inst.value)
        self.END_INST = self.tokenizer.get_control_token(SpecialTokens.end_inst.value)
        self.BEGIN_AVAILABLE_TOOLS = self.tokenizer.get_control_token(SpecialTokens.begin_tools.value)
        self.END_AVAILABLE_TOOLS = self.tokenizer.get_control_token(SpecialTokens.end_tools.value)
        self.BEGIN_TOOL_RESULTS = self.tokenizer.get_control_token(SpecialTokens.begin_tool_results.value)
        self.END_TOOL_RESULTS = self.tokenizer.get_control_token(SpecialTokens.end_tool_results.value)
        self.TOOL_CALLS = self.tokenizer.get_control_token(SpecialTokens.tool_calls.value)
        self.BOS = self.tokenizer.get_control_token(SpecialTokens.bos.value)
        self.PREFIX = self.tokenizer.get_control_token(SpecialTokens.prefix.value)
        self.SUFFIX = self.tokenizer.get_control_token(SpecialTokens.suffix.value)

    def encode_user_message(
        self,
        message: UserMessage,
        available_tools: Optional[List[Tool]],
        is_last: bool,
        is_first: bool,
        system_prompt: Optional[str] = None,
        force_img_first: bool = False,
    ) -> Tuple[List[int], List[np.ndarray]]:
        assert message.content is not None
        tools_tokens: List[int] = []
        if is_last and available_tools:
            # Tool definitions are serialized as JSON before the last user message only.
            tools = [tool.model_dump() for tool in available_tools]
            tools_json_tokens = self.tokenizer.encode(json.dumps(tools, ensure_ascii=False), bos=False, eos=False)
            tools_tokens = [
                self.BEGIN_AVAILABLE_TOOLS,
                *tools_json_tokens,
                self.END_AVAILABLE_TOOLS,
            ]

        tokens, image_tokens = self.encode_user_content(
            content=message.content,
            is_last=is_last,
            system_prompt=system_prompt,
            force_img_first=force_img_first,
        )

        # [available tools] [BEGIN_INST] content [END_INST]
        prefix_tokens = [*tools_tokens, self.BEGIN_INST]
        suffix_tokens = [self.END_INST]

        curr_tokens = prefix_tokens + tokens + suffix_tokens

        return curr_tokens, image_tokens

    def _parse_json_content(self, content: str) -> Any:
        # Best effort: return parsed JSON, or the raw string when not valid JSON.
        try:
            return json.loads(content)
        except json.JSONDecodeError:
            return content

    def _prepare_tool_result(self, tool_message: ToolMessage) -> Dict[str, Any]:
        """
        Bit of a hack due to the way tool results are tokenized
        """
        assert tool_message.content is not None, "Tool message content cannot be None"
        return {
            "name": tool_message.name,
            "content": self._parse_json_content(tool_message.content),
        }

    def encode_tool_message(self, message: ToolMessage, is_before_last_user_message: bool) -> List[int]:
        if is_before_last_user_message:
            # don't tokenize last tool response before last user msg
            return []

        # Currently only supports single tool results
        tool_result_str = json.dumps([self._prepare_tool_result(message)], ensure_ascii=False)
        curr_tokens = [
            self.BEGIN_TOOL_RESULTS,
            *self.tokenizer.encode(tool_result_str, bos=False, eos=False),
            self.END_TOOL_RESULTS,
        ]
        return curr_tokens

    def _prepare_function_call(self, tool_call: ToolCall) -> Dict[str, Any]:
        """
        Bit of a hack due to the way function calls are tokenized
        """
        return {
            "name": tool_call.function.name,
            "arguments": self._parse_json_content(tool_call.function.arguments),
        }

    def _encode_normal_content_assistant_message(self, message: AssistantMessageType) -> List[int]:
        assert message.content, f"Assistant message must have content. Got {message}"
        # Trailing spaces are stripped to keep tokenization stable.
        return self.tokenizer.encode(message.content.rstrip(" "), bos=False, eos=False)

    def _encode_tool_calls_in_assistant_message(self, message: AssistantMessageType) -> List[int]:
        assert message.tool_calls, f"Assistant message must have tool calls. Got {message}"
        prepared_tool_calls = []
        for tool_call in message.tool_calls:
            prepared_tool_calls.append(self._prepare_function_call(tool_call))
        tool_call_str = json.dumps(prepared_tool_calls, ensure_ascii=False)
        curr_tokens = [
            self.TOOL_CALLS,
            *self.tokenizer.encode(tool_call_str, bos=False, eos=False),
        ]
        return curr_tokens

    def encode_assistant_message(self, message: AssistantMessageType, is_before_last_user_message: bool) -> List[int]:
        if message.tool_calls:
            if is_before_last_user_message:
                # don't tokenize tool call before last user message
                return []
            curr_tokens = self._encode_tool_calls_in_assistant_message(message)
        elif message.content:
            curr_tokens = self._encode_normal_content_assistant_message(message)
        else:
            raise TokenizerException(f"Invalid assistant message: {message.content}")
        if not message.prefix:
            curr_tokens.append(self.tokenizer.eos_id)
        return curr_tokens

    def _encode_infilling(self, text: str) -> List[int]:
        """
        Remove prefix space in the case of SentencePieceTokenizers
        Thanks Fabian !
        """
        # Prepend a sentinel char, encode, then drop the sentinel's two tokens.
        return self.tokenizer.encode("☺" + text, bos=False, eos=False)[2:]

    def encode_fim(self, request: FIMRequest) -> Tokenized:
        """Encode a fill-in-the-middle request: BOS [SUFFIX] suffix [PREFIX] prefix."""
        prefix_tokens = self.tokenizer.encode(request.prompt, bos=False, eos=False)
        suffix_tokens = self._encode_infilling(request.suffix) if request.suffix else []
        tokens = [
            self.BOS,
            self.SUFFIX,
            *suffix_tokens,
            self.PREFIX,
            *prefix_tokens,
        ]
        return Tokenized(tokens=tokens, text=self.tokenizer.to_string(tokens))
|
458 |
+
|
459 |
+
|
460 |
+
class InstructTokenizerV3(
    InstructTokenizerV2, Generic[InstructRequestType, FIMRequestType, TokenizedType, AssistantMessageType]
):
    """
    The only difference with V3 tokenizer is that it encodes the tool messages differently
    """

    def __init__(self, tokenizer: Tokenizer, mm_encoder: Optional[MultiModalEncoder] = None) -> None:
        super().__init__(tokenizer, mm_encoder=mm_encoder)

    def _prepare_function_call(self, tool_call: ToolCall) -> Dict[str, Any]:
        # Unlike V2, V3 also carries the tool call id (when meaningful).
        function_call = {
            "name": tool_call.function.name,
            "arguments": self._parse_json_content(tool_call.function.arguments),
        }

        if tool_call.id and tool_call.id != "null":
            function_call["id"] = tool_call.id

        return function_call

    def _prepare_tool_result(self, tool_message: ToolMessage) -> Dict[str, Any]:
        assert tool_message.content is not None, "Tool message content cannot be None"
        assert tool_message.tool_call_id is not None, "Tool message has to have the tool call id defined in v3"

        return {
            "content": self._parse_json_content(tool_message.content),
            "call_id": tool_message.tool_call_id,
        }

    def encode_tool_message(self, message: ToolMessage, is_before_last_user_message: bool) -> List[int]:
        """
        Same as V2 but tools not wrapped in a list and history is tokenized also
        """
        tool_result_str = json.dumps(self._prepare_tool_result(message), ensure_ascii=False)
        curr_tokens = [
            self.BEGIN_TOOL_RESULTS,
            *self.tokenizer.encode(tool_result_str, bos=False, eos=False),
            self.END_TOOL_RESULTS,
        ]
        return curr_tokens

    def encode_assistant_message(self, message: AssistantMessageType, is_before_last_user_message: bool) -> List[int]:
        """
        Same as V2 but always encode tool history
        """
        return super().encode_assistant_message(message, False)

    def encode_user_content(
        self,
        content: Union[str, List[ContentChunk]],
        is_last: bool,
        system_prompt: Optional[str] = None,
        force_img_first: bool = False,
    ) -> Tuple[List[int], List[np.ndarray]]:
        if isinstance(content, str):
            return super().encode_user_content(content, is_last, system_prompt)

        tokens: List[int] = []
        images: List[np.ndarray] = []

        has_one_img_one_text_first = (
            len(content) == 2 and isinstance(content[0], TextChunk) and not isinstance(content[1], TextChunk)
        )
        if force_img_first and has_one_img_one_text_first:
            # make sure that if exactly one image and text chunk are passed we force the image chunk to be first
            content = [content[1], content[0]]

        first_chunk = True
        for chunk in content:
            # NOTE(review): `content` (the iterated list) is rebound to a string here.
            # Iteration is unaffected (the for-loop holds the list), but the
            # shadowing is fragile — consider a distinct local name.
            content = ""
            if first_chunk and is_last and system_prompt:
                first_chunk = False
                content = system_prompt + "\n\n"
            if isinstance(chunk, TextChunk):
                content += chunk.text
                tokens.extend(self.tokenizer.encode(content, bos=False, eos=False))
            else:
                assert self.mm_encoder is not None, "Make sure to define a multi-modal encoder at init"
                if content:
                    # Emit the pending system prompt text before the image tokens.
                    tokens.extend(self.tokenizer.encode(content, bos=False, eos=False))

                img_encoding = self.mm_encoder(chunk)

                tokens.extend(img_encoding.tokens)
                images.append(img_encoding.image)

        return tokens, images
|
548 |
+
|
549 |
+
|
550 |
+
class InstructTokenizerV7(InstructTokenizerV3):
|
551 |
+
"""
|
552 |
+
The difference with V3 tokenizer is that it encodes the system prompts differently:
|
553 |
+
- in V7 the system prompts are treated as separate SystemMessages
|
554 |
+
- they are no longer prepended to the last user message
|
555 |
+
- they are printed between special tokens
|
556 |
+
Tool call results are encoded as :
|
557 |
+
- [begin tool call] call_id_tokens [tool_content] content tokens [end tool call]
|
558 |
+
"""
|
559 |
+
|
560 |
+
def __init__(self, tokenizer: Tokenizer, mm_encoder: Optional[MultiModalEncoder] = None) -> None:
|
561 |
+
super().__init__(tokenizer, mm_encoder)
|
562 |
+
self.BEGIN_SYSTEM = self.tokenizer.get_control_token(SpecialTokens.begin_system.value)
|
563 |
+
self.END_SYSTEM = self.tokenizer.get_control_token(SpecialTokens.end_system.value)
|
564 |
+
self.BEGIN_TOOL_CONTENT = self.tokenizer.get_control_token(SpecialTokens.begin_tool_content.value)
|
565 |
+
|
566 |
+
def _truncate_for_max_tokens(
|
567 |
+
self,
|
568 |
+
tokenized_messages: List[Optional[List[int]]],
|
569 |
+
messages: List[AssistantMessageType],
|
570 |
+
max_tokens: int,
|
571 |
+
last_user_message_index: int,
|
572 |
+
) -> None:
|
573 |
+
# drop some messages to fit in max_tokens. Rules:
|
574 |
+
# - don't drop any system messages
|
575 |
+
# - when a user message is dropped, all following assistant|tool message should be dropped until the next
|
576 |
+
# user message
|
577 |
+
# - we never drop the last message
|
578 |
+
to_drop = sum(len(t) for t in tokenized_messages if t is not None) - max_tokens
|
579 |
+
|
580 |
+
def drop(idx: int) -> None:
|
581 |
+
nonlocal to_drop
|
582 |
+
if isinstance(messages[idx], SystemMessage):
|
583 |
+
# never drop system messages
|
584 |
+
return
|
585 |
+
if idx == last_user_message_index:
|
586 |
+
# never drop the last user message
|
587 |
+
return
|
588 |
+
tok = tokenized_messages[idx]
|
589 |
+
assert tok is not None
|
590 |
+
to_drop -= len(tok)
|
591 |
+
tokenized_messages[idx] = None
|
592 |
+
|
593 |
+
current_idx = 0
|
594 |
+
while to_drop > 0 and current_idx < len(messages):
|
595 |
+
drop(current_idx)
|
596 |
+
current_idx += 1
|
597 |
+
if isinstance(messages[current_idx - 1], UserMessage):
|
598 |
+
# if we just dropped a UserMessage,
|
599 |
+
# also drop everything until the next user message
|
600 |
+
while current_idx < len(messages) and not isinstance(messages[current_idx], UserMessage):
|
601 |
+
drop(current_idx)
|
602 |
+
current_idx += 1
|
603 |
+
|
604 |
+
if to_drop > 0:
|
605 |
+
raise TokenizerException("Input couldn't fit in truncate_at_max_token")
|
606 |
+
|
607 |
+
def encode_system_message(self, message: SystemMessage) -> List[int]:
|
608 |
+
assert message.content is not None
|
609 |
+
assert isinstance(message.content, str), "Message content must be normalized"
|
610 |
+
tokens = [
|
611 |
+
self.BEGIN_SYSTEM,
|
612 |
+
*self.tokenizer.encode(message.content, bos=False, eos=False),
|
613 |
+
self.END_SYSTEM,
|
614 |
+
]
|
615 |
+
return tokens
|
616 |
+
|
617 |
+
def encode_user_message(
|
618 |
+
self,
|
619 |
+
message: UserMessage,
|
620 |
+
available_tools: Optional[List[Tool]],
|
621 |
+
is_last: bool,
|
622 |
+
is_first: bool,
|
623 |
+
system_prompt: Optional[str] = None,
|
624 |
+
force_img_first: bool = False,
|
625 |
+
) -> Tuple[List[int], List[np.ndarray]]:
|
626 |
+
assert system_prompt is None, "in Tokenizer V7 we don't encode system prompts in user messages"
|
627 |
+
return super().encode_user_message(
|
628 |
+
message,
|
629 |
+
available_tools,
|
630 |
+
is_last=is_last,
|
631 |
+
is_first=is_first,
|
632 |
+
system_prompt=None,
|
633 |
+
force_img_first=force_img_first,
|
634 |
+
)
|
635 |
+
|
636 |
+
def encode_tool_message(self, message: ToolMessage, is_before_last_user_message: bool) -> List[int]:
|
637 |
+
"""
|
638 |
+
Same as V3 but tools not wrapped in a list and history is tokenized also
|
639 |
+
"""
|
640 |
+
assert message.tool_call_id is not None
|
641 |
+
tool_call_id_tokens = self.tokenizer.encode(message.tool_call_id, bos=False, eos=False)
|
642 |
+
tokens = self.tokenizer.encode(message.content, bos=False, eos=False)
|
643 |
+
|
644 |
+
prefix_tokens = [
|
645 |
+
self.BEGIN_TOOL_RESULTS,
|
646 |
+
*tool_call_id_tokens,
|
647 |
+
self.BEGIN_TOOL_CONTENT,
|
648 |
+
]
|
649 |
+
curr_tokens = [
|
650 |
+
*prefix_tokens,
|
651 |
+
*tokens,
|
652 |
+
self.END_TOOL_RESULTS,
|
653 |
+
]
|
654 |
+
return curr_tokens
|
655 |
+
|
656 |
+
def encode_assistant_message(self, message: AssistantMessageType, is_before_last_user_message: bool) -> List[int]:
|
657 |
+
if not message.content and not message.tool_calls:
|
658 |
+
raise TokenizerException(f"Invalid assistant message: {message}")
|
659 |
+
curr_tokens: list = []
|
660 |
+
if message.content:
|
661 |
+
if isinstance(message.content, str):
|
662 |
+
curr_tokens += self._encode_normal_content_assistant_message(message)
|
663 |
+
elif isinstance(message.content, list):
|
664 |
+
curr_tokens += self.encode_content_chunks(
|
665 |
+
message.content, is_last=False, system_prompt=None, force_img_first=True
|
666 |
+
).tokens
|
667 |
+
if message.tool_calls:
|
668 |
+
curr_tokens += self._encode_tool_calls_in_assistant_message(message)
|
669 |
+
if not message.prefix:
|
670 |
+
curr_tokens.append(self.tokenizer.eos_id)
|
671 |
+
|
672 |
+
return curr_tokens
|
.venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/tekken.py
ADDED
@@ -0,0 +1,312 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import base64
|
2 |
+
import json
|
3 |
+
import logging
|
4 |
+
from enum import Enum
|
5 |
+
from functools import cached_property
|
6 |
+
from itertools import groupby
|
7 |
+
from pathlib import Path
|
8 |
+
from typing import Dict, List, Optional, Type, TypedDict, Union
|
9 |
+
|
10 |
+
import tiktoken
|
11 |
+
|
12 |
+
from mistral_common.tokens.tokenizers.base import (
|
13 |
+
SpecialTokens,
|
14 |
+
Tokenizer,
|
15 |
+
TokenizerVersion,
|
16 |
+
)
|
17 |
+
from mistral_common.tokens.tokenizers.multimodal import MultimodalConfig
|
18 |
+
|
19 |
+
logger = logging.getLogger(__name__)
|
20 |
+
|
21 |
+
|
22 |
+
def is_tekken(path: Union[str, Path]) -> bool:
    """Return True if *path* is an existing file whose name contains
    "tekken" and ends in ".json"."""
    p = Path(path) if isinstance(path, str) else path
    if not p.is_file():
        return False
    return "tekken" in p.name and p.suffix == ".json"
26 |
+
|
27 |
+
|
28 |
+
# Formatting specification of the JSON file
|
29 |
+
class TokenInfo(TypedDict):
    """One vocabulary entry as serialized in the tekken JSON file."""

    rank: int  # merge rank; must equal the entry's index in the vocab list
    token_bytes: str  # base64 encoded
    token_str: Optional[str]  # human-readable form; may be null in the file
33 |
+
|
34 |
+
|
35 |
+
class TekkenConfig(TypedDict):
    """The `config` section of the tekken JSON file."""

    pattern: str  # regex pre-tokenization pattern passed to tiktoken
    num_vocab_tokens: int  # not read by this module — presumably the serialized vocab length; confirm
    default_vocab_size: int  # total vocab size, special tokens included
    default_num_special_tokens: int  # number of token ids reserved for special tokens
    version: str  # must be a member name of TokenizerVersion
41 |
+
|
42 |
+
|
43 |
+
class ModelData(TypedDict):
    """Top-level schema of a tekken tokenizer JSON file."""

    vocab: List[TokenInfo]  # vocabulary entries, ordered by rank
    config: TekkenConfig  # tokenizer configuration
    version: int  # not read by this module — presumably the file-format version; confirm
    type: str  # not read by this module
    multimodal: MultimodalConfig  # optional in the file; promoted from a dict in Tekkenizer.from_file
49 |
+
|
50 |
+
|
51 |
+
class SpecialTokenPolicy(Enum):
    """What to do with special tokens when encoding/decoding."""

    IGNORE = 0  # silently skip special tokens while decoding
    KEEP = 1  # decode special tokens to their string form
    RAISE = 2  # raise ValueError when asked to decode a special token
57 |
+
|
58 |
+
|
59 |
+
class Tekkenizer(Tokenizer):
    """BPE tokenizer backed by ``tiktoken``, loaded from a "tekken" JSON file.

    Token-id layout: special tokens occupy ids ``[0, num_special_tokens)``;
    the regular BPE vocabulary follows, shifted up by ``num_special_tokens``
    (see ``encode`` and ``_decode_all``).
    """

    # Reserved special tokens, in token-id order (tuple index == token id).
    SPECIAL_TOKENS = (
        "<unk>",
        SpecialTokens.bos,
        SpecialTokens.eos,
        SpecialTokens.begin_inst,
        SpecialTokens.end_inst,
        SpecialTokens.begin_tools,
        SpecialTokens.end_tools,
        SpecialTokens.begin_tool_results,
        SpecialTokens.end_tool_results,
        SpecialTokens.tool_calls,
        SpecialTokens.img,
        "<pad>",
        SpecialTokens.img_break,
        SpecialTokens.img_end,
        SpecialTokens.prefix,
        SpecialTokens.middle,
        SpecialTokens.suffix,
        SpecialTokens.begin_system,
        SpecialTokens.end_system,
        SpecialTokens.begin_tool_content,
    )
    # Name template for the unused special-token slots (filler tokens).
    SPECIAL_TOKEN_TEMPLATE = "<SPECIAL_{id}>"

    # # note that params has a vocab_size field, but it's not used

    def __init__(
        self,
        vocab: List[TokenInfo],
        pattern: str,
        vocab_size: int,
        num_special_tokens: int,
        version: TokenizerVersion,
        *,
        name: str = "tekkenizer",
        _path: Optional[str] = None,
        mm_config: Optional[MultimodalConfig] = None,
    ):
        """Build the tokenizer.

        Args:
            vocab: vocabulary entries as read from the JSON file.
            pattern: regex pre-tokenization pattern for ``tiktoken``.
            vocab_size: total vocabulary size, special tokens included.
            num_special_tokens: number of token ids reserved at the bottom
                of the id space for special tokens (named + fillers).
            version: tokenizer version (validated by ``from_file``).
            name: name forwarded to ``tiktoken.Encoding``.
            mm_config: multimodal configuration, if the file declares one.
        """
        assert vocab_size <= len(vocab) + num_special_tokens, (
            vocab_size,
            len(vocab),
            num_special_tokens,
        )
        self._vocab_size = vocab_size
        self._path = _path

        special_tokens = list(self.SPECIAL_TOKENS)
        assert len(special_tokens) == len(set(special_tokens)), f"Special tokens must be unique: {special_tokens}"
        assert len(special_tokens) < num_special_tokens

        # Pad the named special tokens with placeholder names so that exactly
        # num_special_tokens ids are reserved.
        special_filler = [
            self.SPECIAL_TOKEN_TEMPLATE.format(id=i) for i in range(len(special_tokens), num_special_tokens)
        ]
        if special_filler:
            logger.info(f"Adding special tokens {special_filler[0]}, ..., {special_filler[-1]}")
        special_tokens = special_tokens + special_filler
        assert len(set(special_tokens)) == len(special_tokens) == num_special_tokens, special_tokens
        inner_vocab_size = vocab_size - num_special_tokens

        # reload vocab
        self._tekken_token2id_nospecial = _reload_mergeable_ranks(vocab, max_vocab=inner_vocab_size)
        assert set(range(inner_vocab_size)) == set(self._tekken_token2id_nospecial.values()), (
            inner_vocab_size,
            self._tekken_token2id_nospecial,
        )

        self._model = tiktoken.Encoding(
            name=name,
            pat_str=pattern,
            mergeable_ranks=self._tekken_token2id_nospecial,
            special_tokens={},  # special tokens are handled manually
        )
        self._all_special_tokens = special_tokens
        # String form of every token id, precomputed (see vocab() for caveats).
        self._vocab = [self.id_to_piece(i) for i in range(vocab_size)]
        self._version = version
        self._special_token_policy = SpecialTokenPolicy.RAISE
        self._mm_config = mm_config

    @classmethod
    def from_file(cls: Type["Tekkenizer"], path: Union[str, Path]) -> "Tekkenizer":
        """Load a Tekkenizer from a tekken JSON file (schema: ``ModelData``)."""
        if isinstance(path, str):
            path = Path(path)
        assert path.exists()
        with open(path, "r") as f:
            untyped = json.load(f)
        # Promote the raw multimodal dict (if present) to a typed config.
        if mm := untyped.get("multimodal", None):
            untyped["multimodal"] = MultimodalConfig(**mm)
        model_data: ModelData = untyped

        _version_str = model_data["config"].get("version")
        if _version_str not in TokenizerVersion.__members__:
            raise ValueError(
                f"Unknown version: {_version_str} in {path}. "
                f"Make sure to use a valid version string: {list(TokenizerVersion.__members__)}"
            )

        return cls(
            vocab=model_data["vocab"],
            pattern=model_data["config"]["pattern"],
            vocab_size=model_data["config"]["default_vocab_size"],
            num_special_tokens=model_data["config"]["default_num_special_tokens"],
            version=TokenizerVersion(_version_str),
            name=path.name.replace(".json", ""),
            _path=str(path),
            mm_config=model_data.get("multimodal"),
        )

    @property
    def multimodal(self) -> Optional[MultimodalConfig]:
        """Multimodal configuration, or ``None`` for text-only tokenizers."""
        return self._mm_config

    @multimodal.setter
    def multimodal(self, value: MultimodalConfig) -> None:
        # Deliberately rejected: the config is fixed at construction time.
        raise ValueError("Can only set Multimodal config at init")

    @property
    def num_special_tokens(self) -> int:
        """Number of ids reserved for special tokens (named + fillers)."""
        return len(self._all_special_tokens)

    @property
    def n_words(self) -> int:
        """Total vocabulary size, special tokens included."""
        return self._vocab_size

    @property
    def version(self) -> TokenizerVersion:
        """Tokenizer version this vocab was built for."""
        return self._version

    @property
    def special_token_policy(self) -> SpecialTokenPolicy:
        """How ``decode`` treats special tokens (defaults to RAISE)."""
        return self._special_token_policy

    @special_token_policy.setter
    def special_token_policy(self, policy: SpecialTokenPolicy) -> None:
        self._special_token_policy = policy

    @cached_property
    def bos_id(self) -> int:
        # NOTE(review): relies on SpecialTokens.bos comparing equal to "<s>"
        # (string-valued enum) for tuple.index() to find it — confirm.
        return self.SPECIAL_TOKENS.index("<s>")

    @cached_property
    def eos_id(self) -> int:
        # NOTE(review): same equality assumption as bos_id, for "</s>".
        return self.SPECIAL_TOKENS.index("</s>")

    @cached_property
    def pad_id(self) -> int:
        return self.SPECIAL_TOKENS.index("<pad>")

    @cached_property
    def unk_id(self) -> int:
        return self.SPECIAL_TOKENS.index("<unk>")

    def vocab(self) -> List[str]:
        """Return the string form of every token id.

        NOTE(review): a plain method, unlike the surrounding properties —
        callers must invoke it as ``tok.vocab()``.
        """
        # when returning self._vocab this will collapse
        # all tokens for which we have a decoding error into
        # the <?> string. This is bad and results in things
        # like len(set(vocab)) != len(vocab))
        # be careful when using self._vocab
        return self._vocab

    def encode(self, s: str, bos: bool, eos: bool) -> List[int]:
        """Encode *s* into token ids, optionally adding BOS/EOS markers."""
        tokens: List[int] = self._model.encode(s)
        # Shift BPE ids up to leave room for the special-token id range.
        tokens = [t + self.num_special_tokens for t in tokens]
        if bos:
            tokens = [self.bos_id, *tokens]
        if eos:
            tokens = [*tokens, self.eos_id]
        return tokens

    def _decode_all(self, tokens: List[int], special_token_policy: SpecialTokenPolicy) -> List[str]:
        """Decode *tokens* into string pieces, applying *special_token_policy*
        to any special-token ids encountered."""
        # Lump special and non-special tokens together to minimize calls to decode
        decoded: List[str] = []
        for is_special, group in groupby(tokens, lambda t: t < self.num_special_tokens):
            if is_special:
                if special_token_policy == SpecialTokenPolicy.RAISE:
                    raise ValueError(
                        f"Decoding `tokens` that contain special tokens ({list(group)}) is not allowed. \n"
                        "Either make sure `tokens` do not include any special tokens or, "
                        "if you want to decode `tokens` that includes special tokens, "
                        "change the tokenizer's special token policy to IGNORE or KEEP: \n"
                        "```\nfrom mistral_common.tokens.tokenizers.mistral import MistralTokenizer"
                        "\nfrom mistral_common.tokens.tokenizers.tekken import SpecialTokenPolicy"
                        "\n\ntokenizer = MistralTokenizer.v3(is_tekken=True)"
                        "\ntekken = tokenizer.instruct_tokenizer.tokenizer"
                        "\ntekken.special_token_policy = SpecialTokenPolicy.IGNORE # or SpecialTokenPolicy.KEEP"
                        "\n```"
                    )
                elif special_token_policy == SpecialTokenPolicy.KEEP:
                    decoded.extend(self._all_special_tokens[t] for t in group)
                elif special_token_policy == SpecialTokenPolicy.IGNORE:
                    continue
                # TODO: Could use "tokens_str" from vocab.json
                # but need to handle null cases.
            else:
                # Shift ids back down before handing them to tiktoken.
                decoded.append(self._model.decode([t - self.num_special_tokens for t in group]))
        return decoded

    def is_byte(self, token_id: int) -> bool:
        """True if *token_id* is one of the 256 byte-fallback tokens
        (the first 256 entries of the inner vocab, see _reload_mergeable_ranks)."""
        return 0 <= token_id - self.num_special_tokens < 256

    def get_control_token(self, s: str) -> int:
        """Return the id of special token *s*; raises ValueError if unknown."""
        try:
            return self._all_special_tokens.index(s)
        except ValueError:
            raise ValueError(f"Unknown control token {s}")

    def decode(self, tokens: List[int]) -> str:
        """Decode *tokens* using the tokenizer's current special-token policy."""
        return "".join(self._decode_all(tokens, special_token_policy=self._special_token_policy))

    def to_string(self, tokens: List[int]) -> str:
        """Decode *tokens*, always rendering special tokens as their string form."""
        return "".join(self._decode_all(tokens, special_token_policy=SpecialTokenPolicy.KEEP))

    def id_to_piece(self, token_id: int) -> str:
        """convert a token id to its string representation."""
        return self._decode_all([token_id], special_token_policy=SpecialTokenPolicy.KEEP)[0]

    def id_to_byte_piece(self, token_id: int) -> bytes:
        """convert a token id to its byte representation."""
        if token_id < self.num_special_tokens:
            if self._special_token_policy == SpecialTokenPolicy.KEEP:
                return self._all_special_tokens[token_id].encode("utf-8")
            elif self._special_token_policy == SpecialTokenPolicy.RAISE:
                raise ValueError(f"{token_id} is a special token")
        # NOTE(review): with policy IGNORE a special token id falls through to
        # this line with a negative shifted id — confirm this is intended.
        return self._model.decode_single_token_bytes(token_id - self.num_special_tokens)
285 |
+
|
286 |
+
def _reload_mergeable_ranks(
    vocab: List[TokenInfo],
    max_vocab: Union[int, None] = None,
) -> Dict[bytes, int]:
    """
    Reload our tokenizer JSON file and convert it to Tiktoken format.
    """
    logger.info(f"Vocab size: {len(vocab)}")
    if max_vocab is not None:
        assert len(vocab) >= max_vocab, (len(vocab), max_vocab)
        vocab = vocab[:max_vocab]
        logger.info(f"Cutting vocab to first {len(vocab)} tokens.")

    # Map each decoded byte sequence to its merge rank, validating as we go.
    ranks: Dict[bytes, int] = {}
    for expected_rank, entry in enumerate(vocab):
        assert entry.keys() == {"rank", "token_bytes", "token_str"}
        assert entry["rank"] == expected_rank
        token_bytes = base64.b64decode(entry["token_bytes"])
        # The first 256 entries must be the single-byte fallback tokens.
        assert expected_rank >= 256 or token_bytes == bytes([expected_rank]), (expected_rank, token_bytes)
        ranks[token_bytes] = entry["rank"]

    # Sanity check: byte sequences are unique and ranks cover exactly 0..n-1.
    assert len(ranks) == len(vocab)
    assert set(ranks.values()) == set(range(len(ranks)))

    return ranks
|
.venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/utils.py
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Iterator, List
|
2 |
+
|
3 |
+
|
4 |
+
def chunks(lst: List[str], chunk_size: int) -> Iterator[List[str]]:
    """Yield successive slices of *lst*, each at most *chunk_size* elements long."""
    for start in range(0, len(lst), chunk_size):
        chunk = lst[start : start + chunk_size]
        yield chunk
|
.venv/lib/python3.11/site-packages/numpy/ma/tests/__init__.py
ADDED
File without changes
|
.venv/lib/python3.11/site-packages/numpy/ma/tests/__pycache__/test_deprecations.cpython-311.pyc
ADDED
Binary file (5.43 kB). View file
|
|