koichi12 commited on
Commit
4f890a1
·
verified ·
1 Parent(s): 23aa6b4

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. .venv/lib/python3.11/site-packages/cryptography-43.0.3.dist-info/INSTALLER +1 -0
  3. .venv/lib/python3.11/site-packages/cryptography-43.0.3.dist-info/METADATA +138 -0
  4. .venv/lib/python3.11/site-packages/cryptography-43.0.3.dist-info/RECORD +173 -0
  5. .venv/lib/python3.11/site-packages/cryptography-43.0.3.dist-info/WHEEL +5 -0
  6. .venv/lib/python3.11/site-packages/cryptography-43.0.3.dist-info/license_files/LICENSE +3 -0
  7. .venv/lib/python3.11/site-packages/cryptography-43.0.3.dist-info/license_files/LICENSE.APACHE +202 -0
  8. .venv/lib/python3.11/site-packages/cryptography-43.0.3.dist-info/license_files/LICENSE.BSD +27 -0
  9. .venv/lib/python3.11/site-packages/jsonschema-4.23.0.dist-info/INSTALLER +1 -0
  10. .venv/lib/python3.11/site-packages/jsonschema-4.23.0.dist-info/METADATA +176 -0
  11. .venv/lib/python3.11/site-packages/jsonschema-4.23.0.dist-info/RECORD +76 -0
  12. .venv/lib/python3.11/site-packages/jsonschema-4.23.0.dist-info/WHEEL +4 -0
  13. .venv/lib/python3.11/site-packages/jsonschema-4.23.0.dist-info/entry_points.txt +2 -0
  14. .venv/lib/python3.11/site-packages/jsonschema-4.23.0.dist-info/licenses/COPYING +19 -0
  15. .venv/lib/python3.11/site-packages/mistral_common/__pycache__/__init__.cpython-311.pyc +0 -0
  16. .venv/lib/python3.11/site-packages/mistral_common/__pycache__/base.cpython-311.pyc +0 -0
  17. .venv/lib/python3.11/site-packages/mistral_common/__pycache__/exceptions.cpython-311.pyc +0 -0
  18. .venv/lib/python3.11/site-packages/mistral_common/__pycache__/multimodal.cpython-311.pyc +0 -0
  19. .venv/lib/python3.11/site-packages/mistral_common/data/mistral_instruct_tokenizer_240323.model.v3 +3 -0
  20. .venv/lib/python3.11/site-packages/mistral_common/protocol/embedding/__init__.py +0 -0
  21. .venv/lib/python3.11/site-packages/mistral_common/protocol/embedding/__pycache__/__init__.cpython-311.pyc +0 -0
  22. .venv/lib/python3.11/site-packages/mistral_common/protocol/embedding/__pycache__/request.cpython-311.pyc +0 -0
  23. .venv/lib/python3.11/site-packages/mistral_common/protocol/embedding/__pycache__/response.cpython-311.pyc +0 -0
  24. .venv/lib/python3.11/site-packages/mistral_common/protocol/embedding/request.py +11 -0
  25. .venv/lib/python3.11/site-packages/mistral_common/protocol/embedding/response.py +21 -0
  26. .venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/__pycache__/__init__.cpython-311.pyc +0 -0
  27. .venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/__pycache__/messages.cpython-311.pyc +0 -0
  28. .venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/__pycache__/response.cpython-311.pyc +0 -0
  29. .venv/lib/python3.11/site-packages/mistral_common/tokens/__init__.py +0 -0
  30. .venv/lib/python3.11/site-packages/mistral_common/tokens/__pycache__/__init__.cpython-311.pyc +0 -0
  31. .venv/lib/python3.11/site-packages/mistral_common/tokens/instruct/__init__.py +0 -0
  32. .venv/lib/python3.11/site-packages/mistral_common/tokens/instruct/__pycache__/__init__.cpython-311.pyc +0 -0
  33. .venv/lib/python3.11/site-packages/mistral_common/tokens/instruct/__pycache__/request.cpython-311.pyc +0 -0
  34. .venv/lib/python3.11/site-packages/mistral_common/tokens/instruct/request.py +25 -0
  35. .venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/__init__.py +0 -0
  36. .venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/__pycache__/__init__.cpython-311.pyc +0 -0
  37. .venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/__pycache__/base.cpython-311.pyc +0 -0
  38. .venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/__pycache__/mistral.cpython-311.pyc +0 -0
  39. .venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/__pycache__/multimodal.cpython-311.pyc +0 -0
  40. .venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/__pycache__/sentencepiece.cpython-311.pyc +0 -0
  41. .venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/__pycache__/tekken.cpython-311.pyc +0 -0
  42. .venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/__pycache__/utils.cpython-311.pyc +0 -0
  43. .venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/base.py +200 -0
  44. .venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/mistral.py +251 -0
  45. .venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/multimodal.py +172 -0
  46. .venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/sentencepiece.py +672 -0
  47. .venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/tekken.py +312 -0
  48. .venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/utils.py +6 -0
  49. .venv/lib/python3.11/site-packages/numpy/ma/tests/__init__.py +0 -0
  50. .venv/lib/python3.11/site-packages/numpy/ma/tests/__pycache__/test_deprecations.cpython-311.pyc +0 -0
.gitattributes CHANGED
@@ -391,3 +391,4 @@ tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia/cudnn/lib/
391
  .venv/lib/python3.11/site-packages/numpy/lib/__pycache__/function_base.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
392
  .venv/lib/python3.11/site-packages/numpy/lib/__pycache__/npyio.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
393
  .venv/lib/python3.11/site-packages/numpy/lib/tests/__pycache__/test_function_base.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
 
 
391
  .venv/lib/python3.11/site-packages/numpy/lib/__pycache__/function_base.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
392
  .venv/lib/python3.11/site-packages/numpy/lib/__pycache__/npyio.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
393
  .venv/lib/python3.11/site-packages/numpy/lib/tests/__pycache__/test_function_base.cpython-311.pyc filter=lfs diff=lfs merge=lfs -text
394
+ .venv/lib/python3.11/site-packages/mistral_common/data/mistral_instruct_tokenizer_240323.model.v3 filter=lfs diff=lfs merge=lfs -text
.venv/lib/python3.11/site-packages/cryptography-43.0.3.dist-info/INSTALLER ADDED
@@ -0,0 +1 @@
 
 
1
+ pip
.venv/lib/python3.11/site-packages/cryptography-43.0.3.dist-info/METADATA ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Metadata-Version: 2.3
2
+ Name: cryptography
3
+ Version: 43.0.3
4
+ Classifier: Development Status :: 5 - Production/Stable
5
+ Classifier: Intended Audience :: Developers
6
+ Classifier: License :: OSI Approved :: Apache Software License
7
+ Classifier: License :: OSI Approved :: BSD License
8
+ Classifier: Natural Language :: English
9
+ Classifier: Operating System :: MacOS :: MacOS X
10
+ Classifier: Operating System :: POSIX
11
+ Classifier: Operating System :: POSIX :: BSD
12
+ Classifier: Operating System :: POSIX :: Linux
13
+ Classifier: Operating System :: Microsoft :: Windows
14
+ Classifier: Programming Language :: Python
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3 :: Only
17
+ Classifier: Programming Language :: Python :: 3.7
18
+ Classifier: Programming Language :: Python :: 3.8
19
+ Classifier: Programming Language :: Python :: 3.9
20
+ Classifier: Programming Language :: Python :: 3.10
21
+ Classifier: Programming Language :: Python :: 3.11
22
+ Classifier: Programming Language :: Python :: 3.12
23
+ Classifier: Programming Language :: Python :: Implementation :: CPython
24
+ Classifier: Programming Language :: Python :: Implementation :: PyPy
25
+ Classifier: Topic :: Security :: Cryptography
26
+ Requires-Dist: cffi >=1.12 ; platform_python_implementation != 'PyPy'
27
+ Requires-Dist: bcrypt >=3.1.5 ; extra == 'ssh'
28
+ Requires-Dist: nox ; extra == 'nox'
29
+ Requires-Dist: cryptography-vectors ==43.0.3 ; extra == 'test'
30
+ Requires-Dist: pytest >=6.2.0 ; extra == 'test'
31
+ Requires-Dist: pytest-benchmark ; extra == 'test'
32
+ Requires-Dist: pytest-cov ; extra == 'test'
33
+ Requires-Dist: pytest-xdist ; extra == 'test'
34
+ Requires-Dist: pretend ; extra == 'test'
35
+ Requires-Dist: certifi ; extra == 'test'
36
+ Requires-Dist: pytest-randomly ; extra == 'test-randomorder'
37
+ Requires-Dist: sphinx >=5.3.0 ; extra == 'docs'
38
+ Requires-Dist: sphinx-rtd-theme >=1.1.1 ; extra == 'docs'
39
+ Requires-Dist: pyenchant >=1.6.11 ; extra == 'docstest'
40
+ Requires-Dist: readme-renderer ; extra == 'docstest'
41
+ Requires-Dist: sphinxcontrib-spelling >=4.0.1 ; extra == 'docstest'
42
+ Requires-Dist: build ; extra == 'sdist'
43
+ Requires-Dist: ruff ; extra == 'pep8test'
44
+ Requires-Dist: mypy ; extra == 'pep8test'
45
+ Requires-Dist: check-sdist ; extra == 'pep8test'
46
+ Requires-Dist: click ; extra == 'pep8test'
47
+ Provides-Extra: ssh
48
+ Provides-Extra: nox
49
+ Provides-Extra: test
50
+ Provides-Extra: test-randomorder
51
+ Provides-Extra: docs
52
+ Provides-Extra: docstest
53
+ Provides-Extra: sdist
54
+ Provides-Extra: pep8test
55
+ License-File: LICENSE
56
+ License-File: LICENSE.APACHE
57
+ License-File: LICENSE.BSD
58
+ Summary: cryptography is a package which provides cryptographic recipes and primitives to Python developers.
59
+ Author: The cryptography developers <[email protected]>
60
+ Author-email: The Python Cryptographic Authority and individual contributors <[email protected]>
61
+ License: Apache-2.0 OR BSD-3-Clause
62
+ Requires-Python: >=3.7
63
+ Description-Content-Type: text/x-rst; charset=UTF-8
64
+ Project-URL: homepage, https://github.com/pyca/cryptography
65
+ Project-URL: documentation, https://cryptography.io/
66
+ Project-URL: source, https://github.com/pyca/cryptography/
67
+ Project-URL: issues, https://github.com/pyca/cryptography/issues
68
+ Project-URL: changelog, https://cryptography.io/en/latest/changelog/
69
+
70
+ pyca/cryptography
71
+ =================
72
+
73
+ .. image:: https://img.shields.io/pypi/v/cryptography.svg
74
+ :target: https://pypi.org/project/cryptography/
75
+ :alt: Latest Version
76
+
77
+ .. image:: https://readthedocs.org/projects/cryptography/badge/?version=latest
78
+ :target: https://cryptography.io
79
+ :alt: Latest Docs
80
+
81
+ .. image:: https://github.com/pyca/cryptography/workflows/CI/badge.svg?branch=main
82
+ :target: https://github.com/pyca/cryptography/actions?query=workflow%3ACI+branch%3Amain
83
+
84
+
85
+ ``cryptography`` is a package which provides cryptographic recipes and
86
+ primitives to Python developers. Our goal is for it to be your "cryptographic
87
+ standard library". It supports Python 3.7+ and PyPy3 7.3.11+.
88
+
89
+ ``cryptography`` includes both high level recipes and low level interfaces to
90
+ common cryptographic algorithms such as symmetric ciphers, message digests, and
91
+ key derivation functions. For example, to encrypt something with
92
+ ``cryptography``'s high level symmetric encryption recipe:
93
+
94
+ .. code-block:: pycon
95
+
96
+ >>> from cryptography.fernet import Fernet
97
+ >>> # Put this somewhere safe!
98
+ >>> key = Fernet.generate_key()
99
+ >>> f = Fernet(key)
100
+ >>> token = f.encrypt(b"A really secret message. Not for prying eyes.")
101
+ >>> token
102
+ b'...'
103
+ >>> f.decrypt(token)
104
+ b'A really secret message. Not for prying eyes.'
105
+
106
+ You can find more information in the `documentation`_.
107
+
108
+ You can install ``cryptography`` with:
109
+
110
+ .. code-block:: console
111
+
112
+ $ pip install cryptography
113
+
114
+ For full details see `the installation documentation`_.
115
+
116
+ Discussion
117
+ ~~~~~~~~~~
118
+
119
+ If you run into bugs, you can file them in our `issue tracker`_.
120
+
121
+ We maintain a `cryptography-dev`_ mailing list for development discussion.
122
+
123
+ You can also join ``#pyca`` on ``irc.libera.chat`` to ask questions or get
124
+ involved.
125
+
126
+ Security
127
+ ~~~~~~~~
128
+
129
+ Need to report a security issue? Please consult our `security reporting`_
130
+ documentation.
131
+
132
+
133
+ .. _`documentation`: https://cryptography.io/
134
+ .. _`the installation documentation`: https://cryptography.io/en/latest/installation/
135
+ .. _`issue tracker`: https://github.com/pyca/cryptography/issues
136
+ .. _`cryptography-dev`: https://mail.python.org/mailman/listinfo/cryptography-dev
137
+ .. _`security reporting`: https://cryptography.io/en/latest/security/
138
+
.venv/lib/python3.11/site-packages/cryptography-43.0.3.dist-info/RECORD ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ cryptography-43.0.3.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
2
+ cryptography-43.0.3.dist-info/METADATA,sha256=6zbg5CUehHnvNpZEQHVe8ivt1BG6h6k_cm-o5bsOZLA,5440
3
+ cryptography-43.0.3.dist-info/RECORD,,
4
+ cryptography-43.0.3.dist-info/WHEEL,sha256=5SNCVD9cb88a-xAIrDHIo1CvpgNriOYcNgb4b8rPcOw,107
5
+ cryptography-43.0.3.dist-info/license_files/LICENSE,sha256=Pgx8CRqUi4JTO6mP18u0BDLW8amsv4X1ki0vmak65rs,197
6
+ cryptography-43.0.3.dist-info/license_files/LICENSE.APACHE,sha256=qsc7MUj20dcRHbyjIJn2jSbGRMaBOuHk8F9leaomY_4,11360
7
+ cryptography-43.0.3.dist-info/license_files/LICENSE.BSD,sha256=YCxMdILeZHndLpeTzaJ15eY9dz2s0eymiSMqtwCPtPs,1532
8
+ cryptography/__about__.py,sha256=-FkHKD9mSuEfH37wsSKnQzJZmL5zUAUTpB5OeUQjPE0,445
9
+ cryptography/__init__.py,sha256=mthuUrTd4FROCpUYrTIqhjz6s6T9djAZrV7nZ1oMm2o,364
10
+ cryptography/__pycache__/__about__.cpython-311.pyc,,
11
+ cryptography/__pycache__/__init__.cpython-311.pyc,,
12
+ cryptography/__pycache__/exceptions.cpython-311.pyc,,
13
+ cryptography/__pycache__/fernet.cpython-311.pyc,,
14
+ cryptography/__pycache__/utils.cpython-311.pyc,,
15
+ cryptography/exceptions.py,sha256=835EWILc2fwxw-gyFMriciC2SqhViETB10LBSytnDIc,1087
16
+ cryptography/fernet.py,sha256=aPj82w-Z_1GBXUtWRUsZdVbMwRo5Mbjj0wkA9wG4rkw,6696
17
+ cryptography/hazmat/__init__.py,sha256=5IwrLWrVp0AjEr_4FdWG_V057NSJGY_W4egNNsuct0g,455
18
+ cryptography/hazmat/__pycache__/__init__.cpython-311.pyc,,
19
+ cryptography/hazmat/__pycache__/_oid.cpython-311.pyc,,
20
+ cryptography/hazmat/_oid.py,sha256=e9yLmxtdQtuL94ztQv3SGtt_ea1Mx6aUwGftJsP6EXk,15201
21
+ cryptography/hazmat/backends/__init__.py,sha256=O5jvKFQdZnXhKeqJ-HtulaEL9Ni7mr1mDzZY5kHlYhI,361
22
+ cryptography/hazmat/backends/__pycache__/__init__.cpython-311.pyc,,
23
+ cryptography/hazmat/backends/openssl/__init__.py,sha256=p3jmJfnCag9iE5sdMrN6VvVEu55u46xaS_IjoI0SrmA,305
24
+ cryptography/hazmat/backends/openssl/__pycache__/__init__.cpython-311.pyc,,
25
+ cryptography/hazmat/backends/openssl/__pycache__/backend.cpython-311.pyc,,
26
+ cryptography/hazmat/backends/openssl/backend.py,sha256=pUXUbugLwMm2Gls-h5U5fw2RvepaNjEvnao6CTmL1xQ,9648
27
+ cryptography/hazmat/bindings/__init__.py,sha256=s9oKCQ2ycFdXoERdS1imafueSkBsL9kvbyfghaauZ9Y,180
28
+ cryptography/hazmat/bindings/__pycache__/__init__.cpython-311.pyc,,
29
+ cryptography/hazmat/bindings/_rust.abi3.so,sha256=QrghdFa6x-vG5lFvGVfv-slqoy0UA9a8eHmtp9hzGCk,10862344
30
+ cryptography/hazmat/bindings/_rust/__init__.pyi,sha256=wb1OT76lG19vjq97_q2MM3qdJlQhyloXfVbKFDmRse4,737
31
+ cryptography/hazmat/bindings/_rust/_openssl.pyi,sha256=mpNJLuYLbCVrd5i33FBTmWwL_55Dw7JPkSLlSX9Q7oI,230
32
+ cryptography/hazmat/bindings/_rust/asn1.pyi,sha256=BrGjC8J6nwuS-r3EVcdXJB8ndotfY9mbQYOfpbPG0HA,354
33
+ cryptography/hazmat/bindings/_rust/exceptions.pyi,sha256=exXr2xw_0pB1kk93cYbM3MohbzoUkjOms1ZMUi0uQZE,640
34
+ cryptography/hazmat/bindings/_rust/ocsp.pyi,sha256=R-xJ-XmJZ1lOk-fWHHvRnP3QNTCFnKv-l3xlNWfLVt4,868
35
+ cryptography/hazmat/bindings/_rust/openssl/__init__.pyi,sha256=Lvn250QMdPyeF-hoBF6rkQgHLBJxVauXCb8i8uYTomQ,1368
36
+ cryptography/hazmat/bindings/_rust/openssl/aead.pyi,sha256=i0gA3jUQ4rkJXTGGZrq-AuY-VQLN31lyDeWuDZ0zJYw,2553
37
+ cryptography/hazmat/bindings/_rust/openssl/ciphers.pyi,sha256=iK0ZhQ-WyCQbjaraaFgK6q4PpD-7Rf5RDHkFD3YEW_g,1301
38
+ cryptography/hazmat/bindings/_rust/openssl/cmac.pyi,sha256=nPH0X57RYpsAkRowVpjQiHE566ThUTx7YXrsadmrmHk,564
39
+ cryptography/hazmat/bindings/_rust/openssl/dh.pyi,sha256=Z3TC-G04-THtSdAOPLM1h2G7ml5bda1ElZUcn5wpuhk,1564
40
+ cryptography/hazmat/bindings/_rust/openssl/dsa.pyi,sha256=qBtkgj2albt2qFcnZ9UDrhzoNhCVO7HTby5VSf1EXMI,1299
41
+ cryptography/hazmat/bindings/_rust/openssl/ec.pyi,sha256=zJy0pRa5n-_p2dm45PxECB_-B6SVZyNKfjxFDpPqT38,1691
42
+ cryptography/hazmat/bindings/_rust/openssl/ed25519.pyi,sha256=OJsrblS2nHptZctva-pAKFL5q8yPEAkhmjPZpJ6TA94,493
43
+ cryptography/hazmat/bindings/_rust/openssl/ed448.pyi,sha256=SkPHK2HdbYN02TVQEUOgW3iTdiEY7HBE4DijpdkAzmk,475
44
+ cryptography/hazmat/bindings/_rust/openssl/hashes.pyi,sha256=J8HoN0GdtPcjRAfNHr5Elva_nkmQfq63L75_z9dd8Uc,573
45
+ cryptography/hazmat/bindings/_rust/openssl/hmac.pyi,sha256=ZmLJ73pmxcZFC1XosWEiXMRYtvJJor3ZLdCQOJu85Cw,662
46
+ cryptography/hazmat/bindings/_rust/openssl/kdf.pyi,sha256=wPS5c7NLspM2632II0I4iH1RSxZvSRtBOVqmpyQATfk,544
47
+ cryptography/hazmat/bindings/_rust/openssl/keys.pyi,sha256=JSrlGNaW49ZCZ1hcb-YJdS1EAbsMwRbVEcLL0P9OApA,872
48
+ cryptography/hazmat/bindings/_rust/openssl/poly1305.pyi,sha256=9iogF7Q4i81IkOS-IMXp6HvxFF_3cNy_ucrAjVQnn14,540
49
+ cryptography/hazmat/bindings/_rust/openssl/rsa.pyi,sha256=2OQCNSXkxgc-3uw1xiCCloIQTV6p9_kK79Yu0rhZgPc,1364
50
+ cryptography/hazmat/bindings/_rust/openssl/x25519.pyi,sha256=2BKdbrddM_9SMUpdvHKGhb9MNjURCarPxccbUDzHeoA,484
51
+ cryptography/hazmat/bindings/_rust/openssl/x448.pyi,sha256=AoRMWNvCJTiH5L-lkIkCdPlrPLUdJvvfXpIvf1GmxpM,466
52
+ cryptography/hazmat/bindings/_rust/pkcs12.pyi,sha256=afhB_6M8xI1MIE5vxkaDF1jSxA48ib1--NiOxtf6boM,1394
53
+ cryptography/hazmat/bindings/_rust/pkcs7.pyi,sha256=QCmuA0IgDr4iOecUOXgUUeh3BAjJx8ubjz__EnNbyGY,972
54
+ cryptography/hazmat/bindings/_rust/test_support.pyi,sha256=Xo1Gd7bh9rU4HuIS4pm9UwCY6IS1gInvFwmhABLOVO4,936
55
+ cryptography/hazmat/bindings/_rust/x509.pyi,sha256=WLrGmqmFss8dXKhlG_J9nVhoCcodR72xJdCoxEuBtjY,3551
56
+ cryptography/hazmat/bindings/openssl/__init__.py,sha256=s9oKCQ2ycFdXoERdS1imafueSkBsL9kvbyfghaauZ9Y,180
57
+ cryptography/hazmat/bindings/openssl/__pycache__/__init__.cpython-311.pyc,,
58
+ cryptography/hazmat/bindings/openssl/__pycache__/_conditional.cpython-311.pyc,,
59
+ cryptography/hazmat/bindings/openssl/__pycache__/binding.cpython-311.pyc,,
60
+ cryptography/hazmat/bindings/openssl/_conditional.py,sha256=dkGKGU-22uR2ZKeOOwaSxEJCGaafgUjb2romWcu03QE,5163
61
+ cryptography/hazmat/bindings/openssl/binding.py,sha256=e1gnFAZBPrkJ3CsiZV-ug6kaPdNTAEROaUFiFrUh71M,4042
62
+ cryptography/hazmat/decrepit/__init__.py,sha256=wHCbWfaefa-fk6THSw9th9fJUsStJo7245wfFBqmduA,216
63
+ cryptography/hazmat/decrepit/__pycache__/__init__.cpython-311.pyc,,
64
+ cryptography/hazmat/decrepit/ciphers/__init__.py,sha256=wHCbWfaefa-fk6THSw9th9fJUsStJo7245wfFBqmduA,216
65
+ cryptography/hazmat/decrepit/ciphers/__pycache__/__init__.cpython-311.pyc,,
66
+ cryptography/hazmat/decrepit/ciphers/__pycache__/algorithms.cpython-311.pyc,,
67
+ cryptography/hazmat/decrepit/ciphers/algorithms.py,sha256=HWA4PKDS2w4D2dQoRerpLRU7Kntt5vJeJC7j--AlZVU,2520
68
+ cryptography/hazmat/primitives/__init__.py,sha256=s9oKCQ2ycFdXoERdS1imafueSkBsL9kvbyfghaauZ9Y,180
69
+ cryptography/hazmat/primitives/__pycache__/__init__.cpython-311.pyc,,
70
+ cryptography/hazmat/primitives/__pycache__/_asymmetric.cpython-311.pyc,,
71
+ cryptography/hazmat/primitives/__pycache__/_cipheralgorithm.cpython-311.pyc,,
72
+ cryptography/hazmat/primitives/__pycache__/_serialization.cpython-311.pyc,,
73
+ cryptography/hazmat/primitives/__pycache__/cmac.cpython-311.pyc,,
74
+ cryptography/hazmat/primitives/__pycache__/constant_time.cpython-311.pyc,,
75
+ cryptography/hazmat/primitives/__pycache__/hashes.cpython-311.pyc,,
76
+ cryptography/hazmat/primitives/__pycache__/hmac.cpython-311.pyc,,
77
+ cryptography/hazmat/primitives/__pycache__/keywrap.cpython-311.pyc,,
78
+ cryptography/hazmat/primitives/__pycache__/padding.cpython-311.pyc,,
79
+ cryptography/hazmat/primitives/__pycache__/poly1305.cpython-311.pyc,,
80
+ cryptography/hazmat/primitives/_asymmetric.py,sha256=RhgcouUB6HTiFDBrR1LxqkMjpUxIiNvQ1r_zJjRG6qQ,532
81
+ cryptography/hazmat/primitives/_cipheralgorithm.py,sha256=gKa0WrLz6K4fqhnGbfBYKDSxgLxsPU0uj_EK2UT47W4,1495
82
+ cryptography/hazmat/primitives/_serialization.py,sha256=qrozc8fw2WZSbjk3DAlSl3ResxpauwJ74ZgGoUL-mj0,5142
83
+ cryptography/hazmat/primitives/asymmetric/__init__.py,sha256=s9oKCQ2ycFdXoERdS1imafueSkBsL9kvbyfghaauZ9Y,180
84
+ cryptography/hazmat/primitives/asymmetric/__pycache__/__init__.cpython-311.pyc,,
85
+ cryptography/hazmat/primitives/asymmetric/__pycache__/dh.cpython-311.pyc,,
86
+ cryptography/hazmat/primitives/asymmetric/__pycache__/dsa.cpython-311.pyc,,
87
+ cryptography/hazmat/primitives/asymmetric/__pycache__/ec.cpython-311.pyc,,
88
+ cryptography/hazmat/primitives/asymmetric/__pycache__/ed25519.cpython-311.pyc,,
89
+ cryptography/hazmat/primitives/asymmetric/__pycache__/ed448.cpython-311.pyc,,
90
+ cryptography/hazmat/primitives/asymmetric/__pycache__/padding.cpython-311.pyc,,
91
+ cryptography/hazmat/primitives/asymmetric/__pycache__/rsa.cpython-311.pyc,,
92
+ cryptography/hazmat/primitives/asymmetric/__pycache__/types.cpython-311.pyc,,
93
+ cryptography/hazmat/primitives/asymmetric/__pycache__/utils.cpython-311.pyc,,
94
+ cryptography/hazmat/primitives/asymmetric/__pycache__/x25519.cpython-311.pyc,,
95
+ cryptography/hazmat/primitives/asymmetric/__pycache__/x448.cpython-311.pyc,,
96
+ cryptography/hazmat/primitives/asymmetric/dh.py,sha256=OOCjMClH1Bf14Sy7jAdwzEeCxFPb8XUe2qePbExvXwc,3420
97
+ cryptography/hazmat/primitives/asymmetric/dsa.py,sha256=xBwdf0pZOgvqjUKcO7Q0L3NxwalYj0SJDUqThemhSmI,3945
98
+ cryptography/hazmat/primitives/asymmetric/ec.py,sha256=lwZmtAwi3PM8lsY1MsNaby_bVi--49OCxwE_1yqKC-A,10428
99
+ cryptography/hazmat/primitives/asymmetric/ed25519.py,sha256=kl63fg7myuMjNTmMoVFeH6iVr0x5FkjNmggxIRTloJk,3423
100
+ cryptography/hazmat/primitives/asymmetric/ed448.py,sha256=2UzEDzzfkPn83UFVFlMZfIMbAixxY09WmQyrwinWTn8,3456
101
+ cryptography/hazmat/primitives/asymmetric/padding.py,sha256=eZcvUqVLbe3u48SunLdeniaPlV4-k6pwBl67OW4jSy8,2885
102
+ cryptography/hazmat/primitives/asymmetric/rsa.py,sha256=nW_Ko7PID9UBJF10GVJOc_1L00ymFsfZDUJYtM5kfGQ,7637
103
+ cryptography/hazmat/primitives/asymmetric/types.py,sha256=LnsOJym-wmPUJ7Knu_7bCNU3kIiELCd6krOaW_JU08I,2996
104
+ cryptography/hazmat/primitives/asymmetric/utils.py,sha256=DPTs6T4F-UhwzFQTh-1fSEpQzazH2jf2xpIro3ItF4o,790
105
+ cryptography/hazmat/primitives/asymmetric/x25519.py,sha256=VGYuRdIYuVBtizpFdNWd2bTrT10JRa1admQdBr08xz8,3341
106
+ cryptography/hazmat/primitives/asymmetric/x448.py,sha256=GKKJBqYLr03VewMF18bXIM941aaWcZIQ4rC02GLLEmw,3374
107
+ cryptography/hazmat/primitives/ciphers/__init__.py,sha256=eyEXmjk6_CZXaOPYDr7vAYGXr29QvzgWL2-4CSolLFs,680
108
+ cryptography/hazmat/primitives/ciphers/__pycache__/__init__.cpython-311.pyc,,
109
+ cryptography/hazmat/primitives/ciphers/__pycache__/aead.cpython-311.pyc,,
110
+ cryptography/hazmat/primitives/ciphers/__pycache__/algorithms.cpython-311.pyc,,
111
+ cryptography/hazmat/primitives/ciphers/__pycache__/base.cpython-311.pyc,,
112
+ cryptography/hazmat/primitives/ciphers/__pycache__/modes.cpython-311.pyc,,
113
+ cryptography/hazmat/primitives/ciphers/aead.py,sha256=Fzlyx7w8KYQakzDp1zWgJnIr62zgZrgVh1u2h4exB54,634
114
+ cryptography/hazmat/primitives/ciphers/algorithms.py,sha256=QvBMDmphRZfNmykij58L5eDkd_2NnCzIpJpyX2QwMxc,4223
115
+ cryptography/hazmat/primitives/ciphers/base.py,sha256=tg-XNaKUyETBi7ounGDEL1_ICn-s4FF9LR7moV58blI,4211
116
+ cryptography/hazmat/primitives/ciphers/modes.py,sha256=BFpxEGSaxoeZjrQ4sqpyPDvKClrqfDKIBv7kYtFURhE,8192
117
+ cryptography/hazmat/primitives/cmac.py,sha256=sz_s6H_cYnOvx-VNWdIKhRhe3Ymp8z8J0D3CBqOX3gg,338
118
+ cryptography/hazmat/primitives/constant_time.py,sha256=xdunWT0nf8OvKdcqUhhlFKayGp4_PgVJRU2W1wLSr_A,422
119
+ cryptography/hazmat/primitives/hashes.py,sha256=EvDIJBhj83Z7f-oHbsA0TzZLFSDV_Yv8hQRdM4o8FD0,5091
120
+ cryptography/hazmat/primitives/hmac.py,sha256=RpB3z9z5skirCQrm7zQbtnp9pLMnAjrlTUvKqF5aDDc,423
121
+ cryptography/hazmat/primitives/kdf/__init__.py,sha256=4XibZnrYq4hh5xBjWiIXzaYW6FKx8hPbVaa_cB9zS64,750
122
+ cryptography/hazmat/primitives/kdf/__pycache__/__init__.cpython-311.pyc,,
123
+ cryptography/hazmat/primitives/kdf/__pycache__/concatkdf.cpython-311.pyc,,
124
+ cryptography/hazmat/primitives/kdf/__pycache__/hkdf.cpython-311.pyc,,
125
+ cryptography/hazmat/primitives/kdf/__pycache__/kbkdf.cpython-311.pyc,,
126
+ cryptography/hazmat/primitives/kdf/__pycache__/pbkdf2.cpython-311.pyc,,
127
+ cryptography/hazmat/primitives/kdf/__pycache__/scrypt.cpython-311.pyc,,
128
+ cryptography/hazmat/primitives/kdf/__pycache__/x963kdf.cpython-311.pyc,,
129
+ cryptography/hazmat/primitives/kdf/concatkdf.py,sha256=bcn4NGXse-EsFl7nlU83e5ilop7TSHcX-CJJS107W80,3686
130
+ cryptography/hazmat/primitives/kdf/hkdf.py,sha256=uhN5L87w4JvtAqQcPh_Ji2TPSc18IDThpaYJiHOWy3A,3015
131
+ cryptography/hazmat/primitives/kdf/kbkdf.py,sha256=eSuLK1sATkamgCAit794jLr7sDNlu5X0USdcWhwJdmk,9146
132
+ cryptography/hazmat/primitives/kdf/pbkdf2.py,sha256=Xj3YIeX30h2BUaoJAtOo1RMXV_em0-eCG0PU_0FHJzM,1950
133
+ cryptography/hazmat/primitives/kdf/scrypt.py,sha256=4QONhjxA_ZtuQtQ7QV3FnbB8ftrFnM52B4HPfV7hFys,2354
134
+ cryptography/hazmat/primitives/kdf/x963kdf.py,sha256=wCpWmwQjZ2vAu2rlk3R_PX0nINl8WGXYBmlyMOC5iPw,1992
135
+ cryptography/hazmat/primitives/keywrap.py,sha256=XV4Pj2fqSeD-RqZVvY2cA3j5_7RwJSFygYuLfk2ujCo,5650
136
+ cryptography/hazmat/primitives/padding.py,sha256=QUq0n-EAgEan9aQzuTsiJYGKbWiK1nSHkcYjDF1L1ok,5518
137
+ cryptography/hazmat/primitives/poly1305.py,sha256=P5EPQV-RB_FJPahpg01u0Ts4S_PnAmsroxIGXbGeRRo,355
138
+ cryptography/hazmat/primitives/serialization/__init__.py,sha256=jyNx_7NcOEbVRBY4nP9ks0IVXBafbcYnTK27vafPLW8,1653
139
+ cryptography/hazmat/primitives/serialization/__pycache__/__init__.cpython-311.pyc,,
140
+ cryptography/hazmat/primitives/serialization/__pycache__/base.cpython-311.pyc,,
141
+ cryptography/hazmat/primitives/serialization/__pycache__/pkcs12.cpython-311.pyc,,
142
+ cryptography/hazmat/primitives/serialization/__pycache__/pkcs7.cpython-311.pyc,,
143
+ cryptography/hazmat/primitives/serialization/__pycache__/ssh.cpython-311.pyc,,
144
+ cryptography/hazmat/primitives/serialization/base.py,sha256=ikq5MJIwp_oUnjiaBco_PmQwOTYuGi-XkYUYHKy8Vo0,615
145
+ cryptography/hazmat/primitives/serialization/pkcs12.py,sha256=7vVXbiP7qhhvKAHJT_M8-LBZdbpOwrpWRHWxNrNqzXE,4492
146
+ cryptography/hazmat/primitives/serialization/pkcs7.py,sha256=CNzcsuDMyEFMe3EUii4NfJlQzmakB2hLlfRFYObnHRs,11141
147
+ cryptography/hazmat/primitives/serialization/ssh.py,sha256=VKscMrVdYK5B9PQISjjdRMglRvqa_L3sDNm5vdjVHJY,51915
148
+ cryptography/hazmat/primitives/twofactor/__init__.py,sha256=tmMZGB-g4IU1r7lIFqASU019zr0uPp_wEBYcwdDCKCA,258
149
+ cryptography/hazmat/primitives/twofactor/__pycache__/__init__.cpython-311.pyc,,
150
+ cryptography/hazmat/primitives/twofactor/__pycache__/hotp.cpython-311.pyc,,
151
+ cryptography/hazmat/primitives/twofactor/__pycache__/totp.cpython-311.pyc,,
152
+ cryptography/hazmat/primitives/twofactor/hotp.py,sha256=l1YdRMIhfPIuHKkA66keBDHhNbnBAlh6-O44P-OHIK8,2976
153
+ cryptography/hazmat/primitives/twofactor/totp.py,sha256=v0y0xKwtYrP83ypOo5Ofd441RJLOkaFfjmp554jo5F0,1450
154
+ cryptography/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
155
+ cryptography/utils.py,sha256=Rp7ppg4XIBVVzNQ6XngGndwkICJoYp6FoFOOgTWLJ7g,3925
156
+ cryptography/x509/__init__.py,sha256=uGdiViR7KFnWGoJFVUStt-e_ufomWc87RQBGAZ7dT-4,7980
157
+ cryptography/x509/__pycache__/__init__.cpython-311.pyc,,
158
+ cryptography/x509/__pycache__/base.cpython-311.pyc,,
159
+ cryptography/x509/__pycache__/certificate_transparency.cpython-311.pyc,,
160
+ cryptography/x509/__pycache__/extensions.cpython-311.pyc,,
161
+ cryptography/x509/__pycache__/general_name.cpython-311.pyc,,
162
+ cryptography/x509/__pycache__/name.cpython-311.pyc,,
163
+ cryptography/x509/__pycache__/ocsp.cpython-311.pyc,,
164
+ cryptography/x509/__pycache__/oid.cpython-311.pyc,,
165
+ cryptography/x509/__pycache__/verification.cpython-311.pyc,,
166
+ cryptography/x509/base.py,sha256=3NbbUn9wPruhmoPO7Cl3trc3SrqV2OFIBBE0P2l05mg,37081
167
+ cryptography/x509/certificate_transparency.py,sha256=6HvzAD0dlSQVxy6tnDhGj0-pisp1MaJ9bxQNRr92inI,2261
168
+ cryptography/x509/extensions.py,sha256=R70KkJ_c5NQ6Kx7Rho0sGJ0Rh-bOuBHjVOFSQGRAFCs,67370
169
+ cryptography/x509/general_name.py,sha256=sP_rV11Qlpsk4x3XXGJY_Mv0Q_s9dtjeLckHsjpLQoQ,7836
170
+ cryptography/x509/name.py,sha256=MYCxCSTQTpzhjxFPZaANqJ9fGrhESH73vPkoay8HSWM,14830
171
+ cryptography/x509/ocsp.py,sha256=P6A02msz5pe-IkUFpvxezHvnEHGvPdXiD3S0wsuf4-I,20003
172
+ cryptography/x509/oid.py,sha256=X8EbhkRTLrGuv9vHZSGqPd9zpvRVsonU_joWAL5LLY8,885
173
+ cryptography/x509/verification.py,sha256=alfx3VaTSb2bMz7_7s788oL90vzgHwBjVINssdz0Gv0,796
.venv/lib/python3.11/site-packages/cryptography-43.0.3.dist-info/WHEEL ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ Wheel-Version: 1.0
2
+ Generator: maturin (1.7.0)
3
+ Root-Is-Purelib: false
4
+ Tag: cp39-abi3-manylinux_2_28_x86_64
5
+
.venv/lib/python3.11/site-packages/cryptography-43.0.3.dist-info/license_files/LICENSE ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ This software is made available under the terms of *either* of the licenses
2
+ found in LICENSE.APACHE or LICENSE.BSD. Contributions to cryptography are made
3
+ under the terms of *both* these licenses.
.venv/lib/python3.11/site-packages/cryptography-43.0.3.dist-info/license_files/LICENSE.APACHE ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ Apache License
3
+ Version 2.0, January 2004
4
+ https://www.apache.org/licenses/
5
+
6
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7
+
8
+ 1. Definitions.
9
+
10
+ "License" shall mean the terms and conditions for use, reproduction,
11
+ and distribution as defined by Sections 1 through 9 of this document.
12
+
13
+ "Licensor" shall mean the copyright owner or entity authorized by
14
+ the copyright owner that is granting the License.
15
+
16
+ "Legal Entity" shall mean the union of the acting entity and all
17
+ other entities that control, are controlled by, or are under common
18
+ control with that entity. For the purposes of this definition,
19
+ "control" means (i) the power, direct or indirect, to cause the
20
+ direction or management of such entity, whether by contract or
21
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
22
+ outstanding shares, or (iii) beneficial ownership of such entity.
23
+
24
+ "You" (or "Your") shall mean an individual or Legal Entity
25
+ exercising permissions granted by this License.
26
+
27
+ "Source" form shall mean the preferred form for making modifications,
28
+ including but not limited to software source code, documentation
29
+ source, and configuration files.
30
+
31
+ "Object" form shall mean any form resulting from mechanical
32
+ transformation or translation of a Source form, including but
33
+ not limited to compiled object code, generated documentation,
34
+ and conversions to other media types.
35
+
36
+ "Work" shall mean the work of authorship, whether in Source or
37
+ Object form, made available under the License, as indicated by a
38
+ copyright notice that is included in or attached to the work
39
+ (an example is provided in the Appendix below).
40
+
41
+ "Derivative Works" shall mean any work, whether in Source or Object
42
+ form, that is based on (or derived from) the Work and for which the
43
+ editorial revisions, annotations, elaborations, or other modifications
44
+ represent, as a whole, an original work of authorship. For the purposes
45
+ of this License, Derivative Works shall not include works that remain
46
+ separable from, or merely link (or bind by name) to the interfaces of,
47
+ the Work and Derivative Works thereof.
48
+
49
+ "Contribution" shall mean any work of authorship, including
50
+ the original version of the Work and any modifications or additions
51
+ to that Work or Derivative Works thereof, that is intentionally
52
+ submitted to Licensor for inclusion in the Work by the copyright owner
53
+ or by an individual or Legal Entity authorized to submit on behalf of
54
+ the copyright owner. For the purposes of this definition, "submitted"
55
+ means any form of electronic, verbal, or written communication sent
56
+ to the Licensor or its representatives, including but not limited to
57
+ communication on electronic mailing lists, source code control systems,
58
+ and issue tracking systems that are managed by, or on behalf of, the
59
+ Licensor for the purpose of discussing and improving the Work, but
60
+ excluding communication that is conspicuously marked or otherwise
61
+ designated in writing by the copyright owner as "Not a Contribution."
62
+
63
+ "Contributor" shall mean Licensor and any individual or Legal Entity
64
+ on behalf of whom a Contribution has been received by Licensor and
65
+ subsequently incorporated within the Work.
66
+
67
+ 2. Grant of Copyright License. Subject to the terms and conditions of
68
+ this License, each Contributor hereby grants to You a perpetual,
69
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70
+ copyright license to reproduce, prepare Derivative Works of,
71
+ publicly display, publicly perform, sublicense, and distribute the
72
+ Work and such Derivative Works in Source or Object form.
73
+
74
+ 3. Grant of Patent License. Subject to the terms and conditions of
75
+ this License, each Contributor hereby grants to You a perpetual,
76
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77
+ (except as stated in this section) patent license to make, have made,
78
+ use, offer to sell, sell, import, and otherwise transfer the Work,
79
+ where such license applies only to those patent claims licensable
80
+ by such Contributor that are necessarily infringed by their
81
+ Contribution(s) alone or by combination of their Contribution(s)
82
+ with the Work to which such Contribution(s) was submitted. If You
83
+ institute patent litigation against any entity (including a
84
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
85
+ or a Contribution incorporated within the Work constitutes direct
86
+ or contributory patent infringement, then any patent licenses
87
+ granted to You under this License for that Work shall terminate
88
+ as of the date such litigation is filed.
89
+
90
+ 4. Redistribution. You may reproduce and distribute copies of the
91
+ Work or Derivative Works thereof in any medium, with or without
92
+ modifications, and in Source or Object form, provided that You
93
+ meet the following conditions:
94
+
95
+ (a) You must give any other recipients of the Work or
96
+ Derivative Works a copy of this License; and
97
+
98
+ (b) You must cause any modified files to carry prominent notices
99
+ stating that You changed the files; and
100
+
101
+ (c) You must retain, in the Source form of any Derivative Works
102
+ that You distribute, all copyright, patent, trademark, and
103
+ attribution notices from the Source form of the Work,
104
+ excluding those notices that do not pertain to any part of
105
+ the Derivative Works; and
106
+
107
+ (d) If the Work includes a "NOTICE" text file as part of its
108
+ distribution, then any Derivative Works that You distribute must
109
+ include a readable copy of the attribution notices contained
110
+ within such NOTICE file, excluding those notices that do not
111
+ pertain to any part of the Derivative Works, in at least one
112
+ of the following places: within a NOTICE text file distributed
113
+ as part of the Derivative Works; within the Source form or
114
+ documentation, if provided along with the Derivative Works; or,
115
+ within a display generated by the Derivative Works, if and
116
+ wherever such third-party notices normally appear. The contents
117
+ of the NOTICE file are for informational purposes only and
118
+ do not modify the License. You may add Your own attribution
119
+ notices within Derivative Works that You distribute, alongside
120
+ or as an addendum to the NOTICE text from the Work, provided
121
+ that such additional attribution notices cannot be construed
122
+ as modifying the License.
123
+
124
+ You may add Your own copyright statement to Your modifications and
125
+ may provide additional or different license terms and conditions
126
+ for use, reproduction, or distribution of Your modifications, or
127
+ for any such Derivative Works as a whole, provided Your use,
128
+ reproduction, and distribution of the Work otherwise complies with
129
+ the conditions stated in this License.
130
+
131
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
132
+ any Contribution intentionally submitted for inclusion in the Work
133
+ by You to the Licensor shall be under the terms and conditions of
134
+ this License, without any additional terms or conditions.
135
+ Notwithstanding the above, nothing herein shall supersede or modify
136
+ the terms of any separate license agreement you may have executed
137
+ with Licensor regarding such Contributions.
138
+
139
+ 6. Trademarks. This License does not grant permission to use the trade
140
+ names, trademarks, service marks, or product names of the Licensor,
141
+ except as required for reasonable and customary use in describing the
142
+ origin of the Work and reproducing the content of the NOTICE file.
143
+
144
+ 7. Disclaimer of Warranty. Unless required by applicable law or
145
+ agreed to in writing, Licensor provides the Work (and each
146
+ Contributor provides its Contributions) on an "AS IS" BASIS,
147
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148
+ implied, including, without limitation, any warranties or conditions
149
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150
+ PARTICULAR PURPOSE. You are solely responsible for determining the
151
+ appropriateness of using or redistributing the Work and assume any
152
+ risks associated with Your exercise of permissions under this License.
153
+
154
+ 8. Limitation of Liability. In no event and under no legal theory,
155
+ whether in tort (including negligence), contract, or otherwise,
156
+ unless required by applicable law (such as deliberate and grossly
157
+ negligent acts) or agreed to in writing, shall any Contributor be
158
+ liable to You for damages, including any direct, indirect, special,
159
+ incidental, or consequential damages of any character arising as a
160
+ result of this License or out of the use or inability to use the
161
+ Work (including but not limited to damages for loss of goodwill,
162
+ work stoppage, computer failure or malfunction, or any and all
163
+ other commercial damages or losses), even if such Contributor
164
+ has been advised of the possibility of such damages.
165
+
166
+ 9. Accepting Warranty or Additional Liability. While redistributing
167
+ the Work or Derivative Works thereof, You may choose to offer,
168
+ and charge a fee for, acceptance of support, warranty, indemnity,
169
+ or other liability obligations and/or rights consistent with this
170
+ License. However, in accepting such obligations, You may act only
171
+ on Your own behalf and on Your sole responsibility, not on behalf
172
+ of any other Contributor, and only if You agree to indemnify,
173
+ defend, and hold each Contributor harmless for any liability
174
+ incurred by, or claims asserted against, such Contributor by reason
175
+ of your accepting any such warranty or additional liability.
176
+
177
+ END OF TERMS AND CONDITIONS
178
+
179
+ APPENDIX: How to apply the Apache License to your work.
180
+
181
+ To apply the Apache License to your work, attach the following
182
+ boilerplate notice, with the fields enclosed by brackets "[]"
183
+ replaced with your own identifying information. (Don't include
184
+ the brackets!) The text should be enclosed in the appropriate
185
+ comment syntax for the file format. We also recommend that a
186
+ file or class name and description of purpose be included on the
187
+ same "printed page" as the copyright notice for easier
188
+ identification within third-party archives.
189
+
190
+ Copyright [yyyy] [name of copyright owner]
191
+
192
+ Licensed under the Apache License, Version 2.0 (the "License");
193
+ you may not use this file except in compliance with the License.
194
+ You may obtain a copy of the License at
195
+
196
+ https://www.apache.org/licenses/LICENSE-2.0
197
+
198
+ Unless required by applicable law or agreed to in writing, software
199
+ distributed under the License is distributed on an "AS IS" BASIS,
200
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201
+ See the License for the specific language governing permissions and
202
+ limitations under the License.
.venv/lib/python3.11/site-packages/cryptography-43.0.3.dist-info/license_files/LICENSE.BSD ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Copyright (c) Individual contributors.
2
+ All rights reserved.
3
+
4
+ Redistribution and use in source and binary forms, with or without
5
+ modification, are permitted provided that the following conditions are met:
6
+
7
+ 1. Redistributions of source code must retain the above copyright notice,
8
+ this list of conditions and the following disclaimer.
9
+
10
+ 2. Redistributions in binary form must reproduce the above copyright
11
+ notice, this list of conditions and the following disclaimer in the
12
+ documentation and/or other materials provided with the distribution.
13
+
14
+ 3. Neither the name of PyCA Cryptography nor the names of its contributors
15
+ may be used to endorse or promote products derived from this software
16
+ without specific prior written permission.
17
+
18
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
19
+ ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
22
+ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23
+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24
+ LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
25
+ ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.venv/lib/python3.11/site-packages/jsonschema-4.23.0.dist-info/INSTALLER ADDED
@@ -0,0 +1 @@
 
 
1
+ pip
.venv/lib/python3.11/site-packages/jsonschema-4.23.0.dist-info/METADATA ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Metadata-Version: 2.3
2
+ Name: jsonschema
3
+ Version: 4.23.0
4
+ Summary: An implementation of JSON Schema validation for Python
5
+ Project-URL: Homepage, https://github.com/python-jsonschema/jsonschema
6
+ Project-URL: Documentation, https://python-jsonschema.readthedocs.io/
7
+ Project-URL: Issues, https://github.com/python-jsonschema/jsonschema/issues/
8
+ Project-URL: Funding, https://github.com/sponsors/Julian
9
+ Project-URL: Tidelift, https://tidelift.com/subscription/pkg/pypi-jsonschema?utm_source=pypi-jsonschema&utm_medium=referral&utm_campaign=pypi-link
10
+ Project-URL: Changelog, https://github.com/python-jsonschema/jsonschema/blob/main/CHANGELOG.rst
11
+ Project-URL: Source, https://github.com/python-jsonschema/jsonschema
12
+ Author-email: Julian Berman <[email protected]>
13
+ License: MIT
14
+ License-File: COPYING
15
+ Keywords: data validation,json,json schema,jsonschema,validation
16
+ Classifier: Development Status :: 5 - Production/Stable
17
+ Classifier: Intended Audience :: Developers
18
+ Classifier: License :: OSI Approved :: MIT License
19
+ Classifier: Operating System :: OS Independent
20
+ Classifier: Programming Language :: Python
21
+ Classifier: Programming Language :: Python :: 3.8
22
+ Classifier: Programming Language :: Python :: 3.9
23
+ Classifier: Programming Language :: Python :: 3.10
24
+ Classifier: Programming Language :: Python :: 3.11
25
+ Classifier: Programming Language :: Python :: 3.12
26
+ Classifier: Programming Language :: Python :: 3.13
27
+ Classifier: Programming Language :: Python :: Implementation :: CPython
28
+ Classifier: Programming Language :: Python :: Implementation :: PyPy
29
+ Classifier: Topic :: File Formats :: JSON
30
+ Classifier: Topic :: File Formats :: JSON :: JSON Schema
31
+ Requires-Python: >=3.8
32
+ Requires-Dist: attrs>=22.2.0
33
+ Requires-Dist: importlib-resources>=1.4.0; python_version < '3.9'
34
+ Requires-Dist: jsonschema-specifications>=2023.03.6
35
+ Requires-Dist: pkgutil-resolve-name>=1.3.10; python_version < '3.9'
36
+ Requires-Dist: referencing>=0.28.4
37
+ Requires-Dist: rpds-py>=0.7.1
38
+ Provides-Extra: format
39
+ Requires-Dist: fqdn; extra == 'format'
40
+ Requires-Dist: idna; extra == 'format'
41
+ Requires-Dist: isoduration; extra == 'format'
42
+ Requires-Dist: jsonpointer>1.13; extra == 'format'
43
+ Requires-Dist: rfc3339-validator; extra == 'format'
44
+ Requires-Dist: rfc3987; extra == 'format'
45
+ Requires-Dist: uri-template; extra == 'format'
46
+ Requires-Dist: webcolors>=1.11; extra == 'format'
47
+ Provides-Extra: format-nongpl
48
+ Requires-Dist: fqdn; extra == 'format-nongpl'
49
+ Requires-Dist: idna; extra == 'format-nongpl'
50
+ Requires-Dist: isoduration; extra == 'format-nongpl'
51
+ Requires-Dist: jsonpointer>1.13; extra == 'format-nongpl'
52
+ Requires-Dist: rfc3339-validator; extra == 'format-nongpl'
53
+ Requires-Dist: rfc3986-validator>0.1.0; extra == 'format-nongpl'
54
+ Requires-Dist: uri-template; extra == 'format-nongpl'
55
+ Requires-Dist: webcolors>=24.6.0; extra == 'format-nongpl'
56
+ Description-Content-Type: text/x-rst
57
+
58
+ ==========
59
+ jsonschema
60
+ ==========
61
+
62
+ |PyPI| |Pythons| |CI| |ReadTheDocs| |Precommit| |Zenodo|
63
+
64
+ .. |PyPI| image:: https://img.shields.io/pypi/v/jsonschema.svg
65
+ :alt: PyPI version
66
+ :target: https://pypi.org/project/jsonschema/
67
+
68
+ .. |Pythons| image:: https://img.shields.io/pypi/pyversions/jsonschema.svg
69
+ :alt: Supported Python versions
70
+ :target: https://pypi.org/project/jsonschema/
71
+
72
+ .. |CI| image:: https://github.com/python-jsonschema/jsonschema/workflows/CI/badge.svg
73
+ :alt: Build status
74
+ :target: https://github.com/python-jsonschema/jsonschema/actions?query=workflow%3ACI
75
+
76
+ .. |ReadTheDocs| image:: https://readthedocs.org/projects/python-jsonschema/badge/?version=stable&style=flat
77
+ :alt: ReadTheDocs status
78
+ :target: https://python-jsonschema.readthedocs.io/en/stable/
79
+
80
+ .. |Precommit| image:: https://results.pre-commit.ci/badge/github/python-jsonschema/jsonschema/main.svg
81
+ :alt: pre-commit.ci status
82
+ :target: https://results.pre-commit.ci/latest/github/python-jsonschema/jsonschema/main
83
+
84
+ .. |Zenodo| image:: https://zenodo.org/badge/3072629.svg
85
+ :alt: Zenodo DOI
86
+ :target: https://zenodo.org/badge/latestdoi/3072629
87
+
88
+
89
+ ``jsonschema`` is an implementation of the `JSON Schema <https://json-schema.org>`_ specification for Python.
90
+
91
+ .. code:: python
92
+
93
+ >>> from jsonschema import validate
94
+
95
+ >>> # A sample schema, like what we'd get from json.load()
96
+ >>> schema = {
97
+ ... "type" : "object",
98
+ ... "properties" : {
99
+ ... "price" : {"type" : "number"},
100
+ ... "name" : {"type" : "string"},
101
+ ... },
102
+ ... }
103
+
104
+ >>> # If no exception is raised by validate(), the instance is valid.
105
+ >>> validate(instance={"name" : "Eggs", "price" : 34.99}, schema=schema)
106
+
107
+ >>> validate(
108
+ ... instance={"name" : "Eggs", "price" : "Invalid"}, schema=schema,
109
+ ... ) # doctest: +IGNORE_EXCEPTION_DETAIL
110
+ Traceback (most recent call last):
111
+ ...
112
+ ValidationError: 'Invalid' is not of type 'number'
113
+
114
+ It can also be used from the command line by installing `check-jsonschema <https://github.com/python-jsonschema/check-jsonschema>`_.
115
+
116
+ Features
117
+ --------
118
+
119
+ * Full support for `Draft 2020-12 <https://python-jsonschema.readthedocs.io/en/latest/api/jsonschema/validators/#jsonschema.validators.Draft202012Validator>`_, `Draft 2019-09 <https://python-jsonschema.readthedocs.io/en/latest/api/jsonschema/validators/#jsonschema.validators.Draft201909Validator>`_, `Draft 7 <https://python-jsonschema.readthedocs.io/en/latest/api/jsonschema/validators/#jsonschema.validators.Draft7Validator>`_, `Draft 6 <https://python-jsonschema.readthedocs.io/en/latest/api/jsonschema/validators/#jsonschema.validators.Draft6Validator>`_, `Draft 4 <https://python-jsonschema.readthedocs.io/en/latest/api/jsonschema/validators/#jsonschema.validators.Draft4Validator>`_ and `Draft 3 <https://python-jsonschema.readthedocs.io/en/latest/api/jsonschema/validators/#jsonschema.validators.Draft3Validator>`_
120
+
121
+ * `Lazy validation <https://python-jsonschema.readthedocs.io/en/latest/api/jsonschema/protocols/#jsonschema.protocols.Validator.iter_errors>`_ that can iteratively report *all* validation errors.
122
+
123
+ * `Programmatic querying <https://python-jsonschema.readthedocs.io/en/latest/errors/>`_ of which properties or items failed validation.
124
+
125
+
126
+ Installation
127
+ ------------
128
+
129
+ ``jsonschema`` is available on `PyPI <https://pypi.org/project/jsonschema/>`_. You can install using `pip <https://pip.pypa.io/en/stable/>`_:
130
+
131
+ .. code:: bash
132
+
133
+ $ pip install jsonschema
134
+
135
+
136
+ Extras
137
+ ======
138
+
139
+ Two extras are available when installing the package, both currently related to ``format`` validation:
140
+
141
+ * ``format``
142
+ * ``format-nongpl``
143
+
144
+ They can be used when installing in order to include additional dependencies, e.g.:
145
+
146
+ .. code:: bash
147
+
148
+ $ pip install jsonschema'[format]'
149
+
150
+ Be aware that the mere presence of these dependencies – or even the specification of ``format`` checks in a schema – do *not* activate format checks (as per the specification).
151
+ Please read the `format validation documentation <https://python-jsonschema.readthedocs.io/en/latest/validate/#validating-formats>`_ for further details.
152
+
153
+ About
154
+ -----
155
+
156
+ I'm Julian Berman.
157
+
158
+ ``jsonschema`` is on `GitHub <https://github.com/python-jsonschema/jsonschema>`_.
159
+
160
+ Get in touch, via GitHub or otherwise, if you've got something to contribute, it'd be most welcome!
161
+
162
+ You can also generally find me on Libera (nick: ``Julian``) in various channels, including ``#python``.
163
+
164
+ If you feel overwhelmingly grateful, you can also `sponsor me <https://github.com/sponsors/Julian/>`_.
165
+
166
+ And for companies who appreciate ``jsonschema`` and its continued support and growth, ``jsonschema`` is also now supportable via `TideLift <https://tidelift.com/subscription/pkg/pypi-jsonschema?utm_source=pypi-jsonschema&utm_medium=referral&utm_campaign=readme>`_.
167
+
168
+
169
+ Release Information
170
+ -------------------
171
+
172
+ v4.23.0
173
+ =======
174
+
175
+ * Do not reorder dictionaries (schemas, instances) that are printed as part of validation errors.
176
+ * Declare support for Py3.13
.venv/lib/python3.11/site-packages/jsonschema-4.23.0.dist-info/RECORD ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ../../../bin/jsonschema,sha256=XHo009U0gdoLuMe818hXZdVGUS_4zdLM86D9zqPvDvA,231
2
+ jsonschema-4.23.0.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
3
+ jsonschema-4.23.0.dist-info/METADATA,sha256=Hd96gAfdO0v5RpFeT25qjyo7PvhASy56F4Jw3FUUTlo,7906
4
+ jsonschema-4.23.0.dist-info/RECORD,,
5
+ jsonschema-4.23.0.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
6
+ jsonschema-4.23.0.dist-info/entry_points.txt,sha256=vO7rX4Fs_xIVJy2pnAtKgTSxfpnozAVQ0DjCmpMxnWE,51
7
+ jsonschema-4.23.0.dist-info/licenses/COPYING,sha256=T5KgFaE8TRoEC-8BiqE0MLTxvHO0Gxa7hGw0Z2bedDk,1057
8
+ jsonschema/__init__.py,sha256=LkPwscySlJ9lTOp7ZB1M7jQ8mbG7-bYG41iBwbZ-o9s,3941
9
+ jsonschema/__main__.py,sha256=iLsZf2upUB3ilBKTlMnyK-HHt2Cnnfkwwxi_c6gLvSA,115
10
+ jsonschema/__pycache__/__init__.cpython-311.pyc,,
11
+ jsonschema/__pycache__/__main__.cpython-311.pyc,,
12
+ jsonschema/__pycache__/_format.cpython-311.pyc,,
13
+ jsonschema/__pycache__/_keywords.cpython-311.pyc,,
14
+ jsonschema/__pycache__/_legacy_keywords.cpython-311.pyc,,
15
+ jsonschema/__pycache__/_types.cpython-311.pyc,,
16
+ jsonschema/__pycache__/_typing.cpython-311.pyc,,
17
+ jsonschema/__pycache__/_utils.cpython-311.pyc,,
18
+ jsonschema/__pycache__/cli.cpython-311.pyc,,
19
+ jsonschema/__pycache__/exceptions.cpython-311.pyc,,
20
+ jsonschema/__pycache__/protocols.cpython-311.pyc,,
21
+ jsonschema/__pycache__/validators.cpython-311.pyc,,
22
+ jsonschema/_format.py,sha256=F_MA52IkrhOIxDqD8x-01bH37mG5nh0kyNrWUSLtWb8,14591
23
+ jsonschema/_keywords.py,sha256=r8_DrqAfn6QLwQnmXEggveiSU-UaIL2p2nuPINelfFc,14949
24
+ jsonschema/_legacy_keywords.py,sha256=2tWuwRPWbYS7EAl8wBIC_rabGuv1J4dfYLqNEPpShhA,15191
25
+ jsonschema/_types.py,sha256=HQ5QD_oL85zF1FSW2v-5rvfYF0967HJdxSR88kzw2mY,5367
26
+ jsonschema/_typing.py,sha256=NZhPhkBOn9INYZk8G69rDeuRamztgXCMLh10z9cfT6g,610
27
+ jsonschema/_utils.py,sha256=ODga3vrJ6K2wMGxerpgn4ipc9q7ZSqBsvwKU4embLEE,10642
28
+ jsonschema/benchmarks/__init__.py,sha256=A0sQrxDBVHSyQ-8ru3L11hMXf3q9gVuB9x_YgHb4R9M,70
29
+ jsonschema/benchmarks/__pycache__/__init__.cpython-311.pyc,,
30
+ jsonschema/benchmarks/__pycache__/const_vs_enum.cpython-311.pyc,,
31
+ jsonschema/benchmarks/__pycache__/contains.cpython-311.pyc,,
32
+ jsonschema/benchmarks/__pycache__/issue232.cpython-311.pyc,,
33
+ jsonschema/benchmarks/__pycache__/json_schema_test_suite.cpython-311.pyc,,
34
+ jsonschema/benchmarks/__pycache__/nested_schemas.cpython-311.pyc,,
35
+ jsonschema/benchmarks/__pycache__/subcomponents.cpython-311.pyc,,
36
+ jsonschema/benchmarks/__pycache__/unused_registry.cpython-311.pyc,,
37
+ jsonschema/benchmarks/__pycache__/useless_applicator_schemas.cpython-311.pyc,,
38
+ jsonschema/benchmarks/__pycache__/useless_keywords.cpython-311.pyc,,
39
+ jsonschema/benchmarks/__pycache__/validator_creation.cpython-311.pyc,,
40
+ jsonschema/benchmarks/const_vs_enum.py,sha256=DVFi3WDqBalZFOibnjpX1uTSr3Rxa2cPgFcowd7Ukrs,830
41
+ jsonschema/benchmarks/contains.py,sha256=gexQoUrCOwECofbt19BeosQZ7WFL6PDdkX49DWwBlOg,786
42
+ jsonschema/benchmarks/issue232.py,sha256=3LLYLIlBGQnVuyyo2iAv-xky5P6PRFHANx4-zIIQOoE,521
43
+ jsonschema/benchmarks/issue232/issue.json,sha256=eaPOZjMRu5u8RpKrsA9uk7ucPZS5tkKG4D_hkOTQ3Hk,117105
44
+ jsonschema/benchmarks/json_schema_test_suite.py,sha256=PvfabpUYcF4_7csYDTcTauED8rnFEGYbdY5RqTXD08s,320
45
+ jsonschema/benchmarks/nested_schemas.py,sha256=mo07dx-CIgmSOI62CNs4g5xu1FzHklLBpkQoDxWYcKs,1892
46
+ jsonschema/benchmarks/subcomponents.py,sha256=fEyiMzsWeK2pd7DEGCuuY-vzGunwhHczRBWEnBRLKIo,1113
47
+ jsonschema/benchmarks/unused_registry.py,sha256=hwRwONc9cefPtYzkoX_TYRO3GyUojriv0-YQaK3vnj0,940
48
+ jsonschema/benchmarks/useless_applicator_schemas.py,sha256=EVm5-EtOEFoLP_Vt2j4SrCwlx05NhPqNuZQ6LIMP1Dc,3342
49
+ jsonschema/benchmarks/useless_keywords.py,sha256=bj_zKr1oVctFlqyZaObCsYTgFjiiNgPzC0hr1Y868mE,867
50
+ jsonschema/benchmarks/validator_creation.py,sha256=UkUQlLAnussnr_KdCIdad6xx2pXxQLmYtsXoiirKeWQ,285
51
+ jsonschema/cli.py,sha256=SGy9JPg02mgXhNxugU8iXhYNivfSjBhKTNAgV90ty-M,8551
52
+ jsonschema/exceptions.py,sha256=RxE2T5xxgg_B6ttR8a3lCbZyh29RUtFe4oZKMoHPBAE,15035
53
+ jsonschema/protocols.py,sha256=7mpZxO1gfRNMCGXwldwsSN3nEugVfIVyKZ_HZgN1vSw,7174
54
+ jsonschema/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
55
+ jsonschema/tests/__pycache__/__init__.cpython-311.pyc,,
56
+ jsonschema/tests/__pycache__/_suite.cpython-311.pyc,,
57
+ jsonschema/tests/__pycache__/fuzz_validate.cpython-311.pyc,,
58
+ jsonschema/tests/__pycache__/test_cli.cpython-311.pyc,,
59
+ jsonschema/tests/__pycache__/test_deprecations.cpython-311.pyc,,
60
+ jsonschema/tests/__pycache__/test_exceptions.cpython-311.pyc,,
61
+ jsonschema/tests/__pycache__/test_format.cpython-311.pyc,,
62
+ jsonschema/tests/__pycache__/test_jsonschema_test_suite.cpython-311.pyc,,
63
+ jsonschema/tests/__pycache__/test_types.cpython-311.pyc,,
64
+ jsonschema/tests/__pycache__/test_utils.cpython-311.pyc,,
65
+ jsonschema/tests/__pycache__/test_validators.cpython-311.pyc,,
66
+ jsonschema/tests/_suite.py,sha256=QAfBj34zMbJQ5_JJ2ogpiTlw9hQ6Is43dvo_bpS0EdM,8156
67
+ jsonschema/tests/fuzz_validate.py,sha256=fUA7yTJIihaCwJplkUehZeyB84HcXEcqtY5oPJXIO7I,1114
68
+ jsonschema/tests/test_cli.py,sha256=uFMu2YbIfbSDCnykhLL4-VR3-jg1tvQLJn2Bliwp_Bw,28587
69
+ jsonschema/tests/test_deprecations.py,sha256=9VxOCfWzMG1Tg4OD8riU_Znd6HDOQZkepzVgxsdUdU8,15760
70
+ jsonschema/tests/test_exceptions.py,sha256=JgC-E1ZFZK2puVBp35WFRnG8CNOiSWLYtyLjh9IvFKI,22591
71
+ jsonschema/tests/test_format.py,sha256=eVm5SMaWF2lOPO28bPAwNvkiQvHCQKy-MnuAgEchfEc,3188
72
+ jsonschema/tests/test_jsonschema_test_suite.py,sha256=a2saPs2Cwwg0sdRdu-uJ8goSXLbqrS-pC48QJy0K4DE,8674
73
+ jsonschema/tests/test_types.py,sha256=cF51KTDmdsx06MrIc4fXKt0X9fIsVgw5uhT8CamVa8U,6977
74
+ jsonschema/tests/test_utils.py,sha256=sao74o1PyYMxBfqweokQN48CFSS6yhJk5FkCfMJ5PsI,4163
75
+ jsonschema/tests/test_validators.py,sha256=eiaigsZMzHYYsniQ1UPygaS56a1d-_7-9NC4wVXAhzs,87975
76
+ jsonschema/validators.py,sha256=H31FwHdyB7LP5eunxdBrZ9E57hpvozfnRlZaOYy45jU,47045
.venv/lib/python3.11/site-packages/jsonschema-4.23.0.dist-info/WHEEL ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.25.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
.venv/lib/python3.11/site-packages/jsonschema-4.23.0.dist-info/entry_points.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ [console_scripts]
2
+ jsonschema = jsonschema.cli:main
.venv/lib/python3.11/site-packages/jsonschema-4.23.0.dist-info/licenses/COPYING ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Copyright (c) 2013 Julian Berman
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ of this software and associated documentation files (the "Software"), to deal
5
+ in the Software without restriction, including without limitation the rights
6
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ copies of the Software, and to permit persons to whom the Software is
8
+ furnished to do so, subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in
11
+ all copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ THE SOFTWARE.
.venv/lib/python3.11/site-packages/mistral_common/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (207 Bytes). View file
 
.venv/lib/python3.11/site-packages/mistral_common/__pycache__/base.cpython-311.pyc ADDED
Binary file (710 Bytes). View file
 
.venv/lib/python3.11/site-packages/mistral_common/__pycache__/exceptions.cpython-311.pyc ADDED
Binary file (5.66 kB). View file
 
.venv/lib/python3.11/site-packages/mistral_common/__pycache__/multimodal.cpython-311.pyc ADDED
Binary file (4.03 kB). View file
 
.venv/lib/python3.11/site-packages/mistral_common/data/mistral_instruct_tokenizer_240323.model.v3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9addc8bdce5988448ae81b729336f43a81262160ae8da760674badab9d4c7d33
3
+ size 587591
.venv/lib/python3.11/site-packages/mistral_common/protocol/embedding/__init__.py ADDED
File without changes
.venv/lib/python3.11/site-packages/mistral_common/protocol/embedding/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (206 Bytes). View file
 
.venv/lib/python3.11/site-packages/mistral_common/protocol/embedding/__pycache__/request.cpython-311.pyc ADDED
Binary file (1.14 kB). View file
 
.venv/lib/python3.11/site-packages/mistral_common/protocol/embedding/__pycache__/response.cpython-311.pyc ADDED
Binary file (2.07 kB). View file
 
.venv/lib/python3.11/site-packages/mistral_common/protocol/embedding/request.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Optional, Union
2
+
3
+ from pydantic import Field
4
+
5
+ from mistral_common.base import MistralBase
6
+
7
+
8
+ class EmbeddingRequest(MistralBase):
9
+ input: Union[str, List[str]] = Field(description="Text to embed.")
10
+ model: str = Field(description="ID of the model to use.")
11
+ encoding_format: Optional[str] = Field(default="float", description="The format to return the embeddings in.")
.venv/lib/python3.11/site-packages/mistral_common/protocol/embedding/response.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List
2
+
3
+ from pydantic import Field
4
+
5
+ from mistral_common.base import MistralBase
6
+ from mistral_common.protocol.base import UsageInfo
7
+ from mistral_common.protocol.utils import random_uuid
8
+
9
+
10
+ class EmbeddingObject(MistralBase):
11
+ object: str = Field(default="embedding", description="The type of the object returned.")
12
+ embedding: List[float] = Field(description="The type of the object returned.")
13
+ index: int = Field(description="The index of the embedding in the input text.")
14
+
15
+
16
+ class EmbeddingResponse(MistralBase):
17
+ id: str = Field(default_factory=lambda: f"embd-{random_uuid()}")
18
+ object: str = Field(default="list", description="The type of the object returned.")
19
+ data: List[EmbeddingObject] = Field(description="List of embeddings.")
20
+ model: str = Field(description="The model used to generate the embeddings.")
21
+ usage: UsageInfo
.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (205 Bytes). View file
 
.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/__pycache__/messages.cpython-311.pyc ADDED
Binary file (7.04 kB). View file
 
.venv/lib/python3.11/site-packages/mistral_common/protocol/instruct/__pycache__/response.cpython-311.pyc ADDED
Binary file (5.06 kB). View file
 
.venv/lib/python3.11/site-packages/mistral_common/tokens/__init__.py ADDED
File without changes
.venv/lib/python3.11/site-packages/mistral_common/tokens/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (194 Bytes). View file
 
.venv/lib/python3.11/site-packages/mistral_common/tokens/instruct/__init__.py ADDED
File without changes
.venv/lib/python3.11/site-packages/mistral_common/tokens/instruct/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (203 Bytes). View file
 
.venv/lib/python3.11/site-packages/mistral_common/tokens/instruct/__pycache__/request.cpython-311.pyc ADDED
Binary file (1.63 kB). View file
 
.venv/lib/python3.11/site-packages/mistral_common/tokens/instruct/request.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Generic, List, Optional
2
+
3
+ from mistral_common.base import MistralBase
4
+ from mistral_common.protocol.instruct.messages import ChatMessageType
5
+ from mistral_common.protocol.instruct.tool_calls import ToolType
6
+
7
+
8
+ class FIMRequest(MistralBase):
9
+ """
10
+ A valid Fill in the Middle completion request to be tokenized
11
+ """
12
+
13
+ prompt: str
14
+ suffix: Optional[str] = None
15
+
16
+
17
+ class InstructRequest(MistralBase, Generic[ChatMessageType, ToolType]):
18
+ """
19
+ A valid request to be tokenized
20
+ """
21
+
22
+ messages: List[ChatMessageType]
23
+ system_prompt: Optional[str] = None
24
+ available_tools: Optional[List[ToolType]] = None
25
+ truncate_at_max_tokens: Optional[int] = None
.venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/__init__.py ADDED
File without changes
.venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (205 Bytes). View file
 
.venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/__pycache__/base.cpython-311.pyc ADDED
Binary file (10.8 kB). View file
 
.venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/__pycache__/mistral.cpython-311.pyc ADDED
Binary file (14.5 kB). View file
 
.venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/__pycache__/multimodal.cpython-311.pyc ADDED
Binary file (9.45 kB). View file
 
.venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/__pycache__/sentencepiece.cpython-311.pyc ADDED
Binary file (40.2 kB). View file
 
.venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/__pycache__/tekken.cpython-311.pyc ADDED
Binary file (18.6 kB). View file
 
.venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/__pycache__/utils.cpython-311.pyc ADDED
Binary file (717 Bytes). View file
 
.venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/base.py ADDED
@@ -0,0 +1,200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from abc import ABC, abstractmethod
2
+ from dataclasses import dataclass
3
+ from enum import Enum
4
+ from typing import Generic, List, Optional, Protocol, Tuple, TypeVar, Union
5
+
6
+ import numpy as np
7
+ from pydantic import ConfigDict
8
+
9
+ from mistral_common.base import MistralBase
10
+ from mistral_common.protocol.instruct.messages import (
11
+ AssistantMessageType,
12
+ ContentChunk,
13
+ ImageChunk,
14
+ ImageURLChunk,
15
+ UserMessage,
16
+ )
17
+ from mistral_common.protocol.instruct.tool_calls import Tool
18
+ from mistral_common.tokens.instruct.request import FIMRequest, InstructRequest
19
+
20
+
21
+ class SpecialTokens(str, Enum):
22
+ bos = "<s>"
23
+ eos = "</s>"
24
+ begin_inst = "[INST]"
25
+ end_inst = "[/INST]"
26
+ begin_tools = "[AVAILABLE_TOOLS]"
27
+ end_tools = "[/AVAILABLE_TOOLS]"
28
+ begin_tool_results = "[TOOL_RESULTS]"
29
+ end_tool_results = "[/TOOL_RESULTS]"
30
+ tool_calls = "[TOOL_CALLS]"
31
+ img = "[IMG]"
32
+ img_break = "[IMG_BREAK]"
33
+ img_end = "[IMG_END]"
34
+ prefix = "[PREFIX]"
35
+ middle = "[MIDDLE]"
36
+ suffix = "[SUFFIX]"
37
+ begin_system = "[SYSTEM_PROMPT]"
38
+ end_system = "[/SYSTEM_PROMPT]"
39
+ begin_tool_content = "[TOOL_CONTENT]"
40
+
41
+
42
+ class TokenizerVersion(str, Enum):
43
+ v1 = "v1" # vocab_size = 32000
44
+ v2 = "v2" # vocab_size = 32768 with special control tokens [INST], [\INST]
45
+ v3 = "v3" # vocab_size = 32768 (spm) OR 128000 (tekken) with improved function calling
46
+ v7 = "v7" # vocab_size = 32768 (spm) or 128000 (tekken) with improved system prompt and function calling
47
+
48
+
49
+ class Tokenized(MistralBase):
50
+ """
51
+ A tokenized InstructRequest
52
+ """
53
+
54
+ model_config = ConfigDict(arbitrary_types_allowed=True)
55
+ tokens: List[int]
56
+ text: Optional[str] = None
57
+ prefix_ids: Optional[List[int]] = None
58
+ images: List[np.ndarray] = []
59
+
60
+
61
+ class Tokenizer(ABC):
62
+ @property
63
+ @abstractmethod
64
+ def n_words(self) -> int:
65
+ """Vocabulary size"""
66
+
67
+ @abstractmethod
68
+ def vocab(self) -> List[str]:
69
+ """All tokens in the vocabulary as strings"""
70
+
71
+ @abstractmethod
72
+ def id_to_piece(self, token_id: int) -> str:
73
+ """Convert a token id to the token str"""
74
+
75
+ @property
76
+ @abstractmethod
77
+ def bos_id(self) -> int:
78
+ """id of the Beginning of String token"""
79
+
80
+ @property
81
+ @abstractmethod
82
+ def eos_id(self) -> int:
83
+ """id of the End of String token"""
84
+
85
+ @property
86
+ @abstractmethod
87
+ def pad_id(self) -> int:
88
+ """id of the Pad token"""
89
+
90
+ @property
91
+ @abstractmethod
92
+ def unk_id(self) -> int:
93
+ """id of the Unk token"""
94
+
95
+ @abstractmethod
96
+ def encode(self, s: str, bos: bool, eos: bool) -> List[int]:
97
+ """String to token ids"""
98
+
99
+ @abstractmethod
100
+ def decode(self, t: List[int]) -> str:
101
+ """Token ids to string"""
102
+
103
+ @abstractmethod
104
+ def get_control_token(self, s: str) -> int:
105
+ """Get the id of a control token"""
106
+
107
+ @property
108
+ @abstractmethod
109
+ def version(self) -> TokenizerVersion:
110
+ """Get the version of the tokenizer"""
111
+
112
+ @abstractmethod
113
+ def to_string(self, tokens: List[int]) -> str:
114
+ """Convert token ids to string"""
115
+
116
+
117
+ InstructRequestType = TypeVar("InstructRequestType", bound=InstructRequest)
118
+ FIMRequestType = TypeVar("FIMRequestType", bound=FIMRequest)
119
+ TokenizedType = TypeVar("TokenizedType", bound=Tokenized)
120
+
121
+
122
+ @dataclass
123
+ class ImageEncoding:
124
+ tokens: List[int]
125
+ image: np.ndarray
126
+
127
+
128
+ @dataclass
129
+ class SpecialImageIDs:
130
+ img: int
131
+ img_break: int
132
+ img_end: int
133
+
134
+ @staticmethod
135
+ def from_tokenizer(tokenizer: "Tokenizer") -> "SpecialImageIDs":
136
+ return SpecialImageIDs(
137
+ img=tokenizer.get_control_token(SpecialTokens.img.value),
138
+ img_break=tokenizer.get_control_token(SpecialTokens.img_break.value),
139
+ img_end=tokenizer.get_control_token(SpecialTokens.img_end.value),
140
+ )
141
+
142
+
143
+ class MultiModalEncoder(Protocol):
144
+ def __call__(self, content: Union[ImageChunk, ImageURLChunk]) -> ImageEncoding:
145
+ """
146
+ Encode the given content.
147
+
148
+ Args:
149
+ content (ChunkContent): The content to be encoded.
150
+
151
+ Returns:
152
+ ImageEncoding: The encoded image content.
153
+ """
154
+ ...
155
+
156
+ @property
157
+ def image_token(self) -> int:
158
+ ...
159
+
160
+
161
+ class InstructTokenizer(Generic[InstructRequestType, FIMRequestType, TokenizedType, AssistantMessageType]):
162
+ tokenizer: Tokenizer
163
+ mm_encoder: Optional[MultiModalEncoder]
164
+
165
+ def __init__(self, tokenizer: Tokenizer, mm_encoder: Optional[MultiModalEncoder]) -> None:
166
+ """Init from tokenizer"""
167
+
168
+ @abstractmethod
169
+ def encode_instruct(self, request: InstructRequestType) -> TokenizedType:
170
+ """Instruct request to Tokenized object"""
171
+
172
+ @abstractmethod
173
+ def decode(self, tokens: List[int]) -> str:
174
+ """Convert token ids to string"""
175
+
176
+ @abstractmethod
177
+ def encode_fim(self, request: FIMRequestType) -> TokenizedType:
178
+ """FIM request to Tokenized object"""
179
+
180
+ @abstractmethod
181
+ def encode_user_message(
182
+ self,
183
+ message: UserMessage,
184
+ available_tools: Optional[List[Tool]],
185
+ is_last: bool,
186
+ is_first: bool,
187
+ system_prompt: Optional[str] = None,
188
+ force_img_first: bool = False,
189
+ ) -> Tuple[List[int], List[np.ndarray]]:
190
+ ...
191
+
192
+ @abstractmethod
193
+ def encode_user_content(
194
+ self,
195
+ content: Union[str, List[ContentChunk]],
196
+ is_last: bool,
197
+ system_prompt: Optional[str] = None,
198
+ force_img_first: bool = False,
199
+ ) -> Tuple[List[int], List[np.ndarray]]:
200
+ ...
.venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/mistral.py ADDED
@@ -0,0 +1,251 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import warnings
2
+ from pathlib import Path
3
+ from typing import Callable, Dict, Generic, List, Optional, Union
4
+
5
+ from mistral_common.exceptions import (
6
+ TokenizerException,
7
+ )
8
+ from mistral_common.protocol.instruct.messages import (
9
+ UATS,
10
+ AssistantMessageType,
11
+ SystemMessageType,
12
+ ToolMessageType,
13
+ UserMessageType,
14
+ )
15
+ from mistral_common.protocol.instruct.normalize import InstructRequestNormalizer, normalizer_for_tokenizer_version
16
+ from mistral_common.protocol.instruct.request import ChatCompletionRequest
17
+ from mistral_common.protocol.instruct.validator import (
18
+ MistralRequestValidator,
19
+ MistralRequestValidatorV3,
20
+ ValidationMode,
21
+ )
22
+ from mistral_common.tokens.instruct.request import FIMRequest
23
+ from mistral_common.tokens.tokenizers.base import (
24
+ InstructRequest,
25
+ InstructRequestType,
26
+ InstructTokenizer,
27
+ SpecialTokens,
28
+ TokenizedType,
29
+ TokenizerVersion,
30
+ )
31
+ from mistral_common.tokens.tokenizers.multimodal import (
32
+ ImageEncoder,
33
+ MultimodalConfig,
34
+ MultiModalEncoder,
35
+ SpecialImageIDs,
36
+ )
37
+ from mistral_common.tokens.tokenizers.sentencepiece import (
38
+ InstructTokenizerV1,
39
+ InstructTokenizerV2,
40
+ InstructTokenizerV3,
41
+ InstructTokenizerV7,
42
+ SentencePieceTokenizer,
43
+ get_mm_config,
44
+ is_sentencepiece,
45
+ )
46
+ from mistral_common.tokens.tokenizers.tekken import Tekkenizer, is_tekken
47
+
48
+
49
+ def load_mm_encoder(
50
+ mm_config: MultimodalConfig, tokenizer: Union[Tekkenizer, SentencePieceTokenizer]
51
+ ) -> MultiModalEncoder:
52
+ special_ids = SpecialImageIDs(
53
+ img=tokenizer.get_control_token(SpecialTokens.img.value),
54
+ img_break=tokenizer.get_control_token(SpecialTokens.img_break.value),
55
+ img_end=tokenizer.get_control_token(SpecialTokens.img_end.value),
56
+ )
57
+ return ImageEncoder(mm_config, special_ids)
58
+
59
+
60
+ class MistralTokenizer(
61
+ Generic[UserMessageType, AssistantMessageType, ToolMessageType, SystemMessageType, TokenizedType]
62
+ ):
63
+ def __init__(
64
+ self,
65
+ instruct_tokenizer: InstructTokenizer[InstructRequest, FIMRequest, TokenizedType, AssistantMessageType],
66
+ validator: MistralRequestValidator[UserMessageType, AssistantMessageType, ToolMessageType, SystemMessageType],
67
+ request_normalizer: InstructRequestNormalizer[
68
+ UserMessageType, AssistantMessageType, ToolMessageType, SystemMessageType, InstructRequestType
69
+ ],
70
+ ):
71
+ self._chat_completion_request_validator = validator
72
+ self._instruct_request_normalizer = request_normalizer
73
+ self.instruct_tokenizer = instruct_tokenizer
74
+
75
+ @classmethod
76
+ def _data_path(cls) -> Path:
77
+ return Path(__file__).parents[2] / "data"
78
+
79
+ @classmethod
80
+ def v1(cls) -> "MistralTokenizer":
81
+ """open 7B x 8x7B + embed"""
82
+ return cls.from_file(str(cls._data_path() / "tokenizer.model.v1"), mode=ValidationMode.test)
83
+
84
+ @classmethod
85
+ def v2(cls) -> "MistralTokenizer":
86
+ """mistral-small // mistral-large"""
87
+ return cls.from_file(
88
+ str(cls._data_path() / "mistral_instruct_tokenizer_240216.model.v2"), mode=ValidationMode.test
89
+ )
90
+
91
+ @classmethod
92
+ def v3(cls, is_tekken: bool = False, is_mm: bool = False) -> "MistralTokenizer":
93
+ """open-mixtral-8x22B"""
94
+ if is_tekken and is_mm:
95
+ tokenizer_name = "tekken_240911.json"
96
+ elif is_tekken and not is_mm:
97
+ tokenizer_name = "tekken_240718.json"
98
+ elif not is_tekken and is_mm:
99
+ raise ValueError("Multimodal tokenizer is currently only supported for tekken")
100
+ else:
101
+ tokenizer_name = "mistral_instruct_tokenizer_240323.model.v3"
102
+
103
+ return cls.from_file(str(cls._data_path() / tokenizer_name), mode=ValidationMode.test)
104
+
105
+ @classmethod
106
+ def v7(cls, is_mm: bool = False) -> "MistralTokenizer":
107
+ """mistral-large 2.1"""
108
+ if is_mm:
109
+ return cls.from_file(
110
+ str(cls._data_path() / "mistral_instruct_tokenizer_241114.model.v7m1"), mode=ValidationMode.test
111
+ )
112
+ else:
113
+ return cls.from_file(
114
+ str(cls._data_path() / "mistral_instruct_tokenizer_241114.model.v7"), mode=ValidationMode.test
115
+ )
116
+
117
+ @classmethod
118
+ def from_model(cls, model: str, strict: bool = False) -> "MistralTokenizer":
119
+ model_name_to_tokenizer_cls: Dict[str, Callable[[], MistralTokenizer]] = {
120
+ "ministral-8b-2410": lambda: MistralTokenizer.v3(is_tekken=True),
121
+ "mistral-tiny-2312": MistralTokenizer.v2,
122
+ "open-mistral-nemo-2407": lambda: MistralTokenizer.v3(is_tekken=True),
123
+ "mistral-tiny-2407": MistralTokenizer.v3,
124
+ "mistral-small-2312": MistralTokenizer.v2,
125
+ "open-mixtral-8x22b-2404": MistralTokenizer.v3,
126
+ "mistral-small-2402": MistralTokenizer.v2,
127
+ "mistral-small-2409": lambda: MistralTokenizer.v3(is_tekken=True),
128
+ "mistral-medium-2312": MistralTokenizer.v1,
129
+ "mistral-large-2402": MistralTokenizer.v2,
130
+ "mistral-large-2407": MistralTokenizer.v3,
131
+ "mistral-large-2411": MistralTokenizer.v7,
132
+ "pixtral-large-2411": lambda: MistralTokenizer.v7(is_mm=True),
133
+ "codestral-2405": MistralTokenizer.v3,
134
+ "codestral-mamba-2407": MistralTokenizer.v3,
135
+ "pixtral-12b-2409": lambda: MistralTokenizer.v3(is_tekken=True, is_mm=True),
136
+ # The following are deprecated - only left for backward comp. Delete in >= 1.6.0
137
+ "open-mistral-7b": MistralTokenizer.v1,
138
+ "open-mixtral-8x7b": MistralTokenizer.v1,
139
+ "mistral-embed": MistralTokenizer.v1,
140
+ "mistral-small-v1": MistralTokenizer.v2,
141
+ "mistral-large-v1": MistralTokenizer.v2,
142
+ "mistral-small": MistralTokenizer.v3,
143
+ "mistral-large": MistralTokenizer.v3,
144
+ "open-mixtral-8x22b": MistralTokenizer.v3,
145
+ "codestral-22b": MistralTokenizer.v3,
146
+ "mistral-nemo": lambda: MistralTokenizer.v3(is_tekken=True),
147
+ "pixtral": lambda: MistralTokenizer.v3(is_tekken=True, is_mm=True),
148
+ "pixtral-large": lambda: MistralTokenizer.v7(is_mm=True),
149
+ }
150
+
151
+ if not strict:
152
+ warnings.warn(
153
+ "Calling `MistralTokenizer.from_model(..., strict=False)` is deprecated as it can lead to incorrect "
154
+ "tokenizers. It is strongly recommended to use MistralTokenizer.from_model(..., strict=True)` "
155
+ "which will become the default in `mistral_common=1.6.0`."
156
+ "If you are using `mistral_common` for open-sourced model weights, we recommend using "
157
+ "`MistralTokenizer.from_file('<path/to/tokenizer/file>')` instead.",
158
+ FutureWarning,
159
+ )
160
+
161
+ # TODO(Delete this code in mistral_common >= 1.6.0
162
+ # Prefix search the model name mapping
163
+ for model_name, tokenizer_cls in model_name_to_tokenizer_cls.items():
164
+ if model_name in model.lower():
165
+ return tokenizer_cls()
166
+
167
+ if model not in model_name_to_tokenizer_cls:
168
+ raise TokenizerException(f"Unrecognized model: {model}")
169
+
170
+ return model_name_to_tokenizer_cls[model]()
171
+
172
+ @classmethod
173
+ def from_file(
174
+ cls,
175
+ tokenizer_filename: str,
176
+ mode: ValidationMode = ValidationMode.test,
177
+ ) -> "MistralTokenizer":
178
+ """
179
+ Depending on which model we are loading, tokenization and validation might be different. 💩
180
+ """
181
+ tokenizer: Union[SentencePieceTokenizer, Tekkenizer]
182
+
183
+ if is_tekken(tokenizer_filename):
184
+ tokenizer = Tekkenizer.from_file(tokenizer_filename)
185
+ mm_config = tokenizer.multimodal
186
+ elif is_sentencepiece(tokenizer_filename):
187
+ tokenizer = SentencePieceTokenizer(tokenizer_filename)
188
+ mm_config = get_mm_config(tokenizer_filename)
189
+ else:
190
+ raise TokenizerException(f"Unrecognized tokenizer file: {tokenizer_filename}")
191
+
192
+ mm_encoder = load_mm_encoder(mm_config, tokenizer) if mm_config is not None else None
193
+
194
+ request_normalizer = normalizer_for_tokenizer_version(tokenizer.version)
195
+
196
+ if tokenizer.version == TokenizerVersion.v1:
197
+ assert mm_encoder is None, "Tokenizer version needs to be >= v3"
198
+ return MistralTokenizer(
199
+ InstructTokenizerV1(tokenizer),
200
+ validator=MistralRequestValidator(mode=mode),
201
+ request_normalizer=request_normalizer,
202
+ )
203
+ elif tokenizer.version == TokenizerVersion.v2:
204
+ assert mm_encoder is None, "Tokenizer version needs to be >= v3"
205
+ return MistralTokenizer(
206
+ InstructTokenizerV2(tokenizer),
207
+ validator=MistralRequestValidator(mode=mode),
208
+ request_normalizer=request_normalizer,
209
+ )
210
+ elif tokenizer.version == TokenizerVersion.v3:
211
+ return MistralTokenizer(
212
+ InstructTokenizerV3(tokenizer, mm_encoder=mm_encoder),
213
+ validator=MistralRequestValidatorV3(mode=mode),
214
+ request_normalizer=request_normalizer,
215
+ )
216
+ elif tokenizer.version == TokenizerVersion.v7:
217
+ return MistralTokenizer(
218
+ InstructTokenizerV7(tokenizer, mm_encoder=mm_encoder),
219
+ validator=MistralRequestValidatorV3(mode=mode),
220
+ request_normalizer=request_normalizer,
221
+ )
222
+ else:
223
+ raise TokenizerException(f"Unrecognized tokenizer filename: {tokenizer_filename}")
224
+
225
+ raise TokenizerException(f"Unrecognized tokenizer version: {tokenizer.version}")
226
+
227
+ def encode_chat_completion(
228
+ self, request: ChatCompletionRequest[UATS], max_model_input_len: Optional[int] = None
229
+ ) -> TokenizedType:
230
+ validated_request = self._chat_completion_request_validator.validate_request(request)
231
+
232
+ if max_model_input_len is None and request.truncate_for_context_length:
233
+ # the max_model_input_len arg should not be optionnal ;
234
+ # but this function is used in many small scripts that have no use
235
+ # for truncation, and don't provide the max model len
236
+ raise TokenizerException(
237
+ "encoding a chat completion request with truncation, but no max model len was provided",
238
+ )
239
+
240
+ instruct_request = self._instruct_request_normalizer.from_chat_completion_request(validated_request)
241
+
242
+ if request.truncate_for_context_length:
243
+ instruct_request.truncate_at_max_tokens = max_model_input_len
244
+
245
+ return self.instruct_tokenizer.encode_instruct(instruct_request)
246
+
247
+ def encode_fim(self, request: FIMRequest) -> TokenizedType:
248
+ return self.instruct_tokenizer.encode_fim(request)
249
+
250
+ def decode(self, tokens: List[int]) -> str:
251
+ return self.instruct_tokenizer.decode(tokens)
.venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/multimodal.py ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import logging
3
+ from dataclasses import dataclass
4
+ from enum import Enum
5
+ from io import BytesIO
6
+ from typing import Tuple, Union
7
+
8
+ import numpy as np
9
+ from PIL import Image
10
+
11
+ from mistral_common.multimodal import SerializableImage, download_image
12
+ from mistral_common.protocol.instruct.messages import ImageChunk, ImageURLChunk
13
+ from mistral_common.tokens.tokenizers.base import (
14
+ ImageEncoding,
15
+ MultiModalEncoder,
16
+ SpecialImageIDs,
17
+ )
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
+ _cv2_installed: bool
23
+ try:
24
+ import cv2
25
+
26
+ _cv2_installed = True
27
+ except ImportError:
28
+ _cv2_installed = False
29
+ except Exception as e:
30
+ # cv2 has lots of import problems: https://github.com/opencv/opencv-python/issues/884
31
+ # for better UX, let's simply skip all errors that might arise from import for now
32
+ logger.warning(
33
+ f"Warning: Your installation of OpenCV appears to be broken: {e}."
34
+ "Please follow the instructions at https://github.com/opencv/opencv-python/issues/884 "
35
+ "to correct your environment. The import of cv2 has been skipped."
36
+ )
37
+
38
+
39
+ def is_cv2_installed() -> bool:
40
+ return _cv2_installed
41
+
42
+
43
+ def image_from_chunk(chunk: Union[ImageURLChunk, ImageChunk]) -> SerializableImage:
44
+ """Get a serializable image from a chunk."""
45
+ if isinstance(chunk, ImageChunk):
46
+ return chunk.image
47
+ if chunk.get_url().startswith("data:image"):
48
+ data = chunk.get_url().split(",")[1]
49
+ image_data = base64.b64decode(data)
50
+ return Image.open(BytesIO(image_data))
51
+ if chunk.get_url().startswith("http"):
52
+ return download_image(chunk.get_url())
53
+
54
+ raise RuntimeError(f"Unsupported image url scheme {chunk.get_url()}")
55
+
56
+
57
+ DATASET_MEAN = (0.48145466, 0.4578275, 0.40821073) # RGB
58
+ DATASET_STD = (0.26862954, 0.26130258, 0.27577711) # RGB
59
+
60
+
61
+ # only relevant for spm
62
+ class MultiModalVersion(str, Enum):
63
+ m1 = "m1"
64
+
65
+ @property
66
+ def config(self) -> "MultimodalConfig":
67
+ if self.name == "m1":
68
+ return MultimodalConfig(16, 1024)
69
+
70
+ raise NotImplementedError(f"{self.name}")
71
+
72
+
73
+ @dataclass
74
+ class MultimodalConfig:
75
+ image_patch_size: int
76
+ max_image_size: int
77
+
78
+
79
+ def _convert_to_rgb(image: Image.Image) -> Image.Image:
80
+ """
81
+ Convert a PIL image to RGB.
82
+ We ensure transparent background becomes white.
83
+ """
84
+ if image.mode == "RGB":
85
+ return image
86
+ if image.mode != "RGBA":
87
+ image = image.convert("RGBA")
88
+ white_bg: Image.Image = Image.new("RGBA", image.size, "WHITE")
89
+ white_bg.paste(image, (0, 0), image)
90
+ return white_bg.convert("RGB")
91
+
92
+
93
+ def normalize(
94
+ np_image: np.ndarray,
95
+ mean: Tuple[float, float, float],
96
+ std: Tuple[float, float, float],
97
+ ) -> np.ndarray:
98
+ """
99
+ Normalize a tensor image with mean and standard deviation.
100
+
101
+ Args:
102
+ image (np.ndarray): Image to be normalized.
103
+ mean (tuple[float, float, float]): Mean for each channel.
104
+ std (tuple[float, float, float]): Standard deviation for each channel.
105
+
106
+ Returns:
107
+ np.ndarray: Normalized image with shape (C, H, W).
108
+ """
109
+ np_image = np_image / 255.0
110
+
111
+ assert len(np_image.shape) == 3, f"{np_image.shape=}"
112
+ assert np_image.shape[2] == len(mean) == len(std), f"{np_image.shape=}, {mean=}, {std=}"
113
+
114
+ np_image = (np_image - mean) / std
115
+
116
+ return np_image.transpose(2, 0, 1)
117
+
118
+
119
+ def transform_image(image: Image.Image, new_size: Tuple[int, int]) -> np.ndarray:
120
+ if not is_cv2_installed():
121
+ raise ImportError("OpenCV is required for this function. Install it with 'pip install mistral_common[opencv]'")
122
+
123
+ np_image = cv2.resize(np.array(_convert_to_rgb(image), dtype=np.float32), new_size, interpolation=cv2.INTER_CUBIC)
124
+ return normalize(np_image, DATASET_MEAN, DATASET_STD)
125
+
126
+
127
+ class ImageEncoder(MultiModalEncoder):
128
+ def __init__(self, mm_config: MultimodalConfig, special_ids: SpecialImageIDs) -> None:
129
+ self.mm_config = mm_config
130
+ self.special_ids = special_ids
131
+
132
+ def _image_to_num_tokens(self, img: Image.Image) -> Tuple[int, int]:
133
+ w: Union[int, float]
134
+ h: Union[int, float]
135
+
136
+ w, h = img.size
137
+ ratio = max(h / self.mm_config.max_image_size, w / self.mm_config.max_image_size)
138
+ if ratio > 1:
139
+ w = round(w / ratio)
140
+ h = round(h / ratio)
141
+
142
+ width_tokens = (w - 1) // self.mm_config.image_patch_size + 1
143
+ height_tokens = (h - 1) // self.mm_config.image_patch_size + 1
144
+
145
+ return width_tokens, height_tokens
146
+
147
+ def __call__(self, content: Union[ImageChunk, ImageURLChunk]) -> ImageEncoding:
148
+ """
149
+ Converts ImageChunks to numpy image arrays and image token ids
150
+
151
+ Args:
152
+ image (ImageChunk, ImageURLChunk): ImageChunk to be converted
153
+
154
+ Returns:
155
+ ImageEncoding containing image token ids and processed image in numpy format
156
+ """
157
+ image = image_from_chunk(content)
158
+ w, h = self._image_to_num_tokens(image)
159
+ assert w > 0
160
+ assert h > 0
161
+ image_tokens = ([self.special_ids.img] * w + [self.special_ids.img_break]) * h
162
+ image_tokens[-1] = self.special_ids.img_end
163
+ new_image_size = (
164
+ w * self.mm_config.image_patch_size,
165
+ h * self.mm_config.image_patch_size,
166
+ )
167
+ processed_image = transform_image(image, new_image_size)
168
+ return ImageEncoding(tokens=image_tokens, image=processed_image)
169
+
170
+ @property
171
+ def image_token(self) -> int:
172
+ return self.special_ids.img
.venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/sentencepiece.py ADDED
@@ -0,0 +1,672 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import logging
3
+ import os
4
+ from abc import abstractmethod
5
+ from functools import cached_property
6
+ from pathlib import Path
7
+ from typing import Any, Dict, Generic, List, Optional, Set, Tuple, Union
8
+
9
+ import numpy as np
10
+ from sentencepiece import SentencePieceProcessor
11
+
12
+ from mistral_common.exceptions import TokenizerException
13
+ from mistral_common.protocol.instruct.messages import (
14
+ AssistantMessage,
15
+ AssistantMessageType,
16
+ ContentChunk,
17
+ SystemMessage,
18
+ TextChunk,
19
+ ToolMessage,
20
+ UserMessage,
21
+ )
22
+ from mistral_common.protocol.instruct.tool_calls import Tool, ToolCall
23
+ from mistral_common.tokens.instruct.request import FIMRequest, InstructRequest
24
+ from mistral_common.tokens.tokenizers.base import (
25
+ FIMRequestType,
26
+ InstructRequestType,
27
+ InstructTokenizer,
28
+ SpecialTokens,
29
+ Tokenized,
30
+ TokenizedType,
31
+ Tokenizer,
32
+ TokenizerVersion,
33
+ )
34
+ from mistral_common.tokens.tokenizers.multimodal import MultimodalConfig, MultiModalEncoder, MultiModalVersion
35
+
36
+
37
+ def is_sentencepiece(path: Union[str, Path]) -> bool:
38
+ if isinstance(path, str):
39
+ path = Path(path)
40
+
41
+ instruct_versions = list(TokenizerVersion.__members__)
42
+ mm_versions = list(MultiModalVersion.__members__) + [""] # allow no mm version
43
+ suffixes = [f".model.{v}{m}" for v in instruct_versions for m in mm_versions] + [".model"]
44
+
45
+ return path.is_file() and any(path.name.endswith(suffix) for suffix in suffixes)
46
+
47
+
48
+ def get_spm_version(tokenizer_filename: str, raise_deprecated: bool = False) -> TokenizerVersion:
49
+ _version_str = tokenizer_filename.split(".")[-1].split("m")[0]
50
+ if _version_str == "model":
51
+ if raise_deprecated:
52
+ raise TokenizerException(f"Make sure to rename your tokenizer file to end with {tokenizer_filename}.v1.")
53
+
54
+ # tokenizer.model => tokenizer.model.v1
55
+ return TokenizerVersion("v1")
56
+
57
+ if _version_str not in TokenizerVersion.__members__:
58
+ raise TokenizerException(f"Unrecognized tokenizer filename: {tokenizer_filename}")
59
+
60
+ return TokenizerVersion(_version_str)
61
+
62
+
63
def get_mm_config(tokenizer_filename: str) -> Optional[MultimodalConfig]:
    """Extract the multimodal config encoded in the filename suffix, if any.

    A suffix such as ``.v3m1`` carries multimodal version ``m1``; a suffix
    without an ``m`` part means the tokenizer is text-only (returns None).
    """
    suffix = tokenizer_filename.split(".")[-1]
    if "m" not in suffix:
        return None

    mm_version_str = "m" + suffix.split("m")[-1]

    if mm_version_str not in MultiModalVersion.__members__:
        raise TokenizerException(f"Unrecognized tokenizer filename: {tokenizer_filename}")

    return MultiModalVersion(mm_version_str).config
74
+
75
+
76
class SentencePieceTokenizer(Tokenizer):
    """Tokenizer backed by a SentencePiece model file.

    Thin wrapper around ``sentencepiece.SentencePieceProcessor`` implementing
    the project's ``Tokenizer`` interface.
    """

    def __init__(self, model_path: str, tokenizer_version: Optional[TokenizerVersion] = None) -> None:
        self._logger = logging.getLogger(self.__class__.__name__)
        # reload tokenizer
        assert os.path.isfile(model_path), model_path
        self._model = SentencePieceProcessor(model_file=model_path)

        # sanity check: id space and piece space must coincide
        assert self._model.vocab_size() == self._model.get_piece_size()
        # materialize the full vocab once so vocab() is cheap
        self._vocab = [self._model.id_to_piece(i) for i in range(self.n_words)]

        # version given explicitly, or derived from the model filename suffix
        self._version: TokenizerVersion = tokenizer_version or get_spm_version(model_path, raise_deprecated=False)

        super().__init__()

    @property
    def version(self) -> TokenizerVersion:
        """Tokenizer version (v1/v2/...) this model corresponds to."""
        return self._version

    def get_control_token(self, s: str) -> int:
        """Return the id of a control (special) token piece."""
        return self._model.piece_to_id(s)  # type: ignore

    @property
    def n_words(self) -> int:
        """Vocabulary size."""
        return self._model.vocab_size()  # type: ignore

    def vocab(self) -> List[str]:
        """All token pieces, indexed by token id."""
        return self._vocab

    @property
    def bos_id(self) -> int:
        return self._model.bos_id()  # type: ignore

    @property
    def eos_id(self) -> int:
        return self._model.eos_id()  # type: ignore

    @cached_property
    def _control_tokens(self) -> Set[int]:
        # ids the sentencepiece model flags as control tokens; cached once
        return {tok for tok in range(self.n_words) if self._model.IsControl(tok)}

    def encode(self, s: str, bos: bool, eos: bool) -> List[int]:
        """Encode text into token ids, optionally wrapping with BOS/EOS."""
        assert isinstance(s, str)
        t: List[int] = self._model.encode(s)
        if bos:
            t = [self.bos_id, *t]
        if eos:
            t = [*t, self.eos_id]
        return t

    def decode(self, t: List[int]) -> str:
        """Decode token ids back into text (control tokens are dropped by sentencepiece)."""
        return self._model.decode(t)  # type: ignore

    def id_to_piece(self, token_id: int) -> str:
        """Return the raw piece string for a single token id."""
        return self._model.id_to_piece(token_id)  # type: ignore

    def to_string(self, tokens: List[int]) -> str:
        """
        Converts tokens into a string for debugging purposes
        """
        text = ""
        curr_tokens: List[int] = []
        # flush runs of regular tokens together, but render control tokens
        # individually so they stay visible in the output
        for tok in tokens:
            if tok in self._control_tokens:
                if curr_tokens:
                    text += "".join([self.id_to_piece(tok) for tok in curr_tokens])
                    curr_tokens = []

                text += self.id_to_piece(tok)

            else:
                curr_tokens.append(tok)

        if curr_tokens:
            text += "".join([self.id_to_piece(tok) for tok in curr_tokens])

        return text

    @property
    def pad_id(self) -> int:
        return self._model.pad_id()  # type: ignore

    @property
    def unk_id(self) -> int:
        return self._model.unk_id()  # type: ignore
160
+
161
+
162
class InstructTokenizerBase(
    InstructTokenizer, Generic[InstructRequestType, FIMRequestType, TokenizedType, AssistantMessageType]
):
    """Version-independent scaffolding shared by all instruct tokenizers.

    Drives the per-message encoding loop in :meth:`encode_instruct`; the
    version-specific encodings of tool and assistant messages are supplied by
    subclasses via the abstract methods below.
    """

    def __init__(self, tokenizer: Tokenizer, mm_encoder: Optional[MultiModalEncoder] = None):
        self.tokenizer = tokenizer
        self.mm_encoder = mm_encoder
        super().__init__(tokenizer, mm_encoder)

    def start(self) -> List[int]:
        """Tokens every encoded request starts with (BOS only)."""
        return [self.tokenizer.bos_id]

    @staticmethod
    def find_first_last_user(request: InstructRequest) -> Tuple[int, int]:
        """Return indices of the first and last UserMessage, or (-1, -1) if none."""
        last_user_idx = -1
        first_user_idx = -1
        # iterate enumerate() directly; no need to materialize it into a list
        for i, msg in enumerate(request.messages):
            if isinstance(msg, UserMessage):
                if first_user_idx == -1:
                    first_user_idx = i
                last_user_idx = i
        return first_user_idx, last_user_idx

    @abstractmethod
    def encode_tool_message(self, message: ToolMessage, is_before_last_user_message: bool) -> List[int]:
        """Encode a tool-result message (version specific)."""
        raise NotImplementedError("Tool message not implemented")

    @abstractmethod
    def encode_assistant_message(self, message: AssistantMessageType, is_before_last_user_message: bool) -> List[int]:
        """Encode an assistant message (version specific)."""
        raise NotImplementedError("Assistant message not implemented")

    def _truncate_for_max_tokens(
        self,
        tokenized: List[Optional[List[int]]],
        messages: List[AssistantMessageType],
        max_tokens: int,
        last_user_message_index: int,
    ) -> None:
        """Drop per-message token runs in-place so the request fits max_tokens.

        Tokenizer <= V3 does not support truncation, so this is a no-op; V7
        overrides it.
        """
        return

    def encode_instruct(
        self,
        request: InstructRequest[AssistantMessageType, Tool],
    ) -> Tokenized:
        """Encode a full instruct request: BOS followed by each message's tokens.

        A trailing assistant message is additionally exposed as ``prefix_ids``
        (it acts as a completion prefix). Images encountered in user messages
        are collected and returned alongside the tokens.
        """
        # init at bos
        images: List[np.ndarray] = []
        prefix_ids: Optional[List[int]] = None
        tokens_list: List[Optional[List[int]]] = []

        # find last user message
        first_user_idx, last_user_idx = self.find_first_last_user(request)
        for msg_idx, msg in enumerate(request.messages):
            if isinstance(msg, UserMessage):
                new_tokens, new_images = self.encode_user_message(
                    msg,
                    request.available_tools,
                    msg_idx == last_user_idx,
                    msg_idx == first_user_idx,
                    system_prompt=request.system_prompt,
                    force_img_first=True,  # img is always first when providing text/img chunk pair
                )
                images.extend(new_images)
            elif isinstance(msg, ToolMessage):
                new_tokens = self.encode_tool_message(msg, msg_idx < last_user_idx)
            elif isinstance(msg, AssistantMessage):
                new_tokens = self.encode_assistant_message(msg, msg_idx < last_user_idx)
                if msg_idx == len(request.messages) - 1:
                    prefix_ids = new_tokens
            elif isinstance(msg, SystemMessage):
                new_tokens = self.encode_system_message(msg)
            else:
                # fix: an unsupported message type previously either raised
                # UnboundLocalError or silently re-appended the previous
                # message's tokens; fail loudly instead
                raise TokenizerException(f"Unknown message type: {type(msg)}")

            tokens_list.append(new_tokens)

        if request.truncate_at_max_tokens is not None:
            self._truncate_for_max_tokens(
                tokens_list,
                request.messages,
                request.truncate_at_max_tokens,
                last_user_idx,
            )
        tokens = self.start()

        for tok in tokens_list:
            if tok is not None:
                tokens.extend(tok)

        return Tokenized(
            tokens=tokens,
            text=self.tokenizer.to_string(tokens),
            prefix_ids=prefix_ids,
            images=images,
        )

    def decode(self, tokens: List[int]) -> str:
        """Decode token ids back to text."""
        return self.tokenizer.decode(tokens)
258
+
259
+
260
class InstructTokenizerV1(
    InstructTokenizerBase, Generic[InstructRequestType, FIMRequestType, TokenizedType, AssistantMessageType]
):
    """V1 instruct tokenizer: plain ``[INST] ... [/INST]`` text wrapping.

    No tools, no images, no FIM. The system prompt is prepended to the first
    user message.
    """

    def encode_user_message(
        self,
        message: UserMessage,
        available_tools: Optional[List[Tool]],
        is_last: bool,
        is_first: bool,
        system_prompt: Optional[str] = None,
        force_img_first: bool = False,
    ) -> Tuple[List[int], List[np.ndarray]]:
        """Wrap normalized text-only user content in [INST] markers and tokenize."""
        assert message.content is not None
        assert isinstance(message.content, str), "Message content must be normalized"
        assert self.mm_encoder is None, "InstructTokenizerV1 cannot encode images"

        # in V1 the system prompt rides along with the first user message
        if is_first and system_prompt:
            body = system_prompt + "\n\n" + message.content
        else:
            body = message.content

        return self.encode_user_content(content=f"[INST] {body} [/INST]", is_last=False, system_prompt=None)

    def encode_user_content(
        self,
        content: Union[str, List[ContentChunk]],
        is_last: bool,
        system_prompt: Optional[str] = None,
        force_img_first: bool = False,
    ) -> Tuple[List[int], List[np.ndarray]]:
        """Tokenize plain text content; prepend the system prompt on the last message."""
        assert isinstance(content, str)

        text = system_prompt + "\n\n" + content if (is_last and system_prompt) else content
        return self.tokenizer.encode(text, bos=False, eos=False), []

    def encode_tool_message(self, message: ToolMessage, is_before_last_user_message: bool) -> List[int]:
        """Tools are not supported in V1."""
        raise TokenizerException("Tools not implemented for tokenizer V1")

    def encode_assistant_message(self, message: AssistantMessageType, is_before_last_user_message: bool) -> List[int]:
        """Tokenize assistant text; append EOS unless it is a prefix (partial) message."""
        assert isinstance(message, AssistantMessage), message
        if message.tool_calls is not None and len(message.tool_calls) > 0:
            raise TokenizerException("Tools not implemented for tokenizer V1")
        if not message.content:
            raise TokenizerException(f"{message.content} // {message.tool_calls}")

        encoded = self.tokenizer.encode(message.content, bos=False, eos=False)
        if not message.prefix:
            encoded.append(self.tokenizer.eos_id)
        return encoded

    def encode_fim(self, request: FIMRequest) -> Tokenized:
        """Fill-in-the-middle is not supported in V1."""
        raise TokenizerException("FIM not available for tokenizer V1")
318
+
319
+
320
class InstructTokenizerV2(
    InstructTokenizerV1, Generic[InstructRequestType, FIMRequestType, TokenizedType, AssistantMessageType]
):
    """V2 instruct tokenizer.

    Adds dedicated control tokens for instructions, available tools, tool
    results, tool calls, and FIM prefix/suffix.
    """

    def __init__(self, tokenizer: Tokenizer, mm_encoder: Optional[MultiModalEncoder] = None):
        super().__init__(tokenizer, mm_encoder)
        # resolve all special-token ids once up front
        self.BEGIN_INST = self.tokenizer.get_control_token(SpecialTokens.begin_inst.value)
        self.END_INST = self.tokenizer.get_control_token(SpecialTokens.end_inst.value)
        self.BEGIN_AVAILABLE_TOOLS = self.tokenizer.get_control_token(SpecialTokens.begin_tools.value)
        self.END_AVAILABLE_TOOLS = self.tokenizer.get_control_token(SpecialTokens.end_tools.value)
        self.BEGIN_TOOL_RESULTS = self.tokenizer.get_control_token(SpecialTokens.begin_tool_results.value)
        self.END_TOOL_RESULTS = self.tokenizer.get_control_token(SpecialTokens.end_tool_results.value)
        self.TOOL_CALLS = self.tokenizer.get_control_token(SpecialTokens.tool_calls.value)
        self.BOS = self.tokenizer.get_control_token(SpecialTokens.bos.value)
        self.PREFIX = self.tokenizer.get_control_token(SpecialTokens.prefix.value)
        self.SUFFIX = self.tokenizer.get_control_token(SpecialTokens.suffix.value)

    def encode_user_message(
        self,
        message: UserMessage,
        available_tools: Optional[List[Tool]],
        is_last: bool,
        is_first: bool,
        system_prompt: Optional[str] = None,
        force_img_first: bool = False,
    ) -> Tuple[List[int], List[np.ndarray]]:
        """Encode a user message as [tools?][BEGIN_INST] content [END_INST].

        Available tools are serialized as JSON and emitted only before the
        last user message.
        """
        assert message.content is not None
        tools_tokens: List[int] = []
        if is_last and available_tools:
            tools = [tool.model_dump() for tool in available_tools]
            tools_json_tokens = self.tokenizer.encode(json.dumps(tools, ensure_ascii=False), bos=False, eos=False)
            tools_tokens = [
                self.BEGIN_AVAILABLE_TOOLS,
                *tools_json_tokens,
                self.END_AVAILABLE_TOOLS,
            ]

        tokens, image_tokens = self.encode_user_content(
            content=message.content,
            is_last=is_last,
            system_prompt=system_prompt,
            force_img_first=force_img_first,
        )

        prefix_tokens = [*tools_tokens, self.BEGIN_INST]
        suffix_tokens = [self.END_INST]

        curr_tokens = prefix_tokens + tokens + suffix_tokens

        return curr_tokens, image_tokens

    def _parse_json_content(self, content: str) -> Any:
        # return the parsed object when content is valid JSON, else the raw string
        try:
            return json.loads(content)
        except json.JSONDecodeError:
            return content

    def _prepare_tool_result(self, tool_message: ToolMessage) -> Dict[str, Any]:
        """
        Bit of a hack due to the way tool results are tokenized
        """
        assert tool_message.content is not None, "Tool message content cannot be None"
        return {
            "name": tool_message.name,
            "content": self._parse_json_content(tool_message.content),
        }

    def encode_tool_message(self, message: ToolMessage, is_before_last_user_message: bool) -> List[int]:
        """Encode a tool result wrapped in BEGIN/END_TOOL_RESULTS.

        Tool results occurring before the last user message are skipped
        entirely (returns []).
        """
        if is_before_last_user_message:
            # don't tokenize last tool response before last user msg
            return []

        # Currently only supports single tool results
        tool_result_str = json.dumps([self._prepare_tool_result(message)], ensure_ascii=False)
        curr_tokens = [
            self.BEGIN_TOOL_RESULTS,
            *self.tokenizer.encode(tool_result_str, bos=False, eos=False),
            self.END_TOOL_RESULTS,
        ]
        return curr_tokens

    def _prepare_function_call(self, tool_call: ToolCall) -> Dict[str, Any]:
        """
        Bit of a hack due to the way function calls are tokenized
        """
        return {
            "name": tool_call.function.name,
            "arguments": self._parse_json_content(tool_call.function.arguments),
        }

    def _encode_normal_content_assistant_message(self, message: AssistantMessageType) -> List[int]:
        # trailing spaces are stripped; sentencepiece handles leading whitespace
        assert message.content, f"Assistant message must have content. Got {message}"
        return self.tokenizer.encode(message.content.rstrip(" "), bos=False, eos=False)

    def _encode_tool_calls_in_assistant_message(self, message: AssistantMessageType) -> List[int]:
        """Serialize tool calls as JSON and prefix them with the TOOL_CALLS token."""
        assert message.tool_calls, f"Assistant message must have tool calls. Got {message}"
        prepared_tool_calls = []
        for tool_call in message.tool_calls:
            prepared_tool_calls.append(self._prepare_function_call(tool_call))
        tool_call_str = json.dumps(prepared_tool_calls, ensure_ascii=False)
        curr_tokens = [
            self.TOOL_CALLS,
            *self.tokenizer.encode(tool_call_str, bos=False, eos=False),
        ]
        return curr_tokens

    def encode_assistant_message(self, message: AssistantMessageType, is_before_last_user_message: bool) -> List[int]:
        """Encode assistant content or tool calls; append EOS unless a prefix message.

        Tool calls occurring before the last user message are skipped.
        """
        if message.tool_calls:
            if is_before_last_user_message:
                # don't tokenize tool call before last user message
                return []
            curr_tokens = self._encode_tool_calls_in_assistant_message(message)
        elif message.content:
            curr_tokens = self._encode_normal_content_assistant_message(message)
        else:
            raise TokenizerException(f"Invalid assistant message: {message.content}")
        if not message.prefix:
            curr_tokens.append(self.tokenizer.eos_id)
        return curr_tokens

    def _encode_infilling(self, text: str) -> List[int]:
        """
        Remove prefix space in the case of SentencePieceTokenizers
        Thanks Fabian !
        """
        # encode with a throw-away leading char, then drop its tokens
        return self.tokenizer.encode("☺" + text, bos=False, eos=False)[2:]

    def encode_fim(self, request: FIMRequest) -> Tokenized:
        """Encode a fill-in-the-middle request as [BOS][SUFFIX]suffix[PREFIX]prefix."""
        prefix_tokens = self.tokenizer.encode(request.prompt, bos=False, eos=False)
        suffix_tokens = self._encode_infilling(request.suffix) if request.suffix else []
        tokens = [
            self.BOS,
            self.SUFFIX,
            *suffix_tokens,
            self.PREFIX,
            *prefix_tokens,
        ]
        return Tokenized(tokens=tokens, text=self.tokenizer.to_string(tokens))
458
+
459
+
460
class InstructTokenizerV3(
    InstructTokenizerV2, Generic[InstructRequestType, FIMRequestType, TokenizedType, AssistantMessageType]
):
    """
    The only difference with V3 tokenizer is that it encodes the tool messages differently
    """

    def __init__(self, tokenizer: Tokenizer, mm_encoder: Optional[MultiModalEncoder] = None) -> None:
        super().__init__(tokenizer, mm_encoder=mm_encoder)

    def _prepare_function_call(self, tool_call: ToolCall) -> Dict[str, Any]:
        """Serialize a tool call; V3 additionally carries the call id (when set)."""
        function_call = {
            "name": tool_call.function.name,
            "arguments": self._parse_json_content(tool_call.function.arguments),
        }

        if tool_call.id and tool_call.id != "null":
            function_call["id"] = tool_call.id

        return function_call

    def _prepare_tool_result(self, tool_message: ToolMessage) -> Dict[str, Any]:
        """Serialize a tool result; V3 keys it by call_id instead of tool name."""
        assert tool_message.content is not None, "Tool message content cannot be None"
        assert tool_message.tool_call_id is not None, "Tool message has to have the tool call id defined in v3"

        return {
            "content": self._parse_json_content(tool_message.content),
            "call_id": tool_message.tool_call_id,
        }

    def encode_tool_message(self, message: ToolMessage, is_before_last_user_message: bool) -> List[int]:
        """
        Same as V2 but tools not wrapped in a list and history is tokenized also
        """
        tool_result_str = json.dumps(self._prepare_tool_result(message), ensure_ascii=False)
        curr_tokens = [
            self.BEGIN_TOOL_RESULTS,
            *self.tokenizer.encode(tool_result_str, bos=False, eos=False),
            self.END_TOOL_RESULTS,
        ]
        return curr_tokens

    def encode_assistant_message(self, message: AssistantMessageType, is_before_last_user_message: bool) -> List[int]:
        """
        Same as V2 but always encode tool history
        """
        return super().encode_assistant_message(message, False)

    def encode_user_content(
        self,
        content: Union[str, List[ContentChunk]],
        is_last: bool,
        system_prompt: Optional[str] = None,
        force_img_first: bool = False,
    ) -> Tuple[List[int], List[np.ndarray]]:
        """Encode user content that may mix text and image chunks.

        Plain strings defer to V2. For chunk lists, the system prompt (when
        this is the last user message) is prepended to the first chunk's text.
        """
        if isinstance(content, str):
            return super().encode_user_content(content, is_last, system_prompt)

        tokens: List[int] = []
        images: List[np.ndarray] = []

        has_one_img_one_text_first = (
            len(content) == 2 and isinstance(content[0], TextChunk) and not isinstance(content[1], TextChunk)
        )
        if force_img_first and has_one_img_one_text_first:
            # make sure that if exactly one image and text chunk are passed we force the image chunk to be first
            content = [content[1], content[0]]

        first_chunk = True
        for chunk in content:
            # fix: previously this loop rebound `content` (the iterable being
            # iterated) as the per-chunk text accumulator; use a distinct name
            chunk_text = ""
            if first_chunk and is_last and system_prompt:
                first_chunk = False
                chunk_text = system_prompt + "\n\n"
            if isinstance(chunk, TextChunk):
                chunk_text += chunk.text
                tokens.extend(self.tokenizer.encode(chunk_text, bos=False, eos=False))
            else:
                assert self.mm_encoder is not None, "Make sure to define a multi-modal encoder at init"
                if chunk_text:
                    # emit any pending system-prompt text before the image tokens
                    tokens.extend(self.tokenizer.encode(chunk_text, bos=False, eos=False))

                img_encoding = self.mm_encoder(chunk)

                tokens.extend(img_encoding.tokens)
                images.append(img_encoding.image)

        return tokens, images
548
+
549
+
550
class InstructTokenizerV7(InstructTokenizerV3):
    """
    The difference with V3 tokenizer is that it encodes the system prompts differently:
    - in V7 the system prompts are treated as separate SystemMessages
    - they are no longer prepended to the last user message
    - they are printed between special tokens
    Tool call results are encoded as :
    - [begin tool call] call_id_tokens [tool_content] content tokens [end tool call]
    """

    def __init__(self, tokenizer: Tokenizer, mm_encoder: Optional[MultiModalEncoder] = None) -> None:
        super().__init__(tokenizer, mm_encoder)
        self.BEGIN_SYSTEM = self.tokenizer.get_control_token(SpecialTokens.begin_system.value)
        self.END_SYSTEM = self.tokenizer.get_control_token(SpecialTokens.end_system.value)
        self.BEGIN_TOOL_CONTENT = self.tokenizer.get_control_token(SpecialTokens.begin_tool_content.value)

    def _truncate_for_max_tokens(
        self,
        tokenized_messages: List[Optional[List[int]]],
        messages: List[AssistantMessageType],
        max_tokens: int,
        last_user_message_index: int,
    ) -> None:
        """Drop whole messages in-place (set their slot to None) to fit max_tokens.

        Raises TokenizerException when even after dropping everything allowed
        the request still exceeds the budget.
        """
        # drop some messages to fit in max_tokens. Rules:
        # - don't drop any system messages
        # - when a user message is dropped, all following assistant|tool message should be dropped until the next
        #   user message
        # - we never drop the last message
        to_drop = sum(len(t) for t in tokenized_messages if t is not None) - max_tokens

        def drop(idx: int) -> None:
            # drop message idx unless protected; updates the remaining budget
            nonlocal to_drop
            if isinstance(messages[idx], SystemMessage):
                # never drop system messages
                return
            if idx == last_user_message_index:
                # never drop the last user message
                return
            tok = tokenized_messages[idx]
            assert tok is not None
            to_drop -= len(tok)
            tokenized_messages[idx] = None

        current_idx = 0
        while to_drop > 0 and current_idx < len(messages):
            drop(current_idx)
            current_idx += 1
            if isinstance(messages[current_idx - 1], UserMessage):
                # if we just dropped a UserMessage,
                # also drop everything until the next user message
                while current_idx < len(messages) and not isinstance(messages[current_idx], UserMessage):
                    drop(current_idx)
                    current_idx += 1

        if to_drop > 0:
            raise TokenizerException("Input couldn't fit in truncate_at_max_token")

    def encode_system_message(self, message: SystemMessage) -> List[int]:
        """Encode a system message as [BEGIN_SYSTEM] content [END_SYSTEM]."""
        assert message.content is not None
        assert isinstance(message.content, str), "Message content must be normalized"
        tokens = [
            self.BEGIN_SYSTEM,
            *self.tokenizer.encode(message.content, bos=False, eos=False),
            self.END_SYSTEM,
        ]
        return tokens

    def encode_user_message(
        self,
        message: UserMessage,
        available_tools: Optional[List[Tool]],
        is_last: bool,
        is_first: bool,
        system_prompt: Optional[str] = None,
        force_img_first: bool = False,
    ) -> Tuple[List[int], List[np.ndarray]]:
        """Same as V3 except the system prompt must never ride along (asserted)."""
        assert system_prompt is None, "in Tokenizer V7 we don't encode system prompts in user messages"
        return super().encode_user_message(
            message,
            available_tools,
            is_last=is_last,
            is_first=is_first,
            system_prompt=None,
            force_img_first=force_img_first,
        )

    def encode_tool_message(self, message: ToolMessage, is_before_last_user_message: bool) -> List[int]:
        """
        Same as V3 but tools not wrapped in a list and history is tokenized also
        """
        # layout: [BEGIN_TOOL_RESULTS] call_id [BEGIN_TOOL_CONTENT] content [END_TOOL_RESULTS]
        assert message.tool_call_id is not None
        tool_call_id_tokens = self.tokenizer.encode(message.tool_call_id, bos=False, eos=False)
        # NOTE(review): message.content is passed straight to encode(), so it is
        # presumably already normalized to a str here — confirm against callers
        tokens = self.tokenizer.encode(message.content, bos=False, eos=False)

        prefix_tokens = [
            self.BEGIN_TOOL_RESULTS,
            *tool_call_id_tokens,
            self.BEGIN_TOOL_CONTENT,
        ]
        curr_tokens = [
            *prefix_tokens,
            *tokens,
            self.END_TOOL_RESULTS,
        ]
        return curr_tokens

    def encode_assistant_message(self, message: AssistantMessageType, is_before_last_user_message: bool) -> List[int]:
        """Encode assistant content (text and/or chunks) followed by any tool calls.

        Unlike V2, content and tool calls may both be present; EOS is appended
        unless this is a prefix (partial) message.
        """
        if not message.content and not message.tool_calls:
            raise TokenizerException(f"Invalid assistant message: {message}")
        curr_tokens: list = []
        if message.content:
            if isinstance(message.content, str):
                curr_tokens += self._encode_normal_content_assistant_message(message)
            elif isinstance(message.content, list):
                curr_tokens += self.encode_content_chunks(
                    message.content, is_last=False, system_prompt=None, force_img_first=True
                ).tokens
        if message.tool_calls:
            curr_tokens += self._encode_tool_calls_in_assistant_message(message)
        if not message.prefix:
            curr_tokens.append(self.tokenizer.eos_id)

        return curr_tokens
.venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/tekken.py ADDED
@@ -0,0 +1,312 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import json
3
+ import logging
4
+ from enum import Enum
5
+ from functools import cached_property
6
+ from itertools import groupby
7
+ from pathlib import Path
8
+ from typing import Dict, List, Optional, Type, TypedDict, Union
9
+
10
+ import tiktoken
11
+
12
+ from mistral_common.tokens.tokenizers.base import (
13
+ SpecialTokens,
14
+ Tokenizer,
15
+ TokenizerVersion,
16
+ )
17
+ from mistral_common.tokens.tokenizers.multimodal import MultimodalConfig
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
def is_tekken(path: Union[str, Path]) -> bool:
    """Return True if *path* is an existing tekken tokenizer file (``*tekken*.json``)."""
    p = Path(path) if isinstance(path, str) else path
    return p.is_file() and "tekken" in p.name and p.suffix == ".json"
26
+
27
+
28
+ # Formatting specification of the JSON file
29
class TokenInfo(TypedDict):
    """One vocabulary entry of the tekken JSON file."""

    rank: int  # merge rank; must equal the entry's index in the vocab list
    token_bytes: str  # base64 encoded
    token_str: Optional[str]  # human-readable form, None when not valid text
33
+
34
+
35
class TekkenConfig(TypedDict):
    """The ``config`` section of the tekken JSON file."""

    pattern: str  # regex used by tiktoken to pre-split text
    num_vocab_tokens: int  # number of entries in the vocab list
    default_vocab_size: int  # total vocab size incl. special tokens
    default_num_special_tokens: int  # ids reserved at the front for special tokens
    version: str  # tokenizer version string, must be a TokenizerVersion member
41
+
42
+
43
class ModelData(TypedDict):
    """Top-level structure of the tekken JSON file."""

    vocab: List[TokenInfo]  # ordered vocabulary entries
    config: TekkenConfig
    version: int  # file-format version (distinct from config["version"])
    type: str
    multimodal: MultimodalConfig  # optional; parsed from dict in from_file
49
+
50
+
51
class SpecialTokenPolicy(Enum):
    """What to do with special tokens when encoding/decoding."""

    IGNORE = 0  # silently drop special tokens on decode
    KEEP = 1  # render special tokens as their string form
    RAISE = 2  # raise when decoding a special token
57
+
58
+
59
class Tekkenizer(Tokenizer):
    """Tokenizer backed by a tiktoken BPE model loaded from a tekken JSON file.

    The id space is split in two: ids ``[0, num_special_tokens)`` are special
    tokens handled manually by this class; every other id is the underlying
    tiktoken id shifted up by ``num_special_tokens``.
    """

    # fixed list of named special tokens; remaining reserved slots are filled
    # with <SPECIAL_{id}> placeholders
    SPECIAL_TOKENS = (
        "<unk>",
        SpecialTokens.bos,
        SpecialTokens.eos,
        SpecialTokens.begin_inst,
        SpecialTokens.end_inst,
        SpecialTokens.begin_tools,
        SpecialTokens.end_tools,
        SpecialTokens.begin_tool_results,
        SpecialTokens.end_tool_results,
        SpecialTokens.tool_calls,
        SpecialTokens.img,
        "<pad>",
        SpecialTokens.img_break,
        SpecialTokens.img_end,
        SpecialTokens.prefix,
        SpecialTokens.middle,
        SpecialTokens.suffix,
        SpecialTokens.begin_system,
        SpecialTokens.end_system,
        SpecialTokens.begin_tool_content,
    )
    SPECIAL_TOKEN_TEMPLATE = "<SPECIAL_{id}>"

    # NOTE: the params file has a vocab_size field, but it's not used

    def __init__(
        self,
        vocab: List[TokenInfo],
        pattern: str,
        vocab_size: int,
        num_special_tokens: int,
        version: TokenizerVersion,
        *,
        name: str = "tekkenizer",
        _path: Optional[str] = None,
        mm_config: Optional[MultimodalConfig] = None,
    ):
        assert vocab_size <= len(vocab) + num_special_tokens, (
            vocab_size,
            len(vocab),
            num_special_tokens,
        )
        self._vocab_size = vocab_size
        self._path = _path

        special_tokens = list(self.SPECIAL_TOKENS)
        assert len(special_tokens) == len(set(special_tokens)), f"Special tokens must be unique: {special_tokens}"
        assert len(special_tokens) < num_special_tokens

        # pad the special-token table up to num_special_tokens with placeholders
        special_filler = [
            self.SPECIAL_TOKEN_TEMPLATE.format(id=i) for i in range(len(special_tokens), num_special_tokens)
        ]
        if special_filler:
            logger.info(f"Adding special tokens {special_filler[0]}, ..., {special_filler[-1]}")
        special_tokens = special_tokens + special_filler
        assert len(set(special_tokens)) == len(special_tokens) == num_special_tokens, special_tokens
        # size of the tiktoken (non-special) vocab
        inner_vocab_size = vocab_size - num_special_tokens

        # reload vocab
        self._tekken_token2id_nospecial = _reload_mergeable_ranks(vocab, max_vocab=inner_vocab_size)
        assert set(range(inner_vocab_size)) == set(self._tekken_token2id_nospecial.values()), (
            inner_vocab_size,
            self._tekken_token2id_nospecial,
        )

        self._model = tiktoken.Encoding(
            name=name,
            pat_str=pattern,
            mergeable_ranks=self._tekken_token2id_nospecial,
            special_tokens={},  # special tokens are handled manually
        )
        self._all_special_tokens = special_tokens
        self._vocab = [self.id_to_piece(i) for i in range(vocab_size)]
        self._version = version
        self._special_token_policy = SpecialTokenPolicy.RAISE
        self._mm_config = mm_config

    @classmethod
    def from_file(cls: Type["Tekkenizer"], path: Union[str, Path]) -> "Tekkenizer":
        """Load a Tekkenizer from a tekken JSON file on disk."""
        if isinstance(path, str):
            path = Path(path)
        assert path.exists()
        with open(path, "r") as f:
            untyped = json.load(f)
        if mm := untyped.get("multimodal", None):
            untyped["multimodal"] = MultimodalConfig(**mm)
        model_data: ModelData = untyped

        _version_str = model_data["config"].get("version")
        if _version_str not in TokenizerVersion.__members__:
            raise ValueError(
                f"Unknown version: {_version_str} in {path}. "
                f"Make sure to use a valid version string: {list(TokenizerVersion.__members__)}"
            )

        return cls(
            vocab=model_data["vocab"],
            pattern=model_data["config"]["pattern"],
            vocab_size=model_data["config"]["default_vocab_size"],
            num_special_tokens=model_data["config"]["default_num_special_tokens"],
            version=TokenizerVersion(_version_str),
            name=path.name.replace(".json", ""),
            _path=str(path),
            mm_config=model_data.get("multimodal"),
        )

    @property
    def multimodal(self) -> Optional[MultimodalConfig]:
        """Multimodal config this tokenizer was loaded with, if any."""
        return self._mm_config

    @multimodal.setter
    def multimodal(self, value: MultimodalConfig) -> None:
        raise ValueError("Can only set Multimodal config at init")

    @property
    def num_special_tokens(self) -> int:
        # also the offset between public token ids and internal tiktoken ids
        return len(self._all_special_tokens)

    @property
    def n_words(self) -> int:
        """Total vocabulary size, special tokens included."""
        return self._vocab_size

    @property
    def version(self) -> TokenizerVersion:
        return self._version

    @property
    def special_token_policy(self) -> SpecialTokenPolicy:
        """How decode() treats special tokens (RAISE by default)."""
        return self._special_token_policy

    @special_token_policy.setter
    def special_token_policy(self, policy: SpecialTokenPolicy) -> None:
        self._special_token_policy = policy

    @cached_property
    def bos_id(self) -> int:
        return self.SPECIAL_TOKENS.index("<s>")

    @cached_property
    def eos_id(self) -> int:
        return self.SPECIAL_TOKENS.index("</s>")

    @cached_property
    def pad_id(self) -> int:
        return self.SPECIAL_TOKENS.index("<pad>")

    @cached_property
    def unk_id(self) -> int:
        return self.SPECIAL_TOKENS.index("<unk>")

    def vocab(self) -> List[str]:
        # when returning self._vocab this will collapse
        # all tokens for which we have a decoding error into
        # the <?> string. This is bad and results in things
        # like len(set(vocab)) != len(vocab))
        # be careful when using self._vocab
        return self._vocab

    def encode(self, s: str, bos: bool, eos: bool) -> List[int]:
        """Encode text; tiktoken ids are shifted up past the special-token range."""
        tokens: List[int] = self._model.encode(s)
        tokens = [t + self.num_special_tokens for t in tokens]
        if bos:
            tokens = [self.bos_id, *tokens]
        if eos:
            tokens = [*tokens, self.eos_id]
        return tokens

    def _decode_all(self, tokens: List[int], special_token_policy: SpecialTokenPolicy) -> List[str]:
        """Decode ids to string fragments, handling special tokens per *special_token_policy*."""
        # Lump special and non-special tokens together to minimize calls to decode
        decoded: List[str] = []
        for is_special, group in groupby(tokens, lambda t: t < self.num_special_tokens):
            if is_special:
                if special_token_policy == SpecialTokenPolicy.RAISE:
                    raise ValueError(
                        f"Decoding `tokens` that contain special tokens ({list(group)}) is not allowed. \n"
                        "Either make sure `tokens` do not include any special tokens or, "
                        "if you want to decode `tokens` that includes special tokens, "
                        "change the tokenizer's special token policy to IGNORE or KEEP: \n"
                        "```\nfrom mistral_common.tokens.tokenizers.mistral import MistralTokenizer"
                        "\nfrom mistral_common.tokens.tokenizers.tekken import SpecialTokenPolicy"
                        "\n\ntokenizer = MistralTokenizer.v3(is_tekken=True)"
                        "\ntekken = tokenizer.instruct_tokenizer.tokenizer"
                        "\ntekken.special_token_policy = SpecialTokenPolicy.IGNORE # or SpecialTokenPolicy.KEEP"
                        "\n```"
                    )
                elif special_token_policy == SpecialTokenPolicy.KEEP:
                    decoded.extend(self._all_special_tokens[t] for t in group)
                elif special_token_policy == SpecialTokenPolicy.IGNORE:
                    continue
                # TODO: Could use "tokens_str" from vocab.json
                # but need to handle null cases.
            else:
                decoded.append(self._model.decode([t - self.num_special_tokens for t in group]))
        return decoded

    def is_byte(self, token_id: int) -> bool:
        """True when *token_id* is one of the 256 raw single-byte tokens."""
        return 0 <= token_id - self.num_special_tokens < 256

    def get_control_token(self, s: str) -> int:
        """Return the id of a named special token; raises ValueError if unknown."""
        try:
            return self._all_special_tokens.index(s)
        except ValueError:
            raise ValueError(f"Unknown control token {s}")

    def decode(self, tokens: List[int]) -> str:
        """Decode ids to text, honoring the configured special-token policy."""
        return "".join(self._decode_all(tokens, special_token_policy=self._special_token_policy))

    def to_string(self, tokens: List[int]) -> str:
        """Decode ids to text for debugging, always rendering special tokens."""
        return "".join(self._decode_all(tokens, special_token_policy=SpecialTokenPolicy.KEEP))

    def id_to_piece(self, token_id: int) -> str:
        """convert a token id to its string representation."""
        return self._decode_all([token_id], special_token_policy=SpecialTokenPolicy.KEEP)[0]

    def id_to_byte_piece(self, token_id: int) -> bytes:
        """convert a token id to its byte representation."""
        if token_id < self.num_special_tokens:
            if self._special_token_policy == SpecialTokenPolicy.KEEP:
                return self._all_special_tokens[token_id].encode("utf-8")
            elif self._special_token_policy == SpecialTokenPolicy.RAISE:
                raise ValueError(f"{token_id} is a special token")

        return self._model.decode_single_token_bytes(token_id - self.num_special_tokens)
284
+
285
+
286
def _reload_mergeable_ranks(
    vocab: List[TokenInfo],
    max_vocab: Union[int, None] = None,
) -> Dict[bytes, int]:
    """
    Reload our tokenizer JSON file and convert it to Tiktoken format.

    Optionally truncates to the first *max_vocab* entries. Validates that
    ranks are contiguous and that the first 256 entries are the raw bytes.
    """
    logger.info(f"Vocab size: {len(vocab)}")
    if max_vocab is not None:
        assert len(vocab) >= max_vocab, (len(vocab), max_vocab)
        vocab = vocab[:max_vocab]
        logger.info(f"Cutting vocab to first {len(vocab)} tokens.")

    # build the bytes -> rank mapping tiktoken expects
    ranks: Dict[bytes, int] = {}
    for position, entry in enumerate(vocab):
        assert entry.keys() == {"rank", "token_bytes", "token_str"}
        assert entry["rank"] == position
        token_bytes = base64.b64decode(entry["token_bytes"])
        # the first 256 entries must be exactly the single raw bytes, in order
        assert position >= 256 or token_bytes == bytes([position]), (position, token_bytes)
        ranks[token_bytes] = entry["rank"]

    # sanity check: no duplicate byte sequences, ranks are 0..N-1
    assert len(ranks) == len(vocab)
    assert set(ranks.values()) == set(range(len(ranks)))

    return ranks
.venv/lib/python3.11/site-packages/mistral_common/tokens/tokenizers/utils.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ from typing import Iterator, List
2
+
3
+
4
def chunks(lst: List[str], chunk_size: int) -> Iterator[List[str]]:
    """Lazily yield consecutive ``chunk_size``-item slices of *lst*; the final slice may be shorter."""
    for offset in range(0, len(lst), chunk_size):
        yield lst[offset : offset + chunk_size]
.venv/lib/python3.11/site-packages/numpy/ma/tests/__init__.py ADDED
File without changes
.venv/lib/python3.11/site-packages/numpy/ma/tests/__pycache__/test_deprecations.cpython-311.pyc ADDED
Binary file (5.43 kB). View file