JeffYang52415 committed on
Commit
d5bfb91
·
unverified ·
1 Parent(s): 96e839c
Files changed (3) hide show
  1. .pre-commit-config.yaml +7 -8
  2. poetry.lock +47 -50
  3. pyproject.toml +2 -1
.pre-commit-config.yaml CHANGED
@@ -7,12 +7,6 @@ repos:
7
  - id: ruff
8
  args: [--fix]
9
  - id: ruff-format
10
- - repo: https://github.com/PyCQA/flake8
11
- rev: 6.1.0
12
- hooks:
13
- - id: flake8
14
- additional_dependencies: ["typing-extensions>=4.8.0"]
15
- args: ["--ignore=E203, E501, W503, E501"]
16
  - repo: https://github.com/PyCQA/isort
17
  rev: 5.12.0
18
  hooks:
@@ -23,7 +17,12 @@ repos:
23
  hooks:
24
  - id: mypy
25
  args:
26
- ["--install-types", "--non-interactive", "--ignore-missing-imports"]
 
 
 
 
 
27
  additional_dependencies:
28
  - "typing-extensions>=4.8.0"
29
  - repo: https://github.com/pre-commit/pre-commit-hooks
@@ -97,4 +96,4 @@ repos:
97
  hooks:
98
  - id: bandit
99
  args: ["-c", "pyproject.toml"]
100
- additional_dependencies: ["bandit[toml]"]
 
7
  - id: ruff
8
  args: [--fix]
9
  - id: ruff-format
 
 
 
 
 
 
10
  - repo: https://github.com/PyCQA/isort
11
  rev: 5.12.0
12
  hooks:
 
17
  hooks:
18
  - id: mypy
19
  args:
20
+ [
21
+ "--config-file=pyproject.toml",
22
+ "--install-types",
23
+ "--non-interactive",
24
+ "--ignore-missing-imports",
25
+ ]
26
  additional_dependencies:
27
  - "typing-extensions>=4.8.0"
28
  - repo: https://github.com/pre-commit/pre-commit-hooks
 
96
  hooks:
97
  - id: bandit
98
  args: ["-c", "pyproject.toml"]
99
+ additional_dependencies: ["bandit[toml]", ".[toml]"]
poetry.lock CHANGED
@@ -1,4 +1,4 @@
1
- # This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand.
2
 
3
  [[package]]
4
  name = "aiofiles"
@@ -618,45 +618,45 @@ tests = ["pytest", "pytest-cov", "pytest-xdist"]
618
 
619
  [[package]]
620
  name = "datasets"
621
- version = "2.14.4"
622
  description = "HuggingFace community-driven open-source library of datasets"
623
  optional = false
624
- python-versions = ">=3.8.0"
625
  files = [
626
- {file = "datasets-2.14.4-py3-none-any.whl", hash = "sha256:29336bd316a7d827ccd4da2236596279b20ca2ac78f64c04c9483da7cbc2459b"},
627
- {file = "datasets-2.14.4.tar.gz", hash = "sha256:ef29c2b5841de488cd343cfc26ab979bff77efa4d2285af51f1ad7db5c46a83b"},
628
  ]
629
 
630
  [package.dependencies]
631
  aiohttp = "*"
632
- dill = ">=0.3.0,<0.3.8"
633
- fsspec = {version = ">=2021.11.1", extras = ["http"]}
634
- huggingface-hub = ">=0.14.0,<1.0.0"
635
- multiprocess = "*"
 
636
  numpy = ">=1.17"
637
  packaging = "*"
638
  pandas = "*"
639
- pyarrow = ">=8.0.0"
640
  pyyaml = ">=5.1"
641
- requests = ">=2.19.0"
642
- tqdm = ">=4.62.1"
643
  xxhash = "*"
644
 
645
  [package.extras]
646
- apache-beam = ["apache-beam (>=2.26.0,<2.44.0)"]
647
- audio = ["librosa", "soundfile (>=0.12.1)"]
648
  benchmarks = ["tensorflow (==2.12.0)", "torch (==2.0.1)", "transformers (==4.30.1)"]
649
- dev = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0,<2.44.0)", "black (>=23.1,<24.0)", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "pyyaml (>=5.3.1)", "rarfile (>=4.0)", "ruff (>=0.0.241)", "s3fs", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy (<2.0.0)", "tensorflow (>=2.2.0,!=2.6.0,!=2.6.1)", "tensorflow (>=2.3,!=2.6.0,!=2.6.1)", "tensorflow-macos", "tiktoken", "torch", "transformers", "zstandard"]
650
- docs = ["s3fs", "tensorflow (>=2.2.0,!=2.6.0,!=2.6.1)", "tensorflow-macos", "torch", "transformers"]
651
- jax = ["jax (>=0.2.8,!=0.3.2,<=0.3.25)", "jaxlib (>=0.1.65,<=0.3.25)"]
652
- metrics-tests = ["Werkzeug (>=1.0.1)", "accelerate", "bert-score (>=0.3.6)", "jiwer", "langdetect", "mauve-text", "nltk", "requests-file (>=1.5.1)", "rouge-score", "sacrebleu", "sacremoses", "scikit-learn", "scipy", "sentencepiece", "seqeval", "six (>=1.15.0,<1.16.0)", "spacy (>=3.0.0)", "texttable (>=1.6.3)", "tldextract", "tldextract (>=3.1.0)", "toml (>=0.10.1)", "typer (<0.5.0)"]
653
- quality = ["black (>=23.1,<24.0)", "pyyaml (>=5.3.1)", "ruff (>=0.0.241)"]
654
  s3 = ["s3fs"]
655
- tensorflow = ["tensorflow (>=2.2.0,!=2.6.0,!=2.6.1)", "tensorflow-macos"]
656
- tensorflow-gpu = ["tensorflow-gpu (>=2.2.0,!=2.6.0,!=2.6.1)"]
657
- tests = ["Pillow (>=6.2.1)", "absl-py", "apache-beam (>=2.26.0,<2.44.0)", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.6.4)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "sqlalchemy (<2.0.0)", "tensorflow (>=2.3,!=2.6.0,!=2.6.1)", "tensorflow-macos", "tiktoken", "torch", "transformers", "zstandard"]
 
658
  torch = ["torch"]
659
- vision = ["Pillow (>=6.2.1)"]
660
 
661
  [[package]]
662
  name = "debugpy"
@@ -706,17 +706,18 @@ files = [
706
 
707
  [[package]]
708
  name = "dill"
709
- version = "0.3.7"
710
  description = "serialize all of Python"
711
  optional = false
712
- python-versions = ">=3.7"
713
  files = [
714
- {file = "dill-0.3.7-py3-none-any.whl", hash = "sha256:76b122c08ef4ce2eedcd4d1abd8e641114bfc6c2867f49f3c41facf65bf19f5e"},
715
- {file = "dill-0.3.7.tar.gz", hash = "sha256:cc1c8b182eb3013e24bd475ff2e9295af86c1a38eb1aff128dac8962a9ce3c03"},
716
  ]
717
 
718
  [package.extras]
719
  graph = ["objgraph (>=1.7.2)"]
 
720
 
721
  [[package]]
722
  name = "distlib"
@@ -1016,13 +1017,13 @@ files = [
1016
 
1017
  [[package]]
1018
  name = "fsspec"
1019
- version = "2024.12.0"
1020
  description = "File-system specification"
1021
  optional = false
1022
  python-versions = ">=3.8"
1023
  files = [
1024
- {file = "fsspec-2024.12.0-py3-none-any.whl", hash = "sha256:b520aed47ad9804237ff878b504267a3b0b441e97508bd6d2d8774e3db85cee2"},
1025
- {file = "fsspec-2024.12.0.tar.gz", hash = "sha256:670700c977ed2fb51e0d9f9253177ed20cbde4a3e5c0283cc5385b5870c8533f"},
1026
  ]
1027
 
1028
  [package.dependencies]
@@ -1837,31 +1838,27 @@ files = [
1837
 
1838
  [[package]]
1839
  name = "multiprocess"
1840
- version = "0.70.15"
1841
  description = "better multiprocessing and multithreading in Python"
1842
  optional = false
1843
- python-versions = ">=3.7"
1844
  files = [
1845
- {file = "multiprocess-0.70.15-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:aa36c7ed16f508091438687fe9baa393a7a8e206731d321e443745e743a0d4e5"},
1846
- {file = "multiprocess-0.70.15-pp37-pypy37_pp73-macosx_10_9_x86_64.whl", hash = "sha256:20e024018c46d0d1602024c613007ac948f9754659e3853b0aa705e83f6931d8"},
1847
- {file = "multiprocess-0.70.15-pp37-pypy37_pp73-manylinux_2_24_i686.whl", hash = "sha256:e576062981c91f0fe8a463c3d52506e598dfc51320a8dd8d78b987dfca91c5db"},
1848
- {file = "multiprocess-0.70.15-pp37-pypy37_pp73-manylinux_2_24_x86_64.whl", hash = "sha256:e73f497e6696a0f5433ada2b3d599ae733b87a6e8b008e387c62ac9127add177"},
1849
- {file = "multiprocess-0.70.15-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:73db2e7b32dcc7f9b0f075c2ffa45c90b6729d3f1805f27e88534c8d321a1be5"},
1850
- {file = "multiprocess-0.70.15-pp38-pypy38_pp73-manylinux_2_24_i686.whl", hash = "sha256:4271647bd8a49c28ecd6eb56a7fdbd3c212c45529ad5303b40b3c65fc6928e5f"},
1851
- {file = "multiprocess-0.70.15-pp38-pypy38_pp73-manylinux_2_24_x86_64.whl", hash = "sha256:cf981fb998d6ec3208cb14f0cf2e9e80216e834f5d51fd09ebc937c32b960902"},
1852
- {file = "multiprocess-0.70.15-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:18f9f2c7063346d1617bd1684fdcae8d33380ae96b99427260f562e1a1228b67"},
1853
- {file = "multiprocess-0.70.15-pp39-pypy39_pp73-manylinux_2_24_i686.whl", hash = "sha256:0eac53214d664c49a34695e5824872db4006b1a465edd7459a251809c3773370"},
1854
- {file = "multiprocess-0.70.15-pp39-pypy39_pp73-manylinux_2_24_x86_64.whl", hash = "sha256:1a51dd34096db47fb21fa2b839e615b051d51b97af9a67afbcdaa67186b44883"},
1855
- {file = "multiprocess-0.70.15-py310-none-any.whl", hash = "sha256:7dd58e33235e83cf09d625e55cffd7b0f0eede7ee9223cdd666a87624f60c21a"},
1856
- {file = "multiprocess-0.70.15-py311-none-any.whl", hash = "sha256:134f89053d82c9ed3b73edd3a2531eb791e602d4f4156fc92a79259590bd9670"},
1857
- {file = "multiprocess-0.70.15-py37-none-any.whl", hash = "sha256:f7d4a1629bccb433114c3b4885f69eccc200994323c80f6feee73b0edc9199c5"},
1858
- {file = "multiprocess-0.70.15-py38-none-any.whl", hash = "sha256:bee9afba476c91f9ebee7beeee0601face9eff67d822e893f9a893725fbd6316"},
1859
- {file = "multiprocess-0.70.15-py39-none-any.whl", hash = "sha256:3e0953f5d52b4c76f1c973eaf8214554d146f2be5decb48e928e55c7a2d19338"},
1860
- {file = "multiprocess-0.70.15.tar.gz", hash = "sha256:f20eed3036c0ef477b07a4177cf7c1ba520d9a2677870a4f47fe026f0cd6787e"},
1861
  ]
1862
 
1863
  [package.dependencies]
1864
- dill = ">=0.3.7"
1865
 
1866
  [[package]]
1867
  name = "mypy"
@@ -3703,4 +3700,4 @@ propcache = ">=0.2.0"
3703
  [metadata]
3704
  lock-version = "2.0"
3705
  python-versions = ">=3.12"
3706
- content-hash = "cb2eaaca59c75f0f37a96580be301cc61b1bf1ce3a72b3af65327859c1e3fcc6"
 
1
+ # This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
2
 
3
  [[package]]
4
  name = "aiofiles"
 
618
 
619
  [[package]]
620
  name = "datasets"
621
+ version = "3.2.0"
622
  description = "HuggingFace community-driven open-source library of datasets"
623
  optional = false
624
+ python-versions = ">=3.9.0"
625
  files = [
626
+ {file = "datasets-3.2.0-py3-none-any.whl", hash = "sha256:f3d2ba2698b7284a4518019658596a6a8bc79f31e51516524249d6c59cf0fe2a"},
627
+ {file = "datasets-3.2.0.tar.gz", hash = "sha256:9a6e1a356052866b5dbdd9c9eedb000bf3fc43d986e3584d9b028f4976937229"},
628
  ]
629
 
630
  [package.dependencies]
631
  aiohttp = "*"
632
+ dill = ">=0.3.0,<0.3.9"
633
+ filelock = "*"
634
+ fsspec = {version = ">=2023.1.0,<=2024.9.0", extras = ["http"]}
635
+ huggingface-hub = ">=0.23.0"
636
+ multiprocess = "<0.70.17"
637
  numpy = ">=1.17"
638
  packaging = "*"
639
  pandas = "*"
640
+ pyarrow = ">=15.0.0"
641
  pyyaml = ">=5.1"
642
+ requests = ">=2.32.2"
643
+ tqdm = ">=4.66.3"
644
  xxhash = "*"
645
 
646
  [package.extras]
647
+ audio = ["librosa", "soundfile (>=0.12.1)", "soxr (>=0.4.0)"]
 
648
  benchmarks = ["tensorflow (==2.12.0)", "torch (==2.0.1)", "transformers (==4.30.1)"]
649
+ dev = ["Pillow (>=9.4.0)", "absl-py", "decorator", "decord (==0.6.0)", "elasticsearch (>=7.17.12,<8.0.0)", "faiss-cpu (>=1.8.0.post1)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "moto[server]", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "ruff (>=0.3.0)", "s3fs", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "soxr (>=0.4.0)", "sqlalchemy", "tensorflow (>=2.16.0)", "tensorflow (>=2.6.0)", "tensorflow (>=2.6.0)", "tiktoken", "torch", "torch (>=2.0.0)", "torchdata", "transformers", "transformers (>=4.42.0)", "zstandard"]
650
+ docs = ["s3fs", "tensorflow (>=2.6.0)", "torch", "transformers"]
651
+ jax = ["jax (>=0.3.14)", "jaxlib (>=0.3.14)"]
652
+ quality = ["ruff (>=0.3.0)"]
 
653
  s3 = ["s3fs"]
654
+ tensorflow = ["tensorflow (>=2.6.0)"]
655
+ tensorflow-gpu = ["tensorflow (>=2.6.0)"]
656
+ tests = ["Pillow (>=9.4.0)", "absl-py", "decorator", "decord (==0.6.0)", "elasticsearch (>=7.17.12,<8.0.0)", "faiss-cpu (>=1.8.0.post1)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "moto[server]", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "soxr (>=0.4.0)", "sqlalchemy", "tensorflow (>=2.16.0)", "tensorflow (>=2.6.0)", "tiktoken", "torch (>=2.0.0)", "torchdata", "transformers (>=4.42.0)", "zstandard"]
657
+ tests-numpy2 = ["Pillow (>=9.4.0)", "absl-py", "decorator", "decord (==0.6.0)", "elasticsearch (>=7.17.12,<8.0.0)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "lz4", "moto[server]", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "soxr (>=0.4.0)", "sqlalchemy", "tiktoken", "torch (>=2.0.0)", "torchdata", "transformers (>=4.42.0)", "zstandard"]
658
  torch = ["torch"]
659
+ vision = ["Pillow (>=9.4.0)"]
660
 
661
  [[package]]
662
  name = "debugpy"
 
706
 
707
  [[package]]
708
  name = "dill"
709
+ version = "0.3.8"
710
  description = "serialize all of Python"
711
  optional = false
712
+ python-versions = ">=3.8"
713
  files = [
714
+ {file = "dill-0.3.8-py3-none-any.whl", hash = "sha256:c36ca9ffb54365bdd2f8eb3eff7d2a21237f8452b57ace88b1ac615b7e815bd7"},
715
+ {file = "dill-0.3.8.tar.gz", hash = "sha256:3ebe3c479ad625c4553aca177444d89b486b1d84982eeacded644afc0cf797ca"},
716
  ]
717
 
718
  [package.extras]
719
  graph = ["objgraph (>=1.7.2)"]
720
+ profile = ["gprof2dot (>=2022.7.29)"]
721
 
722
  [[package]]
723
  name = "distlib"
 
1017
 
1018
  [[package]]
1019
  name = "fsspec"
1020
+ version = "2024.9.0"
1021
  description = "File-system specification"
1022
  optional = false
1023
  python-versions = ">=3.8"
1024
  files = [
1025
+ {file = "fsspec-2024.9.0-py3-none-any.whl", hash = "sha256:a0947d552d8a6efa72cc2c730b12c41d043509156966cca4fb157b0f2a0c574b"},
1026
+ {file = "fsspec-2024.9.0.tar.gz", hash = "sha256:4b0afb90c2f21832df142f292649035d80b421f60a9e1c027802e5a0da2b04e8"},
1027
  ]
1028
 
1029
  [package.dependencies]
 
1838
 
1839
  [[package]]
1840
  name = "multiprocess"
1841
+ version = "0.70.16"
1842
  description = "better multiprocessing and multithreading in Python"
1843
  optional = false
1844
+ python-versions = ">=3.8"
1845
  files = [
1846
+ {file = "multiprocess-0.70.16-pp310-pypy310_pp73-macosx_10_13_x86_64.whl", hash = "sha256:476887be10e2f59ff183c006af746cb6f1fd0eadcfd4ef49e605cbe2659920ee"},
1847
+ {file = "multiprocess-0.70.16-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:d951bed82c8f73929ac82c61f01a7b5ce8f3e5ef40f5b52553b4f547ce2b08ec"},
1848
+ {file = "multiprocess-0.70.16-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:37b55f71c07e2d741374998c043b9520b626a8dddc8b3129222ca4f1a06ef67a"},
1849
+ {file = "multiprocess-0.70.16-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:ba8c31889abf4511c7308a8c52bb4a30b9d590e7f58523302ba00237702ca054"},
1850
+ {file = "multiprocess-0.70.16-pp39-pypy39_pp73-macosx_10_13_x86_64.whl", hash = "sha256:0dfd078c306e08d46d7a8d06fb120313d87aa43af60d66da43ffff40b44d2f41"},
1851
+ {file = "multiprocess-0.70.16-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:e7b9d0f307cd9bd50851afaac0dba2cb6c44449efff697df7c7645f7d3f2be3a"},
1852
+ {file = "multiprocess-0.70.16-py310-none-any.whl", hash = "sha256:c4a9944c67bd49f823687463660a2d6daae94c289adff97e0f9d696ba6371d02"},
1853
+ {file = "multiprocess-0.70.16-py311-none-any.whl", hash = "sha256:af4cabb0dac72abfb1e794fa7855c325fd2b55a10a44628a3c1ad3311c04127a"},
1854
+ {file = "multiprocess-0.70.16-py312-none-any.whl", hash = "sha256:fc0544c531920dde3b00c29863377f87e1632601092ea2daca74e4beb40faa2e"},
1855
+ {file = "multiprocess-0.70.16-py38-none-any.whl", hash = "sha256:a71d82033454891091a226dfc319d0cfa8019a4e888ef9ca910372a446de4435"},
1856
+ {file = "multiprocess-0.70.16-py39-none-any.whl", hash = "sha256:a0bafd3ae1b732eac64be2e72038231c1ba97724b60b09400d68f229fcc2fbf3"},
1857
+ {file = "multiprocess-0.70.16.tar.gz", hash = "sha256:161af703d4652a0e1410be6abccecde4a7ddffd19341be0a7011b94aeb171ac1"},
 
 
 
 
1858
  ]
1859
 
1860
  [package.dependencies]
1861
+ dill = ">=0.3.8"
1862
 
1863
  [[package]]
1864
  name = "mypy"
 
3700
  [metadata]
3701
  lock-version = "2.0"
3702
  python-versions = ">=3.12"
3703
+ content-hash = "63732dc09424c3f57f763f369d4c9629bf065495f7bfce5ce2a7381f63b7d770"
pyproject.toml CHANGED
@@ -30,7 +30,8 @@ start = "llmdataparser.app:main"
30
  [tool.poetry.dependencies]
31
  python = ">=3.12"
32
  pandas = "^2.0.3"
33
- datasets = "^2.14.4"
 
34
  typing-extensions = "^4.8.0"
35
  ipywidgets = "^8.1.1"
36
  gradio = "^4.19.2"
 
30
  [tool.poetry.dependencies]
31
  python = ">=3.12"
32
  pandas = "^2.0.3"
33
+ datasets = "^3.2.0"
34
+ fsspec = "^2024.9.0"
35
  typing-extensions = "^4.8.0"
36
  ipywidgets = "^8.1.1"
37
  gradio = "^4.19.2"