Alina Lozovskaya commited on
Commit
bae4131
·
1 Parent(s): 7245ade

UI improvement

Browse files
pyproject.toml CHANGED
@@ -12,14 +12,14 @@ dependencies = [
12
  "langfuse>=2.59.3",
13
  "litellm>=1.61.16",
14
  "loguru>=0.7.3",
15
- "markitdown>=0.0.1a4",
16
  "matplotlib>=3.10.0",
17
  "openai>=1.63.0",
18
  "python-dotenv>=1.0.1",
19
  "torch>=2.6.0",
20
  "tqdm>=4.67.1",
21
  "transformers>=4.48.3",
22
- "ruamel-yaml>=0.18.10",
23
  ]
24
 
25
  [build-system]
 
12
  "langfuse>=2.59.3",
13
  "litellm>=1.61.16",
14
  "loguru>=0.7.3",
15
+ "markitdown>=0.0.2",
16
  "matplotlib>=3.10.0",
17
  "openai>=1.63.0",
18
  "python-dotenv>=1.0.1",
19
  "torch>=2.6.0",
20
  "tqdm>=4.67.1",
21
  "transformers>=4.48.3",
22
+ "black>=25.1.0",
23
  ]
24
 
25
  [build-system]
uv.lock CHANGED
@@ -117,6 +117,50 @@ wheels = [
117
  { url = "https://files.pythonhosted.org/packages/b1/ae/4d289407515223677e7d105ec88bc707a115cce79464c6b56313beb0dd36/authlib-1.5.1-py2.py3-none-any.whl", hash = "sha256:8408861cbd9b4ea2ff759b00b6f02fd7d81ac5a56d0b2b22c08606c6049aae11", size = 231358 },
118
  ]
119
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  [[package]]
121
  name = "backoff"
122
  version = "2.2.1"
@@ -139,6 +183,26 @@ wheels = [
139
  { url = "https://files.pythonhosted.org/packages/f9/49/6abb616eb3cbab6a7cca303dc02fdf3836de2e0b834bf966a7f5271a34d8/beautifulsoup4-4.13.3-py3-none-any.whl", hash = "sha256:99045d7d3f08f91f0d656bc9b7efbae189426cd913d830294a15eefa0ea4df16", size = 186015 },
140
  ]
141
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
  [[package]]
143
  name = "certifi"
144
  version = "2025.1.31"
@@ -204,6 +268,15 @@ wheels = [
204
  { url = "https://files.pythonhosted.org/packages/7e/d4/7ebdbd03970677812aac39c869717059dbb71a4cfc033ca6e5221787892c/click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2", size = 98188 },
205
  ]
206
 
 
 
 
 
 
 
 
 
 
207
  [[package]]
208
  name = "colorama"
209
  version = "0.4.6"
@@ -303,6 +376,15 @@ wheels = [
303
  { url = "https://files.pythonhosted.org/packages/4c/37/22ef7675bef4ffe9577b937ddca2e22791534cbbe11c30714972a91532dc/datasets-3.3.2-py3-none-any.whl", hash = "sha256:fdaf3d5d70242621210b044e9b9b15a56e908bfc3e9d077bcf5605ac390f70bd", size = 485360 },
304
  ]
305
 
 
 
 
 
 
 
 
 
 
306
  [[package]]
307
  name = "dill"
308
  version = "0.3.8"
@@ -321,6 +403,15 @@ wheels = [
321
  { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277 },
322
  ]
323
 
 
 
 
 
 
 
 
 
 
324
  [[package]]
325
  name = "fastapi"
326
  version = "0.115.11"
@@ -575,6 +666,15 @@ wheels = [
575
  { url = "https://files.pythonhosted.org/packages/79/9d/0fb148dc4d6fa4a7dd1d8378168d9b4cd8d4560a6fbf6f0121c5fc34eb68/importlib_metadata-8.6.1-py3-none-any.whl", hash = "sha256:02a89390c1e15fdfdc0d7c6b25cb3e62650d0494005c97d6f148bf5b9787525e", size = 26971 },
576
  ]
577
 
 
 
 
 
 
 
 
 
 
578
  [[package]]
579
  name = "itsdangerous"
580
  version = "2.2.0"
@@ -720,6 +820,43 @@ wheels = [
720
  { url = "https://files.pythonhosted.org/packages/0c/29/0348de65b8cc732daa3e33e67806420b2ae89bdce2b04af740289c5c6c8c/loguru-0.7.3-py3-none-any.whl", hash = "sha256:31a33c10c8e1e10422bfd431aeb5d351c7cf7fa671e3c4df004162264b28220c", size = 61595 },
721
  ]
722
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
723
  [[package]]
724
  name = "markdown-it-py"
725
  version = "3.0.0"
@@ -747,19 +884,33 @@ wheels = [
747
 
748
  [[package]]
749
  name = "markitdown"
750
- version = "0.1.0a1"
751
  source = { registry = "https://pypi.org/simple" }
752
  dependencies = [
 
 
753
  { name = "beautifulsoup4" },
754
  { name = "charset-normalizer" },
 
755
  { name = "markdownify" },
 
 
 
 
 
756
  { name = "pathvalidate" },
 
757
  { name = "puremagic" },
 
 
758
  { name = "requests" },
 
 
 
759
  ]
760
- sdist = { url = "https://files.pythonhosted.org/packages/e4/20/f4d26a9946c851894ac099f690d55389c842e93a59951289654b4e8dec39/markitdown-0.1.0a1.tar.gz", hash = "sha256:f4d5b1eb4ee7757ec8e41c1205d6a33ae846ae366d0aec2ba56718b64b395b14", size = 28243 }
761
  wheels = [
762
- { url = "https://files.pythonhosted.org/packages/1a/c5/1a37e25089c93ff701d226a9682e2270c83dd02c2547abd4078051114776/markitdown-0.1.0a1-py3-none-any.whl", hash = "sha256:a5b35730c8c9d7c46a91f43b5dcf82d1677c8b3d5b8accc3bd18209c139ae361", size = 43950 },
763
  ]
764
 
765
  [[package]]
@@ -823,6 +974,33 @@ wheels = [
823
  { url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198 },
824
  ]
825
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
826
  [[package]]
827
  name = "multidict"
828
  version = "6.1.0"
@@ -863,6 +1041,15 @@ wheels = [
863
  { url = "https://files.pythonhosted.org/packages/da/d9/f7f9379981e39b8c2511c9e0326d212accacb82f12fbfdc1aa2ce2a7b2b6/multiprocess-0.70.16-py39-none-any.whl", hash = "sha256:a0bafd3ae1b732eac64be2e72038231c1ba97724b60b09400d68f229fcc2fbf3", size = 133351 },
864
  ]
865
 
 
 
 
 
 
 
 
 
 
866
  [[package]]
867
  name = "networkx"
868
  version = "3.4.2"
@@ -1008,6 +1195,15 @@ wheels = [
1008
  { url = "https://files.pythonhosted.org/packages/87/20/199b8713428322a2f22b722c62b8cc278cc53dffa9705d744484b5035ee9/nvidia_nvtx_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:781e950d9b9f60d8241ccea575b32f5105a5baf4c2351cab5256a24869f12a1a", size = 99144 },
1009
  ]
1010
 
 
 
 
 
 
 
 
 
 
1011
  [[package]]
1012
  name = "openai"
1013
  version = "1.65.5"
@@ -1027,6 +1223,18 @@ wheels = [
1027
  { url = "https://files.pythonhosted.org/packages/fc/8f/a178d73277bf2d838617fa20ba4ae6952e26074664aacb53ae4532a69588/openai-1.65.5-py3-none-any.whl", hash = "sha256:5948a504e7b4003d921cfab81273813793a31c25b1d7b605797c01757e0141f1", size = 474468 },
1028
  ]
1029
 
 
 
 
 
 
 
 
 
 
 
 
 
1030
  [[package]]
1031
  name = "orjson"
1032
  version = "3.10.15"
@@ -1078,6 +1286,15 @@ wheels = [
1078
  { url = "https://files.pythonhosted.org/packages/29/d4/1244ab8edf173a10fd601f7e13b9566c1b525c4f365d6bee918e68381889/pandas-2.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:59ef3764d0fe818125a5097d2ae867ca3fa64df032331b7e0917cf5d7bf66b13", size = 11504248 },
1079
  ]
1080
 
 
 
 
 
 
 
 
 
 
1081
  [[package]]
1082
  name = "pathvalidate"
1083
  version = "3.2.3"
@@ -1087,6 +1304,19 @@ wheels = [
1087
  { url = "https://files.pythonhosted.org/packages/50/14/c5a0e1a947909810fc4c043b84cac472b70e438148d34f5393be1bac663f/pathvalidate-3.2.3-py3-none-any.whl", hash = "sha256:5eaf0562e345d4b6d0c0239d0f690c3bd84d2a9a3c4c73b99ea667401b27bee1", size = 24130 },
1088
  ]
1089
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1090
  [[package]]
1091
  name = "pillow"
1092
  version = "11.1.0"
@@ -1106,6 +1336,27 @@ wheels = [
1106
  { url = "https://files.pythonhosted.org/packages/37/f3/9b18362206b244167c958984b57c7f70a0289bfb59a530dd8af5f699b910/pillow-11.1.0-cp312-cp312-win_arm64.whl", hash = "sha256:4dd43a78897793f60766563969442020e90eb7847463eca901e41ba186a7d4a5", size = 2375240 },
1107
  ]
1108
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1109
  [[package]]
1110
  name = "propcache"
1111
  version = "0.3.0"
@@ -1221,6 +1472,20 @@ wheels = [
1221
  { url = "https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c", size = 1225293 },
1222
  ]
1223
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1224
  [[package]]
1225
  name = "pyparsing"
1226
  version = "3.2.1"
@@ -1260,6 +1525,21 @@ wheels = [
1260
  { url = "https://files.pythonhosted.org/packages/45/58/38b5afbc1a800eeea951b9285d3912613f2603bdf897a4ab0f4bd7f405fc/python_multipart-0.0.20-py3-none-any.whl", hash = "sha256:8a62d3a8335e06589fe01f2a3e178cdcc632f3fbe0d492ad9ee0ec35aab1f104", size = 24546 },
1261
  ]
1262
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1263
  [[package]]
1264
  name = "pytz"
1265
  version = "2025.1"
@@ -1269,6 +1549,16 @@ wheels = [
1269
  { url = "https://files.pythonhosted.org/packages/eb/38/ac33370d784287baa1c3d538978b5e2ea064d4c1b93ffbd12826c190dd10/pytz-2025.1-py2.py3-none-any.whl", hash = "sha256:89dd22dca55b46eac6eda23b2d72721bf1bdfef212645d81513ef5d03038de57", size = 507930 },
1270
  ]
1271
 
 
 
 
 
 
 
 
 
 
 
1272
  [[package]]
1273
  name = "pyyaml"
1274
  version = "6.0.2"
@@ -1372,35 +1662,6 @@ wheels = [
1372
  { url = "https://files.pythonhosted.org/packages/a3/4f/0fce63e0f5cdd658e71e21abd17ac1bc9312741ebb8b3f74eeed2ebdf771/rpds_py-0.23.1-cp312-cp312-win_amd64.whl", hash = "sha256:b03a8d50b137ee758e4c73638b10747b7c39988eb8e6cd11abb7084266455165", size = 237426 },
1373
  ]
1374
 
1375
- [[package]]
1376
- name = "ruamel-yaml"
1377
- version = "0.18.10"
1378
- source = { registry = "https://pypi.org/simple" }
1379
- dependencies = [
1380
- { name = "ruamel-yaml-clib", marker = "platform_python_implementation == 'CPython'" },
1381
- ]
1382
- sdist = { url = "https://files.pythonhosted.org/packages/ea/46/f44d8be06b85bc7c4d8c95d658be2b68f27711f279bf9dd0612a5e4794f5/ruamel.yaml-0.18.10.tar.gz", hash = "sha256:20c86ab29ac2153f80a428e1254a8adf686d3383df04490514ca3b79a362db58", size = 143447 }
1383
- wheels = [
1384
- { url = "https://files.pythonhosted.org/packages/c2/36/dfc1ebc0081e6d39924a2cc53654497f967a084a436bb64402dfce4254d9/ruamel.yaml-0.18.10-py3-none-any.whl", hash = "sha256:30f22513ab2301b3d2b577adc121c6471f28734d3d9728581245f1e76468b4f1", size = 117729 },
1385
- ]
1386
-
1387
- [[package]]
1388
- name = "ruamel-yaml-clib"
1389
- version = "0.2.12"
1390
- source = { registry = "https://pypi.org/simple" }
1391
- sdist = { url = "https://files.pythonhosted.org/packages/20/84/80203abff8ea4993a87d823a5f632e4d92831ef75d404c9fc78d0176d2b5/ruamel.yaml.clib-0.2.12.tar.gz", hash = "sha256:6c8fbb13ec503f99a91901ab46e0b07ae7941cd527393187039aec586fdfd36f", size = 225315 }
1392
- wheels = [
1393
- { url = "https://files.pythonhosted.org/packages/48/41/e7a405afbdc26af961678474a55373e1b323605a4f5e2ddd4a80ea80f628/ruamel.yaml.clib-0.2.12-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:20b0f8dc160ba83b6dcc0e256846e1a02d044e13f7ea74a3d1d56ede4e48c632", size = 133433 },
1394
- { url = "https://files.pythonhosted.org/packages/ec/b0/b850385604334c2ce90e3ee1013bd911aedf058a934905863a6ea95e9eb4/ruamel.yaml.clib-0.2.12-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:943f32bc9dedb3abff9879edc134901df92cfce2c3d5c9348f172f62eb2d771d", size = 647362 },
1395
- { url = "https://files.pythonhosted.org/packages/44/d0/3f68a86e006448fb6c005aee66565b9eb89014a70c491d70c08de597f8e4/ruamel.yaml.clib-0.2.12-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95c3829bb364fdb8e0332c9931ecf57d9be3519241323c5274bd82f709cebc0c", size = 754118 },
1396
- { url = "https://files.pythonhosted.org/packages/52/a9/d39f3c5ada0a3bb2870d7db41901125dbe2434fa4f12ca8c5b83a42d7c53/ruamel.yaml.clib-0.2.12-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:749c16fcc4a2b09f28843cda5a193e0283e47454b63ec4b81eaa2242f50e4ccd", size = 706497 },
1397
- { url = "https://files.pythonhosted.org/packages/b0/fa/097e38135dadd9ac25aecf2a54be17ddf6e4c23e43d538492a90ab3d71c6/ruamel.yaml.clib-0.2.12-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:bf165fef1f223beae7333275156ab2022cffe255dcc51c27f066b4370da81e31", size = 698042 },
1398
- { url = "https://files.pythonhosted.org/packages/ec/d5/a659ca6f503b9379b930f13bc6b130c9f176469b73b9834296822a83a132/ruamel.yaml.clib-0.2.12-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:32621c177bbf782ca5a18ba4d7af0f1082a3f6e517ac2a18b3974d4edf349680", size = 745831 },
1399
- { url = "https://files.pythonhosted.org/packages/db/5d/36619b61ffa2429eeaefaab4f3374666adf36ad8ac6330d855848d7d36fd/ruamel.yaml.clib-0.2.12-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b82a7c94a498853aa0b272fd5bc67f29008da798d4f93a2f9f289feb8426a58d", size = 715692 },
1400
- { url = "https://files.pythonhosted.org/packages/b1/82/85cb92f15a4231c89b95dfe08b09eb6adca929ef7df7e17ab59902b6f589/ruamel.yaml.clib-0.2.12-cp312-cp312-win32.whl", hash = "sha256:e8c4ebfcfd57177b572e2040777b8abc537cdef58a2120e830124946aa9b42c5", size = 98777 },
1401
- { url = "https://files.pythonhosted.org/packages/d7/8f/c3654f6f1ddb75daf3922c3d8fc6005b1ab56671ad56ffb874d908bfa668/ruamel.yaml.clib-0.2.12-cp312-cp312-win_amd64.whl", hash = "sha256:0467c5965282c62203273b838ae77c0d29d7638c8a4e3a1c8bdd3602c10904e4", size = 115523 },
1402
- ]
1403
-
1404
  [[package]]
1405
  name = "ruff"
1406
  version = "0.9.10"
@@ -1514,6 +1775,18 @@ wheels = [
1514
  { url = "https://files.pythonhosted.org/packages/d1/c2/fe97d779f3ef3b15f05c94a2f1e3d21732574ed441687474db9d342a7315/soupsieve-2.6-py3-none-any.whl", hash = "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9", size = 36186 },
1515
  ]
1516
 
 
 
 
 
 
 
 
 
 
 
 
 
1517
  [[package]]
1518
  name = "starlette"
1519
  version = "0.46.1"
@@ -1769,6 +2042,24 @@ wheels = [
1769
  { url = "https://files.pythonhosted.org/packages/2d/82/f56956041adef78f849db6b289b282e72b55ab8045a75abad81898c28d19/wrapt-1.17.2-py3-none-any.whl", hash = "sha256:b18f2d1533a71f069c7f82d524a52599053d4c7166e9dd374ae2136b7f40f7c8", size = 23594 },
1770
  ]
1771
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1772
  [[package]]
1773
  name = "xxhash"
1774
  version = "3.5.0"
@@ -1825,7 +2116,7 @@ wheels = [
1825
  [[package]]
1826
  name = "yourbench"
1827
  version = "0.2.0"
1828
- source = { git = "https://github.com/huggingface/yourbench.git?rev=v0.2-alpha-space#58b5455c1f29b00015b306729dcc8affe3b0ffa6" }
1829
  dependencies = [
1830
  { name = "asyncio" },
1831
  { name = "datasets" },
@@ -1849,6 +2140,7 @@ version = "0.1.0"
1849
  source = { editable = "." }
1850
  dependencies = [
1851
  { name = "asyncio" },
 
1852
  { name = "datasets" },
1853
  { name = "gradio", extra = ["oauth"] },
1854
  { name = "hf-transfer" },
@@ -1859,7 +2151,6 @@ dependencies = [
1859
  { name = "matplotlib" },
1860
  { name = "openai" },
1861
  { name = "python-dotenv" },
1862
- { name = "ruamel-yaml" },
1863
  { name = "torch" },
1864
  { name = "tqdm" },
1865
  { name = "transformers" },
@@ -1869,23 +2160,36 @@ dependencies = [
1869
  [package.metadata]
1870
  requires-dist = [
1871
  { name = "asyncio", specifier = ">=3.4.3" },
 
1872
  { name = "datasets", specifier = ">=3.3.0" },
1873
  { name = "gradio", extras = ["oauth"], specifier = ">=5.20.0" },
1874
  { name = "hf-transfer", specifier = ">=0.1.9" },
1875
  { name = "langfuse", specifier = ">=2.59.3" },
1876
  { name = "litellm", specifier = ">=1.61.16" },
1877
  { name = "loguru", specifier = ">=0.7.3" },
1878
- { name = "markitdown", specifier = ">=0.0.1a4" },
1879
  { name = "matplotlib", specifier = ">=3.10.0" },
1880
  { name = "openai", specifier = ">=1.63.0" },
1881
  { name = "python-dotenv", specifier = ">=1.0.1" },
1882
- { name = "ruamel-yaml", specifier = ">=0.18.10" },
1883
  { name = "torch", specifier = ">=2.6.0" },
1884
  { name = "tqdm", specifier = ">=4.67.1" },
1885
  { name = "transformers", specifier = ">=4.48.3" },
1886
  { name = "yourbench", git = "https://github.com/huggingface/yourbench.git?rev=v0.2-alpha-space" },
1887
  ]
1888
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1889
  [[package]]
1890
  name = "zipp"
1891
  version = "3.21.0"
 
117
  { url = "https://files.pythonhosted.org/packages/b1/ae/4d289407515223677e7d105ec88bc707a115cce79464c6b56313beb0dd36/authlib-1.5.1-py2.py3-none-any.whl", hash = "sha256:8408861cbd9b4ea2ff759b00b6f02fd7d81ac5a56d0b2b22c08606c6049aae11", size = 231358 },
118
  ]
119
 
120
+ [[package]]
121
+ name = "azure-ai-documentintelligence"
122
+ version = "1.0.0"
123
+ source = { registry = "https://pypi.org/simple" }
124
+ dependencies = [
125
+ { name = "azure-core" },
126
+ { name = "isodate" },
127
+ { name = "typing-extensions" },
128
+ ]
129
+ sdist = { url = "https://files.pythonhosted.org/packages/ca/fd/cd0d493e9dc93a5ce097db7508f1b2467a73dcc7022c235b409ce48b9679/azure_ai_documentintelligence-1.0.0.tar.gz", hash = "sha256:c8b6efc0fc7e65d7892c9585cfd256f7d8b3f2b46cecf92c75ab82e629eac253", size = 169420 }
130
+ wheels = [
131
+ { url = "https://files.pythonhosted.org/packages/84/a8/c9c66d4d04b8aee06ebdc9a6077736b222b9b2fe92364fed6f9a1c08ece0/azure_ai_documentintelligence-1.0.0-py3-none-any.whl", hash = "sha256:cdedb1a67c075f58f47a413ec5846bf8d532a83a71f0c51ec49ce9b5bfe2a519", size = 105454 },
132
+ ]
133
+
134
+ [[package]]
135
+ name = "azure-core"
136
+ version = "1.32.0"
137
+ source = { registry = "https://pypi.org/simple" }
138
+ dependencies = [
139
+ { name = "requests" },
140
+ { name = "six" },
141
+ { name = "typing-extensions" },
142
+ ]
143
+ sdist = { url = "https://files.pythonhosted.org/packages/cc/ee/668328306a9e963a5ad9f152cd98c7adad86c822729fd1d2a01613ad1e67/azure_core-1.32.0.tar.gz", hash = "sha256:22b3c35d6b2dae14990f6c1be2912bf23ffe50b220e708a28ab1bb92b1c730e5", size = 279128 }
144
+ wheels = [
145
+ { url = "https://files.pythonhosted.org/packages/39/83/325bf5e02504dbd8b4faa98197a44cdf8a325ef259b48326a2b6f17f8383/azure_core-1.32.0-py3-none-any.whl", hash = "sha256:eac191a0efb23bfa83fddf321b27b122b4ec847befa3091fa736a5c32c50d7b4", size = 198855 },
146
+ ]
147
+
148
+ [[package]]
149
+ name = "azure-identity"
150
+ version = "1.20.0"
151
+ source = { registry = "https://pypi.org/simple" }
152
+ dependencies = [
153
+ { name = "azure-core" },
154
+ { name = "cryptography" },
155
+ { name = "msal" },
156
+ { name = "msal-extensions" },
157
+ { name = "typing-extensions" },
158
+ ]
159
+ sdist = { url = "https://files.pythonhosted.org/packages/ee/89/7d170fab0b85d9650cdb7abda087e849644beb52bd28f6804620dd0cecd9/azure_identity-1.20.0.tar.gz", hash = "sha256:40597210d56c83e15031b0fe2ea3b26420189e1e7f3e20bdbb292315da1ba014", size = 264447 }
160
+ wheels = [
161
+ { url = "https://files.pythonhosted.org/packages/de/aa/819513c1dbef990af690bb5eefb5e337f8698d75dfdb7302528f50ce1994/azure_identity-1.20.0-py3-none-any.whl", hash = "sha256:5f23fc4889a66330e840bd78830287e14f3761820fe3c5f77ac875edcb9ec998", size = 188243 },
162
+ ]
163
+
164
  [[package]]
165
  name = "backoff"
166
  version = "2.2.1"
 
183
  { url = "https://files.pythonhosted.org/packages/f9/49/6abb616eb3cbab6a7cca303dc02fdf3836de2e0b834bf966a7f5271a34d8/beautifulsoup4-4.13.3-py3-none-any.whl", hash = "sha256:99045d7d3f08f91f0d656bc9b7efbae189426cd913d830294a15eefa0ea4df16", size = 186015 },
184
  ]
185
 
186
+ [[package]]
187
+ name = "black"
188
+ version = "25.1.0"
189
+ source = { registry = "https://pypi.org/simple" }
190
+ dependencies = [
191
+ { name = "click" },
192
+ { name = "mypy-extensions" },
193
+ { name = "packaging" },
194
+ { name = "pathspec" },
195
+ { name = "platformdirs" },
196
+ ]
197
+ sdist = { url = "https://files.pythonhosted.org/packages/94/49/26a7b0f3f35da4b5a65f081943b7bcd22d7002f5f0fb8098ec1ff21cb6ef/black-25.1.0.tar.gz", hash = "sha256:33496d5cd1222ad73391352b4ae8da15253c5de89b93a80b3e2c8d9a19ec2666", size = 649449 }
198
+ wheels = [
199
+ { url = "https://files.pythonhosted.org/packages/83/71/3fe4741df7adf015ad8dfa082dd36c94ca86bb21f25608eb247b4afb15b2/black-25.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4b60580e829091e6f9238c848ea6750efed72140b91b048770b64e74fe04908b", size = 1650988 },
200
+ { url = "https://files.pythonhosted.org/packages/13/f3/89aac8a83d73937ccd39bbe8fc6ac8860c11cfa0af5b1c96d081facac844/black-25.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1e2978f6df243b155ef5fa7e558a43037c3079093ed5d10fd84c43900f2d8ecc", size = 1453985 },
201
+ { url = "https://files.pythonhosted.org/packages/6f/22/b99efca33f1f3a1d2552c714b1e1b5ae92efac6c43e790ad539a163d1754/black-25.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b48735872ec535027d979e8dcb20bf4f70b5ac75a8ea99f127c106a7d7aba9f", size = 1783816 },
202
+ { url = "https://files.pythonhosted.org/packages/18/7e/a27c3ad3822b6f2e0e00d63d58ff6299a99a5b3aee69fa77cd4b0076b261/black-25.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:ea0213189960bda9cf99be5b8c8ce66bb054af5e9e861249cd23471bd7b0b3ba", size = 1440860 },
203
+ { url = "https://files.pythonhosted.org/packages/09/71/54e999902aed72baf26bca0d50781b01838251a462612966e9fc4891eadd/black-25.1.0-py3-none-any.whl", hash = "sha256:95e8176dae143ba9097f351d174fdaf0ccd29efb414b362ae3fd72bf0f710717", size = 207646 },
204
+ ]
205
+
206
  [[package]]
207
  name = "certifi"
208
  version = "2025.1.31"
 
268
  { url = "https://files.pythonhosted.org/packages/7e/d4/7ebdbd03970677812aac39c869717059dbb71a4cfc033ca6e5221787892c/click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2", size = 98188 },
269
  ]
270
 
271
+ [[package]]
272
+ name = "cobble"
273
+ version = "0.1.4"
274
+ source = { registry = "https://pypi.org/simple" }
275
+ sdist = { url = "https://files.pythonhosted.org/packages/54/7a/a507c709be2c96e1bb6102eb7b7f4026c5e5e223ef7d745a17d239e9d844/cobble-0.1.4.tar.gz", hash = "sha256:de38be1539992c8a06e569630717c485a5f91be2192c461ea2b220607dfa78aa", size = 3805 }
276
+ wheels = [
277
+ { url = "https://files.pythonhosted.org/packages/d5/e1/3714a2f371985215c219c2a70953d38e3eed81ef165aed061d21de0e998b/cobble-0.1.4-py3-none-any.whl", hash = "sha256:36c91b1655e599fd428e2b95fdd5f0da1ca2e9f1abb0bc871dec21a0e78a2b44", size = 3984 },
278
+ ]
279
+
280
  [[package]]
281
  name = "colorama"
282
  version = "0.4.6"
 
376
  { url = "https://files.pythonhosted.org/packages/4c/37/22ef7675bef4ffe9577b937ddca2e22791534cbbe11c30714972a91532dc/datasets-3.3.2-py3-none-any.whl", hash = "sha256:fdaf3d5d70242621210b044e9b9b15a56e908bfc3e9d077bcf5605ac390f70bd", size = 485360 },
377
  ]
378
 
379
+ [[package]]
380
+ name = "defusedxml"
381
+ version = "0.7.1"
382
+ source = { registry = "https://pypi.org/simple" }
383
+ sdist = { url = "https://files.pythonhosted.org/packages/0f/d5/c66da9b79e5bdb124974bfe172b4daf3c984ebd9c2a06e2b8a4dc7331c72/defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69", size = 75520 }
384
+ wheels = [
385
+ { url = "https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61", size = 25604 },
386
+ ]
387
+
388
  [[package]]
389
  name = "dill"
390
  version = "0.3.8"
 
403
  { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277 },
404
  ]
405
 
406
+ [[package]]
407
+ name = "et-xmlfile"
408
+ version = "2.0.0"
409
+ source = { registry = "https://pypi.org/simple" }
410
+ sdist = { url = "https://files.pythonhosted.org/packages/d3/38/af70d7ab1ae9d4da450eeec1fa3918940a5fafb9055e934af8d6eb0c2313/et_xmlfile-2.0.0.tar.gz", hash = "sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54", size = 17234 }
411
+ wheels = [
412
+ { url = "https://files.pythonhosted.org/packages/c1/8b/5fe2cc11fee489817272089c4203e679c63b570a5aaeb18d852ae3cbba6a/et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa", size = 18059 },
413
+ ]
414
+
415
  [[package]]
416
  name = "fastapi"
417
  version = "0.115.11"
 
666
  { url = "https://files.pythonhosted.org/packages/79/9d/0fb148dc4d6fa4a7dd1d8378168d9b4cd8d4560a6fbf6f0121c5fc34eb68/importlib_metadata-8.6.1-py3-none-any.whl", hash = "sha256:02a89390c1e15fdfdc0d7c6b25cb3e62650d0494005c97d6f148bf5b9787525e", size = 26971 },
667
  ]
668
 
669
+ [[package]]
670
+ name = "isodate"
671
+ version = "0.7.2"
672
+ source = { registry = "https://pypi.org/simple" }
673
+ sdist = { url = "https://files.pythonhosted.org/packages/54/4d/e940025e2ce31a8ce1202635910747e5a87cc3a6a6bb2d00973375014749/isodate-0.7.2.tar.gz", hash = "sha256:4cd1aa0f43ca76f4a6c6c0292a85f40b35ec2e43e315b59f06e6d32171a953e6", size = 29705 }
674
+ wheels = [
675
+ { url = "https://files.pythonhosted.org/packages/15/aa/0aca39a37d3c7eb941ba736ede56d689e7be91cab5d9ca846bde3999eba6/isodate-0.7.2-py3-none-any.whl", hash = "sha256:28009937d8031054830160fce6d409ed342816b543597cece116d966c6d99e15", size = 22320 },
676
+ ]
677
+
678
  [[package]]
679
  name = "itsdangerous"
680
  version = "2.2.0"
 
820
  { url = "https://files.pythonhosted.org/packages/0c/29/0348de65b8cc732daa3e33e67806420b2ae89bdce2b04af740289c5c6c8c/loguru-0.7.3-py3-none-any.whl", hash = "sha256:31a33c10c8e1e10422bfd431aeb5d351c7cf7fa671e3c4df004162264b28220c", size = 61595 },
821
  ]
822
 
823
+ [[package]]
824
+ name = "lxml"
825
+ version = "5.3.1"
826
+ source = { registry = "https://pypi.org/simple" }
827
+ sdist = { url = "https://files.pythonhosted.org/packages/ef/f6/c15ca8e5646e937c148e147244817672cf920b56ac0bf2cc1512ae674be8/lxml-5.3.1.tar.gz", hash = "sha256:106b7b5d2977b339f1e97efe2778e2ab20e99994cbb0ec5e55771ed0795920c8", size = 3678591 }
828
+ wheels = [
829
+ { url = "https://files.pythonhosted.org/packages/3b/f4/5121aa9ee8e09b8b8a28cf3709552efe3d206ca51a20d6fa471b60bb3447/lxml-5.3.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:e69add9b6b7b08c60d7ff0152c7c9a6c45b4a71a919be5abde6f98f1ea16421c", size = 8191889 },
830
+ { url = "https://files.pythonhosted.org/packages/0a/ca/8e9aa01edddc74878f4aea85aa9ab64372f46aa804d1c36dda861bf9eabf/lxml-5.3.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:4e52e1b148867b01c05e21837586ee307a01e793b94072d7c7b91d2c2da02ffe", size = 4450685 },
831
+ { url = "https://files.pythonhosted.org/packages/b2/b3/ea40a5c98619fbd7e9349df7007994506d396b97620ced34e4e5053d3734/lxml-5.3.1-cp312-cp312-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a4b382e0e636ed54cd278791d93fe2c4f370772743f02bcbe431a160089025c9", size = 5051722 },
832
+ { url = "https://files.pythonhosted.org/packages/3a/5e/375418be35f8a695cadfe7e7412f16520e62e24952ed93c64c9554755464/lxml-5.3.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c2e49dc23a10a1296b04ca9db200c44d3eb32c8d8ec532e8c1fd24792276522a", size = 4786661 },
833
+ { url = "https://files.pythonhosted.org/packages/79/7c/d258eaaa9560f6664f9b426a5165103015bee6512d8931e17342278bad0a/lxml-5.3.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4399b4226c4785575fb20998dc571bc48125dc92c367ce2602d0d70e0c455eb0", size = 5311766 },
834
+ { url = "https://files.pythonhosted.org/packages/03/bc/a041415be4135a1b3fdf017a5d873244cc16689456166fbdec4b27fba153/lxml-5.3.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5412500e0dc5481b1ee9cf6b38bb3b473f6e411eb62b83dc9b62699c3b7b79f7", size = 4836014 },
835
+ { url = "https://files.pythonhosted.org/packages/32/88/047f24967d5e3fc97848ea2c207eeef0f16239cdc47368c8b95a8dc93a33/lxml-5.3.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1c93ed3c998ea8472be98fb55aed65b5198740bfceaec07b2eba551e55b7b9ae", size = 4961064 },
836
+ { url = "https://files.pythonhosted.org/packages/3d/b5/ecf5a20937ecd21af02c5374020f4e3a3538e10a32379a7553fca3d77094/lxml-5.3.1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:63d57fc94eb0bbb4735e45517afc21ef262991d8758a8f2f05dd6e4174944519", size = 4778341 },
837
+ { url = "https://files.pythonhosted.org/packages/a4/05/56c359e07275911ed5f35ab1d63c8cd3360d395fb91e43927a2ae90b0322/lxml-5.3.1-cp312-cp312-manylinux_2_28_ppc64le.whl", hash = "sha256:b450d7cabcd49aa7ab46a3c6aa3ac7e1593600a1a0605ba536ec0f1b99a04322", size = 5345450 },
838
+ { url = "https://files.pythonhosted.org/packages/b7/f4/f95e3ae12e9f32fbcde00f9affa6b0df07f495117f62dbb796a9a31c84d6/lxml-5.3.1-cp312-cp312-manylinux_2_28_s390x.whl", hash = "sha256:4df0ec814b50275ad6a99bc82a38b59f90e10e47714ac9871e1b223895825468", size = 4908336 },
839
+ { url = "https://files.pythonhosted.org/packages/c5/f8/309546aec092434166a6e11c7dcecb5c2d0a787c18c072d61e18da9eba57/lxml-5.3.1-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:d184f85ad2bb1f261eac55cddfcf62a70dee89982c978e92b9a74a1bfef2e367", size = 4986049 },
840
+ { url = "https://files.pythonhosted.org/packages/71/1c/b951817cb5058ca7c332d012dfe8bc59dabd0f0a8911ddd7b7ea8e41cfbd/lxml-5.3.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:b725e70d15906d24615201e650d5b0388b08a5187a55f119f25874d0103f90dd", size = 4860351 },
841
+ { url = "https://files.pythonhosted.org/packages/31/23/45feba8dae1d35fcca1e51b051f59dc4223cbd23e071a31e25f3f73938a8/lxml-5.3.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:a31fa7536ec1fb7155a0cd3a4e3d956c835ad0a43e3610ca32384d01f079ea1c", size = 5421580 },
842
+ { url = "https://files.pythonhosted.org/packages/61/69/be245d7b2dbef81c542af59c97fcd641fbf45accf2dc1c325bae7d0d014c/lxml-5.3.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:3c3c8b55c7fc7b7e8877b9366568cc73d68b82da7fe33d8b98527b73857a225f", size = 5285778 },
843
+ { url = "https://files.pythonhosted.org/packages/69/06/128af2ed04bac99b8f83becfb74c480f1aa18407b5c329fad457e08a1bf4/lxml-5.3.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:d61ec60945d694df806a9aec88e8f29a27293c6e424f8ff91c80416e3c617645", size = 5054455 },
844
+ { url = "https://files.pythonhosted.org/packages/8a/2d/f03a21cf6cc75cdd083563e509c7b6b159d761115c4142abb5481094ed8c/lxml-5.3.1-cp312-cp312-win32.whl", hash = "sha256:f4eac0584cdc3285ef2e74eee1513a6001681fd9753b259e8159421ed28a72e5", size = 3486315 },
845
+ { url = "https://files.pythonhosted.org/packages/2b/9c/8abe21585d20ef70ad9cec7562da4332b764ed69ec29b7389d23dfabcea0/lxml-5.3.1-cp312-cp312-win_amd64.whl", hash = "sha256:29bfc8d3d88e56ea0a27e7c4897b642706840247f59f4377d81be8f32aa0cfbf", size = 3816925 },
846
+ ]
847
+
848
+ [[package]]
849
+ name = "mammoth"
850
+ version = "1.9.0"
851
+ source = { registry = "https://pypi.org/simple" }
852
+ dependencies = [
853
+ { name = "cobble" },
854
+ ]
855
+ sdist = { url = "https://files.pythonhosted.org/packages/d6/a6/27a13ba068cf3ff764d631b8dd71dee1b33040aa8c143f66ce902b7d1da0/mammoth-1.9.0.tar.gz", hash = "sha256:74f5dae10ca240fd9b7a0e1a6deaebe0aad23bc590633ef6f5e868aa9b7042a6", size = 50906 }
856
+ wheels = [
857
+ { url = "https://files.pythonhosted.org/packages/d0/ab/f8e63fcabc127c6efd68b03633c189ee799a5304fa96c036a325a2894bcb/mammoth-1.9.0-py2.py3-none-any.whl", hash = "sha256:0eea277316586f0ca65d86834aec4de5a0572c83ec54b4991f9bb520a891150f", size = 52901 },
858
+ ]
859
+
860
  [[package]]
861
  name = "markdown-it-py"
862
  version = "3.0.0"
 
884
 
885
  [[package]]
886
  name = "markitdown"
887
+ version = "0.0.2"
888
  source = { registry = "https://pypi.org/simple" }
889
  dependencies = [
890
+ { name = "azure-ai-documentintelligence" },
891
+ { name = "azure-identity" },
892
  { name = "beautifulsoup4" },
893
  { name = "charset-normalizer" },
894
+ { name = "mammoth" },
895
  { name = "markdownify" },
896
+ { name = "numpy" },
897
+ { name = "olefile" },
898
+ { name = "openai" },
899
+ { name = "openpyxl" },
900
+ { name = "pandas" },
901
  { name = "pathvalidate" },
902
+ { name = "pdfminer-six" },
903
  { name = "puremagic" },
904
+ { name = "pydub" },
905
+ { name = "python-pptx" },
906
  { name = "requests" },
907
+ { name = "speechrecognition" },
908
+ { name = "xlrd" },
909
+ { name = "youtube-transcript-api" },
910
  ]
911
+ sdist = { url = "https://files.pythonhosted.org/packages/69/bf/8d5ee5ceef8dc175b4c8714e6f5170e3bc38862eeb6a45f29433c64611c1/markitdown-0.0.2.tar.gz", hash = "sha256:678fc2c9bc4eded0941c01155251982cab1f86c8148c8ac7ca3869116d3d49d7", size = 21008 }
912
  wheels = [
913
+ { url = "https://files.pythonhosted.org/packages/4a/f9/d0d6c6a11a16e2e22faf79bb4a86764110a8b391a510d6bd5cbb4ec57f59/markitdown-0.0.2-py3-none-any.whl", hash = "sha256:6ee5a6b70fbc36ba1586a40d5ab1ea8ea7f89bf37ebbaaa11013e026dc55b16e", size = 21278 },
914
  ]
915
 
916
  [[package]]
 
974
  { url = "https://files.pythonhosted.org/packages/43/e3/7d92a15f894aa0c9c4b49b8ee9ac9850d6e63b03c9c32c0367a13ae62209/mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c", size = 536198 },
975
  ]
976
 
977
+ [[package]]
978
+ name = "msal"
979
+ version = "1.31.1"
980
+ source = { registry = "https://pypi.org/simple" }
981
+ dependencies = [
982
+ { name = "cryptography" },
983
+ { name = "pyjwt", extra = ["crypto"] },
984
+ { name = "requests" },
985
+ ]
986
+ sdist = { url = "https://files.pythonhosted.org/packages/3f/f3/cdf2681e83a73c3355883c2884b6ff2f2d2aadfc399c28e9ac4edc3994fd/msal-1.31.1.tar.gz", hash = "sha256:11b5e6a3f802ffd3a72107203e20c4eac6ef53401961b880af2835b723d80578", size = 145362 }
987
+ wheels = [
988
+ { url = "https://files.pythonhosted.org/packages/30/7c/489cd931a752d05753d730e848039f08f65f86237cf1b8724d0a1cbd700b/msal-1.31.1-py3-none-any.whl", hash = "sha256:29d9882de247e96db01386496d59f29035e5e841bcac892e6d7bf4390bf6bd17", size = 113216 },
989
+ ]
990
+
991
+ [[package]]
992
+ name = "msal-extensions"
993
+ version = "1.2.0"
994
+ source = { registry = "https://pypi.org/simple" }
995
+ dependencies = [
996
+ { name = "msal" },
997
+ { name = "portalocker" },
998
+ ]
999
+ sdist = { url = "https://files.pythonhosted.org/packages/2d/38/ad49272d0a5af95f7a0cb64a79bbd75c9c187f3b789385a143d8d537a5eb/msal_extensions-1.2.0.tar.gz", hash = "sha256:6f41b320bfd2933d631a215c91ca0dd3e67d84bd1a2f50ce917d5874ec646bef", size = 22391 }
1000
+ wheels = [
1001
+ { url = "https://files.pythonhosted.org/packages/2c/69/314d887a01599669fb330da14e5c6ff5f138609e322812a942a74ef9b765/msal_extensions-1.2.0-py3-none-any.whl", hash = "sha256:cf5ba83a2113fa6dc011a254a72f1c223c88d7dfad74cc30617c4679a417704d", size = 19254 },
1002
+ ]
1003
+
1004
  [[package]]
1005
  name = "multidict"
1006
  version = "6.1.0"
 
1041
  { url = "https://files.pythonhosted.org/packages/da/d9/f7f9379981e39b8c2511c9e0326d212accacb82f12fbfdc1aa2ce2a7b2b6/multiprocess-0.70.16-py39-none-any.whl", hash = "sha256:a0bafd3ae1b732eac64be2e72038231c1ba97724b60b09400d68f229fcc2fbf3", size = 133351 },
1042
  ]
1043
 
1044
+ [[package]]
1045
+ name = "mypy-extensions"
1046
+ version = "1.0.0"
1047
+ source = { registry = "https://pypi.org/simple" }
1048
+ sdist = { url = "https://files.pythonhosted.org/packages/98/a4/1ab47638b92648243faf97a5aeb6ea83059cc3624972ab6b8d2316078d3f/mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782", size = 4433 }
1049
+ wheels = [
1050
+ { url = "https://files.pythonhosted.org/packages/2a/e2/5d3f6ada4297caebe1a2add3b126fe800c96f56dbe5d1988a2cbe0b267aa/mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d", size = 4695 },
1051
+ ]
1052
+
1053
  [[package]]
1054
  name = "networkx"
1055
  version = "3.4.2"
 
1195
  { url = "https://files.pythonhosted.org/packages/87/20/199b8713428322a2f22b722c62b8cc278cc53dffa9705d744484b5035ee9/nvidia_nvtx_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:781e950d9b9f60d8241ccea575b32f5105a5baf4c2351cab5256a24869f12a1a", size = 99144 },
1196
  ]
1197
 
1198
+ [[package]]
1199
+ name = "olefile"
1200
+ version = "0.47"
1201
+ source = { registry = "https://pypi.org/simple" }
1202
+ sdist = { url = "https://files.pythonhosted.org/packages/69/1b/077b508e3e500e1629d366249c3ccb32f95e50258b231705c09e3c7a4366/olefile-0.47.zip", hash = "sha256:599383381a0bf3dfbd932ca0ca6515acd174ed48870cbf7fee123d698c192c1c", size = 112240 }
1203
+ wheels = [
1204
+ { url = "https://files.pythonhosted.org/packages/17/d3/b64c356a907242d719fc668b71befd73324e47ab46c8ebbbede252c154b2/olefile-0.47-py2.py3-none-any.whl", hash = "sha256:543c7da2a7adadf21214938bb79c83ea12b473a4b6ee4ad4bf854e7715e13d1f", size = 114565 },
1205
+ ]
1206
+
1207
  [[package]]
1208
  name = "openai"
1209
  version = "1.65.5"
 
1223
  { url = "https://files.pythonhosted.org/packages/fc/8f/a178d73277bf2d838617fa20ba4ae6952e26074664aacb53ae4532a69588/openai-1.65.5-py3-none-any.whl", hash = "sha256:5948a504e7b4003d921cfab81273813793a31c25b1d7b605797c01757e0141f1", size = 474468 },
1224
  ]
1225
 
1226
+ [[package]]
1227
+ name = "openpyxl"
1228
+ version = "3.1.5"
1229
+ source = { registry = "https://pypi.org/simple" }
1230
+ dependencies = [
1231
+ { name = "et-xmlfile" },
1232
+ ]
1233
+ sdist = { url = "https://files.pythonhosted.org/packages/3d/f9/88d94a75de065ea32619465d2f77b29a0469500e99012523b91cc4141cd1/openpyxl-3.1.5.tar.gz", hash = "sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050", size = 186464 }
1234
+ wheels = [
1235
+ { url = "https://files.pythonhosted.org/packages/c0/da/977ded879c29cbd04de313843e76868e6e13408a94ed6b987245dc7c8506/openpyxl-3.1.5-py2.py3-none-any.whl", hash = "sha256:5282c12b107bffeef825f4617dc029afaf41d0ea60823bbb665ef3079dc79de2", size = 250910 },
1236
+ ]
1237
+
1238
  [[package]]
1239
  name = "orjson"
1240
  version = "3.10.15"
 
1286
  { url = "https://files.pythonhosted.org/packages/29/d4/1244ab8edf173a10fd601f7e13b9566c1b525c4f365d6bee918e68381889/pandas-2.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:59ef3764d0fe818125a5097d2ae867ca3fa64df032331b7e0917cf5d7bf66b13", size = 11504248 },
1287
  ]
1288
 
1289
+ [[package]]
1290
+ name = "pathspec"
1291
+ version = "0.12.1"
1292
+ source = { registry = "https://pypi.org/simple" }
1293
+ sdist = { url = "https://files.pythonhosted.org/packages/ca/bc/f35b8446f4531a7cb215605d100cd88b7ac6f44ab3fc94870c120ab3adbf/pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712", size = 51043 }
1294
+ wheels = [
1295
+ { url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191 },
1296
+ ]
1297
+
1298
  [[package]]
1299
  name = "pathvalidate"
1300
  version = "3.2.3"
 
1304
  { url = "https://files.pythonhosted.org/packages/50/14/c5a0e1a947909810fc4c043b84cac472b70e438148d34f5393be1bac663f/pathvalidate-3.2.3-py3-none-any.whl", hash = "sha256:5eaf0562e345d4b6d0c0239d0f690c3bd84d2a9a3c4c73b99ea667401b27bee1", size = 24130 },
1305
  ]
1306
 
1307
+ [[package]]
1308
+ name = "pdfminer-six"
1309
+ version = "20240706"
1310
+ source = { registry = "https://pypi.org/simple" }
1311
+ dependencies = [
1312
+ { name = "charset-normalizer" },
1313
+ { name = "cryptography" },
1314
+ ]
1315
+ sdist = { url = "https://files.pythonhosted.org/packages/e3/37/63cb918ffa21412dd5d54e32e190e69bfc340f3d6aa072ad740bec9386bb/pdfminer.six-20240706.tar.gz", hash = "sha256:c631a46d5da957a9ffe4460c5dce21e8431dabb615fee5f9f4400603a58d95a6", size = 7363505 }
1316
+ wheels = [
1317
+ { url = "https://files.pythonhosted.org/packages/67/7d/44d6b90e5a293d3a975cefdc4e12a932ebba814995b2a07e37e599dd27c6/pdfminer.six-20240706-py3-none-any.whl", hash = "sha256:f4f70e74174b4b3542fcb8406a210b6e2e27cd0f0b5fd04534a8cc0d8951e38c", size = 5615414 },
1318
+ ]
1319
+
1320
  [[package]]
1321
  name = "pillow"
1322
  version = "11.1.0"
 
1336
  { url = "https://files.pythonhosted.org/packages/37/f3/9b18362206b244167c958984b57c7f70a0289bfb59a530dd8af5f699b910/pillow-11.1.0-cp312-cp312-win_arm64.whl", hash = "sha256:4dd43a78897793f60766563969442020e90eb7847463eca901e41ba186a7d4a5", size = 2375240 },
1337
  ]
1338
 
1339
+ [[package]]
1340
+ name = "platformdirs"
1341
+ version = "4.3.6"
1342
+ source = { registry = "https://pypi.org/simple" }
1343
+ sdist = { url = "https://files.pythonhosted.org/packages/13/fc/128cc9cb8f03208bdbf93d3aa862e16d376844a14f9a0ce5cf4507372de4/platformdirs-4.3.6.tar.gz", hash = "sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907", size = 21302 }
1344
+ wheels = [
1345
+ { url = "https://files.pythonhosted.org/packages/3c/a6/bc1012356d8ece4d66dd75c4b9fc6c1f6650ddd5991e421177d9f8f671be/platformdirs-4.3.6-py3-none-any.whl", hash = "sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb", size = 18439 },
1346
+ ]
1347
+
1348
+ [[package]]
1349
+ name = "portalocker"
1350
+ version = "2.10.1"
1351
+ source = { registry = "https://pypi.org/simple" }
1352
+ dependencies = [
1353
+ { name = "pywin32", marker = "sys_platform == 'win32'" },
1354
+ ]
1355
+ sdist = { url = "https://files.pythonhosted.org/packages/ed/d3/c6c64067759e87af98cc668c1cc75171347d0f1577fab7ca3749134e3cd4/portalocker-2.10.1.tar.gz", hash = "sha256:ef1bf844e878ab08aee7e40184156e1151f228f103aa5c6bd0724cc330960f8f", size = 40891 }
1356
+ wheels = [
1357
+ { url = "https://files.pythonhosted.org/packages/9b/fb/a70a4214956182e0d7a9099ab17d50bfcba1056188e9b14f35b9e2b62a0d/portalocker-2.10.1-py3-none-any.whl", hash = "sha256:53a5984ebc86a025552264b459b46a2086e269b21823cb572f8f28ee759e45bf", size = 18423 },
1358
+ ]
1359
+
1360
  [[package]]
1361
  name = "propcache"
1362
  version = "0.3.0"
 
1472
  { url = "https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c", size = 1225293 },
1473
  ]
1474
 
1475
+ [[package]]
1476
+ name = "pyjwt"
1477
+ version = "2.10.1"
1478
+ source = { registry = "https://pypi.org/simple" }
1479
+ sdist = { url = "https://files.pythonhosted.org/packages/e7/46/bd74733ff231675599650d3e47f361794b22ef3e3770998dda30d3b63726/pyjwt-2.10.1.tar.gz", hash = "sha256:3cc5772eb20009233caf06e9d8a0577824723b44e6648ee0a2aedb6cf9381953", size = 87785 }
1480
+ wheels = [
1481
+ { url = "https://files.pythonhosted.org/packages/61/ad/689f02752eeec26aed679477e80e632ef1b682313be70793d798c1d5fc8f/PyJWT-2.10.1-py3-none-any.whl", hash = "sha256:dcdd193e30abefd5debf142f9adfcdd2b58004e644f25406ffaebd50bd98dacb", size = 22997 },
1482
+ ]
1483
+
1484
+ [package.optional-dependencies]
1485
+ crypto = [
1486
+ { name = "cryptography" },
1487
+ ]
1488
+
1489
  [[package]]
1490
  name = "pyparsing"
1491
  version = "3.2.1"
 
1525
  { url = "https://files.pythonhosted.org/packages/45/58/38b5afbc1a800eeea951b9285d3912613f2603bdf897a4ab0f4bd7f405fc/python_multipart-0.0.20-py3-none-any.whl", hash = "sha256:8a62d3a8335e06589fe01f2a3e178cdcc632f3fbe0d492ad9ee0ec35aab1f104", size = 24546 },
1526
  ]
1527
 
1528
+ [[package]]
1529
+ name = "python-pptx"
1530
+ version = "1.0.2"
1531
+ source = { registry = "https://pypi.org/simple" }
1532
+ dependencies = [
1533
+ { name = "lxml" },
1534
+ { name = "pillow" },
1535
+ { name = "typing-extensions" },
1536
+ { name = "xlsxwriter" },
1537
+ ]
1538
+ sdist = { url = "https://files.pythonhosted.org/packages/52/a9/0c0db8d37b2b8a645666f7fd8accea4c6224e013c42b1d5c17c93590cd06/python_pptx-1.0.2.tar.gz", hash = "sha256:479a8af0eaf0f0d76b6f00b0887732874ad2e3188230315290cd1f9dd9cc7095", size = 10109297 }
1539
+ wheels = [
1540
+ { url = "https://files.pythonhosted.org/packages/d9/4f/00be2196329ebbff56ce564aa94efb0fbc828d00de250b1980de1a34ab49/python_pptx-1.0.2-py3-none-any.whl", hash = "sha256:160838e0b8565a8b1f67947675886e9fea18aa5e795db7ae531606d68e785cba", size = 472788 },
1541
+ ]
1542
+
1543
  [[package]]
1544
  name = "pytz"
1545
  version = "2025.1"
 
1549
  { url = "https://files.pythonhosted.org/packages/eb/38/ac33370d784287baa1c3d538978b5e2ea064d4c1b93ffbd12826c190dd10/pytz-2025.1-py2.py3-none-any.whl", hash = "sha256:89dd22dca55b46eac6eda23b2d72721bf1bdfef212645d81513ef5d03038de57", size = 507930 },
1550
  ]
1551
 
1552
+ [[package]]
1553
+ name = "pywin32"
1554
+ version = "309"
1555
+ source = { registry = "https://pypi.org/simple" }
1556
+ wheels = [
1557
+ { url = "https://files.pythonhosted.org/packages/20/2c/b0240b14ff3dba7a8a7122dc9bbf7fbd21ed0e8b57c109633675b5d1761f/pywin32-309-cp312-cp312-win32.whl", hash = "sha256:de9acacced5fa82f557298b1fed5fef7bd49beee04190f68e1e4783fbdc19926", size = 8790648 },
1558
+ { url = "https://files.pythonhosted.org/packages/dd/11/c36884c732e2b3397deee808b5dac1abbb170ec37f94c6606fcb04d1e9d7/pywin32-309-cp312-cp312-win_amd64.whl", hash = "sha256:6ff9eebb77ffc3d59812c68db33c0a7817e1337e3537859499bd27586330fc9e", size = 9497399 },
1559
+ { url = "https://files.pythonhosted.org/packages/18/9f/79703972958f8ba3fd38bc9bf1165810bd75124982419b0cc433a2894d46/pywin32-309-cp312-cp312-win_arm64.whl", hash = "sha256:619f3e0a327b5418d833f44dc87859523635cf339f86071cc65a13c07be3110f", size = 8454122 },
1560
+ ]
1561
+
1562
  [[package]]
1563
  name = "pyyaml"
1564
  version = "6.0.2"
 
1662
  { url = "https://files.pythonhosted.org/packages/a3/4f/0fce63e0f5cdd658e71e21abd17ac1bc9312741ebb8b3f74eeed2ebdf771/rpds_py-0.23.1-cp312-cp312-win_amd64.whl", hash = "sha256:b03a8d50b137ee758e4c73638b10747b7c39988eb8e6cd11abb7084266455165", size = 237426 },
1663
  ]
1664
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1665
  [[package]]
1666
  name = "ruff"
1667
  version = "0.9.10"
 
1775
  { url = "https://files.pythonhosted.org/packages/d1/c2/fe97d779f3ef3b15f05c94a2f1e3d21732574ed441687474db9d342a7315/soupsieve-2.6-py3-none-any.whl", hash = "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9", size = 36186 },
1776
  ]
1777
 
1778
+ [[package]]
1779
+ name = "speechrecognition"
1780
+ version = "3.14.1"
1781
+ source = { registry = "https://pypi.org/simple" }
1782
+ dependencies = [
1783
+ { name = "typing-extensions" },
1784
+ ]
1785
+ sdist = { url = "https://files.pythonhosted.org/packages/e2/da/05607641a8db8fcc6898016fde7ea9b2e42d87cd1a1a275f0505a13389d8/speechrecognition-3.14.1.tar.gz", hash = "sha256:c767f8558e111a65e9a56905b04eaec2331f87d5011379381621f47aded6c4fe", size = 32858706 }
1786
+ wheels = [
1787
+ { url = "https://files.pythonhosted.org/packages/09/47/5dcfcd8a2c8c2981986fc196e98fc57bc1ecb5233b2d54dac0c0d448b019/SpeechRecognition-3.14.1-py3-none-any.whl", hash = "sha256:2b5d16a7dce2dbf5f90d9c4d5aefe96325518abdc963059ec16dad9e4f2c09d3", size = 32853180 },
1788
+ ]
1789
+
1790
  [[package]]
1791
  name = "starlette"
1792
  version = "0.46.1"
 
2042
  { url = "https://files.pythonhosted.org/packages/2d/82/f56956041adef78f849db6b289b282e72b55ab8045a75abad81898c28d19/wrapt-1.17.2-py3-none-any.whl", hash = "sha256:b18f2d1533a71f069c7f82d524a52599053d4c7166e9dd374ae2136b7f40f7c8", size = 23594 },
2043
  ]
2044
 
2045
+ [[package]]
2046
+ name = "xlrd"
2047
+ version = "2.0.1"
2048
+ source = { registry = "https://pypi.org/simple" }
2049
+ sdist = { url = "https://files.pythonhosted.org/packages/a6/b3/19a2540d21dea5f908304375bd43f5ed7a4c28a370dc9122c565423e6b44/xlrd-2.0.1.tar.gz", hash = "sha256:f72f148f54442c6b056bf931dbc34f986fd0c3b0b6b5a58d013c9aef274d0c88", size = 100259 }
2050
+ wheels = [
2051
+ { url = "https://files.pythonhosted.org/packages/a6/0c/c2a72d51fe56e08a08acc85d13013558a2d793028ae7385448a6ccdfae64/xlrd-2.0.1-py2.py3-none-any.whl", hash = "sha256:6a33ee89877bd9abc1158129f6e94be74e2679636b8a205b43b85206c3f0bbdd", size = 96531 },
2052
+ ]
2053
+
2054
+ [[package]]
2055
+ name = "xlsxwriter"
2056
+ version = "3.2.2"
2057
+ source = { registry = "https://pypi.org/simple" }
2058
+ sdist = { url = "https://files.pythonhosted.org/packages/a1/08/26f69d1e9264e8107253018de9fc6b96f9219817d01c5f021e927384a8d1/xlsxwriter-3.2.2.tar.gz", hash = "sha256:befc7f92578a85fed261639fb6cde1fd51b79c5e854040847dde59d4317077dc", size = 205202 }
2059
+ wheels = [
2060
+ { url = "https://files.pythonhosted.org/packages/9b/07/df054f7413bdfff5e98f75056e4ed0977d0c8716424011fac2587864d1d3/XlsxWriter-3.2.2-py3-none-any.whl", hash = "sha256:272ce861e7fa5e82a4a6ebc24511f2cb952fde3461f6c6e1a1e81d3272db1471", size = 165121 },
2061
+ ]
2062
+
2063
  [[package]]
2064
  name = "xxhash"
2065
  version = "3.5.0"
 
2116
  [[package]]
2117
  name = "yourbench"
2118
  version = "0.2.0"
2119
+ source = { git = "https://github.com/huggingface/yourbench.git?rev=v0.2-alpha-space#314985cd3ebf06193bd1bd4cbae34e7947f5667b" }
2120
  dependencies = [
2121
  { name = "asyncio" },
2122
  { name = "datasets" },
 
2140
  source = { editable = "." }
2141
  dependencies = [
2142
  { name = "asyncio" },
2143
+ { name = "black" },
2144
  { name = "datasets" },
2145
  { name = "gradio", extra = ["oauth"] },
2146
  { name = "hf-transfer" },
 
2151
  { name = "matplotlib" },
2152
  { name = "openai" },
2153
  { name = "python-dotenv" },
 
2154
  { name = "torch" },
2155
  { name = "tqdm" },
2156
  { name = "transformers" },
 
2160
  [package.metadata]
2161
  requires-dist = [
2162
  { name = "asyncio", specifier = ">=3.4.3" },
2163
+ { name = "black", specifier = ">=25.1.0" },
2164
  { name = "datasets", specifier = ">=3.3.0" },
2165
  { name = "gradio", extras = ["oauth"], specifier = ">=5.20.0" },
2166
  { name = "hf-transfer", specifier = ">=0.1.9" },
2167
  { name = "langfuse", specifier = ">=2.59.3" },
2168
  { name = "litellm", specifier = ">=1.61.16" },
2169
  { name = "loguru", specifier = ">=0.7.3" },
2170
+ { name = "markitdown", specifier = ">=0.0.2" },
2171
  { name = "matplotlib", specifier = ">=3.10.0" },
2172
  { name = "openai", specifier = ">=1.63.0" },
2173
  { name = "python-dotenv", specifier = ">=1.0.1" },
 
2174
  { name = "torch", specifier = ">=2.6.0" },
2175
  { name = "tqdm", specifier = ">=4.67.1" },
2176
  { name = "transformers", specifier = ">=4.48.3" },
2177
  { name = "yourbench", git = "https://github.com/huggingface/yourbench.git?rev=v0.2-alpha-space" },
2178
  ]
2179
 
2180
+ [[package]]
2181
+ name = "youtube-transcript-api"
2182
+ version = "0.6.3"
2183
+ source = { registry = "https://pypi.org/simple" }
2184
+ dependencies = [
2185
+ { name = "defusedxml" },
2186
+ { name = "requests" },
2187
+ ]
2188
+ sdist = { url = "https://files.pythonhosted.org/packages/d7/f1/55ff16f7198bdf5204fd7be3c49122e07092a3da47bf4e1560989a4c0255/youtube_transcript_api-0.6.3.tar.gz", hash = "sha256:4d1f6451ae508390a5279f98519efb45e091bf60d3cca5ea0bb122800ab6a011", size = 612052 }
2189
+ wheels = [
2190
+ { url = "https://files.pythonhosted.org/packages/80/d4/be6fd091d29ae49d93813e598769e7ab453419a4de640e1755bf20911cce/youtube_transcript_api-0.6.3-py3-none-any.whl", hash = "sha256:297a74c1863d9df88f6885229f33a7eda61493d73ecb13ec80e876b65423e9b4", size = 622293 },
2191
+ ]
2192
+
2193
  [[package]]
2194
  name = "zipp"
2195
  version = "3.21.0"
yourbench_space/app.py CHANGED
@@ -1,13 +1,19 @@
 
1
  import sys
2
- from huggingface_hub import HfApi, whoami
3
- import requests
4
- import yaml
5
  import gradio as gr
6
  from loguru import logger
7
- from yourbench.pipeline import run_pipeline
8
 
9
  from yourbench_space.config import generate_base_config, save_config
10
- from yourbench_space.utils import CONFIG_PATH, UPLOAD_DIRECTORY, SubprocessManager, save_files
 
 
 
 
 
 
 
 
11
 
12
  UPLOAD_DIRECTORY.mkdir(parents=True, exist_ok=True)
13
 
@@ -17,18 +23,110 @@ logger.add(sys.stderr, level="INFO")
17
  command = ["uv", "run", "yourbench", f"--config={CONFIG_PATH}"]
18
  manager = SubprocessManager(command)
19
 
 
 
 
 
 
 
 
 
 
 
20
  def update_hf_org_dropdown(oauth_token: gr.OAuthToken | None) -> str:
21
  if oauth_token is None:
22
- print("Please deploy this on Spaces and log in to list organizations.")
 
 
23
  return list()
24
- user_info = whoami(oauth_token.token)
25
 
26
  org_names = [org["name"] for org in user_info["orgs"]]
27
  user_name = user_info["name"]
28
  org_names.insert(0, user_name)
29
  return gr.Dropdown(org_names, value=user_name, label="Organization")
30
 
 
31
  config_output = gr.Code(label="Generated Config", language="yaml")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
  with gr.Blocks() as app:
34
  gr.Markdown("## YourBench Configuration")
@@ -36,37 +134,110 @@ with gr.Blocks() as app:
36
  login_btn = gr.LoginButton()
37
 
38
  with gr.Tab("Configuration"):
39
- model_name = gr.Textbox(label="Model Name")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
- hf_org_dropdown = gr.Dropdown(list(), label="Organization", allow_custom_value=True)
42
- app.load(update_hf_org_dropdown, inputs=None, outputs=hf_org_dropdown)
 
 
 
 
43
 
44
- provider = gr.Dropdown(["openrouter", "openai", "huggingface"], value="huggingface", label="Provider", allow_custom_value=True)
45
- base_url = gr.Textbox(label="Base URL")
46
- api_key = gr.Textbox(label="API Key")
47
- max_concurrent_requests = gr.Dropdown([8, 16, 32], value=16, label="Max Concurrent Requests")
48
- preview_button = gr.Button("Generate Config")
 
 
 
 
49
  preview_button.click(
50
  generate_base_config,
51
- inputs=[hf_org_dropdown, model_name, provider, base_url, api_key, max_concurrent_requests],
52
- outputs=config_output
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
  )
54
 
55
- save_button = gr.Button("Save Config")
56
- save_button.click(save_config, inputs=[config_output], outputs=[gr.Textbox(label="Save Status")])
57
-
58
  with gr.Tab("Raw Configuration"):
59
  config_output.render()
60
- save_button = gr.Button("Save Config")
61
- save_button.click(save_config, inputs=[config_output], outputs=[gr.Textbox(label="Save Status")])
 
 
 
62
 
63
  with gr.Tab("Files"):
64
- file_input = gr.File(label="Upload text files", file_count="multiple", file_types=[".txt", ".md", ".html"])
 
 
 
 
65
  output = gr.Textbox(label="Log")
66
  file_input.upload(save_files, file_input, output)
67
 
68
  with gr.Tab("Run Generation"):
69
- log_output = gr.Code(label="Log Output", language=None, lines=20, interactive=False)
 
 
70
  log_timer = gr.Timer(0.05, active=True)
71
  log_timer.tick(manager.read_and_get_output, outputs=log_output)
72
 
@@ -77,8 +248,8 @@ with gr.Blocks() as app:
77
 
78
  with gr.Row():
79
  start_button = gr.Button("Start Task")
80
- start_button.click(manager.start_process)
81
-
82
  stop_button = gr.Button("Stop Task")
83
  stop_button.click(manager.stop_process)
84
 
 
1
+ import os
2
  import sys
 
 
 
3
  import gradio as gr
4
  from loguru import logger
5
+ from huggingface_hub import HfApi, whoami
6
 
7
  from yourbench_space.config import generate_base_config, save_config
8
+ from yourbench_space.utils import (
9
+ CONFIG_PATH,
10
+ UPLOAD_DIRECTORY,
11
+ BASE_API_URLS,
12
+ AVAILABLE_MODELS,
13
+ DEFAULT_MODEL,
14
+ SubprocessManager,
15
+ save_files,
16
+ )
17
 
18
  UPLOAD_DIRECTORY.mkdir(parents=True, exist_ok=True)
19
 
 
23
  command = ["uv", "run", "yourbench", f"--config={CONFIG_PATH}"]
24
  manager = SubprocessManager(command)
25
 
26
+
27
+ def prepare_task(oauth_token: gr.OAuthToken | None, model_token: str):
28
+ new_env = os.environ.copy()
29
+ # Override env token, when running in gradio space
30
+ if oauth_token:
31
+ new_env["HF_TOKEN"] = oauth_token.token
32
+ new_env["MODEL_API_KEY"] = model_token
33
+ manager.start_process(custom_env=new_env)
34
+
35
+
36
  def update_hf_org_dropdown(oauth_token: gr.OAuthToken | None) -> str:
37
  if oauth_token is None:
38
+ print(
39
+ "Please, deploy this on Spaces and log in to view the list of available organizations"
40
+ )
41
  return list()
42
+ user_info = whoami(oauth_token.token)
43
 
44
  org_names = [org["name"] for org in user_info["orgs"]]
45
  user_name = user_info["name"]
46
  org_names.insert(0, user_name)
47
  return gr.Dropdown(org_names, value=user_name, label="Organization")
48
 
49
+
50
  config_output = gr.Code(label="Generated Config", language="yaml")
51
+ model_name = gr.Dropdown(
52
+ label="Model Name",
53
+ value=DEFAULT_MODEL,
54
+ choices=AVAILABLE_MODELS,
55
+ allow_custom_value=True,
56
+ )
57
+ base_url = gr.Textbox(
58
+ label="Model API Base URL",
59
+ value=BASE_API_URLS["huggingface"],
60
+ info="Use a custom API base URL for Hugging Face Inference Endpoints",
61
+ )
62
+
63
+
64
+ def make_models(model_name=None):
65
+ if model_name is None:
66
+ model_name = DEFAULT_MODEL
67
+
68
+ ingestion_model = gr.Dropdown(
69
+ label="Model for ingestion",
70
+ choices=AVAILABLE_MODELS,
71
+ value=model_name,
72
+ interactive=False,
73
+ allow_custom_value=True,
74
+ )
75
+ summarization_model = gr.Dropdown(
76
+ label="Model for summarization",
77
+ choices=AVAILABLE_MODELS,
78
+ value=model_name,
79
+ interactive=False,
80
+ allow_custom_value=True,
81
+ )
82
+ single_shot_question_generation_model = gr.Dropdown(
83
+ label="Model for single shot question generation",
84
+ choices=AVAILABLE_MODELS,
85
+ value=model_name,
86
+ interactive=False,
87
+ allow_custom_value=True,
88
+ )
89
+ multi_hop_question_generation_model = gr.Dropdown(
90
+ label="Model for multi hop question generation",
91
+ choices=AVAILABLE_MODELS,
92
+ value=model_name,
93
+ interactive=False,
94
+ allow_custom_value=True,
95
+ )
96
+ answer_generation_model = gr.Dropdown(
97
+ label="Model for answer generation",
98
+ choices=AVAILABLE_MODELS,
99
+ value=model_name,
100
+ interactive=False,
101
+ allow_custom_value=True,
102
+ )
103
+ judge_answers_model = gr.Dropdown(
104
+ label="Model for answer judging",
105
+ choices=AVAILABLE_MODELS,
106
+ value=model_name,
107
+ interactive=False,
108
+ allow_custom_value=True,
109
+ )
110
+
111
+ return [
112
+ ingestion_model,
113
+ summarization_model,
114
+ single_shot_question_generation_model,
115
+ multi_hop_question_generation_model,
116
+ answer_generation_model,
117
+ judge_answers_model,
118
+ ]
119
+
120
+
121
+ (
122
+ ingestion_model,
123
+ summarization_model,
124
+ single_shot_question_generation_model,
125
+ multi_hop_question_generation_model,
126
+ answer_generation_model,
127
+ judge_answers_model,
128
+ ) = make_models()
129
+
130
 
131
  with gr.Blocks() as app:
132
  gr.Markdown("## YourBench Configuration")
 
134
  login_btn = gr.LoginButton()
135
 
136
  with gr.Tab("Configuration"):
137
+ with gr.Accordion("Hugging Face"):
138
+ hf_org_dropdown = gr.Dropdown(
139
+ list(),
140
+ label="Organization",
141
+ allow_custom_value=True,
142
+ )
143
+ app.load(update_hf_org_dropdown, inputs=None, outputs=hf_org_dropdown)
144
+
145
+ hf_dataset_prefix = gr.Textbox(
146
+ label="Dataset Prefix",
147
+ value="yourbench",
148
+ info="Prefix applied to all datasets",
149
+ )
150
+ private_dataset = gr.Checkbox(
151
+ label="Private Dataset",
152
+ value=True,
153
+ info="Create private datasets (recommended by default)",
154
+ )
155
+
156
+ with gr.Accordion("Model"):
157
+ model_name.render()
158
+ # TODO handle this better
159
+ model_name.change(
160
+ make_models,
161
+ inputs=[model_name],
162
+ outputs=[
163
+ ingestion_model,
164
+ summarization_model,
165
+ single_shot_question_generation_model,
166
+ multi_hop_question_generation_model,
167
+ answer_generation_model,
168
+ judge_answers_model,
169
+ ],
170
+ )
171
+
172
+ provider = gr.Radio(
173
+ ["huggingface", "openrouter", "openai"],
174
+ value="huggingface",
175
+ label="Inference Provider",
176
+ )
177
+
178
+ def set_base_url(provider):
179
+ return gr.Textbox(
180
+ label="Model API Base URL", value=BASE_API_URLS.get(provider, "")
181
+ )
182
 
183
+ provider.change(fn=set_base_url, inputs=provider, outputs=base_url)
184
+ model_api_key = gr.Textbox(label="Model API Key", type="password")
185
+ base_url.render()
186
+ max_concurrent_requests = gr.Radio(
187
+ [8, 16, 32], value=16, label="Max Concurrent Requests"
188
+ )
189
 
190
+ with gr.Accordion("Stages"):
191
+ ingestion_model.render()
192
+ summarization_model.render()
193
+ single_shot_question_generation_model.render()
194
+ multi_hop_question_generation_model.render()
195
+ answer_generation_model.render()
196
+ judge_answers_model.render()
197
+
198
+ preview_button = gr.Button("Generate New Config")
199
  preview_button.click(
200
  generate_base_config,
201
+ inputs=[
202
+ hf_org_dropdown,
203
+ model_name,
204
+ provider,
205
+ base_url,
206
+ model_api_key,
207
+ max_concurrent_requests,
208
+ hf_dataset_prefix,
209
+ private_dataset,
210
+ ingestion_model,
211
+ summarization_model,
212
+ single_shot_question_generation_model,
213
+ multi_hop_question_generation_model,
214
+ answer_generation_model,
215
+ judge_answers_model,
216
+ ],
217
+ outputs=config_output,
218
  )
219
 
 
 
 
220
  with gr.Tab("Raw Configuration"):
221
  config_output.render()
222
+ config_output.change(
223
+ fn=save_config,
224
+ inputs=[config_output],
225
+ outputs=[gr.Textbox(label="Save Status")],
226
+ )
227
 
228
  with gr.Tab("Files"):
229
+ file_input = gr.File(
230
+ label="Upload text files",
231
+ file_count="multiple",
232
+ file_types=[".txt", ".md", ".html"],
233
+ )
234
  output = gr.Textbox(label="Log")
235
  file_input.upload(save_files, file_input, output)
236
 
237
  with gr.Tab("Run Generation"):
238
+ log_output = gr.Code(
239
+ label="Log Output", language=None, lines=20, interactive=False
240
+ )
241
  log_timer = gr.Timer(0.05, active=True)
242
  log_timer.tick(manager.read_and_get_output, outputs=log_output)
243
 
 
248
 
249
  with gr.Row():
250
  start_button = gr.Button("Start Task")
251
+ start_button.click(prepare_task, inputs=[model_api_key])
252
+
253
  stop_button = gr.Button("Stop Task")
254
  stop_button.click(manager.stop_process)
255
 
yourbench_space/config.py CHANGED
@@ -1,105 +1,126 @@
1
- import yaml
2
- import gradio as gr
3
 
 
4
  from yourbench_space.utils import CONFIG_PATH
5
 
6
 
7
- def generate_base_config(hf_org, model_name, provider, base_url, api_key, max_concurrent_requests):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  config = {
9
  "hf_configuration": {
10
  "token": "$HF_TOKEN",
11
- "private": True,
12
- "hf_organization": hf_org
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  },
14
- "model_list": [{
15
- "model_name": model_name,
16
- "provider": provider,
17
- "base_url": base_url,
18
- "api_key": api_key,
19
- "max_concurrent_requests": max_concurrent_requests
20
- }],
21
- "model_roles": {role: [model_name] for role in [
22
- "ingestion", "summarization", "single_shot_question_generation",
23
- "multi_hop_question_generation", "answer_generation", "judge_answers"
24
- ]},
25
  "inference_config": {"max_concurrent_requests": 16},
26
  "pipeline": {
27
  "ingestion": {
28
  "source_documents_dir": "/app/uploaded_files",
29
  "output_dir": "/app/ingested",
30
- "run": True
31
  },
32
  "upload_ingest_to_hub": {
33
  "source_documents_dir": "/app/ingested",
34
- "hub_dataset_name": "test_ingested_documents",
35
- "local_dataset_path": "/app/ingested_dataset",
36
- "run": True
37
  },
38
  "summarization": {
39
- "source_dataset_name": "test_ingested_documents",
40
- "output_dataset_name": "test_summaries",
41
- "local_dataset_path": "/results/test_summaries",
42
  "concat_existing_dataset": False,
43
- "run": True
44
  },
45
  "chunking": {
46
- "source_dataset_name": "test_summaries",
47
- "output_dataset_name": "test_chunked_documents",
48
- "local_dataset_path": "/results/test_chunked_documents",
49
  "concat_existing_dataset": False,
50
  "chunking_configuration": {
51
  "l_min_tokens": 64,
52
  "l_max_tokens": 128,
53
  "tau_threshold": 0.3,
54
  "h_min": 2,
55
- "h_max": 4
56
  },
57
- "run": True
58
  },
59
  "single_shot_question_generation": {
60
- "source_dataset_name": "test_chunked_documents",
61
- "output_dataset_name": "test_single_shot_questions",
62
- "local_dataset_path": "/results/test_single_shot_questions",
63
  "diversification_seed": "24 year old adult",
64
  "concat_existing_dataset": False,
65
- "run": True
66
  },
67
  "multi_hop_question_generation": {
68
- "source_dataset_name": "test_chunked_documents",
69
- "output_dataset_name": "test_multi_hop_questions",
70
- "local_dataset_path": "/results/test_multi_hop_questions",
71
  "concat_existing_dataset": False,
72
- "run": True
73
  },
74
  "answer_generation": {
75
- "run": True,
76
- "question_dataset_name": "test_single_shot_questions",
77
- "output_dataset_name": "test_answered_questions",
78
- "local_dataset_path": "/results/test_answered_questions",
79
  "concat_existing_dataset": False,
80
- "strategies": [{
81
- "name": "zeroshot",
82
- "prompt": "ZEROSHOT_QA_USER_PROMPT",
83
- "model_name": model_name
84
- }, {
85
- "name": "gold",
86
- "prompt": "GOLD_QA_USER_PROMPT",
87
- "model_name": model_name
88
- }]
 
 
 
 
89
  },
90
  "judge_answers": {
91
- "run": True,
92
- "source_judge_dataset_name": "test_answered_questions",
93
- "output_judged_dataset_name": "test_judged_comparisons",
94
- "local_dataset_path": "/results/test_judged_comparisons",
95
  "concat_existing_dataset": False,
96
  "comparing_strategies": [["zeroshot", "gold"]],
97
  "chunk_column_index": 0,
98
- "random_seed": 42
99
- }
100
- }
 
101
  }
102
- return yaml.dump(config, default_flow_style=False)
 
103
 
104
  def save_config(yaml_text):
105
  with open(CONFIG_PATH, "w") as file:
 
 
 
1
 
2
+ import yaml
3
  from yourbench_space.utils import CONFIG_PATH
4
 
5
 
6
+
7
def _selected_models(selection, fallback):
    """Normalize one stage's model selection to a non-empty list of names.

    ``selection`` may be a single model name, an iterable of names (e.g. a
    multi-select widget value), or empty/``None``. Empty selections fall back
    to ``fallback`` so every role always has at least one model.
    """
    if not selection:
        return [fallback]
    if isinstance(selection, str):
        return [selection]
    names = [name for name in selection if name]
    return names or [fallback]


def generate_base_config(
    hf_org,
    model_name,
    provider,
    base_url,
    model_api_key,
    max_concurrent_requests,
    hf_dataset_prefix,
    private_dataset,
    ingestion_model,
    summarization_model,
    single_shot_question_generation_model,
    multi_hop_question_generation_model,
    answer_generation_model,
    judge_answers_model,
):
    """Build the pipeline configuration and return it as YAML text.

    Args:
        hf_org: Value written to ``hf_configuration.hf_organization``.
        model_name: Primary model; also the fallback for any stage whose
            own selection is empty.
        provider: Provider recorded for every ``model_list`` entry.
        base_url: Base URL recorded for every ``model_list`` entry.
        model_api_key: Accepted for interface compatibility only. The key is
            deliberately never written into the YAML; the config carries the
            ``$MODEL_API_KEY`` placeholder instead (presumably resolved from
            the environment by the consumer of the config -- confirm).
        max_concurrent_requests: Per-model concurrency limit.
        hf_dataset_prefix: Prefix for every dataset name in the pipeline.
        private_dataset: Value written to ``hf_configuration.private``.
        ingestion_model: Model selected for the ``ingestion`` role.
        summarization_model: Model selected for the ``summarization`` role.
        single_shot_question_generation_model: Model selected for the
            ``single_shot_question_generation`` role.
        multi_hop_question_generation_model: Model selected for the
            ``multi_hop_question_generation`` role.
        answer_generation_model: Model selected for the ``answer_generation``
            role and used by its answer strategies.
        judge_answers_model: Model selected for the ``judge_answers`` role.

    Returns:
        The configuration serialized with ``yaml.dump``; keys keep their
        insertion order (``sort_keys=False``).
    """
    # Honor the per-stage selections instead of forcing every role onto
    # ``model_name``; an empty selection keeps the previous behavior.
    stage_models = {
        "ingestion": _selected_models(ingestion_model, model_name),
        "summarization": _selected_models(summarization_model, model_name),
        "single_shot_question_generation": _selected_models(
            single_shot_question_generation_model, model_name
        ),
        "multi_hop_question_generation": _selected_models(
            multi_hop_question_generation_model, model_name
        ),
        "answer_generation": _selected_models(
            answer_generation_model, model_name
        ),
        "judge_answers": _selected_models(judge_answers_model, model_name),
    }

    # Every model referenced by a role must be declared in ``model_list``.
    # ``dict.fromkeys`` de-duplicates while keeping the primary model first.
    referenced_models = dict.fromkeys(
        [model_name, *(name for names in stage_models.values() for name in names)]
    )
    model_list = [
        {
            "model_name": name,
            "provider": provider,
            "base_url": base_url,
            "api_key": "$MODEL_API_KEY",
            "max_concurrent_requests": max_concurrent_requests,
        }
        for name in referenced_models
    ]

    # The answer strategies run with the model chosen for answer generation.
    answer_model = stage_models["answer_generation"][0]

    config = {
        "hf_configuration": {
            "token": "$HF_TOKEN",
            "private": private_dataset,
            "hf_organization": hf_org,
        },
        "model_list": model_list,
        "model_roles": stage_models,
        "inference_config": {"max_concurrent_requests": 16},
        "pipeline": {
            "ingestion": {
                "source_documents_dir": "/app/uploaded_files",
                "output_dir": "/app/ingested",
                "run": True,
            },
            "upload_ingest_to_hub": {
                "source_documents_dir": "/app/ingested",
                "hub_dataset_name": f"{hf_dataset_prefix}_ingested_documents",
                "run": True,
            },
            "summarization": {
                "source_dataset_name": f"{hf_dataset_prefix}_ingested_documents",
                "output_dataset_name": f"{hf_dataset_prefix}_summaries",
                "concat_existing_dataset": False,
                "run": True,
            },
            "chunking": {
                "source_dataset_name": f"{hf_dataset_prefix}_summaries",
                "output_dataset_name": f"{hf_dataset_prefix}_chunked_documents",
                "concat_existing_dataset": False,
                "chunking_configuration": {
                    "l_min_tokens": 64,
                    "l_max_tokens": 128,
                    "tau_threshold": 0.3,
                    "h_min": 2,
                    "h_max": 4,
                },
                "run": True,
            },
            "single_shot_question_generation": {
                "source_dataset_name": f"{hf_dataset_prefix}_chunked_documents",
                "output_dataset_name": f"{hf_dataset_prefix}_single_shot_questions",
                "diversification_seed": "24 year old adult",
                "concat_existing_dataset": False,
                "run": True,
            },
            "multi_hop_question_generation": {
                "source_dataset_name": f"{hf_dataset_prefix}_chunked_documents",
                "output_dataset_name": f"{hf_dataset_prefix}_multi_hop_questions",
                "concat_existing_dataset": False,
                "run": True,
            },
            "answer_generation": {
                "question_dataset_name": f"{hf_dataset_prefix}_single_shot_questions",
                "output_dataset_name": f"{hf_dataset_prefix}_answered_questions",
                "concat_existing_dataset": False,
                "strategies": [
                    {
                        "name": "zeroshot",
                        "prompt": "ZEROSHOT_QA_USER_PROMPT",
                        "model_name": answer_model,
                    },
                    {
                        "name": "gold",
                        "prompt": "GOLD_QA_USER_PROMPT",
                        "model_name": answer_model,
                    },
                ],
                "run": True,
            },
            "judge_answers": {
                "source_judge_dataset_name": f"{hf_dataset_prefix}_answered_questions",
                "output_judged_dataset_name": f"{hf_dataset_prefix}_judged_comparisons",
                "concat_existing_dataset": False,
                "comparing_strategies": [["zeroshot", "gold"]],
                "chunk_column_index": 0,
                "random_seed": 42,
                "run": True,
            },
        },
    }
    return yaml.dump(config, sort_keys=False)
123
+
124
 
125
  def save_config(yaml_text):
126
  with open(CONFIG_PATH, "w") as file:
yourbench_space/utils.py CHANGED
@@ -2,13 +2,23 @@ import io
2
  import os
3
  import pathlib
4
  import shutil
5
- import gradio as gr
6
  from loguru import logger
7
  import subprocess
8
 
9
  UPLOAD_DIRECTORY = pathlib.Path("/app/uploaded_files")
10
  CONFIG_PATH = pathlib.Path("/app/yourbench_config.yml")
11
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  def save_files(files: list[str]):
14
  saved_paths = [shutil.move(str(pathlib.Path(file)), str(UPLOAD_DIRECTORY / pathlib.Path(file).name)) for file in files]
@@ -21,19 +31,13 @@ class SubprocessManager:
21
  self.process = None
22
  self.output_stream = io.StringIO()
23
 
24
- def start_process(self, oauth_token: gr.OAuthToken | None):
25
  """Start the subprocess."""
26
  if self.is_running():
27
  logger.info("Process is already running")
28
  return
29
 
30
  self.output_stream = io.StringIO()
31
-
32
- new_env = os.environ.copy()
33
- # Override env token, when running in gradio space
34
- if oauth_token:
35
- new_env["HF_TOKEN"] = oauth_token.token
36
-
37
  self.process = subprocess.Popen(
38
  self.command,
39
  stdout=subprocess.PIPE,
@@ -41,7 +45,7 @@ class SubprocessManager:
41
  text=True,
42
  bufsize=1,
43
  start_new_session=True,
44
- env=new_env
45
  )
46
  os.set_blocking(self.process.stdout.fileno(), False)
47
  logger.info("Started the process")
 
2
  import os
3
  import pathlib
4
  import shutil
 
5
  from loguru import logger
6
  import subprocess
7
 
8
  UPLOAD_DIRECTORY = pathlib.Path("/app/uploaded_files")
9
  CONFIG_PATH = pathlib.Path("/app/yourbench_config.yml")
10
 
11
+ AVAILABLE_MODELS = [
12
+ "mistralai/Mistral-Small-24B-Instruct-2501",
13
+ "meta-llama/Llama-3.3-70B-Instruct",
14
+ ]
15
+ DEFAULT_MODEL = AVAILABLE_MODELS[0]
16
+
17
+ BASE_API_URLS = {
18
+ "huggingface": "https://router.huggingface.co/hf-inference/v1",
19
+ "openrouter": "https://openrouter.ai/api/v1",
20
+ "openai": "https://api.openai.com/v1/",
21
+ }
22
 
23
  def save_files(files: list[str]):
24
  saved_paths = [shutil.move(str(pathlib.Path(file)), str(UPLOAD_DIRECTORY / pathlib.Path(file).name)) for file in files]
 
31
  self.process = None
32
  self.output_stream = io.StringIO()
33
 
34
+ def start_process(self, custom_env: dict | None):
35
  """Start the subprocess."""
36
  if self.is_running():
37
  logger.info("Process is already running")
38
  return
39
 
40
  self.output_stream = io.StringIO()
 
 
 
 
 
 
41
  self.process = subprocess.Popen(
42
  self.command,
43
  stdout=subprocess.PIPE,
 
45
  text=True,
46
  bufsize=1,
47
  start_new_session=True,
48
+ env=custom_env
49
  )
50
  os.set_blocking(self.process.stdout.fileno(), False)
51
  logger.info("Started the process")