Task-330 Set up basic documentation

Files changed:
- Pipfile +2 -0
- Pipfile.lock +108 -3
- README.md +46 -16
- docs/index.md +52 -0
- mkdocs.yml +15 -0
- scripts/predict.py +129 -24
- setup.sh +13 -13
Pipfile
CHANGED

```diff
@@ -15,6 +15,8 @@ joblib = "*"
 nltk = "*"
 htbuilder = "*"
 nest-asyncio = "*"
+mkdocs = "*"
+mkdocstrings-python = "*"
 
 [dev-packages]
 
```
Pipfile.lock
CHANGED

```diff
@@ -1,7 +1,7 @@
 {
     "_meta": {
         "hash": {
-            "sha256": "…"
+            "sha256": "011a0284b34b98265fb35b5b99964b701b6a30f703a45de6b092a7d7c631032d"
         },
         "pipfile-spec": 6,
         "requires": {
@@ -167,7 +167,7 @@
                 "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44",
                 "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"
             ],
-            "markers": "…"
+            "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5, 3.6'",
             "version": "==0.4.6"
         },
         "filelock": {
@@ -186,6 +186,13 @@
             "markers": "python_version >= '3.8'",
             "version": "==2025.3.0"
         },
+        "ghp-import": {
+            "hashes": [
+                "sha256:8337dd7b50877f163d4c0289bc1f1c7f127550241988d568c1db512c4324a619",
+                "sha256:9c535c4c61193c2df8871222567d7fd7e5014d835f97dc7b7439069e2413d343"
+            ],
+            "version": "==2.1.0"
+        },
         "gitdb": {
             "hashes": [
                 "sha256:5ef71f855d191a3326fcfbc0d5da835f26b13fbcba60c32c21091c349ffdb571",
@@ -202,6 +209,14 @@
             "markers": "python_version >= '3.7'",
             "version": "==3.1.44"
         },
+        "griffe": {
+            "hashes": [
+                "sha256:3a46fa7bd83280909b63c12b9a975732a927dd97809efe5b7972290b606c5d91",
+                "sha256:6399f7e663150e4278a312a8e8a14d2f3d7bd86e2ef2f8056a1058e38579c2ee"
+            ],
+            "markers": "python_version >= '3.9'",
+            "version": "==1.6.2"
+        },
         "htbuilder": {
             "hashes": [
                 "sha256:58c0bc5502c1a46b42ae9e074c43ec0f6fdc24ed334936cb17e1ed5a8938aee2"
@@ -260,6 +275,14 @@
             "markers": "python_version >= '3.9'",
             "version": "==2024.10.1"
         },
+        "markdown": {
+            "hashes": [
+                "sha256:2ae2471477cfd02dbbf038d5d9bc226d40def84b4fe2986e49b59b6b472bbed2",
+                "sha256:7eb6df5690b81a1d7942992c97fad2938e956e79df20cbc6186e9c3a77b1c803"
+            ],
+            "markers": "python_version >= '3.8'",
+            "version": "==3.7"
+        },
         "markupsafe": {
             "hashes": [
                 "sha256:0bff5e0ae4ef2e1ae4fdf2dfd5b76c75e5c2fa4132d05fc1b0dabcd20c7e28c4",
@@ -327,6 +350,56 @@
             "markers": "python_version >= '3.9'",
             "version": "==3.0.2"
         },
+        "mergedeep": {
+            "hashes": [
+                "sha256:0096d52e9dad9939c3d975a774666af186eda617e6ca84df4c94dec30004f2a8",
+                "sha256:70775750742b25c0d8f36c55aed03d24c3384d17c951b3175d898bd778ef0307"
+            ],
+            "markers": "python_version >= '3.6'",
+            "version": "==1.3.4"
+        },
+        "mkdocs": {
+            "hashes": [
+                "sha256:7b432f01d928c084353ab39c57282f29f92136665bdd6abf7c1ec8d822ef86f2",
+                "sha256:db91759624d1647f3f34aa0c3f327dd2601beae39a366d6e064c03468d35c20e"
+            ],
+            "index": "pypi",
+            "markers": "python_version >= '3.8'",
+            "version": "==1.6.1"
+        },
+        "mkdocs-autorefs": {
+            "hashes": [
+                "sha256:4b5b6235a4becb2b10425c2fa191737e415b37aa3418919db33e5d774c9db079",
+                "sha256:9793c5ac06a6ebbe52ec0f8439256e66187badf4b5334b5fde0b128ec134df4f"
+            ],
+            "markers": "python_version >= '3.9'",
+            "version": "==1.4.1"
+        },
+        "mkdocs-get-deps": {
+            "hashes": [
+                "sha256:162b3d129c7fad9b19abfdcb9c1458a651628e4b1dea628ac68790fb3061c60c",
+                "sha256:2bf11d0b133e77a0dd036abeeb06dec8775e46efa526dc70667d8863eefc6134"
+            ],
+            "markers": "python_version >= '3.8'",
+            "version": "==0.2.0"
+        },
+        "mkdocstrings": {
+            "hashes": [
+                "sha256:3657be1384543ce0ee82112c3e521bbf48e41303aa0c229b9ffcccba057d922e",
+                "sha256:8ea98358d2006f60befa940fdebbbc88a26b37ecbcded10be726ba359284f73d"
+            ],
+            "markers": "python_version >= '3.9'",
+            "version": "==0.29.0"
+        },
+        "mkdocstrings-python": {
+            "hashes": [
+                "sha256:211b7aaf776cd45578ecb531e5ad0d3a35a8be9101a6bfa10de38a69af9d8fd8",
+                "sha256:9453ccae69be103810c1cf6435ce71c8f714ae37fef4d87d16aa92a7c800fe1d"
+            ],
+            "index": "pypi",
+            "markers": "python_version >= '3.9'",
+            "version": "==1.16.8"
+        },
         "mpmath": {
             "hashes": [
                 "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f",
@@ -487,6 +560,14 @@
             "markers": "python_version >= '3.9'",
             "version": "==2.2.3"
         },
+        "pathspec": {
+            "hashes": [
+                "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08",
+                "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"
+            ],
+            "markers": "python_version >= '3.8'",
+            "version": "==0.12.1"
+        },
         "pillow": {
             "hashes": [
                 "sha256:015c6e863faa4779251436db398ae75051469f7c903b043a48f078e437656f83",
@@ -564,6 +645,14 @@
             "markers": "python_version >= '3.9'",
             "version": "==11.1.0"
         },
+        "platformdirs": {
+            "hashes": [
+                "sha256:a03875334331946f13c549dbd8f4bac7a13a50a895a0eb1e8c6a8ace80d40a94",
+                "sha256:eb437d586b6a0986388f0d6f74aa0cde27b48d0e3d66843640bfb6bdcdb6e351"
+            ],
+            "markers": "python_version >= '3.9'",
+            "version": "==4.3.7"
+        },
         "protobuf": {
             "hashes": [
                 "sha256:13eb236f8eb9ec34e63fc8b1d6efd2777d062fa6aaa68268fb67cf77f6839ad7",
@@ -637,6 +726,14 @@
             "markers": "python_version >= '3.8'",
             "version": "==0.9.1"
         },
+        "pymdown-extensions": {
+            "hashes": [
+                "sha256:05e0bee73d64b9c71a4ae17c72abc2f700e8bc8403755a00580b49a4e9f189e9",
+                "sha256:41e576ce3f5d650be59e900e4ceff231e0aed2a88cf30acaee41e02f063a061b"
+            ],
+            "markers": "python_version >= '3.8'",
+            "version": "==10.14.3"
+        },
         "python-dateutil": {
             "hashes": [
                 "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3",
@@ -711,6 +808,14 @@
             "markers": "python_version >= '3.8'",
             "version": "==6.0.2"
         },
+        "pyyaml-env-tag": {
+            "hashes": [
+                "sha256:70092675bda14fdec33b31ba77e7543de9ddc88f2e5b99160396572d11525bdb",
+                "sha256:af31106dec8a4d68c60207c1886031cbf839b68aa7abccdb19868200532c2069"
+            ],
+            "markers": "python_version >= '3.6'",
+            "version": "==0.1"
+        },
         "referencing": {
             "hashes": [
                 "sha256:df2e89862cd09deabbdba16944cc3f10feb6b3e6f18e902f7cc25609a34775aa",
@@ -1070,7 +1175,7 @@
                 "sha256:e7631a77ffb1f7d2eefa4445ebbee491c720a5661ddf6df3498ebecae5ed375c",
                 "sha256:ef810fbf7b781a5a593894e4f439773830bdecb885e6880d957d5b9382a960d2"
             ],
-            "markers": "…"
+            "markers": "python_version >= '3.9'",
             "version": "==6.0.0"
         }
     },
```
README.md
CHANGED

````diff
@@ -12,25 +12,55 @@ license: mit
 
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
+# NLPinitiative Streamlit Web Application
 
-
-#### Due to the project being configured to use hugging face spaces to host the python web-app, the instructions will outline how to setup the project to push to any newly created Hugging Face Space.
-Note: Streamlit can still be developed and deployed to environments other than Hugging Face Spaces. Refer to the appropriate documentation associated with a chosen hosting service for how to deploy the web-app to the services environment.
+---
 
-
-- In the directory of the cloned repository, add the hugging face space as an additional remote origin:
-  - You can specify any name to use for the origin name (i.e., hf_origin)
-```
-git remote add <hf-origin-name> <hf-space-url>
-```
+## Project Details
 
-
-
-
-
+### Description
+
+Codebase for the Streamlit app hosted on Hugging Face Spaces that provides a basic user interface for performing inference on user-submitted text using the models trained within the NLPinitiative project.
+
+---
+### Project Setup
+
+**Setup for Pushing to GitHub and HF Space**:
+
+Because the project is configured to use Hugging Face Spaces to host the Python web app, these instructions outline how to set up the project to push to any newly created Hugging Face Space.
+
+**Note**: The Streamlit app can still be developed and deployed to environments other than Hugging Face Spaces. Refer to the documentation for your chosen hosting service for how to deploy the web app to that service's environment.
+
+**After Creation of a Streamlit Hugging Face Space**:
+
+In the directory of the cloned repository, add the Hugging Face Space as an additional remote:
+`git remote add <hf-origin-name> <hf-space-url>`
+
+- **NOTE**: *You can specify any name to use for the remote (e.g., hf_origin)*
+
+Once the Space is linked, force-update it with the contents of the current repository (this syncs the HF Space with the main repository's history):
+`git push --force <hf-origin-name> main`
+
+Following these steps, any new commits can be pushed to the HF Space with:
+`git push <hf-origin-name> main`
+
+---
+### Project layout
 
-- Following these steps, any new commits made can be pushed to the HF Space by using the following command:
 ```
-
+├── docs               <- A directory containing documentation used for generating and serving
+│                         project documentation
+├── scripts            <- Source code for model inference
+│   ├── __init__.py    <- Makes scripts a Python module
+│   └── predict.py     <- Code to run model inference with trained models
+├── app.py             <- Entry point for the application
+├── config.py          <- Store useful variables and configuration
+├── LICENSE            <- Open-source license if one is chosen
+├── mkdocs.yml         <- mkdocs project configuration
+├── Pipfile            <- The project dependency file for reproducing the analysis environment,
+│                         e.g., generated with `pipenv install`
+├── Pipfile.lock       <- Locked file containing hashes for dependencies
+├── README.md          <- The top-level README for developers using this project
+├── requirements.txt   <- Plaintext dependency information (necessary for app hosting)
+└── setup.sh           <- Bash script containing convenience commands for managing the project
 ```
-
````
docs/index.md
ADDED

````diff
@@ -0,0 +1,52 @@
+# NLPinitiative Streamlit Documentation
+
+---
+
+## Project Details
+
+### Description
+
+Codebase for the Streamlit app hosted on Hugging Face Spaces that provides a basic user interface for performing inference on user-submitted text using the models trained within the NLPinitiative project.
+
+---
+### Project Setup
+
+**Setup for Pushing to GitHub and HF Space**:
+
+Because the project is configured to use Hugging Face Spaces to host the Python web app, these instructions outline how to set up the project to push to any newly created Hugging Face Space.
+
+**Note**: The Streamlit app can still be developed and deployed to environments other than Hugging Face Spaces. Refer to the documentation for your chosen hosting service for how to deploy the web app to that service's environment.
+
+**After Creation of a Streamlit Hugging Face Space**:
+
+In the directory of the cloned repository, add the Hugging Face Space as an additional remote:
+`git remote add <hf-origin-name> <hf-space-url>`
+
+- **NOTE**: *You can specify any name to use for the remote (e.g., hf_origin)*
+
+Once the Space is linked, force-update it with the contents of the current repository (this syncs the HF Space with the main repository's history):
+`git push --force <hf-origin-name> main`
+
+Following these steps, any new commits can be pushed to the HF Space with:
+`git push <hf-origin-name> main`
+
+---
+### Project layout
+
+```
+├── docs               <- A directory containing documentation used for generating and serving
+│                         project documentation
+├── scripts            <- Source code for model inference
+│   ├── __init__.py    <- Makes scripts a Python module
+│   └── predict.py     <- Code to run model inference with trained models
+├── app.py             <- Entry point for the application
+├── config.py          <- Store useful variables and configuration
+├── LICENSE            <- Open-source license if one is chosen
+├── mkdocs.yml         <- mkdocs project configuration
+├── Pipfile            <- The project dependency file for reproducing the analysis environment,
+│                         e.g., generated with `pipenv install`
+├── Pipfile.lock       <- Locked file containing hashes for dependencies
+├── README.md          <- The top-level README for developers using this project
+├── requirements.txt   <- Plaintext dependency information (necessary for app hosting)
+└── setup.sh           <- Bash script containing convenience commands for managing the project
+```
````
mkdocs.yml
ADDED

```diff
@@ -0,0 +1,15 @@
+site_name: NLPinitiative Streamlit Documentation
+
+nav:
+  - Home: index.md
+
+theme:
+  name: readthedocs
+
+plugins:
+  - search
+  - mkdocstrings:
+      handlers:
+        python:
+          options:
+            docstring_style: numpy
```
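Since the handler is configured with `docstring_style: numpy`, mkdocstrings-python will render the NumPy-style docstrings added to `scripts/predict.py` below. As a minimal sketch of the docstring shape the renderer expects (the function itself is hypothetical, not part of this commit):

```python
def example(text: str) -> int:
    """One-line summary of the function.

    Parameters
    ----------
    text : str
        Description of the parameter.

    Returns
    -------
    int
        Description of the return value.
    """
    # Hypothetical body, shown only to make the sketch runnable.
    return len(text)
```

A docs page can then embed a module's rendered API with an identifier line such as `::: scripts.predict` in the Markdown source.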
scripts/predict.py
CHANGED

```diff
@@ -16,23 +16,53 @@ from transformers import (
 BIN_REPO = 'dlsmallw/NLPinitiative-Binary-Classification'
 ML_REPO = 'dlsmallw/NLPinitiative-Multilabel-Regression'
 
-## Class used to encapsulate and handle the logic for inference
 class InferenceHandler:
-    …
+    """A class that handles performing inference using the trained binary classification and multilabel regression models."""
+
+    def __init__(self, api_token: str):
+        """Constructor for instantiating an InferenceHandler object.
+
+        Parameters
+        ----------
+        api_token : str
+            A Hugging Face token with read access, used to download the trained models.
+        """
+
         self.api_token = api_token
-        self.bin_tokenizer, self.bin_model = self.…
-        self.ml_regr_tokenizer, self.ml_regr_model = self.…
+        self.bin_tokenizer, self.bin_model = self._init_model_and_tokenizer(BIN_REPO)
+        self.ml_regr_tokenizer, self.ml_regr_model = self._init_model_and_tokenizer(ML_REPO)
         nltk.download('punkt_tab')
 
-    def …
+    def _get_config(self, repo_id: str) -> str:
+        """Retrieves the config.json file from the specified model repository.
+
+        Parameters
+        ----------
+        repo_id : str
+            The repository id (i.e., <owner username>/<repository name>).
+
+        """
+
         config = None
         if repo_id and self.api_token:
             config = huggingface_hub.hf_hub_download(repo_id, filename='config.json', token=self.api_token)
         return config
 
-    …
-    …
-    …
+    def _init_model_and_tokenizer(self, repo_id: str):
+        """Initializes a model and tokenizer for use in inference from the specified model repository.
+
+        Parameters
+        ----------
+        repo_id : str
+            The repository id (i.e., <owner username>/<repository name>).
+
+        Returns
+        -------
+        tuple[PreTrainedTokenizer | PreTrainedTokenizerFast, PreTrainedModel]
+            A tuple containing the tokenizer and model objects.
+        """
+
+        config = self._get_config(repo_id)
         with open(config) as config_file:
             config_json = json.load(config_file)
             model_name = config_json['_name_or_path']
@@ -43,24 +73,75 @@ class InferenceHandler:
         model.eval()
         return tokenizer, model
 
-    …
-    …
+    def _encode_binary(self, text: str):
+        """Preprocesses and tokenizes the input text for binary classification.
+
+        Parameters
+        ----------
+        text : str
+            The input text to be preprocessed and tokenized.
+
+        Returns
+        -------
+        BatchEncoding
+            The preprocessed and tokenized input text.
+        """
+
         bin_tokenized_input = self.bin_tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
         return bin_tokenized_input
 
-    …
-    …
+    def _encode_multilabel(self, text: str):
+        """Preprocesses and tokenizes the input text for multilabel regression.
+
+        Parameters
+        ----------
+        text : str
+            The input text to be preprocessed and tokenized.
+
+        Returns
+        -------
+        BatchEncoding
+            The preprocessed and tokenized input text.
+        """
+
         ml_tokenized_input = self.ml_regr_tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
         return ml_tokenized_input
 
-    …
-    …
-    …
-    …
+    def _encode_input(self, text: str):
+        """Preprocesses and tokenizes the input text for sentiment classification (both models).
+
+        Parameters
+        ----------
+        text : str
+            The input text to be preprocessed and tokenized.
+
+        Returns
+        -------
+        tuple[BatchEncoding, BatchEncoding]
+            A tuple containing preprocessed and tokenized input text for both the binary and multilabel regression models.
+        """
+
+        bin_inputs = self._encode_binary(text)
+        ml_inputs = self._encode_multilabel(text)
         return bin_inputs, ml_inputs
 
-    …
-    …
+    def classify_text(self, input: str):
+        """Performs inference on the input text to determine the binary classification and the multilabel regression values for the categories.
+
+        Determines whether the text is discriminatory. If it is discriminatory, regression is then performed on the input text to determine the
+        assessed percentage that each category applies.
+
+        Parameters
+        ----------
+        input : str
+            The input text to be classified.
+
+        Returns
+        -------
+        dict[str, Any]
+            The resulting classification and regression values for each category.
+        """
+
         result = {
             'text_input': input,
             'results': []
@@ -100,9 +181,21 @@
         result['results'] = sent_res_arr
         return result
 
-    …
-    …
-    …
+    def discriminatory_inference(self, text: str):
+        """Performs inference on the input text to determine the binary classification.
+
+        Parameters
+        ----------
+        text : str
+            The input text to be classified.
+
+        Returns
+        -------
+        tuple[str, Number]
+            A tuple consisting of the string classification (Discriminatory or Non-Discriminatory) and the numeric prediction class (1 or 0).
+        """
+
+        bin_inputs = self._encode_binary(text)
 
         with torch.no_grad():
             bin_logits = self.bin_model(**bin_inputs).logits
@@ -114,9 +207,21 @@
 
         return bin_text_pred, pred_class
 
-    …
-    …
-    …
+    def category_inference(self, text: str):
+        """Performs inference on the input text to determine the regression values for the categories of discrimination.
+
+        Parameters
+        ----------
+        text : str
+            The input text to be classified.
+
+        Returns
+        -------
+        list[float]
+            The regression values assessed for each category of discrimination.
+        """
+
+        ml_inputs = self._encode_multilabel(text)
 
         with torch.no_grad():
             ml_outputs = self.ml_regr_model(**ml_inputs).logits
```
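For context, here is a minimal usage sketch of the API documented above (the `HF_TOKEN` environment variable, the import path, and the sample text are illustrative assumptions, not part of this commit):

```python
import os

from scripts.predict import InferenceHandler

# Illustrative: assumes a Hugging Face token is provided via the environment.
handler = InferenceHandler(os.environ["HF_TOKEN"])

# Full pipeline: binary classification, plus per-category regression for
# text judged discriminatory.
result = handler.classify_text("example input text")
print(result['text_input'], result['results'])

# The two stages can also be invoked individually.
label, pred_class = handler.discriminatory_inference("example input text")
category_scores = handler.category_inference("example input text")
```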
setup.sh
CHANGED

```diff
@@ -23,16 +23,16 @@ requirements() {
     pipenv requirements > requirements.txt
 }
 
-…
+docs() {
+    case $1 in
+        build)
+            mkdocs build
+            ;;
+        serve)
+            mkdocs serve
+            ;;
+        *)
+            log_error "Specify 'build' or 'serve'. For example: docs build"
+            ;;
+    esac
+}
```