Zlovoblachko committed on
Commit
da53684
1 Parent(s): 4299211

Add application file

Browse files
Files changed (2) hide show
  1. app.py +57 -0
  2. requirements.txt +174 -0
app.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import spacy
2
+ from spacy import displacy
3
+ from transformers import T5ForConditionalGeneration, T5Tokenizer
4
+ import gradio as gr
5
+ import torch
6
+ import difflib
7
+ import nltk
8
# Fetch the Punkt sentence-splitter data used by sent_tokenize.
# NLTK >= 3.8.2 (this project pins 3.9.1) loads the "punkt_tab" resource
# instead of the legacy pickled "punkt" models, so both are downloaded to
# keep the app working on either side of that change.
nltk.download("punkt")
nltk.download("punkt_tab")
9
+ from nltk.tokenize import sent_tokenize
10
+
11
+
12
# Identifiers of the pretrained artefacts loaded once at import time.
SPACY_PIPELINE_NAME = "en_test_L1_model"
GEC_CHECKPOINT = "Unbabel/gec-t5_small"
TOKENIZER_CHECKPOINT = 't5-small'

# spaCy pipeline whose output is rendered with displaCy's span visualiser.
nlp = spacy.load(SPACY_PIPELINE_NAME)
# T5 model that generates the corrected sentence, plus the tokenizer used to
# encode its input and decode its output.
model = T5ForConditionalGeneration.from_pretrained(GEC_CHECKPOINT)
tokenizer = T5Tokenizer.from_pretrained(TOKENIZER_CHECKPOINT)
15
+
16
+
17
def _render_word_diff(original_words, corrected_words):
    """Return HTML marking word-level insertions (green) and deletions (red).

    Args:
        original_words: Tokens of the sentence as the user wrote it.
        corrected_words: Tokens of the model's corrected sentence.

    Returns:
        A single HTML string with the tokens separated by spaces.
    """
    from html import escape  # local import so the file's import block is untouched

    pieces = []
    for entry in difflib.ndiff(original_words, corrected_words):
        tag = entry[:2]
        if tag == "? ":
            # ndiff emits "? " guide lines for near-matching items; they are
            # alignment hints, not words, and must not reach the output.
            continue
        # Both the user's text and the model output are untrusted as HTML.
        token = escape(entry[2:])
        if tag == "+ ":
            pieces.append(
                f"<span style='color: green; font-weight: bold;'>{token}</span>"
            )
        elif tag == "- ":
            pieces.append(
                f"<span style='color: red; text-decoration: line-through;'>{token}</span>"
            )
        else:
            pieces.append(token)
    return " ".join(pieces)


def text_analysis(text):
    """Annotate and grammar-correct *text* one sentence at a time.

    Every sentence returned by ``sent_tokenize`` is processed twice:

    1. It is run through the spaCy pipeline ``nlp`` and rendered with
       displaCy's ``span`` style inside a scrollable ``<div>``.
    2. It is sent to the T5 model with the ``"gec: "`` prefix; the decoded
       output is word-diffed against the original sentence.

    Args:
        text: Raw input text; may contain several sentences.

    Returns:
        A 2-tuple of HTML strings ``(span_html, correction_html)``. Sentences
        are separated by ``<hr>``; ``span_html`` also ends with a trailing
        ``<hr>``.
    """
    span_blocks = []
    correction_blocks = []
    for sentence in sent_tokenize(text):
        doc = nlp(sentence)
        span_markup = displacy.render(doc, style="span", options={"compact": True})
        span_blocks.append(
            "<div style='max-width:100%; max-height:360px; overflow:auto'>"
            + span_markup
            + "</div>"
        )

        inputs = tokenizer("gec: " + sentence, return_tensors="pt")
        # Inference only: no gradients are needed.
        with torch.no_grad():
            outputs = model.generate(
                **inputs, max_length=128, num_beams=4, early_stopping=True
            )
        corrected_sentence = tokenizer.decode(outputs[0], skip_special_tokens=True)

        diff_markup = _render_word_diff(sentence.split(), corrected_sentence.split())
        correction_blocks.append(f"<p><b>Corrected:</b> {diff_markup}</p>")
    return "<hr>".join(span_blocks) + "<hr>", "<hr>".join(correction_blocks)
45
+
46
+
47
# Sample learner texts offered as one-click examples in the UI.
EXAMPLE_TEXTS = [
    "Then there was a sharp decrease so by 2013 the worldwide outlay accounted for 214 billions. Moreother there is a huge difference between part of 60+ years people. It is clearly seen that in Yemen the share of children before 14 years tend to become less - from 50,1% in 2000 to 37% in 2050.",
    "In post - school 70 percent were the same men a postgraduate diploma and women undergraduate diploma. Parents can try to know friends of their child, so they will know what they are doing and who they are.",
]

# One text box in, two HTML panels out (span rendering and diffed correction).
demo = gr.Interface(
    fn=text_analysis,
    inputs=gr.Textbox(placeholder="Enter sentence here..."),
    outputs=["html", "html"],
    # Gradio expects one list of input values per example row.
    examples=[[sample] for sample in EXAMPLE_TEXTS],
)

demo.launch()
requirements.txt ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==23.2.1
2
+ altair==5.4.1
3
+ anyio==4.6.2.post1
4
+ argon2-cffi==23.1.0
5
+ argon2-cffi-bindings==21.2.0
6
+ arrow==1.3.0
7
+ asttokens==2.4.1
8
+ async-lru==2.0.4
9
+ attrs==24.2.0
10
+ babel==2.16.0
11
+ beautifulsoup4==4.12.3
12
+ bleach==6.2.0
13
+ blis==0.7.11
14
+ catalogue==2.0.10
15
+ certifi==2024.8.30
16
+ cffi==1.17.1
17
+ charset-normalizer==3.4.0
18
+ click==8.1.7
19
+ comm==0.2.2
20
+ confection==0.1.5
21
+ contourpy==1.3.0
22
+ cycler==0.12.1
23
+ cymem==2.0.8
24
+ debugpy==1.8.7
25
+ decorator==5.1.1
26
+ defusedxml==0.7.1
27
+ en_test_L1_model @ https://huggingface.co/Zlovoblachko/en_test_L1_model/resolve/main/en_test_L1_model-any-py3-none-any.whl
28
+ exceptiongroup==1.2.2
29
+ executing==2.1.0
30
+ fastapi==0.115.4
31
+ fastjsonschema==2.20.0
32
+ ffmpy==0.4.0
33
+ filelock==3.16.1
34
+ fonttools==4.54.1
35
+ fqdn==1.5.1
36
+ fsspec==2024.10.0
37
+ gradio==3.41.0
38
+ gradio_client==0.5.0
39
+ h11==0.14.0
40
+ httpcore==1.0.6
41
+ httpx==0.27.2
42
+ huggingface-hub==0.26.2
43
+ idna==3.10
44
+ importlib_resources==6.4.5
45
+ ipykernel==6.29.5
46
+ ipython==8.29.0
47
+ ipywidgets==8.1.5
48
+ isoduration==20.11.0
49
+ jedi==0.19.1
50
+ Jinja2==3.1.4
51
+ joblib==1.4.2
52
+ json5==0.9.25
53
+ jsonpointer==3.0.0
54
+ jsonschema==4.23.0
55
+ jsonschema-specifications==2024.10.1
56
+ jupyter==1.1.1
57
+ jupyter-console==6.6.3
58
+ jupyter-events==0.10.0
59
+ jupyter-lsp==2.2.5
60
+ jupyter_client==8.6.3
61
+ jupyter_core==5.7.2
62
+ jupyter_server==2.14.2
63
+ jupyter_server_terminals==0.5.3
64
+ jupyterlab==4.2.5
65
+ jupyterlab_pygments==0.3.0
66
+ jupyterlab_server==2.27.3
67
+ jupyterlab_widgets==3.0.13
68
+ kiwisolver==1.4.7
69
+ langcodes==3.4.1
70
+ language_data==1.2.0
71
+ marisa-trie==1.2.1
72
+ MarkupSafe==2.1.5
73
+ matplotlib==3.9.2
74
+ matplotlib-inline==0.1.7
75
+ mistune==3.0.2
76
+ mpmath==1.3.0
77
+ murmurhash==1.0.10
78
+ narwhals==1.13.1
79
+ nbclient==0.10.0
80
+ nbconvert==7.16.4
81
+ nbformat==5.10.4
82
+ nest-asyncio==1.6.0
83
+ networkx==3.4.2
84
+ nltk==3.9.1
85
+ notebook==7.2.2
86
+ notebook_shim==0.2.4
87
+ numpy==1.26.4
88
+ nvidia-cublas-cu12==12.4.5.8
89
+ nvidia-cuda-cupti-cu12==12.4.127
90
+ nvidia-cuda-nvrtc-cu12==12.4.127
91
+ nvidia-cuda-runtime-cu12==12.4.127
92
+ nvidia-cudnn-cu12==9.1.0.70
93
+ nvidia-cufft-cu12==11.2.1.3
94
+ nvidia-curand-cu12==10.3.5.147
95
+ nvidia-cusolver-cu12==11.6.1.9
96
+ nvidia-cusparse-cu12==12.3.1.170
97
+ nvidia-nccl-cu12==2.21.5
98
+ nvidia-nvjitlink-cu12==12.4.127
99
+ nvidia-nvtx-cu12==12.4.127
100
+ orjson==3.10.11
101
+ overrides==7.7.0
102
+ packaging==24.1
103
+ pandas==2.2.3
104
+ pandocfilters==1.5.1
105
+ parso==0.8.4
106
+ pathlib_abc==0.1.1
107
+ pathy==0.11.0
108
+ pexpect==4.9.0
109
+ pillow==10.4.0
110
+ platformdirs==4.3.6
111
+ preshed==3.0.9
112
+ prometheus_client==0.21.0
113
+ prompt_toolkit==3.0.48
114
+ psutil==6.1.0
115
+ ptyprocess==0.7.0
116
+ pure_eval==0.2.3
117
+ pycparser==2.22
118
+ pydantic==1.10.18
119
+ pydub==0.25.1
120
+ Pygments==2.18.0
121
+ pyparsing==3.2.0
122
+ python-dateutil==2.9.0.post0
123
+ python-json-logger==2.0.7
124
+ python-multipart==0.0.17
125
+ pytz==2024.2
126
+ PyYAML==6.0.2
127
+ pyzmq==26.2.0
128
+ referencing==0.35.1
129
+ regex==2024.9.11
130
+ requests==2.32.3
131
+ rfc3339-validator==0.1.4
132
+ rfc3986-validator==0.1.1
133
+ rpds-py==0.20.1
134
+ semantic-version==2.10.0
135
+ Send2Trash==1.8.3
136
+ sentencepiece==0.2.0
137
+ six==1.16.0
138
+ smart-open==6.4.0
139
+ sniffio==1.3.1
140
+ soupsieve==2.6
141
+ spacy==3.4.4
142
+ spacy-alignments==0.9.1
143
+ spacy-legacy==3.0.12
144
+ spacy-loggers==1.0.5
145
+ spacy-transformers==1.2.1
146
+ srsly==2.4.8
147
+ stack-data==0.6.3
148
+ starlette==0.41.2
149
+ sympy==1.13.1
150
+ terminado==0.18.1
151
+ thinc==8.1.12
152
+ tinycss2==1.4.0
153
+ tokenizers==0.13.3
154
+ tomli==2.0.2
155
+ torch==2.5.1
156
+ tornado==6.4.1
157
+ tqdm==4.66.6
158
+ traitlets==5.14.3
159
+ transformers==4.26.1
160
+ triton==3.1.0
161
+ typer==0.7.0
162
+ types-python-dateutil==2.9.0.20241003
163
+ typing_extensions==4.12.2
164
+ tzdata==2024.2
165
+ uri-template==1.3.0
166
+ urllib3==2.2.3
167
+ uvicorn==0.32.0
168
+ wasabi==0.10.1
169
+ wcwidth==0.2.13
170
+ webcolors==24.8.0
171
+ webencodings==0.5.1
172
+ websocket-client==1.8.0
173
+ websockets==11.0.3
174
+ widgetsnbextension==4.0.13