# Hosting-page header text captured with this file ("Spaces: Sleeping / Sleeping");
# it is not part of the program.
import spacy | |
from spacy import displacy | |
from transformers import T5ForConditionalGeneration, T5Tokenizer | |
import gradio as gr | |
import torch | |
import difflib | |
import nltk | |
# Fetch the NLTK sentence-tokenizer data at start-up. Both resource names are
# requested; presumably different NLTK releases look up one or the other --
# confirm against the NLTK version this app is deployed with.
for _punkt_resource in ("punkt", "punkt_tab"):
    nltk.download(_punkt_resource)
from nltk.tokenize import sent_tokenize
# spaCy pipeline whose parsed docs are rendered with displaCy in text_analysis.
SPACY_PIPELINE_NAME = "en_test_L1_model"
# Seq2seq checkpoint fed "gec: "-prefixed sentences in text_analysis, and the
# tokenizer checkpoint used to encode/decode for it.
GEC_MODEL_NAME = "Unbabel/gec-t5_small"
GEC_TOKENIZER_NAME = "t5-small"

# Loaded once at import time so every request reuses the same objects.
nlp = spacy.load(SPACY_PIPELINE_NAME)
model = T5ForConditionalGeneration.from_pretrained(GEC_MODEL_NAME)
tokenizer = T5Tokenizer.from_pretrained(GEC_TOKENIZER_NAME)
def _render_correction_diff(original, corrected):
    """Return HTML marking the word-level changes from *original* to *corrected*.

    Both strings are split on whitespace and compared with ``difflib.ndiff``.
    Words present only in *corrected* are wrapped in a bold green span, words
    present only in *original* in a red strike-through span, and shared words
    are emitted as plain text. Every word is HTML-escaped because it comes
    from user input or model output.
    """
    from html import escape  # local import keeps this block self-contained

    fragments = []
    for entry in difflib.ndiff(original.split(), corrected.split()):
        marker, word = entry[:2], escape(entry[2:])
        if marker == "? ":
            # ndiff adds "? " hint rows (e.g. "?        -\n") when two words
            # are similar; they are not words and must not be rendered.
            continue
        if marker == "+ ":
            fragments.append(
                f"<span style='color: green; font-weight: bold;'>{word}</span>"
            )
        elif marker == "- ":
            fragments.append(
                f"<span style='color: red; text-decoration: line-through;'>{word}</span>"
            )
        else:
            fragments.append(word)
    return " ".join(fragments)


def text_analysis(text):
    """Analyse *text* sentence by sentence and return two HTML fragments.

    For each sentence produced by ``sent_tokenize``:

    * the sentence is run through the module-level ``nlp`` pipeline and
      rendered with displaCy's ``"span"`` style inside a scrollable ``<div>``;
    * the sentence, prefixed with ``"gec: "``, is passed to the module-level
      ``model``/``tokenizer`` (beam search, 4 beams, max length 128) and the
      decoded output is diffed word-by-word against the original sentence.

    Args:
        text: Raw input text; ``None`` or whitespace-only input is treated as
            containing no sentences.

    Returns:
        A ``(spans_html, corrections_html)`` tuple. Per-sentence blocks are
        separated by ``<hr>``; ``spans_html`` additionally ends with ``<hr>``.
    """
    if text is None or not text.strip():
        # No sentences: same result the loop below yields for an empty list,
        # but without touching the tokenizer or the models.
        return "<hr>", ""

    span_blocks = []
    correction_blocks = []
    for sentence in sent_tokenize(text):
        doc = nlp(sentence)
        span_html = displacy.render(doc, style="span", options={"compact": True})
        # Bound the visualisation size so long sentences scroll instead of
        # stretching the page.
        span_blocks.append(
            "<div style='max-width:100%; max-height:360px; overflow:auto'>"
            + span_html
            + "</div>"
        )

        inputs = tokenizer("gec: " + sentence, return_tensors="pt")
        with torch.no_grad():  # inference only; no gradients needed
            outputs = model.generate(
                **inputs, max_length=128, num_beams=4, early_stopping=True
            )
        corrected_sentence = tokenizer.decode(outputs[0], skip_special_tokens=True)

        diff_html = _render_correction_diff(sentence, corrected_sentence)
        correction_blocks.append(f"<p><b>Corrected:</b> {diff_html}</p>")

    return "<hr>".join(span_blocks) + "<hr>", "<hr>".join(correction_blocks)
# Sample texts offered as one-click examples in the UI; each inner list holds
# the value for the single text input.
EXAMPLE_INPUTS = [
    ["Then there was a sharp decrease so by 2013 the worldwide outlay accounted for 214 billions. Moreother there is a huge difference between part of 60+ years people. It is clearly seen that in Yemen the share of children before 14 years tend to become less - from 50,1% in 2000 to 37% in 2050."],
    ["In post - school 70 percent were the same men a postgraduate diploma and women undergraduate diploma. Parents can try to know friends of their child, so they will know what they are doing and who they are."],
]

# One text input, two HTML outputs matching the tuple returned by text_analysis.
demo = gr.Interface(
    fn=text_analysis,
    inputs=gr.Textbox(placeholder="Enter sentence here..."),
    outputs=["html", "html"],
    examples=EXAMPLE_INPUTS,
)
demo.launch()