Commit
·
02d61b3
1
Parent(s):
9fc763e
Change to t5
Browse files
app.py
CHANGED
@@ -1,14 +1,15 @@
|
|
1 |
import streamlit as st
|
2 |
-
from transformers import pipeline
|
3 |
from happytransformer import HappyTextToText, TTSettings
|
4 |
|
5 |
-
# Initialize the spelling correction pipeline
|
6 |
-
fix_spelling = pipeline("text2text-generation", model="oliverguhr/spelling-correction-english-base")
|
7 |
-
|
8 |
# Initialize the grammar correction model
|
9 |
happy_tt = HappyTextToText("T5", "vennify/t5-base-grammar-correction")
|
10 |
args = TTSettings(num_beams=5, min_length=1)
|
11 |
|
|
|
|
|
|
|
|
|
12 |
# Function to split text into chunks
|
13 |
def split_text(text, chunk_size=500):
|
14 |
chunks = []
|
@@ -40,7 +41,8 @@ def main():
|
|
40 |
for chunk in text_chunks:
|
41 |
try:
|
42 |
# Spelling correction
|
43 |
-
|
|
|
44 |
corrected_spelling_chunks.append(corrected_spelling)
|
45 |
|
46 |
# Grammar correction
|
|
|
1 |
import streamlit as st
|
2 |
+
from transformers import T5Tokenizer, T5ForConditionalGeneration
|
3 |
from happytransformer import HappyTextToText, TTSettings
|
4 |
|
|
|
|
|
|
|
5 |
# Initialize the grammar correction model
|
6 |
happy_tt = HappyTextToText("T5", "vennify/t5-base-grammar-correction")
|
7 |
args = TTSettings(num_beams=5, min_length=1)
|
8 |
|
9 |
+
# Initialize T5 spelling correction tokenizer and model
|
10 |
+
tokenizer = T5Tokenizer.from_pretrained("thaboe01/t5-spelling-corrector")
|
11 |
+
# from_pretrained() has no `device` argument; load the weights first, then move the model to the GPU.
model = T5ForConditionalGeneration.from_pretrained("thaboe01/t5-spelling-corrector").to("cuda")
|
12 |
+
|
13 |
# Function to split text into chunks
|
14 |
def split_text(text, chunk_size=500):
|
15 |
chunks = []
|
|
|
41 |
for chunk in text_chunks:
|
42 |
try:
|
43 |
# Spelling correction
|
44 |
+
input_ids = tokenizer(chunk, return_tensors="pt").input_ids.to("cuda")
|
45 |
+
# Drop special tokens (e.g. <pad>, </s>) so only the corrected text is kept.
# NOTE(review): generate() runs with its default length limits here — confirm they cover a full chunk.
corrected_spelling = tokenizer.decode(model.generate(input_ids)[0], skip_special_tokens=True)
|
46 |
corrected_spelling_chunks.append(corrected_spelling)
|
47 |
|
48 |
# Grammar correction
|