Yoshinoheart committed on
Commit
02d61b3
·
1 Parent(s): 9fc763e

Change to t5

Browse files
Files changed (1) hide show
  1. app.py +7 -5
app.py CHANGED
@@ -1,14 +1,15 @@
1
  import streamlit as st
2
- from transformers import pipeline
3
  from happytransformer import HappyTextToText, TTSettings
4
 
5
- # Initialize the spelling correction pipeline
6
- fix_spelling = pipeline("text2text-generation", model="oliverguhr/spelling-correction-english-base")
7
-
8
  # Initialize the grammar correction model
9
  happy_tt = HappyTextToText("T5", "vennify/t5-base-grammar-correction")
10
  args = TTSettings(num_beams=5, min_length=1)
11
 
 
 
 
 
12
  # Function to split text into chunks
13
  def split_text(text, chunk_size=500):
14
  chunks = []
@@ -40,7 +41,8 @@ def main():
40
  for chunk in text_chunks:
41
  try:
42
  # Spelling correction
43
- corrected_spelling = fix_spelling(chunk)[0]['generated_text']
 
44
  corrected_spelling_chunks.append(corrected_spelling)
45
 
46
  # Grammar correction
 
1
  import streamlit as st
2
+ from transformers import T5Tokenizer, T5ForConditionalGeneration
3
  from happytransformer import HappyTextToText, TTSettings
4
 
 
 
 
5
  # Initialize the grammar correction model
6
  happy_tt = HappyTextToText("T5", "vennify/t5-base-grammar-correction")
7
  args = TTSettings(num_beams=5, min_length=1)
8
 
9
+ # Initialize T5 spelling correction tokenizer and model
10
+ tokenizer = T5Tokenizer.from_pretrained("thaboe01/t5-spelling-corrector")
11
+ model = T5ForConditionalGeneration.from_pretrained("thaboe01/t5-spelling-corrector", device="cuda")
12
+
13
  # Function to split text into chunks
14
  def split_text(text, chunk_size=500):
15
  chunks = []
 
41
  for chunk in text_chunks:
42
  try:
43
  # Spelling correction
44
+ input_ids = tokenizer(chunk, return_tensors="pt").input_ids.to("cuda")
45
+ corrected_spelling = tokenizer.decode(model.generate(input_ids)[0])
46
  corrected_spelling_chunks.append(corrected_spelling)
47
 
48
  # Grammar correction