Spaces:

thaboe01
/

Shona-Spell-Checking

Paused

File size: 1,912 Bytes

import streamlit as st
from transformers import T5Tokenizer, T5ForConditionalGeneration

# Load your fine-tuned FLAN-T5 model and tokenizer
@st.cache_resource  
def load_model():
    tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-small")
    model = T5ForConditionalGeneration.from_pretrained("thaboe01/t5-spelling-corrector")
    return tokenizer, model

# Load model (only once)
tokenizer, model = load_model()

MAX_PHRASE_LENGTH = 3  
PREFIX = "Please correct the following sentence: "

# Function to correct text
def correct_text(text):
    words = text.split()
    corrected_phrases = []
    current_chunk = []

    for word in words:
        current_chunk.append(word)
        # Check if adding the next word would exceed max length (including prefix)
        if len(current_chunk) + 1 > MAX_PHRASE_LENGTH:  
            input_text = PREFIX + " ".join(current_chunk)
            input_ids = tokenizer(input_text, return_tensors="pt").input_ids
            outputs = model.generate(input_ids)
            corrected_phrase = tokenizer.decode(outputs[0], skip_special_tokens=True)
            corrected_phrases.append(corrected_phrase)
            current_chunk = []  # Reset the chunk

    # Handle the last chunk
    if current_chunk:
        input_text = PREFIX + " ".join(current_chunk)
        input_ids = tokenizer(input_text, return_tensors="pt").input_ids
        outputs = model.generate(input_ids)
        corrected_phrase = tokenizer.decode(outputs[0], skip_special_tokens=True)
        corrected_phrases.append(corrected_phrase)

    return " ".join(corrected_phrases)  # Join the corrected chunks


# Streamlit App
st.title("Shona Text Editor with Real-Time Spelling Correction")
text_input = st.text_area("Start typing here...", height=250)

if text_input:
    corrected_text = correct_text(text_input)
    st.text_area("Corrected Text", value=corrected_text, height=250, disabled=True)