Spaces:

Emily666666
/

Questions_Classification

Sleeping

File size: 2,814 Bytes

e19c3a2
 
b6c046c
 
 
 
 
 
e19c3a2
 
 
 
 
 
 
 
 
 
79687df
e19c3a2
 
 
 
 
 
 
9f7bad0
 
 
 
 
 
 
 
 
 
 
 
 
 
e19c3a2
de56a53
e19c3a2
 
de56a53
e19c3a2
 
 
 
b6c046c
 
 
e19c3a2
b6c046c
 
e19c3a2
 
d2c0a74
e19c3a2
c7770a0
 
 
 
 
 
9f7bad0
 
de56a53
9f7bad0
c7770a0
 
 
e19c3a2

import streamlit as st
from transformers import pipeline
import re

# Strip every character outside the allowed set from the input text
def clean_text(text):
    """Return *text* with unsupported characters removed.

    ASCII letters, digits, whitespace and the punctuation marks
    ``. , ! ? '`` are kept; every other character is dropped.
    """
    disallowed = re.compile(r"[^a-zA-Z0-9\s.,!?']")
    return disallowed.sub("", text)

# Load the text summarization pipeline
# NOTE(review): Streamlit re-executes this script on every interaction, so the
# model is loaded again on each rerun; consider caching the pipeline (e.g.
# st.cache_resource) if the installed Streamlit version provides it.
try:
    summarizer = pipeline("summarization", model="syndi-models/titlewave-t5-base")
    summarizer_loaded = True
except Exception as e:
    # Loading can fail with more than ValueError (e.g. OSError when the model
    # cannot be found or downloaded). Report any failure in the UI and keep
    # the app running; the flag below disables processing.
    st.error(f"Error loading summarization model: {e}")
    summarizer = None  # keep the name defined even when loading failed
    summarizer_loaded = False

# Load the Question classification pipeline
model_name = "Emily666666/bert-base-cased-news-category-test"
try:
    # return_all_scores=True asks for the score of every label; the processing
    # code picks the highest-scoring one.
    classifier = pipeline("text-classification", model=model_name, return_all_scores=True)
    classifier_loaded = True
except Exception as e:
    # Loading can fail with more than ValueError (e.g. OSError when the model
    # cannot be found or downloaded). Report any failure in the UI and keep
    # the app running; the flag below disables processing.
    st.error(f"Error loading classification model: {e}")
    classifier = None  # keep the name defined even when loading failed
    classifier_loaded = False

# Human-readable category names, keyed by the numeric class index (0-9).
# The position of each name in the tuple is its index.
label_mapping = dict(enumerate((
    "Society & Culture",
    "Science & Mathematics",
    "Health",
    "Education & Reference",
    "Computers & Internet",
    "Sports",
    "Business & Finance",
    "Entertainment & Music",
    "Family & Relationships",
    "Politics & Government",
)))

# Streamlit app title
st.title("Question Rephrase and Classification")

# Input text for summarization and classification
text_input = st.text_area("Enter long question to rephrase and classify:", "")

if st.button("Process"):
    # Clean the input before validating it: text that is blank, or that
    # becomes blank once unsupported characters are stripped, must not be
    # sent to the summarizer.
    cleaned_text = clean_text(text_input)

    if summarizer_loaded and classifier_loaded and cleaned_text.strip():
        try:
            # Perform text summarization
            summary = summarizer(cleaned_text, max_length=130, min_length=30, do_sample=False)
            summarized_text = summary[0]['summary_text']
        except Exception as e:
            st.error(f"Error during summarization: {e}")
            summarized_text = ""

        if summarized_text:
            try:
                # Perform question classification on the summarized text
                results = classifier(summarized_text)[0]
                # Find the category with the highest score
                max_score = max(results, key=lambda x: x['score'])
                raw_label = str(max_score['label'])
            except Exception as e:
                st.error(f"Error during classification: {e}")
            else:
                try:
                    # Labels are expected to look like "LABEL_0", "LABEL_1", etc.
                    predicted_label = label_mapping[int(raw_label.split('_')[-1])]
                except (ValueError, KeyError):
                    # Unexpected label format or unmapped index: show the
                    # model's own label rather than failing the whole request.
                    predicted_label = raw_label
                st.write("Rephrased Text:", summarized_text)
                st.write("Category:", predicted_label)
                st.write("Score:", max_score['score'])
    else:
        st.warning("Please enter text to process and ensure both models are loaded.")