import pandas as pd
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# NOTE(review): this checkpoint is an IMDb *sentiment* classifier; its
# "positive" class (label 1) is relabeled "Job Related" below. Confirm this
# mapping is intentional — a sentiment model is not a job-relatedness model,
# and a task-specific fine-tune would likely be needed for real use.
tokenizer = AutoTokenizer.from_pretrained("textattack/roberta-base-imdb")
model = AutoModelForSequenceClassification.from_pretrained("textattack/roberta-base-imdb")
model.eval()  # inference only: disable dropout etc.


def analyze_text(text):
    """Classify one message as "Job Related" or "Not Job Related".

    Args:
        text: The message to classify. Non-string values (e.g. NaN produced
            by ``pd.read_csv`` for empty cells) are coerced to ``str`` so the
            tokenizer never raises.

    Returns:
        "Job Related" if the model predicts class 1, else "Not Job Related".
    """
    # str() guards against NaN/numeric cells; lower-casing mirrors the
    # original preprocessing (RoBERTa's tokenizer is case-sensitive, so this
    # is a deliberate normalization choice kept from the original code).
    text = str(text).lower()
    encoded_text = tokenizer(text, truncation=True, padding=True, return_tensors='pt')
    with torch.no_grad():
        output = model(**encoded_text)
    prediction = output.logits.argmax(-1).item()
    # Class 1 == IMDb "positive", reinterpreted here as job-related.
    return "Job Related" if prediction == 1 else "Not Job Related"


def count_job_related_messages(data):
    """Count job-related vs. not-job-related messages in *data*.

    If the DataFrame already carries a "Job Related" column (produced by an
    earlier ``analyze_text`` pass), those cached labels are reused so the
    model is NOT run a second time over every row; otherwise each message in
    the "message" column is classified.

    Args:
        data: DataFrame with a "message" column, and optionally a
            "Job Related" column of precomputed labels.

    Returns:
        Tuple ``(job_related_count, not_job_related_count)``.
    """
    if "Job Related" in data.columns:
        labels = data["Job Related"]
    else:
        labels = (analyze_text(message) for message in data["message"])
    job_related_count = 0
    not_job_related_count = 0
    for label in labels:
        if label == "Job Related":
            job_related_count += 1
        else:
            not_job_related_count += 1
    return job_related_count, not_job_related_count


# ---------------- Streamlit UI ----------------
st.title("Job Related Message Analyzer")
uploaded_file = st.file_uploader("Upload CSV file")
user_input = st.text_input("Enter text")

if uploaded_file:
    data = pd.read_csv(uploaded_file)
    # Classify each message exactly once; count_job_related_messages reuses
    # this column instead of re-running the model over every row.
    data["Job Related"] = [analyze_text(message) for message in data["message"]]
    job_related_count, not_job_related_count = count_job_related_messages(data)
    st.dataframe(data)
    st.write(f"Job Related Messages: {job_related_count}")
    st.write(f"Not Job Related Messages: {not_job_related_count}")
elif user_input:
    result = analyze_text(user_input)
    st.write(f"Message Classification: {result}")
else:
    st.write("Please upload a CSV file or enter text to analyze.")