"""Streamlit app that labels messages as job related or not using a
Hugging Face sequence-classification model."""

import pandas as pd
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

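# NOTE: "textattack/roberta-base-imdb" is a binary sentiment checkpoint
# fine-tuned on IMDB movie reviews (label 1 = positive), not a job-related
# classifier. Treating label 1 as "Job Related" below is a placeholder;
# substitute a checkpoint actually fine-tuned for that task to get
# meaningful labels.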
@st.cache_resource  # cache so the model loads once, not on every Streamlit rerun
def load_model():
    tokenizer = AutoTokenizer.from_pretrained("textattack/roberta-base-imdb")
    model = AutoModelForSequenceClassification.from_pretrained(
        "textattack/roberta-base-imdb"
    )
    model.eval()  # inference mode: disables dropout
    return tokenizer, model


tokenizer, model = load_model()


def analyze_text(text):
    """Classify a single message and return a human-readable label."""
    # str() guards against non-string values (e.g. NaN) read from a CSV;
    # RoBERTa's tokenizer is cased, so lower() is not strictly needed, but
    # it is kept to preserve the original preprocessing.
    text = str(text).lower()

    # truncation=True keeps long inputs within the model's 512-token limit.
    encoded_text = tokenizer(text, truncation=True, padding=True, return_tensors="pt")

    # Inference only, so skip gradient tracking.
    with torch.no_grad():
        output = model(**encoded_text)
    prediction = output.logits.argmax(-1).item()

    return "Job Related" if prediction == 1 else "Not Job Related"


def count_job_related_messages(data):
    # Count from the precomputed "Job Related" column rather than re-running
    # the model over every message a second time.
    job_related_count = int((data["Job Related"] == "Job Related").sum())
    not_job_related_count = len(data) - job_related_count
    return job_related_count, not_job_related_count


st.title("Job Related Message Analyzer")

uploaded_file = st.file_uploader("Upload CSV file", type="csv")
user_input = st.text_input("Enter text")

if uploaded_file:
    data = pd.read_csv(uploaded_file)

    # The app expects a "message" column; stop with a clear error if absent.
    if "message" not in data.columns:
        st.error('The uploaded CSV must contain a "message" column.')
        st.stop()

    # Classify every message and attach the labels as a new column.
    data["Job Related"] = [analyze_text(message) for message in data["message"]]

    job_related_count, not_job_related_count = count_job_related_messages(data)

    st.dataframe(data)
    st.write(f"Job Related Messages: {job_related_count}")
    st.write(f"Not Job Related Messages: {not_job_related_count}")
elif user_input:
    result = analyze_text(user_input)
    st.write(f"Message Classification: {result}")
else:
    st.write("Please upload a CSV file or enter text to analyze.")