"""Streamlit app that labels each row of an uploaded CSV with a BERT classifier."""

import pandas as pd
import streamlit as st
import torch
import transformers
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Download and load the model and tokenizer.
# NOTE(review): 'bert-base-uncased' looks like a base (not fine-tuned)
# checkpoint, so the classification head is presumably freshly initialized and
# the predicted labels may not be meaningful -- confirm the intended checkpoint.
# NOTE(review): this runs at module import; Streamlit typically re-executes the
# script on each interaction, so consider caching the load -- verify against
# the Streamlit version in use.
model_name = 'bert-base-uncased'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)


def classify_text(text: str) -> int:
    """Return the index of the highest-scoring class for a single text.

    Args:
        text: The text to classify.

    Returns:
        The argmax over the model's logits for ``text``, as a Python int.
    """
    # Tokenize the text and add special tokens. ``truncation=True`` is required
    # for ``max_length`` to actually cap the sequence; without it, longer
    # inputs are passed through unshortened and the model call fails.
    inputs = tokenizer.encode_plus(
        text,
        add_special_tokens=True,
        return_tensors='pt',
        max_length=512,
        truncation=True,
    )

    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = model(
            input_ids=inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
        )

    # With no labels supplied, the first output element holds the logits.
    logits = outputs[0]
    return torch.argmax(logits, dim=1).item()


def main() -> None:
    """Render the app: upload a CSV, classify its 'text' column, show the result."""
    st.title('Text Classification with BERT')

    # Allow the user to upload a CSV file.
    uploaded_file = st.file_uploader('Upload a CSV file', type='csv')
    if uploaded_file is None:
        return

    try:
        data = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        st.error(f'Could not read the uploaded CSV: {exc}')
        return

    if 'text' not in data.columns:
        st.error("The uploaded CSV must contain a 'text' column.")
        return

    # Empty cells load as NaN (a float) and non-text cells may be numeric;
    # the tokenizer only accepts strings, so normalize before classifying.
    texts = data['text'].fillna('').astype(str)

    # Create a new column for the predicted labels.
    data['predicted_label'] = texts.apply(classify_text)
    st.write(data)


if __name__ == '__main__':
    main()