# Author: kelvinleong
# Commit: "Create app.py" (bc6ba08)
import streamlit as st
import pandas as pd
import torch
import transformers
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# Download and load the model and tokenizer
model_name = 'bert-base-uncased'
# NOTE(review): 'bert-base-uncased' is the base pre-trained checkpoint, not one
# fine-tuned for classification. Loading it through
# AutoModelForSequenceClassification attaches a newly initialised
# classification head, so the predicted labels carry no meaning until a
# fine-tuned checkpoint is substituted here.


@st.cache_resource
def _load_tokenizer_and_model(name):
    """Load the tokenizer and sequence-classification model for *name*.

    Streamlit re-executes this script from the top on every user
    interaction. ``st.cache_resource`` (available since Streamlit 1.18)
    keeps the loaded objects alive across those reruns, so the checkpoint
    is loaded once per server process instead of once per interaction.

    Args:
        name: Hugging Face model identifier or local checkpoint path.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained(name)
    loaded_model = AutoModelForSequenceClassification.from_pretrained(name)
    # Make inference mode explicit (dropout disabled) for prediction.
    loaded_model.eval()
    return loaded_tokenizer, loaded_model


tokenizer, model = _load_tokenizer_and_model(model_name)
# Define a function to classify a single text
def classify_text(text):
    """Predict the class index for a single piece of text.

    Args:
        text: The string to classify.

    Returns:
        int: Index of the highest-scoring logit produced by the module-level
        ``model`` for ``text``.
    """
    # truncation=True is required for max_length to take effect; without it
    # over-long inputs are not shortened and can exceed the number of
    # positions the model supports, which makes the forward pass fail.
    inputs = tokenizer(
        text,
        add_special_tokens=True,
        truncation=True,
        max_length=512,
        return_tensors='pt',
    )

    # Inference only: skip gradient tracking to save memory and time.
    with torch.no_grad():
        # Keyword arguments keep the call correct regardless of the
        # positional parameter order of the model's forward().
        outputs = model(
            input_ids=inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
        )

    # With no labels supplied, the first output element holds the logits.
    logits = outputs[0]
    # The batch contains exactly one row, so .item() yields a plain int.
    return torch.argmax(logits, dim=1).item()
# Define the Streamlit app
def main():
    """Render the Streamlit page.

    Lets the user upload a CSV file, classifies every entry of its ``text``
    column with ``classify_text``, stores the results in a new
    ``predicted_label`` column and displays the resulting table. Problems
    with the upload are reported on the page instead of raising.
    """
    st.title('Text Classification with BERT')

    # Allow the user to upload a CSV file; nothing to do until one arrives.
    uploaded_file = st.file_uploader('Upload a CSV file', type='csv')
    if uploaded_file is None:
        return

    try:
        data = pd.read_csv(uploaded_file)
    except (
        pd.errors.EmptyDataError,
        pd.errors.ParserError,
        UnicodeDecodeError,
    ) as exc:
        st.error(f'Could not read the uploaded CSV file: {exc}')
        return

    # Fail with a readable message rather than a raw KeyError traceback.
    if 'text' not in data.columns:
        st.error("The uploaded CSV file must contain a 'text' column.")
        return

    # The tokenizer only accepts strings: map missing cells to '' and coerce
    # everything else (e.g. numbers) to str, leaving the displayed 'text'
    # column itself untouched.
    texts = data['text'].map(lambda value: '' if pd.isna(value) else str(value))

    # Create a new column for the predicted labels.
    data['predicted_label'] = texts.apply(classify_text)
    st.write(data)
# Run the app only when this file is executed as a script (e.g. via
# `streamlit run app.py`), not when it is imported as a module.
if __name__ == '__main__':
    main()