kelvinleong committed on
Commit
bc6ba08
·
1 Parent(s): a5e95c0

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -0
app.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import torch
4
+ import transformers
5
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
6
+
7
# Hugging Face Hub checkpoint shared by the tokenizer and the model.
# NOTE(review): 'bert-base-uncased' is a base checkpoint, so the sequence
# classification head is presumably freshly initialized rather than
# fine-tuned -- confirm this is the intended model before trusting the labels.
model_name = 'bert-base-uncased'

# Both objects are built at module load and used by classify_text below.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
11
+
12
def classify_text(text):
    """Return the index of the highest-scoring class for ``text``.

    The text is tokenized with the module-level ``tokenizer`` and scored by
    the module-level ``model``; the position of the largest logit is the
    predicted label.

    Args:
        text: The string to classify.

    Returns:
        int: Index of the largest logit for the single input sequence.
    """
    # truncation=True is what makes max_length take effect. Without it,
    # inputs longer than 512 tokens are passed through at full length and
    # exceed the number of positions the BERT checkpoint supports.
    inputs = tokenizer(
        text,
        add_special_tokens=True,
        truncation=True,
        max_length=512,
        return_tensors='pt',
    )

    # Inference only: skip gradient tracking. Keyword arguments keep the
    # call independent of the positional order of the model's forward().
    with torch.no_grad():
        outputs = model(
            input_ids=inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
        )

    # The first element of the model output holds the logits, one row per
    # input sequence; there is exactly one sequence here.
    logits = outputs[0]
    return torch.argmax(logits, dim=1).item()
33
+
34
def main():
    """Render the Streamlit page that labels every row of an uploaded CSV.

    The page shows a title and a CSV uploader. Once a file is uploaded it is
    read into a DataFrame, each non-missing value of the ``text`` column is
    passed to ``classify_text``, the results are stored in a new
    ``predicted_label`` column, and the table is displayed. Problems with the
    upload are reported on the page instead of raising.
    """
    st.title('Text Classification with BERT')

    uploaded_file = st.file_uploader('Upload a CSV file', type='csv')
    if uploaded_file is None:
        # Nothing uploaded yet; leave the page with just the uploader.
        return

    try:
        data = pd.read_csv(uploaded_file)
    except (
        pd.errors.EmptyDataError,
        pd.errors.ParserError,
        UnicodeDecodeError,
    ) as exc:
        st.error(f'Could not read the uploaded CSV file: {exc}')
        return

    if 'text' not in data.columns:
        st.error("The uploaded CSV file must contain a 'text' column.")
        return

    # na_action='ignore' leaves empty cells as missing instead of handing a
    # float NaN to the tokenizer; str() covers columns pandas parsed as
    # numbers.
    data['predicted_label'] = data['text'].map(
        lambda value: classify_text(str(value)),
        na_action='ignore',
    )

    st.write(data)


if __name__ == '__main__':
    main()