Spaces:
Running
Running
dbleek
committed on
Commit
·
806796c
1
Parent(s):
4d1c892
new classifier
Browse files
- milestone-3.py +1 -2
- milestone_2.py +0 -26
- patent_classification_v2.pt +3 -0
milestone-3.py
CHANGED
@@ -25,7 +25,7 @@ dataset = filtered_dataset.shuffle(seed=42).select(range(20))
|
|
25 |
dataset = dataset.sort("patent_number")
|
26 |
|
27 |
# Create pipeline using model trainned on Colab
|
28 |
-
model = torch.load("
|
29 |
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
|
30 |
classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
|
31 |
|
@@ -74,4 +74,3 @@ if submitted:
|
|
74 |
pred, score
|
75 |
)
|
76 |
)
|
77 |
-
check = st.markdown("Actual Label: **{}**.".format(label))
|
|
|
25 |
dataset = dataset.sort("patent_number")
|
26 |
|
27 |
# Create pipeline using model trainned on Colab
|
28 |
+
model = torch.load("patent_classifier_v2.pt", map_location=torch.device("cpu"))
|
29 |
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
|
30 |
classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
|
31 |
|
|
|
74 |
pred, score
|
75 |
)
|
76 |
)
|
|
milestone_2.py
DELETED
@@ -1,26 +0,0 @@
|
|
1 |
-
import streamlit as st
|
2 |
-
from transformers import (AutoTokenizer, TFAutoModelForSequenceClassification,
|
3 |
-
pipeline)
|
4 |
-
|
5 |
-
st.title("CS-GY-6613 Project Milestone 2")
|
6 |
-
model_choices = (
|
7 |
-
"distilbert-base-uncased-finetuned-sst-2-english",
|
8 |
-
"j-hartmann/emotion-english-distilroberta-base",
|
9 |
-
"joeddav/distilbert-base-uncased-go-emotions-student",
|
10 |
-
)
|
11 |
-
|
12 |
-
with st.form("Input Form"):
|
13 |
-
text = st.text_area("Write your text here:", "CS-GY-6613 is a great course!")
|
14 |
-
model_name = st.selectbox("Select a model:", model_choices)
|
15 |
-
submitted = st.form_submit_button("Submit")
|
16 |
-
|
17 |
-
if submitted:
|
18 |
-
model = TFAutoModelForSequenceClassification.from_pretrained(model_name)
|
19 |
-
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
20 |
-
classifier = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
|
21 |
-
res = classifier(text)
|
22 |
-
label = res[0]["label"].upper()
|
23 |
-
score = res[0]["score"]
|
24 |
-
st.markdown(
|
25 |
-
f"This text was classified as **{label}** with a confidence score of **{score}**."
|
26 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
patent_classification_v2.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8fbbdc470f673703431aa31cc7451af0d0608df3bd6e7006ab32866803f4eece
|
3 |
+
size 267882633
|