kaushikbar committed
Commit · 731bd76
1 Parent(s): d79bb3f
Single label
app.py CHANGED
@@ -5,21 +5,25 @@ from langdetect import detect, DetectorFactory, detect_langs
 import fasttext
 from transformers import pipeline
 
-models = {
+models = {
+          'en': 'Narsil/deberta-large-mnli-zero-cls', # English
           #'es': 'Recognai/zeroshot_selectra_medium', # Spanish
           #'it': 'joeddav/xlm-roberta-large-xnli', # Italian
           #'ru': 'DeepPavlov/xlm-roberta-large-en-ru-mnli', # Russian
           #'tr': 'vicgalle/xlm-roberta-large-xnli-anli', # Turkish
           #'no': 'NbAiLab/nb-bert-base-mnli', # Norsk
-          'de': 'Sahajtomar/German_Zeroshot'
+          #'de': 'Sahajtomar/German_Zeroshot' # German
+         }
 
-hypothesis_templates = {
+hypothesis_templates = {
+                        'en': 'This example is {}.', # English
                         #'es': 'Este ejemplo es {}.', # Spanish
                         #'it': 'Questo esempio è {}.', # Italian
                         #'ru': 'Этот пример {}.', # Russian
                         #'tr': 'Bu örnek {}.', # Turkish
                         #'no': 'Dette eksempelet er {}.', # Norsk
-                        'de': 'Dieses beispiel ist {}.'
+                        #'de': 'Dieses beispiel ist {}.' # German
+                        }
 
 classifiers = {'en': pipeline("zero-shot-classification", hypothesis_template=hypothesis_templates['en'],
                               model=models['en']),
@@ -33,8 +37,9 @@ classifiers = {'en': pipeline("zero-shot-classification", hypothesis_template=hy
                               # model=models['tr']),
                #'no': pipeline("zero-shot-classification", hypothesis_template=hypothesis_templates['no'],
                               # model=models['no']),
-               'de': pipeline("zero-shot-classification", hypothesis_template=hypothesis_templates['de'],
-
+               #'de': pipeline("zero-shot-classification", hypothesis_template=hypothesis_templates['de'],
+                              # model=models['de'])
+              }
 
 fasttext_model = fasttext.load_model(hf_hub_download("julien-c/fasttext-language-id", "lid.176.bin"))
 
@@ -150,10 +155,11 @@ def detect_lang(sequence, labels):
     return seq_lang
 
 def sequence_to_classify(sequence, labels):
-
+    lang = 'en' #detect_lang(sequence, labels)
+    classifier = classifiers[lang]
 
     label_clean = str(labels).split(";;")
-    response = classifier(sequence, label_clean, multi_label=
+    response = classifier(sequence, label_clean, multi_label=False)
 
     predicted_labels = response['labels']
     predicted_scores = response['scores']
@@ -167,7 +173,7 @@ def sequence_to_classify(sequence, labels):
 
 iface = gr.Interface(
     title="MARS Demo",
-    description="Off-the-shelf NLP classifier.",
+    description="Off-the-shelf NLP classifier with no domain or task-specific training.",
     fn=sequence_to_classify,
     inputs=[gr.inputs.Textbox(lines=10,
                               label="Please enter the text you would like to classify...",
@@ -175,7 +181,7 @@ iface = gr.Interface(
             gr.inputs.Textbox(lines=2,
                               label="Please enter the candidate labels (separated by 2 consecutive semicolons)...",
                               placeholder="Labels here separated by ;;")],
-    outputs=gr.outputs.Label(num_top_classes=
+    outputs=gr.outputs.Label(num_top_classes=1),
     #interpretation="default",
     examples=prep_examples())
 
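
For reference, the sketch below shows the single-label zero-shot call this commit switches to, using the English model and hypothesis template named in the diff. It is only a minimal standalone illustration: the sample text and candidate labels are made up, and the rest of app.py (language detection, Gradio wiring) is omitted.

# Minimal sketch of the single-label zero-shot setup after this commit.
# Model name and hypothesis template come from the diff above; the
# sample text and labels are illustrative only.
from transformers import pipeline

classifier = pipeline("zero-shot-classification",
                      model='Narsil/deberta-large-mnli-zero-cls',
                      hypothesis_template='This example is {}.')

sequence = "The match went to extra time and was decided on penalties."
label_clean = "sports;;politics;;technology".split(";;")

# multi_label=False normalises the scores across the candidate labels,
# so effectively one label wins (the "single label" behaviour).
response = classifier(sequence, label_clean, multi_label=False)
print(response['labels'][0], response['scores'][0])

With multi_label=True, the pipeline scores each candidate label independently against its own entailment/contradiction pair, so several labels can score highly at once; multi_label=False normalises across the candidates, which matches the num_top_classes=1 Label output chosen for the Gradio interface in this commit.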
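
app.py also still loads the fasttext language-ID model (lid.176.bin) even though this commit hard-codes lang = 'en' and comments out the detect_lang call. The app's own detect_lang implementation is not shown in these hunks, so the snippet below is only a hypothetical illustration of how that fasttext model is typically queried, not the app's actual helper.

# Hypothetical illustration (not the app's detect_lang): querying the
# fasttext language-ID model that app.py loads above.
import fasttext
from huggingface_hub import hf_hub_download

fasttext_model = fasttext.load_model(
    hf_hub_download("julien-c/fasttext-language-id", "lid.176.bin"))

text = "Dieses Beispiel ist auf Deutsch."
labels, scores = fasttext_model.predict(text.replace("\n", " "))  # fasttext rejects newlines
lang = labels[0].replace("__label__", "")  # e.g. 'de'
print(lang, float(scores[0]))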