Spaces:
Runtime error
Runtime error
MSP RAJA
committed on
Commit
·
1d74e08
1
Parent(s):
d7cce73
updated app
Browse files
.DS_Store
ADDED
Binary file (6.15 kB). View file
|
|
app.py
CHANGED
@@ -1,45 +1,35 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
import gradio as gr
|
4 |
from transformers import pipeline
|
5 |
-
|
6 |
|
7 |
# Checkpoint identifier handed to the transformers pipeline factory below.
model_ckpt = "papluca/xlm-roberta-base-language-detection"

# Build the text-classification pipeline once at import time so every
# call to predict() reuses the same object.
pipe = pipeline("text-classification", model=model_ckpt)
|
10 |
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
def predict(text: str) -> dict:
    """Return a ``{label: score}`` mapping for ``text``.

    The module-level ``pipe`` is called with ``return_all_scores=True``,
    ``truncation=True`` and ``max_length=128``. Only the first element of
    the pipeline output is used; each entry's ``"score"`` is converted to
    a plain ``float``.

    Returns ``None`` (despite the ``dict`` annotation) when the pipeline
    output is empty.
    """
    # NOTE(review): `return_all_scores` is reported as deprecated in newer
    # transformers releases in favour of `top_k` -- confirm before upgrading.
    outputs = pipe(text, return_all_scores=True, truncation=True, max_length=128)
    if not outputs:
        return None

    scores = {}
    for entry in outputs[0]:
        scores[entry["label"]] = float(entry["score"])
    return scores
|
20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
|
22 |
-
|
23 |
-
# Static copy shown around the demo widget.
# `title` was referenced by gr.Interface(...) below but never assigned in this
# file, which raised NameError at import time; define it explicitly.
title = "Language detection with XLM-RoBERTa"
description = "Determine the language in which your text is written."

# Each example is a one-element list: one value per input component.
examples = [
    ["Better late than never."],
    ["Tutto è bene ciò che finisce bene."],
    ["Donde hay humo, hay fuego."],
]
explanation = "Supported languages are (20): arabic (ar), bulgarian (bg), german (de), modern greek (el), english (en), spanish (es), french (fr), hindi (hi), italian (it), japanese (ja), dutch (nl), polish (pl), portuguese (pt), russian (ru), swahili (sw), thai (th), turkish (tr), urdu (ur), vietnamese (vi), and chinese (zh)."

# Wire predict() to a three-line text box and a label widget that shows the
# three highest-scoring classes.
# NOTE(review): the `gr.inputs` / `gr.outputs` namespaces are legacy in newer
# Gradio releases (`gr.Textbox` / `gr.Label` replace them) -- confirm against
# the Gradio version pinned for this Space before migrating.
app = gr.Interface(
    fn=predict,
    inputs=gr.inputs.Textbox(
        placeholder="What's the text you want to know the language for?",
        label="Text",
        lines=3,
    ),
    outputs=gr.outputs.Label(num_top_classes=3, label="Your text is written in "),
    title=title,
    description=description,
    examples=examples,
    article=explanation,
)
|
44 |
|
45 |
-
|
|
|
|
|
|
1 |
+
import streamlit as st
|
|
|
|
|
2 |
from transformers import pipeline
|
3 |
+
import pandas as pd
|
4 |
|
5 |
# Checkpoint identifier handed to the transformers pipeline factory below.
model_ckpt = "papluca/xlm-roberta-base-language-detection"

# Build the text-classification pipeline once at import time so every
# call to predict() reuses the same object.
pipe = pipeline("text-classification", model=model_ckpt)
|
8 |
|
9 |
+
# Display names for the two-letter language codes; predict() uses this table
# to turn a pipeline label such as "en" into "English".
labels = {
    "ar": "Arabic",
    "bg": "Bulgarian",
    "de": "German",
    "el": "Modern Greek",
    "en": "English",
    "es": "Spanish",
    "fr": "French",
    "hi": "Hindi",
    "it": "Italian",
    "ja": "Japanese",
    "nl": "Dutch",
    "pl": "Polish",
    "pt": "Portuguese",
    "ru": "Russian",
    "sw": "Swahili",
    "th": "Thai",
    "tr": "Turkish",
    "ur": "Urdu",
    "vi": "Vietnamese",
    "zh": "Chinese",
}
|
13 |
+
|
14 |
|
15 |
def predict(text: str) -> dict:
    """Return a ``{language name: score}`` mapping for ``text``.

    The module-level ``pipe`` is called with ``return_all_scores=True``,
    ``truncation=True`` and ``max_length=128``. Only the first element of
    the pipeline output is used. Each entry's ``"label"`` is looked up in
    the module-level ``labels`` table (falling back to the raw label when
    it is not listed) and its ``"score"`` is converted to a plain ``float``.

    Returns ``None`` (despite the ``dict`` annotation) when the pipeline
    output is empty.
    """
    # NOTE(review): `return_all_scores` is reported as deprecated in newer
    # transformers releases in favour of `top_k` -- confirm before upgrading.
    outputs = pipe(text, return_all_scores=True, truncation=True, max_length=128)
    if not outputs:
        return None

    scores = {}
    for entry in outputs[0]:
        code = entry["label"]
        scores[labels.get(code, code)] = float(entry["score"])
    return scores
|
23 |
|
24 |
+
# st.title("Language detection with XLM-RoBERTa")
|
25 |
+
# st.write("Determine the language in which your text is written.")
|
26 |
+
# text = st.text_area("Text", "Enter your text here")
|
27 |
+
# if text:
|
28 |
+
# results = predict(text)
|
29 |
+
# print(pd.DataFrame(results))
|
30 |
|
31 |
+
# # top_result = max(results, key=results.get)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
|
33 |
+
# st.write(f"Your text is written in {top_result}")
|
34 |
+
# print(top_result)
|
35 |
+
# # st.bar_chart(results)
|