MSP RAJA committed on
Commit
1d74e08
·
1 Parent(s): d7cce73

updated app

Browse files
Files changed (2) hide show
  1. .DS_Store +0 -0
  2. app.py +18 -28
.DS_Store ADDED
Binary file (6.15 kB). View file
 
app.py CHANGED
@@ -1,45 +1,35 @@
1
- """Gradio app to showcase the language detector."""
2
-
3
- import gradio as gr
4
  from transformers import pipeline
5
-
6
 
7
  # Get transformer model and set up a pipeline
8
  model_ckpt = "papluca/xlm-roberta-base-language-detection"
9
  pipe = pipeline("text-classification", model=model_ckpt)
10
 
 
 
 
 
 
11
 
12
  def predict(text: str) -> dict:
13
  """Compute predictions for text."""
14
  preds = pipe(text, return_all_scores=True, truncation=True, max_length=128)
15
  if preds:
16
  pred = preds[0]
17
- return {p["label"]: float(p["score"]) for p in pred}
18
  else:
19
  return None
20
 
 
 
 
 
 
 
21
 
22
- title = "Language detection with XLM-RoBERTa"
23
- description = "Determine the language in which your text is written."
24
- examples = [
25
- ["Better late than never."],
26
- ["Tutto è bene ciò che finisce bene."],
27
- ["Donde hay humo, hay fuego."],
28
- ]
29
- explanation = "Supported languages are (20): arabic (ar), bulgarian (bg), german (de), modern greek (el), english (en), spanish (es), french (fr), hindi (hi), italian (it), japanese (ja), dutch (nl), polish (pl), portuguese (pt), russian (ru), swahili (sw), thai (th), turkish (tr), urdu (ur), vietnamese (vi), and chinese (zh)."
30
-
31
- app = gr.Interface(
32
- fn=predict,
33
- inputs=gr.inputs.Textbox(
34
- placeholder="What's the text you want to know the language for?",
35
- label="Text",
36
- lines=3,
37
- ),
38
- outputs=gr.outputs.Label(num_top_classes=3, label="Your text is written in "),
39
- title=title,
40
- description=description,
41
- examples=examples,
42
- article=explanation,
43
- )
44
 
45
- app.launch()
 
 
 
1
+ import streamlit as st
 
 
2
  from transformers import pipeline
3
+ import pandas as pd
4
 
5
  # Get transformer model and set up a pipeline
6
  model_ckpt = "papluca/xlm-roberta-base-language-detection"
7
  pipe = pipeline("text-classification", model=model_ckpt)
8
 
9
+ labels = {"ar" : "Arabic", "bg" : "Bulgarian", "de" : "German", "el" : "Modern Greek",
10
+ "en" : "English", "es" : "Spanish", "fr" : "French", "hi" : "Hindi", "it" : "Italian",
11
+ "ja" : "Japanese", "nl" : "Dutch", "pl" : "Polish", "pt" : "Portuguese", "ru" : "Russian",
12
+ "sw" : "Swahili", "th" : "Thai", "tr" : "Turkish", "ur" : "Urdu", "vi" : "Vietnamese", "zh" : "Chinese"}
13
+
14
 
15
  def predict(text: str) -> dict:
16
  """Compute predictions for text."""
17
  preds = pipe(text, return_all_scores=True, truncation=True, max_length=128)
18
  if preds:
19
  pred = preds[0]
20
+ return {labels.get(p["label"],p["label"]): float(p["score"]) for p in pred}
21
  else:
22
  return None
23
 
24
+ # st.title("Language detection with XLM-RoBERTa")
25
+ # st.write("Determine the language in which your text is written.")
26
+ # text = st.text_area("Text", "Enter your text here")
27
+ # if text:
28
+ # results = predict(text)
29
+ # print(pd.DataFrame(results))
30
 
31
+ # # top_result = max(results, key=results.get)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
+ # st.write(f"Your text is written in {top_result}")
34
+ # print(top_result)
35
+ # # st.bar_chart(results)