Update app.py to include lab information

#2
Files changed (1) hide show
  1. app.py +65 -25
app.py CHANGED
@@ -5,37 +5,77 @@ model_name = "dsfsi/nso-en-m2m100-gov"
5
  tokenizer = M2M100Tokenizer.from_pretrained(model_name)
6
  model = M2M100ForConditionalGeneration.from_pretrained(model_name)
7
 
8
- print(tokenizer.lang_code_to_token)
9
-
10
- tokenizer.src_lang = "ns"
11
-
12
  model.config.forced_bos_token_id = tokenizer.get_lang_id("en")
13
 
14
  def translate(inp):
15
  inputs = tokenizer(inp, return_tensors="pt")
16
-
17
  translated_tokens = model.generate(**inputs, max_length=512, forced_bos_token_id=tokenizer.get_lang_id("en"))
18
-
19
  translated_text = tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
20
  return translated_text
21
 
22
- description = """
23
- <p>
24
- <center>
25
- Northern Sotho to English Translation
26
- </center>
27
- </p>
28
- """
29
- article = "<p style='text-align: center'><a href='https://huggingface.co/dsfsi/nso-en-m2m100-gov' target='_blank'>by dsfsi</a></p>"
30
-
31
-
32
- iface = gr.Interface(
33
- fn=translate,
34
- title="Northern Sotho to English Translation",
35
- description=description,
36
- article=article,
37
- inputs=gr.components.Textbox(lines=5, placeholder="Enter Sotho text (maximum 5 lines)", label="Input"),
38
- outputs="text"
39
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
- iface.launch(enable_queue=True)
 
5
  tokenizer = M2M100Tokenizer.from_pretrained(model_name)
6
  model = M2M100ForConditionalGeneration.from_pretrained(model_name)
7
 
8
+ tokenizer.src_lang = "ns"
 
 
 
9
  model.config.forced_bos_token_id = tokenizer.get_lang_id("en")
10
 
11
  def translate(inp):
12
  inputs = tokenizer(inp, return_tensors="pt")
 
13
  translated_tokens = model.generate(**inputs, max_length=512, forced_bos_token_id=tokenizer.get_lang_id("en"))
 
14
  translated_text = tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
15
  return translated_text
16
 
17
+ with gr.Blocks() as demo:
18
+ with gr.Row():
19
+ with gr.Column(scale=1):
20
+ pass
21
+ with gr.Column(scale=4, min_width=1000):
22
+ gr.Image("logo_transparent_small.png", elem_id="logo", show_label=False, width=500)
23
+ gr.Markdown("""
24
+ <h1 style='text-align: center;'>Northern Sotho to English Translation</h1>
25
+ <p style='text-align: center;'>This space provides a translation service from Northern Sotho to English using the M2M100 model, fine-tuned for low-resource languages. It supports researchers, linguists, and users working with Northern Sotho texts.</p>
26
+ """)
27
+ with gr.Column(scale=1):
28
+ pass
29
+
30
+ with gr.Column(variant="panel"):
31
+ inp_text = gr.Textbox(lines=5, placeholder="Enter Northern Sotho text (maximum 5 lines)", label="Input", elem_id="centered-input")
32
+ output_text = gr.Textbox(label="Output", elem_id="centered-output")
33
+ translate_button = gr.Button("Translate", elem_id="centered-button")
34
+ translate_button.click(translate, inputs=inp_text, outputs=output_text)
35
+
36
+ gr.Markdown("""
37
+ <div style='text-align: center;'>
38
+ <a href='https://github.com/dsfsi/nso-en-m2m100-gov' target='_blank'>GitHub</a> |
39
+ <a href='https://docs.google.com/forms/d/e/1FAIpQLSf7S36dyAUPx2egmXbFpnTBuzoRulhL5Elu-N1eoMhaO7v10w/viewform' target='_blank'>Feedback Form</a> |
40
+ <a href='https://arxiv.org/abs/2303.03750' target='_blank'>Arxiv</a>
41
+ </div>
42
+ <br/>
43
+ """)
44
+
45
+ with gr.Accordion("More Information", open=False):
46
+ gr.Markdown("""
47
+ <h4 style="text-align: center;">Model Description</h4>
48
+ <p style='text-align: center;'>This is a variant of the M2M100 model, fine-tuned on a multilingual dataset to support translation from Northern Sotho (Sepedi) to English. The model was trained with a focus on improving translation accuracy for low-resource languages.</p>
49
+ """)
50
+ gr.Markdown("""
51
+ <h4 style="text-align: center;">Authors</h4>
52
+ <div style='text-align: center;'>
53
+ Vukosi Marivate, Matimba Shingange, Richard Lastrucci,
54
+ Isheanesu Joseph Dzingirai, Jenalea Rajab
55
+ </div>
56
+ """)
57
+ gr.Markdown("""
58
+ <h4 style="text-align: center;">Citation</h4>
59
+ <pre style="text-align: center; white-space: pre-wrap;">
60
+ @inproceedings{lastrucci-etal-2023-preparing,
61
+ title = "Preparing the Vuk{'}uzenzele and {ZA}-gov-multilingual {S}outh {A}frican multilingual corpora",
62
+ author = "Richard Lastrucci and Isheanesu Dzingirai and Jenalea Rajab
63
+ and Andani Madodonga and Matimba Shingange and Daniel Njini and Vukosi Marivate",
64
+ booktitle = "Proceedings of the Fourth workshop on Resources for African Indigenous Languages (RAIL 2023)",
65
+ month = may,
66
+ year = "2023",
67
+ address = "Dubrovnik, Croatia",
68
+ publisher = "Association for Computational Linguistics",
69
+ url = "https://aclanthology.org/2023.rail-1.3",
70
+ pages = "18--25"
71
+ }
72
+ </pre>
73
+ """)
74
+ gr.Markdown("""
75
+ <h4 style="text-align: center;">DOI</h4>
76
+ <div style='text-align: center;'>
77
+ <a href="https://doi.org/10.48550/arXiv.2303.03750" target="_blank">10.48550/arXiv.2303.03750</a>
78
+ </div>
79
+ """)
80
 
81
+ demo.launch()