Dhahlan2000 committed
Commit a032ead
1 Parent(s): ee6fdd6

Update app.py

Files changed (1)
  1. app.py +69 -20
app.py CHANGED
@@ -1,12 +1,75 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
+from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM
+from aksharamukha import transliterate
+import torch
 
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
+# Set up device
+device = "cuda" if torch.cuda.is_available() else "cpu"
 
+# Load translation models and tokenizers
+trans_model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M").to(device)
+eng_trans_tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
+translator = pipeline('translation', model=trans_model, tokenizer=eng_trans_tokenizer, src_lang="eng_Latn", tgt_lang='sin_Sinh', max_length=400, device=device)
 
+sin_trans_model = AutoModelForSeq2SeqLM.from_pretrained("thilina/mt5-sinhalese-english").to(device)
+si_trans_tokenizer = AutoTokenizer.from_pretrained("thilina/mt5-sinhalese-english")
+
+singlish_pipe = pipeline("text2text-generation", model="Dhahlan2000/Simple_Translation-model-for-GPT-v14")
+
+# Translation functions
+def translate_Singlish_to_sinhala(text):
+    translated_text = singlish_pipe(f"translate Singlish to Sinhala: {text}", clean_up_tokenization_spaces=False)[0]['generated_text']
+    return translated_text
+
+def translate_english_to_sinhala(text):
+    parts = text.split("\n")
+    translated_parts = [translator(part, clean_up_tokenization_spaces=False)[0]['translation_text'] for part in parts]
+    return "\n".join(translated_parts).replace("ප් රභූවරුන්", "")
+
+def translate_sinhala_to_english(text):
+    parts = text.split("\n")
+    translated_parts = []
+    for part in parts:
+        inputs = si_trans_tokenizer(part.strip(), return_tensors="pt", padding=True, truncation=True, max_length=512).to(device)
+        outputs = sin_trans_model.generate(**inputs)
+        translated_part = si_trans_tokenizer.decode(outputs[0], skip_special_tokens=True, clean_up_tokenization_spaces=False)
+        translated_parts.append(translated_part)
+    return "\n".join(translated_parts)
+
+def transliterate_from_sinhala(text):
+    latin_text = transliterate.process('Sinhala', 'Velthuis', text).replace('.', '').replace('*', '').replace('"', '').lower()
+    return latin_text
+
+def transliterate_to_sinhala(text):
+    return transliterate.process('Velthuis', 'Sinhala', text)
+
+# Load conversation model
+conv_model_name = "google/gemma-7b"
+tokenizer = AutoTokenizer.from_pretrained(conv_model_name)
+model = AutoModelForCausalLM.from_pretrained(conv_model_name).to(device)
+
+def conversation_predict(text):
+    input_ids = tokenizer(text, return_tensors="pt").to(device)
+    outputs = model.generate(**input_ids)
+    return tokenizer.decode(outputs[0])
+
+def ai_predicted(user_input):
+    if user_input.lower() == 'exit':
+        return "Goodbye!"
+
+    user_input = translate_Singlish_to_sinhala(user_input)
+    user_input = transliterate_to_sinhala(user_input)
+    user_input = translate_sinhala_to_english(user_input)
+
+    ai_response = conversation_predict(user_input)
+    ai_response_lines = ai_response.split("</s>")
+
+    response = translate_english_to_sinhala(ai_response_lines[-1])
+    response = transliterate_from_sinhala(response)
+    return response
+
+# Gradio Interface
 def respond(
     message,
     history: list[tuple[str, str]],
@@ -25,23 +88,10 @@ def respond(
 
     messages.append({"role": "user", "content": message})
 
-    response = ""
-
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
+    response = ai_predicted(message)
 
-        response += token
-        yield response
+    yield response
 
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
@@ -58,6 +108,5 @@ demo = gr.ChatInterface(
     ],
 )
 
-
 if __name__ == "__main__":
     demo.launch()
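
Note: this commit swaps the hosted zephyr-7b-beta InferenceClient for a local chain: Singlish input is converted to Sinhala script, translated to English, answered by google/gemma-7b, then translated back and romanized. A minimal sketch of the transliteration step in isolation, assuming only the aksharamukha package is installed; the scheme names 'Velthuis' and 'Sinhala' are the ones the diff uses, and the sample string is illustrative:

from aksharamukha import transliterate

# Romanized text -> Sinhala script, as transliterate_to_sinhala does
sinhala = transliterate.process('Velthuis', 'Sinhala', 'kohomada')
# Sinhala script -> romanized text, as transliterate_from_sinhala does
# (before its cleanup .replace() calls and .lower())
latin = transliterate.process('Sinhala', 'Velthuis', sinhala)
print(sinhala, latin)

One caveat: conversation_predict calls model.generate(**input_ids) without max_new_tokens, so replies are likely capped at the model's default generation length unless gemma's generation config overrides it.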
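Note: respond is still a generator, so the existing gr.ChatInterface wiring keeps working, but it now yields the whole reply in a single chunk, and the max_tokens/temperature/top_p sliders no longer influence generation. A hypothetical sketch of driving it outside Gradio; the extra keyword arguments are assumed from the stock ChatInterface template (only message and history are visible in this diff):

# Hypothetical call; the slider arguments are assumed from the template
# and are unused by the new body of respond().
for partial in respond(
    "kohomada",
    history=[],
    system_message="You are a friendly chatbot.",
    max_tokens=512,
    temperature=0.7,
    top_p=0.95,
):
    print(partial)

For the Space to build with this version, requirements.txt presumably needs transformers, torch, and aksharamukha alongside gradio and huggingface_hub.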