import numpy as np
import torch
import gradio as gr
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    AutoModelForSeq2SeqLM,
    pipeline,
)
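
# Farmers-Helper-Bot: accepts a spoken query in English, Hindi, Telugu, Tamil,
# or Kannada, transcribes it with a per-language ASR model, answers with a
# finetuned Gemma model, and translates the answer back via NLLB-200.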
# Chat model: a Gemma checkpoint finetuned for farmer queries.
gen_model_name = "tensorgirl/finetuned-gemma"
gen_model = AutoModelForCausalLM.from_pretrained(
    gen_model_name, torch_dtype=torch.bfloat16, device_map="auto", trust_remote_code=True
)
gen_tokenizer = AutoTokenizer.from_pretrained(gen_model_name, trust_remote_code=True)
gen_tokenizer.pad_token = gen_tokenizer.eos_token
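
# device_map="auto" relies on accelerate to place the weights on a GPU when
# one is available and to fall back to CPU otherwise.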
generator = pipeline(
    "text-generation",
    model=gen_model,
    tokenizer=gen_tokenizer,
)
# Translation model: NLLB-200 handles both directions between English and the
# supported regional languages. Separate names keep it from shadowing the
# Gemma model and tokenizer above.
nllb_model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
nllb_tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
device = 0 if torch.cuda.is_available() else -1
def translate(text, src_lang, tgt_lang):
    translation_pipeline = pipeline("translation", model=nllb_model, tokenizer=nllb_tokenizer,
                                    src_lang=src_lang, tgt_lang=tgt_lang, max_length=400, device=device)
    result = translation_pipeline(text)
    return result[0]["translation_text"]
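
# Example usage (NLLB-200 language codes): a hypothetical call such as
# translate("Which crop suits sandy soil?", "eng_Latn", "hin_Deva")
# returns the Hindi rendering of the question.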
def normalize_audio(audio):
    # Gradio's Audio component yields (sample_rate, int array); convert to
    # float32 and peak-normalize, guarding against silent (all-zero) input.
    sr, y = audio
    y = y.astype(np.float32)
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak
    return sr, y

def English(audio):
    transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
    sr, y = normalize_audio(audio)
    return transcriber({"sampling_rate": sr, "raw": y})["text"]

def Hindi(audio):
    transcriber = pipeline("automatic-speech-recognition", model="theainerd/Wav2Vec2-large-xlsr-hindi")
    sr, y = normalize_audio(audio)
    text = transcriber({"sampling_rate": sr, "raw": y})["text"]
    return translate(text, "hin_Deva", "eng_Latn")

def Telugu(audio):
    transcriber = pipeline("automatic-speech-recognition", model="anuragshas/wav2vec2-large-xlsr-53-telugu")
    sr, y = normalize_audio(audio)
    text = transcriber({"sampling_rate": sr, "raw": y})["text"]
    return translate(text, "tel_Telu", "eng_Latn")

def Tamil(audio):
    transcriber = pipeline("automatic-speech-recognition", model="Harveenchadha/vakyansh-wav2vec2-tamil-tam-250")
    sr, y = normalize_audio(audio)
    text = transcriber({"sampling_rate": sr, "raw": y})["text"]
    return translate(text, "tam_Taml", "eng_Latn")

def Kannada(audio):
    transcriber = pipeline("automatic-speech-recognition", model="vasista22/whisper-kannada-medium")
    sr, y = normalize_audio(audio)
    text = transcriber({"sampling_rate": sr, "raw": y})["text"]
    return translate(text, "kan_Knda", "eng_Latn")
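
# The per-language helpers above all return English text, so a single
# English-language generator can serve every supported language.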
def predict(audio, language):
    # Transcribe the question; non-English speech is translated to English
    # before it reaches the Gemma model.
    if language == "English":
        message = English(audio)
    elif language == "Hindi":
        message = Hindi(audio)
    elif language == "Telugu":
        message = Telugu(audio)
    elif language == "Tamil":
        message = Tamil(audio)
    elif language == "Kannada":
        message = Kannada(audio)
    else:
        return "Please select a language."
    print(message)
    sequences = generator(
        message,
        max_length=200,
        do_sample=False,  # greedy decoding, so one deterministic answer
        num_return_sequences=1,
        eos_token_id=gen_tokenizer.eos_token_id,
    )
    answer = " ".join(seq["generated_text"] for seq in sequences)
    print(answer)
    # Translate the English answer back into the language of the question.
    if language == "Hindi":
        return translate(answer, "eng_Latn", "hin_Deva")
    if language == "Telugu":
        return translate(answer, "eng_Latn", "tel_Telu")
    if language == "Tamil":
        return translate(answer, "eng_Latn", "tam_Taml")
    if language == "Kannada":
        return translate(answer, "eng_Latn", "kan_Knda")
    return answer
demo = gr.Interface(
    predict,
    [
        gr.Audio(),
        gr.Dropdown(
            ["Hindi", "Telugu", "Tamil", "Kannada", "English"],
            label="Language",
            info="Please select the language of your choice",
        ),
    ],
    "text",
    title="Farmers-Helper-Bot",
    description="Ask your queries in the regional language of your choice",
)

# share=True matters only for local runs; a hosted Space is already public.
demo.launch(share=True)