import gradio as gr
import tempfile
import os
from gtts import gTTS
from deep_translator import GoogleTranslator
from groq import Groq
import logging
from sentence_transformers import SentenceTransformer
import numpy as np

logging.basicConfig(level=logging.INFO, format='%(asctime)s | %(levelname)s | %(message)s')

# Initialize Groq client
groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
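# Note: GROQ_API_KEY must be available in the environment (on a Hugging Face
# Space it is typically added as a repository secret); without it the Groq
# client cannot authenticate and the chat feature will not work.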

# Initialize HuggingFace embeddings (free to use)
sentence_model = SentenceTransformer('all-MiniLM-L6-v2')
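
# In-memory store for the chat context: text chunks and their embeddings are
# kept in two parallel lists (no external vector database is used).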
indexed_texts = []
indexed_embeddings = []

# Translation languages dropdown options
translation_languages = {
    "English": "en",
    "Spanish": "es",
    "Arabic": "ar",
    "Amharic": "am",
    "Hindi": "hi",
    "Kannada": "kn",
    "Marathi": "mr",
    "Russian": "ru",
    "Telugu": "te",
    "Tamil": "ta",
    "Vietnamese": "vi"
}

# Define supported languages for Google TTS
audio_language_dict = {
    "English": {"code": "en"},
    "Spanish": {"code": "es"},
    "Amharic": {"code": "am"},
    "Arabic": {"code": "ar"},
    "Hindi": {"code": "hi"},
    "Kannada": {"code": "kn"},
    "Marathi": {"code": "mr"},
    "Russian": {"code": "ru"},
    "Telugu": {"code": "te"},
    "Tamil": {"code": "ta"},
    "Vietnamese": {"code": "vi"}
}

def index_text(text: str) -> str:
    global indexed_texts, indexed_embeddings
    try:
        # Split the text into sentences or smaller chunks (naive split on '. ';
        # every chunk except the last loses its trailing period)
        chunks = text.split('. ')
        for chunk in chunks:
            if chunk:
                embedding = sentence_model.encode([chunk])[0]
                indexed_texts.append(chunk)
                indexed_embeddings.append(embedding)
        return f"Text indexed successfully. Total indexed chunks: {len(indexed_texts)}"
    except Exception as e:
        return f"Error indexing text: {str(e)}"

def clear_index() -> str:
    global indexed_texts, indexed_embeddings
    indexed_texts.clear()
    indexed_embeddings.clear()
    return "Index cleared successfully. Ready for new indexing."

def find_most_similar(query: str, top_k: int = 3) -> list:
    if not indexed_texts:
        return ["No indexed text available."]
    query_embedding = sentence_model.encode([query])[0]
    similarities = [np.dot(query_embedding, doc_embedding) for doc_embedding in indexed_embeddings]
    top_indices = np.argsort(similarities)[-top_k:][::-1]
    return [indexed_texts[i] for i in top_indices]
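
# Note: the ranking above uses a raw dot product; SentenceTransformer.encode()
# does not L2-normalize by default, so this is only a proxy for cosine
# similarity. A minimal normalized variant (same model, same globals) could be:
#
#   query_vec = sentence_model.encode([query], normalize_embeddings=True)[0]
#   sims = [float(np.dot(query_vec, v / np.linalg.norm(v))) for v in indexed_embeddings]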

def chat_with_context(question: str, model: str) -> str:
    if not indexed_texts:
        return "Please index some text first."
    relevant_contexts = find_most_similar(question, top_k=3)
    context = " ".join(relevant_contexts)
    try:
        prompt = f"Context: {context}\n\nQuestion: {question}\n\nAnswer:"
        chat_completion = groq_client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": prompt,
                }
            ],
            model=model,
            max_tokens=500  # Limit the response length
        )
        return chat_completion.choices[0].message.content
    except Exception as e:
        logging.error(f"Error in chat: {str(e)}")
        return f"Error in chat: {str(e)}"

# Translation function
def translate_text(text, target_lang_code):
    try:
        translator = GoogleTranslator(source='auto', target=target_lang_code)
        return translator.translate(text)
    except Exception as e:
        return f"Translation Error: {str(e)}"

# Google TTS function
def google_tts(text, lang):
    try:
        tts = gTTS(text=text, lang=lang, slow=False)
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio:
            tts.save(temp_audio.name)
        return temp_audio.name, f"Speech generated with Google TTS using {lang} language"
    except Exception as e:
        return None, f"Error in Google TTS: {str(e)}"

with gr.Blocks() as iface:
    gr.Markdown("# EM Text Translator and Speech Generator")

    with gr.Row():
        text_input = gr.Textbox(label="Enter text for translation and speech generation", lines=3)

    with gr.Row():
        translation_lang_dropdown = gr.Dropdown(list(translation_languages.keys()), label="Select Translation Language", value="English")
        convert_button = gr.Button("Convert")

    translated_text = gr.Textbox(label="Translated Text")

    # with gr.Row():
    #     index_button = gr.Button("Index")
    #     clear_index_button = gr.Button("Clear Index")
    #     index_status = gr.Textbox(label="Indexing Status")

    # Checkbox kept for the (currently commented-out) chat feature below.
    use_chat = gr.Checkbox(label="", value=False)

    # chat_group = gr.Group(visible=False)
    # with chat_group:
    #     chat_input = gr.Textbox(label="Ask a question about the indexed text")
    #     chat_model = gr.Dropdown(
    #         choices=["llama3-70b-8192", "mixtral-8x7b-32768", "gemma-7b-it"],
    #         label="Select Chat Model",
    #         value="llama3-70b-8192"
    #     )
    #     chat_button = gr.Button("Ask")
    #     chat_output = gr.Textbox(label="Answer", interactive=False)

    with gr.Group() as tts_options:
        audio_lang_dropdown = gr.Dropdown(list(audio_language_dict.keys()), label="Select Audio Language", value="English")
        generate_button = gr.Button("Generate Speech")
        audio_output = gr.Audio(label="Generated Speech")
        message_output = gr.Textbox(label="Message")

    # def update_chat_visibility(use_chat):
    #     return gr.update(visible=use_chat)

    def convert_text(text, translation_lang):
        target_code = translation_languages[translation_lang]
        translated = translate_text(text, target_code)
        return translated

    def generate_speech(text, audio_lang, use_chat=False, chat_output=None):
        # The chat arguments default to "off" so this handler works while the
        # chat UI and its extra click inputs remain commented out.
        if use_chat and chat_output:
            text = chat_output
        logging.info(f"Generating speech: lang={audio_lang}")
        try:
            return google_tts(text, audio_language_dict[audio_lang]["code"])
        except Exception as e:
            logging.error(f"Error generating speech: {str(e)}")
            return None, f"Error generating speech: {str(e)}"

    convert_button.click(convert_text, inputs=[text_input, translation_lang_dropdown], outputs=translated_text)
    # index_button.click(index_text, inputs=[translated_text], outputs=[index_status])
    # clear_index_button.click(clear_index, outputs=[index_status])
    # use_chat.change(update_chat_visibility, inputs=[use_chat], outputs=[chat_group])
    # chat_button.click(chat_with_context, inputs=[chat_input, chat_model], outputs=[chat_output])
    generate_button.click(
        generate_speech,
        # inputs=[translated_text, audio_lang_dropdown, use_chat, chat_output],
        inputs=[translated_text, audio_lang_dropdown],
        outputs=[audio_output, message_output]
    )

iface.launch()