import gradio as gr
from langchain.prompts import PromptTemplate
from langchain_huggingface import HuggingFaceEndpoint
from langchain_core.output_parsers import JsonOutputParser
from langdetect import detect
import time
import torch
from transformers import pipeline
import re
from whisperplus import download_youtube_to_mp3
# Initialize the LLM served through the Hugging Face Inference API
llm = HuggingFaceEndpoint(
    repo_id="mistralai/Mistral-7B-Instruct-v0.3",
    task="text-generation",
    max_new_tokens=128,
    do_sample=False,  # greedy decoding; a temperature setting would be ignored with sampling off
)
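# Note: the endpoint authenticates via the HUGGINGFACEHUB_API_TOKEN environment
# variable (or an explicit huggingfacehub_api_token argument); on a Space this
# is usually configured as a repository secret.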
template_classify = '''
You are a topic detector bot. Your task is to determine the main topic of the given text.
Answer with a general main topic, not specific words from the text.
Your answer must not contain specific information from the given text.
Answer with exactly one general main topic. Do not answer with two or more topics.
Answer briefly, with a two- or three-word phrase. Do not answer with a long sentence.
Answer with the topic in context. For example, if the text says "My delivery is late", its topic is late delivery.
If you do not know the topic, just answer General.
What is the main topic of the given text?:
<text>
{TEXT}
</text>
Convert your answer to JSON format using 'Answer' as the key and return it.
Your final response MUST contain only the JSON, no other text.
Example:
{{"Answer":["General"]}}
'''
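# A well-formed model reply matches the example above, and JsonOutputParser
# turns it into a plain dict. Illustrative sketch (the string below is made-up
# output, not a real model response):
#   json_output_parser.parse('{"Answer":["late delivery"]}')
#   # -> {"Answer": ["late delivery"]}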
json_output_parser = JsonOutputParser()

# Detect the language, prompt the LLM for a topic, and parse the JSON reply
def classify_text(text):
    start = time.time()
    try:
        lang = detect(text)
    except Exception:
        # langdetect raises on empty or non-linguistic input; fall back to English
        lang = "en"
    prompt_classify = PromptTemplate(
        template=template_classify,
        input_variables=["TEXT"],  # the template only references {TEXT}
    )
    formatted_prompt = prompt_classify.format(TEXT=text)
    classify = llm.invoke(formatted_prompt)
    parsed_output = json_output_parser.parse(classify)
    end = time.time()
    duration = end - start
    return lang, parsed_output["Answer"][0], duration
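# Example call (return values are illustrative; the actual topic depends on the model):
#   lang, topic, secs = classify_text("My delivery is late")
#   # -> ("en", "late delivery", 1.37)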
# Initialize the speech recognition pipeline
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
device = "cuda:0" if torch.cuda.is_available() else "cpu"
pipe = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-base",  # swap in a larger Whisper checkpoint here for better accuracy
    torch_dtype=torch_dtype,
    device=device,
)
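# For long recordings, the pipeline also accepts chunk_length_s (e.g.
# pipeline(..., chunk_length_s=30)) to transcribe the audio in chunks;
# it is left at the default here.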
# Transcribe an audio file, split the transcript into sentences, and classify each one
def process_audio(audio_path):
    result = pipe(audio_path)
    text = result["text"]
    # Split on sentence-ending punctuation and drop empty fragments
    sentences = re.split(r'[.!?]', text)
    sentences = [sentence.strip() for sentence in sentences if sentence.strip()]
    classifications = []
    for sentence in sentences:
        lang, classification, duration = classify_text(sentence)
        classifications.append(
            f"Sentence: {sentence}\nTopic: {classification}\nLanguage: {lang}\nTime: {duration:.2f}s"
        )
    return "\n\n".join(classifications)
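# Example (the path is illustrative, matching the download location used below):
#   print(process_audio("downloads/youtube_audio.mp3"))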
# Prefer a YouTube URL when one is given; otherwise fall back to the uploaded file
def handle_audio_input(audio_path=None, youtube_url=None):
    if youtube_url:
        audio_path = download_youtube_to_mp3(youtube_url, output_dir="downloads", filename="youtube_audio")
    if audio_path:
        return process_audio(audio_path)
    else:
        return "No audio input provided."
# Create the Gradio interface
def create_gradio_interface():
    with gr.Blocks() as iface:
        with gr.Tab("Text Input"):
            text_input = gr.Textbox(label="Text")
            lang_output = gr.Textbox(label="Detected Language")
            output_text = gr.Textbox(label="Detected Topics")
            time_taken = gr.Textbox(label="Time Taken (seconds)")
            submit_btn = gr.Button("Detect topic")

            def on_text_submit(text):
                lang, classification, duration = classify_text(text)
                return lang, classification, f"Time taken: {duration:.2f} seconds"

            submit_btn.click(fn=on_text_submit, inputs=text_input, outputs=[lang_output, output_text, time_taken])

        with gr.Tab("Audio Input"):
            audio_input = gr.Audio(label="Upload Audio", type="filepath")
            youtube_input = gr.Textbox(label="YouTube URL (optional)")
            audio_output = gr.Textbox(label="Detected Topics from Audio")
            audio_submit_btn = gr.Button("Process Audio")

            def on_audio_submit(audio, youtube_url):
                return handle_audio_input(audio_path=audio, youtube_url=youtube_url)

            audio_submit_btn.click(fn=on_audio_submit, inputs=[audio_input, youtube_input], outputs=audio_output)

    iface.launch()
if __name__ == "__main__":
    create_gradio_interface()