File size: 4,738 Bytes
3f395d1
 
 
 
 
 
 
 
 
dcd9030
3f395d1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dcd9030
 
 
234c504
dcd9030
 
 
 
 
3f395d1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5741b5e
 
 
 
 
 
 
 
 
 
 
 
 
 
3f395d1
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
import gradio as gr
from langchain.prompts import PromptTemplate
from langchain_huggingface import HuggingFaceEndpoint
from langchain_core.output_parsers import JsonOutputParser
from langdetect import detect
import time
import torch
from transformers import pipeline
import re
from whisperplus import download_youtube_to_mp3

# Initialize the LLM and other components
# Module-level client for the hosted Mistral-7B-Instruct text-generation
# endpoint; classify_text() sends every prompt through this object.
# Replies are capped at 128 newly generated tokens.
# NOTE(review): temperature=0.7 is set together with do_sample=False —
# confirm which of the two the endpoint honours and drop the other.
llm = HuggingFaceEndpoint(
    repo_id="mistralai/Mistral-7B-Instruct-v0.3",
    task="text-generation",
    max_new_tokens=128,
    temperature=0.7,
    do_sample=False,
)

# Prompt for the topic-detection request. {TEXT} is the only placeholder and
# receives the phrase to classify. The braces of the JSON example at the end
# are doubled so that prompt formatting emits them as literal single braces.
template_classify = '''
You are a topic detector bot. Your task is to determine the main topic of given text phrase.

Answer general main topic not specific words.
Your answer does not contain specific information from given text.
Answer just one general main topic. Do not answer two or more topic.
Answer shortly with two or three word phrase. Do not answer with long sentence.
Answer topic with context. Example, if it says "My delivery is late", its topic is late delivery.
If you do not know the topic just answer as General.
What is the main topic of given text?:

<text>  
{TEXT}  
</text>

convert it to json format using 'Answer' as key and return it.
Your final response MUST contain only the response, no other text.
Example:
{{"Answer":["General"]}}
'''

# Parser applied to the raw model reply; classify_text() reads the "Answer"
# entry of the object it returns.
json_output_parser = JsonOutputParser()

# Define the classify_text function
def classify_text(text):
    """Detect the language of *text* and ask the LLM for its main topic.

    Args:
        text: The phrase or sentence to classify.

    Returns:
        A ``(lang, topic, duration)`` tuple:

        * ``lang`` - language code reported by ``langdetect``, or ``"en"``
          when detection fails.
        * ``topic`` - the topic taken from the ``"Answer"`` entry of the
          model's JSON reply, or ``"General"`` when the reply cannot be
          parsed or contains no usable answer.
        * ``duration`` - elapsed time of the whole call in seconds.
    """
    start = time.perf_counter()

    try:
        lang = detect(text)
    except Exception:
        # Best-effort detection: fall back to English when langdetect cannot
        # decide (e.g. empty or non-alphabetic input). Unlike a bare
        # ``except:``, this lets KeyboardInterrupt/SystemExit propagate.
        lang = "en"

    # The template only contains the {TEXT} placeholder, so that is the only
    # variable declared and supplied.
    prompt_classify = PromptTemplate(
        template=template_classify,
        input_variables=["TEXT"],
    )
    formatted_prompt = prompt_classify.format(TEXT=text)
    raw_reply = llm.invoke(formatted_prompt)

    try:
        parsed_output = json_output_parser.parse(raw_reply)
    except ValueError:
        # The model did not return valid JSON; use the fallback topic below
        # instead of failing the whole request.
        parsed_output = None

    answer = parsed_output.get("Answer") if isinstance(parsed_output, dict) else None
    if isinstance(answer, (list, tuple)):
        # Expected shape is {"Answer": ["topic"]}; take the first entry.
        answer = answer[0] if answer else None
    # A bare string answer is used whole (indexing it would yield only its
    # first character); anything empty or missing becomes "General", the
    # fallback the prompt itself prescribes.
    topic = str(answer).strip() if answer else ""
    if not topic:
        topic = "General"

    duration = time.perf_counter() - start
    return lang, topic, duration

# Initialize the speech recognition pipeline
# Half precision on the first CUDA device when a GPU is available, otherwise
# full precision on the CPU.
if torch.cuda.is_available():
    torch_dtype, device = torch.float16, "cuda:0"
else:
    torch_dtype, device = torch.float32, "cpu"

# Whisper checkpoint used for transcription; swap the model id to change it.
pipe = pipeline(
    "automatic-speech-recognition",
    model="openai/whisper-base",
    torch_dtype=torch_dtype,
    device=device,
)

def process_audio(audio_path):
    """Transcribe an audio file and classify the topic of every sentence.

    Args:
        audio_path: Path of the audio file passed to the speech-recognition
            pipeline.

    Returns:
        A string with one report per sentence (sentence, topic, language and
        classification time), reports separated by a blank line. The string
        is empty when the transcript contains no sentences.
    """
    # Whisper works on 30-second windows; chunking is the documented way to
    # have the pipeline transcribe recordings longer than that (typical for
    # YouTube downloads) in full.
    result = pipe(audio_path, chunk_length_s=30)
    text = result["text"]

    # Split on sentence-ending punctuation and drop empty fragments.
    fragments = (piece.strip() for piece in re.split(r'[.!?]', text))
    sentences = [piece for piece in fragments if piece]

    classifications = []
    for sentence in sentences:
        lang, classification, duration = classify_text(sentence)
        classifications.append(
            f"Sentence: {sentence}\nTopic: {classification}\nLanguage: {lang}\nTime: {duration:.2f}s"
        )

    return "\n\n".join(classifications)
def handle_audio_input(audio_path=None, youtube_url=None):
    """Run audio processing on a YouTube download or a local audio file.

    Args:
        audio_path: Path of a local audio file; used only when no YouTube
            URL is given.
        youtube_url: URL whose audio is downloaded to ``downloads`` under
            the name ``youtube_audio``; takes precedence over *audio_path*.

    Returns:
        The result of ``process_audio`` for the selected source, or a short
        message when neither input is provided.
    """
    source = audio_path
    if youtube_url:
        # The URL wins: replace any supplied path with the downloaded file.
        source = download_youtube_to_mp3(
            youtube_url,
            output_dir="downloads",
            filename="youtube_audio",
        )
    elif not audio_path:
        return "No audio input provided."
    return process_audio(source)
    
# Create the Gradio interface
def create_gradio_interface():
    """Build the two-tab Gradio app (text input, audio input) and launch it."""

    def on_text_submit(text):
        # Classify the typed text and format the elapsed time for display.
        detected_lang, topic, elapsed = classify_text(text)
        return detected_lang, topic, f"Time taken: {elapsed:.2f} seconds"

    def on_audio_submit(audio, youtube_url):
        # A YouTube URL takes precedence over an uploaded file.
        if youtube_url:
            return handle_audio_input(youtube_url=youtube_url)
        if audio:
            return handle_audio_input(audio_path=audio)
        return "Please provide either an audio file or a YouTube URL."

    with gr.Blocks() as demo:
        with gr.Tab("Text Input"):
            text_box = gr.Textbox(label="Text")
            language_box = gr.Textbox(label="Detected Language")
            topic_box = gr.Textbox(label="Detected Topics")
            timing_box = gr.Textbox(label="Time Taken (seconds)")
            detect_button = gr.Button("Detect topic")

            detect_button.click(
                fn=on_text_submit,
                inputs=text_box,
                outputs=[language_box, topic_box, timing_box],
            )

        with gr.Tab("Audio Input"):
            upload_widget = gr.Audio(label="Upload Audio", type="filepath")
            url_box = gr.Textbox(label="YouTube URL (will process URL first)")
            audio_result_box = gr.Textbox(label="Detected Topics from Audio")
            process_button = gr.Button("Process Audio")

            process_button.click(
                fn=on_audio_submit,
                inputs=[upload_widget, url_box],
                outputs=audio_result_box,
            )

    demo.launch()

# Build and launch the UI only when this file is executed as a script.
if __name__ == "__main__":
    create_gradio_interface()