Spaces:

Chan-Y
/

Topic-Detection

Sleeping

App Files Files Community

Chan-Y committed on Sep 4

Commit

3f395d1

•

1 Parent(s): b31b854

Create app.py

Browse files

Files changed (1) hide show

app.py +115 -0

app.py ADDED Viewed

	@@ -0,0 +1,115 @@

+import gradio as gr
+from langchain.prompts import PromptTemplate
+from langchain_huggingface import HuggingFaceEndpoint
+from langchain_core.output_parsers import JsonOutputParser
+from langdetect import detect
+import time
+import torch
+from transformers import pipeline
+import re
+# Initialize the LLM and other components
+# Text-generation client for the Mistral-7B-Instruct-v0.3 repo; each call is
+# capped at 128 newly generated tokens.
+llm = HuggingFaceEndpoint(
+    repo_id="mistralai/Mistral-7B-Instruct-v0.3",
+    task="text-generation",
+    max_new_tokens=128,
+    # NOTE(review): a temperature is configured while do_sample is False; how
+    # the backend resolves that combination is not visible from this file --
+    # confirm which decoding mode is actually intended.
+    temperature=0.7,
+    do_sample=False,
+)
+# Prompt used for topic classification. {TEXT} is the only placeholder; the
+# doubled braces in the example line keep the literal JSON from being treated
+# as a placeholder when the template is formatted. The model is instructed to
+# reply with JSON that uses 'Answer' as its key.
+template_classify = '''
+You are a topic detector bot. Your task is to determine the main topic of given text phrase.
+Answer general main topic not specific words.
+Your answer does not contain specific information from given text.
+Answer just one general main topic. Do not answer two or more topic.
+Answer shortly with two or three word phrase. Do not answer with long sentence.
+Answer topic with context. Example, if it says "My delivery is late", its topic is late delivery.
+If you do not know the topic just answer as General.
+What is the main topic of given text?:
+<text>
+{TEXT}
+</text>
+convert it to json format using 'Answer' as key and return it.
+Your final response MUST contain only the response, no other text.
+Example:
+{{"Answer":["General"]}}
+'''
+# Parser applied to the model's raw reply in classify_text.
+json_output_parser = JsonOutputParser()
+# Define the classify_text function
+def classify_text(text):
+    global llm
+    start = time.time()
+    try:
+        lang = detect(text)
+    except:
+        lang = "en"
+    prompt_classify = PromptTemplate(
+        template=template_classify,
+        input_variables=["LANG", "TEXT"]
+    )
+    formatted_prompt = prompt_classify.format(TEXT=text, LANG=lang)
+    classify = llm.invoke(formatted_prompt)
+    parsed_output = json_output_parser.parse(classify)
+    end = time.time()
+    duration = end - start
+    return lang, parsed_output["Answer"][0], duration
+# Initialize the speech recognition pipeline
+# Half precision and the first CUDA device are selected only when CUDA is
+# available; otherwise the pipeline runs in float32 on the CPU.
+torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+device = "cuda:0" if torch.cuda.is_available() else "cpu"
+# Module-level ASR pipeline shared by process_audio.
+pipe = pipeline(
+    "automatic-speech-recognition",
+    model="openai/whisper-base",  # You may want to specify your desired model here
+    torch_dtype=torch_dtype,
+    device=device,
+)
+def process_audio(audio_path):
+    result = pipe(audio_path)
+    text = result["text"]
+    sentences = re.split(r'[.!?]', text)
+    sentences = [sentence.strip() for sentence in sentences if sentence.strip()]
+    classifications = []
+    for sentence in sentences:
+        lang, classification, duration = classify_text(sentence)
+        classifications.append(f"Sentence: {sentence}\nTopic: {classification}\nLanguage: {lang}\nTime: {duration:.2f}s")
+    return "\n\n".join(classifications)
+# Create the Gradio interface
+def create_gradio_interface():
+    with gr.Blocks() as iface:
+        with gr.Tab("Text Input"):
+            text_input = gr.Textbox(label="Text")
+            lang_output = gr.Textbox(label="Detected Language")
+            output_text = gr.Textbox(label="Detected Topics")
+            time_taken = gr.Textbox(label="Time Taken (seconds)")
+            submit_btn = gr.Button("Detect topic")
+            def on_text_submit(text):
+                lang, classification, duration = classify_text(text)
+                return lang, classification, f"Time taken: {duration:.2f} seconds"
+            submit_btn.click(fn=on_text_submit, inputs=text_input, outputs=[lang_output, output_text, time_taken])
+        with gr.Tab("Audio Input"):
+            audio_input = gr.Audio(label="Upload Audio", type="filepath")
+            audio_output = gr.Textbox(label="Detected Topics from Audio")
+            audio_submit_btn = gr.Button("Process Audio")
+            audio_submit_btn.click(fn=process_audio, inputs=audio_input, outputs=audio_output)
+    iface.launch()
+# Build and launch the UI only when this file is executed as a script.
+if __name__ == "__main__":
+    create_gradio_interface()