Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,152 +1,18 @@
|
|
1 |
-
|
2 |
-
import gradio as gr
|
3 |
-
import whisper
|
4 |
from transformers import pipeline
|
5 |
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
def analyze_sentiment(text):
    """Run the module-level sentiment pipeline on *text*.

    Returns a dict mapping each predicted emotion label to its score.
    """
    predictions = sentiment_analysis(text)
    return {p['label']: p['score'] for p in predictions}
|
13 |
-
|
14 |
-
def get_sentiment_emoji(sentiment):
    """Return the display emoji for a sentiment label ("" for unknown labels).

    NOTE(review): the emoji literals below are reproduced byte-for-byte from
    the original source; they appear mojibake-garbled in this copy of the
    file — confirm against the pre-edit file's encoding.
    """
    labels = (
        "disappointment", "sadness", "annoyance", "neutral", "disapproval",
        "realization", "nervousness", "approval", "joy", "anger",
        "embarrassment", "caring", "remorse", "disgust", "grief",
        "confusion", "relief", "desire", "admiration", "optimism",
        "fear", "love", "excitement", "curiosity", "amusement",
        "surprise", "gratitude", "pride",
    )
    emojis = (
        "๐", "๐ข", "๐ ", "๐", "๐",
        "๐ฎ", "๐ฌ", "๐", "๐", "๐ก",
        "๐ณ", "๐ค", "๐", "๐คข", "๐ฅ",
        "๐", "๐", "๐", "๐", "๐",
        "๐จ", "โค๏ธ", "๐", "๐ค", "๐",
        "๐ฒ", "๐", "๐ฆ",
    )
    lookup = dict(zip(labels, emojis))
    return lookup.get(sentiment, "")
|
47 |
-
|
48 |
-
def display_sentiment_results(sentiment_results, option):
    """Format a {label: score} dict as display text.

    *option* selects the format: "Sentiment Only" emits one "label emoji"
    line per entry; "Sentiment + Score" appends the score. Any other option
    yields an empty string.
    """
    formatted = []
    for label, score in sentiment_results.items():
        icon = get_sentiment_emoji(label)
        if option == "Sentiment Only":
            formatted.append(f"{label} {icon}\n")
        elif option == "Sentiment + Score":
            formatted.append(f"{label} {icon}: {score}\n")
    return "".join(formatted)
|
57 |
|
58 |
def inference(audio, sentiment_option):
    """Transcribe *audio* with Whisper, identify its language, and run
    sentiment analysis on the transcription.

    Returns a 3-tuple (language, transcription, sentiment text) — the
    Transcribe button wires this function to three Gradio outputs, so all
    three values must be returned (the previous version ended in a bare
    ``return``, which left every output empty).

    NOTE(review): the preprocessing and language-detection lines were blanked
    out in this copy of the file; they are reconstructed here from the
    canonical Whisper usage — confirm against the pre-edit file.
    """
    # Standard Whisper preprocessing: load, pad/trim to 30 s, mel spectrogram.
    audio = whisper.load_audio(audio)
    audio = whisper.pad_or_trim(audio)
    mel = whisper.log_mel_spectrogram(audio).to(model.device)

    # Language identification from the mel features.
    _, probs = model.detect_language(mel)
    lang = max(probs, key=probs.get)

    # fp16=False keeps decoding CPU-safe.
    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(model, mel, options)

    sentiment_results = analyze_sentiment(result.text)
    sentiment_output = display_sentiment_results(sentiment_results, sentiment_option)

    return lang.upper(), result.text, sentiment_output
|
74 |
-
|
75 |
-
# --- Gradio UI ---------------------------------------------------------------
# NOTE(review): the emoji characters in the strings below are reproduced
# byte-for-byte from the original; they appear mojibake-garbled in this copy.

title = """<h1 align="center">๐ค Multilingual ASR ๐ฌ</h1>"""
image_path = "thmbnail.jpg"
description = """
๐ป This demo showcases a general-purpose speech recognition model called Whisper. It is trained on a large dataset of diverse audio and supports multilingual speech recognition, speech translation, and language identification tasks.<br><br>
<br>
โ๏ธ Components of the tool:<br>
<br>
- Real-time multilingual speech recognition<br>
- Language identification<br>
- Sentiment analysis of the transcriptions<br>
<br>
๐ฏ The sentiment analysis results are provided as a dictionary with different emotions and their corresponding scores.<br>
<br>
๐ The sentiment analysis results are displayed with emojis representing the corresponding sentiment.<br>
<br>
โ The higher the score for a specific emotion, the stronger the presence of that emotion in the transcribed text.<br>
<br>
โ Use the microphone for real-time speech recognition.<br>
<br>
โก๏ธ The model will transcribe the audio and perform sentiment analysis on the transcribed text.<br>
"""

custom_css = """
#banner-image {
    display: block;
    margin-left: auto;
    margin-right: auto;
}
#chat-message {
    font-size: 14px;
    min-height: 300px;
}
"""

block = gr.Blocks(css=custom_css)

with block:
    gr.HTML(title)

    with gr.Row():
        with gr.Column():
            gr.Image(image_path, elem_id="banner-image", show_label=False)
        with gr.Column():
            gr.HTML(description)

    with gr.Group():
        with gr.Box():
            audio = gr.Audio(
                label="Input Audio",
                show_label=False,
                source="microphone",
                type="filepath"
            )

            sentiment_option = gr.Radio(
                choices=["Sentiment Only", "Sentiment + Score"],
                label="Select an option",
                # Bug fix: gr.Radio takes `value=` for the initial selection;
                # `default=` is not a valid keyword and raises at startup.
                value="Sentiment Only"
            )

            btn = gr.Button("Transcribe")

    lang_str = gr.Textbox(label="Language")

    text = gr.Textbox(label="Transcription")

    # Bug fix: gr.Textbox has no `output=True` keyword; components are
    # outputs by virtue of being listed in btn.click(outputs=...).
    sentiment_output = gr.Textbox(label="Sentiment Analysis Results")

    btn.click(inference, inputs=[audio, sentiment_option], outputs=[lang_str, text, sentiment_output])

    gr.HTML('''
    <div class="footer">
        <p>Model by <a href="https://github.com/openai/whisper" style="text-decoration: underline;" target="_blank">OpenAI</a>
        </p>
    </div>
    ''')

block.launch()
|
|
|
1 |
+
import torchaudio

from transformers import pipeline

# Hugging Face ASR pipeline used by inference() below.
# NOTE(review): wav2vec2-base-960h is English-only and does no language ID;
# the sentiment pipeline this app's helpers rely on must still be defined
# elsewhere in the file — verify it was not dropped by this edit.
asr_pipeline = pipeline(
    "automatic-speech-recognition",
    model="facebook/wav2vec2-base-960h",
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
def inference(audio, sentiment_option):
    """Transcribe *audio* (a filepath) with the ASR pipeline and run
    sentiment analysis on the transcription.

    Returns a 3-tuple (language, transcription, sentiment text) matching the
    three Gradio outputs wired to the Transcribe button. The ASR model
    (wav2vec2-base-960h) performs no language identification, so the first
    value is always "N/A".
    """
    # torchaudio.load returns (waveform, sample_rate); keep the rate instead
    # of discarding it — tensors have no `.sampling_rate` attribute, so the
    # previous code crashed here.
    waveform, sample_rate = torchaudio.load(audio)

    # Mix down to mono and hand raw samples plus their rate to the pipeline.
    # The pipeline takes the rate via the {"raw": ..., "sampling_rate": ...}
    # dict input, not as a bare keyword argument.
    speech = waveform.mean(dim=0).numpy()
    result = asr_pipeline({"raw": speech, "sampling_rate": sample_rate})

    # The ASR pipeline returns a dict keyed by "text" (the previous
    # transcription[0]['sentence'] lookup raised KeyError/TypeError).
    text = result["text"]

    # Perform sentiment analysis on the transcription.
    sentiment_results = analyze_sentiment(text)
    sentiment_output = display_sentiment_results(sentiment_results, sentiment_option)

    return "N/A", text, sentiment_output
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|