Faizan15 committed on
Commit 4537507 · 1 Parent(s): b146ca6

Update app.py

Files changed (1)
  1. app.py +12 -146
app.py CHANGED
@@ -1,152 +1,18 @@
-
-import gradio as gr
-import whisper
+import torchaudio
 from transformers import pipeline
 
-model = whisper.load_model("base")
-sentiment_analysis = pipeline("sentiment-analysis", framework="pt", model="SamLowe/roberta-base-go_emotions")
-
-def analyze_sentiment(text):
-    results = sentiment_analysis(text)
-    sentiment_results = {result['label']: result['score'] for result in results}
-    return sentiment_results
-
-def get_sentiment_emoji(sentiment):
-    # Define the emojis corresponding to each sentiment
-    emoji_mapping = {
-        "disappointment": "😞",
-        "sadness": "😢",
-        "annoyance": "😠",
-        "neutral": "😐",
-        "disapproval": "👎",
-        "realization": "😮",
-        "nervousness": "😬",
-        "approval": "👍",
-        "joy": "😄",
-        "anger": "😡",
-        "embarrassment": "😳",
-        "caring": "🤗",
-        "remorse": "😔",
-        "disgust": "🤢",
-        "grief": "😥",
-        "confusion": "😕",
-        "relief": "😌",
-        "desire": "😍",
-        "admiration": "😌",
-        "optimism": "😊",
-        "fear": "😨",
-        "love": "❤️",
-        "excitement": "🎉",
-        "curiosity": "🤔",
-        "amusement": "😄",
-        "surprise": "😲",
-        "gratitude": "🙏",
-        "pride": "🦁"
-    }
-    return emoji_mapping.get(sentiment, "")
-
-def display_sentiment_results(sentiment_results, option):
-    sentiment_text = ""
-    for sentiment, score in sentiment_results.items():
-        emoji = get_sentiment_emoji(sentiment)
-        if option == "Sentiment Only":
-            sentiment_text += f"{sentiment} {emoji}\n"
-        elif option == "Sentiment + Score":
-            sentiment_text += f"{sentiment} {emoji}: {score}\n"
-    return sentiment_text
+# Load the Hugging Face ASR model
+asr_pipeline = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")
 
 def inference(audio, sentiment_option):
-    audio = whisper.load_audio(audio)
-    audio = whisper.pad_or_trim(audio)
-
-    mel = whisper.log_mel_spectrogram(audio).to(model.device)
-
-    _, probs = model.detect_language(mel)
-    lang = max(probs, key=probs.get)
-
-    options = whisper.DecodingOptions(fp16=False)
-    result = whisper.decode(model, mel, options)
-
-    sentiment_results = analyze_sentiment(result.text)
+    # Load the audio and take the first channel as a 1-D array
+    audio_tensor, sample_rate = torchaudio.load(audio)
+
+    # Perform ASR; the pipeline accepts raw samples together with their sampling rate
+    transcription = asr_pipeline({"raw": audio_tensor.numpy()[0], "sampling_rate": sample_rate})
+
+    # Perform sentiment analysis on the transcribed text
+    sentiment_results = analyze_sentiment(transcription['text'])
     sentiment_output = display_sentiment_results(sentiment_results, sentiment_option)
 
-    return lang.upper(), result.text, sentiment_output
-
-title = """<h1 align="center">🎤 Multilingual ASR 💬</h1>"""
-image_path = "thmbnail.jpg"
-description = """
-💻 This demo showcases a general-purpose speech recognition model called Whisper. It is trained on a large dataset of diverse audio and supports multilingual speech recognition, speech translation, and language identification tasks.<br><br>
-<br>
-⚙️ Components of the tool:<br>
-<br>
-&nbsp;&nbsp;&nbsp;&nbsp; - Real-time multilingual speech recognition<br>
-&nbsp;&nbsp;&nbsp;&nbsp; - Language identification<br>
-&nbsp;&nbsp;&nbsp;&nbsp; - Sentiment analysis of the transcriptions<br>
-<br>
-🎯 The sentiment analysis results are provided as a dictionary with different emotions and their corresponding scores.<br>
-<br>
-😃 The sentiment analysis results are displayed with emojis representing the corresponding sentiment.<br>
-<br>
-✅ The higher the score for a specific emotion, the stronger the presence of that emotion in the transcribed text.<br>
-<br>
-❓ Use the microphone for real-time speech recognition.<br>
-<br>
-⚡️ The model will transcribe the audio and perform sentiment analysis on the transcribed text.<br>
-"""
-
-custom_css = """
-#banner-image {
-    display: block;
-    margin-left: auto;
-    margin-right: auto;
-}
-#chat-message {
-    font-size: 14px;
-    min-height: 300px;
-}
-"""
-
-block = gr.Blocks(css=custom_css)
-
-with block:
-    gr.HTML(title)
-
-    with gr.Row():
-        with gr.Column():
-            gr.Image(image_path, elem_id="banner-image", show_label=False)
-        with gr.Column():
-            gr.HTML(description)
-
-    with gr.Group():
-        with gr.Box():
-            audio = gr.Audio(
-                label="Input Audio",
-                show_label=False,
-                source="microphone",
-                type="filepath"
-            )
-
-            sentiment_option = gr.Radio(
-                choices=["Sentiment Only", "Sentiment + Score"],
-                label="Select an option",
-                default="Sentiment Only"
-            )
-
-            btn = gr.Button("Transcribe")
-
-    lang_str = gr.Textbox(label="Language")
-
-    text = gr.Textbox(label="Transcription")
-
-    sentiment_output = gr.Textbox(label="Sentiment Analysis Results", output=True)
-
-    btn.click(inference, inputs=[audio, sentiment_option], outputs=[lang_str, text, sentiment_output])
-
-    gr.HTML('''
-    <div class="footer">
-        <p>Model by <a href="https://github.com/openai/whisper" style="text-decoration: underline;" target="_blank">OpenAI</a>
-        </p>
-    </div>
-    ''')
-
-block.launch()
+    return "N/A", transcription['text'], sentiment_output