chaitanya9 committed on
Commit 1293675 · 1 Parent(s): e466a5a

Delete test.py

Files changed (1)
  1. test.py +0 -184
test.py DELETED
@@ -1,184 +0,0 @@
- from emotion_recognition import EmotionRecognizer
-
- import pyaudio
- import os
- import wave
- from sys import byteorder
- from array import array
- from struct import pack
- from sklearn.ensemble import GradientBoostingClassifier, BaggingClassifier
- import gradio as gr
- from sklearn.svm import SVC
-
- from utils import get_best_estimators
-
- THRESHOLD = 500
- CHUNK_SIZE = 1024
- FORMAT = pyaudio.paInt16
- RATE = 16000
-
- SILENCE = 30
-
- def is_silent(snd_data):
-     "Returns 'True' if below the 'silent' threshold"
-     return max(snd_data) < THRESHOLD
-
- def normalize(snd_data):
-     "Average the volume out"
-     MAXIMUM = 16384
-     times = float(MAXIMUM)/max(abs(i) for i in snd_data)
-
-     r = array('h')
-     for i in snd_data:
-         r.append(int(i*times))
-     return r
-
- def trim(snd_data):
-     "Trim the blank spots at the start and end"
-     def _trim(snd_data):
-         snd_started = False
-         r = array('h')
-
-         for i in snd_data:
-             if not snd_started and abs(i)>THRESHOLD:
-                 snd_started = True
-                 r.append(i)
-
-             elif snd_started:
-                 r.append(i)
-         return r
-
-     # Trim to the left
-     snd_data = _trim(snd_data)
-
-     # Trim to the right
-     snd_data.reverse()
-     snd_data = _trim(snd_data)
-     snd_data.reverse()
-     return snd_data
-
- def add_silence(snd_data, seconds):
-     "Add silence to the start and end of 'snd_data' of length 'seconds' (float)"
-     r = array('h', [0 for i in range(int(seconds*RATE))])
-     r.extend(snd_data)
-     r.extend([0 for i in range(int(seconds*RATE))])
-     return r
-
- def record():
-     """
-     Record a word or words from the microphone and
-     return the data as an array of signed shorts.
-
-     Normalizes the audio, trims silence from the
-     start and end, and pads with 0.5 seconds of
-     blank sound to make sure VLC et al can play
-     it without getting chopped off.
-     """
-     p = pyaudio.PyAudio()
-     stream = p.open(format=FORMAT, channels=1, rate=RATE,
-                     input=True, output=True,
-                     frames_per_buffer=CHUNK_SIZE)
-
-     num_silent = 0
-     snd_started = False
-
-     r = array('h')
-
-     while 1:
-         # little endian, signed short
-         snd_data = array('h', stream.read(CHUNK_SIZE))
-         if byteorder == 'big':
-             snd_data.byteswap()
-         r.extend(snd_data)
-
-         silent = is_silent(snd_data)
-
-         if silent and snd_started:
-             num_silent += 1
-         elif not silent and not snd_started:
-             snd_started = True
-
-         if snd_started and num_silent > SILENCE:
-             break
-
-     sample_width = p.get_sample_size(FORMAT)
-     stream.stop_stream()
-     stream.close()
-     p.terminate()
-
-     r = normalize(r)
-     r = trim(r)
-     r = add_silence(r, 0.5)
-     return sample_width, r
-
- def record_to_file(path):
-     "Records from the microphone and outputs the resulting data to 'path'"
-     sample_width, data = record()
-     data = pack('<' + ('h'*len(data)), *data)
-
-     wf = wave.open(path, 'wb')
-     wf.setnchannels(1)
-     wf.setsampwidth(sample_width)
-     wf.setframerate(RATE)
-     wf.writeframes(data)
-     wf.close()
-
-
- def get_estimators_name(estimators):
-     result = [ '"{}"'.format(estimator.__class__.__name__) for estimator, _, _ in estimators ]
-     return ','.join(result), {estimator_name.strip('"'): estimator for estimator_name, (estimator, _, _) in zip(result, estimators)}
-
- def emotion_recognizer(inp):
-     print(inp)
-     estimators = get_best_estimators(True)
-     estimators_str, estimator_dict = get_estimators_name(estimators)
-     features = ["mfcc", "chroma", "mel"]
-     detector = EmotionRecognizer(estimator_dict["BaggingClassifier"], emotions=["sad","neutral","happy"], features=features,
-                                  verbose=0)
-     detector.train()
-     print("Test accuracy score: {:.3f}%".format(detector.test_score() * 100))
-     return detector.predict(inp)
-
- def greet(name):
-     return "Helo" + name + "!"
-
- if __name__ == "__main__":
-     # estimators = get_best_estimators(True)
-     # estimators_str, estimator_dict = get_estimators_name(estimators)
-     # import argparse
-     # parser = argparse.ArgumentParser(description="""
-     #     Testing emotion recognition system using your voice,
-     #     please consider changing the model and/or parameters as you wish.
-     #     """)
-     # parser.add_argument("-e", "--emotions", help=
-     #     """Emotions to recognize separated by a comma ',', available emotions are
-     #     "neutral", "calm", "happy" "sad", "angry", "fear", "disgust", "ps" (pleasant surprise)
-     #     and "boredom", default is "sad,neutral,happy"
-     #     """, default="sad,neutral,happy")
-     # parser.add_argument("-m", "--model", help=
-     #     """
-     #     The model to use, 8 models available are: {},
-     #     default is "BaggingClassifier"
-     #     """.format(estimators_str), default="BaggingClassifier")
-     #
-     #
-     # # Parse the arguments passed
-     # args = parser.parse_args()
-     #
-     # features = ["mfcc", "chroma", "mel"]
-     # detector = EmotionRecognizer(estimator_dict[args.model], emotions=args.emotions.split(","), features=features, verbose=0)
-     # detector.train()
-     # print("Test accuracy score: {:.3f}%".format(detector.test_score()*100))
-     # print("Please talk")
-
-     # filename = "test.wav"
-     # record_to_file(filename)
-     # result = detector.predict(filename)
-     # print(result)
-
-     audio = gr.inputs.Audio(source="upload", type="numpy", label=None, optional=False)
-
-     #gr.Interface(fn=emotion_recognizer, inputs=audio, outputs="text", capture_session=True).launch()
-
-     iface = gr.Interface(fn=emotion_recognizer, inputs = "audio", outputs = "text")
-     iface.launch(share=True)