Spaces:

chaitanya9
/

emotion_recognizer

Runtime error

App Files Files Community

chaitanya9 committed on Nov 30, 2021

Commit

1293675

1 Parent(s): e466a5a

Delete test.py

Browse files

Files changed (1) hide show

test.py +0 -184

test.py DELETED Viewed

@@ -1,184 +0,0 @@
-from emotion_recognition import EmotionRecognizer
-import pyaudio
-import os
-import wave
-from sys import byteorder
-from array import array
-from struct import pack
-from sklearn.ensemble import GradientBoostingClassifier, BaggingClassifier
-import gradio as gr
-from sklearn.svm import SVC
-from utils import get_best_estimators
-THRESHOLD = 500  # amplitude cutoff that is_silent() and trim() compare samples against
-CHUNK_SIZE = 1024  # frames requested per stream.read() call; also frames_per_buffer for the stream
-FORMAT = pyaudio.paInt16  # PyAudio sample format passed to p.open() and p.get_sample_size()
-RATE = 16000  # sample rate given to the audio stream, the WAV header and add_silence()
-SILENCE = 30  # record() stops once more than this many silent chunks were seen after sound started
def is_silent(snd_data, threshold=None):
    """Return True when no sample in ``snd_data`` reaches the silence threshold.

    Args:
        snd_data: Iterable of signed integer samples (one audio chunk).
        threshold: Amplitude below which a sample counts as silent. When
            omitted, the module-level ``THRESHOLD`` is used.

    Returns:
        True if the largest absolute sample value is below the threshold,
        otherwise False. An empty chunk is reported as silent.
    """
    if threshold is None:
        threshold = THRESHOLD
    # Compare magnitudes so a chunk whose loud samples are all negative is
    # not mistaken for silence; this matches the abs() test used by trim().
    # default=0 keeps an empty chunk from raising ValueError.
    peak = max((abs(sample) for sample in snd_data), default=0)
    return peak < threshold
def normalize(snd_data):
    """Scale the samples so that the loudest one has a magnitude of 16384.

    Args:
        snd_data: Sequence of signed integer samples.

    Returns:
        A new ``array('h')`` with every sample multiplied by
        ``16384 / peak`` and truncated to an integer. If the input is empty
        or contains only zeros there is nothing to scale, so an unscaled
        copy is returned instead of dividing by zero.
    """
    MAXIMUM = 16384
    peak = max((abs(sample) for sample in snd_data), default=0)
    if peak == 0:
        # All-zero (or empty) audio: scaling is undefined, return it as is.
        return array('h', snd_data)
    times = float(MAXIMUM) / peak
    return array('h', (int(sample * times) for sample in snd_data))
def trim(snd_data, threshold=None):
    """Drop the quiet samples at the start and end of ``snd_data``.

    Args:
        snd_data: Sequence of signed integer samples.
        threshold: A sample is "loud" when its absolute value is strictly
            greater than this. When omitted, the module-level ``THRESHOLD``
            is used.

    Returns:
        A new ``array('h')`` spanning the first loud sample through the last
        loud sample, inclusive. Quiet samples between them are kept. If no
        sample is loud the result is an empty array. The input is not
        modified.
    """
    if threshold is None:
        threshold = THRESHOLD
    samples = array('h', snd_data)

    # Index of the first loud sample, scanning from the left.
    first = next(
        (idx for idx, sample in enumerate(samples) if abs(sample) > threshold),
        None,
    )
    if first is None:
        return array('h')

    # Index of the last loud sample, scanning from the right; the scan is
    # guaranteed to stop at ``first`` at the latest.
    last = next(
        idx for idx in range(len(samples) - 1, first - 1, -1)
        if abs(samples[idx]) > threshold
    )
    return samples[first:last + 1]
def add_silence(snd_data, seconds, rate=None):
    """Pad ``snd_data`` with zero samples at both ends.

    Args:
        snd_data: Iterable of signed integer samples.
        seconds: Length of the padding added to each end, in seconds (float).
        rate: Samples per second used to convert ``seconds`` into a sample
            count. When omitted, the module-level ``RATE`` is used.

    Returns:
        A new ``array('h')`` made of ``int(seconds * rate)`` zeros, then the
        original samples, then the same number of zeros again.
    """
    if rate is None:
        rate = RATE
    # Array repetition builds the zero run directly instead of materialising
    # a temporary Python list of zeros for each end.
    padding = array('h', [0]) * int(seconds * rate)
    r = array('h', padding)
    r.extend(snd_data)
    r.extend(padding)
    return r
def record():
    """
    Record a word or words from the microphone and
    return the data as an array of signed shorts.

    Chunks are read until sound has been detected and more than
    SILENCE silent chunks have been seen afterwards. The captured audio
    is then normalized, trimmed of silence at the start and end, and
    padded with 0.5 seconds of blank sound to make sure VLC et al can
    play it without getting chopped off.

    Returns:
        A tuple ``(sample_width, samples)`` where ``sample_width`` is the
        value reported by PyAudio for FORMAT and ``samples`` is an
        ``array('h')``.
    """
    p = pyaudio.PyAudio()
    try:
        # Only capture is needed, so the stream is opened input-only; asking
        # for an output stream as well fails on hosts without a playback
        # device.
        stream = p.open(format=FORMAT, channels=1, rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK_SIZE)
        try:
            num_silent = 0
            snd_started = False
            r = array('h')
            while True:
                # little endian, signed short
                snd_data = array('h', stream.read(CHUNK_SIZE))
                if byteorder == 'big':
                    snd_data.byteswap()
                r.extend(snd_data)
                silent = is_silent(snd_data)
                # NOTE(review): num_silent is cumulative; it is not reset
                # when sound resumes after a quiet chunk.
                if silent and snd_started:
                    num_silent += 1
                elif not silent and not snd_started:
                    snd_started = True
                if snd_started and num_silent > SILENCE:
                    break
        finally:
            # Release the stream even if reading raised.
            stream.stop_stream()
            stream.close()
        sample_width = p.get_sample_size(FORMAT)
    finally:
        p.terminate()
    r = normalize(r)
    r = trim(r)
    r = add_silence(r, 0.5)
    return sample_width, r
def record_to_file(path):
    """Record from the microphone and write the result to ``path`` as a WAV file.

    Args:
        path: Destination passed to ``wave.open`` in ``'wb'`` mode.

    The file is written as mono, with the sample width returned by
    ``record()`` and a frame rate of RATE.
    """
    sample_width, data = record()
    # A counted format ("<Nh") avoids building a format string with one
    # character per sample; the packed bytes are little-endian shorts.
    frames = pack('<%dh' % len(data), *data)
    # The context manager closes the file even if a write fails.
    with wave.open(path, 'wb') as wf:
        wf.setnchannels(1)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(frames)
def get_estimators_name(estimators):
    """Describe a collection of estimators by their class names.

    Args:
        estimators: Sequence of 3-item tuples whose first item is the
            estimator object; the other two items are ignored here.

    Returns:
        A tuple ``(names, lookup)``: ``names`` is a comma-separated string of
        the class names, each wrapped in double quotes, and ``lookup`` maps
        each bare class name to its estimator object.
    """
    class_names = [model.__class__.__name__ for model, _, _ in estimators]
    quoted = ','.join('"{}"'.format(class_name) for class_name in class_names)
    lookup = {}
    for class_name, (model, _, _) in zip(class_names, estimators):
        lookup[class_name] = model
    return quoted, lookup
def _build_detector():
    """Create, train and return the EmotionRecognizer used for predictions.

    The estimator is the entry named "BaggingClassifier" among those returned
    by ``get_best_estimators(True)``. The test accuracy is printed once
    training has finished.
    """
    estimators = get_best_estimators(True)
    _, estimator_dict = get_estimators_name(estimators)
    features = ["mfcc", "chroma", "mel"]
    detector = EmotionRecognizer(estimator_dict["BaggingClassifier"], emotions=["sad","neutral","happy"], features=features,
                                 verbose=0)
    detector.train()
    print("Test accuracy score: {:.3f}%".format(detector.test_score() * 100))
    return detector


def emotion_recognizer(inp):
    """Predict the emotion for ``inp`` and return the detector's prediction.

    Args:
        inp: The audio input forwarded unchanged to ``detector.predict``.
            NOTE(review): the disabled CLI flow in this file passes a WAV
            file name to ``predict``; confirm that what the web UI supplies
            here has the form ``predict`` expects.

    Returns:
        Whatever ``detector.predict(inp)`` returns.
    """
    print(inp)
    # Training is independent of the request, so the trained detector is
    # built on the first call and reused afterwards instead of retraining
    # for every prediction.
    detector = getattr(emotion_recognizer, "_detector", None)
    if detector is None:
        detector = _build_detector()
        emotion_recognizer._detector = detector
    return detector.predict(inp)
def greet(name):
    """Return a greeting for ``name``, e.g. ``"Hello Ada!"``."""
    # The original text read "Helo" and ran straight into the name.
    return "Hello " + name + "!"
-if __name__ == "__main__":  # script entry point; the commented-out lines below are a disabled argparse + microphone-recording flow
-    # estimators = get_best_estimators(True)
-    # estimators_str, estimator_dict = get_estimators_name(estimators)
-    # import argparse
-    # parser = argparse.ArgumentParser(description="""
-    #                                 Testing emotion recognition system using your voice,
-    #                                 please consider changing the model and/or parameters as you wish.
-    #                                 """)
-    # parser.add_argument("-e", "--emotions", help=
-    #                                         """Emotions to recognize separated by a comma ',', available emotions are
-    #                                         "neutral", "calm", "happy" "sad", "angry", "fear", "disgust", "ps" (pleasant surprise)
-    #                                         and "boredom", default is "sad,neutral,happy"
-    #                                         """, default="sad,neutral,happy")
-    # parser.add_argument("-m", "--model", help=
-    #                                     """
-    #                                     The model to use, 8 models available are: {},
-    #                                     default is "BaggingClassifier"
-    #                                     """.format(estimators_str), default="BaggingClassifier")
-    #
-    #
-    # # Parse the arguments passed
-    # args = parser.parse_args()
-    #
-    # features = ["mfcc", "chroma", "mel"]
-    # detector = EmotionRecognizer(estimator_dict[args.model], emotions=args.emotions.split(","), features=features, verbose=0)
-    # detector.train()
-    # print("Test accuracy score: {:.3f}%".format(detector.test_score()*100))
-    # print("Please talk")
-    # filename = "test.wav"
-    # record_to_file(filename)
-    # result = detector.predict(filename)
-    # print(result)
-    audio = gr.inputs.Audio(source="upload", type="numpy", label=None, optional=False)  # NOTE(review): 'audio' is never used; the Interface below is given the string "audio" instead
-    #gr.Interface(fn=emotion_recognizer, inputs=audio, outputs="text", capture_session=True).launch()
-    iface = gr.Interface(fn=emotion_recognizer, inputs = "audio", outputs = "text")  # web UI: audio input -> emotion_recognizer -> text output
-    iface.launch(share=True)  # share=True presumably requests a public share link - TODO confirm against the installed gradio version