Spaces:

audeering
/

speech-analysis

Running

App Files Files Community

hagenw committed on Aug 29, 2024

Commit

ba45a7b

1 Parent(s): fa18e4b

Fix typo

Browse files

Files changed (1) hide show

app.py +9 -9

app.py CHANGED Viewed

@@ -11,12 +11,13 @@ import audiofile
 import audresample
 model_name = "audeering/wav2vec2-large-robust-24-ft-age-gender"
 duration = 1  # limit processing of audio
-class ModelHead(nn.Module):
-    r"""Classification head."""
     def __init__(self, config, num_labels):
@@ -39,7 +40,7 @@ class ModelHead(nn.Module):
 class AgeGenderModel(Wav2Vec2PreTrainedModel):
-    r"""Speech emotion classifier."""
     def __init__(self, config):
@@ -47,8 +48,8 @@ class AgeGenderModel(Wav2Vec2PreTrainedModel):
         self.config = config
         self.wav2vec2 = Wav2Vec2Model(config)
-        self.age = ModelHead(config, 1)
-        self.gender = ModelHead(config, 3)
         self.init_weights()
     def forward(
@@ -67,7 +68,6 @@ class AgeGenderModel(Wav2Vec2PreTrainedModel):
 # load model from hub
-device = 0 if torch.cuda.is_available() else "cpu"
 processor = Wav2Vec2Processor.from_pretrained(model_name)
 model = AgeGenderModel.from_pretrained(model_name)
@@ -105,14 +105,14 @@ def process_func(x: np.ndarray, sampling_rate: int) -> dict:
 def recognize(input_file):
     # sampling_rate, signal = input_microphone
     # signal = signal.astype(np.float32, order="C") / 32768.0
-    if input_fileis not None:
-        signal, sampling_rate = audiofile.read(input_file, duration=duration)
-    else:
         raise gr.Error(
             "No audio file submitted! "
             "Please upload or record an audio file "
             "before submitting your request."
         )
     # Resample to sampling rate supported by the models
     target_rate = 16000
     signal = audresample.resample(signal, sampling_rate, target_rate)

 import audresample
+device = 0 if torch.cuda.is_available() else "cpu"
 model_name = "audeering/wav2vec2-large-robust-24-ft-age-gender"
 duration = 1  # limit processing of audio
+class AgeGenderHead(nn.Module):
+    r"""Age-gender model head."""
     def __init__(self, config, num_labels):
 class AgeGenderModel(Wav2Vec2PreTrainedModel):
+    r"""Age-gender recognition model."""
     def __init__(self, config):
         self.config = config
         self.wav2vec2 = Wav2Vec2Model(config)
+        self.age = AgeGenderHead(config, 1)
+        self.gender = AgeGenderHead(config, 3)
         self.init_weights()
     def forward(
 # load model from hub
 processor = Wav2Vec2Processor.from_pretrained(model_name)
 model = AgeGenderModel.from_pretrained(model_name)
 def recognize(input_file):
     # sampling_rate, signal = input_microphone
     # signal = signal.astype(np.float32, order="C") / 32768.0
+    if input_file is None:
         raise gr.Error(
             "No audio file submitted! "
             "Please upload or record an audio file "
             "before submitting your request."
         )
+    signal, sampling_rate = audiofile.read(input_file, duration=duration)
     # Resample to sampling rate supported by the models
     target_rate = 16000
     signal = audresample.resample(signal, sampling_rate, target_rate)