Spaces:

balacoon
/

revoice

Running

clementruhm committed on Jun 2, 2023

Commit

3ebe5da

•

1 Parent(s): 9c7fd5c

More fixes to handling stereo, add note on wave

Files changed (2) hide show

app.py CHANGED Viewed

@@ -47,6 +47,9 @@ def main():
             2. Select an audio sample that represents the target voice you want to convert to.
             3. Click the "Convert" button and listen to the result!
             If you are interested to plug in Voice Conversion
             service into your own application, don't hesitate to get in touch with us at
             [[email protected]](mailto:[email protected])

             2. Select an audio sample that represents the target voice you want to convert to.
             3. Click the "Convert" button and listen to the result!
+            If providing your own audio files, please use WAVE PCM.
+            Service works with 16kHz, 16 bit, mono audio.
             If you are interested to plug in Voice Conversion
             service into your own application, don't hesitate to get in touch with us at
             [[email protected]](mailto:[email protected])

vc_service_request.py CHANGED Viewed

@@ -35,8 +35,14 @@ def prepare_audio(audio: Tuple[int, np.ndarray]) -> np.ndarray:
     if wav.ndim == 2:
         # average channels
-        wav = np.mean(wav, axis=0, keepdims=False)
     # ensure proper sampling rate
     if sr != 16000:
         wav = (wav / 32768.0).astype(np.float)
@@ -94,6 +100,8 @@ def vc_service_request(
     """
     src = prepare_audio(source_audio)
     tgt = prepare_audio(target_audio)
     if len(src) >= 60 * 16000 or len(tgt) >= 30 * 16000:
         # input is way too long, dont return anything
         return

     if wav.ndim == 2:
         # average channels
+        if wav.shape[0] == 2:
+            wav = np.mean(wav, axis=0, keepdims=False)
+        if wav.shape[1] == 2:
+            wav = np.mean(wav, axis=1, keepdims=False)
+    if wav.ndim != 1:
+        return None
     # ensure proper sampling rate
     if sr != 16000:
         wav = (wav / 32768.0).astype(np.float)
     """
     src = prepare_audio(source_audio)
     tgt = prepare_audio(target_audio)
+    if not src or not tgt:
+        return
     if len(src) >= 60 * 16000 or len(tgt) >= 30 * 16000:
         # input is way too long, dont return anything
         return