Spaces:

balacoon
/

revoice

Running

App Files Files Community

clementruhm committed on May 28, 2023

Commit

802e11f

1 Parent(s): 8f9b0ff

Initial commit of voice conversion demo

Browse files

Files changed (15) hide show

.gitignore +1 -0
README.md +3 -3
__init__.py +0 -0
app.py +94 -0
references/cate_blanchett.wav +0 -0
references/george_clooney.wav +0 -0
references/james_earl_jones.wav +0 -0
references/kratos.wav +0 -0
references/meryl_streep.wav +0 -0
references/mike_rowe.wav +0 -0
references/nikole_kidman.wav +0 -0
references/sam_elliott.wav +0 -0
requirements.txt +4 -0
setup.cfg +12 -0
vc_service_request.py +95 -0

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ __pycache__

README.md CHANGED Viewed

@@ -1,12 +1,12 @@
 ---
 title: Voice Conversion Service
-emoji: 🐠
 colorFrom: blue
-colorTo: blue
 sdk: gradio
 sdk_version: 3.32.0
 app_file: app.py
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 title: Voice Conversion Service
+emoji: 💬
 colorFrom: blue
+colorTo: yellow
 sdk: gradio
 sdk_version: 3.32.0
 app_file: app.py
 pinned: false
 ---
+Interactive demo for Voice Conversion service by Balacoon.

__init__.py ADDED Viewed

File without changes

app.py ADDED Viewed

	@@ -0,0 +1,94 @@

+"""
+Copyright 2023 Balacoon
+Voice Conversion service interactive demo
+"""
+import glob
+import logging
+import os
+import gradio as gr
+from vc_service_request import vc_service_request
+script_dir = os.path.dirname(os.path.abspath(__file__))
def main():
    """
    Build the Gradio page of the voice conversion demo and launch it.

    The page consists of an intro text, a panel with the source audio
    (microphone recording or uploaded file), a panel with the target voice
    (uploaded file or one of the wav files from the "references" directory
    next to this script), a "Convert" button and a player for the result.
    """
    logging.basicConfig(level=logging.INFO)

    with gr.Blocks() as demo:
        gr.Markdown(
            """
            <h1 align="center">Balacoon🦝 Voice Conversion</h1>
            Welcome to the live demo of Balacoon's Voice Conversion service.
            Check out our [website](https://balacoon.com/demo/#voice-conversion)
            to learn more.
            Voice Conversion allows you to transform your own voice
            into the voice of another person using just a single sample.
            For optimal results, we recommend using clean audio files in English.
            Here's how it works:
            1. Begin by recording your voice.
            2. Select an audio sample that represents the target voice you want to convert to.
            3. Click the "Convert" button and listen to the result!
            If you are interested to plug in Voice Conversion
            service into your own application, don't hesitate to get in touch with us at
            [[email protected]](mailto:[email protected])
            """
        )

        with gr.Row():
            with gr.Column(variant="panel"):
                src_audio_mic = gr.Audio(source="microphone", label="Record your voice")
                src_audio_file = gr.Audio(
                    source="upload", label="Or upload audio to convert"
                )
            with gr.Column(variant="panel"):
                tgt_audio_file = gr.Audio(
                    source="upload", label="Select audio with target voice"
                )
                # glob returns files in arbitrary, filesystem-dependent order;
                # sort so the examples are listed the same way on every start
                tgt_examples_paths = sorted(
                    glob.glob(os.path.join(script_dir, "references", "*.wav"))
                )
                gr.Examples(
                    tgt_examples_paths,
                    inputs=[tgt_audio_file],
                )

        with gr.Row():
            convert_btn = gr.Button("Convert")

        with gr.Row():
            result_audio = gr.Audio()

        def voice_conversion(src_from_mic_, src_from_file_, tgt_from_file_):
            """
            Callback of the "Convert" button.

            Picks the source audio (the microphone recording takes priority
            over the uploaded file) and sends it together with the target
            audio to the service. Returns the value of `vc_service_request`,
            or None when the source or the target is missing.
            """
            src_ = src_from_mic_ or src_from_file_
            tgt_ = tgt_from_file_
            if not src_ or not tgt_:
                logging.warning("source or target are not provided")
                return None
            return vc_service_request(src_, tgt_)

        convert_btn.click(
            voice_conversion,
            inputs=[src_audio_mic, src_audio_file, tgt_audio_file],
            outputs=result_audio,
        )

    demo.queue(concurrency_count=1).launch()
+if __name__ == "__main__":
+    main()

references/cate_blanchett.wav ADDED Viewed

Binary file (401 kB). View file

references/george_clooney.wav ADDED Viewed

Binary file (343 kB). View file

references/james_earl_jones.wav ADDED Viewed

Binary file (402 kB). View file

references/kratos.wav ADDED Viewed

Binary file (411 kB). View file

references/meryl_streep.wav ADDED Viewed

Binary file (398 kB). View file

references/mike_rowe.wav ADDED Viewed

Binary file (453 kB). View file

references/nikole_kidman.wav ADDED Viewed

Binary file (439 kB). View file

references/sam_elliott.wav ADDED Viewed

Binary file (461 kB). View file

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+numpy==1.23.2
+resampy==0.4.2
+streamlit==1.22.0
+websockets==10.3

setup.cfg ADDED Viewed

	@@ -0,0 +1,12 @@

+[flake8]
+max_complexity=10
+per-file-ignores=__init__.py:F401,F403
+ignore = E203,W503
+max-line-length=119
+[isort]
+profile=black
+line_length=119
+[mypy]
+ignore_missing_imports = True

vc_service_request.py ADDED Viewed

	@@ -0,0 +1,95 @@

+"""
+Copyright 2023 Balacoon
+contains implementation
+for voice conversion request
+"""
+import asyncio
+import base64
+import hashlib
+import json
+import ssl
+import time
+from typing import Tuple
+import numpy as np
+import resampy
+import streamlit as st
+import websockets
def prepare_audio(audio: Tuple[int, np.ndarray]) -> np.ndarray:
    """
    Ensures that audio is in int16 format, 16khz mono.

    Parameters
    ----------
    audio: Tuple[int, np.ndarray]
        sampling rate and samples. int32 and floating point samples are
        converted to int16. Multi-channel audio is expected in
        (samples, channels) layout and is averaged into a single channel.

    Returns
    -------
    wav: np.ndarray
        1-D array with samples at 16khz
    """
    sr, wav = audio

    # ensure proper type
    if wav.dtype == np.int32:
        # widen before abs(): abs(-2**31) overflows in int32
        max_val = np.max(np.abs(wav.astype(np.int64))) if wav.size else 0
        # values beyond the int16 range mean the data uses the full int32 scale
        mult = (32767.0 / 2**31) if max_val > 32768 else 1.0
        scaled = wav.astype(np.float32) * mult
        wav = np.clip(scaled, -32768, 32767).astype(np.int16)
    elif np.issubdtype(wav.dtype, np.floating):
        max_val = np.max(np.abs(wav)) if wav.size else 0.0
        # floats within [-1, 1] are normalized audio, otherwise they are
        # taken as already being on the int16 scale
        mult = 32767.0 if max_val <= 1.0 else 1.0
        # clip so that out-of-range values saturate instead of wrapping around
        wav = np.clip(wav * mult, -32768, 32767).astype(np.int16)

    # ensure single channel, otherwise resampling below would run
    # along the channel axis instead of the time axis
    if wav.ndim > 1:
        wav = wav.mean(axis=1).astype(wav.dtype)

    # ensure proper sampling rate
    if sr != 16000:
        # np.float alias is removed from recent numpy, use explicit float64
        wav = (wav / 32768.0).astype(np.float64)
        wav = resampy.resample(wav, sr, 16000)
        # resampling may overshoot full scale, saturate before the cast
        wav = np.clip(wav * 32768.0, -32768, 32767).astype(np.int16)
    return wav
def create_signature(api_secret: str = None) -> str:
    """
    helper function that creates signature,
    required to authenticate the request

    Parameters
    ----------
    api_secret: str
        secret to sign the request with. When omitted, it is read
        from streamlit secrets under the "api_secret" key.

    Returns
    -------
    signature: str
        hex digest of sha256 computed over the secret
        concatenated with the coarse time stamp
    """
    if api_secret is None:
        # the streamlit module itself is not subscriptable, secrets live in st.secrets
        # NOTE(review): assumes "api_secret" is configured as a streamlit secret - confirm
        api_secret = st.secrets["api_secret"]
    # time stamp changes once in 1000 seconds
    # NOTE(review): presumably the granularity the service validates against - confirm
    int_time = int(time.time() / 1000)
    signature_input = (api_secret + str(int_time)).encode()
    return hashlib.sha256(signature_input).hexdigest()
async def async_service_request(source: np.ndarray, target: np.ndarray) -> np.ndarray:
    """
    Sends source and target audio to the voice conversion service
    over a websocket and collects the converted audio.

    Parameters
    ----------
    source: np.ndarray
        samples of the audio to convert, sent as base64-encoded raw bytes
    target: np.ndarray
        samples of the audio with the target voice, sent the same way

    Returns
    -------
    result: np.ndarray
        all received chunks, interpreted as int16 and concatenated.
        None if nothing was received before the connection was closed.

    Raises
    ------
    RuntimeError
        if the service replies with a text message instead of binary audio
    """
    # the streamlit module itself is not subscriptable, secrets live in st.secrets
    # NOTE(review): assumes "endpoint", "api_key" and "api_secret"
    # are configured as streamlit secrets - confirm
    secrets = st.secrets
    ssl_context = ssl.create_default_context()
    async with websockets.connect(
        secrets["endpoint"], close_timeout=1024, ssl=ssl_context
    ) as websocket:
        request_dict = {
            "source": base64.b64encode(source.tobytes()).decode("utf-8"),
            "target": base64.b64encode(target.tobytes()).decode("utf-8"),
            "api_key": secrets["api_key"],
            "signature": create_signature(secrets["api_secret"]),
        }
        await websocket.send(json.dumps(request_dict))

        # read reply: audio arrives in chunks until the connection is closed
        result_lst = []
        try:
            async for message in websocket:
                if isinstance(message, str):
                    # text can't be parsed as samples, report it as is
                    raise RuntimeError(
                        f"unexpected text reply from service: {message}"
                    )
                result_lst.append(np.frombuffer(message, dtype="int16"))
        except websockets.exceptions.ConnectionClosed:
            # closed connection marks the end of the reply,
            # keep whatever was received so far
            pass
        return np.concatenate(result_lst) if result_lst else None
def vc_service_request(
    source_audio: Tuple[int, np.ndarray], target_audio: Tuple[int, np.ndarray]
) -> Tuple[int, np.ndarray]:
    """
    prepares audio (has to be 16khz mono)
    and runs request to a voice conversion service

    Parameters
    ----------
    source_audio: Tuple[int, np.ndarray]
        sampling rate and samples of the audio to convert
    target_audio: Tuple[int, np.ndarray]
        sampling rate and samples of the audio with the target voice

    Returns
    -------
    result: Tuple[int, np.ndarray]
        sampling rate (always 16000) and samples of the converted audio

    Raises
    ------
    RuntimeError
        if the service did not return any audio
    """
    src = prepare_audio(source_audio)
    tgt = prepare_audio(target_audio)
    res = asyncio.run(async_service_request(src, tgt))
    if res is None:
        # don't hand (16000, None) to the caller, it is not a valid audio
        raise RuntimeError("voice conversion service returned no audio")
    return 16000, res