The generated audio is a bit quiet. How can I increase the volume in code?
#74
by
mayixb
- opened
# Snippet from the question: `kokoro.create` returns the audio samples and
# their sample rate; presumably `samples` is the waveform the poster finds
# too quiet (confirm against the kokoro API).
samples, sample_rate = kokoro.create(
text, voice=voices, speed=0.9, lang="en-us"
)
Hi, I fixed that by processing audio data like this:
import numpy as np
def improve_volume(audio_data):
    """Raise the perceived loudness of ``audio_data``.

    The signal goes through three stages: peak-normalize to 0 dB, apply
    dynamic range compression, then peak-normalize again to -1 dB.

    Args:
        audio_data: Audio samples, passed straight to ``normalize_audio``.

    Returns:
        The processed samples returned by the final ``normalize_audio`` call.
    """
    # Stage 1: bring the peak to 0 dB so the compressor threshold below is
    # measured against a known full-scale level.
    leveled = normalize_audio(data=audio_data, target_db=0)

    # Stage 2: dynamic range compression reduces the loud parts relative to
    # the quiet ones.
    squeezed = compress_audio(
        data=leveled,
        threshold_db=-4,
        ratio=2,
        knee_width=5,
    )

    # Stage 3: compression lowered the peak, so normalize once more; this is
    # the step that actually makes the result louder.
    return normalize_audio(data=squeezed, target_db=-1)
def normalize_audio(data, target_db=0):
    """Scale ``data`` so that its peak absolute value matches ``target_db``.

    Args:
        data: Audio samples as a NumPy array or any array-like (e.g. a list).
        target_db: Desired peak level in decibels, converted to a linear
            amplitude as ``10 ** (target_db / 20)``; 0 dB maps to 1.0.

    Returns:
        A new array with the same shape as ``data``, multiplied by a single
        gain factor. Silent (all-zero) or empty input is returned unscaled
        instead of producing NaN values or raising.
    """
    samples = np.asarray(data)
    target_amplitude = 10 ** (target_db / 20)

    # np.max raises on an empty array, so treat "no samples" as a zero peak.
    peak = np.max(np.abs(samples)) if samples.size else 0.0

    # A zero peak means there is nothing to scale; dividing by it would turn
    # the whole signal into NaN, so fall back to unity gain.
    gain = target_amplitude / peak if peak > 0 else 1.0
    return samples * gain
def compress_audio(data, threshold_db, ratio, knee_width):
    """Apply dynamic range compression to audio.

    Samples are handled in three regions according to their absolute value:

    * below the threshold: passed through unchanged;
    * above ``threshold + knee``: the part exceeding the threshold is divided
      by ``ratio``;
    * in between (the knee): the reduced excess is blended in linearly, so
      the curve is continuous at both ends of the knee.

    Args:
        data: Audio samples as a NumPy array or array-like, of any shape.
            Non-floating input (e.g. integers) is converted to float64 so
            the compressed values are not truncated.
        threshold_db: Compression threshold in decibels, converted to a
            linear amplitude as ``10 ** (threshold_db / 20)``.
        ratio: Divisor applied to the amplitude above the threshold.
        knee_width: Knee size, converted with the same dB-to-amplitude
            formula and added to the linear threshold.
            NOTE(review): this makes even a 0 dB knee span 1.0 of linear
            amplitude; behavior is kept as-is, confirm that it is intended.

    Returns:
        A new array of compressed samples with the same shape as ``data``.
    """
    samples = np.asarray(data)
    if not np.issubdtype(samples.dtype, np.inexact):
        # Integer buffers cannot hold the fractional results.
        samples = samples.astype(np.float64)

    threshold = 10 ** (threshold_db / 20)
    knee = 10 ** (knee_width / 20)
    knee_end = threshold + knee

    amplitude = np.abs(samples)
    sign = np.sign(samples)
    excess = amplitude - threshold

    # Full compression, used above the knee.
    hard = sign * (threshold + excess / ratio)

    # Smooth knee transition: blend runs from 0 at the threshold to 1 at the
    # end of the knee, where the curve meets the fully compressed one.
    blend = excess / (knee_end - threshold)
    soft = sign * (threshold + blend * (excess / ratio))

    # Whole-array selection replaces the per-sample Python loop.
    return np.where(
        amplitude < threshold,
        samples,  # No compression for quiet parts
        np.where(amplitude > knee_end, hard, soft),
    )