CosyVoice / main.py
iflamed's picture
add download models script and fastapi server to serve tts
fff6f9f unverified
raw
history blame
1.25 kB
import io,time
from fastapi import FastAPI, Response
from fastapi.responses import HTMLResponse
from cosyvoice.cli.cosyvoice import CosyVoice
import torchaudio
cosyvoice = CosyVoice('pretrained_models/CosyVoice-300M-SFT')
# sft usage
print(cosyvoice.list_avaliable_spks())
app = FastAPI()
@app.get("/api/voice/tts")
async def tts(query: str, role: str):
start = time.process_time()
output = cosyvoice.inference_sft(query, role)
end = time.process_time()
print("infer time:", end-start, "seconds")
buffer = io.BytesIO()
torchaudio.save(buffer, output['tts_speech'], 22050, format="wav")
buffer.seek(0)
return Response(content=buffer.read(-1), media_type="audio/wav")
@app.get("/api/voice/roles")
async def roles():
return {"roles": cosyvoice.list_avaliable_spks()}
@app.get("/", response_class=HTMLResponse)
async def root():
return """
<!DOCTYPE html>
<html lang=zh-cn>
<head>
<meta charset=utf-8>
<title>Api information</title>
</head>
<body>
Get the supported tones from the Roles API first, then enter the tones and textual content in the TTS API for synthesis. <a href='./docs'>Documents of API</a>
</body>
</html>
"""