Spaces:
Paused
Paused
sachin
committed on
Commit
·
d638e5c
1
Parent(s):
2994754
fix-kannada
Browse files- tts_api.py +35 -10
tts_api.py
CHANGED
@@ -25,7 +25,7 @@ EXAMPLES = [
|
|
25 |
{
|
26 |
"audio_name": "PAN_F (Happy)",
|
27 |
"audio_url": "https://github.com/AI4Bharat/IndicF5/raw/refs/heads/main/prompts/PAN_F_HAPPY_00002.wav",
|
28 |
-
"ref_text": "ਇੱਕ ਗ੍ਰਾਹਕ ਨੇ ਸਾਡੀ ਬੇਮਿਸਾਲ ਸੇਵਾ ਬਾਰੇ ਦਿਲੋਂਗਵਾਹੀ ਦਿੱਤੀ ਜਿਸ ਨਾਲ ਸਾਨੂੰ
|
29 |
"synth_text": "ನಾನು ಯಾವುದೇ ಚಿಂತೆ ಇಲ್ಲದೆ ನನ್ನ ಸ್ನೇಹಿತರನ್ನು ನನ್ನ ಆಟೋಮೊಬೈಲ್ ತಜ್ಞರ ಬಳಿಗೆ ಕಳುಹಿಸುತ್ತೇನೆ ಏಕೆಂದರೆ ಅವರು ಖಂಡಿತವಾಗಿಯೂ ಅವರ ಎಲ್ಲಾ ಅಗತ್ಯಗಳನ್ನು ಪೂರೈಸುತ್ತಾರೆ ಎಂದು ನನಗೆ ಗೊತ್ತು."
|
30 |
},
|
31 |
{
|
@@ -43,7 +43,7 @@ EXAMPLES = [
|
|
43 |
{
|
44 |
"audio_name": "MAR_M (WIKI)",
|
45 |
"audio_url": "https://github.com/AI4Bharat/IndicF5/raw/refs/heads/main/prompts/MAR_M_WIKI_00001.wav",
|
46 |
-
"ref_text": "या प्रथाला एकोणीसशे पंचातर ईसवी पासून भारतीय दंड संहिताची धारा चारशे अठ्ठावीस आणि चारशे एकोणतीसच्या
|
47 |
"synth_text": "ಜೀವಾಣು ಕೊಳೆತ. ನಾನು ಅಹಮದ್ನಗರ ಜಿಲ್ಲೆಯ ರಾಹುರಿ ಗ್ರಾಮದಿಂದ ಬಾಳಾಸಾಹೇಬ್ ಜಾಧವ್ ಮಾತನಾಡುತ್ತಿದ್ದೇನೆ. ನನ್ನ ದಾಳಿಂಬೆ ತೋಟದಲ್ಲಿ ಜೀವಾಣು ಕೊಳೆತ ಹೆಚ್ಚಾಗಿ ಕಾಣಿಸುತ್ತಿದೆ. ಸ್ಟ್ರೆಪ್ಟೋಸೈಕ್ಲಿನ್ ಮತ್ತು ಕಾಪರ್ ಆಕ್ಸಿಕ್ಲೋರೈಡ್ ಸಿಂಪಡಣೆಗೆ ಸೂಕ್ತ ಪ್ರಮಾಣ ಎಷ್ಟು?"
|
48 |
},
|
49 |
{
|
@@ -54,12 +54,15 @@ EXAMPLES = [
|
|
54 |
},
|
55 |
]
|
56 |
|
57 |
-
# Pydantic
|
58 |
class SynthesizeRequest(BaseModel):
|
59 |
text: str # Text to synthesize (expected in Kannada)
|
60 |
ref_audio_name: str # Dropdown of audio names from EXAMPLES
|
61 |
ref_text: Optional[str] = None # Optional, defaults to example ref_text if not provided
|
62 |
|
|
|
|
|
|
|
63 |
# Function to load audio from URL
|
64 |
def load_audio_from_url(url: str):
|
65 |
response = requests.get(url)
|
@@ -109,19 +112,38 @@ def synthesize_speech(text: str, ref_audio_name: str, ref_text: str):
|
|
109 |
buffer.seek(0)
|
110 |
|
111 |
return buffer
|
112 |
-
|
113 |
-
#
|
114 |
@app.post("/synthesize", response_class=StreamingResponse)
|
115 |
async def synthesize(request: SynthesizeRequest):
|
116 |
-
# If ref_text is not provided, it will default to the example's ref_text in the synthesize_speech function
|
117 |
audio_buffer = synthesize_speech(request.text, request.ref_audio_name, request.ref_text)
|
118 |
-
|
119 |
-
# Return the audio as a streaming response
|
120 |
return StreamingResponse(
|
121 |
audio_buffer,
|
122 |
media_type="audio/wav",
|
123 |
headers={"Content-Disposition": "attachment; filename=synthesized_speech.wav"}
|
124 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
125 |
|
126 |
# Root endpoint with basic info
|
127 |
@app.get("/")
|
@@ -129,12 +151,15 @@ async def root():
|
|
129 |
return {
|
130 |
"message": "Welcome to IndicF5 Text-to-Speech API",
|
131 |
"description": "High-quality TTS for Indian languages with output in Kannada. Provide Kannada text for synthesis.",
|
132 |
-
"
|
|
|
|
|
|
|
133 |
"available_ref_audio_names": [ex["audio_name"] for ex in EXAMPLES],
|
134 |
"example_synth_texts_in_kannada": {ex["audio_name"]: ex["synth_text"] for ex in EXAMPLES}
|
135 |
}
|
136 |
|
137 |
-
# Run the app
|
138 |
if __name__ == "__main__":
|
139 |
import uvicorn
|
140 |
uvicorn.run(app, host="0.0.0.0", port=7860)
|
|
|
25 |
{
|
26 |
"audio_name": "PAN_F (Happy)",
|
27 |
"audio_url": "https://github.com/AI4Bharat/IndicF5/raw/refs/heads/main/prompts/PAN_F_HAPPY_00002.wav",
|
28 |
+
"ref_text": "ਇੱਕ ਗ੍ਰਾਹਕ ਨੇ ਸਾਡੀ ਬੇਮਿਸਾਲ ਸੇਵਾ ਬਾਰੇ ਦਿਲੋਂਗਵਾਹੀ ਦਿੱਤੀ ਜਿਸ ਨਾਲ ਸਾਨੂੰ ਅਨੰਦ ਮਹਿਸੂਸ ਹੋਇਆ।",
|
29 |
"synth_text": "ನಾನು ಯಾವುದೇ ಚಿಂತೆ ಇಲ್ಲದೆ ನನ್ನ ಸ್ನೇಹಿತರನ್ನು ನನ್ನ ಆಟೋಮೊಬೈಲ್ ತಜ್ಞರ ಬಳಿಗೆ ಕಳುಹಿಸುತ್ತೇನೆ ಏಕೆಂದರೆ ಅವರು ಖಂಡಿತವಾಗಿಯೂ ಅವರ ಎಲ್ಲಾ ಅಗತ್ಯಗಳನ್ನು ಪೂರೈಸುತ್ತಾರೆ ಎಂದು ನನಗೆ ಗೊತ್ತು."
|
30 |
},
|
31 |
{
|
|
|
43 |
{
|
44 |
"audio_name": "MAR_M (WIKI)",
|
45 |
"audio_url": "https://github.com/AI4Bharat/IndicF5/raw/refs/heads/main/prompts/MAR_M_WIKI_00001.wav",
|
46 |
+
"ref_text": "या प्रथाला एकोणीसशे पंचातर ईसवी पासून भारतीय दंड संहिताची धारा चारशे अठ्ठावीस आणि चारशे एकोणतीसच्या अंतर्गत निषेध केला.",
|
47 |
"synth_text": "ಜೀವಾಣು ಕೊಳೆತ. ನಾನು ಅಹಮದ್ನಗರ ಜಿಲ್ಲೆಯ ರಾಹುರಿ ಗ್ರಾಮದಿಂದ ಬಾಳಾಸಾಹೇಬ್ ಜಾಧವ್ ಮಾತನಾಡುತ್ತಿದ್ದೇನೆ. ನನ್ನ ದಾಳಿಂಬೆ ತೋಟದಲ್ಲಿ ಜೀವಾಣು ಕೊಳೆತ ಹೆಚ್ಚಾಗಿ ಕಾಣಿಸುತ್ತಿದೆ. ಸ್ಟ್ರೆಪ್ಟೋಸೈಕ್ಲಿನ್ ಮತ್ತು ಕಾಪರ್ ಆಕ್ಸಿಕ್ಲೋರೈಡ್ ಸಿಂಪಡಣೆಗೆ ಸೂಕ್ತ ಪ್ರಮಾಣ ಎಷ್ಟು?"
|
48 |
},
|
49 |
{
|
|
|
54 |
},
|
55 |
]
|
56 |
|
57 |
+
# Pydantic models for request bodies
|
58 |
class SynthesizeRequest(BaseModel):
    # Request body for synthesis with a caller-selected reference voice.

    # Text to synthesize (expected in Kannada).
    text: str

    # Reference voice, chosen from the audio names listed in EXAMPLES.
    ref_audio_name: str

    # Transcript of the reference audio. Optional; per the original note the
    # example's own ref_text is used when this is omitted (that fallback is
    # applied outside this class).
    ref_text: Optional[str] = None
|
62 |
|
63 |
+
class KannadaSynthesizeRequest(BaseModel):
    # Request body for the Kannada-only endpoint: the caller supplies just the
    # text, because the reference voice is fixed by the endpoint itself.

    # Text to synthesize (must be in Kannada).
    text: str
|
65 |
+
|
66 |
# Function to load audio from URL
|
67 |
def load_audio_from_url(url: str):
|
68 |
response = requests.get(url)
|
|
|
112 |
buffer.seek(0)
|
113 |
|
114 |
return buffer
|
115 |
+
'''
|
116 |
+
# Original endpoint
|
117 |
@app.post("/synthesize", response_class=StreamingResponse)
|
118 |
async def synthesize(request: SynthesizeRequest):
|
|
|
119 |
audio_buffer = synthesize_speech(request.text, request.ref_audio_name, request.ref_text)
|
|
|
|
|
120 |
return StreamingResponse(
|
121 |
audio_buffer,
|
122 |
media_type="audio/wav",
|
123 |
headers={"Content-Disposition": "attachment; filename=synthesized_speech.wav"}
|
124 |
)
|
125 |
+
'''
|
126 |
+
# New endpoint for Kannada-only synthesis
|
127 |
+
@app.post("/audio/speech", response_class=StreamingResponse)
async def synthesize_kannada(request: KannadaSynthesizeRequest):
    """Synthesize Kannada speech using a fixed reference voice.

    The reference audio name and its transcript are always taken from the
    ``"KAN_F (Happy)"`` entry of ``EXAMPLES``; the caller supplies only the
    text to speak.

    Args:
        request: Request body carrying the text to synthesize.

    Returns:
        A ``StreamingResponse`` wrapping the buffer returned by
        ``synthesize_speech``, served as an ``audio/wav`` attachment named
        ``synthesized_kannada_speech.wav``.

    Raises:
        HTTPException: 400 when the text is empty or whitespace-only;
            500 when the fixed reference example is missing from ``EXAMPLES``.
    """
    reference_name = "KAN_F (Happy)"

    # Validate the caller's input first so a bad request is reported as a 400
    # regardless of the server-side reference configuration.
    if not request.text.strip():
        raise HTTPException(status_code=400, detail="Text to synthesize cannot be empty.")

    # Use the Kannada example as the fixed reference. next() is given a default
    # because a bare next() raises StopIteration when nothing matches, and a
    # StopIteration escaping a coroutine is turned into an opaque RuntimeError.
    kannada_example = next(
        (ex for ex in EXAMPLES if ex["audio_name"] == reference_name),
        None,
    )
    if kannada_example is None:
        raise HTTPException(
            status_code=500,
            detail=f"Reference example '{reference_name}' is not configured.",
        )

    # NOTE(review): synthesize_speech is called synchronously inside an async
    # handler; if it is long-running it will block the event loop - confirm.
    audio_buffer = synthesize_speech(
        text=request.text,
        ref_audio_name=reference_name,
        ref_text=kannada_example["ref_text"],
    )

    return StreamingResponse(
        audio_buffer,
        media_type="audio/wav",
        headers={"Content-Disposition": "attachment; filename=synthesized_kannada_speech.wav"},
    )
|
147 |
|
148 |
# Root endpoint with basic info
|
149 |
@app.get("/")
|
|
|
151 |
return {
|
152 |
"message": "Welcome to IndicF5 Text-to-Speech API",
|
153 |
"description": "High-quality TTS for Indian languages with output in Kannada. Provide Kannada text for synthesis.",
|
154 |
+
"endpoints": {
|
155 |
+
"/synthesize": "General synthesis with customizable reference audio",
|
156 |
+
"/audio/speech": "Kannada-specific synthesis using KAN_F (Happy) as reference"
|
157 |
+
},
|
158 |
"available_ref_audio_names": [ex["audio_name"] for ex in EXAMPLES],
|
159 |
"example_synth_texts_in_kannada": {ex["audio_name"]: ex["synth_text"] for ex in EXAMPLES}
|
160 |
}
|
161 |
|
162 |
+
# Run the app
|
163 |
# Script entry point: when the module is executed directly, serve the app
# with uvicorn on all interfaces, port 7860.
if __name__ == "__main__":
    from uvicorn import run as serve

    serve(app, host="0.0.0.0", port=7860)
|