Afrinetwork7 committed on
Commit
3cf82c2
1 Parent(s): 4d56027

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -8
app.py CHANGED
@@ -1,15 +1,20 @@
1
- from fastapi import FastAPI, Form, HTTPException
2
  from fastapi.responses import JSONResponse, FileResponse
3
- import uvicorn
4
  from pydantic import BaseModel
5
  import numpy as np
6
  import io
7
  import soundfile as sf
8
  import base64
 
 
 
 
 
 
 
9
  from asr import transcribe, ASR_LANGUAGES
10
  from tts import synthesize, TTS_LANGUAGES
11
  from lid import identify
12
- import logging
13
 
14
  # Configure logging
15
  logging.basicConfig(level=logging.INFO)
@@ -17,21 +22,26 @@ logger = logging.getLogger(__name__)
17
 
18
  app = FastAPI(title="MMS: Scaling Speech Technology to 1000+ languages")
19
 
 
 
 
 
 
20
  class TTSRequest(BaseModel):
21
  text: str
22
  language: str
23
  speed: float
24
 
25
- class AudioRequest(BaseModel):
26
- audio: str # Base64 encoded audio data
27
- language: str
28
-
29
  @app.post("/transcribe")
30
  async def transcribe_audio(request: AudioRequest):
31
  try:
32
  audio_bytes = base64.b64decode(request.audio)
33
  audio_array, sample_rate = sf.read(io.BytesIO(audio_bytes))
34
 
 
 
 
 
35
  result = transcribe(audio_array, request.language)
36
  return JSONResponse(content={"transcription": result})
37
  except Exception as e:
@@ -83,4 +93,4 @@ async def get_tts_languages():
83
  return JSONResponse(content=TTS_LANGUAGES)
84
  except Exception as e:
85
  logger.error(f"Error in get_tts_languages: {str(e)}")
86
- raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
 
1
+ from fastapi import FastAPI, HTTPException
2
  from fastapi.responses import JSONResponse, FileResponse
 
3
  from pydantic import BaseModel
4
  import numpy as np
5
  import io
6
  import soundfile as sf
7
  import base64
8
+ import logging
9
+ import torch
10
+ import librosa
11
+ from transformers import Wav2Vec2ForCTC, AutoProcessor
12
+ from pathlib import Path
13
+
14
+ # Import functions from other modules
15
  from asr import transcribe, ASR_LANGUAGES
16
  from tts import synthesize, TTS_LANGUAGES
17
  from lid import identify
 
18
 
19
  # Configure logging
20
  logging.basicConfig(level=logging.INFO)
 
22
 
23
  app = FastAPI(title="MMS: Scaling Speech Technology to 1000+ languages")
24
 
25
+ # Define request models
26
+ class AudioRequest(BaseModel):
27
+ audio: str # Base64 encoded audio data
28
+ language: str
29
+
30
  class TTSRequest(BaseModel):
31
  text: str
32
  language: str
33
  speed: float
34
 
 
 
 
 
35
  @app.post("/transcribe")
36
  async def transcribe_audio(request: AudioRequest):
37
  try:
38
  audio_bytes = base64.b64decode(request.audio)
39
  audio_array, sample_rate = sf.read(io.BytesIO(audio_bytes))
40
 
41
+ # Convert to mono if stereo
42
+ if len(audio_array.shape) > 1:
43
+ audio_array = audio_array.mean(axis=1)
44
+
45
  result = transcribe(audio_array, request.language)
46
  return JSONResponse(content={"transcription": result})
47
  except Exception as e:
 
93
  return JSONResponse(content=TTS_LANGUAGES)
94
  except Exception as e:
95
  logger.error(f"Error in get_tts_languages: {str(e)}")
96
+ raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")