Ved Gupta committed on
Commit
8f082c1
·
1 Parent(s): 35e685d

Added Transcribe support

Browse files
.gitignore CHANGED
@@ -9,4 +9,10 @@ build/
9
  .vscode/
10
  *.log
11
  *.swp
12
- .DS_Store
 
 
 
 
 
 
 
9
  .vscode/
10
  *.log
11
  *.swp
12
+ .DS_Store
13
+ /*/*.bin
14
+ */*.bin
15
+
16
+ */*.wav
17
+ */*.mp3
18
+ */*.txt
README.md CHANGED
@@ -84,6 +84,15 @@ The project structure is organized as follows:
84
  | /api/v1/users/{user_id}/ | delete_user | DELETE |
85
  | /api/v1/transcribe/ | post_audio | POST |
86
 
 
 
 
 
 
 
 
 
 
87
 
88
  ## Run this Project
89
 
 
84
  | /api/v1/users/{user_id}/ | delete_user | DELETE |
85
  | /api/v1/transcribe/ | post_audio | POST |
86
 
87
+ ## Install Dependencies
88
+ ```bash
89
+ # Install ffmpeg for Audio Processing
90
+ sudo apt install ffmpeg
91
+
92
+ # Install Python Package
93
+ pip install -r requirements.txt
94
+
95
+ ```
96
 
97
  ## Run this Project
98
 
app/api/endpoints/transcribe.py CHANGED
@@ -1,22 +1,30 @@
1
- from fastapi import APIRouter, File, UploadFile, Request, Header
 
 
2
  from pydantic import BaseModel
3
 
 
 
4
  router = APIRouter()
5
 
6
 
7
- class AudioFile(BaseModel):
 
8
  filename: str
9
- content_type: str
10
 
11
 
12
- @router.post("/")
13
  async def post_audio(
14
- request: Request, file: UploadFile = File(...), Authorization: str = Header(...)
 
 
15
  ):
16
- """Receive audio file and save it to disk."""
17
- print(f"Authorization header: {Authorization}")
18
-
19
- with open(file.filename, "wb") as f:
20
- f.write(file.file.read())
21
 
22
- return AudioFile(filename=file.filename, content_type=file.content_type)
 
 
 
 
 
 
 
1
+ from typing import Annotated, List, Union
2
+
3
+ from fastapi import APIRouter, File, UploadFile, Request, Header, HTTPException
4
  from pydantic import BaseModel
5
 
6
+ from app.utils.utils import save_audio_file, transcribeFile
7
+
8
  router = APIRouter()
9
 
10
 
11
class Transcription(BaseModel):
    # Response body of the transcribe endpoint.
    # `#` comments are used instead of a docstring so the generated schema
    # description stays exactly as it was.

    # Transcript text produced for the uploaded audio.
    text: str
    # Name of the uploaded file.
    filename: str
 
14
 
15
 
16
@router.post("/", response_model=Transcription)
async def post_audio(
    request: Request,
    file: UploadFile = File(...),
    Authentication: Annotated[Union[str, None], Header()] = None,
):
    """Receive an audio file, save it to disk, and then transcribe it.

    Args:
        request: The incoming request (not used by the handler body).
        file: The uploaded audio file.
        Authentication: Optional value of the ``Authentication`` header.

    Returns:
        A ``Transcription`` carrying the uploaded file's name and the
        transcript text.

    Raises:
        HTTPException: Status 400, with the error message as detail, when
            saving or transcribing the file fails.
    """
    # Function-scope import so this handler does not depend on edits to the
    # module's import block; the helper ships with FastAPI itself.
    from fastapi.concurrency import run_in_threadpool

    # Report only whether the header was sent: its value may be a credential
    # and should not be echoed to stdout.
    print(f"Authentication header present: {Authentication is not None}")

    try:
        # Both helpers do blocking work (file write, external process), so
        # run them in the thread pool instead of stalling the event loop.
        file_path = await run_in_threadpool(save_audio_file, file)
        data = await run_in_threadpool(transcribeFile, file_path)
        return Transcription(filename=file.filename, text=data)
    except Exception as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
app/utils/utils.py CHANGED
@@ -1,4 +1,7 @@
1
  import json
 
 
 
2
 
3
 
4
  def get_all_routes(app):
@@ -28,3 +31,38 @@ def print_routes(app):
28
  + f"{', '.join(route['methods'])}"
29
  )
30
  print("\n")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import json
2
+ import subprocess
3
+ import uuid
4
+ import logging
5
 
6
 
7
  def get_all_routes(app):
 
31
  + f"{', '.join(route['methods'])}"
32
  )
33
  print("\n")
34
+
35
+
36
def transcribeFile(path: str = None, model="ggml-model-whisper-tiny.en-q5_1.bin"):
    """Transcribe an audio file by invoking the bundled whisper binary.

    Builds and runs a command of the form
    ``./binary/whisper -m models/<model> -f <path> -nt --output-text <out>``
    where ``<out>`` is a fresh ``transcribe/<uuid>.txt`` path, then reads
    that file back.

    Args:
        path: Path of the audio file to transcribe.
        model: File name of the model, looked up under ``models/``.

    Returns:
        The contents of the output text file.

    Raises:
        Exception: If no path is given, the command fails, or the output
            file cannot be read. The original error is logged and chained.
    """
    import shlex

    try:
        # An empty string is as unusable as None (save_audio_file returns ""
        # when it is given no file), so reject both up front.
        if not path:
            raise Exception("No path provided")
        outputFilePath: str = f"transcribe/{uuid.uuid4()}.txt"
        # The command goes through a shell, so every interpolated value is
        # quoted: spaces or shell metacharacters in a path must not be able
        # to split or extend the command line.
        model_arg = shlex.quote(f"models/{model}")
        input_arg = shlex.quote(str(path))
        output_arg = shlex.quote(outputFilePath)
        command: str = (
            f"./binary/whisper -m {model_arg} -f {input_arg} "
            f"-nt --output-text {output_arg}"
        )
        execute_command(command)
        with open(outputFilePath, "r") as f:
            return f.read()
    except Exception as e:
        logging.error(e)
        raise Exception(str(e)) from e
51
+
52
+
53
def execute_command(command: str) -> str:
    """Run ``command`` through the shell and return its output.

    stderr is merged into stdout, so the returned text contains both.

    Args:
        command: The full command line. It is interpreted by the shell, so
            callers must quote any untrusted values they interpolate.

    Returns:
        The command's output decoded as UTF-8, with surrounding whitespace
        stripped. Undecodable bytes are replaced rather than raising.

    Raises:
        Exception: ``"Error while transcribing"`` if the command exits with
            a non-zero status; the ``CalledProcessError`` is chained as the
            cause and its output is logged.
    """
    try:
        result = subprocess.check_output(command, shell=True, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as e:
        # errors="replace": a tool emitting non-UTF-8 bytes must not turn the
        # real failure into a UnicodeDecodeError raised from this handler.
        logging.error(e.output.decode("utf-8", errors="replace").strip())
        raise Exception("Error while transcribing") from e
    return result.decode("utf-8", errors="replace").strip()
60
+
61
+
62
def save_audio_file(file=None):
    """Write an uploaded file to ``audio/<uuid>.mp3`` and return that path.

    Args:
        file: An upload object exposing a readable binary stream as
            ``file.file``.

    Returns:
        The relative path the data was written to, or ``""`` when ``file``
        is None.
    """
    import os
    import shutil

    if file is None:
        return ""
    path = f"audio/{uuid.uuid4()}.mp3"
    # Make sure the target directory exists so a fresh checkout or a cleaned
    # working directory does not fail the first upload.
    os.makedirs("audio", exist_ok=True)
    with open(path, "wb") as f:
        # Copy in chunks instead of loading the whole upload into memory.
        shutil.copyfileobj(file.file, f)
    return path
audio/.gitkeep ADDED
File without changes
binary/whisper ADDED
Binary file (867 kB). View file
 
models/.gitkeep ADDED
File without changes
transcribe/.gitkeep ADDED
File without changes