Spaces:

Pranjal12345
/

Whisper_with_FastApi

Runtime error

App Files Files Community

Pranjal12345 committed on Sep 27, 2023

Commit

71f4c16

1 Parent(s): b86f76f

dsds

Browse files

Files changed (2) hide show

main.py +146 -80
requirements.txt +1 -2

main.py CHANGED Viewed

@@ -1,57 +1,100 @@
-# #uvicorn app:app --host 0.0.0.0 --port 8000 --reload
-# # from fastapi import FastAPI
-# # from transformers import WhisperProcessor, WhisperForConditionalGeneration
-# # import librosa
-# # import uvicorn
-# # app = FastAPI()
-# # processor = WhisperProcessor.from_pretrained("openai/whisper-small")
-# # model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
-# # model.config.forced_decoder_ids = None
-# # audio_file_path = "output.mp3"
-# # audio_data, _ = librosa.load(audio_file_path, sr=16000)
-# # @app.get("/")
-# # def transcribe_audio():
-# #         input_features = processor(audio_data.tolist(), return_tensors="pt").input_features
-# #         predicted_ids = model.generate(input_features)
-# #         transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
-# #         return {"transcription": transcription[0]}
-# # if __name__ == "__main__":
-# #     import uvicorn
-# #     uvicorn.run(app, host="0.0.0.0", port=8000)
-# # if __name__=='__main__':
-# #     uvicorn.run('main:app', reload=True)
-# #uvicorn app:app --host 0.0.0.0 --port 8000 --reload
-# #curl -X GET "http://localhost:8000/?text=I%20like%20Apples"
-# #http://localhost:8000/?text=I%20like%20Apples
-# # from fastapi import FastAPI
 # # from transformers import WhisperProcessor, WhisperForConditionalGeneration
 # # import librosa
 # # import uvicorn
 # # app = FastAPI()
@@ -60,15 +103,30 @@
 # # model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
 # # model.config.forced_decoder_ids = None
-# # # Path to your audio file
-# # audio_file_path = "/home/pranjal/Downloads/output.mp3"
-# # # Read the audio file
-# # audio_data, _ = librosa.load(audio_file_path, sr=16000)
 # # @app.get("/")
-# # def transcribe_audio():
 # #         # Process the audio data using the Whisper processor
 # #         input_features = processor(audio_data.tolist(), return_tensors="pt").input_features
 # #         # Generate transcription
@@ -76,18 +134,19 @@
 # #         transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
 # #         return {"transcription": transcription[0]}
-# # if __name__ == "__main__":
-# #     import uvicorn
-# #     uvicorn.run(app, host="0.0.0.0", port=8000)
-# # if __name__=='__app__':
-# #     uvicorn.run('main:app', reload=True)
 # from fastapi import FastAPI, UploadFile, File
 # from transformers import WhisperProcessor, WhisperForConditionalGeneration
@@ -98,10 +157,14 @@
 # app = FastAPI()
-# # Load model and processor
-# processor = WhisperProcessor.from_pretrained("openai/whisper-small")
-# model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
-# model.config.forced_decoder_ids = None
 # @app.get("/")
 # def read_root():
@@ -126,45 +189,44 @@
 #         audio_data = await audio_file.read()
 #         # Process the audio data using the Whisper processor
-#         audio_data, _ = librosa.load(io.BytesIO(audio_data), sr=16000)
-#         input_features = processor(audio_data.tolist(), return_tensors="pt").input_features
-#         # Generate transcription
-#         predicted_ids = model.generate(input_features)
-#         transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
-#         return {"transcription": transcription[0]}
 #     except Exception as e:
 #         return {"error": str(e)}
-# if __name__ == "__app__":
-#     uvicorn.run(app, host="0.0.0.0", port=8000, reload=True)
 #uvicorn app:app --host 0.0.0.0 --port 8000 --reload
 from fastapi import FastAPI, UploadFile, File
 from transformers import WhisperProcessor, WhisperForConditionalGeneration
-import librosa
 from fastapi.responses import HTMLResponse
-import uvicorn
 import io
-app = FastAPI()
-# # Load model and processor
-# processor = WhisperProcessor.from_pretrained("openai/whisper-medium")
-# model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-medium")
-# model.config.forced_decoder_ids = None
-import whisper
-model = whisper.load_model("small")
 @app.get("/")
 def read_root():
@@ -184,23 +246,27 @@ def read_root():
 @app.post("/transcribe")
 async def transcribe_audio(audio_file: UploadFile):
-    try:
-        # Read the uploaded audio file
         audio_data = await audio_file.read()
-        # Process the audio data using the Whisper processor
-        # audio_data, _ = librosa.load(io.BytesIO(audio_data), sr=16000)
-        # input_features = processor(audio_data.tolist(), return_tensors="pt").input_features
-        # # Generate transcription
-        # predicted_ids = model.generate(input_features)
-        # transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
-        result = model.transcribe("/home/pranjal/Downloads/rt.mp3")
-        return {"transcription": result['text']}
-    except Exception as e:
-        return {"error": str(e)}
-# if __name__ == "__app__":
-#     uvicorn.run(app, host="0.0.0.0", port=8000, reload=True)

+# # #uvicorn app:app --host 0.0.0.0 --port 8000 --reload
+# # # from fastapi import FastAPI
+# # # from transformers import WhisperProcessor, WhisperForConditionalGeneration
+# # # import librosa
+# # # import uvicorn
+# # # app = FastAPI()
+# # # processor = WhisperProcessor.from_pretrained("openai/whisper-small")
+# # # model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
+# # # model.config.forced_decoder_ids = None
+# # # audio_file_path = "output.mp3"
+# # # audio_data, _ = librosa.load(audio_file_path, sr=16000)
+# # # @app.get("/")
+# # # def transcribe_audio():
+# # #         input_features = processor(audio_data.tolist(), return_tensors="pt").input_features
+# # #         predicted_ids = model.generate(input_features)
+# # #         transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
+# # #         return {"transcription": transcription[0]}
+# # # if __name__ == "__main__":
+# # #     import uvicorn
+# # #     uvicorn.run(app, host="0.0.0.0", port=8000)
+# # # if __name__=='__main__':
+# # #     uvicorn.run('main:app', reload=True)
+# # #uvicorn app:app --host 0.0.0.0 --port 8000 --reload
+# # #curl -X GET "http://localhost:8000/?text=I%20like%20Apples"
+# # #http://localhost:8000/?text=I%20like%20Apples
+# # # from fastapi import FastAPI
+# # # from transformers import WhisperProcessor, WhisperForConditionalGeneration
+# # # import librosa
+# # # import uvicorn
+# # # app = FastAPI()
+# # # # Load model and processor
+# # # processor = WhisperProcessor.from_pretrained("openai/whisper-small")
+# # # model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
+# # # model.config.forced_decoder_ids = None
+# # # # Path to your audio file
+# # # audio_file_path = "/home/pranjal/Downloads/output.mp3"
+# # # # Read the audio file
+# # # audio_data, _ = librosa.load(audio_file_path, sr=16000)
+# # # @app.get("/")
+# # # def transcribe_audio():
+# # #         # Process the audio data using the Whisper processor
+# # #         input_features = processor(audio_data.tolist(), return_tensors="pt").input_features
+# # #         # Generate transcription
+# # #         predicted_ids = model.generate(input_features)
+# # #         transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
+# # #         return {"transcription": transcription[0]}
+# # # if __name__ == "__main__":
+# # #     import uvicorn
+# # #     uvicorn.run(app, host="0.0.0.0", port=8000)
+# # # if __name__=='__app__':
+# # #     uvicorn.run('main:app', reload=True)
+# # from fastapi import FastAPI, UploadFile, File
 # # from transformers import WhisperProcessor, WhisperForConditionalGeneration
 # # import librosa
+# # from fastapi.responses import HTMLResponse
 # # import uvicorn
+# # import io
 # # app = FastAPI()
 # # model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")
 # # model.config.forced_decoder_ids = None
 # # @app.get("/")
+# # def read_root():
+# #     html_form = """
+# #     <html>
+# #         <body>
+# #             <h2>ASR Transcription</h2>
+# #             <form action="/transcribe" method="post" enctype="multipart/form-data">
+# #                 <label for="audio_file">Upload an audio file (MP3 or WAV):</label>
+# #                 <input type="file" id="audio_file" name="audio_file" accept=".mp3, .wav" required><br><br>
+# #                 <input type="submit" value="Transcribe">
+# #             </form>
+# #         </body>
+# #     </html>
+# #     """
+# #     return HTMLResponse(content=html_form, status_code=200)
+# # @app.post("/transcribe")
+# # async def transcribe_audio(audio_file: UploadFile):
+# #     try:
+# #         # Read the uploaded audio file
+# #         audio_data = await audio_file.read()
 # #         # Process the audio data using the Whisper processor
+# #         audio_data, _ = librosa.load(io.BytesIO(audio_data), sr=16000)
 # #         input_features = processor(audio_data.tolist(), return_tensors="pt").input_features
 # #         # Generate transcription
 # #         transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
 # #         return {"transcription": transcription[0]}
+# #     except Exception as e:
+# #         return {"error": str(e)}
+# # if __name__ == "__app__":
+# #     uvicorn.run(app, host="0.0.0.0", port=8000, reload=True)
+# #uvicorn app:app --host 0.0.0.0 --port 8000 --reload
 # from fastapi import FastAPI, UploadFile, File
 # from transformers import WhisperProcessor, WhisperForConditionalGeneration
 # app = FastAPI()
+# # # Load model and processor
+# # processor = WhisperProcessor.from_pretrained("openai/whisper-medium")
+# # model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-medium")
+# # model.config.forced_decoder_ids = None
+# import whisper
+# model = whisper.load_model("small")
 # @app.get("/")
 # def read_root():
 #         audio_data = await audio_file.read()
 #         # Process the audio data using the Whisper processor
+#         # audio_data, _ = librosa.load(io.BytesIO(audio_data), sr=16000)
+#         # input_features = processor(audio_data.tolist(), return_tensors="pt").input_features
+#         # # Generate transcription
+#         # predicted_ids = model.generate(input_features)
+#         # transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
+#         result = model.transcribe("/home/pranjal/Downloads/rt.mp3")
+#         return {"transcription": result['text']}
 #     except Exception as e:
 #         return {"error": str(e)}
+# # if __name__ == "__app__":
+# #     uvicorn.run(app, host="0.0.0.0", port=8000, reload=True)
 #uvicorn app:app --host 0.0.0.0 --port 8000 --reload
 from fastapi import FastAPI, UploadFile, File
 from transformers import WhisperProcessor, WhisperForConditionalGeneration
 from fastapi.responses import HTMLResponse
+import librosa
 import io
+import re
+html_tag_remover = re.compile(r'<[^>]+>')  # Compiled once at import: matches '<', one or more non-'>' characters, then '>'.
+def remove_tags(text):  # Return `text` with every span matched by html_tag_remover deleted.
+  return html_tag_remover.sub('', text)  # Each match is replaced by the empty string; text with no such span comes back unchanged.
+app = FastAPI()  # Application object that the @app.get / @app.post decorators below register routes on.
+processor = WhisperProcessor.from_pretrained("openai/whisper-small")  # Loaded once at import; used in transcribe_audio to build input features and to decode token ids.
+model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-small")  # Same "openai/whisper-small" checkpoint as the processor; loaded once at import.
+model.config.forced_decoder_ids = None  # Clears forced decoder ids on the loaded config; presumably so language/task tokens are not pinned during generate() - TODO confirm against the transformers Whisper docs.
+chunk_duration = 30  # Chunk length in seconds; multiplied by the 16000 Hz sample rate in transcribe_audio.
+overlap_duration = 5  # Seconds by which the next chunk's start is moved back from the previous chunk's end.
 @app.get("/")
 def read_root():
 @app.post("/transcribe")  # Registers the handler below for POST requests to /transcribe.
 async def transcribe_audio(audio_file: UploadFile):  # Transcribes the uploaded audio chunk by chunk; returns a dict with the single key "transcription".
         audio_data = await audio_file.read()  # Raw bytes of the upload.
+        audio_data, _ = librosa.load(io.BytesIO(audio_data), sr=16000)  # Decode the bytes via librosa with sr=16000; the second return value is discarded.
+        transcription = []  # Decoded text of each chunk, in processing order.
+        start = 0  # Sample offset where the current chunk begins.
+        while start < len(audio_data):  # One iteration per chunk until the offset reaches the end of the audio.
+            end = start + chunk_duration * 16000  # chunk_duration seconds expressed in samples at 16000 Hz.
+            audio_chunk = audio_data[start:end]  # The slice is shorter than a full chunk when it runs off the end of the audio.
+            input_features = processor(audio_chunk.tolist(), return_tensors="pt").input_features  # Chunk is converted to a Python list first. NOTE(review): no sampling_rate is passed here - presumably the processor's default is relied on; confirm it matches 16000.
+            predicted_ids = model.generate(input_features, max_length=1000)  # Token ids for this chunk, generated with max_length=1000.
+            chunk_transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)  # Token ids -> text with special tokens skipped.
+            transcription.extend(chunk_transcription)  # Append every decoded string for this chunk.
+            start = end - overlap_duration * 16000  # Next chunk begins overlap_duration seconds before this one ended. NOTE(review): after a chunk that already reached the end of the audio, this start can still be < len(audio_data), so the trailing overlap window is transcribed a second time - confirm whether that repeat is intended.
+        final_transcription = " ".join(transcription)  # Chunk texts are joined with single spaces; text from overlapping audio is not de-duplicated here.
+        final_transcription = remove_tags(final_transcription)  # Strip '<...>' spans from the joined text.
+        return {"transcription": final_transcription}

requirements.txt CHANGED Viewed

@@ -6,5 +6,4 @@ uvicorn
 transformers
 Torch
 python-multipart
-git+https://github.com/openai/whisper.git
-ffmpeg

 transformers
 Torch
 python-multipart
+re  # NOTE(review): re is a Python standard-library module (main.py uses "import re"), so it should not need a requirements entry; presumably this line makes dependency installation fail - verify and drop it.