hackergeek98
/

tinyyyy_whisper

Automatic Speech Recognition

Model card Files Files and versions Community

hackergeek98 committed on Mar 24

Commit

92cc3f4

·

verified ·

1 Parent(s): b3e6cf8

Update README.md

Files changed (1) hide show

README.md +42 -1

README.md CHANGED Viewed

@@ -10,4 +10,45 @@ base_model:
 - openai/whisper-tiny
 pipeline_tag: automatic-speech-recognition
 library_name: transformers
----

 - openai/whisper-tiny
 pipeline_tag: automatic-speech-recognition
 library_name: transformers
+---
+How to use the model in Colab:
+!pip install torch torchaudio transformers librosa gradio
+from transformers import WhisperProcessor, WhisperForConditionalGeneration
+import torch
+# Load the fine-tuned Whisper checkpoint and its processor by repository ID
+model_name = "hackergeek98/tinyyyy_whisper"  # repository ID of this model
+processor = WhisperProcessor.from_pretrained(model_name)
+model = WhisperForConditionalGeneration.from_pretrained(model_name)
+# Pin the decoder prompt to language="fa" (Persian) and task="transcribe"
+model.config.forced_decoder_ids = processor.get_decoder_prompt_ids(language="fa", task="transcribe")  # NOTE(review): newer transformers releases may prefer generate(language=..., task=...) - confirm
+# Run on the GPU when CUDA is available; otherwise fall back to the CPU
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model.to(device)  # transcribe_audio() moves its input features to this same device
+import librosa
+def transcribe_audio(audio_file):  # Transcribe one audio file to text; uses the module-level processor, model and device
+    # Load the audio with librosa (supports multiple formats)
+    audio_data, sampling_rate = librosa.load(audio_file, sr=16000)  # sr=16000 resamples to 16 kHz
+    # Turn the waveform into PyTorch input features and move them to the model's device
+    inputs = processor(audio_data, sampling_rate=sampling_rate, return_tensors="pt").input_features.to(device)
+    # Generate token IDs; no_grad() disables gradient tracking because this is inference only
+    with torch.no_grad():
+        predicted_ids = model.generate(inputs)
+    # Decode the IDs to text without special tokens; [0] takes the first (only) decoded sequence
+    transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
+    return transcription
+from google.colab import files
+# Upload an audio file through Colab's file-upload helper
+uploaded = files.upload()
+audio_file = list(uploaded.keys())[0]  # only the first key is used (presumably the uploaded file's name - confirm)
+# Transcribe the uploaded audio and print the resulting text
+transcription = transcribe_audio(audio_file)
+print("Transcription:", transcription)