hackergeek98 committed on
Commit
1a8a786
·
verified ·
1 Parent(s): 95d9215

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +49 -0
README.md CHANGED
@@ -14,5 +14,54 @@ library_name: transformers
14
  This model was trained on the validation segment of the data for one epoch, reaching a loss of 0.05, and was tested on the test segment of the data, reaching a loss of 0.07.
15
 
16
  How to use the model in Colab:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
 
 
14
  This model was trained on the validation segment of the data for one epoch, reaching a loss of 0.05, and was tested on the test segment of the data, reaching a loss of 0.07.
15
 
16
  How to use the model in Colab:
17
+ # Install required packages
18
+ !pip install torch torchaudio transformers pydub google-colab
19
+
20
+ import torch
21
+ from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
22
+ from pydub import AudioSegment
23
+ import os
24
+ from google.colab import files
25
+
26
+ # Load the model and processor
27
+ model_id = "hackergeek98/tinyyyy_whisper"
28
+ device = "cuda" if torch.cuda.is_available() else "cpu"
29
+
30
+ model = AutoModelForSpeechSeq2Seq.from_pretrained(model_id).to(device)
31
+ processor = AutoProcessor.from_pretrained(model_id)
32
+
33
+ # Create pipeline
34
+ whisper_pipe = pipeline(
35
+ "automatic-speech-recognition", model=model, tokenizer=processor.tokenizer, feature_extractor=processor.feature_extractor, device=0 if torch.cuda.is_available() else -1
36
+ )
37
+
38
+ # Convert audio to WAV format
39
+ def convert_to_wav(audio_path):
40
+ audio = AudioSegment.from_file(audio_path)
41
+ wav_path = "converted_audio.wav"
42
+ audio.export(wav_path, format="wav")
43
+ return wav_path
44
+
45
+ # Transcribe an audio file and save as text
46
+ def transcribe(audio_path):
47
+ wav_path = convert_to_wav(audio_path)
48
+ result = whisper_pipe(wav_path)
49
+ os.remove(wav_path) # Cleanup temporary file
50
+
51
+ # Save transcription to a text file
52
+ text_path = "transcription.txt"
53
+ with open(text_path, "w") as f:
54
+ f.write(result["text"])
55
+
56
+ return text_path
57
+
58
+ # Upload and process audio in Colab
59
+ uploaded = files.upload()
60
+ audio_file = list(uploaded.keys())[0]
61
+ transcription_file = transcribe(audio_file)
62
+
63
+ # Download the transcription file
64
+ files.download(transcription_file)
65
+
66
 
67