potsawee committed on
Commit
4041e77
1 Parent(s): 40ca02b

Update README.md

Browse files

Update README after refactoring generation()

Files changed (1) hide show
  1. README.md +13 -2
README.md CHANGED
@@ -25,6 +25,8 @@ More details can be found in our [technical report](https://arxiv.org/abs/2409.1
25
 
26
  ```python
27
  from transformers import AutoModel
 
 
28
 
29
  # Initialize from the trained model
30
  model = AutoModel.from_pretrained(
@@ -35,14 +37,23 @@ model = AutoModel.from_pretrained(
35
  model.to("cuda")
36
  model.eval()
37
 
 
 
 
 
 
 
 
 
 
38
  # Run generation
39
  prompt_pattern="<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n<Speech><SpeechHere></Speech> {}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
40
  response = model.generate(
41
- wav_path="path_to_your_audio.wav",
42
  prompt="transcribe this audio",
43
  prompt_pattern=prompt_pattern,
44
  do_sample=False,
45
- max_length=1200,
46
  repetition_penalty=1.1,
47
  num_beams=1,
48
  # temperature=0.4,
 
25
 
26
  ```python
27
  from transformers import AutoModel
28
+ import soundfile as sf
29
+ import librosa
30
 
31
  # Initialize from the trained model
32
  model = AutoModel.from_pretrained(
 
37
  model.to("cuda")
38
  model.eval()
39
 
40
+ # Read a wav file (the audio must be sampled at 16 kHz and clipped to at most 30 seconds)
41
+ audio, sr = sf.read("path_to_your_audio.wav")
42
+ if len(audio.shape) == 2:
43
+ audio = audio[:, 0]
44
+ if len(audio) > 30 * sr:
45
+ audio = audio[: 30 * sr]
46
+ if sr != 16000:
47
+ audio = librosa.resample(audio, orig_sr=sr, target_sr=16000, res_type="fft")
48
+
49
  # Run generation
50
  prompt_pattern="<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n<Speech><SpeechHere></Speech> {}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
51
  response = model.generate(
52
+ audio=audio,
53
  prompt="transcribe this audio",
54
  prompt_pattern=prompt_pattern,
55
  do_sample=False,
56
+ max_new_tokens=512,
57
  repetition_penalty=1.1,
58
  num_beams=1,
59
  # temperature=0.4,