Update README.md
Browse files — update README after refactoring generation()
README.md
CHANGED
@@ -25,6 +25,8 @@ More details can be found in our [technical report](https://arxiv.org/abs/2409.1
|
|
25 |
|
26 |
```python
|
27 |
from transformers import AutoModel
|
|
|
|
|
28 |
|
29 |
# Initialize from the trained model
|
30 |
model = AutoModel.from_pretrained(
|
@@ -35,14 +37,23 @@ model = AutoModel.from_pretrained(
|
|
35 |
model.to("cuda")
|
36 |
model.eval()
|
37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
# Run generation
|
39 |
prompt_pattern="<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n<Speech><SpeechHere></Speech> {}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
|
40 |
response = model.generate(
|
41 |
-
|
42 |
prompt="transcribe this audio",
|
43 |
prompt_pattern=prompt_pattern,
|
44 |
do_sample=False,
|
45 |
-
|
46 |
repetition_penalty=1.1,
|
47 |
num_beams=1,
|
48 |
# temperature=0.4,
|
|
|
25 |
|
26 |
```python
|
27 |
from transformers import AutoModel
|
28 |
+
import soundfile as sf
|
29 |
+
import librosa
|
30 |
|
31 |
# Initialize from the trained model
|
32 |
model = AutoModel.from_pretrained(
|
|
|
37 |
model.to("cuda")
|
38 |
model.eval()
|
39 |
|
40 |
+
# Read a WAV file; the steps below keep the first channel, clip it to 30 seconds, and resample it to 16 kHz
|
41 |
+
audio, sr = sf.read("path_to_your_audio.wav")
|
42 |
+
if len(audio.shape) == 2:
|
43 |
+
audio = audio[:, 0]
|
44 |
+
if len(audio) > 30 * sr:
|
45 |
+
audio = audio[: 30 * sr]
|
46 |
+
if sr != 16000:
|
47 |
+
audio = librosa.resample(audio, orig_sr=sr, target_sr=16000, res_type="fft")
|
48 |
+
|
49 |
# Run generation
|
50 |
prompt_pattern="<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n<Speech><SpeechHere></Speech> {}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
|
51 |
response = model.generate(
|
52 |
+
audio=audio,
|
53 |
prompt="transcribe this audio",
|
54 |
prompt_pattern=prompt_pattern,
|
55 |
do_sample=False,
|
56 |
+
max_new_tokens=512,
|
57 |
repetition_penalty=1.1,
|
58 |
num_beams=1,
|
59 |
# temperature=0.4,
|