import torch
from transformers import pipeline
import librosa
import os
os.environ["http_proxy"] = "http://10.76.5.191:7890"
os.environ["https_proxy"] = "http://10.76.5.191:7890"
device = "cuda:0" if torch.cuda.is_available() else "cpu"

pipe = pipeline(
  "automatic-speech-recognition",
  model="openai/whisper-tiny.en",
  chunk_length_s=30,
  device=device,
)

audio,sr = librosa.load("/mnt/data3/cbh/SynTalker/demo/test3/1_wayne_0_2_2.wav",sr=None)
sample = audio

prediction = pipe(sample.copy(), batch_size=8)["text"]

# # we can also return timestamps for the predictions
# prediction = pipe(sample.copy(), batch_size=8, return_timestamps=True)["chunks"]