How to get timestamps
Hello.
Could you show me some code that can return timestamps along with the transcription?
import torch
from transformers import pipeline
# config
model_id = "efwkjn/whisper-ja-anime-v0.1"
torch_dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32
device = "cuda:0" if torch.cuda.is_available() else "cpu"
model_kwargs = {"attn_implementation": "sdpa"} if torch.cuda.is_available() else {}
generate_kwargs = {"language": "ja", "task": "transcribe"}
# load model
pipe = pipeline(
    "automatic-speech-recognition",
    model=model_id,
    torch_dtype=torch_dtype,
    device=device,
    model_kwargs=model_kwargs,
)
# run inference; return_timestamps=True adds per-segment (start, end) times
result = pipe("audio.mp3", return_timestamps=True, generate_kwargs=generate_kwargs)
print(result["chunks"])
Thank you for replying.
When I ran the code you provided, I got the following error. Could you tell me how to fix it?
TypeError Traceback (most recent call last)
Cell In[7], line 12
9 generate_kwargs = {"language": "ja", "task": "transcribe"}
11 # load model
---> 12 pipe = pipeline(
13 "automatic-speech-recognition",
14 model=model_id,
15 torch_dtype=torch_dtype,
16 device=device,
17 model_kwargs=model_kwargs
18 )
20 # run inference
21 result = pipe("audio.mp3", return_timestamps=True, generate_kwargs=generate_kwargs)
File /usr/local/lib/python3.11/dist-packages/transformers/pipelines/__init__.py:788, in pipeline(task, model, config, tokenizer, feature_extractor, image_processor, framework, revision, use_fast, use_auth_token, device, device_map, torch_dtype, trust_remote_code, model_kwargs, pipeline_class, **kwargs)
784 # Infer the framework from the model
785 # Forced if framework already defined, inferred if it's None
786 # Will load the correct model if possible
787 model_classes = {"tf": targeted_task["tf"], "pt": targeted_task["pt"]}
--> 788 framework, model = infer_framework_load_model(
789 model,
790 model_classes=model_classes,
791 config=config,
792 framework=framework,
793 task=task,
794 **hub_kwargs,
795 **model_kwargs,
796 )
798 model_config = model.config
799 hub_kwargs["_commit_hash"] = model.config._commit_hash
File /usr/local/lib/python3.11/dist-packages/transformers/pipelines/base.py:269, in infer_framework_load_model(model, config, model_classes, task, framework, **model_kwargs)
263 logger.warning(
264 "Model might be a PyTorch model (ending with .bin
) but PyTorch is not available. "
265 "Trying to load the model with Tensorflow."
266 )
268 try:
--> 269 model = model_class.from_pretrained(model, **kwargs)
270 if hasattr(model, "eval"):
271 model = model.eval()
File /usr/local/lib/python3.11/dist-packages/transformers/models/auto/auto_factory.py:484, in _BaseAutoModelClass.from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
482 elif type(config) in cls._model_mapping.keys():
483 model_class = _get_model_class(config, cls._model_mapping)
--> 484 return model_class.from_pretrained(
485 pretrained_model_name_or_path, *model_args, config=config, **hub_kwargs, **kwargs
486 )
487 raise ValueError(
488 f"Unrecognized configuration class {config.class} for this kind of AutoModel: {cls.name}.\n"
489 f"Model type should be one of {', '.join(c.name for c in cls._model_mapping.keys())}."
490 )
File /usr/local/lib/python3.11/dist-packages/transformers/modeling_utils.py:2675, in PreTrainedModel.from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
2672 init_contexts.append(init_empty_weights())
2674 with ContextManagers(init_contexts):
-> 2675 model = cls(config, *model_args, **model_kwargs)
2677 # Check first if we are from_pt
2678 if use_keep_in_fp32_modules:
TypeError: WhisperForConditionalGeneration.__init__() got an unexpected keyword argument 'attn_implementation'
import torch
from transformers import pipeline
# config
model_id = "efwkjn/whisper-ja-anime-v0.1"
torch_dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32
device = "cuda:0" if torch.cuda.is_available() else "cpu"
generate_kwargs = {"language": "ja", "task": "transcribe"}
# load model
pipe = pipeline(
    "automatic-speech-recognition",
    model=model_id,
    torch_dtype=torch_dtype,
    device=device,
)
# run inference
result = pipe("audio.mp3", return_timestamps=True, generate_kwargs=generate_kwargs)
print(result["chunks"])
Your installed transformers predates the attn_implementation argument, which is why it errors out. Either update transformers, or try the code above, which simply drops it.
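If you want to keep SDPA attention on setups that support it, one option is to gate the kwarg on the installed transformers version. A sketch, assuming the attn_implementation argument appeared around transformers v4.36 (check the release notes for your install):

import torch
import transformers
from packaging import version

# Only pass attn_implementation when the installed transformers accepts it
# (assumed cutoff: v4.36; adjust if your version differs).
new_enough = version.parse(transformers.__version__) >= version.parse("4.36.0")
model_kwargs = {"attn_implementation": "sdpa"} if (torch.cuda.is_available() and new_enough) else {}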
Thank you for the reply.
I updated transformers to the latest version, but for some reason that still didn't work; the code you provided did the trick, though!