from transformers import PretrainedConfig
class TyphoonAudioConfig(PretrainedConfig):
    """Configuration for the Typhoon-Audio model.

    Bundles the checkpoint locations of the two backbones (a Whisper-family
    speech encoder and a Typhoon LLM) with the speech Q-Former windowing
    settings and the LoRA adapter hyperparameters. Any extra keyword
    arguments are forwarded unchanged to ``PretrainedConfig``.
    """

    model_type = "typhoonaudio"

    def __init__(
        self,
        whisper_path="biodatlab/whisper-th-large-v3-combined",  # or local path
        llm_path="scb10x/llama-3-typhoon-v1.5-8b-instruct",  # or local path
        speech_qformer_token_num=1,
        speech_qformer_layer=2,
        second_per_frame=0.333333,
        second_stride=0.333333,
        lora=True,
        lora_alpha=32,
        lora_rank=8,
        lora_dropout=0.0,
        dtype="float16",
        **kwargs,
    ):
        # Record every model-specific option on the instance first, then hand
        # the remaining kwargs to the base class (HF convention: base
        # ``__init__`` runs last).
        model_options = (
            ("whisper_path", whisper_path),
            ("llm_path", llm_path),
            ("speech_qformer_token_num", speech_qformer_token_num),
            ("speech_qformer_layer", speech_qformer_layer),
            ("second_per_frame", second_per_frame),
            ("second_stride", second_stride),
            ("lora", lora),
            ("lora_alpha", lora_alpha),
            ("lora_rank", lora_rank),
            ("lora_dropout", lora_dropout),
            ("dtype", dtype),
        )
        for option_name, option_value in model_options:
            setattr(self, option_name, option_value)
        super().__init__(**kwargs)