File size: 1,076 Bytes
253101d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
from transformers import PretrainedConfig

class TyphoonAudioConfig(PretrainedConfig):
    """Configuration for the Typhoon-Audio model.

    Bundles the checkpoint paths of the Whisper speech encoder and the
    Typhoon LLM together with the speech Q-Former size, frame-windowing,
    LoRA, and dtype settings. Any extra keyword arguments are forwarded
    to ``PretrainedConfig``.
    """

    model_type = "typhoonaudio"

    def __init__(
        self,
        whisper_path="biodatlab/whisper-th-large-v3-combined",  # or local path
        llm_path="scb10x/llama-3-typhoon-v1.5-8b-instruct",  # or local path
        speech_qformer_token_num=1,
        speech_qformer_layer=2,
        second_per_frame=0.333333,
        second_stride=0.333333,
        lora=True,
        lora_alpha=32,
        lora_rank=8,
        lora_dropout=0.0,
        dtype="float16",
        **kwargs
    ):
        # Record every model-specific option on the instance first, then
        # let the base-class init consume the remaining kwargs (standard
        # Hugging Face config pattern).
        own_settings = {
            "whisper_path": whisper_path,
            "llm_path": llm_path,
            "speech_qformer_token_num": speech_qformer_token_num,
            "speech_qformer_layer": speech_qformer_layer,
            "second_per_frame": second_per_frame,
            "second_stride": second_stride,
            "lora": lora,
            "lora_alpha": lora_alpha,
            "lora_rank": lora_rank,
            "lora_dropout": lora_dropout,
            "dtype": dtype,
        }
        for attr_name, attr_value in own_settings.items():
            setattr(self, attr_name, attr_value)
        super().__init__(**kwargs)