{
  "audio_encoder_config": {
    "n_mels": 80,
    "n_audio_ctx": 1500,
    "n_audio_state": 384,
    "n_audio_head": 6,
    "n_audio_layer": 4
  },
  "text_decoder_config": {
    "n_vocab": 51864,
    "n_text_ctx": 448,
    "n_text_state": 384,
    "n_text_head": 6,
    "n_text_layer": 4
  }
}