{
  "model_type": "wav2vec2",
  "architectures": [
    "Wav2Vec2ForSequenceClassification"
  ],
  "input_channels": 1,
  "conv_layers": [
    {
      "in_channels": 1,
      "out_channels": 32,
      "kernel_size": 3,
      "stride": 1,
      "padding": 1
    },
    {
      "in_channels": 32,
      "out_channels": 64,
      "kernel_size": 3,
      "stride": 1,
      "padding": 1
    }
  ],
  "pooling_layers": [
    {
      "kernel_size": 2,
      "stride": 2
    },
    {
      "kernel_size": 2,
      "stride": 2
    }
  ],
  "fc_layers": [
    {
      "in_features": 1763968,
      "out_features": 128
    },
    {
      "in_features": 128,
      "out_features": 1
    }
  ],
  "dropout_rate": 0.25,
  "activation_function": "relu",
  "output_size": 1
}