# Flat mapping of seven settings, one key per line so the document parses
# as a YAML block mapping.
# NOTE(review): the key names suggest hyperparameters for an attention-based
# network block; the consumer is not visible here — confirm the meaning and
# valid ranges against the code that loads this file.

# Two-element list; kept in flow style because it is a short leaf value.
channels: [256, 256]
# Float in plain style (parsed as 0.05).
dropout: 0.05
attention_head_dim: 64
n_blocks: 1
num_mid_blocks: 2
num_heads: 2
# Plain string; presumably an activation-function identifier resolved by the
# consumer — TODO confirm the accepted names.
act_fn: snakebeta