---
# Training configuration for an ACT policy on the
# iantc104/gaze_real_occluded_insertion dataset.
#
# NOTE(review): this file was recovered from a copy whose line breaks and
# indentation had been collapsed. Scalar values are unchanged; the nesting
# below was reconstructed from key order and from the ${...} interpolations.
# Confirm section membership against the consuming schema before relying on it.

resume: false
device: cuda
use_amp: false
seed: 0
dataset_repo_id: iantc104/gaze_real_occluded_insertion
video_backend: pyav

training:
  offline_steps: 100000
  num_workers: 4
  batch_size: 8
  eval_freq: -1
  log_freq: 100
  save_checkpoint: true
  save_freq: 10000
  online_steps: 0
  online_rollout_n_episodes: 1
  online_rollout_batch_size: 1
  online_steps_between_rollouts: 1
  online_sampling_ratio: 0.5
  online_env_seed: null
  online_buffer_capacity: null
  online_buffer_seed_size: 0
  do_online_rollout_async: false
  # Image augmentation settings; switched off via `enable: false`.
  image_transforms:
    enable: false
    max_num_transforms: 3
    random_order: false
    brightness:
      weight: 1
      min_max:
        - 0.8
        - 1.2
    contrast:
      weight: 1
      min_max:
        - 0.8
        - 1.2
    saturation:
      weight: 1
      min_max:
        - 0.5
        - 1.5
    hue:
      weight: 1
      min_max:
        - -0.05
        - 0.05
    sharpness:
      weight: 1
      min_max:
        - 0.8
        - 1.2
  # NOTE(review): the optimizer keys and delta_timestamps are assumed to be
  # children of `training` (not of `image_transforms`) -- confirm.
  lr: 1.0e-05
  lr_backbone: 1.0e-05
  weight_decay: 0.0001
  grad_clip_norm: 10
  delta_timestamps:
    # 50 offsets equal to i / 33 for i = 0..49, i.e. one entry per step of
    # policy.chunk_size at the top-level fps of 33.
    action:
      - 0.0
      - 0.030303030303030304
      - 0.06060606060606061
      - 0.09090909090909091
      - 0.12121212121212122
      - 0.15151515151515152
      - 0.18181818181818182
      - 0.21212121212121213
      - 0.24242424242424243
      - 0.2727272727272727
      - 0.30303030303030304
      - 0.3333333333333333
      - 0.36363636363636365
      - 0.3939393939393939
      - 0.42424242424242425
      - 0.45454545454545453
      - 0.48484848484848486
      - 0.5151515151515151
      - 0.5454545454545454
      - 0.5757575757575758
      - 0.6060606060606061
      - 0.6363636363636364
      - 0.6666666666666666
      - 0.696969696969697
      - 0.7272727272727273
      - 0.7575757575757576
      - 0.7878787878787878
      - 0.8181818181818182
      - 0.8484848484848485
      - 0.8787878787878788
      - 0.9090909090909091
      - 0.9393939393939394
      - 0.9696969696969697
      - 1.0
      - 1.0303030303030303
      - 1.0606060606060606
      - 1.0909090909090908
      - 1.121212121212121
      - 1.1515151515151516
      - 1.1818181818181819
      - 1.2121212121212122
      - 1.2424242424242424
      - 1.2727272727272727
      - 1.303030303030303
      - 1.3333333333333333
      - 1.3636363636363635
      - 1.393939393939394
      - 1.4242424242424243
      - 1.4545454545454546
      - 1.4848484848484849

eval:
  n_episodes: 50
  batch_size: 10
  use_async_envs: false

wandb:
  enable: true
  disable_artifact: true
  project: real_occluded_insertion
  notes: ''

# Referenced below as ${fps}, so it must live at the document root.
fps: 33

env:
  name: real_world
  task: null
  state_dim: 21
  action_dim: 21
  fps: ${fps}

# Per-camera normalization statistics, three entries each for mean and std,
# every entry nested as a 1x1 list.
# NOTE(review): presumably per-channel image statistics replacing the ones
# computed from the dataset -- confirm against the consumer.
override_dataset_stats:
  observation.images.left_eye_cam:
    mean:
      - - - 0.485
      - - - 0.456
      - - - 0.406
    std:
      - - - 0.229
      - - - 0.224
      - - - 0.225
  observation.images.right_eye_cam:
    mean:
      - - - 0.485
      - - - 0.456
      - - - 0.406
    std:
      - - - 0.229
      - - - 0.224
      - - - 0.225

policy:
  name: act
  n_obs_steps: 1
  chunk_size: 50
  n_action_steps: 50
  input_shapes:
    observation.images.left_eye_cam:
      - 3
      - 480
      - 640
    observation.images.right_eye_cam:
      - 3
      - 480
      - 640
    # Kept in block style: inside a flow sequence the braces of ${...}
    # would have to be quoted.
    observation.state:
      - ${env.state_dim}
  output_shapes:
    action:
      - ${env.action_dim}
  input_normalization_modes:
    observation.images.left_eye_cam: mean_std
    observation.images.right_eye_cam: mean_std
    observation.state: mean_std
  output_normalization_modes:
    action: mean_std
  # NOTE(review): image_size and every key after it are assumed to belong to
  # `policy`; the collapsed source gives no indentation to prove it -- confirm.
  image_size:
    - 336
    - 448
  vision_backbone: dinov2
  pretrained_backbone_weights: dinov2_vits14_reg
  freeze_backbone: true
  pre_norm: false
  dim_model: 512
  n_heads: 8
  dim_feedforward: 3200
  feedforward_activation: relu
  n_encoder_layers: 4
  n_decoder_layers: 1
  use_vae: true
  latent_dim: 32
  n_vae_encoder_layers: 4
  temporal_ensemble_coeff: null
  dropout: 0.1
  kl_weight: 10.0