IliaLarchenko committed on
Commit
2355d61
·
verified ·
1 Parent(s): 3e85f05

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. README.md +32 -6
  2. config.json +120 -46
  3. model.safetensors +1 -1
  4. train_config.json +280 -0
README.md CHANGED
@@ -15,24 +15,50 @@ pipeline_tag: robotics
15
 
16
  Read more about the model and implementation details in the [DOT Policy repository](https://github.com/IliaLarchenko/dot_policy).
17
 
18
- This model is trained using the [LeRobot library](https://huggingface.co/lerobot) and achieves state-of-the-art results on behavior cloning on the PushT keypoints dataset. It achieves 84.5% success rate (and 0.964 average max reward) vs. ~78% for the previous state-of-the-art model or 69% that I managed to reproduce using VQ-BET implementation in LeRobot.
19
 
20
  This result is achieved without the checkpoint selection. If you are interested in an even better model with a success rate of ~94% (but harder to reproduce as it requires some parameters tuning and checkpoint selection), please refer to [this model](https://huggingface.co/IliaLarchenko/dot_pusht_keypoints_best)
21
 
22
- You can use this model by installing LeRobot from [this branch](https://github.com/IliaLarchenko/lerobot/tree/dot)
23
 
24
  To train the model:
25
 
26
  ```bash
27
- python lerobot/scripts/train.py policy=dot_pusht_keypoints env=pusht env.gym.obs_type=environment_state_agent_pos
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  ```
29
 
30
  To evaluate the model:
31
 
32
  ```bash
33
- python lerobot/scripts/eval.py -p IliaLarchenko/dot_pusht_keypoints eval.n_episodes=1000 eval.batch_size=100 seed=1000000
 
 
 
 
 
 
 
34
  ```
35
 
36
  Model size:
37
- - Total parameters: 2.1m
38
- - Trainable parameters: 2.1m
 
 
 
 
15
 
16
  Read more about the model and implementation details in the [DOT Policy repository](https://github.com/IliaLarchenko/dot_policy).
17
 
18
+ This model is trained using the [LeRobot library](https://huggingface.co/lerobot) and achieves state-of-the-art results on behavior cloning on the PushT keypoints dataset. It achieves 88.1% success rate (and 0.969 average max reward) vs. ~78% for the previous state-of-the-art model or 69% that I managed to reproduce using VQ-BET implementation in LeRobot.
19
 
20
  This result is achieved without the checkpoint selection. If you are interested in an even better model with a success rate of ~94% (but harder to reproduce as it requires some parameters tuning and checkpoint selection), please refer to [this model](https://huggingface.co/IliaLarchenko/dot_pusht_keypoints_best)
21
 
22
+ You can use this model by installing LeRobot from [this branch](https://github.com/IliaLarchenko/lerobot/tree/dot_new_config)
23
 
24
  To train the model:
25
 
26
  ```bash
27
+ python lerobot/scripts/train.py \
28
+ --policy.type=dot \
29
+ --dataset.repo_id=lerobot/pusht_keypoints \
30
+ --env.type=pusht \
31
+ --env.task=PushT-v0 \
32
+ --output_dir=outputs/train/pusht_keypoints \
33
+ --batch_size=24 \
34
+ --log_freq=1000 \
35
+ --eval_freq=10000 \
36
+ --save_freq=50000 \
37
+ --offline.steps=1000000 \
38
+ --seed=100000 \
39
+ --wandb.enable=true \
40
+ --num_workers=24 \
41
+ --use_amp=true \
42
+ --device=cuda \
43
+ --policy.return_every_n=2
44
  ```
45
 
46
  To evaluate the model:
47
 
48
  ```bash
49
+ python lerobot/scripts/eval.py \
50
+ --policy.path=IliaLarchenko/dot_pusht_keypoints \
51
+ --env.type=pusht \
52
+ --env.task=PushT-v0 \
53
+ --eval.n_episodes=1000 \
54
+ --eval.batch_size=100 \
55
+ --env.obs_type=environment_state_agent_pos \
56
+ --seed=1000000
57
  ```
58
 
59
  Model size:
60
+ - Total parameters: 14.1m
61
+ - Trainable parameters: 2.9m
62
+
63
+
64
+ Note: the results are even slightly better than the ones reported in the repository. There was a big update in the LeRobot library; I did all the original training and evaluation using the older version of LeRobot. This model was trained using the new version of the library with the same parameters. The older version of the library that was used for the original experiments is available in [this branch](https://github.com/IliaLarchenko/lerobot/tree/dot)
config.json CHANGED
@@ -1,48 +1,122 @@
1
  {
2
- "alpha": 0.75,
3
- "crop_scale": 1.0,
4
- "dim_feedforward": 512,
5
- "dim_model": 128,
6
- "dropout": 0.1,
7
- "inference_horizon": 20,
8
- "input_normalization_modes": {
9
- "observation.environment_state": "min_max",
10
- "observation.state": "min_max"
11
- },
12
- "input_shapes": {
13
- "observation.environment_state": [
14
- 16
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  ],
16
- "observation.state": [
17
- 2
18
- ]
19
- },
20
- "lookback_aug": 5,
21
- "lookback_obs_steps": 10,
22
- "lora_rank": 20,
23
- "merge_lora": true,
24
- "n_decoder_layers": 8,
25
- "n_heads": 8,
26
- "n_obs_steps": 3,
27
- "noise_decay": 0.999995,
28
- "output_normalization_modes": {
29
- "action": "min_max"
30
- },
31
- "output_shapes": {
32
- "action": [
33
- 2
34
- ]
35
- },
36
- "pre_norm": true,
37
- "predict_every_n": 1,
38
- "pretrained_backbone_weights": "ResNet18_Weights.IMAGENET1K_V1",
39
- "rescale_shape": [
40
- 96,
41
- 96
42
- ],
43
- "return_every_n": 2,
44
- "state_noise": 0.01,
45
- "train_alpha": 0.9,
46
- "train_horizon": 20,
47
- "vision_backbone": "resnet18"
48
- }
 
1
  {
2
+ "type": "dot",
3
+ "n_obs_steps": 3,
4
+ "normalization_mapping": {
5
+ "VISUAL": "MEAN_STD",
6
+ "STATE": "MIN_MAX",
7
+ "ENV": "MIN_MAX",
8
+ "ACTION": "MIN_MAX"
9
+ },
10
+ "input_features": {
11
+ "observation.state": {
12
+ "type": "STATE",
13
+ "shape": [
14
+ 2
15
+ ]
16
+ },
17
+ "observation.environment_state": {
18
+ "type": "ENV",
19
+ "shape": [
20
+ 16
21
+ ]
22
+ }
23
+ },
24
+ "output_features": {
25
+ "action": {
26
+ "type": "ACTION",
27
+ "shape": [
28
+ 2
29
+ ]
30
+ }
31
+ },
32
+ "train_horizon": 20,
33
+ "inference_horizon": 20,
34
+ "lookback_obs_steps": 10,
35
+ "lookback_aug": 5,
36
+ "override_dataset_stats": false,
37
+ "new_dataset_stats": {
38
+ "action": {
39
+ "max": [
40
+ 512.0,
41
+ 512.0
42
+ ],
43
+ "min": [
44
+ 0.0,
45
+ 0.0
46
+ ]
47
+ },
48
+ "observation.environment_state": {
49
+ "max": [
50
+ 512.0,
51
+ 512.0,
52
+ 512.0,
53
+ 512.0,
54
+ 512.0,
55
+ 512.0,
56
+ 512.0,
57
+ 512.0,
58
+ 512.0,
59
+ 512.0,
60
+ 512.0,
61
+ 512.0,
62
+ 512.0,
63
+ 512.0,
64
+ 512.0,
65
+ 512.0
66
+ ],
67
+ "min": [
68
+ 0.0,
69
+ 0.0,
70
+ 0.0,
71
+ 0.0,
72
+ 0.0,
73
+ 0.0,
74
+ 0.0,
75
+ 0.0,
76
+ 0.0,
77
+ 0.0,
78
+ 0.0,
79
+ 0.0,
80
+ 0.0,
81
+ 0.0,
82
+ 0.0,
83
+ 0.0
84
+ ]
85
+ },
86
+ "observation.state": {
87
+ "max": [
88
+ 512.0,
89
+ 512.0
90
+ ],
91
+ "min": [
92
+ 0.0,
93
+ 0.0
94
+ ]
95
+ }
96
+ },
97
+ "vision_backbone": "resnet18",
98
+ "pretrained_backbone_weights": "ResNet18_Weights.IMAGENET1K_V1",
99
+ "pre_norm": true,
100
+ "lora_rank": 20,
101
+ "merge_lora": false,
102
+ "dim_model": 128,
103
+ "n_heads": 8,
104
+ "dim_feedforward": 512,
105
+ "n_decoder_layers": 8,
106
+ "rescale_shape": [
107
+ 96,
108
+ 96
109
  ],
110
+ "crop_scale": 1.0,
111
+ "state_noise": 0.01,
112
+ "noise_decay": 0.999995,
113
+ "dropout": 0.1,
114
+ "alpha": 0.75,
115
+ "train_alpha": 0.9,
116
+ "predict_every_n": 1,
117
+ "return_every_n": 2,
118
+ "optimizer_lr": 0.0001,
119
+ "optimizer_min_lr": 0.0001,
120
+ "optimizer_lr_cycle_steps": 300000,
121
+ "optimizer_weight_decay": 1e-05
122
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b45aaac6d363fb26f405462dd901b45d1b436b686c163c9b4d2b71085bdc1aa5
3
  size 8523444
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7a88ed5e7a822b3501cbca499af46a156cf4bd52b7ee6bb60249b697a28c652
3
  size 8523444
train_config.json ADDED
@@ -0,0 +1,280 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": {
3
+ "repo_id": "lerobot/pusht_keypoints",
4
+ "episodes": null,
5
+ "image_transforms": {
6
+ "enable": false,
7
+ "max_num_transforms": 3,
8
+ "random_order": false,
9
+ "tfs": {
10
+ "brightness": {
11
+ "weight": 1.0,
12
+ "type": "ColorJitter",
13
+ "kwargs": {
14
+ "brightness": [
15
+ 0.8,
16
+ 1.2
17
+ ]
18
+ }
19
+ },
20
+ "contrast": {
21
+ "weight": 1.0,
22
+ "type": "ColorJitter",
23
+ "kwargs": {
24
+ "contrast": [
25
+ 0.8,
26
+ 1.2
27
+ ]
28
+ }
29
+ },
30
+ "saturation": {
31
+ "weight": 1.0,
32
+ "type": "ColorJitter",
33
+ "kwargs": {
34
+ "saturation": [
35
+ 0.5,
36
+ 1.5
37
+ ]
38
+ }
39
+ },
40
+ "hue": {
41
+ "weight": 1.0,
42
+ "type": "ColorJitter",
43
+ "kwargs": {
44
+ "hue": [
45
+ -0.05,
46
+ 0.05
47
+ ]
48
+ }
49
+ },
50
+ "sharpness": {
51
+ "weight": 1.0,
52
+ "type": "SharpnessJitter",
53
+ "kwargs": {
54
+ "sharpness": [
55
+ 0.5,
56
+ 1.5
57
+ ]
58
+ }
59
+ }
60
+ }
61
+ },
62
+ "local_files_only": false,
63
+ "use_imagenet_stats": true,
64
+ "video_backend": "pyav"
65
+ },
66
+ "env": {
67
+ "type": "pusht",
68
+ "task": "PushT-v0",
69
+ "fps": 10,
70
+ "features": {
71
+ "action": {
72
+ "type": "ACTION",
73
+ "shape": [
74
+ 2
75
+ ]
76
+ },
77
+ "agent_pos": {
78
+ "type": "STATE",
79
+ "shape": [
80
+ 2
81
+ ]
82
+ },
83
+ "environment_state": {
84
+ "type": "ENV",
85
+ "shape": [
86
+ 16
87
+ ]
88
+ }
89
+ },
90
+ "features_map": {
91
+ "action": "action",
92
+ "agent_pos": "observation.state",
93
+ "environment_state": "observation.environment_state",
94
+ "pixels": "observation.image"
95
+ },
96
+ "episode_length": 300,
97
+ "obs_type": "environment_state_agent_pos",
98
+ "render_mode": "rgb_array",
99
+ "visualization_width": 384,
100
+ "visualization_height": 384
101
+ },
102
+ "policy": {
103
+ "type": "dot",
104
+ "n_obs_steps": 3,
105
+ "normalization_mapping": {
106
+ "VISUAL": "MEAN_STD",
107
+ "STATE": "MIN_MAX",
108
+ "ENV": "MIN_MAX",
109
+ "ACTION": "MIN_MAX"
110
+ },
111
+ "input_features": {
112
+ "observation.state": {
113
+ "type": "STATE",
114
+ "shape": [
115
+ 2
116
+ ]
117
+ },
118
+ "observation.environment_state": {
119
+ "type": "ENV",
120
+ "shape": [
121
+ 16
122
+ ]
123
+ }
124
+ },
125
+ "output_features": {
126
+ "action": {
127
+ "type": "ACTION",
128
+ "shape": [
129
+ 2
130
+ ]
131
+ }
132
+ },
133
+ "train_horizon": 20,
134
+ "inference_horizon": 20,
135
+ "lookback_obs_steps": 10,
136
+ "lookback_aug": 5,
137
+ "override_dataset_stats": false,
138
+ "new_dataset_stats": {
139
+ "action": {
140
+ "max": [
141
+ 512.0,
142
+ 512.0
143
+ ],
144
+ "min": [
145
+ 0.0,
146
+ 0.0
147
+ ]
148
+ },
149
+ "observation.environment_state": {
150
+ "max": [
151
+ 512.0,
152
+ 512.0,
153
+ 512.0,
154
+ 512.0,
155
+ 512.0,
156
+ 512.0,
157
+ 512.0,
158
+ 512.0,
159
+ 512.0,
160
+ 512.0,
161
+ 512.0,
162
+ 512.0,
163
+ 512.0,
164
+ 512.0,
165
+ 512.0,
166
+ 512.0
167
+ ],
168
+ "min": [
169
+ 0.0,
170
+ 0.0,
171
+ 0.0,
172
+ 0.0,
173
+ 0.0,
174
+ 0.0,
175
+ 0.0,
176
+ 0.0,
177
+ 0.0,
178
+ 0.0,
179
+ 0.0,
180
+ 0.0,
181
+ 0.0,
182
+ 0.0,
183
+ 0.0,
184
+ 0.0
185
+ ]
186
+ },
187
+ "observation.state": {
188
+ "max": [
189
+ 512.0,
190
+ 512.0
191
+ ],
192
+ "min": [
193
+ 0.0,
194
+ 0.0
195
+ ]
196
+ }
197
+ },
198
+ "vision_backbone": "resnet18",
199
+ "pretrained_backbone_weights": "ResNet18_Weights.IMAGENET1K_V1",
200
+ "pre_norm": true,
201
+ "lora_rank": 20,
202
+ "merge_lora": false,
203
+ "dim_model": 128,
204
+ "n_heads": 8,
205
+ "dim_feedforward": 512,
206
+ "n_decoder_layers": 8,
207
+ "rescale_shape": [
208
+ 96,
209
+ 96
210
+ ],
211
+ "crop_scale": 1.0,
212
+ "state_noise": 0.01,
213
+ "noise_decay": 0.999995,
214
+ "dropout": 0.1,
215
+ "alpha": 0.75,
216
+ "train_alpha": 0.9,
217
+ "predict_every_n": 1,
218
+ "return_every_n": 2,
219
+ "optimizer_lr": 0.0001,
220
+ "optimizer_min_lr": 0.0001,
221
+ "optimizer_lr_cycle_steps": 300000,
222
+ "optimizer_weight_decay": 1e-05
223
+ },
224
+ "output_dir": "outputs/train/pusht_keypoints",
225
+ "job_name": "pusht_dot",
226
+ "resume": false,
227
+ "device": "cuda",
228
+ "use_amp": true,
229
+ "seed": 100000,
230
+ "num_workers": 24,
231
+ "batch_size": 24,
232
+ "eval_freq": 10000,
233
+ "log_freq": 1000,
234
+ "save_checkpoint": true,
235
+ "save_freq": 50000,
236
+ "offline": {
237
+ "steps": 1000000
238
+ },
239
+ "online": {
240
+ "steps": 0,
241
+ "rollout_n_episodes": 1,
242
+ "rollout_batch_size": 1,
243
+ "steps_between_rollouts": null,
244
+ "sampling_ratio": 0.5,
245
+ "env_seed": null,
246
+ "buffer_capacity": null,
247
+ "buffer_seed_size": 0,
248
+ "do_rollout_async": false
249
+ },
250
+ "use_policy_training_preset": true,
251
+ "optimizer": {
252
+ "type": "adamw",
253
+ "lr": 0.0001,
254
+ "weight_decay": 1e-05,
255
+ "grad_clip_norm": 10.0,
256
+ "betas": [
257
+ 0.9,
258
+ 0.999
259
+ ],
260
+ "eps": 1e-08
261
+ },
262
+ "scheduler": {
263
+ "type": "cosine_annealing",
264
+ "num_warmup_steps": 0,
265
+ "min_lr": 0.0001,
266
+ "T_max": 300000
267
+ },
268
+ "eval": {
269
+ "n_episodes": 50,
270
+ "batch_size": 50,
271
+ "use_async_envs": false
272
+ },
273
+ "wandb": {
274
+ "enable": true,
275
+ "disable_artifact": false,
276
+ "project": "pusht",
277
+ "entity": null,
278
+ "notes": null
279
+ }
280
+ }