|
{ |
|
"best_global_step": null, |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.5263157894736842, |
|
"eval_steps": 50, |
|
"global_step": 250, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 451.125, |
|
"completions/max_terminated_length": 451.125, |
|
"completions/mean_length": 241.560546875, |
|
"completions/mean_terminated_length": 241.560546875, |
|
"completions/min_length": 111.75, |
|
"completions/min_terminated_length": 111.75, |
|
"epoch": 0.002105263157894737, |
|
"grad_norm": 0.0383942686021328, |
|
"learning_rate": 0.0, |
|
"loss": -0.0025, |
|
"num_tokens": 484639.0, |
|
"reward": 0.9500823765993118, |
|
"reward_std": 0.6353622525930405, |
|
"rewards/format_reward_embodied/mean": 0.501953125, |
|
"rewards/format_reward_embodied/std": 0.4904305227100849, |
|
"rewards/stop_prediction_reward/mean": 0.439453125, |
|
"rewards/stop_prediction_reward/std": 0.3919360339641571, |
|
"rewards/waypoint_pred_accuracy/mean": 0.004338064874811504, |
|
"rewards/waypoint_pred_accuracy/std": 0.012508220294698669, |
|
"step": 1 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.00390625, |
|
"completions/max_length": 985.25, |
|
"completions/max_terminated_length": 985.25, |
|
"completions/mean_length": 266.818359375, |
|
"completions/mean_terminated_length": 267.813928604126, |
|
"completions/min_length": 94.0, |
|
"completions/min_terminated_length": 119.375, |
|
"epoch": 0.004210526315789474, |
|
"grad_norm": 0.041216954588890076, |
|
"learning_rate": 2.083333333333333e-08, |
|
"loss": 0.0025, |
|
"num_tokens": 982274.0, |
|
"reward": 0.9463644102215767, |
|
"reward_std": 0.6123590245842934, |
|
"rewards/format_reward_embodied/mean": 0.50390625, |
|
"rewards/format_reward_embodied/std": 0.49139947816729546, |
|
"rewards/stop_prediction_reward/mean": 0.42578125, |
|
"rewards/stop_prediction_reward/std": 0.3781757093966007, |
|
"rewards/waypoint_pred_accuracy/mean": 0.008338454590998856, |
|
"rewards/waypoint_pred_accuracy/std": 0.02023129865005227, |
|
"step": 2 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 744.0, |
|
"completions/max_terminated_length": 744.0, |
|
"completions/mean_length": 247.1015625, |
|
"completions/mean_terminated_length": 247.1015625, |
|
"completions/min_length": 117.875, |
|
"completions/min_terminated_length": 117.875, |
|
"epoch": 0.00631578947368421, |
|
"grad_norm": 0.035625893622636795, |
|
"learning_rate": 4.166666666666666e-08, |
|
"loss": -0.001, |
|
"num_tokens": 1467318.0, |
|
"reward": 0.9623514339327812, |
|
"reward_std": 0.6189808771014214, |
|
"rewards/format_reward_embodied/mean": 0.498046875, |
|
"rewards/format_reward_embodied/std": 0.4842093959450722, |
|
"rewards/stop_prediction_reward/mean": 0.443359375, |
|
"rewards/stop_prediction_reward/std": 0.44101808220148087, |
|
"rewards/waypoint_pred_accuracy/mean": 0.010472597823421942, |
|
"rewards/waypoint_pred_accuracy/std": 0.03422736286900763, |
|
"step": 3 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 509.5, |
|
"completions/max_terminated_length": 509.5, |
|
"completions/mean_length": 237.654296875, |
|
"completions/mean_terminated_length": 237.654296875, |
|
"completions/min_length": 113.375, |
|
"completions/min_terminated_length": 113.375, |
|
"epoch": 0.008421052631578947, |
|
"grad_norm": 0.03751353174448013, |
|
"learning_rate": 6.25e-08, |
|
"loss": -0.0035, |
|
"num_tokens": 1950021.0, |
|
"reward": 0.9695519432425499, |
|
"reward_std": 0.6773256361484528, |
|
"rewards/format_reward_embodied/mean": 0.4765625, |
|
"rewards/format_reward_embodied/std": 0.4892418272793293, |
|
"rewards/stop_prediction_reward/mean": 0.421875, |
|
"rewards/stop_prediction_reward/std": 0.3986336216330528, |
|
"rewards/waypoint_pred_accuracy/mean": 0.03555722400778907, |
|
"rewards/waypoint_pred_accuracy/std": 0.08721220167353771, |
|
"step": 4 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 813.375, |
|
"completions/max_terminated_length": 813.375, |
|
"completions/mean_length": 265.58984375, |
|
"completions/mean_terminated_length": 265.58984375, |
|
"completions/min_length": 116.625, |
|
"completions/min_terminated_length": 116.625, |
|
"epoch": 0.010526315789473684, |
|
"grad_norm": 0.0453697107732296, |
|
"learning_rate": 8.333333333333333e-08, |
|
"loss": 0.0003, |
|
"num_tokens": 2446643.0, |
|
"reward": 0.9188483878970146, |
|
"reward_std": 0.610739640891552, |
|
"rewards/format_reward_embodied/mean": 0.5625, |
|
"rewards/format_reward_embodied/std": 0.4938563257455826, |
|
"rewards/stop_prediction_reward/mean": 0.345703125, |
|
"rewards/stop_prediction_reward/std": 0.4086693823337555, |
|
"rewards/waypoint_pred_accuracy/mean": 0.005322630658819445, |
|
"rewards/waypoint_pred_accuracy/std": 0.016403043182279513, |
|
"step": 5 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.001953125, |
|
"completions/max_length": 812.0, |
|
"completions/max_terminated_length": 812.0, |
|
"completions/mean_length": 252.169921875, |
|
"completions/mean_terminated_length": 252.66043663024902, |
|
"completions/min_length": 102.375, |
|
"completions/min_terminated_length": 118.25, |
|
"epoch": 0.01263157894736842, |
|
"grad_norm": 0.049299150705337524, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"loss": 0.0011, |
|
"num_tokens": 2935818.0, |
|
"reward": 0.877131775021553, |
|
"reward_std": 0.6145607680082321, |
|
"rewards/format_reward_embodied/mean": 0.509765625, |
|
"rewards/format_reward_embodied/std": 0.4843035563826561, |
|
"rewards/stop_prediction_reward/mean": 0.3671875, |
|
"rewards/stop_prediction_reward/std": 0.38137195259332657, |
|
"rewards/waypoint_pred_accuracy/mean": 8.932340400540724e-05, |
|
"rewards/waypoint_pred_accuracy/std": 0.0005101569792639827, |
|
"step": 6 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1163.75, |
|
"completions/max_terminated_length": 1163.75, |
|
"completions/mean_length": 255.529296875, |
|
"completions/mean_terminated_length": 255.529296875, |
|
"completions/min_length": 105.0, |
|
"completions/min_terminated_length": 105.0, |
|
"epoch": 0.014736842105263158, |
|
"grad_norm": 0.036646511405706406, |
|
"learning_rate": 1.25e-07, |
|
"loss": 0.001, |
|
"num_tokens": 3427097.0, |
|
"reward": 0.6614178493618965, |
|
"reward_std": 0.6105708554387093, |
|
"rewards/format_reward_embodied/mean": 0.46484375, |
|
"rewards/format_reward_embodied/std": 0.48722705617547035, |
|
"rewards/stop_prediction_reward/mean": 0.173828125, |
|
"rewards/stop_prediction_reward/std": 0.3640986457467079, |
|
"rewards/waypoint_pred_accuracy/mean": 0.011372994726074323, |
|
"rewards/waypoint_pred_accuracy/std": 0.04579899070052374, |
|
"step": 7 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 467.25, |
|
"completions/max_terminated_length": 467.25, |
|
"completions/mean_length": 246.98828125, |
|
"completions/mean_terminated_length": 246.98828125, |
|
"completions/min_length": 119.5, |
|
"completions/min_terminated_length": 119.5, |
|
"epoch": 0.016842105263157894, |
|
"grad_norm": 0.047037359327077866, |
|
"learning_rate": 1.4583333333333335e-07, |
|
"loss": -0.0023, |
|
"num_tokens": 3915411.0, |
|
"reward": 0.9769175350666046, |
|
"reward_std": 0.6314118355512619, |
|
"rewards/format_reward_embodied/mean": 0.568359375, |
|
"rewards/format_reward_embodied/std": 0.4869700260460377, |
|
"rewards/stop_prediction_reward/mean": 0.376953125, |
|
"rewards/stop_prediction_reward/std": 0.4093479886651039, |
|
"rewards/waypoint_pred_accuracy/mean": 0.015802525533167768, |
|
"rewards/waypoint_pred_accuracy/std": 0.032816135895782333, |
|
"step": 8 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 481.125, |
|
"completions/max_terminated_length": 481.125, |
|
"completions/mean_length": 241.935546875, |
|
"completions/mean_terminated_length": 241.935546875, |
|
"completions/min_length": 114.5, |
|
"completions/min_terminated_length": 114.5, |
|
"epoch": 0.018947368421052633, |
|
"grad_norm": 0.04171831160783768, |
|
"learning_rate": 1.6666666666666665e-07, |
|
"loss": -0.0015, |
|
"num_tokens": 4397042.0, |
|
"reward": 0.9027341902256012, |
|
"reward_std": 0.6853612437844276, |
|
"rewards/format_reward_embodied/mean": 0.48828125, |
|
"rewards/format_reward_embodied/std": 0.4951612576842308, |
|
"rewards/stop_prediction_reward/mean": 0.322265625, |
|
"rewards/stop_prediction_reward/std": 0.4115743637084961, |
|
"rewards/waypoint_pred_accuracy/mean": 0.04609366483055356, |
|
"rewards/waypoint_pred_accuracy/std": 0.09995413944127579, |
|
"step": 9 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 695.625, |
|
"completions/max_terminated_length": 695.625, |
|
"completions/mean_length": 261.609375, |
|
"completions/mean_terminated_length": 261.609375, |
|
"completions/min_length": 110.0, |
|
"completions/min_terminated_length": 110.0, |
|
"epoch": 0.021052631578947368, |
|
"grad_norm": 0.03897935897111893, |
|
"learning_rate": 1.875e-07, |
|
"loss": 0.0002, |
|
"num_tokens": 4893354.0, |
|
"reward": 1.0423217862844467, |
|
"reward_std": 0.6282145008444786, |
|
"rewards/format_reward_embodied/mean": 0.548828125, |
|
"rewards/format_reward_embodied/std": 0.48774589598178864, |
|
"rewards/stop_prediction_reward/mean": 0.478515625, |
|
"rewards/stop_prediction_reward/std": 0.4023555275052786, |
|
"rewards/waypoint_pred_accuracy/mean": 0.007489029231998282, |
|
"rewards/waypoint_pred_accuracy/std": 0.0251983865261218, |
|
"step": 10 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 470.375, |
|
"completions/max_terminated_length": 470.375, |
|
"completions/mean_length": 253.248046875, |
|
"completions/mean_terminated_length": 253.248046875, |
|
"completions/min_length": 125.0, |
|
"completions/min_terminated_length": 125.0, |
|
"epoch": 0.023157894736842106, |
|
"grad_norm": 0.039429888129234314, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"loss": -0.0022, |
|
"num_tokens": 5384873.0, |
|
"reward": 0.7660543769598007, |
|
"reward_std": 0.5604145936667919, |
|
"rewards/format_reward_embodied/mean": 0.607421875, |
|
"rewards/format_reward_embodied/std": 0.479397177696228, |
|
"rewards/stop_prediction_reward/mean": 0.15625, |
|
"rewards/stop_prediction_reward/std": 0.3427934180945158, |
|
"rewards/waypoint_pred_accuracy/mean": 0.0011912494257570604, |
|
"rewards/waypoint_pred_accuracy/std": 0.00802605507872417, |
|
"step": 11 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.001953125, |
|
"completions/max_length": 1082.0, |
|
"completions/max_terminated_length": 1082.0, |
|
"completions/mean_length": 263.59375, |
|
"completions/mean_terminated_length": 264.0594940185547, |
|
"completions/min_length": 99.75, |
|
"completions/min_terminated_length": 114.625, |
|
"epoch": 0.02526315789473684, |
|
"grad_norm": 0.030105428770184517, |
|
"learning_rate": 2.2916666666666663e-07, |
|
"loss": 0.0033, |
|
"num_tokens": 5878681.0, |
|
"reward": 1.0901148244738579, |
|
"reward_std": 0.6814222931861877, |
|
"rewards/format_reward_embodied/mean": 0.6171875, |
|
"rewards/format_reward_embodied/std": 0.46372338756918907, |
|
"rewards/stop_prediction_reward/mean": 0.400390625, |
|
"rewards/stop_prediction_reward/std": 0.4396743141114712, |
|
"rewards/waypoint_pred_accuracy/mean": 0.036268358699724924, |
|
"rewards/waypoint_pred_accuracy/std": 0.07070713029423034, |
|
"step": 12 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 476.875, |
|
"completions/max_terminated_length": 476.875, |
|
"completions/mean_length": 257.3125, |
|
"completions/mean_terminated_length": 257.3125, |
|
"completions/min_length": 118.625, |
|
"completions/min_terminated_length": 118.625, |
|
"epoch": 0.02736842105263158, |
|
"grad_norm": 0.03954648971557617, |
|
"learning_rate": 2.5e-07, |
|
"loss": -0.0013, |
|
"num_tokens": 6370745.0, |
|
"reward": 1.1011288091540337, |
|
"reward_std": 0.6459922045469284, |
|
"rewards/format_reward_embodied/mean": 0.634765625, |
|
"rewards/format_reward_embodied/std": 0.47677353397011757, |
|
"rewards/stop_prediction_reward/mean": 0.44140625, |
|
"rewards/stop_prediction_reward/std": 0.42084217444062233, |
|
"rewards/waypoint_pred_accuracy/mean": 0.012478479564244083, |
|
"rewards/waypoint_pred_accuracy/std": 0.04567733465950141, |
|
"step": 13 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1179.5, |
|
"completions/max_terminated_length": 1179.5, |
|
"completions/mean_length": 272.15625, |
|
"completions/mean_terminated_length": 272.15625, |
|
"completions/min_length": 120.375, |
|
"completions/min_terminated_length": 120.375, |
|
"epoch": 0.029473684210526315, |
|
"grad_norm": 0.03879372030496597, |
|
"learning_rate": 2.708333333333333e-07, |
|
"loss": 0.0013, |
|
"num_tokens": 6872585.0, |
|
"reward": 1.069977581501007, |
|
"reward_std": 0.6529600322246552, |
|
"rewards/format_reward_embodied/mean": 0.626953125, |
|
"rewards/format_reward_embodied/std": 0.46956589445471764, |
|
"rewards/stop_prediction_reward/mean": 0.412109375, |
|
"rewards/stop_prediction_reward/std": 0.42556022480130196, |
|
"rewards/waypoint_pred_accuracy/mean": 0.015457541714965616, |
|
"rewards/waypoint_pred_accuracy/std": 0.05261327166544845, |
|
"step": 14 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 456.75, |
|
"completions/max_terminated_length": 456.75, |
|
"completions/mean_length": 247.5078125, |
|
"completions/mean_terminated_length": 247.5078125, |
|
"completions/min_length": 116.0, |
|
"completions/min_terminated_length": 116.0, |
|
"epoch": 0.031578947368421054, |
|
"grad_norm": 0.03746689483523369, |
|
"learning_rate": 2.916666666666667e-07, |
|
"loss": -0.004, |
|
"num_tokens": 7358669.0, |
|
"reward": 0.816399596631527, |
|
"reward_std": 0.6439896002411842, |
|
"rewards/format_reward_embodied/mean": 0.5546875, |
|
"rewards/format_reward_embodied/std": 0.49549105390906334, |
|
"rewards/stop_prediction_reward/mean": 0.216796875, |
|
"rewards/stop_prediction_reward/std": 0.3949273619800806, |
|
"rewards/waypoint_pred_accuracy/mean": 0.022457610069225337, |
|
"rewards/waypoint_pred_accuracy/std": 0.04548973154789149, |
|
"step": 15 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 501.25, |
|
"completions/max_terminated_length": 501.25, |
|
"completions/mean_length": 254.716796875, |
|
"completions/mean_terminated_length": 254.716796875, |
|
"completions/min_length": 120.375, |
|
"completions/min_terminated_length": 120.375, |
|
"epoch": 0.03368421052631579, |
|
"grad_norm": 0.053112324327230453, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"loss": 0.0002, |
|
"num_tokens": 7852156.0, |
|
"reward": 1.2096271365880966, |
|
"reward_std": 0.6100753545761108, |
|
"rewards/format_reward_embodied/mean": 0.76171875, |
|
"rewards/format_reward_embodied/std": 0.4093044362962246, |
|
"rewards/stop_prediction_reward/mean": 0.4453125, |
|
"rewards/stop_prediction_reward/std": 0.4225916638970375, |
|
"rewards/waypoint_pred_accuracy/mean": 0.0012979521083353873, |
|
"rewards/waypoint_pred_accuracy/std": 0.009194809671299708, |
|
"step": 16 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 472.375, |
|
"completions/max_terminated_length": 472.375, |
|
"completions/mean_length": 247.376953125, |
|
"completions/mean_terminated_length": 247.376953125, |
|
"completions/min_length": 116.25, |
|
"completions/min_terminated_length": 116.25, |
|
"epoch": 0.035789473684210524, |
|
"grad_norm": 0.03221385180950165, |
|
"learning_rate": 3.333333333333333e-07, |
|
"loss": -0.0006, |
|
"num_tokens": 8339517.0, |
|
"reward": 1.1803481727838516, |
|
"reward_std": 0.5297410599887371, |
|
"rewards/format_reward_embodied/mean": 0.814453125, |
|
"rewards/format_reward_embodied/std": 0.3874172270298004, |
|
"rewards/stop_prediction_reward/mean": 0.345703125, |
|
"rewards/stop_prediction_reward/std": 0.3674583863466978, |
|
"rewards/waypoint_pred_accuracy/mean": 0.010095963222276419, |
|
"rewards/waypoint_pred_accuracy/std": 0.0323235778856652, |
|
"step": 17 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 811.875, |
|
"completions/max_terminated_length": 811.875, |
|
"completions/mean_length": 254.984375, |
|
"completions/mean_terminated_length": 254.984375, |
|
"completions/min_length": 112.625, |
|
"completions/min_terminated_length": 112.625, |
|
"epoch": 0.037894736842105266, |
|
"grad_norm": 0.034303538501262665, |
|
"learning_rate": 3.541666666666667e-07, |
|
"loss": 0.0065, |
|
"num_tokens": 8828533.0, |
|
"reward": 1.0827482342720032, |
|
"reward_std": 0.49626101925969124, |
|
"rewards/format_reward_embodied/mean": 0.84765625, |
|
"rewards/format_reward_embodied/std": 0.35438157618045807, |
|
"rewards/stop_prediction_reward/mean": 0.234375, |
|
"rewards/stop_prediction_reward/std": 0.360489659011364, |
|
"rewards/waypoint_pred_accuracy/mean": 0.00035850519751079446, |
|
"rewards/waypoint_pred_accuracy/std": 0.000990356254078506, |
|
"step": 18 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.001953125, |
|
"completions/max_length": 469.5, |
|
"completions/max_terminated_length": 469.5, |
|
"completions/mean_length": 242.603515625, |
|
"completions/mean_terminated_length": 243.05292129516602, |
|
"completions/min_length": 99.5, |
|
"completions/min_terminated_length": 107.75, |
|
"epoch": 0.04, |
|
"grad_norm": 0.03210434690117836, |
|
"learning_rate": 3.75e-07, |
|
"loss": 0.0002, |
|
"num_tokens": 9313706.0, |
|
"reward": 1.1656895354390144, |
|
"reward_std": 0.5405256152153015, |
|
"rewards/format_reward_embodied/mean": 0.841796875, |
|
"rewards/format_reward_embodied/std": 0.35758682526648045, |
|
"rewards/stop_prediction_reward/mean": 0.322265625, |
|
"rewards/stop_prediction_reward/std": 0.4072440378367901, |
|
"rewards/waypoint_pred_accuracy/mean": 0.0008135107927961789, |
|
"rewards/waypoint_pred_accuracy/std": 0.004946927132555481, |
|
"step": 19 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 887.5, |
|
"completions/max_terminated_length": 887.5, |
|
"completions/mean_length": 257.341796875, |
|
"completions/mean_terminated_length": 257.341796875, |
|
"completions/min_length": 117.75, |
|
"completions/min_terminated_length": 117.75, |
|
"epoch": 0.042105263157894736, |
|
"grad_norm": 0.023722035810351372, |
|
"learning_rate": 3.958333333333333e-07, |
|
"loss": 0.0063, |
|
"num_tokens": 9807001.0, |
|
"reward": 1.2326279431581497, |
|
"reward_std": 0.5118205770850182, |
|
"rewards/format_reward_embodied/mean": 0.880859375, |
|
"rewards/format_reward_embodied/std": 0.31777896732091904, |
|
"rewards/stop_prediction_reward/mean": 0.29296875, |
|
"rewards/stop_prediction_reward/std": 0.3428589329123497, |
|
"rewards/waypoint_pred_accuracy/mean": 0.029399914224090686, |
|
"rewards/waypoint_pred_accuracy/std": 0.06371456215110564, |
|
"step": 20 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1251.25, |
|
"completions/max_terminated_length": 1251.25, |
|
"completions/mean_length": 258.982421875, |
|
"completions/mean_terminated_length": 258.982421875, |
|
"completions/min_length": 107.75, |
|
"completions/min_terminated_length": 107.75, |
|
"epoch": 0.04421052631578947, |
|
"grad_norm": 0.027127819135785103, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"loss": 0.0107, |
|
"num_tokens": 10300304.0, |
|
"reward": 1.3775597661733627, |
|
"reward_std": 0.5649962350726128, |
|
"rewards/format_reward_embodied/mean": 0.880859375, |
|
"rewards/format_reward_embodied/std": 0.3204925637692213, |
|
"rewards/stop_prediction_reward/mean": 0.451171875, |
|
"rewards/stop_prediction_reward/std": 0.4135790057480335, |
|
"rewards/waypoint_pred_accuracy/mean": 0.02276425497597198, |
|
"rewards/waypoint_pred_accuracy/std": 0.053010769921375774, |
|
"step": 21 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 764.5, |
|
"completions/max_terminated_length": 764.5, |
|
"completions/mean_length": 247.626953125, |
|
"completions/mean_terminated_length": 247.626953125, |
|
"completions/min_length": 120.625, |
|
"completions/min_terminated_length": 120.625, |
|
"epoch": 0.04631578947368421, |
|
"grad_norm": 0.026827372610569, |
|
"learning_rate": 4.375e-07, |
|
"loss": 0.0046, |
|
"num_tokens": 10787473.0, |
|
"reward": 1.2097989320755005, |
|
"reward_std": 0.4106667507439852, |
|
"rewards/format_reward_embodied/mean": 0.9609375, |
|
"rewards/format_reward_embodied/std": 0.1782014612108469, |
|
"rewards/stop_prediction_reward/mean": 0.248046875, |
|
"rewards/stop_prediction_reward/std": 0.3576600421220064, |
|
"rewards/waypoint_pred_accuracy/mean": 0.0004072752802812829, |
|
"rewards/waypoint_pred_accuracy/std": 0.001209557721267629, |
|
"step": 22 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 699.125, |
|
"completions/max_terminated_length": 699.125, |
|
"completions/mean_length": 247.458984375, |
|
"completions/mean_terminated_length": 247.458984375, |
|
"completions/min_length": 121.5, |
|
"completions/min_terminated_length": 121.5, |
|
"epoch": 0.04842105263157895, |
|
"grad_norm": 0.024113576859235764, |
|
"learning_rate": 4.5833333333333327e-07, |
|
"loss": 0.0032, |
|
"num_tokens": 11273276.0, |
|
"reward": 1.3881124705076218, |
|
"reward_std": 0.5006838031113148, |
|
"rewards/format_reward_embodied/mean": 0.943359375, |
|
"rewards/format_reward_embodied/std": 0.22258390858769417, |
|
"rewards/stop_prediction_reward/mean": 0.42578125, |
|
"rewards/stop_prediction_reward/std": 0.4117406941950321, |
|
"rewards/waypoint_pred_accuracy/mean": 0.00948592593158537, |
|
"rewards/waypoint_pred_accuracy/std": 0.03859481842846435, |
|
"step": 23 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.001953125, |
|
"completions/max_length": 1426.0, |
|
"completions/max_terminated_length": 1426.0, |
|
"completions/mean_length": 270.671875, |
|
"completions/mean_terminated_length": 271.2647590637207, |
|
"completions/min_length": 100.0, |
|
"completions/min_terminated_length": 115.5, |
|
"epoch": 0.05052631578947368, |
|
"grad_norm": 0.021521741524338722, |
|
"learning_rate": 4.791666666666667e-07, |
|
"loss": 0.0138, |
|
"num_tokens": 11772628.0, |
|
"reward": 1.412862166762352, |
|
"reward_std": 0.4836365692317486, |
|
"rewards/format_reward_embodied/mean": 0.94140625, |
|
"rewards/format_reward_embodied/std": 0.22402114421129227, |
|
"rewards/stop_prediction_reward/mean": 0.46875, |
|
"rewards/stop_prediction_reward/std": 0.4050610587000847, |
|
"rewards/waypoint_pred_accuracy/mean": 0.0013529594958612285, |
|
"rewards/waypoint_pred_accuracy/std": 0.008442860726859186, |
|
"step": 24 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 503.375, |
|
"completions/max_terminated_length": 503.375, |
|
"completions/mean_length": 252.7265625, |
|
"completions/mean_terminated_length": 252.7265625, |
|
"completions/min_length": 115.75, |
|
"completions/min_terminated_length": 115.75, |
|
"epoch": 0.05263157894736842, |
|
"grad_norm": 0.023303357884287834, |
|
"learning_rate": 5e-07, |
|
"loss": 0.0022, |
|
"num_tokens": 12260936.0, |
|
"reward": 1.3870358616113663, |
|
"reward_std": 0.4634270928800106, |
|
"rewards/format_reward_embodied/mean": 0.97265625, |
|
"rewards/format_reward_embodied/std": 0.14094455912709236, |
|
"rewards/stop_prediction_reward/mean": 0.404296875, |
|
"rewards/stop_prediction_reward/std": 0.41460882127285004, |
|
"rewards/waypoint_pred_accuracy/mean": 0.005041355174832356, |
|
"rewards/waypoint_pred_accuracy/std": 0.030215839982022222, |
|
"step": 25 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 802.125, |
|
"completions/max_terminated_length": 802.125, |
|
"completions/mean_length": 252.96484375, |
|
"completions/mean_terminated_length": 252.96484375, |
|
"completions/min_length": 117.625, |
|
"completions/min_terminated_length": 117.625, |
|
"epoch": 0.05473684210526316, |
|
"grad_norm": 0.022509992122650146, |
|
"learning_rate": 5.208333333333334e-07, |
|
"loss": 0.0072, |
|
"num_tokens": 12752118.0, |
|
"reward": 1.3777707070112228, |
|
"reward_std": 0.4540855921804905, |
|
"rewards/format_reward_embodied/mean": 0.962890625, |
|
"rewards/format_reward_embodied/std": 0.1879090555012226, |
|
"rewards/stop_prediction_reward/mean": 0.388671875, |
|
"rewards/stop_prediction_reward/std": 0.3826281502842903, |
|
"rewards/waypoint_pred_accuracy/mean": 0.013104110299219339, |
|
"rewards/waypoint_pred_accuracy/std": 0.023219284697190704, |
|
"step": 26 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.001953125, |
|
"completions/max_length": 1812.5, |
|
"completions/max_terminated_length": 1812.5, |
|
"completions/mean_length": 276.763671875, |
|
"completions/mean_terminated_length": 277.41750717163086, |
|
"completions/min_length": 101.75, |
|
"completions/min_terminated_length": 118.5, |
|
"epoch": 0.056842105263157895, |
|
"grad_norm": 0.025979243218898773, |
|
"learning_rate": 5.416666666666666e-07, |
|
"loss": 0.0201, |
|
"num_tokens": 13257853.0, |
|
"reward": 1.195212036371231, |
|
"reward_std": 0.4585261270403862, |
|
"rewards/format_reward_embodied/mean": 0.9453125, |
|
"rewards/format_reward_embodied/std": 0.21002393402159214, |
|
"rewards/stop_prediction_reward/mean": 0.248046875, |
|
"rewards/stop_prediction_reward/std": 0.3809744007885456, |
|
"rewards/waypoint_pred_accuracy/mean": 0.0009263323242719891, |
|
"rewards/waypoint_pred_accuracy/std": 0.0071097194065939074, |
|
"step": 27 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1423.875, |
|
"completions/max_terminated_length": 1423.875, |
|
"completions/mean_length": 266.958984375, |
|
"completions/mean_terminated_length": 266.958984375, |
|
"completions/min_length": 121.5, |
|
"completions/min_terminated_length": 121.5, |
|
"epoch": 0.05894736842105263, |
|
"grad_norm": 0.02617989294230938, |
|
"learning_rate": 5.625e-07, |
|
"loss": 0.0123, |
|
"num_tokens": 13756648.0, |
|
"reward": 1.4108145833015442, |
|
"reward_std": 0.4676571935415268, |
|
"rewards/format_reward_embodied/mean": 0.96484375, |
|
"rewards/format_reward_embodied/std": 0.1660303734242916, |
|
"rewards/stop_prediction_reward/mean": 0.427734375, |
|
"rewards/stop_prediction_reward/std": 0.40000360645353794, |
|
"rewards/waypoint_pred_accuracy/mean": 0.009118233890500024, |
|
"rewards/waypoint_pred_accuracy/std": 0.03189600147387662, |
|
"step": 28 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 790.375, |
|
"completions/max_terminated_length": 790.375, |
|
"completions/mean_length": 253.384765625, |
|
"completions/mean_terminated_length": 253.384765625, |
|
"completions/min_length": 113.125, |
|
"completions/min_terminated_length": 113.125, |
|
"epoch": 0.061052631578947365, |
|
"grad_norm": 0.0284319706261158, |
|
"learning_rate": 5.833333333333334e-07, |
|
"loss": 0.004, |
|
"num_tokens": 14246829.0, |
|
"reward": 1.3669871091842651, |
|
"reward_std": 0.476172287017107, |
|
"rewards/format_reward_embodied/mean": 0.9609375, |
|
"rewards/format_reward_embodied/std": 0.19018890894949436, |
|
"rewards/stop_prediction_reward/mean": 0.376953125, |
|
"rewards/stop_prediction_reward/std": 0.38546351715922356, |
|
"rewards/waypoint_pred_accuracy/mean": 0.014548240964149528, |
|
"rewards/waypoint_pred_accuracy/std": 0.048405178813538896, |
|
"step": 29 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 946.125, |
|
"completions/max_terminated_length": 946.125, |
|
"completions/mean_length": 267.513671875, |
|
"completions/mean_terminated_length": 267.513671875, |
|
"completions/min_length": 118.25, |
|
"completions/min_terminated_length": 118.25, |
|
"epoch": 0.06315789473684211, |
|
"grad_norm": 0.02758762799203396, |
|
"learning_rate": 6.041666666666666e-07, |
|
"loss": 0.0087, |
|
"num_tokens": 14742708.0, |
|
"reward": 1.3311925828456879, |
|
"reward_std": 0.5349335558712482, |
|
"rewards/format_reward_embodied/mean": 0.94921875, |
|
"rewards/format_reward_embodied/std": 0.21471346728503704, |
|
"rewards/stop_prediction_reward/mean": 0.375, |
|
"rewards/stop_prediction_reward/std": 0.4617812857031822, |
|
"rewards/waypoint_pred_accuracy/mean": 0.003486919093547547, |
|
"rewards/waypoint_pred_accuracy/std": 0.022435040602790033, |
|
"step": 30 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 468.875, |
|
"completions/max_terminated_length": 468.875, |
|
"completions/mean_length": 247.80859375, |
|
"completions/mean_terminated_length": 247.80859375, |
|
"completions/min_length": 116.125, |
|
"completions/min_terminated_length": 116.125, |
|
"epoch": 0.06526315789473684, |
|
"grad_norm": 0.024238400161266327, |
|
"learning_rate": 6.249999999999999e-07, |
|
"loss": 0.0006, |
|
"num_tokens": 15230930.0, |
|
"reward": 1.4348655045032501, |
|
"reward_std": 0.4678279310464859, |
|
"rewards/format_reward_embodied/mean": 0.982421875, |
|
"rewards/format_reward_embodied/std": 0.09929289110004902, |
|
"rewards/stop_prediction_reward/mean": 0.439453125, |
|
"rewards/stop_prediction_reward/std": 0.4299692139029503, |
|
"rewards/waypoint_pred_accuracy/mean": 0.006495264507384302, |
|
"rewards/waypoint_pred_accuracy/std": 0.03605599632994938, |
|
"step": 31 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 701.5, |
|
"completions/max_terminated_length": 701.5, |
|
"completions/mean_length": 247.302734375, |
|
"completions/mean_terminated_length": 247.302734375, |
|
"completions/min_length": 102.625, |
|
"completions/min_terminated_length": 102.625, |
|
"epoch": 0.06736842105263158, |
|
"grad_norm": 0.03117675893008709, |
|
"learning_rate": 6.458333333333333e-07, |
|
"loss": 0.0048, |
|
"num_tokens": 15717165.0, |
|
"reward": 1.276307299733162, |
|
"reward_std": 0.4745783172547817, |
|
"rewards/format_reward_embodied/mean": 0.9765625, |
|
"rewards/format_reward_embodied/std": 0.11361248232424259, |
|
"rewards/stop_prediction_reward/mean": 0.287109375, |
|
"rewards/stop_prediction_reward/std": 0.4378196634352207, |
|
"rewards/waypoint_pred_accuracy/mean": 0.006317712715827028, |
|
"rewards/waypoint_pred_accuracy/std": 0.02758992835879357, |
|
"step": 32 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 736.625, |
|
"completions/max_terminated_length": 736.625, |
|
"completions/mean_length": 259.27734375, |
|
"completions/mean_terminated_length": 259.27734375, |
|
"completions/min_length": 132.125, |
|
"completions/min_terminated_length": 132.125, |
|
"epoch": 0.06947368421052631, |
|
"grad_norm": 0.0244793351739645, |
|
"learning_rate": 6.666666666666666e-07, |
|
"loss": 0.0053, |
|
"num_tokens": 16211835.0, |
|
"reward": 1.540584921836853, |
|
"reward_std": 0.49051226675510406, |
|
"rewards/format_reward_embodied/mean": 0.96484375, |
|
"rewards/format_reward_embodied/std": 0.18235719576478004, |
|
"rewards/stop_prediction_reward/mean": 0.57421875, |
|
"rewards/stop_prediction_reward/std": 0.43413203582167625, |
|
"rewards/waypoint_pred_accuracy/mean": 0.0007612065199357397, |
|
"rewards/waypoint_pred_accuracy/std": 0.005327658200312494, |
|
"step": 33 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.001953125, |
|
"completions/max_length": 881.375, |
|
"completions/max_terminated_length": 881.375, |
|
"completions/mean_length": 264.103515625, |
|
"completions/mean_terminated_length": 264.58401679992676, |
|
"completions/min_length": 101.375, |
|
"completions/min_terminated_length": 113.25, |
|
"epoch": 0.07157894736842105, |
|
"grad_norm": 0.0207452904433012, |
|
"learning_rate": 6.875e-07, |
|
"loss": 0.0068, |
|
"num_tokens": 16706928.0, |
|
"reward": 1.4342780411243439, |
|
"reward_std": 0.5214410163462162, |
|
"rewards/format_reward_embodied/mean": 0.9765625, |
|
"rewards/format_reward_embodied/std": 0.12835253402590752, |
|
"rewards/stop_prediction_reward/mean": 0.453125, |
|
"rewards/stop_prediction_reward/std": 0.47532549500465393, |
|
"rewards/waypoint_pred_accuracy/mean": 0.0022952774760399775, |
|
"rewards/waypoint_pred_accuracy/std": 0.016314118760955294, |
|
"step": 34 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 544.75, |
|
"completions/max_terminated_length": 544.75, |
|
"completions/mean_length": 247.744140625, |
|
"completions/mean_terminated_length": 247.744140625, |
|
"completions/min_length": 115.375, |
|
"completions/min_terminated_length": 115.375, |
|
"epoch": 0.07368421052631578, |
|
"grad_norm": 0.02019825391471386, |
|
"learning_rate": 7.083333333333334e-07, |
|
"loss": 0.0026, |
|
"num_tokens": 17196205.0, |
|
"reward": 1.4062748402357101, |
|
"reward_std": 0.5094473846256733, |
|
"rewards/format_reward_embodied/mean": 0.97265625, |
|
"rewards/format_reward_embodied/std": 0.15960253402590752, |
|
"rewards/stop_prediction_reward/mean": 0.43359375, |
|
"rewards/stop_prediction_reward/std": 0.4651285596191883, |
|
"rewards/waypoint_pred_accuracy/mean": 1.2417779022436235e-05, |
|
"rewards/waypoint_pred_accuracy/std": 2.711040125229227e-05, |
|
"step": 35 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1784.875, |
|
"completions/max_terminated_length": 1784.875, |
|
"completions/mean_length": 291.40625, |
|
"completions/mean_terminated_length": 291.40625, |
|
"completions/min_length": 128.75, |
|
"completions/min_terminated_length": 128.75, |
|
"epoch": 0.07578947368421053, |
|
"grad_norm": 0.038221534341573715, |
|
"learning_rate": 7.291666666666666e-07, |
|
"loss": 0.0175, |
|
"num_tokens": 17710461.0, |
|
"reward": 1.4911664873361588, |
|
"reward_std": 0.5095744393765926, |
|
"rewards/format_reward_embodied/mean": 0.978515625, |
|
"rewards/format_reward_embodied/std": 0.11029814556241035, |
|
"rewards/stop_prediction_reward/mean": 0.498046875, |
|
"rewards/stop_prediction_reward/std": 0.46525831148028374, |
|
"rewards/waypoint_pred_accuracy/mean": 0.007301997149683504, |
|
"rewards/waypoint_pred_accuracy/std": 0.024017843105910822, |
|
"step": 36 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 472.75, |
|
"completions/max_terminated_length": 472.75, |
|
"completions/mean_length": 248.03125, |
|
"completions/mean_terminated_length": 248.03125, |
|
"completions/min_length": 117.375, |
|
"completions/min_terminated_length": 117.375, |
|
"epoch": 0.07789473684210527, |
|
"grad_norm": 0.02083674818277359, |
|
"learning_rate": 7.5e-07, |
|
"loss": 0.0023, |
|
"num_tokens": 18197965.0, |
|
"reward": 1.4861425906419754, |
|
"reward_std": 0.5738211683928967, |
|
"rewards/format_reward_embodied/mean": 0.9921875, |
|
"rewards/format_reward_embodied/std": 0.05317101255059242, |
|
"rewards/stop_prediction_reward/mean": 0.42578125, |
|
"rewards/stop_prediction_reward/std": 0.4866231083869934, |
|
"rewards/waypoint_pred_accuracy/mean": 0.03408691443473799, |
|
"rewards/waypoint_pred_accuracy/std": 0.08652095211436972, |
|
"step": 37 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 884.625, |
|
"completions/max_terminated_length": 884.625, |
|
"completions/mean_length": 258.75390625, |
|
"completions/mean_terminated_length": 258.75390625, |
|
"completions/min_length": 125.5, |
|
"completions/min_terminated_length": 125.5, |
|
"epoch": 0.08, |
|
"grad_norm": 0.021931249648332596, |
|
"learning_rate": 7.708333333333333e-07, |
|
"loss": 0.0048, |
|
"num_tokens": 18692239.0, |
|
"reward": 1.6138971894979477, |
|
"reward_std": 0.5961987935006618, |
|
"rewards/format_reward_embodied/mean": 0.98828125, |
|
"rewards/format_reward_embodied/std": 0.07509202510118484, |
|
"rewards/stop_prediction_reward/mean": 0.513671875, |
|
"rewards/stop_prediction_reward/std": 0.488056443631649, |
|
"rewards/waypoint_pred_accuracy/mean": 0.05597203067737698, |
|
"rewards/waypoint_pred_accuracy/std": 0.09776196270839843, |
|
"step": 38 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 755.875, |
|
"completions/max_terminated_length": 755.875, |
|
"completions/mean_length": 250.787109375, |
|
"completions/mean_terminated_length": 250.787109375, |
|
"completions/min_length": 119.125, |
|
"completions/min_terminated_length": 119.125, |
|
"epoch": 0.08210526315789474, |
|
"grad_norm": 0.15428805351257324, |
|
"learning_rate": 7.916666666666666e-07, |
|
"loss": 0.0043, |
|
"num_tokens": 19183074.0, |
|
"reward": 1.4990187734365463, |
|
"reward_std": 0.49899255111813545, |
|
"rewards/format_reward_embodied/mean": 0.98046875, |
|
"rewards/format_reward_embodied/std": 0.1064315214753151, |
|
"rewards/stop_prediction_reward/mean": 0.517578125, |
|
"rewards/stop_prediction_reward/std": 0.4714191108942032, |
|
"rewards/waypoint_pred_accuracy/mean": 0.00048596067600919097, |
|
"rewards/waypoint_pred_accuracy/std": 0.0015497554879284957, |
|
"step": 39 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 443.0, |
|
"completions/max_terminated_length": 443.0, |
|
"completions/mean_length": 246.05078125, |
|
"completions/mean_terminated_length": 246.05078125, |
|
"completions/min_length": 117.125, |
|
"completions/min_terminated_length": 117.125, |
|
"epoch": 0.08421052631578947, |
|
"grad_norm": 0.033689290285110474, |
|
"learning_rate": 8.125e-07, |
|
"loss": 0.0002, |
|
"num_tokens": 19674556.0, |
|
"reward": 1.52012699842453, |
|
"reward_std": 0.5320924893021584, |
|
"rewards/format_reward_embodied/mean": 0.98828125, |
|
"rewards/format_reward_embodied/std": 0.09375, |
|
"rewards/stop_prediction_reward/mean": 0.494140625, |
|
"rewards/stop_prediction_reward/std": 0.4897289089858532, |
|
"rewards/waypoint_pred_accuracy/mean": 0.018852562323445468, |
|
"rewards/waypoint_pred_accuracy/std": 0.04221876614610676, |
|
"step": 40 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 451.625, |
|
"completions/max_terminated_length": 451.625, |
|
"completions/mean_length": 233.865234375, |
|
"completions/mean_terminated_length": 233.865234375, |
|
"completions/min_length": 106.0, |
|
"completions/min_terminated_length": 106.0, |
|
"epoch": 0.0863157894736842, |
|
"grad_norm": 0.025640549138188362, |
|
"learning_rate": 8.333333333333333e-07, |
|
"loss": 0.0011, |
|
"num_tokens": 20156151.0, |
|
"reward": 1.591551125049591, |
|
"reward_std": 0.5175898559391499, |
|
"rewards/format_reward_embodied/mean": 0.98828125, |
|
"rewards/format_reward_embodied/std": 0.08442101255059242, |
|
"rewards/stop_prediction_reward/mean": 0.5703125, |
|
"rewards/stop_prediction_reward/std": 0.4659374840557575, |
|
"rewards/waypoint_pred_accuracy/mean": 0.016478684779628456, |
|
"rewards/waypoint_pred_accuracy/std": 0.05339623770077609, |
|
"step": 41 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 500.0, |
|
"completions/max_terminated_length": 500.0, |
|
"completions/mean_length": 242.478515625, |
|
"completions/mean_terminated_length": 242.478515625, |
|
"completions/min_length": 112.25, |
|
"completions/min_terminated_length": 112.25, |
|
"epoch": 0.08842105263157894, |
|
"grad_norm": 0.026281312108039856, |
|
"learning_rate": 8.541666666666666e-07, |
|
"loss": -0.0, |
|
"num_tokens": 20639788.0, |
|
"reward": 1.5611481666564941, |
|
"reward_std": 0.4784001186490059, |
|
"rewards/format_reward_embodied/mean": 0.99609375, |
|
"rewards/format_reward_embodied/std": 0.03125, |
|
"rewards/stop_prediction_reward/mean": 0.560546875, |
|
"rewards/stop_prediction_reward/std": 0.4726823903620243, |
|
"rewards/waypoint_pred_accuracy/mean": 0.002253779932873278, |
|
"rewards/waypoint_pred_accuracy/std": 0.015716358982028673, |
|
"step": 42 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 810.875, |
|
"completions/max_terminated_length": 810.875, |
|
"completions/mean_length": 249.939453125, |
|
"completions/mean_terminated_length": 249.939453125, |
|
"completions/min_length": 122.875, |
|
"completions/min_terminated_length": 122.875, |
|
"epoch": 0.09052631578947369, |
|
"grad_norm": 0.021544892340898514, |
|
"learning_rate": 8.75e-07, |
|
"loss": 0.0029, |
|
"num_tokens": 21128013.0, |
|
"reward": 1.4784268736839294, |
|
"reward_std": 0.48740382865071297, |
|
"rewards/format_reward_embodied/mean": 0.990234375, |
|
"rewards/format_reward_embodied/std": 0.06879601255059242, |
|
"rewards/stop_prediction_reward/mean": 0.484375, |
|
"rewards/stop_prediction_reward/std": 0.4655333496630192, |
|
"rewards/waypoint_pred_accuracy/mean": 0.0019087545888396566, |
|
"rewards/waypoint_pred_accuracy/std": 0.010897019660660593, |
|
"step": 43 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.001953125, |
|
"completions/max_length": 643.625, |
|
"completions/max_terminated_length": 643.625, |
|
"completions/mean_length": 245.134765625, |
|
"completions/mean_terminated_length": 245.66865158081055, |
|
"completions/min_length": 101.25, |
|
"completions/min_terminated_length": 117.75, |
|
"epoch": 0.09263157894736843, |
|
"grad_norm": 0.025143882259726524, |
|
"learning_rate": 8.958333333333334e-07, |
|
"loss": 0.0038, |
|
"num_tokens": 21614866.0, |
|
"reward": 1.6192794144153595, |
|
"reward_std": 0.4844088666141033, |
|
"rewards/format_reward_embodied/mean": 0.982421875, |
|
"rewards/format_reward_embodied/std": 0.11263803765177727, |
|
"rewards/stop_prediction_reward/mean": 0.63671875, |
|
"rewards/stop_prediction_reward/std": 0.4543136991560459, |
|
"rewards/waypoint_pred_accuracy/mean": 6.939472140954406e-05, |
|
"rewards/waypoint_pred_accuracy/std": 0.00043998260686706566, |
|
"step": 44 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 479.375, |
|
"completions/max_terminated_length": 479.375, |
|
"completions/mean_length": 238.673828125, |
|
"completions/mean_terminated_length": 238.673828125, |
|
"completions/min_length": 115.625, |
|
"completions/min_terminated_length": 115.625, |
|
"epoch": 0.09473684210526316, |
|
"grad_norm": 0.023476244881749153, |
|
"learning_rate": 9.166666666666665e-07, |
|
"loss": 0.0007, |
|
"num_tokens": 22098731.0, |
|
"reward": 1.512737661600113, |
|
"reward_std": 0.5365940853953362, |
|
"rewards/format_reward_embodied/mean": 0.994140625, |
|
"rewards/format_reward_embodied/std": 0.026630254462361336, |
|
"rewards/stop_prediction_reward/mean": 0.45703125, |
|
"rewards/stop_prediction_reward/std": 0.48733755201101303, |
|
"rewards/waypoint_pred_accuracy/mean": 0.03078289819380149, |
|
"rewards/waypoint_pred_accuracy/std": 0.043302708805491946, |
|
"step": 45 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 641.875, |
|
"completions/max_terminated_length": 641.875, |
|
"completions/mean_length": 240.724609375, |
|
"completions/mean_terminated_length": 240.724609375, |
|
"completions/min_length": 118.625, |
|
"completions/min_terminated_length": 118.625, |
|
"epoch": 0.0968421052631579, |
|
"grad_norm": 0.020364033058285713, |
|
"learning_rate": 9.374999999999999e-07, |
|
"loss": 0.0029, |
|
"num_tokens": 22583070.0, |
|
"reward": 1.6677764654159546, |
|
"reward_std": 0.5154935717582703, |
|
"rewards/format_reward_embodied/mean": 0.98828125, |
|
"rewards/format_reward_embodied/std": 0.07350525446236134, |
|
"rewards/stop_prediction_reward/mean": 0.626953125, |
|
"rewards/stop_prediction_reward/std": 0.44941750913858414, |
|
"rewards/waypoint_pred_accuracy/mean": 0.02627104918529134, |
|
"rewards/waypoint_pred_accuracy/std": 0.0397095277659446, |
|
"step": 46 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 657.375, |
|
"completions/max_terminated_length": 657.375, |
|
"completions/mean_length": 239.5546875, |
|
"completions/mean_terminated_length": 239.5546875, |
|
"completions/min_length": 118.875, |
|
"completions/min_terminated_length": 118.875, |
|
"epoch": 0.09894736842105263, |
|
"grad_norm": 0.023410305380821228, |
|
"learning_rate": 9.583333333333334e-07, |
|
"loss": 0.0039, |
|
"num_tokens": 23067066.0, |
|
"reward": 1.7648355215787888, |
|
"reward_std": 0.5304676033556461, |
|
"rewards/format_reward_embodied/mean": 0.994140625, |
|
"rewards/format_reward_embodied/std": 0.046875, |
|
"rewards/stop_prediction_reward/mean": 0.685546875, |
|
"rewards/stop_prediction_reward/std": 0.45773619785904884, |
|
"rewards/waypoint_pred_accuracy/mean": 0.042574012356195935, |
|
"rewards/waypoint_pred_accuracy/std": 0.07306883804197034, |
|
"step": 47 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 423.625, |
|
"completions/max_terminated_length": 423.625, |
|
"completions/mean_length": 236.12890625, |
|
"completions/mean_terminated_length": 236.12890625, |
|
"completions/min_length": 117.0, |
|
"completions/min_terminated_length": 117.0, |
|
"epoch": 0.10105263157894737, |
|
"grad_norm": 0.023688504472374916, |
|
"learning_rate": 9.791666666666667e-07, |
|
"loss": -0.0006, |
|
"num_tokens": 23549436.0, |
|
"reward": 1.70817232131958, |
|
"reward_std": 0.5296522080898285, |
|
"rewards/format_reward_embodied/mean": 0.990234375, |
|
"rewards/format_reward_embodied/std": 0.06879601255059242, |
|
"rewards/stop_prediction_reward/mean": 0.63671875, |
|
"rewards/stop_prediction_reward/std": 0.45957546308636665, |
|
"rewards/waypoint_pred_accuracy/mean": 0.04060960025526583, |
|
"rewards/waypoint_pred_accuracy/std": 0.07721152482554317, |
|
"step": 48 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 439.5, |
|
"completions/max_terminated_length": 439.5, |
|
"completions/mean_length": 239.724609375, |
|
"completions/mean_terminated_length": 239.724609375, |
|
"completions/min_length": 112.625, |
|
"completions/min_terminated_length": 112.625, |
|
"epoch": 0.1031578947368421, |
|
"grad_norm": 0.04075814038515091, |
|
"learning_rate": 1e-06, |
|
"loss": 0.0012, |
|
"num_tokens": 24036719.0, |
|
"reward": 1.5996688604354858, |
|
"reward_std": 0.43832388520240784, |
|
"rewards/format_reward_embodied/mean": 0.9921875, |
|
"rewards/format_reward_embodied/std": 0.05317101255059242, |
|
"rewards/stop_prediction_reward/mean": 0.607421875, |
|
"rewards/stop_prediction_reward/std": 0.4225967414677143, |
|
"rewards/waypoint_pred_accuracy/mean": 2.974685131453066e-05, |
|
"rewards/waypoint_pred_accuracy/std": 0.00014841147029464143, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.10526315789473684, |
|
"grad_norm": 0.02866051159799099, |
|
"learning_rate": 9.999878206375666e-07, |
|
"loss": 0.0015, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.10526315789473684, |
|
"eval_clip_ratio": 0.0, |
|
"eval_completions/clipped_ratio": 0.00015625, |
|
"eval_completions/max_length": 683.96, |
|
"eval_completions/max_terminated_length": 683.96, |
|
"eval_completions/mean_length": 238.32913192749024, |
|
"eval_completions/mean_terminated_length": 238.36774322509766, |
|
"eval_completions/min_length": 112.41, |
|
"eval_completions/min_terminated_length": 113.78, |
|
"eval_loss": 0.003631497733294964, |
|
"eval_num_tokens": 24520656.0, |
|
"eval_reward": 1.6925481045246125, |
|
"eval_reward_std": 0.4437328398227692, |
|
"eval_rewards/format_reward_embodied/mean": 0.99328125, |
|
"eval_rewards/format_reward_embodied/std": 0.04765250638127327, |
|
"eval_rewards/stop_prediction_reward/mean": 0.66328125, |
|
"eval_rewards/stop_prediction_reward/std": 0.40405205205082895, |
|
"eval_rewards/waypoint_pred_accuracy/mean": 0.017992802756573712, |
|
"eval_rewards/waypoint_pred_accuracy/std": 0.04001634344926445, |
|
"eval_runtime": 1355.6164, |
|
"eval_samples_per_second": 0.074, |
|
"eval_steps_per_second": 0.001, |
|
"step": 50 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 450.375, |
|
"completions/max_terminated_length": 450.375, |
|
"completions/mean_length": 239.6259765625, |
|
"completions/mean_terminated_length": 239.6259765625, |
|
"completions/min_length": 116.5, |
|
"completions/min_terminated_length": 116.5, |
|
"epoch": 0.10736842105263159, |
|
"grad_norm": 0.02182384580373764, |
|
"learning_rate": 9.999512832095417e-07, |
|
"loss": 0.0005, |
|
"num_tokens": 25006064.0, |
|
"reward": 1.68679628521204, |
|
"reward_std": 0.4470259975641966, |
|
"rewards/format_reward_embodied/mean": 0.9931640625, |
|
"rewards/format_reward_embodied/std": 0.05002300627529621, |
|
"rewards/stop_prediction_reward/mean": 0.65234375, |
|
"rewards/stop_prediction_reward/std": 0.4022445324808359, |
|
"rewards/waypoint_pred_accuracy/mean": 0.02064424328141934, |
|
"rewards/waypoint_pred_accuracy/std": 0.05010251636930185, |
|
"step": 51 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.001953125, |
|
"completions/max_length": 427.5, |
|
"completions/max_terminated_length": 427.5, |
|
"completions/mean_length": 230.73828125, |
|
"completions/mean_terminated_length": 231.21041297912598, |
|
"completions/min_length": 99.875, |
|
"completions/min_terminated_length": 114.375, |
|
"epoch": 0.10947368421052632, |
|
"grad_norm": 0.02381049655377865, |
|
"learning_rate": 9.998903896937148e-07, |
|
"loss": 0.0012, |
|
"num_tokens": 25485546.0, |
|
"reward": 1.7381224930286407, |
|
"reward_std": 0.4321938529610634, |
|
"rewards/format_reward_embodied/mean": 0.99609375, |
|
"rewards/format_reward_embodied/std": 0.03125, |
|
"rewards/stop_prediction_reward/mean": 0.716796875, |
|
"rewards/stop_prediction_reward/std": 0.3914393372833729, |
|
"rewards/waypoint_pred_accuracy/mean": 0.01261594578698389, |
|
"rewards/waypoint_pred_accuracy/std": 0.03588760504630355, |
|
"step": 52 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 450.5, |
|
"completions/max_terminated_length": 450.5, |
|
"completions/mean_length": 234.287109375, |
|
"completions/mean_terminated_length": 234.287109375, |
|
"completions/min_length": 113.0, |
|
"completions/min_terminated_length": 113.0, |
|
"epoch": 0.11157894736842106, |
|
"grad_norm": 0.05357426404953003, |
|
"learning_rate": 9.998051433862818e-07, |
|
"loss": -0.0001, |
|
"num_tokens": 25966589.0, |
|
"reward": 1.6857829988002777, |
|
"reward_std": 0.3670726828277111, |
|
"rewards/format_reward_embodied/mean": 0.994140625, |
|
"rewards/format_reward_embodied/std": 0.046875, |
|
"rewards/stop_prediction_reward/mean": 0.69140625, |
|
"rewards/stop_prediction_reward/std": 0.3526647798717022, |
|
"rewards/waypoint_pred_accuracy/mean": 0.00011806207834793561, |
|
"rewards/waypoint_pred_accuracy/std": 0.0009230129067357363, |
|
"step": 53 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 413.75, |
|
"completions/max_terminated_length": 413.75, |
|
"completions/mean_length": 226.93359375, |
|
"completions/mean_terminated_length": 226.93359375, |
|
"completions/min_length": 108.125, |
|
"completions/min_terminated_length": 108.125, |
|
"epoch": 0.11368421052631579, |
|
"grad_norm": 0.024311864748597145, |
|
"learning_rate": 9.996955489016681e-07, |
|
"loss": 0.0006, |
|
"num_tokens": 26444507.0, |
|
"reward": 1.5553173422813416, |
|
"reward_std": 0.38553351908922195, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.5234375, |
|
"rewards/stop_prediction_reward/std": 0.3467428870499134, |
|
"rewards/waypoint_pred_accuracy/mean": 0.015939914255370412, |
|
"rewards/waypoint_pred_accuracy/std": 0.039412272800977984, |
|
"step": 54 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 667.5, |
|
"completions/max_terminated_length": 667.5, |
|
"completions/mean_length": 246.060546875, |
|
"completions/mean_terminated_length": 246.060546875, |
|
"completions/min_length": 111.125, |
|
"completions/min_terminated_length": 111.125, |
|
"epoch": 0.11578947368421053, |
|
"grad_norm": 0.020382562652230263, |
|
"learning_rate": 9.995616121722783e-07, |
|
"loss": 0.0024, |
|
"num_tokens": 26931770.0, |
|
"reward": 1.543064832687378, |
|
"reward_std": 0.35424431413412094, |
|
"rewards/format_reward_embodied/mean": 0.990234375, |
|
"rewards/format_reward_embodied/std": 0.06879601255059242, |
|
"rewards/stop_prediction_reward/mean": 0.548828125, |
|
"rewards/stop_prediction_reward/std": 0.3284119311720133, |
|
"rewards/waypoint_pred_accuracy/mean": 0.002001162469869708, |
|
"rewards/waypoint_pred_accuracy/std": 0.01592240231514961, |
|
"step": 55 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 462.75, |
|
"completions/max_terminated_length": 462.75, |
|
"completions/mean_length": 225.984375, |
|
"completions/mean_terminated_length": 225.984375, |
|
"completions/min_length": 113.875, |
|
"completions/min_terminated_length": 113.875, |
|
"epoch": 0.11789473684210526, |
|
"grad_norm": 0.020005574449896812, |
|
"learning_rate": 9.994033404481736e-07, |
|
"loss": 0.0004, |
|
"num_tokens": 27406770.0, |
|
"reward": 1.6594459414482117, |
|
"reward_std": 0.3725608382374048, |
|
"rewards/format_reward_embodied/mean": 0.986328125, |
|
"rewards/format_reward_embodied/std": 0.07980126701295376, |
|
"rewards/stop_prediction_reward/mean": 0.6640625, |
|
"rewards/stop_prediction_reward/std": 0.33192422799766064, |
|
"rewards/waypoint_pred_accuracy/mean": 0.004527658324403952, |
|
"rewards/waypoint_pred_accuracy/std": 0.017977507960307844, |
|
"step": 56 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 450.5, |
|
"completions/max_terminated_length": 450.5, |
|
"completions/mean_length": 234.10546875, |
|
"completions/mean_terminated_length": 234.10546875, |
|
"completions/min_length": 115.0, |
|
"completions/min_terminated_length": 115.0, |
|
"epoch": 0.12, |
|
"grad_norm": 0.02229822427034378, |
|
"learning_rate": 9.992207422966824e-07, |
|
"loss": -0.0001, |
|
"num_tokens": 27889640.0, |
|
"reward": 1.4391246140003204, |
|
"reward_std": 0.35483869537711143, |
|
"rewards/format_reward_embodied/mean": 0.99609375, |
|
"rewards/format_reward_embodied/std": 0.03125, |
|
"rewards/stop_prediction_reward/mean": 0.4375, |
|
"rewards/stop_prediction_reward/std": 0.34185592643916607, |
|
"rewards/waypoint_pred_accuracy/mean": 0.002765428128381, |
|
"rewards/waypoint_pred_accuracy/std": 0.014523094108374813, |
|
"step": 57 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.001953125, |
|
"completions/max_length": 747.625, |
|
"completions/max_terminated_length": 747.625, |
|
"completions/mean_length": 233.369140625, |
|
"completions/mean_terminated_length": 233.90501022338867, |
|
"completions/min_length": 99.75, |
|
"completions/min_terminated_length": 113.5, |
|
"epoch": 0.12210526315789473, |
|
"grad_norm": 0.025426389649510384, |
|
"learning_rate": 9.990138276019335e-07, |
|
"loss": 0.0038, |
|
"num_tokens": 28370213.0, |
|
"reward": 1.6101858913898468, |
|
"reward_std": 0.3749941308051348, |
|
"rewards/format_reward_embodied/mean": 0.9921875, |
|
"rewards/format_reward_embodied/std": 0.05317101255059242, |
|
"rewards/stop_prediction_reward/mean": 0.59375, |
|
"rewards/stop_prediction_reward/std": 0.32783540338277817, |
|
"rewards/waypoint_pred_accuracy/mean": 0.012124196402905074, |
|
"rewards/waypoint_pred_accuracy/std": 0.041881935120967384, |
|
"step": 58 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 1494.75, |
|
"completions/max_terminated_length": 1494.75, |
|
"completions/mean_length": 264.875, |
|
"completions/mean_terminated_length": 264.875, |
|
"completions/min_length": 119.125, |
|
"completions/min_terminated_length": 119.125, |
|
"epoch": 0.12421052631578948, |
|
"grad_norm": 0.029843533411622047, |
|
"learning_rate": 9.987826075643228e-07, |
|
"loss": 0.0225, |
|
"num_tokens": 28866405.0, |
|
"reward": 1.608642503619194, |
|
"reward_std": 0.37335721030831337, |
|
"rewards/format_reward_embodied/mean": 0.98828125, |
|
"rewards/format_reward_embodied/std": 0.07509202510118484, |
|
"rewards/stop_prediction_reward/mean": 0.619140625, |
|
"rewards/stop_prediction_reward/std": 0.33958676643669605, |
|
"rewards/waypoint_pred_accuracy/mean": 0.0006103267239083258, |
|
"rewards/waypoint_pred_accuracy/std": 0.003143552353714926, |
|
"step": 59 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 460.875, |
|
"completions/max_terminated_length": 460.875, |
|
"completions/mean_length": 250.77734375, |
|
"completions/mean_terminated_length": 250.77734375, |
|
"completions/min_length": 122.25, |
|
"completions/min_terminated_length": 122.25, |
|
"epoch": 0.12631578947368421, |
|
"grad_norm": 0.02350872941315174, |
|
"learning_rate": 9.985270946999066e-07, |
|
"loss": -0.0006, |
|
"num_tokens": 29357939.0, |
|
"reward": 1.5302962958812714, |
|
"reward_std": 0.39041563123464584, |
|
"rewards/format_reward_embodied/mean": 0.998046875, |
|
"rewards/format_reward_embodied/std": 0.015625, |
|
"rewards/stop_prediction_reward/mean": 0.51171875, |
|
"rewards/stop_prediction_reward/std": 0.3654658328741789, |
|
"rewards/waypoint_pred_accuracy/mean": 0.01026534708216827, |
|
"rewards/waypoint_pred_accuracy/std": 0.03301238967106072, |
|
"step": 60 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.001953125, |
|
"completions/max_length": 740.0, |
|
"completions/max_terminated_length": 740.0, |
|
"completions/mean_length": 254.34765625, |
|
"completions/mean_terminated_length": 254.9265899658203, |
|
"completions/min_length": 107.25, |
|
"completions/min_terminated_length": 128.25, |
|
"epoch": 0.12842105263157894, |
|
"grad_norm": 0.028141073882579803, |
|
"learning_rate": 9.982473028397236e-07, |
|
"loss": 0.004, |
|
"num_tokens": 29850341.0, |
|
"reward": 1.908710554242134, |
|
"reward_std": 0.4512513056397438, |
|
"rewards/format_reward_embodied/mean": 0.98828125, |
|
"rewards/format_reward_embodied/std": 0.07509202510118484, |
|
"rewards/stop_prediction_reward/mean": 0.8515625, |
|
"rewards/stop_prediction_reward/std": 0.34670688211917877, |
|
"rewards/waypoint_pred_accuracy/mean": 0.034433397710529334, |
|
"rewards/waypoint_pred_accuracy/std": 0.06185820607151982, |
|
"step": 61 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 888.0, |
|
"completions/max_terminated_length": 888.0, |
|
"completions/mean_length": 255.419921875, |
|
"completions/mean_terminated_length": 255.419921875, |
|
"completions/min_length": 119.125, |
|
"completions/min_terminated_length": 119.125, |
|
"epoch": 0.13052631578947368, |
|
"grad_norm": 0.0289030522108078, |
|
"learning_rate": 9.979432471290472e-07, |
|
"loss": 0.0055, |
|
"num_tokens": 30342588.0, |
|
"reward": 1.9491963237524033, |
|
"reward_std": 0.4529041275382042, |
|
"rewards/format_reward_embodied/mean": 0.99609375, |
|
"rewards/format_reward_embodied/std": 0.03125, |
|
"rewards/stop_prediction_reward/mean": 0.802734375, |
|
"rewards/stop_prediction_reward/std": 0.3396564405411482, |
|
"rewards/waypoint_pred_accuracy/mean": 0.07518408738549646, |
|
"rewards/waypoint_pred_accuracy/std": 0.09874280986073203, |
|
"step": 62 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 485.625, |
|
"completions/max_terminated_length": 485.625, |
|
"completions/mean_length": 243.8125, |
|
"completions/mean_terminated_length": 243.8125, |
|
"completions/min_length": 104.25, |
|
"completions/min_terminated_length": 104.25, |
|
"epoch": 0.13263157894736843, |
|
"grad_norm": 0.02499276027083397, |
|
"learning_rate": 9.97614944026565e-07, |
|
"loss": -0.0005, |
|
"num_tokens": 30829532.0, |
|
"reward": 1.6152346730232239, |
|
"reward_std": 0.3560887239873409, |
|
"rewards/format_reward_embodied/mean": 0.9921875, |
|
"rewards/format_reward_embodied/std": 0.043842025101184845, |
|
"rewards/stop_prediction_reward/mean": 0.623046875, |
|
"rewards/stop_prediction_reward/std": 0.3392041679471731, |
|
"rewards/waypoint_pred_accuracy/mean": 1.5068036063656496e-07, |
|
"rewards/waypoint_pred_accuracy/std": 1.1848213716913133e-06, |
|
"step": 63 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 487.75, |
|
"completions/max_terminated_length": 487.75, |
|
"completions/mean_length": 242.8671875, |
|
"completions/mean_terminated_length": 242.8671875, |
|
"completions/min_length": 116.875, |
|
"completions/min_terminated_length": 116.875, |
|
"epoch": 0.13473684210526315, |
|
"grad_norm": 0.08438508957624435, |
|
"learning_rate": 9.97262411303488e-07, |
|
"loss": -0.0008, |
|
"num_tokens": 31316376.0, |
|
"reward": 1.676452487707138, |
|
"reward_std": 0.38882749900221825, |
|
"rewards/format_reward_embodied/mean": 0.994140625, |
|
"rewards/format_reward_embodied/std": 0.046875, |
|
"rewards/stop_prediction_reward/mean": 0.654296875, |
|
"rewards/stop_prediction_reward/std": 0.3573997803032398, |
|
"rewards/waypoint_pred_accuracy/mean": 0.014007493944973248, |
|
"rewards/waypoint_pred_accuracy/std": 0.033498459750262555, |
|
"step": 64 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 715.75, |
|
"completions/max_terminated_length": 715.75, |
|
"completions/mean_length": 239.49609375, |
|
"completions/mean_terminated_length": 239.49609375, |
|
"completions/min_length": 118.25, |
|
"completions/min_terminated_length": 118.25, |
|
"epoch": 0.1368421052631579, |
|
"grad_norm": 0.027747681364417076, |
|
"learning_rate": 9.968856680425886e-07, |
|
"loss": 0.0053, |
|
"num_tokens": 31798614.0, |
|
"reward": 1.7896013855934143, |
|
"reward_std": 0.33628559671342373, |
|
"rewards/format_reward_embodied/mean": 0.99609375, |
|
"rewards/format_reward_embodied/std": 0.03125, |
|
"rewards/stop_prediction_reward/mean": 0.7890625, |
|
"rewards/stop_prediction_reward/std": 0.3223333489149809, |
|
"rewards/waypoint_pred_accuracy/mean": 0.0022225715887884694, |
|
"rewards/waypoint_pred_accuracy/std": 0.00479067146991952, |
|
"step": 65 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 639.75, |
|
"completions/max_terminated_length": 639.75, |
|
"completions/mean_length": 251.96875, |
|
"completions/mean_terminated_length": 251.96875, |
|
"completions/min_length": 115.625, |
|
"completions/min_terminated_length": 115.625, |
|
"epoch": 0.13894736842105262, |
|
"grad_norm": 0.028032371774315834, |
|
"learning_rate": 9.964847346371676e-07, |
|
"loss": 0.004, |
|
"num_tokens": 32286790.0, |
|
"reward": 1.8506833761930466, |
|
"reward_std": 0.3642146345227957, |
|
"rewards/format_reward_embodied/mean": 0.994140625, |
|
"rewards/format_reward_embodied/std": 0.046875, |
|
"rewards/stop_prediction_reward/mean": 0.830078125, |
|
"rewards/stop_prediction_reward/std": 0.3009116370230913, |
|
"rewards/waypoint_pred_accuracy/mean": 0.013232328761660028, |
|
"rewards/waypoint_pred_accuracy/std": 0.03860976189025678, |
|
"step": 66 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.001953125, |
|
"completions/max_length": 481.25, |
|
"completions/max_terminated_length": 481.25, |
|
"completions/mean_length": 247.291015625, |
|
"completions/mean_terminated_length": 247.7331371307373, |
|
"completions/min_length": 100.25, |
|
"completions/min_terminated_length": 115.125, |
|
"epoch": 0.14105263157894737, |
|
"grad_norm": 0.021038714796304703, |
|
"learning_rate": 9.96059632789951e-07, |
|
"loss": -0.0006, |
|
"num_tokens": 32774747.0, |
|
"reward": 1.5779267400503159, |
|
"reward_std": 0.42914118245244026, |
|
"rewards/format_reward_embodied/mean": 0.9921875, |
|
"rewards/format_reward_embodied/std": 0.05317101255059242, |
|
"rewards/stop_prediction_reward/mean": 0.505859375, |
|
"rewards/stop_prediction_reward/std": 0.3645100612193346, |
|
"rewards/waypoint_pred_accuracy/mean": 0.039939936966421924, |
|
"rewards/waypoint_pred_accuracy/std": 0.058027153559010024, |
|
"step": 67 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 748.875, |
|
"completions/max_terminated_length": 748.875, |
|
"completions/mean_length": 253.40234375, |
|
"completions/mean_terminated_length": 253.40234375, |
|
"completions/min_length": 119.875, |
|
"completions/min_terminated_length": 119.875, |
|
"epoch": 0.1431578947368421, |
|
"grad_norm": 0.024472616612911224, |
|
"learning_rate": 9.956103855119138e-07, |
|
"loss": 0.0036, |
|
"num_tokens": 33265961.0, |
|
"reward": 1.6555908024311066, |
|
"reward_std": 0.385839419439435, |
|
"rewards/format_reward_embodied/mean": 0.990234375, |
|
"rewards/format_reward_embodied/std": 0.078125, |
|
"rewards/stop_prediction_reward/mean": 0.6640625, |
|
"rewards/stop_prediction_reward/std": 0.36797660402953625, |
|
"rewards/waypoint_pred_accuracy/mean": 0.0006469660570671656, |
|
"rewards/waypoint_pred_accuracy/std": 0.003268853310080662, |
|
"step": 68 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 496.375, |
|
"completions/max_terminated_length": 496.375, |
|
"completions/mean_length": 253.37109375, |
|
"completions/mean_terminated_length": 253.37109375, |
|
"completions/min_length": 122.0, |
|
"completions/min_terminated_length": 122.0, |
|
"epoch": 0.14526315789473684, |
|
"grad_norm": 0.038161348551511765, |
|
"learning_rate": 9.951370171210359e-07, |
|
"loss": 0.002, |
|
"num_tokens": 33757543.0, |
|
"reward": 1.755501314997673, |
|
"reward_std": 0.34114088118076324, |
|
"rewards/format_reward_embodied/mean": 0.99609375, |
|
"rewards/format_reward_embodied/std": 0.03125, |
|
"rewards/stop_prediction_reward/mean": 0.75, |
|
"rewards/stop_prediction_reward/std": 0.32307766377925873, |
|
"rewards/waypoint_pred_accuracy/mean": 0.004703786010358479, |
|
"rewards/waypoint_pred_accuracy/std": 0.016449308837348298, |
|
"step": 69 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 833.25, |
|
"completions/max_terminated_length": 833.25, |
|
"completions/mean_length": 262.56640625, |
|
"completions/mean_terminated_length": 262.56640625, |
|
"completions/min_length": 109.875, |
|
"completions/min_terminated_length": 109.875, |
|
"epoch": 0.14736842105263157, |
|
"grad_norm": 0.024708310142159462, |
|
"learning_rate": 9.946395532409847e-07, |
|
"loss": 0.0085, |
|
"num_tokens": 34253513.0, |
|
"reward": 1.7724248170852661, |
|
"reward_std": 0.354646734893322, |
|
"rewards/format_reward_embodied/mean": 0.98828125, |
|
"rewards/format_reward_embodied/std": 0.06417626701295376, |
|
"rewards/stop_prediction_reward/mean": 0.7734375, |
|
"rewards/stop_prediction_reward/std": 0.3255934212356806, |
|
"rewards/waypoint_pred_accuracy/mean": 0.005353036525642015, |
|
"rewards/waypoint_pred_accuracy/std": 0.013709596910865714, |
|
"step": 70 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 497.25, |
|
"completions/max_terminated_length": 497.25, |
|
"completions/mean_length": 261.169921875, |
|
"completions/mean_terminated_length": 261.169921875, |
|
"completions/min_length": 127.25, |
|
"completions/min_terminated_length": 127.25, |
|
"epoch": 0.14947368421052631, |
|
"grad_norm": 0.02306721918284893, |
|
"learning_rate": 9.941180207997288e-07, |
|
"loss": -0.0001, |
|
"num_tokens": 34747616.0, |
|
"reward": 1.8229519128799438, |
|
"reward_std": 0.33919697254896164, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.81640625, |
|
"rewards/stop_prediction_reward/std": 0.3285626471042633, |
|
"rewards/waypoint_pred_accuracy/mean": 0.003272829200625438, |
|
"rewards/waypoint_pred_accuracy/std": 0.01800350467010503, |
|
"step": 71 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 454.0, |
|
"completions/max_terminated_length": 454.0, |
|
"completions/mean_length": 244.701171875, |
|
"completions/mean_terminated_length": 244.701171875, |
|
"completions/min_length": 120.0, |
|
"completions/min_terminated_length": 120.0, |
|
"epoch": 0.15157894736842106, |
|
"grad_norm": 0.03545621410012245, |
|
"learning_rate": 9.935724480280795e-07, |
|
"loss": 0.0, |
|
"num_tokens": 35235719.0, |
|
"reward": 1.7902133017778397, |
|
"reward_std": 0.45963282138109207, |
|
"rewards/format_reward_embodied/mean": 0.998046875, |
|
"rewards/format_reward_embodied/std": 0.015625, |
|
"rewards/stop_prediction_reward/mean": 0.6796875, |
|
"rewards/stop_prediction_reward/std": 0.38505756109952927, |
|
"rewards/waypoint_pred_accuracy/mean": 0.056239478069983306, |
|
"rewards/waypoint_pred_accuracy/std": 0.08321574779984076, |
|
"step": 72 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 423.125, |
|
"completions/max_terminated_length": 423.125, |
|
"completions/mean_length": 241.056640625, |
|
"completions/mean_terminated_length": 241.056640625, |
|
"completions/min_length": 115.875, |
|
"completions/min_terminated_length": 115.875, |
|
"epoch": 0.15368421052631578, |
|
"grad_norm": 0.025743646547198296, |
|
"learning_rate": 9.93002864458164e-07, |
|
"loss": 0.0017, |
|
"num_tokens": 35717284.0, |
|
"reward": 1.7117034643888474, |
|
"reward_std": 0.34931979328393936, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.7109375, |
|
"rewards/stop_prediction_reward/std": 0.3488571159541607, |
|
"rewards/waypoint_pred_accuracy/mean": 0.00038298743026394556, |
|
"rewards/waypoint_pred_accuracy/std": 0.0022788381146801144, |
|
"step": 73 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 484.375, |
|
"completions/max_terminated_length": 484.375, |
|
"completions/mean_length": 236.037109375, |
|
"completions/mean_terminated_length": 236.037109375, |
|
"completions/min_length": 110.375, |
|
"completions/min_terminated_length": 110.375, |
|
"epoch": 0.15578947368421053, |
|
"grad_norm": 0.031387291848659515, |
|
"learning_rate": 9.924093009218252e-07, |
|
"loss": -0.0001, |
|
"num_tokens": 36196791.0, |
|
"reward": 1.53607939183712, |
|
"reward_std": 0.3938233330845833, |
|
"rewards/format_reward_embodied/mean": 0.990234375, |
|
"rewards/format_reward_embodied/std": 0.04855126701295376, |
|
"rewards/stop_prediction_reward/mean": 0.515625, |
|
"rewards/stop_prediction_reward/std": 0.35425616055727005, |
|
"rewards/waypoint_pred_accuracy/mean": 0.01511000881408156, |
|
"rewards/waypoint_pred_accuracy/std": 0.024702310350773107, |
|
"step": 74 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.001953125, |
|
"completions/max_length": 682.875, |
|
"completions/max_terminated_length": 682.875, |
|
"completions/mean_length": 246.58203125, |
|
"completions/mean_terminated_length": 247.04151344299316, |
|
"completions/min_length": 102.375, |
|
"completions/min_terminated_length": 120.375, |
|
"epoch": 0.15789473684210525, |
|
"grad_norm": 0.026448730379343033, |
|
"learning_rate": 9.917917895489542e-07, |
|
"loss": 0.0036, |
|
"num_tokens": 36683937.0, |
|
"reward": 1.7273263335227966, |
|
"reward_std": 0.3566751927137375, |
|
"rewards/format_reward_embodied/mean": 0.98046875, |
|
"rewards/format_reward_embodied/std": 0.12826303765177727, |
|
"rewards/stop_prediction_reward/mean": 0.7109375, |
|
"rewards/stop_prediction_reward/std": 0.2865038700401783, |
|
"rewards/waypoint_pred_accuracy/mean": 0.01796005329746431, |
|
"rewards/waypoint_pred_accuracy/std": 0.03300400403918848, |
|
"step": 75 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 466.5, |
|
"completions/max_terminated_length": 466.5, |
|
"completions/mean_length": 246.384765625, |
|
"completions/mean_terminated_length": 246.384765625, |
|
"completions/min_length": 122.625, |
|
"completions/min_terminated_length": 122.625, |
|
"epoch": 0.16, |
|
"grad_norm": 0.026203418150544167, |
|
"learning_rate": 9.9115036376575e-07, |
|
"loss": 0.0007, |
|
"num_tokens": 37170854.0, |
|
"reward": 1.4314483553171158, |
|
"reward_std": 0.3206907380372286, |
|
"rewards/format_reward_embodied/mean": 0.994140625, |
|
"rewards/format_reward_embodied/std": 0.046875, |
|
"rewards/stop_prediction_reward/mean": 0.435546875, |
|
"rewards/stop_prediction_reward/std": 0.31111637130379677, |
|
"rewards/waypoint_pred_accuracy/mean": 0.0008804261656223389, |
|
"rewards/waypoint_pred_accuracy/std": 0.007043409327252448, |
|
"step": 76 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.001953125, |
|
"completions/max_length": 461.375, |
|
"completions/max_terminated_length": 461.375, |
|
"completions/mean_length": 245.017578125, |
|
"completions/mean_terminated_length": 245.51959609985352, |
|
"completions/min_length": 97.0, |
|
"completions/min_terminated_length": 110.875, |
|
"epoch": 0.16210526315789472, |
|
"grad_norm": 0.021599190309643745, |
|
"learning_rate": 9.904850582929109e-07, |
|
"loss": -0.001, |
|
"num_tokens": 37659375.0, |
|
"reward": 1.7587501555681229, |
|
"reward_std": 0.45513000525534153, |
|
"rewards/format_reward_embodied/mean": 0.9765625, |
|
"rewards/format_reward_embodied/std": 0.12835253402590752, |
|
"rewards/stop_prediction_reward/mean": 0.6328125, |
|
"rewards/stop_prediction_reward/std": 0.3041498549282551, |
|
"rewards/waypoint_pred_accuracy/mean": 0.07468759280809832, |
|
"rewards/waypoint_pred_accuracy/std": 0.08736424083447313, |
|
"step": 77 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 431.375, |
|
"completions/max_terminated_length": 431.375, |
|
"completions/mean_length": 251.69921875, |
|
"completions/mean_terminated_length": 251.69921875, |
|
"completions/min_length": 118.125, |
|
"completions/min_terminated_length": 118.125, |
|
"epoch": 0.16421052631578947, |
|
"grad_norm": 0.023476749658584595, |
|
"learning_rate": 9.897959091437545e-07, |
|
"loss": -0.0017, |
|
"num_tokens": 38147157.0, |
|
"reward": 1.7629946172237396, |
|
"reward_std": 0.3216873835772276, |
|
"rewards/format_reward_embodied/mean": 0.99609375, |
|
"rewards/format_reward_embodied/std": 0.03125, |
|
"rewards/stop_prediction_reward/mean": 0.72265625, |
|
"rewards/stop_prediction_reward/std": 0.2610730957239866, |
|
"rewards/waypoint_pred_accuracy/mean": 0.02212230950329659, |
|
"rewards/waypoint_pred_accuracy/std": 0.03588474957179061, |
|
"step": 78 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 464.875, |
|
"completions/max_terminated_length": 464.875, |
|
"completions/mean_length": 238.640625, |
|
"completions/mean_terminated_length": 238.640625, |
|
"completions/min_length": 118.375, |
|
"completions/min_terminated_length": 118.375, |
|
"epoch": 0.16631578947368422, |
|
"grad_norm": 0.03286973387002945, |
|
"learning_rate": 9.890829536222686e-07, |
|
"loss": -0.0025, |
|
"num_tokens": 38629981.0, |
|
"reward": 1.7297946512699127, |
|
"reward_std": 0.2887336425483227, |
|
"rewards/format_reward_embodied/mean": 0.974609375, |
|
"rewards/format_reward_embodied/std": 0.12744012847542763, |
|
"rewards/stop_prediction_reward/mean": 0.74609375, |
|
"rewards/stop_prediction_reward/std": 0.23198767006397247, |
|
"rewards/waypoint_pred_accuracy/mean": 0.004545772711879333, |
|
"rewards/waypoint_pred_accuracy/std": 0.008970102488097233, |
|
"step": 79 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 476.125, |
|
"completions/max_terminated_length": 476.125, |
|
"completions/mean_length": 233.671875, |
|
"completions/mean_terminated_length": 233.671875, |
|
"completions/min_length": 97.875, |
|
"completions/min_terminated_length": 97.875, |
|
"epoch": 0.16842105263157894, |
|
"grad_norm": 0.022502202540636063, |
|
"learning_rate": 9.88346230321092e-07, |
|
"loss": 0.0003, |
|
"num_tokens": 39111733.0, |
|
"reward": 1.6976664066314697, |
|
"reward_std": 0.36001696437597275, |
|
"rewards/format_reward_embodied/mean": 0.986328125, |
|
"rewards/format_reward_embodied/std": 0.09071702510118484, |
|
"rewards/stop_prediction_reward/mean": 0.68359375, |
|
"rewards/stop_prediction_reward/std": 0.3152890168130398, |
|
"rewards/waypoint_pred_accuracy/mean": 0.013872268769774525, |
|
"rewards/waypoint_pred_accuracy/std": 0.03006945856032351, |
|
"step": 80 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 428.875, |
|
"completions/max_terminated_length": 428.875, |
|
"completions/mean_length": 236.8515625, |
|
"completions/mean_terminated_length": 236.8515625, |
|
"completions/min_length": 110.125, |
|
"completions/min_terminated_length": 110.125, |
|
"epoch": 0.1705263157894737, |
|
"grad_norm": 0.03461969271302223, |
|
"learning_rate": 9.875857791194251e-07, |
|
"loss": 0.0001, |
|
"num_tokens": 39593449.0, |
|
"reward": 1.8557344675064087, |
|
"reward_std": 0.3654259257018566, |
|
"rewards/format_reward_embodied/mean": 0.9921875, |
|
"rewards/format_reward_embodied/std": 0.05317101255059242, |
|
"rewards/stop_prediction_reward/mean": 0.798828125, |
|
"rewards/stop_prediction_reward/std": 0.2945959325879812, |
|
"rewards/waypoint_pred_accuracy/mean": 0.032359407392959616, |
|
"rewards/waypoint_pred_accuracy/std": 0.051165802276623415, |
|
"step": 81 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 773.75, |
|
"completions/max_terminated_length": 773.75, |
|
"completions/mean_length": 242.849609375, |
|
"completions/mean_terminated_length": 242.849609375, |
|
"completions/min_length": 111.625, |
|
"completions/min_terminated_length": 111.625, |
|
"epoch": 0.1726315789473684, |
|
"grad_norm": 0.025375094264745712, |
|
"learning_rate": 9.868016411808711e-07, |
|
"loss": 0.008, |
|
"num_tokens": 40080732.0, |
|
"reward": 1.8185276985168457, |
|
"reward_std": 0.2778010666370392, |
|
"rewards/format_reward_embodied/mean": 0.99609375, |
|
"rewards/format_reward_embodied/std": 0.03125, |
|
"rewards/stop_prediction_reward/mean": 0.822265625, |
|
"rewards/stop_prediction_reward/std": 0.26144965551793575, |
|
"rewards/waypoint_pred_accuracy/mean": 8.415357677159232e-05, |
|
"rewards/waypoint_pred_accuracy/std": 0.0006628100762255675, |
|
"step": 82 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 466.0, |
|
"completions/max_terminated_length": 466.0, |
|
"completions/mean_length": 239.771484375, |
|
"completions/mean_terminated_length": 239.771484375, |
|
"completions/min_length": 118.125, |
|
"completions/min_terminated_length": 118.125, |
|
"epoch": 0.17473684210526316, |
|
"grad_norm": 0.02282722294330597, |
|
"learning_rate": 9.85993858951209e-07, |
|
"loss": -0.0, |
|
"num_tokens": 40564327.0, |
|
"reward": 1.8071236461400986, |
|
"reward_std": 0.3150872718542814, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.7109375, |
|
"rewards/stop_prediction_reward/std": 0.2674474287778139, |
|
"rewards/waypoint_pred_accuracy/mean": 0.04809306015794699, |
|
"rewards/waypoint_pred_accuracy/std": 0.0513845629028903, |
|
"step": 83 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 428.875, |
|
"completions/max_terminated_length": 428.875, |
|
"completions/mean_length": 237.19140625, |
|
"completions/mean_terminated_length": 237.19140625, |
|
"completions/min_length": 117.75, |
|
"completions/min_terminated_length": 117.75, |
|
"epoch": 0.17684210526315788, |
|
"grad_norm": 0.024264369159936905, |
|
"learning_rate": 9.851624761560941e-07, |
|
"loss": 0.0003, |
|
"num_tokens": 41047305.0, |
|
"reward": 1.6365228593349457, |
|
"reward_std": 0.2356659732758999, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.603515625, |
|
"rewards/stop_prediction_reward/std": 0.21942270919680595, |
|
"rewards/waypoint_pred_accuracy/mean": 0.016503638941257396, |
|
"rewards/waypoint_pred_accuracy/std": 0.019282840457430492, |
|
"step": 84 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 443.125, |
|
"completions/max_terminated_length": 443.125, |
|
"completions/mean_length": 241.103515625, |
|
"completions/mean_terminated_length": 241.103515625, |
|
"completions/min_length": 119.125, |
|
"completions/min_terminated_length": 119.125, |
|
"epoch": 0.17894736842105263, |
|
"grad_norm": 0.020238297060132027, |
|
"learning_rate": 9.843075377986927e-07, |
|
"loss": 0.0003, |
|
"num_tokens": 41531134.0, |
|
"reward": 1.74451445043087, |
|
"reward_std": 0.27907660976052284, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.693359375, |
|
"rewards/stop_prediction_reward/std": 0.22875236719846725, |
|
"rewards/waypoint_pred_accuracy/mean": 0.025577549161396262, |
|
"rewards/waypoint_pred_accuracy/std": 0.03110517306985683, |
|
"step": 85 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 419.5, |
|
"completions/max_terminated_length": 419.5, |
|
"completions/mean_length": 231.287109375, |
|
"completions/mean_terminated_length": 231.287109375, |
|
"completions/min_length": 118.875, |
|
"completions/min_terminated_length": 118.875, |
|
"epoch": 0.18105263157894738, |
|
"grad_norm": 0.02773391455411911, |
|
"learning_rate": 9.834290901572454e-07, |
|
"loss": 0.0005, |
|
"num_tokens": 42010385.0, |
|
"reward": 1.8274007737636566, |
|
"reward_std": 0.2518170941621065, |
|
"rewards/format_reward_embodied/mean": 0.998046875, |
|
"rewards/format_reward_embodied/std": 0.015625, |
|
"rewards/stop_prediction_reward/mean": 0.822265625, |
|
"rewards/stop_prediction_reward/std": 0.2328737936913967, |
|
"rewards/waypoint_pred_accuracy/mean": 0.0035441384432527657, |
|
"rewards/waypoint_pred_accuracy/std": 0.011852368814229316, |
|
"step": 86 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 486.25, |
|
"completions/max_terminated_length": 486.25, |
|
"completions/mean_length": 242.701171875, |
|
"completions/mean_terminated_length": 242.701171875, |
|
"completions/min_length": 112.125, |
|
"completions/min_terminated_length": 112.125, |
|
"epoch": 0.1831578947368421, |
|
"grad_norm": 0.017116429284214973, |
|
"learning_rate": 9.82527180782562e-07, |
|
"loss": 0.0007, |
|
"num_tokens": 42495544.0, |
|
"reward": 1.7215514183044434, |
|
"reward_std": 0.24272998422384262, |
|
"rewards/format_reward_embodied/mean": 0.99609375, |
|
"rewards/format_reward_embodied/std": 0.03125, |
|
"rewards/stop_prediction_reward/mean": 0.71875, |
|
"rewards/stop_prediction_reward/std": 0.22877886332571507, |
|
"rewards/waypoint_pred_accuracy/mean": 0.0033538464465563867, |
|
"rewards/waypoint_pred_accuracy/std": 0.008977477041369256, |
|
"step": 87 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 407.625, |
|
"completions/max_terminated_length": 407.625, |
|
"completions/mean_length": 227.666015625, |
|
"completions/mean_terminated_length": 227.666015625, |
|
"completions/min_length": 124.125, |
|
"completions/min_terminated_length": 124.125, |
|
"epoch": 0.18526315789473685, |
|
"grad_norm": 0.025679390877485275, |
|
"learning_rate": 9.816018584954474e-07, |
|
"loss": -0.0001, |
|
"num_tokens": 42972109.0, |
|
"reward": 1.543754830956459, |
|
"reward_std": 0.3515052441507578, |
|
"rewards/format_reward_embodied/mean": 0.998046875, |
|
"rewards/format_reward_embodied/std": 0.015625, |
|
"rewards/stop_prediction_reward/mean": 0.423828125, |
|
"rewards/stop_prediction_reward/std": 0.25312468968331814, |
|
"rewards/waypoint_pred_accuracy/mean": 0.06093992558778956, |
|
"rewards/waypoint_pred_accuracy/std": 0.090891926485829, |
|
"step": 88 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 424.0, |
|
"completions/max_terminated_length": 424.0, |
|
"completions/mean_length": 229.5078125, |
|
"completions/mean_terminated_length": 229.5078125, |
|
"completions/min_length": 114.375, |
|
"completions/min_terminated_length": 114.375, |
|
"epoch": 0.18736842105263157, |
|
"grad_norm": 0.025896085426211357, |
|
"learning_rate": 9.806531733840594e-07, |
|
"loss": 0.0009, |
|
"num_tokens": 43453905.0, |
|
"reward": 1.9336253255605698, |
|
"reward_std": 0.3347325623035431, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.82421875, |
|
"rewards/stop_prediction_reward/std": 0.24601666443049908, |
|
"rewards/waypoint_pred_accuracy/mean": 0.05470328652882017, |
|
"rewards/waypoint_pred_accuracy/std": 0.07181658712215722, |
|
"step": 89 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 432.375, |
|
"completions/max_terminated_length": 432.375, |
|
"completions/mean_length": 235.38671875, |
|
"completions/mean_terminated_length": 235.38671875, |
|
"completions/min_length": 115.875, |
|
"completions/min_terminated_length": 115.875, |
|
"epoch": 0.18947368421052632, |
|
"grad_norm": 0.03356796130537987, |
|
"learning_rate": 9.796811768011975e-07, |
|
"loss": -0.0006, |
|
"num_tokens": 43934935.0, |
|
"reward": 1.7414738535881042, |
|
"reward_std": 0.30686922930181026, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.607421875, |
|
"rewards/stop_prediction_reward/std": 0.2297498807311058, |
|
"rewards/waypoint_pred_accuracy/mean": 0.06702600460093983, |
|
"rewards/waypoint_pred_accuracy/std": 0.05166525034704193, |
|
"step": 90 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 406.25, |
|
"completions/max_terminated_length": 406.25, |
|
"completions/mean_length": 222.359375, |
|
"completions/mean_terminated_length": 222.359375, |
|
"completions/min_length": 118.875, |
|
"completions/min_terminated_length": 118.875, |
|
"epoch": 0.19157894736842104, |
|
"grad_norm": 0.01943252608180046, |
|
"learning_rate": 9.78685921361522e-07, |
|
"loss": -0.0005, |
|
"num_tokens": 44409551.0, |
|
"reward": 1.6487830728292465, |
|
"reward_std": 0.30546887032687664, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.58203125, |
|
"rewards/stop_prediction_reward/std": 0.2547878734767437, |
|
"rewards/waypoint_pred_accuracy/mean": 0.03337590532140797, |
|
"rewards/waypoint_pred_accuracy/std": 0.052433368229147945, |
|
"step": 91 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 385.375, |
|
"completions/max_terminated_length": 385.375, |
|
"completions/mean_length": 217.119140625, |
|
"completions/mean_terminated_length": 217.119140625, |
|
"completions/min_length": 113.5, |
|
"completions/min_terminated_length": 113.5, |
|
"epoch": 0.1936842105263158, |
|
"grad_norm": 0.021023401990532875, |
|
"learning_rate": 9.776674609387076e-07, |
|
"loss": -0.0006, |
|
"num_tokens": 44880844.0, |
|
"reward": 1.756135731935501, |
|
"reward_std": 0.264364130795002, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.7109375, |
|
"rewards/stop_prediction_reward/std": 0.23147857002913952, |
|
"rewards/waypoint_pred_accuracy/mean": 0.022599116048866108, |
|
"rewards/waypoint_pred_accuracy/std": 0.03211659606313333, |
|
"step": 92 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 400.125, |
|
"completions/max_terminated_length": 400.125, |
|
"completions/mean_length": 218.37109375, |
|
"completions/mean_terminated_length": 218.37109375, |
|
"completions/min_length": 107.0, |
|
"completions/min_terminated_length": 107.0, |
|
"epoch": 0.1957894736842105, |
|
"grad_norm": 0.020804792642593384, |
|
"learning_rate": 9.766258506625257e-07, |
|
"loss": 0.0009, |
|
"num_tokens": 45354762.0, |
|
"reward": 1.703179121017456, |
|
"reward_std": 0.31406174413859844, |
|
"rewards/format_reward_embodied/mean": 0.998046875, |
|
"rewards/format_reward_embodied/std": 0.015625, |
|
"rewards/stop_prediction_reward/mean": 0.623046875, |
|
"rewards/stop_prediction_reward/std": 0.2401380892843008, |
|
"rewards/waypoint_pred_accuracy/mean": 0.04104270155312406, |
|
"rewards/waypoint_pred_accuracy/std": 0.047771165754966205, |
|
"step": 93 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 385.125, |
|
"completions/max_terminated_length": 385.125, |
|
"completions/mean_length": 220.767578125, |
|
"completions/mean_terminated_length": 220.767578125, |
|
"completions/min_length": 108.25, |
|
"completions/min_terminated_length": 108.25, |
|
"epoch": 0.19789473684210526, |
|
"grad_norm": 0.05812095105648041, |
|
"learning_rate": 9.75561146915861e-07, |
|
"loss": 0.0007, |
|
"num_tokens": 45828691.0, |
|
"reward": 1.7804509848356247, |
|
"reward_std": 0.24969635531306267, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.728515625, |
|
"rewards/stop_prediction_reward/std": 0.2045249417424202, |
|
"rewards/waypoint_pred_accuracy/mean": 0.02596768177553266, |
|
"rewards/waypoint_pred_accuracy/std": 0.03161624588427525, |
|
"step": 94 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 406.125, |
|
"completions/max_terminated_length": 406.125, |
|
"completions/mean_length": 221.861328125, |
|
"completions/mean_terminated_length": 221.861328125, |
|
"completions/min_length": 118.75, |
|
"completions/min_terminated_length": 118.75, |
|
"epoch": 0.2, |
|
"grad_norm": 0.019957855343818665, |
|
"learning_rate": 9.744734073316595e-07, |
|
"loss": -0.0003, |
|
"num_tokens": 46303244.0, |
|
"reward": 1.6937783360481262, |
|
"reward_std": 0.18066157400608063, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.6171875, |
|
"rewards/stop_prediction_reward/std": 0.12835253402590752, |
|
"rewards/waypoint_pred_accuracy/mean": 0.03829541802406311, |
|
"rewards/waypoint_pred_accuracy/std": 0.038537144660949735, |
|
"step": 95 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 407.5, |
|
"completions/max_terminated_length": 407.5, |
|
"completions/mean_length": 213.76953125, |
|
"completions/mean_terminated_length": 213.76953125, |
|
"completions/min_length": 101.625, |
|
"completions/min_terminated_length": 101.625, |
|
"epoch": 0.20210526315789473, |
|
"grad_norm": 0.02102278172969818, |
|
"learning_rate": 9.73362690789808e-07, |
|
"loss": -0.001, |
|
"num_tokens": 46770710.0, |
|
"reward": 1.7461132854223251, |
|
"reward_std": 0.28678105026483536, |
|
"rewards/format_reward_embodied/mean": 0.998046875, |
|
"rewards/format_reward_embodied/std": 0.015625, |
|
"rewards/stop_prediction_reward/mean": 0.640625, |
|
"rewards/stop_prediction_reward/std": 0.21690494194626808, |
|
"rewards/waypoint_pred_accuracy/mean": 0.05372073073522188, |
|
"rewards/waypoint_pred_accuracy/std": 0.054212798771914095, |
|
"step": 96 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 457.0, |
|
"completions/max_terminated_length": 457.0, |
|
"completions/mean_length": 219.494140625, |
|
"completions/mean_terminated_length": 219.494140625, |
|
"completions/min_length": 107.75, |
|
"completions/min_terminated_length": 107.75, |
|
"epoch": 0.20421052631578948, |
|
"grad_norm": 0.023958692327141762, |
|
"learning_rate": 9.722290574139486e-07, |
|
"loss": -0.0001, |
|
"num_tokens": 47243155.0, |
|
"reward": 1.747300535440445, |
|
"reward_std": 0.17760824598371983, |
|
"rewards/format_reward_embodied/mean": 0.998046875, |
|
"rewards/format_reward_embodied/std": 0.015625, |
|
"rewards/stop_prediction_reward/mean": 0.7421875, |
|
"rewards/stop_prediction_reward/std": 0.15745450742542744, |
|
"rewards/waypoint_pred_accuracy/mean": 0.0035330769751453772, |
|
"rewards/waypoint_pred_accuracy/std": 0.017783919582143426, |
|
"step": 97 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 380.375, |
|
"completions/max_terminated_length": 380.375, |
|
"completions/mean_length": 215.083984375, |
|
"completions/mean_terminated_length": 215.083984375, |
|
"completions/min_length": 109.125, |
|
"completions/min_terminated_length": 109.125, |
|
"epoch": 0.2063157894736842, |
|
"grad_norm": 0.016943305730819702, |
|
"learning_rate": 9.71072568568222e-07, |
|
"loss": -0.0005, |
|
"num_tokens": 47713406.0, |
|
"reward": 1.851816326379776, |
|
"reward_std": 0.2104954868555069, |
|
"rewards/format_reward_embodied/mean": 0.998046875, |
|
"rewards/format_reward_embodied/std": 0.015625, |
|
"rewards/stop_prediction_reward/mean": 0.849609375, |
|
"rewards/stop_prediction_reward/std": 0.1879090555012226, |
|
"rewards/waypoint_pred_accuracy/mean": 0.0020800431666430333, |
|
"rewards/waypoint_pred_accuracy/std": 0.011219009378692333, |
|
"step": 98 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 392.625, |
|
"completions/max_terminated_length": 392.625, |
|
"completions/mean_length": 211.724609375, |
|
"completions/mean_terminated_length": 211.724609375, |
|
"completions/min_length": 104.125, |
|
"completions/min_terminated_length": 104.125, |
|
"epoch": 0.20842105263157895, |
|
"grad_norm": 0.014324544928967953, |
|
"learning_rate": 9.698932868539475e-07, |
|
"loss": -0.0006, |
|
"num_tokens": 48180849.0, |
|
"reward": 1.6212971061468124, |
|
"reward_std": 0.16710768891550742, |
|
"rewards/format_reward_embodied/mean": 0.998046875, |
|
"rewards/format_reward_embodied/std": 0.015625, |
|
"rewards/stop_prediction_reward/mean": 0.498046875, |
|
"rewards/stop_prediction_reward/std": 0.07980126701295376, |
|
"rewards/waypoint_pred_accuracy/mean": 0.06260167788853366, |
|
"rewards/waypoint_pred_accuracy/std": 0.04448781939231594, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.21052631578947367, |
|
"grad_norm": 0.016340678557753563, |
|
"learning_rate": 9.686912761062337e-07, |
|
"loss": -0.001, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.21052631578947367, |
|
"eval_clip_ratio": 0.0, |
|
"eval_completions/clipped_ratio": 0.00015625, |
|
"eval_completions/max_length": 457.89, |
|
"eval_completions/max_terminated_length": 457.89, |
|
"eval_completions/mean_length": 219.56675354003906, |
|
"eval_completions/mean_terminated_length": 219.59981658935547, |
|
"eval_completions/min_length": 112.23, |
|
"eval_completions/min_terminated_length": 113.11, |
|
"eval_loss": 0.0013147207209840417, |
|
"eval_num_tokens": 48647110.0, |
|
"eval_reward": 1.8446430933475495, |
|
"eval_reward_std": 0.20415274247365858, |
|
"eval_rewards/format_reward_embodied/mean": 0.99921875, |
|
"eval_rewards/format_reward_embodied/std": 0.00625, |
|
"eval_rewards/stop_prediction_reward/mean": 0.75734375, |
|
"eval_rewards/stop_prediction_reward/std": 0.1349847713112831, |
|
"eval_rewards/waypoint_pred_accuracy/mean": 0.04404030321765386, |
|
"eval_rewards/waypoint_pred_accuracy/std": 0.04408785154897764, |
|
"eval_runtime": 1143.7465, |
|
"eval_samples_per_second": 0.087, |
|
"eval_steps_per_second": 0.002, |
|
"step": 100 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 387.1875, |
|
"completions/max_terminated_length": 387.1875, |
|
"completions/mean_length": 211.564453125, |
|
"completions/mean_terminated_length": 211.564453125, |
|
"completions/min_length": 109.25, |
|
"completions/min_terminated_length": 109.25, |
|
"epoch": 0.21263157894736842, |
|
"grad_norm": 7.573169568786398e-05, |
|
"learning_rate": 9.674666013905223e-07, |
|
"loss": 0.0, |
|
"num_tokens": 49119859.0, |
|
"reward": 1.8795468658208847, |
|
"reward_std": 0.13644913337626896, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.8642578125, |
|
"rewards/stop_prediction_reward/std": 0.10762263275682926, |
|
"rewards/waypoint_pred_accuracy/mean": 0.007644527649440409, |
|
"rewards/waypoint_pred_accuracy/std": 0.02138127497060599, |
|
"step": 101 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 423.625, |
|
"completions/max_terminated_length": 423.625, |
|
"completions/mean_length": 216.150390625, |
|
"completions/mean_terminated_length": 216.150390625, |
|
"completions/min_length": 112.875, |
|
"completions/min_terminated_length": 112.875, |
|
"epoch": 0.21473684210526317, |
|
"grad_norm": 0.02024409919977188, |
|
"learning_rate": 9.662193289990683e-07, |
|
"loss": 0.0001, |
|
"num_tokens": 49589696.0, |
|
"reward": 1.9582752585411072, |
|
"reward_std": 0.170343800484261, |
|
"rewards/format_reward_embodied/mean": 0.998046875, |
|
"rewards/format_reward_embodied/std": 0.015625, |
|
"rewards/stop_prediction_reward/mean": 0.873046875, |
|
"rewards/stop_prediction_reward/std": 0.059467025101184845, |
|
"rewards/waypoint_pred_accuracy/mean": 0.04359079086862039, |
|
"rewards/waypoint_pred_accuracy/std": 0.05229037126991898, |
|
"step": 102 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 419.875, |
|
"completions/max_terminated_length": 419.875, |
|
"completions/mean_length": 220.193359375, |
|
"completions/mean_terminated_length": 220.193359375, |
|
"completions/min_length": 110.5, |
|
"completions/min_terminated_length": 110.5, |
|
"epoch": 0.2168421052631579, |
|
"grad_norm": 0.020008524879813194, |
|
"learning_rate": 9.649495264473496e-07, |
|
"loss": -0.0008, |
|
"num_tokens": 50062179.0, |
|
"reward": 1.4985756427049637, |
|
"reward_std": 0.15706685557961464, |
|
"rewards/format_reward_embodied/mean": 0.998046875, |
|
"rewards/format_reward_embodied/std": 0.015625, |
|
"rewards/stop_prediction_reward/mean": 0.5, |
|
"rewards/stop_prediction_reward/std": 0.15027354657649994, |
|
"rewards/waypoint_pred_accuracy/mean": 0.00026439113654092976, |
|
"rewards/waypoint_pred_accuracy/std": 0.0021146056694039514, |
|
"step": 103 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 389.375, |
|
"completions/max_terminated_length": 389.375, |
|
"completions/mean_length": 224.572265625, |
|
"completions/mean_terminated_length": 224.572265625, |
|
"completions/min_length": 113.5, |
|
"completions/min_terminated_length": 113.5, |
|
"epoch": 0.21894736842105264, |
|
"grad_norm": 0.010618757456541061, |
|
"learning_rate": 9.636572624704126e-07, |
|
"loss": 0.0008, |
|
"num_tokens": 50537928.0, |
|
"reward": 2.029994383454323, |
|
"reward_std": 0.16062275879085064, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.98046875, |
|
"rewards/stop_prediction_reward/std": 0.11734727956354618, |
|
"rewards/waypoint_pred_accuracy/mean": 0.02476281741601838, |
|
"rewards/waypoint_pred_accuracy/std": 0.035420115480633285, |
|
"step": 104 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 375.75, |
|
"completions/max_terminated_length": 375.75, |
|
"completions/mean_length": 208.478515625, |
|
"completions/mean_terminated_length": 208.478515625, |
|
"completions/min_length": 112.875, |
|
"completions/min_terminated_length": 112.875, |
|
"epoch": 0.22105263157894736, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.62342607019152e-07, |
|
"loss": 0.0003, |
|
"num_tokens": 51004477.0, |
|
"reward": 1.7948767840862274, |
|
"reward_std": 0.0999540267221164, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.734375, |
|
"rewards/stop_prediction_reward/std": 0.0729166679084301, |
|
"rewards/waypoint_pred_accuracy/mean": 0.03025089303362355, |
|
"rewards/waypoint_pred_accuracy/std": 0.024821761748978988, |
|
"step": 105 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 391.25, |
|
"completions/max_terminated_length": 391.25, |
|
"completions/mean_length": 218.388671875, |
|
"completions/mean_terminated_length": 218.388671875, |
|
"completions/min_length": 113.375, |
|
"completions/min_terminated_length": 113.375, |
|
"epoch": 0.2231578947368421, |
|
"grad_norm": 0.024067817255854607, |
|
"learning_rate": 9.610056312565245e-07, |
|
"loss": 0.0004, |
|
"num_tokens": 51477636.0, |
|
"reward": 2.026669681072235, |
|
"reward_std": 0.1696237076412217, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.8671875, |
|
"rewards/stop_prediction_reward/std": 0.09542626701295376, |
|
"rewards/waypoint_pred_accuracy/mean": 0.07974108902908483, |
|
"rewards/waypoint_pred_accuracy/std": 0.04533190939855558, |
|
"step": 106 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 420.75, |
|
"completions/max_terminated_length": 420.75, |
|
"completions/mean_length": 217.77734375, |
|
"completions/mean_terminated_length": 217.77734375, |
|
"completions/min_length": 119.0, |
|
"completions/min_terminated_length": 119.0, |
|
"epoch": 0.22526315789473683, |
|
"grad_norm": 0.013704587705433369, |
|
"learning_rate": 9.596464075536963e-07, |
|
"loss": 0.0003, |
|
"num_tokens": 51951186.0, |
|
"reward": 1.8652345538139343, |
|
"reward_std": 0.09080793828513833, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.865234375, |
|
"rewards/stop_prediction_reward/std": 0.0908065214753151, |
|
"rewards/waypoint_pred_accuracy/mean": 9.960914946921718e-08, |
|
"rewards/waypoint_pred_accuracy/std": 7.087302705995463e-07, |
|
"step": 107 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.001953125, |
|
"completions/max_length": 403.875, |
|
"completions/max_terminated_length": 403.875, |
|
"completions/mean_length": 217.728515625, |
|
"completions/mean_terminated_length": 218.15721321105957, |
|
"completions/min_length": 100.25, |
|
"completions/min_terminated_length": 116.625, |
|
"epoch": 0.22736842105263158, |
|
"grad_norm": 0.02499217353761196, |
|
"learning_rate": 9.582650094861256e-07, |
|
"loss": -0.0001, |
|
"num_tokens": 52426055.0, |
|
"reward": 1.8535159230232239, |
|
"reward_std": 0.1513118724361675, |
|
"rewards/format_reward_embodied/mean": 0.99609375, |
|
"rewards/format_reward_embodied/std": 0.03125, |
|
"rewards/stop_prediction_reward/mean": 0.857421875, |
|
"rewards/stop_prediction_reward/std": 0.1338059287518263, |
|
"rewards/waypoint_pred_accuracy/mean": 1.5656563736666032e-07, |
|
"rewards/waypoint_pred_accuracy/std": 8.466679199507776e-07, |
|
"step": 108 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 437.0, |
|
"completions/max_terminated_length": 437.0, |
|
"completions/mean_length": 217.58984375, |
|
"completions/mean_terminated_length": 217.58984375, |
|
"completions/min_length": 108.5, |
|
"completions/min_terminated_length": 108.5, |
|
"epoch": 0.2294736842105263, |
|
"grad_norm": 0.018018925562500954, |
|
"learning_rate": 9.568615118295798e-07, |
|
"loss": 0.0002, |
|
"num_tokens": 52898613.0, |
|
"reward": 1.5074883997440338, |
|
"reward_std": 0.08622953689905444, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.505859375, |
|
"rewards/stop_prediction_reward/std": 0.07980126701295376, |
|
"rewards/waypoint_pred_accuracy/mean": 0.0008145241825951426, |
|
"rewards/waypoint_pred_accuracy/std": 0.003929519949304638, |
|
"step": 109 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 410.625, |
|
"completions/max_terminated_length": 410.625, |
|
"completions/mean_length": 220.09375, |
|
"completions/mean_terminated_length": 220.09375, |
|
"completions/min_length": 112.5, |
|
"completions/min_terminated_length": 112.5, |
|
"epoch": 0.23157894736842105, |
|
"grad_norm": 0.026054121553897858, |
|
"learning_rate": 9.554359905560885e-07, |
|
"loss": 0.0009, |
|
"num_tokens": 53371493.0, |
|
"reward": 1.6272786408662796, |
|
"reward_std": 0.1345605030655861, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.607421875, |
|
"rewards/stop_prediction_reward/std": 0.10731646977365017, |
|
"rewards/waypoint_pred_accuracy/mean": 0.009928377814540968, |
|
"rewards/waypoint_pred_accuracy/std": 0.021277805810678764, |
|
"step": 110 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 451.5, |
|
"completions/max_terminated_length": 451.5, |
|
"completions/mean_length": 226.404296875, |
|
"completions/mean_terminated_length": 226.404296875, |
|
"completions/min_length": 104.75, |
|
"completions/min_terminated_length": 104.75, |
|
"epoch": 0.2336842105263158, |
|
"grad_norm": 0.019862722605466843, |
|
"learning_rate": 9.53988522829831e-07, |
|
"loss": -0.0002, |
|
"num_tokens": 53849844.0, |
|
"reward": 1.7356369495391846, |
|
"reward_std": 0.17475299397483468, |
|
"rewards/format_reward_embodied/mean": 0.998046875, |
|
"rewards/format_reward_embodied/std": 0.015625, |
|
"rewards/stop_prediction_reward/mean": 0.6328125, |
|
"rewards/stop_prediction_reward/std": 0.0929968785494566, |
|
"rewards/waypoint_pred_accuracy/mean": 0.05238880167820055, |
|
"rewards/waypoint_pred_accuracy/std": 0.03885504556402302, |
|
"step": 111 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 429.125, |
|
"completions/max_terminated_length": 429.125, |
|
"completions/mean_length": 228.794921875, |
|
"completions/mean_terminated_length": 228.794921875, |
|
"completions/min_length": 102.625, |
|
"completions/min_terminated_length": 102.625, |
|
"epoch": 0.23578947368421052, |
|
"grad_norm": 0.013491553254425526, |
|
"learning_rate": 9.52519187002958e-07, |
|
"loss": -0.0013, |
|
"num_tokens": 54326667.0, |
|
"reward": 2.0176089107990265, |
|
"reward_std": 0.09997917944565415, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.990234375, |
|
"rewards/stop_prediction_reward/std": 0.06879601255059242, |
|
"rewards/waypoint_pred_accuracy/mean": 0.013687264542047125, |
|
"rewards/waypoint_pred_accuracy/std": 0.02499246856624292, |
|
"step": 112 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 419.25, |
|
"completions/max_terminated_length": 419.25, |
|
"completions/mean_length": 223.84765625, |
|
"completions/mean_terminated_length": 223.84765625, |
|
"completions/min_length": 116.125, |
|
"completions/min_terminated_length": 116.125, |
|
"epoch": 0.23789473684210527, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.510280626113524e-07, |
|
"loss": 0.0002, |
|
"num_tokens": 54804477.0, |
|
"reward": 2.0416500568389893, |
|
"reward_std": 0.15113097801804543, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.87109375, |
|
"rewards/stop_prediction_reward/std": 0.03125, |
|
"rewards/waypoint_pred_accuracy/mean": 0.08527813665590386, |
|
"rewards/waypoint_pred_accuracy/std": 0.06661079078944249, |
|
"step": 113 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.001953125, |
|
"completions/max_length": 418.375, |
|
"completions/max_terminated_length": 418.375, |
|
"completions/mean_length": 230.408203125, |
|
"completions/mean_terminated_length": 230.9030590057373, |
|
"completions/min_length": 102.375, |
|
"completions/min_terminated_length": 116.25, |
|
"epoch": 0.24, |
|
"grad_norm": 0.027175500988960266, |
|
"learning_rate": 9.495152303703225e-07, |
|
"loss": -0.0, |
|
"num_tokens": 55282254.0, |
|
"reward": 1.9152479320764542, |
|
"reward_std": 0.15890819625928998, |
|
"rewards/format_reward_embodied/mean": 0.998046875, |
|
"rewards/format_reward_embodied/std": 0.015625, |
|
"rewards/stop_prediction_reward/mean": 0.869140625, |
|
"rewards/stop_prediction_reward/std": 0.08138803765177727, |
|
"rewards/waypoint_pred_accuracy/mean": 0.024030234897509217, |
|
"rewards/waypoint_pred_accuracy/std": 0.038782567949965596, |
|
"step": 114 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 379.5, |
|
"completions/max_terminated_length": 379.5, |
|
"completions/mean_length": 218.421875, |
|
"completions/mean_terminated_length": 218.421875, |
|
"completions/min_length": 108.625, |
|
"completions/min_terminated_length": 108.625, |
|
"epoch": 0.24210526315789474, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.479807721702337e-07, |
|
"loss": 0.0009, |
|
"num_tokens": 55753702.0, |
|
"reward": 1.964790791273117, |
|
"reward_std": 0.14700112864375114, |
|
"rewards/format_reward_embodied/mean": 0.998046875, |
|
"rewards/format_reward_embodied/std": 0.015625, |
|
"rewards/stop_prediction_reward/mean": 0.869140625, |
|
"rewards/stop_prediction_reward/std": 0.046875, |
|
"rewards/waypoint_pred_accuracy/mean": 0.04880167031660676, |
|
"rewards/waypoint_pred_accuracy/std": 0.04630230367183685, |
|
"step": 115 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 418.75, |
|
"completions/max_terminated_length": 418.75, |
|
"completions/mean_length": 234.615234375, |
|
"completions/mean_terminated_length": 234.615234375, |
|
"completions/min_length": 107.25, |
|
"completions/min_terminated_length": 107.25, |
|
"epoch": 0.24421052631578946, |
|
"grad_norm": 0.01868380233645439, |
|
"learning_rate": 9.46424771072075e-07, |
|
"loss": -0.0016, |
|
"num_tokens": 56236001.0, |
|
"reward": 1.7634397000074387, |
|
"reward_std": 0.11784930247813463, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.744140625, |
|
"rewards/stop_prediction_reward/std": 0.08138803765177727, |
|
"rewards/waypoint_pred_accuracy/mean": 0.009649543033447117, |
|
"rewards/waypoint_pred_accuracy/std": 0.01907464297255501, |
|
"step": 116 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 393.5, |
|
"completions/max_terminated_length": 393.5, |
|
"completions/mean_length": 218.193359375, |
|
"completions/mean_terminated_length": 218.193359375, |
|
"completions/min_length": 111.75, |
|
"completions/min_terminated_length": 111.75, |
|
"epoch": 0.2463157894736842, |
|
"grad_norm": 0.013114881701767445, |
|
"learning_rate": 9.448473113029633e-07, |
|
"loss": 0.0009, |
|
"num_tokens": 56707844.0, |
|
"reward": 1.6348197907209396, |
|
"reward_std": 0.15209808605868602, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.513671875, |
|
"rewards/stop_prediction_reward/std": 0.08837713301181793, |
|
"rewards/waypoint_pred_accuracy/mean": 0.0605739434017778, |
|
"rewards/waypoint_pred_accuracy/std": 0.03603998847574985, |
|
"step": 117 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 385.625, |
|
"completions/max_terminated_length": 385.625, |
|
"completions/mean_length": 217.125, |
|
"completions/mean_terminated_length": 217.125, |
|
"completions/min_length": 115.125, |
|
"completions/min_terminated_length": 115.125, |
|
"epoch": 0.24842105263157896, |
|
"grad_norm": 0.021751945838332176, |
|
"learning_rate": 9.432484782515842e-07, |
|
"loss": 0.0011, |
|
"num_tokens": 57177540.0, |
|
"reward": 1.8951680064201355, |
|
"reward_std": 0.09731905919034034, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.87109375, |
|
"rewards/stop_prediction_reward/std": 0.05317101255059242, |
|
"rewards/waypoint_pred_accuracy/mean": 0.012037134467476562, |
|
"rewards/waypoint_pred_accuracy/std": 0.022074746749979113, |
|
"step": 118 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 733.875, |
|
"completions/max_terminated_length": 733.875, |
|
"completions/mean_length": 230.4921875, |
|
"completions/mean_terminated_length": 230.4921875, |
|
"completions/min_length": 114.125, |
|
"completions/min_terminated_length": 114.125, |
|
"epoch": 0.2505263157894737, |
|
"grad_norm": 0.017523573711514473, |
|
"learning_rate": 9.416283584635699e-07, |
|
"loss": 0.0073, |
|
"num_tokens": 57653632.0, |
|
"reward": 1.7819086909294128, |
|
"reward_std": 0.13167815032647923, |
|
"rewards/format_reward_embodied/mean": 0.998046875, |
|
"rewards/format_reward_embodied/std": 0.015625, |
|
"rewards/stop_prediction_reward/mean": 0.74609375, |
|
"rewards/stop_prediction_reward/std": 0.0625, |
|
"rewards/waypoint_pred_accuracy/mean": 0.018884045333834365, |
|
"rewards/waypoint_pred_accuracy/std": 0.04270522284787148, |
|
"step": 119 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 383.75, |
|
"completions/max_terminated_length": 383.75, |
|
"completions/mean_length": 230.603515625, |
|
"completions/mean_terminated_length": 230.603515625, |
|
"completions/min_length": 109.25, |
|
"completions/min_terminated_length": 109.25, |
|
"epoch": 0.25263157894736843, |
|
"grad_norm": 0.020878760144114494, |
|
"learning_rate": 9.399870396368137e-07, |
|
"loss": -0.0003, |
|
"num_tokens": 58132533.0, |
|
"reward": 1.75615593791008, |
|
"reward_std": 0.08499195147633398, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.744140625, |
|
"rewards/stop_prediction_reward/std": 0.03754601255059242, |
|
"rewards/waypoint_pred_accuracy/mean": 0.006007667677934543, |
|
"rewards/waypoint_pred_accuracy/std": 0.023722964530922208, |
|
"step": 120 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 393.75, |
|
"completions/max_terminated_length": 393.75, |
|
"completions/mean_length": 228.326171875, |
|
"completions/mean_terminated_length": 228.326171875, |
|
"completions/min_length": 110.875, |
|
"completions/min_terminated_length": 110.875, |
|
"epoch": 0.25473684210526315, |
|
"grad_norm": 0.030050212517380714, |
|
"learning_rate": 9.383246106167244e-07, |
|
"loss": -0.0007, |
|
"num_tokens": 58610652.0, |
|
"reward": 2.005710780620575, |
|
"reward_std": 0.1616469284220443, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.8671875, |
|
"rewards/stop_prediction_reward/std": 0.05317101255059242, |
|
"rewards/waypoint_pred_accuracy/mean": 0.06926164017690084, |
|
"rewards/waypoint_pred_accuracy/std": 0.06174082592511354, |
|
"step": 121 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 411.75, |
|
"completions/max_terminated_length": 411.75, |
|
"completions/mean_length": 235.259765625, |
|
"completions/mean_terminated_length": 235.259765625, |
|
"completions/min_length": 119.75, |
|
"completions/min_terminated_length": 119.75, |
|
"epoch": 0.25684210526315787, |
|
"grad_norm": 0.05214720964431763, |
|
"learning_rate": 9.366411613914151e-07, |
|
"loss": 0.0, |
|
"num_tokens": 59091681.0, |
|
"reward": 2.0128368139266968, |
|
"reward_std": 0.041966003568632004, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.998046875, |
|
"rewards/stop_prediction_reward/std": 0.015625, |
|
"rewards/waypoint_pred_accuracy/mean": 0.007394973285158812, |
|
"rewards/waypoint_pred_accuracy/std": 0.013188560594699084, |
|
"step": 122 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 738.375, |
|
"completions/max_terminated_length": 738.375, |
|
"completions/mean_length": 229.73828125, |
|
"completions/mean_terminated_length": 229.73828125, |
|
"completions/min_length": 113.125, |
|
"completions/min_terminated_length": 113.125, |
|
"epoch": 0.25894736842105265, |
|
"grad_norm": 0.020252572372555733, |
|
"learning_rate": 9.349367830868338e-07, |
|
"loss": 0.0065, |
|
"num_tokens": 59571867.0, |
|
"reward": 1.7482015490531921, |
|
"reward_std": 0.07813655398786068, |
|
"rewards/format_reward_embodied/mean": 0.994140625, |
|
"rewards/format_reward_embodied/std": 0.046875, |
|
"rewards/stop_prediction_reward/mean": 0.75390625, |
|
"rewards/stop_prediction_reward/std": 0.042255254462361336, |
|
"rewards/waypoint_pred_accuracy/mean": 7.734416431048885e-05, |
|
"rewards/waypoint_pred_accuracy/std": 0.00014155825192574412, |
|
"step": 123 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 421.5, |
|
"completions/max_terminated_length": 421.5, |
|
"completions/mean_length": 232.515625, |
|
"completions/mean_terminated_length": 232.515625, |
|
"completions/min_length": 116.625, |
|
"completions/min_terminated_length": 116.625, |
|
"epoch": 0.26105263157894737, |
|
"grad_norm": 0.012004735879600048, |
|
"learning_rate": 9.332115679618299e-07, |
|
"loss": -0.0002, |
|
"num_tokens": 60051875.0, |
|
"reward": 1.87109375, |
|
"reward_std": 0.06475212238729, |
|
"rewards/format_reward_embodied/mean": 0.998046875, |
|
"rewards/format_reward_embodied/std": 0.015625, |
|
"rewards/stop_prediction_reward/mean": 0.873046875, |
|
"rewards/stop_prediction_reward/std": 0.059467025101184845, |
|
"rewards/waypoint_pred_accuracy/mean": 3.7687339174058405e-25, |
|
"rewards/waypoint_pred_accuracy/std": 0.0, |
|
"step": 124 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 373.375, |
|
"completions/max_terminated_length": 373.375, |
|
"completions/mean_length": 221.771484375, |
|
"completions/mean_terminated_length": 221.771484375, |
|
"completions/min_length": 113.625, |
|
"completions/min_terminated_length": 113.625, |
|
"epoch": 0.2631578947368421, |
|
"grad_norm": 0.01381534244865179, |
|
"learning_rate": 9.3146560940316e-07, |
|
"loss": -0.0001, |
|
"num_tokens": 60525038.0, |
|
"reward": 1.8031584918498993, |
|
"reward_std": 0.08486801406252198, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.74609375, |
|
"rewards/stop_prediction_reward/std": 0.021921012550592422, |
|
"rewards/waypoint_pred_accuracy/mean": 0.02853236788253366, |
|
"rewards/waypoint_pred_accuracy/std": 0.03149077049183591, |
|
"step": 125 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 379.125, |
|
"completions/max_terminated_length": 379.125, |
|
"completions/mean_length": 225.11328125, |
|
"completions/mean_terminated_length": 225.11328125, |
|
"completions/min_length": 106.375, |
|
"completions/min_terminated_length": 106.375, |
|
"epoch": 0.26526315789473687, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.296990019204335e-07, |
|
"loss": 0.0003, |
|
"num_tokens": 61001576.0, |
|
"reward": 1.838148683309555, |
|
"reward_std": 0.13138162437826395, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.625, |
|
"rewards/stop_prediction_reward/std": 0.03125, |
|
"rewards/waypoint_pred_accuracy/mean": 0.1065743277722504, |
|
"rewards/waypoint_pred_accuracy/std": 0.058355900342576206, |
|
"step": 126 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 596.5, |
|
"completions/max_terminated_length": 596.5, |
|
"completions/mean_length": 223.703125, |
|
"completions/mean_terminated_length": 223.703125, |
|
"completions/min_length": 122.375, |
|
"completions/min_terminated_length": 122.375, |
|
"epoch": 0.2673684210526316, |
|
"grad_norm": 0.015371584333479404, |
|
"learning_rate": 9.279118411409962e-07, |
|
"loss": 0.0076, |
|
"num_tokens": 61475152.0, |
|
"reward": 2.0289103388786316, |
|
"reward_std": 0.14714059105608612, |
|
"rewards/format_reward_embodied/mean": 0.998046875, |
|
"rewards/format_reward_embodied/std": 0.015625, |
|
"rewards/stop_prediction_reward/mean": 0.986328125, |
|
"rewards/stop_prediction_reward/std": 0.05234810337424278, |
|
"rewards/waypoint_pred_accuracy/mean": 0.02226770008679324, |
|
"rewards/waypoint_pred_accuracy/std": 0.03958419876477137, |
|
"step": 127 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 389.0, |
|
"completions/max_terminated_length": 389.0, |
|
"completions/mean_length": 225.896484375, |
|
"completions/mean_terminated_length": 225.896484375, |
|
"completions/min_length": 109.0, |
|
"completions/min_terminated_length": 109.0, |
|
"epoch": 0.2694736842105263, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.261042238047539e-07, |
|
"loss": 0.0003, |
|
"num_tokens": 61948507.0, |
|
"reward": 1.7672365009784698, |
|
"reward_std": 0.08540481339514372, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.625, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.07111826360536146, |
|
"rewards/waypoint_pred_accuracy/std": 0.04270240558859675, |
|
"step": 128 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 364.0, |
|
"completions/max_terminated_length": 364.0, |
|
"completions/mean_length": 229.087890625, |
|
"completions/mean_terminated_length": 229.087890625, |
|
"completions/min_length": 122.25, |
|
"completions/min_terminated_length": 122.25, |
|
"epoch": 0.27157894736842103, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.242762477589369e-07, |
|
"loss": -0.0008, |
|
"num_tokens": 62425864.0, |
|
"reward": 1.7555950731039047, |
|
"reward_std": 0.150370123796165, |
|
"rewards/format_reward_embodied/mean": 0.99609375, |
|
"rewards/format_reward_embodied/std": 0.03125, |
|
"rewards/stop_prediction_reward/mean": 0.625, |
|
"rewards/stop_prediction_reward/std": 0.03125, |
|
"rewards/waypoint_pred_accuracy/mean": 0.06725065042865234, |
|
"rewards/waypoint_pred_accuracy/std": 0.060643474211165796, |
|
"step": 129 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.001953125, |
|
"completions/max_length": 377.25, |
|
"completions/max_terminated_length": 377.25, |
|
"completions/mean_length": 218.873046875, |
|
"completions/mean_terminated_length": 219.36554527282715, |
|
"completions/min_length": 95.875, |
|
"completions/min_terminated_length": 110.5, |
|
"epoch": 0.2736842105263158, |
|
"grad_norm": 0.015500541776418686, |
|
"learning_rate": 9.224280119528013e-07, |
|
"loss": -0.0006, |
|
"num_tokens": 62900679.0, |
|
"reward": 1.86328125, |
|
"reward_std": 0.08442101255059242, |
|
"rewards/format_reward_embodied/mean": 0.994140625, |
|
"rewards/format_reward_embodied/std": 0.046875, |
|
"rewards/stop_prediction_reward/mean": 0.869140625, |
|
"rewards/stop_prediction_reward/std": 0.046875, |
|
"rewards/waypoint_pred_accuracy/mean": 5.195248799303594e-14, |
|
"rewards/waypoint_pred_accuracy/std": 3.9519006556770764e-13, |
|
"step": 130 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 389.375, |
|
"completions/max_terminated_length": 389.375, |
|
"completions/mean_length": 223.375, |
|
"completions/mean_terminated_length": 223.375, |
|
"completions/min_length": 116.375, |
|
"completions/min_terminated_length": 116.375, |
|
"epoch": 0.27578947368421053, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.205596164322753e-07, |
|
"loss": 0.0005, |
|
"num_tokens": 63375047.0, |
|
"reward": 1.8198472261428833, |
|
"reward_std": 0.06600932776927948, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.74609375, |
|
"rewards/stop_prediction_reward/std": 0.03125, |
|
"rewards/waypoint_pred_accuracy/mean": 0.03687673434615135, |
|
"rewards/waypoint_pred_accuracy/std": 0.023463066667318344, |
|
"step": 131 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 386.875, |
|
"completions/max_terminated_length": 386.875, |
|
"completions/mean_length": 223.994140625, |
|
"completions/mean_terminated_length": 223.994140625, |
|
"completions/min_length": 106.125, |
|
"completions/min_terminated_length": 106.125, |
|
"epoch": 0.27789473684210525, |
|
"grad_norm": 5.267659071250819e-05, |
|
"learning_rate": 9.186711623345419e-07, |
|
"loss": 0.0007, |
|
"num_tokens": 63848068.0, |
|
"reward": 1.8460404872894287, |
|
"reward_std": 0.07753154253146377, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.75, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.04802026903570478, |
|
"rewards/waypoint_pred_accuracy/std": 0.03876577728947339, |
|
"step": 132 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 370.625, |
|
"completions/max_terminated_length": 370.625, |
|
"completions/mean_length": 216.8359375, |
|
"completions/mean_terminated_length": 216.8359375, |
|
"completions/min_length": 113.25, |
|
"completions/min_terminated_length": 113.25, |
|
"epoch": 0.28, |
|
"grad_norm": 0.019873064011335373, |
|
"learning_rate": 9.167627518825651e-07, |
|
"loss": -0.0003, |
|
"num_tokens": 64319408.0, |
|
"reward": 1.66434708237648, |
|
"reward_std": 0.10717772238422185, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.619140625, |
|
"rewards/stop_prediction_reward/std": 0.046875, |
|
"rewards/waypoint_pred_accuracy/mean": 0.022603242181673977, |
|
"rewards/waypoint_pred_accuracy/std": 0.030156509747939708, |
|
"step": 133 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 394.75, |
|
"completions/max_terminated_length": 394.75, |
|
"completions/mean_length": 221.185546875, |
|
"completions/mean_terminated_length": 221.185546875, |
|
"completions/min_length": 105.75, |
|
"completions/min_terminated_length": 105.75, |
|
"epoch": 0.28210526315789475, |
|
"grad_norm": 0.016748478636145592, |
|
"learning_rate": 9.148344883795563e-07, |
|
"loss": 0.0002, |
|
"num_tokens": 64797519.0, |
|
"reward": 1.6543543934822083, |
|
"reward_std": 0.09143044333904982, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.625, |
|
"rewards/stop_prediction_reward/std": 0.0625, |
|
"rewards/waypoint_pred_accuracy/mean": 0.014677208887757254, |
|
"rewards/waypoint_pred_accuracy/std": 0.02070214717129737, |
|
"step": 134 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 347.125, |
|
"completions/max_terminated_length": 347.125, |
|
"completions/mean_length": 207.6796875, |
|
"completions/mean_terminated_length": 207.6796875, |
|
"completions/min_length": 100.75, |
|
"completions/min_terminated_length": 100.75, |
|
"epoch": 0.28421052631578947, |
|
"grad_norm": 0.018281355500221252, |
|
"learning_rate": 9.128864762033824e-07, |
|
"loss": 0.0009, |
|
"num_tokens": 65264811.0, |
|
"reward": 1.8661604225635529, |
|
"reward_std": 0.13888043258339167, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.748046875, |
|
"rewards/stop_prediction_reward/std": 0.03754601255059242, |
|
"rewards/waypoint_pred_accuracy/mean": 0.05905676480875123, |
|
"rewards/waypoint_pred_accuracy/std": 0.053127349918608825, |
|
"step": 135 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 354.625, |
|
"completions/max_terminated_length": 354.625, |
|
"completions/mean_length": 213.609375, |
|
"completions/mean_terminated_length": 213.609375, |
|
"completions/min_length": 109.0, |
|
"completions/min_terminated_length": 109.0, |
|
"epoch": 0.2863157894736842, |
|
"grad_norm": 0.01513614784926176, |
|
"learning_rate": 9.10918820800916e-07, |
|
"loss": -0.0, |
|
"num_tokens": 65734563.0, |
|
"reward": 1.7799546718597412, |
|
"reward_std": 0.05648380851107504, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.751953125, |
|
"rewards/stop_prediction_reward/std": 0.015625, |
|
"rewards/waypoint_pred_accuracy/mean": 0.014000790968564693, |
|
"rewards/waypoint_pred_accuracy/std": 0.0204294033423741, |
|
"step": 136 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 359.5, |
|
"completions/max_terminated_length": 359.5, |
|
"completions/mean_length": 209.94140625, |
|
"completions/mean_terminated_length": 209.94140625, |
|
"completions/min_length": 112.625, |
|
"completions/min_terminated_length": 112.625, |
|
"epoch": 0.28842105263157897, |
|
"grad_norm": 0.014532121829688549, |
|
"learning_rate": 9.089316286823274e-07, |
|
"loss": -0.0006, |
|
"num_tokens": 66202821.0, |
|
"reward": 1.8316712975502014, |
|
"reward_std": 0.14127142806610715, |
|
"rewards/format_reward_embodied/mean": 0.998046875, |
|
"rewards/format_reward_embodied/std": 0.015625, |
|
"rewards/stop_prediction_reward/mean": 0.748046875, |
|
"rewards/stop_prediction_reward/std": 0.015625, |
|
"rewards/waypoint_pred_accuracy/mean": 0.042788804041257456, |
|
"rewards/waypoint_pred_accuracy/std": 0.05501071766701504, |
|
"step": 137 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 384.75, |
|
"completions/max_terminated_length": 384.75, |
|
"completions/mean_length": 226.625, |
|
"completions/mean_terminated_length": 226.625, |
|
"completions/min_length": 112.0, |
|
"completions/min_terminated_length": 112.0, |
|
"epoch": 0.2905263157894737, |
|
"grad_norm": 0.013312633149325848, |
|
"learning_rate": 9.069250074153191e-07, |
|
"loss": -0.0001, |
|
"num_tokens": 66681989.0, |
|
"reward": 1.9901870042085648, |
|
"reward_std": 0.08008617826271802, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.98828125, |
|
"rewards/stop_prediction_reward/std": 0.07509202510118484, |
|
"rewards/waypoint_pred_accuracy/mean": 0.0009528863083687822, |
|
"rewards/waypoint_pred_accuracy/std": 0.007327620231080863, |
|
"step": 138 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 373.375, |
|
"completions/max_terminated_length": 373.375, |
|
"completions/mean_length": 206.775390625, |
|
"completions/mean_terminated_length": 206.775390625, |
|
"completions/min_length": 109.5, |
|
"completions/min_terminated_length": 109.5, |
|
"epoch": 0.2926315789473684, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.048990656193024e-07, |
|
"loss": 0.0, |
|
"num_tokens": 67151378.0, |
|
"reward": 1.8710939586162567, |
|
"reward_std": 0.03125098500925105, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.87109375, |
|
"rewards/stop_prediction_reward/std": 0.03125, |
|
"rewards/waypoint_pred_accuracy/mean": 1.0124033455018937e-07, |
|
"rewards/waypoint_pred_accuracy/std": 4.912120291946227e-07, |
|
"step": 139 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 365.25, |
|
"completions/max_terminated_length": 365.25, |
|
"completions/mean_length": 223.37109375, |
|
"completions/mean_terminated_length": 223.37109375, |
|
"completions/min_length": 115.875, |
|
"completions/min_terminated_length": 115.875, |
|
"epoch": 0.29473684210526313, |
|
"grad_norm": 0.020622270181775093, |
|
"learning_rate": 9.028539129595197e-07, |
|
"loss": -0.0001, |
|
"num_tokens": 67625936.0, |
|
"reward": 1.9263398349285126, |
|
"reward_std": 0.07740109786391258, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.875, |
|
"rewards/stop_prediction_reward/std": 0.03125, |
|
"rewards/waypoint_pred_accuracy/mean": 0.025669913738965988, |
|
"rewards/waypoint_pred_accuracy/std": 0.028465650044381622, |
|
"step": 140 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 378.75, |
|
"completions/max_terminated_length": 378.75, |
|
"completions/mean_length": 213.451171875, |
|
"completions/mean_terminated_length": 213.451171875, |
|
"completions/min_length": 107.875, |
|
"completions/min_terminated_length": 107.875, |
|
"epoch": 0.2968421052631579, |
|
"grad_norm": 0.01621430739760399, |
|
"learning_rate": 9.00789660141106e-07, |
|
"loss": -0.0004, |
|
"num_tokens": 68094391.0, |
|
"reward": 1.8975248336791992, |
|
"reward_std": 0.056784010463506895, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.873046875, |
|
"rewards/stop_prediction_reward/std": 0.015625, |
|
"rewards/waypoint_pred_accuracy/mean": 0.012238975709265076, |
|
"rewards/waypoint_pred_accuracy/std": 0.02289922521287524, |
|
"step": 141 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 366.125, |
|
"completions/max_terminated_length": 366.125, |
|
"completions/mean_length": 207.638671875, |
|
"completions/mean_terminated_length": 207.638671875, |
|
"completions/min_length": 111.25, |
|
"completions/min_terminated_length": 111.25, |
|
"epoch": 0.29894736842105263, |
|
"grad_norm": 0.0012398953549563885, |
|
"learning_rate": 8.987064189030983e-07, |
|
"loss": -0.0, |
|
"num_tokens": 68561854.0, |
|
"reward": 1.809123456478119, |
|
"reward_std": 0.05541337472914165, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.75, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.029561737023570345, |
|
"rewards/waypoint_pred_accuracy/std": 0.027706685019552424, |
|
"step": 142 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 336.5, |
|
"completions/max_terminated_length": 336.5, |
|
"completions/mean_length": 201.529296875, |
|
"completions/mean_terminated_length": 201.529296875, |
|
"completions/min_length": 111.875, |
|
"completions/min_terminated_length": 111.875, |
|
"epoch": 0.30105263157894735, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.966043020123855e-07, |
|
"loss": 0.0001, |
|
"num_tokens": 69026509.0, |
|
"reward": 2.1186273992061615, |
|
"reward_std": 0.09686689289469541, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.994140625, |
|
"rewards/stop_prediction_reward/std": 0.03754601255059242, |
|
"rewards/waypoint_pred_accuracy/mean": 0.06224340945057809, |
|
"rewards/waypoint_pred_accuracy/std": 0.029665624278586974, |
|
"step": 143 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 371.5, |
|
"completions/max_terminated_length": 371.5, |
|
"completions/mean_length": 212.501953125, |
|
"completions/mean_terminated_length": 212.501953125, |
|
"completions/min_length": 111.5, |
|
"completions/min_terminated_length": 111.5, |
|
"epoch": 0.3031578947368421, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.944834232576054e-07, |
|
"loss": 0.0002, |
|
"num_tokens": 69494414.0, |
|
"reward": 2.0669292509555817, |
|
"reward_std": 0.09019226813688874, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.99609375, |
|
"rewards/stop_prediction_reward/std": 0.021921012550592422, |
|
"rewards/waypoint_pred_accuracy/mean": 0.03541775744817477, |
|
"rewards/waypoint_pred_accuracy/std": 0.03677688956680679, |
|
"step": 144 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 377.875, |
|
"completions/max_terminated_length": 377.875, |
|
"completions/mean_length": 214.53125, |
|
"completions/mean_terminated_length": 214.53125, |
|
"completions/min_length": 111.875, |
|
"completions/min_terminated_length": 111.875, |
|
"epoch": 0.30526315789473685, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.923438974429849e-07, |
|
"loss": -0.0, |
|
"num_tokens": 69966238.0, |
|
"reward": 1.500933289527893, |
|
"reward_std": 0.022255118004977703, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.498046875, |
|
"rewards/stop_prediction_reward/std": 0.015625, |
|
"rewards/waypoint_pred_accuracy/mean": 0.0014432001626119018, |
|
"rewards/waypoint_pred_accuracy/std": 0.0033150608651340008, |
|
"step": 145 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 341.375, |
|
"completions/max_terminated_length": 341.375, |
|
"completions/mean_length": 207.609375, |
|
"completions/mean_terminated_length": 207.609375, |
|
"completions/min_length": 108.625, |
|
"completions/min_terminated_length": 108.625, |
|
"epoch": 0.30736842105263157, |
|
"grad_norm": 0.01606505736708641, |
|
"learning_rate": 8.901858403821253e-07, |
|
"loss": -0.0003, |
|
"num_tokens": 70430934.0, |
|
"reward": 1.9067281186580658, |
|
"reward_std": 0.11152400076389313, |
|
"rewards/format_reward_embodied/mean": 0.998046875, |
|
"rewards/format_reward_embodied/std": 0.015625, |
|
"rewards/stop_prediction_reward/mean": 0.748046875, |
|
"rewards/stop_prediction_reward/std": 0.03754601255059242, |
|
"rewards/waypoint_pred_accuracy/mean": 0.08031718447636607, |
|
"rewards/waypoint_pred_accuracy/std": 0.041061242358370054, |
|
"step": 146 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 339.125, |
|
"completions/max_terminated_length": 339.125, |
|
"completions/mean_length": 200.755859375, |
|
"completions/mean_terminated_length": 200.755859375, |
|
"completions/min_length": 108.125, |
|
"completions/min_terminated_length": 108.125, |
|
"epoch": 0.3094736842105263, |
|
"grad_norm": 0.022619424387812614, |
|
"learning_rate": 8.880093688917338e-07, |
|
"loss": 0.0006, |
|
"num_tokens": 70895897.0, |
|
"reward": 1.979694738984108, |
|
"reward_std": 0.10517071333015338, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.875, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.052347380133141996, |
|
"rewards/waypoint_pred_accuracy/std": 0.05258536203473341, |
|
"step": 147 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 351.25, |
|
"completions/max_terminated_length": 351.25, |
|
"completions/mean_length": 209.3203125, |
|
"completions/mean_terminated_length": 209.3203125, |
|
"completions/min_length": 106.375, |
|
"completions/min_terminated_length": 106.375, |
|
"epoch": 0.31157894736842107, |
|
"grad_norm": 0.015080302953720093, |
|
"learning_rate": 8.858146007853e-07, |
|
"loss": 0.0007, |
|
"num_tokens": 71362301.0, |
|
"reward": 1.7541356086730957, |
|
"reward_std": 0.0744232046417892, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.748046875, |
|
"rewards/stop_prediction_reward/std": 0.046875, |
|
"rewards/waypoint_pred_accuracy/mean": 0.003044351096450817, |
|
"rewards/waypoint_pred_accuracy/std": 0.014287834603145175, |
|
"step": 148 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 339.25, |
|
"completions/max_terminated_length": 339.25, |
|
"completions/mean_length": 204.564453125, |
|
"completions/mean_terminated_length": 204.564453125, |
|
"completions/min_length": 109.5, |
|
"completions/min_terminated_length": 109.5, |
|
"epoch": 0.3136842105263158, |
|
"grad_norm": 0.014518975280225277, |
|
"learning_rate": 8.836016548667178e-07, |
|
"loss": 0.0, |
|
"num_tokens": 71831198.0, |
|
"reward": 1.7602195739746094, |
|
"reward_std": 0.04601290519349277, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.751953125, |
|
"rewards/stop_prediction_reward/std": 0.03754601255059242, |
|
"rewards/waypoint_pred_accuracy/mean": 0.004133254632145133, |
|
"rewards/waypoint_pred_accuracy/std": 0.004251232765162753, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.3157894736842105, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.813706509238558e-07, |
|
"loss": -0.0002, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3157894736842105, |
|
"eval_clip_ratio": 0.0, |
|
"eval_completions/clipped_ratio": 0.0, |
|
"eval_completions/max_length": 410.41, |
|
"eval_completions/max_terminated_length": 410.41, |
|
"eval_completions/mean_length": 207.72815979003906, |
|
"eval_completions/mean_terminated_length": 207.72815979003906, |
|
"eval_completions/min_length": 111.67, |
|
"eval_completions/min_terminated_length": 111.67, |
|
"eval_loss": 0.0016332893865182996, |
|
"eval_num_tokens": 72298062.0, |
|
"eval_reward": 1.8651788556575775, |
|
"eval_reward_std": 0.10482742591684201, |
|
"eval_rewards/format_reward_embodied/mean": 0.9990625, |
|
"eval_rewards/format_reward_embodied/std": 0.0075, |
|
"eval_rewards/stop_prediction_reward/mean": 0.76859375, |
|
"eval_rewards/stop_prediction_reward/std": 0.020503681004047394, |
|
"eval_rewards/waypoint_pred_accuracy/mean": 0.04876130852479674, |
|
"eval_rewards/waypoint_pred_accuracy/std": 0.0403332443083783, |
|
"eval_runtime": 1073.7169, |
|
"eval_samples_per_second": 0.093, |
|
"eval_steps_per_second": 0.002, |
|
"step": 150 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 359.75, |
|
"completions/max_terminated_length": 359.75, |
|
"completions/mean_length": 208.6748046875, |
|
"completions/mean_terminated_length": 208.6748046875, |
|
"completions/min_length": 112.5, |
|
"completions/min_terminated_length": 112.5, |
|
"epoch": 0.3178947368421053, |
|
"grad_norm": 0.013742033392190933, |
|
"learning_rate": 8.791217097220724e-07, |
|
"loss": -0.0001, |
|
"num_tokens": 72761681.0, |
|
"reward": 1.875585325062275, |
|
"reward_std": 0.023173605810638875, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.8720703125, |
|
"rewards/stop_prediction_reward/std": 0.01877300627529621, |
|
"rewards/waypoint_pred_accuracy/mean": 0.0017575172029685837, |
|
"rewards/waypoint_pred_accuracy/std": 0.0022003025424544072, |
|
"step": 151 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 350.375, |
|
"completions/max_terminated_length": 350.375, |
|
"completions/mean_length": 210.005859375, |
|
"completions/mean_terminated_length": 210.005859375, |
|
"completions/min_length": 111.0, |
|
"completions/min_terminated_length": 111.0, |
|
"epoch": 0.32, |
|
"grad_norm": 0.010572736151516438, |
|
"learning_rate": 8.768549529976783e-07, |
|
"loss": 0.0006, |
|
"num_tokens": 73228180.0, |
|
"reward": 1.9192677438259125, |
|
"reward_std": 0.049104438461654354, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.875, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.022133867223146808, |
|
"rewards/waypoint_pred_accuracy/std": 0.02455222301614413, |
|
"step": 152 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 368.0, |
|
"completions/max_terminated_length": 368.0, |
|
"completions/mean_length": 197.775390625, |
|
"completions/mean_terminated_length": 197.775390625, |
|
"completions/min_length": 107.625, |
|
"completions/min_terminated_length": 107.625, |
|
"epoch": 0.32210526315789473, |
|
"grad_norm": 0.01287839561700821, |
|
"learning_rate": 8.74570503451348e-07, |
|
"loss": 0.0003, |
|
"num_tokens": 73691105.0, |
|
"reward": 1.9361660480499268, |
|
"reward_std": 0.10891422609623902, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.87109375, |
|
"rewards/stop_prediction_reward/std": 0.03125, |
|
"rewards/waypoint_pred_accuracy/mean": 0.03253614324701938, |
|
"rewards/waypoint_pred_accuracy/std": 0.03883211300195525, |
|
"step": 153 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 326.0, |
|
"completions/max_terminated_length": 326.0, |
|
"completions/mean_length": 202.033203125, |
|
"completions/mean_terminated_length": 202.033203125, |
|
"completions/min_length": 115.75, |
|
"completions/min_terminated_length": 115.75, |
|
"epoch": 0.32421052631578945, |
|
"grad_norm": 0.018562331795692444, |
|
"learning_rate": 8.72268484741477e-07, |
|
"loss": -0.0005, |
|
"num_tokens": 74156146.0, |
|
"reward": 1.8251054883003235, |
|
"reward_std": 0.1710243321698499, |
|
"rewards/format_reward_embodied/mean": 0.998046875, |
|
"rewards/format_reward_embodied/std": 0.015625, |
|
"rewards/stop_prediction_reward/mean": 0.626953125, |
|
"rewards/stop_prediction_reward/std": 0.015625, |
|
"rewards/waypoint_pred_accuracy/mean": 0.1000527671312958, |
|
"rewards/waypoint_pred_accuracy/std": 0.07437628054339515, |
|
"step": 154 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 372.0, |
|
"completions/max_terminated_length": 372.0, |
|
"completions/mean_length": 204.990234375, |
|
"completions/mean_terminated_length": 204.990234375, |
|
"completions/min_length": 115.75, |
|
"completions/min_terminated_length": 115.75, |
|
"epoch": 0.3263157894736842, |
|
"grad_norm": 0.018523743376135826, |
|
"learning_rate": 8.699490214774881e-07, |
|
"loss": 0.0002, |
|
"num_tokens": 74622701.0, |
|
"reward": 1.7455661296844482, |
|
"reward_std": 0.05205453363100787, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.744140625, |
|
"rewards/stop_prediction_reward/std": 0.046875, |
|
"rewards/waypoint_pred_accuracy/mean": 0.0007127649293749982, |
|
"rewards/waypoint_pred_accuracy/std": 0.0025897676093791233, |
|
"step": 155 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 333.75, |
|
"completions/max_terminated_length": 333.75, |
|
"completions/mean_length": 193.486328125, |
|
"completions/mean_terminated_length": 193.486328125, |
|
"completions/min_length": 102.625, |
|
"completions/min_terminated_length": 102.625, |
|
"epoch": 0.32842105263157895, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.676122392130872e-07, |
|
"loss": 0.0, |
|
"num_tokens": 75081254.0, |
|
"reward": 1.623046875, |
|
"reward_std": 0.015625, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.623046875, |
|
"rewards/stop_prediction_reward/std": 0.015625, |
|
"rewards/waypoint_pred_accuracy/mean": 7.546804675536426e-25, |
|
"rewards/waypoint_pred_accuracy/std": 4.67924008313652e-24, |
|
"step": 156 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 352.25, |
|
"completions/max_terminated_length": 352.25, |
|
"completions/mean_length": 199.650390625, |
|
"completions/mean_terminated_length": 199.650390625, |
|
"completions/min_length": 109.75, |
|
"completions/min_terminated_length": 109.75, |
|
"epoch": 0.33052631578947367, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.652582644394657e-07, |
|
"loss": 0.001, |
|
"num_tokens": 75543923.0, |
|
"reward": 1.763193666934967, |
|
"reward_std": 0.043326430561137386, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.748046875, |
|
"rewards/stop_prediction_reward/std": 0.015625, |
|
"rewards/waypoint_pred_accuracy/mean": 0.007573404463160793, |
|
"rewards/waypoint_pred_accuracy/std": 0.013850717227414266, |
|
"step": 157 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 351.375, |
|
"completions/max_terminated_length": 351.375, |
|
"completions/mean_length": 211.6484375, |
|
"completions/mean_terminated_length": 211.6484375, |
|
"completions/min_length": 105.125, |
|
"completions/min_terminated_length": 105.125, |
|
"epoch": 0.33263157894736844, |
|
"grad_norm": 0.015552366152405739, |
|
"learning_rate": 8.628872245784545e-07, |
|
"loss": 0.0007, |
|
"num_tokens": 76014975.0, |
|
"reward": 1.7972655892372131, |
|
"reward_std": 0.11691518849693239, |
|
"rewards/format_reward_embodied/mean": 0.998046875, |
|
"rewards/format_reward_embodied/std": 0.015625, |
|
"rewards/stop_prediction_reward/mean": 0.748046875, |
|
"rewards/stop_prediction_reward/std": 0.046875, |
|
"rewards/waypoint_pred_accuracy/mean": 0.025585949169908424, |
|
"rewards/waypoint_pred_accuracy/std": 0.027767218511144182, |
|
"step": 158 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 335.0, |
|
"completions/max_terminated_length": 335.0, |
|
"completions/mean_length": 200.451171875, |
|
"completions/mean_terminated_length": 200.451171875, |
|
"completions/min_length": 113.625, |
|
"completions/min_terminated_length": 113.625, |
|
"epoch": 0.33473684210526317, |
|
"grad_norm": 0.012406791560351849, |
|
"learning_rate": 8.60499247975626e-07, |
|
"loss": 0.0001, |
|
"num_tokens": 76479526.0, |
|
"reward": 1.692464992403984, |
|
"reward_std": 0.10753844678401947, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.626953125, |
|
"rewards/stop_prediction_reward/std": 0.015625, |
|
"rewards/waypoint_pred_accuracy/mean": 0.0327559362485772, |
|
"rewards/waypoint_pred_accuracy/std": 0.0465778008219786, |
|
"step": 159 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 320.875, |
|
"completions/max_terminated_length": 320.875, |
|
"completions/mean_length": 193.51171875, |
|
"completions/mean_terminated_length": 193.51171875, |
|
"completions/min_length": 102.875, |
|
"completions/min_terminated_length": 102.875, |
|
"epoch": 0.3368421052631579, |
|
"grad_norm": 0.030549675226211548, |
|
"learning_rate": 8.58094463893347e-07, |
|
"loss": 0.0002, |
|
"num_tokens": 76938732.0, |
|
"reward": 1.9054777026176453, |
|
"reward_std": 0.05647589443033718, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.873046875, |
|
"rewards/stop_prediction_reward/std": 0.015625, |
|
"rewards/waypoint_pred_accuracy/mean": 0.016215412090739154, |
|
"rewards/waypoint_pred_accuracy/std": 0.02042544638196753, |
|
"step": 160 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 334.125, |
|
"completions/max_terminated_length": 334.125, |
|
"completions/mean_length": 199.755859375, |
|
"completions/mean_terminated_length": 199.755859375, |
|
"completions/min_length": 113.875, |
|
"completions/min_terminated_length": 113.875, |
|
"epoch": 0.3389473684210526, |
|
"grad_norm": 0.014368158765137196, |
|
"learning_rate": 8.556730025037819e-07, |
|
"loss": -0.0002, |
|
"num_tokens": 77399855.0, |
|
"reward": 1.9117814898490906, |
|
"reward_std": 0.06853678584000633, |
|
"rewards/format_reward_embodied/mean": 0.998046875, |
|
"rewards/format_reward_embodied/std": 0.015625, |
|
"rewards/stop_prediction_reward/mean": 0.873046875, |
|
"rewards/stop_prediction_reward/std": 0.015625, |
|
"rewards/waypoint_pred_accuracy/mean": 0.020343898673879646, |
|
"rewards/waypoint_pred_accuracy/std": 0.018643394690372794, |
|
"step": 161 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 311.125, |
|
"completions/max_terminated_length": 311.125, |
|
"completions/mean_length": 194.9609375, |
|
"completions/mean_terminated_length": 194.9609375, |
|
"completions/min_length": 111.875, |
|
"completions/min_terminated_length": 111.875, |
|
"epoch": 0.3410526315789474, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.532349948818453e-07, |
|
"loss": 0.0001, |
|
"num_tokens": 77858715.0, |
|
"reward": 1.5617362409830093, |
|
"reward_std": 0.09266455079254143, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.5, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.030868121356284917, |
|
"rewards/waypoint_pred_accuracy/std": 0.04633227763988046, |
|
"step": 162 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 359.125, |
|
"completions/max_terminated_length": 359.125, |
|
"completions/mean_length": 195.875, |
|
"completions/mean_terminated_length": 195.875, |
|
"completions/min_length": 114.25, |
|
"completions/min_terminated_length": 114.25, |
|
"epoch": 0.3431578947368421, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.507805729981081e-07, |
|
"loss": -0.0003, |
|
"num_tokens": 78320411.0, |
|
"reward": 1.5313882529735565, |
|
"reward_std": 0.13795647164806724, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.375, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.07819415128325216, |
|
"rewards/waypoint_pred_accuracy/std": 0.06897824443884724, |
|
"step": 163 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 356.125, |
|
"completions/max_terminated_length": 356.125, |
|
"completions/mean_length": 200.798828125, |
|
"completions/mean_terminated_length": 200.798828125, |
|
"completions/min_length": 112.75, |
|
"completions/min_terminated_length": 112.75, |
|
"epoch": 0.3452631578947368, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.483098697116535e-07, |
|
"loss": -0.0002, |
|
"num_tokens": 78785204.0, |
|
"reward": 1.8663674592971802, |
|
"reward_std": 0.0740682063976692, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.75, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.05818372880702327, |
|
"rewards/waypoint_pred_accuracy/std": 0.03703410336356683, |
|
"step": 164 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 323.0, |
|
"completions/max_terminated_length": 323.0, |
|
"completions/mean_length": 184.767578125, |
|
"completions/mean_terminated_length": 184.767578125, |
|
"completions/min_length": 108.125, |
|
"completions/min_terminated_length": 108.125, |
|
"epoch": 0.3473684210526316, |
|
"grad_norm": 0.014639639295637608, |
|
"learning_rate": 8.45823018762885e-07, |
|
"loss": 0.0004, |
|
"num_tokens": 79241469.0, |
|
"reward": 1.5147821009159088, |
|
"reward_std": 0.028108830246765137, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.5, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.007391049890770773, |
|
"rewards/waypoint_pred_accuracy/std": 0.01405441654196693, |
|
"step": 165 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 327.125, |
|
"completions/max_terminated_length": 327.125, |
|
"completions/mean_length": 188.45703125, |
|
"completions/mean_terminated_length": 188.45703125, |
|
"completions/min_length": 108.75, |
|
"completions/min_terminated_length": 108.75, |
|
"epoch": 0.3494736842105263, |
|
"grad_norm": 0.01364043541252613, |
|
"learning_rate": 8.43320154766287e-07, |
|
"loss": 0.0, |
|
"num_tokens": 79696807.0, |
|
"reward": 1.9586426615715027, |
|
"reward_std": 0.09136595235713685, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.875, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.04182136098216205, |
|
"rewards/waypoint_pred_accuracy/std": 0.04568298065798615, |
|
"step": 166 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 332.375, |
|
"completions/max_terminated_length": 332.375, |
|
"completions/mean_length": 192.078125, |
|
"completions/mean_terminated_length": 192.078125, |
|
"completions/min_length": 112.125, |
|
"completions/min_terminated_length": 112.125, |
|
"epoch": 0.35157894736842105, |
|
"grad_norm": 0.0016441630432382226, |
|
"learning_rate": 8.408014132031385e-07, |
|
"loss": -0.0002, |
|
"num_tokens": 80154511.0, |
|
"reward": 1.753662645816803, |
|
"reward_std": 0.020446277248106215, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.75, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.0018313333685417293, |
|
"rewards/waypoint_pred_accuracy/std": 0.010223136260532173, |
|
"step": 167 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 350.625, |
|
"completions/max_terminated_length": 350.625, |
|
"completions/mean_length": 195.697265625, |
|
"completions/mean_terminated_length": 195.697265625, |
|
"completions/min_length": 111.25, |
|
"completions/min_terminated_length": 111.25, |
|
"epoch": 0.35368421052631577, |
|
"grad_norm": 0.0191575326025486, |
|
"learning_rate": 8.382669304141789e-07, |
|
"loss": 0.0002, |
|
"num_tokens": 80616180.0, |
|
"reward": 1.7313858270645142, |
|
"reward_std": 0.07452307981657214, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.625, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.05319291944942961, |
|
"rewards/waypoint_pred_accuracy/std": 0.03726154523974404, |
|
"step": 168 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 339.125, |
|
"completions/max_terminated_length": 339.125, |
|
"completions/mean_length": 199.1796875, |
|
"completions/mean_terminated_length": 199.1796875, |
|
"completions/min_length": 113.125, |
|
"completions/min_terminated_length": 113.125, |
|
"epoch": 0.35578947368421054, |
|
"grad_norm": 0.0011054413625970483, |
|
"learning_rate": 8.35716843592228e-07, |
|
"loss": -0.0002, |
|
"num_tokens": 81079184.0, |
|
"reward": 1.877403125166893, |
|
"reward_std": 0.014616520323585291, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.875, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.0012015849925077877, |
|
"rewards/waypoint_pred_accuracy/std": 0.007308262612468781, |
|
"step": 169 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 345.625, |
|
"completions/max_terminated_length": 345.625, |
|
"completions/mean_length": 194.3984375, |
|
"completions/mean_terminated_length": 194.3984375, |
|
"completions/min_length": 118.875, |
|
"completions/min_terminated_length": 118.875, |
|
"epoch": 0.35789473684210527, |
|
"grad_norm": 0.04289071634411812, |
|
"learning_rate": 8.331512907747596e-07, |
|
"loss": -0.0002, |
|
"num_tokens": 81539356.0, |
|
"reward": 1.8431425243616104, |
|
"reward_std": 0.17017098766780236, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.625, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.10907126411490253, |
|
"rewards/waypoint_pred_accuracy/std": 0.0850854907983205, |
|
"step": 170 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 312.125, |
|
"completions/max_terminated_length": 312.125, |
|
"completions/mean_length": 189.50390625, |
|
"completions/mean_terminated_length": 189.50390625, |
|
"completions/min_length": 114.375, |
|
"completions/min_terminated_length": 114.375, |
|
"epoch": 0.36, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.305704108364301e-07, |
|
"loss": 0.0003, |
|
"num_tokens": 81995934.0, |
|
"reward": 1.8737435936927795, |
|
"reward_std": 0.02003212797418996, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.873046875, |
|
"rewards/stop_prediction_reward/std": 0.015625, |
|
"rewards/waypoint_pred_accuracy/mean": 0.0003483741428915721, |
|
"rewards/waypoint_pred_accuracy/std": 0.0022035639689050868, |
|
"step": 171 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 353.625, |
|
"completions/max_terminated_length": 353.625, |
|
"completions/mean_length": 196.31640625, |
|
"completions/mean_terminated_length": 196.31640625, |
|
"completions/min_length": 111.5, |
|
"completions/min_terminated_length": 111.5, |
|
"epoch": 0.36210526315789476, |
|
"grad_norm": 0.014004958793520927, |
|
"learning_rate": 8.279743434815599e-07, |
|
"loss": 0.0, |
|
"num_tokens": 82457920.0, |
|
"reward": 1.6893496811389923, |
|
"reward_std": 0.07697248342446983, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.625, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.03217484994092959, |
|
"rewards/waypoint_pred_accuracy/std": 0.038486242177896396, |
|
"step": 172 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 310.375, |
|
"completions/max_terminated_length": 310.375, |
|
"completions/mean_length": 184.642578125, |
|
"completions/mean_terminated_length": 184.642578125, |
|
"completions/min_length": 110.875, |
|
"completions/min_terminated_length": 110.875, |
|
"epoch": 0.3642105263157895, |
|
"grad_norm": 0.013178675435483456, |
|
"learning_rate": 8.253632292365726e-07, |
|
"loss": 0.0, |
|
"num_tokens": 82915145.0, |
|
"reward": 2.0115868896245956, |
|
"reward_std": 0.08305720053613186, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.875, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.06829345226288144, |
|
"rewards/waypoint_pred_accuracy/std": 0.041528596542799806, |
|
"step": 173 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 318.125, |
|
"completions/max_terminated_length": 318.125, |
|
"completions/mean_length": 190.236328125, |
|
"completions/mean_terminated_length": 190.236328125, |
|
"completions/min_length": 112.125, |
|
"completions/min_terminated_length": 112.125, |
|
"epoch": 0.3663157894736842, |
|
"grad_norm": 0.02363615669310093, |
|
"learning_rate": 8.227372094423864e-07, |
|
"loss": -0.0001, |
|
"num_tokens": 83374914.0, |
|
"reward": 2.0038606971502304, |
|
"reward_std": 0.14384562149643898, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.875, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.0644303746521473, |
|
"rewards/waypoint_pred_accuracy/std": 0.07192281540483236, |
|
"step": 174 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 332.125, |
|
"completions/max_terminated_length": 332.125, |
|
"completions/mean_length": 180.150390625, |
|
"completions/mean_terminated_length": 180.150390625, |
|
"completions/min_length": 106.875, |
|
"completions/min_terminated_length": 106.875, |
|
"epoch": 0.3684210526315789, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.200964262467656e-07, |
|
"loss": 0.0001, |
|
"num_tokens": 83825615.0, |
|
"reward": 1.4097924530506134, |
|
"reward_std": 0.04181510955095291, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.375, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.0173962339758873, |
|
"rewards/waypoint_pred_accuracy/std": 0.020907556638121605, |
|
"step": 175 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 307.625, |
|
"completions/max_terminated_length": 307.625, |
|
"completions/mean_length": 184.662109375, |
|
"completions/mean_terminated_length": 184.662109375, |
|
"completions/min_length": 108.25, |
|
"completions/min_terminated_length": 108.25, |
|
"epoch": 0.3705263157894737, |
|
"grad_norm": 0.01097350474447012, |
|
"learning_rate": 8.174410225966239e-07, |
|
"loss": 0.0001, |
|
"num_tokens": 84280098.0, |
|
"reward": 2.0639708340168, |
|
"reward_std": 0.17662093978196225, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.875, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.09448541931482657, |
|
"rewards/waypoint_pred_accuracy/std": 0.0883104762174689, |
|
"step": 176 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 311.375, |
|
"completions/max_terminated_length": 311.375, |
|
"completions/mean_length": 179.3046875, |
|
"completions/mean_terminated_length": 179.3046875, |
|
"completions/min_length": 104.0, |
|
"completions/min_terminated_length": 104.0, |
|
"epoch": 0.3726315789473684, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.147711422302881e-07, |
|
"loss": 0.0, |
|
"num_tokens": 84732926.0, |
|
"reward": 1.5250985324382782, |
|
"reward_std": 0.028973333232215737, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.5, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.01254925754006564, |
|
"rewards/waypoint_pred_accuracy/std": 0.014486670532335214, |
|
"step": 177 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 315.625, |
|
"completions/max_terminated_length": 315.625, |
|
"completions/mean_length": 182.26171875, |
|
"completions/mean_terminated_length": 182.26171875, |
|
"completions/min_length": 112.0, |
|
"completions/min_terminated_length": 112.0, |
|
"epoch": 0.37473684210526315, |
|
"grad_norm": 0.010854351334273815, |
|
"learning_rate": 8.120869296697162e-07, |
|
"loss": -0.0, |
|
"num_tokens": 85187204.0, |
|
"reward": 1.7528592348098755, |
|
"reward_std": 0.01418565196615873, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.75, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.0014296227405005812, |
|
"rewards/waypoint_pred_accuracy/std": 0.007092827672981208, |
|
"step": 178 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 529.0, |
|
"completions/max_terminated_length": 529.0, |
|
"completions/mean_length": 185.01953125, |
|
"completions/mean_terminated_length": 185.01953125, |
|
"completions/min_length": 105.25, |
|
"completions/min_terminated_length": 105.25, |
|
"epoch": 0.37684210526315787, |
|
"grad_norm": 0.022924024611711502, |
|
"learning_rate": 8.093885302126754e-07, |
|
"loss": 0.0073, |
|
"num_tokens": 85641038.0, |
|
"reward": 1.7433076351881027, |
|
"reward_std": 0.06334595192311099, |
|
"rewards/format_reward_embodied/mean": 0.998046875, |
|
"rewards/format_reward_embodied/std": 0.015625, |
|
"rewards/stop_prediction_reward/mean": 0.744140625, |
|
"rewards/stop_prediction_reward/std": 0.046875, |
|
"rewards/waypoint_pred_accuracy/mean": 0.0005600605727522634, |
|
"rewards/waypoint_pred_accuracy/std": 0.003230394551792415, |
|
"step": 179 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 310.0, |
|
"completions/max_terminated_length": 310.0, |
|
"completions/mean_length": 179.8359375, |
|
"completions/mean_terminated_length": 179.8359375, |
|
"completions/min_length": 108.625, |
|
"completions/min_terminated_length": 108.625, |
|
"epoch": 0.37894736842105264, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.06676089924877e-07, |
|
"loss": 0.0004, |
|
"num_tokens": 86092666.0, |
|
"reward": 1.8267612159252167, |
|
"reward_std": 0.1466955652579145, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.625, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.10088061677343774, |
|
"rewards/waypoint_pred_accuracy/std": 0.0733477777656617, |
|
"step": 180 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 298.75, |
|
"completions/max_terminated_length": 298.75, |
|
"completions/mean_length": 184.97265625, |
|
"completions/mean_terminated_length": 184.97265625, |
|
"completions/min_length": 111.625, |
|
"completions/min_terminated_length": 111.625, |
|
"epoch": 0.38105263157894737, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.03949755632069e-07, |
|
"loss": -0.0004, |
|
"num_tokens": 86546476.0, |
|
"reward": 1.7814702987670898, |
|
"reward_std": 0.08937373897060752, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.625, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.07823515147902071, |
|
"rewards/waypoint_pred_accuracy/std": 0.04468687262851745, |
|
"step": 181 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 309.625, |
|
"completions/max_terminated_length": 309.625, |
|
"completions/mean_length": 179.16015625, |
|
"completions/mean_terminated_length": 179.16015625, |
|
"completions/min_length": 105.625, |
|
"completions/min_terminated_length": 105.625, |
|
"epoch": 0.3831578947368421, |
|
"grad_norm": 0.02093261480331421, |
|
"learning_rate": 8.01209674912089e-07, |
|
"loss": 0.0, |
|
"num_tokens": 86998398.0, |
|
"reward": 1.9096409678459167, |
|
"reward_std": 0.0670090508647263, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.75, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.07982050045393407, |
|
"rewards/waypoint_pred_accuracy/std": 0.033504527527838945, |
|
"step": 182 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 286.375, |
|
"completions/max_terminated_length": 286.375, |
|
"completions/mean_length": 171.54296875, |
|
"completions/mean_terminated_length": 171.54296875, |
|
"completions/min_length": 102.875, |
|
"completions/min_terminated_length": 102.875, |
|
"epoch": 0.38526315789473686, |
|
"grad_norm": 0.01432411465793848, |
|
"learning_rate": 7.984559960868759e-07, |
|
"loss": -0.0004, |
|
"num_tokens": 87445908.0, |
|
"reward": 1.6769181191921234, |
|
"reward_std": 0.10013374220579863, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.625, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.02595905796625718, |
|
"rewards/waypoint_pred_accuracy/std": 0.05006687459543879, |
|
"step": 183 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 317.375, |
|
"completions/max_terminated_length": 317.375, |
|
"completions/mean_length": 181.97265625, |
|
"completions/mean_terminated_length": 181.97265625, |
|
"completions/min_length": 106.875, |
|
"completions/min_terminated_length": 106.875, |
|
"epoch": 0.3873684210526316, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.956888682144403e-07, |
|
"loss": 0.0001, |
|
"num_tokens": 87901126.0, |
|
"reward": 2.113648146390915, |
|
"reward_std": 0.08789801027160138, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 1.0, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.05682408035500002, |
|
"rewards/waypoint_pred_accuracy/std": 0.04394900894840466, |
|
"step": 184 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 297.375, |
|
"completions/max_terminated_length": 297.375, |
|
"completions/mean_length": 170.03125, |
|
"completions/mean_terminated_length": 170.03125, |
|
"completions/min_length": 107.25, |
|
"completions/min_terminated_length": 107.25, |
|
"epoch": 0.3894736842105263, |
|
"grad_norm": 0.00041478071943856776, |
|
"learning_rate": 7.929084410807964e-07, |
|
"loss": -0.0, |
|
"num_tokens": 88348630.0, |
|
"reward": 1.8102026730775833, |
|
"reward_std": 0.08258083421748097, |
|
"rewards/format_reward_embodied/mean": 0.998046875, |
|
"rewards/format_reward_embodied/std": 0.015625, |
|
"rewards/stop_prediction_reward/mean": 0.748046875, |
|
"rewards/stop_prediction_reward/std": 0.015625, |
|
"rewards/waypoint_pred_accuracy/mean": 0.032054444891858935, |
|
"rewards/waypoint_pred_accuracy/std": 0.025665418093367975, |
|
"step": 185 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 285.125, |
|
"completions/max_terminated_length": 285.125, |
|
"completions/mean_length": 167.068359375, |
|
"completions/mean_terminated_length": 167.068359375, |
|
"completions/min_length": 102.0, |
|
"completions/min_terminated_length": 102.0, |
|
"epoch": 0.391578947368421, |
|
"grad_norm": 0.010830595158040524, |
|
"learning_rate": 7.90114865191855e-07, |
|
"loss": -0.0002, |
|
"num_tokens": 88793017.0, |
|
"reward": 1.899033010005951, |
|
"reward_std": 0.05657581372270215, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.875, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.012016504072278167, |
|
"rewards/waypoint_pred_accuracy/std": 0.02828790664943881, |
|
"step": 186 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 270.625, |
|
"completions/max_terminated_length": 270.625, |
|
"completions/mean_length": 170.416015625, |
|
"completions/mean_terminated_length": 170.416015625, |
|
"completions/min_length": 110.5, |
|
"completions/min_terminated_length": 110.5, |
|
"epoch": 0.3936842105263158, |
|
"grad_norm": 0.02063567005097866, |
|
"learning_rate": 7.873082917652743e-07, |
|
"loss": -0.0001, |
|
"num_tokens": 89241230.0, |
|
"reward": 1.6706158965826035, |
|
"reward_std": 0.083322549238801, |
|
"rewards/format_reward_embodied/mean": 0.998046875, |
|
"rewards/format_reward_embodied/std": 0.015625, |
|
"rewards/stop_prediction_reward/mean": 0.625, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.023784521038484087, |
|
"rewards/waypoint_pred_accuracy/std": 0.03384877370171229, |
|
"step": 187 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 305.25, |
|
"completions/max_terminated_length": 305.25, |
|
"completions/mean_length": 181.23046875, |
|
"completions/mean_terminated_length": 181.23046875, |
|
"completions/min_length": 114.5, |
|
"completions/min_terminated_length": 114.5, |
|
"epoch": 0.3957894736842105, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.844888727222768e-07, |
|
"loss": 0.0004, |
|
"num_tokens": 89692484.0, |
|
"reward": 1.630469560623169, |
|
"reward_std": 0.031483914237469435, |
|
"rewards/format_reward_embodied/mean": 0.998046875, |
|
"rewards/format_reward_embodied/std": 0.015625, |
|
"rewards/stop_prediction_reward/mean": 0.625, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.0037113359924784914, |
|
"rewards/waypoint_pred_accuracy/std": 0.00797604240300253, |
|
"step": 188 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 331.875, |
|
"completions/max_terminated_length": 331.875, |
|
"completions/mean_length": 173.87109375, |
|
"completions/mean_terminated_length": 173.87109375, |
|
"completions/min_length": 113.75, |
|
"completions/min_terminated_length": 113.75, |
|
"epoch": 0.39789473684210525, |
|
"grad_norm": 0.042730070650577545, |
|
"learning_rate": 7.816567606794239e-07, |
|
"loss": -0.0002, |
|
"num_tokens": 90142082.0, |
|
"reward": 1.7876133099198341, |
|
"reward_std": 0.06541969033423811, |
|
"rewards/format_reward_embodied/mean": 0.998046875, |
|
"rewards/format_reward_embodied/std": 0.015625, |
|
"rewards/stop_prediction_reward/mean": 0.75, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.0197832117555663, |
|
"rewards/waypoint_pred_accuracy/std": 0.02541783277411014, |
|
"step": 189 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 302.125, |
|
"completions/max_terminated_length": 302.125, |
|
"completions/mean_length": 182.103515625, |
|
"completions/mean_terminated_length": 182.103515625, |
|
"completions/min_length": 115.25, |
|
"completions/min_terminated_length": 115.25, |
|
"epoch": 0.4, |
|
"grad_norm": 0.018826643005013466, |
|
"learning_rate": 7.788121089403557e-07, |
|
"loss": 0.0001, |
|
"num_tokens": 90596087.0, |
|
"reward": 1.805692046880722, |
|
"reward_std": 0.06880141280907992, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.75, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.027846033899431428, |
|
"rewards/waypoint_pred_accuracy/std": 0.03440070046775373, |
|
"step": 190 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 267.875, |
|
"completions/max_terminated_length": 267.875, |
|
"completions/mean_length": 175.751953125, |
|
"completions/mean_terminated_length": 175.751953125, |
|
"completions/min_length": 108.0, |
|
"completions/min_terminated_length": 108.0, |
|
"epoch": 0.40210526315789474, |
|
"grad_norm": 0.013081498444080353, |
|
"learning_rate": 7.759550714874924e-07, |
|
"loss": -0.0004, |
|
"num_tokens": 91046072.0, |
|
"reward": 2.0215499103069305, |
|
"reward_std": 0.13955920189619064, |
|
"rewards/format_reward_embodied/mean": 0.998046875, |
|
"rewards/format_reward_embodied/std": 0.015625, |
|
"rewards/stop_prediction_reward/mean": 0.873046875, |
|
"rewards/stop_prediction_reward/std": 0.015625, |
|
"rewards/waypoint_pred_accuracy/mean": 0.07522808883824439, |
|
"rewards/waypoint_pred_accuracy/std": 0.05415468077226393, |
|
"step": 191 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 278.5, |
|
"completions/max_terminated_length": 278.5, |
|
"completions/mean_length": 169.3125, |
|
"completions/mean_terminated_length": 169.3125, |
|
"completions/min_length": 114.75, |
|
"completions/min_terminated_length": 114.75, |
|
"epoch": 0.40421052631578946, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.730858029736989e-07, |
|
"loss": 0.0, |
|
"num_tokens": 91491928.0, |
|
"reward": 1.75, |
|
"reward_std": 0.0, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.75, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 1.474326771777612e-10, |
|
"rewards/waypoint_pred_accuracy/std": 1.1786529530155576e-09, |
|
"step": 192 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 306.875, |
|
"completions/max_terminated_length": 306.875, |
|
"completions/mean_length": 177.765625, |
|
"completions/mean_terminated_length": 177.765625, |
|
"completions/min_length": 114.875, |
|
"completions/min_terminated_length": 114.875, |
|
"epoch": 0.4063157894736842, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.702044587139137e-07, |
|
"loss": 0.0002, |
|
"num_tokens": 91941856.0, |
|
"reward": 1.7487657219171524, |
|
"reward_std": 0.01981517393141985, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.748046875, |
|
"rewards/stop_prediction_reward/std": 0.015625, |
|
"rewards/waypoint_pred_accuracy/mean": 0.0003594244599299801, |
|
"rewards/waypoint_pred_accuracy/std": 0.002140052256436602, |
|
"step": 193 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.001953125, |
|
"completions/max_length": 303.5, |
|
"completions/max_terminated_length": 303.5, |
|
"completions/mean_length": 171.814453125, |
|
"completions/mean_terminated_length": 172.19029235839844, |
|
"completions/min_length": 92.375, |
|
"completions/min_terminated_length": 106.75, |
|
"epoch": 0.40842105263157896, |
|
"grad_norm": 0.016274407505989075, |
|
"learning_rate": 7.673111946767413e-07, |
|
"loss": -0.0001, |
|
"num_tokens": 92389185.0, |
|
"reward": 2.0959380865097046, |
|
"reward_std": 0.12629193731117994, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.875, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.11046904484828701, |
|
"rewards/waypoint_pred_accuracy/std": 0.06314596923766658, |
|
"step": 194 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 290.125, |
|
"completions/max_terminated_length": 290.125, |
|
"completions/mean_length": 180.162109375, |
|
"completions/mean_terminated_length": 180.162109375, |
|
"completions/min_length": 107.5, |
|
"completions/min_terminated_length": 107.5, |
|
"epoch": 0.4105263157894737, |
|
"grad_norm": 0.020258145406842232, |
|
"learning_rate": 7.644061674760101e-07, |
|
"loss": 0.0001, |
|
"num_tokens": 92840532.0, |
|
"reward": 2.294894278049469, |
|
"reward_std": 0.23648597935971338, |
|
"rewards/format_reward_embodied/mean": 0.998046875, |
|
"rewards/format_reward_embodied/std": 0.015625, |
|
"rewards/stop_prediction_reward/mean": 0.998046875, |
|
"rewards/stop_prediction_reward/std": 0.015625, |
|
"rewards/waypoint_pred_accuracy/mean": 0.1494002838226665, |
|
"rewards/waypoint_pred_accuracy/std": 0.11227664479537411, |
|
"step": 195 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 299.75, |
|
"completions/max_terminated_length": 299.75, |
|
"completions/mean_length": 169.263671875, |
|
"completions/mean_terminated_length": 169.263671875, |
|
"completions/min_length": 113.25, |
|
"completions/min_terminated_length": 113.25, |
|
"epoch": 0.4126315789473684, |
|
"grad_norm": 0.04483957961201668, |
|
"learning_rate": 7.61489534362294e-07, |
|
"loss": -0.0002, |
|
"num_tokens": 93288091.0, |
|
"reward": 1.5267165899276733, |
|
"reward_std": 0.05776224182045553, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.501953125, |
|
"rewards/stop_prediction_reward/std": 0.015625, |
|
"rewards/waypoint_pred_accuracy/mean": 0.012381742581055732, |
|
"rewards/waypoint_pred_accuracy/std": 0.02106862150685629, |
|
"step": 196 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 264.125, |
|
"completions/max_terminated_length": 264.125, |
|
"completions/mean_length": 165.439453125, |
|
"completions/mean_terminated_length": 165.439453125, |
|
"completions/min_length": 107.625, |
|
"completions/min_terminated_length": 107.625, |
|
"epoch": 0.4147368421052632, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.585614532144007e-07, |
|
"loss": 0.0002, |
|
"num_tokens": 93733372.0, |
|
"reward": 1.80105559527874, |
|
"reward_std": 0.17875106693827547, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.626953125, |
|
"rewards/stop_prediction_reward/std": 0.015625, |
|
"rewards/waypoint_pred_accuracy/mean": 0.08705123437539442, |
|
"rewards/waypoint_pred_accuracy/std": 0.08156303651776398, |
|
"step": 197 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 328.5, |
|
"completions/max_terminated_length": 328.5, |
|
"completions/mean_length": 170.4609375, |
|
"completions/mean_terminated_length": 170.4609375, |
|
"completions/min_length": 103.125, |
|
"completions/min_terminated_length": 103.125, |
|
"epoch": 0.4168421052631579, |
|
"grad_norm": 0.009551014751195908, |
|
"learning_rate": 7.556220825308261e-07, |
|
"loss": 0.0002, |
|
"num_tokens": 94180968.0, |
|
"reward": 1.8914762139320374, |
|
"reward_std": 0.05954795209981967, |
|
"rewards/format_reward_embodied/mean": 0.998046875, |
|
"rewards/format_reward_embodied/std": 0.015625, |
|
"rewards/stop_prediction_reward/mean": 0.875, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.009214683991414209, |
|
"rewards/waypoint_pred_accuracy/std": 0.021961479235898466, |
|
"step": 198 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 315.125, |
|
"completions/max_terminated_length": 315.125, |
|
"completions/mean_length": 176.51171875, |
|
"completions/mean_terminated_length": 176.51171875, |
|
"completions/min_length": 119.25, |
|
"completions/min_terminated_length": 119.25, |
|
"epoch": 0.4189473684210526, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.526715814211739e-07, |
|
"loss": 0.0001, |
|
"num_tokens": 94629294.0, |
|
"reward": 2.0064243376255035, |
|
"reward_std": 0.03297104453667998, |
|
"rewards/format_reward_embodied/mean": 0.998046875, |
|
"rewards/format_reward_embodied/std": 0.015625, |
|
"rewards/stop_prediction_reward/mean": 1.0, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.004188739636447281, |
|
"rewards/waypoint_pred_accuracy/std": 0.008869364886777475, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.42105263157894735, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.49710109597544e-07, |
|
"loss": 0.0002, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.42105263157894735, |
|
"eval_clip_ratio": 0.0, |
|
"eval_completions/clipped_ratio": 0.0, |
|
"eval_completions/max_length": 290.91, |
|
"eval_completions/max_terminated_length": 290.91, |
|
"eval_completions/mean_length": 172.36274307250977, |
|
"eval_completions/mean_terminated_length": 172.36274307250977, |
|
"eval_completions/min_length": 109.78, |
|
"eval_completions/min_terminated_length": 109.78, |
|
"eval_loss": -4.892464494332671e-05, |
|
"eval_num_tokens": 95080220.0, |
|
"eval_reward": 1.8596287977695465, |
|
"eval_reward_std": 0.08604613540126138, |
|
"eval_rewards/format_reward_embodied/mean": 0.9996875, |
|
"eval_rewards/format_reward_embodied/std": 0.0025, |
|
"eval_rewards/stop_prediction_reward/mean": 0.7696875, |
|
"eval_rewards/stop_prediction_reward/std": 0.005, |
|
"eval_rewards/waypoint_pred_accuracy/mean": 0.045126906880960875, |
|
"eval_rewards/waypoint_pred_accuracy/std": 0.039966236149646756, |
|
"eval_runtime": 941.9095, |
|
"eval_samples_per_second": 0.106, |
|
"eval_steps_per_second": 0.002, |
|
"step": 200 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 292.9375, |
|
"completions/max_terminated_length": 292.9375, |
|
"completions/mean_length": 174.2548828125, |
|
"completions/mean_terminated_length": 174.2548828125, |
|
"completions/min_length": 110.5625, |
|
"completions/min_terminated_length": 110.5625, |
|
"epoch": 0.4231578947368421, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.467378273658856e-07, |
|
"loss": 0.0005, |
|
"num_tokens": 95528819.0, |
|
"reward": 1.9443908333778381, |
|
"reward_std": 0.04222029652737547, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.8740234375, |
|
"rewards/stop_prediction_reward/std": 0.0078125, |
|
"rewards/waypoint_pred_accuracy/mean": 0.03518370707206486, |
|
"rewards/waypoint_pred_accuracy/std": 0.017203900250024166, |
|
"step": 201 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 305.5, |
|
"completions/max_terminated_length": 305.5, |
|
"completions/mean_length": 170.724609375, |
|
"completions/mean_terminated_length": 170.724609375, |
|
"completions/min_length": 112.375, |
|
"completions/min_terminated_length": 112.375, |
|
"epoch": 0.42526315789473684, |
|
"grad_norm": 0.012682443484663963, |
|
"learning_rate": 7.437548956173213e-07, |
|
"loss": -0.0004, |
|
"num_tokens": 95979302.0, |
|
"reward": 1.9156423211097717, |
|
"reward_std": 0.1263027695240453, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.751953125, |
|
"rewards/stop_prediction_reward/std": 0.015625, |
|
"rewards/waypoint_pred_accuracy/mean": 0.0818446125079697, |
|
"rewards/waypoint_pred_accuracy/std": 0.055338893387193444, |
|
"step": 202 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.001953125, |
|
"completions/max_length": 293.875, |
|
"completions/max_terminated_length": 293.875, |
|
"completions/mean_length": 176.544921875, |
|
"completions/mean_terminated_length": 176.8624153137207, |
|
"completions/min_length": 101.625, |
|
"completions/min_terminated_length": 111.625, |
|
"epoch": 0.42736842105263156, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.407614758194373e-07, |
|
"loss": -0.0006, |
|
"num_tokens": 96431805.0, |
|
"reward": 1.707748532295227, |
|
"reward_std": 0.10709417768262597, |
|
"rewards/format_reward_embodied/mean": 0.998046875, |
|
"rewards/format_reward_embodied/std": 0.015625, |
|
"rewards/stop_prediction_reward/mean": 0.623046875, |
|
"rewards/stop_prediction_reward/std": 0.015625, |
|
"rewards/waypoint_pred_accuracy/mean": 0.04332738941334985, |
|
"rewards/waypoint_pred_accuracy/std": 0.03793920004429459, |
|
"step": 203 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 321.5, |
|
"completions/max_terminated_length": 321.5, |
|
"completions/mean_length": 176.828125, |
|
"completions/mean_terminated_length": 176.828125, |
|
"completions/min_length": 115.625, |
|
"completions/min_terminated_length": 115.625, |
|
"epoch": 0.42947368421052634, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.377577300075431e-07, |
|
"loss": 0.0, |
|
"num_tokens": 96881189.0, |
|
"reward": 1.760355144739151, |
|
"reward_std": 0.02672452749078502, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.75, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.005177572603687428, |
|
"rewards/waypoint_pred_accuracy/std": 0.013362265083738705, |
|
"step": 204 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 304.75, |
|
"completions/max_terminated_length": 304.75, |
|
"completions/mean_length": 175.419921875, |
|
"completions/mean_terminated_length": 175.419921875, |
|
"completions/min_length": 117.75, |
|
"completions/min_terminated_length": 117.75, |
|
"epoch": 0.43157894736842106, |
|
"grad_norm": 0.015611842274665833, |
|
"learning_rate": 7.347438207759002e-07, |
|
"loss": -0.0002, |
|
"num_tokens": 97333116.0, |
|
"reward": 1.9850184619426727, |
|
"reward_std": 0.06815559589631448, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.875, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.055009242740135744, |
|
"rewards/waypoint_pred_accuracy/std": 0.0340777950465283, |
|
"step": 205 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 306.875, |
|
"completions/max_terminated_length": 306.875, |
|
"completions/mean_length": 174.68359375, |
|
"completions/mean_terminated_length": 174.68359375, |
|
"completions/min_length": 107.125, |
|
"completions/min_terminated_length": 107.125, |
|
"epoch": 0.4336842105263158, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.317199112689219e-07, |
|
"loss": -0.0003, |
|
"num_tokens": 97780314.0, |
|
"reward": 1.6253042817115784, |
|
"reward_std": 0.0009640372365589123, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.625, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.0001521414744787989, |
|
"rewards/waypoint_pred_accuracy/std": 0.00048202241833421994, |
|
"step": 206 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 294.625, |
|
"completions/max_terminated_length": 294.625, |
|
"completions/mean_length": 178.39453125, |
|
"completions/mean_terminated_length": 178.39453125, |
|
"completions/min_length": 117.125, |
|
"completions/min_terminated_length": 117.125, |
|
"epoch": 0.4357894736842105, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.286861651723403e-07, |
|
"loss": 0.0, |
|
"num_tokens": 98230564.0, |
|
"reward": 1.8962776064872742, |
|
"reward_std": 0.06431814459938323, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.75, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.07313878076765164, |
|
"rewards/waypoint_pred_accuracy/std": 0.03215907396928667, |
|
"step": 207 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 297.125, |
|
"completions/max_terminated_length": 297.125, |
|
"completions/mean_length": 170.755859375, |
|
"completions/mean_terminated_length": 170.755859375, |
|
"completions/min_length": 108.125, |
|
"completions/min_terminated_length": 108.125, |
|
"epoch": 0.4378947368421053, |
|
"grad_norm": 0.02003273367881775, |
|
"learning_rate": 7.256427467043479e-07, |
|
"loss": 0.0001, |
|
"num_tokens": 98680935.0, |
|
"reward": 1.7773285955190659, |
|
"reward_std": 0.045251342578694675, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.75, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.013664303890479346, |
|
"rewards/waypoint_pred_accuracy/std": 0.022625670864954373, |
|
"step": 208 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 307.625, |
|
"completions/max_terminated_length": 307.625, |
|
"completions/mean_length": 175.458984375, |
|
"completions/mean_terminated_length": 175.458984375, |
|
"completions/min_length": 109.75, |
|
"completions/min_terminated_length": 109.75, |
|
"epoch": 0.44, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.225898206067071e-07, |
|
"loss": 0.0, |
|
"num_tokens": 99131986.0, |
|
"reward": 2.000874102115631, |
|
"reward_std": 0.07641031977254897, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.875, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.06293705673306249, |
|
"rewards/waypoint_pred_accuracy/std": 0.038205162913072854, |
|
"step": 209 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 295.25, |
|
"completions/max_terminated_length": 295.25, |
|
"completions/mean_length": 177.837890625, |
|
"completions/mean_terminated_length": 177.837890625, |
|
"completions/min_length": 118.75, |
|
"completions/min_terminated_length": 118.75, |
|
"epoch": 0.4421052631578947, |
|
"grad_norm": 0.01405387930572033, |
|
"learning_rate": 7.195275521358332e-07, |
|
"loss": -0.0003, |
|
"num_tokens": 99580031.0, |
|
"reward": 2.028854936361313, |
|
"reward_std": 0.11646178726022072, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.875, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.07692747169583214, |
|
"rewards/waypoint_pred_accuracy/std": 0.05823090146415666, |
|
"step": 210 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 307.5, |
|
"completions/max_terminated_length": 307.5, |
|
"completions/mean_length": 177.76171875, |
|
"completions/mean_terminated_length": 177.76171875, |
|
"completions/min_length": 115.5, |
|
"completions/min_terminated_length": 115.5, |
|
"epoch": 0.4442105263157895, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.164561070538488e-07, |
|
"loss": 0.0003, |
|
"num_tokens": 100033989.0, |
|
"reward": 1.7999018132686615, |
|
"reward_std": 0.21613861247897148, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.5, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.14995091408491135, |
|
"rewards/waypoint_pred_accuracy/std": 0.10806930996477604, |
|
"step": 211 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 431.875, |
|
"completions/max_terminated_length": 431.875, |
|
"completions/mean_length": 174.076171875, |
|
"completions/mean_terminated_length": 174.076171875, |
|
"completions/min_length": 112.375, |
|
"completions/min_terminated_length": 112.375, |
|
"epoch": 0.4463157894736842, |
|
"grad_norm": 0.04999241605401039, |
|
"learning_rate": 7.133756516196107e-07, |
|
"loss": -0.0003, |
|
"num_tokens": 100487340.0, |
|
"reward": 1.7891514897346497, |
|
"reward_std": 0.0568979331983428, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.75, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.01957578670394547, |
|
"rewards/waypoint_pred_accuracy/std": 0.028448972130328657, |
|
"step": 212 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 284.0, |
|
"completions/max_terminated_length": 284.0, |
|
"completions/mean_length": 171.359375, |
|
"completions/mean_terminated_length": 171.359375, |
|
"completions/min_length": 109.375, |
|
"completions/min_terminated_length": 109.375, |
|
"epoch": 0.44842105263157894, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.102863525797112e-07, |
|
"loss": -0.0001, |
|
"num_tokens": 100937124.0, |
|
"reward": 1.9840654134750366, |
|
"reward_std": 0.17244431003928185, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.625, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.17953270860016346, |
|
"rewards/waypoint_pred_accuracy/std": 0.08622215129435062, |
|
"step": 213 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 291.375, |
|
"completions/max_terminated_length": 291.375, |
|
"completions/mean_length": 175.158203125, |
|
"completions/mean_terminated_length": 175.158203125, |
|
"completions/min_length": 113.25, |
|
"completions/min_terminated_length": 113.25, |
|
"epoch": 0.45052631578947366, |
|
"grad_norm": 0.01138628926128149, |
|
"learning_rate": 7.071883771594509e-07, |
|
"loss": 0.0, |
|
"num_tokens": 101387957.0, |
|
"reward": 1.8491481095552444, |
|
"reward_std": 0.1147658722824616, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.75, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.049574058981537675, |
|
"rewards/waypoint_pred_accuracy/std": 0.05738293592632728, |
|
"step": 214 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 327.125, |
|
"completions/max_terminated_length": 327.125, |
|
"completions/mean_length": 187.966796875, |
|
"completions/mean_terminated_length": 187.966796875, |
|
"completions/min_length": 114.625, |
|
"completions/min_terminated_length": 114.625, |
|
"epoch": 0.45263157894736844, |
|
"grad_norm": 0.011563337408006191, |
|
"learning_rate": 7.040818930537874e-07, |
|
"loss": -0.0003, |
|
"num_tokens": 101845412.0, |
|
"reward": 1.6611975878477097, |
|
"reward_std": 0.0494131935941482, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.625, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.018098798598909838, |
|
"rewards/waypoint_pred_accuracy/std": 0.02470659996008351, |
|
"step": 215 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 287.25, |
|
"completions/max_terminated_length": 287.25, |
|
"completions/mean_length": 171.841796875, |
|
"completions/mean_terminated_length": 171.841796875, |
|
"completions/min_length": 107.125, |
|
"completions/min_terminated_length": 107.125, |
|
"epoch": 0.45473684210526316, |
|
"grad_norm": 2.7086246063845465e-06, |
|
"learning_rate": 7.009670684182576e-07, |
|
"loss": -0.0, |
|
"num_tokens": 102293587.0, |
|
"reward": 1.5, |
|
"reward_std": 3.251700020356907e-09, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.5, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 4.380867890674267e-10, |
|
"rewards/waypoint_pred_accuracy/std": 1.9065319947775272e-09, |
|
"step": 216 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 286.75, |
|
"completions/max_terminated_length": 286.75, |
|
"completions/mean_length": 176.490234375, |
|
"completions/mean_terminated_length": 176.490234375, |
|
"completions/min_length": 114.5, |
|
"completions/min_terminated_length": 114.5, |
|
"epoch": 0.4568421052631579, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.978440718598756e-07, |
|
"loss": 0.0002, |
|
"num_tokens": 102744270.0, |
|
"reward": 1.7497325837612152, |
|
"reward_std": 0.06904890944133513, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.625, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.06236628795954857, |
|
"rewards/waypoint_pred_accuracy/std": 0.03452445384209568, |
|
"step": 217 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 314.0, |
|
"completions/max_terminated_length": 314.0, |
|
"completions/mean_length": 182.732421875, |
|
"completions/mean_terminated_length": 182.732421875, |
|
"completions/min_length": 111.375, |
|
"completions/min_terminated_length": 111.375, |
|
"epoch": 0.4589473684210526, |
|
"grad_norm": 0.029011964797973633, |
|
"learning_rate": 6.947130724280057e-07, |
|
"loss": 0.0, |
|
"num_tokens": 103198789.0, |
|
"reward": 1.7562294006347656, |
|
"reward_std": 0.013579967227997258, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.75, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.0031146957662715976, |
|
"rewards/waypoint_pred_accuracy/std": 0.006789985702200646, |
|
"step": 218 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 270.375, |
|
"completions/max_terminated_length": 270.375, |
|
"completions/mean_length": 172.20703125, |
|
"completions/mean_terminated_length": 172.20703125, |
|
"completions/min_length": 111.875, |
|
"completions/min_terminated_length": 111.875, |
|
"epoch": 0.4610526315789474, |
|
"grad_norm": 0.013365295715630054, |
|
"learning_rate": 6.915742396052115e-07, |
|
"loss": -0.0001, |
|
"num_tokens": 103649519.0, |
|
"reward": 1.5070368647575378, |
|
"reward_std": 0.023722524622826313, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.501953125, |
|
"rewards/stop_prediction_reward/std": 0.015625, |
|
"rewards/waypoint_pred_accuracy/mean": 0.002541873606073473, |
|
"rewards/waypoint_pred_accuracy/std": 0.004048763120422751, |
|
"step": 219 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 291.375, |
|
"completions/max_terminated_length": 291.375, |
|
"completions/mean_length": 176.5546875, |
|
"completions/mean_terminated_length": 176.5546875, |
|
"completions/min_length": 110.875, |
|
"completions/min_terminated_length": 110.875, |
|
"epoch": 0.4631578947368421, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.884277432980825e-07, |
|
"loss": 0.0001, |
|
"num_tokens": 104099915.0, |
|
"reward": 1.8983599245548248, |
|
"reward_std": 0.022330745094222948, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.875, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.011679970655677607, |
|
"rewards/waypoint_pred_accuracy/std": 0.01116537469351897, |
|
"step": 220 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 272.875, |
|
"completions/max_terminated_length": 272.875, |
|
"completions/mean_length": 176.126953125, |
|
"completions/mean_terminated_length": 176.126953125, |
|
"completions/min_length": 118.875, |
|
"completions/min_terminated_length": 118.875, |
|
"epoch": 0.4652631578947368, |
|
"grad_norm": 0.017878547310829163, |
|
"learning_rate": 6.852737538280359e-07, |
|
"loss": -0.0001, |
|
"num_tokens": 104550732.0, |
|
"reward": 1.7993512451648712, |
|
"reward_std": 0.045344060357820126, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.75, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.024675624659721507, |
|
"rewards/waypoint_pred_accuracy/std": 0.022672027718726895, |
|
"step": 221 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 305.5, |
|
"completions/max_terminated_length": 305.5, |
|
"completions/mean_length": 181.193359375, |
|
"completions/mean_terminated_length": 181.193359375, |
|
"completions/min_length": 113.875, |
|
"completions/min_terminated_length": 113.875, |
|
"epoch": 0.4673684210526316, |
|
"grad_norm": 0.01165629643946886, |
|
"learning_rate": 6.821124419220978e-07, |
|
"loss": 0.0002, |
|
"num_tokens": 105002991.0, |
|
"reward": 1.8998810648918152, |
|
"reward_std": 0.040346091078029334, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.875, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.012440536318531065, |
|
"rewards/waypoint_pred_accuracy/std": 0.020173047916574705, |
|
"step": 222 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 288.375, |
|
"completions/max_terminated_length": 288.375, |
|
"completions/mean_length": 175.021484375, |
|
"completions/mean_terminated_length": 175.021484375, |
|
"completions/min_length": 109.75, |
|
"completions/min_terminated_length": 109.75, |
|
"epoch": 0.4694736842105263, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.789439787036614e-07, |
|
"loss": -0.0001, |
|
"num_tokens": 105453626.0, |
|
"reward": 1.9986501336097717, |
|
"reward_std": 0.019883400294929743, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.998046875, |
|
"rewards/stop_prediction_reward/std": 0.015625, |
|
"rewards/waypoint_pred_accuracy/mean": 0.0003016188566107303, |
|
"rewards/waypoint_pred_accuracy/std": 0.0021291994489729404, |
|
"step": 223 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 309.375, |
|
"completions/max_terminated_length": 309.375, |
|
"completions/mean_length": 179.6640625, |
|
"completions/mean_terminated_length": 179.6640625, |
|
"completions/min_length": 112.75, |
|
"completions/min_terminated_length": 112.75, |
|
"epoch": 0.47157894736842104, |
|
"grad_norm": 0.015561181120574474, |
|
"learning_rate": 6.757685356832242e-07, |
|
"loss": -0.0, |
|
"num_tokens": 105904398.0, |
|
"reward": 2.101376533508301, |
|
"reward_std": 0.06816481053829193, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 1.0, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.05068826675858098, |
|
"rewards/waypoint_pred_accuracy/std": 0.0340824015760622, |
|
"step": 224 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 294.0, |
|
"completions/max_terminated_length": 294.0, |
|
"completions/mean_length": 175.58203125, |
|
"completions/mean_terminated_length": 175.58203125, |
|
"completions/min_length": 109.625, |
|
"completions/min_terminated_length": 109.625, |
|
"epoch": 0.47368421052631576, |
|
"grad_norm": 0.013554830104112625, |
|
"learning_rate": 6.725862847491034e-07, |
|
"loss": 0.0002, |
|
"num_tokens": 106353592.0, |
|
"reward": 1.7516418248414993, |
|
"reward_std": 0.0034361608559265733, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.75, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.0008209160487240297, |
|
"rewards/waypoint_pred_accuracy/std": 0.0017180802678922191, |
|
"step": 225 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 306.5, |
|
"completions/max_terminated_length": 306.5, |
|
"completions/mean_length": 183.095703125, |
|
"completions/mean_terminated_length": 183.095703125, |
|
"completions/min_length": 108.125, |
|
"completions/min_terminated_length": 108.125, |
|
"epoch": 0.47578947368421054, |
|
"grad_norm": 0.00029330080724321306, |
|
"learning_rate": 6.693973981581324e-07, |
|
"loss": 0.0, |
|
"num_tokens": 106808553.0, |
|
"reward": 1.6250907480716705, |
|
"reward_std": 0.00024353076181782285, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.625, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 4.53742529797907e-05, |
|
"rewards/waypoint_pred_accuracy/std": 0.00012176844195366245, |
|
"step": 226 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 292.5, |
|
"completions/max_terminated_length": 292.5, |
|
"completions/mean_length": 181.337890625, |
|
"completions/mean_terminated_length": 181.337890625, |
|
"completions/min_length": 119.625, |
|
"completions/min_terminated_length": 119.625, |
|
"epoch": 0.47789473684210526, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.662020485263358e-07, |
|
"loss": -0.0001, |
|
"num_tokens": 107258774.0, |
|
"reward": 1.625608280301094, |
|
"reward_std": 0.0014176478143781424, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.625, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.00030413969053576185, |
|
"rewards/waypoint_pred_accuracy/std": 0.00070882499138026, |
|
"step": 227 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 291.25, |
|
"completions/max_terminated_length": 291.25, |
|
"completions/mean_length": 173.318359375, |
|
"completions/mean_terminated_length": 173.318359375, |
|
"completions/min_length": 107.875, |
|
"completions/min_terminated_length": 107.875, |
|
"epoch": 0.48, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.630004088195858e-07, |
|
"loss": 0.0002, |
|
"num_tokens": 107708793.0, |
|
"reward": 1.876689851284027, |
|
"reward_std": 0.0026773642748594284, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.875, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.0008449186572583362, |
|
"rewards/waypoint_pred_accuracy/std": 0.0013386833028957005, |
|
"step": 228 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 308.5, |
|
"completions/max_terminated_length": 308.5, |
|
"completions/mean_length": 176.880859375, |
|
"completions/mean_terminated_length": 176.880859375, |
|
"completions/min_length": 117.25, |
|
"completions/min_terminated_length": 117.25, |
|
"epoch": 0.48210526315789476, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.597926523442398e-07, |
|
"loss": 0.0, |
|
"num_tokens": 108161148.0, |
|
"reward": 1.509762555360794, |
|
"reward_std": 0.021483093870038772, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.5, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.0048812878156252685, |
|
"rewards/waypoint_pred_accuracy/std": 0.01074154757443697, |
|
"step": 229 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 312.125, |
|
"completions/max_terminated_length": 312.125, |
|
"completions/mean_length": 173.00390625, |
|
"completions/mean_terminated_length": 173.00390625, |
|
"completions/min_length": 110.25, |
|
"completions/min_terminated_length": 110.25, |
|
"epoch": 0.4842105263157895, |
|
"grad_norm": 0.013146106153726578, |
|
"learning_rate": 6.565789527377587e-07, |
|
"loss": -0.0005, |
|
"num_tokens": 108611454.0, |
|
"reward": 1.7406707108020782, |
|
"reward_std": 0.09716045763343573, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.625, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.05783534119836986, |
|
"rewards/waypoint_pred_accuracy/std": 0.048580223228782415, |
|
"step": 230 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 286.625, |
|
"completions/max_terminated_length": 286.625, |
|
"completions/mean_length": 172.7578125, |
|
"completions/mean_terminated_length": 172.7578125, |
|
"completions/min_length": 111.75, |
|
"completions/min_terminated_length": 111.75, |
|
"epoch": 0.4863157894736842, |
|
"grad_norm": 0.00952562689781189, |
|
"learning_rate": 6.533594839593081e-07, |
|
"loss": 0.0001, |
|
"num_tokens": 109059522.0, |
|
"reward": 1.7838719189167023, |
|
"reward_std": 0.09348210319876671, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.625, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.07943596761145057, |
|
"rewards/waypoint_pred_accuracy/std": 0.04674104697497585, |
|
"step": 231 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 293.25, |
|
"completions/max_terminated_length": 293.25, |
|
"completions/mean_length": 175.3828125, |
|
"completions/mean_terminated_length": 175.3828125, |
|
"completions/min_length": 114.75, |
|
"completions/min_terminated_length": 114.75, |
|
"epoch": 0.4884210526315789, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.501344202803414e-07, |
|
"loss": 0.0001, |
|
"num_tokens": 109511942.0, |
|
"reward": 1.5068429559469223, |
|
"reward_std": 0.023580931854667142, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.5, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.003421477313622745, |
|
"rewards/waypoint_pred_accuracy/std": 0.011790467111495673, |
|
"step": 232 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.001953125, |
|
"completions/max_length": 316.0, |
|
"completions/max_terminated_length": 316.0, |
|
"completions/mean_length": 178.611328125, |
|
"completions/mean_terminated_length": 178.9438877105713, |
|
"completions/min_length": 97.375, |
|
"completions/min_terminated_length": 110.125, |
|
"epoch": 0.4905263157894737, |
|
"grad_norm": 0.010548449121415615, |
|
"learning_rate": 6.469039362751677e-07, |
|
"loss": -0.0002, |
|
"num_tokens": 109963455.0, |
|
"reward": 1.7827188670635223, |
|
"reward_std": 0.06010658299783245, |
|
"rewards/format_reward_embodied/mean": 0.998046875, |
|
"rewards/format_reward_embodied/std": 0.015625, |
|
"rewards/stop_prediction_reward/mean": 0.748046875, |
|
"rewards/stop_prediction_reward/std": 0.015625, |
|
"rewards/waypoint_pred_accuracy/mean": 0.018312573189177783, |
|
"rewards/waypoint_pred_accuracy/std": 0.024115337153489236, |
|
"step": 233 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.001953125, |
|
"completions/max_length": 301.125, |
|
"completions/max_terminated_length": 301.125, |
|
"completions/mean_length": 179.953125, |
|
"completions/mean_terminated_length": 180.33076095581055, |
|
"completions/min_length": 101.375, |
|
"completions/min_terminated_length": 115.75, |
|
"epoch": 0.4926315789473684, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.436682068115002e-07, |
|
"loss": -0.0007, |
|
"num_tokens": 110416615.0, |
|
"reward": 1.99609375, |
|
"reward_std": 0.03125, |
|
"rewards/format_reward_embodied/mean": 0.998046875, |
|
"rewards/format_reward_embodied/std": 0.015625, |
|
"rewards/stop_prediction_reward/mean": 0.998046875, |
|
"rewards/stop_prediction_reward/std": 0.015625, |
|
"rewards/waypoint_pred_accuracy/mean": 2.0173116242793235e-14, |
|
"rewards/waypoint_pred_accuracy/std": 1.1195781985957193e-13, |
|
"step": 234 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 269.0, |
|
"completions/max_terminated_length": 269.0, |
|
"completions/mean_length": 173.30859375, |
|
"completions/mean_terminated_length": 173.30859375, |
|
"completions/min_length": 112.75, |
|
"completions/min_terminated_length": 112.75, |
|
"epoch": 0.49473684210526314, |
|
"grad_norm": 0.023477498441934586, |
|
"learning_rate": 6.404274070409915e-07, |
|
"loss": 0.0002, |
|
"num_tokens": 110865797.0, |
|
"reward": 1.8803559094667435, |
|
"reward_std": 0.11074852512797406, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.75, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.06517796947535714, |
|
"rewards/waypoint_pred_accuracy/std": 0.05537425884915592, |
|
"step": 235 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 299.875, |
|
"completions/max_terminated_length": 299.875, |
|
"completions/mean_length": 176.0390625, |
|
"completions/mean_terminated_length": 176.0390625, |
|
"completions/min_length": 114.375, |
|
"completions/min_terminated_length": 114.375, |
|
"epoch": 0.4968421052631579, |
|
"grad_norm": 0.008372652344405651, |
|
"learning_rate": 6.371817123897528e-07, |
|
"loss": 0.0006, |
|
"num_tokens": 111319513.0, |
|
"reward": 1.8294343054294586, |
|
"reward_std": 0.08038223959738389, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.748046875, |
|
"rewards/stop_prediction_reward/std": 0.015625, |
|
"rewards/waypoint_pred_accuracy/mean": 0.040693737070411296, |
|
"rewards/waypoint_pred_accuracy/std": 0.03237862857612228, |
|
"step": 236 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 251.25, |
|
"completions/max_terminated_length": 251.25, |
|
"completions/mean_length": 166.775390625, |
|
"completions/mean_terminated_length": 166.775390625, |
|
"completions/min_length": 109.5, |
|
"completions/min_terminated_length": 109.5, |
|
"epoch": 0.49894736842105264, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.339312985488576e-07, |
|
"loss": -0.0001, |
|
"num_tokens": 111764710.0, |
|
"reward": 1.8851255774497986, |
|
"reward_std": 0.04906696546822786, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.875, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.005062782554890241, |
|
"rewards/waypoint_pred_accuracy/std": 0.024533490184718572, |
|
"step": 237 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 296.5, |
|
"completions/max_terminated_length": 296.5, |
|
"completions/mean_length": 172.90625, |
|
"completions/mean_terminated_length": 172.90625, |
|
"completions/min_length": 111.125, |
|
"completions/min_terminated_length": 111.125, |
|
"epoch": 0.5010526315789474, |
|
"grad_norm": 0.01637883298099041, |
|
"learning_rate": 6.30676341464831e-07, |
|
"loss": -0.0003, |
|
"num_tokens": 112215734.0, |
|
"reward": 1.6509484648704529, |
|
"reward_std": 0.06736253991999908, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.625, |
|
"rewards/stop_prediction_reward/std": 0.03125, |
|
"rewards/waypoint_pred_accuracy/mean": 0.012974242886534648, |
|
"rewards/waypoint_pred_accuracy/std": 0.01805627301899171, |
|
"step": 238 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 289.625, |
|
"completions/max_terminated_length": 289.625, |
|
"completions/mean_length": 169.388671875, |
|
"completions/mean_terminated_length": 169.388671875, |
|
"completions/min_length": 109.0, |
|
"completions/min_terminated_length": 109.0, |
|
"epoch": 0.5031578947368421, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.274170173301268e-07, |
|
"loss": 0.0001, |
|
"num_tokens": 112666173.0, |
|
"reward": 1.9938509166240692, |
|
"reward_std": 0.11555472994223237, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.875, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.05942547577433288, |
|
"rewards/waypoint_pred_accuracy/std": 0.05777736520394683, |
|
"step": 239 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 289.5, |
|
"completions/max_terminated_length": 289.5, |
|
"completions/mean_length": 172.37890625, |
|
"completions/mean_terminated_length": 172.37890625, |
|
"completions/min_length": 108.875, |
|
"completions/min_terminated_length": 108.875, |
|
"epoch": 0.5052631578947369, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.24153502573589e-07, |
|
"loss": -0.0001, |
|
"num_tokens": 113115903.0, |
|
"reward": 1.7577707767486572, |
|
"reward_std": 0.016411395743489265, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.75, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.0038853867445141077, |
|
"rewards/waypoint_pred_accuracy/std": 0.008205699268728495, |
|
"step": 240 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 286.0, |
|
"completions/max_terminated_length": 286.0, |
|
"completions/mean_length": 175.09375, |
|
"completions/mean_terminated_length": 175.09375, |
|
"completions/min_length": 116.375, |
|
"completions/min_terminated_length": 116.375, |
|
"epoch": 0.5073684210526316, |
|
"grad_norm": 0.03419042006134987, |
|
"learning_rate": 6.208859738509021e-07, |
|
"loss": 0.0007, |
|
"num_tokens": 113568495.0, |
|
"reward": 1.7237209975719452, |
|
"reward_std": 0.14016160182654858, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.625, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.049360513221472516, |
|
"rewards/waypoint_pred_accuracy/std": 0.07008079765364533, |
|
"step": 241 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 289.875, |
|
"completions/max_terminated_length": 289.875, |
|
"completions/mean_length": 173.462890625, |
|
"completions/mean_terminated_length": 173.462890625, |
|
"completions/min_length": 112.0, |
|
"completions/min_terminated_length": 112.0, |
|
"epoch": 0.5094736842105263, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.176146080350286e-07, |
|
"loss": 0.0, |
|
"num_tokens": 114018972.0, |
|
"reward": 1.8751783967018127, |
|
"reward_std": 0.00036079369601793587, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.875, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 8.91994423000142e-05, |
|
"rewards/waypoint_pred_accuracy/std": 0.00018039710994344205, |
|
"step": 242 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 298.0, |
|
"completions/max_terminated_length": 298.0, |
|
"completions/mean_length": 173.265625, |
|
"completions/mean_terminated_length": 173.265625, |
|
"completions/min_length": 114.25, |
|
"completions/min_terminated_length": 114.25, |
|
"epoch": 0.511578947368421, |
|
"grad_norm": 0.013551232405006886, |
|
"learning_rate": 6.14339582206635e-07, |
|
"loss": 0.0001, |
|
"num_tokens": 114468132.0, |
|
"reward": 1.9178512692451477, |
|
"reward_std": 0.06416846066713333, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.875, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.021425632760024103, |
|
"rewards/waypoint_pred_accuracy/std": 0.03208423405926866, |
|
"step": 243 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 312.125, |
|
"completions/max_terminated_length": 312.125, |
|
"completions/mean_length": 175.140625, |
|
"completions/mean_terminated_length": 175.140625, |
|
"completions/min_length": 110.75, |
|
"completions/min_terminated_length": 110.75, |
|
"epoch": 0.5136842105263157, |
|
"grad_norm": 0.02614566497504711, |
|
"learning_rate": 6.110610736445058e-07, |
|
"loss": 0.0003, |
|
"num_tokens": 114913708.0, |
|
"reward": 1.8622263967990875, |
|
"reward_std": 0.18642316292971373, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.625, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.11861319048330188, |
|
"rewards/waypoint_pred_accuracy/std": 0.09321157418889925, |
|
"step": 244 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 301.125, |
|
"completions/max_terminated_length": 301.125, |
|
"completions/mean_length": 172.37890625, |
|
"completions/mean_terminated_length": 172.37890625, |
|
"completions/min_length": 110.75, |
|
"completions/min_terminated_length": 110.75, |
|
"epoch": 0.5157894736842106, |
|
"grad_norm": 0.013545077294111252, |
|
"learning_rate": 6.077792598159479e-07, |
|
"loss": -0.0, |
|
"num_tokens": 115361006.0, |
|
"reward": 1.9425796866416931, |
|
"reward_std": 0.10113994629730882, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.875, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.03378988173824164, |
|
"rewards/waypoint_pred_accuracy/std": 0.05056997878441809, |
|
"step": 245 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 276.5, |
|
"completions/max_terminated_length": 276.5, |
|
"completions/mean_length": 173.775390625, |
|
"completions/mean_terminated_length": 173.775390625, |
|
"completions/min_length": 116.5, |
|
"completions/min_terminated_length": 116.5, |
|
"epoch": 0.5178947368421053, |
|
"grad_norm": 0.02074316143989563, |
|
"learning_rate": 6.044943183671836e-07, |
|
"loss": 0.0004, |
|
"num_tokens": 115809723.0, |
|
"reward": 1.7704734951257706, |
|
"reward_std": 0.09013272261904604, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.623046875, |
|
"rewards/stop_prediction_reward/std": 0.015625, |
|
"rewards/waypoint_pred_accuracy/mean": 0.07371331164245536, |
|
"rewards/waypoint_pred_accuracy/std": 0.03725385823372892, |
|
"step": 246 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 258.375, |
|
"completions/max_terminated_length": 258.375, |
|
"completions/mean_length": 170.69140625, |
|
"completions/mean_terminated_length": 170.69140625, |
|
"completions/min_length": 119.125, |
|
"completions/min_terminated_length": 119.125, |
|
"epoch": 0.52, |
|
"grad_norm": 0.021677250042557716, |
|
"learning_rate": 6.01206427113735e-07, |
|
"loss": -0.0001, |
|
"num_tokens": 116257053.0, |
|
"reward": 1.8927133083343506, |
|
"reward_std": 0.0853966644051809, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.75, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.07135666430194423, |
|
"rewards/waypoint_pred_accuracy/std": 0.04269833582034271, |
|
"step": 247 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 272.125, |
|
"completions/max_terminated_length": 272.125, |
|
"completions/mean_length": 169.240234375, |
|
"completions/mean_terminated_length": 169.240234375, |
|
"completions/min_length": 112.375, |
|
"completions/min_terminated_length": 112.375, |
|
"epoch": 0.5221052631578947, |
|
"grad_norm": 0.000306050234939903, |
|
"learning_rate": 5.97915764030799e-07, |
|
"loss": -0.0, |
|
"num_tokens": 116703576.0, |
|
"reward": 1.6304270327091217, |
|
"reward_std": 0.011630857972136255, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.625, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.002713506846724556, |
|
"rewards/waypoint_pred_accuracy/std": 0.005815430337122507, |
|
"step": 248 |
|
}, |
|
{ |
|
"clip_ratio": 0.0, |
|
"completions/clipped_ratio": 0.0, |
|
"completions/max_length": 282.375, |
|
"completions/max_terminated_length": 282.375, |
|
"completions/mean_length": 166.033203125, |
|
"completions/mean_terminated_length": 166.033203125, |
|
"completions/min_length": 113.625, |
|
"completions/min_terminated_length": 113.625, |
|
"epoch": 0.5242105263157895, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.946225072436121e-07, |
|
"loss": 0.0001, |
|
"num_tokens": 117151145.0, |
|
"reward": 1.8943032920360565, |
|
"reward_std": 0.03212926587002585, |
|
"rewards/format_reward_embodied/mean": 1.0, |
|
"rewards/format_reward_embodied/std": 0.0, |
|
"rewards/stop_prediction_reward/mean": 0.875, |
|
"rewards/stop_prediction_reward/std": 0.0, |
|
"rewards/waypoint_pred_accuracy/mean": 0.009651642787417297, |
|
"rewards/waypoint_pred_accuracy/std": 0.016064628072191757, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.5263157894736842, |
|
"grad_norm": 0.015220913104712963, |
|
"learning_rate": 5.913268350178101e-07, |
|
"loss": 0.0001, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5263157894736842, |
|
"eval_clip_ratio": 0.0, |
|
"eval_completions/clipped_ratio": 0.00015625, |
|
"eval_completions/max_length": 278.13, |
|
"eval_completions/max_terminated_length": 278.13, |
|
"eval_completions/mean_length": 169.34763885498046, |
|
"eval_completions/mean_terminated_length": 169.375066986084, |
|
"eval_completions/min_length": 110.76, |
|
"eval_completions/min_terminated_length": 111.92, |
|
"eval_loss": -2.504486656107474e-05, |
|
"eval_num_tokens": 117598301.0, |
|
"eval_reward": 1.865585025548935, |
|
"eval_reward_std": 0.0873453421616474, |
|
"eval_rewards/format_reward_embodied/mean": 0.9996875, |
|
"eval_rewards/format_reward_embodied/std": 0.0025, |
|
"eval_rewards/stop_prediction_reward/mean": 0.76984375, |
|
"eval_rewards/stop_prediction_reward/std": 0.008003681004047393, |
|
"eval_rewards/waypoint_pred_accuracy/mean": 0.048026895228197336, |
|
"eval_rewards/waypoint_pred_accuracy/std": 0.038523975913848735, |
|
"eval_runtime": 963.8699, |
|
"eval_samples_per_second": 0.104, |
|
"eval_steps_per_second": 0.002, |
|
"step": 250 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 475, |
|
"num_input_tokens_seen": 117598301, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|