File size: 1,694 Bytes

4bc4d19
 
 
 
 
81caae1
4bc4d19
 
 
 
 
81caae1
 
4bc4d19
81caae1
4bc4d19
 
 
81caae1
 
4bc4d19
81caae1
4bc4d19
 
 
81caae1
 
4bc4d19
81caae1
4bc4d19
 
 
81caae1
 
4bc4d19
81caae1
4bc4d19
 
 
81caae1
 
4bc4d19
81caae1
4bc4d19
 
 
81caae1
 
4bc4d19
81caae1
4bc4d19
 
 
 
81caae1
4bc4d19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81caae1
4bc4d19

{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 6.0,
  "eval_steps": 500,
  "global_step": 1650,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.9090909090909091,
      "grad_norm": 0.9186474680900574,
      "learning_rate": 0.0002,
      "loss": 0.8935,
      "step": 250
    },
    {
      "epoch": 1.8181818181818183,
      "grad_norm": 0.6858515739440918,
      "learning_rate": 0.0002,
      "loss": 0.4606,
      "step": 500
    },
    {
      "epoch": 2.7272727272727275,
      "grad_norm": 1.5318282842636108,
      "learning_rate": 0.0002,
      "loss": 0.3005,
      "step": 750
    },
    {
      "epoch": 3.6363636363636362,
      "grad_norm": 0.7992210984230042,
      "learning_rate": 0.0002,
      "loss": 0.235,
      "step": 1000
    },
    {
      "epoch": 4.545454545454545,
      "grad_norm": 0.5865496397018433,
      "learning_rate": 0.0002,
      "loss": 0.199,
      "step": 1250
    },
    {
      "epoch": 5.454545454545454,
      "grad_norm": 0.5286576151847839,
      "learning_rate": 0.0002,
      "loss": 0.1775,
      "step": 1500
    }
  ],
  "logging_steps": 250,
  "max_steps": 1650,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 6,
  "save_steps": 250,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.103034997322547e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}