{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9973661106233538,
  "eval_steps": 500,
  "global_step": 284,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03511852502194908,
      "grad_norm": 194.98139599033956,
      "learning_rate": 1.724137931034483e-06,
      "loss": 2.1612,
      "step": 10
    },
    {
      "epoch": 0.07023705004389816,
      "grad_norm": 11.574681452175845,
      "learning_rate": 3.448275862068966e-06,
      "loss": 1.5263,
      "step": 20
    },
    {
      "epoch": 0.10535557506584724,
      "grad_norm": 4.620556133093483,
      "learning_rate": 4.999810275287077e-06,
      "loss": 1.1514,
      "step": 30
    },
    {
      "epoch": 0.14047410008779632,
      "grad_norm": 4.8618300415248425,
      "learning_rate": 4.977078132728901e-06,
      "loss": 1.0009,
      "step": 40
    },
    {
      "epoch": 0.17559262510974538,
      "grad_norm": 5.039664200269418,
      "learning_rate": 4.916796010672969e-06,
      "loss": 0.9159,
      "step": 50
    },
    {
      "epoch": 0.21071115013169447,
      "grad_norm": 4.677695184320373,
      "learning_rate": 4.819877724641437e-06,
      "loss": 0.8819,
      "step": 60
    },
    {
      "epoch": 0.24582967515364354,
      "grad_norm": 4.476208091052348,
      "learning_rate": 4.687792457057482e-06,
      "loss": 0.8205,
      "step": 70
    },
    {
      "epoch": 0.28094820017559263,
      "grad_norm": 4.076277674081994,
      "learning_rate": 4.522542485937369e-06,
      "loss": 0.8138,
      "step": 80
    },
    {
      "epoch": 0.3160667251975417,
      "grad_norm": 3.706466470362564,
      "learning_rate": 4.326632832396733e-06,
      "loss": 0.7546,
      "step": 90
    },
    {
      "epoch": 0.35118525021949076,
      "grad_norm": 3.807926746840706,
      "learning_rate": 4.1030332870839466e-06,
      "loss": 0.7552,
      "step": 100
    },
    {
      "epoch": 0.3863037752414399,
      "grad_norm": 4.448525410750735,
      "learning_rate": 3.855133391181124e-06,
      "loss": 0.742,
      "step": 110
    },
    {
      "epoch": 0.42142230026338895,
      "grad_norm": 3.9020262955384557,
      "learning_rate": 3.586691054414913e-06,
      "loss": 0.7188,
      "step": 120
    },
    {
      "epoch": 0.456540825285338,
      "grad_norm": 4.436756217145944,
      "learning_rate": 3.3017755889756382e-06,
      "loss": 0.7112,
      "step": 130
    },
    {
      "epoch": 0.4916593503072871,
      "grad_norm": 3.3937208218898154,
      "learning_rate": 3.0047060228925256e-06,
      "loss": 0.6893,
      "step": 140
    },
    {
      "epoch": 0.5267778753292361,
      "grad_norm": 5.294699703871525,
      "learning_rate": 2.699985627971354e-06,
      "loss": 0.6668,
      "step": 150
    },
    {
      "epoch": 0.5618964003511853,
      "grad_norm": 4.373298006073623,
      "learning_rate": 2.392233654784262e-06,
      "loss": 0.6659,
      "step": 160
    },
    {
      "epoch": 0.5970149253731343,
      "grad_norm": 4.608979891075067,
      "learning_rate": 2.086115309539675e-06,
      "loss": 0.643,
      "step": 170
    },
    {
      "epoch": 0.6321334503950834,
      "grad_norm": 5.305060414443979,
      "learning_rate": 1.7862710343116451e-06,
      "loss": 0.6655,
      "step": 180
    },
    {
      "epoch": 0.6672519754170325,
      "grad_norm": 3.420875543474142,
      "learning_rate": 1.4972461626682033e-06,
      "loss": 0.6251,
      "step": 190
    },
    {
      "epoch": 0.7023705004389815,
      "grad_norm": 4.603437178273803,
      "learning_rate": 1.2234220170477332e-06,
      "loss": 0.6449,
      "step": 200
    },
    {
      "epoch": 0.7374890254609306,
      "grad_norm": 5.3351263316177215,
      "learning_rate": 9.689494923768756e-07,
      "loss": 0.6284,
      "step": 210
    },
    {
      "epoch": 0.7726075504828798,
      "grad_norm": 3.607022821385861,
      "learning_rate": 7.376861327346325e-07,
      "loss": 0.6072,
      "step": 220
    },
    {
      "epoch": 0.8077260755048288,
      "grad_norm": 4.795667171198547,
      "learning_rate": 5.33137654916292e-07,
      "loss": 0.6187,
      "step": 230
    },
    {
      "epoch": 0.8428446005267779,
      "grad_norm": 3.9213724314825216,
      "learning_rate": 3.5840480534034355e-07,
      "loss": 0.6145,
      "step": 240
    },
    {
      "epoch": 0.8779631255487269,
      "grad_norm": 5.451274818264296,
      "learning_rate": 2.1613635589349756e-07,
      "loss": 0.6012,
      "step": 250
    },
    {
      "epoch": 0.913081650570676,
      "grad_norm": 4.312276909206807,
      "learning_rate": 1.0848895124889819e-07,
      "loss": 0.6184,
      "step": 260
    },
    {
      "epoch": 0.9482001755926251,
      "grad_norm": 4.611522839692978,
      "learning_rate": 3.709441633123367e-08,
      "loss": 0.5952,
      "step": 270
    },
    {
      "epoch": 0.9833187006145742,
      "grad_norm": 6.192176760901921,
      "learning_rate": 3.035019514275317e-09,
      "loss": 0.5973,
      "step": 280
    },
    {
      "epoch": 0.9973661106233538,
      "step": 284,
      "total_flos": 1.1831516853383987e+17,
      "train_loss": 0.7974992327287164,
      "train_runtime": 7644.4468,
      "train_samples_per_second": 4.767,
      "train_steps_per_second": 0.037
    }
  ],
  "logging_steps": 10,
  "max_steps": 284,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.1831516853383987e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}