File size: 1,694 Bytes
4bc4d19
 
 
 
 
81caae1
4bc4d19
 
 
 
 
81caae1
 
4bc4d19
81caae1
4bc4d19
 
 
81caae1
 
4bc4d19
81caae1
4bc4d19
 
 
81caae1
 
4bc4d19
81caae1
4bc4d19
 
 
81caae1
 
4bc4d19
81caae1
4bc4d19
 
 
81caae1
 
4bc4d19
81caae1
4bc4d19
 
 
81caae1
 
4bc4d19
81caae1
4bc4d19
 
 
 
81caae1
4bc4d19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81caae1
4bc4d19
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 6.0,
  "eval_steps": 500,
  "global_step": 1650,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.9090909090909091,
      "grad_norm": 0.9186474680900574,
      "learning_rate": 0.0002,
      "loss": 0.8935,
      "step": 250
    },
    {
      "epoch": 1.8181818181818183,
      "grad_norm": 0.6858515739440918,
      "learning_rate": 0.0002,
      "loss": 0.4606,
      "step": 500
    },
    {
      "epoch": 2.7272727272727275,
      "grad_norm": 1.5318282842636108,
      "learning_rate": 0.0002,
      "loss": 0.3005,
      "step": 750
    },
    {
      "epoch": 3.6363636363636362,
      "grad_norm": 0.7992210984230042,
      "learning_rate": 0.0002,
      "loss": 0.235,
      "step": 1000
    },
    {
      "epoch": 4.545454545454545,
      "grad_norm": 0.5865496397018433,
      "learning_rate": 0.0002,
      "loss": 0.199,
      "step": 1250
    },
    {
      "epoch": 5.454545454545454,
      "grad_norm": 0.5286576151847839,
      "learning_rate": 0.0002,
      "loss": 0.1775,
      "step": 1500
    }
  ],
  "logging_steps": 250,
  "max_steps": 1650,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 6,
  "save_steps": 250,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.103034997322547e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}