alchemist69 commited on
Commit
afe7746
·
verified ·
1 Parent(s): 2a0645e

Training in progress, step 117, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3acf7a45155f7c22d023e73c49917651ac320442b64cee5808cc571f8aa6fbd3
3
  size 639691872
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29b394c06545ec2a1281071f7d15c82df6807f2dd55269f77e75c5d2f0411422
3
  size 639691872
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ecb182c6b5caa1bb7562c40c8e6e05448c07da4ce5e52c960fe59dc946ba544
3
  size 325339796
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f39ee47e244a927f051413fcdce59cafe82216d03819ef6761b9369dad4057d7
3
  size 325339796
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:724e920b7466292c6dcab261d9c031d25dfeaf7cb6176f8f596cadd3c1e800d5
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dec3d57234dce1e724a0245c6cc2d26181d9cbc9eed24bb65e72e29d4e819f45
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a9a9a0e1220b77fd9e183eebee851bf8ad0e90181863138ec8d5c5c2014ecd0
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09557b1d4da433a4489d12bec551f7b75466f796a905598e6ba8698b633264c8
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.3446539640426636,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
- "epoch": 0.8565310492505354,
5
  "eval_steps": 100,
6
- "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -723,6 +723,125 @@
723
  "eval_samples_per_second": 12.936,
724
  "eval_steps_per_second": 3.283,
725
  "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
726
  }
727
  ],
728
  "logging_steps": 1,
@@ -746,12 +865,12 @@
746
  "should_evaluate": false,
747
  "should_log": false,
748
  "should_save": true,
749
- "should_training_stop": false
750
  },
751
  "attributes": {}
752
  }
753
  },
754
- "total_flos": 1.3554618661758566e+17,
755
  "train_batch_size": 8,
756
  "trial_name": null,
757
  "trial_params": null
 
1
  {
2
  "best_metric": 1.3446539640426636,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
+ "epoch": 1.0021413276231264,
5
  "eval_steps": 100,
6
+ "global_step": 117,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
723
  "eval_samples_per_second": 12.936,
724
  "eval_steps_per_second": 3.283,
725
  "step": 100
726
+ },
727
+ {
728
+ "epoch": 0.8650963597430407,
729
+ "grad_norm": 0.39375630021095276,
730
+ "learning_rate": 6.564413174092443e-06,
731
+ "loss": 1.3082,
732
+ "step": 101
733
+ },
734
+ {
735
+ "epoch": 0.8736616702355461,
736
+ "grad_norm": 0.37335363030433655,
737
+ "learning_rate": 5.785225463498828e-06,
738
+ "loss": 1.3074,
739
+ "step": 102
740
+ },
741
+ {
742
+ "epoch": 0.8822269807280514,
743
+ "grad_norm": 0.39959633350372314,
744
+ "learning_rate": 5.05241294573024e-06,
745
+ "loss": 1.2991,
746
+ "step": 103
747
+ },
748
+ {
749
+ "epoch": 0.8907922912205567,
750
+ "grad_norm": 0.4094507694244385,
751
+ "learning_rate": 4.366744239922998e-06,
752
+ "loss": 1.2843,
753
+ "step": 104
754
+ },
755
+ {
756
+ "epoch": 0.8993576017130621,
757
+ "grad_norm": 0.4303068518638611,
758
+ "learning_rate": 3.728938517864794e-06,
759
+ "loss": 1.3151,
760
+ "step": 105
761
+ },
762
+ {
763
+ "epoch": 0.9079229122055674,
764
+ "grad_norm": 0.42622363567352295,
765
+ "learning_rate": 3.1396647496828247e-06,
766
+ "loss": 1.3469,
767
+ "step": 106
768
+ },
769
+ {
770
+ "epoch": 0.9164882226980728,
771
+ "grad_norm": 0.4673132002353668,
772
+ "learning_rate": 2.5995410021864787e-06,
773
+ "loss": 1.4081,
774
+ "step": 107
775
+ },
776
+ {
777
+ "epoch": 0.9250535331905781,
778
+ "grad_norm": 0.5070969462394714,
779
+ "learning_rate": 2.1091337906006482e-06,
780
+ "loss": 1.4305,
781
+ "step": 108
782
+ },
783
+ {
784
+ "epoch": 0.9336188436830836,
785
+ "grad_norm": 0.526165246963501,
786
+ "learning_rate": 1.6689574843694433e-06,
787
+ "loss": 1.4545,
788
+ "step": 109
789
+ },
790
+ {
791
+ "epoch": 0.9421841541755889,
792
+ "grad_norm": 0.5437924265861511,
793
+ "learning_rate": 1.2794737676536994e-06,
794
+ "loss": 1.4216,
795
+ "step": 110
796
+ },
797
+ {
798
+ "epoch": 0.9507494646680942,
799
+ "grad_norm": 0.6112513542175293,
800
+ "learning_rate": 9.410911550880475e-07,
801
+ "loss": 1.5846,
802
+ "step": 111
803
+ },
804
+ {
805
+ "epoch": 0.9593147751605996,
806
+ "grad_norm": 0.6393526792526245,
807
+ "learning_rate": 6.54164563305465e-07,
808
+ "loss": 1.2972,
809
+ "step": 112
810
+ },
811
+ {
812
+ "epoch": 0.9678800856531049,
813
+ "grad_norm": 0.6852487921714783,
814
+ "learning_rate": 4.189949386787462e-07,
815
+ "loss": 1.3823,
816
+ "step": 113
817
+ },
818
+ {
819
+ "epoch": 0.9764453961456103,
820
+ "grad_norm": 0.7834839224815369,
821
+ "learning_rate": 2.3582894166930268e-07,
822
+ "loss": 1.4955,
823
+ "step": 114
824
+ },
825
+ {
826
+ "epoch": 0.9850107066381156,
827
+ "grad_norm": 0.9401648044586182,
828
+ "learning_rate": 1.0485868811441757e-07,
829
+ "loss": 1.515,
830
+ "step": 115
831
+ },
832
+ {
833
+ "epoch": 0.9935760171306209,
834
+ "grad_norm": 1.1517846584320068,
835
+ "learning_rate": 2.6221547724253337e-08,
836
+ "loss": 1.4529,
837
+ "step": 116
838
+ },
839
+ {
840
+ "epoch": 1.0021413276231264,
841
+ "grad_norm": 0.49016863107681274,
842
+ "learning_rate": 0.0,
843
+ "loss": 1.788,
844
+ "step": 117
845
  }
846
  ],
847
  "logging_steps": 1,
 
865
  "should_evaluate": false,
866
  "should_log": false,
867
  "should_save": true,
868
+ "should_training_stop": true
869
  },
870
  "attributes": {}
871
  }
872
  },
873
+ "total_flos": 1.5869640165870797e+17,
874
  "train_batch_size": 8,
875
  "trial_name": null,
876
  "trial_params": null