{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9973661106233538,
  "eval_steps": 500,
  "global_step": 284,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03511852502194908,
      "grad_norm": 194.98139599033956,
      "learning_rate": 1.724137931034483e-06,
      "loss": 2.1612,
      "step": 10
    },
    {
      "epoch": 0.07023705004389816,
      "grad_norm": 11.574681452175845,
      "learning_rate": 3.448275862068966e-06,
      "loss": 1.5263,
      "step": 20
    },
    {
      "epoch": 0.10535557506584724,
      "grad_norm": 4.620556133093483,
      "learning_rate": 4.999810275287077e-06,
      "loss": 1.1514,
      "step": 30
    },
    {
      "epoch": 0.14047410008779632,
      "grad_norm": 4.8618300415248425,
      "learning_rate": 4.977078132728901e-06,
      "loss": 1.0009,
      "step": 40
    },
    {
      "epoch": 0.17559262510974538,
      "grad_norm": 5.039664200269418,
      "learning_rate": 4.916796010672969e-06,
      "loss": 0.9159,
      "step": 50
    },
    {
      "epoch": 0.21071115013169447,
      "grad_norm": 4.677695184320373,
      "learning_rate": 4.819877724641437e-06,
      "loss": 0.8819,
      "step": 60
    },
    {
      "epoch": 0.24582967515364354,
      "grad_norm": 4.476208091052348,
      "learning_rate": 4.687792457057482e-06,
      "loss": 0.8205,
      "step": 70
    },
    {
      "epoch": 0.28094820017559263,
      "grad_norm": 4.076277674081994,
      "learning_rate": 4.522542485937369e-06,
      "loss": 0.8138,
      "step": 80
    },
    {
      "epoch": 0.3160667251975417,
      "grad_norm": 3.706466470362564,
      "learning_rate": 4.326632832396733e-06,
      "loss": 0.7546,
      "step": 90
    },
    {
      "epoch": 0.35118525021949076,
      "grad_norm": 3.807926746840706,
      "learning_rate": 4.1030332870839466e-06,
      "loss": 0.7552,
      "step": 100
    },
    {
      "epoch": 0.3863037752414399,
      "grad_norm": 4.448525410750735,
      "learning_rate": 3.855133391181124e-06,
      "loss": 0.742,
      "step": 110
    },
    {
      "epoch": 0.42142230026338895,
      "grad_norm": 3.9020262955384557,
      "learning_rate": 3.586691054414913e-06,
      "loss": 0.7188,
      "step": 120
    },
    {
      "epoch": 0.456540825285338,
      "grad_norm": 4.436756217145944,
      "learning_rate": 3.3017755889756382e-06,
      "loss": 0.7112,
      "step": 130
    },
    {
      "epoch": 0.4916593503072871,
      "grad_norm": 3.3937208218898154,
      "learning_rate": 3.0047060228925256e-06,
      "loss": 0.6893,
      "step": 140
    },
    {
      "epoch": 0.5267778753292361,
      "grad_norm": 5.294699703871525,
      "learning_rate": 2.699985627971354e-06,
      "loss": 0.6668,
      "step": 150
    },
    {
      "epoch": 0.5618964003511853,
      "grad_norm": 4.373298006073623,
      "learning_rate": 2.392233654784262e-06,
      "loss": 0.6659,
      "step": 160
    },
    {
      "epoch": 0.5970149253731343,
      "grad_norm": 4.608979891075067,
      "learning_rate": 2.086115309539675e-06,
      "loss": 0.643,
      "step": 170
    },
    {
      "epoch": 0.6321334503950834,
      "grad_norm": 5.305060414443979,
      "learning_rate": 1.7862710343116451e-06,
      "loss": 0.6655,
      "step": 180
    },
    {
      "epoch": 0.6672519754170325,
      "grad_norm": 3.420875543474142,
      "learning_rate": 1.4972461626682033e-06,
      "loss": 0.6251,
      "step": 190
    },
    {
      "epoch": 0.7023705004389815,
      "grad_norm": 4.603437178273803,
      "learning_rate": 1.2234220170477332e-06,
      "loss": 0.6449,
      "step": 200
    },
    {
      "epoch": 0.7374890254609306,
      "grad_norm": 5.3351263316177215,
      "learning_rate": 9.689494923768756e-07,
      "loss": 0.6284,
      "step": 210
    },
    {
      "epoch": 0.7726075504828798,
      "grad_norm": 3.607022821385861,
      "learning_rate": 7.376861327346325e-07,
      "loss": 0.6072,
      "step": 220
    },
    {
      "epoch": 0.8077260755048288,
      "grad_norm": 4.795667171198547,
      "learning_rate": 5.33137654916292e-07,
      "loss": 0.6187,
      "step": 230
    },
    {
      "epoch": 0.8428446005267779,
      "grad_norm": 3.9213724314825216,
      "learning_rate": 3.5840480534034355e-07,
      "loss": 0.6145,
      "step": 240
    },
    {
      "epoch": 0.8779631255487269,
      "grad_norm": 5.451274818264296,
      "learning_rate": 2.1613635589349756e-07,
      "loss": 0.6012,
      "step": 250
    },
    {
      "epoch": 0.913081650570676,
      "grad_norm": 4.312276909206807,
      "learning_rate": 1.0848895124889819e-07,
      "loss": 0.6184,
      "step": 260
    },
    {
      "epoch": 0.9482001755926251,
      "grad_norm": 4.611522839692978,
      "learning_rate": 3.709441633123367e-08,
      "loss": 0.5952,
      "step": 270
    },
    {
      "epoch": 0.9833187006145742,
      "grad_norm": 6.192176760901921,
      "learning_rate": 3.035019514275317e-09,
      "loss": 0.5973,
      "step": 280
    },
    {
      "epoch": 0.9973661106233538,
      "step": 284,
      "total_flos": 1.1831516853383987e+17,
      "train_loss": 0.7974992327287164,
      "train_runtime": 7644.4468,
      "train_samples_per_second": 4.767,
      "train_steps_per_second": 0.037
    }
  ],
  "logging_steps": 10,
  "max_steps": 284,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.1831516853383987e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}