Delta-Vector committed (verified)
Commit 134a940 · 1 Parent(s): 8344bea

Update README.md

Files changed (1): README.md (+86 −0)
README.md CHANGED
@@ -239,7 +239,93 @@ Take off your helmet.<|im_end|>
 <details>
 
 ```yaml
+base_model: Qwen/QwQ-32B
+model_type: AutoModelForCausalLM
+tokenizer_type: AutoTokenizer
 
+plugins:
+  - axolotl.integrations.liger.LigerPlugin
+liger_rope: true
+liger_rms_norm: true
+liger_swiglu: true
+liger_fused_linear_cross_entropy: true
+
+load_in_8bit: false
+load_in_4bit: false
+strict: false
+
+datasets:
+  - path: Mielikki/Erebus-87k
+    type: completion
+    field: body
+  - path: NewEden/Orion-Completion-Asstr-Stories-16K
+    type: completion
+    field: content
+  - path: NewEden/Orion-Completion-LIT
+    type: completion
+    field: text
+
+shuffle_merged_datasets: true
+dataset_prepared_path: prepared_data
+output_dir: ./qvq-cum
+
+sequence_len: 16384
+sample_packing: true
+pad_to_sequence_len: true
+
+adapter: lora
+lora_model_dir:
+lora_r: 128
+lora_alpha: 16
+lora_dropout: 0.05
+lora_target_modules:
+  - gate_proj
+  - down_proj
+  - up_proj
+  - q_proj
+  - v_proj
+  - k_proj
+  - o_proj
+
+lora_modules_to_save:
+  - embed_tokens
+  - lm_head
+
+wandb_project: qwq
+wandb_entity:
+wandb_watch:
+wandb_name: Pretrain-pt1-v2-frfr
+wandb_log_model:
+
+gradient_accumulation_steps: 2
+micro_batch_size: 2
+num_epochs: 1
+optimizer: paged_adamw_8bit
+lr_scheduler: cosine
+learning_rate: 1e-5
+max_grad_norm: 0.001
+
+train_on_inputs: false
+group_by_length: false
+bf16: auto
+fp16:
+tf32: false
+
+gradient_checkpointing: true
+early_stopping_patience:
+resume_from_checkpoint:
+local_rank:
+logging_steps: 1
+xformers_attention:
+flash_attention: true
+
+warmup_steps: 40
+saves_per_epoch: 2
+debug:
+deepspeed: /workspace/axolotl/deepspeed_configs/zero3_bf16.json
+weight_decay: 0.01
+fsdp:
+fsdp_config:
 ```
 
 </details>
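
For reference, an adapter trained with a config like the one above can be attached back onto the base model with PEFT. This is a minimal sketch, not the author's pipeline: it assumes the final adapter checkpoint lands in `./qvq-cum` (the `output_dir` in the config) and that `transformers` and `peft` are installed.

```python
# Sketch: load the Qwen/QwQ-32B base model, then attach the LoRA adapter
# produced by the axolotl run above. Paths are assumptions, not confirmed output.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base = AutoModelForCausalLM.from_pretrained(
    "Qwen/QwQ-32B",              # base_model from the config
    torch_dtype=torch.bfloat16,  # matches bf16 training
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained("Qwen/QwQ-32B")

# Attach the LoRA weights; lora_modules_to_save (embed_tokens, lm_head)
# are restored by PEFT from the same adapter directory.
model = PeftModel.from_pretrained(base, "./qvq-cum")
model = model.merge_and_unload()  # optional: fold the adapter into the base weights
```

Merging is optional; keeping the adapter separate lets it be toggled or swapped at inference time.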