diff --git a/README.md b/README.md index 5fe1ce6..49cc8ef 100644 Binary files a/README.md and b/README.md differ diff --git a/kody/mistral_sk_llamafactory_train.yaml b/kody/mistral_sk_llamafactory_train.yaml deleted file mode 100644 index 532d745..0000000 --- a/kody/mistral_sk_llamafactory_train.yaml +++ /dev/null @@ -1,89 +0,0 @@ -top.booster: unsloth -top.checkpoint_path: [] -top.finetuning_type: lora -top.model_name: Custom -top.quantization_bit: '4' -top.quantization_method: bnb -top.rope_scaling: none -top.template: alpaca -train.additional_target: '' -train.apollo_rank: 16 -train.apollo_scale: 32 -train.apollo_target: all -train.apollo_update_interval: 200 -train.badam_mode: layer -train.badam_switch_interval: 50 -train.badam_switch_mode: ascending -train.badam_update_ratio: 0.05 -train.batch_size: 1 -train.compute_type: fp16 -train.create_new_adapter: false -train.cutoff_len: 1024 -train.dataset: -- alpaca_slovak_cleaned -train.dataset_dir: data -train.ds_offload: false -train.ds_stage: none -train.enable_thinking: false -train.extra_args: '{"optim": "adamw_8bit", "eval_steps": 1000, "eval_strategy": "steps", - "save_total_limit": 2}' -train.freeze_extra_modules: '' -train.freeze_language_model: false -train.freeze_multi_modal_projector: true -train.freeze_trainable_layers: 2 -train.freeze_trainable_modules: all -train.freeze_vision_tower: true -train.galore_rank: 16 -train.galore_scale: 2 -train.galore_target: all -train.galore_update_interval: 200 -train.gradient_accumulation_steps: 8 -train.hub_private_repo: false -train.image_max_pixels: 768*768 -train.image_min_pixels: 32*32 -train.learning_rate: 2e-4 -train.logging_steps: 5 -train.lora_alpha: 32 -train.lora_dropout: 0.05 -train.lora_rank: 16 -train.lora_target: q_proj,k_proj,v_proj,o_proj,gate_proj,up_proj,down_proj -train.loraplus_lr_ratio: 0 -train.lr_scheduler_type: cosine -train.mask_history: false -train.max_grad_norm: '1.0' -train.max_samples: '50000' -train.neat_packing: false -train.neftune_alpha: 0 -train.num_train_epochs: '1.0' -train.packing: false -train.ppo_score_norm: false -train.ppo_whiten_rewards: false -train.pref_beta: 0.1 -train.pref_ftx: 0 -train.pref_loss: sigmoid -train.project: huggingface -train.report_to: none -train.resize_vocab: false -train.reward_model: [] -train.save_steps: 1000 -train.swanlab_api_key: '' -train.swanlab_link: null -train.swanlab_mode: cloud -train.swanlab_project: llamafactory -train.swanlab_run_name: '' -train.swanlab_workspace: '' -train.trackio_space_id: trackio -train.train_on_prompt: false -train.training_stage: Supervised Fine-Tuning -train.use_apollo: false -train.use_badam: false -train.use_dora: false -train.use_galore: false -train.use_llama_pro: false -train.use_pissa: false -train.use_rslora: false -train.use_swanlab: false -train.val_size: 0.025 -train.video_max_pixels: 256*256 -train.video_min_pixels: 16*16 -train.warmup_steps: 150 diff --git a/kody/training_args.yaml b/kody/training_args.yaml new file mode 100644 index 0000000..52f1945 --- /dev/null +++ b/kody/training_args.yaml @@ -0,0 +1,43 @@ +cutoff_len: 1024 +dataset: alpaca_slovak_cleaned +dataset_dir: data +ddp_timeout: 180000000 +do_train: true +double_quantization: true +enable_thinking: false +eval_steps: 1000 +eval_strategy: steps +finetuning_type: lora +flash_attn: auto +fp16: true +gradient_accumulation_steps: 8 +include_num_input_tokens_seen: true +learning_rate: 0.0002 +logging_steps: 5 +lora_alpha: 32 +lora_dropout: 0.05 +lora_rank: 16 +lora_target: q_proj,k_proj,v_proj,o_proj,gate_proj,up_proj,down_proj +lr_scheduler_type: cosine +max_grad_norm: 1.0 +max_samples: 50000 +model_name_or_path: slovak-nlp/mistral-sk-7b +num_train_epochs: 1.0 +optim: adamw_8bit +output_dir: /home/schwarc/diplomovka/mistral_sk_alpaca/llamafactory-full-lora +packing: false +per_device_eval_batch_size: 1 +per_device_train_batch_size: 1 +plot_loss: true +preprocessing_num_workers: 16 +quantization_bit: 4 +quantization_method: bnb +report_to: none +save_steps: 1000 +save_total_limit: 2 +stage: sft +template: alpaca +trust_remote_code: true +use_unsloth: true +val_size: 0.025 +warmup_steps: 150 \ No newline at end of file