axolotl-ai-cloud · NanoCode012 · Mar 25, 2024 · Mar 24, 2024 · Mar 24, 2024
diff --git a/README.md b/README.md
@@ -32,6 +32,7 @@ Features:
   - [Bare Metal Cloud GPU](#bare-metal-cloud-gpu)
   - [Windows](#windows)
   - [Mac](#mac)
+  - [Google Colab](#google-colab)
   - [Launching on public clouds via SkyPilot](#launching-on-public-clouds-via-skypilot)
 - [Dataset](#dataset)
   - [How to Add Custom Prompts](#how-to-add-custom-prompts)
@@ -269,6 +270,10 @@ pip3 install -e '.'
 ```
 More info: [mac.md](/docs/mac.qmd)
 
+#### Google Colab
+
+Please use this example [notebook](examples/colab-notebooks/colab-axolotl-example.ipynb).
+
 #### Launching on public clouds via SkyPilot
 To launch on GPU instances (both on-demand and spot instances) on 7+ clouds (GCP, AWS, Azure, OCI, and more), you can use [SkyPilot](https://skypilot.readthedocs.io/en/latest/index.html):
 

diff --git a/examples/mistral/Mistral-7b-example/README.md b/examples/mistral/Mistral-7b-example/README.md
diff --git a/examples/mistral/Mistral-7b-example/code.ipynb b/examples/mistral/Mistral-7b-example/code.ipynb
diff --git a/examples/mistral/Mistral-7b-example/data.jsonl b/examples/mistral/Mistral-7b-example/data.jsonl
diff --git a/examples/mistral/config.yml b/examples/mistral/config.yml
@@ -56,6 +56,3 @@ weight_decay: 0.0
 fsdp:
 fsdp_config:
 special_tokens:
-  bos_token: "<s>"
-  eos_token: "</s>"
-  unk_token: "<unk>"
diff --git a/examples/mistral/Mistral-7b-example/config.yml b/examples/mistral/lora.yml
@@ -1,4 +1,3 @@
-#Mistral-7b
 base_model: mistralai/Mistral-7B-v0.1
 model_type: MistralForCausalLM
 tokenizer_type: LlamaTokenizer
@@ -8,45 +7,50 @@ load_in_4bit: false
 strict: false
 
 datasets:
-  - path: tilemachos/Demo-Dataset #Path to json dataset file in huggingface
-    #for type,conversation arguments read axolotl readme and pick what is suited for your project, I wanted a chatbot and put sharegpt and chatml
-    type: sharegpt
-    conversation: chatml
-dataset_prepared_path: tilemachos/Demo-Dataset #Path to json dataset file in huggingface
-val_set_size: 0.05
-output_dir: ./out
+  - path: mhenrichsen/alpaca_2k_test
+    type: alpaca
+dataset_prepared_path: last_run_prepared
+val_set_size: 0.1
+output_dir: ./lora-out
 
-#using lora for lower cost
 adapter: lora
-lora_r: 8
+lora_model_dir:
+
+sequence_len: 8192
+sample_packing: true
+pad_to_sequence_len: true
+
+lora_r: 32
 lora_alpha: 16
 lora_dropout: 0.05
+lora_target_linear: true
+lora_fan_in_fan_out:
 lora_target_modules:
+  - gate_proj
+  - down_proj
+  - up_proj
   - q_proj
   - v_proj
-
-sequence_len: 512
-sample_packing: false
-pad_to_sequence_len: true
+  - k_proj
+  - o_proj
 
 wandb_project:
 wandb_entity:
 wandb_watch:
 wandb_name:
 wandb_log_model:
 
-#only 2 epochs because of small dataset
-gradient_accumulation_steps: 3
+gradient_accumulation_steps: 4
 micro_batch_size: 2
-num_epochs: 2
+num_epochs: 1
 optimizer: adamw_bnb_8bit
 lr_scheduler: cosine
 learning_rate: 0.0002
 
 train_on_inputs: false
 group_by_length: false
-bf16: true
-fp16: false
+bf16: auto
+fp16:
 tf32: false
 
 gradient_checkpointing: true
@@ -57,18 +61,17 @@ logging_steps: 1
 xformers_attention:
 flash_attention: true
 
+loss_watchdog_threshold: 5.0
+loss_watchdog_patience: 3
+
 warmup_steps: 10
 evals_per_epoch: 4
 eval_table_size:
 eval_max_new_tokens: 128
 saves_per_epoch: 1
 debug:
-#default deepspeed, can use more aggresive if needed like zero2, zero3
-deepspeed: deepspeed_configs/zero1.json
+deepspeed:
 weight_decay: 0.0
 fsdp:
 fsdp_config:
 special_tokens:
-  bos_token: "<s>"
-  eos_token: "</s>"
-  unk_token: "<unk>"
diff --git a/examples/mistral/qlora.yml b/examples/mistral/qlora.yml
@@ -75,6 +75,3 @@ weight_decay: 0.0
 fsdp:
 fsdp_config:
 special_tokens:
-  bos_token: "<s>"
-  eos_token: "</s>"
-  unk_token: "<unk>"