willtensora committed: End of training

Files changed:
- README.md (+19 -19)
- generation_config.json (+2 -1)
- pytorch_model.bin (+2 -2)
README.md CHANGED

@@ -1,12 +1,11 @@
 ---
 library_name: transformers
-
-base_model: unsloth/SmolLM-135M
+base_model: fxmarty/tiny-llama-fast-tokenizer
 tags:
 - axolotl
 - generated_from_trainer
 model-index:
-- name:
+- name: b1c9c4ec-ffa2-429d-9c5b-90b5979c502d
   results: []
 ---

@@ -18,21 +17,20 @@ should probably proofread and complete it, then remove this comment. -->

 axolotl version: `0.4.1`
 ```yaml
-base_model:
+base_model: fxmarty/tiny-llama-fast-tokenizer
 batch_size: 32
 bf16: true
 chat_template: tokenizer_default_fallback_alpaca
 datasets:
 - data_files:
-  -
+  - fc6136aac03f618a_train_data.json
   ds_type: json
   format: custom
-  path: /workspace/input_data/
+  path: /workspace/input_data/fc6136aac03f618a_train_data.json
   type:
-
-
-
-    format: '{instruction} {input}'
+    field_instruction: text
+    field_output: title
+    format: '{instruction}'
     no_input_format: '{instruction}'
     system_format: '{system}'
     system_prompt: ''
@@ -41,7 +39,7 @@ flash_attention: true
 gpu_memory_limit: 80GiB
 gradient_checkpointing: true
 group_by_length: true
-hub_model_id: willtensora/
+hub_model_id: willtensora/b1c9c4ec-ffa2-429d-9c5b-90b5979c502d
 hub_strategy: checkpoint
 learning_rate: 0.0002
 logging_steps: 10
@@ -57,13 +55,15 @@ sample_packing: false
 save_steps: 40
 save_total_limit: 1
 sequence_len: 2048
-
+special_tokens:
+  pad_token: </s>
+tokenizer_type: LlamaTokenizerFast
 train_on_inputs: false
 trust_remote_code: true
 val_set_size: 0.1
 wandb_entity: ''
 wandb_mode: online
-wandb_name:
+wandb_name: fxmarty/tiny-llama-fast-tokenizer-/workspace/input_data/fc6136aac03f618a_train_data.json
 wandb_project: Gradients-On-Demand
 wandb_run: your_name
 wandb_runid: default
@@ -74,9 +74,9 @@ xformers_attention: true

 </details><br>

-#
+# b1c9c4ec-ffa2-429d-9c5b-90b5979c502d

-This model is a fine-tuned version of [
+This model is a fine-tuned version of [fxmarty/tiny-llama-fast-tokenizer](https://huggingface.co/fxmarty/tiny-llama-fast-tokenizer) on the None dataset.

 ## Model description

@@ -105,13 +105,13 @@ The following hyperparameters were used during training:
 - total_eval_batch_size: 32
 - optimizer: Use OptimizerNames.ADAMW_BNB with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
 - lr_scheduler_type: cosine
-- training_steps:
+- training_steps: 18

 ### Training results

-| Training Loss | Epoch
-|
-| No log | 0.
+| Training Loss | Epoch  | Step | Validation Loss |
+|:-------------:|:------:|:----:|:---------------:|
+| No log        | 0.0071 | 1    | 10.3739         |


 ### Framework versions
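For readers landing on this commit, a minimal usage sketch follows. It assumes the `hub_model_id` repo above (`willtensora/b1c9c4ec-ffa2-429d-9c5b-90b5979c502d`) is public and loads with plain `transformers`; the prompt string is made up for illustration.

```python
# Minimal sketch: load the checkpoint this commit pushes to the Hub.
# Assumes the hub_model_id repo is public; the prompt is illustrative only.
from transformers import AutoModelForCausalLM, AutoTokenizer

repo_id = "willtensora/b1c9c4ec-ffa2-429d-9c5b-90b5979c502d"

tokenizer = AutoTokenizer.from_pretrained(repo_id)  # LlamaTokenizerFast per the config
model = AutoModelForCausalLM.from_pretrained(repo_id)

# The config maps pad_token to </s>, i.e. padding reuses the EOS token.
prompt = "Generate a title for the following text: ..."
inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```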
generation_config.json CHANGED

@@ -2,6 +2,7 @@
   "_from_model_config": true,
   "bos_token_id": 0,
   "do_sample": true,
-  "eos_token_id":
+  "eos_token_id": 1,
+  "pad_token_id": 1,
   "transformers_version": "4.46.0"
 }
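The two fields added here are the defaults that `generate()` falls back to when a call does not pass its own token ids. A quick way to confirm what was pushed (a sketch, again assuming the repo is public):

```python
# Sketch: read back the generation defaults this commit adds.
from transformers import GenerationConfig

gen = GenerationConfig.from_pretrained("willtensora/b1c9c4ec-ffa2-429d-9c5b-90b5979c502d")
print(gen.eos_token_id)  # 1 -- generation stops once this token is produced
print(gen.pad_token_id)  # 1 -- batched generation pads with the same id
```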
pytorch_model.bin CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:8ecbabedee28483af8dce99f4dd8fe36ef9c6c66877e669db930fe3569128330
+size 2071661
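Note that `pytorch_model.bin` is stored via Git LFS, so the diff above is over the pointer file, not the weights: `oid` is the SHA-256 of the actual payload and `size` its byte count. A sketch for verifying a downloaded copy against the pointer (the local filename is assumed):

```python
# Sketch: verify a downloaded pytorch_model.bin against the LFS pointer above.
# The pointer's oid is the SHA-256 of the 2,071,661-byte weight file.
import hashlib

EXPECTED_OID = "8ecbabedee28483af8dce99f4dd8fe36ef9c6c66877e669db930fe3569128330"

digest = hashlib.sha256()
with open("pytorch_model.bin", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # stream in 1 MiB chunks
        digest.update(chunk)

assert digest.hexdigest() == EXPECTED_OID, "checksum mismatch: stale or corrupt file"
print("checksum OK")
```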