Delete .ipynb_checkpoints

by MaziyarPanahi - opened Feb 25, 2024

base: refs/heads/main

←

from: refs/pr/4

Discussion Files changed

-235

Files changed (4) hide show

.ipynb_checkpoints/README-checkpoint.md +0 -152
.ipynb_checkpoints/config-checkpoint.json +0 -32
.ipynb_checkpoints/generation_config-checkpoint.json +0 -7
.ipynb_checkpoints/tokenizer_config-checkpoint.json +0 -44

.ipynb_checkpoints/README-checkpoint.md DELETED Viewed

@@ -1,152 +0,0 @@
----
-library_name: peft
-tags:
-- axolotl
-- generated_from_trainer
-base_model: MaziyarPanahi/Qwen1.5-8x7b
-model-index:
-- name: Qwen1.5-8x7b-v0.1
-  results: []
----
-<!-- This model card has been generated automatically according to the information the Trainer had access to. You
-should probably proofread and complete it, then remove this comment. -->
-[<img src="https://raw.githubusercontent.com/OpenAccess-AI-Collective/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/OpenAccess-AI-Collective/axolotl)
-<details><summary>See axolotl config</summary>
-axolotl version: `0.4.0`
-```yaml
-base_model: MaziyarPanahi/Qwen1.5-8x7b
-model_type: Qwen2ForCausalLM
-tokenizer_type: Qwen2Tokenizer
-trust_remote_code: true
-hub_model_id: MaziyarPanahi/Qwen1.5-8x7b-v0.1
-hf_use_auth_token: true
-load_in_8bit: false
-load_in_4bit: true
-strict: false
-datasets:
-  - path: Crystalcareai/MoD-150k
-    type: sharegpt
-dataset_prepared_path:
-val_set_size: 0.05
-output_dir: ./Qwen1.5-8x7b-v0.1-lora-out
-model_config:
-  output_router_logits: true
-adapter: qlora
-lora_model_dir:
-sequence_len: 2048
-sample_packing: true
-pad_to_sequence_len: true
-lora_r: 32
-lora_alpha: 16
-lora_dropout: 0.05
-lora_target_linear: true
-lora_fan_in_fan_out:
-gradient_accumulation_steps: 2
-micro_batch_size: 2
-num_epochs: 1
-optimizer: adamw_bnb_8bit
-lr_scheduler: cosine
-learning_rate: 0.0002
-train_on_inputs: false
-group_by_length: false
-bf16: auto
-fp16:
-tf32: false
-gradient_checkpointing: true
-early_stopping_patience:
-resume_from_checkpoint:
-local_rank:
-logging_steps: 1
-xformers_attention:
-flash_attention: true
-warmup_steps: 10
-evals_per_epoch: 4
-eval_table_size:
-eval_max_new_tokens: 128
-saves_per_epoch: 1
-debug:
-deepspeed:
-weight_decay: 0.0
-fsdp:
-fsdp_config:
-special_tokens:
-```
-</details><br>
-# Qwen1.5-8x7b-v0.1
-This model is a fine-tuned version of [MaziyarPanahi/Qwen1.5-8x7b](https://huggingface.co/MaziyarPanahi/Qwen1.5-8x7b) on the [Crystalcareai/MoD-150k](https://huggingface.co/datasets/Crystalcareai/MoD-150k) dataset.
-It achieves the following results on the evaluation set:
-- Loss: 0.7945
-## Model description
-More information needed
-## Intended uses & limitations
-More information needed
-## Training and evaluation data
-More information needed
-## Training procedure
-### Training hyperparameters
-The following hyperparameters were used during training:
-- learning_rate: 0.0002
-- train_batch_size: 2
-- eval_batch_size: 2
-- seed: 42
-- distributed_type: multi-GPU
-- num_devices: 4
-- gradient_accumulation_steps: 2
-- total_train_batch_size: 16
-- total_eval_batch_size: 8
-- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
-- lr_scheduler_type: cosine
-- lr_scheduler_warmup_steps: 10
-- num_epochs: 1
-### Training results
-| Training Loss | Epoch | Step | Validation Loss |
-|:-------------:|:-----:|:----:|:---------------:|
-| 6.2196        | 0.0   | 1    | 6.1942          |
-| 0.7772        | 0.25  | 513  | 0.8037          |
-| 0.656         | 0.5   | 1026 | 0.7977          |
-| 0.6967        | 0.75  | 1539 | 0.7945          |
-### Framework versions
-- PEFT 0.8.2
-- Transformers 4.39.0.dev0
-- Pytorch 2.2.0+cu121
-- Datasets 2.17.0
-- Tokenizers 0.15.0

.ipynb_checkpoints/config-checkpoint.json DELETED Viewed

@@ -1,32 +0,0 @@
-{
-  "_name_or_path": "MaziyarPanahi/Qwen1.5-8x7b",
-  "architectures": [
-    "MixtralForCausalLM"
-  ],
-  "attention_dropout": 0.0,
-  "bos_token_id": null,
-  "eos_token_id": 151643,
-  "hidden_act": "silu",
-  "hidden_size": 4096,
-  "initializer_range": 0.02,
-  "intermediate_size": 11008,
-  "max_position_embeddings": 32768,
-  "max_window_layers": 28,
-  "model_type": "mixtral",
-  "num_attention_heads": 32,
-  "num_experts_per_tok": 2,
-  "num_hidden_layers": 32,
-  "num_key_value_heads": 32,
-  "num_local_experts": 8,
-  "output_router_logits": true,
-  "rms_norm_eps": 1e-06,
-  "rope_theta": 1000000.0,
-  "router_aux_loss_coef": 0.001,
-  "sliding_window": null,
-  "tie_word_embeddings": false,
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.39.0.dev0",
-  "use_cache": false,
-  "use_sliding_window": false,
-  "vocab_size": 151936
-}

.ipynb_checkpoints/generation_config-checkpoint.json DELETED Viewed

@@ -1,7 +0,0 @@
-{
-  "_from_model_config": true,
-  "do_sample": true,
-  "eos_token_id": 151643,
-  "transformers_version": "4.39.0.dev0",
-  "use_cache": false
-}

.ipynb_checkpoints/tokenizer_config-checkpoint.json DELETED Viewed

@@ -1,44 +0,0 @@
-{
-  "add_prefix_space": false,
-  "added_tokens_decoder": {
-    "151643": {
-      "content": "<|endoftext|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151644": {
-      "content": "<|im_start|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "151645": {
-      "content": "<|im_end|>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    }
-  },
-  "additional_special_tokens": [
-    "<|im_start|>",
-    "<|im_end|>"
-  ],
-  "bos_token": null,
-  "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
-  "clean_up_tokenization_spaces": false,
-  "eos_token": "<|endoftext|>",
-  "errors": "replace",
-  "model_max_length": 32768,
-  "pad_token": "<|endoftext|>",
-  "split_special_tokens": false,
-  "tokenizer_class": "Qwen2Tokenizer",
-  "unk_token": null,
-  "use_fast": true
-}