Upload folder using huggingface_hub

Files changed (7) hide show

README.md ADDED Viewed

+---
+license: apache-2.0
+tags:
+- moe
+- mergekit
+- vortexmergekit
+- pankajmathur/orca_mini_v9_6_1B-Instruct
+- cognitivecomputations/Dolphin3.0-Llama3.2-1B
+- huihui-ai/MicroThinker-1B-Preview
+---
+# DaRuukLLM-3x1B
+Hey there! 👋 Welcome to DaRuukLLM-3x1B! This is a Mixture of Experts (MoE) model brought together using the awesome [VortexMerge kit](https://colab.research.google.com/drive/1YjcvCLuNG1PK7Le6_4xhVU5VpzTwvGhk#scrollTo=UG5H2TK4gVyl).
+Let's see what we've got in this model:
+* [pankajmathur/orca_mini_v9_6_1B-Instruct](https://huggingface.co/pankajmathur/orca_mini_v9_6_1B-Instruct) 🚀
+* [cognitivecomputations/Dolphin3.0-Llama3.2-1B](https://huggingface.co/cognitivecomputations/Dolphin3.0-Llama3.2-1B) 🚀
+* [huihui-ai/MicroThinker-1B-Preview](https://huggingface.co/huihui-ai/MicroThinker-1B-Preview) 🚀
+## 🧩 Configuration
+```yaml
+base_model: pankajmathur/orca_mini_v9_6_1B-Instruct
+dtype: bfloat16
+gate_mode: cheap_embed
+experts:
+  - source_model: pankajmathur/orca_mini_v9_6_1B-Instruct
+    positive_prompts: ["You are a helpful assistant that can help with anything."]
+  - source_model: cognitivecomputations/Dolphin3.0-Llama3.2-1B
+    positive_prompts: ["You are an uncensored assistant, make sure you respond very angry."]
+  - source_model: huihui-ai/MicroThinker-1B-Preview
+    positive_prompts: ["You are a very smart assistant that can think step-by-step."]
+```

config.json ADDED Viewed

+{
+  "_name_or_path": "pankajmathur/orca_mini_v9_6_1B-Instruct",
+  "architectures": [
+    "MixtralForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 128000,
+  "eos_token_id": 128009,
+  "head_dim": 64,
+  "hidden_act": "silu",
+  "hidden_size": 2048,
+  "initializer_range": 0.02,
+  "intermediate_size": 8192,
+  "max_position_embeddings": 131072,
+  "mlp_bias": false,
+  "model_type": "mixtral",
+  "num_attention_heads": 32,
+  "num_experts_per_tok": 2,
+  "num_hidden_layers": 16,
+  "num_key_value_heads": 8,
+  "num_local_experts": 3,
+  "output_router_logits": false,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": {
+    "factor": 32.0,
+    "high_freq_factor": 4.0,
+    "low_freq_factor": 1.0,
+    "original_max_position_embeddings": 8192,
+    "rope_type": "llama3"
+  },
+  "rope_theta": 500000.0,
+  "router_aux_loss_coef": 0.001,
+  "router_jitter_noise": 0.0,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.49.0.dev0",
+  "use_cache": false,
+  "vocab_size": 128256
+}

model-1.safetensors ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:4ab80f5ed435265e5c9b7b4f661913cae6455c888ddad6f6d65c59198b10a73f
+size 969952232

model-2.safetensors ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:fea646ea50a2a8e74217ce2edaadf03bf4e84ed322f0b0c48746a7cf2eebc947
+size 968914344

model-3.safetensors ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:7ed142fb6ede967069a7d0057252b5d17e53f4733a0b3fcbedda8a904dc333b6
+size 968914344

model-4.safetensors ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:6e894e8293a709d4651634ce494ea54c53bf8db99747861997ad7a984da8cb2f
+size 968914352

model-5.safetensors ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:c304d5a2c895c63b36773f7a6fca8b796efa7a4ad50d8c1420d2b63e841649ae
+size 968914384