de-coder
/

UlizaLlama_Q4_K_M-gguf

Text Generation

Inference Endpoints

Model card | Files and versions | Community

UlizaLlama_Q4_K_M-gguf / config.json

de-coder's picture

Create config.json

640f687 verified 8 months ago

history blame contribute delete

1.62 kB

	{
	"_name_or_path": "de-coder/UlizaLlama_Q4_K_M-gguf",
	"architectures": ["LlamaForCausalLM"],
	"model_type": "llama",
	"torch_dtype": "float16",
	"transformers_version": "4.34.0",

	"vocab_size": 20000,
	"hidden_size": 4096,
	"intermediate_size": 11008,
	"num_hidden_layers": 32,
	"num_attention_heads": 32,
	"num_key_value_heads": 32,
	"hidden_act": "silu",
	"max_position_embeddings": 4096,
	"initializer_range": 0.02,
	"rms_norm_eps": 1e-05,
	"rope_theta": 10000.0,
	"rope_scaling": null,
	"attention_bias": false,
	"use_cache": true,
	"tie_word_embeddings": false,

	"bos_token_id": 1,
	"eos_token_id": 2,
	"pad_token_id": 0,
	"unk_token_id": 0,

	"quantization": {
	"method": "GGUF",
	"bits": 4,
	"variant": "q4_0",
	"block_size": 32,
	"group_size": 32,
	"tensors": {
	"query": "Q4_0",
	"key": "Q4_0",
	"value": "Q4_0",
	"output": "Q4_0",
	"intermediate": "Q4_0",
	"gate": "Q4_0",
	"embedding": "Q4_0",
	"norm": "F16"
	},
	"scales_dtype": "fp16"
	},

	"tokenizer": {
	"type": "BPE",
	"vocab_file": "ulizallama-tokenizer.json",
	"merges_file": "ulizallama-merges.txt"
	},

	"generation": {
	"temperature": 0.7,
	"top_p": 0.95,
	"top_k": 40,
	"repetition_penalty": 1.1,
	"max_new_tokens": 512
	},

	"base_model": {
	"name": "Jacaranda/kiswallama-pretrained",
	"type": "Llama2 Continual Pretraining"
	},
	"quantization_process": {
	"library": "llama.cpp",
	"version": "1.0.0",
	"command": "llama.cpp quantize ulizallama-7b.gguf ulizallama-7b-gguf-q4_0.bin q4_0"
	}
	}