File size: 4,973 Bytes
df28c17 07899fc 7075c6d df28c17 839e7cc df28c17 839e7cc df28c17 839e7cc df28c17 839e7cc df28c17 839e7cc df28c17 839e7cc df28c17 839e7cc df28c17 839e7cc df28c17 839e7cc df28c17 839e7cc df28c17 839e7cc df28c17 839e7cc df28c17 839e7cc df28c17 839e7cc df28c17 839e7cc df28c17 839e7cc df28c17 839e7cc df28c17 839e7cc df28c17 0758b38 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 |
---
license: apache-2.0
---
# maywell/EXAONE-3.0-7.8B-Instruct-Llamafied
동일 라이센스 재배포조차 금지되어있는 관계로 Llamafied 모델을 공유할 수 없게 되었습니다. vLLM, 추론 및 기타 활용으로 Llamafied 모델이 필요하다면 아래 스크립트를 실행해서 사용해주시면 감사하겠습니다.
```python
import torch
import gc
from transformers import LlamaConfig, LlamaForCausalLM, AutoModelForCausalLM, AutoTokenizer
from tqdm import tqdm
def unload_model(model):
    """Drop this function's reference to ``model`` and reclaim unused memory.

    Deletes the local name, runs a garbage-collection pass and, when CUDA is
    available, empties PyTorch's CUDA cache.

    Note: ``del`` removes only the reference held inside this function. The
    object itself becomes collectable only once the caller has dropped its
    own references to it as well.

    Args:
        model: The object (typically a loaded model) to release.
    """
    del model
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
def create_llama_config(exaone_config):
    """Build a ``LlamaConfig`` whose hyperparameters mirror an EXAONE config.

    Most fields are copied under the same name. Two are renamed on the way:
    ``num_layers`` becomes ``num_hidden_layers`` and ``layer_norm_epsilon``
    becomes ``rms_norm_eps``. ``attention_bias`` is always set to ``False``.

    Args:
        exaone_config: Configuration object of the loaded EXAONE model.

    Returns:
        A new ``LlamaConfig`` populated from ``exaone_config``.
    """
    return LlamaConfig(
        vocab_size=exaone_config.vocab_size,
        hidden_size=exaone_config.hidden_size,
        intermediate_size=exaone_config.intermediate_size,
        num_hidden_layers=exaone_config.num_layers,
        num_attention_heads=exaone_config.num_attention_heads,
        max_position_embeddings=exaone_config.max_position_embeddings,
        rms_norm_eps=exaone_config.layer_norm_epsilon,
        num_key_value_heads=exaone_config.num_key_value_heads,
        rope_theta=exaone_config.rope_theta,
        bos_token_id=exaone_config.bos_token_id,
        eos_token_id=exaone_config.eos_token_id,
        pad_token_id=exaone_config.pad_token_id,
        attention_bias=False,
    )
def copy_embedding_weights(llama_model, exaone_model):
    """Copy the token-embedding matrix from EXAONE into the Llama model.

    Reads ``exaone_model.transformer.wte.weight``, moves it to
    ``llama_model.device`` and installs it as the data of
    ``llama_model.model.embed_tokens.weight``.

    Args:
        llama_model: Destination model; modified in place.
        exaone_model: Source model; only read.
    """
    llama_model.model.embed_tokens.weight.data = (
        exaone_model.transformer.wte.weight.data.to(llama_model.device)
    )
def copy_layer_weights(llama_layer, exaone_layer, device):
    """Copy the weights of one decoder layer from EXAONE into Llama.

    Only ``.weight`` tensors are transferred. Source-to-destination mapping:

    * ``attn.attention.{q,k,v}_proj`` -> ``self_attn.{q,k,v}_proj``
    * ``attn.attention.out_proj``     -> ``self_attn.o_proj``
    * ``mlp.c_fc_0``                  -> ``mlp.gate_proj``
    * ``mlp.c_fc_1``                  -> ``mlp.up_proj``
    * ``mlp.c_proj``                  -> ``mlp.down_proj``
    * ``ln_1``                        -> ``input_layernorm``
    * ``ln_2``                        -> ``post_attention_layernorm``

    Args:
        llama_layer: Destination Llama decoder layer; modified in place.
        exaone_layer: Source EXAONE layer; only read.
        device: Device every copied tensor is moved to before assignment.
    """
    # Self-attention
    llama_layer.self_attn.q_proj.weight.data = exaone_layer.attn.attention.q_proj.weight.data.to(device)
    llama_layer.self_attn.k_proj.weight.data = exaone_layer.attn.attention.k_proj.weight.data.to(device)
    llama_layer.self_attn.v_proj.weight.data = exaone_layer.attn.attention.v_proj.weight.data.to(device)
    llama_layer.self_attn.o_proj.weight.data = exaone_layer.attn.attention.out_proj.weight.data.to(device)
    # MLP
    llama_layer.mlp.gate_proj.weight.data = exaone_layer.mlp.c_fc_0.weight.data.to(device)
    llama_layer.mlp.up_proj.weight.data = exaone_layer.mlp.c_fc_1.weight.data.to(device)
    llama_layer.mlp.down_proj.weight.data = exaone_layer.mlp.c_proj.weight.data.to(device)
    # Layer norms
    llama_layer.input_layernorm.weight.data = exaone_layer.ln_1.weight.data.to(device)
    llama_layer.post_attention_layernorm.weight.data = exaone_layer.ln_2.weight.data.to(device)
def copy_final_weights(llama_model, exaone_model):
    """Copy the final layer norm and LM head weights from EXAONE into Llama.

    ``exaone_model.transformer.ln_f.weight`` is installed as
    ``llama_model.model.norm.weight`` and ``exaone_model.lm_head.weight`` as
    ``llama_model.lm_head.weight``; both are first moved to
    ``llama_model.device``.

    Args:
        llama_model: Destination model; modified in place.
        exaone_model: Source model; only read.
    """
    llama_model.model.norm.weight.data = exaone_model.transformer.ln_f.weight.data.to(llama_model.device)
    llama_model.lm_head.weight.data = exaone_model.lm_head.weight.data.to(llama_model.device)
def port_exaone_to_llama(exaone_model_path, llama_model_path):
    """Convert an EXAONE checkpoint into a Llama-format checkpoint on disk.

    Steps: load the EXAONE model and tokenizer, build a matching
    ``LlamaConfig``, instantiate a fresh ``LlamaForCausalLM``, copy the
    embedding, per-layer, final-norm and LM-head weights across, then save
    the Llama model (safetensors, 5GB shards) together with the EXAONE
    tokenizer into ``llama_model_path``.

    Args:
        exaone_model_path: Model id or path passed to ``from_pretrained`` for
            the EXAONE model and tokenizer.
        llama_model_path: Output directory for the converted model and
            tokenizer.
    """
    print("Loading EXAONE model and tokenizer...")
    exaone_model = AutoModelForCausalLM.from_pretrained(
        exaone_model_path,
        torch_dtype=torch.bfloat16,
        device_map="auto",
        trust_remote_code=True,
    )
    exaone_tokenizer = AutoTokenizer.from_pretrained(exaone_model_path, trust_remote_code=True)
    exaone_config = exaone_model.config

    print("Creating Llama configuration...")
    llama_config = create_llama_config(exaone_config)

    print("Initializing Llama model...")
    llama_model = LlamaForCausalLM(llama_config)
    llama_model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

    print("Copying weights...")
    copy_embedding_weights(llama_model, exaone_model)
    for i in tqdm(range(exaone_config.num_layers), desc="Copying layers"):
        copy_layer_weights(llama_model.model.layers[i], exaone_model.transformer.h[i], llama_model.device)
    copy_final_weights(llama_model, exaone_model)

    print("Unloading EXAONE model to free memory...")
    # Passing the model to unload_model() would only delete that function's
    # own parameter while this frame kept the object alive. Drop our reference
    # first, then collect, so the EXAONE modules can actually be reclaimed
    # before saving. Tensors now installed in llama_model stay alive through
    # their own references.
    del exaone_model
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    print(f"Saving ported Llama model and tokenizer to {llama_model_path}")
    llama_model.save_pretrained(llama_model_path, safe_serialization=True, max_shard_size="5GB")
    exaone_tokenizer.save_pretrained(llama_model_path)

    print("Unloading Llama model...")
    # The local reference disappears when this function returns right after.
    unload_model(llama_model)

    print(f"EXAONE model successfully ported to Llama format and saved at {llama_model_path}")
if __name__ == "__main__":
    # Source EXAONE model id and the local directory the converted
    # Llama-format checkpoint is written to.
    exaone_model_path = "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct"
    llama_model_path = "./exa_llamafied"
    port_exaone_to_llama(exaone_model_path, llama_model_path)
```
모델을 공개해주신 `LG AI Research`분들께 감사의 말씀 드립니다.
[Original Repository](https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct) |