---
license: apache-2.0
---
# maywell/EXAONE-3.0-7.8B-Instruct-Llamafied

Because LG has prohibited even redistribution under the same license, the converted model itself can no longer be shared here. If you need a Llamafied model for vLLM, inference, or other uses, please run the script below to create it yourself.

For `modeling_exaone` and `configuration_exaone`, please refer to the original repository.

```python
import torch
import gc

from transformers import LlamaConfig, LlamaForCausalLM, AutoModelForCausalLM, AutoTokenizer
from tqdm import tqdm


def unload_model(model):
    """Clear memory by deleting a model and calling the garbage collector."""
    del model
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

def create_llama_config(exaone_config):
    """Create and return a Llama configuration based on EXAONE config."""
    return LlamaConfig(
        vocab_size=exaone_config.vocab_size,
        hidden_size=exaone_config.hidden_size,
        intermediate_size=exaone_config.intermediate_size,
        num_hidden_layers=exaone_config.num_layers,
        num_attention_heads=exaone_config.num_attention_heads,
        max_position_embeddings=exaone_config.max_position_embeddings,
        rms_norm_eps=exaone_config.layer_norm_epsilon,
        num_key_value_heads=exaone_config.num_key_value_heads,
        rope_theta=exaone_config.rope_theta,
        bos_token_id=exaone_config.bos_token_id,
        eos_token_id=exaone_config.eos_token_id,
        pad_token_id=exaone_config.pad_token_id,
        attention_bias=False,
    )

def copy_embedding_weights(llama_model, exaone_model):
    """Copy embedding weights from EXAONE to Llama model."""
    llama_model.model.embed_tokens.weight.data = exaone_model.transformer.wte.weight.data.to(llama_model.device)

def copy_layer_weights(llama_layer, exaone_layer, device):
    """Copy weights for a single layer from EXAONE to Llama model."""
    # Self-attention
    llama_layer.self_attn.q_proj.weight.data = exaone_layer.attn.attention.q_proj.weight.data.to(device)
    llama_layer.self_attn.k_proj.weight.data = exaone_layer.attn.attention.k_proj.weight.data.to(device)
    llama_layer.self_attn.v_proj.weight.data = exaone_layer.attn.attention.v_proj.weight.data.to(device)
    llama_layer.self_attn.o_proj.weight.data = exaone_layer.attn.attention.out_proj.weight.data.to(device)
    # MLP
    llama_layer.mlp.gate_proj.weight.data = exaone_layer.mlp.c_fc_0.weight.data.to(device)
    llama_layer.mlp.up_proj.weight.data = exaone_layer.mlp.c_fc_1.weight.data.to(device)
    llama_layer.mlp.down_proj.weight.data = exaone_layer.mlp.c_proj.weight.data.to(device)
    # Layer Norms
    llama_layer.input_layernorm.weight.data = exaone_layer.ln_1.weight.data.to(device)
    llama_layer.post_attention_layernorm.weight.data = exaone_layer.ln_2.weight.data.to(device)

def copy_final_weights(llama_model, exaone_model):
    """Copy final layer norm and LM head weights from EXAONE to Llama model."""
    llama_model.model.norm.weight.data = exaone_model.transformer.ln_f.weight.data.to(llama_model.device)
    llama_model.lm_head.weight.data = exaone_model.lm_head.weight.data.to(llama_model.device)

def port_exaone_to_llama(exaone_model_path, llama_model_path):
    print("Loading EXAONE model and tokenizer...")
    exaone_model = AutoModelForCausalLM.from_pretrained(exaone_model_path, torch_dtype=torch.bfloat16, device_map="auto", trust_remote_code=True)
    exaone_tokenizer = AutoTokenizer.from_pretrained(exaone_model_path, trust_remote_code=True)
    exaone_config = exaone_model.config

    print("Creating Llama configuration...")
    llama_config = create_llama_config(exaone_config)

    print("Initializing Llama model...")
    llama_model = LlamaForCausalLM(llama_config)
    llama_model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))

    print("Copying weights...")
    copy_embedding_weights(llama_model, exaone_model)

    for i in tqdm(range(exaone_config.num_layers), desc="Copying layers"):
        copy_layer_weights(llama_model.model.layers[i], exaone_model.transformer.h[i], llama_model.device)

    copy_final_weights(llama_model, exaone_model)

    print("Unloading EXAONE model to free memory...")
    unload_model(exaone_model)

    print(f"Saving ported Llama model and tokenizer to {llama_model_path}")
    llama_model.save_pretrained(llama_model_path, safe_serialization=True, max_shard_size="5GB")
    exaone_tokenizer.save_pretrained(llama_model_path)

    print("Unloading Llama model...")
    unload_model(llama_model)

    print(f"EXAONE model successfully ported to Llama format and saved at {llama_model_path}")

if __name__ == "__main__":
    exaone_model_path = "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct"
    llama_model_path = "./exa_llamafied"
    port_exaone_to_llama(exaone_model_path, llama_model_path)
```
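Once converted, the output directory is a standard Llama checkpoint, so it can be loaded without `trust_remote_code` in Transformers, and the same directory should work with vLLM. Below is a minimal usage sketch, assuming the script above has been run and the model saved to `./exa_llamafied`; the prompt text is only an illustration.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the Llamafied checkpoint as a plain Llama model (no trust_remote_code needed).
model = AutoModelForCausalLM.from_pretrained(
    "./exa_llamafied", torch_dtype=torch.bfloat16, device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained("./exa_llamafied")

# Simple generation; replace the prompt with your own input.
prompt = "Explain the difference between a list and a tuple in Python."
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

# The same directory should also load directly in vLLM, e.g.:
#   from vllm import LLM
#   llm = LLM(model="./exa_llamafied")
```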

Many thanks to `LG AI Research` for releasing the model.
[Original Repository](https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct)