maywell committed
Commit 0a64dcc · verified · Parent: 0758b38

Update Llamafy code

Files changed (1):
  1. README.md: +18 -17
README.md CHANGED
@@ -40,46 +40,47 @@ def create_llama_config(exaone_config):
 
 def copy_embedding_weights(llama_model, exaone_model):
     """Copy embedding weights from EXAONE to LLaMA model."""
-    llama_model.model.embed_tokens.weight.data = exaone_model.transformer.wte.weight.data.to(llama_model.device)
+    llama_model.model.embed_tokens.weight.data = exaone_model.transformer.wte.weight.data
 
-def copy_layer_weights(llama_layer, exaone_layer, device):
+def copy_layer_weights(llama_layer, exaone_layer):
     """Copy weights for a single layer from EXAONE to LLaMA model."""
     # Self-attention
-    llama_layer.self_attn.q_proj.weight.data = exaone_layer.attn.attention.q_proj.weight.data.to(device)
-    llama_layer.self_attn.k_proj.weight.data = exaone_layer.attn.attention.k_proj.weight.data.to(device)
-    llama_layer.self_attn.v_proj.weight.data = exaone_layer.attn.attention.v_proj.weight.data.to(device)
-    llama_layer.self_attn.o_proj.weight.data = exaone_layer.attn.attention.out_proj.weight.data.to(device)
+    llama_layer.self_attn.q_proj.weight.data = exaone_layer.attn.attention.q_proj.weight.data
+    llama_layer.self_attn.k_proj.weight.data = exaone_layer.attn.attention.k_proj.weight.data
+    llama_layer.self_attn.v_proj.weight.data = exaone_layer.attn.attention.v_proj.weight.data
+    llama_layer.self_attn.o_proj.weight.data = exaone_layer.attn.attention.out_proj.weight.data
     # MLP
-    llama_layer.mlp.gate_proj.weight.data = exaone_layer.mlp.c_fc_0.weight.data.to(device)
-    llama_layer.mlp.up_proj.weight.data = exaone_layer.mlp.c_fc_1.weight.data.to(device)
-    llama_layer.mlp.down_proj.weight.data = exaone_layer.mlp.c_proj.weight.data.to(device)
+    llama_layer.mlp.gate_proj.weight.data = exaone_layer.mlp.c_fc_0.weight.data
+    llama_layer.mlp.up_proj.weight.data = exaone_layer.mlp.c_fc_1.weight.data
+    llama_layer.mlp.down_proj.weight.data = exaone_layer.mlp.c_proj.weight.data
     # Layer Norms
-    llama_layer.input_layernorm.weight.data = exaone_layer.ln_1.weight.data.to(device)
-    llama_layer.post_attention_layernorm.weight.data = exaone_layer.ln_2.weight.data.to(device)
+    llama_layer.input_layernorm.weight.data = exaone_layer.ln_1.weight.data
+    llama_layer.post_attention_layernorm.weight.data = exaone_layer.ln_2.weight.data
 
 def copy_final_weights(llama_model, exaone_model):
     """Copy final layer norm and LM head weights from EXAONE to LLaMA model."""
-    llama_model.model.norm.weight.data = exaone_model.transformer.ln_f.weight.data.to(llama_model.device)
-    llama_model.lm_head.weight.data = exaone_model.lm_head.weight.data.to(llama_model.device)
+    llama_model.model.norm.weight.data = exaone_model.transformer.ln_f.weight.data
+    llama_model.lm_head.weight.data = exaone_model.lm_head.weight.data
 
 def port_exaone_to_llama(exaone_model_path, llama_model_path):
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
     print("Loading EXAONE model...")
-    exaone_model = load_model(exaone_model_path, ExaoneForCausalLM)
+    exaone_model = load_model(exaone_model_path, ExaoneForCausalLM).to(device)
     exaone_config = exaone_model.config
 
     print("Creating LLaMA configuration...")
     llama_config = create_llama_config(exaone_config)
 
     print("Initializing LLaMA model...")
-    llama_model = LlamaForCausalLM(llama_config)
-    llama_model.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
+    llama_model = LlamaForCausalLM(llama_config).to(device)
 
     print("Copying weights...")
     copy_embedding_weights(llama_model, exaone_model)
 
     for i in range(exaone_config.num_layers):
         print(f"Copying weights for layer {i+1}/{exaone_config.num_layers}")
-        copy_layer_weights(llama_model.model.layers[i], exaone_model.transformer.h[i], llama_model.device)
+        copy_layer_weights(llama_model.model.layers[i], exaone_model.transformer.h[i])
 
     copy_final_weights(llama_model, exaone_model)
 
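A minimal usage sketch of the ported conversion entry point follows. It assumes `load_model`, `create_llama_config`, and the EXAONE modeling class are defined earlier in the full README script (outside this hunk); the model identifier and output directory below are placeholders, and whether `port_exaone_to_llama` itself saves the converted checkpoint to `llama_model_path` is not visible in this diff.

```python
# Hypothetical invocation (not part of this commit); port_exaone_to_llama and its
# helpers are assumed to be defined earlier in the README script.
exaone_model_path = "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct"  # placeholder source checkpoint
llama_model_path = "./exaone-llamafied"                     # placeholder output directory

port_exaone_to_llama(exaone_model_path, llama_model_path)

# If the full script saves the converted weights to llama_model_path (not shown
# in this hunk), the result should load with the stock LLaMA class:
from transformers import LlamaForCausalLM

converted = LlamaForCausalLM.from_pretrained(llama_model_path)
```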