# coding=utf-8
# Converts Qwen2 (second-generation Qwen) models into the LLaMA2 format.
# Usage: python convert_qwen2_to_llama.py --input_dir magnum-72b-v1 --output_dir magnum-72b-v1-llamaify --save_safetensors --continue_conversion
# Original script: https://github.com/Minami-su/character_AI_open/blob/main/llamafy_qwen_v2.py
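#
# Note: only the converted weight shards and a Llama-style config.json are written;
# tokenizer files are not copied and should be taken from the input model separately.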

import json
import os
from collections import OrderedDict
from typing import Any, Dict

import fire
import torch
from safetensors import safe_open
from safetensors.torch import save_file
from tqdm import tqdm
from transformers.modeling_utils import (
    SAFE_WEIGHTS_INDEX_NAME,
    SAFE_WEIGHTS_NAME,
    WEIGHTS_INDEX_NAME,
    WEIGHTS_NAME,
    shard_checkpoint,
)
from transformers.utils import check_min_version

try:
    check_min_version("4.34.0")
except Exception:
    raise ValueError("Please upgrade `transformers` to 4.34.0 or a later version.")

CONFIG_NAME = "config.json"


def load_existing_shards(
    output_dir: str, save_safetensors: bool
) -> Dict[str, torch.Tensor]:
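    """Load tensors from shards already written to `output_dir` (used by --continue_conversion)."""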
    existing_state_dict = OrderedDict()
    weights_name = SAFE_WEIGHTS_NAME if save_safetensors else WEIGHTS_NAME
    index_name = SAFE_WEIGHTS_INDEX_NAME if save_safetensors else WEIGHTS_INDEX_NAME

    if os.path.exists(os.path.join(output_dir, index_name)):
        with open(os.path.join(output_dir, index_name), "r", encoding="utf-8") as f:
            index = json.load(f)

        # weight_map maps every tensor name to its shard file, so deduplicate the
        # shard names before loading to avoid reading the same file repeatedly.
        for shard_file in tqdm(
            sorted(set(index["weight_map"].values())), desc="Loading existing shards"
        ):
            if os.path.exists(os.path.join(output_dir, shard_file)):
                if save_safetensors:
                    with safe_open(
                        os.path.join(output_dir, shard_file),
                        framework="pt",
                        device="cpu",
                    ) as f:
                        for key in f.keys():
                            existing_state_dict[key] = f.get_tensor(key)
                else:
                    shard = torch.load(
                        os.path.join(output_dir, shard_file), map_location="cpu"
                    )
                    existing_state_dict.update(shard)

    return existing_state_dict


def save_weight(
    input_dir: str,
    output_dir: str,
    shard_size: str,
    save_safetensors: bool,
    continue_conversion: bool,
) -> str:
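    """Convert the Qwen2 checkpoint in `input_dir` to LLaMA2-style shards in `output_dir`.

    Returns the torch dtype of the weights as a string (e.g. "bfloat16").
    """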
    qwen_state_dict: Dict[str, torch.Tensor] = OrderedDict()
    for filepath in tqdm(os.listdir(input_dir), desc="Load weights"):
        if os.path.isfile(os.path.join(input_dir, filepath)) and filepath.endswith(
            ".safetensors"
        ):
            with safe_open(
                os.path.join(input_dir, filepath), framework="pt", device="cpu"
            ) as f:
                for key in f.keys():
                    qwen_state_dict[key] = f.get_tensor(key)

    llama2_state_dict: Dict[str, torch.Tensor] = OrderedDict()
    if continue_conversion:
        llama2_state_dict = load_existing_shards(output_dir, save_safetensors)

    torch_dtype = None
    for key, value in tqdm(qwen_state_dict.items(), desc="Convert format"):
        if torch_dtype is None:
            torch_dtype = value.dtype
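        # Qwen2 attention uses biases on the q/k/v projections but not on o_proj, while the
        # Llama architecture with attention_bias=True expects an o_proj bias as well, so a
        # zero bias is added for every o_proj weight that does not already have one.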
        if "self_attn.o_proj" in key:
            llama2_state_dict[key] = value
            bias_key = key.replace(".weight", ".bias")
            if bias_key not in llama2_state_dict:
                llama2_state_dict[bias_key] = torch.zeros_like(value[:, 0]).squeeze()
        else:
            llama2_state_dict[key] = value

    weights_name = SAFE_WEIGHTS_NAME if save_safetensors else WEIGHTS_NAME
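    # Re-shard the converted state dict; `index` is None when the model fits in a single file.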
    shards, index = shard_checkpoint(
        llama2_state_dict, max_shard_size=shard_size, weights_name=weights_name
    )

    for shard_file, shard in tqdm(shards.items(), desc="Save weights"):
        if save_safetensors:
            save_file(
                shard, os.path.join(output_dir, shard_file), metadata={"format": "pt"}
            )
        else:
            torch.save(shard, os.path.join(output_dir, shard_file))

    if index is None:
        print(f"Model weights saved in {os.path.join(output_dir, weights_name)}")
    else:
        index_name = SAFE_WEIGHTS_INDEX_NAME if save_safetensors else WEIGHTS_INDEX_NAME
        with open(os.path.join(output_dir, index_name), "w", encoding="utf-8") as f:
            json.dump(index, f, indent=2, sort_keys=True)
        print(f"Model weights saved in {output_dir}")

    return str(torch_dtype).replace("torch.", "")


def save_config(input_dir: str, output_dir: str, torch_dtype: str):
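    """Translate the Qwen2 config.json into a LlamaForCausalLM-compatible config."""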
    with open(os.path.join(input_dir, CONFIG_NAME), "r", encoding="utf-8") as f:
        qwen_config_dict: Dict[str, Any] = json.load(f)

    llama2_config_dict: Dict[str, Any] = OrderedDict()
    llama2_config_dict["architectures"] = ["LlamaForCausalLM"]
    llama2_config_dict["attention_bias"] = True
    llama2_config_dict["attention_dropout"] = qwen_config_dict["attention_dropout"]
    llama2_config_dict["hidden_act"] = "silu"
    llama2_config_dict["hidden_size"] = qwen_config_dict["hidden_size"]
    llama2_config_dict["initializer_range"] = qwen_config_dict["initializer_range"]
    llama2_config_dict["intermediate_size"] = qwen_config_dict["intermediate_size"]
    llama2_config_dict["max_position_embeddings"] = 32767  # Qwen2-72B-Instruct
    llama2_config_dict["max_window_layers"] = qwen_config_dict["max_window_layers"]
    llama2_config_dict["model_type"] = "llama"
    llama2_config_dict["num_attention_heads"] = qwen_config_dict["num_attention_heads"]
    llama2_config_dict["num_hidden_layers"] = qwen_config_dict["num_hidden_layers"]
    llama2_config_dict["num_key_value_heads"] = qwen_config_dict["num_key_value_heads"]
    llama2_config_dict["pretraining_tp"] = 1
    llama2_config_dict["rms_norm_eps"] = qwen_config_dict["rms_norm_eps"]
    llama2_config_dict["rope_theta"] = qwen_config_dict["rope_theta"]
    llama2_config_dict["rope_scaling"] = None
    llama2_config_dict["sliding_window"] = qwen_config_dict["sliding_window"]
    llama2_config_dict["tie_word_embeddings"] = qwen_config_dict["tie_word_embeddings"]
    llama2_config_dict["torch_dtype"] = torch_dtype
    llama2_config_dict["transformers_version"] = "4.37.0"
    llama2_config_dict["use_cache"] = True
    llama2_config_dict["use_sliding_window"] = qwen_config_dict["use_sliding_window"]
    llama2_config_dict["vocab_size"] = qwen_config_dict["vocab_size"]

    with open(os.path.join(output_dir, CONFIG_NAME), "w", encoding="utf-8") as f:
        json.dump(llama2_config_dict, f, indent=2)
    print(f"Model config saved in {os.path.join(output_dir, CONFIG_NAME)}")


def llamafy_qwen_v2(
    input_dir: str,
    output_dir: str,
    shard_size: str = "4GB",
    save_safetensors: bool = False,
    continue_conversion: bool = False,
):
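    """Entry point: convert a Qwen2 checkpoint in `input_dir` into LLaMA2 format in `output_dir`."""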
    if not continue_conversion:
        try:
            os.makedirs(output_dir, exist_ok=False)
        except Exception as e:
            raise ValueError(
                "Output dir already exists. Use --continue_conversion to resume."
            ) from e
    else:
        os.makedirs(output_dir, exist_ok=True)

    torch_dtype = save_weight(
        input_dir, output_dir, shard_size, save_safetensors, continue_conversion
    )
    save_config(input_dir, output_dir, torch_dtype)


if __name__ == "__main__":
    fire.Fire(llamafy_qwen_v2)