
Commit f86db9d

Add files via upload
1 parent 7b0b77a commit f86db9d

File tree

3 files changed: +342 additions, 0 deletions


convert-weights/convert_Yi.py

Lines changed: 113 additions & 0 deletions
@@ -0,0 +1,113 @@
config = {
    "type_vocab_size": 0,
    "use_bias": 0,
    "o_bias": 0,
}

import os

os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
os.environ["KERAS_BACKEND"] = "torch"

import keras
import keras_nlp
from bert4keras3.models import build_transformer_model
from bert4keras3.snippets import sequence_padding

keras.config.set_dtype_policy("bfloat16")
model_name = "Yi-6B-Chat"
import torch

os.makedirs(model_name, exist_ok=True)

from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig

# Load the HuggingFace reference model on CPU in bfloat16.
model = AutoModelForCausalLM.from_pretrained('01-ai/' + model_name,
                                             device_map="cpu",
                                             torch_dtype=torch.bfloat16,
                                             _attn_implementation='eager',
                                             trust_remote_code=False).eval()

tokenizer = AutoTokenizer.from_pretrained('01-ai/' + model_name)
qw2_config = AutoConfig.from_pretrained('01-ai/' + model_name)

# Map the HuggingFace config onto the bert4keras3 config and write it to disk.
config["vocab_size"] = qw2_config.vocab_size
config["num_hidden_layers"] = qw2_config.num_hidden_layers
config["query_head"] = qw2_config.num_attention_heads
config["num_attention_heads"] = qw2_config.num_key_value_heads
config["hidden_size"] = qw2_config.hidden_size
config["intermediate_size"] = qw2_config.intermediate_size
config["attention_head_size"] = qw2_config.hidden_size // qw2_config.num_attention_heads
config["dropout_rate"] = qw2_config.attention_dropout
config["hidden_act"] = qw2_config.hidden_act
config["max_wavelength"] = qw2_config.rope_theta

import json
with open(model_name + '/config.json', 'w') as f:
    json.dump(config, f, indent=4, ensure_ascii=False)

# Build the Keras-side model from the freshly written config.
self = build_transformer_model(
    config_path=model_name + '/config.json',
    model='llama',
    return_keras_model=False,
    with_lm='linear',
)
QW = self.model
QW.eval()

import numpy as np
# Tokenize a sample sentence (Chinese) for the output comparison at the end.
input_ids, mask = tokenizer('计算量决定了网络执行时间的长短,参数量决定了占用显存的量').values()
input_ids = keras.ops.expand_dims(input_ids, 0)
mask = keras.ops.expand_dims(mask, 0)

QW.summary()
print(sum(p.numel() for p in model.parameters()))

weights = model.state_dict()
def get_weight(key):
    # Pop so that anything left in `weights` afterwards was never converted.
    weight = weights.pop(key)
    return weight

# Token embedding.
embedding_weight = get_weight('model.embed_tokens.weight')
encoder_embedding = QW.get_layer('Embedding-Token')
encoder_embedding.set_weights([embedding_weight])

# Per-layer norms, feed-forward and attention projections. The HF kernels are
# transposed because Keras Dense layers store weights as [in, out].
for index in range(qw2_config.num_hidden_layers):
    attention_name = 'Transformer-%d-MultiHeadSelfAttention' % index
    feed_forward_name = 'Transformer-%d-FeedForward' % index
    att_ln = QW.get_layer('%s-Norm' % attention_name)
    att = QW.get_layer(attention_name)
    ffn_ln = QW.get_layer('%s-Norm' % feed_forward_name)
    ffn = QW.get_layer(feed_forward_name)

    att_ln.set_weights([get_weight('model.layers.' + str(index) + '.input_layernorm.weight')])
    ffn_ln.set_weights([get_weight('model.layers.' + str(index) + '.post_attention_layernorm.weight')])

    o1 = get_weight('model.layers.' + str(index) + '.mlp.gate_proj.weight').T
    o2 = get_weight('model.layers.' + str(index) + '.mlp.up_proj.weight').T
    o3 = get_weight('model.layers.' + str(index) + '.mlp.down_proj.weight').T

    ffn.set_weights([o1, o2, o3])

    q = get_weight('model.layers.' + str(index) + '.self_attn.q_proj.weight').T
    k = get_weight('model.layers.' + str(index) + '.self_attn.k_proj.weight').T
    v = get_weight('model.layers.' + str(index) + '.self_attn.v_proj.weight').T
    o = get_weight('model.layers.' + str(index) + '.self_attn.o_proj.weight').T

    att.set_weights([q,
                     k,
                     v,
                     o])

# Final norm, LM head, then save the converted checkpoint.
out_norm = get_weight('model.norm.weight')
QW.get_layer('Output-Norm').set_weights([out_norm])

lm_weights = get_weight('lm_head.weight').T
QW.get_layer('Decoder-Output-LM').set_weights([lm_weights])
QW.save_weights(model_name + '/model.weights.h5')
print('saving')

# Sanity check: the converted model should match the HF logits on the sample.
x1 = model.forward(input_ids.cpu(), attention_mask=mask.cpu())
x2 = QW(input_ids)

print(keras.ops.mean(keras.ops.abs(x2.cpu() - x1.logits.cpu()), -1))
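
For reference, reloading the converted checkpoint later only needs the config.json and weights file written above; a minimal sketch, assuming the converter has already been run for this model_name (the `yi` and `converted` names are placeholders; the same pattern applies to the Llama and Qwen scripts below with their own paths):

import os
os.environ["KERAS_BACKEND"] = "torch"
import keras
from bert4keras3.models import build_transformer_model

keras.config.set_dtype_policy("bfloat16")
model_name = "Yi-6B-Chat"

# Rebuild the architecture from the converter's config.json, then load the
# Keras weights file it saved.
yi = build_transformer_model(
    config_path=model_name + '/config.json',
    model='llama',
    return_keras_model=False,
    with_lm='linear',
)
converted = yi.model
converted.load_weights(model_name + '/model.weights.h5')
converted.summary()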

convert-weights/convert_llama.py

Lines changed: 113 additions & 0 deletions
@@ -0,0 +1,113 @@
config = {
    "type_vocab_size": 0,
    "use_bias": 0,
    "o_bias": 0,
}

import os

os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
os.environ["KERAS_BACKEND"] = "torch"
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
import keras
import keras_nlp
from bert4keras3.models import build_transformer_model
from bert4keras3.snippets import sequence_padding

keras.config.set_dtype_policy("bfloat16")
model_name = "Meta-Llama-3-8B-Instruct"
import torch

os.makedirs(model_name, exist_ok=True)

from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig

# Load the HuggingFace reference model on CPU in bfloat16.
model = AutoModelForCausalLM.from_pretrained('NousResearch/' + model_name,
                                             device_map="cpu",
                                             torch_dtype=torch.bfloat16,
                                             _attn_implementation='eager',
                                             trust_remote_code=False).eval()

tokenizer = AutoTokenizer.from_pretrained('NousResearch/' + model_name)
qw2_config = AutoConfig.from_pretrained('NousResearch/' + model_name)

# Map the HuggingFace config onto the bert4keras3 config and write it to disk.
config["vocab_size"] = qw2_config.vocab_size
config["num_hidden_layers"] = qw2_config.num_hidden_layers
config["query_head"] = qw2_config.num_attention_heads
config["num_attention_heads"] = qw2_config.num_key_value_heads
config["hidden_size"] = qw2_config.hidden_size
config["intermediate_size"] = qw2_config.intermediate_size
config["attention_head_size"] = qw2_config.hidden_size // qw2_config.num_attention_heads
config["dropout_rate"] = qw2_config.attention_dropout
config["hidden_act"] = qw2_config.hidden_act
config["max_wavelength"] = qw2_config.rope_theta

import json
with open(model_name + '/config.json', 'w') as f:
    json.dump(config, f, indent=4, ensure_ascii=False)

# Build the Keras-side model from the freshly written config.
self = build_transformer_model(
    config_path=model_name + '/config.json',
    model='llama',
    return_keras_model=False,
    with_lm='linear',
)
QW = self.model
QW.eval()

import numpy as np
# Tokenize a sample sentence (Chinese) for the output comparison at the end.
input_ids, mask = tokenizer('计算量决定了网络执行时间的长短,参数量决定了占用显存的量').values()
input_ids = keras.ops.expand_dims(input_ids, 0)
mask = keras.ops.expand_dims(mask, 0)

QW.summary()
print(sum(p.numel() for p in model.parameters()))

weights = model.state_dict()
def get_weight(key):
    # Pop so that anything left in `weights` afterwards was never converted.
    weight = weights.pop(key)
    return weight

# Token embedding.
embedding_weight = get_weight('model.embed_tokens.weight')
encoder_embedding = QW.get_layer('Embedding-Token')
encoder_embedding.set_weights([embedding_weight])

# Per-layer norms, feed-forward and attention projections. The HF kernels are
# transposed because Keras Dense layers store weights as [in, out].
for index in range(qw2_config.num_hidden_layers):
    attention_name = 'Transformer-%d-MultiHeadSelfAttention' % index
    feed_forward_name = 'Transformer-%d-FeedForward' % index
    att_ln = QW.get_layer('%s-Norm' % attention_name)
    att = QW.get_layer(attention_name)
    ffn_ln = QW.get_layer('%s-Norm' % feed_forward_name)
    ffn = QW.get_layer(feed_forward_name)

    att_ln.set_weights([get_weight('model.layers.' + str(index) + '.input_layernorm.weight')])
    ffn_ln.set_weights([get_weight('model.layers.' + str(index) + '.post_attention_layernorm.weight')])

    o1 = get_weight('model.layers.' + str(index) + '.mlp.gate_proj.weight').T
    o2 = get_weight('model.layers.' + str(index) + '.mlp.up_proj.weight').T
    o3 = get_weight('model.layers.' + str(index) + '.mlp.down_proj.weight').T

    ffn.set_weights([o1, o2, o3])

    q = get_weight('model.layers.' + str(index) + '.self_attn.q_proj.weight').T
    k = get_weight('model.layers.' + str(index) + '.self_attn.k_proj.weight').T
    v = get_weight('model.layers.' + str(index) + '.self_attn.v_proj.weight').T
    o = get_weight('model.layers.' + str(index) + '.self_attn.o_proj.weight').T

    att.set_weights([q,
                     k,
                     v,
                     o])

# Final norm, LM head, then save the converted checkpoint.
out_norm = get_weight('model.norm.weight')
QW.get_layer('Output-Norm').set_weights([out_norm])

lm_weights = get_weight('lm_head.weight').T
QW.get_layer('Decoder-Output-LM').set_weights([lm_weights])
QW.save_weights(model_name + '/model.weights.h5')
print('saving')

# Sanity check: the converted model should match the HF logits on the sample.
x1 = model.forward(input_ids.cpu(), attention_mask=mask.cpu())
x2 = QW(input_ids)

print(keras.ops.mean(keras.ops.abs(x2.cpu() - x1.logits.cpu()), -1))

convert-weights/convert_qwen.py

Lines changed: 116 additions & 0 deletions
@@ -0,0 +1,116 @@
config = {
    "type_vocab_size": 0,
    "use_bias": 1,
    "o_bias": 0,
}

import os

os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"
os.environ["KERAS_BACKEND"] = "torch"
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
import keras
import keras_nlp
from bert4keras3.models import build_transformer_model
from bert4keras3.snippets import sequence_padding

keras.config.set_dtype_policy("bfloat16")
model_name = "Qwen1.5-7B-Chat"
import torch

os.makedirs(model_name, exist_ok=True)

from transformers import AutoTokenizer, Qwen2ForCausalLM, AutoConfig

# Load the HuggingFace reference model on CPU in bfloat16.
model = Qwen2ForCausalLM.from_pretrained('Qwen/' + model_name,
                                         device_map="cpu",
                                         torch_dtype=torch.bfloat16,
                                         _attn_implementation='eager',
                                         trust_remote_code=False).eval()

tokenizer = AutoTokenizer.from_pretrained('Qwen/' + model_name)
qw2_config = AutoConfig.from_pretrained('Qwen/' + model_name)

# Map the HuggingFace config onto the bert4keras3 config and write it to disk.
config["vocab_size"] = qw2_config.vocab_size
config["num_hidden_layers"] = qw2_config.num_hidden_layers
config["query_head"] = qw2_config.num_key_value_heads
config["num_attention_heads"] = qw2_config.num_attention_heads
config["hidden_size"] = qw2_config.hidden_size
config["intermediate_size"] = qw2_config.intermediate_size
config["attention_head_size"] = qw2_config.hidden_size // qw2_config.num_key_value_heads
config["dropout_rate"] = qw2_config.attention_dropout
config["hidden_act"] = qw2_config.hidden_act
config["max_wavelength"] = qw2_config.rope_theta

import json
with open(model_name + '/config.json', 'w') as f:
    json.dump(config, f, indent=4, ensure_ascii=False)

# Build the Keras-side model from the freshly written config.
self = build_transformer_model(
    config_path=model_name + '/config.json',
    model='llama',
    return_keras_model=False,
    with_lm='linear',
)
QW = self.model
QW.eval()

import numpy as np
# Tokenize a sample sentence (Chinese) for the output comparison at the end.
input_ids, mask = tokenizer('计算量决定了网络执行时间的长短,参数量决定了占用显存的量').values()
input_ids = keras.ops.expand_dims(input_ids, 0)
mask = keras.ops.expand_dims(mask, 0)

QW.summary()
print(sum(p.numel() for p in model.parameters()))

weights = model.state_dict()
def get_weight(key):
    # Pop so that anything left in `weights` afterwards was never converted.
    weight = weights.pop(key)
    return weight

# Token embedding.
embedding_weight = get_weight('model.embed_tokens.weight')
encoder_embedding = QW.get_layer('Embedding-Token')
encoder_embedding.set_weights([embedding_weight])

# Per-layer norms, feed-forward and attention projections. The HF kernels are
# transposed because Keras Dense layers store weights as [in, out]. Qwen1.5
# uses biases on the q/k/v projections (use_bias=1 above), so those are set
# alongside the kernels.
for index in range(qw2_config.num_hidden_layers):
    attention_name = 'Transformer-%d-MultiHeadSelfAttention' % index
    feed_forward_name = 'Transformer-%d-FeedForward' % index
    att_ln = QW.get_layer('%s-Norm' % attention_name)
    att = QW.get_layer(attention_name)
    ffn_ln = QW.get_layer('%s-Norm' % feed_forward_name)
    ffn = QW.get_layer(feed_forward_name)

    att_ln.set_weights([get_weight('model.layers.' + str(index) + '.input_layernorm.weight')])
    ffn_ln.set_weights([get_weight('model.layers.' + str(index) + '.post_attention_layernorm.weight')])

    o1 = get_weight('model.layers.' + str(index) + '.mlp.gate_proj.weight').T
    o2 = get_weight('model.layers.' + str(index) + '.mlp.up_proj.weight').T
    o3 = get_weight('model.layers.' + str(index) + '.mlp.down_proj.weight').T

    ffn.set_weights([o1, o2, o3])

    q = get_weight('model.layers.' + str(index) + '.self_attn.q_proj.weight').T
    q_bias = get_weight('model.layers.' + str(index) + '.self_attn.q_proj.bias')

    k = get_weight('model.layers.' + str(index) + '.self_attn.k_proj.weight').T
    k_bias = get_weight('model.layers.' + str(index) + '.self_attn.k_proj.bias')

    v = get_weight('model.layers.' + str(index) + '.self_attn.v_proj.weight').T
    v_bias = get_weight('model.layers.' + str(index) + '.self_attn.v_proj.bias')

    o = get_weight('model.layers.' + str(index) + '.self_attn.o_proj.weight').T

    att.set_weights([q, q_bias,
                     k, k_bias,
                     v, v_bias,
                     o])

# Final norm, LM head, then save the converted checkpoint.
out_norm = get_weight('model.norm.weight')
QW.get_layer('Output-Norm').set_weights([out_norm])

lm_weights = get_weight('lm_head.weight').T
QW.get_layer('Decoder-Output-LM').set_weights([lm_weights])
QW.save_weights(model_name + '/QWen.weights.h5')
print('saving')

# Sanity check: the converted model should match the HF logits on the sample.
x1 = model.forward(input_ids.cpu(), attention_mask=mask.cpu())
x2 = QW(input_ids)

print(keras.ops.mean(keras.ops.abs(x2 - x1.logits), -1))
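
Because get_weight pops each tensor out of the HuggingFace state_dict, any entry still left in `weights` after the loop is a parameter that was never mapped onto a Keras layer. A short optional check along those lines, usable at the end of any of the three converters (a sketch, not part of the scripts above):

# Report HF parameters that were not consumed by the conversion loop.
leftover = list(weights.keys())
print('unconverted weights:', leftover if leftover else 'none')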

0 commit comments