Code
#1
by
mrfakename
- opened
Hi,
Thanks for releasing this converted model! Are there any plans to share the code used to convert it?
Thanks!
Here's the code:
import argparse
import json
import math
from safetensors import safe_open
from safetensors.torch import save_file
import torch
def process_safetensors(input_file, output_prefix, dry_run=False, num_shards=5):
    """Convert a consolidated Mistral-style safetensors checkpoint into
    Hugging Face-style sharded files plus a ``*.safetensors.index.json``.

    Args:
        input_file: Path to the consolidated ``.safetensors`` file.
        output_prefix: Prefix used for every output shard filename.
        dry_run: If True, compute everything but write no files.
        num_shards: Maximum number of output shards (default 5, matching
            the original hard-coded ``-of-00005`` layout).

    Returns:
        Tuple ``(sorted_tensors, shards, index)``: the renamed and ordered
        tensors, the list of per-shard dicts, and the index-file payload.
    """
    # Load every tensor from the consolidated checkpoint onto CPU.
    tensors = {}
    with safe_open(input_file, framework="pt", device="cpu") as f:
        for key in f.keys():
            tensors[key] = f.get_tensor(key)

    # One shared table of Mistral-suffix -> HF-suffix renames; the original
    # repeated this nine-way elif chain verbatim for the vision and language
    # branches.
    suffix_map = {
        "attention_norm": "input_layernorm.weight",
        "ffn_norm": "post_attention_layernorm.weight",
        "attention.wk": "self_attn.k_proj.weight",
        "attention.wo": "self_attn.o_proj.weight",
        "attention.wq": "self_attn.q_proj.weight",
        "attention.wv": "self_attn.v_proj.weight",
        "feed_forward.w1": "mlp.gate_proj.weight",
        "feed_forward.w2": "mlp.up_proj.weight",
        "feed_forward.w3": "mlp.down_proj.weight",
    }

    def map_tensor_name(key):
        """Map one Mistral-format key to its HF-style name, or None to drop it."""
        # NOTE(review): the pasted original lost its indentation and contained
        # stray bare `return` statements around the vision-encoder branches;
        # this reconstruction assumes every weight is kept and renamed —
        # confirm against the published converted checkpoint.
        if key.startswith("vision_encoder.transformer.layers."):
            layer_num = key.split('.')[3]
            for pattern, suffix in suffix_map.items():
                if pattern in key:
                    return f"model.vision_encoder.transformer.layers.{layer_num}.{suffix}"
            return f"model.{key}"
        if key.startswith("layers."):
            layer_num = key.split('.')[1]
            for pattern, suffix in suffix_map.items():
                if pattern in key:
                    return f"model.layers.{layer_num}.{suffix}"
            return f"model.{key}"
        if key == "norm.weight":
            return "model.norm.weight"
        if key == "tok_embeddings.weight":
            return "model.embed_tokens.weight"
        if key == "output.weight":
            return "lm_head.weight"
        # Remaining vision_encoder.* keys, vision_language_adapter.* keys and
        # anything unrecognized simply gain the "model." prefix.
        return f"model.{key}"

    # Rename tensors, calling map_tensor_name ONCE per key (the original
    # called it twice per tensor). The None check is kept defensively in case
    # a future mapping decides to drop a weight.
    new_tensors = {}
    for key, value in tensors.items():
        new_key = map_tensor_name(key)
        if new_key is not None:
            new_tensors[new_key] = value

    # Order: embed_tokens first, then everything else alphabetically, with
    # norm and lm_head forced to the end (Hugging Face convention).
    sorted_tensors = {}
    embed_tokens = new_tensors.pop("model.embed_tokens.weight", None)
    if embed_tokens is not None:
        sorted_tensors["model.embed_tokens.weight"] = embed_tokens
    sorted_tensors.update(sorted(new_tensors.items(), key=lambda kv: kv[0]))
    norm = sorted_tensors.pop("model.norm.weight", None)
    lm_head = sorted_tensors.pop("lm_head.weight", None)
    if norm is not None:
        sorted_tensors["model.norm.weight"] = norm
    if lm_head is not None:
        sorted_tensors["lm_head.weight"] = lm_head

    total_size = sum(t.numel() * t.element_size() for t in sorted_tensors.values())

    # Greedy sharding: open a new shard once the current one would exceed the
    # even split, capped at num_shards shards.
    shard_size = math.ceil(total_size / num_shards)
    shards = [{}]
    current_size = 0
    for key, tensor in sorted_tensors.items():
        tensor_size = tensor.numel() * tensor.element_size()
        if current_size + tensor_size > shard_size and len(shards) < num_shards:
            shards.append({})
            current_size = 0
        shards[-1][key] = tensor
        current_size += tensor_size

    def shard_filename(i):
        """Canonical HF shard name, e.g. prefix-00001-of-00005.safetensors."""
        return f"{output_prefix}-{i + 1:05d}-of-{num_shards:05d}.safetensors"

    if not dry_run:
        for i, shard in enumerate(shards):
            save_file(shard, shard_filename(i))

    # Build the index payload mapping each tensor name to its shard file.
    index = {
        "metadata": {"total_size": total_size},
        "weight_map": {
            key: shard_filename(i)
            for i, shard in enumerate(shards)
            for key in shard
        },
    }
    if not dry_run:
        with open(f"{output_prefix}.safetensors.index.json", "w") as f:
            json.dump(index, f, indent=2)
    return sorted_tensors, shards, index
if __name__ == "__main__":
    # CLI entry point: convert the given consolidated checkpoint, with an
    # optional dry run that prints a summary instead of writing files.
    cli = argparse.ArgumentParser()
    cli.add_argument("input_file", help="Input consolidated.safetensors file")
    cli.add_argument("output_prefix", help="Output file prefix")
    cli.add_argument(
        "--dry-run",
        action="store_true",
        help="Perform a dry run without saving files",
    )
    opts = cli.parse_args()

    result_tensors, result_shards, result_index = process_safetensors(
        opts.input_file, opts.output_prefix, opts.dry_run
    )

    if not opts.dry_run:
        print("Processing complete. Files saved.")
    else:
        print("Dry run summary:")
        print(f"Total tensors: {len(result_tensors)}")
        print(f"Number of shards: {len(result_shards)}")
        print("Tensor order:")
        for name in result_tensors:
            print(f" {name}")
        print("\nIndex file contents:")
        print(json.dumps(result_index, indent=2))
While I can't say for certain that these are the correct mappings, since there isn't an official Transformers implementation, I tried to make them similar to those of other popular vision models such as Qwen2-VL-7B-Instruct or openbmb/MiniCPM-Llama3-V-2_5.
Thanks!
mrfakename
changed discussion status to
closed