# Set-up script: writes the model configuration to config.json, saves the
# "bert-base-uncased" tokenizer into a sibling folder, then loads both back
# and prints what was loaded as a quick sanity check.
import json
import os

from transformers import BertTokenizer

# Configuration parameters that get serialized to config.json.
config_data = {
    "hidden_size": 768,
    "num_attention_heads": 12,
    "num_hidden_layers": 12,
    "intermediate_size": 3072,
    "hidden_dropout_prob": 0.1,
    "attention_probs_dropout_prob": 0.1,
    "image_size": 224,
    "image_channels": 3,
    "patch_size": 16,
    "max_position_embeddings": 512,
    "vocab_size": 30522,
    "type_vocab_size": 2,
    "audio_sample_rate": 16000,
    "audio_frame_size": 1024,
    "audio_hop_size": 512,
    "enable_vqa": True,
    "enable_caption": True,
    "enable_retrieval": True,
    "enable_asr": True,
    "enable_realtime_asr": True,
    "batch_size": 32,
    "learning_rate": 0.0001,
    "weight_decay": 0.01,
    "warmup_steps": 10000,
    "max_steps": 100000,
}

# Target locations (defined once; the load steps below reuse them).
config_path = r"C:\Users\baby7\Desktop\zero_sg-pytorch-zero-v4\config.json"
tokenizer_path = r"C:\Users\baby7\Desktop\zero_sg-pytorch-zero-v4\tokenizer"

# --- Save the configuration file ---
os.makedirs(os.path.dirname(config_path), exist_ok=True)
# Explicit encoding so the result does not depend on the platform default.
with open(config_path, "w", encoding="utf-8") as f:
    json.dump(config_data, f, indent=4)
print(f"配置文件已保存到: {config_path}")

# --- Initialize the tokenizer and save it to the target path ---
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
os.makedirs(tokenizer_path, exist_ok=True)
tokenizer.save_pretrained(tokenizer_path)
print(f"分词器已保存到: {tokenizer_path}")

# --- Load the configuration file ---
# Imported here, after config.json has been written, to keep the original
# ordering of side effects. Assumes the project provides a `Config` class in
# `model` that accepts the path to the JSON file -- TODO confirm.
from model import Config

config_file = config_path
config = Config(config_file)
print("加载的配置: ", config.__dict__)

# --- Reload the tokenizer from the saved copy and encode a sample ---
tokenizer = BertTokenizer.from_pretrained(tokenizer_path)

text = "Hello, how are you?"
# Pad/truncate to exactly 512 tokens; "pt" requests PyTorch tensors.
encoded_input = tokenizer(
    text,
    return_tensors="pt",
    max_length=512,
    padding="max_length",
    truncation=True,
)
print("分词器输出: ", encoded_input["input_ids"].shape)