Spaces:
Runtime error
Runtime error
import json | |
# Read a JSON file
def read_json_file(file_path):
    """Parse the UTF-8 encoded JSON document at ``file_path``.

    Args:
        file_path: Location of the JSON file to read.

    Returns:
        The Python object produced by ``json.load`` for the file's content.
    """
    with open(file_path, mode="r", encoding="utf-8") as handle:
        return json.load(handle)
# Write a JSON file
def write_json_file(file_path, data):
    """Serialise ``data`` as JSON into ``file_path`` using UTF-8.

    Non-ASCII characters are written as-is (not ``\\uXXXX`` escaped) and the
    output is pretty-printed with a two-space indent. Any existing file at
    ``file_path`` is overwritten.

    Args:
        file_path: Destination path of the JSON file.
        data: JSON-serialisable object to store.
    """
    with open(file_path, mode="w", encoding="utf-8") as handle:
        json.dump(data, handle, indent=2, ensure_ascii=False)
# Default locations of the JSONL file to convert and of the converted copy.
DEFAULT_INPUT_PATH = "caixinyu/vicuna/instruct_chat_50k.jsonl/instruct_chat_50k.jsonl"
DEFAULT_OUTPUT_PATH = "caixinyu/vicuna/instruct_chat_50k.jsonl/instruct_chat_50knew.jsonl"


def _join_fragments(value):
    """Return ``value`` flattened to a single space-separated string.

    A value that is already a string is returned unchanged: passing it to
    ``" ".join`` would insert a space between every character.
    """
    if isinstance(value, str):
        return value
    return " ".join(value)


def main(input_file_path=DEFAULT_INPUT_PATH, output_file_path=DEFAULT_OUTPUT_PATH):
    """Rewrite a JSONL file so its "input" and "output" fields are plain strings.

    Each non-blank line of ``input_file_path`` is parsed as one JSON object.
    Its "input" and "output" values (presumably lists of strings -- confirm
    against the dataset) are joined with single spaces, every other key is
    kept as-is, and the object is written as one line to ``output_file_path``
    without ASCII-escaping non-ASCII characters.

    Args:
        input_file_path: UTF-8 JSONL file to read.
        output_file_path: UTF-8 JSONL file to create or overwrite.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks the "input" or "output" key.
    """
    with open(input_file_path, "r", encoding="utf-8") as input_file:
        with open(output_file_path, "w", encoding="utf-8") as output_file:
            for line in input_file:
                # Blank lines (e.g. a trailing newline at end of file) are not
                # records; json.loads would raise on them.
                if not line.strip():
                    continue
                record = json.loads(line)
                record["input"] = _join_fragments(record["input"])
                record["output"] = _join_fragments(record["output"])
                output_file.write(json.dumps(record, ensure_ascii=False) + "\n")


if __name__ == "__main__":
    main()
# # Read the original JSON file
# with open(input_file_path, 'r', encoding='utf-8') as json_file:
#     data = json.load(json_file)
# # Write the data out as a JSONL file
# with open(output_file_path, 'w', encoding='utf-8') as jsonl_file:
#     for item in data:
#         jsonl_file.write(json.dumps(item, ensure_ascii=False) + '\n')
# # Read the JSON file
# original_data = read_json_file(input_file_path)
# # # Convert the data, dropping the redundant [] wrapper
# # corrected_data = [dialog[0] for dialog in original_data]
# processed_data = []
# for item in original_data:
#     processed_item = {
#         "input": item["instruction"],
#         "output": item["output"]
#     }
#     processed_data.append(processed_item)
# # Save the converted data to a new JSON file
# write_json_file(output_file_path, processed_data)
# # print("数据转换完成,并保存到corrected_data.json文件中。")