TinyChart-3B / scripts /merge_jsonl_sort.py
xzl12306's picture
first commit
d6bc023
raw
history blame
737 Bytes
import os
import json
import argparse
def read_jsonl(jsonl_path):
    """Load a JSON Lines file into a list of decoded objects.

    Args:
        jsonl_path: Path to a file holding one JSON document per line.

    Returns:
        A list with one decoded object per non-blank line, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Read as UTF-8 explicitly so decoding does not depend on the platform
    # locale and matches the encoding write_jsonl uses when writing.
    with open(jsonl_path, 'r', encoding='utf-8') as f:
        # Blank / whitespace-only lines (e.g. a trailing empty line) carry no
        # record; skip them instead of letting json.loads raise on them.
        return [json.loads(line) for line in f if line.strip()]
def write_jsonl(data, jsonl_path):
    """Serialize each element of *data* as one JSON document per line.

    Args:
        data: Iterable of JSON-serializable objects.
        jsonl_path: Destination path; an existing file is overwritten.
    """
    with open(jsonl_path, 'w', encoding='utf-8') as out:
        # Records are serialized lazily, one per output line.
        out.writelines(json.dumps(record) + '\n' for record in data)
if __name__ == '__main__':
    # Merge every JSONL file found directly inside --input into a single
    # JSONL file written to --output.
    parser = argparse.ArgumentParser()
    parser.add_argument('--input', default='temp/')
    parser.add_argument('--output', default='chartqa_val.json')
    args = parser.parse_args()

    data = []
    # os.listdir returns names in arbitrary order; sorting makes the merged
    # output deterministic. Note the sort is lexicographic on the file name.
    for file in sorted(os.listdir(args.input)):
        path = os.path.join(args.input, file)
        # os.listdir also yields subdirectories; opening one would raise,
        # so only regular files are merged.
        if not os.path.isfile(path):
            continue
        data.extend(read_jsonl(path))
    write_jsonl(data, args.output)