"""Convert a Dolly JSONL dump into a single instruction/input/output JSON file.

Each line of the input file is one JSON object with (at least) the keys
``instruction``, ``context`` and ``response``.  The output is one JSON array
of ``{"instruction", "input", "output"}`` objects, followed by a fixed
identity example that teaches the model its name.
"""
import json

INPUT_PATH = 'databricks-dolly-15k.jsonl'
OUTPUT_PATH = 'data.json'

# Hand-written examples appended after the converted records.
IDENTITY_EXAMPLES = (
    {"instruction": "What is your name?", "input": "", "output": "My name is LibreAlpaca."},
)


def parse_jsonl(lines):
    """Decode an iterable of JSONL lines into a list of Python objects.

    Args:
        lines: Any iterable of strings, one JSON document per item (an open
            text file works).  Trailing newlines are tolerated.

    Returns:
        A list with one decoded object per non-blank line, in input order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Blank lines (e.g. a trailing newline at end of file) carry no record;
    # skipping them avoids a spurious JSONDecodeError on "".
    return [json.loads(line) for line in lines if line.strip()]


def convert_example(example):
    """Map one source record onto the instruction/input/output schema.

    Args:
        example: Mapping with the keys ``instruction``, ``context`` and
            ``response``.  Any other keys are ignored.

    Returns:
        A new dict with the keys ``instruction``, ``input`` and ``output``.

    Raises:
        KeyError: If one of the three required keys is missing.
    """
    return {
        "instruction": example["instruction"],
        "input": example["context"],
        "output": example["response"],
    }


def build_dataset(records):
    """Convert every record and append the fixed identity examples.

    Args:
        records: Iterable of source records accepted by ``convert_example``.

    Returns:
        A list of converted records followed by copies of
        ``IDENTITY_EXAMPLES``.
    """
    dataset = [convert_example(record) for record in records]
    # Copy so callers mutating the result cannot alter the module constant.
    dataset.extend(dict(extra) for extra in IDENTITY_EXAMPLES)
    return dataset


def main():
    """Read ``INPUT_PATH``, convert it, and write the result to ``OUTPUT_PATH``."""
    # Iterate the file handle rather than read().splitlines(): splitlines()
    # also breaks on U+2028/U+2029/U+0085 and similar characters, which may
    # appear unescaped inside a JSON string and would cut a record in half.
    with open(INPUT_PATH, encoding="utf-8") as source:
        records = parse_jsonl(source)

    # Build everything before opening the output so a malformed record does
    # not leave behind a truncated data.json.
    dataset = build_dataset(records)

    with open(OUTPUT_PATH, "w", encoding="utf-8") as sink:
        json.dump(dataset, sink)


if __name__ == "__main__":
    main()