Spaces:
Paused
Paused
| import pickle | |
| import json | |
| import sys | |
| import os | |
| data_name = str(sys.argv[1]) | |
| if not os.path.exists("data"): | |
| os.makedirs("data") | |
| data = [] | |
| for i in range(100): | |
| try: | |
| temp = pickle.load( | |
| open("collected_data/{}_chat_{}.pkl".format(data_name, i), "rb") | |
| ) | |
| except: | |
| continue | |
| for topic in temp: | |
| x = temp[topic] | |
| x = x.split("[Human]")[1:-1] | |
| if len(x) != 0: | |
| s = "" | |
| for y in x: | |
| if "[AI]" in y: | |
| y = y.split("[AI]") | |
| if len(y) == 2: | |
| s += ( | |
| "[|Human|] " | |
| + y[0].strip() | |
| + "\n" | |
| + "[|AI|] " | |
| + y[1].strip() | |
| + "\n" | |
| ) | |
| else: | |
| break | |
| else: | |
| break | |
| if s != "": | |
| prompt = "The conversation between human and AI assistant.\n" | |
| s = prompt + s + "[|Human|] " | |
| data.append({"topic": topic, "input": s}) | |
| json.dump(data, open("data/{}_chat_data.json".format(data_name), "w")) | |