Spaces:
Sleeping
Sleeping
File size: 1,492 Bytes
10c1f9c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 |
def transform_data(data):
    """Convert a ``{'messages': [...]}`` record into a list of from/value turns.

    Each message is a dict with ``'role'`` and ``'content'`` keys. Roles are
    mapped as follows:

    * ``'system'``    -> ``{'from': 'system', ...}``, moved to the front of
      the result (multiple system messages keep their relative order).
    * ``'assistant'`` -> ``{'from': 'gpt', ...}``.
    * ``'input'``     -> merged into the preceding human turn (separated by a
      blank line) when it has non-empty content and the previous dialogue
      turn is a human one; otherwise it becomes its own human turn.
    * anything else   -> ``{'from': 'human', ...}``.

    If the first non-system turn is a ``'gpt'`` turn, an empty human turn is
    placed in front of it so the dialogue always opens with the human side.

    Args:
        data: Mapping that may contain a ``'messages'`` list. A missing or
            ``None`` value is treated as an empty list.

    Returns:
        A new list of ``{'from': ..., 'value': ...}`` dicts.

    Raises:
        KeyError: If a message lacks ``'role'`` or ``'content'``.
    """
    system_turns = []
    dialogue = []

    # ``or []`` also covers an explicit ``'messages': None``.
    for message in data.get('messages') or []:
        role = message['role']
        content = message['content']

        if role == 'system':
            # Collected separately so system turns end up first, in order.
            system_turns.append({'from': 'system', 'value': content})
        elif role == 'assistant':
            dialogue.append({'from': 'gpt', 'value': content})
        elif (
            role == 'input'
            and content
            and dialogue
            and dialogue[-1]['from'] == 'human'
        ):
            # An 'input' that follows a human turn extends that turn
            # instead of starting a new one.
            dialogue[-1]['value'] += '\n\n' + content
        else:
            dialogue.append({'from': 'human', 'value': content})

    # The check looks at the first *dialogue* turn so that a leading system
    # message does not hide a dialogue that starts with the assistant.
    if dialogue and dialogue[0]['from'] == 'gpt':
        dialogue.insert(0, {'from': 'human', 'value': ''})

    return system_turns + dialogue
|