# Map the role labels used by the source datasets ('human', 'gpt', 'AI', ...)
# onto the standard chat roles.
roles_map = {
    'system': 'system',
    'user': 'user',
    'human': 'user',
    'assistant': 'assistant',
    'gpt': 'assistant',
    'AI': 'assistant',
}

# R1-style system prompt, prepended to the dolphin-r1 conversations below.
R1_SYSTEM_PROMPT = '''\
You are an AI assistant.

Your primary directive is to provide well-reasoned, structured, and extensively detailed responses.

Formatting Requirements:
- Always structure your replies using: <think>{reasoning}</think>{answer}
- The <think></think> block should contain at least six reasoning steps when applicable.
- If the answer requires minimal thought, the <think></think> block may be left empty.
- The user does not see the <think></think> section. Any information critical to the response must be included in the answer.
- If you notice that you have engaged in circular reasoning or repetition, immediately terminate {reasoning} with a </think> and proceed to the {answer}.

Response Guidelines:
- Detailed and Structured: Use rich Markdown formatting for clarity and readability.
- Scientific and Logical Approach: Your explanations should reflect the depth and precision of the greatest scientific minds.
- Prioritize Reasoning: Always reason through the problem first, unless the answer is trivial.
- Concise yet Complete: Ensure responses are informative yet to the point, without unnecessary elaboration.
- Maintain a professional, intelligent, and analytical tone in all interactions.'''

# Core reasoning datasets; the larger ones are sharded into ten 10% train slices.
core_reason_datasets = [
    # open-r1/OpenR1-Math-220k: the 'messages' field stores turns as
    # 'from'/'value' pairs, remapped here to 'role'/'content'.
    *[
        {'kind': 'instruct', 'path': 'open-r1/OpenR1-Math-220k', 'data_dir': 'data', 'split': f'train[{i}%:{i + 10}%]', 'field': 'messages', 'transform': lambda msgs: [
            {'role': roles_map[m['from']], 'content': m['value']}
            for m in msgs
        ]}
        for i in range(0, 100, 10)
    ],

    # open-thoughts/OpenThoughts-114k: prepend each row's own system prompt,
    # then remap the 'conversations' turns.
    *[
        {'kind': 'instruct', 'path': 'open-thoughts/OpenThoughts-114k', 'data_dir': 'data', 'split': f'train[{i}%:{i + 10}%]', 'transform': lambda r: [
            {'role': 'system', 'content': r['system']}
        ] + [
            {'role': roles_map[m['from']], 'content': m['value']}
            for m in r['conversations']
        ]}
        for i in range(0, 100, 10)
    ],

    # cognitivecomputations/dolphin-r1 (deepseek reasoning file): prepend
    # R1_SYSTEM_PROMPT and fold the reasoning trace plus answer into a single
    # <think>...</think> assistant turn.
    *[
        {'kind': 'instruct', 'path': 'cognitivecomputations/dolphin-r1', 'data_files': 'dolphin-r1-reasoning-deepseek.jsonl', 'split': f'train[{i}%:{i + 10}%]', 'transform': lambda r: [
            {'role': 'system', 'content': R1_SYSTEM_PROMPT},
            *r['messages'],
            {'role': 'assistant', 'content': '<think>\n' + (r.get('reasoning') or '') + '\n</think>\n' + (r.get('answer') or '')},
        ]}
        for i in range(0, 100, 10)
    ],

    # cognitivecomputations/dolphin-r1 (flash reasoning file): same layout as above.
    *[
        {'kind': 'instruct', 'path': 'cognitivecomputations/dolphin-r1', 'data_files': 'dolphin-r1-reasoning-flash.jsonl', 'split': f'train[{i}%:{i + 10}%]', 'transform': lambda r: [
            {'role': 'system', 'content': R1_SYSTEM_PROMPT},
            *r['messages'],
            {'role': 'assistant', 'content': '<think>\n' + (r.get('reasoning') or '') + '\n</think>\n' + (r.get('answer') or '')},
        ]}
        for i in range(0, 100, 10)
    ],

    # simplescaling/s1K-1.1: one user question plus an assistant turn that embeds
    # the DeepSeek thinking trajectory before the solution.
    {'kind': 'instruct', 'path': 'simplescaling/s1K-1.1', 'split': 'train', 'transform': lambda r: [
        {'role': 'user', 'content': r.get('question') or ''},
        {'role': 'assistant', 'content': '<think>\n' + (r.get('deepseek_thinking_trajectory') or '') + '\n</think>\n' + (r.get('solution') or '')},
    ]},
]
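

# ---------------------------------------------------------------------------
# Usage sketch (illustrative assumption, not part of the original config): one
# way the entries above could be consumed with the Hugging Face `datasets`
# library -- load each slice, then normalise every row into a chat-message
# list via its 'transform' (reading the 'field' column first when declared).
# The helper name `iter_core_reason_conversations` is hypothetical.
def iter_core_reason_conversations(configs=core_reason_datasets):
    from datasets import load_dataset  # third-party dependency, assumed installed

    for cfg in configs:
        ds = load_dataset(
            cfg['path'],
            data_dir=cfg.get('data_dir'),
            data_files=cfg.get('data_files'),
            split=cfg['split'],
        )
        for row in ds:
            source = row[cfg['field']] if 'field' in cfg else row
            # Each yielded item is a list of {'role': ..., 'content': ...} dicts.
            yield cfg['transform'](source)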