# Maps the various speaker labels used across the source datasets onto the
# canonical chat roles ('system' / 'user' / 'assistant').
roles_map = {
    'system': 'system',
    'user': 'user',
    'human': 'user',
    'assistant': 'assistant',
    'gpt': 'assistant',
    'AI': 'assistant',
}

# System prompt for R1-style reasoning traces (dolphin-r1 convention).
# NOTE(fix): the <think>/</think> tags had been stripped from this text,
# leaving dangling sentences ("The  block should contain…", "terminate
# {reasoning} with a  and proceed"); they are restored below.
R1_SYSTEM_PROMPT = '''\
You are an AI assistant. Your primary directive is to provide well-reasoned, structured, and extensively detailed responses.

Formatting Requirements:
- Always structure your replies using: <think>{reasoning}</think>{answer}
- The <think></think> block should contain at least six reasoning steps when applicable.
- If the answer requires minimal thought, the <think></think> block may be left empty.
- The user does not see the <think></think> section. Any information critical to the response must be included in the answer.
- If you notice that you have engaged in circular reasoning or repetition, immediately terminate {reasoning} with a </think> and proceed to the {answer}

Response Guidelines:
- Detailed and Structured: Use rich Markdown formatting for clarity and readability.
- Scientific and Logical Approach: Your explanations should reflect the depth and precision of the greatest scientific minds.
- Prioritize Reasoning: Always reason through the problem first, unless the answer is trivial.
- Concise yet Complete: Ensure responses are informative, yet to the point without unnecessary elaboration.
- Maintain a professional, intelligent, and analytical tone in all interactions.'''


def _wrap_think(reasoning, answer):
    """Return an assistant message string with the reasoning wrapped in
    <think> tags followed by the final answer.

    NOTE(fix): the original inline expressions produced
    '\\n' + reasoning + '\\n\\n' + answer — the <think>/</think> markers had
    been stripped along with those in R1_SYSTEM_PROMPT; restored here.
    """
    return '<think>\n' + (reasoning or '') + '\n</think>\n\n' + (answer or '')


def _dolphin_r1_transform(r):
    """Shared transform for both dolphin-r1 subsets (deepseek / flash).

    Prepends the R1 system prompt, keeps the original message list, and
    appends the model's reasoning + answer as a final assistant turn.
    (The two subsets previously carried byte-identical duplicate lambdas.)
    """
    return [
        {'role': 'system', 'content': R1_SYSTEM_PROMPT},
        *r['messages'],
        {'role': 'assistant',
         'content': _wrap_think(r.get('reasoning'), r.get('answer'))},
    ]


# Dataset specs for the "core reasoning" training mix.  Each entry describes
# one Hugging Face dataset shard plus a `transform` that normalizes a raw row
# into a list of {'role', 'content'} chat messages.  Large datasets are split
# into ten 10% slices via the `train[a%:b%]` split syntax so they can be
# streamed/processed in chunks.
core_reason_datasets = [
    #
    # math reason
    #
    # 8.43 GB, 450,258
    *[
        {
            'kind': 'instruct',
            'path': 'open-r1/OpenR1-Math-220k',
            'data_dir': 'data',
            'split': f'train[{i}%:{i + 10}%]',
            'field': 'messages',
            # Rows are ShareGPT-style: [{'from': ..., 'value': ...}, ...].
            'transform': lambda msgs: [
                {'role': roles_map[m['from']], 'content': m['value']}
                for m in msgs
            ],
        }
        for i in range(0, 100, 10)
    ],

    #
    # general reason
    #
    # 3.55 GB, 227,914
    *[
        {
            'kind': 'instruct',
            'path': 'open-thoughts/OpenThoughts-114k',
            'data_dir': 'data',
            'split': f'train[{i}%:{i + 10}%]',
            # Row carries its own system prompt plus a ShareGPT conversation.
            'transform': lambda r: [
                {'role': 'system', 'content': r['system']},
            ] + [
                {'role': roles_map[m['from']], 'content': m['value']}
                for m in r['conversations']
            ],
        }
        for i in range(0, 100, 10)
    ],

    # 3.98 GB, 814,334
    # 300k
    *[
        {
            'kind': 'instruct',
            'path': 'cognitivecomputations/dolphin-r1',
            'data_files': 'dolphin-r1-reasoning-deepseek.jsonl',
            'split': f'train[{i}%:{i + 10}%]',
            'transform': _dolphin_r1_transform,
        }
        for i in range(0, 100, 10)
    ],

    # 300k
    *[
        {
            'kind': 'instruct',
            'path': 'cognitivecomputations/dolphin-r1',
            'data_files': 'dolphin-r1-reasoning-flash.jsonl',
            'split': f'train[{i}%:{i + 10}%]',
            'transform': _dolphin_r1_transform,
        }
        for i in range(0, 100, 10)
    ],

    # 21.1 MB, 1,000
    {
        'kind': 'instruct',
        'path': 'simplescaling/s1K-1.1',
        'split': 'train',
        'transform': lambda r: [
            {'role': 'user', 'content': r.get('question') or ''},
            {'role': 'assistant',
             'content': _wrap_think(r.get('deepseek_thinking_trajectory'),
                                    r.get('solution'))},
        ],
    },
]