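#!/usr/bin/env bash
# NOTE: the first three lines originally read "Spaces:", "Running", "Running".
# They were web-page residue captured with the script, not shell commands.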
# launch_rlhf.sh - launch PPO RLHF training.
#
# Usage: bash launch_rlhf.sh [single]
#   single     run ppo_rlhf_teacher.py on GPU 0 only
#   (default)  multi-GPU run through `accelerate launch` (recommended)
#
# Environment:
#   CUDA_VISIBLE_DEVICES  GPUs for the multi-GPU run (default: 0,1,2,3); one
#                         accelerate process is started per listed device.
#
# Paths are relative to the working directory: ./merged_model,
# rlhf_requirements.txt, accelerate_config.yaml, ppo_rlhf_teacher.py and
# evaluate_rlhf_model.py. Requires bash (arrays, [[ ]], pipefail).

# Abort on command failure, on unset variables, and on a failure anywhere in a
# pipeline. Without pipefail the `| tee` used for logging would hide a crashed
# training run behind tee's zero exit status.
set -euo pipefail

mode=${1:-}
min_gpu_memory_mb=80000
output_dir=./rlhf_teacher_model
log_dir=./rlhf_logs

echo "🚀 Starting PPO RLHF Training..."

# Check prerequisites.
echo "📋 Checking prerequisites..."

# The teacher model must already exist.
if [[ ! -d "./merged_model" ]]; then
  echo "❌ Error: Teacher model not found at ./merged_model" >&2
  echo " Please run SFT training first and merge the model" >&2
  exit 1
fi

# Report GPU resources. This check is advisory only: a missing or failing
# nvidia-smi produces a warning instead of aborting the launch.
echo "📊 GPU Resources:"
AVAILABLE_MEMORY=""
if command -v nvidia-smi >/dev/null 2>&1; then
  nvidia-smi --query-gpu=index,name,memory.total,memory.free --format=csv \
    || echo "⚠️ Warning: nvidia-smi GPU query failed" >&2

  # Sum the free memory over all GPUs (at least 80GB is recommended for RLHF).
  # `sum+0` makes awk print 0 rather than an empty string when there is no input.
  AVAILABLE_MEMORY=$(
    nvidia-smi --query-gpu=memory.free --format=csv,noheader,nounits \
      | awk '{sum+=$1} END {print sum+0}'
  ) || AVAILABLE_MEMORY=""
fi

# Only compare when the value is a plain integer; an empty or malformed value
# would otherwise make the numeric test itself fail.
if [[ "$AVAILABLE_MEMORY" =~ ^[0-9]+$ ]]; then
  echo "Available GPU Memory: ${AVAILABLE_MEMORY} MB"
  if (( AVAILABLE_MEMORY < min_gpu_memory_mb )); then
    echo "⚠️ Warning: RLHF training requires significant GPU memory (>80GB recommended)"
    echo " Consider using gradient checkpointing or smaller batch sizes"
  fi
else
  echo "⚠️ Warning: could not determine free GPU memory; skipping memory check" >&2
fi

# Environment variables. A CUDA_VISIBLE_DEVICES value set by the caller is
# respected; otherwise GPUs 0-3 are used.
export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-0,1,2,3}"
export TOKENIZERS_PARALLELISM=false
export WANDB_PROJECT="rlhf-teacher-training"

# One accelerate process per visible device, so the process count always
# matches the device list (4 with the default list).
IFS=',' read -r -a visible_gpus <<<"$CUDA_VISIBLE_DEVICES"
num_processes=${#visible_gpus[@]}

# A single timestamp keeps the W&B run name and the log file name in sync.
run_stamp=$(date +%Y%m%d_%H%M%S)
export WANDB_RUN_NAME="ppo-rlhf-${run_stamp}"
log_file="${log_dir}/rlhf_${run_stamp}.log"

# Create output directories.
mkdir -p "$output_dir"
mkdir -p "$log_dir"

# Install extra dependencies; training cannot start without them.
echo "📦 Installing RLHF dependencies..."
if ! pip install -r rlhf_requirements.txt; then
  echo "❌ Error: failed to install RLHF dependencies" >&2
  exit 1
fi

# Launch training.
echo "🔥 Starting PPO RLHF training..."
if [[ "$mode" == "single" ]]; then
  # Single-GPU training: restrict this one command to GPU 0 via `env`, leaving
  # the exported device list untouched for the evaluation step.
  train_cmd=(env CUDA_VISIBLE_DEVICES=0 python ppo_rlhf_teacher.py)
else
  # Multi-GPU training (recommended).
  train_cmd=(
    accelerate launch
    --config_file accelerate_config.yaml
    --num_processes "$num_processes"
    --main_process_port 29500
    ppo_rlhf_teacher.py
  )
fi

# With pipefail the pipeline status reflects the trainer, not just tee, so a
# failed run is reported and evaluation is skipped.
if ! "${train_cmd[@]}" 2>&1 | tee "$log_file"; then
  echo "❌ Error: RLHF training failed. See ${log_file} for details." >&2
  exit 1
fi
echo "✅ RLHF training completed. Check logs for details."

# Post-training evaluation; its exit status becomes the script's exit status.
echo "🧪 Running post-training evaluation..."
python evaluate_rlhf_model.py --model_path "$output_dir"