goodmodeler committed on
Commit
61bd54d
·
1 Parent(s): 07a7833

update readme system pipeline

Browse files
Files changed (1) hide show
  1. README.md +10 -7
README.md CHANGED
@@ -49,24 +49,27 @@ import torch
49
  torch.cuda.empty_cache()
50
  torch.cuda.reset_peak_memory_stats()
51
 
52
- 7/12
53
- # 1 Fine‑tune image model LoRA+QLoRA
54
  accelerate launch --deepspeed_config_file=ds_config_zero3.json train_lora.py
55
  python train_lora.py
56
 
57
- # 2 SFT 语言模型
58
  python sft_train.py
59
 
60
- # 3 Build RAG index
 
 
 
61
  python build_embeddings.py
62
 
63
- # 4 (可选) 收集偏好 → 训练 reward model
64
  python reward_model.py
65
 
66
- # 5 PPO RLHF 微调LLM
67
  python ppo_tune.py
68
 
69
- # 6 Inference with RAG
70
  python rag_infer.py
71
 
72
 
 
49
  torch.cuda.empty_cache()
50
  torch.cuda.reset_peak_memory_stats()
51
 
52
+ pipeline:
53
+ # 1 Fully Fine‑tune image model with ZeRO
54
  accelerate launch --deepspeed_config_file=ds_config_zero3.json train_lora.py
55
  python train_lora.py
56
 
57
+ # 2 SFT 120B OSS 语言模型 with QLoRA
58
  python sft_train.py
59
 
60
+ # 3 将 120B OSS 模型蒸馏到 20B OSS 模型
61
+ # 用 Teacher 模型生成 response,Student 模型用 LoRA 微调
62
+
63
+ # 4 Build RAG index
64
  python build_embeddings.py
65
 
66
+ # 5 收集偏好 → 训练 reward model
67
  python reward_model.py
68
 
69
+ # 6 PPO RLHF 微调 20B OSS 模型
70
  python ppo_tune.py
71
 
72
+ # 7 Inference with RAG
73
  python rag_infer.py
74
 
75