Spaces:
Running
Running
Commit
·
61bd54d
1
Parent(s):
07a7833
update readme system pipeline
Browse files
README.md
CHANGED
@@ -49,24 +49,27 @@ import torch
|
|
49 |
torch.cuda.empty_cache()
|
50 |
torch.cuda.reset_peak_memory_stats()
|
51 |
|
52 |
-
|
53 |
-
# 1 Fine‑tune image model
|
54 |
accelerate launch --deepspeed_config_file=ds_config_zero3.json train_lora.py
|
55 |
python train_lora.py
|
56 |
|
57 |
-
# 2 SFT 语言模型
|
58 |
python sft_train.py
|
59 |
|
60 |
-
# 3
|
|
|
|
|
|
|
61 |
python build_embeddings.py
|
62 |
|
63 |
-
#
|
64 |
python reward_model.py
|
65 |
|
66 |
-
#
|
67 |
python ppo_tune.py
|
68 |
|
69 |
-
#
|
70 |
python rag_infer.py
|
71 |
|
72 |
|
|
|
49 |
torch.cuda.empty_cache()
|
50 |
torch.cuda.reset_peak_memory_stats()
|
51 |
|
52 |
+
pipeline:
|
53 |
+
# 1 Fully fine-tune the image model with DeepSpeed ZeRO-3
|
54 |
accelerate launch --deepspeed_config_file=ds_config_zero3.json train_lora.py
|
55 |
python train_lora.py
|
56 |
|
57 |
+
# 2 SFT 120B OSS 语言模型 with QLoRA
|
58 |
python sft_train.py
|
59 |
|
60 |
+
# 3 将 120B OSS 模型蒸馏到 20B OSS 模型
|
61 |
+
# 用 teacher 模型生成 response，student 模型用 LoRA fine-tuning
|
62 |
+
|
63 |
+
# 4 Build RAG index
|
64 |
python build_embeddings.py
|
65 |
|
66 |
+
# 5 收集偏好 → 训练 reward model
|
67 |
python reward_model.py
|
68 |
|
69 |
+
# 6 PPO RLHF 微调20B OSS model
|
70 |
python ppo_tune.py
|
71 |
|
72 |
+
# 7 Inference with RAG
|
73 |
python rag_infer.py
|
74 |
|
75 |
|