Commit 696ae63 · Parent: c269ab0
ADD: LLM techs

Files changed:
- README.md           +20 -1
- build_embeddings.py +11 -0
- inference.py        +83 -1
- ppo_tune.py         +19 -0
- requirements.txt     +8 -1
- reward_model.py     +21 -0
- sft_train.py        +41 -0
README.md CHANGED
@@ -45,4 +45,23 @@ export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
 
 import torch
 torch.cuda.empty_cache()
-torch.cuda.reset_peak_memory_stats()
+torch.cuda.reset_peak_memory_stats()
+
+7/12
+# 1 Fine-tune image model LoRA+QLoRA
+python train_lora.py
+
+# 2 SFT the language model
+python sft_train.py
+
+# 3 Build RAG index
+python build_embeddings.py
+
+# 4 (Optional) Collect preferences → train reward model
+python reward_model.py
+
+# 5 PPO RLHF fine-tuning
+python ppo_tune.py
+
+# 6 Inference with RAG
+python rag_infer.py
build_embeddings.py ADDED
@@ -0,0 +1,11 @@
+from sentence_transformers import SentenceTransformer
+import faiss, json, glob, os, numpy as np
+
+model = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1")
+texts=[]; vecs=[]
+for f in glob.glob("nyc_ads_dataset/*.json"):
+    cap=json.load(open(f))["caption"]
+    texts.append(cap); vecs.append(model.encode(cap,normalize_embeddings=True))
+vecs=np.vstack(vecs).astype("float32")
+index=faiss.IndexFlatIP(vecs.shape[1]); index.add(vecs)
+faiss.write_index(index,"prompt.index"); json.dump(texts,open("prompt.txt","w"))
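For reference, build_embeddings.py assumes every file under nyc_ads_dataset/ is a JSON object carrying a "caption" key; an illustrative record (values hypothetical):

{"caption": "Vintage NYC subway poster advertising fried chicken, bold red lettering"}

Note that prompt.txt, despite the extension, holds the caption list serialized as JSON, so downstream scripts should read it with json.load rather than line by line.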
inference.py CHANGED
@@ -1,3 +1,4 @@
+'''
 from diffusers import StableDiffusionPipeline
 import torch
 
@@ -12,4 +13,85 @@ image = pipe(prompt, num_inference_steps=500, guidance_scale=7.5).images[0]
 
 # Display or save the image
 image.save("output_nyc_ad.png")
-image.show()
+image.show()
+'''
+'''
+import torch, faiss, json
+from sentence_transformers import SentenceTransformer
+from diffusers import StableDiffusionPipeline
+
+texts=json.load(open("prompt.txt"))
+index=faiss.read_index("prompt.index")
+emb=SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1")
+pipe=StableDiffusionPipeline.from_pretrained("./nyc-ad-model",torch_dtype=torch.float16).to("cuda")
+
+def rag_prompt(query,k=3):
+    q=emb.encode(query,normalize_embeddings=True).astype("float32")
+    _,I=index.search(q.reshape(1,-1),k)
+    retrieved=" ".join(texts[i] for i in I[0])
+    return f"{retrieved}. {query}"
+
+prompt=rag_prompt("fried chicken advertisement poster")
+img=pipe(prompt,num_inference_steps=30,guidance_scale=7.5).images[0]
+img.save("rag_output.png")
+'''
+
+import torch, faiss, json
+from sentence_transformers import SentenceTransformer
+from diffusers import StableDiffusionPipeline
+from transformers import AutoTokenizer, AutoModelForCausalLM
+
+# Load RAG index
+texts = json.load(open("prompt.txt"))
+index = faiss.read_index("prompt.index")
+emb = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1")
+
+# Load image generation pipeline
+pipe = StableDiffusionPipeline.from_pretrained(
+    "./nyc-ad-model",
+    torch_dtype=torch.float16
+).to("cuda")
+
+# Load your own fine-tuned SFT model
+text_model_path = "./sft-model"  # Path to your SFT-finetuned model
+tokenizer = AutoTokenizer.from_pretrained(text_model_path)
+text_model = AutoModelForCausalLM.from_pretrained(
+    text_model_path,
+    torch_dtype=torch.float16,
+    device_map="auto"
+)
+
+# Build retrieval-augmented prompt
+def rag_prompt(query, k=3):
+    q = emb.encode(query, normalize_embeddings=True).astype("float32")
+    _, I = index.search(q.reshape(1, -1), k)
+    retrieved = " ".join(texts[i] for i in I[0])
+    return f"{retrieved}. {query}"
+
+# Prompt for generation
+user_prompt = "fried chicken advertisement poster"
+full_prompt = rag_prompt(user_prompt)
+
+# Generate image
+image = pipe(full_prompt, num_inference_steps=30, guidance_scale=7.5).images[0]
+image.save("rag_output.png")
+
+# Construct input prompt compatible with SFT format
+copy_prompt = f"""### Instruction:
+Generate a catchy advertisement slogan for: {user_prompt}
+
+### Response:"""
+
+inputs = tokenizer(copy_prompt, return_tensors="pt").to("cuda")
+output_ids = text_model.generate(
+    **inputs,
+    max_new_tokens=30,
+    do_sample=True,
+    top_p=0.95
+)
+response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+
+# Output result
+print("🖼️ Image saved to rag_output.png")
+print("📝 Generated slogan:")
+print(response.split("### Response:")[-1].strip())  # decode() echoes the prompt; keep only the completion
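The README's step 6 runs python rag_infer.py, but this commit places the retrieval logic in inference.py. A minimal rag_infer.py consistent with both would look like the sketch below; the file name comes from the README, while its contents are an assumption assembled from the commented-out RAG block above.

# rag_infer.py -- a sketch of the entry point README step 6 expects
import sys, json, faiss, torch
from sentence_transformers import SentenceTransformer
from diffusers import StableDiffusionPipeline

texts = json.load(open("prompt.txt"))
index = faiss.read_index("prompt.index")
emb = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1")
pipe = StableDiffusionPipeline.from_pretrained("./nyc-ad-model", torch_dtype=torch.float16).to("cuda")

query = " ".join(sys.argv[1:]) or "fried chicken advertisement poster"
q = emb.encode(query, normalize_embeddings=True).astype("float32").reshape(1, -1)
_, I = index.search(q, 3)  # top-3 nearest captions
prompt = " ".join(texts[i] for i in I[0]) + ". " + query
pipe(prompt, num_inference_steps=30, guidance_scale=7.5).images[0].save("rag_output.png")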
ppo_tune.py ADDED
@@ -0,0 +1,19 @@
+from trl import PPOTrainer, PPOConfig
+from peft import PeftModel
+import torch, random, json, glob
+from diffusers import StableDiffusionPipeline
+from transformers import CLIPModel, CLIPProcessor  # imported directly: importing reward_model would re-run its training loop
+
+rm=CLIPModel.from_pretrained("rm").eval().half().cuda()
+proc=CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
+pipe=StableDiffusionPipeline.from_pretrained("./nyc-ad-model",torch_dtype=torch.float16).to("cuda")
+ppo_cfg=PPOConfig(batch_size=1,learning_rate=1e-6,target_kl=0.2)
+trainer=PPOTrainer(model=pipe.unet, reward_model=rm, config=ppo_cfg)  # schematic: trl's PPOTrainer targets language models, not a diffusion UNet
+
+prompts=json.load(open("prompt.txt"))  # prompt.txt is a JSON list, not one prompt per line
+for step in range(500):
+    p=random.choice(prompts)
+    img=pipe(p,num_inference_steps=20).images[0]
+    reward=rm(**proc(text=p,images=img,return_tensors="pt").to("cuda")).logits_per_image[0,0].item()
+    trainer.step(prompts=[p], rewards=[reward])
+pipe.save_pretrained("nyc-ad-model-rlhf")
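As noted in the comments, trl's PPOTrainer is built for language models and accepts neither a diffusion UNet nor a reward_model argument, so the script above is schematic. A diffusion-compatible variant would go through trl's DDPO support instead; the following is a sketch, assuming trl with the diffusers extra installed and the same CLIP reward checkpoint, with the config values carried over as illustrative choices rather than tuned settings.

# A DDPO (diffusion PPO) sketch using trl's DDPOTrainer; API names per trl's DDPO example.
import json, random, torch
from torchvision.transforms.functional import to_pil_image
from transformers import CLIPModel, CLIPProcessor
from trl import DDPOConfig, DDPOTrainer, DefaultDDPOStableDiffusionPipeline

rm = CLIPModel.from_pretrained("rm").eval().half().cuda()
proc = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
prompts = json.load(open("prompt.txt"))

def prompt_fn():
    # DDPO samples one prompt per rollout; the metadata dict is required by the API
    return random.choice(prompts), {}

def reward_fn(images, prompts, metadata):
    # images arrive as a tensor batch; convert to PIL for CLIPProcessor
    pil = [to_pil_image(img.clamp(0, 1).float().cpu()) for img in images]
    inputs = proc(text=list(prompts), images=pil, return_tensors="pt", padding=True).to("cuda")
    with torch.no_grad():
        sim = rm(**inputs).logits_per_image.diagonal()  # image-text similarity per pair
    return sim, {}

config = DDPOConfig(num_epochs=100, sample_num_steps=20, sample_batch_size=1, train_batch_size=1)
pipeline = DefaultDDPOStableDiffusionPipeline("./nyc-ad-model")
trainer = DDPOTrainer(config, reward_fn, prompt_fn, pipeline)
trainer.train()
pipeline.sd_pipeline.save_pretrained("nyc-ad-model-rlhf")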
requirements.txt CHANGED
@@ -6,4 +6,11 @@ transformers
 xformers
 torchvision
 flickrapi
-requests
+requests
+peft>=0.9.0
+bitsandbytes
+faiss-cpu
+sentence-transformers
+trl[peft]
+label-studio
+datasets
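The new dependencies map onto the README pipeline: peft and bitsandbytes back the LoRA/QLoRA fine-tunes, faiss-cpu and sentence-transformers build and query the RAG index, trl[peft] supplies PPO, label-studio is presumably for collecting the human preferences consumed by reward_model.py, and datasets feeds the SFT and reward trainers.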
reward_model.py ADDED
@@ -0,0 +1,21 @@
+from transformers import CLIPProcessor, CLIPModel, TrainingArguments, Trainer
+import datasets, torch, json, glob; from PIL import Image
+
+model=CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
+processor=CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
+
+data=[]
+for f in glob.glob("human_prefs/*.json"):
+    j=json.load(open(f)); data.append(j)  # {"prompt":…, "good":img_path, "bad":img_path}
+
+dataset=datasets.Dataset.from_list(data)
+
+def preprocess(ex):
+    inputs=processor(text=[ex["prompt"]]*2, images=[Image.open(ex["good"]), Image.open(ex["bad"])], return_tensors="pt")  # open image paths; [prompt]*2 pairs the prompt with each image
+    inputs["labels"]=torch.tensor([1,0])  # preferred image first; CLIPModel has no labels head, see pairwise loss sketch below
+    return inputs
+
+dataset=dataset.map(preprocess,remove_columns=dataset.column_names)
+args=TrainingArguments("rm_ckpt",per_device_train_batch_size=2,fp16=True,learning_rate=5e-6,num_train_epochs=3)
+trainer=Trainer(model,args,train_dataset=dataset)
+trainer.train(); model.save_pretrained("rm")
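As committed, the plain Trainer has no loss to optimize: CLIPModel does not consume a labels tensor. A pairwise (Bradley-Terry) objective on CLIP image-text similarity is one way to make this reward model trainable; the subclass below is a sketch of ours, not part of the commit, and assumes the per-example batches produced by preprocess above.

import torch.nn.functional as F
from transformers import Trainer

class PairwiseRewardTrainer(Trainer):
    # Score = CLIP image-text similarity; train the preferred image to outscore the rejected one.
    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        inputs.pop("labels")                        # ordering convention: preferred image first
        out = model(**inputs)
        scores = out.logits_per_image.diagonal()    # similarity of each image with its prompt copy
        loss = -F.logsigmoid(scores[0] - scores[1]) # Bradley-Terry pairwise loss
        return (loss, out) if return_outputs else loss

Dropping this in is a one-line change to the script above: trainer=PairwiseRewardTrainer(model,args,train_dataset=dataset).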
sft_train.py ADDED
@@ -0,0 +1,41 @@
+import torch, json
+from datasets import load_dataset, Dataset
+from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, DataCollatorForLanguageModeling
+from peft import get_peft_model, LoraConfig, TaskType
+
+# Load your dataset
+data = [json.loads(l) for l in open("data/sft_data.jsonl")]
+dataset = Dataset.from_list(data)
+
+# Load model & tokenizer
+base_model = "meta-llama/Llama-2-7b-hf"  # Or use Mistral, Falcon, etc.
+tokenizer = AutoTokenizer.from_pretrained(base_model, use_fast=True); tokenizer.pad_token = tokenizer.eos_token  # Llama ships no pad token; required for padding="max_length"
+model = AutoModelForCausalLM.from_pretrained(base_model, torch_dtype=torch.float16)
+
+# Add LoRA (optional)
+lora_config = LoraConfig(task_type=TaskType.CAUSAL_LM, r=8, lora_alpha=32, lora_dropout=0.05,
+                         target_modules=["q_proj", "v_proj"])
+model = get_peft_model(model, lora_config)
+
+# Preprocessing
+def tokenize(example):
+    prompt = f"### Instruction:\n{example['prompt']}\n\n### Response:\n{example['output']}"
+    return tokenizer(prompt, truncation=True, max_length=512, padding="max_length")
+dataset = dataset.map(tokenize, remove_columns=dataset.column_names)
+
+# Training setup
+args = TrainingArguments(
+    output_dir="./sft-model",
+    per_device_train_batch_size=2,
+    num_train_epochs=3,
+    fp16=True,
+    evaluation_strategy="no",
+    save_strategy="epoch",
+    logging_steps=20,
+    learning_rate=2e-5,
+    report_to="tensorboard",
+)
+
+data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
+trainer = Trainer(model=model, args=args, train_dataset=dataset, data_collator=data_collator)
+trainer.train()