goodmodeler committed on
Commit 696ae63 · 1 Parent(s): c269ab0

ADD: LLM techs

Files changed (7)
  1. README.md +20 -1
  2. build_embeddings.py +11 -0
  3. inference.py +83 -1
  4. ppo_tune.py +19 -0
  5. requirements.txt +8 -1
  6. reward_model.py +21 -0
  7. sft_train.py +41 -0
README.md CHANGED
@@ -45,4 +45,23 @@ export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True
 
  import torch
  torch.cuda.empty_cache()
- torch.cuda.reset_peak_memory_stats()
+ torch.cuda.reset_peak_memory_stats()
+
+ 7/12
+ # 1 Fine-tune image model with LoRA + QLoRA
+ python train_lora.py
+
+ # 2 SFT the language model
+ python sft_train.py
+
+ # 3 Build RAG index
+ python build_embeddings.py
+
+ # 4 (Optional) Collect preferences → train reward model
+ python reward_model.py
+
+ # 5 PPO RLHF fine-tuning
+ python ppo_tune.py
+
+ # 6 Inference with RAG
+ python rag_infer.py
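
The steps above assume a few data files that this commit does not add. Inferred from the scripts below, a minimal sketch of the expected layout (file names and values here are illustrative, not from the repo):

nyc_ads_dataset/0001.json   ->  {"caption": "neon diner ad on a Brooklyn wall"}
data/sft_data.jsonl         ->  {"prompt": "fried chicken poster", "output": "Crispy. Golden. Gone."}   # one JSON object per line
human_prefs/0001.json       ->  {"prompt": "fried chicken poster", "good": "imgs/a.png", "bad": "imgs/b.png"}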
build_embeddings.py ADDED
@@ -0,0 +1,11 @@
+ from sentence_transformers import SentenceTransformer
+ import faiss, json, glob, os, numpy as np
+
+ model = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1")
+ texts = []; vecs = []
+ for f in glob.glob("nyc_ads_dataset/*.json"):
+     cap = json.load(open(f))["caption"]
+     texts.append(cap); vecs.append(model.encode(cap, normalize_embeddings=True))
+ vecs = np.vstack(vecs).astype("float32")
+ index = faiss.IndexFlatIP(vecs.shape[1]); index.add(vecs)
+ faiss.write_index(index, "prompt.index"); json.dump(texts, open("prompt.txt", "w"))
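
Because the embeddings are L2-normalized and the index is IndexFlatIP, inner-product search here is cosine similarity. A quick sanity check of the written index might look like this (a sketch, assuming prompt.index and prompt.txt were produced by the script above):

import faiss, json
from sentence_transformers import SentenceTransformer

texts = json.load(open("prompt.txt"))
index = faiss.read_index("prompt.index")
model = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1")

q = model.encode("fried chicken advertisement", normalize_embeddings=True).astype("float32")
scores, ids = index.search(q.reshape(1, -1), 3)   # top-3 nearest captions
for s, i in zip(scores[0], ids[0]):
    print(f"{s:.3f}  {texts[i]}")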
inference.py CHANGED
@@ -1,3 +1,4 @@
+ '''
  from diffusers import StableDiffusionPipeline
  import torch
 
@@ -12,4 +13,85 @@ image = pipe(prompt, num_inference_steps=500, guidance_scale=7.5).images[0]
 
  # Display or save the image
  image.save("output_nyc_ad.png")
- image.show()
+ image.show()
+ '''
+ '''
+ import torch, faiss, json
+ from sentence_transformers import SentenceTransformer
+ from diffusers import StableDiffusionPipeline
+
+ texts = json.load(open("prompt.txt"))
+ index = faiss.read_index("prompt.index")
+ emb = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1")
+ pipe = StableDiffusionPipeline.from_pretrained("./nyc-ad-model", torch_dtype=torch.float16).to("cuda")
+
+ def rag_prompt(query, k=3):
+     q = emb.encode(query, normalize_embeddings=True).astype("float32")
+     _, I = index.search(q.reshape(1, -1), k)
+     retrieved = " ".join(texts[i] for i in I[0])
+     return f"{retrieved}. {query}"
+
+ prompt = rag_prompt("fried chicken advertisement poster")
+ img = pipe(prompt, num_inference_steps=30, guidance_scale=7.5).images[0]
+ img.save("rag_output.png")
+ '''
+
+ import torch, faiss, json
+ from sentence_transformers import SentenceTransformer
+ from diffusers import StableDiffusionPipeline
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+
+ # Load RAG index
+ texts = json.load(open("prompt.txt"))
+ index = faiss.read_index("prompt.index")
+ emb = SentenceTransformer("mixedbread-ai/mxbai-embed-large-v1")
+
+ # Load image generation pipeline
+ pipe = StableDiffusionPipeline.from_pretrained(
+     "./nyc-ad-model",
+     torch_dtype=torch.float16
+ ).to("cuda")
+
+ # Load your own fine-tuned SFT model
+ text_model_path = "./sft-model"  # Path to your SFT-finetuned model
+ tokenizer = AutoTokenizer.from_pretrained(text_model_path)
+ text_model = AutoModelForCausalLM.from_pretrained(
+     text_model_path,
+     torch_dtype=torch.float16,
+     device_map="auto"
+ )
+
+ # Build retrieval-augmented prompt
+ def rag_prompt(query, k=3):
+     q = emb.encode(query, normalize_embeddings=True).astype("float32")
+     _, I = index.search(q.reshape(1, -1), k)
+     retrieved = " ".join(texts[i] for i in I[0])
+     return f"{retrieved}. {query}"
+
+ # Prompt for generation
+ user_prompt = "fried chicken advertisement poster"
+ full_prompt = rag_prompt(user_prompt)
+
+ # Generate image
+ image = pipe(full_prompt, num_inference_steps=30, guidance_scale=7.5).images[0]
+ image.save("rag_output.png")
+
+ # Construct input prompt compatible with SFT format
+ copy_prompt = f"""### Instruction:
+ Generate a catchy advertisement slogan for: {user_prompt}
+
+ ### Response:"""
+
+ inputs = tokenizer(copy_prompt, return_tensors="pt").to("cuda")
+ output_ids = text_model.generate(
+     **inputs,
+     max_new_tokens=30,
+     do_sample=True,
+     top_p=0.95
+ )
+ response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
+
+ # Output result
+ print("🖼️ Image saved to rag_output.png")
+ print("📝 Generated slogan:")
+ print(response.strip())
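
One detail about the slogan step above: for causal LMs, generate returns the prompt tokens followed by the new tokens, so response still contains the "### Instruction:" block. A small follow-on sketch (reusing the variable names from the script) that keeps only the newly generated text:

# Slice off the echoed prompt before decoding, keeping only the generated slogan
new_tokens = output_ids[0][inputs["input_ids"].shape[1]:]
slogan = tokenizer.decode(new_tokens, skip_special_tokens=True)
print(slogan.strip())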
ppo_tune.py ADDED
@@ -0,0 +1,19 @@
+ from trl import PPOTrainer, PPOConfig
+ from peft import PeftModel
+ import torch, random, json, glob
+ from diffusers import StableDiffusionPipeline
+ from transformers import CLIPModel, CLIPProcessor
+
+ rm = CLIPModel.from_pretrained("rm").eval().half().cuda()
+ proc = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
+ pipe = StableDiffusionPipeline.from_pretrained("./nyc-ad-model", torch_dtype=torch.float16).to("cuda")
+ ppo_cfg = PPOConfig(batch_size=1, learning_rate=1e-6, target_kl=0.2)
+ trainer = PPOTrainer(model=pipe.unet, reward_model=rm, config=ppo_cfg)
+
+ prompts = json.load(open("prompt.txt"))  # prompt.txt is a JSON list written by build_embeddings.py
+ for step in range(500):
+     p = random.choice(prompts)
+     img = pipe(p, num_inference_steps=20).images[0]
+     reward = rm(**proc(text=p, images=img, return_tensors="pt").to("cuda")).logits_per_image[0, 0].item()
+     trainer.step(prompts=[p], rewards=[reward])
+ pipe.save_pretrained("nyc-ad-model-rlhf")
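
For clarity, the reward in this loop is just the CLIP image–text similarity between the prompt and the generated image (note that trl's PPOTrainer targets causal language models, so passing pipe.unet as its policy should be read as a sketch rather than a drop-in recipe). The scoring step in isolation, with gradients disabled since nothing backpropagates through the reward model:

import torch

@torch.no_grad()
def clip_reward(prompt, image, rm, proc, device="cuda"):
    # Higher CLIP similarity between the prompt and the generated image => higher reward
    batch = proc(text=[prompt], images=image, return_tensors="pt").to(device)
    batch["pixel_values"] = batch["pixel_values"].to(rm.dtype)  # match fp16 if the model is half precision
    return rm(**batch).logits_per_image[0, 0].item()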
requirements.txt CHANGED
@@ -6,4 +6,11 @@ transformers
  xformers
  torchvision
  flickrapi
- requests
+ requests
+ peft>=0.9.0
+ bitsandbytes
+ faiss-cpu
+ sentence-transformers
+ trl[peft]
+ label-studio
+ datasets
reward_model.py ADDED
@@ -0,0 +1,21 @@
+ from transformers import CLIPProcessor, CLIPModel, TrainingArguments, Trainer
+ from PIL import Image
+ import datasets, torch, json, glob
+
+ model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
+ processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
+
+ data = []
+ for f in glob.glob("human_prefs/*.json"):
+     j = json.load(open(f)); data.append(j)  # {"prompt":…, "good":img_path, "bad":img_path}
+
+ dataset = datasets.Dataset.from_list(data)
+
+ def preprocess(ex):
+     # Same prompt paired with the preferred ("good") and rejected ("bad") image
+     inputs = processor(text=[ex["prompt"]] * 2, images=[Image.open(ex["good"]), Image.open(ex["bad"])], return_tensors="pt")
+     inputs["labels"] = torch.tensor([1, 0])
+     return inputs
+
+ dataset = dataset.map(preprocess, remove_columns=dataset.column_names)
+ args = TrainingArguments("rm_ckpt", per_device_train_batch_size=2, fp16=True, learning_rate=5e-6, num_train_epochs=3)
+ trainer = Trainer(model, args, train_dataset=dataset)
+ trainer.train(); model.save_pretrained("rm")
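
As written, Trainer has no loss defined for the pairwise labels, so this script is best read as a scaffold. RLHF reward models are usually trained with a Bradley-Terry style ranking loss over (preferred, rejected) pairs; a minimal sketch of that loss for the CLIP setup above (the reward_loss helper is illustrative, not part of the commit):

import torch
import torch.nn.functional as F

def reward_loss(model, batch):
    # batch: processor output for one preference pair -> two copies of the prompt,
    # images ordered [good, bad]
    out = model(input_ids=batch["input_ids"],
                attention_mask=batch["attention_mask"],
                pixel_values=batch["pixel_values"])
    scores = out.logits_per_image[:, 0]   # similarity of each image to the prompt
    good, bad = scores[0], scores[1]
    return -F.logsigmoid(good - bad)      # Bradley-Terry: prefer the "good" image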
sft_train.py ADDED
@@ -0,0 +1,41 @@
+ import torch, json
+ from datasets import load_dataset, Dataset
+ from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, DataCollatorForLanguageModeling
+ from peft import get_peft_model, LoraConfig, TaskType
+
+ # Load your dataset
+ data = [json.loads(l) for l in open("data/sft_data.jsonl")]
+ dataset = Dataset.from_list(data)
+
+ # Load model & tokenizer
+ base_model = "meta-llama/Llama-2-7b-hf"  # Or use Mistral, Falcon, etc.
+ tokenizer = AutoTokenizer.from_pretrained(base_model, use_fast=True)
+ tokenizer.pad_token = tokenizer.eos_token  # Llama tokenizers ship without a pad token
+ model = AutoModelForCausalLM.from_pretrained(base_model, torch_dtype=torch.float16)
+
+ # Add LoRA (optional)
+ lora_config = LoraConfig(task_type=TaskType.CAUSAL_LM, r=8, lora_alpha=32, lora_dropout=0.05,
+                          target_modules=["q_proj", "v_proj"])
+ model = get_peft_model(model, lora_config)
+
+ # Preprocessing
+ def tokenize(example):
+     prompt = f"### Instruction:\n{example['prompt']}\n\n### Response:\n{example['output']}"
+     return tokenizer(prompt, truncation=True, max_length=512, padding="max_length")
+ dataset = dataset.map(tokenize, remove_columns=dataset.column_names)
+
+ # Training setup
+ args = TrainingArguments(
+     output_dir="./sft-model",
+     per_device_train_batch_size=2,
+     num_train_epochs=3,
+     fp16=True,
+     evaluation_strategy="no",
+     save_strategy="epoch",
+     logging_steps=20,
+     learning_rate=2e-5,
+     report_to="tensorboard",
+ )
+
+ data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
+ trainer = Trainer(model=model, args=args, train_dataset=dataset, data_collator=data_collator)
+ trainer.train()
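
Note that Trainer here saves LoRA adapter checkpoints, while inference.py loads ./sft-model with plain AutoModelForCausalLM. One way to make the two ends meet is to merge the adapters into the base weights after training and save the result; a sketch appended after trainer.train(), reusing the model and tokenizer defined above:

# Merge the LoRA adapters into the base model so ./sft-model
# can be loaded with AutoModelForCausalLM.from_pretrained(...)
merged = model.merge_and_unload()
merged.save_pretrained("./sft-model")
tokenizer.save_pretrained("./sft-model")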