CiaraRowles JasonGilholme committed on
Commit
cc8658a
·
1 Parent(s): a372e74

temporalvideo usage changes (#7)

Browse files

- temporalvideo usage changes (0feb70ddaca3da8481f8c3aa38e37c0d94b38d05)


Co-authored-by: Jason Gilholme <[email protected]>

Files changed (1) hide show
  1. temporalvideo.py +98 -35
temporalvideo.py CHANGED
@@ -4,6 +4,7 @@ import requests
4
  import json
5
  import cv2
6
  import numpy as np
 
7
  import sys
8
  import torch
9
  from PIL import Image
@@ -20,6 +21,27 @@ import cv2
20
  from torchvision.io import write_jpeg
21
  import pickle
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
  device = "cuda" if torch.cuda.is_available() else "cpu"
25
 
@@ -27,11 +49,9 @@ model = raft_large(weights=Raft_Large_Weights.DEFAULT, progress=False).to(device
27
  model = model.eval()
28
 
29
  # Replace with the actual path to your image file and folder
30
- x_path = "./init.png"
31
- y_folder = "./Input_Images"
32
 
33
- output_folder = "output"
34
- os.makedirs(output_folder, exist_ok=True)
35
 
36
  def get_image_paths(folder):
37
  image_extensions = ("*.jpg", "*.jpeg", "*.png", "*.bmp")
@@ -40,7 +60,46 @@ def get_image_paths(folder):
40
  files.extend(glob.glob(os.path.join(folder, ext)))
41
  return sorted(files)
42
 
43
- y_paths = get_image_paths(y_folder)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
  def send_request(last_image_path, optical_flow_path,current_image_path):
46
  url = "http://localhost:7860/sdapi/v1/img2img"
@@ -51,7 +110,6 @@ def send_request(last_image_path, optical_flow_path,current_image_path):
51
  # Load and process the last image
52
  last_image = cv2.imread(last_image_path)
53
  last_image = cv2.cvtColor(last_image, cv2.COLOR_BGR2RGB)
54
- last_image = cv2.resize(last_image, (512, 512))
55
 
56
  # Load and process the optical flow image
57
  flow_image = cv2.imread(optical_flow_path)
@@ -79,31 +137,39 @@ def send_request(last_image_path, optical_flow_path,current_image_path):
79
  "inpainting_mask_invert": 1,
80
  "resize_mode": 0,
81
  "denoising_strength": 0.4,
82
- "prompt": "1girl, woman",
83
- "negative_prompt": "",
84
  "alwayson_scripts": {
85
  "ControlNet":{
86
  "args": [
87
  {
88
  "input_image": current_image,
89
  "module": "hed",
90
- "model": "control_hed-fp16 [13fee50b]",
91
  "weight": 0.7,
92
  "guidance": 1,
 
 
93
  },
94
  {
95
  "input_image": encoded_image,
96
- "model": "temporalnetversion2 [b146ac48]",
97
  "module": "none",
98
  "weight": 0.6,
99
  "guidance": 1,
 
 
 
 
100
  },
101
  {
102
  "input_image": current_image,
103
- "model": "control_v11p_sd15_openpose [cab727d4]",
104
  "module": "openpose_full",
105
  "weight": 0.7,
106
- "guidance":1,
 
 
107
  }
108
 
109
 
@@ -118,8 +184,8 @@ def send_request(last_image_path, optical_flow_path,current_image_path):
118
  "n_iter": 1,
119
  "steps": 20,
120
  "cfg_scale": 6,
121
- "width": 512,
122
- "height": 512,
123
  "restore_faces": True,
124
  "include_init_images": True,
125
  "override_settings": {},
@@ -164,25 +230,18 @@ def infer(frameA, frameB):
164
  img2_batch = F.resize(img2_batch, size=[512, 512])
165
  return transforms(img1_batch, img2_batch)
166
 
167
-
168
  img1_batch, img2_batch = preprocess(img1_batch, img2_batch)
169
 
170
-
171
  list_of_flows = model(img1_batch.to(device), img2_batch.to(device))
172
 
173
- predicted_flows = list_of_flows[-1]
174
-
175
 
176
- #flow_imgs = flow_to_image(predicted_flows)
 
177
 
178
- #print(flow_imgs)
179
 
180
- predicted_flow = list_of_flows[-1][0]
181
- opitcal_flow_path = os.path.join(output_folder, f"flow_{i}.png")
182
- flow_img = flow_to_image(predicted_flow).to("cpu")
183
- write_jpeg(flow_img,opitcal_flow_path)
184
-
185
-
186
  return opitcal_flow_path
187
 
188
  output_images = []
@@ -190,13 +249,13 @@ output_paths = []
190
 
191
  # Initialize with the first image path
192
 
193
- result = x_path
194
- output_image_path = os.path.join(output_folder, f"output_image_0.png")
195
 
196
  #with open(output_image_path, "wb") as f:
197
  # f.write(result)
198
 
199
- last_image_path = x_path
200
  for i in range(1, len(y_paths)):
201
  # Use the last image path and optical flow map to generate the next input
202
  optical_flow = infer(y_paths[i - 1], y_paths[i])
@@ -204,10 +263,14 @@ for i in range(1, len(y_paths)):
204
  # Modify your send_request to use the last_image_path
205
  result = send_request(last_image_path, optical_flow, y_paths[i])
206
  data = json.loads(result)
207
- encoded_image = data["images"][0]
208
- output_image_path = os.path.join(output_folder, f"output_image_{i}.png")
209
- last_image_path = output_image_path
210
- with open(output_image_path, "wb") as f:
211
- f.write(base64.b64decode(encoded_image))
212
- print(f"Written data for frame {i}:")
213
 
 
 
 
 
 
 
 
 
 
 
 
4
  import json
5
  import cv2
6
  import numpy as np
7
+ import re
8
  import sys
9
  import torch
10
  from PIL import Image
 
21
  from torchvision.io import write_jpeg
22
  import pickle
23
 
24
+ import argparse
25
+
26
+
27
def get_args(argv=None):
    """Parse the command-line options for this script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, which is
            exactly what the original zero-argument call did. Passing a list
            lets the parser be driven programmatically.

    Returns:
        argparse.Namespace with the attributes ``prompt``,
        ``negative_prompt``, ``init_image``, ``input_dir``, ``output_dir``,
        ``width`` and ``height``.
    """
    parser = argparse.ArgumentParser()

    # Text sent as "prompt" / "negative_prompt" in the img2img request.
    parser.add_argument('prompt', help="prompt sent with every img2img request")
    parser.add_argument('--negative-prompt', dest='negative_prompt', default="",
                        help="negative prompt sent with every img2img request")

    # Filesystem locations; defaults match the previously hard-coded paths.
    parser.add_argument('--init-image', dest='init_image', default="./init.png",
                        help="image used as the 'last image' for the first frame")
    parser.add_argument('--input-dir', dest='input_dir', default="./Input_Images",
                        help="folder that is scanned for the input frames")
    parser.add_argument('--output-dir', dest='output_dir', default="./output",
                        help="folder that receives flow maps and output images")

    # Size sent in the request and used to resize the optical-flow image.
    parser.add_argument('--width', default=512, type=int,
                        help="output width in pixels")
    parser.add_argument('--height', default=512, type=int,
                        help="output height in pixels")

    return parser.parse_args(argv)
41
+
42
+
43
+ args = get_args()
44
+
45
 
46
  device = "cuda" if torch.cuda.is_available() else "cpu"
47
 
 
49
  model = model.eval()
50
 
51
  # Image, input and output paths are supplied via command-line arguments (see get_args)
 
 
52
 
53
+ os.makedirs(args.output_dir, exist_ok=True)
54
+
55
 
56
  def get_image_paths(folder):
57
  image_extensions = ("*.jpg", "*.jpeg", "*.png", "*.bmp")
 
60
  files.extend(glob.glob(os.path.join(folder, ext)))
61
  return sorted(files)
62
 
63
+
64
+ y_paths = get_image_paths(args.input_dir)
65
+
66
+
67
def get_controlnet_models():
    """Look up the ControlNet model names this script needs.

    Sends a GET request to the ``/controlnet/model_list`` endpoint on
    ``localhost:7860`` and, for each of the temporalnet, hed and openpose
    patterns, keeps the first listed name that matches.

    Returns:
        Tuple ``(temporalnet_model, hed_model, openpose_model)`` holding the
        matching entries of the response's ``model_list``.

    Raises:
        Exception: if the endpoint does not answer with HTTP 200.
        AssertionError: if any of the three models is not in the list.
    """
    url = "http://localhost:7860/controlnet/model_list"

    # Raw strings: "\[" inside a normal string literal is an invalid escape
    # sequence and triggers a warning on current Python versions.
    temporalnet_re = re.compile(r"^temporalnetversion2 \[.{8}\]")
    hed_re = re.compile(r"^control_.*hed.* \[.{8}\]")
    openpose_re = re.compile(r"^control_.*openpose.* \[.{8}\]")

    response = requests.get(url)
    if response.status_code != 200:
        raise Exception("Unable to list models from the SD Web API! "
                        "Is it running and is the controlnet extension installed?")
    models = json.loads(response.content)

    temporalnet_model = None
    hed_model = None
    openpose_model = None

    # Loop variable is deliberately not called `model`, so it cannot be
    # confused with the module-level RAFT `model`.
    for model_name in models['model_list']:
        if temporalnet_model is None and temporalnet_re.match(model_name):
            temporalnet_model = model_name
        elif hed_model is None and hed_re.match(model_name):
            hed_model = model_name
        elif openpose_model is None and openpose_re.match(model_name):
            openpose_model = model_name

    # Raised explicitly instead of using `assert`, so the checks still run
    # under `python -O`; the exception type is unchanged.
    if temporalnet_model is None:
        raise AssertionError("Unable to find the temporalnet2 model! Ensure it's copied into the stable-diffusion-webui/extensions/models directory!")
    if hed_model is None:
        raise AssertionError("Unable to find the hed_model model! Ensure it's copied into the stable-diffusion-webui/extensions/models directory!")
    if openpose_model is None:
        raise AssertionError("Unable to find the openpose model! Ensure it's copied into the stable-diffusion-webui/extensions/models directory!")

    return temporalnet_model, hed_model, openpose_model
99
+
100
+
101
+ TEMPORALNET_MODEL, HED_MODEL, OPENPOSE_MODEL = get_controlnet_models()
102
+
103
 
104
  def send_request(last_image_path, optical_flow_path,current_image_path):
105
  url = "http://localhost:7860/sdapi/v1/img2img"
 
110
  # Load and process the last image
111
  last_image = cv2.imread(last_image_path)
112
  last_image = cv2.cvtColor(last_image, cv2.COLOR_BGR2RGB)
 
113
 
114
  # Load and process the optical flow image
115
  flow_image = cv2.imread(optical_flow_path)
 
137
  "inpainting_mask_invert": 1,
138
  "resize_mode": 0,
139
  "denoising_strength": 0.4,
140
+ "prompt": args.prompt,
141
+ "negative_prompt": args.negative_prompt,
142
  "alwayson_scripts": {
143
  "ControlNet":{
144
  "args": [
145
  {
146
  "input_image": current_image,
147
  "module": "hed",
148
+ "model": HED_MODEL,
149
  "weight": 0.7,
150
  "guidance": 1,
151
+ "pixel_perfect": True,
152
+ "resize_mode": 0,
153
  },
154
  {
155
  "input_image": encoded_image,
156
+ "model": TEMPORALNET_MODEL,
157
  "module": "none",
158
  "weight": 0.6,
159
  "guidance": 1,
160
+ # "processor_res": 512,
161
+ "threshold_a": 64,
162
+ "threshold_b": 64,
163
+ "resize_mode": 0,
164
  },
165
  {
166
  "input_image": current_image,
167
+ "model": OPENPOSE_MODEL,
168
  "module": "openpose_full",
169
  "weight": 0.7,
170
+ "guidance": 1,
171
+ "pixel_perfect": True,
172
+ "resize_mode": 0,
173
  }
174
 
175
 
 
184
  "n_iter": 1,
185
  "steps": 20,
186
  "cfg_scale": 6,
187
+ "width": args.width,
188
+ "height": args.height,
189
  "restore_faces": True,
190
  "include_init_images": True,
191
  "override_settings": {},
 
230
  img2_batch = F.resize(img2_batch, size=[512, 512])
231
  return transforms(img1_batch, img2_batch)
232
 
 
233
  img1_batch, img2_batch = preprocess(img1_batch, img2_batch)
234
 
 
235
  list_of_flows = model(img1_batch.to(device), img2_batch.to(device))
236
 
237
+ predicted_flow = list_of_flows[-1][0]
238
+ opitcal_flow_path = os.path.join(args.output_dir, f"flow_{i}.png")
239
 
240
+ flow_img = flow_to_image(predicted_flow).to("cpu")
241
+ flow_img = F.resize(flow_img, size=[args.height, args.width])
242
 
243
+ write_jpeg(flow_img, opitcal_flow_path)
244
 
 
 
 
 
 
 
245
  return opitcal_flow_path
246
 
247
  output_images = []
 
249
 
250
  # Initialize with the first image path
251
 
252
+ result = args.init_image
253
+ output_image_path = os.path.join(args.output_dir, f"output_image_0.png")
254
 
255
  #with open(output_image_path, "wb") as f:
256
  # f.write(result)
257
 
258
+ last_image_path = args.init_image
259
  for i in range(1, len(y_paths)):
260
  # Use the last image path and optical flow map to generate the next input
261
  optical_flow = infer(y_paths[i - 1], y_paths[i])
 
263
  # Modify your send_request to use the last_image_path
264
  result = send_request(last_image_path, optical_flow, y_paths[i])
265
  data = json.loads(result)
 
 
 
 
 
 
266
 
267
+ for j, encoded_image in enumerate(data["images"]):
268
+ if j == 0:
269
+ output_image_path = os.path.join(args.output_dir, f"output_image_{i}.png")
270
+ last_image_path = output_image_path
271
+ else:
272
+ output_image_path = os.path.join(args.output_dir, f"controlnet_image_{j}_{i}.png")
273
+
274
+ with open(output_image_path, "wb") as f:
275
+ f.write(base64.b64decode(encoded_image))
276
+ print(f"Written data for frame {i}:")