DLight1551 committed on
Commit
18cd0b0
·
1 Parent(s): ac98504
Files changed (2) hide show
  1. ixc_utils.py +4 -1
  2. ixc_utils.py~ +139 -0
ixc_utils.py CHANGED
@@ -124,7 +124,10 @@ def load_video(video_path, num_frm=32, start=None, end=None):
124
  start_idx = 0 if start is None else start
125
  end_idx = len(vid) if end is None else end
126
  all_pos = list(range(start_idx, end_idx, t_stride))
127
- images = [vid[i].numpy() for i in all_pos]
 
 
 
128
  if len(images) > num_frm:
129
  num_frm = min(num_frm, len(images))
130
  step_size = len(images) / (num_frm + 1)
 
124
  start_idx = 0 if start is None else start
125
  end_idx = len(vid) if end is None else end
126
  all_pos = list(range(start_idx, end_idx, t_stride))
127
+ try:
128
+ images = [vid[i].numpy() for i in all_pos]
129
+ except:
130
+ images = [vid[i].asnumpy() for i in all_pos]
131
  if len(images) > num_frm:
132
  num_frm = min(num_frm, len(images))
133
  step_size = len(images) / (num_frm + 1)
ixc_utils.py~ ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import numpy as np
3
+ import torchvision
4
+ from PIL import Image, ImageDraw, ImageFont
5
+ from torchvision.transforms.functional import InterpolationMode
6
+ import torchvision.transforms as transforms
7
+ from decord import VideoReader
8
+
9
def padding_336(b, pad=336):
    """Extend the bottom edge of ``b`` so its height is a multiple of ``pad``.

    Args:
        b: Image exposing ``.size`` as ``(width, height)``; it is handed to
            ``transforms.functional.pad``.
        pad: Height granularity in pixels. Defaults to 336.

    Returns:
        The padded image. Only the bottom edge grows; left, top and right
        padding are all zero. New pixels use the fill value
        ``[255, 255, 255]``.
    """
    _, height = b.size
    padded_height = int(np.ceil(height / pad) * pad)
    extra_rows = padded_height - height
    # Padding list order is [left, top, right, bottom].
    return transforms.functional.pad(
        b, [0, 0, 0, extra_rows], fill=[255, 255, 255]
    )
19
+
20
def Image_transform(img, hd_num=25):
    """Resize an image to a multiple of 560 px wide and pad its short side.

    Portrait inputs (width < height) are transposed first so the arithmetic
    always works on a landscape image, and transposed back before returning.

    Args:
        img: PIL image.
        hd_num: Upper bound on ``scale * ceil(scale / ratio)``, which limits
            how large ``scale`` (the width in units of 560 px) may grow.

    Returns:
        The resized image after ``padding_336(img, 560)`` has been applied,
        in the same orientation as the input.
    """
    width, height = img.size
    was_portrait = width < height
    if was_portrait:
        img = img.transpose(Image.TRANSPOSE)
        width, height = img.size
    aspect = width / height

    # Largest scale whose value `scale * ceil(scale / aspect)` still fits in
    # hd_num (0 if even scale 1 does not fit).
    scale = 0
    while (scale + 1) * np.ceil((scale + 1) / aspect) <= hd_num:
        scale += 1
    # Never upscale beyond the number of 560 px columns the source covers.
    scale = min(np.ceil(width / 560), scale)

    target_w = int(scale * 560)
    target_h = int(target_w / aspect)

    resized = transforms.functional.resize(img, [target_h, target_w])
    padded = padding_336(resized, 560)
    return padded.transpose(Image.TRANSPOSE) if was_portrait else padded
44
+
45
+
46
def Video_transform(img, hd_num=25):
    """Resize a frame so its short side is 560 px and pad its long side.

    Args:
        img: PIL image.
        hd_num: Accepted for signature parity with ``Image_transform``; this
            function does not read it.

    Returns:
        The resized frame with its long side padded via
        ``padding_336(..., 560)``, in the same orientation as the input.
    """
    short_side = 560

    width, height = img.size
    was_portrait = width < height
    if was_portrait:
        # Work on a landscape copy so that `height` is the short side.
        img = img.transpose(Image.TRANSPOSE)
        width, height = img.size

    target_h = short_side
    target_w = int(target_h * (width / height))
    landscape = transforms.functional.resize(img, [target_h, target_w])

    # padding_336 only pads the bottom edge, so flip the long side onto the
    # vertical axis before padding.
    padded = padding_336(landscape.transpose(Image.TRANSPOSE), short_side)

    # A portrait input is already back in portrait orientation here; a
    # landscape input needs one more transpose to be restored.
    if was_portrait:
        return padded
    return padded.transpose(Image.TRANSPOSE)
67
+
68
def frame2img(imgs, font=None):
    """Tile frames into one white canvas, each labelled ``<IMAGE idx>``.

    Every frame is first resized so its longer side is 1120 px (560 * 2).
    Landscape frames are stacked top to bottom; otherwise frames are laid
    out left to right. The orientation is decided from the last frame in
    ``imgs``. A 40 px label band precedes each frame and a 2 px black line
    separates neighbouring frames.

    Args:
        imgs: Non-empty sequence of PIL images.
        font: Optional PIL font for the labels. When omitted, ``SimHei.ttf``
            (size 40) is loaded from ``config._name_or_path`` if a
            module-level ``config`` exists, otherwise from the directory
            containing this module.

    Returns:
        A new RGB PIL image containing all frames.

    Raises:
        ValueError: If ``imgs`` is empty.
        OSError: If ``font`` is omitted and ``SimHei.ttf`` cannot be opened.
    """
    # Function-scope import: this file's import header does not import os,
    # so the original font lookup raised NameError.
    import os

    if not imgs:
        raise ValueError("frame2img() needs at least one frame")

    long_side = 560 * 2
    gap = 10   # pixels reserved between neighbouring frames
    pad = 40   # height of the label band; also the label font size

    resized = []
    for img in imgs:
        w, h = img.size
        aspect = w / h
        if w > h:
            new_w, new_h = long_side, int(long_side / aspect)
        else:
            new_w, new_h = int(long_side * aspect), long_side
        resized.append(transforms.functional.resize(img, [new_h, new_w]))

    # Layout direction follows the last input frame's original size,
    # matching the previous behaviour.
    landscape = w > h

    if font is None:
        # `config` is not defined in this file; honour it only if some
        # caller injected it into the module namespace.
        cfg = globals().get("config")
        font_dir = getattr(cfg, "_name_or_path", None)
        if font_dir is None:
            font_dir = os.path.dirname(os.path.abspath(__file__))
        font = ImageFont.truetype(os.path.join(font_dir, "SimHei.ttf"), pad)

    sizes = [im.size for im in resized]
    last = len(resized) - 1

    if landscape:
        canvas_w = max(fw for fw, _ in sizes)
        canvas_h = sum(fh + gap + pad for _, fh in sizes)
        new_img = Image.new('RGB', (canvas_w, canvas_h), 'white')
        draw = ImageDraw.Draw(new_img)
        y = 0
        for idx, (im, (_, fh)) in enumerate(zip(resized, sizes)):
            new_img.paste(im, (0, pad + y))
            draw.text((0, y), f'<IMAGE {idx}>', font=font, fill='black')
            if idx < last:
                # Separator sits in the middle of the gap below the frame.
                sep_y = pad + y + fh + gap // 2
                draw.line([(0, sep_y), (canvas_w, sep_y)], fill='black', width=2)
            y += fh + gap + pad
    else:
        canvas_w = sum(fw + gap for fw, _ in sizes)
        canvas_h = max(fh for _, fh in sizes) + pad
        new_img = Image.new('RGB', (canvas_w, canvas_h), 'white')
        draw = ImageDraw.Draw(new_img)
        x = 0
        for idx, (im, (fw, _)) in enumerate(zip(resized, sizes)):
            new_img.paste(im, (x, pad))
            draw.text((x, 0), f'<IMAGE {idx}>', font=font, fill='black')
            if idx < last:
                # Separator sits in the middle of the gap right of the frame.
                sep_x = x + fw + gap // 2
                draw.line([(sep_x, 0), (sep_x, canvas_h)], fill='black', width=2)
            x += fw + gap
    return new_img
119
+
120
def _frame_to_array(frame):
    """Convert one decoded frame to an array via ``numpy()`` or ``asnumpy()``."""
    try:
        return frame.numpy()
    except AttributeError:
        # The frame object has no ``numpy`` method; use ``asnumpy`` instead.
        return frame.asnumpy()


def load_video(video_path, num_frm=32, start=None, end=None):
    """Sample frames from a video and tile them into a single image.

    Frames are taken roughly one per second (stride = rounded average fps)
    between ``start`` and ``end``. If that yields more than ``num_frm``
    candidates, ``num_frm`` of them are picked at evenly spaced offsets.

    Args:
        video_path: Path passed to ``VideoReader``.
        num_frm: Maximum number of frames to keep.
        start: First frame index, or ``None`` for the beginning.
        end: Frame index to stop before, or ``None`` for the end of the video.

    Returns:
        The image produced by ``frame2img`` from the selected frames.
    """
    vid = VideoReader(video_path, num_threads=1)
    fps = vid.get_avg_fps()
    # Clamp to 1: an average fps below 0.5 would round to a zero stride,
    # which range() rejects.
    t_stride = max(1, int(round(float(fps))))
    start_idx = 0 if start is None else start
    end_idx = len(vid) if end is None else end
    all_pos = list(range(start_idx, end_idx, t_stride))

    # Subsample the positions before decoding so only frames that will be
    # kept are read; this selects the same frames as filtering afterwards.
    if len(all_pos) > num_frm:
        step_size = len(all_pos) / (num_frm + 1)
        all_pos = [all_pos[int(i * step_size)] for i in range(num_frm)]

    images = [Image.fromarray(_frame_to_array(vid[i])) for i in all_pos]
    return frame2img(images)
139
+