aijack committed
Commit 71ed458 · 1 Parent(s): 279ec71

Upload 5 files

Files changed (5)
  1. app.py +142 -0
  2. model.py +164 -0
  3. patch.e4e +131 -0
  4. patch.hairclip +61 -0
  5. requirements.txt +8 -0
app.py ADDED
@@ -0,0 +1,142 @@
+ #!/usr/bin/env python
+
+ from __future__ import annotations
+
+ import argparse
+ import pathlib
+
+ import gradio as gr
+
+ from model import Model
+
+
+ def parse_args() -> argparse.Namespace:
+     parser = argparse.ArgumentParser()
+     parser.add_argument('--device', type=str, default='cpu')
+     parser.add_argument('--theme', type=str)
+     parser.add_argument('--share', action='store_true')
+     parser.add_argument('--port', type=int)
+     parser.add_argument('--disable-queue',
+                         dest='enable_queue',
+                         action='store_false')
+     return parser.parse_args()
+
+
+ def load_hairstyle_list() -> list[str]:
+     with open('HairCLIP/mapper/hairstyle_list.txt') as f:
+         lines = [line.strip() for line in f.readlines()]
+     lines = [line[:-10] for line in lines]
+     return lines
+
+
+ def set_example_image(example: list) -> dict:
+     return gr.Image.update(value=example[0])
+
+
+ def update_step2_components(choice: str) -> tuple[dict, dict]:
+     return (
+         gr.Dropdown.update(visible=choice in ['hairstyle', 'both']),
+         gr.Textbox.update(visible=choice in ['color', 'both']),
+     )
+
+
+ def main():
+     args = parse_args()
+     model = Model(device=args.device)
+
+     css = '''
+ h1#title {
+   text-align: center;
+ }
+ img#teaser {
+   max-width: 1000px;
+   max-height: 600px;
+ }
+ '''
+
+     with gr.Blocks(theme=args.theme, css=css) as demo:
+         gr.Markdown('''<h1 id="title">HairCLIP</h1>
+
+ ''')
+         with gr.Box():
+             gr.Markdown('## Step 1')
+             with gr.Row():
+                 with gr.Column():
+                     with gr.Row():
+                         input_image = gr.Image(label='Input Image',
+                                                type='file')
+                     with gr.Row():
+                         preprocess_button = gr.Button('Preprocess')
+                 with gr.Column():
+                     aligned_face = gr.Image(label='Aligned Face',
+                                             type='pil',
+                                             interactive=False)
+                 with gr.Column():
+                     reconstructed_face = gr.Image(label='Reconstructed Face',
+                                                   type='numpy')
+                     latent = gr.Variable()
+
+             with gr.Row():
+                 paths = sorted(pathlib.Path('test').glob('*.jpg'))
+                 example_images = gr.Dataset(components=[input_image],
+                                             samples=[[path.as_posix()]
+                                                      for path in paths])
+
+         with gr.Box():
+             gr.Markdown('## Step 2')
+             with gr.Row():
+                 with gr.Column():
+                     with gr.Row():
+                         editing_type = gr.Radio(['hairstyle', 'color', 'both'],
+                                                 value='both',
+                                                 type='value',
+                                                 label='Editing Type')
+                     with gr.Row():
+                         hairstyles = load_hairstyle_list()
+                         hairstyle_index = gr.Dropdown(hairstyles,
+                                                       value='afro',
+                                                       type='index',
+                                                       label='Hairstyle')
+                     with gr.Row():
+                         color_description = gr.Textbox(value='red',
+                                                        label='Color')
+                     with gr.Row():
+                         run_button = gr.Button('Run')
+
+                 with gr.Column():
+                     result = gr.Image(label='Result')
+
+         gr.Markdown(
+             '<center></center>'
+         )
+
+         preprocess_button.click(fn=model.detect_and_align_face,
+                                 inputs=[input_image],
+                                 outputs=[aligned_face])
+         aligned_face.change(fn=model.reconstruct_face,
+                             inputs=[aligned_face],
+                             outputs=[reconstructed_face, latent])
+         editing_type.change(fn=update_step2_components,
+                             inputs=[editing_type],
+                             outputs=[hairstyle_index, color_description])
+         run_button.click(fn=model.generate,
+                          inputs=[
+                              editing_type,
+                              hairstyle_index,
+                              color_description,
+                              latent,
+                          ],
+                          outputs=[result])
+         example_images.click(fn=set_example_image,
+                              inputs=example_images,
+                              outputs=example_images.components)
+
+     demo.launch(
+         enable_queue=args.enable_queue,
+         server_port=args.port,
+         share=args.share,
+     )
+
+
+ if __name__ == '__main__':
+     main()
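
The app is started as a script, e.g. python app.py --device cpu (add --share for a public Gradio link and --disable-queue to turn queueing off). One detail worth noting: load_hairstyle_list() slices each line with [:-10], which assumes every entry in HairCLIP/mapper/hairstyle_list.txt ends with the 10-character suffix " hairstyle". A minimal standalone sketch of that behavior, using illustrative entries rather than the real file:

lines = ['afro hairstyle\n', 'bob cut hairstyle\n', 'crew cut hairstyle\n']  # illustrative entries
names = [line.strip()[:-10] for line in lines]  # drop the trailing ' hairstyle' suffix
print(names)  # ['afro', 'bob cut', 'crew cut']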
model.py ADDED
@@ -0,0 +1,164 @@
+ from __future__ import annotations
+
+ import argparse
+ import os
+ import pathlib
+ import subprocess
+ import sys
+ from typing import Callable, Union
+
+ import dlib
+ import huggingface_hub
+ import numpy as np
+ import PIL.Image
+ import torch
+ import torch.nn as nn
+ import torchvision.transforms as T
+
+ if os.getenv('SYSTEM') == 'spaces':
+     with open('patch.e4e') as f:
+         subprocess.run('patch -p1'.split(), cwd='encoder4editing', stdin=f)
+     with open('patch.hairclip') as f:
+         subprocess.run('patch -p1'.split(), cwd='HairCLIP', stdin=f)
+
+ app_dir = pathlib.Path(__file__).parent
+
+ e4e_dir = app_dir / 'encoder4editing'
+ sys.path.insert(0, e4e_dir.as_posix())
+
+ from models.psp import pSp
+ from utils.alignment import align_face
+
+ hairclip_dir = app_dir / 'HairCLIP'
+ mapper_dir = hairclip_dir / 'mapper'
+ sys.path.insert(0, hairclip_dir.as_posix())
+ sys.path.insert(0, mapper_dir.as_posix())
+
+ from mapper.datasets.latents_dataset_inference import LatentsDatasetInference
+ from mapper.hairclip_mapper import HairCLIPMapper
+
+ HF_TOKEN = os.environ['HF_TOKEN']
+
+
+ class Model:
+     def __init__(self, device: Union[torch.device, str]):
+         self.device = torch.device(device)
+         self.landmark_model = self._create_dlib_landmark_model()
+         self.e4e = self._load_e4e()
+         self.hairclip = self._load_hairclip()
+         self.transform = self._create_transform()
+
+     @staticmethod
+     def _create_dlib_landmark_model():
+         path = huggingface_hub.hf_hub_download(
+             'hysts/dlib_face_landmark_model',
+             'shape_predictor_68_face_landmarks.dat',
+             use_auth_token=HF_TOKEN)
+         return dlib.shape_predictor(path)
+
+     def _load_e4e(self) -> nn.Module:
+         ckpt_path = huggingface_hub.hf_hub_download('hysts/e4e',
+                                                     'e4e_ffhq_encode.pt',
+                                                     use_auth_token=HF_TOKEN)
+         ckpt = torch.load(ckpt_path, map_location='cpu')
+         opts = ckpt['opts']
+         opts['device'] = self.device.type
+         opts['checkpoint_path'] = ckpt_path
+         opts = argparse.Namespace(**opts)
+         model = pSp(opts)
+         model.to(self.device)
+         model.eval()
+         return model
+
+     def _load_hairclip(self) -> nn.Module:
+         ckpt_path = huggingface_hub.hf_hub_download('hysts/HairCLIP',
+                                                     'hairclip.pt',
+                                                     use_auth_token=HF_TOKEN)
+         ckpt = torch.load(ckpt_path, map_location='cpu')
+         opts = ckpt['opts']
+         opts['device'] = self.device.type
+         opts['checkpoint_path'] = ckpt_path
+         opts['editing_type'] = 'both'
+         opts['input_type'] = 'text'
+         opts['hairstyle_description'] = 'HairCLIP/mapper/hairstyle_list.txt'
+         opts['color_description'] = 'red'
+         opts = argparse.Namespace(**opts)
+         model = HairCLIPMapper(opts)
+         model.to(self.device)
+         model.eval()
+         return model
+
+     @staticmethod
+     def _create_transform() -> Callable:
+         transform = T.Compose([
+             T.Resize(256),
+             T.CenterCrop(256),
+             T.ToTensor(),
+             T.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
+         ])
+         return transform
+
+     def detect_and_align_face(self, image) -> PIL.Image.Image:
+         image = align_face(filepath=image.name, predictor=self.landmark_model)
+         return image
+
+     @staticmethod
+     def denormalize(tensor: torch.Tensor) -> torch.Tensor:
+         return torch.clamp((tensor + 1) / 2 * 255, 0, 255).to(torch.uint8)
+
+     def postprocess(self, tensor: torch.Tensor) -> np.ndarray:
+         tensor = self.denormalize(tensor)
+         return tensor.cpu().numpy().transpose(1, 2, 0)
+
+     @torch.inference_mode()
+     def reconstruct_face(
+             self, image: PIL.Image.Image) -> tuple[np.ndarray, torch.Tensor]:
+         input_data = self.transform(image).unsqueeze(0).to(self.device)
+         reconstructed_images, latents = self.e4e(input_data,
+                                                  randomize_noise=False,
+                                                  return_latents=True)
+         reconstructed = torch.clamp(reconstructed_images[0].detach(), -1, 1)
+         reconstructed = self.postprocess(reconstructed)
+         return reconstructed, latents[0]
+
+     @torch.inference_mode()
+     def generate(self, editing_type: str, hairstyle_index: int,
+                  color_description: str, latent: torch.Tensor) -> np.ndarray:
+         opts = self.hairclip.opts
+         opts.editing_type = editing_type
+         opts.color_description = color_description
+
+         if editing_type == 'color':
+             hairstyle_index = 0
+
+         device = torch.device(opts.device)
+
+         dataset = LatentsDatasetInference(latents=latent.unsqueeze(0).cpu(),
+                                           opts=opts)
+         w, hairstyle_text_inputs_list, color_text_inputs_list = dataset[0][:3]
+
+         w = w.unsqueeze(0).to(device)
+         hairstyle_text_inputs = hairstyle_text_inputs_list[
+             hairstyle_index].unsqueeze(0).to(device)
+         color_text_inputs = color_text_inputs_list[0].unsqueeze(0).to(device)
+
+         hairstyle_tensor_hairmasked = torch.Tensor([0]).unsqueeze(0).to(device)
+         color_tensor_hairmasked = torch.Tensor([0]).unsqueeze(0).to(device)
+
+         w_hat = w + 0.1 * self.hairclip.mapper(
+             w,
+             hairstyle_text_inputs,
+             color_text_inputs,
+             hairstyle_tensor_hairmasked,
+             color_tensor_hairmasked,
+         )
+         x_hat, _ = self.hairclip.decoder(
+             [w_hat],
+             input_is_latent=True,
+             return_latents=True,
+             randomize_noise=False,
+             truncation=1,
+         )
+         res = torch.clamp(x_hat[0].detach(), -1, 1)
+         res = self.postprocess(res)
+         return res
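
For reference, a minimal sketch of driving Model directly, outside Gradio. It assumes HF_TOKEN is exported, the encoder4editing and HairCLIP repositories sit next to model.py as expected above, and that a photo named face.jpg exists; SimpleNamespace(name=...) merely mimics the temporary-file object that gr.Image(type='file') hands to detect_and_align_face:

import types

from model import Model

model = Model(device='cpu')
face_file = types.SimpleNamespace(name='face.jpg')       # hypothetical input photo
aligned = model.detect_and_align_face(face_file)         # PIL.Image.Image
reconstructed, latent = model.reconstruct_face(aligned)  # np.ndarray, torch.Tensor
result = model.generate(editing_type='both',
                        hairstyle_index=0,
                        color_description='red',
                        latent=latent)
print(result.shape)                                      # (H, W, 3) uint8 array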
patch.e4e ADDED
@@ -0,0 +1,131 @@
+ diff --git a/models/stylegan2/op/fused_act.py b/models/stylegan2/op/fused_act.py
+ index 973a84f..6854b97 100644
+ --- a/models/stylegan2/op/fused_act.py
+ +++ b/models/stylegan2/op/fused_act.py
+ @@ -2,17 +2,18 @@ import os
+
+  import torch
+  from torch import nn
+ +from torch.nn import functional as F
+  from torch.autograd import Function
+  from torch.utils.cpp_extension import load
+
+ -module_path = os.path.dirname(__file__)
+ -fused = load(
+ -    'fused',
+ -    sources=[
+ -        os.path.join(module_path, 'fused_bias_act.cpp'),
+ -        os.path.join(module_path, 'fused_bias_act_kernel.cu'),
+ -    ],
+ -)
+ +#module_path = os.path.dirname(__file__)
+ +#fused = load(
+ +#    'fused',
+ +#    sources=[
+ +#        os.path.join(module_path, 'fused_bias_act.cpp'),
+ +#        os.path.join(module_path, 'fused_bias_act_kernel.cu'),
+ +#    ],
+ +#)
+
+
+  class FusedLeakyReLUFunctionBackward(Function):
+ @@ -82,4 +83,18 @@ class FusedLeakyReLU(nn.Module):
+
+
+  def fused_leaky_relu(input, bias, negative_slope=0.2, scale=2 ** 0.5):
+ -    return FusedLeakyReLUFunction.apply(input, bias, negative_slope, scale)
+ +    if input.device.type == "cpu":
+ +        if bias is not None:
+ +            rest_dim = [1] * (input.ndim - bias.ndim - 1)
+ +            return (
+ +                F.leaky_relu(
+ +                    input + bias.view(1, bias.shape[0], *rest_dim), negative_slope=0.2
+ +                )
+ +                * scale
+ +            )
+ +
+ +        else:
+ +            return F.leaky_relu(input, negative_slope=0.2) * scale
+ +
+ +    else:
+ +        return FusedLeakyReLUFunction.apply(input, bias, negative_slope, scale)
+ diff --git a/models/stylegan2/op/upfirdn2d.py b/models/stylegan2/op/upfirdn2d.py
+ index 7bc5a1e..5465d1a 100644
+ --- a/models/stylegan2/op/upfirdn2d.py
+ +++ b/models/stylegan2/op/upfirdn2d.py
+ @@ -1,17 +1,18 @@
+  import os
+
+  import torch
+ +from torch.nn import functional as F
+  from torch.autograd import Function
+  from torch.utils.cpp_extension import load
+
+ -module_path = os.path.dirname(__file__)
+ -upfirdn2d_op = load(
+ -    'upfirdn2d',
+ -    sources=[
+ -        os.path.join(module_path, 'upfirdn2d.cpp'),
+ -        os.path.join(module_path, 'upfirdn2d_kernel.cu'),
+ -    ],
+ -)
+ +#module_path = os.path.dirname(__file__)
+ +#upfirdn2d_op = load(
+ +#    'upfirdn2d',
+ +#    sources=[
+ +#        os.path.join(module_path, 'upfirdn2d.cpp'),
+ +#        os.path.join(module_path, 'upfirdn2d_kernel.cu'),
+ +#    ],
+ +#)
+
+
+  class UpFirDn2dBackward(Function):
+ @@ -97,8 +98,8 @@ class UpFirDn2d(Function):
+
+          ctx.save_for_backward(kernel, torch.flip(kernel, [0, 1]))
+
+ -        out_h = (in_h * up_y + pad_y0 + pad_y1 - kernel_h) // down_y + 1
+ -        out_w = (in_w * up_x + pad_x0 + pad_x1 - kernel_w) // down_x + 1
+ +        out_h = (in_h * up_y + pad_y0 + pad_y1 - kernel_h + down_y) // down_y
+ +        out_w = (in_w * up_x + pad_x0 + pad_x1 - kernel_w + down_x) // down_x
+          ctx.out_size = (out_h, out_w)
+
+          ctx.up = (up_x, up_y)
+ @@ -140,9 +141,13 @@ class UpFirDn2d(Function):
+
+
+  def upfirdn2d(input, kernel, up=1, down=1, pad=(0, 0)):
+ -    out = UpFirDn2d.apply(
+ -        input, kernel, (up, up), (down, down), (pad[0], pad[1], pad[0], pad[1])
+ -    )
+ +    if input.device.type == "cpu":
+ +        out = upfirdn2d_native(input, kernel, up, up, down, down, pad[0], pad[1], pad[0], pad[1])
+ +
+ +    else:
+ +        out = UpFirDn2d.apply(
+ +            input, kernel, (up, up), (down, down), (pad[0], pad[1], pad[0], pad[1])
+ +        )
+
+      return out
+
+ @@ -150,6 +155,9 @@ def upfirdn2d(input, kernel, up=1, down=1, pad=(0, 0)):
+  def upfirdn2d_native(
+      input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1, pad_y0, pad_y1
+  ):
+ +    _, channel, in_h, in_w = input.shape
+ +    input = input.reshape(-1, in_h, in_w, 1)
+ +
+      _, in_h, in_w, minor = input.shape
+      kernel_h, kernel_w = kernel.shape
+
+ @@ -180,5 +188,9 @@ def upfirdn2d_native(
+          in_w * up_x + pad_x0 + pad_x1 - kernel_w + 1,
+      )
+      out = out.permute(0, 2, 3, 1)
+ +    out = out[:, ::down_y, ::down_x, :]
+ +
+ +    out_h = (in_h * up_y + pad_y0 + pad_y1 - kernel_h + down_y) // down_y
+ +    out_w = (in_w * up_x + pad_x0 + pad_x1 - kernel_w + down_x) // down_x
+
+ -    return out[:, ::down_y, ::down_x, :]
+ +    return out.view(-1, channel, out_h, out_w)
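
This patch lets the Space run without compiling the StyleGAN2 CUDA extensions: on CPU, the fused bias + LeakyReLU op and upfirdn2d fall back to plain PyTorch, and upfirdn2d_native's output-size math is adjusted so strided downsampling returns the expected shape. A small self-contained sketch of the LeakyReLU fallback idea (not the patched module itself):

import torch
import torch.nn.functional as F

def fused_leaky_relu_cpu(x, bias, negative_slope=0.2, scale=2 ** 0.5):
    # Add the per-channel bias, apply LeakyReLU, then rescale, mirroring the
    # computation of the fused CUDA kernel.
    rest_dim = [1] * (x.ndim - bias.ndim - 1)
    return F.leaky_relu(x + bias.view(1, bias.shape[0], *rest_dim),
                        negative_slope=negative_slope) * scale

x = torch.randn(1, 8, 4, 4)
bias = torch.zeros(8)
print(fused_leaky_relu_cpu(x, bias).shape)  # torch.Size([1, 8, 4, 4])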
patch.hairclip ADDED
@@ -0,0 +1,61 @@
+ diff --git a/mapper/latent_mappers.py b/mapper/latent_mappers.py
+ index 56b9c55..f0dd005 100644
+ --- a/mapper/latent_mappers.py
+ +++ b/mapper/latent_mappers.py
+ @@ -19,7 +19,7 @@ class ModulationModule(Module):
+
+      def forward(self, x, embedding, cut_flag):
+          x = self.fc(x)
+ -        x = self.norm(x)
+ +        x = self.norm(x)
+          if cut_flag == 1:
+              return x
+          gamma = self.gamma_function(embedding.float())
+ @@ -39,20 +39,20 @@ class SubHairMapper(Module):
+      def forward(self, x, embedding, cut_flag=0):
+          x = self.pixelnorm(x)
+          for modulation_module in self.modulation_module_list:
+ -            x = modulation_module(x, embedding, cut_flag)
+ +            x = modulation_module(x, embedding, cut_flag)
+          return x
+
+ -class HairMapper(Module):
+ +class HairMapper(Module):
+      def __init__(self, opts):
+          super(HairMapper, self).__init__()
+          self.opts = opts
+ -        self.clip_model, self.preprocess = clip.load("ViT-B/32", device="cuda")
+ +        self.clip_model, self.preprocess = clip.load("ViT-B/32", device=opts.device)
+          self.transform = transforms.Compose([transforms.Normalize((0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711))])
+          self.face_pool = torch.nn.AdaptiveAvgPool2d((224, 224))
+          self.hairstyle_cut_flag = 0
+          self.color_cut_flag = 0
+
+ -        if not opts.no_coarse_mapper:
+ +        if not opts.no_coarse_mapper:
+              self.course_mapping = SubHairMapper(opts, 4)
+          if not opts.no_medium_mapper:
+              self.medium_mapping = SubHairMapper(opts, 4)
+ @@ -70,13 +70,13 @@ class HairMapper(Module):
+          elif hairstyle_tensor.shape[1] != 1:
+              hairstyle_embedding = self.gen_image_embedding(hairstyle_tensor, self.clip_model, self.preprocess).unsqueeze(1).repeat(1, 18, 1).detach()
+          else:
+ -            hairstyle_embedding = torch.ones(x.shape[0], 18, 512).cuda()
+ +            hairstyle_embedding = torch.ones(x.shape[0], 18, 512).to(self.opts.device)
+          if color_text_inputs.shape[1] != 1:
+              color_embedding = self.clip_model.encode_text(color_text_inputs).unsqueeze(1).repeat(1, 18, 1).detach()
+          elif color_tensor.shape[1] != 1:
+              color_embedding = self.gen_image_embedding(color_tensor, self.clip_model, self.preprocess).unsqueeze(1).repeat(1, 18, 1).detach()
+          else:
+ -            color_embedding = torch.ones(x.shape[0], 18, 512).cuda()
+ +            color_embedding = torch.ones(x.shape[0], 18, 512).to(self.opts.device)
+
+
+          if (hairstyle_text_inputs.shape[1] == 1) and (hairstyle_tensor.shape[1] == 1):
+ @@ -106,4 +106,4 @@ class HairMapper(Module):
+              x_fine = torch.zeros_like(x_fine)
+
+          out = torch.cat([x_coarse, x_medium, x_fine], dim=1)
+ -        return out
+
+ +        return out
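
Apart from whitespace cleanup, the functional change in this patch is that CLIP and the fallback embeddings are created on opts.device instead of a hard-coded .cuda() call, so HairCLIP's mapper also runs on CPU. A minimal sketch of that pattern, with opts standing in for the mapper's options namespace:

import argparse

import torch

opts = argparse.Namespace(device='cpu')              # or 'cuda' on a GPU machine

embedding = torch.ones(1, 18, 512).to(opts.device)   # patched, device-agnostic form
# embedding = torch.ones(1, 18, 512).cuda()          # original, CUDA-only form
print(embedding.device)                              # cpu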
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ dlib==19.23.0
+ numpy==1.22.3
+ opencv-python-headless==4.5.5.64
+ Pillow==9.1.0
+ scipy==1.8.0
+ torch==1.11.0
+ torchvision==0.12.0
+ git+https://github.com/openai/CLIP.git