Spaces:
Running
on
L4
Running
on
L4
tricktreat
committed on
Commit
·
c6067f0
1
Parent(s):
603940c
local models update
Browse files: models_server.py +21 -18
models_server.py
CHANGED
@@ -77,12 +77,7 @@ def load_pipes(local_deployment):
|
|
77 |
controlnet_sd_pipes = {}
|
78 |
if local_deployment in ["full"]:
|
79 |
other_pipes = {
|
80 |
-
|
81 |
-
"model": VisionEncoderDecoderModel.from_pretrained(f"{local_models}nlpconnect/vit-gpt2-image-captioning"),
|
82 |
-
"feature_extractor": ViTImageProcessor.from_pretrained(f"{local_models}nlpconnect/vit-gpt2-image-captioning"),
|
83 |
-
"tokenizer": AutoTokenizer.from_pretrained(f"{local_models}nlpconnect/vit-gpt2-image-captioning"),
|
84 |
-
"device": "cuda:0"
|
85 |
-
},
|
86 |
# "Salesforce/blip-image-captioning-large": {
|
87 |
# "model": BlipForConditionalGeneration.from_pretrained(f"Salesforce/blip-image-captioning-large"),
|
88 |
# "processor": BlipProcessor.from_pretrained(f"Salesforce/blip-image-captioning-large"),
|
@@ -111,14 +106,7 @@ def load_pipes(local_deployment):
|
|
111 |
"model": BaseModel.from_pretrained("JorisCos/DCCRNet_Libri1Mix_enhsingle_16k"),
|
112 |
"device": "cuda:0"
|
113 |
},
|
114 |
-
|
115 |
-
"model": Text2Speech.from_pretrained("espnet/kan-bayashi_ljspeech_vits"),
|
116 |
-
"device": "cuda:0"
|
117 |
-
},
|
118 |
-
"lambdalabs/sd-image-variations-diffusers": {
|
119 |
-
"model": DiffusionPipeline.from_pretrained(f"{local_models}lambdalabs/sd-image-variations-diffusers"), #torch_dtype=torch.float16
|
120 |
-
"device": "cuda:0"
|
121 |
-
},
|
122 |
# "CompVis/stable-diffusion-v1-4": {
|
123 |
# "model": DiffusionPipeline.from_pretrained(f"CompVis/stable-diffusion-v1-4"),
|
124 |
# "device": "cuda:0"
|
@@ -127,10 +115,7 @@ def load_pipes(local_deployment):
|
|
127 |
# "model": DiffusionPipeline.from_pretrained(f"stabilityai/stable-diffusion-2-1"),
|
128 |
# "device": "cuda:0"
|
129 |
# },
|
130 |
-
|
131 |
-
"model": DiffusionPipeline.from_pretrained(f"{local_models}runwayml/stable-diffusion-v1-5"),
|
132 |
-
"device": "cuda:0"
|
133 |
-
},
|
134 |
# "microsoft/speecht5_tts":{
|
135 |
# "processor": SpeechT5Processor.from_pretrained(f"microsoft/speecht5_tts"),
|
136 |
# "model": SpeechT5ForTextToSpeech.from_pretrained(f"microsoft/speecht5_tts"),
|
@@ -171,6 +156,24 @@ def load_pipes(local_deployment):
|
|
171 |
|
172 |
if local_deployment in ["full", "standard"]:
|
173 |
standard_pipes = {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
174 |
# "superb/wav2vec2-base-superb-ks": {
|
175 |
# "model": pipeline(task="audio-classification", model=f"superb/wav2vec2-base-superb-ks"),
|
176 |
# "device": "cuda:0"
|
|
|
77 |
controlnet_sd_pipes = {}
|
78 |
if local_deployment in ["full"]:
|
79 |
other_pipes = {
|
80 |
+
|
|
|
|
|
|
|
|
|
|
|
81 |
# "Salesforce/blip-image-captioning-large": {
|
82 |
# "model": BlipForConditionalGeneration.from_pretrained(f"Salesforce/blip-image-captioning-large"),
|
83 |
# "processor": BlipProcessor.from_pretrained(f"Salesforce/blip-image-captioning-large"),
|
|
|
106 |
"model": BaseModel.from_pretrained("JorisCos/DCCRNet_Libri1Mix_enhsingle_16k"),
|
107 |
"device": "cuda:0"
|
108 |
},
|
109 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
# "CompVis/stable-diffusion-v1-4": {
|
111 |
# "model": DiffusionPipeline.from_pretrained(f"CompVis/stable-diffusion-v1-4"),
|
112 |
# "device": "cuda:0"
|
|
|
115 |
# "model": DiffusionPipeline.from_pretrained(f"stabilityai/stable-diffusion-2-1"),
|
116 |
# "device": "cuda:0"
|
117 |
# },
|
118 |
+
|
|
|
|
|
|
|
119 |
# "microsoft/speecht5_tts":{
|
120 |
# "processor": SpeechT5Processor.from_pretrained(f"microsoft/speecht5_tts"),
|
121 |
# "model": SpeechT5ForTextToSpeech.from_pretrained(f"microsoft/speecht5_tts"),
|
|
|
156 |
|
157 |
if local_deployment in ["full", "standard"]:
|
158 |
standard_pipes = {
|
159 |
+
"nlpconnect/vit-gpt2-image-captioning":{
|
160 |
+
"model": VisionEncoderDecoderModel.from_pretrained(f"{local_models}nlpconnect/vit-gpt2-image-captioning"),
|
161 |
+
"feature_extractor": ViTImageProcessor.from_pretrained(f"{local_models}nlpconnect/vit-gpt2-image-captioning"),
|
162 |
+
"tokenizer": AutoTokenizer.from_pretrained(f"{local_models}nlpconnect/vit-gpt2-image-captioning"),
|
163 |
+
"device": "cuda:0"
|
164 |
+
},
|
165 |
+
"espnet/kan-bayashi_ljspeech_vits": {
|
166 |
+
"model": Text2Speech.from_pretrained("espnet/kan-bayashi_ljspeech_vits"),
|
167 |
+
"device": "cuda:0"
|
168 |
+
},
|
169 |
+
"lambdalabs/sd-image-variations-diffusers": {
|
170 |
+
"model": DiffusionPipeline.from_pretrained(f"{local_models}lambdalabs/sd-image-variations-diffusers"), #torch_dtype=torch.float16
|
171 |
+
"device": "cuda:0"
|
172 |
+
},
|
173 |
+
"runwayml/stable-diffusion-v1-5": {
|
174 |
+
"model": DiffusionPipeline.from_pretrained(f"{local_models}runwayml/stable-diffusion-v1-5"),
|
175 |
+
"device": "cuda:0"
|
176 |
+
},
|
177 |
# "superb/wav2vec2-base-superb-ks": {
|
178 |
# "model": pipeline(task="audio-classification", model=f"superb/wav2vec2-base-superb-ks"),
|
179 |
# "device": "cuda:0"
|