Update inference_manager.py

inference_manager.py (+212, −18)
@@ -9,16 +9,19 @@ from huggingface_hub import hf_hub_download, snapshot_download
 from pathlib import Path
 from diffusers import EulerAncestralDiscreteScheduler, DPMSolverMultistepScheduler, DPMSolverSDEScheduler
 from diffusers.models.attention_processor import AttnProcessor2_0
-import os
 from cryptography.hazmat.primitives.asymmetric import rsa, padding
 from cryptography.hazmat.primitives import serialization, hashes
 from cryptography.hazmat.backends import default_backend
 from cryptography.hazmat.primitives.asymmetric import utils
 import base64
 import json
+import ipown
 import jwt
 import glob
 import traceback
+from insightface.app import FaceAnalysis
+from pipeline_stable_diffusion_xl_instantid import StableDiffusionXLInstantIDPipeline, draw_kps
+import cv2

 #from onediffx import compile_pipe, save_pipe, load_pipe

@@ -66,37 +69,57 @@ class AuthHelper:
             print("Invalid token:", e)
             raise

+    def check_auth(self, request, token):
+        import hashlib  # imported inside the method: a class-level import would not be visible here
+
+        # Extract parameters from the request
+        params = dict(request.query_params)
         if params.get("_skip_token_passkey", "") == "nsfwaisio_125687":
             return True
+
+        # Gather request-specific information
+        sip = request.client.host
+        shost = request.headers.get("Host", "")
+        sreferer = request.headers.get("Referer", "")
+        suseragent = request.headers.get("User-Agent", "")
+
+        print(sip, shost, sreferer, suseragent)
+
+        # Decode the JWT token
         jwt_data = self.decode_jwt(token)
+        jwt_auth = jwt_data.get("auth", "")
+
+        if not jwt_auth:
+            raise Exception("Missing auth field in token")
+
+        # Create the MD5 hash of ip + host + referer + useragent
+        auth_string = f"{sip}{shost}{sreferer}{suseragent}"
+        calculated_md5 = hashlib.md5(auth_string.encode('utf-8')).hexdigest()
+
+        print(f"Calculated MD5: {calculated_md5}, JWT Auth: {jwt_auth}")
+
+        # Compare the calculated hash with the `auth` field from the JWT
+        if calculated_md5 == jwt_auth:
             return True
+
+        raise Exception("Invalid authentication")

 class InferenceManager:
-    def __init__(self, config_path="config.json"):
+    def __init__(self, config_path="config.json", ext_model_pathes={}):
         cfg = {}
         with open(config_path, "r", encoding="utf-8") as f:
             cfg = json.load(f)
         self.cfg = cfg
+        self.ext_model_pathes = ext_model_pathes
+
         lora_options_path = cfg.get("loras", "")
         self.model_version = cfg["model_version"]
         self.lora_load_options = self.load_json(lora_options_path)  # Load LoRA load options
         self.lora_models = self.load_index_file("index.json")  # Load index.json
         self.preloaded_loras = []  # Array to store preloaded LoRAs with name and weights
+        self.ip_adapter_faceid_pipeline = None
         self.base_model_pipeline = self.load_base_model()  # Load the base model
+
         self.preload_loras()  # Preload LoRAs based on options

     def load_json(self, filepath):
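The new check binds a token to the requesting client: `check_auth` recomputes MD5(ip + host + referer + user-agent) and compares it against the token's `auth` claim. For context, a minimal sketch of the issuing side, assuming PyJWT and an RS256 key pair matching whatever `decode_jwt` verifies (none of this issuer code appears in the diff):

```python
import hashlib
import jwt  # PyJWT

def issue_token(private_key_pem: str, ip: str, host: str, referer: str, user_agent: str) -> str:
    # Must mirror check_auth exactly: md5(ip + host + referer + user_agent), hex-encoded
    auth = hashlib.md5(f"{ip}{host}{referer}{user_agent}".encode("utf-8")).hexdigest()
    return jwt.encode({"auth": auth}, private_key_pem, algorithm="RS256")
```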
@@ -165,6 +188,7 @@ class InferenceManager:
             #unet=unet,
             torch_dtype=torch.bfloat16,
             use_safetensors=True,
+            sampler=cfg.get("sampler"),
             #variant="fp16",
             custom_pipeline = "lpw_stable_diffusion_xl",
         )
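Note that `sampler` is not a documented `DiffusionPipeline.from_pretrained()` argument, so it presumably reaches the custom `lpw_stable_diffusion_xl` pipeline as an extra kwarg. Given the three schedulers this file already imports, a plausible sketch of mapping a config sampler name onto a scheduler (the name keys here are assumptions, not from this commit):

```python
from diffusers import EulerAncestralDiscreteScheduler, DPMSolverMultistepScheduler, DPMSolverSDEScheduler

# Assumed name -> scheduler mapping for the "sampler" config key
SAMPLER_CLASSES = {
    "euler_a": EulerAncestralDiscreteScheduler,
    "dpm++_2m": DPMSolverMultistepScheduler,
    "dpm++_sde": DPMSolverSDEScheduler,
}

def apply_sampler(pipe, name):
    scheduler_cls = SAMPLER_CLASSES.get(name)
    if scheduler_cls is not None:
        # Rebuild the scheduler from the pipeline's existing scheduler config
        pipe.scheduler = scheduler_cls.from_config(pipe.scheduler.config)
    return pipe
```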
@@ -175,8 +199,19 @@ class InferenceManager:

         load_time = round(time.time() - start, 2)
         print(f"Base model loaded in {load_time}s")
+
+        if cfg.get("load_ip_adapter_faceid", False):
+            if self.model_version in ("pony", "xl"):
+                ip_ckpt = self.ext_model_pathes.get("ip-adapter-faceid-sdxl", "")
+                if ip_ckpt:
+                    print(f"loading ip adapter model for {self.model_version}")
+                    self.ip_adapter_faceid_pipeline = ipown.IPAdapterFaceIDXL(pipe, ip_ckpt, 'cuda')
+                else:
+                    print("ip-adapter-faceid-sdxl not found, skip")
+
         return pipe

+
     def preload_loras(self):
         """Preload all LoRAs marked as 'preload=True' and store for later use."""
         for lora_name, lora_info in self.lora_load_options.items():
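A sketch of how the new `ext_model_pathes` plumbing fits together end to end, using only names from this diff (the config path and its contents are hypothetical):

```python
from huggingface_hub import hf_hub_download

# Hypothetical wiring; the config file and its values are made up for illustration.
ext = {
    "ip-adapter-faceid-sdxl": hf_hub_download(
        repo_id="h94/IP-Adapter-FaceID",
        filename="ip-adapter-faceid_sdxl.bin",
        repo_type="model",
    )
}
mgr = InferenceManager(config_path="models/example_xl.json", ext_model_pathes=ext)
# With "load_ip_adapter_faceid": true and model_version "xl" or "pony" in the config,
# load_base_model wraps the base pipeline once, and mgr.ip_adapter_faceid_pipeline
# is ready for ModelManager.generate_with_faceid below.
```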
@@ -279,9 +314,36 @@ class ModelManager:

        :param model_directory: The directory to scan for model config files (e.g., "/path/to/models").
        """
+        print("downloading models...")
+        self.ext_model_pathes = {
+            "ip-adapter-faceid-sdxl": hf_hub_download(repo_id="h94/IP-Adapter-FaceID", filename="ip-adapter-faceid_sdxl.bin", repo_type="model")
+        }
+
         self.models = {}
+        self.ext_models = {}
         self.model_directory = model_directory
         self.load_models()
+
+    #not enabled at the moment
+    def load_instant_x(self):
+        #load all models
+        hf_hub_download(repo_id="InstantX/InstantID", filename="ControlNetModel/config.json", local_dir="./checkpoints")
+        hf_hub_download(repo_id="InstantX/InstantID", filename="ControlNetModel/diffusion_pytorch_model.safetensors", local_dir="./checkpoints")
+        hf_hub_download(repo_id="InstantX/InstantID", filename="ip-adapter.bin", local_dir="./checkpoints")
+        os.makedirs("./models", exist_ok=True)  # note: "import os" was removed at the top of this file
+        download_from_hf("models/antelopev2/1k3d68.onnx", local_dir="./models")
+        download_from_hf("models/antelopev2/2d106det.onnx", local_dir="./models")
+        download_from_hf("models/antelopev2/genderage.onnx", local_dir="./models")
+        download_from_hf("models/antelopev2/glintr100.onnx", local_dir="./models")
+        download_from_hf("models/antelopev2/scrfd_10g_bnkps.onnx", local_dir="./models")
+
+        # prepare 'antelopev2' under ./models
+        app = FaceAnalysis(name='antelopev2', root='./', providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
+        app.prepare(ctx_id=0, det_size=(640, 640))
+
+        # prepare models under ./checkpoints
+        face_adapter = f'./checkpoints/ip-adapter.bin'
+        controlnet_path = f'./checkpoints/ControlNetModel'

     def load_models(self):
         """
print(f"Initializing model: {model_name} from {file_path}")
|
362 |
try:
|
363 |
# Initialize InferenceManager for each model
|
364 |
+
self.models[model_name] = InferenceManager(config_path=file_path, ext_model_pathes=self.ext_model_pathes)
|
365 |
except Exception as e:
|
366 |
print(traceback.format_exc())
|
367 |
print(f"Failed to initialize model {model_name} from {file_path}: {e}")
|
|
|
@@ -352,9 +414,141 @@ class ModelManager:
                 model.release(model.base_model_pipeline)
             except Exception as e:
                 print(f"Failed to release model {model_id}: {e}")
+
+    @spaces.GPU(duration=40)
+    def generate_with_faceid(self, model_id, request, inference_params, progress=gr.Progress(track_tqdm=True)):
+        token = dict(request.query_params).get("token", "")  # assumption: the commit reads `token` here without defining it
+        auth_helper.check_auth(request, token)
+        model = self.models.get(model_id)
+        if not model:
+            raise Exception(f"invalid model_id {model_id}")
+        if not model.ip_adapter_faceid_pipeline:
+            raise Exception("model does not support ip adapter")
+        pipe = model.ip_adapter_faceid_pipeline
+        cfg = model.cfg
+        p = inference_params.get("prompt")
+        negative_prompt = inference_params.get("negative_prompt", cfg.get("negative_prompt", ""))
+        steps = inference_params.get("steps", cfg.get("inference_steps", 30))
+        guidance_scale = inference_params.get("guidance_scale", cfg.get("guidance_scale", 7))
+        width = inference_params.get("width", cfg.get("width", 512))
+        height = inference_params.get("height", cfg.get("height", 512))
+        images = inference_params.get("images", [])
+        likeness_strength = inference_params.get("likeness_strength", 0.4)
+        face_strength = inference_params.get("face_strength", 0.1)
+        sampler = inference_params.get("sampler", cfg.get("sampler", ""))
+        lora_list = inference_params.get("loras", [])
+        seed = inference_params.get("seed", 0)  # added: `seed` is used below but was never set in the commit
+
+        if not images:
+            raise Exception("face images not provided")
+        start = time.time()
+        pipe.to("cuda")
+        print("loading face analysis...")
+        app = FaceAnalysis(name="buffalo_l", providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
+        app.prepare(ctx_id=0, det_size=(512, 512))
+
+        faceid_all_embeds = []
+        for image in images:
+            face = cv2.imread(image)
+            faces = app.get(face)
+            faceid_embed = torch.from_numpy(faces[0].normed_embedding).unsqueeze(0)
+            faceid_all_embeds.append(faceid_embed)
+
+        average_embedding = torch.mean(torch.stack(faceid_all_embeds, dim=0), dim=0)
+
+        print("start inference...")
+        style_selection = ""
+        use_negative_prompt = True
+        randomize_seed = True
+        seed = seed or int(randomize_seed_fn(seed, randomize_seed))
+        p = remove_child_related_content(p)
+        prompt_str = cfg.get("prompt", "{prompt}").replace("{prompt}", p)
+        generator = torch.Generator(pipe.device).manual_seed(seed)
+        print(f"generate: p={p}, np={negative_prompt}, steps={steps}, guidance_scale={guidance_scale}, size={width},{height}, seed={seed}")
+        images = pipe(
+            prompt=prompt_str,
+            negative_prompt=negative_prompt,
+            faceid_embeds=average_embedding,
+            scale=likeness_strength,
+            width=width,
+            height=height,
+            guidance_scale=face_strength,
+            num_inference_steps=steps,
+            generator=generator,
+            num_images_per_prompt=1,
+            output_type="pil",
+            #callback_on_step_end=callback_dynamic_cfg,
+            #callback_on_step_end_tensor_inputs=['prompt_embeds', 'add_text_embeds', 'add_time_ids'],
+        ).images
+        cost = round(time.time() - start, 2)
+        print(f"inference done in {cost}s")
+        images = [save_image(img) for img in images]
+        image_paths = [i[1] for i in images]
+        print(prompt_str, image_paths)
+        return [i[0] for i in images]
+
+    @spaces.GPU(duration=40)
+    def generate(self, model_id, request, inference_params, progress=gr.Progress(track_tqdm=True)):
+        def callback_dynamic_cfg(pipe, step_index, timestep, callback_kwargs):
+            # Drop classifier-free guidance once the configured fraction of steps has run
+            cfg_disabling_at = cfg.get('cfg_disabling_rate', 0.75)
+            if step_index == int(pipe.num_timesteps * cfg_disabling_at):
+                callback_kwargs['prompt_embeds'] = callback_kwargs['prompt_embeds'].chunk(2)[-1]
+                callback_kwargs['add_text_embeds'] = callback_kwargs['add_text_embeds'].chunk(2)[-1]
+                callback_kwargs['add_time_ids'] = callback_kwargs['add_time_ids'].chunk(2)[-1]
+                pipe._guidance_scale = 0.0
+
+            return callback_kwargs
+        token = dict(request.query_params).get("token", "")  # assumption, as above
+        auth_helper.check_auth(request, token)
+        model = self.models.get(model_id)
+        if not model:
+            raise Exception(f"invalid model_id {model_id}")
+        if not model.ip_adapter_faceid_pipeline:
+            raise Exception("model does not support ip adapter")  # note: as committed, plain generation is also gated on the FaceID pipeline
+
+        cfg = model.cfg
+        p = inference_params.get("prompt")
+        negative_prompt = inference_params.get("negative_prompt", cfg.get("negative_prompt", ""))
+        inference_steps = inference_params.get("steps", cfg.get("inference_steps", 30))
+        guidance_scale = inference_params.get("guidance_scale", cfg.get("guidance_scale", 7))
+        width = inference_params.get("width", cfg.get("width", 512))
+        height = inference_params.get("height", cfg.get("height", 512))
+        sampler = inference_params.get("sampler", cfg.get("sampler", ""))
+        lora_list = inference_params.get("loras", [])
+        seed = inference_params.get("seed", 0)  # added: `seed` is used below but was never set in the commit
+
+        pipe = model.build_pipeline_with_lora(lora_list, sampler, lora_list)
+
+        start = time.time()
+        pipe.to("cuda")
+        print("start inference...")
+        style_selection = ""
+        use_negative_prompt = True
+        randomize_seed = True
+        seed = seed or int(randomize_seed_fn(seed, randomize_seed))
+        guidance_scale = guidance_scale or cfg.get("guidance_scale", 7.5)
+        p = remove_child_related_content(p)
+        prompt_str = cfg.get("prompt", "{prompt}").replace("{prompt}", p)
+        generator = torch.Generator(pipe.device).manual_seed(seed)
+        print(f"generate: p={p}, np={negative_prompt}, steps={inference_steps}, guidance_scale={guidance_scale}, size={width},{height}, seed={seed}")
+        images = pipe(
+            prompt=prompt_str,
+            negative_prompt=negative_prompt,
+            width=width,
+            height=height,
+            guidance_scale=guidance_scale,
+            num_inference_steps=inference_steps,
+            generator=generator,
+            num_images_per_prompt=1,
+            output_type="pil",
+            callback_on_step_end=callback_dynamic_cfg,
+            callback_on_step_end_tensor_inputs=['prompt_embeds', 'add_text_embeds', 'add_time_ids'],
+        ).images
+        cost = round(time.time() - start, 2)
+        print(f"inference done in {cost}s")
+        images = [save_image(img) for img in images]
+        image_paths = [i[1] for i in images]
+        print(prompt_str, image_paths)
+        return [i[0] for i in images]

 # Hugging Face file download function - returns only file path
-def download_from_hf(filename, local_dir=None):
+def download_from_hf(filename, local_dir=None, repo_id=DATASET_ID, repo_type="dataset"):
     try:
         file_path = hf_hub_download(
             filename=filename,