Spaces: Runtime error
Update app.py
app.py CHANGED
@@ -5,17 +5,6 @@ from diffusers import DiffusionPipeline
 import torch
 from huggingface_hub import login
 import os
-import bitsandbytes as bnb
-import onnx
-import onnxruntime as ort
-from onnxruntime.quantization import quantize_dynamic, QuantType
-import psutil
-from accelerate import infer_auto_device_map, init_empty_weights, load_checkpoint_and_dispatch
-
-def get_memory_usage():
-    process = psutil.Process(os.getpid())
-    mem_info = process.memory_info()
-    return f"{mem_info.rss / (1024 ** 2):.2f} MB"
 
 device = "cuda" if torch.cuda.is_available() else "cpu"
 
@@ -27,52 +16,16 @@ login(token=HUGGINGFACE_TOKEN)
 base_model_repo = "stabilityai/stable-diffusion-3-medium-diffusers"
 lora_weights_path = "./pytorch_lora_weights.safetensors"
 
-
-
-
-
-
-
-
-# Choose the appropriate dtype
-dtype = torch.float16 if torch.cuda.is_available() else torch.bfloat16
-
-# Load the base model with 16-bit precision if available
-with init_empty_weights():
-    pipeline = DiffusionPipeline.from_pretrained(
-        base_model_repo,
-        torch_dtype=dtype,
-        use_auth_token=HUGGINGFACE_TOKEN
-    )
-log_memory("After loading the model")
-
-bnb.optim.load_int8_model(pipeline.model, device=device)
-log_memory("After loading 8-bit model")
-
+# Load the base model
+pipeline = DiffusionPipeline.from_pretrained(
+    base_model_repo,
+    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+    use_auth_token=HUGGINGFACE_TOKEN
+)
 pipeline.load_lora_weights(lora_weights_path)
 pipeline.enable_sequential_cpu_offload()  # Efficient memory usage
 pipeline.enable_xformers_memory_efficient_attention()  # Enable xformers memory efficient attention
-
-# Use accelerate to handle model offloading
-device_map = infer_auto_device_map(pipeline.model)
-pipeline.model = load_checkpoint_and_dispatch(pipeline.model, device_map=device_map)
-log_memory("After enabling optimizations")
-
-# Export to ONNX
-onnx_model_path = "model.onnx"
-pipeline.model.eval()
-dummy_input = torch.randn(1, 3, 512, 512, device=device)
-torch.onnx.export(pipeline.model, dummy_input, onnx_model_path, export_params=True, opset_version=11, do_constant_folding=True, input_names=['input'], output_names=['output'])
-log_memory("After exporting to ONNX")
-
-# Quantize ONNX model to 8-bit
-quantized_model_path = "model_quantized.onnx"
-quantize_dynamic(onnx_model_path, quantized_model_path, weight_type=QuantType.QUInt8)
-log_memory("After quantizing ONNX model")
-
-# Load quantized ONNX model
-session = ort.InferenceSession(quantized_model_path)
-log_memory("After loading quantized ONNX model")
+pipeline = pipeline.to(device)
 
 MAX_SEED = np.iinfo(np.int32).max
 MAX_IMAGE_SIZE = 768  # Reduce max image size to fit within memory constraints
@@ -92,10 +45,8 @@ def infer(prompt, negative_prompt, seed, randomize_seed, width, height, guidance
         height=height,
         generator=generator
     ).images[0]
-
-    log_memory("After inference")
 
-    return image
+    return image
 
 examples = [
     "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
@@ -108,12 +59,6 @@ css = """
     margin: 0 auto;
     max-width: 520px;
 }
-#memory-log {
-    white-space: pre-wrap;
-    background: #f8f9fa;
-    padding: 10px;
-    border-radius: 5px;
-}
 """
 
 if torch.cuda.is_available():
@@ -140,7 +85,6 @@ with gr.Blocks(css=css) as demo:
             run_button = gr.Button("Run", scale=0)
 
         result = gr.Image(label="Result", show_label=False)
-        memory_log_output = gr.Textbox(label="Memory Log", elem_id="memory-log", lines=10, interactive=False)
 
         with gr.Accordion("Advanced Settings", open=False):
             negative_prompt = gr.Textbox(
@@ -202,7 +146,7 @@ with gr.Blocks(css=css) as demo:
     run_button.click(
        fn=infer,
        inputs=[prompt, negative_prompt, seed, randomize_seed, width, height, guidance_scale, num_inference_steps],
-       outputs=[result
+       outputs=[result]
    )
 
 demo.queue().launch()
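For context, the model-loading path that app.py ends up with after this commit amounts to the minimal sketch below. It only restates what the added lines in the diff do; the Gradio UI and the infer() wiring are omitted, and reading HUGGINGFACE_TOKEN from the environment is an assumption, since the token's definition is not shown in these hunks.

```python
import os
import torch
from diffusers import DiffusionPipeline
from huggingface_hub import login

# Assumption: the token is supplied via the Space's secrets/environment
# (its actual definition is outside the hunks shown in this diff).
HUGGINGFACE_TOKEN = os.environ.get("HUGGINGFACE_TOKEN")
login(token=HUGGINGFACE_TOKEN)

device = "cuda" if torch.cuda.is_available() else "cpu"

base_model_repo = "stabilityai/stable-diffusion-3-medium-diffusers"
lora_weights_path = "./pytorch_lora_weights.safetensors"

# Load the base model in fp16 on GPU, fp32 on CPU, as in the updated app.py.
pipeline = DiffusionPipeline.from_pretrained(
    base_model_repo,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    use_auth_token=HUGGINGFACE_TOKEN,
)
pipeline.load_lora_weights(lora_weights_path)

# The commit keeps sequential CPU offload and xformers attention, then moves
# the pipeline to the device. Note: recent diffusers releases may reject
# .to("cuda") after enable_sequential_cpu_offload(), so on GPU one of the
# two calls may need to be dropped.
pipeline.enable_sequential_cpu_offload()
pipeline.enable_xformers_memory_efficient_attention()
pipeline = pipeline.to(device)
```

Beyond dropping the bitsandbytes/ONNX/accelerate experiments and the memory-log UI, the other functional fix in the commit is the closing bracket in `outputs=[result]`, which the previous revision was missing.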