text-to-3d

Running on L40S

App Files Files Community

jbilcke-hf HF staff committed on Jan 17

Commit

1c05005

verified ·

1 Parent(s): 287be50

Update gradio_app.py

Browse files

Files changed (1) hide show

gradio_app.py +78 -24

gradio_app.py CHANGED Viewed

@@ -7,6 +7,7 @@ from PIL import Image
 import gradio as gr
 import trimesh
 from transparent_background import Remover
 # Import and setup SPAR3D
 os.system("USE_CUDA=1 pip install -vv --no-build-isolation ./texture_baker ./uv_unwrapper")
@@ -23,12 +24,19 @@ BACKGROUND_COLOR = [0.5, 0.5, 0.5]
 # Initialize models
 device = spar3d_utils.get_device()
 bg_remover = Remover()
-model = SPAR3D.from_pretrained(
     "stabilityai/stable-point-aware-3d",
     config_name="config.yaml",
     weight_name="model.safetensors"
 ).eval().to(device)
 # Initialize camera parameters
 c2w_cond = spar3d_utils.default_cond_c2w(COND_DISTANCE)
 intrinsic, intrinsic_normed_cond = spar3d_utils.create_intrinsic_from_fov_rad(
@@ -59,20 +67,30 @@ def create_batch(input_image: Image) -> dict[str, Any]:
     }
     return batch
-def process_image(image_path: str) -> str:
-    """Process image and return path to GLB file."""
     try:
-        # Load image
-        input_image = Image.open(image_path)
         # Remove background if needed
-        if input_image.mode != 'RGBA':
-            input_image = bg_remover.process(input_image.convert("RGB"))
         # Auto crop
         input_image = spar3d_utils.foreground_crop(
             input_image,
-            crop_ratio=1.3,  # Default padding ratio
             newsize=(COND_WIDTH, COND_HEIGHT),
             no_crop=False
         )
@@ -83,10 +101,10 @@ def process_image(image_path: str) -> str:
         # Generate mesh
         with torch.no_grad():
-            with torch.autocast(device_type=device, dtype=torch.bfloat16) if "cuda" in device else nullcontext():
-                trimesh_mesh, _ = model.generate_mesh(
                     batch,
-                    1024, # <- texture_resolution
                     remesh="none",
                     vertex_count=-1,
                     estimate_illumination=True
@@ -97,24 +115,60 @@ def process_image(image_path: str) -> str:
         temp_file = tempfile.NamedTemporaryFile(suffix='.glb', delete=False)
         trimesh_mesh.export(temp_file.name, file_type="glb", include_normals=True)
-        return temp_file.name
     except Exception as e:
-        return str(e)
 # Create Gradio interface
 demo = gr.Interface(
-    fn=process_image,
-    inputs=gr.File(
-        label="Upload Image",
-        file_types=["image"],
-    ),
-    outputs=gr.File(
-        label="Download GLB",
-        file_types=[".glb"],
-    ),
-    title="SPAR3D Image to GLB Converter",
-    description="Upload an image (JPG, PNG, or WebP) and get back a 3D model in GLB format",
 )
 if __name__ == "__main__":

 import gradio as gr
 import trimesh
 from transparent_background import Remover
+from diffusers import DiffusionPipeline
 # Import and setup SPAR3D
 os.system("USE_CUDA=1 pip install -vv --no-build-isolation ./texture_baker ./uv_unwrapper")
 # Initialize models
 device = spar3d_utils.get_device()
 bg_remover = Remover()
+spar3d_model = SPAR3D.from_pretrained(
     "stabilityai/stable-point-aware-3d",
     config_name="config.yaml",
     weight_name="model.safetensors"
 ).eval().to(device)
+# Initialize FLUX model
+dtype = torch.bfloat16
+flux_pipe = DiffusionPipeline.from_pretrained(
+    "black-forest-labs/FLUX.1-schnell",
+    torch_dtype=dtype
+).to(device)
 # Initialize camera parameters
 c2w_cond = spar3d_utils.default_cond_c2w(COND_DISTANCE)
 intrinsic, intrinsic_normed_cond = spar3d_utils.create_intrinsic_from_fov_rad(
     }
     return batch
+def generate_and_process_3d(prompt: str, seed: int = 42, width: int = 1024, height: int = 1024) -> str:
+    """Generate image from prompt and convert to 3D model."""
     try:
+        # Generate image using FLUX
+        generator = torch.Generator().manual_seed(seed)
+        generated_image = flux_pipe(
+            prompt=prompt,
+            width=width,
+            height=height,
+            num_inference_steps=4,
+            generator=generator,
+            guidance_scale=0.0
+        ).images[0]
+        # Convert PIL image to RGBA
+        input_image = generated_image.convert("RGBA")
         # Remove background if needed
+        input_image = bg_remover.process(input_image.convert("RGB"))
         # Auto crop
         input_image = spar3d_utils.foreground_crop(
             input_image,
+            crop_ratio=1.3,
             newsize=(COND_WIDTH, COND_HEIGHT),
             no_crop=False
         )
         # Generate mesh
         with torch.no_grad():
+            with torch.autocast(device_type=device, dtype=torch.bfloat16):
+                trimesh_mesh, _ = spar3d_model.generate_mesh(
                     batch,
+                    1024,  # texture_resolution
                     remesh="none",
                     vertex_count=-1,
                     estimate_illumination=True
         temp_file = tempfile.NamedTemporaryFile(suffix='.glb', delete=False)
         trimesh_mesh.export(temp_file.name, file_type="glb", include_normals=True)
+        return temp_file.name, generated_image
     except Exception as e:
+        return str(e), None
 # Create Gradio interface
+examples = [
+    "a tiny astronaut hatching from an egg on the moon",
+    "a cat holding a sign that says hello world",
+    "an anime illustration of a wiener schnitzel",
+]
 demo = gr.Interface(
+    fn=generate_and_process_3d,
+    inputs=[
+        gr.Text(
+            label="Enter your prompt",
+            placeholder="Describe what you want to generate..."
+        ),
+        gr.Slider(
+            label="Seed",
+            minimum=0,
+            maximum=np.iinfo(np.int32).max,
+            step=1,
+            value=42
+        ),
+        gr.Slider(
+            label="Width",
+            minimum=256,
+            maximum=2048,
+            step=32,
+            value=1024
+        ),
+        gr.Slider(
+            label="Height",
+            minimum=256,
+            maximum=2048,
+            step=32,
+            value=1024
+        )
+    ],
+    outputs=[
+        gr.File(
+            label="Download GLB",
+            file_types=[".glb"],
+        ),
+        gr.Image(
+            label="Generated Image",
+            type="pil"
+        )
+    ],
+    title="Text to 3D Model Generator",
+    description="Enter a text prompt to generate an image that will be converted into a 3D model",
+    examples=examples
 )
 if __name__ == "__main__":