BAAI
/

Emu2-Gen

EmuVisualGenerationPipeline

Model card Files Files and versions Community

QuanSun committed on Dec 21, 2023

Commit

fc39de2

·

1 Parent(s): 94953a3

add README

Files changed (1) hide show

README.md +110 -0

README.md ADDED Viewed

	@@ -0,0 +1,110 @@

+---
+language:
+- en
+---
+[Demo](https://huggingface.co/spaces/BAAI/Emu2) | [Project Page](https://baaivision.github.io/emu2/)
+## Model Weights
+| Model name         | Weight                                                  |
+| ------------------ | ------------------------------------------------------- |
+| **Emu2** | [🤗 HF link](https://huggingface.co/BAAI/Emu2) |
+| **Emu2-Chat** | [🤗 HF link](https://huggingface.co/BAAI/Emu2-Chat) |
+| **Emu2-Gen** | [🤗 HF link](https://huggingface.co/BAAI/Emu2-Gen) |
+## Inference (Huggingface Version)
+### Emu2-Gen
+```python
+import cv2
+from diffusers import DiffusionPipeline
+import numpy as np
+from PIL import Image
+import requests
+from transformers import AutoModelForCausalLM, AutoTokenizer
+import torch
+# On first use, you need to download the Hugging Face repo "BAAI/Emu2-Gen"
+# to a local directory before running this example
+path = "path to local BAAI/Emu2-GEN"
+multimodal_encoder = AutoModelForCausalLM.from_pretrained(
+	f"{path}/multimodal_encoder",
+	trust_remote_code=True,
+	torch_dtype=torch.bfloat16,
+	use_safetensors=True,
+	variant="bf16"
+)
+tokenizer = AutoTokenizer.from_pretrained(f"{path}/tokenizer")
+pipe = DiffusionPipeline.from_pretrained(
+	path,
+	custom_pipeline="pipeline_emu2_gen",
+	torch_dtype=torch.bfloat16,
+	use_safetensors=True,
+	variant="bf16",
+	multimodal_encoder=multimodal_encoder,
+	tokenizer=tokenizer,
+)
+# On subsequent runs, you can initialize the pipeline directly
+pipe = DiffusionPipeline.from_pretrained(
+	path,
+	custom_pipeline="pipeline_emu2_gen",
+	torch_dtype=torch.bfloat16,
+	use_safetensors=True,
+	variant="bf16",
+)
+pipe.to("cuda")
+# text-to-image
+prompt = "impressionist painting of an astronaut in a jungle"
+ret = pipe(prompt)
+ret.images[0].save("astronaut.png")
+# image editing
+image = Image.open(requests.get('https://github.com/baaivision/Emu/Emu2/examples/dog2.jpg?raw=true',stream=True).raw).convert('RGB')
+prompt = [image, "wearing a rad hat on the beach."]
+# grounding generation
+def draw_box(left, top, right, bottom):
+	mask = np.zeros((448, 448, 3), dtype=np.uint8)
+	mask = cv2.rectangle(mask, (left, top), (right, bottom), (255, 255, 255), 3)
+	mask = Image.fromarray(mask)
+	return mask
+# Reference images for the three subjects, fetched over HTTP and converted to RGB
+dog1 = Image.open(requests.get('https://github.com/baaivision/Emu/Emu2/examples/dog1.jpg?raw=true',stream=True).raw).convert('RGB')
+dog2 = Image.open(requests.get('https://github.com/baaivision/Emu/Emu2/examples/dog2.jpg?raw=true',stream=True).raw).convert('RGB')
+dog3 = Image.open(requests.get('https://github.com/baaivision/Emu/Emu2/examples/dog3.jpg?raw=true',stream=True).raw).convert('RGB')
+# One box mask per subject; arguments are (left, top, right, bottom) on the 448x448 canvas created by draw_box
+dog1_mask = draw_box( 22,  14, 224, 224)
+dog2_mask = draw_box(224,  10, 448, 224)
+dog3_mask = draw_box(120, 264, 320, 438)
+# Interleaved prompt: each <phrase>...</phrase> is followed by its box mask wrapped in
+# <object>...</object> and then by the subject's reference image
+# NOTE(review): the "<phrase>...</phrase>" lines have no trailing comma, so Python joins each
+# with the following "<object>" into a single string - confirm this is intended
+prompt = [
+	"<grounding>",
+	"A photo of",
+	"<phrase>the first dog</phrase>"
+	"<object>",
+	dog1_mask,
+	"</object>",
+	dog1,
+	"<phrase>the second dog</phrase>"
+	"<object>",
+	dog2_mask,
+	"</object>",
+	dog2,
+	"<phrase>the third dog</phrase>"
+	"<object>",
+	dog3_mask,
+	"</object>",
+	dog3,
+	"on the grass",
+]
+# Run the pipeline on the interleaved prompt and save the first returned image
+ret = pipe(prompt)
+ret.images[0].save("emu_with_dog.png")
+```