QuanSun committed
Commit fc39de2 · 1 Parent(s): 94953a3

add README

Files changed (1)
  1. README.md +110 -0

README.md ADDED
@@ -0,0 +1,110 @@
---
language:
- en
---

[Demo](https://huggingface.co/spaces/BAAI/Emu2) | [Project Page](https://baaivision.github.io/emu2/)

## Model Weights

| Model name    | Weight                                              |
| ------------- | --------------------------------------------------- |
| **Emu2**      | [🤗 HF link](https://huggingface.co/BAAI/Emu2)      |
| **Emu2-Chat** | [🤗 HF link](https://huggingface.co/BAAI/Emu2-Chat) |
| **Emu2-Gen**  | [🤗 HF link](https://huggingface.co/BAAI/Emu2-Gen)  |

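Emu2 and Emu2-Chat load through the standard `transformers` path. A minimal loading sketch, assuming a GPU with enough memory for the bf16 weights (the multimodal generation API itself lives in the model's remote code):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Sketch only: assumes the checkpoint loads via trust_remote_code
# and that a single GPU can hold the bf16 weights.
tokenizer = AutoTokenizer.from_pretrained("BAAI/Emu2-Chat")
model = AutoModelForCausalLM.from_pretrained(
    "BAAI/Emu2-Chat",
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
    trust_remote_code=True,
).to("cuda").eval()
```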

## Inference (Hugging Face Version)

### Emu2-Gen

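The pipeline below expects a local copy of this repository. One way to fetch it, sketched with `huggingface_hub` (the target directory `./Emu2-Gen` is just an example):

```python
from huggingface_hub import snapshot_download

# Download the full repo once; later runs reuse the local copy.
local_path = snapshot_download(repo_id="BAAI/Emu2-Gen", local_dir="./Emu2-Gen")
```

With a local copy in place, the full setup and generation example follows: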
```python
import cv2
from diffusers import DiffusionPipeline
import numpy as np
from PIL import Image
import requests
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Before first use, download the Hugging Face repo "BAAI/Emu2-Gen"
# to a local directory.
path = "path to local BAAI/Emu2-Gen"

multimodal_encoder = AutoModelForCausalLM.from_pretrained(
    f"{path}/multimodal_encoder",
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
    use_safetensors=True,
    variant="bf16"
)
tokenizer = AutoTokenizer.from_pretrained(f"{path}/tokenizer")

pipe = DiffusionPipeline.from_pretrained(
    path,
    custom_pipeline="pipeline_emu2_gen",
    torch_dtype=torch.bfloat16,
    use_safetensors=True,
    variant="bf16",
    multimodal_encoder=multimodal_encoder,
    tokenizer=tokenizer,
)

# On subsequent runs, the pipeline can be initialized directly:
pipe = DiffusionPipeline.from_pretrained(
    path,
    custom_pipeline="pipeline_emu2_gen",
    torch_dtype=torch.bfloat16,
    use_safetensors=True,
    variant="bf16",
)

pipe.to("cuda")

# text-to-image
prompt = "impressionist painting of an astronaut in a jungle"
ret = pipe(prompt)
ret.images[0].save("astronaut.png")

# image editing
image = Image.open(requests.get('https://github.com/baaivision/Emu/blob/main/Emu2/examples/dog2.jpg?raw=true', stream=True).raw).convert('RGB')
prompt = [image, "wearing a red hat on the beach."]
ret = pipe(prompt)
ret.images[0].save("dog_with_red_hat.png")

# grounding generation
def draw_box(left, top, right, bottom):
    # Draw a white box outline on a black 448x448 canvas;
    # the result is used as an <object> location mask below.
    mask = np.zeros((448, 448, 3), dtype=np.uint8)
    mask = cv2.rectangle(mask, (left, top), (right, bottom), (255, 255, 255), 3)
    mask = Image.fromarray(mask)
    return mask

dog1 = Image.open(requests.get('https://github.com/baaivision/Emu/blob/main/Emu2/examples/dog1.jpg?raw=true', stream=True).raw).convert('RGB')
dog2 = Image.open(requests.get('https://github.com/baaivision/Emu/blob/main/Emu2/examples/dog2.jpg?raw=true', stream=True).raw).convert('RGB')
dog3 = Image.open(requests.get('https://github.com/baaivision/Emu/blob/main/Emu2/examples/dog3.jpg?raw=true', stream=True).raw).convert('RGB')
dog1_mask = draw_box( 22,  14, 224, 224)
dog2_mask = draw_box(224,  10, 448, 224)
dog3_mask = draw_box(120, 264, 320, 438)

prompt = [
    "<grounding>",
    "A photo of",
    # Note: adjacent string literals concatenate, so each
    # "<phrase>...</phrase>" "<object>" pair is a single list element.
    "<phrase>the first dog</phrase>"
    "<object>",
    dog1_mask,
    "</object>",
    dog1,
    "<phrase>the second dog</phrase>"
    "<object>",
    dog2_mask,
    "</object>",
    dog2,
    "<phrase>the third dog</phrase>"
    "<object>",
    dog3_mask,
    "</object>",
    dog3,
    "on the grass",
]
ret = pipe(prompt)
ret.images[0].save("emu_with_dog.png")
```
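
The grounding boxes above are pixel coordinates on the 448×448 canvas that `draw_box` allocates. If it is more convenient to think in normalized coordinates, a tiny helper can do the mapping (the name `norm_to_canvas` is ours, not part of the repo):

```python
# Hypothetical convenience helper (not from the Emu2-Gen repo): map a box
# given in normalized [0, 1] coordinates onto the 448x448 mask canvas.
def norm_to_canvas(box, size=448):
    left, top, right, bottom = box
    return tuple(int(round(v * size)) for v in (left, top, right, bottom))

# Roughly reproduces dog1_mask above: draw_box(22, 13, 224, 224)
dog1_mask = draw_box(*norm_to_canvas((0.05, 0.03, 0.5, 0.5)))
```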