khawir committed
Commit 0b8378a · 1 Parent(s): e1c1f18

Add application file

.dockerignore ADDED
@@ -0,0 +1,6 @@
+env
+__pycache__
+*.pyc
+*.pyo
+.git
+.vscode
.gitignore ADDED
@@ -0,0 +1,5 @@
+env
+__pycache__
+*.pyc
+*.pyo
+.vscode
Dockerfile ADDED
@@ -0,0 +1,13 @@
+FROM python:3.11
+
+RUN pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
+
+WORKDIR /code
+
+COPY ./requirements.txt /code/requirements.txt
+
+RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+
+COPY . .
+
+CMD ["uvicorn", "app.cloudgate:app", "--host", "0.0.0.0", "--port", "7860"]
app/__init__.py ADDED
File without changes
app/cgt2im/feature_extractor/preprocessor_config.json ADDED
@@ -0,0 +1,28 @@
+{
+  "crop_size": {
+    "height": 224,
+    "width": 224
+  },
+  "do_center_crop": true,
+  "do_convert_rgb": true,
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "feature_extractor_type": "CLIPFeatureExtractor",
+  "image_mean": [
+    0.48145466,
+    0.4578275,
+    0.40821073
+  ],
+  "image_processor_type": "CLIPImageProcessor",
+  "image_std": [
+    0.26862954,
+    0.26130258,
+    0.27577711
+  ],
+  "resample": 3,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "shortest_edge": 224
+  }
+}
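This feature-extractor config drives the CLIP image preprocessing for the safety checker. A minimal sketch of what the fields do, assuming a local checkout of this repo and the `transformers` package from requirements.txt:

```python
from PIL import Image
from transformers import CLIPImageProcessor

# Reads the JSON above: resize the shortest edge to 224, center-crop to
# 224x224, rescale pixel values by 1/255 (rescale_factor), then normalize
# with the CLIP image_mean / image_std listed in the config.
processor = CLIPImageProcessor.from_pretrained("app/cgt2im/feature_extractor")

inputs = processor(images=Image.new("RGB", (512, 512)), return_tensors="pt")
print(inputs["pixel_values"].shape)  # torch.Size([1, 3, 224, 224])
```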
app/cgt2im/model_index.json ADDED
@@ -0,0 +1,33 @@
+{
+  "_class_name": "LatentConsistencyModelPipeline",
+  "_diffusers_version": "0.22.0.dev0",
+  "feature_extractor": [
+    "transformers",
+    "CLIPImageProcessor"
+  ],
+  "requires_safety_checker": true,
+  "safety_checker": [
+    "stable_diffusion",
+    "StableDiffusionSafetyChecker"
+  ],
+  "scheduler": [
+    "diffusers",
+    "LCMScheduler"
+  ],
+  "text_encoder": [
+    "transformers",
+    "CLIPTextModel"
+  ],
+  "tokenizer": [
+    "transformers",
+    "CLIPTokenizer"
+  ],
+  "unet": [
+    "diffusers",
+    "UNet2DConditionModel"
+  ],
+  "vae": [
+    "diffusers",
+    "AutoencoderKL"
+  ]
+}
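model_index.json is the manifest that lets `DiffusionPipeline.from_pretrained` assemble the whole pipeline: it names the pipeline class and, for each component, the library and class to load from the matching sub-folder. A minimal sketch, assuming diffusers >= 0.22 (per `_diffusers_version`) and a local checkout:

```python
from diffusers import DiffusionPipeline

# from_pretrained reads model_index.json, instantiates
# LatentConsistencyModelPipeline, and loads unet/, vae/, text_encoder/,
# tokenizer/, scheduler/, safety_checker/ and feature_extractor/.
pipe = DiffusionPipeline.from_pretrained("app/cgt2im")
print(type(pipe).__name__)      # LatentConsistencyModelPipeline
print(sorted(pipe.components))  # the component names listed above
```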
app/cgt2im/safety_checker/config.json ADDED
@@ -0,0 +1,30 @@
+{
+  "_name_or_path": "digiplay/DreamShaper_7",
+  "architectures": [
+    "StableDiffusionSafetyChecker"
+  ],
+  "initializer_factor": 1.0,
+  "logit_scale_init_value": 2.6592,
+  "model_type": "clip",
+  "projection_dim": 768,
+  "text_config": {
+    "bos_token_id": 0,
+    "dropout": 0.0,
+    "eos_token_id": 2,
+    "hidden_size": 768,
+    "intermediate_size": 3072,
+    "model_type": "clip_text_model",
+    "num_attention_heads": 12
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.35.0.dev0",
+  "vision_config": {
+    "dropout": 0.0,
+    "hidden_size": 1024,
+    "intermediate_size": 4096,
+    "model_type": "clip_vision_model",
+    "num_attention_heads": 16,
+    "num_hidden_layers": 24,
+    "patch_size": 14
+  }
+}
app/cgt2im/safety_checker/model.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fb351a5ded815c3ff744968ad9c6b218d071b9d313d04f35e813b84b4c0ffde8
+size 1215979664
app/cgt2im/scheduler/scheduler_config.json ADDED
@@ -0,0 +1,20 @@
+{
+  "_class_name": "LCMScheduler",
+  "_diffusers_version": "0.22.0.dev0",
+  "beta_end": 0.012,
+  "beta_schedule": "scaled_linear",
+  "beta_start": 0.00085,
+  "clip_sample": false,
+  "clip_sample_range": 1.0,
+  "dynamic_thresholding_ratio": 0.995,
+  "num_train_timesteps": 1000,
+  "original_inference_steps": 50,
+  "prediction_type": "epsilon",
+  "rescale_betas_zero_snr": false,
+  "sample_max_value": 1.0,
+  "set_alpha_to_one": true,
+  "steps_offset": 0,
+  "thresholding": false,
+  "timestep_spacing": "leading",
+  "trained_betas": null
+}
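Note how this scheduler config lines up with the API defaults in app/cloudgate.py: `original_inference_steps: 50` is the distilled 50-step teacher schedule, which presumably corresponds to the `lcm_origin_steps=50` argument the app passes, and is what lets the LCM pipeline sample in the 4-10 `num_inference_steps` the endpoint allows.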
app/cgt2im/text_encoder/config.json ADDED
@@ -0,0 +1,25 @@
+{
+  "_name_or_path": "digiplay/DreamShaper_7",
+  "architectures": [
+    "CLIPTextModel"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "dropout": 0.0,
+  "eos_token_id": 2,
+  "hidden_act": "quick_gelu",
+  "hidden_size": 768,
+  "initializer_factor": 1.0,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 77,
+  "model_type": "clip_text_model",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 1,
+  "projection_dim": 768,
+  "torch_dtype": "float32",
+  "transformers_version": "4.35.0.dev0",
+  "vocab_size": 49408
+}
app/cgt2im/text_encoder/model.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:66262104f5099c84ad0e6ec156acae57b2292caebd4d7b8699327fa745145b76
+size 492265168
app/cgt2im/tokenizer/merges.txt ADDED
The diff for this file is too large to render.
app/cgt2im/tokenizer/special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
+{
+  "bos_token": "<|startoftext|>",
+  "eos_token": "<|endoftext|>",
+  "pad_token": "<|endoftext|>",
+  "unk_token": "<|endoftext|>"
+}
app/cgt2im/tokenizer/tokenizer_config.json ADDED
@@ -0,0 +1,32 @@
+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "49406": {
+      "content": "<|startoftext|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "49407": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [],
+  "bos_token": "<|startoftext|>",
+  "clean_up_tokenization_spaces": true,
+  "do_lower_case": true,
+  "eos_token": "<|endoftext|>",
+  "errors": "replace",
+  "model_max_length": 77,
+  "pad_token": "<|endoftext|>",
+  "tokenizer_class": "CLIPTokenizer",
+  "tokenizer_file": null,
+  "unk_token": "<|endoftext|>"
+}
app/cgt2im/tokenizer/vocab.json ADDED
The diff for this file is too large to render.
app/cgt2im/unet/config.json ADDED
@@ -0,0 +1,66 @@
+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.22.0.dev0",
+  "act_fn": "silu",
+  "addition_embed_type": null,
+  "addition_embed_type_num_heads": 64,
+  "addition_time_embed_dim": null,
+  "attention_head_dim": 8,
+  "attention_type": "default",
+  "block_out_channels": [
+    320,
+    640,
+    1280,
+    1280
+  ],
+  "center_input_sample": false,
+  "class_embed_type": null,
+  "class_embeddings_concat": false,
+  "conv_in_kernel": 3,
+  "conv_out_kernel": 3,
+  "cross_attention_dim": 768,
+  "cross_attention_norm": null,
+  "down_block_types": [
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D",
+    "DownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dropout": 0.0,
+  "dual_cross_attention": false,
+  "encoder_hid_dim": null,
+  "encoder_hid_dim_type": null,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "layers_per_block": 2,
+  "mid_block_only_cross_attention": null,
+  "mid_block_scale_factor": 1,
+  "mid_block_type": "UNetMidBlock2DCrossAttn",
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_attention_heads": null,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "out_channels": 4,
+  "projection_class_embeddings_input_dim": null,
+  "resnet_out_scale_factor": 1.0,
+  "resnet_skip_time_act": false,
+  "resnet_time_scale_shift": "default",
+  "sample_size": 96,
+  "time_cond_proj_dim": 256,
+  "time_embedding_act_fn": null,
+  "time_embedding_dim": null,
+  "time_embedding_type": "positional",
+  "timestep_post_act": null,
+  "transformer_layers_per_block": 1,
+  "up_block_types": [
+    "UpBlock2D",
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D"
+  ],
+  "upcast_attention": null,
+  "use_linear_projection": false
+}
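Two details worth flagging in this UNet config: `time_cond_proj_dim: 256` is the LCM-specific field, adding the guidance-scale embedding input that latent-consistency-distilled UNets use (which is why the pipeline can apply guidance without a second, unconditional forward pass), and `cross_attention_dim: 768` matches the `hidden_size` of the CLIP text encoder above.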
app/cgt2im/unet/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7be468469c513f8614a73423aa009581295fbd8c903703450632b2275c0145a9
+size 3438495328
app/cgt2im/vae/config.json ADDED
@@ -0,0 +1,32 @@
+{
+  "_class_name": "AutoencoderKL",
+  "_diffusers_version": "0.22.0.dev0",
+  "_name_or_path": "digiplay/DreamShaper_7",
+  "act_fn": "silu",
+  "block_out_channels": [
+    128,
+    256,
+    512,
+    512
+  ],
+  "down_block_types": [
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D"
+  ],
+  "force_upcast": true,
+  "in_channels": 3,
+  "latent_channels": 4,
+  "layers_per_block": 2,
+  "norm_num_groups": 32,
+  "out_channels": 3,
+  "sample_size": 768,
+  "scaling_factor": 0.18215,
+  "up_block_types": [
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D"
+  ]
+}
app/cgt2im/vae/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6bb865b3478d73053007b8a76f114010e26f1a1d2ea02dc0261464caa4289c2d
+size 334643268
app/cloudgate.py ADDED
@@ -0,0 +1,99 @@
+from typing import Annotated
+from fastapi import FastAPI, Path, Query, Response
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import FileResponse, HTMLResponse
+from fastapi.staticfiles import StaticFiles
+from pydantic import BaseModel
+import torch
+from torch import autocast
+from diffusers import DiffusionPipeline
+from io import BytesIO
+import base64
+from os.path import dirname
+
+# class Prompt(BaseModel):
+#     prompt: str
+#     steps: Annotated[int, Path(title="No of steps", ge=4, le=10)] = 8
+#     guide: Annotated[float, Path(title="Guidance scale", ge=0.5, le=2)] = 0.8
+
+app = FastAPI()
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_credentials=True,
+    allow_origins=["*"],
+    allow_methods=["*"],
+    allow_headers=["*"]
+)
+
+device = "cuda"
+pipe = DiffusionPipeline.from_pretrained(f'{dirname(__file__)}/cgt2im',
+                                         # use_auth_token=auth_token,
+                                         # use_safetensors=True
+                                         )
+pipe = pipe.to(device, dtype=torch.float16)
+
+# @app.get("/")
+# def generate(prompt: str):
+#     with autocast(device):
+#         image = pipe(
+#             prompt=prompt,
+#             num_inference_steps=8,
+#             guidance_scale=8.0,
+#             lcm_origin_steps=50,
+#             output_type="pil",
+#         ).images[0]
+
+#     # image.save("testimage.png")
+#     buffer = BytesIO()
+#     image.save(buffer, format="PNG")
+#     imgstr = base64.b64encode(buffer.getvalue())
+
+#     return Response(content=imgstr, media_type="image/png")
+
+@app.get("/t2i")
+def generate(prompt: str,
+             steps: Annotated[int, Query(ge=4, le=10)] = 8,
+             guide: Annotated[float, Query(ge=0.5, le=2)] = 0.8,
+             ):
+    with autocast(device):
+        image = pipe(
+            prompt=prompt,
+            num_inference_steps=steps,
+            guidance_scale=guide,
+            lcm_origin_steps=50,
+            output_type="pil",
+        ).images[0]
+
+    # image.save("testimage.png")
+    buffer = BytesIO()
+    image.save(buffer, format="PNG")
+    imgstr = base64.b64encode(buffer.getvalue())
+
+    return Response(content=imgstr, media_type="image/png")
+
+
+@app.get("/", response_class=HTMLResponse)
+async def read_home():
+    with open("app/static/index.html", "r") as file:
+        content = file.read()
+    return HTMLResponse(content=content)
+
+
+# @app.post("/t2i")
+# def generate(prompt: Prompt):
+#     with autocast(device):
+#         image = pipe(
+#             prompt=prompt.prompt,
+#             num_inference_steps=prompt.steps,
+#             guidance_scale=prompt.guide,
+#             lcm_origin_steps=50,
+#             output_type="pil",
+#         ).images[0]
+
+#     # image.save("testimage.png")
+#     buffer = BytesIO()
+#     image.save(buffer, format="PNG")
+#     imgstr = base64.b64encode(buffer.getvalue())
+
+#     return Response(content=imgstr, media_type="image/png")
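Worth noting: `/t2i` returns the base64 *text* of the PNG (the raw output of `b64encode`), even though the media type says `image/png`; the frontends below read it with `response.text()` and build a data URI from it. A hedged client sketch under the same assumption (server running locally on port 7860; `requests` is not in requirements.txt):

```python
import base64
import requests

# Hypothetical prompt; steps/guide must stay inside the Query bounds (4-10, 0.5-2).
resp = requests.get(
    "http://127.0.0.1:7860/t2i",
    params={"prompt": "a night painting with mountains and a lake",
            "steps": 8, "guide": 0.8},
    timeout=300,
)
resp.raise_for_status()

# The body is base64 text, not raw PNG bytes -- decode before writing.
with open("out.png", "wb") as f:
    f.write(base64.b64decode(resp.text))
```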
app/static/index.html ADDED
@@ -0,0 +1,79 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="utf-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+    <title>CloudGate Text-2-Image Ver 1.0</title>
+    <script src="https://cdn.tailwindcss.com"></script>
+</head>
+<body>
+    <main>
+        <section id="img-gen" class="container mx-auto bg-slate-50">
+            <h1 class="text-4xl font-bold text-center py-6 mb-6">Text to Image Generation</h1>
+            <form action="" class="img-gen-form p-6">
+                <div class="mb-4">
+                    <label for="img-gen-prompt" class="block mb-2 text-2xl">Prompt</label>
+                    <input
+                        type="text"
+                        id="img-gen-prompt"
+                        class="w-full px-3 py-2 placeholder-gray-300 border border-gray-300 rounded-md focus:outline-none focus:ring focus:ring-indigo-100 focus:border-indigo-300"
+                        value="A serene night scene featuring a house, majestic mountains, graceful trees, a tranquil lake, a glowing moon, and a shimmering sky adorned with countless stars."
+                    />
+                </div>
+                <button class="img-gen-submit px-3 py-2 rounded-md bg-indigo-500 text-white">Submit</button>
+            </form>
+
+            <article>
+                <!-- <h2 class="text-2xl font-bold text-center py-6 mb-6 text-indigo-500 italic">Output</h2> -->
+                <div id="img-gen-output" class="py-6 px-6 rounded-md">
+                    <p id="loading" class="text-center italic text-2xl text-green-500"></p>
+                    <img src="" alt="" id="img-tgt" class="mx-auto">
+                </div>
+            </article>
+
+        </section>
+    </main>
+    <script type="text/javascript">
+        const imgGenForm = document.querySelector('.img-gen-form');
+
+        const genImg = async (data) => {
+            const response = await fetch(`t2i?prompt=${encodeURIComponent(data.prompt)}`);
+
+            if (!response.ok) {
+                console.error('Failed to fetch image:', response.statusText);
+                return null;
+            }
+
+            return response.text();
+        }
+
+        imgGenForm.addEventListener('submit', async (event) => {
+            event.preventDefault();
+            const imgElement = document.getElementById('img-tgt')
+            imgElement.src = "";
+            let loading_ = document.getElementById('loading');
+            loading_.textContent = "Generating an awesome image for you ...!";
+            const imgGenPrompt = document.getElementById('img-gen-prompt');
+            // const imgGenOutput = document.getElementById('img-gen-output');
+
+
+            try {
+                genImg({ "prompt": imgGenPrompt.value,
+                         // "steps": 8,
+                         // "guide": 1.5
+                }).then((base64String) => {
+                    if (base64String) {
+                        // const imgElement = document.createElement('img');
+                        loading_.textContent = "";
+                        console.log(loading_)
+                        imgElement.src = `data:image/png;base64,${base64String}`;
+                        // imgGenOutput.appendChild(imgElement);
+                    }
+                });
+            } catch (err) {
+                console.error(err);
+            }
+        });
+    </script>
+</body>
+</html>
app/static/indexlocalgpt.html ADDED
@@ -0,0 +1,39 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Display FastAPI Image</title>
+</head>
+<body>
+    <div id="imageContainer"></div>
+
+    <script type="text/javascript">
+        async function query(data) {
+            const response = await fetch(
+                `http://127.0.0.1:7860/t2i/?prompt=${encodeURIComponent(data.prompt)}`,
+            );
+
+            if (!response.ok) {
+                console.error('Failed to fetch image:', response.statusText);
+                return null;
+            }
+
+            return response.text();
+        }
+
+        query({ "prompt": "a night painting with house, mountains, trees, lake, moon, stars",
+                "steps": 8,
+                "guide": 1.5
+        }).then((base64String) => {
+            if (base64String) {
+                const imageElement = document.createElement('img');
+                imageElement.src = `data:image/png;base64,${base64String}`;
+
+                const imageContainer = document.getElementById('imageContainer');
+                imageContainer.appendChild(imageElement);
+            }
+        });
+    </script>
+</body>
+</html>
app/static/script.js ADDED
File without changes
app/static/style.css ADDED
File without changes
requirements.txt ADDED
@@ -0,0 +1,5 @@
+fastapi
+uvicorn
+diffusers
+transformers
+accelerate
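torch, torchvision, and torchaudio are not listed here: the Dockerfile installs them first from the cu121 index, so the later `pip install -r requirements.txt` should find torch already satisfied and leave the CUDA build in place rather than pulling the generic PyPI wheel.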