City committed on
Commit
9ac551a
·
1 Parent(s): a23f9da

Sync with GitHub

Browse files
Files changed (5) hide show
  1. README.md +2 -4
  2. app.py → demo_score_gradio.py +56 -113
  3. inference.py +102 -0
  4. model.py +44 -44
  5. requirements.txt +3 -3
README.md CHANGED
@@ -1,14 +1,12 @@
1
  ---
2
  title: CityAesthetics Demo
3
- emoji: 🏙️
4
  colorFrom: blue
5
  colorTo: yellow
6
  sdk: gradio
7
  sdk_version: 3.23.0
8
- app_file: app.py
9
  models: [city96/CityAesthetics]
10
  pinned: false
11
  license: apache-2.0
12
  ---
13
-
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
  title: CityAesthetics Demo
3
+ emoji:
4
  colorFrom: blue
5
  colorTo: yellow
6
  sdk: gradio
7
  sdk_version: 3.23.0
8
+ app_file: demo_score_gradio.py
9
  models: [city96/CityAesthetics]
10
  pinned: false
11
  license: apache-2.0
12
  ---
 
 
app.py → demo_score_gradio.py RENAMED
@@ -1,113 +1,56 @@
1
- import os
2
- import torch
3
- import gradio as gr
4
- from transformers import CLIPVisionModelWithProjection, CLIPImageProcessor
5
- from huggingface_hub import hf_hub_download
6
- from safetensors.torch import load_file
7
-
8
- from model import AestheticPredictorModel
9
-
10
- HFREPO = "City96/CityAesthetics"
11
- MODELS = [
12
- "CityAesthetics-Anime-v1.8",
13
- ]
14
-
15
- class CityAestheticsPipeline:
16
- """
17
- Demo pipeline for [image=>score] prediction
18
- Accepts a list of model paths on initialization.
19
- Resulting object can be called directly with a PIL image as the input.
20
- Returns a dict with the model name as key and the score [0.0;1.0] as a value.
21
- """
22
- def __init__(self, model_paths):
23
- self.models = {}
24
- for path in model_paths:
25
- name = os.path.splitext(os.path.basename(path))[0]
26
- self.models[name] = self.load_model(path)
27
-
28
- clip_ver = "openai/clip-vit-large-patch14"
29
- self.proc = CLIPImageProcessor.from_pretrained(clip_ver)
30
- self.clip = CLIPVisionModelWithProjection.from_pretrained(clip_ver)
31
- print("CityAesthetics: Pipeline init ok") # debug
32
-
33
- def load_model(self, path):
34
- sd = load_file(path)
35
- assert tuple(sd["up.0.weight"].shape) == (1024, 768) # only allow CLIP ver
36
- model = AestheticPredictorModel()
37
- model.load_state_dict(sd)
38
- model.eval()
39
- return model
40
-
41
- def __call__(self, raw):
42
- img = self.proc(images=raw, return_tensors="pt")
43
- with torch.no_grad():
44
- emb = self.clip(pixel_values=img["pixel_values"])
45
- emb = emb["image_embeds"].detach().cpu()
46
- out = {}
47
- for name, model in self.models.items():
48
- pred = model(emb)
49
- out[name] = float(pred.squeeze(0))
50
- return out
51
-
52
- def get_model_path(name):
53
- fname = f"{name}.safetensors"
54
-
55
- # local path: [models/AesPred-Anime-v1.8.safetensors]
56
- path = os.path.join(os.path.dirname(os.path.realpath(__file__)),"models")
57
- if os.path.isfile(os.path.join(path, fname)):
58
- print("CityAesthetics: Using local model")
59
- return os.path.join(path, fname)
60
-
61
- # huggingface hub fallback
62
- print("CityAesthetics: Using HF Hub model")
63
- return str(hf_hub_download(
64
- token = os.environ.get("HFS_TOKEN") or True,
65
- repo_id = HFREPO,
66
- filename = fname,
67
- # subfolder = fname.split('-')[1],
68
- ))
69
-
70
- article = """\
71
- # About
72
-
73
- This is the live demo for the CityAesthetics class of predictors.
74
-
75
- For more information, you can check out the [Huggingface Hub](https://huggingface.co/city96/CityAesthetics) or [GitHub page](https://github.com/city96/CityAesthetics).
76
-
77
- ## CityAesthetics-Anime
78
-
79
- This flavor is optimized for scoring anime images with at least one subject present.
80
-
81
- ### Intentional biases:
82
-
83
- - Completely negative towards real life photos (ideal score of 0%)
84
- - Strongly Negative towards text (subtitles, memes, etc) and manga panels
85
- - Fairly negative towards 3D and to some extent 2.5D images
86
- - Negative towards western cartoons and stylized images (chibi, parody)
87
-
88
- ### Expected output scores:
89
-
90
- - Non-anime images should always score below 20%
91
- - Sketches/rough lineart/oekaki get around 20-40%
92
- - Flat shading/TV anime gets around 40-50%
93
- - Above 50% is mostly scored based on my personal style preferences
94
-
95
- ### Issues:
96
-
97
- - Tends to filter male characters.
98
- - Requires at least 1 subject, won't work for scenery/landscapes.
99
- - Noticeable positive bias towards anime characters with animal ears.
100
- - Hit-or-miss with AI generated images due to style/quality not being correlated.
101
- """
102
-
103
- pipeline = CityAestheticsPipeline([get_model_path(x) for x in MODELS])
104
- gr.Interface(
105
- fn = pipeline,
106
- title = "CityAesthetics demo",
107
- article = article,
108
- inputs = gr.Image(label="Input image", type="pil"),
109
- outputs = gr.Label(label="Model prediction", show_label=False),
110
- examples = "./examples",
111
- allow_flagging = "never",
112
- analytics_enabled = False,
113
- ).launch()
 
1
+ import os
2
+ import gradio as gr
3
+
4
+ from inference import CityAestheticsMultiModelPipeline, get_model_path
5
+
6
# Optional Hugging Face access token; stays None when the variable is unset.
TOKEN = os.environ.get("HFS_TOKEN")
# Hub repository that hosts the predictor checkpoints.
HFREPO = "City96/CityAesthetics"
# Checkpoint names (without extension) shown in the demo.
MODELS = [
    "CityAesthetics-Anime-v1.8",
]

# Markdown rendered underneath the interface.
article = """\
# About

This is the live demo for the CityAesthetics class of predictors.

For more information, you can check out the [Huggingface Hub](https://huggingface.co/city96/CityAesthetics) or [GitHub page](https://github.com/city96/CityClassifiers).

## CityAesthetics-Anime

This flavor is optimized for scoring anime images with at least one subject present.

### Intentional biases:

- Completely negative towards real life photos (ideal score of 0%)
- Strongly Negative towards text (subtitles, memes, etc) and manga panels
- Fairly negative towards 3D and to some extent 2.5D images
- Negative towards western cartoons and stylized images (chibi, parody)

### Expected output scores:

- Non-anime images should always score below 20%
- Sketches/rough lineart/oekaki get around 20-40%
- Flat shading/TV anime gets around 40-50%
- Above 50% is mostly scored based on my personal style preferences

### Issues:

- Tends to filter male characters.
- Requires at least 1 subject, won't work for scenery/landscapes.
- Noticeable positive bias towards anime characters with animal ears.
- Hit-or-miss with AI generated images due to style/quality not being correlated.
"""

# Resolve every checkpoint path first, then build the pipeline from them.
model_paths = [get_model_path(x, HFREPO, TOKEN) for x in MODELS]
pipeline = CityAestheticsMultiModelPipeline(
    model_paths,
)

# The examples folder is optional; only pass it when it exists.
examples = "./examples" if os.path.isdir("./examples") else None

demo = gr.Interface(
    fn = pipeline,
    title = "CityAesthetics demo",
    article = article,
    inputs = gr.Image(label="Input image", type="pil"),
    outputs = gr.Label(label="Model prediction", show_label=False),
    examples = examples,
    allow_flagging = "never",
    analytics_enabled = False,
)
demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
inference.py ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import torch
3
+ from safetensors.torch import load_file
4
+ from huggingface_hub import hf_hub_download
5
+ from transformers import CLIPImageProcessor, CLIPVisionModelWithProjection
6
+
7
+ from model import AestheticPredictorModel
8
+
9
class CityAestheticsPipeline:
    """
    Demo model pipeline for [image=>score] prediction.

    Accepts a single model path on initialization.
    Resulting object can be called directly with a PIL image as the input.
    Returns a single float value with the predicted score [0.0;1.0].

    Args:
        model_path: Path to the `.safetensors` predictor checkpoint.
        device: Torch device that CLIP, the predictor and the inputs are
            moved to.
        clip_dtype: Dtype used for the CLIP weights and pixel values. The
            resulting embedding is always cast back to float32 before it
            is handed to the predictor.
    """
    clip_ver = "openai/clip-vit-large-patch14"

    def __init__(self, model_path, device="cpu", clip_dtype=torch.float32):
        self.device = device
        self.clip_dtype = clip_dtype
        self._init_clip()
        self.model = self._load_model(model_path)
        print("CityAesthetics: Pipeline init ok") # debug

    def __call__(self, raw):
        """Embed `raw` with CLIP and score it with the loaded predictor."""
        emb = self.get_clip_emb(raw)
        return self.get_model_pred(self.model, emb)

    def get_model_pred(self, model, emb):
        """
        Run `model` on `emb` with autograd disabled.

        Returns the prediction as a Python float, so the model output must
        hold exactly one element once the leading dim is squeezed.
        """
        with torch.no_grad():
            pred = model(emb)
        return float(pred.detach().cpu().squeeze(0))

    def get_clip_emb(self, raw):
        """Preprocess `raw` and return its CLIP image embedding as float32."""
        img = self.proc(
            images = raw,
            return_tensors = "pt"
        )["pixel_values"].to(device=self.device, dtype=self.clip_dtype)
        with torch.no_grad():
            emb = self.clip(pixel_values=img)
        return emb["image_embeds"].detach().to(torch.float32)

    def _init_clip(self):
        """Load the CLIP image processor and vision model for `clip_ver`."""
        self.proc = CLIPImageProcessor.from_pretrained(self.clip_ver)
        self.clip = CLIPVisionModelWithProjection.from_pretrained(
            self.clip_ver,
            torch_dtype = self.clip_dtype,
        )
        # Move explicitly instead of passing `device_map`: that argument
        # makes transformers require the optional `accelerate` package.
        self.clip.to(self.device)

    def _load_model(self, path):
        """
        Load a predictor checkpoint from `path` and move it to the device.

        Raises:
            ValueError: if the checkpoint's first layer is missing or does
                not have the (1024, 768) shape this pipeline supports.
        """
        sd = load_file(path)
        # only allow CLIP ver
        weight = sd.get("up.0.weight")
        if weight is None or tuple(weight.shape) != (1024, 768):
            found = None if weight is None else tuple(weight.shape)
            raise ValueError(
                f"Unsupported checkpoint '{path}': expected 'up.0.weight' "
                f"with shape (1024, 768), got {found}"
            )
        model = AestheticPredictorModel()
        model.eval()
        model.load_state_dict(sd)
        model.to(self.device)
        return model
58
+
59
class CityAestheticsMultiModelPipeline(CityAestheticsPipeline):
    """
    Demo multi-model pipeline for [image=>score] prediction.

    Accepts a list of model paths on initialization.
    Resulting object can be called directly with a PIL image as the input.
    Returns a dict with the model name as key and the score [0.0;1.0] as a value.

    Args:
        model_paths: Iterable of `.safetensors` checkpoint paths. Each
            model is keyed by its file name without the extension, so two
            paths with the same base name overwrite each other.
        device: Torch device that CLIP and every predictor are moved to.
        clip_dtype: Dtype used for the CLIP weights and pixel values.
    """
    def __init__(self, model_paths, device="cpu", clip_dtype=torch.float32):
        # The parent __init__ loads exactly one model, so it is not called;
        # the shared attributes are set up here instead.
        self.device = device
        self.clip_dtype = clip_dtype
        self._init_clip()
        self.models = {}
        for path in model_paths:
            name = os.path.splitext(os.path.basename(path))[0]
            self.models[name] = self._load_model(path)
        print("CityAesthetics: Pipeline init ok") # debug

    def __call__(self, raw):
        """Embed `raw` once and score it with every loaded model."""
        emb = self.get_clip_emb(raw)
        # get_model_pred already runs the forward pass with autograd
        # disabled, so no separate model(emb) call is needed here.
        return {
            name: self.get_model_pred(model, emb)
            for name, model in self.models.items()
        }
83
+
84
def get_model_path(name, repo, token=True):
    """
    Resolve the checkpoint file for `name`.

    A `models` folder next to this source file is checked first. When it
    does not contain `<name>.safetensors`, the file is fetched from `repo`
    on the HF hub using `token`, and the downloaded path is returned.
    """
    fname = f"{name}.safetensors"

    # local copy: [<this dir>/models/<name>.safetensors]
    here = os.path.dirname(os.path.realpath(__file__))
    local = os.path.join(here, "models", fname)
    if os.path.isfile(local):
        print("CityAesthetics: Using local model")
        return local

    # huggingface hub fallback
    print("CityAesthetics: Using HF Hub model")
    remote = hf_hub_download(
        token = token,
        repo_id = repo,
        filename = fname,
    )
    return str(remote)
model.py CHANGED
@@ -1,44 +1,44 @@
1
- import torch
2
- import torch.nn as nn
3
-
4
- class ResBlock(nn.Module):
5
- """Block with residuals"""
6
- def __init__(self, ch):
7
- super().__init__()
8
- self.join = nn.ReLU()
9
- self.long = nn.Sequential(
10
- nn.Linear(ch, ch),
11
- nn.LeakyReLU(0.1),
12
- nn.Linear(ch, ch),
13
- nn.LeakyReLU(0.1),
14
- nn.Linear(ch, ch),
15
- )
16
- def forward(self, x):
17
- return self.join(self.long(x) + x)
18
-
19
- class AestheticPredictorModel(nn.Module):
20
- """
21
- Main predictor class. Original:
22
- https://github.com/city96/CityAesthetics/blob/main/model.py
23
- """
24
- def __init__(self, features=768, hidden=1024):
25
- super().__init__()
26
- self.features = features
27
- self.hidden = hidden
28
- self.up = nn.Sequential(
29
- nn.Linear(self.features, self.hidden),
30
- ResBlock(ch=self.hidden),
31
- )
32
- self.down = nn.Sequential(
33
- nn.Linear(self.hidden, 128),
34
- nn.Linear(128, 64),
35
- nn.Dropout(0.1),
36
- nn.LeakyReLU(),
37
- nn.Linear(64, 32),
38
- nn.Linear(32, 1),
39
- nn.Tanh(),
40
- )
41
- def forward(self, x):
42
- y = self.up(x)
43
- z = self.down(y)
44
- return (z+1.0)/2.0
 
1
+ import torch
2
+ import torch.nn as nn
3
+
4
class ResBlock(nn.Module):
    """
    Linear block with residuals.

    Three `ch -> ch` linear layers, with LeakyReLU(0.1) between them, form
    the long path. Its output is added to the input and passed through
    ReLU.
    """
    def __init__(self, ch):
        super().__init__()
        self.join = nn.ReLU()
        # Layer order fixes the state-dict keys (long.0, long.2, long.4).
        layers = [
            nn.Linear(ch, ch),
            nn.LeakyReLU(0.1),
            nn.Linear(ch, ch),
            nn.LeakyReLU(0.1),
            nn.Linear(ch, ch),
        ]
        self.long = nn.Sequential(*layers)

    def forward(self, x):
        residual = self.long(x) + x
        return self.join(residual)
18
+
19
class AestheticPredictorModel(nn.Module):
    """
    Main predictor class. Original:
    https://github.com/city96/CityClassifiers/blob/main/model.py

    Projects `features`-wide inputs up to `hidden`, applies one ResBlock,
    then narrows through 128/64/32 units to a single Tanh output that is
    rescaled from [-1;1] to [0;1].
    """
    def __init__(self, features=768, hidden=1024):
        super().__init__()
        self.features = features
        self.hidden = hidden

        # Layer order fixes the state-dict keys (up.0, up.1, down.0 ...).
        up_layers = [
            nn.Linear(self.features, self.hidden),
            ResBlock(ch=self.hidden),
        ]
        down_layers = [
            nn.Linear(self.hidden, 128),
            nn.Linear(128, 64),
            nn.Dropout(0.1),
            nn.LeakyReLU(),
            nn.Linear(64, 32),
            nn.Linear(32, 1),
            nn.Tanh(),
        ]
        self.up = nn.Sequential(*up_layers)
        self.down = nn.Sequential(*down_layers)

    def forward(self, x):
        z = self.down(self.up(x))
        # Tanh gives [-1;1]; shift and scale into [0;1].
        return (z+1.0)/2.0
requirements.txt CHANGED
@@ -1,3 +1,3 @@
1
- torch==2.1.0
2
- safetensors==0.4.0
3
- transformers==4.35.0
 
1
+ torch==2.1.0
2
+ safetensors==0.4.0
3
+ transformers==4.35.0