JinHyeong99 committed on
Commit
9c81346
·
1 Parent(s): a36cae8
Files changed (9) hide show
  1. app.py +107 -48
  2. config.json +0 -372
  3. image1.jpg +0 -0
  4. image2.jpg +0 -0
  5. image3.jpg +0 -0
  6. labels.txt +19 -0
  7. preprocessor_config.json +0 -18
  8. pytorch_model.bin +0 -3
  9. tf_model.h5 +0 -3
app.py CHANGED
@@ -1,52 +1,111 @@
1
  import gradio as gr
2
- from transformers import SegformerFeatureExtractor, SegformerForSemanticSegmentation
3
- from PIL import Image
 
4
  import numpy as np
5
- import torch
6
-
7
- # 모델과 feature extractor 로드
8
- model_name = "nvidia/segformer-b0-finetuned-ade-512-512"
9
- model = SegformerForSemanticSegmentation.from_pretrained(model_name)
10
- feature_extractor = SegformerFeatureExtractor.from_pretrained(model_name)
11
-
12
- def create_color_map(num_classes):
13
- """ μž„μ˜μ˜ 색상 맀핑 생성 """
14
- np.random.seed(42) # μž¬ν˜„μ„±μ„ μœ„ν•œ μ‹œλ“œ μ„€μ •
15
- return {i: np.random.randint(0, 256, 3) for i in range(num_classes)}
16
-
17
- def segment_image(image):
18
- # 이미지 처리
19
- image = image.resize(512,512)
20
- inputs = feature_extractor(images=image, return_tensors="pt")
21
- with torch.no_grad():
22
- outputs = model(**inputs)
23
-
24
- # 마스크 생성
25
- upsampled_logits = torch.nn.functional.interpolate(
26
- outputs.logits, size=image.size[::-1], mode="bilinear", align_corners=False
27
- )
28
- upsampled_predictions = upsampled_logits.argmax(dim=1)
29
- mask = upsampled_predictions.squeeze().numpy()
30
-
31
- # 색상 매핑
32
- color_map = create_color_map(150) # ADE20Kμ—λŠ” μ•½ 150개의 ν΄λž˜μŠ€κ°€ 있음
33
- colored_mask = np.array([color_map[class_id] for class_id in mask.flatten()]).reshape(mask.shape + (3,))
34
-
35
- # 결과 반환
36
- return Image.fromarray(colored_mask.astype(np.uint8))
37
-
38
- # 예시 이미지 경로
39
- example_images = ["image1.jpg", "image2.jpg", "image3.jpg"]
40
-
41
- # Gradio 인터페이스 설정
42
- iface = gr.Interface(
43
- fn=segment_image,
44
- inputs=gr.inputs.Image(type="pil"),
45
- outputs="image",
46
- title="Image Segmentation with SegFormer",
47
- description="Upload an image to segment it using SegFormer model.",
48
- examples=example_images
49
  )
50
 
51
- # 인터페이스 실행
52
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+
3
+ from matplotlib import gridspec
4
+ import matplotlib.pyplot as plt
5
  import numpy as np
6
+ from PIL import Image
7
+ import tensorflow as tf
8
+ from transformers import SegformerFeatureExtractor, TFSegformerForSemanticSegmentation
9
+
10
# Hub checkpoint id, named once so the preprocessor and the network are
# always loaded from the same place.
_CHECKPOINT = "nvidia/segformer-b5-finetuned-cityscapes-1024-1024"

# Preprocessor that turns an input image into model-ready tensors.
feature_extractor = SegformerFeatureExtractor.from_pretrained(_CHECKPOINT)

# TensorFlow SegFormer network used for inference in this file.
model = TFSegformerForSemanticSegmentation.from_pretrained(_CHECKPOINT)
16
 
17
def ade_palette():
    """Return the RGB palette used to colour segmentation classes.

    Despite the function name, this is not a 150-entry ADE20K palette: the
    list holds 19 colours and is indexed by class id, i.e. entry ``i`` is
    the colour painted for class ``i``.

    Returns:
        list[list[int]]: A fresh list of ``[R, G, B]`` triples with
        components in the range 0-255.
    """
    return [
        [255, 0, 0],      # red
        [255, 228, 0],    # yellow
        [171, 242, 0],    # yellow-green
        [0, 216, 255],    # sky blue
        [0, 0, 255],      # blue
        [255, 0, 221],    # pink
        [116, 116, 116],  # gray
        [95, 0, 255],     # purple
        [255, 94, 0],     # orange
        [71, 200, 62],    # green
        [153, 0, 76],     # magenta
        [67, 116, 217],   # between sky blue and blue
        [153, 112, 0],    # mustard
        [87, 129, 0],     # dark green
        [255, 169, 169],  # light pink
        [35, 30, 183],    # dark blue
        [225, 186, 133],  # skin tone
        [206, 251, 201],  # light green
        [165, 102, 255],  # light purple
    ]
40
+
41
# Class names, one per line of labels.txt; the position in the list is the
# class id used to look the name up.
# rstrip is used instead of slicing off the last character so that the final
# label stays intact when the file does not end with a newline, and so that a
# stray "\r" from CRLF line endings is dropped as well.
with open("labels.txt", "r", encoding="utf-8") as fp:
    labels_list = [line.rstrip("\r\n") for line in fp]

# Palette as an array so that integer label arrays can index it directly.
colormap = np.asarray(ade_palette())
48
+
49
def label_to_color_image(label):
    """Map a 2-D array of class ids to their palette colours.

    Args:
        label: 2-D integer array of class ids.

    Returns:
        The result of indexing the module-level ``colormap`` with
        ``label``: one palette row per element of ``label``.

    Raises:
        ValueError: If ``label`` is not 2-D, or contains an id outside
            ``[0, len(colormap))``.
    """
    if label.ndim != 2:
        raise ValueError("Expect 2-D input label")

    # An empty array has no min/max, and there is nothing to validate.
    if label.size:
        if np.max(label) >= len(colormap):
            raise ValueError("label value too large.")
        # A negative id would silently wrap around to the end of the palette.
        if np.min(label) < 0:
            raise ValueError("label value must be non-negative.")
    return colormap[label]
56
+
57
def draw_plot(pred_img, seg):
    """Build a figure showing the overlay image next to a class legend.

    Drawing goes through explicit ``Axes`` objects rather than pyplot's
    implicit "current figure", and the figure is deregistered from pyplot
    before it is returned. This keeps figures from piling up in pyplot's
    global registry on every call and avoids calls drawing into each
    other's figure.

    Args:
        pred_img: Image array to display in the left panel.
        seg: Predicted class ids; must expose ``.numpy()`` (a TensorFlow
            tensor in this file).

    Returns:
        matplotlib.figure.Figure: The populated figure. It is no longer
        tracked by pyplot but can still be rendered or saved.
    """
    fig = plt.figure(figsize=(20, 15))
    grid_spec = gridspec.GridSpec(1, 2, width_ratios=[6, 1], figure=fig)

    # Left panel: the blended image, without axes decoration.
    image_ax = fig.add_subplot(grid_spec[0])
    image_ax.imshow(pred_img)
    image_ax.axis("off")

    # One colour row per known class, in class-id order.
    label_names = np.asarray(labels_list)
    full_label_map = np.arange(len(label_names)).reshape(len(label_names), 1)
    full_color_map = label_to_color_image(full_label_map)

    # Right panel: legend restricted to the classes present in this image.
    unique_labels = np.unique(seg.numpy().astype("uint8"))
    legend_ax = fig.add_subplot(grid_spec[1])
    legend_ax.imshow(
        full_color_map[unique_labels].astype(np.uint8), interpolation="nearest"
    )
    legend_ax.yaxis.tick_right()
    legend_ax.set_yticks(np.arange(len(unique_labels)))
    legend_ax.set_yticklabels(label_names[unique_labels])
    legend_ax.set_xticks([])
    legend_ax.tick_params(width=0.0, labelsize=25)

    # Drop pyplot's reference; the Figure object itself remains usable.
    plt.close(fig)
    return fig
77
+
78
def sepia(input_img):
    """Segment an image and return a figure with the overlay and a legend.

    Despite its name, no sepia filter is applied: the image is run through
    the segmentation model, each predicted class is painted with its
    palette colour, and the mask is blended 50/50 with the input.

    Args:
        input_img: Image array accepted by ``PIL.Image.fromarray``
            (presumably H x W x 3 uint8 from the Gradio image input;
            TODO confirm).

    Returns:
        The matplotlib figure produced by ``draw_plot``.
    """
    # Force three channels so the blend below lines up with the RGB mask
    # even when the input is greyscale or carries an alpha channel.
    input_img = Image.fromarray(input_img).convert("RGB")

    inputs = feature_extractor(images=input_img, return_tensors="tf")
    outputs = model(**inputs)
    logits = outputs.logits

    # Move axis 1 to the end so the class scores are the last axis for
    # the resize and argmax below.
    logits = tf.transpose(logits, [0, 2, 3, 1])
    # PIL's ``size`` is (width, height); the resize target is given as
    # (height, width), hence the reversal.
    logits = tf.image.resize(logits, input_img.size[::-1])
    seg = tf.math.argmax(logits, axis=-1)[0]

    # Convert to NumPy once instead of on every loop iteration.
    seg_np = seg.numpy()
    color_seg = np.zeros(
        (seg_np.shape[0], seg_np.shape[1], 3), dtype=np.uint8
    )  # height, width, 3
    for label, color in enumerate(colormap):
        color_seg[seg_np == label, :] = color

    # Show image + mask
    pred_img = np.array(input_img) * 0.5 + color_seg * 0.5
    pred_img = pred_img.astype(np.uint8)

    return draw_plot(pred_img, seg)
103
+
104
# Example inputs offered in the UI.
# NOTE(review): confirm these files exist next to app.py.
_EXAMPLE_IMAGES = [
    "person-1.jpg",
    "person-2.jpg",
    "person-3.jpg",
    "person-4.jpg",
    "person-5.jpg",
]

# Web UI: one image in, one matplotlib plot out, flagging disabled.
# NOTE(review): `shape=` and `allow_flagging=` depend on the installed Gradio
# version; verify against the pinned release.
demo = gr.Interface(
    fn=sepia,
    inputs=gr.Image(shape=(400, 600)),
    outputs=["plot"],
    examples=_EXAMPLE_IMAGES,
    allow_flagging="never",
)

demo.launch()
config.json DELETED
@@ -1,372 +0,0 @@
1
- {
2
- "architectures": [
3
- "SegformerForSemanticSegmentation"
4
- ],
5
- "attention_probs_dropout_prob": 0.0,
6
- "classifier_dropout_prob": 0.1,
7
- "decoder_hidden_size": 256,
8
- "depths": [
9
- 2,
10
- 2,
11
- 2,
12
- 2
13
- ],
14
- "downsampling_rates": [
15
- 1,
16
- 4,
17
- 8,
18
- 16
19
- ],
20
- "drop_path_rate": 0.1,
21
- "hidden_act": "gelu",
22
- "hidden_dropout_prob": 0.0,
23
- "hidden_sizes": [
24
- 32,
25
- 64,
26
- 160,
27
- 256
28
- ],
29
- "id2label": {
30
- "0": "wall",
31
- "1": "building",
32
- "2": "sky",
33
- "3": "floor",
34
- "4": "tree",
35
- "5": "ceiling",
36
- "6": "road",
37
- "7": "bed ",
38
- "8": "windowpane",
39
- "9": "grass",
40
- "10": "cabinet",
41
- "11": "sidewalk",
42
- "12": "person",
43
- "13": "earth",
44
- "14": "door",
45
- "15": "table",
46
- "16": "mountain",
47
- "17": "plant",
48
- "18": "curtain",
49
- "19": "chair",
50
- "20": "car",
51
- "21": "water",
52
- "22": "painting",
53
- "23": "sofa",
54
- "24": "shelf",
55
- "25": "house",
56
- "26": "sea",
57
- "27": "mirror",
58
- "28": "rug",
59
- "29": "field",
60
- "30": "armchair",
61
- "31": "seat",
62
- "32": "fence",
63
- "33": "desk",
64
- "34": "rock",
65
- "35": "wardrobe",
66
- "36": "lamp",
67
- "37": "bathtub",
68
- "38": "railing",
69
- "39": "cushion",
70
- "40": "base",
71
- "41": "box",
72
- "42": "column",
73
- "43": "signboard",
74
- "44": "chest of drawers",
75
- "45": "counter",
76
- "46": "sand",
77
- "47": "sink",
78
- "48": "skyscraper",
79
- "49": "fireplace",
80
- "50": "refrigerator",
81
- "51": "grandstand",
82
- "52": "path",
83
- "53": "stairs",
84
- "54": "runway",
85
- "55": "case",
86
- "56": "pool table",
87
- "57": "pillow",
88
- "58": "screen door",
89
- "59": "stairway",
90
- "60": "river",
91
- "61": "bridge",
92
- "62": "bookcase",
93
- "63": "blind",
94
- "64": "coffee table",
95
- "65": "toilet",
96
- "66": "flower",
97
- "67": "book",
98
- "68": "hill",
99
- "69": "bench",
100
- "70": "countertop",
101
- "71": "stove",
102
- "72": "palm",
103
- "73": "kitchen island",
104
- "74": "computer",
105
- "75": "swivel chair",
106
- "76": "boat",
107
- "77": "bar",
108
- "78": "arcade machine",
109
- "79": "hovel",
110
- "80": "bus",
111
- "81": "towel",
112
- "82": "light",
113
- "83": "truck",
114
- "84": "tower",
115
- "85": "chandelier",
116
- "86": "awning",
117
- "87": "streetlight",
118
- "88": "booth",
119
- "89": "television receiver",
120
- "90": "airplane",
121
- "91": "dirt track",
122
- "92": "apparel",
123
- "93": "pole",
124
- "94": "land",
125
- "95": "bannister",
126
- "96": "escalator",
127
- "97": "ottoman",
128
- "98": "bottle",
129
- "99": "buffet",
130
- "100": "poster",
131
- "101": "stage",
132
- "102": "van",
133
- "103": "ship",
134
- "104": "fountain",
135
- "105": "conveyer belt",
136
- "106": "canopy",
137
- "107": "washer",
138
- "108": "plaything",
139
- "109": "swimming pool",
140
- "110": "stool",
141
- "111": "barrel",
142
- "112": "basket",
143
- "113": "waterfall",
144
- "114": "tent",
145
- "115": "bag",
146
- "116": "minibike",
147
- "117": "cradle",
148
- "118": "oven",
149
- "119": "ball",
150
- "120": "food",
151
- "121": "step",
152
- "122": "tank",
153
- "123": "trade name",
154
- "124": "microwave",
155
- "125": "pot",
156
- "126": "animal",
157
- "127": "bicycle",
158
- "128": "lake",
159
- "129": "dishwasher",
160
- "130": "screen",
161
- "131": "blanket",
162
- "132": "sculpture",
163
- "133": "hood",
164
- "134": "sconce",
165
- "135": "vase",
166
- "136": "traffic light",
167
- "137": "tray",
168
- "138": "ashcan",
169
- "139": "fan",
170
- "140": "pier",
171
- "141": "crt screen",
172
- "142": "plate",
173
- "143": "monitor",
174
- "144": "bulletin board",
175
- "145": "shower",
176
- "146": "radiator",
177
- "147": "glass",
178
- "148": "clock",
179
- "149": "flag"
180
- },
181
- "image_size": 224,
182
- "initializer_range": 0.02,
183
- "label2id": {
184
- "airplane": 90,
185
- "animal": 126,
186
- "apparel": 92,
187
- "arcade machine": 78,
188
- "armchair": 30,
189
- "ashcan": 138,
190
- "awning": 86,
191
- "bag": 115,
192
- "ball": 119,
193
- "bannister": 95,
194
- "bar": 77,
195
- "barrel": 111,
196
- "base": 40,
197
- "basket": 112,
198
- "bathtub": 37,
199
- "bed ": 7,
200
- "bench": 69,
201
- "bicycle": 127,
202
- "blanket": 131,
203
- "blind": 63,
204
- "boat": 76,
205
- "book": 67,
206
- "bookcase": 62,
207
- "booth": 88,
208
- "bottle": 98,
209
- "box": 41,
210
- "bridge": 61,
211
- "buffet": 99,
212
- "building": 1,
213
- "bulletin board": 144,
214
- "bus": 80,
215
- "cabinet": 10,
216
- "canopy": 106,
217
- "car": 20,
218
- "case": 55,
219
- "ceiling": 5,
220
- "chair": 19,
221
- "chandelier": 85,
222
- "chest of drawers": 44,
223
- "clock": 148,
224
- "coffee table": 64,
225
- "column": 42,
226
- "computer": 74,
227
- "conveyer belt": 105,
228
- "counter": 45,
229
- "countertop": 70,
230
- "cradle": 117,
231
- "crt screen": 141,
232
- "curtain": 18,
233
- "cushion": 39,
234
- "desk": 33,
235
- "dirt track": 91,
236
- "dishwasher": 129,
237
- "door": 14,
238
- "earth": 13,
239
- "escalator": 96,
240
- "fan": 139,
241
- "fence": 32,
242
- "field": 29,
243
- "fireplace": 49,
244
- "flag": 149,
245
- "floor": 3,
246
- "flower": 66,
247
- "food": 120,
248
- "fountain": 104,
249
- "glass": 147,
250
- "grandstand": 51,
251
- "grass": 9,
252
- "hill": 68,
253
- "hood": 133,
254
- "house": 25,
255
- "hovel": 79,
256
- "kitchen island": 73,
257
- "lake": 128,
258
- "lamp": 36,
259
- "land": 94,
260
- "light": 82,
261
- "microwave": 124,
262
- "minibike": 116,
263
- "mirror": 27,
264
- "monitor": 143,
265
- "mountain": 16,
266
- "ottoman": 97,
267
- "oven": 118,
268
- "painting": 22,
269
- "palm": 72,
270
- "path": 52,
271
- "person": 12,
272
- "pier": 140,
273
- "pillow": 57,
274
- "plant": 17,
275
- "plate": 142,
276
- "plaything": 108,
277
- "pole": 93,
278
- "pool table": 56,
279
- "poster": 100,
280
- "pot": 125,
281
- "radiator": 146,
282
- "railing": 38,
283
- "refrigerator": 50,
284
- "river": 60,
285
- "road": 6,
286
- "rock": 34,
287
- "rug": 28,
288
- "runway": 54,
289
- "sand": 46,
290
- "sconce": 134,
291
- "screen": 130,
292
- "screen door": 58,
293
- "sculpture": 132,
294
- "sea": 26,
295
- "seat": 31,
296
- "shelf": 24,
297
- "ship": 103,
298
- "shower": 145,
299
- "sidewalk": 11,
300
- "signboard": 43,
301
- "sink": 47,
302
- "sky": 2,
303
- "skyscraper": 48,
304
- "sofa": 23,
305
- "stage": 101,
306
- "stairs": 53,
307
- "stairway": 59,
308
- "step": 121,
309
- "stool": 110,
310
- "stove": 71,
311
- "streetlight": 87,
312
- "swimming pool": 109,
313
- "swivel chair": 75,
314
- "table": 15,
315
- "tank": 122,
316
- "television receiver": 89,
317
- "tent": 114,
318
- "toilet": 65,
319
- "towel": 81,
320
- "tower": 84,
321
- "trade name": 123,
322
- "traffic light": 136,
323
- "tray": 137,
324
- "tree": 4,
325
- "truck": 83,
326
- "van": 102,
327
- "vase": 135,
328
- "wall": 0,
329
- "wardrobe": 35,
330
- "washer": 107,
331
- "water": 21,
332
- "waterfall": 113,
333
- "windowpane": 8
334
- },
335
- "layer_norm_eps": 1e-06,
336
- "mlp_ratios": [
337
- 4,
338
- 4,
339
- 4,
340
- 4
341
- ],
342
- "model_type": "segformer",
343
- "num_attention_heads": [
344
- 1,
345
- 2,
346
- 5,
347
- 8
348
- ],
349
- "num_channels": 3,
350
- "num_encoder_blocks": 4,
351
- "patch_sizes": [
352
- 7,
353
- 3,
354
- 3,
355
- 3
356
- ],
357
- "reshape_last_stage": true,
358
- "sr_ratios": [
359
- 8,
360
- 4,
361
- 2,
362
- 1
363
- ],
364
- "strides": [
365
- 4,
366
- 2,
367
- 2,
368
- 2
369
- ],
370
- "torch_dtype": "float32",
371
- "transformers_version": "4.12.0.dev0"
372
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
image1.jpg DELETED
Binary file (89.1 kB)
 
image2.jpg DELETED
Binary file (877 kB)
 
image3.jpg DELETED
Binary file (956 kB)
 
labels.txt ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ road
2
+ sidewalk
3
+ building
4
+ wall
5
+ fence
6
+ pole
7
+ traffic light
8
+ traffic sign
9
+ vegetation
10
+ terrain
11
+ sky
12
+ person
13
+ rider
14
+ car
15
+ truck
16
+ bus
17
+ train
18
+ motorcycle
19
+ bicycle
preprocessor_config.json DELETED
@@ -1,18 +0,0 @@
1
- {
2
- "do_normalize": true,
3
- "do_resize": true,
4
- "feature_extractor_type": "SegformerFeatureExtractor",
5
- "image_mean": [
6
- 0.485,
7
- 0.456,
8
- 0.406
9
- ],
10
- "image_std": [
11
- 0.229,
12
- 0.224,
13
- 0.225
14
- ],
15
- "reduce_labels": true,
16
- "resample": 2,
17
- "size": 512
18
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f4df97633cbedd558ecffa3ad228ace5af37e082678390b45a9d22745787c61
3
- size 15092257
 
 
 
 
tf_model.h5 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d38f99e2a8e73bbdb4635669be5bfcbbfc85b4b5c1ac75d36b47312c7fc5d06e
3
- size 15285696