Pushpanjali committed
Commit a0aad55
1 Parent(s): 70ce5c3

adding files

Files changed (6)
  1. app.py +54 -0
  2. best.pt +3 -0
  3. llm_function.py +71 -0
  4. packages.txt +1 -0
  5. requirements.txt +8 -0
  6. seg_llm_function.py +552 -0
app.py ADDED
@@ -0,0 +1,54 @@
+ import gradio as gr
+ from PIL import Image
+ import numpy as np
+ from seg_llm_function import chat_seg_model
+ from llm_function import chat_claude, prompt
+
+ def final_func(img1, img2):
+     llm_resp = chat_claude(prompt, img1, img2)
+     seg_resp = chat_seg_model(img1, img2)
+     return llm_resp, seg_resp
+
+
+ # def process_image(image1, image2):
+ #     # Convert the input to a PIL Image object if it's not already
+ #     if isinstance(image, str):
+ #         image = Image.open(image)
+ #
+ #     # Resize the image to fit within a 400x400 pixel box while maintaining aspect ratio
+ #     max_size = 400
+ #     width, height = image.size
+ #     new_width = min(width, max_size)
+ #     new_height = int(max_size * height / width)
+ #     image = image.resize((new_width, new_height))
+ #
+ #     # Convert the image back to a numpy array for Gradio output
+ #     return np.array(image)
+
+ from ultralytics import YOLO
+
+ # Load the YOLOv8 segmentation model (pre-trained weights)
+ model = YOLO("best.pt")
+
+ def display_image(img1, img2):
+     image1 = Image.open(img1)
+     image2 = Image.open(img2)
+     return image1, image2
+
+
+ gr.Interface(
+     fn=final_func,
+     inputs=[gr.Image(type="filepath", interactive=True), gr.Image(type="filepath", interactive=True)],
+     # outputs=[gr.Image(), gr.Image()],
+     # outputs=["text", "text"],
+     outputs=[
+         gr.Markdown(label="VLM", show_copy_button=True, show_label=True, container=True),
+         gr.Markdown(label="SEG-VLM", show_copy_button=True, show_label=True, container=True),
+     ],
+     title="Blueprint Comparison: VLM & SEG-VLM",
+     description="Upload two construction blueprints to see the differences between them.",
+     flagging_mode="never",
+     fill_width=True,
+     submit_btn="Compare",
+     show_progress="full",
+ ).launch(pwa=True)
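The Interface wires both back-ends to a single Compare click: chat_claude sends the raw images to Claude, while chat_seg_model runs the tiled YOLO segmentation and then summarizes with GPT-4o-mini. A minimal local smoke test, assuming ANTHROPIC_API_KEY and OPENAI_API_KEY are set and the two hypothetical blueprint files below exist:

# Bypass app.py (importing it would call .launch()) and call the two pipelines directly.
from llm_function import chat_claude, prompt
from seg_llm_function import chat_seg_model

img1, img2 = "blueprint_v1.jpg", "blueprint_v2.jpg"   # hypothetical paths
print(chat_claude(prompt, img1, img2))    # VLM summary
print(chat_seg_model(img1, img2))         # SEG-VLM summary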
best.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b7fbbd7f37abf6d59606798c0224e5e5fea34689d8a2665b89677860c6a83fd3
+ size 54948245
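best.pt is stored as a Git LFS pointer; the ~55 MB weights are fetched with `git lfs pull`. A minimal sketch to confirm a local copy matches the pointer above (oid and size taken from the pointer):

import hashlib
import os

EXPECTED_OID = "b7fbbd7f37abf6d59606798c0224e5e5fea34689d8a2665b89677860c6a83fd3"
EXPECTED_SIZE = 54948245

def verify_weights(path="best.pt"):
    # Compare the local file against the oid/size recorded in the LFS pointer.
    assert os.path.getsize(path) == EXPECTED_SIZE, "size mismatch"
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    assert h.hexdigest() == EXPECTED_OID, "sha256 mismatch"
    print("best.pt matches the LFS pointer")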
llm_function.py ADDED
@@ -0,0 +1,71 @@
+ from PIL import Image
+ import os
+ import anthropic
+ import base64
+ from dotenv import load_dotenv
+ import cv2
+
+ load_dotenv()
+ client = anthropic.Anthropic(api_key=os.getenv('ANTHROPIC_API_KEY'))
+
+
+ prompt = """Given two construction blueprints, your task is to carefully analyze both blueprints and point out the differences for the following categories, in Markdown format:
+ 1. Structural grid.
+ 2. Layout areas - rooms, balcony, porch, staircase, elevator, etc.
+ 3. Interior changes or optimizations.
+ Summarize all the differences concisely in a paragraph, in Markdown format.
+ """
+
+
+ def encode_image(image_path):
+     with open(image_path, "rb") as image_file:
+         return base64.b64encode(image_file.read()).decode("utf-8")
+     # return base64.b64encode(image_path.getvalue()).decode("utf-8")
+
+ def chat_claude(prompt, image1, image2):
+     image1_data = encode_image(image1)
+     image2_data = encode_image(image2)
+     message = client.messages.create(
+         model="claude-3-opus-20240229",
+         max_tokens=4096,
+         messages=[
+             {
+                 "role": "user",
+                 "content": [
+                     {
+                         "type": "text",
+                         "text": "Image 1:"
+                     },
+                     {
+                         "type": "image",
+                         "source": {
+                             "type": "base64",
+                             # NOTE: assumes the uploaded blueprints are JPEGs.
+                             "media_type": "image/jpeg",
+                             "data": image1_data,
+                         },
+                     },
+                     {
+                         "type": "text",
+                         "text": "Image 2:"
+                     },
+                     {
+                         "type": "image",
+                         "source": {
+                             "type": "base64",
+                             "media_type": "image/jpeg",
+                             "data": image2_data,
+                         },
+                     },
+                     {
+                         "type": "text",
+                         "text": f"{prompt}"
+                     }
+                 ],
+             }
+         ],
+     )
+     return message.content[0].text
+
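encode_image base64-encodes the file, and the request hard-codes media_type as image/jpeg. If the Gradio filepath input can hand back PNGs, a small variant (a sketch, not part of the commit) could derive the media type from the file extension instead:

import mimetypes

def guess_media_type(image_path, default="image/jpeg"):
    # Map the extension to one of the MIME types the Anthropic API accepts.
    guessed, _ = mimetypes.guess_type(image_path)
    accepted = {"image/jpeg", "image/png", "image/gif", "image/webp"}
    return guessed if guessed in accepted else default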
packages.txt ADDED
@@ -0,0 +1 @@
+ libgl1-mesa-glx
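packages.txt lists apt packages installed into the Space's container; libgl1-mesa-glx provides the libGL.so.1 shared library that opencv-python needs at import time on a headless Debian/Ubuntu image.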
requirements.txt ADDED
@@ -0,0 +1,8 @@
+ anthropic==0.43.1
+ pillow==10.4.0
+ python-dotenv
+ opencv-python
+ openai
+ ultralytics
+ easyocr
+ pytesseract
seg_llm_function.py ADDED
@@ -0,0 +1,552 @@
+ from PIL import Image
+ import cv2
+ import numpy as np
+ import matplotlib.pyplot as plt
+ import random
+ import hashlib
+ import os
+ from ultralytics import YOLO
+ import easyocr
+ import pytesseract
+ from openai import OpenAI
+
+ # Load a YOLOv8 segmentation model (pre-trained weights)
+ model = YOLO("best.pt")
+
+ def get_label_color_id(label_id):
+     """
+     Generate a consistent BGR color for a numeric label_id by hashing the ID.
+     This ensures that each numeric ID always maps to the same color.
+     """
+     label_str = str(int(label_id))
+     # Use the MD5 hash of the label string as a seed
+     seed_value = int(hashlib.md5(label_str.encode('utf-8')).hexdigest(), 16)
+     random.seed(seed_value)
+     # Return color in BGR format
+     return (
+         random.randint(50, 255),  # B
+         random.randint(50, 255),  # G
+         random.randint(50, 255)   # R
+     )
+
+ def segment_large_image_with_tiles(
+     model,
+     large_image_path,
+     tile_size=1080,
+     overlap=60,   # overlap in pixels
+     alpha=0.4,
+     display=True
+ ):
+     """
+     1. Reads a large image from `large_image_path`.
+     2. Tiles it into sub-images of size `tile_size` x `tile_size`,
+        stepping by (tile_size - overlap) to create overlap regions.
+     3. Runs `model.predict()` on each tile and accumulates all polygons (in global coords).
+     4. For each class, merges overlapping polygons by:
+        - filling them on a single-channel mask
+        - finding final contours of the connected regions
+     5. Draws merged polygons onto an overlay and alpha-blends with the original image.
+     6. Returns the final annotated image (in RGB) and a dictionary of merged contours.
+     """
+
+     # Read the large image
+     image_bgr = cv2.imread(large_image_path)
+     if image_bgr is None:
+         raise ValueError(f"Could not load image from {large_image_path}")
+
+     # Convert to RGB (for plotting consistency)
+     image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
+     H, W, _ = image_rgb.shape
+
+     # Dictionary to store raw polygon coords for each class (before merging)
+     class_mask_dict = {}
+
+     # Step size with overlap
+     step = tile_size - overlap if overlap < tile_size else tile_size
+
+     # ------------------------
+     # 1) Perform tiled inference
+     # ------------------------
+     for top in range(0, H, step):
+         for left in range(0, W, step):
+             bottom = min(top + tile_size, H)
+             right = min(left + tile_size, W)
+
+             tile_rgb = image_rgb[top:bottom, left:right]
+
+             # Run YOLOv8 model prediction
+             results = model.predict(tile_rgb)
+             if len(results) == 0:
+                 continue
+
+             # Typically, results[0] holds the main predictions
+             pred = results[0]
+
+             # Check if we have valid masks
+             if (pred.masks is None) or (pred.masks.xy is None):
+                 continue
+
+             tile_masks_xy = pred.masks.xy   # list of polygon coords
+             tile_labels = pred.boxes.cls    # list of class IDs
+
+             # Convert to numpy int if needed
+             if hasattr(tile_labels, 'cpu'):
+                 tile_labels = tile_labels.cpu().numpy()
+             tile_labels = tile_labels.astype(int).tolist()
+
+             # Accumulate polygon coords in global space
+             for label_id, polygon in zip(tile_labels, tile_masks_xy):
+                 # Convert polygon float coords to int points in shape (N, 1, 2)
+                 polygon_pts = polygon.reshape((-1, 1, 2)).astype(np.int32)
+
+                 # Offset the polygon to the large-image coords
+                 polygon_pts[:, 0, 0] += left   # x-offset
+                 polygon_pts[:, 0, 1] += top    # y-offset
+
+                 if label_id not in class_mask_dict:
+                     class_mask_dict[label_id] = []
+                 class_mask_dict[label_id].append(polygon_pts)
+
+     # -----------------------------------------
+     # 2) Merge overlapping polygons for each class
+     #    by rasterizing them onto a mask and then
+     #    finding the final contours
+     # -----------------------------------------
+     merged_class_mask_dict = {}
+     for label_id, polygons_cv in class_mask_dict.items():
+         # Create a blank mask (single channel) for the entire image
+         mask = np.zeros((H, W), dtype=np.uint8)
+
+         # Fill all polygons for this label on the mask
+         for pts in polygons_cv:
+             cv2.fillPoly(mask, [pts], 255)
+
+         # findContours with RETR_EXTERNAL returns only the outer
+         # boundaries of each connected (merged) region
+         contours, _ = cv2.findContours(
+             mask,
+             mode=cv2.RETR_EXTERNAL,
+             method=cv2.CHAIN_APPROX_SIMPLE
+         )
+
+         # Store final merged contours
+         merged_class_mask_dict[label_id] = contours
+
+     # -----------------------
+     # 3) Draw merged polygons
+     # -----------------------
+     overlay = image_rgb.copy()
+     for label_id, contours in merged_class_mask_dict.items():
+         color_bgr = get_label_color_id(label_id)
+         for cnt in contours:
+             # Fill each contour on the overlay
+             cv2.fillPoly(overlay, [cnt], color_bgr)
+
+     # 4) Alpha blend
+     output = cv2.addWeighted(overlay, alpha, image_rgb, 1 - alpha, 0)
+
+     # 5) Optional display
+     if display:
+         plt.figure(figsize=(12, 12))
+         plt.imshow(output)
+         plt.axis('off')
+         plt.title("Segmentation on Large Image (Overlapped Tiles + Merged Polygons)")
+         plt.show()
+
+     return output, merged_class_mask_dict
+
+ def usable_data(img_results, image_1):
+     """
+     Extract bounding boxes, centers, and polygon areas from the segmentation
+     results for a single image. Returns a dictionary keyed by label,
+     with each value a list of object data: {'bbox', 'center', 'area', 'direction'}.
+     """
+     width, height = image_1.width, image_1.height
+     image_data = {}
+     for key in img_results.keys():
+         image_data[key] = []
+         for polygon in img_results[key]:
+             polygon = np.array(polygon)
+
+             # Handle varying polygon shapes
+             # If shape is (N, 1, 2), e.g. from cv2.findContours
+             if polygon.ndim == 3 and polygon.shape[1] == 1 and polygon.shape[2] == 2:
+                 polygon = polygon.reshape(-1, 2)
+             elif polygon.ndim == 2 and polygon.shape[1] == 1:
+                 polygon = np.squeeze(polygon, axis=1)
+
+             # Now we expect polygon to be (N, 2)
+             xs = polygon[:, 0]
+             ys = polygon[:, 1]
+
+             # Bounding box
+             xmin, xmax = xs.min(), xs.max()
+             ymin, ymax = ys.min(), ys.max()
+             bbox = (xmin, ymin, xmax, ymax)
+
+             # Center
+             centerX = (xmin + xmax) / 2.0
+             centerY = (ymin + ymax) / 2.0
+             x = width / 2
+             y = height / 2
+             # Direction relative to the image center
+             dx = x - centerX
+             dy = centerY - y   # Invert y-axis for proper orientation
+             if dx > 0 and dy > 0:
+                 direction = "NE"
+             elif dx > 0 and dy < 0:
+                 direction = "SE"
+             elif dx < 0 and dy > 0:
+                 direction = "NW"
+             elif dx < 0 and dy < 0:
+                 direction = "SW"
+             elif dx == 0 and dy > 0:
+                 direction = "N"
+             elif dx == 0 and dy < 0:
+                 direction = "S"
+             elif dy == 0 and dx > 0:
+                 direction = "E"
+             elif dy == 0 and dx < 0:
+                 direction = "W"
+             else:
+                 direction = "Center"
+
+             # Polygon area (shoelace formula):
+             # area = 0.5 * |x0*y1 + x1*y2 + ... + x_{n-1}*y0 - (y0*x1 + y1*x2 + ... + y_{n-1}*x0)|
+             area = 0.5 * np.abs(
+                 np.dot(xs, np.roll(ys, 1)) - np.dot(ys, np.roll(xs, 1))
+             )
+
+             image_data[key].append({
+                 'bbox': bbox,
+                 'center': (centerX, centerY),
+                 'area': area,
+                 'direction': direction
+             })
+     return image_data
+
+ def plot_differences_on_image1(
+     image1_path,
+     mask_dict1,   # e.g., label -> list of contours for image1
+     image2_path,
+     mask_dict2,   # e.g., label -> list of contours for image2
+     display=True
+ ):
+     """
+     Compare two images (and their object masks). Plot all differences on Image 1 only:
+     - Red: objects that are missing on Image 1 (present in Image 2 but not Image 1).
+     - Green: objects that are missing on Image 2 (present in Image 1 but not Image 2).
+
+     :param image1_path: Path to the first image
+     :param mask_dict1: dict[label] = [contour1, contour2, ...] for the first image
+     :param image2_path: Path to the second image
+     :param mask_dict2: dict[label] = [contour1, contour2, ...] for the second image
+     :param display: If True, shows the final overlay with matplotlib.
+     :return: A tuple:
+         - overlay1 (numpy array in RGB) with all differences highlighted
+         - list_of_differences: labels that have differences
+         - difference_masks: a dict with keys "missing_on_img1" and "missing_on_img2",
+           where each key maps label -> list of contours for the respective differences.
+     """
+
+     # Read both images
+     img1_bgr = cv2.imread(image1_path)
+     img2_bgr = cv2.imread(image2_path)
+     if img1_bgr is None or img2_bgr is None:
+         raise ValueError("Could not read one of the input images.")
+
+     # Convert to RGB
+     img1_rgb = cv2.cvtColor(img1_bgr, cv2.COLOR_BGR2RGB)
+     img2_rgb = cv2.cvtColor(img2_bgr, cv2.COLOR_BGR2RGB)
+
+     # Check matching dimensions
+     H1, W1, _ = img1_rgb.shape
+     H2, W2, _ = img2_rgb.shape
+     if (H1 != H2) or (W1 != W2):
+         raise ValueError("Images must be the same size to compare masks reliably.")
+
+     # Prepare an overlay on top of Image 1
+     overlay1 = img1_rgb.copy()
+
+     # Take the union of all labels in both dictionaries
+     all_labels = set(mask_dict1.keys()).union(set(mask_dict2.keys()))
+
+     # Colors (R, G, B)
+     RED = (255, 0, 0)
+     GREEN = (0, 255, 0)
+
+     # Track differences
+     list_of_differences = []
+     difference_masks = {
+         "missing_on_img1": {},   # dict[label] = list of contours
+         "missing_on_img2": {},   # dict[label] = list of contours
+     }
+
+     for label_id in all_labels:
+         # Create binary masks for this label in each image
+         mask1 = np.zeros((H1, W1), dtype=np.uint8)
+         mask2 = np.zeros((H1, W1), dtype=np.uint8)
+
+         # Fill polygons for label_id in Image 1
+         if label_id in mask_dict1:
+             for cnt in mask_dict1[label_id]:
+                 cv2.fillPoly(mask1, [cnt], 255)
+
+         # Fill polygons for label_id in Image 2
+         if label_id in mask_dict2:
+             for cnt in mask_dict2[label_id]:
+                 cv2.fillPoly(mask2, [cnt], 255)
+
+         # Missing on Image 1 (present in Image 2 but not in Image 1)
+         # => mask2 AND (NOT mask1)
+         missing_on_img1 = cv2.bitwise_and(mask2, cv2.bitwise_not(mask1))
+
+         # Missing on Image 2 (present in Image 1 but not in Image 2)
+         # => mask1 AND (NOT mask2)
+         missing_on_img2 = cv2.bitwise_and(mask1, cv2.bitwise_not(mask2))
+
+         # Extract contours of differences
+         contours_missing_on_img1, _ = cv2.findContours(
+             missing_on_img1, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
+         )
+         contours_missing_on_img2, _ = cv2.findContours(
+             missing_on_img2, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
+         )
+
+         # Store contours in difference masks
+         if contours_missing_on_img1:
+             difference_masks["missing_on_img1"][label_id] = contours_missing_on_img1
+         if contours_missing_on_img2:
+             difference_masks["missing_on_img2"][label_id] = contours_missing_on_img2
+
+         # If there are differences, track the label
+         if contours_missing_on_img1 or contours_missing_on_img2:
+             list_of_differences.append(label_id)
+
+         # Color them on the overlay of Image 1
+         for cnt in contours_missing_on_img1:
+             cv2.drawContours(overlay1, [cnt], -1, RED, -1)     # highlight in red
+         for cnt in contours_missing_on_img2:
+             cv2.drawContours(overlay1, [cnt], -1, GREEN, -1)   # highlight in green
+
+     # Display if required
+     if display:
+         plt.figure(figsize=(10, 8))
+         plt.imshow(overlay1)
+         plt.title("Differences on Image 1\n(Red: Missing on Image 1, Green: Missing on Image 2)")
+         plt.axis("off")
+         plt.show()
+
+     return overlay1, list_of_differences, difference_masks
+
+ def preprocess_image(image_path):
+     """
+     1) Load and prepare the image for further analysis.
+     2) Convert to grayscale, optionally binarize or threshold.
+     3) Return the processed image.
+     """
+     img = cv2.imread(image_path)
+     gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+
+     # Optional: adaptive thresholding for clearer linework
+     # thresholded = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
+     #                                     cv2.THRESH_BINARY, 11, 2)
+
+     return gray
+
+ def detect_lines_and_grid(processed_image):
+     """
+     1) Detect major horizontal/vertical lines using the Hough transform or morphological ops.
+     2) Identify grid lines by analyzing the alignment of line segments.
+     3) Return the detected lines or grid intersections.
+     """
+     edges = cv2.Canny(processed_image, 50, 150, apertureSize=3)
+
+     # Hough line detection for demonstration
+     lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=100,
+                             minLineLength=100, maxLineGap=10)
+     # Here you would parse out vertical/horizontal lines, cluster them, etc.
+
+     return lines
+
+ def run_ocr(processed_image, method='easyocr'):
+     """
+     1) Use an OCR engine to detect text (room labels, dimensions, etc.).
+     2) 'method' switches between EasyOCR and Tesseract.
+     3) Return recognized text data (text content and bounding boxes).
+     """
+     text_data = []
+
+     if method == 'easyocr':
+         reader = easyocr.Reader(['en', 'ko'], gpu=True)
+         result = reader.readtext(processed_image, detail=1, paragraph=False)
+         # result structure: [[bbox, text, confidence], ...]
+         for (bbox, text, conf) in result:
+             text_data.append({'bbox': bbox, 'text': text, 'confidence': conf})
+     else:
+         # Tesseract approach
+         config = r'--psm 6'
+         tess_result = pytesseract.image_to_data(processed_image, config=config, output_type=pytesseract.Output.DICT)
+         # Parse data into a structured list
+         for i in range(len(tess_result['text'])):
+             txt = tess_result['text'][i].strip()
+             if txt:
+                 x = tess_result['left'][i]
+                 y = tess_result['top'][i]
+                 w = tess_result['width'][i]
+                 h = tess_result['height'][i]
+                 conf = tess_result['conf'][i]
+                 text_data.append({
+                     'bbox': (x, y, x + w, y + h),
+                     'text': txt,
+                     'confidence': conf
+                 })
+     return text_data
+
+ def detect_symbols_and_rooms(processed_image):
+     """
+     1) Potentially run object detection (e.g., YOLO, Detectron2) to detect symbols:
+        doors, balconies, fixtures, etc.
+     2) Segment out rooms by combining wall detection with adjacency analysis.
+     3) Return data about room polygons, symbols, etc.
+     """
+     # Placeholder: a real implementation would require a trained model or a rule-based approach.
+     # For demonstration, return empty data.
+     rooms_data = []
+     symbols_data = []
+     return rooms_data, symbols_data
+
+
+ def blueprint_analyzer(image_path):
+     """
+     Orchestrate the entire pipeline on one image:
+     1) Preprocess
+     2) Detect structural lines
+     3) OCR text detection
+     4) Symbol/room detection
+     5) Compute area differences or summarize
+     """
+     processed_img = preprocess_image(image_path)
+
+     lines = detect_lines_and_grid(processed_img)
+     text_data = run_ocr(processed_img, method='easyocr')
+
+     return lines, text_data
+ system_prompt_4 = """You are given two sets of data from two blueprint images (Image 1 and Image 2). Along with each image's extracted objects, you have:
+ A set of objects (walls, doors, stairs, etc.) along with information on their labels and centers.
+ A set of "areas" (e.g., "Balcony," "Living Room," "Hallway," "Bathroom," etc.) with bounding boxes to identify where each area is located. For a particular area such as a balcony, there can be multiple instances.
+ You are also given the detected grid lines and OCR results:
+ A "nearest reference area" for each object, including a short textual description of distance and direction (e.g., "the door in the balcony", "the door in the bathroom").
+ Identifications of which objects match across the two images (same label and close centers).
+ Your task:
+ Ignore any objects that match between the two images (same label, nearly identical location).
+ Summarize the differences: newly added or missing objects, label changes, and any changes in object location.
+ Use the relative position data (distance/direction text) to describe where each new or missing object is/was in terms of known areas (e.g., "the missing wall on the northern side of the corridor," "the new door near the balcony," etc.).
+ Do not output raw numeric distances, bounding boxes, or polygon areas in your final summary. Instead, give a natural-language location description (e.g., "near the east side of the main hallway," "slightly south of the balcony," etc.).
+ Provide your answer in a concise Markdown format, focusing only on significant differences."""
+
+ def chat_seg_model(img1_path, img2_path):
+     image1 = Image.open(img1_path)
+     image2 = Image.open(img2_path)
+     final_output_1, class_mask_dict_1 = segment_large_image_with_tiles(
+         model,
+         large_image_path=img1_path,
+         tile_size=1080,
+         overlap=120,
+         alpha=0.4,
+         display=True
+     )
+     final_output_2, class_mask_dict_2 = segment_large_image_with_tiles(
+         model,
+         large_image_path=img2_path,
+         tile_size=1080,
+         overlap=120,
+         alpha=0.4,
+         display=True
+     )
+     label_dict = {0: 'EMP', 1: 'balcony_area', 2: 'bathroom', 3: 'brick_wall', 4: 'concrete_wall', 5: 'corridor', 6: 'dining_area', 7: 'door', 8: 'double_window', 9: 'dressing_room', 10: 'elevator', 11: 'elevator_hall', 12: 'emergency_exit', 13: 'empty_area', 14: 'lobby', 15: 'pantry', 16: 'porch', 17: 'primary_insulation', 18: 'rooms', 19: 'single_window', 20: 'stairs', 21: 'thin_wall'}
+     img1_results = {}
+     for key in class_mask_dict_1.keys():
+         img1_results[label_dict[key]] = class_mask_dict_1[key]
+     img2_results = {}
+     for key in class_mask_dict_2.keys():
+         img2_results[label_dict[key]] = class_mask_dict_2[key]
+     width, height = image1.width, image1.height
+     image_1, image_2 = image1, image2
+     image_1_data = usable_data(img1_results, image_1)
+     image_2_data = usable_data(img2_results, image_2)
+     lines_1, text_data_1 = blueprint_analyzer(img1_path)
+     lines_2, text_data_2 = blueprint_analyzer(img2_path)
+
+     user_prompt_3 = f"""I have two construction blueprint images, Image 1 and Image 2, and here are their segmentation results (with bounding boxes, centers, and areas). Please compare them and provide a short Markdown summary of the differences, ignoring any objects that match in both images:
+
+ Image 1:
+ image: {image_1}
+ segmentation results: {image_1_data}
+ grid lines: {lines_1}
+ ocr results: {text_data_1}
+ Image 2:
+ image: {image_2}
+ segmentation results: {image_2_data}
+ grid lines: {lines_2}
+ ocr results: {text_data_2}
+
+ Please:
+ Also compare the area of corresponding objects if the change in their area is greater than 500 in magnitude.
+ Compare the two images only in terms of differences; ignore any objects that match (same label and near-identical center).
+ For objects missing in Image 2 (but present in Image 1), or newly added in Image 2, indicate their relative position using known areas or approximate directions. For instance, mention if the missing doors were "towards the north side, near the elevator," or if new walls appeared "in the southeastern corner, near the balcony."
+ Summarize any changes in labels or text, again without giving raw bounding box or polygon coordinate data.
+ Provide your final output in a short, clear Markdown summary that describes where objects have changed.
+ Mention if there are text/label changes (e.g., from an OCR perspective) in any particular area or region.
+ """
+     client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
+
+     completion = client.chat.completions.create(
+         model="gpt-4o-mini",
+         messages=[
+             {"role": "system", "content": system_prompt_4},
+             {"role": "user", "content": user_prompt_3}
+         ]
+     )
+
+     print(completion.choices[0].message.content)
+     return completion.choices[0].message.content
+
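chat_seg_model feeds the merged tile masks straight into the GPT-4o-mini prompt; plot_differences_on_image1 is available for a visual diff as well, although the app does not call it. A standalone sketch, assuming best.pt is present and the two hypothetical blueprints below are the same size:

from seg_llm_function import model, segment_large_image_with_tiles, plot_differences_on_image1

img1, img2 = "blueprint_v1.jpg", "blueprint_v2.jpg"   # hypothetical paths
# Tiled segmentation of each blueprint (display=False for headless use)
_, masks1 = segment_large_image_with_tiles(model, img1, tile_size=1080, overlap=120, display=False)
_, masks2 = segment_large_image_with_tiles(model, img2, tile_size=1080, overlap=120, display=False)
# Overlay red/green difference regions on the first blueprint
overlay, changed_labels, diff_masks = plot_differences_on_image1(img1, masks1, img2, masks2, display=False)
print("Labels with differences:", changed_labels)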