from PIL import Image
import cv2
import numpy as np
import matplotlib.pyplot as plt
import random
import hashlib
import os
from ultralytics import YOLO
import easyocr
import pytesseract
from openai import OpenAI

# Load a YOLOv8 segmentation model from local weights
model = YOLO("best.pt")

def get_label_color_id(label_id):
    """
    Generate a consistent BGR color for a numeric label_id by hashing the ID.
    This ensures that each numeric ID always maps to the same color.
    """
    label_str = str(int(label_id))
    # Use the MD5 hash of the label string as a seed
    seed_value = int(hashlib.md5(label_str.encode('utf-8')).hexdigest(), 16)
    random.seed(seed_value)
    # Return color in BGR format
    return (
        random.randint(50, 255),  # B
        random.randint(50, 255),  # G
        random.randint(50, 255)   # R
    )
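
# Note (a minimal sketch): colors are derived from an MD5 hash of the label ID,
# so repeated calls with the same ID always return the same tuple, e.g.:
#
#     assert get_label_color_id(7) == get_label_color_id(7)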

def segment_large_image_with_tiles(
    model,
    large_image_path,
    tile_size=1080,
    overlap=60,  # Overlap in pixels
    alpha=0.4,
    display=True
):
    """
    1. Reads a large image from `large_image_path`.
    2. Tiles it into sub-images of size `tile_size` x `tile_size`,
       stepping by (tile_size - overlap) to have overlap regions.
    3. Runs `model.predict()` on each tile and accumulates all polygons (in global coords).
    4. For each class, merges overlapping polygons by:
       - filling them on a single-channel mask
       - finding final contours of the connected regions
    5. Draws merged polygons onto an overlay and alpha-blends with the original image.
    6. Returns the final annotated image (in RGB) and a dictionary of merged contours.
    """

    # Read the large image
    image_bgr = cv2.imread(large_image_path)
    if image_bgr is None:
        raise ValueError(f"Could not load image from {large_image_path}")

    # Convert to RGB (for plotting consistency)
    image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
    H, W, _ = image_rgb.shape

    # Dictionary to store raw polygon coords for each class
    # (before merging)
    class_mask_dict = {}

    # Step size with overlap
    step = tile_size - overlap if overlap < tile_size else tile_size

    # ------------------------
    # 1) Perform Tiled Inference
    # ------------------------
    for top in range(0, H, step):
        for left in range(0, W, step):
            bottom = min(top + tile_size, H)
            right = min(left + tile_size, W)

            tile_rgb = image_rgb[top:bottom, left:right]

            # Run YOLOv8 model prediction
            results = model.predict(tile_rgb)
            if len(results) == 0:
                continue

            # Typically, results[0] holds the main predictions
            pred = results[0]

            # Check if we have valid masks
            if (pred.masks is None) or (pred.masks.xy is None):
                continue

            tile_masks_xy = pred.masks.xy  # list of polygon coords
            tile_labels = pred.boxes.cls   # list of class IDs

            # Convert to numpy int if needed
            if hasattr(tile_labels, 'cpu'):
                tile_labels = tile_labels.cpu().numpy()
            tile_labels = tile_labels.astype(int).tolist()

            # Accumulate polygon coords in global space
            for label_id, polygon in zip(tile_labels, tile_masks_xy):
                # Convert polygon float coords to int points in shape (N,1,2)
                polygon_pts = polygon.reshape((-1, 1, 2)).astype(np.int32)

                # Offset the polygon to the large image coords
                polygon_pts[:, 0, 0] += left  # x-offset
                polygon_pts[:, 0, 1] += top   # y-offset

                if label_id not in class_mask_dict:
                    class_mask_dict[label_id] = []
                class_mask_dict[label_id].append(polygon_pts)

    # -----------------------------------------
    # 2) Merge Overlapping Polygons For Each Class
    #    by rasterizing them in a mask and then
    #    finding final contours
    # -----------------------------------------
    merged_class_mask_dict = {}
    for label_id, polygons_cv in class_mask_dict.items():
        # Create a blank mask (single channel) for the entire image
        mask = np.zeros((H, W), dtype=np.uint8)

        # Fill all polygons for this label on the mask
        for pts in polygons_cv:
            cv2.fillPoly(mask, [pts], 255)

        # Now findContours to get merged regions
        # Use RETR_EXTERNAL so we just get outer boundaries of each connected region
        contours, _ = cv2.findContours(
            mask,
            mode=cv2.RETR_EXTERNAL,
            method=cv2.CHAIN_APPROX_SIMPLE
        )

        # Store final merged contours
        merged_class_mask_dict[label_id] = contours

    # -----------------------
    # 3) Draw Merged Polygons
    # -----------------------
    overlay = image_rgb.copy()
    for label_id, contours in merged_class_mask_dict.items():
        color_bgr = get_label_color_id(label_id)
        for cnt in contours:
            # Fill each contour on the overlay
            cv2.fillPoly(overlay, [cnt], color_bgr)

    # 4) Alpha blend
    output = cv2.addWeighted(overlay, alpha, image_rgb, 1 - alpha, 0)

    # 5) Optional Display
    if display:
        plt.figure(figsize=(12, 12))
        plt.imshow(output)
        plt.axis('off')
        plt.title("Segmentation on Large Image (Overlapped Tiles + Merged Polygons)")
        plt.show()

    return output, merged_class_mask_dict
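
# Example usage (a minimal sketch; "site_plan.png" is a hypothetical path):
#
#     annotated_rgb, merged_masks = segment_large_image_with_tiles(
#         model, "site_plan.png", tile_size=1080, overlap=60, display=False
#     )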


def usable_data(img_results, image_1):
    """
    Extract bounding boxes, centers, and polygon areas from the segmentation
    results for a single image. Returns a dictionary keyed by label,
    with each value a list of object data: { 'bbox', 'center', 'area' }.
    """
    width, height = image_1.width, image_1.height
    image_data = {}
    for key in img_results.keys():
        image_data[key] = []
        for polygon in img_results[key]:
            polygon = np.array(polygon)

            # Handle varying polygon shapes
            # If shape is (N, 1, 2) e.g. from cv2 findContours
            if polygon.ndim == 3 and polygon.shape[1] == 1 and polygon.shape[2] == 2:
                polygon = polygon.reshape(-1, 2)
            elif polygon.ndim == 2 and polygon.shape[1] == 1:
                polygon = np.squeeze(polygon, axis=1)

            # Now we expect polygon to be (N, 2):
            xs = polygon[:, 0]
            ys = polygon[:, 1]

            # Bounding box
            xmin, xmax = xs.min(), xs.max()
            ymin, ymax = ys.min(), ys.max()
            bbox = (xmin, ymin, xmax, ymax)

            # Center
            centerX = (xmin + xmax) / 2.0
            centerY = (ymin + ymax) / 2.0
            # Image center
            cx = width / 2.0
            cy = height / 2.0

            # Direction of the object relative to the image center.
            # Image y-coordinates grow downward, so invert dy so that a
            # positive dy means the object lies north of the center.
            dx = centerX - cx
            dy = cy - centerY
            if dx > 0 and dy > 0:
                direction = "NE"
            elif dx > 0 and dy < 0:
                direction = "SE"
            elif dx < 0 and dy > 0:
                direction = "NW"
            elif dx < 0 and dy < 0:
                direction = "SW"
            elif dx == 0 and dy > 0:
                direction = "N"
            elif dx == 0 and dy < 0:
                direction = "S"
            elif dy == 0 and dx > 0:
                direction = "E"
            elif dy == 0 and dx < 0:
                direction = "W"
            else:
                direction = "Center"


            # Polygon area (Shoelace formula)
            # area = 0.5 * | x0*y1 + x1*y2 + ... + x_{n-1}*y0 - (y0*x1 + y1*x2 + ... + y_{n-1}*x0 ) |
            area = 0.5 * np.abs(
                np.dot(xs, np.roll(ys, 1)) - np.dot(ys, np.roll(xs, 1))
            )

            image_data[key].append({
                'bbox': bbox,
                'center': (centerX, centerY),
                'area': area,
                "direction": direction
            })
    return image_data
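
# Worked example for the shoelace area above (a sanity check, not part of the
# pipeline): for a unit square with xs = [0, 1, 1, 0] and ys = [0, 0, 1, 1],
# np.dot(xs, np.roll(ys, 1)) = 0 and np.dot(ys, np.roll(xs, 1)) = 2,
# so area = 0.5 * |0 - 2| = 1.0, as expected.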


def plot_differences_on_image1(
    image1_path,
    mask_dict1,  # e.g., label_name -> list of contours for image1
    image2_path,
    mask_dict2,  # e.g., label_name -> list of contours for image2
    display=True
):
    """
    Compare two images (and their object masks). Plot all differences on Image 1 only:
      - Red: Objects that are missing on Image 1 (present in Image 2 but not Image 1).
      - Green: Objects that are missing on Image 2 (present in Image 1 but not Image 2).

    :param image1_path: Path to the first image
    :param mask_dict1:  dict[label_name] = [contour1, contour2, ...] for the first image
    :param image2_path: Path to the second image
    :param mask_dict2:  dict[label_name] = [contour1, contour2, ...] for the second image
    :param display:     If True, shows the final overlay with matplotlib.
    :return: A tuple:
             - overlay1 (numpy array in RGB) with all differences highlighted
             - list_of_differences: Names of labels with differences
             - difference_masks: A dict with keys "missing_on_img1" and "missing_on_img2",
               where each key maps to a dict of label -> list of contours for the respective differences.
    """

    # Read both images
    img1_bgr = cv2.imread(image1_path)
    img2_bgr = cv2.imread(image2_path)
    if img1_bgr is None or img2_bgr is None:
        raise ValueError("Could not read one of the input images.")

    # Convert to RGB
    img1_rgb = cv2.cvtColor(img1_bgr, cv2.COLOR_BGR2RGB)
    img2_rgb = cv2.cvtColor(img2_bgr, cv2.COLOR_BGR2RGB)

    # Check matching dimensions
    H1, W1, _ = img1_rgb.shape
    H2, W2, _ = img2_rgb.shape
    if (H1 != H2) or (W1 != W2):
        raise ValueError("Images must be the same size to compare masks reliably.")

    # Prepare an overlay on top of Image 1
    overlay1 = img1_rgb.copy()

    # Take the union of all labels in both dictionaries
    all_labels = set(mask_dict1.keys()).union(set(mask_dict2.keys()))

    # Colors:
    RED = (255, 0, 0)    # (R, G, B)
    GREEN = (0, 255, 0)  # (R, G, B)

    # Track differences
    list_of_differences = []
    difference_masks = {
        "missing_on_img1": {},  # dict[label_name] = list of contours
        "missing_on_img2": {},  # dict[label_name] = list of contours
    }

    for label_id in all_labels:
        # Create binary masks for this label in each image
        mask1 = np.zeros((H1, W1), dtype=np.uint8)
        mask2 = np.zeros((H1, W1), dtype=np.uint8)

        # Fill polygons for label_id in Image 1
        if label_id in mask_dict1:
            for cnt in mask_dict1[label_id]:
                cv2.fillPoly(mask1, [cnt], 255)

        # Fill polygons for label_id in Image 2
        if label_id in mask_dict2:
            for cnt in mask_dict2[label_id]:
                cv2.fillPoly(mask2, [cnt], 255)

        # Missing on Image 1 (present in Image 2 but not in Image 1)
        # => mask2 AND (NOT mask1)
        missing_on_img1 = cv2.bitwise_and(mask2, cv2.bitwise_not(mask1))

        # Missing on Image 2 (present in Image 1 but not in Image 2)
        # => mask1 AND (NOT mask2)
        missing_on_img2 = cv2.bitwise_and(mask1, cv2.bitwise_not(mask2))

        # Extract contours of differences
        contours_missing_on_img1, _ = cv2.findContours(
            missing_on_img1, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )
        contours_missing_on_img2, _ = cv2.findContours(
            missing_on_img2, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )

        # Store contours in difference masks
        if contours_missing_on_img1:
            difference_masks["missing_on_img1"][label_id] = contours_missing_on_img1
        if contours_missing_on_img2:
            difference_masks["missing_on_img2"][label_id] = contours_missing_on_img2

        # If there are differences, track the label name
        if contours_missing_on_img1 or contours_missing_on_img2:
            list_of_differences.append(label_id)

        # Color them on the overlay of Image 1:
        for cnt in contours_missing_on_img1:
            cv2.drawContours(overlay1, [cnt], -1, RED, -1)  # highlight in red
        for cnt in contours_missing_on_img2:
            cv2.drawContours(overlay1, [cnt], -1, GREEN, -1)  # highlight in green

    # Display if required
    if display:
        plt.figure(figsize=(10, 8))
        plt.imshow(overlay1)
        plt.title("Differences on Image 1\n(Red: Missing on Image 1, Green: Missing on Image 2)")
        plt.axis("off")
        plt.show()

    return overlay1, list_of_differences, difference_masks
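
# Minimal end-to-end sketch tying the tiled segmentation and the difference
# overlay together. The file paths below are hypothetical placeholders; both
# blueprints must have identical dimensions for the mask comparison to work.
def compare_blueprints_example(img1_path="plan_v1.png", img2_path="plan_v2.png"):
    # Segment both blueprints with the tiled YOLOv8 pipeline (keys are class IDs).
    _, masks_1 = segment_large_image_with_tiles(model, img1_path, display=False)
    _, masks_2 = segment_large_image_with_tiles(model, img2_path, display=False)
    # Highlight per-class differences on the first blueprint.
    return plot_differences_on_image1(img1_path, masks_1, img2_path, masks_2, display=True)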



def preprocess_image(image_path):
    """
    1) Load and prepare the image for further analysis.
    2) Convert to grayscale, optionally binarize or threshold.
    3) Return the processed image.
    """
    img = cv2.imread(image_path)
    if img is None:
        raise ValueError(f"Could not load image from {image_path}")
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Optional: adaptive thresholding for clearer linework
    # thresholded = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
    #                                     cv2.THRESH_BINARY, 11, 2)

    return gray

def detect_lines_and_grid(processed_image):
    """
    1) Detect major horizontal/vertical lines using Hough transform or morphological ops.
    2) Identify grid lines by analyzing line segments alignment.
    3) Returns lines or grid intersections.
    """
    edges = cv2.Canny(processed_image, 50, 150, apertureSize=3)

    # Hough line detection for demonstration
    lines = cv2.HoughLinesP(edges, 1, np.pi/180, threshold=100,
                            minLineLength=100, maxLineGap=10)
    # Here you would parse out vertical/horizontal lines, cluster them, etc.

    return lines

def run_ocr(processed_image, method='easyocr'):
    """
    1) Use an OCR engine to detect text (room labels, dimensions, etc.).
    2) 'method' can switch between Tesseract or EasyOCR.
    3) Return recognized text data (text content and bounding boxes).
    """
    text_data = []

    if method == 'easyocr':
        reader = easyocr.Reader(['en', 'ko'], gpu=True)
        result = reader.readtext(processed_image, detail=1, paragraph=False)
        # result structure: [ [bbox, text, confidence], ... ]
        for (bbox, text, conf) in result:
            text_data.append({'bbox': bbox, 'text': text, 'confidence': conf})
    else:
        # Tesseract approach
        config = r'--psm 6'
        tess_result = pytesseract.image_to_data(processed_image, config=config, output_type=pytesseract.Output.DICT)
        # parse data into a structured list
        for i in range(len(tess_result['text'])):
            txt = tess_result['text'][i].strip()
            if txt:
                x = tess_result['left'][i]
                y = tess_result['top'][i]
                w = tess_result['width'][i]
                h = tess_result['height'][i]
                conf = tess_result['conf'][i]
                text_data.append({
                    'bbox': (x, y, x+w, y+h),
                    'text': txt,
                    'confidence': conf
                })
    return text_data

def detect_symbols_and_rooms(processed_image):
    """
    1) Potentially run object detection (e.g., YOLO, Detectron2) to detect symbols:
       - Doors, balconies, fixtures, etc.
    2) Segment out rooms by combining wall detection + adjacency.
    3) Return data about room polygons, symbols, etc.
    """
    # Placeholder: real implementation would require a trained model or rule-based approach.
    # For demonstration, return empty data.
    rooms_data = []
    symbols_data = []
    return rooms_data, symbols_data



def blueprint_analyzer(image_path):
    """
    Orchestrate the entire pipeline on one image:
      1) Preprocess
      2) Detect structural lines
      3) OCR text detection
      4) Symbol/room detection
      5) Compute area differences or summarize
    """
    processed_img = preprocess_image(image_path)

    lines = detect_lines_and_grid(processed_img)
    text_data = run_ocr(processed_img, method='easyocr')


    return lines, text_data

system_prompt_4 = """You are given two sets of data from two blueprint images (Image 1 and Image 2). Along with each image’s extracted objects, you have:
A set of objects (walls, doors, stairs, etc.) along with information on their labels and centers.
A set of “areas” (e.g., “Balcony,” “Living Room,” “Hallway,” “Bathroom,” etc.) with bounding boxes to identify where each area is located. For a particular area, such as a balcony, there can be multiple instances.
You are also given the detected grid lines and OCR results.
A “nearest reference area” for each object, including a small textual description of distance and direction (e.g., “the door in the balcony,” “the door in the bathroom”).
Identifications of which objects match across the two images (same label and close centers).
Your Task
Ignore any objects that match between the two images (same label, nearly identical location).
Summarize the differences: newly added or missing objects, label changes, and any changes in object location.
Use the relative position data (distance/direction text) to describe where each new or missing object is/was in terms of known areas (e.g., “the missing wall in the northern side of the corridor,” “the new door near the balcony,” etc.).
Do not output raw numeric distances, bounding boxes, or polygon areas in your final summary. Instead, give a natural-language location description (e.g., “near the east side of the main hallway,” “slightly south of the balcony,” etc.).
Provide your answer in a concise Markdown format, focusing only on significant differences."""

def chat_seg_model(img1_path, img2_path):
    image1 = Image.open(img1_path)
    image2 = Image.open(img2_path)
    final_output_1, class_mask_dict_1 = segment_large_image_with_tiles(
        model,
        large_image_path=img1_path,
        tile_size=1080,
        overlap=120,
        alpha=0.4,
        display=True
    )
    final_output_2, class_mask_dict_2 = segment_large_image_with_tiles(
        model,
        large_image_path=img2_path,
        tile_size=1080,
        overlap=120,
        alpha=0.4,
        display=True
    )
    label_dict = {0: 'EMP', 1: 'balcony_area', 2: 'bathroom', 3: 'brick_wall', 4: 'concrete_wall', 5: 'corridor', 6: 'dining_area', 7: 'door', 8: 'double_window', 9: 'dressing_room', 10: 'elevator', 11: 'elevator_hall', 12: 'emergency_exit', 13: 'empty_area', 14: 'lobby', 15: 'pantry', 16: 'porch', 17: 'primary_insulation', 18: 'rooms', 19: 'single_window', 20: 'stairs', 21: 'thin_wall'}
    img1_results = {}
    for key in class_mask_dict_1.keys():
        img1_results[label_dict[key]] = class_mask_dict_1[key]
    img2_results = {}
    for key in class_mask_dict_2.keys():
        img2_results[label_dict[key]] = class_mask_dict_2[key]
    image_1, image_2 = image1, image2
    image_1_data = usable_data(img1_results, image_1)
    image_2_data = usable_data(img2_results, image_2)
    lines_1, text_data_1 = blueprint_analyzer(img1_path)
    lines_2, text_data_2 = blueprint_analyzer(img2_path)

    user_prompt_3 = f"""I have two construction blueprint images, Image 1 and Image 2, and here are their segmentation results (with bounding boxes, centers, and areas). Please compare them and provide a short Markdown summary of the differences, ignoring any objects that match in both images:

        Image 1:
        image: {image_1}
        segmentation results: {image_1_data}
        grid lines: {lines_1}
        ocr results: {text_data_1}
        Image 2:
        image: {image_2}
        segmentation results: {image_2_data}
        grid lines: {lines_2}
        ocr results: {text_data_2}

        Please:
        Also compare the area of corresponding objects if the change in their area is greater than 500 in magnitude
        Compare the two images only in terms of differences—ignore any objects that match (same label and near-identical center).
        For objects missing in Image 2 (but present in Image 1), or newly added in Image 2, indicate their relative position using known areas or approximate directions. For instance, mention if the missing doors were “towards the north side, near the elevator,” or if new walls appeared “in the southeastern corner, near the balcony.”
        Summarize any changes in labels or text, again without giving raw bounding box or polygon coordinate data.
        Provide your final output in a short, clear Markdown summary that describes where objects have changed.
        Mention if there are text/label changes (e.g., from an OCR perspective) in any particular area or region
    """
    client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))

    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": system_prompt_4},
            {"role": "user", "content": user_prompt_3}
        ]
    )

    print(completion.choices[0].message.content)
    return completion.choices[0].message.content
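
# Example entry point (a minimal sketch): "blueprint_v1.png" and
# "blueprint_v2.png" are hypothetical paths, and OPENAI_API_KEY is assumed to
# be set in the environment.
if __name__ == "__main__":
    diff_summary = chat_seg_model("blueprint_v1.png", "blueprint_v2.png")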