File size: 3,868 Bytes
a526622
 
 
 
 
 
 
714db0a
a526622
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
714db0a
 
 
a526622
 
 
35f0b0b
 
 
 
 
 
a526622
 
 
 
 
35f0b0b
a526622
 
 
35f0b0b
 
 
a526622
 
 
 
 
 
 
 
 
 
714db0a
a526622
 
714db0a
 
 
a526622
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
714db0a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import subprocess
import os
import subprocess
from PIL import Image, ImageDraw
import re
import json
import subprocess
import spaces

def process_inference_results(results, process_image=False):
    """
    Collect the generated text (and optionally annotated images) from inference results.

    Each result is a dict with a 'text' entry (the model output) and an
    'image_path' entry (the image that output refers to). When
    `process_image` is true, every image is opened, converted to RGB and,
    if the text contains a box written as ``[[x1, y1, x2, y2]]``, a red
    rectangle is drawn at those coordinates. The coordinates are used as
    pixel positions exactly as they appear in the text.

    :param results: List of inference result dicts with 'image_path' and 'text'.
    :param process_image: If True, also load and annotate the images.
    :return: ``(images, texts)`` when `process_image` is true, otherwise
        only the list of texts.
    """
    extracted_texts = [result['text'] for result in results]

    if not process_image:
        # The images would be discarded anyway, so do not open or decode
        # them; a text-only call no longer depends on the files existing.
        return extracted_texts

    processed_images = []
    for result in results:
        # convert() returns an independent in-memory copy, so the file
        # handle behind the lazily-opened source image can be released.
        with Image.open(result['image_path']) as source:
            img = source.convert("RGB")

        bbox_match = re.search(r'\[\[([0-9,\s]+)\]\]', result['text'])
        if bbox_match:
            coords = [int(token) for token in re.findall(r'\d+', bbox_match.group(1))]
            # Only draw well-formed boxes; anything else in the double
            # brackets is ignored instead of aborting the whole batch.
            if len(coords) == 4:
                x1, y1, x2, y2 = coords
                # Order the corners so a box given as (right, bottom, left, top)
                # is still drawn; Pillow releases that validate the order
                # raise ValueError otherwise.
                left, right = sorted((x1, x2))
                top, bottom = sorted((y1, y2))
                ImageDraw.Draw(img).rectangle(
                    [left, top, right, bottom], outline="red", width=3
                )

        processed_images.append(img)

    return processed_images, extracted_texts
    
@spaces.GPU()
def inference_and_run(image_dir, image_path, prompt, conv_mode="ferret_gemma_instruct", model_path="jadechoghari/Ferret-UI-Gemma2b", box=None, process_image=False, temperature=0.2, top_p=0.7, max_new_tokens=512, stop='<eos>'):
    """
    Run the ``model_UI`` module on a single image and return its processed output.

    The function writes a one-entry ``eval.json`` describing the image and
    prompt, launches ``python -m model_UI`` as a subprocess, reads the first
    ``.jsonl`` answers file found under ``eval_output.jsonl`` and passes the
    parsed records to ``process_inference_results``. Subprocess output is
    printed for debugging.

    :param image_dir: Directory passed to the subprocess as ``--image_path``.
    :param image_path: Path of the image; its basename and size go into ``eval.json``.
    :param prompt: User prompt placed after the ``<image>`` token.
    :param conv_mode: Value forwarded as ``--conv_mode``.
    :param model_path: Value forwarded as ``--model_path``.
    :param box: Optional region as a comma-separated string "x1, y1, x2, y2".
        ``None`` or a blank string means no region.
    :param process_image: Forwarded to ``process_inference_results``.
    :param temperature: Value forwarded as ``--temperature``.
    :param top_p: Value forwarded as ``--top_p``.
    :param max_new_tokens: Value forwarded as ``--max_new_tokens``.
    :param stop: Accepted for interface compatibility; not used by this function.
    :return: The result of ``process_inference_results`` on success, or
        ``(None, None)`` when the subprocess fails or no output is found.
    """
    # Decide once whether a region was supplied so that the prompt token,
    # the box payload and the CLI flags can never disagree (a blank string
    # previously added the <bbox_location0> token without any box data).
    has_box = bool(box and box.strip())

    conversation_value = f"<image>\n{prompt}"
    if has_box:
        conversation_value += " <bbox_location0>"

    # Open the image a single time and release the file handle afterwards.
    with Image.open(image_path) as img:
        image_w, image_h = img.size

    data_input = [{
        "id": 0,
        "image": os.path.basename(image_path),
        "image_h": image_h,
        "image_w": image_w,
        "conversations": [{"from": "human", "value": conversation_value}]
    }]

    if has_box:
        # Accept "1, 2, 3, 4" as well as "1,2,3,4"; float() tolerates the
        # surrounding whitespace and fractional coordinates are truncated.
        box_numbers = [
            int(float(coord)) for coord in box.split(",") if coord.strip()
        ]
        # Structure it in the desired format
        data_input[0]["box_x1y1x2y2"] = [[box_numbers]]

    with open("eval.json", "w") as json_file:
        json.dump(data_input, json_file)

    print("eval.json file created successfully.")

    cmd = [
        "python", "-m", "model_UI",
        "--model_path", model_path,
        "--data_path", "eval.json",
        "--image_path", image_dir,
        "--answers_file", "eval_output.jsonl",
        "--num_beam", "1",
        "--temperature", str(temperature),
        "--top_p", str(top_p),
        "--max_new_tokens", str(max_new_tokens),
        "--conv_mode", conv_mode
    ]

    if has_box:
        cmd.extend(["--region_format", "box", "--add_region_feature"])

    try:
        result = subprocess.run(cmd, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        print(f"Error occurred during inference:\n{e}")
        print(f"Subprocess output:\n{e.output}")
        # The traceback of a failed run is on stderr, not stdout.
        print(f"Subprocess error output:\n{e.stderr}")
        return None, None

    print(f"Subprocess output:\n{result.stdout}")
    print(f"Subprocess error (if any):\n{result.stderr}")
    print("Inference completed. Output written to eval_output.jsonl")

    output_path = 'eval_output.jsonl'
    if not os.path.exists(output_path):
        print("Output folder not found.")
        return None, None

    if os.path.isdir(output_path):
        # Sort so the chosen file does not depend on directory listing order.
        json_files = sorted(
            f for f in os.listdir(output_path) if f.endswith(".jsonl")
        )
        if not json_files:
            print("No output JSONL files found.")
            return None, None
        output_file_path = os.path.join(output_path, json_files[0])
    else:
        # The answers path may also be a plain JSONL file; read it directly
        # instead of failing in os.listdir().
        output_file_path = output_path

    with open(output_file_path, "r") as output_file:
        # Skip blank lines (e.g. a trailing newline) that are not JSON records.
        results = [json.loads(line) for line in output_file if line.strip()]

    return process_inference_results(results, process_image)