import subprocess import os import subprocess from PIL import Image, ImageDraw import re import json import subprocess import spaces def process_inference_results(results, process_image=False): """ Process the inference results by: 1. Adding bounding boxes on the image based on the coordinates in 'text'. 2. Extracting and returning the text prompt. :param results: List of inference results with bounding boxes in 'text'. :return: (image, text) """ processed_images = [] extracted_texts = [] for result in results: image_path = result['image_path'] img = Image.open(image_path).convert("RGB") draw = ImageDraw.Draw(img) bbox_str = re.search(r'\[\[([0-9,\s]+)\]\]', result['text']) if bbox_str: bbox = [int(coord) for coord in bbox_str.group(1).split(',')] x1, y1, x2, y2 = bbox draw.rectangle([x1, y1, x2, y2], outline="red", width=3) extracted_texts.append(result['text']) processed_images.append(img) if process_image: return processed_images, extracted_texts return extracted_texts @spaces.GPU() def inference_and_run(image_dir, image_path, prompt, conv_mode="ferret_gemma_instruct", model_path="jadechoghari/Ferret-UI-Gemma2b", box=None, process_image=False, temperature=0.2, top_p=0.7, max_new_tokens=512, stop=''): """ Run the inference and capture the errors for debugging. """ if box is not None: conversation_value = f"\n{prompt} " else: conversation_value = f"\n{prompt}" data_input = [{ "id": 0, "image": os.path.basename(image_path), "image_h": Image.open(image_path).height, "image_w": Image.open(image_path).width, "conversations": [{"from": "human", "value": conversation_value}] }] if box: box_numbers = [int(float(coord)) for coord in box.split(", ")] # Structure it in the desired format data_input[0]["box_x1y1x2y2"] = [[box_numbers]] with open("eval.json", "w") as json_file: json.dump(data_input, json_file) print("eval.json file created successfully.") cmd = [ "python", "-m", "model_UI", "--model_path", model_path, "--data_path", "eval.json", "--image_path", image_dir, "--answers_file", "eval_output.jsonl", "--num_beam", "1", "--temperature", str(temperature), "--top_p", str(top_p), "--max_new_tokens", str(max_new_tokens), "--conv_mode", conv_mode ] if box: cmd.extend(["--region_format", "box", "--add_region_feature"]) try: result = subprocess.run(cmd, check=True, capture_output=True, text=True) print(f"Subprocess output:\n{result.stdout}") print(f"Subprocess error (if any):\n{result.stderr}") print(f"Inference completed. Output written to eval_output.jsonl") output_folder = 'eval_output.jsonl' if os.path.exists(output_folder): json_files = [f for f in os.listdir(output_folder) if f.endswith(".jsonl")] if json_files: output_file_path = os.path.join(output_folder, json_files[0]) with open(output_file_path, "r") as output_file: results = [json.loads(line) for line in output_file] return process_inference_results(results, process_image) else: print("No output JSONL files found.") return None, None else: print("Output folder not found.") return None, None except subprocess.CalledProcessError as e: print(f"Error occurred during inference:\n{e}") print(f"Subprocess output:\n{e.output}") return None, None