import os
import re
import json
import subprocess

from PIL import Image, ImageDraw

import spaces
def process_inference_results(results, process_image=False):
    """
    Process the inference results by:
    1. Drawing bounding boxes on each image based on the coordinates found in 'text'.
    2. Extracting and returning the generated text.

    :param results: List of inference results, each with an 'image_path' and a 'text'
                    field that may contain coordinates of the form [[x1, y1, x2, y2]].
    :param process_image: If True, also return the annotated images.
    :return: (processed_images, extracted_texts) if process_image is True,
             otherwise extracted_texts only.
    """
    processed_images = []
    extracted_texts = []

    for result in results:
        image_path = result['image_path']
        img = Image.open(image_path).convert("RGB")
        draw = ImageDraw.Draw(img)

        # Look for a bounding box of the form [[x1, y1, x2, y2]] in the generated text.
        bbox_str = re.search(r'\[\[([0-9,\s]+)\]\]', result['text'])
        if bbox_str:
            bbox = [int(coord) for coord in bbox_str.group(1).split(',')]
            x1, y1, x2, y2 = bbox
            draw.rectangle([x1, y1, x2, y2], outline="red", width=3)

        extracted_texts.append(result['text'])
        processed_images.append(img)

    if process_image:
        return processed_images, extracted_texts
    return extracted_texts
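
# Illustrative example (not in the original file): the shape of `results` that
# process_inference_results expects is inferred from how it is read above -- each entry
# needs an 'image_path' and a 'text' that may embed a box such as [[x1, y1, x2, y2]].
# The file name below is hypothetical.
#
#   sample_results = [{"image_path": "screenshot.png",
#                      "text": "The submit button is at [[10, 20, 110, 60]]"}]
#   images, texts = process_inference_results(sample_results, process_image=True)
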
@spaces.GPU()
def inference_and_run(image_dir, image_path, prompt, conv_mode="ferret_gemma_instruct",
                      model_path="jadechoghari/Ferret-UI-Gemma2b", box=None,
                      process_image=False, temperature=0.2, top_p=0.7,
                      max_new_tokens=512, stop='<eos>'):
    """
    Run the inference as a subprocess and capture its output and errors for debugging.
    """
    # Reference the region placeholder in the prompt when a bounding box is provided.
    if box is not None:
        conversation_value = f"<image>\n{prompt} <bbox_location0>"
    else:
        conversation_value = f"<image>\n{prompt}"

    img = Image.open(image_path)
    data_input = [{
        "id": 0,
        "image": os.path.basename(image_path),
        "image_h": img.height,
        "image_w": img.width,
        "conversations": [{"from": "human", "value": conversation_value}]
    }]
    if box:
        # Parse "x1, y1, x2, y2" into integers and nest them in the expected format.
        box_numbers = [int(float(coord)) for coord in box.split(", ")]
        data_input[0]["box_x1y1x2y2"] = [[box_numbers]]

    with open("eval.json", "w") as json_file:
        json.dump(data_input, json_file)
    print("eval.json file created successfully.")
    cmd = [
        "python", "-m", "model_UI",
        "--model_path", model_path,
        "--data_path", "eval.json",
        "--image_path", image_dir,
        "--answers_file", "eval_output.jsonl",
        "--num_beam", "1",
        "--temperature", str(temperature),
        "--top_p", str(top_p),
        "--max_new_tokens", str(max_new_tokens),
        "--conv_mode", conv_mode
    ]
    if box:
        cmd.extend(["--region_format", "box", "--add_region_feature"])
    try:
        result = subprocess.run(cmd, check=True, capture_output=True, text=True)
        print(f"Subprocess output:\n{result.stdout}")
        print(f"Subprocess error (if any):\n{result.stderr}")
        print("Inference completed. Output written to eval_output.jsonl")

        # The answers path is treated as a directory and scanned for .jsonl result files.
        output_folder = 'eval_output.jsonl'
        if os.path.exists(output_folder):
            json_files = [f for f in os.listdir(output_folder) if f.endswith(".jsonl")]
            if json_files:
                output_file_path = os.path.join(output_folder, json_files[0])
                with open(output_file_path, "r") as output_file:
                    results = [json.loads(line) for line in output_file]
                return process_inference_results(results, process_image)
            else:
                print("No output JSONL files found.")
                return None, None
        else:
            print("Output folder not found.")
            return None, None
    except subprocess.CalledProcessError as e:
        print(f"Error occurred during inference:\n{e}")
        print(f"Subprocess output:\n{e.output}")
        return None, None
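
# Minimal usage sketch, not part of the original Space: the image directory, file name,
# prompt, and box below are placeholder assumptions. The decorated function is normally
# invoked from the Space itself; this block only shows the expected call shape.
if __name__ == "__main__":
    example_output = inference_and_run(
        image_dir=".",                         # hypothetical directory holding the image
        image_path="example_screenshot.png",   # hypothetical input screenshot
        prompt="Describe the UI element in this region.",
        box="10, 20, 110, 60",                 # optional "x1, y1, x2, y2" region string
    )
    print(example_output)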