Spaces:
Running
on
Zero
Running
on
Zero
import subprocess | |
import os | |
import subprocess | |
from PIL import Image, ImageDraw | |
import re | |
import json | |
import subprocess | |
import spaces | |
def _parse_bbox(text):
    """Return the first ``[[x1, y1, x2, y2]]`` box in *text* as four ints, or None."""
    match = re.search(r'\[\[([0-9,\s]+)\]\]', text)
    if match is None:
        return None
    try:
        coords = [int(token) for token in match.group(1).split(',')]
    except ValueError:
        # The pattern also admits malformed groups such as "1,,2" (empty token)
        # or "1 2, 3" (digits split by whitespace); treat those as "no box".
        return None
    return coords if len(coords) == 4 else None


def process_inference_results(results, process_image=False):
    """
    Collect the text of every inference result and, on request, return the
    source images annotated with the bounding box mentioned in that text.

    A bounding box is recognised in ``result['text']`` as the first occurrence
    of ``[[x1, y1, x2, y2]]``. It is drawn as a red, 3-pixel-wide rectangle.
    A match that does not yield exactly four integers is ignored instead of
    raising, so one malformed answer does not abort the whole batch.

    Images are only opened when ``process_image`` is true; a text-only call
    therefore performs no image I/O and does not need 'image_path' at all.

    :param results: Iterable of dicts with a 'text' key and, when
        ``process_image`` is true, an 'image_path' key.
    :param process_image: Whether to load and annotate the images.
    :return: ``(images, texts)`` when ``process_image`` is true, otherwise
        just the list of texts.
    """
    processed_images = []
    extracted_texts = []

    for result in results:
        text = result['text']
        extracted_texts.append(text)

        if not process_image:
            # The images would be discarded anyway; skip the load entirely.
            continue

        # convert() returns an independent copy, so the file handle opened by
        # Image.open can be released as soon as the conversion is done.
        with Image.open(result['image_path']) as source:
            img = source.convert("RGB")

        bbox = _parse_bbox(text)
        if bbox is not None:
            ImageDraw.Draw(img).rectangle(bbox, outline="red", width=3)

        processed_images.append(img)

    if process_image:
        return processed_images, extracted_texts
    return extracted_texts
def inference_and_run(image_dir, image_path, prompt, conv_mode="ferret_gemma_instruct", model_path="jadechoghari/Ferret-UI-Gemma2b", box=None, process_image=False, temperature=0.2, top_p=0.7, max_new_tokens=512, stop='<eos>'):
    """
    Run the inference and capture the errors for debugging.

    Steps:
    1. Write a one-entry ``eval.json`` in the current working directory
       describing the image, its size and the prompt (plus the box, if any).
    2. Run ``python -m model_UI`` as a subprocess with that file as input and
       ``eval_output.jsonl`` as the answers location.
    3. Load the produced JSONL records and hand them to
       ``process_inference_results``.

    :param image_dir: Directory passed to the subprocess as ``--image_path``.
    :param image_path: Path of the image; only its basename is written to
        ``eval.json``, and the file is opened here to read its dimensions.
    :param prompt: User prompt appended after the ``<image>`` token.
    :param conv_mode: Value forwarded as ``--conv_mode``.
    :param model_path: Value forwarded as ``--model_path``.
    :param box: Optional region as a comma-separated string
        ``"x1, y1, x2, y2"``. ``None`` and ``""`` both mean "no region".
    :param process_image: Forwarded to ``process_inference_results``.
    :param temperature: Value forwarded as ``--temperature``.
    :param top_p: Value forwarded as ``--top_p``.
    :param max_new_tokens: Value forwarded as ``--max_new_tokens``.
    :param stop: Accepted for interface compatibility; currently unused.
    :return: Whatever ``process_inference_results`` returns on success (a list
        of texts, or ``(images, texts)`` when ``process_image`` is true).
        On any failure the 2-tuple ``(None, None)`` is returned regardless of
        ``process_image``.
    """
    # Treat None and "" alike so that the prompt placeholder, the box payload
    # and the region flags are always switched on or off together.
    has_box = bool(box)

    conversation_value = f"<image>\n{prompt}"
    if has_box:
        conversation_value += " <bbox_location0>"

    # Open the image once and release the handle immediately; only the
    # dimensions are needed here.
    with Image.open(image_path) as img:
        image_w, image_h = img.size

    data_input = [{
        "id": 0,
        "image": os.path.basename(image_path),
        "image_h": image_h,
        "image_w": image_w,
        "conversations": [{"from": "human", "value": conversation_value}]
    }]

    if has_box:
        # Split on bare commas: float() tolerates surrounding whitespace, so
        # both "1, 2, 3, 4" and "1,2,3,4" are accepted.
        box_numbers = [int(float(coord)) for coord in box.split(",") if coord.strip()]
        # Structure it in the desired format
        data_input[0]["box_x1y1x2y2"] = [[box_numbers]]

    with open("eval.json", "w") as json_file:
        json.dump(data_input, json_file)

    print("eval.json file created successfully.")

    cmd = [
        "python", "-m", "model_UI",
        "--model_path", model_path,
        "--data_path", "eval.json",
        "--image_path", image_dir,
        "--answers_file", "eval_output.jsonl",
        "--num_beam", "1",
        "--temperature", str(temperature),
        "--top_p", str(top_p),
        "--max_new_tokens", str(max_new_tokens),
        "--conv_mode", conv_mode
    ]

    if has_box:
        cmd.extend(["--region_format", "box", "--add_region_feature"])

    # Only the subprocess call can raise CalledProcessError, so keep the try
    # body limited to it.
    try:
        result = subprocess.run(cmd, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as e:
        print(f"Error occurred during inference:\n{e}")
        print(f"Subprocess output:\n{e.output}")
        # stderr is captured separately from stdout and carries the traceback.
        print(f"Subprocess error (if any):\n{e.stderr}")
        return None, None

    print(f"Subprocess output:\n{result.stdout}")
    print(f"Subprocess error (if any):\n{result.stderr}")
    print("Inference completed. Output written to eval_output.jsonl")

    output_folder = 'eval_output.jsonl'
    if not os.path.exists(output_folder):
        print("Output folder not found.")
        return None, None

    if os.path.isfile(output_folder):
        # The path is handled as a directory of .jsonl files below; accept a
        # plain file as well instead of crashing inside os.listdir.
        output_file_path = output_folder
    else:
        # Sort so the chosen file does not depend on directory listing order.
        json_files = sorted(f for f in os.listdir(output_folder) if f.endswith(".jsonl"))
        if not json_files:
            print("No output JSONL files found.")
            return None, None
        output_file_path = os.path.join(output_folder, json_files[0])

    with open(output_file_path, "r") as output_file:
        # Skip blank lines (e.g. a trailing newline) that json.loads rejects.
        results = [json.loads(line) for line in output_file if line.strip()]

    return process_inference_results(results, process_image)