# Spaces status: Runtime error
import gradio as gr | |
from ultralytics import YOLO | |
from PIL import Image | |
from transformers import TrOCRProcessor, VisionEncoderDecoderModel | |
from qreader import QReader | |
import cv2 | |
import json | |
import ast | |
from datetime import datetime | |
# Inference objects are built once at import time; yolo_and_trocr reads them
# as module globals on every call.
_TROCR_CHECKPOINT = "microsoft/trocr-large-stage1"

processor = TrOCRProcessor.from_pretrained(_TROCR_CHECKPOINT)
model = VisionEncoderDecoderModel.from_pretrained(_TROCR_CHECKPOINT)
qreader = QReader()
def yolo_and_trocr(image_input, save):
    """Detect the meter reading in a photo, OCR it, and decode the QR code.

    Args:
        image_input: The uploaded image, given either as a file path or as an
            object whose ``name`` attribute holds the path.
        save: When true and a QR payload was decoded, the payload plus the
            timestamped reading is written to ``<Address>.json``.

    Returns:
        A 4-tuple ``(image, reading, qr_text, file_path)``, one value per
        output component of the interface. ``file_path`` is ``None`` when
        nothing was saved. On any failure the tuple is
        ``(None, error message, retry hint, None)`` so the number of returned
        values always matches the number of outputs.
    """
    try:
        if image_input is None:
            raise ValueError("No image was uploaded")
        # Accept both a plain path and a file-like wrapper exposing `.name`.
        image_path = getattr(image_input, "name", image_input)

        # YOLO instantiated from the trained model
        yolo = YOLO('best.pt')
        results = yolo(image_path, conf=0.5, iou=0.7)
        # Reverse the channel order of the plotted array (presumably
        # BGR -> RGB) before handing it to PIL.
        annotated = results[0].plot()[:, :, [2, 1, 0]]
        image = Image.fromarray(annotated)

        # NOTE(review): crops are taken from the annotated rendering, so any
        # overlay drawn by plot() may end up inside the OCR input - confirm
        # this is intended.
        texts = []
        for box in results[0].boxes.xyxy:
            img_cropped = image.crop(tuple(box.tolist()))
            # TrOCR model is run to detect text in the cropped region
            pixel_values = processor(img_cropped, return_tensors="pt").pixel_values
            generated_ids = model.generate(pixel_values)
            texts.append(
                processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
            )
        if not texts:
            # Without this guard the user would see "list index out of range".
            raise ValueError("No meter reading was detected in the image")

        # Only the first detection is reported.
        text = texts[0]
        text = f"{text[:5]}.{text[5:]}"  # fix decimals: point after 5th char

        # Reading the QR code from the original (un-annotated) image
        bgr = cv2.imread(image_path)
        if bgr is None:
            raise ValueError("The uploaded file could not be read as an image")
        qr_code = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
        decoded = qreader.detect_and_decode(image=qr_code) or ()
        # Entries may be None for codes that were located but not decoded;
        # use the first one that actually carries a payload.
        qr_payload = next((item for item in decoded if item is not None), None)
        decoded_text = qr_payload if qr_payload is not None else "No QR code detected"

        # Nothing to persist: either saving was not requested or there is no
        # QR payload to save. The reading itself is still returned.
        if not save or qr_payload is None:
            return image, text, decoded_text, None

        # literal_eval parses Python literals only; it does not execute code.
        data_dict = ast.literal_eval(qr_payload)
        # The address comes from the QR code (untrusted): strip path
        # separators so it cannot point outside the working directory.
        address = str(data_dict['Address'])
        safe_name = address.replace("/", "_").replace("\\", "_").strip() or "reading"
        file_path = f"{safe_name}.json"

        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        data_dict['Last_Reading'] = {timestamp: text}
        # Serialise before opening the file so a failure cannot leave an
        # empty or truncated file behind.
        payload = json.dumps(data_dict, indent=4)
        with open(file_path, "w") as file:
            file.write(payload)
        return image, text, decoded_text, file_path
    except Exception as e:
        # Top-level UI boundary: report the problem instead of crashing, and
        # return one value per output component.
        return (
            None,
            f"Your input is invalid: {str(e)}",
            "Try Again: Make sure the meter and QR code are clearly captured",
            None,
        )
# Gradio UI: an uploaded image and a "Save" toggle go in; the annotated photo,
# the reading, the QR text and the optional saved file come out.
_input_components = [
    gr.File(label="Input: Water Meter Image"),
    gr.Checkbox(label="Save"),
]
_output_components = [
    gr.Image(label='Output: Water Meter Photo'),
    gr.Textbox(label="Output: Water Meter Reading"),
    gr.Textbox(label="Output: QR Code Detection"),
    gr.File(label="Output: Saved Data"),
]

app = gr.Interface(
    fn=yolo_and_trocr,
    inputs=_input_components,
    outputs=_output_components,
    title="Water Meter Reading with YOLO and OCR",
)

app.launch()