"""Gradio app that reads a water meter from a photo.

A YOLO model (weights in ``best.pt``) locates the reading on the meter,
TrOCR transcribes it, and QReader decodes a QR code that is expected to
carry a Python-literal dictionary describing the meter (it must contain an
``Address`` key when the result is saved).
"""
import ast
import functools
import json
import logging
import re
from datetime import datetime

import gradio as gr
from ultralytics import YOLO
from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from qreader import QReader
import cv2

logger = logging.getLogger(__name__)

# Heavy models are created once at import time and shared by every request.
processor = TrOCRProcessor.from_pretrained("microsoft/trocr-large-stage1")
model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-large-stage1")
qreader = QReader()

# Text shown in the QR output box when no QR code could be decoded.
NO_QR_MESSAGE = "No QR code detected"

# Characters that must never reach a file name built from QR-supplied data.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00]')


@functools.lru_cache(maxsize=1)
def _load_detector():
    """Return the YOLO detector for ``best.pt``, loading the weights only once."""
    return YOLO('best.pt')


def _recognize_text(crop):
    """Run TrOCR on a PIL image and return the decoded string."""
    pixel_values = processor(crop, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values)
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]


def _decode_qr(image_path):
    """Return the first successfully decoded QR payload in the image, or None.

    Raises:
        ValueError: if OpenCV cannot read ``image_path`` as an image.
    """
    bgr = cv2.imread(image_path)
    if bgr is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise ValueError("the uploaded file could not be read as an image")
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
    decoded = qreader.detect_and_decode(image=rgb)
    # NOTE(review): QReader appears to yield None for codes it detects but
    # cannot decode, so skip falsy entries rather than taking index 0 blindly.
    return next((payload for payload in decoded if payload), None)


def _save_reading(qr_payload, reading):
    """Write the QR data plus the timestamped reading to ``<Address>.json``.

    Args:
        qr_payload: QR code content; must be a Python dict literal that
            contains an ``Address`` key.
        reading: The meter reading to store under ``Last_Reading``.

    Returns:
        The path of the JSON file that was written.

    Raises:
        ValueError: if the payload is not a dict literal with a usable
            ``Address`` entry.
    """
    # literal_eval only evaluates literals, so it is safe on untrusted text,
    # but it raises on anything that is not valid literal syntax.
    try:
        data_dict = ast.literal_eval(qr_payload)
    except (ValueError, SyntaxError) as err:
        raise ValueError("the QR code does not contain a valid dictionary") from err
    if not isinstance(data_dict, dict) or 'Address' not in data_dict:
        raise ValueError("the QR code data has no 'Address' entry")

    # The address comes from the QR code (untrusted input): strip path
    # separators and other reserved characters so it cannot escape the
    # working directory.
    safe_name = _UNSAFE_FILENAME_CHARS.sub("_", str(data_dict['Address'])).strip()
    if not safe_name:
        raise ValueError("the QR code 'Address' entry is empty")
    file_path = f"{safe_name}.json"

    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    data_dict['Last_Reading'] = {timestamp: f'{reading}'}
    with open(file_path, "w", encoding="utf-8") as file:
        json.dump(data_dict, file, indent=4)
    return file_path


def yolo_and_trocr(image_input, save):
    """Detect and read a water meter, decode its QR code, optionally save.

    Args:
        image_input: The uploaded image, either a file path or a file-like
            wrapper exposing the path as ``.name`` (what ``gr.File`` hands
            over depends on the Gradio version).
        save: When truthy, the QR data and the reading are written to a JSON
            file named after the QR code's ``Address`` entry.

    Returns:
        A 4-tuple matching the Interface outputs: the annotated image, the
        reading with a decimal point inserted after the fifth character, the
        QR code text (or a "not detected" message), and the saved file path
        (``None`` when nothing was saved). On any failure the tuple is
        ``(None, <error message>, <hint>, None)``.
    """
    try:
        if image_input is None:
            raise ValueError("no image was provided")
        image_path = getattr(image_input, "name", image_input)

        results = _load_detector()(image_path, conf=0.5, iou=0.7)
        # plot() returns BGR; reverse the channel axis to get RGB for PIL.
        annotated = results[0].plot()[:, :, [2, 1, 0]]
        image = Image.fromarray(annotated)

        boxes = results[0].boxes.xyxy
        if len(boxes) == 0:
            raise ValueError("no meter reading was detected in the image")

        # Only the first detection is ever reported, so OCR just that crop
        # instead of running the (slow) TrOCR model on every box.
        raw_text = _recognize_text(image.crop(tuple(boxes[0].tolist())))
        text = f"{raw_text[:5]}.{raw_text[5:]}"  # fix decimals

        qr_payload = _decode_qr(image_path)
        decoded_text = NO_QR_MESSAGE if qr_payload is None else qr_payload

        file_path = None
        if save:
            if qr_payload is None:
                raise ValueError("cannot save the reading because no QR code was decoded")
            file_path = _save_reading(qr_payload, text)

        return image, text, decoded_text, file_path
    except Exception as e:
        # UI boundary: report the problem in the text boxes instead of
        # crashing the request. All four outputs must be returned, and None
        # is the empty value for the image and file components.
        logger.exception("Water meter processing failed")
        return (
            None,
            f"Your input is invalid: \n{e}",
            "Try Again: Make sure the meter and QR code are clearly captured",
            None,
        )


app = gr.Interface(
    fn=yolo_and_trocr,
    inputs=[
        gr.File(label="Input: Water Meter Image"),
        gr.Checkbox(label="Save"),
    ],
    outputs=[
        gr.Image(label='Output: Water Meter Photo'),
        gr.Textbox(label="Output: Water Meter Reading"),
        gr.Textbox(label="Output: QR Code Detection"),
        gr.File(label="Output: Saved Data"),
    ],
    title="Water Meter Reading with YOLO and OCR",
)

app.launch()