SakibRumu committed on
Commit
2405743
·
verified ·
1 Parent(s): 3b9fd91

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -32
app.py CHANGED
@@ -1,29 +1,26 @@
1
  import gradio as gr
2
- import torch
3
  import cv2
 
4
  import numpy as np
 
5
  from PIL import Image
6
- from paddleocr import PaddleOCR # Import PaddleOCR
7
  from ultralytics import YOLO
 
8
 
9
- # Load model
10
  model = YOLO("/home/user/app/best.pt")
11
 
12
  # Label map
13
  label_map = {0: "Analog", 1: "Digital", 2: "Non-LP"}
14
 
15
- # Initialize PaddleOCR (for Bangla OCR)
16
- ocr = PaddleOCR(use_angle_cls=True, lang='en') # For Bangla language
17
 
18
- def process_frame(frame):
19
- # Resize to YOLO input shape
20
  input_img = cv2.resize(frame, (640, 640))
21
  results = model(input_img)[0]
22
  detections = results.boxes.data.cpu().numpy()
23
 
24
- extracted_texts = []
25
- confidences = []
26
-
27
  for det in detections:
28
  if len(det) < 6:
29
  continue
@@ -33,54 +30,73 @@ def process_frame(frame):
33
  label = label_map.get(int(cls), "Unknown")
34
  percent = f"{conf * 100:.2f}%"
35
 
36
- # Draw box and label on image
37
  cv2.rectangle(input_img, (x1, y1), (x2, y2), (255, 0, 0), 2)
38
  cv2.putText(input_img, f"{label}: {percent}", (x1, y1 - 10),
39
  cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
40
 
41
- # OCR using PaddleOCR
42
- cropped = frame[y1:y2, x1:x2] # Use original frame for OCR
43
  if cropped.size > 0:
44
- # Convert to RGB and run OCR
45
- result = ocr.ocr(cropped, cls=True)
46
- for line in result[0]:
47
- extracted_texts.append(line[1]) # Get the detected text
48
- confidences.append(percent)
49
-
50
- # Convert to PIL
51
- annotated = cv2.cvtColor(input_img, cv2.COLOR_BGR2RGB)
52
- pil_img = Image.fromarray(annotated)
53
-
54
- return pil_img, "\n".join(extracted_texts), ", ".join(confidences)
55
 
 
56
 
57
  def process_input(input_file):
58
  file_path = input_file.name
 
59
 
60
- if file_path.endswith(('.mp4', '.avi', '.mov')):
61
  cap = cv2.VideoCapture(file_path)
62
- ret, frame = cap.read()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  cap.release()
64
- if not ret:
65
- return None, "Couldn't read video", ""
 
 
66
  else:
 
67
  frame = cv2.imread(file_path)
68
  if frame is None:
69
  return None, "Invalid image", ""
70
 
71
- return process_frame(frame)
 
 
72
 
73
 
74
  interface = gr.Interface(
75
  fn=process_input,
76
  inputs=gr.File(type="filepath", label="Upload Image or Video"),
77
  outputs=[
78
- gr.Image(type="pil", label="Detected Output"),
79
  gr.Textbox(label="Detected Text (Bangla)"),
80
  gr.Textbox(label="Confidence (%)")
81
  ],
82
- title="YOLOv10n License Plate Detector (Bangla)",
83
- description="Upload an image or video. Detects license plates and extracts Bangla text using PaddleOCR."
84
  )
85
 
86
  interface.launch()
 
1
  import gradio as gr
 
2
  import cv2
3
+ import easyocr
4
  import numpy as np
5
+ import os
6
  from PIL import Image
 
7
  from ultralytics import YOLO
8
+ from datetime import datetime
9
 
10
+ # Load YOLO model
11
  model = YOLO("/home/user/app/best.pt")
12
 
13
  # Label map
14
  label_map = {0: "Analog", 1: "Digital", 2: "Non-LP"}
15
 
16
+ # EasyOCR Bengali
17
+ reader = easyocr.Reader(['bn'])
18
 
19
+ def annotate_frame(frame):
 
20
  input_img = cv2.resize(frame, (640, 640))
21
  results = model(input_img)[0]
22
  detections = results.boxes.data.cpu().numpy()
23
 
 
 
 
24
  for det in detections:
25
  if len(det) < 6:
26
  continue
 
30
  label = label_map.get(int(cls), "Unknown")
31
  percent = f"{conf * 100:.2f}%"
32
 
33
+ # Draw box and label
34
  cv2.rectangle(input_img, (x1, y1), (x2, y2), (255, 0, 0), 2)
35
  cv2.putText(input_img, f"{label}: {percent}", (x1, y1 - 10),
36
  cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
37
 
38
+ # OCR
39
+ cropped = frame[y1:y2, x1:x2]
40
  if cropped.size > 0:
41
+ ocr_result = reader.readtext(cropped)
42
+ for i, item in enumerate(ocr_result):
43
+ text = item[1].strip()
44
+ conf = item[2]
45
+ cv2.putText(input_img, text, (x1, y2 + 20 + i*25),
46
+ cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 255), 2)
 
 
 
 
 
47
 
48
+ return cv2.cvtColor(input_img, cv2.COLOR_BGR2RGB)
49
 
def process_input(input_file):
    """Run plate detection on an uploaded image or video.

    Args:
        input_file: The upload handed over by ``gr.File``. Either a plain
            path string (``type="filepath"``) or an object that exposes the
            path as ``.name``.

    Returns:
        A 4-tuple ``(video_path, image, text, confidence)`` lined up with the
        four interface outputs below. On success exactly one of
        ``video_path`` / ``image`` is set; on failure both are ``None`` and
        ``text`` carries the error message.
    """
    if input_file is None:
        return None, None, "No file uploaded", ""

    # gr.File(type="filepath") passes a str; file-wrapper objects carry the
    # path in `.name`. Accept both so the handler does not crash on a str.
    file_path = getattr(input_file, "name", input_file)
    ext = os.path.splitext(file_path)[-1].lower()

    if ext in ('.mp4', '.avi', '.mov'):
        cap = cv2.VideoCapture(file_path)
        out = None
        try:
            if not cap.isOpened():
                return None, None, "Couldn't read video", ""

            # Some containers report 0 FPS; fall back to a positive rate so
            # the writer is not opened with an invalid frame rate.
            fps = cap.get(cv2.CAP_PROP_FPS) or 25.0

            # Output path
            timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
            output_path = f"annotated_{timestamp}.mp4"
            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            out = cv2.VideoWriter(output_path, fourcc, fps, (640, 640))

            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                annotated = annotate_frame(frame)
                # Keep every frame at the writer's declared size, whatever
                # annotate_frame returns.
                annotated_resized = cv2.resize(annotated, (640, 640))
                # annotate_frame returns RGB; the writer expects BGR.
                out.write(cv2.cvtColor(annotated_resized, cv2.COLOR_RGB2BGR))
        finally:
            # Release the capture and writer even if a frame fails mid-way.
            cap.release()
            if out is not None:
                out.release()

        return output_path, None, "", ""

    # Image case
    frame = cv2.imread(file_path)
    if frame is None:
        return None, None, "Invalid image", ""

    annotated = annotate_frame(frame)
    pil_img = Image.fromarray(annotated)
    return None, pil_img, "", ""


# NOTE(review): the two text boxes are always returned empty because
# annotate_frame only draws the OCR text onto the frame and does not hand it
# back; populate them once annotate_frame exposes its OCR results.
interface = gr.Interface(
    fn=process_input,
    inputs=gr.File(type="filepath", label="Upload Image or Video"),
    # One component per returned value: components cannot be combined with
    # `|`, so video and image results get separate output slots.
    outputs=[
        gr.Video(label="Output Video"),
        gr.Image(type="pil", label="Output Image"),
        gr.Textbox(label="Detected Text (Bangla)"),
        gr.Textbox(label="Confidence (%)"),
    ],
    title="YOLOv5 License Plate Detector (Bangla OCR)",
    description="Upload an image or video. Detects license plates and extracts Bangla text using EasyOCR."
)

interface.launch()