stupidog04 committed on
Commit
5017f0e
·
1 Parent(s): da2ea29

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -14
app.py CHANGED
@@ -1,10 +1,13 @@
1
- import pandas as pd
2
  import PIL
3
- from PIL import Image
4
- from PIL import ImageDraw
5
  import gradio as gr
6
  import torch
7
  import easyocr
 
 
 
 
8
 
9
  #torch.hub.download_url_to_file('https://github.com/AaronCWacker/Yggdrasil/blob/main/images/BeautyIsTruthTruthisBeauty.JPG', 'BeautyIsTruthTruthisBeauty.JPG')
10
  #torch.hub.download_url_to_file('https://github.com/AaronCWacker/Yggdrasil/blob/main/images/PleaseRepeatLouder.jpg', 'PleaseRepeatLouder.jpg')
@@ -23,15 +26,53 @@ def draw_boxes(image, bounds, color='yellow', width=2):
23
  draw.line([*p0, *p1, *p2, *p3, *p0], fill=color, width=width)
24
  return image
25
 
26
- def inference(img, lang):
 
 
 
27
  reader = easyocr.Reader(lang)
28
- bounds = reader.readtext(img.name)
29
- im = PIL.Image.open(img.name)
30
- draw_boxes(im, bounds)
31
- im.save('result.jpg')
32
- return ['result.jpg', pd.DataFrame(bounds).iloc[: , 1:]]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
- title = '🖼️Image to Multilingual OCR👁️Gradio'
 
35
  description = 'Multilingual OCR which works conveniently on all devices in multiple languages.'
36
  article = "<p style='text-align: center'></p>"
37
 
@@ -51,14 +92,24 @@ choices = [
51
  "hi",
52
  "ru"
53
  ]
 
 
54
  gr.Interface(
55
  inference,
56
- [gr.inputs.Image(type='file', label='Input'),gr.inputs.CheckboxGroup(choices, type="value", default=['en'], label='language')],
57
- [gr.outputs.Image(type='file', label='Output'), gr.outputs.Dataframe(headers=['text', 'confidence'])],
 
 
 
 
 
 
 
 
58
  title=title,
59
  description=description,
60
  article=article,
61
- examples=examples,
62
  css=css,
63
  enable_queue=True
64
- ).launch(debug=True)
 
1
+ import numpy as np
2
  import PIL
3
+ from PIL import Image, ImageDraw
 
4
  import gradio as gr
5
  import torch
6
  import easyocr
7
+ import os
8
+ from pathlib import Path
9
+ import cv2
10
+
11
 
12
  #torch.hub.download_url_to_file('https://github.com/AaronCWacker/Yggdrasil/blob/main/images/BeautyIsTruthTruthisBeauty.JPG', 'BeautyIsTruthTruthisBeauty.JPG')
13
  #torch.hub.download_url_to_file('https://github.com/AaronCWacker/Yggdrasil/blob/main/images/PleaseRepeatLouder.jpg', 'PleaseRepeatLouder.jpg')
 
26
  draw.line([*p0, *p1, *p2, *p3, *p0], fill=color, width=width)
27
  return image
28
 
29
def _transcode_for_web(src, dst):
    """Re-encode *src* as H.264 into *dst* with a two-pass ffmpeg run.

    The intermediate file *src* and ffmpeg's two-pass log files are removed
    afterwards, whether or not the encode succeeded.

    Raises:
        FileNotFoundError: if the ``ffmpeg`` executable cannot be found.
        subprocess.CalledProcessError: if either ffmpeg pass exits non-zero.
    """
    # Local import: subprocess is not among the file's top-level imports.
    import subprocess

    rate_args = [
        "-c:v", "libx264",
        "-b:v", "5000k",
        "-minrate", "1000k",
        "-maxrate", "8000k",
    ]
    try:
        # Argument lists (no shell) so paths are never re-parsed by a shell,
        # and check=True so a failed pass is reported instead of ignored.
        subprocess.run(
            ["ffmpeg", "-y", "-i", src, *rate_args,
             "-pass", "1", "-c:a", "aac", "-f", "mp4", os.devnull],
            check=True,
        )
        subprocess.run(
            ["ffmpeg", "-y", "-i", src, *rate_args,
             "-pass", "2", "-c:a", "aac", "-movflags", "faststart", dst],
            check=True,
        )
    finally:
        for leftover in (src, "ffmpeg2pass-0.log", "ffmpeg2pass-0.log.mbtree"):
            try:
                Path(leftover).unlink()
            except FileNotFoundError:
                pass  # nothing to clean up for this name


def inference(video, lang, time_step):
    """Run OCR over a video and return the path of the annotated copy.

    Text is detected with EasyOCR on one frame every ``time_step`` seconds.
    The boxes from the most recent detection are drawn on every frame until
    the next detection replaces them.

    Args:
        video: Path of the input video, as passed to ``cv2.VideoCapture``.
        lang: List of language codes handed to ``easyocr.Reader``.
        time_step: Seconds between two OCR passes. Values that map to less
            than one frame (including 0 or ``None``) run OCR on every frame.

    Returns:
        The path of the H.264-encoded result, ``'results.mp4'``.

    Raises:
        ValueError: if the input video cannot be opened.
        FileNotFoundError, subprocess.CalledProcessError: if the ffmpeg
            re-encode cannot be run or fails.
    """
    output = 'results.mp4'
    temp = f"{Path(output).stem}_temp{Path(output).suffix}"

    reader = easyocr.Reader(lang)

    vidcap = cv2.VideoCapture(video)
    if not vidcap.isOpened():
        vidcap.release()
        raise ValueError(f"Could not open video: {video!r}")

    output_video = None
    try:
        # OpenCV reports these properties as floats; sizes must be ints.
        fps = vidcap.get(cv2.CAP_PROP_FPS)
        width = int(vidcap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(vidcap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Number of frames between OCR passes. Clamped to >= 1 so a tiny
        # time step or an unknown frame rate cannot cause a modulo by zero.
        if fps > 0 and time_step and time_step > 0:
            step = max(1, int(fps * time_step))
        else:
            step = 1

        output_video = cv2.VideoWriter(
            temp, cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height)
        )

        # NOTE(review): the indentation of the original loop was lost; this
        # assumes only the OCR call is conditional and every frame is drawn
        # with the latest boxes (hence `bounds` starting empty) - confirm.
        bounds = []
        count = 0
        success, frame = vidcap.read()
        while success:
            if count % step == 0:
                bounds = reader.readtext(frame)

            # OpenCV frames are BGR while PIL colours are RGB: convert both
            # ways so named colours used by draw_boxes come out as intended.
            im = PIL.Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            draw_boxes(im, bounds)

            # Write each frame immediately instead of buffering the whole
            # video in memory.
            output_video.write(cv2.cvtColor(np.array(im), cv2.COLOR_RGB2BGR))

            success, frame = vidcap.read()
            count += 1
    finally:
        if output_video is not None:
            output_video.release()
        vidcap.release()

    # Compress the video for smaller size and web compatibility.
    _transcode_for_web(temp, output)
    return output
73
 
74
+
75
+ title = '🖼️Video to Multilingual OCR👁️Gradio'
76
  description = 'Multilingual OCR which works conveniently on all devices in multiple languages.'
77
  article = "<p style='text-align: center'></p>"
78
 
 
92
  "hi",
93
  "ru"
94
  ]
95
+
96
+
97
  gr.Interface(
98
  inference,
99
+ [
100
+ # gr.inputs.Image(type='file', label='Input Image'),
101
+ gr.inputs.Video(label='Input Video'),
102
+ gr.inputs.CheckboxGroup(choices, type="value", default=['en'], label='Language'),
103
+ gr.inputs.Number(label='Time Step (in seconds)', default=1.0)
104
+ ],
105
+ [
106
+ gr.outputs.Video(label='Output Video'),
107
+ # gr.outputs.Dataframe(headers=['Text', 'Confidence'])
108
+ ],
109
  title=title,
110
  description=description,
111
  article=article,
112
+ # examples=examples,
113
  css=css,
114
  enable_queue=True
115
+ ).launch(debug=True)