Video-to-Multilingual-OCR

Runtime error

App Files Files Community

Video-to-Multilingual-OCR / app.py

stupidog04

Update app.py

5017f0e almost 2 years ago

raw

history blame

4.54 kB

	import numpy as np
	import PIL
	from PIL import Image, ImageDraw
	import gradio as gr
	import torch
	import easyocr
	import os
	from pathlib import Path
	import cv2


	# Sample images fetched into the working directory at start-up.
	# Earlier samples, kept for reference (downloads disabled):
	#torch.hub.download_url_to_file('https://github.com/AaronCWacker/Yggdrasil/blob/main/images/BeautyIsTruthTruthisBeauty.JPG', 'BeautyIsTruthTruthisBeauty.JPG')
	#torch.hub.download_url_to_file('https://github.com/AaronCWacker/Yggdrasil/blob/main/images/PleaseRepeatLouder.jpg', 'PleaseRepeatLouder.jpg')
	#torch.hub.download_url_to_file('https://github.com/AaronCWacker/Yggdrasil/blob/main/images/ProhibitedInWhiteHouse.JPG', 'ProhibitedInWhiteHouse.JPG')
	_SAMPLE_IMAGES = (
	    ('https://raw.githubusercontent.com/AaronCWacker/Yggdrasil/master/images/20-Books.jpg', '20-Books.jpg'),
	    ('https://github.com/JaidedAI/EasyOCR/raw/master/examples/english.png', 'COVID.png'),
	    ('https://github.com/JaidedAI/EasyOCR/raw/master/examples/chinese.jpg', 'chinese.jpg'),
	    ('https://github.com/JaidedAI/EasyOCR/raw/master/examples/japanese.jpg', 'japanese.jpg'),
	    ('https://i.imgur.com/mwQFd7G.jpeg', 'Hindi.jpeg'),
	)

	# Download each (url, local filename) pair in the order listed above.
	for _sample_url, _sample_name in _SAMPLE_IMAGES:
	    torch.hub.download_url_to_file(_sample_url, _sample_name)

	def draw_boxes(image, bounds, color='yellow', width=2):
	    """Outline every detected text region on ``image`` and return it.

	    Args:
	        image: PIL image to draw on; it is modified in place.
	        bounds: Iterable of detection entries. The first element of each
	            entry is unpacked as four corner points, each an ``(x, y)``
	            pair.
	        color: Line colour handed to ``ImageDraw.line`` as ``fill``.
	        width: Line width in pixels.

	    Returns:
	        The same ``image`` object that was passed in.
	    """
	    draw = ImageDraw.Draw(image)
	    for bound in bounds:
	        p0, p1, p2, p3 = bound[0]
	        # Pass one flat [x, y, x, y, ...] list and end on p0 again so the
	        # outline is closed. The previous call mixed nested points with
	        # unpacked scalars, which is not a documented coordinate form.
	        draw.line([*p0, *p1, *p2, *p3, *p0], fill=color, width=width)
	    return image

	def inference(video, lang, time_step):
	    """Run OCR over a video and return the path of an annotated copy.

	    Text is detected every ``time_step`` seconds of footage; the boxes from
	    the most recent detection are drawn on every frame until the next
	    detection runs. The annotated frames are written to a temporary
	    ``mp4v`` file, which is then re-encoded with a two-pass ffmpeg/libx264
	    run into the final output.

	    Args:
	        video: Path of the input video, passed to ``cv2.VideoCapture``.
	        lang: List of language codes passed to ``easyocr.Reader``.
	        time_step: Seconds between OCR passes. Values that amount to less
	            than one frame result in OCR running on every frame.

	    Returns:
	        The output file name, ``'results.mp4'``.

	    Raises:
	        FileNotFoundError: If the ``ffmpeg`` executable is not on PATH.
	    """
	    # Local import: only this function launches external processes.
	    import subprocess

	    # output = f"{Path(video).stem}_detected{Path(src).suffix}"
	    output = 'results.mp4'
	    temp = f"{Path(output).stem}_temp{Path(output).suffix}"

	    reader = easyocr.Reader(lang)
	    vidcap = cv2.VideoCapture(video)
	    output_video = None
	    try:
	        # Stream properties are reported as floats; sizes must be ints.
	        fps = vidcap.get(cv2.CAP_PROP_FPS)
	        width = int(vidcap.get(cv2.CAP_PROP_FRAME_WIDTH))
	        height = int(vidcap.get(cv2.CAP_PROP_FRAME_HEIGHT))

	        # Frames between OCR passes. Clamp to 1 so a zero or tiny time step
	        # cannot cause a modulo-by-zero.
	        ocr_interval = max(1, int(fps * time_step))

	        output_video = cv2.VideoWriter(
	            temp, cv2.VideoWriter_fourcc(*"mp4v"), fps, (width, height)
	        )

	        bounds = []
	        count = 0
	        success, frame = vidcap.read()
	        while success:
	            if count % ocr_interval == 0:
	                bounds = reader.readtext(frame)
	            # OpenCV frames are BGR while PIL works in RGB: convert before
	            # drawing and back before writing so named colours come out
	            # as intended.
	            im = PIL.Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
	            draw_boxes(im, bounds)
	            # Write each frame immediately instead of buffering the whole
	            # video in memory.
	            output_video.write(cv2.cvtColor(np.array(im), cv2.COLOR_RGB2BGR))
	            success, frame = vidcap.read()
	            count += 1
	    finally:
	        if output_video is not None:
	            output_video.release()
	        vidcap.release()

	    # Compressing the video for smaller size and web compatibility.
	    rate_args = [
	        "-c:v", "libx264",
	        "-b:v", "5000k",
	        "-minrate", "1000k",
	        "-maxrate", "8000k",
	    ]
	    try:
	        first_pass = subprocess.run(
	            ["ffmpeg", "-y", "-i", temp, *rate_args,
	             "-pass", "1", "-c:a", "aac", "-f", "mp4", os.devnull]
	        )
	        # The second pass only makes sense when the first one succeeded.
	        if first_pass.returncode == 0:
	            subprocess.run(
	                ["ffmpeg", "-y", "-i", temp, *rate_args,
	                 "-pass", "2", "-c:a", "aac", "-movflags", "faststart", output]
	            )
	    finally:
	        # Remove the intermediate video and ffmpeg's two-pass log files.
	        for leftover in (temp, "ffmpeg2pass-0.log", "ffmpeg2pass-0.log.mbtree"):
	            try:
	                os.remove(leftover)
	            except FileNotFoundError:
	                # Nothing to clean up for this name.
	                pass
	    return output


	# Text shown around the Gradio interface.
	title = "🖼️Video to Multilingual OCR👁️Gradio"
	description = (
	    "Multilingual OCR which works conveniently on all devices in multiple languages."
	)
	article = "<p style='text-align: center'></p>"

	# Sample inputs as [file name, language codes] pairs.
	#['PleaseRepeatLouder.jpg',['ja']],['ProhibitedInWhiteHouse.JPG',['en']],['BeautyIsTruthTruthisBeauty.JPG',['en']],
	examples = [
	    ["20-Books.jpg", ["en"]],
	    ["COVID.png", ["en"]],
	    ["chinese.jpg", ["ch_sim", "en"]],
	    ["japanese.jpg", ["ja", "en"]],
	    ["Hindi.jpeg", ["hi", "en"]],
	]

	# Custom stylesheet for the image panes.
	css = (
	    ".output_image, .input_image "
	    "{height: 40rem !important; width: 100% !important;}"
	)

	# Language codes offered in the language selector.
	choices = ["ch_sim", "ch_tra", "de", "en", "es", "ja", "hi", "ru"]


	# Input widgets, listed in the order of inference(video, lang, time_step).
	# gr.inputs.Image(type='file', label='Input Image'),
	video_input = gr.inputs.Video(label='Input Video')
	language_input = gr.inputs.CheckboxGroup(
	    choices, type="value", default=['en'], label='Language'
	)
	time_step_input = gr.inputs.Number(label='Time Step (in seconds)', default=1.0)

	# Output widgets.
	# gr.outputs.Dataframe(headers=['Text', 'Confidence'])
	video_output = gr.outputs.Video(label='Output Video')

	# Build the interface, then start serving it.
	demo = gr.Interface(
	    inference,
	    [video_input, language_input, time_step_input],
	    [video_output],
	    title=title,
	    description=description,
	    article=article,
	    # examples=examples,
	    css=css,
	    enable_queue=True,
	)
	demo.launch(debug=True)