# Gradio app: image captioning, printed-text OCR, and handwritten-text
# recognition, each with English -> Arabic translation.
# Import needed library
from PIL import Image
import gradio as gr
import torch
import requests
import re
from transformers import pipeline, BlipProcessor, BlipForConditionalGeneration, TrOCRProcessor, VisionEncoderDecoderModel
# load image examples
img_urls_1 = ['https://i.pinimg.com/564x/f7/f5/bd/f7f5bd929e05a852ff423e6e02deea54.jpg', 'https://i.pinimg.com/564x/b4/29/69/b4296962cb76a72354a718109835caa3.jpg',
'https://i.pinimg.com/564x/f2/68/8e/f2688eccd6dd60fdad89ef78950b9ead.jpg']
for idx1, url1 in enumerate(img_urls_1):
image = Image.open(requests.get(url1, stream=True).raw)
image.save(f"image_{idx1}.png")
# load image examples
img_urls_2 = ['https://i.pinimg.com/564x/14/b0/07/14b0075ccd5ea35f7deffc9e5bd6de30.jpg', 'https://newsimg.bbc.co.uk/media/images/45510000/jpg/_45510184_the_writings_466_180.jpg',
'https://cdn.shopify.com/s/files/1/0047/1524/9737/files/Cetaphil_Face_Wash_Ingredients_Optimized.png?v=1680923920', 'https://github.com/kawther12h/Image_Captioning-and-Text_Recognition/blob/main/handText22.jpg?raw=true','https://github.com/kawther12h/Image_Captioning-and-Text_Recognition/blob/main/handText11.jpg?raw=true']
for idx2, url2 in enumerate(img_urls_2):
image = Image.open(requests.get(url2, stream=True).raw)
image.save(f"tx_image_{idx2}.png")
# Load BLIP processor and model for image captioning (produces English captions).
processor_blip = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
model_blip = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")
# Translation pipeline: English -> Arabic (Marefa MarianMT model).
translate = pipeline("translation",model="marefa-nlp/marefa-mt-en-ar")
def caption_and_translate(img, min_len, max_len):
    """Generate an English caption for an image and translate it to Arabic.

    Args:
        img: Filepath of the uploaded image (Gradio `type='filepath'`).
        min_len: Minimum caption length in tokens.
        max_len: Maximum caption length in tokens.

    Returns:
        Tuple of (English caption string, Arabic caption wrapped in an
        RTL ``<div>`` for the HTML output component).
    """
    raw_image = Image.open(img).convert('RGB')
    inputs_blip = processor_blip(raw_image, return_tensors="pt")
    # Gradio sliders may deliver floats; generate() expects integer lengths.
    out_blip = model_blip.generate(**inputs_blip, min_length=int(min_len), max_length=int(max_len))
    english_caption = processor_blip.decode(out_blip[0], skip_special_tokens=True)
    # Translate caption from English to Arabic.
    arabic_caption = translate(english_caption)[0]['translation_text']
    # Wrap in an RTL div so Arabic renders right-to-left in the browser.
    translated_caption = f'<div dir="rtl">{arabic_caption}</div>'
    return english_caption, translated_caption
# Gradio interface for the captioning tab: image + length sliders in,
# English caption (textbox) and Arabic caption (RTL HTML) out.
img_cap_en_ar = gr.Interface(
fn=caption_and_translate,
inputs=[gr.Image(type='filepath', label='Image'),
gr.Slider(label='Minimum Length', minimum=1, maximum=500, value=30),
gr.Slider(label='Maximum Length', minimum=1, maximum=500, value=100)],
outputs=[gr.Textbox(label='English Caption'),
gr.HTML(label='Arabic Caption')],
title='Image Captioning | وصف الصورة',
description="Upload an image to generate an English & Arabic caption | قم برفع صورة وأرسلها ليظهر لك وصف للصورة",
examples =[["image_2.png"]]
)
# Donut OCR pipeline for printed/digital text extraction.
text_rec = pipeline("image-to-text", model="jinhybr/OCR-Donut-CORD")
# NOTE(review): the English->Arabic `translate` pipeline is already loaded
# above; the original reloaded the identical model here, which only wasted
# download time and memory, so the duplicate load was removed.
def extract_text(image):
    """OCR an image and return (English text, Arabic translation as RTL HTML)."""
    # Run the Donut OCR pipeline; it returns a list of result dicts.
    generated = text_rec(image)[0]['generated_text']
    # Donut emits structural tags (e.g. <s_menu>); strip every tag-like span.
    plain = re.sub(r'<[^>]*>', '', generated)
    # Translate the cleaned text from English to Arabic.
    translation = translate(plain)[0]['translation_text']
    # Wrap right-to-left so the Arabic renders correctly in the HTML component.
    return plain, f'<div dir="rtl">{translation}</div>'
# Gradio interface for the printed-text OCR tab.
text_recognition = gr.Interface(
    fn=extract_text,  # The function that processes the image
    inputs=gr.Image(type="pil"),  # Input is an image (PIL format)
    # Label typo fixed ("Translateted of Extracted text " -> below).
    outputs=[gr.Textbox(label='Extracted text'),
             gr.HTML(label='Translation of extracted text')],
    title="Text Extraction and Translation | إستخراج النص وترجمتة",
    # "Submet" typo fixed in the English half of the description.
    description="Upload an image then submit to extract text and translate it to Arabic | قم برفع الصورة وأرسلها ليظهر لك النص من الصورة",
    examples=[["tx_image_0.png"]],
)
# TrOCR processor and model for handwritten text extraction.
processor = TrOCRProcessor.from_pretrained('microsoft/trocr-base-handwritten')
model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-handwritten')
# NOTE(review): the English->Arabic `translate` pipeline loaded earlier is
# reused here; the original reloaded the identical model a third time, so
# the duplicate load was removed.
def recognize_handwritten_text(image2):
    """Recognize handwritten English text in an image and translate it to Arabic.

    Returns a tuple of (recognized English text, Arabic translation wrapped
    in an RTL ``<div>`` for the HTML output component).
    """
    # Encode the image, generate token ids, and decode them back to text.
    pixels = processor(images=image2, return_tensors="pt").pixel_values
    token_ids = model.generate(pixels)
    recognized = processor.batch_decode(token_ids, skip_special_tokens=True)[0]
    # Translate English -> Arabic and wrap right-to-left for display.
    arabic = translate(recognized)[0]['translation_text']
    return recognized, f'<div dir="rtl">{arabic}</div>'
# Gradio interface for the handwritten-text tab.
handwritten_rec = gr.Interface(
    fn=recognize_handwritten_text,
    inputs=gr.Image(label="Upload Image"),
    outputs=[gr.Textbox(label='English Text'),
             gr.HTML(label='Arabic Text')],
    # Doubled "| |" separator and "Submet" typo fixed below.
    title="Handwritten Text Extraction | إستخراج النص المكتوب بخط اليد وترجمتة",
    description="Upload an image then submit to extract text and translate it to Arabic | قم برفع الصورة وأرسلها ليظهر لك النص من الصورة",
    examples=[["tx_image_1.png"]]
)
# Combine all interfaces into a tabbed interface.
demo = gr.TabbedInterface(
    [img_cap_en_ar, text_recognition, handwritten_rec],
    ["Extract_Caption", " Extract_Digital_text", " Extract_HandWritten_text"],
)

# BUG FIX: the original called `app.run(host="0.0.0.0", port=7860)` after an
# unconditional `demo.launch(debug=True)` — `app` is never defined anywhere in
# this file, so that line would raise NameError once launch() returned. The
# correct Gradio entry point is demo.launch(), placed under the main guard so
# importing this module does not start a server.
if __name__ == "__main__":
    demo.launch(debug=True)