"""Gradio demo: OCR an uploaded image with GOT-OCR2_0 and highlight a search term."""

import html
import os
import re
import shutil
import uuid

import gradio as gr
from transformers import AutoModel, AutoTokenizer

# The tokenizer and model are loaded once at import time and shared by every
# request handled by this process.
tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
model = AutoModel.from_pretrained(
    'ucaslcl/GOT-OCR2_0',
    trust_remote_code=True,
    low_cpu_mem_usage=True,
    device_map='cpu',
    use_safetensors=True,
)
model = model.eval()

UPLOAD_FOLDER = "./uploads"
RESULTS_FOLDER = "./results"

for folder in [UPLOAD_FOLDER, RESULTS_FOLDER]:
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(folder, exist_ok=True)


def run_GOT(image, search_term):
    """Run OCR on an uploaded image and return the text as HTML.

    Args:
        image: Filesystem path of the uploaded image (the ``gr.Image``
            component below is configured with ``type="filepath"``). It is
            ``None`` when nothing has been uploaded.
        search_term: Optional word or phrase to highlight in the OCR text.

    Returns:
        A ``(html_text, None)`` tuple. ``html_text`` is the escaped OCR text
        with matches of ``search_term`` highlighted, or a message starting
        with ``"Error: "`` when the image is missing or processing fails.
        The second element is always ``None`` and is kept only so the return
        shape stays the same for existing callers.
    """
    if not image:
        # Submit was clicked without an upload; there is nothing to copy.
        return "Error: no image was uploaded.", None

    unique_id = str(uuid.uuid4())
    image_path = os.path.join(UPLOAD_FOLDER, f"{unique_id}.png")
    try:
        # The copy lives inside the try so a failed copy is reported like any
        # other processing error and the cleanup below still runs.
        shutil.copy(image, image_path)
        res = model.chat(tokenizer, image_path, ocr_type='ocr')
        return highlight_text(res, search_term), None
    except Exception as e:
        # UI boundary: show the failure to the user instead of crashing the
        # request. The message is escaped because it is rendered as HTML.
        return f"Error: {html.escape(str(e))}", None
    finally:
        # Always remove the per-request working copy.
        try:
            os.remove(image_path)
        except FileNotFoundError:
            pass


def highlight_text(text, search_term):
    """Return ``text`` as HTML with every match of ``search_term`` highlighted.

    The text is HTML-escaped because the result is rendered by an HTML
    component. Matching is case-insensitive and literal (the search term is
    not treated as a regular expression); each match is wrapped in a
    ``<mark>`` element.

    Args:
        text: Plain text to render.
        search_term: Term to highlight. ``None`` or an empty string means no
            highlighting.

    Returns:
        An HTML-safe string. An empty string is returned for empty/``None``
        ``text``.
    """
    if not text:
        return ""
    if not search_term:
        return html.escape(text)

    pattern = re.compile(re.escape(search_term), re.IGNORECASE)
    pieces = []
    cursor = 0
    # Match against the raw text and escape each slice separately, so that
    # escaping can never split or alter a match.
    for match in pattern.finditer(text):
        pieces.append(html.escape(text[cursor:match.start()]))
        pieces.append(f"<mark>{html.escape(match.group())}</mark>")
        cursor = match.end()
    pieces.append(html.escape(text[cursor:]))
    return "".join(pieces)


def _ocr_for_ui(image, search_term):
    """Adapt ``run_GOT`` to the single-output click handler below."""
    result_html, _ = run_GOT(image, search_term)
    return result_html


title_html = """

General OCR Theory (GOT): Multi-Language OCR HINDI AND ENGLISH

"""

with gr.Blocks() as demo:
    gr.HTML(title_html)
    gr.Markdown("""
    ### Instructions
    Upload your respective image below and click "Submit" to extract text in both English and Hindi.
    If you want you can select the word to be highlighted.
    """)

    with gr.Row():
        with gr.Column():
            image_input = gr.Image(type="filepath", label="Upload your image")
            search_input = gr.Textbox(label="Enter a word to search", placeholder="Search term")
            submit_button = gr.Button("Submit")

        with gr.Column():
            ocr_result = gr.HTML(label="Extracted Text:")

    # One output component is declared, so the callback must return a single
    # value; _ocr_for_ui unwraps run_GOT's (html, None) tuple.
    submit_button.click(
        _ocr_for_ui,
        inputs=[image_input, search_input],
        outputs=[ocr_result],
    )

if __name__ == "__main__":
    demo.launch()