Spaces:

Vinay15
/

OCR

Runtime error

App Files Community

Vinay15 committed on Sep 30, 2024

Commit

4a0b98f

verified ·

1 Parent(s): ecc49db

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -15

app.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import gradio as gr
 from transformers import AutoModel, AutoTokenizer
 from PIL import Image
-import torch  # Importing torch to check CUDA availability
 # Check CUDA availability
 def check_cuda():
@@ -14,35 +14,41 @@ def check_cuda():
 # Load the tokenizer and model
 tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
 model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, device_map="auto", use_safetensors=True, pad_token_id=tokenizer.eos_token_id)
-model = model.eval()  # No need for .cuda() with device_map="auto"
 # Define the OCR function
-def perform_ocr(image):
     # Check for CUDA availability and print the result
     cuda_info = check_cuda()
-    print(cuda_info)  # This will be logged in the output
-    # Convert PIL image to RGB format (if necessary)
     if image.mode != "RGB":
         image = image.convert("RGB")
-    # Save the image to a temporary path
-    image_file_path = 'temp_image.jpg'
-    image.save(image_file_path)
     # Perform OCR using the model
-    res = model.chat(tokenizer, image_file_path, ocr_type='ocr')
-    return res
 # Define the Gradio interface
 interface = gr.Interface(
     fn=perform_ocr,
-    inputs=gr.Image(type="pil", label="Upload Image"),
-    outputs=gr.Textbox(label="Extracted Text"),
     title="OCR and Document Search Web Application",
-    description="Upload an image to extract text using the GOT-OCR2_0 model."
 )
 # Launch the Gradio app
-interface.launch()

 import gradio as gr
 from transformers import AutoModel, AutoTokenizer
 from PIL import Image
+import torch
 # Check CUDA availability
 def check_cuda():
 # Load the tokenizer and model
 tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
 model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, device_map="auto", use_safetensors=True, pad_token_id=tokenizer.eos_token_id)
+model.eval()
 # Define the OCR function
+def perform_ocr(image, keyword):
     # Check for CUDA availability and print the result
     cuda_info = check_cuda()
+    print(cuda_info)
+    # Convert PIL image to RGB format
     if image.mode != "RGB":
         image = image.convert("RGB")
     # Perform OCR using the model
+    res = model.chat(tokenizer, image, ocr_type='ocr')
+    # Check for keyword in the extracted text
+    if keyword.lower() in res.lower():
+        return res, f'Keyword "{keyword}" found in the text.'
+    else:
+        return res, f'Keyword "{keyword}" not found in the text.'
 # Define the Gradio interface
 interface = gr.Interface(
     fn=perform_ocr,
+    inputs=[
+        gr.Image(type="pil", label="Upload Image"),
+        gr.Textbox(label="Enter Keyword to Search")
+    ],
+    outputs=[
+        gr.Textbox(label="Extracted Text"),
+        gr.Textbox(label="Search Result")
+    ],
     title="OCR and Document Search Web Application",
+    description="Upload an image to extract text using the GOT-OCR2_0 model and search for a keyword."
 )
 # Launch the Gradio app
+interface.launch()