Spaces:

Vinay15
/

OCR

Runtime error

App Files Community

Vinay15 committed on Sep 30, 2024

Commit

aebff7d

verified ·

1 Parent(s): 4a0b98f

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -21

app.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import gradio as gr
 from transformers import AutoModel, AutoTokenizer
 from PIL import Image
-import torch
 # Check CUDA availability
 def check_cuda():
@@ -14,41 +14,35 @@ def check_cuda():
 # Load the tokenizer and model
 tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
 model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, device_map="auto", use_safetensors=True, pad_token_id=tokenizer.eos_token_id)
-model.eval()
 # Define the OCR function
-def perform_ocr(image, keyword):
     # Check for CUDA availability and print the result
     cuda_info = check_cuda()
-    print(cuda_info)
-    # Convert PIL image to RGB format
     if image.mode != "RGB":
         image = image.convert("RGB")
     # Perform OCR using the model
-    res = model.chat(tokenizer, image, ocr_type='ocr')
-    # Check for keyword in the extracted text
-    if keyword.lower() in res.lower():
-        return res, f'Keyword "{keyword}" found in the text.'
-    else:
-        return res, f'Keyword "{keyword}" not found in the text.'
 # Define the Gradio interface
 interface = gr.Interface(
     fn=perform_ocr,
-    inputs=[
-        gr.Image(type="pil", label="Upload Image"),
-        gr.Textbox(label="Enter Keyword to Search")
-    ],
-    outputs=[
-        gr.Textbox(label="Extracted Text"),
-        gr.Textbox(label="Search Result")
-    ],
     title="OCR and Document Search Web Application",
-    description="Upload an image to extract text using the GOT-OCR2_0 model and search for a keyword."
 )
 # Launch the Gradio app
-interface.launch()

 import gradio as gr
 from transformers import AutoModel, AutoTokenizer
 from PIL import Image
+import torch  # Importing torch to check CUDA availability
 # Check CUDA availability
 def check_cuda():
 # Load the tokenizer and model
 tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
 model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, device_map="auto", use_safetensors=True, pad_token_id=tokenizer.eos_token_id)
+model = model.eval()  # No need for .cuda() with device_map="auto"
 # Define the OCR function
+def perform_ocr(image):
     # Check for CUDA availability and print the result
     cuda_info = check_cuda()
+    print(cuda_info)  # This will be logged in the output
+    # Convert PIL image to RGB format (if necessary)
     if image.mode != "RGB":
         image = image.convert("RGB")
+    # Save the image to a temporary path
+    image_file_path = 'temp_image.jpg'
+    image.save(image_file_path)
     # Perform OCR using the model
+    res = model.chat(tokenizer, image_file_path, ocr_type='ocr')
+    return res
 # Define the Gradio interface
 interface = gr.Interface(
     fn=perform_ocr,
+    inputs=gr.Image(type="pil", label="Upload Image"),
+    outputs=gr.Textbox(label="Extracted Text"),
     title="OCR and Document Search Web Application",
+    description="Upload an image to extract text using the GOT-OCR2_0 model."
 )
 # Launch the Gradio app
+interface.launch()