Vinay15 committed on
Commit
fe5490c
·
verified ·
1 Parent(s): aebff7d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -20
app.py CHANGED
@@ -1,27 +1,14 @@
1
  import gradio as gr
2
  from transformers import AutoModel, AutoTokenizer
3
  from PIL import Image
4
- import torch # Importing torch to check CUDA availability
5
-
6
- # Check CUDA availability
7
def check_cuda():
    """Describe whether PyTorch can see a CUDA device.

    Returns:
        str: A status message. When CUDA is available it includes the name
        of device 0; otherwise it states that CUDA is not available.
    """
    # Guard clause: handle the no-GPU case first so the happy path is flat.
    if not torch.cuda.is_available():
        return "CUDA is not available. Running on CPU."
    return f"CUDA is available. GPU device: {torch.cuda.get_device_name(0)}"
13
 
14
  # Load the tokenizer and model
15
  tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
16
- model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, device_map="auto", use_safetensors=True, pad_token_id=tokenizer.eos_token_id)
17
- model = model.eval() # No need for .cuda() with device_map="auto"
18
 
19
  # Define the OCR function
20
  def perform_ocr(image):
21
- # Check for CUDA availability and print the result
22
- cuda_info = check_cuda()
23
- print(cuda_info) # This will be logged in the output
24
-
25
  # Convert PIL image to RGB format (if necessary)
26
  if image.mode != "RGB":
27
  image = image.convert("RGB")
@@ -35,14 +22,37 @@ def perform_ocr(image):
35
 
36
  return res
37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  # Define the Gradio interface
39
  interface = gr.Interface(
40
- fn=perform_ocr,
41
- inputs=gr.Image(type="pil", label="Upload Image"),
42
- outputs=gr.Textbox(label="Extracted Text"),
43
  title="OCR and Document Search Web Application",
44
- description="Upload an image to extract text using the GOT-OCR2_0 model."
45
  )
46
 
47
  # Launch the Gradio app
48
- interface.launch()
 
1
  import gradio as gr
2
  from transformers import AutoModel, AutoTokenizer
3
  from PIL import Image
 
 
 
 
 
 
 
 
 
4
 
5
# Load the GOT-OCR2_0 tokenizer and model once, at import time.
# NOTE: trust_remote_code=True executes Python code shipped in the model
# repository, so only use it with a repository you trust.
tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
model = AutoModel.from_pretrained(
    'ucaslcl/GOT-OCR2_0',
    trust_remote_code=True,
    low_cpu_mem_usage=True,
    device_map='cuda',
    use_safetensors=True,
    # Reuse the end-of-sequence token as the padding token.
    pad_token_id=tokenizer.eos_token_id,
)
# Switch to evaluation mode, then move the model to the GPU.
model = model.eval()
model = model.cuda()
9
 
10
  # Define the OCR function
11
  def perform_ocr(image):
 
 
 
 
12
  # Convert PIL image to RGB format (if necessary)
13
  if image.mode != "RGB":
14
  image = image.convert("RGB")
 
22
 
23
  return res
24
 
25
+ # Define the search function
26
def search_keyword(extracted_text, keyword):
    """Report whether ``keyword`` occurs in ``extracted_text``, ignoring case.

    Args:
        extracted_text: Text to search (the OCR output). ``None`` is treated
            as empty text.
        keyword: Term to look for. Leading/trailing whitespace is ignored;
            ``None`` or a blank string counts as "no keyword given".

    Returns:
        str: A user-facing message: a prompt to enter a keyword when none
        was supplied, otherwise a "found" or "not found" message naming the
        (whitespace-trimmed) keyword.
    """
    # Normalise once so the emptiness check and the actual search use the
    # same term; previously " word " passed the check but never matched.
    needle = (keyword or "").strip()
    if not needle:
        return "Please enter a keyword."

    haystack = extracted_text or ""

    # casefold() is the Unicode-aware way to do caseless comparison.
    if needle.casefold() in haystack.casefold():
        return f"Keyword '{needle}' found in the extracted text!"
    return f"Keyword '{needle}' not found in the extracted text."
36
+
37
+ # Define the interface with both OCR and keyword search functionality
38
def ocr_and_search(image, keyword):
    """Run OCR on ``image`` and look for ``keyword`` in the recognised text.

    Args:
        image: The uploaded image, or ``None`` when the form was submitted
            without one.
        keyword: The search term entered by the user.

    Returns:
        tuple[str, str]: ``(extracted_text, search_result)``, in the order
        of the interface's two output boxes. When no image was supplied the
        extracted text is empty and the second element asks for an upload.
    """
    # Without this guard a missing upload reaches perform_ocr as None and
    # fails with an AttributeError instead of a helpful message.
    if image is None:
        return "", "Please upload an image."

    # Perform OCR to extract text from the image
    extracted_text = perform_ocr(image)

    # Return both the extracted text and the keyword search outcome
    return extracted_text, search_keyword(extracted_text, keyword)
47
+
48
  # Define the Gradio interface
49
# Wire the UI: the two inputs (image, keyword) are passed to ocr_and_search
# in order, and its two return values fill the two output boxes in order.
interface = gr.Interface(
    fn=ocr_and_search,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        gr.Textbox(label="Enter Keyword to Search"),
    ],
    outputs=[
        gr.Textbox(label="Extracted Text"),
        gr.Textbox(label="Search Result"),
    ],
    title="OCR and Document Search Web Application",
    description="Upload an image to extract text using the GOT-OCR2_0 model and search for a keyword within the extracted text.",
)

# Launch the Gradio app
interface.launch()