gauri-sharan committed on
Commit
6017a53
·
verified ·
1 Parent(s): bc401e8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -32
app.py CHANGED
@@ -7,6 +7,7 @@ from PIL import Image
7
  import os
8
  import traceback
9
  import spaces
 
10
 
11
  # Check if CUDA is available
12
  device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -28,19 +29,19 @@ def ocr_and_extract(image, text_query):
28
  temp_image_path = "temp_image.jpg"
29
  image.save(temp_image_path)
30
 
31
- # Clear the index before adding a new image to avoid conflicts
32
- rag_model.delete_index("image_index")
33
 
34
  # Index the image with Byaldi
35
  rag_model.index(
36
  input_path=temp_image_path,
37
- index_name="image_index",
38
  store_collection_with_index=False,
39
- overwrite=True
40
  )
41
 
42
  # Perform the search query on the indexed image
43
- results = rag_model.search(text_query, k=1)
44
 
45
  # Prepare the input for Qwen2-VL
46
  image_data = Image.open(temp_image_path)
@@ -70,30 +71,4 @@ def ocr_and_extract(image, text_query):
70
  # Generate the output with Qwen2-VL
71
  generated_ids = qwen_model.generate(**inputs, max_new_tokens=50)
72
  output_text = processor.batch_decode(
73
- generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
74
- )
75
-
76
- # Clean up the temporary file
77
- os.remove(temp_image_path)
78
-
79
- return output_text[0]
80
-
81
- except Exception as e:
82
- error_message = str(e)
83
- traceback.print_exc()
84
- return f"Error: {error_message}"
85
-
86
- # Gradio interface for image input
87
- iface = gr.Interface(
88
- fn=ocr_and_extract,
89
- inputs=[
90
- gr.Image(type="pil"),
91
- gr.Textbox(label="Enter your query (optional)"),
92
- ],
93
- outputs="text",
94
- title="Image OCR with Byaldi + Qwen2-VL",
95
- description="Upload an image (JPEG/PNG) containing Hindi and English text for OCR.",
96
- )
97
-
98
- # Launch the Gradio app
99
- iface.launch()
 
7
  import os
8
  import traceback
9
  import spaces
10
+ import time
11
 
12
  # Check if CUDA is available
13
  device = "cuda" if torch.cuda.is_available() else "cpu"
 
29
  temp_image_path = "temp_image.jpg"
30
  image.save(temp_image_path)
31
 
32
+ # Generate a unique index name using the current timestamp
33
+ unique_index_name = f"image_index_{int(time.time())}"
34
 
35
  # Index the image with Byaldi
36
  rag_model.index(
37
  input_path=temp_image_path,
38
+ index_name=unique_index_name, # Use the unique index name
39
  store_collection_with_index=False,
40
+ overwrite=True # Ensure the index is overwritten if it already exists
41
  )
42
 
43
  # Perform the search query on the indexed image
44
+ results = rag_model.search(text_query, k=1, index_name=unique_index_name)
45
 
46
  # Prepare the input for Qwen2-VL
47
  image_data = Image.open(temp_image_path)
 
71
  # Generate the output with Qwen2-VL
72
  generated_ids = qwen_model.generate(**inputs, max_new_tokens=50)
73
  output_text = processor.batch_decode(
74
+ generated_ids, skip_special_tokens=True, clean_up_tokeniza