Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -7,6 +7,7 @@ from PIL import Image
 import os
 import traceback
 import spaces
+import time
 
 # Check if CUDA is available
 device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -28,19 +29,19 @@ def ocr_and_extract(image, text_query):
         temp_image_path = "temp_image.jpg"
         image.save(temp_image_path)
 
-        #
-
+        # Generate a unique index name using the current timestamp
+        unique_index_name = f"image_index_{int(time.time())}"
 
         # Index the image with Byaldi
         rag_model.index(
             input_path=temp_image_path,
-            index_name=
+            index_name=unique_index_name,  # Use the unique index name
             store_collection_with_index=False,
-            overwrite=True
+            overwrite=True  # Ensure the index is overwritten if it already exists
         )
 
         # Perform the search query on the indexed image
-        results = rag_model.search(text_query, k=1)
+        results = rag_model.search(text_query, k=1, index_name=unique_index_name)
 
         # Prepare the input for Qwen2-VL
         image_data = Image.open(temp_image_path)
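The substance of this hunk is that each request now builds its own Byaldi index name instead of reusing a single hard-coded one, so a second upload no longer collides with the index left behind by the first. A minimal, Byaldi-independent sketch of the naming pattern (the helper name is mine, not part of the commit):

```python
import time

def make_unique_index_name(prefix: str = "image_index") -> str:
    # int(time.time()) is a second-resolution Unix timestamp, so calls at
    # least one second apart get a fresh index name.
    return f"{prefix}_{int(time.time())}"

# Yields something like "image_index_1718000000"
print(make_unique_index_name())
```

The suffix only has one-second resolution, so two requests landing in the same second would still share a name; that is the case the commit's own `overwrite=True` comment covers, since an existing index with the same name is then overwritten rather than reused.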
@@ -70,30 +71,4 @@ def ocr_and_extract(image, text_query):
         # Generate the output with Qwen2-VL
         generated_ids = qwen_model.generate(**inputs, max_new_tokens=50)
         output_text = processor.batch_decode(
-            generated_ids, skip_special_tokens=True,
-        )
-
-        # Clean up the temporary file
-        os.remove(temp_image_path)
-
-        return output_text[0]
-
-    except Exception as e:
-        error_message = str(e)
-        traceback.print_exc()
-        return f"Error: {error_message}"
-
-# Gradio interface for image input
-iface = gr.Interface(
-    fn=ocr_and_extract,
-    inputs=[
-        gr.Image(type="pil"),
-        gr.Textbox(label="Enter your query (optional)"),
-    ],
-    outputs="text",
-    title="Image OCR with Byaldi + Qwen2-VL",
-    description="Upload an image (JPEG/PNG) containing Hindi and English text for OCR.",
-)
-
-# Launch the Gradio app
-iface.launch()
+            generated_ids, skip_special_tokens=True, clean_up_tokeniza
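The last added line is cut off in the diff view at `clean_up_tokeniza`, and the hunk header (`+71,4`) shows the new side of the hunk ending there. The most plausible completion is the standard `clean_up_tokenization_spaces` keyword that `batch_decode` accepts in transformers, but the value below is an assumption, not something the truncated line confirms:

```python
# Assumed completion of the truncated line; clean_up_tokenization_spaces is a
# real batch_decode keyword, but setting it to True here is a guess.
output_text = processor.batch_decode(
    generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True,
)
```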
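Everything after the decode call in the previous revision, the temp-file cleanup, the `except` block, and the Gradio wiring, appears only on the removed side of this hunk. Assuming that tail is meant to survive the edit, the interface block would be reattached at module level exactly as it reads in the removed lines (with `gradio` imported as `gr`, as the previous revision's usage implies):

```python
# Gradio interface for image input, copied from the removed lines above
iface = gr.Interface(
    fn=ocr_and_extract,  # the function patched in this commit
    inputs=[
        gr.Image(type="pil"),
        gr.Textbox(label="Enter your query (optional)"),
    ],
    outputs="text",
    title="Image OCR with Byaldi + Qwen2-VL",
    description="Upload an image (JPEG/PNG) containing Hindi and English text for OCR.",
)

# Launch the Gradio app
iface.launch()
```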