Spaces:

Kethan20
/

OCR-Text-Extractor

Sleeping

Kethan20 committed on Sep 28, 2024

Commit

2a1e7f5

verified ·

1 Parent(s): ea0dcdc

Upload 2 files

Files changed (2) hide show

app.py ADDED Viewed

+import os
+import tempfile
+
+import streamlit as st
+from PIL import Image
+from transformers import AutoModel, AutoTokenizer
+def perform_ocr(image):
+    tokenizer = AutoTokenizer.from_pretrained('srimanth-d/GOT_CPU', trust_remote_code=True)
+    model = AutoModel.from_pretrained('srimanth-d/GOT_CPU', trust_remote_code=True, low_cpu_mem_usage=True, use_safetensors=True, pad_token_id=tokenizer.eos_token_id)
+    model = model.eval()
+    res = model.chat(tokenizer, image, ocr_type='ocr')
+    return res
+# Title and instructions
+st.title(' OCR and Document Search Web Application Prototype')
+st.write('Upload an image and extract text in Hindi and English. You can also search for keywords within the extracted text.')
+# Upload the image
+uploaded_file = st.file_uploader("Choose an image file", type=["jpg", "png", "jpeg"])
+# If an image is uploaded
+if uploaded_file is not None:
+    image = Image.open(uploaded_file)
+    st.image(image, caption='Uploaded Image.', use_column_width=True)
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as temp_file:
+        temp_file.write(uploaded_file.getbuffer())
+        temp_file_path = temp_file.name
+    # Perform OCR on the uploaded image
+    st.write("Extracting text...")
+    extracted_text = perform_ocr(temp_file_path)
+    st.write("Extracted Text:")
+    st.text_area("OCR Output", extracted_text, height=200)

requirements.txt ADDED Viewed

File without changes