Spaces:

rajsecrets0
/

Hindi-English-OCR

Sleeping

App Files Files Community

rajsecrets0 committed on Nov 3, 2024

Commit

8f91176

verified ·

1 Parent(s): 8292d80

Upload 2 files

Browse files

Files changed (2) hide show

app.py +157 -0
requirements.txt +6 -0

app.py ADDED Viewed

	@@ -0,0 +1,157 @@

+import streamlit as st
+import easyocr
+import numpy as np
+from PIL import Image
+import re
+import io
+import base64
+from streamlit_lottie import st_lottie
+import requests
+# Page setup: set the browser tab title and use the wide layout.
+st.set_page_config(page_title="OCR & Search App", layout="wide")
+# Custom CSS, passed with unsafe_allow_html=True so the <style> block is
+# emitted as raw HTML. It defines:
+#   .highlight        - yellow, bold style that search_in_text() wraps around matches
+#   .footer           - bar fixed to the bottom of the page, used by the footer markup
+#   .stButton>button  - makes matching buttons span the full width of their parent
+st.markdown("""
+<style>
+.highlight {
+    background-color: yellow;
+    font-weight: bold;
+}
+.footer {
+    position: fixed;
+    left: 0;
+    bottom: 0;
+    width: 100%;
+    background-color: #f0f2f6;
+    color: black;
+    text-align: center;
+    padding: 10px 0;
+    font-style: italic;
+}
+.stButton>button {
+    width: 100%;
+}
+</style>
+""", unsafe_allow_html=True)
+# Lottie Animation
+def load_lottieurl(url: str):
+    r = requests.get(url)
+    if r.status_code != 200:
+        return None
+    return r.json()
+# Fetch the placeholder animation at module level. lottie_json is None when
+# load_lottieurl() does not receive an HTTP 200 response; it is later handed
+# to st_lottie() in the upload column while no file has been uploaded.
+lottie_url = "https://assets5.lottiefiles.com/packages/lf20_fcfjwiyb.json"
+lottie_json = load_lottieurl(lottie_url)
+# Initialize the OCR reader
+@st.cache_resource
+def load_ocr_reader():
+    return easyocr.Reader(['en', 'hi'])  # For English and Hindi
+# Module-level OCR reader; process_image() calls reader.readtext() on it.
+reader = load_ocr_reader()
+def process_image(image):
+    try:
+        img_array = np.array(image)
+        if len(img_array.shape) == 2:  # Grayscale
+            img_array = np.stack((img_array,)*3, axis=-1)
+        elif img_array.shape[2] == 4:  # RGBA
+            img_array = img_array[:,:,:3]
+        results = reader.readtext(img_array)
+        extracted_text = '\n'.join([result[1] for result in results])
+        return extracted_text
+    except Exception as e:
+        return f"Error processing image: {str(e)}"
+def search_in_text(extracted_text, keyword):
+    if not keyword:
+        return "No keyword provided."
+    try:
+        lines = extracted_text.split('\n')
+        highlighted_lines = []
+        for line in lines:
+            if keyword.lower() in line.lower():
+                pattern = re.compile(re.escape(keyword), re.IGNORECASE)
+                highlighted_line = pattern.sub(lambda m: f"<span class='highlight'>{m.group()}</span>", line)
+                highlighted_lines.append(highlighted_line)
+        if highlighted_lines:
+            return "<br>".join(highlighted_lines)
+        else:
+            return "Keyword not found."
+    except Exception as e:
+        return f"Error searching text: {str(e)}"
+# Streamlit app
+st.title("📷 OCR and Keyword Search Application")
+st.write("Upload an image containing Hindi or English text, extract the content, and search for keywords.")
+# Create three columns
+col1, col2, col3 = st.columns([1, 1, 1])
+with col1:
+    st.header("📤 Upload Image")
+    uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
+    if uploaded_file is not None:
+        if st.button('🔍 Extract Text', key='extract'):
+            with st.spinner('Extracting text...'):
+                image = Image.open(uploaded_file)
+                extracted_text = process_image(image)
+                st.session_state['extracted_text'] = extracted_text
+            st.success('Text extracted successfully!')
+        image = Image.open(uploaded_file)
+        st.image(image, caption='Uploaded Image', use_column_width=True)
+    else:
+        st_lottie(lottie_json, key="lottie", height=300)
+with col2:
+    st.header("📝 Extracted Text")
+    if 'extracted_text' in st.session_state:
+        st.text_area("", st.session_state['extracted_text'], height=300)
+        # Download button
+        st.download_button(
+            label="📥 Download Extracted Text",
+            data=st.session_state['extracted_text'].encode('utf-8'),
+            file_name="extracted_text.txt",
+            mime="text/plain"
+        )
+    else:
+        st.info("Upload an image and extract text to see the results here.")
+with col3:
+    st.header("🔎 Keyword Search")
+    if 'extracted_text' in st.session_state:
+        keyword = st.text_input("Enter keyword to search")
+        if keyword:
+            search_result = search_in_text(st.session_state['extracted_text'], keyword)
+            st.markdown(search_result, unsafe_allow_html=True)
+        # Word count
+        word_count = len(st.session_state['extracted_text'].split())
+        st.metric(label="Word Count", value=word_count)
+        # Language detection
+        def detect_language(text):
+            hindi_pattern = re.compile(r'[\u0900-\u097F]')
+            if hindi_pattern.search(text):
+                return "Hindi (and possibly English)"
+            return "English"
+        language = detect_language(st.session_state['extracted_text'])
+        st.info(f"Detected Language: {language}")
+    else:
+        st.info("Extract text from an image to use the search functionality.")
+# Add a footer
+st.markdown("""
+<div class="footer">
+    <p>Created By Devender Singh</p>
+</div>
+""", unsafe_allow_html=True)

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+streamlit==1.22.0
+easyocr==1.7.0
+numpy==1.23.5
+Pillow==9.5.0
+requests==2.31.0
+streamlit-lottie==0.0.5