Spaces:
Sleeping
Sleeping
rajsecrets0
committed on
Upload 2 files
Browse files
- app.py +157 -0
- requirements.txt +6 -0
app.py
ADDED
@@ -0,0 +1,157 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import easyocr
|
3 |
+
import numpy as np
|
4 |
+
from PIL import Image
|
5 |
+
import re
|
6 |
+
import io
|
7 |
+
import base64
|
8 |
+
from streamlit_lottie import st_lottie
|
9 |
+
import requests
|
10 |
+
|
11 |
+
# Page-level settings: page title and wide layout.
st.set_page_config(layout="wide", page_title="OCR & Search App")

# Stylesheet for the whole page: the yellow keyword highlight, the fixed
# footer bar, and full-width buttons.
_CUSTOM_CSS = """
<style>
.highlight {
background-color: yellow;
font-weight: bold;
}
.footer {
position: fixed;
left: 0;
bottom: 0;
width: 100%;
background-color: #f0f2f6;
color: black;
text-align: center;
padding: 10px 0;
font-style: italic;
}
.stButton>button {
width: 100%;
}
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
|
37 |
+
|
38 |
+
# Lottie Animation
|
39 |
+
def load_lottieurl(url: str):
    """Download a Lottie animation and return it as parsed JSON.

    Args:
        url: Address of the Lottie JSON document.

    Returns:
        The decoded JSON payload, or None when the request fails, the
        response status is not 200, or the body is not valid JSON.
    """
    try:
        # Bound the wait so an unreachable host cannot hang app start-up.
        r = requests.get(url, timeout=10)
    except requests.RequestException:
        return None
    if r.status_code != 200:
        return None
    try:
        return r.json()
    except ValueError:
        # A body that is not JSON is treated like a failed download.
        return None


lottie_url = "https://assets5.lottiefiles.com/packages/lf20_fcfjwiyb.json"
lottie_json = load_lottieurl(lottie_url)
|
47 |
+
|
48 |
+
# Initialize the OCR reader
|
49 |
+
@st.cache_resource
def load_ocr_reader():
    """Create the EasyOCR reader for English and Hindi text."""
    languages = ['en', 'hi']
    return easyocr.Reader(languages)


# Module-level reader instance used by process_image.
reader = load_ocr_reader()
|
54 |
+
|
55 |
+
def process_image(image):
    """Run OCR on an image and return the recognised text.

    Args:
        image: A PIL image (any mode), or an array-like that ``np.array``
            turns into a 2-D, H x W x 3 or H x W x 4 pixel array.

    Returns:
        The recognised text fragments joined by newlines, or a string that
        starts with ``"Error processing image:"`` if any step fails.
    """
    try:
        # Normalise PIL input to RGB first. Without this, palette ("P")
        # images become a 2-D array of palette indices that would be
        # mistaken for grayscale, and 2-channel ("LA") or CMYK images match
        # neither branch below.
        if hasattr(image, "convert"):
            image = image.convert("RGB")

        img_array = np.array(image)
        if img_array.ndim == 2:  # Grayscale array: replicate to 3 channels
            img_array = np.stack((img_array,) * 3, axis=-1)
        elif img_array.shape[2] == 4:  # RGBA array: drop the alpha channel
            img_array = img_array[:, :, :3]

        results = reader.readtext(img_array)
        # Index 1 of every result is used as the recognised text.
        return '\n'.join(result[1] for result in results)
    except Exception as e:
        # Callers expect a string in every case, so failures are reported
        # in-band rather than raised.
        return f"Error processing image: {str(e)}"
|
68 |
+
|
69 |
+
def search_in_text(extracted_text, keyword):
    """Return the lines of *extracted_text* that contain *keyword*, as HTML.

    Matching is case-insensitive and literal (the keyword is not treated as
    a regular expression). Every occurrence is wrapped in
    ``<span class='highlight'>``; all other text, including the matched text
    itself, is HTML-escaped because the caller renders the result with
    ``unsafe_allow_html=True``.

    Args:
        extracted_text: Newline-separated text to search.
        keyword: Text to look for.

    Returns:
        Matching lines joined by ``<br>``; ``"No keyword provided."`` for an
        empty keyword; ``"Keyword not found."`` when nothing matches; or a
        string starting with ``"Error searching text:"`` on failure.
    """
    import html

    if not keyword:
        return "No keyword provided."

    try:
        # Compile once instead of once per matching line.
        pattern = re.compile(re.escape(keyword), re.IGNORECASE)
        needle = keyword.lower()

        highlighted_lines = []
        for line in extracted_text.split('\n'):
            if needle not in line.lower():
                continue

            # Rebuild the line piece by piece so only our own <span> markup
            # reaches the page unescaped.
            pieces = []
            cursor = 0
            for match in pattern.finditer(line):
                pieces.append(html.escape(line[cursor:match.start()]))
                pieces.append(
                    f"<span class='highlight'>{html.escape(match.group())}</span>"
                )
                cursor = match.end()
            pieces.append(html.escape(line[cursor:]))
            highlighted_lines.append("".join(pieces))

        if highlighted_lines:
            return "<br>".join(highlighted_lines)
        return "Keyword not found."
    except Exception as e:
        # Callers render whatever string comes back, so report in-band.
        return f"Error searching text: {str(e)}"
|
88 |
+
|
89 |
+
# Streamlit app: page heading and a one-line description of the workflow.
st.title("📷 OCR and Keyword Search Application")
st.write("Upload an image containing Hindi or English text, extract the content, and search for keywords.")

# Three equal-width columns: upload, extracted text, keyword search.
col1, col2, col3 = st.columns([1, 1, 1])
|
95 |
+
|
96 |
+
# Upload column: pick an image, preview it, and run OCR on demand.
with col1:
    st.header("📤 Upload Image")
    uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

    if uploaded_file is not None:
        # Open once and reuse the same image for OCR and for the preview.
        image = Image.open(uploaded_file)

        if st.button('🔍 Extract Text', key='extract'):
            with st.spinner('Extracting text...'):
                extracted_text = process_image(image)

            # process_image reports failures in-band with this prefix; show
            # them as errors instead of storing them as extracted text.
            if extracted_text.startswith("Error processing image:"):
                st.error(extracted_text)
            else:
                st.session_state['extracted_text'] = extracted_text
                st.success('Text extracted successfully!')

        st.image(image, caption='Uploaded Image', use_column_width=True)
    elif lottie_json:
        # The animation is optional: skip it when the download failed.
        st_lottie(lottie_json, key="lottie", height=300)
|
112 |
+
|
113 |
+
# Result column: show the OCR output and offer it as a text download.
with col2:
    st.header("📝 Extracted Text")
    if 'extracted_text' in st.session_state:
        # A non-empty label avoids Streamlit's empty-label warning; it is
        # collapsed so the layout stays the same as with no label.
        st.text_area(
            "Extracted text",
            st.session_state['extracted_text'],
            height=300,
            label_visibility="collapsed",
        )

        # Download button
        st.download_button(
            label="📥 Download Extracted Text",
            data=st.session_state['extracted_text'].encode('utf-8'),
            file_name="extracted_text.txt",
            mime="text/plain",
        )
    else:
        st.info("Upload an image and extract text to see the results here.")
|
127 |
+
|
128 |
+
# Search column: keyword highlighting, word count and a language hint.
with col3:
    st.header("🔍 Keyword Search")
    if 'extracted_text' in st.session_state:
        keyword = st.text_input("Enter keyword to search")
        if keyword:
            search_result = search_in_text(st.session_state['extracted_text'], keyword)
            # The result carries <span>/<br> markup, hence unsafe_allow_html.
            st.markdown(search_result, unsafe_allow_html=True)

        # Word count: whitespace-separated tokens of the extracted text.
        word_count = len(st.session_state['extracted_text'].split())
        st.metric(label="Word Count", value=word_count)

        # Language detection
        def detect_language(text):
            """Return a language label for *text*.

            Any character in the range U+0900-U+097F (Devanagari) yields
            "Hindi (and possibly English)"; otherwise "English".
            """
            hindi_pattern = re.compile(r'[\u0900-\u097F]')
            if hindi_pattern.search(text):
                return "Hindi (and possibly English)"
            return "English"

        language = detect_language(st.session_state['extracted_text'])
        st.info(f"Detected Language: {language}")
    else:
        st.info("Extract text from an image to use the search functionality.")
|
151 |
+
|
152 |
+
# Footer markup; the "footer" class is styled by the CSS injected earlier in
# the script.
_FOOTER_HTML = """
<div class="footer">
<p>Created By Devender Singh</p>
</div>
"""
st.markdown(_FOOTER_HTML, unsafe_allow_html=True)
|
requirements.txt
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit==1.22.0
|
2 |
+
easyocr==1.7.0
|
3 |
+
numpy==1.23.5
|
4 |
+
Pillow==9.5.0
|
5 |
+
requests==2.31.0
|
6 |
+
streamlit-lottie==0.0.5
|