rajsecrets0 committed on
Commit
8f91176
·
verified ·
1 Parent(s): 8292d80

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +157 -0
  2. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import easyocr
3
+ import numpy as np
4
+ from PIL import Image
5
+ import re
6
+ import io
7
+ import base64
8
+ from streamlit_lottie import st_lottie
9
+ import requests
10
+
11
# Set page configuration
# Sets the page title and a wide layout; the UI below is split into three columns.
st.set_page_config(page_title="OCR & Search App", layout="wide")

# Custom CSS
# .highlight        - class put on matched keywords by search_in_text()
# .footer           - class used by the footer <div> rendered at the end of the script
# .stButton>button  - stretches buttons to the full width of their container
st.markdown("""
<style>
.highlight {
background-color: yellow;
font-weight: bold;
}
.footer {
position: fixed;
left: 0;
bottom: 0;
width: 100%;
background-color: #f0f2f6;
color: black;
text-align: center;
padding: 10px 0;
font-style: italic;
}
.stButton>button {
width: 100%;
}
</style>
""", unsafe_allow_html=True)
37
+
38
+ # Lottie Animation
39
def load_lottieurl(url: str):
    """Fetch a Lottie animation definition from ``url``.

    Args:
        url: Address of the Lottie JSON document.

    Returns:
        The decoded JSON payload, or ``None`` when the request fails or
        times out, the server answers with a non-200 status, or the body is
        not valid JSON.
    """
    try:
        # This runs at import time: without a timeout a stalled CDN would
        # hang the whole app, and without the except a DNS/connection error
        # would crash it for what is only a decorative animation.
        response = requests.get(url, timeout=10)
    except requests.RequestException:
        return None

    if response.status_code != 200:
        return None

    try:
        return response.json()
    except ValueError:
        # A 200 response whose body is not JSON (e.g. an HTML error page).
        return None
44
+
45
+ lottie_url = "https://assets5.lottiefiles.com/packages/lf20_fcfjwiyb.json"
46
+ lottie_json = load_lottieurl(lottie_url)
47
+
48
+ # Initialize the OCR reader
49
@st.cache_resource
def load_ocr_reader():
    """Create the EasyOCR reader used by the app.

    Returns:
        An ``easyocr.Reader`` configured for English and Hindi.
    """
    ocr_languages = ['en', 'hi']  # For English and Hindi
    return easyocr.Reader(ocr_languages)
52
+
53
+ reader = load_ocr_reader()
54
+
55
def process_image(image):
    """Run OCR on an image and return the recognised text.

    Args:
        image: A PIL image (anything exposing ``convert``), or an array-like
            of pixel data.

    Returns:
        The recognised text fragments joined with newlines. On any failure
        the string ``"Error processing image: <reason>"`` is returned
        instead of raising.
    """
    try:
        if hasattr(image, "convert"):
            # Let Pillow normalise the mode. Slicing/stacking the raw array
            # mis-reads palette images (indices, not intensities), CMYK
            # (4 channels that are not RGBA) and LA (2 channels).
            image = image.convert("RGB")

        img_array = np.array(image)
        # Fallback for inputs that were already arrays.
        if img_array.ndim == 2:  # Grayscale
            img_array = np.stack((img_array,) * 3, axis=-1)
        elif img_array.ndim == 3 and img_array.shape[2] == 4:  # RGBA
            img_array = img_array[:, :, :3]

        results = reader.readtext(img_array)
        # Element 1 of each result is used as the recognised text.
        return '\n'.join(result[1] for result in results)
    except Exception as e:
        return f"Error processing image: {str(e)}"
66
+ except Exception as e:
67
+ return f"Error processing image: {str(e)}"
68
+
69
def search_in_text(extracted_text, keyword):
    """Return an HTML snippet of the lines that contain ``keyword``.

    The match is case-insensitive and literal (regex metacharacters in
    ``keyword`` have no special meaning). Every occurrence is wrapped in
    ``<span class='highlight'>``; all other text is HTML-escaped because the
    result is rendered as raw HTML and OCR output may contain ``<`` or ``&``.

    Args:
        extracted_text: Newline-separated text to search.
        keyword: Text to look for.

    Returns:
        Matching lines joined with ``<br>``; ``"No keyword provided."`` for
        an empty keyword; ``"Keyword not found."`` when nothing matches; or
        ``"Error searching text: <reason>"`` if the search itself fails.
    """
    import html

    if not keyword:
        return "No keyword provided."

    try:
        # Compiled once; the pattern does not change between lines.
        pattern = re.compile(re.escape(keyword), re.IGNORECASE)
        needle = keyword.lower()

        highlighted_lines = []
        for line in extracted_text.split('\n'):
            if needle not in line.lower():
                continue

            # Escape the text between matches and the matches themselves
            # separately, so the markup added here is the only HTML emitted.
            pieces = []
            cursor = 0
            for match in pattern.finditer(line):
                pieces.append(html.escape(line[cursor:match.start()]))
                pieces.append(
                    f"<span class='highlight'>{html.escape(match.group())}</span>"
                )
                cursor = match.end()
            pieces.append(html.escape(line[cursor:]))
            highlighted_lines.append("".join(pieces))

        if highlighted_lines:
            return "<br>".join(highlighted_lines)
        return "Keyword not found."
    except Exception as e:
        return f"Error searching text: {str(e)}"
88
+
89
# Streamlit app
# Page heading and a one-line description of what the app does.
st.title("📷 OCR and Keyword Search Application")
st.write("Upload an image containing Hindi or English text, extract the content, and search for keywords.")

# Create three columns
# Equal widths: upload (col1), extracted text (col2), keyword search (col3).
col1, col2, col3 = st.columns([1, 1, 1])
95
+
96
# Column 1: upload an image, preview it, and run OCR on demand.
with col1:
    st.header("📤 Upload Image")
    uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

    if uploaded_file is not None:
        # Open once and reuse for both OCR and the preview.
        image = Image.open(uploaded_file)

        if st.button('🔍 Extract Text', key='extract'):
            with st.spinner('Extracting text...'):
                extracted_text = process_image(image)

            # process_image() reports failures as a string with this prefix;
            # show it as an error instead of storing it as extracted text.
            if extracted_text.startswith("Error processing image:"):
                st.error(extracted_text)
            else:
                st.session_state['extracted_text'] = extracted_text
                st.success('Text extracted successfully!')

        st.image(image, caption='Uploaded Image', use_column_width=True)
    elif lottie_json is not None:
        # load_lottieurl() returns None when the animation could not be
        # fetched; skip the placeholder rather than pass None to st_lottie.
        st_lottie(lottie_json, key="lottie", height=300)
112
+
113
# Column 2: show the most recent OCR result and offer it as a download.
with col2:
    st.header("📝 Extracted Text")
    if 'extracted_text' in st.session_state:
        st.text_area("", st.session_state['extracted_text'], height=300)

        # Download button
        # The text is encoded as UTF-8 bytes before being offered as a file.
        st.download_button(
            label="📥 Download Extracted Text",
            data=st.session_state['extracted_text'].encode('utf-8'),
            file_name="extracted_text.txt",
            mime="text/plain"
        )
    else:
        st.info("Upload an image and extract text to see the results here.")
127
+
128
# Column 3: keyword search plus simple statistics about the extracted text.
with col3:
    st.header("🔎 Keyword Search")
    if 'extracted_text' in st.session_state:
        keyword = st.text_input("Enter keyword to search")
        if keyword:
            search_result = search_in_text(st.session_state['extracted_text'], keyword)
            # search_in_text() returns HTML (highlight spans, <br> separators).
            st.markdown(search_result, unsafe_allow_html=True)

        # Word count
        # Counts whitespace-separated tokens.
        word_count = len(st.session_state['extracted_text'].split())
        st.metric(label="Word Count", value=word_count)

        # Language detection
        def detect_language(text):
            """Return a label for the language(s) present in ``text``.

            Any character in U+0900-U+097F gives
            "Hindi (and possibly English)"; everything else, including
            empty text, is reported as "English".
            """
            hindi_pattern = re.compile(r'[\u0900-\u097F]')
            if hindi_pattern.search(text):
                return "Hindi (and possibly English)"
            return "English"

        language = detect_language(st.session_state['extracted_text'])
        st.info(f"Detected Language: {language}")
    else:
        st.info("Extract text from an image to use the search functionality.")
151
+
152
# Add a footer
# Rendered as raw HTML so the "footer" class from the custom <style> block
# (fixed to the bottom of the page, full width) applies to it.
st.markdown("""
<div class="footer">
<p>Created By Devender Singh</p>
</div>
""", unsafe_allow_html=True)
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ streamlit==1.22.0
2
+ easyocr==1.7.0
3
+ numpy==1.23.5
4
+ Pillow==9.5.0
5
+ requests==2.31.0
6
+ streamlit-lottie==0.0.5