Spaces:

MarketOne
/

OCRMO

Runtime error

App Files Files Community

cesar committed on Nov 5, 2022

Commit

9472d8d

1 Parent(s): fe50644

Upload 3 files

Browse files

Files changed (3) hide show

app.py +113 -0
packages.txt +1 -0
requirements.txt +3 -0

app.py ADDED Viewed

	@@ -0,0 +1,113 @@

+# Copyright (C) 2021, Mindee.
+# This program is licensed under the Apache License version 2.
+# See LICENSE or go to <https://www.apache.org/licenses/LICENSE-2.0.txt> for full license details.
+import os
+import matplotlib.pyplot as plt
+import streamlit as st
+# Set before TensorFlow is imported below so the variable is already in the
+# environment when the library loads (presumably to reduce its native log
+# output -- confirm the meaning of level "2" against the TensorFlow docs).
+os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
+import cv2
+import tensorflow as tf
+# If at least one GPU is listed, turn on memory growth for the first one only;
+# any additional GPUs keep their default setting.
+gpu_devices = tf.config.experimental.list_physical_devices('GPU')
+if any(gpu_devices):
+    tf.config.experimental.set_memory_growth(gpu_devices[0], True)
+from doctr.io import DocumentFile
+from doctr.models import ocr_predictor
+from doctr.utils.visualization import visualize_page
+# Architecture names offered in the sidebar select boxes of main(); the chosen
+# values are passed directly to ocr_predictor().
+DET_ARCHS = ["db_resnet50", "db_mobilenet_v3_large"]
+RECO_ARCHS = ["crnn_vgg16_bn", "crnn_mobilenet_v3_small", "master", "sar_resnet31"]
+def main():
+    # Wide mode
+    st.set_page_config(layout="wide")
+    # Designing the interface
+    st.title("docTR: Document Text Recognition")
+    # For newline
+    st.write('\n')
+    #
+    st.write('Find more info at: https://github.com/mindee/doctr')
+    # For newline
+    st.write('\n')
+    # Instructions
+    st.markdown("*Hint: click on the top-right corner of an image to enlarge it!*")
+    # Set the columns
+    cols = st.beta_columns((1, 1, 1, 1))
+    cols[0].subheader("Input page")
+    cols[1].subheader("Segmentation heatmap")
+    cols[2].subheader("OCR output")
+    cols[3].subheader("Page reconstitution")
+    # Sidebar
+    # File selection
+    st.sidebar.title("Document selection")
+    # Disabling warning
+    st.set_option('deprecation.showfileUploaderEncoding', False)
+    # Choose your own image
+    uploaded_file = st.sidebar.file_uploader("Upload files", type=['pdf', 'png', 'jpeg', 'jpg'])
+    if uploaded_file is not None:
+        if uploaded_file.name.endswith('.pdf'):
+            doc = DocumentFile.from_pdf(uploaded_file.read())
+        else:
+            doc = DocumentFile.from_images(uploaded_file.read())
+        page_idx = st.sidebar.selectbox("Page selection", [idx + 1 for idx in range(len(doc))]) - 1
+        cols[0].image(doc[page_idx])
+    # Model selection
+    st.sidebar.title("Model selection")
+    det_arch = st.sidebar.selectbox("Text detection model", DET_ARCHS)
+    reco_arch = st.sidebar.selectbox("Text recognition model", RECO_ARCHS)
+    # For newline
+    st.sidebar.write('\n')
+    if st.sidebar.button("Analyze page"):
+        if uploaded_file is None:
+            st.sidebar.write("Please upload a document")
+        else:
+            with st.spinner('Loading model...'):
+                predictor = ocr_predictor(det_arch, reco_arch, pretrained=True)
+            with st.spinner('Analyzing...'):
+                # Forward the image to the model
+                processed_batches = predictor.det_predictor.pre_processor([doc[page_idx]])
+                out = predictor.det_predictor.model(processed_batches[0], return_model_output=True)
+                seg_map = out["out_map"]
+                seg_map = tf.squeeze(seg_map[0, ...], axis=[2])
+                seg_map = cv2.resize(seg_map.numpy(), (doc[page_idx].shape[1], doc[page_idx].shape[0]),
+                                     interpolation=cv2.INTER_LINEAR)
+                # Plot the raw heatmap
+                fig, ax = plt.subplots()
+                ax.imshow(seg_map)
+                ax.axis('off')
+                cols[1].pyplot(fig)
+                # Plot OCR output
+                out = predictor([doc[page_idx]])
+                fig = visualize_page(out.pages[0].export(), doc[page_idx], interactive=False)
+                cols[2].pyplot(fig)
+                # Page reconsitution under input page
+                page_export = out.pages[0].export()
+                img = out.pages[0].synthesize()
+                cols[3].image(img, clamp=True)
+                # Display JSON
+                st.markdown("\nHere are your analysis results in JSON format:")
+                st.json(page_export)
+if __name__ == '__main__':
+    main()

packages.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ python3-opencv

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+-e git+https://github.com/mindee/doctr.git#egg=python-doctr[tf]
+streamlit>=0.65.0
+PyMuPDF>=1.16.0,!=1.18.11,!=1.18.12,!=1.19.5