"""Streamlit client for a remote OCR extraction service.

The user uploads a PDF or image in the sidebar and clicks "Process Document".
The file is POSTed as multipart form data to the OCR endpoint, and the
response body is rendered as Markdown and offered as a downloadable file.
"""

import requests
import streamlit as st
from PIL import Image

# Endpoint the uploaded document is posted to.
OCR_API_URL = "https://hammad712-urdu-ocr-app.hf.space/upload"

# (connect, read) timeouts in seconds. Without a timeout `requests` waits
# forever on a stalled server; the read timeout is generous because OCR on a
# multi-page PDF can be slow.
REQUEST_TIMEOUT = (10, 300)

# File-name suffixes that are previewed as images before being sent.
IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg", ".webp")

st.title("OCR Extraction Client")
st.write(
    """
    This app lets you upload a PDF or image file. The file is sent to a FastAPI endpoint for OCR extraction,
    and then the extracted text is returned as a Markdown file.
    """
)

st.sidebar.header("Upload Document")
uploaded_file = st.sidebar.file_uploader(
    "Upload a PDF or image file",
    type=["pdf", "png", "jpg", "jpeg", "webp"],
)
process_button = st.sidebar.button("Process Document")

if process_button and uploaded_file is None:
    # Previously a click without a file did nothing at all; tell the user why.
    st.warning("Please upload a file before clicking 'Process Document'.")
elif process_button:
    is_image = uploaded_file.name.lower().endswith(IMAGE_EXTENSIONS)
    st.info(f"Processing file: **{uploaded_file.name}**")

    if is_image:
        # The preview is best-effort: a file that cannot be displayed locally
        # is still sent to the OCR service, so report the error and continue.
        try:
            image = Image.open(uploaded_file)
            st.image(image, caption="Uploaded Image", use_column_width=True)
        except Exception as e:
            st.error(f"Error displaying image: {e}")

    # getvalue() returns the full upload regardless of how far Image.open()
    # advanced the read position while building the preview.
    files = {
        "file": (uploaded_file.name, uploaded_file.getvalue(), uploaded_file.type),
    }

    try:
        with st.spinner("Sending file to OCR service..."):
            response = requests.post(
                OCR_API_URL, files=files, timeout=REQUEST_TIMEOUT
            )
    except requests.RequestException as exc:
        # Connection failures and timeouts become a UI error, not a traceback.
        st.error(f"Could not reach the OCR service: {exc}")
    else:
        if response.status_code == 200:
            st.success("OCR extraction complete!")
            # Decode explicitly as UTF-8 rather than trusting the guessed
            # encoding behind `response.text`; replace undecodable bytes so a
            # malformed body cannot crash the page.
            md_content = response.content.decode("utf-8", errors="replace")

            heading = (
                "### Extracted Text from Image"
                if is_image
                else "### Extracted Markdown Text"
            )
            st.markdown(heading)
            st.markdown(md_content)

            st.download_button(
                label="Download Markdown File",
                data=md_content,
                file_name="output.md",
                mime="text/markdown",
            )
        else:
            st.error(f"Error: {response.status_code} {response.text}")