got_ocr_test

Sleeping

App Files Files Community

acharyaaditya26 committed on Nov 29

Commit

aceb54a

•

1 Parent(s): ac722ca

changes

Browse files

Files changed (6) hide show

Dockerfile +31 -0
app.py +123 -0
requirements.txt +6 -0
static/style.css +29 -0
templates/index.html +15 -0
templates/result.html +19 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,31 @@

+# Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+# you will also find guides on how best to write your Dockerfile
+FROM nvidia/cuda:12.3.2-cudnn9-devel-ubuntu22.04
+# Update, upgrade and install in ONE layer: a separately cached
+# `apt-get update` layer goes stale, so later `apt-get install` layers can
+# fail or pull outdated packages. The package lists are removed afterwards
+# to keep the image small, and the noninteractive frontend prevents
+# configuration prompts (e.g. tzdata) from hanging the build.
+RUN export DEBIAN_FRONTEND=noninteractive \
+    && apt-get update \
+    && apt-get upgrade -y \
+    && apt-get install -y \
+        python3-pip python3-dev ffmpeg libsm6 libxext6 git \
+    && rm -rf /var/lib/apt/lists/*
+RUN pip3 install --no-cache-dir --upgrade pip
+RUN pip3 install --no-cache-dir packaging
+RUN pip install --no-cache-dir numpy==1.23.5
+RUN useradd -m -u 1000 user
+USER user
+ENV PATH="/home/user/.local/bin:$PATH"
+WORKDIR /app
+COPY --chown=user ./requirements.txt requirements.txt
+# Install torch first
+RUN pip install --no-cache-dir torch==2.1.2
+# Now install the rest of the packages.
+# No --upgrade here: requirements.txt lists an unpinned `numpy`, and
+# --upgrade would replace the numpy==1.23.5 pinned above with the latest
+# release.
+RUN pip install --no-cache-dir -r requirements.txt
+# flash_attn imports torch while building, so it must be built against the
+# already-installed torch instead of inside an isolated build environment.
+RUN pip install --no-cache-dir --no-build-isolation flash_attn
+COPY --chown=user . /app
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

app.py ADDED Viewed

	@@ -0,0 +1,123 @@

+from fastapi import FastAPI, File, UploadFile, HTTPException
+from fastapi.responses import HTMLResponse
+from fastapi.staticfiles import StaticFiles
+from fastapi.templating import Jinja2Templates
+import fitz  # PyMuPDF
+from transformers import AutoModel, AutoTokenizer
+from PIL import Image
+import numpy as np
+import os
+import base64
+import io
+import uuid
+import tempfile
+import time
+import shutil
+from pathlib import Path
+import json
+from starlette.requests import Request
+app = FastAPI()
+# Load tokenizer and model
+# NOTE: both loads run at import time, so importing this module needs the
+# model weights and a CUDA device. trust_remote_code=True allows transformers
+# to execute the Python code shipped in the 'ucaslcl/GOT-OCR2_0' repository.
+tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
+model = AutoModel.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True, device_map='cuda', use_safetensors=True)
+# Switch to inference mode and place the model on the GPU.
+# NOTE(review): .cuda() looks redundant with device_map='cuda' above — confirm.
+model = model.eval().cuda()
+UPLOAD_FOLDER = "./uploads"
+RESULTS_FOLDER = "./results"
+# Ensure directories exist
+for folder in [UPLOAD_FOLDER, RESULTS_FOLDER]:
+    if not os.path.exists(folder):
+        os.makedirs(folder)
+def image_to_base64(image):
+    buffered = io.BytesIO()
+    image.save(buffered, format="PNG")
+    return base64.b64encode(buffered.getvalue()).decode()
+def pdf_to_images(pdf_path):
+    images = []
+    pdf_document = fitz.open(pdf_path)
+    for page_num in range(len(pdf_document)):
+        page = pdf_document.load_page(page_num)
+        pix = page.get_pixmap()
+        img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
+        images.append(img)
+    return images
+def run_GOT(pdf_file):
+    unique_id = str(uuid.uuid4())
+    pdf_path = os.path.join(UPLOAD_FOLDER, f"{unique_id}.pdf")
+    shutil.copy(pdf_file, pdf_path)
+    images = pdf_to_images(pdf_path)
+    results = []
+    try:
+        for i, image in enumerate(images):
+            image_path = os.path.join(UPLOAD_FOLDER, f"{unique_id}_page_{i+1}.png")
+            image.save(image_path)
+            result_path = os.path.join(RESULTS_FOLDER, f"{unique_id}_page_{i+1}.html")
+            res = model.chat_crop(tokenizer, image_path, ocr_type='format', render=True, save_render_file=result_path)
+            # Read the rendered HTML content
+            with open(result_path, 'r') as f:
+                html_content = f.read()
+            results.append({
+                "page_number": i + 1,
+                "text": res,
+                "html": html_content
+            })
+            if os.path.exists(image_path):
+                os.remove(image_path)
+            if os.path.exists(result_path):
+                os.remove(result_path)
+    except Exception as e:
+        return f"Error: {str(e)}", None
+    finally:
+        if os.path.exists(pdf_path):
+            os.remove(pdf_path)
+    html_output = "".join([result["html"] for result in results])
+    return json.dumps(results, indent=4), html_output
+def cleanup_old_files():
+    current_time = time.time()
+    for folder in [UPLOAD_FOLDER, RESULTS_FOLDER]:
+        for file_path in Path(folder).glob('*'):
+            if current_time - file_path.stat().st_mtime > 3600:  # 1 hour
+                file_path.unlink()
+cleanup_old_files()
+# Mount static files
+app.mount("/static", StaticFiles(directory="static"), name="static")
+# Set up Jinja2 templates
+templates = Jinja2Templates(directory="templates")
+@app.get("/", response_class=HTMLResponse)
+async def read_root(request: Request):
+    """Render the upload form from the ``index.html`` template."""
+    return templates.TemplateResponse("index.html", {"request": request})
+@app.post("/uploadfile/")
+async def upload_file(request: Request, file: UploadFile = File(...)):
+    temp_dir = tempfile.TemporaryDirectory()
+    temp_pdf_path = os.path.join(temp_dir.name, file.filename)
+    with open(temp_pdf_path, "wb") as buffer:
+        buffer.write(await file.read())
+    json_output, html_output = run_GOT(temp_pdf_path)
+    temp_dir.cleanup()
+    return templates.TemplateResponse("result.html", {"request": request, "json_output": json_output, "html_output": html_output})
+if __name__ == "__main__":
+    import uvicorn
+    # Direct-run entry point. NOTE(review): this listens on port 8000, while
+    # the Dockerfile CMD starts uvicorn on port 7860 — confirm the difference
+    # is intentional.
+    uvicorn.run(app, host="0.0.0.0", port=8000)

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+PyMuPDF
+transformers
+pillow
+numpy
+fastapi
+uvicorn

static/style.css ADDED Viewed

	@@ -0,0 +1,29 @@

+/* static/style.css */
+/* Page-wide font and outer spacing. */
+body {
+    font-family: Arial, sans-serif;
+    margin: 20px;
+}
+/* Heading colour. */
+h1, h2 {
+    color: #333;
+}
+/* Space below the upload form. */
+form {
+    margin-bottom: 20px;
+}
+/* Space below each output section of the result page. */
+#json-output, #html-output {
+    margin-bottom: 20px;
+}
+/* Preformatted output box; long lines scroll horizontally instead of overflowing. */
+pre {
+    background-color: #f4f4f4;
+    padding: 10px;
+    border-radius: 5px;
+    overflow-x: auto;
+}
+/* Frame around the embedded rendered HTML. */
+iframe {
+    border: 1px solid #ccc;
+    border-radius: 5px;
+}

templates/index.html ADDED Viewed

	@@ -0,0 +1,15 @@

+<!-- templates/index.html -->
+<!DOCTYPE html>
+<html>
+<head>
+    <title>PDF OCR</title>
+    <link href="/static/style.css" rel="stylesheet">
+</head>
+<body>
+    <h1>Upload PDF for OCR</h1>
+    <form action="/uploadfile/" enctype="multipart/form-data" method="post">
+        <!-- "required" stops the browser from submitting without a file, which the server would reject with a validation error. -->
+        <input name="file" type="file" accept=".pdf" required>
+        <button type="submit">Upload</button>
+    </form>
+</body>
+</html>

templates/result.html ADDED Viewed

	@@ -0,0 +1,19 @@

+<!-- templates/result.html -->
+<!DOCTYPE html>
+<html>
+<head>
+    <title>OCR Result</title>
+    <link href="/static/style.css" rel="stylesheet">
+</head>
+<body>
+    <h1>OCR Result</h1>
+    <div id="json-output">
+        <h2>GOT Output</h2>
+        <pre>{{ json_output }}</pre>
+    </div>
+    <div id="html-output">
+        <h2>Rendered HTML</h2>
+        <!-- The rendered HTML is derived from an uploaded document, so it is treated as untrusted:
+             sandbox="allow-scripts" lets its scripts run but gives the frame an opaque origin,
+             so it cannot access this page, its cookies or its storage. -->
+        <iframe srcdoc="{{ html_output }}" sandbox="allow-scripts" width="100%" height="600px"></iframe>
+    </div>
+</body>
+</html>