AI-SV
/

document-ocr-ai

Model card Files Files and versions Community

mayank-youdataai commited on Sep 30, 2024

Commit

74f6a97

verified ·

1 Parent(s): b0c683f

Upload 2 files

Browse files

Files changed (2) hide show

app.py +148 -0
requirements.txt +19 -0

app.py ADDED Viewed

	@@ -0,0 +1,148 @@

+import os
+import json
+import tempfile
+from fastapi import FastAPI, UploadFile, File, HTTPException
+from paddleocr import PPStructure
+import logging
+import paddle
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Initialize FastAPI app
+app = FastAPI()
+# Global variable for OCR engine
+ocr_engine = None
+# Function to initialize the PaddleOCR engine based on GPU availability
+def init_ocr_engine():
+    global ocr_engine
+    if ocr_engine is None:
+        use_gpu = is_gpu_available()
+        if use_gpu:
+            logger.info("NVIDIA GPU detected, running PaddleOCR on GPU.")
+        else:
+            logger.info("No GPU detected, running PaddleOCR on CPU.")
+        # Initialize the OCR engine with the use_gpu variable
+        ocr_engine = PPStructure(
+            table=True,
+            ocr=True,
+            show_log=True,
+            layout_score_threshold=0.1,
+            structure_version='PP-StructureV2',
+            use_gpu=use_gpu
+        )
+    return ocr_engine
+# Function to check for GPU availability using Paddle
+def is_gpu_available():
+    # Check if PaddlePaddle is compiled with CUDA and if a CUDA device is available
+    return paddle.is_compiled_with_cuda() and paddle.device.cuda.device_count() > 0
+# Function to perform OCR and save the structured result
+def perform_ocr_and_save(pdf_path, save_folder='./output'):
+    # Initialize PaddleOCR engine
+    ocr_engine = init_ocr_engine()
+    # Directly pass the PDF to PaddleOCR
+    result = ocr_engine(pdf_path)
+    if not result:
+        logger.error(f"OCR failed for {pdf_path}")
+    return result
+# Function to format results to strings and sort them
+def format_to_strings_and_sort(results):
+    logger.info("Formatting and sorting OCR results.")
+    formatted_data = []
+    for idx, elements in enumerate(results):
+        for element in elements:
+            type = element['type']
+            bbox = element['bbox']
+            responses = element['res']
+            if type != 'table':
+                for response in responses:
+                    y_coordinate = bbox[1]  # Use y1 coordinate for sorting
+                    formatted_data.append({
+                        'page_num': idx + 1,
+                        'type': type,
+                        'text': response['text'],
+                        'confidence': response['confidence'],
+                        'bbox': bbox,
+                        'y_coordinate': y_coordinate  # Add y-coordinate for sorting
+                    })
+            else:
+                formatted_data.append({
+                    'page_num': idx + 1,
+                    'type': type,
+                    'html': responses['html'],
+                    'bbox': bbox,
+                    'y_coordinate': bbox[1]  # Use bbox y1 for sorting
+                })
+    sorted_data = sorted(formatted_data, key=lambda x: (x['page_num'], x['y_coordinate']))
+    logger.info("Sorting completed.")
+    return sorted_data
+# Function to save results to a JSON file
+def save_to_json(data, filename):
+    logger.info(f"Saving sorted results to {filename}.")
+    with open(filename, "w") as json_file:
+        json.dump(data, json_file, indent=4)
+# FastAPI endpoint to process uploaded PDF
+@app.post("/process-ocr/")
+async def process_ocr(file: UploadFile = File(...)):
+    try:
+        # Validate file type
+        if file.content_type != "application/pdf":
+            logger.warning(f"Invalid file type uploaded: {file.content_type}")
+            raise HTTPException(status_code=400, detail="Invalid file type. Please upload a PDF file.")
+        # Create a temporary file to store the uploaded PDF
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
+            contents = await file.read()
+            temp_file.write(contents)
+            temp_file_path = temp_file.name
+            logger.info(f"Temporary file created at: {temp_file_path}")
+        # Perform OCR and save results
+        result = perform_ocr_and_save(temp_file_path)
+        if result is None:
+            raise HTTPException(status_code=500, detail="OCR processing failed. Check the input file.")
+        # Sort and format the results
+        result_json = format_to_strings_and_sort(result)
+        # Optionally, save the result JSON to a file (for debugging)
+        save_to_json(result_json, 'result_json.json')
+        # Return sorted result as JSON
+        return result_json
+    except Exception as e:
+        logger.error(f"An error occurred during OCR processing: {e}")
+        raise HTTPException(status_code=500, detail="An error occurred during OCR processing.")
+    finally:
+        # Clean up the temporary file
+        if os.path.exists(temp_file_path):
+            os.remove(temp_file_path)
+            logger.info(f"Temporary file {temp_file_path} deleted.")
+# Endpoint to check if GPU is available
+@app.get("/check-gpu/")
+def check_gpu():
+    if is_gpu_available():
+        return {"gpu_available": True, "message": "NVIDIA GPU is available and will be used."}
+    else:
+        return {"gpu_available": False, "message": "NVIDIA GPU is not available, using CPU instead."}

requirements.txt ADDED Viewed

	@@ -0,0 +1,19 @@

+fastapi==0.115.0
+uvicorn==0.31.0
+paddleocr==2.8.1  # Updated to the latest available version
+paddlepaddle-gpu==2.5.2  # GPU support, modify based on your CUDA version
+pyMuPDF==1.22.1
+numpy==1.24.4
+tqdm==4.65.0
+python-multipart==0.0.5
+premailer==3.10.0
+# fastapi
+# uvicorn
+# paddleocr
+# paddlepaddle # For GPU support, you can modify this based on your CUDA version
+# PyMuPDF
+# numpy
+# tqdm
+# python-multipart
+# premailer