Spaces:

MJobe
/

document-vqa-v2

Sleeping

File size: 2,648 Bytes

6bbd3ca
 
 
 
 
dea0d8a
4bf804b
6bbd3ca
 
 
d3f8141
 
6bbd3ca
 
 
d3f8141
6bbd3ca
f8ec4b3
6bbd3ca
f8ec4b3
6bbd3ca
 
 
 
4bf804b
 
 
 
 
 
 
6bbd3ca
 
 
 
 
 
 
 
 
4bf804b
6bbd3ca
 
 
 
d3f8141
6bbd3ca
 
d3f8141
 
 
 
f8ec4b3
 
 
 
 
 
 
 
 
 
 
 
2181fee
f8ec4b3
 
d3f8141
f8ec4b3
2181fee
d3f8141
 
 
 
6bbd3ca

from io import BytesIO
from PIL import Image
from fastapi import FastAPI, File, UploadFile, Form
from fastapi.responses import JSONResponse
from transformers import pipeline
from pytesseract import pytesseract
import base64

# Load the document question-answering pipeline once at import time so that
# every request handler reuses the same model instance.
nlp_qa = pipeline("document-question-answering", model="impira/layoutlm-document-qa")

# Markdown text passed to FastAPI as the application description and reused
# as the description of each endpoint below.
description = """
## Image-based Document QA
This API extracts text from an uploaded image using OCR and performs document question answering using a LayoutLM-based model.

### Endpoints:
- **POST /uploadfile/:** Upload an image file to extract text and answer provided questions.
- **POST /pdfUpload/:** Provide a file to extract text and answer provided questions.
"""

# Single application instance; the interactive docs are served at the root URL.
# (An earlier bare ``FastAPI()`` instance was created and immediately
# overwritten by this one, so it has been dropped.)
app = FastAPI(docs_url="/", description=description)

def get_image_content(contents):
    """Return the text that Tesseract OCR extracts from an encoded image.

    ``contents`` is the raw bytes of an image file. The bytes are wrapped in
    an in-memory buffer, opened with Pillow, and passed to pytesseract, whose
    extracted string is returned unchanged.
    """
    buffer = BytesIO(contents)
    return pytesseract.image_to_string(Image.open(buffer))

@app.post("/uploadfile/", description=description)
async def perform_document_qa(
    file: UploadFile = File(...),
    questions: str = Form(...),
):
    """Answer comma-separated questions about an uploaded document image.

    Args:
        file: Uploaded image of the document.
        questions: One or more questions separated by commas. Surrounding
            whitespace is stripped and empty entries are ignored.

    Returns:
        A dict mapping each stripped question to the top answer reported by
        the pipeline (``None`` if the pipeline returned no candidate). If
        anything fails, a ``JSONResponse`` with status 500 carrying the error
        message is returned instead.
    """
    try:
        # Read the uploaded file
        contents = await file.read()

        # The document-question-answering pipeline takes the page image
        # itself and runs OCR internally when no word boxes are supplied.
        # A plain string in the image position is interpreted as an image
        # path/URL, so handing it OCR'd text makes the call fail.
        image = Image.open(BytesIO(contents))

        # Split the questions string into a list, dropping blanks produced by
        # stray or trailing commas.
        question_list = [q.strip() for q in questions.split(',') if q.strip()]

        # Perform document question answering for each question using LayoutLM-based model
        answers_dict = {}
        for question in question_list:
            result = nlp_qa(image=image, question=question)
            # Per the transformers docs the pipeline returns a list of
            # candidate dicts, best first; a bare dict is accepted as well.
            if isinstance(result, list):
                result = result[0] if result else {}
            answers_dict[question] = result.get('answer')

        return answers_dict
    except Exception as e:
        return JSONResponse(content=f"Error processing file: {str(e)}", status_code=500)

@app.post("/pdfUpload/", description=description)
async def load_file(
    file: UploadFile = File(...),
    questions: str = Form(...),
):
    """Answer comma-separated questions about an uploaded file.

    Args:
        file: Uploaded file; its bytes are decoded as UTF-8 text.
        questions: One or more questions separated by commas. Surrounding
            whitespace is stripped and empty entries are ignored.

    Returns:
        A dict mapping each stripped question to the top answer reported by
        the pipeline (``None`` if the pipeline returned no candidate). If
        anything fails (including a file that is not valid UTF-8), a
        ``JSONResponse`` with status 500 carrying the error message is
        returned instead.
    """
    try:
        # Read the uploaded file as bytes
        contents = await file.read()

        # Decode once instead of once per question.
        # NOTE(review): the document-question-answering pipeline expects an
        # image (or an image path/URL) as its first argument, so decoded text
        # is unlikely to work for real PDFs, and binary PDFs will not decode
        # as UTF-8 at all. Proper PDF support needs a page-to-image renderer,
        # which this file does not currently import — confirm intended input.
        document = contents.decode('utf-8')

        # Perform document question answering for each question using LayoutLM-based model
        answers_dict = {}
        for raw_question in questions.split(','):
            question = raw_question.strip()
            if not question:
                # Skip blanks produced by stray or trailing commas.
                continue
            result = nlp_qa(document, question)
            # Per the transformers docs the pipeline returns a list of
            # candidate dicts, best first; a bare dict is accepted as well.
            if isinstance(result, list):
                result = result[0] if result else {}
            # Key by the stripped question, matching what was actually asked
            # and the behaviour of the /uploadfile/ endpoint.
            answers_dict[question] = result.get('answer')

        return answers_dict
    except Exception as e:
        return JSONResponse(content=f"Error processing file: {str(e)}", status_code=500)