Spaces:
Sleeping
Sleeping
from io import BytesIO | |
from PIL import Image | |
from fastapi import FastAPI, File, UploadFile, Form | |
from fastapi.responses import JSONResponse | |
import fitz | |
from transformers import pipeline | |
import requests | |
from typing import List | |
from pytesseract import pytesseract | |
# Load the BERT-based question answering pipeline once at import time; the
# request handlers below share this single instance.
nlp_qa = pipeline(
    'question-answering',
    model='bert-large-uncased-whole-word-masking-finetuned-squad',
)

# Text passed to FastAPI as the API description.
description = """
## Image-based Document QA
This API extracts text from an uploaded image using OCR and performs document question answering using a BERT-based model.
### Endpoints:
- **POST /uploadfile/:** Upload an image file to extract text and answer provided questions.
- **POST /pdfUpload/:** Provide a file to extract text and answer provided questions.
"""

# Create the application exactly once. An earlier bare ``FastAPI()`` instance
# was immediately overwritten by this one, so it has been dropped.
# ``docs_url="/"`` serves the interactive documentation at the root path.
app = FastAPI(docs_url="/", description=description)
# Registered under the path advertised in the API description.
@app.post("/uploadfile/")
async def perform_document_qa(
    file: UploadFile = File(...),
    questions: str = Form(...),
):
    """Answer questions about the text found in an uploaded image.

    The upload is opened with Pillow, run through Tesseract OCR, and each
    question is answered by the module-level ``nlp_qa`` pipeline using the
    recognised text as context.

    Args:
        file: Uploaded image file.
        questions: Comma-separated questions. Surrounding whitespace is
            stripped and blank entries are skipped.

    Returns:
        A dict mapping each (stripped) question to its answer string, or a
        ``JSONResponse`` with status 500 carrying the error message if any
        step fails.
    """
    try:
        # Read the uploaded file
        contents = await file.read()

        # Decode the bytes as an image and OCR it; the context manager
        # releases the image as soon as the text has been extracted.
        with Image.open(BytesIO(contents)) as image:
            text_content = pytesseract.image_to_string(image)

        # A trailing or doubled comma produces an empty question, which the
        # QA pipeline refuses; skip those rather than fail the whole request.
        question_list = [
            stripped
            for stripped in (raw.strip() for raw in questions.split(','))
            if stripped
        ]

        # Perform document question answering for each question
        answers_dict = {}
        for question in question_list:
            result = nlp_qa({
                'question': question,
                'context': text_content,
            })
            answers_dict[question] = result['answer']
        return answers_dict
    except Exception as e:
        # Endpoint boundary: surface any failure to the client as a 500.
        return JSONResponse(content=f"Error processing file: {str(e)}", status_code=500)
# Registered under the path advertised in the API description.
@app.post("/pdfUpload/")
async def load_file(
    file: UploadFile = File(...),
    questions: str = Form(...),
):
    """Answer questions about the text of an uploaded PDF or text file.

    Uploads that start with the ``%PDF-`` signature are parsed with PyMuPDF
    (``fitz``) and the text of all pages is joined; anything else is decoded
    as UTF-8 text. Each question is then answered by the module-level
    ``nlp_qa`` pipeline using that text as context.

    Args:
        file: Uploaded PDF or UTF-8 text file.
        questions: Comma-separated questions. Surrounding whitespace is
            stripped and blank entries are skipped.

    Returns:
        A dict mapping each (stripped) question to its answer string, or a
        ``JSONResponse`` with status 500 carrying the error message if any
        step fails.
    """
    try:
        # Read the uploaded file as bytes
        contents = await file.read()

        # Build the context once, outside the question loop.
        if contents.startswith(b'%PDF-'):
            # Raw PDF bytes are not meaningful as UTF-8 text; extract the
            # page text instead.
            with fitz.open(stream=contents, filetype='pdf') as document:
                context = "\n".join(page.get_text() for page in document)
        else:
            context = contents.decode('utf-8')

        # Perform document question answering for each question
        answers_dict = {}
        for raw_question in questions.split(','):
            question = raw_question.strip()
            if not question:
                # Empty entries (e.g. from a trailing comma) would make the
                # pipeline raise and fail the whole request.
                continue
            result = nlp_qa({
                'question': question,
                'context': context,
            })
            # Key by the stripped question, i.e. the exact text that was asked.
            answers_dict[question] = result['answer']
        return answers_dict
    except Exception as e:
        # Endpoint boundary: surface any failure to the client as a 500.
        return JSONResponse(content=f"Error processing file: {str(e)}", status_code=500)