Spaces:

MJobe
/

document-vqa-v2

Sleeping

MJobe committed on Dec 12, 2023

Commit

4bf804b

•

1 Parent(s): dea0d8a

Update main.py

Files changed (1) hide show

main.py CHANGED Viewed

@@ -4,6 +4,7 @@ from fastapi import FastAPI, File, UploadFile, Form
 from fastapi.responses import JSONResponse
 from transformers import pipeline
 from pytesseract import pytesseract
 app = FastAPI()
@@ -21,6 +22,13 @@ This API extracts text from an uploaded image using OCR and performs document qu
 app = FastAPI(docs_url="/", description=description)
 @app.post("/uploadfile/", description=description)
 async def perform_document_qa(
     file: UploadFile = File(...),
@@ -30,11 +38,7 @@ async def perform_document_qa(
         # Read the uploaded file
         contents = await file.read()
-        # Convert binary content to image
-        image = Image.open(BytesIO(contents))
-        # Perform OCR to extract text from the image
-        text_content = pytesseract.image_to_string(image)
         # Split the questions string into a list
         question_list = [q.strip() for q in questions.split(',')]

 from fastapi.responses import JSONResponse
 from transformers import pipeline
 from pytesseract import pytesseract
+import base64
 app = FastAPI()
 app = FastAPI(docs_url="/", description=description)
+def get_image_content(contents):
+    # Convert binary content to image
+    image = Image.open(BytesIO(contents))
+    # Perform OCR to extract text from the image
+    text_content = pytesseract.image_to_string(image)
+    return text_content
 @app.post("/uploadfile/", description=description)
 async def perform_document_qa(
     file: UploadFile = File(...),
         # Read the uploaded file
         contents = await file.read()
+        text_content = get_image_content(contents)
         # Split the questions string into a list
         question_list = [q.strip() for q in questions.split(',')]