MinerUapi / app.py
kitjesen's picture
Upload 3 files
e59cbb0 verified
from typing import Dict
import os
import base64
from magic_pdf.user_api import parse_union_pdf
from magic_pdf.rw import S3ReaderWriter
from loguru import logger
def get_s3_writer():
    """Build an ``S3ReaderWriter`` from environment configuration.

    The connection settings are read from the environment variables
    ``S3_ACCESS_KEY``, ``S3_SECRET_KEY``, ``S3_ENDPOINT`` and ``S3_BUCKET``.

    Returns:
        The ``S3ReaderWriter`` constructed from those four settings.

    Raises:
        Exception: If any of the required environment variables is unset.
            The underlying ``KeyError`` is attached as ``__cause__``.
    """
    # Read the settings before constructing the client, so that only a
    # genuinely missing variable is reported as one. A KeyError raised from
    # inside S3ReaderWriter itself must not be mislabelled.
    try:
        access_key = os.environ["S3_ACCESS_KEY"]
        secret_key = os.environ["S3_SECRET_KEY"]
        endpoint = os.environ["S3_ENDPOINT"]
        bucket = os.environ["S3_BUCKET"]
    except KeyError as e:
        raise Exception(f"Missing environment variable: {str(e)}") from e

    return S3ReaderWriter(
        ak=access_key,
        sk=secret_key,
        endpoint=endpoint,
        bucket=bucket,
    )
def inference(inputs: Dict) -> Dict:
    """Serverless API entry point: parse a base64-encoded PDF.

    Args:
        inputs: Request payload. ``pdf_bytes`` (required) holds the
            base64-encoded PDF. The optional keys ``lang`` (default
            ``"zh"``), ``layout_model``, ``formula_enable`` and
            ``table_enable`` (each default ``True``) are forwarded to
            ``parse_union_pdf``.

    Returns:
        ``{"status": "success", "data": <parse result>}`` on success, or
        ``{"status": "error", "message": <text>}`` on failure. Any
        ``Exception`` raised while handling the request is logged and
        converted into the error form.
    """
    try:
        # Validate the input.
        if "pdf_bytes" not in inputs:
            return {"status": "error", "message": "No PDF data provided"}

        # Decode the base64 payload. b64decode reports malformed input with
        # binascii.Error (a ValueError subclass) or TypeError.
        try:
            pdf_bytes = base64.b64decode(inputs["pdf_bytes"])
        except (ValueError, TypeError) as e:
            return {"status": "error", "message": f"Invalid PDF data: {str(e)}"}

        # An empty payload cannot be a PDF; reject it before creating an S3
        # client or invoking the parser.
        if not pdf_bytes:
            return {"status": "error", "message": "Invalid PDF data: empty payload"}

        # Initialise the S3 reader/writer.
        image_writer = get_s3_writer()

        # Prepare the parser options.
        # NOTE(review): a boolean default for "layout_model" looks unusual
        # for something named like a model selector -- confirm against the
        # parse_union_pdf signature in the installed magic_pdf version.
        kwargs = {
            "lang": inputs.get("lang", "zh"),
            "layout_model": inputs.get("layout_model", True),
            "formula_enable": inputs.get("formula_enable", True),
            "table_enable": inputs.get("table_enable", True),
            "input_model_is_empty": True,
        }

        # Run the parser; an empty model list is passed together with
        # input_model_is_empty=True (original note: use the built-in models).
        result = parse_union_pdf(
            pdf_bytes=pdf_bytes,
            pdf_models=[],
            imageWriter=image_writer,
            **kwargs,
        )

        return {
            "status": "success",
            "data": result,
        }
    except Exception as e:
        # Top-level boundary: log the traceback and report the failure in
        # the response rather than raising it to the caller.
        logger.exception("Error processing PDF")
        return {
            "status": "error",
            "message": str(e),
        }