# File size: 1,979 Bytes (e59cbb0)
from typing import Dict
import os
import base64
from magic_pdf.user_api import parse_union_pdf
from magic_pdf.rw import S3ReaderWriter
from loguru import logger
def get_s3_writer():
    """Initialize the S3 reader/writer from environment configuration.

    Reads ``S3_ACCESS_KEY``, ``S3_SECRET_KEY``, ``S3_ENDPOINT`` and
    ``S3_BUCKET`` from the process environment and passes them to
    ``S3ReaderWriter``.

    Returns:
        The ``S3ReaderWriter`` instance built from those settings.

    Raises:
        Exception: If any of the four environment variables is unset. The
            underlying ``KeyError`` is attached as ``__cause__``.
    """
    # Resolve every setting up front and keep the ``try`` body minimal, so a
    # KeyError raised inside the client constructor is never misreported as a
    # missing environment variable.
    try:
        access_key = os.environ["S3_ACCESS_KEY"]
        secret_key = os.environ["S3_SECRET_KEY"]
        endpoint = os.environ["S3_ENDPOINT"]
        bucket = os.environ["S3_BUCKET"]
    except KeyError as e:
        raise Exception(f"Missing environment variable: {str(e)}") from e

    # NOTE(review): keyword names are kept exactly as in the original call;
    # confirm they match the installed magic_pdf S3ReaderWriter signature.
    return S3ReaderWriter(
        ak=access_key,
        sk=secret_key,
        endpoint=endpoint,
        bucket=bucket,
    )
def inference(inputs: Dict):
    """Serverless API entry point: parse a base64-encoded PDF.

    Args:
        inputs: Request payload. Keys read by this function:
            ``pdf_bytes`` (required) - base64-encoded PDF content;
            ``lang`` (default ``"zh"``), ``layout_model`` (default ``True``),
            ``formula_enable`` (default ``True``) and ``table_enable``
            (default ``True``) - forwarded unchanged to ``parse_union_pdf``.

    Returns:
        ``{"status": "success", "data": <parse_union_pdf result>}`` on
        success, otherwise ``{"status": "error", "message": <text>}``.
        This function does not raise; every failure is reported through the
        returned dict.
    """
    try:
        # Fetch and validate the input. A missing payload (``None`` or an
        # empty dict) gets the same clear message as a missing key.
        if not inputs or "pdf_bytes" not in inputs:
            return {"status": "error", "message": "No PDF data provided"}

        # Base64-decode the PDF content. b64decode raises TypeError for
        # non-string/bytes input and ValueError (including binascii.Error)
        # for malformed data.
        try:
            pdf_bytes = base64.b64decode(inputs["pdf_bytes"])
        except (TypeError, ValueError) as e:
            return {"status": "error", "message": f"Invalid PDF data: {str(e)}"}

        # An empty payload decodes to b"" without error; reject it here
        # rather than building an S3 client and failing inside the parser.
        if not pdf_bytes:
            return {"status": "error", "message": "No PDF data provided"}

        # Initialize the S3 reader/writer.
        image_writer = get_s3_writer()

        # Prepare the parser options.
        # NOTE(review): ``layout_model`` defaults to True here; confirm that
        # parse_union_pdf accepts a boolean for this option.
        kwargs = {
            "lang": inputs.get("lang", "zh"),
            "layout_model": inputs.get("layout_model", True),
            "formula_enable": inputs.get("formula_enable", True),
            "table_enable": inputs.get("table_enable", True),
            "input_model_is_empty": True,
        }

        # Process the document with parse_union_pdf.
        result = parse_union_pdf(
            pdf_bytes=pdf_bytes,
            pdf_models=[],  # empty model list: use the built-in models
            imageWriter=image_writer,
            **kwargs,
        )
        return {
            "status": "success",
            "data": result,
        }
    except Exception as e:
        # Top-level boundary: log the traceback and report the failure to
        # the caller instead of letting the exception escape.
        logger.exception("Error processing PDF")
        return {
            "status": "error",
            "message": str(e),
        }