import base64
import os
from typing import Dict

from loguru import logger
from magic_pdf.rw import S3ReaderWriter
from magic_pdf.user_api import parse_union_pdf


def get_s3_writer() -> S3ReaderWriter:
    """Build the S3 reader/writer from environment configuration.

    Reads ``S3_ACCESS_KEY``, ``S3_SECRET_KEY``, ``S3_ENDPOINT`` and
    ``S3_BUCKET`` from the process environment and passes them to
    ``S3ReaderWriter``.

    Returns:
        The constructed ``S3ReaderWriter``.

    Raises:
        Exception: If any of the four environment variables is not set.
    """
    # Only the environment lookups live inside the ``try``: a KeyError raised
    # by the S3ReaderWriter constructor itself must not be misreported as a
    # missing environment variable.
    try:
        access_key = os.environ["S3_ACCESS_KEY"]
        secret_key = os.environ["S3_SECRET_KEY"]
        endpoint = os.environ["S3_ENDPOINT"]
        bucket = os.environ["S3_BUCKET"]
    except KeyError as e:
        # Chain the original KeyError so the traceback keeps the root cause.
        raise Exception(f"Missing environment variable: {e}") from e

    return S3ReaderWriter(
        ak=access_key,
        sk=secret_key,
        endpoint=endpoint,
        bucket=bucket,
    )


def inference(inputs: Dict) -> Dict:
    """Serverless API entry point: parse a base64-encoded PDF.

    Args:
        inputs: Request payload. ``"pdf_bytes"`` (required) holds the
            base64-encoded PDF. Optional keys ``"lang"``, ``"layout_model"``,
            ``"formula_enable"`` and ``"table_enable"`` are forwarded to
            ``parse_union_pdf``.

    Returns:
        ``{"status": "success", "data": <result of parse_union_pdf>}`` on
        success, otherwise ``{"status": "error", "message": <description>}``.
        Failures are reported in the returned dict rather than raised.
    """
    try:
        # Validate the request before doing any work.
        if "pdf_bytes" not in inputs:
            return {"status": "error", "message": "No PDF data provided"}

        # Decode the base64 payload. binascii.Error is a ValueError subclass,
        # and TypeError covers payloads that are neither str nor bytes-like.
        try:
            pdf_bytes = base64.b64decode(inputs["pdf_bytes"])
        except (ValueError, TypeError) as e:
            return {"status": "error", "message": f"Invalid PDF data: {e}"}

        # An empty payload decodes successfully but contains no document.
        if not pdf_bytes:
            return {"status": "error", "message": "No PDF data provided"}

        # Writer handed to the parser as its image writer.
        image_writer = get_s3_writer()

        # Parser options, each overridable by the caller.
        # NOTE(review): a boolean default for "layout_model" looks suspicious;
        # confirm parse_union_pdf accepts True here rather than a model name.
        kwargs = {
            "lang": inputs.get("lang", "zh"),
            "layout_model": inputs.get("layout_model", True),
            "formula_enable": inputs.get("formula_enable", True),
            "table_enable": inputs.get("table_enable", True),
            "input_model_is_empty": True,
        }

        # No precomputed model output is supplied (empty pdf_models together
        # with input_model_is_empty=True), presumably so the library runs its
        # built-in models; verify against the parse_union_pdf documentation.
        result = parse_union_pdf(
            pdf_bytes=pdf_bytes,
            pdf_models=[],
            imageWriter=image_writer,
            **kwargs,
        )

        return {"status": "success", "data": result}

    except Exception as e:
        # Top-level boundary: log the full traceback, then report the failure
        # to the caller instead of letting it escape the handler.
        logger.exception("Error processing PDF")
        return {"status": "error", "message": str(e)}