"""Flask service that records basic layout metrics for a PDF stored in Supabase.

A POST to ``/analyze`` looks up the file's storage path in the ``files``
table, downloads it from the ``files`` bucket, submits it to the Azure
``prebuilt-layout`` model, and writes the page size plus two word-polygon
coordinates back to the same ``files`` row.
"""
import json
import logging
import os
import time

import requests
import supabase
from flask import Flask, request, jsonify

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# NOTE(review): SECURITY - the credentials below are committed to source
# control. They remain as fallbacks so existing deployments keep working, but
# they should be rotated and supplied only through the environment.

# Azure Document Intelligence setup
AZURE_ENDPOINT = os.environ.get(
    "AZURE_ENDPOINT", "https://gosignpdf.cognitiveservices.azure.com/"
)
AZURE_KEY = os.environ.get(
    "AZURE_KEY",
    "2nUifMPmbS35qkiFr5OjgzDw7ooE5Piw5892GQgyWZHe0oNRIBJHJQQJ99AKACfhMk5XJ3w3AAALACOGkANC",
)

# Supabase setup
SUPABASE_URL = os.environ.get(
    "SUPABASE_URL", "https://dtzuqtvroalrjhgdcowq.supabase.co/"
)
SUPABASE_KEY = os.environ.get(
    "SUPABASE_KEY",
    "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6ImR0enVxdHZyb2FscmpoZ2Rjb3dxIiwicm9sZSI6ImFub24iLCJpYXQiOjE3MjU0NDk3MzIsImV4cCI6MjA0MTAyNTczMn0.WrIvwEOq4CqCb8IkU8G4jiWkf9DM1JxGd2_aTN4vlV4",
)

# Polling behaviour for the asynchronous Azure analysis operation.
AZURE_POLL_INTERVAL_SECONDS = 8
AZURE_MAX_WAIT_SECONDS = 300
# Per-request timeout so a stalled connection cannot block a worker forever.
HTTP_TIMEOUT_SECONDS = 60

supabase_client = supabase.create_client(SUPABASE_URL, SUPABASE_KEY)

app = Flask(__name__)


def log_debug(message, **kwargs):
    """Log debug messages for tracking.

    Prints ``[DEBUG] <message>`` followed by one indented ``- key: value``
    line per keyword argument.
    """
    print(f"[DEBUG] {message}")
    for key, value in kwargs.items():
        print(f" - {key}: {value}")


def download_file_from_supabase(file_path):
    """Download a file from the Supabase ``files`` storage bucket.

    Args:
        file_path: Path of the object inside the bucket.

    Returns:
        The file content as ``bytes``.

    Raises:
        RuntimeError: If the storage client returns anything other than bytes.
    """
    response = supabase_client.storage.from_("files").download(file_path)
    if isinstance(response, bytes):
        return response
    # A non-bytes result is not guaranteed to expose ``.text``; fall back to
    # its repr so building the error message cannot itself fail.
    detail = getattr(response, "text", repr(response))
    raise RuntimeError(f"Failed to download file from Supabase: {detail}")


def analyze_pdf_layout(
    file_content,
    *,
    poll_interval=AZURE_POLL_INTERVAL_SECONDS,
    max_wait=AZURE_MAX_WAIT_SECONDS,
):
    """Send a PDF to the Azure ``prebuilt-layout`` model and return the result.

    Args:
        file_content: Raw PDF bytes, posted as ``application/pdf``.
        poll_interval: Seconds to sleep between status polls.
        max_wait: Maximum total seconds to wait for the analysis to finish.

    Returns:
        The ``analyzeResult`` object from the final operation response.

    Raises:
        Exception: If the submit call does not return 202, the
            ``Operation-Location`` header is missing, or the operation
            reports ``failed``.
        requests.RequestException: On transport errors or a non-2xx poll
            response.
        TimeoutError: If the operation has not finished within ``max_wait``.
    """
    # AZURE_ENDPOINT carries a trailing slash; strip it to avoid "//" in the path.
    url = (
        f"{AZURE_ENDPOINT.rstrip('/')}"
        "/formrecognizer/documentModels/prebuilt-layout:analyze"
        "?api-version=2023-07-31"
    )
    headers = {
        "Ocp-Apim-Subscription-Key": AZURE_KEY,
        "Content-Type": "application/pdf",
    }
    response = requests.post(
        url, headers=headers, data=file_content, timeout=HTTP_TIMEOUT_SECONDS
    )
    if response.status_code != 202:
        raise Exception(f"Azure request failed: {response.text}")

    operation_location = response.headers.get("Operation-Location")
    if not operation_location:
        raise Exception("Operation-Location header not found in response.")

    poll_headers = {"Ocp-Apim-Subscription-Key": AZURE_KEY}
    deadline = time.monotonic() + max_wait
    while True:
        result_response = requests.get(
            operation_location, headers=poll_headers, timeout=HTTP_TIMEOUT_SECONDS
        )
        # Without this, an error body lacking "status" would be polled forever.
        result_response.raise_for_status()
        result = result_response.json()

        status = result.get("status")
        if status == "succeeded":
            return result["analyzeResult"]
        if status == "failed":
            logger.error("Azure analysis failed: %s", result.get("error"))
            raise Exception("Analysis failed.")

        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"Azure analysis did not complete within {max_wait} seconds."
            )
        time.sleep(poll_interval)


def _extract_layout_metrics(layout_data):
    """Pull the page size and two word coordinates out of an analyze result.

    Only the first page is inspected (the service assumes single-page PDFs).

    Returns:
        Dict with ``page_height``, ``page_width``, ``x1`` (first polygon value
        of the first word) and ``y4`` (last polygon value of the last word).

    Raises:
        ValueError: If the result has no pages or the first page has no words.
    """
    pages = layout_data.get("pages") or []
    if not pages:
        raise ValueError("No pages found in the analyzed document")
    page_data = pages[0]

    words = page_data.get("words") or []
    if not words:
        raise ValueError("No words found on the first page of the document")

    return {
        "page_height": page_data["height"],
        "page_width": page_data["width"],
        "x1": words[0]["polygon"][0],  # X1 of first word
        "y4": words[-1]["polygon"][-1],  # Y4 of last word
    }


@app.route("/analyze", methods=["POST"])
def analyze():
    """Analyze the PDF identified by ``file_id`` and store its layout metrics.

    Expects a JSON body ``{"file_id": ...}``. Responds with 400 when the id is
    missing, 404 when no matching row is found, 422 when the document has no
    pages or words, 500 on any other failure, and 200 with the metrics on
    success.
    """
    try:
        # silent=True: a missing or malformed JSON body becomes a 400 below
        # instead of an exception that the blanket handler reports as a 500.
        payload = request.get_json(silent=True)
        file_id = payload.get("file_id") if isinstance(payload, dict) else None
        if not file_id:
            return jsonify({"error": "File ID is required"}), 400

        # Fetch file path from Supabase
        file_data = (
            supabase_client.table("files")
            .select("file_path")
            .eq("id", file_id)
            .single()
            .execute()
        )
        if not file_data or not file_data.data:
            return jsonify({"error": "File not found or Supabase query failed"}), 404
        file_path = file_data.data["file_path"]

        file_content = download_file_from_supabase(file_path)
        layout_data = analyze_pdf_layout(file_content)

        try:
            metrics = _extract_layout_metrics(layout_data)
        except ValueError as exc:
            return jsonify({"error": str(exc)}), 422

        # Persist the layout metrics on the file's row.
        update_response = (
            supabase_client.table("files")
            .update(metrics)
            .eq("id", file_id)
            .execute()
        )

        # Check the explicit error first so its details are not masked by the
        # generic empty-data failure.
        update_error = getattr(update_response, "error", None)
        if update_error:
            return jsonify({
                "error": "Failed to update file layout data",
                "details": update_error,
            }), 500
        if not update_response.data:
            return jsonify({"error": "Failed to update file layout data"}), 500

        return jsonify({
            "message": "Layout data analyzed successfully",
            **metrics,
        }), 200
    except Exception as e:
        # Top-level boundary: keep the traceback in the logs, return the message.
        logger.exception("Layout analysis request failed")
        return jsonify({"error": str(e)}), 500


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=8000)