Spaces:

gosign-de
/

letterhead

Running

App Files Files Community

gosign committed on Dec 13, 2024

Commit

1c01735

verified ·

1 Parent(s): f19b84d

Update app.py

Browse files

Files changed (1) hide show

app.py +110 -239

app.py CHANGED Viewed

@@ -1,253 +1,124 @@
-import json
-import os
-import magic
-from dotenv import load_dotenv
-from docx import Document
-from docx.shared import Inches
-from docx.enum.text import WD_ALIGN_PARAGRAPH
-from flask_cors import CORS
 from flask import Flask, request, jsonify
-from supabase import create_client, Client
 import logging
 # Configure logging
 logging.basicConfig(level=logging.INFO)
-# load_dotenv(dotenv_path='.env.local')
-load_dotenv()
-app = Flask(__name__)
-CORS(app)
-url: str = 'https://dtzuqtvroalrjhgdcowq.supabase.co/'
-key: str = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6ImR0enVxdHZyb2FscmpoZ2Rjb3dxIiwicm9sZSI6ImFub24iLCJpYXQiOjE3MjU0NDk3MzIsImV4cCI6MjA0MTAyNTczMn0.WrIvwEOq4CqCb8IkU8G4jiWkf9DM1JxGd2_aTN4vlV4'
-supabase: Client = create_client(url, key)
-def get_file_by_id(file_id):
-    try:
-        response = supabase.table("files").select("*").eq("id", file_id).single().execute()
-        file = response.data
-        if not file:
-            raise ValueError(response.error.message if response.error else "File not found.")
-        file_path = file.get("file_path")
-        file_name = file.get("name")
-        if not file_path:
-            raise ValueError("File path is missing in the metadata.")
-        # Fetch the actual file content from Supabase storage
-        file_data = supabase.storage.from_('files').download(file_path)
-        return file_name, file_data
-    except Exception as e:
-        print("Error fetching file:", e)
-        return jsonify({"error": str(e)}), 500
-def get_file_type(file_path):
-    try:
-        # Use python-magic to detect the MIME type of the file
-        mime = magic.Magic(mime=True)
-        file_type = mime.from_file(file_path)
-        return file_type
-    except Exception as e:
-        print("Error fetching file:", e)
-        return jsonify({"error": str(e)}), 500
-def insert_file_record(user_id, doc):
-    try:
-        file_type = get_file_type(doc)
-        file_record = {
-            "user_id": user_id,
-            "description": "",
-            "file_path": "",
-            "name": "letterhead-" + os.path.basename(doc),
-            "size": os.path.getsize(doc),
-            "tokens": 0,
-            "type": file_type,
-        }
-        response = supabase.table("files").insert(file_record).execute()
-        return response
-    except Exception as e:
-        print("Error fetching file:", e)
-        return jsonify({"error": str(e)}), 500
-def upload_file_to_storage(file, metadata):
-    # Replace with the actual upload implementation
-    file_path = f"{metadata['user_id']}/{metadata['file_id']}"
-    # file_content = file.read()  # Read the file content as bytes
-    file_type = get_file_type(file)
-    try:
-        with open(file, 'rb') as f:
-            response = supabase.storage.from_("files").upload(
-                file=f,
-                path=file_path,
-                file_options={"cache-control": "3600", "content-type": file_type, "upsert": "false"},
-            )
-            file_path = response.path
-            return file_path
-    except Exception as e:
-        print("Error uploading file:", e)
-        return jsonify({"error": str(e)}), 500
-def update_file_record(file_id, updates):
-    try:
-        response = supabase.table("files").update(updates).eq("id", file_id).execute()
-        return response
-    except Exception as e:
-        print("Error while updating record:", e)
-        return jsonify({"error": str(e)}), 500
-def insert_text_and_image_at_end(full_path, full_image_path, text_to_insert, include_signature, signature_position, letterhead_address):
-    try:
-        doc = Document(full_path)
-        # Replace placeholder <<SENDER_ADDRESS>> with the letterhead address
-        for paragraph in doc.paragraphs:
-            if ("<<SENDER_ADDRESS>>" in paragraph.text and letterhead_address):
-                for run in paragraph.runs:
-                    run.text = run.text.replace("<<SENDER_ADDRESS>>", letterhead_address)
-        # Add the new text at the end of the document
-        doc.add_paragraph(text_to_insert)
-        # Add the image at the end of the document with position adjustment
-        if (include_signature and full_image_path):
-            image_paragraph = doc.add_paragraph()
-            run = image_paragraph.add_run()
-            run.add_picture(full_image_path, width=Inches(1), height=Inches(1))
-            # Adjust the alignment based on signature_position
-            if signature_position == 'left':
-                image_paragraph.alignment = WD_ALIGN_PARAGRAPH.LEFT
-            elif signature_position == 'right':
-                image_paragraph.alignment = WD_ALIGN_PARAGRAPH.RIGHT
-            elif signature_position == 'center':
-                image_paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
-        # Save the document with the inserted text and image
-        doc.save(full_path)
-        return full_path
-    except Exception as e:
-        print("Error while inserting text:", e)
-        return jsonify({"error": str(e)}), 500
-def fetch_image(bucket_name: str, image_path: str):
-    try:
-        # Download file from Supabase storage
-        file_data = supabase.storage.from_(bucket_name).download(image_path)
-        # Use python-magic to detect MIME type from the file data
-        mime_type = magic.Magic(mime=True).from_buffer(file_data)
-        current_directory = os.path.dirname(os.path.abspath(__file__))
-        os.makedirs('letterhead', exist_ok=True)
-        letterhead_image_path = image_path.split('/')[-1] + "." + mime_type.split('/')[-1]
-        full_letterhead_image_path = os.path.join(current_directory, "letterhead", letterhead_image_path)
-        with open(full_letterhead_image_path, 'wb') as f:
-            f.write(file_data)
-        return full_letterhead_image_path
-    except Exception as e:
-        print(f"Error: {e}")
-        return jsonify({"error": str(e)}), 500
-def delete_all_files(directory):
-    keep_file = "WARNING-DO-NOT-DELETE.txt"
     try:
-        # Loop through each file in the directory
-        for filename in os.listdir(directory):
-            file_path = os.path.join(directory, filename)
-            # Check if it's a file and not the one to keep
-            if os.path.isfile(file_path) and filename != keep_file:
-                os.remove(file_path)
-    except Exception as e:
-        print(f"An error occurred: {e}")
-        return jsonify({"error": str(e)}), 500
-@app.route("/api/letterhead", methods=["POST"])
-def letterhead():
-    data = request.get_json()
-    try:
-        # Log the data instead of saving it to a file
-        logging.info("Received Data: %s", data)
     except Exception as e:
         return jsonify({"error": str(e)}), 500
-        # Extract data
-    chat_settings = data.get("chatSettings")
-    profile = data.get("profile")
-    letterhead_data = data.get("letterheadData")
-    try:
-        file_name, file_data = get_file_by_id(chat_settings["letterheadFileId"])
-        current_directory = os.path.dirname(os.path.abspath(__file__))
-        full_letterhead_file_path = os.path.join(current_directory, "letterhead", file_name)
-        full_letterhead_signature_path = None
-        if (letterhead_data["includeSignature"] and (chat_settings["letterheadSignatureImagePath"])):
-            full_letterhead_signature_path = fetch_image("assistant_images", (chat_settings["letterheadSignatureImagePath"]))
-        text_to_insert = letterhead_data["letterheadContent"]
-        include_signature = letterhead_data["includeSignature"]
-        signature_position = letterhead_data["signaturePosition"]
-        letterhead_address = letterhead_data["letterheadAddress"]
-        with open(full_letterhead_file_path, "wb") as f:
-            if hasattr(file_data, "read"):
-                f.write(file_data.read())
-            else:  # If it's raw bytes
-                f.write(file_data)
-        modified_doc = insert_text_and_image_at_end(full_letterhead_file_path, full_letterhead_signature_path, text_to_insert, include_signature,signature_position, letterhead_address)
-        created_file = insert_file_record(profile["user_id"], modified_doc)
-        file_data = created_file.json()
-        file_data_json = json.loads(file_data)
-        print("file data: ", file_data);
-        print("file data json", file_data_json)
-        print("modified doc", modified_doc)
-        file_path = upload_file_to_storage(modified_doc, {
-            "name": file_data_json["data"][0]["name"],
-            "user_id": file_data_json["data"][0]["user_id"],
-            "file_id": file_data_json["data"][0]["id"],
-        })
-        print("file path: ", file_path)
-        update_file_record(file_data_json["data"][0]["id"], {"file_path": file_path})
-        current_directory = os.path.dirname(os.path.abspath(__file__))
-        file_deleting_directory_path = os.path.join(current_directory, "letterhead")
-        delete_all_files(file_deleting_directory_path)
-        message = f"letterheadFileId:{file_data_json['data'][0]['id']} Your letterhead is successfully created."
-        return jsonify({ "message": message }), 200
-    except ValueError as e:
-        return jsonify({"error": str(e)}), 404
-    # except Exception as e:
-    #     return jsonify({"error": str(e)}), 500
-if __name__ == '__main__':
-    app.run(debug=True)
-    print('working')

 from flask import Flask, request, jsonify
+import requests
+import time
+import json
+import supabase
 import logging
 # Configure logging
 logging.basicConfig(level=logging.INFO)
+# Azure Document Intelligence setup. NOTE(review): the subscription key below is hard-coded in source; rotate it and load it from an environment variable or secret store instead.
+AZURE_ENDPOINT = "https://gosignpdf.cognitiveservices.azure.com/"
+AZURE_KEY = "2nUifMPmbS35qkiFr5OjgzDw7ooE5Piw5892GQgyWZHe0oNRIBJHJQQJ99AKACfhMk5XJ3w3AAALACOGkANC"
+# Supabase setup. NOTE(review): the project URL and API key are also hard-coded here; prefer environment configuration.
+SUPABASE_URL = "https://dtzuqtvroalrjhgdcowq.supabase.co/"
+SUPABASE_KEY = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6ImR0enVxdHZyb2FscmpoZ2Rjb3dxIiwicm9sZSI6ImFub24iLCJpYXQiOjE3MjU0NDk3MzIsImV4cCI6MjA0MTAyNTczMn0.WrIvwEOq4CqCb8IkU8G4jiWkf9DM1JxGd2_aTN4vlV4"
+supabase_client = supabase.create_client(SUPABASE_URL, SUPABASE_KEY)
+app = Flask(__name__)
+def log_debug(message, **kwargs):
+    """Print a "[DEBUG]"-tagged message, then one indented line per keyword detail."""
+    print(f"[DEBUG] {message}")
+    # Iterating an empty mapping prints nothing, so no emptiness guard is needed.
+    for label, detail in kwargs.items():
+        print(f"  - {label}: {detail}")
+def download_file_from_supabase(file_path):
+    """Download a file from the Supabase "files" storage bucket.
+
+    Args:
+        file_path: Object path inside the "files" bucket.
+
+    Returns:
+        The file content as bytes.
+
+    Raises:
+        Exception: If the storage client reports a non-200 response or the
+            download yields no content. Errors raised by the storage client
+            itself propagate unchanged.
+    """
+    log_debug("Downloading file from Supabase", file_path=file_path)
+    payload = supabase_client.storage.from_("files").download(file_path)
+    if hasattr(payload, "status_code"):
+        # Response-style result: keep the original status check.
+        if payload.status_code != 200:
+            raise Exception(f"Failed to download file from Supabase: {payload.text}")
+        payload = payload.content
+    elif hasattr(payload, "read"):
+        # File-like result: read it fully into memory.
+        payload = payload.read()
+    # Otherwise the client returned the raw bytes directly.
+    if not payload:
+        raise Exception("Failed to download file from Supabase: empty response")
+    # Log only the size; dumping the binary body as text floods the logs.
+    log_debug("Supabase download complete", size=len(payload))
+    return payload
+def analyze_pdf_layout(file_content, poll_interval=8, max_wait=300, request_timeout=30):
+    """Send a PDF to Azure's prebuilt-layout model and return the layout data.
+
+    Submits the document, then polls the returned Operation-Location URL
+    until the analysis succeeds, fails, or the wait budget is exhausted.
+
+    Args:
+        file_content: Raw PDF bytes to analyze.
+        poll_interval: Seconds to sleep between status polls.
+        max_wait: Maximum number of seconds to keep polling before giving up.
+        request_timeout: Per-request timeout, in seconds, for each HTTP call.
+
+    Returns:
+        The "analyzeResult" object from the final polling response.
+
+    Raises:
+        Exception: If submission is not accepted (status other than 202), the
+            Operation-Location header is missing, a poll returns a non-200
+            status, the analysis reports "failed", or max_wait elapses.
+    """
+    log_debug("Sending PDF to Azure for analysis")
+    # AZURE_ENDPOINT ends with "/", so strip it to avoid "//formrecognizer" in the URL.
+    url = (
+        f"{AZURE_ENDPOINT.rstrip('/')}/formrecognizer/documentModels/"
+        "prebuilt-layout:analyze?api-version=2023-07-31"
+    )
+    auth_headers = {"Ocp-Apim-Subscription-Key": AZURE_KEY}
+    response = requests.post(
+        url,
+        headers={**auth_headers, "Content-Type": "application/pdf"},
+        data=file_content,
+        timeout=request_timeout,
+    )
+    log_debug("Azure response", status_code=response.status_code, headers=response.headers)
+    if response.status_code != 202:
+        raise Exception(f"Azure request failed: {response.text}")
+    operation_location = response.headers.get("Operation-Location")
+    log_debug("Azure operation location", operation_location=operation_location)
+    if not operation_location:
+        raise Exception("Operation-Location header not found in response.")
+    # Bound the polling so a stuck analysis cannot block the worker forever.
+    deadline = time.monotonic() + max_wait
+    while True:
+        result_response = requests.get(
+            operation_location, headers=auth_headers, timeout=request_timeout
+        )
+        if result_response.status_code != 200:
+            raise Exception(f"Azure polling failed: {result_response.text}")
+        result = result_response.json()
+        status = result.get("status")
+        log_debug("Azure polling result", status=status)
+        if status == "succeeded":
+            log_debug("Azure analysis succeeded")
+            return result["analyzeResult"]
+        if status == "failed":
+            # Surface the service's error detail instead of a bare message.
+            raise Exception(f"Analysis failed. {result.get('error', '')}".strip())
+        if time.monotonic() >= deadline:
+            raise Exception(f"Azure analysis did not finish within {max_wait} seconds.")
+        time.sleep(poll_interval)
+def _extract_layout_values(layout_data):
+    """Pick the page size and text extents out of a layout analysis result.
+
+    Only the first page is considered. Returns a dict with "page_height",
+    "page_width", "x1" (first polygon value of the first word) and "y4"
+    (last polygon value of the last word).
+
+    Raises:
+        ValueError: If the result has no pages, the first page has no words,
+            or a needed word has no polygon.
+    """
+    pages = layout_data.get("pages") or []
+    if not pages:
+        raise ValueError("No pages found in the analyzed document")
+    page_data = pages[0]  # Assuming single-page PDF for simplicity
+    words = page_data.get("words") or []
+    if not words:
+        raise ValueError("No words found on the first page of the document")
+    first_polygon = words[0].get("polygon")
+    last_polygon = words[-1].get("polygon")
+    if not first_polygon or not last_polygon:
+        raise ValueError("Word polygon data is missing from the layout result")
+    return {
+        "page_height": page_data["height"],
+        "page_width": page_data["width"],
+        "x1": first_polygon[0],  # X1 of first word
+        "y4": last_polygon[-1],  # Y4 of last word
+    }
+@app.route("/analyze", methods=["POST"])
+def analyze():
+    """Analyze a stored PDF's layout and save the result on its `files` row.
+
+    Expects a JSON body with "file_id". Looks up the file's storage path,
+    downloads it, runs the Azure layout analysis, and writes page_height,
+    page_width, x1 and y4 back to the row.
+
+    Returns:
+        A (JSON response, status) pair: 200 on success, 400 when file_id is
+        missing, 404 when the file row or its path is not found, 422 when the
+        document has no usable layout data, 500 on any other failure.
+    """
+    # silent=True: a missing or malformed JSON body yields None here, so the
+    # caller gets the 400 below instead of an exception turned into a 500.
+    payload = request.get_json(silent=True)
+    file_id = payload.get("file_id") if isinstance(payload, dict) else None
+    log_debug("Received API request", file_id=file_id)
+    if not file_id:
+        return jsonify({"error": "File ID is required"}), 400
     try:
+        # Fetch file path from Supabase. The column is `file_path` (snake_case),
+        # which is how the previous version of this file read and wrote it.
+        # limit(1) returns a possibly empty list, so "no such row" is a plain
+        # 404 rather than an error raised by the query.
+        lookup = supabase_client.table("files").select("file_path").eq("id", file_id).limit(1).execute()
+        rows = lookup.data or []
+        log_debug("Supabase file data response", data=rows)
+        if not rows or not rows[0].get("file_path"):
+            return jsonify({"error": "File not found"}), 404
+        file_path = rows[0]["file_path"]
+        log_debug("File path retrieved from Supabase", file_path=file_path)
+        # Download the file from Supabase
+        file_content = download_file_from_supabase(file_path)
+        # Analyze the PDF layout with Azure
+        layout_data = analyze_pdf_layout(file_content)
+        # Log a summary only; the full analysis result can be very large.
+        log_debug("Layout data retrieved", page_count=len(layout_data.get("pages") or []))
+        # Extract required layout values
+        try:
+            layout_values = _extract_layout_values(layout_data)
+        except ValueError as layout_error:
+            log_debug("Layout data unusable", error=str(layout_error))
+            return jsonify({"error": str(layout_error)}), 422
+        log_debug("Extracted layout values", **layout_values)
+        # Update the `files` table in Supabase
+        update_response = supabase_client.table("files").update(layout_values).eq("id", file_id).execute()
+        log_debug("Supabase update response", data=update_response.data)
+        # An empty result means no row was updated.
+        if not update_response.data:
+            return jsonify({"error": "Failed to update file layout data"}), 500
+        return jsonify({"message": "Layout data successfully updated"}), 200
     except Exception as e:
+        log_debug("Error occurred", error=str(e))
         return jsonify({"error": str(e)}), 500
+if __name__ == "__main__":  # only when executed as a script, not when imported
+    app.run(host="0.0.0.0", port=8000)  # listen on all network interfaces, port 8000