gosign committed on
Commit
1c01735
·
verified ·
1 Parent(s): f19b84d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +110 -239
app.py CHANGED
@@ -1,253 +1,124 @@
1
- import json
2
- import os
3
- import magic
4
- from dotenv import load_dotenv
5
- from docx import Document
6
- from docx.shared import Inches
7
- from docx.enum.text import WD_ALIGN_PARAGRAPH
8
- from flask_cors import CORS
9
  from flask import Flask, request, jsonify
10
- from supabase import create_client, Client
 
 
 
11
  import logging
12
 
13
  # Configure logging
14
  logging.basicConfig(level=logging.INFO)
15
 
16
- # load_dotenv(dotenv_path='.env.local')
17
- load_dotenv()
18
-
19
- app = Flask(__name__)
20
-
21
- CORS(app)
22
-
23
- url: str = 'https://dtzuqtvroalrjhgdcowq.supabase.co/'
24
- key: str = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6ImR0enVxdHZyb2FscmpoZ2Rjb3dxIiwicm9sZSI6ImFub24iLCJpYXQiOjE3MjU0NDk3MzIsImV4cCI6MjA0MTAyNTczMn0.WrIvwEOq4CqCb8IkU8G4jiWkf9DM1JxGd2_aTN4vlV4'
25
-
26
- supabase: Client = create_client(url, key)
27
-
28
- def get_file_by_id(file_id):
29
- try:
30
- response = supabase.table("files").select("*").eq("id", file_id).single().execute()
31
- file = response.data
32
-
33
- if not file:
34
- raise ValueError(response.error.message if response.error else "File not found.")
35
-
36
- file_path = file.get("file_path")
37
- file_name = file.get("name")
38
-
39
- if not file_path:
40
- raise ValueError("File path is missing in the metadata.")
41
-
42
- # Fetch the actual file content from Supabase storage
43
- file_data = supabase.storage.from_('files').download(file_path)
44
-
45
- return file_name, file_data
46
- except Exception as e:
47
- print("Error fetching file:", e)
48
- return jsonify({"error": str(e)}), 500
49
-
50
- def get_file_type(file_path):
51
- try:
52
- # Use python-magic to detect the MIME type of the file
53
- mime = magic.Magic(mime=True)
54
- file_type = mime.from_file(file_path)
55
- return file_type
56
- except Exception as e:
57
- print("Error fetching file:", e)
58
- return jsonify({"error": str(e)}), 500
59
-
60
- def insert_file_record(user_id, doc):
61
- try:
62
- file_type = get_file_type(doc)
63
- file_record = {
64
- "user_id": user_id,
65
- "description": "",
66
- "file_path": "",
67
- "name": "letterhead-" + os.path.basename(doc),
68
- "size": os.path.getsize(doc),
69
- "tokens": 0,
70
- "type": file_type,
71
- }
72
-
73
- response = supabase.table("files").insert(file_record).execute()
74
-
75
- return response
76
- except Exception as e:
77
- print("Error fetching file:", e)
78
- return jsonify({"error": str(e)}), 500
79
-
80
- def upload_file_to_storage(file, metadata):
81
- # Replace with the actual upload implementation
82
- file_path = f"{metadata['user_id']}/{metadata['file_id']}"
83
- # file_content = file.read() # Read the file content as bytes
84
-
85
- file_type = get_file_type(file)
86
- try:
87
- with open(file, 'rb') as f:
88
- response = supabase.storage.from_("files").upload(
89
- file=f,
90
- path=file_path,
91
- file_options={"cache-control": "3600", "content-type": file_type, "upsert": "false"},
92
- )
93
-
94
- file_path = response.path
95
- return file_path
96
- except Exception as e:
97
- print("Error uploading file:", e)
98
- return jsonify({"error": str(e)}), 500
99
-
100
 
 
 
 
 
101
 
102
- def update_file_record(file_id, updates):
103
- try:
104
- response = supabase.table("files").update(updates).eq("id", file_id).execute()
105
- return response
106
- except Exception as e:
107
- print("Error while updating record:", e)
108
- return jsonify({"error": str(e)}), 500
109
-
110
- def insert_text_and_image_at_end(full_path, full_image_path, text_to_insert, include_signature, signature_position, letterhead_address):
111
- try:
112
- doc = Document(full_path)
113
-
114
- # Replace placeholder <<SENDER_ADDRESS>> with the letterhead address
115
- for paragraph in doc.paragraphs:
116
- if ("<<SENDER_ADDRESS>>" in paragraph.text and letterhead_address):
117
- for run in paragraph.runs:
118
- run.text = run.text.replace("<<SENDER_ADDRESS>>", letterhead_address)
119
-
120
- # Add the new text at the end of the document
121
- doc.add_paragraph(text_to_insert)
122
-
123
- # Add the image at the end of the document with position adjustment
124
- if (include_signature and full_image_path):
125
- image_paragraph = doc.add_paragraph()
126
- run = image_paragraph.add_run()
127
- run.add_picture(full_image_path, width=Inches(1), height=Inches(1))
128
-
129
- # Adjust the alignment based on signature_position
130
- if signature_position == 'left':
131
- image_paragraph.alignment = WD_ALIGN_PARAGRAPH.LEFT
132
- elif signature_position == 'right':
133
- image_paragraph.alignment = WD_ALIGN_PARAGRAPH.RIGHT
134
- elif signature_position == 'center':
135
- image_paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
136
-
137
- # Save the document with the inserted text and image
138
- doc.save(full_path)
139
-
140
- return full_path
141
- except Exception as e:
142
- print("Error while inserting text:", e)
143
- return jsonify({"error": str(e)}), 500
144
-
145
-
146
- def fetch_image(bucket_name: str, image_path: str):
147
- try:
148
- # Download file from Supabase storage
149
- file_data = supabase.storage.from_(bucket_name).download(image_path)
150
-
151
- # Use python-magic to detect MIME type from the file data
152
- mime_type = magic.Magic(mime=True).from_buffer(file_data)
153
-
154
- current_directory = os.path.dirname(os.path.abspath(__file__))
155
-
156
- os.makedirs('letterhead', exist_ok=True)
157
-
158
- letterhead_image_path = image_path.split('/')[-1] + "." + mime_type.split('/')[-1]
159
- full_letterhead_image_path = os.path.join(current_directory, "letterhead", letterhead_image_path)
160
-
161
- with open(full_letterhead_image_path, 'wb') as f:
162
- f.write(file_data)
163
-
164
- return full_letterhead_image_path
165
 
166
- except Exception as e:
167
- print(f"Error: {e}")
168
- return jsonify({"error": str(e)}), 500
169
-
170
- def delete_all_files(directory):
171
- keep_file = "WARNING-DO-NOT-DELETE.txt"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
  try:
173
- # Loop through each file in the directory
174
- for filename in os.listdir(directory):
175
- file_path = os.path.join(directory, filename)
176
-
177
- # Check if it's a file and not the one to keep
178
- if os.path.isfile(file_path) and filename != keep_file:
179
- os.remove(file_path)
180
- except Exception as e:
181
- print(f"An error occurred: {e}")
182
- return jsonify({"error": str(e)}), 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
 
184
- @app.route("/api/letterhead", methods=["POST"])
185
- def letterhead():
186
- data = request.get_json()
187
- try:
188
- # Log the data instead of saving it to a file
189
- logging.info("Received Data: %s", data)
190
  except Exception as e:
 
191
  return jsonify({"error": str(e)}), 500
192
 
193
-
194
- # Extract data
195
- chat_settings = data.get("chatSettings")
196
- profile = data.get("profile")
197
- letterhead_data = data.get("letterheadData")
198
-
199
- try:
200
- file_name, file_data = get_file_by_id(chat_settings["letterheadFileId"])
201
- current_directory = os.path.dirname(os.path.abspath(__file__))
202
- full_letterhead_file_path = os.path.join(current_directory, "letterhead", file_name)
203
-
204
- full_letterhead_signature_path = None
205
- if (letterhead_data["includeSignature"] and (chat_settings["letterheadSignatureImagePath"])):
206
- full_letterhead_signature_path = fetch_image("assistant_images", (chat_settings["letterheadSignatureImagePath"]))
207
-
208
- text_to_insert = letterhead_data["letterheadContent"]
209
- include_signature = letterhead_data["includeSignature"]
210
- signature_position = letterhead_data["signaturePosition"]
211
- letterhead_address = letterhead_data["letterheadAddress"]
212
-
213
- with open(full_letterhead_file_path, "wb") as f:
214
- if hasattr(file_data, "read"):
215
- f.write(file_data.read())
216
- else: # If it's raw bytes
217
- f.write(file_data)
218
-
219
- modified_doc = insert_text_and_image_at_end(full_letterhead_file_path, full_letterhead_signature_path, text_to_insert, include_signature,signature_position, letterhead_address)
220
-
221
- created_file = insert_file_record(profile["user_id"], modified_doc)
222
-
223
- file_data = created_file.json()
224
- file_data_json = json.loads(file_data)
225
-
226
- print("file data: ", file_data);
227
- print("file data json", file_data_json)
228
- print("modified doc", modified_doc)
229
-
230
- file_path = upload_file_to_storage(modified_doc, {
231
- "name": file_data_json["data"][0]["name"],
232
- "user_id": file_data_json["data"][0]["user_id"],
233
- "file_id": file_data_json["data"][0]["id"],
234
- })
235
-
236
- print("file path: ", file_path)
237
-
238
- update_file_record(file_data_json["data"][0]["id"], {"file_path": file_path})
239
-
240
- current_directory = os.path.dirname(os.path.abspath(__file__))
241
- file_deleting_directory_path = os.path.join(current_directory, "letterhead")
242
- delete_all_files(file_deleting_directory_path)
243
-
244
- message = f"letterheadFileId:{file_data_json['data'][0]['id']} Your letterhead is successfully created."
245
- return jsonify({ "message": message }), 200
246
- except ValueError as e:
247
- return jsonify({"error": str(e)}), 404
248
- # except Exception as e:
249
- # return jsonify({"error": str(e)}), 500
250
-
251
- if __name__ == '__main__':
252
- app.run(debug=True)
253
- print('working')
 
 
 
 
 
 
 
 
 
1
import json
import logging
import os
import time

import requests
import supabase
from flask import Flask, request, jsonify
7
 
8
  # Configure logging
9
  logging.basicConfig(level=logging.INFO)
10
 
11
# Azure Document Intelligence setup
# The subscription key is read from the environment instead of being
# committed to source control. The key that used to be hard-coded here is
# exposed in the repository history and should be rotated.
AZURE_ENDPOINT = os.environ.get(
    "AZURE_ENDPOINT", "https://gosignpdf.cognitiveservices.azure.com/"
)
AZURE_KEY = os.environ.get("AZURE_KEY", "")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
# Supabase setup
# The API key is read from the environment rather than being committed to
# source control; SUPABASE_KEY must be set before the module is imported.
SUPABASE_URL = os.environ.get(
    "SUPABASE_URL", "https://dtzuqtvroalrjhgdcowq.supabase.co/"
)
SUPABASE_KEY = os.environ["SUPABASE_KEY"]
supabase_client = supabase.create_client(SUPABASE_URL, SUPABASE_KEY)

app = Flask(__name__)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
def log_debug(message, **kwargs):
    """Print a debug message followed by one line per keyword detail.

    Args:
        message: Text printed after the ``[DEBUG]`` tag.
        **kwargs: Optional name/value pairs. Each pair is printed on its own
            line beneath the message, in the order it was passed.
    """
    print(f"[DEBUG] {message}")
    # Iterating an empty mapping is a no-op, so no separate emptiness check.
    for key, value in kwargs.items():
        print(f" - {key}: {value}")
28
+
29
def download_file_from_supabase(file_path):
    """Download a file from the Supabase ``files`` storage bucket.

    Args:
        file_path: Object path inside the ``files`` bucket.

    Returns:
        The file content as bytes.

    Raises:
        Exception: If the client returns a response-like object whose status
            is not 200. Errors raised by the storage client itself propagate
            unchanged.
    """
    log_debug("Downloading file from Supabase", file_path=file_path)
    result = supabase_client.storage.from_("files").download(file_path)

    # The Supabase Python storage client returns the raw file bytes and
    # signals failure by raising, so there is no status code to inspect.
    if isinstance(result, (bytes, bytearray)):
        log_debug("Supabase download complete", size=len(result))
        return bytes(result)

    # Fallback for a client that hands back an HTTP-response-like object.
    status_code = getattr(result, "status_code", None)
    log_debug("Supabase download response", status_code=status_code)
    if status_code != 200:
        detail = getattr(result, "text", result)
        raise Exception(f"Failed to download file from Supabase: {detail}")
    return result.content
37
+
38
def analyze_pdf_layout(file_content, poll_interval=8, max_wait=300, request_timeout=30):
    """Send a PDF to Azure Document Intelligence and return its layout result.

    Submits the document to the ``prebuilt-layout`` model, then polls the
    operation URL returned by Azure until the analysis finishes.

    Args:
        file_content: Raw PDF bytes to analyze.
        poll_interval: Seconds to wait between polling requests.
        max_wait: Maximum number of seconds to keep polling before giving up.
        request_timeout: Timeout in seconds applied to each HTTP request.

    Returns:
        The ``analyzeResult`` object from the final Azure response.

    Raises:
        TimeoutError: If the analysis has not finished within ``max_wait``.
        Exception: If the submit request is not accepted, the
            ``Operation-Location`` header is missing, polling returns an
            error status, or Azure reports the analysis as failed.
    """
    log_debug("Sending PDF to Azure for analysis")
    # The configured endpoint may end with "/"; strip it so the joined URL
    # does not contain a double slash.
    url = (
        f"{AZURE_ENDPOINT.rstrip('/')}/formrecognizer/documentModels/"
        "prebuilt-layout:analyze?api-version=2023-07-31"
    )
    headers = {
        "Ocp-Apim-Subscription-Key": AZURE_KEY,
        "Content-Type": "application/pdf",
    }

    response = requests.post(
        url, headers=headers, data=file_content, timeout=request_timeout
    )
    log_debug("Azure response", status_code=response.status_code, headers=response.headers)
    if response.status_code != 202:
        raise Exception(f"Azure request failed: {response.text}")

    operation_location = response.headers.get("Operation-Location")
    log_debug("Azure operation location", operation_location=operation_location)
    if not operation_location:
        raise Exception("Operation-Location header not found in response.")

    poll_headers = {"Ocp-Apim-Subscription-Key": AZURE_KEY}
    deadline = time.monotonic() + max_wait
    while True:
        result_response = requests.get(
            operation_location, headers=poll_headers, timeout=request_timeout
        )
        # 429 means the poll was throttled: wait and try again. Any other
        # non-200 status is treated as a hard failure.
        if result_response.status_code == 429:
            log_debug("Azure polling throttled", status_code=result_response.status_code)
        elif result_response.status_code != 200:
            raise Exception(f"Azure polling failed: {result_response.text}")
        else:
            result = result_response.json()
            status = result.get("status")
            log_debug("Azure polling result", status=status)

            if status == "succeeded":
                log_debug("Azure analysis succeeded")
                return result["analyzeResult"]
            if status == "failed":
                raise Exception("Analysis failed.")

        # Bound the loop so a stuck operation cannot hang the request forever.
        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"Azure analysis did not finish within {max_wait} seconds."
            )
        time.sleep(poll_interval)
68
+
69
def _extract_layout_values(layout_data):
    """Return ``(page_height, page_width, x1, y4)`` for the first page.

    ``x1`` is the first coordinate of the first word's polygon and ``y4`` is
    the last coordinate of the last word's polygon. Returns ``None`` when the
    result has no pages or the first page has no words.
    """
    pages = layout_data.get("pages") or []
    if not pages:
        return None
    page = pages[0]  # Only the first page is used.
    words = page.get("words") or []
    if not words:
        return None
    return (
        page["height"],
        page["width"],
        words[0]["polygon"][0],
        words[-1]["polygon"][-1],
    )


@app.route("/analyze", methods=["POST"])
def analyze():
    """Analyze a stored PDF and save its layout metrics on the file record.

    Expects a JSON body with ``file_id``. Looks up the file's storage path in
    the ``files`` table, downloads it, runs the Azure layout analysis, and
    writes ``page_height``, ``page_width``, ``x1`` and ``y4`` back to the row.

    Returns:
        A JSON response and status code: 200 on success, 400 when ``file_id``
        is missing, 404 when the file record or its path is not found, 422
        when the analysis contains no pages or words, and 500 for a failed
        update or any other error.
    """
    try:
        # silent=True yields None for a missing or invalid JSON body, so the
        # request gets the 400 below instead of a generic 500.
        payload = request.get_json(silent=True)
        if not isinstance(payload, dict):
            payload = {}
        file_id = payload.get("file_id")
        log_debug("Received API request", file_id=file_id)
        if not file_id:
            return jsonify({"error": "File ID is required"}), 400

        # Fetch the storage path. limit(1) returns an empty list when nothing
        # matches; the result object carries rows in `.data`.
        lookup = (
            supabase_client.table("files")
            .select("file_path")
            .eq("id", file_id)
            .limit(1)
            .execute()
        )
        rows = lookup.data or []
        log_debug("Supabase file data response", data=rows)
        if not rows or not rows[0].get("file_path"):
            return jsonify({"error": "File not found"}), 404

        file_path = rows[0]["file_path"]
        log_debug("File path retrieved from Supabase", file_path=file_path)

        # Download the file from Supabase
        file_content = download_file_from_supabase(file_path)

        # Analyze the PDF layout with Azure
        layout_data = analyze_pdf_layout(file_content)
        log_debug("Layout data retrieved", pages=len(layout_data.get("pages") or []))

        # Extract required layout values
        layout_values = _extract_layout_values(layout_data)
        if layout_values is None:
            return jsonify({"error": "No text found in document layout"}), 422
        page_height, page_width, x1, y4 = layout_values
        log_debug(
            "Extracted layout values",
            page_height=page_height,
            page_width=page_width,
            x1=x1,
            y4=y4,
        )

        # Update the `files` table in Supabase
        update_response = (
            supabase_client.table("files")
            .update(
                {
                    "page_height": page_height,
                    "page_width": page_width,
                    "x1": x1,
                    "y4": y4,
                }
            )
            .eq("id", file_id)
            .execute()
        )
        log_debug("Supabase update response", data=update_response.data)

        # An update that returns no rows changed nothing.
        if not update_response.data:
            return jsonify({"error": "Failed to update file layout data"}), 500

        return jsonify({"message": "Layout data successfully updated"}), 200

    except Exception as e:
        log_debug("Error occurred", error=str(e))
        return jsonify({"error": str(e)}), 500
122
 
123
if __name__ == "__main__":
    # Bind to every network interface on port 8000 when run as a script.
    app.run(host="0.0.0.0", port=8000)