cjber committed on
Commit
eee0f0f
·
1 Parent(s): abc8d88

feat: update prompt and refactor image processing for planning response form extraction

Browse files
Files changed (1) hide show
  1. src/gpt4o_structured.py +49 -52
src/gpt4o_structured.py CHANGED
@@ -1,73 +1,70 @@
 
 
 
1
  from io import BytesIO
2
  from pathlib import Path
3
 
 
 
 
4
  from pdf2image import convert_from_path
5
 
6
- prompt = """
7
- This image is an extract from a planning response form filled out by a member of the public. The form may contain typed or handwritten responses, including potentially incomplete or unclear sections. Your task is to extract relevant information in a strict, structured format. Do not repeat the document verbatim. Only output responses in the structured format below.
8
-
9
- Instructions:
10
- 1. Extract responses to all structured questions on the form, in the format:
11
- {"<question>": "<response>"}
12
-
13
- 2. For the handwritten notes under extract them verbatim. If any word is illegible or unclear, use the token <UNKNOWN>. Do not attempt to infer or complete missing parts.
14
-
15
- 3. **Do not** output or repeat the original document content in full. Only return structured data in the format described above.
16
- 4. **Ignore irrelevant sections** that are not part of the structured questionnaire or 'Your comments:' section.
17
- 5. If a response is missing or the form section is blank, output:
18
- {"<question>": "No response"}
19
 
20
- Guidelines:
21
- - Ensure you return only structured data in JSON-like format.
22
- - Strictly follow the format for both structured questions and handwritten comments.
23
- - If any part of the form is unclear or unreadable, do not fill it in with assumptions.
24
- - Avoid repeating the full content of the form. Focus only on extracting the relevant sections.
25
 
26
- Example output:
27
- {
28
- "Do you support the planning proposal?": "Yes",
29
- "Your comments:": "The proposal seems reasonable, but <UNKNOWN> needs further assessment."
30
- }
31
  """
32
 
33
- images = []
34
  placeholder = ""
35
  path = Path("./data/raw/pdfs")
36
  i = 1
37
  for file in path.glob("*.pdf"):
38
- pdf_images = convert_from_path(file)
39
- for image in pdf_images:
40
- images.append(image)
41
- placeholder += f"<|image_{i}|>\n"
42
- i += 1
 
 
43
 
44
- import base64
 
 
 
 
 
45
 
46
- buffered = BytesIO()
47
- images[2].save(buffered, format="JPEG")
48
- base64_image = base64.b64encode(buffered.getvalue())
49
-
50
- messages = [
51
- {
52
- "role": "user",
53
- "content": [
54
  {
55
- "type": "text",
56
- "text": prompt,
57
  "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
58
  }
59
- ],
60
- }
61
- ]
62
- import requests
63
 
64
- api_key = "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"
65
- headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"}
66
- payload = {"model": "gpt-4o", "messages": messages}
 
 
 
 
 
 
 
 
 
67
 
 
 
 
 
 
 
68
 
69
- response = requests.post(
70
- "https://api.openai.com/v1/chat/completions", headers=headers, json=payload
71
- )
72
-
73
- print(response.json()["choices"][0]["message"])
 
1
+ import ast
2
+ import base64
3
+ import os
4
  from io import BytesIO
5
  from pathlib import Path
6
 
7
+ import polars as pl
8
+ import requests
9
+ from dotenv import load_dotenv
10
  from pdf2image import convert_from_path
11
 
12
+ load_dotenv()
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
+ prompt = """
15
+ The following images are from a planning response form completed by a member of the public. They contain free-form responses related to a planning application, which may be either handwritten or typed.
 
 
 
16
 
17
+ Please extract all the free-form information from these images and output it verbatim. Do not include any additional information or summaries. Note that the images are sequentially ordered, so a response might continue from one image to the next.
 
 
 
 
18
  """
19
 
 
20
  placeholder = ""
21
  path = Path("./data/raw/pdfs")
22
  i = 1
23
  for file in path.glob("*.pdf"):
24
+ images = []
25
+ if file.stem:
26
+ pdf_images = convert_from_path(file)
27
+ for image in pdf_images:
28
+ images.append(image)
29
+ placeholder += f"<|image_{i}|>\n"
30
+ i += 1
31
 
32
+ buffered = BytesIO()
33
+ outs = []
34
+ image_b64 = []
35
+ for image in images:
36
+ image.save(buffered, format="JPEG")
37
+ base64_image = base64.b64encode(buffered.getvalue()).decode("utf-8")
38
 
39
+ image_b64.append(
 
 
 
 
 
 
 
40
  {
41
+ "type": "image_url",
 
42
  "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
43
  }
44
+ )
 
 
 
45
 
46
+ messages = [
47
+ {
48
+ "role": "user",
49
+ "content": [
50
+ {
51
+ "type": "text",
52
+ "text": prompt,
53
+ },
54
+ ]
55
+ + image_b64,
56
+ }
57
+ ]
58
 
59
+ api_key = os.getenv("OPENAI_API_KEY")
60
+ headers = {
61
+ "Content-Type": "application/json",
62
+ "Authorization": f"Bearer {api_key}",
63
+ }
64
+ payload = {"model": "gpt-4o-mini", "messages": messages}
65
 
66
+ response = requests.post(
67
+ "https://api.openai.com/v1/chat/completions", headers=headers, json=payload
68
+ )
69
+ response.json()
70
+ break