cjber committed on
Commit
a26aa12
·
1 Parent(s): eee0f0f

refactor: restructure script to use functions for improved readability and maintainability

Browse files
Files changed (1) hide show
  1. src/gpt4o_structured.py +44 -37
src/gpt4o_structured.py CHANGED
@@ -9,62 +9,69 @@ import requests
9
  from dotenv import load_dotenv
10
  from pdf2image import convert_from_path
11
 
12
# Flat script: render the pages of a PDF from ./data/raw/pdfs, send them with
# an extraction prompt to the OpenAI chat-completions endpoint, and keep the
# decoded JSON reply in `result`.
load_dotenv()

prompt = """
The following images are from a planning response form completed by a member of the public. They contain free-form responses related to a planning application, which may be either handwritten or typed.

Please extract all the free-form information from these images and output it verbatim. Do not include any additional information or summaries. Note that the images are sequentially ordered, so a response might continue from one image to the next.
"""

path = Path("./data/raw/pdfs")
for file in path.glob("*.pdf"):
    # One image object per page, as returned by pdf2image.
    images = list(convert_from_path(file))

    image_b64 = []
    for image in images:
        # A fresh buffer per page: reusing one buffer makes every later
        # payload start with the bytes of the pages saved before it.
        buffered = BytesIO()
        image.save(buffered, format="JPEG")
        base64_image = base64.b64encode(buffered.getvalue()).decode("utf-8")
        image_b64.append(
            {
                "type": "image_url",
                "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
            }
        )

    # A single user turn: the text prompt followed by every page image.
    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": prompt,
                },
            ]
            + image_b64,
        }
    ]

    api_key = os.getenv("OPENAI_API_KEY")
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    payload = {"model": "gpt-4o-mini", "messages": messages}

    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=120,  # without a timeout a stalled connection blocks forever
    )
    # Keep the decoded body instead of discarding it.
    result = response.json()
    # NOTE(review): only the first PDF yielded by glob is processed; this
    # looks like a development shortcut - confirm before removing.
    break
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  from dotenv import load_dotenv
10
  from pdf2image import convert_from_path
11
 
12
def load_environment():
    """Call ``load_dotenv()`` with its default arguments."""
    load_dotenv()
14
 
15
def convert_pdf_to_images(file_path):
    """Return whatever ``convert_from_path`` produces for the PDF at *file_path*."""
    pages = convert_from_path(file_path)
    return pages
17
 
18
def encode_images_to_base64(images, *, image_format="JPEG"):
    """Encode images as base64 data-URL parts for a chat message.

    Args:
        images: Iterable of objects exposing a PIL-style
            ``save(fp, format=...)`` method.
        image_format: Format name handed to ``save``. Its lower-cased form
            is used as the MIME subtype of the data URL, so it should be a
            format for which the two coincide (e.g. ``"JPEG"``, ``"PNG"``).
            Defaults to ``"JPEG"``, the previously hard-coded value.

    Returns:
        A list with one ``{"type": "image_url", "image_url": {"url": ...}}``
        dict per input image, in input order; empty when *images* is empty.
    """
    mime_subtype = image_format.lower()
    image_b64 = []
    for image in images:
        # Each image gets its own buffer so its payload holds only its own
        # bytes; the context manager releases the buffer once encoded.
        with BytesIO() as buffered:
            image.save(buffered, format=image_format)
            base64_image = base64.b64encode(buffered.getvalue()).decode("utf-8")
        image_b64.append(
            {
                "type": "image_url",
                "image_url": {
                    "url": f"data:image/{mime_subtype};base64,{base64_image}"
                },
            }
        )
    return image_b64
31
 
32
def send_request_to_api(messages, *, timeout=120):
    """POST *messages* to the OpenAI chat-completions endpoint.

    Args:
        messages: Chat messages placed under the ``"messages"`` key of the
            request payload.
        timeout: Seconds passed to ``requests.post`` as its ``timeout``.
            Without one, a stalled connection would block indefinitely.

    Returns:
        The response body as decoded by ``response.json()``. The HTTP status
        is not checked, so an error reply is returned like any other.

    Raises:
        RuntimeError: If ``OPENAI_API_KEY`` is unset or empty.
    """
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        # Fail before any network traffic instead of sending "Bearer None".
        raise RuntimeError("OPENAI_API_KEY is not set")

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    payload = {"model": "gpt-4o-mini", "messages": messages}
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=timeout,
    )
    return response.json()
43
+
44
# Instruction text sent ahead of the page images.
PROMPT = """
The following images are from a planning response form completed by a member of the public. They contain free-form responses related to a planning application, which may be either handwritten or typed.

Please extract all the free-form information from these images and output it verbatim. Do not include any additional information or summaries. Note that the images are sequentially ordered, so a response might continue from one image to the next.
"""


def main():
    """Send the pages of one PDF from ``./data/raw/pdfs`` and print the reply.

    Loads the environment, renders the first PDF (in sorted path order) to
    images, wraps them with ``PROMPT`` in a single user message, and prints
    the value returned by ``send_request_to_api``. Does nothing beyond
    loading the environment when the directory holds no ``*.pdf`` files.
    """
    load_environment()

    pdf_dir = Path("./data/raw/pdfs")
    # glob order is unspecified; sorting makes the one file processed below
    # the same on every run.
    for pdf_file in sorted(pdf_dir.glob("*.pdf")):
        images = convert_pdf_to_images(pdf_file)
        image_b64 = encode_images_to_base64(images)

        # A single user turn: the text prompt followed by every page image.
        messages = [
            {
                "role": "user",
                "content": [{"type": "text", "text": PROMPT}] + image_b64,
            }
        ]

        response = send_request_to_api(messages)
        print(response)
        # NOTE(review): stops after the first PDF; this looks like a
        # development shortcut - confirm before removing.
        break


if __name__ == "__main__":
    main()