cjber committed on
Commit
abc8d88
·
1 Parent(s): a46d340

add gpt4o test

Browse files
Files changed (2) hide show
  1. src/gpt4o_structured.py +73 -0
  2. src/planning_ai/phi.py +2 -3
src/gpt4o_structured.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Send a scanned planning-response PDF page to GPT-4o for structured extraction.

Converts every PDF page under ./data/raw/pdfs to an image, base64-encodes one
page, posts it to the OpenAI chat-completions endpoint with an extraction
prompt, and prints the model's reply.
"""

import base64
import os
from io import BytesIO
from pathlib import Path

prompt = """
This image is an extract from a planning response form filled out by a member of the public. The form may contain typed or handwritten responses, including potentially incomplete or unclear sections. Your task is to extract relevant information in a strict, structured format. Do not repeat the document verbatim. Only output responses in the structured format below.

Instructions:
1. Extract responses to all structured questions on the form, in the format:
{"<question>": "<response>"}

2. For the handwritten notes under 'Your comments:', extract them verbatim. If any word is illegible or unclear, use the token <UNKNOWN>. Do not attempt to infer or complete missing parts.

3. **Do not** output or repeat the original document content in full. Only return structured data in the format described above.
4. **Ignore irrelevant sections** that are not part of the structured questionnaire or 'Your comments:' section.
5. If a response is missing or the form section is blank, output:
{"<question>": "No response"}

Guidelines:
- Ensure you return only structured data in JSON-like format.
- Strictly follow the format for both structured questions and handwritten comments.
- If any part of the form is unclear or unreadable, do not fill it in with assumptions.
- Avoid repeating the full content of the form. Focus only on extracting the relevant sections.

Example output:
{
"Do you support the planning proposal?": "Yes",
"Your comments:": "The proposal seems reasonable, but <UNKNOWN> needs further assessment."
}
"""


def load_pdf_images(pdf_dir):
    """Convert every page of every PDF in *pdf_dir* to a PIL image.

    Returns a flat list of page images, in ``Path.glob`` order.
    """
    # Third-party dependency kept function-local so importing this module
    # does not require poppler/pdf2image to be installed.
    from pdf2image import convert_from_path

    pages = []
    for pdf_file in Path(pdf_dir).glob("*.pdf"):
        pages.extend(convert_from_path(pdf_file))
    return pages


def encode_image(image):
    """Return the JPEG bytes of a PIL *image* as a base64 ASCII string."""
    buffered = BytesIO()
    image.save(buffered, format="JPEG")
    # BUGFIX: b64encode returns bytes; interpolating bytes into an f-string
    # yields a literal "b'...'" wrapper that corrupts the data URL. Decode.
    return base64.b64encode(buffered.getvalue()).decode("ascii")


def build_messages(prompt_text, base64_image):
    """Build a chat-completions ``messages`` payload: one text content item
    plus one image_url content item.

    BUGFIX: the original nested ``image_url`` inside the ``"type": "text"``
    item; the OpenAI API requires text and image_url as separate items.
    """
    return [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt_text},
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
                },
            ],
        }
    ]


def main():
    """Send one extracted page (index 2, as in the original test) to GPT-4o."""
    import requests

    images = load_pdf_images("./data/raw/pdfs")
    base64_image = encode_image(images[2])  # arbitrary test page — TODO confirm

    # SECURITY: the original script hard-coded an OpenAI API key here. That
    # key is leaked in version control and must be revoked; read the
    # replacement from the environment instead.
    api_key = os.environ["OPENAI_API_KEY"]
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    payload = {"model": "gpt-4o", "messages": build_messages(prompt, base64_image)}

    response = requests.post(
        "https://api.openai.com/v1/chat/completions", headers=headers, json=payload
    )
    # Surface HTTP errors instead of KeyError-ing on a missing "choices".
    response.raise_for_status()
    print(response.json()["choices"][0]["message"])


if __name__ == "__main__":
    main()
src/planning_ai/phi.py CHANGED
@@ -76,9 +76,8 @@ prompt = processor.tokenizer.apply_chat_template(
76
  messages, tokenize=False, add_generation_prompt=True
77
  )
78
 
79
- inputs = processor(prompt, images[0], return_tensors="pt").to("cuda:0")
80
-
81
- generation_args = {"max_new_tokens": 1000, "do_sample": False}
82
 
83
  generate_ids = model.generate(
84
  **inputs, eos_token_id=processor.tokenizer.eos_token_id, **generation_args
 
76
  messages, tokenize=False, add_generation_prompt=True
77
  )
78
 
79
+ inputs = processor(prompt, images[1], return_tensors="pt").to("cuda:0")
80
+ generation_args = {"max_new_tokens": 10_000}
 
81
 
82
  generate_ids = model.generate(
83
  **inputs, eos_token_id=processor.tokenizer.eos_token_id, **generation_args