Spaces:
Build error
Build error
add gpt4o test
Browse files- src/gpt4o_structured.py +73 -0
- src/planning_ai/phi.py +2 -3
src/gpt4o_structured.py
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Send one page of a scanned planning-response PDF to GPT-4o for structured
extraction.

Renders every PDF under ``./data/raw/pdfs`` to images, base64-encodes one
page, and posts it to the OpenAI chat-completions endpoint with an extraction
prompt. Prints the model's reply message.

Requires the ``OPENAI_API_KEY`` environment variable (the key must never be
hard-coded in source) and the ``pdf2image`` / ``requests`` packages plus a
poppler installation.
"""

import base64
import os
from io import BytesIO
from pathlib import Path

# Extraction instructions sent to the model (kept verbatim from the original).
prompt = """
This image is an extract from a planning response form filled out by a member of the public. The form may contain typed or handwritten responses, including potentially incomplete or unclear sections. Your task is to extract relevant information in a strict, structured format. Do not repeat the document verbatim. Only output responses in the structured format below.

Instructions:
1. Extract responses to all structured questions on the form, in the format:
{"<question>": "<response>"}

2. For the handwritten notes under extract them verbatim. If any word is illegible or unclear, use the token <UNKNOWN>. Do not attempt to infer or complete missing parts.

3. **Do not** output or repeat the original document content in full. Only return structured data in the format described above.
4. **Ignore irrelevant sections** that are not part of the structured questionnaire or 'Your comments:' section.
5. If a response is missing or the form section is blank, output:
{"<question>": "No response"}

Guidelines:
- Ensure you return only structured data in JSON-like format.
- Strictly follow the format for both structured questions and handwritten comments.
- If any part of the form is unclear or unreadable, do not fill it in with assumptions.
- Avoid repeating the full content of the form. Focus only on extracting the relevant sections.

Example output:
{
"Do you support the planning proposal?": "Yes",
"Your comments:": "The proposal seems reasonable, but <UNKNOWN> needs further assessment."
}
"""


def _load_pdf_pages(pdf_dir):
    """Render every page of every ``*.pdf`` in *pdf_dir* to a PIL image.

    Files are processed in sorted order so page indexing is deterministic
    across runs (``Path.glob`` order is filesystem-dependent).
    """
    # Local import: pdf2image is heavy and needs a poppler binary; keeping it
    # here lets the pure helpers in this module be imported without it.
    from pdf2image import convert_from_path

    pages = []
    for pdf_file in sorted(pdf_dir.glob("*.pdf")):
        pages.extend(convert_from_path(pdf_file))
    return pages


def _encode_jpeg_base64(image):
    """Return *image* as a base64 string of its JPEG bytes.

    NOTE: ``b64encode`` returns ``bytes``; the original interpolated those
    bytes straight into an f-string, yielding a corrupt
    ``data:image/jpeg;base64,b'...'`` URL. Decoding to ASCII fixes that.
    """
    buffered = BytesIO()
    image.save(buffered, format="JPEG")
    return base64.b64encode(buffered.getvalue()).decode("ascii")


def build_messages(prompt_text, base64_image):
    """Build the OpenAI chat ``messages`` payload for one text+image request.

    The vision API requires the text and the image to be *separate* content
    parts; the original merged them into a single ``"type": "text"`` part,
    which the API ignores/rejects.
    """
    return [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt_text},
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
                },
            ],
        }
    ]


def main(page_index=2):
    """Render the PDFs, send page *page_index* to GPT-4o, print the reply.

    Raises:
        KeyError: if ``OPENAI_API_KEY`` is not set.
        SystemExit: if fewer than ``page_index + 1`` pages were rendered.
        requests.HTTPError: if the API returns a non-2xx response.
    """
    # Local import so importing this module does not require requests.
    import requests

    # Secrets come from the environment — never commit API keys to source.
    api_key = os.environ["OPENAI_API_KEY"]

    pages = _load_pdf_pages(Path("./data/raw/pdfs"))
    if len(pages) <= page_index:
        raise SystemExit(
            f"need at least {page_index + 1} PDF pages, found {len(pages)}"
        )

    payload = {
        "model": "gpt-4o",
        "messages": build_messages(prompt, _encode_jpeg_base64(pages[page_index])),
    }
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=120,  # don't hang forever on a stalled connection
    )
    response.raise_for_status()  # surface API errors instead of a KeyError below
    print(response.json()["choices"][0]["message"])


if __name__ == "__main__":
    main()
src/planning_ai/phi.py
CHANGED
@@ -76,9 +76,8 @@ prompt = processor.tokenizer.apply_chat_template(
|
|
76 |
messages, tokenize=False, add_generation_prompt=True
|
77 |
)
|
78 |
|
79 |
-
inputs = processor(prompt, images[
|
80 |
-
|
81 |
-
generation_args = {"max_new_tokens": 1000, "do_sample": False}
|
82 |
|
83 |
generate_ids = model.generate(
|
84 |
**inputs, eos_token_id=processor.tokenizer.eos_token_id, **generation_args
|
|
|
76 |
messages, tokenize=False, add_generation_prompt=True
|
77 |
)
|
78 |
|
79 |
+
inputs = processor(prompt, images[1], return_tensors="pt").to("cuda:0")
|
80 |
+
generation_args = {"max_new_tokens": 10_000}
|
|
|
81 |
|
82 |
generate_ids = model.generate(
|
83 |
**inputs, eos_token_id=processor.tokenizer.eos_token_id, **generation_args
|