Spaces:
Running
Running
yuhuizhang
committed on
Upload folder using huggingface_hub
Browse files- .gitignore +169 -0
- README.md +1 -8
- main.py +359 -0
- prompts.py +749 -0
.gitignore
ADDED
@@ -0,0 +1,169 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
data/
|
2 |
+
images/
|
3 |
+
*.json
|
4 |
+
*.jsonl
|
5 |
+
*.tsv
|
6 |
+
*.png
|
7 |
+
visualization/
|
8 |
+
# Byte-compiled / optimized / DLL files
|
9 |
+
__pycache__/
|
10 |
+
*.py[cod]
|
11 |
+
*$py.class
|
12 |
+
|
13 |
+
# C extensions
|
14 |
+
*.so
|
15 |
+
|
16 |
+
# Distribution / packaging
|
17 |
+
.Python
|
18 |
+
build/
|
19 |
+
develop-eggs/
|
20 |
+
dist/
|
21 |
+
downloads/
|
22 |
+
eggs/
|
23 |
+
.eggs/
|
24 |
+
lib/
|
25 |
+
lib64/
|
26 |
+
parts/
|
27 |
+
sdist/
|
28 |
+
var/
|
29 |
+
wheels/
|
30 |
+
share/python-wheels/
|
31 |
+
*.egg-info/
|
32 |
+
.installed.cfg
|
33 |
+
*.egg
|
34 |
+
MANIFEST
|
35 |
+
|
36 |
+
# PyInstaller
|
37 |
+
# Usually these files are written by a python script from a template
|
38 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
39 |
+
*.manifest
|
40 |
+
*.spec
|
41 |
+
|
42 |
+
# Installer logs
|
43 |
+
pip-log.txt
|
44 |
+
pip-delete-this-directory.txt
|
45 |
+
|
46 |
+
# Unit test / coverage reports
|
47 |
+
htmlcov/
|
48 |
+
.tox/
|
49 |
+
.nox/
|
50 |
+
.coverage
|
51 |
+
.coverage.*
|
52 |
+
.cache
|
53 |
+
nosetests.xml
|
54 |
+
coverage.xml
|
55 |
+
*.cover
|
56 |
+
*.py,cover
|
57 |
+
.hypothesis/
|
58 |
+
.pytest_cache/
|
59 |
+
cover/
|
60 |
+
|
61 |
+
# Translations
|
62 |
+
*.mo
|
63 |
+
*.pot
|
64 |
+
|
65 |
+
# Django stuff:
|
66 |
+
*.log
|
67 |
+
local_settings.py
|
68 |
+
db.sqlite3
|
69 |
+
db.sqlite3-journal
|
70 |
+
|
71 |
+
# Flask stuff:
|
72 |
+
instance/
|
73 |
+
.webassets-cache
|
74 |
+
|
75 |
+
# Scrapy stuff:
|
76 |
+
.scrapy
|
77 |
+
|
78 |
+
# Sphinx documentation
|
79 |
+
docs/_build/
|
80 |
+
|
81 |
+
# PyBuilder
|
82 |
+
.pybuilder/
|
83 |
+
target/
|
84 |
+
|
85 |
+
# Jupyter Notebook
|
86 |
+
.ipynb_checkpoints
|
87 |
+
|
88 |
+
# IPython
|
89 |
+
profile_default/
|
90 |
+
ipython_config.py
|
91 |
+
|
92 |
+
# pyenv
|
93 |
+
# For a library or package, you might want to ignore these files since the code is
|
94 |
+
# intended to run in multiple environments; otherwise, check them in:
|
95 |
+
# .python-version
|
96 |
+
|
97 |
+
# pipenv
|
98 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
99 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
100 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
101 |
+
# install all needed dependencies.
|
102 |
+
#Pipfile.lock
|
103 |
+
|
104 |
+
# poetry
|
105 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
106 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
107 |
+
# commonly ignored for libraries.
|
108 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
109 |
+
#poetry.lock
|
110 |
+
|
111 |
+
# pdm
|
112 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
113 |
+
#pdm.lock
|
114 |
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
115 |
+
# in version control.
|
116 |
+
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
|
117 |
+
.pdm.toml
|
118 |
+
.pdm-python
|
119 |
+
.pdm-build/
|
120 |
+
|
121 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
122 |
+
__pypackages__/
|
123 |
+
|
124 |
+
# Celery stuff
|
125 |
+
celerybeat-schedule
|
126 |
+
celerybeat.pid
|
127 |
+
|
128 |
+
# SageMath parsed files
|
129 |
+
*.sage.py
|
130 |
+
|
131 |
+
# Environments
|
132 |
+
.env
|
133 |
+
.venv
|
134 |
+
env/
|
135 |
+
venv/
|
136 |
+
ENV/
|
137 |
+
env.bak/
|
138 |
+
venv.bak/
|
139 |
+
|
140 |
+
# Spyder project settings
|
141 |
+
.spyderproject
|
142 |
+
.spyproject
|
143 |
+
|
144 |
+
# Rope project settings
|
145 |
+
.ropeproject
|
146 |
+
|
147 |
+
# mkdocs documentation
|
148 |
+
/site
|
149 |
+
|
150 |
+
# mypy
|
151 |
+
.mypy_cache/
|
152 |
+
.dmypy.json
|
153 |
+
dmypy.json
|
154 |
+
|
155 |
+
# Pyre type checker
|
156 |
+
.pyre/
|
157 |
+
|
158 |
+
# pytype static type analyzer
|
159 |
+
.pytype/
|
160 |
+
|
161 |
+
# Cython debug symbols
|
162 |
+
cython_debug/
|
163 |
+
|
164 |
+
# PyCharm
|
165 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
166 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
167 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
168 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
169 |
+
#.idea/
|
README.md
CHANGED
@@ -1,13 +1,6 @@
|
|
1 |
---
|
2 |
title: AutoConverter
|
3 |
-
|
4 |
-
colorFrom: pink
|
5 |
-
colorTo: indigo
|
6 |
sdk: gradio
|
7 |
sdk_version: 5.9.1
|
8 |
-
app_file: app.py
|
9 |
-
pinned: false
|
10 |
-
short_description: AutoConverter
|
11 |
---
|
12 |
-
|
13 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
---
|
2 |
title: AutoConverter
|
3 |
+
app_file: main.py
|
|
|
|
|
4 |
sdk: gradio
|
5 |
sdk_version: 5.9.1
|
|
|
|
|
|
|
6 |
---
|
|
|
|
main.py
ADDED
@@ -0,0 +1,359 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import base64
|
2 |
+
import io
|
3 |
+
import random
|
4 |
+
from textwrap import dedent
|
5 |
+
|
6 |
+
import gradio as gr
|
7 |
+
from openai import OpenAI
|
8 |
+
from PIL import Image
|
9 |
+
from pydantic import BaseModel
|
10 |
+
|
11 |
+
from prompts import (
|
12 |
+
concept_generation_system_prompt,
|
13 |
+
data_processing_generation_system_prompt,
|
14 |
+
evaluator_system_prompt,
|
15 |
+
fusion_generation_system_prompt,
|
16 |
+
question_bias_generation_system_prompt,
|
17 |
+
reasoning_generation_system_prompt,
|
18 |
+
refine_system_prompt_concept,
|
19 |
+
refine_system_prompt_data,
|
20 |
+
refine_system_prompt_question_bias,
|
21 |
+
refine_system_prompt_reason,
|
22 |
+
refine_system_prompt_visual,
|
23 |
+
refiner_system_prompt,
|
24 |
+
review_system_prompt,
|
25 |
+
visual_interpretation_generation_system_prompt,
|
26 |
+
)
|
27 |
+
|
28 |
+
|
29 |
+
class Distractor(BaseModel):
    """One incorrect answer option plus the model's rationale for it."""

    # Text of the distractor; process_one_question reads it via item["text"].
    text: str
    # Explanation of why the distractor was generated.
    reason: str
|
32 |
+
|
33 |
+
|
34 |
+
class Distractors(BaseModel):
    """Structured-output schema wrapping a list of Distractor entries."""

    # Read by callers through the "distractors" key of the dumped dict.
    distractors: list[Distractor]
|
36 |
+
|
37 |
+
|
38 |
+
class Comment(BaseModel):
    """A reviewer remark attached to a single answer option."""

    # The option being reviewed.
    option: str
    # The reviewer's comment on that option.
    comment: str
|
41 |
+
|
42 |
+
|
43 |
+
class CommentFormat(BaseModel):
    """Structured-output schema for a review pass: a list of Comment entries."""

    # Read by callers through the "comments" key of the dumped dict.
    comments: list[Comment]
|
45 |
+
|
46 |
+
|
47 |
+
class Judgement(BaseModel):
    """Structured-output schema for the evaluator's verdict on a question."""

    # Evaluator's reasoning; forwarded to the refiner as the identified issues.
    reasoning: str
    # NOTE(review): integer correctness score; its scale is defined by the
    # evaluator prompt, which is not visible here, and no visible code reads it.
    correctness: int
    # Suggested improvement; forwarded to the refiner.
    improvement: str
|
51 |
+
|
52 |
+
|
53 |
+
class Question(BaseModel):
    """Structured-output schema for the refiner's improved distractor set."""

    # Refiner's reasoning for the changes it made.
    reasoning: str
    # Plain distractor texts (no per-item rationale, unlike Distractors).
    distractors: list[str]
|
56 |
+
|
57 |
+
|
58 |
+
def base64_to_image(base64_str):
    """Decode a base64-encoded image payload into a PIL image.

    Args:
        base64_str: Base64 text (or bytes) holding an encoded image file.

    Returns:
        The image opened by ``PIL.Image.open`` from the decoded bytes.
    """
    raw_bytes = base64.b64decode(base64_str)
    return Image.open(io.BytesIO(raw_bytes))
|
62 |
+
|
63 |
+
|
64 |
+
def get_reply(client, system_prompt, user_prompt, image_base64, output_format):
    """Ask gpt-4o one image-grounded question and return the parsed reply.

    Args:
        client: Object exposing ``beta.chat.completions.parse`` (an OpenAI
            client in this file).
        system_prompt: System message; dedented before sending.
        user_prompt: User text; dedented before sending.
        image_base64: Base64 image data, sent as a ``data:image/png`` URL.
        output_format: Schema passed as ``response_format`` for the reply.

    Returns:
        The parsed reply converted to a plain ``dict``.

    Raises:
        ValueError: If the reply carries no parsed payload (for example
            because the model refused to answer).
    """
    # Sampling temperature is left at the API default, so replies are not
    # guaranteed to be deterministic.
    completion = client.beta.chat.completions.parse(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": dedent(system_prompt)},
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": dedent(user_prompt)},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/png;base64,{image_base64}"},
                    },
                ],
            },
        ],
        response_format=output_format,
    )
    message = completion.choices[0].message
    parsed = message.parsed
    if parsed is None:
        # Fail with a readable message instead of an AttributeError on None.
        refusal = getattr(message, "refusal", None)
        raise ValueError(
            f"Model returned no structured output (refusal: {refusal!r})"
        )

    # Pydantic v2 deprecates .dict() in favour of .model_dump(); keep the old
    # spelling as a fallback for objects that only provide it.
    dump = getattr(parsed, "model_dump", None)
    if dump is None:
        dump = parsed.dict
    return dump()
|
85 |
+
|
86 |
+
|
87 |
+
def convert_to_multi_choice(client, question, answer, image_base64, reviewer):
    """Generate distractors for an open-ended, image-based question.

    Distractors are requested once per error category, optionally reviewed
    and refined per category, and finally merged by the fusion prompt.

    Args:
        client: Client object forwarded to ``get_reply``.
        question: The open-ended question text.
        answer: The correct answer text.
        image_base64: Base64-encoded image forwarded to every request.
        reviewer: When truthy, run the review and refine passes on each
            category before fusion.

    Returns:
        The ``"distractors"`` list from the fusion reply; each entry is a
        dict following the ``Distractor`` schema.
    """
    # (label used in the review prompt, generation prompt, refinement prompt)
    categories = [
        (
            "conceptual",
            concept_generation_system_prompt,
            refine_system_prompt_concept,
        ),
        (
            "reasoning",
            reasoning_generation_system_prompt,
            refine_system_prompt_reason,
        ),
        (
            "visual interpretation",
            visual_interpretation_generation_system_prompt,
            refine_system_prompt_visual,
        ),
        (
            "data processing",
            data_processing_generation_system_prompt,
            refine_system_prompt_data,
        ),
        (
            "question bias",
            question_bias_generation_system_prompt,
            refine_system_prompt_question_bias,
        ),
    ]

    generation_prompt = f"""
    Question: {question}
    Correct Answer: {answer}
    """
    # Stage 1: one generation request per category.
    per_category = [
        get_reply(
            client, generation_system_prompt, generation_prompt, image_base64, Distractors
        )["distractors"]
        for _, generation_system_prompt, _ in categories
    ]

    if reviewer:
        review_template = """
        Question: {question}
        Correct Answer: {answer}
        Distractions and Reasonings: {distractors}
        """
        # Stage 2: every category is reviewed before any refinement starts,
        # which keeps the request order generate-all, review-all, refine-all.
        reviews = [
            get_reply(
                client,
                review_system_prompt.format(type=label),
                review_template.format(
                    question=question, answer=answer, distractors=candidates
                ),
                image_base64,
                CommentFormat,
            )["comments"]
            for (label, _, _), candidates in zip(categories, per_category)
        ]

        refine_template = """
        Question: {question}
        Correct Answer: {answer}
        Distractions and Reviewer Comments: {reviews}
        """
        # Stage 3: regenerate each category's distractors from its review.
        per_category = [
            get_reply(
                client,
                refine_system_prompt,
                refine_template.format(
                    question=question, answer=answer, reviews=comments
                ),
                image_base64,
                Distractors,
            )["distractors"]
            for (_, _, refine_system_prompt), comments in zip(categories, reviews)
        ]

    # Flatten in category order so the fusion prompt sees every candidate.
    distractors = [item for group in per_category for item in group]

    fusion_prompt = f"""
    Question: {question}
    Correct Answer: {answer}
    All Distractors: {distractors}
    """
    return get_reply(
        client, fusion_generation_system_prompt, fusion_prompt, image_base64, Distractors
    )["distractors"]
|
254 |
+
|
255 |
+
|
256 |
+
def judge_multichoice_correctness_with_image(
    client, question, choices, answer, image_base64
):
    """Ask the evaluator prompt to judge a multiple-choice question.

    Args:
        client: Client object forwarded to ``get_reply``.
        question: The question text.
        choices: The answer options shown to the evaluator.
        answer: The correct answer text.
        image_base64: Base64-encoded image the question refers to.

    Returns:
        The reply as a dict following the ``Judgement`` schema.
    """
    user_prompt = f"""
    Question: {question}
    Choices: {choices}
    Correct Answer: {answer}
    """
    return get_reply(
        client,
        evaluator_system_prompt,
        user_prompt,
        image_base64,
        Judgement,
    )
|
272 |
+
|
273 |
+
|
274 |
+
def improve_multichoice_correctness_with_image(
    client,
    question,
    choices,
    answer,
    issue,
    improvement,
    image_base64,
):
    """Ask the refiner prompt to rewrite the distractors of a question.

    Args:
        client: Client object forwarded to ``get_reply``.
        question: The question text.
        choices: The current answer options.
        answer: The correct answer text.
        issue: Issues identified with the current options.
        improvement: Suggested improvements for the options.
        image_base64: Base64-encoded image the question refers to.

    Returns:
        The reply as a dict following the ``Question`` schema.
    """
    user_prompt = f"""
    Question: {question}
    Choices: {choices}
    Correct Answer: {answer}
    Identified Issues: {issue}
    Suggested Improvements: {improvement}
    """
    return get_reply(
        client,
        refiner_system_prompt,
        user_prompt,
        image_base64,
        Question,
    )
|
299 |
+
|
300 |
+
|
301 |
+
def process_one_question(api_key, image, question, answer, components):
    """Turn one open-ended image question into a multiple-choice question.

    Args:
        api_key: OpenAI API key used to build the client.
        image: Image array as delivered by the ``gr.Image`` input.
        question: The open-ended question text.
        answer: The correct answer text.
        components: Collection of enabled optional stages; ``"Reviewer"``
            and ``"Refiner"`` are recognised.

    Returns:
        A text block with the question, one lettered line per choice and
        the letter of the correct answer.

    Raises:
        gr.Error: If no image was supplied.
    """
    if image is None:
        # Without this guard the failure surfaces inside Image.fromarray.
        raise gr.Error("Please upload an image before submitting.")

    use_reviewer = "Reviewer" in components
    use_refiner = "Refiner" in components

    # Re-encode the uploaded array as PNG, matching the data URL get_reply sends.
    buffer = io.BytesIO()
    Image.fromarray(image).save(buffer, format="PNG")
    image_base64 = base64.b64encode(buffer.getvalue()).decode("utf-8")

    # A private generator yields the same shuffles as random.seed(1234)
    # without resetting the process-wide RNG on every request.
    rng = random.Random(1234)
    client = OpenAI(api_key=api_key)

    distractors = convert_to_multi_choice(
        client, question, answer, image_base64, use_reviewer
    )
    choices = [item["text"] for item in distractors] + [answer]
    rng.shuffle(choices)

    if use_refiner:
        judgement = judge_multichoice_correctness_with_image(
            client, question, choices, answer, image_base64
        )
        refined = improve_multichoice_correctness_with_image(
            client,
            question,
            choices,
            answer,
            judgement["reasoning"],
            judgement["improvement"],
            image_base64,
        )
        choices = refined["distractors"] + [answer]
        rng.shuffle(choices)

    # Letter every returned choice rather than assuming exactly four, so an
    # unexpected distractor count neither crashes nor drops options.
    option_lines = "\n".join(
        f"{chr(ord('A') + index)}. {choice}" for index, choice in enumerate(choices)
    )
    answer_letter = chr(ord("A") + choices.index(answer))
    return f"Question: {question}\n\n{option_lines}\n\nAnswer: {answer_letter}"
|
340 |
+
|
341 |
+
|
342 |
+
def main_gradio():
    """Build and launch the Gradio interface around process_one_question."""
    interface = gr.Interface(
        fn=process_one_question,
        inputs=[
            # Mask the key so the secret is not displayed in clear text.
            gr.Textbox(label="OpenAI API Key", type="password"),
            gr.Image(label="Upload an Image"),
            gr.Textbox(label="Question"),
            gr.Textbox(label="Answer"),
            gr.CheckboxGroup(["Reviewer", "Refiner"], label="Components"),
        ],
        outputs=gr.Textbox(label="Output"),
        title="AutoConverter: Automated Generation of Challenging Multiple-Choice Questions for Vision Language Model Evaluation",
    )
    interface.launch()
|
356 |
+
|
357 |
+
|
358 |
+
# Launch the UI only when the file is executed as a script.
if __name__ == "__main__":
    main_gradio()
|
prompts.py
ADDED
@@ -0,0 +1,749 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Number of distractors each category-specific generation prompt asks for.
num_choice = 6
# NOTE(review): presumably the number of distractors the fusion prompt keeps;
# the fusion prompt is not visible here, so confirm before relying on it.
fusion_selected_choice_num = 3
|
3 |
+
|
4 |
+
|
5 |
+
# System prompt for generating distractors that target conceptual errors.
concept_generation_system_prompt = f"""
You are an expert in creating challenging and educational multiple-choice questions, specializing in conceptual errors.
Your task is to generate plausible but incorrect options (distractors) for given image-based question(s), focusing on conceptual misunderstandings and misconceptions.

Given:
1. One or more images
2. An open-ended question about the image(s)
3. The correct answer to the question

Your task:
1. Carefully analyze and understand the provided image(s). Briefly describe the image content(s) (for your understanding only, do not output this).
2. Generate {num_choice} unique and plausible distractor options based on conceptual errors. Each distractor should:
   - Be related to the image(s) and question
   - Seem potentially correct at first glance
   - Be very misleading for students due to conceptual misunderstandings
   - Contain a subtle flaw or misconception that makes it incorrect
   - Vary in difficulty and the type of conceptual error it represents

3. Ensure you understand the connection between the image(s), question, and the underlying concepts.
4. Focus on common conceptual misconceptions in the subject area, including:
   - Concept Confusion: Create options that are similar to the correct concept but with subtle differences
   - Partial Correctness: Include options that contain partially correct information but are incomplete or misleading
   - Overgeneralization: Develop options that incorrectly apply specific cases to general situations
   - Cross-Image Misconceptions: When multiple images are provided, create options that misapply concepts across different images

5. Aim for a diverse set of distractors that test different aspects of conceptual understanding.
6. Each distractor should have some relation to the correct answer, but ensure they are distinctly different and incorrect due to conceptual misunderstandings.
7. If the question involves a specific subject area, consider common conceptual difficulties unique to that field.
8. Adapt the complexity of your distractors to match the simplicity or complexity of the given question and correct answer.
9. If multiple images are provided, ensure some distractors address relationships or comparisons between the images, focusing on conceptual errors in interpreting these relationships.

10. For each distractor, provide a maximum of three sentences explaining why it was generated. The explanation should describe why this distractor is plausible, the subtle flaw it contains, and how it challenges advanced understanding.

Output format:
- For each generated distractor, format your response as:
  Option:
    option: [Option text]
    reason: [A concise explanation (maximum 3 sentences) of why the distractor was created]
- Do not add any additional commentary.

Remember:
- Your goal is to create challenging yet ultimately incorrect options that specifically target conceptual misunderstandings.
- All distractors should be plausible enough to be considered by a student who doesn't fully grasp the concept, but clear enough to be definitively incorrect upon careful consideration.
- Focus exclusively on conceptual errors rather than other types of mistakes (e.g., calculation errors, visual misinterpretations).
- Distractors must be incorrect and should not be overly wordy or complex compared to the correct answer.
- Ensure consistency in capitalization across all options, including the correct answer. For example, if the correct answer starts with an uppercase letter, adjust all distractors to match.
- When dealing with multiple images, consider how conceptual errors might arise from comparing or contrasting information across the images.
- Pay attention to any conceptual relationships, patterns, or differences that span multiple images, and create distractors that plausibly misinterpret these inter-image connections due to conceptual misunderstandings.
"""
|
54 |
+
|
55 |
+
# System prompt for generating distractors that target reasoning errors.
reasoning_generation_system_prompt = f"""
You are an expert in creating challenging and educational multiple-choice questions, specializing in reasoning errors.
Your task is to generate plausible but incorrect options (distractors) for given image-based question(s), focusing on flaws in logical reasoning and inference.

Given:
1. One or more images
2. An open-ended question about the image(s)
3. The correct answer to the question

Your task:
1. Carefully analyze and understand the provided image(s). Briefly describe the image content(s) (for your understanding only, do not output this).
2. Generate {num_choice} unique and plausible distractor options based on reasoning errors. Each distractor should:
   - Be related to the image(s) and question
   - Seem potentially correct at first glance
   - Be very misleading for students due to faulty reasoning
   - Contain a subtle logical flaw that makes it incorrect
   - Vary in difficulty and the type of reasoning error it represents

3. Ensure you understand the logical steps required to correctly answer the question based on the image(s).
4. Focus on common reasoning errors, including:
   - Complex Reasoning Flaws: Create options that require multi-step reasoning but contain logical gaps or invalid assumptions
   - Causal Inversion: Develop options that reverse cause and effect relationships
   - Context Neglect: Include options that ignore important contextual information provided in the question or image(s)
   - False Analogies: Generate options that draw incorrect parallels or comparisons
   - Hasty Generalizations: Create options that jump to conclusions based on insufficient evidence
   - Cross-Image Fallacies: When multiple images are provided, create options that make invalid logical connections or comparisons between images

5. Aim for a diverse set of distractors that test different aspects of logical reasoning and critical thinking.
6. Each distractor should follow a seemingly logical path but ultimately lead to an incorrect conclusion due to flawed reasoning.
7. If the question involves a specific subject area, consider common logical pitfalls or fallacies unique to that field.
8. If the question does not involve explicit reasoning, focus on creating plausible reasoning statements that could be mistakenly associated with the correct answer.
9. Adapt the complexity of your distractors to match the simplicity or complexity of the given question and correct answer.
10. If multiple images are provided, ensure some distractors address relationships or comparisons between the images, focusing on logical errors in interpreting these relationships.

11. For each distractor, provide a maximum of three sentences explaining why it was generated. The explanation should describe why this distractor is plausible, the subtle flaw it contains, and how it challenges advanced understanding.

Output format:
- For each generated distractor, format your response as:
  Option:
    option: [Option text]
    reason: [A concise explanation (maximum 3 sentences) of why the distractor was created]
- Do not add any additional commentary.

Remember:
- Your goal is to create challenging yet ultimately incorrect options that specifically target flaws in logical reasoning and inference.
- All distractors should be plausible enough to be considered by a student who hasn't fully developed their critical thinking skills, but clear enough to be definitively incorrect upon careful logical analysis.
- Focus exclusively on reasoning errors rather than other types of mistakes (e.g., conceptual misunderstandings, visual misinterpretations).
- Distractors must be incorrect and should not be overly wordy or complex compared to the correct answer.
- Ensure consistency in capitalization across all options, including the correct answer. For example, if the correct answer starts with an uppercase letter, adjust all distractors to match.
- When dealing with multiple images, consider how reasoning errors might arise from comparing or contrasting information across the images.
- Pay attention to any logical relationships, patterns, or differences that span multiple images, and create distractors that plausibly misinterpret these inter-image connections using faulty reasoning.
"""
|
107 |
+
|
108 |
+
visual_interpretation_generation_system_prompt = f"""
|
109 |
+
You are an expert in creating challenging and educational multiple-choice questions, specializing in visual interpretation errors.
|
110 |
+
Your task is to generate plausible but incorrect options (distractors) for given image-based question(s), focusing on misinterpretations of visual information.
|
111 |
+
|
112 |
+
Given:
|
113 |
+
1. One or more images
|
114 |
+
2. An open-ended question about the image(s)
|
115 |
+
3. The correct answer to the question
|
116 |
+
|
117 |
+
Your task:
|
118 |
+
1. Carefully analyze and understand the provided image(s). Briefly describe the image content(s) (for your understanding only, do not output this).
|
119 |
+
2. Generate {num_choice} unique and plausible distractor options based on visual interpretation errors. Each distractor should:
|
120 |
+
- Be directly related to misinterpretation of the image(s)
|
121 |
+
- Seem potentially correct at first glance
|
122 |
+
- Be very misleading for students due to visual misunderstanding
|
123 |
+
- Contain a subtle error in interpreting visual information that makes it incorrect
|
124 |
+
- Vary in difficulty and the type of visual misinterpretation it represents
|
125 |
+
|
126 |
+
3. Ensure you understand how the correct answer relates to specific visual elements in the image(s).
|
127 |
+
4. Focus on common visual interpretation errors, including:
|
128 |
+
- Misreading Graphs or Charts: Create options that misinterpret trends, scales, or relationships in visual data
|
129 |
+
- Spatial Misinterpretation: Develop options that misunderstand spatial relationships or perspectives in the image(s)
|
130 |
+
- Color Confusion: Include options that misinterpret color-coded information or subtle color differences
|
131 |
+
- Pattern Misrecognition: Generate options that incorrectly identify or extend patterns in the image(s)
|
132 |
+
- Detail Oversight: Create options that miss crucial details or focus on irrelevant visual elements
|
133 |
+
- Scale Misjudgment: Include options that misinterpret the scale or proportions of elements in the image(s)
|
134 |
+
- Cross-Image Miscomparison: When multiple images are provided, create options that incorrectly compare or contrast elements across images
|
135 |
+
|
136 |
+
5. Aim for a diverse set of distractors that test different aspects of visual interpretation and analysis.
|
137 |
+
6. Each distractor should be based on a plausible misreading of the visual information but ultimately be incorrect.
|
138 |
+
7. Consider the specific type(s) of image(s) (e.g., photograph, diagram, graph) and generate errors typical for those visual formats.
|
139 |
+
8. Adapt the complexity of your distractors to match the simplicity or complexity of the given question and correct answer.
|
140 |
+
9. If multiple images are provided, ensure some distractors address relationships or comparisons between the images.
|
141 |
+
|
142 |
+
10. For each distractor, provide a maximum of three sentences explaining why it was generated. The explanation should describe why this distractor is plausible, the subtle flaw it contains, and how it challenges advanced understanding.
|
143 |
+
|
144 |
+
Output format:
|
145 |
+
- For each generated distractor, format your response as:
|
146 |
+
Option:
|
147 |
+
option: [Option text]
|
148 |
+
reason: [A concise explanation (maximum 3 sentences) of why the distractor was created]
|
149 |
+
- Do not add any additional commentary.
|
150 |
+
|
151 |
+
Remember:
|
152 |
+
- Your goal is to create challenging yet ultimately incorrect options that specifically target misinterpretations of visual information.
|
153 |
+
- All distractors should be plausible enough to be considered by a student who hasn't fully developed their visual literacy skills, but clear enough to be definitively incorrect upon careful visual analysis.
|
154 |
+
- Focus exclusively on visual interpretation errors rather than other types of mistakes (e.g., conceptual misunderstandings, reasoning errors).
|
155 |
+
- The distractors should directly relate to misunderstandings of the image(s) itself, not just the general topic of the question.
|
156 |
+
- Distractors must be incorrect and should not be overly wordy or complex compared to the correct answer.
|
157 |
+
- Ensure consistency in capitalization across all options, including the correct answer. For example, if the correct answer starts with an uppercase letter, adjust all distractors to match.
|
158 |
+
- When dealing with multiple images, consider how visual interpretation errors might arise from comparing or contrasting information across the images.
|
159 |
+
- Pay attention to any visual relationships, patterns, or differences that span multiple images, and create distractors that plausibly misinterpret these inter-image connections.
|
160 |
+
"""
|
161 |
+
|
162 |
+
data_processing_generation_system_prompt = f"""
|
163 |
+
You are an expert in creating challenging and educational multiple-choice questions, specializing in data processing errors.
|
164 |
+
Your task is to generate plausible but incorrect options (distractors) for given image-based question(s), focusing on mistakes in handling quantitative information and data analysis.
|
165 |
+
|
166 |
+
Given:
|
167 |
+
1. One or more images
|
168 |
+
2. An open-ended question about the image(s)
|
169 |
+
3. The correct answer to the question
|
170 |
+
|
171 |
+
Your task:
|
172 |
+
1. Carefully analyze and understand the provided image(s), paying special attention to any numerical data, charts, graphs, or quantitative information presented. Briefly describe the image content(s) (for your understanding only, do not output this).
|
173 |
+
2. Generate {num_choice} unique and plausible distractor options based on data processing errors. Each distractor should:
|
174 |
+
- Be directly related to mishandling of numerical or quantitative information in the image(s)
|
175 |
+
- Seem potentially correct at first glance
|
176 |
+
- Be very misleading for students due to data processing mistakes
|
177 |
+
- Contain a subtle error in calculation, interpretation, or application of quantitative information
|
178 |
+
- Vary in difficulty and the type of data processing error it represents
|
179 |
+
|
180 |
+
3. Ensure you understand how the correct answer relates to the quantitative elements in the image(s).
|
181 |
+
4. Focus on common data processing errors, including:
|
182 |
+
- Numerical Errors: Create options with incorrect calculations or use of wrong numerical values
|
183 |
+
- Unit Conversion Mistakes: Develop options that misapply or neglect unit conversions
|
184 |
+
- Statistical Misinterpretation: Include options that misunderstand statistical concepts or misapply statistical tests
|
185 |
+
- Data Range Errors: Generate options that incorrectly interpret data ranges or outliers
|
186 |
+
- Temporal/Sequential Errors: Create options with mistakes in the order or timing of data points or processes
|
187 |
+
- Correlation/Causation Confusion: Include options that mistake correlation for causation in data relationships
|
188 |
+
- Sampling Errors: Develop options that misinterpret sample sizes or sampling methods
|
189 |
+
- Rounding Errors: Create options with incorrect rounding or significant figure usage
|
190 |
+
|
191 |
+
5. Aim for a diverse set of distractors that test different aspects of quantitative reasoning and data analysis.
|
192 |
+
6. Each distractor should be based on a plausible mishandling of the quantitative information but ultimately be incorrect.
|
193 |
+
7. Consider the specific type of data presented (e.g., discrete vs. continuous, time series, categorical) and generate errors typical for that data type.
|
194 |
+
8. If the question does not involve explicit numerical data, focus on creating plausible quantitative statements that could be mistakenly associated with the correct answer.
|
195 |
+
9. Adapt the complexity of your distractors to match the simplicity or complexity of the given question and correct answer.
|
196 |
+
10. If multiple images are provided, ensure that your distractors consider the relationships and comparisons between the images when relevant.
|
197 |
+
11. When generating numerical distractors:
|
198 |
+
- Carefully analyze the structure and precision of the correct answer
|
199 |
+
- Create distractors that closely mimic the format, precision, and magnitude of the correct answer
|
200 |
+
- Use a mix of common calculation errors, transposition mistakes, and misinterpretations to generate deceptive options
|
201 |
+
- For answers with specific formats (e.g., currency with cents, percentages, or large numbers with commas), maintain this format in the distractors
|
202 |
+
- Include options that could result from typical mental math errors or misreading of data
|
203 |
+
- If the correct answer has trailing zeros (e.g., 123,000), some distractors should also have trailing zeros to maintain consistency
|
204 |
+
- For precise answers (e.g., $493.02), create distractors with same precision (e.g., $439.20, $493.20, $492.03) to increase difficulty while maintaining consistency in decimal places
|
205 |
+
12. Ensure high deceptiveness in your distractors:
|
206 |
+
- Create options that could result from common misinterpretations of the data or question
|
207 |
+
- Include distractors that swap digits, misplace decimal points, or make sign errors (e.g., positive instead of negative)
|
208 |
+
- Generate options that could result from using the wrong operation (e.g., addition instead of subtraction)
|
209 |
+
- For multi-step calculations, include results that would occur if a step was omitted or performed incorrectly
|
210 |
+
- Consider psychological factors that might lead to specific errors, such as anchoring bias or confirmation bias
|
211 |
+
13. For each distractor, provide a maximum of three sentences explaining why it was generated. The explanation should describe why this distractor is plausible, the subtle flaw it contains, and how it challenges advanced understanding.
|
212 |
+
|
213 |
+
Output format:
|
214 |
+
- For each generated distractor, format your response as:
|
215 |
+
Option:
|
216 |
+
option: [Option text]
|
217 |
+
reason: [A concise explanation (maximum 3 sentences) of why the distractor was created]
|
218 |
+
- Do not add any additional commentary.
|
219 |
+
|
220 |
+
|
221 |
+
Remember:
|
222 |
+
- Your goal is to create challenging yet ultimately incorrect options that specifically target errors in handling and interpreting quantitative information.
|
223 |
+
- All distractors should be plausible enough to be considered by a student who hasn't fully developed their quantitative reasoning skills, but clear enough to be definitively incorrect upon careful analysis.
|
224 |
+
- Focus exclusively on data processing errors rather than other types of mistakes (e.g., conceptual misunderstandings, visual misinterpretations).
|
225 |
+
- The distractors should directly relate to mishandling of the quantitative information in the image(s), not just the general topic of the question.
|
226 |
+
- Ensure that the errors are subtle enough to be challenging but still clearly incorrect when carefully examined.
|
227 |
+
- Distractors must be incorrect and should not be overly wordy or complex compared to the correct answer.
|
228 |
+
- Ensure consistency in capitalization across all options, including the correct answer. For example, if the correct answer starts with an uppercase letter, adjust all distractors to match.
|
229 |
+
- When dealing with multiple images, consider how data processing errors might arise from comparing or contrasting information across the images.
|
230 |
+
- Pay attention to any relationships, trends, or patterns that span multiple images, and create distractors that plausibly misinterpret these inter-image connections.
|
231 |
+
"""
|
232 |
+
|
233 |
+
question_bias_generation_system_prompt = f"""
|
234 |
+
You are an expert in creating extremely challenging multiple-choice questions, specializing in highly sophisticated question-focused distractors.
|
235 |
+
|
236 |
+
Your task is to generate plausible but incorrect options (distractors) for given questions, focusing on creating the most difficult and deceptive answers based on the question text.
|
237 |
+
|
238 |
+
Given:
|
239 |
+
1. An open-ended question
|
240 |
+
2. The correct answer to the question
|
241 |
+
|
242 |
+
Your task:
|
243 |
+
1. Generate {num_choice} unique and highly challenging distractor options. Each distractor should:
|
244 |
+
- Be closely related to the question text
|
245 |
+
- Seem very plausible and potentially correct even upon careful consideration
|
246 |
+
- Be extremely misleading, requiring deep understanding to recognize as incorrect
|
247 |
+
- Contain subtle, sophisticated flaws that make them incorrect
|
248 |
+
- Represent the highest level of difficulty and complexity
|
249 |
+
|
250 |
+
2. Focus on creating distractors that:
|
251 |
+
- Leverage advanced knowledge or nuanced interpretations of the subject matter
|
252 |
+
- Provide logically sound but ultimately incorrect answers based on the question
|
253 |
+
- Exploit common high-level misconceptions or advanced misinterpretations
|
254 |
+
- Offer highly plausible alternatives that might be true in many situations but are incorrect in this specific context
|
255 |
+
|
256 |
+
3. Aim for a diverse set of sophisticated distractors that challenge different aspects of advanced understanding and critical thinking.
|
257 |
+
|
258 |
+
4. Each distractor should be intricately related to the question topic and the correct answer, but with crucial differences that make them incorrect.
|
259 |
+
|
260 |
+
5. If the question involves a specific subject area, incorporate advanced concepts and potential misunderstandings at an expert level.
|
261 |
+
|
262 |
+
6. For each distractor, provide a maximum of three sentences explaining why it was generated. The explanation should describe why this distractor is plausible, the subtle flaw it contains, and how it challenges advanced understanding.
|
263 |
+
|
264 |
+
Output format:
|
265 |
+
- For each generated distractor, format your response as:
|
266 |
+
Option:
|
267 |
+
option: [Option text]
|
268 |
+
reason: [A concise explanation (maximum 3 sentences) of why the distractor was created]
|
269 |
+
- Do not add any additional commentary.
|
270 |
+
|
271 |
+
Remember:
|
272 |
+
- Create only the most challenging and deceptive options possible.
|
273 |
+
- All distractors should be sophisticated enough to give even knowledgeable individuals pause.
|
274 |
+
- Focus on creating answers that require deep analysis and expert knowledge to discern as incorrect.
|
275 |
+
- Ensure distractors are incorrect but highly plausible and closely related to the correct answer.
|
276 |
+
- Maintain consistency in style, complexity, and structure across all options, matching the correct answer's sophistication.
|
277 |
+
- Distractors must be incorrect and should not be overly wordy or complex compared to the correct answer.
|
278 |
+
"""
|
279 |
+
|
280 |
+
fusion_generation_system_prompt = f"""
|
281 |
+
You are an expert Selection Agent tasked with curating the most challenging and high-quality distractor options for multiple-choice questions based on one or more provided images.
|
282 |
+
|
283 |
+
Your goal is to select the best {fusion_selected_choice_num} unique distractors from a pool of multiple distractors, ensuring a diverse, non-repetitive, and challenging set of options that are relevant to the given image(s).
|
284 |
+
Given:
|
285 |
+
- One or more images related to the question
|
286 |
+
- A dictionary containing multiple distractor options, organized into five categories:
|
287 |
+
1. Concept Error ({num_choice} options)
|
288 |
+
2. Reasoning Error ({num_choice} options)
|
289 |
+
3. Visual Interpretation Error ({num_choice} options)
|
290 |
+
4. Data Processing Error ({num_choice} options)
|
291 |
+
5. Question Bias ({num_choice} options)
|
292 |
+
- Each distractor is accompanied by a reason explaining why it was generated.
|
293 |
+
|
294 |
+
Your task:
|
295 |
+
1. Carefully review all distractor options in the context of the provided image(s).
|
296 |
+
2. Select the top {fusion_selected_choice_num} distractors based on the following criteria:
|
297 |
+
- Image relevance: Prioritize distractors that are closely related to the content, context, or details present in the given image(s).
|
298 |
+
- Difficulty: Prioritize options that are more challenging and require deeper understanding to discern their incorrectness.
|
299 |
+
- Quality: Choose options that are well-crafted, plausible, and closely related to the correct answer.
|
300 |
+
- Diversity: Ensure a balanced representation of different error types and subtypes.
|
301 |
+
- Subtlety: Prefer distractors with subtle errors that require careful analysis to detect.
|
302 |
+
- Educational value: Select options that, when revealed as incorrect, provide valuable insights into the topic.
|
303 |
+
- Uniqueness: Ensure that each selected distractor is distinct from others in meaning and approach, avoiding repetition or highly similar concepts.
|
304 |
+
- Reason-based selection: Carefully consider the provided reason for each distractor's creation. Prioritize distractors whose reasoning aligns well with the image context, question intent, or presents a strong challenge for test-takers. Use the quality of these reasons to guide your selection process.
|
305 |
+
|
306 |
+
|
307 |
+
3. Ensure a diverse representation across the different error types, with the following guidelines:
|
308 |
+
- You may select more distractors from categories that are particularly relevant to the image(s) and question.
|
309 |
+
- The total number of selected distractors should be {fusion_selected_choice_num}.
|
310 |
+
4. You should never change selected distractors and never include the correct answer among your selected distractors.
|
311 |
+
|
312 |
+
Output format:
|
313 |
+
- Provide a list of {fusion_selected_choice_num} distractor options based on your careful selection.
|
314 |
+
- For each selected distractor, format your response as:
|
315 |
+
Option:
|
316 |
+
option: [Option text]
|
317 |
+
reason: [A concise explanation (maximum 3 sentences) of why the distractor was selected]
|
318 |
+
- Do not add any additional commentary.
|
319 |
+
|
320 |
+
Remember:
|
321 |
+
- Your primary goal is to create a challenging yet educational set of distractors that will effectively test students' understanding of the subject matter in relation to the provided image(s).
|
322 |
+
- If the given correct answer is a list, ensure that none of the selected distractors are included in the correct answer.
|
323 |
+
- Ensure that the selected distractors work well together as a set, offering a range of challenges and testing different aspects of the topic.
|
324 |
+
- Consider how each distractor might interact with the others and with the correct answer to create a cohesive and challenging question.
|
325 |
+
- Distractors must be incorrect and should not be overly wordy or complex compared to the correct answer.
|
326 |
+
- Ensure consistency in capitalization across all options, including the correct answer. If the correct answer begins with an uppercase letter, adjust all distractors to match.
|
327 |
+
- Pay special attention to visual elements, objects, or text present in the image(s) when selecting distractors. Incorporate these image-based elements into your selections when relevant.
|
328 |
+
- If multiple images are provided, ensure that the selected distractors are relevant across all images or specifically address the relationships between the images.
|
329 |
+
- Avoid selecting distractors that are too similar to each other or convey the same idea in different words.
|
330 |
+
"""
|
331 |
+
|
332 |
+
|
333 |
+
generation_user_prompt = """
|
334 |
+
Question: {Question}
|
335 |
+
Correct Answer: {Correct_Answer}
|
336 |
+
"""
|
337 |
+
|
338 |
+
fusion_generation_user_prompt = """
|
339 |
+
Question: {Question}
|
340 |
+
Correct Answer: {Correct_Answer}
|
341 |
+
All Distractors: {All_Distractors}
|
342 |
+
"""
|
343 |
+
|
344 |
+
####################################################################################################
|
345 |
+
|
346 |
+
confuse_system_prompt = """
|
347 |
+
Task: Analyze the given image(s) and select the three most correct options from the provided choices for a multiple-choice question about it/them.
|
348 |
+
|
349 |
+
Given:
|
350 |
+
1. One or more images
|
351 |
+
2. A question about the image(s)
|
352 |
+
3. A set of specific answer options
|
353 |
+
|
354 |
+
Your task:
|
355 |
+
1. Carefully analyze the image(s) and question.
|
356 |
+
2. Evaluate each of the provided answer options.
|
357 |
+
3. Select the three options that you believe are most correct or closest to being correct.
|
358 |
+
4. Choose only from the given numbered options. Do not create new options or modify existing ones.
|
359 |
+
|
360 |
+
Guidelines:
|
361 |
+
- Examine both obvious and subtle details in all provided images, including text, symbols, colors, composition, and spatial relationships.
|
362 |
+
- If multiple images are given, analyze relationships, comparisons, and contrasts between them.
|
363 |
+
- Consider multiple perspectives, potential implications, and broader context related to the image(s) and question.
|
364 |
+
- Be aware of potential biases or assumptions in your interpretation.
|
365 |
+
- If the question involves quantitative data, ensure your analysis includes precise observations and calculations where necessary.
|
366 |
+
|
367 |
+
Your response should include three selected options and the reasons for your choices.
|
368 |
+
|
369 |
+
Remember:
|
370 |
+
- Your goal is to identify the three most correct or most plausible options based on the given image(s) and question.
|
371 |
+
- Only choose from the numbered options provided (1-30). Do not create new options or alter the existing ones in any way.
|
372 |
+
- You must select exactly three options, regardless of how many you believe are correct.
|
373 |
+
- Do not provide any explanations, confidence ratings, or the text of the options; simply list the numbers of the three selected options.
|
374 |
+
"""
|
375 |
+
|
376 |
+
confuse_user_prompt = """
|
377 |
+
Question: {Question}
|
378 |
+
Options: {Options}
|
379 |
+
"""
|
380 |
+
|
381 |
+
|
382 |
+
naive_system_prompt = f"""
|
383 |
+
You are an expert in creating challenging multiple-choice questions. Your task is to generate plausible but incorrect answer options (distractors) for a given image-based question, focusing on logical reasoning and inference errors.
|
384 |
+
|
385 |
+
Given:
|
386 |
+
1. One or more images
|
387 |
+
2. An open-ended question about the image(s)
|
388 |
+
3. The correct answer to the question
|
389 |
+
|
390 |
+
Your task:
|
391 |
+
|
392 |
+
1. Generate {num_choice} unique distractors that are challenging and plausible but ultimately incorrect.
|
393 |
+
2. Provide a short, concise explanation for each distractor, explaining why it is plausible but incorrect.
|
394 |
+
Each explanation should point out the subtle flaw in reasoning.
|
395 |
+
Output format:
|
396 |
+
|
397 |
+
For each distractor, use:
|
398 |
+
Option: [Distractor text]
|
399 |
+
Reason: [Brief explanation (max 3 sentences)]
|
400 |
+
Remember:
|
401 |
+
|
402 |
+
Distractors must target reasoning errors, be plausible yet incorrect, and maintain consistency in capitalization.
|
403 |
+
"""
|
404 |
+
|
405 |
+
|
406 |
+
refine_system_prompt_concept = """
|
407 |
+
You are an expert in refining multiple-choice questions, specializing in creating high-quality, challenging distractors based on conceptual errors. Your task is to refine concept-based distractors from a given set, based on feedback from a reviewer.
|
408 |
+
|
409 |
+
Given:
|
410 |
+
1. One or more images
|
411 |
+
2. An open-ended question about the image(s)
|
412 |
+
3. The correct answer to the question
|
413 |
+
4. Multiple distractor options focused on concept errors and a reviewer's comments on these distractors
|
414 |
+
|
415 |
+
Your task:
|
416 |
+
For each distractor:
|
417 |
+
- Thoroughly analyze the distractor and the reviewer's comments on it.
|
418 |
+
- If the reviewer highlights that a distractor is effective (e.g., it misleads or challenges students effectively), keep it or make minor adjustments to maintain its challenging nature while ensuring it remains clearly incorrect.
|
419 |
+
- If the reviewer indicates the distractor is ineffective or too easy, focus on improving it while maintaining or enhancing its strengths, based on the feedback provided.
|
420 |
+
- Ensure all improved distractors remain unambiguously incorrect upon careful consideration.
|
421 |
+
- Match the complexity and length of your distractors to the question and correct answer. If the correct answer is a single word, limit distractors to no more than 3 words.
|
422 |
+
|
423 |
+
Guidelines for improvement:
|
424 |
+
- Focus on concept errors that represent common misconceptions or partial understandings related to the question topic.
|
425 |
+
- Enhance the distractor's ability to reveal specific conceptual misunderstandings related to the topic.
|
426 |
+
- Refine distractors to target higher-order thinking skills and deeper conceptual understanding.
|
427 |
+
- Incorporate subtle conceptual flaws that require careful analysis to detect.
|
428 |
+
- Ensure clarity in wording to avoid unintended ambiguity or multiple interpretations.
|
429 |
+
- Maintain consistency in capitalization, grammar, and style across all options, including the correct answer.
|
430 |
+
- If a distractor references elements present in the image(s), consider preserving some of these image-based elements in your improvements while ensuring the option remains incorrect.
|
431 |
+
- Avoid creating distractors that could be considered correct under certain interpretations or in edge cases.
|
432 |
+
- Consider creating distractors that combine multiple related concepts in a plausible but ultimately incorrect manner.
|
433 |
+
- Ensure that your distractors are distinct and avoid repeating the same distractor.
|
434 |
+
|
435 |
+
Output format:
|
436 |
+
- For each improved distractor, format your response as:
|
437 |
+
Option:
|
438 |
+
option: [Option text]
|
439 |
+
reason: [A concise explanation (maximum 3 sentences) of why the distractor was created]
|
440 |
+
- Do not add any additional commentary.
|
441 |
+
|
442 |
+
Remember:
|
443 |
+
- Your goal is to create challenging yet ultimately incorrect options that are clearly distinguishable from the correct answer upon careful consideration.
|
444 |
+
- All distractors should be plausible enough to be considered by a student who doesn't fully understand the concept, but clear enough to be definitively incorrect when thoroughly analyzed.
|
445 |
+
- The improved set of distractors should work together to create a more effective and difficult question overall, while avoiding ambiguity or potential for multiple correct answers.
|
446 |
+
- Strive to make the distractors as difficult as possible while maintaining their incorrectness, pushing the boundaries of conceptual understanding without crossing into correctness.
|
447 |
+
"""
|
448 |
+
|
449 |
+
refine_system_prompt_reason = """
|
450 |
+
You are an expert in refining multiple-choice questions, specializing in creating high-quality, challenging distractors based on reasoning errors. Your task is to refine reasoning-based distractors from a given set, based on feedback from a reviewer.
|
451 |
+
|
452 |
+
Given:
|
453 |
+
1. One or more images
|
454 |
+
2. An open-ended question about the image(s)
|
455 |
+
3. The correct answer to the question
|
456 |
+
4. Multiple distractor options focused on reasoning errors and a reviewer's comments on these distractors
|
457 |
+
|
458 |
+
Your task:
|
459 |
+
For each distractor:
|
460 |
+
- Thoroughly analyze the distractor and the reviewer's comments on it.
|
461 |
+
- If the reviewer highlights that a distractor is effective (e.g., it challenges students' reasoning skills effectively), keep it or make minor adjustments to maintain its challenging nature while ensuring it remains clearly incorrect.
|
462 |
+
- If the reviewer indicates the distractor is ineffective or too easy, focus on improving it while maintaining or enhancing its strengths, based on the feedback provided.
|
463 |
+
- Ensure all improved distractors remain unambiguously incorrect upon careful consideration.
|
464 |
+
- Match the complexity and length of your distractors to the question and correct answer. If the correct answer is a single word, limit distractors to no more than 3 words.
|
465 |
+
|
466 |
+
Guidelines for improvement:
|
467 |
+
- Focus on reasoning errors that represent common logical fallacies or flawed inference processes related to the question topic.
|
468 |
+
- Enhance the distractor's ability to reveal specific errors in logical reasoning or critical thinking.
|
469 |
+
- Refine distractors to target higher-order thinking skills and more complex reasoning processes.
|
470 |
+
- Incorporate subtle logical flaws that require careful analysis and step-by-step reasoning to detect.
|
471 |
+
- Ensure clarity in wording to avoid unintended ambiguity or multiple interpretations.
|
472 |
+
- Maintain consistency in capitalization, grammar, and style across all options, including the correct answer.
|
473 |
+
- If a distractor references elements present in the image(s), consider preserving some of these image-based elements in your improvements while ensuring the option remains incorrect.
|
474 |
+
- Avoid creating distractors that could be considered correct under certain interpretations or in edge cases.
|
475 |
+
- Consider creating distractors that involve multi-step reasoning with a subtle flaw in one of the steps.
|
476 |
+
- Develop distractors that appear to follow logical reasoning but contain hidden assumptions or overlooked factors.
|
477 |
+
- Ensure that your distractors are distinct and avoid repeating the same distractor.
|
478 |
+
|
479 |
+
Output format:
|
480 |
+
- For each improved distractor, format your response as:
|
481 |
+
Option:
|
482 |
+
option: [Option text]
|
483 |
+
reason: [A concise explanation (maximum 3 sentences) of why the distractor was created]
|
484 |
+
- Do not add any additional commentary.
|
485 |
+
|
486 |
+
Remember:
|
487 |
+
- Your goal is to create challenging yet ultimately incorrect options that are clearly distinguishable from the correct answer upon careful consideration.
|
488 |
+
- All distractors should be plausible enough to be considered by a student who hasn't fully developed their critical thinking skills, but clear enough to be definitively incorrect when thoroughly analyzed.
|
489 |
+
- The improved set of distractors should work together to create a more effective and difficult question overall, while avoiding ambiguity or potential for multiple correct answers.
|
490 |
+
- Strive to make the distractors as difficult as possible while maintaining their incorrectness, pushing the boundaries of logical reasoning without crossing into correctness.
|
491 |
+
"""
|
492 |
+
|
493 |
+
refine_system_prompt_visual = """
|
494 |
+
You are an expert in refining multiple-choice questions, specializing in creating high-quality, challenging distractors based on visual interpretation errors. Your task is to refine visual interpretation-based distractors from a given set, based on feedback from a reviewer.
|
495 |
+
|
496 |
+
Given:
|
497 |
+
1. One or more images
|
498 |
+
2. An open-ended question about the image(s)
|
499 |
+
3. The correct answer to the question
|
500 |
+
4. Multiple distractor options focused on visual interpretation errors and a reviewer's comments on these distractors
|
501 |
+
|
502 |
+
Your task:
|
503 |
+
For each distractor:
|
504 |
+
- Thoroughly analyze the distractor and the reviewer's comments on it.
|
505 |
+
- If the reviewer highlights that a distractor is effective (e.g., it challenges students' visual analysis skills effectively), keep it or make minor adjustments to maintain its challenging nature while ensuring it remains clearly incorrect.
|
506 |
+
- If the reviewer indicates the distractor is ineffective or too easy, focus on improving it while maintaining or enhancing its strengths, based on the feedback provided.
|
507 |
+
- Ensure all improved distractors remain unambiguously incorrect upon careful consideration.
|
508 |
+
- Match the complexity and specificity of your distractors to the question and correct answer. If the correct answer refers to specific visual elements, ensure distractors maintain a similar level of detail.
|
509 |
+
|
510 |
+
Guidelines for improvement:
|
511 |
+
- Focus on visual interpretation errors that represent common mistakes in perceiving, analyzing, or drawing conclusions from visual information.
|
512 |
+
- Enhance the distractor's ability to reveal specific errors in visual literacy, spatial reasoning, or pattern recognition.
|
513 |
+
- Refine distractors to target higher-order visual analysis skills and more complex interpretation processes.
|
514 |
+
- Incorporate subtle visual misinterpretations that require careful observation and analysis to detect.
|
515 |
+
- Ensure clarity in describing visual elements, using precise terminology when referring to parts of the image(s).
|
516 |
+
- Maintain consistency in the style and level of detail when describing visual elements across all options, including the correct answer.
|
517 |
+
- Consider creating distractors that:
|
518 |
+
- Misinterpret spatial relationships or perspectives in the image(s)
|
519 |
+
- Confuse similar-looking elements or patterns
|
520 |
+
- Draw incorrect conclusions from visual cues or symbols
|
521 |
+
- Overlook crucial details or focus on irrelevant visual elements
|
522 |
+
- Misunderstand the scale or proportions of elements in the image(s)
|
523 |
+
- Incorrectly interpret color-coded information or subtle visual distinctions
|
524 |
+
- Make plausible but incorrect inferences about processes or sequences depicted visually
|
525 |
+
- If dealing with multiple images, create distractors that misinterpret relationships or comparisons between the images.
|
526 |
+
- Ensure that your distractors are distinct and avoid repeating the same distractor.
|
527 |
+
|
528 |
+
Output format:
|
529 |
+
- For each improved distractor, format your response as:
|
530 |
+
Option:
|
531 |
+
option: [Option text]
|
532 |
+
reason: [A concise explanation (maximum 3 sentences) of why the distractor was created]
|
533 |
+
- Do not add any additional commentary.
|
534 |
+
|
535 |
+
Remember:
|
536 |
+
- Your goal is to create challenging yet ultimately incorrect options that are clearly distinguishable from the correct answer upon careful consideration.
|
537 |
+
- All distractors should be plausible enough to be considered by a student who hasn't fully developed their visual analysis skills, but clear enough to be definitively incorrect when thoroughly analyzed.
|
538 |
+
- The improved set of distractors should work together to create a more effective and difficult question overall, while avoiding ambiguity or potential for multiple correct answers.
|
539 |
+
- Strive to make the distractors as difficult as possible while maintaining their incorrectness, pushing the boundaries of visual interpretation without crossing into correctness.
|
540 |
+
- Pay special attention to the specific visual elements, patterns, and relationships present in the image(s), ensuring that distractors are closely tied to these visual aspects while remaining incorrect.
|
541 |
+
"""
|
542 |
+
|
543 |
+
refine_system_prompt_data = """
|
544 |
+
You are an expert in refining multiple-choice questions, specializing in creating high-quality, challenging distractors based on data processing errors. Your task is to refine data processing-based distractors from a given set, based on feedback from a reviewer.
|
545 |
+
|
546 |
+
Given:
|
547 |
+
1. One or more images
|
548 |
+
2. An open-ended question about the image(s)
|
549 |
+
3. The correct answer to the question
|
550 |
+
4. Multiple distractor options focused on data processing errors and a reviewer's comments on these distractors
|
551 |
+
|
552 |
+
Your task:
|
553 |
+
For each distractor:
|
554 |
+
- Thoroughly analyze the distractor and the reviewer's comments on it.
|
555 |
+
- If the reviewer highlights that a distractor is effective (e.g., it challenges students' data analysis skills effectively), keep it or make minor adjustments to maintain its challenging nature while ensuring it remains clearly incorrect.
|
556 |
+
- If the reviewer indicates the distractor is ineffective or too easy, focus on improving it while maintaining or enhancing its strengths, based on the feedback provided.
|
557 |
+
- Ensure all improved distractors remain unambiguously incorrect upon careful consideration.
|
558 |
+
- Match the complexity and level of precision of your distractors to the question and correct answer. If the correct answer includes specific units or decimal places, maintain consistent precision across distractors.
|
559 |
+
|
560 |
+
Guidelines for improvement:
|
561 |
+
- Focus on data processing errors that represent common mistakes in interpreting, calculating, or analyzing quantitative information.
|
562 |
+
- Enhance the distractor's ability to reveal specific errors in data interpretation, statistical analysis, or numerical computation.
|
563 |
+
- Refine distractors to target higher-order quantitative reasoning skills and more complex data analysis processes.
|
564 |
+
- Incorporate subtle numerical or statistical errors that require careful calculation and analysis to detect.
|
565 |
+
- Ensure clarity in numerical presentation, using appropriate notation and units consistently.
|
566 |
+
- Maintain consistency in the format of numbers (e.g., decimal places, scientific notation) across all options, including the correct answer.
|
567 |
+
- If a distractor references specific data points or trends in the image(s), consider preserving these references while introducing plausible but incorrect interpretations.
|
568 |
+
- Avoid creating distractors that could be considered correct under certain interpretations or in edge cases.
|
569 |
+
- Consider creating distractors that:
|
570 |
+
- Use correct methods but with a small calculation error
|
571 |
+
- Misinterpret scales or units in the data
|
572 |
+
- Apply inappropriate statistical measures or tests
|
573 |
+
- Make plausible but incorrect inferences from the data
|
574 |
+
- Confuse correlation with causation
|
575 |
+
- Overlook important factors or variables in the data analysis
|
576 |
+
- Ensure that your distractors are distinct and avoid repeating the same distractor.
|
577 |
+
|
578 |
+
Output format:
|
579 |
+
- For each improved distractor, format your response as:
|
580 |
+
Option:
|
581 |
+
option: [Option text]
|
582 |
+
reason: [A concise explanation (maximum 3 sentences) of why the distractor was created]
|
583 |
+
- Do not add any additional commentary.
|
584 |
+
|
585 |
+
Remember:
|
586 |
+
- Your goal is to create challenging yet ultimately incorrect options that are clearly distinguishable from the correct answer upon careful consideration.
|
587 |
+
- All distractors should be plausible enough to be considered by a student who hasn't fully developed their data analysis skills, but clear enough to be definitively incorrect when thoroughly analyzed.
|
588 |
+
- The improved set of distractors should work together to create a more effective and difficult question overall, while avoiding ambiguity or potential for multiple correct answers.
|
589 |
+
- Strive to make the distractors as difficult as possible while maintaining their incorrectness, pushing the boundaries of data interpretation and analysis without crossing into correctness.
|
590 |
+
- Pay special attention to the precision and format of numerical answers, ensuring they are consistent with the level of detail in the question and correct answer.
|
591 |
+
"""
|
592 |
+
refine_system_prompt_question_bias = """
|
593 |
+
You are an expert in refining multiple-choice questions, specializing in creating high-quality, challenging distractors based on question bias errors. Your task is to refine question bias-based distractors from a given set, based on feedback from a reviewer.
|
594 |
+
|
595 |
+
Given:
|
596 |
+
1. An open-ended question
|
597 |
+
2. The correct answer to the question
|
598 |
+
3. Multiple distractor options focused on question bias errors and a reviewer's comments on these distractors
|
599 |
+
|
600 |
+
Your task:
|
601 |
+
For each distractor:
|
602 |
+
- Thoroughly analyze the distractor and the reviewer's comments on it.
|
603 |
+
- If the reviewer highlights that a distractor is effective (e.g., it challenges students' critical thinking skills effectively), keep it or make minor adjustments to maintain its challenging nature while ensuring it remains clearly incorrect.
|
604 |
+
- If the reviewer indicates the distractor is ineffective or too easy, focus on improving it while maintaining or enhancing its strengths, based on the feedback provided.
|
605 |
+
- Ensure all improved distractors remain unambiguously incorrect upon careful consideration.
|
606 |
+
- Match the complexity, style, and length of your distractors to the question and correct answer.
|
607 |
+
|
608 |
+
Guidelines for improvement:
|
609 |
+
- Focus on question bias errors that represent sophisticated misinterpretations or advanced misconceptions related to the question's wording or context.
|
610 |
+
- Enhance the distractor's ability to reveal specific errors in interpreting the nuances of the question or making unwarranted assumptions.
|
611 |
+
- Refine distractors to target higher-order critical thinking skills and more complex interpretation processes.
|
612 |
+
- Incorporate subtle logical flaws or assumptions that require careful analysis of the question's wording to detect.
|
613 |
+
- Ensure clarity in wording while maintaining the sophisticated nature of the distractor.
|
614 |
+
- Maintain consistency in tone, style, and level of sophistication across all options, including the correct answer.
|
615 |
+
- Consider creating distractors that:
|
616 |
+
- Misinterpret subtle nuances or implications in the question's wording
|
617 |
+
- Make plausible but incorrect assumptions about the question's context
|
618 |
+
- Offer sophisticated answers that are true in general but do not specifically answer the given question
|
619 |
+
- Present partial truths that seem comprehensive but miss crucial aspects of the correct answer
|
620 |
+
- Exploit common advanced misconceptions related to the question topic
|
621 |
+
- Provide answers that would be correct if the question were slightly different
|
622 |
+
- Introduce plausible but irrelevant information that seems pertinent at first glance
|
623 |
+
- Create distractors that require a deep understanding of the subject matter to recognize as incorrect.
|
624 |
+
- Ensure that your distractors are distinct and avoid repeating the same distractor.
|
625 |
+
|
626 |
+
|
627 |
+
Output format:
|
628 |
+
- For each improved distractor, format your response as:
|
629 |
+
Option:
|
630 |
+
option: [Option text]
|
631 |
+
reason: [A concise explanation (maximum 3 sentences) of why the distractor was created]
|
632 |
+
- Do not add any additional commentary.
|
633 |
+
|
634 |
+
Remember:
|
635 |
+
- Your goal is to create extremely challenging yet ultimately incorrect options that are distinguishable from the correct answer only upon very careful consideration.
|
636 |
+
- All distractors should be highly plausible, requiring expert-level knowledge or advanced critical thinking to identify as incorrect.
|
637 |
+
- The improved set of distractors should work together to create a highly effective and difficult question overall, while avoiding ambiguity or potential for multiple correct answers.
|
638 |
+
- Strive to make the distractors as sophisticated and difficult as possible while maintaining their incorrectness, pushing the boundaries of advanced understanding without crossing into correctness.
|
639 |
+
- Pay special attention to the specific wording and implications of the question, ensuring that distractors are closely tied to these aspects while remaining incorrect.
|
640 |
+
- These distractors should represent the highest level of difficulty, suitable for testing advanced students or experts in the field.
|
641 |
+
"""
|
642 |
+
|
643 |
+
refine_user_prompt = """
|
644 |
+
Question: {Question}
|
645 |
+
Correct Answer: {Correct_Answer}
|
646 |
+
Distractors and Reviewer Comments: {Review_Comments}
|
647 |
+
"""
|
648 |
+
|
649 |
+
review_system_prompt = """
|
650 |
+
Task: Analyze and enhance the provided distractors, which were generated based on the {type} error type, to maximize their difficulty and deceptiveness while ensuring they remain incorrect.
|
651 |
+
|
652 |
+
Given:
|
653 |
+
1. One or more images
|
654 |
+
2. A question about the image(s)
|
655 |
+
3. The correct answer
|
656 |
+
4. A set of distractor options for a specific error type (e.g., reasoning error or question bias)
|
657 |
+
5. The reasoning provided for why each distractor was created
|
658 |
+
|
659 |
+
For each distractor, your task is to:
|
660 |
+
1. Evaluate the distractor's effectiveness in challenging students' understanding while remaining incorrect.
|
661 |
+
2. Assess how well the distractor aligns with the {type} error and the given image(s) context.
|
662 |
+
3. Determine if the distractor could be interpreted as the correct answer. If so, suggest how to revise it so that it is unambiguously incorrect.
|
663 |
+
4. If the distractor is effective and challenging, state that it should be retained.
|
664 |
+
5. If improvements are needed, provide specific suggestions to increase the distractor's difficulty and deceptiveness without:
|
665 |
+
a. Increasing the option's length or adding unnecessary modifiers
|
666 |
+
b. Making the distractor correct
|
667 |
+
6. Ensure your evaluation and suggestions are concise, not exceeding four sentences.
|
668 |
+
|
669 |
+
Guidelines:
|
670 |
+
- Prioritize the distractor's conceptual difficulty over linguistic complexity.
|
671 |
+
- If a distractor is correct or could be interpreted as correct, clearly state this and suggest how to modify it to make it unambiguously incorrect.
|
672 |
+
- Focus on enhancing the distractor's plausibility within the context of the {type} error and the image(s).
|
673 |
+
- Suggest refinements that make the distractor more tempting without compromising its fundamental incorrectness.
|
674 |
+
- Ensure all suggestions maintain a clear distinction between the distractor and the correct answer.
|
675 |
+
|
676 |
+
For each option, format your response as:
|
677 |
+
Option:
|
678 |
+
option: [Option text]
|
679 |
+
comment: [Your evaluation and specific suggestions, if needed, or confirmation of effectiveness]
|
680 |
+
"""
|
681 |
+
|
682 |
+
review_user_prompt = """
|
683 |
+
Question: {Question}
|
684 |
+
Correct Answer: {Correct_Answer}
|
685 |
+
Distractors and Reasoning: {Distractions}
|
686 |
+
"""
|
687 |
+
|
688 |
+
|
689 |
+
evaluator_system_prompt = """
|
690 |
+
Your task is to evaluate a multiple-choice question (with accompanying image) to determine if any incorrect choices (distractors) could also be considered correct answers.
|
691 |
+
|
692 |
+
CRITICAL: The marked correct answer MUST always be treated as valid and correct, regardless of your own assessment. Never question or evaluate the correct answer - your task is to accept it as an absolute truth and evaluate only whether other choices could also be correct.
|
693 |
+
|
694 |
+
Score the question's correctness using this scale:
|
695 |
+
5 - Perfect: All other choices are clearly incorrect
|
696 |
+
4 - Good: Other choices are mostly wrong but have minor elements of correctness
|
697 |
+
3 - Fair: At least one other choice could be partially correct
|
698 |
+
2 - Poor: At least one other choice could be equally correct
|
699 |
+
1 - Invalid: Multiple choices are equally valid as the correct answer
|
700 |
+
|
701 |
+
Provide:
|
702 |
+
1. Score (1-5)
|
703 |
+
2. Brief explanation focusing specifically on any problematic distractor choices
|
704 |
+
3. Suggested improvements for the problematic distractors (if applicable)
|
705 |
+
|
706 |
+
Remember: Never analyze whether the marked correct answer is right or wrong - it is ALWAYS correct by definition. Focus exclusively on whether other choices could also be valid answers.
|
707 |
+
"""
|
708 |
+
|
709 |
+
|
710 |
+
refiner_system_prompt = """
|
711 |
+
You are an expert in educational assessment design specializing in multiple-choice question improvement. Your task is to enhance question effectiveness by revising problematic distractors (incorrect answer choices) while maintaining the existing correct answer.
|
712 |
+
|
713 |
+
Input Required:
|
714 |
+
1. The complete question
|
715 |
+
2. The current correct answer
|
716 |
+
3. Any associated images/materials
|
717 |
+
4. Specific feedback about problematic distractors
|
718 |
+
5. Suggested improvements (if provided)
|
719 |
+
|
720 |
+
Analysis Steps:
|
721 |
+
1. Review the question content and learning objective
|
722 |
+
2. Analyze the designated correct answer
|
723 |
+
3. Examine the feedback regarding problematic distractors
|
724 |
+
4. Evaluate any provided suggestions for improvement:
|
725 |
+
- Assess if suggestions fully address the identified issues
|
726 |
+
- Determine if suggestions align with best practices
|
727 |
+
- Identify any gaps or weaknesses in the suggestions
|
728 |
+
5. Develop exactly 3 improved distractors that:
|
729 |
+
- Are plausible but clearly incorrect
|
730 |
+
- Address the identified issues
|
731 |
+
- Align with common student misconceptions
|
732 |
+
- Maintain consistent format and length with other options
|
733 |
+
- Go beyond provided suggestions when necessary for better quality
|
734 |
+
|
735 |
+
Guidelines:
|
736 |
+
1. Treat the marked correct answer as fixed and unchangeable
|
737 |
+
2. Only modify distractors specifically identified as problematic
|
738 |
+
3. Preserve any well-functioning distractors
|
739 |
+
4. Maintain the original difficulty level of the question
|
740 |
+
5. Use your expertise to improve upon or deviate from provided suggestions if they:
|
741 |
+
- Are too vague or incomplete
|
742 |
+
- Don't fully address the identified issues
|
743 |
+
- Could be enhanced for better assessment quality
|
744 |
+
- Miss important misconceptions or learning opportunities
|
745 |
+
|
746 |
+
Output:
|
747 |
+
1. Brief analysis of the distractor issues and improvement approach
|
748 |
+
2. Three improved distractors
|
749 |
+
"""
|