Spaces:
Sleeping
Sleeping
added 2 passes
Browse files- app.py +117 -25
- examples.py +52 -0
- pass1.py +99 -0
- pass2.py +71 -0
- utils.py +0 -106
app.py
CHANGED
@@ -1,36 +1,63 @@
|
|
1 |
import streamlit as st
|
2 |
-
from PIL import Image
|
3 |
-
from utils import get_gpt4V_response, get_str_to_json
|
4 |
-
|
5 |
-
|
6 |
# Page-wide Streamlit settings; the icon is the brain emoji (it had been
# mis-decoded into Greek-codepage characters in this copy).
st.set_page_config(page_title="GPT-4V Demo", page_icon="🧠", layout="wide")
|
7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
with st.sidebar:
|
|
|
|
|
|
|
9 |
st.title("Parameters")
|
10 |
st.write("This is a demo of GPT-4V model. It takes a story, goal, entity and an image as input and generates a response.")
|
11 |
|
12 |
st.subheader("Sampling Temperature")
|
13 |
-
temperature = st.slider(label="", min_value=0.1, max_value=1.0, value=0.5, step=0.1)
|
14 |
st.write("The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.")
|
15 |
|
16 |
st.subheader("Entity?")
|
17 |
-
entity_opt = st.radio(label="With or Without", options=[1, 0], format_func=lambda x: ["Without", "With"][x])
|
18 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
def main():
|
21 |
global temperature, entity
|
22 |
st.title('What can go wrong?')
|
23 |
|
|
|
|
|
24 |
col1, col2 = st.columns(2)
|
25 |
|
26 |
with col1:
|
27 |
-
story = st.text_area("Story", placeholder="Enter the story here")
|
28 |
|
29 |
entity = None
|
30 |
if entity_opt:
|
31 |
-
entity = st.text_input("Entity", placeholder="Enter the entity here")
|
32 |
|
33 |
-
goal = st.text_area("Goal", placeholder="Enter the goal here")
|
34 |
|
35 |
images = st.file_uploader("Upload Image", type=['jpg', 'png'], accept_multiple_files=True)
|
36 |
|
@@ -41,35 +68,100 @@ def main():
|
|
41 |
with cols[i]:
|
42 |
image = Image.open(image)
|
43 |
st.image(image, caption="Uploaded Image", use_column_width=True)
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
st.error("Please fill all the fields")
|
49 |
return
|
50 |
with col2:
|
51 |
with st.status("Generating response...", expanded=True):
|
52 |
-
response =
|
53 |
|
|
|
54 |
try:
|
55 |
response_json = get_str_to_json(response)
|
56 |
if "condition" not in response_json or "alternate_condition" not in response_json:
|
57 |
raise ValueError("Invalid JSON - 1")
|
58 |
if not entity_opt and "entity" not in response_json:
|
59 |
raise ValueError("Invalid JSON - 2")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
with st.expander("Condition", expanded=True):
|
66 |
-
st.write(response_json["condition"])
|
67 |
-
|
68 |
-
with st.expander("Alternate Condition", expanded=True):
|
69 |
-
st.write(response_json["alternate_condition"])
|
70 |
|
71 |
except Exception as e:
|
72 |
-
print(e)
|
73 |
st.warning(f"Failed to parse JSON. Going for full output")
|
74 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
main()
|
|
|
1 |
import streamlit as st
|
|
|
|
|
|
|
|
|
2 |
# Page-wide Streamlit settings; the icon is the brain emoji (it had been
# mis-decoded into Greek-codepage characters in this copy).
st.set_page_config(page_title="GPT-4V Demo", page_icon="🧠", layout="wide")
|
3 |
|
4 |
+
from PIL import Image
|
5 |
+
import base64
|
6 |
+
from io import BytesIO
|
7 |
+
from utils import get_str_to_json
|
8 |
+
from pass1 import get_gpt4V_response_1
|
9 |
+
from pass2 import get_gpt4V_response_2
|
10 |
+
from examples import example_1, example_2
|
11 |
+
|
12 |
+
def clear_data():
    """Remove every entry from Streamlit session state.

    Called by the "Clear Inputs" button, as the ``on_change`` callback of the
    entity radio, and before an example is loaded. Afterwards the keyed
    widgets ("story", "goal", "entity") no longer have a stored value, and
    the cached pass outputs and button flags are gone as well.
    """
    # Resetting individual keys first would be pointless: everything is
    # deleted below anyway.
    # Iterate over a snapshot so deleting entries never interferes with the
    # iteration over the mapping itself.
    for key in list(st.session_state.keys()):
        del st.session_state[key]
|
20 |
+
|
21 |
+
print(st.session_state)
|
22 |
+
|
23 |
with st.sidebar:
|
24 |
+
if st.button("Clear Inputs"):
|
25 |
+
clear_data()
|
26 |
+
|
27 |
st.title("Parameters")
|
28 |
st.write("This is a demo of GPT-4V model. It takes a story, goal, entity and an image as input and generates a response.")
|
29 |
|
30 |
st.subheader("Sampling Temperature")
|
31 |
+
temperature = st.slider(label="x", min_value=0.1, max_value=1.0, value=0.5, step=0.1, label_visibility='hidden')
|
32 |
st.write("The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.")
|
33 |
|
34 |
st.subheader("Entity?")
|
35 |
+
entity_opt = st.radio(label="With or Without", options=[1, 0], format_func=lambda x: ["Without", "With"][x], on_change=clear_data)
|
36 |
|
37 |
+
st.subheader("Examples")
|
38 |
+
cols = st.columns(2)
|
39 |
+
for i, example in enumerate([example_1, example_2]):
|
40 |
+
with cols[i % len(cols)]:
|
41 |
+
if st.button(f"Example {i+1}", key=f"example{i+1}"):
|
42 |
+
clear_data()
|
43 |
+
st.session_state["data"] = example
|
44 |
|
45 |
def main():
|
46 |
global temperature, entity
|
47 |
st.title('What can go wrong?')
|
48 |
|
49 |
+
data = st.session_state.get("data", None)
|
50 |
+
|
51 |
col1, col2 = st.columns(2)
|
52 |
|
53 |
with col1:
|
54 |
+
story = st.text_area("Story", placeholder="Enter the story here", value=(data.story if data else ""), key="story")
|
55 |
|
56 |
entity = None
|
57 |
if entity_opt:
|
58 |
+
entity = st.text_input("Entity", placeholder="Enter the entity here", value=(data.entity if data else ""), key="entity")
|
59 |
|
60 |
+
goal = st.text_area("Goal", placeholder="Enter the goal here", value=(data.goal if data else ""), key="goal")
|
61 |
|
62 |
images = st.file_uploader("Upload Image", type=['jpg', 'png'], accept_multiple_files=True)
|
63 |
|
|
|
68 |
with cols[i]:
|
69 |
image = Image.open(image)
|
70 |
st.image(image, caption="Uploaded Image", use_column_width=True)
|
71 |
+
elif not images and data:
|
72 |
+
cols = st.columns(len(data.images))
|
73 |
+
for i, imb64 in enumerate(data.images_base64):
|
74 |
+
with cols[i]:
|
75 |
+
image = Image.open(BytesIO(base64.b64decode(imb64)))
|
76 |
+
st.image(image, caption="Example Image", use_column_width=True)
|
77 |
+
|
78 |
+
if st.button("Pass 1"):
|
79 |
+
st.session_state["button_2"] = False
|
80 |
+
image_to_send = None
|
81 |
+
if images:
|
82 |
+
image_to_send = images
|
83 |
+
elif data:
|
84 |
+
image_to_send = data.images_base64
|
85 |
+
|
86 |
+
if not story or not goal or (entity_opt and not entity) or not image_to_send:
|
87 |
st.error("Please fill all the fields")
|
88 |
return
|
89 |
with col2:
|
90 |
with st.status("Generating response...", expanded=True):
|
91 |
+
response = get_gpt4V_response_1(story, goal, entity, image_to_send, temperature=temperature)
|
92 |
|
93 |
+
response_json = {}
|
94 |
try:
|
95 |
response_json = get_str_to_json(response)
|
96 |
if "condition" not in response_json or "alternate_condition" not in response_json:
|
97 |
raise ValueError("Invalid JSON - 1")
|
98 |
if not entity_opt and "entity" not in response_json:
|
99 |
raise ValueError("Invalid JSON - 2")
|
100 |
+
|
101 |
+
except Exception as e:
|
102 |
+
print("Exception 1", e)
|
103 |
+
response_json = {
|
104 |
+
"entity": "",
|
105 |
+
"condition": "",
|
106 |
+
"alternate_condition": "",
|
107 |
+
"response": response
|
108 |
+
}
|
109 |
+
|
110 |
+
finally:
|
111 |
+
out1 = {
|
112 |
+
"entity": response_json.get("entity", None),
|
113 |
+
"condition": response_json.get("condition", None),
|
114 |
+
"alternate_condition": response_json.get("alternate_condition", None),
|
115 |
+
"response": response_json.get("response", "")
|
116 |
+
}
|
117 |
+
st.session_state["output_1"] = out1
|
118 |
+
st.session_state["button_1"] = True
|
119 |
+
|
120 |
+
with col2:
|
121 |
+
if st.session_state.get("button_1", False): # If pass 1 is done
|
122 |
+
output_1 = st.session_state.get("output_1", {})
|
123 |
+
|
124 |
+
if "response" in output_1 and output_1["response"]:
|
125 |
+
st.warning(f"Failed to parse JSON. Going for full output")
|
126 |
+
st.write(output_1["response"])
|
127 |
+
|
128 |
+
entity = output_1.get("entity", "")
|
129 |
+
condition = output_1.get("condition", "")
|
130 |
+
alternate_condition = output_1.get("alternate_condition", "")
|
131 |
+
if not entity_opt:
|
132 |
+
st.text_input("Entity", value=entity)
|
133 |
+
st.text_area("Condition", value=condition)
|
134 |
+
st.text_area("Alternate Condition", value=alternate_condition)
|
135 |
+
|
136 |
+
if st.button("Pass 2"):
|
137 |
+
st.session_state["button_2"] = True
|
138 |
+
|
139 |
+
with st.status("Generating response...", expanded=True):
|
140 |
+
response = get_gpt4V_response_2(story, goal, alternate_condition, images, temperature=temperature)
|
141 |
+
|
142 |
+
try:
|
143 |
|
144 |
+
response_json = get_str_to_json(response)
|
145 |
+
|
146 |
+
if "event" not in response_json:
|
147 |
+
raise ValueError("Invalid JSON - 3")
|
|
|
|
|
|
|
|
|
|
|
148 |
|
149 |
except Exception as e:
|
150 |
+
print("Exception 2", e)
|
151 |
st.warning(f"Failed to parse JSON. Going for full output")
|
152 |
+
response_json = {
|
153 |
+
"event": response
|
154 |
+
}
|
155 |
+
|
156 |
+
finally:
|
157 |
+
out2 = {
|
158 |
+
"event": response_json.get("event", response)
|
159 |
+
}
|
160 |
+
st.session_state["output_2"] = out2
|
161 |
+
|
162 |
+
if st.session_state.get("button_2", False): # If pass 2 is done
|
163 |
+
output_2 = st.session_state.get("output_2", {})
|
164 |
+
st.subheader("Event Leads to Alternate Condition")
|
165 |
+
st.write(output_2.get("event", ""))
|
166 |
+
|
167 |
main()
|
examples.py
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import requests
|
3 |
+
import base64
|
4 |
+
|
5 |
+
|
6 |
+
example_1 = {
|
7 |
+
"story": "The parade is starting and people line the streets. The folks who live upstairs in the buildings get a good view. The really cool floats are coming next. It's raining harder but no one seems to mind. The people came prepared with umbrellas for this special occasion.The more the parade goes on, the better the displays seem to be. The zoo animals are very well done and everybody is standing in the rain to see them. The man's dog is a bit confused by these animals. Everyone is having a good time watching the parade and chatting with their neighbors.",
|
8 |
+
"entity": "Rhinoceros - on a float in the street",
|
9 |
+
"goal": "Watch a parade from the high vantage point.",
|
10 |
+
"images": [
|
11 |
+
"https://drive.google.com/uc?export=view&id=1LhLpWAkuIIVDf5jl1E7_DppoHQwl-7P9",
|
12 |
+
"https://drive.google.com/uc?export=view&id=11r_tEEa7QcEkalT_wcZOtOxnaztqtETh",
|
13 |
+
"https://drive.google.com/uc?export=view&id=1NsZpx_otGbh9Jz00A9eyF3xoUfTnvNxH"
|
14 |
+
]
|
15 |
+
}
|
16 |
+
|
17 |
+
example_2 = {
|
18 |
+
"story": "There is a place in the world where the most people go to in their holidays they visit the biggest desert where people gets into helicopter and sees the enormous land from above and the warm air they you feel and there are other people who just like to travel by travel by walking with your family and friends.There is a ancient history with that log old cowboys of the western side used to camp there and have fum stories with their family and friends but one day a disaster happened which caused the log get red and rotten and from all over there world tourist goes to that wonderful place.",
|
19 |
+
"entity": "Sand",
|
20 |
+
"goal": "See a beautiful desert landscape.",
|
21 |
+
"images": [
|
22 |
+
"https://drive.google.com/uc?export=view&id=1DyIOAuMsSy1flFmvB255cT5F3ScI2qfd",
|
23 |
+
"https://drive.google.com/uc?export=view&id=1rn7_WOGfwMoUurHog8DQV5GEIA0kugGB",
|
24 |
+
"https://drive.google.com/uc?export=view&id=112TxpwMDA34EO0Rrleh7XnBG56YN0vHt"
|
25 |
+
]
|
26 |
+
}
|
27 |
+
|
28 |
+
class Example:
    """One ready-made demo input: story, entity, goal and its images.

    The image URLs are downloaded and base64-encoded while the object is
    being constructed, so ``images_base64`` is available immediately.
    """

    def __init__(self, story, entity, goal, images):
        """Store the text fields and eagerly fetch the images.

        Args:
            story: Story text.
            entity: Entity text.
            goal: Goal text.
            images: List of image URLs.
        """
        self.story = story
        self.entity = entity
        self.goal = goal
        self.images = images
        # base64-encoded bytes, one item per URL in ``images`` (same order).
        self.images_base64 = self.get_images()

    # ``_self`` is underscore-prefixed so st.cache_data leaves the instance
    # out of the cache key; results are keyed on the URL only.
    @st.cache_data
    def download_image(_self, image_url):
        """Return the raw bytes served at ``image_url``.

        Raises:
            requests.RequestException: on timeout, connection failure or a
                non-2xx HTTP status. Raising keeps an error page from being
                cached and later treated as image data.
        """
        # This runs while the module is imported; without a timeout a stalled
        # connection would hang the whole app.
        response = requests.get(image_url, timeout=30)
        response.raise_for_status()
        return response.content

    @st.cache_data
    def get_base64(_self, img_data):
        """Return ``img_data`` base64-encoded (as bytes)."""
        return base64.b64encode(img_data)

    # NOTE(review): unlike the two helpers above this cached method takes a
    # plain ``self``, so Streamlit includes the instance in the cache key -
    # confirm that is intended.
    @st.cache_data
    def get_images(self):
        """Download and base64-encode every URL in ``self.images``."""
        return [self.get_base64(self.download_image(image_url)) for image_url in self.images]
|
47 |
+
|
48 |
+
|
49 |
+
|
50 |
+
example_1 = Example(example_1["story"], example_1["entity"], example_1["goal"], example_1["images"])
|
51 |
+
|
52 |
+
example_2 = Example(example_2["story"], example_2["entity"], example_2["goal"], example_2["images"])
|
pass1.py
ADDED
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from utils import im_2_b64, RANDOM_SEED, client
|
3 |
+
|
4 |
+
|
5 |
+
with_prompt = """
|
6 |
+
In the following task, you will be presented with some images and a story that is, in some manner, related to that goal. You will be given a specific goal and entity (generally, person or object), and be asked to identify condition necessary for that goal and the alternate condition that could prevent that goal.
|
7 |
+
|
8 |
+
Conditions for the output:
|
9 |
+
1. Condition: The condition is the necessary condition for the goal to be achieved. If the condition is not met, the goal cannot be achieved. The condition must be related to the entity.
|
10 |
+
|
11 |
+
2. Alternate Condition: The alternate condition is a different version of the condition that would prevent the goal from being achieved. It is likely that this alternate condition will contradict information provided in the images and/or story.
|
12 |
+
|
13 |
+
Output in a python dictionary where it should have the following keys: 'condition', 'alternate_condition'.
|
14 |
+
|
15 |
+
Story: {story}
|
16 |
+
|
17 |
+
Entity: {entity}
|
18 |
+
|
19 |
+
Goal: {goal}
|
20 |
+
"""
|
21 |
+
|
22 |
+
wo_prompt = """
|
23 |
+
In the following task, you will be presented with a image and a story that is, in some manner, related to that goal. You will be given a specific goal, and be asked to identify an entity (person or object), condition necessary for that goal and the alternate condition that could prevent that goal.
|
24 |
+
|
25 |
+
Conditions for the output:
|
26 |
+
1. Entity: The entity is the person or object that the goal is related to. The entity should be a crucial part for achieving the goal.
|
27 |
+
|
28 |
+
2. Condition: The condition is the necessary condition for the goal to be achieved. If the condition is not met, the goal cannot be achieved. The condition must be related to the entity.
|
29 |
+
|
30 |
+
3. Alternate Condition: The alternate condition is a different version of the condition that would prevent the goal from being achieved. It is likely that this alternate condition will contradict information provided in the images and/or story.
|
31 |
+
|
32 |
+
Output in a python dictionary where it should have the following keys: 'entity', 'condition', 'alternate_condition'.
|
33 |
+
|
34 |
+
Story: {story}
|
35 |
+
|
36 |
+
Goal: {goal}
|
37 |
+
"""
|
38 |
+
|
39 |
+
def get_gpt4V_response_1(story, goal, entity, images, temperature=0.5):
    """Run pass 1: ask GPT-4V for the condition and alternate condition.

    Builds a single user message holding the formatted prompt followed by
    every image as a base64 ``data:`` URL, sends it to
    ``gpt-4-vision-preview`` and returns the text of the first choice.
    Progress lines are written to the Streamlit page along the way.

    Args:
        story: Story text substituted into the prompt.
        goal: Goal text substituted into the prompt.
        entity: Entity text. When truthy ``with_prompt`` is used; otherwise
            ``wo_prompt`` is used, which asks the model to identify the
            entity itself.
        images: Iterable of images. ``bytes`` items are used as already
            base64-encoded data; any other item is encoded with
            ``im_2_b64``. ``None`` is treated as "no images".
        temperature: Sampling temperature forwarded to the API.

    Returns:
        The message content of the first completion choice, unparsed.
    """
    image_urls = []
    for image in images or ():
        # Decide per item (not from the first element only) whether the
        # image is already base64 bytes or still needs encoding.
        image_b64 = image if isinstance(image, bytes) else im_2_b64(image)
        image_urls.append(f"data:image/jpeg;base64,{image_b64.decode('utf-8')}")
    # NOTE(review): indentation was lost in the copy this was restored from;
    # the status line is assumed to be emitted once, after the loop - confirm.
    st.write("✅ Image converted")

    if entity:
        now_prompt = with_prompt.format(story=story, goal=goal, entity=entity)
    else:
        now_prompt = wo_prompt.format(story=story, goal=goal)

    # The text part comes first, followed by one image part per data URL.
    content = [
        {"type": "text", "text": now_prompt},
    ]

    st.write("✅ Prompt created")

    content.extend(
        {"type": "image_url", "image_url": {"url": image_url}}
        for image_url in image_urls
    )

    # NOTE(review): the leading glyph of this message is mis-encoded in the
    # available copy and the original emoji could not be recovered.
    st.write("π Getting Response from GPT4V")
    response = client.chat.completions.create(
        model="gpt-4-vision-preview",
        seed=RANDOM_SEED,
        messages=[
            {
                "role": "user",
                "content": content,
            }
        ],
        temperature=temperature,
        max_tokens=1024,
    )
    out = response.choices[0].message.content

    st.write("✅ Response generated")

    return out
|
pass2.py
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from utils import im_2_b64, RANDOM_SEED, client
|
2 |
+
import streamlit as st
|
3 |
+
|
4 |
+
|
5 |
+
prompt = """
|
6 |
+
In the following task, you will be presented with some images and a story that is, in some manner, related to that goal. You will be given a specific goal, and a condition that could prevent that goal from being achieved. Using both story and images, you will be asked to identify an event that could result in that condition being true thus preventing goal success. If it is not plausible to identify such an event, you can state that as well.
|
7 |
+
|
8 |
+
Output in a python dictionary where it should have the following keys: 'event'.
|
9 |
+
|
10 |
+
Story: {story}
|
11 |
+
|
12 |
+
Goal: {goal}
|
13 |
+
|
14 |
+
Condition: {condition}
|
15 |
+
"""
|
16 |
+
|
17 |
+
def get_gpt4V_response_2(story, goal, condition, images, temperature=0.5):
    """Run pass 2: ask GPT-4V for an event that makes ``condition`` true.

    Builds a single user message holding the formatted module-level
    ``prompt`` followed by every image as a base64 ``data:`` URL, sends it to
    ``gpt-4-vision-preview`` and returns the text of the first choice.
    Progress lines are written to the Streamlit page along the way.

    Args:
        story: Story text substituted into the prompt.
        goal: Goal text substituted into the prompt.
        condition: Goal-preventing condition substituted into the prompt.
        images: Iterable of images. ``bytes`` items are used as already
            base64-encoded data; any other item is encoded with
            ``im_2_b64``. ``None`` is treated as "no images".
        temperature: Sampling temperature forwarded to the API.

    Returns:
        The message content of the first completion choice, unparsed.
    """
    image_urls = []
    for image in images or ():
        # Decide per item (not from the first element only) whether the
        # image is already base64 bytes or still needs encoding.
        image_b64 = image if isinstance(image, bytes) else im_2_b64(image)
        image_urls.append(f"data:image/jpeg;base64,{image_b64.decode('utf-8')}")
    # NOTE(review): indentation was lost in the copy this was restored from;
    # the status line is assumed to be emitted once, after the loop - confirm.
    st.write("✅ Image converted")

    now_prompt = prompt.format(story=story, goal=goal, condition=condition)

    # The text part comes first, followed by one image part per data URL.
    content = [
        {"type": "text", "text": now_prompt},
    ]

    st.write("✅ Prompt created")

    content.extend(
        {"type": "image_url", "image_url": {"url": image_url}}
        for image_url in image_urls
    )

    # NOTE(review): the leading glyph of this message is mis-encoded in the
    # available copy and the original emoji could not be recovered.
    st.write("π Getting Response from GPT4V")
    response = client.chat.completions.create(
        model="gpt-4-vision-preview",
        seed=RANDOM_SEED,
        messages=[
            {
                "role": "user",
                "content": content,
            }
        ],
        temperature=temperature,
        max_tokens=1024,
    )
    out = response.choices[0].message.content

    st.write("✅ Response generated")
    return out
|
utils.py
CHANGED
@@ -20,112 +20,6 @@ def im_2_b64(image):
|
|
20 |
RANDOM_SEED = 42
|
21 |
client = OpenAI(api_key=st.secrets["OPENAI_KEY"])
|
22 |
|
23 |
-
|
24 |
-
with_prompt = """
|
25 |
-
In the following task, you will be presented with a image and a story that is, in some manner, related to that goal. You will be given a specific goal and entity (generally, person or object), and be asked to identify condition necessary for that goal and the alternate condition that could prevent that goal.
|
26 |
-
|
27 |
-
Conditions for the output:
|
28 |
-
1. Condition: The condition is the necessary condition for the goal to be achieved. If the condition is not met, the goal cannot be achieved. The condition must be related to the entity.
|
29 |
-
|
30 |
-
2. Alternate Condition: The alternate condition is a different version of the condition that would prevent the goal from being achieved. It is likely that this alternate condition will contradict information provided in the images and/or story.
|
31 |
-
|
32 |
-
Output in a python dictionary where it should have the following keys: 'condition', 'alternate_condition'.
|
33 |
-
|
34 |
-
Story: {story}
|
35 |
-
|
36 |
-
Entity: {entity}
|
37 |
-
|
38 |
-
Goal: {goal}
|
39 |
-
"""
|
40 |
-
|
41 |
-
wo_prompt = """
|
42 |
-
In the following task, you will be presented with a image and a story that is, in some manner, related to that goal. You will be given a specific goal, and be asked to identify an entity (person or object), condition necessary for that goal and the alternate condition that could prevent that goal.
|
43 |
-
|
44 |
-
Conditions for the output:
|
45 |
-
1. Entity: The entity is the person or object that the goal is related to. The entity should be a crucial part for achieving the goal.
|
46 |
-
|
47 |
-
2. Condition: The condition is the necessary condition for the goal to be achieved. If the condition is not met, the goal cannot be achieved. The condition must be related to the entity.
|
48 |
-
|
49 |
-
3. Alternate Condition: The alternate condition is a different version of the condition that would prevent the goal from being achieved. It is likely that this alternate condition will contradict information provided in the images and/or story.
|
50 |
-
|
51 |
-
Output in a python dictionary where it should have the following keys: 'entity', 'condition', 'alternate_condition'.
|
52 |
-
|
53 |
-
Story: {story}
|
54 |
-
|
55 |
-
Goal: {goal}
|
56 |
-
"""
|
57 |
-
|
58 |
-
data = {
|
59 |
-
"Story id": [],
|
60 |
-
"Prompt": [],
|
61 |
-
"entity": [],
|
62 |
-
"agent": [],
|
63 |
-
"story": [],
|
64 |
-
"Image1": [],
|
65 |
-
"Image2": [],
|
66 |
-
"Image3": [],
|
67 |
-
"GPT-4 Output": [],
|
68 |
-
}
|
69 |
-
|
70 |
-
|
71 |
-
def get_gpt4V_response(story, goal, entity, images, temperature=0.5):
|
72 |
-
# Convert image to base64
|
73 |
-
image_urls = []
|
74 |
-
for i, image in enumerate(images):
|
75 |
-
image_b64 = im_2_b64(image)
|
76 |
-
image_url = f"data:image/jpeg;base64,{image_b64.decode('utf-8')}"
|
77 |
-
image_urls.append(image_url)
|
78 |
-
st.write("β
Image converted")
|
79 |
-
|
80 |
-
if entity:
|
81 |
-
prompt = with_prompt
|
82 |
-
now_prompt = prompt.format(story=story, goal=goal, entity=entity)
|
83 |
-
else:
|
84 |
-
prompt = wo_prompt
|
85 |
-
now_prompt = prompt.format(story=story, goal=goal)
|
86 |
-
|
87 |
-
content = [
|
88 |
-
{"type": "text", "text": now_prompt},
|
89 |
-
]
|
90 |
-
|
91 |
-
st.write("β
Prompt created")
|
92 |
-
|
93 |
-
for image_url in image_urls:
|
94 |
-
content.append({
|
95 |
-
"type": "image_url",
|
96 |
-
"image_url": {
|
97 |
-
"url": image_url,
|
98 |
-
},
|
99 |
-
})
|
100 |
-
|
101 |
-
st.write("π Getting Response from GPT4V")
|
102 |
-
response = client.chat.completions.create(
|
103 |
-
model="gpt-4-vision-preview",
|
104 |
-
seed=RANDOM_SEED,
|
105 |
-
messages=[
|
106 |
-
{
|
107 |
-
"role": "user",
|
108 |
-
"content": content
|
109 |
-
}
|
110 |
-
],
|
111 |
-
temperature=temperature,
|
112 |
-
max_tokens=1024,
|
113 |
-
# top_p=1,
|
114 |
-
# frequency_penalty=0,
|
115 |
-
# presence_penalty=0,
|
116 |
-
)
|
117 |
-
print(response)
|
118 |
-
print("Prompt:")
|
119 |
-
print(now_prompt)
|
120 |
-
out = response.choices[0].message.content
|
121 |
-
print("OUTPUT:", out)
|
122 |
-
print("====================================")
|
123 |
-
print()
|
124 |
-
|
125 |
-
st.write("β
Response generated")
|
126 |
-
|
127 |
-
return out
|
128 |
-
|
129 |
def get_str_to_json(st):
|
130 |
st = re.sub(r"```python", "", st)
|
131 |
st = re.sub(r"```", "", st)
|
|
|
20 |
RANDOM_SEED = 42
|
21 |
client = OpenAI(api_key=st.secrets["OPENAI_KEY"])
|
22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
def get_str_to_json(st):
|
24 |
st = re.sub(r"```python", "", st)
|
25 |
st = re.sub(r"```", "", st)
|