import streamlit as st
from openai import OpenAI
from io import BytesIO
from PIL import Image
import base64
st.set_page_config(page_title='GPT-4 Vision', page_icon='👁️')

# Seed per-session state on the first run only.
#  - history[0] always holds the (possibly empty) system message
#  - cost accumulates the estimated dollar spend for this session
#  - counters supply the widget keys that get bumped to reset the inputs
if 'history' not in st.session_state:
    st.session_state.update({
        'history': [{'role': 'system', 'content': ''}],
        'cost': 0.0,
        'counters': [0, 1],
    })
st.markdown('# GPT-4 Vision Client')

# The key is entered per-session and never persisted.
api_key = st.text_input('OpenAI API Key', '', type='password')

# Surface the running spend once at least one request has been billed.
session_cost = st.session_state['cost']
if session_cost > 0:
    st.info(f'Session Cost: ${session_cost:f}', icon='💰')

# Two-tab layout: the conversation itself, and the request settings.
chatTab, settingsTab = st.tabs(['Chat', 'Settings'])
# Request parameters consumed by the Send handler in the Chat tab.
with settingsTab:
    # 'low' detail is cheaper; 'high' sends the image at full resolution.
    image_detail = st.selectbox('Image Detail', ['low', 'high'])
    temperature = st.slider('Temperature', min_value=0.0, max_value=2.0, value=0.7)
    max_tokens = st.slider('Max Token Output', min_value=100, max_value=1000, value=300)
with chatTab:
    # Optional system prompt; stored in history[0] so it leads the request.
    with st.expander('System Message'):
        st.session_state['history'][0]['content'] = st.text_area(
            'sys message',
            st.session_state['history'][0]['content'],
            label_visibility='collapsed')

    # Render the conversation so far (history[0] is the system message).
    for msg in st.session_state['history'][1:]:
        if msg['role'] == 'user':
            for part in msg['content']:
                if part['type'] == 'text':
                    # NOTE(review): unsafe_allow_html renders the user's own
                    # input as HTML — acceptable for a single-user client.
                    st.markdown(f"You: {part['text']}", unsafe_allow_html=True)
                else:
                    with st.expander('Attached Image'):
                        # Strip the 'data:<mime>;base64,' prefix robustly.
                        # (The original sliced a fixed 23 characters, which
                        # only matches the image/jpeg prefix length.)
                        b64_payload = part['image_url']['url'].split(',', 1)[1]
                        st.image(Image.open(BytesIO(base64.b64decode(b64_payload))))
        else:
            # Markdown needs trailing whitespace before '\n' to keep the
            # assistant's line breaks visible.
            st.markdown('Assistant: ' + msg['content'].replace('\n', ' \n'))

    # Input widgets. Their keys come from session_state['counters']; bumping
    # the counters after a successful send yields fresh (empty) widgets.
    text_input = st.text_input('Prompt', '', key=st.session_state['counters'][0])
    img_input = st.file_uploader('Images', accept_multiple_files=True,
                                 key=st.session_state['counters'][1])

    st.markdown(
        """
        """, unsafe_allow_html=True
    )
    cols = st.columns(2)

    # Correct data-URL MIME type per extension. (Bug fix: the original
    # labelled every upload 'image/jpeg', mislabelling png/gif/webp.)
    MIME_BY_EXT = {
        'png': 'image/png',
        'jpg': 'image/jpeg',
        'jpeg': 'image/jpeg',
        'gif': 'image/gif',
        'webp': 'image/webp',
    }

    # Send the prompt (plus any attached images) to the API.
    with cols[0]:
        if st.button('Send'):
            if not api_key:
                st.warning('API Key required')
                st.stop()
            if not (text_input or img_input):
                st.warning('You can\'t just send nothing!')
                st.stop()
            msg = {'role': 'user', 'content': []}
            if text_input:
                msg['content'].append({'type': 'text', 'text': text_input})
            for img in img_input:
                ext = img.name.split('.')[-1].lower()
                if ext not in MIME_BY_EXT:
                    st.warning('Only .jpg, .png, .gif, or .webp are supported')
                    st.stop()
                encoded_img = base64.b64encode(img.read()).decode('utf-8')
                msg['content'].append({
                    'type': 'image_url',
                    'image_url': {
                        'url': f'data:{MIME_BY_EXT[ext]};base64,{encoded_img}',
                        'detail': image_detail,
                    },
                })
            st.session_state['history'].append(msg)
            # Omit the system message entirely when the user left it blank.
            history = (
                st.session_state['history']
                if st.session_state['history'][0]['content']
                else st.session_state['history'][1:]
            )
            client = OpenAI(api_key=api_key)
            try:
                response = client.chat.completions.create(
                    model='gpt-4-vision-preview',
                    temperature=temperature,
                    max_tokens=max_tokens,
                    messages=history,
                )
            except Exception as err:
                # Roll back the just-appended user message so a failed call
                # can simply be retried instead of corrupting the history.
                st.session_state['history'].pop()
                st.error(f'API request failed: {err}')
                st.stop()
            st.session_state['history'].append(
                {'role': 'assistant', 'content': response.choices[0].message.content}
            )
            # gpt-4-vision-preview pricing: $0.01/1K prompt tokens,
            # $0.03/1K completion tokens.
            st.session_state['cost'] += response.usage.prompt_tokens * 0.01 / 1000
            st.session_state['cost'] += response.usage.completion_tokens * 0.03 / 1000
            # Bump the widget keys so the inputs reset, then redraw.
            st.session_state['counters'] = [i + 2 for i in st.session_state['counters']]
            st.rerun()

    # Clear the conversation, preserving only the system message.
    with cols[1]:
        if st.button('Clear'):
            st.session_state['history'] = [st.session_state['history'][0]]
            st.rerun()