eaglelandsonce's picture
Update app.py
86c33f7
raw
history blame contribute delete
4.4 kB
import streamlit as st
from utils import *
# Streamlit session state holds the chat history; this function clears that history. Note: as our applications become more complicated, session state will be used more.
def clear_history():
    """Drop the 'history' entry from Streamlit's session state, if present.

    Used as a widget callback in this file (on_change / on_click), so it
    takes no arguments and returns nothing.
    """
    state = st.session_state
    if 'history' not in state:
        # Nothing stored yet; leave session state untouched.
        return
    del state['history']
# Use the __main__ guard so the app below only runs when this file is executed as the main script.
if __name__ == "__main__":
    import os

    st.subheader('Load a Document and Ask a Question')

    # ---- Sidebar: API key, file upload, chunking parameters, ingestion ----
    with st.sidebar:
        # The key is exported through the process environment.
        # NOTE(review): presumably the helpers imported from utils read
        # OPENAI_API_KEY from the environment -- confirm against utils.
        api_key = st.text_input('OpenAI API Key:', type='password')
        if api_key:
            os.environ['OPENAI_API_KEY'] = api_key

        # File uploader widget (drag and drop or browse).
        # Original author's note: the browse button works on Windows, not on Mac.
        uploaded_file = st.file_uploader('To upload a file drag and drop it on the area below:', type=['pdf', 'docx', 'txt', 'csv'])

        # Changing any of the parameters below clears the chat history via
        # the clear_history callback.
        chunk_size = st.number_input('Chunk size:', min_value=100, max_value=2048, value=512, on_change=clear_history)
        chunk_overlap = st.number_input('Chunk Overlap:', min_value=0, max_value=200, value=20, on_change=clear_history)
        # Number of documents to retrieve per question. Original author's
        # note: a larger k makes the search more effective but more expensive.
        k = st.number_input('top-k most salient docs', min_value=1, max_value=20, value=3, on_change=clear_history)

        # Clicking this button starts ingestion (and also clears the history).
        add_data = st.button('Add Data', on_click=clear_history)

        if add_data:
            # Validate the prerequisites in the same priority order as before:
            # a missing API key is reported first, then a missing file.
            if not api_key:
                st.error("Please provide your OpenAI API key above.....")
            elif not uploaded_file:
                st.error("Please drag and drop your file to the upload area above.....")
            else:
                with st.spinner('Reading, chunking and embedding file ...'):
                    # getvalue() returns the whole upload regardless of the
                    # current stream position, unlike read().
                    bytes_data = uploaded_file.getvalue()
                    # Keep only the final path component so the uploaded name
                    # cannot direct the write outside the current directory.
                    file_name = os.path.join('./', os.path.basename(uploaded_file.name))
                    # Write the file from RAM to the current directory on disk
                    # so it can be handed to load_document by path.
                    with open(file_name, 'wb') as f:
                        f.write(bytes_data)

                    data = load_document(file_name)
                    chunks = chunk_data(data, chunk_size=chunk_size, chunk_overlap=chunk_overlap)
                    st.write(f'Chunk size: {chunk_size}, Chunks: {len(chunks)}')

                    # Only the cost is displayed; the first element is unused.
                    _tokens, embedding_cost = calculate_embedding_cost(chunks)
                    st.write(f'Embedding cost: ${embedding_cost:.4f}')

                    # Create the embeddings and keep the returned vector store
                    # in session state so it persists between reruns.
                    st.session_state.vs = create_embeddings(chunks)
                    st.success('File uploaded, chunked and embedded successfully.')

    # ---- Main area: question input, answer and running chat history ----
    q = st.text_input('Ask a question about the content of your file:')
    # Run the query only if a question was entered and a vector store exists
    # (i.e. a file has already been uploaded, split and embedded).
    if q and 'vs' in st.session_state:
        vector_store = st.session_state.vs
        st.write(f'k: {k}')
        answer = ask_and_get_answer(vector_store, q, k)

        # Text area widget for the LLM answer.
        st.text_area('LLM Answer: ', value=answer)
        st.divider()

        # Initialize the chat history if there is none yet.
        if 'history' not in st.session_state:
            st.session_state.history = ''

        # Prepend the newest question/answer pair so the latest exchange is
        # shown first, separated from older ones by a dashed rule.
        value = f'Q: {q} \nA: {answer}'
        st.session_state.history = f'{value} \n {"-" * 100} \n {st.session_state.history}'

        # The widget is bound to the 'history' session-state key assigned just
        # above, so it displays that text. `value=` is intentionally omitted:
        # supplying both a default value and a Session State value for the
        # same key makes Streamlit emit a duplicate-value warning.
        st.text_area(label='Chat History', key='history', height=400)