import os

import streamlit as st
from utils import *


# Streamlit session state holds the chat history; this callback clears it.
# NOTE: as the application grows, session state will be used more heavily.
def clear_history():
    """Remove the chat history from Streamlit's session state.

    Registered as an on_change/on_click callback so that changing any
    chunking/retrieval parameter or re-uploading data starts a fresh chat.
    """
    if 'history' in st.session_state:
        del st.session_state['history']


# Script entry point (run with `streamlit run <file>`).
if __name__ == "__main__":
    st.subheader('Load a Document and Ask a Question')

    # ----- Sidebar: API key, file upload, and chunking parameters -----
    with st.sidebar:
        # The key is exported as an env var so the helpers in utils
        # (embedding / LLM calls) can pick it up implicitly.
        api_key = st.text_input('OpenAI API Key:', type='password')
        if api_key:
            os.environ['OPENAI_API_KEY'] = api_key

        # File uploader widget: drag & drop always works; the browse button
        # has been observed to work on Windows but not on macOS.
        uploaded_file = st.file_uploader(
            'To upload a file drag and drop it on the area below:',
            type=['pdf', 'docx', 'txt', 'csv'])

        # Changing any of these invalidates the current conversation, so
        # each widget clears the chat history via its callback.
        chunk_size = st.number_input('Chunk size:', min_value=100,
                                     max_value=2048, value=512,
                                     on_change=clear_history)
        chunk_overlap = st.number_input('Chunk Overlap:', min_value=0,
                                        max_value=200, value=20,
                                        on_change=clear_history)
        # Larger k retrieves more context per query: better recall, but a
        # more expensive LLM call.
        k = st.number_input('top-k most salient docs', min_value=1,
                            max_value=20, value=3, on_change=clear_history)

        add_data = st.button('Add Data', on_click=clear_history)

        # Process the upload only when the button was clicked; require an
        # API key and an uploaded file, reporting the missing piece.
        # (The original also re-tested `add_data` in the inner condition,
        # which was redundant inside this branch.)
        if add_data:
            if not api_key:
                st.error("Please provide your OpenAI API key above.....")
            elif not uploaded_file:
                st.error("Please drag and drop your file to the upload area above.....")
            else:
                with st.spinner('Reading, chunking and embedding file ...'):
                    # Persist the in-memory upload to the current directory
                    # so the loaders in utils (which expect a path) can
                    # read it from disk.
                    bytes_data = uploaded_file.read()
                    file_name = os.path.join('./', uploaded_file.name)
                    with open(file_name, 'wb') as f:
                        f.write(bytes_data)

                    data = load_document(file_name)
                    chunks = chunk_data(data, chunk_size=chunk_size,
                                        chunk_overlap=chunk_overlap)
                    st.write(f'Chunk size: {chunk_size}, Chunks: {len(chunks)}')

                    tokens, embedding_cost = calculate_embedding_cost(chunks)
                    st.write(f'Embedding cost: ${embedding_cost:.4f}')

                    # Build the embeddings / Chroma vector store and keep it
                    # in session state so it survives Streamlit reruns.
                    vector_store = create_embeddings(chunks)
                    st.session_state.vs = vector_store
                    st.success('File uploaded, chunked and embedded successfully.')

    # ----- Main area: question input, answer, and chat history -----
    q = st.text_input('Ask a question about the content of your file:')
    if q:
        # Only answer once a document has been uploaded, split and embedded
        # (i.e. the vector store exists in session state).
        if 'vs' in st.session_state:
            vector_store = st.session_state.vs
            st.write(f'k: {k}')
            answer = ask_and_get_answer(vector_store, q, k)
            st.text_area('LLM Answer: ', value=answer)

            st.divider()
            # Initialize the chat history on first question.
            if 'history' not in st.session_state:
                st.session_state.history = ''

            # Prepend the newest Q/A pair so the latest exchange is on top.
            value = f'Q: {q} \nA: {answer}'
            st.session_state.history = f'{value} \n {"-" * 100} \n {st.session_state.history}'
            h = st.session_state.history
            # BUG FIX: the original passed key='history' here, which makes
            # the widget own st.session_state['history']; the assignment to
            # that key above then conflicts with the widget's state on
            # reruns (StreamlitAPIException / default-value warning).
            # Keeping the widget display-only (no key) avoids the conflict.
            st.text_area(label='Chat History', value=h, height=400)