File size: 4,404 Bytes
acc9eef 425fab4 acc9eef 9eaa2b8 acc9eef 4ecf027 acc9eef 4ecf027 acc9eef 4ecf027 99e4d1a acc9eef 4ecf027 acc9eef 86c33f7 0cd7c97 acc9eef 4ecf027 acc9eef 4ecf027 acc9eef 86c33f7 acc9eef 4d80aaf acc9eef 4ecf027 acc9eef 4ecf027 acc9eef 4ecf027 acc9eef 4ecf027 acc9eef 4ecf027 acc9eef |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 |
import streamlit as st
from utils import *
# Streamlit session state holds the chat history; this function clears that history. Note: as our applications become more complicated, session state will be used more.
def clear_history():
    """Remove the 'history' entry from Streamlit's session state, if present.

    Used in this file as the ``on_change`` / ``on_click`` callback of the
    sidebar widgets. Does nothing when no history has been stored yet.
    """
    history_key = 'history'
    if history_key not in st.session_state:
        return
    del st.session_state[history_key]
# Build the UI only when this file is executed as the main script (e.g. via `streamlit run`).
if __name__ == "__main__":
    import os

    # Page layout: a sidebar for configuration and ingestion, and a main area
    # for asking questions and showing the running chat history.
    # The helpers load_document, chunk_data, calculate_embedding_cost,
    # create_embeddings and ask_and_get_answer are not defined in this file;
    # they are expected to come from the `from utils import *` star import.
    st.subheader('Load a Document and Ask a Question')

    with st.sidebar:
        # Masked text input for the OpenAI API key; when provided it is
        # exported through the process environment.
        api_key = st.text_input('OpenAI API Key:', type='password')
        if api_key:
            os.environ['OPENAI_API_KEY'] = api_key

        # File uploader widget (drag and drop). Original author's note: the
        # browse button works on Windows but not on Mac.
        uploaded_file = st.file_uploader(
            'To upload a file drag and drop it on the area below:',
            type=['pdf', 'docx', 'txt', 'csv'],
        )

        # Chunking / retrieval settings. Changing any of them clears the chat
        # history via the clear_history callback.
        chunk_size = st.number_input(
            'Chunk size:', min_value=100, max_value=2048, value=512,
            on_change=clear_history,
        )
        chunk_overlap = st.number_input(
            'Chunk Overlap:', min_value=0, max_value=200, value=20,
            on_change=clear_history,
        )
        # A larger k retrieves more documents per query, but is more expensive.
        k = st.number_input(
            'top-k most salient docs', min_value=1, max_value=20, value=3,
            on_change=clear_history,
        )

        # Clicking this button starts ingestion and also clears the history.
        add_data = st.button('Add Data', on_click=clear_history)

        # NOTE(review): the original indentation was lost; this branch is kept
        # inside the sidebar because its messages refer to widgets "above".
        if add_data:
            if not api_key:
                st.error("Please provide your OpenAI API key above.....")
            elif not uploaded_file:
                st.error("Please drag and drop your file to the upload area above.....")
            else:
                with st.spinner('Reading, chunking and embedding file ...'):
                    # Write the upload from RAM to the current directory.
                    # The file name is supplied by the client, so strip any
                    # directory components to keep the write inside './'.
                    bytes_data = uploaded_file.read()
                    safe_name = os.path.basename(uploaded_file.name)
                    file_name = os.path.join('./', safe_name)
                    with open(file_name, 'wb') as f:
                        f.write(bytes_data)

                    data = load_document(file_name)
                    chunks = chunk_data(data, chunk_size=chunk_size, chunk_overlap=chunk_overlap)
                    st.write(f'Chunk size: {chunk_size}, Chunks: {len(chunks)}')

                    # Only the cost is displayed; the token count is unused.
                    _tokens, embedding_cost = calculate_embedding_cost(chunks)
                    st.write(f'Embedding cost: ${embedding_cost:.4f}')

                    # Create the embeddings (per the original comment this
                    # returns a Chroma vector store) and keep the result in
                    # session state so it persists between reruns.
                    vector_store = create_embeddings(chunks)
                    st.session_state.vs = vector_store
                    st.success('File uploaded, chunked and embedded successfully.')

    # Main input widget: the user's question about the uploaded document.
    q = st.text_input('Ask a question about the content of your file:')
    if q:  # run the query once the user entered a question and hit enter
        if 'vs' not in st.session_state:
            # No vector store yet: tell the user instead of silently ignoring
            # the question.
            st.warning('Please upload a file and click "Add Data" before asking a question.')
        else:
            vector_store = st.session_state.vs
            st.write(f'k: {k}')
            answer = ask_and_get_answer(vector_store, q, k)

            # Text area widget for the LLM answer.
            st.text_area('LLM Answer: ', value=answer)
            st.divider()

            # Initialize the chat history if there is none yet.
            if 'history' not in st.session_state:
                st.session_state.history = ''

            # Prepend the current exchange so the newest Q/A appears first,
            # separated from older entries by a dashed line.
            entry = f'Q: {q} \nA: {answer}'
            st.session_state.history = f'{entry} \n {"-" * 100} \n {st.session_state.history}'
            h = st.session_state.history

            # Chat history text area widget.
            st.text_area(label='Chat History', value=h, key='history', height=400)
|