|
import streamlit as st |
|
|
|
from components.sidebar import sidebar |
|
|
|
from ui import ( |
|
wrap_doc_in_html, |
|
is_query_valid, |
|
is_file_valid, |
|
is_open_ai_key_valid, |
|
display_file_read_error, |
|
) |
|
|
|
from core.caching import bootstrap_caching |
|
|
|
from core.parsing import read_file |
|
from core.chunking import chunk_file |
|
from core.embedding import embed_files |
|
from core.qa import query_folder |
|
from core.utils import get_llm |
|
|
|
|
|
# Embedding backend name passed as `embedding=` to embed_files further down.
EMBEDDING = "openai"

# Vector store backend name passed as `vector_store=` to embed_files.
VECTOR_STORE = "faiss"

# Chat model names known to the app; the script uses the first entry.
MODEL_LIST = ["gpt-3.5-turbo", "gpt-4"]
|
|
|
|
|
|
|
|
|
# Page chrome: browser tab title/icon, wide layout, and the on-page header.
st.set_page_config(
    page_title="☘️InspectSolv",
    page_icon="☘️",
    layout="wide",
)
st.header("☘️InspectSolv")

# Set up caching, then draw the sidebar, in that order.
bootstrap_caching()
sidebar()

# The key is read from session state under "OPENAI_API_KEY".
openai_api_key = st.session_state.get("OPENAI_API_KEY")

# A missing key only produces a warning here; the script keeps running and
# the key is checked again with is_open_ai_key_valid after a file is read.
if not openai_api_key:
    st.warning(
        "Enter your OpenAI API key in the sidebar. You can get a key at"
        " https://platform.openai.com/account/api-keys."
    )
|
|
|
|
|
|
|
|
|
# Model used for key validation and for answering; first entry of MODEL_LIST.
model = MODEL_LIST[0]

# Upload widget restricted to the three listed extensions. Its value is falsy
# until the user has picked a file (the script checks `not uploaded_file`).
uploaded_file = st.file_uploader(
    label="Upload a pdf, docx, or txt file",
    type=["pdf", "docx", "txt"],
    help="Scanned documents are not supported yet!",
)
|
|
|
|
|
# Imports used only by the diagnostics section below, kept together ahead of
# the widget code instead of in the middle of the `with` body.
import sys
from importlib import metadata

with st.expander("Advanced Options"):
    # Display toggles consumed later in the script: `return_all_chunks` is
    # forwarded to query_folder, `show_full_doc` gates the document view.
    return_all_chunks = st.checkbox("Show all chunks retrieved from vector search")
    show_full_doc = st.checkbox("Show parsed contents of the document")

    # Runtime diagnostics: interpreter version and every installed
    # distribution as "name==version".
    st.markdown('# config')
    st.markdown(f'Python version: {sys.version}')

    # Render the package list as ONE markdown element (one paragraph per
    # package) rather than one element per package, so a rerun does not emit
    # a separate Streamlit element for every installed distribution.
    st.markdown(
        "\n\n".join(
            f"{dist.name}=={dist.version}" for dist in metadata.distributions()
        )
    )
|
|
|
|
|
|
|
# Nothing to process until the user has uploaded a document.
if not uploaded_file:
    st.stop()

# Parse the upload. Any failure is reported through display_file_read_error.
try:
    file = read_file(uploaded_file)
except Exception as e:
    display_file_read_error(e, file_name=uploaded_file.name)
    # Halt this run explicitly: if the error helper returns instead of
    # stopping the script itself, `file` would be unbound below and the user
    # would see a NameError instead of the read-error message.
    st.stop()

# Validate the parsed file before chunking it, so an invalid document is
# rejected without first being handed to chunk_file.
if not is_file_valid(file):
    st.stop()

chunked_file = chunk_file(file, chunk_size=300, chunk_overlap=0)

# Indexing needs a usable key; stop here if the key/model check fails.
if not is_open_ai_key_valid(openai_api_key, model):
    st.stop()
|
|
|
|
|
# Build the index for the chunked document while a spinner is displayed.
# When `model` is the literal "debug", both backends are switched to "debug";
# otherwise the module-level EMBEDDING / VECTOR_STORE names are used.
with st.spinner("Indexing document... This may take a while⏳"):
    folder_index = embed_files(
        files=[chunked_file],
        embedding="debug" if model == "debug" else EMBEDDING,
        vector_store="debug" if model == "debug" else VECTOR_STORE,
        openai_api_key=openai_api_key,
    )
|
|
|
|
|
# Question form: the user picks one of the fixed prompts and submits it.
# `query` holds the selected prompt; `submit` is the submit button's value.
with st.form(key="qa_form"):
    options = [
        'List all pre existing conditions which may affect home insurance',
        'Show the problematic components!',
        'Show repair needs!',
    ]
    query = st.selectbox(label='Select an option', options=options)
    submit = st.form_submit_button("Submit")
|
|
|
|
|
# Optional view of the parsed document, enabled by the "Show parsed contents
# of the document" checkbox.
if show_full_doc:
    with st.expander("Document"):
        # NOTE(review): the document text is injected as raw HTML
        # (unsafe_allow_html=True). Whether wrap_doc_in_html escapes the
        # uploaded content is not visible here -- confirm before trusting
        # arbitrary uploads.
        st.markdown(
            f"<p>{wrap_doc_in_html(file.docs)}</p>",
            unsafe_allow_html=True,
        )
|
|
|
|
|
# Runs only on the rerun triggered by the form's submit button.
if submit:
    # Reject the query up front; nothing below runs for an invalid one.
    if not is_query_valid(query):
        st.stop()

    # Two side-by-side columns: answer on the left, sources on the right.
    answer_col, sources_col = st.columns(2)

    llm = get_llm(model=model, openai_api_key=openai_api_key, temperature=0)
    result = query_folder(
        folder_index=folder_index,
        query=query,
        return_all=return_all_chunks,
        llm=llm,
    )

    # Answer column.
    answer_col.markdown("#### Answer")
    answer_col.markdown(result.answer)

    # Sources column: for each source, its text, then its "source" metadata
    # entry, then a horizontal rule as a separator.
    sources_col.markdown("#### Sources")
    for source in result.sources:
        sources_col.markdown(source.page_content)
        sources_col.markdown(source.metadata["source"])
        sources_col.markdown("---")
|
|
|
|
|
|
|
|