"""Streamlit page that lets a user upload a file and ask questions about it."""

from uuid import uuid4

import streamlit as st
from openai.error import OpenAIError
from streamlit_chat import message

from .utils import (
    parse_docx,
    parse_pdf,
    parse_txt,
    search_docs,
    embed_docs,
    text_to_docs,
    get_answer,
)

# Filename suffix -> parser. The uploader widget accepts more extensions than
# are listed here; anything without a parser is reported as unsupported.
_PARSERS = {
    ".pdf": parse_pdf,
    ".docx": parse_docx,
    ".txt": parse_txt,
}


def clear_submit() -> None:
    """Reset the "submit" flag so a stale question is not re-run."""
    st.session_state["submit"] = False


def set_openai_api_key(api_key: str) -> None:
    """Store *api_key* in the session state under ``OPENAI_API_KEY``."""
    st.session_state["OPENAI_API_KEY"] = api_key


def _parse_uploaded_file(uploaded_file):
    """Parse *uploaded_file* with the parser matching its extension.

    Shows an error in the UI and returns ``None`` when no parser matches.
    The extension is compared case-insensitively so ``REPORT.PDF`` works.
    """
    filename = uploaded_file.name.lower()
    for suffix, parser in _PARSERS.items():
        if filename.endswith(suffix):
            return parser(uploaded_file)
    st.error("File type not yet supported! Supported files: [.pdf, .docx, .txt]")
    return None


def _index_document(doc):
    """Split *doc* into chunks, preview the first one, and embed them.

    Returns the index produced by ``embed_docs``, or ``None`` when the
    OpenAI call fails (the error is shown in the UI).
    """
    text = text_to_docs(text=tuple(doc))
    st.write(text[:1])
    try:
        with st.spinner("Indexing document(s)... \nThis may take some time."):
            index = embed_docs(tuple(text))
        st.session_state["api_key_configured"] = True
    except OpenAIError as e:
        st.error(e._message)
        return None
    return index


def _ask_question(has_api_key):
    """Render the question box and return its text.

    NOTE(review): the original indentation was lost; this assumes the text
    area is only shown once an API key has been entered -- confirm.
    Returns ``None`` when no API key is present.
    """
    if not has_api_key:
        return None
    st.header("Ask me something about the document:")
    return st.text_area("You:", on_change=clear_submit)


def _render_history() -> None:
    """Show stored question/answer pairs, most recent exchange first."""
    exchanges = zip(st.session_state["generated"], st.session_state["past"])
    for answer, question in reversed(list(exchanges)):
        # Random keys keep widget keys unique across reruns.
        message(answer, key=str(uuid4()))
        message(question, is_user=True, key=str(uuid4()))


def qa_main() -> None:
    """Render the page: sidebar (API key + upload), chat tab, and about tab."""
    st.markdown(
        "\n\nThis app allows to chat with files!\n\n",
        unsafe_allow_html=True,
    )
    st.markdown(
        " Developed using LangChain and OpenAI Embeddings.\n\n"
        'Before hitting on "Submit", please make sure you have uploaded a file '
        "and entered a question. You can upload files using the sidebar on the "
        "left. ",
        unsafe_allow_html=True,
    )

    index = None

    # NOTE(review): parsing/indexing is assumed to live inside the sidebar
    # block, as the collapsed source suggests -- confirm against the original.
    with st.sidebar:
        user_secret = st.text_input(
            "OpenAI API Key",
            type="password",
            placeholder="Paste your OpenAI API key here (sk-...)",
            help="You can get your API key from https://platform.openai.com/account/api-keys.",
            value=st.session_state.get("OPENAI_API_KEY", ""),
        )
        if user_secret:
            set_openai_api_key(user_secret)

        uploaded_file = st.file_uploader(
            "Upload a pdf, docx, or txt file",
            type=["pdf", "docx", "txt", "py", "json", "html", "css", "md"],
            help="Scanned documents are not supported yet!",
            on_change=clear_submit,
            accept_multiple_files=False,
        )

        # reading the files
        if uploaded_file is not None:
            doc = _parse_uploaded_file(uploaded_file)
            # Skip indexing for unsupported files instead of crashing on
            # tuple(None).
            if doc is not None:
                index = _index_document(doc)

    tab1, tab2 = st.tabs(["Chat With File", "About the Application"])

    with tab1:
        if 'generated' not in st.session_state:
            st.session_state['generated'] = []
        if 'past' not in st.session_state:
            st.session_state['past'] = []

        user_input = _ask_question(bool(user_secret))
        button = st.button("Submit")

        if button or st.session_state.get("submit"):
            if not user_input:
                st.error("Please enter a question!")
            elif index is None:
                # Nothing to search: no upload, unsupported type, or the
                # embedding step failed.
                st.error("Please upload a supported file before asking a question!")
            else:
                st.session_state["submit"] = True
                sources = search_docs(index, user_input)
                try:
                    answer = get_answer(sources, user_input)
                    st.session_state.past.append(user_input)
                    st.session_state.generated.append(answer["output_text"])
                except OpenAIError as e:
                    st.error(e._message)

        if st.session_state['past']:
            _render_history()

    with tab2:
        st.write('See sources')
        # st.write('Chat with Files enables user to extract all the information from a file. User can obtain the transcription, the embedding of each segment and also ask questions to the file through a chat.')
        # st.write('Features include- ')
        # st.write('1. Reading any pdf, docx or plain txt (such as python programs) file')
        # st.write('2. Embedding texts segments with Langchain and OpenAI')
        # st.write('3. Chatting with the file using streamlit-chat and LangChain QA with source and the GPT4 model')