# Page metadata captured together with this file (not part of the program):
# ali121300's picture
# Update app.py
# 9a66b4f verified
# raw
# history blame
# 2.68 kB
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.llms import openai
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import HuggingFaceBgeEmbeddings
###########################################################################################
def get_pdf_load():
    """Load the PDF files found under ``./data``.

    Returns:
        Whatever ``PyPDFDirectoryLoader.load()`` yields for that directory.
    """
    return PyPDFDirectoryLoader("./data").load()
########################################################################################
def get_text_split(document):
    """Split the loaded documents into overlapping chunks.

    Args:
        document: The loaded documents to split (as returned by
            ``get_pdf_load``).

    Returns:
        The chunks produced by ``RecursiveCharacterTextSplitter`` using a
        chunk size of 1000 and an overlap of 100.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=100,
    )
    return splitter.split_documents(document)
#########################################################################################
def get_vectorstore(texts):
    """Embed the chunks, build a FAISS index from them and save it to disk.

    Args:
        texts: Document chunks to index (as returned by ``get_text_split``).

    Returns:
        The FAISS vector store built from ``texts``. A copy is also written
        to ``vectore_Imstudio/faiss`` via ``save_local``.
    """
    index_dir = 'vectore_Imstudio/faiss'

    # Embeddings are computed on CPU.
    # NOTE(review): the model name is an E5-family model but it is loaded
    # through the BGE embeddings wrapper -- confirm the wrapper's default
    # query instruction is appropriate for this model.
    embedder = HuggingFaceBgeEmbeddings(
        model_name='Avditvs/multilingual-e5-small-distill-base-0.1',
        model_kwargs={'device': 'cpu'},
    )

    store = FAISS.from_documents(texts, embedder)
    store.save_local(index_dir)
    return store
############################################################################################
def get_chain(
    db,
    base_url="https://bd4c-85-9-86-142.ngrok-free.app/v1",
    api_key="lm-studio",
    model="lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF",
    temperature=0.1,
    k=2,
):
    """Build a conversational retrieval chain on top of a vector store.

    All keyword parameters default to the values that used to be hard-coded,
    so ``get_chain(db)`` behaves exactly as before. They are exposed because
    the default endpoint is an ngrok tunnel hostname, which presumably does
    not stay valid across tunnel restarts -- callers can now pass the current
    address instead of editing this function.

    Args:
        db: Vector store exposing ``as_retriever`` (e.g. the FAISS store
            returned by ``get_vectorstore``).
        base_url: Base URL of the OpenAI-compatible chat endpoint.
        api_key: API key sent to that endpoint.
        model: Model identifier requested from the endpoint.
        temperature: Sampling temperature passed to the chat model.
        k: Value passed to the retriever as ``search_kwargs={'k': k}``.

    Returns:
        A ``ConversationalRetrievalChain`` configured with
        ``return_source_documents=True``.
    """
    llm = ChatOpenAI(
        base_url=base_url,
        api_key=api_key,
        temperature=temperature,
        model=model,
    )
    retriever = db.as_retriever(search_kwargs={'k': k})
    return ConversationalRetrievalChain.from_llm(
        llm,
        retriever,
        return_source_documents=True,
    )
####################################################################################################################
def main():
    """Run the Streamlit UI for chatting with the PDFs.

    The page offers three buttons:

    * "Build Model" loads the PDFs, splits them, builds the vector store and
      the retrieval chain, and keeps the chain in ``st.session_state``.
    * "Answer" sends the typed question (plus the conversation so far) to the
      stored chain and displays the answer.
    * "CLEAR" clears Streamlit's data cache and the stored conversation
      history.
    """
    # Imported here because the file's import block never brings ``st`` into
    # scope, which made every ``st.*`` call below raise NameError.
    import streamlit as st

    st.set_page_config(
        page_title="Chat Bot PDFs",
        page_icon=":books:",
    )
    st.header("Chat Bot PDFs :books:")
    user_question = st.text_input("Ask a question about your documents:")

    if st.button("Build Model"):
        with st.spinner("Waiting"):
            document = get_pdf_load()
            texts = get_text_split(document)
            db = get_vectorstore(texts)
            # Streamlit re-executes the script on every interaction, so a
            # plain local variable would be gone by the time "Answer" is
            # clicked. Session state survives those reruns.
            st.session_state["qa_chain"] = get_chain(db)
            st.session_state["chat_history"] = []
            st.write("compelete build model")

    if st.button("Answer"):
        qa_chain = st.session_state.get("qa_chain")
        if qa_chain is None:
            st.warning("Build the model first.")
        elif not user_question.strip():
            st.warning("Enter a question first.")
        else:
            with st.spinner("Answering"):
                history = st.session_state.get("chat_history", [])
                result = qa_chain.invoke(
                    {"question": user_question, "chat_history": history}
                )
                answer = result["answer"]
                # The chain takes prior turns as (question, answer) pairs.
                st.session_state["chat_history"] = history + [
                    (user_question, answer)
                ]
            st.write(answer)

    if st.button("CLEAR"):
        with st.spinner("CLEARING"):
            st.cache_data.clear()
            st.session_state["chat_history"] = []


if __name__ == "__main__":
    main()