# Spaces: Sleeping / Sleeping  (apparently hosting-page status text captured with the source; not part of the program)
import streamlit as st
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.llms import openai
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
########################################################################################### | |
def get_pdf_load():
    """Load the PDFs found under ``./data`` and return the loaded documents.

    Returns:
        Whatever ``PyPDFDirectoryLoader.load()`` produces for the ``./data``
        directory (resolved relative to the current working directory).
    """
    return PyPDFDirectoryLoader("./data").load()
######################################################################################## | |
def get_text_split(document):
    """Split the loaded documents into overlapping chunks.

    Args:
        document: The documents returned by ``get_pdf_load``.

    Returns:
        The chunks produced by a ``RecursiveCharacterTextSplitter`` configured
        with ``chunk_size=1000`` and ``chunk_overlap=100`` (presumably
        measured in characters -- confirm against the splitter's defaults).
    """
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    return splitter.split_documents(document)
######################################################################################### | |
def get_vectorstore(texts):
    """Embed ``texts`` into a FAISS index, save it to disk, and return it.

    Args:
        texts: The document chunks to index (e.g. the output of
            ``get_text_split``). Must not be empty.

    Returns:
        The FAISS vector store built from ``texts``.

    Raises:
        ValueError: If ``texts`` is empty. An index cannot be built from
            nothing, so fail with a clear message before paying the cost of
            loading the embedding model.
    """
    if not texts:
        raise ValueError(
            "Cannot build a vector store from an empty list of documents."
        )

    # Directory the index is persisted to, relative to the working directory.
    db_faiss_path = 'vectore_Imstudio/faiss'

    # Embeddings are computed on the CPU.
    embeddings = HuggingFaceBgeEmbeddings(
        model_name='Avditvs/multilingual-e5-small-distill-base-0.1',
        model_kwargs={'device': 'cpu'},
    )
    db = FAISS.from_documents(texts, embeddings)
    db.save_local(db_faiss_path)
    return db
############################################################################################ | |
def get_chain(db):
    """Create the conversational retrieval chain that answers from ``db``.

    Args:
        db: A vector store exposing ``as_retriever`` (e.g. the FAISS store
            returned by ``get_vectorstore``).

    Returns:
        A ``ConversationalRetrievalChain`` that retrieves with ``k=2`` and is
        configured with ``return_source_documents=True``.
    """
    # Chat model served from an OpenAI-compatible endpoint at a hard-coded URL.
    chat_model = ChatOpenAI(
        base_url="https://bd4c-85-9-86-142.ngrok-free.app/v1",
        api_key="lm-studio",
        temperature=0.1,
        model="lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF",
    )
    retriever = db.as_retriever(search_kwargs={'k': 2})
    return ConversationalRetrievalChain.from_llm(
        chat_model,
        retriever,
        return_source_documents=True,
    )
#################################################################################################################### | |
def main():
    """Render the page and handle the Build Model / Answer / CLEAR buttons.

    Streamlit re-executes the script on every widget interaction, so the
    retrieval chain and the chat history are kept in ``st.session_state``.
    A plain local variable assigned while handling "Build Model" would no
    longer exist on the rerun triggered by pressing "Answer".
    """
    st.set_page_config(
        page_title="Chat Bot PDFs",
        page_icon=":books:",
    )
    st.header("Chat Bot PDFs :books:")
    user_question = st.text_input("Ask a question about your documents:")

    if st.button("Build Model"):
        with st.spinner("Waiting"):
            document = get_pdf_load()
            texts = get_text_split(document)
            if not texts:
                # Nothing to index: report it instead of failing while
                # building the vector store.
                st.error("No text could be extracted from the PDFs in ./data.")
            else:
                db = get_vectorstore(texts)
                st.session_state["qa_chain"] = get_chain(db)
                # A freshly built model starts a fresh conversation.
                st.session_state["chat_history"] = []
                st.write("compelete build model")

    if st.button("Answer"):
        if "qa_chain" not in st.session_state:
            st.warning('Press "Build Model" before asking a question.')
        elif not user_question.strip():
            st.warning("Please enter a question first.")
        else:
            if "chat_history" not in st.session_state:
                st.session_state["chat_history"] = []
            chat_history = st.session_state["chat_history"]
            with st.spinner("Answering"):
                # The chain takes the new question plus the earlier
                # (question, answer) pairs and returns the reply under
                # the "answer" key.
                result = st.session_state["qa_chain"](
                    {"question": user_question, "chat_history": chat_history}
                )
            chat_history.append((user_question, result["answer"]))
            st.write(result["answer"])

    if st.button("CLEAR"):
        with st.spinner("CLEARING"):
            st.cache_data.clear()
            # Also drop the built chain and the conversation so the app
            # returns to its initial state.
            st.session_state.pop("qa_chain", None)
            st.session_state.pop("chat_history", None)
#with st.sidebar: | |
#if st.button("Process build model"): | |
# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()