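"""Streamlit chat bot over the PDFs in the working directory.

Pipeline: load the PDFs, split them into chunks, embed the chunks into a
FAISS index, and answer user questions (in Persian) with a Llama 3 model
served by LM Studio through an OpenAI-compatible API.
"""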
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import HuggingFaceBgeEmbeddings
import streamlit as st
###########################################################################################
def get_pdf_load():
    # Load every PDF found in the current directory.
    loader = PyPDFDirectoryLoader("./")
    document = loader.load()
    return document
########################################################################################
def get_text_split(document):
    # Split the documents into overlapping chunks for retrieval.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    texts = text_splitter.split_documents(document)
    return texts
#########################################################################################
def get_vectorstore(texts):
    # Embed the chunks and persist them in a local FAISS index.
    DB_FAISS_PATH = 'vectore_Imstudio/faiss'
    embeddings = HuggingFaceBgeEmbeddings(
        model_name='Avditvs/multilingual-e5-small-distill-base-0.1',
        model_kwargs={'device': 'cpu'})
    db = FAISS.from_documents(texts, embeddings)
    db.save_local(DB_FAISS_PATH)
    return db
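# Reuse sketch (not wired into this app): with the same embedding model, the
# persisted index could be reloaded instead of re-embedding all PDFs, e.g.
# db = FAISS.load_local(DB_FAISS_PATH, embeddings).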
############################################################################################
def get_chain(db):
    # LM Studio serves the model behind an OpenAI-compatible endpoint (here
    # tunneled through ngrok); api_key is a dummy value required by the client.
    llm = ChatOpenAI(
        base_url="https://bd4c-85-9-86-142.ngrok-free.app/v1",
        api_key="lm-studio",
        temperature=0.1,
        model="lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF")
    # Build a conversational retrieval chain over the 2 closest chunks.
    qa_chain = ConversationalRetrievalChain.from_llm(
        llm, db.as_retriever(search_kwargs={'k': 2}),
        return_source_documents=True)
    return qa_chain
####################################################################################################################
def get_conversation(query_user, qa_chain):
    chat_history = []  # history is reset on every call, so turns are independent
    # Persian instruction prepended to the user's question; it translates to
    # "Explain this question in Persian:".
    query_1 = "این سوال را به زبان فارسی تشریح کن:"
    query = query_1 + query_user
    result = qa_chain({'question': query, 'chat_history': chat_history})
    st.write('Answer to your question: ' + result['answer'] + '\n')
    return result
####################################################################################################################
def main():
    st.set_page_config(
        page_title="Chat Bot PDFs",
        page_icon=":books:",
    )
    st.header("Chat Bot PDFs :books:")
    user_question = st.text_input("Ask a question about your documents:")
    # Streamlit reruns the whole script on each interaction, so the pipeline
    # below is rebuilt every time.
    document = get_pdf_load()
    st.write("PDFs loaded")
    texts = get_text_split(document)
    st.write("text split done")
    db = get_vectorstore(texts)
    st.write("vector store built")
    qa_chain = get_chain(db)
    st.write("model build complete")
    if st.button("Answer"):
        with st.spinner("Answering"):
            get_conversation(query_user=user_question, qa_chain=qa_chain)
    # if st.button("CLEAR"):
    #     with st.spinner("CLEARING"):
    #         st.cache_data.clear()
    # with st.sidebar:
    #     if st.button("Process build model"):


if __name__ == "__main__":
    main()
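# Usage (assuming this script is saved as app.py): place your PDFs next to it,
# make sure the LM Studio server behind the base_url above is reachable, then
# start the app with `streamlit run app.py`.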