File size: 3,200 Bytes
9a66b4f
 
ec9e166
9a66b4f
ec9e166
cbf031d
9a66b4f
751653d
8e8ccf4
9c39b4d
9a66b4f
01481a7
9a66b4f
 
4ac050d
9a66b4f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96cad08
b56342f
ba6247e
 
 
 
 
751653d
ba6247e
ec9e166
 
33ac479
ec9e166
 
8c29218
33ac479
7e93132
06f0aa1
 
 
 
 
 
 
 
9c636da
 
 
751653d
9a66b4f
0c95774
652d1d7
 
 
0c95774
8c29218
9a66b4f
 
 
ec9e166
907eaa3
2a31b94
1d9366f
a7dd90a
9f78fda
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.llms import openai
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import HuggingFaceBgeEmbeddings
import streamlit as st
###########################################################################################

def get_pdf_load():
    """Load the PDF files found in the current working directory.

    Returns:
        Whatever ``PyPDFDirectoryLoader("./").load()`` produces (the loaded
        documents).
    """
    return PyPDFDirectoryLoader("./").load()
########################################################################################
def get_text_split(document):
    """Split loaded documents into overlapping text chunks.

    Args:
        document: The documents to split (``main`` passes the result of
            ``get_pdf_load``).

    Returns:
        The chunks returned by ``RecursiveCharacterTextSplitter.split_documents``,
        built with a chunk size of 1000 and an overlap of 100.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=100,
    )
    return splitter.split_documents(document)
#########################################################################################
def get_vectorstore(texts):
    """Embed the text chunks, build a FAISS index, and save it to disk.

    Args:
        texts: The document chunks to index.

    Returns:
        The FAISS vector store built from ``texts``. As a side effect the
        store is also written to ``vectore_Imstudio/faiss``.
    """
    # Directory the index is persisted to (relative to the working directory).
    index_dir = 'vectore_Imstudio/faiss'

    # The embedding model is explicitly pinned to the CPU.
    embedder = HuggingFaceBgeEmbeddings(
        model_name='Avditvs/multilingual-e5-small-distill-base-0.1',
        model_kwargs={'device': 'cpu'},
    )

    store = FAISS.from_documents(texts, embedder)
    store.save_local(index_dir)
    return store
############################################################################################
def get_chain(db):
    """Create the conversational retrieval chain on top of a vector store.

    Args:
        db: A vector store exposing ``as_retriever`` (``main`` passes the
            FAISS store from ``get_vectorstore``).

    Returns:
        A ``ConversationalRetrievalChain`` that retrieves with ``k=2`` and is
        configured with ``return_source_documents=True``.
    """
    # NOTE(review): the endpoint below is a hard-coded ngrok tunnel URL;
    # presumably it fronts an LM Studio server - confirm it is still valid.
    chat_model = ChatOpenAI(
        base_url="https://bd4c-85-9-86-142.ngrok-free.app/v1",
        api_key="lm-studio",
        temperature=0.1,
        model="lmstudio-community/Meta-Llama-3-8B-Instruct-GGUF",
    )
    retriever = db.as_retriever(search_kwargs={'k': 2})
    return ConversationalRetrievalChain.from_llm(
        chat_model,
        retriever,
        return_source_documents=True,
    )
####################################################################################################################
def get_conversation(query_user, qa_chain):
    """Ask the chain one question and render its answer in the Streamlit page.

    Args:
        query_user: The question text typed by the user.
        qa_chain: A callable chain accepting a dict with ``question`` and
            ``chat_history`` keys and returning a mapping with an ``answer`` key.

    Returns:
        The raw result returned by ``qa_chain``.
    """
    # Fixed Persian instruction prepended to every question (it appears to
    # ask the model to explain the question in Persian).
    prefix = "این سوال را به زبان فارسی تشریح کن:"

    # The history is always sent empty, so every question is asked without
    # any previous turns.
    payload = {'question': prefix + query_user, 'chat_history': []}
    result = qa_chain(payload)

    st.write('Answer of you question:' + result['answer'] + '\n')
    return result
####################################################################################################################
def _build_qa_chain():
    """Run the full load -> split -> embed -> chain pipeline once.

    Returns:
        The retrieval chain produced by ``get_chain``.
    """
    document = get_pdf_load()
    st.write("load pdf")
    texts = get_text_split(document)
    st.write("text split")
    db = get_vectorstore(texts)
    st.write("vectore store")
    qa_chain = get_chain(db)
    st.write("compelete build model")
    return qa_chain


def main():
    """Render the chatbot page and answer the user's question on demand.

    Streamlit re-executes the whole script on every widget interaction.
    The expensive pipeline (PDF loading, splitting, embedding, index build)
    is therefore obtained through ``st.cache_resource`` so it is computed
    once and reused on later reruns instead of being rebuilt every time.
    """
    st.set_page_config(
        page_title="Chat Bot PDFs",
        page_icon=":books:",
    )

    st.header("Chat Bot PDFs :books:")
    user_question = st.text_input("Ask a question about your documents:")

    # Wrap at call time rather than with a module-level decorator so that
    # importing this module does not touch Streamlit. The cache entry is
    # keyed on the function itself, so re-wrapping on every rerun still
    # hits the same cached chain.
    qa_chain = st.cache_resource(_build_qa_chain)()

    if st.button("Answer"):
        # Do not send a prompt that contains only the fixed prefix.
        if not user_question.strip():
            st.warning("Please enter a question first.")
            return
        with st.spinner("Answering"):
            get_conversation(query_user=user_question, qa_chain=qa_chain)
                
              
          
    #if st.button("CLEAR"):
            #with st.spinner("CLEARING"):
              #st.cache_data.clear()
         
    
    #with st.sidebar:
        #if st.button("Process build model"):
 


# Start the app only when this file is executed as a script, not when it is imported.
if __name__ == "__main__": 
    main()