MohsenParsa committed on
Commit
0380f92
1 Parent(s): d6c96dd

Delete app.py

Files changed (1)
  1. app.py +0 -140
app.py DELETED
@@ -1,140 +0,0 @@
- import textwrap
- from typing import List
- from langchain_community.llms import GPT4All
- from langchain_core.output_parsers import StrOutputParser
- from langchain_core.runnables import RunnablePassthrough
- from langchain_core.prompts import ChatPromptTemplate
- from langchain_core.callbacks import BaseCallbackHandler
- from langchain_community.vectorstores import FAISS
- from langchain_community.embeddings import LlamaCppEmbeddings
- from langchain_community.document_loaders import TextLoader
- from langchain_text_splitters import RecursiveCharacterTextSplitter
- # Only needed by the commented-out variants further below:
- # import bs4
- # from langchain.chains import create_retrieval_chain
- # from langchain.chains.combine_documents import create_stuff_documents_chain
- # from langchain_community.document_loaders import WebBaseLoader
-
- # The script runs in Google Colab; models and data live on Google Drive.
- from google.colab import drive
- drive.mount('/content/drive')
-
- # GGUF model files and data paths on Google Drive.
- local_path = "/content/drive/MyDrive/Model/aya-23-8B-Q3_K_S.gguf"  # chat model; alt: "/content/drive/MyDrive/Dorna-Llama3-8B-Instruct.Q5_0.gguf"
- model_path = "/content/drive/MyDrive/Model/labse.Q3_K_S.gguf"  # embedding model; alt: "/content/drive/MyDrive/labse.Q6_K.gguf"
- text_path = "/content/drive/MyDrive/gpt4all/docs/Books/chmn.txt"  # source document
- index_path = "/content/drive/MyDrive/gpt4all/index_CHEHEL_MAJLESE_NOOR"  # persisted FAISS index
-
- def initialize_embeddings() -> LlamaCppEmbeddings:
-     """Load the GGUF embedding model via llama.cpp."""
-     return LlamaCppEmbeddings(model_path=model_path)
-
- def load_documents() -> List:
-     """Read the source text file into LangChain documents."""
-     loader = TextLoader(text_path)
-     return loader.load()
-
- def split_chunks(sources: List) -> List:
-     """Split documents into 512-character chunks with 32 characters of overlap."""
-     splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=32)
-     return splitter.split_documents(sources)
-
- def generate_index(chunks: List, embeddings: LlamaCppEmbeddings) -> FAISS:
-     """Embed all chunks and build a FAISS index over them."""
-     texts = [doc.page_content for doc in chunks]
-     metadatas = [doc.metadata for doc in chunks]
-     return FAISS.from_texts(texts, embeddings, metadatas=metadatas)
-
- class MyCustomHandler(BaseCallbackHandler):
-     """Print each generated token as it streams in."""
-     def on_llm_new_token(self, token: str, **kwargs) -> None:
-         print(token, end="", flush=True)
-
- # Load the local chat model. Optionally: callbacks=[MyCustomHandler()] to
- # stream tokens through the handler, device='cuda:Tesla T4' to run on GPU.
- llm = GPT4All(model=local_path, n_threads=150, streaming=True, verbose=False)
-
- # 1. Load, chunk and index the contents of a blog to create a retriever
- #    (unused web-loader variant, kept for reference):
- # loader = WebBaseLoader(
- #     web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
- #     bs_kwargs=dict(
- #         parse_only=bs4.SoupStrainer(
- #             class_=("post-content", "post-title", "post-header")
- #         )
- #     ),
- # )
-
- def format_docs(docs):
-     """Join retrieved documents into a single context string."""
-     return "\n\n".join(doc.page_content for doc in docs)
-
- # Unused variant based on Chroma and OpenAI embeddings, kept for reference:
- # docs = loader.load()
- # text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
- # splits = text_splitter.split_documents(docs)
- # vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())
- # retriever = vectorstore.as_retriever()
-
- embeddings = initialize_embeddings()
-
- # Rebuild the FAISS index from the source text on demand; otherwise reuse
- # the index previously saved to Drive.
- rebuild_index = input('Rebuild index (y/n)? ')
- if rebuild_index == 'y':
-     # start = time.time()
-     sources = load_documents()
-     chunks = split_chunks(sources)
-     vectorstore = generate_index(chunks, embeddings)
-     vectorstore.save_local(index_path)
-     # print('Elapsed time to build index: ' + str(time.time() - start))
-
- # allow_dangerous_deserialization is required because load_local unpickles
- # the docstore; this is only safe for indexes you created yourself.
- index = FAISS.load_local(index_path, embeddings, allow_dangerous_deserialization=True)
- retriever = index.as_retriever()
-
- # 2. Incorporate the retriever into a question-answering chain.
- system_prompt = (
-     "You are an assistant for question-answering tasks. "
-     "Only use the {context} to answer. "
-     # The Persian instructions below say: "Please speak only in Persian and
-     # write all answers in Persian. Please make your answers long. If you
-     # do not find the answer to the question, say 'I don't know'."
-     "لطفاً فقط به زبان فارسی صحبت کن و تمام پاسخ ها را به زبان فارسی بنویس "
-     "لطفا پاسخ هایت طولانی باشد "
-     "اگر پاسخ سوال را نیافتی بگو نمیدانم"
-     "\n\n"
- )
-
- prompt = ChatPromptTemplate.from_messages(
-     [
-         ("system", system_prompt),
-         ("human", "{input}"),
-     ]
- )
-
- # Earlier variant using the prebuilt helpers, kept for reference:
- # question_answer_chain = create_stuff_documents_chain(llm, prompt)
- # rag_chain = create_retrieval_chain(retriever, question_answer_chain)
- # result = rag_chain.invoke({"input": "What is Task Decomposition?"})
-
- rag_chain_from_docs = (
-     {
-         "input": lambda x: x["input"],                   # pass the query through
-         "context": lambda x: format_docs(x["context"]),  # flatten retrieved docs
-     }
-     | prompt             # format query and context into the prompt
-     | llm                # generate a response
-     | StrOutputParser()  # coerce the output to a string
- )
-
- # Pass the input query to the retriever.
- retrieve_docs = (lambda x: x["input"]) | retriever
-
- # Below, we chain `.assign` calls. This takes a dict and successively adds
- # the keys "context" and "answer", where the value for each key is produced
- # by a Runnable operating on all existing keys in the dict. (A toy sketch
- # of this pattern follows the listing.)
- chain = RunnablePassthrough.assign(context=retrieve_docs).assign(
-     answer=rag_chain_from_docs
- )
-
- chat_history = []  # not used yet
- while True:
-     # Persian prompt: "Ask your question. I am humbly at your service:"
-     query = input("پرسش تان را بپرسید. حقیر در خدمتم: ")
-     if query.lower() == 'exit':
-         break
-     response = chain.invoke({"input": query})
-     print(textwrap.fill(response['answer'], 80))
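
For reference, a minimal self-contained sketch (toy values only, not taken from the app) of how ChatPromptTemplate.from_messages fills the {context} and {input} placeholders when the chain invokes it:

from langchain_core.prompts import ChatPromptTemplate

# Toy stand-ins for the retrieved context and the user query.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "Only use the {context} to answer."),
        ("human", "{input}"),
    ]
)
value = prompt.invoke({"context": "retrieved text", "input": "a question"})
print(value.to_messages())
# Roughly: [SystemMessage('Only use the retrieved text to answer.'),
#           HumanMessage('a question')]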
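
Likewise, a toy sketch of the .assign pattern behind the final chain; the lambdas are illustrative stand-ins for the retriever and for the prompt | llm | parser pipeline, not the app's actual components:

from langchain_core.runnables import RunnablePassthrough

# Each .assign adds one key to the dict flowing through the chain while
# preserving the existing keys, which is why the final result still carries
# "input" and "context" alongside "answer".
toy_chain = RunnablePassthrough.assign(
    context=lambda x: f"docs about {x['input']}"        # stand-in retriever
).assign(
    answer=lambda x: f"answer based on {x['context']}"  # stand-in prompt | llm
)

print(toy_chain.invoke({"input": "task decomposition"}))
# {'input': 'task decomposition',
#  'context': 'docs about task decomposition',
#  'answer': 'answer based on docs about task decomposition'}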