Update app.py
app.py
CHANGED
@@ -55,8 +55,17 @@ def get_vectorstore(text_chunks : list) -> FAISS:
 
 
 def get_conversation_chain(vectorstore):
-
-    #
+    n_gpu_layers = 40  # Change this value based on your model and your GPU VRAM pool.
+    n_batch = 512  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
+    n_ctx = 2048
+    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
+    # Make sure the model path is correct for your system
+    llm = LlamaCpp(
+        model_path="mostafaamiri/persian-llama-7b-GGUF-Q4",
+        n_gpu_layers=n_gpu_layers, n_batch=n_batch,
+        callback_manager=callback_manager,
+        verbose=True,
+        n_ctx=n_ctx)
     memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
     conversation_chain = ConversationalRetrievalChain.from_llm(
         llm=llm,
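
For readability, here is the whole function as it stands after this commit, as one runnable sketch with the imports it needs. Import paths follow the classic langchain package layout (newer releases moved LlamaCpp to langchain_community.llms). The hunk above cuts off after llm=llm,, so the remaining from_llm arguments and the return statement are assumptions, marked as such in the comments:

from langchain.llms import LlamaCpp
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

def get_conversation_chain(vectorstore):
    n_gpu_layers = 40  # Layers offloaded to the GPU; tune to your model and VRAM pool.
    n_batch = 512      # Should be between 1 and n_ctx; also constrained by VRAM.
    n_ctx = 2048       # Context window size for the llama.cpp model.
    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
    # Make sure the model path is correct for your system.
    llm = LlamaCpp(
        model_path="mostafaamiri/persian-llama-7b-GGUF-Q4",
        n_gpu_layers=n_gpu_layers,
        n_batch=n_batch,
        callback_manager=callback_manager,
        verbose=True,
        n_ctx=n_ctx,
    )
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    conversation_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vectorstore.as_retriever(),  # assumed: cut off in the hunk above
        memory=memory,                         # assumed: cut off in the hunk above
    )
    return conversation_chain  # assumed: cut off in the hunk above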
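
One caveat worth flagging: LlamaCpp loads a local GGUF file from disk, so passing the bare Hub repo id "mostafaamiri/persian-llama-7b-GGUF-Q4" as model_path will fail at load time. A minimal sketch of fetching the file first with huggingface_hub; the filename argument is hypothetical, since the repo's actual .gguf filename is not shown here:

from huggingface_hub import hf_hub_download

# Download (or reuse from cache) the quantized model and get its local path.
# "persian-llama-7b.Q4.gguf" is a hypothetical filename; check the repo's
# file listing for the real one.
local_model_path = hf_hub_download(
    repo_id="mostafaamiri/persian-llama-7b-GGUF-Q4",
    filename="persian-llama-7b.Q4.gguf",
)
# Then pass local_model_path as model_path= to LlamaCpp above.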