ali121300 committed on
Commit
9b9d29d
1 Parent(s): 22d45be

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -2
app.py CHANGED
@@ -55,8 +55,17 @@ def get_vectorstore(text_chunks : list) -> FAISS:
55
 
56
 
57
  def get_conversation_chain(vectorstore):
58
- llm = ChatOpenAI(temperature=0.2)
59
- # llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature":0.2, "max_length":512})
 
 
 
 
 
 
 
 
 
60
  memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
61
  conversation_chain = ConversationalRetrievalChain.from_llm(
62
  llm=llm,
 
55
 
56
 
57
  def get_conversation_chain(vectorstore):
58
+ n_gpu_layers = 40 # Change this value based on your model and your GPU VRAM pool.
59
+ n_batch = 512 # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
60
+ n_ctx=2048
61
+ callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
62
+ # Make sure the model path is correct for your system
63
+ llm = LlamaCpp(
64
+ model_path="mostafaamiri/persian-llama-7b-GGUF-Q4",
65
+ n_gpu_layers=n_gpu_layers, n_batch=n_batch,
66
+ callback_manager=callback_manager,
67
+ verbose=True,
68
+ n_ctx=n_ctx)
69
  memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
70
  conversation_chain = ConversationalRetrievalChain.from_llm(
71
  llm=llm,