herMaster committed
Commit 68c8d72
1 parent: 9f7d3b3

Change the model file and use ctransformers directly instead of langchain.

Files changed (1)
  1. app.py +9 -9
app.py CHANGED
@@ -8,8 +8,8 @@ from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
 # from langchain.llms import LlamaCpp
 from langchain.vectorstores import Qdrant
 from qdrant_client.http import models
-from langchain.llms import CTransformers
-# from ctransformers import AutoModelForCausalLM
+# from langchain.llms import CTransformers
+from ctransformers import AutoModelForCausalLM
 
 
 
@@ -36,13 +36,13 @@ print("loading the LLM......................................")
 # verbose=True,
 # )
 
-llm = CTransformers(
-    model="TheBloke/Llama-2-7B-Chat-GGUF/llama-2-7b-chat.Q8_0.gguf",
-    callbacks=[StreamingStdOutCallbackHandler()],
-    temperature = 0.2,
-    max_new_tokens = 1000,
-
-)
+llm = AutoModelForCausalLM.from_pretrained("TheBloke/Llama-2-7B-Chat-GGUF",
+                                           model_file="llama-2-7b-chat.Q8_0.gguf",
+                                           model_type="llama",
+                                           # config = ctransformers.hub.AutoConfig,
+                                           hf = True
+                                           )
+
 
 
 print("LLM loaded........................................")