Update rag.py

- app/main.py +1 -1
- app/rag.py +3 -1
app/main.py
@@ -77,7 +77,7 @@ def upload(files: list[UploadFile]):
 @app.get("/clear")
 def ping():
     session_assistant.clear()
-    message = "All files have been cleared."
+    message = "All files have been cleared. The first query may take a little longer."
     generator = re.split(r'(\s)', message)
     return StreamingResponse(astreamer(generator), media_type='text/event-stream')
 
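The /clear endpoint streams its confirmation message back to the client word by word. The astreamer helper it relies on is not shown in this diff; as a rough sketch (the name reuse, signature, and per-token delay are assumptions, not code from this Space), such an adapter could turn the whitespace-split generator into an async stream that StreamingResponse can serve as text/event-stream:

import asyncio
from typing import AsyncIterator, Iterable

async def astreamer(generator: Iterable[str]) -> AsyncIterator[str]:
    # Hypothetical sketch: yield each whitespace-split chunk as-is,
    # with a tiny pause so the client renders a typing effect.
    for token in generator:
        yield token
        await asyncio.sleep(0.01)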
app/rag.py
@@ -47,8 +47,10 @@ class ChatPDF:
             model_url="https://huggingface.co/Qwen/Qwen2-0.5B-Instruct-GGUF/resolve/main/qwen2-0_5b-instruct-fp16.gguf",
             temperature=0.1,
             max_new_tokens=256,
+            n_ctx=3900,
+            n_batch=256,
             context_window=3900, #32k
-
+            generate_kwargs={"max_tokens": 256, "temperature": 0.1, "top_k": 3},
             # model_kwargs={"n_gpu_layers": -1},
             # messages_to_prompt=self.messages_to_prompt,
             # completion_to_prompt=self.completion_to_prompt,
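For context, these keyword arguments sit inside the LlamaCPP constructor used by ChatPDF. The sketch below only illustrates how the commit's settings might fit together, assuming LlamaIndex's llama-index-llms-llama-cpp wrapper around llama-cpp-python; everything not visible in the diff above is an assumption. Note the commit passes n_ctx and n_batch at the top level, while the wrapper usually routes llama-cpp-python load options through model_kwargs, so depending on the installed version they may need to move there.

from llama_index.llms.llama_cpp import LlamaCPP

# Illustrative only; not the Space's actual rag.py.
llm = LlamaCPP(
    model_url="https://huggingface.co/Qwen/Qwen2-0.5B-Instruct-GGUF/resolve/main/qwen2-0_5b-instruct-fp16.gguf",
    temperature=0.1,
    max_new_tokens=256,
    context_window=3900,
    # per-request sampling options forwarded to llama-cpp-python
    generate_kwargs={"max_tokens": 256, "temperature": 0.1, "top_k": 3},
    # model-load options (context size, batch size, GPU offload) usually go here
    model_kwargs={"n_ctx": 3900, "n_batch": 256},
    verbose=False,
)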