Added the ability to pass the temperature through from the config to the HF models, and added a sample architecture which is effectively deterministic.
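The "effectively deterministic" behaviour follows from how the sampling temperature T rescales the token distribution before sampling:

    p_i = \frac{\exp(z_i / T)}{\sum_j \exp(z_j / T)}

As T approaches 0 from above, the probability mass collapses onto the highest-logit token, so sampling becomes indistinguishable from greedy decoding; at T = 0.01 the same prompt should therefore keep producing the same completion.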
Files changed:
- config/architectures.json (+7, -0)
- src/architectures.py (+4, -2)
config/architectures.json

@@ -7,6 +7,13 @@
         {"class": "HFLlamaHttpRequestor", "params": {"model": "meta-llama/Llama-2-7b-chat-hf", "system_prompt": "You are a helpful agent.", "max_tokens": 2000}}
       ]
     },
+    {
+      "name": "Deterministic LLM",
+      "description": "This is just a demonstration setup for configuration of the temperature setting. In this architecture the temperature has been set to 0.01, which means the LLM component is, in practical terms, selecting deterministically rather than probabilistically, so the same request should always result in the same response. To see this, copy your query, try it, then navigate away and back before trying it again - you should see the same response.",
+      "steps": [
+        {"class": "HFLlamaHttpRequestor", "params": {"model": "meta-llama/Llama-2-7b-chat-hf", "system_prompt": "You are a helpful agent.", "max_tokens": 2000, "temperature": 0.01}}
+      ]
+    },
     {
       "name": "RAG Architecture",
       "description": "An architecture which uses a raw baseline LLM for its core, but augments requests from the user with information which has been retrieved from a knowledge store where the organisational knowledge has previously been stored for this purpose.",
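The new entry relies on the step's "params" object being forwarded to the component constructor as keyword arguments, which is presumably how the temperature reaches the model. The sketch below only illustrates that flow; the registry and stub class are invented for the example and are not the project's actual loader code.

import json

# Stand-in for HFLlamaHttpRequestor with the same constructor signature;
# the real class lives in src/architectures.py.
class StubRequestor:
    def __init__(self, model: str, system_prompt: str, max_tokens: int, temperature: float = 1.0):
        self.model = model
        self.system_prompt = system_prompt
        self.max_tokens = max_tokens
        self.temperature = temperature

# Hypothetical registry mapping the "class" field to a component class.
STEP_CLASSES = {"HFLlamaHttpRequestor": StubRequestor}

step = json.loads(
    '{"class": "HFLlamaHttpRequestor", "params": {"model": "meta-llama/Llama-2-7b-chat-hf", '
    '"system_prompt": "You are a helpful agent.", "max_tokens": 2000, "temperature": 0.01}}'
)

# The params dict is unpacked into the constructor, so "temperature" arrives unchanged.
component = STEP_CLASSES[step["class"]](**step["params"])
print(component.temperature)  # 0.01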
src/architectures.py

@@ -321,11 +321,12 @@ class HFLlamaHttpRequestor(ArchitectureComponent):
     """
     description = "Passes the request to a model hosted on hugging face hub"

-    def __init__(self, model: str, system_prompt: str, max_tokens: int):
+    def __init__(self, model: str, system_prompt: str, max_tokens: int, temperature: float = 1.0):
         self.model: str = model
         self.system_prompt: str = system_prompt
         self.max_tokens = max_tokens
         self.api_token = hf_api_token()
+        self.temperature = temperature

     def config_description(self) -> str:
         """
@@ -333,6 +334,7 @@ class HFLlamaHttpRequestor(ArchitectureComponent):
         """
         desc = f"Model: {self.model}; "
         desc += f"Max tokens: {self.max_tokens}; "
+        desc += f"Temperature: {self.temperature}; "
         desc += f"System prompt: {self.system_prompt}"
         return desc

@@ -345,5 +347,5 @@ class HFLlamaHttpRequestor(ArchitectureComponent):
         llm = HFLlamaChatModel.for_model(self.model)
         if llm is None:
             raise ValueError(f'No model {self.model} configured in the environment')
-        response = llm(request.request, system_prompt=self.system_prompt, max_new_tokens=self.max_tokens)
+        response = llm(request.request, system_prompt=self.system_prompt, max_new_tokens=self.max_tokens, temperature=self.temperature)
         request.response = response
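A quick way to see the near-deterministic behaviour outside the Space is to send the same request twice at the low temperature. The sketch below uses the huggingface_hub InferenceClient directly rather than the project's HFLlamaChatModel wrapper, and the prompt is illustrative only; access to the gated Llama 2 model also requires an HF token.

from huggingface_hub import InferenceClient

# Assumes a valid HF token is available (e.g. via the HF_TOKEN environment variable)
# and grants access to the gated Llama 2 model.
client = InferenceClient(model="meta-llama/Llama-2-7b-chat-hf")

def ask(prompt: str) -> str:
    # temperature=0.01 mirrors the "Deterministic LLM" configuration above.
    return client.text_generation(prompt, max_new_tokens=50, temperature=0.01)

first = ask("Name the three primary colours.")
second = ask("Name the three primary colours.")
print(first == second)  # expected True when sampling is effectively deterministic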