alfraser committed on
Commit aee0ded · 1 Parent(s): bd6f44c

Added the ability to pass the temperature through from the config to the HF models, and added a sample architecture which is effectively deterministic.

config/architectures.json CHANGED
@@ -7,6 +7,13 @@
       {"class": "HFLlamaHttpRequestor", "params": {"model": "meta-llama/Llama-2-7b-chat-hf", "system_prompt": "You are a helpful agent.", "max_tokens": 2000}}
     ]
   },
+  {
+    "name": "Determinstic LLM",
+    "description": "This is just a demonstration setup for configuration of the temperature setting. In this architecture setup, the temperature has been set to 0.001 which means the LLM component is in practical terms, not selecting probabilistically, rather deterministically. Therefore the same request should always result in the same response. In order to see this, copy your query, try it, then navigate away and back before trying it again - you should see the same response.",
+    "steps": [
+      {"class": "HFLlamaHttpRequestor", "params": {"model": "meta-llama/Llama-2-7b-chat-hf", "system_prompt": "You are a helpful agent.", "max_tokens": 2000, "temperature": 0.01}}
+    ]
+  },
   {
     "name": "RAG Architecture",
     "description": "An architecture which uses a raw baseline LLM for its core, but augments requests from the user with information which has been retrieved from a knowledge store where the organisational knowledge has previously been stored for this purpose.",
src/architectures.py CHANGED
@@ -321,11 +321,12 @@ class HFLlamaHttpRequestor(ArchitectureComponent):
     """
     description = "Passes the request to a model hosted on hugging face hub"
 
-    def __init__(self, model: str, system_prompt: str, max_tokens: int):
+    def __init__(self, model: str, system_prompt: str, max_tokens: int, temperature: float = 1.0):
         self.model: str = model
         self.system_prompt: str = system_prompt
         self.max_tokens = max_tokens
         self.api_token = hf_api_token()
+        self.temperature = temperature
 
     def config_description(self) -> str:
         """
@@ -333,6 +334,7 @@ class HFLlamaHttpRequestor(ArchitectureComponent):
         """
         desc = f"Model: {self.model}; "
         desc += f"Max tokens: {self.max_tokens}; "
+        desc += f"Temperature: {self.temperature}; "
         desc += f"System prompt: {self.system_prompt}"
         return desc
 
@@ -345,5 +347,5 @@ class HFLlamaHttpRequestor(ArchitectureComponent):
         llm = HFLlamaChatModel.for_model(self.model)
         if llm is None:
             raise ValueError(f'No model {self.model} configured in the environment')
-        response = llm(request.request, system_prompt=self.system_prompt, max_new_tokens=self.max_tokens)
+        response = llm(request.request, system_prompt=self.system_prompt, max_new_tokens=self.max_tokens, temperature=self.temperature)
         request.response = response
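The new architecture's description claims that a temperature near zero makes token selection practically deterministic. A self-contained sketch of temperature-scaled softmax sampling (illustrative only, not code from this repository) shows why: dividing the logits by a very small temperature pushes essentially all probability mass onto the highest-scoring token, so repeated identical requests keep picking the same token.

    import math
    import random

    def sample_with_temperature(logits, temperature):
        # Scale logits by 1/temperature, then softmax them into probabilities.
        scaled = [l / temperature for l in logits]
        m = max(scaled)  # subtract the max for numerical stability
        exps = [math.exp(s - m) for s in scaled]
        total = sum(exps)
        probs = [e / total for e in exps]
        # Draw one token index according to those probabilities.
        return random.choices(range(len(logits)), weights=probs, k=1)[0]

    logits = [2.0, 1.5, 0.3]
    print([sample_with_temperature(logits, 1.0) for _ in range(10)])   # mix of indices
    print([sample_with_temperature(logits, 0.01) for _ in range(10)])  # almost always index 0

At temperature 1.0 the lower-scoring tokens are still picked regularly; at 0.01 the top logit dominates so completely that the draw is, in practical terms, greedy.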