alfraser committed on
Commit aee0ded · 1 Parent(s): bd6f44c

Added the ability to pass the temperature through from the config to the HF models, and added a sample architecture which is effectively deterministic.

config/architectures.json CHANGED
@@ -7,6 +7,13 @@
       {"class": "HFLlamaHttpRequestor", "params": {"model": "meta-llama/Llama-2-7b-chat-hf", "system_prompt": "You are a helpful agent.", "max_tokens": 2000}}
     ]
   },
+  {
+    "name": "Determinstic LLM",
+    "description": "This is just a demonstration setup for configuration of the temperature setting. In this architecture setup, the temperature has been set to 0.001 which means the LLM component is in practical terms, not selecting probabilistically, rather deterministically. Therefore the same request should always result in the same response. In order to see this, copy your query, try it, then navigate away and back before trying it again - you should see the same response.",
+    "steps": [
+      {"class": "HFLlamaHttpRequestor", "params": {"model": "meta-llama/Llama-2-7b-chat-hf", "system_prompt": "You are a helpful agent.", "max_tokens": 2000, "temperature": 0.01}}
+    ]
+  },
   {
     "name": "RAG Architecture",
     "description": "An architecture which uses a raw baseline LLM for its core, but augments requests from the user with information which has been retrieved from a knowledge store where the organisational knowledge has previously been stored for this purpose.",
src/architectures.py CHANGED
@@ -321,11 +321,12 @@ class HFLlamaHttpRequestor(ArchitectureComponent):
     """
     description = "Passes the request to a model hosted on hugging face hub"
 
-    def __init__(self, model: str, system_prompt: str, max_tokens: int):
+    def __init__(self, model: str, system_prompt: str, max_tokens: int, temperature: float = 1.0):
         self.model: str = model
         self.system_prompt: str = system_prompt
         self.max_tokens = max_tokens
         self.api_token = hf_api_token()
+        self.temperature = temperature
 
     def config_description(self) -> str:
         """
@@ -333,6 +334,7 @@ class HFLlamaHttpRequestor(ArchitectureComponent):
         """
         desc = f"Model: {self.model}; "
         desc += f"Max tokens: {self.max_tokens}; "
+        desc += f"Temperature: {self.temperature}; "
         desc += f"System prompt: {self.system_prompt}"
         return desc
 
@@ -345,5 +347,5 @@ class HFLlamaHttpRequestor(ArchitectureComponent):
         llm = HFLlamaChatModel.for_model(self.model)
         if llm is None:
             raise ValueError(f'No model {self.model} configured in the environment')
-        response = llm(request.request, system_prompt=self.system_prompt, max_new_tokens=self.max_tokens)
+        response = llm(request.request, system_prompt=self.system_prompt, max_new_tokens=self.max_tokens, temperature=self.temperature)
         request.response = response
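The new architecture's description claims that a temperature near zero makes token selection practically deterministic. A self-contained sketch of temperature-scaled softmax sampling (illustrative only, not code from this repository) shows why: dividing the logits by a very small temperature pushes essentially all probability mass onto the highest-scoring token, so repeated identical requests keep picking the same token.

    import math
    import random

    def sample_with_temperature(logits, temperature):
        # Scale logits by 1/temperature, then softmax them into probabilities.
        scaled = [l / temperature for l in logits]
        m = max(scaled)  # subtract the max for numerical stability
        exps = [math.exp(s - m) for s in scaled]
        total = sum(exps)
        probs = [e / total for e in exps]
        # Draw one token index according to those probabilities.
        return random.choices(range(len(logits)), weights=probs, k=1)[0]

    logits = [2.0, 1.5, 0.3]
    print([sample_with_temperature(logits, 1.0) for _ in range(10)])   # mix of indices
    print([sample_with_temperature(logits, 0.01) for _ in range(10)])  # almost always index 0

At temperature 1.0 the lower-scoring tokens are still picked regularly; at 0.01 the top logit dominates so completely that the draw is, in practical terms, greedy.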