alfraser committed
Commit 227dc25 · 1 Parent(s): 8c04dd2

Removed now-unused code that called Hugging Face via the free HTTP inference API; moved everything to Inference Endpoints so the two paths behave equivalently.

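For context, a minimal sketch (not code from this repository) of what the switch means at the HTTP level. The free-tier URL and payload shape are taken from the src/models.py removed below; the endpoint hostname is a made-up placeholder, since real Inference Endpoint URLs are assigned at provisioning time.

import requests

HF_TOKEN = "hf_..."  # placeholder; the repo reads this via hf_api_token()

# Before: the removed HFLlamaChatModel posted to the shared, free serverless
# Inference API, addressed by model id (URL taken from the deleted src/models.py).
free_api_url = "https://api-inference.huggingface.co/models/meta-llama/Llama-2-7b-chat-hf"

# After: requests go to a dedicated Inference Endpoint instead. This hostname
# is a made-up placeholder; real endpoint URLs are assigned when provisioned.
endpoint_url = "https://my-llama-endpoint.endpoints.huggingface.cloud"

# Both services accept the same JSON shape, which is what makes the move
# "equivalent" from the caller's point of view.
payload = {
    "inputs": "[INST] <<SYS>> You are a helpful assistant. <</SYS>> Hello [/INST] ",
    "parameters": {"max_new_tokens": 256, "temperature": 1.0},
}
headers = {"Authorization": f"Bearer {HF_TOKEN}"}

response = requests.post(endpoint_url, headers=headers, json=payload)
response.raise_for_status()
print(response.json()[0]["generated_text"].strip())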
config/models.json DELETED
@@ -1,14 +0,0 @@
- {
-     "models": [
-         {
-             "name": "Llama2 Chat 7B",
-             "id": "meta-llama/Llama-2-7b-chat-hf",
-             "description": "The unmodified 7 billion parameter version of the llama 2 chat model from meta."
-         },
-         {
-             "name": "Llama2 Chat 13B",
-             "id": "meta-llama/Llama-2-13b-chat-hf",
-             "description": "The unmodified 13 billion parameter version of the llama 2 chat model from meta."
-         }
-     ]
- }
 
pages/005_LLM_Models.py DELETED
@@ -1,61 +0,0 @@
- import streamlit as st
-
- from src.models import HFLlamaChatModel
- from src.st_helpers import st_setup
-
- if st_setup('LLM Models'):
-     st.write("# LLM Models")
-     st.write("The project uses a number of different models which are deployed with other components to form a variety of architectures. This page lists those models, and allows users to interact in isolation just with the model directly, excluding any other architecture components.")
-
-     if st.button('Force reload of models config'):
-         HFLlamaChatModel.load_configs()
-
-     SESSION_KEY_CHAT_SERVER = 'chat_server'
-     button_count = 0
-
-
-     def button_key() -> str:
-         global button_count
-         button_count += 1
-         return f"btn_{button_count}"
-
-     server_container = st.container()
-     chat_container = st.container()
-
-     with server_container:
-         server_count = len(HFLlamaChatModel.available_models())
-         if server_count == 1:
-             st.write(f'### 1 model configured')
-         else:
-             st.write(f'### {server_count} models configured')
-
-         with st.container():
-             st.divider()
-             for i, m in enumerate(HFLlamaChatModel.models):
-                 with st.container():  # row
-                     content, actions = st.columns([4, 1])
-                     with content:
-                         st.write(f'**{m.name}** \n\n _{m.description}_')
-
-                     with actions:
-                         if st.button("Chat with this model", key=button_key()):
-                             st.session_state[SESSION_KEY_CHAT_SERVER] = m.name
-                             st.rerun()
-
-             st.divider()
-
-     if SESSION_KEY_CHAT_SERVER in st.session_state:
-         with chat_container:
-             st.write(f"### Chatting with {st.session_state[SESSION_KEY_CHAT_SERVER]}")
-             st.write(
-                 "Note this is a simple single prompt call back to the relevant chat server. This is just a toy so you can interact with it and does not manage a chat session history.")
-             with st.chat_message("assistant"):
-                 st.write("Chat with me in the box below")
-     if prompt := st.chat_input("Ask a question"):
-         with chat_container:
-             with st.chat_message("user"):
-                 st.write(prompt)
-             chat_model = HFLlamaChatModel.for_name(st.session_state[SESSION_KEY_CHAT_SERVER])
-             response = chat_model(prompt)
-             with st.chat_message("assistant"):
-                 st.write(response)
 
src/architectures.py CHANGED
@@ -17,7 +17,6 @@ from typing import List, Optional
  from better_profanity import profanity

  from src.common import config_dir, data_dir, hf_api_token, escape_dollars
- from src.models import HFLlamaChatModel


  class ArchitectureRequest:
@@ -343,43 +342,6 @@ class RetrievalAugmentor(ArchitectureComponent):
          return desc


- class HFLlamaHttpRequestor(ArchitectureComponent):
-     """
-     A concrete pipeline component which sends the user text to a given llama chat based
-     model on hugging face.
-     """
-     description = "Passes the request to a model hosted on hugging face hub"
-
-     def __init__(self, model: str, system_prompt: str, max_tokens: int, temperature: float = 1.0):
-         self.model: str = model
-         self.system_prompt: str = system_prompt
-         self.max_tokens = max_tokens
-         self.api_token = hf_api_token()
-         self.temperature = temperature
-
-     def config_description(self) -> str:
-         """
-         Custom config details as markdown
-         """
-         desc = f"Model: {self.model}; "
-         desc += f"Max tokens: {self.max_tokens}; "
-         desc += f"Temperature: {self.temperature}; "
-         desc += f"System prompt: {self.system_prompt}"
-         return desc
-
-     def process_request(self, request: ArchitectureRequest) -> None:
-         """
-         Main processing method for this function. Calls the HTTP service for the model
-         by port if provided or attempting to lookup by name, and then adds this to the
-         response element of the request.
-         """
-         llm = HFLlamaChatModel.for_model(self.model)
-         if llm is None:
-             raise ValueError(f'No model {self.model} configured in the environment')
-         response = llm(request.request, system_prompt=self.system_prompt, max_new_tokens=self.max_tokens, temperature=self.temperature)
-         request.response = response
-
-
  class HFInferenceEndpoint(ArchitectureComponent):
      """
      A concrete pipeline component which sends the user text to a given llama chat based
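The surviving HFInferenceEndpoint class is truncated in the diff above. As a rough sketch only, an endpoint-backed component mirroring the removed HFLlamaHttpRequestor's interface might look like the following; the class name EndpointRequestor and the api_url parameter are illustrative assumptions, not the repository's actual code.

import requests

from src.architectures import ArchitectureComponent, ArchitectureRequest
from src.common import hf_api_token


class EndpointRequestor(ArchitectureComponent):
    """Illustrative stand-in for HFInferenceEndpoint, not the repository's code."""
    description = "Passes the request to a dedicated Hugging Face Inference Endpoint"

    def __init__(self, api_url: str, system_prompt: str, max_tokens: int, temperature: float = 1.0):
        self.api_url = api_url  # assumption: the URL assigned when the endpoint is provisioned
        self.system_prompt = system_prompt
        self.max_tokens = max_tokens
        self.temperature = temperature
        self.api_token = hf_api_token()

    def process_request(self, request: ArchitectureRequest) -> None:
        # Same Llama-2 chat prompt framing and payload shape as the removed
        # free-API path, just posted to the dedicated endpoint URL.
        headers = {"Authorization": f"Bearer {self.api_token}"}
        payload = {
            "inputs": f"[INST] <<SYS>> {self.system_prompt} <</SYS>> {request.request} [/INST] ",
            "parameters": {"max_new_tokens": self.max_tokens, "temperature": self.temperature},
        }
        response = requests.post(self.api_url, headers=headers, json=payload)
        response.raise_for_status()
        request.response = response.json()[0]["generated_text"].strip()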
src/models.py DELETED
@@ -1,73 +0,0 @@
- import json
- import os
- import requests
- from typing import List
-
- from src.common import config_dir, hf_api_token
-
-
- class HFLlamaChatModel:
-     models = None
-
-     @classmethod
-     def load_configs(cls):
-         config_file = os.path.join(config_dir, "models.json")
-         with open(config_file, "r") as f:
-             configs = json.load(f)['models']
-         cls.models = []
-         for cfg in configs:
-             if cls.for_name(cfg['name']) is None:
-                 cls.models.append(HFLlamaChatModel(cfg['name'], cfg['id'], cfg['description']))
-
-     @classmethod
-     def for_name(cls, name: str):
-         if cls.models is None:
-             cls.load_configs()
-         for m in cls.models:
-             if m.name == name:
-                 return m
-
-     @classmethod
-     def for_model(cls, model: str):
-         if cls.models is None:
-             cls.load_configs()
-         for m in cls.models:
-             if m.id == model:
-                 return m
-
-     @classmethod
-     def available_models(cls) -> List[str]:
-         if cls.models is None:
-             cls.load_configs()
-         return [m.name for m in cls.models]
-
-     def __init__(self, name: str, id: str, description: str):
-         self.name = name
-         self.id = id
-         self.description = description
-
-     def __call__(self,
-                  query: str,
-                  auth_token: str = None,
-                  system_prompt: str = None,
-                  max_new_tokens: str = 256,
-                  temperature: float = 1.0):
-         if auth_token is None:
-             auth_token = hf_api_token()  # Attempt look up if not provided
-         headers = {"Authorization": f"Bearer {auth_token}"}
-         api_url = f"https://api-inference.huggingface.co/models/{self.id}"
-         if system_prompt is None:
-             system_prompt = "You are a helpful assistant."
-         query_input = f"[INST] <<SYS>> {system_prompt} <<SYS>> {query} [/INST] "
-         query_payload = {
-             "inputs": query_input,
-             "parameters": {"max_new_tokens": max_new_tokens, "temperature": temperature}
-         }
-         response = requests.post(api_url, headers=headers, json=query_payload)
-         if response.status_code == 200:
-             resp_json = json.loads(response.text)
-             llm_text = resp_json[0]['generated_text'].strip()
-             return llm_text
-         else:
-             error_detail = f"Error from hugging face code: {response.status_code}: {response.reason} ({response.content})"
-             raise ValueError(error_detail)
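For reference, a short sketch of how the class removed above was driven, based only on the code in this diff (the model name comes from the deleted config/models.json; the query string is illustrative):

from src.models import HFLlamaChatModel

model = HFLlamaChatModel.for_name("Llama2 Chat 7B")    # name from the deleted config/models.json
reply = model("What is a llama?", max_new_tokens=128)  # POSTs to the free serverless Inference API
print(reply)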