Set up configuration for models on HF and an associated page in the application to allow end-user test chat.
- config/models.json +9 -0
- pages/005_LLM_Models.py +58 -0
- src/common.py +1 -0
- src/models.py +64 -0
config/models.json
ADDED
@@ -0,0 +1,9 @@
+{
+    "models": [
+        {
+            "name": "Llama2 Chat 7B",
+            "id": "meta-llama/Llama-2-7b-chat-hf",
+            "description": "The unmodified 7 billion parameter version of the llama 2 chat model from meta."
+        }
+    ]
+}
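Each entry in the models list supplies the three fields that src/models.py reads: a display name, the Hugging Face repository id, and a free-text description. A quick way to sanity-check a new entry before committing it is to load the file the same way the wrapper does. This is a minimal sketch, not part of the commit, and it assumes it is run from the repository root:

    import json
    import os

    # Load the same file src/models.py reads and confirm each entry carries the
    # keys HFLlamaChatModel expects: name, id and description.
    with open(os.path.join("config", "models.json"), "r") as f:
        entries = json.load(f)["models"]
    for entry in entries:
        missing = {"name", "id", "description"} - entry.keys()
        assert not missing, f"{entry} is missing {missing}"
    print([e["name"] for e in entries])  # e.g. ['Llama2 Chat 7B']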
pages/005_LLM_Models.py
ADDED
@@ -0,0 +1,58 @@
+import streamlit as st
+
+from src.models import HFLlamaChatModel
+from src.st_helpers import st_setup
+
+if st_setup('LLM Models'):
+    st.write("# LLM Models")
+    st.write("The project uses a number of different models which are deployed with other components to form a variety of architectures. This page lists those models, and allows users to interact in isolation just with the model directly, excluding any other architecture components.")
+
+    SESSION_KEY_CHAT_SERVER = 'chat_server'
+    HF_AUTH_KEY_SECRET = 'hf_token'
+    button_count = 0
+
+
+    def button_key() -> str:
+        global button_count
+        button_count += 1
+        return f"btn_{button_count}"
+
+    server_container = st.container()
+    chat_container = st.container()
+
+    with server_container:
+        server_count = len(HFLlamaChatModel.available_models())
+        if server_count == 1:
+            st.write(f'### 1 server configured')
+        else:
+            st.write(f'### {server_count} servers configured')
+
+        with st.container():
+            for i, m_name in enumerate(HFLlamaChatModel.available_models()):
+                with st.container():  # row
+                    content, actions = st.columns([4, 1])
+                    with content:
+                        st.write(m_name)
+
+                    with actions:
+                        if st.button("Chat with server", key=button_key()):
+                            st.session_state[SESSION_KEY_CHAT_SERVER] = m_name
+                            st.rerun()
+                    if i != len(HFLlamaChatModel.available_models()) - 1:
+                        st.divider()
+
+    if SESSION_KEY_CHAT_SERVER in st.session_state:
+        with chat_container:
+            st.write(f"### Chatting with {st.session_state[SESSION_KEY_CHAT_SERVER]}")
+            st.write(
+                "Note this is a simple single prompt call back to the relevant chat server. This is just a toy so you can interact with it and does not manage a chat session history.")
+            with st.chat_message("assistant"):
+                st.write("Chat with me in the box below")
+        if prompt := st.chat_input("Ask a question"):
+            with chat_container:
+                with st.chat_message("user"):
+                    st.write(prompt)
+                chat_model = HFLlamaChatModel.get_model(st.session_state[SESSION_KEY_CHAT_SERVER])
+                response = chat_model(prompt, st.secrets[HF_AUTH_KEY_SECRET])
+                with st.chat_message("assistant"):
+                    st.write(response)
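The page reads the API token from st.secrets under the hf_token key (the HF_AUTH_KEY_SECRET constant above), so the Space needs that secret configured, and a local run needs it in .streamlit/secrets.toml, the default location Streamlit loads secrets from. A minimal pre-flight check for a local run, shown only as a sketch and assuming Python 3.11+ for tomllib:

    import os
    import tomllib  # standard library on Python 3.11+, used here only for this check

    # Confirm the token the page expects is present before launching `streamlit run`.
    secrets_path = os.path.join(".streamlit", "secrets.toml")
    with open(secrets_path, "rb") as f:
        secrets = tomllib.load(f)
    assert "hf_token" in secrets, "add hf_token to .streamlit/secrets.toml (or to the Space secrets)"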
src/common.py
CHANGED
@@ -2,3 +2,4 @@ import os
 
 
 data_dir = os.path.join(os.path.dirname(__file__), '..', 'data')
+config_dir = os.path.join(os.path.dirname(__file__), '..', 'config')
src/models.py
ADDED
@@ -0,0 +1,64 @@
+import json
+import os
+import requests
+from typing import List
+
+from src.common import config_dir
+
+
+class HFLlamaChatModel:
+    models = None
+
+    @classmethod
+    def load_configs(cls):
+        config_file = os.path.join(config_dir, "models.json")
+        with open(config_file, "r") as f:
+            configs = json.load(f)['models']
+        cls.models = []
+        for cfg in configs:
+            if cls.get_model(cfg['name']) is None:
+                cls.models.append(HFLlamaChatModel(cfg['name'], cfg['id'], cfg['description']))
+
+    @classmethod
+    def get_model(cls, model: str):
+        for m in cls.models:
+            if m.name == model:
+                return m
+
+    @classmethod
+    def available_models(cls) -> List[str]:
+        if cls.models is None:
+            cls.load_configs()
+        return [m.name for m in cls.models]
+
+    def __init__(self, name: str, id: str, description: str):
+        self.name = name
+        self.id = id
+        self.description = description
+
+    def __call__(self,
+                 query: str,
+                 auth_token: str,
+                 system_prompt: str = None,
+                 max_new_tokens: str = 256,
+                 temperature: float = 1.0):
+        headers = {"Authorization": f"Bearer {auth_token}"}
+        api_url = f"https://api-inference.huggingface.co/models/{self.id}"
+        if system_prompt is None:
+            system_prompt = "You are a helpful assistant."
+        query_input = f"[INST] <<SYS>> {system_prompt} <<SYS>> {query} [/INST] "
+        query_payload = {
+            "inputs": query_input,
+            "parameters": {"max_new_tokens": max_new_tokens, "temperature": temperature}
+        }
+        print(query_payload)
+        response = requests.post(api_url, headers=headers, json=query_payload)
+        if response.status_code == 200:
+            resp_json = json.loads(response.text)
+            llm_text = resp_json[0]['generated_text']
+            query_len = len(query_input)
+            llm_text = llm_text[query_len:].strip()
+            return llm_text
+        else:
+            error_detail = f"Error from hugging face code: {response.status_code}: {response.reason} ({response.content})"
+            raise ValueError(error_detail)
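Because get_model does not lazy-load the registry itself, callers should go through available_models() (or load_configs()) first, as the Streamlit page does. A minimal sketch of driving the wrapper outside Streamlit, assuming a valid Inference API token in an HF_TOKEN environment variable (the variable name is only an example, not part of the commit):

    import os
    from src.models import HFLlamaChatModel

    # available_models() loads config/models.json on first use and returns the
    # configured display names, e.g. ['Llama2 Chat 7B'].
    names = HFLlamaChatModel.available_models()
    model = HFLlamaChatModel.get_model(names[0])

    # __call__ posts to the HF Inference API and returns only the generated text
    # that follows the prompt.
    reply = model("What is the capital of France?",
                  auth_token=os.environ["HF_TOKEN"],
                  max_new_tokens=128,
                  temperature=0.7)
    print(reply)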