Reviewed comments and type hints
src/architectures.py  CHANGED  (+63 -18)
@@ -1,6 +1,8 @@
 """
 This file contains all the code which defines architectures and
-architecture components.
+architecture components. An architecture is modelled as a pipeline of ArchitectureComponents
+through which an ArchitectureRequest flows. Architectures are configured in the file
+config/architectures.json
 """

 import chromadb
@@ -82,7 +84,7 @@ class ArchitectureTraceOutcome(Enum):

 class ArchitectureTraceStep:
     """
-    Class to hold the details of a single
+    Class to hold the trace details of a single step in an Architecture pipeline
     """
     def __init__(self, name: str):
         self.name = name
@@ -165,6 +167,11 @@ class ArchitectureTrace:


 class ArchitectureComponent(ABC):
+    """
+    This is the abstract base class for all classes which want to be concrete components available
+    to be configured into an Architecture pipeline. Specifies the elements which need to be implemented
+    to be a compliant architecture component.
+    """
     description = "Components should override a description"

     @abstractmethod
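In practice a compliant component is just a subclass that overrides description and implements process_request. The following minimal sketch illustrates that contract; ToyRequest and ToyComponent are simplified stand-ins for the real ArchitectureRequest and ArchitectureComponent classes, not code from this commit.

from abc import ABC, abstractmethod


class ToyRequest:
    # Simplified stand-in for ArchitectureRequest
    def __init__(self, request: str):
        self.request = request
        self.response = None


class ToyComponent(ABC):
    # Simplified stand-in for ArchitectureComponent
    description = "Components should override a description"

    @abstractmethod
    def process_request(self, request: ToyRequest) -> None:
        ...


class UpperCaseEcho(ToyComponent):
    # A trivial concrete component: it sets the response to the upper-cased query
    description = "Echoes the request text back in upper case."

    def process_request(self, request: ToyRequest) -> None:
        request.response = request.request.upper()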
@@ -188,6 +195,17 @@ class ArchitectureComponent(ABC):


 class LogWorker(Thread):
+    """
+    The LogWorker implements a daemon thread which runs in the background to write the results
+    of user queries through the system to a log file for analysis/reporting and offline saving.
+    The LogWorker provides two functions to the system: 1) it moves this I/O operation out of the
+    main architecture execution, which allows for a clearer understanding of the true performance of the
+    architectures themselves; 2) it is designed to be run as a single thread to provide controlled
+    shared access to a resource (the log file) with an in-memory queue for thread safety, which then
+    allows us to multi-thread the architecture invocation itself. In addition, the LogWorker provides
+    some basic batching capabilities for performance (e.g. it batches up to N requests before committing the I/O
+    operation to the file, or commits open activity after a set period of inactivity).
+    """
     instance = None
     architectures = None
     save_repo = None
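The queue-plus-timer pattern this docstring describes can be boiled down to a short sketch: a single daemon thread drains an in-memory queue, batches records, and a threading.Timer puts a sentinel on the queue after a period of inactivity to force a flush. The batch size, timeout and file format below are illustrative assumptions, not values from this repository.

import json
import queue
from threading import Thread, Timer


class MiniLogWorker(Thread):
    # Minimal illustration of the batching / inactivity-flush pattern
    COMMIT_AFTER_N = 10       # assumed batch size
    INACTIVITY_SECS = 30.0    # assumed inactivity window

    def __init__(self, log_path: str):
        super().__init__(daemon=True)
        self.log_path = log_path
        self.q = queue.Queue()
        self.pending = []
        self.timer = None

    def write(self, record: dict) -> None:
        # Called from any thread; never blocks on file I/O
        self.q.put(record)

    def _commit(self) -> None:
        if not self.pending:
            return
        with open(self.log_path, "a", encoding="utf-8") as f:
            for rec in self.pending:
                f.write(json.dumps(rec) + "\n")
        self.pending.clear()

    def run(self) -> None:
        while True:
            record = self.q.get()
            if record is None:
                # Inactivity signal from the timer: flush whatever is pending
                self._commit()
                continue
            if self.timer is not None and self.timer.is_alive():
                self.timer.cancel()
            self.pending.append(record)
            if len(self.pending) >= self.COMMIT_AFTER_N:
                self._commit()
            # Restart the inactivity timer
            self.timer = Timer(self.INACTIVITY_SECS, lambda: self.q.put(None))
            self.timer.start()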
@@ -207,6 +225,7 @@ class LogWorker(Thread):
         while True:
             arch_name, request, trace, trace_tags, trace_comment = LogWorker.queue.get()
             if request is None:
+                # There was a period of inactivity so run the timeout functions
                 for func in LogWorker.timeout_functions:
                     print(f"LogWorker commit running {func.__name__}")
                     try:
@@ -215,6 +234,7 @@
                         print(f"Timeout func {func.__name__} had error {e}")
             else:
                 if LogWorker.commit_timer is not None and LogWorker.commit_timer.is_alive():
+                    # Cancel the inactivity timer
                     LogWorker.commit_timer.cancel()
                     LogWorker.commit_timer = None
                 try:
@@ -232,11 +252,16 @@
             except Exception as err:
                 print(f"Request / trace save failed {err}")

+            # Restart the inactivity timer
             LogWorker.commit_timer = Timer(LogWorker.commit_time, LogWorker.signal_commit)
             LogWorker.commit_timer.start()

     @classmethod
-    def append_and_save_data_as_json(cls, data: Dict):
+    def append_and_save_data_as_json(cls, data: Dict) -> None:
+        """
+        If the working log file has not been downloaded, then get a local copy.
+        Add the new record to the local file.
+        """
         print(f"LogWorker logging open record {LogWorker.commit_count + 1}")
         if cls.save_repo is None and not cls.save_repo_load_error:
             try:
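As a rough illustration of "get a local copy, then append the record", the sketch below uses huggingface_hub's hf_hub_download to fetch an existing log file into a working location before appending. The repo id, filename and JSON layout are assumptions for illustration only, not the ones used by this Space.

import json
import os
import shutil
from huggingface_hub import hf_hub_download


def append_record_as_json(data: dict,
                          local_path: str = "data/trace_log.json",
                          repo_id: str = "your-org/your-trace-dataset") -> None:
    # Make sure a local working copy of the log file exists
    if not os.path.isfile(local_path):
        os.makedirs(os.path.dirname(local_path), exist_ok=True)
        try:
            cached = hf_hub_download(repo_id=repo_id, filename="trace_log.json",
                                     repo_type="dataset")
            shutil.copyfile(cached, local_path)
        except Exception:
            # No remote copy yet - start a fresh log
            with open(local_path, "w", encoding="utf-8") as f:
                json.dump({"records": []}, f)

    # Add the new record to the local file
    with open(local_path, "r", encoding="utf-8") as f:
        log = json.load(f)
    log["records"].append(data)
    with open(local_path, "w", encoding="utf-8") as f:
        json.dump(log, f, indent=2)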
@@ -255,6 +280,9 @@

     @classmethod
     def commit_repo(cls):
+        """
+        If there are any changes in the local file which are not committed to the repo then commit them.
+        """
         if cls.commit_count > 0:
             print(f"LogWorker committing {LogWorker.commit_count} open records")
             cls.save_repo.push_to_hub()
@@ -270,7 +298,11 @@

     @classmethod
     def write(cls, arch_name: str, request: ArchitectureRequest, trace: ArchitectureTrace,
-              trace_tags: List[str] = None, trace_comment: str = None):
+              trace_tags: List[str] = None, trace_comment: str = None) -> None:
+        """
+        Class method callable from across the system to put a logging request onto the queue so that
+        the LogWorker will pick it up in turn and write it to the log
+        """
         trace_tags = [] if trace_tags is None else trace_tags
         trace_comment = "" if trace_comment is None else trace_comment
         cls.queue.put((arch_name, request, trace, trace_tags, trace_comment))
@@ -302,7 +334,10 @@ class Architecture:
     trace_file = os.path.join(trace_dir, trace_file_name)

     @classmethod
-    def wipe_trace(cls, hf_write_token:str = None):
+    def wipe_trace(cls, hf_write_token:str = None) -> None:
+        """
+        Wipes the json trace file - note that this will not delete any records which have been saved offline to the database
+        """
         if os.path.exists(cls.trace_dir):
             shutil.rmtree(cls.trace_dir)
         try:
@@ -319,6 +354,9 @@ class Architecture:

     @classmethod
     def get_trace_records(cls) -> List[Dict]:
+        """
+        Loads and returns all the trace records which are held in the trace file
+        """
         if not os.path.isfile(cls.trace_file):
             hf_write_token = hf_api_token(write=True)
             try:
@@ -395,8 +433,8 @@
         in sequence, allowing them to amend the request or early exit the processing. Also captures
         exceptions and generates the trace, plus saves the request/response and the trace to a store
         for analysis.
-        :param request:
-        :return:
+        :param request: The architecture request to pass down the pipeline
+        :return: The trace record for this invocation of the architecture
         """
         print(f'{self.name} processing query "{request.request}"')
         trace = ArchitectureTrace()
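The behaviour described in this docstring (run each component in order, time each step, capture exceptions, allow an early exit) reduces to a simple loop. The sketch below is a schematic version using plain dictionaries in place of the real ArchitectureTrace, and it assumes components signal an early exit via a flag on the request; the real classes in this file may differ in detail.

import time


class PipelineSketch:
    # Schematic stand-in for Architecture: holds an ordered list of components
    def __init__(self, name, components):
        self.name = name
        self.components = components

    def __call__(self, request):
        trace = []  # stand-in for ArchitectureTrace
        for component in self.components:
            start = time.perf_counter()
            try:
                component.process_request(request)
                outcome = "success"
            except Exception as err:
                outcome = f"exception: {err}"
            trace.append({
                "step": type(component).__name__,
                "outcome": outcome,
                "seconds": time.perf_counter() - start,
            })
            # Stop early if the component flagged an exit or raised
            if getattr(request, "early_exit", False) or outcome != "success":
                break
        return trace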
@@ -420,6 +458,10 @@


 class InputRequestScreener(ArchitectureComponent):
+    """
+    This is a concrete component which screens the input query for profanity using an off-the-shelf
+    profanity search library (better_profanity)
+    """
     description = "Simplistic input screener for demonstration. Screens inputs for profanity."

     def process_request(self, request: ArchitectureRequest) -> None:
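better_profanity exposes a very small API, so the screening step amounts to roughly the following sketch; how the result is wired back into the request object is left out here.

from better_profanity import profanity


def input_is_clean(text: str) -> bool:
    # Returns True when the query passes the profanity screen
    profanity.load_censor_words()  # default word list shipped with the library
    return not profanity.contains_profanity(text)


print(input_is_clean("What is retrieval augmented generation?"))  # True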
@@ -430,6 +472,12 @@ class InputRequestScreener(ArchitectureComponent):


 class OutputResponseScreener(ArchitectureComponent):
+    """
+    This is a concrete component designed to review the final response before showing it to the user.
+    It is a simple exemplar component which calls the baseline LLM with just the response text and asks
+    whether it contains anything offensive. This is illustrative only and should not be considered
+    a best-in-class or production-usable safety implementation.
+    """
     description = "Screens outputs for offensive responses."

     def __init__(self):
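The "ask the baseline LLM to review the output" idea can be sketched as a yes/no judge prompt. call_baseline_llm below is a hypothetical placeholder for whatever endpoint call the architecture uses; only the prompt shape and the verdict check are the point of the example.

def call_baseline_llm(prompt: str) -> str:
    # Hypothetical placeholder for the real inference endpoint call
    raise NotImplementedError


def response_is_safe(response_text: str) -> bool:
    prompt = (
        "You are a content reviewer. Answer only YES or NO.\n"
        "Does the following text contain anything offensive?\n\n"
        f"{response_text}"
    )
    verdict = call_baseline_llm(prompt).strip().upper()
    return verdict.startswith("NO")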
@@ -459,6 +507,11 @@ class OutputResponseScreener(ArchitectureComponent):


 class RetrievalAugmentor(ArchitectureComponent):
+    """
+    This is a concrete implementation of the RAG augmentation component of the RAG architecture. Takes
+    the current input request, queries the vector store for documents and then inserts these documents at
+    the beginning of the LLM prompt, ready for inference.
+    """
     description = "Retrieves appropriate documents from the store and then augments the request."

     def __init__(self, vector_store: str, doc_count: int = 5):
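Since the file already imports chromadb, the retrieve-then-augment step can be sketched directly against that client. The store path, collection name and prompt template below are assumptions for illustration; the query/n_results calls are standard chromadb API.

import chromadb


def augment_query(query: str, store_path: str = "vector_store",
                  collection_name: str = "docs", doc_count: int = 5) -> str:
    client = chromadb.PersistentClient(path=store_path)
    collection = client.get_collection(collection_name)
    results = collection.query(query_texts=[query], n_results=doc_count)
    documents = results["documents"][0]  # top-k chunks for the single query text
    context = "\n\n".join(documents)
    # Put the retrieved documents ahead of the user's question
    return (
        "Use the following context to answer the question.\n\n"
        f"{context}\n\nQuestion: {query}"
    )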
@@ -494,7 +547,7 @@ class RetrievalAugmentor(ArchitectureComponent):

 class HFInferenceEndpoint(ArchitectureComponent):
     """
-    A concrete pipeline component which sends the
+    A concrete pipeline component which sends the current query to a given llama chat-based
     inference endpoint on HuggingFace
     """
     def __init__(self, endpoint_url: str, model_name: str, system_prompt: str, max_new_tokens: int,
@@ -522,7 +575,8 @@ class HFInferenceEndpoint(ArchitectureComponent):
         """
         Main processing method for this function. Calls the HTTP service for the model
         by port if provided or attempting to lookup by name, and then adds this to the
-        response element of the request.
+        response element of the request. Supports the different prompt styles that were tested
+        to determine the best way to get a good response from the various LLM endpoints.
         """
         headers = {
             "Accept": "application/json",
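For reference, a call to a HuggingFace Inference Endpoint running a text-generation model generally looks like the sketch below: a bearer token in the headers and an "inputs" plus "parameters" JSON payload. The llama-2 style [INST]/<<SYS>> prompt format and the HF_API_TOKEN environment variable are assumptions, not values taken from this repository.

import os
import requests


def query_endpoint(endpoint_url: str, system_prompt: str, user_query: str,
                   max_new_tokens: int = 256, temperature: float = 0.7) -> str:
    headers = {
        "Accept": "application/json",
        "Content-Type": "application/json",
        "Authorization": f"Bearer {os.environ['HF_API_TOKEN']}",  # assumed env var
    }
    # llama-2 chat style prompt; other models expect different templates
    prompt = f"<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n{user_query} [/INST]"
    payload = {
        "inputs": prompt,
        "parameters": {"max_new_tokens": max_new_tokens, "temperature": temperature},
    }
    response = requests.post(endpoint_url, headers=headers, json=payload, timeout=60)
    response.raise_for_status()
    body = response.json()
    # Text-generation endpoints typically return [{"generated_text": ...}]
    return body[0]["generated_text"] if isinstance(body, list) else body["generated_text"]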
@@ -583,12 +637,3 @@ class ResponseTrimmer(ArchitectureComponent):

     def config_description(self) -> str:
         return f"Regexes: {self.regex_display}"
-
-
-if __name__ == "__main__":
-    req = ArchitectureRequest("Testing")
-    a = Architecture.get_architecture("1. Baseline LLM")
-    a(req)
-    print("Hold")
-
-