Spaces:

seanpedrickcase
/

Light-PDF-Web-QA-Chatbot

Running

App Files Files Community

Sean-Case committed on Oct 11, 2023

Commit

114048b

1 Parent(s): 9795699

gpu_layers should now update correctly. Added code for creating distribution.

Browse files

Files changed (5) hide show

.gitignore +3 -1
app.py +23 -9
bootstrapper.py +63 -0
chatfuncs/chatfuncs.py +26 -30
requirements.txt +0 -1

.gitignore CHANGED Viewed

@@ -1,3 +1,5 @@
 *.pyc
 *.ipynb
-*.pdf

 *.pyc
 *.ipynb
+*.pdf
+*/build
+*/dist

app.py CHANGED Viewed

@@ -2,6 +2,7 @@
 # +
 import os
 # Need to overwrite version of gradio present in Huggingface spaces as it doesn't have like buttons/avatars (Oct 2023)
 #os.system("pip uninstall -y gradio")
@@ -69,18 +70,31 @@ import chatfuncs.chatfuncs as chatf
 chatf.embeddings = load_embeddings(embeddings_name)
 chatf.vectorstore = get_faiss_store(faiss_vstore_folder="faiss_embedding",embeddings=globals()["embeddings"])
-def load_model(model_type, gpu_layers, CtransInitConfig_gpu=chatf.CtransInitConfig_gpu, CtransInitConfig_cpu=chatf.CtransInitConfig_cpu, torch_device=chatf.torch_device):
-    print("Loading model")
     if model_type == "Orca Mini":
-        CtransInitConfig_gpu.gpu_layers = gpu_layers
-        CtransInitConfig_cpu.gpu_layers = gpu_layers
         try:
-            model = AutoModelForCausalLM.from_pretrained('juanjgit/orca_mini_3B-GGUF', model_type='llama', model_file='orca-mini-3b.q4_0.gguf', **asdict(CtransInitConfig_gpu()))
         except:
-            model = AutoModelForCausalLM.from_pretrained('juanjgit/orca_mini_3B-GGUF', model_type='llama', model_file='orca-mini-3b.q4_0.gguf', **asdict(CtransInitConfig_cpu()))
         tokenizer = []
@@ -119,10 +133,10 @@ def load_model(model_type, gpu_layers, CtransInitConfig_gpu=chatf.CtransInitConf
 # Both models are loaded on app initialisation so that users don't have to wait for the models to be downloaded
 model_type = "Orca Mini"
-load_model(model_type, chatf.gpu_layers, chatf.CtransInitConfig_gpu, chatf.CtransInitConfig_cpu, chatf.torch_device)
 model_type = "Flan Alpaca"
-load_model(model_type, 0, chatf.CtransInitConfig_gpu, chatf.CtransInitConfig_cpu, chatf.torch_device)
 def docs_to_faiss_save(docs_out:PandasDataFrame, embeddings=embeddings):
@@ -207,7 +221,7 @@ with block:
     with gr.Tab("Advanced features"):
         model_choice = gr.Radio(label="Choose a chat model", value="Flan Alpaca", choices = ["Flan Alpaca", "Orca Mini"])
-        gpu_layer_choice = gr.Slider(label="Choose number of model layers to send to GPU (please don't change if you don't know what you're doing).", value=0, minimum=0, maximum=6, step = 1)
     gr.HTML(
         "<center>This app is based on the models Flan Alpaca and Orca Mini. It powered by Gradio, Transformers, Ctransformers, and Langchain.</a></center>"

 # +
 import os
+import copy
 # Need to overwrite version of gradio present in Huggingface spaces as it doesn't have like buttons/avatars (Oct 2023)
 #os.system("pip uninstall -y gradio")
 chatf.embeddings = load_embeddings(embeddings_name)
 chatf.vectorstore = get_faiss_store(faiss_vstore_folder="faiss_embedding",embeddings=globals()["embeddings"])
+def load_model(model_type, gpu_layers, gpu_config=None, cpu_config=None, torch_device=None):
+    print("Loading model")
+    # Default values inside the function
+    if gpu_config is None:
+        gpu_config = chatf.gpu_config
+    if cpu_config is None:
+        cpu_config = chatf.cpu_config
+    if torch_device is None:
+        torch_device = chatf.torch_device
     if model_type == "Orca Mini":
+        gpu_config.update_gpu(gpu_layers)
+        cpu_config.update_gpu(gpu_layers)
+        print("Loading with", cpu_config.gpu_layers, "model layers sent to GPU.")
+        print(vars(gpu_config))
+        print(vars(cpu_config))
         try:
+            model = AutoModelForCausalLM.from_pretrained('juanjgit/orca_mini_3B-GGUF', model_type='llama', model_file='orca-mini-3b.q4_0.gguf', **vars(cpu_config)) # **asdict(CtransRunConfig_cpu())
         except:
+            model = AutoModelForCausalLM.from_pretrained('juanjgit/orca_mini_3B-GGUF', model_type='llama', model_file='orca-mini-3b.q4_0.gguf', **vars(gpu_config)) #**asdict(CtransRunConfig_gpu())
         tokenizer = []
 # Both models are loaded on app initialisation so that users don't have to wait for the models to be downloaded
 model_type = "Orca Mini"
+load_model(model_type, chatf.gpu_layers, chatf.gpu_config, chatf.cpu_config, chatf.torch_device)
 model_type = "Flan Alpaca"
+load_model(model_type, 0, chatf.gpu_config, chatf.cpu_config, chatf.torch_device)
 def docs_to_faiss_save(docs_out:PandasDataFrame, embeddings=embeddings):
     with gr.Tab("Advanced features"):
         model_choice = gr.Radio(label="Choose a chat model", value="Flan Alpaca", choices = ["Flan Alpaca", "Orca Mini"])
+        gpu_layer_choice = gr.Slider(label="Choose number of model layers to send to GPU (please don't change if you don't know what you're doing).", value=0, minimum=0, maximum=6, step = 1, scale = 0)
     gr.HTML(
         "<center>This app is based on the models Flan Alpaca and Orca Mini. It powered by Gradio, Transformers, Ctransformers, and Langchain.</a></center>"

bootstrapper.py ADDED Viewed

	@@ -0,0 +1,63 @@

+import sys
+import os
+import subprocess
+import logging
+# Set up logging
+logging.basicConfig(filename='bootstrapper.log', level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
+ENV_DIR = "app_env"
+def create_virtual_env():
+    logging.info("Checking for virtual environment at {}".format(ENV_DIR))
+    if not os.path.exists(ENV_DIR):
+        logging.info("Virtual environment not found. Creating a new one.")
+        # Import virtualenv and create a new environment
+        import virtualenv
+        virtualenv.create_environment(ENV_DIR)
+def install_dependencies():
+    logging.info("Installing dependencies.")
+    # Ensure the requirements.txt file is bundled with your application
+    requirements_path = "requirements.txt"
+    # pip executable within the virtual environment
+    pip_path = os.path.join(ENV_DIR, 'Scripts', 'pip')
+    try:
+        subprocess.check_call([pip_path, "install", "-r", requirements_path])
+        logging.info("Dependencies installed successfully.")
+    except Exception as e:
+        logging.error("Error installing dependencies: {}".format(e))
+def main():
+    #try:
+    #	create_virtual_env()
+    #except Exception as e:
+    #	logging.error("An error occurred in the bootstrapper: {}".format(e), exc_info=True)
+    try:
+        import langchain
+    except ImportError:
+        logging.warning("Some dependencies are missing. Attempting to install.")
+        install_dependencies()
+    # Now you can run your main application logic.
+    # If it's in another file, you can use exec as shown before.
+    try:
+        with open('app.py', 'r') as file:
+            exec(file.read())
+        logging.info("Main application executed successfully.")
+    except Exception as e:
+        logging.error("Error executing main application: {}".format(e))
+if __name__ == "__main__":
+    logging.info("Bootstrapper started.")
+    try:
+        main()
+        logging.info("Bootstrapper finished.")
+    except Exception as e:
+        logging.error("An error occurred in the bootstrapper: {}".format(e))

chatfuncs/chatfuncs.py CHANGED Viewed

@@ -95,38 +95,34 @@ context_length:int = 4096
 sample = True
-@dataclass
 class CtransInitConfig_gpu:
-    temperature: float = temperature
-    top_k: int = top_k
-    top_p: float = top_p
-    repetition_penalty: float = repetition_penalty
-    last_n_tokens: int = last_n_tokens
-    max_new_tokens: int = max_new_tokens
-    seed: int = seed
-    reset: bool = reset
-    stream: bool = stream
-    threads: int = threads
-    batch_size:int = batch_size
-    context_length:int = context_length
-    gpu_layers:int = gpu_layers
-    #stop: list[str] = field(default_factory=lambda: [stop_string])
-class CtransInitConfig_cpu:
-    temperature: float = temperature
-    top_k: int = top_k
-    top_p: float = top_p
-    repetition_penalty: float = repetition_penalty
-    last_n_tokens: int = last_n_tokens
-    max_new_tokens: int = max_new_tokens
-    seed: int = seed
-    reset: bool = reset
-    stream: bool = stream
-    threads: int = threads
-    batch_size:int = batch_size
-    context_length:int = context_length
-    gpu_layers:int = 0
-    #stop: list[str] = field(default_factory=lambda: [stop_string])
 @dataclass
 class CtransGenGenerationConfig:

 sample = True
 class CtransInitConfig_gpu:
+    """Plain settings object used when loading the Orca Mini model.
+
+    Every constructor argument is stored unchanged as an instance attribute
+    of the same name. app.py unpacks ``vars()`` of an instance as keyword
+    arguments to ``AutoModelForCausalLM.from_pretrained``, so the attribute
+    names set here are the keyword names passed to that call.
+    """
+    def __init__(self, temperature=0.1, top_k=3, top_p=1, repetition_penalty=1.05, last_n_tokens=64, max_new_tokens=125, seed=42, reset=False, stream=True, threads=None, batch_size=1024, context_length=4096, gpu_layers=None):
+        self.temperature = temperature
+        self.top_k = top_k
+        self.top_p = top_p
+        self.repetition_penalty = repetition_penalty
+        self.last_n_tokens = last_n_tokens
+        self.max_new_tokens = max_new_tokens
+        self.seed = seed
+        self.reset = reset
+        self.stream = stream
+        self.threads = threads
+        self.batch_size = batch_size
+        self.context_length = context_length
+        # Stays None until a caller passes a value or calls update_gpu().
+        self.gpu_layers = gpu_layers
+        # self.stop: list[str] = field(default_factory=lambda: [stop_string])
+    def update_gpu(self, new_value):
+        """Replace ``gpu_layers`` with ``new_value``."""
+        self.gpu_layers = new_value
+class CtransInitConfig_cpu(CtransInitConfig_gpu):
+    """Variant of CtransInitConfig_gpu whose ``gpu_layers`` starts at 0.
+
+    The constructor takes no arguments, so every other setting keeps the
+    parent's default value. ``update_gpu`` is inherited, which means
+    ``gpu_layers`` can still be changed after construction.
+    """
+    def __init__(self):
+        super().__init__()
+        # Override the parent's default of None.
+        self.gpu_layers = 0
+# Module-level instances built with the default settings; app.py reads them
+# as chatf.gpu_config and chatf.cpu_config.
+gpu_config = CtransInitConfig_gpu()
+cpu_config = CtransInitConfig_cpu()
 @dataclass
 class CtransGenGenerationConfig:

requirements.txt CHANGED Viewed

@@ -6,7 +6,6 @@ transformers
 torch
 sentence_transformers
 faiss-cpu
-bitsandbytes
 pypdf
 python-docx
 ctransformers[cuda]

 torch
 sentence_transformers
 faiss-cpu
 pypdf
 python-docx
 ctransformers[cuda]