import os
#from ctransformers import AutoModelForCausalLM
#from transformers import AutoTokenizer, pipeline
from llama_cpp import Llama
from pydantic import BaseModel
import torch
from bertopic.representation import KeyBERTInspired, LlamaCPP, MaximalMarginalRelevance, TextGeneration
from funcs.prompts import capybara_prompt, capybara_start, open_hermes_prompt, open_hermes_start, stablelm_prompt, stablelm_start
chosen_prompt = open_hermes_prompt # stablelm_prompt
chosen_start_tag = open_hermes_start # stablelm_start
# Find model file
def find_model_file(hf_model_name, hf_model_file, search_folder):
    '''Locate a GGUF model file in the local Hugging Face cache, downloading it from the hub if it is not already present.'''
    hf_loc = search_folder #os.environ["HF_HOME"]
    hf_sub_loc = search_folder + "/hub/" #os.environ["HF_HOME"]

    hf_model_name_path = hf_sub_loc + 'models--' + hf_model_name.replace("/", "--")
    print(hf_model_name_path)

    def find_file(root_folder, file_name):
        # Walk the cache folder looking for the requested file name
        for root, dirs, files in os.walk(root_folder):
            if file_name in files:
                return os.path.join(root, file_name)
        return None

    folder_path = hf_model_name_path
    file_to_find = hf_model_file

    found_file = find_file(folder_path, file_to_find)
    if found_file:
        print(f"File found: {found_file}")
        return found_file
    else:
        error = "File not found."
        print(error, " Downloading model from hub")

        from huggingface_hub import hf_hub_download
        hf_hub_download(repo_id=hf_model_name, filename=hf_model_file) # Download the requested file rather than a hard-coded filename
        found_file = find_file(folder_path, file_to_find)
        return found_file
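# Example usage (hypothetical model names, for illustration only):
# model_path = find_model_file("org/some-model-GGUF", "some-model.Q5_K_M.gguf", os.getenv("HF_HOME", '.'))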
# Note: if llama.cpp GPU offload proves unstable with CUDA, set n_gpu_layers to 0 even when CUDA is available
if torch.cuda.is_available():
    torch_device = "gpu"
    low_resource_mode = "No"
    n_gpu_layers = 100
else:
    torch_device = "cpu"
    low_resource_mode = "Yes"
    n_gpu_layers = 0

low_resource_mode = "No" # Override for testing

#print("Running on device:", torch_device)

n_threads = torch.get_num_threads()
print("CPU n_threads:", n_threads)
# Default model parameters
temperature: float = 0.1
top_k: int = 3
top_p: float = 1
repeat_penalty: float = 1.1
last_n_tokens_size: int = 128
max_tokens: int = 500
seed: int = 42
reset: bool = True
stream: bool = False
n_threads: int = n_threads
n_batch: int = 256
n_ctx: int = 4096
sample: bool = True
trust_remote_code: bool = True

class LLamacppInitConfigGpu(BaseModel):
    last_n_tokens_size: int
    seed: int
    n_threads: int
    n_batch: int
    n_ctx: int
    n_gpu_layers: int
    temperature: float
    top_k: int
    top_p: float
    repeat_penalty: float
    max_tokens: int
    reset: bool
    stream: bool
    stop: str
    trust_remote_code: bool

    def update_gpu(self, new_value: int):
        self.n_gpu_layers = new_value
llm_config = LLamacppInitConfigGpu(last_n_tokens_size=last_n_tokens_size,
                                   seed=seed,
                                   n_threads=n_threads,
                                   n_batch=n_batch,
                                   n_ctx=n_ctx,
                                   n_gpu_layers=n_gpu_layers,
                                   temperature=temperature,
                                   top_k=top_k,
                                   top_p=top_p,
                                   repeat_penalty=repeat_penalty,
                                   max_tokens=max_tokens,
                                   reset=reset,
                                   stream=stream,
                                   stop=chosen_start_tag,
                                   trust_remote_code=trust_remote_code)
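# Example (hypothetical): GPU offload can be changed after construction,
# e.g. llm_config.update_gpu(0) to fall back to CPU-only inference if CUDA offload misbehaves.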
## Create representation model parameters ##
# KeyBERT
keybert = KeyBERTInspired()

def create_representation_model(create_llm_topic_labels, llm_config, hf_model_name, hf_model_file, chosen_start_tag):
    '''Create a dictionary of BERTopic representation models, optionally including an LLM-based topic labeller loaded through llama.cpp.'''
    if create_llm_topic_labels == "Yes":
        # Use llama.cpp to load in model

        # Check for HF_HOME environment variable and supply a default value if it's not found (current folder)
        hf_home_value = os.getenv("HF_HOME", '.')

        found_file = find_model_file(hf_model_name, hf_model_file, hf_home_value)

        llm = Llama(model_path=found_file, stop=chosen_start_tag, n_gpu_layers=llm_config.n_gpu_layers, n_ctx=llm_config.n_ctx) #**llm_config.model_dump())
        #print(llm.n_gpu_layers)
        llm_model = LlamaCPP(llm, prompt=chosen_prompt)#, **gen_config.model_dump())

        # All representation models
        representation_model = {
            "KeyBERT": keybert,
            "Mistral": llm_model
        }

    else: # Treat any value other than "Yes" as "No" so that representation_model is always defined
        representation_model = {"KeyBERT": keybert}

    # Deprecated example using CTransformers. This package is not really used anymore
    #model = AutoModelForCausalLM.from_pretrained('NousResearch/Nous-Capybara-7B-V1.9-GGUF', model_type='mistral', model_file='Capybara-7B-V1.9-Q5_K_M.gguf', hf=True, **vars(llm_config))
    #tokenizer = AutoTokenizer.from_pretrained("NousResearch/Nous-Capybara-7B-V1.9")
    #generator = pipeline(task="text-generation", model=model, tokenizer=tokenizer)

    # Text generation with Llama 2
    #mistral_capybara = TextGeneration(generator, prompt=capybara_prompt)
    #mistral_hermes = TextGeneration(generator, prompt=open_hermes_prompt)

    return representation_model
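# Example usage (hypothetical model names; assumes a BERTopic pipeline is built elsewhere in the app):
# from bertopic import BERTopic
# representation_model = create_representation_model("Yes", llm_config, "org/some-model-GGUF", "some-model.Q5_K_M.gguf", chosen_start_tag)
# topic_model = BERTopic(representation_model=representation_model)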