# Gradio tab for running a local LLM through Llamafile: pick a GGUF/llamafile model from a
# local directory, download one of the preset models, and configure and start the server.
import os
import logging
from typing import Tuple, Optional

import gradio as gr

from App_Function_Libraries.Local_LLM.Local_LLM_Inference_Engine_Lib import (
    download_llm_model,
    llm_models,
    start_llamafile,
    get_gguf_llamafile_files
)


def create_chat_with_llamafile_tab():
    # Resolve the absolute path of the model chosen from the local-directory dropdown.
    def on_local_model_change(selected_model: str, search_directory: str) -> str:
        if selected_model and isinstance(search_directory, str):
            model_path = os.path.abspath(os.path.join(search_directory, selected_model))
            logging.debug(f"Selected model path: {model_path}")
            return model_path
        return "Invalid selection or directory."

    # Re-scan a directory for model files and return a dropdown update plus a status message.
    def update_dropdowns(search_directory: str) -> Tuple[dict, str]:
        logging.debug(f"User-entered directory: {search_directory}")
        if not os.path.isdir(search_directory):
            logging.debug(f"Directory does not exist: {search_directory}")
            return gr.update(choices=[], value=None), "Directory does not exist."

        logging.debug(f"Directory exists: {search_directory}, scanning for files...")
        model_files = get_gguf_llamafile_files(search_directory)

        if not model_files:
            logging.debug(f"No model files found in {search_directory}")
            return gr.update(choices=[], value=None), "No model files found in the specified directory."

        logging.debug(f"Models loaded from {search_directory}: {model_files}")
        return gr.update(choices=model_files, value=None), f"Models loaded from {search_directory}."

    def download_preset_model(selected_model: str) -> Tuple[str, str]:
        """
        Downloads the selected preset model.

        Args:
            selected_model (str): The key of the selected preset model.

        Returns:
            Tuple[str, str]: Status message and the path to the downloaded model.
        """
        # Each llm_models entry is expected to provide "name", "url", "filename" and "hash"
        # keys (see the keyword arguments passed to download_llm_model below).
        model_info = llm_models.get(selected_model)
        if not model_info:
            return "Invalid model selection.", ""

        try:
            model_path = download_llm_model(
                model_name=model_info["name"],
                model_url=model_info["url"],
                model_filename=model_info["filename"],
                model_hash=model_info["hash"]
            )
            return f"Model '{model_info['name']}' downloaded successfully.", model_path
        except Exception as e:
            logging.error(f"Error downloading model: {e}")
            return f"Failed to download model: {e}", ""

    with gr.TabItem("Local LLM with Llamafile", visible=True):
        gr.Markdown("# Settings for Llamafile")

        with gr.Row():
            with gr.Column():
                am_noob = gr.Checkbox(label="Enable Sane Defaults", value=False, visible=True)
                advanced_mode_toggle = gr.Checkbox(label="Advanced Mode - Show All Settings", value=False)

                # Advanced settings; most start hidden and are shown/hidden by the Advanced Mode toggle.
                verbose_checked = gr.Checkbox(label="Enable Verbose Output", value=False, visible=False)
                threads_checked = gr.Checkbox(label="Set CPU Threads", value=False, visible=False)
                threads_value = gr.Number(label="Number of CPU Threads", value=None, precision=0, visible=False)
                threads_batched_checked = gr.Checkbox(label="Enable Batched Inference", value=False, visible=False)
                threads_batched_value = gr.Number(label="Batch Size for Inference", value=None, precision=0, visible=False)
                model_alias_checked = gr.Checkbox(label="Set Model Alias", value=False, visible=False)
                model_alias_value = gr.Textbox(label="Model Alias", value="", visible=False)
                ctx_size_checked = gr.Checkbox(label="Set Prompt Context Size", value=False, visible=False)
                ctx_size_value = gr.Number(label="Prompt Context Size", value=8124, precision=0, visible=False)
                ngl_checked = gr.Checkbox(label="Enable GPU Layers", value=False, visible=True)
                ngl_value = gr.Number(label="Number of GPU Layers", value=None, precision=0, visible=True)
                batch_size_checked = gr.Checkbox(label="Set Batch Size", value=False, visible=False)
                batch_size_value = gr.Number(label="Batch Size", value=512, visible=False)
                memory_f32_checked = gr.Checkbox(label="Use 32-bit Floating Point", value=False, visible=False)
                numa_checked = gr.Checkbox(label="Enable NUMA", value=False, visible=False)
                server_timeout_value = gr.Number(label="Server Timeout", value=600, precision=0, visible=False)
                host_checked = gr.Checkbox(label="Set IP to Listen On", value=False, visible=False)
                host_value = gr.Textbox(label="Host IP Address", value="", visible=False)
                port_checked = gr.Checkbox(label="Set Server Port", value=False, visible=False)
                port_value = gr.Number(label="Port Number", value=8080, precision=0, visible=False)
                api_key_checked = gr.Checkbox(label="Set API Key", value=False, visible=False)
                api_key_value = gr.Textbox(label="API Key", value="", visible=False)
                http_threads_checked = gr.Checkbox(label="Set HTTP Server Threads", value=False, visible=False)
                http_threads_value = gr.Number(label="Number of HTTP Server Threads", value=None, precision=0, visible=False)
                hf_repo_checked = gr.Checkbox(label="Use Huggingface Repo Model", value=False, visible=False)
                hf_repo_value = gr.Textbox(label="Huggingface Repo Name", value="", visible=False)
                hf_file_checked = gr.Checkbox(label="Set Huggingface Model File", value=False, visible=False)
                hf_file_value = gr.Textbox(label="Huggingface Model File", value="", visible=False)

            with gr.Column():
                gr.Markdown("## Model Selection")

                with gr.Row():
                    search_directory = gr.Textbox(label="Model Directory",
                                                  placeholder="Enter directory path (currently './Models')",
                                                  value="./Models",
                                                  interactive=True)

                # Pre-scan the default directory. Note that the result is not applied here:
                # the dropdown below starts empty until the user clicks "Refresh Models".
                initial_dropdown_update, _ = update_dropdowns("./Models")
                refresh_button = gr.Button("Refresh Models")
                local_model_dropdown = gr.Dropdown(label="Select Model from Directory", choices=[])
                model_value = gr.Textbox(label="Selected Model File Path", value="", interactive=False)

                gr.Markdown("## Download Preset Models")

                preset_model_dropdown = gr.Dropdown(
                    label="Select a Preset Model",
                    choices=list(llm_models.keys()),
                    value=None,
                    interactive=True,
                    info="Choose a preset model to download."
                )
                download_preset_button = gr.Button("Download Selected Preset")

        with gr.Row():
            with gr.Column():
                start_button = gr.Button("Start Llamafile")
                # Note: stop_button is not wired to any handler in this tab, hence the label.
                stop_button = gr.Button("Stop Llamafile (doesn't work)")
                output_display = gr.Markdown()

        # Toggle visibility of the advanced controls listed below.
        def update_visibility(show_advanced: bool):
            components = [
                verbose_checked, threads_checked, threads_value,
                http_threads_checked, http_threads_value,
                hf_repo_checked, hf_repo_value,
                hf_file_checked, hf_file_value,
                ctx_size_checked, ctx_size_value,
                ngl_checked, ngl_value,
                host_checked, host_value,
                port_checked, port_value
            ]
            return [gr.update(visible=show_advanced) for _ in components]

        def on_start_button_click(
            am_noob: bool,
            verbose_checked: bool,
            threads_checked: bool,
            threads_value: Optional[int],
            threads_batched_checked: bool,
            threads_batched_value: Optional[int],
            model_alias_checked: bool,
            model_alias_value: str,
            http_threads_checked: bool,
            http_threads_value: Optional[int],
            model_value: str,
            hf_repo_checked: bool,
            hf_repo_value: str,
            hf_file_checked: bool,
            hf_file_value: str,
            ctx_size_checked: bool,
            ctx_size_value: Optional[int],
            ngl_checked: bool,
            ngl_value: Optional[int],
            batch_size_checked: bool,
            batch_size_value: Optional[int],
            memory_f32_checked: bool,
            numa_checked: bool,
            server_timeout_value: Optional[int],
            host_checked: bool,
            host_value: str,
            port_checked: bool,
            port_value: Optional[int],
            api_key_checked: bool,
            api_key_value: str
        ) -> str:
            """
            Event handler for the Start Llamafile button.

            Collects the current values of every settings widget and passes them,
            positionally, to start_llamafile.
            """
            try:
                result = start_llamafile(
                    am_noob,
                    verbose_checked,
                    threads_checked,
                    threads_value,
                    threads_batched_checked,
                    threads_batched_value,
                    model_alias_checked,
                    model_alias_value,
                    http_threads_checked,
                    http_threads_value,
                    model_value,
                    hf_repo_checked,
                    hf_repo_value,
                    hf_file_checked,
                    hf_file_value,
                    ctx_size_checked,
                    ctx_size_value,
                    ngl_checked,
                    ngl_value,
                    batch_size_checked,
                    batch_size_value,
                    memory_f32_checked,
                    numa_checked,
                    server_timeout_value,
                    host_checked,
                    host_value,
                    port_checked,
                    port_value,
                    api_key_checked,
                    api_key_value
                )
                return result
            except Exception as e:
                logging.error(f"Error starting Llamafile: {e}")
                return f"Failed to start Llamafile: {e}"

        # Wire the Advanced Mode checkbox to the visibility handler above.
        advanced_mode_toggle.change(
            fn=update_visibility,
            inputs=[advanced_mode_toggle],
            outputs=[
                verbose_checked, threads_checked, threads_value,
                http_threads_checked, http_threads_value,
                hf_repo_checked, hf_repo_value,
                hf_file_checked, hf_file_value,
                ctx_size_checked, ctx_size_value,
                ngl_checked, ngl_value,
                host_checked, host_value,
                port_checked, port_value
            ]
        )

        start_button.click(
            fn=on_start_button_click,
            inputs=[
                am_noob,
                verbose_checked,
                threads_checked,
                threads_value,
                threads_batched_checked,
                threads_batched_value,
                model_alias_checked,
                model_alias_value,
                http_threads_checked,
                http_threads_value,
                model_value,
                hf_repo_checked,
                hf_repo_value,
                hf_file_checked,
                hf_file_value,
                ctx_size_checked,
                ctx_size_value,
                ngl_checked,
                ngl_value,
                batch_size_checked,
                batch_size_value,
                memory_f32_checked,
                numa_checked,
                server_timeout_value,
                host_checked,
                host_value,
                port_checked,
                port_value,
                api_key_checked,
                api_key_value
            ],
            outputs=output_display
        )

        download_preset_button.click(
            fn=download_preset_model,
            inputs=[preset_model_dropdown],
            outputs=[output_display, model_value]
        )

        refresh_button.click(
            fn=update_dropdowns,
            inputs=[search_directory],
            outputs=[local_model_dropdown, output_display]
        )

        local_model_dropdown.change(
            fn=on_local_model_change,
            inputs=[local_model_dropdown, search_directory],
            outputs=[model_value]
        )
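

# Minimal standalone usage sketch (an assumption, not part of the original module): the host
# application is expected to call create_chat_with_llamafile_tab() inside its own gr.Blocks()
# layout; the harness below simply mounts the tab on its own so the UI can be smoke-tested.
if __name__ == "__main__":
    with gr.Blocks() as demo:
        with gr.Tabs():
            create_chat_with_llamafile_tab()
    demo.launch()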