# setup_llama.py
import os
import subprocess
import sys
import time

import requests
from pyngrok import ngrok

# Bind Ollama to all interfaces so the ngrok tunnel can reach it
os.environ["OLLAMA_HOST"] = "0.0.0.0"

def run_command(command, check=True, use_shell=False):
    """Run a command given as a list, or as a string when use_shell=True."""
    try:
        subprocess.run(command, check=check, shell=use_shell)
    except subprocess.CalledProcessError as e:
        cmd_text = command if isinstance(command, str) else " ".join(command)
        print(f"Error running command: {cmd_text}\n{e}")
        sys.exit(1)

def download_model():
    """Download the Llama GGUF model from Hugging Face."""
    model_url = "https://huggingface.co/Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2-GGUF/resolve/main/Llama-3.1-8B-Lexi-Uncensored_V2_Q4.gguf"
    print(f"Downloading model from {model_url}...")
    run_command(["wget", "-O", "unllama.gguf", model_url])

def start_ollama_service():
    """Install and start the Ollama service, then expose it via ngrok."""
    ngrok_token = os.getenv("NGROK_AUTHTOKEN")
    if not ngrok_token:
        print("NGROK_AUTHTOKEN is not set!")
        sys.exit(1)
    ngrok.set_auth_token(ngrok_token)
    print("Starting Ollama service...")
    # Install Ollama with superuser privileges
    run_command("curl -fsSL https://ollama.com/install.sh | sudo sh", use_shell=True)
    # Start Ollama in the background
    subprocess.Popen(["ollama", "serve"])
    # Poll until the Ollama API responds, then open the ngrok tunnel
    while True:
        try:
            response = requests.get("http://localhost:11434")
            if response.status_code == 200:
                print("Ollama service is up and running!")
                tunnel = ngrok.connect(11434)
                print(f"Service is accessible at: {tunnel.public_url}")
                break
        except requests.ConnectionError:
            pass
        time.sleep(2)

def create_model():
    """Create an Ollama model from the downloaded GGUF file."""
    # `ollama create -f` expects a Modelfile, so write one pointing at the GGUF.
    with open("Modelfile", "w") as f:
        f.write("FROM ./unllama.gguf\n")
    run_command(["ollama", "create", "unllama", "-f", "Modelfile"])
    print("Created unllama model")

if __name__ == "__main__":
    start_ollama_service()  # Ensure the service is running before creating the model
    download_model()
    create_model()
    print("Ollama service is running and accessible through ngrok.")