import os
import subprocess
import sys
import time

import requests
from pyngrok import ngrok

# Bind Ollama to all interfaces so the ngrok tunnel can reach it.
os.environ["OLLAMA_HOST"] = "0.0.0.0"


def run_command(command, check=True, use_shell=False):
    """Run a command given as a list (or as a string when use_shell=True)."""
    try:
        subprocess.run(command, check=check, shell=use_shell)
    except subprocess.CalledProcessError as e:
        # A shell command arrives as a string, so only join list-style commands.
        cmd_str = command if isinstance(command, str) else " ".join(command)
        print(f"Error running command: {cmd_str}\n{e}")
        sys.exit(1)


def download_model():
    """Download the Llama model GGUF file from Hugging Face."""
    model_url = (
        "https://huggingface.co/Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2-GGUF"
        "/resolve/main/Llama-3.1-8B-Lexi-Uncensored_V2_Q4.gguf"
    )
    print(f"Downloading model from {model_url}...")
    run_command(["wget", "-O", "unllama.gguf", model_url])


def start_ollama_service():
    """Install and start the Ollama service, then expose it via ngrok."""
    ngrok_token = os.getenv("NGROK_AUTHTOKEN")
    if not ngrok_token:
        print("NGROK_AUTHTOKEN is not set!")
        sys.exit(1)
    ngrok.set_auth_token(ngrok_token)

    print("Starting Ollama service...")
    # Install Ollama with superuser privileges (the pipe requires a shell).
    run_command("curl -fsSL https://ollama.com/install.sh | sudo sh", use_shell=True)

    # Start Ollama in the background.
    subprocess.Popen(["ollama", "serve"])

    # Poll until the local service responds, then open the tunnel.
    while True:
        try:
            response = requests.get("http://localhost:11434", timeout=2)
            if response.status_code == 200:
                print("Ollama service is up and running!")
                public_url = ngrok.connect(11434)
                print(f"Service is accessible at: {public_url}")
                break
        except requests.ConnectionError:
            time.sleep(2)


def create_model():
    """Create an Ollama model from the downloaded GGUF file."""
    # `ollama create -f` expects a Modelfile, not the GGUF itself, so write a
    # minimal Modelfile that points at the downloaded weights.
    with open("Modelfile", "w") as f:
        f.write("FROM ./unllama.gguf\n")
    run_command(["ollama", "create", "unllama", "-f", "Modelfile"])
    print("Created unllama model")


if __name__ == "__main__":
    start_ollama_service()  # Ensure the service is running before creating the model
    download_model()
    create_model()
    print("Ollama service is running and accessible through ngrok.")
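
# --- Usage sketch (not part of the setup flow above) ---
# A minimal client example showing how the exposed service could be queried
# through Ollama's standard /api/generate endpoint. The base URL below is a
# hypothetical placeholder; substitute the public_url that
# start_ollama_service() prints once the tunnel is open.
def query_model(base_url, prompt):
    """Send a single non-streaming generation request to the exposed service."""
    response = requests.post(
        f"{base_url}/api/generate",
        json={"model": "unllama", "prompt": prompt, "stream": False},
        timeout=300,
    )
    response.raise_for_status()
    return response.json()["response"]

# Example call (assumes the ngrok tunnel from above is still active):
# print(query_model("https://<your-tunnel>.ngrok-free.app", "Hello, who are you?"))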