import os
import subprocess
import sys
import time

import requests
from pyngrok import ngrok

# Bind Ollama to all interfaces so the ngrok tunnel can reach it.
os.environ["OLLAMA_HOST"] = "0.0.0.0"


def run_command(command_list, check=True, use_shell=False):
    """Run a command given as a list, or as a string when use_shell=True."""
    try:
        subprocess.run(command_list, check=check, shell=use_shell)
    except subprocess.CalledProcessError as e:
        # In shell mode the command is a plain string, not a list.
        cmd = command_list if isinstance(command_list, str) else " ".join(command_list)
        print(f"Error running command: {cmd}\n{e}")
        sys.exit(1)


def download_model():
    """Download the Llama model from HuggingFace."""
    model_url = "https://huggingface.co/Orenguteng/Llama-3.1-8B-Lexi-Uncensored-V2-GGUF/resolve/main/Llama-3.1-8B-Lexi-Uncensored_V2_Q4.gguf"
    print(f"Downloading model from {model_url}...")
    run_command(["wget", "-O", "unllama.gguf", model_url])


def start_ollama_service():
    """Install and start the Ollama service, then expose it via ngrok."""
    ngrok_token = os.getenv("NGROK_AUTHTOKEN")
    if not ngrok_token:
        print("NGROK_AUTHTOKEN is not set!")
        sys.exit(1)

    ngrok.set_auth_token(ngrok_token)

    # Install Ollama via its official install script (requires sudo).
    print("Installing Ollama...")
    run_command("curl -fsSL https://ollama.com/install.sh | sudo sh", use_shell=True)

    # Start the server in the background; it listens on port 11434 by default.
    print("Starting Ollama service...")
    subprocess.Popen(["ollama", "serve"])

    # Poll until the server responds, then open the tunnel.
    while True:
        try:
            response = requests.get("http://localhost:11434")
            if response.status_code == 200:
                print("Ollama service is up and running!")
                public_url = ngrok.connect(11434)
                print(f"Service is accessible at: {public_url}")
                break
        except requests.ConnectionError:
            time.sleep(2)


def create_model():
    """Create an Ollama model from the downloaded GGUF file."""
    # `ollama create -f` expects a Modelfile, not the raw GGUF, so write a
    # minimal Modelfile pointing at the downloaded weights.
    with open("Modelfile", "w") as f:
        f.write("FROM ./unllama.gguf\n")
    run_command(["ollama", "create", "unllama", "-f", "Modelfile"])
    print("Created unllama model")


if __name__ == "__main__":
    start_ollama_service()
    download_model()
    create_model()
    print("Ollama service is running and accessible through ngrok.")