Generate with CLI
mlx_lm.generate --model mlx-community/Qwen2.5-7B-Instruct-kowiki-qa-4bit --prompt "하늘이 파란 이유가 뭐야?"
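The sample prompt is Korean for "Why is the sky blue?". Generation length and sampling temperature can be set from the same command; flag names have shifted between mlx_lm releases, so treat this as a sketch and confirm with mlx_lm.generate --help:

mlx_lm.generate --model mlx-community/Qwen2.5-7B-Instruct-kowiki-qa-4bit --prompt "하늘이 파란 이유가 뭐야?" --max-tokens 256 --temp 0.2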
In Python
from mlx_lm import load, generate
model, tokenizer = load(
    "mlx-community/Qwen2.5-7B-Instruct-kowiki-qa-4bit",
    tokenizer_config={"trust_remote_code": True},
)
prompt = "하늘이 파란 이유가 뭐야?"  # "Why is the sky blue?"
messages = [
    # System prompt: "You are a friendly chatbot."
    {"role": "system", "content": "당신은 친절한 챗봇입니다."},
    {"role": "user", "content": prompt},
]
prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)
text = generate(
    model,
    tokenizer,
    prompt=prompt,
)
print(text)
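For interactive use, mlx_lm also exposes stream_generate, which yields output incrementally instead of returning one string. A minimal sketch reusing the model, tokenizer, and chat-templated prompt from above; note that recent mlx_lm releases yield response objects with a .text field, while older ones yielded plain strings:

from mlx_lm import stream_generate

for response in stream_generate(model, tokenizer, prompt=prompt, max_tokens=512):
    # Recent versions yield GenerationResponse objects; print each chunk as it arrives.
    print(response.text, end="", flush=True)
print()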
OpenAI Compatible HTTP Server
mlx_lm.server --model mlx-community/Qwen2.5-7B-Instruct-kowiki-qa-4bit --host 0.0.0.0
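The server listens on port 8080 by default and exposes the usual OpenAI-style routes. As a quick smoke test before wiring up a client, you can hit the chat completions endpoint directly with curl:

curl http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "mlx-community/Qwen2.5-7B-Instruct-kowiki-qa-4bit",
    "messages": [{"role": "user", "content": "하늘이 파란 이유가 뭐야?"}],
    "temperature": 0.2
  }'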
import openai

client = openai.OpenAI(
    base_url="http://localhost:8080/v1",
    # The client refuses to start without an API key; the local server does not check it.
    api_key="not-needed",
)
prompt = "하늘이 파란 이유가 뭐야?"  # "Why is the sky blue?"
messages = [
    # System prompt: "You are a friendly chatbot."
    {"role": "system", "content": "당신은 친절한 챗봇입니다."},
    {"role": "user", "content": prompt},
]
res = client.chat.completions.create(
    model="mlx-community/Qwen2.5-7B-Instruct-kowiki-qa-4bit",
    messages=messages,
    temperature=0.2,
)
print(res.choices[0].message.content)
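Recent mlx_lm releases also handle the streamed variant of the chat completions API, so token-by-token output works through the standard stream=True flag of the OpenAI client. A minimal sketch reusing the client and messages above:

stream = client.chat.completions.create(
    model="mlx-community/Qwen2.5-7B-Instruct-kowiki-qa-4bit",
    messages=messages,
    temperature=0.2,
    stream=True,
)
for chunk in stream:
    # Each chunk carries an incremental delta; content may be None on the final chunk.
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)
print()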