import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
import os
# Specify the model hosted on the Hugging Face Hub
repo_id = "ineair/llm-jp-3-3.7b-instruct-EZO-Humanities-gguf"
filename = "llm-jp-3-3.7b-instruct-EZO-Humanities-f16.gguf"

# Download the model (a cached copy is reused if one already exists)
model_path = hf_hub_download(repo_id=repo_id, filename=filename)
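# Assumption, not stated in the original app: with no cache_dir argument,
# hf_hub_download stores files under the default Hugging Face cache
# (~/.cache/huggingface/hub, or the location set via HF_HOME / HF_HUB_CACHE).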
CONTEXT_SIZE = 4096

# Load the GGUF model with llama-cpp-python
llm = Llama(
    model_path=model_path,
    n_threads=os.cpu_count(),  # use all available CPU cores
    n_batch=32,
    verbose=False,
    n_ctx=CONTEXT_SIZE,
)
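# Optional tweak, not part of the original app: when the llama-cpp-python
# wheel is built with GPU support, Llama() also accepts n_gpu_layers
# (e.g. n_gpu_layers=-1 to offload every layer). As configured above,
# this Space runs CPU-only.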
def get_llama_response(prompt):
    # stream=True makes the call return an iterator of partial completion chunks
    return llm(prompt, max_tokens=2048, temperature=0.7, top_p=0.95, repeat_penalty=1.1, stream=True)
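# For reference (shape assumed from the llama-cpp-python completion API):
# each streamed chunk is a dict like {"choices": [{"text": "...", ...}], ...},
# which is why greet() below reads output['choices'][0]['text'].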
def greet(prompt, intensity):
    full_response = ""
    for output in get_llama_response(prompt):
        if len(output['choices']) > 0:
            text_chunk = output['choices'][0]['text']
            full_response += text_chunk
            yield full_response
    # Yield the final value instead of returning it: Gradio ignores a
    # generator's return value, so the intensity suffix would never render.
    yield full_response + "!" * int(intensity)
demo = gr.Interface(
    title="Llama.cpp-python-sample (Streaming)",
    description=f"MODEL: {filename} from {repo_id}",
    fn=greet,
    inputs=[
        gr.Textbox(label="Enter your prompt"),
        gr.Slider(minimum=0, maximum=10, step=1, label="Intensity"),
    ],
    outputs=gr.Textbox(label="Generated Response"),
    live=False,
)

demo.queue()
demo.launch()
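# Note: Gradio requires the request queue for generator (streaming) functions;
# demo.queue() enables it explicitly here so the output Textbox re-renders on
# every chunk yielded by greet().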