# NOTE: Hugging Face Spaces page residue (runtime status, commit hashes, and
# blob line numbers) was captured along with this file; removed as non-code.
# --- Environment / model bootstrap (runs at import time) ---------------------
import gradio as gr
import os
from pathlib import Path
# Build llama-cpp-python with cuBLAS (GPU) support. The env var and the inline
# CMAKE_ARGS prefix below set the same flag twice — one of them is redundant.
os.environ["CMAKE_ARGS"] = "-DLLAMA_CUBLAS=on"
os.system('CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama-cpp-python[server]')
import argparse  # NOTE(review): imported but never used in the visible code
model_file = "yi-chat-6b.Q4_K_M.gguf"
if not os.path.isfile(model_file):
    # First run only: download the quantized Yi-6B-Chat GGUF weights
    # (`wget -c` resumes a partial download).
    os.system("wget -c https://huggingface.co/XeIaso/yi-chat-6B-GGUF/resolve/main/yi-chat-6b.Q4_K_M.gguf")
DEFAULT_MODEL_PATH = model_file
from llama_cpp import Llama
# NOTE(review): `model_type` is a ctransformers argument, not a documented
# llama_cpp.Llama parameter — presumably ignored here; verify against the
# installed llama-cpp-python version.
llm = Llama(model_path=model_file, model_type="mistral")
# HACK: forces the model's EOS token id to 7 — presumably the <|im_end|>
# token of Yi's ChatML vocabulary so generation stops; TODO confirm the id
# matches this specific GGUF file.
llm._token_eos = 7
def predict(input, chatbot, max_length, top_p, temperature, history):
    """Stream a model completion into the chat display.

    Appends the user message to both the chatbot pairs and the flat history
    list, then yields (chatbot, history) after every streamed token chunk so
    Gradio can render the reply incrementally. The final reply string is
    appended to history before the last yield.
    """
    chatbot.append((input, ""))
    history.append(input)
    reply = ""
    stream = llm(
        input,
        stream=True,
        temperature=temperature,
        top_p=top_p,
        max_tokens=max_length,
        stop=["<|im_end|>"],
    )
    for chunk in stream:
        reply += chunk["choices"][0]["text"]
        # Replace only the assistant half of the last (user, assistant) pair.
        chatbot[-1] = (chatbot[-1][0], reply)
        yield chatbot, history
    history.append(reply)
    yield chatbot, history
def reset_user_input():
    """Return a Gradio update that clears the input textbox."""
    cleared = gr.update(value="")
    return cleared
def reset_state():
    """Reset the chat display and the history state to fresh empty lists."""
    return list(), list()
# --- Gradio UI ---------------------------------------------------------------
# FIX: removed the stray " |" extraction residue that trailed the final
# launch() call, which made the file unparseable.
with gr.Blocks() as demo:
    gr.HTML("""<h1 align="center">Yi-6B-Chat by llama-cpp-python</h1>""")
    chatbot = gr.Chatbot()
    with gr.Row():
        with gr.Column(scale=4):
            user_input = gr.Textbox(show_label=False, placeholder="Input...", lines=8)
            submitBtn = gr.Button("Submit", variant="primary")
        with gr.Column(scale=1):
            # Sampling controls wired into predict() as positional inputs.
            max_length = gr.Slider(0, 32048, value=2048, step=1.0, label="Maximum Length", interactive=True)
            top_p = gr.Slider(0, 1, value=0.7, step=0.01, label="Top P", interactive=True)
            temperature = gr.Slider(0, 1, value=0.95, step=0.01, label="Temperature", interactive=True)
            emptyBtn = gr.Button("Clear History")

    # Flat per-session conversation log (alternating user/assistant strings,
    # appended by predict()).
    history = gr.State([])

    # Submit streams generator output into the chatbot, then a second click
    # handler clears the textbox.
    submitBtn.click(
        predict,
        [user_input, chatbot, max_length, top_p, temperature, history],
        [chatbot, history],
        show_progress=True,
    )
    submitBtn.click(reset_user_input, [], [user_input])
    emptyBtn.click(reset_state, outputs=[chatbot, history], show_progress=True)

# queue() is required for streaming (generator) callbacks; launch() blocks.
demo.queue().launch(share=False, inbrowser=True)