import gradio as gr
from huggingface_hub import InferenceClient
import os
from threading import Event
# Hugging Face API token, read from the environment (may be None).
hf_token = os.getenv("HF_TOKEN")

# Cooperative cancellation flag shared between the UI and the streaming loop;
# set by the "Stop generation" button, cleared at the start of each request.
stop_event = Event()
def get_model_response(client, messages, max_tokens, temperature, top_p):
    """Stream a chat completion, yielding the accumulated text so far."""
    try:
        response = client.chat_completion(
            messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            stream=True
        )
        full_response = ""
        for message in response:
            if stop_event.is_set():
                break
            token = message.choices[0].delta.content if hasattr(message.choices[0], 'delta') else message.choices[0].text
            if token:
                full_response += token
                yield full_response
        stop_event.clear()
    except Exception as e:
        yield f"Model inference failed: {str(e)}"
def respond(message, history, system_message, max_tokens, temperature, top_p, selected_model):
    """Gradio handler: rebuild the chat transcript and stream the reply."""
    stop_event.clear()
    try:
        client = InferenceClient(model=selected_model, token=hf_token)
        messages = [{"role": "system", "content": system_message}]
        # history is a list of (user, assistant) pairs; flatten it into
        # alternating role messages, skipping empty turns.
        for user_msg, assistant_msg in history:
            if user_msg:
                messages.append({"role": "user", "content": user_msg})
            if assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})
        messages.append({"role": "user", "content": message})
        response = ""
        for partial_response in get_model_response(client, messages, max_tokens, temperature, top_p):
            response = partial_response
            yield "", history + [(message, response)]
    except Exception as e:
        yield "", history + [(message, f"Error: {str(e)}")]

def stop_generation():
    stop_event.set()
    return "Generation stopped."
models = {
    "deepseek-ai/DeepSeek-Coder-V2-Instruct": "DeepSeek-Coder-V2-Instruct",
    "CohereForAI/c4ai-command-r-plus": "Cohere Command-R Plus",
    "meta-llama/Meta-Llama-3.1-8B-Instruct": "Meta-Llama-3.1-8B-Instruct"
}
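
# Note: some of these repos (e.g. CohereForAI/c4ai-command-r-plus and
# meta-llama/Meta-Llama-3.1-8B-Instruct) are gated on the Hub, so serverless
# inference against them generally requires an HF_TOKEN that has been granted
# access to the corresponding model.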
def regenerate_response(history, system_message, max_tokens, temperature, top_p, selected_model):
    # A lambda cannot contain `yield`, so a named generator is needed here for
    # Gradio to stream the regenerated reply instead of receiving a generator object.
    last_user = history[-1][0] if history else ""
    yield from respond(last_user, history[:-1], system_message, max_tokens, temperature, top_p, selected_model)

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    with gr.Row():
        msg = gr.Textbox(scale=4, label="Enter message")
        send = gr.Button("Send", scale=1)
    with gr.Row():
        regenerate = gr.Button("🔄 Regenerate")
        stop = gr.Button("🛑 Stop generation")
        clear = gr.Button("🗑️ Clear chat history")
    with gr.Accordion("Additional settings", open=True):
        system_message = gr.Textbox(
            value="You are my best assistant.\nAnswer my requests as thoroughly and accurately as possible.\nYou must always answer in Korean.",
            label="System message",
            lines=10
        )
        max_tokens = gr.Slider(minimum=1, maximum=2000, value=500, step=100, label="Max new tokens")
        temperature = gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.05, label="Temperature")
        top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.90, step=0.05, label="Top-p (nucleus sampling)")
        model = gr.Radio(list(models.keys()), value=list(models.keys())[0], label="Language model", info="Choose the language model to use")

    send.click(respond, inputs=[msg, chatbot, system_message, max_tokens, temperature, top_p, model], outputs=[msg, chatbot])
    msg.submit(respond, inputs=[msg, chatbot, system_message, max_tokens, temperature, top_p, model], outputs=[msg, chatbot])
    regenerate.click(regenerate_response, inputs=[chatbot, system_message, max_tokens, temperature, top_p, model], outputs=[msg, chatbot])
    stop.click(stop_generation, inputs=[], outputs=[msg])
    clear.click(lambda: (None, None), outputs=[msg, chatbot])
if __name__ == "__main__":
    if not hf_token:
        print("Warning: the HF_TOKEN environment variable is not set. Some models may be inaccessible.")
    demo.queue()  # generator handlers need the queue to stream on older Gradio versions; a no-op where it is on by default
    demo.launch(share=True)
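
# Assumed local invocation (a Space runs app.py automatically, so this note
# only matters outside the Space):
#
#   HF_TOKEN=<your token> python app.py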