Spaces:
Running
Running
File size: 2,227 Bytes
813eb9d 965f814 813eb9d 965f814 813eb9d 965f814 813eb9d 965f814 813eb9d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
import os
import streamlit as st
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
# Re-export the Hugging Face API token only when it is actually set.
# Assigning None into os.environ raises TypeError, which previously crashed
# the app at import time whenever HUGGINGFACE_TOKEN was not defined.
# NOTE(review): huggingface_hub documents HF_TOKEN as the variable it reads;
# confirm whether HUGGINGFACE_TOKEN is consumed anywhere.
_hf_token = os.getenv("HUGGINGFACE_TOKEN")
if _hf_token is not None:
    os.environ["HUGGINGFACE_TOKEN"] = _hf_token

model_name_or_path = "mmnga/ELYZA-japanese-Llama-2-7b-fast-instruct-gguf"
model_basename = "ELYZA-japanese-Llama-2-7b-fast-instruct-q5_K_M.gguf"


@st.cache_resource
def _load_llama():
    """Download the GGUF weights and build the Llama instance.

    Cached with ``st.cache_resource`` so the download lookup and the model
    load are not repeated each time Streamlit re-executes this script.

    Returns:
        A ``(model_path, llama)`` tuple: the local path returned by
        ``hf_hub_download`` and the ``Llama`` object created from it.
    """
    path = hf_hub_download(
        repo_id=model_name_or_path,
        filename=model_basename,
        revision="main",
    )
    # NOTE(review): the cached Llama object is shared by every session in
    # this process; confirm that concurrent generations are acceptable.
    return path, Llama(path, n_ctx=5120)


model_path, llama = _load_llama()
def predict(messages):
    """Stream a chat completion and yield the response text built so far.

    Args:
        messages: Chat history passed unchanged to
            ``llama.create_chat_completion``.

    Yields:
        The accumulated response string, once for every streamed chunk whose
        delta carries a ``content`` entry.
    """
    # Ask Llama for the answer with streaming turned on.
    chunks = llama.create_chat_completion(messages, stream=True, max_tokens=512)

    accumulated = ""
    for chunk in chunks:
        message = chunk['choices'][0]['delta']
        print(f"message: {message}")
        # Deltas without text (e.g. role-only chunks) add nothing to the reply.
        if 'content' not in message:
            continue
        accumulated += message['content']
        yield accumulated
def main():
    """Render the chat page and process at most one user turn per run.

    Replays the stored conversation, reads the chat input, and, when the user
    submitted a message, streams the assistant reply into a placeholder before
    recording both messages in ``st.session_state.messages``.
    """
    st.title("Chat with Elyza!")

    # Create the conversation history the first time the session needs it.
    if 'messages' not in st.session_state:
        st.session_state.messages = []
    history = st.session_state.messages

    # Replay the stored conversation so it stays visible after a rerun.
    for entry in history:
        with st.chat_message(entry["role"]):
            st.markdown(f"{entry['content']}")

    user_message = st.chat_input("Your Message")
    if not user_message:
        # Nothing was submitted on this run.
        return

    # Show the new user message and record it in the history.
    with st.chat_message("user"):
        st.markdown(f"{user_message}")
    history.append({"role": "user", "content": user_message})

    with st.chat_message("assistant"):
        message_placeholder = st.empty()
        full_response = ""
        prompt_messages = [
            {"role": past["role"], "content": past["content"]}
            for past in history
        ]
        # predict() yields the whole text so far, so each value replaces the
        # previous one instead of being appended to it.
        for partial in predict(prompt_messages):
            full_response = partial
            message_placeholder.markdown(full_response + " ❚ ")
        # Final render without the trailing cursor marker.
        message_placeholder.markdown(full_response)
    history.append({"role": "assistant", "content": full_response})
# Build the chat UI only when this file is executed as the main module.
if __name__ == "__main__":
    main()
|