import os

import streamlit as st
from huggingface_hub import InferenceClient
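
# Inference client for the hosted model. The API token is expected in a
# "token" environment variable (e.g. set as a secret on the deployment).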
client = InferenceClient("Dolly135/Pen_Model", token=os.getenv("token"))


def respond(
    message: str,
    history: list,
    system_message: str,
    temperature: float,
    top_p: float,
    max_new_tokens: int,
):
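    """Stream the assistant's reply, yielding the accumulated text so far."""
    # Build the chat transcript: system prompt first, prior turns from
    # session state, then the new user message.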
    messages = [{"role": "system", "content": system_message}]
    messages.extend(history)
    messages.append({"role": "user", "content": message})
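
    # chat_completion accepts a list of chat messages and, with stream=True,
    # yields chunks whose delta carries the next piece of generated text
    # (text_generation, by contrast, takes only a prompt string).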
    try:
        response = ""
        for chunk in client.chat_completion(
            messages,
            stream=True,
            temperature=temperature,
            top_p=top_p,
            max_tokens=max_new_tokens,
        ):
            # The last streamed chunk may carry no content, so default to "".
            token = chunk.choices[0].delta.content or ""
            response += token
            yield response
    except Exception as e:
        yield f"An error occurred: {e}"


def main():
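    # Sidebar controls for the system prompt and sampling parameters.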
    st.sidebar.title("Model Settings")
    system_message = st.sidebar.text_input("System message", "You are Pen.")
    max_new_tokens = st.sidebar.slider("Max new tokens", 1, 2048, 512)
    temperature = st.sidebar.slider("Temperature", 0.1, 4.0, 0.7)
    top_p = st.sidebar.slider("Top-p (nucleus sampling)", 0.1, 1.0, 0.95)
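
    # st.session_state persists the chat history across Streamlit reruns.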
    if "history" not in st.session_state:
        st.session_state.history = []

    message = st.text_input("You:", "")

    if st.button("Send"):
        response = ""
        if message:
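            # Drain the streaming generator; only the final accumulated
            # response is kept (nothing is rendered per chunk here).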
            for response_text in respond(
                message,
                st.session_state.history,
                system_message,
                temperature,
                top_p,
                max_new_tokens,
            ):
                response = response_text

        if response:
            st.session_state.history.append({"role": "user", "content": message})
            st.session_state.history.append({"role": "assistant", "content": response})
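
    # Render the transcript as readable turns; str(history) would dump the
    # raw list of dicts, which is hard to read.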
    transcript = "\n".join(f"{m['role']}: {m['content']}" for m in st.session_state.history)
    st.text_area("Conversation", value=transcript, height=300)


if __name__ == "__main__":
    main()
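
# To run locally (assuming this file is saved as app.py):
#   streamlit run app.py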