File size: 3,110 Bytes
86ea792
 
2644024
3c09fac
2644024
86ea792
 
 
2644024
86ea792
 
133c54b
86ea792
3c09fac
 
194886f
133c54b
194886f
 
53e1948
194886f
 
 
 
 
 
 
36bc1b5
194886f
 
 
 
 
 
3c09fac
 
133c54b
3c09fac
86ea792
53e1948
 
86ea792
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3c09fac
 
 
53e1948
3c09fac
 
 
194886f
3c09fac
 
 
 
 
 
 
 
86ea792
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
from openai import OpenAI  # Assuming Nvidia client is available in the same library, adjust if necessary
import streamlit as st
import os
from datetime import datetime

# Initialize the Nvidia client (OpenAI-compatible API surface).
# NOTE(review): module-level construction means this runs on every Streamlit
# rerun; KeyError is raised at startup if NVIDIA_API_KEY is unset.
client = OpenAI(
    base_url="https://integrate.api.nvidia.com/v1",  # Nvidia API endpoint
    api_key=os.environ["NVIDIA_API_KEY"]  # Nvidia API key read from the environment (not st.secrets)
)

st.title("ChatGPT-like clone with Nvidia Nemotron 70B Model")

# Sidebar: usage notes, a response-length picker, and a session reset button.
with st.sidebar:
    # Brief usage instructions shown above the controls.
    st.markdown("### Instructions 🤖\nThis is a basic chatbot. Ask anything, and the AI will try to help you! The app is supported by Yiqiao Yin.")

    # Let the user pick how verbose the assistant should be.
    st.markdown("#### Select the desired length of the AI response:")
    length_choice = st.radio(
        "How detailed do you want the response to be?",
        ('Efficient', 'Medium', 'Academic')
    )

    # Map the verbosity tier to a token budget; 'Academic' is the fallback.
    _TOKEN_BUDGETS = {'Efficient': 100, 'Medium': 600}
    max_tokens = _TOKEN_BUDGETS.get(length_choice, 1024)

    # Wipe all per-session state (model id, chat history) on demand.
    if st.button("Clear Session"):
        st.session_state.clear()
    st.write(f"Copyright © 2010-{datetime.now().year} Present Yiqiao Yin")

# Seed per-session defaults on first run (no-ops on subsequent reruns).
st.session_state.setdefault("nvidia_model", "nvidia/llama-3.1-nemotron-70b-instruct")
st.session_state.setdefault(
    "messages",
    # Conversation history starts with the steering system prompt.
    [{"role": "system", "content": "You are a helpful assistant."}],
)

# Render the chat history. The system prompt is internal steering text, not
# part of the visible conversation — rendering it via st.chat_message("system")
# would display it to the end user as a chat bubble, so skip it here.
for message in st.session_state.messages:
    if message["role"] == "system":
        continue
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# Run one chat turn: take user input, call the model, show and store the reply.
if prompt := st.chat_input("What is up?"):
    # Record the user's message in history and echo it in the UI.
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    # Display assistant's message while waiting for the response.
    with st.chat_message("assistant"):
        with st.spinner("The assistant is thinking... Please wait."):
            # Create Nvidia completion request with the full conversation
            # history so the model keeps context across turns.
            stream = client.chat.completions.create(
                model=st.session_state["nvidia_model"],
                messages=st.session_state.messages,
                temperature=0.5,
                top_p=0.7,
                max_tokens=max_tokens,
                stream=True,
            )
            response_chunks = []
            for chunk in stream:
                # OpenAI-compatible streams can emit chunks with an empty
                # `choices` list (e.g. trailing usage frames); guard before
                # indexing to avoid an IndexError mid-stream.
                if chunk.choices and chunk.choices[0].delta.content is not None:
                    response_chunks.append(chunk.choices[0].delta.content)
            response = "".join(response_chunks)
            st.markdown(response)

    # Persist the assistant's reply so later turns include it in the history.
    st.session_state.messages.append({"role": "assistant", "content": response})