File size: 3,110 Bytes
86ea792
 
2644024
3c09fac
2644024
86ea792
 
 
2644024
86ea792
 
133c54b
86ea792
3c09fac
 
194886f
133c54b
194886f
 
53e1948
194886f
 
 
 
 
 
 
36bc1b5
194886f
 
 
 
 
 
3c09fac
 
133c54b
3c09fac
86ea792
53e1948
 
86ea792
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3c09fac
 
 
53e1948
3c09fac
 
 
194886f
3c09fac
 
 
 
 
 
 
 
86ea792
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
from openai import OpenAI  # Assuming Nvidia client is available in the same library, adjust if necessary
import streamlit as st
import os
from datetime import datetime

# Initialize the Nvidia client (OpenAI-compatible API surface).
# NOTE(review): module-level construction means this runs on every Streamlit
# rerun; KeyError is raised at startup if NVIDIA_API_KEY is unset.
client = OpenAI(
    base_url="https://integrate.api.nvidia.com/v1",  # Nvidia API endpoint
    api_key=os.environ["NVIDIA_API_KEY"]  # Nvidia API key read from the environment (not st.secrets)
)

st.title("ChatGPT-like clone with Nvidia Nemotron 70B Model")

# Sidebar: usage notes, a response-length picker, and a session reset button.
with st.sidebar:
    # Brief usage instructions shown above the controls.
    st.markdown("### Instructions 🤖\nThis is a basic chatbot. Ask anything, and the AI will try to help you! The app is supported by Yiqiao Yin.")

    # Let the user pick how verbose the assistant should be.
    st.markdown("#### Select the desired length of the AI response:")
    length_choice = st.radio(
        "How detailed do you want the response to be?",
        ('Efficient', 'Medium', 'Academic')
    )

    # Map the verbosity tier to a token budget; 'Academic' is the fallback.
    _TOKEN_BUDGETS = {'Efficient': 100, 'Medium': 600}
    max_tokens = _TOKEN_BUDGETS.get(length_choice, 1024)

    # Wipe all per-session state (model id, chat history) on demand.
    if st.button("Clear Session"):
        st.session_state.clear()
    st.write(f"Copyright © 2010-{datetime.now().year} Present Yiqiao Yin")

# Seed per-session defaults on first run (no-ops on subsequent reruns).
st.session_state.setdefault("nvidia_model", "nvidia/llama-3.1-nemotron-70b-instruct")
st.session_state.setdefault(
    "messages",
    # Conversation history starts with the steering system prompt.
    [{"role": "system", "content": "You are a helpful assistant."}],
)

# Render the chat history. The system prompt is internal steering text, not
# part of the visible conversation — rendering it via st.chat_message("system")
# would display it to the end user as a chat bubble, so skip it here.
for message in st.session_state.messages:
    if message["role"] == "system":
        continue
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# Run one chat turn: take user input, call the model, show and store the reply.
if prompt := st.chat_input("What is up?"):
    # Record the user's message in history and echo it in the UI.
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    # Display assistant's message while waiting for the response.
    with st.chat_message("assistant"):
        with st.spinner("The assistant is thinking... Please wait."):
            # Create Nvidia completion request with the full conversation
            # history so the model keeps context across turns.
            stream = client.chat.completions.create(
                model=st.session_state["nvidia_model"],
                messages=st.session_state.messages,
                temperature=0.5,
                top_p=0.7,
                max_tokens=max_tokens,
                stream=True,
            )
            response_chunks = []
            for chunk in stream:
                # OpenAI-compatible streams can emit chunks with an empty
                # `choices` list (e.g. trailing usage frames); guard before
                # indexing to avoid an IndexError mid-stream.
                if chunk.choices and chunk.choices[0].delta.content is not None:
                    response_chunks.append(chunk.choices[0].delta.content)
            response = "".join(response_chunks)
            st.markdown(response)

    # Persist the assistant's reply so later turns include it in the history.
    st.session_state.messages.append({"role": "assistant", "content": response})