"""Streamlit chat UI backed by an Nvidia-hosted Nemotron model.

The script keeps the conversation in ``st.session_state.messages`` and sends
the full history to the OpenAI-compatible Nvidia endpoint on every turn.
"""

from openai import OpenAI  # Nvidia's endpoint is OpenAI-compatible; adjust if necessary
import streamlit as st
import os
from datetime import datetime

# Maps each sidebar choice to the max_tokens value sent with the request.
# Key order is also the order in which the radio options are displayed.
RESPONSE_TOKEN_LIMITS = {
    "Efficient": 100,
    "Medium": 600,
    "Academic": 1024,
}
# Fallback used if the radio ever yields a value outside the table.
DEFAULT_MAX_TOKENS = 1024

# Initialize the client against the Nvidia API endpoint.
# The key is read from the NVIDIA_API_KEY environment variable; a missing
# variable raises KeyError at startup.
client = OpenAI(
    base_url="https://integrate.api.nvidia.com/v1",
    api_key=os.environ["NVIDIA_API_KEY"],
)

st.title("ChatGPT-like clone with Nvidia Nemotron 70B Model")

# Sidebar: instructions, response-length selector and Clear Session button.
with st.sidebar:
    st.markdown("### Instructions 🤖\nThis is a basic chatbot. Ask anything, and the AI will try to help you! The app is supported by Yiqiao Yin.")

    # Ask the user how long the response should be.
    st.markdown("#### Select the desired length of the AI response:")
    response_length = st.radio(
        "How detailed do you want the response to be?",
        tuple(RESPONSE_TOKEN_LIMITS),
    )

    # Translate the selection into a token budget.
    max_tokens = RESPONSE_TOKEN_LIMITS.get(response_length, DEFAULT_MAX_TOKENS)

    # Wipe the stored state; the defaults below are re-created later in this
    # same run, so the history restarts from the system prompt.
    if st.button("Clear Session"):
        st.session_state.clear()

    st.write(f"Copyright © 2010-{datetime.now().year} Present Yiqiao Yin")

# Initialize session state variables if not already present.
if "nvidia_model" not in st.session_state:
    st.session_state["nvidia_model"] = "nvidia/llama-3.1-nemotron-70b-instruct"

if "messages" not in st.session_state:
    # Seed the history with the system prompt.
    st.session_state.messages = [{"role": "system", "content": "You are a helpful assistant."}]

# Render the chat history. The system prompt is an instruction for the model,
# not part of the visible conversation, so it is not drawn as a chat bubble.
for message in st.session_state.messages:
    if message["role"] == "system":
        continue
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# Get new user input.
if prompt := st.chat_input("What is up?"):
    # Record and echo the user's message.
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    # Show a spinner in the assistant bubble while the reply is collected.
    with st.chat_message("assistant"):
        with st.spinner("The assistant is thinking... Please wait."):
            # Send the full conversation history (system prompt included).
            stream = client.chat.completions.create(
                model=st.session_state["nvidia_model"],
                messages=st.session_state.messages,
                temperature=0.5,
                top_p=0.7,
                max_tokens=max_tokens,
                stream=True,
            )

            response_chunks = []
            for chunk in stream:
                # A chunk may carry no choices at all; indexing [0] on it
                # would raise IndexError, so skip it.
                if not chunk.choices:
                    continue
                piece = chunk.choices[0].delta.content
                if piece is not None:
                    response_chunks.append(piece)

            response = "".join(response_chunks)
            st.markdown(response)

    # Store the assistant response so it is part of the next request.
    st.session_state.messages.append({"role": "assistant", "content": response})