import streamlit as st
from huggingface_hub import InferenceClient, HfApi
import time
import requests
from requests.exceptions import RequestException
from gtts import gTTS  # Google Text-to-Speech
import tempfile
import os

# Set page config at the very beginning (must be the first Streamlit call).
st.set_page_config(page_title="Phi-3.5 Chatbot", page_icon="🤖")

# Hugging Face API token entered by the user (masked input field).
hf_token = st.text_input("Enter your Hugging Face API token", type="password")


@st.cache_resource
def get_client(token):
    """Return a cached InferenceClient for the Phi-3.5 mini instruct model."""
    return InferenceClient("microsoft/Phi-3.5-mini-instruct", token=token)


def validate_token(token):
    """Check that *token* is a usable Hugging Face API token.

    Returns True when ``whoami()`` succeeds; otherwise shows the error in
    the Streamlit UI and returns False.
    """
    try:
        api = HfApi(token=token)
        api.whoami()
        return True
    except Exception as e:
        st.error(f"Token validation failed: {str(e)}")
        return False


def make_request_with_retries(client, prompt, max_new_tokens, temperature, top_p,
                              max_retries=5, initial_delay=1):
    """Call ``client.text_generation`` with exponential-backoff retries.

    Retries up to *max_retries* times on network errors, doubling the wait
    each attempt (initial_delay, 2*initial_delay, ...). Re-raises the last
    error when every attempt fails.

    Returns the generated text on success.
    """
    for attempt in range(max_retries):
        try:
            return client.text_generation(
                prompt,
                max_new_tokens=max_new_tokens,
                temperature=temperature,
                top_p=top_p,
            )
        except RequestException:
            if attempt < max_retries - 1:
                delay = initial_delay * (2 ** attempt)  # Exponential backoff
                st.warning(
                    f"Request failed. Retrying in {delay} seconds... "
                    f"(Attempt {attempt + 1}/{max_retries})"
                )
                time.sleep(delay)
            else:
                # Bare raise preserves the original traceback; `raise e`
                # would re-raise from here and lose the failure site.
                raise


def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Yield the assistant's reply for *message* given chat *history*.

    *history* is a list of (user_msg, assistant_msg) pairs. Yields an
    apology string (after surfacing the error via st.error) when the
    request fails; yields nothing at all when the token is missing or
    invalid, so callers must tolerate an empty generator.
    """
    if not hf_token:
        st.error("Please enter your Hugging Face API token.")
        return
    if not validate_token(hf_token):
        return

    client = get_client(hf_token)

    # Build a plain-text transcript: system message, then alternating
    # Human/Assistant turns, ending with the new message. join() avoids
    # quadratic string concatenation on long histories.
    parts = [f"{system_message}\n\n"]
    for user_msg, assistant_msg in history:
        parts.append(f"Human: {user_msg}\nAssistant: {assistant_msg}\n\n")
    parts.append(f"Human: {message}\nAssistant:")
    prompt = "".join(parts)

    try:
        response = make_request_with_retries(client, prompt, max_tokens, temperature, top_p)
        yield response
    except Exception as e:
        st.error(f"An error occurred: {str(e)}")
        yield "I'm sorry, but I encountered an error while processing your request."
def text_to_speech(text):
    """Render *text* to an MP3 file via gTTS and return the file's path.

    The temp file is created with delete=False, so the caller owns cleanup.
    Note: gTTS raises for empty text — callers must guard against "".
    """
    tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
    # Close the handle before gTTS writes by name; an open
    # NamedTemporaryFile cannot be reopened by path on Windows.
    tmp_file.close()
    tts = gTTS(text=text, lang='en')
    tts.save(tmp_file.name)
    return tmp_file.name


st.title("Phi-3.5 Mini Chatbot")

if "messages" not in st.session_state:
    st.session_state.messages = []

system_message = st.text_input("System message", value="You are a helpful AI assistant.")
max_tokens = st.slider("Max new tokens", min_value=1, max_value=1024, value=256, step=1)
temperature = st.slider("Temperature", min_value=0.01, max_value=1.0, value=0.7, step=0.01)
top_p = st.slider("Top P", min_value=0.0, max_value=1.0, value=0.9, step=0.01)

# Replay the stored conversation so it survives Streamlit reruns.
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

if prompt := st.chat_input("What is your message?"):
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    with st.chat_message("assistant"):
        message_placeholder = st.empty()
        full_response = ""
        # Pair prior messages into (user, assistant) turns. The previous
        # code indexed messages[i+1] with i taken from enumerate() over a
        # strided slice, which paired every user turn after the first with
        # the wrong message (it needed index 2*i+1). Pairing adjacent
        # entries of the un-strided list fixes that.
        past = st.session_state.messages[:-1]  # exclude the new user prompt
        history = [
            (past[j]["content"], past[j + 1]["content"])
            for j in range(0, len(past) - 1, 2)
        ]
        for response in respond(prompt, history, system_message, max_tokens, temperature, top_p):
            message_placeholder.markdown(response)
            full_response = response

    st.session_state.messages.append({"role": "assistant", "content": full_response})

    # Generate audio from the assistant's response. respond() yields
    # nothing when the token is missing/invalid, leaving full_response
    # empty, and gTTS raises on empty text — so skip audio in that case.
    if full_response.strip():
        audio_file = text_to_speech(full_response)
        st.audio(audio_file, format='audio/mp3')