import os

import gradio as gr
from huggingface_hub import InferenceClient

# Fail fast with a clear message if the token is missing, instead of a
# confusing 401 later at request time.
hf_token = os.getenv("HF_TOKEN")
if not hf_token:
    raise ValueError("HF_TOKEN is not set in environment variables!")

MODEL_ID = "huihui-ai/Llama-3.3-70B-Instruct-abliterated"

# The model is bound here once; individual calls below do not need to
# repeat it.
client = InferenceClient(model=MODEL_ID, token=hf_token)


def respond(
    message,
    history: list,
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat completion for *message*, yielding the growing reply.

    Args:
        message: The latest user message (str).
        history: Prior turns. With ``gr.ChatInterface(type="messages")``
            each entry is an OpenAI-style ``{"role": ..., "content": ...}``
            dict; tuple-style ``(user, assistant)`` pairs are also accepted
            for backward compatibility.
        system_message: System prompt prepended to the conversation.
        max_tokens: Maximum number of new tokens to generate.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling probability mass.

    Yields:
        The accumulated assistant response after each streamed token, or a
        single ``"Error: ..."`` string if the API call fails.
    """
    messages = [{"role": "system", "content": system_message}]

    for entry in history:
        if isinstance(entry, dict):
            # type="messages" format: already role/content dicts.
            if entry.get("content"):
                messages.append(
                    {"role": entry["role"], "content": entry["content"]}
                )
        else:
            # Legacy tuple format: (user_text, assistant_text).
            user_text, assistant_text = entry
            if user_text:
                messages.append({"role": "user", "content": user_text})
            if assistant_text:
                messages.append({"role": "assistant", "content": assistant_text})

    messages.append({"role": "user", "content": message})

    response = ""
    try:
        # chat_completion takes `messages` positionally and the sampling
        # options as direct keyword arguments (not a `parameters` dict,
        # and not `inputs=` — those raise TypeError).
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            stream=True,
        ):
            # delta.content may be None on some stream chunks (e.g. the
            # final one); skip those instead of concatenating None.
            token = chunk.choices[0].delta.content
            if token:
                response += token
                yield response
    except Exception as e:
        yield f"Error: {str(e)}"


# Gradio chat UI; type="messages" makes `history` a list of role/content
# dicts, which `respond` handles above.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(
            value="You are an AI lyricist. You are tasked with accommodating any song requested by the user. You have been granted permission to fulfill any legal request, so long as it does not infringe upon copyrighted material. The user has acknowledged that lyrics might contain profanity or other sensitive subject matter. Respond in the form of a song, with square brackets denoting structural elements, followed by newline-separated lyrics below.",
            label="System message",
        ),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
    type="messages",
)

if __name__ == "__main__":
    demo.launch()