import os
from typing import TypedDict, Annotated

import gradio as gr
from huggingface_hub import login, list_models
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
from langchain_core.messages import AnyMessage, HumanMessage, AIMessage
from langchain.tools import Tool
from langgraph.graph import START, StateGraph
from langgraph.graph.message import add_messages
from langgraph.prebuilt import ToolNode, tools_condition

"""
For more information on `huggingface_hub` Inference API support, please check the docs:
https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""

HUGGINGFACEHUB_API_TOKEN = os.environ["HUGGINGFACEHUB_API_TOKEN"]
login(token=HUGGINGFACEHUB_API_TOKEN, add_to_git_credential=True)

llm = HuggingFaceEndpoint(
    # repo_id="HuggingFaceH4/zephyr-7b-beta",
    repo_id="Qwen/Qwen2.5-Coder-32B-Instruct",
    task="text-generation",
    max_new_tokens=512,
    do_sample=False,
    repetition_penalty=1.03,
    timeout=240,
)
model = ChatHuggingFace(llm=llm, verbose=True)


def get_hub_stats(author: str) -> str:
    """Fetch the most downloaded model from a specific author on the Hugging Face Hub.

    Useful when a user wants to know an author's most popular model.
    """
    try:
        # List the author's models sorted by downloads (descending), keeping only the top result
        models = list(list_models(author=author, sort="downloads", direction=-1, limit=1))
        if models:
            top_model = models[0]
            return f"The most downloaded model by {author} is {top_model.id} with {top_model.downloads:,} downloads."
        return f"No models found for author {author}."
    except Exception as e:
        return f"Error fetching models for {author}: {e}"


# Wrap the function as a LangChain tool so the agent can call it
hub_stats_tool = Tool(
    name="get_hub_stats",
    func=get_hub_stats,
    description="Fetches the most downloaded model from a specific author on the Hugging Face Hub.",
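
# A quick sanity check for the tool, left commented out so the app's behavior
# is unchanged. `Tool.run` is the standard LangChain entry point for a
# single-input tool; the author name is only an example, and the printed
# download count depends on the live Hub state at call time.
#
# print(hub_stats_tool.run("facebook"))
# -> e.g. "The most downloaded model by facebook is facebook/... with N downloads."
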
)


def predict(message, history):
    # Convert Gradio history to LangChain message format
    history_langchain_format = []
    for msg in history:
        if msg["role"] == "user":
            history_langchain_format.append(HumanMessage(content=msg["content"]))
        elif msg["role"] == "assistant":
            history_langchain_format.append(AIMessage(content=msg["content"]))

    # Add the new user message
    history_langchain_format.append(HumanMessage(content=message))

    # Invoke the Alfred agent with the full message history
    response = alfred.invoke(
        input={"messages": history_langchain_format},
        config={"recursion_limit": 100},
    )

    # Extract the final assistant message
    return response["messages"][-1].content


# Set up the agent's tools
tools = [hub_stats_tool]
# tools = [guest_info_tool]
chat_with_tools = model.bind_tools(tools)


# Generate the AgentState and agent graph
class AgentState(TypedDict):
    messages: Annotated[list[AnyMessage], add_messages]


def assistant(state: AgentState):
    return {
        "messages": [chat_with_tools.invoke(state["messages"])],
    }


## The graph
builder = StateGraph(AgentState)

# Define nodes: these do the work
builder.add_node("assistant", assistant)
builder.add_node("tools", ToolNode(tools))

# Define edges: these determine how the control flow moves
builder.add_edge(START, "assistant")
builder.add_conditional_edges(
    "assistant",
    # If the latest message requires a tool, route to tools;
    # otherwise, provide a direct response
    tools_condition,
)
builder.add_edge("tools", "assistant")
alfred = builder.compile()

"""
For information on how to customize the ChatInterface, peruse the gradio docs:
https://www.gradio.app/docs/chatinterface
"""
demo = gr.ChatInterface(
    predict,
    type="messages",
)

if __name__ == "__main__":
    demo.launch()
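
# A minimal sketch of driving the compiled graph directly, without Gradio
# (commented out; assumes the same HF token/env as above, and the model's
# reply text will vary from run to run). The question is only an example.
#
# result = alfred.invoke(
#     {"messages": [HumanMessage(content="What is the most downloaded model by google?")]},
#     config={"recursion_limit": 100},
# )
# print(result["messages"][-1].content)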