import os

import gradio as gr
from dotenv import load_dotenv
from gradio import ChatMessage
from langchain import hub
from langchain.agents import (
    AgentExecutor,
    create_openai_tools_agent,
    load_tools,
)
from langchain_community.llms import HuggingFaceEndpoint
from langchain_huggingface import ChatHuggingFace, HuggingFacePipeline
from langchain_openai import ChatOpenAI
from transformers import BitsAndBytesConfig

# NOTE: kept after the langchain_huggingface import on purpose. Both packages
# export ChatHuggingFace and the original script imported this one last, so
# this is the binding the rest of the module sees.
from langchain_community.chat_models.huggingface import ChatHuggingFace

# Load variables from a local .env file into the process environment.
load_dotenv()

# Environment variables
# Read after load_dotenv() so a value from .env is picked up. This is None
# when the variable is not set; nothing in this section validates it.
HF_TOKEN = os.environ.get('HF_TOKEN')

# Alternative hosted model (disabled):
# model = ChatOpenAI(temperature=0, streaming=True)

# 1. Quantization: 4-bit NF4 weights with float16 compute and double
# quantization (bitsandbytes). Must have a GPU.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16",
    bnb_4bit_use_double_quant=True,
)

# 2. Create model: a local text-generation pipeline loaded with the
# quantization settings above.
llm = HuggingFacePipeline.from_model_id(
    model_id="EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003",
    task="text-generation",
    pipeline_kwargs=dict(
        max_new_tokens=2048,
        do_sample=False,  # greedy decoding
        repetition_penalty=1.03,
        return_full_text=False,  # return only the completion, not the prompt
    ),
    model_kwargs={"quantization_config": quantization_config},
)

# 3. Tools the agent may call.
tools = load_tools(["serpapi"])

# Get the prompt to use - you can modify this!
# ReAct-format prompt: create_react_agent validates that the prompt exposes
# the {tools}, {tool_names} and {agent_scratchpad} variables, which the
# "hwchase17/openai-tools-agent" prompt does not provide.
prompt = hub.pull("hwchase17/react")
# print(prompt.template) -- to see the prompt

# Imported here because the top-of-file imports only bring in
# create_openai_tools_agent, not the ReAct constructor used below.
from langchain.agents import create_react_agent

# Construct the ReAct agent
agent = create_react_agent(llm, tools, prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools).with_config(
    {"run_name": "Agent"}
)


async def interact_with_langchain_agent(prompt, messages):
    """Stream one agent turn into the chat history.

    Appends the user's message to ``messages``, then runs the agent executor
    on ``prompt`` and appends one assistant message per completed tool step
    (titled with the tool name) plus one for the final output.

    Args:
        prompt: Text submitted from the chat textbox.
        messages: Current chat history; it is mutated in place.

    Yields:
        The same ``messages`` list after each append, so the chatbot can be
        re-rendered incrementally.
    """
    messages.append(ChatMessage(role="user", content=prompt))
    yield messages

    async for chunk in agent_executor.astream({"input": prompt}):
        if "steps" in chunk:
            for step in chunk["steps"]:
                # The metadata title renders the step as a separate
                # "tool usage" entry in the chat.
                messages.append(
                    ChatMessage(
                        role="assistant",
                        content=step.action.log,
                        metadata={
                            "title": f"🛠️ Used tool {step.action.tool}"
                        },
                    )
                )
                yield messages
        if "output" in chunk:
            messages.append(
                ChatMessage(role="assistant", content=chunk["output"])
            )
            yield messages


with gr.Blocks() as demo:
    gr.Markdown("# Chat with a LangChain Agent 🦜⛓️ and see its thoughts 💭")
    chatbot = gr.Chatbot(
        type="messages",
        label="Agent",
        avatar_images=(
            None,
            "https://em-content.zobj.net/source/twitter/141/parrot_1f99c.png",
        ),
    )
    # Named chat_input rather than input so the builtin is not shadowed.
    chat_input = gr.Textbox(lines=1, label="Chat Message")
    # Wire the components defined above: the textbox value and the chat
    # history go in, the updated history comes out.
    chat_input.submit(
        interact_with_langchain_agent, [chat_input, chatbot], [chatbot]
    )

demo.launch()