legolasyiu's picture
Update app.py
dbca8cc verified
raw
history blame
2.8 kB
from langchain import hub
from langchain.agents import AgentExecutor, create_openai_tools_agent, load_tools
from langchain_openai import ChatOpenAI
from gradio import ChatMessage
import gradio as gr
from langchain_community.llms import HuggingFaceEndpoint
from langchain_community.chat_models.huggingface import ChatHuggingFace
from transformers import BitsAndBytesConfig
from dotenv import load_dotenv
load_dotenv()
# Environment variables
HF_TOKEN = os.environ.get('HF_TOKEN') # Ensure token is set
#model = ChatOpenAI(temperature=0, streaming=True)
from langchain_community.llms import HuggingFaceEndpoint
from langchain_community.chat_models.huggingface import ChatHuggingFace
from transformers import BitsAndBytesConfig
#quantization to 8bit, must have GPU.
quantization_config = BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_quant_type="nf4",
bnb_4bit_compute_dtype="float16",
bnb_4bit_use_double_quant=True,
)
# 2. Create model
llm = HuggingFacePipeline.from_model_id(
model_id="EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003",
task="text-generation",
pipeline_kwargs=dict(
max_new_tokens=2048,
do_sample=False,
repetition_penalty=1.03,
return_full_text=False,
),
model_kwargs={"quantization_config": quantization_config},
)
tools = load_tools(["serpapi"])
# Get the prompt to use - you can modify this!
prompt = hub.pull("hwchase17/openai-tools-agent")
# print(prompt.messages) -- to see the prompt
# Construct the ReAct agent
agent = create_react_agent(llm, tools, prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools).with_config(
{"run_name": "Agent"}
)
async def interact_with_langchain_agent(prompt, messages):
messages.append(ChatMessage(role="user", content=prompt))
yield messages
async for chunk in agent_executor.astream(
{"input": prompt}
):
if "steps" in chunk:
for step in chunk["steps"]:
messages.append(ChatMessage(role="assistant", content=step.action.log,
metadata={"title": f"πŸ› οΈ Used tool {step.action.tool}"}))
yield messages
if "output" in chunk:
messages.append(ChatMessage(role="assistant", content=chunk["output"]))
yield messages
with gr.Blocks() as demo:
gr.Markdown("# Chat with a LangChain Agent πŸ¦œβ›“οΈ and see its thoughts πŸ’­")
chatbot = gr.Chatbot(
type="messages",
label="Agent",
avatar_images=(
None,
"https://em-content.zobj.net/source/twitter/141/parrot_1f99c.png",
),
)
input = gr.Textbox(lines=1, label="Chat Message")
input.submit(interact_with_langchain_agent, [input_2, chatbot_2], [chatbot_2])
demo.launch()