from langchain import hub
from langchain.agents import AgentExecutor, create_react_agent, load_tools
from gradio import ChatMessage
import gradio as gr
import os
from langchain_huggingface import HuggingFacePipeline
from langchain_community.llms import HuggingFaceEndpoint
from transformers import BitsAndBytesConfig
from dotenv import load_dotenv

load_dotenv()
# Environment variables
HF_TOKEN = os.environ.get("HF_TOKEN")  # Hugging Face token; required for gated models
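# The "serpapi" tool loaded below reads its key from the environment too:
# set SERPAPI_API_KEY in .env alongside HF_TOKEN.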
# Quantize to 4-bit NF4; requires a CUDA GPU with bitsandbytes installed.
quantization_config = BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_quant_type="nf4",
bnb_4bit_compute_dtype="float16",
bnb_4bit_use_double_quant=True,
)
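# Double quantization also compresses the quantization constants themselves,
# saving roughly another 0.4 bits per parameter on top of the 4-bit weights.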
# Create the model (local 4-bit quantized pipeline)
llm = HuggingFacePipeline.from_model_id(
model_id="EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003",
task="text-generation",
pipeline_kwargs=dict(
max_new_tokens=2048,
do_sample=False,
repetition_penalty=1.03,
return_full_text=False,
),
model_kwargs={"quantization_config": quantization_config},
)
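# Hedged alternative for GPU-less environments: serve the same model through
# the HF Inference API with HuggingFaceEndpoint (imported above). This sketch
# assumes the model id is available on the endpoint and HF_TOKEN grants access.
# llm = HuggingFaceEndpoint(
#     repo_id="EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003",
#     task="text-generation",
#     max_new_tokens=2048,
#     repetition_penalty=1.03,
#     huggingfacehub_api_token=HF_TOKEN,
# )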
tools = load_tools(["serpapi"])
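# Other built-in tools can be added here if their dependencies are configured,
# e.g. load_tools(["serpapi", "llm-math"], llm=llm); a suggestion, not required.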
# Get the prompt to use - you can modify this!
# hwchase17/react matches create_react_agent below; the openai-tools prompt does not.
prompt = hub.pull("hwchase17/react")
# print(prompt.template)  # to see the prompt
# Construct the ReAct agent
agent = create_react_agent(llm, tools, prompt)
agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    handle_parsing_errors=True,  # local models can emit slightly malformed ReAct output
).with_config({"run_name": "Agent"})
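# Quick smoke test outside the UI (hedged; uncomment to try a single query):
# print(agent_executor.invoke({"input": "Who won the 2022 World Cup?"})["output"])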
async def interact_with_langchain_agent(prompt, messages):
    messages.append(ChatMessage(role="user", content=prompt))
    yield messages
    async for chunk in agent_executor.astream({"input": prompt}):
        if "steps" in chunk:
            for step in chunk["steps"]:
                messages.append(
                    ChatMessage(
                        role="assistant",
                        content=step.action.log,
                        metadata={"title": f"🛠️ Used tool {step.action.tool}"},
                    )
                )
                yield messages
        if "output" in chunk:
            messages.append(ChatMessage(role="assistant", content=chunk["output"]))
            yield messages
with gr.Blocks() as demo:
    gr.Markdown("# Chat with a LangChain Agent 🦜⛓️ and see its thoughts 💭")
    chatbot = gr.Chatbot(
        type="messages",
        label="Agent",
        avatar_images=(
            None,
            "https://em-content.zobj.net/source/twitter/141/parrot_1f99c.png",
        ),
    )
    input = gr.Textbox(lines=1, label="Chat Message")
    input.submit(interact_with_langchain_agent, [input, chatbot], [chatbot])

demo.launch()