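# NOTE: the lines below are page text left over from the Hugging Face file
# viewer this script was copied from; kept as comments so the module parses.
# legolasyiu's picture
# Update app.py
# 6d4dc25 verified
# raw
# history blame
# 2.74 kB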
import os

import gradio as gr
import torch
from dotenv import load_dotenv
from gradio import ChatMessage
from langchain import hub
from langchain.agents import AgentExecutor, create_openai_tools_agent, load_tools
from langchain.agents import create_react_agent
from langchain_community.llms import HuggingFacePipeline
from langchain_openai import ChatOpenAI
from transformers import AutoModelForCausalLM, AutoTokenizer
# Pull variables from a local .env file (if any) into the process environment
# before anything below reads os.environ.
load_dotenv()

# Hugging Face access token; None when the variable is not set.
# NOTE(review): nothing in the visible part of this file reads HF_TOKEN
# directly — presumably the Hugging Face libraries pick the variable up from
# the environment themselves; confirm.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Alternative OpenAI-backed chat model, kept for reference:
# model = ChatOpenAI(temperature=0, streaming=True)

from langchain_community.llms import HuggingFaceEndpoint
from langchain_community.chat_models.huggingface import ChatHuggingFace
from transformers import BitsAndBytesConfig

# 4-bit quantization (NF4 quant type, double quantization, float16 compute).
# Per the original author's note, this requires a GPU.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16",
    bnb_4bit_use_double_quant=True,
)
# Build the text-generation LLM used by the agent: the Fireball Llama-3.1-8B
# agent checkpoint, loaded through a local transformers pipeline with the
# quantization settings defined above.
llm = HuggingFacePipeline.from_model_id(
    model_id="EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003",
    task="text-generation",
    # Generation settings forwarded to the pipeline: no sampling, a mild
    # repetition penalty, and up to 2048 new tokens per call.
    pipeline_kwargs={
        "max_new_tokens": 2048,
        "do_sample": False,
        "repetition_penalty": 1.03,
        "return_full_text": False,
    },
    # Model-loading settings: apply the quantization config at load time.
    model_kwargs={"quantization_config": quantization_config},
)
# Tools the agent may call. Only the SerpAPI search tool is loaded.
# NOTE(review): presumably this needs a SerpAPI key in the environment — confirm.
tools = load_tools(["serpapi"])

# Prompt for the agent - you can modify this!
# A ReAct agent needs a prompt exposing `tools`, `tool_names` and
# `agent_scratchpad`; the OpenAI-tools prompt that was pulled here before does
# not define the first two, so create_react_agent rejected it.
prompt = hub.pull("hwchase17/react")
# print(prompt.template) -- to see the prompt

# Construct the ReAct agent and wrap it in an executor that runs the
# think / call-tool / observe loop. The run is labelled "Agent".
agent = create_react_agent(llm, tools, prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools).with_config(
    {"run_name": "Agent"}
)
async def interact_with_langchain_agent(prompt, messages):
    """Stream one agent run into the chat history.

    Appends the user's message, then consumes ``agent_executor.astream`` and
    appends one assistant message per intermediate tool step and one for the
    final output. The history is yielded after every append so the UI can
    refresh incrementally.

    Args:
        prompt: Text the user submitted.
        messages: Current chat history (list of ``ChatMessage``); it is
            mutated in place and the same list object is yielded back.

    Yields:
        The updated ``messages`` list.
    """
    messages.append(ChatMessage(role="user", content=prompt))
    yield messages

    async for chunk in agent_executor.astream({"input": prompt}):
        # Intermediate steps: show the agent's log for each tool call, with a
        # title naming the tool that was used.
        if "steps" in chunk:
            for step in chunk["steps"]:
                messages.append(
                    ChatMessage(
                        role="assistant",
                        content=step.action.log,
                        metadata={"title": f"🛠️ Used tool {step.action.tool}"},
                    )
                )
                yield messages
        # Final answer of the run.
        if "output" in chunk:
            messages.append(ChatMessage(role="assistant", content=chunk["output"]))
            yield messages
# Gradio UI: a heading, the chat transcript, and a single-line textbox whose
# submit event streams the agent's messages into the transcript.
with gr.Blocks() as demo:
    gr.Markdown("# Chat with a LangChain Agent 🦜⛓️ and see its thoughts 💭")
    chatbot = gr.Chatbot(
        type="messages",
        label="Agent",
        avatar_images=(
            None,
            "https://em-content.zobj.net/source/twitter/141/parrot_1f99c.png",
        ),
    )
    # Named chat_input rather than `input` so the builtin is not shadowed.
    chat_input = gr.Textbox(lines=1, label="Chat Message")
    # Wire the textbox and chatbot defined above; the handler receives
    # (text, history) and its yielded history is written back to the chatbot.
    chat_input.submit(
        interact_with_langchain_agent, [chat_input, chatbot], [chatbot]
    )

demo.launch()