Spaces:
Runtime error
Runtime error
import os

import gradio as gr
import torch
from dotenv import load_dotenv
from gradio import ChatMessage
from langchain import hub
from langchain.agents import (
    AgentExecutor,
    create_openai_tools_agent,
    create_react_agent,
    load_tools,
)
from langchain_community.llms import HuggingFacePipeline
from langchain_openai import ChatOpenAI
from transformers import AutoModelForCausalLM, AutoTokenizer
# Load variables from a local .env file into the process environment
# before any of the lookups below run.
load_dotenv()

# Environment variables
# Hugging Face access token. This is None when HF_TOKEN is unset; no
# validation is performed here.
HF_TOKEN = os.getenv("HF_TOKEN")
# Alternative OpenAI-backed model, kept for reference:
# model = ChatOpenAI(temperature=0, streaming=True)
from langchain_community.llms import HuggingFaceEndpoint | |
from langchain_community.chat_models.huggingface import ChatHuggingFace | |
from transformers import BitsAndBytesConfig | |
# Quantize the model weights to 4-bit NF4 (with double quantization) and
# run compute in float16. Must have a GPU.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16",
    bnb_4bit_use_double_quant=True,
)
# 2. Create model
# Wrap a text-generation pipeline for the agent model as a LangChain LLM.
# The quantization settings are forwarded to model loading via model_kwargs.
llm = HuggingFacePipeline.from_model_id(
    model_id="EpistemeAI/Fireball-Meta-Llama-3.1-8B-Instruct-Agent-0.003",
    task="text-generation",
    pipeline_kwargs={
        "max_new_tokens": 2048,
        # Sampling is disabled, so generation is greedy.
        "do_sample": False,
        "repetition_penalty": 1.03,
        # Return only the newly generated text, not the prompt echoed back.
        "return_full_text": False,
    },
    model_kwargs={"quantization_config": quantization_config},
)
# Tools the agent is allowed to call; only the SerpAPI tool is loaded.
tools = load_tools(["serpapi"])

# Get the prompt to use - you can modify this!
# A ReAct agent needs a prompt exposing the `tools`, `tool_names` and
# `agent_scratchpad` variables. The OpenAI tools-agent prompt does not
# provide the first two, so the ReAct prompt is pulled instead.
prompt = hub.pull("hwchase17/react")
# print(prompt.template) -- to see the prompt

# Construct the ReAct agent
agent = create_react_agent(llm, tools, prompt)
agent_executor = AgentExecutor(agent=agent, tools=tools).with_config(
    {"run_name": "Agent"}
)
async def interact_with_langchain_agent(prompt, messages):
    """Stream one agent run into the chat history.

    Appends the user's ``prompt`` to ``messages``, then iterates over the
    chunks produced by ``agent_executor.astream``. Each intermediate step is
    appended as an assistant message whose metadata title names the tool
    used, and the final output is appended as a plain assistant message.

    Args:
        prompt: The user's message; sent to the agent as ``{"input": prompt}``.
        messages: The chat history list. It is mutated in place.

    Yields:
        The same ``messages`` list after every append, so the caller can
        re-render the conversation incrementally.
    """
    messages.append(ChatMessage(role="user", content=prompt))
    # Yield once up front so the user's message is emitted before the agent
    # produces anything.
    yield messages

    async for chunk in agent_executor.astream({"input": prompt}):
        if "steps" in chunk:
            for step in chunk["steps"]:
                messages.append(
                    ChatMessage(
                        role="assistant",
                        content=step.action.log,
                        metadata={"title": f"🛠️ Used tool {step.action.tool}"},
                    )
                )
                yield messages
        if "output" in chunk:
            messages.append(
                ChatMessage(role="assistant", content=chunk["output"])
            )
            yield messages
# Build the UI: a header, a chatbot pane in "messages" format, and a
# single-line textbox whose submit event drives the agent.
with gr.Blocks() as demo:
    gr.Markdown("# Chat with a LangChain Agent 🦜⛓️ and see its thoughts 💭")
    chatbot = gr.Chatbot(
        type="messages",
        label="Agent",
        # (user avatar, assistant avatar); the user gets no avatar.
        avatar_images=(
            None,
            "https://em-content.zobj.net/source/twitter/141/parrot_1f99c.png",
        ),
    )
    # NOTE: `input` shadows the builtin; the name is kept so any other
    # reference to this module-level component keeps working.
    input = gr.Textbox(lines=1, label="Chat Message")
    # Wire the textbox and chatbot that are actually defined above: the
    # handler receives (textbox value, chat history) and streams updated
    # history back into the chatbot.
    input.submit(interact_with_langchain_agent, [input, chatbot], [chatbot])

demo.launch()