import os
import json
import random

import requests
import bs4
import datasets
import gradio as gr
import spaces
import tqdm
from huggingface_hub import login

from transformers.agents import ReactCodeAgent, ReactJsonAgent, Tool, PythonInterpreterTool
from langchain.memory import ConversationBufferMemory
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.vectorstores import VectorStore
from duckduckgo_search import DDGS

# Local helper modules shipped alongside this script.
from llm_engine import HfEngine
from web_surfer import (
    SearchInformationTool,
    NavigationalSearchTool,
    VisitTool,
    DownloadTool,
    PageUpTool,
    PageDownTool,
    FinderTool,
    FindNextTool,
    ArchiveSearchTool,
)
from mdconvert import MarkdownConverter
from visual_qa import VisualQATool, VisualQAGPT4Tool

# API keys are read from the environment; only log in if a token is set.
HF_TOKEN = os.getenv("HF_TOKEN")
SERP_API_KEY = os.getenv("SERP_KEY")
if HF_TOKEN:
    login(token=HF_TOKEN)
def search_ducky(query):
    """Run a DuckDuckGo text search and concatenate the result snippets."""
    with DDGS() as ddgs:
        results = list(ddgs.text(query, max_results=10))
    content = ""
    if results:
        content = " ".join(result["body"] for result in results)
    return content
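

# Usage sketch (assumes outbound network access; DuckDuckGo may rate-limit
# unauthenticated queries, so treat the call below as illustrative only):
#   snippet = search_ducky("huggingface transformers agents")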
# Build a small RAG index over the Hugging Face documentation dataset.
knowledge_base = datasets.load_dataset("m-ric/huggingface_doc", split="train")
source_docs = [
    Document(page_content=doc["text"], metadata={"source": doc["source"].split("/")[1]})
    for doc in knowledge_base
]
# Chunk the documents and keep only the first 1000 chunks to bound index size.
docs_processed = RecursiveCharacterTextSplitter(chunk_size=500).split_documents(source_docs)[:1000]
embedding_model = HuggingFaceEmbeddings(model_name="thenlper/gte-small")
vectordb = FAISS.from_documents(documents=docs_processed, embedding=embedding_model)
all_sources = list({doc.metadata["source"] for doc in docs_processed})
print(all_sources)
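
# Quick index sanity check (a sketch; the query text is illustrative):
#   vectordb.similarity_search("how to push a model to the Hub", k=2)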
class RetrieverTool(Tool):
    name = "retriever"
    description = "Retrieves some documents from the knowledge base that have the closest embeddings to the input query."
    inputs = {
        "query": {
            "type": "text",
            "description": "The query to perform. This should be semantically close to your target documents. Use the affirmative form rather than a question.",
        },
        "source": {
            "type": "text",
            "description": "",  # filled in at construction time, once all_sources is known
        },
    }
    output_type = "text"

    def __init__(self, vectordb: VectorStore, all_sources: list, **kwargs):
        super().__init__(**kwargs)
        self.vectordb = vectordb
        self.inputs["source"]["description"] = (
            f"The source of the documents to search, as a str representation of a list. "
            f"Possible values in the list are: {all_sources}. "
            f"If this argument is not provided, all sources will be searched."
        )

    def forward(self, query: str, source: str = None) -> str:
        assert isinstance(query, str), "Your search query must be a string"

        if source:
            # Accept either a bare source name or a str-encoded list of names.
            if isinstance(source, str) and "[" not in str(source):
                source = [source]
            source = json.loads(str(source).replace("'", '"'))

        docs = self.vectordb.similarity_search(
            query, filter=({"source": source} if source else None), k=3
        )

        if len(docs) == 0:
            return "No documents found with this filtering. Try removing the source filter."
        return "Retrieved documents:\n\n" + "\n===Document===\n".join([doc.page_content for doc in docs])
# Conversation memory (instantiated for future use; not yet wired into the agents below).
memory = ConversationBufferMemory(memory_key="chat_history")
llm_engine = HfEngine(model="Jopmt/JoPmt")

class SearchTool(Tool):
    name = "ask_search_agent"
    description = "A search agent that will browse the internet to answer a question. Use it to gather information, not for problem-solving."

    inputs = {
        "question": {
            "description": "Your question, as a natural language sentence. You are talking to an agent, so provide them with as much context as possible.",
            "type": "text",
        }
    }
    output_type = "text"

    def forward(self, question: str) -> str:
        # websurfer_agent is defined below; the name is resolved at call time.
        return websurfer_agent.run(question)
tools = [PythonInterpreterTool(), SearchTool(), RetrieverTool(vectordb, all_sources)]
additional_authorized_imports = ["requests", "bs4", "os", "time", "datetime", "json", "re"]
WEB_TOOLS = [
    SearchInformationTool(),
    NavigationalSearchTool(),
    VisitTool(),
    DownloadTool(),
    PageUpTool(),
    PageDownTool(),
    FinderTool(),
    FindNextTool(),
    ArchiveSearchTool(),
]
# A JSON-acting agent handles web browsing; the code-acting agent delegates to it via SearchTool.
websurfer_agent = ReactJsonAgent(tools=WEB_TOOLS, llm_engine=llm_engine, add_base_tools=True, max_iterations=1)
reagent = ReactCodeAgent(
    tools=tools,
    llm_engine=llm_engine,
    add_base_tools=True,
    max_iterations=1,
    additional_authorized_imports=additional_authorized_imports,
)
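
# Smoke-test sketch (not part of the app flow; assumes the model endpoint is reachable):
#   print(reagent.run("What is 2 + 2?"))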
def plix(inut, progress=gr.Progress(track_tqdm=True)):
    """Run the code agent on the user's prompt and return its answer."""
    return reagent.run(inut)

with gr.Blocks(
    theme=random.choice([
        gr.themes.Monochrome(),
        gr.themes.Base.from_hub("gradio/seafoam"),
        gr.themes.Base.from_hub("freddyaboulton/dracula_revamped"),
        gr.themes.Glass(),
        gr.themes.Base(),
    ]),
    analytics_enabled=False,
) as iface:
    out = gr.Textbox(label="🤗Output", lines=5, interactive=False)
    inut = gr.Textbox(label="Prompt")
    btn = gr.Button("GENERATE")
    btn.click(fn=plix, inputs=inut, outputs=out)
iface.queue(max_size=1, api_open=False)
iface.launch(max_threads=20, inline=False, show_api=False)