Spaces:

joshuasundance
/

langchain-streamlit-demo

Running

App Files Files Community

Joshua Sundance Bailey committed on Dec 12, 2023

Commit

9b54a0e

•

2 Parent(s): f310350 1ea3b53

Merge pull request #103 from joshuasundance-swca/agent

Browse files

Files changed (10) hide show

.pre-commit-config.yaml +9 -11
langchain-streamlit-demo/app.py +78 -23
langchain-streamlit-demo/defaults.py +1 -1
langchain-streamlit-demo/llm_resources.py +58 -7
langchain-streamlit-demo/research_assistant/__init__.py +3 -0
langchain-streamlit-demo/research_assistant/chain.py +16 -0
langchain-streamlit-demo/research_assistant/search/__init__.py +0 -0
langchain-streamlit-demo/research_assistant/search/web.py +180 -0
langchain-streamlit-demo/research_assistant/writer.py +75 -0
requirements.txt +3 -0

.pre-commit-config.yaml CHANGED Viewed

@@ -40,24 +40,22 @@ repos:
  - id: trailing-whitespace
  - id: mixed-line-ending
  - id: requirements-txt-fixer
-- repo: https://github.com/pre-commit/mirrors-mypy
- rev: v1.5.1
  hooks:
- - id: mypy
 - repo: https://github.com/asottile/add-trailing-comma
  rev: v3.1.0
  hooks:
  - id: add-trailing-comma
-#- repo: https://github.com/dannysepler/rm_unneeded_f_str
-# rev: v0.2.0
-# hooks:
-# - id: rm-unneeded-f-str
-- repo: https://github.com/psf/black
- rev: 23.9.1
  hooks:
- - id: black
 - repo: https://github.com/PyCQA/bandit
  rev: 1.7.5
  hooks:
  - id: bandit
- args: ["-x", "tests/*.py"]

  - id: trailing-whitespace
  - id: mixed-line-ending
  - id: requirements-txt-fixer
+- repo: https://github.com/psf/black
+ rev: 23.9.1
  hooks:
+ - id: black
 - repo: https://github.com/asottile/add-trailing-comma
  rev: v3.1.0
  hooks:
  - id: add-trailing-comma
+- repo: https://github.com/pre-commit/mirrors-mypy
+ rev: v1.5.1
  hooks:
+ - id: mypy
+ additional_dependencies:
+ - types-requests
 - repo: https://github.com/PyCQA/bandit
  rev: 1.7.5
  hooks:
  - id: bandit
+ args: ["-x", "tests/*.py", "-s", "B113"]

langchain-streamlit-demo/app.py CHANGED Viewed

@@ -5,23 +5,28 @@ import anthropic
 import langsmith.utils
 import openai
 import streamlit as st
 from langchain.callbacks.tracers.langchain import LangChainTracer, wait_for_all_tracers
 from langchain.callbacks.tracers.run_collector import RunCollectorCallbackHandler
 from langchain.memory import ConversationBufferMemory, StreamlitChatMessageHistory
 from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
 from langchain.schema.document import Document
 from langchain.schema.retriever import BaseRetriever
 from langsmith.client import Client
 from streamlit_feedback import streamlit_feedback
 from defaults import default_values
 from llm_resources import (
- get_runnable,
  get_llm,
  get_texts_and_multiretriever,
- StreamHandler,
 )
 __version__ = "1.1.0"
@@ -414,12 +419,16 @@ if st.session_state.llm:
  if st.session_state.ls_tracer:
  callbacks.append(st.session_state.ls_tracer)
- config: Dict[str, Any] = dict(
- callbacks=callbacks,
- tags=["Streamlit Chat"],
- )
- if st.session_state.provider == "Anthropic":
- config["max_concurrency"] = 5
  use_document_chat = all(
  [
@@ -429,24 +438,70 @@ if st.session_state.llm:
  )
  full_response: Union[str, None] = None
  message_placeholder = st.empty()
- stream_handler = StreamHandler(message_placeholder)
- callbacks.append(stream_handler)
- st.session_state.chain = get_runnable(
- use_document_chat,
- document_chat_chain_type,
- st.session_state.llm,
- st.session_state.retriever,
- MEMORY,
- chat_prompt,
- prompt,
- )
  # --- LLM call ---
  try:
- full_response = st.session_state.chain.invoke(prompt, config)
  except (openai.AuthenticationError, anthropic.AuthenticationError):
  st.error(

 import langsmith.utils
 import openai
 import streamlit as st
+from langchain.callbacks import StreamlitCallbackHandler
+from langchain.callbacks.base import BaseCallbackHandler
 from langchain.callbacks.tracers.langchain import LangChainTracer, wait_for_all_tracers
 from langchain.callbacks.tracers.run_collector import RunCollectorCallbackHandler
 from langchain.memory import ConversationBufferMemory, StreamlitChatMessageHistory
 from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
 from langchain.schema.document import Document
 from langchain.schema.retriever import BaseRetriever
+from langchain.tools import DuckDuckGoSearchRun, WikipediaQueryRun
+from langchain.tools import Tool
+from langchain.utilities import WikipediaAPIWrapper
 from langsmith.client import Client
 from streamlit_feedback import streamlit_feedback
 from defaults import default_values
 from llm_resources import (
+ get_agent,
  get_llm,
+ get_runnable,
  get_texts_and_multiretriever,
 )
+from research_assistant.chain import chain as research_assistant_chain
 __version__ = "1.1.0"
  if st.session_state.ls_tracer:
  callbacks.append(st.session_state.ls_tracer)
+ # Build the config dict handed to `.invoke(...)`: the supplied callbacks,
+ # a fixed "Streamlit Chat" tag, and the verbose / return_intermediate_steps
+ # flags. A fresh dict is created on every call.
+ def get_config(callbacks: list[BaseCallbackHandler]) -> dict[str, Any]:
+ config: Dict[str, Any] = dict(
+ callbacks=callbacks,
+ tags=["Streamlit Chat"],
+ verbose=True,
+ return_intermediate_steps=True,
+ )
+ # Only the Anthropic provider gets an explicit concurrency cap.
+ if st.session_state.provider == "Anthropic":
+ config["max_concurrency"] = 5
+ return config
  use_document_chat = all(
  [
  )
  full_response: Union[str, None] = None
+ # stream_handler = StreamHandler(message_placeholder)
+ # callbacks.append(stream_handler)
  message_placeholder = st.empty()
+ default_tools = [
+ DuckDuckGoSearchRun(),
+ WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper()),
+ ]
+ if st.session_state.provider in ("Azure OpenAI", "OpenAI"):
+ st_callback = StreamlitCallbackHandler(st.container())
+ callbacks.append(st_callback)
+ research_assistant_tool = Tool.from_function(
+ func=lambda s: research_assistant_chain.invoke(
+ {"question": s},
+ config=get_config(callbacks),
+ ),
+ name="web-research-assistant",
+ description="this assistant returns a comprehensive report based on web research. for quick facts, use duckduckgo instead.",
+ )
+ TOOLS = [research_assistant_tool] + default_tools
+ if use_document_chat:
+ st.session_state.doc_chain = get_runnable(
+ use_document_chat,
+ document_chat_chain_type,
+ st.session_state.llm,
+ st.session_state.retriever,
+ MEMORY,
+ chat_prompt,
+ prompt,
+ )
+ doc_chain_tool = Tool.from_function(
+ func=lambda s: st.session_state.doc_chain.invoke(
+ s,
+ config=get_config(callbacks),
+ ),
+ name="user-document-chat",
+ description="this assistant returns a response based on the user's custom context. if the user's meaning is unclear, perhaps the answer is here. generally speaking, try this tool before conducting web research.",
+ )
+ TOOLS = [doc_chain_tool, research_assistant_tool] + default_tools
+ st.session_state.chain = get_agent(
+ TOOLS,
+ STMEMORY,
+ st.session_state.llm,
+ callbacks,
+ )
+ else:
+ st.session_state.chain = get_runnable(
+ use_document_chat,
+ document_chat_chain_type,
+ st.session_state.llm,
+ st.session_state.retriever,
+ MEMORY,
+ chat_prompt,
+ prompt,
+ )
  # --- LLM call ---
  try:
+ full_response = st.session_state.chain.invoke(
+ prompt,
+ config=get_config(callbacks),
+ )
  except (openai.AuthenticationError, anthropic.AuthenticationError):
  st.error(

langchain-streamlit-demo/defaults.py CHANGED Viewed

@@ -25,7 +25,7 @@ DEFAULT_MODEL = os.environ.get("DEFAULT_MODEL", "gpt-3.5-turbo")
 DEFAULT_SYSTEM_PROMPT = os.environ.get(
  "DEFAULT_SYSTEM_PROMPT",
- "You are a helpful chatbot.",
 )
 MIN_TEMP = float(os.environ.get("MIN_TEMPERATURE", 0.0))

 DEFAULT_SYSTEM_PROMPT = os.environ.get(
  "DEFAULT_SYSTEM_PROMPT",
+ "You are a helpful chatbot. Do not rush. Always plan, think, and act in a step-by-step manner.",
 )
 MIN_TEMP = float(os.environ.get("MIN_TEMPERATURE", 0.0))

langchain-streamlit-demo/llm_resources.py CHANGED Viewed

@@ -1,8 +1,15 @@
 from tempfile import NamedTemporaryFile
 from typing import Tuple, List, Optional, Dict
 from langchain.callbacks.base import BaseCallbackHandler
-from langchain.chains import RetrievalQA, LLMChain
 from langchain.chat_models import (
  AzureChatOpenAI,
  ChatOpenAI,
@@ -11,21 +18,66 @@ from langchain.chat_models import (
 )
 from langchain.document_loaders import PyPDFLoader
 from langchain.embeddings import AzureOpenAIEmbeddings, OpenAIEmbeddings
 from langchain.retrievers import EnsembleRetriever
-from langchain.schema import Document, BaseRetriever
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain.vectorstores import FAISS
 from langchain.retrievers.multi_query import MultiQueryRetriever
 from langchain.retrievers.multi_vector import MultiVectorRetriever
 from langchain.storage import InMemoryStore
-import uuid
 from defaults import DEFAULT_CHUNK_SIZE, DEFAULT_CHUNK_OVERLAP, DEFAULT_RETRIEVER_K
 from qagen import get_rag_qa_gen_chain
 from summarize import get_rag_summarization_chain
 def get_runnable(
  use_document_chat: bool,
  document_chat_chain_type: str,
@@ -58,7 +110,6 @@ def get_runnable(
  llm=llm,
  chain_type=document_chat_chain_type,
  retriever=retriever,
- memory=memory,
  output_key="output_text",
  ) | (lambda output: output["output_text"])

+import uuid
 from tempfile import NamedTemporaryFile
 from typing import Tuple, List, Optional, Dict
+from langchain.agents import AgentExecutor
+from langchain.agents.openai_functions_agent.agent_token_buffer_memory import (
+ AgentTokenBufferMemory,
+)
+from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent
 from langchain.callbacks.base import BaseCallbackHandler
+from langchain.chains import LLMChain
+from langchain.chains import RetrievalQA
 from langchain.chat_models import (
  AzureChatOpenAI,
  ChatOpenAI,
 )
 from langchain.document_loaders import PyPDFLoader
 from langchain.embeddings import AzureOpenAIEmbeddings, OpenAIEmbeddings
+from langchain.llms.base import BaseLLM
+from langchain.prompts import MessagesPlaceholder
 from langchain.retrievers import EnsembleRetriever
 from langchain.retrievers.multi_query import MultiQueryRetriever
 from langchain.retrievers.multi_vector import MultiVectorRetriever
+from langchain.schema import Document, BaseRetriever
+from langchain.schema.chat_history import BaseChatMessageHistory
+from langchain.schema.runnable import RunnablePassthrough
 from langchain.storage import InMemoryStore
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.tools.base import BaseTool
+from langchain.vectorstores import FAISS
+from langchain_core.messages import SystemMessage
 from defaults import DEFAULT_CHUNK_SIZE, DEFAULT_CHUNK_OVERLAP, DEFAULT_RETRIEVER_K
 from qagen import get_rag_qa_gen_chain
 from summarize import get_rag_summarization_chain
+def get_agent(
+ tools: list[BaseTool],
+ chat_history: BaseChatMessageHistory,
+ llm: BaseLLM,
+ callbacks,
+):
+ memory_key = "agent_history"
+ system_message = SystemMessage(
+ content=(
+ "Do your best to answer the questions. "
+ "Feel free to use any tools available to look up "
+ "relevant information, only if necessary"
+ ),
+ )
+ prompt = OpenAIFunctionsAgent.create_prompt(
+ system_message=system_message,
+ extra_prompt_messages=[MessagesPlaceholder(variable_name=memory_key)],
+ )
+ agent = OpenAIFunctionsAgent(llm=llm, tools=tools, prompt=prompt)
+ agent_memory = AgentTokenBufferMemory(
+ chat_memory=chat_history,
+ memory_key=memory_key,
+ llm=llm,
+ )
+ agent_executor = AgentExecutor(
+ agent=agent,
+ tools=tools,
+ memory=agent_memory,
+ verbose=True,
+ return_intermediate_steps=True,
+ callbacks=callbacks,
+ )
+ return (
+ {"input": RunnablePassthrough()}
+ | agent_executor
+ | (lambda output: output["output"])
+ )
 def get_runnable(
  use_document_chat: bool,
  document_chat_chain_type: str,
  llm=llm,
  chain_type=document_chat_chain_type,
  retriever=retriever,
  output_key="output_text",
  ) | (lambda output: output["output_text"])

langchain-streamlit-demo/research_assistant/__init__.py ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ from research_assistant.chain import chain
2	+
3	+ __all__ = ["chain"]

langchain-streamlit-demo/research_assistant/chain.py ADDED Viewed

	@@ -0,0 +1,16 @@

+from langchain_core.pydantic_v1 import BaseModel
+from langchain_core.runnables import RunnablePassthrough
+from research_assistant.search.web import chain as search_chain
+from research_assistant.writer import chain as writer_chain
+# Research pipeline: run `search_chain` on the input and attach its result
+# under "research_summary", then pass the enriched input to `writer_chain`.
+chain_notypes = (
+ RunnablePassthrough().assign(research_summary=search_chain) | writer_chain
+)
+# Declared input schema for the public chain: a single "question" string.
+class InputType(BaseModel):
+ question: str
+# Same pipeline as `chain_notypes`, with the input type declared.
+chain = chain_notypes.with_types(input_type=InputType)

langchain-streamlit-demo/research_assistant/search/__init__.py ADDED Viewed

File without changes

langchain-streamlit-demo/research_assistant/search/web.py ADDED Viewed

	@@ -0,0 +1,180 @@

+import json
+from typing import Any
+import requests
+from bs4 import BeautifulSoup
+from langchain.chat_models import ChatOpenAI
+from langchain.prompts import ChatPromptTemplate
+from langchain.retrievers.tavily_search_api import TavilySearchAPIRetriever
+from langchain.utilities import DuckDuckGoSearchAPIWrapper
+from langchain_core.messages import SystemMessage
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.runnables import (
+ ConfigurableField,
+ Runnable,
+ RunnableLambda,
+ RunnableParallel,
+ RunnablePassthrough,
+)
+RESULTS_PER_QUESTION = 3
+ddg_search = DuckDuckGoSearchAPIWrapper()
+def scrape_text(url: str):
+ # Send a GET request to the webpage
+ try:
+ response = requests.get(url)
+ # Check if the request was successful
+ if response.status_code == 200:
+ # Parse the content of the request with BeautifulSoup
+ soup = BeautifulSoup(response.text, "html.parser")
+ # Extract all text from the webpage
+ page_text = soup.get_text(separator=" ", strip=True)
+ # Print the extracted text
+ return page_text
+ else:
+ return f"Failed to retrieve the webpage: Status code {response.status_code}"
+ except Exception as e:
+ print(e)
+ return f"Failed to retrieve the webpage: {e}"
+def web_search(query: str, num_results: int):
+ results = ddg_search.results(query, num_results)
+ return [r["link"] for r in results]
+# Turn {"question": ...} into a list of {"url", "question"} dicts, one per
+# search hit. The default branch uses `web_search` (DuckDuckGo); a "tavily"
+# alternative can be selected through the "search_engine" configurable field.
+get_links: Runnable[Any, Any] = (
+ RunnablePassthrough()
+ | RunnableLambda(
+ lambda x: [
+ {"url": url, "question": x["question"]}
+ for url in web_search(query=x["question"], num_results=RESULTS_PER_QUESTION)
+ ],
+ )
+).configurable_alternatives(
+ ConfigurableField("search_engine"),
+ default_key="duckduckgo",
+ # Tavily branch: pull out the question string, fetch results while
+ # keeping the question alongside them, then read each result's URL
+ # from its "source" metadata entry.
+ tavily=RunnableLambda(lambda x: x["question"])
+ | RunnableParallel(
+ {
+ "question": RunnablePassthrough(),
+ "results": TavilySearchAPIRetriever(k=RESULTS_PER_QUESTION),
+ },
+ )
+ | RunnableLambda(
+ lambda x: [
+ {"url": result.metadata["source"], "question": x["question"]}
+ for result in x["results"]
+ ],
+ ),
+)
+SEARCH_PROMPT = ChatPromptTemplate.from_messages(
+ [
+ ("system", "{agent_prompt}"),
+ (
+ "user",
+ "Write 3 google search queries to search online that form an "
+ "objective opinion from the following: {question}\n"
+ "You must respond with a list of strings in the following format: "
+ '["query 1", "query 2", "query 3"].',
+ ),
+ ],
+)
+AUTO_AGENT_INSTRUCTIONS = """
+This task involves researching a given topic, regardless of its complexity or the availability of a definitive answer. The research is conducted by a specific agent, defined by its type and role, with each agent requiring distinct instructions.
+Agent
+The agent is determined by the field of the topic and the specific name of the agent that could be utilized to research the topic provided. Agents are categorized by their area of expertise, and each agent type is associated with a corresponding emoji.
+examples:
+task: "should I invest in apple stocks?"
+response:
+{
+ "agent": "💰 Finance Agent",
+ "agent_role_prompt: "You are a seasoned finance analyst AI assistant. Your primary goal is to compose comprehensive, astute, impartial, and methodically arranged financial reports based on provided data and trends."
+}
+task: "could reselling sneakers become profitable?"
+response:
+{
+ "agent": "📈 Business Analyst Agent",
+ "agent_role_prompt": "You are an experienced AI business analyst assistant. Your main objective is to produce comprehensive, insightful, impartial, and systematically structured business reports based on provided business data, market trends, and strategic analysis."
+}
+task: "what are the most interesting sites in Tel Aviv?"
+response:
+{
+ "agent: "🌍 Travel Agent",
+ "agent_role_prompt": "You are a world-travelled AI tour guide assistant. Your main purpose is to draft engaging, insightful, unbiased, and well-structured travel reports on given locations, including history, attractions, and cultural insights."
+}
+""" # noqa: E501
+CHOOSE_AGENT_PROMPT = ChatPromptTemplate.from_messages(
+ [SystemMessage(content=AUTO_AGENT_INSTRUCTIONS), ("user", "task: {task}")],
+)
+SUMMARY_TEMPLATE = """{text}
+-----------
+Using the above text, answer in short the following question:
+> {question}
+-----------
+if the question cannot be answered using the text, imply summarize the text. Include all factual information, numbers, stats etc if available.""" # noqa: E501
+SUMMARY_PROMPT = ChatPromptTemplate.from_template(SUMMARY_TEMPLATE)
+# For one {"url", "question"} item: download the page, summarize it against
+# the question, and return a "Source Url: ...\nSummary: ..." string.
+scrape_and_summarize: Runnable[Any, Any] = (
+ RunnableParallel(
+ {
+ "question": lambda x: x["question"],
+ # Only the first 10000 characters of the scraped text are kept.
+ "text": lambda x: scrape_text(x["url"])[:10000],
+ "url": lambda x: x["url"],
+ },
+ )
+ | RunnableParallel(
+ {
+ "summary": SUMMARY_PROMPT | ChatOpenAI(temperature=0) | StrOutputParser(),
+ # Carry the URL forward so it can be cited next to the summary.
+ "url": lambda x: x["url"],
+ },
+ )
+ | RunnableLambda(lambda x: f"Source Url: {x['url']}\nSummary: {x['summary']}")
+)
+multi_search = get_links | scrape_and_summarize.map() | (lambda x: "\n".join(x))
+def load_json(s):
+ try:
+ return json.loads(s)
+ except Exception:
+ return {}
+# Ask the model for search queries and parse its reply as JSON
+# (load_json yields {} when the reply is not valid JSON).
+search_query = SEARCH_PROMPT | ChatOpenAI(temperature=0) | StrOutputParser() | load_json
+# Ask the model to choose an agent persona; the reply is parsed the same way.
+choose_agent = (
+ CHOOSE_AGENT_PROMPT | ChatOpenAI(temperature=0) | StrOutputParser() | load_json
+)
+# Add "agent_prompt" to the input (the chosen persona's "agent_role_prompt",
+# or None when that key is absent), then generate the search queries.
+# NOTE(review): the whole input is passed as "task", not just the question
+# string - confirm this is intended.
+get_search_queries = (
+ RunnablePassthrough().assign(
+ agent_prompt=RunnableParallel({"task": lambda x: x})
+ | choose_agent
+ | (lambda x: x.get("agent_role_prompt")),
+ )
+ | search_query
+)
+# Full web-research chain: generate search queries, wrap each one as
+# {"question": q}, run `multi_search` on every query, and join the
+# per-query results with blank lines.
+# NOTE(review): when load_json fell back to {}, the comprehension produces
+# no queries, so the result is presumably an empty string - confirm.
+chain = (
+ get_search_queries
+ | (lambda x: [{"question": q} for q in x])
+ | multi_search.map()
+ | (lambda x: "\n\n".join(x))
+)

langchain-streamlit-demo/research_assistant/writer.py ADDED Viewed

	@@ -0,0 +1,75 @@

+from langchain.chat_models import ChatOpenAI
+from langchain.prompts import ChatPromptTemplate
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.runnables import ConfigurableField
+WRITER_SYSTEM_PROMPT = "You are an AI critical thinker research assistant. Your sole purpose is to write well written, critically acclaimed, objective and structured reports on given text." # noqa: E501
+# Report prompts from https://github.com/assafelovic/gpt-researcher/blob/master/gpt_researcher/master/prompts.py
+RESEARCH_REPORT_TEMPLATE = """Information:
+--------
+{research_summary}
+--------
+Using the above information, answer the following question or topic: "{question}" in a detailed report -- \
+The report should focus on the answer to the question, should be well structured, informative, \
+in depth, with facts and numbers if available and a minimum of 1,200 words.
+You should strive to write the report as long as you can using all relevant and necessary information provided.
+You must write the report with markdown syntax.
+You MUST determine your own concrete and valid opinion based on the given information. Do NOT deter to general and meaningless conclusions.
+Write all used source urls at the end of the report, and make sure to not add duplicated sources, but only one reference for each.
+You must write the report in apa format.
+Please do your best, this is very important to my career.""" # noqa: E501
+RESOURCE_REPORT_TEMPLATE = """Information:
+--------
+{research_summary}
+--------
+Based on the above information, generate a bibliography recommendation report for the following question or topic: "{question}". \
+The report should provide a detailed analysis of each recommended resource, explaining how each source can contribute to finding answers to the research question. \
+Focus on the relevance, reliability, and significance of each source. \
+Ensure that the report is well-structured, informative, in-depth, and follows Markdown syntax. \
+Include relevant facts, figures, and numbers whenever available. \
+The report should have a minimum length of 1,200 words.
+Please do your best, this is very important to my career.""" # noqa: E501
+OUTLINE_REPORT_TEMPLATE = """Information:
+--------
+{research_summary}
+--------
+Using the above information, generate an outline for a research report in Markdown syntax for the following question or topic: "{question}". \
+The outline should provide a well-structured framework for the research report, including the main sections, subsections, and key points to be covered. \
+The research report should be detailed, informative, in-depth, and a minimum of 1,200 words. \
+Use appropriate Markdown syntax to format the outline and ensure readability.
+Please do your best, this is very important to my career.""" # noqa: E501
+# Chat model used to write the report (temperature 0).
+model = ChatOpenAI(temperature=0)
+# Report prompt. The default is the research report; the "report_type"
+# configurable field can switch to "resource_report" or "outline_report".
+# All three share the same system prompt and differ only in the user template.
+prompt = ChatPromptTemplate.from_messages(
+ [
+ ("system", WRITER_SYSTEM_PROMPT),
+ ("user", RESEARCH_REPORT_TEMPLATE),
+ ],
+).configurable_alternatives(
+ ConfigurableField("report_type"),
+ default_key="research_report",
+ resource_report=ChatPromptTemplate.from_messages(
+ [
+ ("system", WRITER_SYSTEM_PROMPT),
+ ("user", RESOURCE_REPORT_TEMPLATE),
+ ],
+ ),
+ outline_report=ChatPromptTemplate.from_messages(
+ [
+ ("system", WRITER_SYSTEM_PROMPT),
+ ("user", OUTLINE_REPORT_TEMPLATE),
+ ],
+ ),
+)
+# Writer chain: format the prompt, call the model, return the reply as a string.
+chain = prompt | model | StrOutputParser()

requirements.txt CHANGED Viewed

@@ -1,4 +1,6 @@
 anthropic==0.7.7
 faiss-cpu==1.7.4
 langchain==0.0.348
 langsmith==0.0.69
@@ -12,3 +14,4 @@ streamlit-feedback==0.1.3
 tiktoken==0.5.2
 tornado>=6.3.3 # not directly required, pinned by Snyk to avoid a vulnerability
 validators>=0.21.0 # not directly required, pinned by Snyk to avoid a vulnerability

 anthropic==0.7.7
+beautifulsoup4==4.12.2
+duckduckgo-search==4.0.0
 faiss-cpu==1.7.4
 langchain==0.0.348
 langsmith==0.0.69
 tiktoken==0.5.2
 tornado>=6.3.3 # not directly required, pinned by Snyk to avoid a vulnerability
 validators>=0.21.0 # not directly required, pinned by Snyk to avoid a vulnerability
+wikipedia==1.4.0