Joshua Sundance Bailey committed on
Commit
883f3be
1 Parent(s): 048798b

research assistant

Browse files
.pre-commit-config.yaml CHANGED
@@ -44,6 +44,8 @@ repos:
44
  rev: v1.5.1
45
  hooks:
46
  - id: mypy
 
 
47
  - repo: https://github.com/asottile/add-trailing-comma
48
  rev: v3.1.0
49
  hooks:
@@ -60,4 +62,4 @@ repos:
60
  rev: 1.7.5
61
  hooks:
62
  - id: bandit
63
- args: ["-x", "tests/*.py"]
 
44
  rev: v1.5.1
45
  hooks:
46
  - id: mypy
47
+ additional_dependencies:
48
+ - types-requests
49
  - repo: https://github.com/asottile/add-trailing-comma
50
  rev: v3.1.0
51
  hooks:
 
62
  rev: 1.7.5
63
  hooks:
64
  - id: bandit
65
+ args: ["-x", "tests/*.py", "-s", "B113"]
langchain-streamlit-demo/app.py CHANGED
@@ -5,23 +5,24 @@ import anthropic
5
  import langsmith.utils
6
  import openai
7
  import streamlit as st
 
8
  from langchain.callbacks.tracers.langchain import LangChainTracer, wait_for_all_tracers
9
  from langchain.callbacks.tracers.run_collector import RunCollectorCallbackHandler
10
  from langchain.memory import ConversationBufferMemory, StreamlitChatMessageHistory
11
  from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
12
  from langchain.schema.document import Document
13
  from langchain.schema.retriever import BaseRetriever
 
14
  from langsmith.client import Client
15
  from streamlit_feedback import streamlit_feedback
16
 
17
  from defaults import default_values
18
-
19
  from llm_resources import (
20
- get_runnable,
21
  get_llm,
22
  get_texts_and_multiretriever,
23
- StreamHandler,
24
  )
 
25
 
26
  __version__ = "1.1.0"
27
 
@@ -378,6 +379,15 @@ st.session_state.llm = get_llm(
378
  },
379
  )
380
 
 
 
 
 
 
 
 
 
 
381
  # --- Chat History ---
382
  for msg in STMEMORY.messages:
383
  st.chat_message(
@@ -430,24 +440,27 @@ if st.session_state.llm:
430
 
431
  full_response: Union[str, None] = None
432
 
 
 
 
 
 
 
433
  message_placeholder = st.empty()
434
- stream_handler = StreamHandler(message_placeholder)
435
- callbacks.append(stream_handler)
436
-
437
- st.session_state.chain = get_runnable(
438
- use_document_chat,
439
- document_chat_chain_type,
440
- st.session_state.llm,
441
- st.session_state.retriever,
442
- MEMORY,
443
- chat_prompt,
444
- prompt,
445
- STMEMORY,
446
- )
447
 
448
  # --- LLM call ---
449
  try:
450
- full_response = st.session_state.chain.invoke(prompt, config)
451
 
452
  except (openai.AuthenticationError, anthropic.AuthenticationError):
453
  st.error(
 
5
  import langsmith.utils
6
  import openai
7
  import streamlit as st
8
+ from langchain.callbacks import StreamlitCallbackHandler
9
  from langchain.callbacks.tracers.langchain import LangChainTracer, wait_for_all_tracers
10
  from langchain.callbacks.tracers.run_collector import RunCollectorCallbackHandler
11
  from langchain.memory import ConversationBufferMemory, StreamlitChatMessageHistory
12
  from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
13
  from langchain.schema.document import Document
14
  from langchain.schema.retriever import BaseRetriever
15
+ from langchain.tools import Tool
16
  from langsmith.client import Client
17
  from streamlit_feedback import streamlit_feedback
18
 
19
  from defaults import default_values
 
20
  from llm_resources import (
21
+ get_agent,
22
  get_llm,
23
  get_texts_and_multiretriever,
 
24
  )
25
+ from research_assistant.chain import chain as research_assistant_chain
26
 
27
  __version__ = "1.1.0"
28
 
 
379
  },
380
  )
381
 
382
+ research_assistant_tool = Tool.from_function(
383
+ func=lambda s: research_assistant_chain.invoke({"question": s}),
384
+ name="web-research-assistant",
385
+ description="this assistant returns a report based on web research",
386
+ )
387
+
388
+ TOOLS = [research_assistant_tool]
389
+ st.session_state.agent = get_agent(TOOLS, STMEMORY, st.session_state.llm)
390
+
391
  # --- Chat History ---
392
  for msg in STMEMORY.messages:
393
  st.chat_message(
 
440
 
441
  full_response: Union[str, None] = None
442
 
443
+ # stream_handler = StreamHandler(message_placeholder)
444
+ # callbacks.append(stream_handler)
445
+
446
+ st_callback = StreamlitCallbackHandler(st.container())
447
+ callbacks.append(st_callback)
448
+
449
  message_placeholder = st.empty()
450
+ # st.session_state.chain = get_runnable(
451
+ # use_document_chat,
452
+ # document_chat_chain_type,
453
+ # st.session_state.llm,
454
+ # st.session_state.retriever,
455
+ # MEMORY,
456
+ # chat_prompt,
457
+ # prompt,
458
+ # STMEMORY,
459
+ # )
 
 
 
460
 
461
  # --- LLM call ---
462
  try:
463
+ full_response = st.session_state.agent.invoke(prompt, config)
464
 
465
  except (openai.AuthenticationError, anthropic.AuthenticationError):
466
  st.error(
langchain-streamlit-demo/llm_resources.py CHANGED
@@ -32,6 +32,48 @@ from langchain_core.messages import SystemMessage
32
  from defaults import DEFAULT_CHUNK_SIZE, DEFAULT_CHUNK_OVERLAP, DEFAULT_RETRIEVER_K
33
  from qagen import get_rag_qa_gen_chain
34
  from summarize import get_rag_summarization_chain
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
 
37
  def get_runnable(
@@ -69,38 +111,8 @@ def get_runnable(
69
  "Retrieves custom context provided by the user for this conversation. Use this if you cannot answer immediately and confidently.",
70
  )
71
  tools = [tool]
72
- memory_key = "agent_history"
73
- system_message = SystemMessage(
74
- content=(
75
- "Do your best to answer the questions. "
76
- "Feel free to use any tools available to look up "
77
- "relevant information, only if necessary"
78
- ),
79
- )
80
- prompt = OpenAIFunctionsAgent.create_prompt(
81
- system_message=system_message,
82
- extra_prompt_messages=[MessagesPlaceholder(variable_name=memory_key)],
83
- )
84
- agent = OpenAIFunctionsAgent(llm=llm, tools=tools, prompt=prompt)
85
 
86
- agent_memory = AgentTokenBufferMemory(
87
- chat_memory=chat_history,
88
- memory_key=memory_key,
89
- llm=llm,
90
- )
91
-
92
- agent_executor = AgentExecutor(
93
- agent=agent,
94
- tools=tools,
95
- memory=agent_memory,
96
- verbose=True,
97
- return_intermediate_steps=True,
98
- )
99
- return (
100
- {"input": RunnablePassthrough()}
101
- | agent_executor
102
- | (lambda output: output["output"])
103
- )
104
 
105
 
106
  def get_llm(
 
32
  from defaults import DEFAULT_CHUNK_SIZE, DEFAULT_CHUNK_OVERLAP, DEFAULT_RETRIEVER_K
33
  from qagen import get_rag_qa_gen_chain
34
  from summarize import get_rag_summarization_chain
35
+ from langchain.tools.base import BaseTool
36
+ from langchain.schema.chat_history import BaseChatMessageHistory
37
+ from langchain.llms.base import BaseLLM
38
+
39
+
40
+ def get_agent(
41
+ tools: list[BaseTool],
42
+ chat_history: BaseChatMessageHistory,
43
+ llm: BaseLLM,
44
+ ):
45
+ memory_key = "agent_history"
46
+ system_message = SystemMessage(
47
+ content=(
48
+ "Do your best to answer the questions. "
49
+ "Feel free to use any tools available to look up "
50
+ "relevant information, only if necessary"
51
+ ),
52
+ )
53
+ prompt = OpenAIFunctionsAgent.create_prompt(
54
+ system_message=system_message,
55
+ extra_prompt_messages=[MessagesPlaceholder(variable_name=memory_key)],
56
+ )
57
+ agent = OpenAIFunctionsAgent(llm=llm, tools=tools, prompt=prompt)
58
+
59
+ agent_memory = AgentTokenBufferMemory(
60
+ chat_memory=chat_history,
61
+ memory_key=memory_key,
62
+ llm=llm,
63
+ )
64
+
65
+ agent_executor = AgentExecutor(
66
+ agent=agent,
67
+ tools=tools,
68
+ memory=agent_memory,
69
+ verbose=True,
70
+ return_intermediate_steps=True,
71
+ )
72
+ return (
73
+ {"input": RunnablePassthrough()}
74
+ | agent_executor
75
+ | (lambda output: output["output"])
76
+ )
77
 
78
 
79
  def get_runnable(
 
111
  "Retrieves custom context provided by the user for this conversation. Use this if you cannot answer immediately and confidently.",
112
  )
113
  tools = [tool]
 
 
 
 
 
 
 
 
 
 
 
 
 
114
 
115
+ return get_agent(tools, chat_history, llm)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
116
 
117
 
118
  def get_llm(
langchain-streamlit-demo/research_assistant/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from research_assistant.chain import chain
2
+
3
+ __all__ = ["chain"]
langchain-streamlit-demo/research_assistant/chain.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_core.pydantic_v1 import BaseModel
2
+ from langchain_core.runnables import RunnablePassthrough
3
+
4
+ from research_assistant.search.web import chain as search_chain
5
+ from research_assistant.writer import chain as writer_chain
6
+
7
+ chain_notypes = (
8
+ RunnablePassthrough().assign(research_summary=search_chain) | writer_chain
9
+ )
10
+
11
+
12
+ class InputType(BaseModel):
13
+ question: str
14
+
15
+
16
+ chain = chain_notypes.with_types(input_type=InputType)
langchain-streamlit-demo/research_assistant/search/__init__.py ADDED
File without changes
langchain-streamlit-demo/research_assistant/search/web.py ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from typing import Any
3
+
4
+ import requests
5
+ from bs4 import BeautifulSoup
6
+ from langchain.chat_models import ChatOpenAI
7
+ from langchain.prompts import ChatPromptTemplate
8
+ from langchain.retrievers.tavily_search_api import TavilySearchAPIRetriever
9
+ from langchain.utilities import DuckDuckGoSearchAPIWrapper
10
+ from langchain_core.messages import SystemMessage
11
+ from langchain_core.output_parsers import StrOutputParser
12
+ from langchain_core.runnables import (
13
+ ConfigurableField,
14
+ Runnable,
15
+ RunnableLambda,
16
+ RunnableParallel,
17
+ RunnablePassthrough,
18
+ )
19
+
20
+ RESULTS_PER_QUESTION = 3
21
+
22
+ ddg_search = DuckDuckGoSearchAPIWrapper()
23
+
24
+
25
+ def scrape_text(url: str):
26
+ # Send a GET request to the webpage
27
+ try:
28
+ response = requests.get(url)
29
+
30
+ # Check if the request was successful
31
+ if response.status_code == 200:
32
+ # Parse the content of the request with BeautifulSoup
33
+ soup = BeautifulSoup(response.text, "html.parser")
34
+
35
+ # Extract all text from the webpage
36
+ page_text = soup.get_text(separator=" ", strip=True)
37
+
38
+ # Print the extracted text
39
+ return page_text
40
+ else:
41
+ return f"Failed to retrieve the webpage: Status code {response.status_code}"
42
+ except Exception as e:
43
+ print(e)
44
+ return f"Failed to retrieve the webpage: {e}"
45
+
46
+
47
+ def web_search(query: str, num_results: int):
48
+ results = ddg_search.results(query, num_results)
49
+ return [r["link"] for r in results]
50
+
51
+
52
+ get_links: Runnable[Any, Any] = (
53
+ RunnablePassthrough()
54
+ | RunnableLambda(
55
+ lambda x: [
56
+ {"url": url, "question": x["question"]}
57
+ for url in web_search(query=x["question"], num_results=RESULTS_PER_QUESTION)
58
+ ],
59
+ )
60
+ ).configurable_alternatives(
61
+ ConfigurableField("search_engine"),
62
+ default_key="duckduckgo",
63
+ tavily=RunnableLambda(lambda x: x["question"])
64
+ | RunnableParallel(
65
+ {
66
+ "question": RunnablePassthrough(),
67
+ "results": TavilySearchAPIRetriever(k=RESULTS_PER_QUESTION),
68
+ },
69
+ )
70
+ | RunnableLambda(
71
+ lambda x: [
72
+ {"url": result.metadata["source"], "question": x["question"]}
73
+ for result in x["results"]
74
+ ],
75
+ ),
76
+ )
77
+
78
+
79
+ SEARCH_PROMPT = ChatPromptTemplate.from_messages(
80
+ [
81
+ ("system", "{agent_prompt}"),
82
+ (
83
+ "user",
84
+ "Write 3 google search queries to search online that form an "
85
+ "objective opinion from the following: {question}\n"
86
+ "You must respond with a list of strings in the following format: "
87
+ '["query 1", "query 2", "query 3"].',
88
+ ),
89
+ ],
90
+ )
91
+
92
+ AUTO_AGENT_INSTRUCTIONS = """
93
+ This task involves researching a given topic, regardless of its complexity or the availability of a definitive answer. The research is conducted by a specific agent, defined by its type and role, with each agent requiring distinct instructions.
94
+ Agent
95
+ The agent is determined by the field of the topic and the specific name of the agent that could be utilized to research the topic provided. Agents are categorized by their area of expertise, and each agent type is associated with a corresponding emoji.
96
+
97
+ examples:
98
+ task: "should I invest in apple stocks?"
99
+ response:
100
+ {
101
+ "agent": "💰 Finance Agent",
102
+ "agent_role_prompt: "You are a seasoned finance analyst AI assistant. Your primary goal is to compose comprehensive, astute, impartial, and methodically arranged financial reports based on provided data and trends."
103
+ }
104
+ task: "could reselling sneakers become profitable?"
105
+ response:
106
+ {
107
+ "agent": "📈 Business Analyst Agent",
108
+ "agent_role_prompt": "You are an experienced AI business analyst assistant. Your main objective is to produce comprehensive, insightful, impartial, and systematically structured business reports based on provided business data, market trends, and strategic analysis."
109
+ }
110
+ task: "what are the most interesting sites in Tel Aviv?"
111
+ response:
112
+ {
113
+ "agent: "🌍 Travel Agent",
114
+ "agent_role_prompt": "You are a world-travelled AI tour guide assistant. Your main purpose is to draft engaging, insightful, unbiased, and well-structured travel reports on given locations, including history, attractions, and cultural insights."
115
+ }
116
+ """ # noqa: E501
117
+ CHOOSE_AGENT_PROMPT = ChatPromptTemplate.from_messages(
118
+ [SystemMessage(content=AUTO_AGENT_INSTRUCTIONS), ("user", "task: {task}")],
119
+ )
120
+
121
+ SUMMARY_TEMPLATE = """{text}
122
+
123
+ -----------
124
+
125
+ Using the above text, answer in short the following question:
126
+
127
+ > {question}
128
+
129
+ -----------
130
+ if the question cannot be answered using the text, imply summarize the text. Include all factual information, numbers, stats etc if available.""" # noqa: E501
131
+ SUMMARY_PROMPT = ChatPromptTemplate.from_template(SUMMARY_TEMPLATE)
132
+
133
+ scrape_and_summarize: Runnable[Any, Any] = (
134
+ RunnableParallel(
135
+ {
136
+ "question": lambda x: x["question"],
137
+ "text": lambda x: scrape_text(x["url"])[:10000],
138
+ "url": lambda x: x["url"],
139
+ },
140
+ )
141
+ | RunnableParallel(
142
+ {
143
+ "summary": SUMMARY_PROMPT | ChatOpenAI(temperature=0) | StrOutputParser(),
144
+ "url": lambda x: x["url"],
145
+ },
146
+ )
147
+ | RunnableLambda(lambda x: f"Source Url: {x['url']}\nSummary: {x['summary']}")
148
+ )
149
+
150
+ multi_search = get_links | scrape_and_summarize.map() | (lambda x: "\n".join(x))
151
+
152
+
153
+ def load_json(s):
154
+ try:
155
+ return json.loads(s)
156
+ except Exception:
157
+ return {}
158
+
159
+
160
+ search_query = SEARCH_PROMPT | ChatOpenAI(temperature=0) | StrOutputParser() | load_json
161
+ choose_agent = (
162
+ CHOOSE_AGENT_PROMPT | ChatOpenAI(temperature=0) | StrOutputParser() | load_json
163
+ )
164
+
165
+ get_search_queries = (
166
+ RunnablePassthrough().assign(
167
+ agent_prompt=RunnableParallel({"task": lambda x: x})
168
+ | choose_agent
169
+ | (lambda x: x.get("agent_role_prompt")),
170
+ )
171
+ | search_query
172
+ )
173
+
174
+
175
+ chain = (
176
+ get_search_queries
177
+ | (lambda x: [{"question": q} for q in x])
178
+ | multi_search.map()
179
+ | (lambda x: "\n\n".join(x))
180
+ )
langchain-streamlit-demo/research_assistant/writer.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.chat_models import ChatOpenAI
2
+ from langchain.prompts import ChatPromptTemplate
3
+ from langchain_core.output_parsers import StrOutputParser
4
+ from langchain_core.runnables import ConfigurableField
5
+
6
+ WRITER_SYSTEM_PROMPT = "You are an AI critical thinker research assistant. Your sole purpose is to write well written, critically acclaimed, objective and structured reports on given text." # noqa: E501
7
+
8
+
9
+ # Report prompts from https://github.com/assafelovic/gpt-researcher/blob/master/gpt_researcher/master/prompts.py
10
+ RESEARCH_REPORT_TEMPLATE = """Information:
11
+ --------
12
+ {research_summary}
13
+ --------
14
+
15
+ Using the above information, answer the following question or topic: "{question}" in a detailed report -- \
16
+ The report should focus on the answer to the question, should be well structured, informative, \
17
+ in depth, with facts and numbers if available and a minimum of 1,200 words.
18
+
19
+ You should strive to write the report as long as you can using all relevant and necessary information provided.
20
+ You must write the report with markdown syntax.
21
+ You MUST determine your own concrete and valid opinion based on the given information. Do NOT deter to general and meaningless conclusions.
22
+ Write all used source urls at the end of the report, and make sure to not add duplicated sources, but only one reference for each.
23
+ You must write the report in apa format.
24
+ Please do your best, this is very important to my career.""" # noqa: E501
25
+
26
+
27
+ RESOURCE_REPORT_TEMPLATE = """Information:
28
+ --------
29
+ {research_summary}
30
+ --------
31
+
32
+ Based on the above information, generate a bibliography recommendation report for the following question or topic: "{question}". \
33
+ The report should provide a detailed analysis of each recommended resource, explaining how each source can contribute to finding answers to the research question. \
34
+ Focus on the relevance, reliability, and significance of each source. \
35
+ Ensure that the report is well-structured, informative, in-depth, and follows Markdown syntax. \
36
+ Include relevant facts, figures, and numbers whenever available. \
37
+ The report should have a minimum length of 1,200 words.
38
+
39
+ Please do your best, this is very important to my career.""" # noqa: E501
40
+
41
+ OUTLINE_REPORT_TEMPLATE = """Information:
42
+ --------
43
+ {research_summary}
44
+ --------
45
+
46
+ Using the above information, generate an outline for a research report in Markdown syntax for the following question or topic: "{question}". \
47
+ The outline should provide a well-structured framework for the research report, including the main sections, subsections, and key points to be covered. \
48
+ The research report should be detailed, informative, in-depth, and a minimum of 1,200 words. \
49
+ Use appropriate Markdown syntax to format the outline and ensure readability.
50
+
51
+ Please do your best, this is very important to my career.""" # noqa: E501
52
+
53
+ model = ChatOpenAI(temperature=0)
54
+ prompt = ChatPromptTemplate.from_messages(
55
+ [
56
+ ("system", WRITER_SYSTEM_PROMPT),
57
+ ("user", RESEARCH_REPORT_TEMPLATE),
58
+ ],
59
+ ).configurable_alternatives(
60
+ ConfigurableField("report_type"),
61
+ default_key="research_report",
62
+ resource_report=ChatPromptTemplate.from_messages(
63
+ [
64
+ ("system", WRITER_SYSTEM_PROMPT),
65
+ ("user", RESOURCE_REPORT_TEMPLATE),
66
+ ],
67
+ ),
68
+ outline_report=ChatPromptTemplate.from_messages(
69
+ [
70
+ ("system", WRITER_SYSTEM_PROMPT),
71
+ ("user", OUTLINE_REPORT_TEMPLATE),
72
+ ],
73
+ ),
74
+ )
75
+ chain = prompt | model | StrOutputParser()
requirements.txt CHANGED
@@ -1,4 +1,6 @@
1
  anthropic==0.7.7
 
 
2
  faiss-cpu==1.7.4
3
  langchain==0.0.348
4
  langsmith==0.0.69
 
1
  anthropic==0.7.7
2
+ beautifulsoup4==4.12.2
3
+ duckduckgo-search==4.0.0
4
  faiss-cpu==1.7.4
5
  langchain==0.0.348
6
  langsmith==0.0.69