Joshua Sundance Bailey committed on
Commit
9b54a0e
2 Parent(s): f310350 1ea3b53

Merge pull request #103 from joshuasundance-swca/agent

Browse files
.pre-commit-config.yaml CHANGED
@@ -40,24 +40,22 @@ repos:
40
  - id: trailing-whitespace
41
  - id: mixed-line-ending
42
  - id: requirements-txt-fixer
43
- - repo: https://github.com/pre-commit/mirrors-mypy
44
- rev: v1.5.1
45
  hooks:
46
- - id: mypy
47
  - repo: https://github.com/asottile/add-trailing-comma
48
  rev: v3.1.0
49
  hooks:
50
  - id: add-trailing-comma
51
- #- repo: https://github.com/dannysepler/rm_unneeded_f_str
52
- # rev: v0.2.0
53
- # hooks:
54
- # - id: rm-unneeded-f-str
55
- - repo: https://github.com/psf/black
56
- rev: 23.9.1
57
  hooks:
58
- - id: black
 
 
59
  - repo: https://github.com/PyCQA/bandit
60
  rev: 1.7.5
61
  hooks:
62
  - id: bandit
63
- args: ["-x", "tests/*.py"]
 
40
  - id: trailing-whitespace
41
  - id: mixed-line-ending
42
  - id: requirements-txt-fixer
43
+ - repo: https://github.com/psf/black
44
+ rev: 23.9.1
45
  hooks:
46
+ - id: black
47
  - repo: https://github.com/asottile/add-trailing-comma
48
  rev: v3.1.0
49
  hooks:
50
  - id: add-trailing-comma
51
+ - repo: https://github.com/pre-commit/mirrors-mypy
52
+ rev: v1.5.1
 
 
 
 
53
  hooks:
54
+ - id: mypy
55
+ additional_dependencies:
56
+ - types-requests
57
  - repo: https://github.com/PyCQA/bandit
58
  rev: 1.7.5
59
  hooks:
60
  - id: bandit
61
+ args: ["-x", "tests/*.py", "-s", "B113"]
langchain-streamlit-demo/app.py CHANGED
@@ -5,23 +5,28 @@ import anthropic
5
  import langsmith.utils
6
  import openai
7
  import streamlit as st
 
 
8
  from langchain.callbacks.tracers.langchain import LangChainTracer, wait_for_all_tracers
9
  from langchain.callbacks.tracers.run_collector import RunCollectorCallbackHandler
10
  from langchain.memory import ConversationBufferMemory, StreamlitChatMessageHistory
11
  from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
12
  from langchain.schema.document import Document
13
  from langchain.schema.retriever import BaseRetriever
 
 
 
14
  from langsmith.client import Client
15
  from streamlit_feedback import streamlit_feedback
16
 
17
  from defaults import default_values
18
-
19
  from llm_resources import (
20
- get_runnable,
21
  get_llm,
 
22
  get_texts_and_multiretriever,
23
- StreamHandler,
24
  )
 
25
 
26
  __version__ = "1.1.0"
27
 
@@ -414,12 +419,16 @@ if st.session_state.llm:
414
  if st.session_state.ls_tracer:
415
  callbacks.append(st.session_state.ls_tracer)
416
 
417
- config: Dict[str, Any] = dict(
418
- callbacks=callbacks,
419
- tags=["Streamlit Chat"],
420
- )
421
- if st.session_state.provider == "Anthropic":
422
- config["max_concurrency"] = 5
 
 
 
 
423
 
424
  use_document_chat = all(
425
  [
@@ -429,24 +438,70 @@ if st.session_state.llm:
429
  )
430
 
431
  full_response: Union[str, None] = None
432
-
 
433
  message_placeholder = st.empty()
434
- stream_handler = StreamHandler(message_placeholder)
435
- callbacks.append(stream_handler)
436
-
437
- st.session_state.chain = get_runnable(
438
- use_document_chat,
439
- document_chat_chain_type,
440
- st.session_state.llm,
441
- st.session_state.retriever,
442
- MEMORY,
443
- chat_prompt,
444
- prompt,
445
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
446
 
447
  # --- LLM call ---
448
  try:
449
- full_response = st.session_state.chain.invoke(prompt, config)
 
 
 
450
 
451
  except (openai.AuthenticationError, anthropic.AuthenticationError):
452
  st.error(
 
5
  import langsmith.utils
6
  import openai
7
  import streamlit as st
8
+ from langchain.callbacks import StreamlitCallbackHandler
9
+ from langchain.callbacks.base import BaseCallbackHandler
10
  from langchain.callbacks.tracers.langchain import LangChainTracer, wait_for_all_tracers
11
  from langchain.callbacks.tracers.run_collector import RunCollectorCallbackHandler
12
  from langchain.memory import ConversationBufferMemory, StreamlitChatMessageHistory
13
  from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
14
  from langchain.schema.document import Document
15
  from langchain.schema.retriever import BaseRetriever
16
+ from langchain.tools import DuckDuckGoSearchRun, WikipediaQueryRun
17
+ from langchain.tools import Tool
18
+ from langchain.utilities import WikipediaAPIWrapper
19
  from langsmith.client import Client
20
  from streamlit_feedback import streamlit_feedback
21
 
22
  from defaults import default_values
 
23
  from llm_resources import (
24
+ get_agent,
25
  get_llm,
26
+ get_runnable,
27
  get_texts_and_multiretriever,
 
28
  )
29
+ from research_assistant.chain import chain as research_assistant_chain
30
 
31
  __version__ = "1.1.0"
32
 
 
419
  if st.session_state.ls_tracer:
420
  callbacks.append(st.session_state.ls_tracer)
421
 
422
+ def get_config(callbacks: list[BaseCallbackHandler]) -> dict[str, Any]:
423
+ config: Dict[str, Any] = dict(
424
+ callbacks=callbacks,
425
+ tags=["Streamlit Chat"],
426
+ verbose=True,
427
+ return_intermediate_steps=True,
428
+ )
429
+ if st.session_state.provider == "Anthropic":
430
+ config["max_concurrency"] = 5
431
+ return config
432
 
433
  use_document_chat = all(
434
  [
 
438
  )
439
 
440
  full_response: Union[str, None] = None
441
+ # stream_handler = StreamHandler(message_placeholder)
442
+ # callbacks.append(stream_handler)
443
  message_placeholder = st.empty()
444
+
445
+ default_tools = [
446
+ DuckDuckGoSearchRun(),
447
+ WikipediaQueryRun(api_wrapper=WikipediaAPIWrapper()),
448
+ ]
449
+ if st.session_state.provider in ("Azure OpenAI", "OpenAI"):
450
+ st_callback = StreamlitCallbackHandler(st.container())
451
+ callbacks.append(st_callback)
452
+ research_assistant_tool = Tool.from_function(
453
+ func=lambda s: research_assistant_chain.invoke(
454
+ {"question": s},
455
+ config=get_config(callbacks),
456
+ ),
457
+ name="web-research-assistant",
458
+ description="this assistant returns a comprehensive report based on web research. for quick facts, use duckduckgo instead.",
459
+ )
460
+
461
+ TOOLS = [research_assistant_tool] + default_tools
462
+ if use_document_chat:
463
+ st.session_state.doc_chain = get_runnable(
464
+ use_document_chat,
465
+ document_chat_chain_type,
466
+ st.session_state.llm,
467
+ st.session_state.retriever,
468
+ MEMORY,
469
+ chat_prompt,
470
+ prompt,
471
+ )
472
+ doc_chain_tool = Tool.from_function(
473
+ func=lambda s: st.session_state.doc_chain.invoke(
474
+ s,
475
+ config=get_config(callbacks),
476
+ ),
477
+ name="user-document-chat",
478
+ description="this assistant returns a response based on the user's custom context. if the user's meaning is unclear, perhaps the answer is here. generally speaking, try this tool before conducting web research.",
479
+ )
480
+ TOOLS = [doc_chain_tool, research_assistant_tool] + default_tools
481
+
482
+ st.session_state.chain = get_agent(
483
+ TOOLS,
484
+ STMEMORY,
485
+ st.session_state.llm,
486
+ callbacks,
487
+ )
488
+ else:
489
+ st.session_state.chain = get_runnable(
490
+ use_document_chat,
491
+ document_chat_chain_type,
492
+ st.session_state.llm,
493
+ st.session_state.retriever,
494
+ MEMORY,
495
+ chat_prompt,
496
+ prompt,
497
+ )
498
 
499
  # --- LLM call ---
500
  try:
501
+ full_response = st.session_state.chain.invoke(
502
+ prompt,
503
+ config=get_config(callbacks),
504
+ )
505
 
506
  except (openai.AuthenticationError, anthropic.AuthenticationError):
507
  st.error(
langchain-streamlit-demo/defaults.py CHANGED
@@ -25,7 +25,7 @@ DEFAULT_MODEL = os.environ.get("DEFAULT_MODEL", "gpt-3.5-turbo")
25
 
26
  DEFAULT_SYSTEM_PROMPT = os.environ.get(
27
  "DEFAULT_SYSTEM_PROMPT",
28
- "You are a helpful chatbot.",
29
  )
30
 
31
  MIN_TEMP = float(os.environ.get("MIN_TEMPERATURE", 0.0))
 
25
 
26
  DEFAULT_SYSTEM_PROMPT = os.environ.get(
27
  "DEFAULT_SYSTEM_PROMPT",
28
+ "You are a helpful chatbot. Do not rush. Always plan, think, and act in a step-by-step manner.",
29
  )
30
 
31
  MIN_TEMP = float(os.environ.get("MIN_TEMPERATURE", 0.0))
langchain-streamlit-demo/llm_resources.py CHANGED
@@ -1,8 +1,15 @@
 
1
  from tempfile import NamedTemporaryFile
2
  from typing import Tuple, List, Optional, Dict
3
 
 
 
 
 
 
4
  from langchain.callbacks.base import BaseCallbackHandler
5
- from langchain.chains import RetrievalQA, LLMChain
 
6
  from langchain.chat_models import (
7
  AzureChatOpenAI,
8
  ChatOpenAI,
@@ -11,21 +18,66 @@ from langchain.chat_models import (
11
  )
12
  from langchain.document_loaders import PyPDFLoader
13
  from langchain.embeddings import AzureOpenAIEmbeddings, OpenAIEmbeddings
 
 
14
  from langchain.retrievers import EnsembleRetriever
15
- from langchain.schema import Document, BaseRetriever
16
- from langchain.text_splitter import RecursiveCharacterTextSplitter
17
- from langchain.vectorstores import FAISS
18
-
19
  from langchain.retrievers.multi_query import MultiQueryRetriever
20
  from langchain.retrievers.multi_vector import MultiVectorRetriever
 
 
 
21
  from langchain.storage import InMemoryStore
22
- import uuid
 
 
 
23
 
24
  from defaults import DEFAULT_CHUNK_SIZE, DEFAULT_CHUNK_OVERLAP, DEFAULT_RETRIEVER_K
25
  from qagen import get_rag_qa_gen_chain
26
  from summarize import get_rag_summarization_chain
27
 
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  def get_runnable(
30
  use_document_chat: bool,
31
  document_chat_chain_type: str,
@@ -58,7 +110,6 @@ def get_runnable(
58
  llm=llm,
59
  chain_type=document_chat_chain_type,
60
  retriever=retriever,
61
- memory=memory,
62
  output_key="output_text",
63
  ) | (lambda output: output["output_text"])
64
 
 
1
+ import uuid
2
  from tempfile import NamedTemporaryFile
3
  from typing import Tuple, List, Optional, Dict
4
 
5
+ from langchain.agents import AgentExecutor
6
+ from langchain.agents.openai_functions_agent.agent_token_buffer_memory import (
7
+ AgentTokenBufferMemory,
8
+ )
9
+ from langchain.agents.openai_functions_agent.base import OpenAIFunctionsAgent
10
  from langchain.callbacks.base import BaseCallbackHandler
11
+ from langchain.chains import LLMChain
12
+ from langchain.chains import RetrievalQA
13
  from langchain.chat_models import (
14
  AzureChatOpenAI,
15
  ChatOpenAI,
 
18
  )
19
  from langchain.document_loaders import PyPDFLoader
20
  from langchain.embeddings import AzureOpenAIEmbeddings, OpenAIEmbeddings
21
+ from langchain.llms.base import BaseLLM
22
+ from langchain.prompts import MessagesPlaceholder
23
  from langchain.retrievers import EnsembleRetriever
 
 
 
 
24
  from langchain.retrievers.multi_query import MultiQueryRetriever
25
  from langchain.retrievers.multi_vector import MultiVectorRetriever
26
+ from langchain.schema import Document, BaseRetriever
27
+ from langchain.schema.chat_history import BaseChatMessageHistory
28
+ from langchain.schema.runnable import RunnablePassthrough
29
  from langchain.storage import InMemoryStore
30
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
31
+ from langchain.tools.base import BaseTool
32
+ from langchain.vectorstores import FAISS
33
+ from langchain_core.messages import SystemMessage
34
 
35
  from defaults import DEFAULT_CHUNK_SIZE, DEFAULT_CHUNK_OVERLAP, DEFAULT_RETRIEVER_K
36
  from qagen import get_rag_qa_gen_chain
37
  from summarize import get_rag_summarization_chain
38
 
39
 
40
+ def get_agent(
41
+ tools: list[BaseTool],
42
+ chat_history: BaseChatMessageHistory,
43
+ llm: BaseLLM,
44
+ callbacks,
45
+ ):
46
+ memory_key = "agent_history"
47
+ system_message = SystemMessage(
48
+ content=(
49
+ "Do your best to answer the questions. "
50
+ "Feel free to use any tools available to look up "
51
+ "relevant information, only if necessary"
52
+ ),
53
+ )
54
+ prompt = OpenAIFunctionsAgent.create_prompt(
55
+ system_message=system_message,
56
+ extra_prompt_messages=[MessagesPlaceholder(variable_name=memory_key)],
57
+ )
58
+ agent = OpenAIFunctionsAgent(llm=llm, tools=tools, prompt=prompt)
59
+
60
+ agent_memory = AgentTokenBufferMemory(
61
+ chat_memory=chat_history,
62
+ memory_key=memory_key,
63
+ llm=llm,
64
+ )
65
+
66
+ agent_executor = AgentExecutor(
67
+ agent=agent,
68
+ tools=tools,
69
+ memory=agent_memory,
70
+ verbose=True,
71
+ return_intermediate_steps=True,
72
+ callbacks=callbacks,
73
+ )
74
+ return (
75
+ {"input": RunnablePassthrough()}
76
+ | agent_executor
77
+ | (lambda output: output["output"])
78
+ )
79
+
80
+
81
  def get_runnable(
82
  use_document_chat: bool,
83
  document_chat_chain_type: str,
 
110
  llm=llm,
111
  chain_type=document_chat_chain_type,
112
  retriever=retriever,
 
113
  output_key="output_text",
114
  ) | (lambda output: output["output_text"])
115
 
langchain-streamlit-demo/research_assistant/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from research_assistant.chain import chain
2
+
3
+ __all__ = ["chain"]
langchain-streamlit-demo/research_assistant/chain.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain_core.pydantic_v1 import BaseModel
2
+ from langchain_core.runnables import RunnablePassthrough
3
+
4
+ from research_assistant.search.web import chain as search_chain
5
+ from research_assistant.writer import chain as writer_chain
6
+
7
+ chain_notypes = (
8
+ RunnablePassthrough().assign(research_summary=search_chain) | writer_chain
9
+ )
10
+
11
+
12
+ class InputType(BaseModel):
13
+ question: str
14
+
15
+
16
+ chain = chain_notypes.with_types(input_type=InputType)
langchain-streamlit-demo/research_assistant/search/__init__.py ADDED
File without changes
langchain-streamlit-demo/research_assistant/search/web.py ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from typing import Any
3
+
4
+ import requests
5
+ from bs4 import BeautifulSoup
6
+ from langchain.chat_models import ChatOpenAI
7
+ from langchain.prompts import ChatPromptTemplate
8
+ from langchain.retrievers.tavily_search_api import TavilySearchAPIRetriever
9
+ from langchain.utilities import DuckDuckGoSearchAPIWrapper
10
+ from langchain_core.messages import SystemMessage
11
+ from langchain_core.output_parsers import StrOutputParser
12
+ from langchain_core.runnables import (
13
+ ConfigurableField,
14
+ Runnable,
15
+ RunnableLambda,
16
+ RunnableParallel,
17
+ RunnablePassthrough,
18
+ )
19
+
20
+ RESULTS_PER_QUESTION = 3
21
+
22
+ ddg_search = DuckDuckGoSearchAPIWrapper()
23
+
24
+
25
+ def scrape_text(url: str):
26
+ # Send a GET request to the webpage
27
+ try:
28
+ response = requests.get(url)
29
+
30
+ # Check if the request was successful
31
+ if response.status_code == 200:
32
+ # Parse the content of the request with BeautifulSoup
33
+ soup = BeautifulSoup(response.text, "html.parser")
34
+
35
+ # Extract all text from the webpage
36
+ page_text = soup.get_text(separator=" ", strip=True)
37
+
38
+ # Print the extracted text
39
+ return page_text
40
+ else:
41
+ return f"Failed to retrieve the webpage: Status code {response.status_code}"
42
+ except Exception as e:
43
+ print(e)
44
+ return f"Failed to retrieve the webpage: {e}"
45
+
46
+
47
+ def web_search(query: str, num_results: int):
48
+ results = ddg_search.results(query, num_results)
49
+ return [r["link"] for r in results]
50
+
51
+
52
+ get_links: Runnable[Any, Any] = (
53
+ RunnablePassthrough()
54
+ | RunnableLambda(
55
+ lambda x: [
56
+ {"url": url, "question": x["question"]}
57
+ for url in web_search(query=x["question"], num_results=RESULTS_PER_QUESTION)
58
+ ],
59
+ )
60
+ ).configurable_alternatives(
61
+ ConfigurableField("search_engine"),
62
+ default_key="duckduckgo",
63
+ tavily=RunnableLambda(lambda x: x["question"])
64
+ | RunnableParallel(
65
+ {
66
+ "question": RunnablePassthrough(),
67
+ "results": TavilySearchAPIRetriever(k=RESULTS_PER_QUESTION),
68
+ },
69
+ )
70
+ | RunnableLambda(
71
+ lambda x: [
72
+ {"url": result.metadata["source"], "question": x["question"]}
73
+ for result in x["results"]
74
+ ],
75
+ ),
76
+ )
77
+
78
+
79
+ SEARCH_PROMPT = ChatPromptTemplate.from_messages(
80
+ [
81
+ ("system", "{agent_prompt}"),
82
+ (
83
+ "user",
84
+ "Write 3 google search queries to search online that form an "
85
+ "objective opinion from the following: {question}\n"
86
+ "You must respond with a list of strings in the following format: "
87
+ '["query 1", "query 2", "query 3"].',
88
+ ),
89
+ ],
90
+ )
91
+
92
+ AUTO_AGENT_INSTRUCTIONS = """
93
+ This task involves researching a given topic, regardless of its complexity or the availability of a definitive answer. The research is conducted by a specific agent, defined by its type and role, with each agent requiring distinct instructions.
94
+ Agent
95
+ The agent is determined by the field of the topic and the specific name of the agent that could be utilized to research the topic provided. Agents are categorized by their area of expertise, and each agent type is associated with a corresponding emoji.
96
+
97
+ examples:
98
+ task: "should I invest in apple stocks?"
99
+ response:
100
+ {
101
+ "agent": "💰 Finance Agent",
102
+ "agent_role_prompt": "You are a seasoned finance analyst AI assistant. Your primary goal is to compose comprehensive, astute, impartial, and methodically arranged financial reports based on provided data and trends."
103
+ }
104
+ task: "could reselling sneakers become profitable?"
105
+ response:
106
+ {
107
+ "agent": "📈 Business Analyst Agent",
108
+ "agent_role_prompt": "You are an experienced AI business analyst assistant. Your main objective is to produce comprehensive, insightful, impartial, and systematically structured business reports based on provided business data, market trends, and strategic analysis."
109
+ }
110
+ task: "what are the most interesting sites in Tel Aviv?"
111
+ response:
112
+ {
113
+ "agent": "🌍 Travel Agent",
114
+ "agent_role_prompt": "You are a world-travelled AI tour guide assistant. Your main purpose is to draft engaging, insightful, unbiased, and well-structured travel reports on given locations, including history, attractions, and cultural insights."
115
+ }
116
+ """ # noqa: E501
117
+ CHOOSE_AGENT_PROMPT = ChatPromptTemplate.from_messages(
118
+ [SystemMessage(content=AUTO_AGENT_INSTRUCTIONS), ("user", "task: {task}")],
119
+ )
120
+
121
+ SUMMARY_TEMPLATE = """{text}
122
+
123
+ -----------
124
+
125
+ Using the above text, answer in short the following question:
126
+
127
+ > {question}
128
+
129
+ -----------
130
+ if the question cannot be answered using the text, simply summarize the text. Include all factual information, numbers, stats etc if available.""" # noqa: E501
131
+ SUMMARY_PROMPT = ChatPromptTemplate.from_template(SUMMARY_TEMPLATE)
132
+
133
+ scrape_and_summarize: Runnable[Any, Any] = (
134
+ RunnableParallel(
135
+ {
136
+ "question": lambda x: x["question"],
137
+ "text": lambda x: scrape_text(x["url"])[:10000],
138
+ "url": lambda x: x["url"],
139
+ },
140
+ )
141
+ | RunnableParallel(
142
+ {
143
+ "summary": SUMMARY_PROMPT | ChatOpenAI(temperature=0) | StrOutputParser(),
144
+ "url": lambda x: x["url"],
145
+ },
146
+ )
147
+ | RunnableLambda(lambda x: f"Source Url: {x['url']}\nSummary: {x['summary']}")
148
+ )
149
+
150
+ multi_search = get_links | scrape_and_summarize.map() | (lambda x: "\n".join(x))
151
+
152
+
153
+ def load_json(s):
154
+ try:
155
+ return json.loads(s)
156
+ except Exception:
157
+ return {}
158
+
159
+
160
+ search_query = SEARCH_PROMPT | ChatOpenAI(temperature=0) | StrOutputParser() | load_json
161
+ choose_agent = (
162
+ CHOOSE_AGENT_PROMPT | ChatOpenAI(temperature=0) | StrOutputParser() | load_json
163
+ )
164
+
165
+ get_search_queries = (
166
+ RunnablePassthrough().assign(
167
+ agent_prompt=RunnableParallel({"task": lambda x: x})
168
+ | choose_agent
169
+ | (lambda x: x.get("agent_role_prompt")),
170
+ )
171
+ | search_query
172
+ )
173
+
174
+
175
+ chain = (
176
+ get_search_queries
177
+ | (lambda x: [{"question": q} for q in x])
178
+ | multi_search.map()
179
+ | (lambda x: "\n\n".join(x))
180
+ )
langchain-streamlit-demo/research_assistant/writer.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.chat_models import ChatOpenAI
2
+ from langchain.prompts import ChatPromptTemplate
3
+ from langchain_core.output_parsers import StrOutputParser
4
+ from langchain_core.runnables import ConfigurableField
5
+
6
+ WRITER_SYSTEM_PROMPT = "You are an AI critical thinker research assistant. Your sole purpose is to write well written, critically acclaimed, objective and structured reports on given text." # noqa: E501
7
+
8
+
9
+ # Report prompts from https://github.com/assafelovic/gpt-researcher/blob/master/gpt_researcher/master/prompts.py
10
+ RESEARCH_REPORT_TEMPLATE = """Information:
11
+ --------
12
+ {research_summary}
13
+ --------
14
+
15
+ Using the above information, answer the following question or topic: "{question}" in a detailed report -- \
16
+ The report should focus on the answer to the question, should be well structured, informative, \
17
+ in depth, with facts and numbers if available and a minimum of 1,200 words.
18
+
19
+ You should strive to write the report as long as you can using all relevant and necessary information provided.
20
+ You must write the report with markdown syntax.
21
+ You MUST determine your own concrete and valid opinion based on the given information. Do NOT defer to general and meaningless conclusions.
22
+ Write all used source urls at the end of the report, and make sure to not add duplicated sources, but only one reference for each.
23
+ You must write the report in apa format.
24
+ Please do your best, this is very important to my career.""" # noqa: E501
25
+
26
+
27
+ RESOURCE_REPORT_TEMPLATE = """Information:
28
+ --------
29
+ {research_summary}
30
+ --------
31
+
32
+ Based on the above information, generate a bibliography recommendation report for the following question or topic: "{question}". \
33
+ The report should provide a detailed analysis of each recommended resource, explaining how each source can contribute to finding answers to the research question. \
34
+ Focus on the relevance, reliability, and significance of each source. \
35
+ Ensure that the report is well-structured, informative, in-depth, and follows Markdown syntax. \
36
+ Include relevant facts, figures, and numbers whenever available. \
37
+ The report should have a minimum length of 1,200 words.
38
+
39
+ Please do your best, this is very important to my career.""" # noqa: E501
40
+
41
+ OUTLINE_REPORT_TEMPLATE = """Information:
42
+ --------
43
+ {research_summary}
44
+ --------
45
+
46
+ Using the above information, generate an outline for a research report in Markdown syntax for the following question or topic: "{question}". \
47
+ The outline should provide a well-structured framework for the research report, including the main sections, subsections, and key points to be covered. \
48
+ The research report should be detailed, informative, in-depth, and a minimum of 1,200 words. \
49
+ Use appropriate Markdown syntax to format the outline and ensure readability.
50
+
51
+ Please do your best, this is very important to my career.""" # noqa: E501
52
+
53
+ model = ChatOpenAI(temperature=0)
54
+ prompt = ChatPromptTemplate.from_messages(
55
+ [
56
+ ("system", WRITER_SYSTEM_PROMPT),
57
+ ("user", RESEARCH_REPORT_TEMPLATE),
58
+ ],
59
+ ).configurable_alternatives(
60
+ ConfigurableField("report_type"),
61
+ default_key="research_report",
62
+ resource_report=ChatPromptTemplate.from_messages(
63
+ [
64
+ ("system", WRITER_SYSTEM_PROMPT),
65
+ ("user", RESOURCE_REPORT_TEMPLATE),
66
+ ],
67
+ ),
68
+ outline_report=ChatPromptTemplate.from_messages(
69
+ [
70
+ ("system", WRITER_SYSTEM_PROMPT),
71
+ ("user", OUTLINE_REPORT_TEMPLATE),
72
+ ],
73
+ ),
74
+ )
75
+ chain = prompt | model | StrOutputParser()
requirements.txt CHANGED
@@ -1,4 +1,6 @@
1
  anthropic==0.7.7
 
 
2
  faiss-cpu==1.7.4
3
  langchain==0.0.348
4
  langsmith==0.0.69
@@ -12,3 +14,4 @@ streamlit-feedback==0.1.3
12
  tiktoken==0.5.2
13
  tornado>=6.3.3 # not directly required, pinned by Snyk to avoid a vulnerability
14
  validators>=0.21.0 # not directly required, pinned by Snyk to avoid a vulnerability
 
 
1
  anthropic==0.7.7
2
+ beautifulsoup4==4.12.2
3
+ duckduckgo-search==4.0.0
4
  faiss-cpu==1.7.4
5
  langchain==0.0.348
6
  langsmith==0.0.69
 
14
  tiktoken==0.5.2
15
  tornado>=6.3.3 # not directly required, pinned by Snyk to avoid a vulnerability
16
  validators>=0.21.0 # not directly required, pinned by Snyk to avoid a vulnerability
17
+ wikipedia==1.4.0