silvanocerza committed on
Commit
cf437f7
·
1 Parent(s): c5936cd

Add document sources

Browse files
Files changed (1) hide show
  1. main.py +31 -11
main.py CHANGED
@@ -5,8 +5,10 @@ import os
5
 
6
  from dotenv import load_dotenv
7
  from haystack.preview import Pipeline
 
8
  from haystack.preview.components.retrievers import MemoryBM25Retriever
9
  from haystack.preview.components.generators.openai.gpt import GPTGenerator
 
10
  from haystack.preview.components.builders.prompt_builder import PromptBuilder
11
  from haystack.preview.components.preprocessors import (
12
  DocumentCleaner,
@@ -77,34 +79,39 @@ def index_files(files):
77
  indexing_pipeline.run({"converter": {"paths": files}})
78
 
79
 
80
- def search(question: str) -> str:
81
- retriever = MemoryBM25Retriever(document_store=document_store(), top_k=10)
82
 
83
  template = """Take a deep breath and think then answer given the context
84
- Context: {{ documents|map(attribute='text')|join('\n') }}
85
- Question: {{ question }}
86
  Answer:
87
  """
88
  prompt_builder = PromptBuilder(template)
89
 
90
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
91
  generator = GPTGenerator(api_key=OPENAI_API_KEY)
 
92
 
93
  pipe = Pipeline()
94
 
95
  pipe.add_component("docs_retriever", retriever)
96
- pipe.add_component("builder", prompt_builder)
97
  pipe.add_component("gpt35", generator)
 
98
 
99
- pipe.connect("docs_retriever.documents", "builder.documents")
100
- pipe.connect("builder.prompt", "gpt35.prompt")
 
 
101
  res = pipe.run(
102
  {
103
  "docs_retriever": {"query": question},
104
- "builder": {"question": question},
 
105
  }
106
  )
107
- return res["gpt35"]["replies"][0]
108
 
109
 
110
  with st.status(
@@ -129,5 +136,18 @@ if question := st.text_input(
129
  with st.spinner("Waiting"):
130
  answer = search(question)
131
 
132
- st.balloons()
133
- st.markdown(answer)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
  from dotenv import load_dotenv
7
  from haystack.preview import Pipeline
8
+ from haystack.preview.dataclasses import GeneratedAnswer
9
  from haystack.preview.components.retrievers import MemoryBM25Retriever
10
  from haystack.preview.components.generators.openai.gpt import GPTGenerator
11
+ from haystack.preview.components.builders.answer_builder import AnswerBuilder
12
  from haystack.preview.components.builders.prompt_builder import PromptBuilder
13
  from haystack.preview.components.preprocessors import (
14
  DocumentCleaner,
 
79
  indexing_pipeline.run({"converter": {"paths": files}})
80
 
81
 
82
def search(question: str) -> GeneratedAnswer:
    """Answer ``question`` from the indexed documents and return the answer with its sources.

    Builds and runs a pipeline with four components: a BM25 retriever over
    ``document_store()``, a prompt builder, a GPT generator and an answer
    builder. The retrieved documents are wired both into the prompt and into
    the answer builder, so the returned answer carries the documents it was
    generated from.

    Args:
        question: The user's question. It is passed as the ``query`` input of
            the retriever, the prompt builder and the answer builder.

    Returns:
        The first element of the answer builder's ``answers`` output.
    """
    retriever = MemoryBM25Retriever(document_store=document_store(), top_k=5)

    # ``replace`` must be applied per document through ``map``: piping the
    # result of ``map(attribute='text')`` straight into ``replace`` would
    # stringify the whole sequence object instead of the individual texts,
    # and ``join`` would then join that string's characters.
    template = """Take a deep breath and think then answer given the context
    Context: {{ documents|map(attribute='text')|map('replace', '\n', ' ')|join(';') }}
    Question: {{ query }}
    Answer:
    """
    prompt_builder = PromptBuilder(template)

    # Falls back to an empty key when the environment variable is unset.
    api_key = os.getenv("OPENAI_API_KEY", "")
    generator = GPTGenerator(api_key=api_key)
    answer_builder = AnswerBuilder()

    pipe = Pipeline()

    pipe.add_component("docs_retriever", retriever)
    pipe.add_component("prompt_builder", prompt_builder)
    pipe.add_component("gpt35", generator)
    pipe.add_component("answer_builder", answer_builder)

    # Retrieved documents feed the prompt and are also attached to the answer.
    pipe.connect("docs_retriever.documents", "prompt_builder.documents")
    pipe.connect("prompt_builder.prompt", "gpt35.prompt")
    pipe.connect("docs_retriever.documents", "answer_builder.documents")
    pipe.connect("gpt35.replies", "answer_builder.replies")

    res = pipe.run(
        {
            "docs_retriever": {"query": question},
            "prompt_builder": {"query": question},
            "answer_builder": {"query": question},
        }
    )
    return res["answer_builder"]["answers"][0]
115
 
116
 
117
  with st.status(
 
136
  with st.spinner("Waiting"):
137
  answer = search(question)
138
 
139
+ if not st.session_state.get("run_once", False):
140
+ st.balloons()
141
+ st.session_state["run_once"] = True
142
+
143
+ print(answer.data)
144
+ st.markdown(answer.data)
145
+ with st.expander("See sources:"):
146
+ for document in answer.documents:
147
+ url_source = document.metadata.get("url_source", "")
148
+ content = f"{url_source}: {document.text}" if url_source else document.text
149
+ if document.metadata.get("type") == "md":
150
+ st.markdown(content)
151
+ else:
152
+ st.write(content)
153
+ st.divider()