ugmSorcero committed on
Commit
a088d42
1 Parent(s): 1d466d7

Revert "Adds doc 2 speech node to keyword search and plays audio in UI"

Browse files

This reverts commit 1d466d71500aa4ad0916150db61be4b47f859cc9.

.gitignore CHANGED
@@ -128,6 +128,4 @@ dmypy.json
128
  # Pyre type checker
129
  .pyre/
130
 
131
- .vscode/
132
-
133
- data/audio/
 
128
  # Pyre type checker
129
  .pyre/
130
 
131
+ .vscode/
 
 
core/pipelines.py CHANGED
@@ -2,19 +2,14 @@
2
  Haystack Pipelines
3
  """
4
 
5
- from pathlib import Path
6
  from haystack import Pipeline
7
  from haystack.document_stores import InMemoryDocumentStore
8
  from haystack.nodes.retriever import DensePassageRetriever, TfidfRetriever
9
  from haystack.nodes.preprocessor import PreProcessor
10
  from haystack.nodes.ranker import SentenceTransformersRanker
11
- from haystack.nodes.audio.document_to_speech import DocumentToSpeech
12
- import os
13
 
14
- data_path = 'data/'
15
- os.makedirs(data_path, exist_ok=True)
16
 
17
- def keyword_search(index="documents", split_word_length=100, audio_output=False):
18
  """
19
  **Keyword Search Pipeline**
20
 
@@ -49,13 +44,6 @@ def keyword_search(index="documents", split_word_length=100, audio_output=False)
49
  index_pipeline.add_node(
50
  document_store, name="DocumentStore", inputs=["TfidfRetriever"]
51
  )
52
-
53
- if audio_output:
54
- doc2speech = DocumentToSpeech(
55
- model_name_or_path="espnet/kan-bayashi_ljspeech_vits",
56
- generated_audio_dir=Path(data_path + 'audio'),
57
- )
58
- search_pipeline.add_node(doc2speech, name='DocumentToSpeech', inputs=['TfidfRetriever'])
59
 
60
  return search_pipeline, index_pipeline
61
 
 
2
  Haystack Pipelines
3
  """
4
 
 
5
  from haystack import Pipeline
6
  from haystack.document_stores import InMemoryDocumentStore
7
  from haystack.nodes.retriever import DensePassageRetriever, TfidfRetriever
8
  from haystack.nodes.preprocessor import PreProcessor
9
  from haystack.nodes.ranker import SentenceTransformersRanker
 
 
10
 
 
 
11
 
12
+ def keyword_search(index="documents", split_word_length=100):
13
  """
14
  **Keyword Search Pipeline**
15
 
 
44
  index_pipeline.add_node(
45
  document_store, name="DocumentStore", inputs=["TfidfRetriever"]
46
  )
 
 
 
 
 
 
 
47
 
48
  return search_pipeline, index_pipeline
49
 
core/search_index.py CHANGED
@@ -32,17 +32,13 @@ def search(queries, pipeline):
32
  for res in matches:
33
  if not score_is_empty:
34
  score_is_empty = True if res.score is None else False
35
- match = {
36
- "text": res.content,
37
- "id": res.meta["id"],
38
- "fragment_id": res.id,
39
- }
40
- if not score_is_empty:
41
- match.update({'score': res.score})
42
- if hasattr(res, 'content_audio'):
43
- match.update({'content_audio': res.content_audio})
44
  query_results.append(
45
- match
 
 
 
 
 
46
  )
47
  if not score_is_empty:
48
  query_results = sorted(
 
32
  for res in matches:
33
  if not score_is_empty:
34
  score_is_empty = True if res.score is None else False
 
 
 
 
 
 
 
 
 
35
  query_results.append(
36
+ {
37
+ "text": res.content,
38
+ "score": res.score,
39
+ "id": res.meta["id"],
40
+ "fragment_id": res.id,
41
+ }
42
  )
43
  if not score_is_empty:
44
  query_results = sorted(
interface/components.py CHANGED
@@ -59,10 +59,8 @@ def component_show_search_result(container, results):
59
  st.markdown(f"### Match {idx+1}")
60
  st.markdown(f"**Text**: {document['text']}")
61
  st.markdown(f"**Document**: {document['id']}")
62
- if 'score' in document:
63
  st.markdown(f"**Score**: {document['score']:.3f}")
64
- if 'content_audio' in document:
65
- st.audio(str(document['content_audio']))
66
  st.markdown("---")
67
 
68
 
 
59
  st.markdown(f"### Match {idx+1}")
60
  st.markdown(f"**Text**: {document['text']}")
61
  st.markdown(f"**Document**: {document['id']}")
62
+ if document["score"] is not None:
63
  st.markdown(f"**Score**: {document['score']:.3f}")
 
 
64
  st.markdown("---")
65
 
66
 
requirements.txt CHANGED
@@ -5,8 +5,4 @@ black==22.8.0
5
  plotly==5.10.0
6
  newspaper3k==0.2.8
7
  PyPDF2==2.10.7
8
- pytesseract==0.3.10
9
- soundfile==0.10.3.post1
10
- espnet
11
- pydub==0.25.1
12
- espnet_model_zoo==0.1.7
 
5
  plotly==5.10.0
6
  newspaper3k==0.2.8
7
  PyPDF2==2.10.7
8
+ pytesseract==0.3.10