josepablonevermined committed on
Commit
a734561
·
1 Parent(s): c24391d

Upload 2 files

Browse files
Files changed (2) hide show
  1. handler.py +54 -0
  2. requirements.txt +3 -0
handler.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from haystack.utils import fetch_archive_from_http, clean_wiki_text, convert_files_to_docs
3
+ from haystack.schema import Answer
4
+ from haystack.document_stores import InMemoryDocumentStore
5
+ from haystack.pipelines import ExtractiveQAPipeline
6
+ from haystack.nodes import FARMReader, TfidfRetriever
7
+ import logging
8
+ import json
9
+
10
# Set TOKENIZERS_PARALLELISM to "false" for this process before any model is
# loaded (presumably to keep the tokenizers library from running in parallel
# inside the serving worker -- confirm against the deployment's needs).
os.environ["TOKENIZERS_PARALLELISM"] = "false"
11
+
12
+ #Haystack Components
13
def start_haystack():
    """Assemble the extractive question-answering pipeline.

    An in-memory document store is created and filled through
    ``load_and_write_data``; a TF-IDF retriever and a FARM reader
    (``deepset/roberta-base-squad2-distilled``) are then combined into an
    ``ExtractiveQAPipeline``.

    Returns:
        The ready-to-query ``ExtractiveQAPipeline`` instance.
    """
    store = InMemoryDocumentStore()
    # The store is populated before the retriever is built on top of it.
    load_and_write_data(store)

    # Keyword arguments are evaluated left to right, so the retriever is
    # still constructed before the reader.
    return ExtractiveQAPipeline(
        retriever=TfidfRetriever(document_store=store),
        reader=FARMReader(
            model_name_or_path="deepset/roberta-base-squad2-distilled",
            use_gpu=True,
        ),
    )
20
+
21
def load_and_write_data(document_store):
    """Convert the files under ``./dao_data`` and store them.

    The directory is converted with ``convert_files_to_docs`` (cleaned with
    ``clean_wiki_text`` and split into paragraphs) and the resulting
    documents are written to ``document_store``.

    Args:
        document_store: Store object exposing ``write_documents``; it
            receives the converted documents.
    """
    print("Loading data ...")

    source_dir = "./dao_data"
    documents = convert_files_to_docs(
        dir_path=source_dir,
        clean_func=clean_wiki_text,
        split_paragraphs=True,
    )
    document_store.write_documents(documents)
28
+
29
+
30
class EndpointHandler():
    """Endpoint handler that answers questions with a Haystack QA pipeline."""

    def __init__(self, path=""):
        """Build the question-answering pipeline once, at start-up.

        Args:
            path: Accepted so the handler can be constructed with a path
                argument; it is not used by this implementation.
        """
        self.pipeline = start_haystack()

    def __call__(self, data):
        """Answer the question contained in the request payload.

        Args:
            data: Mapping holding the request payload. The question text is
                read from its ``"question"`` key; the mapping itself is left
                unmodified.

        Returns:
            ``{}`` (an empty dict) when the payload has no ``"question"``
            entry or its value is ``None``. Otherwise a JSON string of the
            form ``{"answer": <text>}`` holding the first answer returned by
            the pipeline, or ``{"answer": null}`` when the pipeline returned
            no answers.
        """
        # Read without popping so the caller's payload is not mutated.
        question = data.get("question")
        if question is None:
            return {}

        prediction = self.pipeline.run(
            query=question,
            params={"Retriever": {"top_k": 10}, "Reader": {"top_k": 5}},
        )

        # The pipeline may return an empty answer list (e.g. nothing was
        # retrieved); indexing [0] unconditionally would raise IndexError.
        answers = prediction.get("answers") or []
        best_answer = answers[0].answer if answers else None
        return json.dumps({"answer": best_answer})
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ farm-haystack==1.19.0
2
+ farm-haystack[inference]==1.19.0
3
+ validators==0.21.1