Update query.py
Browse files
query.py
CHANGED
@@ -9,15 +9,16 @@ def extract_between_tags(text, start_tag, end_tag):
|
|
9 |
return text[start_index+len(start_tag):end_index-len(end_tag)]
|
10 |
|
11 |
class VectaraQuery():
|
12 |
-
def __init__(self, api_key: str, customer_id:
|
13 |
self.customer_id = customer_id
|
14 |
self.corpus_ids = corpus_ids
|
15 |
self.api_key = api_key
|
|
|
16 |
self.conv_id = None
|
17 |
|
18 |
def submit_query(self, query_str: str):
|
19 |
corpora_key_list = [{
|
20 |
-
'customer_id':
|
21 |
} for corpus_id in self.corpus_ids
|
22 |
]
|
23 |
|
@@ -27,7 +28,7 @@ class VectaraQuery():
|
|
27 |
headers = {
|
28 |
"Content-Type": "application/json",
|
29 |
"Accept": "application/json",
|
30 |
-
"customer-id":
|
31 |
"x-api-key": self.api_key,
|
32 |
"grpc-timeout": "60S"
|
33 |
}
|
@@ -55,12 +56,12 @@ class VectaraQuery():
|
|
55 |
{
|
56 |
'responseLang': 'eng',
|
57 |
'maxSummarizedResults': 5,
|
58 |
-
'summarizerPromptName':
|
59 |
'chat': {
|
60 |
'store': True,
|
61 |
'conversationId': self.conv_id
|
62 |
},
|
63 |
-
'debug': True,
|
64 |
}
|
65 |
]
|
66 |
}
|
@@ -101,9 +102,10 @@ class VectaraQuery():
|
|
101 |
doc_num = responses[response_num-1]['documentIndex']
|
102 |
metadata = {item['name']: item['value'] for item in docs[doc_num]['metadata']}
|
103 |
text = extract_between_tags(responses[response_num-1]['text'], start_tag, end_tag)
|
104 |
-
url
|
105 |
-
|
106 |
-
refs
|
|
|
107 |
|
108 |
# replace references with markdown links
|
109 |
refs_dict = {url:(inx+1) for inx,url in enumerate(refs)}
|
@@ -118,3 +120,4 @@ class VectaraQuery():
|
|
118 |
summary = summary[:start] + f'[\[{citation_inx}\]]({url})' + summary[end:]
|
119 |
|
120 |
return summary
|
|
|
|
9 |
return text[start_index+len(start_tag):end_index-len(end_tag)]
|
10 |
|
11 |
class VectaraQuery():
|
12 |
+
def __init__(self, api_key: str, customer_id: str, corpus_ids: list[str], prompt_name: str = None):
|
13 |
self.customer_id = customer_id
|
14 |
self.corpus_ids = corpus_ids
|
15 |
self.api_key = api_key
|
16 |
+
self.prompt_name = prompt_name if prompt_name else "vectara-summary-ext-v1.2.0"
|
17 |
self.conv_id = None
|
18 |
|
19 |
def submit_query(self, query_str: str):
|
20 |
corpora_key_list = [{
|
21 |
+
'customer_id': self.customer_id, 'corpus_id': corpus_id, 'lexical_interpolation_config': {'lambda': 0.025}
|
22 |
} for corpus_id in self.corpus_ids
|
23 |
]
|
24 |
|
|
|
28 |
headers = {
|
29 |
"Content-Type": "application/json",
|
30 |
"Accept": "application/json",
|
31 |
+
"customer-id": self.customer_id,
|
32 |
"x-api-key": self.api_key,
|
33 |
"grpc-timeout": "60S"
|
34 |
}
|
|
|
56 |
{
|
57 |
'responseLang': 'eng',
|
58 |
'maxSummarizedResults': 5,
|
59 |
+
'summarizerPromptName': self.prompt_name,
|
60 |
'chat': {
|
61 |
'store': True,
|
62 |
'conversationId': self.conv_id
|
63 |
},
|
64 |
+
# 'debug': True,
|
65 |
}
|
66 |
]
|
67 |
}
|
|
|
102 |
doc_num = responses[response_num-1]['documentIndex']
|
103 |
metadata = {item['name']: item['value'] for item in docs[doc_num]['metadata']}
|
104 |
text = extract_between_tags(responses[response_num-1]['text'], start_tag, end_tag)
|
105 |
+
if 'url' in metadata.keys():
|
106 |
+
url = f"{metadata['url']}#:~:text={quote(text)}"
|
107 |
+
if url not in refs:
|
108 |
+
refs.append(url)
|
109 |
|
110 |
# replace references with markdown links
|
111 |
refs_dict = {url:(inx+1) for inx,url in enumerate(refs)}
|
|
|
120 |
summary = summary[:start] + f'[\[{citation_inx}\]]({url})' + summary[end:]
|
121 |
|
122 |
return summary
|
123 |
+
|