IRS-chat

Sleeping

App Files Community

ofermend committed on Mar 7, 2024

Commit

a466baa

verified ·

1 Parent(s): 2c7091c

Update query.py

Browse files

Files changed (1) hide show

query.py +11 -8

query.py CHANGED Viewed

@@ -9,15 +9,16 @@ def extract_between_tags(text, start_tag, end_tag):
     return text[start_index+len(start_tag):end_index-len(end_tag)]
 class VectaraQuery():
-    def __init__(self, api_key: str, customer_id: int, corpus_ids: list):
         self.customer_id = customer_id
         self.corpus_ids = corpus_ids
         self.api_key = api_key
         self.conv_id = None
     def submit_query(self, query_str: str):
         corpora_key_list = [{
-                'customer_id': str(self.customer_id), 'corpus_id': str(corpus_id), 'lexical_interpolation_config': {'lambda': 0.025}
             } for corpus_id in self.corpus_ids
         ]
@@ -27,7 +28,7 @@ class VectaraQuery():
         headers = {
             "Content-Type": "application/json",
             "Accept": "application/json",
-            "customer-id": str(self.customer_id),
             "x-api-key": self.api_key,
             "grpc-timeout": "60S"
         }
@@ -55,12 +56,12 @@ class VectaraQuery():
                         {
                             'responseLang': 'eng',
                             'maxSummarizedResults': 5,
-                            'summarizerPromptName': 'vectara-experimental-summary-ext-2023-12-11-large',  # Note: this promptName requires Vectara Scale plan
                             'chat': {
                                 'store': True,
                                 'conversationId': self.conv_id
                             },
-                            'debug': True,
                         }
                     ]
                 }
@@ -101,9 +102,10 @@ class VectaraQuery():
             doc_num = responses[response_num-1]['documentIndex']
             metadata = {item['name']: item['value'] for item in docs[doc_num]['metadata']}
             text = extract_between_tags(responses[response_num-1]['text'], start_tag, end_tag)
-            url = f"{metadata['url']}#:~:text={quote(text)}"
-            if url not in refs:
-                refs.append(url)
         # replace references with markdown links
         refs_dict = {url:(inx+1) for inx,url in enumerate(refs)}
@@ -118,3 +120,4 @@ class VectaraQuery():
             summary = summary[:start] + f'[\[{citation_inx}\]]({url})' + summary[end:]
         return summary

     return text[start_index+len(start_tag):end_index-len(end_tag)]
 class VectaraQuery():
+    def __init__(self, api_key: str, customer_id: str, corpus_ids: list[str], prompt_name: str = None):
         self.customer_id = customer_id
         self.corpus_ids = corpus_ids
         self.api_key = api_key
+        self.prompt_name = prompt_name if prompt_name else "vectara-summary-ext-v1.2.0"
         self.conv_id = None
     def submit_query(self, query_str: str):
         corpora_key_list = [{
+                'customer_id': self.customer_id, 'corpus_id': corpus_id, 'lexical_interpolation_config': {'lambda': 0.025}
             } for corpus_id in self.corpus_ids
         ]
         headers = {
             "Content-Type": "application/json",
             "Accept": "application/json",
+            "customer-id": self.customer_id,
             "x-api-key": self.api_key,
             "grpc-timeout": "60S"
         }
                         {
                             'responseLang': 'eng',
                             'maxSummarizedResults': 5,
+                            'summarizerPromptName': self.prompt_name,
                             'chat': {
                                 'store': True,
                                 'conversationId': self.conv_id
                             },
+#                            'debug': True,
                         }
                     ]
                 }
             doc_num = responses[response_num-1]['documentIndex']
             metadata = {item['name']: item['value'] for item in docs[doc_num]['metadata']}
             text = extract_between_tags(responses[response_num-1]['text'], start_tag, end_tag)
+            if 'url' in metadata.keys():
+                url = f"{metadata['url']}#:~:text={quote(text)}"
+                if url not in refs:
+                    refs.append(url)
         # replace references with markdown links
         refs_dict = {url:(inx+1) for inx,url in enumerate(refs)}
             summary = summary[:start] + f'[\[{citation_inx}\]]({url})' + summary[end:]
         return summary