ofermend committed on
Commit
7f46a81
·
0 Parent(s):

Duplicate from ofermend/Ask-Langchain

Browse files
Files changed (6) hide show
  1. .gitattributes +35 -0
  2. README.md +14 -0
  3. Vectara-logo.png +0 -0
  4. app.py +76 -0
  5. query.py +92 -0
  6. requirements.txt +4 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Ask Feynman
3
+ emoji: 📈
4
+ colorFrom: indigo
5
+ colorTo: green
6
+ sdk: streamlit
7
+ sdk_version: 1.25.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: apache-2.0
11
+ duplicated_from: ofermend/Ask-Langchain
12
+ ---
13
+
14
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
Vectara-logo.png ADDED
app.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import toml
3
+ from omegaconf import OmegaConf
4
+ from query import VectaraQuery
5
+ import os
6
+
7
+ import streamlit as st
8
+ from PIL import Image
9
+ from functools import partial
10
+
11
def set_query(q: str):
    """Store ``q`` as the ``query`` entry of Streamlit's session state."""
    st.session_state.query = q
13
+
14
def launch_bot():
    """Render the Streamlit demo page and answer the current query, if any.

    Configuration is read from the environment variables ``customer_id``,
    ``corpus_ids``, ``api_key``, ``title``, ``description``, ``examples`` and
    ``source_data_desc``.

    Raises:
        KeyError: if any of those environment variables is not set.
    """
    # NOTE(review): eval() executes whatever expression these environment
    # variables contain. That is only acceptable while the deployment
    # environment is fully trusted; if the values are always plain list/tuple
    # literals, ast.literal_eval would be a safer way to parse them.
    corpus_ids = list(eval(os.environ['corpus_ids']))
    questions = list(eval(os.environ['examples']))
    cfg = OmegaConf.create({
        'customer_id': os.environ['customer_id'],
        'corpus_ids': corpus_ids,
        'api_key': os.environ['api_key'],
        'title': os.environ['title'],
        'description': os.environ['description'],
        'examples': questions,
        'source_data_desc': os.environ['source_data_desc']
    })
    vq = VectaraQuery(cfg.api_key, cfg.customer_id, cfg.corpus_ids)
    st.set_page_config(page_title=cfg.title, layout="wide")

    # left side content
    with st.sidebar:
        image = Image.open('Vectara-logo.png')
        st.markdown(f"## Welcome to {cfg.title}\n\n"
                    f"This demo uses [Grounded Generation](https://vectara.com/grounded-generation-making-generative-ai-safe-trustworthy-more-relevant/) to ask questions about {cfg.source_data_desc}\n\n")

        st.markdown("---")
        st.markdown(
            "## How this works?\n"
            "This app was built with [Vectara](https://vectara.com).\n"
            "Vectara's [Indexing API](https://docs.vectara.com/docs/api-reference/indexing-apis/indexing) was used to ingest the data into a Vectara corpus (or index).\n\n"
            "This app uses Vectara API to query the corpus and present the results to you, answering your question.\n\n"
        )
        st.markdown("---")
        st.image(image, width=250)

    st.markdown(f"<center> <h2> Vectara demo app: {cfg.title} </h2> </center>", unsafe_allow_html=True)
    # The heading is closed with </h4>; the original emitted a second opening tag.
    st.markdown(f"<center> <h4> {cfg.description} </h4> </center>", unsafe_allow_html=True)

    # Setup a split column layout
    main_col, questions_col = st.columns([4, 2], gap="medium")
    with main_col:
        cols = st.columns([1, 8], gap="small")
        cols[0].markdown("""<h5>Search</h5>""", unsafe_allow_html=True)
        cols[1].text_input(label="search", key='query', max_chars=256, label_visibility='collapsed', help="Enter your question here")

        st.markdown("<h5>Response</h5>", unsafe_allow_html=True)
        # Placeholder that is overwritten with the answer once a query is run.
        response_text = st.empty()
        response_text.text_area(" ", placeholder="The answer will appear here.", disabled=True,
                                key="response", height=1, label_visibility='collapsed')
    with questions_col:
        st.markdown("<h5 style='text-align:center; color: red'> Sample questions </h5>", unsafe_allow_html=True)
        for q in cfg.examples:
            # partial binds the current question so each button sets its own text.
            st.button(q, on_click=partial(set_query, q), use_container_width=True)

    # run the main flow
    query = st.session_state.get('query')
    if query:
        response_text.markdown(vq.submit_query(query))


if __name__ == "__main__":
    launch_bot()
query.py ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import json
3
+ import re
4
+ from urllib.parse import quote
5
+
6
def extract_between_tags(text, start_tag, end_tag):
    """Return the part of ``text`` enclosed by ``start_tag`` and ``end_tag``.

    Only the first occurrence of ``start_tag`` and the first ``end_tag`` that
    follows it are considered. The tags themselves are not part of the result.

    Args:
        text: String to search.
        start_tag: Marker that opens the wanted span.
        end_tag: Marker that closes the wanted span.

    Returns:
        The enclosed substring, or ``""`` when either tag is missing.
    """
    start_index = text.find(start_tag)
    if start_index == -1:
        return ""
    content_start = start_index + len(start_tag)
    # Search after the start tag so the end tag cannot match inside it.
    end_index = text.find(end_tag, content_start)
    if end_index == -1:
        return ""
    # end_index already points at the first character of the end tag, so the
    # slice stops exactly there; nothing further needs to be subtracted.
    return text[content_start:end_index]
10
+
11
class VectaraQuery():
    """Send queries to the Vectara v1 query endpoint and format the summary.

    The summary text returned by the API contains numeric citation markers
    such as ``[3]``; ``submit_query`` rewrites them into markdown links that
    point at the cited document.
    """

    # Citation markers in the summary: one or two digits in square brackets.
    _CITATION_PATTERN = re.compile(r'\[\d{1,2}\]')
    # Client-side timeout in seconds; matches the 60S sent in the
    # grpc-timeout header so a stalled connection cannot hang the caller.
    _REQUEST_TIMEOUT = 60
    _ERROR_MESSAGE = "Sorry, something went wrong in my brain. Please try again later."

    def __init__(self, api_key: str, customer_id: int, corpus_ids: list):
        """Remember the credentials and the corpora to query.

        Args:
            api_key: Value sent in the ``x-api-key`` header.
            customer_id: Value sent (as a string) in the ``customer-id`` header
                and in every corpus key.
            corpus_ids: Ids of the corpora included in each query.
        """
        self.customer_id = customer_id
        self.corpus_ids = corpus_ids
        self.api_key = api_key

    @staticmethod
    def _citation_url(response_num, responses, docs, start_tag, end_tag):
        """Return the link for 1-based citation ``response_num``, or ``None``.

        ``None`` is returned when the number does not refer to an entry of
        ``responses`` or when the cited document has no ``url`` metadata, so
        the caller can leave such a marker untouched.
        """
        if not 1 <= response_num <= len(responses):
            return None
        cited = responses[response_num - 1]
        doc_num = cited['documentIndex']
        metadata = {item['name']: item['value'] for item in docs[doc_num]['metadata']}
        if 'url' not in metadata:
            return None
        snippet = extract_between_tags(cited['text'], start_tag, end_tag)
        # "#:~:text=" appends the matched snippet to the URL as a text fragment.
        return f"{metadata['url']}#:~:text={quote(snippet)}"

    def submit_query(self, query_str: str):
        """Run ``query_str`` against the configured corpora.

        Args:
            query_str: The user's question.

        Returns:
            The summary text with each resolvable citation marker replaced by
            a markdown link, or a fixed apology string when the request fails
            or the API answers with a non-200 status.
        """
        corpora_key_list = [
            {
                'customer_id': str(self.customer_id),
                'corpus_id': str(corpus_id),
                'lexical_interpolation_config': {'lambda': 0.025},
            }
            for corpus_id in self.corpus_ids
        ]

        endpoint = "https://api.vectara.io/v1/query"
        start_tag = "%START_SNIPPET%"
        end_tag = "%END_SNIPPET%"
        headers = {
            "Content-Type": "application/json",
            "Accept": "application/json",
            "customer-id": str(self.customer_id),
            "x-api-key": self.api_key,
            "grpc-timeout": "60S"
        }
        body = {
            'query': [
                {
                    'query': query_str,
                    'start': 0,
                    'numResults': 7,
                    'corpusKey': corpora_key_list,
                    'context_config': {
                        'sentences_before': 3,
                        'sentences_after': 3,
                        'start_tag': start_tag,
                        'end_tag': end_tag,
                    },
                    'summary': [
                        {
                            'responseLang': 'eng',
                            'maxSummarizedResults': 7,
                        }
                    ]
                }
            ]
        }

        try:
            response = requests.post(endpoint, data=json.dumps(body), verify=True,
                                     headers=headers, timeout=self._REQUEST_TIMEOUT)
        except requests.exceptions.RequestException as err:
            # Treat transport failures like a bad status: log and apologise.
            print(f"Query failed with error {err}")
            return self._ERROR_MESSAGE
        if response.status_code != 200:
            print(f"Query failed with code {response.status_code}, reason {response.reason}, text {response.text}")
            return self._ERROR_MESSAGE

        response_set = response.json()['responseSet'][0]
        summary = response_set['summary'][0]['text']
        responses = response_set['response']
        docs = response_set['document']

        # Resolve every citation marker exactly once.
        spans = [match.span() for match in self._CITATION_PATTERN.finditer(summary)]
        urls = [
            self._citation_url(int(summary[start + 1:end - 1]), responses, docs, start_tag, end_tag)
            for start, end in spans
        ]

        # Number the unique links in order of first appearance.
        refs_dict = {}
        for url in urls:
            if url is not None and url not in refs_dict:
                refs_dict[url] = len(refs_dict) + 1

        # Replace from the end so the offsets of earlier markers stay valid.
        for (start, end), url in zip(reversed(spans), reversed(urls)):
            if url is None:
                continue
            citation_inx = refs_dict[url]
            summary = summary[:start] + f'[\\[{citation_inx}\\]]({url})' + summary[end:]
        return summary
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ requests_to_curl==1.1.0
2
+ toml==0.10.2
3
+ omegaconf==2.3.0
4
+ syrupy==4.0.8