Ask-Langchain

Sleeping

App Files Files Community

ofermend committed on Aug 22, 2023

Commit

7f46a81

0 Parent(s):

Duplicate from ofermend/Ask-Langchain

Browse files

Files changed (6) hide show

.gitattributes +35 -0
README.md +14 -0
Vectara-logo.png +0 -0
app.py +76 -0
query.py +92 -0
requirements.txt +4 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,35 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,14 @@

+---
+title: Ask Feynman
+emoji: 📈
+colorFrom: indigo
+colorTo: green
+sdk: streamlit
+sdk_version: 1.25.0
+app_file: app.py
+pinned: false
+license: apache-2.0
+duplicated_from: ofermend/Ask-Langchain
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

Vectara-logo.png ADDED Viewed

app.py ADDED Viewed

	@@ -0,0 +1,76 @@

+import sys
+import toml
+from omegaconf import OmegaConf
+from query import VectaraQuery
+import os
+import streamlit as st
+from PIL import Image
+from functools import partial
def set_query(q: str):
    """Store *q* under the ``'query'`` key of Streamlit's session state.

    Used as a button ``on_click`` callback so that clicking a sample
    question fills in the search box bound to the same key.
    """
    state = st.session_state
    state['query'] = q
def launch_bot():
    """Render the Streamlit page and answer the query held in session state.

    Configuration is read from environment variables: ``customer_id``,
    ``corpus_ids``, ``api_key``, ``title``, ``description``, ``examples``
    and ``source_data_desc``. A missing variable raises ``KeyError``.

    The page consists of a sidebar with explanatory text and the logo, a
    search box with a response area, and a column of sample-question
    buttons. When ``st.session_state['query']`` is non-empty the question
    is sent through ``VectaraQuery.submit_query`` and the returned text is
    rendered as markdown in the response area.
    """
    # Local import keeps this block self-contained; `ast` is only needed here.
    import ast

    def get_answer(question):
        return vq.submit_query(question)

    # `corpus_ids` and `examples` are parsed as Python literals and turned into
    # lists (presumably something like "[1, 2]" or "'q1', 'q2'" -- confirm with
    # the deployment settings). literal_eval accepts the same literal syntax as
    # the eval() used previously but cannot execute code taken from the
    # environment.
    corpus_ids = list(ast.literal_eval(os.environ['corpus_ids']))
    questions = list(ast.literal_eval(os.environ['examples']))
    cfg = OmegaConf.create({
        'customer_id': os.environ['customer_id'],
        'corpus_ids': corpus_ids,
        'api_key': os.environ['api_key'],
        'title': os.environ['title'],
        'description': os.environ['description'],
        'examples': questions,
        'source_data_desc': os.environ['source_data_desc']
    })
    vq = VectaraQuery(cfg.api_key, cfg.customer_id, cfg.corpus_ids)
    st.set_page_config(page_title=cfg.title, layout="wide")

    # left side content
    with st.sidebar:
        image = Image.open('Vectara-logo.png')
        st.markdown(f"## Welcome to {cfg.title}\n\n"
                    f"This demo uses [Grounded Generation](https://vectara.com/grounded-generation-making-generative-ai-safe-trustworthy-more-relevant/) to answer questions about {cfg.source_data_desc}\n\n")
        st.markdown("---")
        st.markdown(
            "## How this works?\n"
            "This app was built with [Vectara](https://vectara.com).\n"
            "Vectara's [Indexing API](https://docs.vectara.com/docs/api-reference/indexing-apis/indexing) was used to ingest the data into a Vectara corpus (or index).\n\n"
            "This app uses Vectara API to query the corpus and present the results to you, answering your question.\n\n"
        )
        st.markdown("---")
        st.image(image, width=250)

    st.markdown(f"<center> <h2> Vectara demo app: {cfg.title} </h2> </center>", unsafe_allow_html=True)
    # The heading is now closed with </h4>; it used to be "closed" by a second
    # opening <h4> tag, which left the element unterminated.
    st.markdown(f"<center> <h4> {cfg.description} </h4> </center>", unsafe_allow_html=True)

    # Setup a split column layout
    main_col, questions_col = st.columns([4, 2], gap="medium")
    with main_col:
        cols = st.columns([1, 8], gap="small")
        cols[0].markdown("""<h5>Search</h5>""", unsafe_allow_html=True)
        # key='query' binds the box to the session-state entry written by set_query.
        cols[1].text_input(label="search", key='query', max_chars=256, label_visibility='collapsed', help="Enter your question here")
        st.markdown("<h5>Response</h5>", unsafe_allow_html=True)
        # Placeholder container: shows a disabled text area until an answer replaces it.
        response_text = st.empty()
        response_text.text_area(" ", placeholder="The answer will appear here.", disabled=True,
                                key="response", height=1, label_visibility='collapsed')
    with questions_col:
        st.markdown("<h5 style='text-align:center; color: red'> Sample questions </h5>", unsafe_allow_html=True)
        for q in cfg.examples:
            st.button(q, on_click=partial(set_query, q), use_container_width=True)

    # run the main flow
    if st.session_state.get('query'):
        query = st.session_state['query']
        response = get_answer(query)
        response_text.markdown(response)


if __name__ == "__main__":
    launch_bot()

query.py ADDED Viewed

	@@ -0,0 +1,92 @@

+import requests
+import json
+import re
+from urllib.parse import quote
def extract_between_tags(text, start_tag, end_tag):
    """Return the part of *text* enclosed by *start_tag* and *end_tag*.

    Only the first occurrence of *start_tag* is considered, and the end tag
    is searched for after it.

    Args:
        text: String that may contain a tagged span.
        start_tag: Marker that opens the span.
        end_tag: Marker that closes the span.

    Returns:
        The text strictly between the two tags. If *start_tag* is absent an
        empty string is returned; if *end_tag* is absent everything after
        *start_tag* is returned.
    """
    start_index = text.find(start_tag)
    if start_index == -1:
        return ""
    content_start = start_index + len(start_tag)
    # find() already points at the first character of the end tag, so it is the
    # exclusive end of the content; subtracting len(end_tag) would cut it short.
    end_index = text.find(end_tag, content_start)
    if end_index == -1:
        return text[content_start:]
    return text[content_start:end_index]
class VectaraQuery():
    """Send a question to the Vectara query endpoint and return a cited summary.

    ``submit_query`` posts the question together with the configured corpora,
    takes the summary text from the first response set and replaces every
    ``[n]`` citation marker in it with a markdown link to the cited
    document's ``url`` metadata value plus a ``#:~:text=`` fragment built
    from the matched snippet.
    """

    ENDPOINT = "https://api.vectara.io/v1/query"
    # Tags requested around the matched snippet in each result text.
    START_TAG = "%START_SNIPPET%"
    END_TAG = "%END_SNIPPET%"
    ERROR_MESSAGE = "Sorry, something went wrong in my brain. Please try again later."
    # Client-side limit for the HTTP call; same 60 seconds as the
    # "grpc-timeout" header sent with the request.
    REQUEST_TIMEOUT_SECONDS = 60
    # Citation markers in the summary: "[1]" .. "[99]".
    CITATION_PATTERN = re.compile(r'\[\d{1,2}\]')

    def __init__(self, api_key: str, customer_id: int, corpus_ids: list):
        """Remember the credentials and the corpora to query.

        Args:
            api_key: Value sent in the ``x-api-key`` header.
            customer_id: Value sent (as a string) in the ``customer-id``
                header and in every corpus key.
            corpus_ids: Ids of the corpora to search; each is sent as a string.
        """
        self.customer_id = customer_id
        self.corpus_ids = corpus_ids
        self.api_key = api_key

    def submit_query(self, query_str: str):
        """Run *query_str* against the configured corpora.

        Args:
            query_str: The user's question.

        Returns:
            The summary text with resolvable citation markers rewritten as
            markdown links, or ``ERROR_MESSAGE`` when the request fails,
            returns a non-200 status, or the payload lacks the expected
            fields.
        """
        headers = {
            "Content-Type": "application/json",
            "Accept": "application/json",
            "customer-id": str(self.customer_id),
            "x-api-key": self.api_key,
            "grpc-timeout": "60S"
        }
        body = self._build_request_body(query_str)
        try:
            # Without a timeout requests waits indefinitely on a stalled connection.
            response = requests.post(self.ENDPOINT, data=json.dumps(body), verify=True,
                                     headers=headers, timeout=self.REQUEST_TIMEOUT_SECONDS)
        except requests.RequestException as err:
            print(f"Query failed with error {err}")
            return self.ERROR_MESSAGE
        if response.status_code != 200:
            print(f"Query failed with code {response.status_code}, reason {response.reason}, text {response.text}")
            return self.ERROR_MESSAGE

        try:
            response_set = response.json()['responseSet'][0]
            summary = response_set['summary'][0]['text']
            responses = response_set['response']
            docs = response_set['document']
        except (KeyError, IndexError, TypeError, ValueError) as err:
            print(f"Query returned an unexpected payload: {err!r}")
            return self.ERROR_MESSAGE

        return self._link_citations(summary, responses, docs)

    def _build_request_body(self, query_str):
        """Build the JSON-serialisable request body for *query_str*."""
        corpora_key_list = [
            {
                'customer_id': str(self.customer_id),
                'corpus_id': str(corpus_id),
                'lexical_interpolation_config': {'lambda': 0.025},
            }
            for corpus_id in self.corpus_ids
        ]
        return {
            'query': [
                {
                    'query': query_str,
                    'start': 0,
                    'numResults': 7,
                    'corpusKey': corpora_key_list,
                    'context_config': {
                        'sentences_before': 3,
                        'sentences_after': 3,
                        'start_tag': self.START_TAG,
                        'end_tag': self.END_TAG,
                    },
                    'summary': [
                        {
                            'responseLang': 'eng',
                            'maxSummarizedResults': 7,
                        }
                    ]
                }
            ]
        }

    def _resolve_citation(self, response_num, responses, docs):
        """Return the link URL for 1-based citation *response_num*, or ``None``.

        ``None`` means the citation cannot be resolved: the number is outside
        ``1..len(responses)``, or the cited document has no usable ``url``
        metadata entry.
        """
        # Explicit range check: "[0]" would otherwise index responses[-1] and
        # silently cite the last result.
        if not 1 <= response_num <= len(responses):
            return None
        try:
            hit = responses[response_num - 1]
            metadata = {item['name']: item['value'] for item in docs[hit['documentIndex']]['metadata']}
            base_url = metadata['url']
            tagged_text = hit['text']
        except (KeyError, IndexError, TypeError):
            return None
        snippet = extract_between_tags(tagged_text, self.START_TAG, self.END_TAG)
        return f"{base_url}#:~:text={quote(snippet)}"

    def _link_citations(self, summary, responses, docs):
        """Replace ``[n]`` markers in *summary* with numbered markdown links.

        Links are numbered by first appearance of each distinct URL, so two
        markers that resolve to the same URL share one number. Markers that
        cannot be resolved are left untouched.
        """
        url_by_response = {}   # response number -> URL (or None), resolved once
        citation_numbers = {}  # URL -> citation number in first-appearance order
        resolved = []
        for match in self.CITATION_PATTERN.finditer(summary):
            response_num = int(match.group()[1:-1])
            if response_num not in url_by_response:
                url_by_response[response_num] = self._resolve_citation(response_num, responses, docs)
            url = url_by_response[response_num]
            if url is not None:
                citation_numbers.setdefault(url, len(citation_numbers) + 1)
            resolved.append((match.span(), url))

        # Substitute from the end so earlier spans keep their offsets.
        for (start, end), url in reversed(resolved):
            if url is None:
                continue
            # Escaped backslashes emit the literal text "[\[n\]](url)".
            summary = summary[:start] + f'[\\[{citation_numbers[url]}\\]]({url})' + summary[end:]
        return summary

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+requests_to_curl==1.1.0
+toml==0.10.2
+omegaconf==2.3.0
+syrupy==4.0.8