Spaces:
Runtime error
Runtime error
jamescalam
committed on
Commit
•
a50de00
1
Parent(s):
2ccdcf8
Update app.py
Browse files
app.py
CHANGED
@@ -1,19 +1,46 @@
|
|
1 |
import streamlit as st
|
2 |
import pinecone
|
3 |
from sentence_transformers import SentenceTransformer
|
|
|
4 |
|
5 |
PINECONE_KEY = st.secrets["PINECONE_KEY"] # app.pinecone.io
|
|
|
6 |
|
7 |
@st.experimental_singleton
|
8 |
def init_pinecone():
|
9 |
pinecone.init(api_key=PINECONE_KEY, environment="us-west1-gcp")
|
10 |
-
return pinecone.Index(
|
11 |
|
12 |
@st.experimental_singleton
|
13 |
def init_retriever():
|
14 |
return SentenceTransformer("multi-qa-mpnet-base-dot-v1")
|
15 |
|
16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
retriever = init_retriever()
|
18 |
|
19 |
def card(thumbnail: str, title: str, urls: list, contexts: list, starts: list, ends: list):
|
@@ -64,7 +91,10 @@ def card(thumbnail: str, title: str, urls: list, contexts: list, starts: list, e
|
|
64 |
|
65 |
st.write("""
|
66 |
# YouTube Q&A
|
67 |
-
|
|
|
|
|
|
|
68 |
""")
|
69 |
|
70 |
st.markdown("""
|
@@ -75,12 +105,11 @@ query = st.text_input("Search!", "")
|
|
75 |
|
76 |
if query != "":
|
77 |
print(f"query: {query}")
|
78 |
-
|
79 |
-
xc = index.query(xq, top_k=5, include_metadata=True)
|
80 |
|
81 |
results = {}
|
82 |
order = []
|
83 |
-
for context in
|
84 |
video_id = context['metadata']['url'].split('/')[-1]
|
85 |
if video_id not in results:
|
86 |
results[video_id] = {
|
|
|
1 |
import streamlit as st
|
2 |
import pinecone
|
3 |
from sentence_transformers import SentenceTransformer
|
4 |
+
import logging
|
5 |
|
6 |
PINECONE_KEY = st.secrets["PINECONE_KEY"] # app.pinecone.io
|
7 |
+
INDEX_ID = 'youtube-search'
|
8 |
|
9 |
@st.experimental_singleton
def init_pinecone():
    """Connect to Pinecone and return a handle to the ``INDEX_ID`` index.

    Authenticates with ``PINECONE_KEY`` against the ``us-west1-gcp``
    environment. The function is decorated with
    ``st.experimental_singleton``.
    """
    pinecone.init(api_key=PINECONE_KEY, environment="us-west1-gcp")
    index = pinecone.Index(INDEX_ID)
    return index
|
13 |
|
14 |
@st.experimental_singleton
def init_retriever():
    """Load and return the sentence-transformers model used to embed queries.

    The function is decorated with ``st.experimental_singleton``.
    """
    model_name = "multi-qa-mpnet-base-dot-v1"
    return SentenceTransformer(model_name)
|
17 |
|
18 |
+
def make_query(query, retriever, top_k=10, include_values=True, include_metadata=True, filter=None):
    """Embed ``query`` and search the Pinecone index held in session state.

    The query text is encoded with ``retriever.encode`` and sent to
    ``st.session_state.index.query``. If the call raises, the Pinecone
    client is re-initialised, the session index handle is replaced, and the
    query is retried, for up to three attempts in total.

    Args:
        query: Search text entered by the user.
        retriever: Object exposing ``encode(list_of_str)`` whose result has a
            ``tolist()`` method.
        top_k: Forwarded to the index query as ``top_k``.
        include_values: Forwarded to the index query as ``include_values``.
        include_metadata: Forwarded to the index query as ``include_metadata``.
        filter: Forwarded to the index query as ``filter``. The name shadows
            the builtin but is kept for backward compatibility with callers.

    Returns:
        The ``'matches'`` entry of the query response, or an empty list if
        every attempt failed.
    """
    max_attempts = 3
    xq = retriever.encode([query]).tolist()
    logging.info("Query: %s", query)
    for attempt in range(1, max_attempts + 1):
        try:
            xc = st.session_state.index.query(
                xq,
                top_k=top_k,
                include_values=include_values,
                include_metadata=include_metadata,
                filter=filter,
            )
            # A successful query with zero hits is a valid result, not a failure.
            return xc['matches']
        except Exception:
            # Catch Exception rather than using a bare except, so that
            # KeyboardInterrupt/SystemExit still propagate; record the cause.
            logging.exception("Query attempt %d/%d failed", attempt, max_attempts)
            # force reload
            pinecone.init(api_key=PINECONE_KEY, environment="us-west1-gcp")
            st.session_state.index = pinecone.Index(INDEX_ID)
    logging.error("Query failed")
    return []
|
42 |
+
|
43 |
+
st.session_state.index = init_pinecone()
|
44 |
retriever = init_retriever()
|
45 |
|
46 |
def card(thumbnail: str, title: str, urls: list, contexts: list, starts: list, ends: list):
|
|
|
91 |
|
92 |
st.write("""
|
93 |
# YouTube Q&A
|
94 |
+
|
95 |
+
:::info
|
96 |
+
YouTube search built as [explained here](https://pinecone.io/learn/openai-whisper)!
|
97 |
+
The current version has a limited video dataset, but we are working on expanding it.
|
98 |
""")
|
99 |
|
100 |
st.markdown("""
|
|
|
105 |
|
106 |
if query != "":
|
107 |
print(f"query: {query}")
|
108 |
+
matches = make_query(query, retriever, top_k=5)
|
|
|
109 |
|
110 |
results = {}
|
111 |
order = []
|
112 |
+
for context in matches:
|
113 |
video_id = context['metadata']['url'].split('/')[-1]
|
114 |
if video_id not in results:
|
115 |
results[video_id] = {
|