jamescalam committed on
Commit
a50de00
1 Parent(s): 2ccdcf8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -6
app.py CHANGED
@@ -1,19 +1,46 @@
1
  import streamlit as st
2
  import pinecone
3
  from sentence_transformers import SentenceTransformer
 
4
 
5
  PINECONE_KEY = st.secrets["PINECONE_KEY"] # app.pinecone.io
 
6
 
7
  @st.experimental_singleton
8
  def init_pinecone():
9
  pinecone.init(api_key=PINECONE_KEY, environment="us-west1-gcp")
10
- return pinecone.Index('youtube-search')
11
 
12
  @st.experimental_singleton
13
  def init_retriever():
14
  return SentenceTransformer("multi-qa-mpnet-base-dot-v1")
15
 
16
- index = init_pinecone()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  retriever = init_retriever()
18
 
19
  def card(thumbnail: str, title: str, urls: list, contexts: list, starts: list, ends: list):
@@ -64,7 +91,10 @@ def card(thumbnail: str, title: str, urls: list, contexts: list, starts: list, e
64
 
65
  st.write("""
66
  # YouTube Q&A
67
- Ask me a question!
 
 
 
68
  """)
69
 
70
  st.markdown("""
@@ -75,12 +105,11 @@ query = st.text_input("Search!", "")
75
 
76
  if query != "":
77
  print(f"query: {query}")
78
- xq = retriever.encode([query]).tolist()
79
- xc = index.query(xq, top_k=5, include_metadata=True)
80
 
81
  results = {}
82
  order = []
83
- for context in xc['matches']:
84
  video_id = context['metadata']['url'].split('/')[-1]
85
  if video_id not in results:
86
  results[video_id] = {
 
1
  import streamlit as st
2
  import pinecone
3
  from sentence_transformers import SentenceTransformer
4
+ import logging
5
 
6
  PINECONE_KEY = st.secrets["PINECONE_KEY"] # app.pinecone.io
7
+ INDEX_ID = 'youtube-search'
8
 
9
  @st.experimental_singleton
10
  def init_pinecone():
11
  pinecone.init(api_key=PINECONE_KEY, environment="us-west1-gcp")
12
+ return pinecone.Index(INDEX_ID)
13
 
14
  @st.experimental_singleton
15
  def init_retriever():
16
  return SentenceTransformer("multi-qa-mpnet-base-dot-v1")
17
 
18
def make_query(query, retriever, top_k=10, include_values=True, include_metadata=True, filter=None):
    """Embed *query* and search the Pinecone index, retrying on failure.

    The query text is encoded with ``retriever.encode`` and sent to the index
    stored in ``st.session_state.index``. If an attempt raises, the Pinecone
    client is re-initialised, the session's index handle is replaced, and the
    query is tried again, up to three attempts in total.

    Args:
        query: The query text to embed and search for.
        retriever: Object exposing ``encode(list_of_texts)`` whose result has
            a ``tolist()`` method.
        top_k: Forwarded to ``index.query`` as ``top_k``.
        include_values: Forwarded to ``index.query`` as ``include_values``.
        include_metadata: Forwarded to ``index.query`` as ``include_metadata``.
        filter: Forwarded to ``index.query`` as ``filter``. The name shadows
            the builtin but is kept so keyword callers continue to work.

    Returns:
        The ``'matches'`` entry of the query response, or an empty list when
        every attempt failed.
    """
    max_attempts = 3
    xq = retriever.encode([query]).tolist()
    logging.info("Query: %s", query)

    matches = []
    for attempt in range(1, max_attempts + 1):
        try:
            xc = st.session_state.index.query(
                xq,
                top_k=top_k,
                include_values=include_values,
                include_metadata=include_metadata,
                filter=filter,
            )
            matches = xc['matches']
            break
        except Exception:
            # Catch Exception rather than using a bare ``except`` so that
            # KeyboardInterrupt/SystemExit still propagate, and record the
            # traceback so the underlying failure is visible in the logs.
            logging.exception(
                "Query attempt %d of %d failed; reloading index",
                attempt,
                max_attempts,
            )
            # force reload
            pinecone.init(api_key=PINECONE_KEY, environment="us-west1-gcp")
            st.session_state.index = pinecone.Index(INDEX_ID)
    else:
        # Only reached when no attempt succeeded (the loop never hit ``break``).
        logging.error("Query failed")

    return matches
42
+
43
+ st.session_state.index = init_pinecone()
44
  retriever = init_retriever()
45
 
46
  def card(thumbnail: str, title: str, urls: list, contexts: list, starts: list, ends: list):
 
91
 
92
  st.write("""
93
  # YouTube Q&A
94
+
95
+ :::info
96
+ YouTube search built as [explained here](https://pinecone.io/learn/openai-whisper)!
97
+ The current version has a limited video dataset, but we are working on expanding it.
98
  """)
99
 
100
  st.markdown("""
 
105
 
106
  if query != "":
107
  print(f"query: {query}")
108
+ matches = make_query(query, retriever, top_k=5)
 
109
 
110
  results = {}
111
  order = []
112
+ for context in matches:
113
  video_id = context['metadata']['url'].split('/')[-1]
114
  if video_id not in results:
115
  results[video_id] = {