jamescalam committed on
Commit
93da16f
·
1 Parent(s): 0003de8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -20
app.py CHANGED
@@ -16,25 +16,50 @@ def init_retriever():
16
  index = init_pinecone()
17
  retriever = init_retriever()
18
 
19
- def card(thubmnail, title, url, context):
20
- return st.markdown(f"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  <div class="container-fluid">
22
  <div class="row align-items-start">
23
  <div class="col-md-4 col-sm-4">
24
- <div class="position-relative">
25
- <a href={url}><img src={thubmnail} class="img-fluid" style="width: 192px; height: 106px"></a>
26
- </div>
27
- </div>
28
- <div class="col-md-8 col-sm-8">
29
- <a href={url}>{title}</a>
30
- <br>
31
- <span style="color: #808080;">
32
- <small>{context[:200].capitalize()+"...."}</small>
33
- </span>
34
- </div>
35
- </div>
36
- </div>
37
- """, unsafe_allow_html=True)
38
 
39
 
40
  st.write("""
@@ -52,10 +77,35 @@ if query != "":
52
  xq = retriever.encode([query]).tolist()
53
  xc = index.query(xq, top_k=5, include_metadata=True)
54
 
 
 
55
  for context in xc['matches']:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  card(
57
- f"https://img.youtube.com/vi/{context['metadata']['url'].split('/')[-1]}/maxresdefault.jpg",
58
- context['metadata']['title'],
59
- f"{context['metadata']['url']}?t={int(context['metadata']['start'])}",
60
- context['metadata']['text']
 
 
61
  )
 
16
  index = init_pinecone()
17
  retriever = init_retriever()
18
 
19
def _format_timestamp(seconds) -> str:
    """Return *seconds* (an offset into the video) as a zero-padded ``MM:SS`` string."""
    # divmod keeps the minute and second fields consistent for any offset; the
    # minute field simply widens past two digits at 100 minutes and beyond
    # instead of being truncated.
    minutes, secs = divmod(int(seconds), 60)
    return f"{minutes:02d}:{secs:02d}"


def card(thumbnail: str, title: str, urls: list, contexts: list, starts: list, ends: list):
    """Render one result card for a video: thumbnail, title and matched snippets.

    The four lists are parallel (one entry per matched snippet). Snippets are
    ordered by ``(end, start, url, context)``; a snippet whose start falls
    strictly inside the previous snippet's ``(start, end)`` window is treated
    as a continuation of it, otherwise it begins a new line.

    Args:
        thumbnail: Image URL used for the card thumbnail.
        title: Heading shown next to the thumbnail.
        urls: Link target for each snippet; ``urls[0]`` is also the thumbnail link.
        contexts: Text of each snippet.
        starts: Start offset of each snippet, in seconds.
        ends: End offset of each snippet, in seconds.

    Returns:
        Whatever ``st.markdown`` returns for the rendered HTML.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    from html import escape

    segments = sorted(zip(ends, starts, urls, contexts))

    # Each entry is a mutable [text, url] pair; None marks a line break that
    # separates non-overlapping snippets.
    entries = []
    prev_start = prev_end = 0
    for end, start, url, context in segments:
        timestamp = _format_timestamp(start)
        if prev_start < start < prev_end:
            # Continuation of the previous snippet: cut the previous text at
            # the point where this snippet's opening characters appear.
            overlap_key = context[:10]
            if overlap_key:  # str.split("") raises ValueError
                entries[-1][0] = entries[-1][0].split(overlap_key)[0]
            # NOTE: str.capitalize() also lower-cases the rest of the text;
            # kept as in the original.
            entries.append([f"[{timestamp}] {context.capitalize()}", url])
        else:
            entries.append(None)
            entries.append([f"[{timestamp}] {context}", url])
        prev_start, prev_end = start, end

    # Text and URLs come from index metadata and are rendered with
    # unsafe_allow_html=True, so escape them and quote attribute values.
    parts = []
    for entry in entries:
        if entry is None:
            parts.append("<br>")
            continue
        text, url = entry
        parts.append(
            f'<small><a href="{escape(str(url), quote=True)}">'
            f"{escape(text.strip(), quote=False)}... </a></small>"
        )
    html_text = "".join(parts)

    link = escape(str(urls[0]), quote=True) if urls else "#"
    thumb = escape(str(thumbnail), quote=True)
    heading = escape(str(title), quote=False)

    # Lines are emitted flush-left with no blank lines: under CommonMark,
    # indented or blank-separated raw HTML can be re-interpreted as code
    # blocks or paragraphs. The three trailing </div> tags close elements the
    # browser previously had to auto-close; nesting is otherwise unchanged.
    markup = "\n".join((
        '<div class="container-fluid">',
        '<div class="row align-items-start">',
        '<div class="col-md-4 col-sm-4">',
        '<div class="position-relative">',
        f'<a href="{link}"><img src="{thumb}" class="img-fluid" '
        'style="width: 192px; height: 106px"></a>',
        "</div>",
        "</div>",
        '<div class="col-md-8 col-sm-8">',
        f"<h2>{heading}</h2>",
        "</div>",
        "<div>",
        html_text,
        "<br><br>",
        "</div>",
        "</div>",
        "</div>",
    ))
    return st.markdown(markup, unsafe_allow_html=True)
 
 
63
 
64
 
65
  st.write("""
 
77
  xq = retriever.encode([query]).tolist()
78
  xc = index.query(xq, top_k=5, include_metadata=True)
79
 
80
+ results = {}
81
+ order = []
82
  for context in xc['matches']:
83
+ video_id = context['metadata']['url'].split('/')[-1]
84
+ if video_id not in results:
85
+ results[video_id] = {
86
+ 'title': context['metadata']['title'],
87
+ 'urls': [f"{context['metadata']['url']}?t={int(context['metadata']['start'])}"],
88
+ 'contexts': [context['metadata']['text']],
89
+ 'starts': [int(context['metadata']['start'])],
90
+ 'ends': [int(context['metadata']['end'])]
91
+ }
92
+ order.append(video_id)
93
+ else:
94
+ results[video_id]['urls'].append(
95
+ f"{context['metadata']['url']}?t={int(context['metadata']['start'])}"
96
+ )
97
+ results[video_id]['contexts'].append(
98
+ context['metadata']['text']
99
+ )
100
+ results[video_id]['starts'].append(int(context['metadata']['start']))
101
+ results[video_id]['ends'].append(int(context['metadata']['end']))
102
+ # now display cards
103
+ for video_id in order:
104
  card(
105
+ thumbnail=f"https://img.youtube.com/vi/{video_id}/maxresdefault.jpg",
106
+ title=results[video_id]['title'],
107
+ urls=results[video_id]['urls'],
108
+ contexts=results[video_id]['contexts'],
109
+ starts=results[video_id]['starts'],
110
+ ends=results[video_id]['ends']
111
  )