ola13 committed on
Commit
a949c83
1 Parent(s): aca9622

refactor towards flagging

Browse files
Files changed (1) hide show
  1. app.py +58 -85
app.py CHANGED
@@ -88,6 +88,7 @@ def flag(query, language, num_results, issue_description):
88
 
89
 
90
  def format_result(result, highlight_terms, exact_search, datasets_filter=None):
 
91
  text, url, docid = result
92
  if datasets_filter is not None:
93
  datasets_filter = set(datasets_filter)
@@ -143,61 +144,22 @@ def format_result(result, highlight_terms, exact_search, datasets_filter=None):
143
 
144
 
145
  def format_result_page(
146
- language, results, highlight_terms, num_results, exact_search, datasets_filter=None
147
- ) -> gr.HTML:
148
-
149
- filtered_num_results = 0
150
- header_html = ""
151
-
152
- if language == "detect_language" and not exact_search:
153
- header_html += """<div style='font-family: Arial; color:MediumAquaMarine; text-align: center; line-height: 3em'>
154
- Detected language: <b style='color:MediumAquaMarine'>{}</b></div>""".format(
155
- list(results.keys())[0]
156
  )
157
-
158
- results_html = ""
159
- for lang, results_for_lang in results.items():
160
- if len(results_for_lang) == 0:
161
- if exact_search:
162
- results_html += """<div style='font-family: Arial; color:Silver; text-align: left; line-height: 3em'>
163
- No results found.</div>"""
164
- else:
165
- results_html += """<div style='font-family: Arial; color:Silver; text-align: left; line-height: 3em'>
166
- No results for language: <b>{}</b></div>""".format(
167
- lang
168
- )
169
- continue
170
- results_for_lang_html = ""
171
- for result in results_for_lang:
172
- result_html = format_result(
173
- result, highlight_terms, exact_search, datasets_filter
174
- )
175
- if result_html != "":
176
- filtered_num_results += 1
177
- results_for_lang_html += result_html
178
- if language == "all" and not exact_search:
179
- results_for_lang_html = f"""
180
- <details>
181
- <summary style='font-family: Arial; color:MediumAquaMarine; text-align: left; line-height: 3em'>
182
- Results for language: <b>{lang}</b>
183
- </summary>
184
- {results_for_lang_html}
185
- </details>"""
186
- results_html += results_for_lang_html
187
-
188
- if num_results is not None:
189
- header_html += """<div style='font-family: Arial; color:MediumAquaMarine; text-align: center; line-height: 3em'>
190
- Total number of matches: <b style='color:MediumAquaMarine'>{}</b></div>""".format(
191
- num_results
192
- )
193
-
194
- return header_html + results_html
195
 
196
 
197
  def extract_results_from_payload(query, language, payload, exact_search):
198
  results = payload["results"]
199
 
200
- processed_results = dict()
201
  datasets = set()
202
  highlight_terms = None
203
  num_results = None
@@ -205,25 +167,22 @@ def extract_results_from_payload(query, language, payload, exact_search):
205
  if exact_search:
206
  highlight_terms = query
207
  num_results = payload["num_results"]
208
- results = {"dummy": results}
209
  else:
210
  highlight_terms = payload["highlight_terms"]
211
 
212
- for lang, results_for_lang in results.items():
213
- processed_results[lang] = list()
214
- for result in results_for_lang:
215
- text = result["text"]
216
- url = (
217
- result["meta"]["url"]
218
- if "meta" in result
219
- and result["meta"] is not None
220
- and "url" in result["meta"]
221
- else None
222
- )
223
- docid = result["docid"]
224
- _, dataset, _ = docid.split("/")
225
- datasets.add(dataset)
226
- processed_results[lang].append((text, url, docid))
227
 
228
  return processed_results, highlight_terms, num_results, list(datasets)
229
 
@@ -313,7 +272,6 @@ if __name__ == "__main__":
313
  "vi",
314
  "zh",
315
  "detect_language",
316
- "all",
317
  ],
318
  value="en",
319
  label="Language",
@@ -329,8 +287,9 @@ if __name__ == "__main__":
329
  label="Datasets Filter",
330
  multiselect=True,
331
  )
332
- with gr.Row():
333
- results = gr.HTML(label="Results")
 
334
  with gr.Row(visible=False) as pagination:
335
  next_page_btn = gr.Button("Next Page")
336
  with gr.Column(visible=False) as flagging_form:
@@ -372,15 +331,30 @@ if __name__ == "__main__":
372
  payload,
373
  exact_search,
374
  )
375
- results_html = format_result_page(
376
- lang, processed_results, highlight_terms, num_results, exact_search
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
377
  )
378
  return (
379
  processed_results,
380
  highlight_terms,
381
  num_results,
382
  exact_search,
383
- results_html,
384
  ds,
385
  )
386
 
@@ -390,7 +364,7 @@ if __name__ == "__main__":
390
  highlight_terms,
391
  num_results,
392
  exact_search,
393
- results_html,
394
  datasets,
395
  ) = run_query(query, lang, k, dropdown_input, max_page_size, 0)
396
  has_more_results = exact_search and (num_results > max_page_size)
@@ -399,14 +373,14 @@ if __name__ == "__main__":
399
  highlight_terms_state: highlight_terms,
400
  num_results_state: num_results,
401
  exact_search_state: exact_search,
402
- results: results_html,
403
  flagging_form: gr.update(visible=True),
404
  datasets_filter: gr.update(visible=True),
405
  available_datasets: gr.Dropdown.update(
406
  choices=datasets, value=datasets
407
  ),
408
  pagination: gr.update(visible=has_more_results),
409
- received_results_state: len(next(iter(processed_results.values()))),
410
  }
411
 
412
  def next_page(
@@ -423,22 +397,22 @@ if __name__ == "__main__":
423
  highlight_terms,
424
  num_results,
425
  exact_search,
426
- results_html,
427
  datasets,
428
  ) = run_query(
429
  query, lang, k, dropdown_input, max_page_size, received_results
430
  )
431
- num_processed_results = len(next(iter(processed_results.values())))
432
  has_more_results = exact_search and (num_results > max_page_size)
433
  print("num_processed_results", num_processed_results)
434
  print("has_more_results", has_more_results)
435
- print("current page", len(list(processed_results.values())[0]))
436
  return {
437
  processed_results_state: processed_results,
438
  highlight_terms_state: highlight_terms,
439
  num_results_state: num_results,
440
  exact_search_state: exact_search,
441
- results: results_html,
442
  flagging_form: gr.update(visible=True),
443
  datasets_filter: gr.update(visible=True),
444
  available_datasets: gr.Dropdown.update(
@@ -458,15 +432,14 @@ if __name__ == "__main__":
458
  exact_search,
459
  datasets_filter,
460
  ):
461
- results_html = format_result_page(
462
- lang,
463
  processed_results,
464
  highlight_terms,
465
  num_results,
466
  exact_search,
467
  datasets_filter,
468
  )
469
- return {results: results_html}
470
 
471
  query.submit(
472
  fn=submit,
@@ -476,7 +449,7 @@ if __name__ == "__main__":
476
  highlight_terms_state,
477
  num_results_state,
478
  exact_search_state,
479
- results,
480
  flagging_form,
481
  datasets_filter,
482
  available_datasets,
@@ -492,7 +465,7 @@ if __name__ == "__main__":
492
  highlight_terms_state,
493
  num_results_state,
494
  exact_search_state,
495
- results,
496
  flagging_form,
497
  datasets_filter,
498
  available_datasets,
@@ -517,7 +490,7 @@ if __name__ == "__main__":
517
  highlight_terms_state,
518
  num_results_state,
519
  exact_search_state,
520
- results,
521
  flagging_form,
522
  datasets_filter,
523
  available_datasets,
@@ -536,6 +509,6 @@ if __name__ == "__main__":
536
  exact_search_state,
537
  available_datasets,
538
  ],
539
- outputs=[results],
540
  )
541
  demo.launch(enable_queue=True, debug=True)
 
88
 
89
 
90
  def format_result(result, highlight_terms, exact_search, datasets_filter=None):
91
+ # print("result", result)
92
  text, url, docid = result
93
  if datasets_filter is not None:
94
  datasets_filter = set(datasets_filter)
 
144
 
145
 
146
  def format_result_page(
147
+ results, highlight_terms, num_results, exact_search, datasets_filter=None
148
+ ):
149
+ results_html = []
150
+ for result in results:
151
+ result_html = format_result(
152
+ result, highlight_terms, exact_search, datasets_filter
 
 
 
 
153
  )
154
+ if result_html != "":
155
+ results_html.append(result_html)
156
+ return results_html
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
 
158
 
159
  def extract_results_from_payload(query, language, payload, exact_search):
160
  results = payload["results"]
161
 
162
+ processed_results = list()
163
  datasets = set()
164
  highlight_terms = None
165
  num_results = None
 
167
  if exact_search:
168
  highlight_terms = query
169
  num_results = payload["num_results"]
 
170
  else:
171
  highlight_terms = payload["highlight_terms"]
172
 
173
+ for result in results:
174
+ text = result["text"]
175
+ url = (
176
+ result["meta"]["url"]
177
+ if "meta" in result
178
+ and result["meta"] is not None
179
+ and "url" in result["meta"]
180
+ else None
181
+ )
182
+ docid = result["docid"]
183
+ _, dataset, _ = docid.split("/")
184
+ datasets.add(dataset)
185
+ processed_results.append((text, url, docid))
 
 
186
 
187
  return processed_results, highlight_terms, num_results, list(datasets)
188
 
 
272
  "vi",
273
  "zh",
274
  "detect_language",
 
275
  ],
276
  value="en",
277
  label="Language",
 
287
  label="Datasets Filter",
288
  multiselect=True,
289
  )
290
+ with gr.Row() as results_row:
291
+ header_html = gr.HTML(label="Header")
292
+ results_html = gr.HTML(label="Results")
293
  with gr.Row(visible=False) as pagination:
294
  next_page_btn = gr.Button("Next Page")
295
  with gr.Column(visible=False) as flagging_form:
 
331
  payload,
332
  exact_search,
333
  )
334
+ header_html = ""
335
+ if lang == "detect_language" and not exact_search:
336
+ header_html += """<div style='font-family: Arial; color:MediumAquaMarine; text-align: center; line-height: 3em'>
337
+ Detected language: <b style='color:MediumAquaMarine'>{}</b></div>""".format(
338
+ "FIX ME!"
339
+ )
340
+ if len(processed_results) == 0:
341
+ header_html += """<div style='font-family: Arial; color:Silver; text-align: left; line-height: 3em'>
342
+ No results found.</div>"""
343
+ elif num_results is not None:
344
+ header_html += """<div style='font-family: Arial; color:MediumAquaMarine; text-align: center; line-height: 3em'>
345
+ Total number of matches: <b style='color:MediumAquaMarine'>{}</b></div>""".format(
346
+ num_results
347
+ )
348
+ # print("processed_results", processed_results)
349
+ results_html_new = format_result_page(
350
+ processed_results, highlight_terms, num_results, exact_search
351
  )
352
  return (
353
  processed_results,
354
  highlight_terms,
355
  num_results,
356
  exact_search,
357
+ results_html_new,
358
  ds,
359
  )
360
 
 
364
  highlight_terms,
365
  num_results,
366
  exact_search,
367
+ results_html_new,
368
  datasets,
369
  ) = run_query(query, lang, k, dropdown_input, max_page_size, 0)
370
  has_more_results = exact_search and (num_results > max_page_size)
 
373
  highlight_terms_state: highlight_terms,
374
  num_results_state: num_results,
375
  exact_search_state: exact_search,
376
+ results_html: results_html_new,
377
  flagging_form: gr.update(visible=True),
378
  datasets_filter: gr.update(visible=True),
379
  available_datasets: gr.Dropdown.update(
380
  choices=datasets, value=datasets
381
  ),
382
  pagination: gr.update(visible=has_more_results),
383
+ received_results_state: len(processed_results),
384
  }
385
 
386
  def next_page(
 
397
  highlight_terms,
398
  num_results,
399
  exact_search,
400
+ results_html_new,
401
  datasets,
402
  ) = run_query(
403
  query, lang, k, dropdown_input, max_page_size, received_results
404
  )
405
+ num_processed_results = len(processed_results)
406
  has_more_results = exact_search and (num_results > max_page_size)
407
  print("num_processed_results", num_processed_results)
408
  print("has_more_results", has_more_results)
409
+ print("received_results", received_results)
410
  return {
411
  processed_results_state: processed_results,
412
  highlight_terms_state: highlight_terms,
413
  num_results_state: num_results,
414
  exact_search_state: exact_search,
415
+ results_html: results_html_new,
416
  flagging_form: gr.update(visible=True),
417
  datasets_filter: gr.update(visible=True),
418
  available_datasets: gr.Dropdown.update(
 
432
  exact_search,
433
  datasets_filter,
434
  ):
435
+ results_html_new = format_result_page(
 
436
  processed_results,
437
  highlight_terms,
438
  num_results,
439
  exact_search,
440
  datasets_filter,
441
  )
442
+ return {results_html: results_html_new}
443
 
444
  query.submit(
445
  fn=submit,
 
449
  highlight_terms_state,
450
  num_results_state,
451
  exact_search_state,
452
+ results_html,
453
  flagging_form,
454
  datasets_filter,
455
  available_datasets,
 
465
  highlight_terms_state,
466
  num_results_state,
467
  exact_search_state,
468
+ results_html,
469
  flagging_form,
470
  datasets_filter,
471
  available_datasets,
 
490
  highlight_terms_state,
491
  num_results_state,
492
  exact_search_state,
493
+ results_html,
494
  flagging_form,
495
  datasets_filter,
496
  available_datasets,
 
509
  exact_search_state,
510
  available_datasets,
511
  ],
512
+ outputs=[results_html],
513
  )
514
  demo.launch(enable_queue=True, debug=True)