bertugmirasyedi committed on
Commit
ae3712d
·
1 Parent(s): f9baad9

Made changes according to the website

Browse files
Files changed (1) hide show
  1. app.py +20 -146
app.py CHANGED
@@ -293,30 +293,29 @@ async def search(
293
 
294
  results = [
295
  {
 
296
  "title": title,
297
  "author": author,
298
  "publisher": publisher,
299
  "description": description,
300
- "image": image,
301
  }
302
- for title, author, publisher, description, image in zip(
303
- titles, authors, publishers, descriptions, images
304
  )
305
  ]
306
 
307
- response = {"results": results}
308
-
309
- return response
310
 
311
 
312
  @app.post("/classify")
313
- async def classify(data: dict, runtime: str = "normal"):
314
  """
315
  Create classifier pipeline and return the results.
316
  """
317
- titles = [book["title"] for book in data["results"]]
318
- descriptions = [book["description"] for book in data["results"]]
319
- publishers = [book["publisher"] for book in data["results"]]
320
 
321
  # Combine title, description, and publisher into a single string
322
  combined_data = [
@@ -369,7 +368,9 @@ async def classify(data: dict, runtime: str = "normal"):
369
  classes = [
370
  {
371
  "audience": classifier_pipe(doc, audience)["labels"][0],
372
- "level": classifier_pipe(doc, level)["scores"][0],
 
 
373
  }
374
  for doc in combined_data
375
  ]
@@ -378,16 +379,16 @@ async def classify(data: dict, runtime: str = "normal"):
378
 
379
 
380
  @app.post("/find_similar")
381
- async def find_similar(data: dict, runtime: str = "normal", top_k: int = 5):
382
  """
383
- Calculate the similarity between the books and return the top_k results.
384
  """
385
  from sentence_transformers import SentenceTransformer
386
  from sentence_transformers import util
387
 
388
- titles = [book["title"] for book in data["results"]]
389
- descriptions = [book["description"] for book in data["results"]]
390
- publishers = [book["publisher"] for book in data["results"]]
391
 
392
  # Combine title, description, and publisher into a single string
393
  combined_data = [
@@ -402,6 +403,7 @@ async def find_similar(data: dict, runtime: str = "normal", top_k: int = 5):
402
  top_k = len(combined_data) if top_k > len(combined_data) else top_k
403
 
404
  similar_books = []
 
405
  for i in range(len(combined_data)):
406
  # Get the embedding for the ith book
407
  current_embedding = book_embeddings[i]
@@ -418,9 +420,7 @@ async def find_similar(data: dict, runtime: str = "normal", top_k: int = 5):
418
  }
419
  )
420
 
421
- response = {"results": similar_books}
422
-
423
- return response
424
 
425
 
426
  @app.post("/summarize")
@@ -451,135 +451,9 @@ async def summarize(descriptions: list, runtime="normal"):
451
  # Summarize the descriptions
452
  summaries = [
453
  summarizer_pipe(description)
454
- if (len(description) > 0 and description != "Null")
455
  else [{"summary_text": "No summary text is available."}]
456
  for description in descriptions
457
  ]
458
 
459
  return summaries
460
-
461
- def classify(combined_data, runtime="normal"):
462
- """
463
- Create classifier pipeline and return the results.
464
- """
465
- from transformers import (
466
- AutoTokenizer,
467
- AutoModelForSequenceClassification,
468
- pipeline,
469
- )
470
- from optimum.onnxruntime import ORTModelForSequenceClassification
471
- from optimum.bettertransformer import BetterTransformer
472
-
473
- if runtime == "normal":
474
- # Define the zero-shot classifier
475
- tokenizer = AutoTokenizer.from_pretrained(
476
- "sileod/deberta-v3-base-tasksource-nli"
477
- )
478
- model = AutoModelForSequenceClassification.from_pretrained(
479
- "sileod/deberta-v3-base-tasksource-nli"
480
- )
481
- elif runtime == "onnxruntime":
482
- tokenizer = AutoTokenizer.from_pretrained(
483
- "optimum/distilbert-base-uncased-mnli"
484
- )
485
- model = ORTModelForSequenceClassification.from_pretrained(
486
- "optimum/distilbert-base-uncased-mnli"
487
- )
488
-
489
- classifier_pipe = pipeline(
490
- "zero-shot-classification",
491
- model=model,
492
- tokenizer=tokenizer,
493
- hypothesis_template="This book is {}.",
494
- batch_size=1,
495
- device=-1,
496
- multi_label=False,
497
- )
498
-
499
- # Define the candidate labels
500
- level = [
501
- "Introductory",
502
- "Advanced",
503
- ]
504
-
505
- audience = ["Academic", "Not Academic", "Manual"]
506
-
507
- classes = [
508
- {
509
- "audience": classifier_pipe(doc, audience),
510
- "level": classifier_pipe(doc, level),
511
- }
512
- for doc in combined_data
513
- ]
514
-
515
- return classes
516
-
517
- # If true then run the similarity, summarize, and classify functions
518
- if classification:
519
- classes = classify(combined_data, runtime="normal")
520
- else:
521
- classes = [
522
- {"labels": ["No labels available."], "scores": [0]}
523
- for i in range(len(combined_data))
524
- ]
525
-
526
- # Calculate the elapsed time between the third and fourth checkpoints
527
- fourth_checkpoint = time.time()
528
- classification_time = int(fourth_checkpoint - third_checkpoint)
529
-
530
- if summarization:
531
- summaries = summarize(descriptions, runtime="normal")
532
- else:
533
- summaries = [
534
- [{"summary_text": description}]
535
- if (len(description) > 0)
536
- else [{"summary_text": "No summary text is available."}]
537
- for description in descriptions
538
- ]
539
-
540
- # Calculate the elapsed time between the fourth and fifth checkpoints
541
- fifth_checkpoint = time.time()
542
- summarization_time = int(fifth_checkpoint - fourth_checkpoint)
543
-
544
- if similarity:
545
- similar_books = find_similar(combined_data)
546
- else:
547
- similar_books = [
548
- {"sorted_by_similarity": ["No similar books available."]}
549
- for i in range(len(combined_data))
550
- ]
551
-
552
- # Calculate the elapsed time between the fifth and sixth checkpoints
553
- sixth_checkpoint = time.time()
554
- similarity_time = int(sixth_checkpoint - fifth_checkpoint)
555
-
556
- # Calculate the total elapsed time
557
- end_time = time.time()
558
- runtime = f"{end_time - start_time:.2f} seconds"
559
-
560
- # Create a list of dictionaries to store the results
561
- results = []
562
- for i in range(len(titles)):
563
- results.append(
564
- {
565
- "id": i,
566
- "title": titles[i],
567
- "author": authors[i],
568
- "publisher": publishers[i],
569
- "image_link": images[i],
570
- "audience": classes[i]["audience"]["labels"][0],
571
- "audience_confidence": classes[i]["audience"]["scores"][0],
572
- "level": classes[i]["level"]["labels"][0],
573
- "level_confidence": classes[i]["level"]["scores"][0],
574
- "summary": summaries[i][0]["summary_text"],
575
- "similar_books": similar_books[i]["sorted_by_similarity"],
576
- "runtime": {
577
- "total": runtime,
578
- "classification": classification_time,
579
- "summarization": summarization_time,
580
- "similarity": similarity_time,
581
- },
582
- }
583
- )
584
-
585
- return results
 
293
 
294
  results = [
295
  {
296
+ "id": i,
297
  "title": title,
298
  "author": author,
299
  "publisher": publisher,
300
  "description": description,
301
+ "image_link": image,
302
  }
303
+ for (i, [title, author, publisher, description, image]) in enumerate(
304
+ zip(titles, authors, publishers, descriptions, images)
305
  )
306
  ]
307
 
308
+ return results
 
 
309
 
310
 
311
  @app.post("/classify")
312
+ async def classify(data: list, runtime: str = "normal"):
313
  """
314
  Create classifier pipeline and return the results.
315
  """
316
+ titles = [book["title"] for book in data]
317
+ descriptions = [book["description"] for book in data]
318
+ publishers = [book["publisher"] for book in data]
319
 
320
  # Combine title, description, and publisher into a single string
321
  combined_data = [
 
368
  classes = [
369
  {
370
  "audience": classifier_pipe(doc, audience)["labels"][0],
371
+ "audience_confidence": classifier_pipe(doc, audience)["scores"][0],
372
+ "level": classifier_pipe(doc, level)["labels"][0],
373
+ "level_confidence": classifier_pipe(doc, level)["scores"][0],
374
  }
375
  for doc in combined_data
376
  ]
 
379
 
380
 
381
  @app.post("/find_similar")
382
+ async def find_similar(data: list, top_k: int = 5):
383
  """
384
+ Calculate the similarity between the selected book and the corpus. Return the top_k results.
385
  """
386
  from sentence_transformers import SentenceTransformer
387
  from sentence_transformers import util
388
 
389
+ titles = [book["title"] for book in data]
390
+ descriptions = [book["description"] for book in data]
391
+ publishers = [book["publisher"] for book in data]
392
 
393
  # Combine title, description, and publisher into a single string
394
  combined_data = [
 
403
  top_k = len(combined_data) if top_k > len(combined_data) else top_k
404
 
405
  similar_books = []
406
+
407
  for i in range(len(combined_data)):
408
  # Get the embedding for the ith book
409
  current_embedding = book_embeddings[i]
 
420
  }
421
  )
422
 
423
+ return similar_books
 
 
424
 
425
 
426
  @app.post("/summarize")
 
451
  # Summarize the descriptions
452
  summaries = [
453
  summarizer_pipe(description)
454
+ if (len(description) > 0 and description != "Null" and description != None)
455
  else [{"summary_text": "No summary text is available."}]
456
  for description in descriptions
457
  ]
458
 
459
  return summaries