Spaces:
Sleeping
Sleeping
bertugmirasyedi
committed on
Commit
·
ae3712d
1
Parent(s):
f9baad9
Made changes according to the website
Browse files
app.py
CHANGED
@@ -293,30 +293,29 @@ async def search(
|
|
293 |
|
294 |
results = [
|
295 |
{
|
|
|
296 |
"title": title,
|
297 |
"author": author,
|
298 |
"publisher": publisher,
|
299 |
"description": description,
|
300 |
-
"
|
301 |
}
|
302 |
-
for title, author, publisher, description, image in
|
303 |
-
titles, authors, publishers, descriptions, images
|
304 |
)
|
305 |
]
|
306 |
|
307 |
-
|
308 |
-
|
309 |
-
return response
|
310 |
|
311 |
|
312 |
@app.post("/classify")
|
313 |
-
async def classify(data:
|
314 |
"""
|
315 |
Create classifier pipeline and return the results.
|
316 |
"""
|
317 |
-
titles = [book["title"] for book in data
|
318 |
-
descriptions = [book["description"] for book in data
|
319 |
-
publishers = [book["publisher"] for book in data
|
320 |
|
321 |
# Combine title, description, and publisher into a single string
|
322 |
combined_data = [
|
@@ -369,7 +368,9 @@ async def classify(data: dict, runtime: str = "normal"):
|
|
369 |
classes = [
|
370 |
{
|
371 |
"audience": classifier_pipe(doc, audience)["labels"][0],
|
372 |
-
"
|
|
|
|
|
373 |
}
|
374 |
for doc in combined_data
|
375 |
]
|
@@ -378,16 +379,16 @@ async def classify(data: dict, runtime: str = "normal"):
|
|
378 |
|
379 |
|
380 |
@app.post("/find_similar")
|
381 |
-
async def find_similar(data:
|
382 |
"""
|
383 |
-
Calculate the similarity between the
|
384 |
"""
|
385 |
from sentence_transformers import SentenceTransformer
|
386 |
from sentence_transformers import util
|
387 |
|
388 |
-
titles = [book["title"] for book in data
|
389 |
-
descriptions = [book["description"] for book in data
|
390 |
-
publishers = [book["publisher"] for book in data
|
391 |
|
392 |
# Combine title, description, and publisher into a single string
|
393 |
combined_data = [
|
@@ -402,6 +403,7 @@ async def find_similar(data: dict, runtime: str = "normal", top_k: int = 5):
|
|
402 |
top_k = len(combined_data) if top_k > len(combined_data) else top_k
|
403 |
|
404 |
similar_books = []
|
|
|
405 |
for i in range(len(combined_data)):
|
406 |
# Get the embedding for the ith book
|
407 |
current_embedding = book_embeddings[i]
|
@@ -418,9 +420,7 @@ async def find_similar(data: dict, runtime: str = "normal", top_k: int = 5):
|
|
418 |
}
|
419 |
)
|
420 |
|
421 |
-
|
422 |
-
|
423 |
-
return response
|
424 |
|
425 |
|
426 |
@app.post("/summarize")
|
@@ -451,135 +451,9 @@ async def summarize(descriptions: list, runtime="normal"):
|
|
451 |
# Summarize the descriptions
|
452 |
summaries = [
|
453 |
summarizer_pipe(description)
|
454 |
-
if (len(description) > 0 and description != "Null")
|
455 |
else [{"summary_text": "No summary text is available."}]
|
456 |
for description in descriptions
|
457 |
]
|
458 |
|
459 |
return summaries
|
460 |
-
|
461 |
-
def classify(combined_data, runtime="normal"):
|
462 |
-
"""
|
463 |
-
Create classifier pipeline and return the results.
|
464 |
-
"""
|
465 |
-
from transformers import (
|
466 |
-
AutoTokenizer,
|
467 |
-
AutoModelForSequenceClassification,
|
468 |
-
pipeline,
|
469 |
-
)
|
470 |
-
from optimum.onnxruntime import ORTModelForSequenceClassification
|
471 |
-
from optimum.bettertransformer import BetterTransformer
|
472 |
-
|
473 |
-
if runtime == "normal":
|
474 |
-
# Define the zero-shot classifier
|
475 |
-
tokenizer = AutoTokenizer.from_pretrained(
|
476 |
-
"sileod/deberta-v3-base-tasksource-nli"
|
477 |
-
)
|
478 |
-
model = AutoModelForSequenceClassification.from_pretrained(
|
479 |
-
"sileod/deberta-v3-base-tasksource-nli"
|
480 |
-
)
|
481 |
-
elif runtime == "onnxruntime":
|
482 |
-
tokenizer = AutoTokenizer.from_pretrained(
|
483 |
-
"optimum/distilbert-base-uncased-mnli"
|
484 |
-
)
|
485 |
-
model = ORTModelForSequenceClassification.from_pretrained(
|
486 |
-
"optimum/distilbert-base-uncased-mnli"
|
487 |
-
)
|
488 |
-
|
489 |
-
classifier_pipe = pipeline(
|
490 |
-
"zero-shot-classification",
|
491 |
-
model=model,
|
492 |
-
tokenizer=tokenizer,
|
493 |
-
hypothesis_template="This book is {}.",
|
494 |
-
batch_size=1,
|
495 |
-
device=-1,
|
496 |
-
multi_label=False,
|
497 |
-
)
|
498 |
-
|
499 |
-
# Define the candidate labels
|
500 |
-
level = [
|
501 |
-
"Introductory",
|
502 |
-
"Advanced",
|
503 |
-
]
|
504 |
-
|
505 |
-
audience = ["Academic", "Not Academic", "Manual"]
|
506 |
-
|
507 |
-
classes = [
|
508 |
-
{
|
509 |
-
"audience": classifier_pipe(doc, audience),
|
510 |
-
"level": classifier_pipe(doc, level),
|
511 |
-
}
|
512 |
-
for doc in combined_data
|
513 |
-
]
|
514 |
-
|
515 |
-
return classes
|
516 |
-
|
517 |
-
# If true then run the similarity, summarize, and classify functions
|
518 |
-
if classification:
|
519 |
-
classes = classify(combined_data, runtime="normal")
|
520 |
-
else:
|
521 |
-
classes = [
|
522 |
-
{"labels": ["No labels available."], "scores": [0]}
|
523 |
-
for i in range(len(combined_data))
|
524 |
-
]
|
525 |
-
|
526 |
-
# Calculate the elapsed time between the third and fourth checkpoints
|
527 |
-
fourth_checkpoint = time.time()
|
528 |
-
classification_time = int(fourth_checkpoint - third_checkpoint)
|
529 |
-
|
530 |
-
if summarization:
|
531 |
-
summaries = summarize(descriptions, runtime="normal")
|
532 |
-
else:
|
533 |
-
summaries = [
|
534 |
-
[{"summary_text": description}]
|
535 |
-
if (len(description) > 0)
|
536 |
-
else [{"summary_text": "No summary text is available."}]
|
537 |
-
for description in descriptions
|
538 |
-
]
|
539 |
-
|
540 |
-
# Calculate the elapsed time between the fourth and fifth checkpoints
|
541 |
-
fifth_checkpoint = time.time()
|
542 |
-
summarization_time = int(fifth_checkpoint - fourth_checkpoint)
|
543 |
-
|
544 |
-
if similarity:
|
545 |
-
similar_books = find_similar(combined_data)
|
546 |
-
else:
|
547 |
-
similar_books = [
|
548 |
-
{"sorted_by_similarity": ["No similar books available."]}
|
549 |
-
for i in range(len(combined_data))
|
550 |
-
]
|
551 |
-
|
552 |
-
# Calculate the elapsed time between the fifth and sixth checkpoints
|
553 |
-
sixth_checkpoint = time.time()
|
554 |
-
similarity_time = int(sixth_checkpoint - fifth_checkpoint)
|
555 |
-
|
556 |
-
# Calculate the total elapsed time
|
557 |
-
end_time = time.time()
|
558 |
-
runtime = f"{end_time - start_time:.2f} seconds"
|
559 |
-
|
560 |
-
# Create a list of dictionaries to store the results
|
561 |
-
results = []
|
562 |
-
for i in range(len(titles)):
|
563 |
-
results.append(
|
564 |
-
{
|
565 |
-
"id": i,
|
566 |
-
"title": titles[i],
|
567 |
-
"author": authors[i],
|
568 |
-
"publisher": publishers[i],
|
569 |
-
"image_link": images[i],
|
570 |
-
"audience": classes[i]["audience"]["labels"][0],
|
571 |
-
"audience_confidence": classes[i]["audience"]["scores"][0],
|
572 |
-
"level": classes[i]["level"]["labels"][0],
|
573 |
-
"level_confidence": classes[i]["level"]["scores"][0],
|
574 |
-
"summary": summaries[i][0]["summary_text"],
|
575 |
-
"similar_books": similar_books[i]["sorted_by_similarity"],
|
576 |
-
"runtime": {
|
577 |
-
"total": runtime,
|
578 |
-
"classification": classification_time,
|
579 |
-
"summarization": summarization_time,
|
580 |
-
"similarity": similarity_time,
|
581 |
-
},
|
582 |
-
}
|
583 |
-
)
|
584 |
-
|
585 |
-
return results
|
|
|
293 |
|
294 |
results = [
|
295 |
{
|
296 |
+
"id": i,
|
297 |
"title": title,
|
298 |
"author": author,
|
299 |
"publisher": publisher,
|
300 |
"description": description,
|
301 |
+
"image_link": image,
|
302 |
}
|
303 |
+
for (i, [title, author, publisher, description, image]) in enumerate(
|
304 |
+
zip(titles, authors, publishers, descriptions, images)
|
305 |
)
|
306 |
]
|
307 |
|
308 |
+
return results
|
|
|
|
|
309 |
|
310 |
|
311 |
@app.post("/classify")
|
312 |
+
async def classify(data: list, runtime: str = "normal"):
|
313 |
"""
|
314 |
Create classifier pipeline and return the results.
|
315 |
"""
|
316 |
+
titles = [book["title"] for book in data]
|
317 |
+
descriptions = [book["description"] for book in data]
|
318 |
+
publishers = [book["publisher"] for book in data]
|
319 |
|
320 |
# Combine title, description, and publisher into a single string
|
321 |
combined_data = [
|
|
|
368 |
classes = [
|
369 |
{
|
370 |
"audience": classifier_pipe(doc, audience)["labels"][0],
|
371 |
+
"audience_confidence": classifier_pipe(doc, audience)["scores"][0],
|
372 |
+
"level": classifier_pipe(doc, level)["labels"][0],
|
373 |
+
"level_confidence": classifier_pipe(doc, level)["scores"][0],
|
374 |
}
|
375 |
for doc in combined_data
|
376 |
]
|
|
|
379 |
|
380 |
|
381 |
@app.post("/find_similar")
|
382 |
+
async def find_similar(data: list, top_k: int = 5):
|
383 |
"""
|
384 |
+
Calculate the similarity between the selected book and the corpus. Return the top_k results.
|
385 |
"""
|
386 |
from sentence_transformers import SentenceTransformer
|
387 |
from sentence_transformers import util
|
388 |
|
389 |
+
titles = [book["title"] for book in data]
|
390 |
+
descriptions = [book["description"] for book in data]
|
391 |
+
publishers = [book["publisher"] for book in data]
|
392 |
|
393 |
# Combine title, description, and publisher into a single string
|
394 |
combined_data = [
|
|
|
403 |
top_k = len(combined_data) if top_k > len(combined_data) else top_k
|
404 |
|
405 |
similar_books = []
|
406 |
+
|
407 |
for i in range(len(combined_data)):
|
408 |
# Get the embedding for the ith book
|
409 |
current_embedding = book_embeddings[i]
|
|
|
420 |
}
|
421 |
)
|
422 |
|
423 |
+
return similar_books
|
|
|
|
|
424 |
|
425 |
|
426 |
@app.post("/summarize")
|
|
|
451 |
# Summarize the descriptions
|
452 |
summaries = [
|
453 |
summarizer_pipe(description)
|
454 |
+
if (len(description) > 0 and description != "Null" and description != None)
|
455 |
else [{"summary_text": "No summary text is available."}]
|
456 |
for description in descriptions
|
457 |
]
|
458 |
|
459 |
return summaries
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|