Spaces:

bertugmirasyedi
/

aristotle-api

Sleeping

App Files Files Community

bertugmirasyedi committed on Mar 28, 2023

Commit

6b67b82

1 Parent(s): 99b3772

First commit

Browse files

Files changed (3) hide show

Dockerfile +11 -0
requirements.txt +8 -0
search.py +308 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,11 @@

+# Base image: the official Python 3.9 image
+FROM python:3.9
+# Relative paths in the instructions below resolve against /code
+WORKDIR /code
+# Copy only the dependency list and install from it before the sources are added
+COPY ./requirements.txt /code/requirements.txt
+RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+# Copy the rest of the build context into the working directory
+COPY . .
+# Serve the `app` object defined in search.py with uvicorn on all interfaces, port 7860
+CMD ["uvicorn", "search:app", "--host", "0.0.0.0", "--port", "7860"]

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+fastapi==0.95.0
+flair==0.11.3
+openai==0.27.0
+# search.py imports optimum.onnxruntime, which is only usable with the ONNX Runtime extra
+optimum[onnxruntime]==1.7.1
+pyalex==0.7
+requests==2.25.1
+sentence_transformers==2.2.2
+transformers==4.26.1
+# The Dockerfile starts the app with the uvicorn executable, which fastapi alone does not install
+uvicorn==0.21.1

search.py ADDED Viewed

	@@ -0,0 +1,308 @@

+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+import sys
+# Set the maximum recursion depth to 10000
+sys.setrecursionlimit(10000)
+# Define the FastAPI app
+app = FastAPI()
+# Add the CORS middleware to the app
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+@app.get("/search={query}&similarity={similarity}")
+def search(query, similarity=False):
+    import time
+    import requests
+    start_time = time.time()
+    # Set the API endpoint and query parameters
+    url = "https://www.googleapis.com/books/v1/volumes"
+    params = {"q": str(query), "printType": "books", "maxResults": 30}
+    # Send a GET request to the API with the specified parameters
+    response = requests.get(url, params=params)
+    # Initialize the lists to store the results
+    titles = []
+    authors = []
+    publishers = []
+    descriptions = []
+    images = []
+    # Parse the response JSON and append the results
+    data = response.json()
+    for item in data["items"]:
+        volume_info = item["volumeInfo"]
+        try:
+            titles.append(f"{volume_info['title']}: {volume_info['subtitle']}")
+        except KeyError:
+            titles.append(volume_info["title"])
+        try:
+            descriptions.append(volume_info["description"])
+        except KeyError:
+            descriptions.append("Null")
+        try:
+            publishers.append(volume_info["publisher"])
+        except KeyError:
+            publishers.append("Null")
+        try:
+            authors.append(volume_info["authors"][0])
+        except KeyError:
+            authors.append("Null")
+        try:
+            images.append(volume_info["imageLinks"]["thumbnail"])
+        except KeyError:
+            images.append(
+                "https://bookstoreromanceday.org/wp-content/uploads/2020/08/book-cover-placeholder.png"
+            )
+    ### Openalex ###
+    import pyalex
+    from pyalex import Works
+    # Add email to the config
+    pyalex.config.email = "[email protected]"
+    # Define a pager object with the same query
+    pager = Works().search(str(query)).paginate(per_page=10, n_max=10)
+    # Generate a list of the results
+    openalex_results = list(pager)
+    # Get the titles, descriptions, and publishers and append them to the lists
+    for result in openalex_results[0]:
+        try:
+            titles.append(result["title"])
+        except KeyError:
+            titles.append("Null")
+        try:
+            descriptions.append(result["abstract"])
+        except KeyError:
+            descriptions.append("Null")
+        try:
+            publishers.append(result["host_venue"]["publisher"])
+        except KeyError:
+            publishers.append("Null")
+        try:
+            authors.append(result["authorships"][0]["author"]["display_name"])
+        except KeyError:
+            authors.append("Null")
+        images.append(
+            "https://bookstoreromanceday.org/wp-content/uploads/2020/08/book-cover-placeholder.png"
+        )
+    ### OpenAI ###
+    import openai
+    # Set the OpenAI API key
+    openai.api_key = "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"
+    # Create ChatGPT query
+    chatgpt_response = openai.ChatCompletion.create(
+        model="gpt-3.5-turbo",
+        messages=[
+            {
+                "role": "system",
+                "content": "You are a librarian. You are helping a patron find a book.",
+            },
+            {
+                "role": "user",
+                "content": f"Recommend me 10 books about {query}. Your response should be like: 'title: <title>, author: <author>, publisher: <publisher>, summary: <summary>'",
+            },
+        ],
+    )
+    # Split the response into a list of results
+    chatgpt_results = chatgpt_response["choices"][0]["message"]["content"].split("\n")[
+        2::2
+    ]
+    # Define a function to parse the results
+    def parse_result(result, ordered_keys=["Title", "Author", "Publisher", "Summary"]):
+        # Create a dict to store the key-value pairs
+        parsed_result = {}
+        for key in ordered_keys:
+            # Split the result string by the key and append the value to the list
+            if key != ordered_keys[-1]:
+                parsed_result[key] = result.split(f"{key}: ")[1].split(",")[0]
+            else:
+                parsed_result[key] = result.split(f"{key}: ")[1]
+        return parsed_result
+    ordered_keys = ["Title", "Author", "Publisher", "Summary"]
+    for result in chatgpt_results:
+        # Parse the result
+        parsed_result = parse_result(result, ordered_keys=ordered_keys)
+        # Append the parsed result to the lists
+        titles.append(parsed_result["Title"])
+        authors.append(parsed_result["Author"])
+        publishers.append(parsed_result["Publisher"])
+        descriptions.append(parsed_result["Summary"])
+        images.append(
+            "https://bookstoreromanceday.org/wp-content/uploads/2020/08/book-cover-placeholder.png"
+        )
+    ### Prediction ###
+    from flair.models import TextClassifier
+    from flair.data import Sentence
+    from flair.tokenization import SegtokTokenizer
+    from transformers import (
+        AutoTokenizer,
+        AutoModelForSeq2SeqLM,
+        AutoModelForSequenceClassification,
+        pipeline,
+    )
+    from sentence_transformers import SentenceTransformer, CrossEncoder
+    from sentence_transformers.util import cos_sim, dot_score
+    from optimum.onnxruntime import (
+        ORTModelForSeq2SeqLM,
+        ORTModelForSequenceClassification,
+    )
+    from optimum.pipelines import pipeline as optimum_pipeline
+    # Load the classifiers
+    # classifier = TextClassifier.load(
+    #    "trainers/deberta-v3-base-tasksource-nli/best-model.pt"
+    # )
+    # sentence_transformer = SentenceTransformer("all-MiniLM-L12-v2")
+    # cross_encoder = CrossEncoder("cross-encoder/stsb-distilroberta-base")
+    # Combine title, description, and publisher into a single string
+    combined_data = [
+        f"{title} {description} {publisher}"
+        for title, description, publisher in zip(titles, descriptions, publishers)
+    ]
+    # Prepare the Sentence object
+    # sentences = [
+    #    Sentence(doc, use_tokenizer=SegtokTokenizer()) for doc in combined_data
+    # ]
+    # Classify the sentences
+    # classifier.predict(sentences)
+    # Get the predicted labels
+    # classes = [sentence.labels for sentence in sentences]
+    # Define the summarizer model and tokenizer
+    sum_tokenizer = AutoTokenizer.from_pretrained("lidiya/bart-base-samsum")
+    sum_model_quantized = ORTModelForSeq2SeqLM.from_pretrained(
+        "trainers/bart-base-samsum-quantized"
+    )
+    # sum_model = AutoModelForSeq2SeqLM.from_pretrained("sshleifer/distilbart-xsum-12-6")
+    summarizer_pipeline = optimum_pipeline(
+        "summarization",
+        model=sum_model_quantized,
+        tokenizer=sum_tokenizer,
+        batch_size=64,
+    )
+    # Define the zero-shot classifier
+    zs_tokenizer = AutoTokenizer.from_pretrained(
+        "sileod/deberta-v3-base-tasksource-nli"
+    )
+    # Quickfix for the tokenizer
+    # zs_tokenizer.model_input_names = ["input_ids", "attention_mask"]
+    zs_model = AutoModelForSequenceClassification.from_pretrained(
+        "sileod/deberta-v3-base-tasksource-nli"
+    )
+    zs_classifier = pipeline(
+        "zero-shot-classification",
+        model=zs_model,
+        tokenizer=zs_tokenizer,
+        batch_size=64,
+        hypothesis_template="This book is {}.",
+        multi_label=True,
+    )
+    # Summarize the descriptions
+    summaries = [
+        summarizer_pipeline(description[0:1024])
+        if (description != None)
+        else [{"summary_text": "Null"}]
+        for description in descriptions
+    ]
+    # Predict the level of the book
+    candidate_labels = [
+        "Introductory",
+        "Advanced",
+        "Academic",
+        "Not Academic",
+        "Manual",
+    ]
+    # Get the predicted labels
+    classes = [zs_classifier(doc, candidate_labels) for doc in combined_data]
+    # Calculate the elapsed time
+    end_time = time.time()
+    runtime = f"{end_time - start_time:.2f} seconds"
+    # Calculate the similarity between the books
+    if similarity:
+        from sentence_transformers import util
+        sentence_transformer = SentenceTransformer("all-MiniLM-L6-v2")
+        book_embeddings = sentence_transformer.encode(
+            combined_data, convert_to_tensor=True
+        )
+        similar_books = []
+        for i in range(len(titles)):
+            current_embedding = book_embeddings[i]
+            similarity_sorted = util.semantic_search(
+                current_embedding, book_embeddings, top_k=20
+            )
+            similar_books.append(
+                {
+                    "sorted_by_similarity": similarity_sorted[0][1:],
+                }
+            )
+    # Create a list of dictionaries to store the results
+    results = []
+    for i in range(len(titles)):
+        results.append(
+            {
+                "id": i,
+                "title": titles[i],
+                "author": authors[i],
+                "publisher": publishers[i],
+                "image_link": images[i],
+                "labels": classes[i]["labels"][0:2],
+                "label_confidences": classes[i]["scores"][0:2],
+                "summary": summaries[i][0]["summary_text"],
+                "similar_books": similar_books[i]["sorted_by_similarity"],
+                "runtime": runtime,
+            }
+        )
+    return results