import os

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    AutoModelForSequenceClassification,
)
from optimum.onnxruntime import ORTModelForSeq2SeqLM, ORTModelForSequenceClassification
from sentence_transformers import SentenceTransformer

# Define the FastAPI app (the interactive docs are served from the root path)
app = FastAPI(docs_url="/")

# Add the CORS middleware to the app
# NOTE(review): wildcard origins together with allow_credentials=True is a very
# permissive policy -- confirm this is intended before exposing the service.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Define the Google Books API key (None when the variable is not set)
key = os.environ.get("GOOGLE_BOOKS_API_KEY")

# Define summarization models: a regular checkpoint and an ONNX Runtime one.
# Everything below is loaded once, when the module is imported.
summary_tokenizer_normal = AutoTokenizer.from_pretrained("lidiya/bart-base-samsum")
summary_model_normal = AutoModelForSeq2SeqLM.from_pretrained("lidiya/bart-base-samsum")

summary_tokenizer_onnx = AutoTokenizer.from_pretrained("optimum/t5-small")
summary_model_onnx = ORTModelForSeq2SeqLM.from_pretrained("optimum/t5-small")

# Define classification models: a regular checkpoint and an ONNX Runtime one
classification_tokenizer_normal = AutoTokenizer.from_pretrained(
    "sileod/deberta-v3-base-tasksource-nli"
)
classification_model_normal = AutoModelForSequenceClassification.from_pretrained(
    "sileod/deberta-v3-base-tasksource-nli"
)

classification_tokenizer_onnx = AutoTokenizer.from_pretrained(
    "optimum/distilbert-base-uncased-mnli"
)
classification_model_onnx = ORTModelForSequenceClassification.from_pretrained(
    "optimum/distilbert-base-uncased-mnli"
)

# Define similarity model
similarity_model = SentenceTransformer("all-MiniLM-L6-v2")


@app.get("/search")
async def search(
    query: str,
    add_chatgpt_results: bool = False,
    n_results: int = 10,
):
    """
    Get the results from the Google Books API, OpenAlex, and optionally OpenAI.
    """
    import time
    import requests

    start_time = time.time()

    # Initialize the lists to store the results.  They are kept index-aligned:
    # position i of every list describes the same search hit.
    titles = []
    authors = []
    publishers = []
    descriptions = []
    images = []

    def gbooks_search(query, n_results=30):
        """
        Access the Google Books API and return the results.

        Args:
            query: Free-text search query.
            n_results: Maximum number of volumes to request.  Values above
                40 are clamped because the API rejects larger page sizes.

        Returns:
            A tuple ``(titles, authors, publishers, descriptions, images)``
            of equally long lists.  Missing text fields are reported as the
            string "Null" and a missing thumbnail as a placeholder image URL.
            All lists are empty when the response carries no "items".
        """
        placeholder_image = "https://bookstoreromanceday.org/wp-content/uploads/2020/08/book-cover-placeholder.png"

        # Set the API endpoint and query parameters
        url = "https://www.googleapis.com/books/v1/volumes"
        params = {
            "q": str(query),
            "printType": "books",
            "maxResults": min(n_results, 40),
            "key": key,
        }

        # Send a GET request to the API with the specified parameters.  The
        # timeout keeps a stalled upstream from blocking the request forever.
        response = requests.get(url, params=params, timeout=10)

        # Parse the response JSON
        data = response.json()

        # Initialize the lists to store the results
        titles = []
        authors = []
        publishers = []
        descriptions = []
        images = []

        # "items" is absent when nothing matched (or the API returned an error
        # payload); treat that as "no results" instead of raising KeyError.
        for item in data.get("items", []):
            volume_info = item.get("volumeInfo", {})

            try:
                titles.append(f"{volume_info['title']}: {volume_info['subtitle']}")
            except KeyError:
                titles.append(volume_info.get("title", "Null"))

            descriptions.append(volume_info.get("description", "Null"))
            publishers.append(volume_info.get("publisher", "Null"))

            # Only the first author is reported; an absent or empty author
            # list falls back to "Null".
            authors.append((volume_info.get("authors") or ["Null"])[0])

            images.append(
                volume_info.get("imageLinks", {}).get("thumbnail", placeholder_image)
            )

        return titles, authors, publishers, descriptions, images

    # Run the gbooks_search function
    (
        titles_placeholder,
        authors_placeholder,
        publishers_placeholder,
        descriptions_placeholder,
        images_placeholder,
    ) = gbooks_search(query, n_results=n_results)

    # Append the results to the lists
    titles.extend(titles_placeholder)
    authors.extend(authors_placeholder)
    publishers.extend(publishers_placeholder)
    descriptions.extend(descriptions_placeholder)
    images.extend(images_placeholder)

    #
Get the time since the start first_checkpoint = time.time() first_checkpoint_time = int(first_checkpoint - start_time) def openalex_search(query, n_results=10): """ Run a search on OpenAlex and return the results. """ import pyalex from pyalex import Works # Add email to the config pyalex.config.email = "ber2mir@gmail.com" # Define a pager object with the same query pager = Works().search(str(query)).paginate(per_page=n_results, n_max=n_results) # Generate a list of the results openalex_results = list(pager) # Initialize the lists to store the results titles = [] authors = [] publishers = [] descriptions = [] images = [] # Get the titles, descriptions, and publishers and append them to the lists try: for result in openalex_results[0]: try: titles.append(result["title"]) except KeyError: titles.append("Null") try: descriptions.append(result["abstract"]) except KeyError: descriptions.append("Null") try: publishers.append(result["host_venue"]["publisher"]) except KeyError: publishers.append("Null") try: authors.append(result["authorships"][0]["author"]["display_name"]) except KeyError: authors.append("Null") images.append( "https://bookstoreromanceday.org/wp-content/uploads/2020/08/book-cover-placeholder.png" ) except IndexError: titles.append("Null") descriptions.append("Null") publishers.append("Null") authors.append("Null") images.append( "https://bookstoreromanceday.org/wp-content/uploads/2020/08/book-cover-placeholder.png" ) return titles, authors, publishers, descriptions, images # Run the openalex_search function ( titles_placeholder, authors_placeholder, publishers_placeholder, descriptions_placeholder, images_placeholder, ) = openalex_search(query, n_results=n_results) # Append the results to the lists [titles.append(title) for title in titles_placeholder] [authors.append(author) for author in authors_placeholder] [publishers.append(publisher) for publisher in publishers_placeholder] [descriptions.append(description) for description in 
descriptions_placeholder] [images.append(image) for image in images_placeholder] # Calculate the elapsed time between the first and second checkpoints second_checkpoint = time.time() second_checkpoint_time = int(second_checkpoint - first_checkpoint) def openai_search(query, n_results=10): """ Create a query to the OpenAI ChatGPT API and return the results. """ import openai # Initialize the lists to store the results titles = [] authors = [] publishers = [] descriptions = [] images = [] # Set the OpenAI API key openai.api_key = os.environ.get("OPENAI_API_KEY") # Create ChatGPT query chatgpt_response = openai.ChatCompletion.create( model="gpt-3.5-turbo", messages=[ { "role": "system", "content": "You are a librarian. You are helping a patron find a book.", }, { "role": "user", "content": f"Recommend me {n_results} books about {query}. Your response should be like: 'title: