llm-arch / src /datatypes.py
alfraser's picture
Reviewed comments and type hints
d50e68d
raw
history blame
7.08 kB
"""
This file contains the code to load the business/private datatypes from the database and interact with them,
i.e. the product categories, features, products and reviews, including utility methods to find and render them.
It is implemented as a very simple, read-only, Object-Relational-Mapping style layer which grants access to read
and use the data in the database. Database updates are all done by offline processes and the data setup.
"""
from __future__ import annotations # For referencing own class
import sqlite3
from typing import List, Dict
from src.common import *
class DataLoader:
    """
    Class which controls the loading of all the data in the system from the database.

    All state is held at class level (the class is never instantiated here): the name of the
    active database, where it lives on disk, and whether the data classes have been populated.
    """
    active_db = "02_baseline_products_dataset"  # Name of the database in use, WITHOUT the ".db" extension
    db_dir = os.path.join(data_dir, 'sqlite')
    db_file = os.path.join(db_dir, f"{active_db}.db")
    loaded = False  # Set to True once load_data has completed successfully

    @classmethod
    def set_db_name(cls, name: str):
        """
        Programmatically switch the database in use and reload all the data from it.

        :param name: database name without the ".db" extension (as returned by available_dbs)
        :raises: whatever load_data raises if the new database cannot be read; in that case the
                 previously active database file and name are left in place
        """
        if name == DataLoader.active_db:
            return  # Already using this database, nothing to reload

        # State is written to DataLoader explicitly (not cls) because the static methods of
        # this class all read DataLoader.db_file / DataLoader.active_db directly.
        previous_file = DataLoader.db_file
        DataLoader.db_file = os.path.join(DataLoader.db_dir, f"{name}.db")
        try:
            DataLoader.load_data(reload=True)
        except Exception:
            # Do not leave db_file pointing at a database which failed to load
            DataLoader.db_file = previous_file
            raise
        DataLoader.active_db = name

    @staticmethod
    def current_db() -> str:
        """
        Get the name of the current database in use (without the ".db" extension)
        """
        # active_db is already stored without an extension, so it must not be clipped
        return DataLoader.active_db

    @staticmethod
    def available_dbs() -> List[str]:
        """
        Get the names of all the available databases in the sqlite folder, i.e. every file whose
        name contains 'products' and ends in '.db', returned with the '.db' extension removed
        """
        return [f[:-3] for f in os.listdir(DataLoader.db_dir) if ('products' in f) and f.endswith('.db')]

    @staticmethod
    def load_data(reload: bool = False) -> None:
        """
        Load (or reload) all the data classes across the system from the database. Fills out all the
        class variables holding the list of all of each type, and then completes also the cross-references
        between objects so they are aware of each other (e.g. putting reviews into products and linking the
        reviews to their products).

        Everything is read into local dictionaries first and only published to Category.all, Feature.all,
        Product.all and Review.all once the whole read has succeeded, so a failed (re)load does not leave
        the system with empty or half-populated data.

        :param reload: if False (the default) and data has already been loaded, do nothing
        :raises sqlite3.Error: if the database cannot be queried
        :raises KeyError: if a row references a category, product or feature ID which was not loaded
        """
        if DataLoader.loaded and not reload:
            return

        categories = {}
        features = {}
        products = {}
        reviews = {}

        con = sqlite3.connect(DataLoader.db_file)
        try:
            cur = con.cursor()

            # Rows are read positionally from SELECT *, so the column order of each table matters.
            # Columns used: categories -> (id, name)
            for c in cur.execute('SELECT * FROM categories').fetchall():
                categories[c[0]] = Category(c[0], c[1])

            # features -> (id, name, category id)
            for f in cur.execute('SELECT * FROM features').fetchall():
                feat = Feature(f[0], f[1], categories[f[2]])
                features[f[0]] = feat
                categories[f[2]].features.append(feat)

            # products -> (id, name, description, price, category id)
            for p in cur.execute('SELECT * FROM products').fetchall():
                prod = Product(p[0], p[1], p[2], p[3], categories[p[4]])
                products[p[0]] = prod
                categories[p[4]].products.append(prod)

            # product_features -> (<unused first column>, product id, feature id); link both directions
            for pf in cur.execute('SELECT * FROM product_features').fetchall():
                products[pf[1]].features.append(features[pf[2]])
                features[pf[2]].products.append(products[pf[1]])

            # reviews -> (id, product id, rating, review text)
            for r in cur.execute('SELECT * FROM reviews').fetchall():
                rev = Review(r[0], r[2], r[3], products[r[1]])
                reviews[r[0]] = rev
                products[r[1]].reviews.append(rev)
        finally:
            # Always release the connection, including when a query or a lookup fails
            con.close()

        # Replace any prior data in one go now that everything has loaded cleanly
        Review.all = reviews
        Feature.all = features
        Product.all = products
        Category.all = categories
        DataLoader.loaded = True
class Category:
    """
    A category of products (e.g. "Dishwashers"). Holds also a list of Features relevant to this
    category and a list of Products in this category
    """
    all: Dict[str, Category] = {}  # All categories by ID

    @staticmethod
    def all_sorted() -> List[Category]:
        """
        Gets a list of all the categories, sorted by name
        """
        return sorted(Category.all.values(), key=lambda x: x.name)

    @staticmethod
    def by_name(name: str) -> Category | None:
        """
        Look up a single category by its exact name.

        :return: the first category whose name matches, or None if there is no such category
        """
        for c in Category.all.values():
            if c.name == name:
                return c
        return None

    def __init__(self, id, name):
        self.id = id
        self.name = name
        self.features = []  # Features belonging to this category
        self.products = []  # Products belonging to this category

    @property
    def feature_count(self) -> int:
        """
        Number of features attached to this category
        """
        return len(self.features)

    @property
    def product_count(self) -> int:
        """
        Number of products attached to this category
        """
        return len(self.products)

    @property
    def singular_name(self) -> str:
        """
        Get the name of this category, stripping the trailing s if there is one.
        This is a plain one-character clip, not real singularisation. An empty name
        is returned unchanged.
        """
        # endswith is safe on an empty name, where indexing name[-1] would raise IndexError
        if self.name.endswith("s"):
            return self.name[:-1]  # Clip the s
        return self.name

    @property
    def lower_singular_name(self) -> str:
        """
        Get the name of this category in lower case, removing any trailing s
        """
        return self.singular_name.lower()
class Feature:
    """
    One named capability (e.g. "Quiet operation") which Products within a particular
    Category may offer. Knows its Category and the Products which have it.
    """
    all: Dict[str, Feature] = {}  # Registry of every feature, keyed by feature ID

    def __init__(self, id, name, category):
        self.id, self.name = id, name
        self.category = category
        self.products = []  # Starts empty; products with this feature are appended later

    @property
    def product_count(self) -> int:
        """
        How many products are currently linked to this feature
        """
        linked = self.products
        return len(linked)

    def __repr__(self) -> str:
        # A feature is shown simply as its name
        label = self.name
        return label
class Product:
    """
    A product specification with its price, description, features, and a list of associated
    customer reviews.
    """
    all: Dict[str, Product] = {}  # All products by ID

    def __init__(self, id, name, description, price, category):
        self.id = id
        self.name = name
        self.description = description
        self.price = round(price, 2)  # Prices are held to 2 decimal places
        self.category = category
        self.features = []  # Features this product has
        self.reviews = []  # Customer reviews of this product

    @property
    def feature_count(self) -> int:
        """
        Number of features attached to this product
        """
        return len(self.features)

    @property
    def review_count(self) -> int:
        """
        Number of reviews attached to this product
        """
        return len(self.reviews)

    @property
    def average_rating(self) -> float:
        """
        The mean rating across all reviews of this product, rounded to 2 decimal places,
        or 0.0 if the product has no reviews.
        """
        # Note: this previously declared a decimals=2 parameter, but a property getter is only
        # ever called with self, so the value was always 2. It is fixed here to make that plain.
        if self.review_count == 0:
            return 0.0
        mean = sum(r.rating for r in self.reviews) / self.review_count
        return float(round(mean, 2))

    @staticmethod
    def for_ids(ids: List[str]) -> List[Product]:
        """
        Get a list of products for a list of IDs, in the same order as the IDs given.

        :raises KeyError: if any ID is not a known product ID
        """
        return [Product.all[i] for i in ids]

    @staticmethod
    def all_as_list() -> List[Product]:
        """
        Get every known product as a list
        """
        return list(Product.all.values())
class Review:
    """
    One customer review of a product: a star rating plus the text the customer wrote,
    together with a reference to the product being reviewed
    """
    all: Dict[str, Review] = {}  # Registry of every review, keyed by review ID

    def __init__(self, id, rating, review_text, product):
        # Plain value holder: store each argument under the attribute of the same name
        self.id, self.rating = id, rating
        self.review_text, self.product = review_text, product