alfraser committed on
Commit
b5d446f
·
1 Parent(s): 0ff95ad

Updated the data loader to ignore case when matching product features. Note: I have deliberately not harmonised product feature capitalisation or merged similar features, as this "messiness" is representative of real organisational data.

Browse files
Files changed (1) hide show
  1. src/data_synthesis/data_loader.py +9 -1
src/data_synthesis/data_loader.py CHANGED
@@ -17,6 +17,9 @@ def db_file() -> str:
17
 
18
 
19
  def setup_db_tables() -> None:
 
 
 
20
  con = sqlite3.connect(db_file())
21
  tables = ['reviews', 'product_features', 'features', 'products', 'categories']
22
  for t in tables:
@@ -42,11 +45,16 @@ def setup_db_tables() -> None:
42
  sql = "CREATE TABLE reviews (id INTEGER PRIMARY KEY AUTOINCREMENT, product_id INTEGER NOT NULL, rating INTEGER NOT NULL, review_text TEXT NOT NULL, FOREIGN KEY (product_id) REFERENCES products (id))"
43
  con.execute(sql)
44
 
 
45
  def insert_data() -> None:
 
 
 
46
  con = sqlite3.connect(db_file())
47
  cur = con.cursor()
48
 
49
  cats_and_features = get_categories_and_features()
 
50
  for cat, features in cats_and_features.items():
51
  sql = f"INSERT INTO categories('name') VALUES ('{cat}')"
52
  cat_id = con.execute(sql).lastrowid
@@ -64,7 +72,7 @@ def insert_data() -> None:
64
  con.commit()
65
 
66
  for feat in prod.features:
67
- sql = f"SELECT id from features WHERE name='{feat}' AND category_id={cat_id}"
68
  cur.execute(sql)
69
  rows = cur.fetchall()
70
  if len(rows) == 0:
 
17
 
18
 
19
  def setup_db_tables() -> None:
20
+ """
21
+ Drop all the tables in the database and then re-build the structure empty
22
+ """
23
  con = sqlite3.connect(db_file())
24
  tables = ['reviews', 'product_features', 'features', 'products', 'categories']
25
  for t in tables:
 
45
  sql = "CREATE TABLE reviews (id INTEGER PRIMARY KEY AUTOINCREMENT, product_id INTEGER NOT NULL, rating INTEGER NOT NULL, review_text TEXT NOT NULL, FOREIGN KEY (product_id) REFERENCES products (id))"
46
  con.execute(sql)
47
 
48
+
49
  def insert_data() -> None:
50
+ """
51
+ Insert the data from the json data files into the data structure
52
+ """
53
  con = sqlite3.connect(db_file())
54
  cur = con.cursor()
55
 
56
  cats_and_features = get_categories_and_features()
57
+
58
  for cat, features in cats_and_features.items():
59
  sql = f"INSERT INTO categories('name') VALUES ('{cat}')"
60
  cat_id = con.execute(sql).lastrowid
 
72
  con.commit()
73
 
74
  for feat in prod.features:
75
+ sql = f"SELECT id from features WHERE lower(name)='{feat.lower()}' AND category_id={cat_id}"
76
  cur.execute(sql)
77
  rows = cur.fetchall()
78
  if len(rows) == 0: