Spaces:
Runtime error
Runtime error
Updated the dataloader to ignore case on product features. Note: I have deliberately not harmonised product feature capitalisation or similar features, as this "messiness" is representative of real organisational data.
Browse files
src/data_synthesis/data_loader.py
CHANGED
@@ -17,6 +17,9 @@ def db_file() -> str:
|
|
17 |
|
18 |
|
19 |
def setup_db_tables() -> None:
|
|
|
|
|
|
|
20 |
con = sqlite3.connect(db_file())
|
21 |
tables = ['reviews', 'product_features', 'features', 'products', 'categories']
|
22 |
for t in tables:
|
@@ -42,11 +45,16 @@ def setup_db_tables() -> None:
|
|
42 |
sql = "CREATE TABLE reviews (id INTEGER PRIMARY KEY AUTOINCREMENT, product_id INTEGER NOT NULL, rating INTEGER NOT NULL, review_text TEXT NOT NULL, FOREIGN KEY (product_id) REFERENCES products (id))"
|
43 |
con.execute(sql)
|
44 |
|
|
|
45 |
def insert_data() -> None:
|
|
|
|
|
|
|
46 |
con = sqlite3.connect(db_file())
|
47 |
cur = con.cursor()
|
48 |
|
49 |
cats_and_features = get_categories_and_features()
|
|
|
50 |
for cat, features in cats_and_features.items():
|
51 |
sql = f"INSERT INTO categories('name') VALUES ('{cat}')"
|
52 |
cat_id = con.execute(sql).lastrowid
|
@@ -64,7 +72,7 @@ def insert_data() -> None:
|
|
64 |
con.commit()
|
65 |
|
66 |
for feat in prod.features:
|
67 |
-
sql = f"SELECT id from features WHERE name='{feat}' AND category_id={cat_id}"
|
68 |
cur.execute(sql)
|
69 |
rows = cur.fetchall()
|
70 |
if len(rows) == 0:
|
|
|
17 |
|
18 |
|
19 |
def setup_db_tables() -> None:
|
20 |
+
"""
|
21 |
+
Drop all the tables in the database and then re-build the structure empty
|
22 |
+
"""
|
23 |
con = sqlite3.connect(db_file())
|
24 |
tables = ['reviews', 'product_features', 'features', 'products', 'categories']
|
25 |
for t in tables:
|
|
|
45 |
sql = "CREATE TABLE reviews (id INTEGER PRIMARY KEY AUTOINCREMENT, product_id INTEGER NOT NULL, rating INTEGER NOT NULL, review_text TEXT NOT NULL, FOREIGN KEY (product_id) REFERENCES products (id))"
|
46 |
con.execute(sql)
|
47 |
|
48 |
+
|
49 |
def insert_data() -> None:
|
50 |
+
"""
|
51 |
+
Insert the data from the json data files into the data structure
|
52 |
+
"""
|
53 |
con = sqlite3.connect(db_file())
|
54 |
cur = con.cursor()
|
55 |
|
56 |
cats_and_features = get_categories_and_features()
|
57 |
+
|
58 |
for cat, features in cats_and_features.items():
|
59 |
sql = f"INSERT INTO categories('name') VALUES ('{cat}')"
|
60 |
cat_id = con.execute(sql).lastrowid
|
|
|
72 |
con.commit()
|
73 |
|
74 |
for feat in prod.features:
|
75 |
+
sql = f"SELECT id from features WHERE lower(name)='{feat.lower()}' AND category_id={cat_id}"
|
76 |
cur.execute(sql)
|
77 |
rows = cur.fetchall()
|
78 |
if len(rows) == 0:
|