alfraser committed on
Commit
2b08e8f
·
1 Parent(s): 7e353fe

Fixed a less-than/greater-than bug where I was dropping the wrong reviews when trying to achieve a target average rating. Updated the SQLite data set too.

Browse files
data/sqlite/02_baseline_products_dataset.db CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c614f87d479e5ffd3dab7ec185811dc59c52a27a41eed7c5788f23674c6d77fd
3
  size 17260544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec1a30e75fd7c4ff15dc46c35e250a042836159a2178cebf33e25599df70c9ca
3
  size 17260544
src/data_synthesis/select_test_data_from_all_products.py CHANGED
@@ -112,8 +112,8 @@ def avg_rating(review_ratings1: List[List[int]], review_ratings2: List[List[int]
112
 
113
  def get_review_ids_to_drop(review_ratings: List[List[int]], target_review_count: int, target_avg_rating: float) -> List[int]:
114
  ids_to_drop = []
115
- rated_lower_than_target = [r for r in review_ratings if r[1] >= target_avg_rating]
116
- rated_higher_than_target = [r for r in review_ratings if r[1] < target_avg_rating]
117
  while len(rated_higher_than_target) + len(rated_lower_than_target) > target_review_count:
118
  if avg_rating(rated_higher_than_target, rated_lower_than_target) >= target_avg_rating:
119
  if len(rated_higher_than_target) == 0:
@@ -134,6 +134,7 @@ def drop_reviews(review_ids: list[int]):
134
  else:
135
  ids_in = f'({", ".join([str(r) for r in review_ids])})'
136
  sql = f'delete from reviews where id in {ids_in}'
 
137
 
138
 
139
  def drop_reviews_to_balance_avg_rating(min_review_count: int = 5,
 
112
 
113
  def get_review_ids_to_drop(review_ratings: List[List[int]], target_review_count: int, target_avg_rating: float) -> List[int]:
114
  ids_to_drop = []
115
+ rated_lower_than_target = [r for r in review_ratings if r[1] <= target_avg_rating]
116
+ rated_higher_than_target = [r for r in review_ratings if r[1] > target_avg_rating]
117
  while len(rated_higher_than_target) + len(rated_lower_than_target) > target_review_count:
118
  if avg_rating(rated_higher_than_target, rated_lower_than_target) >= target_avg_rating:
119
  if len(rated_higher_than_target) == 0:
 
134
  else:
135
  ids_in = f'({", ".join([str(r) for r in review_ids])})'
136
  sql = f'delete from reviews where id in {ids_in}'
137
+ execute_sqls([sql])
138
 
139
 
140
  def drop_reviews_to_balance_avg_rating(min_review_count: int = 5,