alfraser committed on
Commit
2cb7b84
·
1 Parent(s): a1317da

Checked type hints

Browse files
src/data_synthesis/generate_data.py CHANGED
@@ -82,7 +82,7 @@ Please format the response as json in this style:
82
 
83
 
84
  @staticmethod
85
- def reviews_for_product(product: Product, k: int):
86
  prompt = f"Suggest exactly {k} reviews for this product.\nThe product is a {product.category.lower()[0:-1]} named the '{product.name}', which features {DataPrompt.format_features(product.features)}.\nFirst pick an integer star rating from 1 to 5 stars, where 1 is bad and 5 is great, for the review.\nNext write the review text of between 50 and 100 words for the review from the user. The text in the review should align to the star rating, so if the rating is 1 the review would be critical and if the rating is 5 the review would be positive.\n"
87
  prompt += """
88
  Please format the response as json in this style:
@@ -97,9 +97,10 @@ Please format the response as json in this style:
97
  return prompt
98
 
99
 
100
- def generate_products(category: str, features: List[str], k: int = 20):
101
  """
102
  Call GPT3.5 Turbo model and get it to generate some products based on a category
 
103
  """
104
  prompt = DataPrompt.products_for_category(category, features, k)
105
  response = openai.ChatCompletion.create(
@@ -214,7 +215,7 @@ def get_categories_and_features() -> Dict[str, List[str]]:
214
  return cats_and_feats
215
 
216
 
217
- def generate_all_products(target_count=40):
218
  """
219
  Generate all products for all categories, trying to reach a given target count
220
  of products.
@@ -235,7 +236,7 @@ def generate_all_products(target_count=40):
235
  print(f"Skipping {cat_name} as targetting {target_count} and already have {len(existing_products)}")
236
 
237
 
238
- def dump_products_to_csv():
239
  """
240
  Dump a csv file for debug, for every product showing category name and product name
241
  """
@@ -249,7 +250,7 @@ def dump_products_to_csv():
249
  f.write('\n'.join(cat_keys))
250
 
251
 
252
- def generate_reviews(target_count: int):
253
  """
254
  Generate reviews for each category up to a target count of reviews
255
  """
@@ -257,7 +258,7 @@ def generate_reviews(target_count: int):
257
  generate_reviews_for_category(cat, target_count)
258
 
259
 
260
- def generate_reviews_for_category(category: str, target_count: int):
261
  """
262
  Generate reviews for a specific category up to a given target number of reviews
263
  """
@@ -291,7 +292,7 @@ def generate_reviews_for_category(category: str, target_count: int):
291
  print(f'{prod.category[:-1]}: {prod.name} has {len(prod.reviews)} reviews ({target_count} requested). Skipping.')
292
 
293
 
294
- def generate_reviews_for_product(product: Product, k: int):
295
  """
296
  Generate a number of reviews from GPT3.5 for a specific product and add them to the product
297
  """
@@ -308,7 +309,7 @@ def generate_reviews_for_product(product: Product, k: int):
308
  add_reviews_to_product(output_text, product)
309
 
310
 
311
- def add_reviews_to_product(reviews_json: str, product: Product):
312
  """
313
  Load the reviews file containing this product category, append this review to the list and
314
  re-save the file
 
82
 
83
 
84
  @staticmethod
85
+ def reviews_for_product(product: Product, k: int) -> str:
86
  prompt = f"Suggest exactly {k} reviews for this product.\nThe product is a {product.category.lower()[0:-1]} named the '{product.name}', which features {DataPrompt.format_features(product.features)}.\nFirst pick an integer star rating from 1 to 5 stars, where 1 is bad and 5 is great, for the review.\nNext write the review text of between 50 and 100 words for the review from the user. The text in the review should align to the star rating, so if the rating is 1 the review would be critical and if the rating is 5 the review would be positive.\n"
87
  prompt += """
88
  Please format the response as json in this style:
 
97
  return prompt
98
 
99
 
100
+ def generate_products(category: str, features: List[str], k: int = 20) -> None:
101
  """
102
  Call GPT3.5 Turbo model and get it to generate some products based on a category
103
+ Insert those products into the category
104
  """
105
  prompt = DataPrompt.products_for_category(category, features, k)
106
  response = openai.ChatCompletion.create(
 
215
  return cats_and_feats
216
 
217
 
218
+ def generate_all_products(target_count=40) -> None:
219
  """
220
  Generate all products for all categories, trying to reach a given target count
221
  of products.
 
236
  print(f"Skipping {cat_name} as targetting {target_count} and already have {len(existing_products)}")
237
 
238
 
239
+ def dump_products_to_csv() -> None:
240
  """
241
  Dump a csv file for debug, for every product showing category name and product name
242
  """
 
250
  f.write('\n'.join(cat_keys))
251
 
252
 
253
+ def generate_reviews(target_count: int) -> None:
254
  """
255
  Generate reviews for each category up to a target count of reviews
256
  """
 
258
  generate_reviews_for_category(cat, target_count)
259
 
260
 
261
+ def generate_reviews_for_category(category: str, target_count: int) -> None:
262
  """
263
  Generate reviews for a specific category up to a given target number of reviews
264
  """
 
292
  print(f'{prod.category[:-1]}: {prod.name} has {len(prod.reviews)} reviews ({target_count} requested). Skipping.')
293
 
294
 
295
+ def generate_reviews_for_product(product: Product, k: int) -> None:
296
  """
297
  Generate a number of reviews from GPT3.5 for a specific product and add them to the product
298
  """
 
309
  add_reviews_to_product(output_text, product)
310
 
311
 
312
+ def add_reviews_to_product(reviews_json: str, product: Product) -> None:
313
  """
314
  Load the reviews file containing this product category, append this review to the list and
315
  re-save the file