Spaces:
Runtime error
Runtime error
Checked type hints
Browse files
src/data_synthesis/generate_data.py
CHANGED
@@ -82,7 +82,7 @@ Please format the response as json in this style:
|
|
82 |
|
83 |
|
84 |
@staticmethod
|
85 |
-
def reviews_for_product(product: Product, k: int):
|
86 |
prompt = f"Suggest exactly {k} reviews for this product.\nThe product is a {product.category.lower()[0:-1]} named the '{product.name}', which features {DataPrompt.format_features(product.features)}.\nFirst pick an integer star rating from 1 to 5 stars, where 1 is bad and 5 is great, for the review.\nNext write the review text of between 50 and 100 words for the review from the user. The text in the review should align to the star rating, so if the rating is 1 the review would be critical and if the rating is 5 the review would be positive.\n"
|
87 |
prompt += """
|
88 |
Please format the response as json in this style:
|
@@ -97,9 +97,10 @@ Please format the response as json in this style:
|
|
97 |
return prompt
|
98 |
|
99 |
|
100 |
-
def generate_products(category: str, features: List[str], k: int = 20):
|
101 |
"""
|
102 |
Call GPT3.5 Turbo model and get it to generate some products based on a category
|
|
|
103 |
"""
|
104 |
prompt = DataPrompt.products_for_category(category, features, k)
|
105 |
response = openai.ChatCompletion.create(
|
@@ -214,7 +215,7 @@ def get_categories_and_features() -> Dict[str, List[str]]:
|
|
214 |
return cats_and_feats
|
215 |
|
216 |
|
217 |
-
def generate_all_products(target_count=40):
|
218 |
"""
|
219 |
Generate all products for all categories, trying to reach a given target count
|
220 |
of products.
|
@@ -235,7 +236,7 @@ def generate_all_products(target_count=40):
|
|
235 |
print(f"Skipping {cat_name} as targetting {target_count} and already have {len(existing_products)}")
|
236 |
|
237 |
|
238 |
-
def dump_products_to_csv():
|
239 |
"""
|
240 |
Dump a csv file for debug, for every product showing category name and product name
|
241 |
"""
|
@@ -249,7 +250,7 @@ def dump_products_to_csv():
|
|
249 |
f.write('\n'.join(cat_keys))
|
250 |
|
251 |
|
252 |
-
def generate_reviews(target_count: int):
|
253 |
"""
|
254 |
Generate reviews for each category up to a target count of reviews
|
255 |
"""
|
@@ -257,7 +258,7 @@ def generate_reviews(target_count: int):
|
|
257 |
generate_reviews_for_category(cat, target_count)
|
258 |
|
259 |
|
260 |
-
def generate_reviews_for_category(category: str, target_count: int):
|
261 |
"""
|
262 |
Generate reviews for a specific category up to a given target number of reviews
|
263 |
"""
|
@@ -291,7 +292,7 @@ def generate_reviews_for_category(category: str, target_count: int):
|
|
291 |
print(f'{prod.category[:-1]}: {prod.name} has {len(prod.reviews)} reviews ({target_count} requested). Skipping.')
|
292 |
|
293 |
|
294 |
-
def generate_reviews_for_product(product: Product, k: int):
|
295 |
"""
|
296 |
Generate a number of reviews from GPT3.5 for a specific product and add them to the product
|
297 |
"""
|
@@ -308,7 +309,7 @@ def generate_reviews_for_product(product: Product, k: int):
|
|
308 |
add_reviews_to_product(output_text, product)
|
309 |
|
310 |
|
311 |
-
def add_reviews_to_product(reviews_json: str, product: Product):
|
312 |
"""
|
313 |
Load the reviews file containing this product category, append this review to the list and
|
314 |
re-save the file
|
|
|
82 |
|
83 |
|
84 |
@staticmethod
|
85 |
+
def reviews_for_product(product: Product, k: int) -> str:
|
86 |
prompt = f"Suggest exactly {k} reviews for this product.\nThe product is a {product.category.lower()[0:-1]} named the '{product.name}', which features {DataPrompt.format_features(product.features)}.\nFirst pick an integer star rating from 1 to 5 stars, where 1 is bad and 5 is great, for the review.\nNext write the review text of between 50 and 100 words for the review from the user. The text in the review should align to the star rating, so if the rating is 1 the review would be critical and if the rating is 5 the review would be positive.\n"
|
87 |
prompt += """
|
88 |
Please format the response as json in this style:
|
|
|
97 |
return prompt
|
98 |
|
99 |
|
100 |
+
def generate_products(category: str, features: List[str], k: int = 20) -> None:
|
101 |
"""
|
102 |
Call GPT3.5 Turbo model and get it to generate some products based on a category
|
103 |
+
Insert those products into the category
|
104 |
"""
|
105 |
prompt = DataPrompt.products_for_category(category, features, k)
|
106 |
response = openai.ChatCompletion.create(
|
|
|
215 |
return cats_and_feats
|
216 |
|
217 |
|
218 |
+
def generate_all_products(target_count=40) -> None:
|
219 |
"""
|
220 |
Generate all products for all categories, trying to reach a given target count
|
221 |
of products.
|
|
|
236 |
print(f"Skipping {cat_name} as targetting {target_count} and already have {len(existing_products)}")
|
237 |
|
238 |
|
239 |
+
def dump_products_to_csv() -> None:
|
240 |
"""
|
241 |
Dump a csv file for debug, for every product showing category name and product name
|
242 |
"""
|
|
|
250 |
f.write('\n'.join(cat_keys))
|
251 |
|
252 |
|
253 |
+
def generate_reviews(target_count: int) -> None:
|
254 |
"""
|
255 |
Generate reviews for each category up to a target count of reviews
|
256 |
"""
|
|
|
258 |
generate_reviews_for_category(cat, target_count)
|
259 |
|
260 |
|
261 |
+
def generate_reviews_for_category(category: str, target_count: int) -> None:
|
262 |
"""
|
263 |
Generate reviews for a specific category up to a given target number of reviews
|
264 |
"""
|
|
|
292 |
print(f'{prod.category[:-1]}: {prod.name} has {len(prod.reviews)} reviews ({target_count} requested). Skipping.')
|
293 |
|
294 |
|
295 |
+
def generate_reviews_for_product(product: Product, k: int) -> None:
|
296 |
"""
|
297 |
Generate a number of reviews from GPT3.5 for a specific product and add them to the product
|
298 |
"""
|
|
|
309 |
add_reviews_to_product(output_text, product)
|
310 |
|
311 |
|
312 |
+
def add_reviews_to_product(reviews_json: str, product: Product) -> None:
|
313 |
"""
|
314 |
Load the reviews file containing this product category, append this review to the list and
|
315 |
re-save the file
|