Update synthgen.py
synthgen.py  CHANGED  +85 -2
@@ -47,8 +47,8 @@ def generate_synthetic_text(
         "model": model,
         "messages": [
             {"role": "system", "content": system_message},
-
-
+            {"role": "user", "content": prompt},
+        ],
         "extra_headers": {
             # "HTTP-Referer": "YOUR_SITE_URL",
             "X-Title": "SynthGen",
@@ -194,6 +194,89 @@ def generate_synthetic_conversation(
 
     return f"Generated conversation for prompt '{system_prompt}':\n\n{conversation_text}"
 
+# Function to generate different types of content based on a topic
+def generate_corpus_content(
+    topic: str,
+    content_type: str,  # e.g., "Corpus Snippets", "Short Story", "Article"
+    length_param: int,  # Meaning depends on type (e.g., num snippets, approx words)
+    model: str,
+    system_message_base: str = "You are a helpful assistant generating synthetic content.",
+    temperature: Optional[float] = 0.7,
+    top_p: Optional[float] = None,
+    max_tokens: Optional[int] = None  # Use a larger default if None
+) -> str:
+    """
+    Generates different types of synthetic content based on a topic.
+
+    Args:
+        topic: The central topic for the content.
+        content_type: The type of content to generate.
+        length_param: A parameter controlling length/quantity (meaning depends on type).
+        model: The model ID.
+        system_message_base: Base system message (will be specialized).
+        temperature: Model temperature.
+        top_p: Model top_p.
+        max_tokens: Model max_tokens.
+
+    Returns:
+        The generated content string or an error message.
+    """
+
+    prompt = ""
+    system_message = system_message_base  # Start with base
+
+    # --- Construct Prompt based on Content Type ---
+    if content_type == "Corpus Snippets":
+        if length_param <= 0: length_param = 5  # Default number of snippets
+        prompt = (
+            f"Generate exactly {length_param} distinct text snippets related to the topic: '{topic}'. "
+            f"Each snippet should be a few sentences long and focus on a different aspect if possible. "
+            f"Present each snippet clearly, perhaps separated by a blank line or a marker like '---'."
+        )
+        system_message = "You are an AI generating diverse text snippets for a data corpus."
+        # Adjust max_tokens based on expected number of snippets if not set
+        if max_tokens is None: max_tokens = length_param * 150  # Estimate
+
+    elif content_type == "Short Story":
+        if length_param <= 0: length_param = 300  # Default approx words
+        prompt = (
+            f"Write a short story (approximately {length_param} words) centered around the topic: '{topic}'. "
+            f"The story should have a clear beginning, middle, and end."
+        )
+        system_message = "You are a creative AI writing a short story."
+        if max_tokens is None: max_tokens = int(length_param * 2.5)  # Estimate
+
+    elif content_type == "Article":
+        if length_param <= 0: length_param = 500  # Default approx words
+        prompt = (
+            f"Write an informative article (approximately {length_param} words) about the topic: '{topic}'. "
+            f"The article should be well-structured, factual (to the best of your ability), and engaging."
+        )
+        system_message = "You are an AI assistant writing an informative article."
+        if max_tokens is None: max_tokens = int(length_param * 2.5)  # Estimate
+
+    else:
+        return f"Error: Unknown content type '{content_type}'."
+
+    if not prompt:
+        return "Error: Could not construct a valid prompt."
+
+    # --- Call the core generation function ---
+    generated_text = generate_synthetic_text(
+        prompt=prompt,
+        model=model,
+        system_message=system_message,
+        temperature=temperature,
+        top_p=top_p,
+        max_tokens=max_tokens
+    )
+
+    # Return the result (includes potential errors from generate_synthetic_text)
+    # Add a title for clarity
+    if not generated_text.startswith("Error:"):
+        return f"Generated {content_type} for topic '{topic}':\n\n{generated_text}"
+    else:
+        return generated_text  # Propagate the error
 
 # --- Main Execution (Example Usage) ---
 if __name__ == "__main__":
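The diff ends at the `__main__` guard, so the updated example usage itself is not visible here. Below is a minimal sketch of how the new generate_corpus_content function could be exercised in that block; the topic, model ID, and length value are illustrative assumptions, not taken from the diff.

# Illustrative only: every argument value below is an assumption.
if __name__ == "__main__":
    result = generate_corpus_content(
        topic="urban beekeeping",
        content_type="Corpus Snippets",  # or "Short Story" / "Article"
        length_param=5,                  # number of snippets for this content type
        model="openai/gpt-4o-mini",      # hypothetical model ID
    )
    print(result)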
|