kaikaidai committed on
Commit
7d0e577
·
verified ·
1 Parent(s): 6e812c0

Create random_sample_generation.py

Browse files
Files changed (1) hide show
  1. random_sample_generation.py +183 -0
random_sample_generation.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from openai import OpenAI
2
+ import anthropic
3
+ import json
4
+ import re
5
+ import random
6
+ import os
7
+ from gen_api_answer import get_openai_response, get_anthropic_response
8
+
9
+ # Initialize clients: one Anthropic and one OpenAI SDK client, built once at import time.
+ # NOTE(review): no credentials are passed explicitly, so each SDK presumably reads its
+ # API key from the environment — confirm before importing this module without keys set.
+ # The functions visible in this file call the gen_api_answer helpers rather than these objects.
10
+ anthropic_client = anthropic.Anthropic()
11
+ openai_client = OpenAI()
12
+
13
# System prompts for generating a (human, ai) conversation pair.
# Each one asks the model for a JSON object with "human" and "ai" string fields.
# The "ai" placeholder is quoted so the format example is itself valid JSON and
# matches the *_WITH_GROUND_TRUTH variants below; an unquoted placeholder invites
# replies that json.loads cannot parse.
GOOD_SYSTEM_PROMPT = """You are an assistant that generates random conversations between a human and an AI assistant for testing purposes. The AI response generated should be a few sentences long. Format your output as JSON: {"human": "<human message>", "ai": "<AI assistant response>"}. Ensure the output is valid JSON, without additional formatting or explanations."""
BAD_SYSTEM_PROMPT = """You are an assistant that generates random conversations between a human and an AI assistant for testing purposes. The response should contain incorrect information, logical fallacies, or misleading explanations. It should sound plausible but be fundamentally wrong. The AI response generated should be a few sentences long. Format your output as JSON: {"human": "<human message>", "ai": "<AI assistant response>"}. Ensure the output is valid JSON, without additional formatting or explanations."""
AMBIGUOUS_SYSTEM_PROMPT = """You are an assistant that generates random conversations between a human and an AI assistant for testing purposes. The response should mix correct and incorrect information - it should contain some accurate points but also include nuanced, questionable claims or exaggerations. The AI response generated should be a few sentences long. Format your output as JSON: {"human": "<human message>", "ai": "<AI assistant response>"}. Ensure the output is valid JSON, without additional formatting or explanations."""

# Same three quality levels, but the model must also return a "ground_truth"
# reference answer alongside the conversation pair.
GOOD_SYSTEM_PROMPT_WITH_GROUND_TRUTH = """You are an assistant that generates random conversations between a human and an AI assistant for testing purposes, along with an ideal reference answer. The AI response generated should be a few sentences long and contain accurate information. The ground truth response should be a perfect, comprehensive answer that would score 5/5. Format your output as JSON: {"human": "<human message>", "ai": "<AI assistant response>", "ground_truth": "<perfect reference answer>"}. Ensure the output is valid JSON, without additional formatting or explanations."""
BAD_SYSTEM_PROMPT_WITH_GROUND_TRUTH = """You are an assistant that generates random conversations between a human and an AI assistant for testing purposes, along with an ideal reference answer. The AI response should be a few sentences long and contain incorrect information, logical fallacies, or misleading explanations. It should sound plausible but be fundamentally wrong. The ground truth response should be a perfect, comprehensive answer that would score 5/5. Format your output as JSON: {"human": "<human message>", "ai": "<AI assistant response>", "ground_truth": "<perfect reference answer>"}. Ensure the output is valid JSON, without additional formatting or explanations."""
AMBIGUOUS_SYSTEM_PROMPT_WITH_GROUND_TRUTH = """You are an assistant that generates random conversations between a human and an AI assistant for testing purposes, along with an ideal reference answer. The AI response should be a few sentences long and mix correct and incorrect information - it should contain some accurate points but also include nuanced, questionable claims or exaggerations. The ground truth response should be a perfect, comprehensive answer that would score 5/5. Format your output as JSON: {"human": "<human message>", "ai": "<AI assistant response>", "ground_truth": "<perfect reference answer>"}. Ensure the output is valid JSON, without additional formatting or explanations."""

# User-turn prompts sent together with one of the system prompts above.
GENERATION_PROMPT = """Please generate a random human message and an AI response in the format of a QA dataset. The human input should not be a one-word answer question like "What is the capital of France?". The AI response generated should be a few sentences long."""
GENERATION_PROMPT_WITH_GROUND_TRUTH = """Please generate:
1. A random human message (not a simple one-word answer question)
2. An AI response (a few sentences long)
3. A perfect reference answer that would score 5/5 on all criteria (e.g., concise, helpful, and accurate)

Format as JSON with "human", "ai", and "ground_truth" fields."""

# System prompt used when only an AI reply to a user-supplied message is needed
# (plain text reply, no JSON envelope requested).
RESPONSE_GENERATION_SYSTEM_PROMPT = "You are an assistant that generates random responses to human messages for testing purposes. Generate bad responses (with a mix of correct and incorrect information) 60% of the time and good responses 40% of the time. Do not say which type of response you are generating, just generate the response."
30
+
31
def _extract_human_ai(response, default_human, default_ai):
    """Pull the "human" and "ai" strings out of a model reply.

    Args:
        response: Raw reply text expected to contain one JSON object.
        default_human: Value used when "human" is missing, empty or not a string.
        default_ai: Value used when "ai" is missing, empty or not a string.

    Returns:
        A ``(human_message, ai_message)`` tuple of strings.

    Raises:
        TypeError: If ``response`` is not a string.
        ValueError: If no JSON object can be located or decoded
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    if not isinstance(response, str):
        raise TypeError(f"expected a string reply, got {type(response).__name__}")

    # Raw newlines inside JSON string values are invalid, so flatten them first.
    cleaned = response.replace('\n', ' ').replace('\r', '')

    # Replies are sometimes wrapped in Markdown fences or preceded by a short
    # preamble; keep only the outermost {...} span before decoding.
    match = re.search(r"\{.*\}", cleaned, re.DOTALL)
    if match is None:
        raise ValueError("no JSON object found in response")
    data = json.loads(match.group(0))

    human_message = data.get("human")
    if not isinstance(human_message, str) or not human_message.strip():
        human_message = default_human

    ai_message = data.get("ai")
    if not isinstance(ai_message, str) or not ai_message.strip():
        ai_message = default_ai

    return human_message, ai_message


def get_random_human_ai_pair():
    """Generate a random (human message, AI response) pair with an LLM.

    A system prompt is drawn at random (20% good, 20% bad, 60% ambiguous) and
    sent, together with ``GENERATION_PROMPT``, to either gpt-3.5-turbo or
    claude-3-5-haiku-latest (equal probability). The reply is expected to be
    a JSON object with "human" and "ai" fields.

    Returns:
        A ``(human_message, ai_message)`` tuple of strings. If the reply cannot
        be parsed, a built-in sample about muscle growth is returned instead;
        a single missing or invalid field falls back to its own default.

    Raises:
        Whatever the selected API helper raises; the call itself is not guarded.
    """
    # Select system prompt with specified probabilities
    system_prompt = random.choices(
        [GOOD_SYSTEM_PROMPT, BAD_SYSTEM_PROMPT, AMBIGUOUS_SYSTEM_PROMPT],
        weights=[0.2, 0.2, 0.6],  # 20% good, 20% bad, 60% ambiguous
    )[0]

    # Log which type of response is being generated
    prompt_type = {
        GOOD_SYSTEM_PROMPT: "good",
        BAD_SYSTEM_PROMPT: "bad",
        AMBIGUOUS_SYSTEM_PROMPT: "ambiguous",
    }[system_prompt]
    print(f"Generating {prompt_type} response")

    # Randomly choose between GPT-3.5 and Claude with equal (50/50) weights
    model_name, api_func = random.choices(
        [
            ("gpt-3.5-turbo", get_openai_response),
            ("claude-3-5-haiku-latest", get_anthropic_response),
        ],
        weights=[0.5, 0.5],
    )[0]

    # Generate response using selected model
    # NOTE(review): the helpers are assumed to return the reply text as a str — confirm.
    response = api_func(
        model_name=model_name,
        prompt=GENERATION_PROMPT,
        system_prompt=system_prompt,
        max_tokens=500,
        temperature=1,
    )

    # Default messages used when the reply is unusable
    default_human = "How do muscles grow?"
    default_ai = """Muscles grow through a process called skeletal muscle hypertrophy, which adds more myosin filaments to each muscle fiber, making the engine of the cell bigger and stronger over time. This is achieved through increased muscle tension and physical stress, breaking down muscle fiber. Muscle growth is also a direct consequence of resistance training and nutrition. People build muscle at different rates depending on their age, sex, and genetics, but muscle development significantly increases if exercise is done correctly and the body stores more protein through a process called protein synthesis."""

    try:
        human_message, ai_message = _extract_human_ai(
            response, default_human, default_ai
        )
    except (ValueError, TypeError) as e:
        # Best-effort: a malformed reply must not break the caller.
        print(f"Failed to parse response: {str(e)}\n {response}")
        human_message = default_human
        ai_message = default_ai
    else:
        # Debug logging
        print(f"Parsed response: human='{human_message}', ai='{ai_message[:50]}...'")

    return human_message, ai_message
84
+
85
def _extract_human_ai_ground_truth(response, defaults):
    """Pull the "human", "ai" and "ground_truth" strings out of a model reply.

    Args:
        response: Raw reply text expected to contain one JSON object.
        defaults: Mapping with "human", "ai" and "ground_truth" fallback strings,
            used field by field when a value is missing, empty or not a string.

    Returns:
        A ``(human_message, ai_message, ground_truth)`` tuple of strings.

    Raises:
        TypeError: If ``response`` is not a string.
        ValueError: If no JSON object can be located or decoded
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    if not isinstance(response, str):
        raise TypeError(f"expected a string reply, got {type(response).__name__}")

    # Raw newlines inside JSON string values are invalid, so flatten them first.
    cleaned = response.replace('\n', ' ').replace('\r', '')

    # Replies are sometimes wrapped in Markdown fences or preceded by a short
    # preamble; keep only the outermost {...} span before decoding.
    match = re.search(r"\{.*\}", cleaned, re.DOTALL)
    if match is None:
        raise ValueError("no JSON object found in response")
    data = json.loads(match.group(0))

    fields = []
    for key in ("human", "ai", "ground_truth"):
        value = data.get(key)
        if not isinstance(value, str) or not value.strip():
            value = defaults[key]
        fields.append(value)
    return tuple(fields)


def get_random_human_ai_ground_truth_pair():
    """Generate a random (human message, AI response, ground truth) triple.

    A ground-truth system prompt is drawn at random (20% good, 20% bad,
    60% ambiguous) and sent, together with
    ``GENERATION_PROMPT_WITH_GROUND_TRUTH``, to either gpt-3.5-turbo or
    claude-3-5-haiku-latest (equal probability). The reply is expected to be
    a JSON object with "human", "ai" and "ground_truth" fields.

    Returns:
        A ``(human_message, ai_message, ground_truth)`` tuple of strings. If the
        reply cannot be parsed, a built-in sample about muscle growth is
        returned; a single missing or invalid field falls back to its default.

    Raises:
        Whatever the selected API helper raises; the call itself is not guarded.
    """
    # Select system prompt with specified probabilities
    system_prompts = {
        "good": GOOD_SYSTEM_PROMPT_WITH_GROUND_TRUTH,
        "bad": BAD_SYSTEM_PROMPT_WITH_GROUND_TRUTH,
        "ambiguous": AMBIGUOUS_SYSTEM_PROMPT_WITH_GROUND_TRUTH,
    }

    prompt_type = random.choices(
        ["good", "bad", "ambiguous"],
        weights=[0.2, 0.2, 0.6],  # 20% good, 20% bad, 60% ambiguous
    )[0]

    system_prompt = system_prompts[prompt_type]
    print(f"Generating {prompt_type} response with ground truth")

    # Randomly choose between GPT-3.5 and Claude with 50/50 weights
    model_name, api_func = random.choices(
        [
            ("gpt-3.5-turbo", get_openai_response),
            ("claude-3-5-haiku-latest", get_anthropic_response),
        ],
        weights=[0.5, 0.5],
    )[0]

    # Default messages used when the reply is unusable
    defaults = {
        "human": "How do muscles grow?",
        "ai": """Muscles grow through a process called skeletal muscle hypertrophy, which adds more myosin filaments to each muscle fiber, making the engine of the cell bigger and stronger over time. This is achieved through increased muscle tension and physical stress, breaking down muscle fiber. Muscle growth is also a direct consequence of resistance training and nutrition. People build muscle at different rates depending on their age, sex, and genetics, but muscle development significantly increases if exercise is done correctly and the body stores more protein through a process called protein synthesis.""",
        "ground_truth": """Muscle growth (hypertrophy) occurs through a complex biological process involving several key mechanisms:

1. Mechanical Tension: Resistance training creates mechanical tension in muscle fibers, triggering molecular and cellular responses that promote growth.

2. Metabolic Stress: The depletion of energy resources and accumulation of metabolic byproducts during exercise contributes to muscle growth signaling.

3. Muscle Damage: Exercise-induced micro-damage to muscle fibers activates satellite cells, which help repair and build new muscle tissue.

4. Protein Synthesis: After exercise, increased protein synthesis rates exceed protein breakdown, leading to net muscle protein accretion.

5. Hormonal Response: Exercise triggers the release of growth-promoting hormones like testosterone, growth hormone, and IGF-1.

6. Recovery: Adequate rest between training sessions allows for repair and growth, supported by proper nutrition, particularly protein intake (1.6-2.2g/kg/day).

This process is influenced by factors including genetics, age, sex, nutrition, sleep quality, and training variables. Optimal muscle growth requires a structured resistance training program, adequate protein intake, sufficient calories, and proper recovery.""",
    }

    # Generate response using selected model
    # NOTE(review): the helpers are assumed to return the reply text as a str — confirm.
    response = api_func(
        model_name=model_name,
        prompt=GENERATION_PROMPT_WITH_GROUND_TRUTH,
        system_prompt=system_prompt,
        max_tokens=1000,  # Increased token limit to accommodate ground truth
        temperature=1,
    )

    # Parse the response to get all three components
    try:
        human_message, ai_message, ground_truth = _extract_human_ai_ground_truth(
            response, defaults
        )
    except (ValueError, TypeError) as e:
        # Best-effort: a malformed reply must not break the caller.
        print(f"Failed to parse response: {str(e)}\n {response}")
        human_message = defaults["human"]
        ai_message = defaults["ai"]
        ground_truth = defaults["ground_truth"]
    else:
        # Debug logging
        print(f"Parsed response: human='{human_message}', ai='{ai_message[:50]}...', ground_truth='{ground_truth[:50]}...'")

    return human_message, ai_message, ground_truth
159
+
160
def generate_ai_response(human_msg):
    """Generate an AI reply to ``human_msg`` using gpt-3.5-turbo.

    Args:
        human_msg: The user's message. ``None``, non-string, empty or
            whitespace-only input short-circuits without calling the API.

    Returns:
        A ``(response, False)`` tuple. ``response`` is the reply, an empty
        string for blank input, or an ``"Error generating response: ..."``
        message if anything fails. The second element is always ``False``
        (the button's interactive state).
    """
    # Guard against None / non-string input as well as blank text, so the
    # function never raises on what a UI callback may hand it.
    if not isinstance(human_msg, str) or not human_msg.strip():
        return "", False

    try:
        response = get_openai_response(
            "gpt-3.5-turbo",
            human_msg,
            system_prompt=RESPONSE_GENERATION_SYSTEM_PROMPT,
            max_tokens=1000,
            temperature=1,
        )
        # Extract just the response content since we don't need JSON format here
        if isinstance(response, str):
            # Clean up any JSON formatting if present
            try:
                data = json.loads(response)
            except json.JSONDecodeError:
                pass
            else:
                # A reply such as "42" or "[1, 2]" is valid JSON but not an
                # object; only unwrap a JSON object with a string "content".
                if isinstance(data, dict):
                    content = data.get("content")
                    if isinstance(content, str):
                        response = content
        return response, False  # Return response and button interactive state
    except Exception as e:
        # Top-level boundary: report the failure as text instead of raising.
        return f"Error generating response: {str(e)}", False