App_Simulator / task_specific_data_population.py
jjz5463's picture
initial commit
2649124
raw
history blame
6.89 kB
# task = task_generation(sitemap)
from openai import OpenAI
from datasets import load_dataset
import json_repair
class DataPopulation:
    """Populate task-specific data for a simulated web app with GPT-4.

    The instance keeps one running chat history (``self.conversation``) that
    every prompt-building method appends to, so later prompts are sent
    together with the earlier ones.
    """

    def __init__(self, api_key):
        """Create the OpenAI client and seed the chat histories.

        Args:
            api_key: OpenAI API key passed straight to ``OpenAI(api_key=...)``.
        """
        # Set the API key during initialization
        self.client = OpenAI(api_key=api_key)
        # Main chat history shared by all prompt methods of this class.
        self.conversation = [
            {
                "role": "system",
                "content": (
                    "You are an intelligent assistant specialized in web page management tasks. "
                    "Your responsibilities include identifying relevant pages, updating page details, user data, and the sitemap as required."
                )
            }
        ]
        # Separate history for feature classification; no method visible in
        # this class reads it.
        self.feature_update_conversation = [
            {
                "role": "system",
                "content": (
                    "You are an intelligent assistant specialized in web page management tasks. "
                    "Your responsibilities is to identify which type of actions (select vs text_input) does each feature represents."
                )
            }
        ]

    def fetch_huggingface_dataset(self, dataset_name):
        """Fetch the dataset from Hugging Face."""
        return load_dataset(dataset_name)

    def gpt4_chat(self, conversation):
        """Send a chat request to GPT-4.

        Args:
            conversation: List of ``{"role": ..., "content": ...}`` messages.

        Returns:
            The text of the first completion choice, stripped of surrounding
            whitespace.
        """
        response = self.client.chat.completions.create(
            model="gpt-4",
            messages=conversation,
            max_tokens=1000,  # Adjusted max_tokens if needed
            temperature=1.0,
        )
        return response.choices[0].message.content.strip()

    def ask_for_relevant_pages(self, task, sitemap):
        """Identify relevant pages for the task from the sitemap.

        Appends the request to ``self.conversation`` and returns the raw
        (unparsed) model reply. The caller is responsible for recording the
        reply in the conversation and for parsing it.
        """
        self.conversation.append({
            "role": "user",
            "content": (
                f"Given the task: '{task}' and the sitemap:\n{sitemap}\n\n"
                "Identify the page(s) relevant to this task. "
                "Return the page names exactly as they appear in the sitemap, in JSON format. "
                "For each relevant page, provide a brief explanation of its relevance. "
                # Plain (non-f) string: braces must be single, otherwise the
                # model is shown a doubled-brace example.
                "Example response:\n{\n 'Ride History': 'Displays previous ride data needed for the task.'\n}"
            )
        })
        return self.gpt4_chat(self.conversation)

    def _update_user_data(self, task, relevant_page_details):
        """Populate the relevant user data for the task.

        Appends the request to ``self.conversation`` and returns the raw
        (unparsed) model reply.
        """
        self.conversation.append({
            "role": "user",
            "content": (
                f"Given the task: '{task}' and the following page details:\n{relevant_page_details}\n\n"
                "Instructions:\n"
                "1. Ensure each page contains the following keys: 'page_name', 'features', and 'user_data' (even if empty).\n"
                "2. Update 'user_data' with essential information relevant to the task. Do not modify the 'features' section.\n"
                "3. Maintain the exact input structure, making changes only to 'user_data' where required.\n"
                "4. Confirm that both 'features' and 'user_data' are dictionaries.\n"
                # Trailing newline keeps items 5 and 6 on separate lines.
                "5. Verify that every page includes exactly the three required keys: 'page_name', 'features', and 'user_data'.\n"
                "6. Make sure 'user_data' must exist as a key! "
            )
        })
        return self.gpt4_chat(self.conversation)

    def ask_to_update_sitemap(self, sitemap, new_page):
        """Update the sitemap with the new page and adjust links.

        Returns:
            The model reply parsed with ``json_repair.loads``.
        """
        self.conversation.append({
            "role": "user",
            "content": (
                f"Given the current sitemap:\n{sitemap}\nand a new page: '{new_page}' (currently only has a 'uid'), update the sitemap by:\n"
                # These segments need the f-prefix too; without it the model
                # receives the literal text "{new_page}".
                f"- Adding necessary details to '{new_page}', including 'links_to'.\n"
                f"- Updating 'links_to' in other relevant pages to include '{new_page}'.\n"
                "Ensure the output retains the exact structure of the input."
            )
        })
        response_sitemap = self.gpt4_chat(self.conversation)
        return json_repair.loads(response_sitemap)

    def ask_to_update_user_state(self, task, user_state):
        """Update the user state based on the task.

        Returns:
            The model reply parsed with ``json_repair.loads``.
        """
        self.conversation.append({
            "role": "user",
            "content": (
                f"Given the task: '{task}' and the current user state:\n{user_state}\n\n"
                "Update the user state to reflect any changes necessary for completing the task. "
                "Only modify values that are required; maintain the exact structure of the input."
            )
        })
        response_content = self.gpt4_chat(self.conversation)
        return json_repair.loads(response_content)

    @staticmethod
    def extract_uid_from_sitemap(sitemap, relevant_pages):
        """Extract UIDs for the relevant pages from the sitemap.

        Args:
            sitemap: Mapping with a ``'pages'`` entry that maps page names to
                dicts containing a ``'uid'``.
            relevant_pages: Iterable of page names to look up.

        Returns:
            List of UIDs in input order. Names that cannot be resolved are
            reported on stdout and skipped.
        """
        uid = []
        for page in relevant_pages:
            try:
                uid.append(sitemap['pages'][page]['uid'])
            except KeyError:
                print(f"Page name '{page}' not found in the sitemap.")
        return uid

    def process_data(self, task, hugging_face_url):
        """Process the task with the given dataset.

        Args:
            task: Natural-language task description inserted into the prompts.
            hugging_face_url: Dataset identifier passed to ``load_dataset``.

        Returns:
            Tuple ``(sitemap, page_details, updated_user_state)``.
            ``page_details`` is updated in place with the generated
            ``'user_data'`` of the relevant pages.
        """
        dataset = self.fetch_huggingface_dataset(hugging_face_url)

        # Extract the sitemap, page details, and user state from the dataset.
        # NOTE(security): eval() executes arbitrary code stored in the
        # dataset. Only use datasets you trust; if the stored values are plain
        # Python literals, ast.literal_eval would be the safe replacement.
        sitemap = eval(dataset['train'][0]['value'])
        page_details = eval(dataset['train'][1]['value'])
        user_state = eval(dataset['train'][2]['value'])

        # Step 1: Identify relevant pages
        relevant_pages = self.ask_for_relevant_pages(task, sitemap)
        self.conversation.append({"role": "assistant", "content": relevant_pages})
        relevant_pages = json_repair.loads(relevant_pages)
        target_page_names = relevant_pages.keys()

        # Step 2: Extract UIDs for the relevant pages
        page_uid = self.extract_uid_from_sitemap(sitemap, target_page_names)

        # Step 3: Retrieve page details using the UIDs
        relevant_page_details = {
            uid: page_details[uid] for uid in page_uid if uid in page_details
        }

        # Step 4: Populate user data for the task (only for relevant pages)
        updated_user_data = self._update_user_data(task, relevant_page_details)
        self.conversation.append({"role": "assistant", "content": updated_user_data})
        updated_user_data = json_repair.loads(updated_user_data)
        for uid, page_data in updated_user_data.items():
            # The model output is not guaranteed to follow the requested
            # structure; skip entries that cannot be merged instead of
            # aborting the whole run.
            if uid not in page_details:
                print(f"Page uid '{uid}' not found in the page details.")
                continue
            if not isinstance(page_data, dict) or 'user_data' not in page_data:
                print(f"No 'user_data' returned for page uid '{uid}'.")
                continue
            page_details[uid]['user_data'] = page_data['user_data']

        # Step 5: Update user state
        updated_user_state = self.ask_to_update_user_state(task, user_state)

        # Return the updated structures
        return sitemap, page_details, updated_user_state