# task = task_generation(sitemap)
import ast

from openai import OpenAI
from datasets import load_dataset
import json_repair


class DataPopulation:
    """GPT-4-driven pipeline for web-page management tasks.

    Given a task description and a Hugging Face dataset holding a sitemap,
    per-page details, and a user state, this class asks GPT-4 to identify
    the pages relevant to the task, populate their ``user_data``, and update
    the user state. A running chat transcript (``self.conversation``) is
    maintained so each step sees the full prior context.
    """

    def __init__(self, api_key):
        """Initialize the OpenAI client and seed the system prompts.

        Args:
            api_key: OpenAI API key used for all chat-completion calls.
        """
        self.client = OpenAI(api_key=api_key)
        # Main transcript: requests and assistant replies are appended here
        # so later prompts retain context from earlier steps.
        self.conversation = [
            {
                "role": "system",
                "content": (
                    "You are an intelligent assistant specialized in web page management tasks. "
                    "Your responsibilities include identifying relevant pages, updating page details, user data, and the sitemap as required."
                ),
            }
        ]
        # Separate transcript reserved for classifying features as
        # select vs text_input actions; not used by process_data().
        self.feature_update_conversation = [
            {
                "role": "system",
                "content": (
                    "You are an intelligent assistant specialized in web page management tasks. "
                    "Your responsibilities is to identify which type of actions (select vs text_input) does each feature represents."
                ),
            }
        ]

    def fetch_huggingface_dataset(self, dataset_name):
        """Fetch the dataset from Hugging Face.

        Args:
            dataset_name: Hub id (or path) accepted by ``datasets.load_dataset``.

        Returns:
            The loaded ``DatasetDict``.
        """
        return load_dataset(dataset_name)

    def gpt4_chat(self, conversation):
        """Send a chat request to GPT-4 and return the stripped reply text.

        Args:
            conversation: List of ``{"role", "content"}`` message dicts.

        Returns:
            The assistant message content with surrounding whitespace removed.
        """
        response = self.client.chat.completions.create(
            model="gpt-4",
            messages=conversation,
            max_tokens=1000,  # Adjusted max_tokens if needed
            temperature=1.0,
        )
        return response.choices[0].message.content.strip()

    def ask_for_relevant_pages(self, task, sitemap):
        """Identify relevant pages for the task from the sitemap.

        Returns:
            The raw model reply (expected to be JSON mapping page name ->
            relevance explanation); the caller is responsible for parsing it.
        """
        self.conversation.append({
            "role": "user",
            "content": (
                f"Given the task: '{task}' and the sitemap:\n{sitemap}\n\n"
                "Identify the page(s) relevant to this task. "
                "Return the page names exactly as they appear in the sitemap, in JSON format. "
                "For each relevant page, provide a brief explanation of its relevance. "
                # f-prefix so the {{ }} escapes render as single braces in the
                # prompt (the original sent literal doubled braces to the model).
                f"Example response:\n{{\n 'Ride History': 'Displays previous ride data needed for the task.'\n}}"
            )
        })
        response_content = self.gpt4_chat(self.conversation)
        return response_content

    def _update_user_data(self, task, relevant_page_details):
        """Populate the relevant user data for the task.

        Returns:
            The raw model reply (expected JSON with updated ``user_data``
            per page); the caller parses it with ``json_repair``.
        """
        self.conversation.append({
            "role": "user",
            "content": (
                f"Given the task: '{task}' and the following page details:\n{relevant_page_details}\n\n"
                "Instructions:\n"
                "1. Ensure each page contains the following keys: 'page_name', 'features', and 'user_data' (even if empty).\n"
                "2. Update 'user_data' with essential information relevant to the task. Do not modify the 'features' section.\n"
                "3. Maintain the exact input structure, making changes only to 'user_data' where required.\n"
                "4. Confirm that both 'features' and 'user_data' are dictionaries.\n"
                # Newline added so items 5 and 6 no longer run together.
                "5. Verify that every page includes exactly the three required keys: 'page_name', 'features', and 'user_data'.\n"
                "6. Make sure 'user_data' must exist as a key! "
            )
        })
        response_content = self.gpt4_chat(self.conversation)
        return response_content

    def ask_to_update_sitemap(self, sitemap, new_page):
        """Update the sitemap with the new page and adjust links.

        Returns:
            The model reply parsed leniently via ``json_repair.loads``.
        """
        self.conversation.append({
            "role": "user",
            "content": (
                f"Given the current sitemap:\n{sitemap}\nand a new page: '{new_page}' (currently only has a 'uid'), update the sitemap by:\n"
                # f-prefixes added: these lines previously sent the literal
                # text "{new_page}" to the model instead of the page name.
                f"- Adding necessary details to '{new_page}', including 'links_to'.\n"
                f"- Updating 'links_to' in other relevant pages to include '{new_page}'.\n"
                "Ensure the output retains the exact structure of the input."
            )
        })
        response_sitemap = self.gpt4_chat(self.conversation)
        return json_repair.loads(response_sitemap)

    def ask_to_update_user_state(self, task, user_state):
        """Update the user state based on the task.

        Returns:
            The model reply parsed leniently via ``json_repair.loads``.
        """
        self.conversation.append({
            "role": "user",
            "content": (
                f"Given the task: '{task}' and the current user state:\n{user_state}\n\n"
                "Update the user state to reflect any changes necessary for completing the task. "
                "Only modify values that are required; maintain the exact structure of the input."
            )
        })
        response_content = self.gpt4_chat(self.conversation)
        return json_repair.loads(response_content)

    @staticmethod
    def extract_uid_from_sitemap(sitemap, relevant_pages):
        """Extract UIDs for the relevant pages from the sitemap.

        Pages missing from the sitemap are reported and skipped rather than
        raising, so one bad model answer doesn't abort the whole run.

        Args:
            sitemap: Dict with a ``'pages'`` mapping of page name -> page info.
            relevant_pages: Iterable of page names to look up.

        Returns:
            List of UIDs for the page names that were found.
        """
        uid = []
        for page in relevant_pages:
            try:
                uid.append(sitemap['pages'][page]['uid'])
            except KeyError:
                print(f"Page name '{page}' not found in the sitemap.")
        return uid

    def process_data(self, task, hugging_face_url):
        """Process the task with the given dataset.

        Args:
            task: Natural-language task description.
            hugging_face_url: Hub id of the dataset; rows 0/1/2 of the
                ``train`` split must hold the sitemap, page details, and
                user state as Python/JSON literal strings.

        Returns:
            Tuple of (sitemap, updated page_details, updated user_state).
        """
        dataset = self.fetch_huggingface_dataset(hugging_face_url)

        # Parse the stored literals safely: ast.literal_eval only accepts
        # Python literals, unlike eval() which would execute arbitrary code
        # embedded in the (untrusted) dataset.
        sitemap = ast.literal_eval(dataset['train'][0]['value'])
        page_details = ast.literal_eval(dataset['train'][1]['value'])
        user_state = ast.literal_eval(dataset['train'][2]['value'])

        # Step 1: Identify relevant pages
        relevant_pages = self.ask_for_relevant_pages(task, sitemap)
        self.conversation.append({"role": "assistant", "content": relevant_pages})
        relevant_pages = json_repair.loads(relevant_pages)
        target_page_names = relevant_pages.keys()

        # Step 2: Extract UIDs for the relevant pages
        page_uid = self.extract_uid_from_sitemap(sitemap, target_page_names)

        # Step 3: Retrieve page details using the UIDs
        relevant_page_details = {
            uid: page_details[uid] for uid in page_uid if uid in page_details
        }

        # Step 4: Populate user data for the task (only for relevant pages)
        updated_user_data = self._update_user_data(task, relevant_page_details)
        self.conversation.append({"role": "assistant", "content": updated_user_data})
        updated_user_data = json_repair.loads(updated_user_data)
        for uid, page_data in updated_user_data.items():
            page_details[uid]['user_data'] = page_data['user_data']

        # Step 5: Update user state
        updated_user_state = self.ask_to_update_user_state(task, user_state)

        # Return the updated structures
        return sitemap, page_details, updated_user_state