# task = task_generation(sitemap)
import ast

import json_repair
from datasets import load_dataset
from openai import OpenAI
class DataPopulation:
    """Drive GPT-4 through a sequence of web-page-management steps.

    The class keeps a running chat transcript in ``self.conversation`` so
    that each request sees the model's earlier answers (relevant pages are
    appended before user data is requested, etc.). A second, independent
    transcript (``self.feature_update_conversation``) is reserved for
    feature-type classification so it does not pollute the main one.
    """

    def __init__(self, api_key):
        # One client instance, reused for every chat request.
        self.client = OpenAI(api_key=api_key)
        self.conversation = [
            {
                "role": "system",
                "content": (
                    "You are an intelligent assistant specialized in web page management tasks. "
                    "Your responsibilities include identifying relevant pages, updating page details, user data, and the sitemap as required."
                ),
            }
        ]
        # FIX: the original prompt read "Your responsibilities is to identify
        # which type of actions ... does each feature represents" — garbled
        # grammar in an instruction the model must follow precisely.
        self.feature_update_conversation = [
            {
                "role": "system",
                "content": (
                    "You are an intelligent assistant specialized in web page management tasks. "
                    "Your responsibility is to identify which type of action (select vs text_input) each feature represents."
                ),
            }
        ]

    def fetch_huggingface_dataset(self, dataset_name):
        """Fetch the dataset identified by ``dataset_name`` from Hugging Face."""
        return load_dataset(dataset_name)

    def gpt4_chat(self, conversation):
        """Send ``conversation`` (a list of role/content dicts) to GPT-4.

        Returns the assistant reply text with surrounding whitespace stripped.
        """
        response = self.client.chat.completions.create(
            model="gpt-4",
            messages=conversation,
            max_tokens=1000,
            temperature=1.0,
        )
        return response.choices[0].message.content.strip()

    def ask_for_relevant_pages(self, task, sitemap):
        """Ask GPT-4 which sitemap pages are relevant to ``task``.

        Returns the raw model reply (JSON-ish text); the caller parses it,
        e.g. with ``json_repair.loads`` (see ``process_data``).
        """
        self.conversation.append({
            "role": "user",
            "content": (
                f"Given the task: '{task}' and the sitemap:\n{sitemap}\n\n"
                "Identify the page(s) relevant to this task. "
                "Return the page names exactly as they appear in the sitemap, in JSON format. "
                "For each relevant page, provide a brief explanation of its relevance. "
                # FIX: this segment is NOT an f-string, so the original
                # '{{'/'}}' escaping showed doubled braces to the model.
                "Example response:\n{\n 'Ride History': 'Displays previous ride data needed for the task.'\n}"
            )
        })
        return self.gpt4_chat(self.conversation)

    def _update_user_data(self, task, relevant_page_details):
        """Ask GPT-4 to populate 'user_data' for the given pages.

        Returns the raw model reply; the caller is responsible for parsing.
        """
        self.conversation.append({
            "role": "user",
            "content": (
                f"Given the task: '{task}' and the following page details:\n{relevant_page_details}\n\n"
                "Instructions:\n"
                "1. Ensure each page contains the following keys: 'page_name', 'features', and 'user_data' (even if empty).\n"
                "2. Update 'user_data' with essential information relevant to the task. Do not modify the 'features' section.\n"
                "3. Maintain the exact input structure, making changes only to 'user_data' where required.\n"
                # FIX: item 5 previously lacked its trailing newline, so
                # items 5 and 6 ran together in the prompt.
                "4. Confirm that both 'features' and 'user_data' are dictionaries.\n"
                "5. Verify that every page includes exactly the three required keys: 'page_name', 'features', and 'user_data'.\n"
                "6. Make sure 'user_data' must exist as a key!"
            )
        })
        return self.gpt4_chat(self.conversation)

    def ask_to_update_sitemap(self, sitemap, new_page):
        """Ask GPT-4 to integrate ``new_page`` into ``sitemap``.

        Returns the updated sitemap parsed from the model reply.
        """
        self.conversation.append({
            "role": "user",
            "content": (
                f"Given the current sitemap:\n{sitemap}\nand a new page: '{new_page}' (currently only has a 'uid'), update the sitemap by:\n"
                # FIX: the next two lines were plain strings, so the literal
                # text '{new_page}' reached the model un-substituted.
                f"- Adding necessary details to '{new_page}', including 'links_to'.\n"
                f"- Updating 'links_to' in other relevant pages to include '{new_page}'.\n"
                "Ensure the output retains the exact structure of the input."
            )
        })
        response_sitemap = self.gpt4_chat(self.conversation)
        return json_repair.loads(response_sitemap)

    def ask_to_update_user_state(self, task, user_state):
        """Ask GPT-4 to update ``user_state`` for ``task``; returns the parsed result."""
        self.conversation.append({
            "role": "user",
            "content": (
                f"Given the task: '{task}' and the current user state:\n{user_state}\n\n"
                "Update the user state to reflect any changes necessary for completing the task. "
                "Only modify values that are required; maintain the exact structure of the input."
            )
        })
        response_content = self.gpt4_chat(self.conversation)
        return json_repair.loads(response_content)

    @staticmethod
    def extract_uid_from_sitemap(sitemap, relevant_pages):
        """Return the UIDs of ``relevant_pages`` found under ``sitemap['pages']``.

        Pages missing from the sitemap are reported and skipped rather than
        raising, so one hallucinated page name does not abort the pipeline.
        """
        uids = []
        for page in relevant_pages:
            try:
                uids.append(sitemap['pages'][page]['uid'])
            except KeyError:
                print(f"Page name '{page}' not found in the sitemap.")
        return uids

    def process_data(self, task, hugging_face_url):
        """Run the full pipeline for ``task`` against a Hugging Face dataset.

        Returns a tuple ``(sitemap, page_details, updated_user_state)`` where
        ``page_details`` has had its 'user_data' entries refreshed in place.
        """
        dataset = self.fetch_huggingface_dataset(hugging_face_url)
        # SECURITY FIX: the dataset rows are remote content; eval() would
        # execute arbitrary code embedded in them. ast.literal_eval parses
        # Python literal structures (dicts, lists, strings, ...) only.
        sitemap = ast.literal_eval(dataset['train'][0]['value'])
        page_details = ast.literal_eval(dataset['train'][1]['value'])
        user_state = ast.literal_eval(dataset['train'][2]['value'])
        # Step 1: identify relevant pages; keep the assistant turn in the
        # transcript so later requests can refer back to it.
        relevant_pages = self.ask_for_relevant_pages(task, sitemap)
        self.conversation.append({"role": "assistant", "content": relevant_pages})
        relevant_pages = json_repair.loads(relevant_pages)
        target_page_names = relevant_pages.keys()
        # Step 2: map the page names to UIDs.
        page_uid = self.extract_uid_from_sitemap(sitemap, target_page_names)
        # Step 3: pull the details for just those pages.
        relevant_page_details = {
            uid: page_details[uid] for uid in page_uid if uid in page_details
        }
        # Step 4: populate user data for the relevant pages only, then merge
        # the answers back into the full page_details structure.
        updated_user_data = self._update_user_data(task, relevant_page_details)
        self.conversation.append({"role": "assistant", "content": updated_user_data})
        updated_user_data = json_repair.loads(updated_user_data)
        for uid, page_data in updated_user_data.items():
            page_details[uid]['user_data'] = page_data['user_data']
        # Step 5: update the user state to match the task.
        updated_user_state = self.ask_to_update_user_state(task, user_state)
        return sitemap, page_details, updated_user_state