# task = task_generation(sitemap)
import ast

from openai import OpenAI
from datasets import load_dataset
import json_repair
class DataPopulation:
def __init__(self, api_key):
# Set the API key during initialization
self.client = OpenAI(api_key=api_key)
self.conversation = [
{
"role": "system",
"content": (
"You are an intelligent assistant specialized in web page management tasks. "
"Your responsibilities include identifying relevant pages, updating page details, user data, and the sitemap as required."
)
}
]
self.feature_update_conversation = [
{
"role": "system",
"content": (
"You are an intelligent assistant specialized in web page management tasks. "
"Your responsibilities is to identify which type of actions (select vs text_input) does each feature represents."
)
}
]
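        # Note: this second conversation thread is initialized but never used by
        # process_data below; it appears reserved for a separate flow that
        # classifies each feature as a 'select' vs. 'text_input' action.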
def fetch_huggingface_dataset(self, dataset_name):
"""Fetch the dataset from Hugging Face."""
return load_dataset(dataset_name)
def gpt4_chat(self, conversation):
"""Send a chat request to GPT-4."""
response = self.client.chat.completions.create(
model="gpt-4",
messages=conversation,
            max_tokens=1000,  # upper bound on completion length; raise if responses get truncated
temperature=1.0,
)
return response.choices[0].message.content.strip()
def ask_for_relevant_pages(self, task, sitemap):
"""Identify relevant pages for the task from the sitemap."""
self.conversation.append({
"role": "user",
"content": (
f"Given the task: '{task}' and the sitemap:\n{sitemap}\n\n"
"Identify the page(s) relevant to this task. "
"Return the page names exactly as they appear in the sitemap, in JSON format. "
"For each relevant page, provide a brief explanation of its relevance. "
"Example response:\n{{\n 'Ride History': 'Displays previous ride data needed for the task.'\n}}"
)
})
response_content = self.gpt4_chat(self.conversation)
return response_content
def _update_user_data(self, task, relevant_page_details):
"""Populate the relevant user data for the task."""
self.conversation.append({
"role": "user",
"content": (
f"Given the task: '{task}' and the following task-relevant page details:\n{relevant_page_details}\n\n"
f"Update each page's 'user_data' value with essential information for task-completion."
f"For example, if a task ask us to retrieve previous order, then we will need to populate synthetic order history in user_data."
"Ensure output maintain the exact format and structure as input page details."
)
})
response_content = self.gpt4_chat(self.conversation)
return response_content
def ask_to_update_user_state(self, task, user_state):
"""Update the user state based on the task."""
self.conversation.append({
"role": "user",
"content": (
f"Given the task: '{task}', default user state:\n{user_state}, and user_data in chat history.\n\n"
"Initialize the user state values to reflect any initial status necessary for completing the task. "
"Ensure output maintain the exact format and structure as input page details."
)
})
response_content = self.gpt4_chat(self.conversation)
return json_repair.loads(response_content)
@staticmethod
def extract_uid_from_sitemap(sitemap, relevant_pages):
"""Extract UIDs for the relevant pages from the sitemap."""
uid = []
for page in relevant_pages:
try:
uid.append(sitemap['pages'][page]['uid'])
except KeyError:
print(f"Page name '{page}' not found in the sitemap.")
return uid
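
    # Illustrative sitemap shape assumed by extract_uid_from_sitemap (the page
    # names and uids here are hypothetical; the real structure comes from the
    # dataset loaded in process_data):
    #
    #     {"pages": {"Ride History": {"uid": "a1b2c3", ...},
    #                "Profile":      {"uid": "d4e5f6", ...}}}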
def process_data(self, task, hugging_face_url):
"""Process the task with the given dataset."""
dataset = self.fetch_huggingface_dataset(hugging_face_url)
# Extract the sitemap, page details, and user state from the dataset
        # Parse the serialized Python literals; ast.literal_eval is a safer
        # drop-in for eval here, assuming these fields hold plain literals
        # (dicts, lists, strings, numbers).
        sitemap = ast.literal_eval(dataset['train'][0]['value'])
        page_details = ast.literal_eval(dataset['train'][1]['value'])
        user_state = ast.literal_eval(dataset['train'][2]['value'])
# Step 1: Identify relevant pages
relevant_pages = self.ask_for_relevant_pages(task, sitemap)
self.conversation.append({"role": "assistant", "content": relevant_pages})
relevant_pages = json_repair.loads(relevant_pages)
target_page_names = relevant_pages.keys()
# Step 2: Extract UIDs for the relevant pages
page_uid = self.extract_uid_from_sitemap(sitemap, target_page_names)
# Step 3: Retrieve page details using the UIDs
relevant_page_details = {
uid: page_details[uid] for uid in page_uid if uid in page_details
}
# Step 4: Populate user data for the task (only for relevant pages)
updated_user_data = self._update_user_data(task, relevant_page_details)
self.conversation.append({"role": "assistant", "content": updated_user_data})
updated_user_data = json_repair.loads(updated_user_data)
for uid, page_data in updated_user_data.items():
page_details[uid]['user_data'] = page_data['user_data']
# Step 5: Update user state
updated_user_state = self.ask_to_update_user_state(task, user_state)
# Return the updated structures
return sitemap, page_details, updated_user_state
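
# Minimal usage sketch. Assumptions: OPENAI_API_KEY is set in the environment,
# and the task string and dataset id below are hypothetical placeholders, not
# values taken from this repo.
if __name__ == "__main__":
    import os

    populator = DataPopulation(api_key=os.environ["OPENAI_API_KEY"])
    sitemap, page_details, user_state = populator.process_data(
        task="Rebook my most recent ride.",
        hugging_face_url="your-org/your-sitemap-dataset",  # hypothetical dataset id
    )
    print(user_state)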