File size: 5,857 Bytes
2649124
 
 
 
 
 
 
 
 
5f90409
2649124
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76df96c
7224d3a
 
718b316
2649124
 
 
 
 
 
 
 
 
 
76df96c
 
718b316
2649124
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# task = task_generation(sitemap)
from openai import OpenAI
from datasets import load_dataset
import json_repair


class DataPopulation:
    """Populate task-specific user data and user state via an LLM chat session.

    The instance keeps a running chat history in ``self.conversation``; every
    prompt sent by the ``ask_*`` / ``_update_user_data`` methods is appended to
    it, so later prompts can refer to earlier answers.  Note that the history
    is never reset, so repeated ``process_data`` calls on the same instance
    share one growing conversation.
    """

    def __init__(self, api_key):
        """Create the OpenAI client and seed both conversations.

        Args:
            api_key: API key handed to the ``OpenAI`` client constructor.
        """
        # Set the API key during initialization
        self.client = OpenAI(api_key=api_key)
        # Main conversation used by the page / user-data / user-state prompts.
        self.conversation = [
            {
                "role": "system",
                "content": (
                    "You are an intelligent assistant specialized in web page management tasks. "
                    "Your responsibilities include identifying relevant pages, updating page details, user data, and the sitemap as required."
                )
            }
        ]
        # Separate conversation seed for feature classification; it is not
        # used by any method visible in this class.
        self.feature_update_conversation = [
            {
                "role": "system",
                "content": (
                    "You are an intelligent assistant specialized in web page management tasks. "
                    "Your responsibilities is to identify which type of actions (select vs text_input) does each feature represents."
                )
            }
        ]

    def fetch_huggingface_dataset(self, dataset_name):
        """Fetch the dataset from Hugging Face.

        Args:
            dataset_name: Identifier passed straight to ``load_dataset``.

        Returns:
            Whatever ``load_dataset`` returns for that identifier.
        """
        return load_dataset(dataset_name)

    def gpt4_chat(self, conversation, *, model="gpt-4", max_tokens=1000,
                  temperature=1.0):
        """Send a chat request and return the stripped reply text.

        Args:
            conversation: List of ``{"role": ..., "content": ...}`` messages.
            model: Chat model name (defaults to the original ``"gpt-4"``).
            max_tokens: Upper bound on generated tokens.
            temperature: Sampling temperature.

        Returns:
            The first choice's message content with surrounding whitespace
            removed, or ``""`` when the API returns no text content.
        """
        response = self.client.chat.completions.create(
            model=model,
            messages=conversation,
            max_tokens=max_tokens,
            temperature=temperature,
        )
        content = response.choices[0].message.content
        # ``content`` can be None (e.g. no text was produced); avoid calling
        # ``.strip()`` on None.
        return (content or "").strip()

    def ask_for_relevant_pages(self, task, sitemap) -> str:
        """Identify relevant pages for the task from the sitemap.

        Appends the prompt to ``self.conversation`` and returns the raw model
        reply (expected to be a JSON object mapping page name to a reason).
        """
        self.conversation.append({
            "role": "user",
            "content": (
                f"Given the task: '{task}' and the sitemap:\n{sitemap}\n\n"
                "Identify the page(s) relevant to this task. "
                "Return the page names exactly as they appear in the sitemap, in JSON format. "
                "For each relevant page, provide a brief explanation of its relevance. "
                # Plain (non-f) string: braces must be single, otherwise the
                # model is shown a literal '{{ ... }}' example.
                "Example response:\n{\n  \"Ride History\": \"Displays previous ride data needed for the task.\"\n}"
            )
        })
        return self.gpt4_chat(self.conversation)

    def _update_user_data(self, task, relevant_page_details) -> str:
        """Populate the relevant user data for the task.

        Appends the prompt to ``self.conversation`` and returns the raw model
        reply (expected to mirror the structure of ``relevant_page_details``).
        """
        self.conversation.append({
            "role": "user",
            "content": (
                f"Given the task: '{task}' and the following task-relevant page details:\n{relevant_page_details}\n\n"
                # Trailing spaces keep the concatenated sentences separated.
                "Update each page's 'user_data' value with essential information for task-completion. "
                "For example, if a task ask us to retrieve previous order, then we will need to populate synthetic order history in user_data. "
                "Ensure output maintain the exact format and structure as input page details."
            )
        })
        return self.gpt4_chat(self.conversation)

    def ask_to_update_user_state(self, task, user_state):
        """Update the user state based on the task.

        Appends the prompt to ``self.conversation`` and returns the model
        reply parsed with ``json_repair.loads``.
        """
        self.conversation.append({
            "role": "user",
            "content": (
                f"Given the task: '{task}', default user state:\n{user_state}, and user_data in chat history.\n\n"
                "Initialize the user state values to reflect any initial status necessary for completing the task. "
                "Ensure output maintain the exact format and structure as input user state."
            )
        })
        response_content = self.gpt4_chat(self.conversation)
        return json_repair.loads(response_content)

    @staticmethod
    def extract_uid_from_sitemap(sitemap, relevant_pages):
        """Extract UIDs for the relevant pages from the sitemap.

        Args:
            sitemap: Mapping with a ``'pages'`` entry that maps page name to a
                mapping containing ``'uid'``.
            relevant_pages: Iterable of page names to look up.

        Returns:
            List of UIDs in input order; names missing from the sitemap are
            reported on stdout and skipped.
        """
        uid = []
        for page in relevant_pages:
            try:
                uid.append(sitemap['pages'][page]['uid'])
            except KeyError:
                print(f"Page name '{page}' not found in the sitemap.")
        return uid

    @staticmethod
    def _page_names_from_response(parsed):
        """Return the page names contained in a parsed relevant-pages reply."""
        if isinstance(parsed, dict):
            return list(parsed)
        if isinstance(parsed, list):
            # Tolerate a bare list of names instead of a name -> reason object.
            return [name for name in parsed if isinstance(name, str)]
        print("Relevant-pages response was not a JSON object; no pages selected.")
        return []

    @staticmethod
    def _merge_user_data(page_details, updated_user_data):
        """Copy each returned page's ``user_data`` into ``page_details`` in place."""
        if not isinstance(updated_user_data, dict):
            print("User-data response was not a JSON object; page details left unchanged.")
            return
        for uid, page_data in updated_user_data.items():
            # NOTE(review): JSON keys are strings; assumes page_details is
            # keyed by string UIDs as well — confirm against the dataset.
            if uid not in page_details:
                print(f"UID '{uid}' returned by the model is not in page details; skipped.")
                continue
            if not isinstance(page_data, dict) or 'user_data' not in page_data:
                print(f"No 'user_data' returned for UID '{uid}'; skipped.")
                continue
            page_details[uid]['user_data'] = page_data['user_data']

    def process_data(self, task, hugging_face_url):
        """Process the task with the given dataset.

        Loads the dataset, asks the model which pages matter, fills in their
        ``user_data`` and derives an initial user state.

        Args:
            task: Natural-language task description.
            hugging_face_url: Dataset identifier passed to ``load_dataset``.

        Returns:
            Tuple ``(sitemap, page_details, updated_user_state)``.
        """
        dataset = self.fetch_huggingface_dataset(hugging_face_url)

        # Extract the sitemap, page details, and user state from the dataset
        # SECURITY NOTE(review): eval() executes arbitrary code stored in the
        # dataset. Only use with trusted datasets; if the values are plain
        # Python literals, ast.literal_eval would be the safe replacement.
        sitemap = eval(dataset['train'][0]['value'])
        page_details = eval(dataset['train'][1]['value'])
        user_state = eval(dataset['train'][2]['value'])

        # Step 1: Identify relevant pages
        relevant_pages_raw = self.ask_for_relevant_pages(task, sitemap)
        self.conversation.append({"role": "assistant", "content": relevant_pages_raw})
        target_page_names = self._page_names_from_response(
            json_repair.loads(relevant_pages_raw)
        )

        # Step 2: Extract UIDs for the relevant pages
        page_uid = self.extract_uid_from_sitemap(sitemap, target_page_names)

        # Step 3: Retrieve page details using the UIDs
        relevant_page_details = {
            uid: page_details[uid] for uid in page_uid if uid in page_details
        }

        # Step 4: Populate user data for the task (only for relevant pages)
        updated_user_data_raw = self._update_user_data(task, relevant_page_details)
        self.conversation.append({"role": "assistant", "content": updated_user_data_raw})
        self._merge_user_data(page_details, json_repair.loads(updated_user_data_raw))

        # Step 5: Update user state
        updated_user_state = self.ask_to_update_user_state(task, user_state)

        # Return the updated structures
        return sitemap, page_details, updated_user_state