File size: 6,888 Bytes
2649124
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# task = task_generation(sitemap)
from openai import OpenAI
from datasets import load_dataset
import json_repair


class DataPopulation:
    """Populate page/user data for a task by prompting an OpenAI chat model.

    The instance keeps a running ``conversation`` (a list of chat messages
    starting with a system prompt); every ``ask_*`` method appends its user
    prompt to that list before sending the whole history to the model.
    """

    def __init__(self, api_key):
        """Create the OpenAI client and seed the conversations.

        Args:
            api_key: API key passed to the ``OpenAI`` client constructor.
        """
        self.client = OpenAI(api_key=api_key)
        # Main conversation shared by all ask_* methods and process_data.
        self.conversation = [
            {
                "role": "system",
                "content": (
                    "You are an intelligent assistant specialized in web page management tasks. "
                    "Your responsibilities include identifying relevant pages, updating page details, user data, and the sitemap as required."
                )
            }
        ]
        # Separate conversation seed; not used by any method in this class as shown.
        self.feature_update_conversation = [
            {
                "role": "system",
                "content": (
                    "You are an intelligent assistant specialized in web page management tasks. "
                    "Your responsibilities is to identify which type of actions (select vs text_input) does each feature represents."
                )
            }
        ]

    def fetch_huggingface_dataset(self, dataset_name):
        """Fetch the dataset from Hugging Face via ``load_dataset``."""
        return load_dataset(dataset_name)

    def gpt4_chat(self, conversation):
        """Send a chat request to GPT-4 and return the stripped reply text.

        Args:
            conversation: List of chat messages (dicts with 'role'/'content').

        Returns:
            The first choice's message content with surrounding whitespace
            removed; an empty string if the API returned no content.
        """
        response = self.client.chat.completions.create(
            model="gpt-4",
            messages=conversation,
            max_tokens=1000,  # Adjusted max_tokens if needed
            temperature=1.0,
        )
        # The content field may be None (e.g. refusals); avoid None.strip().
        content = response.choices[0].message.content
        return (content or "").strip()

    # ------------------------------------------------------------------
    # Prompt builders (pure functions, kept separate so they are testable)
    # ------------------------------------------------------------------

    @staticmethod
    def _relevant_pages_prompt(task, sitemap):
        """Build the prompt asking which sitemap pages are relevant to ``task``."""
        return (
            f"Given the task: '{task}' and the sitemap:\n{sitemap}\n\n"
            "Identify the page(s) relevant to this task. "
            "Return the page names exactly as they appear in the sitemap, in JSON format. "
            "For each relevant page, provide a brief explanation of its relevance. "
            # Plain string (not an f-string), so braces must NOT be doubled.
            "Example response:\n{\n  'Ride History': 'Displays previous ride data needed for the task.'\n}"
        )

    @staticmethod
    def _user_data_prompt(task, relevant_page_details):
        """Build the prompt asking the model to fill in 'user_data' per page."""
        return (
            f"Given the task: '{task}' and the following page details:\n{relevant_page_details}\n\n"
            "Instructions:\n"
            "1. Ensure each page contains the following keys: 'page_name', 'features', and 'user_data' (even if empty).\n"
            "2. Update 'user_data' with essential information relevant to the task. Do not modify the 'features' section.\n"
            "3. Maintain the exact input structure, making changes only to 'user_data' where required.\n"
            "4. Confirm that both 'features' and 'user_data' are dictionaries.\n"
            "5. Verify that every page includes exactly the three required keys: 'page_name', 'features', and 'user_data'.\n"
            "6. Make sure 'user_data' must exist as a key! "
        )

    @staticmethod
    def _sitemap_update_prompt(sitemap, new_page):
        """Build the prompt asking the model to add ``new_page`` to the sitemap."""
        return (
            f"Given the current sitemap:\n{sitemap}\nand a new page: '{new_page}' (currently only has a 'uid'), update the sitemap by:\n"
            # These lines need the f prefix so the page name is interpolated.
            f"- Adding necessary details to '{new_page}', including 'links_to'.\n"
            f"- Updating 'links_to' in other relevant pages to include '{new_page}'.\n"
            "Ensure the output retains the exact structure of the input."
        )

    @staticmethod
    def _user_state_prompt(task, user_state):
        """Build the prompt asking the model to update the user state."""
        return (
            f"Given the task: '{task}' and the current user state:\n{user_state}\n\n"
            "Update the user state to reflect any changes necessary for completing the task. "
            "Only modify values that are required; maintain the exact structure of the input."
        )

    # ------------------------------------------------------------------
    # Model interactions
    # ------------------------------------------------------------------

    def ask_for_relevant_pages(self, task, sitemap):
        """Ask the model which sitemap pages are relevant to the task.

        Appends the prompt to ``self.conversation`` and returns the raw
        (unparsed) reply text.
        """
        self.conversation.append({
            "role": "user",
            "content": self._relevant_pages_prompt(task, sitemap),
        })
        return self.gpt4_chat(self.conversation)

    def _update_user_data(self, task, relevant_page_details):
        """Ask the model to populate 'user_data' for the given pages.

        Appends the prompt to ``self.conversation`` and returns the raw
        (unparsed) reply text.
        """
        self.conversation.append({
            "role": "user",
            "content": self._user_data_prompt(task, relevant_page_details),
        })
        return self.gpt4_chat(self.conversation)

    def ask_to_update_sitemap(self, sitemap, new_page):
        """Ask the model to add ``new_page`` to the sitemap and adjust links.

        Returns:
            The reply parsed with ``json_repair.loads``.
        """
        self.conversation.append({
            "role": "user",
            "content": self._sitemap_update_prompt(sitemap, new_page),
        })
        response_sitemap = self.gpt4_chat(self.conversation)
        return json_repair.loads(response_sitemap)

    def ask_to_update_user_state(self, task, user_state):
        """Ask the model to update the user state for the task.

        Returns:
            The reply parsed with ``json_repair.loads``.
        """
        self.conversation.append({
            "role": "user",
            "content": self._user_state_prompt(task, user_state),
        })
        response_content = self.gpt4_chat(self.conversation)
        return json_repair.loads(response_content)

    # ------------------------------------------------------------------
    # Pure helpers
    # ------------------------------------------------------------------

    @staticmethod
    def extract_uid_from_sitemap(sitemap, relevant_pages):
        """Return the 'uid' of each named page found under ``sitemap['pages']``.

        Page names missing from the sitemap (or lacking a 'uid') are reported
        on stdout and skipped; order of the found pages is preserved.
        """
        uid = []
        for page in relevant_pages:
            try:
                uid.append(sitemap['pages'][page]['uid'])
            except KeyError:
                print(f"Page name '{page}' not found in the sitemap.")
        return uid

    @staticmethod
    def _page_names_from_response(parsed):
        """Extract page names from the parsed relevant-pages reply.

        Accepts the expected ``{name: explanation}`` dict, and also tolerates
        a list of names or of such dicts. Anything else yields ``[]``.
        """
        if isinstance(parsed, dict):
            return list(parsed)
        if isinstance(parsed, list):
            names = []
            for item in parsed:
                if isinstance(item, str):
                    names.append(item)
                elif isinstance(item, dict):
                    names.extend(item)
            return names
        return []

    @staticmethod
    def _merge_user_data(page_details, updated_user_data):
        """Copy each returned page's 'user_data' into ``page_details`` in place.

        Entries whose uid is unknown, or that lack a 'user_data' key, are
        reported on stdout and skipped instead of raising.
        """
        if not isinstance(updated_user_data, dict):
            print("Model response for user data is not a mapping; nothing merged.")
            return
        for uid, page_data in updated_user_data.items():
            if uid not in page_details:
                print(f"Page uid '{uid}' returned by the model is not in page details; skipped.")
                continue
            if not isinstance(page_data, dict) or 'user_data' not in page_data:
                print(f"Page uid '{uid}' has no 'user_data' in the model response; skipped.")
                continue
            page_details[uid]['user_data'] = page_data['user_data']

    # ------------------------------------------------------------------
    # Pipeline
    # ------------------------------------------------------------------

    def process_data(self, task, hugging_face_url):
        """Run the full population pipeline for ``task``.

        Loads the dataset, finds the relevant pages, asks the model to fill
        their 'user_data', merges that into the page details, and asks the
        model for an updated user state.

        Args:
            task: Task description inserted into the prompts.
            hugging_face_url: Dataset identifier passed to ``load_dataset``.

        Returns:
            Tuple ``(sitemap, page_details, updated_user_state)``.
        """
        dataset = self.fetch_huggingface_dataset(hugging_face_url)

        # Rows 0/1/2 of the 'train' split hold the sitemap, page details and
        # user state as strings in their 'value' field.
        # NOTE(security): eval() executes arbitrary code contained in the
        # dataset. If the rows are plain Python literals, switch to
        # ast.literal_eval -- confirm the dataset format before changing.
        sitemap = eval(dataset['train'][0]['value'])
        page_details = eval(dataset['train'][1]['value'])
        user_state = eval(dataset['train'][2]['value'])

        # Step 1: Identify relevant pages
        relevant_pages = self.ask_for_relevant_pages(task, sitemap)
        self.conversation.append({"role": "assistant", "content": relevant_pages})
        target_page_names = self._page_names_from_response(
            json_repair.loads(relevant_pages)
        )
        if not target_page_names:
            print("No relevant page names could be parsed from the model response.")

        # Step 2: Extract UIDs for the relevant pages
        page_uid = self.extract_uid_from_sitemap(sitemap, target_page_names)

        # Step 3: Retrieve page details using the UIDs
        relevant_page_details = {
            uid: page_details[uid] for uid in page_uid if uid in page_details
        }

        # Step 4: Populate user data for the task (only for relevant pages)
        updated_user_data = self._update_user_data(task, relevant_page_details)
        self.conversation.append({"role": "assistant", "content": updated_user_data})
        self._merge_user_data(page_details, json_repair.loads(updated_user_data))

        # Step 5: Update user state
        updated_user_state = self.ask_to_update_user_state(task, user_state)

        # Return the updated structures
        return sitemap, page_details, updated_user_state