File size: 5,895 Bytes
2649124
 
 
 
 
 
 
 
 
5f90409
2649124
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54d66e1
2649124
 
 
 
 
 
 
 
 
54d66e1
 
2649124
 
54d66e1
2649124
 
 
 
 
54d66e1
2649124
 
 
 
76df96c
54d66e1
7224d3a
 
718b316
2649124
 
 
 
 
 
 
 
 
 
76df96c
 
718b316
2649124
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54d66e1
2649124
 
 
 
 
 
 
 
 
 
 
 
 
54d66e1
2649124
 
 
15af633
 
 
 
2649124
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# task = task_generation(sitemap)
from openai import OpenAI
from datasets import load_dataset
import json_repair


class DataPopulation:
    """Pre-populate page ``user_data`` and user state for a task via a chat model.

    The instance keeps a running ``conversation`` (a list of chat messages
    starting with a system prompt). Every ``ask_*`` / ``_update_*`` method
    appends a user message to it before querying the model, so later requests
    see the earlier ones as context.
    """

    def __init__(self, api_key):
        """Create the OpenAI client and seed the conversation.

        Args:
            api_key: API key passed straight to ``OpenAI(api_key=...)``.
        """
        self.client = OpenAI(api_key=api_key)
        self.conversation = [
            {
                "role": "system",
                "content": (
                    "You are an intelligent assistant specialized in web page management tasks. "
                    "Your responsibilities include identifying relevant pages, updating page details, user data, and the sitemap as required."
                ),
            }
        ]

    def fetch_huggingface_dataset(self, dataset_name):
        """Return the result of ``datasets.load_dataset(dataset_name)``."""
        return load_dataset(dataset_name)

    def gpt4_chat(self, conversation):
        """Send ``conversation`` to the ``gpt-4`` chat model and return its reply.

        Args:
            conversation: List of chat message dicts (``role`` / ``content``).

        Returns:
            The first choice's message text with surrounding whitespace
            stripped; an empty string if the message carries no content.
        """
        response = self.client.chat.completions.create(
            model="gpt-4",
            messages=conversation,
            max_tokens=1000,  # Adjusted max_tokens if needed
            temperature=0.7,
        )
        # ``content`` can be None; avoid an AttributeError on ``.strip()``.
        content = response.choices[0].message.content
        return (content or "").strip()

    def ask_for_relevant_pages(self, task, sitemap):
        """Ask the model which sitemap pages must hold data for ``task``.

        Appends the request to ``self.conversation`` and returns the raw model
        reply, which is expected to contain a short plan followed by
        ``PAGES:`` and a JSON object mapping page names to explanations.
        """
        self.conversation.append({
            "role": "user",
            "content": (
                f"Given the task: '{task}' and the sitemap:\n{sitemap}\n\n"
                # The trailing space keeps "sitemap" and "to" from being
                # fused together when the literals are concatenated.
                "Respond first with a brief 'Plan' which suggests what data we have to pre-populate the sitemap "
                "to make task accomplishable. Then identify the page(s) these data going to be stored on. "
                "Return the page names exactly as they appear in the sitemap, in JSON format. "
                "For each relevant page, provide a brief explanation of its relevance. "
                # Plain (non-f) string: single braces, so the example shows
                # one JSON object rather than literal "{{ ... }}".
                "Example response:\nPlanning sentences. PAGES: {\n  'Ride History': 'Displays previous ride data needed for the task.'\n}"
            ),
        })
        return self.gpt4_chat(self.conversation)

    def _update_user_data(self, task, relevant_page_details, relevant_pages):
        """Ask the model to fill ``user_data`` on the task-relevant pages.

        Args:
            task: Task description inserted into the prompt.
            relevant_page_details: Page details for the relevant pages,
                rendered into the prompt via string formatting.
            relevant_pages: Per-page relevance explanations, also rendered
                into the prompt.

        Returns:
            The raw model reply (not parsed here).
        """
        self.conversation.append({
            "role": "user",
            "content": (
                f"Given the task: '{task}' and the following task-relevant page details:\n{relevant_page_details}\n\n"
                # Each sentence ends with a space so consecutive literals do
                # not run together in the final prompt.
                f"Here is reason behind each relevant page: {relevant_pages}. "
                "Update each page's 'user_data' value with essential information for task-completion. "
                "For example, if a task ask us to retrieve previous order, then we will need to populate synthetic order history in user_data. "
                "Ensure output maintain the exact format and structure as input page details."
            ),
        })
        return self.gpt4_chat(self.conversation)

    def ask_to_update_user_state(self, task, user_state):
        """Ask the model to initialise ``user_state`` for ``task``.

        Returns:
            The model reply parsed with ``json_repair.loads``.
        """
        self.conversation.append({
            "role": "user",
            "content": (
                f"Given the task: '{task}', default user state:\n{user_state}, and user_data in chat history.\n\n"
                "Initialize the user state values to reflect any initial status necessary for completing the task. "
                "Ensure output maintain the exact format and structure as input page details."
            ),
        })
        response_content = self.gpt4_chat(self.conversation)
        return json_repair.loads(response_content)

    @staticmethod
    def extract_uid_from_sitemap(sitemap, relevant_pages):
        """Collect ``sitemap['pages'][name]['uid']`` for each name given.

        Names that are missing from the sitemap (or whose entry has no
        ``uid``) are reported on stdout and skipped.

        Returns:
            List of UIDs in the order the page names were supplied.
        """
        uid = []
        for page in relevant_pages:
            try:
                uid.append(sitemap['pages'][page]['uid'])
            except KeyError:
                print(f"Page name '{page}' not found in the sitemap.")
        return uid

    def process_data(self, task, hugging_face_url):
        """Run the full population pipeline for ``task``.

        Loads the dataset, asks the model for relevant pages, fills those
        pages' ``user_data`` and finally asks for an updated user state.

        Args:
            task: Task description.
            hugging_face_url: Dataset identifier passed to
                ``fetch_huggingface_dataset``.

        Returns:
            Tuple ``(sitemap, page_details, updated_user_state)``;
            ``page_details`` is updated in place.

        Raises:
            ValueError: If a model reply that must be a JSON object cannot be
                parsed into a dict.
        """
        dataset = self.fetch_huggingface_dataset(hugging_face_url)
        rows = dataset['train']

        # SECURITY(review): eval() executes arbitrary Python contained in the
        # dataset rows. Only use with datasets you fully trust; if the values
        # are plain literals, ast.literal_eval would be a safer replacement.
        sitemap = eval(rows[0]['value'])
        page_details = eval(rows[1]['value'])
        user_state = eval(rows[2]['value'])

        # Step 1: Identify relevant pages
        reply = self.ask_for_relevant_pages(task, sitemap)
        _, marker, pages_text = reply.partition("PAGES:")
        if not marker:
            # The model skipped the "PAGES:" marker; try the whole reply
            # instead of failing with an opaque IndexError.
            pages_text = reply
        pages_text = pages_text.strip()
        self.conversation.append({"role": "assistant", "content": pages_text})
        relevant_pages = json_repair.loads(pages_text)
        if not isinstance(relevant_pages, dict):
            raise ValueError(
                "Could not parse a JSON object of relevant pages from the "
                f"model response: {reply!r}"
            )

        # Step 2: Extract UIDs for the relevant pages
        page_uid = self.extract_uid_from_sitemap(sitemap, relevant_pages)

        # Step 3: Retrieve page details using the UIDs
        relevant_page_details = {
            uid: page_details[uid] for uid in page_uid if uid in page_details
        }

        # Step 4: Populate user data for the task (only for relevant pages)
        updated_user_data = self._update_user_data(task, relevant_page_details, relevant_pages)
        self.conversation.append({"role": "assistant", "content": updated_user_data})
        parsed_user_data = json_repair.loads(updated_user_data)
        if not isinstance(parsed_user_data, dict):
            raise ValueError(
                "Could not parse a JSON object of updated page details from "
                f"the model response: {updated_user_data!r}"
            )
        for uid, page_data in parsed_user_data.items():
            try:
                page_details[uid]['user_data'] = page_data['user_data']
            except (LookupError, TypeError):
                # Best effort: the model may return unknown UIDs or entries
                # without 'user_data'; skip those but keep going.
                print(f"Skipping user_data update for page '{uid}'.")
                continue

        # Step 5: Update user state
        updated_user_state = self.ask_to_update_user_state(task, user_state)

        # Return the updated structures
        return sitemap, page_details, updated_user_state