File size: 8,592 Bytes
2649124
 
 
 
 
 
 
41f933e
2649124
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
df9bd09
2649124
 
 
 
 
 
 
 
 
 
e74e7eb
2649124
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e74e7eb
2649124
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
from openai import OpenAI
import json_repair


class ChatbotSimulation:
    """Simulate navigating an app through a text menu driven by an LLM.

    Each round the user's (or agent's) input is sent to the chat model
    twice: once to obtain an updated ``user_state`` dictionary and once to
    render the options of the page that state points at.
    """

    # Keys that every user-state dictionary must carry; the rest of the
    # class indexes them directly.
    _REQUIRED_STATE_KEYS = ('current_page', 'task_completed')

    def __init__(self, site_map, page_details, user_state, task,
                 app_name, log_location, openai_api_key, agent='human',
                 max_steps=50, max_tokens=8192, buffer_tokens=500,
                 model='gpt-4'):
        """Store the simulation inputs and create the OpenAI client.

        Args:
            site_map: Mapping with a ``'pages'`` entry keyed by page name;
                each page entry is looked up for a ``'uid'``.
            page_details: Mapping from page uid to that page's details.
            user_state: State dictionary. It is modified in place:
                ``'current_page'`` is set to ``'Home'`` and
                ``'task_completed'`` to ``0``.
            task: Description of the task, interpolated into the prompts.
            app_name: Name of the simulated app, interpolated into prompts.
            log_location: Stored on the instance; not read by any method
                of this class.
            openai_api_key: API key handed to the ``OpenAI`` client.
            agent: ``'human'`` or ``'llm'`` (case-insensitive).
            max_steps: Stored on the instance; not enforced by this class.
            max_tokens: Conversation budget used when trimming history
                (measured by the word-count estimate, not real tokens).
            buffer_tokens: Amount reserved out of ``max_tokens``; also sent
                as the completion ``max_tokens`` for page/state requests.
            model: Chat model name used for every completion request.

        Raises:
            ValueError: If ``agent`` is neither ``'human'`` nor ``'llm'``.
        """
        self.sitemap = site_map
        self.page_details = page_details
        self.user_state = user_state
        self.user_state['current_page'] = 'Home'  # Initialize current page
        self.user_state['task_completed'] = 0
        self.task = task
        self.app_name = app_name
        self.log_location = log_location
        self.agent = agent.lower()
        if self.agent not in ['human', 'llm']:
            raise ValueError("Invalid agent type. Expected 'Human' or 'llm'.")
        self.max_steps = max_steps
        self.max_tokens = max_tokens
        self.buffer_tokens = buffer_tokens
        self.model = model
        self.conversation = []  # Stores recent conversation snippets
        self.prompt_count = 0  # Number of conversation rounds played so far
        self.client = OpenAI(api_key=openai_api_key)
        self.actions = []  # Every raw input received, in order

    def _get_page_uid(self, page_name):
        """Return the uid of ``page_name`` from the sitemap, or ``None``."""
        return self.sitemap['pages'].get(page_name, {}).get('uid')

    def _get_page_details(self, page_name):
        """Return the details of ``page_name``, or ``{}`` when unknown."""
        uid = self._get_page_uid(page_name)
        return self.page_details.get(uid, {})

    def _generate_system_prompt(self):
        """Build the system prompt describing the current page and state."""
        current_page = self.user_state['current_page']
        page_info = self._get_page_details(current_page)

        return f"""
You are interacting with the {self.app_name} app. Your task is: {self.task}.
You are currently on the {current_page} page. Current user state: {self.user_state}.

Page Information:
{page_info}

- **Features**: Represent available options the user can select on this page.
- **User Data**: Represents user-specific data accessible on this page.

Provide instructions or request input from the user. If the user provides an invalid action, respond with:
"Invalid action. Please select a valid option."

### Instruction Format:
You are at the {current_page} page. You have the following options:
1. Feature 1
2. Feature 2
3. Feature 3
4. Feature 4

Please enter your choice as the corresponding number.

Rules:
- Be sure to display all options that is available in features. 
- Be robotic and emotionless. Avoid offering any advice to the user.
- If a feature requires `input_text`, request input as: "Enter query as: [number]: query"
        """

    def _get_openai_response(self, prompt):
        """Trim the stored history, then request a completion for ``prompt``.

        Args:
            prompt: List of chat messages to send.

        Returns:
            The text of the first choice, or ``""`` when the API returns
            no text content.
        """
        self._trim_conversation()
        response = self.client.chat.completions.create(
            model=self.model,
            messages=prompt,
            max_tokens=self.buffer_tokens,  # Adjusted max_tokens if needed
            temperature=1.0,
        )
        # The content field is nullable; normalise so callers always get str.
        return response.choices[0].message.content or ""

    def _calculate_token_count(self, conversation):
        """Estimate tokens as the number of whitespace-separated words."""
        return sum(len(entry['content'].split()) for entry in conversation)

    def _trim_conversation(self):
        """Drop the oldest messages until the history fits the budget.

        The newest message is always kept, even if it alone exceeds the
        budget, so a request is never sent with an empty message list.
        """
        budget = self.max_tokens - self.buffer_tokens
        total = self._calculate_token_count(self.conversation)
        while total > budget and len(self.conversation) > 1:
            dropped = self.conversation.pop(0)
            # Keep a running total instead of recounting the whole history.
            total -= len(dropped['content'].split())

    def _build_state_update_prompt(self, user_input):
        """Return the prompt asking the model for the updated user state."""
        return f"""
        Update the user state based on the input: '{user_input}'.

        Current user state (JSON format): {self.user_state}

        Sitemap: {self.sitemap}

        Instructions:
        1. If the 'current_page' has changed, update it to a page from the sitemap.
        2. If the task is finished, update 'task_completed' to 1. Otherwise, leave it unchanged.
        3. If no updates are needed, return the user state exactly as provided, without modification.
        4. Preserve the **exact JSON structure** and **format** of the provided user state.
        5. The output **must be a single JSON dictionary** representing the updated user state—do not wrap it in a list.
        6. Do not change any other fields unless explicitly required by the instructions.

        Important:
        - Ensure 'current_page' and 'task_completed' are keys in the returned dictionary.
        - Return **only the JSON object** without additional output or wrapping.
        - **AVOID OUTPUT A LIST**, must be JSON!
        
        The format for each page should be:
        
        {{
            "page_name": page_name
            "features": {{}},
            "user_data": {{
                "Data Entry 1": "Details of Data Entry 1", 
                "Data Entry 2": "Details of Data Entry 2"
            }}
        }}
        """

    def _build_reformat_prompt(self, malformed_state):
        """Return the prompt asking the model to turn a list into one dict.

        The requested keys are the user-state keys, because the caller
        reads 'task_completed' and 'current_page' from the result.
        """
        return f'''
            The following output is not a single JSON dictionary: {malformed_state}
            Reformat it into one JSON dictionary representing the updated user state.
            Keep the same keys as the current user state: {self.user_state}
            The dictionary must contain the keys 'current_page' and 'task_completed'.
            Return **only the JSON object**, do not wrap it in a list.
            '''

    def _coerce_user_state(self, candidate):
        """Validate a parsed model reply as a user-state dictionary.

        Args:
            candidate: Object produced by parsing the model's reply.

        Returns:
            The current user state when ``candidate`` is not a dict;
            otherwise a copy of ``candidate`` in which any missing required
            key is filled in from the current user state.
        """
        if not isinstance(candidate, dict):
            # Unusable reply: keep the previous state rather than crash.
            return self.user_state
        merged = dict(candidate)
        for key in self._REQUIRED_STATE_KEYS:
            if key not in merged:
                merged[key] = self.user_state.get(key)
        return merged

    def _request_page_instructions(self):
        """Ask the model to render the current page and record the exchange.

        Appends the system prompt and the model's reply to the history.

        Returns:
            The instruction text produced by the model.
        """
        system_prompt = self._generate_system_prompt()
        self.conversation.append({"role": "system", "content": system_prompt})
        gpt_instruction = self._get_openai_response(self.conversation)
        self.conversation.append({"role": "assistant", "content": gpt_instruction})
        return gpt_instruction

    def one_conversation_round(self, user_input):
        """Process one input: update the state, then render the next page.

        Args:
            user_input: Text entered by the human or produced by the agent.

        Returns:
            A completion message when the updated state reports the task as
            done, otherwise the model's instructions for the current page.
        """
        # Count the round so the completion message reports real steps.
        self.prompt_count += 1

        # User provides input
        self.conversation.append({"role": "user", "content": user_input})
        self.actions.append(user_input)

        # Update user state using GPT's response
        update_prompt = self._build_state_update_prompt(user_input)
        self.conversation.append({"role": "assistant", "content": update_prompt})
        parsed_state = json_repair.loads(self._get_openai_response(self.conversation))

        if isinstance(parsed_state, list):
            # One retry asking the model to collapse the list into a dict.
            reformat_prompt = self._build_reformat_prompt(parsed_state)
            self.conversation.append({"role": "assistant", "content": reformat_prompt})
            parsed_state = json_repair.loads(self._get_openai_response(self.conversation))

        updated_state = self._coerce_user_state(parsed_state)

        if updated_state['task_completed']:
            return f"Task completed! You took {self.prompt_count} steps."

        self.user_state = updated_state

        # Each page starts from a fresh history containing only its prompt.
        self.conversation.clear()
        return self._request_page_instructions()

    def _generate_agent_input(self):
        """Ask the model to act as the agent and produce the next input.

        Reads the last entry of ``self.conversation``, so the history must
        contain at least one message when this is called.

        Returns:
            The agent's reply text (also printed to stdout).
        """
        last_message = self.conversation[-1]['content']
        agent_prompt = f"""
        Imagine you are an agent navigate through the {self.app_name} environment.
        Your overarching task is: {self.task}. You may have done some part of the task, or none at all.
        You will have access to all of your previous actions in the environment, as well as the last message from the assistant giving the current state of the environment.
        The last message from the assistant was: {last_message}
        Respond first with a brief "Plan" which suggests what steps you are going to take to accomplish the task, and what your immediate. 
        Then generate an "Action" which is the immediate next step you can take.
        """

        messages = [{"role": "system", "content": agent_prompt}]
        messages.extend({"role": "user", "content": action} for action in self.actions)
        messages.append({"role": "assistant", "content": last_message})

        agent_response = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            temperature=1.0,
        )
        agent_text = agent_response.choices[0].message.content
        print(f"LLM-Agent: {agent_text}")
        return agent_text

    def start_conversation(self):
        """Render the first page and return it prefixed with a greeting."""
        greeting = f'\n Welcome to {self.app_name} simulator! Your task is: {self.task}. \n'
        # GPT generates the page instructions
        return greeting + self._request_page_instructions()