#!/usr/bin/env python
# coding: utf-8

# In[14]:


# Note: difflib is part of the Python standard library and does not need to be installed.
get_ipython().system('pip install gradio newspaper3k transformers sentence-transformers openai lxml_html_clean')


# In[15]:


get_ipython().system('pip show openai')
get_ipython().system('pip uninstall newspaper3k -y')
get_ipython().system('pip install newspaper3k')


# In[16]:


import os
import yaml
import pandas as pd
import numpy as np

import azureml.core
from azureml.core import Workspace, Datastore, ComputeTarget
from azure.identity import DefaultAzureCredential
from azure.ai.ml import MLClient
from azure.ai.ml import command
from azure.ai.ml import Input, Output
from azure.ai.ml import load_component
from azure.ai.ml.entities import Environment, Data, PipelineJob, Job, Schedule
from datetime import datetime, timedelta

# perspective generation
from openai import OpenAI

from newspaper import Article
import gradio as gr

import json

import difflib


# In[17]:


# Read the YAML file
with open('./curify_api.yaml', 'r') as yaml_file:
    data = yaml.safe_load(yaml_file)

# Access the API keys and other configuration data
weaviate_url = data.get('weaviate').get('url')
weaviate_api_key = data.get('weaviate').get('api_key')
cohere_api_key = data.get('cohere').get('api_key')
openai_api_key = data.get('openai').get('api_key')
serper_api_key = data.get('serper').get('api_key')

os.environ["OPENAI_API_KEY"] = openai_api_key
os.environ["SERPER_API_KEY"] = serper_api_key
SUBSCRIPTION = data.get('azure').get('subscription_id')
RESOURCE_GROUP = data.get('azure').get('resource_group_name')
WS_NAME = data.get('azure').get('workspace_name')


# In[18]:


def convert_to_listed_json(input_string):
    """
    Converts a JSON-like string into a Python list of dictionaries.

    Parameters:
        input_string (str): The JSON-like string to be converted.

    Returns:
        list: The parsed JSON list, or None if the string cannot be parsed.
    """
    try:
        # Trim to the outermost square brackets, then parse into a Python object
        trimmed_string = input_string[input_string.index('['):input_string.rindex(']') + 1]
        return json.loads(trimmed_string)
    except (ValueError, json.JSONDecodeError):
        # ValueError covers a missing '[' or ']'; JSONDecodeError covers malformed JSON
        return None


def validate_and_extract_json(json_string):
    """
    Validates a JSON-like string and extracts the outermost JSON object.

    Args:
        json_string (str): The string to validate and extract from.

    Returns:
        dict: The parsed JSON object, or None if the string cannot be parsed.
    """
    try:
        # Trim to the outermost curly braces, then parse into a Python object
        trimmed_string = json_string[json_string.index('{'):json_string.rindex('}') + 1]
        return json.loads(trimmed_string)
    except (ValueError, json.JSONDecodeError):
        # ValueError covers a missing '{' or '}'; JSONDecodeError covers malformed JSON
        return None

def json_to_pandas(dat_json, dat_schema = {'name': "", 'description': ""}):
    """Converts parsed JSON to a DataFrame, falling back to a single schema row on failure."""
    try:
        return pd.DataFrame(dat_json)
    except Exception:
        return pd.DataFrame([dat_schema])

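# A minimal, self-contained sketch of how the JSON helpers above behave. The strings below
# are hypothetical examples of LLM-style output, not real API responses.
_example_list_output = 'Here are the projects: [{"name": "curify", "description": "digest and ideas"}] Done.'
_example_dict_output = 'Analysis: {"is_concrete": true, "task_category": "feature"}'

print(convert_to_listed_json(_example_list_output))       # -> list of dicts parsed from the [...] span
print(validate_and_extract_json(_example_dict_output))    # -> dict parsed from the {...} span
print(json_to_pandas(convert_to_listed_json(_example_list_output)))  # -> one-row DataFrame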

# In[19]:


from transformers import pipeline

summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

def summarize_content(text):
    summary = summarizer(text, max_length=350, min_length=40, do_sample=False)
    return summary[0]['summary_text']

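# Illustrative-only call to summarize_content; the sample text is arbitrary. Uncomment the
# print to run it (it invokes the BART model loaded above, which takes a few seconds).
sample_text = (
    "Large language models can be used to break a vague task description into concrete "
    "reasoning steps. Each step can then be prioritized and executed, turning free-form "
    "notes into an actionable plan with clear deliverables."
)
# print(summarize_content(sample_text))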

# In[20]:


client = OpenAI(
    api_key= os.environ.get("OPENAI_API_KEY"),  # This is the default and can be omitted
)

# Send the prompt to the OpenAI API
def call_openai_api(prompt):
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "system", "content": "You are a helpful assistant."},
                  {"role": "user", "content": prompt}],
        max_tokens=5000
    )

    return response.choices[0].message.content.strip()

def fn_task_analysis(project_context, task_description):
    prompt = (
            f"You are working in the context of {project_context}. "
            f"Your task is to analyze the following task and break it down into reasoning steps: {task_description}. "
            "In your analysis, please determine: 1) which project this item belongs to (it may also be a personal reflection or a random thought that does not fit an existing project); "
            "2) whether this idea is a concrete todo or still vague; "
            "3) what the category of the task is. "
            "Please output JSON with description, project_association, is_concrete, task_category as keys."
        )
    return call_openai_api(prompt)

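# For reference, a hypothetical example of the JSON shape the analysis prompt asks for
# (illustrative only; actual model output may vary and is parsed with validate_and_extract_json):
# {"description": "Build an interview bot for curify digest", "project_association": "curify",
#  "is_concrete": true, "task_category": "feature development"}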
# Function to break down a task (e.g., Research Paper Review) and create a reasoning path
def generate_reasoning_path(project_context, task_description):
    res_task_analysis = fn_task_analysis(project_context, task_description)
    prompt = (
            f"You are working in the context of {project_context}. "
            f"Your task is to analyze the following task and break it down into reasoning steps: {task_description}. "
            f"Please use the results of the task analysis: {res_task_analysis}. "
            "Guidelines for breaking down the task: "
            "If the task is to build some features, I would like to receive a prioritized design doc and a Gradio-based feature demo as the deliverable. "
            "If the task is related to blogs, papers, or talks, I would like you to find the exact reference, generate a summary, and convert it to a podcast. "
            "If the message contains reflections about feelings or opinions, please translate it to English, polish it, and publish it on Substack. "
            "If the task is tool research, reason about whether it is a competitive tool or an integration tool. For competitive tools, generate a table comparing the tool and our relevant tool. For integration tools, decide on a possible integration. "
            "If the task is a questionnaire or an interview, please deliver a questionnaire design. "
            "If the message points to personal or project issues, please use the following framework and write a brief memo: a) What Happened? (understanding the problem); b) What Can We Do About It? (generating solutions); c) So What? (evaluating the impact and moving forward). "
            "For idea brainstorming, I expect you to list potential ideas, construct a Feasibility Matrix or Impact/Effort Matrix, prioritize the ideas, set up an action plan with todos, and build a prototype. "
            "Please output the action and priority of each step; you do not need to give an explanation. "
            "Please omit the low-priority steps from the output. "
            "Please output the reasoning steps as JSON with reasoning_steps as the key."
        )

    res_steps = call_openai_api(prompt)
    #return res_task_analysis, res_steps

    try:
        json_task_analysis = validate_and_extract_json(res_task_analysis)
        json_steps = validate_and_extract_json(res_steps)

        return json_task_analysis, json_steps
    except ValueError as e:
        return None, None

# Function to store the reasoning path as JSON and use it for task execution
def store_and_execute_task(task_description, reasoning_path, json_key = 'reasoning_steps'):
    if reasoning_path is not None and isinstance(reasoning_path, dict) and json_key in reasoning_path:

        reasoning_steps = reasoning_path[json_key]
        # Example logic to simulate execution (this is just a placeholder)
        # for step in task_steps:
        #     step["status"] = "completed"  # Mark as completed after execution
        
        return reasoning_steps
    return None

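# A minimal sketch of the data shape store_and_execute_task expects. The dictionary below is a
# hypothetical example of what generate_reasoning_path might return, not a real model output.
_example_reasoning_path = {
    "reasoning_steps": [
        {"action": "Draft a prioritized design doc for the interview bot", "priority": "high"},
        {"action": "Build a Gradio-based feature demo", "priority": "high"},
    ]
}
# Returns the list under 'reasoning_steps', or None if the key or dict is missing
print(store_and_execute_task("Build an interview bot", _example_reasoning_path))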

# In[21]:


# Initialize dataframes for the schema
ideas_df = pd.DataFrame(columns=["Idea ID", "Content", "Tags"])

def extract_ideas(context, text):
    """
    Extract project ideas from text, with or without a context, and return in JSON format.

    Parameters:
        context (str): Context of the extraction. Can be empty.
        text (str): Text to extract ideas from.

    Returns:
        str: The raw LLM output string, expected to contain a listed JSON of projects.
    """
    if context:
        # Template when context is provided
        prompt = (
            f"You are working in the context of {context}. "
            "Please extract the ongoing projects with project name and description. "
            "Please output only the listed JSON as the output string. "
            f"Ongoing projects: {text}"
        )
    else:
        # Template when context is not provided
        prompt = (
            "Given the following information about the user, "
            "please extract the ongoing projects with project name and description. "
            "Please output only the listed JSON as the output string. "
            f"Ongoing projects: {text}"
        )

    # return the raw string
    return call_openai_api(prompt)

def df_to_string(df, empty_message = ''):
    """
    Converts a DataFrame to a string if it is not empty.
    If the DataFrame is empty, returns empty_message (an empty string by default).

    Parameters:
        df (pd.DataFrame): The DataFrame to be converted.
        empty_message (str): The string to return when the DataFrame is empty.
    
    Returns:
        str: A string representation of the DataFrame or an empty string.
    """
    if df.empty:
        return empty_message
    else:
        return df.to_string(index=False)

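# Quick check of df_to_string on both an empty and a populated DataFrame (sample values are made up).
print(df_to_string(ideas_df, empty_message='No ideas recorded yet.'))
print(df_to_string(pd.DataFrame([{'name': 'curify', 'description': 'AI productivity digest'}])))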

# In[22]:


def curify_ideas(project_description, task_description):

    # May need a task split step that semantically splits the task.

    str_projects = extract_ideas('AI-powered tools for productivity', project_description)
    json_projects = convert_to_listed_json(str_projects)

    # Generate reasoning path
    task_analysis, reasoning_path = generate_reasoning_path(project_description, task_description)
    
    # Store and simulate execution of task
    task_data = store_and_execute_task(task_description, reasoning_path)

    return json_to_pandas(json_projects), json_to_pandas(task_data), task_analysis


# In[23]:


project_description = 'work on a number of projects including curify (digest, ideas, careers, projects etc), and writing a book on LLM for recommendation system, educating my 3.5-year-old boy and working on a paper for LLM reasoning.'

# convert_to_listed_json(extract_ideas('AI-powered tools for productivity', project_description))

task_description = 'Build an interview bot for the curify digest project.'
task_analysis, reasoning_path = generate_reasoning_path(project_description, task_description)

store_and_execute_task(task_description, reasoning_path)


# In[ ]:


reasoning_path


# In[ ]:


# Gradio Demo
with gr.Blocks(
    css="""
    .gradio-table td {
        white-space: normal !important;
        word-wrap: break-word !important;
    }
    .gradio-table {
        width: 100% !important;  /* Adjust to 100% to fit the container */
        table-layout: fixed !important;  /* Fixed column widths */
        overflow-x: hidden !important;  /* Disable horizontal scrolling */
    }
    .gradio-container {
        overflow-x: hidden !important;  /* Disable horizontal scroll for entire container */
        padding: 0 !important;  /* Remove any default padding */
    }
    .gradio-column {
        max-width: 100% !important;  /* Ensure columns take up full width */
        overflow: hidden !important;  /* Hide overflow to prevent horizontal scroll */
    }
    .gradio-row {
        overflow-x: hidden !important;  /* Prevent horizontal scroll on rows */
    }
"""
) as demo:

    gr.Markdown("## Curify: Unified AI Tools for Productivity")
    
    with gr.Tab("Curify Idea"):
        with gr.Row():
            # Column 1: Webpage rendering
            with gr.Column():
                gr.Markdown("## Enter project descriptions.")

                project_input = gr.Textbox(
                    placeholder="Describe your project...",
                    label=None,
                    lines=5)

                gr.Markdown("## Enter task message.")
                idea_input = gr.Textbox(
                    label=None,
                    placeholder="Describe the task you want to execute (e.g., Research Paper Review)")
    
                task_btn = gr.Button("Generate task steps")

                gr.Markdown("## Projects Overview")
                project_list = gr.DataFrame(
                    type="pandas"
                )

            # Column 2: Summary and Perspectives
            with gr.Column():
                gr.Markdown("## Task analysis")
                task_analysis_txt = gr.Textbox(
                    label=None,
                    placeholder="Here is an analysis of your task...",
                    lines=3)

                gr.Markdown("## Execution path")
                task_steps = gr.DataFrame(
                    type="pandas"
                )

            task_btn.click(
                curify_ideas, 
                inputs=[project_input, idea_input], 
                outputs=[project_list, task_steps, task_analysis_txt]
            )

demo.launch(share=True)


# In[ ]: