#!/usr/bin/env python
# coding: utf-8

# In[14]:


# Note: difflib is part of the Python standard library and does not need to be installed.
get_ipython().system('pip install gradio newspaper3k transformers sentence-transformers openai lxml_html_clean')


# In[15]:


get_ipython().system('pip show openai')
get_ipython().system('pip uninstall newspaper3k -y')
get_ipython().system('pip install newspaper3k')


# In[16]:


import os
import yaml
import pandas as pd
import numpy as np

import azureml.core
from azureml.core import Workspace, Datastore, ComputeTarget
from azure.identity import DefaultAzureCredential
from azure.ai.ml import MLClient
from azure.ai.ml import command
from azure.ai.ml import Input, Output
from azure.ai.ml import load_component
from azure.ai.ml.entities import Environment, Data, PipelineJob, Job, Schedule
from datetime import datetime, timedelta

# perspective generation
from openai import OpenAI

from newspaper import Article
import gradio as gr

import json

import difflib


# In[17]:


# Read the YAML file
with open('./curify_api.yaml', 'r') as yaml_file:
    data = yaml.safe_load(yaml_file)

# Access the API keys and other configuration data
weaviate_url = data.get('weaviate').get('url')
weaviate_api_key = data.get('weaviate').get('api_key')
cohere_api_key = data.get('cohere').get('api_key')
openai_api_key = data.get('openai').get('api_key')
serper_api_key = data.get('serper').get('api_key')

os.environ["OPENAI_API_KEY"] = openai_api_key
os.environ["SERPER_API_KEY"] = serper_api_key
SUBSCRIPTION = data.get('azure').get('subscription_id')
RESOURCE_GROUP = data.get('azure').get('resource_group_name')
WS_NAME = data.get('azure').get('workspace_name')


# In[18]:


def convert_to_listed_json(input_string):
    """
    Converts a JSON-like string into a Python list of dictionaries.

    Parameters:
        input_string (str): The JSON-like string to be converted.

    Returns:
        list: The parsed JSON list, or None if the string cannot be parsed.
    """
    try:
        # Trim to the outermost square brackets, then parse into a Python object
        trimmed_string = input_string[input_string.index('['):input_string.rindex(']') + 1]
        return json.loads(trimmed_string)
    except (ValueError, json.JSONDecodeError):
        # ValueError covers a missing '[' or ']'; JSONDecodeError covers malformed JSON
        return None


def validate_and_extract_json(json_string):
    """
    Validates a JSON-like string and extracts the outermost JSON object.

    Args:
        json_string (str): The string to validate and extract from.

    Returns:
        dict: The parsed JSON object, or None if the string cannot be parsed.
    """
    try:
        # Trim to the outermost curly braces, then parse into a Python object
        trimmed_string = json_string[json_string.index('{'):json_string.rindex('}') + 1]
        return json.loads(trimmed_string)
    except (ValueError, json.JSONDecodeError):
        # ValueError covers a missing '{' or '}'; JSONDecodeError covers malformed JSON
        return None

def json_to_pandas(dat_json, dat_schema = {'name': "", 'description': ""}):
    """Converts parsed JSON to a DataFrame, falling back to a single schema row on failure."""
    try:
        return pd.DataFrame(dat_json)
    except Exception:
        return pd.DataFrame([dat_schema])

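# A minimal, self-contained sketch of how the JSON helpers above behave. The strings below
# are hypothetical examples of LLM-style output, not real API responses.
_example_list_output = 'Here are the projects: [{"name": "curify", "description": "digest and ideas"}] Done.'
_example_dict_output = 'Analysis: {"is_concrete": true, "task_category": "feature"}'

print(convert_to_listed_json(_example_list_output))       # -> list of dicts parsed from the [...] span
print(validate_and_extract_json(_example_dict_output))    # -> dict parsed from the {...} span
print(json_to_pandas(convert_to_listed_json(_example_list_output)))  # -> one-row DataFrame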

# In[19]:


from transformers import pipeline

summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

def summarize_content(text):
    summary = summarizer(text, max_length=350, min_length=40, do_sample=False)
    return summary[0]['summary_text']

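# Illustrative-only call to summarize_content; the sample text is arbitrary. Uncomment the
# print to run it (it invokes the BART model loaded above, which takes a few seconds).
sample_text = (
    "Large language models can be used to break a vague task description into concrete "
    "reasoning steps. Each step can then be prioritized and executed, turning free-form "
    "notes into an actionable plan with clear deliverables."
)
# print(summarize_content(sample_text))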

# In[20]:


client = OpenAI(
    api_key= os.environ.get("OPENAI_API_KEY"),  # This is the default and can be omitted
)

# Send the prompt to the OpenAI API
def call_openai_api(prompt):
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "system", "content": "You are a helpful assistant."},
                  {"role": "user", "content": prompt}],
        max_tokens=5000
    )

    return response.choices[0].message.content.strip()

def fn_task_analysis(project_context, task_description):
    prompt = (
            f"You are working in the context of {project_context}. "
            f"Your task is to analyze the following task and break it down into reasoning steps: {task_description}. "
            "In your analysis, please determine: 1) which project this item belongs to (it may also be a personal reflection or a random thought that does not fit an existing project); "
            "2) whether this idea is a concrete todo or still vague; "
            "3) what the category of the task is. "
            "Please output JSON with description, project_association, is_concrete, task_category as keys."
        )
    return call_openai_api(prompt)

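# For reference, a hypothetical example of the JSON shape the analysis prompt asks for
# (illustrative only; actual model output may vary and is parsed with validate_and_extract_json):
# {"description": "Build an interview bot for curify digest", "project_association": "curify",
#  "is_concrete": true, "task_category": "feature development"}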
# Function to break down a task (e.g., Research Paper Review) and create a reasoning path
def generate_reasoning_path(project_context, task_description):
    res_task_analysis = fn_task_analysis(project_context, task_description)
    prompt = (
            f"You are working in the context of {project_context}. "
            f"Your task is to analyze the following task and break it down into reasoning steps: {task_description}. "
            f"Please use the results of the task analysis: {res_task_analysis}. "
            "Guidelines for breaking down the task: "
            "If the task is to build some features, I would like to receive a prioritized design doc and a Gradio-based feature demo as the deliverable. "
            "If the task is related to blogs, papers, or talks, I would like you to find the exact reference, generate a summary, and convert it to a podcast. "
            "If the message contains reflections about feelings or opinions, please translate it to English, polish it, and publish it on Substack. "
            "If the task is tool research, reason about whether it is a competitive tool or an integration tool. For competitive tools, generate a table comparing the tool and our relevant tool. For integration tools, decide on a possible integration. "
            "If the task is a questionnaire or an interview, please deliver a questionnaire design. "
            "If the message points to personal or project issues, please use the following framework and write a brief memo: a) What Happened? (understanding the problem); b) What Can We Do About It? (generating solutions); c) So What? (evaluating the impact and moving forward). "
            "For idea brainstorming, I expect you to list potential ideas, construct a Feasibility Matrix or Impact/Effort Matrix, prioritize the ideas, set up an action plan with todos, and build a prototype. "
            "Please output the action and priority of each step; you do not need to give an explanation. "
            "Please omit the low-priority steps from the output. "
            "Please output the reasoning steps as JSON with reasoning_steps as the key."
        )

    res_steps = call_openai_api(prompt)
    #return res_task_analysis, res_steps

    try:
        json_task_analysis = validate_and_extract_json(res_task_analysis)
        json_steps = validate_and_extract_json(res_steps)

        return json_task_analysis, json_steps
    except ValueError as e:
        return None, None

# Function to store the reasoning path as JSON and use it for task execution
def store_and_execute_task(task_description, reasoning_path, json_key = 'reasoning_steps'):
    if reasoning_path is not None and isinstance(reasoning_path, dict) and json_key in reasoning_path:

        reasoning_steps = reasoning_path[json_key]
        # Example logic to simulate execution (this is just a placeholder)
        # for step in task_steps:
        #     step["status"] = "completed"  # Mark as completed after execution
        
        return reasoning_steps
    return None

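# A minimal sketch of the data shape store_and_execute_task expects. The dictionary below is a
# hypothetical example of what generate_reasoning_path might return, not a real model output.
_example_reasoning_path = {
    "reasoning_steps": [
        {"action": "Draft a prioritized design doc for the interview bot", "priority": "high"},
        {"action": "Build a Gradio-based feature demo", "priority": "high"},
    ]
}
# Returns the list under 'reasoning_steps', or None if the key or dict is missing
print(store_and_execute_task("Build an interview bot", _example_reasoning_path))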

# In[21]:


# Initialize dataframes for the schema
ideas_df = pd.DataFrame(columns=["Idea ID", "Content", "Tags"])

def extract_ideas(context, text):
    """
    Extract project ideas from text, with or without a context, and return in JSON format.

    Parameters:
        context (str): Context of the extraction. Can be empty.
        text (str): Text to extract ideas from.

    Returns:
        str: The raw LLM output string, expected to contain a listed JSON of projects.
    """
    if context:
        # Template when context is provided
        prompt = (
            f"You are working in the context of {context}. "
            "Please extract the ongoing projects with project name and description. "
            "Please output only the listed JSON as the output string. "
            f"Ongoing projects: {text}"
        )
    else:
        # Template when context is not provided
        prompt = (
            "Given the following information about the user, "
            "please extract the ongoing projects with project name and description. "
            "Please output only the listed JSON as the output string. "
            f"Ongoing projects: {text}"
        )

    # return the raw string
    return call_openai_api(prompt)

def df_to_string(df, empty_message = ''):
    """
    Converts a DataFrame to a string if it is not empty.
    If the DataFrame is empty, returns empty_message (an empty string by default).

    Parameters:
        df (pd.DataFrame): The DataFrame to be converted.
        empty_message (str): The string to return when the DataFrame is empty.
    
    Returns:
        str: A string representation of the DataFrame or an empty string.
    """
    if df.empty:
        return empty_message
    else:
        return df.to_string(index=False)

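# Quick check of df_to_string on both an empty and a populated DataFrame (sample values are made up).
print(df_to_string(ideas_df, empty_message='No ideas recorded yet.'))
print(df_to_string(pd.DataFrame([{'name': 'curify', 'description': 'AI productivity digest'}])))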

# In[22]:


def curify_ideas(project_description, task_description):

    # May need a task split step that semantically splits the task.

    str_projects = extract_ideas('AI-powered tools for productivity', project_description)
    json_projects = convert_to_listed_json(str_projects)

    # Generate reasoning path
    task_analysis, reasoning_path = generate_reasoning_path(project_description, task_description)
    
    # Store and simulate execution of task
    task_data = store_and_execute_task(task_description, reasoning_path)

    return json_to_pandas(json_projects), json_to_pandas(task_data), task_analysis


# In[23]:


project_description = 'work on a number of projects including curify (digest, ideas, careers, projects etc), and writing a book on LLM for recommendation system, educating my 3.5-year-old boy and working on a paper for LLM reasoning.'

# convert_to_listed_json(extract_ideas('AI-powered tools for productivity', project_description))

task_description = 'Build an interview bot for the curify digest project.'
task_analysis, reasoning_path = generate_reasoning_path(project_description, task_description)

store_and_execute_task(task_description, reasoning_path)


# In[ ]:


reasoning_path


# In[ ]:


# Gradio Demo
with gr.Blocks(
    css="""
    .gradio-table td {
        white-space: normal !important;
        word-wrap: break-word !important;
    }
    .gradio-table {
        width: 100% !important;  /* Adjust to 100% to fit the container */
        table-layout: fixed !important;  /* Fixed column widths */
        overflow-x: hidden !important;  /* Disable horizontal scrolling */
    }
    .gradio-container {
        overflow-x: hidden !important;  /* Disable horizontal scroll for entire container */
        padding: 0 !important;  /* Remove any default padding */
    }
    .gradio-column {
        max-width: 100% !important;  /* Ensure columns take up full width */
        overflow: hidden !important;  /* Hide overflow to prevent horizontal scroll */
    }
    .gradio-row {
        overflow-x: hidden !important;  /* Prevent horizontal scroll on rows */
    }
"""
) as demo:

    gr.Markdown("## Curify: Unified AI Tools for Productivity")
    
    with gr.Tab("Curify Idea"):
        with gr.Row():
            # Column 1: Webpage rendering
            with gr.Column():
                gr.Markdown("## Enter project descriptions.")

                project_input = gr.Textbox(
                    placeholder="Describe your project...",
                    label=None,
                    lines=5)

                gr.Markdown("## Enter task message.")
                idea_input = gr.Textbox(
                    label=None,
                    placeholder="Describe the task you want to execute (e.g., Research Paper Review)")
    
                task_btn = gr.Button("Generate task steps")

                gr.Markdown("## Projects Overview")
                project_list = gr.DataFrame(
                    type="pandas"
                )

            # Column 2: Summary and Perspectives
            with gr.Column():
                gr.Markdown("## Task analysis")
                task_analysis_txt = gr.Textbox(
                    label=None,
                    placeholder="Here is an analysis of your task...",
                    lines=3)

                gr.Markdown("## Execution path")
                task_steps = gr.DataFrame(
                    type="pandas"
                )

            task_btn.click(
                curify_ideas, 
                inputs=[project_input, idea_input], 
                outputs=[project_list, task_steps, task_analysis_txt]
            )

demo.launch(share=True)


# In[ ]: