Upload 2 files
- app.py +386 -0
- curify_api.yaml +17 -0
app.py
ADDED
@@ -0,0 +1,386 @@
#!/usr/bin/env python
# coding: utf-8

# In[14]:


# difflib is part of the Python standard library, so it is not pip-installed here.
get_ipython().system('pip install gradio newspaper3k transformers sentence-transformers openai lxml_html_clean')


# In[15]:


get_ipython().system('pip show openai')
get_ipython().system('pip uninstall newspaper3k -y')
get_ipython().system('pip install newspaper3k')

# In[16]:


import os
import yaml
import pandas as pd
import numpy as np

import azureml.core
from azureml.core import Workspace, Datastore, ComputeTarget
from azure.identity import DefaultAzureCredential
from azure.ai.ml import MLClient
from azure.ai.ml import command
from azure.ai.ml import Input, Output
from azure.ai.ml import load_component
from azure.ai.ml.entities import Environment, Data, PipelineJob, Job, Schedule
from datetime import datetime, timedelta

# perspective generation
import openai
from openai import OpenAI

from newspaper import Article
import gradio as gr

import json

import difflib

# In[17]:


# Read the YAML file
with open('./curify_api.yaml', 'r') as yaml_file:
    data = yaml.safe_load(yaml_file)

# Access the API keys and other configuration data
weaviate_url = data.get('weaviate').get('url')
weaviate_api_key = data.get('weaviate').get('api_key')
cohere_api_key = data.get('cohere').get('api_key')
openai_api_key = data.get('openai').get('api_key')
serper_api_key = data.get('serper').get('api_key')

os.environ["OPENAI_API_KEY"] = openai_api_key
os.environ["SERPER_API_KEY"] = serper_api_key
SUBSCRIPTION = data.get('azure').get('subscription_id')
RESOURCE_GROUP = data.get('azure').get('resource_group_name')
WS_NAME = data.get('azure').get('workspace_name')

# In[18]:


def convert_to_listed_json(input_string):
    """
    Converts a string to a listed JSON object.

    Parameters:
    input_string (str): The JSON-like string to be converted.

    Returns:
    list: A JSON object parsed into a Python list of dictionaries, or None if parsing fails.
    """
    try:
        # Trim to the outermost square brackets, then parse the string into a Python object
        trimmed_string = input_string[input_string.index('['):input_string.rindex(']') + 1]

        json_object = json.loads(trimmed_string)
        return json_object
    except ValueError:
        # Covers a missing '[' / ']' as well as json.JSONDecodeError (a ValueError subclass).
        return None


def validate_and_extract_json(json_string):
    """
    Validates the JSON string and extracts the outermost JSON object.

    Args:
    - json_string (str): The JSON string to validate and extract from.

    Returns:
    - dict: The parsed JSON object, or None if parsing fails.
    """
    # Trim to the outermost braces, then try to parse the JSON string
    try:
        trimmed_string = json_string[json_string.index('{'):json_string.rindex('}') + 1]
        parsed_json = json.loads(trimmed_string)
        return parsed_json
    except ValueError:
        # Covers a missing '{' / '}' as well as json.JSONDecodeError.
        return None


def json_to_pandas(dat_json, dat_schema={'name': "", 'description': ""}):
    dat_df = pd.DataFrame([dat_schema])
    try:
        dat_df = pd.DataFrame(dat_json)
    except Exception:
        # Fall back to the empty schema row if the LLM output cannot be converted.
        dat_df = pd.DataFrame([dat_schema])
    return dat_df

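
# A minimal usage sketch of the helpers above. The sample strings are
# illustrative assumptions about what a model reply might look like, not
# captured output.
# sample_list_str = 'Projects: [{"name": "curify", "description": "daily digest"}]'
# convert_to_listed_json(sample_list_str)    # -> [{'name': 'curify', 'description': 'daily digest'}]
# sample_obj_str = 'Result: {"is_concrete": true, "task_category": "feature"}'
# validate_and_extract_json(sample_obj_str)  # -> {'is_concrete': True, 'task_category': 'feature'}
# json_to_pandas(convert_to_listed_json(sample_list_str))  # -> DataFrame with name/description columns
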
# In[19]:


from transformers import pipeline

summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

def summarize_content(text):
    summary = summarizer(text, max_length=350, min_length=40, do_sample=False)
    return summary[0]['summary_text']

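
# Usage sketch (not executed here): summarize_content takes a long article string,
# e.g. the .text of a newspaper Article, and returns a single BART summary.
# article = Article("https://example.com/some-post")  # hypothetical URL
# article.download(); article.parse()
# print(summarize_content(article.text))
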
# In[20]:


client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),  # This is the default and can be omitted
)

# Send the prompt to the OpenAI API
def call_openai_api(prompt):
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "system", "content": "You are a helpful assistant."},
                  {"role": "user", "content": prompt}],
        max_tokens=5000
    )

    return response.choices[0].message.content.strip()

def fn_task_analysis(project_context, task_description):
    prompt = (
        f"You are working in the context of {project_context}. "
        f"Your task is to analyze the task and break it down into reasoning steps: {task_description} "
        "For the analysis, please determine 1) which project this item belongs to. It's possible that the idea is a personal reflection or random thought, not part of an existing project. "
        "2) whether this idea is a concrete todo or still vague. "
        "3) what the category of the task is. "
        "Please output in JSON with description, project_association, is_concrete, task_category as keys."
    )
    return call_openai_api(prompt)

# Function to break down a task (e.g., Research Paper Review) and create a reasoning path
def generate_reasoning_path(project_context, task_description):
    res_task_analysis = fn_task_analysis(project_context, task_description)
    prompt = (
        f"You are working in the context of {project_context}. "
        f"Your task is to analyze the task and break it down into reasoning steps: {task_description} "
        f"Please use the results of the task analysis: {res_task_analysis}. "
        "Guideline for breaking down the task: "
        "If the task is to build some features, I would like to receive a prioritized design doc and a Gradio-based feature demo as deliverables. "
        "If the task is related to some blogs, papers, or talks, I would like you to find the exact reference, generate a summary and convert it to a podcast. "
        "If the message has some reflections about feelings or opinions, please translate them to English, polish them and publish them onto Substack. "
        "If the task is tool research, reason about whether it is a competitive tool or an integration tool. For competitive tools, generate a table to compare the tool and our relevant tool. For integration tools, decide on a possible integration. "
        "If the task is a questionnaire or interview, please deliver a questionnaire design. "
        "If the message is pointing to some personal or project issues, please use the framework and write a brief memo: a) What Happened? — Understanding the Problem. b) What Can We Do About It? — Generating Solutions. c) So What? — Evaluating the Impact and Moving Forward. "
        "For idea brainstorming, I expect you to list potential ideas, construct the Feasibility Matrix or Impact/Effort Matrix, prioritize these ideas, set up an action plan with todos, and build the prototype. "
        "Please output the action and priority of each step; you do not need to give an explanation. "
        "Please leave the low-priority steps out of the output. "
        "Please output the reasoning steps in JSON with reasoning_steps as the key."
    )

    res_steps = call_openai_api(prompt)
    #return res_task_analysis, res_steps

    try:
        json_task_analysis = validate_and_extract_json(res_task_analysis)
        json_steps = validate_and_extract_json(res_steps)

        return json_task_analysis, json_steps
    except ValueError:
        return None, None

# Function to store the reasoning path as JSON and use it for task execution
def store_and_execute_task(task_description, reasoning_path, json_key='reasoning_steps'):
    if reasoning_path is not None and isinstance(reasoning_path, dict) and json_key in reasoning_path:

        reasoning_steps = reasoning_path[json_key]
        # Example logic to simulate execution (this is just a placeholder)
        # for step in task_steps:
        #     step["status"] = "completed"  # Mark as completed after execution

        return reasoning_steps
    return None

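
# A sketch of the reasoning-path shape that store_and_execute_task expects.
# The fields inside each step are illustrative assumptions, not a schema the
# model is guaranteed to return.
example_reasoning_path = {
    "reasoning_steps": [
        {"action": "Draft a prioritized design doc for the interview bot", "priority": "high"},
        {"action": "Build a Gradio-based demo of the interview flow", "priority": "medium"},
    ]
}
# store_and_execute_task("Build an interview bot", example_reasoning_path)
# -> returns the list stored under "reasoning_steps"
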
# In[21]:


# Initialize dataframes for the schema
ideas_df = pd.DataFrame(columns=["Idea ID", "Content", "Tags"])

def extract_ideas(context, text):
    """
    Extract project ideas from text, with or without a context, and return in JSON format.

    Parameters:
    context (str): Context of the extraction. Can be empty.
    text (str): Text to extract ideas from.

    Returns:
    str: The raw model output, expected to contain a listed JSON of ideas with name and description.
    """
    if context:
        # Template when context is provided
        prompt = (
            f"You are working in the context of {context}. "
            "Please extract the ongoing projects with project name and description. "
            "Please output only the listed JSON as the output string. "
            f"Ongoing projects: {text}"
        )
    else:
        # Template when context is not provided
        prompt = (
            "Given the following information about the user, "
            "please extract the ongoing projects with project name and description. "
            "Please output only the listed JSON as the output string. "
            f"Ongoing projects: {text}"
        )

    # return the raw string
    return call_openai_api(prompt)

def df_to_string(df, empty_message=''):
    """
    Converts a DataFrame to a string if it is not empty.
    If the DataFrame is empty, returns the empty_message.

    Parameters:
    df (pd.DataFrame): The DataFrame to be converted.
    empty_message (str): The string to return when the DataFrame is empty.

    Returns:
    str: A string representation of the DataFrame or the empty_message.
    """
    if df.empty:
        return empty_message
    else:
        return df.to_string(index=False)

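
# Illustrative sketch of how the raw model output from extract_ideas is meant to
# flow into the JSON helpers above; the literal string below is an assumption,
# not a captured response.
# raw_projects = '[{"name": "curify digest", "description": "daily AI digest"}]'
# json_to_pandas(convert_to_listed_json(raw_projects))  # -> DataFrame with name/description columns
# df_to_string(ideas_df, empty_message='No ideas recorded yet.')  # -> 'No ideas recorded yet.' (ideas_df starts empty)
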
# In[22]:


def curify_ideas(project_description, task_description):

    # May need a task split step that semantically splits the task.

    str_projects = extract_ideas('AI-powered tools for productivity', project_description)
    json_projects = convert_to_listed_json(str_projects)

    # Generate reasoning path
    task_analysis, reasoning_path = generate_reasoning_path(project_description, task_description)

    # Store and simulate execution of task
    task_data = store_and_execute_task(task_description, reasoning_path)

    return json_to_pandas(json_projects), json_to_pandas(task_data), task_analysis

# In[23]:


project_description = 'work on a number of projects including curify (digest, ideas, careers, projects etc), and writing a book on LLM for recommendation system, educating my 3.5-year-old boy and working on a paper for LLM reasoning.'

# convert_to_listed_json(extract_ideas('AI-powered tools for productivity', project_description))

task_description = 'Build an interview bot for the curify digest project.'
task_analysis, reasoning_path = generate_reasoning_path(project_description, task_description)

store_and_execute_task(task_description, reasoning_path)


# In[ ]:


reasoning_path

# In[ ]:


# Gradio Demo
with gr.Blocks(
    css="""
    .gradio-table td {
        white-space: normal !important;
        word-wrap: break-word !important;
    }
    .gradio-table {
        width: 100% !important;         /* Adjust to 100% to fit the container */
        table-layout: fixed !important; /* Fixed column widths */
        overflow-x: hidden !important;  /* Disable horizontal scrolling */
    }
    .gradio-container {
        overflow-x: hidden !important;  /* Disable horizontal scroll for entire container */
        padding: 0 !important;          /* Remove any default padding */
    }
    .gradio-column {
        max-width: 100% !important;     /* Ensure columns take up full width */
        overflow: hidden !important;    /* Hide overflow to prevent horizontal scroll */
    }
    .gradio-row {
        overflow-x: hidden !important;  /* Prevent horizontal scroll on rows */
    }
    """
) as demo:

    gr.Markdown("## Curify: Unified AI Tools for Productivity")

    with gr.Tab("Curify Idea"):
        with gr.Row():
            # Column 1: project and task inputs
            with gr.Column():
                gr.Markdown("## Enter project descriptions.")

                project_input = gr.Textbox(
                    placeholder="Describe your project...",
                    label=None,
                    lines=5)

                gr.Markdown("## Enter task message.")
                idea_input = gr.Textbox(
                    label=None,
                    placeholder="Describe the task you want to execute (e.g., Research Paper Review)")

                task_btn = gr.Button("Generate task steps")

                gr.Markdown("## Projects Overview")
                project_list = gr.DataFrame(
                    type="pandas"
                )

            # Column 2: task analysis and execution path
            with gr.Column():
                gr.Markdown("## Task analysis")
                task_analysis_txt = gr.Textbox(
                    label=None,
                    placeholder="Here is an analysis of your task...",
                    lines=3)

                gr.Markdown("## Execution path")
                task_steps = gr.DataFrame(
                    type="pandas"
                )

        task_btn.click(
            curify_ideas,
            inputs=[project_input, idea_input],
            outputs=[project_list, task_steps, task_analysis_txt]
        )

demo.launch(share=True)


# In[ ]:

curify_api.yaml
ADDED
@@ -0,0 +1,17 @@
weaviate:
  url: "https://grpc-ddyazhrqhkunrgfdsd7pg.c0.us-west3.gcp.weaviate.cloud/"
  api_key: "VIIzm0iRMIesfH9CsWdyRXD0YZnNKKBwZnDf"

cohere:
  api_key: "dAAGflqk0F0YaimLIQECZOrAXKuTUGU0rHkFs3Bu"

openai:
  api_key: "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"

serper:
  api_key: "6d85d4591e052751b8ba10a9568985ebc92fa234"

azure:
  subscription_id: "541beb67-718e-41c5-958e-8cc0ba95b210"
  resource_group_name: "awesome_rag_dev"
  workspace_name: "rag_book_demo"