#!/usr/bin/env python
# coding: utf-8

# In[14]:
# difflib ships with the Python standard library, so it does not need to be installed via pip
get_ipython().system('pip install gradio newspaper3k transformers sentence-transformers openai lxml_html_clean')

# In[15]:

get_ipython().system('pip show openai')
get_ipython().system('pip uninstall newspaper3k -y')
get_ipython().system('pip install newspaper3k')

# In[16]:
import os
import yaml
import pandas as pd
import numpy as np

import azureml.core
from azureml.core import Workspace, Datastore, ComputeTarget
from azure.identity import DefaultAzureCredential
from azure.ai.ml import MLClient
from azure.ai.ml import command
from azure.ai.ml import Input, Output
from azure.ai.ml import load_component
from azure.ai.ml.entities import Environment, Data, PipelineJob, Job, Schedule
from datetime import datetime, timedelta

# perspective generation
import openai
from openai import OpenAI
from newspaper import Article
import gradio as gr
import json
import difflib
# In[17]:

# Read the YAML file
with open('./curify_api.yaml', 'r') as yaml_file:
    data = yaml.safe_load(yaml_file)

# Access the API keys and other configuration data
weaviate_url = data.get('weaviate').get('url')
weaviate_api_key = data.get('weaviate').get('api_key')
cohere_api_key = data.get('cohere').get('api_key')
openai_api_key = data.get('openai').get('api_key')
serper_api_key = data.get('serper').get('api_key')

os.environ["OPENAI_API_KEY"] = openai_api_key
os.environ["SERPER_API_KEY"] = serper_api_key

SUBSCRIPTION = data.get('azure').get('subscription_id')
RESOURCE_GROUP = data.get('azure').get('resource_group_name')
WS_NAME = data.get('azure').get('workspace_name')
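# A minimal sketch of the expected curify_api.yaml layout, inferred from the keys
# read above (the real file may contain additional entries):
#
# weaviate:
#   url: https://...
#   api_key: <key>
# cohere:
#   api_key: <key>
# openai:
#   api_key: <key>
# serper:
#   api_key: <key>
# azure:
#   subscription_id: <id>
#   resource_group_name: <name>
#   workspace_name: <name>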
# In[18]:

def convert_to_listed_json(input_string):
    """
    Converts a string to a listed JSON object.

    Parameters:
        input_string (str): The JSON-like string to be converted.

    Returns:
        list: A JSON object parsed into a Python list of dictionaries,
        or None if no valid JSON list can be extracted.
    """
    try:
        # Trim to the outermost brackets and parse into a Python object
        trimmed_string = input_string[input_string.index('['):input_string.rindex(']') + 1]
        return json.loads(trimmed_string)
    except (ValueError, json.JSONDecodeError):
        # ValueError covers missing brackets; JSONDecodeError covers malformed JSON
        return None
def validate_and_extract_json(json_string):
    """
    Validates a JSON string and extracts the outermost JSON object.

    Args:
        json_string (str): The JSON string to validate and extract from.

    Returns:
        dict: The parsed JSON object, or None if parsing fails.
    """
    try:
        # Trim to the outermost braces and parse
        trimmed_string = json_string[json_string.index('{'):json_string.rindex('}') + 1]
        return json.loads(trimmed_string)
    except (ValueError, json.JSONDecodeError):
        return None
def json_to_pandas(dat_json, dat_schema={'name': "", 'description': ""}):
    """Convert parsed JSON to a DataFrame, falling back to a single schema row on failure."""
    try:
        dat_df = pd.DataFrame(dat_json)
    except Exception:
        dat_df = pd.DataFrame([dat_schema])
    return dat_df
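# A small usage sketch for the helpers above, using made-up LLM output strings
# (illustrative only; real model output will vary):
#
# projects = convert_to_listed_json('Here you go: [{"name": "curify", "description": "digest tool"}]')
# analysis = validate_and_extract_json('{"is_concrete": true, "task_category": "feature"}')
# json_to_pandas(projects)  # -> DataFrame with "name" and "description" columns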
# In[19]:

from transformers import pipeline

# Hugging Face summarization pipeline using the facebook/bart-large-cnn model
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

def summarize_content(text):
    summary = summarizer(text, max_length=350, min_length=40, do_sample=False)
    return summary[0]['summary_text']
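# Usage sketch (illustrative; any article text of a few paragraphs works):
# article_text = "..."  # e.g., body text fetched with newspaper3k's Article
# print(summarize_content(article_text))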
# In[20]:

client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),  # This is the default and can be omitted
)

# Send the prompt to the OpenAI API
def call_openai_api(prompt):
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "system", "content": "You are a helpful assistant."},
                  {"role": "user", "content": prompt}],
        max_tokens=5000
    )
    return response.choices[0].message.content.strip()
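# Quick sketch (requires a valid OPENAI_API_KEY; the prompt text is illustrative):
# call_openai_api("Summarize the goals of the curify project in one sentence.")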
def fn_task_analysis(project_context, task_description):
    prompt = (
        f"You are working in the context of {project_context}. "
        f"Your task is to analyze the following task and break it down into reasoning steps: {task_description}. "
        "For the analysis, please determine 1) which project this item belongs to; it is possible that the idea is a personal reflection or random thought, not part of an existing project; "
        "2) whether this idea is a concrete to-do or still vague; "
        "3) what the category of the task is. "
        "Please output JSON with description, project_association, is_concrete, and task_category as keys."
    )
    return call_openai_api(prompt)
# Function to break down a task (e.g., Research Paper Review) and create a reasoning path
def generate_reasoning_path(project_context, task_description):
    res_task_analysis = fn_task_analysis(project_context, task_description)

    prompt = (
        f"You are working in the context of {project_context}. "
        f"Your task is to analyze the following task and break it down into reasoning steps: {task_description}. "
        f"Please use the results of the task analysis: {res_task_analysis}. "
        "Guidelines for breaking down the task: "
        "If the task is to build some features, I would like to receive a prioritized design doc and a Gradio-based feature demo as deliverables. "
        "If the task is related to blogs, papers, or talks, I would like you to find the exact reference, generate a summary, and convert it to a podcast. "
        "If the message contains reflections about feelings or opinions, please translate it to English, polish it, and publish it on Substack. "
        "If the task is tool research, reason about whether it is a competitive tool or an integration tool. For competitive tools, generate a table comparing the tool and our relevant tool. For integration tools, decide on a possible integration. "
        "If the task is a questionnaire or interview, please deliver a questionnaire design. "
        "If the message points to personal or project issues, please use this framework and write a brief memo: a) What Happened? — Understanding the Problem. b) What Can We Do About It? — Generating Solutions. c) So What? — Evaluating the Impact and Moving Forward. "
        "For idea brainstorming, I expect you to list potential ideas, construct a Feasibility Matrix or Impact/Effort Matrix, prioritize the ideas, set up an action plan with to-dos, and build the prototype. "
        "Please output the action and priority of each step; you do not need to give an explanation. "
        "Please omit the low-priority steps from the output. "
        "Please output the reasoning steps in JSON with reasoning_steps as the key."
    )
    res_steps = call_openai_api(prompt)

    # Both extraction helpers return None on failure, so no exception handling is needed here
    json_task_analysis = validate_and_extract_json(res_task_analysis)
    json_steps = validate_and_extract_json(res_steps)

    return json_task_analysis, json_steps
# Function to store the reasoning path as JSON and use it for task execution
def store_and_execute_task(task_description, reasoning_path, json_key='reasoning_steps'):
    if reasoning_path is not None and isinstance(reasoning_path, dict) and json_key in reasoning_path:
        reasoning_steps = reasoning_path[json_key]

        # Example logic to simulate execution (this is just a placeholder)
        # for step in reasoning_steps:
        #     step["status"] = "completed"  # Mark as completed after execution

        return reasoning_steps
    return None
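# Standalone sketch with a hypothetical reasoning path (field values are illustrative,
# not actual model output):
# example_path = {"reasoning_steps": [{"action": "Draft prioritized design doc", "priority": "high"}]}
# store_and_execute_task("Build an interview bot", example_path)  # -> the list of steps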
# In[21]:

# Initialize dataframes for the schema
ideas_df = pd.DataFrame(columns=["Idea ID", "Content", "Tags"])

def extract_ideas(context, text):
    """
    Extract project ideas from text, with or without a context, and return in JSON format.

    Parameters:
        context (str): Context of the extraction. Can be empty.
        text (str): Text to extract ideas from.

    Returns:
        str: The raw model output, expected to contain a JSON list of projects,
        each with a name and description.
    """
    if context:
        # Template when context is provided
        prompt = (
            f"You are working in the context of {context}. "
            "Please extract the ongoing projects with project name and description. "
            "Please output only the listed JSON as the output string. "
            f"Ongoing projects: {text}"
        )
    else:
        # Template when context is not provided
        prompt = (
            "Given the following information about the user, "
            "please extract the ongoing projects with project name and description. "
            "Please output only the listed JSON as the output string. "
            f"Ongoing projects: {text}"
        )

    # Return the raw string
    return call_openai_api(prompt)
def df_to_string(df, empty_message=''):
    """
    Converts a DataFrame to a string if it is not empty.
    If the DataFrame is empty, returns the empty message.

    Parameters:
        df (pd.DataFrame): The DataFrame to be converted.
        empty_message (str): The string to return when the DataFrame is empty.

    Returns:
        str: A string representation of the DataFrame, or the empty message.
    """
    if df.empty:
        return empty_message
    else:
        return df.to_string(index=False)
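# Usage sketch with the (currently empty) ideas_df defined above:
# df_to_string(ideas_df, empty_message='No ideas recorded yet.')  # -> 'No ideas recorded yet.'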
# In[22]:

def curify_ideas(project_description, task_description):
    # May need a task split step that semantically splits the task.
    str_projects = extract_ideas('AI-powered tools for productivity', project_description)
    json_projects = convert_to_listed_json(str_projects)

    # Generate reasoning path
    task_analysis, reasoning_path = generate_reasoning_path(project_description, task_description)

    # Store and simulate execution of task
    task_data = store_and_execute_task(task_description, reasoning_path)

    return json_to_pandas(json_projects), json_to_pandas(task_data), task_analysis
# In[23]:

project_description = 'work on a number of projects including curify (digest, ideas, careers, projects, etc.), writing a book on LLMs for recommendation systems, educating my 3.5-year-old boy, and working on a paper about LLM reasoning.'
# convert_to_listed_json(extract_ideas('AI-powered tools for productivity', project_description))

task_description = 'Build an interview bot for the curify digest project.'
task_analysis, reasoning_path = generate_reasoning_path(project_description, task_description)

store_and_execute_task(task_description, reasoning_path)

# In[ ]:

reasoning_path
# In[ ]:

# Gradio Demo
with gr.Blocks(
    css="""
    .gradio-table td {
        white-space: normal !important;
        word-wrap: break-word !important;
    }
    .gradio-table {
        width: 100% !important;          /* Adjust to 100% to fit the container */
        table-layout: fixed !important;  /* Fixed column widths */
        overflow-x: hidden !important;   /* Disable horizontal scrolling */
    }
    .gradio-container {
        overflow-x: hidden !important;   /* Disable horizontal scroll for entire container */
        padding: 0 !important;           /* Remove any default padding */
    }
    .gradio-column {
        max-width: 100% !important;      /* Ensure columns take up full width */
        overflow: hidden !important;     /* Hide overflow to prevent horizontal scroll */
    }
    .gradio-row {
        overflow-x: hidden !important;   /* Prevent horizontal scroll on rows */
    }
    """
) as demo:
gr.Markdown("## Curify: Unified AI Tools for Productivity") | |
with gr.Tab("Curify Idea"): | |
with gr.Row(): | |
# Column 1: Webpage rendering | |
with gr.Column(): | |
gr.Markdown("## Enter project descriptions.") | |
project_input = gr.Textbox( | |
placeholder="Describe your project...", | |
label=None, | |
lines=5) | |
gr.Markdown("## Enter task message.") | |
idea_input = gr.Textbox( | |
label=None, | |
placeholder="Describe the task you want to execute (e.g., Research Paper Review)") | |
task_btn = gr.Button("Generating task steps...") | |
gr.Markdown("## Projects Overview") | |
project_list = gr.DataFrame( | |
type="pandas" | |
) | |
            # Column 2: Task analysis and execution path
            with gr.Column():
                gr.Markdown("## Task analysis")
                task_analysis_txt = gr.Textbox(
                    label=None,
                    placeholder="Here is an analysis of your task...",
                    lines=3)

                gr.Markdown("## Execution path")
                task_steps = gr.DataFrame(
                    type="pandas"
                )

    task_btn.click(
        curify_ideas,
        inputs=[project_input, idea_input],
        outputs=[project_list, task_steps, task_analysis_txt]
    )

demo.launch(share=True)

# In[ ]: