#!/usr/bin/env python
# coding: utf-8

# In[14]:
# difflib ships with the Python standard library, so it does not need to be installed via pip
get_ipython().system('pip install gradio newspaper3k transformers sentence-transformers openai lxml_html_clean')

# In[15]:

get_ipython().system('pip show openai')
get_ipython().system('pip uninstall newspaper3k -y')
get_ipython().system('pip install newspaper3k')

# In[16]:
import os
import yaml
import pandas as pd
import numpy as np

import azureml.core
from azureml.core import Workspace, Datastore, ComputeTarget
from azure.identity import DefaultAzureCredential
from azure.ai.ml import MLClient
from azure.ai.ml import command
from azure.ai.ml import Input, Output
from azure.ai.ml import load_component
from azure.ai.ml.entities import Environment, Data, PipelineJob, Job, Schedule
from datetime import datetime, timedelta

# perspective generation
import openai
from openai import OpenAI
from newspaper import Article
import gradio as gr
import json
import difflib
# In[17]:

# Read the YAML file
with open('./curify_api.yaml', 'r') as yaml_file:
    data = yaml.safe_load(yaml_file)

# Access the API keys and other configuration data
weaviate_url = data.get('weaviate').get('url')
weaviate_api_key = data.get('weaviate').get('api_key')
cohere_api_key = data.get('cohere').get('api_key')
openai_api_key = data.get('openai').get('api_key')
serper_api_key = data.get('serper').get('api_key')

os.environ["OPENAI_API_KEY"] = openai_api_key
os.environ["SERPER_API_KEY"] = serper_api_key

SUBSCRIPTION = data.get('azure').get('subscription_id')
RESOURCE_GROUP = data.get('azure').get('resource_group_name')
WS_NAME = data.get('azure').get('workspace_name')
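# A minimal sketch of the expected curify_api.yaml layout, inferred from the keys
# read above (the real file may contain additional entries):
#
# weaviate:
#   url: https://...
#   api_key: <key>
# cohere:
#   api_key: <key>
# openai:
#   api_key: <key>
# serper:
#   api_key: <key>
# azure:
#   subscription_id: <id>
#   resource_group_name: <name>
#   workspace_name: <name>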
# In[18]:

def convert_to_listed_json(input_string):
    """
    Converts a string to a listed JSON object.

    Parameters:
        input_string (str): The JSON-like string to be converted.

    Returns:
        list: A JSON object parsed into a Python list of dictionaries,
        or None if no valid JSON list can be extracted.
    """
    try:
        # Trim to the outermost brackets and parse into a Python object
        trimmed_string = input_string[input_string.index('['):input_string.rindex(']') + 1]
        return json.loads(trimmed_string)
    except (ValueError, json.JSONDecodeError):
        # ValueError covers missing brackets; JSONDecodeError covers malformed JSON
        return None
def validate_and_extract_json(json_string):
    """
    Validates a JSON string and extracts the outermost JSON object.

    Args:
        json_string (str): The JSON string to validate and extract from.

    Returns:
        dict: The parsed JSON object, or None if parsing fails.
    """
    try:
        # Trim to the outermost braces and parse
        trimmed_string = json_string[json_string.index('{'):json_string.rindex('}') + 1]
        return json.loads(trimmed_string)
    except (ValueError, json.JSONDecodeError):
        return None
def json_to_pandas(dat_json, dat_schema={'name': "", 'description': ""}):
    """Convert parsed JSON to a DataFrame, falling back to a single schema row on failure."""
    try:
        dat_df = pd.DataFrame(dat_json)
    except Exception:
        dat_df = pd.DataFrame([dat_schema])
    return dat_df
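# A small usage sketch for the helpers above, using made-up LLM output strings
# (illustrative only; real model output will vary):
#
# projects = convert_to_listed_json('Here you go: [{"name": "curify", "description": "digest tool"}]')
# analysis = validate_and_extract_json('{"is_concrete": true, "task_category": "feature"}')
# json_to_pandas(projects)  # -> DataFrame with "name" and "description" columns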
# In[19]:

from transformers import pipeline

# Hugging Face summarization pipeline using the facebook/bart-large-cnn model
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

def summarize_content(text):
    summary = summarizer(text, max_length=350, min_length=40, do_sample=False)
    return summary[0]['summary_text']
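# Usage sketch (illustrative; any article text of a few paragraphs works):
# article_text = "..."  # e.g., body text fetched with newspaper3k's Article
# print(summarize_content(article_text))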
# In[20]:

client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),  # This is the default and can be omitted
)

# Send the prompt to the OpenAI API
def call_openai_api(prompt):
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "system", "content": "You are a helpful assistant."},
                  {"role": "user", "content": prompt}],
        max_tokens=5000
    )
    return response.choices[0].message.content.strip()
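# Quick sketch (requires a valid OPENAI_API_KEY; the prompt text is illustrative):
# call_openai_api("Summarize the goals of the curify project in one sentence.")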
def fn_task_analysis(project_context, task_description):
    prompt = (
        f"You are working in the context of {project_context}. "
        f"Your task is to analyze the following task and break it down into reasoning steps: {task_description}. "
        "For the analysis, please determine 1) which project this item belongs to; it is possible that the idea is a personal reflection or random thought, not part of an existing project; "
        "2) whether this idea is a concrete to-do or still vague; "
        "3) what the category of the task is. "
        "Please output JSON with description, project_association, is_concrete, and task_category as keys."
    )
    return call_openai_api(prompt)
# Function to break down a task (e.g., Research Paper Review) and create a reasoning path
def generate_reasoning_path(project_context, task_description):
    res_task_analysis = fn_task_analysis(project_context, task_description)

    prompt = (
        f"You are working in the context of {project_context}. "
        f"Your task is to analyze the following task and break it down into reasoning steps: {task_description}. "
        f"Please use the results of the task analysis: {res_task_analysis}. "
        "Guidelines for breaking down the task: "
        "If the task is to build some features, I would like to receive a prioritized design doc and a Gradio-based feature demo as deliverables. "
        "If the task is related to blogs, papers, or talks, I would like you to find the exact reference, generate a summary, and convert it to a podcast. "
        "If the message contains reflections about feelings or opinions, please translate it to English, polish it, and publish it on Substack. "
        "If the task is tool research, reason about whether it is a competitive tool or an integration tool. For competitive tools, generate a table comparing the tool and our relevant tool. For integration tools, decide on a possible integration. "
        "If the task is a questionnaire or interview, please deliver a questionnaire design. "
        "If the message points to personal or project issues, please use this framework and write a brief memo: a) What Happened? — Understanding the Problem. b) What Can We Do About It? — Generating Solutions. c) So What? — Evaluating the Impact and Moving Forward. "
        "For idea brainstorming, I expect you to list potential ideas, construct a Feasibility Matrix or Impact/Effort Matrix, prioritize the ideas, set up an action plan with to-dos, and build the prototype. "
        "Please output the action and priority of each step; you do not need to give an explanation. "
        "Please omit the low-priority steps from the output. "
        "Please output the reasoning steps in JSON with reasoning_steps as the key."
    )
    res_steps = call_openai_api(prompt)

    # Both extraction helpers return None on failure, so no exception handling is needed here
    json_task_analysis = validate_and_extract_json(res_task_analysis)
    json_steps = validate_and_extract_json(res_steps)

    return json_task_analysis, json_steps
# Function to store the reasoning path as JSON and use it for task execution
def store_and_execute_task(task_description, reasoning_path, json_key='reasoning_steps'):
    if reasoning_path is not None and isinstance(reasoning_path, dict) and json_key in reasoning_path:
        reasoning_steps = reasoning_path[json_key]

        # Example logic to simulate execution (this is just a placeholder)
        # for step in reasoning_steps:
        #     step["status"] = "completed"  # Mark as completed after execution

        return reasoning_steps
    return None
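# Standalone sketch with a hypothetical reasoning path (field values are illustrative,
# not actual model output):
# example_path = {"reasoning_steps": [{"action": "Draft prioritized design doc", "priority": "high"}]}
# store_and_execute_task("Build an interview bot", example_path)  # -> the list of steps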
# In[21]:

# Initialize dataframes for the schema
ideas_df = pd.DataFrame(columns=["Idea ID", "Content", "Tags"])

def extract_ideas(context, text):
    """
    Extract project ideas from text, with or without a context, and return in JSON format.

    Parameters:
        context (str): Context of the extraction. Can be empty.
        text (str): Text to extract ideas from.

    Returns:
        str: The raw model output, expected to contain a JSON list of projects,
        each with a name and description.
    """
    if context:
        # Template when context is provided
        prompt = (
            f"You are working in the context of {context}. "
            "Please extract the ongoing projects with project name and description. "
            "Please output only the listed JSON as the output string. "
            f"Ongoing projects: {text}"
        )
    else:
        # Template when context is not provided
        prompt = (
            "Given the following information about the user, "
            "please extract the ongoing projects with project name and description. "
            "Please output only the listed JSON as the output string. "
            f"Ongoing projects: {text}"
        )

    # Return the raw string
    return call_openai_api(prompt)
def df_to_string(df, empty_message=''):
    """
    Converts a DataFrame to a string if it is not empty.
    If the DataFrame is empty, returns the empty message.

    Parameters:
        df (pd.DataFrame): The DataFrame to be converted.
        empty_message (str): The string to return when the DataFrame is empty.

    Returns:
        str: A string representation of the DataFrame, or the empty message.
    """
    if df.empty:
        return empty_message
    else:
        return df.to_string(index=False)
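# Usage sketch with the (currently empty) ideas_df defined above:
# df_to_string(ideas_df, empty_message='No ideas recorded yet.')  # -> 'No ideas recorded yet.'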
# In[22]:

def curify_ideas(project_description, task_description):
    # May need a task split step that semantically splits the task.
    str_projects = extract_ideas('AI-powered tools for productivity', project_description)
    json_projects = convert_to_listed_json(str_projects)

    # Generate reasoning path
    task_analysis, reasoning_path = generate_reasoning_path(project_description, task_description)

    # Store and simulate execution of task
    task_data = store_and_execute_task(task_description, reasoning_path)

    return json_to_pandas(json_projects), json_to_pandas(task_data), task_analysis
# In[23]:

project_description = 'work on a number of projects including curify (digest, ideas, careers, projects, etc.), writing a book on LLMs for recommendation systems, educating my 3.5-year-old boy, and working on a paper about LLM reasoning.'
# convert_to_listed_json(extract_ideas('AI-powered tools for productivity', project_description))

task_description = 'Build an interview bot for the curify digest project.'
task_analysis, reasoning_path = generate_reasoning_path(project_description, task_description)

store_and_execute_task(task_description, reasoning_path)

# In[ ]:

reasoning_path
# In[ ]:

# Gradio Demo
with gr.Blocks(
    css="""
    .gradio-table td {
        white-space: normal !important;
        word-wrap: break-word !important;
    }
    .gradio-table {
        width: 100% !important;          /* Adjust to 100% to fit the container */
        table-layout: fixed !important;  /* Fixed column widths */
        overflow-x: hidden !important;   /* Disable horizontal scrolling */
    }
    .gradio-container {
        overflow-x: hidden !important;   /* Disable horizontal scroll for entire container */
        padding: 0 !important;           /* Remove any default padding */
    }
    .gradio-column {
        max-width: 100% !important;      /* Ensure columns take up full width */
        overflow: hidden !important;     /* Hide overflow to prevent horizontal scroll */
    }
    .gradio-row {
        overflow-x: hidden !important;   /* Prevent horizontal scroll on rows */
    }
    """
) as demo:
gr.Markdown("## Curify: Unified AI Tools for Productivity") | |
with gr.Tab("Curify Idea"): | |
with gr.Row(): | |
# Column 1: Webpage rendering | |
with gr.Column(): | |
gr.Markdown("## Enter project descriptions.") | |
project_input = gr.Textbox( | |
placeholder="Describe your project...", | |
label=None, | |
lines=5) | |
gr.Markdown("## Enter task message.") | |
idea_input = gr.Textbox( | |
label=None, | |
placeholder="Describe the task you want to execute (e.g., Research Paper Review)") | |
task_btn = gr.Button("Generating task steps...") | |
gr.Markdown("## Projects Overview") | |
project_list = gr.DataFrame( | |
type="pandas" | |
) | |
            # Column 2: Task analysis and execution path
            with gr.Column():
                gr.Markdown("## Task analysis")
                task_analysis_txt = gr.Textbox(
                    label=None,
                    placeholder="Here is an analysis of your task...",
                    lines=3)

                gr.Markdown("## Execution path")
                task_steps = gr.DataFrame(
                    type="pandas"
                )

    task_btn.click(
        curify_ideas,
        inputs=[project_input, idea_input],
        outputs=[project_list, task_steps, task_analysis_txt]
    )

demo.launch(share=True)

# In[ ]: