Upload 2 files
- app.py +386 -0
- curify_api.yaml +17 -0
app.py
ADDED
@@ -0,0 +1,386 @@
#!/usr/bin/env python
# coding: utf-8

# In[14]:


# difflib is part of the Python standard library, so it is not pip-installed here.
get_ipython().system('pip install gradio newspaper3k transformers sentence-transformers openai lxml_html_clean')


# In[15]:


get_ipython().system('pip show openai')
get_ipython().system('pip uninstall newspaper3k -y')
get_ipython().system('pip install newspaper3k')

# In[16]:


import os
import yaml
import pandas as pd
import numpy as np

import azureml.core
from azureml.core import Workspace, Datastore, ComputeTarget
from azure.identity import DefaultAzureCredential
from azure.ai.ml import MLClient
from azure.ai.ml import command
from azure.ai.ml import Input, Output
from azure.ai.ml import load_component
from azure.ai.ml.entities import Environment, Data, PipelineJob, Job, Schedule
from datetime import datetime, timedelta

# perspective generation
import openai
from openai import OpenAI

from newspaper import Article
import gradio as gr

import json

import difflib

# In[17]:


# Read the YAML file
with open('./curify_api.yaml', 'r') as yaml_file:
    data = yaml.safe_load(yaml_file)

# Access the API keys and other configuration data
weaviate_url = data.get('weaviate').get('url')
weaviate_api_key = data.get('weaviate').get('api_key')
cohere_api_key = data.get('cohere').get('api_key')
openai_api_key = data.get('openai').get('api_key')
serper_api_key = data.get('serper').get('api_key')

os.environ["OPENAI_API_KEY"] = openai_api_key
os.environ["SERPER_API_KEY"] = serper_api_key
SUBSCRIPTION = data.get('azure').get('subscription_id')
RESOURCE_GROUP = data.get('azure').get('resource_group_name')
WS_NAME = data.get('azure').get('workspace_name')

# In[18]:


def convert_to_listed_json(input_string):
    """
    Converts a string to a listed JSON object.

    Parameters:
    input_string (str): The JSON-like string to be converted.

    Returns:
    list: A JSON object parsed into a Python list of dictionaries, or None if parsing fails.
    """
    try:
        # Trim to the outermost square brackets, then parse the string into a Python object
        trimmed_string = input_string[input_string.index('['):input_string.rindex(']') + 1]

        json_object = json.loads(trimmed_string)
        return json_object
    except ValueError:
        # Covers a missing '[' / ']' as well as json.JSONDecodeError (a ValueError subclass).
        return None


def validate_and_extract_json(json_string):
    """
    Validates the JSON string and extracts the outermost JSON object.

    Args:
    - json_string (str): The JSON string to validate and extract from.

    Returns:
    - dict: The parsed JSON object, or None if parsing fails.
    """
    # Trim to the outermost braces, then try to parse the JSON string
    try:
        trimmed_string = json_string[json_string.index('{'):json_string.rindex('}') + 1]
        parsed_json = json.loads(trimmed_string)
        return parsed_json
    except ValueError:
        # Covers a missing '{' / '}' as well as json.JSONDecodeError.
        return None


def json_to_pandas(dat_json, dat_schema={'name': "", 'description': ""}):
    dat_df = pd.DataFrame([dat_schema])
    try:
        dat_df = pd.DataFrame(dat_json)
    except Exception:
        # Fall back to the empty schema row if the LLM output cannot be converted.
        dat_df = pd.DataFrame([dat_schema])
    return dat_df

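
# A minimal usage sketch of the helpers above. The sample strings are
# illustrative assumptions about what a model reply might look like, not
# captured output.
# sample_list_str = 'Projects: [{"name": "curify", "description": "daily digest"}]'
# convert_to_listed_json(sample_list_str)    # -> [{'name': 'curify', 'description': 'daily digest'}]
# sample_obj_str = 'Result: {"is_concrete": true, "task_category": "feature"}'
# validate_and_extract_json(sample_obj_str)  # -> {'is_concrete': True, 'task_category': 'feature'}
# json_to_pandas(convert_to_listed_json(sample_list_str))  # -> DataFrame with name/description columns
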
# In[19]:


from transformers import pipeline

summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

def summarize_content(text):
    summary = summarizer(text, max_length=350, min_length=40, do_sample=False)
    return summary[0]['summary_text']

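
# Usage sketch (not executed here): summarize_content takes a long article string,
# e.g. the .text of a newspaper Article, and returns a single BART summary.
# article = Article("https://example.com/some-post")  # hypothetical URL
# article.download(); article.parse()
# print(summarize_content(article.text))
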
# In[20]:


client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),  # This is the default and can be omitted
)

# Send the prompt to the OpenAI API
def call_openai_api(prompt):
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "system", "content": "You are a helpful assistant."},
                  {"role": "user", "content": prompt}],
        max_tokens=5000
    )

    return response.choices[0].message.content.strip()

def fn_task_analysis(project_context, task_description):
    prompt = (
        f"You are working in the context of {project_context}. "
        f"Your task is to analyze the task and break it down into reasoning steps: {task_description} "
        "For the analysis, please determine 1) which project this item belongs to. It's possible that the idea is a personal reflection or random thought, not part of an existing project. "
        "2) whether this idea is a concrete todo or still vague. "
        "3) what the category of the task is. "
        "Please output in JSON with description, project_association, is_concrete, task_category as keys."
    )
    return call_openai_api(prompt)

# Function to break down a task (e.g., Research Paper Review) and create a reasoning path
def generate_reasoning_path(project_context, task_description):
    res_task_analysis = fn_task_analysis(project_context, task_description)
    prompt = (
        f"You are working in the context of {project_context}. "
        f"Your task is to analyze the task and break it down into reasoning steps: {task_description} "
        f"Please use the results of the task analysis: {res_task_analysis}. "
        "Guideline for breaking down the task: "
        "If the task is to build some features, I would like to receive a prioritized design doc and a Gradio-based feature demo as deliverables. "
        "If the task is related to some blogs, papers, or talks, I would like you to find the exact reference, generate a summary and convert it to a podcast. "
        "If the message has some reflections about feelings or opinions, please translate them to English, polish them and publish them onto Substack. "
        "If the task is tool research, reason about whether it is a competitive tool or an integration tool. For competitive tools, generate a table to compare the tool and our relevant tool. For integration tools, decide on a possible integration. "
        "If the task is a questionnaire or interview, please deliver a questionnaire design. "
        "If the message is pointing to some personal or project issues, please use the framework and write a brief memo: a) What Happened? — Understanding the Problem. b) What Can We Do About It? — Generating Solutions. c) So What? — Evaluating the Impact and Moving Forward. "
        "For idea brainstorming, I expect you to list potential ideas, construct the Feasibility Matrix or Impact/Effort Matrix, prioritize these ideas, set up an action plan with todos, and build the prototype. "
        "Please output the action and priority of each step; you do not need to give an explanation. "
        "Please leave the low-priority steps out of the output. "
        "Please output the reasoning steps in JSON with reasoning_steps as the key."
    )

    res_steps = call_openai_api(prompt)
    #return res_task_analysis, res_steps

    try:
        json_task_analysis = validate_and_extract_json(res_task_analysis)
        json_steps = validate_and_extract_json(res_steps)

        return json_task_analysis, json_steps
    except ValueError:
        return None, None

# Function to store the reasoning path as JSON and use it for task execution
def store_and_execute_task(task_description, reasoning_path, json_key='reasoning_steps'):
    if reasoning_path is not None and isinstance(reasoning_path, dict) and json_key in reasoning_path:

        reasoning_steps = reasoning_path[json_key]
        # Example logic to simulate execution (this is just a placeholder)
        # for step in task_steps:
        #     step["status"] = "completed"  # Mark as completed after execution

        return reasoning_steps
    return None

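
# A sketch of the reasoning-path shape that store_and_execute_task expects.
# The fields inside each step are illustrative assumptions, not a schema the
# model is guaranteed to return.
example_reasoning_path = {
    "reasoning_steps": [
        {"action": "Draft a prioritized design doc for the interview bot", "priority": "high"},
        {"action": "Build a Gradio-based demo of the interview flow", "priority": "medium"},
    ]
}
# store_and_execute_task("Build an interview bot", example_reasoning_path)
# -> returns the list stored under "reasoning_steps"
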
# In[21]:


# Initialize dataframes for the schema
ideas_df = pd.DataFrame(columns=["Idea ID", "Content", "Tags"])

def extract_ideas(context, text):
    """
    Extract project ideas from text, with or without a context, and return in JSON format.

    Parameters:
    context (str): Context of the extraction. Can be empty.
    text (str): Text to extract ideas from.

    Returns:
    str: The raw model output, expected to contain a listed JSON of ideas with name and description.
    """
    if context:
        # Template when context is provided
        prompt = (
            f"You are working in the context of {context}. "
            "Please extract the ongoing projects with project name and description. "
            "Please output only the listed JSON as the output string. "
            f"Ongoing projects: {text}"
        )
    else:
        # Template when context is not provided
        prompt = (
            "Given the following information about the user, "
            "please extract the ongoing projects with project name and description. "
            "Please output only the listed JSON as the output string. "
            f"Ongoing projects: {text}"
        )

    # return the raw string
    return call_openai_api(prompt)

def df_to_string(df, empty_message=''):
    """
    Converts a DataFrame to a string if it is not empty.
    If the DataFrame is empty, returns the empty_message.

    Parameters:
    df (pd.DataFrame): The DataFrame to be converted.
    empty_message (str): The string to return when the DataFrame is empty.

    Returns:
    str: A string representation of the DataFrame or the empty_message.
    """
    if df.empty:
        return empty_message
    else:
        return df.to_string(index=False)

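
# Illustrative sketch of how the raw model output from extract_ideas is meant to
# flow into the JSON helpers above; the literal string below is an assumption,
# not a captured response.
# raw_projects = '[{"name": "curify digest", "description": "daily AI digest"}]'
# json_to_pandas(convert_to_listed_json(raw_projects))  # -> DataFrame with name/description columns
# df_to_string(ideas_df, empty_message='No ideas recorded yet.')  # -> 'No ideas recorded yet.' (ideas_df starts empty)
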
# In[22]:


def curify_ideas(project_description, task_description):

    # May need a task split step that semantically splits the task.

    str_projects = extract_ideas('AI-powered tools for productivity', project_description)
    json_projects = convert_to_listed_json(str_projects)

    # Generate reasoning path
    task_analysis, reasoning_path = generate_reasoning_path(project_description, task_description)

    # Store and simulate execution of task
    task_data = store_and_execute_task(task_description, reasoning_path)

    return json_to_pandas(json_projects), json_to_pandas(task_data), task_analysis

# In[23]:


project_description = 'work on a number of projects including curify (digest, ideas, careers, projects etc), and writing a book on LLM for recommendation system, educating my 3.5-year-old boy and working on a paper for LLM reasoning.'

# convert_to_listed_json(extract_ideas('AI-powered tools for productivity', project_description))

task_description = 'Build an interview bot for the curify digest project.'
task_analysis, reasoning_path = generate_reasoning_path(project_description, task_description)

store_and_execute_task(task_description, reasoning_path)


# In[ ]:


reasoning_path

# In[ ]:


# Gradio Demo
with gr.Blocks(
    css="""
    .gradio-table td {
        white-space: normal !important;
        word-wrap: break-word !important;
    }
    .gradio-table {
        width: 100% !important;         /* Adjust to 100% to fit the container */
        table-layout: fixed !important; /* Fixed column widths */
        overflow-x: hidden !important;  /* Disable horizontal scrolling */
    }
    .gradio-container {
        overflow-x: hidden !important;  /* Disable horizontal scroll for entire container */
        padding: 0 !important;          /* Remove any default padding */
    }
    .gradio-column {
        max-width: 100% !important;     /* Ensure columns take up full width */
        overflow: hidden !important;    /* Hide overflow to prevent horizontal scroll */
    }
    .gradio-row {
        overflow-x: hidden !important;  /* Prevent horizontal scroll on rows */
    }
    """
) as demo:

    gr.Markdown("## Curify: Unified AI Tools for Productivity")

    with gr.Tab("Curify Idea"):
        with gr.Row():
            # Column 1: project and task inputs
            with gr.Column():
                gr.Markdown("## Enter project descriptions.")

                project_input = gr.Textbox(
                    placeholder="Describe your project...",
                    label=None,
                    lines=5)

                gr.Markdown("## Enter task message.")
                idea_input = gr.Textbox(
                    label=None,
                    placeholder="Describe the task you want to execute (e.g., Research Paper Review)")

                task_btn = gr.Button("Generate task steps")

                gr.Markdown("## Projects Overview")
                project_list = gr.DataFrame(
                    type="pandas"
                )

            # Column 2: task analysis and execution path
            with gr.Column():
                gr.Markdown("## Task analysis")
                task_analysis_txt = gr.Textbox(
                    label=None,
                    placeholder="Here is an analysis of your task...",
                    lines=3)

                gr.Markdown("## Execution path")
                task_steps = gr.DataFrame(
                    type="pandas"
                )

        task_btn.click(
            curify_ideas,
            inputs=[project_input, idea_input],
            outputs=[project_list, task_steps, task_analysis_txt]
        )

demo.launch(share=True)


# In[ ]:

curify_api.yaml
ADDED
@@ -0,0 +1,17 @@
weaviate:
  url: "https://grpc-ddyazhrqhkunrgfdsd7pg.c0.us-west3.gcp.weaviate.cloud/"
  api_key: "VIIzm0iRMIesfH9CsWdyRXD0YZnNKKBwZnDf"

cohere:
  api_key: "dAAGflqk0F0YaimLIQECZOrAXKuTUGU0rHkFs3Bu"

openai:
  api_key: "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"

serper:
  api_key: "6d85d4591e052751b8ba10a9568985ebc92fa234"

azure:
  subscription_id: "541beb67-718e-41c5-958e-8cc0ba95b210"
  resource_group_name: "awesome_rag_dev"
  workspace_name: "rag_book_demo"