qqwjq1981 commited on
Commit
d6fb2ea
·
verified ·
1 Parent(s): eb6ea7f

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +386 -0
  2. curify_api.yaml +17 -0
app.py ADDED
@@ -0,0 +1,386 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env python
# coding: utf-8

# Notebook-only dependency bootstrap.
#
# The exported cell contained a bare ``pip install ...`` line, which is shell
# syntax and prevents the module from being parsed by a plain Python
# interpreter, followed by unguarded ``get_ipython()`` calls, which raise
# NameError outside IPython/Jupyter.  The installs are therefore issued only
# when an IPython shell is actually present; in any other environment the
# packages are expected to be installed beforehand.
# ``difflib`` is no longer in the package list: it ships with the standard
# library and does not need to be installed.
try:
    _ipython_shell = get_ipython()
except NameError:
    # Plain ``python app.py`` run: there is no IPython shell to delegate to.
    _ipython_shell = None

if _ipython_shell is not None:
    _ipython_shell.system(
        'pip install gradio newspaper3k transformers sentence-transformers '
        'openai lxml_html_clean'
    )
    _ipython_shell.system('pip show openai')
    _ipython_shell.system('pip uninstall newspaper3k -y')
    _ipython_shell.system('pip install newspaper3k')
16
+
17
+
18
+ # In[16]:
19
+
20
+
21
+ import os
22
+ import yaml
23
+ import pandas as pd
24
+ import numpy as np
25
+
26
+ import azureml.core
27
+ from azureml.core import Workspace, Datastore, ComputeTarget
28
+ from azure.identity import DefaultAzureCredential
29
+ from azure.ai.ml import MLClient
30
+ from azure.ai.ml import command
31
+ from azure.ai.ml import Input, Output
32
+ from azure.ai.ml import load_component
33
+ from azure.ai.ml.entities import Environment, Data, PipelineJob, Job, Schedule
34
+ from datetime import datetime, timedelta
35
+
36
+ # perspective generation
37
+ import openai
38
+ import os
39
+ from openai import OpenAI
40
+
41
+ from newspaper import Article
42
+ import gradio as gr
43
+
44
+ import json
45
+
46
+ import difflib
47
+
48
+
49
+ # In[17]:
50
+
51
+
52
# Load service credentials and Azure settings from the local YAML config.
# NOTE(review): this file holds plaintext API keys; it should be kept out of
# version control, and any key that has already been published should be
# rotated.
with open('./curify_api.yaml', 'r', encoding='utf-8') as yaml_file:
    # safe_load returns None for an empty document; use an empty mapping
    # instead so the lookups below do not fail on ``None.get``.
    data = yaml.safe_load(yaml_file) or {}


def _config_value(section, key):
    """Return ``data[section][key]``, or None when either level is missing."""
    section_values = data.get(section)
    if not isinstance(section_values, dict):
        return None
    return section_values.get(key)


# Access the API keys and other configuration data
weaviate_url = _config_value('weaviate', 'url')
weaviate_api_key = _config_value('weaviate', 'api_key')
cohere_api_key = _config_value('cohere', 'api_key')
openai_api_key = _config_value('openai', 'api_key')
serper_api_key = _config_value('serper', 'api_key')

# os.environ rejects non-string values (assigning None raises TypeError), so
# a key that is absent from the file is simply not exported.
if openai_api_key is not None:
    os.environ["OPENAI_API_KEY"] = openai_api_key
if serper_api_key is not None:
    os.environ["SERPER_API_KEY"] = serper_api_key

SUBSCRIPTION = _config_value('azure', 'subscription_id')
RESOURCE_GROUP = _config_value('azure', 'resource_group_name')
WS_NAME = _config_value('azure', 'workspace_name')
68
+
69
+
70
+ # In[18]:
71
+
72
+
73
def convert_to_listed_json(input_string):
    """
    Extract the JSON array embedded in a string and parse it.

    Everything before the first '[' and after the last ']' is discarded, so
    text surrounding the array (for example a Markdown code fence) does not
    prevent parsing.

    Parameters:
    input_string (str): Text expected to contain a JSON array.

    Returns:
    list | None: The parsed array, or None when the input is not a string,
    contains no '[' ... ']' pair, or the bracketed text is not valid JSON.
    """
    if not isinstance(input_string, str):
        return None

    try:
        start = input_string.index('[')
        end = input_string.rindex(']')
        return json.loads(input_string[start:end + 1])
    except ValueError:
        # str.index / str.rindex raise ValueError when a bracket is missing,
        # and json.JSONDecodeError is a subclass of ValueError, so one
        # handler covers both "no array found" and "array is not valid JSON".
        return None
94
+
95
+
96
def validate_and_extract_json(json_string):
    """
    Extract the JSON object embedded in a string and parse it.

    Everything before the first '{' and after the last '}' is discarded
    before parsing, so surrounding prose or code fences are tolerated.

    Args:
    - json_string (str): Text expected to contain a JSON object.

    Returns:
    - dict | None: The parsed object, or None when the input is not a string,
      contains no '{' ... '}' pair, or the braced text is not valid JSON.
    """
    if not isinstance(json_string, str):
        return None

    try:
        # The slice is taken inside the try block: str.index / str.rindex
        # raise ValueError when a brace is missing.
        start = json_string.index('{')
        end = json_string.rindex('}')
        return json.loads(json_string[start:end + 1])
    except ValueError:
        # json.JSONDecodeError is a ValueError subclass, so this also covers
        # braced text that is not valid JSON.
        return None
117
+
118
def json_to_pandas(dat_json, dat_schema=None):
    """
    Build a DataFrame from parsed JSON, with a one-row placeholder fallback.

    Parameters:
    dat_json: Parsed JSON data (typically a list of dictionaries) passed
        straight to the DataFrame constructor.
    dat_schema (dict | None): Row used for the fallback frame when the data
        cannot be converted. Defaults to {'name': "", 'description': ""}.

    Returns:
    pd.DataFrame: The converted data, or a single-row frame built from
    dat_schema when the DataFrame constructor raises.
    """
    # Build the default per call rather than sharing one dict object through
    # a mutable default argument.
    if dat_schema is None:
        dat_schema = {'name': "", 'description': ""}

    try:
        return pd.DataFrame(dat_json)
    except Exception:
        # Deliberately broad: the input comes from model output and the
        # caller only needs something displayable.
        return pd.DataFrame([dat_schema])
127
+
128
+
129
+ # In[19]:
130
+
131
+
132
from transformers import pipeline

# Summarization pipeline, created once at import time and reused by every
# call to summarize_content.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")


def summarize_content(text):
    """
    Summarize a piece of text with the module-level summarization pipeline.

    Parameters:
    text (str): The text to summarize.

    Returns:
    str: The generated summary, or an empty string when text is empty or
    contains only whitespace.
    """
    # Nothing to summarize: skip the model call entirely.
    if not text or not text.strip():
        return ""

    # truncation=True asks the pipeline to cut over-long inputs instead of
    # passing them to the model unchanged.
    # NOTE(review): confirm the keyword against the installed transformers version.
    summary = summarizer(
        text,
        max_length=350,
        min_length=40,
        do_sample=False,
        truncation=True,
    )
    return summary[0]['summary_text']
139
+
140
+
141
+ # In[20]:
142
+
143
+
144
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),  # This is the default and can be omitted
)


def call_openai_api(prompt):
    """
    Send a single-turn prompt to the chat completions endpoint.

    Parameters:
    prompt (str): The user message; a fixed system message precedes it.

    Returns:
    str: The text of the first choice with surrounding whitespace removed,
    or an empty string when that choice carries no text.
    """
    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ],
        max_tokens=5000,
    )

    # The message content may be None; calling .strip() on it would raise
    # AttributeError, so an empty reply is normalised to "".
    content = response.choices[0].message.content
    return content.strip() if content else ""
158
+
159
def fn_task_analysis(project_context, task_description):
    """
    Ask the model to classify a task within the given project context.

    The prompt requests a JSON answer with the keys description,
    project_association, is_concrete and task_category.

    Parameters:
    project_context (str): Description of the projects the task may belong to.
    task_description (str): The task or idea to analyze.

    Returns:
    The value returned by call_openai_api for the prompt; it is not parsed here.
    """
    # Every fragment ends with whitespace so that adjacent sentences, and in
    # particular the caller-supplied task text, are not fused together when
    # the literals are concatenated.
    prompt = (
        f"You are working in the context of {project_context}. "
        f"Your task is to analyze the task and break down into reasoning steps: {task_description}\n"
        "For analyzer, please analyze 1) which project this item belongs to. It's possible that the idea may be a personal reflection or random thoughts, not in an existing project. "
        "2) whether this idea is concrete todo or vague. "
        "3) what is the category of the task. "
        "Please output in JSON with description, project_association, is_concrete, task_category as keys."
    )
    return call_openai_api(prompt)
169
+
170
def generate_reasoning_path(project_context, task_description):
    """
    Analyze a task and ask the model for a prioritized list of reasoning steps.

    First runs fn_task_analysis, then sends a second prompt that embeds that
    analysis together with the breakdown guidelines. Both replies are parsed
    with validate_and_extract_json.

    Parameters:
    project_context (str): Description of the projects the task may belong to.
    task_description (str): The task to break down (e.g. a research paper review).

    Returns:
    tuple: (task_analysis, reasoning_steps) as returned by
    validate_and_extract_json, or (None, None) when parsing raises ValueError.
    """
    res_task_analysis = fn_task_analysis(project_context, task_description)

    # Every fragment ends with whitespace so that the task text, the embedded
    # analysis and the individual guidelines stay separate sentences after
    # the literals are concatenated.
    prompt = (
        f"You are working in the context of {project_context}. "
        f"Your task is to analyze the task and break down into reasoning steps: {task_description}\n"
        f"Please use the results of task analysis: {res_task_analysis}. "
        "Guideline for breaking down the task: "
        "If the task is to build some features, I would like to receive a prioritized design doc and Gradio-based feature demo as deliverable. "
        "If the task is related to some blogs, papers, talks, I would like you to find the exact reference, generate a summary and convert it to a podcast. "
        "If the message has some reflections about feelings or opinions, please translate to English, polish it and publish it onto substack. "
        "If the task is tool research, reason about if it is a competitive tool or an integration tool. For competitive tools, generate a table to compare the tool and our relevant tool. For integration tools, decide on possible integration. "
        "If the task is questionnaire or interview, please deliver a questionnaire design. "
        "If the message is pointing to some personal or project issues, please use the framework and write a brief memo: a) What Happened? — Understanding the Problem. b) What Can We Do About It? — Generating Solutions c) So What? — Evaluating the Impact and Moving Forward. "
        "For idea brainstorming, I expect you to list potential ideas, construct the Feasibility Matrix or Impact/Effort Matrix, prioritize these ideas, setup an action plan with todos, build the prototype. "
        "Please output the action and priority of each step, you do not need to give explanation. "
        "Please ignore the low priority steps in the output. "
        "Please output the reasoning steps in JSON with reasoning_steps as key."
    )

    res_steps = call_openai_api(prompt)

    try:
        json_task_analysis = validate_and_extract_json(res_task_analysis)
        json_steps = validate_and_extract_json(res_steps)
    except ValueError:
        # A reply without a JSON object cannot be used; report both results
        # as missing.
        return None, None

    return json_task_analysis, json_steps
200
+
201
def store_and_execute_task(task_description, reasoning_path, json_key = 'reasoning_steps'):
    """
    Pull the reasoning steps out of a parsed reasoning path.

    No execution takes place yet; the function only looks up json_key.

    Parameters:
    task_description: Not used by the current implementation.
    reasoning_path: Parsed model output; only dictionaries are accepted.
    json_key (str): Key under which the steps are stored.

    Returns:
    The value stored under json_key, or None when reasoning_path is not a
    dictionary or does not contain the key.
    """
    # Guard clauses: anything that is not a dict holding the key yields None.
    if not isinstance(reasoning_path, dict):
        return None
    if json_key not in reasoning_path:
        return None

    steps = reasoning_path[json_key]
    return steps
212
+
213
+
214
+ # In[21]:
215
+
216
+
217
# Initialize dataframes for the schema
ideas_df = pd.DataFrame(columns=["Idea ID", "Content", "Tags"])


def extract_ideas(context, text):
    """
    Ask the model to extract ongoing projects from free text.

    Parameters:
    context (str): Context of the extraction. Can be empty, in which case a
        generic introduction is used instead.
    text (str): Text to extract projects from.

    Returns:
    The value returned by call_openai_api for the prompt. It is not parsed
    here; the prompt asks the model for a JSON list of projects with name
    and description.
    """
    # Only the opening sentence depends on whether a context was supplied.
    if context:
        intro = f"You are working in the context of {context}. "
    else:
        intro = "Given the following information about the user. "

    # Each fragment ends with whitespace so the sentences stay separate after
    # concatenation.
    prompt = (
        intro
        + "Please extract the ongoing projects with project name and description. "
        + "Please return only the listed JSON as the output string.\n"
        + f"Ongoing projects: {text}"
    )

    return call_openai_api(prompt)
250
+
251
def df_to_string(df, empty_message = ''):
    """
    Render a DataFrame as text, or return a placeholder when there is no data.

    Parameters:
    df (pd.DataFrame | None): The DataFrame to be converted.
    empty_message (str): Value returned when df is None or empty.

    Returns:
    str: The DataFrame rendered without its index, or empty_message when
    there is nothing to render.
    """
    # Treat a missing frame like an empty one instead of failing on ``.empty``.
    if df is None or df.empty:
        return empty_message
    return df.to_string(index=False)
266
+
267
+
268
+ # In[22]:
269
+
270
+
271
def curify_ideas(project_description, task_description):
    """
    Produce the three outputs shown by the demo for one request.

    Parameters:
    project_description (str): Free-text description of the user's projects.
    task_description (str): The task to analyze and break into steps.

    Returns:
    tuple: (projects DataFrame, task steps DataFrame, task analysis), where
    the task analysis is whatever generate_reasoning_path returned for it.
    """
    # A semantic task-splitting step may be needed before this point.

    # Projects: raw model reply -> JSON list.
    projects_reply = extract_ideas('AI-powered tools for productivity', project_description)
    projects = convert_to_listed_json(projects_reply)

    # Task: analysis plus reasoning path, then the steps taken from the path.
    analysis, path = generate_reasoning_path(project_description, task_description)
    steps = store_and_execute_task(task_description, path)

    projects_table = json_to_pandas(projects)
    steps_table = json_to_pandas(steps)
    return projects_table, steps_table, analysis
285
+
286
+
287
+ # In[23]:
288
+
289
+
290
# Notebook scratch cell: runs the reasoning pipeline once on sample input.
# NOTE(review): these statements execute at import time and go through
# generate_reasoning_path, i.e. they call the OpenAI API before the UI starts
# — confirm this is intended outside the notebook.
project_description = (
    'work on a number of projects including curify (digest, ideas, careers, '
    'projects etc), and writing a book on LLM for recommendation system, '
    'educating my 3.5-year-old boy and working on a paper for LLM reasoning.'
)

# convert_to_listed_json(extract_ideas('AI-powered tools for productivity', project_description))

task_description = 'Build an interview bot for the curify digest project.'
task_analysis, reasoning_path = generate_reasoning_path(
    project_description,
    task_description,
)

# The result is discarded; in the notebook it was only displayed.
store_and_execute_task(task_description, reasoning_path)

# Bare expression kept from the notebook cell (displays the value there).
reasoning_path
304
+
305
+
306
+ # In[ ]:
307
+
308
+
309
# Gradio Demo

# Stylesheet passed to gr.Blocks: wraps table cell text and suppresses
# horizontal scrolling on tables, columns, rows and the container.
_DEMO_CSS = """
    .gradio-table td {
        white-space: normal !important;
        word-wrap: break-word !important;
    }
    .gradio-table {
        width: 100% !important; /* Adjust to 100% to fit the container */
        table-layout: fixed !important; /* Fixed column widths */
        overflow-x: hidden !important; /* Disable horizontal scrolling */
    }
    .gradio-container {
        overflow-x: hidden !important; /* Disable horizontal scroll for entire container */
        padding: 0 !important; /* Remove any default padding */
    }
    .gradio-column {
        max-width: 100% !important; /* Ensure columns take up full width */
        overflow: hidden !important; /* Hide overflow to prevent horizontal scroll */
    }
    .gradio-row {
        overflow-x: hidden !important; /* Prevent horizontal scroll on rows */
    }
    """

with gr.Blocks(css=_DEMO_CSS) as demo:

    gr.Markdown("## Curify: Unified AI Tools for Productivity")

    with gr.Tab("Curify Idea"):
        with gr.Row():
            # Left column: the two inputs, the trigger button and the
            # extracted projects table.
            with gr.Column():
                gr.Markdown("## Enter project descriptions.")
                project_input = gr.Textbox(
                    placeholder="Describe your project...",
                    label=None,
                    lines=5,
                )

                gr.Markdown("## Enter task message.")
                idea_input = gr.Textbox(
                    label=None,
                    placeholder="Describe the task you want to execute (e.g., Research Paper Review)",
                )

                task_btn = gr.Button("Generating task steps...")

                gr.Markdown("## Projects Overview")
                project_list = gr.DataFrame(type="pandas")

            # Right column: task analysis text and the execution steps table.
            with gr.Column():
                gr.Markdown("## Task analysis")
                task_analysis_txt = gr.Textbox(
                    label=None,
                    placeholder="Here is an analysis of your task...",
                    lines=3,
                )

                gr.Markdown("## Execution path")
                task_steps = gr.DataFrame(type="pandas")

        # The output order matches the tuple returned by curify_ideas:
        # projects table, steps table, task analysis.
        task_btn.click(
            curify_ideas,
            inputs=[project_input, idea_input],
            outputs=[project_list, task_steps, task_analysis_txt],
        )

demo.launch(share=True)
380
+
381
+
382
+ # In[ ]:
383
+
384
+
385
+
386
+
curify_api.yaml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ weaviate:
2
+ url: "https://grpc-ddyazhrqhkunrgfdsd7pg.c0.us-west3.gcp.weaviate.cloud/"
3
+ api_key: "VIIzm0iRMIesfH9CsWdyRXD0YZnNKKBwZnDf"
4
+
5
+ cohere:
6
+ api_key: "dAAGflqk0F0YaimLIQECZOrAXKuTUGU0rHkFs3Bu"
7
+
8
+ openai:
9
+ api_key: "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"
10
+
11
+ serper:
12
+ api_key: "6d85d4591e052751b8ba10a9568985ebc92fa234"
13
+
14
+ azure:
15
+ subscription_id: "541beb67-718e-41c5-958e-8cc0ba95b210"
16
+ resource_group_name: "awesome_rag_dev"
17
+ workspace_name: "rag_book_demo"