cboettig committed on
Commit
cc8b36c
1 Parent(s): 263f645

experimental chat

Browse files
Files changed (3) hide show
  1. app.py +41 -0
  2. data.csv +0 -0
  3. plots.ipynb +431 -0
app.py CHANGED
@@ -12,6 +12,47 @@ st.set_page_config(layout="wide",
12
  '''
13
 
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
 
17
  # year = st.slider("Select a year", min_value=1988, max_value=2024, value=2022, step=2)
 
12
  '''
13
 
14
 
15
+ ## Chatbot
16
+ import os
17
+ import pandas as pd
18
+ import matplotlib.pyplot as plt
19
+ from pandasai.llm.openai import OpenAI
20
+ from pandasai import Agent
21
+ from pandasai.responses.streamlit_response import StreamlitResponse
22
+
23
+ llm = OpenAI(api_token=st.secrets["OPENAI_API_KEY"])
24
+ df1 = pd.read_csv("data.csv")
25
+
26
+ agent = Agent(
27
+ [df1],
28
+ config={"verbose": True, "response_parser": StreamlitResponse, "llm": llm},
29
+ )
30
+
31
+
32
+
33
+ with st.sidebar:
34
+
35
+ '''
36
+ ## Data Assistant (experimental)
37
+
38
+ Ask questions about the landvote data, like:
39
+
40
+ - What are the top states for approved conservation funds?
41
+ - Plot the total funds spent in conservation each year.
42
+ - What city has approved the most funds in a single measure? What was the description of that vote?
43
+ - Which state has had the largest number of measures fail? What is that as a fraction of its total measures?
44
+ '''
45
+
46
+ prompt = st.chat_input("Ask about the data")
47
+ if prompt:
48
+ with st.spinner():
49
+ resp = agent.chat(prompt)
50
+ if os.path.isfile('exports/charts/temp_chart.png'):
51
+ im = plt.imread('exports/charts/temp_chart.png')
52
+ st.image(im)
53
+ os.remove('exports/charts/temp_chart.png')
54
+ st.write(resp)
55
+
56
 
57
 
58
  # year = st.slider("Select a year", min_value=1988, max_value=2024, value=2022, step=2)
data.csv ADDED
The diff for this file is too large to render. See raw diff
 
plots.ipynb ADDED
@@ -0,0 +1,431 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 14,
6
+ "id": "0f3a8346-0c49-4cab-ab0a-e982006db476",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import ibis\n",
11
+ "from ibis import _\n",
12
+ "\n",
13
+ "con = ibis.duckdb.connect()\n",
14
+ "\n",
15
+ "df = (con.\n",
16
+ " read_csv(\"landvote.csv\")\n",
17
+ " .mutate(amount = _[\"Conservation Funds Approved\"])\n",
18
+ " .mutate(conservation_funds_approved=_.amount.replace('$', '').replace(',', '').cast('float'))\n",
19
+ " .mutate(year = _.Date.year())\n",
20
+ " )\n",
21
+ "\n"
22
+ ]
23
+ },
24
+ {
25
+ "cell_type": "code",
26
+ "execution_count": 15,
27
+ "id": "b36f93ae-b12b-46ed-a105-07243b86b308",
28
+ "metadata": {},
29
+ "outputs": [],
30
+ "source": [
31
+ "cols = ['State',\n",
32
+ " 'Jurisdiction Name',\n",
33
+ " 'Jurisdiction Type',\n",
34
+ " 'Date',\n",
35
+ " 'Description',\n",
36
+ " 'Finance Mechanism',\n",
37
+ " '\"Other\" Comment',\n",
38
+ " 'Purpose',\n",
39
+ " 'Conservation Funds at Stake',\n",
40
+ " 'Pass?',\n",
41
+ " 'Status',\n",
42
+ " '% Yes',\n",
43
+ " '% No',\n",
44
+ " 'Notes',\n",
45
+ " 'Voted Acq. Measure',\n",
46
+ " 'amount',\n",
47
+ " 'conservation_funds_approved',\n",
48
+ " 'year']\n",
49
+ "\n",
50
+ "df.select(cols).to_csv(\"data.csv\")"
51
+ ]
52
+ },
53
+ {
54
+ "cell_type": "code",
55
+ "execution_count": 18,
56
+ "id": "15dda23e-4db4-4860-9c48-20ce580d635b",
57
+ "metadata": {},
58
+ "outputs": [
59
+ {
60
+ "data": {
61
+ "text/html": [
62
+ "<div>\n",
63
+ "<style scoped>\n",
64
+ " .dataframe tbody tr th:only-of-type {\n",
65
+ " vertical-align: middle;\n",
66
+ " }\n",
67
+ "\n",
68
+ " .dataframe tbody tr th {\n",
69
+ " vertical-align: top;\n",
70
+ " }\n",
71
+ "\n",
72
+ " .dataframe thead th {\n",
73
+ " text-align: right;\n",
74
+ " }\n",
75
+ "</style>\n",
76
+ "<table border=\"1\" class=\"dataframe\">\n",
77
+ " <thead>\n",
78
+ " <tr style=\"text-align: right;\">\n",
79
+ " <th></th>\n",
80
+ " <th>State</th>\n",
81
+ " <th>n</th>\n",
82
+ " </tr>\n",
83
+ " </thead>\n",
84
+ " <tbody>\n",
85
+ " <tr>\n",
86
+ " <th>0</th>\n",
87
+ " <td>CA</td>\n",
88
+ " <td>1.907187e+10</td>\n",
89
+ " </tr>\n",
90
+ " <tr>\n",
91
+ " <th>1</th>\n",
92
+ " <td>FL</td>\n",
93
+ " <td>1.411086e+10</td>\n",
94
+ " </tr>\n",
95
+ " <tr>\n",
96
+ " <th>2</th>\n",
97
+ " <td>NJ</td>\n",
98
+ " <td>1.223420e+10</td>\n",
99
+ " </tr>\n",
100
+ " <tr>\n",
101
+ " <th>3</th>\n",
102
+ " <td>CO</td>\n",
103
+ " <td>6.011241e+09</td>\n",
104
+ " </tr>\n",
105
+ " <tr>\n",
106
+ " <th>4</th>\n",
107
+ " <td>MN</td>\n",
108
+ " <td>5.963134e+09</td>\n",
109
+ " </tr>\n",
110
+ " </tbody>\n",
111
+ "</table>\n",
112
+ "</div>"
113
+ ],
114
+ "text/plain": [
115
+ " State n\n",
116
+ "0 CA 1.907187e+10\n",
117
+ "1 FL 1.411086e+10\n",
118
+ "2 NJ 1.223420e+10\n",
119
+ "3 CO 6.011241e+09\n",
120
+ "4 MN 5.963134e+09"
121
+ ]
122
+ },
123
+ "execution_count": 18,
124
+ "metadata": {},
125
+ "output_type": "execute_result"
126
+ }
127
+ ],
128
+ "source": [
129
+ "df.group_by(_.State).agg(n = _.conservation_funds_approved.sum()).order_by(_.n.desc()).head().execute()"
130
+ ]
131
+ },
132
+ {
133
+ "cell_type": "code",
134
+ "execution_count": 22,
135
+ "id": "1117dd18-5207-4e14-9920-4feb262d373c",
136
+ "metadata": {},
137
+ "outputs": [
138
+ {
139
+ "data": {
140
+ "text/html": [
141
+ "<div>\n",
142
+ "<style scoped>\n",
143
+ " .dataframe tbody tr th:only-of-type {\n",
144
+ " vertical-align: middle;\n",
145
+ " }\n",
146
+ "\n",
147
+ " .dataframe tbody tr th {\n",
148
+ " vertical-align: top;\n",
149
+ " }\n",
150
+ "\n",
151
+ " .dataframe thead th {\n",
152
+ " text-align: right;\n",
153
+ " }\n",
154
+ "</style>\n",
155
+ "<table border=\"1\" class=\"dataframe\">\n",
156
+ " <thead>\n",
157
+ " <tr style=\"text-align: right;\">\n",
158
+ " <th></th>\n",
159
+ " <th>State</th>\n",
160
+ " <th>n</th>\n",
161
+ " </tr>\n",
162
+ " </thead>\n",
163
+ " <tbody>\n",
164
+ " <tr>\n",
165
+ " <th>0</th>\n",
166
+ " <td>MA</td>\n",
167
+ " <td>132</td>\n",
168
+ " </tr>\n",
169
+ " <tr>\n",
170
+ " <th>1</th>\n",
171
+ " <td>NJ</td>\n",
172
+ " <td>128</td>\n",
173
+ " </tr>\n",
174
+ " <tr>\n",
175
+ " <th>2</th>\n",
176
+ " <td>CO</td>\n",
177
+ " <td>47</td>\n",
178
+ " </tr>\n",
179
+ " <tr>\n",
180
+ " <th>3</th>\n",
181
+ " <td>CA</td>\n",
182
+ " <td>47</td>\n",
183
+ " </tr>\n",
184
+ " <tr>\n",
185
+ " <th>4</th>\n",
186
+ " <td>IL</td>\n",
187
+ " <td>37</td>\n",
188
+ " </tr>\n",
189
+ " <tr>\n",
190
+ " <th>5</th>\n",
191
+ " <td>PA</td>\n",
192
+ " <td>35</td>\n",
193
+ " </tr>\n",
194
+ " <tr>\n",
195
+ " <th>6</th>\n",
196
+ " <td>WA</td>\n",
197
+ " <td>34</td>\n",
198
+ " </tr>\n",
199
+ " <tr>\n",
200
+ " <th>7</th>\n",
201
+ " <td>OH</td>\n",
202
+ " <td>26</td>\n",
203
+ " </tr>\n",
204
+ " <tr>\n",
205
+ " <th>8</th>\n",
206
+ " <td>MI</td>\n",
207
+ " <td>23</td>\n",
208
+ " </tr>\n",
209
+ " <tr>\n",
210
+ " <th>9</th>\n",
211
+ " <td>FL</td>\n",
212
+ " <td>19</td>\n",
213
+ " </tr>\n",
214
+ " <tr>\n",
215
+ " <th>10</th>\n",
216
+ " <td>Ore</td>\n",
217
+ " <td>15</td>\n",
218
+ " </tr>\n",
219
+ " <tr>\n",
220
+ " <th>11</th>\n",
221
+ " <td>NC</td>\n",
222
+ " <td>13</td>\n",
223
+ " </tr>\n",
224
+ " <tr>\n",
225
+ " <th>12</th>\n",
226
+ " <td>CT</td>\n",
227
+ " <td>13</td>\n",
228
+ " </tr>\n",
229
+ " <tr>\n",
230
+ " <th>13</th>\n",
231
+ " <td>NY</td>\n",
232
+ " <td>12</td>\n",
233
+ " </tr>\n",
234
+ " <tr>\n",
235
+ " <th>14</th>\n",
236
+ " <td>TX</td>\n",
237
+ " <td>11</td>\n",
238
+ " </tr>\n",
239
+ " <tr>\n",
240
+ " <th>15</th>\n",
241
+ " <td>GA</td>\n",
242
+ " <td>9</td>\n",
243
+ " </tr>\n",
244
+ " <tr>\n",
245
+ " <th>16</th>\n",
246
+ " <td>AZ</td>\n",
247
+ " <td>9</td>\n",
248
+ " </tr>\n",
249
+ " <tr>\n",
250
+ " <th>17</th>\n",
251
+ " <td>MN</td>\n",
252
+ " <td>7</td>\n",
253
+ " </tr>\n",
254
+ " <tr>\n",
255
+ " <th>18</th>\n",
256
+ " <td>UT</td>\n",
257
+ " <td>7</td>\n",
258
+ " </tr>\n",
259
+ " <tr>\n",
260
+ " <th>19</th>\n",
261
+ " <td>WI</td>\n",
262
+ " <td>6</td>\n",
263
+ " </tr>\n",
264
+ " <tr>\n",
265
+ " <th>20</th>\n",
266
+ " <td>AK</td>\n",
267
+ " <td>5</td>\n",
268
+ " </tr>\n",
269
+ " <tr>\n",
270
+ " <th>21</th>\n",
271
+ " <td>NV</td>\n",
272
+ " <td>4</td>\n",
273
+ " </tr>\n",
274
+ " <tr>\n",
275
+ " <th>22</th>\n",
276
+ " <td>VA</td>\n",
277
+ " <td>4</td>\n",
278
+ " </tr>\n",
279
+ " <tr>\n",
280
+ " <th>23</th>\n",
281
+ " <td>ID</td>\n",
282
+ " <td>4</td>\n",
283
+ " </tr>\n",
284
+ " <tr>\n",
285
+ " <th>24</th>\n",
286
+ " <td>MT</td>\n",
287
+ " <td>4</td>\n",
288
+ " </tr>\n",
289
+ " <tr>\n",
290
+ " <th>25</th>\n",
291
+ " <td>NM</td>\n",
292
+ " <td>3</td>\n",
293
+ " </tr>\n",
294
+ " <tr>\n",
295
+ " <th>26</th>\n",
296
+ " <td>ME</td>\n",
297
+ " <td>3</td>\n",
298
+ " </tr>\n",
299
+ " <tr>\n",
300
+ " <th>27</th>\n",
301
+ " <td>OK</td>\n",
302
+ " <td>2</td>\n",
303
+ " </tr>\n",
304
+ " <tr>\n",
305
+ " <th>28</th>\n",
306
+ " <td>RI</td>\n",
307
+ " <td>2</td>\n",
308
+ " </tr>\n",
309
+ " <tr>\n",
310
+ " <th>29</th>\n",
311
+ " <td>LA</td>\n",
312
+ " <td>2</td>\n",
313
+ " </tr>\n",
314
+ " <tr>\n",
315
+ " <th>30</th>\n",
316
+ " <td>AR</td>\n",
317
+ " <td>2</td>\n",
318
+ " </tr>\n",
319
+ " <tr>\n",
320
+ " <th>31</th>\n",
321
+ " <td>MS</td>\n",
322
+ " <td>2</td>\n",
323
+ " </tr>\n",
324
+ " <tr>\n",
325
+ " <th>32</th>\n",
326
+ " <td>SC</td>\n",
327
+ " <td>2</td>\n",
328
+ " </tr>\n",
329
+ " <tr>\n",
330
+ " <th>33</th>\n",
331
+ " <td>ND</td>\n",
332
+ " <td>1</td>\n",
333
+ " </tr>\n",
334
+ " <tr>\n",
335
+ " <th>34</th>\n",
336
+ " <td>TN</td>\n",
337
+ " <td>1</td>\n",
338
+ " </tr>\n",
339
+ " <tr>\n",
340
+ " <th>35</th>\n",
341
+ " <td>NE</td>\n",
342
+ " <td>1</td>\n",
343
+ " </tr>\n",
344
+ " <tr>\n",
345
+ " <th>36</th>\n",
346
+ " <td>IA</td>\n",
347
+ " <td>1</td>\n",
348
+ " </tr>\n",
349
+ " <tr>\n",
350
+ " <th>37</th>\n",
351
+ " <td>KY</td>\n",
352
+ " <td>1</td>\n",
353
+ " </tr>\n",
354
+ " </tbody>\n",
355
+ "</table>\n",
356
+ "</div>"
357
+ ],
358
+ "text/plain": [
359
+ " State n\n",
360
+ "0 MA 132\n",
361
+ "1 NJ 128\n",
362
+ "2 CO 47\n",
363
+ "3 CA 47\n",
364
+ "4 IL 37\n",
365
+ "5 PA 35\n",
366
+ "6 WA 34\n",
367
+ "7 OH 26\n",
368
+ "8 MI 23\n",
369
+ "9 FL 19\n",
370
+ "10 Ore 15\n",
371
+ "11 NC 13\n",
372
+ "12 CT 13\n",
373
+ "13 NY 12\n",
374
+ "14 TX 11\n",
375
+ "15 GA 9\n",
376
+ "16 AZ 9\n",
377
+ "17 MN 7\n",
378
+ "18 UT 7\n",
379
+ "19 WI 6\n",
380
+ "20 AK 5\n",
381
+ "21 NV 4\n",
382
+ "22 VA 4\n",
383
+ "23 ID 4\n",
384
+ "24 MT 4\n",
385
+ "25 NM 3\n",
386
+ "26 ME 3\n",
387
+ "27 OK 2\n",
388
+ "28 RI 2\n",
389
+ "29 LA 2\n",
390
+ "30 AR 2\n",
391
+ "31 MS 2\n",
392
+ "32 SC 2\n",
393
+ "33 ND 1\n",
394
+ "34 TN 1\n",
395
+ "35 NE 1\n",
396
+ "36 IA 1\n",
397
+ "37 KY 1"
398
+ ]
399
+ },
400
+ "execution_count": 22,
401
+ "metadata": {},
402
+ "output_type": "execute_result"
403
+ }
404
+ ],
405
+ "source": [
406
+ "df.filter(_.Status == \"Fail\").group_by(_.State).agg(n = _.count()).order_by(_.n.desc()).execute()"
407
+ ]
408
+ }
409
+ ],
410
+ "metadata": {
411
+ "kernelspec": {
412
+ "display_name": "Python 3 (ipykernel)",
413
+ "language": "python",
414
+ "name": "python3"
415
+ },
416
+ "language_info": {
417
+ "codemirror_mode": {
418
+ "name": "ipython",
419
+ "version": 3
420
+ },
421
+ "file_extension": ".py",
422
+ "mimetype": "text/x-python",
423
+ "name": "python",
424
+ "nbconvert_exporter": "python",
425
+ "pygments_lexer": "ipython3",
426
+ "version": "3.11.10"
427
+ }
428
+ },
429
+ "nbformat": 4,
430
+ "nbformat_minor": 5
431
+ }