better prompts / questions
Browse files- .gitignore +1 -0
- fiboa/app.py +15 -16
.gitignore
CHANGED
@@ -5,3 +5,4 @@
|
|
5 |
*.pyc
|
6 |
duck.db.wal
|
7 |
__pycache__/
|
|
|
|
5 |
*.pyc
|
6 |
duck.db.wal
|
7 |
__pycache__/
|
8 |
+
.DS_Store
|
fiboa/app.py
CHANGED
@@ -22,10 +22,10 @@ con.read_parquet(geoparquet, "crops").cast({"geometry": "geometry"})
|
|
22 |
# tbl = con.read_parquet(url, code).cast({"geometry": "geometry"})
|
23 |
|
24 |
st.set_page_config(
|
25 |
-
page_title="fiboaGPT",
|
26 |
page_icon="🦜",
|
27 |
)
|
28 |
-
st.title("fiboaGPT")
|
29 |
|
30 |
# Read the instructions from 'hcat-instructions.txt'
|
31 |
hcat_instructions = ""
|
@@ -58,11 +58,10 @@ The column "area" is in the unit hectares, you may need to convert it to other u
|
|
58 |
There is no other column related to area information, especially not total_area or similar!
|
59 |
If you need to compute the total area, do it manually, with a SUM of the area column. You should always use the 'area' column - never use a 'total_area' column.
|
60 |
The column "perimeter" is in the unit meters, you may need to convert it to other units, e.g. kilometers.
|
61 |
-
The column "
|
62 |
-
"ec_lt" for Latvia, "ec_lv" for Lithuania, "ec_es" for Estonia.
|
63 |
-
Be sure to always include the collection with the right country for any query about a specific country, including it in the WHERE clause.
|
64 |
|
65 |
-
If the user asks for 'percent' of crops or fields for one of the countries you must always calculate the percentage manually, by summing up the area manually. You total number of hectares to calculate the percentage from is
|
|
|
66 |
There is no 'percent' column, so when you calculate the percentage manually you must sum the crop area and then use the total area of the country.
|
67 |
|
68 |
If the user asks for the 'top 10' (or other number) of a crop then sum by area and then sort by that sum.
|
@@ -75,7 +74,9 @@ You should use CTE (computed_quantiles) to do this compute the row_number and qu
|
|
75 |
Be sure to avoid row_number in GROUP BY: The intermediate result in the WITH clause computes quantile so the outer query only groups by quantile. A sample query is 'WITH computed_quantiles AS (SELECT area, CEIL(ROW_NUMBER() OVER (ORDER BY area)::FLOAT / (COUNT(*) OVER () / 5)) AS quantile FROM testing) SELECT quantile, AVG(area) AS average_field_area FROM computed_quantiles GROUP BY quantile ORDER BY quantile;'
|
76 |
Adjust the 5 of 'count(*) over () / 5' to 10 for decile or for other numbers the user requests.
|
77 |
Generally you should use a Common Table Expression (CTE) or subquery to compute the things like ranks first and then filter the results in the main query as DuckDB does not allow window functions (like ROW_NUMBER()) directly inside a WHERE clause
|
78 |
-
|
|
|
|
|
79 |
'''
|
80 |
+ hcat_instructions + # Concatenate the instructions here
|
81 |
'''
|
@@ -91,18 +92,16 @@ db = SQLDatabase.from_uri("duckdb:///duck.db", view_support=True)
|
|
91 |
chain = create_sql_query_chain(llm, db, prompt=new_prompt, k=100)
|
92 |
|
93 |
'''
|
94 |
-
Ask me about fiboa data
|
95 |
-
|
96 |
-
|
97 |
-
-
|
98 |
-
-
|
99 |
-
-
|
100 |
-
- What are the top ten crops that have a field size over 10 hectares?
|
101 |
-
- Compute the total area of all fields in km² and compute the percentage the total area of the baltic states (175015 km²)
|
102 |
|
103 |
'''
|
104 |
|
105 |
-
example = "
|
106 |
with st.container():
|
107 |
if prompt := st.chat_input(example, key="chain"):
|
108 |
st.chat_message("user").write(prompt)
|
|
|
22 |
# tbl = con.read_parquet(url, code).cast({"geometry": "geometry"})
|
23 |
|
24 |
st.set_page_config(
|
25 |
+
page_title="fiboaGPT - Netherlands",
|
26 |
page_icon="🦜",
|
27 |
)
|
28 |
+
st.title("fiboaGPT - Netherlands")
|
29 |
|
30 |
# Read the instructions from 'hcat-instructions.txt'
|
31 |
hcat_instructions = ""
|
|
|
58 |
There is no other column related to area information, especially not total_area or similar!
|
59 |
If you need to compute the total area, do it manually, with a SUM of the area column. You should always use the 'area' column - never use a 'total_area' column.
|
60 |
The column "perimeter" is in the unit meters, you may need to convert it to other units, e.g. kilometers.
|
61 |
+
You must always include the year in the WHERE clause. The column "year" contains the 3 years - 2018, 2019 and 2021. If no year is specified always use 2021.
|
|
|
|
|
62 |
|
63 |
+
If the user asks for 'percent' of crops or fields for one of the countries you must always calculate the percentage manually, by summing up the area manually. The total number of hectares to calculate the percentage from is 1559575 for 2018, 1604600 for 2019 and 1595051 for 2021.
|
64 |
+
If the user asks for the percent of crops excluding silo maize and pasture, use 505624 for the total area.
|
65 |
There is no 'percent' column, so when you calculate the percentage manually you must sum the crop area and then use the total area of the country.
|
66 |
|
67 |
If the user asks for the 'top 10' (or other number) of a crop then sum by area and then sort by that sum.
|
|
|
74 |
Be sure to avoid row_number in GROUP BY: The intermediate result in the WITH clause computes quantile so the outer query only groups by quantile. A sample query is 'WITH computed_quantiles AS (SELECT area, CEIL(ROW_NUMBER() OVER (ORDER BY area)::FLOAT / (COUNT(*) OVER () / 5)) AS quantile FROM testing) SELECT quantile, AVG(area) AS average_field_area FROM computed_quantiles GROUP BY quantile ORDER BY quantile;'
|
75 |
Adjust the 5 of 'count(*) over () / 5' to 10 for decile or for other numbers the user requests.
|
76 |
Generally you should use a Common Table Expression (CTE) or subquery to compute the things like ranks first and then filter the results in the main query as DuckDB does not allow window functions (like ROW_NUMBER()) directly inside a WHERE clause
|
77 |
+
|
78 |
+
This dataset includes data from the Netherlands only, so if the user asks about the Netherlands, do not add any country filter to the query; all rows already belong to the Netherlands.
|
79 |
+
|
80 |
'''
|
81 |
+ hcat_instructions + # Concatenate the instructions here
|
82 |
'''
|
|
|
92 |
chain = create_sql_query_chain(llm, db, prompt=new_prompt, k=100)
|
93 |
|
94 |
'''
|
95 |
+
Ask me about fiboa data for the Netherlands with crops from 2018, 2019 and 2021.
|
96 |
+
- What is the number of tulip fields by year?
|
97 |
+
- What are the quantiles of average field size?
|
98 |
+
- What is the total area of potatoes by year?
|
99 |
+
- What crops have the largest average field sizes?
|
100 |
+
- What is the average field size of strawberry fields by year?
|
|
|
|
|
101 |
|
102 |
'''
|
103 |
|
104 |
+
example = "What are the top ten flowers in the Netherlands?"
|
105 |
with st.container():
|
106 |
if prompt := st.chat_input(example, key="chain"):
|
107 |
st.chat_message("user").write(prompt)
|