# Vrinda (वृन्दा): Interactive Vegetation Index Analyzer

import os
from datetime import datetime
import ee
import json
import geemap
import numpy as np
import geemap.foliumap as gee_folium
import leafmap.foliumap as leaf_folium
import streamlit as st
import pandas as pd
import geopandas as gpd
from shapely.ops import transform
from functools import reduce
import plotly.express as px
import branca.colormap as cm

# Use the wide page layout for the whole app.
st.set_page_config(layout="wide")

# CSS injected into the page: dark-green background and white text for the
# first button inside each ``div.stButton`` container.
button_css = (
    "\n<style>\n"
    "div.stButton > button:first-child {\n"
    "    background-color: #006400;\n"
    "    color:#ffffff;\n"
    "}\n"
    "</style>"
)
m = st.markdown(button_css, unsafe_allow_html=True)

# Header row: a logo in each narrow outer column, the title in the wide middle one.
cols = st.columns([1, 7, 1])
logo_left, title_slot, logo_right = cols[0], cols[1], cols[-1]

with logo_left:
    st.image("Final_IITGN-Logo-symmetric-Color.png")
with logo_right:
    st.image("IFS.jpg")

# Centered title rendered as raw HTML.
with title_slot:
    title_html = """
        <h1 style="text-align: center;">Vrinda (वृन्दा): Interactive Vegetation Index Analyzer</h1>
        """
    st.markdown(title_html, unsafe_allow_html=True)

############################################
# Hyperparameters
############################################        
st.write("<h2><div style='text-align: center;'>User Inputs</div></h2>", unsafe_allow_html=True)

st.write("Select the vegetation indices to calculate:")
# Formula shown next to each index checkbox (LaTeX, rendered by Streamlit).
formulas = {
    "NDVI": r"$\frac{NIR - Red}{NIR + Red}$",
    "EVI": r"$G \times \frac{NIR - Red}{NIR + C1 \times Red - C2 \times Blue + L}$",
    "EVI2": r"$G \times \frac{NIR - Red}{NIR + L + C \times Red}$",
}
all_veg_indices = list(formulas)

# One checkbox per index (ticked by default); keep the names that stay ticked.
veg_indices = [
    index_name
    for index_name in all_veg_indices
    if st.checkbox(f"{index_name} = {formulas[index_name]}", value=True)
]

st.write("Select the parameters for the EVI/EVI2 calculation (default is as per EVI's Wikipedia page)")
# Every default is a float on purpose: st.number_input derives the widget's
# numeric type from ``value``, so the previous int defaults (6 and 1) made the
# C1 and L inputs integer-only and fractional coefficients could not be entered.
evi_defaults = {"G": 2.5, "C1": 6.0, "C2": 7.5, "L": 1.0, "C": 2.4}
cols = st.columns(len(evi_defaults))
# Coefficient name -> value currently entered by the user (one input per column).
evi_vars = {
    name: col.number_input(name, value=default)
    for col, (name, default) in zip(cols, evi_defaults.items())
}

############################################
# Functions
############################################
def daterange_str_to_dates(daterange_str):
    """Parse a ``"<start>-<end>"`` date-range string into two timestamps.

    The string is split on ``"-"`` and must yield exactly two parts (otherwise
    the unpacking raises ``ValueError``); each part is parsed with
    ``pd.to_datetime``. Returns ``(start_date, end_date)``.
    """
    start_text, end_text = daterange_str.split("-")
    return pd.to_datetime(start_text), pd.to_datetime(end_text)

def daterange_dates_to_str(start_date, end_date):
    """Format two dates as ``"YYYY/MM/DD-YYYY/MM/DD"``."""
    day_format = "%Y/%m/%d"
    return "-".join(day.strftime(day_format) for day in (start_date, end_date))

def daterange_str_to_year(daterange_str):
    """Return the year of the start date in a ``"<start>-<end>"`` range string.

    Only the part before the ``"-"`` is parsed; the string must still split
    into exactly two parts.
    """
    start_text, _end_text = daterange_str.split("-")
    return pd.to_datetime(start_text).year

def shape_3d_to_2d(shape):
    """Return ``shape`` without its Z coordinate.

    Shapes whose ``has_z`` is false are returned unchanged; otherwise every
    coordinate is mapped from ``(x, y, z)`` to ``(x, y)`` via ``transform``.
    """
    if not shape.has_z:
        return shape

    def drop_z(x, y, z):
        return (x, y)

    return transform(drop_z, shape)

def preprocess_gdf(gdf):
    """Reproject ``gdf`` to EPSG:7761 and strip Z coordinates from its geometries.

    Returns the reprojected frame produced by ``to_crs``, with its
    ``"geometry"`` column replaced by the 2D version of each shape.
    """
    projected = gdf.to_crs(epsg=7761)  # epsg for Gujarat
    flattened = projected["geometry"].apply(shape_3d_to_2d)
    projected["geometry"] = flattened
    return projected

def check_valid_geometry(geometry_gdf):
    """Halt the Streamlit script unless the selected geometry is a Polygon.

    ``geometry_gdf`` must hold exactly one geometry (``.item()`` is used to
    extract it). For any type other than ``"Polygon"`` an error is shown with
    ``st.error`` and the script run is ended with ``st.stop()``.
    """
    geometry = geometry_gdf.geometry.item()
    # ``geom_type`` is the supported attribute; the ``type`` alias used here
    # before is deprecated in Shapely 2.0.
    geometry_type = geometry.geom_type
    if geometry_type != "Polygon":
        st.error(
        f"Selected geometry is of type '{geometry_type}'. Please provide a 'Polygon' geometry."
        )
        st.stop()
        
def add_geometry_to_maps(map_list):
    """Draw the selected geometry and its buffer on every map in ``map_list``.

    Reads the module-level ``buffer_geometry_gdf`` and ``geometry_gdf``. The
    buffer is outlined in red and the geometry in blue, both without fill.
    """
    def outline(color):
        # Style callback: coloured outline, fully transparent fill.
        return lambda x: {"color": color, "fillOpacity": 0.0}

    for folium_map in map_list:
        folium_map.add_gdf(buffer_geometry_gdf, layer_name="Geometry Buffer", style_function=outline("red"))
        folium_map.add_gdf(geometry_gdf, layer_name="Geometry", style_function=outline("blue"))

def add_indices(image, nir_band, red_band, blue_band):
    """Return ``image`` with a negated cloud band and NDVI/EVI/EVI2 bands added.

    The three named bands are selected and divided by 10000 before use. The
    EVI/EVI2 coefficients (``G``, ``C1``, ``C2``, ``L``, ``C``) are read from
    the module-level ``evi_vars``. Every index band is clamped to [-1, 1].
    Added bands: ``Neg_MSK_CLDPRB``, ``NDVI``, ``EVI``, ``EVI2``.
    """
    def scaled(band_name):
        return image.select(band_name).divide(10000)

    # ``MSK_CLDPRB`` multiplied by -1.
    neg_cloud = image.select("MSK_CLDPRB").multiply(-1).rename("Neg_MSK_CLDPRB")

    nir = scaled(nir_band)
    red = scaled(red_band)
    blue = scaled(blue_band)
    nir_minus_red = nir.subtract(red)

    gain = evi_vars['G']
    soil_l = evi_vars['L']

    # NDVI = (NIR - Red) / (NIR + Red)
    ndvi = nir_minus_red.divide(nir.add(red)).rename("NDVI").clamp(-1, 1)

    # EVI formula taken from: https://en.wikipedia.org/wiki/Enhanced_vegetation_index
    # EVI = G * (NIR - Red) / (NIR + C1 * Red - C2 * Blue + L)
    evi_denominator = (
        nir.add(red.multiply(evi_vars['C1']))
        .subtract(blue.multiply(evi_vars['C2']))
        .add(soil_l)
    )
    evi = nir_minus_red.divide(evi_denominator).multiply(gain).rename("EVI").clamp(-1, 1)

    # EVI2 = G * (NIR - Red) / (NIR + L + C * Red)
    evi2_denominator = nir.add(soil_l).add(red.multiply(evi_vars['C']))
    evi2 = nir_minus_red.divide(evi2_denominator).multiply(gain).rename("EVI2").clamp(-1, 1)

    return image.addBands([neg_cloud, ndvi, evi, evi2])

def _first_feature_properties(image, regions, scale):
    """Run ``geemap.zonal_stats`` on ``image`` and return the first feature's properties."""
    stats = geemap.zonal_stats(image, regions, scale=scale, return_fc=True).getInfo()
    return stats["features"][0]["properties"]


def process_date(daterange, satellite, veg_indices):
    """Compute vegetation-index statistics and mosaics for one date range.

    Args:
        daterange: ``(start_date, end_date)`` pair; passed to ``filterDate``
            and used to build the row label.
        satellite: Key into the module-level ``satellites`` mapping.
        veg_indices: Names of the index bands to summarise.

    Side effects:
        Writes one row, labelled with the formatted date range, into the
        module-level ``result_df``. The row holds, per index, the zonal value
        inside the geometry, inside the buffer, their ratio and the quality
        mosaic; three images for visual comparison; and the cloud probability.
        A status line is written to the page via ``write_info``.

    Any exception is printed and reported as "Imagery not available", so that
    one failing date range does not abort the whole run.
    """
    start_date, end_date = daterange
    daterange_str = daterange_dates_to_str(start_date, end_date)
    prefix = f"Processing {satellite} - {daterange_str}"
    try:
        attrs = satellites[satellite]
        scale = attrs["scale"]
        # NOTE(review): Earth Engine's filterDate treats the end date as
        # exclusive, so images from ``end_date`` itself are presumably left
        # out. Confirm whether the range is meant to be inclusive.
        collection = (
            attrs["collection"]
            .filterBounds(buffer_ee_geometry)
            .filterDate(start_date, end_date)
        )

        bucket = {}
        # Properties of the most recent buffer reduction; the cloud probability
        # is read from it after the loop. Stays None when no index is
        # requested. Previously this relied on a loop-leftover variable and
        # raised NameError, which was then reported as missing imagery.
        buffer_props = None
        for veg_index in veg_indices:
            mosaic_veg_index = collection.qualityMosaic(veg_index)
            mean_veg_index = _first_feature_properties(
                mosaic_veg_index, ee_feature_collection, scale
            )[veg_index]
            buffer_props = _first_feature_properties(
                mosaic_veg_index, buffer_ee_feature_collection, scale
            )
            buffer_mean_veg_index = buffer_props[veg_index]

            bucket[veg_index] = mean_veg_index
            bucket[f"{veg_index}_buffer"] = buffer_mean_veg_index
            # A buffer value of exactly 0 used to raise ZeroDivisionError and
            # discard the whole date range; record an undefined ratio instead.
            bucket[f"{veg_index}_ratio"] = (
                float("nan")
                if buffer_mean_veg_index == 0
                else mean_veg_index / buffer_mean_veg_index
            )
            bucket[f"mosaic_{veg_index}"] = mosaic_veg_index

        # Images kept for the visual comparison section.
        bucket["mosaic_visual_max_ndvi"] = collection.qualityMosaic("NDVI")
        bucket["mosaic_visual_median"] = collection.median()
        bucket["image_visual_least_cloud"] = collection.sort('CLOUDY_PIXEL_PERCENTAGE').first()

        if satellite == "COPERNICUS/S2_SR_HARMONIZED" and buffer_props is not None:
            # MSK_CLDPRB is divided by 100 to give the 0-to-1 value in the column name.
            cloud_mask_probability = buffer_props["MSK_CLDPRB"] / 100
        else:
            cloud_mask_probability = None
        bucket["Cloud (0 to 1)"] = cloud_mask_probability

        result_df.loc[daterange_str, list(bucket.keys())] = list(bucket.values())
        count = collection.size().getInfo()
        write_info(f"{prefix} - Processed {count} images")
    except Exception as e:
        # Deliberate best-effort boundary: log the cause and move on.
        print(e)
        write_info(f"{prefix} - Imagery not available")

def write_info(info):
    """Write ``info`` to the page inside a dark-green (#006400) ``<span>``.

    The text is rendered with ``unsafe_allow_html=True``, so any HTML in
    ``info`` is passed through as-is.
    """
    green_html = f"<span style='color:#006400;'>{info}</span>"
    st.write(green_html, unsafe_allow_html=True)


############################################
# One time setup
############################################


def _build_satellites():
    """Build the satellite catalogue with index bands for the current ``evi_vars``.

    Each entry maps a collection id to its ``scale`` and an ``ee.ImageCollection``
    whose bands are renamed and extended by ``add_indices``.
    """
    return {
        "COPERNICUS/S2_SR_HARMONIZED": {
            "scale": 10,
            "collection": ee.ImageCollection("COPERNICUS/S2_SR_HARMONIZED")
            .select(
                ["B2", "B4", "B8", "MSK_CLDPRB", "TCI_R", "TCI_G", "TCI_B"],
                ["Blue", "Red", "NIR", "MSK_CLDPRB", "R", "G", "B"],
            )
            .map(lambda image: add_indices(image, nir_band="NIR", red_band="Red", blue_band="Blue")),
        },
    }


def one_time_setup():
    """Prepare Earth Engine and load static data once per Streamlit session.

    Ensures a credentials file exists at ``~/.config/earthengine/credentials``
    (written from the ``EE`` environment variable when missing), initializes
    Earth Engine, and stores ``satellites`` and ``wayback_mapping`` (loaded
    from ``wayback_imagery.json``) in ``st.session_state``.

    Raises:
        ValueError: If neither the credentials file nor the ``EE`` environment
            variable is available.
    """
    credentials_path = os.path.expanduser("~/.config/earthengine/credentials")
    if not os.path.exists(credentials_path):
        ee_credentials = os.environ.get("EE")
        if ee_credentials is None:
            raise ValueError(
                f"Earth Engine credentials not found at {credentials_path} or in the environment variable 'EE'"
            )
        # Write the credentials from the environment to the expected file.
        os.makedirs(os.path.dirname(credentials_path), exist_ok=True)
        with open(credentials_path, "w") as f:
            f.write(ee_credentials)

    ee.Initialize()

    st.session_state.satellites = _build_satellites()
    with open("wayback_imagery.json") as f:
        st.session_state.wayback_mapping = json.load(f)

if "one_time_setup_done" not in st.session_state:
    one_time_setup()
    st.session_state.one_time_setup_done = True

# Rebuild the collections on every rerun. The Earth Engine Python client calls
# the mapped function when ``.map()`` is invoked, so the ``evi_vars`` values
# are baked into the collection at that moment. Reusing the collection cached
# at session start froze the coefficients at their defaults and ignored any
# values entered later.
st.session_state.satellites = _build_satellites()
satellites = st.session_state.satellites
wayback_mapping = st.session_state.wayback_mapping

############################################
# App
############################################

# Input: Satellite Sources
# Each source gets a ticked, disabled checkbox.
st.write("Select the satellite sources:")
satellite_selected = dict.fromkeys(satellites)
for satellite in satellites:
    satellite_selected[satellite] = st.checkbox(satellite, value=True, disabled=True)

# Date range input
# The picker is limited to the current calendar year and defaults to 15 Nov - 15 Dec.
current_year = datetime.now().year
jan_1, nov_15, dec_15, dec_31 = (
    pd.to_datetime(f"{current_year}/{month_day}", format="%Y/%m/%d")
    for month_day in ("01/01", "11/15", "12/15", "12/31")
)
input_daterange = st.date_input(
    "Date Range (Ignore year. App will compute indices for all possible years)",
    value=(nov_15, dec_15),
    min_value=jan_1,
    max_value=dec_31,
)
min_year = int(st.number_input("Minimum Year", value=2010, min_value=2010, step=1))
max_year = int(st.number_input("Maximum Year", value=current_year, min_value=2010, step=1))

# Input: GeoJSON/KML file
# Nothing below runs until a file has been uploaded.
input_file = st.file_uploader("Upload KML/GeoJSON file", type=["geojson", "kml", "shp"])
if input_file is None:
    st.stop()
buffer = st.number_input("Buffer (m)", value=150, min_value=0, step=1)

input_gdf = preprocess_gdf(gpd.read_file(input_file))

# Input: Geometry
def format_fn(x):
    """Label a row in the geometry selector with its non-geometry attributes."""
    attributes = input_gdf.drop(columns=["geometry"])
    return attributes.loc[x].to_dict()

input_geometry_idx = st.selectbox("Select the geometry", input_gdf.index, format_func=format_fn)
selected_rows = input_gdf.index == input_geometry_idx
geometry_gdf = input_gdf[selected_rows]
# Same row, with the geometry grown by the buffer distance.
buffer_geometry_gdf = geometry_gdf.copy()
buffer_geometry_gdf["geometry"] = buffer_geometry_gdf["geometry"].buffer(buffer)
check_valid_geometry(geometry_gdf)


def _to_ee_geometry(single_row_gdf):
    """Convert a one-row frame to an ``ee.Geometry`` after reprojecting to EPSG:4326."""
    shape = single_row_gdf.to_crs(4326).geometry.item()
    return ee.Geometry(shape.__geo_interface__)


# Derived Inputs
ee_geometry = _to_ee_geometry(geometry_gdf)
ee_feature_collection = ee.FeatureCollection(ee_geometry)
buffer_ee_geometry = _to_ee_geometry(buffer_geometry_gdf)
buffer_ee_feature_collection = ee.FeatureCollection(buffer_ee_geometry)

# visualize the geometry
# The selected geometry and its buffer are drawn over the Wayback tile layer
# whose key parses to the latest date.
keys = list(wayback_mapping)
latest_date = sorted(keys, key=pd.to_datetime)[-1]
latest_label = latest_date.replace('-', '/')

m = leaf_folium.Map()
m.add_tile_layer(wayback_mapping[latest_date], name=f"Esri Wayback - {latest_label}", attribution="Esri")
add_geometry_to_maps([m])
write_info(f"""
<div style="text-align: center;">
    Latest Esri Imagery - {latest_label}
</div>
""")
m.to_streamlit()

# Generate stats
# GeoJSON polygon coordinates are a list of rings: the exterior first, then any holes.
polygon_rings = json.loads(geometry_gdf.to_crs(4326).to_json())['features'][0]['geometry']['coordinates']
# Every value is wrapped in a one-element list so the frame always has exactly
# one row. Passing the raw ring list produced one row per ring, which made the
# ``.item()`` calls below fail for polygons with holes. "Points" holds the
# exterior ring, which is what was shown for hole-free polygons before.
stats_df = pd.DataFrame(
    {
        "Area (m^2)": [geometry_gdf.area.item()],
        "Perimeter (m)": [geometry_gdf.length.item()],
        "Points": [polygon_rings[0]],
    }
)
area_m2 = stats_df['Area (m^2)'].item()
perimeter_m = stats_df['Perimeter (m)'].item()
st.write("<h3><div style='text-align: center;'>Geometry Metrics</div></h3>", unsafe_allow_html=True)
st.markdown(f"""| Metric | Value |
| --- | --- |
| Area (m^2) | {area_m2:.2f} m^2 = {area_m2/10000:.2f} ha |
| Perimeter (m) | {perimeter_m:.2f} m |
| Points | {stats_df['Points'][0]} |
""")

stats_csv = stats_df.to_csv(index=False)
st.download_button("Download Geometry Metrics", stats_csv, "geometry_metrics.csv", "text/csv", use_container_width=True)

def _date_in_year(year, month, day):
    """Return ``year``/``month``/``day`` as a Timestamp, clamping ``day`` to the month's length.

    Clamping turns 29 February into 28 February in non-leap years instead of
    raising.
    """
    last_day = pd.Timestamp(year=year, month=month, day=1).days_in_month
    return pd.Timestamp(year=year, month=month, day=min(day, last_day))


# Submit
submit = st.button("Calculate Vegetation Indices", use_container_width=True)

st.write("<h2><div style='text-align: center;'>Results</div></h2>", unsafe_allow_html=True)

if submit:
    # Validate the inputs up front so the user gets a clear message instead of
    # a traceback or a misleading "Imagery not available" for every year.
    if not any(satellite_selected.values()):
        st.error("Please select at least one satellite source")
        st.stop()
    if not veg_indices:
        st.error("Please select at least one vegetation index")
        st.stop()
    # While a range is being picked, st.date_input returns only the start date.
    if len(input_daterange) != 2:
        st.error("Please select both a start date and an end date")
        st.stop()
    if min_year > max_year:
        st.error("Minimum Year must not be greater than Maximum Year")
        st.stop()

    # Create range: the same day/month window repeated for every requested year.
    range_start, range_end = input_daterange
    dates = [
        (
            _date_in_year(year, range_start.month, range_start.day),
            _date_in_year(year, range_end.month, range_end.day),
        )
        for year in range(min_year, max_year + 1)
    ]

    # process_date() fills this module-level frame, one row per date range.
    result_df = pd.DataFrame()
    for satellite in satellites:
        if not satellite_selected[satellite]:
            continue

        with st.spinner(f"Processing {satellite} ..."):
            progress_bar = st.progress(0)
            for i, daterange in enumerate(dates):
                process_date(daterange, satellite, veg_indices)
                progress_bar.progress((i + 1) / len(dates))

    # Keep the results across reruns so later widget changes do not discard them.
    st.session_state.result = result_df

print("Printing result...")
if "result" in st.session_state and st.session_state.result.dropna(how="all").empty:
    # No date range produced a row; the table, charts and maps below would all
    # fail on an empty frame, so say so instead.
    st.warning("No imagery was available for the selected inputs, so there are no results to display.")
elif "result" in st.session_state:
    result_df = st.session_state.result
    print(result_df.columns)

    # Drop rows and columns that are entirely NaN. The copy keeps the column
    # assignments below away from the frame stored in the session state.
    result_df = result_df.dropna(how="all").dropna(axis=1, how="all").copy()
    print(result_df.columns)
    print(result_df.head(2))

    # Convert what can be converted to numbers. Columns that cannot (the stored
    # mosaics/images) are left untouched. This replaces the deprecated
    # ``errors="ignore"`` option of pd.to_numeric.
    for column in result_df.columns:
        try:
            result_df[column] = pd.to_numeric(result_df[column])
        except (ValueError, TypeError):
            continue

    df_numeric = result_df.select_dtypes(include=["float64"])
    st.write(df_numeric)

    df_numeric_csv = df_numeric.to_csv(index=True)
    st.download_button("Download Time Series Data", df_numeric_csv, "vegetation_indices.csv", "text/csv", use_container_width=True)

    # The stored results may come from an earlier run with different
    # checkboxes ticked, so only show indices that were actually computed.
    shown_indices = [
        veg_index
        for veg_index in veg_indices
        if {veg_index, f"{veg_index}_buffer"} <= set(df_numeric.columns)
        and f"mosaic_{veg_index}" in result_df.columns
    ]

    # Time series per index: inside the geometry vs. inside the buffer, by year.
    df_numeric.index = [daterange_str_to_year(daterange) for daterange in df_numeric.index]
    for veg_index in shown_indices:
        fig = px.line(df_numeric, y=[veg_index, f"{veg_index}_buffer"], markers=True)
        fig.update_layout(xaxis=dict(tickvals=df_numeric.index, ticktext=df_numeric.index))
        st.plotly_chart(fig)

    st.write("<h3><div style='text-align: center;'>Visual Comparison between Two Years</div></h3>", unsafe_allow_html=True)
    cols = st.columns(2)

    # Defaults: first and last available date range.
    with cols[0]:
        year_1 = st.selectbox("Year 1", result_df.index, index=0, format_func=lambda x: daterange_str_to_year(x))
    with cols[1]:
        year_2 = st.selectbox("Year 2", result_df.index, index=len(result_df.index) - 1, format_func=lambda x: daterange_str_to_year(x))

    vis_params = {'min': 0, 'max': 1, 'palette': ['white', 'green']}  # Example visualization for Sentinel-2

    # Colormap matching vis_params, added to every index map.
    colormap = cm.LinearColormap(
        colors=vis_params['palette'],
        vmin=vis_params['min'],
        vmax=vis_params['max']
    )

    # Side-by-side index maps for the two selected date ranges.
    for veg_index in shown_indices:
        st.write(f"<h3><div style='text-align: center;'>{veg_index}</div></h3>", unsafe_allow_html=True)
        cols = st.columns(2)
        for col, daterange_str in zip(cols, [year_1, year_2]):
            mosaic = result_df.loc[daterange_str, f"mosaic_{veg_index}"]
            with col:
                m = gee_folium.Map()
                # An extra ee_tile_layer(...) whose result was never used, and
                # a satellite check that relied on a leaked loop variable to
                # set two unused values, were removed here.
                m.add_layer(
                    mosaic.select(veg_index), vis_params
                )
                add_geometry_to_maps([m])
                m.add_child(colormap)
                m.to_streamlit()

    # Side-by-side RGB views for the two selected date ranges.
    rgb_views = {
        "RGB (Least Cloud Tile Crop)": "image_visual_least_cloud",
        "RGB (Max NDVI Mosaic)": "mosaic_visual_max_ndvi",
        "RGB (Median Mosaic)": "mosaic_visual_median",
    }
    for name, key in rgb_views.items():
        st.write(f"<h3><div style='text-align: center;'>{name}</div></h3>", unsafe_allow_html=True)
        cols = st.columns(2)
        for col, daterange_str in zip(cols, [year_1, year_2]):
            with col:
                m = gee_folium.Map()
                visual_mosaic = result_df.loc[daterange_str, key]
                m.add_layer(
                    visual_mosaic.select(["R", "G", "B"])
                )
                add_geometry_to_maps([m])
                m.to_streamlit()

    # Esri Wayback layer whose date is closest to the middle of each selected range.
    st.write("<h3><div style='text-align: center;'>Esri RGB Imagery</div></h3>", unsafe_allow_html=True)
    cols = st.columns(2)
    for col, daterange_str in zip(cols, [year_1, year_2]):
        start_date, end_date = daterange_str_to_dates(daterange_str)
        mid_date = start_date + (end_date - start_date) / 2
        esri_date = min(wayback_mapping.keys(), key=lambda x: abs(pd.to_datetime(x) - mid_date))
        with col:
            m = leaf_folium.Map()
            m.add_tile_layer(wayback_mapping[esri_date], name=f"Esri Wayback Imagery - {esri_date}", attribution="Esri")
            add_geometry_to_maps([m])
            write_info(f"""
            <div style="text-align: center;">
                Esri Imagery - {esri_date.replace('-', '/')}
            </div>
            """)
            m.to_streamlit()