Spaces:

imageomics
/

bioclip-demo

Running

App Files Files Community

egrace479

thompsonmj committed on Aug 8, 2024

Commit

1ca4d24

1 Parent(s): 9e5fdea

Add components for sample image return

Browse files

metadata used for filtering with readme describing it
query file with filter and return code
Include script used to upload sample images to S3

Co-authored-by: Matthew Thompson <[email protected]>

Files changed (5) hide show

.gitattributes +1 -1
components/metadata.csv +3 -0
components/metadata_readme.md +11 -0
components/query.py +116 -0
components/sync_samples_to_s3.bash +34 -0

.gitattributes CHANGED Viewed

@@ -33,7 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 *.json filter=lfs diff=lfs merge=lfs -text
 *.jpeg filter=lfs diff=lfs merge=lfs -text
 *.png filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 *.json filter=lfs diff=lfs merge=lfs -text
 *.jpeg filter=lfs diff=lfs merge=lfs -text
 *.png filter=lfs diff=lfs merge=lfs -text
+components/metadata.csv filter=lfs diff=lfs merge=lfs -text

components/metadata.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d8576f6ca106f35387506369a70df01fb92192a740c3b5da2a12ad8303976aad
+size 233934143

components/metadata_readme.md ADDED Viewed

	@@ -0,0 +1,11 @@

+---
+title: Bioclip Demo
+emoji: 🐘
+colorFrom: indigo
+colorTo: purple
+sdk: gradio
+sdk_version: 4.36.1
+app_file: app.py
+pinned: false
+license: mit
+---

components/query.py ADDED Viewed

	@@ -0,0 +1,116 @@

+import io
+import boto3
+import requests
+import numpy as np
+import polars as pl
+from PIL import Image
+from botocore.config import Config
+import logging
+# Module-level logger named after this module.
+logger = logging.getLogger(__name__)
+# S3 for sample images
+# The client is created once at import time and pinned to the us-east-1 region.
+my_config = Config(
+    region_name='us-east-1'
+)
+s3_client = boto3.client('s3', config=my_config)
+# Set basepath for EOL pages for info
+# An EOL page ID (as a string) is appended to this prefix to form the full page URL.
+EOL_URL = "https://eol.org/pages/"
+# Taxonomic ranks ordered from broadest to most specific; the functions below
+# take an integer `rank` that indexes into this list.
+RANKS = ["kingdom", "phylum", "class", "order", "family", "genus", "species"]
+def get_sample(df, pred_taxon, rank):
+    '''
+    Function to retrieve a sample image of the predicted taxon and EOL page link for more info.
+    Parameters:
+    -----------
+    df : DataFrame
+        DataFrame with all sample images listed and their filepaths (in "file_path" column).
+    pred_taxon : str
+        Predicted taxon of the uploaded image.
+    rank : int
+        Index of rank in RANKS chosen for prediction.
+    Returns:
+    --------
+    img : PIL.Image
+        Sample image of predicted taxon for display.
+    eol_page : str
+        URL to EOL page for the taxon (may be a lower rank, e.g., species sample).
+    '''
+    logger.info(f"Getting sample for taxon: {pred_taxon} at rank: {rank}")
+    try:
+        filepath, eol_page_id, full_name, is_exact = get_sample_data(df, pred_taxon, rank)
+    except Exception as e:
+        logger.error(f"Error retrieving sample data: {e}")
+        return None, f"We encountered the following error trying to retrieve a sample image: {e}."
+    if filepath is None:
+        logger.warning(f"No sample image found for taxon: {pred_taxon}")
+        return None, f"Sorry, our EOL images do not include {pred_taxon}."
+    # Get sample image of selected individual
+    try:
+        img_src = s3_client.generate_presigned_url('get_object',
+                                                   Params={'Bucket': 'treeoflife-10m-sample-images',
+                                                           'Key': filepath}
+                                                   )
+        img_resp = requests.get(img_src)
+        img = Image.open(io.BytesIO(img_resp.content))
+        full_eol_url = EOL_URL + eol_page_id
+        if is_exact:
+            eol_page = f"<p>Check out the EOL entry for {pred_taxon} to learn more: <a href={full_eol_url} target='_blank'>{full_eol_url}</a>.</p>"
+        else:
+            eol_page = f"<p>Check out an example EOL entry within {pred_taxon} to learn more: {full_name} <a href={full_eol_url} target='_blank'>{full_eol_url}</a>.</p>"
+        logger.info(f"Successfully retrieved sample image and EOL page for {pred_taxon}")
+        return img, eol_page
+    except Exception as e:
+        logger.error(f"Error retrieving sample image: {e}")
+        return None, f"We encountered the following error trying to retrieve a sample image: {e}."
+def get_sample_data(df, pred_taxon, rank):
+    '''
+    Function to randomly select a sample individual of the given taxon and provide associated native location.
+    Parameters:
+    -----------
+    df : DataFrame
+        DataFrame with all sample images listed and their filepaths (in "file_path" column).
+    pred_taxon : str
+        Predicted taxon of the uploaded image.
+    rank : int
+        Index of rank in RANKS chosen for prediction.
+    Returns:
+    --------
+    filepath : str
+        Filepath of selected sample image for predicted taxon.
+    eol_page_id : str
+        EOL page ID associated with predicted taxon for more information.
+    full_name : str
+        Full taxonomic name of the selected sample.
+    is_exact : bool
+        Flag indicating if the match is exact (i.e., with empty lower ranks).
+    '''
+    for idx in range(rank + 1):
+        taxon = RANKS[idx]
+        target_taxon = pred_taxon.split(" ")[idx]
+        df = df.filter(pl.col(taxon) == target_taxon)
+    if df.shape[0] == 0:
+        return None, np.nan, "", False
+    # First, try to find entries with empty lower ranks
+    exact_df = df
+    for lower_rank in RANKS[rank + 1:]:
+        exact_df = exact_df.filter((pl.col(lower_rank).is_null()) | (pl.col(lower_rank) == ""))
+    if exact_df.shape[0] > 0:
+        df_filtered = exact_df.sample()
+        full_name = " ".join(df_filtered.select(RANKS[:rank+1]).row(0))
+        return df_filtered["file_path"][0], df_filtered["eol_page_id"].cast(pl.String)[0], full_name, True
+    # If no exact matches, return any entry with the specified rank
+    df_filtered = df.sample()
+    full_name = " ".join(df_filtered.select(RANKS[:rank+1]).row(0)) + " " + " ".join(df_filtered.select(RANKS[rank+1:]).row(0))
+    return df_filtered["file_path"][0], df_filtered["eol_page_id"].cast(pl.String)[0], full_name, False

components/sync_samples_to_s3.bash ADDED Viewed

	@@ -0,0 +1,34 @@

+#!/bin/bash
+<<COMMENT
+Usage:
+bash sync_samples_to_s3.bash <BASE_DIR>
+Dependencies:
+- awscli (https://aws.amazon.com/cli/)
+Credentials to export as environment variables:
+- AWS_ACCESS_KEY_ID
+- AWS_SECRET_ACCESS_KEY
+COMMENT
+# Check if a valid directory is provided as an argument
+if [ -z "$1" ]; then
+  echo "Usage: $0 <BASE_DIR>"
+  exit 1
+fi
+if [ ! -d "$1" ]; then
+  echo "Error: $1 is not a valid directory"
+  exit 1
+fi
+BASE_DIR="$1"
+S3_BUCKET="s3://treeoflife-10m-sample-images"
+# Loop through all directories and sync them to S3
+for dir in $BASE_DIR/*; do
+  if [ -d "$dir" ]; then
+    dir_name=$(basename "$dir")
+    aws s3 sync "$dir" "$S3_BUCKET/$dir_name/"
+  fi
+done