daviddao committed on
Commit
7cf32e7
1 Parent(s): 19618dc
Files changed (3)
  1. README.md +26 -13
  2. app.py +78 -0
  3. requirements.txt +7 -0
README.md CHANGED
@@ -1,13 +1,26 @@
- ---
- title: Bioclip Canopy
- emoji: 🦀
- colorFrom: green
- colorTo: indigo
- sdk: gradio
- sdk_version: 4.37.2
- app_file: app.py
- pinned: false
- license: apache-2.0
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # BioClip Image Classification
+
+ This Hugging Face Space demonstrates image classification using the BioClip model. Upload an image to get a prediction of its class, along with the top 3 most similar classes and file paths.
+
+ ## How to Use
+
+ 1. Open the Gradio interface in this Space.
+ 2. Upload an image using the provided input area.
+ 3. The model will process the image and return:
+    - The predicted class
+    - The top 3 most similar classes
+    - The top 3 most similar file paths from the dataset
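+
+ You can also call the Space programmatically. Below is a minimal sketch using `gradio_client`; the Space id and the `api_name` are assumptions for illustration, not values documented in this repo.
+
+ ```python
+ # Hypothetical client-side call; adjust the Space id to the actual deployment.
+ from gradio_client import Client, handle_file
+
+ client = Client("daviddao/bioclip-canopy")  # assumed Space id
+ prediction, top_classes, top_paths = client.predict(
+     handle_file("example.jpg"),  # any local image
+     api_name="/predict",         # Gradio's default endpoint name for an Interface
+ )
+ print(prediction, top_classes, top_paths)
+ ```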
+
+ ## About the Model
+
+ This Space uses the BioClip model, which is designed for biological image classification. The model is loaded from the Hugging Face Hub (imageomics/bioclip).
+
+ ## Technical Details
+
+ - The Space uses Gradio for the user interface.
+ - It employs FAISS indexes for efficient similarity search.
+ - Classification is performed with a k-nearest-neighbors approach and majority voting; a worked toy example follows this list.
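+
+ As a worked toy example, the snippet below mirrors the `majority_vote` helper in `app.py`: each neighbor votes for its class, optionally weighted by a similarity score, and the class with the largest total wins. The labels and weights here are made up.
+
+ ```python
+ import numpy as np
+
+ def majority_vote(classes, scores=None):
+     """Return the class with the largest total (optionally weighted) vote."""
+     if scores is None:
+         scores = np.ones(len(classes))  # unweighted: one vote per neighbor
+     totals = {}
+     for cls, weight in zip(classes, scores):
+         totals[cls] = totals.get(cls, 0.0) + weight
+     return max(totals, key=totals.get)
+
+ # Five hypothetical nearest neighbors and similarity weights:
+ labels = ["oak", "pine", "oak", "pine", "pine"]
+ weights = [0.9, 0.8, 0.7, 0.3, 0.2]
+ print(majority_vote(labels, weights))  # "oak"  (0.9 + 0.7 = 1.6 > 1.3)
+ print(majority_vote(labels))           # "pine" (3 votes vs. 2)
+ ```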
+
+ ## Note
+
+ The dataset and FAISS indexes are expected to be present in the `./data/embeddings_bioclip_False` directory. Make sure to include these files when setting up the Space.
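+
+ If you need to build these files yourself, the sketch below uses the FAISS helpers in `datasets`. The toy data, embedding size, and class names are placeholders; in practice the `embeddings` column would come from the BioClip image encoder used in `app.py`.
+
+ ```python
+ import os
+ import faiss
+ import numpy as np
+ from datasets import ClassLabel, Dataset, Features, Sequence, Value
+
+ out_dir = "./data/embeddings_bioclip_False"
+ os.makedirs(out_dir, exist_ok=True)
+
+ # Toy stand-in for the real dataset; the schema matches what app.py expects.
+ features = Features({
+     "embeddings": Sequence(Value("float32")),
+     "label": ClassLabel(names=["oak", "pine"]),  # placeholder class names
+     "file": Value("string"),
+ })
+ ds = Dataset.from_dict({
+     "embeddings": np.random.rand(4, 512).astype("float32").tolist(),
+     "label": [0, 1, 0, 1],
+     "file": ["a.jpg", "b.jpg", "c.jpg", "d.jpg"],
+ }, features=features)
+
+ # Build one L2 and one inner-product index (normalize the embeddings first
+ # if you want the latter to behave as cosine similarity).
+ ds.add_faiss_index(column="embeddings", index_name="embeddings_l2",
+                    metric_type=faiss.METRIC_L2)
+ ds.add_faiss_index(column="embeddings", index_name="embeddings_cosine",
+                    metric_type=faiss.METRIC_INNER_PRODUCT)
+ ds.save_faiss_index("embeddings_l2", os.path.join(out_dir, "embeddings_l2.faiss"))
+ ds.save_faiss_index("embeddings_cosine", os.path.join(out_dir, "embeddings_cosine.faiss"))
+
+ # Indexes must be dropped before the dataset itself can be saved.
+ ds.drop_index("embeddings_l2")
+ ds.drop_index("embeddings_cosine")
+ ds.save_to_disk(out_dir)
+ ```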
app.py ADDED
@@ -0,0 +1,78 @@
+ import gradio as gr
+ import numpy as np
+ import torch
+ from PIL import Image
+ import open_clip
+ from datasets import Dataset
+ import os
+
+ # Set environment variable to work around an OpenMP runtime issue
+ os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'
+
+ # Load the model and its preprocessing transform
+ model, processor = open_clip.create_model_from_pretrained('hf-hub:imageomics/bioclip')
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ model.to(device)
+ model.eval()  # inference only
+
+ # Load the embeddings dataset
+ embedding_path = "./data/embeddings_bioclip_False"
+ ds = Dataset.load_from_disk(embedding_path)
+
+ # Load the prebuilt FAISS indexes
+ cosine_faiss_path = os.path.join(embedding_path, "embeddings_cosine.faiss")
+ l2_faiss_path = os.path.join(embedding_path, "embeddings_l2.faiss")
+ ds.load_faiss_index("embeddings_cosine", cosine_faiss_path)
+ ds.load_faiss_index("embeddings_l2", l2_faiss_path)
+
+ def majority_vote(classes, scores=None):
+     """Return the class with the largest total (optionally weighted) vote."""
+     if scores is None:
+         # np.ones, not np.ones_like: `classes` holds strings, and
+         # np.ones_like would inherit that dtype.
+         scores = np.ones(len(classes))
+     class_weights = {cls: 0.0 for cls in classes}
+
+     for cls, weight in zip(classes, scores):
+         class_weights[cls] += weight
+
+     majority_class = max(class_weights, key=class_weights.get)
+     return majority_class
+
+ def classify_example(example, index="embeddings_l2", k=10, vote_scores=True):
+     features = np.array(example["embeddings"], dtype=np.float32)
+     scores, nearest = ds.get_nearest_examples(index, features, k)
+
+     class_labels = [ds.features["label"].names[c] for c in nearest["label"]]
+
+     if vote_scores:
+         # FAISS returns L2 distances (lower is closer), so convert them to
+         # similarity weights before voting.
+         weights = 1.0 / (1.0 + scores)
+         prediction = majority_vote(class_labels, weights)
+     else:
+         prediction = majority_vote(class_labels)
+
+     return prediction, class_labels, nearest["file"]
+
+ def embed_image(image: Image.Image):
+     processed_image = processor(image).unsqueeze(0)
+
+     with torch.no_grad():
+         embeddings = model.encode_image(processed_image.to(device))
+
+     return {"embeddings": embeddings.cpu()}
+
+ def predict(image):
+     embedding = embed_image(image)
+     prediction, class_labels, file_paths = classify_example(embedding)
+
+     return prediction, ", ".join(class_labels[:3]), ", ".join(file_paths[:3])
+
+ iface = gr.Interface(
+     fn=predict,
+     inputs=gr.Image(type="pil"),
+     outputs=[
+         gr.Textbox(label="Prediction"),
+         gr.Textbox(label="Top 3 Classes"),
+         gr.Textbox(label="Top 3 File Paths")
+     ],
+     title="BioClip Image Classification",
+     description="Upload an image to get a prediction using the BioClip model."
+ )
+
+ iface.launch()
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ gradio
+ numpy
+ torch
+ Pillow
+ open_clip_torch
+ datasets
+ faiss-cpu