import io

import torch
from PIL import Image
from transformers import CLIPModel, CLIPProcessor

# Load the CLIP model and its processor once at module import time
model_name = "openai/clip-vit-base-patch32"
loaded_model = CLIPModel.from_pretrained(model_name)
loaded_processor = CLIPProcessor.from_pretrained(model_name)
def getTextEmbedding(text):
    # Tokenize the text for the CLIP text encoder
    inputs_text = loaded_processor(text=[text], return_tensors="pt", padding=True)
    # Forward pass through the text encoder without tracking gradients
    with torch.no_grad():
        text_features = loaded_model.get_text_features(
            input_ids=inputs_text.input_ids,
            attention_mask=inputs_text.attention_mask,
        )
    # Squeeze the batch dimension and convert the tensor to a NumPy array
    text_embedding = text_features.squeeze().numpy()
    return text_embedding
def getImageEmbedding(binary_image_data):
    # Decode the raw bytes into a PIL image and preprocess it for CLIP
    image = Image.open(io.BytesIO(binary_image_data))
    inputs = loaded_processor(images=image, return_tensors="pt", padding=True)
    # Forward pass through the image encoder without tracking gradients
    with torch.no_grad():
        image_features = loaded_model.get_image_features(pixel_values=inputs.pixel_values)
    # Squeeze the batch dimension and convert the tensor to a NumPy array
    image_embedding = image_features.squeeze().numpy()
    return image_embedding