Spaces:

sradc
/

visual-content-search-over-videos

Running

sradc

initial commit

1801c3b over 1 year ago

714 Bytes

	import requests
	import torch
	from PIL import Image
	from transformers import CLIPModel, CLIPProcessor

	# Smoke test: score one image against a couple of candidate captions
	# with a pretrained CLIP model and print the resulting probabilities.

	# Model and processor must come from the same checkpoint, so name it once.
	checkpoint = "openai/clip-vit-base-patch32"
	model = CLIPModel.from_pretrained(checkpoint)
	processor = CLIPProcessor.from_pretrained(checkpoint)

	url = "http://images.cocodataset.org/val2017/000000039769.jpg"
	# A timeout keeps a stalled connection from hanging the script forever, and
	# raise_for_status() surfaces HTTP errors directly instead of letting an
	# error page reach PIL and fail with a confusing decode error.
	with requests.get(url, stream=True, timeout=30) as response:
	    response.raise_for_status()
	    image = Image.open(response.raw)
	    # Image.open is lazy; force the pixel data to be read while the
	    # underlying stream is still open.
	    image.load()

	# Tokenize the candidate captions and preprocess the image into one batch
	# of PyTorch tensors; padding aligns captions of different token lengths.
	inputs = processor(
	    text=["a photo of a cat", "a photo of a dog"],
	    images=image,
	    return_tensors="pt",
	    padding=True,
	)

	# Inference only: skip building the autograd graph.
	with torch.no_grad():
	    outputs = model(**inputs)

	# Image-text similarity scores.
	logits_per_image = outputs.logits_per_image
	# Softmax across the captions turns the scores into label probabilities.
	probs = logits_per_image.softmax(dim=1)
	print(probs)