import os
import logging
from tempfile import TemporaryFile
import cv2
import numpy as np
from PIL import Image
import tator
import inference
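# `inference` is assumed to be a project-local module whose run_inference()
# returns a YOLOv5-style results object exposing .pandas().xyxy, as used below.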
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
# Read environment variables that are provided from TATOR
host = os.getenv('HOST')
token = os.getenv('TOKEN')
project_id = int(os.getenv('PROJECT_ID'))
media_ids = [int(id_) for id_ in os.getenv('MEDIA_IDS').split(',')]
frames_per_inference = int(os.getenv('FRAMES_PER_INFERENCE', 30))
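# With the default of 30, a detection pass runs on every 30th frame, i.e.
# roughly once per second of a 30 fps video.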
# Set up the TATOR API.
api = tator.get_api(host, token)
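# get_api returns an authenticated API client bound to `host`; all REST
# calls below go through this object.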
# Iterate through each video.
for media_id in media_ids:
    # Download video.
    media = api.get_media(media_id)
    logger.info(f"Downloading {media.name}...")
    out_path = f"/tmp/{media.name}"
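    # download_media is a generator: it streams the file to out_path and
    # yields the download progress as a percentage.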
    for progress in tator.util.download_media(api, media, out_path):
        logger.info(f"Download progress: {progress}%")
    # Do inference on each video.
    logger.info(f"Doing inference on {media.name}...")
    localizations = []
    vid = cv2.VideoCapture(out_path)
    frame_number = 0
    # Read *every* frame from the video, break when at the end.
    while True:
        ret, frame = vid.read()
        if not ret:
            break
        # For every N frames, make a prediction; append prediction results
        # to a list, increase the frame count.
        if frame_number % frames_per_inference == 0:
            # Create a temporary file, convert the frame from OpenCV's BGR
            # channel order to RGB, and save the image data to the file.
            framefile = TemporaryFile(suffix='.jpg')
            im = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
            im.save(framefile, format='JPEG')
            framefile.seek(0)
            # Predictions contains all information inside a pandas dataframe.
            predictions = inference.run_inference(framefile)
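            # Each row of the xyxy dataframe is one detection: xmin, ymin,
            # xmax, ymax, confidence, class, name (pixel coordinates).
            # Note: Tator box localizations typically use coordinates
            # normalized to [0, 1] relative to the media dimensions, so the
            # raw pixel values below may need to be scaled accordingly.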
            for _, r in predictions.pandas().xyxy[0].iterrows():
                spec = {}
                spec['media_id'] = media_id
                spec['type'] = None  # ID of a box localization type in the project (left unset here).
                spec['frame'] = frame_number
                x, y, x2, y2 = r['xmin'], r['ymin'], r['xmax'], r['ymax']
                w, h = x2 - x, y2 - y
                spec['x'] = x
                spec['y'] = y
                spec['width'] = w
                spec['height'] = h
                spec['class_category'] = r['name']
                spec['confidence'] = r['confidence']
                localizations.append(spec)
        frame_number += 1
    # End interaction with video properly.
    vid.release()
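    # `localizations` now holds one spec per detection across all sampled
    # frames of this video; upload them in batches rather than one request
    # per box.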
logger.info(f"Uploading object detections on {media.name}...")
# Create the localizations in the video.
num_created = 0
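    # chunked_create splits the spec list into batches and calls
    # create_localization_list once per batch; each response's `id`
    # attribute lists the IDs created in that batch.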
    for response in tator.util.chunked_create(api.create_localization_list,
                                              project_id,
                                              localization_spec=localizations):
        num_created += len(response.id)
    # Output pretty logging information.
    logger.info(f"Successfully created {num_created} localizations on "
                f"{media.name}!")
    logger.info("-------------------------------------------------")
logger.info(f"Completed inference on {len(media_ids)} files.") |