habana_frameworks.mediapipe.fn.SSDMetadata
habana_frameworks.mediapipe.fn.SSDMetadata
- Class:
habana_frameworks.mediapipe.fn.SSDMetadata(**kwargs)
- Define graph call:
__call__(ids, sizes, boxes, labels, lengths, flip)
Parameter:
ids - Input tensor of image ids. size=[batch]. Supported dimensions: minimum = 1, maximum = 1. Supported data types: UINT32.
sizes - Input tensor of image sizes. size=[batch, 2]. Supported dimensions: minimum = 2, maximum = 2. Supported data types: UINT32.
boxes - Input tensor of bounding boxes for each image. size=[batch, 200, 4]. Supported dimensions: minimum = 3, maximum = 3. Supported data types: FLOAT32.
labels - Input tensor of image labels for each bounding box. size=[batch, 200]. Supported dimensions: minimum = 2, maximum = 2. Supported data types: UINT32.
lengths - Input tensor of number of bounding boxes per image. size=[batch]. Supported dimensions: minimum = 1, maximum = 1. Supported data types: UINT32.
(optional)flip - Input tensor with predicate information for flip. Supported dimensions: minimum = 1, maximum = 1. Supported data types: UINT8.
Description:
SSDMetadata operator takes the metadata output of Coco reader and performs operations such as crop, flip and encoding of bounding boxes.
Crop and flip operations are optional in the serialize parameter. If crop is not present, then the SSDMetadata operator will not produce crop windows in the output.
If flip is not provided, then the flip input tensor is not needed and image flipping will not be done by the SSDMetadata operator.
If crop is present, then it should be the first operation in the list. Encode should be the last operation in the list.
- Supported backend:
CPU
Keyword Arguments
| kwargs | Description |
|---|---|
| workers | Number of threads used for SSD metadata processing. |
| serialize | Metadata operations to be done on a batch. |
| cropping_iterations | Number of iterations to be used to get a valid crop window. If a valid crop window is not found in the given iterations, then no cropping is done. |
| seed | Seed to be used for SSD crop randomization. |
Output:
| Output Value | Description |
|---|---|
| (optional) windows | Crop window coordinates. |
| ids | Image id from annotation file. |
| sizes | Image size after crop. |
| boxes | List of encoded (wrt 8732 anchors) bounding boxes for every image [x_start, y_start, width, height]. |
| labels | List of labels for every encoded bounding box. |
| lengths | Number of ground truth boxes per image. |
| batch | Number of valid images in a batch. |
Example: SSDMetadata Operator
The following code snippet shows usage of SSDMetadata operator:
from habana_frameworks.mediapipe import fn
from habana_frameworks.mediapipe.mediapipe import MediaPipe
from habana_frameworks.mediapipe.media_types import imgtype as it
from habana_frameworks.mediapipe.media_types import dtype as dt
from habana_frameworks.mediapipe.operators.cpu_nodes.cpu_nodes import media_function
from media_pipe_api import MetadataOps
import numpy as np
import os
g_display_timeout = os.getenv("DISPLAY_TIMEOUT") or 5
# Create MediaPipe derived class
class myMediaPipe(MediaPipe):
    """Example pipeline built around the SSDMetadata operator.

    The graph reads a COCO dataset, runs the SSD metadata operations
    (crop, flip, encode) on the annotations, and decodes, flips and
    transposes the images.
    """

    def __init__(self, device, queue_depth, batch_size,
                 num_threads, op_device, dir, ann_file,
                 img_h, img_w):
        """Create the operator nodes used by ``definegraph``.

        Args:
            device: Forwarded to the ``MediaPipe`` constructor.
            queue_depth: Forwarded to the ``MediaPipe`` constructor.
            batch_size: Forwarded to the ``MediaPipe`` constructor; also the
                length of the flip predicate tensor.
            num_threads: Forwarded to the ``MediaPipe`` constructor.
            op_device: Device on which the SSDMetadata operator is placed.
            dir: Root directory handed to the COCO reader.
            ann_file: Annotation file handed to the COCO reader.
            img_h: Height used for the decoder resize.
            img_w: Width used for the decoder resize.
        """
        # The pipeline is named after the class itself.
        pipe_name = self.__class__.__name__
        super().__init__(device, queue_depth, batch_size, num_threads,
                         pipe_name)

        # COCO reader: source of the JPEGs and their metadata.
        self.input = fn.CocoReader(
            root=dir,
            annfile=ann_file,
            seed=0,
            shuffle=False,
            drop_remainder=True,
            num_slices=1,
            slice_index=0,
            partial_batch=False,
            device='cpu',
        )

        # One UINT8 flip predicate per image, generated by random_flip_func.
        self.random_flip_input = fn.MediaFunc(
            func=random_flip_func,
            shape=[batch_size],
            dtype=dt.UINT8,
            device='cpu',
        )

        # Metadata operations in the order they are applied: crop first,
        # encode last.
        metadata_ops = [MetadataOps.crop,
                        MetadataOps.flip,
                        MetadataOps.encode]
        self.ssd_metadata = fn.SSDMetadata(
            workers=1,
            seed=0,
            serialize=metadata_ops,
            device=op_device,
        )

        # Image path: decode (with resize), flip, transpose.
        self.decode = fn.ImageDecoder(
            device="hpu",
            output_format=it.RGB_P,
            resize=[img_w, img_h],
        )
        self.random_flip = fn.RandomFlip(horizontal=1, device="hpu")
        self.transpose = fn.Transpose(
            permutation=[2, 0, 1, 3],
            tensorDim=4,
            dtype=dt.UINT8,
            device="hpu",
        )

    def definegraph(self):
        """Wire the nodes together and return the pipeline outputs.

        Returns:
            Tuple ``(images, ids, sizes, boxes, labels, num_boxes, batch)``.
        """
        flip_predicate = self.random_flip_input()
        jpegs, ids, sizes, boxes, labels, lengths, batch = self.input()

        # The same flip predicate drives both the metadata flip and the image
        # flip below.
        metadata_out = self.ssd_metadata(
            ids, sizes, boxes, labels, lengths, flip_predicate)
        windows, ids, sizes, boxes, labels, num_boxes = metadata_out

        # The crop windows from the metadata operator are passed to the decoder.
        decoded = self.decode(jpegs, windows)
        flipped = self.random_flip(decoded, flip_predicate)
        images = self.transpose(flipped)
        return images, ids, sizes, boxes, labels, num_boxes, batch
class random_flip_func(media_function):
    """Media function that produces a random 0/1 flip predicate tensor."""

    def __init__(self, params):
        """Store the output description and seed the random generator.

        Args:
            params: Mapping that provides the 'shape', 'dtype' and 'seed'
                entries read below.
        """
        # Probability assigned to the value 0; the value 1 gets 1 - p.
        self.p = 0.5
        # Shape is stored reversed.
        # NOTE(review): presumably converts the MediaPipe dimension order
        # to numpy order - confirm.
        self.np_shape = params['shape'][::-1]
        self.np_dtype = params['dtype']
        self.seed = params['seed']
        self.rng = np.random.default_rng(self.seed)

    def __call__(self):
        """Return a freshly drawn array of 0/1 values in the stored dtype."""
        probabilities = [self.p, 1 - self.p]
        draws = self.rng.choice(a=[0, 1], size=self.np_shape,
                                p=probabilities)
        return np.array(draws, dtype=self.np_dtype)
def run(device, op_device):
    """Build the example pipeline, run one batch and print the metadata.

    Args:
        device: Device argument forwarded to ``myMediaPipe``.
        op_device: Device on which the SSDMetadata operator is placed.

    Raises:
        KeyError: If the ``DATASET_DIR`` environment variable is not set.
    """
    batch_size = 6
    img_width = 300
    img_height = 300
    num_threads = 1
    queue_depth = 2

    # Dataset layout: $DATASET_DIR/coco_data/{imgs/, annotation.json}
    base_dir = os.environ['DATASET_DIR'] + "/coco_data/"
    img_dir = base_dir + "/imgs/"
    ann_file = base_dir + "/annotation.json"

    # Create, build and initialize the pipeline.
    pipe = myMediaPipe(device, queue_depth, batch_size, num_threads,
                       op_device, img_dir, ann_file, img_height, img_width)
    pipe.build()
    pipe.iter_init()

    # Run one batch and copy every output to the host as a numpy array.
    outputs = pipe.run()
    images, ids, sizes, boxes, labels, num_boxes, batch = (
        tensor.as_cpu().as_nparray() for tensor in outputs)
    del pipe

    # Print dtype and contents of each metadata output.
    print('coco ids dtype:', ids.dtype)
    print('coco ids:\n', ids)

    print('coco sizes dtype:', sizes.dtype)
    print('coco sizes:\n', sizes)

    print('coco boxes dtype:', boxes.dtype)
    print('coco boxes:\n', boxes)

    print('coco labels dtype:', labels.dtype)
    print('coco labels:\n', labels)

    print('coco num_boxes dtype:', num_boxes.dtype)
    print('coco num_boxes:\n', num_boxes)

    print('coco batch dtype:', batch.dtype)
    print('coco batch:\n', batch)
if __name__ == "__main__":
    # Maps each MediaPipe device to the operator devices to exercise.
    # SSDMetadata is documented as supporting only the CPU backend, so the
    # operator device must be 'cpu' rather than 'hpu'.
    dev_opdev = {'legacy': ['cpu']}
    for dev, op_devs in dev_opdev.items():
        for op_dev in op_devs:
            run(dev, op_dev)
The following is the output of SSDMetadata operator:
coco ids dtype: uint32
coco ids:
[391895 522418 184613 318219 554625 574769]
coco sizes dtype: uint32
coco sizes:
[[300 248]
[202 300]
[300 295]
[300 291]
[256 300]
[104 288]]
coco boxes dtype: float32
coco boxes:
[[[0.01333333 0.01333333 0.07 0.07 ]
[0.04 0.01333333 0.07 0.07 ]
[0.06666667 0.01333333 0.07 0.07 ]
...
[0.5 0.5 0.9557719 0.9557719 ]
[0.5 0.5 1. 0.6151829 ]
[0.5 0.5 0.6151829 1. ]]
[[0.01333333 0.01333333 0.07 0.07 ]
[0.04 0.01333333 0.07 0.07 ]
[0.06666667 0.01333333 0.07 0.07 ]
...
[0.5 0.5 0.9557719 0.9557719 ]
[0.5 0.5 1. 0.6151829 ]
[0.5 0.5 0.6151829 1. ]]
[[0.01333333 0.01333333 0.07 0.07 ]
[0.04 0.01333333 0.07 0.07 ]
[0.06666667 0.01333333 0.07 0.07 ]
...
[0.5 0.5 0.9557719 0.9557719 ]
[0.5 0.5 1. 0.6151829 ]
[0.5 0.5 0.6151829 1. ]]
[[0.01333333 0.01333333 0.07 0.07 ]
[0.04 0.01333333 0.07 0.07 ]
[0.06666667 0.01333333 0.07 0.07 ]
...
[0.5 0.5 0.9557719 0.9557719 ]
[0.5 0.5 1. 0.6151829 ]
[0.5 0.5 0.6151829 1. ]]
[[0.01333333 0.01333333 0.07 0.07 ]
[0.04 0.01333333 0.07 0.07 ]
[0.06666667 0.01333333 0.07 0.07 ]
...
[0.5 0.5 0.9557719 0.9557719 ]
[0.5 0.41795546 0.6666666 0.55726564]
[0.5 0.41795546 0.6666666 0.55726564]]
[[0.01333333 0.01333333 0.07 0.07 ]
[0.04 0.01333333 0.07 0.07 ]
[0.06666667 0.01333333 0.07 0.07 ]
...
[0.5069444 0.49999997 0.6944444 0.99999994]
[0.5 0.5 1. 0.6151829 ]
[0.5069444 0.49999997 0.6944444 0.99999994]]]
coco labels dtype: uint32
coco labels:
[[0 0 0 ... 0 0 0]
[0 0 0 ... 0 0 0]
[0 0 0 ... 0 0 0]
[0 0 0 ... 0 0 0]
[0 0 0 ... 0 2 2]
[0 0 0 ... 2 0 2]]
coco num_boxes dtype: uint32
coco num_boxes:
[1 1 1 1 1 1]
coco batch dtype: uint32
coco batch:
[6]