habana_frameworks.mediapipe.fn.SSDBBoxFlip
habana_frameworks.mediapipe.fn.SSDBBoxFlip¶
- Class:
habana_frameworks.mediapipe.fn.SSDBBoxFlip(**kwargs)
- Define graph call:
__call__(is_Flip, boxes, lengths)
Parameter:
is_Flip - Input tensor indicating, for each image, whether or not to flip its input bboxes. size=[batch]. Supported data types: INT8.
boxes - Input tensor of bounding boxes (each bbox should be in [l,t,r,b] format). size=[batch, 200, 4]. Supported dimensions: minimum = 3, maximum = 3. Supported data types: FLOAT32.
lengths - Input tensor of number of bounding boxes per image. size=[batch]. Supported dimensions: minimum = 1, maximum = 1. Supported data types: UINT32.
Description:
The SSDBBoxFlip operator takes a bbox tensor and performs a horizontal flip on it if the corresponding value in the is_Flip tensor is 1.
- Supported backend:
CPU
Output:
Output Value |
Description |
---|---|
boxes |
List of bounding boxes for every image in [left, top, right, bottom] format. |
Example: SSDBBoxFlip Operator
The following code snippet shows usage of SSDBBoxFlip operator:
from habana_frameworks.mediapipe import fn
from habana_frameworks.mediapipe.mediapipe import MediaPipe
from habana_frameworks.mediapipe.media_types import imgtype as it
from habana_frameworks.mediapipe.media_types import dtype as dt
import matplotlib.pyplot as plt
import os
# Probability fed to the CoinFlip operator through a Constant node.
flip_prob = 1.0

# Seconds the matplotlib window stays open before it is closed.
# Environment variables are always strings, so convert explicitly: the value
# is later handed to plt.pause(), which requires a number.
g_display_timeout = float(os.getenv("DISPLAY_TIMEOUT") or 5)
class myMediaPipe(MediaPipe):
    """Example pipeline: read COCO samples, crop, then flip boxes and images.

    The same coin-flip tensor is fed to both SSDBBoxFlip (bounding boxes) and
    RandomFlip (decoded images).
    """

    def __init__(self, device, queue_depth, batch_size, num_threads,
                 op_device, dir, ann_file, img_h, img_w):
        """Instantiate every operator node held by this pipeline.

        device, queue_depth, batch_size, num_threads: forwarded unchanged to
            ``MediaPipe.__init__``.
        op_device: device on which the SSDBBoxFlip node is created.
        dir: passed to CocoReader as ``root``.
        ann_file: passed to CocoReader as ``annfile``.
        img_h, img_w: decoder resize target, handed to ImageDecoder as
            ``[img_w, img_h]``.
        """
        pipe_name = self.__class__.__name__
        super().__init__(device, queue_depth, batch_size, num_threads,
                         pipe_name)

        seed = 1234  # shared by the reader, crop-window and coin-flip nodes

        self.input = fn.CocoReader(
            root=dir,
            annfile=ann_file,
            seed=seed,
            shuffle=False,
            drop_remainder=True,
            num_slices=1,
            slice_index=0,
            partial_batch=False,
            device='cpu',
        )

        # Output size: [batch_size]. Not referenced by definegraph() below.
        self.reshape_ids = fn.Reshape(
            size=[batch_size],
            tensorDim=1,
            layout='',
            dtype=dt.UINT32,
            device='hpu',
        )

        self.ssd_crop_win_gen = fn.SSDCropWindowGen(
            num_iterations=1,
            seed=seed,
            device='cpu',
        )

        # Constant probability -> CoinFlip -> per-sample flip decision.
        self.bbox_flip_prob = fn.Constant(
            constant=flip_prob,
            dtype=dt.FLOAT32,
            device='cpu',
        )
        self.is_bbox_flip = fn.CoinFlip(
            seed=seed,
            dtype=dt.INT8,
            device='cpu',
        )

        self.ssd_bbox_flip = fn.SSDBBoxFlip(device=op_device)

        self.decode = fn.ImageDecoder(
            device="hpu",
            output_format=it.RGB_P,
            resize=[img_w, img_h],
        )

        # Image flip - horizontal
        self.reshape_is_flip = fn.Reshape(
            size=[batch_size],
            tensorDim=1,
            layout='',
            dtype=dt.UINT8,
            device='hpu',
        )
        self.random_flip = fn.RandomFlip(horizontal=1, device='hpu')

        # WHCN -> CWHN
        self.transpose = fn.Transpose(
            permutation=[2, 0, 1, 3],
            tensorDim=4,
            dtype=dt.UINT8,
        )

    def definegraph(self):
        """Wire the operator nodes together and return the graph outputs.

        Returns, in order: the flipped and transposed images, the cropped
        (unflipped) boxes, the reshaped flip tensor, and the flipped boxes.
        """
        # Train pipe. ids, labels and batch are produced by the reader but
        # are not consumed further in this example.
        encoded, ids, sizes, boxes, labels, lengths, batch = self.input()

        # SSD crop window generation
        cropped = self.ssd_crop_win_gen(sizes, boxes, labels, lengths)
        sizes, boxes, labels, lengths, windows = cropped
        decoded = self.decode(encoded, windows)

        # SSD bounding box flip
        flip_mask = self.is_bbox_flip(self.bbox_flip_prob())
        flipped_boxes = self.ssd_bbox_flip(flip_mask, boxes, lengths)

        # Image flip, driven by the same flip tensor after reshaping
        flip_mask = self.reshape_is_flip(flip_mask)
        flipped_images = self.random_flip(decoded, flip_mask)

        return self.transpose(flipped_images), boxes, flip_mask, flipped_boxes
def display_images(images, batch_size, cols):
    """Show the first ``batch_size`` images in a grid with ``cols`` columns.

    The figure is shown without blocking, kept on screen for
    ``g_display_timeout`` seconds, and then closed.

    images: indexable collection whose items are accepted by ``plt.imshow``.
    batch_size: number of images to draw (``images[0]`` .. ``images[batch_size - 1]``).
    cols: number of grid columns; must be a positive integer.
    """
    # Ceiling division, so a partially filled last row still gets a grid row.
    # The earlier (batch_size + 1) // cols under-allocated rows (4 images in
    # 3 columns gave 1 row; 1 image in 3 columns gave 0 rows), which made
    # plt.subplot() reject the out-of-range index.
    rows = (batch_size + cols - 1) // cols
    plt.figure(figsize=(10, 10))
    for i in range(batch_size):
        plt.subplot(rows, cols, i + 1)
        plt.imshow(images[i])
        plt.axis("off")
    plt.show(block=False)
    # The timeout may originate from an environment variable (a string);
    # plt.pause() needs a number.
    plt.pause(float(g_display_timeout))
    plt.close()
def run(device, op_device):
    """Build the example pipeline, run one batch, then print and show it.

    device: device argument passed to ``myMediaPipe``.
    op_device: device on which the SSDBBoxFlip operator is created.

    Reads the dataset location from the ``DATASET_DIR`` environment variable
    (a ``KeyError`` is raised when it is not set).
    """
    def _to_host(tensor):
        # Tensors exposing a callable as_cpu() are converted through it;
        # anything else is returned untouched.
        if callable(getattr(tensor, "as_cpu", None)):
            return tensor.as_cpu()
        return tensor

    batch_size = 6
    img_width = 300
    img_height = 300
    num_threads = 1
    queue_depth = 2

    coco_root = os.environ['DATASET_DIR'] + "/coco_data/"
    img_dir = coco_root + "/imgs/"
    ann_file = coco_root + "/annotation.json"

    # Create MediaPipe object
    pipe = myMediaPipe(device, queue_depth, batch_size, num_threads,
                       op_device, img_dir, ann_file, img_height, img_width)

    # Build MediaPipe
    pipe.build()

    # Initialize MediaPipe iterator
    pipe.iter_init()

    # Run MediaPipe
    images, boxes, is_flip, boxes_fliped = pipe.run()

    # Copy data to host from device as numpy array
    images, boxes, is_flip, boxes_fliped = [
        _to_host(tensor).as_nparray()
        for tensor in (images, boxes, is_flip, boxes_fliped)
    ]

    del pipe

    # Display images, shape, dtype
    print('images dtype:', images.dtype)
    print('images:', images.shape)

    print('boxes dtype:', boxes.dtype)
    print('boxes:', boxes)

    print('is_Flip dtype:', is_flip.dtype)
    print('is_Flip:', is_flip)

    print('boxes_fliped dtype:', boxes_fliped.dtype)
    print('boxes_fliped:', boxes_fliped)

    display_images(images, batch_size, 3)
if __name__ == "__main__":
    # Maps each pipeline device to the operator devices to run on it.
    dev_opdev = {'mixed': ['cpu']}
    for dev, op_devs in dev_opdev.items():
        for op_dev in op_devs:
            run(dev, op_dev)
SSD BBox Flip Output Images 1
- 1
Licensed under a CC BY SA 4.0 license. The images used here are taken from https://data.caltech.edu/records/mzrjq-6wc02.
The following is the output of the SSDBBoxFlip operator:
images dtype: uint8
images: (6, 300, 300, 3)
boxes dtype: float32
boxes: [[[0.35 0. 0.99375 0.26363638]
[0. 0. 0. 0. ]
[0. 0. 0. 0. ]
...
[0. 0. 0. 0. ]
[0. 0. 0. 0. ]
[0. 0. 0. 0. ]]
[[0.0597015 0.1 0.43283582 0.8 ]
[0. 0. 0. 0. ]
[0. 0. 0. 0. ]
...
[0. 0. 0. 0. ]
[0. 0. 0. 0. ]
[0. 0. 0. 0. ]]
[[0.405 0.08333335 0.90500003 0.6041667 ]
[0. 0. 0. 0. ]
[0. 0. 0. 0. ]
...
[0. 0. 0. 0. ]
[0. 0. 0. 0. ]
[0. 0. 0. 0. ]]
[[0.5142611 0. 0.8407216 0.33333334]
[0. 0. 0. 0. ]
[0. 0. 0. 0. ]
...
[0. 0. 0. 0. ]
[0. 0. 0. 0. ]
[0. 0. 0. 0. ]]
[[0. 0. 0.98437494 0.90761214]
[0. 0. 0. 0. ]
[0. 0. 0. 0. ]
...
[0. 0. 0. 0. ]
[0. 0. 0. 0. ]
[0. 0. 0. 0. ]]
[[0.0882353 0. 1. 0.99999994]
[0. 0. 0. 0. ]
[0. 0. 0. 0. ]
...
[0. 0. 0. 0. ]
[0. 0. 0. 0. ]
[0. 0. 0. 0. ]]]
is_Flip dtype: uint8
is_Flip: [1 1 1 1 1 1]
boxes_fliped dtype: float32
boxes_fliped: [[[0.00625002 0. 0.65 0.26363638]
[0. 0. 0. 0. ]
[0. 0. 0. 0. ]
...
[0. 0. 0. 0. ]
[0. 0. 0. 0. ]
[0. 0. 0. 0. ]]
[[0.5671642 0.1 0.9402985 0.8 ]
[0. 0. 0. 0. ]
[0. 0. 0. 0. ]
...
[0. 0. 0. 0. ]
[0. 0. 0. 0. ]
[0. 0. 0. 0. ]]
[[0.09499997 0.08333335 0.595 0.6041667 ]
[0. 0. 0. 0. ]
[0. 0. 0. 0. ]
...
[0. 0. 0. 0. ]
[0. 0. 0. 0. ]
[0. 0. 0. 0. ]]
[[0.1592784 0. 0.48573887 0.33333334]
[0. 0. 0. 0. ]
[0. 0. 0. 0. ]
...
[0. 0. 0. 0. ]
[0. 0. 0. 0. ]
[0. 0. 0. 0. ]]
[[0.01562506 0. 1. 0.90761214]
[0. 0. 0. 0. ]
[0. 0. 0. 0. ]
...
[0. 0. 0. 0. ]
[0. 0. 0. 0. ]
[0. 0. 0. 0. ]]
[[0. 0. 0.9117647 0.99999994]
[0. 0. 0. 0. ]
[0. 0. 0. 0. ]
...
[0. 0. 0. 0. ]
[0. 0. 0. 0. ]
[0. 0. 0. 0. ]]]