habana_frameworks.mediapipe.fn.SSDBBoxFlip

Class:
  • habana_frameworks.mediapipe.fn.SSDBBoxFlip(**kwargs)

Define graph call:
  • __call__(is_Flip, boxes, lengths)

Parameter:

  • is_Flip - Input tensor to indicate flip or don’t flip input bbox. size=[batch]. Supported data types: INT8.

  • boxes - Input tensor of bounding boxes (each bbox should be in [l,t,r,b] format). size=[batch, 200, 4]. Supported dimensions: minimum = 3, maximum = 3. Supported data types: FLOAT32.

  • lengths - Input tensor of number of bounding boxes per image. size=[batch]. Supported dimensions: minimum = 1, maximum = 1. Supported data types: UINT32.

Description:

The SSDBBoxFlip operator takes a bbox tensor and performs a horizontal flip on the bounding boxes of each image whose corresponding value in the is_Flip tensor is 1.

Supported backend:
  • CPU

Output:

Output Value | Description
boxes | List of bounding boxes for every image in [left, top, right, bottom] format.

Example: SSDBBoxFlip Operator

The following code snippet shows usage of SSDBBoxFlip operator:

from habana_frameworks.mediapipe import fn
from habana_frameworks.mediapipe.mediapipe import MediaPipe
from habana_frameworks.mediapipe.media_types import imgtype as it
from habana_frameworks.mediapipe.media_types import dtype as dt
import matplotlib.pyplot as plt
import os

# Probability handed to the CoinFlip operator through a Constant node.
flip_prob = 1.0

# Number of seconds the preview window stays open. Environment variables are
# always strings, so the value is converted to a number here; an unset or
# empty DISPLAY_TIMEOUT falls back to 5 seconds.
g_display_timeout = float(os.getenv("DISPLAY_TIMEOUT") or 5)

class myMediaPipe(MediaPipe):
    """Example pipeline demonstrating the SSDBBoxFlip operator.

    The graph reads COCO samples, generates SSD crop windows, decodes the
    images, flips the bounding boxes with SSDBBoxFlip and flips the images
    with RandomFlip using the same flip flags.
    """

    def __init__(self, device, queue_depth, batch_size, num_threads,
                 op_device, dir, ann_file, img_h, img_w):
        """Create every operator node used by the graph.

        Args:
            device: Device string forwarded to the MediaPipe base class.
            queue_depth: Queue depth forwarded to the MediaPipe base class.
            batch_size: Batch size; also used as the size of the 1-D
                reshape nodes.
            num_threads: Thread count forwarded to the MediaPipe base class.
            op_device: Device on which the SSDBBoxFlip node is placed.
            dir: Image root directory given to the COCO reader.
            ann_file: Annotation file given to the COCO reader.
            img_h: Height passed to the decoder's resize.
            img_w: Width passed to the decoder's resize.
        """
        super().__init__(device, queue_depth, batch_size, num_threads,
                         self.__class__.__name__)

        # Dataset reader.
        self.input = fn.CocoReader(
            root=dir,
            annfile=ann_file,
            seed=1234,
            shuffle=False,
            drop_remainder=True,
            num_slices=1,
            slice_index=0,
            partial_batch=False,
            device='cpu',
        )

        # Reshape to [batch_size].
        self.reshape_ids = fn.Reshape(
            size=[batch_size],
            tensorDim=1,
            layout='',
            dtype=dt.UINT32,
            device='hpu',
        )

        self.ssd_crop_win_gen = fn.SSDCropWindowGen(
            num_iterations=1,
            seed=1234,
            device='cpu',
        )

        # Flip probability constant feeding the coin flip below.
        self.bbox_flip_prob = fn.Constant(
            constant=flip_prob,
            dtype=dt.FLOAT32,
            device='cpu',
        )

        self.is_bbox_flip = fn.CoinFlip(
            seed=1234,
            dtype=dt.INT8,
            device='cpu',
        )

        self.ssd_bbox_flip = fn.SSDBBoxFlip(device=op_device)

        self.decode = fn.ImageDecoder(
            device="hpu",
            output_format=it.RGB_P,
            resize=[img_w, img_h],
        )

        # Image flip - horizontal.
        self.reshape_is_flip = fn.Reshape(
            size=[batch_size],
            tensorDim=1,
            layout='',
            dtype=dt.UINT8,
            device='hpu',
        )
        self.random_flip = fn.RandomFlip(horizontal=1, device='hpu')

        # WHCN -> CWHN
        self.transpose = fn.Transpose(
            permutation=[2, 0, 1, 3],
            tensorDim=4,
            dtype=dt.UINT8,
        )

    def definegraph(self):
        """Connect the operator nodes and return the pipeline outputs.

        Returns:
            A 4-tuple of graph outputs: the images after RandomFlip and
            Transpose, the boxes produced by the crop-window generator,
            the reshaped flip flags, and the boxes after SSDBBoxFlip.
        """
        # Train pipe; the reader's ids and batch outputs are not used here.
        encoded, _ids, img_sizes, bboxes, classes, counts, _batch = \
            self.input()

        # SSD crop window generation.
        img_sizes, bboxes, classes, counts, crop_windows = \
            self.ssd_crop_win_gen(img_sizes, bboxes, classes, counts)

        decoded = self.decode(encoded, crop_windows)

        # SSD bounding box flip.
        probability = self.bbox_flip_prob()
        flip_flags = self.is_bbox_flip(probability)
        flipped_bboxes = self.ssd_bbox_flip(flip_flags, bboxes, counts)

        # Image flip driven by the same flags as the box flip.
        flip_flags = self.reshape_is_flip(flip_flags)
        flipped_images = self.random_flip(decoded, flip_flags)

        transposed = self.transpose(flipped_images)
        return transposed, bboxes, flip_flags, flipped_bboxes


def display_images(images, batch_size, cols):
    """Show the first ``batch_size`` images in a grid, then close the window.

    Args:
        images: Indexable collection of images accepted by ``plt.imshow``;
            entries ``0 .. batch_size - 1`` are displayed.
        batch_size: Number of images to draw.
        cols: Number of grid columns; must be a positive integer.

    The figure is shown without blocking, kept open for
    ``g_display_timeout`` seconds and then closed.
    """
    # Ceiling division so a partially filled last row still gets a grid row;
    # otherwise plt.subplot is asked for an index beyond rows * cols.
    rows = (batch_size + cols - 1) // cols
    plt.figure(figsize=(10, 10))
    for i in range(batch_size):
        plt.subplot(rows, cols, i + 1)
        plt.imshow(images[i])
        plt.axis("off")
    plt.show(block=False)
    # The timeout may originate from an environment variable (a string), so
    # coerce it to a number before pausing.
    plt.pause(float(g_display_timeout))
    plt.close()

def run(device, op_device):
    """Build the example pipeline, run one batch and report the results.

    Args:
        device: Device string passed to the pipeline constructor.
        op_device: Device on which the SSDBBoxFlip operator is placed.

    The dataset location is read from the ``DATASET_DIR`` environment
    variable. The dtype and contents of each output are printed and the
    images are displayed in a three-column grid.
    """
    batch_size = 6
    img_width = img_height = 300
    num_threads = 1
    queue_depth = 2

    def to_host_array(tensor):
        # Tensors exposing a callable as_cpu() are copied to the host first.
        if callable(getattr(tensor, "as_cpu", None)):
            tensor = tensor.as_cpu()
        return tensor.as_nparray()

    dataset_root = os.environ['DATASET_DIR'] + "/coco_data/"
    image_dir = dataset_root + "/imgs/"
    annotation_path = dataset_root + "/annotation.json"

    # Create the MediaPipe object, build it and initialize its iterator.
    pipe = myMediaPipe(device, queue_depth, batch_size, num_threads,
                       op_device, image_dir, annotation_path,
                       img_height, img_width)
    pipe.build()
    pipe.iter_init()

    # Run one batch and copy every output to the host as a numpy array.
    outputs = pipe.run()
    images, boxes, is_flip, boxes_fliped = map(to_host_array, outputs)

    del pipe

    # Report dtype and shape/contents of each output.
    report = (
        ('images dtype:', images.dtype),
        ('images:', images.shape),
        ('boxes dtype:', boxes.dtype),
        ('boxes:', boxes),
        ('is_Flip dtype:', is_flip.dtype),
        ('is_Flip:', is_flip),
        ('boxes_fliped dtype:', boxes_fliped.dtype),
        ('boxes_fliped:', boxes_fliped),
    )
    for label, value in report:
        print(label, value)

    display_images(images, batch_size, 3)


if __name__ == "__main__":
    # Maps each pipeline device to the operator devices to run on it.
    dev_opdev = {'mixed': ['cpu']}

    for dev, op_devs in dev_opdev.items():
        for op_dev in op_devs:
            run(dev, op_dev)

SSD BBox Flip Output Images 1

Image1 of slice
Image2 of slice
Image3 of slice
Image4 of slice
Image5 of slice
Image6 of slice
1

Licensed under a CC BY SA 4.0 license. The images used here are taken from https://data.caltech.edu/records/mzrjq-6wc02.

The following is the output of the SSDBBoxFlip operator:

images dtype: uint8
images: (6, 300, 300, 3)
boxes dtype: float32
boxes: [[[0.35       0.         0.99375    0.26363638]
  [0.         0.         0.         0.        ]
  [0.         0.         0.         0.        ]
  ...
  [0.         0.         0.         0.        ]
  [0.         0.         0.         0.        ]
  [0.         0.         0.         0.        ]]

[[0.0597015  0.1        0.43283582 0.8       ]
  [0.         0.         0.         0.        ]
  [0.         0.         0.         0.        ]
  ...
  [0.         0.         0.         0.        ]
  [0.         0.         0.         0.        ]
  [0.         0.         0.         0.        ]]

[[0.405      0.08333335 0.90500003 0.6041667 ]
  [0.         0.         0.         0.        ]
  [0.         0.         0.         0.        ]
  ...
  [0.         0.         0.         0.        ]
  [0.         0.         0.         0.        ]
  [0.         0.         0.         0.        ]]

[[0.5142611  0.         0.8407216  0.33333334]
  [0.         0.         0.         0.        ]
  [0.         0.         0.         0.        ]
  ...
  [0.         0.         0.         0.        ]
  [0.         0.         0.         0.        ]
  [0.         0.         0.         0.        ]]

[[0.         0.         0.98437494 0.90761214]
  [0.         0.         0.         0.        ]
  [0.         0.         0.         0.        ]
  ...
  [0.         0.         0.         0.        ]
  [0.         0.         0.         0.        ]
  [0.         0.         0.         0.        ]]

[[0.0882353  0.         1.         0.99999994]
  [0.         0.         0.         0.        ]
  [0.         0.         0.         0.        ]
  ...
  [0.         0.         0.         0.        ]
  [0.         0.         0.         0.        ]
  [0.         0.         0.         0.        ]]]
is_Flip dtype: uint8
is_Flip: [1 1 1 1 1 1]
boxes_fliped dtype: float32
boxes_fliped: [[[0.00625002 0.         0.65       0.26363638]
  [0.         0.         0.         0.        ]
  [0.         0.         0.         0.        ]
  ...
  [0.         0.         0.         0.        ]
  [0.         0.         0.         0.        ]
  [0.         0.         0.         0.        ]]

[[0.5671642  0.1        0.9402985  0.8       ]
  [0.         0.         0.         0.        ]
  [0.         0.         0.         0.        ]
  ...
  [0.         0.         0.         0.        ]
  [0.         0.         0.         0.        ]
  [0.         0.         0.         0.        ]]

[[0.09499997 0.08333335 0.595      0.6041667 ]
  [0.         0.         0.         0.        ]
  [0.         0.         0.         0.        ]
  ...
  [0.         0.         0.         0.        ]
  [0.         0.         0.         0.        ]
  [0.         0.         0.         0.        ]]

[[0.1592784  0.         0.48573887 0.33333334]
  [0.         0.         0.         0.        ]
  [0.         0.         0.         0.        ]
  ...
  [0.         0.         0.         0.        ]
  [0.         0.         0.         0.        ]
  [0.         0.         0.         0.        ]]

[[0.01562506 0.         1.         0.90761214]
  [0.         0.         0.         0.        ]
  [0.         0.         0.         0.        ]
  ...
  [0.         0.         0.         0.        ]
  [0.         0.         0.         0.        ]
  [0.         0.         0.         0.        ]]

[[0.         0.         0.9117647  0.99999994]
  [0.         0.         0.         0.        ]
  [0.         0.         0.         0.        ]
  ...
  [0.         0.         0.         0.        ]
  [0.         0.         0.         0.        ]
  [0.         0.         0.         0.        ]]]