habana_frameworks.mediapipe.fn.Zoom

Class:
  • habana_frameworks.mediapipe.fn.Zoom(**kwargs)

Define graph call:
  • __call__(image, label, crop_size)

Parameters:
  • image - Input tensor to operator (images). Supported dimensions: minimum = 5, maximum = 5. Supported data types: FLOAT32.

  • label - Input tensor to operator (labels). Supported dimensions: minimum = 5, maximum = 5. Supported data types: UINT8.

  • crop_size - Input tensor to operator of shape [batch_size, 3]. Supported dimensions: minimum = 2, maximum = 2. Supported data types: UINT32.

Description:

Zoom operator supports zoom-in operation on the given images and labels. It achieves zoom by performing center crop (based on crop_size) and resize operations (to patch size) on input data. Resize operation is BICUBIC for images and NEAREST for labels.

Supported backend:
  • CPU

Keyword Arguments

kwargs

Description

patch_size

Width, height and depth dimension for images and labels.

  • Type: list[int]

  • Default: [0, 0, 0]

  • Optional: No

num_channels

Number of channels for image data. For labels it is assumed to be ‘1’.

  • Type: int

  • Default: 1

  • Optional: Yes

dtype

Output data type.

  • Type: habana_frameworks.mediapipe.media_types.dtype

  • Default: UINT8

  • Optional: Yes

  • Supported data types:

    • UINT8 (output labels)

    • FLOAT32 (output images)

Example: Zoom Operator

The following code snippet shows usage of Zoom operator:

import glob
import os

import numpy as np

from habana_frameworks.mediapipe import fn
from habana_frameworks.mediapipe.mediapipe import MediaPipe
from habana_frameworks.mediapipe.media_types import dtype as dt
from habana_frameworks.mediapipe.operators.cpu_nodes.cpu_nodes import media_function

g_crop_min = 0.7
g_crop_max = 1.0

class test_random_zoom_func(media_function):
    def __init__(self, params):
        self.np_shape = params['shape'][::-1]
        self.np_dtype = params['dtype']
        self.batch_size = self.np_shape[0]
        self.seed = params['seed'] + params['unique_number']
        self.priv_params = params['priv_params']
        self.crop_min = self.priv_params['crop_min']
        self.crop_max = self.priv_params['crop_max']
        self.patch_size = self.priv_params['patch_size']
        self.patch_size_ar = np.array(self.patch_size, dtype=self.np_dtype)
        print("test_random_zoom_func crop min {} max {} seed {}".format(
            self.crop_min, self.crop_max, self.seed))

        self.rng = np.random.default_rng(self.seed)

    def __call__(self):
        crop_factor = self.rng.uniform(
            low=self.crop_min, high=self.crop_max, size=[self.batch_size])

        cropped_patch_ar = np.zeros(self.np_shape, dtype=self.np_dtype)

        for i in range(self.batch_size):
            cropped_patch_ar[i] = np.array(
                self.patch_size_ar * crop_factor[i], dtype=self.np_dtype)
        return cropped_patch_ar


class myMediaPipe(MediaPipe):
    def __init__(self, device, queue_depth, batch_size, patch_size, file_list):
        super(myMediaPipe, self).__init__(device=device,
                                        prefetch_depth=queue_depth,
                                        batch_size=batch_size,
                                        pipe_name=self.__class__.__name__)
        image_num_channel = 4
        seed = 0
        self.batch_size = batch_size
        self.patch_size = patch_size

        self.inputxy = fn.ReadNumpyDatasetFromDir(num_outputs=2,
                                                shuffle=False,
                                                shuffle_across_dataset=False,
                                                file_list=file_list,
                                                dtype=[dt.FLOAT32, dt.UINT8],
                                                dense=False,
                                                seed=seed)

        self.crop_img = fn.BasicCrop(patch_size=self.patch_size,
                                    num_channels=image_num_channel,
                                    center_crop=True,
                                    dtype=dt.FLOAT32)

        self.crop_lbl = fn.BasicCrop(patch_size=self.patch_size,
                                    num_channels=1,
                                    center_crop=True,
                                    dtype=dt.UINT8)

        priv_params = {}
        priv_params['crop_min'] = g_crop_min
        priv_params['crop_max'] = g_crop_max
        priv_params['patch_size'] = self.patch_size

        self.crop_size = fn.MediaFunc(func=test_random_zoom_func,
                                    shape=[3, self.batch_size],
                                    dtype=dt.UINT32,
                                    seed=seed,
                                    priv_params=priv_params)

        self.zoom = fn.Zoom(patch_size=self.patch_size,
                            num_channels=image_num_channel)

        shape = self.patch_size.copy()
        shape.append(image_num_channel)
        shape.append(self.batch_size)
        self.img_reshape_op = fn.Reshape(size=shape,
                                        tensorDim=5,
                                        layout='',
                                        dtype=dt.FLOAT32)

        shape = self.patch_size.copy()
        shape.append(1)
        shape.append(self.batch_size)
        self.lbl_reshape_op = fn.Reshape(size=shape,
                                        tensorDim=5,
                                        layout='',
                                        dtype=dt.UINT8)

    def definegraph(self):

        img, lbl = self.inputxy()

        img_i = self.crop_img(img)
        lbl_i = self.crop_lbl(lbl)

        crop_size_zoom = self.crop_size()
        img_o, lbl_o = self.zoom(img_i, lbl_i, crop_size_zoom)

        img_o, lbl_o = self.img_reshape_op(img_o), self.lbl_reshape_op(lbl_o)

        return img_o, lbl_o, img_i, lbl_i, crop_size_zoom


def main():
    queue_depth = 2
    batch_size = 2
    patch_size = [128, 128, 128]
    dir = "/path/to/numpy/files"

    pattern_x = "*_x.npy"
    pattern_y = "*_y.npy"

    npy_x = sorted(glob.glob(dir + "/{}".format(pattern_x)))
    npy_y = sorted(glob.glob(dir + "/{}".format(pattern_y)))
    file_list = [npy_x, npy_y]

    # Create media pipe
    pipe = myMediaPipe('hpu', queue_depth, batch_size,
                    patch_size, file_list)

    # Build media pipeline
    pipe.build()

    # Initialize media pipeline iterator
    pipe.iter_init()

    # Run media pipeline
    images_o, labels_o, images_i, labels_i, crop_size = pipe.run()

    # Copy data to host from device as numpy array
    img_i = images_i.as_cpu().as_nparray()
    lbl_i = labels_i.as_cpu().as_nparray()
    img_o = images_o.as_cpu().as_nparray()
    lbl_o = labels_o.as_cpu().as_nparray()
    crop_size = crop_size.as_cpu().as_nparray()

    print('inp image shape: ', img_i.shape)
    print('inp label shape: ', lbl_i.shape)
    print('crop_size shape: ', crop_size)
    print('out image shape: ', img_o.shape)
    print('out label shape: ', lbl_o.shape)

if __name__ == "__main__":
    main()

The following is the output of Zoom operator:

inp image shape:  (2, 4, 128, 128, 128)
inp label shape:  (2, 1, 128, 128, 128)
crop_size shape:  [[92 92 92]
[98 98 98]]
out image shape:  (2, 4, 128, 128, 128)
out label shape:  (2, 1, 128, 128, 128)