Image Decoder Examples

This section shows how to use media pipe for decoding, resizing and cropping operations.

Example 1: Image Decode with Resize

The following code snippet shows how to configure media pipe to decode images and perform other operations on the decoded images. The decoder can output either RGB interleaved or RGB planar images:

from habana_frameworks.mediapipe import fn
from habana_frameworks.mediapipe.mediapipe import MediaPipe
from habana_frameworks.mediapipe.media_types import imgtype as it
from habana_frameworks.mediapipe.media_types import dtype as dt
import matplotlib.pyplot as plt

class myMediaPipe(MediaPipe):
    """Media pipeline that reads "jpg" files from a directory and decodes them.

    The decoder node is created with ``device="hpu"``, the ``it.RGB_I``
    output format and a resize target of ``[img_w, img_h]``.
    """

    def __init__(self, device, dir, queue_depth, batch_size, img_h, img_w):
        """Create the reader and decoder nodes.

        Args:
            device: Forwarded to ``MediaPipe.__init__``.
            dir: Directory handed to ``fn.ReadImageDatasetFromDir``.
            queue_depth: Forwarded to ``MediaPipe.__init__``.
            batch_size: Forwarded to ``MediaPipe.__init__``.
            img_h: Second entry of the decoder's ``resize`` list.
            img_w: First entry of the decoder's ``resize`` list.
        """
        # The class name is passed as the fourth positional argument.
        cls_name = type(self).__name__
        super().__init__(device, queue_depth, batch_size, cls_name)

        self.input = fn.ReadImageDatasetFromDir(
            shuffle=False, dir=dir, format="jpg")

        self.decode = fn.ImageDecoder(
            device="hpu", output_format=it.RGB_I, resize=[img_w, img_h])

    def definegraph(self):
        """Feed the reader's images through the decoder.

        Returns:
            Tuple ``(images, labels)`` where ``images`` is the decoder output
            and ``labels`` comes straight from the reader.
        """
        encoded, labels = self.input()
        decoded = self.decode(encoded)
        return decoded, labels

def display_images(images, batch_size, cols):
    """Show the first ``batch_size`` images in a grid with ``cols`` columns.

    Args:
        images: Indexable collection; ``images[i]`` is passed to
            ``plt.imshow`` for each ``i`` in ``range(batch_size)``.
        batch_size: Number of images to draw.
        cols: Number of columns in the subplot grid.
    """
    # Ceiling division so the grid always has enough rows for every image
    # (e.g. 7 images in 3 columns need 3 rows) and never an extra empty one.
    rows = (batch_size + cols - 1) // cols
    plt.figure(figsize=(10, 10))
    for i in range(batch_size):
        # Subplot indices are 1-based.
        plt.subplot(rows, cols, i + 1)
        plt.imshow(images[i])
        plt.axis("off")
    plt.show()

def main():
    """Build the pipeline, run it once and display the resulting images."""
    # Example settings
    img_dir = "/path/to/images"
    batch_size, queue_depth = 6, 2
    img_width, img_height = 200, 200
    columns = 3

    # Create, build and initialize the media pipeline
    pipe = myMediaPipe(device='hpu',
                       dir=img_dir,
                       queue_depth=queue_depth,
                       batch_size=batch_size,
                       img_h=img_height,
                       img_w=img_width)
    pipe.build()
    pipe.iter_init()

    # Run the pipeline once
    device_images, device_labels = pipe.run()

    # Copy data to host from device as numpy arrays
    host_images = device_images.as_cpu().as_nparray()
    host_labels = device_labels.as_cpu().as_nparray()  # copied, not displayed

    display_images(host_images, batch_size, columns)


if __name__ == "__main__":
    main()

Decoded and Resized Images 1

../_images/img0_resize.png
../_images/img1_resize.png
../_images/img2_resize.png
../_images/img3_resize.png
../_images/img4_resize.png
../_images/img5_resize.png
1

Licensed under a CC BY SA 4.0 license. The images used here are taken from https://data.caltech.edu/records/mzrjq-6wc02.

Example 2: Image Decode with Resize and Fixed Crop

This example uses the decode functionality of media pipe as shown in Example 1 and adds a fixed-size crop operation to it.

from habana_frameworks.mediapipe import fn
from habana_frameworks.mediapipe.mediapipe import MediaPipe
from habana_frameworks.mediapipe.media_types import imgtype as it
from habana_frameworks.mediapipe.media_types import dtype as dt
import matplotlib.pyplot as plt

class myMediaPipe(MediaPipe):
    """Media pipeline that reads "jpg" files, decodes them and crops them.

    The decoder node is created with ``device="hpu"``, the ``it.RGB_I``
    output format and a resize target of ``[img_w, img_h]``; the crop node
    uses a fixed 150 x 150 window and ``dt.UINT8``.
    """

    def __init__(self, device, dir, queue_depth, batch_size, img_h, img_w):
        """Create the reader, decoder and crop nodes.

        Args:
            device: Forwarded to ``MediaPipe.__init__``.
            dir: Directory handed to ``fn.ReadImageDatasetFromDir``.
            queue_depth: Forwarded to ``MediaPipe.__init__``.
            batch_size: Forwarded to ``MediaPipe.__init__``.
            img_h: Second entry of the decoder's ``resize`` list.
            img_w: First entry of the decoder's ``resize`` list.
        """
        # The class name is passed as the fourth positional argument.
        cls_name = type(self).__name__
        super().__init__(device, queue_depth, batch_size, cls_name)

        self.input = fn.ReadImageDatasetFromDir(
            shuffle=False, dir=dir, format="jpg")

        self.decode = fn.ImageDecoder(
            device="hpu", output_format=it.RGB_I, resize=[img_w, img_h])

        self.crop = fn.Crop(crop_w=150, crop_h=150, dtype=dt.UINT8)

    def definegraph(self):
        """Chain reader -> decoder -> crop.

        Returns:
            Tuple ``(images, labels)`` where ``images`` is the crop output
            and ``labels`` comes straight from the reader.
        """
        encoded, labels = self.input()
        cropped = self.crop(self.decode(encoded))
        return cropped, labels

def display_images(images, batch_size, cols):
    """Show the first ``batch_size`` images in a grid with ``cols`` columns.

    Args:
        images: Indexable collection; ``images[i]`` is passed to
            ``plt.imshow`` for each ``i`` in ``range(batch_size)``.
        batch_size: Number of images to draw.
        cols: Number of columns in the subplot grid.
    """
    # Ceiling division so the grid always has enough rows for every image
    # (e.g. 7 images in 3 columns need 3 rows) and never an extra empty one.
    rows = (batch_size + cols - 1) // cols
    plt.figure(figsize=(10, 10))
    for i in range(batch_size):
        # Subplot indices are 1-based.
        plt.subplot(rows, cols, i + 1)
        plt.imshow(images[i])
        plt.axis("off")
    plt.show()

def main():
    """Build the pipeline, run it once and display the resulting images."""
    # Example settings
    img_dir = "/path/to/images"
    batch_size, queue_depth = 6, 2
    img_width, img_height = 200, 200
    columns = 3

    # Create, build and initialize the media pipeline
    pipe = myMediaPipe(device='hpu',
                       dir=img_dir,
                       queue_depth=queue_depth,
                       batch_size=batch_size,
                       img_h=img_height,
                       img_w=img_width)
    pipe.build()
    pipe.iter_init()

    # Run the pipeline once
    device_images, device_labels = pipe.run()

    # Copy data to host from device as numpy arrays
    host_images = device_images.as_cpu().as_nparray()
    host_labels = device_labels.as_cpu().as_nparray()  # copied, not displayed

    display_images(host_images, batch_size, columns)


if __name__ == "__main__":
    main()

Decoded, Resized and Cropped Images 2

../_images/img0_resize_crop.png
../_images/img1_resize_crop.png
../_images/img2_resize_crop.png
../_images/img3_resize_crop.png
../_images/img4_resize_crop.png
../_images/img5_resize_crop.png
2

Licensed under a CC BY SA 4.0 license. The images used here are taken from https://data.caltech.edu/records/mzrjq-6wc02.

Example 3: Image Decode with Resize and User Defined Random Crop

This example uses the decode functionality of media pipe as shown in Example 1 and adds a user-defined random crop operation to it. The output images are resized to the dimensions given by img_width and img_height:

from habana_frameworks.mediapipe import fn
from habana_frameworks.mediapipe.mediapipe import MediaPipe
from habana_frameworks.mediapipe.media_types import imgtype as it
from habana_frameworks.mediapipe.media_types import dtype as dt
from habana_frameworks.mediapipe.operators.cpu_nodes.cpu_nodes import media_function
import matplotlib.pyplot as plt
import numpy as np

class myMediaPipe(MediaPipe):
    """Media pipeline that decodes "jpg" files using user-supplied crop values.

    A ``fn.MediaFunc`` node wrapping ``random_crop_func`` produces the crop
    values, which are passed to the decoder as its second input.
    """

    def __init__(self, device, dir, queue_depth, batch_size, img_h, img_w):
        """Create the reader, decoder and random-crop nodes.

        Args:
            device: Forwarded to ``MediaPipe.__init__``.
            dir: Directory handed to ``fn.ReadImageDatasetFromDir``.
            queue_depth: Forwarded to ``MediaPipe.__init__``.
            batch_size: Forwarded to ``MediaPipe.__init__``; also the second
                entry of the random-crop node's ``shape``.
            img_h: Second entry of the decoder's ``resize`` list.
            img_w: First entry of the decoder's ``resize`` list.
        """
        # The class name is passed as the fourth positional argument.
        cls_name = type(self).__name__
        super().__init__(device, queue_depth, batch_size, cls_name)

        self.input = fn.ReadImageDatasetFromDir(
            shuffle=False, dir=dir, format="jpg")

        self.decode = fn.ImageDecoder(
            device="hpu", output_format=it.RGB_I, resize=[img_w, img_h])

        # Sampling bounds consumed by random_crop_func.
        crop_bounds = {
            'xval_min': 0.0,
            'xval_max': 0.2,
            'yval_min': 0.0,
            'yval_max': 0.3,
            'wval_min': 0.8,
            'wval_max': 1.0,
            'hval_min': 0.7,
            'hval_max': 1.0,
        }
        rng_seed = 7368592685

        self.random_crop = fn.MediaFunc(
            func=random_crop_func,
            dtype=dt.FLOAT32,
            shape=[4, batch_size],
            seed=rng_seed,
            priv_params=crop_bounds)

    def definegraph(self):
        """Generate crop values from the reader output and decode with them.

        Returns:
            Tuple ``(images, labels)`` where ``images`` is the decoder output
            and ``labels`` comes straight from the reader.
        """
        encoded, labels = self.input()
        crop_windows = self.random_crop(encoded)
        decoded = self.decode(encoded, crop_windows)
        return decoded, labels

class random_crop_func(media_function):
    """Callable that draws one random ``[x, y, w, h]`` row per batch sample.

    Values are sampled uniformly from the ranges given in ``priv_params``;
    ``w`` and ``h`` are then reduced where needed so that ``x + w`` and
    ``y + h`` do not exceed 1.
    """

    def __init__(self, params):
        """Read the configuration and create the random generator.

        Args:
            params: Mapping providing ``'shape'``, ``'dtype'``, ``'seed'`` and
                ``'priv_params'``; the latter holds the ``*val_min`` /
                ``*val_max`` sampling bounds for x, y, w and h.
        """
        # The shape is used reversed; its first entry is the batch size.
        self.np_shape = params['shape'][::-1]
        self.np_dtype = params['dtype']
        self.batch_size = self.np_shape[0]
        self.seed = params['seed']

        bounds = params['priv_params']
        self.xval_min, self.xval_max = bounds['xval_min'], bounds['xval_max']
        self.yval_min, self.yval_max = bounds['yval_min'], bounds['yval_max']
        self.wval_min, self.wval_max = bounds['wval_min'], bounds['wval_max']
        self.hval_min, self.hval_max = bounds['hval_min'], bounds['hval_max']

        self.rng = np.random.default_rng(self.seed)

    def __call__(self, filelist):
        """Return a fresh array of crop values.

        Args:
            filelist: Not used by this implementation.

        Returns:
            Array of shape ``self.np_shape`` and dtype ``self.np_dtype`` whose
            columns are x, y, w and h.
        """
        count = self.batch_size
        crops = np.empty(shape=self.np_shape, dtype=self.np_dtype)

        # Draw order (x, y, w, h) determines the random stream consumption.
        xs = self.rng.uniform(self.xval_min, self.xval_max, count)
        ys = self.rng.uniform(self.yval_min, self.yval_max, count)
        ws = self.rng.uniform(self.wval_min, self.wval_max, count)
        hs = self.rng.uniform(self.hval_min, self.hval_max, count)

        # Shrink width/height wherever the window would reach past 1
        # (values look like normalized fractions -- TODO confirm).
        ws = np.where(xs + ws > 1, 1 - xs, ws)
        hs = np.where(ys + hs > 1, 1 - ys, hs)

        crops[:, 0] = xs
        crops[:, 1] = ys
        crops[:, 2] = ws
        crops[:, 3] = hs
        return crops

def display_images(images, batch_size, cols):
    """Show the first ``batch_size`` images in a grid with ``cols`` columns.

    Args:
        images: Indexable collection; ``images[i]`` is passed to
            ``plt.imshow`` for each ``i`` in ``range(batch_size)``.
        batch_size: Number of images to draw.
        cols: Number of columns in the subplot grid.
    """
    # Ceiling division so the grid always has enough rows for every image
    # (e.g. 7 images in 3 columns need 3 rows) and never an extra empty one.
    rows = (batch_size + cols - 1) // cols
    plt.figure(figsize=(10, 10))
    for i in range(batch_size):
        # Subplot indices are 1-based.
        plt.subplot(rows, cols, i + 1)
        plt.imshow(images[i])
        plt.axis("off")
    plt.show()

def main():
    """Build the pipeline, run it once and display the resulting images."""
    # Example settings
    img_dir = "/path/to/images"
    batch_size, queue_depth = 6, 2
    img_width, img_height = 200, 200
    columns = 3

    # Create, build and initialize the media pipeline
    pipe = myMediaPipe(device='hpu',
                       dir=img_dir,
                       queue_depth=queue_depth,
                       batch_size=batch_size,
                       img_h=img_height,
                       img_w=img_width)
    pipe.build()
    pipe.iter_init()

    # Run the pipeline once
    device_images, device_labels = pipe.run()

    # Copy data to host from device as numpy arrays
    host_images = device_images.as_cpu().as_nparray()
    host_labels = device_labels.as_cpu().as_nparray()  # copied, not displayed

    display_images(host_images, batch_size, columns)


if __name__ == "__main__":
    main()

Decoded, Resized and Random Cropped Images 3

Image1 of decoded batch and cropped.
Image2 of decoded batch and cropped.
Image3 of decoded batch and cropped.
Image4 of decoded batch and cropped.
Image5 of decoded batch and cropped.
Image6 of decoded batch and cropped.
3

Licensed under a CC BY SA 4.0 license. The images used here are taken from https://data.caltech.edu/records/mzrjq-6wc02.

Example 4: Image Decode with Resize and Built-in Random Crop

A random crop can be performed by passing scale and aspect ratio information to the image decoder as keyword arguments, which are used to calculate the random crop parameters:

from habana_frameworks.mediapipe import fn
from habana_frameworks.mediapipe.mediapipe import MediaPipe
from habana_frameworks.mediapipe.media_types import imgtype as it
from habana_frameworks.mediapipe.media_types import dtype as dt
from habana_frameworks.mediapipe.media_types import randomCropType as rct
import matplotlib.pyplot as plt

class myMediaPipe(MediaPipe):
    """Media pipeline that decodes "jpg" files with the decoder's random crop.

    The decoder node receives scale and ratio bounds, a seed and
    ``rct.RANDOMIZED_AREA_AND_ASPECT_RATIO_CROP`` as its random crop type,
    in addition to the resize target ``[img_w, img_h]``.
    """

    def __init__(self, device, dir, queue_depth, batch_size, img_h, img_w):
        """Create the reader and decoder nodes.

        Args:
            device: Forwarded to ``MediaPipe.__init__``.
            dir: Directory handed to ``fn.ReadImageDatasetFromDir``.
            queue_depth: Forwarded to ``MediaPipe.__init__``.
            batch_size: Forwarded to ``MediaPipe.__init__``.
            img_h: Second entry of the decoder's ``resize`` list.
            img_w: First entry of the decoder's ``resize`` list.
        """
        # The class name is passed as the fourth positional argument.
        cls_name = type(self).__name__
        super().__init__(device, queue_depth, batch_size, cls_name)

        self.input = fn.ReadImageDatasetFromDir(
            shuffle=False, dir=dir, format="jpg")

        self.decode = fn.ImageDecoder(
            device="hpu",
            output_format=it.RGB_I,
            resize=[img_w, img_h],
            scale_min=0.08,
            scale_max=1.0,
            ratio_min=3.0 / 4.0,
            ratio_max=4.0 / 3.0,
            seed=73685926,
            random_crop_type=rct.RANDOMIZED_AREA_AND_ASPECT_RATIO_CROP)

    def definegraph(self):
        """Feed the reader's images through the decoder.

        Returns:
            Tuple ``(images, labels)`` where ``images`` is the decoder output
            and ``labels`` comes straight from the reader.
        """
        encoded, labels = self.input()
        decoded = self.decode(encoded)
        return decoded, labels

def display_images(images, batch_size, cols):
    """Show the first ``batch_size`` images in a grid with ``cols`` columns.

    Args:
        images: Indexable collection; ``images[i]`` is passed to
            ``plt.imshow`` for each ``i`` in ``range(batch_size)``.
        batch_size: Number of images to draw.
        cols: Number of columns in the subplot grid.
    """
    # Ceiling division so the grid always has enough rows for every image
    # (e.g. 7 images in 3 columns need 3 rows) and never an extra empty one.
    rows = (batch_size + cols - 1) // cols
    plt.figure(figsize=(10, 10))
    for i in range(batch_size):
        # Subplot indices are 1-based.
        plt.subplot(rows, cols, i + 1)
        plt.imshow(images[i])
        plt.axis("off")
    plt.show()

def main():
    """Build the pipeline, run it once and display the resulting images."""
    # Example settings
    img_dir = "/path/to/images"
    batch_size, queue_depth = 6, 2
    img_width, img_height = 200, 200
    columns = 3

    # Create, build and initialize the media pipeline
    pipe = myMediaPipe(device='hpu',
                       dir=img_dir,
                       queue_depth=queue_depth,
                       batch_size=batch_size,
                       img_h=img_height,
                       img_w=img_width)
    pipe.build()
    pipe.iter_init()

    # Run the pipeline once
    device_images, device_labels = pipe.run()

    # Copy data to host from device as numpy arrays
    host_images = device_images.as_cpu().as_nparray()
    host_labels = device_labels.as_cpu().as_nparray()  # copied, not displayed

    display_images(host_images, batch_size, columns)


if __name__ == "__main__":
    main()

Decoded, Resized with Built-in Random Crop Images 4

Image1 of decoded batch and cropped using builtin algorithm.
Image2 of decoded batch and cropped using builtin algorithm.
Image3 of decoded batch and cropped using builtin algorithm.
Image4 of decoded batch and cropped using builtin algorithm.
Image5 of decoded batch and cropped using builtin algorithm.
Image6 of decoded batch and cropped using builtin algorithm.
4

Licensed under a CC BY SA 4.0 license. The images used here are taken from https://data.caltech.edu/records/mzrjq-6wc02.