Image Decoder Examples¶

This section shows how to use media pipe for decoding, resizing and cropping operations.

Example 1: Image Decode with Resize¶

The following code snippet shows how to configure media pipe to decode images and resize the decoded output. The decoder can produce RGB interleaved or RGB planar images; this example sets output_format=it.RGB_I:

from habana_frameworks.mediapipe import fn
from habana_frameworks.mediapipe.mediapipe import MediaPipe
from habana_frameworks.mediapipe.media_types import imgtype as it
from habana_frameworks.mediapipe.media_types import dtype as dt
import matplotlib.pyplot as plt
import numpy as np
import os

# Seconds each figure stays on screen before it is closed; the DISPLAY_TIMEOUT
# environment variable overrides the default of 5.
# os.getenv returns a string, so convert here: the value is later handed to
# plt.pause(), which needs a number. A non-numeric DISPLAY_TIMEOUT raises
# ValueError at start-up.
g_display_timeout = float(os.getenv("DISPLAY_TIMEOUT") or 5)


class myMediaPipe(MediaPipe):
    """Pipe that feeds a directory image reader (format "jpg") into an image decoder with resize."""

    def __init__(self, device, queue_depth, batch_size, num_threads, op_device, dir, img_h, img_w):
        """Initialise the base pipe and create the reader and decoder operators.

        Args:
            device: forwarded to the ``MediaPipe`` base constructor.
            queue_depth: forwarded to the ``MediaPipe`` base constructor.
            batch_size: forwarded to the ``MediaPipe`` base constructor.
            num_threads: forwarded to the ``MediaPipe`` base constructor.
            op_device: ``device`` argument of the image decoder operator.
            dir: dataset directory handed to the reader operator.
            img_h: resize height; passed to the decoder as ``resize=[img_w, img_h]``.
            img_w: resize width; passed to the decoder as ``resize=[img_w, img_h]``.
        """
        # The pipe is registered under the name of the concrete class.
        pipe_name = self.__class__.__name__
        super().__init__(device, queue_depth, batch_size, num_threads, pipe_name)

        # Reader operator: created on "cpu", shuffling disabled.
        self.input = fn.ReadImageDatasetFromDir(
            shuffle=False,
            dir=dir,
            format="jpg",
            device="cpu",
        )

        # Decoder operator: note the resize list is ordered width first.
        target_size = [img_w, img_h]
        self.decode = fn.ImageDecoder(
            device=op_device,
            output_format=it.RGB_I,
            resize=target_size,
        )

    def definegraph(self):
        """Connect reader -> decoder and return ``(images, labels)``."""
        encoded, labels = self.input()
        decoded = self.decode(encoded)
        return decoded, labels


def display_images(images, batch_size, cols):
    """Show the first ``batch_size`` images in a grid, wait, then close the figure.

    Args:
        images: indexable collection; ``images[i]`` is passed to ``plt.imshow``.
        batch_size: number of images to draw.
        cols: number of grid columns.
    """
    # Ceiling division, so a partially filled last row still gets grid slots.
    # (batch_size + 1) // cols is only a true ceiling when cols == 2; e.g. it
    # yields 2 rows for 7 images in 3 columns and the 7th subplot fails.
    rows = (batch_size + cols - 1) // cols
    plt.figure(figsize=(10, 10))
    for i in range(batch_size):
        plt.subplot(rows, cols, i + 1)
        plt.imshow(images[i])
        plt.axis("off")
    plt.show(block=False)
    # g_display_timeout may be a string when it came from the environment;
    # plt.pause needs a number of seconds.
    plt.pause(float(g_display_timeout))
    plt.close()


def run(device, op_device):
    """Build the pipe, fetch one batch and display its images.

    Args:
        device: ``device`` argument for the ``myMediaPipe`` constructor.
        op_device: ``op_device`` argument for the ``myMediaPipe`` constructor.

    Raises:
        KeyError: if the DATASET_DIR environment variable is not set.
    """
    def to_host(tensor):
        # Objects exposing a callable as_cpu() are converted through it;
        # anything else is returned untouched.
        if not callable(getattr(tensor, "as_cpu", None)):
            return tensor
        return tensor.as_cpu()

    batch_size, columns = 6, 3
    queue_depth, num_threads = 2, 1
    img_width = img_height = 200
    image_dir = os.environ['DATASET_DIR'] + "/img_data/"

    # Construct, build and prime the pipe, then pull a single batch.
    pipe = myMediaPipe(device, queue_depth, batch_size,
                       num_threads, op_device, image_dir,
                       img_height, img_width)
    pipe.build()
    pipe.iter_init()
    images, labels = pipe.run()

    # Bring the image batch to the host as a numpy array.
    images = to_host(images).as_nparray()

    del pipe

    display_images(images, batch_size, columns)


def test_main():
    """Call ``run`` once for every (device, op_device) combination listed below."""
    # Maps each value passed as run()'s `device` to the `op_device` values
    # tried with it.
    op_devices_by_device = {
        'mixed': ['hpu'],
        'legacy': ['hpu'],
    }

    for dev, op_devs in op_devices_by_device.items():
        for op_dev in op_devs:
            run(dev, op_dev)


# Run the example only when executed as a script.
if __name__ == "__main__":
    test_main()

Decoded and Resized Images 1

1: Licensed under a CC BY SA 4.0 license. The images used here are taken from https://data.caltech.edu/records/mzrjq-6wc02.

Example 2: Image Decode with Resize and Fixed Crop¶

This example uses the decode functionality of media pipe as shown in Example 1 and adds a fixed-size crop operation to it.

from habana_frameworks.mediapipe import fn
from habana_frameworks.mediapipe.mediapipe import MediaPipe
from habana_frameworks.mediapipe.media_types import imgtype as it
from habana_frameworks.mediapipe.media_types import dtype as dt
import matplotlib.pyplot as plt
import numpy as np
import os

# Seconds each figure stays on screen before it is closed; the DISPLAY_TIMEOUT
# environment variable overrides the default of 5.
# os.getenv returns a string, so convert here: the value is later handed to
# plt.pause(), which needs a number. A non-numeric DISPLAY_TIMEOUT raises
# ValueError at start-up.
g_display_timeout = float(os.getenv("DISPLAY_TIMEOUT") or 5)


class myMediaPipe(MediaPipe):
    """Pipe that chains a directory image reader, an image decoder with resize, and a crop."""

    def __init__(self, device, queue_depth, batch_size, num_threads, op_device, dir, img_h, img_w):
        """Initialise the base pipe and create the reader, decoder and crop operators.

        Args:
            device: forwarded to the ``MediaPipe`` base constructor.
            queue_depth: forwarded to the ``MediaPipe`` base constructor.
            batch_size: forwarded to the ``MediaPipe`` base constructor.
            num_threads: forwarded to the ``MediaPipe`` base constructor.
            op_device: ``device`` argument of the decoder and crop operators.
            dir: dataset directory handed to the reader operator.
            img_h: resize height; passed to the decoder as ``resize=[img_w, img_h]``.
            img_w: resize width; passed to the decoder as ``resize=[img_w, img_h]``.
        """
        # The pipe is registered under the name of the concrete class.
        pipe_name = self.__class__.__name__
        super().__init__(device, queue_depth, batch_size, num_threads, pipe_name)

        # Reader operator: created on "cpu", shuffling disabled.
        self.input = fn.ReadImageDatasetFromDir(
            shuffle=False,
            dir=dir,
            format="jpg",
            device="cpu",
        )

        # Decoder operator: note the resize list is ordered width first.
        target_size = [img_w, img_h]
        self.decode = fn.ImageDecoder(
            device=op_device,
            output_format=it.RGB_I,
            resize=target_size,
        )

        # Crop operator with a hard-coded 150 x 150 window.
        self.crop = fn.Crop(
            crop_w=150,
            crop_h=150,
            dtype=dt.UINT8,
            device=op_device,
        )

    def definegraph(self):
        """Connect reader -> decoder -> crop and return ``(images, labels)``."""
        encoded, labels = self.input()
        decoded = self.decode(encoded)
        cropped = self.crop(decoded)
        return cropped, labels


def display_images(images, batch_size, cols):
    """Show the first ``batch_size`` images in a grid, wait, then close the figure.

    Args:
        images: indexable collection; ``images[i]`` is passed to ``plt.imshow``.
        batch_size: number of images to draw.
        cols: number of grid columns.
    """
    # Ceiling division, so a partially filled last row still gets grid slots.
    # (batch_size + 1) // cols is only a true ceiling when cols == 2; e.g. it
    # yields 2 rows for 7 images in 3 columns and the 7th subplot fails.
    rows = (batch_size + cols - 1) // cols
    plt.figure(figsize=(10, 10))
    for i in range(batch_size):
        plt.subplot(rows, cols, i + 1)
        plt.imshow(images[i])
        plt.axis("off")
    plt.show(block=False)
    # g_display_timeout may be a string when it came from the environment;
    # plt.pause needs a number of seconds.
    plt.pause(float(g_display_timeout))
    plt.close()


def run(device, op_device):
    """Build the pipe, fetch one batch and display its images.

    Args:
        device: ``device`` argument for the ``myMediaPipe`` constructor.
        op_device: ``op_device`` argument for the ``myMediaPipe`` constructor.

    Raises:
        KeyError: if the DATASET_DIR environment variable is not set.
    """
    def to_host(tensor):
        # Objects exposing a callable as_cpu() are converted through it;
        # anything else is returned untouched.
        if not callable(getattr(tensor, "as_cpu", None)):
            return tensor
        return tensor.as_cpu()

    batch_size, columns = 6, 3
    queue_depth, num_threads = 2, 1
    img_width = img_height = 200
    image_dir = os.environ['DATASET_DIR'] + "/img_data/"

    # Construct, build and prime the pipe, then pull a single batch.
    pipe = myMediaPipe(device, queue_depth, batch_size,
                       num_threads, op_device, image_dir,
                       img_height, img_width)
    pipe.build()
    pipe.iter_init()
    images, labels = pipe.run()

    # Bring both outputs to the host as numpy arrays (only images are shown).
    images = to_host(images).as_nparray()
    labels = to_host(labels).as_nparray()
    del pipe

    display_images(images, batch_size, columns)


def test_main():
    """Call ``run`` once for every (device, op_device) combination listed below."""
    # Maps each value passed as run()'s `device` to the `op_device` values
    # tried with it.
    op_devices_by_device = {
        'mixed': ['hpu'],
        'legacy': ['hpu'],
    }

    for dev, op_devs in op_devices_by_device.items():
        for op_dev in op_devs:
            run(dev, op_dev)


# Run the example only when executed as a script.
if __name__ == "__main__":
    test_main()

Decoded, Resized and Cropped Images 2

2: Licensed under a CC BY SA 4.0 license. The images used here are taken from https://data.caltech.edu/records/mzrjq-6wc02.

Example 3: Image Decode with Resize and User Defined Random Crop¶

This example uses the decode functionality of media pipe as shown in Example 1 and adds a user-defined random crop operation to it. The decoded output is resized to the dimensions given by img_width and img_height:

from habana_frameworks.mediapipe import fn
from habana_frameworks.mediapipe.mediapipe import MediaPipe
from habana_frameworks.mediapipe.media_types import imgtype as it
from habana_frameworks.mediapipe.media_types import dtype as dt
from habana_frameworks.mediapipe.operators.cpu_nodes.cpu_nodes import media_function
import matplotlib.pyplot as plt
import numpy as np
import os

# Seconds each figure stays on screen before it is closed; the DISPLAY_TIMEOUT
# environment variable overrides the default of 5.
# os.getenv returns a string, so convert here: the value is later handed to
# plt.pause(), which needs a number. A non-numeric DISPLAY_TIMEOUT raises
# ValueError at start-up.
g_display_timeout = float(os.getenv("DISPLAY_TIMEOUT") or 5)


class myMediaPipe(MediaPipe):
    """Pipe whose decoder receives per-image crop windows from a user-defined media function."""

    def __init__(self, device, queue_depth, batch_size, num_threads, op_device, dir, img_h, img_w):
        """Initialise the base pipe and create the reader, decoder and crop-window operators.

        Args:
            device: forwarded to the ``MediaPipe`` base constructor.
            queue_depth: forwarded to the ``MediaPipe`` base constructor.
            batch_size: forwarded to the ``MediaPipe`` base constructor; also
                sizes the crop-window operator's output shape.
            num_threads: forwarded to the ``MediaPipe`` base constructor.
            op_device: ``device`` argument of the image decoder operator.
            dir: dataset directory handed to the reader operator.
            img_h: resize height; passed to the decoder as ``resize=[img_w, img_h]``.
            img_w: resize width; passed to the decoder as ``resize=[img_w, img_h]``.
        """
        # The pipe is registered under the name of the concrete class.
        pipe_name = self.__class__.__name__
        super().__init__(device, queue_depth, batch_size, num_threads, pipe_name)

        # Reader operator: created on "cpu", shuffling disabled.
        self.input = fn.ReadImageDatasetFromDir(
            shuffle=False,
            dir=dir,
            format="jpg",
            device="cpu",
        )

        # Decoder operator: note the resize list is ordered width first.
        target_size = [img_w, img_h]
        self.decode = fn.ImageDecoder(
            device=op_device,
            output_format=it.RGB_I,
            resize=target_size,
        )

        # Sampling bounds consumed by random_crop_func for x, y, width, height.
        crop_bounds = {
            'xval_min': 0.0,
            'xval_max': 0.2,
            'yval_min': 0.0,
            'yval_max': 0.3,
            'wval_min': 0.8,
            'wval_max': 1.0,
            'hval_min': 0.7,
            'hval_max': 1.0,
        }
        rng_seed = 7368592685

        # Crop-window operator: wraps random_crop_func and runs on "cpu".
        self.random_crop = fn.MediaFunc(
            func=random_crop_func,
            dtype=dt.FLOAT32,
            shape=[4, batch_size],
            seed=rng_seed,
            priv_params=crop_bounds,
            device="cpu",
        )

    def definegraph(self):
        """Feed the reader output to both the crop function and the decoder.

        Returns ``(images, labels)`` where the decoder was given the crop
        windows as its second input.
        """
        encoded, labels = self.input()
        windows = self.random_crop(encoded)
        decoded = self.decode(encoded, windows)
        return decoded, labels


class random_crop_func(media_function):
    """Callable producing one random ``[x, y, w, h]`` crop window per batch item.

    Values are drawn uniformly from the configured ranges. A width or height
    that would push the window past 1 is replaced by ``1 - x`` / ``1 - y``.
    """

    def __init__(self, params):
        """Read the configuration out of ``params``.

        Args:
            params: mapping with the keys 'shape', 'dtype', 'seed' and
                'priv_params'; 'priv_params' holds the eight
                ``{x,y,w,h}val_{min,max}`` sampling bounds.
        """
        bounds = params['priv_params']

        # The shape is stored reversed; its first entry then serves as the
        # batch size.
        self.np_shape = params['shape'][::-1]
        self.np_dtype = params['dtype']
        self.batch_size = self.np_shape[0]
        self.seed = params['seed']

        self.xval_min, self.xval_max = bounds['xval_min'], bounds['xval_max']
        self.yval_min, self.yval_max = bounds['yval_min'], bounds['yval_max']
        self.wval_min, self.wval_max = bounds['wval_min'], bounds['wval_max']
        self.hval_min, self.hval_max = bounds['hval_min'], bounds['hval_max']

        # One generator per instance, so successive calls continue the stream.
        self.rng = np.random.default_rng(self.seed)

    def __call__(self, filelist):
        """Return an array of crop windows, one row per batch item.

        Args:
            filelist: operator input; not used by the computation.

        Returns:
            Array of shape ``self.np_shape`` and dtype ``self.np_dtype`` whose
            rows are ``[x, y, w, h]``.
        """
        count = self.batch_size
        # Draw order (x, y, w, h) matters for reproducibility with a fixed seed.
        x_val = self.rng.uniform(self.xval_min, self.xval_max, count)
        y_val = self.rng.uniform(self.yval_min, self.yval_max, count)
        w_val = self.rng.uniform(self.wval_min, self.wval_max, count)
        h_val = self.rng.uniform(self.hval_min, self.hval_max, count)

        # Shrink any window that would extend beyond 1 in either direction.
        w_val = np.where(x_val + w_val > 1, 1 - x_val, w_val)
        h_val = np.where(y_val + h_val > 1, 1 - y_val, h_val)

        windows = np.empty(shape=self.np_shape, dtype=self.np_dtype)
        windows[:] = np.column_stack((x_val, y_val, w_val, h_val))
        return windows


def display_images(images, batch_size, cols):
    """Show the first ``batch_size`` images in a grid, wait, then close the figure.

    Args:
        images: indexable collection; ``images[i]`` is passed to ``plt.imshow``.
        batch_size: number of images to draw.
        cols: number of grid columns.
    """
    # Ceiling division, so a partially filled last row still gets grid slots.
    # (batch_size + 1) // cols is only a true ceiling when cols == 2; e.g. it
    # yields 2 rows for 7 images in 3 columns and the 7th subplot fails.
    rows = (batch_size + cols - 1) // cols
    plt.figure(figsize=(10, 10))
    for i in range(batch_size):
        plt.subplot(rows, cols, i + 1)
        plt.imshow(images[i])
        plt.axis("off")
    plt.show(block=False)
    # g_display_timeout may be a string when it came from the environment;
    # plt.pause needs a number of seconds.
    plt.pause(float(g_display_timeout))
    plt.close()


def run(device, op_device):
    """Build the pipe, fetch one batch and display its images.

    Args:
        device: ``device`` argument for the ``myMediaPipe`` constructor.
        op_device: ``op_device`` argument for the ``myMediaPipe`` constructor.

    Raises:
        KeyError: if the DATASET_DIR environment variable is not set.
    """
    def to_host(tensor):
        # Objects exposing a callable as_cpu() are converted through it;
        # anything else is returned untouched.
        if not callable(getattr(tensor, "as_cpu", None)):
            return tensor
        return tensor.as_cpu()

    batch_size, columns = 6, 3
    queue_depth, num_threads = 2, 1
    img_width = img_height = 200
    image_dir = os.environ['DATASET_DIR'] + "/img_data/"

    # Construct, build and prime the pipe, then pull a single batch.
    pipe = myMediaPipe(device, queue_depth, batch_size,
                       num_threads, op_device, image_dir,
                       img_height, img_width)
    pipe.build()
    pipe.iter_init()
    images, labels = pipe.run()

    # Bring the image batch to the host as a numpy array.
    images = to_host(images).as_nparray()

    del pipe

    display_images(images, batch_size, columns)


def test_main():
    """Call ``run`` once for every (device, op_device) combination listed below."""
    # Maps each value passed as run()'s `device` to the `op_device` values
    # tried with it.
    op_devices_by_device = {'legacy': ['hpu']}

    for dev, op_devs in op_devices_by_device.items():
        for op_dev in op_devs:
            run(dev, op_dev)


# Run the example only when executed as a script.
if __name__ == "__main__":
    test_main()

Decoded, Resized and Random Cropped Images 3

3: Licensed under a CC BY SA 4.0 license. The images used here are taken from https://data.caltech.edu/records/mzrjq-6wc02.

Example 4: Image Decode with Resize and Built-in Random Crop¶

A random crop can also be performed by the image decoder itself: pass the scale and aspect-ratio ranges to the decoder as keyword arguments, and it uses them to calculate the random crop parameters:

from habana_frameworks.mediapipe import fn
from habana_frameworks.mediapipe.mediapipe import MediaPipe
from habana_frameworks.mediapipe.media_types import imgtype as it
from habana_frameworks.mediapipe.media_types import randomCropType as rct
import matplotlib.pyplot as plt
import numpy as np
import os


# Seconds each figure stays on screen before it is closed; the DISPLAY_TIMEOUT
# environment variable overrides the default of 5.
# os.getenv returns a string, so convert here: the value is later handed to
# plt.pause(), which needs a number. A non-numeric DISPLAY_TIMEOUT raises
# ValueError at start-up.
g_display_timeout = float(os.getenv("DISPLAY_TIMEOUT") or 5)


class myMediaPipe(MediaPipe):
    """Pipe whose image decoder is configured with resize and random-crop arguments."""

    def __init__(self, device, queue_depth, batch_size, num_threads, op_device, dir, img_h, img_w):
        """Initialise the base pipe and create the reader and decoder operators.

        Args:
            device: forwarded to the ``MediaPipe`` base constructor.
            queue_depth: forwarded to the ``MediaPipe`` base constructor.
            batch_size: forwarded to the ``MediaPipe`` base constructor.
            num_threads: forwarded to the ``MediaPipe`` base constructor.
            op_device: ``device`` argument of the image decoder operator.
            dir: dataset directory handed to the reader operator.
            img_h: resize height; passed to the decoder as ``resize=[img_w, img_h]``.
            img_w: resize width; passed to the decoder as ``resize=[img_w, img_h]``.
        """
        # The pipe is registered under the name of the concrete class.
        pipe_name = self.__class__.__name__
        super().__init__(device, queue_depth, batch_size, num_threads, pipe_name)

        # Reader operator: created on "cpu", shuffling disabled.
        self.input = fn.ReadImageDatasetFromDir(
            shuffle=False,
            dir=dir,
            format="jpg",
            device="cpu",
        )

        # Decoder operator: resize list is ordered width first; the scale and
        # ratio ranges, seed and crop type are passed as keyword arguments.
        target_size = [img_w, img_h]
        self.decode = fn.ImageDecoder(
            device=op_device,
            output_format=it.RGB_I,
            resize=target_size,
            scale_min=0.08,
            scale_max=1.0,
            ratio_min=3. / 4.,
            ratio_max=4. / 3.,
            seed=73685926,
            random_crop_type=rct.RANDOMIZED_AREA_AND_ASPECT_RATIO_CROP,
        )

    def definegraph(self):
        """Connect reader -> decoder and return ``(images, labels)``."""
        encoded, labels = self.input()
        decoded = self.decode(encoded)
        return decoded, labels


def display_images(images, batch_size, cols):
    """Show the first ``batch_size`` images in a grid, wait, then close the figure.

    Args:
        images: indexable collection; ``images[i]`` is passed to ``plt.imshow``.
        batch_size: number of images to draw.
        cols: number of grid columns.
    """
    # Ceiling division, so a partially filled last row still gets grid slots.
    # (batch_size + 1) // cols is only a true ceiling when cols == 2; e.g. it
    # yields 2 rows for 7 images in 3 columns and the 7th subplot fails.
    rows = (batch_size + cols - 1) // cols
    plt.figure(figsize=(10, 10))
    for i in range(batch_size):
        plt.subplot(rows, cols, i + 1)
        plt.imshow(images[i])
        plt.axis("off")
    plt.show(block=False)
    # g_display_timeout may be a string when it came from the environment;
    # plt.pause needs a number of seconds.
    plt.pause(float(g_display_timeout))
    plt.close()


def run(device, op_device):
    """Build the pipe, fetch one batch and display its images.

    Args:
        device: ``device`` argument for the ``myMediaPipe`` constructor.
        op_device: ``op_device`` argument for the ``myMediaPipe`` constructor.

    Raises:
        KeyError: if the DATASET_DIR environment variable is not set.
    """
    def to_host(tensor):
        # Objects exposing a callable as_cpu() are converted through it;
        # anything else is returned untouched.
        if not callable(getattr(tensor, "as_cpu", None)):
            return tensor
        return tensor.as_cpu()

    batch_size, columns = 6, 3
    queue_depth, num_threads = 2, 1
    img_width = img_height = 200
    image_dir = os.environ['DATASET_DIR'] + "/img_data/"

    # Construct, build and prime the pipe, then pull a single batch.
    pipe = myMediaPipe(device, queue_depth, batch_size,
                       num_threads, op_device, image_dir,
                       img_height, img_width)
    pipe.build()
    pipe.iter_init()
    images, labels = pipe.run()

    # Bring both outputs to the host as numpy arrays (only images are shown).
    images = to_host(images).as_nparray()
    labels = to_host(labels).as_nparray()
    del pipe

    display_images(images, batch_size, columns)


def test_main():
    """Call ``run`` once for every (device, op_device) combination listed below."""
    # Maps each value passed as run()'s `device` to the `op_device` values
    # tried with it.
    op_devices_by_device = {
        'mixed': ['hpu'],
        'legacy': ['hpu'],
    }

    for dev, op_devs in op_devices_by_device.items():
        for op_dev in op_devs:
            run(dev, op_dev)


# Run the example only when executed as a script.
if __name__ == "__main__":
    test_main()

Decoded, Resized with Built-in Random Crop Images 4

Image1 of decoded batch and cropped using builtin algorithm.

Image2 of decoded batch and cropped using builtin algorithm.

Image3 of decoded batch and cropped using builtin algorithm.

Image4 of decoded batch and cropped using builtin algorithm.

Image5 of decoded batch and cropped using builtin algorithm.

Image6 of decoded batch and cropped using builtin algorithm.

4: Licensed under a CC BY SA 4.0 license. The images used here are taken from https://data.caltech.edu/records/mzrjq-6wc02.

Gaudi Documentation 1.23.0 documentation

Image Decoder Examples

On this Page

Image Decoder Examples¶

Example 1: Image Decode with Resize¶

Example 2: Image Decode with Resize and Fixed Crop¶

Example 3: Image Decode with Resize and User Defined Random Crop¶

Example 4: Image Decode with Resize and Built-in Random Crop¶