Video Decoder Examples

This section shows how to use MediaPipe for decoding, resizing and cropping operations.

Example 1: Video Decode with User Defined Random Crop and Resize

This example uses the MediaPipe decode functionality and adds a user-defined random crop operation. The decoder crops each frame using the randomly generated crop window and resizes the output to the dimensions given by img_width and img_height. The full pipeline is listed after the short sketch below.
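
The function passed to fn.MediaFunc returns one crop window per clip as normalized [x, y, w, h] values in the range [0, 1]. The following standalone sketch (window_to_pixels is a hypothetical helper, not part of the MediaPipe API) shows how such a window would map to pixel coordinates, assuming the decoder interprets it relative to the full decoded frame:

def window_to_pixels(window, frame_w, frame_h):
    # window holds normalized [x, y, w, h] values for one clip.
    x, y, w, h = window
    return (int(x * frame_w), int(y * frame_h),
            int(w * frame_w), int(h * frame_h))

print(window_to_pixels([0.1, 0.05, 0.9, 0.95], frame_w=1920, frame_h=1080))
# (192, 54, 1728, 1026)

The full example follows: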

from habana_frameworks.mediapipe import fn
from habana_frameworks.mediapipe.mediapipe import MediaPipe
from habana_frameworks.mediapipe.media_types import imgtype as it
from habana_frameworks.mediapipe.media_types import dtype as dt
from habana_frameworks.mediapipe.operators.cpu_nodes.cpu_nodes import media_function
import matplotlib.pyplot as plt
import os
import numpy as np


class myMediaPipe(MediaPipe):
    def __init__(self, device, queue_depth, batch_size, num_threads, channels, dir, resize_w, resize_h, crop_w, crop_h, frame_per_clip):
        super(myMediaPipe, self).__init__(device,
                                          queue_depth,
                                          batch_size,
                                          num_threads,
                                          self.__class__.__name__)
        self.input = fn.ReadVideoDatasetFromDir(shuffle=False,
                                                dir=dir,
                                                format="mp4",
                                                frames_per_clip=frame_per_clip,
                                                start_frame_index=0,
                                                fixed_clip_mode=True)

        self.decode = fn.VideoDecoder(device="hpu",
                                    output_format=it.RGB_I,
                                    resize=[resize_w, resize_h],
                                    frames_per_clip=frame_per_clip,
                                    max_frame_vid=frame_per_clip,
                                    dtype=dt.UINT8)

        seed = 7368592685

        self.random_crop = fn.MediaFunc(func=random_crop_func,
                                        dtype=dt.FLOAT32,
                                        shape=[4, batch_size],
                                        seed=seed)

    def definegraph(self):
        videos, labels, resample, video_offset = self.input()
        crop_val = self.random_crop()
        videos = self.decode(videos, video_offset, resample, crop_val)
        return videos, labels


class random_crop_func(media_function):
    def __init__(self, params):
        self.np_shape = params['shape'][::-1]
        self.np_dtype = params['dtype']
        self.batch_size = self.np_shape[0]
        self.seed = params['seed']
        self.rng = np.random.default_rng(self.seed)

    def __call__(self):
        # Generate one normalized [x, y, w, h] crop window per clip in the batch.
        a = np.empty(shape=self.np_shape, dtype=self.np_dtype)
        x_val = self.rng.uniform(0, 0.2, self.batch_size)
        y_val = self.rng.uniform(0, 0.1, self.batch_size)
        w_val = self.rng.uniform(0.8, 1, self.batch_size)
        h_val = self.rng.uniform(0.9, 1, self.batch_size)
        for i in range(self.batch_size):
            # Clamp each window so it stays inside the frame.
            if (x_val[i] + w_val[i]) > 1:
                w_val[i] = 1 - x_val[i]
            if (y_val[i] + h_val[i]) > 1:
                h_val[i] = 1 - y_val[i]
            a[i] = [x_val[i], y_val[i], w_val[i], h_val[i]]
        return a


def display_videos(videos, labels, batch_size, frame_per_clip, cols):
    # One subplot per decoded frame: batch_size clips x frame_per_clip frames.
    rows = (batch_size * frame_per_clip) // cols
    plt.figure(figsize=(10, 10))
    for i in range(batch_size):
        for j in range(frame_per_clip):
            plt.subplot(rows, cols, i * frame_per_clip + j + 1)
            plt.imshow(videos[i][j])
            plt.title("label:" + str(labels[i]))
            plt.axis("off")
    plt.show()


def main():
    batch_size = 4
    img_width = 200
    img_height = 200
    crop_width = 150
    crop_height = 150
    channels = 3
    queue_depth = 3
    num_threads = 1
    frame_per_clip = 2
    base_dir = os.environ['DATASET_DIR']
    dir = base_dir + "/vid_data/"
    pipe = myMediaPipe('legacy', queue_depth, batch_size, num_threads,
                    channels, dir, img_width, img_height, crop_width, crop_height, frame_per_clip)
    pipe.build()
    pipe.iter_init()
    bcnt = 0
    while bcnt < 1:
        try:
            videos, labels = pipe.run()
        except StopIteration:
            break
        videos = videos.as_cpu().as_nparray()
        labels = labels.as_cpu().as_nparray()

        display_videos(videos, labels, batch_size, frame_per_clip, cols=2)
        bcnt = bcnt + 1


if __name__ == "__main__":
    main()
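
The display_videos helper requires an interactive matplotlib backend. For headless runs, the decoded frames can instead be written to disk; the following is a minimal sketch using Pillow, assuming videos is the uint8 array returned by videos.as_cpu().as_nparray() with shape (batch_size, frame_per_clip, height, width, channels):

import os
from PIL import Image

def save_videos(videos, labels, out_dir="decoded_frames"):
    # Write every decoded frame of every clip as a PNG file.
    os.makedirs(out_dir, exist_ok=True)
    for i, clip in enumerate(videos):
        for j, frame in enumerate(clip):
            name = "clip{}_label{}_frame{}.png".format(i, labels[i], j)
            Image.fromarray(frame).save(os.path.join(out_dir, name))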

Decoded, Random Cropped and Resized Videos: four clips of two frames each (labels 0, 0, 1, and 2), with every frame cropped and resized.

Licensed under a CC BY-SA 4.0 license. The videos used here are generated using images from https://data.caltech.edu/records/mzrjq-6wc02.

Example 2: Video Decode with Resize and Crop

This example uses the MediaPipe decode functionality together with the decoder's built-in resize and crop. It resizes the video to the dimensions given by img_width and img_height and then crops it to crop_width and crop_height at offset (crop_x, crop_y). The complete pipeline is listed after the short check below.
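
The crop_after_resize parameter takes [crop_x, crop_y, crop_width, crop_height] in pixels relative to the resized frame, so the crop window must fit inside it. A quick worked check using the values from this example (crop_fits is a hypothetical helper, not a MediaPipe API):

def crop_fits(resize_w, resize_h, crop_x, crop_y, crop_w, crop_h):
    # The crop window must lie entirely inside the resized frame.
    return (crop_x + crop_w <= resize_w) and (crop_y + crop_h <= resize_h)

# With a 200x200 resize, a 150x150 crop at offset (10, 10) keeps
# rows and columns 10..159 of each frame.
assert crop_fits(200, 200, 10, 10, 150, 150)

The complete example follows: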

from habana_frameworks.mediapipe import fn
from habana_frameworks.mediapipe.mediapipe import MediaPipe
from habana_frameworks.mediapipe.media_types import imgtype as it
from habana_frameworks.mediapipe.media_types import dtype as dt
import matplotlib.pyplot as plt
import os


class myMediaPipe(MediaPipe):
    def __init__(self, device, queue_depth, batch_size, num_threads, channels, dir, resize_width, resize_height, crop_x, crop_y, crop_width, crop_height, frame_per_clip):
        super(myMediaPipe, self).__init__(device=device,
                                        prefetch_depth=queue_depth,
                                        batch_size=batch_size,
                                        pipe_name=self.__class__.__name__)

        self.input = fn.ReadVideoDatasetFromDir(shuffle=False,
                                                dir=dir,
                                                format="mp4",
                                                frames_per_clip=frame_per_clip,
                                                start_frame_index=0,
                                                fixed_clip_mode=True)

        self.decode = fn.VideoDecoder(device="hpu",
                                    output_format=it.RGB_I,
                                    resize=[resize_width, resize_height],
                                    crop_after_resize=[
                                        crop_x, crop_y, crop_width, crop_height],
                                    frames_per_clip=frame_per_clip,
                                    max_frame_vid=frame_per_clip,
                                    dtype=dt.UINT8)

    def definegraph(self):
        videos, labels, resample, video_offset = self.input()
        videos = self.decode(videos, video_offset)
        return videos, labels


def display_videos(videos, labels, batch_size, frame_per_clip, cols):
    # One subplot per decoded frame: batch_size clips x frame_per_clip frames.
    rows = (batch_size * frame_per_clip) // cols
    plt.figure(figsize=(10, 10))
    for i in range(batch_size):
        for j in range(frame_per_clip):
            plt.subplot(rows, cols, i * frame_per_clip + j + 1)
            plt.imshow(videos[i][j])
            plt.title("label:" + str(labels[i]))
            plt.axis("off")
    plt.show()


def main():
    batch_size = 4
    img_width = 200
    img_height = 200
    crop_width = 150
    crop_height = 150
    crop_x = 10
    crop_y = 10
    channels = 3
    queue_depth = 3
    frame_per_clip = 2
    num_threads = 1
    base_dir = os.environ['DATASET_DIR']
    dir = base_dir + "/vid_data/"
    pipe = myMediaPipe('legacy', queue_depth, batch_size, num_threads,
                    channels, dir, img_width, img_height, crop_x, crop_y, crop_width, crop_height, frame_per_clip)
    pipe.build()
    pipe.iter_init()
    bcnt = 0
    while bcnt < 1:
        try:
            videos, labels = pipe.run()
        except StopIteration:
            break
        videos = videos.as_cpu().as_nparray()
        labels = labels.as_cpu().as_nparray()
        display_videos(videos, labels, batch_size, frame_per_clip, cols=2)
        bcnt = bcnt + 1


if __name__ == "__main__":
    main()
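
As a quick sanity check on the decoded output, the array shape can be compared against the requested crop size. A minimal sketch, assuming the RGB_I output is laid out as (batch_size, frame_per_clip, height, width, channels):

import numpy as np

def check_output_shape(videos, batch_size, frame_per_clip,
                       crop_height, crop_width, channels=3):
    # Assumed layout for interleaved RGB output from the decoder.
    expected = (batch_size, frame_per_clip, crop_height, crop_width, channels)
    assert videos.shape == expected, "got {}, expected {}".format(videos.shape, expected)
    assert videos.dtype == np.uint8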

Decoded, Resized and Cropped Videos: four clips of two frames each (labels 0, 0, 1, and 2), with every frame resized and then cropped by the decoder's built-in crop.

Licensed under a CC BY-SA 4.0 license. The videos used here are generated using images from https://data.caltech.edu/records/mzrjq-6wc02.