Video Decoder Examples
On this Page
Video Decoder Examples¶
This section shows how to use MediaPipe for decoding, resizing and cropping operations.
Example 1: Video Decode with User Defined Random Crop and Resize¶
This example uses the MediaPipe video decode functionality and adds a user-defined random crop operation to it.
It produces videos resized to the dimensions given by img_width and img_height:
from habana_frameworks.mediapipe import fn
from habana_frameworks.mediapipe.mediapipe import MediaPipe
from habana_frameworks.mediapipe.media_types import imgtype as it
from habana_frameworks.mediapipe.media_types import dtype as dt
from habana_frameworks.mediapipe.operators.cpu_nodes.cpu_nodes import media_function
import matplotlib.pyplot as plt
import os
import numpy as np
class myMediaPipe(MediaPipe):
    """Media pipeline that reads mp4 clips from a directory and decodes them.

    The decoder resizes every clip to ``resize_w`` x ``resize_h`` and takes a
    per-batch crop tensor produced by ``random_crop_func``.
    """

    def __init__(self, device, queue_depth, batch_size, num_threads, channel, dir, resize_w, resize_h, crop_w, crop_h, frame_per_clip):
        """Create the reader, decoder and random-crop nodes.

        ``channel``, ``crop_w`` and ``crop_h`` are accepted but not used by
        this example; the crop window comes from ``random_crop_func``.
        """
        pipe_name = self.__class__.__name__
        super().__init__(device, queue_depth, batch_size, num_threads, pipe_name)

        # Reader node: yields clips of `frame_per_clip` frames from mp4 files.
        self.input = fn.ReadVideoDatasetFromDir(
            shuffle=False,
            dir=dir,
            format="mp4",
            frames_per_clip=frame_per_clip,
            start_frame_index=0,
            fixed_clip_mode=True,
        )

        # Decoder node: decodes to RGB and resizes to the requested size.
        target_size = [resize_w, resize_h]
        self.decode = fn.VideoDecoder(
            device="hpu",
            output_format=it.RGB_I,
            resize=target_size,
            frames_per_clip=frame_per_clip,
            max_frame_vid=frame_per_clip,
            dtype=dt.UINT8,
        )

        # User-defined node producing one 4-value crop entry per batch item.
        crop_seed = 7368592685
        self.random_crop = fn.MediaFunc(
            func=random_crop_func,
            dtype=dt.FLOAT32,
            shape=[4, batch_size],
            seed=crop_seed,
        )

    def definegraph(self):
        """Wire reader -> decoder (with random crop) and return (videos, labels)."""
        clips, labels, resample, video_offset = self.input()
        crop_window = self.random_crop()
        decoded = self.decode(clips, video_offset, resample, crop_window)
        return decoded, labels
class random_crop_func(media_function):
    """Produce one random crop entry ``[x, y, w, h]`` per batch item.

    The values are drawn uniformly from fixed sub-ranges of [0, 1] and the
    width/height are clamped so that ``x + w <= 1`` and ``y + h <= 1``.
    NOTE(review): presumably these are fractions of the frame size consumed
    by the video decoder -- confirm against the decoder documentation.
    """

    def __init__(self, params):
        """Store output shape/dtype and seed the random generator.

        :param params: dict providing 'shape', 'dtype' and 'seed'.
        """
        # The configured shape is reversed to obtain the numpy shape;
        # its first entry is then used as the batch size.
        self.np_shape = params['shape'][::-1]
        self.np_dtype = params['dtype']
        self.batch_size = self.np_shape[0]
        self.seed = params['seed']
        self.rng = np.random.default_rng(self.seed)

    def __call__(self):
        """Return an array of shape ``np_shape`` holding the crop entries."""
        crops = np.empty(shape=self.np_shape, dtype=self.np_dtype)

        # Draw each component exactly once per call. The previous version
        # repeated these draws batch_size times and kept only the last set.
        x_val = self.rng.uniform(0, .2, self.batch_size)
        y_val = self.rng.uniform(0, .1, self.batch_size)
        w_val = self.rng.uniform(0.8, 1, self.batch_size)
        h_val = self.rng.uniform(0.9, 1, self.batch_size)

        # Shrink width/height wherever the window would extend past 1.
        w_val = np.where((x_val + w_val) > 1, 1 - x_val, w_val)
        h_val = np.where((y_val + h_val) > 1, 1 - y_val, h_val)

        # One row per batch item: [x, y, w, h], cast to the output dtype.
        crops[:] = np.stack([x_val, y_val, w_val, h_val], axis=1)
        return crops
def display_videos(videos, labels, batch_size, frame_per_clip, cols):
    """Show the decoded clips of one batch in a grid of subplots.

    :param videos: indexable as ``videos[i][j]`` -> image of frame j of clip i.
    :param labels: indexable as ``labels[i]`` -> label of clip i.
    :param batch_size: number of clips to display.
    :param frame_per_clip: number of frames drawn for each clip.
    :param cols: number of grid columns.
    """
    # Ceiling division: a plain floor gives too few rows when batch_size is
    # not a multiple of cols, and plt.subplot() then rejects index i + 1.
    rows = (batch_size + cols - 1) // cols
    plt.figure(figsize=(10, 10))
    for i in range(batch_size):
        # Every frame of clip i is drawn into the same subplot (index i + 1),
        # so later frames are drawn over earlier ones.
        for j in range(frame_per_clip):
            plt.subplot(rows, cols, i + 1)
            plt.imshow(videos[i][j])
            plt.title("label:"+str(labels[i]))
            plt.axis("off")
    plt.show()
def main():
    """Build the pipeline, run a single batch and display the decoded clips.

    The dataset root is read from the DATASET_DIR environment variable.
    """
    # Pipeline and display configuration.
    batch_size = 4
    img_width, img_height = 200, 200
    crop_width, crop_height = 150, 150
    channels = 3
    queue_depth = 3
    num_threads = 1
    frame_per_clip = 2

    dataset_root = os.environ['DATASET_DIR']
    video_dir = dataset_root + "/vid_data/"

    pipe = myMediaPipe(
        'legacy', queue_depth, batch_size, num_threads, channels,
        video_dir, img_width, img_height, crop_width, crop_height,
        frame_per_clip,
    )
    pipe.build()
    pipe.iter_init()

    # Only one batch is shown; stop early if the pipeline is exhausted.
    batches_to_show = 1
    for _ in range(batches_to_show):
        try:
            videos, labels = pipe.run()
        except StopIteration:
            break
        videos_np = videos.as_cpu().as_nparray()
        labels_np = labels.as_cpu().as_nparray()
        display_videos(videos_np, labels_np, batch_size, frame_per_clip, cols=2)


if __name__ == "__main__":
    main()
Decoded, Random Cropped and Resized Videos 1
- 1
Licensed under a CC BY SA 4.0 license. The videos used here are generated using images from https://data.caltech.edu/records/mzrjq-6wc02.
Example 2: Video Decode with Resize and Crop¶
This example uses the MediaPipe video decode functionality together with resize and crop.
It resizes each video to the dimensions given by img_width and img_height, and then crops a window of crop_width by crop_height starting at (crop_x, crop_y):
from habana_frameworks.mediapipe import fn
from habana_frameworks.mediapipe.mediapipe import MediaPipe
from habana_frameworks.mediapipe.media_types import imgtype as it
from habana_frameworks.mediapipe.media_types import dtype as dt
import matplotlib.pyplot as plt
import os
class myMediaPipe(MediaPipe):
    """Media pipeline that reads mp4 clips, decodes, resizes and crops them.

    The decoder resizes each clip to ``resize_width`` x ``resize_height`` and
    then applies the fixed window ``[crop_x, crop_y, crop_width, crop_height]``
    through ``crop_after_resize``.
    """

    def __init__(self, device, queue_depth, batch_size, num_threads, channels, dir, resize_width, resize_height, crop_x, crop_y, crop_width, crop_height, frame_per_clip):
        """Create the reader and decoder nodes.

        ``channels`` is accepted but not used by this example.
        """
        # num_threads was previously accepted but never forwarded, so the
        # caller's value was silently ignored. Arguments are passed in the
        # order (device, queue depth, batch size, num_threads, pipe name).
        super(myMediaPipe, self).__init__(device,
                                          queue_depth,
                                          batch_size,
                                          num_threads,
                                          self.__class__.__name__)

        # Reader node: yields clips of `frame_per_clip` frames from mp4 files.
        self.input = fn.ReadVideoDatasetFromDir(shuffle=False,
                                                dir=dir,
                                                format="mp4",
                                                frames_per_clip=frame_per_clip,
                                                start_frame_index=0,
                                                fixed_clip_mode=True)

        # Decoder node: decode to RGB, resize, then crop the fixed window.
        self.decode = fn.VideoDecoder(device="hpu",
                                      output_format=it.RGB_I,
                                      resize=[resize_width, resize_height],
                                      crop_after_resize=[
                                          crop_x, crop_y, crop_width, crop_height],
                                      frames_per_clip=frame_per_clip,
                                      max_frame_vid=frame_per_clip,
                                      dtype=dt.UINT8)

    def definegraph(self):
        """Wire reader -> decoder and return (videos, labels)."""
        # The reader also returns a resample output; this example does not
        # pass it to the decoder.
        videos, labels, _resample, video_offset = self.input()
        videos = self.decode(videos, video_offset)
        return videos, labels
def display_videos(videos, labels, batch_size, frame_per_clip, cols):
    """Show the decoded clips of one batch in a grid of subplots.

    :param videos: indexable as ``videos[i][j]`` -> image of frame j of clip i.
    :param labels: indexable as ``labels[i]`` -> label of clip i.
    :param batch_size: number of clips to display.
    :param frame_per_clip: number of frames drawn for each clip.
    :param cols: number of grid columns.
    """
    # Ceiling division: a plain floor gives too few rows when batch_size is
    # not a multiple of cols, and plt.subplot() then rejects index i + 1.
    rows = (batch_size + cols - 1) // cols
    plt.figure(figsize=(10, 10))
    for i in range(batch_size):
        # Every frame of clip i is drawn into the same subplot (index i + 1),
        # so later frames are drawn over earlier ones.
        for j in range(frame_per_clip):
            plt.subplot(rows, cols, i + 1)
            plt.imshow(videos[i][j])
            plt.title("label:"+str(labels[i]))
            plt.axis("off")
    plt.show()
def main():
    """Build the resize-and-crop pipeline, run one batch and display it.

    The dataset root is read from the DATASET_DIR environment variable.
    """
    # Pipeline and display configuration.
    batch_size = 4
    img_width, img_height = 200, 200
    crop_width, crop_height = 150, 150
    crop_x, crop_y = 10, 10
    channels = 3
    queue_depth = 3
    frame_per_clip = 2
    num_threads = 1

    dataset_root = os.environ['DATASET_DIR']
    video_dir = dataset_root + "/vid_data/"

    pipe = myMediaPipe(
        'legacy', queue_depth, batch_size, num_threads, channels,
        video_dir, img_width, img_height, crop_x, crop_y,
        crop_width, crop_height, frame_per_clip,
    )
    pipe.build()
    pipe.iter_init()

    # Only one batch is shown; stop early if the pipeline is exhausted.
    batches_to_show = 1
    for _ in range(batches_to_show):
        try:
            videos, labels = pipe.run()
        except StopIteration:
            break
        videos_np = videos.as_cpu().as_nparray()
        labels_np = labels.as_cpu().as_nparray()
        display_videos(videos_np, labels_np, batch_size, frame_per_clip, cols=2)


if __name__ == "__main__":
    main()
Decoded, Resized and Cropped Videos 2
- 2
Licensed under a CC BY SA 4.0 license. The videos used here are generated using images from https://data.caltech.edu/records/mzrjq-6wc02.