habana_frameworks.mediapipe.fn.GaussianBlur
- Class:
habana_frameworks.mediapipe.fn.GaussianBlur(**kwargs)
- Define graph call:
__call__(input, gaussian_kernel)
- Parameters:
input - Input tensor to the operator. Supported dimensions: minimum = 4, maximum = 5. Supported data types: FLOAT16, BFLOAT16, FLOAT32. Supported layout: CWHN.
gaussian_kernel - Gaussian kernel for the grouped 1D convolution. It should be a NumPy array with the same dtype as the input tensor and shape [1, kernel_size, 1, (num_channels * batch_size)], where kernel_size = int(2 * math.ceil(3 * max_sigma) + 1) (see the kernel-construction sketch below).
Description:
This operation applies a 1D Gaussian blur to the input tensor by performing a grouped 1D convolution with the provided Gaussian kernel.
- Supported backend:
HPU
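The snippet below is a minimal, host-side NumPy sketch of how a gaussian_kernel tensor of the documented shape can be constructed. The helper name build_gaussian_kernel and its arguments are illustrative only, and it is slightly simplified compared to the full sample further below (each Gaussian is evaluated over the full kernel width instead of zero-padding a narrower support); the shape and the kernel_size formula follow the parameter description above.
import math
import numpy as np

def build_gaussian_kernel(sigmas, num_channels, max_sigma):
    # kernel_size from the formula above; the same width is used for every image
    kernel_size = int(2 * math.ceil(3 * max_sigma) + 1)
    weights = np.zeros((len(sigmas), kernel_size), dtype=np.float32)
    for i, sigma in enumerate(sigmas):
        x = np.arange(kernel_size) - (kernel_size - 1) / 2
        w = np.exp(-0.5 * (x / sigma) ** 2)
        weights[i] = w / w.sum()  # normalize each kernel to sum to 1
    # rearrange to [1, kernel_size, 1, (num_channels * batch_size)]
    weights = np.tile(weights.T, num_channels)  # [kernel_size, batch_size * num_channels]
    return weights.reshape(1, kernel_size, 1, -1)

kernel = build_gaussian_kernel(sigmas=[0.8, 1.2], num_channels=3, max_sigma=1.5)
print(kernel.shape)  # (1, 11, 1, 6)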
Keyword Arguments
kwargs | Description
---|---
maxSigma | Maximum value of sigma used in the Gaussian kernel.
minSigma | Minimum value of sigma used in the Gaussian kernel.
dtype | Output data type.
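As a brief illustration (not part of the original example), a node can be created with these keyword arguments and then invoked in the graph with the input tensor and the kernel tensor; fn and dt refer to the same imports used in the full example below, and the sigma bounds are illustrative values:
blur = fn.GaussianBlur(minSigma=0.5, maxSigma=1.5, dtype=dt.FLOAT32)
# in definegraph(): blurred = blur(images, gaussian_kernel)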
Note
The data type of the input/output tensors and gaussian_kernel must be the same.
Since GaussianBlur is implemented using convolution, add a Pad node before it to keep the output shape the same as the input shape (see the padding sketch below).
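For a stride-1 convolution, the total padding needed to keep the blurred dimension unchanged is kernel_size - 1, split across the two sides. A minimal sketch of that arithmetic (the function name same_padding is illustrative; get_pad_params in the example below computes the same values in a more general form):
def same_padding(kernel_size):
    # stride-1 convolution: pad a total of kernel_size - 1 to preserve length
    pad_total = kernel_size - 1
    pad_before = pad_total // 2
    pad_after = pad_total - pad_before
    return pad_before, pad_after

print(same_padding(11))  # (5, 5)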
Example: Gaussian Blur Operator
The following code snippet shows usage of the GaussianBlur operator. A sample implementation of Gaussian kernel generation is provided for reference.

class gaussian_kernel_sigma provides a sample implementation that randomly generates a Gaussian kernel sigma value for every image of the batch. It takes batch_size, min_sigma and max_sigma as priv_params and generates the output sigmas as a 1D array of length batch_size. The output of class gaussian_kernel_sigma is passed to class gaussian_kernel_gen, and the resulting kernel is fed to the GaussianBlur operator along with the input tensor.

class gaussian_kernel_gen provides a sample implementation of 1D Gaussian kernel generation. It takes batch_size, channels and kernel_size as priv_params, where kernel_size is computed using the formula kernel_size = int(2 * math.ceil(3 * max_sigma) + 1). At run time, it uses the randomly generated sigma values (output of class gaussian_kernel_sigma) and first generates a 2D tensor of shape [batch_size, kernel_size] using the Gaussian distribution formula. The grouped 1D convolution requires a filter of shape [1, kernel_size, 1, (channel * batch_size)], which is obtained using NumPy operations (transpose, tile and expand_dims). A short standalone sketch of this shape transformation follows, before the full example.
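Before the full listing, here is a standalone check of that shape transformation; the batch_size, channels and kernel_size values are illustrative:
import numpy as np

batch_size, channels, kernel_size = 2, 3, 5
k = np.random.rand(batch_size, kernel_size).astype(np.float32)  # per-image 1D kernels
k = np.transpose(k)            # [kernel_size, batch_size]
k = np.tile(k, channels)       # [kernel_size, batch_size * channels]
k = np.expand_dims(k, axis=0)  # [1, kernel_size, batch_size * channels]
k = np.expand_dims(k, axis=2)  # [1, kernel_size, 1, batch_size * channels]
print(k.shape)  # (1, 5, 1, 6)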
import numpy as np
import math
import matplotlib.pyplot as plt
from habana_frameworks.mediapipe import fn
from habana_frameworks.mediapipe.mediapipe import MediaPipe
from habana_frameworks.mediapipe.media_types import imgtype as it
from habana_frameworks.mediapipe.media_types import dtype as dt
from habana_frameworks.mediapipe.operators.cpu_nodes.cpu_nodes import media_function

g_max_sigma = 1.5
g_min_sigma = 0.5
g_seed = 123


class gaussian_kernel_sigma(media_function):
    def __init__(self, params):
        self.priv_params = params['priv_params']
        self.batch_size = self.priv_params['batch_size']
        self.min_sigma = self.priv_params['min_sigma']
        self.max_sigma = self.priv_params['max_sigma']
        self.seed = self.priv_params['seed']
        self.rng = np.random.default_rng(self.seed)
        self.s_start = 0
        self.s_end = self.batch_size

    def __call__(self):
        # randomly sample one sigma value per image in the batch
        sigmas = self.rng.uniform(
            low=self.min_sigma, high=self.max_sigma, size=(self.batch_size))
        return [sigmas]
class gaussian_kernel_gen(media_function):
    def __init__(self, params):
        self.priv_params = params['priv_params']
        self.batch_size = self.priv_params['batch_size']
        self.channels = self.priv_params['channels']
        self.kSize = self.priv_params['kernel_size']

    def __call__(self, sigmas):
        gaussianWeights = self.create_oneD_gaussian_kernel(sigmas)  # batch_size, kernel_size
        gaussianWeights_np = np.array(gaussianWeights, dtype=np.float32)
        gaussianWeights_np = np.transpose(gaussianWeights_np)  # kernel_size, batch_size
        gaussianWeights_np = np.tile(gaussianWeights_np, self.channels)  # kernel_size, (batch_size x channels)
        gaussianWeights_np = np.expand_dims(gaussianWeights_np, axis=0)  # 1, kernel_size, (batch_size x channels)
        gaussianWeights_np = np.expand_dims(gaussianWeights_np, axis=2)  # 1, kernel_size, 1, (batch_size x channels)
        return gaussianWeights_np  # [1, kernel_size, 1, (channel * batch_size)]

    def create_oneD_gaussian_kernel(self, sigmas):
        # Compute a 1D Gaussian filter for each image based on its sigma
        max_size_1d = self.kSize
        gaussianWeights = []
        for sigma in sigmas:
            # size_1d will be an odd number
            size_1d = 2 * math.ceil(3 * sigma) + 1
            # gaussian kernel will be symmetric around mid
            mid_idx = int((size_1d - 1) / 2)
            one_by_sigma_sq = 1.0 / (sigma * sigma)
            sum = 0.0
            weightG = [0.0] * size_1d
            # compute the full symmetric gaussian array
            for x in range(-mid_idx, mid_idx + 1):
                weightG[x + mid_idx] = math.exp(-(x * x * 0.5 * one_by_sigma_sq))
                sum += weightG[x + mid_idx]
            # compute normalization factor
            normalization_factor = 1.0 / float(sum)
            # normalize all elements
            for idx in range(0, size_1d):
                weightG[idx] *= normalization_factor
            # pad gaussian array if required
            len_diff = max_size_1d - len(weightG)
            if (len_diff > 0):
                half_len_diff = int(len_diff / 2)
                weightG = [0] * half_len_diff + weightG + [0] * (len_diff - half_len_diff)
            gaussianWeights.append(weightG)
        return gaussianWeights  # batch_size x kSize
def get_pad_params(W, H, kW, kH, S=1):
    # compute Pad values
    padTB = H * S - S - H + kH
    padLR = W * S - S - W + kW
    padL = padLR // 2
    padR = padLR - padL
    padT = padTB // 2
    padB = padTB - padT
    return padL, padR, padT, padB
class myMediaPipe(MediaPipe):
    def __init__(self, device, queue_depth, batch_size, channel, height, width):
        super(
            myMediaPipe,
            self).__init__(
            device,
            queue_depth,
            batch_size,
            self.__class__.__name__)
        max_sigma = g_max_sigma
        min_sigma = g_min_sigma
        kW = int(2 * math.ceil(3 * max_sigma) + 1)

        priv_params = {}
        priv_params['batch_size'] = batch_size
        priv_params['channels'] = channel
        priv_params['kernel_size'] = kW

        priv_params_sigma = {}
        priv_params_sigma['batch_size'] = batch_size
        priv_params_sigma['min_sigma'] = min_sigma
        priv_params_sigma['max_sigma'] = max_sigma
        priv_params_sigma['seed'] = g_seed
        priv_params_sigma['prob'] = 0.5

        self.input = fn.ReadImageDatasetFromDir(
            shuffle=False, dir="/path/to/image/", format="jpg")
        self.decode = fn.ImageDecoder(
            device="hpu", output_format=it.RGB_P, resize=[width, height])  # WHCN - RGB_P
        self.gaussian_kernel_sigma = fn.MediaFunc(func=gaussian_kernel_sigma, shape=([
            batch_size]), dtype=dt.FLOAT32, priv_params=priv_params_sigma)

        # ------Blur along Width-----------------------------
        self.gaussian_kernel1 = fn.MediaFunc(func=gaussian_kernel_gen, shape=[(channel * batch_size), 1, kW, 1], dtype=dt.FLOAT32, priv_params=priv_params)
        self.transp11 = fn.Transpose(permutation=[3, 2, 0, 1], tensorDim=4, output_scale=1/255, output_zerop=0)  # WHCN -> NCWH
        self.cast11 = fn.Cast(dtype=dt.FLOAT32)
        self.reshape11 = fn.Reshape(size=[batch_size * channel, width,
                                          height, 1], tensorDim=4, layout='', dtype=dt.FLOAT32)  # (NC)WH1
        padL1, padR1, padT1, padB1 = get_pad_params(width, height, kW, 1)
        self.pad1 = fn.Pad(
            mode=1, pads=[0, padL1, padT1, 0, 0, padR1, padB1, 0], dtype=dt.FLOAT32)
        self.gaussian_blur1 = fn.GaussianBlur(
            maxSigma=max_sigma, dtype=dt.FLOAT32)
        self.reshape12 = fn.Reshape(
            size=[batch_size, channel, width, height], tensorDim=4, layout='', dtype=dt.FLOAT32)  # NCWH
        self.transp12 = fn.Transpose(
            permutation=[0, 1, 3, 2], tensorDim=4, dtype=dt.FLOAT32)  # NCWH -> NCHW

        # -----------------------Blur Along Height--------------------------------------------------------
        self.gaussian_kernel2 = fn.MediaFunc(func=gaussian_kernel_gen, shape=[(channel * batch_size), 1, kW, 1], dtype=dt.FLOAT32, priv_params=priv_params)
        self.reshape21 = fn.Reshape(size=[batch_size * channel, height,
                                          width, 1], tensorDim=4, layout='', dtype=dt.FLOAT32)  # (NC)HW1
        padL2, padR2, padT2, padB2 = get_pad_params(height, width, kW, 1)
        self.pad2 = fn.Pad(
            mode=1, pads=[0, padL2, padT2, 0, 0, padR2, padB2, 0], dtype=dt.FLOAT32)
        self.gaussian_blur2 = fn.GaussianBlur(
            maxSigma=max_sigma, dtype=dt.FLOAT32)  # (NC)HW1
        self.reshape22 = fn.Reshape(
            size=[batch_size, channel, height, width], tensorDim=4, layout='', dtype=dt.FLOAT32)  # NCHW
        self.transp22 = fn.Transpose(
            permutation=[1, 3, 2, 0], tensorDim=4, dtype=dt.FLOAT32)  # NCHW -> CWHN: RGB_I

    def definegraph(self):
        jpegs, labels = self.input()
        images = self.decode(jpegs)  # WHCN - RGB_P
        sigmas = self.gaussian_kernel_sigma()

        # ------------Blur along W --------------------------------
        images = self.transp11(images)  # WHCN -> NCWH
        images = self.cast11(images)
        images = self.reshape11(images)  # (NC)WH1
        images = self.pad1(images)
        gaussian_k1 = self.gaussian_kernel1(sigmas)
        images = self.gaussian_blur1(images, gaussian_k1)  # (NC)WH1
        images = self.reshape12(images)  # NCWH
        images = self.transp12(images)  # NCWH -> NCHW

        # ------------Blur along H --------------------------------
        images = self.reshape21(images)  # (NC)HW1
        images = self.pad2(images)
        gaussian_k2 = self.gaussian_kernel2(sigmas)
        images = self.gaussian_blur2(images, gaussian_k2)  # (NC)HW1
        images = self.reshape22(images)  # NCHW
        images = self.transp22(images)  # NCHW -> CWHN
        return images, labels
def display_images(images, batch_size, cols):
    rows = (batch_size + 1) // cols
    plt.figure(figsize=(10, 10))
    for i in range(batch_size):
        ax = plt.subplot(rows, cols, i + 1)
        plt.imshow(images[i])
        plt.axis("off")
    plt.show()


def main():
    batch_size = 6
    img_width = 200
    img_height = 200
    channels = 3
    queue_depth = 1
    columns = 3

    pipe = myMediaPipe('hpu', queue_depth, batch_size,
                       channels, img_height, img_width)

    # Build media pipeline
    pipe.build()

    # Initialize media pipeline iterator
    pipe.iter_init()

    # Run media pipeline
    images, labels = pipe.run()

    # Copy data to CPU as numpy array
    images = images.as_cpu().as_nparray()
    labels = labels.as_cpu().as_nparray()

    # Display images
    display_images(images, batch_size, columns)


if __name__ == "__main__":
    main()
Gaussian Blur Output Images [1]
[1] Licensed under a CC BY-SA 4.0 license. The images used here are taken from https://data.caltech.edu/records/mzrjq-6wc02.