habana_frameworks.mediapipe.fn.ReadNumpyDatasetFromDir
habana_frameworks.mediapipe.fn.ReadNumpyDatasetFromDir
- Class:
habana_frameworks.mediapipe.fn.ReadNumpyDatasetFromDir(**kwargs)
- Define graph call:
__call__()
- Parameter:
None
Description:
This reader reads numpy data files and numpy label files, either from a given directory or from a file list, and returns a batch of numpy images and labels.
- Supported backend:
CPU
Keyword Arguments:
kwargs |
Description |
---|---|
dir |
Input image directory path for reading images and labels.
|
file_list |
Instead of providing dir (input image directory path), user can provide list of files to reader.
|
seed |
Seed for randomization. If not provided, it is generated internally. It is used for shuffling the dataset and also for randomly
selecting the images to pad the last batch when `drop_remainder` is `False` and `pad_remainder` is `False`.
|
pattern |
Pattern for searching file names with name and extension.
|
shuffle |
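If `shuffle` is set to `True`, the reader shuffles the entire dataset before each epoch. In multi-card training, it first creates random slices/chunks for each card and then shuffles.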
|
max_file |
Full path of biggest input file. This is used for pre-allocating buffers. If not provided, reader will find it.
|
num_readers |
Number of parallel reader threads to be used.
|
drop_remainder |
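If `True`, the reader drops the last partial batch of the epoch. If `False`, the padding mechanism is controlled by `pad_remainder`.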
|
pad_remainder |
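If `True`, the reader replicates the last image of the partial batch. If `False`, the partial batch is filled with images randomly selected from the dataset.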
|
num_slices |
It indicates number of cards in multi-card training. Before first epoch, input data will be divided into num_slices i.e. one slice for every card. During entire training, same slice will be used for that particular card for creating batches in every epoch. Default value is one, which indicates single card training.
|
slice_index |
In multi-card training, it indicates index of card.
|
dense |
It should be used only when all numpy files in a dataset are of same shape.
If set to
|
shuffle_across_dataset |
When shuffle_across_dataset set to
|
Example1: Use ReadNumpyDatasetFromDir by providing input directory
The following code snippet shows numpy reader using directory input and pattern for file selection. All of “xyz_*_x.npy” are of the same shape and the same is true for “xyz_*_y.npy”. Refer to habana_frameworks.mediapipe.fn.Crop example for variable shape input.
from habana_frameworks.mediapipe.operators.cpu_nodes.cpu_nodes import media_function
from habana_frameworks.mediapipe import fn
from habana_frameworks.mediapipe.mediapipe import MediaPipe
from habana_frameworks.mediapipe.media_types import imgtype as it
from habana_frameworks.mediapipe.media_types import dtype as dt
import numpy as np
import glob
class myMediaPipe(MediaPipe):
    """Example pipeline that reads two numpy outputs from a directory.

    The reader is created with ``num_outputs=2`` and dtypes
    ``[dt.FLOAT32, dt.UINT8]``. The graph passes the first reader output
    through ``MemCpy`` and returns the second output first.
    """

    def __init__(self, device, queue_depth, batch_size, dir, pattern):
        """Create the reader and mem-copy operators.

        Args:
            device: Forwarded unchanged to ``MediaPipe.__init__``.
            queue_depth: Forwarded unchanged to ``MediaPipe.__init__``.
            batch_size: Forwarded unchanged to ``MediaPipe.__init__``.
            dir: Value passed to the reader's ``dir`` keyword.
            pattern: Value passed to the reader's ``pattern`` keyword.
        """
        # NOTE: the parameter name `dir` shadows the builtin; it is kept so
        # that callers passing it by keyword continue to work.
        super().__init__(
            device,
            queue_depth,
            batch_size,
            self.__class__.__name__,
        )
        self.inputxy = fn.ReadNumpyDatasetFromDir(
            num_outputs=2,
            shuffle=False,
            dir=dir,
            pattern=pattern,
            dtype=[dt.FLOAT32, dt.UINT8],
        )
        self.memcopy_op = fn.MemCpy(dtype=dt.FLOAT32)

    def definegraph(self):
        """Connect reader -> MemCpy and return the graph outputs.

        Returns:
            A 2-tuple: the second reader output, then the mem-copied first
            reader output.
        """
        first_out, second_out = self.inputxy()
        first_out = self.memcopy_op(first_out)
        return second_out, first_out
def main():
    """Build the directory-based pipeline, fetch one batch and print it."""
    batch_size = 2
    queue_depth = 2
    # Named `data_dir` rather than `dir` to avoid shadowing the builtin.
    data_dir = "/path/to/4d/"
    pattern = ["xyz_*_x.npy", "xyz_*_y.npy"]

    # Using directory path
    pipe = myMediaPipe('hpu', queue_depth, batch_size, data_dir, pattern)
    pipe.build()
    pipe.iter_init()

    # A single iteration is enough for the demonstration.
    for _ in range(1):
        # Order matches the tuple returned by definegraph().
        labels, images = pipe.run()
        images = images.as_cpu().as_nparray()
        labels = labels.as_cpu().as_nparray()
        print('image shape: ', images.shape)
        print('label shape: ', labels.shape)
        print(images)
        print(labels)

    # Drop the reference to the pipeline object once we are done with it.
    del pipe


if __name__ == "__main__":
    main()
The following is the output of Numpy reader using input directory:
image shape: (2, 6, 5, 4, 3)
label shape: (2, 6, 5, 4, 1)
[[[[[ 0. 0.33333334 0.6666667 ] [ 1. 1.3333334 1.6666666 ] [ 2. 2.3333333 2.6666667 ] [ 3. 3.3333333 3.6666667 ]] [[ 4. 4.3333335 4.6666665 ] [ 5. 5.3333335 5.6666665 ] [ 6. 6.3333335 6.6666665 ] [ 7. 7.3333335 7.6666665 ]] ... ... ... [[224. 224.66667 225.33333 ] [226. 226.66667 227.33333 ] [228. 228.66667 229.33333 ] [230. 230.66667 231.33333 ]] [[232. 232.66667 233.33333 ] [234. 234.66667 235.33333 ] [236. 236.66667 237.33333 ] [238. 238.66667 239.33333 ]]]]]
[[[[[ 0] [ 1] [ 2] [ 3]] [[ 4] [ 5] [ 6] [ 7]] ... ... ... [[224] [226] [228] [230]] [[232] [234] [236] [238]]]]]
Example2: Use ReadNumpyDatasetFromDir by providing file_list
The following code snippet shows numpy reader using file list input:
from habana_frameworks.mediapipe.operators.cpu_nodes.cpu_nodes import media_function
from habana_frameworks.mediapipe import fn
from habana_frameworks.mediapipe.mediapipe import MediaPipe
from habana_frameworks.mediapipe.media_types import imgtype as it
from habana_frameworks.mediapipe.media_types import dtype as dt
import numpy as np
import glob
class myMediaPipe(MediaPipe):
    """Example pipeline that reads two numpy outputs from explicit file lists.

    The reader is created with ``num_outputs=2`` and dtypes
    ``[dt.FLOAT32, dt.UINT8]``. The graph passes the first reader output
    through ``MemCpy`` and returns the second output first.
    """

    def __init__(self, device, queue_depth, batch_size, file_list):
        """Create the reader and mem-copy operators.

        Args:
            device: Forwarded unchanged to ``MediaPipe.__init__``.
            queue_depth: Forwarded unchanged to ``MediaPipe.__init__``.
            batch_size: Forwarded unchanged to ``MediaPipe.__init__``.
            file_list: Value passed to the reader's ``file_list`` keyword.
        """
        super().__init__(
            device,
            queue_depth,
            batch_size,
            self.__class__.__name__,
        )
        self.inputxy = fn.ReadNumpyDatasetFromDir(
            num_outputs=2,
            shuffle=False,
            file_list=file_list,
            dtype=[dt.FLOAT32, dt.UINT8],
        )
        self.memcopy_op = fn.MemCpy(dtype=dt.FLOAT32)

    def definegraph(self):
        """Connect reader -> MemCpy and return the graph outputs.

        Returns:
            A 2-tuple: the second reader output, then the mem-copied first
            reader output.
        """
        first_out, second_out = self.inputxy()
        first_out = self.memcopy_op(first_out)
        return second_out, first_out
def main():
    """Build the file-list-based pipeline, fetch one batch and print it."""
    # Function-scope import: only this function needs path joining.
    import os

    batch_size = 2
    queue_depth = 2
    # Named `data_dir` rather than `dir` to avoid shadowing the builtin.
    data_dir = "/path/to/4d/"
    pattern = ["xyz_*_x.npy", "xyz_*_y.npy"]

    # Using file list
    # os.path.join avoids the doubled slash that plain concatenation produced
    # when the directory already ends in "/". Both lists are sorted so their
    # order is deterministic (presumably so x/y files pair up by index).
    npy_x = sorted(glob.glob(os.path.join(data_dir, pattern[0])))
    npy_y = sorted(glob.glob(os.path.join(data_dir, pattern[1])))
    file_list = [npy_x, npy_y]

    pipe = myMediaPipe('hpu', queue_depth, batch_size, file_list)
    pipe.build()
    pipe.iter_init()

    # A single iteration is enough for the demonstration.
    for _ in range(1):
        # Order matches the tuple returned by definegraph().
        labels, images = pipe.run()
        images = images.as_cpu().as_nparray()
        labels = labels.as_cpu().as_nparray()
        print('image shape: ', images.shape)
        print('label shape: ', labels.shape)
        print(images)
        print(labels)

    # Drop the reference to the pipeline object once we are done with it.
    del pipe


if __name__ == "__main__":
    main()
The following is the output of Numpy reader using file list:
image shape: (2, 6, 5, 4, 3)
label shape: (2, 6, 5, 4, 1)
[[[[[ 0. 0.33333334 0.6666667 ] [ 1. 1.3333334 1.6666666 ] [ 2. 2.3333333 2.6666667 ] [ 3. 3.3333333 3.6666667 ]] [[ 4. 4.3333335 4.6666665 ] [ 5. 5.3333335 5.6666665 ] [ 6. 6.3333335 6.6666665 ] [ 7. 7.3333335 7.6666665 ]] ... ... ... [[224. 224.66667 225.33333 ] [226. 226.66667 227.33333 ] [228. 228.66667 229.33333 ] [230. 230.66667 231.33333 ]] [[232. 232.66667 233.33333 ] [234. 234.66667 235.33333 ] [236. 236.66667 237.33333 ] [238. 238.66667 239.33333 ]]]]]
[[[[[ 0] [ 1] [ 2] [ 3]] [[ 4] [ 5] [ 6] [ 7]] ... ... ... [[224] [226] [228] [230]] [[232] [234] [236] [238]]]]]