CustomOp API Structures and Functions

PartialOutputMetaData

Structure defining output tensor metadata:

struct PartialOutputMetaData {
    at::ScalarType dtype{at::ScalarType::Undefined};
    std::vector<int64_t> shape{};
};

using PartialOutputMetaDataVector = std::vector<PartialOutputMetaData>;

OutputMetaFn

Callback used for computing the metadata of the output tensors:

/**
* Signature of the callback that computes the metadata of an op's output
* tensors.
*
* @param stack Current operation stack as passed from PyTorch. It is taken
* by const reference.
*
* @return Vector of output tensors metadata (PartialOutputMetaData
* entries).
*/
using OutputMetaFn =
    std::function<PartialOutputMetaDataVector(const at::Stack&)>;

FillParamsFn

Callback used for allocating and filling the TPC user params structure:

/**
* Signature of the callback that allocates and fills the TPC user params
* structure.
*
* @param stack Current operation stack as passed from PyTorch. It is taken
* by const reference.
* @param[out] size For returning allocated TPC params structure size.
* NOTE(review): presumably a byte count (HPU_PARAMS_STUB assigns
* sizeof(struct_name) to it) - confirm.
*
* @return Pointer to TPC user params structure, held as
* std::shared_ptr<void>.
*/
using FillParamsFn =
    std::function<std::shared_ptr<void>(const at::Stack&, size_t&)>;

registerUserCustomOp

Adds a CustomOp to the kernel registry and exposes it to PyTorch:

/**
* Registers a user-defined CustomOp.
*
* @param schema Schema name as set in TORCH_LIBRARY.
* @param guid TPC kernel guid.
* @param output_meta_fn Function specifying output tensors.
* @param fill_params_fn Function filling kernel's params.
*/
void registerUserCustomOp(
    const std::string& schema,
    const std::string& guid,
    OutputMetaFn output_meta_fn,
    FillParamsFn fill_params_fn);

HPU_PARAMS_STUB

Macro for allocating the user params structure:

/**
* Helper macro to shorten allocating user params structure and set size
* output parameter.
* To be used inside fill_params_fn callback.
*
* Expands to two statements: an assignment of sizeof(struct_name) to a
* variable named `size`, followed by the declaration of a local variable
* `params` initialized with std::make_shared<struct_name>(). A variable
* named `size` must therefore already be in scope where the macro is used,
* and the expansion has no trailing semicolon, so the caller supplies it.
*
* @param struct_name Type of the user params structure to allocate.
*/
#define HPU_PARAMS_STUB(struct_name) \
  size = sizeof(struct_name);        \
  auto params = std::make_shared<struct_name>()

getUserCustomOpDescriptor

Static method for acquiring the registered UserCustomOpDescriptor object:

/**
* Returns the descriptor of a registered custom op, looked up by name.
*
* @param op Schema registration name, i.e. the name that was used in
* registerUserCustomOp.
*
* @return Custom op descriptor, as a const reference.
* NOTE(review): behavior for a name that was never registered is not
* visible from this declaration - confirm.
*/
static const UserCustomOpDescriptor& getUserCustomOpDescriptor(
    const std::string& op);

execute

Method for the actual execution of the registered CustomOp:

/**
* Executes the registered CustomOp on the given inputs.
*
* @param inputs All input values for the op execution, in order.
* NOTE(review): presumably the order matches the op's schema - confirm.
*
* @return Vector of op results.
*/
std::vector<at::Tensor> execute(const std::vector<c10::IValue>& inputs);