File Util.h¶
File List > Intern > rayx-core > src > Tracer > Util.h
Go to the documentation of this file
#pragma once
#include <alpaka/alpaka.hpp>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <optional>
#include <type_traits>
#include <utility>
#include <variant>
#include <vector>
#include "Debug/Instrumentor.h"
#include "Shader/Rand.h"
#include "Shader/RaysPtr.h"
namespace rayx {
// Optional one-dimensional alpaka buffer holding elements of type T, indexed with int32_t.
// An empty optional stands for "no buffer allocated yet"; allocBuf treats it that way.
template <typename Acc, typename T>
using OptBuf = std::optional<alpaka::Buf<Acc, T, alpaka::DimInt<1>, int32_t>>;
// This struct is analogous to struct Rays. It contains OptBufs instead of vectors, so it can be used as buffers on CPU or GPU.
// The members are generated by RAYX_X_MACRO_RAY_ATTR (defined outside this file), which presumably invokes
// X(type, name, flag) once per ray attribute. Each invocation declares one member `OptBuf<Acc, type> name;`.
template <typename Acc>
struct RaysBuf {
// The flag argument of X is not needed for the member declarations and is ignored here.
#define X(type, name, flag) OptBuf<Acc, type> name;
RAYX_X_MACRO_RAY_ATTR
#undef X
};
// Builds a RaysPtr view of the given RaysBuf.
// For every attribute generated by RAYX_X_MACRO_RAY_ATTR, the matching RaysPtr member is set to the native
// pointer of the attribute's buffer if that buffer exists, and to nullptr otherwise.
// NOTE(review): the returned pointers refer to storage held by the buffers in `buf`; presumably they become
// stale once a buffer is replaced (e.g. by allocBuf) or destroyed -- confirm against alpaka buffer semantics.
template <typename Acc>
RaysPtr raysBufToRaysPtr(RaysBuf<Acc>& buf) {
return RaysPtr{
// One designated initializer per attribute; the flag argument of X is ignored here.
#define X(type, name, flag) .name = buf.name ? alpaka::getPtrNative(*buf.name) : nullptr,
RAYX_X_MACRO_RAY_ATTR
#undef X
};
}
// Divides dividend by divisor using integer arithmetic and rounds the quotient up.
// The rounding-up identity used here holds for dividend >= 0 and divisor > 0; the arguments are not checked.
inline int ceilIntDivision(const int dividend, const int divisor) {
    // Adding (divisor - 1) before the truncating division pushes every non-exact quotient to the next integer.
    const int biasedDividend = divisor + dividend - 1;
    return biasedDividend / divisor;
}
// Returns the smallest power of two that is greater than or equal to value.
//
// The computation is done purely with integers. A floating-point formulation (2^ceil(log(value) / log(2)))
// can round the quotient of the logarithms slightly above an exact integer, which makes ceil() jump to the
// next exponent and doubles the result for inputs that already are a power of two.
//
// Special cases:
//  - value <= 0 yields 0 (an input of 0 keeps producing 0, so callers requesting "nothing" get "nothing").
//  - value > 2^30 has no power of two representable in int; value itself is returned, so the result is
//    still never smaller than the request.
inline int nextPowerOfTwo(const int value) {
    if (value <= 0) return 0;

    constexpr int largestPowerOfTwo = 1 << 30;  // biggest power of two that fits into a 32-bit int
    if (value > largestPowerOfTwo) return value;

    int result = 1;
    while (result < value) result <<= 1;  // cannot overflow: value <= 2^30 stops the loop at 2^30 at the latest
    return result;
}
inline int nextMultiple(const int value, const int divisor) {
if (divisor == 0) RAYX_EXIT << "error: divisor must not be zero";
auto remainder = value % divisor;
if (remainder == 0)
return value; // already a multiple
else
return value + (divisor - remainder); // next bigger multiple
}
// Makes sure that buf holds a buffer with room for at least size elements.
//
// Nothing happens if buf already contains a buffer whose first extent is >= size. Otherwise a new buffer is
// requested via alpaka::allocAsyncBufIfSupported on queue q and stored in buf, replacing any previous buffer.
// The contents of a replaced buffer are not copied over.
//
// The element count is rounded up with nextPowerOfTwo.
//
// The verbose log reports the number of bytes that are actually requested (element count * sizeof(Elem)).
// The byte count is computed in std::size_t so it cannot be narrowed to int for large buffers.
//
// q    - queue used for the allocation
// buf  - optional buffer to (re)allocate; an empty optional means "not allocated yet"
// size - required number of elements
template <typename Queue, typename Buf>
inline void allocBuf(Queue q, std::optional<Buf>& buf, const int size) {
    using Idx = alpaka::Idx<Buf>;
    using Elem = alpaka::Elem<Buf>;

    const auto shouldAlloc = !buf || alpaka::getExtents(*buf)[0] < size;
    if (!shouldAlloc) return;

    // Compute the element count once so the logged size and the allocated size cannot diverge.
    const int numElements = nextPowerOfTwo(size);
    const std::size_t numBytes = static_cast<std::size_t>(numElements) * sizeof(Elem);

    RAYX_VERB << (!buf ? "new alloc on device: " : "realloc on device: ") << numBytes << " bytes";
    buf = alpaka::allocAsyncBufIfSupported<Elem, Idx>(q, numElements);
}
// (Re)allocates the attribute buffers of raysBuf that are selected by attrMask.
// For every attribute generated by RAYX_X_MACRO_RAY_ATTR whose flag is contained in attrMask, allocBuf is
// called for the corresponding member of raysBuf with the requested element count. Members whose flag is not
// contained in attrMask are left untouched.
template <typename Queue, typename Acc>
inline void allocRaysBuf(Queue q, const RayAttrMask attrMask, RaysBuf<Acc>& raysBuf, const int size) {
// The type argument of X is not needed here; allocBuf derives the element type from the buffer itself.
#define X(type, name, flag)                      \
    if (contains(attrMask, RayAttrMask::flag)) { \
        allocBuf(q, raysBuf.name, size);         \
    }
    RAYX_X_MACRO_RAY_ATTR
#undef X
}
// Constraints on the number of threads per block, consumed by execWithValidWorkDiv.
// Exactly one of the alternatives below is carried in a BlockSizeConstraint::Variant.
namespace BlockSizeConstraint {

// No constraint.
struct None {};

// The block size has to be exactly `value`.
struct Exact { int value; };

// The block size has to be `value` or more.
struct AtLeast { int value; };

// The block size has to be `value` or less.
struct AtMost { int value; };

// The block size has to lie between `atLeast` and `atMost`.
struct InRange {
    int atLeast;
    int atMost;
};

// A default-constructed Variant holds None, because None is the first alternative.
using Variant = std::variant<None, Exact, AtLeast, AtMost, InRange>;

}  // namespace BlockSizeConstraint
// TODO: maybe make a PR to alpaka for alpaka::Acc<Dev> to extract Acc from DevAcc (= Dev<Platform<Acc>>)
template <typename Acc, typename DevAcc, typename Queue, typename Kernel, typename... Args>
inline void execWithValidWorkDiv(DevAcc devAcc, Queue q, const int numElements, BlockSizeConstraint::Variant blockSizeConstraint,
const Kernel& kernel, Args&&... args) {
const auto conf = alpaka::KernelCfg<Acc>{
.gridElemExtent = numElements,
.threadElemExtent = 1,
.blockThreadMustDivideGridThreadExtent = false,
};
auto workDiv = alpaka::getValidWorkDiv(conf, devAcc, kernel, std::forward<Args>(args)...);
std::visit(
[&]<typename BlockSizeConstraintType>(BlockSizeConstraintType constraint) {
if constexpr (std::is_same_v<BlockSizeConstraintType, BlockSizeConstraint::Exact>) {
assert(workDiv.m_blockThreadExtent[0] <= constraint.value && "BlockSizeConstraint::Exact exceeds the capabilities this device");
workDiv.m_blockThreadExtent = constraint.value;
workDiv.m_gridBlockExtent = ceilIntDivision(numElements, constraint.value);
}
if constexpr (std::is_same_v<BlockSizeConstraintType, BlockSizeConstraint::AtMost>) {
if (constraint.value < workDiv.m_blockThreadExtent[0]) {
workDiv.m_blockThreadExtent = constraint.value;
workDiv.m_gridBlockExtent = ceilIntDivision(numElements, constraint.value);
}
}
if constexpr (std::is_same_v<BlockSizeConstraintType, BlockSizeConstraint::AtLeast>) {
assert(constraint.value <= workDiv.m_blockThreadExtent[0] && "BlockSizeConstraint::AtLeast exceeds the capabilities this device");
}
if constexpr (std::is_same_v<BlockSizeConstraintType, BlockSizeConstraint::InRange>) {
assert(constraint.atLeast <= workDiv.m_blockThreadExtent[0] && "BlockSizeConstraint::InRange exceeds capabilities of this device");
if (constraint.atMost < workDiv.m_blockThreadExtent[0]) {
workDiv.m_blockThreadExtent = constraint.atMost;
workDiv.m_gridBlockExtent = ceilIntDivision(numElements, constraint.atMost);
}
}
},
blockSizeConstraint);
RAYX_VERB << "execute kernel with launch config: "
<< "blocks = " << workDiv.m_gridBlockExtent[0] << ", "
<< "threads = " << workDiv.m_blockThreadExtent[0];
alpaka::exec<Acc>(q, workDiv, kernel, std::forward<Args>(args)...);
}
} // namespace rayx