|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#include <neural-graphics-primitives/common.h> |
|
#include <neural-graphics-primitives/triangle_bvh.cuh> |
|
#include <tiny-cuda-nn/gpu_memory.h> |
|
|
|
#include <Eigen/Dense> |
|
|
|
#include <stack> |
|
|
|
#ifdef NGP_OPTIX |
|
# include <optix.h> |
|
# include <optix_stubs.h> |
|
# include <optix_function_table_definition.h> |
|
# include <optix_stack_size.h> |
|
|
|
|
|
# include "optix/pathescape.h" |
|
# include "optix/raystab.h" |
|
# include "optix/raytrace.h" |
|
|
|
# include "optix/program.h" |
|
|
|
|
|
|
|
namespace optix_ptx { |
|
#include <optix_ptx.h> |
|
} |
|
#endif |
|
|
|
using namespace Eigen; |
|
using namespace tcnn; |
|
|
|
NGP_NAMESPACE_BEGIN |
|
|
|
constexpr float MAX_DIST = 10.0f; |
|
constexpr float MAX_DIST_SQ = MAX_DIST*MAX_DIST; |
|
|
|
#ifdef NGP_OPTIX |
|
OptixDeviceContext g_optix; |
|
|
|
namespace optix { |
|
bool initialize() { |
|
static bool ran_before = false; |
|
static bool is_optix_initialized = false; |
|
if (ran_before) { |
|
return is_optix_initialized; |
|
} |
|
|
|
ran_before = true; |
|
|
|
|
|
CUDA_CHECK_THROW(cudaFree(nullptr)); |
|
|
|
try { |
|
|
|
OPTIX_CHECK_THROW(optixInit()); |
|
|
|
|
|
OptixDeviceContextOptions options = {}; |
|
|
|
|
|
|
|
CUcontext cuCtx = 0; |
|
|
|
OPTIX_CHECK_THROW(optixDeviceContextCreate(cuCtx, &options, &g_optix)); |
|
} catch (std::exception& e) { |
|
tlog::warning() << "OptiX failed to initialize: " << e.what(); |
|
return false; |
|
} |
|
|
|
is_optix_initialized = true; |
|
return true; |
|
} |
|
|
|
class Gas { |
|
public: |
|
Gas(const GPUMemory<Triangle>& triangles, OptixDeviceContext optix, cudaStream_t stream) { |
|
|
|
OptixAccelBuildOptions accel_options = {}; |
|
accel_options.buildFlags = OPTIX_BUILD_FLAG_NONE; |
|
accel_options.operation = OPTIX_BUILD_OPERATION_BUILD; |
|
|
|
|
|
|
|
const uint32_t triangle_input_flags[1] = { OPTIX_GEOMETRY_FLAG_NONE }; |
|
OptixBuildInput triangle_input = {}; |
|
|
|
CUdeviceptr d_triangles = (CUdeviceptr)(uintptr_t)triangles.data(); |
|
|
|
triangle_input.type = OPTIX_BUILD_INPUT_TYPE_TRIANGLES; |
|
triangle_input.triangleArray.vertexFormat = OPTIX_VERTEX_FORMAT_FLOAT3; |
|
triangle_input.triangleArray.numVertices = (uint32_t)triangles.size()*3; |
|
triangle_input.triangleArray.vertexBuffers = &d_triangles; |
|
triangle_input.triangleArray.flags = triangle_input_flags; |
|
triangle_input.triangleArray.numSbtRecords = 1; |
|
|
|
|
|
OptixAccelBufferSizes gas_buffer_sizes; |
|
OPTIX_CHECK_THROW(optixAccelComputeMemoryUsage(optix, &accel_options, &triangle_input, 1, &gas_buffer_sizes)); |
|
|
|
|
|
|
|
GPUMemory<char> gas_tmp_buffer{gas_buffer_sizes.tempSizeInBytes}; |
|
m_gas_gpu_buffer.resize(gas_buffer_sizes.outputSizeInBytes); |
|
|
|
OPTIX_CHECK_THROW(optixAccelBuild( |
|
optix, |
|
stream, |
|
&accel_options, |
|
&triangle_input, |
|
1, |
|
(CUdeviceptr)(uintptr_t)gas_tmp_buffer.data(), |
|
gas_buffer_sizes.tempSizeInBytes, |
|
(CUdeviceptr)(uintptr_t)m_gas_gpu_buffer.data(), |
|
gas_buffer_sizes.outputSizeInBytes, |
|
&m_gas_handle, |
|
nullptr, |
|
0 |
|
)); |
|
} |
|
|
|
OptixTraversableHandle handle() const { |
|
return m_gas_handle; |
|
} |
|
|
|
private: |
|
OptixTraversableHandle m_gas_handle; |
|
GPUMemory<char> m_gas_gpu_buffer; |
|
}; |
|
} |
|
#endif |
|
|
|
__global__ void signed_distance_watertight_kernel(uint32_t n_elements, const Vector3f* __restrict__ positions, const TriangleBvhNode* __restrict__ bvhnodes, const Triangle* __restrict__ triangles, float* __restrict__ distances, bool use_existing_distances_as_upper_bounds = false); |
|
__global__ void signed_distance_raystab_kernel(uint32_t n_elements, const Vector3f* __restrict__ positions, const TriangleBvhNode* __restrict__ bvhnodes, const Triangle* __restrict__ triangles, float* __restrict__ distances, bool use_existing_distances_as_upper_bounds = false); |
|
__global__ void unsigned_distance_kernel(uint32_t n_elements, const Vector3f* __restrict__ positions, const TriangleBvhNode* __restrict__ bvhnodes, const Triangle* __restrict__ triangles, float* __restrict__ distances, bool use_existing_distances_as_upper_bounds = false); |
|
__global__ void raytrace_kernel(uint32_t n_elements, Vector3f* __restrict__ positions, Vector3f* __restrict__ directions, const TriangleBvhNode* __restrict__ nodes, const Triangle* __restrict__ triangles); |
|
|
|
struct DistAndIdx { |
|
float dist; |
|
uint32_t idx; |
|
|
|
|
|
__host__ __device__ bool operator<(const DistAndIdx& other) { |
|
return dist < other.dist; |
|
} |
|
}; |
|
|
|
template <typename T> |
|
__host__ __device__ void inline compare_and_swap(T& t1, T& t2) { |
|
if (t1 < t2) { |
|
T tmp{t1}; t1 = t2; t2 = tmp; |
|
} |
|
} |
|
|
|
|
|
template <uint32_t N, typename T> |
|
__host__ __device__ void sorting_network(T values[N]) { |
|
static_assert(N <= 8, "Sorting networks are only implemented up to N==8"); |
|
if (N <= 1) { |
|
return; |
|
} else if (N == 2) { |
|
compare_and_swap(values[0], values[1]); |
|
} else if (N == 3) { |
|
compare_and_swap(values[0], values[2]); |
|
compare_and_swap(values[0], values[1]); |
|
compare_and_swap(values[1], values[2]); |
|
} else if (N == 4) { |
|
compare_and_swap(values[0], values[2]); |
|
compare_and_swap(values[1], values[3]); |
|
compare_and_swap(values[0], values[1]); |
|
compare_and_swap(values[2], values[3]); |
|
compare_and_swap(values[1], values[2]); |
|
} else if (N == 5) { |
|
compare_and_swap(values[0], values[3]); |
|
compare_and_swap(values[1], values[4]); |
|
|
|
compare_and_swap(values[0], values[2]); |
|
compare_and_swap(values[1], values[3]); |
|
|
|
compare_and_swap(values[0], values[1]); |
|
compare_and_swap(values[2], values[4]); |
|
|
|
compare_and_swap(values[1], values[2]); |
|
compare_and_swap(values[3], values[4]); |
|
|
|
compare_and_swap(values[2], values[3]); |
|
} else if (N == 6) { |
|
compare_and_swap(values[0], values[5]); |
|
compare_and_swap(values[1], values[3]); |
|
compare_and_swap(values[2], values[4]); |
|
|
|
compare_and_swap(values[1], values[2]); |
|
compare_and_swap(values[3], values[4]); |
|
|
|
compare_and_swap(values[0], values[3]); |
|
compare_and_swap(values[2], values[5]); |
|
|
|
compare_and_swap(values[0], values[1]); |
|
compare_and_swap(values[2], values[3]); |
|
compare_and_swap(values[4], values[5]); |
|
|
|
compare_and_swap(values[1], values[2]); |
|
compare_and_swap(values[3], values[4]); |
|
} else if (N == 7) { |
|
compare_and_swap(values[0], values[6]); |
|
compare_and_swap(values[2], values[3]); |
|
compare_and_swap(values[4], values[5]); |
|
|
|
compare_and_swap(values[0], values[2]); |
|
compare_and_swap(values[1], values[4]); |
|
compare_and_swap(values[3], values[6]); |
|
|
|
compare_and_swap(values[0], values[1]); |
|
compare_and_swap(values[2], values[5]); |
|
compare_and_swap(values[3], values[4]); |
|
|
|
compare_and_swap(values[1], values[2]); |
|
compare_and_swap(values[4], values[6]); |
|
|
|
compare_and_swap(values[2], values[3]); |
|
compare_and_swap(values[4], values[5]); |
|
|
|
compare_and_swap(values[1], values[2]); |
|
compare_and_swap(values[3], values[4]); |
|
compare_and_swap(values[5], values[6]); |
|
} else if (N == 8) { |
|
compare_and_swap(values[0], values[2]); |
|
compare_and_swap(values[1], values[3]); |
|
compare_and_swap(values[4], values[6]); |
|
compare_and_swap(values[5], values[7]); |
|
|
|
compare_and_swap(values[0], values[4]); |
|
compare_and_swap(values[1], values[5]); |
|
compare_and_swap(values[2], values[6]); |
|
compare_and_swap(values[3], values[7]); |
|
|
|
compare_and_swap(values[0], values[1]); |
|
compare_and_swap(values[2], values[3]); |
|
compare_and_swap(values[4], values[5]); |
|
compare_and_swap(values[6], values[7]); |
|
|
|
compare_and_swap(values[2], values[4]); |
|
compare_and_swap(values[3], values[5]); |
|
|
|
compare_and_swap(values[1], values[4]); |
|
compare_and_swap(values[3], values[6]); |
|
|
|
compare_and_swap(values[1], values[2]); |
|
compare_and_swap(values[3], values[4]); |
|
compare_and_swap(values[5], values[6]); |
|
} |
|
} |
|
|
|
template <uint32_t BRANCHING_FACTOR> |
|
class TriangleBvhWithBranchingFactor : public TriangleBvh { |
|
public: |
|
__host__ __device__ static std::pair<int, float> ray_intersect(const Vector3f& ro, const Vector3f& rd, const TriangleBvhNode* __restrict__ bvhnodes, const Triangle* __restrict__ triangles) { |
|
FixedIntStack query_stack; |
|
query_stack.push(0); |
|
|
|
float mint = MAX_DIST; |
|
int shortest_idx = -1; |
|
|
|
while (!query_stack.empty()) { |
|
int idx = query_stack.pop(); |
|
|
|
const TriangleBvhNode& node = bvhnodes[idx]; |
|
|
|
if (node.left_idx < 0) { |
|
int end = -node.right_idx-1; |
|
for (int i = -node.left_idx-1; i < end; ++i) { |
|
float t = triangles[i].ray_intersect(ro, rd); |
|
if (t < mint) { |
|
mint = t; |
|
shortest_idx = i; |
|
} |
|
} |
|
} else { |
|
DistAndIdx children[BRANCHING_FACTOR]; |
|
|
|
uint32_t first_child = node.left_idx; |
|
|
|
NGP_PRAGMA_UNROLL |
|
for (uint32_t i = 0; i < BRANCHING_FACTOR; ++i) { |
|
children[i] = {bvhnodes[i+first_child].bb.ray_intersect(ro, rd).x(), i+first_child}; |
|
} |
|
|
|
sorting_network<BRANCHING_FACTOR>(children); |
|
|
|
NGP_PRAGMA_UNROLL |
|
for (uint32_t i = 0; i < BRANCHING_FACTOR; ++i) { |
|
if (children[i].dist < mint) { |
|
query_stack.push(children[i].idx); |
|
} |
|
} |
|
} |
|
} |
|
|
|
return {shortest_idx, mint}; |
|
} |
|
|
|
__host__ __device__ static std::pair<int, float> closest_triangle(const Vector3f& point, const TriangleBvhNode* __restrict__ bvhnodes, const Triangle* __restrict__ triangles, float max_distance_sq = MAX_DIST_SQ) { |
|
FixedIntStack query_stack; |
|
query_stack.push(0); |
|
|
|
float shortest_distance_sq = max_distance_sq; |
|
int shortest_idx = -1; |
|
|
|
while (!query_stack.empty()) { |
|
int idx = query_stack.pop(); |
|
|
|
const TriangleBvhNode& node = bvhnodes[idx]; |
|
|
|
if (node.left_idx < 0) { |
|
int end = -node.right_idx-1; |
|
for (int i = -node.left_idx-1; i < end; ++i) { |
|
float dist_sq = triangles[i].distance_sq(point); |
|
if (dist_sq <= shortest_distance_sq) { |
|
shortest_distance_sq = dist_sq; |
|
shortest_idx = i; |
|
} |
|
} |
|
} else { |
|
DistAndIdx children[BRANCHING_FACTOR]; |
|
|
|
uint32_t first_child = node.left_idx; |
|
|
|
NGP_PRAGMA_UNROLL |
|
for (uint32_t i = 0; i < BRANCHING_FACTOR; ++i) { |
|
children[i] = {bvhnodes[i+first_child].bb.distance_sq(point), i+first_child}; |
|
} |
|
|
|
sorting_network<BRANCHING_FACTOR>(children); |
|
|
|
NGP_PRAGMA_UNROLL |
|
for (uint32_t i = 0; i < BRANCHING_FACTOR; ++i) { |
|
if (children[i].dist <= shortest_distance_sq) { |
|
query_stack.push(children[i].idx); |
|
} |
|
} |
|
} |
|
} |
|
|
|
if (shortest_idx == -1) { |
|
|
|
shortest_idx = 0; |
|
shortest_distance_sq = 0.0f; |
|
} |
|
|
|
return {shortest_idx, std::sqrt(shortest_distance_sq)}; |
|
} |
|
|
|
|
|
__host__ __device__ static Vector3f avg_normal_around_point(const Vector3f& point, const TriangleBvhNode* __restrict__ bvhnodes, const Triangle* __restrict__ triangles) { |
|
FixedIntStack query_stack; |
|
query_stack.push(0); |
|
|
|
static constexpr float EPSILON = 1e-6f; |
|
|
|
float total_weight = 0; |
|
Vector3f result = Vector3f::Zero(); |
|
|
|
while (!query_stack.empty()) { |
|
int idx = query_stack.pop(); |
|
|
|
const TriangleBvhNode& node = bvhnodes[idx]; |
|
|
|
if (node.left_idx < 0) { |
|
int end = -node.right_idx-1; |
|
for (int i = -node.left_idx-1; i < end; ++i) { |
|
if (triangles[i].distance_sq(point) < EPSILON) { |
|
float weight = 1; |
|
result += triangles[i].normal(); |
|
total_weight += weight; |
|
} |
|
} |
|
} else { |
|
uint32_t first_child = node.left_idx; |
|
|
|
NGP_PRAGMA_UNROLL |
|
for (uint32_t i = 0; i < BRANCHING_FACTOR; ++i) { |
|
if (bvhnodes[i+first_child].bb.distance_sq(point) < EPSILON) { |
|
query_stack.push(i+first_child); |
|
} |
|
} |
|
} |
|
} |
|
|
|
return result / total_weight; |
|
} |
|
|
|
__host__ __device__ static float unsigned_distance(const Vector3f& point, const TriangleBvhNode* __restrict__ bvhnodes, const Triangle* __restrict__ triangles, float max_distance_sq = MAX_DIST_SQ) { |
|
return closest_triangle(point, bvhnodes, triangles, max_distance_sq).second; |
|
} |
|
|
|
__host__ __device__ static float signed_distance_watertight(const Vector3f& point, const TriangleBvhNode* __restrict__ bvhnodes, const Triangle* __restrict__ triangles, float max_distance_sq = MAX_DIST_SQ) { |
|
auto p = closest_triangle(point, bvhnodes, triangles, max_distance_sq); |
|
|
|
const Triangle& tri = triangles[p.first]; |
|
Vector3f closest_point = tri.closest_point(point); |
|
Vector3f avg_normal = avg_normal_around_point(closest_point, bvhnodes, triangles); |
|
|
|
return std::copysignf(p.second, avg_normal.dot(point - closest_point)); |
|
} |
|
|
|
__host__ __device__ static float signed_distance_raystab(const Vector3f& point, const TriangleBvhNode* __restrict__ bvhnodes, const Triangle* __restrict__ triangles, float max_distance_sq = MAX_DIST_SQ, default_rng_t rng={}) { |
|
float distance = unsigned_distance(point, bvhnodes, triangles, max_distance_sq); |
|
|
|
Vector2f offset = random_val_2d(rng); |
|
|
|
static constexpr uint32_t N_STAB_RAYS = 32; |
|
for (uint32_t i = 0; i < N_STAB_RAYS; ++i) { |
|
|
|
|
|
Vector3f d = fibonacci_dir<N_STAB_RAYS>(i, offset); |
|
|
|
|
|
if (ray_intersect(point, -d, bvhnodes, triangles).first < 0 || ray_intersect(point, d, bvhnodes, triangles).first < 0) { |
|
return distance; |
|
} |
|
} |
|
|
|
return -distance; |
|
} |
|
|
|
|
|
Vector3f avg_normal_around_point(const Vector3f& point, const Triangle* __restrict__ triangles) const { |
|
return avg_normal_around_point(point, m_nodes.data(), triangles); |
|
} |
|
|
|
float signed_distance(EMeshSdfMode mode, const Vector3f& point, const std::vector<Triangle>& triangles) const { |
|
if (mode == EMeshSdfMode::Watertight) { |
|
return signed_distance_watertight(point, m_nodes.data(), triangles.data()); |
|
} else { |
|
return signed_distance_raystab(point, m_nodes.data(), triangles.data()); |
|
} |
|
} |
|
|
|
void signed_distance_gpu(uint32_t n_elements, EMeshSdfMode mode, const Vector3f* gpu_positions, float* gpu_distances, const Triangle* gpu_triangles, bool use_existing_distances_as_upper_bounds, cudaStream_t stream) override { |
|
if (mode == EMeshSdfMode::Watertight) { |
|
linear_kernel(signed_distance_watertight_kernel, 0, stream, |
|
n_elements, |
|
gpu_positions, |
|
m_nodes_gpu.data(), |
|
gpu_triangles, |
|
gpu_distances, |
|
use_existing_distances_as_upper_bounds |
|
); |
|
} else { |
|
#ifdef NGP_OPTIX |
|
if (m_optix.available) { |
|
linear_kernel(unsigned_distance_kernel, 0, stream, |
|
n_elements, |
|
gpu_positions, |
|
m_nodes_gpu.data(), |
|
gpu_triangles, |
|
gpu_distances, |
|
use_existing_distances_as_upper_bounds |
|
); |
|
|
|
if (mode == EMeshSdfMode::Raystab) { |
|
m_optix.raystab->invoke({gpu_positions, gpu_distances, m_optix.gas->handle()}, {n_elements, 1, 1}, stream); |
|
} else if (mode == EMeshSdfMode::PathEscape) { |
|
m_optix.pathescape->invoke({gpu_positions, gpu_triangles, gpu_distances, m_optix.gas->handle()}, {n_elements, 1, 1}, stream); |
|
} |
|
} else |
|
#endif |
|
{ |
|
if (mode == EMeshSdfMode::Raystab) { |
|
linear_kernel(signed_distance_raystab_kernel, 0, stream, |
|
n_elements, |
|
gpu_positions, |
|
m_nodes_gpu.data(), |
|
gpu_triangles, |
|
gpu_distances, |
|
use_existing_distances_as_upper_bounds |
|
); |
|
} else if (mode == EMeshSdfMode::PathEscape) { |
|
throw std::runtime_error{"TriangleBvh: EMeshSdfMode::PathEscape is only supported with OptiX enabled."}; |
|
} |
|
} |
|
} |
|
} |
|
|
|
void ray_trace_gpu(uint32_t n_elements, Vector3f* gpu_positions, Vector3f* gpu_directions, const Triangle* gpu_triangles, cudaStream_t stream) override { |
|
#ifdef NGP_OPTIX |
|
if (m_optix.available) { |
|
m_optix.raytrace->invoke({gpu_positions, gpu_directions, gpu_triangles, m_optix.gas->handle()}, {n_elements, 1, 1}, stream); |
|
} else |
|
#endif |
|
{ |
|
linear_kernel(raytrace_kernel, 0, stream, |
|
n_elements, |
|
gpu_positions, |
|
gpu_directions, |
|
m_nodes_gpu.data(), |
|
gpu_triangles |
|
); |
|
} |
|
} |
|
|
|
bool touches_triangle(const BoundingBox& bb, const TriangleBvhNode& node, const Triangle* __restrict__ triangles) const { |
|
if (!node.bb.intersects(bb)) { |
|
return false; |
|
} |
|
|
|
if (node.left_idx < 0) { |
|
|
|
int end = -node.right_idx-1; |
|
for (int i = -node.left_idx-1; i < end; ++i) { |
|
if (bb.intersects(triangles[i])) { |
|
return true; |
|
} |
|
} |
|
} else { |
|
|
|
int child_idx = node.left_idx; |
|
for (int i = 0; i < BRANCHING_FACTOR; ++i) { |
|
if (touches_triangle(bb, m_nodes[i+child_idx], triangles)) { |
|
return true; |
|
} |
|
} |
|
} |
|
|
|
return false; |
|
} |
|
|
|
bool touches_triangle(const BoundingBox& bb, const Triangle* __restrict__ triangles) const override { |
|
return touches_triangle(bb, m_nodes.front(), triangles); |
|
} |
|
|
|
void build(std::vector<Triangle>& triangles, uint32_t n_primitives_per_leaf) override { |
|
m_nodes.clear(); |
|
|
|
|
|
m_nodes.emplace_back(); |
|
m_nodes.front().bb = BoundingBox(std::begin(triangles), std::end(triangles)); |
|
|
|
struct BuildNode { |
|
int node_idx; |
|
std::vector<Triangle>::iterator begin; |
|
std::vector<Triangle>::iterator end; |
|
}; |
|
|
|
std::stack<BuildNode> build_stack; |
|
build_stack.push({0, std::begin(triangles), std::end(triangles)}); |
|
|
|
while (!build_stack.empty()) { |
|
const BuildNode& curr = build_stack.top(); |
|
size_t node_idx = curr.node_idx; |
|
|
|
std::array<BuildNode, BRANCHING_FACTOR> children; |
|
children[0].begin = curr.begin; |
|
children[0].end = curr.end; |
|
|
|
build_stack.pop(); |
|
|
|
|
|
int n_children = 1; |
|
while (n_children < BRANCHING_FACTOR) { |
|
for (int i = n_children - 1; i >= 0; --i) { |
|
auto& child = children[i]; |
|
|
|
|
|
Vector3f mean = Vector3f::Zero(); |
|
for (auto it = child.begin; it != child.end; ++it) { |
|
mean += it->centroid(); |
|
} |
|
mean /= (float)std::distance(child.begin, child.end); |
|
|
|
Vector3f var = Vector3f::Zero(); |
|
for (auto it = child.begin; it != child.end; ++it) { |
|
Vector3f diff = it->centroid() - mean; |
|
var += diff.cwiseProduct(diff); |
|
} |
|
var /= (float)std::distance(child.begin, child.end); |
|
|
|
Vector3f::Index axis; |
|
var.maxCoeff(&axis); |
|
|
|
auto m = child.begin + std::distance(child.begin, child.end)/2; |
|
std::nth_element(child.begin, m, child.end, [&](const Triangle& tri1, const Triangle& tri2) { return tri1.centroid(axis) < tri2.centroid(axis); }); |
|
|
|
children[i*2].begin = children[i].begin; |
|
children[i*2+1].end = children[i].end; |
|
children[i*2].end = children[i*2+1].begin = m; |
|
} |
|
|
|
n_children *= 2; |
|
} |
|
|
|
|
|
m_nodes[node_idx].left_idx = (int)m_nodes.size(); |
|
for (uint32_t i = 0; i < BRANCHING_FACTOR; ++i) { |
|
auto& child = children[i]; |
|
assert(child.begin != child.end); |
|
child.node_idx = (int)m_nodes.size(); |
|
|
|
m_nodes.emplace_back(); |
|
m_nodes.back().bb = BoundingBox(child.begin, child.end); |
|
|
|
if (std::distance(child.begin, child.end) <= n_primitives_per_leaf) { |
|
m_nodes.back().left_idx = -(int)std::distance(std::begin(triangles), child.begin)-1; |
|
m_nodes.back().right_idx = -(int)std::distance(std::begin(triangles), child.end)-1; |
|
} else { |
|
build_stack.push(child); |
|
} |
|
} |
|
m_nodes[node_idx].right_idx = (int)m_nodes.size(); |
|
} |
|
|
|
m_nodes_gpu.resize_and_copy_from_host(m_nodes); |
|
|
|
tlog::success() << "Built TriangleBvh: nodes=" << m_nodes.size(); |
|
} |
|
|
|
void build_optix(const GPUMemory<Triangle>& triangles, cudaStream_t stream) override { |
|
#ifdef NGP_OPTIX |
|
m_optix.available = optix::initialize(); |
|
if (m_optix.available) { |
|
m_optix.gas = std::make_unique<optix::Gas>(triangles, g_optix, stream); |
|
m_optix.raystab = std::make_unique<optix::Program<Raystab>>((const char*)optix_ptx::raystab_ptx, sizeof(optix_ptx::raystab_ptx), g_optix); |
|
m_optix.raytrace = std::make_unique<optix::Program<Raytrace>>((const char*)optix_ptx::raytrace_ptx, sizeof(optix_ptx::raytrace_ptx), g_optix); |
|
m_optix.pathescape = std::make_unique<optix::Program<PathEscape>>((const char*)optix_ptx::pathescape_ptx, sizeof(optix_ptx::pathescape_ptx), g_optix); |
|
tlog::success() << "Built OptiX GAS and shaders"; |
|
} else { |
|
tlog::warning() << "Falling back to slower TriangleBVH::ray_intersect."; |
|
} |
|
#else |
|
tlog::warning() << "OptiX was not built. Falling back to slower TriangleBVH::ray_intersect."; |
|
#endif |
|
} |
|
|
|
TriangleBvhWithBranchingFactor() {} |
|
|
|
private: |
|
#ifdef NGP_OPTIX |
|
struct { |
|
std::unique_ptr<optix::Gas> gas; |
|
std::unique_ptr<optix::Program<Raystab>> raystab; |
|
std::unique_ptr<optix::Program<Raytrace>> raytrace; |
|
std::unique_ptr<optix::Program<PathEscape>> pathescape; |
|
bool available = false; |
|
} m_optix; |
|
#endif |
|
}; |
|
|
|
using TriangleBvh4 = TriangleBvhWithBranchingFactor<4>; |
|
|
|
std::unique_ptr<TriangleBvh> TriangleBvh::make() { |
|
return std::unique_ptr<TriangleBvh>(new TriangleBvh4()); |
|
} |
|
|
|
__global__ void signed_distance_watertight_kernel(uint32_t n_elements, |
|
const Vector3f* __restrict__ positions, |
|
const TriangleBvhNode* __restrict__ bvhnodes, |
|
const Triangle* __restrict__ triangles, |
|
float* __restrict__ distances, |
|
bool use_existing_distances_as_upper_bounds |
|
) { |
|
uint32_t i = blockIdx.x * blockDim.x + threadIdx.x; |
|
if (i >= n_elements) return; |
|
|
|
float max_distance = use_existing_distances_as_upper_bounds ? distances[i] : MAX_DIST; |
|
distances[i] = TriangleBvh4::signed_distance_watertight(positions[i], bvhnodes, triangles, max_distance*max_distance); |
|
} |
|
|
|
__global__ void signed_distance_raystab_kernel( |
|
uint32_t n_elements, |
|
const Vector3f* __restrict__ positions, |
|
const TriangleBvhNode* __restrict__ bvhnodes, |
|
const Triangle* __restrict__ triangles, |
|
float* __restrict__ distances, |
|
bool use_existing_distances_as_upper_bounds |
|
) { |
|
uint32_t i = blockIdx.x * blockDim.x + threadIdx.x; |
|
if (i >= n_elements) return; |
|
|
|
float max_distance = use_existing_distances_as_upper_bounds ? distances[i] : MAX_DIST; |
|
default_rng_t rng; |
|
rng.advance(i * 2); |
|
|
|
distances[i] = TriangleBvh4::signed_distance_raystab(positions[i], bvhnodes, triangles, max_distance*max_distance, rng); |
|
} |
|
|
|
__global__ void unsigned_distance_kernel(uint32_t n_elements, |
|
const Vector3f* __restrict__ positions, |
|
const TriangleBvhNode* __restrict__ bvhnodes, |
|
const Triangle* __restrict__ triangles, |
|
float* __restrict__ distances, |
|
bool use_existing_distances_as_upper_bounds |
|
) { |
|
uint32_t i = blockIdx.x * blockDim.x + threadIdx.x; |
|
if (i >= n_elements) return; |
|
|
|
float max_distance = use_existing_distances_as_upper_bounds ? distances[i] : MAX_DIST; |
|
distances[i] = TriangleBvh4::unsigned_distance(positions[i], bvhnodes, triangles, max_distance*max_distance); |
|
} |
|
|
|
__global__ void raytrace_kernel(uint32_t n_elements, Vector3f* __restrict__ positions, Vector3f* __restrict__ directions, const TriangleBvhNode* __restrict__ nodes, const Triangle* __restrict__ triangles) { |
|
uint32_t i = blockIdx.x * blockDim.x + threadIdx.x; |
|
if (i >= n_elements) return; |
|
|
|
auto pos = positions[i]; |
|
auto dir = directions[i]; |
|
|
|
auto p = TriangleBvh4::ray_intersect(pos, dir, nodes, triangles); |
|
positions[i] = pos + p.second * dir; |
|
|
|
if (p.first >= 0) { |
|
directions[i] = triangles[p.first].normal(); |
|
} |
|
} |
|
|
|
NGP_NAMESPACE_END |
|
|
|
|
|
|