// ----------------------------------------------------------------------------
// -                        Open3D: www.open3d.org                            -
// ----------------------------------------------------------------------------
// Copyright (c) 2018-2023 www.open3d.org
// SPDX-License-Identifier: MIT
// ----------------------------------------------------------------------------

#include <atomic>
#include <cmath>

#include "open3d/core/Dispatch.h"
#include "open3d/core/Dtype.h"
#include "open3d/core/MemoryManager.h"
#include "open3d/core/SizeVector.h"
#include "open3d/core/Tensor.h"
#include "open3d/core/hashmap/Dispatch.h"
#include "open3d/t/geometry/Utility.h"
#include "open3d/t/geometry/kernel/GeometryIndexer.h"
#include "open3d/t/geometry/kernel/GeometryMacros.h"
#include "open3d/t/geometry/kernel/VoxelBlockGrid.h"
#include "open3d/utility/Logging.h"
#include "open3d/utility/Timer.h"

namespace open3d {
namespace t {
namespace geometry {
namespace kernel {
namespace voxel_grid {

using index_t = int;
using ArrayIndexer = TArrayIndexer<index_t>;

#if defined(__CUDACC__)
void GetVoxelCoordinatesAndFlattenedIndicesCUDA
#else
void GetVoxelCoordinatesAndFlattenedIndicesCPU
#endif
        (const core::Tensor& buf_indices,
         const core::Tensor& block_keys,
         core::Tensor& voxel_coords,
         core::Tensor& flattened_indices,
         index_t resolution,
         float voxel_size) {
    core::Device device = buf_indices.GetDevice();

    const index_t* buf_indices_ptr = buf_indices.GetDataPtr<index_t>();
    const index_t* block_key_ptr = block_keys.GetDataPtr<index_t>();

    float* voxel_coords_ptr = voxel_coords.GetDataPtr<float>();
    int64_t* flattened_indices_ptr = flattened_indices.GetDataPtr<int64_t>();

    index_t n = flattened_indices.GetLength();
    ArrayIndexer voxel_indexer({resolution, resolution, resolution});
    index_t resolution3 = resolution * resolution * resolution;

    core::ParallelFor(device, n, [=] OPEN3D_DEVICE(index_t workload_idx) {
        index_t block_idx = buf_indices_ptr[workload_idx / resolution3];
        index_t voxel_idx = workload_idx % resolution3;

        index_t block_key_offset = block_idx * 3;
        index_t xb = block_key_ptr[block_key_offset + 0];
        index_t yb = block_key_ptr[block_key_offset + 1];
        index_t zb = block_key_ptr[block_key_offset + 2];

        index_t xv, yv, zv;
        voxel_indexer.WorkloadToCoord(voxel_idx, &xv, &yv, &zv);

        float x = (xb * resolution + xv) * voxel_size;
        float y = (yb * resolution + yv) * voxel_size;
        float z = (zb * resolution + zv) * voxel_size;

        flattened_indices_ptr[workload_idx] =
                block_idx * resolution3 + voxel_idx;

        index_t voxel_coords_offset = workload_idx * 3;
        voxel_coords_ptr[voxel_coords_offset + 0] = x;
        voxel_coords_ptr[voxel_coords_offset + 1] = y;
        voxel_coords_ptr[voxel_coords_offset + 2] = z;
    });
}

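// Maps a (possibly out-of-bound) voxel offset within a block to a flat index
// into the global value buffers. Offsets outside [0, resolution) wrap around
// and select one of the 27 neighbor blocks via
// nb_idx = (dxb + 1) + (dyb + 1) * 3 + (dzb + 1) * 9; returns -1 if that
// neighbor block is not allocated.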
inline OPEN3D_DEVICE index_t
DeviceGetLinearIdx(index_t xo,
                   index_t yo,
                   index_t zo,
                   index_t curr_block_idx,
                   index_t resolution,
                   const ArrayIndexer& nb_block_masks_indexer,
                   const ArrayIndexer& nb_block_indices_indexer) {
    index_t xn = (xo + resolution) % resolution;
    index_t yn = (yo + resolution) % resolution;
    index_t zn = (zo + resolution) % resolution;

    index_t dxb = Sign(xo - xn);
    index_t dyb = Sign(yo - yn);
    index_t dzb = Sign(zo - zn);

    index_t nb_idx = (dxb + 1) + (dyb + 1) * 3 + (dzb + 1) * 9;

    bool block_mask_i =
            *nb_block_masks_indexer.GetDataPtr<bool>(curr_block_idx, nb_idx);
    if (!block_mask_i) return -1;

    index_t block_idx_i = *nb_block_indices_indexer.GetDataPtr<index_t>(
            curr_block_idx, nb_idx);

    return (((block_idx_i * resolution) + zn) * resolution + yn) * resolution +
           xn;
}

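// Approximates the TSDF gradient at (xo, yo, zo) with central differences,
// e.g. n[0] ~= tsdf(x + 1) - tsdf(x - 1); a component is left untouched when
// either of its neighbor voxels is unallocated.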
template <typename tsdf_t>
inline OPEN3D_DEVICE void DeviceGetNormal(
        const tsdf_t* tsdf_base_ptr,
        index_t xo,
        index_t yo,
        index_t zo,
        index_t curr_block_idx,
        float* n,
        index_t resolution,
        const ArrayIndexer& nb_block_masks_indexer,
        const ArrayIndexer& nb_block_indices_indexer) {
    auto GetLinearIdx = [&] OPEN3D_DEVICE(index_t xo, index_t yo,
                                          index_t zo) -> index_t {
        return DeviceGetLinearIdx(xo, yo, zo, curr_block_idx, resolution,
                                  nb_block_masks_indexer,
                                  nb_block_indices_indexer);
    };
    index_t vxp = GetLinearIdx(xo + 1, yo, zo);
    index_t vxn = GetLinearIdx(xo - 1, yo, zo);
    index_t vyp = GetLinearIdx(xo, yo + 1, zo);
    index_t vyn = GetLinearIdx(xo, yo - 1, zo);
    index_t vzp = GetLinearIdx(xo, yo, zo + 1);
    index_t vzn = GetLinearIdx(xo, yo, zo - 1);
    if (vxp >= 0 && vxn >= 0) n[0] = tsdf_base_ptr[vxp] - tsdf_base_ptr[vxn];
    if (vyp >= 0 && vyn >= 0) n[1] = tsdf_base_ptr[vyp] - tsdf_base_ptr[vyn];
    if (vzp >= 0 && vzn >= 0) n[2] = tsdf_base_ptr[vzp] - tsdf_base_ptr[vzn];
}

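// Projective TSDF integration: every voxel of the touched blocks is projected
// into the depth (and optionally color) image, and the observed signed
// distance is fused into the voxel as a running weighted average.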
template <typename input_depth_t,
          typename input_color_t,
          typename tsdf_t,
          typename weight_t,
          typename color_t>
#if defined(__CUDACC__)
void IntegrateCUDA
#else
void IntegrateCPU
#endif
        (const core::Tensor& depth,
         const core::Tensor& color,
         const core::Tensor& indices,
         const core::Tensor& block_keys,
         TensorMap& block_value_map,
         const core::Tensor& depth_intrinsic,
         const core::Tensor& color_intrinsic,
         const core::Tensor& extrinsics,
         index_t resolution,
         float voxel_size,
         float sdf_trunc,
         float depth_scale,
         float depth_max) {
    // Parameters
    index_t resolution2 = resolution * resolution;
    index_t resolution3 = resolution2 * resolution;

    TransformIndexer transform_indexer(depth_intrinsic, extrinsics, voxel_size);
    TransformIndexer colormap_indexer(
            color_intrinsic,
            core::Tensor::Eye(4, core::Dtype::Float64, core::Device("CPU:0")));

    ArrayIndexer voxel_indexer({resolution, resolution, resolution});

    ArrayIndexer block_keys_indexer(block_keys, 1);
    ArrayIndexer depth_indexer(depth, 2);
    core::Device device = block_keys.GetDevice();

    const index_t* indices_ptr = indices.GetDataPtr<index_t>();

    if (!block_value_map.Contains("tsdf") ||
        !block_value_map.Contains("weight")) {
        utility::LogError(
                "TSDF and/or weight not allocated in blocks, please implement "
                "customized integration.");
    }
    tsdf_t* tsdf_base_ptr = block_value_map.at("tsdf").GetDataPtr<tsdf_t>();
    weight_t* weight_base_ptr =
            block_value_map.at("weight").GetDataPtr<weight_t>();

    bool integrate_color =
            block_value_map.Contains("color") && color.NumElements() > 0;
    color_t* color_base_ptr = nullptr;
    ArrayIndexer color_indexer;

    float color_multiplier = 1.0;
    if (integrate_color) {
        color_base_ptr = block_value_map.at("color").GetDataPtr<color_t>();
        color_indexer = ArrayIndexer(color, 2);

        // Float32: [0, 1] -> [0, 255]
        if (color.GetDtype() == core::Float32) {
            color_multiplier = 255.0;
        }
    }

    index_t n = indices.GetLength() * resolution3;
    core::ParallelFor(device, n, [=] OPEN3D_DEVICE(index_t workload_idx) {
        // Natural index (0, N) -> (block_idx, voxel_idx)
        index_t block_idx = indices_ptr[workload_idx / resolution3];
        index_t voxel_idx = workload_idx % resolution3;

        /// Coordinate transform
        // block_idx -> (x_block, y_block, z_block)
        index_t* block_key_ptr =
                block_keys_indexer.GetDataPtr<index_t>(block_idx);
        index_t xb = block_key_ptr[0];
        index_t yb = block_key_ptr[1];
        index_t zb = block_key_ptr[2];

        // voxel_idx -> (x_voxel, y_voxel, z_voxel)
        index_t xv, yv, zv;
        voxel_indexer.WorkloadToCoord(voxel_idx, &xv, &yv, &zv);

        // coordinate in world (in voxel)
        index_t x = xb * resolution + xv;
        index_t y = yb * resolution + yv;
        index_t z = zb * resolution + zv;

        // coordinate in camera (in voxel -> in meter)
        float xc, yc, zc, u, v;
        transform_indexer.RigidTransform(static_cast<float>(x),
                                         static_cast<float>(y),
                                         static_cast<float>(z), &xc, &yc, &zc);

        // coordinate in image (in pixel)
        transform_indexer.Project(xc, yc, zc, &u, &v);
        if (!depth_indexer.InBoundary(u, v)) {
            return;
        }

        index_t ui = static_cast<index_t>(u);
        index_t vi = static_cast<index_t>(v);

        // Associate image workload and compute SDF and
        // TSDF.
        float depth =
                *depth_indexer.GetDataPtr<input_depth_t>(ui, vi) / depth_scale;

        float sdf = depth - zc;
        if (depth <= 0 || depth > depth_max || zc <= 0 || sdf < -sdf_trunc) {
            return;
        }
        sdf = sdf < sdf_trunc ? sdf : sdf_trunc;
        sdf /= sdf_trunc;

        index_t linear_idx = block_idx * resolution3 + voxel_idx;

        tsdf_t* tsdf_ptr = tsdf_base_ptr + linear_idx;
        weight_t* weight_ptr = weight_base_ptr + linear_idx;

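        // Running weighted average with the new observation counted at
        // weight 1: tsdf_new = (w * tsdf_old + sdf) / (w + 1); the same
        // blending is applied to color below before the weight is updated.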
        float inv_wsum = 1.0f / (*weight_ptr + 1);
        float weight = *weight_ptr;
        *tsdf_ptr = (weight * (*tsdf_ptr) + sdf) * inv_wsum;

        if (integrate_color) {
            color_t* color_ptr = color_base_ptr + 3 * linear_idx;

            // Unproject ui, vi with depth_intrinsic, then project back with
            // color_intrinsic
            float x, y, z;
            transform_indexer.Unproject(ui, vi, 1.0, &x, &y, &z);

            float uf, vf;
            colormap_indexer.Project(x, y, z, &uf, &vf);
            if (color_indexer.InBoundary(uf, vf)) {
                ui = round(uf);
                vi = round(vf);

                input_color_t* input_color_ptr =
                        color_indexer.GetDataPtr<input_color_t>(ui, vi);

                for (index_t i = 0; i < 3; ++i) {
                    color_ptr[i] = (weight * color_ptr[i] +
                                    input_color_ptr[i] * color_multiplier) *
                                   inv_wsum;
                }
            }
        }
        *weight_ptr = weight + 1;
    });

#if defined(__CUDACC__)
    core::cuda::Synchronize();
#endif
}

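// Estimates a per-pixel [z_min, z_max] depth range map on a down-sampled grid:
// Pass 0 projects the 8 corners of every active block into screen space and
// tiles the bounding rectangle into 16x16 fragments; Pass 1 rasterizes the
// fragments and folds their depth bounds into the range map with atomic
// (or omp-critical) min/max updates.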
#if defined(__CUDACC__)
void EstimateRangeCUDA
#else
void EstimateRangeCPU
#endif
        (const core::Tensor& block_keys,
         core::Tensor& range_minmax_map,
         const core::Tensor& intrinsics,
         const core::Tensor& extrinsics,
         int h,
         int w,
         int down_factor,
         int64_t block_resolution,
         float voxel_size,
         float depth_min,
         float depth_max,
         core::Tensor& fragment_buffer) {
    // TODO(wei): reserve it in a reusable buffer

    // Every 2 channels: (min, max)
    int h_down = h / down_factor;
    int w_down = w / down_factor;
    range_minmax_map = core::Tensor({h_down, w_down, 2}, core::Float32,
                                    block_keys.GetDevice());
    NDArrayIndexer range_map_indexer(range_minmax_map, 2);

    // Every 6 channels: (v_min, u_min, v_max, u_max, z_min, z_max)
    const int fragment_size = 16;

    if (fragment_buffer.GetDataPtr() == 0 ||
        fragment_buffer.NumElements() == 0) {
        // Rough heuristic; should tend to overallocate
        const int reserve_frag_buffer_size =
                h_down * w_down / (fragment_size * fragment_size) / voxel_size;
        fragment_buffer = core::Tensor({reserve_frag_buffer_size, 6},
                                       core::Float32, block_keys.GetDevice());
    }

    const int frag_buffer_size = fragment_buffer.NumElements() / 6;

    NDArrayIndexer frag_buffer_indexer(fragment_buffer, 1);
    NDArrayIndexer block_keys_indexer(block_keys, 1);
    TransformIndexer w2c_transform_indexer(intrinsics, extrinsics);
#if defined(__CUDACC__)
    core::Tensor count(std::vector<int>{0}, {1}, core::Int32,
                       block_keys.GetDevice());
    int* count_ptr = count.GetDataPtr<int>();
#else
    std::atomic<int> count_atomic(0);
    std::atomic<int>* count_ptr = &count_atomic;
#endif

#ifndef __CUDACC__
    using std::max;
    using std::min;
#endif

    // Pass 0: iterate over blocks, fill in a rendering fragment array
    core::ParallelFor(
            block_keys.GetDevice(), block_keys.GetLength(),
            [=] OPEN3D_DEVICE(int64_t workload_idx) {
                int* key = block_keys_indexer.GetDataPtr<int>(workload_idx);

                int u_min = w_down - 1, v_min = h_down - 1, u_max = 0,
                    v_max = 0;
                float z_min = depth_max, z_max = depth_min;

                float xc, yc, zc, u, v;

                // Project 8 corners to low-res image and form a rectangle
                for (int i = 0; i < 8; ++i) {
                    float xw = (key[0] + ((i & 1) > 0)) * block_resolution *
                               voxel_size;
                    float yw = (key[1] + ((i & 2) > 0)) * block_resolution *
                               voxel_size;
                    float zw = (key[2] + ((i & 4) > 0)) * block_resolution *
                               voxel_size;

                    w2c_transform_indexer.RigidTransform(xw, yw, zw, &xc, &yc,
                                                         &zc);
                    if (zc <= 0) continue;

                    // Project to the down sampled image buffer
                    w2c_transform_indexer.Project(xc, yc, zc, &u, &v);
                    u /= down_factor;
                    v /= down_factor;

                    v_min = min(static_cast<int>(floorf(v)), v_min);
                    v_max = max(static_cast<int>(ceilf(v)), v_max);

                    u_min = min(static_cast<int>(floorf(u)), u_min);
                    u_max = max(static_cast<int>(ceilf(u)), u_max);

                    z_min = min(z_min, zc);
                    z_max = max(z_max, zc);
                }

                v_min = max(0, v_min);
                v_max = min(h_down - 1, v_max);

                u_min = max(0, u_min);
                u_max = min(w_down - 1, u_max);

                if (v_min >= v_max || u_min >= u_max || z_min >= z_max) return;

                // Divide the rectangle into small 16x16 fragments
                int frag_v_count =
                        ceilf(float(v_max - v_min + 1) / float(fragment_size));
                int frag_u_count =
                        ceilf(float(u_max - u_min + 1) / float(fragment_size));

                int frag_count = frag_v_count * frag_u_count;
                int frag_count_start = OPEN3D_ATOMIC_ADD(count_ptr, frag_count);
                int frag_count_end = frag_count_start + frag_count;
                if (frag_count_end >= frag_buffer_size) {
                    return;
                }

                int offset = 0;
                for (int frag_v = 0; frag_v < frag_v_count; ++frag_v) {
                    for (int frag_u = 0; frag_u < frag_u_count;
                         ++frag_u, ++offset) {
                        float* frag_ptr = frag_buffer_indexer.GetDataPtr<float>(
                                frag_count_start + offset);
                        // zmin, zmax
                        frag_ptr[0] = z_min;
                        frag_ptr[1] = z_max;

                        // vmin, umin
                        frag_ptr[2] = v_min + frag_v * fragment_size;
                        frag_ptr[3] = u_min + frag_u * fragment_size;

                        // vmax, umax
                        frag_ptr[4] = min(frag_ptr[2] + fragment_size - 1,
                                          static_cast<float>(v_max));
                        frag_ptr[5] = min(frag_ptr[3] + fragment_size - 1,
                                          static_cast<float>(u_max));
                    }
                }
            });
#if defined(__CUDACC__)
    int needed_frag_count = count[0].Item<int>();
#else
    int needed_frag_count = (*count_ptr).load();
#endif

    int frag_count = needed_frag_count;
    if (frag_count >= frag_buffer_size) {
        utility::LogWarning(
                "Could not generate full range map; allocated {} fragments but "
                "needed {}",
                frag_buffer_size, frag_count);
        frag_count = frag_buffer_size - 1;
    } else {
        utility::LogDebug("EstimateRange Allocated {} fragments and needed {}",
                          frag_buffer_size, frag_count);
    }

    // Pass 0.5: Fill in range map to prepare for atomic min/max
    core::ParallelFor(block_keys.GetDevice(), h_down * w_down,
                      [=] OPEN3D_DEVICE(int64_t workload_idx) {
                          int v = workload_idx / w_down;
                          int u = workload_idx % w_down;
                          float* range_ptr =
                                  range_map_indexer.GetDataPtr<float>(u, v);
                          range_ptr[0] = depth_max;
                          range_ptr[1] = depth_min;
                      });

    // Pass 1: iterate over the rendering fragment array, fill in range
    core::ParallelFor(
            block_keys.GetDevice(), frag_count * fragment_size * fragment_size,
            [=] OPEN3D_DEVICE(int64_t workload_idx) {
                int frag_idx = workload_idx / (fragment_size * fragment_size);
                int local_idx = workload_idx % (fragment_size * fragment_size);
                int dv = local_idx / fragment_size;
                int du = local_idx % fragment_size;

                float* frag_ptr =
                        frag_buffer_indexer.GetDataPtr<float>(frag_idx);
                int v_min = static_cast<int>(frag_ptr[2]);
                int u_min = static_cast<int>(frag_ptr[3]);
                int v_max = static_cast<int>(frag_ptr[4]);
                int u_max = static_cast<int>(frag_ptr[5]);

                int v = v_min + dv;
                int u = u_min + du;
                if (v > v_max || u > u_max) return;

                float z_min = frag_ptr[0];
                float z_max = frag_ptr[1];
                float* range_ptr = range_map_indexer.GetDataPtr<float>(u, v);
#ifdef __CUDACC__
                atomicMinf(&(range_ptr[0]), z_min);
                atomicMaxf(&(range_ptr[1]), z_max);
#else
#pragma omp critical(EstimateRangeCPU)
                {
                    range_ptr[0] = min(z_min, range_ptr[0]);
                    range_ptr[1] = max(z_max, range_ptr[1]);
                }
#endif
            });

#if defined(__CUDACC__)
    core::cuda::Synchronize();
#endif

    if (needed_frag_count != frag_count) {
        utility::LogInfo("Reallocating {} fragments for EstimateRange (was {})",
                         needed_frag_count, frag_count);

        fragment_buffer = core::Tensor({needed_frag_count, 6}, core::Float32,
                                       block_keys.GetDevice());
    }
}

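// Single-entry cache of the last block-coordinate -> buffer-index lookup,
// used to skip repeated hash map queries while a ray stays inside one block.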
struct MiniVecCache {
    index_t x;
    index_t y;
    index_t z;
    index_t block_idx;

    inline index_t OPEN3D_DEVICE Check(index_t xin, index_t yin, index_t zin) {
        return (xin == x && yin == y && zin == z) ? block_idx : -1;
    }

    inline void OPEN3D_DEVICE Update(index_t xin,
                                     index_t yin,
                                     index_t zin,
                                     index_t block_idx_in) {
        x = xin;
        y = yin;
        z = zin;
        block_idx = block_idx_in;
    }
};

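// Ray casting of the TSDF voxel block grid: for every pixel a ray is marched
// through the sparse block structure, the zero crossing of the TSDF is
// located, and depth / vertex / normal / color (plus optional differentiable
// rendering buffers) are written for the hit point.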
template <typename tsdf_t, typename weight_t, typename color_t>
#if defined(__CUDACC__)
void RayCastCUDA
#else
void RayCastCPU
#endif
        (std::shared_ptr<core::HashMap>& hashmap,
         const TensorMap& block_value_map,
         const core::Tensor& range,
         TensorMap& renderings_map,
         const core::Tensor& intrinsic,
         const core::Tensor& extrinsics,
         index_t h,
         index_t w,
         index_t block_resolution,
         float voxel_size,
         float depth_scale,
         float depth_min,
         float depth_max,
         float weight_threshold,
         float trunc_voxel_multiplier,
         int range_map_down_factor) {
    using Key = utility::MiniVec<index_t, 3>;
    using Hash = utility::MiniVecHash<index_t, 3>;
    using Eq = utility::MiniVecEq<index_t, 3>;

    auto device_hashmap = hashmap->GetDeviceHashBackend();
#if defined(__CUDACC__)
    auto cuda_hashmap =
            std::dynamic_pointer_cast<core::StdGPUHashBackend<Key, Hash, Eq>>(
                    device_hashmap);
    if (cuda_hashmap == nullptr) {
        utility::LogError(
                "Unsupported backend: CUDA raycasting only supports STDGPU.");
    }
    auto hashmap_impl = cuda_hashmap->GetImpl();
#else
    auto cpu_hashmap =
            std::dynamic_pointer_cast<core::TBBHashBackend<Key, Hash, Eq>>(
                    device_hashmap);
    if (cpu_hashmap == nullptr) {
        utility::LogError(
                "Unsupported backend: CPU raycasting only supports TBB.");
    }
    auto hashmap_impl = *cpu_hashmap->GetImpl();
#endif

    core::Device device = hashmap->GetDevice();

    ArrayIndexer range_indexer(range, 2);

    // Geometry
    ArrayIndexer depth_indexer;
    ArrayIndexer vertex_indexer;
    ArrayIndexer normal_indexer;

    // Diff rendering
    ArrayIndexer index_indexer;
    ArrayIndexer mask_indexer;
    ArrayIndexer interp_ratio_indexer;
    ArrayIndexer interp_ratio_dx_indexer;
    ArrayIndexer interp_ratio_dy_indexer;
    ArrayIndexer interp_ratio_dz_indexer;

    // Color
    ArrayIndexer color_indexer;

    if (!block_value_map.Contains("tsdf") ||
        !block_value_map.Contains("weight")) {
        utility::LogError(
                "TSDF and/or weight not allocated in blocks, please implement "
                "customized integration.");
    }
    const tsdf_t* tsdf_base_ptr =
            block_value_map.at("tsdf").GetDataPtr<tsdf_t>();
    const weight_t* weight_base_ptr =
            block_value_map.at("weight").GetDataPtr<weight_t>();

    // Geometry
    if (renderings_map.Contains("depth")) {
        depth_indexer = ArrayIndexer(renderings_map.at("depth"), 2);
    }
    if (renderings_map.Contains("vertex")) {
        vertex_indexer = ArrayIndexer(renderings_map.at("vertex"), 2);
    }
    if (renderings_map.Contains("normal")) {
        normal_indexer = ArrayIndexer(renderings_map.at("normal"), 2);
    }

    // Diff rendering
    if (renderings_map.Contains("index")) {
        index_indexer = ArrayIndexer(renderings_map.at("index"), 2);
    }
    if (renderings_map.Contains("mask")) {
        mask_indexer = ArrayIndexer(renderings_map.at("mask"), 2);
    }
    if (renderings_map.Contains("interp_ratio")) {
        interp_ratio_indexer =
                ArrayIndexer(renderings_map.at("interp_ratio"), 2);
    }
    if (renderings_map.Contains("interp_ratio_dx")) {
        interp_ratio_dx_indexer =
                ArrayIndexer(renderings_map.at("interp_ratio_dx"), 2);
    }
    if (renderings_map.Contains("interp_ratio_dy")) {
        interp_ratio_dy_indexer =
                ArrayIndexer(renderings_map.at("interp_ratio_dy"), 2);
    }
    if (renderings_map.Contains("interp_ratio_dz")) {
        interp_ratio_dz_indexer =
                ArrayIndexer(renderings_map.at("interp_ratio_dz"), 2);
    }

    // Color
    bool render_color = false;
    if (block_value_map.Contains("color") && renderings_map.Contains("color")) {
        render_color = true;
        color_indexer = ArrayIndexer(renderings_map.at("color"), 2);
    }
    const color_t* color_base_ptr =
            render_color ? block_value_map.at("color").GetDataPtr<color_t>()
                         : nullptr;

    bool visit_neighbors = render_color || normal_indexer.GetDataPtr() ||
                           mask_indexer.GetDataPtr() ||
                           index_indexer.GetDataPtr() ||
                           interp_ratio_indexer.GetDataPtr() ||
                           interp_ratio_dx_indexer.GetDataPtr() ||
                           interp_ratio_dy_indexer.GetDataPtr() ||
                           interp_ratio_dz_indexer.GetDataPtr();

    TransformIndexer c2w_transform_indexer(
            intrinsic, t::geometry::InverseTransformation(extrinsics));
    TransformIndexer w2c_transform_indexer(intrinsic, extrinsics);

    index_t rows = h;
    index_t cols = w;
    index_t n = rows * cols;

    float block_size = voxel_size * block_resolution;
    index_t resolution2 = block_resolution * block_resolution;
    index_t resolution3 = resolution2 * block_resolution;

#ifndef __CUDACC__
    using std::max;
    using std::sqrt;
#endif

    core::ParallelFor(device, n, [=] OPEN3D_DEVICE(index_t workload_idx) {
        auto GetLinearIdxAtP = [&] OPEN3D_DEVICE(
                                       index_t x_b, index_t y_b, index_t z_b,
                                       index_t x_v, index_t y_v, index_t z_v,
                                       core::buf_index_t block_buf_idx,
                                       MiniVecCache & cache) -> index_t {
            index_t x_vn = (x_v + block_resolution) % block_resolution;
            index_t y_vn = (y_v + block_resolution) % block_resolution;
            index_t z_vn = (z_v + block_resolution) % block_resolution;

            index_t dx_b = Sign(x_v - x_vn);
            index_t dy_b = Sign(y_v - y_vn);
            index_t dz_b = Sign(z_v - z_vn);

            if (dx_b == 0 && dy_b == 0 && dz_b == 0) {
                return block_buf_idx * resolution3 + z_v * resolution2 +
                       y_v * block_resolution + x_v;
            } else {
                Key key(x_b + dx_b, y_b + dy_b, z_b + dz_b);

                index_t block_buf_idx = cache.Check(key[0], key[1], key[2]);
                if (block_buf_idx < 0) {
                    auto iter = hashmap_impl.find(key);
                    if (iter == hashmap_impl.end()) return -1;
                    block_buf_idx = iter->second;
                    cache.Update(key[0], key[1], key[2], block_buf_idx);
                }

                return block_buf_idx * resolution3 + z_vn * resolution2 +
                       y_vn * block_resolution + x_vn;
            }
        };

        auto GetLinearIdxAtT = [&] OPEN3D_DEVICE(
                                       float x_o, float y_o, float z_o,
                                       float x_d, float y_d, float z_d, float t,
                                       MiniVecCache& cache) -> index_t {
            float x_g = x_o + t * x_d;
            float y_g = y_o + t * y_d;
            float z_g = z_o + t * z_d;

            // MiniVec coordinate and look up
            index_t x_b = static_cast<index_t>(floorf(x_g / block_size));
            index_t y_b = static_cast<index_t>(floorf(y_g / block_size));
            index_t z_b = static_cast<index_t>(floorf(z_g / block_size));

            Key key(x_b, y_b, z_b);
            index_t block_buf_idx = cache.Check(x_b, y_b, z_b);
            if (block_buf_idx < 0) {
                auto iter = hashmap_impl.find(key);
                if (iter == hashmap_impl.end()) return -1;
                block_buf_idx = iter->second;
                cache.Update(x_b, y_b, z_b, block_buf_idx);
            }

            // Voxel coordinate and look up
            index_t x_v = index_t((x_g - x_b * block_size) / voxel_size);
            index_t y_v = index_t((y_g - y_b * block_size) / voxel_size);
            index_t z_v = index_t((z_g - z_b * block_size) / voxel_size);

            return block_buf_idx * resolution3 + z_v * resolution2 +
                   y_v * block_resolution + x_v;
        };

        index_t y = workload_idx / cols;
        index_t x = workload_idx % cols;

        const float* range = range_indexer.GetDataPtr<float>(
                x / range_map_down_factor, y / range_map_down_factor);

        float* depth_ptr = nullptr;
        float* vertex_ptr = nullptr;
        float* color_ptr = nullptr;
        float* normal_ptr = nullptr;

        int64_t* index_ptr = nullptr;
        bool* mask_ptr = nullptr;
        float* interp_ratio_ptr = nullptr;
        float* interp_ratio_dx_ptr = nullptr;
        float* interp_ratio_dy_ptr = nullptr;
        float* interp_ratio_dz_ptr = nullptr;

        if (vertex_indexer.GetDataPtr()) {
            vertex_ptr = vertex_indexer.GetDataPtr<float>(x, y);
            vertex_ptr[0] = 0;
            vertex_ptr[1] = 0;
            vertex_ptr[2] = 0;
        }
        if (depth_indexer.GetDataPtr()) {
            depth_ptr = depth_indexer.GetDataPtr<float>(x, y);
            depth_ptr[0] = 0;
        }
        if (normal_indexer.GetDataPtr()) {
            normal_ptr = normal_indexer.GetDataPtr<float>(x, y);
            normal_ptr[0] = 0;
            normal_ptr[1] = 0;
            normal_ptr[2] = 0;
        }

        if (mask_indexer.GetDataPtr()) {
            mask_ptr = mask_indexer.GetDataPtr<bool>(x, y);
#ifdef __CUDACC__
#pragma unroll
#endif
            for (int i = 0; i < 8; ++i) {
                mask_ptr[i] = false;
            }
        }
        if (index_indexer.GetDataPtr()) {
            index_ptr = index_indexer.GetDataPtr<int64_t>(x, y);
#ifdef __CUDACC__
#pragma unroll
#endif
            for (int i = 0; i < 8; ++i) {
                index_ptr[i] = 0;
            }
        }
        if (interp_ratio_indexer.GetDataPtr()) {
            interp_ratio_ptr = interp_ratio_indexer.GetDataPtr<float>(x, y);
#ifdef __CUDACC__
#pragma unroll
#endif
            for (int i = 0; i < 8; ++i) {
                interp_ratio_ptr[i] = 0;
            }
        }
        if (interp_ratio_dx_indexer.GetDataPtr()) {
            interp_ratio_dx_ptr =
                    interp_ratio_dx_indexer.GetDataPtr<float>(x, y);
#ifdef __CUDACC__
#pragma unroll
#endif
            for (int i = 0; i < 8; ++i) {
                interp_ratio_dx_ptr[i] = 0;
            }
        }
        if (interp_ratio_dy_indexer.GetDataPtr()) {
            interp_ratio_dy_ptr =
                    interp_ratio_dy_indexer.GetDataPtr<float>(x, y);
#ifdef __CUDACC__
#pragma unroll
#endif
            for (int i = 0; i < 8; ++i) {
                interp_ratio_dy_ptr[i] = 0;
            }
        }
        if (interp_ratio_dz_indexer.GetDataPtr()) {
            interp_ratio_dz_ptr =
                    interp_ratio_dz_indexer.GetDataPtr<float>(x, y);
#ifdef __CUDACC__
#pragma unroll
#endif
            for (int i = 0; i < 8; ++i) {
                interp_ratio_dz_ptr[i] = 0;
            }
        }

        if (color_indexer.GetDataPtr()) {
            color_ptr = color_indexer.GetDataPtr<float>(x, y);
            color_ptr[0] = 0;
            color_ptr[1] = 0;
            color_ptr[2] = 0;
        }

        float t = range[0];
        const float t_max = range[1];
        if (t >= t_max) return;

        // Coordinates in camera and global
        float x_c = 0, y_c = 0, z_c = 0;
        float x_g = 0, y_g = 0, z_g = 0;
        float x_o = 0, y_o = 0, z_o = 0;

        // Iterative ray intersection check
        float t_prev = t;

        float tsdf_prev = -1.0f;
        float tsdf = 1.0;
        float sdf_trunc = voxel_size * trunc_voxel_multiplier;
        float w = 0.0;

        // Camera origin
        c2w_transform_indexer.RigidTransform(0, 0, 0, &x_o, &y_o, &z_o);

        // Direction
        c2w_transform_indexer.Unproject(static_cast<float>(x),
                                        static_cast<float>(y), 1.0f, &x_c, &y_c,
                                        &z_c);
        c2w_transform_indexer.RigidTransform(x_c, y_c, z_c, &x_g, &y_g, &z_g);
        float x_d = (x_g - x_o);
        float y_d = (y_g - y_o);
        float z_d = (z_g - z_o);

        MiniVecCache cache{0, 0, 0, -1};
        bool surface_found = false;
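        // Sphere-tracing style marching: skip a whole block when the ray is
        // in unallocated space, otherwise advance by max(tsdf * sdf_trunc,
        // voxel_size). A surface is found at a (+) -> (-) TSDF transition
        // with sufficient weight; the crossing is then refined below by
        // linear interpolation,
        // t* = (t * tsdf_prev - t_prev * tsdf) / (tsdf_prev - tsdf).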
        while (t < t_max) {
            index_t linear_idx =
                    GetLinearIdxAtT(x_o, y_o, z_o, x_d, y_d, z_d, t, cache);

            if (linear_idx < 0) {
                t_prev = t;
                t += block_size;
            } else {
                tsdf_prev = tsdf;
                tsdf = tsdf_base_ptr[linear_idx];
                w = weight_base_ptr[linear_idx];
                if (tsdf_prev > 0 && w >= weight_threshold && tsdf <= 0) {
                    surface_found = true;
                    break;
                }
                t_prev = t;
                float delta = tsdf * sdf_trunc;
                t += delta < voxel_size ? voxel_size : delta;
            }
        }

        if (surface_found) {
            float t_intersect =
                    (t * tsdf_prev - t_prev * tsdf) / (tsdf_prev - tsdf);
            x_g = x_o + t_intersect * x_d;
            y_g = y_o + t_intersect * y_d;
            z_g = z_o + t_intersect * z_d;

            // Trivial vertex assignment
            if (depth_ptr) {
                *depth_ptr = t_intersect * depth_scale;
            }
            if (vertex_ptr) {
                w2c_transform_indexer.RigidTransform(
                        x_g, y_g, z_g, vertex_ptr + 0, vertex_ptr + 1,
                        vertex_ptr + 2);
            }
            if (!visit_neighbors) return;

            // Trilinear interpolation
            // TODO(wei): simplify the flow by splitting the
            // functions given what is enabled
            index_t x_b = static_cast<index_t>(floorf(x_g / block_size));
            index_t y_b = static_cast<index_t>(floorf(y_g / block_size));
            index_t z_b = static_cast<index_t>(floorf(z_g / block_size));
            float x_v = (x_g - float(x_b) * block_size) / voxel_size;
            float y_v = (y_g - float(y_b) * block_size) / voxel_size;
            float z_v = (z_g - float(z_b) * block_size) / voxel_size;

            Key key(x_b, y_b, z_b);

            index_t block_buf_idx = cache.Check(x_b, y_b, z_b);
            if (block_buf_idx < 0) {
                auto iter = hashmap_impl.find(key);
                if (iter == hashmap_impl.end()) return;
                block_buf_idx = iter->second;
                cache.Update(x_b, y_b, z_b, block_buf_idx);
            }

            index_t x_v_floor = static_cast<index_t>(floorf(x_v));
            index_t y_v_floor = static_cast<index_t>(floorf(y_v));
            index_t z_v_floor = static_cast<index_t>(floorf(z_v));

            float ratio_x = x_v - float(x_v_floor);
            float ratio_y = y_v - float(y_v_floor);
            float ratio_z = z_v - float(z_v_floor);

            float sum_r = 0.0;
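            // Accumulate trilinear contributions from the 8 voxels around the
            // hit point: corner k = (dx, dy, dz) gets weight r = rx * ry * rz,
            // and the d(r)/d(ratio) terms are reused both for the
            // differentiable-rendering buffers and for the TSDF-gradient
            // normal.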
            for (index_t k = 0; k < 8; ++k) {
                index_t dx_v = (k & 1) > 0 ? 1 : 0;
                index_t dy_v = (k & 2) > 0 ? 1 : 0;
                index_t dz_v = (k & 4) > 0 ? 1 : 0;

                index_t linear_idx_k = GetLinearIdxAtP(
                        x_b, y_b, z_b, x_v_floor + dx_v, y_v_floor + dy_v,
                        z_v_floor + dz_v, block_buf_idx, cache);

                if (linear_idx_k >= 0 && weight_base_ptr[linear_idx_k] > 0) {
                    float rx = dx_v * (ratio_x) + (1 - dx_v) * (1 - ratio_x);
                    float ry = dy_v * (ratio_y) + (1 - dy_v) * (1 - ratio_y);
                    float rz = dz_v * (ratio_z) + (1 - dz_v) * (1 - ratio_z);
                    float r = rx * ry * rz;

                    if (interp_ratio_ptr) {
                        interp_ratio_ptr[k] = r;
                    }
                    if (mask_ptr) {
                        mask_ptr[k] = true;
                    }
                    if (index_ptr) {
                        index_ptr[k] = linear_idx_k;
                    }

                    float tsdf_k = tsdf_base_ptr[linear_idx_k];
                    float interp_ratio_dx = ry * rz * (2 * dx_v - 1);
                    float interp_ratio_dy = rx * rz * (2 * dy_v - 1);
                    float interp_ratio_dz = rx * ry * (2 * dz_v - 1);

                    if (interp_ratio_dx_ptr) {
                        interp_ratio_dx_ptr[k] = interp_ratio_dx;
                    }
                    if (interp_ratio_dy_ptr) {
                        interp_ratio_dy_ptr[k] = interp_ratio_dy;
                    }
                    if (interp_ratio_dz_ptr) {
                        interp_ratio_dz_ptr[k] = interp_ratio_dz;
                    }

                    if (normal_ptr) {
                        normal_ptr[0] += interp_ratio_dx * tsdf_k;
                        normal_ptr[1] += interp_ratio_dy * tsdf_k;
                        normal_ptr[2] += interp_ratio_dz * tsdf_k;
                    }

                    if (color_ptr) {
                        index_t color_linear_idx = linear_idx_k * 3;
                        color_ptr[0] +=
                                r * color_base_ptr[color_linear_idx + 0];
                        color_ptr[1] +=
                                r * color_base_ptr[color_linear_idx + 1];
                        color_ptr[2] +=
                                r * color_base_ptr[color_linear_idx + 2];
                    }

                    sum_r += r;
                }
            }  // loop over 8 neighbors

            if (sum_r > 0) {
                sum_r *= 255.0;
                if (color_ptr) {
                    color_ptr[0] /= sum_r;
                    color_ptr[1] /= sum_r;
                    color_ptr[2] /= sum_r;
                }

                if (normal_ptr) {
                    constexpr float EPSILON = 1e-5f;
                    float norm = sqrt(normal_ptr[0] * normal_ptr[0] +
                                      normal_ptr[1] * normal_ptr[1] +
                                      normal_ptr[2] * normal_ptr[2]);
                    norm = std::max(norm, EPSILON);
                    w2c_transform_indexer.Rotate(
                            -normal_ptr[0] / norm, -normal_ptr[1] / norm,
                            -normal_ptr[2] / norm, normal_ptr + 0,
                            normal_ptr + 1, normal_ptr + 2);
                }
            }
        }  // surface-found
    });

#if defined(__CUDACC__)
    core::cuda::Synchronize();
#endif
}

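// Extracts a surface point cloud: for every voxel with sufficient weight, the
// +x/+y/+z neighbors are checked for a TSDF sign change, and a point (with
// interpolated normal and color) is emitted at the zero crossing
// ratio = -tsdf_o / (tsdf_i - tsdf_o) along that axis.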
template <typename tsdf_t, typename weight_t, typename color_t>
#if defined(__CUDACC__)
void ExtractPointCloudCUDA
#else
void ExtractPointCloudCPU
#endif
        (const core::Tensor& indices,
         const core::Tensor& nb_indices,
         const core::Tensor& nb_masks,
         const core::Tensor& block_keys,
         const TensorMap& block_value_map,
         core::Tensor& points,
         core::Tensor& normals,
         core::Tensor& colors,
         index_t resolution,
         float voxel_size,
         float weight_threshold,
         int& valid_size) {
    core::Device device = block_keys.GetDevice();

    // Parameters
    index_t resolution2 = resolution * resolution;
    index_t resolution3 = resolution2 * resolution;

    // Shape / transform indexers, no data involved
    ArrayIndexer voxel_indexer({resolution, resolution, resolution});

    // Real data indexer
    ArrayIndexer block_keys_indexer(block_keys, 1);
    ArrayIndexer nb_block_masks_indexer(nb_masks, 2);
    ArrayIndexer nb_block_indices_indexer(nb_indices, 2);

    // Plain arrays that do not require indexers
    const index_t* indices_ptr = indices.GetDataPtr<index_t>();

    if (!block_value_map.Contains("tsdf") ||
        !block_value_map.Contains("weight")) {
        utility::LogError(
                "TSDF and/or weight not allocated in blocks, please implement "
                "customized integration.");
    }
    const tsdf_t* tsdf_base_ptr =
            block_value_map.at("tsdf").GetDataPtr<tsdf_t>();
    const weight_t* weight_base_ptr =
            block_value_map.at("weight").GetDataPtr<weight_t>();
    const color_t* color_base_ptr = nullptr;
    if (block_value_map.Contains("color")) {
        color_base_ptr = block_value_map.at("color").GetDataPtr<color_t>();
    }

    index_t n_blocks = indices.GetLength();
    index_t n = n_blocks * resolution3;

    // Output
#if defined(__CUDACC__)
    core::Tensor count(std::vector<index_t>{0}, {1}, core::Int32,
                       block_keys.GetDevice());
    index_t* count_ptr = count.GetDataPtr<index_t>();
#else
    std::atomic<index_t> count_atomic(0);
    std::atomic<index_t>* count_ptr = &count_atomic;
#endif

    if (valid_size < 0) {
        utility::LogDebug(
                "No estimated max point cloud size provided, using a 2-pass "
                "estimation. Surface extraction could be slow.");
        // This pass determines valid number of points.

        core::ParallelFor(device, n, [=] OPEN3D_DEVICE(index_t workload_idx) {
            auto GetLinearIdx = [&] OPEN3D_DEVICE(
                                        index_t xo, index_t yo, index_t zo,
                                        index_t curr_block_idx) -> index_t {
                return DeviceGetLinearIdx(xo, yo, zo, curr_block_idx,
                                          resolution, nb_block_masks_indexer,
                                          nb_block_indices_indexer);
            };

            // Natural index (0, N) -> (block_idx,
            // voxel_idx)
            index_t workload_block_idx = workload_idx / resolution3;
            index_t block_idx = indices_ptr[workload_block_idx];
            index_t voxel_idx = workload_idx % resolution3;

            // voxel_idx -> (x_voxel, y_voxel, z_voxel)
            index_t xv, yv, zv;
            voxel_indexer.WorkloadToCoord(voxel_idx, &xv, &yv, &zv);

            index_t linear_idx = block_idx * resolution3 + voxel_idx;
            float tsdf_o = tsdf_base_ptr[linear_idx];
            float weight_o = weight_base_ptr[linear_idx];
            if (weight_o <= weight_threshold) return;

            // Enumerate x-y-z directions
            for (index_t i = 0; i < 3; ++i) {
                index_t linear_idx_i =
                        GetLinearIdx(xv + (i == 0), yv + (i == 1),
                                     zv + (i == 2), workload_block_idx);
                if (linear_idx_i < 0) continue;

                float tsdf_i = tsdf_base_ptr[linear_idx_i];
                float weight_i = weight_base_ptr[linear_idx_i];
                if (weight_i > weight_threshold && tsdf_i * tsdf_o < 0) {
                    OPEN3D_ATOMIC_ADD(count_ptr, 1);
                }
            }
        });

#if defined(__CUDACC__)
        valid_size = count[0].Item<index_t>();
        count[0] = 0;
#else
        valid_size = (*count_ptr).load();
        (*count_ptr) = 0;
#endif
    }

    if (points.GetLength() == 0) {
        points = core::Tensor({valid_size, 3}, core::Float32, device);
    }
    ArrayIndexer point_indexer(points, 1);

    // Normals
    ArrayIndexer normal_indexer;
    normals = core::Tensor({valid_size, 3}, core::Float32, device);
    normal_indexer = ArrayIndexer(normals, 1);

    // This pass extracts exact surface points.

    // Colors
    ArrayIndexer color_indexer;
    if (color_base_ptr) {
        colors = core::Tensor({valid_size, 3}, core::Float32, device);
        color_indexer = ArrayIndexer(colors, 1);
    }

    core::ParallelFor(device, n, [=] OPEN3D_DEVICE(index_t workload_idx) {
        auto GetLinearIdx = [&] OPEN3D_DEVICE(
                                    index_t xo, index_t yo, index_t zo,
                                    index_t curr_block_idx) -> index_t {
            return DeviceGetLinearIdx(xo, yo, zo, curr_block_idx, resolution,
                                      nb_block_masks_indexer,
                                      nb_block_indices_indexer);
        };

        auto GetNormal = [&] OPEN3D_DEVICE(index_t xo, index_t yo, index_t zo,
                                           index_t curr_block_idx, float* n) {
            return DeviceGetNormal<tsdf_t>(
                    tsdf_base_ptr, xo, yo, zo, curr_block_idx, n, resolution,
                    nb_block_masks_indexer, nb_block_indices_indexer);
        };

        // Natural index (0, N) -> (block_idx, voxel_idx)
        index_t workload_block_idx = workload_idx / resolution3;
        index_t block_idx = indices_ptr[workload_block_idx];
        index_t voxel_idx = workload_idx % resolution3;

        /// Coordinate transform
        // block_idx -> (x_block, y_block, z_block)
        index_t* block_key_ptr =
                block_keys_indexer.GetDataPtr<index_t>(block_idx);
        index_t xb = block_key_ptr[0];
        index_t yb = block_key_ptr[1];
        index_t zb = block_key_ptr[2];

        // voxel_idx -> (x_voxel, y_voxel, z_voxel)
        index_t xv, yv, zv;
        voxel_indexer.WorkloadToCoord(voxel_idx, &xv, &yv, &zv);

        index_t linear_idx = block_idx * resolution3 + voxel_idx;
        float tsdf_o = tsdf_base_ptr[linear_idx];
        float weight_o = weight_base_ptr[linear_idx];
        if (weight_o <= weight_threshold) return;

        float no[3] = {0}, ne[3] = {0};

        // Get normal at origin
        GetNormal(xv, yv, zv, workload_block_idx, no);

        index_t x = xb * resolution + xv;
        index_t y = yb * resolution + yv;
        index_t z = zb * resolution + zv;

        // Enumerate x-y-z axes
        for (index_t i = 0; i < 3; ++i) {
            index_t linear_idx_i =
                    GetLinearIdx(xv + (i == 0), yv + (i == 1), zv + (i == 2),
                                 workload_block_idx);
            if (linear_idx_i < 0) continue;

            float tsdf_i = tsdf_base_ptr[linear_idx_i];
            float weight_i = weight_base_ptr[linear_idx_i];
            if (weight_i > weight_threshold && tsdf_i * tsdf_o < 0) {
                float ratio = (0 - tsdf_o) / (tsdf_i - tsdf_o);

                index_t idx = OPEN3D_ATOMIC_ADD(count_ptr, 1);
                if (idx >= valid_size) {
                    printf("Point cloud size larger than "
                           "estimated, please increase the "
                           "estimation!\n");
                    return;
                }

                float* point_ptr = point_indexer.GetDataPtr<float>(idx);
                point_ptr[0] = voxel_size * (x + ratio * int(i == 0));
                point_ptr[1] = voxel_size * (y + ratio * int(i == 1));
                point_ptr[2] = voxel_size * (z + ratio * int(i == 2));

                // Get normal at edge and interpolate
                float* normal_ptr = normal_indexer.GetDataPtr<float>(idx);
                GetNormal(xv + (i == 0), yv + (i == 1), zv + (i == 2),
                          workload_block_idx, ne);
                float nx = (1 - ratio) * no[0] + ratio * ne[0];
                float ny = (1 - ratio) * no[1] + ratio * ne[1];
                float nz = (1 - ratio) * no[2] + ratio * ne[2];
                float norm = static_cast<float>(
                        sqrt(nx * nx + ny * ny + nz * nz) + 1e-5);
                normal_ptr[0] = nx / norm;
                normal_ptr[1] = ny / norm;
                normal_ptr[2] = nz / norm;

                if (color_base_ptr) {
                    float* color_ptr = color_indexer.GetDataPtr<float>(idx);
                    const color_t* color_o_ptr =
                            color_base_ptr + 3 * linear_idx;
                    float r_o = color_o_ptr[0];
                    float g_o = color_o_ptr[1];
                    float b_o = color_o_ptr[2];

                    const color_t* color_i_ptr =
                            color_base_ptr + 3 * linear_idx_i;
                    float r_i = color_i_ptr[0];
                    float g_i = color_i_ptr[1];
                    float b_i = color_i_ptr[2];

                    color_ptr[0] = ((1 - ratio) * r_o + ratio * r_i) / 255.0f;
                    color_ptr[1] = ((1 - ratio) * g_o + ratio * g_i) / 255.0f;
                    color_ptr[2] = ((1 - ratio) * b_o + ratio * b_i) / 255.0f;
                }
            }
        }
    });

#if defined(__CUDACC__)
    index_t total_count = count.Item<index_t>();
#else
    index_t total_count = (*count_ptr).load();
#endif

    utility::LogDebug("{} vertices extracted", total_count);
    valid_size = total_count;

#if defined(BUILD_CUDA_MODULE) && defined(__CUDACC__)
    core::cuda::Synchronize();
#endif
}

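// Marching Cubes over the voxel block grid in several passes: Pass 0 computes
// the per-voxel cube configuration (table_idx) and marks edges that need a
// vertex, Pass 1 counts vertices, Pass 2 places vertices at TSDF zero
// crossings with interpolated normals/colors, and Pass 3 assembles triangles
// from tri_table.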
template <typename tsdf_t, typename weight_t, typename color_t>
#if defined(__CUDACC__)
void ExtractTriangleMeshCUDA
#else
void ExtractTriangleMeshCPU
#endif
        (const core::Tensor& block_indices,
         const core::Tensor& inv_block_indices,
         const core::Tensor& nb_block_indices,
         const core::Tensor& nb_block_masks,
         const core::Tensor& block_keys,
         const TensorMap& block_value_map,
         core::Tensor& vertices,
         core::Tensor& triangles,
         core::Tensor& vertex_normals,
         core::Tensor& vertex_colors,
         index_t block_resolution,
         float voxel_size,
         float weight_threshold,
         index_t& vertex_count) {
    core::Device device = block_indices.GetDevice();

    index_t resolution = block_resolution;
    index_t resolution3 = resolution * resolution * resolution;

    // Shape / transform indexers, no data involved
    ArrayIndexer voxel_indexer({resolution, resolution, resolution});
    index_t n_blocks = static_cast<index_t>(block_indices.GetLength());

    // TODO(wei): profile performance by replacing the table with a hashmap.
    // Voxel-wise mesh info. 4 channels correspond to:
    // 3 edges' corresponding vertex index + 1 table index.
    core::Tensor mesh_structure;
    try {
        mesh_structure = core::Tensor::Zeros(
                {n_blocks, resolution, resolution, resolution, 4}, core::Int32,
                device);
    } catch (const std::runtime_error&) {
        utility::LogError(
                "Unable to allocate assistance mesh structure for Marching "
                "Cubes with {} active voxel blocks. Please consider using a "
                "larger voxel size (currently {}) for TSDF integration, or "
                "using tsdf_volume.cpu() to perform mesh extraction on CPU.",
                n_blocks, voxel_size);
    }

    // Real data indexer
    ArrayIndexer mesh_structure_indexer(mesh_structure, 4);
    ArrayIndexer nb_block_masks_indexer(nb_block_masks, 2);
    ArrayIndexer nb_block_indices_indexer(nb_block_indices, 2);

    // Plain arrays that do not require indexers
    const index_t* indices_ptr = block_indices.GetDataPtr<index_t>();
    const index_t* inv_indices_ptr = inv_block_indices.GetDataPtr<index_t>();

    if (!block_value_map.Contains("tsdf") ||
        !block_value_map.Contains("weight")) {
        utility::LogError(
                "TSDF and/or weight not allocated in blocks, please implement "
                "customized integration.");
    }
    const tsdf_t* tsdf_base_ptr =
            block_value_map.at("tsdf").GetDataPtr<tsdf_t>();
    const weight_t* weight_base_ptr =
            block_value_map.at("weight").GetDataPtr<weight_t>();
    const color_t* color_base_ptr = nullptr;
    if (block_value_map.Contains("color")) {
        color_base_ptr = block_value_map.at("color").GetDataPtr<color_t>();
    }

    index_t n = n_blocks * resolution3;
    // Pass 0: analyze mesh structure, set up one-on-one correspondences
    // from edges to vertices.

    core::ParallelFor(device, n, [=] OPEN3D_DEVICE(index_t widx) {
        auto GetLinearIdx = [&] OPEN3D_DEVICE(
                                    index_t xo, index_t yo, index_t zo,
                                    index_t curr_block_idx) -> index_t {
            return DeviceGetLinearIdx(xo, yo, zo, curr_block_idx,
                                      static_cast<index_t>(resolution),
                                      nb_block_masks_indexer,
                                      nb_block_indices_indexer);
        };

        // Natural index (0, N) -> (block_idx, voxel_idx)
        index_t workload_block_idx = widx / resolution3;
        index_t voxel_idx = widx % resolution3;

        // voxel_idx -> (x_voxel, y_voxel, z_voxel)
        index_t xv, yv, zv;
        voxel_indexer.WorkloadToCoord(voxel_idx, &xv, &yv, &zv);

        // Check per-vertex sign in the cube to determine cube
        // type
        index_t table_idx = 0;
        for (index_t i = 0; i < 8; ++i) {
            index_t linear_idx_i =
                    GetLinearIdx(xv + vtx_shifts[i][0], yv + vtx_shifts[i][1],
                                 zv + vtx_shifts[i][2], workload_block_idx);
            if (linear_idx_i < 0) return;

            float tsdf_i = tsdf_base_ptr[linear_idx_i];
            float weight_i = weight_base_ptr[linear_idx_i];
            if (weight_i <= weight_threshold) return;

            table_idx |= ((tsdf_i < 0) ? (1 << i) : 0);
        }

        index_t* mesh_struct_ptr = mesh_structure_indexer.GetDataPtr<index_t>(
                xv, yv, zv, workload_block_idx);
        mesh_struct_ptr[3] = table_idx;

        if (table_idx == 0 || table_idx == 255) return;

        // Check per-edge sign to mark edges that need a vertex
        index_t edges_with_vertices = edge_table[table_idx];
        for (index_t i = 0; i < 12; ++i) {
            if (edges_with_vertices & (1 << i)) {
                index_t xv_i = xv + edge_shifts[i][0];
                index_t yv_i = yv + edge_shifts[i][1];
                index_t zv_i = zv + edge_shifts[i][2];
                index_t edge_i = edge_shifts[i][3];

                index_t dxb = xv_i / resolution;
                index_t dyb = yv_i / resolution;
                index_t dzb = zv_i / resolution;

                index_t nb_idx = (dxb + 1) + (dyb + 1) * 3 + (dzb + 1) * 9;

                index_t block_idx_i =
                        *nb_block_indices_indexer.GetDataPtr<index_t>(
                                workload_block_idx, nb_idx);
                index_t* mesh_ptr_i =
                        mesh_structure_indexer.GetDataPtr<index_t>(
                                xv_i - dxb * resolution,
                                yv_i - dyb * resolution,
                                zv_i - dzb * resolution,
                                inv_indices_ptr[block_idx_i]);

                // Non-atomic write, but we are safe
                mesh_ptr_i[edge_i] = -1;
            }
        }
    });

    // Pass 1: determine valid number of vertices (if not preset)
#if defined(__CUDACC__)
    core::Tensor count(std::vector<index_t>{0}, {}, core::Int32, device);

    index_t* count_ptr = count.GetDataPtr<index_t>();
#else
    std::atomic<index_t> count_atomic(0);
    std::atomic<index_t>* count_ptr = &count_atomic;
#endif

    if (vertex_count < 0) {
        core::ParallelFor(device, n, [=] OPEN3D_DEVICE(index_t widx) {
            // Natural index (0, N) -> (block_idx, voxel_idx)
            index_t workload_block_idx = widx / resolution3;
            index_t voxel_idx = widx % resolution3;

            // voxel_idx -> (x_voxel, y_voxel, z_voxel)
            index_t xv, yv, zv;
            voxel_indexer.WorkloadToCoord(voxel_idx, &xv, &yv, &zv);

            // Obtain voxel's mesh struct ptr
            index_t* mesh_struct_ptr =
                    mesh_structure_indexer.GetDataPtr<index_t>(
                            xv, yv, zv, workload_block_idx);

            // Early quit -- no allocated vertex to compute
            if (mesh_struct_ptr[0] != -1 && mesh_struct_ptr[1] != -1 &&
                mesh_struct_ptr[2] != -1) {
                return;
            }

            // Enumerate 3 edges in the voxel
            for (index_t e = 0; e < 3; ++e) {
                index_t vertex_idx = mesh_struct_ptr[e];
                if (vertex_idx != -1) continue;

                OPEN3D_ATOMIC_ADD(count_ptr, 1);
            }
        });

#if defined(__CUDACC__)
        vertex_count = count.Item<index_t>();
#else
        vertex_count = (*count_ptr).load();
#endif
    }

    utility::LogDebug("Total vertex count = {}", vertex_count);
    vertices = core::Tensor({vertex_count, 3}, core::Float32, device);

    vertex_normals = core::Tensor({vertex_count, 3}, core::Float32, device);
    ArrayIndexer normal_indexer = ArrayIndexer(vertex_normals, 1);

    ArrayIndexer color_indexer;
    if (color_base_ptr) {
        vertex_colors = core::Tensor({vertex_count, 3}, core::Float32, device);
        color_indexer = ArrayIndexer(vertex_colors, 1);
    }

    ArrayIndexer block_keys_indexer(block_keys, 1);
    ArrayIndexer vertex_indexer(vertices, 1);

#if defined(__CUDACC__)
    count = core::Tensor(std::vector<index_t>{0}, {}, core::Int32, device);
    count_ptr = count.GetDataPtr<index_t>();
#else
    (*count_ptr) = 0;
#endif

    // Pass 2: extract vertices.

    core::ParallelFor(device, n, [=] OPEN3D_DEVICE(index_t widx) {
        auto GetLinearIdx = [&] OPEN3D_DEVICE(
                                    index_t xo, index_t yo, index_t zo,
                                    index_t curr_block_idx) -> index_t {
            return DeviceGetLinearIdx(xo, yo, zo, curr_block_idx, resolution,
                                      nb_block_masks_indexer,
                                      nb_block_indices_indexer);
        };

        auto GetNormal = [&] OPEN3D_DEVICE(index_t xo, index_t yo, index_t zo,
                                           index_t curr_block_idx, float* n) {
            return DeviceGetNormal<tsdf_t>(
                    tsdf_base_ptr, xo, yo, zo, curr_block_idx, n, resolution,
                    nb_block_masks_indexer, nb_block_indices_indexer);
        };

        // Natural index (0, N) -> (block_idx, voxel_idx)
        index_t workload_block_idx = widx / resolution3;
        index_t block_idx = indices_ptr[workload_block_idx];
        index_t voxel_idx = widx % resolution3;

        // block_idx -> (x_block, y_block, z_block)
        index_t* block_key_ptr =
                block_keys_indexer.GetDataPtr<index_t>(block_idx);
        index_t xb = block_key_ptr[0];
        index_t yb = block_key_ptr[1];
        index_t zb = block_key_ptr[2];

        // voxel_idx -> (x_voxel, y_voxel, z_voxel)
        index_t xv, yv, zv;
        voxel_indexer.WorkloadToCoord(voxel_idx, &xv, &yv, &zv);

        // global coordinate (in voxels)
        index_t x = xb * resolution + xv;
        index_t y = yb * resolution + yv;
        index_t z = zb * resolution + zv;

        // Obtain voxel's mesh struct ptr
        index_t* mesh_struct_ptr = mesh_structure_indexer.GetDataPtr<index_t>(
                xv, yv, zv, workload_block_idx);

        // Early quit -- no allocated vertex to compute
        if (mesh_struct_ptr[0] != -1 && mesh_struct_ptr[1] != -1 &&
            mesh_struct_ptr[2] != -1) {
            return;
        }

        // Obtain voxel ptr
        index_t linear_idx = resolution3 * block_idx + voxel_idx;
        float tsdf_o = tsdf_base_ptr[linear_idx];

        float no[3] = {0}, ne[3] = {0};

        // Get normal at origin
        GetNormal(xv, yv, zv, workload_block_idx, no);

        // Enumerate 3 edges in the voxel
        for (index_t e = 0; e < 3; ++e) {
            index_t vertex_idx = mesh_struct_ptr[e];
            if (vertex_idx != -1) continue;

            index_t linear_idx_e =
                    GetLinearIdx(xv + (e == 0), yv + (e == 1), zv + (e == 2),
                                 workload_block_idx);
            OPEN3D_ASSERT(linear_idx_e > 0 &&
                          "Internal error: GetVoxelAt returns nullptr.");
            float tsdf_e = tsdf_base_ptr[linear_idx_e];
            float ratio = (0 - tsdf_o) / (tsdf_e - tsdf_o);

            index_t idx = OPEN3D_ATOMIC_ADD(count_ptr, 1);
            mesh_struct_ptr[e] = idx;

            float ratio_x = ratio * index_t(e == 0);
            float ratio_y = ratio * index_t(e == 1);
            float ratio_z = ratio * index_t(e == 2);

            float* vertex_ptr = vertex_indexer.GetDataPtr<float>(idx);
            vertex_ptr[0] = voxel_size * (x + ratio_x);
            vertex_ptr[1] = voxel_size * (y + ratio_y);
            vertex_ptr[2] = voxel_size * (z + ratio_z);

            // Get normal at edge and interpolate
            float* normal_ptr = normal_indexer.GetDataPtr<float>(idx);
            GetNormal(xv + (e == 0), yv + (e == 1), zv + (e == 2),
                      workload_block_idx, ne);
            float nx = (1 - ratio) * no[0] + ratio * ne[0];
            float ny = (1 - ratio) * no[1] + ratio * ne[1];
            float nz = (1 - ratio) * no[2] + ratio * ne[2];
            float norm = static_cast<float>(sqrt(nx * nx + ny * ny + nz * nz) +
                                            1e-5);
            normal_ptr[0] = nx / norm;
            normal_ptr[1] = ny / norm;
            normal_ptr[2] = nz / norm;

            if (color_base_ptr) {
                float* color_ptr = color_indexer.GetDataPtr<float>(idx);
                float r_o = color_base_ptr[linear_idx * 3 + 0];
                float g_o = color_base_ptr[linear_idx * 3 + 1];
                float b_o = color_base_ptr[linear_idx * 3 + 2];

                float r_e = color_base_ptr[linear_idx_e * 3 + 0];
                float g_e = color_base_ptr[linear_idx_e * 3 + 1];
                float b_e = color_base_ptr[linear_idx_e * 3 + 2];

                color_ptr[0] = ((1 - ratio) * r_o + ratio * r_e) / 255.0f;
                color_ptr[1] = ((1 - ratio) * g_o + ratio * g_e) / 255.0f;
                color_ptr[2] = ((1 - ratio) * b_o + ratio * b_e) / 255.0f;
            }
        }
    });

    // Pass 3: connect vertices and form triangles.
    index_t triangle_count = vertex_count * 3;
    triangles = core::Tensor({triangle_count, 3}, core::Int32, device);
    ArrayIndexer triangle_indexer(triangles, 1);

#if defined(__CUDACC__)
    count = core::Tensor(std::vector<index_t>{0}, {}, core::Int32, device);
    count_ptr = count.GetDataPtr<index_t>();
#else
    (*count_ptr) = 0;
#endif
    core::ParallelFor(device, n, [=] OPEN3D_DEVICE(index_t widx) {
        // Natural index (0, N) -> (block_idx, voxel_idx)
        index_t workload_block_idx = widx / resolution3;
        index_t voxel_idx = widx % resolution3;

        // voxel_idx -> (x_voxel, y_voxel, z_voxel)
        index_t xv, yv, zv;
        voxel_indexer.WorkloadToCoord(voxel_idx, &xv, &yv, &zv);

        // Obtain voxel's mesh struct ptr
        index_t* mesh_struct_ptr = mesh_structure_indexer.GetDataPtr<index_t>(
                xv, yv, zv, workload_block_idx);

        index_t table_idx = mesh_struct_ptr[3];
        if (tri_count[table_idx] == 0) return;

        for (index_t tri = 0; tri < 16; tri += 3) {
            if (tri_table[table_idx][tri] == -1) return;

            index_t tri_idx = OPEN3D_ATOMIC_ADD(count_ptr, 1);

            for (index_t vertex = 0; vertex < 3; ++vertex) {
                index_t edge = tri_table[table_idx][tri + vertex];

                index_t xv_i = xv + edge_shifts[edge][0];
                index_t yv_i = yv + edge_shifts[edge][1];
                index_t zv_i = zv + edge_shifts[edge][2];
                index_t edge_i = edge_shifts[edge][3];

                index_t dxb = xv_i / resolution;
                index_t dyb = yv_i / resolution;
                index_t dzb = zv_i / resolution;

                index_t nb_idx = (dxb + 1) + (dyb + 1) * 3 + (dzb + 1) * 9;

                index_t block_idx_i =
                        *nb_block_indices_indexer.GetDataPtr<index_t>(
                                workload_block_idx, nb_idx);
                index_t* mesh_struct_ptr_i =
                        mesh_structure_indexer.GetDataPtr<index_t>(
                                xv_i - dxb * resolution,
                                yv_i - dyb * resolution,
                                zv_i - dzb * resolution,
                                inv_indices_ptr[block_idx_i]);

                index_t* triangle_ptr =
                        triangle_indexer.GetDataPtr<index_t>(tri_idx);
                triangle_ptr[2 - vertex] = mesh_struct_ptr_i[edge_i];
            }
        }
    });

#if defined(__CUDACC__)
    triangle_count = count.Item<index_t>();
#else
    triangle_count = (*count_ptr).load();
#endif
    utility::LogDebug("Total triangle count = {}", triangle_count);
    triangles = triangles.Slice(0, 0, triangle_count);
}

}  // namespace voxel_grid
}  // namespace kernel
}  // namespace geometry
}  // namespace t
}  // namespace open3d