// ----------------------------------------------------------------------------
// - Open3D: www.open3d.org -
// ----------------------------------------------------------------------------
// Copyright (c) 2018-2023 www.open3d.org
// SPDX-License-Identifier: MIT
// ----------------------------------------------------------------------------

// NOTE(review): in this copy of the file every angle-bracketed token appears
// to have been stripped: the two system header names below, the template
// parameter lists after `template`, and the explicit template arguments of
// GetDataPtr / Item / static_cast / std::atomic / std::vector /
// std::shared_ptr / std::dynamic_pointer_cast / MiniVec*. The code tokens are
// reproduced exactly as found; restore the missing pieces from the upstream
// source before compiling.
#include
#include

#include "open3d/core/Dispatch.h"
#include "open3d/core/Dtype.h"
#include "open3d/core/MemoryManager.h"
#include "open3d/core/SizeVector.h"
#include "open3d/core/Tensor.h"
#include "open3d/core/hashmap/Dispatch.h"
#include "open3d/t/geometry/Utility.h"
#include "open3d/t/geometry/kernel/GeometryIndexer.h"
#include "open3d/t/geometry/kernel/GeometryMacros.h"
#include "open3d/t/geometry/kernel/VoxelBlockGrid.h"
#include "open3d/utility/Logging.h"
#include "open3d/utility/Timer.h"

namespace open3d {
namespace t {
namespace geometry {
namespace kernel {
namespace voxel_grid {

// Integer type used for all voxel / block indexing in this file.
using index_t = int;
using ArrayIndexer = TArrayIndexer;

/// Computes, for every voxel of every listed block, its scaled 3D coordinate
/// and its flattened index into the block value buffers.
///
/// One work item is launched per element of \p flattened_indices; work item
/// `i` handles voxel `i % resolution^3` of the block
/// `buf_indices[i / resolution^3]`.
///
/// \param buf_indices Block buffer indices, one per block to enumerate.
/// \param block_keys Block coordinates, read as 3 consecutive index_t values
/// per block (offset `block_idx * 3`).
/// \param voxel_coords Output; receives 3 floats per work item:
/// `(block_key * resolution + voxel_coord) * voxel_size`.
/// \param flattened_indices Output; receives
/// `block_idx * resolution^3 + voxel_idx` per work item. Its length defines
/// the number of work items.
/// \param resolution Number of voxels along each edge of a block.
/// \param voxel_size Scale factor applied to the integer voxel coordinate.
#if defined(__CUDACC__)
void GetVoxelCoordinatesAndFlattenedIndicesCUDA
#else
void GetVoxelCoordinatesAndFlattenedIndicesCPU
#endif
        (const core::Tensor& buf_indices,
         const core::Tensor& block_keys,
         core::Tensor& voxel_coords,
         core::Tensor& flattened_indices,
         index_t resolution,
         float voxel_size) {
    core::Device device = buf_indices.GetDevice();

    const index_t* buf_indices_ptr = buf_indices.GetDataPtr();
    const index_t* block_key_ptr = block_keys.GetDataPtr();

    float* voxel_coords_ptr = voxel_coords.GetDataPtr();
    int64_t* flattened_indices_ptr = flattened_indices.GetDataPtr();

    index_t n = flattened_indices.GetLength();
    ArrayIndexer voxel_indexer({resolution, resolution, resolution});
    index_t resolution3 = resolution * resolution * resolution;

    core::ParallelFor(device, n, [=] OPEN3D_DEVICE(index_t workload_idx) {
        // Split the flat work index into (which block, which voxel in it).
        index_t block_idx = buf_indices_ptr[workload_idx / resolution3];
        index_t voxel_idx = workload_idx % resolution3;

        index_t block_key_offset = block_idx * 3;
        index_t xb = block_key_ptr[block_key_offset + 0];
        index_t yb = block_key_ptr[block_key_offset + 1];
        index_t zb = block_key_ptr[block_key_offset + 2];

        index_t xv, yv, zv;
        voxel_indexer.WorkloadToCoord(voxel_idx, &xv, &yv, &zv);

        float x = (xb * resolution + xv) * voxel_size;
        float y = (yb * resolution + yv) * voxel_size;
        float z = (zb * resolution + zv) * voxel_size;

        // NOTE(review): the right-hand side is evaluated in index_t (int)
        // before being widened to the int64_t output, and the offset below is
        // also an int product; both could overflow for very large grids --
        // confirm the expected upper bound on block count.
        flattened_indices_ptr[workload_idx] =
                block_idx * resolution3 + voxel_idx;

        index_t voxel_coords_offset = workload_idx * 3;
        voxel_coords_ptr[voxel_coords_offset + 0] = x;
        voxel_coords_ptr[voxel_coords_offset + 1] = y;
        voxel_coords_ptr[voxel_coords_offset + 2] = z;
    });
}

/// Resolves a voxel coordinate that may lie just outside the current block to
/// a flattened index into the block value buffers.
///
/// Each coordinate is wrapped into [0, resolution) with
/// `(c + resolution) % resolution`; the difference between the original and
/// the wrapped coordinate is passed through Sign() to pick the neighbor block
/// (presumably -1 / 0 / +1 per axis, giving `nb_idx` in [0, 27) with 13 being
/// the block itself -- confirm against Sign()).
///
/// \param xo, yo, zo Voxel coordinate relative to the current block.
/// \param curr_block_idx Index of the current block used to address both
/// neighbor tables.
/// \param resolution Number of voxels along each edge of a block.
/// \param nb_block_masks_indexer Per (block, neighbor) flag; false means the
/// neighbor block is unavailable.
/// \param nb_block_indices_indexer Per (block, neighbor) block buffer index.
/// \return `((block * resolution + z) * resolution + y) * resolution + x` for
/// the wrapped coordinate, or -1 if the neighbor's mask is false.
inline OPEN3D_DEVICE index_t
DeviceGetLinearIdx(index_t xo,
                   index_t yo,
                   index_t zo,
                   index_t curr_block_idx,
                   index_t resolution,
                   const ArrayIndexer& nb_block_masks_indexer,
                   const ArrayIndexer& nb_block_indices_indexer) {
    // Adding `resolution` before the modulo keeps the result non-negative for
    // coordinates down to -resolution.
    index_t xn = (xo + resolution) % resolution;
    index_t yn = (yo + resolution) % resolution;
    index_t zn = (zo + resolution) % resolution;

    index_t dxb = Sign(xo - xn);
    index_t dyb = Sign(yo - yn);
    index_t dzb = Sign(zo - zn);

    index_t nb_idx = (dxb + 1) + (dyb + 1) * 3 + (dzb + 1) * 9;

    bool block_mask_i =
            *nb_block_masks_indexer.GetDataPtr(curr_block_idx, nb_idx);
    if (!block_mask_i) return -1;

    index_t block_idx_i = *nb_block_indices_indexer.GetDataPtr(
            curr_block_idx, nb_idx);

    return (((block_idx_i * resolution) + zn) * resolution + yn) * resolution +
           xn;
}

/// Estimates the (unnormalized) TSDF gradient at a voxel as the difference of
/// the +1 and -1 neighbors along each axis (no division by the spacing).
///
/// A component of \p n is written only when both neighbors along that axis
/// resolve to a valid index; otherwise it keeps the value the caller put
/// there.
///
/// \param tsdf_base_ptr Base pointer of the TSDF value buffer.
/// \param xo, yo, zo Voxel coordinate relative to the current block.
/// \param curr_block_idx Index of the current block in the neighbor tables.
/// \param n Output array of 3 floats.
/// \param resolution Number of voxels along each edge of a block.
/// \param nb_block_masks_indexer, nb_block_indices_indexer Neighbor tables
/// forwarded to DeviceGetLinearIdx().
template
inline OPEN3D_DEVICE void DeviceGetNormal(
        const tsdf_t* tsdf_base_ptr,
        index_t xo,
        index_t yo,
        index_t zo,
        index_t curr_block_idx,
        float* n,
        index_t resolution,
        const ArrayIndexer& nb_block_masks_indexer,
        const ArrayIndexer& nb_block_indices_indexer) {
    // The lambda parameters intentionally shadow the function's xo/yo/zo.
    auto GetLinearIdx = [&] OPEN3D_DEVICE(index_t xo, index_t yo,
                                          index_t zo) -> index_t {
        return DeviceGetLinearIdx(xo, yo, zo, curr_block_idx, resolution,
                                  nb_block_masks_indexer,
                                  nb_block_indices_indexer);
    };
    index_t vxp = GetLinearIdx(xo + 1, yo, zo);
    index_t vxn = GetLinearIdx(xo - 1, yo, zo);
    index_t vyp = GetLinearIdx(xo, yo + 1, zo);
    index_t vyn = GetLinearIdx(xo, yo - 1, zo);
    index_t vzp = GetLinearIdx(xo, yo, zo + 1);
    index_t vzn = GetLinearIdx(xo, yo, zo - 1);
    if (vxp >= 0 && vxn >= 0) n[0] = tsdf_base_ptr[vxp] - tsdf_base_ptr[vxn];
    if (vyp >= 0 && vyn >= 0) n[1] = tsdf_base_ptr[vyp] - tsdf_base_ptr[vyn];
    if (vzp >= 0 && vzn >= 0) n[2] = tsdf_base_ptr[vzp] - tsdf_base_ptr[vzn];
};

/// Fuses one depth (and optionally color) image into the listed voxel blocks.
///
/// One work item per voxel of every block in \p indices. Each voxel is
/// transformed into the camera frame, projected into the depth image, and, if
/// the observation passes the checks below, its TSDF (and color) is updated
/// as a running average and its weight is incremented by 1.
///
/// A voxel is left untouched when its projection is outside the depth image,
/// when the scaled depth is <= 0 or > \p depth_max, when the voxel is behind
/// the camera (zc <= 0), or when sdf < -\p sdf_trunc.
///
/// \param depth Depth image; values are divided by \p depth_scale.
/// \param color Color image; ignored if it has no elements or the blocks have
/// no "color" attribute. Float32 colors are multiplied by 255 before fusion.
/// \param indices Block buffer indices of the blocks to update.
/// \param block_keys Block coordinates (3 index_t values per block).
/// \param block_value_map Must contain "tsdf" and "weight"; "color" optional.
/// \param depth_intrinsic, color_intrinsic Camera intrinsics.
/// \param extrinsics Transform applied to voxel coordinates to reach the
/// camera frame.
/// \param resolution Number of voxels along each edge of a block.
/// \param voxel_size Passed to the transform indexer as the scale of the
/// integer voxel coordinates.
/// \param sdf_trunc Truncation distance; the stored TSDF is sdf / sdf_trunc.
/// \param depth_scale Divisor converting raw depth values.
/// \param depth_max Maximum accepted (scaled) depth.
template
#if defined(__CUDACC__)
void IntegrateCUDA
#else
void IntegrateCPU
#endif
        (const core::Tensor& depth,
         const core::Tensor& color,
         const core::Tensor& indices,
         const core::Tensor& block_keys,
         TensorMap& block_value_map,
         const core::Tensor& depth_intrinsic,
         const core::Tensor& color_intrinsic,
         const core::Tensor& extrinsics,
         index_t resolution,
         float voxel_size,
         float sdf_trunc,
         float depth_scale,
         float depth_max) {
    // Parameters
    index_t resolution2 = resolution * resolution;
    index_t resolution3 = resolution2 * resolution;

    TransformIndexer transform_indexer(depth_intrinsic, extrinsics, voxel_size);
    // Identity extrinsics: this indexer is only used for projection with the
    // color intrinsics.
    TransformIndexer colormap_indexer(
            color_intrinsic,
            core::Tensor::Eye(4, core::Dtype::Float64, core::Device("CPU:0")));

    ArrayIndexer voxel_indexer({resolution, resolution, resolution});

    ArrayIndexer block_keys_indexer(block_keys, 1);
    ArrayIndexer depth_indexer(depth, 2);
    core::Device device = block_keys.GetDevice();

    const index_t* indices_ptr = indices.GetDataPtr();

    if (!block_value_map.Contains("tsdf") ||
        !block_value_map.Contains("weight")) {
        utility::LogError(
                "TSDF and/or weight not allocated in blocks, please implement "
                "customized integration.");
    }
    tsdf_t* tsdf_base_ptr = block_value_map.at("tsdf").GetDataPtr();
    weight_t* weight_base_ptr = block_value_map.at("weight").GetDataPtr();

    bool integrate_color =
            block_value_map.Contains("color") && color.NumElements() > 0;
    color_t* color_base_ptr = nullptr;
    ArrayIndexer color_indexer;

    float color_multiplier = 1.0;
    if (integrate_color) {
        color_base_ptr = block_value_map.at("color").GetDataPtr();
        color_indexer = ArrayIndexer(color, 2);

        // Float32: [0, 1] -> [0, 255]
        if (color.GetDtype() == core::Float32) {
            color_multiplier = 255.0;
        }
    }

    index_t n = indices.GetLength() * resolution3;
    core::ParallelFor(device, n, [=] OPEN3D_DEVICE(index_t workload_idx) {
        // Natural index (0, N) -> (block_idx, voxel_idx)
        index_t block_idx = indices_ptr[workload_idx / resolution3];
        index_t voxel_idx = workload_idx % resolution3;

        /// Coordinate transform
        // block_idx -> (x_block, y_block, z_block)
        index_t* block_key_ptr = block_keys_indexer.GetDataPtr(block_idx);
        index_t xb = block_key_ptr[0];
        index_t yb = block_key_ptr[1];
        index_t zb = block_key_ptr[2];

        // voxel_idx -> (x_voxel, y_voxel, z_voxel)
        index_t xv, yv, zv;
        voxel_indexer.WorkloadToCoord(voxel_idx, &xv, &yv, &zv);

        // coordinate in world (in voxel)
        index_t x = xb * resolution + xv;
        index_t y = yb * resolution + yv;
        index_t z = zb * resolution + zv;

        // coordinate in camera (in voxel -> in meter)
        float xc, yc, zc, u, v;
        transform_indexer.RigidTransform(static_cast(x), static_cast(y),
                                         static_cast(z), &xc, &yc, &zc);

        // coordinate in image (in pixel)
        transform_indexer.Project(xc, yc, zc, &u, &v);
        if (!depth_indexer.InBoundary(u, v)) {
            return;
        }

        index_t ui = static_cast(u);
        index_t vi = static_cast(v);

        // Associate image workload and compute SDF and
        // TSDF.
        float depth = *depth_indexer.GetDataPtr(ui, vi) / depth_scale;

        float sdf = depth - zc;
        if (depth <= 0 || depth > depth_max || zc <= 0 || sdf < -sdf_trunc) {
            return;
        }
        // Clamp from above and normalize, so the stored value is in [-1, 1].
        sdf = sdf < sdf_trunc ? sdf : sdf_trunc;
        sdf /= sdf_trunc;

        index_t linear_idx = block_idx * resolution3 + voxel_idx;

        tsdf_t* tsdf_ptr = tsdf_base_ptr + linear_idx;
        weight_t* weight_ptr = weight_base_ptr + linear_idx;

        // Running average with a per-observation weight of 1:
        // new = (w * old + observation) / (w + 1).
        float inv_wsum = 1.0f / (*weight_ptr + 1);
        float weight = *weight_ptr;
        *tsdf_ptr = (weight * (*tsdf_ptr) + sdf) * inv_wsum;

        if (integrate_color) {
            color_t* color_ptr = color_base_ptr + 3 * linear_idx;

            // Unproject ui, vi with depth_intrinsic, then project back with
            // color_intrinsic
            float x, y, z;
            transform_indexer.Unproject(ui, vi, 1.0, &x, &y, &z);

            float uf, vf;
            colormap_indexer.Project(x, y, z, &uf, &vf);
            if (color_indexer.InBoundary(uf, vf)) {
                // NOTE(review): round() may return an index one past the last
                // column/row when uf/vf is within 0.5 of the upper bound, if
                // InBoundary only checks `< extent` -- verify.
                ui = round(uf);
                vi = round(vf);

                input_color_t* input_color_ptr =
                        color_indexer.GetDataPtr(ui, vi);

                for (index_t i = 0; i < 3; ++i) {
                    color_ptr[i] = (weight * color_ptr[i] +
                                    input_color_ptr[i] * color_multiplier) *
                                   inv_wsum;
                }
            }
        }
        *weight_ptr = weight + 1;
    });

#if defined(__CUDACC__)
    core::cuda::Synchronize();
#endif
}

/// Builds a down-sampled per-pixel (min, max) depth range map from the
/// projected bounding rectangles of the given blocks.
///
/// Pass 0 projects the 8 corners of every block into the down-sampled image,
/// clips the bounding rectangle, and appends one "fragment" per 16x16 tile of
/// that rectangle to \p fragment_buffer. Pass 0.5 initializes the range map
/// to (depth_max, depth_min). Pass 1 visits every pixel of every fragment and
/// lowers/raises the pixel's min/max with the fragment's z range.
///
/// \param block_keys Block coordinates (3 ints per block).
/// \param range_minmax_map Output; reallocated here as a
/// (h / down_factor, w / down_factor, 2) Float32 tensor.
/// \param intrinsics, extrinsics Camera parameters used for projection.
/// \param h, w Full-resolution image size.
/// \param down_factor Down-sampling factor of the range map.
/// \param block_resolution Number of voxels along each edge of a block.
/// \param voxel_size Edge length of a voxel.
/// \param depth_min, depth_max Initial (max, min) values of the range map and
/// of each block's z range.
/// \param fragment_buffer Scratch buffer of shape (N, 6). Allocated here if
/// empty, and reallocated at the end to the number of fragments that were
/// actually requested if that differs from the number processed.
#if defined(__CUDACC__)
void EstimateRangeCUDA
#else
void EstimateRangeCPU
#endif
        (const core::Tensor& block_keys,
         core::Tensor& range_minmax_map,
         const core::Tensor& intrinsics,
         const core::Tensor& extrinsics,
         int h,
         int w,
         int down_factor,
         int64_t block_resolution,
         float voxel_size,
         float depth_min,
         float depth_max,
         core::Tensor& fragment_buffer) {
    // TODO(wei): reserve it in a reusable buffer

    // Every 2 channels: (min, max)
    int h_down = h / down_factor;
    int w_down = w / down_factor;
    range_minmax_map = core::Tensor({h_down, w_down, 2}, core::Float32,
                                    block_keys.GetDevice());
    NDArrayIndexer range_map_indexer(range_minmax_map, 2);

    // Every 6 channels: (z_min, z_max, v_min, u_min, v_max, u_max), matching
    // the order in which Pass 0 writes frag_ptr[0..5].
    const int fragment_size = 16;

    if (fragment_buffer.GetDataPtr() == 0 ||
        fragment_buffer.NumElements() == 0) {
        // Rough heuristic; should tend to overallocate
        // NOTE(review): this can evaluate to 0 for small images or large
        // voxel sizes, which makes frag_buffer_size 0 and the clamped
        // frag_count below -1 -- confirm ParallelFor tolerates that.
        const int reserve_frag_buffer_size =
                h_down * w_down / (fragment_size * fragment_size) / voxel_size;
        fragment_buffer = core::Tensor({reserve_frag_buffer_size, 6},
                                       core::Float32, block_keys.GetDevice());
    }

    const int frag_buffer_size = fragment_buffer.NumElements() / 6;

    NDArrayIndexer frag_buffer_indexer(fragment_buffer, 1);
    NDArrayIndexer block_keys_indexer(block_keys, 1);
    TransformIndexer w2c_transform_indexer(intrinsics, extrinsics);

    // Shared fragment counter: a 1-element tensor on the block_keys device
    // for the CUDA build, a std::atomic for the CPU build.
#if defined(__CUDACC__)
    core::Tensor count(std::vector{0}, {1}, core::Int32,
                       block_keys.GetDevice());
    int* count_ptr = count.GetDataPtr();
#else
    std::atomic count_atomic(0);
    std::atomic* count_ptr = &count_atomic;
#endif

#ifndef __CUDACC__
    using std::max;
    using std::min;
#endif

    // Pass 0: iterate over blocks, fill in a rendering fragment array
    core::ParallelFor(
            block_keys.GetDevice(), block_keys.GetLength(),
            [=] OPEN3D_DEVICE(int64_t workload_idx) {
                int* key = block_keys_indexer.GetDataPtr(workload_idx);

                // Start from an inverted (empty) rectangle / z interval.
                int u_min = w_down - 1, v_min = h_down - 1, u_max = 0,
                    v_max = 0;
                float z_min = depth_max, z_max = depth_min;

                float xc, yc, zc, u, v;

                // Project 8 corners to low-res image and form a rectangle
                for (int i = 0; i < 8; ++i) {
                    float xw = (key[0] + ((i & 1) > 0)) * block_resolution *
                               voxel_size;
                    float yw = (key[1] + ((i & 2) > 0)) * block_resolution *
                               voxel_size;
                    float zw = (key[2] + ((i & 4) > 0)) * block_resolution *
                               voxel_size;

                    w2c_transform_indexer.RigidTransform(xw, yw, zw, &xc, &yc,
                                                         &zc);
                    // Corners behind the camera do not contribute.
                    if (zc <= 0) continue;

                    // Project to the down sampled image buffer
                    w2c_transform_indexer.Project(xc, yc, zc, &u, &v);
                    u /= down_factor;
                    v /= down_factor;

                    v_min = min(static_cast(floorf(v)), v_min);
                    v_max = max(static_cast(ceilf(v)), v_max);

                    u_min = min(static_cast(floorf(u)), u_min);
                    u_max = max(static_cast(ceilf(u)), u_max);

                    z_min = min(z_min, zc);
                    z_max = max(z_max, zc);
                }

                // Clip the rectangle to the down-sampled image.
                v_min = max(0, v_min);
                v_max = min(h_down - 1, v_max);

                u_min = max(0, u_min);
                u_max = min(w_down - 1, u_max);

                if (v_min >= v_max || u_min >= u_max || z_min >= z_max) return;

                // Divide the rectangle into small 16x16 fragments
                int frag_v_count =
                        ceilf(float(v_max - v_min + 1) / float(fragment_size));
                int frag_u_count =
                        ceilf(float(u_max - u_min + 1) / float(fragment_size));

                // Reserve a contiguous run of fragment slots. The counter is
                // advanced even if the run does not fit, so that it ends up
                // holding the total number of fragments requested.
                int frag_count = frag_v_count * frag_u_count;
                int frag_count_start = OPEN3D_ATOMIC_ADD(count_ptr, frag_count);
                int frag_count_end = frag_count_start + frag_count;
                if (frag_count_end >= frag_buffer_size) {
                    return;
                }

                int offset = 0;
                for (int frag_v = 0; frag_v < frag_v_count; ++frag_v) {
                    for (int frag_u = 0; frag_u < frag_u_count;
                         ++frag_u, ++offset) {
                        float* frag_ptr = frag_buffer_indexer.GetDataPtr(
                                frag_count_start + offset);
                        // zmin, zmax
                        frag_ptr[0] = z_min;
                        frag_ptr[1] = z_max;

                        // vmin, umin
                        frag_ptr[2] = v_min + frag_v * fragment_size;
                        frag_ptr[3] = u_min + frag_u * fragment_size;

                        // vmax, umax
                        frag_ptr[4] = min(frag_ptr[2] + fragment_size - 1,
                                          static_cast(v_max));
                        frag_ptr[5] = min(frag_ptr[3] + fragment_size - 1,
                                          static_cast(u_max));
                    }
                }
            });
#if defined(__CUDACC__)
    int needed_frag_count = count[0].Item();
#else
    int needed_frag_count = (*count_ptr).load();
#endif

    int frag_count = needed_frag_count;
    if (frag_count >= frag_buffer_size) {
        utility::LogWarning(
                "Could not generate full range map; allocated {} fragments but "
                "needed {}",
                frag_buffer_size, frag_count);
        // NOTE(review): slots reserved by a block that hit the early return
        // in Pass 0 were never written, but Pass 1 still reads every slot
        // below this bound; those rows hold whatever the buffer contained
        // before this call -- confirm that is benign.
        frag_count = frag_buffer_size - 1;
    } else {
        utility::LogDebug("EstimateRange Allocated {} fragments and needed {}",
                          frag_buffer_size, frag_count);
    }

    // Pass 0.5: Fill in range map to prepare for atomic min/max
    core::ParallelFor(block_keys.GetDevice(), h_down * w_down,
                      [=] OPEN3D_DEVICE(int64_t workload_idx) {
                          int v = workload_idx / w_down;
                          int u = workload_idx % w_down;
                          float* range_ptr =
                                  range_map_indexer.GetDataPtr(u, v);
                          range_ptr[0] = depth_max;
                          range_ptr[1] = depth_min;
                      });

    // Pass 1: iterate over rendering fragment array, fill-in range
    core::ParallelFor(
            block_keys.GetDevice(), frag_count * fragment_size * fragment_size,
            [=] OPEN3D_DEVICE(int64_t workload_idx) {
                // One work item per pixel slot of a 16x16 fragment.
                int frag_idx = workload_idx / (fragment_size * fragment_size);
                int local_idx = workload_idx % (fragment_size * fragment_size);
                int dv = local_idx / fragment_size;
                int du = local_idx % fragment_size;

                float* frag_ptr = frag_buffer_indexer.GetDataPtr(frag_idx);
                int v_min = static_cast(frag_ptr[2]);
                int u_min = static_cast(frag_ptr[3]);
                int v_max = static_cast(frag_ptr[4]);
                int u_max = static_cast(frag_ptr[5]);

                int v = v_min + dv;
                int u = u_min + du;
                // Fragments on the rectangle border may be smaller than 16x16.
                if (v > v_max || u > u_max) return;

                float z_min = frag_ptr[0];
                float z_max = frag_ptr[1];
                float* range_ptr = range_map_indexer.GetDataPtr(u, v);
                // Several fragments can cover the same pixel, so the update
                // is atomic on CUDA and serialized by a named critical
                // section on CPU.
#ifdef __CUDACC__
                atomicMinf(&(range_ptr[0]), z_min);
                atomicMaxf(&(range_ptr[1]), z_max);
#else
#pragma omp critical(EstimateRangeCPU)
                {
                    range_ptr[0] = min(z_min, range_ptr[0]);
                    range_ptr[1] = max(z_max, range_ptr[1]);
                }
#endif
            });

#if defined(__CUDACC__)
    core::cuda::Synchronize();
#endif

    // Only true when the overflow branch above lowered frag_count; the buffer
    // is then resized to the requested count (contents are not preserved).
    if (needed_frag_count != frag_count) {
        utility::LogInfo("Reallocating {} fragments for EstimateRange (was {})",
                         needed_frag_count, frag_count);

        fragment_buffer = core::Tensor({needed_frag_count, 6}, core::Float32,
                                       block_keys.GetDevice());
    }
}

/// Single-entry cache mapping the most recently looked-up block coordinate
/// to its block buffer index. A stored block_idx of -1 (as used by the
/// initializer in RayCast) makes Check() report a miss.
struct MiniVecCache {
    index_t x;
    index_t y;
    index_t z;
    index_t block_idx;

    /// Returns the cached block index if (xin, yin, zin) equals the cached
    /// coordinate, otherwise -1.
    inline index_t OPEN3D_DEVICE Check(index_t xin, index_t yin, index_t zin) {
        return (xin == x && yin == y && zin == z) ? block_idx : -1;
    }

    /// Replaces the cached coordinate and block index.
    inline void OPEN3D_DEVICE Update(index_t xin,
                                     index_t yin,
                                     index_t zin,
                                     index_t block_idx_in) {
        x = xin;
        y = yin;
        z = zin;
        block_idx = block_idx_in;
    }
};

/// Renders the TSDF volume by marching one ray per pixel.
///
/// For every pixel the ray is marched from range[0] to range[1] (taken from
/// the down-sampled \p range map). Missing blocks are skipped in steps of one
/// block; inside allocated blocks the step is max(voxel_size,
/// tsdf * sdf_trunc). A surface is reported at the first sample where the
/// previous TSDF is > 0, the current TSDF is <= 0 and the current weight is
/// >= \p weight_threshold; the hit parameter is linearly interpolated between
/// the two samples.
///
/// Outputs are written only for the entries present in \p renderings_map:
/// "depth", "vertex", "normal", "color", and the differentiable-rendering
/// entries "index", "mask", "interp_ratio", "interp_ratio_dx/dy/dz" (8 values
/// per pixel, one per trilinear-interpolation corner). All requested outputs
/// are zeroed per pixel before marching.
///
/// \param hashmap Block hash map; must use the STDGPU backend on CUDA and the
/// TBB backend on CPU.
/// \param block_value_map Must contain "tsdf" and "weight"; "color" optional.
/// \param range Per-pixel (min, max) ray parameter range, down-sampled by
/// \p range_map_down_factor.
/// \param renderings_map Output tensors, see above.
/// \param intrinsic, extrinsics Camera parameters.
/// \param h, w Output image size.
/// \param block_resolution Number of voxels along each edge of a block.
/// \param voxel_size Edge length of a voxel.
/// \param depth_scale Multiplier applied to the hit parameter for "depth".
/// \param depth_min, depth_max Not referenced in the visible body.
/// \param weight_threshold Minimum weight for a zero crossing to count.
/// \param trunc_voxel_multiplier sdf_trunc = voxel_size * this value.
/// \param range_map_down_factor Down-sampling factor of \p range.
template
#if defined(__CUDACC__)
void RayCastCUDA
#else
void RayCastCPU
#endif
        (std::shared_ptr& hashmap,
         const TensorMap& block_value_map,
         const core::Tensor& range,
         TensorMap& renderings_map,
         const core::Tensor& intrinsic,
         const core::Tensor& extrinsics,
         index_t h,
         index_t w,
         index_t block_resolution,
         float voxel_size,
         float depth_scale,
         float depth_min,
         float depth_max,
         float weight_threshold,
         float trunc_voxel_multiplier,
         int range_map_down_factor) {
    using Key = utility::MiniVec;
    using Hash = utility::MiniVecHash;
    using Eq = utility::MiniVecEq;

    auto device_hashmap = hashmap->GetDeviceHashBackend();
#if defined(__CUDACC__)
    auto cuda_hashmap =
            std::dynamic_pointer_cast>(
                    device_hashmap);
    if (cuda_hashmap == nullptr) {
        utility::LogError(
                "Unsupported backend: CUDA raycasting only supports STDGPU.");
    }
    auto hashmap_impl = cuda_hashmap->GetImpl();
#else
    auto cpu_hashmap =
            std::dynamic_pointer_cast>(
                    device_hashmap);
    if (cpu_hashmap == nullptr) {
        utility::LogError(
                "Unsupported backend: CPU raycasting only supports TBB.");
    }
    auto hashmap_impl = *cpu_hashmap->GetImpl();
#endif

    core::Device device = hashmap->GetDevice();

    ArrayIndexer range_indexer(range, 2);

    // Default-constructed indexers; the kernel tests GetDataPtr() on each of
    // them to decide whether the corresponding output is enabled.

    // Geometry
    ArrayIndexer depth_indexer;
    ArrayIndexer vertex_indexer;
    ArrayIndexer normal_indexer;

    // Diff rendering
    ArrayIndexer index_indexer;
    ArrayIndexer mask_indexer;
    ArrayIndexer interp_ratio_indexer;
    ArrayIndexer interp_ratio_dx_indexer;
    ArrayIndexer interp_ratio_dy_indexer;
    ArrayIndexer interp_ratio_dz_indexer;

    // Color
    ArrayIndexer color_indexer;

    if (!block_value_map.Contains("tsdf") ||
        !block_value_map.Contains("weight")) {
        utility::LogError(
                "TSDF and/or weight not allocated in blocks, please implement "
                "customized integration.");
    }
    const tsdf_t* tsdf_base_ptr = block_value_map.at("tsdf").GetDataPtr();
    const weight_t* weight_base_ptr =
            block_value_map.at("weight").GetDataPtr();

    // Geometry
    if (renderings_map.Contains("depth")) {
        depth_indexer = ArrayIndexer(renderings_map.at("depth"), 2);
    }
    if (renderings_map.Contains("vertex")) {
        vertex_indexer = ArrayIndexer(renderings_map.at("vertex"), 2);
    }
    if (renderings_map.Contains("normal")) {
        normal_indexer = ArrayIndexer(renderings_map.at("normal"), 2);
    }

    // Diff rendering
    if (renderings_map.Contains("index")) {
        index_indexer = ArrayIndexer(renderings_map.at("index"), 2);
    }
    if (renderings_map.Contains("mask")) {
        mask_indexer = ArrayIndexer(renderings_map.at("mask"), 2);
    }
    if (renderings_map.Contains("interp_ratio")) {
        interp_ratio_indexer =
                ArrayIndexer(renderings_map.at("interp_ratio"), 2);
    }
    if (renderings_map.Contains("interp_ratio_dx")) {
        interp_ratio_dx_indexer =
                ArrayIndexer(renderings_map.at("interp_ratio_dx"), 2);
    }
    if (renderings_map.Contains("interp_ratio_dy")) {
        interp_ratio_dy_indexer =
                ArrayIndexer(renderings_map.at("interp_ratio_dy"), 2);
    }
    if (renderings_map.Contains("interp_ratio_dz")) {
        interp_ratio_dz_indexer =
                ArrayIndexer(renderings_map.at("interp_ratio_dz"), 2);
    }

    // Color
    bool render_color = false;
    if (block_value_map.Contains("color") && renderings_map.Contains("color")) {
        render_color = true;
        color_indexer = ArrayIndexer(renderings_map.at("color"), 2);
    }
    const color_t* color_base_ptr =
            render_color ? block_value_map.at("color").GetDataPtr() : nullptr;

    // Depth and vertex only need the hit point; every other output needs the
    // 8 voxels surrounding it.
    bool visit_neighbors = render_color || normal_indexer.GetDataPtr() ||
                           mask_indexer.GetDataPtr() ||
                           index_indexer.GetDataPtr() ||
                           interp_ratio_indexer.GetDataPtr() ||
                           interp_ratio_dx_indexer.GetDataPtr() ||
                           interp_ratio_dy_indexer.GetDataPtr() ||
                           interp_ratio_dz_indexer.GetDataPtr();

    TransformIndexer c2w_transform_indexer(
            intrinsic, t::geometry::InverseTransformation(extrinsics));
    TransformIndexer w2c_transform_indexer(intrinsic, extrinsics);

    index_t rows = h;
    index_t cols = w;
    index_t n = rows * cols;

    float block_size = voxel_size * block_resolution;
    index_t resolution2 = block_resolution * block_resolution;
    index_t resolution3 = resolution2 * block_resolution;

#ifndef __CUDACC__
    using std::max;
    using std::sqrt;
#endif

    core::ParallelFor(device, n, [=] OPEN3D_DEVICE(index_t workload_idx) {
        // Flattened value index of voxel (x_v, y_v, z_v) relative to block
        // (x_b, y_b, z_b). Voxel coordinates outside [0, block_resolution)
        // are wrapped and looked up in the neighboring block through the
        // cache / hash map. Returns -1 if that block does not exist.
        auto GetLinearIdxAtP = [&] OPEN3D_DEVICE(
                                       index_t x_b, index_t y_b, index_t z_b,
                                       index_t x_v, index_t y_v, index_t z_v,
                                       core::buf_index_t block_buf_idx,
                                       MiniVecCache& cache) -> index_t {
            index_t x_vn = (x_v + block_resolution) % block_resolution;
            index_t y_vn = (y_v + block_resolution) % block_resolution;
            index_t z_vn = (z_v + block_resolution) % block_resolution;

            index_t dx_b = Sign(x_v - x_vn);
            index_t dy_b = Sign(y_v - y_vn);
            index_t dz_b = Sign(z_v - z_vn);

            if (dx_b == 0 && dy_b == 0 && dz_b == 0) {
                return block_buf_idx * resolution3 + z_v * resolution2 +
                       y_v * block_resolution + x_v;
            } else {
                Key key(x_b + dx_b, y_b + dy_b, z_b + dz_b);

                // Shadows the block_buf_idx parameter on purpose: from here
                // on the index refers to the neighbor block.
                index_t block_buf_idx = cache.Check(key[0], key[1], key[2]);
                if (block_buf_idx < 0) {
                    auto iter = hashmap_impl.find(key);
                    if (iter == hashmap_impl.end()) return -1;
                    block_buf_idx = iter->second;
                    cache.Update(key[0], key[1], key[2], block_buf_idx);
                }

                return block_buf_idx * resolution3 + z_vn * resolution2 +
                       y_vn * block_resolution + x_vn;
            }
        };

        // Flattened value index of the voxel containing the point
        // origin + t * direction, or -1 if its block is not allocated.
        auto GetLinearIdxAtT = [&] OPEN3D_DEVICE(
                                       float x_o, float y_o, float z_o,
                                       float x_d, float y_d, float z_d, float t,
                                       MiniVecCache& cache) -> index_t {
            float x_g = x_o + t * x_d;
            float y_g = y_o + t * y_d;
            float z_g = z_o + t * z_d;

            // MiniVec coordinate and look up
            index_t x_b = static_cast(floorf(x_g / block_size));
            index_t y_b = static_cast(floorf(y_g / block_size));
            index_t z_b = static_cast(floorf(z_g / block_size));

            Key key(x_b, y_b, z_b);
            index_t block_buf_idx = cache.Check(x_b, y_b, z_b);
            if (block_buf_idx < 0) {
                auto iter = hashmap_impl.find(key);
                if (iter == hashmap_impl.end()) return -1;
                block_buf_idx = iter->second;
                cache.Update(x_b, y_b, z_b, block_buf_idx);
            }

            // Voxel coordinate and look up
            index_t x_v = index_t((x_g - x_b * block_size) / voxel_size);
            index_t y_v = index_t((y_g - y_b * block_size) / voxel_size);
            index_t z_v = index_t((z_g - z_b * block_size) / voxel_size);

            return block_buf_idx * resolution3 + z_v * resolution2 +
                   y_v * block_resolution + x_v;
        };

        index_t y = workload_idx / cols;
        index_t x = workload_idx % cols;

        const float* range = range_indexer.GetDataPtr(
                x / range_map_down_factor, y / range_map_down_factor);

        float* depth_ptr = nullptr;
        float* vertex_ptr = nullptr;
        float* color_ptr = nullptr;
        float* normal_ptr = nullptr;

        int64_t* index_ptr = nullptr;
        bool* mask_ptr = nullptr;
        float* interp_ratio_ptr = nullptr;
        float* interp_ratio_dx_ptr = nullptr;
        float* interp_ratio_dy_ptr = nullptr;
        float* interp_ratio_dz_ptr = nullptr;

        // Zero every enabled output for this pixel up front, so pixels
        // without a hit end up with all-zero values.
        if (vertex_indexer.GetDataPtr()) {
            vertex_ptr = vertex_indexer.GetDataPtr(x, y);
            vertex_ptr[0] = 0;
            vertex_ptr[1] = 0;
            vertex_ptr[2] = 0;
        }
        if (depth_indexer.GetDataPtr()) {
            depth_ptr = depth_indexer.GetDataPtr(x, y);
            depth_ptr[0] = 0;
        }
        if (normal_indexer.GetDataPtr()) {
            normal_ptr = normal_indexer.GetDataPtr(x, y);
            normal_ptr[0] = 0;
            normal_ptr[1] = 0;
            normal_ptr[2] = 0;
        }

        if (mask_indexer.GetDataPtr()) {
            mask_ptr = mask_indexer.GetDataPtr(x, y);
#ifdef __CUDACC__
#pragma unroll
#endif
            for (int i = 0; i < 8; ++i) {
                mask_ptr[i] = false;
            }
        }
        if (index_indexer.GetDataPtr()) {
            index_ptr = index_indexer.GetDataPtr(x, y);
#ifdef __CUDACC__
#pragma unroll
#endif
            for (int i = 0; i < 8; ++i) {
                index_ptr[i] = 0;
            }
        }
        if (interp_ratio_indexer.GetDataPtr()) {
            interp_ratio_ptr = interp_ratio_indexer.GetDataPtr(x, y);
#ifdef __CUDACC__
#pragma unroll
#endif
            for (int i = 0; i < 8; ++i) {
                interp_ratio_ptr[i] = 0;
            }
        }
        if (interp_ratio_dx_indexer.GetDataPtr()) {
            interp_ratio_dx_ptr = interp_ratio_dx_indexer.GetDataPtr(x, y);
#ifdef __CUDACC__
#pragma unroll
#endif
            for (int i = 0; i < 8; ++i) {
                interp_ratio_dx_ptr[i] = 0;
            }
        }
        if (interp_ratio_dy_indexer.GetDataPtr()) {
            interp_ratio_dy_ptr = interp_ratio_dy_indexer.GetDataPtr(x, y);
#ifdef __CUDACC__
#pragma unroll
#endif
            for (int i = 0; i < 8; ++i) {
                interp_ratio_dy_ptr[i] = 0;
            }
        }
        if (interp_ratio_dz_indexer.GetDataPtr()) {
            interp_ratio_dz_ptr = interp_ratio_dz_indexer.GetDataPtr(x, y);
#ifdef __CUDACC__
#pragma unroll
#endif
            for (int i = 0; i < 8; ++i) {
                interp_ratio_dz_ptr[i] = 0;
            }
        }

        if (color_indexer.GetDataPtr()) {
            color_ptr = color_indexer.GetDataPtr(x, y);
            color_ptr[0] = 0;
            color_ptr[1] = 0;
            color_ptr[2] = 0;
        }

        // An empty range means no block projects onto this pixel.
        float t = range[0];
        const float t_max = range[1];
        if (t >= t_max) return;

        // Coordinates in camera and global
        float x_c = 0, y_c = 0, z_c = 0;
        float x_g = 0, y_g = 0, z_g = 0;
        float x_o = 0, y_o = 0, z_o = 0;

        // Iterative ray intersection check
        float t_prev = t;

        float tsdf_prev = -1.0f;
        float tsdf = 1.0;
        float sdf_trunc = voxel_size * trunc_voxel_multiplier;
        float w = 0.0;

        // Camera origin
        c2w_transform_indexer.RigidTransform(0, 0, 0, &x_o, &y_o, &z_o);

        // Direction
        c2w_transform_indexer.Unproject(static_cast(x), static_cast(y), 1.0f,
                                        &x_c, &y_c, &z_c);
        c2w_transform_indexer.RigidTransform(x_c, y_c, z_c, &x_g, &y_g, &z_g);
        float x_d = (x_g - x_o);
        float y_d = (y_g - y_o);
        float z_d = (z_g - z_o);

        // Start with an invalid cache entry (block_idx == -1).
        MiniVecCache cache{0, 0, 0, -1};
        bool surface_found = false;
        while (t < t_max) {
            index_t linear_idx =
                    GetLinearIdxAtT(x_o, y_o, z_o, x_d, y_d, z_d, t, cache);

            if (linear_idx < 0) {
                // Unallocated block: skip ahead by one block size.
                t_prev = t;
                t += block_size;
            } else {
                tsdf_prev = tsdf;
                tsdf = tsdf_base_ptr[linear_idx];
                w = weight_base_ptr[linear_idx];
                // Positive-to-non-positive crossing on an observed voxel.
                if (tsdf_prev > 0 && w >= weight_threshold && tsdf <= 0) {
                    surface_found = true;
                    break;
                }
                t_prev = t;
                // Step by the truncated distance, but at least one voxel.
                float delta = tsdf * sdf_trunc;
                t += delta < voxel_size ? voxel_size : delta;
            }
        }

        if (surface_found) {
            // Linear interpolation of the zero crossing between the last two
            // samples.
            float t_intersect =
                    (t * tsdf_prev - t_prev * tsdf) / (tsdf_prev - tsdf);
            x_g = x_o + t_intersect * x_d;
            y_g = y_o + t_intersect * y_d;
            z_g = z_o + t_intersect * z_d;

            // Trivial vertex assignment
            if (depth_ptr) {
                *depth_ptr = t_intersect * depth_scale;
            }
            if (vertex_ptr) {
                w2c_transform_indexer.RigidTransform(
                        x_g, y_g, z_g, vertex_ptr + 0, vertex_ptr + 1,
                        vertex_ptr + 2);
            }
            if (!visit_neighbors) return;

            // Trilinear interpolation
            // TODO(wei): simplify the flow by splitting the
            // functions given what is enabled
            index_t x_b = static_cast(floorf(x_g / block_size));
            index_t y_b = static_cast(floorf(y_g / block_size));
            index_t z_b = static_cast(floorf(z_g / block_size));
            float x_v = (x_g - float(x_b) * block_size) / voxel_size;
            float y_v = (y_g - float(y_b) * block_size) / voxel_size;
            float z_v = (z_g - float(z_b) * block_size) / voxel_size;

            Key key(x_b, y_b, z_b);

            index_t block_buf_idx = cache.Check(x_b, y_b, z_b);
            if (block_buf_idx < 0) {
                auto iter = hashmap_impl.find(key);
                if (iter == hashmap_impl.end()) return;
                block_buf_idx = iter->second;
                cache.Update(x_b, y_b, z_b, block_buf_idx);
            }

            index_t x_v_floor = static_cast(floorf(x_v));
            index_t y_v_floor = static_cast(floorf(y_v));
            index_t z_v_floor = static_cast(floorf(z_v));

            float ratio_x = x_v - float(x_v_floor);
            float ratio_y = y_v - float(y_v_floor);
            float ratio_z = z_v - float(z_v_floor);

            // Sum of the weights of the corners that actually contributed;
            // used to renormalize the color below.
            float sum_r = 0.0;
            for (index_t k = 0; k < 8; ++k) {
                // Bit 0/1/2 of k selects the +x/+y/+z corner.
                index_t dx_v = (k & 1) > 0 ? 1 : 0;
                index_t dy_v = (k & 2) > 0 ? 1 : 0;
                index_t dz_v = (k & 4) > 0 ? 1 : 0;

                index_t linear_idx_k = GetLinearIdxAtP(
                        x_b, y_b, z_b, x_v_floor + dx_v, y_v_floor + dy_v,
                        z_v_floor + dz_v, block_buf_idx, cache);

                if (linear_idx_k >= 0 && weight_base_ptr[linear_idx_k] > 0) {
                    float rx = dx_v * (ratio_x) + (1 - dx_v) * (1 - ratio_x);
                    float ry = dy_v * (ratio_y) + (1 - dy_v) * (1 - ratio_y);
                    float rz = dz_v * (ratio_z) + (1 - dz_v) * (1 - ratio_z);
                    float r = rx * ry * rz;

                    if (interp_ratio_ptr) {
                        interp_ratio_ptr[k] = r;
                    }
                    if (mask_ptr) {
                        mask_ptr[k] = true;
                    }
                    if (index_ptr) {
                        index_ptr[k] = linear_idx_k;
                    }

                    float tsdf_k = tsdf_base_ptr[linear_idx_k];
                    // Partial derivatives of r with respect to the voxel
                    // coordinate along x, y and z.
                    float interp_ratio_dx = ry * rz * (2 * dx_v - 1);
                    float interp_ratio_dy = rx * rz * (2 * dy_v - 1);
                    float interp_ratio_dz = rx * ry * (2 * dz_v - 1);

                    if (interp_ratio_dx_ptr) {
                        interp_ratio_dx_ptr[k] = interp_ratio_dx;
                    }
                    if (interp_ratio_dy_ptr) {
                        interp_ratio_dy_ptr[k] = interp_ratio_dy;
                    }
                    if (interp_ratio_dz_ptr) {
                        interp_ratio_dz_ptr[k] = interp_ratio_dz;
                    }

                    if (normal_ptr) {
                        normal_ptr[0] += interp_ratio_dx * tsdf_k;
                        normal_ptr[1] += interp_ratio_dy * tsdf_k;
                        normal_ptr[2] += interp_ratio_dz * tsdf_k;
                    }

                    if (color_ptr) {
                        index_t color_linear_idx = linear_idx_k * 3;
                        color_ptr[0] +=
                                r * color_base_ptr[color_linear_idx + 0];
                        color_ptr[1] +=
                                r * color_base_ptr[color_linear_idx + 1];
                        color_ptr[2] +=
                                r * color_base_ptr[color_linear_idx + 2];
                    }

                    sum_r += r;
                }
            }  // loop over 8 neighbors

            if (sum_r > 0) {
                // Folds the division by 255 into the weight normalization of
                // the color.
                sum_r *= 255.0;
                if (color_ptr) {
                    color_ptr[0] /= sum_r;
                    color_ptr[1] /= sum_r;
                    color_ptr[2] /= sum_r;
                }

                if (normal_ptr) {
                    // Lower bound on the norm to avoid dividing by zero.
                    constexpr float EPSILON = 1e-5f;
                    float norm = sqrt(normal_ptr[0] * normal_ptr[0] +
                                      normal_ptr[1] * normal_ptr[1] +
                                      normal_ptr[2] * normal_ptr[2]);
                    norm = std::max(norm, EPSILON);
                    // Negate, normalize, and rotate with the world-to-camera
                    // indexer.
                    w2c_transform_indexer.Rotate(
                            -normal_ptr[0] / norm, -normal_ptr[1] / norm,
                            -normal_ptr[2] / norm, normal_ptr + 0,
                            normal_ptr + 1, normal_ptr + 2);
                }
            }
        }  // surface-found
    });

#if defined(__CUDACC__)
    core::cuda::Synchronize();
#endif
}

template #if defined(__CUDACC__) void ExtractPointCloudCUDA #else void
ExtractPointCloudCPU #endif (const core::Tensor& indices, const core::Tensor& nb_indices, const core::Tensor& nb_masks, const core::Tensor& block_keys, const TensorMap& block_value_map, core::Tensor& points, core::Tensor& normals, core::Tensor& colors, index_t resolution, float voxel_size, float weight_threshold, int& valid_size) { core::Device device = block_keys.GetDevice(); // Parameters index_t resolution2 = resolution * resolution; index_t resolution3 = resolution2 * resolution; // Shape / transform indexers, no data involved ArrayIndexer voxel_indexer({resolution, resolution, resolution}); // Real data indexer ArrayIndexer block_keys_indexer(block_keys, 1); ArrayIndexer nb_block_masks_indexer(nb_masks, 2); ArrayIndexer nb_block_indices_indexer(nb_indices, 2); // Plain arrays that does not require indexers const index_t* indices_ptr = indices.GetDataPtr(); if (!block_value_map.Contains("tsdf") || !block_value_map.Contains("weight")) { utility::LogError( "TSDF and/or weight not allocated in blocks, please implement " "customized integration."); } const tsdf_t* tsdf_base_ptr = block_value_map.at("tsdf").GetDataPtr(); const weight_t* weight_base_ptr = block_value_map.at("weight").GetDataPtr(); const color_t* color_base_ptr = nullptr; if (block_value_map.Contains("color")) { color_base_ptr = block_value_map.at("color").GetDataPtr(); } index_t n_blocks = indices.GetLength(); index_t n = n_blocks * resolution3; // Output #if defined(__CUDACC__) core::Tensor count(std::vector{0}, {1}, core::Int32, block_keys.GetDevice()); index_t* count_ptr = count.GetDataPtr(); #else std::atomic count_atomic(0); std::atomic* count_ptr = &count_atomic; #endif if (valid_size < 0) { utility::LogDebug( "No estimated max point cloud size provided, using a 2-pass " "estimation. Surface extraction could be slow."); // This pass determines valid number of points. 
core::ParallelFor(device, n, [=] OPEN3D_DEVICE(index_t workload_idx) { auto GetLinearIdx = [&] OPEN3D_DEVICE( index_t xo, index_t yo, index_t zo, index_t curr_block_idx) -> index_t { return DeviceGetLinearIdx(xo, yo, zo, curr_block_idx, resolution, nb_block_masks_indexer, nb_block_indices_indexer); }; // Natural index (0, N) -> (block_idx, // voxel_idx) index_t workload_block_idx = workload_idx / resolution3; index_t block_idx = indices_ptr[workload_block_idx]; index_t voxel_idx = workload_idx % resolution3; // voxel_idx -> (x_voxel, y_voxel, z_voxel) index_t xv, yv, zv; voxel_indexer.WorkloadToCoord(voxel_idx, &xv, &yv, &zv); index_t linear_idx = block_idx * resolution3 + voxel_idx; float tsdf_o = tsdf_base_ptr[linear_idx]; float weight_o = weight_base_ptr[linear_idx]; if (weight_o <= weight_threshold) return; // Enumerate x-y-z directions for (index_t i = 0; i < 3; ++i) { index_t linear_idx_i = GetLinearIdx(xv + (i == 0), yv + (i == 1), zv + (i == 2), workload_block_idx); if (linear_idx_i < 0) continue; float tsdf_i = tsdf_base_ptr[linear_idx_i]; float weight_i = weight_base_ptr[linear_idx_i]; if (weight_i > weight_threshold && tsdf_i * tsdf_o < 0) { OPEN3D_ATOMIC_ADD(count_ptr, 1); } } }); #if defined(__CUDACC__) valid_size = count[0].Item(); count[0] = 0; #else valid_size = (*count_ptr).load(); (*count_ptr) = 0; #endif } if (points.GetLength() == 0) { points = core::Tensor({valid_size, 3}, core::Float32, device); } ArrayIndexer point_indexer(points, 1); // Normals ArrayIndexer normal_indexer; normals = core::Tensor({valid_size, 3}, core::Float32, device); normal_indexer = ArrayIndexer(normals, 1); // This pass extracts exact surface points. 
// Colors ArrayIndexer color_indexer; if (color_base_ptr) { colors = core::Tensor({valid_size, 3}, core::Float32, device); color_indexer = ArrayIndexer(colors, 1); } core::ParallelFor(device, n, [=] OPEN3D_DEVICE(index_t workload_idx) { auto GetLinearIdx = [&] OPEN3D_DEVICE( index_t xo, index_t yo, index_t zo, index_t curr_block_idx) -> index_t { return DeviceGetLinearIdx(xo, yo, zo, curr_block_idx, resolution, nb_block_masks_indexer, nb_block_indices_indexer); }; auto GetNormal = [&] OPEN3D_DEVICE(index_t xo, index_t yo, index_t zo, index_t curr_block_idx, float* n) { return DeviceGetNormal( tsdf_base_ptr, xo, yo, zo, curr_block_idx, n, resolution, nb_block_masks_indexer, nb_block_indices_indexer); }; // Natural index (0, N) -> (block_idx, voxel_idx) index_t workload_block_idx = workload_idx / resolution3; index_t block_idx = indices_ptr[workload_block_idx]; index_t voxel_idx = workload_idx % resolution3; /// Coordinate transform // block_idx -> (x_block, y_block, z_block) index_t* block_key_ptr = block_keys_indexer.GetDataPtr(block_idx); index_t xb = block_key_ptr[0]; index_t yb = block_key_ptr[1]; index_t zb = block_key_ptr[2]; // voxel_idx -> (x_voxel, y_voxel, z_voxel) index_t xv, yv, zv; voxel_indexer.WorkloadToCoord(voxel_idx, &xv, &yv, &zv); index_t linear_idx = block_idx * resolution3 + voxel_idx; float tsdf_o = tsdf_base_ptr[linear_idx]; float weight_o = weight_base_ptr[linear_idx]; if (weight_o <= weight_threshold) return; float no[3] = {0}, ne[3] = {0}; // Get normal at origin GetNormal(xv, yv, zv, workload_block_idx, no); index_t x = xb * resolution + xv; index_t y = yb * resolution + yv; index_t z = zb * resolution + zv; // Enumerate x-y-z axis for (index_t i = 0; i < 3; ++i) { index_t linear_idx_i = GetLinearIdx(xv + (i == 0), yv + (i == 1), zv + (i == 2), workload_block_idx); if (linear_idx_i < 0) continue; float tsdf_i = tsdf_base_ptr[linear_idx_i]; float weight_i = weight_base_ptr[linear_idx_i]; if (weight_i > weight_threshold && tsdf_i * tsdf_o < 
0) { float ratio = (0 - tsdf_o) / (tsdf_i - tsdf_o); index_t idx = OPEN3D_ATOMIC_ADD(count_ptr, 1); if (idx >= valid_size) { printf("Point cloud size larger than " "estimated, please increase the " "estimation!\n"); return; } float* point_ptr = point_indexer.GetDataPtr(idx); point_ptr[0] = voxel_size * (x + ratio * int(i == 0)); point_ptr[1] = voxel_size * (y + ratio * int(i == 1)); point_ptr[2] = voxel_size * (z + ratio * int(i == 2)); // Get normal at edge and interpolate float* normal_ptr = normal_indexer.GetDataPtr(idx); GetNormal(xv + (i == 0), yv + (i == 1), zv + (i == 2), workload_block_idx, ne); float nx = (1 - ratio) * no[0] + ratio * ne[0]; float ny = (1 - ratio) * no[1] + ratio * ne[1]; float nz = (1 - ratio) * no[2] + ratio * ne[2]; float norm = static_cast( sqrt(nx * nx + ny * ny + nz * nz) + 1e-5); normal_ptr[0] = nx / norm; normal_ptr[1] = ny / norm; normal_ptr[2] = nz / norm; if (color_base_ptr) { float* color_ptr = color_indexer.GetDataPtr(idx); const color_t* color_o_ptr = color_base_ptr + 3 * linear_idx; float r_o = color_o_ptr[0]; float g_o = color_o_ptr[1]; float b_o = color_o_ptr[2]; const color_t* color_i_ptr = color_base_ptr + 3 * linear_idx_i; float r_i = color_i_ptr[0]; float g_i = color_i_ptr[1]; float b_i = color_i_ptr[2]; color_ptr[0] = ((1 - ratio) * r_o + ratio * r_i) / 255.0f; color_ptr[1] = ((1 - ratio) * g_o + ratio * g_i) / 255.0f; color_ptr[2] = ((1 - ratio) * b_o + ratio * b_i) / 255.0f; } } } }); #if defined(__CUDACC__) index_t total_count = count.Item(); #else index_t total_count = (*count_ptr).load(); #endif utility::LogDebug("{} vertices extracted", total_count); valid_size = total_count; #if defined(BUILD_CUDA_MODULE) && defined(__CUDACC__) core::cuda::Synchronize(); #endif } template #if defined(__CUDACC__) void ExtractTriangleMeshCUDA #else void ExtractTriangleMeshCPU #endif (const core::Tensor& block_indices, const core::Tensor& inv_block_indices, const core::Tensor& nb_block_indices, const core::Tensor& nb_block_masks, 
const core::Tensor& block_keys, const TensorMap& block_value_map, core::Tensor& vertices, core::Tensor& triangles, core::Tensor& vertex_normals, core::Tensor& vertex_colors, index_t block_resolution, float voxel_size, float weight_threshold, index_t& vertex_count) { core::Device device = block_indices.GetDevice(); index_t resolution = block_resolution; index_t resolution3 = resolution * resolution * resolution; // Shape / transform indexers, no data involved ArrayIndexer voxel_indexer({resolution, resolution, resolution}); index_t n_blocks = static_cast(block_indices.GetLength()); // TODO(wei): profile performance by replacing the table to a hashmap. // Voxel-wise mesh info. 4 channels correspond to: // 3 edges' corresponding vertex index + 1 table index. core::Tensor mesh_structure; try { mesh_structure = core::Tensor::Zeros( {n_blocks, resolution, resolution, resolution, 4}, core::Int32, device); } catch (const std::runtime_error&) { utility::LogError( "Unable to allocate assistance mesh structure for Marching " "Cubes with {} active voxel blocks. 
Please consider using a " "larger voxel size (currently {}) for TSDF integration, or " "using tsdf_volume.cpu() to perform mesh extraction on CPU.", n_blocks, voxel_size); } // Real data indexer ArrayIndexer mesh_structure_indexer(mesh_structure, 4); ArrayIndexer nb_block_masks_indexer(nb_block_masks, 2); ArrayIndexer nb_block_indices_indexer(nb_block_indices, 2); // Plain arrays that does not require indexers const index_t* indices_ptr = block_indices.GetDataPtr(); const index_t* inv_indices_ptr = inv_block_indices.GetDataPtr(); if (!block_value_map.Contains("tsdf") || !block_value_map.Contains("weight")) { utility::LogError( "TSDF and/or weight not allocated in blocks, please implement " "customized integration."); } const tsdf_t* tsdf_base_ptr = block_value_map.at("tsdf").GetDataPtr(); const weight_t* weight_base_ptr = block_value_map.at("weight").GetDataPtr(); const color_t* color_base_ptr = nullptr; if (block_value_map.Contains("color")) { color_base_ptr = block_value_map.at("color").GetDataPtr(); } index_t n = n_blocks * resolution3; // Pass 0: analyze mesh structure, set up one-on-one correspondences // from edges to vertices. 
core::ParallelFor(device, n, [=] OPEN3D_DEVICE(index_t widx) { auto GetLinearIdx = [&] OPEN3D_DEVICE( index_t xo, index_t yo, index_t zo, index_t curr_block_idx) -> index_t { return DeviceGetLinearIdx(xo, yo, zo, curr_block_idx, static_cast(resolution), nb_block_masks_indexer, nb_block_indices_indexer); }; // Natural index (0, N) -> (block_idx, voxel_idx) index_t workload_block_idx = widx / resolution3; index_t voxel_idx = widx % resolution3; // voxel_idx -> (x_voxel, y_voxel, z_voxel) index_t xv, yv, zv; voxel_indexer.WorkloadToCoord(voxel_idx, &xv, &yv, &zv); // Check per-vertex sign in the cube to determine cube // type index_t table_idx = 0; for (index_t i = 0; i < 8; ++i) { index_t linear_idx_i = GetLinearIdx(xv + vtx_shifts[i][0], yv + vtx_shifts[i][1], zv + vtx_shifts[i][2], workload_block_idx); if (linear_idx_i < 0) return; float tsdf_i = tsdf_base_ptr[linear_idx_i]; float weight_i = weight_base_ptr[linear_idx_i]; if (weight_i <= weight_threshold) return; table_idx |= ((tsdf_i < 0) ? 
(1 << i) : 0); } index_t* mesh_struct_ptr = mesh_structure_indexer.GetDataPtr( xv, yv, zv, workload_block_idx); mesh_struct_ptr[3] = table_idx; if (table_idx == 0 || table_idx == 255) return; // Check per-edge sign determine the cube type index_t edges_with_vertices = edge_table[table_idx]; for (index_t i = 0; i < 12; ++i) { if (edges_with_vertices & (1 << i)) { index_t xv_i = xv + edge_shifts[i][0]; index_t yv_i = yv + edge_shifts[i][1]; index_t zv_i = zv + edge_shifts[i][2]; index_t edge_i = edge_shifts[i][3]; index_t dxb = xv_i / resolution; index_t dyb = yv_i / resolution; index_t dzb = zv_i / resolution; index_t nb_idx = (dxb + 1) + (dyb + 1) * 3 + (dzb + 1) * 9; index_t block_idx_i = *nb_block_indices_indexer.GetDataPtr( workload_block_idx, nb_idx); index_t* mesh_ptr_i = mesh_structure_indexer.GetDataPtr( xv_i - dxb * resolution, yv_i - dyb * resolution, zv_i - dzb * resolution, inv_indices_ptr[block_idx_i]); // Non-atomic write, but we are safe mesh_ptr_i[edge_i] = -1; } } }); // Pass 1: determine valid number of vertices (if not preset) #if defined(__CUDACC__) core::Tensor count(std::vector{0}, {}, core::Int32, device); index_t* count_ptr = count.GetDataPtr(); #else std::atomic count_atomic(0); std::atomic* count_ptr = &count_atomic; #endif if (vertex_count < 0) { core::ParallelFor(device, n, [=] OPEN3D_DEVICE(index_t widx) { // Natural index (0, N) -> (block_idx, voxel_idx) index_t workload_block_idx = widx / resolution3; index_t voxel_idx = widx % resolution3; // voxel_idx -> (x_voxel, y_voxel, z_voxel) index_t xv, yv, zv; voxel_indexer.WorkloadToCoord(voxel_idx, &xv, &yv, &zv); // Obtain voxel's mesh struct ptr index_t* mesh_struct_ptr = mesh_structure_indexer.GetDataPtr( xv, yv, zv, workload_block_idx); // Early quit -- no allocated vertex to compute if (mesh_struct_ptr[0] != -1 && mesh_struct_ptr[1] != -1 && mesh_struct_ptr[2] != -1) { return; } // Enumerate 3 edges in the voxel for (index_t e = 0; e < 3; ++e) { index_t vertex_idx = mesh_struct_ptr[e]; 
if (vertex_idx != -1) continue; OPEN3D_ATOMIC_ADD(count_ptr, 1); } }); #if defined(__CUDACC__) vertex_count = count.Item(); #else vertex_count = (*count_ptr).load(); #endif } utility::LogDebug("Total vertex count = {}", vertex_count); vertices = core::Tensor({vertex_count, 3}, core::Float32, device); vertex_normals = core::Tensor({vertex_count, 3}, core::Float32, device); ArrayIndexer normal_indexer = ArrayIndexer(vertex_normals, 1); ArrayIndexer color_indexer; if (color_base_ptr) { vertex_colors = core::Tensor({vertex_count, 3}, core::Float32, device); color_indexer = ArrayIndexer(vertex_colors, 1); } ArrayIndexer block_keys_indexer(block_keys, 1); ArrayIndexer vertex_indexer(vertices, 1); #if defined(__CUDACC__) count = core::Tensor(std::vector{0}, {}, core::Int32, device); count_ptr = count.GetDataPtr(); #else (*count_ptr) = 0; #endif // Pass 2: extract vertices. core::ParallelFor(device, n, [=] OPEN3D_DEVICE(index_t widx) { auto GetLinearIdx = [&] OPEN3D_DEVICE( index_t xo, index_t yo, index_t zo, index_t curr_block_idx) -> index_t { return DeviceGetLinearIdx(xo, yo, zo, curr_block_idx, resolution, nb_block_masks_indexer, nb_block_indices_indexer); }; auto GetNormal = [&] OPEN3D_DEVICE(index_t xo, index_t yo, index_t zo, index_t curr_block_idx, float* n) { return DeviceGetNormal( tsdf_base_ptr, xo, yo, zo, curr_block_idx, n, resolution, nb_block_masks_indexer, nb_block_indices_indexer); }; // Natural index (0, N) -> (block_idx, voxel_idx) index_t workload_block_idx = widx / resolution3; index_t block_idx = indices_ptr[workload_block_idx]; index_t voxel_idx = widx % resolution3; // block_idx -> (x_block, y_block, z_block) index_t* block_key_ptr = block_keys_indexer.GetDataPtr(block_idx); index_t xb = block_key_ptr[0]; index_t yb = block_key_ptr[1]; index_t zb = block_key_ptr[2]; // voxel_idx -> (x_voxel, y_voxel, z_voxel) index_t xv, yv, zv; voxel_indexer.WorkloadToCoord(voxel_idx, &xv, &yv, &zv); // global coordinate (in voxels) index_t x = xb * resolution + 
xv; index_t y = yb * resolution + yv; index_t z = zb * resolution + zv; // Obtain voxel's mesh struct ptr index_t* mesh_struct_ptr = mesh_structure_indexer.GetDataPtr( xv, yv, zv, workload_block_idx); // Early quit -- no allocated vertex to compute if (mesh_struct_ptr[0] != -1 && mesh_struct_ptr[1] != -1 && mesh_struct_ptr[2] != -1) { return; } // Obtain voxel ptr index_t linear_idx = resolution3 * block_idx + voxel_idx; float tsdf_o = tsdf_base_ptr[linear_idx]; float no[3] = {0}, ne[3] = {0}; // Get normal at origin GetNormal(xv, yv, zv, workload_block_idx, no); // Enumerate 3 edges in the voxel for (index_t e = 0; e < 3; ++e) { index_t vertex_idx = mesh_struct_ptr[e]; if (vertex_idx != -1) continue; index_t linear_idx_e = GetLinearIdx(xv + (e == 0), yv + (e == 1), zv + (e == 2), workload_block_idx); OPEN3D_ASSERT(linear_idx_e > 0 && "Internal error: GetVoxelAt returns nullptr."); float tsdf_e = tsdf_base_ptr[linear_idx_e]; float ratio = (0 - tsdf_o) / (tsdf_e - tsdf_o); index_t idx = OPEN3D_ATOMIC_ADD(count_ptr, 1); mesh_struct_ptr[e] = idx; float ratio_x = ratio * index_t(e == 0); float ratio_y = ratio * index_t(e == 1); float ratio_z = ratio * index_t(e == 2); float* vertex_ptr = vertex_indexer.GetDataPtr(idx); vertex_ptr[0] = voxel_size * (x + ratio_x); vertex_ptr[1] = voxel_size * (y + ratio_y); vertex_ptr[2] = voxel_size * (z + ratio_z); // Get normal at edge and interpolate float* normal_ptr = normal_indexer.GetDataPtr(idx); GetNormal(xv + (e == 0), yv + (e == 1), zv + (e == 2), workload_block_idx, ne); float nx = (1 - ratio) * no[0] + ratio * ne[0]; float ny = (1 - ratio) * no[1] + ratio * ne[1]; float nz = (1 - ratio) * no[2] + ratio * ne[2]; float norm = static_cast(sqrt(nx * nx + ny * ny + nz * nz) + 1e-5); normal_ptr[0] = nx / norm; normal_ptr[1] = ny / norm; normal_ptr[2] = nz / norm; if (color_base_ptr) { float* color_ptr = color_indexer.GetDataPtr(idx); float r_o = color_base_ptr[linear_idx * 3 + 0]; float g_o = color_base_ptr[linear_idx * 3 + 1]; 
float b_o = color_base_ptr[linear_idx * 3 + 2]; float r_e = color_base_ptr[linear_idx_e * 3 + 0]; float g_e = color_base_ptr[linear_idx_e * 3 + 1]; float b_e = color_base_ptr[linear_idx_e * 3 + 2]; color_ptr[0] = ((1 - ratio) * r_o + ratio * r_e) / 255.0f; color_ptr[1] = ((1 - ratio) * g_o + ratio * g_e) / 255.0f; color_ptr[2] = ((1 - ratio) * b_o + ratio * b_e) / 255.0f; } } }); // Pass 3: connect vertices and form triangles. index_t triangle_count = vertex_count * 3; triangles = core::Tensor({triangle_count, 3}, core::Int32, device); ArrayIndexer triangle_indexer(triangles, 1); #if defined(__CUDACC__) count = core::Tensor(std::vector{0}, {}, core::Int32, device); count_ptr = count.GetDataPtr(); #else (*count_ptr) = 0; #endif core::ParallelFor(device, n, [=] OPEN3D_DEVICE(index_t widx) { // Natural index (0, N) -> (block_idx, voxel_idx) index_t workload_block_idx = widx / resolution3; index_t voxel_idx = widx % resolution3; // voxel_idx -> (x_voxel, y_voxel, z_voxel) index_t xv, yv, zv; voxel_indexer.WorkloadToCoord(voxel_idx, &xv, &yv, &zv); // Obtain voxel's mesh struct ptr index_t* mesh_struct_ptr = mesh_structure_indexer.GetDataPtr( xv, yv, zv, workload_block_idx); index_t table_idx = mesh_struct_ptr[3]; if (tri_count[table_idx] == 0) return; for (index_t tri = 0; tri < 16; tri += 3) { if (tri_table[table_idx][tri] == -1) return; index_t tri_idx = OPEN3D_ATOMIC_ADD(count_ptr, 1); for (index_t vertex = 0; vertex < 3; ++vertex) { index_t edge = tri_table[table_idx][tri + vertex]; index_t xv_i = xv + edge_shifts[edge][0]; index_t yv_i = yv + edge_shifts[edge][1]; index_t zv_i = zv + edge_shifts[edge][2]; index_t edge_i = edge_shifts[edge][3]; index_t dxb = xv_i / resolution; index_t dyb = yv_i / resolution; index_t dzb = zv_i / resolution; index_t nb_idx = (dxb + 1) + (dyb + 1) * 3 + (dzb + 1) * 9; index_t block_idx_i = *nb_block_indices_indexer.GetDataPtr( workload_block_idx, nb_idx); index_t* mesh_struct_ptr_i = mesh_structure_indexer.GetDataPtr( xv_i - dxb * 
resolution, yv_i - dyb * resolution, zv_i - dzb * resolution, inv_indices_ptr[block_idx_i]); index_t* triangle_ptr = triangle_indexer.GetDataPtr(tri_idx); triangle_ptr[2 - vertex] = mesh_struct_ptr_i[edge_i]; } } }); #if defined(__CUDACC__) triangle_count = count.Item(); #else triangle_count = (*count_ptr).load(); #endif utility::LogDebug("Total triangle count = {}", triangle_count); triangles = triangles.Slice(0, 0, triangle_count); } } // namespace voxel_grid } // namespace kernel } // namespace geometry } // namespace t } // namespace open3d