// image_framework_ymj/include/open3d/t/geometry/Image.h
// 2024-12-06 16:25:16 +08:00

// 346 lines
// 14 KiB
// C++
// Executable File

// ----------------------------------------------------------------------------
// - Open3D: www.open3d.org -
// ----------------------------------------------------------------------------
// Copyright (c) 2018-2023 www.open3d.org
// SPDX-License-Identifier: MIT
// ----------------------------------------------------------------------------
#pragma once
#include <limits>
#include <memory>
#include <string>
#include <vector>
#include "open3d/core/Dtype.h"
#include "open3d/core/Tensor.h"
#include "open3d/core/kernel/UnaryEW.h"
#include "open3d/geometry/Image.h"
#include "open3d/t/geometry/Geometry.h"
namespace open3d {
namespace t {
namespace geometry {
/// \class Image
///
/// \brief The Image class stores image with customizable rows, cols, channels,
/// dtype and device.
class Image : public Geometry {
public:
/// \brief Constructor for image.
///
/// Row-major storage is used, similar to OpenCV. Use (row, col, channel)
/// indexing order for image creation and accessing. In general, (r, c, ch)
/// are the preferred variable names for consistency, and avoid using width,
/// height, u, v, x, y for coordinates.
///
/// \param rows Number of rows of the image, i.e. image height. \p rows must
/// be non-negative.
/// \param cols Number of columns of the image, i.e. image width. \p cols
/// must be non-negative.
/// \param channels Number of channels of the image. E.g. for RGB image,
/// channels == 3; for grayscale image, channels == 1. \p channels must be
/// greater than 0.
/// \param dtype Data type of the image.
/// \param device Device where the image is stored.
Image(int64_t rows = 0,
int64_t cols = 0,
int64_t channels = 1,
core::Dtype dtype = core::Float32,
const core::Device &device = core::Device("CPU:0"));
/// \brief Construct from a tensor. The tensor won't be copied and memory
/// will be shared.
///
/// \param tensor: Tensor of the image. The tensor must be contiguous. The
/// tensor must be 2D (rows, cols) or 3D (rows, cols, channels).
Image(const core::Tensor &tensor);
virtual ~Image() override {}
public:
/// \brief Clear image contents by resetting the rows and cols to 0, while
/// keeping channels, dtype and device unchanged.
Image &Clear() override {
data_ = core::Tensor({0, 0, GetChannels()}, GetDtype(), GetDevice());
return *this;
}
/// \brief Returns true if rows * cols * channels == 0.
bool IsEmpty() const override {
return GetRows() * GetCols() * GetChannels() == 0;
}
/// \brief Reinitialize image with new parameters.
Image &Reset(int64_t rows = 0,
int64_t cols = 0,
int64_t channels = 1,
core::Dtype dtype = core::Float32,
const core::Device &device = core::Device("CPU:0"));
public:
/// \brief Get the number of rows of the image.
int64_t GetRows() const { return data_.GetShape()[0]; }
/// \brief Get the number of columns of the image.
int64_t GetCols() const { return data_.GetShape()[1]; }
/// \brief Get the number of channels of the image.
int64_t GetChannels() const { return data_.GetShape()[2]; }
/// \brief Get dtype of the image.
core::Dtype GetDtype() const { return data_.GetDtype(); }
/// \brief Get device of the image.
core::Device GetDevice() const override { return data_.GetDevice(); }
/// \brief Get pixel(s) in the image.
///
/// If channels == 1, returns a tensor with shape {}, otherwise returns a
/// tensor with shape {channels,}. The returned tensor is a slice of the
/// image's tensor, so when modifying the slice, the original tensor will
/// also be modified.
core::Tensor At(int64_t r, int64_t c) const {
if (GetChannels() == 1) {
return data_[r][c][0];
} else {
return data_[r][c];
}
}
/// \brief Get pixel(s) in the image. Returns a tensor with shape {}.
core::Tensor At(int64_t r, int64_t c, int64_t ch) const {
return data_[r][c][ch];
}
/// \brief Get raw buffer of the Image data.
void *GetDataPtr() { return data_.GetDataPtr(); }
/// \brief Get raw buffer of the Image data.
const void *GetDataPtr() const { return data_.GetDataPtr(); }
/// \brief Returns the underlying Tensor of the Image.
core::Tensor AsTensor() const { return data_; }
/// \brief Transfer the image to a specified device.
///
/// \param device The targeted device to convert to.
/// \param copy If true, a new image is always created; if false, the
/// copy is avoided when the original image is already on the targeted
/// device.
Image To(const core::Device &device, bool copy = false) const {
return Image(data_.To(device, copy));
}
/// \brief Returns copy of the image on the same device.
Image Clone() const { return To(GetDevice(), /*copy=*/true); }
/// \brief Returns an Image with the specified \p dtype.
///
/// \param dtype The targeted dtype to convert to.
/// \param copy If true, a new tensor is always created; if false, the copy
/// is avoided when the original tensor already has the targeted dtype.
/// \param scale Optional scale value. This is 1./255 for UInt8 ->
/// Float{32,64}, 1./65535 for UInt16 -> Float{32,64} and 1 otherwise
/// \param offset Optional shift value. Default 0.
Image To(core::Dtype dtype,
bool copy = false,
utility::optional<double> scale = utility::nullopt,
double offset = 0.0) const;
/// \brief Function to linearly transform pixel intensities in place.
///
/// \f$image = scale * image + offset\f$.
///
/// \param scale First multiply image pixel values with this factor. This
/// should be positive for unsigned dtypes.
/// \param offset Then add this factor to all image pixel values.
///
/// \return Reference to self.
Image &LinearTransform(double scale = 1.0, double offset = 0.0) {
To(GetDtype(), false, scale, offset);
return *this;
}
/// \brief Converts a 3-channel RGB image to a new 1-channel Grayscale image
///
/// Uses formula \f$I = 0.299 * R + 0.587 * G + 0.114 * B\f$.
Image RGBToGray() const;
/// Image interpolation algorithms.
enum class InterpType {
Nearest = 0, ///< Nearest neighbors interpolation.
Linear = 1, ///< Bilinear interpolation.
Cubic = 2, ///< Bicubic interpolation.
Lanczos = 3, ///< Lanczos filter interpolation.
Super = 4 ///< Super sampling interpolation (only downsample).
};
/// \brief Return a new image after resizing with specified interpolation
/// type.
///
/// Downsample if sampling rate is < 1. Upsample if sampling rate > 1.
/// Aspect ratio is always preserved.
Image Resize(float sampling_rate = 0.5f,
InterpType interp_type = InterpType::Nearest) const;
/// \brief Return a new image after performing morphological dilation.
///
/// Supported datatypes are UInt8, UInt16 and Float32 with {1, 3, 4}
/// channels. An 8-connected neighborhood is used to create the dilation
/// mask.
///
/// \param kernel_size An odd number >= 3.
Image Dilate(int kernel_size = 3) const;
/// \brief Return a new image after filtering with the given kernel.
Image Filter(const core::Tensor &kernel) const;
/// \brief Return a new image after bilateral filtering.
///
/// \param value_sigma Standard deviation for the image content.
/// \param distance_sigma Standard deviation for the image pixel positions.
///
/// Note: CPU (IPP) and CUDA (NPP) versions use different algorithms and
/// will give different results:\n
/// CPU uses a round kernel (radius = floor(kernel_size / 2)),\n
/// while CUDA uses a square kernel (width = kernel_size).\n
/// Make sure to tune parameters accordingly.
Image FilterBilateral(int kernel_size = 3,
float value_sigma = 20.0f,
float distance_sigma = 10.0f) const;
/// \brief Return a new image after Gaussian filtering.
///
/// \param kernel_size Odd numbers >= 3 are supported.
/// \param sigma Standard deviation of the Gaussian distribution.
Image FilterGaussian(int kernel_size = 3, float sigma = 1.0f) const;
/// \brief Return a pair of new gradient images (dx, dy) after Sobel
/// filtering.
///
/// \param kernel_size: Sobel filter kernel size, either 3 or 5.
std::pair<Image, Image> FilterSobel(int kernel_size = 3) const;
/// \brief Return a new downsampled image with pyramid downsampling.
///
/// The returned image is formed by a chained Gaussian filter (kernel_size =
/// 5, sigma = 1.0) and a resize (ratio = 0.5) operation.
///
/// \returns Half sized downsampled depth image.
Image PyrDown() const;
/// \brief Edge and invalid value preserving downsampling by 2 specifically
/// for depth images.
///
/// Only 1 channel Float32 images are supported. The returned image is
/// formed by a chained Gaussian filter (kernel_size = 5, sigma = 1.0) and a
/// resize (ratio = 0.5) operation.
///
/// \param diff_threshold The Gaussian filter averaging ignores neighboring
/// values if the depth difference is larger than this value.
/// \param invalid_fill The Gaussian filter ignores these values (may be
/// specified as NAN, INFINITY or 0.0 (default)).
///
/// \returns Half sized downsampled Float32 depth image.
Image PyrDownDepth(float diff_threshold, float invalid_fill = 0.f) const;
/// \brief Return new image after scaling and clipping image values.
///
/// This is typically used for preprocessing a depth image. Images of shape
/// (rows, cols, channels=1) and Dtypes UInt16 and Float32 are supported.
/// Each pixel will be transformed by
/// - x = x / \p scale
/// - x = x < \p min_value ? \p clip_fill : x
/// - x = x > \p max_value ? \p clip_fill : x
///
/// Use INFINITY, NAN or 0.0 (default) for \p clip_fill.
/// \return Transformed image of type Float32, with out-of-range pixels
/// clipped and assigned the \p clip_fill value.
Image ClipTransform(float scale,
float min_value,
float max_value,
float clip_fill = 0.0f) const;
/// \brief Create a vertex map from a depth image using unprojection.
///
/// The input depth (of shape (rows, cols, channels=1) and Dtype Float32) is
/// expected to be the output of ClipTransform.
///
/// \param intrinsics Pinhole camera model of (3, 3) in Float64.
/// \param invalid_fill Value to fill in for invalid depths. Use NAN,
/// INFINITY or 0.0 (default). Must be consistent with \p clip_fill in
/// ClipTransform.
///
/// \returns Vertex map of shape (rows, cols, channels=3) and Dtype Float32,
/// with invalid points assigned the \p invalid_fill value.
Image CreateVertexMap(const core::Tensor &intrinsics,
float invalid_fill = 0.0f);
/// \brief Create a normal map from a vertex map.
///
/// The input vertex map image should be of shape (rows, cols, channels=3)
/// and Dtype Float32. This uses a cross product of \f$V(r, c+1)-V(r, c)\f$
/// and \f$V(r+1, c)-V(r, c)\f$. The input vertex map is expected to be the
/// output of CreateVertexMap. You may need to start with a filtered depth
/// image (e.g. with FilterBilateral) to obtain good results.
///
/// \param invalid_fill Value to fill in for invalid points, and to fill-in
/// if no valid neighbor is found. Use NAN, INFINITY or 0.0 (default). Must
/// be consistent with \p clip_fill in CreateVertexMap.
///
/// \returns Normal map of shape (rows, cols, channels=3) and Dtype Float32,
/// with invalid normals assigned the \p invalid_fill value.
Image CreateNormalMap(float invalid_fill = 0.0f);
/// \brief Colorize an input depth image (with Dtype UInt16 or Float32).
///
/// The image values are divided by scale, then clamped within [min_value,
/// max_value] and finally converted to an RGB image using the Turbo
/// colormap as a lookup table.
///
/// \returns Full color depth map of shape (rows, cols, channels=3) and
/// Dtype UInt8.
Image ColorizeDepth(float scale, float min_value, float max_value);
/// \brief Compute min 2D coordinates for the data (always {0, 0}).
core::Tensor GetMinBound() const {
return core::Tensor::Zeros({2}, core::Int64);
}
/// \brief Compute max 2D coordinates for the data ({rows, cols}).
core::Tensor GetMaxBound() const {
return core::Tensor(std::vector<int64_t>{GetRows(), GetCols()}, {2},
core::Int64);
}
/// \brief Create from a legacy Open3D Image.
static Image FromLegacy(const open3d::geometry::Image &image_legacy,
const core::Device &Device = core::Device("CPU:0"));
/// \brief Convert to legacy Image type.
open3d::geometry::Image ToLegacy() const;
/// \brief Text description.
std::string ToString() const;
/// Do we use IPP ICV for accelerating image processing operations?
#ifdef WITH_IPPICV
static constexpr bool HAVE_IPPICV = true;
#else
static constexpr bool HAVE_IPPICV = false;
#endif
protected:
/// Internal data of the Image, represented as a contiguous 3D tensor of
/// shape {rows, cols, channels}. Image properties can be obtained from the
/// tensor.
core::Tensor data_;
};
} // namespace geometry
} // namespace t
} // namespace open3d