offloadable

koide3 · koide3 · commit e24b72ce9af6 · 2025-06-24T19:51:42.000+09:00
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -165,6 +165,7 @@ add_library(gtsam_points SHARED
   src/gtsam_points/ann/incremental_covariance_voxelmap.cpp
   src/gtsam_points/ann/fast_occupancy_grid.cpp
   # types
+  src/gtsam_points/types/offloadable.cpp
   src/gtsam_points/types/point_cloud.cpp
   src/gtsam_points/types/point_cloud_cpu.cpp
   src/gtsam_points/types/point_cloud_cpu_funcs.cpp
diff --git a/include/gtsam_points/types/gaussian_voxelmap_gpu.hpp b/include/gtsam_points/types/gaussian_voxelmap_gpu.hpp
@@ -6,6 +6,7 @@
 #include <memory>
 #include <Eigen/Core>
 
+#include <gtsam_points/types/offloadable.hpp>
 #include <gtsam_points/types/gaussian_voxelmap.hpp>
 
 // forward declaration
@@ -34,7 +35,7 @@ struct VoxelBucket {
 /**
  * @brief Gaussian distribution voxelmap on GPU
  */
-class GaussianVoxelMapGPU : public GaussianVoxelMap {
+class GaussianVoxelMapGPU : public GaussianVoxelMap, public OffloadableGPU {
 public:
   using Ptr = std::shared_ptr<GaussianVoxelMapGPU>;
   using ConstPtr = std::shared_ptr<const GaussianVoxelMapGPU>;
@@ -75,11 +76,10 @@ class GaussianVoxelMapGPU : public GaussianVoxelMap {
   static GaussianVoxelMapGPU::Ptr load(const std::string& path);
 
   // GPU memory offloading
-  std::uint64_t last_accessed_time() const { return last_access; }
-
-  bool touch(CUstream_st* stream = 0);
-  bool offload_gpu(CUstream_st* stream = 0);
-  bool reload_gpu(CUstream_st* stream = 0);
+  size_t memory_usage_gpu() const override;
+  bool loaded_on_gpu() const override;
+  bool offload_gpu(CUstream_st* stream = 0) override;
+  bool reload_gpu(CUstream_st* stream = 0) override;
 
 private:
   void create_bucket_table(CUstream_st* stream, const PointCloud& frame);
@@ -100,8 +100,6 @@ class GaussianVoxelMapGPU : public GaussianVoxelMap {
   Eigen::Matrix3f* voxel_covs;   ///< Voxel covariances
 
   // GPU memory offloading
-  std::uint64_t last_access;
-
   std::vector<VoxelBucket> offloaded_buckets;          ///< Offloaded buckets
   std::vector<int> offloaded_num_points;               ///< Offloaded number of points
   std::vector<Eigen::Vector3f> offloaded_voxel_means;  ///< Offloaded voxel means
diff --git a/include/gtsam_points/types/offloadable.hpp b/include/gtsam_points/types/offloadable.hpp
@@ -1,5 +1,6 @@
 #pragma once
 
+#include <memory>
 #include <vector>
 #include <atomic>
 #include <cstdint>
@@ -10,26 +11,43 @@ struct CUstream_st;
 namespace gtsam_points {
 
 /**
- * @brief An interface class for offloading data on the GPU memory.
+ * @brief An interface class for offloading data from GPU to CPU.
  */
 class OffloadableGPU {
 public:
+  using Ptr = std::shared_ptr<OffloadableGPU>;
+  using ConstPtr = std::shared_ptr<const OffloadableGPU>;
+
   OffloadableGPU();
   virtual ~OffloadableGPU();
 
-  // GPU memory offloading
+  /// @brief Current global access time counter
   static std::uint64_t current_access_time();
+
+  /// @brief Time of the last access to this object
   std::uint64_t last_accessed_time() const;
 
+  /// @brief Memory usage in bytes on the GPU
   virtual size_t memory_usage_gpu() const { return 0; }
 
-  virtual bool touch(CUstream_st* stream = 0) = 0;
+  /// @brief Check if the data is loaded on the GPU
+  virtual bool loaded_on_gpu() const = 0;
+
+  /// @brief Reload data from CPU to GPU (if necessary) and update the last access time
+  /// @return true if the data upload is conducted, false if the data is already on the GPU
+  virtual bool touch(CUstream_st* stream = 0);
+
+  /// @brief Offload data from GPU to CPU
+  /// @return true if the data offload is conducted, false if the data is already on the CPU
   virtual bool offload_gpu(CUstream_st* stream = 0) = 0;
+
+  /// @brief Reload data from CPU to GPU
+  /// @return true if the data upload is conducted, false if the data is already on the GPU
   virtual bool reload_gpu(CUstream_st* stream = 0) = 0;
 
-private:
-  static std::atomic_uint64_t access_counter;  ///< Counter for the last access time
-  std::uint64_t last_access;
+protected:
+  static std::atomic_uint64_t access_counter;  ///< Global counter for the last access time
+  std::uint64_t last_access;                   ///< Last access time of this object
 };
 
 }  // namespace gtsam_points
diff --git a/include/gtsam_points/types/point_cloud_cpu.hpp b/include/gtsam_points/types/point_cloud_cpu.hpp
@@ -94,6 +94,9 @@ struct PointCloudCPU : public PointCloud {
 
   static PointCloudCPU::Ptr load(const std::string& path);
 
+  /// @brief Memory usage in bytes
+  size_t memory_usage() const;
+
 public:
   std::vector<double> times_storage;
   std::vector<Eigen::Vector4d> points_storage;
diff --git a/include/gtsam_points/types/point_cloud_gpu.hpp b/include/gtsam_points/types/point_cloud_gpu.hpp
@@ -129,10 +129,10 @@ struct PointCloudGPU : public PointCloudCPU, public OffloadableGPU {
   void download_points(CUstream_st* stream = 0);
 
   // GPU memory offloading
-  virtual size_t memory_usage_gpu() const override;
-
-  bool offload_gpu(CUstream_st* stream = 0);
-  bool reload_gpu(CUstream_st* stream = 0);
+  size_t memory_usage_gpu() const override;
+  bool loaded_on_gpu() const override;
+  bool offload_gpu(CUstream_st* stream = 0) override;
+  bool reload_gpu(CUstream_st* stream = 0) override;
 };
 
 // Device to host data transfer
diff --git a/src/gtsam_points/types/gaussian_voxelmap_gpu.cu b/src/gtsam_points/types/gaussian_voxelmap_gpu.cu
@@ -171,8 +171,7 @@ GaussianVoxelMapGPU::GaussianVoxelMapGPU(
   CUstream_st* stream)
 : stream(stream),
   init_num_buckets(init_num_buckets),
-  target_points_drop_rate(target_points_drop_rate),
-  last_access(0) {
+  target_points_drop_rate(target_points_drop_rate) {
   voxelmap_info.num_voxels = 0;
   voxelmap_info.num_buckets = init_num_buckets;
   voxelmap_info.max_bucket_scan_count = max_bucket_scan_count;
@@ -439,9 +438,13 @@ GaussianVoxelMapGPU::Ptr GaussianVoxelMapGPU::load(const std::string& path) {
   return voxelmap;
 }
 
-bool GaussianVoxelMapGPU::touch(CUstream_st* stream) {
-  last_access = PointCloudGPU::current_access_time();
-  return reload_gpu(stream);
+size_t GaussianVoxelMapGPU::memory_usage_gpu() const {  // Size computed from the voxelmap_info counts only; no allocation state is consulted
+  return voxelmap_info.num_voxels * (sizeof(int) + sizeof(Eigen::Vector3f) + sizeof(Eigen::Matrix3f)) +  // one int, one Vector3f and one Matrix3f per voxel
+         voxelmap_info.num_buckets * sizeof(gtsam_points::VoxelBucket);                                  // plus one VoxelBucket per bucket
+}
+
+bool GaussianVoxelMapGPU::loaded_on_gpu() const {
+  return buckets;  // implicit conversion to bool; presumably `buckets` is the device-side bucket table pointer, unset while offloaded (confirm)
 }
 
 bool GaussianVoxelMapGPU::offload_gpu(CUstream_st* stream) {
diff --git a/src/gtsam_points/types/gaussian_voxelmap_gpu_funcs.cu b/src/gtsam_points/types/gaussian_voxelmap_gpu_funcs.cu
@@ -19,6 +19,25 @@ namespace gtsam_points {
 
 namespace {
 
+void make_sure_loaded_on_gpu(const GaussianVoxelMapGPU::ConstPtr& target_gpu, CUstream_st* stream) {  // target_gpu is dereferenced unconditionally, so it must be non-null
+  if (!target_gpu->loaded_on_gpu()) {
+    // Hacky: touch() is a non-const member, so constness is cast away here to reload the voxelmap onto the GPU using the given stream
+    const_cast<GaussianVoxelMapGPU*>(target_gpu.get())->touch(stream);
+  }
+}
+
+void make_sure_loaded_on_gpu(const PointCloud::ConstPtr& source, CUstream_st* stream) {  // Point-cloud overload: aborts unless source is a PointCloudGPU
+  auto source_gpu = std::dynamic_pointer_cast<const PointCloudGPU>(source);
+  if (!source_gpu) {  // cast failed (this branch is also taken when source itself is null)
+    std::cerr << "error: Source point cloud is not a PointCloudGPU!!" << std::endl;
+    abort();
+  }
+
+  if (!source_gpu->has_points_gpu()) {  // NOTE(review): the voxelmap overload tests loaded_on_gpu(); confirm has_points_gpu() is the intended check here
+    const_cast<PointCloudGPU*>(source_gpu.get())->touch(stream);  // touch() is non-const, hence the const_cast
+  }
+}
+
 struct transform_means_kernel {
   transform_means_kernel(const thrust::device_ptr<const Eigen::Isometry3f>& transform_ptr) : transform_ptr(transform_ptr) {}
 
@@ -167,6 +186,9 @@ overlap_gpu(const GaussianVoxelMap::ConstPtr& target_, const PointCloud::ConstPt
     abort();
   }
 
+  make_sure_loaded_on_gpu(target, stream);
+  make_sure_loaded_on_gpu(source, stream);
+
   bool* overlap;
   check_error << cudaMallocAsync(&overlap, sizeof(bool) * source->size(), stream);
   thrust::device_ptr<bool> overlap_ptr(overlap);
@@ -210,6 +232,9 @@ overlap_gpu(const GaussianVoxelMap::ConstPtr& target_, const PointCloud::ConstPt
     abort();
   }
 
+  make_sure_loaded_on_gpu(target, stream);
+  make_sure_loaded_on_gpu(source, stream);
+
   Eigen::Isometry3f h_delta = delta.cast<float>();
   Eigen::Isometry3f* d_delta;
   check_error << cudaMallocAsync(&d_delta, sizeof(Eigen::Isometry3f), stream);
@@ -237,7 +262,10 @@ double overlap_gpu(
     if (!targets[i]) {
       std::cerr << "error: Failed to cast target voxelmap to GaussianVoxelMapGPU!!" << std::endl;
     }
+
+    make_sure_loaded_on_gpu(targets[i], stream);
   }
+  make_sure_loaded_on_gpu(source, stream);
 
   std::vector<Eigen::Isometry3f> h_deltas(deltas_.size());
   std::transform(deltas_.begin(), deltas_.end(), h_deltas.begin(), [](const Eigen::Isometry3d& delta) { return delta.cast<float>(); });
@@ -304,9 +332,8 @@ std::vector<double> overlap_gpu(
       std::cerr << "error: Failed to cast target voxelmap to GaussianVoxelMapGPU!!" << std::endl;
     }
 
-    if (!sources[i]->has_points_gpu()) {
-      std::cerr << "error: GPU source points have not been allocated!!" << std::endl;
-    }
+    make_sure_loaded_on_gpu(targets[i], stream);
+    make_sure_loaded_on_gpu(sources[i], stream);
 
     max_num_points = std::max(max_num_points, sources[i]->size());
   }
diff --git a/src/gtsam_points/types/offloadable.cpp b/src/gtsam_points/types/offloadable.cpp
@@ -4,7 +4,7 @@ namespace gtsam_points {
 
 std::atomic_uint64_t OffloadableGPU::access_counter(0);  ///< Counter for the last access time
 
-OffloadableGPU::OffloadableGPU() : last_access(0) {}
+OffloadableGPU::OffloadableGPU() : last_access(access_counter.load()) {}  // a new object starts with the current value of the global access counter
 
 OffloadableGPU::~OffloadableGPU() {}
 
diff --git a/src/gtsam_points/types/point_cloud_gpu.cu b/src/gtsam_points/types/point_cloud_gpu.cu
@@ -11,12 +11,9 @@
 
 namespace gtsam_points {
 
-std::atomic_uint64_t PointCloudGPU::access_time_counter(0);
-
 // constructor with points
 template <typename T, int D>
-PointCloudGPU::PointCloudGPU(const Eigen::Matrix<T, D, 1>* points, int num_points) : PointCloudCPU(points, num_points),
-                                                                                     last_access(0) {
+PointCloudGPU::PointCloudGPU(const Eigen::Matrix<T, D, 1>* points, int num_points) : PointCloudCPU(points, num_points) {  // last_access is set by the OffloadableGPU base constructor
   add_points_gpu(points, num_points);
 }
 
@@ -64,7 +61,7 @@ PointCloudGPU::Ptr PointCloudGPU::clone(const PointCloud& frame, CUstream_st* st
   return new_frame;
 }
 
-PointCloudGPU::PointCloudGPU() : last_access(0) {}
+PointCloudGPU::PointCloudGPU() {}  // no explicit member initializers; last_access is set by the OffloadableGPU base constructor
 
 PointCloudGPU::~PointCloudGPU() {
   if (times_gpu) {
@@ -280,31 +277,30 @@ std::vector<float> download_times_gpu(const gtsam_points::PointCloud& frame, CUs
   return times;
 }
 
-bool PointCloudGPU::touch(CUstream_st* stream) {
-  last_access = (access_time_counter++);
-  return reload_gpu(stream);
-}
-
 size_t PointCloudGPU::memory_usage_gpu() const {
   size_t bytes = 0;
-  if (times_gpu) {
+  if (times) {  // NOTE(review): this and the checks below now test `times`/`points`/... instead of the `*_gpu` pointers; presumably so the size does not depend on whether the GPU buffers are currently allocated (confirm)
     bytes += sizeof(float) * num_points;
   }
-  if (points_gpu) {
+  if (points) {
     bytes += sizeof(Eigen::Vector3f) * num_points;
   }
-  if (normals_gpu) {
+  if (normals) {
     bytes += sizeof(Eigen::Vector3f) * num_points;
   }
-  if (covs_gpu) {
+  if (covs) {
     bytes += sizeof(Eigen::Matrix3f) * num_points;
   }
-  if (intensities_gpu) {
+  if (intensities) {
     bytes += sizeof(float) * num_points;
   }
   return bytes;
 }
 
+bool PointCloudGPU::loaded_on_gpu() const {
+  return points_gpu || times_gpu || normals_gpu || covs_gpu || intensities_gpu;  // true when at least one *_gpu pointer is set; exact negation of offload_gpu()'s "nothing to offload" test
+}
+
 bool PointCloudGPU::offload_gpu(CUstream_st* stream) {
   if (!points_gpu && !times_gpu && !normals_gpu && !covs_gpu && !intensities_gpu) {
     return false;  // Nothing to offload