Skip to content

Commit e24b72c

Browse files
committed
offloadable
1 parent c3aed36 commit e24b72c

File tree

9 files changed

+88
-42
lines changed

9 files changed

+88
-42
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,7 @@ add_library(gtsam_points SHARED
165165
src/gtsam_points/ann/incremental_covariance_voxelmap.cpp
166166
src/gtsam_points/ann/fast_occupancy_grid.cpp
167167
# types
168+
src/gtsam_points/types/offloadable.cpp
168169
src/gtsam_points/types/point_cloud.cpp
169170
src/gtsam_points/types/point_cloud_cpu.cpp
170171
src/gtsam_points/types/point_cloud_cpu_funcs.cpp

include/gtsam_points/types/gaussian_voxelmap_gpu.hpp

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
#include <memory>
77
#include <Eigen/Core>
88

9+
#include <gtsam_points/types/offloadable.hpp>
910
#include <gtsam_points/types/gaussian_voxelmap.hpp>
1011

1112
// forward declaration
@@ -34,7 +35,7 @@ struct VoxelBucket {
3435
/**
3536
* @brief Gaussian distribution voxelmap on GPU
3637
*/
37-
class GaussianVoxelMapGPU : public GaussianVoxelMap {
38+
class GaussianVoxelMapGPU : public GaussianVoxelMap, public OffloadableGPU {
3839
public:
3940
using Ptr = std::shared_ptr<GaussianVoxelMapGPU>;
4041
using ConstPtr = std::shared_ptr<const GaussianVoxelMapGPU>;
@@ -75,11 +76,10 @@ class GaussianVoxelMapGPU : public GaussianVoxelMap {
7576
static GaussianVoxelMapGPU::Ptr load(const std::string& path);
7677

7778
// GPU memory offloading
78-
std::uint64_t last_accessed_time() const { return last_access; }
79-
80-
bool touch(CUstream_st* stream = 0);
81-
bool offload_gpu(CUstream_st* stream = 0);
82-
bool reload_gpu(CUstream_st* stream = 0);
79+
size_t memory_usage_gpu() const override;
80+
bool loaded_on_gpu() const override;
81+
bool offload_gpu(CUstream_st* stream = 0) override;
82+
bool reload_gpu(CUstream_st* stream = 0) override;
8383

8484
private:
8585
void create_bucket_table(CUstream_st* stream, const PointCloud& frame);
@@ -100,8 +100,6 @@ class GaussianVoxelMapGPU : public GaussianVoxelMap {
100100
Eigen::Matrix3f* voxel_covs; ///< Voxel covariances
101101

102102
// GPU memory offloading
103-
std::uint64_t last_access;
104-
105103
std::vector<VoxelBucket> offloaded_buckets; ///< Offloaded buckets
106104
std::vector<int> offloaded_num_points; ///< Offloaded number of points
107105
std::vector<Eigen::Vector3f> offloaded_voxel_means; ///< Offloaded voxel means
Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#pragma once
22

3+
#include <memory>
34
#include <vector>
45
#include <atomic>
56
#include <cstdint>
@@ -10,26 +11,43 @@ struct CUstream_st;
1011
namespace gtsam_points {
1112

1213
/**
13-
* @brief An interface class for offloading data on the GPU memory.
14+
* @brief An interface class for offloading data from GPU to CPU.
1415
*/
1516
class OffloadableGPU {
1617
public:
18+
using Ptr = std::shared_ptr<OffloadableGPU>;
19+
using ConstPtr = std::shared_ptr<const OffloadableGPU>;
20+
1721
OffloadableGPU();
1822
virtual ~OffloadableGPU();
1923

20-
// GPU memory offloading
24+
/// @brief Current global access time counter
2125
static std::uint64_t current_access_time();
26+
27+
/// @brief Time of the last access to this object
2228
std::uint64_t last_accessed_time() const;
2329

30+
/// @brief Memory usage in bytes on the GPU
2431
virtual size_t memory_usage_gpu() const { return 0; }
2532

26-
virtual bool touch(CUstream_st* stream = 0) = 0;
33+
/// @brief Check if the data is loaded on the GPU
34+
virtual bool loaded_on_gpu() const = 0;
35+
36+
/// @brief Reload data from CPU to GPU (if necessary) and update the last access time
37+
/// @return true if the data offload is conducted, false if the data is already on the CPU
38+
virtual bool touch(CUstream_st* stream = 0);
39+
40+
/// @brief Offload data from GPU to CPU
41+
/// @return true if the data offload is conducted, false if the data is already on the CPU
2742
virtual bool offload_gpu(CUstream_st* stream = 0) = 0;
43+
44+
/// @brief Reload data from CPU to GPU
45+
/// @return true if the data upload is conducted, false if the data is already on the GPU
2846
virtual bool reload_gpu(CUstream_st* stream = 0) = 0;
2947

30-
private:
31-
static std::atomic_uint64_t access_counter; ///< Counter for the last access time
32-
std::uint64_t last_access;
48+
protected:
49+
static std::atomic_uint64_t access_counter; ///< Global counter for the last access time
50+
std::uint64_t last_access; ///< Last access time of this object
3351
};
3452

3553
} // namespace gtsam_points

include/gtsam_points/types/point_cloud_cpu.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,9 @@ struct PointCloudCPU : public PointCloud {
9494

9595
static PointCloudCPU::Ptr load(const std::string& path);
9696

97+
/// @brief Memory usage in bytes
98+
size_t memory_usage() const;
99+
97100
public:
98101
std::vector<double> times_storage;
99102
std::vector<Eigen::Vector4d> points_storage;

include/gtsam_points/types/point_cloud_gpu.hpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -129,10 +129,10 @@ struct PointCloudGPU : public PointCloudCPU, public OffloadableGPU {
129129
void download_points(CUstream_st* stream = 0);
130130

131131
// GPU memory offloading
132-
virtual size_t memory_usage_gpu() const override;
133-
134-
bool offload_gpu(CUstream_st* stream = 0);
135-
bool reload_gpu(CUstream_st* stream = 0);
132+
size_t memory_usage_gpu() const override;
133+
bool loaded_on_gpu() const override;
134+
bool offload_gpu(CUstream_st* stream = 0) override;
135+
bool reload_gpu(CUstream_st* stream = 0) override;
136136
};
137137

138138
// Device to host data transfer

src/gtsam_points/types/gaussian_voxelmap_gpu.cu

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -171,8 +171,7 @@ GaussianVoxelMapGPU::GaussianVoxelMapGPU(
171171
CUstream_st* stream)
172172
: stream(stream),
173173
init_num_buckets(init_num_buckets),
174-
target_points_drop_rate(target_points_drop_rate),
175-
last_access(0) {
174+
target_points_drop_rate(target_points_drop_rate) {
176175
voxelmap_info.num_voxels = 0;
177176
voxelmap_info.num_buckets = init_num_buckets;
178177
voxelmap_info.max_bucket_scan_count = max_bucket_scan_count;
@@ -439,9 +438,13 @@ GaussianVoxelMapGPU::Ptr GaussianVoxelMapGPU::load(const std::string& path) {
439438
return voxelmap;
440439
}
441440

442-
bool GaussianVoxelMapGPU::touch(CUstream_st* stream) {
443-
last_access = PointCloudGPU::current_access_time();
444-
return reload_gpu(stream);
441+
size_t GaussianVoxelMapGPU::memory_usage_gpu() const {
442+
return voxelmap_info.num_voxels * (sizeof(int) + sizeof(Eigen::Vector3f) + sizeof(Eigen::Matrix3f)) +
443+
voxelmap_info.num_buckets * sizeof(gtsam_points::VoxelBucket);
444+
}
445+
446+
bool GaussianVoxelMapGPU::loaded_on_gpu() const {
447+
return buckets;
445448
}
446449

447450
bool GaussianVoxelMapGPU::offload_gpu(CUstream_st* stream) {

src/gtsam_points/types/gaussian_voxelmap_gpu_funcs.cu

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,25 @@ namespace gtsam_points {
1919

2020
namespace {
2121

22+
void make_sure_loaded_on_gpu(const GaussianVoxelMapGPU::ConstPtr& target_gpu, CUstream_st* stream) {
23+
if (!target_gpu->loaded_on_gpu()) {
24+
// A bit hacky, but we need to ensure that the target voxelmap is loaded on GPU
25+
const_cast<GaussianVoxelMapGPU*>(target_gpu.get())->touch(stream);
26+
}
27+
}
28+
29+
void make_sure_loaded_on_gpu(const PointCloud::ConstPtr& source, CUstream_st* stream) {
30+
auto source_gpu = std::dynamic_pointer_cast<const PointCloudGPU>(source);
31+
if (!source_gpu) {
32+
std::cerr << "error: Source point cloud is not a PointCloudGPU!!" << std::endl;
33+
abort();
34+
}
35+
36+
if (!source_gpu->has_points_gpu()) {
37+
const_cast<PointCloudGPU*>(source_gpu.get())->touch(stream);
38+
}
39+
}
40+
2241
struct transform_means_kernel {
2342
transform_means_kernel(const thrust::device_ptr<const Eigen::Isometry3f>& transform_ptr) : transform_ptr(transform_ptr) {}
2443

@@ -167,6 +186,9 @@ overlap_gpu(const GaussianVoxelMap::ConstPtr& target_, const PointCloud::ConstPt
167186
abort();
168187
}
169188

189+
make_sure_loaded_on_gpu(target, stream);
190+
make_sure_loaded_on_gpu(source, stream);
191+
170192
bool* overlap;
171193
check_error << cudaMallocAsync(&overlap, sizeof(bool) * source->size(), stream);
172194
thrust::device_ptr<bool> overlap_ptr(overlap);
@@ -210,6 +232,9 @@ overlap_gpu(const GaussianVoxelMap::ConstPtr& target_, const PointCloud::ConstPt
210232
abort();
211233
}
212234

235+
make_sure_loaded_on_gpu(target, stream);
236+
make_sure_loaded_on_gpu(source, stream);
237+
213238
Eigen::Isometry3f h_delta = delta.cast<float>();
214239
Eigen::Isometry3f* d_delta;
215240
check_error << cudaMallocAsync(&d_delta, sizeof(Eigen::Isometry3f), stream);
@@ -237,7 +262,10 @@ double overlap_gpu(
237262
if (!targets[i]) {
238263
std::cerr << "error: Failed to cast target voxelmap to GaussianVoxelMapGPU!!" << std::endl;
239264
}
265+
266+
make_sure_loaded_on_gpu(targets[i], stream);
240267
}
268+
make_sure_loaded_on_gpu(source, stream);
241269

242270
std::vector<Eigen::Isometry3f> h_deltas(deltas_.size());
243271
std::transform(deltas_.begin(), deltas_.end(), h_deltas.begin(), [](const Eigen::Isometry3d& delta) { return delta.cast<float>(); });
@@ -304,9 +332,8 @@ std::vector<double> overlap_gpu(
304332
std::cerr << "error: Failed to cast target voxelmap to GaussianVoxelMapGPU!!" << std::endl;
305333
}
306334

307-
if (!sources[i]->has_points_gpu()) {
308-
std::cerr << "error: GPU source points have not been allocated!!" << std::endl;
309-
}
335+
make_sure_loaded_on_gpu(targets[i], stream);
336+
make_sure_loaded_on_gpu(sources[i], stream);
310337

311338
max_num_points = std::max(max_num_points, sources[i]->size());
312339
}

src/gtsam_points/types/offloadable.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ namespace gtsam_points {
44

55
std::atomic_uint64_t OffloadableGPU::access_counter(0); ///< Counter for the last access time
66

7-
OffloadableGPU::OffloadableGPU() : last_access(0) {}
7+
OffloadableGPU::OffloadableGPU() : last_access(access_counter.load()) {}
88

99
OffloadableGPU::~OffloadableGPU() {}
1010

src/gtsam_points/types/point_cloud_gpu.cu

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,9 @@
1111

1212
namespace gtsam_points {
1313

14-
std::atomic_uint64_t PointCloudGPU::access_time_counter(0);
15-
1614
// constructor with points
1715
template <typename T, int D>
18-
PointCloudGPU::PointCloudGPU(const Eigen::Matrix<T, D, 1>* points, int num_points) : PointCloudCPU(points, num_points),
19-
last_access(0) {
16+
PointCloudGPU::PointCloudGPU(const Eigen::Matrix<T, D, 1>* points, int num_points) : PointCloudCPU(points, num_points) {
2017
add_points_gpu(points, num_points);
2118
}
2219

@@ -64,7 +61,7 @@ PointCloudGPU::Ptr PointCloudGPU::clone(const PointCloud& frame, CUstream_st* st
6461
return new_frame;
6562
}
6663

67-
PointCloudGPU::PointCloudGPU() : last_access(0) {}
64+
PointCloudGPU::PointCloudGPU() {}
6865

6966
PointCloudGPU::~PointCloudGPU() {
7067
if (times_gpu) {
@@ -280,31 +277,30 @@ std::vector<float> download_times_gpu(const gtsam_points::PointCloud& frame, CUs
280277
return times;
281278
}
282279

283-
bool PointCloudGPU::touch(CUstream_st* stream) {
284-
last_access = (access_time_counter++);
285-
return reload_gpu(stream);
286-
}
287-
288280
size_t PointCloudGPU::memory_usage_gpu() const {
289281
size_t bytes = 0;
290-
if (times_gpu) {
282+
if (times) {
291283
bytes += sizeof(float) * num_points;
292284
}
293-
if (points_gpu) {
285+
if (points) {
294286
bytes += sizeof(Eigen::Vector3f) * num_points;
295287
}
296-
if (normals_gpu) {
288+
if (normals) {
297289
bytes += sizeof(Eigen::Vector3f) * num_points;
298290
}
299-
if (covs_gpu) {
291+
if (covs) {
300292
bytes += sizeof(Eigen::Matrix3f) * num_points;
301293
}
302-
if (intensities_gpu) {
294+
if (intensities) {
303295
bytes += sizeof(float) * num_points;
304296
}
305297
return bytes;
306298
}
307299

300+
bool PointCloudGPU::loaded_on_gpu() const {
301+
return points_gpu || times_gpu || normals_gpu || covs_gpu || intensities_gpu;
302+
}
303+
308304
bool PointCloudGPU::offload_gpu(CUstream_st* stream) {
309305
if (!points_gpu && !times_gpu && !normals_gpu && !covs_gpu && !intensities_gpu) {
310306
return false; // Nothing to offload

0 commit comments

Comments
 (0)