-
Notifications
You must be signed in to change notification settings - Fork 121
Description
rendered = CudaRasterizer::Rasterizer::forward(
geomFunc,
binningFunc,
imgFunc,
P, degree, M,
background.contiguous().data(),
W, H,
indices.contiguous().data(),
parent_indices.contiguous().data(),
ts.contiguous().data(),
kids.contiguous().data(),
means3D.contiguous().data(),
sh.contiguous().data_ptr(),
colors.contiguous().data(),
opacity.contiguous().data(),
scales.contiguous().data_ptr(),
scale_modifier,
rotations.contiguous().data_ptr(),
cov3D_precomp.contiguous().data(),
viewmatrix.contiguous().data(),
projmatrix.contiguous().data(),
campos.contiguous().data(),
tan_fovx,
tan_fovy,
prefiltered,
out_color.contiguous().data(),
out_invdepthptr,
radii.contiguous().data(),
rects.contiguous().data(),
nullptr,
nullptr,
debug);
}
The code above calls CudaRasterizer::Rasterizer::forward.
The code below is the implementation of that function:
// Forward rasterization pass: preprocesses P Gaussians, expands them into
// per-tile instances, sorts those instances by key, determines each tile's range
// in the sorted list, and lets every tile blend its range into out_color.
//
// Parameters (only what this function itself relies on is stated):
//   geometryBuffer / binningBuffer / imageBuffer
//                 Callbacks that receive a byte count and return a char* to
//                 scratch storage; the returned chunk is carved up by the
//                 respective State::fromChunk helper.
//   P             Number of Gaussians. P <= 0 returns 0 immediately.
//   width, height Output image size; used for the tile grid and focal lengths.
//   radii         Optional. When nullptr, geomState.internal_radii is used.
//   boxmin/boxmax Optional. Each is read as a float3 when non-null; otherwise
//                 the bound defaults to -FLT_MAX / +FLT_MAX respectively.
//   colors_precomp
//                 When non-null it is used as the per-Gaussian feature input to
//                 the render step instead of geomState.rgb. Required when
//                 NUM_CHANNELS != 3.
//   depth         Forwarded as the last argument of FORWARD::render.
//   streamy       Opaque handle that is cast to cudaStream_t; all work in this
//                 function is issued on that stream.
//   num_rendered  Optional host-side output for the total instance count. When
//                 nullptr a local variable is used instead.
//   Remaining pointers/scalars are forwarded unchanged to FORWARD::preprocess
//   and/or FORWARD::render.
//
// NOTE(review): default values for the trailing parameters (skyboxnum, streamy,
// num_rendered, biglimit, on_cpu), if any, would live on the declaration of this
// function, not on this definition - confirm in the header.
//
// Returns the total number of instances (*num_rendered), or 0 when there is
// nothing to render.
// Throws std::runtime_error when NUM_CHANNELS != 3 without precomputed colors,
// or when synchronizing the stream after the instance-count copy fails.
int CudaRasterizer::Rasterizer::forward(
    std::function<char* (size_t)> geometryBuffer,
    std::function<char* (size_t)> binningBuffer,
    std::function<char* (size_t)> imageBuffer,
    const int P, int D, int M,
    const float* background,
    const int width, int height,
    const int* indices,
    const int* parent_indices,
    const float* ts,
    const int* kids,
    const float* means3D,
    const float* shs,
    const float* colors_precomp,
    const float* opacities,
    const float* scales,
    const float scale_modifier,
    const float* rotations,
    const float* cov3D_precomp,
    const float* view_matrix,
    const float* proj_matrix,
    const float* cam_pos,
    const float tan_fovx, float tan_fovy,
    const bool prefiltered,
    float* out_color,
    float* depth,
    int* radii,
    int* rects,
    float* boxmin,
    float* boxmax,
    bool debug,
    int skyboxnum,
    void* streamy,
    int* num_rendered,
    float biglimit,
    bool on_cpu)
{
    // Fall back to a local counter when the caller does not want the count.
    // Zero-initialised so it never holds an indeterminate value.
    int backup = 0;
    if (num_rendered == nullptr)
        num_rendered = &backup;

    // Nothing to rasterize. Returning here also avoids reading
    // point_offsets[P - 1] with P == 0 and launching kernels with zero blocks.
    if (P <= 0)
    {
        *num_rendered = 0;
        return 0;
    }

    cudaStream_t stream = (cudaStream_t)streamy;

    const float focal_y = height / (2.0f * tan_fovy);
    const float focal_x = width / (2.0f * tan_fovx);

    size_t chunk_size = required<GeometryState>(P);
    char* chunkptr = geometryBuffer(chunk_size);
    GeometryState geomState = GeometryState::fromChunk(chunkptr, P);

    if (radii == nullptr)
    {
        radii = geomState.internal_radii;
    }

    // One tile per BLOCK_X x BLOCK_Y pixels, rounded up to cover the image.
    dim3 tile_grid((width + BLOCK_X - 1) / BLOCK_X, (height + BLOCK_Y - 1) / BLOCK_Y, 1);
    dim3 block(BLOCK_X, BLOCK_Y, 1);

    // Dynamically resize image-based auxiliary buffers during training
    size_t img_chunk_size = required<ImageState>(width * height);
    char* img_chunkptr = imageBuffer(img_chunk_size);
    ImageState imgState = ImageState::fromChunk(img_chunkptr, width * height);

    if (NUM_CHANNELS != 3 && colors_precomp == nullptr)
    {
        throw std::runtime_error("For non-RGB, provide precomputed Gaussian colors!");
    }

    // Optional box bounds. Each pointer is checked on its own so that supplying
    // only one of them can no longer dereference a null pointer.
    float3 minn = { -FLT_MAX, -FLT_MAX, -FLT_MAX };
    float3 maxx = { FLT_MAX, FLT_MAX, FLT_MAX };
    if (boxmin != nullptr)
    {
        minn = *((float3*)boxmin);
    }
    if (boxmax != nullptr)
    {
        maxx = *((float3*)boxmax);
    }

    // Run preprocessing per-Gaussian (transformation, bounding, conversion of SHs to RGB)
    CHECK_CUDA(FORWARD::preprocess(
        P, D, M,
        indices,
        parent_indices,
        ts,
        means3D,
        (glm::vec3*)scales,
        scale_modifier,
        (glm::vec4*)rotations,
        opacities,
        shs,
        geomState.clamped,
        geomState.p_clamped,
        cov3D_precomp,
        colors_precomp,
        view_matrix,
        proj_matrix,
        (glm::vec3*)cam_pos,
        width, height,
        focal_x, focal_y,
        tan_fovx, tan_fovy,
        radii,
        geomState.means2D,
        geomState.depths,
        geomState.cov3D,
        geomState.rgb,
        geomState.conic_opacity,
        tile_grid,
        geomState.tiles_touched,
        prefiltered,
        (int2*)rects,
        minn,
        maxx,
        skyboxnum,
        stream,
        biglimit,
        on_cpu
    ), debug);

    // Compute prefix sum over full list of touched tile counts by Gaussians
    // E.g., [2, 3, 0, 2, 1] -> [2, 5, 5, 7, 8]
    CHECK_CUDA(cub::DeviceScan::InclusiveSum(geomState.scanning_space, geomState.scan_size, geomState.tiles_touched, geomState.point_offsets, P, stream), debug);

    // Retrieve total number of Gaussian instances to launch and resize aux buffers.
    // The last element of the inclusive scan is the total.
    CHECK_CUDA(cudaMemcpyAsync(num_rendered, geomState.point_offsets + P - 1, sizeof(int), cudaMemcpyDeviceToHost, stream), debug);

    // The host reads *num_rendered next, so the copy must have completed.
    // A failed sync would leave the count unreliable; surface it instead of
    // sizing buffers and launches from a bad value.
    const cudaError_t sync_status = cudaStreamSynchronize(stream);
    if (sync_status != cudaSuccess)
    {
        throw std::runtime_error(cudaGetErrorString(sync_status));
    }

    if (*num_rendered == 0)
        return 0;

    size_t binning_chunk_size = required<BinningState>(*num_rendered);
    char* binning_chunkptr = binningBuffer(binning_chunk_size);
    BinningState binningState = BinningState::fromChunk(binning_chunkptr, *num_rendered);

    // For each instance to be rendered, produce adequate [ tile | depth ] key
    // and corresponding duplicated Gaussian indices to be sorted
    duplicateWithKeys << <(P + 255) / 256, 256, 0, stream >> > (
        P,
        geomState.means2D,
        geomState.depths,
        geomState.point_offsets,
        binningState.point_list_keys_unsorted,
        binningState.point_list_unsorted,
        radii,
        tile_grid,
        (int2*)rects
        );
    CHECK_CUDA(, debug)

    int bit = getHigherMsb(tile_grid.x * tile_grid.y);

    // Sort complete list of (duplicated) Gaussian indices by keys.
    // Only bits [0, 32 + bit) of each key take part in the sort.
    CHECK_CUDA(cub::DeviceRadixSort::SortPairs(
        binningState.list_sorting_space,
        binningState.sorting_size,
        binningState.point_list_keys_unsorted, binningState.point_list_keys,
        binningState.point_list_unsorted, binningState.point_list,
        *num_rendered, 0, 32 + bit, stream), debug);

    // Clear all per-tile ranges before they are filled in.
    CHECK_CUDA(cudaMemsetAsync(imgState.ranges, 0, tile_grid.x * tile_grid.y * sizeof(uint2), stream), debug);

    // Identify start and end of per-tile workloads in sorted list.
    // The zero case already returned above; the guard still skips the launch
    // should the count ever be negative.
    if (*num_rendered > 0)
        identifyTileRanges << <(*num_rendered + 255) / 256, 256, 0, stream >> > (
            *num_rendered,
            binningState.point_list_keys,
            imgState.ranges);
    CHECK_CUDA(, debug)

    // Let each tile blend its range of Gaussians independently in parallel
    const float* feature_ptr = colors_precomp != nullptr ? colors_precomp : geomState.rgb;
    CHECK_CUDA(FORWARD::render(
        tile_grid, block,
        imgState.ranges,
        binningState.point_list,
        width, height,
        ts,
        kids,
        geomState.means2D,
        feature_ptr,
        geomState.conic_opacity,
        imgState.accum_alpha,
        imgState.n_contrib,
        background,
        out_color,
        P,
        skyboxnum,
        stream,
        geomState.depths,
        depth), debug);

    return *num_rendered;
}
The variable "biglimit" is not given any value at the call site above. How does this work?