Skip to content

How biglimit is Obtained #145

@zhangqi-eiq

Description

@zhangqi-eiq

rendered = CudaRasterizer::Rasterizer::forward(
geomFunc,
binningFunc,
imgFunc,
P, degree, M,
background.contiguous().data(),
W, H,
indices.contiguous().data(),
parent_indices.contiguous().data(),
ts.contiguous().data(),
kids.contiguous().data(),
means3D.contiguous().data(),
sh.contiguous().data_ptr(),
colors.contiguous().data(),
opacity.contiguous().data(),
scales.contiguous().data_ptr(),
scale_modifier,
rotations.contiguous().data_ptr(),
cov3D_precomp.contiguous().data(),
viewmatrix.contiguous().data(),
projmatrix.contiguous().data(),
campos.contiguous().data(),
tan_fovx,
tan_fovy,
prefiltered,
out_color.contiguous().data(),
out_invdepthptr,
radii.contiguous().data(),
rects.contiguous().data(),
nullptr,
nullptr,
debug);
}
The code above is the call to CudaRasterizer::Rasterizer::forward.
The code below is the implementation of that function:
int CudaRasterizer::Rasterizer::forward(
std::function<char* (size_t)> geometryBuffer,
std::function<char* (size_t)> binningBuffer,
std::function<char* (size_t)> imageBuffer,
const int P, int D, int M,
const float* background,
const int width, int height,
const int* indices,
const int* parent_indices,
const float* ts,
const int* kids,
const float* means3D,
const float* shs,
const float* colors_precomp,
const float* opacities,
const float* scales,
const float scale_modifier,
const float* rotations,
const float* cov3D_precomp,
const float* view_matrix,
const float* proj_matrix,
const float* cam_pos,
const float tan_fovx, float tan_fovy,
const bool prefiltered,
float* out_color,
float* depth,
int* radii,
int* rects,
float* boxmin,
float* boxmax,
bool debug,
int skyboxnum,
void* streamy,
int* num_rendered,
float biglimit,
bool on_cpu)
{
cudaStream_t stream = (cudaStream_t)streamy;
const float focal_y = height / (2.0f * tan_fovy);
const float focal_x = width / (2.0f * tan_fovx);

size_t chunk_size = required<GeometryState>(P);
char* chunkptr = geometryBuffer(chunk_size);
GeometryState geomState = GeometryState::fromChunk(chunkptr, P);

if (radii == nullptr)
{
	radii = geomState.internal_radii;
}

dim3 tile_grid((width + BLOCK_X - 1) / BLOCK_X, (height + BLOCK_Y - 1) / BLOCK_Y, 1);
dim3 block(BLOCK_X, BLOCK_Y, 1);

// Dynamically resize image-based auxiliary buffers during training
size_t img_chunk_size = required<ImageState>(width * height);
char* img_chunkptr = imageBuffer(img_chunk_size);
ImageState imgState = ImageState::fromChunk(img_chunkptr, width * height);

if (NUM_CHANNELS != 3 && colors_precomp == nullptr)
{
	throw std::runtime_error("For non-RGB, provide precomputed Gaussian colors!");
}

float3 minn = { -FLT_MAX, -FLT_MAX, -FLT_MAX };
float3 maxx = { FLT_MAX, FLT_MAX, FLT_MAX };
if (boxmin != nullptr)
{
	minn = *((float3*)boxmin);
	maxx = *((float3*)boxmax);
}

// Run preprocessing per-Gaussian (transformation, bounding, conversion of SHs to RGB)
CHECK_CUDA(FORWARD::preprocess(
	P, D, M,
	indices,
	parent_indices,
	ts,
	means3D,
	(glm::vec3*)scales,
	scale_modifier,
	(glm::vec4*)rotations,
	opacities,
	shs,
	geomState.clamped,
	geomState.p_clamped,
	cov3D_precomp,
	colors_precomp,
	view_matrix,
	proj_matrix,
	(glm::vec3*)cam_pos,
	width, height,
	focal_x, focal_y,
	tan_fovx, tan_fovy,
	radii,
	geomState.means2D,
	geomState.depths,
	geomState.cov3D,
	geomState.rgb,
	geomState.conic_opacity,
	tile_grid,
	geomState.tiles_touched,
	prefiltered,
	(int2*)rects,
	minn,
	maxx,
	skyboxnum,
	stream,
	biglimit,
	on_cpu
), debug);

// Compute prefix sum over full list of touched tile counts by Gaussians
// E.g., [2, 3, 0, 2, 1] -> [2, 5, 5, 7, 8]
CHECK_CUDA(cub::DeviceScan::InclusiveSum(geomState.scanning_space, geomState.scan_size, geomState.tiles_touched, geomState.point_offsets, P, stream), debug);

// Retrieve total number of Gaussian instances to launch and resize aux buffers

int backup;
if (num_rendered == nullptr)
	num_rendered = &backup;

CHECK_CUDA(cudaMemcpyAsync(num_rendered, geomState.point_offsets + P - 1, sizeof(int), cudaMemcpyDeviceToHost, stream), debug);
cudaStreamSynchronize(stream);

if (*num_rendered == 0)
	return 0;

size_t binning_chunk_size = required<BinningState>(*num_rendered);
char* binning_chunkptr = binningBuffer(binning_chunk_size);
BinningState binningState = BinningState::fromChunk(binning_chunkptr, *num_rendered);

// For each instance to be rendered, produce adequate [ tile | depth ] key 
// and corresponding dublicated Gaussian indices to be sorted
duplicateWithKeys << <(P + 255) / 256, 256, 0, stream >> > (
	P,
	geomState.means2D,
	geomState.depths,
	geomState.point_offsets,
	binningState.point_list_keys_unsorted,
	binningState.point_list_unsorted,
	radii,
	tile_grid,
	(int2*)rects
	);
CHECK_CUDA(, debug)

int bit = getHigherMsb(tile_grid.x * tile_grid.y);

// Sort complete list of (duplicated) Gaussian indices by keys
CHECK_CUDA(cub::DeviceRadixSort::SortPairs(
	binningState.list_sorting_space,
	binningState.sorting_size,
	binningState.point_list_keys_unsorted, binningState.point_list_keys,
	binningState.point_list_unsorted, binningState.point_list,
	*num_rendered, 0, 32 + bit, stream), debug);

CHECK_CUDA(cudaMemsetAsync(imgState.ranges, 0, tile_grid.x * tile_grid.y * sizeof(uint2), stream), debug);

// Identify start and end of per-tile workloads in sorted list
if (*num_rendered > 0)
	 identifyTileRanges << <(*num_rendered + 255) / 256, 256, 0, stream >> > (
		*num_rendered,
		binningState.point_list_keys,
		imgState.ranges);
CHECK_CUDA(, debug)


// Let each tile blend its range of Gaussians independently in parallel
const float* feature_ptr = colors_precomp != nullptr ? colors_precomp : geomState.rgb;
CHECK_CUDA(FORWARD::render(
	tile_grid, block,
	imgState.ranges,
	binningState.point_list,
	width, height,
	ts,
	kids,
	geomState.means2D,
	feature_ptr,
	geomState.conic_opacity,
	imgState.accum_alpha,
	imgState.n_contrib,
	background,
	out_color,
	P,
	skyboxnum,
	stream,
	geomState.depths,
	depth), debug);

return *num_rendered;

}
The variable `biglimit` is not given any value in the call above, so how does it work?

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions