@@ -48,14 +48,12 @@ void accel_sim_framework::simulation_loop() {
4848 if (finished_graphics == tracer.graphics_count ) {
4949 printf (" No graphics kernel parsed\n " );
5050 printf (" STEP1 - rendering done at %llu\n " , m_gpgpu_sim->gpu_tot_sim_cycle );
51- m_gpgpu_sim->all_graphics_done = true ;
5251 graphics_done = true ;
5352 }
5453 if (finished_computes == tracer.compute_count ) {
5554 printf (" No compute kernel parsed\n " );
5655 printf (" STEP1 - computes done at %llu\n " , m_gpgpu_sim->gpu_tot_sim_cycle );
5756 m_gpgpu_sim->gpu_compute_end_cycle = m_gpgpu_sim->gpu_tot_sim_cycle ;
58- m_gpgpu_sim->all_compute_done = true ;
5957 computes_done = true ;
6058 }
6159
@@ -139,14 +137,12 @@ void accel_sim_framework::simulation_loop() {
139137 printf (" All graphics kernels finished one iteration\n " );
140138 printf (" STEP1 - rendering done at %llu\n " ,
141139 m_gpgpu_sim->gpu_tot_sim_cycle );
142- m_gpgpu_sim->all_graphics_done = true ;
143140 graphics_done = true ;
144141 }
145142 if (finished_computes == tracer.compute_count && !computes_done) {
146143 printf (" All compute kernels finished one iteration\n " );
147144 printf (" STEP1 - computes done at %llu\n " , m_gpgpu_sim->gpu_tot_sim_cycle );
148145 m_gpgpu_sim->gpu_compute_end_cycle = m_gpgpu_sim->gpu_tot_sim_cycle ;
149- m_gpgpu_sim->all_compute_done = true ;
150146 computes_done = true ;
151147 }
152148 if (graphics_done && computes_done) {
@@ -165,7 +161,6 @@ void accel_sim_framework::simulation_loop() {
165161 }
166162 finished_graphics = 0 ;
167163 graphics_commands.clear ();
168- m_gpgpu_sim->all_graphics_done = false ;
169164
170165 printf (" relaunching graphics kernels\n " );
171166 }
@@ -178,7 +173,6 @@ void accel_sim_framework::simulation_loop() {
178173 }
179174 finished_computes = 0 ;
180175 compute_commands.clear ();
181- m_gpgpu_sim->all_compute_done = false ;
182176 printf (" relaunching compute kernels\n " );
183177 }
184178 }
@@ -215,22 +209,21 @@ void accel_sim_framework::parse_commandlist() {
215209 // Read trace header info for window_size number of kernels
216210 kernel_trace_t *kernel_trace_info = tracer.parse_kernel_info (
217211 commandlist[commandlist_index].command_string );
212+ if (kernel_trace_info->kernel_name .find (" VERTEX" ) != std::string::npos) {
213+ kernel_trace_info->cuda_stream_id = graphics_stream_id;
214+ last_grpahics_stream_id = graphics_stream_id;
215+ graphics_stream_id++;
216+ } else if (kernel_trace_info->kernel_name .find (" FRAG" ) !=
217+ std::string::npos) {
218+ kernel_trace_info->cuda_stream_id = last_grpahics_stream_id;
219+ }
218220 kernel_info = create_kernel_info (kernel_trace_info, m_gpgpu_context,
219221 &tconfig, &tracer);
220222
221223 if (kernel_info->is_graphic_kernel ) {
222224 graphics_commands.push_back (commandlist[commandlist_index]);
223225 unsigned kernel_id = kernel_info->get_uid ();
224- if (kernel_info->get_name ().find (" VERTEX" ) != std::string::npos) {
225- // is vertex shader
226- last_launched_vertex = kernel_id;
227- kernel_trace_info->cuda_stream_id = graphics_stream_id;
228- last_grpahics_stream_id = graphics_stream_id;
229- graphics_stream_id++;
230- } else {
231- assert (kernel_info->get_name ().find (" FRAG" ) != std::string::npos);
232- kernel_trace_info->cuda_stream_id = last_grpahics_stream_id;
233- }
226+
234227 // save kernel info
235228 m_gpgpu_sim->vb_addr [kernel_id] = kernel_vb_addr;
236229 m_gpgpu_sim->vb_size [kernel_id] = kernel_vb_size;
@@ -259,13 +252,15 @@ void accel_sim_framework::parse_commandlist() {
259252void accel_sim_framework::cleanup (unsigned finished_kernel) {
260253 trace_kernel_info_t *k = NULL ;
261254 unsigned long long finished_kernel_cuda_stream_id = -1 ;
255+ unsigned finishd_kernel_uid = 0 ;
262256 for (unsigned j = 0 ; j < kernels_info.size (); j++) {
263257 k = kernels_info.at (j);
264258 if (k->get_uid () == finished_kernel ||
265259 m_gpgpu_sim->cycle_insn_cta_max_hit () || !m_gpgpu_sim->active ()) {
266260 for (unsigned int l = 0 ; l < busy_streams.size (); l++) {
267261 if (busy_streams.at (l) == k->get_cuda_stream_id ()) {
268262 finished_kernel_cuda_stream_id = k->get_cuda_stream_id ();
263+ finishd_kernel_uid = k->get_uid ();
269264 busy_streams.erase (busy_streams.begin () + l);
270265 break ;
271266 }
@@ -310,7 +305,7 @@ void accel_sim_framework::cleanup(unsigned finished_kernel) {
310305 }
311306 }
312307 assert (k);
313- m_gpgpu_sim->print_stats (finished_kernel_cuda_stream_id);
308+ m_gpgpu_sim->print_stats (finished_kernel_cuda_stream_id, finishd_kernel_uid );
314309}
315310
316311unsigned accel_sim_framework::simulate () {
@@ -352,6 +347,10 @@ trace_kernel_info_t *accel_sim_framework::create_kernel_info(
352347 function_info->set_name (kernel_trace_info->kernel_name .c_str ());
353348 trace_kernel_info_t *kernel_info = new trace_kernel_info_t (
354349 gridDim, blockDim, function_info, parser, config, kernel_trace_info);
350+ if (kernel_trace_info->kernel_name .find (" VERTEX" ) != std::string::npos ||
351+ kernel_trace_info->kernel_name .find (" FRAG" ) != std::string::npos) {
352+ kernel_info->is_graphic_kernel = true ;
353+ }
355354
356355 return kernel_info;
357356}
0 commit comments