@@ -19,6 +19,20 @@ namespace xe {
1919namespace gpu {
2020using namespace ucode ;
2121
22+ void DxbcShaderTranslator::KillPixel (bool condition,
23+ const dxbc::Src& condition_src) {
24+ // Discard the pixel, but continue execution if other lanes in the quad need
25+ // this lane for derivatives. The driver may also perform early exiting
26+ // internally if all lanes are discarded if deemed beneficial.
27+ a_.OpDiscard (condition, condition_src);
28+ if (edram_rov_used_) {
29+ // Even though discarding disables all subsequent UAV/ROV writes, also skip
30+ // as much of the Render Backend emulation logic as possible by setting the
31+ // coverage and the mask of the written render targets to zero.
32+ a_.OpMov (dxbc::Dest::R (system_temp_rov_params_, 0b0001 ), dxbc::Src::LU (0 ));
33+ }
34+ }
35+
2236void DxbcShaderTranslator::ProcessVectorAluOperation (
2337 const ParsedAluInstruction& instr, uint32_t & result_swizzle,
2438 bool & predicate_written) {
@@ -492,11 +506,7 @@ void DxbcShaderTranslator::ProcessVectorAluOperation(
492506 a_.OpOr (dxbc::Dest::R (system_temp_result_, 0b0001 ),
493507 dxbc::Src::R (system_temp_result_, dxbc::Src::kXXXX ),
494508 dxbc::Src::R (system_temp_result_, dxbc::Src::kYYYY ));
495- if (edram_rov_used_) {
496- a_.OpRetC (true , dxbc::Src::R (system_temp_result_, dxbc::Src::kXXXX ));
497- } else {
498- a_.OpDiscard (true , dxbc::Src::R (system_temp_result_, dxbc::Src::kXXXX ));
499- }
509+ KillPixel (true , dxbc::Src::R (system_temp_result_, dxbc::Src::kXXXX ));
500510 if (used_result_components) {
501511 a_.OpAnd (dxbc::Dest::R (system_temp_result_, 0b0001 ),
502512 dxbc::Src::R (system_temp_result_, dxbc::Src::kXXXX ),
@@ -512,11 +522,7 @@ void DxbcShaderTranslator::ProcessVectorAluOperation(
512522 a_.OpOr (dxbc::Dest::R (system_temp_result_, 0b0001 ),
513523 dxbc::Src::R (system_temp_result_, dxbc::Src::kXXXX ),
514524 dxbc::Src::R (system_temp_result_, dxbc::Src::kYYYY ));
515- if (edram_rov_used_) {
516- a_.OpRetC (true , dxbc::Src::R (system_temp_result_, dxbc::Src::kXXXX ));
517- } else {
518- a_.OpDiscard (true , dxbc::Src::R (system_temp_result_, dxbc::Src::kXXXX ));
519- }
525+ KillPixel (true , dxbc::Src::R (system_temp_result_, dxbc::Src::kXXXX ));
520526 if (used_result_components) {
521527 a_.OpAnd (dxbc::Dest::R (system_temp_result_, 0b0001 ),
522528 dxbc::Src::R (system_temp_result_, dxbc::Src::kXXXX ),
@@ -532,11 +538,7 @@ void DxbcShaderTranslator::ProcessVectorAluOperation(
532538 a_.OpOr (dxbc::Dest::R (system_temp_result_, 0b0001 ),
533539 dxbc::Src::R (system_temp_result_, dxbc::Src::kXXXX ),
534540 dxbc::Src::R (system_temp_result_, dxbc::Src::kYYYY ));
535- if (edram_rov_used_) {
536- a_.OpRetC (true , dxbc::Src::R (system_temp_result_, dxbc::Src::kXXXX ));
537- } else {
538- a_.OpDiscard (true , dxbc::Src::R (system_temp_result_, dxbc::Src::kXXXX ));
539- }
541+ KillPixel (true , dxbc::Src::R (system_temp_result_, dxbc::Src::kXXXX ));
540542 if (used_result_components) {
541543 a_.OpAnd (dxbc::Dest::R (system_temp_result_, 0b0001 ),
542544 dxbc::Src::R (system_temp_result_, dxbc::Src::kXXXX ),
@@ -552,11 +554,7 @@ void DxbcShaderTranslator::ProcessVectorAluOperation(
552554 a_.OpOr (dxbc::Dest::R (system_temp_result_, 0b0001 ),
553555 dxbc::Src::R (system_temp_result_, dxbc::Src::kXXXX ),
554556 dxbc::Src::R (system_temp_result_, dxbc::Src::kYYYY ));
555- if (edram_rov_used_) {
556- a_.OpRetC (true , dxbc::Src::R (system_temp_result_, dxbc::Src::kXXXX ));
557- } else {
558- a_.OpDiscard (true , dxbc::Src::R (system_temp_result_, dxbc::Src::kXXXX ));
559- }
557+ KillPixel (true , dxbc::Src::R (system_temp_result_, dxbc::Src::kXXXX ));
560558 if (used_result_components) {
561559 a_.OpAnd (dxbc::Dest::R (system_temp_result_, 0b0001 ),
562560 dxbc::Src::R (system_temp_result_, dxbc::Src::kXXXX ),
@@ -952,47 +950,27 @@ void DxbcShaderTranslator::ProcessScalarAluOperation(
952950
953951 case AluScalarOpcode::kKillsEq :
954952 a_.OpEq (ps_dest, operand_0_a, dxbc::Src::LF (0 .0f ));
955- if (edram_rov_used_) {
956- a_.OpRetC (true , ps_src);
957- } else {
958- a_.OpDiscard (true , ps_src);
959- }
953+ KillPixel (true , ps_src);
960954 a_.OpAnd (ps_dest, ps_src, dxbc::Src::LF (1 .0f ));
961955 break ;
962956 case AluScalarOpcode::kKillsGt :
963957 a_.OpLT (ps_dest, dxbc::Src::LF (0 .0f ), operand_0_a);
964- if (edram_rov_used_) {
965- a_.OpRetC (true , ps_src);
966- } else {
967- a_.OpDiscard (true , ps_src);
968- }
958+ KillPixel (true , ps_src);
969959 a_.OpAnd (ps_dest, ps_src, dxbc::Src::LF (1 .0f ));
970960 break ;
971961 case AluScalarOpcode::kKillsGe :
972962 a_.OpGE (ps_dest, operand_0_a, dxbc::Src::LF (0 .0f ));
973- if (edram_rov_used_) {
974- a_.OpRetC (true , ps_src);
975- } else {
976- a_.OpDiscard (true , ps_src);
977- }
963+ KillPixel (true , ps_src);
978964 a_.OpAnd (ps_dest, ps_src, dxbc::Src::LF (1 .0f ));
979965 break ;
980966 case AluScalarOpcode::kKillsNe :
981967 a_.OpNE (ps_dest, operand_0_a, dxbc::Src::LF (0 .0f ));
982- if (edram_rov_used_) {
983- a_.OpRetC (true , ps_src);
984- } else {
985- a_.OpDiscard (true , ps_src);
986- }
968+ KillPixel (true , ps_src);
987969 a_.OpAnd (ps_dest, ps_src, dxbc::Src::LF (1 .0f ));
988970 break ;
989971 case AluScalarOpcode::kKillsOne :
990972 a_.OpEq (ps_dest, operand_0_a, dxbc::Src::LF (1 .0f ));
991- if (edram_rov_used_) {
992- a_.OpRetC (true , ps_src);
993- } else {
994- a_.OpDiscard (true , ps_src);
995- }
973+ KillPixel (true , ps_src);
996974 a_.OpAnd (ps_dest, ps_src, dxbc::Src::LF (1 .0f ));
997975 break ;
998976
0 commit comments