@@ -848,11 +848,61 @@ void A64Emitter::LoadConstantV(oaknut::QReg dest, const vec128_t& v) {
848848 } else if (v.low == ~uint64_t (0 ) && v.high == ~uint64_t (0 )) {
849849 // 1111...
850850 MOVI (dest.B16 (), 0xFF );
851- } else if (std::adjacent_find (std::cbegin (v.u8 ), std::cend (v.u8 ),
852- std::not_equal_to<>()) == std::cend (v.u8 )) {
853- // 0xXX, 0xXX, 0xXX...
854- MOVI (dest.B16 (), v.u8 [0 ]);
855851 } else {
852+ // Try to figure out some common splat-patterns to utilize MOVI rather than
853+ // stashing to memory.
854+ const bool all_same_u8 =
855+ std::adjacent_find (std::cbegin (v.u8 ), std::cend (v.u8 ),
856+ std::not_equal_to<>()) == std::cend (v.u8 );
857+
858+ if (all_same_u8) {
859+ // 0xXX, 0xXX, 0xXX...
860+ MOVI (dest.B16 (), v.u8 [0 ]);
861+ return ;
862+ }
863+
864+ const bool all_same_u16 =
865+ std::adjacent_find (std::cbegin (v.u16 ), std::cend (v.u16 ),
866+ std::not_equal_to<>()) == std::cend (v.u16 );
867+
868+ if (all_same_u16) {
869+ if ((v.u16 [0 ] & 0xFF00 ) == 0 ) {
870+ // 0x00XX, 0x00XX, 0x00XX...
871+ MOVI (dest.H8 (), uint8_t (v.u16 [0 ]));
872+ return ;
873+ } else if ((v.u16 [0 ] & 0x00FF ) == 0 ) {
874+ // 0xXX00, 0xXX00, 0xXX00...
875+ MOVI (dest.H8 (), uint8_t (v.u16 [0 ] >> 8 ), oaknut::util::LSL, 8 );
876+ return ;
877+ }
878+ }
879+
880+ const bool all_same_u32 =
881+ std::adjacent_find (std::cbegin (v.u32 ), std::cend (v.u32 ),
882+ std::not_equal_to<>()) == std::cend (v.u32 );
883+
884+ if (all_same_u32) {
885+ if ((v.u32 [0 ] & 0x00FFFFFF ) == 0 ) {
886+ // This is used a lot for certain float-splats and should be checked
887+ // first before the others
888+ // 0xXX000000, 0xXX000000, 0xXX000000...
889+ MOVI (dest.S4 (), uint8_t (v.u32 [0 ] >> 24 ), oaknut::util::LSL, 24 );
890+ return ;
891+ } else if ((v.u32 [0 ] & 0xFFFFFF00 ) == 0 ) {
892+ // 0x000000XX, 0x000000XX, 0x000000XX...
893+ MOVI (dest.S4 (), uint8_t (v.u32 [0 ]));
894+ return ;
895+ } else if ((v.u32 [0 ] & 0xFFFF00FF ) == 0 ) {
896+ // 0x0000XX00, 0x0000XX00, 0x0000XX00...
897+ MOVI (dest.S4 (), uint8_t (v.u32 [0 ] >> 8 ), oaknut::util::LSL, 8 );
898+ return ;
899+ } else if ((v.u32 [0 ] & 0xFF00FFFF ) == 0 ) {
900+ // 0x00XX0000, 0x00XX0000, 0x00XX0000...
901+ MOVI (dest.S4 (), uint8_t (v.u32 [0 ] >> 16 ), oaknut::util::LSL, 16 );
902+ return ;
903+ }
904+ }
905+
856906 // TODO(benvanik): see what other common values are.
857907 // TODO(benvanik): build constant table - 99% are reused.
858908 MovMem64 (SP, kStashOffset , v.low );
0 commit comments