Skip to content

Commit 1127fd9

Browse files
committed
[a64] Implement OPCODE_CACHE_CONTROL
`dc civac` causes an illegal-instruction exception on Windows-ARM. This is likely a security measure against cache attacks. On Linux this instruction is trapped into an EL1 kernel function. Windows does not seem to have any user-mode cache-maintenance instructions available for the data cache (only for the instruction cache, via `FlushInstructionCache`). The closest thing we can do for now is a full data memory-barrier with `dsb ish`. Prefetches are implemented using `prfm pldl1keep, ...`.
1 parent 164f1e4 commit 1127fd9

File tree

1 file changed

+67
-1
lines changed

1 file changed

+67
-1
lines changed

src/xenia/cpu/backend/a64/a64_seq_memory.cc

Lines changed: 67 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1082,7 +1082,73 @@ struct CACHE_CONTROL
10821082
}
10831083
size_t cache_line_size = i.src2.value;
10841084

1085-
// TODO(wunkolo): Arm64 cache-control
1085+
XReg addr = X0;
1086+
uint32_t address_constant;
1087+
if (i.src1.is_constant) {
1088+
// TODO(benvanik): figure out how to do this without a temp.
1089+
// Since the constant is often 0x8... if we tried to use that as a
1090+
// displacement it would be sign extended and mess things up.
1091+
address_constant = static_cast<uint32_t>(i.src1.constant());
1092+
if (address_constant < 0x80000000) {
1093+
e.ADD(addr, e.GetMembaseReg(), address_constant);
1094+
} else {
1095+
if (address_constant >= 0xE0000000 &&
1096+
xe::memory::allocation_granularity() > 0x1000) {
1097+
e.MOV(X1, address_constant + 0x1000);
1098+
} else {
1099+
e.MOV(X1, address_constant);
1100+
}
1101+
e.ADD(addr, e.GetMembaseReg(), X1);
1102+
}
1103+
} else {
1104+
if (xe::memory::allocation_granularity() > 0x1000) {
1105+
// Emulate the 4 KB physical address offset in 0xE0000000+ when can't do
1106+
// it via memory mapping.
1107+
e.MOV(X1, 0xE0000000);
1108+
e.CMP(i.src1.reg(), X1);
1109+
e.CSET(X1, Cond::HS);
1110+
e.ADD(X1, i.src1.reg(), X1, LSL, 12);
1111+
} else {
1112+
// Clear the top 32 bits, as they are likely garbage.
1113+
e.MOV(W1, i.src1.reg().toW());
1114+
}
1115+
e.ADD(addr, e.GetMembaseReg(), X1);
1116+
}
1117+
1118+
if (is_clflush) {
1119+
// TODO(wunkolo): These kind of cache-maintenance instructions cause an
1120+
// illegal-instruction on windows, but is trapped to proper EL1 code on
1121+
// Linux. Need a way to do cache-maintenance on Windows-Arm
1122+
// e.DC(DcOp::CIVAC, addr);
1123+
1124+
// Full data sync
1125+
e.DSB(BarrierOp::ISH);
1126+
}
1127+
if (is_prefetch) {
1128+
e.PRFM(PrfOp::PLDL1KEEP, addr);
1129+
}
1130+
1131+
if (cache_line_size >= 128) {
1132+
// Prefetch the other 64 bytes of the 128-byte cache line.
1133+
if (i.src1.is_constant && address_constant < 0x80000000) {
1134+
e.ADD(addr, e.GetMembaseReg(), address_constant ^ 64);
1135+
} else {
1136+
e.EOR(X1, X1, 64);
1137+
}
1138+
if (is_clflush) {
1139+
// TODO(wunkolo): These kind of cache-maintenance instructions cause an
1140+
// illegal-instruction on windows, but is trapped to proper EL1 code on
1141+
// Linux. Need a way to do cache-maintenance on Windows-Arm
1142+
// e.DC(DcOp::CIVAC, addr);
1143+
1144+
// Full data sync
1145+
e.DSB(BarrierOp::ISH);
1146+
}
1147+
if (is_prefetch) {
1148+
e.PRFM(PrfOp::PLDL1KEEP, addr);
1149+
}
1150+
assert_true(cache_line_size == 128);
1151+
}
10861152
}
10871153
};
10881154
EMITTER_OPCODE_TABLE(OPCODE_CACHE_CONTROL, CACHE_CONTROL);

0 commit comments

Comments
 (0)