Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 2778fd0

Browse files
committedDec 20, 2017
[AArch64] Implement stack probing for windows
Differential Revision: https://reviews.llvm.org/D41131 llvm-svn: 321150
1 parent 6528fb8 commit 2778fd0

File tree

3 files changed

+130
-9
lines changed

3 files changed

+130
-9
lines changed
 

‎llvm/docs/Extensions.rst

+28
Original file line numberDiff line numberDiff line change
@@ -288,3 +288,31 @@ standard stack probe emission.
288288

289289
The MSVC environment does not emit code for VLAs currently.
290290

291+
Windows on ARM64
292+
----------------
293+
294+
Stack Probe Emission
295+
^^^^^^^^^^^^^^^^^^^^
296+
297+
The reference implementation (Microsoft Visual Studio 2017) emits stack probes
298+
in the following fashion:
299+
300+
.. code-block:: gas
301+
302+
mov x15, #constant
303+
bl __chkstk
304+
sub sp, sp, x15, lsl #4
305+
306+
However, the direct ``bl`` has a limited range of 256 MiB (±128MiB). In order to accommodate
307+
larger binaries, LLVM supports the use of ``-mcmodel=large`` to allow an 8GiB
308+
(±4GiB) range via a slight deviation. It will generate an indirect call as
309+
follows:
310+
311+
.. code-block:: gas
312+
313+
mov x15, #constant
314+
adrp x16, __chkstk
315+
add x16, x16, :lo12:__chkstk
316+
blr x16
317+
sub sp, sp, x15, lsl #4
318+

‎llvm/lib/Target/AArch64/AArch64FrameLowering.cpp

+77-9
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@
9797
#include "AArch64RegisterInfo.h"
9898
#include "AArch64Subtarget.h"
9999
#include "AArch64TargetMachine.h"
100+
#include "MCTargetDesc/AArch64AddressingModes.h"
100101
#include "llvm/ADT/SmallVector.h"
101102
#include "llvm/ADT/Statistic.h"
102103
#include "llvm/CodeGen/LivePhysRegs.h"
@@ -335,6 +336,22 @@ bool AArch64FrameLowering::canUseAsPrologue(
335336
return findScratchNonCalleeSaveRegister(TmpMBB) != AArch64::NoRegister;
336337
}
337338

339+
/// Decide whether a stack allocation of \p StackSizeInBytes bytes in \p MF
/// has to be preceded by a stack probe.
///
/// Only Windows targets ever probe; for every other target this returns
/// false. The probe threshold defaults to 4096 bytes and can be overridden
/// per function through the "stack-probe-size" function attribute.
///
/// \returns true when \p StackSizeInBytes is at least the probe threshold.
static bool windowsRequiresStackProbe(MachineFunction &MF,
                                      unsigned StackSizeInBytes) {
  if (!MF.getSubtarget<AArch64Subtarget>().isTargetWindows())
    return false;

  // TODO: When implementing stack protectors, take that into account
  // for the probe threshold.
  unsigned ProbeThreshold = 4096;

  const Function &Fn = MF.getFunction();
  if (Fn.hasFnAttribute("stack-probe-size")) {
    const auto ProbeSizeText =
        Fn.getFnAttribute("stack-probe-size").getValueAsString();
    // Radix 0 asks getAsInteger to auto-detect the base. Its return value
    // is not checked here.
    ProbeSizeText.getAsInteger(0, ProbeThreshold);
  }

  return ProbeThreshold <= StackSizeInBytes;
}
354+
338355
bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
339356
MachineFunction &MF, unsigned StackBumpBytes) const {
340357
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
@@ -347,7 +364,7 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
347364

348365
// 512 is the maximum immediate for stp/ldp that will be used for
349366
// callee-save save/restores
350-
if (StackBumpBytes >= 512)
367+
if (StackBumpBytes >= 512 || windowsRequiresStackProbe(MF, StackBumpBytes))
351368
return false;
352369

353370
if (MFI.hasVarSizedObjects())
@@ -478,7 +495,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
478495
return;
479496

480497
int NumBytes = (int)MFI.getStackSize();
481-
if (!AFI->hasStackFrame()) {
498+
if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) {
482499
assert(!HasFP && "unexpected function without stack frame but with FP");
483500

484501
// All of the stack allocation is for locals.
@@ -550,6 +567,44 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
550567
MachineInstr::FrameSetup);
551568
}
552569

570+
if (windowsRequiresStackProbe(MF, NumBytes)) {
571+
uint32_t NumWords = NumBytes >> 4;
572+
573+
BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15)
574+
.addImm(NumWords)
575+
.setMIFlags(MachineInstr::FrameSetup);
576+
577+
switch (MF.getTarget().getCodeModel()) {
578+
case CodeModel::Small:
579+
case CodeModel::Medium:
580+
case CodeModel::Kernel:
581+
BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
582+
.addExternalSymbol("__chkstk")
583+
.addReg(AArch64::X15, RegState::Implicit)
584+
.setMIFlags(MachineInstr::FrameSetup);
585+
break;
586+
case CodeModel::Large:
587+
BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT))
588+
.addReg(AArch64::X16, RegState::Define)
589+
.addExternalSymbol("__chkstk")
590+
.addExternalSymbol("__chkstk")
591+
.setMIFlags(MachineInstr::FrameSetup);
592+
593+
BuildMI(MBB, MBBI, DL, TII->get(AArch64::BLR))
594+
.addReg(AArch64::X16, RegState::Kill)
595+
.addReg(AArch64::X15, RegState::Implicit | RegState::Define)
596+
.setMIFlags(MachineInstr::FrameSetup);
597+
break;
598+
}
599+
600+
BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP)
601+
.addReg(AArch64::SP, RegState::Kill)
602+
.addReg(AArch64::X15, RegState::Kill)
603+
.addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 4))
604+
.setMIFlags(MachineInstr::FrameSetup);
605+
NumBytes = 0;
606+
}
607+
553608
// Allocate space for the rest of the frame.
554609
if (NumBytes) {
555610
const bool NeedsRealignment = RegInfo->needsStackRealignment(MF);
@@ -1164,18 +1219,32 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
11641219
unsigned UnspilledCSGPR = AArch64::NoRegister;
11651220
unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
11661221

1222+
MachineFrameInfo &MFI = MF.getFrameInfo();
1223+
const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
1224+
1225+
unsigned BasePointerReg = RegInfo->hasBasePointer(MF)
1226+
? RegInfo->getBaseRegister()
1227+
: (unsigned)AArch64::NoRegister;
1228+
1229+
unsigned SpillEstimate = SavedRegs.count();
1230+
for (unsigned i = 0; CSRegs[i]; ++i) {
1231+
unsigned Reg = CSRegs[i];
1232+
unsigned PairedReg = CSRegs[i ^ 1];
1233+
if (Reg == BasePointerReg)
1234+
SpillEstimate++;
1235+
if (produceCompactUnwindFrame(MF) && !SavedRegs.test(PairedReg))
1236+
SpillEstimate++;
1237+
}
1238+
SpillEstimate += 2; // Conservatively include FP+LR in the estimate
1239+
unsigned StackEstimate = MFI.estimateStackSize(MF) + 8 * SpillEstimate;
1240+
11671241
// The frame record needs to be created by saving the appropriate registers
1168-
if (hasFP(MF)) {
1242+
if (hasFP(MF) || windowsRequiresStackProbe(MF, StackEstimate)) {
11691243
SavedRegs.set(AArch64::FP);
11701244
SavedRegs.set(AArch64::LR);
11711245
}
11721246

1173-
unsigned BasePointerReg = AArch64::NoRegister;
1174-
if (RegInfo->hasBasePointer(MF))
1175-
BasePointerReg = RegInfo->getBaseRegister();
1176-
11771247
unsigned ExtraCSSpill = 0;
1178-
const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
11791248
// Figure out which callee-saved registers to save/restore.
11801249
for (unsigned i = 0; CSRegs[i]; ++i) {
11811250
const unsigned Reg = CSRegs[i];
@@ -1217,7 +1286,6 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
12171286

12181287
// The CSR spill slots have not been allocated yet, so estimateStackSize
12191288
// won't include them.
1220-
MachineFrameInfo &MFI = MF.getFrameInfo();
12211289
unsigned CFSize = MFI.estimateStackSize(MF) + 8 * NumRegsSpilled;
12221290
DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
12231291
unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF);

‎llvm/test/CodeGen/AArch64/chkstk.ll

+25
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
; RUN: llc -mtriple=aarch64-windows -verify-machineinstrs %s -o - \
2+
; RUN: | FileCheck -check-prefix CHECK-DEFAULT-CODE-MODEL %s
3+
4+
; RUN: llc -mtriple=aarch64-windows -verify-machineinstrs -code-model=large %s -o - \
5+
; RUN: | FileCheck -check-prefix CHECK-LARGE-CODE-MODEL %s
6+
7+
; Allocates a 4096-byte local buffer. The assembly checks that follow expect
; a frame record (x29/x30) to be stored and x15 to be loaded with 0x100
; before __chkstk is called, i.e. the function is expected to be probed.
define void @check_watermark() {
8+
entry:
9+
%buffer = alloca [4096 x i8], align 1
10+
ret void
11+
}
12+
13+
; CHECK-DEFAULT-CODE-MODEL: check_watermark:
14+
; CHECK-DEFAULT-CODE-MODEL-DAG: stp x29, x30, [sp
15+
; CHECK-DEFAULT-CODE-MODEL-DAG: orr x15, xzr, #0x100
16+
; CHECK-DEFAULT-CODE-MODEL: bl __chkstk
17+
; CHECK-DEFAULT-CODE-MODEL: sub sp, sp, x15, lsl #4
18+
19+
; CHECK-LARGE-CODE-MODEL: check_watermark:
20+
; CHECK-LARGE-CODE-MODEL-DAG: stp x29, x30, [sp
21+
; CHECK-LARGE-CODE-MODEL-DAG: orr x15, xzr, #0x100
22+
; CHECK-LARGE-CODE-MODEL-DAG: adrp x16, __chkstk
23+
; CHECK-LARGE-CODE-MODEL-DAG: add x16, x16, __chkstk
24+
; CHECK-LARGE-CODE-MODEL: blr x16
25+
; CHECK-LARGE-CODE-MODEL: sub sp, sp, x15, lsl #4

0 commit comments

Comments
 (0)
Failed to load comments.