97
97
#include " AArch64RegisterInfo.h"
98
98
#include " AArch64Subtarget.h"
99
99
#include " AArch64TargetMachine.h"
100
+ #include " MCTargetDesc/AArch64AddressingModes.h"
100
101
#include " llvm/ADT/SmallVector.h"
101
102
#include " llvm/ADT/Statistic.h"
102
103
#include " llvm/CodeGen/LivePhysRegs.h"
@@ -335,6 +336,22 @@ bool AArch64FrameLowering::canUseAsPrologue(
335
336
return findScratchNonCalleeSaveRegister (TmpMBB) != AArch64::NoRegister;
336
337
}
337
338
339
+ static bool windowsRequiresStackProbe (MachineFunction &MF,
340
+ unsigned StackSizeInBytes) {
341
+ const AArch64Subtarget &Subtarget = MF.getSubtarget <AArch64Subtarget>();
342
+ if (!Subtarget.isTargetWindows ())
343
+ return false ;
344
+ const Function &F = MF.getFunction ();
345
+ // TODO: When implementing stack protectors, take that into account
346
+ // for the probe threshold.
347
+ unsigned StackProbeSize = 4096 ;
348
+ if (F.hasFnAttribute (" stack-probe-size" ))
349
+ F.getFnAttribute (" stack-probe-size" )
350
+ .getValueAsString ()
351
+ .getAsInteger (0 , StackProbeSize);
352
+ return StackSizeInBytes >= StackProbeSize;
353
+ }
354
+
338
355
bool AArch64FrameLowering::shouldCombineCSRLocalStackBump (
339
356
MachineFunction &MF, unsigned StackBumpBytes) const {
340
357
AArch64FunctionInfo *AFI = MF.getInfo <AArch64FunctionInfo>();
@@ -347,7 +364,7 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
347
364
348
365
// 512 is the maximum immediate for stp/ldp that will be used for
349
366
// callee-save save/restores
350
- if (StackBumpBytes >= 512 )
367
+ if (StackBumpBytes >= 512 || windowsRequiresStackProbe (MF, StackBumpBytes) )
351
368
return false ;
352
369
353
370
if (MFI.hasVarSizedObjects ())
@@ -478,7 +495,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
478
495
return ;
479
496
480
497
int NumBytes = (int )MFI.getStackSize ();
481
- if (!AFI->hasStackFrame ()) {
498
+ if (!AFI->hasStackFrame () && ! windowsRequiresStackProbe (MF, NumBytes) ) {
482
499
assert (!HasFP && " unexpected function without stack frame but with FP" );
483
500
484
501
// All of the stack allocation is for locals.
@@ -550,6 +567,44 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
550
567
MachineInstr::FrameSetup);
551
568
}
552
569
570
+ if (windowsRequiresStackProbe (MF, NumBytes)) {
571
+ uint32_t NumWords = NumBytes >> 4 ;
572
+
573
+ BuildMI (MBB, MBBI, DL, TII->get (AArch64::MOVi64imm), AArch64::X15)
574
+ .addImm (NumWords)
575
+ .setMIFlags (MachineInstr::FrameSetup);
576
+
577
+ switch (MF.getTarget ().getCodeModel ()) {
578
+ case CodeModel::Small:
579
+ case CodeModel::Medium:
580
+ case CodeModel::Kernel:
581
+ BuildMI (MBB, MBBI, DL, TII->get (AArch64::BL))
582
+ .addExternalSymbol (" __chkstk" )
583
+ .addReg (AArch64::X15, RegState::Implicit)
584
+ .setMIFlags (MachineInstr::FrameSetup);
585
+ break ;
586
+ case CodeModel::Large:
587
+ BuildMI (MBB, MBBI, DL, TII->get (AArch64::MOVaddrEXT))
588
+ .addReg (AArch64::X16, RegState::Define)
589
+ .addExternalSymbol (" __chkstk" )
590
+ .addExternalSymbol (" __chkstk" )
591
+ .setMIFlags (MachineInstr::FrameSetup);
592
+
593
+ BuildMI (MBB, MBBI, DL, TII->get (AArch64::BLR))
594
+ .addReg (AArch64::X16, RegState::Kill)
595
+ .addReg (AArch64::X15, RegState::Implicit | RegState::Define)
596
+ .setMIFlags (MachineInstr::FrameSetup);
597
+ break ;
598
+ }
599
+
600
+ BuildMI (MBB, MBBI, DL, TII->get (AArch64::SUBXrx64), AArch64::SP)
601
+ .addReg (AArch64::SP, RegState::Kill)
602
+ .addReg (AArch64::X15, RegState::Kill)
603
+ .addImm (AArch64_AM::getArithExtendImm (AArch64_AM::UXTX, 4 ))
604
+ .setMIFlags (MachineInstr::FrameSetup);
605
+ NumBytes = 0 ;
606
+ }
607
+
553
608
// Allocate space for the rest of the frame.
554
609
if (NumBytes) {
555
610
const bool NeedsRealignment = RegInfo->needsStackRealignment (MF);
@@ -1164,18 +1219,32 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
1164
1219
unsigned UnspilledCSGPR = AArch64::NoRegister;
1165
1220
unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
1166
1221
1222
+ MachineFrameInfo &MFI = MF.getFrameInfo ();
1223
+ const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs (&MF);
1224
+
1225
+ unsigned BasePointerReg = RegInfo->hasBasePointer (MF)
1226
+ ? RegInfo->getBaseRegister ()
1227
+ : (unsigned )AArch64::NoRegister;
1228
+
1229
+ unsigned SpillEstimate = SavedRegs.count ();
1230
+ for (unsigned i = 0 ; CSRegs[i]; ++i) {
1231
+ unsigned Reg = CSRegs[i];
1232
+ unsigned PairedReg = CSRegs[i ^ 1 ];
1233
+ if (Reg == BasePointerReg)
1234
+ SpillEstimate++;
1235
+ if (produceCompactUnwindFrame (MF) && !SavedRegs.test (PairedReg))
1236
+ SpillEstimate++;
1237
+ }
1238
+ SpillEstimate += 2 ; // Conservatively include FP+LR in the estimate
1239
+ unsigned StackEstimate = MFI.estimateStackSize (MF) + 8 * SpillEstimate;
1240
+
1167
1241
// The frame record needs to be created by saving the appropriate registers
1168
- if (hasFP (MF)) {
1242
+ if (hasFP (MF) || windowsRequiresStackProbe (MF, StackEstimate) ) {
1169
1243
SavedRegs.set (AArch64::FP);
1170
1244
SavedRegs.set (AArch64::LR);
1171
1245
}
1172
1246
1173
- unsigned BasePointerReg = AArch64::NoRegister;
1174
- if (RegInfo->hasBasePointer (MF))
1175
- BasePointerReg = RegInfo->getBaseRegister ();
1176
-
1177
1247
unsigned ExtraCSSpill = 0 ;
1178
- const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs (&MF);
1179
1248
// Figure out which callee-saved registers to save/restore.
1180
1249
for (unsigned i = 0 ; CSRegs[i]; ++i) {
1181
1250
const unsigned Reg = CSRegs[i];
@@ -1217,7 +1286,6 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
1217
1286
1218
1287
// The CSR spill slots have not been allocated yet, so estimateStackSize
1219
1288
// won't include them.
1220
- MachineFrameInfo &MFI = MF.getFrameInfo ();
1221
1289
unsigned CFSize = MFI.estimateStackSize (MF) + 8 * NumRegsSpilled;
1222
1290
DEBUG (dbgs () << " Estimated stack frame size: " << CFSize << " bytes.\n " );
1223
1291
unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit (MF);
0 commit comments