[SCEV] Don't use non-deterministic constant folding for trip counts (#90942)

nikic · web-flow · commit b6e102e08cd3 · 2024-05-20T07:40:54.000+02:00
When calculating the exit count exhaustively, if any of the involved operations is non-deterministic, the exit count we compute at compile-time and the exit count at run-time may differ. Using these non-deterministic constant folding results is only correct if we actually replace all uses of the instruction with the value. SCEV (or its consumers) generally don't do this. Handle this by adding a new AllowNonDeterministic flag to the constant folding API, and disabling it in SCEV. If non-deterministic results are not allowed, do not fold FP lib calls in general, and FP operations returning NaNs in particular. This could be made more precise (some FP libcalls like fabs are fully deterministic), but I don't think this that precise handling here is worthwhile. Fixes the interesting part of #89885.
diff --git a/llvm/include/llvm/Analysis/ConstantFolding.h b/llvm/include/llvm/Analysis/ConstantFolding.h
@@ -68,9 +68,16 @@ Constant *ConstantFoldConstant(const Constant *C, const DataLayout &DL,
 /// fold instructions like loads and stores, which have no constant expression
 /// form.
 ///
+/// In some cases, constant folding may return one value chosen from a set of
+/// multiple legal return values. For example, the exact bit pattern of NaN
+/// results is not guaranteed. Using such a result is usually only valid if
+/// all uses of the original operation are replaced by the constant-folded
+/// result. The \p AllowNonDeterministic parameter controls whether this is
+/// allowed.
 Constant *ConstantFoldInstOperands(Instruction *I, ArrayRef<Constant *> Ops,
                                    const DataLayout &DL,
-                                   const TargetLibraryInfo *TLI = nullptr);
+                                   const TargetLibraryInfo *TLI = nullptr,
+                                   bool AllowNonDeterministic = true);
 
 /// Attempt to constant fold a compare instruction (icmp/fcmp) with the
 /// specified operands. Returns null or a constant expression of the specified
@@ -95,7 +102,8 @@ Constant *ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS,
 /// Returns null or a constant expression of the specified operands on failure.
 Constant *ConstantFoldFPInstOperands(unsigned Opcode, Constant *LHS,
                                      Constant *RHS, const DataLayout &DL,
-                                     const Instruction *I);
+                                     const Instruction *I,
+                                     bool AllowNonDeterministic = true);
 
 /// Attempt to flush float point constant according to denormal mode set in the
 /// instruction's parent function attributes. If so, return a zero with the
@@ -190,7 +198,8 @@ bool canConstantFoldCallTo(const CallBase *Call, const Function *F);
 /// with the specified arguments, returning null if unsuccessful.
 Constant *ConstantFoldCall(const CallBase *Call, Function *F,
                            ArrayRef<Constant *> Operands,
-                           const TargetLibraryInfo *TLI = nullptr);
+                           const TargetLibraryInfo *TLI = nullptr,
+                           bool AllowNonDeterministic = true);
 
 Constant *ConstantFoldBinaryIntrinsic(Intrinsic::ID ID, Constant *LHS,
                                       Constant *RHS, Type *Ty,
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -992,7 +992,8 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
 Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode,
                                        ArrayRef<Constant *> Ops,
                                        const DataLayout &DL,
-                                       const TargetLibraryInfo *TLI) {
+                                       const TargetLibraryInfo *TLI,
+                                       bool AllowNonDeterministic) {
   Type *DestTy = InstOrCE->getType();
 
   if (Instruction::isUnaryOp(Opcode))
@@ -1011,7 +1012,8 @@ Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode,
       // TODO: If a constant expression is being folded rather than an
       // instruction, denormals will not be flushed/treated as zero
       if (const auto *I = dyn_cast<Instruction>(InstOrCE)) {
-        return ConstantFoldFPInstOperands(Opcode, Ops[0], Ops[1], DL, I);
+        return ConstantFoldFPInstOperands(Opcode, Ops[0], Ops[1], DL, I,
+                                          AllowNonDeterministic);
       }
     }
     return ConstantFoldBinaryOpOperands(Opcode, Ops[0], Ops[1], DL);
@@ -1053,7 +1055,8 @@ Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode,
     if (auto *F = dyn_cast<Function>(Ops.back())) {
       const auto *Call = cast<CallBase>(InstOrCE);
       if (canConstantFoldCallTo(Call, F))
-        return ConstantFoldCall(Call, F, Ops.slice(0, Ops.size() - 1), TLI);
+        return ConstantFoldCall(Call, F, Ops.slice(0, Ops.size() - 1), TLI,
+                                AllowNonDeterministic);
     }
     return nullptr;
   case Instruction::Select:
@@ -1114,8 +1117,8 @@ ConstantFoldConstantImpl(const Constant *C, const DataLayout &DL,
   }
 
   if (auto *CE = dyn_cast<ConstantExpr>(C)) {
-    if (Constant *Res =
-            ConstantFoldInstOperandsImpl(CE, CE->getOpcode(), Ops, DL, TLI))
+    if (Constant *Res = ConstantFoldInstOperandsImpl(
+            CE, CE->getOpcode(), Ops, DL, TLI, /*AllowNonDeterministic=*/true))
       return Res;
     return const_cast<Constant *>(C);
   }
@@ -1183,8 +1186,10 @@ Constant *llvm::ConstantFoldConstant(const Constant *C, const DataLayout &DL,
 Constant *llvm::ConstantFoldInstOperands(Instruction *I,
                                          ArrayRef<Constant *> Ops,
                                          const DataLayout &DL,
-                                         const TargetLibraryInfo *TLI) {
-  return ConstantFoldInstOperandsImpl(I, I->getOpcode(), Ops, DL, TLI);
+                                         const TargetLibraryInfo *TLI,
+                                         bool AllowNonDeterministic) {
+  return ConstantFoldInstOperandsImpl(I, I->getOpcode(), Ops, DL, TLI,
+                                      AllowNonDeterministic);
 }
 
 Constant *llvm::ConstantFoldCompareInstOperands(
@@ -1357,7 +1362,8 @@ Constant *llvm::FlushFPConstant(Constant *Operand, const Instruction *I,
 
 Constant *llvm::ConstantFoldFPInstOperands(unsigned Opcode, Constant *LHS,
                                            Constant *RHS, const DataLayout &DL,
-                                           const Instruction *I) {
+                                           const Instruction *I,
+                                           bool AllowNonDeterministic) {
   if (Instruction::isBinaryOp(Opcode)) {
     // Flush denormal inputs if needed.
     Constant *Op0 = FlushFPConstant(LHS, I, /* IsOutput */ false);
@@ -1367,13 +1373,30 @@ Constant *llvm::ConstantFoldFPInstOperands(unsigned Opcode, Constant *LHS,
     if (!Op1)
       return nullptr;
 
+    // If nsz or an algebraic FMF flag is set, the result of the FP operation
+    // may change due to future optimization. Don't constant fold them if
+    // non-deterministic results are not allowed.
+    if (!AllowNonDeterministic)
+      if (auto *FP = dyn_cast_or_null<FPMathOperator>(I))
+        if (FP->hasNoSignedZeros() || FP->hasAllowReassoc() ||
+            FP->hasAllowContract() || FP->hasAllowReciprocal())
+          return nullptr;
+
     // Calculate constant result.
     Constant *C = ConstantFoldBinaryOpOperands(Opcode, Op0, Op1, DL);
     if (!C)
       return nullptr;
 
     // Flush denormal output if needed.
-    return FlushFPConstant(C, I, /* IsOutput */ true);
+    C = FlushFPConstant(C, I, /* IsOutput */ true);
+    if (!C)
+      return nullptr;
+
+    // The precise NaN value is non-deterministic.
+    if (!AllowNonDeterministic && C->isNaN())
+      return nullptr;
+
+    return C;
   }
   // If instruction lacks a parent/function and the denormal mode cannot be
   // determined, use the default (IEEE).
@@ -3401,7 +3424,8 @@ Constant *llvm::ConstantFoldBinaryIntrinsic(Intrinsic::ID ID, Constant *LHS,
 
 Constant *llvm::ConstantFoldCall(const CallBase *Call, Function *F,
                                  ArrayRef<Constant *> Operands,
-                                 const TargetLibraryInfo *TLI) {
+                                 const TargetLibraryInfo *TLI,
+                                 bool AllowNonDeterministic) {
   if (Call->isNoBuiltin())
     return nullptr;
   if (!F->hasName())
@@ -3417,8 +3441,13 @@ Constant *llvm::ConstantFoldCall(const CallBase *Call, Function *F,
       return nullptr;
   }
 
-  StringRef Name = F->getName();
+  // Conservatively assume that floating-point libcalls may be
+  // non-deterministic.
   Type *Ty = F->getReturnType();
+  if (!AllowNonDeterministic && Ty->isFPOrFPVectorTy())
+    return nullptr;
+
+  StringRef Name = F->getName();
   if (auto *FVTy = dyn_cast<FixedVectorType>(Ty))
     return ConstantFoldFixedVectorCall(
         Name, IID, FVTy, Operands, F->getParent()->getDataLayout(), TLI, Call);
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -9540,7 +9540,8 @@ static Constant *EvaluateExpression(Value *V, const Loop *L,
     Operands[i] = C;
   }
 
-  return ConstantFoldInstOperands(I, Operands, DL, TLI);
+  return ConstantFoldInstOperands(I, Operands, DL, TLI,
+                                  /*AllowNonDeterministic=*/false);
 }
 
 
@@ -10031,7 +10032,8 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
 
     Constant *C = nullptr;
     const DataLayout &DL = getDataLayout();
-    C = ConstantFoldInstOperands(I, Operands, DL, &TLI);
+    C = ConstantFoldInstOperands(I, Operands, DL, &TLI,
+                                 /*AllowNonDeterministic=*/false);
     if (!C)
       return V;
     return getSCEV(C);
diff --git a/llvm/test/Analysis/ScalarEvolution/exhaustive-trip-counts.ll b/llvm/test/Analysis/ScalarEvolution/exhaustive-trip-counts.ll
@@ -27,4 +27,156 @@ for.cond.cleanup:
   ret void
 }
 
+; Do not compute exhaustive trip count based on FP libcalls, as their exact
+; return value may not be specified.
+define i64 @test_fp_libcall() {
+; CHECK-LABEL: 'test_fp_libcall'
+; CHECK-NEXT:  Determining loop execution counts for: @test_fp_libcall
+; CHECK-NEXT:  Loop %loop: Unpredictable backedge-taken count.
+; CHECK-NEXT:  Loop %loop: Unpredictable constant max backedge-taken count.
+; CHECK-NEXT:  Loop %loop: Unpredictable symbolic max backedge-taken count.
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %fv = phi double [ 1.000000e+00, %entry ], [ %fv.next, %loop ]
+  call void @use(double %fv)
+  %fv.next = call double @llvm.sin.f64(double %fv)
+  %iv.next = add i64 %iv, 1
+  %fcmp = fcmp une double %fv, 0x3FC6BA15EE8460B0
+  br i1 %fcmp, label %loop, label %exit
+
+exit:
+  ret i64 %iv
+}
+
+; Do not compute exhaustive trip count based on FP constant folding resulting
+; in NaN values, as we don't specify which NaN exactly is returned.
+define i64 @test_nan_sign() {
+; CHECK-LABEL: 'test_nan_sign'
+; CHECK-NEXT:  Determining loop execution counts for: @test_nan_sign
+; CHECK-NEXT:  Loop %loop: Unpredictable backedge-taken count.
+; CHECK-NEXT:  Loop %loop: Unpredictable constant max backedge-taken count.
+; CHECK-NEXT:  Loop %loop: Unpredictable symbolic max backedge-taken count.
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %fv = phi double [ -1.000000e+00, %entry ], [ %fv.next, %loop ]
+  call void @use(double %fv)
+  %a = fsub double %fv, 0x7F86C16C16C16C16
+  %b = fadd double %a, %a
+  %fv.next = fsub double %b, %a
+  %iv.next = add i64 %iv, 1
+  %fv.bc = bitcast double %fv to i64
+  %icmp = icmp slt i64 %fv.bc, 0
+  br i1 %icmp, label %loop, label %exit
+
+exit:
+  ret i64 %iv
+}
+
+; Do not compute exhaustive trip count based on FP constant folding if the
+; involved operation has nsz or one of the algebraic FMF flags (reassoc, arcp,
+; contract) set. The examples in the following are dummies and don't illustrate
+; real cases where FMF transforms could cause issues.
+
+define i64 @test_fp_nsz() {
+; CHECK-LABEL: 'test_fp_nsz'
+; CHECK-NEXT:  Determining loop execution counts for: @test_fp_nsz
+; CHECK-NEXT:  Loop %loop: Unpredictable backedge-taken count.
+; CHECK-NEXT:  Loop %loop: Unpredictable constant max backedge-taken count.
+; CHECK-NEXT:  Loop %loop: Unpredictable symbolic max backedge-taken count.
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %fv = phi double [ 1.000000e+00, %entry ], [ %fv.next, %loop ]
+  call void @use(double %fv)
+  %fv.next = fadd nsz double %fv, 1.0
+  %iv.next = add i64 %iv, 1
+  %fcmp = fcmp une double %fv, 100.0
+  br i1 %fcmp, label %loop, label %exit
+
+exit:
+  ret i64 %iv
+}
+
+define i64 @test_fp_reassoc() {
+; CHECK-LABEL: 'test_fp_reassoc'
+; CHECK-NEXT:  Determining loop execution counts for: @test_fp_reassoc
+; CHECK-NEXT:  Loop %loop: Unpredictable backedge-taken count.
+; CHECK-NEXT:  Loop %loop: Unpredictable constant max backedge-taken count.
+; CHECK-NEXT:  Loop %loop: Unpredictable symbolic max backedge-taken count.
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %fv = phi double [ 1.000000e+00, %entry ], [ %fv.next, %loop ]
+  call void @use(double %fv)
+  %fv.next = fadd reassoc double %fv, 1.0
+  %iv.next = add i64 %iv, 1
+  %fcmp = fcmp une double %fv, 100.0
+  br i1 %fcmp, label %loop, label %exit
+
+exit:
+  ret i64 %iv
+}
+
+define i64 @test_fp_arcp() {
+; CHECK-LABEL: 'test_fp_arcp'
+; CHECK-NEXT:  Determining loop execution counts for: @test_fp_arcp
+; CHECK-NEXT:  Loop %loop: Unpredictable backedge-taken count.
+; CHECK-NEXT:  Loop %loop: Unpredictable constant max backedge-taken count.
+; CHECK-NEXT:  Loop %loop: Unpredictable symbolic max backedge-taken count.
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %fv = phi double [ 1.000000e+00, %entry ], [ %fv.next, %loop ]
+  call void @use(double %fv)
+  %fv.next = fadd arcp double %fv, 1.0
+  %iv.next = add i64 %iv, 1
+  %fcmp = fcmp une double %fv, 100.0
+  br i1 %fcmp, label %loop, label %exit
+
+exit:
+  ret i64 %iv
+}
+
+define i64 @test_fp_contract() {
+; CHECK-LABEL: 'test_fp_contract'
+; CHECK-NEXT:  Determining loop execution counts for: @test_fp_contract
+; CHECK-NEXT:  Loop %loop: Unpredictable backedge-taken count.
+; CHECK-NEXT:  Loop %loop: Unpredictable constant max backedge-taken count.
+; CHECK-NEXT:  Loop %loop: Unpredictable symbolic max backedge-taken count.
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %fv = phi double [ 1.000000e+00, %entry ], [ %fv.next, %loop ]
+  call void @use(double %fv)
+  %fv.next = fadd contract double %fv, 1.0
+  %iv.next = add i64 %iv, 1
+  %fcmp = fcmp une double %fv, 100.0
+  br i1 %fcmp, label %loop, label %exit
+
+exit:
+  ret i64 %iv
+}
+
 declare void @dummy()
+declare void @use(double %i)
+declare double @llvm.sin.f64(double)