Add the alloc_size attribute to clang, attempt 2.

This is a recommit of r290149, which was reverted in r290169 due to msan
failures. msan was failing because we were calling
`isMostDerivedAnUnsizedArray` on an invalid designator, which caused us to
read uninitialized memory. To fix this, the logic of the caller of said
function was simplified, and we now have a `!Invalid` assert in
`isMostDerivedAnUnsizedArray`, so we can catch this particular bug more easily
in the future.

Fingers crossed that this patch sticks this time. :)

Original commit message:

This patch does three things:
- Gives us the alloc_size attribute in clang, which lets us infer the number
  of bytes handed back to us by malloc/realloc/calloc/any user functions that
  act in a similar manner.
- Teaches our constexpr evaluator that evaluating some `const` variables is OK
  sometimes. This is why we have a change in
  test/SemaCXX/constant-expression-cxx11.cpp and other seemingly unrelated
  tests. Richard Smith okay'ed this idea some time ago in person.
- Uniques some Blocks in CodeGen, which was reviewed separately at D26410.
  Lack of uniquing only really shows up as a problem when combined with our
  new eagerness in the face of const.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@290297 91177308-0d34-0410-b5e6-96231b3b80d8
author: George Burgess IV <george.burgess.iv@gmail.com> 2016-12-22 02:50:20 +0000
committer: George Burgess IV <george.burgess.iv@gmail.com> 2016-12-22 02:50:20 +0000
commit: aa365cb2fed2efbc374e52db2a01bdb929ed977a (patch)
tree: 29df45f4fc82d8bcb41a8ae142a0cec2467e1116 /lib/AST/ExprConstant.cpp
parent: a8bebbeb2ba5f6d8a0d6859a7e5713a8970aad8e (diff)
1 files changed, 433 insertions, 183 deletions
diff --git a/lib/AST/ExprConstant.cpp b/lib/AST/ExprConstant.cpp
index 0abdaa879e..a89a45797e 100644
--- a/lib/AST/ExprConstant.cpp
+++ b/lib/AST/ExprConstant.cpp
@@ -109,19 +109,57 @@ namespace {
     return getAsBaseOrMember(E).getInt();
   }
 
+  /// Given a CallExpr, try to get the alloc_size attribute. May return null.
+  static const AllocSizeAttr *getAllocSizeAttr(const CallExpr *CE) {
+    const FunctionDecl *Callee = CE->getDirectCallee();
+    return Callee ? Callee->getAttr<AllocSizeAttr>() : nullptr;
+  }
+
+  /// Attempts to unwrap a CallExpr (with an alloc_size attribute) from an Expr.
+  /// This will look through a single cast.
+  ///
+  /// Returns null if we couldn't unwrap a function with alloc_size.
+  static const CallExpr *tryUnwrapAllocSizeCall(const Expr *E) {
+    if (!E->getType()->isPointerType())
+      return nullptr;
+
+    E = E->IgnoreParens();
+    // If we're doing a variable assignment from e.g. malloc(N), there will
+    // probably be a cast of some kind. Ignore it.
+    if (const auto *Cast = dyn_cast<CastExpr>(E))
+      E = Cast->getSubExpr()->IgnoreParens();
+
+    if (const auto *CE = dyn_cast<CallExpr>(E))
+      return getAllocSizeAttr(CE) ? CE : nullptr;
+    return nullptr;
+  }
+
+  /// Determines whether or not the given Base contains a call to a function
+  /// with the alloc_size attribute.
+  static bool isBaseAnAllocSizeCall(APValue::LValueBase Base) {
+    const auto *E = Base.dyn_cast<const Expr *>();
+    return E && E->getType()->isPointerType() && tryUnwrapAllocSizeCall(E);
+  }
+
+  /// Determines if an LValue with the given LValueBase will have an unsized
+  /// array in its designator.
   /// Find the path length and type of the most-derived subobject in the given
   /// path, and find the size of the containing array, if any.
-  static
-  unsigned findMostDerivedSubobject(ASTContext &Ctx, QualType Base,
-                                    ArrayRef<APValue::LValuePathEntry> Path,
-                                    uint64_t &ArraySize, QualType &Type,
-                                    bool &IsArray) {
+  static unsigned
+  findMostDerivedSubobject(ASTContext &Ctx, APValue::LValueBase Base,
+                           ArrayRef<APValue::LValuePathEntry> Path,
+                           uint64_t &ArraySize, QualType &Type, bool &IsArray) {
+    // This only accepts LValueBases from APValues, and APValues don't support
+    // arrays that lack size info.
+    assert(!isBaseAnAllocSizeCall(Base) &&
+           "Unsized arrays shouldn't appear here");
     unsigned MostDerivedLength = 0;
-    Type = Base;
+    Type = getType(Base);
+
     for (unsigned I = 0, N = Path.size(); I != N; ++I) {
       if (Type->isArrayType()) {
         const ConstantArrayType *CAT =
-          cast<ConstantArrayType>(Ctx.getAsArrayType(Type));
+            cast<ConstantArrayType>(Ctx.getAsArrayType(Type));
         Type = CAT->getElementType();
         ArraySize = CAT->getSize().getZExtValue();
         MostDerivedLength = I + 1;
@@ -162,17 +200,23 @@ namespace {
     /// Is this a pointer one past the end of an object?
     unsigned IsOnePastTheEnd : 1;
 
+    /// Indicator of whether the first entry is an unsized array.
+    unsigned FirstEntryIsAnUnsizedArray : 1;
+
     /// Indicator of whether the most-derived object is an array element.
     unsigned MostDerivedIsArrayElement : 1;
 
     /// The length of the path to the most-derived object of which this is a
     /// subobject.
-    unsigned MostDerivedPathLength : 29;
+    unsigned MostDerivedPathLength : 28;
 
     /// The size of the array of which the most-derived object is an element.
     /// This will always be 0 if the most-derived object is not an array
     /// element. 0 is not an indicator of whether or not the most-derived object
     /// is an array, however, because 0-length arrays are allowed.
+    ///
+    /// If the current array is an unsized array, the value of this is
+    /// undefined.
     uint64_t MostDerivedArraySize;
 
     /// The type of the most derived object referred to by this address.
@@ -187,23 +231,24 @@ namespace {
 
     explicit SubobjectDesignator(QualType T)
         : Invalid(false), IsOnePastTheEnd(false),
-          MostDerivedIsArrayElement(false), MostDerivedPathLength(0),
-          MostDerivedArraySize(0), MostDerivedType(T) {}
+          FirstEntryIsAnUnsizedArray(false), MostDerivedIsArrayElement(false),
+          MostDerivedPathLength(0), MostDerivedArraySize(0),
+          MostDerivedType(T) {}
 
     SubobjectDesignator(ASTContext &Ctx, const APValue &V)
         : Invalid(!V.isLValue() || !V.hasLValuePath()), IsOnePastTheEnd(false),
-          MostDerivedIsArrayElement(false), MostDerivedPathLength(0),
-          MostDerivedArraySize(0) {
+          FirstEntryIsAnUnsizedArray(false), MostDerivedIsArrayElement(false),
+          MostDerivedPathLength(0), MostDerivedArraySize(0) {
+      assert(V.isLValue() && "Non-LValue used to make an LValue designator?");
       if (!Invalid) {
         IsOnePastTheEnd = V.isLValueOnePastTheEnd();
         ArrayRef<PathEntry> VEntries = V.getLValuePath();
         Entries.insert(Entries.end(), VEntries.begin(), VEntries.end());
         if (V.getLValueBase()) {
           bool IsArray = false;
-          MostDerivedPathLength =
-              findMostDerivedSubobject(Ctx, getType(V.getLValueBase()),
-                                       V.getLValuePath(), MostDerivedArraySize,
-                                       MostDerivedType, IsArray);
+          MostDerivedPathLength = findMostDerivedSubobject(
+              Ctx, V.getLValueBase(), V.getLValuePath(), MostDerivedArraySize,
+              MostDerivedType, IsArray);
           MostDerivedIsArrayElement = IsArray;
         }
       }
@@ -214,12 +259,26 @@ namespace {
       Entries.clear();
     }
 
+    /// Determine whether the most derived subobject is an array without a
+    /// known bound.
+    bool isMostDerivedAnUnsizedArray() const {
+      assert(!Invalid && "Calling this makes no sense on invalid designators");
+      return Entries.size() == 1 && FirstEntryIsAnUnsizedArray;
+    }
+
+    /// Determine what the most derived array's size is. Results in an assertion
+    /// failure if the most derived array lacks a size.
+    uint64_t getMostDerivedArraySize() const {
+      assert(!isMostDerivedAnUnsizedArray() && "Unsized array has no size");
+      return MostDerivedArraySize;
+    }
+
     /// Determine whether this is a one-past-the-end pointer.
     bool isOnePastTheEnd() const {
       assert(!Invalid);
       if (IsOnePastTheEnd)
         return true;
-      if (MostDerivedIsArrayElement &&
+      if (!isMostDerivedAnUnsizedArray() && MostDerivedIsArrayElement &&
           Entries[MostDerivedPathLength - 1].ArrayIndex == MostDerivedArraySize)
         return true;
       return false;
@@ -247,6 +306,21 @@ namespace {
       MostDerivedArraySize = CAT->getSize().getZExtValue();
       MostDerivedPathLength = Entries.size();
     }
+    /// Update this designator to refer to the first element within the array of
+    /// elements of type ElemTy. This is an array of unknown size.
+    void addUnsizedArrayUnchecked(QualType ElemTy) {
+      PathEntry Entry;
+      Entry.ArrayIndex = 0;
+      Entries.push_back(Entry);
+
+      MostDerivedType = ElemTy;
+      MostDerivedIsArrayElement = true;
+      // The value in MostDerivedArraySize is undefined in this case. So, set it
+      // to an arbitrary value that's likely to loudly break things if it's
+      // used.
+      MostDerivedArraySize = std::numeric_limits<uint64_t>::max() / 2;
+      MostDerivedPathLength = Entries.size();
+    }
     /// Update this designator to refer to the given base or member of this
     /// object.
     void addDeclUnchecked(const Decl *D, bool Virtual = false) {
@@ -280,10 +354,16 @@ namespace {
     /// Add N to the address of this subobject.
     void adjustIndex(EvalInfo &Info, const Expr *E, uint64_t N) {
       if (Invalid) return;
+      if (isMostDerivedAnUnsizedArray()) {
+        // Can't verify -- trust that the user is doing the right thing (or if
+        // not, trust that the caller will catch the bad behavior).
+        Entries.back().ArrayIndex += N;
+        return;
+      }
       if (MostDerivedPathLength == Entries.size() &&
           MostDerivedIsArrayElement) {
         Entries.back().ArrayIndex += N;
-        if (Entries.back().ArrayIndex > MostDerivedArraySize) {
+        if (Entries.back().ArrayIndex > getMostDerivedArraySize()) {
           diagnosePointerArithmetic(Info, E, Entries.back().ArrayIndex);
           setInvalid();
         }
@@ -524,9 +604,15 @@ namespace {
       /// gets a chance to look at it.
       EM_PotentialConstantExpressionUnevaluated,
 
-      /// Evaluate as a constant expression. Continue evaluating if we find a
-      /// MemberExpr with a base that can't be evaluated.
-      EM_DesignatorFold,
+      /// Evaluate as a constant expression. Continue evaluating if either:
+      /// - We find a MemberExpr with a base that can't be evaluated.
+      /// - We find a variable initialized with a call to a function that has
+      ///   the alloc_size attribute on it.
+      /// In either case, the LValue returned shall have an invalid base; in the
+      /// former, the base will be the invalid MemberExpr, in the latter, the
+      /// base will be either the alloc_size CallExpr or a CastExpr wrapping
+      /// said CallExpr.
+      EM_OffsetFold,
     } EvalMode;
 
     /// Are we checking whether the expression is a potential constant
@@ -628,7 +714,7 @@ namespace {
           case EM_PotentialConstantExpression:
           case EM_ConstantExpressionUnevaluated:
           case EM_PotentialConstantExpressionUnevaluated:
-          case EM_DesignatorFold:
+          case EM_OffsetFold:
             HasActiveDiagnostic = false;
             return OptionalDiagnostic();
           }
@@ -720,7 +806,7 @@ namespace {
       case EM_ConstantExpression:
       case EM_ConstantExpressionUnevaluated:
       case EM_ConstantFold:
-      case EM_DesignatorFold:
+      case EM_OffsetFold:
         return false;
       }
       llvm_unreachable("Missed EvalMode case");
@@ -739,7 +825,7 @@ namespace {
       case EM_EvaluateForOverflow:
       case EM_IgnoreSideEffects:
       case EM_ConstantFold:
-      case EM_DesignatorFold:
+      case EM_OffsetFold:
         return true;
 
       case EM_PotentialConstantExpression:
@@ -775,7 +861,7 @@ namespace {
       case EM_ConstantExpressionUnevaluated:
       case EM_ConstantFold:
       case EM_IgnoreSideEffects:
-      case EM_DesignatorFold:
+      case EM_OffsetFold:
         return false;
       }
       llvm_unreachable("Missed EvalMode case");
@@ -805,7 +891,7 @@ namespace {
     }
 
     bool allowInvalidBaseExpr() const {
-      return EvalMode == EM_DesignatorFold;
+      return EvalMode == EM_OffsetFold;
     }
 
     class ArrayInitLoopIndex {
@@ -856,11 +942,10 @@ namespace {
   struct FoldOffsetRAII {
     EvalInfo &Info;
     EvalInfo::EvaluationMode OldMode;
-    explicit FoldOffsetRAII(EvalInfo &Info, bool Subobject)
+    explicit FoldOffsetRAII(EvalInfo &Info)
         : Info(Info), OldMode(Info.EvalMode) {
       if (!Info.checkingPotentialConstantExpression())
-        Info.EvalMode = Subobject ? EvalInfo::EM_DesignatorFold
-                                  : EvalInfo::EM_ConstantFold;
+        Info.EvalMode = EvalInfo::EM_OffsetFold;
     }
 
     ~FoldOffsetRAII() { Info.EvalMode = OldMode; }
@@ -966,10 +1051,12 @@ bool SubobjectDesignator::checkSubobject(EvalInfo &Info, const Expr *E,
 
 void SubobjectDesignator::diagnosePointerArithmetic(EvalInfo &Info,
                                                     const Expr *E, uint64_t N) {
+  // If we're complaining, we must be able to statically determine the size of
+  // the most derived array.
   if (MostDerivedPathLength == Entries.size() && MostDerivedIsArrayElement)
     Info.CCEDiag(E, diag::note_constexpr_array_index)
       << static_cast<int>(N) << /*array*/ 0
-      << static_cast<unsigned>(MostDerivedArraySize);
+      << static_cast<unsigned>(getMostDerivedArraySize());
   else
     Info.CCEDiag(E, diag::note_constexpr_array_index)
       << static_cast<int>(N) << /*non-array*/ 1;
@@ -1102,12 +1189,16 @@ namespace {
       if (Designator.Invalid)
         V = APValue(Base, Offset, APValue::NoLValuePath(), CallIndex,
                     IsNullPtr);
-      else
+      else {
+        assert(!InvalidBase && "APValues can't handle invalid LValue bases");
+        assert(!Designator.FirstEntryIsAnUnsizedArray &&
+               "Unsized array with a valid base?");
         V = APValue(Base, Offset, Designator.Entries,
                     Designator.IsOnePastTheEnd, CallIndex, IsNullPtr);
+      }
     }
     void setFrom(ASTContext &Ctx, const APValue &V) {
-      assert(V.isLValue());
+      assert(V.isLValue() && "Setting LValue from a non-LValue?");
       Base = V.getLValueBase();
       Offset = V.getLValueOffset();
       InvalidBase = false;
@@ -1118,6 +1209,15 @@ namespace {
 
     void set(APValue::LValueBase B, unsigned I = 0, bool BInvalid = false,
              bool IsNullPtr_ = false, uint64_t Offset_ = 0) {
+#ifndef NDEBUG
+      // We only allow a few types of invalid bases. Enforce that here.
+      if (BInvalid) {
+        const auto *E = B.get<const Expr *>();
+        assert((isa<MemberExpr>(E) || tryUnwrapAllocSizeCall(E)) &&
+               "Unexpected type of invalid base");
+      }
+#endif
+
       Base = B;
       Offset = CharUnits::fromQuantity(Offset_);
       InvalidBase = BInvalid;
@@ -1157,6 +1257,13 @@ namespace {
       if (checkSubobject(Info, E, isa<FieldDecl>(D) ? CSK_Field : CSK_Base))
         Designator.addDeclUnchecked(D, Virtual);
     }
+    void addUnsizedArray(EvalInfo &Info, QualType ElemTy) {
+      assert(Designator.Entries.empty() && getType(Base)->isPointerType());
+      assert(isBaseAnAllocSizeCall(Base) &&
+             "Only alloc_size bases can have unsized arrays");
+      Designator.FirstEntryIsAnUnsizedArray = true;
+      Designator.addUnsizedArrayUnchecked(ElemTy);
+    }
     void addArray(EvalInfo &Info, const Expr *E, const ConstantArrayType *CAT) {
       if (checkSubobject(Info, E, CSK_ArrayToPointer))
         Designator.addArrayUnchecked(CAT);
@@ -2796,7 +2903,7 @@ static CompleteObject findCompleteObject(EvalInfo &Info, const Expr *E,
         // All the remaining cases only permit reading.
         Info.FFDiag(E, diag::note_constexpr_modify_global);
         return CompleteObject();
-      } else if (VD->isConstexpr()) {
+      } else if (VD->isConstexpr() || BaseType.isConstQualified()) {
         // OK, we can read this variable.
       } else if (BaseType->isIntegralOrEnumerationType()) {
         // In OpenCL if a variable is in constant address space it is a const value.
@@ -5079,6 +5186,105 @@ bool LValueExprEvaluator::VisitBinAssign(const BinaryOperator *E) {
 // Pointer Evaluation
 //===----------------------------------------------------------------------===//
 
+/// \brief Attempts to compute the number of bytes available at the pointer
+/// returned by a function with the alloc_size attribute. Returns true if we
+/// were successful. Places an unsigned number into `Result`.
+///
+/// This expects the given CallExpr to be a call to a function with an
+/// alloc_size attribute.
+static bool getBytesReturnedByAllocSizeCall(const ASTContext &Ctx,
+                                            const CallExpr *Call,
+                                            llvm::APInt &Result) {
+  const AllocSizeAttr *AllocSize = getAllocSizeAttr(Call);
+
+  // alloc_size args are 1-indexed, 0 means not present.
+  assert(AllocSize && AllocSize->getElemSizeParam() != 0);
+  unsigned SizeArgNo = AllocSize->getElemSizeParam() - 1;
+  unsigned BitsInSizeT = Ctx.getTypeSize(Ctx.getSizeType());
+  if (Call->getNumArgs() <= SizeArgNo)
+    return false;
+
+  auto EvaluateAsSizeT = [&](const Expr *E, APSInt &Into) {
+    if (!E->EvaluateAsInt(Into, Ctx, Expr::SE_AllowSideEffects))
+      return false;
+    if (Into.isNegative() || !Into.isIntN(BitsInSizeT))
+      return false;
+    Into = Into.zextOrSelf(BitsInSizeT);
+    return true;
+  };
+
+  APSInt SizeOfElem;
+  if (!EvaluateAsSizeT(Call->getArg(SizeArgNo), SizeOfElem))
+    return false;
+
+  if (!AllocSize->getNumElemsParam()) {
+    Result = std::move(SizeOfElem);
+    return true;
+  }
+
+  APSInt NumberOfElems;
+  // Argument numbers start at 1
+  unsigned NumArgNo = AllocSize->getNumElemsParam() - 1;
+  if (!EvaluateAsSizeT(Call->getArg(NumArgNo), NumberOfElems))
+    return false;
+
+  bool Overflow;
+  llvm::APInt BytesAvailable = SizeOfElem.umul_ov(NumberOfElems, Overflow);
+  if (Overflow)
+    return false;
+
+  Result = std::move(BytesAvailable);
+  return true;
+}
+
+/// \brief Convenience function. LVal's base must be a call to an alloc_size
+/// function.
+static bool getBytesReturnedByAllocSizeCall(const ASTContext &Ctx,
+                                            const LValue &LVal,
+                                            llvm::APInt &Result) {
+  assert(isBaseAnAllocSizeCall(LVal.getLValueBase()) &&
+         "Can't get the size of a non alloc_size function");
+  const auto *Base = LVal.getLValueBase().get<const Expr *>();
+  const CallExpr *CE = tryUnwrapAllocSizeCall(Base);
+  return getBytesReturnedByAllocSizeCall(Ctx, CE, Result);
+}
+
+/// \brief Attempts to evaluate the given LValueBase as the result of a call to
+/// a function with the alloc_size attribute. If it was possible to do so, this
+/// function will return true, make Result's Base point to said function call,
+/// and mark Result's Base as invalid.
+static bool evaluateLValueAsAllocSize(EvalInfo &Info, APValue::LValueBase Base,
+                                      LValue &Result) {
+  if (!Info.allowInvalidBaseExpr() || Base.isNull())
+    return false;
+
+  // Because we do no form of static analysis, we only support const variables.
+  //
+  // Additionally, we can't support parameters, nor can we support static
+  // variables (in the latter case, use-before-assign isn't UB; in the former,
+  // we have no clue what they'll be assigned to).
+  const auto *VD =
+      dyn_cast_or_null<VarDecl>(Base.dyn_cast<const ValueDecl *>());
+  if (!VD || !VD->isLocalVarDecl() || !VD->getType().isConstQualified())
+    return false;
+
+  const Expr *Init = VD->getAnyInitializer();
+  if (!Init)
+    return false;
+
+  const Expr *E = Init->IgnoreParens();
+  if (!tryUnwrapAllocSizeCall(E))
+    return false;
+
+  // Store E instead of E unwrapped so that the type of the LValue's base is
+  // what the user wanted.
+  Result.setInvalid(E);
+
+  QualType Pointee = E->getType()->castAs<PointerType>()->getPointeeType();
+  Result.addUnsizedArray(Info, Pointee);
+  return true;
+}
+
 namespace {
 class PointerExprEvaluator
   : public ExprEvaluatorBase<PointerExprEvaluator> {
@@ -5088,6 +5294,8 @@ class PointerExprEvaluator
     Result.set(E);
     return true;
   }
+
+  bool visitNonBuiltinCallExpr(const CallExpr *E);
 public:
 
   PointerExprEvaluator(EvalInfo &info, LValue &Result)
@@ -5270,6 +5478,19 @@ bool PointerExprEvaluator::VisitCastExpr(const CastExpr* E) {
 
   case CK_FunctionToPointerDecay:
     return EvaluateLValue(SubExpr, Result, Info);
+
+  case CK_LValueToRValue: {
+    LValue LVal;
+    if (!EvaluateLValue(E->getSubExpr(), LVal, Info))
+      return false;
+
+    APValue RVal;
+    // Note, we use the subexpression's type in order to retain cv-qualifiers.
+    if (!handleLValueToRValueConversion(Info, E, E->getSubExpr()->getType(),
+                                        LVal, RVal))
+      return evaluateLValueAsAllocSize(Info, LVal.Base, Result);
+    return Success(RVal, E);
+  }
   }
 
   return ExprEvaluatorBaseTy::VisitCastExpr(E);
@@ -5307,6 +5528,20 @@ static CharUnits GetAlignOfExpr(EvalInfo &Info, const Expr *E) {
   return GetAlignOfType(Info, E->getType());
 }
 
+// To be clear: this happily visits unsupported builtins. Better name welcomed.
+bool PointerExprEvaluator::visitNonBuiltinCallExpr(const CallExpr *E) {
+  if (ExprEvaluatorBaseTy::VisitCallExpr(E))
+    return true;
+
+  if (!(Info.allowInvalidBaseExpr() && getAllocSizeAttr(E)))
+    return false;
+
+  Result.setInvalid(E);
+  QualType PointeeTy = E->getType()->castAs<PointerType>()->getPointeeType();
+  Result.addUnsizedArray(Info, PointeeTy);
+  return true;
+}
+
 bool PointerExprEvaluator::VisitCallExpr(const CallExpr *E) {
   if (IsStringLiteralCall(E))
     return Success(E);
@@ -5314,7 +5549,7 @@ bool PointerExprEvaluator::VisitCallExpr(const CallExpr *E) {
   if (unsigned BuiltinOp = E->getBuiltinCallee())
     return VisitBuiltinCallExpr(E, BuiltinOp);
 
-  return ExprEvaluatorBaseTy::VisitCallExpr(E);
+  return visitNonBuiltinCallExpr(E);
 }
 
 bool PointerExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
@@ -5473,7 +5708,7 @@ bool PointerExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
   }
 
   default:
-    return ExprEvaluatorBaseTy::VisitCallExpr(E);
+    return visitNonBuiltinCallExpr(E);
   }
 }
 
@@ -6512,8 +6747,6 @@ public:
   bool VisitCXXNoexceptExpr(const CXXNoexceptExpr *E);
   bool VisitSizeOfPackExpr(const SizeOfPackExpr *E);
 
-private:
-  bool TryEvaluateBuiltinObjectSize(const CallExpr *E, unsigned Type);
   // FIXME: Missing: array subscript of vector, member of vector
 };
 } // end anonymous namespace
@@ -6785,7 +7018,7 @@ static QualType getObjectType(APValue::LValueBase B) {
 }
 
 /// A more selective version of E->IgnoreParenCasts for
-/// TryEvaluateBuiltinObjectSize. This ignores some casts/parens that serve only
+/// tryEvaluateBuiltinObjectSize. This ignores some casts/parens that serve only
 /// to change the type of E.
 /// Ex. For E = `(short*)((char*)(&foo))`, returns `&foo`
 ///
@@ -6852,82 +7085,191 @@ static bool isDesignatorAtObjectEnd(const ASTContext &Ctx, const LValue &LVal) {
     }
   }
 
+  unsigned I = 0;
   QualType BaseType = getType(Base);
-  for (int I = 0, E = LVal.Designator.Entries.size(); I != E; ++I) {
+  if (LVal.Designator.FirstEntryIsAnUnsizedArray) {
+    assert(isBaseAnAllocSizeCall(Base) &&
+           "Unsized array in non-alloc_size call?");
+    // If this is an alloc_size base, we should ignore the initial array index
+    ++I;
+    BaseType = BaseType->castAs<PointerType>()->getPointeeType();
+  }
+
+  for (unsigned E = LVal.Designator.Entries.size(); I != E; ++I) {
+    const auto &Entry = LVal.Designator.Entries[I];
     if (BaseType->isArrayType()) {
       // Because __builtin_object_size treats arrays as objects, we can ignore
       // the index iff this is the last array in the Designator.
       if (I + 1 == E)
         return true;
-      auto *CAT = cast<ConstantArrayType>(Ctx.getAsArrayType(BaseType));
-      uint64_t Index = LVal.Designator.Entries[I].ArrayIndex;
+      const auto *CAT = cast<ConstantArrayType>(Ctx.getAsArrayType(BaseType));
+      uint64_t Index = Entry.ArrayIndex;
       if (Index + 1 != CAT->getSize())
         return false;
       BaseType = CAT->getElementType();
     } else if (BaseType->isAnyComplexType()) {
-      auto *CT = BaseType->castAs<ComplexType>();
-      uint64_t Index = LVal.Designator.Entries[I].ArrayIndex;
+      const auto *CT = BaseType->castAs<ComplexType>();
+      uint64_t Index = Entry.ArrayIndex;
       if (Index != 1)
         return false;
       BaseType = CT->getElementType();
-    } else if (auto *FD = getAsField(LVal.Designator.Entries[I])) {
+    } else if (auto *FD = getAsField(Entry)) {
       bool Invalid;
       if (!IsLastOrInvalidFieldDecl(FD, Invalid))
         return Invalid;
       BaseType = FD->getType();
     } else {
-      assert(getAsBaseClass(LVal.Designator.Entries[I]) != nullptr &&
-             "Expecting cast to a base class");
+      assert(getAsBaseClass(Entry) && "Expecting cast to a base class");
       return false;
     }
   }
   return true;
 }
 
-/// Tests to see if the LValue has a designator (that isn't necessarily valid).
+/// Tests whether the LValue refers to a complete object, i.e. that it has no
+/// user-specified designator (an invalid designator counts as having one).
+/// This also returns 'true' if the sole designator entry is an unsized array,
+/// because there's currently no way to tell if the user typed *foo or foo[0].
 static bool refersToCompleteObject(const LValue &LVal) {
-  if (LVal.Designator.Invalid || !LVal.Designator.Entries.empty())
+  if (LVal.Designator.Invalid)
     return false;
 
+  if (!LVal.Designator.Entries.empty())
+    return LVal.Designator.isMostDerivedAnUnsizedArray();
+
   if (!LVal.InvalidBase)
     return true;
 
-  auto *E = LVal.Base.dyn_cast<const Expr *>();
-  (void)E;
-  assert(E != nullptr && isa<MemberExpr>(E));
-  return false;
+  // If `E` is a MemberExpr, then the first part of the designator is hiding in
+  // the LValueBase.
+  const auto *E = LVal.Base.dyn_cast<const Expr *>();
+  return !E || !isa<MemberExpr>(E);
+}
+
+/// Attempts to detect a user writing into a piece of memory that's impossible
+/// to figure out the size of by just using types.
+static bool isUserWritingOffTheEnd(const ASTContext &Ctx, const LValue &LVal) {
+  const SubobjectDesignator &Designator = LVal.Designator;
+  // Notes:
+  // - Users can only write off of the end when we have an invalid base. Invalid
+  //   bases imply we don't know where the memory came from.
+  // - We used to be a bit more aggressive here; we'd only be conservative if
+  //   the array at the end was flexible, or if it had 0 or 1 elements. This
+  //   broke some common standard library extensions (PR30346), but was
+  //   otherwise seemingly fine. It may be useful to reintroduce this behavior
+  //   with some sort of whitelist. OTOH, it seems that GCC is always
+  //   conservative with the last element in structs (if it's an array), so our
+  //   current behavior is more compatible than a whitelisting approach would
+  //   be.
+  return LVal.InvalidBase &&
+         Designator.Entries.size() == Designator.MostDerivedPathLength &&
+         Designator.MostDerivedIsArrayElement &&
+         isDesignatorAtObjectEnd(Ctx, LVal);
+}
+
+/// Converts the given APInt to CharUnits, assuming the APInt is unsigned.
+/// Fails if the conversion would cause loss of precision.
+static bool convertUnsignedAPIntToCharUnits(const llvm::APInt &Int,
+                                            CharUnits &Result) {
+  auto CharUnitsMax = std::numeric_limits<CharUnits::QuantityType>::max();
+  if (Int.ugt(CharUnitsMax))
+    return false;
+  Result = CharUnits::fromQuantity(Int.getZExtValue());
+  return true;
 }
 
-/// Tries to evaluate the __builtin_object_size for @p E. If successful, returns
-/// true and stores the result in @p Size.
+/// Helper for tryEvaluateBuiltinObjectSize -- Given an LValue, this will
+/// determine how many bytes exist from the beginning of the object to either
+/// the end of the current subobject, or the end of the object itself, depending
+/// on what the LValue looks like + the value of Type.
 ///
-/// If @p WasError is non-null, this will report whether the failure to evaluate
-/// is to be treated as an Error in IntExprEvaluator.
-static bool tryEvaluateBuiltinObjectSize(const Expr *E, unsigned Type,
-                                         EvalInfo &Info, uint64_t &Size,
-                                         bool *WasError = nullptr) {
-  if (WasError != nullptr)
-    *WasError = false;
-
-  auto Error = [&](const Expr *E) {
-    if (WasError != nullptr)
-      *WasError = true;
+/// If this returns false, the value of EndOffset is undefined.
+static bool determineEndOffset(EvalInfo &Info, SourceLocation ExprLoc,
+                               unsigned Type, const LValue &LVal,
+                               CharUnits &EndOffset) {
+  bool DetermineForCompleteObject = refersToCompleteObject(LVal);
+
+  // We want to evaluate the size of the entire object. This is a valid fallback
+  // for when Type=1 and the designator is invalid, because we're asked for an
+  // upper-bound.
+  if (!(Type & 1) || LVal.Designator.Invalid || DetermineForCompleteObject) {
+    // Type=3 wants a lower bound, so we can't fall back to this.
+    if (Type == 3 && !DetermineForCompleteObject)
+      return false;
+
+    llvm::APInt APEndOffset;
+    if (isBaseAnAllocSizeCall(LVal.getLValueBase()) &&
+        getBytesReturnedByAllocSizeCall(Info.Ctx, LVal, APEndOffset))
+      return convertUnsignedAPIntToCharUnits(APEndOffset, EndOffset);
+
+    if (LVal.InvalidBase)
+      return false;
+
+    QualType BaseTy = getObjectType(LVal.getLValueBase());
+    return !BaseTy.isNull() && HandleSizeof(Info, ExprLoc, BaseTy, EndOffset);
+  }
+
+  // We want to evaluate the size of a subobject.
+  const SubobjectDesignator &Designator = LVal.Designator;
+
+  // The following is a moderately common idiom in C:
+  //
+  // struct Foo { int a; char c[1]; };
+  // struct Foo *F = (struct Foo *)malloc(sizeof(struct Foo) + strlen(Bar));
+  // strcpy(&F->c[0], Bar);
+  //
+  // In order to not break too much legacy code, we need to support it.
+  if (isUserWritingOffTheEnd(Info.Ctx, LVal)) {
+    // If we can resolve this to an alloc_size call, we can hand that back,
+    // because we know for certain how many bytes there are to write to.
+    llvm::APInt APEndOffset;
+    if (isBaseAnAllocSizeCall(LVal.getLValueBase()) &&
+        getBytesReturnedByAllocSizeCall(Info.Ctx, LVal, APEndOffset))
+      return convertUnsignedAPIntToCharUnits(APEndOffset, EndOffset);
+
+    // If we cannot determine the size of the initial allocation, then we can't
+    // give an accurate upper-bound. However, we are still able to give
+    // conservative lower-bounds for Type=3.
+    if (Type == 1)
+      return false;
+  }
+
+  CharUnits BytesPerElem;
+  if (!HandleSizeof(Info, ExprLoc, Designator.MostDerivedType, BytesPerElem))
     return false;
-  };
 
-  auto Success = [&](uint64_t S, const Expr *E) {
-    Size = S;
-    return true;
-  };
+  // According to the GCC documentation, we want the size of the subobject
+  // denoted by the pointer. But that's not quite right -- what we actually
+  // want is the size of the immediately-enclosing array, if there is one.
+  int64_t ElemsRemaining;
+  if (Designator.MostDerivedIsArrayElement &&
+      Designator.Entries.size() == Designator.MostDerivedPathLength) {
+    uint64_t ArraySize = Designator.getMostDerivedArraySize();
+    uint64_t ArrayIndex = Designator.Entries.back().ArrayIndex;
+    ElemsRemaining = ArraySize <= ArrayIndex ? 0 : ArraySize - ArrayIndex;
+  } else {
+    ElemsRemaining = Designator.isOnePastTheEnd() ? 0 : 1;
+  }
 
+  EndOffset = LVal.getLValueOffset() + BytesPerElem * ElemsRemaining;
+  return true;
+}
+
+/// \brief Tries to evaluate the __builtin_object_size for @p E. If successful,
+/// returns true and stores the result in @p Size.
+///
+/// If @p WasError is non-null, this will report whether the failure to evaluate
+/// is to be treated as an Error in IntExprEvaluator.
+static bool tryEvaluateBuiltinObjectSize(const Expr *E, unsigned Type,
+                                         EvalInfo &Info, uint64_t &Size) {
   // Determine the denoted object.
-  LValue Base;
+  LValue LVal;
   {
     // The operand of __builtin_object_size is never evaluated for side-effects.
     // If there are any, but we can determine the pointed-to object anyway, then
     // ignore the side-effects.
     SpeculativeEvaluationRAII SpeculativeEval(Info);
-    FoldOffsetRAII Fold(Info, Type & 1);
+    FoldOffsetRAII Fold(Info);
 
     if (E->isGLValue()) {
       // It's possible for us to be given GLValues if we're called via
@@ -6935,122 +7277,29 @@ static bool tryEvaluateBuiltinObjectSize(const Expr *E, unsigned Type,
       APValue RVal;
       if (!EvaluateAsRValue(Info, E, RVal))
         return false;
-      Base.setFrom(Info.Ctx, RVal);
-    } else if (!EvaluatePointer(ignorePointerCastsAndParens(E), Base, Info))
+      LVal.setFrom(Info.Ctx, RVal);
+    } else if (!EvaluatePointer(ignorePointerCastsAndParens(E), LVal, Info))
       return false;
   }
 
-  CharUnits BaseOffset = Base.getLValueOffset();
   // If we point to before the start of the object, there are no accessible
   // bytes.
-  if (BaseOffset.isNegative())
-    return Success(0, E);
-
-  // In the case where we're not dealing with a subobject, we discard the
-  // subobject bit.
-  bool SubobjectOnly = (Type & 1) != 0 && !refersToCompleteObject(Base);
-
-  // If Type & 1 is 0, we need to be able to statically guarantee that the bytes
-  // exist. If we can't verify the base, then we can't do that.
-  //
-  // As a special case, we produce a valid object size for an unknown object
-  // with a known designator if Type & 1 is 1. For instance:
-  //
-  //   extern struct X { char buff[32]; int a, b, c; } *p;
-  //   int a = __builtin_object_size(p->buff + 4, 3); // returns 28
-  //   int b = __builtin_object_size(p->buff + 4, 2); // returns 0, not 40
-  //
-  // This matches GCC's behavior.
-  if (Base.InvalidBase && !SubobjectOnly)
-    return Error(E);
-
-  // If we're not examining only the subobject, then we reset to a complete
-  // object designator
-  //
-  // If Type is 1 and we've lost track of the subobject, just find the complete
-  // object instead. (If Type is 3, that's not correct behavior and we should
-  // return 0 instead.)
-  LValue End = Base;
-  if (!SubobjectOnly || (End.Designator.Invalid && Type == 1)) {
-    QualType T = getObjectType(End.getLValueBase());
-    if (T.isNull())
-      End.Designator.setInvalid();
-    else {
-      End.Designator = SubobjectDesignator(T);
-      End.Offset = CharUnits::Zero();
-    }
+  if (LVal.getLValueOffset().isNegative()) {
+    Size = 0;
+    return true;
   }
 
-  // If it is not possible to determine which objects ptr points to at compile
-  // time, __builtin_object_size should return (size_t) -1 for type 0 or 1
-  // and (size_t) 0 for type 2 or 3.
-  if (End.Designator.Invalid)
-    return false;
-
-  // According to the GCC documentation, we want the size of the subobject
-  // denoted by the pointer. But that's not quite right -- what we actually
-  // want is the size of the immediately-enclosing array, if there is one.
-  int64_t AmountToAdd = 1;
-  if (End.Designator.MostDerivedIsArrayElement &&
-      End.Designator.Entries.size() == End.Designator.MostDerivedPathLength) {
-    // We got a pointer to an array. Step to its end.
-    AmountToAdd = End.Designator.MostDerivedArraySize -
-                  End.Designator.Entries.back().ArrayIndex;
-  } else if (End.Designator.isOnePastTheEnd()) {
-    // We're already pointing at the end of the object.
-    AmountToAdd = 0;
-  }
-
-  QualType PointeeType = End.Designator.MostDerivedType;
-  assert(!PointeeType.isNull());
-  if (PointeeType->isIncompleteType() || PointeeType->isFunctionType())
-    return Error(E);
-
-  if (!HandleLValueArrayAdjustment(Info, E, End, End.Designator.MostDerivedType,
-                                   AmountToAdd))
+  CharUnits EndOffset;
+  if (!determineEndOffset(Info, E->getExprLoc(), Type, LVal, EndOffset))
     return false;
 
-  auto EndOffset = End.getLValueOffset();
-
-  // The following is a moderately common idiom in C:
-  //
-  // struct Foo { int a; char c[1]; };
-  // struct Foo *F = (struct Foo *)malloc(sizeof(struct Foo) + strlen(Bar));
-  // strcpy(&F->c[0], Bar);
-  //
-  // So, if we see that we're examining an array at the end of a struct with an
-  // unknown base, we give up instead of breaking code that behaves this way.
-  // Note that we only do this when Type=1, because Type=3 is a lower bound, so
-  // answering conservatively is fine.
-  //
-  // We used to be a bit more aggressive here; we'd only be conservative if the
-  // array at the end was flexible, or if it had 0 or 1 elements. This broke
-  // some common standard library extensions (PR30346), but was otherwise
-  // seemingly fine. It may be useful to reintroduce this behavior with some
-  // sort of whitelist. OTOH, it seems that GCC is always conservative with the
-  // last element in structs (if it's an array), so our current behavior is more
-  // compatible than a whitelisting approach would be.
-  if (End.InvalidBase && SubobjectOnly && Type == 1 &&
-      End.Designator.Entries.size() == End.Designator.MostDerivedPathLength &&
-      End.Designator.MostDerivedIsArrayElement &&
-      isDesignatorAtObjectEnd(Info.Ctx, End))
-    return false;
-
-  if (BaseOffset > EndOffset)
-    return Success(0, E);
-
-  return Success((EndOffset - BaseOffset).getQuantity(), E);
-}
-
-bool IntExprEvaluator::TryEvaluateBuiltinObjectSize(const CallExpr *E,
-                                                    unsigned Type) {
-  uint64_t Size;
-  bool WasError;
-  if (::tryEvaluateBuiltinObjectSize(E->getArg(0), Type, Info, Size, &WasError))
-    return Success(Size, E);
-  if (WasError)
-    return Error(E);
-  return false;
+  // If we've fallen outside of the end offset, just pretend there's nothing to
+  // write to/read from.
+  if (EndOffset <= LVal.getLValueOffset())
+    Size = 0;
+  else
+    Size = (EndOffset - LVal.getLValueOffset()).getQuantity();
+  return true;
 }
 
 bool IntExprEvaluator::VisitCallExpr(const CallExpr *E) {
@@ -7072,8 +7321,9 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
         E->getArg(1)->EvaluateKnownConstInt(Info.Ctx).getZExtValue();
     assert(Type <= 3 && "unexpected type");
 
-    if (TryEvaluateBuiltinObjectSize(E, Type))
-      return true;
+    uint64_t Size;
+    if (tryEvaluateBuiltinObjectSize(E->getArg(0), Type, Info, Size))
+      return Success(Size, E);
 
     if (E->getArg(0)->HasSideEffects(Info.Ctx))
       return Success((Type & 2) ? 0 : -1, E);
@@ -7086,7 +7336,7 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
     case EvalInfo::EM_ConstantFold:
     case EvalInfo::EM_EvaluateForOverflow:
     case EvalInfo::EM_IgnoreSideEffects:
-    case EvalInfo::EM_DesignatorFold:
+    case EvalInfo::EM_OffsetFold:
       // Leave it to IR generation.
       return Error(E);
     case EvalInfo::EM_ConstantExpressionUnevaluated:
@@ -10189,5 +10439,5 @@ bool Expr::tryEvaluateObjectSize(uint64_t &Result, ASTContext &Ctx,
 
   Expr::EvalStatus Status;
   EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantFold);
-  return ::tryEvaluateBuiltinObjectSize(this, Type, Info, Result);
+  return tryEvaluateBuiltinObjectSize(this, Type, Info, Result);
 }
author	George Burgess IV <george.burgess.iv@gmail.com>	2016-12-22 02:50:20 +0000
committer	George Burgess IV <george.burgess.iv@gmail.com>	2016-12-22 02:50:20 +0000
commit	aa365cb2fed2efbc374e52db2a01bdb929ed977a (patch)
tree	29df45f4fc82d8bcb41a8ae142a0cec2467e1116 /lib/AST/ExprConstant.cpp
parent	a8bebbeb2ba5f6d8a0d6859a7e5713a8970aad8e (diff)