summaryrefslogtreecommitdiffstats
path: root/lib/StaticAnalyzer
diff options
context:
space:
mode:
authorArtem Dergachev <artem.dergachev@gmail.com>2017-05-29 15:42:56 +0000
committerArtem Dergachev <artem.dergachev@gmail.com>2017-05-29 15:42:56 +0000
commit1f7848d7f2f49a0a931d4e4aa13714fe47bf373e (patch)
tree2dead7262e8fdd72e11646aa0c8e41cfdc3e670d /lib/StaticAnalyzer
parentab4ea4570190c32f1e5c00d851680ebd14e94b53 (diff)
[analyzer] Support partially tainted records.
The analyzer's taint analysis can now reason about structures or arrays originating from taint sources in which only certain sections are tainted. In particular, it also benefits modeling functions like read(), which may read tainted data into a section of a structure, but RegionStore is incapable of expressing the fact that the rest of the structure remains intact, even if we try to model read() directly. Patch by Vlad Tsyrklevich! Differential revision: https://reviews.llvm.org/D28445 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@304162 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/StaticAnalyzer')
-rw-r--r--lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp83
-rw-r--r--lib/StaticAnalyzer/Core/ProgramState.cpp103
-rw-r--r--lib/StaticAnalyzer/Core/RegionStore.cpp5
3 files changed, 111 insertions, 80 deletions
diff --git a/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
index b1a54e7795..883c6a6632 100644
--- a/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
+++ b/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
@@ -65,21 +65,8 @@ private:
/// and thus, is tainted.
static bool isStdin(const Expr *E, CheckerContext &C);
- /// This is called from getPointedToSymbol() to resolve symbol references for
- /// the region underlying a LazyCompoundVal. This is the default binding
- /// for the LCV, which could be a conjured symbol from a function call that
- /// initialized the region. It only returns the conjured symbol if the LCV
- /// covers the entire region, e.g. we avoid false positives by not returning
- /// a default bindingc for an entire struct if the symbol for only a single
- /// field or element within it is requested.
- // TODO: Return an appropriate symbol for sub-fields/elements of an LCV so
- // that they are also appropriately tainted.
- static SymbolRef getLCVSymbol(CheckerContext &C,
- nonloc::LazyCompoundVal &LCV);
-
- /// \brief Given a pointer argument, get the symbol of the value it contains
- /// (points to).
- static SymbolRef getPointedToSymbol(CheckerContext &C, const Expr *Arg);
+ /// \brief Given a pointer argument, return the value it points to.
+ static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg);
/// Functions defining the attack surface.
typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *,
@@ -186,9 +173,14 @@ private:
static inline bool isTaintedOrPointsToTainted(const Expr *E,
ProgramStateRef State,
CheckerContext &C) {
- return (State->isTainted(E, C.getLocationContext()) || isStdin(E, C) ||
- (E->getType().getTypePtr()->isPointerType() &&
- State->isTainted(getPointedToSymbol(C, E))));
+ if (State->isTainted(E, C.getLocationContext()) || isStdin(E, C))
+ return true;
+
+ if (!E->getType().getTypePtr()->isPointerType())
+ return false;
+
+ Optional<SVal> V = getPointedToSVal(C, E);
+ return (V && State->isTainted(*V));
}
/// \brief Pre-process a function which propagates taint according to the
@@ -400,9 +392,9 @@ bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
if (CE->getNumArgs() < (ArgNum + 1))
return false;
const Expr* Arg = CE->getArg(ArgNum);
- SymbolRef Sym = getPointedToSymbol(C, Arg);
- if (Sym)
- State = State->addTaint(Sym);
+ Optional<SVal> V = getPointedToSVal(C, Arg);
+ if (V)
+ State = State->addTaint(*V);
}
// Clear up the taint info from the state.
@@ -473,47 +465,20 @@ bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{
return false;
}
-SymbolRef GenericTaintChecker::getLCVSymbol(CheckerContext &C,
- nonloc::LazyCompoundVal &LCV) {
- StoreManager &StoreMgr = C.getStoreManager();
-
- // getLCVSymbol() is reached in a PostStmt so we can always expect a default
- // binding to exist if one is present.
- if (Optional<SVal> binding = StoreMgr.getDefaultBinding(LCV)) {
- SymbolRef Sym = binding->getAsSymbol();
- if (!Sym)
- return nullptr;
-
- // If the LCV covers an entire base region return the default conjured symbol.
- if (LCV.getRegion() == LCV.getRegion()->getBaseRegion())
- return Sym;
- }
-
- // Otherwise, return a nullptr as there's not yet a functional way to taint
- // sub-regions of LCVs.
- return nullptr;
-}
-
-SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C,
- const Expr* Arg) {
+Optional<SVal> GenericTaintChecker::getPointedToSVal(CheckerContext &C,
+ const Expr* Arg) {
ProgramStateRef State = C.getState();
SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext());
if (AddrVal.isUnknownOrUndef())
- return nullptr;
+ return None;
Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
if (!AddrLoc)
- return nullptr;
+ return None;
const PointerType *ArgTy =
dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr());
- SVal Val = State->getSVal(*AddrLoc,
- ArgTy ? ArgTy->getPointeeType(): QualType());
-
- if (auto LCV = Val.getAs<nonloc::LazyCompoundVal>())
- return getLCVSymbol(C, *LCV);
-
- return Val.getAsSymbol();
+ return State->getSVal(*AddrLoc, ArgTy ? ArgTy->getPointeeType(): QualType());
}
ProgramStateRef
@@ -633,9 +598,9 @@ ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE,
// The arguments are pointer arguments. The data they are pointing at is
// tainted after the call.
const Expr* Arg = CE->getArg(i);
- SymbolRef Sym = getPointedToSymbol(C, Arg);
- if (Sym)
- State = State->addTaint(Sym);
+ Optional<SVal> V = getPointedToSVal(C, Arg);
+ if (V)
+ State = State->addTaint(*V);
}
return State;
}
@@ -710,10 +675,10 @@ bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
// Check for taint.
ProgramStateRef State = C.getState();
- const SymbolRef PointedToSym = getPointedToSymbol(C, E);
+ Optional<SVal> PointedToSVal = getPointedToSVal(C, E);
SVal TaintedSVal;
- if (State->isTainted(PointedToSym))
- TaintedSVal = nonloc::SymbolVal(PointedToSym);
+ if (PointedToSVal && State->isTainted(*PointedToSVal))
+ TaintedSVal = *PointedToSVal;
else if (State->isTainted(E, C.getLocationContext()))
TaintedSVal = C.getSVal(E);
else
diff --git a/lib/StaticAnalyzer/Core/ProgramState.cpp b/lib/StaticAnalyzer/Core/ProgramState.cpp
index 31556c792f..fc26de1a1f 100644
--- a/lib/StaticAnalyzer/Core/ProgramState.cpp
+++ b/lib/StaticAnalyzer/Core/ProgramState.cpp
@@ -644,15 +644,33 @@ ProgramStateRef ProgramState::addTaint(const Stmt *S,
if (const Expr *E = dyn_cast_or_null<Expr>(S))
S = E->IgnoreParens();
- SymbolRef Sym = getSVal(S, LCtx).getAsSymbol();
+ return addTaint(getSVal(S, LCtx), Kind);
+}
+
+ProgramStateRef ProgramState::addTaint(SVal V,
+ TaintTagType Kind) const {
+ SymbolRef Sym = V.getAsSymbol();
if (Sym)
return addTaint(Sym, Kind);
- const MemRegion *R = getSVal(S, LCtx).getAsRegion();
- addTaint(R, Kind);
+ // If the SVal represents a structure, try to mass-taint all values within the
+ // structure. For now it only works efficiently on lazy compound values that
+ // were conjured during a conservative evaluation of a function - either as
+ // return values of functions that return structures or arrays by value, or as
+ // values of structures or arrays passed into the function by reference,
+ // directly or through pointer aliasing. Such lazy compound values are
+ // characterized by having exactly one binding in their captured store within
+ // their parent region, which is a conjured symbol default-bound to the base
+ // region of the parent region.
+ if (auto LCV = V.getAs<nonloc::LazyCompoundVal>()) {
+ if (Optional<SVal> binding = getStateManager().StoreMgr->getDefaultBinding(*LCV)) {
+ if (SymbolRef Sym = binding->getAsSymbol())
+ return addPartialTaint(Sym, LCV->getRegion(), Kind);
+ }
+ }
- // Cannot add taint, so just return the state.
- return this;
+ const MemRegion *R = V.getAsRegion();
+ return addTaint(R, Kind);
}
ProgramStateRef ProgramState::addTaint(const MemRegion *R,
@@ -674,6 +692,28 @@ ProgramStateRef ProgramState::addTaint(SymbolRef Sym,
return NewState;
}
+ProgramStateRef ProgramState::addPartialTaint(SymbolRef ParentSym,
+ const SubRegion *SubRegion,
+ TaintTagType Kind) const {
+ // Ignore partial taint if the entire parent symbol is already tainted.
+ if (contains<TaintMap>(ParentSym) && *get<TaintMap>(ParentSym) == Kind)
+ return this;
+
+ // Partial taint applies if only a portion of the symbol is tainted.
+ if (SubRegion == SubRegion->getBaseRegion())
+ return addTaint(ParentSym, Kind);
+
+ TaintedSubRegionsRef TaintedSubRegions(0, TSRFactory.getTreeFactory());
+ if (const TaintedSubRegionsRef *SavedTaintedRegions =
+ get<DerivedSymTaint>(ParentSym))
+ TaintedSubRegions = *SavedTaintedRegions;
+
+ TaintedSubRegions = TaintedSubRegions.add(SubRegion, Kind);
+ ProgramStateRef NewState = set<DerivedSymTaint>(ParentSym, TaintedSubRegions);
+ assert(NewState);
+ return NewState;
+}
+
bool ProgramState::isTainted(const Stmt *S, const LocationContext *LCtx,
TaintTagType Kind) const {
if (const Expr *E = dyn_cast_or_null<Expr>(S))
@@ -714,31 +754,54 @@ bool ProgramState::isTainted(SymbolRef Sym, TaintTagType Kind) const {
return false;
// Traverse all the symbols this symbol depends on to see if any are tainted.
- bool Tainted = false;
for (SymExpr::symbol_iterator SI = Sym->symbol_begin(), SE =Sym->symbol_end();
SI != SE; ++SI) {
if (!isa<SymbolData>(*SI))
continue;
- const TaintTagType *Tag = get<TaintMap>(*SI);
- Tainted = (Tag && *Tag == Kind);
+ if (const TaintTagType *Tag = get<TaintMap>(*SI)) {
+ if (*Tag == Kind)
+ return true;
+ }
- // If this is a SymbolDerived with a tainted parent, it's also tainted.
- if (const SymbolDerived *SD = dyn_cast<SymbolDerived>(*SI))
- Tainted = Tainted || isTainted(SD->getParentSymbol(), Kind);
+ if (const SymbolDerived *SD = dyn_cast<SymbolDerived>(*SI)) {
+ // If this is a SymbolDerived with a tainted parent, it's also tainted.
+ if (isTainted(SD->getParentSymbol(), Kind))
+ return true;
+
+ // If this is a SymbolDerived with the same parent symbol as another
+ // tainted SymbolDerived and a region that's a sub-region of that tainted
+ // symbol, it's also tainted.
+ if (const TaintedSubRegionsRef *SymRegions =
+ get<DerivedSymTaint>(SD->getParentSymbol())) {
+ const TypedValueRegion *R = SD->getRegion();
+ for (TaintedSubRegionsRef::iterator I = SymRegions->begin(),
+ E = SymRegions->end();
+ I != E; ++I) {
+ // FIXME: The logic to identify tainted regions could be more
+ // complete. For example, this would not currently identify
+ // overlapping fields in a union as tainted. To identify this we can
+ // check for overlapping/nested byte offsets.
+ if (Kind == I->second &&
+ (R == I->first || R->isSubRegionOf(I->first)))
+ return true;
+ }
+ }
+ }
// If memory region is tainted, data is also tainted.
- if (const SymbolRegionValue *SRV = dyn_cast<SymbolRegionValue>(*SI))
- Tainted = Tainted || isTainted(SRV->getRegion(), Kind);
-
- // If If this is a SymbolCast from a tainted value, it's also tainted.
- if (const SymbolCast *SC = dyn_cast<SymbolCast>(*SI))
- Tainted = Tainted || isTainted(SC->getOperand(), Kind);
+ if (const SymbolRegionValue *SRV = dyn_cast<SymbolRegionValue>(*SI)) {
+ if (isTainted(SRV->getRegion(), Kind))
+ return true;
+ }
- if (Tainted)
- return true;
+ // If this is a SymbolCast from a tainted value, it's also tainted.
+ if (const SymbolCast *SC = dyn_cast<SymbolCast>(*SI)) {
+ if (isTainted(SC->getOperand(), Kind))
+ return true;
+ }
}
- return Tainted;
+ return false;
}
diff --git a/lib/StaticAnalyzer/Core/RegionStore.cpp b/lib/StaticAnalyzer/Core/RegionStore.cpp
index 3000e13d32..28f78fa3ff 100644
--- a/lib/StaticAnalyzer/Core/RegionStore.cpp
+++ b/lib/StaticAnalyzer/Core/RegionStore.cpp
@@ -496,7 +496,10 @@ public: // Part of public interface to class.
Optional<SVal> getDefaultBinding(Store S, const MemRegion *R) override {
RegionBindingsRef B = getRegionBindings(S);
- return B.getDefaultBinding(R);
+ // Default bindings are always applied over a base region so look up the
+ // base region's default binding, otherwise the lookup will fail when R
+ // is at an offset from R->getBaseRegion().
+ return B.getDefaultBinding(R->getBaseRegion());
}
SVal getBinding(RegionBindingsConstRef B, Loc L, QualType T = QualType());