Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[analyzer] Improve diagnostics from ArrayBoundCheckerV2 #70056

Merged
merged 5 commits into from
Nov 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
208 changes: 154 additions & 54 deletions clang/lib/StaticAnalyzer/Checkers/ArrayBoundCheckerV2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,23 +22,25 @@
#include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
#include <optional>

using namespace clang;
using namespace ento;
using namespace taint;
using llvm::formatv;

namespace {
enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint };
steakhal marked this conversation as resolved.
Show resolved Hide resolved

class ArrayBoundCheckerV2 :
public Checker<check::Location> {
BugType BT{this, "Out-of-bound access"};
BugType TaintBT{this, "Out-of-bound access", categories::TaintedData};

enum OOB_Kind { OOB_Precedes, OOB_Exceeds, OOB_Taint };

void reportOOB(CheckerContext &C, ProgramStateRef ErrorState, OOB_Kind Kind,
SVal TaintedSVal = UnknownVal()) const;
NonLoc Offset, std::string RegName, std::string Msg) const;

static bool isFromCtypeMacro(const Stmt *S, ASTContext &AC);

Expand All @@ -53,7 +55,7 @@ class ArrayBoundCheckerV2 :
/// Arr and the distance of Location from the beginning of Arr (expressed in a
/// NonLoc that specifies the number of CharUnits). Returns nullopt when these
/// cannot be determined.
std::optional<std::pair<const SubRegion *, NonLoc>>
static std::optional<std::pair<const SubRegion *, NonLoc>>
computeOffset(ProgramStateRef State, SValBuilder &SVB, SVal Location) {
QualType T = SVB.getArrayIndexType();
auto EvalBinOp = [&SVB, State, T](BinaryOperatorKind Op, NonLoc L, NonLoc R) {
Expand Down Expand Up @@ -174,9 +176,116 @@ compareValueToThreshold(ProgramStateRef State, NonLoc Value, NonLoc Threshold,
return {nullptr, nullptr};
}

void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
const Stmt* LoadS,
CheckerContext &checkerContext) const {
static std::string getRegionName(const SubRegion *Region) {
if (std::string RegName = Region->getDescriptiveName(); !RegName.empty())
return RegName;

// Field regions only have descriptive names when their parent has a
// descriptive name; so we provide a fallback representation for them:
if (const auto *FR = Region->getAs<FieldRegion>()) {
if (StringRef Name = FR->getDecl()->getName(); !Name.empty())
return formatv("the field '{0}'", Name);
return "the unnamed field";
}

if (isa<AllocaRegion>(Region))
return "the memory returned by 'alloca'";

if (isa<SymbolicRegion>(Region) &&
isa<HeapSpaceRegion>(Region->getMemorySpace()))
return "the heap area";

if (isa<StringRegion>(Region))
return "the string literal";

return "the region";
}

static std::optional<int64_t> getConcreteValue(NonLoc SV) {
if (auto ConcreteVal = SV.getAs<nonloc::ConcreteInt>()) {
return ConcreteVal->getValue().tryExtValue();
}
return std::nullopt;
}

static std::string getShortMsg(OOB_Kind Kind, std::string RegName) {
static const char *ShortMsgTemplates[] = {
"Out of bound access to memory preceding {0}",
"Out of bound access to memory after the end of {0}",
"Potential out of bound access to {0} with tainted offset"};

return formatv(ShortMsgTemplates[Kind], RegName);
NagyDonat marked this conversation as resolved.
Show resolved Hide resolved
}

static std::string getPrecedesMsg(std::string RegName, NonLoc Offset) {
SmallString<128> Buf;
llvm::raw_svector_ostream Out(Buf);
steakhal marked this conversation as resolved.
Show resolved Hide resolved
Out << "Access of " << RegName << " at negative byte offset";
if (auto ConcreteIdx = Offset.getAs<nonloc::ConcreteInt>())
Out << ' ' << ConcreteIdx->getValue();
return std::string(Buf);
}
static std::string getExceedsMsg(ASTContext &ACtx, std::string RegName,
NonLoc Offset, NonLoc Extent, SVal Location) {
const auto *EReg = Location.getAsRegion()->getAs<ElementRegion>();
assert(EReg && "this checker only handles element access");
QualType ElemType = EReg->getElementType();

std::optional<int64_t> OffsetN = getConcreteValue(Offset);
std::optional<int64_t> ExtentN = getConcreteValue(Extent);

bool UseByteOffsets = true;
if (int64_t ElemSize = ACtx.getTypeSizeInChars(ElemType).getQuantity()) {
const bool OffsetHasRemainder = OffsetN && *OffsetN % ElemSize;
const bool ExtentHasRemainder = ExtentN && *ExtentN % ElemSize;
if (!OffsetHasRemainder && !ExtentHasRemainder) {
UseByteOffsets = false;
if (OffsetN)
*OffsetN /= ElemSize;
if (ExtentN)
*ExtentN /= ElemSize;
}
}

SmallString<256> Buf;
llvm::raw_svector_ostream Out(Buf);
Out << "Access of ";
if (!ExtentN && !UseByteOffsets)
Out << "'" << ElemType.getAsString() << "' element in ";
Out << RegName << " at ";
if (OffsetN) {
Out << (UseByteOffsets ? "byte offset " : "index ") << *OffsetN;
} else {
Out << "an overflowing " << (UseByteOffsets ? "byte offset" : "index");
}
if (ExtentN) {
Out << ", while it holds only ";
if (*ExtentN != 1)
Out << *ExtentN;
else
Out << "a single";
if (UseByteOffsets)
Out << " byte";
else
Out << " '" << ElemType.getAsString() << "' element";

if (*ExtentN > 1)
Out << "s";
}

return std::string(Buf);
}
static std::string getTaintMsg(std::string RegName) {
SmallString<128> Buf;
llvm::raw_svector_ostream Out(Buf);
Out << "Access of " << RegName
<< " with a tainted offset that may be too large";
return std::string(Buf);
}

void ArrayBoundCheckerV2::checkLocation(SVal Location, bool IsLoad,
const Stmt *LoadS,
CheckerContext &C) const {

// NOTE: Instead of using ProgramState::assumeInBound(), we are prototyping
// some new logic here that reasons directly about memory region extents.
Expand All @@ -193,14 +302,14 @@ void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
// and incomplete analysis of these leads to false positives. As even
// accurate reports would be confusing for the users, just disable reports
// from these macros:
if (isFromCtypeMacro(LoadS, checkerContext.getASTContext()))
if (isFromCtypeMacro(LoadS, C.getASTContext()))
return;

ProgramStateRef state = checkerContext.getState();
SValBuilder &svalBuilder = checkerContext.getSValBuilder();
ProgramStateRef State = C.getState();
SValBuilder &SVB = C.getSValBuilder();

const std::optional<std::pair<const SubRegion *, NonLoc>> &RawOffset =
computeOffset(state, svalBuilder, location);
computeOffset(State, SVB, Location);

if (!RawOffset)
return;
Expand All @@ -217,80 +326,71 @@ void ArrayBoundCheckerV2::checkLocation(SVal location, bool isLoad,
// MallocChecker that call SValBuilder::getConjuredHeapSymbolVal()) and
// non-symbolic regions (e.g. a field subregion of a symbolic region) in
// unknown space.
auto [state_precedesLowerBound, state_withinLowerBound] =
compareValueToThreshold(state, ByteOffset,
svalBuilder.makeZeroArrayIndex(), svalBuilder);
auto [PrecedesLowerBound, WithinLowerBound] = compareValueToThreshold(
State, ByteOffset, SVB.makeZeroArrayIndex(), SVB);

if (state_precedesLowerBound && !state_withinLowerBound) {
if (PrecedesLowerBound && !WithinLowerBound) {
// We know that the index definitely precedes the lower bound.
reportOOB(checkerContext, state_precedesLowerBound, OOB_Precedes);
std::string RegName = getRegionName(Reg);
std::string Msg = getPrecedesMsg(RegName, ByteOffset);
reportOOB(C, PrecedesLowerBound, OOB_Precedes, ByteOffset, RegName, Msg);
steakhal marked this conversation as resolved.
Show resolved Hide resolved
return;
}

if (state_withinLowerBound)
state = state_withinLowerBound;
if (WithinLowerBound)
State = WithinLowerBound;
}

// CHECK UPPER BOUND
DefinedOrUnknownSVal Size = getDynamicExtent(state, Reg, svalBuilder);
DefinedOrUnknownSVal Size = getDynamicExtent(State, Reg, SVB);
if (auto KnownSize = Size.getAs<NonLoc>()) {
auto [state_withinUpperBound, state_exceedsUpperBound] =
compareValueToThreshold(state, ByteOffset, *KnownSize, svalBuilder);
auto [WithinUpperBound, ExceedsUpperBound] =
compareValueToThreshold(State, ByteOffset, *KnownSize, SVB);

if (state_exceedsUpperBound) {
if (!state_withinUpperBound) {
if (ExceedsUpperBound) {
if (!WithinUpperBound) {
// We know that the index definitely exceeds the upper bound.
reportOOB(checkerContext, state_exceedsUpperBound, OOB_Exceeds);
std::string RegName = getRegionName(Reg);
std::string Msg = getExceedsMsg(C.getASTContext(), RegName, ByteOffset,
*KnownSize, Location);
reportOOB(C, ExceedsUpperBound, OOB_Exceeds, ByteOffset, RegName, Msg);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
reportOOB(C, ExceedsUpperBound, OOB_Exceeds, ByteOffset, RegName, Msg);
reportOOB(C, ExceedsUpperBound, OOB_Exceeds, ByteOffset, std::move(RegName), std::move(Msg));

return;
}
if (isTainted(state, ByteOffset)) {
if (isTainted(State, ByteOffset)) {
// Both cases are possible, but the index is tainted, so report.
reportOOB(checkerContext, state_exceedsUpperBound, OOB_Taint,
ByteOffset);
std::string RegName = getRegionName(Reg);
std::string Msg = getTaintMsg(RegName);
reportOOB(C, ExceedsUpperBound, OOB_Taint, ByteOffset, RegName, Msg);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
reportOOB(C, ExceedsUpperBound, OOB_Taint, ByteOffset, RegName, Msg);
reportOOB(C, ExceedsUpperBound, OOB_Taint, ByteOffset, std::move(RegName), std::move(Msg));

return;
}
}

if (state_withinUpperBound)
state = state_withinUpperBound;
if (WithinUpperBound)
State = WithinUpperBound;
}

checkerContext.addTransition(state);
C.addTransition(State);
}

void ArrayBoundCheckerV2::reportOOB(CheckerContext &C,
ProgramStateRef ErrorState, OOB_Kind Kind,
SVal TaintedSVal) const {
NonLoc Offset, std::string RegName,
std::string Msg) const {

ExplodedNode *ErrorNode = C.generateErrorNode(ErrorState);
if (!ErrorNode)
return;

// FIXME: These diagnostics are preliminary, and they'll be replaced soon by
// a follow-up commit.
std::string ShortMsg = getShortMsg(Kind, RegName);

SmallString<128> Buf;
llvm::raw_svector_ostream Out(Buf);
Out << "Out of bound memory access ";

switch (Kind) {
case OOB_Precedes:
Out << "(accessed memory precedes memory block)";
break;
case OOB_Exceeds:
Out << "(access exceeds upper limit of memory block)";
break;
case OOB_Taint:
Out << "(index is tainted)";
break;
}
auto BR = std::make_unique<PathSensitiveBugReport>(
Kind == OOB_Taint ? TaintBT : BT, Out.str(), ErrorNode);
// Track back the propagation of taintedness, or do nothing if TaintedSVal is
// the default UnknownVal().
for (SymbolRef Sym : getTaintedSymbols(ErrorState, TaintedSVal)) {
BR->markInteresting(Sym);
}
Kind == OOB_Taint ? TaintBT : BT, ShortMsg, Msg, ErrorNode);

// Track back the propagation of taintedness.
if (Kind == OOB_Taint)
for (SymbolRef Sym : getTaintedSymbols(ErrorState, Offset))
BR->markInteresting(Sym);

C.emitReport(std::move(BR));
}

Expand Down
Loading
Loading