Skip to content

Commit

Permalink
Merge pull request davisjam#49 from davisjam/DetectorEnhancement_Anch…
Browse files Browse the repository at this point in the history
…oredRegex

detect-vuln: test on expanded pattern space
  • Loading branch information
davisjam authored Apr 23, 2018
2 parents 3a9b580 + 9075951 commit 23f6b2a
Showing 1 changed file with 110 additions and 39 deletions.
149 changes: 110 additions & 39 deletions src/detect/detect-vuln.pl
Original file line number Diff line number Diff line change
Expand Up @@ -57,48 +57,65 @@
my $limitTime = (defined $query->{timeLimit}) ? "timeout $query->{timeLimit}s" : "";
my $ulimitMemory = (defined $query->{memoryLimit}) ? "ulimit -m $memoryLimitInBytes; ulimit -v $memoryLimitInBytes;" : "";

# Run each detector. Can re-use the input file.
my @patternsToTry = &expandPatternSpaceForDetectors($query->{pattern});

# This will contain N_DETECTORS * scalar(@patternsToTry) opinions.
my @detectorOpinions;
&log("Applying detectors to pattern /$query->{pattern}/");
for my $d (@DETECTORS) {
&log("Querying detector $d->{name}");
my $t0 = [gettimeofday];
my $stderrFile = "/tmp/detect-vuln-$$-stderr";
my ($rc, $out) = &cmd("$ulimitMemory $limitTime $d->{driver} $patternFile 2>$stderrFile");
my $elapsed = tv_interval($t0);
chomp $out;

# Clean up in case there was a timeout.
my $stderr = &readFile("file"=>$stderrFile);
my @filesToClean = ($stderr =~ m/CLEANUP: (\S+)/g);
&log("Cleaning up @filesToClean");
unlink @filesToClean;
unlink $stderrFile;

my $opinion = { "name" => $d->{name},
"secToDecide" => sprintf("%.4f", $elapsed),
};

if ($rc eq 124) {
&log("Detector $d->{name} timed out");
$opinion->{hasOpinion} = 0;
$opinion->{opinion} = "TIMEOUT";
}
elsif ($rc) {
&log("Detector $d->{name} said rc $rc");
$opinion->{hasOpinion} = 0;
$opinion->{opinion} = "INTERNAL-ERROR";
}
else {
&log("Detector $d->{name} said: $out");
my $result = decode_json($out);
# Extract the details needed to make the summary.
# Otherwise we repeat ourselves too much.
$opinion->{hasOpinion} = 1;
$opinion->{opinion} = $result->{opinion};
# Try each pattern.
for my $pattern (@patternsToTry) {
&log("Applying detectors to pattern /$pattern/");

# Craft query file.
my $newQuery = decode_json(encode_json($query));
$newQuery->{pattern} = $pattern;
my $tmpPatternFile = &makeQueryFile($newQuery);

# Ask each detector.
# Run every detector against the current pattern variant and record one
# opinion hash per detector in @detectorOpinions.
for my $d (@DETECTORS) {
&log("Querying detector $d->{name}");
my $t0 = [gettimeofday];
# The detector's stderr is captured in a per-process file so it can be scanned below.
my $stderrFile = "/tmp/detect-vuln-$$-stderr";
# The driver is invoked with the memory-limit and time-limit prefixes built earlier.
my ($rc, $out) = &cmd("$ulimitMemory $limitTime $d->{driver} $tmpPatternFile 2>$stderrFile");
my $elapsed = tv_interval($t0);
chomp $out;

# Clean up in case there was a timeout.
# Every "CLEANUP: <path>" token found in the captured stderr names a file to remove.
my $stderr = &readFile("file"=>$stderrFile);
my @filesToClean = ($stderr =~ m/CLEANUP: (\S+)/g);
&log("Cleaning up @filesToClean");
unlink @filesToClean;
unlink $stderrFile;

# Every opinion carries the detector name and the elapsed wall-clock seconds.
my $opinion = { "name" => $d->{name},
"secToDecide" => sprintf("%.4f", $elapsed),
};

# rc 124 is treated as "the time limit was hit".
# NOTE(review): presumably the exit status of the `timeout` prefix -- confirm.
if ($rc eq 124) {
&log("Detector $d->{name} timed out");
$opinion->{hasOpinion} = 0;
$opinion->{opinion} = "TIMEOUT";
}
# Any other non-zero rc is reported as an internal error of the detector.
elsif ($rc) {
&log("Detector $d->{name} said rc $rc");
$opinion->{hasOpinion} = 0;
$opinion->{opinion} = "INTERNAL-ERROR";
}
else {
&log("Detector $d->{name} said: $out");
# The detector's stdout is parsed as JSON; only its "opinion" field is kept.
my $result = decode_json($out);
# Extract the details needed to make the summary.
# Otherwise we repeat ourselves too much.
$opinion->{hasOpinion} = 1;
$opinion->{opinion} = $result->{opinion};

# Note the pattern we queried about, so we can distinguish from the original.
# NOTE(review): patternVariant is only set on this success path; TIMEOUT and
# INTERNAL-ERROR opinions do not record which variant they refer to.
$opinion->{patternVariant} = $pattern;
}

push @detectorOpinions, $opinion;
}

push @detectorOpinions, $opinion;
unlink $tmpPatternFile;
}

$query->{detectorOpinions} = \@detectorOpinions;
Expand Down Expand Up @@ -149,6 +166,13 @@ sub getDetectors {
return @detectors;
}

# input: ($query) -- hash ref describing the query
# output: ($tmpFile) -- path of a newly created file holding the JSON-encoded query
#
# The file is created exclusively under /tmp with a randomised name, so it
# cannot collide with (or be redirected through) a pre-existing path, and
# successive calls never overwrite each other's file.
# The caller owns the returned file and must unlink it when done.
# Dies if the file cannot be created or written.
sub makeQueryFile {
    my ($query) = @_;

    # File::Temp is a core module; load it here so this sub is self-contained.
    require File::Temp;

    # Keep the "detect-vuln-<pid>" prefix and ".json" suffix of the old fixed name.
    my ($fh, $tmpFile) = File::Temp::tempfile(
        "detect-vuln-$$-XXXXXX",
        DIR    => "/tmp",
        SUFFIX => ".json",
        UNLINK => 0,
    );

    print {$fh} encode_json($query)
        or die "Error, could not write query to $tmpFile: $!\n";
    # Buffered write errors only surface at close, so check it too.
    close($fh)
        or die "Error, could not close $tmpFile: $!\n";

    return $tmpFile;
}

# input: (\@list, $e)
# output: true if $e is in @list, else false
sub listContains {
Expand All @@ -173,3 +197,50 @@ sub readFile {

return $contents;
}

# input: %args: keys: file contents
# output: $file
sub writeToFile {
    my %args = @_;

    # Truncate-or-create the target. Fail loudly: a silently missing file
    # would otherwise only show up later as a confusing downstream error.
    open(my $fh, '>', $args{file})
        or die "Error, could not open $args{file} for writing: $!\n";
    print {$fh} $args{contents}
        or die "Error, could not write to $args{file}: $!\n";
    # Buffered write errors (e.g. disk full) are only reported at close.
    close($fh)
        or die "Error, could not close $args{file}: $!\n";

    # Hand the path back so callers can chain on it.
    return $args{file};
}

# input: ($pattern) -- regex source string
# output: (@patternsToTry) -- the original pattern first, followed by any
#         derived variants that the detectors should also be asked about
sub expandPatternSpaceForDetectors {
    my ($pattern) = @_;

    my @patternsToTry = ($pattern);

    # If pattern is unanchored, a backtracking regex engine will run the loop:
    #   for (1 .. n):
    #     _match(regex, substr)
    # This means that if each match is linear-time, the worst-case behavior is quadratic.
    # For example, /a+$/ is quadratic in Node.js.
    # The detectors don't seem to acknowledge this loop.
    # We can simulate it by prefixing un-anchored regexes with '^(.*?)'.
    # This is also how a linear-time engine scans all starting indices in parallel; see Cox's writings.
    #
    # Work on the $pattern argument, not on any file-level query state, so
    # the result depends only on what the caller passed in.
    if (substr($pattern, 0, 1) ne "^") {
        push @patternsToTry, '^(.*?)' . $pattern;
    }

    # If pattern contains curlies "{\d*,\d*}", the detectors may time out due to graph expansion.
    # We can try a more general pattern with "*" and "+" instead.
    # The detectors might give false positives but that's OK, that's what the validate stage is for.
    # I'm not being careful about escaped curly braces, so let's hope there are no meta-regexes here.
    #
    # The braces are escaped so they are always matched literally; newer
    # perls can otherwise read "{,n}" as a quantifier.
    my $genericCurlies = $pattern;
    # {0, and {, both mean "0 or more"
    $genericCurlies =~ s/\{0,\d*\}/*/g;
    $genericCurlies =~ s/\{,\d*\}/*/g;
    # {[1-9] means "1 or more"
    $genericCurlies =~ s/\{[1-9]\d*,\d*\}/+/g;
    if ($genericCurlies ne $pattern) {
        push @patternsToTry, $genericCurlies;
    }

    return @patternsToTry;
}

0 comments on commit 23f6b2a

Please sign in to comment.