Skip to content

Commit

Permalink
[GR-19220] Print some basic regex stats if the instrumentation is ena…
Browse files Browse the repository at this point in the history
…bled (#2327)

PullRequest: truffleruby/2575
  • Loading branch information
eregon committed Apr 15, 2021
2 parents 3042862 + 7f798bc commit 5f4a462
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 42 deletions.
16 changes: 14 additions & 2 deletions src/main/java/org/truffleruby/core/regexp/RegexpCacheKey.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@
import org.truffleruby.core.Hashing;
import org.truffleruby.core.rope.NativeRope;
import org.truffleruby.core.rope.Rope;
import org.truffleruby.parser.ReOptions;

public class RegexpCacheKey {
public final class RegexpCacheKey {

private final Rope rope;
private final Encoding encoding;
Expand Down Expand Up @@ -46,6 +47,17 @@ public boolean equals(Object o) {

@Override
public String toString() {
return rope.toString();
StringBuilder builder = new StringBuilder();
builder.append('/').append(rope.toString()).append('/');
if ((options & ReOptions.RE_OPTION_MULTILINE) != 0) {
builder.append('m');
}
if ((options & ReOptions.RE_OPTION_IGNORECASE) != 0) {
builder.append('i');
}
if ((options & ReOptions.RE_OPTION_EXTENDED) != 0) {
builder.append('x');
}
return builder.toString();
}
}
57 changes: 17 additions & 40 deletions src/main/java/org/truffleruby/core/regexp/TruffleRegexpNodes.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import java.nio.charset.UnsupportedCharsetException;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;

Expand Down Expand Up @@ -41,7 +42,6 @@
import org.truffleruby.core.array.ArrayBuilderNode.BuilderState;
import org.truffleruby.core.array.RubyArray;
import org.truffleruby.core.encoding.RubyEncoding;
import org.truffleruby.core.hash.ReHashable;
import org.truffleruby.core.kernel.KernelNodes.SameOrEqualNode;
import org.truffleruby.core.regexp.RegexpNodes.ToSNode;
import org.truffleruby.core.regexp.TruffleRegexpNodesFactory.MatchNodeGen;
Expand Down Expand Up @@ -309,7 +309,7 @@ public abstract static class RegexpStatsNode extends CoreMethodArrayArgumentsNod
@TruffleBoundary
protected <T> RubyArray fillinInstrumentData(Map<T, AtomicInteger> map, ArrayBuilderNode arrayBuilderNode,
RubyContext context) {
BuilderState state = arrayBuilderNode.start(compiledRegexps.size() * 2);
BuilderState state = arrayBuilderNode.start(COMPILED_REGEXPS.size() * 2);
int n = 0;
for (Entry<T, AtomicInteger> e : map.entrySet()) {
Rope key = StringOperations.encodeRope(e.getKey().toString(), UTF8Encoding.INSTANCE);
Expand All @@ -326,7 +326,7 @@ public abstract static class CompilationStatsArrayNode extends RegexpStatsNode {
@Specialization
protected Object buildStatsArray(
@Cached ArrayBuilderNode arrayBuilderNode) {
return fillinInstrumentData(compiledRegexps, arrayBuilderNode, getContext());
return fillinInstrumentData(COMPILED_REGEXPS, arrayBuilderNode, getContext());
}
}

Expand All @@ -336,7 +336,7 @@ public abstract static class MatchStatsArrayNode extends RegexpStatsNode {
@Specialization
protected Object buildStatsArray(
@Cached ArrayBuilderNode arrayBuilderNode) {
return fillinInstrumentData(matchedRegexps, arrayBuilderNode, getContext());
return fillinInstrumentData(MATCHED_REGEXPS, arrayBuilderNode, getContext());
}
}

Expand Down Expand Up @@ -476,17 +476,14 @@ protected void instrument(RubyRegexp regexp, Object string, boolean fromStart) {
Encoding enc = RubyStringLibrary.getUncached().getRope(string).getEncoding();
RegexpOptions options = regexp.options;
MatchInfo matchInfo = new MatchInfo(
new RegexpCacheKey(
source,
enc,
options.toJoniOptions(),
getContext().getHashing(REHASH_MATCHED_REGEXPS)),
new RegexpCacheKey(source, enc, options.toJoniOptions(), Hashing.NO_SEED),
fromStart);
ConcurrentOperations.getOrCompute(matchedRegexps, matchInfo, x -> new AtomicInteger()).incrementAndGet();
ConcurrentOperations.getOrCompute(MATCHED_REGEXPS, matchInfo, x -> new AtomicInteger()).incrementAndGet();
}
}

private static class MatchInfo {
private static final class MatchInfo {

private final RegexpCacheKey regexpInfo;
private final boolean matchStart;

Expand All @@ -498,47 +495,31 @@ private static class MatchInfo {

@Override
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + Boolean.hashCode(matchStart);
result = prime * result + regexpInfo.hashCode();
return result;
return Objects.hash(regexpInfo, matchStart);
}

@Override
public boolean equals(Object obj) {
if (this == obj) {
return true;
}
if (obj == null || getClass() != obj.getClass()) {

if (!(obj instanceof MatchInfo)) {
return false;
}

MatchInfo other = (MatchInfo) obj;
if (matchStart != other.matchStart) {
return false;
}
if (!regexpInfo.equals(other.regexpInfo)) {
return false;
}
return true;
return matchStart == other.matchStart && regexpInfo.equals(other.regexpInfo);
}

/** Renders this entry as {@code Match (<regexpInfo>, fromStart = <matchStart>)}. */
@Override
public String toString() {
    // Plain concatenation: both operands are rendered via String.valueOf, exactly like %s.
    return "Match (" + regexpInfo + ", fromStart = " + matchStart + ")";
}

}

private static ConcurrentHashMap<RegexpCacheKey, AtomicInteger> compiledRegexps = new ConcurrentHashMap<>();
private static final ReHashable REHASH_COMPILED_REGEXPS = () -> {
compiledRegexps = new ConcurrentHashMap<>(compiledRegexps);
};

private static ConcurrentHashMap<MatchInfo, AtomicInteger> matchedRegexps = new ConcurrentHashMap<>();
private static final ReHashable REHASH_MATCHED_REGEXPS = () -> {
matchedRegexps = new ConcurrentHashMap<>(matchedRegexps);
};
/** Number of compilations per regexp key; updated by compile() when REGEXP_INSTRUMENT_CREATION is enabled. */
private static final ConcurrentHashMap<RegexpCacheKey, AtomicInteger> COMPILED_REGEXPS = new ConcurrentHashMap<>();
/** Number of match attempts per (regexp key, fromStart) pair; updated by instrument(). */
private static final ConcurrentHashMap<MatchInfo, AtomicInteger> MATCHED_REGEXPS = new ConcurrentHashMap<>();

/** WARNING: computeRegexpEncoding() mutates options, so the caller should make sure it's a copy */
@TruffleBoundary
Expand All @@ -558,12 +539,8 @@ public static Regex compile(RubyLanguage language, RubyDeferredWarnings rubyDefe
regexp.setUserObject(RopeOperations.withEncoding(bytes, enc));

if (language.options.REGEXP_INSTRUMENT_CREATION) {
final RegexpCacheKey key = new RegexpCacheKey(
bytes,
enc,
options.toJoniOptions(),
Hashing.NO_SEED);
ConcurrentOperations.getOrCompute(compiledRegexps, key, x -> new AtomicInteger()).incrementAndGet();
final RegexpCacheKey key = new RegexpCacheKey(bytes, enc, options.toJoniOptions(), Hashing.NO_SEED);
ConcurrentOperations.getOrCompute(COMPILED_REGEXPS, key, x -> new AtomicInteger()).incrementAndGet();
}

return regexp;
Expand Down
27 changes: 27 additions & 0 deletions src/main/ruby/truffleruby/core/truffle/regexp_operations.rb
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,12 @@ def self.match_from(re, str, pos)
# Deferred until Truffle::Boot runs delayed blocks: reads the regex engine
# options and, when either instrumentation option is set, registers an
# at_exit hook that prints the collected regexp statistics.
Truffle::Boot.delay do
  COMPARE_ENGINES = Truffle::Boot.get_option('compare-regex-engines')
  USE_TRUFFLE_REGEX = Truffle::Boot.get_option('use-truffle-regex')

  # Short-circuits like the original: the second option is only read when the first is falsy.
  instrumented = Truffle::Boot.get_option('regexp-instrument-creation') ||
                 Truffle::Boot.get_option('regexp-instrument-match')
  at_exit { Truffle::RegexpOperations.print_stats } if instrumented
end

def self.match_in_region(re, str, from, to, at_start, encoding_conversion, start)
Expand Down Expand Up @@ -168,6 +174,27 @@ def self.match_stats
Hash[*match_stats_array]
end

# Writes the regexp statistics report with puts: a banner, the
# compilation_stats table, a separator, the match_stats table and a
# closing rule.
def self.print_stats
  puts '--------------------',
       'Regular expression statistics',
       '--------------------',
       ' Compilation'
  print_stats_table(compilation_stats)
  puts ' --------------------',
       ' Matches'
  print_stats_table(match_stats)
  puts '--------------------'
end

# Prints one line per entry of +table+ (key => count), highest count first.
# Counts are right-aligned to the width of the largest count. Prints nothing
# and returns nil when the table is empty.
def self.print_stats_table(table)
  return if table.empty?

  by_count_desc = table.to_a.sort_by { |pair| pair.last }.reverse
  # The first row holds the largest count, so its digit count sets the column width.
  column_width = by_count_desc[0][1].to_s.length
  row_format = " %#{column_width}d %s\n"
  by_count_desc.each do |(regexp, count)|
    printf(row_format, count, regexp)
  end
end

def self.option_to_string(option)
string = +''
string << 'm' if (option & Regexp::MULTILINE) > 0
Expand Down

0 comments on commit 5f4a462

Please sign in to comment.