Merge pull request #799 from mattrjacobs/fix-rolling-percentile-write-thread-safety

Unit test demonstrating unsafe usage of IntCountsHistogram on write path, and corresponding fix
mattrjacobs committed May 23, 2015
2 parents d065208 + 2639ce4 commit 53de874
Showing 2 changed files with 59 additions and 10 deletions.
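
For context on the fix (this sketch is not part of the diff): HdrHistogram's plain IntCountsHistogram performs unsynchronized count updates, so concurrent writers can silently lose increments, while the library's Recorder is designed for concurrent recording. A minimal, self-contained illustration of that race, assuming only HdrHistogram on the classpath; the class name, thread count, and iteration count are arbitrary:

import java.util.concurrent.CountDownLatch;

import org.HdrHistogram.IntCountsHistogram;
import org.HdrHistogram.Recorder;

public class WritePathRaceSketch {
    public static void main(String[] args) throws InterruptedException {
        final int THREADS = 8;
        final int WRITES_PER_THREAD = 100000;

        // Plain histogram: recordValue() is not synchronized, so racing writers
        // can interleave read-modify-write steps and drop counts.
        final IntCountsHistogram unsafe = new IntCountsHistogram(4);
        // Recorder: supports concurrent writers; readers later swap out an
        // interval histogram for aggregation.
        final Recorder safe = new Recorder(4);

        final CountDownLatch done = new CountDownLatch(THREADS);
        for (int t = 0; t < THREADS; t++) {
            new Thread(new Runnable() {
                @Override
                public void run() {
                    for (int i = 0; i < WRITES_PER_THREAD; i++) {
                        unsafe.recordValue(i % 100 + 1);
                        safe.recordValue(i % 100 + 1);
                    }
                    done.countDown();
                }
            }).start();
        }
        done.await();

        long expected = (long) THREADS * WRITES_PER_THREAD;
        System.out.println("expected total     : " + expected);
        // Typically below expected whenever the writers actually contend:
        System.out.println("IntCountsHistogram : " + unsafe.getTotalCount());
        // Equal to expected:
        System.out.println("Recorder           : " + safe.getIntervalHistogram().getTotalCount());
    }
}

The new testWriteThreadSafety test below asserts the same property directly against the Recorder-backed write path of HystrixRollingPercentile.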
HystrixRollingPercentile.java
@@ -23,7 +23,9 @@
 import java.util.concurrent.atomic.AtomicReferenceArray;
 import java.util.concurrent.locks.ReentrantLock;
 
+import org.HdrHistogram.Histogram;
 import org.HdrHistogram.IntCountsHistogram;
+import org.HdrHistogram.Recorder;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -281,29 +283,26 @@ public void reset() {
         buckets.clear();
     }
 
-    private static class PercentileBucketData {
-        private final IntCountsHistogram histogram;
+    /*package-private*/ static class PercentileBucketData {
+        final Recorder recorder;
+        final AtomicReference<Histogram> stableHistogram = new AtomicReference<Histogram>(null);
 
         public PercentileBucketData() {
-            this.histogram = new IntCountsHistogram(4);
+            this.recorder = new Recorder(4);
        }
 
         public void addValue(int... latency) {
             for (int l: latency) {
-                histogram.recordValue(l);
+                recorder.recordValue(l);
             }
         }
-
-        public int length() {
-            return (int) histogram.getTotalCount();
-        }
     }
 
     /**
      * @NotThreadSafe
      */
     /* package for testing */ static class PercentileSnapshot {
-        private final IntCountsHistogram aggregateHistogram;
+        /* package-private*/ final IntCountsHistogram aggregateHistogram;
         private final long count;
         private final int mean;
         private final int p0;
@@ -331,7 +330,10 @@ public int length() {
         /* package for testing */ PercentileSnapshot(Bucket[] buckets) {
             aggregateHistogram = new IntCountsHistogram(4);
             for (Bucket bucket: buckets) {
-                aggregateHistogram.add(bucket.bucketData.histogram);
+                PercentileBucketData bucketData = bucket.bucketData;
+                //if stable snapshot not already generated, generate it now
+                bucketData.stableHistogram.compareAndSet(null, bucketData.recorder.getIntervalHistogram());
+                aggregateHistogram.add(bucket.bucketData.stableHistogram.get());
             }
 
             count = aggregateHistogram.getTotalCount();
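
The compareAndSet in the snapshot constructor above matters because Recorder.getIntervalHistogram() swaps in a fresh histogram on every call: each bucket must publish exactly one stable interval histogram and reuse it for any later aggregation. A stripped-down sketch of that pattern (the class and method names here are illustrative, not part of the change):

import java.util.concurrent.atomic.AtomicReference;

import org.HdrHistogram.Histogram;
import org.HdrHistogram.Recorder;

class BucketSnapshotSketch {
    final Recorder recorder = new Recorder(4);
    final AtomicReference<Histogram> stableHistogram = new AtomicReference<Histogram>(null);

    // Writers call this concurrently while the bucket is live.
    void record(long latency) {
        recorder.recordValue(latency);
    }

    // Readers call this when aggregating. The first caller swaps out the
    // interval histogram and publishes it; a losing caller's extra interval
    // is discarded and the already-published snapshot is returned, so every
    // aggregate sees the same histogram for this bucket.
    Histogram stableSnapshot() {
        stableHistogram.compareAndSet(null, recorder.getIntervalHistogram());
        return stableHistogram.get();
    }
}

In HystrixRollingPercentile the reader is the PercentileSnapshot constructor shown above, which adds each bucket's stable histogram into aggregateHistogram.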
HystrixRollingPercentileTest.java
@@ -162,9 +162,16 @@ public void testSampleDataOverTime1() {
         MockedTime time = new MockedTime();
         HystrixRollingPercentile p = new HystrixRollingPercentile(time, timeInMilliseconds, numberOfBuckets, enabled);
         int previousTime = 0;
+
+        int maxSoFar = -1;
+
         for (int i = 0; i < SampleDataHolder1.data.length; i++) {
             int timeInMillisecondsSinceStart = SampleDataHolder1.data[i][0];
             int latency = SampleDataHolder1.data[i][1];
+            if (latency > maxSoFar) {
+                System.out.println("New MAX latency : " + latency);
+                maxSoFar = latency;
+            }
             time.increment(timeInMillisecondsSinceStart - previousTime);
             previousTime = timeInMillisecondsSinceStart;
             p.addValue(latency);
@@ -181,6 +188,8 @@ public void testSampleDataOverTime1() {
         System.out.println("Median: " + p.getPercentile(50));
         System.out.println("Median: " + p.getPercentile(50));
 
+        System.out.println("MAX : " + p.currentPercentileSnapshot.aggregateHistogram.getMaxValue());
+
         /*
          * In a loop as a use case was found where very different values were calculated in subsequent requests.
          */
@@ -216,6 +225,8 @@ public void testSampleDataOverTime2() {
         System.out.println("99.5th: " + p.getPercentile(99.5));
         System.out.println("99.99: " + p.getPercentile(99.99));
 
+        System.out.println("MAX : " + p.currentPercentileSnapshot.aggregateHistogram.getMaxValue());
+
         if (p.getPercentile(50) > 90 || p.getPercentile(50) < 50) {
             fail("We expect around 60-70 but got: " + p.getPercentile(50));
         }
@@ -394,6 +405,42 @@ public void run() {
         System.out.println(p.getMean() + " : " + p.getPercentile(50) + " : " + p.getPercentile(75) + " : " + p.getPercentile(90) + " : " + p.getPercentile(95) + " : " + p.getPercentile(99));
     }
 
+    @Test
+    public void testWriteThreadSafety() {
+        final MockedTime time = new MockedTime();
+        final HystrixRollingPercentile p = new HystrixRollingPercentile(time, HystrixProperty.Factory.asProperty(100), HystrixProperty.Factory.asProperty(25), HystrixProperty.Factory.asProperty(true));
+
+        final int NUM_THREADS = 1000;
+        final int NUM_ITERATIONS = 1000000;
+
+        final CountDownLatch latch = new CountDownLatch(NUM_THREADS);
+
+        final Random r = new Random();
+
+        final AtomicInteger added = new AtomicInteger(0);
+
+        for (int i = 0; i < NUM_THREADS; i++) {
+            threadPool.submit(new Runnable() {
+                @Override
+                public void run() {
+                    for (int j = 1; j < NUM_ITERATIONS / NUM_THREADS + 1; j++) {
+                        int nextInt = r.nextInt(100);
+                        p.addValue(nextInt);
+                        added.getAndIncrement();
+                    }
+                    latch.countDown();
+                }
+            });
+        }
+
+        try {
+            latch.await(100, TimeUnit.SECONDS);
+            assertEquals(added.get(), p.buckets.peekLast().bucketData.recorder.getIntervalHistogram().getTotalCount());
+        } catch (InterruptedException ex) {
+            fail("Timeout on all threads writing percentiles");
+        }
+    }
+
     @Test
     public void testThreadSafetyMulti() {
         for (int i = 0; i < 100; i++) {
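
One caveat for anyone reusing the wait-and-assert pattern in testWriteThreadSafety: CountDownLatch.await(timeout, unit) reports a timeout by returning false rather than throwing, so the fail(...) in the catch block only fires on interruption. A small sketch of a stricter wait, with an illustrative helper name:

import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;

final class LatchUtil {
    // Throws (or, in a JUnit test, call fail(...)) when the latch does not
    // reach zero within the timeout, instead of silently continuing.
    static void awaitOrFail(CountDownLatch latch, long timeout, TimeUnit unit) throws InterruptedException {
        if (!latch.await(timeout, unit)) {
            throw new IllegalStateException("Timed out with " + latch.getCount() + " threads still writing");
        }
    }
}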
