Commit 3328d6ce authored by Jan-Erik Rediger, committed by Jan-Erik Rediger

Bug 1563753 - Switch bucketing algorithm and use bucket limits as keys for the values map

The JSON-encoded payload requires this format anyway.
Buckets are laid out so that, for a `sample` in bucket `i`, the following holds: `bucket[i] <= sample < bucket[i+1]`
parent 9d1982b7
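
To make the new layout concrete, here is a minimal, hypothetical Kotlin sketch (not part of this commit): counts are keyed by the largest bucket limit that is less than or equal to the sample, rather than by the bucket's index.

```kotlin
// Illustrative only: a linear variant of the lookup implied by the invariant above.
// `buckets` is assumed to be sorted ascending and to start with an underflow limit of 0.
fun bucketFor(sample: Long, buckets: List<Long>): Long {
    var limit = buckets.first()
    for (b in buckets) {
        if (b <= sample) limit = b else break
    }
    return limit
}

fun main() {
    val buckets = listOf(0L, 1L, 2L, 3L, 5L, 8L)
    // 5 <= 6 < 8, so a sample of 6 is counted under the key 5 (the bucket limit),
    // whereas the old scheme would have keyed this bucket by its index, 4.
    println(bucketFor(6L, buckets)) // 5
}
```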
@@ -138,7 +138,8 @@ data class TimingDistributionData(
val rangeMin: Long = DEFAULT_RANGE_MIN,
val rangeMax: Long = DEFAULT_RANGE_MAX,
val histogramType: HistogramType = HistogramType.Exponential,
val values: MutableMap<Int, Long> = mutableMapOf(),
// map from bucket limits to accumulated values
val values: MutableMap<Long, Long> = mutableMapOf(),
var sum: Long = 0,
val timeUnit: TimeUnit = TimeUnit.Millisecond
) {
@@ -200,9 +201,9 @@ data class TimingDistributionData(
// return null.
val values = try {
val mapData = jsonObject.getJSONObject("values")
val valueMap: MutableMap<Int, Long> = mutableMapOf()
val valueMap: MutableMap<Long, Long> = mutableMapOf()
mapData.keys().forEach { key ->
valueMap[key.toInt()] = mapData.tryGetLong(key) ?: 0L
valueMap[key.toLong()] = mapData.tryGetLong(key) ?: 0L
}
valueMap
} catch (e: org.json.JSONException) {
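
The deserialization side changes its key type from `Int` to `Long` because the JSON keys are now the bucket limits themselves. Below is a self-contained sketch of that payload shape, under the assumption that keys are stringified limits (it uses `org.json`'s `optLong` in place of the project's `tryGetLong` helper):

```kotlin
import org.json.JSONObject

// Hypothetical payload for illustration: "values" maps stringified bucket limits
// (not bucket indices) to accumulated counts.
fun parseValues(json: String): MutableMap<Long, Long> {
    val mapData = JSONObject(json).getJSONObject("values")
    val valueMap: MutableMap<Long, Long> = mutableMapOf()
    mapData.keys().forEach { key ->
        valueMap[key.toLong()] = mapData.optLong(key, 0L)
    }
    return valueMap
}

fun main() {
    val parsed = parseValues("""{"values": {"0": 1, "1": 2, "92": 1}}""")
    println(parsed) // e.g. {0=1, 1=2, 92=1} (iteration order may vary)
}
```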
@@ -244,21 +245,32 @@ data class TimingDistributionData(
internal val buckets: List<Long> by lazy { getBuckets() }
/**
* Accumulates a sample to the correct bucket, using a linear search to locate the index of the
* bucket where the sample is less than or equal to the bucket value. This works since the
* buckets are sorted in ascending order. If a mapped value doesn't exist for this bucket yet,
* one is created.
* Accumulates a sample to the correct bucket, using a binary search to locate the bucket
* whose limit is the largest one that is less than or equal to the sample.
* If a mapped value doesn't exist for this bucket yet, one is created.
*
* @param sample Long value representing the sample that is being accumulated
*/
internal fun accumulate(sample: Long) {
for (i in buckets.indices) {
if (sample <= buckets[i] || i == bucketCount - 1) {
values[i] = (values[i] ?: 0) + 1
var under = 0
var over = bucketCount
var mid: Int
do {
mid = under + (over - under) / 2
if (mid == under) {
break
}
}
if (buckets[mid] <= sample) {
under = mid
} else {
over = mid
}
} while (true)
val limit = buckets[mid]
values[limit] = (values[limit] ?: 0) + 1
sum += sample
}
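
For readers skimming the diff, here is a standalone version of the new search (a sketch, not the code under review): it returns the index of the largest bucket limit that is less than or equal to the sample, and the count is then recorded under that limit.

```kotlin
// Binary search over sorted bucket limits; assumes buckets[0] == 0 so every
// non-negative sample falls into some bucket, and samples past the last limit
// land in the final (overflow) bucket.
fun findBucketIndex(sample: Long, buckets: List<Long>): Int {
    var under = 0
    var over = buckets.size
    while (true) {
        val mid = under + (over - under) / 2
        if (mid == under) {
            return mid
        }
        if (buckets[mid] <= sample) {
            under = mid
        } else {
            over = mid
        }
    }
}

fun main() {
    val buckets = listOf(0L, 1L, 2L, 3L, 5L, 8L)
    println(buckets[findBucketIndex(4L, buckets)])   // 3: counted under values[3L]
    println(buckets[findBucketIndex(100L, buckets)]) // 8: overflow goes to the last limit
}
```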
@@ -288,15 +300,23 @@ data class TimingDistributionData(
private fun getBuckets(): List<Long> {
// This algorithm calculates the bucket sizes using a natural log approach to get
// `bucketCount` number of buckets, exponentially spaced between `range[MIN]` and
// `range[MAX]`
// `range[MAX]`.
//
// Each bucket limit is the minimum value contained in that bucket;
// that is, values in bucket `i` satisfy `range[i] <= value < range[i+1]`.
// The list always contains an underflow bucket (`< 1`).
val logMax = Math.log(rangeMax.toDouble())
val result: MutableList<Long> = mutableListOf()
var current = rangeMin
if (current == 0L) {
current = 1L
}
// underflow bucket
result.add(0)
result.add(current)
for (i in 2..bucketCount) {
for (i in 2 until bucketCount) {
val logCurrent = Math.log(current.toDouble())
val logRatio = (logMax - logCurrent) / (bucketCount - i)
val logNext = logCurrent + logRatio
......
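
The hunk above is truncated; the following is a hedged, self-contained sketch of the scheme the comment describes. The rounding and increment step beyond the visible lines is an assumption, not a copy of the library code.

```kotlin
import kotlin.math.exp
import kotlin.math.ln
import kotlin.math.roundToLong

// Exponentially spaced bucket limits between rangeMin and rangeMax, preceded by
// an underflow bucket at 0; produces `bucketCount` limits in total.
fun exponentialBuckets(rangeMin: Long, rangeMax: Long, bucketCount: Int): List<Long> {
    val logMax = ln(rangeMax.toDouble())
    val result = mutableListOf(0L) // underflow bucket (`< 1`)
    var current = if (rangeMin == 0L) 1L else rangeMin
    result.add(current)
    for (i in 2 until bucketCount) {
        // Space the remaining limits evenly in log space between `current` and `rangeMax`.
        val logCurrent = ln(current.toDouble())
        val logNext = logCurrent + (logMax - logCurrent) / (bucketCount - i)
        val next = exp(logNext).roundToLong()
        current = if (next > current) next else current + 1
        result.add(current)
    }
    return result
}

fun main() {
    val buckets = exponentialBuckets(0L, 60_000L, 100)
    println(buckets.take(5)) // [0, 1, 2, 3, 4]
    println(buckets.last())  // 60000: the last limit doubles as the overflow bucket
}
```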
@@ -58,12 +58,12 @@ class TimingDistributionMetricTypeTest {
val snapshot = metric.testGetValue()
// Check the sum
assertEquals(6L, snapshot.sum)
// Check that the 1L fell into the first bucket
assertEquals(1L, snapshot.values[0])
// Check that the 2L fell into the second bucket
// Check that the 1L fell into the first value bucket
assertEquals(1L, snapshot.values[1])
// Check that the 3L fell into the third bucket
// Check that the 2L fell into the second value bucket
assertEquals(1L, snapshot.values[2])
// Check that the 3L fell into the third value bucket
assertEquals(1L, snapshot.values[3])
}
@Test
@@ -130,11 +130,11 @@ class TimingDistributionMetricTypeTest {
// Check the sum
assertEquals(6L, snapshot.sum)
// Check that the 1L fell into the first bucket
assertEquals(1L, snapshot.values[0])
// Check that the 2L fell into the second bucket
assertEquals(1L, snapshot.values[1])
// Check that the 3L fell into the third bucket
// Check that the 2L fell into the second bucket
assertEquals(1L, snapshot.values[2])
// Check that the 3L fell into the third bucket
assertEquals(1L, snapshot.values[3])
// Check that data was properly recorded in the third ping.
assertTrue(metric.testHasValue("store3"))
@@ -142,10 +142,10 @@ class TimingDistributionMetricTypeTest {
// Check the sum
assertEquals(6L, snapshot2.sum)
// Check that the 1L fell into the first bucket
assertEquals(1L, snapshot2.values[0])
// Check that the 2L fell into the second bucket
assertEquals(1L, snapshot2.values[1])
// Check that the 3L fell into the third bucket
// Check that the 2L fell into the second bucket
assertEquals(1L, snapshot2.values[2])
// Check that the 3L fell into the third bucket
assertEquals(1L, snapshot2.values[3])
}
}
@@ -54,7 +54,7 @@ class TimingDistributionsStorageEngineTest {
timeUnit = TimeUnit.Millisecond
)
// Create a sample that will fall into the first bucket (bucket '0') so we can easily
// Create a sample that will fall into the underflow bucket (bucket '0') so we can easily
// find it
val sample = 1L
TimingDistributionsStorageEngine.accumulate(
@@ -260,7 +260,7 @@ class TimingDistributionsStorageEngineTest {
val snapshot = metric.testGetValue()
assertEquals("Accumulating overflow values should increment last bucket",
1L,
snapshot.values[TimingDistributionData.DEFAULT_BUCKET_COUNT - 1])
snapshot.values[TimingDistributionData.DEFAULT_RANGE_MAX])
}
@Test
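
With limits as map keys, the overflow bucket is no longer addressed by its position (`DEFAULT_BUCKET_COUNT - 1`) but by the largest bucket limit, `DEFAULT_RANGE_MAX`: any sample beyond the range maximum is counted under that key.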
@@ -268,13 +268,13 @@ class TimingDistributionsStorageEngineTest {
// Hand calculated values using current default range 0 - 60000 and bucket count of 100.
// NOTE: The final bucket, regardless of width, represents the overflow bucket to hold any
// values beyond the maximum (in this case the maximum is 60000)
val testBuckets: List<Long> = listOf(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17,
val testBuckets: List<Long> = listOf(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 17,
19, 21, 23, 25, 28, 31, 34, 38, 42, 46, 51, 56, 62, 68, 75, 83, 92, 101, 111, 122, 135,
149, 164, 181, 200, 221, 244, 269, 297, 328, 362, 399, 440, 485, 535, 590, 651, 718,
792, 874, 964, 1064, 1174, 1295, 1429, 1577, 1740, 1920, 2118, 2337, 2579, 2846, 3140,
3464, 3822, 4217, 4653, 5134, 5665, 6250, 6896, 7609, 8395, 9262, 10219, 11275, 12440,
13726, 15144, 16709, 18436, 20341, 22443, 24762, 27321, 30144, 33259, 36696, 40488,
44672, 49288, 54381, 60000, 60001)
44672, 49288, 54381, 60000)
// Define a timing distribution metric, which will be stored in "store1".
val metric = TimingDistributionMetricType(
@@ -295,7 +295,7 @@ class TimingDistributionsStorageEngineTest {
// Check that timing distribution was recorded.
assertTrue("Accumulating values records data", metric.testHasValue())
// Make sure that the sample in the correct (first) bucket
// Make sure that the sample is in the correct (underflow) bucket
val snapshot = metric.testGetValue()
assertEquals("Accumulating should increment correct bucket",
1L, snapshot.values[0])
@@ -340,15 +340,15 @@ class TimingDistributionsStorageEngineTest {
assertEquals("Accumulating updates the count", 5, snapshot.count)
assertEquals("Accumulating should increment correct bucket",
1L, snapshot.values[0])
1L, snapshot.values[1])
assertEquals("Accumulating should increment correct bucket",
1L, snapshot.values[9])
1L, snapshot.values[10])
assertEquals("Accumulating should increment correct bucket",
1L, snapshot.values[33])
1L, snapshot.values[92])
assertEquals("Accumulating should increment correct bucket",
1L, snapshot.values[57])
1L, snapshot.values[964])
assertEquals("Accumulating should increment correct bucket",
1L, snapshot.values[80])
1L, snapshot.values[9262])
}
@Test
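
The keys in these assertions change from bucket indices to bucket limits: each count is now looked up under the largest limit that does not exceed the accumulated sample. For example, against the hand-calculated list above, a sample of 100 would be counted under 92 (since 92 <= 100 < 101) rather than under bucket index 33.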
@@ -381,12 +381,12 @@ class TimingDistributionsStorageEngineTest {
assertEquals("JSON histogram type must match Timing Distribution histogram type",
tdd.histogramType.toString().toLowerCase(), jsonTdd.getString("histogram_type"))
val jsonValue = jsonTdd.getJSONObject("values")
assertEquals("JSON values must match Timing Distribution values",
tdd.values[0], jsonValue.getLong("0"))
assertEquals("JSON values must match Timing Distribution values",
tdd.values[1], jsonValue.getLong("1"))
assertEquals("JSON values must match Timing Distribution values",
tdd.values[2], jsonValue.getLong("2"))
assertEquals("JSON values must match Timing Distribution values",
tdd.values[3], jsonValue.getLong("3"))
assertEquals("JSON sum must match Timing Distribution sum",
tdd.sum, jsonTdd.getLong("sum"))
assertEquals("JSON time unit must match Timing Distribution time unit",
......