Commit da75090e authored by MickeyMoz's avatar MickeyMoz
Browse files

Merge #4101

4101: 1571422: Implement memory distribution metric r=mdboom a=mdboom

This adds a new memory distribution metric, which uses the identical functional
bucketing from the timing distribution metric, except the units for conversion
are different and it has a different maximum (1TB).

In addition to the obvious additions, there is some refactoring going on here:

- The distribution data classes from timing dist. and custom dist. were moved
  to their own package and renamed `FunctionalHistogram` and
  `PrecomputedHistogram` respectively, so that they could be shared between
  different metric types.  This prompted some moving around of tests.

- It was discovered that these classes stored the name and category of the
  metric, but they really don't need to.  All we need is a way to round-trip
  the bucket positions, values and sum.

- `HistogramBase` was renamed to `HistogramMetricBase`, since it's not the base
  of these new histogram data classes, but of metrics that use a histogram.


---
<!-- Text above this line will be added to the commit once "bors" merges this PR -->

### Pull Request checklist
<!-- Before submitting the PR, please address each item -->
- [ ] **Quality**: This PR builds and passes detekt/ktlint checks (A pre-push hook is recommended)
- [ ] **Tests**: This PR includes thorough tests or an explanation of why it does not
- [ ] **Changelog**: This PR includes [a changelog entry](https://github.com/mozilla-mobile/android-components/blob/master/docs/changelog.md) or does not need one
- [ ] **Accessibility**: The code in this PR follows [accessibility best practices](https://github.com/mozilla-mobile/shared-docs/blob/master/android/accessibility_guide.md) or does not include any user facing features

### After merge
- [ ] **Milestone**: Make sure issues closed by this pull request are added to the [milestone](https://github.com/mozilla-mobile/android-components/milestones) of the version currently in development.
- [ ] **Breaking Changes**: If this is a breaking change, please push a draft PR on [Reference Browser](https://github.com/mozilla-mobile/reference-browser) to address the breaking issues.


Co-authored-by: Michael Droettboom <mdboom@gmail.com>
parents 763aaf34 54e71da7
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -14,7 +14,7 @@ apply plugin: 'kotlin-android'
 * created during unit testing.
 * This uses a specific version of the schema identified by a git commit hash.
 */
String GLEAN_PING_SCHEMA_GIT_HASH = "7755497"
String GLEAN_PING_SCHEMA_GIT_HASH = "441076d"
String GLEAN_PING_SCHEMA_URL = "https://raw.githubusercontent.com/mozilla-services/mozilla-pipeline-schemas/$GLEAN_PING_SCHEMA_GIT_HASH/schemas/glean/baseline/baseline.1.schema.json"

android {
+1 −1
Original line number Diff line number Diff line
@@ -21,7 +21,7 @@ import org.gradle.api.internal.artifacts.ArtifactAttributes
// so that it will be shared between all libraries that use Glean.  This is
// important because it is approximately 300MB in installed size.

String GLEAN_PARSER_VERSION = "1.1.0"
String GLEAN_PARSER_VERSION = "1.2.1"
// The version of Miniconda is explicitly specified.
// Miniconda3-4.5.12 is known to not work on Windows.
String MINICONDA_VERSION = "4.5.11"
+194 −0
Original line number Diff line number Diff line
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

package mozilla.components.service.glean.histogram

import mozilla.components.support.ktx.android.org.json.tryGetLong
import org.json.JSONObject
import java.lang.Math.pow
import kotlin.math.log

/**
 * This class represents a histogram where the bucketing is performed by a
 * function, rather than pre-computed buckets. It is meant to help serialize
 * and deserialize data to the correct format for transport and storage, as well
 * as performing the calculations to determine the correct bucket for each sample.
 *
 * The bucket index of a given sample is determined with the following function:
 *
 *     i = ⌊n log_b(𝑥 + 1)⌋
 *
 * where `b` is [logBase] and `n` is [bucketsPerMagnitude]. In other words, there
 * are n buckets for each power-of-b magnitude.
 *
 * @param logBase the base of the logarithm used by the bucketing function
 * @param bucketsPerMagnitude the number of buckets for each power of [logBase]
 * @param values a map containing the minimum bucket value mapped to the accumulated count
 * @param sum the accumulated sum of all the samples in the histogram
 */
data class FunctionalHistogram(
    val logBase: Double,
    val bucketsPerMagnitude: Double,
    // map from bucket limits to accumulated values
    val values: MutableMap<Long, Long> = mutableMapOf(),
    var sum: Long = 0
) {
    // Ratio between the lower bounds of two consecutive buckets:
    // logBase^(1 / bucketsPerMagnitude).
    private val exponent = pow(logBase, 1.0 / bucketsPerMagnitude)

    companion object {
        /**
         * Factory function that takes stringified JSON and converts it back into a
         * [FunctionalHistogram].
         *
         * Entries of "values" whose count cannot be read as a Long are skipped. A bucket
         * key that is not a valid Long makes the whole string unparseable.
         *
         * @param json Stringified JSON value representing a [FunctionalHistogram] object
         * @return A [FunctionalHistogram] or null if unable to rebuild from the string.
         */
        @Suppress("ReturnCount", "ComplexMethod", "NestedBlockDepth")
        internal fun fromJsonString(json: String): FunctionalHistogram? {
            val jsonObject = try {
                JSONObject(json)
            } catch (e: org.json.JSONException) {
                return null
            }

            // Both bucketing parameters are mandatory.
            val logBase = try {
                jsonObject.getDouble("log_base")
            } catch (e: org.json.JSONException) {
                return null
            }
            val bucketsPerMagnitude = try {
                jsonObject.getDouble("buckets_per_magnitude")
            } catch (e: org.json.JSONException) {
                return null
            }

            // Attempt to parse the values map, if it fails then something is wrong and we need to
            // return null.
            val values = try {
                val mapData = jsonObject.getJSONObject("values")
                val valueMap: MutableMap<Long, Long> = mutableMapOf()
                for (key in mapData.keys()) {
                    // A non-numeric key means the stored data is corrupt: bail out with null
                    // rather than letting a NumberFormatException escape to the caller.
                    val bucketMinimum = key.toLongOrNull() ?: return null
                    mapData.tryGetLong(key)?.let { bucketCount ->
                        valueMap[bucketMinimum] = bucketCount
                    }
                }
                valueMap
            } catch (e: org.json.JSONException) {
                // This should only occur if there isn't a key/value pair stored for "values"
                return null
            }
            val sum = jsonObject.tryGetLong("sum") ?: return null

            return FunctionalHistogram(
                logBase = logBase,
                bucketsPerMagnitude = bucketsPerMagnitude,
                values = values,
                sum = sum
            )
        }
    }

    /**
     * Maps a sample to a "bucket index" that it belongs in.
     * A "bucket index" is the consecutive integer index of each bucket, useful as a
     * mathematical concept, even though the internal representation is stored and
     * sent using the minimum value in each bucket.
     *
     * @param sample The data sample
     * @return The bucket index the sample belongs in
     */
    internal fun sampleToBucketIndex(sample: Long): Long {
        return log(sample.toDouble() + 1, exponent).toLong()
    }

    /**
     * Determines the minimum value of a bucket, given a bucket index.
     *
     * @param bucketIndex The ordinal index of a bucket
     * @return The minimum value of the bucket
     */
    internal fun bucketIndexToBucketMinimum(bucketIndex: Long): Long {
        return pow(exponent, bucketIndex.toDouble()).toLong()
    }

    /**
     * Maps a sample to the minimum value of the bucket it belongs in.
     *
     * A sample of zero is always stored in a dedicated bucket whose minimum is 0.
     *
     * @param sample The sample value
     * @return the minimum value of the bucket the sample belongs in
     */
    internal fun sampleToBucketMinimum(sample: Long): Long {
        if (sample == 0L) {
            return 0L
        }
        return bucketIndexToBucketMinimum(sampleToBucketIndex(sample))
    }

    // This is a calculated read-only property that returns the total count of accumulated values
    val count: Long
        get() = values.values.sum()

    /**
     * Accumulates a sample to the correct bucket.
     * If a value doesn't exist for this bucket yet, one is created.
     *
     * @param sample Long value representing the sample that is being accumulated
     */
    internal fun accumulate(sample: Long) {
        val bucketMinimum = sampleToBucketMinimum(sample)
        values[bucketMinimum] = (values[bucketMinimum] ?: 0) + 1
        sum += sample
    }

    /**
     * Helper function to build the [FunctionalHistogram] into a JSONObject for serialization
     * purposes.
     *
     * @return The histogram as [JSONObject] for persistence
     */
    internal fun toJsonObject(): JSONObject {
        return JSONObject(mapOf(
            "log_base" to logBase,
            "buckets_per_magnitude" to bucketsPerMagnitude,
            "values" to values.mapKeys { "${it.key}" },
            "sum" to sum
        ))
    }

    /**
     * Helper function to build the [FunctionalHistogram] into a JSONObject for sending in the
     * ping payload.
     *
     * All buckets [min, max + 1] are included in the histogram, even if they have zero values.
     * Every bucket that has a recorded count is always included as well.
     *
     * @return The histogram as [JSONObject] for a ping payload
     */
    internal fun toJsonPayloadObject(): JSONObject {
        val completeValues: Map<String, Long> = if (values.isNotEmpty()) {
            // A bucket range is defined by its own key, and the key of the next
            // highest bucket. This explicitly adds any empty buckets (with a count
            // of 0) between the lowest and highest bucket so that the backend knows the
            // bucket ranges even without needing to know the function that was used to
            // create the buckets.
            val lowestKey = values.keys.reduce { a, b -> minOf(a, b) }
            val highestKey = values.keys.reduce { a, b -> maxOf(a, b) }
            val minBucket = sampleToBucketIndex(lowestKey)
            val maxBucket = sampleToBucketIndex(highestKey) + 1

            // Start from the recorded buckets so that none of them is lost. In particular
            // the zero bucket (key 0) is never produced by bucketIndexToBucketMinimum,
            // because index 0 already maps to a minimum of 1.
            val filled = values.mapKeysTo(mutableMapOf<String, Long>()) { it.key.toString() }
            for (bucketIndex in minBucket..maxBucket) {
                val key = bucketIndexToBucketMinimum(bucketIndex).toString()
                if (key !in filled) {
                    filled[key] = 0L
                }
            }
            filled
        } else {
            emptyMap()
        }

        return JSONObject(mapOf(
            "values" to completeValues,
            "sum" to sum
        ))
    }
}
+276 −0
Original line number Diff line number Diff line
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

package mozilla.components.service.glean.histogram

import mozilla.components.service.glean.private.HistogramType
import mozilla.components.support.ktx.android.org.json.tryGetInt
import mozilla.components.support.ktx.android.org.json.tryGetLong
import mozilla.components.support.ktx.android.org.json.tryGetString
import org.json.JSONArray
import org.json.JSONObject

/**
 * This class represents the structure of a custom distribution. It is meant
 * to help serialize and deserialize data to the correct format for transport and
 * storage, as well as including helper functions to calculate the bucket sizes.
 *
 * @param rangeMin the minimum value that can be represented
 * @param rangeMax the maximum value that can be represented
 * @param bucketCount total number of buckets
 * @param histogramType the [HistogramType] representing the bucket layout
 * @param values a map containing the bucket minimum mapped to the accumulated count
 * @param sum the accumulated sum of all the samples in the custom distribution
 */
data class PrecomputedHistogram(
    val rangeMin: Long,
    val rangeMax: Long,
    val bucketCount: Int,
    val histogramType: HistogramType,
    // map from bucket limits to accumulated values
    val values: MutableMap<Long, Long> = mutableMapOf(),
    var sum: Long = 0
) {
    companion object {
        /**
         * Factory function that takes stringified JSON and converts it back into a
         * [PrecomputedHistogram].
         *
         * Entries of "values" whose count cannot be read as a Long are stored with a count
         * of 0. A bucket key that is not a valid Long makes the whole string unparseable.
         *
         * @param json Stringified JSON value representing a [PrecomputedHistogram] object
         * @return A [PrecomputedHistogram] or null if unable to rebuild from the string.
         */
        @Suppress("ReturnCount", "ComplexMethod")
        internal fun fromJsonString(json: String): PrecomputedHistogram? {
            val jsonObject = try {
                JSONObject(json)
            } catch (e: org.json.JSONException) {
                return null
            }

            val bucketCount = jsonObject.tryGetInt("bucket_count") ?: return null

            // If 'range' isn't present, or either entry can't be converted to a Long,
            // JSONException is thrown and we give up.
            val (rangeMin, rangeMax) = try {
                val array = jsonObject.getJSONArray("range")
                // Range must have exactly 2 values
                if (array.length() != 2) {
                    return null
                }
                array.getLong(0) to array.getLong(1)
            } catch (e: org.json.JSONException) {
                return null
            }

            val rawHistogramType = jsonObject.tryGetString("histogram_type") ?: return null
            val histogramType = try {
                HistogramType.valueOf(rawHistogramType.capitalize())
            } catch (e: IllegalArgumentException) {
                return null
            }

            // Attempt to parse the values map, if it fails then something is wrong and we need to
            // return null.
            val values = try {
                val mapData = jsonObject.getJSONObject("values")
                val valueMap: MutableMap<Long, Long> = mutableMapOf()
                for (key in mapData.keys()) {
                    // A non-numeric key means the stored data is corrupt: bail out with null
                    // rather than letting a NumberFormatException escape to the caller.
                    val bucketLimit = key.toLongOrNull() ?: return null
                    valueMap[bucketLimit] = mapData.tryGetLong(key) ?: 0L
                }
                valueMap
            } catch (e: org.json.JSONException) {
                // This should only occur if there isn't a key/value pair stored for "values"
                return null
            }
            val sum = jsonObject.tryGetLong("sum") ?: return null

            return PrecomputedHistogram(
                bucketCount = bucketCount,
                rangeMin = rangeMin,
                rangeMax = rangeMax,
                histogramType = histogramType,
                values = values,
                sum = sum
            )
        }
    }

    // This is a calculated read-only property that returns the total count of accumulated values
    val count: Long
        get() = values.values.sum()

    // This is a list of limits for the buckets.  Instantiated lazily to ensure that the range and
    // bucket counts are set first.
    internal val buckets: List<Long> by lazy { getBuckets() }

    /**
     * Finds the correct bucket, using a binary search to locate the index of the
     * bucket where the sample is bigger than or equal to the bucket limit.
     *
     * @param sample Long value representing the sample that is being accumulated
     * @return the limit (minimum value) of the bucket the sample falls into
     */
    internal fun findBucket(sample: Long): Long {
        var under = 0
        var over = bucketCount
        var mid: Int

        do {
            mid = under + (over - under) / 2
            // The search window can't shrink any further: `under` is the answer.
            if (mid == under) {
                break
            }

            if (buckets[mid] <= sample) {
                under = mid
            } else {
                over = mid
            }
        } while (true)

        return buckets[mid]
    }

    /**
     * Accumulates a sample to the correct bucket.
     * If a value doesn't exist for this bucket yet, one is created.
     *
     * @param sample Long value representing the sample that is being accumulated
     */
    internal fun accumulate(sample: Long) {
        val limit = findBucket(sample)
        values[limit] = (values[limit] ?: 0) + 1
        sum += sample
    }

    /**
     * Helper function to build the [PrecomputedHistogram] into a JSONObject for serialization
     * purposes.
     *
     * @return The histogram as JSON for persistence
     */
    internal fun toJsonObject(): JSONObject {
        return JSONObject(mapOf(
            "bucket_count" to bucketCount,
            "range" to JSONArray(arrayOf(rangeMin, rangeMax)),
            "histogram_type" to histogramType.toString().toLowerCase(),
            "values" to values.mapKeys { "${it.key}" },
            "sum" to sum
        ))
    }

    /**
     * Helper function to build the [PrecomputedHistogram] into a JSONObject for sending in the
     * ping payload. Compared to [toJsonObject] which is designed for lossless roundtripping:
     *
     *   - this does not include the bucketing parameters
     *   - all buckets [min, max + 1] are inserted into values
     *
     * @return The histogram as JSON to send in a ping payload
     */
    internal fun toJsonPayloadObject(): JSONObject {
        // Include all buckets [min, max + 1], where max is the maximum bucket with
        // any value recorded.
        val contiguousValues: Map<String, Long> = if (values.isNotEmpty()) {
            val bucketMax = values.keys.reduce { a, b -> maxOf(a, b) }
            val filled = mutableMapOf<String, Long>()
            for (bucketMin in buckets) {
                filled["$bucketMin"] = values[bucketMin] ?: 0L
                // Stop right after emitting the first bucket above the highest recorded one.
                if (bucketMin > bucketMax) {
                    break
                }
            }
            filled
        } else {
            emptyMap()
        }

        return JSONObject(mapOf(
            "values" to contiguousValues,
            "sum" to sum
        ))
    }

    /**
     * Helper function to generate the list of linear bucket min values used when accumulating
     * to the correct buckets.
     *
     * @return List containing the bucket limits
     */
    @Suppress("MagicNumber")
    private fun getBucketsLinear(): List<Long> {
        // Written to match the bucket generation on legacy desktop telemetry:
        //   https://searchfox.org/mozilla-central/rev/e0b0c38ee83f99d3cf868bad525ace4a395039f1/toolkit/components/telemetry/build_scripts/mozparsers/parse_histograms.py#65

        // The first bucket always starts at 0.
        val result: MutableList<Long> = mutableListOf(0L)

        val dmin = rangeMin.toDouble()
        val dmax = rangeMax.toDouble()

        // NOTE(review): the divisor is zero when bucketCount == 2; presumably callers always
        // request more buckets than that — TODO confirm.
        for (i in (1 until bucketCount)) {
            val linearRange = (dmin * (bucketCount - 1 - i) + dmax * (i - 1)) / (bucketCount - 2)
            // Adding 0.5 before truncating rounds to the nearest Long.
            result.add((linearRange + 0.5).toLong())
        }

        return result
    }

    /**
     * Helper function to generate the list of exponential bucket min values used when accumulating
     * to the correct buckets.
     *
     * @return List containing the bucket limits
     */
    private fun getBucketsExponential(): List<Long> {
        // Written to match the bucket generation on legacy desktop telemetry:
        //   https://searchfox.org/mozilla-central/rev/e0b0c38ee83f99d3cf868bad525ace4a395039f1/toolkit/components/telemetry/build_scripts/mozparsers/parse_histograms.py#75

        // This algorithm calculates the bucket sizes using a natural log approach to get
        // `bucketCount` number of buckets, exponentially spaced between `range[MIN]` and
        // `range[MAX]`.
        //
        // Bucket limits are the minimal bucket value.
        // That means values in a bucket `i` are `range[i] <= value < range[i+1]`.
        // It will always contain an underflow bucket (`< 1`).
        val logMax = Math.log(rangeMax.toDouble())
        val result: MutableList<Long> = mutableListOf()
        var current = rangeMin
        if (current == 0L) {
            current = 1L
        }

        // underflow bucket
        result.add(0)
        result.add(current)

        for (i in 2 until bucketCount) {
            val logCurrent = Math.log(current.toDouble())
            val logRatio = (logMax - logCurrent) / (bucketCount - i)
            val logNext = logCurrent + logRatio
            val nextValue = Math.round(Math.exp(logNext))
            // Limits must keep increasing: if rounding did not move us forward,
            // step by one instead.
            if (nextValue > current) {
                current = nextValue
            } else {
                ++current
            }
            result.add(current)
        }
        return result.sorted()
    }

    /**
     * Helper function to generate the list of bucket min values used when accumulating
     * to the correct buckets.
     *
     * @return List containing the bucket limits
     */
    private fun getBuckets(): List<Long> {
        return when (histogramType) {
            HistogramType.Linear -> getBucketsLinear()
            HistogramType.Exponential -> getBucketsExponential()
        }
    }
}
+3 −3
Original line number Diff line number Diff line
@@ -6,7 +6,7 @@ package mozilla.components.service.glean.private

import androidx.annotation.VisibleForTesting
import mozilla.components.service.glean.Dispatchers
import mozilla.components.service.glean.storages.CustomDistributionData
import mozilla.components.service.glean.histogram.PrecomputedHistogram
import mozilla.components.service.glean.storages.CustomDistributionsStorageEngine
import mozilla.components.support.base.log.logger.Logger

@@ -37,7 +37,7 @@ data class CustomDistributionMetricType(
    val rangeMax: Long,
    val bucketCount: Int,
    val histogramType: HistogramType
) : CommonMetricData, HistogramBase {
) : CommonMetricData, HistogramMetricBase {

    private val logger = Logger("glean/CustomDistributionMetricType")

@@ -97,7 +97,7 @@ data class CustomDistributionMetricType(
     * @throws [NullPointerException] if no value is stored
     */
    @VisibleForTesting(otherwise = VisibleForTesting.NONE)
    fun testGetValue(pingName: String = sendInPings.first()): CustomDistributionData {
    fun testGetValue(pingName: String = sendInPings.first()): PrecomputedHistogram {
        @Suppress("EXPERIMENTAL_API_USAGE")
        Dispatchers.API.assertInTestingMode()

Loading