Bug 1543247 - Part 2: Install `visualmetrics.py` prerequisites. r=ahal (730a0a46) · Commits · The Tor Project / Applications / Tor Browser

python/mozbuild/mozbuild/artifact_cache.py

0 → 100644

+233 −0

Original line number	Diff line number	Diff line
		# This Source Code Form is subject to the terms of the Mozilla Public
		# License, v. 2.0. If a copy of the MPL was not distributed with this
		# file, You can obtain one at http://mozilla.org/MPL/2.0/.

		'''
		Fetch and cache artifacts from URLs.

		This module manages fetching artifacts from URLs and purging old
		artifacts using a simple Least Recently Used cache.

		This module requires certain modules be importable from the ambient Python
		environment. Consumers will need to arrange this themselves.

		The bulk of the complexity is in managing and persisting several caches. If
		we found a Python LRU cache that pickled cleanly, we could remove a lot of
		this code! Sadly, I found no such candidate implementations, so we pickle
		pylru caches manually.

		None of the instances (or the underlying caches) are safe for concurrent use.
		A future need, perhaps.
		'''


		from __future__ import absolute_import, print_function, unicode_literals

		import binascii
		import hashlib
		import logging
		import os
		import urlparse

		from mozbuild.util import (
		mkdir,
		)
		import mozpack.path as mozpath
		from dlmanager import (
		DownloadManager,
		PersistLimit,
		)


		# Minimum number of downloaded artifacts to keep. Each artifact can be very large,
		# so don't make this too large!
		MIN_CACHED_ARTIFACTS = 6

		# Maximum size of the downloaded artifacts to keep in cache, in bytes (1GiB).
		MAX_CACHED_ARTIFACTS_SIZE = 1024 * 1024 * 1024


		class ArtifactPersistLimit(PersistLimit):
		    '''Handle persistence for a cache of artifacts.

		    When instantiating a DownloadManager, it starts by filling the
		    PersistLimit instance it's given with register_dir_content.
		    In practice, this registers all the files already in the cache directory.
		    After a download finishes, the newly downloaded file is registered, and the
		    oldest files registered to the PersistLimit instance are removed depending
		    on the size and file limits it's configured for.
		    This is all good, but there are a few tweaks we want here:
		    - We have pickle files in the cache directory that we don't want purged.
		    - Files that were just downloaded in the same session shouldn't be purged.
		      (if for some reason we end up downloading more than the default max size,
		      we don't want the files to be purged)
		    To achieve this, this subclass of PersistLimit inhibits the register_file
		    method for pickle files and tracks what files were downloaded in the same
		    session to avoid removing them.

		    The register_file method may be used to register cache matches too, so that
		    later sessions know they were freshly used.
		    '''

		    def __init__(self, log=None):
		        '''Set up the limits from the module-level constants.

		        `log` is an optional callable; when it is falsy, messages handed to
		        `self.log` are silently dropped.
		        '''
		        super(ArtifactPersistLimit, self).__init__(
		            size_limit=MAX_CACHED_ARTIFACTS_SIZE,
		            file_limit=MIN_CACHED_ARTIFACTS)
		        self._log = log
		        # True only while register_dir_content is scanning a directory.
		        self._registering_dir = False
		        # Paths registered outside of a directory scan; remove_old_files
		        # never deletes these.
		        self._downloaded_now = set()

		    def log(self, *args, **kwargs):
		        '''Forward all arguments unchanged to the logging callable, if any.'''
		        if self._log:
		            self._log(*args, **kwargs)

		    def register_file(self, path):
		        '''Register `path` with the limit, skipping bookkeeping files.

		        Files ending in `.pickle` or `.checksum`, and the
		        `.metadata_never_index` marker, are ignored entirely. A file
		        registered outside of a directory scan is touched and remembered in
		        `_downloaded_now`.
		        '''
		        if path.endswith(('.pickle', '.checksum')) or \
		                os.path.basename(path) == '.metadata_never_index':
		            return
		        if not self._registering_dir:
		            # Touch the file so that subsequent calls to a mach artifact
		            # command know it was recently used. While remove_old_files
		            # is based on access time, in various cases, the access time is not
		            # updated when just reading the file, so we force an update.
		            try:
		                os.utime(path, None)
		            except OSError:
		                # Deliberately best-effort: failing to touch the file must
		                # not prevent it from being registered.
		                pass
		            self._downloaded_now.add(path)
		        super(ArtifactPersistLimit, self).register_file(path)

		    def register_dir_content(self, directory, pattern="*"):
		        '''Register pre-existing files without marking them as fresh.

		        While the parent implementation runs, `_registering_dir` is set so
		        that register_file neither touches the files nor adds them to
		        `_downloaded_now`.
		        '''
		        self._registering_dir = True
		        try:
		            super(ArtifactPersistLimit, self).register_dir_content(
		                directory, pattern)
		        finally:
		            # Always clear the flag; otherwise an error during the scan
		            # would make every later register_file call look like part of
		            # the scan.
		            self._registering_dir = False

		    def remove_old_files(self):
		        '''Delete files, oldest access time first, while limits are exceeded.

		        Files are removed only while there are more than `file_limit` files
		        left to consider and the tracked total size is at least
		        `size_limit`. Files in `_downloaded_now`, and files that cannot be
		        removed, are kept.
		        '''
		        from dlmanager import fs
		        files = sorted(self.files, key=lambda f: f.stat.st_atime)
		        kept = []
		        while len(files) > self.file_limit and \
		                self._files_size >= self.size_limit:
		            f = files.pop(0)
		            if f.path in self._downloaded_now:
		                kept.append(f)
		                continue
		            try:
		                fs.remove(f.path)
		            except OSError:
		                # For some reason, on automation, we can't remove those files.
		                # So for now, ignore the error. OSError is caught rather than
		                # WindowsError because the latter name only exists on
		                # Windows (where it is OSError or a subclass of it).
		                kept.append(f)
		                continue
		            self.log(
		                logging.INFO,
		                'artifact',
		                {'filename': f.path},
		                'Purged artifact {filename}')
		            self._files_size -= f.stat.st_size
		        self.files = files + kept

		    def remove_all(self):
		        '''Delete every registered file and reset the tracked size and list.'''
		        from dlmanager import fs
		        for f in self.files:
		            fs.remove(f.path)
		        self._files_size = 0
		        self.files = []


		class ArtifactCache(object):
		    '''Fetch artifacts from URLs and purge least recently used artifacts from disk.'''

		    def __init__(self, cache_dir, log=None, skip_cache=False):
		        '''Create the cache directory and the download machinery around it.

		        `log` is an optional logging callable. When `skip_cache` is true,
		        fetch removes an already-present file before downloading and
		        clear_cache becomes a no-op.
		        '''
		        mkdir(cache_dir, not_indexed=True)
		        self._cache_dir = cache_dir
		        self._log = log
		        self._skip_cache = skip_cache
		        self._persist_limit = ArtifactPersistLimit(log)
		        self._download_manager = DownloadManager(
		            self._cache_dir, persist_limit=self._persist_limit)
		        # Last 5% progress bucket that was logged; -1 means none yet.
		        self._last_dl_update = -1

		    def log(self, *args, **kwargs):
		        '''Forward all arguments unchanged to the logging callable, if any.'''
		        if self._log:
		            self._log(*args, **kwargs)

		    @staticmethod
		    def _cache_filename(url):
		        '''Return the file name under which `url` is stored in the cache.'''
		        fname = os.path.basename(url)
		        try:
		            # Use the file name from the url if it looks like a hash digest.
		            if len(fname) not in (32, 40, 56, 64, 96, 128):
		                raise TypeError()
		            binascii.unhexlify(fname)
		        except (TypeError, ValueError):
		            # unhexlify reports bad input as TypeError on Python 2 and as
		            # binascii.Error (a ValueError) on Python 3.
		            #
		            # We download to a temporary name like HASH[:16]-basename to
		            # differentiate among URLs with the same basenames. We used to then
		            # extract the build ID from the downloaded artifact and use it to make a
		            # human readable unique name, but extracting build IDs is time consuming
		            # (especially on Mac OS X, where we must mount a large DMG file).
		            url_bytes = url if isinstance(url, bytes) else url.encode('utf-8')
		            digest = hashlib.sha256(url_bytes).hexdigest()[:16]
		            # Strip query string and fragments.
		            basename = os.path.basename(urlparse.urlparse(url).path)
		            fname = digest + '-' + basename
		        return fname

		    def fetch(self, url, force=False):
		        '''Download `url` into the cache and return the absolute local path.

		        If the download manager returns a falsy value from `download`, the
		        file is instead registered with the persist limit so it is not
		        purged. Background downloads are cancelled on the way out, whether
		        or not the fetch succeeded. `force` is accepted but not used here.
		        '''
		        fname = self._cache_filename(url)
		        path = os.path.abspath(mozpath.join(self._cache_dir, fname))
		        if self._skip_cache and os.path.exists(path):
		            self.log(
		                logging.INFO,
		                'artifact',
		                {'path': path},
		                'Skipping cache: removing cached downloaded artifact {path}')
		            os.remove(path)

		        self.log(
		            logging.INFO,
		            'artifact',
		            {'path': path},
		            'Downloading to temporary location {path}')
		        # Restart progress reporting so a bucket left over from an earlier
		        # fetch cannot suppress this download's first message.
		        self._last_dl_update = -1
		        try:
		            dl = self._download_manager.download(url, fname)

		            def download_progress(dl, bytes_so_far, total_size):
		                # Log at most once per 5% step; skip when the size is unknown.
		                if not total_size:
		                    return
		                percent = (float(bytes_so_far) / total_size) * 100
		                now = int(percent / 5)
		                if now == self._last_dl_update:
		                    return
		                self._last_dl_update = now
		                self.log(logging.INFO, 'artifact',
		                         {'bytes_so_far': bytes_so_far,
		                          'total_size': total_size,
		                          'percent': percent},
		                         'Downloading... {percent:02.1f} %')

		            if dl:
		                dl.set_progress(download_progress)
		                dl.wait()
		            else:
		                # Avoid the file being removed if it was in the cache already.
		                self._persist_limit.register_file(
		                    os.path.join(self._cache_dir, fname))

		            self.log(
		                logging.INFO,
		                'artifact',
		                {'path': path},
		                'Downloaded artifact to {path}')
		            return path
		        finally:
		            # Cancel any background downloads in progress.
		            self._download_manager.cancel()

		    def clear_cache(self):
		        '''Remove all cached artifacts, unless the cache is being skipped.'''
		        if self._skip_cache:
		            self.log(
		                logging.INFO,
		                'artifact',
		                {},
		                'Skipping cache: ignoring clear_cache!')
		            return

		        self._persist_limit.remove_all()

python/mozbuild/mozbuild/artifacts.py

+1 −194

Original line number	Diff line number	Diff line
		@@ -26,14 +26,6 @@ This module performs the following steps:
		extract relevant files from Mac OS X DMG files into a friendly archive format
		so we don't have to mount DMG files frequently.

		The bulk of the complexity is in managing and persisting several caches. If
		we found a Python LRU cache that pickled cleanly, we could remove a lot of
		this code! Sadly, I found no such candidate implementations, so we pickle
		pylru caches manually.

		None of the instances (or the underlying caches) are safe for concurrent use.
		A future need, perhaps.

		This module requires certain modules be importable from the ambient Python
		environment. \|mach artifact\| ensures these modules are available, but other
		consumers will need to arrange this themselves.
		@@ -42,11 +34,9 @@ consumers will need to arrange this themselves.

		from __future__ import absolute_import, print_function, unicode_literals

		import binascii
		import collections
		import functools
		import glob
		import hashlib
		import logging
		import operator
		import os
		@@ -68,6 +58,7 @@ from taskgraph.util.taskcluster import (
		list_artifacts,
		)

		from mozbuild.artifact_cache import ArtifactCache
		from mozbuild.artifact_builds import JOB_CHOICES
		from mozbuild.util import (
		ensureParentDir,
		@@ -85,10 +76,6 @@ from mozpack.mozjar import (
		)
		from mozpack.packager.unpack import UnpackFinder
		import mozpack.path as mozpath
		from dlmanager import (
		DownloadManager,
		PersistLimit,
		)

		NUM_PUSHHEADS_TO_QUERY_PER_PARENT = 50 # Number of candidate pushheads to cache per parent changeset.

		@@ -101,13 +88,6 @@ NUM_REVISIONS_TO_QUERY = 500

		MAX_CACHED_TASKS = 400 # Number of pushheads to cache Task Cluster task data for.

		# Minimum number of downloaded artifacts to keep. Each artifact can be very large,
		# so don't make this too large!
		MIN_CACHED_ARTIFACTS = 6

		# Maximum size of the downloaded artifacts to keep in cache, in bytes (1GiB).
		MAX_CACHED_ARTIFACTS_SIZE = 1024 * 1024 * 1024

		# Downloaded artifacts are cached, and a subset of their contents extracted for
		# easy installation. This is most noticeable on Mac OS X: since mounting and
		# copying from DMG files is very slow, we extract the desired binaries to a
		@@ -760,179 +740,6 @@ class TaskCache(CacheManager):
		return taskId, list_artifacts(taskId)


		class ArtifactPersistLimit(PersistLimit):
		    '''Handle persistence for artifacts cache

		    When instantiating a DownloadManager, it starts by filling the
		    PersistLimit instance it's given with register_dir_content.
		    In practice, this registers all the files already in the cache directory.
		    After a download finishes, the newly downloaded file is registered, and the
		    oldest files registered to the PersistLimit instance are removed depending
		    on the size and file limits it's configured for.
		    This is all good, but there are a few tweaks we want here:
		    - We have pickle files in the cache directory that we don't want purged.
		    - Files that were just downloaded in the same session shouldn't be purged.
		      (if for some reason we end up downloading more than the default max size,
		      we don't want the files to be purged)
		    To achieve this, this subclass of PersistLimit inhibits the register_file
		    method for pickle files and tracks what files were downloaded in the same
		    session to avoid removing them.

		    The register_file method may be used to register cache matches too, so that
		    later sessions know they were freshly used.
		    '''

		    def __init__(self, log=None):
		        '''Set up the limits from the module-level constants.

		        `log` is an optional callable; when it is falsy, messages handed to
		        `self.log` are silently dropped.
		        '''
		        super(ArtifactPersistLimit, self).__init__(
		            size_limit=MAX_CACHED_ARTIFACTS_SIZE,
		            file_limit=MIN_CACHED_ARTIFACTS)
		        self._log = log
		        # True only while register_dir_content is scanning a directory.
		        self._registering_dir = False
		        # Paths registered outside of a directory scan; remove_old_files
		        # never deletes these.
		        self._downloaded_now = set()

		    def log(self, *args, **kwargs):
		        '''Forward all arguments unchanged to the logging callable, if any.'''
		        if self._log:
		            self._log(*args, **kwargs)

		    def register_file(self, path):
		        '''Register `path` with the limit, skipping bookkeeping files.

		        Files ending in `.pickle` and the `.metadata_never_index` marker are
		        ignored entirely. A file registered outside of a directory scan is
		        touched and remembered in `_downloaded_now`.
		        '''
		        if path.endswith('.pickle') or \
		                os.path.basename(path) == '.metadata_never_index':
		            return
		        if not self._registering_dir:
		            # Touch the file so that subsequent calls to a mach artifact
		            # command know it was recently used. While remove_old_files
		            # is based on access time, in various cases, the access time is not
		            # updated when just reading the file, so we force an update.
		            try:
		                os.utime(path, None)
		            except OSError:
		                # Deliberately best-effort: failing to touch the file must
		                # not prevent it from being registered.
		                pass
		            self._downloaded_now.add(path)
		        super(ArtifactPersistLimit, self).register_file(path)

		    def register_dir_content(self, directory, pattern="*"):
		        '''Register pre-existing files without marking them as fresh.

		        While the parent implementation runs, `_registering_dir` is set so
		        that register_file neither touches the files nor adds them to
		        `_downloaded_now`.
		        '''
		        self._registering_dir = True
		        try:
		            super(ArtifactPersistLimit, self).register_dir_content(
		                directory, pattern)
		        finally:
		            # Always clear the flag; otherwise an error during the scan
		            # would make every later register_file call look like part of
		            # the scan.
		            self._registering_dir = False

		    def remove_old_files(self):
		        '''Delete files, oldest access time first, while limits are exceeded.

		        Files are removed only while there are more than `file_limit` files
		        left to consider and the tracked total size is at least
		        `size_limit`. Files in `_downloaded_now`, and files that cannot be
		        removed, are kept.
		        '''
		        from dlmanager import fs
		        files = sorted(self.files, key=lambda f: f.stat.st_atime)
		        kept = []
		        while len(files) > self.file_limit and \
		                self._files_size >= self.size_limit:
		            f = files.pop(0)
		            if f.path in self._downloaded_now:
		                kept.append(f)
		                continue
		            try:
		                fs.remove(f.path)
		            except OSError:
		                # For some reason, on automation, we can't remove those files.
		                # So for now, ignore the error. OSError is caught rather than
		                # WindowsError because the latter name only exists on
		                # Windows (where it is OSError or a subclass of it).
		                kept.append(f)
		                continue
		            self.log(logging.INFO, 'artifact',
		                     {'filename': f.path},
		                     'Purged artifact {filename}')
		            self._files_size -= f.stat.st_size
		        self.files = files + kept

		    def remove_all(self):
		        '''Delete every registered file and reset the tracked size and list.'''
		        from dlmanager import fs
		        for f in self.files:
		            fs.remove(f.path)
		        self._files_size = 0
		        self.files = []


		class ArtifactCache(object):
		    '''Fetch Task Cluster artifact URLs and purge least recently used artifacts from disk.'''

		    def __init__(self, cache_dir, log=None, skip_cache=False):
		        '''Create the cache directory and the download machinery around it.

		        `log` is an optional logging callable. When `skip_cache` is true,
		        fetch removes an already-present file before downloading and
		        clear_cache becomes a no-op.
		        '''
		        mkdir(cache_dir, not_indexed=True)
		        self._cache_dir = cache_dir
		        self._log = log
		        self._skip_cache = skip_cache
		        self._persist_limit = ArtifactPersistLimit(log)
		        self._download_manager = DownloadManager(
		            self._cache_dir, persist_limit=self._persist_limit)
		        # Last 5% progress bucket that was logged; -1 means none yet.
		        self._last_dl_update = -1

		    def log(self, *args, **kwargs):
		        '''Forward all arguments unchanged to the logging callable, if any.'''
		        if self._log:
		            self._log(*args, **kwargs)

		    @staticmethod
		    def _cache_filename(url):
		        '''Return the file name under which `url` is stored in the cache.'''
		        fname = os.path.basename(url)
		        try:
		            # Use the file name from the url if it looks like a hash digest.
		            if len(fname) not in (32, 40, 56, 64, 96, 128):
		                raise TypeError()
		            binascii.unhexlify(fname)
		        except (TypeError, ValueError):
		            # unhexlify reports bad input as TypeError on Python 2 and as
		            # binascii.Error (a ValueError) on Python 3.
		            #
		            # We download to a temporary name like HASH[:16]-basename to
		            # differentiate among URLs with the same basenames. We used to then
		            # extract the build ID from the downloaded artifact and use it to make a
		            # human readable unique name, but extracting build IDs is time consuming
		            # (especially on Mac OS X, where we must mount a large DMG file).
		            url_bytes = url if isinstance(url, bytes) else url.encode('utf-8')
		            digest = hashlib.sha256(url_bytes).hexdigest()[:16]
		            # Strip query string and fragments.
		            basename = os.path.basename(urlparse.urlparse(url).path)
		            fname = digest + '-' + basename
		        return fname

		    def fetch(self, url, force=False):
		        '''Download `url` into the cache and return the absolute local path.

		        If the download manager returns a falsy value from `download`, the
		        file is instead registered with the persist limit so it is not
		        purged. Background downloads are cancelled on the way out, whether
		        or not the fetch succeeded. `force` is accepted but not used here.
		        '''
		        fname = self._cache_filename(url)
		        path = os.path.abspath(mozpath.join(self._cache_dir, fname))
		        if self._skip_cache and os.path.exists(path):
		            self.log(logging.INFO, 'artifact',
		                     {'path': path},
		                     'Skipping cache: removing cached downloaded artifact {path}')
		            os.remove(path)

		        self.log(logging.INFO, 'artifact',
		                 {'path': path},
		                 'Downloading to temporary location {path}')
		        # Restart progress reporting so a bucket left over from an earlier
		        # fetch cannot suppress this download's first message.
		        self._last_dl_update = -1
		        try:
		            dl = self._download_manager.download(url, fname)

		            def download_progress(dl, bytes_so_far, total_size):
		                # Log at most once per 5% step; skip when the size is unknown.
		                if not total_size:
		                    return
		                percent = (float(bytes_so_far) / total_size) * 100
		                now = int(percent / 5)
		                if now == self._last_dl_update:
		                    return
		                self._last_dl_update = now
		                self.log(logging.INFO, 'artifact',
		                         {'bytes_so_far': bytes_so_far, 'total_size': total_size, 'percent': percent},
		                         'Downloading... {percent:02.1f} %')

		            if dl:
		                dl.set_progress(download_progress)
		                dl.wait()
		            else:
		                # Avoid the file being removed if it was in the cache already.
		                self._persist_limit.register_file(
		                    os.path.join(self._cache_dir, fname))

		            self.log(logging.INFO, 'artifact',
		                     {'path': path},
		                     'Downloaded artifact to {path}')
		            return path
		        finally:
		            # Cancel any background downloads in progress.
		            self._download_manager.cancel()

		    def clear_cache(self):
		        '''Remove all cached artifacts, unless the cache is being skipped.'''
		        if self._skip_cache:
		            self.log(logging.INFO, 'artifact',
		                     {},
		                     'Skipping cache: ignoring clear_cache!')
		            return

		        self._persist_limit.remove_all()


		class Artifacts(object):
		'''Maintain state to efficiently fetch build artifacts from a Firefox tree.'''

python/mozbuild/mozbuild/test/python.ini

+1 −1

Original line number	Diff line number	Diff line
		@@ -36,7 +36,7 @@ skip-if = (os == "win")
		[frontend/test_namespaces.py]
		[frontend/test_reader.py]
		[frontend/test_sandbox.py]
		[test_artifacts.py]
		[test_artifact_cache.py]
		[test_base.py]
		[test_containers.py]
		[test_dotproperties.py]

python/mozbuild/mozbuild/test/test_artifacts.py→python/mozbuild/mozbuild/test/test_artifact_cache.py

+8 −8

Original line number	Diff line number	Diff line
		@@ -11,8 +11,8 @@ import unittest
		from tempfile import mkdtemp
		from shutil import rmtree

		from mozbuild.artifacts import ArtifactCache
		from mozbuild import artifacts
		from mozbuild.artifact_cache import ArtifactCache
		from mozbuild import artifact_cache


		CONTENTS = {
		@@ -55,10 +55,10 @@ class FakeSession(object):

		class TestArtifactCache(unittest.TestCase):
		def setUp(self):
		self.min_cached_artifacts = artifacts.MIN_CACHED_ARTIFACTS
		self.max_cached_artifacts_size = artifacts.MAX_CACHED_ARTIFACTS_SIZE
		artifacts.MIN_CACHED_ARTIFACTS = 2
		artifacts.MAX_CACHED_ARTIFACTS_SIZE = 4096
		self.min_cached_artifacts = artifact_cache.MIN_CACHED_ARTIFACTS
		self.max_cached_artifacts_size = artifact_cache.MAX_CACHED_ARTIFACTS_SIZE
		artifact_cache.MIN_CACHED_ARTIFACTS = 2
		artifact_cache.MAX_CACHED_ARTIFACTS_SIZE = 4096

		self._real_utime = os.utime
		os.utime = self.utime
		@@ -68,8 +68,8 @@ class TestArtifactCache(unittest.TestCase):

		def tearDown(self):
		rmtree(self.tmpdir)
		artifacts.MIN_CACHED_ARTIFACTS = self.min_cached_artifacts
		artifacts.MAX_CACHED_ARTIFACTS_SIZE = self.max_cached_artifacts_size
		artifact_cache.MIN_CACHED_ARTIFACTS = self.min_cached_artifacts
		artifact_cache.MAX_CACHED_ARTIFACTS_SIZE = self.max_cached_artifacts_size
		os.utime = self._real_utime

		def utime(self, path, times):

tools/browsertime/mach_commands.py

+292 −26

File changed.

Preview size limit exceeded, changes collapsed.