# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

'''
Fetch build artifacts from a Firefox tree.

This provides an (at-the-moment special purpose) interface to download Android
artifacts from Mozilla's Task Cluster.

This module performs the following steps:

* find a candidate hg parent revision using the local pushlog. The local
  pushlog is maintained by mozext locally and updated on every pull.

* map the candidate parent to candidate Task Cluster tasks and artifact
  locations. Pushlog entries might not correspond to tasks (yet), and those
  tasks might not produce the desired class of artifacts.

* fetch fresh Task Cluster artifacts and purge old artifacts, using a simple
  Least Recently Used cache.

The bulk of the complexity is in managing and persisting several caches. If
we found a Python LRU cache that pickled cleanly, we could remove a lot of
this code! Sadly, I found no such candidate implementations, so we pickle
pylru caches manually.

None of the instances (or the underlying caches) are safe for concurrent use.
A future need, perhaps.

This module requires certain modules be importable from the ambient Python
environment. |mach artifact| ensures these modules are available, but other
consumers will need to arrange this themselves.
'''

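# A minimal usage sketch (illustrative only: the real driver is |mach artifact|,
# and the tree, job, and paths below are placeholder values):
#
#     artifacts = Artifacts('fx-team', 'android-api-11',
#                           cache_dir='/path/to/cache-dir', hg='hg')
#     artifacts.install_from(None, '/path/to/objdir/dist')
#
# install_from() dispatches to install_from_file(), install_from_url(), or
# install_from_hg() depending on whether the source is a local file, a URL, or
# an hg revset; passing None falls back to the working directory parent ('.').
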
from __future__ import absolute_import, print_function, unicode_literals

import functools
import hashlib
import logging
import operator
import os
import pickle
import re
import shutil
import subprocess
import urlparse
import zipfile

import pylru
import taskcluster

from mozbuild.util import (
    ensureParentDir,
    FileAvoidWrite,
)
import mozpack.path as mozpath
from mozversion import mozversion
from mozregression.download_manager import (
    DownloadManager,
)
from mozregression.persist_limit import (
    PersistLimit,
)

MAX_CACHED_PARENTS = 100  # Number of parent changesets to cache candidate pushheads for.
NUM_PUSHHEADS_TO_QUERY_PER_PARENT = 50  # Number of candidate pushheads to cache per parent changeset.

MAX_CACHED_TASKS = 400  # Number of pushheads to cache Task Cluster task data for.

# Number of downloaded artifacts to cache. Each artifact can be very large,
# so don't make this too large! TODO: make this a size (like 500 megs) rather than an artifact count.
MAX_CACHED_ARTIFACTS = 6

# TODO: handle multiple artifacts with the same filename.
# TODO: handle installing binaries from different types of artifacts (.tar.bz2, .dmg, etc).
# Keep the keys of this map in sync with the |mach artifact| --job options.
JOB_DETAILS = {
    # 'android-api-9': {'re': re.compile('public/build/fennec-(.*)\.android-arm\.apk')},
    'android-api-11': {'re': re.compile('public/build/fennec-(.*)\.android-arm\.apk')},
    'android-x86': {'re': re.compile('public/build/fennec-(.*)\.android-i386\.apk')},
    # 'linux': {'re': re.compile('public/build/firefox-(.*)\.linux-i686\.tar\.bz2')},
    # 'linux64': {'re': re.compile('public/build/firefox-(.*)\.linux-x86_64\.tar\.bz2')},
    # 'macosx64': {'re': re.compile('public/build/firefox-(.*)\.mac\.dmg')},
}
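
# For illustration (the file name below is a made-up but representative example),
# the 'android-api-11' pattern above matches an artifact name such as
# 'public/build/fennec-44.0a1.en-US.android-arm.apk', with the capture group
# holding the version and locale portion, '44.0a1.en-US'.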


def cachedmethod(cachefunc):
    '''Decorator to wrap a class or instance method with a memoizing callable that
    saves results in a (possibly shared) cache.
    '''
    def decorator(method):
        def wrapper(self, *args, **kwargs):
            mapping = cachefunc(self)
            if mapping is None:
                return method(self, *args, **kwargs)
            key = (method.__name__, args, tuple(sorted(kwargs.items())))
            try:
                value = mapping[key]
                return value
            except KeyError:
                pass
            result = method(self, *args, **kwargs)
            mapping[key] = result
            return result
        return functools.update_wrapper(wrapper, method)
    return decorator
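
# Usage pattern (this is exactly how the cache classes below use it): decorate an
# instance method and point cachedmethod at the instance's pylru cache, e.g.
#
#     @cachedmethod(operator.attrgetter('_cache'))
#     def artifact_url(self, tree, job, rev):
#         ...
#
# Repeated calls with the same positional and keyword arguments are then served
# from self._cache instead of re-running the method.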


class CacheManager(object):
    '''Maintain an LRU cache. Provide simple persistence, including support for
    loading and saving the state using a "with" block. Allow clearing the cache
    and printing the cache for debugging.

    Provide simple logging.
    '''

    def __init__(self, cache_dir, cache_name, cache_size, cache_callback=None, log=None):
        self._cache = pylru.lrucache(cache_size, callback=cache_callback)
        self._cache_filename = mozpath.join(cache_dir, cache_name + '-cache.pickle')
        self._log = log

    def log(self, *args, **kwargs):
        if self._log:
            self._log(*args, **kwargs)

    def load_cache(self):
        try:
            items = pickle.load(open(self._cache_filename, 'rb'))
            for key, value in items:
                self._cache[key] = value
        except Exception as e:
            # Corrupt cache, perhaps? Sadly, pickle raises many different
            # exceptions, so it's not worth trying to be fine grained here.
            # We ignore any exception, so the cache is effectively dropped.
            self.log(logging.INFO, 'artifact',
                {'filename': self._cache_filename, 'exception': repr(e)},
                'Ignoring exception unpickling cache file {filename}: {exception}')
            pass

    def dump_cache(self):
        ensureParentDir(self._cache_filename)
        pickle.dump(list(reversed(list(self._cache.items()))), open(self._cache_filename, 'wb'), -1)

    def clear_cache(self):
        with self:
            self._cache.clear()

    def print_cache(self):
        with self:
            for item in self._cache.items():
                self.log(logging.INFO, 'artifact',
                    {'item': item},
                    '{item}')

    def print_last_item(self, args, sorted_kwargs, result):
        # By default, show nothing.
        pass

    def print_last(self):
        # We use the persisted LRU caches to our advantage. The first item is
        # most recent.
        with self:
            item = next(self._cache.items(), None)
            if item is not None:
                (name, args, sorted_kwargs), result = item
                self.print_last_item(args, sorted_kwargs, result)
            else:
                self.log(logging.WARN, 'artifact',
                    {},
                    'No last cached item found.')

    def __enter__(self):
        self.load_cache()
        return self

    def __exit__(self, type, value, traceback):
        self.dump_cache()
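
# Persistence sketch (mirroring clear_cache() and print_cache() above): entering
# a CacheManager loads the pickled cache from disk and exiting writes it back,
# so callers wrap cache access in a with block, e.g.
#
#     with task_cache:                                  # load_cache()
#         url = task_cache.artifact_url(tree, job, rev)
#     # dump_cache() has now persisted the updated cache.
#
# Here task_cache stands for a TaskCache instance, as used by Artifacts below.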


class PushHeadCache(CacheManager):
    '''Map parent hg revisions to candidate pushheads.'''

    def __init__(self, hg, cache_dir, log=None):
        # It's not unusual to pull hundreds of changesets at once, and perhaps
        # |hg up| back and forth a few times.
        CacheManager.__init__(self, cache_dir, 'pushheads', MAX_CACHED_PARENTS, log=log)
        self._hg = hg

    @cachedmethod(operator.attrgetter('_cache'))
    def pushheads(self, tree, parent):
        pushheads = subprocess.check_output([self._hg, 'log',
            '--template', '{node}\n',
            '-r', 'last(pushhead("{tree}") & ::"{parent}", {num})'.format(
                tree=tree, parent=parent, num=NUM_PUSHHEADS_TO_QUERY_PER_PARENT)])
        pushheads = pushheads.strip().split('\n')
        return pushheads


class TaskCache(CacheManager):
    '''Map candidate pushheads to Task Cluster task IDs and artifact URLs.'''

    def __init__(self, cache_dir, log=None):
        CacheManager.__init__(self, cache_dir, 'artifact_url', MAX_CACHED_TASKS, log=log)
        self._index = taskcluster.Index()
        self._queue = taskcluster.Queue()

    @cachedmethod(operator.attrgetter('_cache'))
    def artifact_url(self, tree, job, rev):
        try:
            artifact_re = JOB_DETAILS[job]['re']
        except KeyError:
            self.log(logging.INFO, 'artifact',
                {'job': job},
                'Unknown job {job}')
            raise KeyError("Unknown job")

        key = '{rev}.{tree}.{job}'.format(rev=rev, tree=tree, job=job)
        try:
            namespace = 'buildbot.revisions.{key}'.format(key=key)
            task = self._index.findTask(namespace)
        except Exception:
            # Not all revisions correspond to pushes that produce the job we
            # care about; and even those that do may not have completed yet.
            raise ValueError('Task for {key} does not exist (yet)!'.format(key=key))
        taskId = task['taskId']

        # TODO: Make this not Android-only by matching a regular expression.
        artifacts = self._queue.listLatestArtifacts(taskId)['artifacts']

        def names():
            for artifact in artifacts:
                name = artifact['name']
                if artifact_re.match(name):
                    yield name

        # TODO: Handle multiple artifacts, taking the latest one.
        for name in names():
            # We can easily extract the task ID from the URL. We can't easily
            # extract the build ID; we use the .ini files embedded in the
            # downloaded artifact for this. We could also use the uploaded
            # public/build/buildprops.json for this purpose.
            url = self._queue.buildUrl('getLatestArtifact', taskId, name)
            return url
        raise ValueError('Task for {key} existed, but no artifacts found!'.format(key=key))

    def print_last_item(self, args, sorted_kwargs, result):
        tree, job, rev = args
        self.log(logging.INFO, 'artifact',
            {'rev': rev},
            'Last installed binaries from hg parent revision {rev}')
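
# For illustration (the tree and revision are placeholders), artifact_url() above
# resolves a task through the Task Cluster index using a namespace of the form
# 'buildbot.revisions.<rev>.<tree>.<job>', for instance
# 'buildbot.revisions.<40-char hex rev>.mozilla-central.android-api-11', and then
# returns the URL of the first listed artifact whose name matches the job's
# regular expression from JOB_DETAILS.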


class ArtifactCache(CacheManager):
    '''Fetch Task Cluster artifact URLs and purge least recently used artifacts from disk.'''

    def __init__(self, cache_dir, log=None):
        # TODO: instead of storing N artifact packages, store M megabytes.
        CacheManager.__init__(self, cache_dir, 'fetch', MAX_CACHED_ARTIFACTS, cache_callback=self.delete_file, log=log)
        self._cache_dir = cache_dir
        size_limit = 1024 * 1024 * 1024  # 1 GiB in bytes.
        file_limit = 4  # But always keep at least 4 old artifacts around.
        persist_limit = PersistLimit(size_limit, file_limit)
        self._download_manager = DownloadManager(self._cache_dir, persist_limit=persist_limit)

    def delete_file(self, key, value):
        try:
            os.remove(value)
            self.log(logging.INFO, 'artifact',
                {'filename': value},
                'Purged artifact {filename}')
        except (OSError, IOError):
            # The artifact may already have been removed from disk.
            pass

    @cachedmethod(operator.attrgetter('_cache'))
    def fetch(self, url, force=False):
        # We download to a temporary name like HASH[:16]-basename to
        # differentiate among URLs with the same basenames. We then extract the
        # build ID from the downloaded artifact and use it to make a human
        # readable unique name.
        hash = hashlib.sha256(url).hexdigest()[:16]
        fname = hash + '-' + os.path.basename(url)
        self.log(logging.INFO, 'artifact',
            {'path': os.path.abspath(mozpath.join(self._cache_dir, fname))},
            'Downloading to temporary location {path}')
        try:
            dl = self._download_manager.download(url, fname)
            if dl:
                dl.wait()
            # Version information is extracted from {application,platform}.ini
            # in the package itself.
            info = mozversion.get_version(mozpath.join(self._cache_dir, fname))
            buildid = info['platform_buildid'] or info['application_buildid']
            if not buildid:
                raise ValueError('Artifact for {url} existed, but no build ID could be extracted!'.format(url=url))
            newname = buildid + '-' + os.path.basename(url)
            os.rename(mozpath.join(self._cache_dir, fname), mozpath.join(self._cache_dir, newname))
            self.log(logging.INFO, 'artifact',
                {'path': os.path.abspath(mozpath.join(self._cache_dir, newname))},
                'Downloaded artifact to {path}')
            return os.path.abspath(mozpath.join(self._cache_dir, newname))
        finally:
            # Cancel any background downloads in progress.
            self._download_manager.cancel()

    def print_last_item(self, args, sorted_kwargs, result):
        url, = args
        self.log(logging.INFO, 'artifact',
            {'url': url},
            'Last installed binaries from url {url}')
        self.log(logging.INFO, 'artifact',
            {'filename': result},
            'Last installed binaries from local file {filename}')
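
# For illustration (the hash and build ID values are made up), fetch() above first
# writes a download of .../fennec-44.0a1.en-US.android-arm.apk to a temporary name
# like '3a7f0b2c9d1e4f56-fennec-44.0a1.en-US.android-arm.apk' and, once the build
# ID has been read from the embedded .ini files, renames it to something like
# '20151012030223-fennec-44.0a1.en-US.android-arm.apk'.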


class Artifacts(object):
    '''Maintain state to efficiently fetch build artifacts from a Firefox tree.'''

    def __init__(self, tree, job, log=None, cache_dir='.', hg='hg'):
        self._tree = tree
        self._job = job
        self._log = log
        self._hg = hg
        self._cache_dir = cache_dir

        self._pushhead_cache = PushHeadCache(self._hg, self._cache_dir, log=self._log)
        self._task_cache = TaskCache(self._cache_dir, log=self._log)
        self._artifact_cache = ArtifactCache(self._cache_dir, log=self._log)

    def log(self, *args, **kwargs):
        if self._log:
            self._log(*args, **kwargs)

    def install_from_file(self, filename, distdir):
        self.log(logging.INFO, 'artifact',
            {'filename': filename},
            'Installing from {filename}')

        # Copy all .so files to dist/bin, avoiding modification where possible.
        ensureParentDir(os.path.join(distdir, 'bin', '.dummy'))

        with zipfile.ZipFile(filename) as zf:
            for info in zf.infolist():
                if not info.filename.endswith('.so'):
                    continue
                n = os.path.join(distdir, 'bin', os.path.basename(info.filename))
                fh = FileAvoidWrite(n, mode='r')
                shutil.copyfileobj(zf.open(info), fh)
                file_existed, file_updated = fh.close()
                self.log(logging.INFO, 'artifact',
                    {'updating': 'Updating' if file_updated else 'Not updating', 'filename': n},
                    '{updating} {filename}')
        return 0

    def install_from_url(self, url, distdir):
        self.log(logging.INFO, 'artifact',
            {'url': url},
            'Installing from {url}')
        with self._artifact_cache as artifact_cache:  # The with block handles persistence.
            filename = artifact_cache.fetch(url)
        return self.install_from_file(filename, distdir)

    def install_from_hg(self, revset, distdir):
        if not revset:
            revset = '.'
        if len(revset) != 40:
            revset = subprocess.check_output([self._hg, 'log', '--template', '{node}\n', '-r', revset]).strip()
            if len(revset.split('\n')) != 1:
                raise ValueError('hg revision specification must resolve to exactly one commit')

        self.log(logging.INFO, 'artifact',
            {'revset': revset},
            'Installing from local revision {revset}')

        url = None
        with self._task_cache as task_cache, self._pushhead_cache as pushhead_cache:
            # The with blocks handle persistence.
            for pushhead in pushhead_cache.pushheads(self._tree, revset):
                self.log(logging.DEBUG, 'artifact',
                    {'pushhead': pushhead},
                    'Trying to find artifacts for pushhead {pushhead}.')
                try:
                    url = task_cache.artifact_url(self._tree, self._job, pushhead)
                    self.log(logging.INFO, 'artifact',
                        {'pushhead': pushhead},
                        'Installing from remote pushhead {pushhead}')
                    break
                except ValueError:
                    pass
        if url:
            return self.install_from_url(url, distdir)
        self.log(logging.ERROR, 'artifact',
            {'revset': revset},
            'No built artifacts for {revset} found.')
        return 1

    def install_from(self, source, distdir):
        if source and os.path.isfile(source):
            return self.install_from_file(source, distdir)
        elif source and urlparse.urlparse(source).scheme:
            return self.install_from_url(source, distdir)
        else:
            return self.install_from_hg(source, distdir)

    def print_last(self):
        self.log(logging.INFO, 'artifact',
            {},
            'Printing last used artifact details.')
        self._pushhead_cache.print_last()
        self._task_cache.print_last()
        self._artifact_cache.print_last()

    def clear_cache(self):
        self.log(logging.INFO, 'artifact',
            {},
            'Deleting cached artifacts and caches.')
        self._pushhead_cache.clear_cache()
        self._task_cache.clear_cache()
        self._artifact_cache.clear_cache()

    def print_cache(self):
        self.log(logging.INFO, 'artifact',
            {},
            'Printing cached artifacts and caches.')
        self._pushhead_cache.print_cache()
        self._task_cache.print_cache()
        self._artifact_cache.print_cache()