tenfourfox/testing/mozharness/scripts/b2g_bumper.py
Cameron Kaiser c9b2922b70 hello FPR
2017-04-19 00:56:45 -07:00

653 lines
26 KiB
Python
Executable File

#!/usr/bin/env python
# ***** BEGIN LICENSE BLOCK *****
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
# ***** END LICENSE BLOCK *****
""" b2g_bumper.py
Updates a gecko repo with up to date information from B2G repositories.
In particular, it updates gaia.json which is used by B2G desktop builds,
and updates the XML manifests used by device builds.
This is to tie the external repository revisions to a visible gecko commit
which appears on TBPL, so sheriffs can blame the appropriate changes.
"""
import os
import sys
from multiprocessing.pool import ThreadPool
import subprocess
import time
from urlparse import urlparse
try:
import simplejson as json
assert json
except ImportError:
import json
sys.path.insert(1, os.path.dirname(sys.path[0]))
from mozharness.base.errors import HgErrorList
from mozharness.base.vcs.vcsbase import VCSScript
from mozharness.mozilla import repo_manifest
from mozharness.base.log import ERROR
from mozharness.mozilla.mapper import MapperMixin
class B2GBumper(VCSScript, MapperMixin):
# Command-line options handed to the base class constructor: each entry is
# [list of option strings, dict of option settings].
config_options = [
    [
        ['--no-write'],
        dict(
            dest='do_write',
            action='store_const',
            const=False,
            help='disable writing in-tree manifests',
        ),
    ],
    [
        ['--device'],
        dict(
            dest='device_override',
            help='specific device to process',
        ),
    ],
]
def __init__(self, require_config_file=True):
    """Register the script's actions and default config, then set up run state.

    require_config_file is forwarded unchanged to the base class
    constructor.
    """
    every_action = [
        'delete-git-ref-cache',
        'import-git-ref-cache',
        'clobber',
        'check-treestatus',
        'checkout-gecko',
        'bump-gaia',
        'checkout-manifests',
        'massage-manifests',
        'commit-manifests',
        'push',
        'push-loop',
        'export-git-ref-cache',
    ]
    # Default config options
    baseline_config = {
        'treestatus_base_url': 'https://treestatus.mozilla.org',
        'log_max_rotate': 99,
        'do_write': True,
    }
    super(B2GBumper, self).__init__(
        config_options=self.config_options,
        all_actions=every_action,
        default_actions=['push-loop'],
        require_config_file=require_config_file,
        config=baseline_config,
    )

    # Device name -> loaded manifest.
    self.device_manifests = {}

    # "%s:%s" % (remote url, refname) -> revision hash.
    self._git_ref_cache = {}

    # File used to persist _git_ref_cache as json. Config key
    # 'git_ref_cache' wins; otherwise a file inside the work dir is used.
    fallback_cache_file = os.path.join(
        self.query_abs_dirs()['abs_work_dir'], 'git_ref_cache.json')
    self.git_ref_cache_file = self.config.get('git_ref_cache',
                                              fallback_cache_file)

    # Rewritten remote fetch url -> original upstream fetch url.
    self._remote_mappings = {}

    # Latest gaia revision we have for hg, and the matching git revision.
    self.gaia_hg_revision = None
    self.gaia_git_rev = None
# Helper methods {{{1
def query_abs_dirs(self):
    """Return the absolute directory map, extended with bumper directories.

    The base class map gains 'manifests_dir' and 'gecko_local_dir' (both
    under 'abs_work_dir'). The result is stored on self.abs_dirs and
    returned as-is on later calls.
    """
    cached = self.abs_dirs
    if cached:
        return cached

    dir_map = super(B2GBumper, self).query_abs_dirs()
    work_dir = dir_map['abs_work_dir']
    extra_dirs = {
        'manifests_dir': os.path.join(work_dir, 'manifests'),
        'gecko_local_dir': os.path.join(work_dir,
                                        self.config['gecko_local_dir']),
    }
    dir_map.update(extra_dirs)
    self.abs_dirs = dir_map
    return dir_map
def query_manifest(self, device_name):
    """Return the manifest for device_name, loading it on first use.

    Loaded manifests are remembered in self.device_manifests. The file is
    looked up under 'manifests_dir'; its name comes from the device's
    'manifest_file' config entry, defaulting to '<device_name>.xml'.
    """
    try:
        return self.device_manifests[device_name]
    except KeyError:
        pass

    dirs = self.query_abs_dirs()
    relative_name = self.config['devices'][device_name].get(
        'manifest_file', '%s.xml' % device_name)
    manifest_file = os.path.join(dirs['manifests_dir'], relative_name)
    self.info("Loading %s" % manifest_file)
    loaded = repo_manifest.load_manifest(manifest_file)
    self.device_manifests[device_name] = loaded
    return loaded
def filter_projects(self, device_config, manifest):
    """Remove from manifest every project path listed in 'ignore_projects'.

    A device config without an 'ignore_projects' entry is treated as
    having nothing to ignore, matching how filter_groups treats a missing
    'ignore_groups' entry. Each removed project is logged.
    """
    for p in device_config.get('ignore_projects', []):
        removed = repo_manifest.remove_project(manifest, path=p)
        if removed:
            self.info("Removed %s" % removed.toxml())
def filter_groups(self, device_config, manifest):
    """Remove from manifest the projects in each 'ignore_groups' group.

    A missing 'ignore_groups' entry means nothing is removed. Every
    removed element is logged.
    """
    ignored_groups = device_config.get('ignore_groups', [])
    for group_name in ignored_groups:
        dropped = repo_manifest.remove_group(manifest, group_name)
        for node in dropped:
            self.info("Removed %s" % node.toxml())
def map_remotes(self, manifest):
    """Rewrite the manifest's remotes using config['repo_remote_mappings'].

    For every rewritten remote, the new fetch url is recorded in
    self._remote_mappings against the fetch url it replaced.
    """
    def remap(remote_node):
        # Capture the upstream url before the remote is mapped.
        upstream_fetch = remote_node.getAttribute('fetch')
        remapped = repo_manifest.map_remote(
            remote_node, self.config['repo_remote_mappings'])
        self._remote_mappings[remapped.getAttribute('fetch')] = upstream_fetch
        return remapped

    repo_manifest.rewrite_remotes(manifest, remap)
def resolve_git_ref(self, remote_url, revision):
    """Resolve revision on remote_url to a revision hash via git ls-remote.

    The command is attempted up to 5 times, sleeping 30 seconds after each
    non-zero exit. On success the hash is stored in self._git_ref_cache
    under "<remote_url>:<revision>" and returned.

    Returns None when every attempt fails, or when git exits successfully
    but prints nothing (the ref does not exist on the remote).
    """
    cache_key = "%s:%s" % (remote_url, revision)
    cmd = ['git', 'ls-remote', remote_url, revision]
    self.debug("Running %s" % cmd)
    # Retry this a few times, in case there are network errors or somesuch
    max_retries = 5
    for _ in range(max_retries):
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT)
        # communicate() drains the pipe while waiting for exit. Calling
        # wait() first can deadlock if the child fills the pipe buffer.
        output, _unused_stderr = proc.communicate()
        if proc.returncode != 0:
            self.warning("Returned %i - sleeping and retrying" %
                         proc.returncode)
            self.warning("%s - got output: %s" % (cache_key, output))
            time.sleep(30)
            continue
        self.info("%s - got output: %s" % (cache_key, output))
        try:
            abs_revision = output.split()[0].strip()
        except IndexError:
            # Couldn't split the output properly
            self.warning("no output from: git ls-remote %s %s" % (remote_url, revision))
            return None
        self._git_ref_cache[cache_key] = abs_revision
        return abs_revision
    return None
def resolve_refs(self, manifest):
    """Pin every project in manifest to an absolute revision.

    Projects already locked to a commit id are skipped. The gaia project
    is pinned to the git revision matching gaia.json when
    self.gaia_hg_revision is set and the project tracks
    config['gaia_git_branch']. All other refs are resolved with
    resolve_git_ref on a thread pool, reusing self._git_ref_cache and any
    lookup already in flight for the same remote/ref.

    If any ref cannot be resolved, an error and a "needs sync?" hint are
    logged for each affected project and self.fatal() is called.
    """
    worker_pool = ThreadPool(20)
    lookup_threads_by_project = {}
    lookup_threads_by_parameters = {}
    try:
        # Resolve refnames
        for p in manifest.getElementsByTagName('project'):
            name = p.getAttribute('name')
            remote_url = repo_manifest.get_project_remote_url(manifest, p)
            revision = repo_manifest.get_project_revision(manifest, p)

            # commit ids are already done
            if repo_manifest.is_commitid(revision):
                self.debug("%s is already locked to %s; skipping" %
                           (name, revision))
                continue

            # gaia is special - make sure we're using the same revision we
            # used for gaia.json
            if self.gaia_hg_revision and p.getAttribute('path') == 'gaia' and revision == self.config['gaia_git_branch']:
                git_rev = self.query_gaia_git_rev()
                self.info("Using %s for gaia to match %s in gaia.json" % (git_rev, self.gaia_hg_revision))
                p.setAttribute('revision', git_rev)
                continue

            # If there's no '/' in the revision, assume it's a head
            if '/' not in revision:
                revision = 'refs/heads/%s' % revision

            cache_key = "%s:%s" % (remote_url, revision)

            # Check to see if we've looked up this revision on this remote
            # before. If we have, reuse the previous value rather than
            # looking it up again. This will make sure revisions for the
            # same ref name are consistent between devices, as long as they
            # use the same remote/refname.
            if cache_key in self._git_ref_cache:
                abs_revision = self._git_ref_cache[cache_key]
                self.debug(
                    "Reusing previous lookup %s -> %s" %
                    (cache_key, abs_revision))
                p.setAttribute('revision', abs_revision)
                continue

            # Maybe a lookup is already running for this key, even if its
            # result has not yet reached _git_ref_cache. result.get() can
            # be called many times; the git command only runs once. So
            # several projects can share one async result.
            if cache_key in lookup_threads_by_parameters:
                self.debug("Reusing currently running thread to look up %s" % cache_key)
                lookup_threads_by_project[p] = lookup_threads_by_parameters[cache_key]
            else:
                async_result = worker_pool.apply_async(self.resolve_git_ref,
                                                       (remote_url, revision))
                lookup_threads_by_parameters[cache_key] = async_result
                lookup_threads_by_project[p] = async_result
    finally:
        # Nothing more will be queued. Closing lets the pool's worker
        # threads exit once the outstanding lookups finish, instead of
        # lingering after every call. Results can still be fetched below.
        worker_pool.close()

    # TODO: alert/notify on missing repositories
    failed = []
    for p, result in lookup_threads_by_project.iteritems():
        abs_revision = result.get(timeout=300)
        if abs_revision:
            p.setAttribute('revision', abs_revision)
            continue
        # Leave the project's revision untouched rather than writing None.
        remote_url = repo_manifest.get_project_remote_url(manifest, p)
        revision = repo_manifest.get_project_revision(manifest, p)
        self.error("Couldn't resolve reference %s %s" % (remote_url, revision))
        failed.append(p)

    if failed:
        # Write message about how to set up syncing
        default = repo_manifest.get_default(manifest)
        for p in failed:
            if p.hasAttribute('remote'):
                remote = repo_manifest.get_remote(manifest, p.getAttribute('remote'))
            else:
                remote = repo_manifest.get_remote(manifest, default.getAttribute('remote'))

            new_fetch_url = remote.getAttribute('fetch')
            orig_fetch_url = self._remote_mappings[new_fetch_url]
            name = p.getAttribute('name')
            self.info("needs sync? %s/%s -> %s/%s" % (orig_fetch_url, name, new_fetch_url, name))

        self.fatal("couldn't resolve some refs; exiting")
def query_manifest_path(self, device):
    """Return the in-tree sources.xml path for device.

    The path is <gecko_local_dir>/b2g/config/<device dir>/sources.xml,
    where the device dir is the device's 'gecko_device_dir' config entry,
    or the device name when that entry is absent.
    """
    dirs = self.query_abs_dirs()
    device_dir = self.config['devices'][device].get('gecko_device_dir',
                                                    device)
    path_parts = [
        dirs['gecko_local_dir'],
        'b2g',
        'config',
        device_dir,
        'sources.xml',
    ]
    return os.path.join(*path_parts)
def hg_add(self, repo_path, path):
    """
    Runs 'hg add' on path, using repo_path as the working directory
    """
    add_cmd = self.query_exe('hg', return_type='list') + ['add', path]
    self.run_command(add_cmd, cwd=repo_path)
def hg_commit(self, repo_path, message):
    """
    Commits changes in repo_path as config['hg_user'] with the given
    commit message. Returns True when the hg command exits with status 0.
    """
    hg_user = self.config['hg_user']
    commit_cmd = self.query_exe('hg', return_type='list')
    commit_cmd = commit_cmd + ['commit', '-u', hg_user, '-m', message]
    commit_env = self.query_env(partial_env={'LANG': 'en_US.UTF-8'})
    exit_status = self.run_command(commit_cmd, cwd=repo_path,
                                   env=commit_env)
    return exit_status == 0
def hg_push(self, repo_path):
    """Push repo_path to config['gecko_push_url'] over ssh.

    Returns True when the push exits with status 0. Otherwise outgoing
    changesets are stripped, the working copy is reset and purged, and
    False is returned, so the caller can retry from a known state.
    """
    hg = self.query_exe('hg', return_type='list')
    ssh_command = "ssh -oIdentityFile=%s -l %s" % (
        self.config["ssh_key"], self.config["ssh_user"],
    )
    push_command = hg + ["push", "-e", ssh_command,
                         self.config["gecko_push_url"]]
    if self.run_command(push_command, cwd=repo_path,
                        error_list=HgErrorList) == 0:
        return True

    # We failed; get back to a known state so we can either retry
    # or fail out and continue later.
    recovery_steps = (
        ["--config", "extensions.mq=", "strip", "--no-backup", "outgoing()"],
        ["up", "-C"],
        ["--config", "extensions.purge=", "purge", "--all"],
    )
    for step_args in recovery_steps:
        self.run_command(hg + step_args, cwd=repo_path)
    return False
def _read_json(self, path):
    """Return the parsed json contents of path.

    Logs an error and returns None when the file does not exist or does
    not contain valid json.
    """
    if os.path.exists(path):
        raw = self.read_from_file(path)
        try:
            parsed = json.loads(raw)
        except ValueError:
            self.error("%s is invalid json!" % path)
            return None
        return parsed

    self.error("%s doesn't exist!" % path)
    return None
def get_revision_list(self, repo_config, prev_revision=None):
    """Download the push log for a repo and return its recent pushes.

    repo_config supplies 'polling_url', 'repo_name' and optionally
    'branch' (default 'default'). When prev_revision is given, it is
    appended to the url as a 'fromchange' parameter.

    Returns None if the download fails, otherwise a list of push entries
    in ascending numeric push-id order whose last changeset is on the
    requested branch, truncated to config['gaia_max_revisions'] entries.
    """
    poll_url = repo_config['polling_url']
    wanted_branch = repo_config.get('branch', 'default')
    limit = self.config['gaia_max_revisions']
    dirs = self.query_abs_dirs()
    if prev_revision:
        # hgweb json-pushes hardcode
        poll_url = poll_url + '&fromchange=%s' % prev_revision
    pushes_file = os.path.join(dirs['abs_work_dir'],
                               '%s.json' % repo_config['repo_name'])

    # might be nice to have a load-from-url option; til then,
    # download then read
    downloaded = self.retry(
        self.download_file,
        args=(poll_url, ),
        kwargs={'file_name': pushes_file},
        error_level=ERROR,
        sleeptime=0,
    )
    if downloaded != pushes_file:
        return None

    pushes = json.loads(self.read_from_file(pushes_file))
    if not pushes:
        return []

    # Keep only pushes on the branch we care about, in numeric id order.
    on_branch = [
        pushes[push_id]
        for push_id in sorted(pushes, key=int)
        if pushes[push_id]['changesets'][-1]['branch'] == wanted_branch
    ]
    # Limit the list to max_revisions.
    return on_branch[:limit]
def update_gaia_json(self, path,
                     hg_revision, hg_repo_path,
                     git_revision, git_repo,
                     ):
    """ Rewrite the json file at path with the given hg and git revisions.

    Returns -1 if path does not exist, 0 if the file already records
    hg_revision (nothing is written), -2 if writing fails, and None after
    a successful write. A differing repo_path in the current file is
    logged as an error but does not stop the update.
    """
    if not os.path.exists(path):
        self.add_summary(
            "%s doesn't exist; can't update with repo_path %s revision %s!" %
            (path, hg_repo_path, hg_revision),
            level=ERROR,
        )
        return -1

    current = self._read_json(path)
    if current:
        current_repo_path = current.get("repo_path")
        if current_repo_path != hg_repo_path:
            self.error("Current repo_path %s differs from %s!" % (str(current_repo_path), hg_repo_path))
        if current.get("revision") == hg_revision:
            self.info("Revision %s is the same. No action needed." % hg_revision)
            self.add_summary("Revision %s is unchanged for repo_path %s." % (hg_revision, hg_repo_path))
            return 0

    git_info = {
        "remote": git_repo,
        "branch": "",
        "git_revision": git_revision,
    }
    updated = {
        "repo_path": hg_repo_path,
        "revision": hg_revision,
        "git": git_info,
    }
    serialized = json.dumps(updated, indent=4) + "\n"
    if self.write_to_file(path, serialized) != path:
        self.add_summary(
            "Unable to update %s with new revision %s!" % (path, hg_revision),
            level=ERROR,
        )
        return -2
def build_commit_message(self, revision_list, repo_name, repo_url):
    """Build the utf-8 encoded commit message for a gaia.json bump.

    The first line gives the changeset count and repo_name. It is followed
    by one section per changeset (url, author, description), walking
    revision_list and each entry's changesets in reverse order.
    """
    nodes = []
    sections = []
    for push in reversed(revision_list):
        for changeset in reversed(push['changesets']):
            node = changeset['node']
            nodes.append(node)
            sections.append("\n========\n")
            sections.append(u'\n%s/rev/%s\nAuthor: %s\nDesc: %s\n' % (
                repo_url,
                node[:12],
                changeset['author'],
                changeset['desc'],
            ))

    header = 'Bumping gaia.json for %d %s revision(s) a=gaia-bump\n' % (
        len(nodes),
        repo_name
    )
    return (header + ''.join(sections)).encode("utf-8")
def query_treestatus(self):
    """Return True if we can land based on treestatus.

    The tree name is config['treestatus_tree'] when set, otherwise the
    last path component of config['gecko_pull_url']. The status json is
    downloaded into the work dir (created if missing).

    Returns False only when the reported status is 'closed'. If the status
    cannot be downloaded, parsed, or lacks a 'status' entry, we assume we
    can land and return True.
    """
    c = self.config
    dirs = self.query_abs_dirs()
    # Only read gecko_pull_url when no explicit tree name is configured.
    if 'treestatus_tree' in c:
        tree = c['treestatus_tree']
    else:
        tree = os.path.basename(c['gecko_pull_url'].rstrip("/"))
    treestatus_url = "%s/%s?format=json" % (c['treestatus_base_url'], tree)
    treestatus_json = os.path.join(dirs['abs_work_dir'], 'treestatus.json')
    if not os.path.exists(dirs['abs_work_dir']):
        self.mkdir_p(dirs['abs_work_dir'])

    if self.download_file(treestatus_url, file_name=treestatus_json) != treestatus_json:
        # Failed to check tree status...assume we can land
        self.info("failed to check tree status - assuming we can land")
        return True

    treestatus = self._read_json(treestatus_json)
    # _read_json returns None for a missing or invalid file. Treat that,
    # or a payload without 'status', like a failed download.
    if not isinstance(treestatus, dict) or 'status' not in treestatus:
        self.warning("couldn't parse tree status - assuming we can land")
        return True

    if treestatus['status'] != 'closed':
        self.info("treestatus is %s - assuming we can land" % repr(treestatus['status']))
        return True

    return False
def query_devices(self):
    """Return the device configs to process, keyed by device name.

    With a 'device_override' in the config, only that device is returned;
    otherwise all of config['devices'] is returned.
    """
    chosen = self.config.get('device_override')
    all_devices = self.config['devices']
    if not chosen:
        return all_devices
    return {chosen: all_devices[chosen]}
def query_gaia_git_rev(self):
    """Return the gaia git revision matching self.gaia_hg_revision.

    Returns None when no hg revision is known. The mapper is queried only
    when self.gaia_git_rev is not set yet; the answer is stored there.
    """
    hg_rev = self.gaia_hg_revision
    if not hg_rev:
        return None
    if self.gaia_git_rev:
        return self.gaia_git_rev

    self.gaia_git_rev = self.query_mapper_git_revision(
        self.config['mapper_url'],
        self.config['gaia_mapper_project'],
        hg_rev,
    )
    return self.gaia_git_rev
# Actions {{{1
def check_treestatus(self):
    """Exit the script with status 0 when query_treestatus() is false."""
    if self.query_treestatus():
        return
    self.info("breaking early since treestatus is closed")
    sys.exit(0)
def checkout_gecko(self):
    """Check out config['gecko_pull_url'] into 'gecko_local_dir' via hgtool.

    The tag is config['gecko_tag'], or 'default' when that is not set.
    """
    cfg = self.config
    dest = self.query_abs_dirs()['gecko_local_dir']
    gecko_repo = {
        'repo': cfg['gecko_pull_url'],
        'tag': cfg.get('gecko_tag', 'default'),
        'dest': dest,
        'vcs': 'hgtool',
        'hgtool_base_bundle_urls': cfg.get('hgtool_base_bundle_urls'),
    }
    self.vcs_checkout_repos([gecko_repo])
def checkout_manifests(self):
    """Check out config['manifests_repo'] at config['manifests_revision']
    into 'manifests_dir' via gittool."""
    cfg = self.config
    dirs = self.query_abs_dirs()
    manifests_repo = {
        'vcs': 'gittool',
        'repo': cfg['manifests_repo'],
        'revision': cfg['manifests_revision'],
        'dest': dirs['manifests_dir'],
    }
    self.vcs_checkout_repos([manifests_repo])
def massage_manifests(self):
    """
    For each device returned by query_devices(), strip the projects listed
    in 'ignore_projects' and the groups listed in 'ignore_groups', map the
    remote urls, then resolve absolute refs for projects that aren't fully
    specified.

    The cleaned-up manifest is kept in self.device_manifests and, when
    config['do_write'] is set, written to the device's in-tree path with a
    trailing newline.
    """
    for device, device_config in self.query_devices().items():
        self.info("Massaging manifests for %s" % device)
        manifest = self.query_manifest(device)

        # Each step edits the manifest in place.
        self.filter_projects(device_config, manifest)
        self.filter_groups(device_config, manifest)
        self.map_remotes(manifest)
        self.resolve_refs(manifest)
        repo_manifest.cleanup(manifest)
        self.device_manifests[device] = manifest

        target_path = self.query_manifest_path(device)
        xml_text = manifest.toxml()
        if not xml_text.endswith("\n"):
            xml_text = xml_text + "\n"

        if not self.config['do_write']:
            continue
        self.mkdir_p(os.path.dirname(target_path))
        self.write_to_file(target_path, xml_text)
def commit_manifests(self):
    """Run hg add on every device's manifest path, then commit them.

    Returns the result of hg_commit.
    """
    repo_path = self.query_abs_dirs()['gecko_local_dir']
    for device in self.query_devices():
        self.hg_add(repo_path, self.query_manifest_path(device))

    return self.hg_commit(repo_path, "Bumping manifests a=b2g-bump")
def bump_gaia(self):
    """Update and commit the gaia revision file if gaia has new pushes.

    The revision currently recorded in config['gaia_revision_file'] (if
    readable) is used as the starting point for get_revision_list and is
    remembered in self.gaia_hg_revision. Returns False when there are no
    new revisions. Otherwise the file is updated to the newest changeset,
    a commit is made, and True is returned.
    """
    repo_path = self.query_abs_dirs()['gecko_local_dir']
    gaia_json_path = os.path.join(repo_path,
                                  self.config['gaia_revision_file'])
    current = self._read_json(gaia_json_path)

    # Get the list of changes
    prev_revision = None
    if current:
        prev_revision = current.get('revision')
        self.gaia_hg_revision = prev_revision

    repo_config = {
        'polling_url': "%s/json-pushes?full=1" % self.config['gaia_repo_url'],
        'branch': self.config.get('gaia_branch', 'default'),
        'repo_name': 'gaia',
    }
    revision_list = self.get_revision_list(repo_config=repo_config,
                                           prev_revision=prev_revision)
    if not revision_list:
        # No changes
        return False

    # Update the gaia.json with the list of changes
    hg_gaia_repo_path = urlparse(self.config['gaia_repo_url']).path.lstrip('/')
    newest_node = revision_list[-1]['changesets'][-1]['node']
    self.gaia_hg_revision = newest_node
    git_revision = self.query_gaia_git_rev()
    self.update_gaia_json(
        gaia_json_path,
        newest_node, hg_gaia_repo_path,
        git_revision, self.config['gaia_git_repo'],
    )

    # Commit
    self.hg_commit(
        repo_path,
        self.build_commit_message(revision_list, 'gaia',
                                  self.config['gaia_repo_url']),
    )
    return True
def push(self):
    """Push the local gecko checkout; returns hg_push's result."""
    return self.hg_push(self.query_abs_dirs()['gecko_local_dir'])
def push_loop(self):
    """Bump, commit and push, retrying up to 5 times when the push fails.

    Each attempt checks treestatus, checks out gecko, bumps gaia (unless
    config 'skip_gaia_json' is set), then checks out, massages and commits
    the manifests. The loop ends when the tree is closed, when nothing
    changed, or when the push succeeds. If every attempt fails to push,
    self.fatal() is called.
    """
    max_retries = 5
    for _attempt in range(max_retries):
        if not self.query_treestatus():
            # Tree is closed; exit early to avoid a bunch of wasted time
            self.info("breaking early since treestatus is closed")
            return

        self.checkout_gecko()
        gaia_bumped = (not self.config.get('skip_gaia_json')) and self.bump_gaia()

        self.checkout_manifests()
        self.massage_manifests()
        manifests_committed = self.commit_manifests()

        if not (gaia_bumped or manifests_committed):
            # Nothing changed, we're all done
            self.info("No changes - all done")
            return

        if self.push():
            # We did it! Hurray!
            self.info("Great success!")
            return

        # If we're here, then the push failed. It also stripped any
        # outgoing commits, so we should be in a pristine state again.
        # Empty our local cache of manifests so they get loaded again next
        # time through this loop. This makes sure we get fresh upstream
        # manifests, and avoids problems like bug 979080
        self.device_manifests = {}

        # Sleep before trying again
        self.info("Sleeping 60 before trying again")
        time.sleep(60)

    self.fatal("Didn't complete successfully (hit max_retries)")
def import_git_ref_cache(self):
    """ This action imports the git ref cache created during a previous run. This is
    useful for sharing the cache across multiple branches (for example).

    Nothing happens when the cache file does not exist. A cache file that
    cannot be parsed into a dictionary is ignored with a warning, so the
    in-memory cache always stays a dictionary.
    """
    if not os.path.exists(self.git_ref_cache_file):
        return
    cached = self._read_json(self.git_ref_cache_file)
    # _read_json returns None on invalid json. Storing that (or any
    # non-dict) would break later "key in self._git_ref_cache" checks.
    if isinstance(cached, dict):
        self._git_ref_cache = cached
    else:
        self.warning("Ignoring unusable git ref cache file %s" % self.git_ref_cache_file)
def export_git_ref_cache(self):
    """ This action writes the git ref cache built during this run to
    self.git_ref_cache_file as sorted, indented json, so that it can be
    shared (for example across multiple branches).

    Returns -2 when the file could not be written, otherwise None.
    """
    target = self.git_ref_cache_file
    payload = json.dumps(self._git_ref_cache, sort_keys=True, indent=4) + "\n"
    if self.write_to_file(target, payload) == target:
        return None
    self.add_summary(
        "Unable to update %s with git ref cache" % target,
        level=ERROR,
    )
    return -2
def delete_git_ref_cache(self):
    """ Used to delete the git ref cache from the file system. The cache can be used
    to persist git ls-remote lookup results, for example to reuse them between b2g bumper
    runs. Since the results are stale and do not get updated, the cache should be
    periodically deleted, so that the new refs can be fetched. The cache can also be used
    across branches/devices.

    A missing cache file is not an error: there is simply nothing to delete.
    """
    cache_file = self.git_ref_cache_file
    # The file name must be interpolated here; the logger is passed the
    # finished message.
    self.log("Deleting git ls-remote look-up cache file ('%s')..." % cache_file)
    if os.path.exists(cache_file):
        os.remove(cache_file)
    else:
        self.log("Cache file '%s' does not exist; nothing to delete" % cache_file)
# __main__ {{{1
if __name__ == '__main__':
    # Script entry point: build the bumper and hand control to it.
    bumper = B2GBumper()
    bumper.run_and_exit()