tenfourfox/testing/taskcluster/taskcluster_graph/image_builder.py

import hashlib
import json
import os
import tarfile
import urllib2

import taskcluster_graph.transform.routes as routes_transform
from slugid import nice as slugid
from taskcluster_graph.templates import Templates

# Parent of the directory containing this module; holds 'tasks/' and 'routes.json'.
TASKCLUSTER_ROOT = os.path.abspath(os.path.join(os.path.dirname(os.path.realpath(__file__)), '..'))
# Task template loaded when an image build task has to be added to the graph.
IMAGE_BUILD_TASK = os.path.join(TASKCLUSTER_ROOT, 'tasks', 'image.yml')
# Two directory levels above TASKCLUSTER_ROOT; in-tree paths such as
# 'testing/docker' are resolved against it.
GECKO = os.path.realpath(os.path.join(TASKCLUSTER_ROOT, '..', '..'))
# Directory holding one sub-directory per in-tree docker image.
DOCKER_ROOT = os.path.join(GECKO, 'testing', 'docker')
# Default registry, read once at import time.  An image directory may override
# it with its own REGISTRY file (see docker_image).  The file is opened with
# `with` so the handle is closed deterministically.
with open(os.path.join(DOCKER_ROOT, 'REGISTRY')) as _registry_file:
    REGISTRY = _registry_file.read().strip()
# Index lookup URL; placeholders are filled with project, image name, context hash.
INDEX_URL = 'https://index.taskcluster.net/v1/task/docker.images.v1.{}.{}.hash.{}'
# Artifact URL; placeholders are filled with task ID and artifact path.
ARTIFACT_URL = 'https://queue.taskcluster.net/v1/task/{}/artifacts/{}'
# Scope string template; the placeholder is filled with the image task's workerType.
DEFINE_TASK = 'queue:define-task:aws-provisioner-v1/{}'

def is_docker_registry_image(registry_path):
    ''' Returns True when registry_path names an existing regular file '''
    registry_file_present = os.path.isfile(registry_path)
    return registry_file_present

def docker_image(name):
    '''
    Determine the docker tag/revision from an in tree docker file.

    Reads DOCKER_ROOT/<name>/VERSION for the tag.  The repository defaults to
    the module-level REGISTRY and is overridden by DOCKER_ROOT/<name>/REGISTRY
    when that file exists.

    Returns a string of the form '<repository>/<name>:<version>'.  Raises
    IOError if the VERSION file cannot be opened.
    '''
    image_dir = os.path.join(DOCKER_ROOT, name)

    # Use context managers so the file handles are closed promptly instead of
    # being left for the garbage collector.
    with open(os.path.join(image_dir, 'VERSION')) as version_file:
        version = version_file.read().strip()

    repository = REGISTRY
    repository_path = os.path.join(image_dir, 'REGISTRY')
    if os.path.isfile(repository_path):
        with open(repository_path) as repository_file:
            repository = repository_file.read().strip()

    return '{}/{}:{}'.format(repository, name, version)

def task_id_for_image(seen_images, project, name):
    '''
    Return the task ID providing the image `name`, recording it in seen_images.

    An image already present in seen_images is answered from there.  Otherwise
    the index is consulted for the image's context hash; an indexed task is
    recorded with only its 'taskId'.  When nothing is indexed, a fresh slugid
    is recorded together with the context 'path' and 'hash'.
    '''
    if name in seen_images:
        return seen_images[name]['taskId']

    path = os.path.join('testing', 'docker', name)
    digest = generate_context_hash(path)
    indexed_task_id = get_task_id_for_namespace(project, name, digest)

    if indexed_task_id:
        entry = {'taskId': indexed_task_id}
    else:
        entry = {'taskId': slugid(), 'path': path, 'hash': digest}

    seen_images[name] = entry
    return entry['taskId']

def image_artifact_exists_for_task_id(task_id, path):
    '''
    Verifies that the artifact exists for the task ID.

    Issues a HEAD request against the artifact URL built from task_id and
    path.  Returns True when the request succeeds and False when it raises
    urllib2.HTTPError.  Other errors are not caught here and propagate.
    '''
    request = urllib2.Request(ARTIFACT_URL.format(task_id, path))
    # urllib2 has no direct way to select the verb; overriding get_method
    # turns the request into a HEAD so the artifact body is not downloaded.
    request.get_method = lambda: 'HEAD'
    try:
        response = urllib2.urlopen(request)
    except urllib2.HTTPError:
        return False
    # Only the status matters; release the connection right away.
    response.close()
    return True

def get_task_id_for_namespace(project, name, context_hash):
    '''
    Determine the Task ID for an indexed image.

    As an optimization, if the context hash exists for mozilla-central, that
    image task ID will be used.  The reasoning is that everything eventually
    ends up on mozilla-central; if most tasks use it as the common image for a
    given context hash, a worker within Taskcluster does not need to hold the
    same image once per branch.

    Returns the task ID of the first indexed task whose 'public/image.tar'
    artifact still exists, or None when no namespace yields one.
    '''
    namespaces = ['mozilla-central']
    # Avoid asking the index the same question twice when the project is
    # already mozilla-central.
    if project != 'mozilla-central':
        namespaces.append(project)

    for namespace in namespaces:
        image_index_url = INDEX_URL.format(namespace, name, context_hash)
        try:
            response = urllib2.urlopen(image_index_url)
        except urllib2.HTTPError:
            # Nothing usable indexed under this namespace; try the next one.
            continue

        try:
            task = json.load(response)
        finally:
            response.close()

        # Only return the task ID if the artifact exists for the indexed task
        # and hasn't expired.  Otherwise keep looking: mozilla-central might
        # have an expired artifact while 'project' does not.  No task ID is
        # returned only once all namespaces have been tried.
        if image_artifact_exists_for_task_id(task['taskId'], 'public/image.tar'):
            return task['taskId']

    return None

def generate_context_hash(image_path):
    '''
    Generates a sha256 hash for the context directory used to build an image.

    All files below GECKO/<image_path> are collected and sorted by path.  Each
    file's contents are hashed, and the final hash is computed over the lines
    '<file sha256 hex>\\t<file path>\\n', so it changes both when file contents
    change and when files move within the context directory.

    Returns the hex digest of the combined hash.
    '''
    # Read in fixed-size chunks so large context files are never loaded into
    # memory in one piece; the digest is identical to hashing the whole
    # content at once.
    chunk_size = 64 * 1024

    context_hash = hashlib.sha256()
    files = []

    for dirpath, _, filenames in os.walk(os.path.join(GECKO, image_path)):
        for filename in filenames:
            files.append(os.path.join(dirpath, filename))

    for filename in sorted(files):
        file_hash = hashlib.sha256()
        with open(filename, 'rb') as f:
            for chunk in iter(lambda: f.read(chunk_size), b''):
                file_hash.update(chunk)
        # NOTE(review): `filename` is the full joined path (it includes GECKO),
        # so the digest also depends on where the tree is checked out.  Kept
        # as-is because changing it would change every existing hash; confirm
        # whether this is intended.
        context_hash.update(file_hash.hexdigest() + '\t' + filename + '\n')

    return context_hash.hexdigest()

def create_context_tar(context_dir, destination, image_name):
    '''
    Creates a gzipped tar file of a particular context directory.

    context_dir is added recursively to the archive under the top-level name
    image_name.  Missing parent directories of destination are created first.
    '''
    destination_dir = os.path.dirname(destination)
    # A bare file name has an empty dirname; os.makedirs('') would raise, and
    # there is nothing to create in that case anyway.
    if destination_dir and not os.path.exists(destination_dir):
        os.makedirs(destination_dir)

    with tarfile.open(destination, 'w:gz') as tar:
        tar.add(context_dir, arcname=image_name)

def image_requires_building(details):
    ''' Tells whether details carries both the 'path' and 'hash' of an image to build '''
    has_context_path = 'path' in details
    has_context_hash = 'hash' in details
    return has_context_path and has_context_hash

def create_image_task_parameters(params, name, details):
    '''
    Build the template parameters for an image build task.

    Returns a copy of params extended with the image's context hash and path,
    the artifact path, the image task ID and the image name.  params itself is
    left unmodified.
    '''
    merged = dict(params)
    merged.update({
        'context_hash': details['hash'],
        'context_path': details['path'],
        'artifact_path': 'public/image.tar',
        'image_slugid': details['taskId'],
        'image_name': name,
    })
    return merged

def get_image_details(seen_images, task_id):
    '''
    Look up the image whose recorded 'taskId' equals task_id.

    Returns a two-element list [name, details] for the first matching entry
    of seen_images, or None when no entry matches.

    The details may include a path and hash, indicating that the image
    requires building.
    '''
    matches = (
        [image_name, image_details]
        for image_name, image_details in seen_images.items()
        if image_details['taskId'] == task_id
    )
    return next(matches, None)

def get_json_routes():
    ''' Loads TASKCLUSTER_ROOT/routes.json and returns its 'docker_images' entry '''
    routes_path = os.path.join(TASKCLUSTER_ROOT, 'routes.json')
    with open(routes_path) as routes_fh:
        return json.load(routes_fh)['docker_images']

def normalize_image_details(graph, task, seen_images, params, decision_task_id):
    '''
    This takes a task-image payload and creates an image task to build that
    image.

    The task-image payload refers to the image by task ID.  All tasks within
    the graph requiring the same image end up requiring the same image build
    task, which is added to the graph at most once.

    graph is modified in place ('tasks' and 'scopes' are appended to), task
    gains a 'requires' list, and the matching entry of seen_images is marked
    with 'required': True once its build task has been added.  When
    decision_task_id is set, a tarball of the image context is also written
    and its artifact URL is passed to the task template as 'context_url'.

    Returns None.
    '''
    image = task['task']['payload']['image']
    # Anything that is not a mapping (a plain str or unicode image name) and
    # explicit 'docker-image' payloads need no build task.
    if not isinstance(image, dict) or image.get('type', 'docker-image') == 'docker-image':
        return

    if 'requires' not in task:
        task['requires'] = []

    # get_image_details returns None for an unknown task ID, in which case
    # this unpacking raises TypeError.
    name, details = get_image_details(seen_images, image['taskId'])

    if details.get('required', False) is True or not image_requires_building(details):
        # Either the build task was already added for an earlier task, or the
        # image does not need building.  Only the former adds a dependency.
        if 'required' in details:
            task['requires'].append(details['taskId'])
        return

    image_parameters = create_image_task_parameters(params, name, details)

    if decision_task_id:
        image_artifact_path = "public/decision_task/image_contexts/{}/context.tar.gz".format(name)
        destination = "/home/worker/artifacts/decision_task/image_contexts/{}/context.tar.gz".format(name)
        image_parameters['context_url'] = ARTIFACT_URL.format(decision_task_id, image_artifact_path)

        # NOTE(review): context_path is the relative 'testing/docker/<name>'
        # path recorded by task_id_for_image, so this presumably relies on the
        # working directory being the tree root; confirm.
        create_context_tar(image_parameters['context_path'], destination, name)

    templates = Templates(TASKCLUSTER_ROOT)
    image_task = templates.load(IMAGE_BUILD_TASK, image_parameters)
    if params['revision_hash']:
        routes_transform.decorate_task_treeherder_routes(
            image_task['task'],
            "{}.{}".format(params['project'], params['revision_hash'])
        )
        routes_transform.decorate_task_json_routes(image_task['task'],
                                                   get_json_routes(),
                                                   image_parameters)

    graph['tasks'].append(image_task)
    task['requires'].append(details['taskId'])

    define_task = DEFINE_TASK.format(
        image_task['task']['workerType']
    )

    # The graph needs the scope to define the image task, the scopes the image
    # task itself declares, and one route scope per route on the image task.
    graph['scopes'].append(define_task)
    graph['scopes'].extend(image_task['task'].get('scopes', []))
    route_scopes = ['queue:route:' + route for route in image_task['task'].get('routes', [])]
    graph['scopes'].extend(route_scopes)

    # Later tasks using this image only add a dependency on the build task.
    details['required'] = True