tenfourfox/testing/web-platform/update/fetchlogs.py
Cameron Kaiser c9b2922b70 hello FPR
2017-04-19 00:56:45 -07:00

100 lines
3.1 KiB
Python

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
import argparse
import cStringIO
import gzip
import json
import os
import requests
import urlparse
treeherder_base = "https://treeherder.mozilla.org/"
"""Simple script for downloading structured logs from treeherder.
For the moment this is specialised to work with web-platform-tests
logs; in due course it should move somewhere generic and get hooked
up to mach or similar"""
# Interpretation of the "job" list from
# https://github.com/mozilla/treeherder-service/blob/master/treeherder/webapp/api/utils.py#L18
def create_parser():
parser = argparse.ArgumentParser()
parser.add_argument("branch", action="store",
help="Branch on which jobs ran")
parser.add_argument("commit",
action="store",
help="Commit hash for push")
return parser
def download(url, prefix, dest, force_suffix=True):
if dest is None:
dest = "."
if prefix and not force_suffix:
name = os.path.join(dest, prefix + ".log")
else:
name = None
counter = 0
while not name or os.path.exists(name):
counter += 1
sep = "" if not prefix else "-"
name = os.path.join(dest, prefix + sep + str(counter) + ".log")
with open(name, "wb") as f:
resp = requests.get(url, stream=True)
for chunk in resp.iter_content(1024):
f.write(chunk)
def get_blobber_url(branch, job):
job_id = job["id"]
resp = requests.get(urlparse.urljoin(treeherder_base,
"/api/project/%s/artifact/?job_id=%i&name=Job%%20Info" % (branch,
job_id)))
job_data = resp.json()
if job_data:
assert len(job_data) == 1
job_data = job_data[0]
try:
details = job_data["blob"]["job_details"]
for item in details:
if item["value"] == "wpt_raw.log":
return item["url"]
except:
return None
def get_structured_logs(branch, commit, dest=None):
resp = requests.get(urlparse.urljoin(treeherder_base, "/api/project/%s/resultset/?revision=%s" % (branch, commit)))
revision_data = resp.json()
result_set = revision_data["results"][0]["id"]
resp = requests.get(urlparse.urljoin(treeherder_base, "/api/project/%s/jobs/?result_set_id=%s&count=2000&exclusion_profile=false" % (branch, result_set)))
job_data = resp.json()
for result in job_data["results"]:
job_type_name = result["job_type_name"]
if job_type_name.startswith("W3C Web Platform"):
url = get_blobber_url(branch, result)
if url:
prefix = result["platform"] # platform
download(url, prefix, None)
def main():
parser = create_parser()
args = parser.parse_args()
get_structured_logs(args.branch, args.commit)
if __name__ == "__main__":
main()