tenfourfox/testing/runtimes/writeruntimes.py

123 lines
3.9 KiB
Python

from argparse import ArgumentParser
from collections import defaultdict
import json
import os
import sys
import requests
here = os.path.abspath(os.path.dirname(__file__))
ACTIVE_DATA_URL = "http://activedata.allizom.org/query"
PERCENTILE = 0.5 # ignore the bottom PERCENTILE*100% of numbers
def query_activedata(suite, platforms=None):
platforms = ', "build.platform":%s' % json.dumps(platforms) if platforms else ''
query = """
{
"from":"unittest",
"limit":200000,
"groupby":["result.test"],
"select":{"value":"result.duration","aggregate":"average"},
"where":{"and":[
{"eq":{"suite":"%s"%s}},
{"gt":{"run.timestamp":"{{today-week}}"}}
]}
}
""" % (suite, platforms)
response = requests.post(ACTIVE_DATA_URL,
data=query,
stream=True)
response.raise_for_status()
data = response.json()["data"]
return data
def write_runtimes(data, suite, indir=here, outdir=here):
data = dict(data)
outfilename = os.path.join(outdir, "%s.runtimes.json" % suite)
infilename = os.path.join(indir, "%s.runtimes.json" % suite)
if not os.path.exists(outdir):
os.makedirs(outdir)
# read in existing data, if any
indata = None
if os.path.exists(infilename):
with open(infilename, 'r') as f:
indata = json.loads(f.read()).get('runtimes')
# identify a threshold of durations, below which we ignore
runtimes = []
for result in data.itervalues():
duration = int(result * 1000) if result else 0
if duration:
runtimes.append(duration)
runtimes.sort()
threshold = runtimes[int(len(runtimes) * PERCENTILE)]
# split the durations into two groups; ommitted and specified
ommitted = []
specified = indata if indata else {}
current_tests = []
for test, duration in data.iteritems():
current_tests.append(test)
duration = int(duration * 1000) if duration else 0
if duration > 0 and duration < threshold:
ommitted.append(duration)
if test in specified:
del specified[test]
elif duration >= threshold and test != "automation.py":
original = specified.get(test, 0)
if not original or abs(original - duration) > (original/20):
# only write new data if it's > 20% different than original
specified[test] = duration
# delete any test references no longer needed
to_delete = []
for test in specified:
if test not in current_tests:
to_delete.append(test)
for test in to_delete:
del specified[test]
avg = int(sum(ommitted)/len(ommitted))
results = {'excluded_test_average': avg,
'runtimes': specified}
with open(outfilename, 'w') as f:
f.write(json.dumps(results, indent=2, sort_keys=True))
def cli(args=sys.argv[1:]):
parser = ArgumentParser()
parser.add_argument('-o', '--output-directory', dest='outdir',
default=here, help="Directory to save runtime data.")
parser.add_argument('-i', '--input-directory', dest='indir',
default=here, help="Directory from which to read current runtime data.")
parser.add_argument('-p', '--platforms', default=None,
help="Comma separated list of platforms from which to generate data.")
parser.add_argument('-s', '--suite', dest='suite', default=None,
help="Suite for which to generate data.")
args = parser.parse_args(args)
if not args.suite:
raise ValueError("Must specify suite with the -u argument")
if ',' in args.suite:
raise ValueError("Passing multiple suites is not supported")
if args.platforms:
args.platforms = args.platforms.split(',')
data = query_activedata(args.suite, args.platforms)
write_runtimes(data, args.suite, indir=args.indir, outdir=args.outdir)
if __name__ == "__main__":
sys.exit(cli())