scripts: add oe-build-perf-report script

A new tool for pretty-printing build perf test results stored in a Git
repository. The script can produce either a simple plaintext report
showing the difference between two commits, or an HTML report that also
displays trend charts of the test results. The script uses Jinja2
templates for generating the HTML reports, so python3-jinja2 must be
installed on the system.

[YOCTO #10931]

(From OE-Core rev: 3b25404f0f99b72f222bdca815929be1cf1cee35)

Signed-off-by: Markus Lehtonen <markus.lehtonen@linux.intel.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
6 changed files with 1182 additions and 0 deletions
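
For reference, a typical session might look like the following (the repository
path and host name are illustrative):

    # List the test runs stored in a results repository
    oe-build-perf-report -r ~/perf-results --list

    # Plaintext report comparing the two most recent tested revisions
    oe-build-perf-report -r ~/perf-results

    # HTML report with trend charts for a given host and branch
    oe-build-perf-report -r ~/perf-results --html -H buildhost -B master > report.html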

scripts/lib/build_perf/__init__.py
@@ -0,0 +1,31 @@
#
# Copyright (c) 2017, Intel Corporation.
#
# This program is free software; you can redistribute it and/or modify it
# under the terms and conditions of the GNU General Public License,
# version 2, as published by the Free Software Foundation.
#
# This program is distributed in the hope it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
"""Build performance test library functions"""
def print_table(rows, row_fmt=None):
"""Print data table"""
if not rows:
return
if not row_fmt:
row_fmt = ['{:{wid}} '] * len(rows[0])
# Go through the data to get maximum cell widths
num_cols = len(row_fmt)
col_widths = [0] * num_cols
for row in rows:
for i, val in enumerate(row):
col_widths[i] = max(col_widths[i], len(str(val)))
for row in rows:
print(*[row_fmt[i].format(col, wid=col_widths[i]) for i, col in enumerate(row)])
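# Illustrative example: with the default row format each column is
# left-justified and padded to its widest cell, so
#   print_table([['NAME', 'MEAN'], ['elapsed_time', '1:23:45']])
# prints two aligned columns; a format list such as
# ['{:{wid}} ', '{:>{wid}}'] would right-align the second column instead.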

scripts/lib/build_perf/html/__init__.py
@@ -0,0 +1,19 @@
#
# Copyright (c) 2017, Intel Corporation.
#
# This program is free software; you can redistribute it and/or modify it
# under the terms and conditions of the GNU General Public License,
# version 2, as published by the Free Software Foundation.
#
# This program is distributed in the hope it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
"""Helper module for HTML reporting"""
from jinja2 import Environment, PackageLoader
env = Environment(loader=PackageLoader('build_perf', 'html'))
template = env.get_template('report.html')
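# Illustrative use (see print_html_report() in oe-build-perf-report):
#   print(template.render(metadata=metadata, test_data=tests,
#                         chart_opts=chart_opts))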

scripts/lib/build_perf/html/measurement_chart.html
@@ -0,0 +1,50 @@
<script type="text/javascript">
google.charts.setOnLoadCallback(drawChart_{{ chart_elem_id }});
function drawChart_{{ chart_elem_id }}() {
var data = new google.visualization.DataTable();
// Chart options
var options = {
theme : 'material',
legend: 'none',
hAxis: { format: '', title: 'Commit number',
minValue: {{ chart_opts.haxis.min }},
maxValue: {{ chart_opts.haxis.max }} },
{% if measurement.type == 'time' %}
vAxis: { format: 'h:mm:ss' },
{% else %}
vAxis: { format: '' },
{% endif %}
pointSize: 5,
chartArea: { left: 80, right: 15 },
};
// Define data columns
data.addColumn('number', 'Commit');
data.addColumn('{{ measurement.value_type.gv_data_type }}',
'{{ measurement.value_type.quantity }}');
// Add data rows
data.addRows([
{% for sample in measurement.samples %}
[{{ sample.commit_num }}, {{ sample.mean.gv_value() }}],
{% endfor %}
]);
// Finally, draw the chart
    var chart_div = document.getElementById('{{ chart_elem_id }}');
var chart = new google.visualization.LineChart(chart_div);
google.visualization.events.addListener(chart, 'ready', function () {
//chart_div = document.getElementById('{{ chart_elem_id }}');
//chart_div.innerHTML = '<img src="' + chart.getImageURI() + '">';
      var png_div = document.getElementById('{{ chart_elem_id }}_png');
png_div.outerHTML = '<a id="{{ chart_elem_id }}_png" href="' + chart.getImageURI() + '">PNG</a>';
console.log("CHART READY: {{ chart_elem_id }}");
{% if last_chart == true %}
console.log("ALL CHARTS READY");
{% endif %}
//console.log(chart_div.innerHTML);
});
chart.draw(data, options);
}
</script>

scripts/lib/build_perf/html/report.html
@@ -0,0 +1,209 @@
<!DOCTYPE html>
<html lang="en">
<head>
{# Scripts for visualization #}
<!--START-OF-SCRIPTS-->
<script type="text/javascript" src="https://www.gstatic.com/charts/loader.js"></script>
<script type="text/javascript">
google.charts.load('current', {'packages':['corechart']});
</script>
{# Render measurement result charts #}
{% for test in test_data %}
{% set test_loop = loop %}
{% if test.status == 'SUCCESS' %}
{% for measurement in test.measurements %}
{% set chart_elem_id = test.name + '_' + measurement.name + '_chart' %}
{% if test_loop.last and loop.last %}
{% set last_chart = true %}
{% endif %}
{% include 'measurement_chart.html' %}
{% endfor %}
{% endif %}
{% endfor %}
<!--END-OF-SCRIPTS-->
{# Styles #}
<style>
.meta-table {
font-size: 14px;
text-align: left;
border-collapse: collapse;
}
.meta-table tr:nth-child(even) { background-color: #f2f2f2 }
.meta-table th, .meta-table td {
padding: 4px;
}
.summary {
margin: 0;
font-size: 14px;
text-align: left;
border-collapse: collapse;
}
.summary th, .summary td {
padding: 4px;
}
.measurement {
padding: 8px 0px 8px 8px;
border: 2px solid #f0f0f0;
margin-bottom: 10px;
}
.details {
margin: 0;
font-size: 12px;
text-align: left;
border-collapse: collapse;
}
.details th {
font-weight: normal;
padding-right: 8px;
}
.preformatted {
font-family: monospace;
white-space: pre-wrap;
background-color: #f0f0f0;
margin-left: 10px;
}
hr {
color: #f0f0f0;
}
h2 {
font-size: 20px;
margin-bottom: 0px;
color: #707070;
}
h3 {
font-size: 16px;
margin: 0px;
color: #707070;
}
</style>
<title>{{ title }}</title>
</head>
{% macro poky_link(commit) -%}
<a href="http://git.yoctoproject.org/cgit/cgit.cgi/poky/log/?id={{ commit }}">{{ commit[0:11] }}</a>
{%- endmacro %}
<body><div style="width: 700px">
{# Test metadata #}
<h2>General</h2>
<hr>
<table class="meta-table" style="width: 100%">
<tr>
<th></th>
<th>Current commit</th>
<th>Comparing with</th>
</tr>
{% for key, item in metadata.items() %}
<tr>
<th>{{ item.title }}</th>
{% if key == 'commit' %}
<td>{{ poky_link(item.value) }}</td>
<td>{{ poky_link(item.value_old) }}</td>
{% else %}
<td>{{ item.value }}</td>
<td>{{ item.value_old }}</td>
{% endif %}
</tr>
{% endfor %}
</table>
{# Test result summary #}
<h2>Test result summary</h2>
<hr>
<table class="summary" style="width: 100%">
{% for test in test_data %}
{% if loop.index is even %}
{% set row_style = 'style="background-color: #f2f2f2"' %}
{% else %}
{% set row_style = 'style="background-color: #ffffff"' %}
{% endif %}
<tr {{ row_style }}><td>{{ test.name }}: {{ test.description }}</td>
{% if test.status == 'SUCCESS' %}
{% for measurement in test.measurements %}
{# Add an empty cell in place of the test name #}
{% if loop.index > 1 %}<td></td>{% endif %}
{# absdiff == absdiff is false only for NaN, i.e. missing data #}
{% if measurement.absdiff > 0 %}
{% set result_style = "color: red" %}
{% elif measurement.absdiff == measurement.absdiff %}
{% set result_style = "color: green" %}
{% else %}
{% set result_style = "color: orange" %}
{% endif %}
<td>{{ measurement.description }}</td>
<td style="font-weight: bold">{{ measurement.value.mean }}</td>
<td style="{{ result_style }}">{{ measurement.absdiff_str }}</td>
<td style="{{ result_style }}">{{ measurement.reldiff }}</td>
</tr><tr {{ row_style }}>
{% endfor %}
{% else %}
<td style="font-weight: bold; color: red;">{{ test.status }}</td>
<td></td> <td></td> <td></td> <td></td>
{% endif %}
</tr>
{% endfor %}
</table>
{# Detailed test results #}
{% for test in test_data %}
<h2>{{ test.name }}: {{ test.description }}</h2>
<hr>
{% if test.status == 'SUCCESS' %}
{% for measurement in test.measurements %}
<div class="measurement">
<h3>{{ measurement.description }}</h3>
<div style="font-weight:bold;">
<span style="font-size: 23px;">{{ measurement.value.mean }}</span>
<span style="font-size: 20px; margin-left: 12px">
{% if measurement.absdiff > 0 %}
<span style="color: red">
{% elif measurement.absdiff == measurement.absdiff %}
<span style="color: green">
{% else %}
<span style="color: orange">
{% endif %}
{{ measurement.absdiff_str }} ({{measurement.reldiff}})
</span></span>
</div>
<table style="width: 100%">
<tr>
<td style="width: 75%">
{# Linechart #}
<div id="{{ test.name }}_{{ measurement.name }}_chart"></div>
</td>
<td>
{# Measurement statistics #}
<table class="details">
<tr>
<th>Test runs</th><td>{{ measurement.value.sample_cnt }}</td>
</tr><tr>
<th>-/+</th><td>-{{ measurement.value.minus }} / +{{ measurement.value.plus }}</td>
</tr><tr>
<th>Min</th><td>{{ measurement.value.min }}</td>
</tr><tr>
<th>Max</th><td>{{ measurement.value.max }}</td>
</tr><tr>
<th>Stdev</th><td>{{ measurement.value.stdev }}</td>
</tr><tr>
<th><div id="{{ test.name }}_{{ measurement.name }}_chart_png"></div></th>
</tr>
</table>
</td>
</tr>
</table>
</div>
{% endfor %}
{# Unsuccessful test #}
{% else %}
<span style="font-size: 150%; font-weight: bold; color: red;">{{ test.status }}
{% if test.err_type %}<span style="font-size: 75%; font-weight: normal">({{ test.err_type }})</span>{% endif %}
</span>
<div class="preformatted">{{ test.message }}</div>
{% endif %}
{% endfor %}
</div></body>
</html>

scripts/lib/build_perf/report.py
@@ -0,0 +1,342 @@
#
# Copyright (c) 2017, Intel Corporation.
#
# This program is free software; you can redistribute it and/or modify it
# under the terms and conditions of the GNU General Public License,
# version 2, as published by the Free Software Foundation.
#
# This program is distributed in the hope it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
"""Handling of build perf test reports"""
from collections import OrderedDict
from collections.abc import Mapping
from datetime import datetime, timezone
from numbers import Number
from statistics import mean, stdev, variance
def isofmt_to_timestamp(string):
"""Convert timestamp string in ISO 8601 format into unix timestamp"""
if '.' in string:
dt = datetime.strptime(string, '%Y-%m-%dT%H:%M:%S.%f')
else:
dt = datetime.strptime(string, '%Y-%m-%dT%H:%M:%S')
return dt.replace(tzinfo=timezone.utc).timestamp()
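# Illustrative example (hypothetical value): both supported forms are parsed
# with strptime and converted to a float unix timestamp, UTC assumed:
#   isofmt_to_timestamp('2017-03-31T17:07:29.123456')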
def metadata_xml_to_json(elem):
"""Convert metadata xml into JSON format"""
assert elem.tag == 'metadata', "Invalid metadata file format"
def _xml_to_json(elem):
"""Convert xml element to JSON object"""
out = OrderedDict()
        for child in elem:
key = child.attrib.get('name', child.tag)
if len(child):
out[key] = _xml_to_json(child)
else:
out[key] = child.text
return out
return _xml_to_json(elem)
def results_xml_to_json(elem):
"""Convert results xml into JSON format"""
rusage_fields = ('ru_utime', 'ru_stime', 'ru_maxrss', 'ru_minflt',
'ru_majflt', 'ru_inblock', 'ru_oublock', 'ru_nvcsw',
'ru_nivcsw')
iostat_fields = ('rchar', 'wchar', 'syscr', 'syscw', 'read_bytes',
'write_bytes', 'cancelled_write_bytes')
def _read_measurement(elem):
"""Convert measurement to JSON"""
data = OrderedDict()
data['type'] = elem.tag
data['name'] = elem.attrib['name']
data['legend'] = elem.attrib['legend']
values = OrderedDict()
# SYSRES measurement
if elem.tag == 'sysres':
for subel in elem:
if subel.tag == 'time':
values['start_time'] = isofmt_to_timestamp(subel.attrib['timestamp'])
values['elapsed_time'] = float(subel.text)
elif subel.tag == 'rusage':
rusage = OrderedDict()
for field in rusage_fields:
if 'time' in field:
rusage[field] = float(subel.attrib[field])
else:
rusage[field] = int(subel.attrib[field])
values['rusage'] = rusage
elif subel.tag == 'iostat':
values['iostat'] = OrderedDict([(f, int(subel.attrib[f]))
for f in iostat_fields])
elif subel.tag == 'buildstats_file':
values['buildstats_file'] = subel.text
else:
raise TypeError("Unknown sysres value element '{}'".format(subel.tag))
# DISKUSAGE measurement
elif elem.tag == 'diskusage':
values['size'] = int(elem.find('size').text)
else:
raise Exception("Unknown measurement tag '{}'".format(elem.tag))
data['values'] = values
return data
def _read_testcase(elem):
"""Convert testcase into JSON"""
assert elem.tag == 'testcase', "Expecting 'testcase' element instead of {}".format(elem.tag)
data = OrderedDict()
data['name'] = elem.attrib['name']
data['description'] = elem.attrib['description']
data['status'] = 'SUCCESS'
data['start_time'] = isofmt_to_timestamp(elem.attrib['timestamp'])
data['elapsed_time'] = float(elem.attrib['time'])
measurements = OrderedDict()
        for subel in elem:
if subel.tag == 'error' or subel.tag == 'failure':
data['status'] = subel.tag.upper()
data['message'] = subel.attrib['message']
data['err_type'] = subel.attrib['type']
data['err_output'] = subel.text
elif subel.tag == 'skipped':
data['status'] = 'SKIPPED'
data['message'] = subel.text
else:
measurements[subel.attrib['name']] = _read_measurement(subel)
data['measurements'] = measurements
return data
def _read_testsuite(elem):
"""Convert suite to JSON"""
assert elem.tag == 'testsuite', \
"Expecting 'testsuite' element instead of {}".format(elem.tag)
data = OrderedDict()
if 'hostname' in elem.attrib:
data['tester_host'] = elem.attrib['hostname']
data['start_time'] = isofmt_to_timestamp(elem.attrib['timestamp'])
data['elapsed_time'] = float(elem.attrib['time'])
tests = OrderedDict()
        for case in elem:
tests[case.attrib['name']] = _read_testcase(case)
data['tests'] = tests
return data
# Main function
assert elem.tag == 'testsuites', "Invalid test report format"
assert len(elem) == 1, "Too many testsuites"
    return _read_testsuite(elem[0])
def aggregate_metadata(metadata):
"""Aggregate metadata into one, basically a sanity check"""
mutable_keys = ('pretty_name', 'version_id')
def aggregate_obj(aggregate, obj, assert_str=True):
"""Aggregate objects together"""
assert type(aggregate) is type(obj), \
"Type mismatch: {} != {}".format(type(aggregate), type(obj))
if isinstance(obj, Mapping):
assert set(aggregate.keys()) == set(obj.keys())
for key, val in obj.items():
aggregate_obj(aggregate[key], val, key not in mutable_keys)
elif isinstance(obj, list):
assert len(aggregate) == len(obj)
for i, val in enumerate(obj):
aggregate_obj(aggregate[i], val)
        elif not isinstance(obj, str) or assert_str:
            assert aggregate == obj, "Data mismatch {} != {}".format(aggregate, obj)
if not metadata:
return {}
# Do the aggregation
aggregate = metadata[0].copy()
for testrun in metadata[1:]:
aggregate_obj(aggregate, testrun)
aggregate['testrun_count'] = len(metadata)
return aggregate
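# Illustrative behaviour: aggregating N metadata objects returns a copy of the
# first one with 'testrun_count' set to N; fields outside mutable_keys that
# differ between runs raise an AssertionError.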
def aggregate_data(data):
"""Aggregate multiple test results JSON structures into one"""
mutable_keys = ('status', 'message', 'err_type', 'err_output')
class SampleList(list):
"""Container for numerical samples"""
pass
def new_aggregate_obj(obj):
"""Create new object for aggregate"""
if isinstance(obj, Number):
new_obj = SampleList()
new_obj.append(obj)
elif isinstance(obj, str):
new_obj = obj
else:
            # Lists and dicts are kept as is
new_obj = obj.__class__()
aggregate_obj(new_obj, obj)
return new_obj
def aggregate_obj(aggregate, obj, assert_str=True):
"""Recursive "aggregation" of JSON objects"""
if isinstance(obj, Number):
assert isinstance(aggregate, SampleList)
aggregate.append(obj)
return
assert type(aggregate) == type(obj), \
"Type mismatch: {} != {}".format(type(aggregate), type(obj))
if isinstance(obj, Mapping):
for key, val in obj.items():
                if key not in aggregate:
aggregate[key] = new_aggregate_obj(val)
else:
aggregate_obj(aggregate[key], val, key not in mutable_keys)
elif isinstance(obj, list):
for i, val in enumerate(obj):
if i >= len(aggregate):
                    aggregate.append(new_aggregate_obj(val))
else:
aggregate_obj(aggregate[i], val)
elif isinstance(obj, str):
# Sanity check for data
if assert_str:
assert aggregate == obj, "Data mismatch {} != {}".format(aggregate, obj)
else:
raise Exception("BUG: unable to aggregate '{}' ({})".format(type(obj), str(obj)))
if not data:
return {}
# Do the aggregation
aggregate = data[0].__class__()
for testrun in data:
aggregate_obj(aggregate, testrun)
return aggregate
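# Illustrative behaviour: numeric leaves are collected into SampleList
# containers while strings and structure are kept, e.g.
#   aggregate_data([{'t': 1.0}, {'t': 2.0}])
# yields {'t': SampleList([1.0, 2.0])}.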
class MeasurementVal(float):
"""Base class representing measurement values"""
gv_data_type = 'number'
def gv_value(self):
"""Value formatting for visualization"""
        if self != self:  # NaN compares unequal to itself
return "null"
else:
return self
class TimeVal(MeasurementVal):
"""Class representing time values"""
quantity = 'time'
gv_title = 'elapsed time'
gv_data_type = 'timeofday'
def hms(self):
"""Split time into hours, minutes and seconeds"""
hhh = int(abs(self) / 3600)
mmm = int((abs(self) % 3600) / 60)
sss = abs(self) % 60
return hhh, mmm, sss
def __str__(self):
if self != self:
return "nan"
hh, mm, ss = self.hms()
sign = '-' if self < 0 else ''
if hh > 0:
return '{}{:d}:{:02d}:{:02.0f}'.format(sign, hh, mm, ss)
elif mm > 0:
return '{}{:d}:{:04.1f}'.format(sign, mm, ss)
elif ss > 1:
return '{}{:.1f} s'.format(sign, ss)
else:
return '{}{:.2f} s'.format(sign, ss)
def gv_value(self):
"""Value formatting for visualization"""
if self != self:
return "null"
hh, mm, ss = self.hms()
return [hh, mm, int(ss), int(ss*1000) % 1000]
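# Illustrative TimeVal formatting (seconds as the base unit):
#   str(TimeVal(3725.5)) == '1:02:06'
#   TimeVal(3725.5).gv_value() == [1, 2, 5, 500]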
class SizeVal(MeasurementVal):
"""Class representing time values"""
quantity = 'size'
gv_title = 'size in MiB'
gv_data_type = 'number'
def __str__(self):
if self != self:
return "nan"
if abs(self) < 1024:
return '{:.1f} kiB'.format(self)
elif abs(self) < 1048576:
return '{:.2f} MiB'.format(self / 1024)
else:
return '{:.2f} GiB'.format(self / 1048576)
def gv_value(self):
"""Value formatting for visualization"""
if self != self:
return "null"
return self / 1024
def measurement_stats(meas, prefix=''):
"""Get statistics of a measurement"""
if not meas:
return {prefix + 'sample_cnt': 0,
prefix + 'mean': MeasurementVal('nan'),
prefix + 'stdev': MeasurementVal('nan'),
prefix + 'variance': MeasurementVal('nan'),
prefix + 'min': MeasurementVal('nan'),
prefix + 'max': MeasurementVal('nan'),
prefix + 'minus': MeasurementVal('nan'),
prefix + 'plus': MeasurementVal('nan')}
stats = {'name': meas['name']}
if meas['type'] == 'sysres':
val_cls = TimeVal
values = meas['values']['elapsed_time']
elif meas['type'] == 'diskusage':
val_cls = SizeVal
values = meas['values']['size']
else:
raise Exception("Unknown measurement type '{}'".format(meas['type']))
stats['val_cls'] = val_cls
stats['quantity'] = val_cls.quantity
stats[prefix + 'sample_cnt'] = len(values)
mean_val = val_cls(mean(values))
min_val = val_cls(min(values))
max_val = val_cls(max(values))
stats[prefix + 'mean'] = mean_val
if len(values) > 1:
stats[prefix + 'stdev'] = val_cls(stdev(values))
stats[prefix + 'variance'] = val_cls(variance(values))
else:
stats[prefix + 'stdev'] = float('nan')
stats[prefix + 'variance'] = float('nan')
stats[prefix + 'min'] = min_val
stats[prefix + 'max'] = max_val
stats[prefix + 'minus'] = val_cls(mean_val - min_val)
stats[prefix + 'plus'] = val_cls(max_val - mean_val)
return stats
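# Illustrative: for an aggregated sysres measurement whose
# values['elapsed_time'] samples are [100.0, 110.0], measurement_stats()
# returns TimeVal statistics with sample_cnt 2, mean 105.0, min 100.0,
# max 110.0 and minus/plus deltas of 5.0 around the mean.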

scripts/oe-build-perf-report Executable file
@@ -0,0 +1,531 @@
#!/usr/bin/python3
#
# Examine build performance test results
#
# Copyright (c) 2017, Intel Corporation.
#
# This program is free software; you can redistribute it and/or modify it
# under the terms and conditions of the GNU General Public License,
# version 2, as published by the Free Software Foundation.
#
# This program is distributed in the hope it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
import argparse
import json
import logging
import os
import re
import sys
from collections import namedtuple, OrderedDict
from operator import attrgetter
from xml.etree import ElementTree as ET
# Import oe libs
scripts_path = os.path.dirname(os.path.realpath(__file__))
sys.path.append(os.path.join(scripts_path, 'lib'))
import scriptpath
from build_perf import print_table
from build_perf.report import (metadata_xml_to_json, results_xml_to_json,
aggregate_data, aggregate_metadata, measurement_stats)
from build_perf import html
scriptpath.add_oe_lib_path()
from oeqa.utils.git import GitRepo
# Setup logging
logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
log = logging.getLogger('oe-build-perf-report')
# Container class for tester revisions
TestedRev = namedtuple('TestedRev', 'commit commit_number tags')
def get_test_runs(repo, tag_name, **kwargs):
"""Get a sorted list of test runs, matching given pattern"""
# First, get field names from the tag name pattern
field_names = [m.group(1) for m in re.finditer(r'{(\w+)}', tag_name)]
undef_fields = [f for f in field_names if f not in kwargs.keys()]
# Fields for formatting tag name pattern
str_fields = dict([(f, '*') for f in field_names])
str_fields.update(kwargs)
# Get a list of all matching tags
tag_pattern = tag_name.format(**str_fields)
tags = repo.run_cmd(['tag', '-l', tag_pattern]).splitlines()
log.debug("Found %d tags matching pattern '%s'", len(tags), tag_pattern)
# Parse undefined fields from tag names
str_fields = dict([(f, r'(?P<{}>[\w\-.]+)'.format(f)) for f in field_names])
str_fields['commit'] = '(?P<commit>[0-9a-f]{7,40})'
str_fields['commit_number'] = '(?P<commit_number>[0-9]{1,7})'
str_fields['tag_number'] = '(?P<tag_number>[0-9]{1,5})'
str_fields.update(kwargs)
tag_re = re.compile(tag_name.format(**str_fields))
# Parse fields from tags
revs = []
for tag in tags:
        m = tag_re.match(tag)
        if not m:
            continue
        groups = m.groupdict()
revs.append([groups[f] for f in undef_fields] + [tag])
# Return field names and a sorted list of revs
return undef_fields, sorted(revs)
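# Illustrative: with the default tag name pattern
#   '{hostname}/{branch}/{machine}/{commit_number}-g{commit}/{tag_number}'
# and hostname='foo' given, get_test_runs() first lists tags matching the
# glob 'foo/*/*/*-g*/*' and then parses the remaining fields (branch,
# machine, commit_number, commit, tag_number) back out of each tag name.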
def list_test_revs(repo, tag_name, **kwargs):
"""Get list of all tested revisions"""
fields, revs = get_test_runs(repo, tag_name, **kwargs)
ignore_fields = ['tag_number']
print_fields = [i for i, f in enumerate(fields) if f not in ignore_fields]
    # Build the table rows
rows = [[fields[i].upper() for i in print_fields] + ['TEST RUNS']]
    prev = [''] * len(print_fields)
for rev in revs:
# Only use fields that we want to print
rev = [rev[i] for i in print_fields]
if rev != prev:
new_row = [''] * len(print_fields) + [1]
            for i in range(len(print_fields)):
if rev[i] != prev[i]:
break
new_row[i:-1] = rev[i:]
rows.append(new_row)
else:
rows[-1][-1] += 1
prev = rev
print_table(rows)
def get_test_revs(repo, tag_name, **kwargs):
"""Get list of all tested revisions"""
fields, runs = get_test_runs(repo, tag_name, **kwargs)
revs = {}
commit_i = fields.index('commit')
commit_num_i = fields.index('commit_number')
for run in runs:
commit = run[commit_i]
commit_num = run[commit_num_i]
tag = run[-1]
        if commit not in revs:
revs[commit] = TestedRev(commit, commit_num, [tag])
else:
assert commit_num == revs[commit].commit_number, "Commit numbers do not match"
revs[commit].tags.append(tag)
# Return in sorted table
    # Sort numerically; commit_number is parsed from the tag as a string
    revs = sorted(revs.values(), key=lambda rev: int(rev.commit_number))
log.debug("Found %d tested revisions:\n %s", len(revs),
"\n ".join(['{} ({})'.format(rev.commit_number, rev.commit) for rev in revs]))
return revs
def rev_find(revs, attr, val):
"""Search from a list of TestedRev"""
for i, rev in enumerate(revs):
if getattr(rev, attr) == val:
return i
raise ValueError("Unable to find '{}' value '{}'".format(attr, val))
def is_xml_format(repo, commit):
"""Check if the commit contains xml (or json) data"""
if repo.rev_parse(commit + ':results.xml'):
log.debug("Detected report in xml format in %s", commit)
return True
else:
log.debug("No xml report in %s, assuming json formatted results", commit)
return False
def read_results(repo, tags, xml=True):
"""Read result files from repo"""
def parse_xml_stream(data):
"""Parse multiple concatenated XML objects"""
objs = []
xml_d = ""
for line in data.splitlines():
if xml_d and line.startswith('<?xml version='):
objs.append(ET.fromstring(xml_d))
xml_d = line
else:
xml_d += line
objs.append(ET.fromstring(xml_d))
return objs
def parse_json_stream(data):
"""Parse multiple concatenated JSON objects"""
objs = []
json_d = ""
for line in data.splitlines():
if line == '}{':
json_d += '}'
objs.append(json.loads(json_d, object_pairs_hook=OrderedDict))
json_d = '{'
else:
json_d += line
objs.append(json.loads(json_d, object_pairs_hook=OrderedDict))
return objs
num_revs = len(tags)
# Optimize by reading all data with one git command
log.debug("Loading raw result data from %d tags, %s...", num_revs, tags[0])
if xml:
git_objs = [tag + ':metadata.xml' for tag in tags] + [tag + ':results.xml' for tag in tags]
data = parse_xml_stream(repo.run_cmd(['show'] + git_objs + ['--']))
return ([metadata_xml_to_json(e) for e in data[0:num_revs]],
[results_xml_to_json(e) for e in data[num_revs:]])
else:
git_objs = [tag + ':metadata.json' for tag in tags] + [tag + ':results.json' for tag in tags]
data = parse_json_stream(repo.run_cmd(['show'] + git_objs + ['--']))
return data[0:num_revs], data[num_revs:]
def get_data_item(data, key):
"""Nested getitem lookup"""
for k in key.split('.'):
data = data[k]
return data
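# e.g. get_data_item(metadata, 'layers.meta.commit_count') is equivalent to
# metadata['layers']['meta']['commit_count']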
def metadata_diff(metadata_l, metadata_r):
"""Prepare a metadata diff for printing"""
keys = [('Hostname', 'hostname', 'hostname'),
('Branch', 'branch', 'layers.meta.branch'),
('Commit number', 'commit_num', 'layers.meta.commit_count'),
('Commit', 'commit', 'layers.meta.commit'),
('Number of test runs', 'testrun_count', 'testrun_count')
]
def _metadata_diff(key):
"""Diff metadata from two test reports"""
try:
val1 = get_data_item(metadata_l, key)
except KeyError:
val1 = '(N/A)'
try:
val2 = get_data_item(metadata_r, key)
except KeyError:
val2 = '(N/A)'
return val1, val2
metadata = OrderedDict()
for title, key, key_json in keys:
value_l, value_r = _metadata_diff(key_json)
metadata[key] = {'title': title,
'value_old': value_l,
'value': value_r}
return metadata
def print_diff_report(metadata_l, data_l, metadata_r, data_r):
"""Print differences between two data sets"""
# First, print general metadata
print("\nTEST METADATA:\n==============")
meta_diff = metadata_diff(metadata_l, metadata_r)
    row_fmt = ['{:{wid}} ', '{:<{wid}} ', '{:<{wid}}']
    rows = [['', 'CURRENT COMMIT', 'COMPARING WITH']]
for key, val in meta_diff.items():
# Shorten commit hashes
if key == 'commit':
rows.append([val['title'] + ':', val['value'][:20], val['value_old'][:20]])
else:
rows.append([val['title'] + ':', val['value'], val['value_old']])
print_table(rows, row_fmt)
# Print test results
print("\nTEST RESULTS:\n=============")
tests = list(data_l['tests'].keys())
# Append tests that are only present in 'right' set
tests += [t for t in list(data_r['tests'].keys()) if t not in tests]
# Prepare data to be printed
rows = []
row_fmt = ['{:8}', '{:{wid}}', '{:{wid}}', ' {:>{wid}}', ' {:{wid}} ', '{:{wid}}',
' {:>{wid}}', ' {:>{wid}}']
num_cols = len(row_fmt)
for test in tests:
test_l = data_l['tests'][test] if test in data_l['tests'] else None
test_r = data_r['tests'][test] if test in data_r['tests'] else None
pref = ' '
if test_l is None:
pref = '+'
elif test_r is None:
pref = '-'
descr = test_l['description'] if test_l else test_r['description']
heading = "{} {}: {}".format(pref, test, descr)
rows.append([heading])
# Generate the list of measurements
meas_l = test_l['measurements'] if test_l else {}
meas_r = test_r['measurements'] if test_r else {}
measurements = list(meas_l.keys())
measurements += [m for m in list(meas_r.keys()) if m not in measurements]
for meas in measurements:
m_pref = ' '
if meas in meas_l:
stats_l = measurement_stats(meas_l[meas], 'l.')
else:
stats_l = measurement_stats(None, 'l.')
m_pref = '+'
if meas in meas_r:
stats_r = measurement_stats(meas_r[meas], 'r.')
else:
stats_r = measurement_stats(None, 'r.')
m_pref = '-'
stats = stats_l.copy()
stats.update(stats_r)
absdiff = stats['val_cls'](stats['r.mean'] - stats['l.mean'])
reldiff = "{:+.1f} %".format(absdiff * 100 / stats['l.mean'])
if stats['r.mean'] > stats['l.mean']:
absdiff = '+' + str(absdiff)
else:
absdiff = str(absdiff)
rows.append(['', m_pref, stats['name'] + ' ' + stats['quantity'],
str(stats['l.mean']), '->', str(stats['r.mean']),
absdiff, reldiff])
rows.append([''] * num_cols)
print_table(rows, row_fmt)
print()
def print_html_report(data, id_comp):
"""Print report in html format"""
# Handle metadata
    metadata = metadata_diff(data[id_comp][0], data[-1][0])
# Generate list of tests
tests = []
for test in data[-1][1]['tests'].keys():
test_r = data[-1][1]['tests'][test]
new_test = {'name': test_r['name'],
'description': test_r['description'],
'status': test_r['status'],
'measurements': [],
'err_type': test_r.get('err_type'),
}
# Limit length of err output shown
if 'message' in test_r:
lines = test_r['message'].splitlines()
if len(lines) > 20:
new_test['message'] = '...\n' + '\n'.join(lines[-20:])
else:
new_test['message'] = test_r['message']
# Generate the list of measurements
for meas in test_r['measurements'].keys():
meas_r = test_r['measurements'][meas]
meas_type = 'time' if meas_r['type'] == 'sysres' else 'size'
new_meas = {'name': meas_r['name'],
'legend': meas_r['legend'],
'description': meas_r['name'] + ' ' + meas_type,
}
samples = []
# Run through all revisions in our data
for meta, test_data in data:
                if (test not in test_data['tests'] or
                        meas not in test_data['tests'][test]['measurements']):
samples.append(measurement_stats(None))
continue
test_i = test_data['tests'][test]
meas_i = test_i['measurements'][meas]
commit_num = get_data_item(meta, 'layers.meta.commit_count')
samples.append(measurement_stats(meas_i))
samples[-1]['commit_num'] = commit_num
absdiff = samples[-1]['val_cls'](samples[-1]['mean'] - samples[id_comp]['mean'])
new_meas['absdiff'] = absdiff
new_meas['absdiff_str'] = str(absdiff) if absdiff < 0 else '+' + str(absdiff)
new_meas['reldiff'] = "{:+.1f} %".format(absdiff * 100 / samples[id_comp]['mean'])
new_meas['samples'] = samples
new_meas['value'] = samples[-1]
new_meas['value_type'] = samples[-1]['val_cls']
new_test['measurements'].append(new_meas)
tests.append(new_test)
# Chart options
chart_opts = {'haxis': {'min': get_data_item(data[0][0], 'layers.meta.commit_count'),
                            'max': get_data_item(data[-1][0], 'layers.meta.commit_count')}
}
print(html.template.render(metadata=metadata, test_data=tests, chart_opts=chart_opts))
def auto_args(repo, args):
"""Guess arguments, if not defined by the user"""
# Get the latest commit in the repo
log.debug("Guessing arguments from the latest commit")
msg = repo.run_cmd(['log', '-1', '--all', '--format=%b'])
for line in msg.splitlines():
split = line.split(':', 1)
if len(split) != 2:
continue
key = split[0]
val = split[1].strip()
if key == 'hostname':
log.debug("Using hostname %s", val)
args.hostname = val
elif key == 'branch':
log.debug("Using branch %s", val)
args.branch = val
def parse_args(argv):
"""Parse command line arguments"""
description = """
Examine build performance test results from a Git repository"""
parser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
description=description)
parser.add_argument('--debug', '-d', action='store_true',
help="Verbose logging")
parser.add_argument('--repo', '-r', required=True,
help="Results repository (local git clone)")
parser.add_argument('--list', '-l', action='store_true',
help="List available test runs")
parser.add_argument('--html', action='store_true',
help="Generate report in html format")
group = parser.add_argument_group('Tag and revision')
group.add_argument('--tag-name', '-t',
default='{hostname}/{branch}/{machine}/{commit_number}-g{commit}/{tag_number}',
help="Tag name (pattern) for finding results")
group.add_argument('--hostname', '-H')
group.add_argument('--branch', '-B', default='master')
group.add_argument('--machine', default='qemux86')
group.add_argument('--history-length', default=25, type=int,
help="Number of tested revisions to plot in html report")
group.add_argument('--commit',
help="Revision to search for")
group.add_argument('--commit-number',
help="Revision number to search for, redundant if "
"--commit is specified")
group.add_argument('--commit2',
help="Revision to compare with")
group.add_argument('--commit-number2',
help="Revision number to compare with, redundant if "
"--commit2 is specified")
return parser.parse_args(argv)
def main(argv=None):
"""Script entry point"""
args = parse_args(argv)
if args.debug:
log.setLevel(logging.DEBUG)
repo = GitRepo(args.repo)
if args.list:
list_test_revs(repo, args.tag_name)
return 0
# Determine hostname which to use
if not args.hostname:
auto_args(repo, args)
revs = get_test_revs(repo, args.tag_name, hostname=args.hostname,
branch=args.branch, machine=args.machine)
if len(revs) < 2:
log.error("%d tester revisions found, unable to generate report",
len(revs))
return 1
# Pick revisions
if args.commit:
if args.commit_number:
log.warning("Ignoring --commit-number as --commit was specified")
index1 = rev_find(revs, 'commit', args.commit)
elif args.commit_number:
index1 = rev_find(revs, 'commit_number', args.commit_number)
else:
index1 = len(revs) - 1
if args.commit2:
if args.commit_number2:
log.warning("Ignoring --commit-number2 as --commit2 was specified")
index2 = rev_find(revs, 'commit', args.commit2)
elif args.commit_number2:
index2 = rev_find(revs, 'commit_number', args.commit_number2)
else:
if index1 > 0:
index2 = index1 - 1
else:
log.error("Unable to determine the other commit, use "
"--commit2 or --commit-number2 to specify it")
return 1
index_l = min(index1, index2)
index_r = max(index1, index2)
rev_l = revs[index_l]
rev_r = revs[index_r]
log.debug("Using 'left' revision %s (%s), %s test runs:\n %s",
rev_l.commit_number, rev_l.commit, len(rev_l.tags),
'\n '.join(rev_l.tags))
log.debug("Using 'right' revision %s (%s), %s test runs:\n %s",
rev_r.commit_number, rev_r.commit, len(rev_r.tags),
'\n '.join(rev_r.tags))
# Check report format used in the repo (assume all reports in the same fmt)
xml = is_xml_format(repo, revs[index_r].tags[-1])
if args.html:
index_0 = max(0, index_r - args.history_length)
rev_range = range(index_0, index_r + 1)
else:
# We do not need range of commits for text report (no graphs)
index_0 = index_l
rev_range = (index_l, index_r)
# Read raw data
log.debug("Reading %d revisions, starting from %s (%s)",
len(rev_range), revs[index_0].commit_number, revs[index_0].commit)
raw_data = [read_results(repo, revs[i].tags, xml) for i in rev_range]
data = []
for raw_m, raw_d in raw_data:
data.append((aggregate_metadata(raw_m), aggregate_data(raw_d)))
# Re-map list indexes to the new table starting from index 0
index_r = index_r - index_0
index_l = index_l - index_0
# Print report
if not args.html:
print_diff_report(data[index_l][0], data[index_l][1],
data[index_r][0], data[index_r][1])
else:
print_html_report(data, index_l)
return 0
if __name__ == "__main__":
sys.exit(main())