generic-poky/scripts/oe-build-perf-report

#!/usr/bin/python3
#
# Examine build performance test results
#
# Copyright (c) 2017, Intel Corporation.
#
# This program is free software; you can redistribute it and/or modify it
# under the terms and conditions of the GNU General Public License,
# version 2, as published by the Free Software Foundation.
#
# This program is distributed in the hope it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
# more details.
#
import argparse
import json
import logging
import os
import re
import sys
from collections import namedtuple, OrderedDict
from operator import attrgetter
from xml.etree import ElementTree as ET

# Import oe libs
scripts_path = os.path.dirname(os.path.realpath(__file__))
sys.path.append(os.path.join(scripts_path, 'lib'))
import scriptpath
from build_perf import print_table
from build_perf.report import (metadata_xml_to_json, results_xml_to_json,
                               aggregate_data, aggregate_metadata, measurement_stats)
from build_perf import html

scriptpath.add_oe_lib_path()

from oeqa.utils.git import GitRepo


# Setup logging: messages at INFO level and above are printed as
# "<LEVEL>: <message>"; main() lowers this logger's level to DEBUG
# when --debug is given
logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
log = logging.getLogger('oe-build-perf-report')


# Container for one tested revision: the commit hash, its commit number
# and the list of result tags (one per test run) found for that commit
TestedRev = namedtuple('TestedRev', 'commit commit_number tags')


def get_test_runs(repo, tag_name, **kwargs):
    """Get a sorted list of test runs whose tag matches the given pattern.

    Args:
        repo: Git repository object providing ``run_cmd(args)``, whose
            return value is split into lines here (one tag name per line).
        tag_name: Tag name pattern containing ``{field}`` placeholders.
        **kwargs: Fixed (string) values for some of the fields of the
            pattern. Fields not given here are treated as wildcards and
            their values are parsed from the names of the matching tags.

    Returns:
        A ``(fields, runs)`` tuple. ``fields`` lists the names of the
        undefined (parsed) fields in pattern order. ``runs`` has one
        ``[<value of each undefined field>..., <tag name>]`` list per
        parsed tag. All values are strings, but the 'commit_number' and
        'tag_number' fields are ordered numerically.
    """
    # First, get field names from the tag name pattern
    field_names = [m.group(1) for m in re.finditer(r'{(\w+)}', tag_name)]
    undef_fields = [f for f in field_names if f not in kwargs]

    # Fields for formatting the tag name pattern: every undefined field
    # becomes a glob wildcard
    glob_fields = {f: '*' for f in field_names}
    glob_fields.update(kwargs)

    # Get a list of all matching tags
    tag_pattern = tag_name.format(**glob_fields)
    tags = repo.run_cmd(['tag', '-l', tag_pattern]).splitlines()
    log.debug("Found %d tags matching pattern '%s'", len(tags), tag_pattern)

    # Parse undefined fields from tag names
    re_fields = {f: r'(?P<{}>[\w\-.()]+)'.format(f) for f in field_names}
    re_fields['branch'] = r'(?P<branch>[\w\-.()/]+)'
    re_fields['commit'] = '(?P<commit>[0-9a-f]{7,40})'
    re_fields['commit_number'] = '(?P<commit_number>[0-9]{1,7})'
    re_fields['tag_number'] = '(?P<tag_number>[0-9]{1,5})'
    # Escape the user-given values so that special characters in them
    # (parentheses, dots, plus signs...) do not mess up the regexp
    re_fields.update({k: re.escape(v) for k, v in kwargs.items()})
    tag_re = re.compile(tag_name.format(**re_fields))

    # Parse fields from tags
    runs = []
    for tag in tags:
        m = tag_re.match(tag)
        if m is None:
            # The glob pattern is looser than the regexp (e.g. '*' also
            # matches '/'), so git may list tags that cannot be parsed
            log.debug("Ignoring tag '%s', unable to parse its fields", tag)
            continue
        groups = m.groupdict()
        runs.append([groups[f] for f in undef_fields] + [tag])

    # Compare numeric fields as numbers so that e.g. '10' sorts after '9'
    int_fields = ('commit_number', 'tag_number')

    def _sort_key(run):
        """Build the comparison key of one [field values..., tag] entry"""
        values = [int(val) if field in int_fields else val
                  for field, val in zip(undef_fields, run)]
        return values + [run[-1]]

    # Return field names and a sorted list of runs
    return undef_fields, sorted(runs, key=_sort_key)

def list_test_revs(repo, tag_name, **kwargs):
    """Print a table of all tested revisions and their test run counts.

    Test runs are obtained with get_test_runs(). The 'tag_number' field is
    left out, so runs that only differ by it are collapsed into one row
    whose last column ('TEST RUNS') counts them. Leading columns that are
    equal to the previous row are left blank.

    Args:
        repo: Git repository object, passed on to get_test_runs().
        tag_name: Tag name pattern, passed on to get_test_runs().
        **kwargs: Fixed field values, passed on to get_test_runs().
    """
    fields, runs = get_test_runs(repo, tag_name, **kwargs)
    ignore_fields = ['tag_number']
    print_fields = [i for i, f in enumerate(fields) if f not in ignore_fields]

    rows = [[fields[i].upper() for i in print_fields] + ['TEST RUNS']]
    blank = [''] * len(print_fields)
    prev = None
    for run in runs:
        # Only use fields that we want to print
        rev = [run[i] for i in print_fields]

        if rev == prev:
            # One more test run of the revision on the last row
            rows[-1][-1] += 1
            continue

        # Find the first column (of the printed ones) that differs from
        # the previous row; everything left of it is left blank
        reference = blank if prev is None else prev
        first_diff = next((col for col, (new, old) in enumerate(zip(rev, reference))
                           if new != old), 0)
        rows.append(blank[:first_diff] + rev[first_diff:] + [1])
        prev = rev

    print_table(rows)

def get_test_revs(repo, tag_name, **kwargs):
    """Get list of all tested revisions.

    Args:
        repo: Git repository object, passed on to get_test_runs().
        tag_name: Tag name pattern, passed on to get_test_runs(). It must
            contain the 'commit' and 'commit_number' fields.
        **kwargs: Fixed field values, passed on to get_test_runs().

    Returns:
        List of TestedRev tuples, one per tested commit, each holding all
        tags (test runs) of that commit. The list is in ascending numeric
        order of commit number.

    Raises:
        ValueError: If 'commit' or 'commit_number' is not among the fields
            parsed from the tag names.
    """
    fields, runs = get_test_runs(repo, tag_name, **kwargs)

    revs = {}
    commit_i = fields.index('commit')
    commit_num_i = fields.index('commit_number')
    for run in runs:
        commit = run[commit_i]
        commit_num = run[commit_num_i]
        tag = run[-1]
        if commit not in revs:
            revs[commit] = TestedRev(commit, commit_num, [tag])
        else:
            # All test runs of one commit must agree on its commit number
            assert commit_num == revs[commit].commit_number, "Commit numbers do not match"
            revs[commit].tags.append(tag)

    # Return in sorted table. Commit numbers are parsed from tag names, so
    # convert them for comparison: as strings '10000' would sort before '9999'
    revs = sorted(revs.values(), key=lambda rev: int(rev.commit_number))
    log.debug("Found %d tested revisions:\n    %s", len(revs),
              "\n    ".join(['{} ({})'.format(rev.commit_number, rev.commit) for rev in revs]))
    return revs

def rev_find(revs, attr, val):
    """Return the index of the first item in revs whose attribute matches.

    Args:
        revs: Sequence of objects (TestedRev tuples) to search from.
        attr: Name of the attribute to compare.
        val: Value that the attribute must be equal to.

    Raises:
        ValueError: If no item has the given attribute value.
    """
    position = next((idx for idx, candidate in enumerate(revs)
                     if getattr(candidate, attr) == val), None)
    if position is None:
        raise ValueError("Unable to find '{}' value '{}'".format(attr, val))
    return position

def is_xml_format(repo, commit):
    """Tell whether the results stored in a commit are in xml format.

    Asks the repository to resolve '<commit>:results.xml'. A true result is
    taken to mean xml formatted results, anything else is assumed to mean
    json formatted results.

    Returns:
        True for xml format, False for (assumed) json format.
    """
    # NOTE(review): presumably rev_parse() returns a false value when the
    # object does not exist -- confirm against oeqa.utils.git.GitRepo
    has_xml = bool(repo.rev_parse(commit + ':results.xml'))
    if not has_xml:
        log.debug("No xml report in %s, assuming json formatted results", commit)
        return False
    log.debug("Detected report in xml format in %s", commit)
    return True

def read_results(repo, tags, xml=True):
    """Read the metadata and results files of the given tags from repo.

    The files of all tags are fetched with a single 'git show' command and
    its output is then split back into separate documents.

    Args:
        repo: Git repository object providing run_cmd().
        tags: Non-empty list of tag names to read the files from.
        xml: If true, read 'metadata.xml' and 'results.xml' and convert
            them with metadata_xml_to_json() / results_xml_to_json().
            Otherwise read 'metadata.json' and 'results.json'.

    Returns:
        A (metadata, results) tuple of two lists, built from the first
        len(tags) and the remaining parsed documents, respectively.
    """

    def parse_xml_stream(data):
        """Split a stream of concatenated XML documents and parse them"""
        parsed = []
        pending = []
        for line in data.splitlines():
            # An XML declaration after some content starts a new document
            if pending and line.startswith('<?xml version='):
                parsed.append(ET.fromstring(''.join(pending)))
                pending = [line]
            elif line:
                pending.append(line)
        parsed.append(ET.fromstring(''.join(pending)))
        return parsed

    def parse_json_stream(data):
        """Split a stream of concatenated JSON objects and parse them"""
        parsed = []
        pending = []
        for line in data.splitlines():
            if line == '}{':
                # End of one object and start of the next on the same line
                pending.append('}')
                parsed.append(json.loads(''.join(pending), object_pairs_hook=OrderedDict))
                pending = ['{']
            else:
                pending.append(line)
        parsed.append(json.loads(''.join(pending), object_pairs_hook=OrderedDict))
        return parsed

    num_revs = len(tags)

    # Optimize by reading all data with one git command
    log.debug("Loading raw result data from %d tags, %s...", num_revs, tags[0])
    if not xml:
        metadata_objs = [tag + ':metadata.json' for tag in tags]
        results_objs = [tag + ':results.json' for tag in tags]
        raw = repo.run_cmd(['show'] + (metadata_objs + results_objs) + ['--'])
        docs = parse_json_stream(raw)
        return docs[0:num_revs], docs[num_revs:]

    metadata_objs = [tag + ':metadata.xml' for tag in tags]
    results_objs = [tag + ':results.xml' for tag in tags]
    raw = repo.run_cmd(['show'] + (metadata_objs + results_objs) + ['--'])
    docs = parse_xml_stream(raw)
    metadata = [metadata_xml_to_json(elem) for elem in docs[0:num_revs]]
    results = [results_xml_to_json(elem) for elem in docs[num_revs:]]
    return (metadata, results)


def get_data_item(data, key):
    """Look up a value from nested containers with a dotted key.

    The key is split at every '.' and each part is used, in order, to
    index into the value found so far, i.e. 'a.b' returns data['a']['b'].
    Errors raised by the indexing (e.g. KeyError) are not caught.
    """
    node = data
    remainder = key
    while True:
        part, dot, remainder = remainder.partition('.')
        node = node[part]
        if not dot:
            # No separator left, this was the last part of the key
            return node


def metadata_diff(metadata_l, metadata_r):
    """Prepare a side-by-side view of the metadata of two test reports.

    Returns:
        OrderedDict with the keys 'hostname', 'branch', 'commit_num',
        'commit' and 'testrun_count'. Each value is a dict holding the
        'title' of the item, its 'value_old' (from metadata_l) and its
        'value' (from metadata_r). Items that are not found from the
        metadata get the value '(N/A)'.
    """
    def lookup(metadata, path):
        """Get one metadata item, or a placeholder if it does not exist"""
        try:
            return get_data_item(metadata, path)
        except KeyError:
            return '(N/A)'

    # (title, key in the returned dict, dotted key in the metadata)
    items = (('Hostname', 'hostname', 'hostname'),
             ('Branch', 'branch', 'layers.meta.branch'),
             ('Commit number', 'commit_num', 'layers.meta.commit_count'),
             ('Commit', 'commit', 'layers.meta.commit'),
             ('Number of test runs', 'testrun_count', 'testrun_count'))

    diff = OrderedDict()
    for title, name, path in items:
        diff[name] = {'title': title,
                      'value_old': lookup(metadata_l, path),
                      'value': lookup(metadata_r, path)}
    return diff


def print_diff_report(metadata_l, data_l, metadata_r, data_r):
    """Print a plain text report of the differences between two data sets.

    Args:
        metadata_l: Metadata of the 'left' data set (shown as
            'COMPARING WITH').
        data_l: Test data of the 'left' data set, a mapping with a 'tests'
            item.
        metadata_r: Metadata of the 'right' data set (shown as
            'CURRENT COMMIT').
        data_r: Test data of the 'right' data set, a mapping with a 'tests'
            item.
    """
    # First, print general metadata
    print("\nTEST METADATA:\n==============")
    meta_rows = [['', 'CURRENT COMMIT', 'COMPARING WITH']]
    for key, item in metadata_diff(metadata_l, metadata_r).items():
        new_val = item['value']
        old_val = item['value_old']
        if key == 'commit':
            # Shorten commit hashes
            new_val = new_val[:20]
            old_val = old_val[:20]
        meta_rows.append([item['title'] + ':', new_val, old_val])
    print_table(meta_rows, ['{:{wid}} ', '{:<{wid}}   ', '{:<{wid}}'])

    # Print test results
    print("\nTEST RESULTS:\n=============")

    tests_l = data_l['tests']
    tests_r = data_r['tests']
    # Tests of the 'left' set, followed by those only present in 'right'
    test_names = list(tests_l.keys())
    test_names = test_names + [t for t in tests_r.keys() if t not in test_names]

    # Prepare data to be printed
    row_fmt = ['{:8}', '{:{wid}}', '{:{wid}}', '  {:>{wid}}', ' {:{wid}} ', '{:{wid}}',
               '  {:>{wid}}', '  {:>{wid}}']
    num_cols = len(row_fmt)
    rows = []
    for test in test_names:
        test_l = tests_l[test] if test in tests_l else None
        test_r = tests_r[test] if test in tests_r else None

        # Mark tests that only exist in one of the data sets
        if test_l is None:
            pref = '+'
        elif test_r is None:
            pref = '-'
        else:
            pref = ' '
        descr = (test_l or test_r)['description']
        rows.append(["{} {}: {}".format(pref, test, descr)])

        # Generate the list of measurements, 'left' ones first
        meas_l = test_l['measurements'] if test_l else {}
        meas_r = test_r['measurements'] if test_r else {}
        measurements = list(meas_l.keys())
        measurements = measurements + [m for m in meas_r.keys() if m not in measurements]

        for meas in measurements:
            in_left = meas in meas_l
            stats_l = measurement_stats(meas_l[meas] if in_left else None, 'l.')
            in_right = meas in meas_r
            stats_r = measurement_stats(meas_r[meas] if in_right else None, 'r.')

            # A measurement missing from 'right' takes precedence
            if not in_right:
                m_pref = '-'
            elif not in_left:
                m_pref = '+'
            else:
                m_pref = ' '

            stats = stats_l.copy()
            stats.update(stats_r)

            val_cls = stats['val_cls']
            mean_r = stats['r.mean']
            mean_l = stats['l.mean']
            absdiff = val_cls(mean_r - mean_l)
            reldiff = "{:+.1f} %".format(absdiff * 100 / mean_l)
            # Explicit plus sign for increased values
            sign = '+' if mean_r > mean_l else ''
            rows.append(['', m_pref, stats['name'] + ' ' + stats['quantity'],
                         str(mean_l), '->', str(mean_r),
                         sign + str(absdiff), reldiff])
        # Empty row between tests
        rows.append([''] * num_cols)

    print_table(rows, row_fmt)

    print()


def print_html_report(data, id_comp):
    """Print report in html format.

    Args:
        data: List of (metadata, test data) pairs, one per revision. The
            last item is the revision being reported.
        id_comp: Index (in data) of the revision to compare the last
            revision against.
    """
    # Handle metadata
    metadata = metadata_diff(data[id_comp][0], data[-1][0])

    # Generate list of tests, based on the tests of the last revision
    tests = []
    for test, test_r in data[-1][1]['tests'].items():
        new_test = {'name': test_r['name'],
                    'description': test_r['description'],
                    'status': test_r['status'],
                    'measurements': [],
                    'err_type': test_r.get('err_type'),
                   }
        # Limit length of err output shown
        if 'message' in test_r:
            message = test_r['message']
            lines = message.splitlines()
            if len(lines) > 20:
                message = '...\n' + '\n'.join(lines[-20:])
            new_test['message'] = message

        # Generate the list of measurements
        for meas, meas_r in test_r['measurements'].items():
            meas_type = 'time' if meas_r['type'] == 'sysres' else 'size'
            new_meas = {'name': meas_r['name'],
                        'legend': meas_r['legend'],
                        'description': meas_r['name'] + ' ' + meas_type,
                       }

            # Run through all revisions in our data
            samples = []
            for meta, test_data in data:
                rev_tests = test_data['tests']
                if test in rev_tests and meas in rev_tests[test]['measurements']:
                    meas_i = rev_tests[test]['measurements'][meas]
                    commit_num = get_data_item(meta, 'layers.meta.commit_count')
                    sample = measurement_stats(meas_i)
                    sample['commit_num'] = commit_num
                else:
                    # No data of this measurement in this revision
                    sample = measurement_stats(None)
                samples.append(sample)

            latest = samples[-1]
            baseline = samples[id_comp]
            absdiff = latest['val_cls'](latest['mean'] - baseline['mean'])
            new_meas['absdiff'] = absdiff
            new_meas['absdiff_str'] = str(absdiff) if absdiff < 0 else '+' + str(absdiff)
            new_meas['reldiff'] = "{:+.1f} %".format(absdiff * 100 / baseline['mean'])
            new_meas['samples'] = samples
            new_meas['value'] = latest
            new_meas['value_type'] = latest['val_cls']

            new_test['measurements'].append(new_meas)
        tests.append(new_test)

    # Chart options: horizontal axis spans the commit numbers of the data
    first_commit_num = get_data_item(data[0][0], 'layers.meta.commit_count')
    last_commit_num = get_data_item(data[-1][0], 'layers.meta.commit_count')
    chart_opts = {'haxis': {'min': first_commit_num,
                            'max': last_commit_num}
                 }

    print(html.template.render(metadata=metadata, test_data=tests, chart_opts=chart_opts))


def auto_args(repo, args):
    """Guess the hostname and branch arguments from the results repository.

    Reads the body of the latest commit (over all branches and remotes) and
    looks for '<key>: <value>' lines. The values of the 'hostname' and
    'branch' keys, if found, are stored into args.hostname and args.branch.
    """
    # Get the latest commit in the repo
    log.debug("Guessing arguments from the latest commit")
    body = repo.run_cmd(['log', '-1', '--branches', '--remotes', '--format=%b'])
    for line in body.splitlines():
        key, colon, val = line.partition(':')
        if not colon:
            # Not a 'key: value' line
            continue

        val = val.strip()
        if key == 'hostname':
            log.debug("Using hostname %s", val)
            args.hostname = val
        elif key == 'branch':
            log.debug("Using branch %s", val)
            args.branch = val


def parse_args(argv):
    """Parse command line arguments.

    Args:
        argv: List of argument strings, handed to argparse as is.

    Returns:
        The argparse namespace holding the parsed options.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="\nExamine build performance test results from a Git repository")

    # (option strings, keyword arguments) in the order shown in the help
    general_opts = (
        (('--debug', '-d'), dict(action='store_true',
                                 help="Verbose logging")),
        (('--repo', '-r'), dict(required=True,
                                help="Results repository (local git clone)")),
        (('--list', '-l'), dict(action='store_true',
                                help="List available test runs")),
        (('--html',), dict(action='store_true',
                           help="Generate report in html format")),
    )
    revision_opts = (
        (('--tag-name', '-t'),
         dict(default='{hostname}/{branch}/{machine}/{commit_number}-g{commit}/{tag_number}',
              help="Tag name (pattern) for finding results")),
        (('--hostname', '-H'), dict()),
        (('--branch', '-B'), dict(default='master')),
        (('--machine',), dict(default='qemux86')),
        (('--history-length',),
         dict(default=25, type=int,
              help="Number of tested revisions to plot in html report")),
        (('--commit',), dict(help="Revision to search for")),
        (('--commit-number',),
         dict(help="Revision number to search for, redundant if "
                   "--commit is specified")),
        (('--commit2',), dict(help="Revision to compare with")),
        (('--commit-number2',),
         dict(help="Revision number to compare with, redundant if "
                   "--commit2 is specified")),
    )

    for flags, options in general_opts:
        parser.add_argument(*flags, **options)

    group = parser.add_argument_group('Tag and revision')
    for flags, options in revision_opts:
        group.add_argument(*flags, **options)

    return parser.parse_args(argv)


def main(argv=None):
    """Script entry point.

    Parses the command line, finds the tested revisions from the results
    repository, picks the two revisions to compare and prints either a
    plain text or an html report of them.

    Args:
        argv: List of command line arguments passed to parse_args(); with
            None argparse falls back to sys.argv.

    Returns:
        0 on success, 1 if the report could not be generated.
    """
    args = parse_args(argv)
    if args.debug:
        log.setLevel(logging.DEBUG)

    repo = GitRepo(args.repo)

    if args.list:
        list_test_revs(repo, args.tag_name)
        return 0

    # Determine hostname which to use
    if not args.hostname:
        auto_args(repo, args)
    if not args.hostname:
        # Nothing to build the tag pattern from, bail out cleanly
        log.error("Unable to determine hostname, use --hostname to specify it")
        return 1

    revs = get_test_revs(repo, args.tag_name, hostname=args.hostname,
                         branch=args.branch, machine=args.machine)
    if len(revs) < 2:
        log.error("%d tester revisions found, unable to generate report",
                  len(revs))
        return 1

    # Pick revisions
    try:
        if args.commit:
            if args.commit_number:
                log.warning("Ignoring --commit-number as --commit was specified")
            index1 = rev_find(revs, 'commit', args.commit)
        elif args.commit_number:
            index1 = rev_find(revs, 'commit_number', args.commit_number)
        else:
            index1 = len(revs) - 1

        if args.commit2:
            if args.commit_number2:
                log.warning("Ignoring --commit-number2 as --commit2 was specified")
            index2 = rev_find(revs, 'commit', args.commit2)
        elif args.commit_number2:
            index2 = rev_find(revs, 'commit_number', args.commit_number2)
        else:
            if index1 > 0:
                index2 = index1 - 1
            else:
                log.error("Unable to determine the other commit, use "
                          "--commit2 or --commit-number2 to specify it")
                return 1
    except ValueError as err:
        # rev_find() did not find the revision requested by the user
        log.error("%s", err)
        return 1

    index_l = min(index1, index2)
    index_r = max(index1, index2)

    rev_l = revs[index_l]
    rev_r = revs[index_r]
    log.debug("Using 'left' revision %s (%s), %s test runs:\n    %s",
              rev_l.commit_number, rev_l.commit, len(rev_l.tags),
              '\n    '.join(rev_l.tags))
    log.debug("Using 'right' revision %s (%s), %s test runs:\n    %s",
              rev_r.commit_number, rev_r.commit, len(rev_r.tags),
              '\n    '.join(rev_r.tags))

    # Check report format used in the repo (assume all reports in the same fmt)
    xml = is_xml_format(repo, revs[index_r].tags[-1])

    if args.html:
        # Always include the 'left' revision, even if it is further away
        # than the requested history length
        index_0 = max(0, min(index_l, index_r - args.history_length))
        rev_range = range(index_0, index_r + 1)
    else:
        # We do not need range of commits for text report (no graphs)
        index_0 = index_l
        rev_range = (index_l, index_r)

    # Read raw data
    log.debug("Reading %d revisions, starting from %s (%s)",
              len(rev_range), revs[index_0].commit_number, revs[index_0].commit)
    raw_data = [read_results(repo, revs[i].tags, xml) for i in rev_range]

    data = []
    for raw_m, raw_d in raw_data:
        data.append((aggregate_metadata(raw_m), aggregate_data(raw_d)))

    # Print report
    if not args.html:
        # Only the two compared revisions were read, so data holds exactly
        # the 'left' and the 'right' one, in this order, no matter how far
        # apart they are in revs
        (metadata_l, data_l), (metadata_r, data_r) = data
        print_diff_report(metadata_l, data_l, metadata_r, data_r)
    else:
        # data starts from revision index_0, re-map the index of the
        # 'left' revision accordingly ('right' is the last item)
        print_html_report(data, index_l - index_0)

    return 0

# Run main() only when executed as a script (not when imported) and use
# its return value as the exit status of the process
if __name__ == "__main__":
    sys.exit(main())