from __future__ import with_statement

import os
import string
import re
import tarfile
from collections import defaultdict

from samples import *
from process_tree import ProcessTree
class ParseError(Exception):
    """Represents errors during parse of the bootchart."""
    def __init__(self, value):
        self.value = value

    def __str__(self):
        return self.value
def _parse_headers(file):
    """Parses the headers of the bootchart."""
    def parse((headers, last), line):
        # Lines look like 'key = value'; a line without '=' is a
        # continuation of the most recently seen key.
        if '=' in line:
            last, value = map(string.strip, line.split('=', 1))
        else:
            value = line.strip()
        headers[last] += value
        return headers, last
    return reduce(parse, file.read().split('\n'), (defaultdict(str), ''))[0]
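# Illustrative header input (key names taken from get_num_cpus below,
# values assumed):
#
#   version = 0.8
#   system.cpu = Intel(R) Celeron(R) CPU (2)
#
# yields {'version': '0.8', 'system.cpu': 'Intel(R) Celeron(R) CPU (2)'}.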
def _parse_timed_blocks(file):
    """Parses (i.e., splits) a file into so-called timed-blocks. A
    timed-block consists of a timestamp on a line by itself followed
    by zero or more lines of data for that point in time."""
    def parse(block):
        lines = block.split('\n')
        if not lines:
            raise ParseError('expected a timed-block consisting of a timestamp followed by data lines')
        try:
            return (int(lines[0]), lines[1:])
        except ValueError:
            raise ParseError("expected a timed-block, but timestamp '%s' is not an integer" % lines[0])
    blocks = file.read().split('\n\n')
    return [parse(block) for block in blocks if block.strip()]
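# Illustrative input: "100\nfirst\nsecond\n\n200\nthird\n" parses to
# [(100, ['first', 'second']), (200, ['third'])]. The timestamp unit is
# whatever the collector wrote (apparently jiffies, given the 100 Hz
# scaling in _parse_proc_disk_stat_log below).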
def _parse_proc_ps_log(file):
    """
    Parses a /proc/[pid]/stat log; see proc(5) for details. Each line holds:

    {pid, comm, state, ppid, pgrp, session, tty_nr, tpgid, flags, minflt,
     cminflt, majflt, cmajflt, utime, stime, cutime, cstime, priority, nice,
     0, itrealvalue, starttime, vsize, rss, rlim, startcode, endcode,
     startstack, kstkesp, kstkeip}
    """
    processMap = {}
    ltime = 0
    timed_blocks = _parse_timed_blocks(file)
    for time, lines in timed_blocks:
        for line in lines:
            tokens = line.split(' ')

            # comm may itself contain spaces, so locate the token that
            # closes it with ')' and offset the fixed fields accordingly.
            offset = [index for index, token in enumerate(tokens[1:]) if token.endswith(')')][0]
            pid, cmd, state, ppid = int(tokens[0]), ' '.join(tokens[1:2+offset]), tokens[2+offset], int(tokens[3+offset])
            userCpu, sysCpu, starttime = int(tokens[13+offset]), int(tokens[14+offset]), int(tokens[21+offset])

            if pid in processMap:
                process = processMap[pid]
                process.cmd = cmd.replace('(', '').replace(')', '') # why rename after latest name??
            else:
                process = Process(pid, cmd, ppid, min(time, starttime))
                processMap[pid] = process

            if process.last_user_cpu_time is not None and process.last_sys_cpu_time is not None and ltime is not None:
                userCpuLoad, sysCpuLoad = process.calc_load(userCpu, sysCpu, time - ltime)
                cpuSample = CPUSample('null', userCpuLoad, sysCpuLoad, 0.0)
                process.samples.append(ProcessSample(time, state, cpuSample))

            process.last_user_cpu_time = userCpu
            process.last_sys_cpu_time = sysCpu
        ltime = time

    startTime = timed_blocks[0][0]
    # Guard the single-sample case, which would otherwise divide by zero.
    avgSampleLength = (ltime - startTime) / max(1, len(timed_blocks) - 1)

    for process in processMap.values():
        process.set_parent(processMap)

    for process in processMap.values():
        process.calc_stats(avgSampleLength)

    return ProcessStats(processMap.values(), avgSampleLength, startTime, ltime)
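# Illustrative /proc/[pid]/stat line (values assumed): for
#   53 (bash -c foo) S 1 53 53 ...
# the ')' search gives offset 2, so cmd is '(bash -c foo)', state 'S'
# and ppid 1; positions 13, 14 and 21 past the offset supply utime,
# stime and starttime.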
def _parse_proc_stat_log(file):
    samples = []
    ltimes = None
    for time, lines in _parse_timed_blocks(file):
        # CPU times {user, nice, system, idle, io_wait, irq, softirq}
        tokens = lines[0].split()
        times = [int(token) for token in tokens[1:]]
        if ltimes:
            user = float((times[0] + times[1]) - (ltimes[0] + ltimes[1]))
            system = float((times[2] + times[5] + times[6]) - (ltimes[2] + ltimes[5] + ltimes[6]))
            idle = float(times[3] - ltimes[3])
            iowait = float(times[4] - ltimes[4])

            aSum = max(user + system + idle + iowait, 1)
            samples.append(CPUSample(time, user/aSum, system/aSum, iowait/aSum))

        ltimes = times
        # skip the rest of statistics lines
    return samples
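# Illustrative first line of a /proc/stat block (values assumed):
#   cpu  6532 0 2087 34823 1420 12 85
# Deltas between consecutive samples are normalised so that
# user + system + idle + iowait sums to 1, which is why the CPU graphs
# are relative rather than absolute.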
def _parse_proc_disk_stat_log(file, numCpu):
    """
    Parse file for disk stats, but only look at the whole disks, e.g. sda,
    not sda1, sda2 etc. The format of relevant lines should be:
    {major minor name rio rmerge rsect ruse wio wmerge wsect wuse running use aveq}
    """
    DISK_REGEX = 'hd.$|sd.$'

    def is_relevant_line(line):
        return len(line.split()) == 14 and re.match(DISK_REGEX, line.split()[2])

    disk_stat_samples = []

    for time, lines in _parse_timed_blocks(file):
        sample = DiskStatSample(time)
        relevant_tokens = [line.split() for line in lines if is_relevant_line(line)]

        for tokens in relevant_tokens:
            disk, rsect, wsect, use = tokens[2], int(tokens[5]), int(tokens[9]), int(tokens[12])
            sample.add_diskdata([rsect, wsect, use])

        disk_stat_samples.append(sample)

    disk_stats = []
    for sample1, sample2 in zip(disk_stat_samples[:-1], disk_stat_samples[1:]):
        # sample2 is the later sample; take positive deltas over the interval.
        interval = sample2.time - sample1.time
        sums = [b - a for a, b in zip(sample1.diskdata, sample2.diskdata)]
        readTput = sums[0] / 2.0 * 100.0 / interval
        writeTput = sums[1] / 2.0 * 100.0 / interval
        util = float(sums[2]) / 10 / interval / numCpu
        util = max(0.0, min(1.0, util))
        disk_stats.append(DiskSample(sample2.time, readTput, writeTput, util))

    return disk_stats
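# Illustrative /proc/diskstats line, 14 fields, whole disk only (values
# assumed):
#   8 0 sda 4583 1231 158730 14560 2326 2984 44054 92781 0 18633 107340
# rsect/wsect count 512-byte sectors, so '/ 2.0' converts to KiB, and
# '* 100.0 / interval' turns per-jiffie deltas into per-second rates
# (assuming 100 Hz timestamps).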
def get_num_cpus(headers):
    """Get the number of CPUs from the system.cpu header property. As the
    CPU utilization graphs are relative, the number of CPUs currently makes
    no difference."""
    if headers is None:
        return 1
    cpu_model = headers.get("system.cpu")
    if cpu_model is None:
        return 1
    mat = re.match(".*\\((\\d+)\\)", cpu_model)
    if mat is None:
        return 1
    return int(mat.group(1))
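# Example (model string assumed): system.cpu = 'Pentium(R) M (1)'
# matches the trailing '(N)' group and yields 1; any header that does
# not parse falls back to a single CPU.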
class ParserState:
    def __init__(self):
        self.processes = {}
        self.start = {}
        self.end = {}

    def valid(self):
        return len(self.processes) != 0


_relevant_files = set(["header", "proc_diskstats.log", "proc_ps.log", "proc_stat.log"])
def _do_parse(state, filename, file, mintime):
    #print filename
    #writer.status("parsing '%s'" % filename)
    paths = filename.split("/")
    task = paths[-1]
    pn = paths[-2]
    start = None
    end = None
    for line in file:
        if line.startswith("Started:"):
            start = int(float(line.split()[-1]))
        elif line.startswith("Ended:"):
            end = int(float(line.split()[-1]))
    if start and end and (end - start) >= mintime:
        k = pn + ":" + task
        state.processes[k] = [start, end]
        if start not in state.start:
            state.start[start] = []
        if k not in state.start[start]:
            state.start[start].append(k)
        if end not in state.end:
            state.end[end] = []
        if k not in state.end[end]:
            state.end[end].append(k)
    return state
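# Illustrative buildstats task log at .../<pn>/<task> (values assumed):
#   Started: 1634120001.12
#   Ended: 1634120042.50
# This records state.processes['<pn>:<task>'] = [1634120001, 1634120042],
# provided the task ran for at least mintime.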
def parse_file(state, filename, mintime):
    basename = os.path.basename(filename)
    with open(filename, "rb") as file:
        return _do_parse(state, filename, file, mintime)
def parse_paths(state, paths, mintime):
    for path in paths:
        root, extension = os.path.splitext(path)
        if not(os.path.exists(path)):
            print "warning: path '%s' does not exist, ignoring." % path
            continue
        if os.path.isdir(path):
            files = [os.path.join(path, f) for f in os.listdir(path)]
            files.sort()
            state = parse_paths(state, files, mintime)
        elif extension in [".tar", ".tgz"] or path.endswith(".tar.gz"):
            # os.path.splitext() only yields the final extension, so
            # '.tar.gz' needs an explicit suffix check.
            tf = None
            try:
                tf = tarfile.open(path, 'r:*')
                for name in tf.getnames():
                    state = _do_parse(state, name, tf.extractfile(name), mintime)
            except tarfile.ReadError, error:
                raise ParseError("error: could not read tarfile '%s': %s." % (path, error))
            finally:
                if tf is not None:
                    tf.close()
        else:
            state = parse_file(state, path, mintime)
    return state
def parse(paths, prune, mintime):
    state = parse_paths(ParserState(), paths, mintime)
    if not state.valid():
        raise ParseError("empty state: '%s' does not contain a valid bootchart" % ", ".join(paths))
    #monitored_app = state.headers.get("profile.process")
    #proc_tree = ProcessTree(state.ps_stats, monitored_app, prune)
    return state
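# Illustrative call (path and threshold assumed): build a ParserState
# from a buildstats tree, keeping tasks that ran for at least 8 units:
#   state = parse(["tmp/buildstats/core-image-minimal/201710251712/"], False, 8)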
def split_res(res, n):
    """Split res into n pieces, fragmenting on sorted start times."""
    res_list = []
    if n > 1:
        s_list = sorted(res.start.keys())
        frag_size = len(s_list) / float(n)
        # Need the top value, so round the fragment size up.
        if frag_size > int(frag_size):
            frag_size = int(frag_size + 1)
        else:
            frag_size = int(frag_size)

        start = 0
        end = frag_size
        while start < end:
            state = ParserState()
            for i in range(start, end):
                # Add these lines for reference
                #state.processes[pn + ":" + task] = [start, end]
                #state.start[start] = pn + ":" + task
                #state.end[end] = pn + ":" + task
                for p in res.start[s_list[i]]:
                    s = s_list[i]
                    e = res.processes[p][1]
                    state.processes[p] = [s, e]
                    if s not in state.start:
                        state.start[s] = []
                    if p not in state.start[s]:
                        state.start[s].append(p)
                    if e not in state.end:
                        state.end[e] = []
                    if p not in state.end[e]:
                        state.end[e].append(p)
            start = end
            end = end + frag_size
            if end > len(s_list):
                end = len(s_list)
            res_list.append(state)
    else:
        res_list.append(res)
    return res_list
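# Example (counts assumed): with 10 distinct start times and n = 4,
# frag_size = ceil(10 / 4.0) = 3, so the fragments cover 3, 3, 3 and 1
# start-time groups; each fragment is a self-contained ParserState and
# every process keeps its original [start, end] interval.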