generic-poky/bitbake/lib/bb/siggen.py

678 lines
25 KiB
Python
Raw Normal View History

import hashlib
import logging
import os
import re
import tempfile
import pickle
import bb.data
import difflib
import simplediff
from bb.checksum import FileChecksumCache
logger = logging.getLogger('BitBake.SigGen')
def init(d):
siggens = [obj for obj in globals().values()
if type(obj) is type and issubclass(obj, SignatureGenerator)]
desired = d.getVar("BB_SIGNATURE_HANDLER") or "noop"
for sg in siggens:
if desired == sg.name:
return sg(d)
break
else:
logger.error("Invalid signature generator '%s', using default 'noop'\n"
"Available generators: %s", desired,
', '.join(obj.name for obj in siggens))
return SignatureGenerator(d)
class SignatureGenerator(object):
"""
"""
name = "noop"
def __init__(self, data):
self.basehash = {}
self.taskhash = {}
self.runtaskdeps = {}
self.file_checksum_values = {}
self.taints = {}
def finalise(self, fn, d, varient):
return
def get_taskhash(self, fn, task, deps, dataCache):
return "0"
def writeout_file_checksum_cache(self):
"""Write/update the file checksum cache onto disk"""
return
def stampfile(self, stampbase, file_name, taskname, extrainfo):
return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')
def stampcleanmask(self, stampbase, file_name, taskname, extrainfo):
return ("%s.%s.%s" % (stampbase, taskname, extrainfo)).rstrip('.')
def dump_sigtask(self, fn, task, stampbase, runtime):
return
2012-06-18 15:45:35 +00:00
def invalidate_task(self, task, d, fn):
bb.build.del_stamp(task, d, fn)
def dump_sigs(self, dataCache, options):
return
2012-06-18 15:45:35 +00:00
def get_taskdata(self):
return (self.runtaskdeps, self.taskhash, self.file_checksum_values, self.taints, self.basehash)
def set_taskdata(self, data):
self.runtaskdeps, self.taskhash, self.file_checksum_values, self.taints, self.basehash = data
class SignatureGeneratorBasic(SignatureGenerator):
"""
"""
name = "basic"
def __init__(self, data):
self.basehash = {}
self.taskhash = {}
self.taskdeps = {}
self.runtaskdeps = {}
self.file_checksum_values = {}
self.taints = {}
self.gendeps = {}
self.lookupcache = {}
self.pkgnameextract = re.compile("(?P<fn>.*)\..*")
self.basewhitelist = set((data.getVar("BB_HASHBASE_WHITELIST") or "").split())
self.taskwhitelist = None
self.init_rundepcheck(data)
checksum_cache_file = data.getVar("BB_HASH_CHECKSUM_CACHE_FILE")
if checksum_cache_file:
self.checksum_cache = FileChecksumCache()
self.checksum_cache.init_cache(data, checksum_cache_file)
else:
self.checksum_cache = None
def init_rundepcheck(self, data):
self.taskwhitelist = data.getVar("BB_HASHTASK_WHITELIST") or None
if self.taskwhitelist:
self.twl = re.compile(self.taskwhitelist)
else:
self.twl = None
def _build_data(self, fn, d):
ignore_mismatch = ((d.getVar("BB_HASH_IGNORE_MISMATCH") or '') == '1')
tasklist, gendeps, lookupcache = bb.data.generate_dependencies(d)
taskdeps = {}
basehash = {}
for task in tasklist:
data = lookupcache[task]
if data is None:
bb.error("Task %s from %s seems to be empty?!" % (task, fn))
data = ''
bitbake: Remove whitelisted vars from non-task deps Though the value of variables in the BB_BASEHASH_WHITELIST is kept out of the checksums, dependency on them is not, at least for variables and non-task functions. In the code, the whitelist is removed from the overall task dep list, but not the individual variable deps. The result of this is that functions like sysroot_stage_all and oe_runmake end up with whitelisted variables like TERM listed in their dependencies, which means that doing a 'unset TERM' before building will result in all checksums for tasks that depend on those changing, and shared state reuse not behaving correctly. This is only really a potential issue for variables from the environment, as it's the existance/removal of the variable that's an issue, not its value, and the other whitelisted variables are set in our metadata. This which means in practical terms the only cases where this is likely to be an issue are in environments where one of the following are unset: TERM, LOGNAME, HOME, USER, PWD, SHELL. This may seem like an unlikely circumstance, but is in fact a real issue for those of us using autobuilders. Jenkins does not set TERM when executing shell, which means shared state archives produced by your jenkins server would not be fully reused by an actual user. Fixed by removing the whitelisted elements from the individual variable deps, not just the accumulated result. (Bitbake rev: dac12560ac8431ee24609f8de25cb1645572d350) Signed-off-by: Christopher Larson <chris_larson@mentor.com> Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
2012-04-03 23:23:49 +00:00
gendeps[task] -= self.basewhitelist
newdeps = gendeps[task]
seen = set()
while newdeps:
nextdeps = newdeps
seen |= nextdeps
newdeps = set()
for dep in nextdeps:
if dep in self.basewhitelist:
continue
bitbake: Remove whitelisted vars from non-task deps Though the value of variables in the BB_BASEHASH_WHITELIST is kept out of the checksums, dependency on them is not, at least for variables and non-task functions. In the code, the whitelist is removed from the overall task dep list, but not the individual variable deps. The result of this is that functions like sysroot_stage_all and oe_runmake end up with whitelisted variables like TERM listed in their dependencies, which means that doing a 'unset TERM' before building will result in all checksums for tasks that depend on those changing, and shared state reuse not behaving correctly. This is only really a potential issue for variables from the environment, as it's the existance/removal of the variable that's an issue, not its value, and the other whitelisted variables are set in our metadata. This which means in practical terms the only cases where this is likely to be an issue are in environments where one of the following are unset: TERM, LOGNAME, HOME, USER, PWD, SHELL. This may seem like an unlikely circumstance, but is in fact a real issue for those of us using autobuilders. Jenkins does not set TERM when executing shell, which means shared state archives produced by your jenkins server would not be fully reused by an actual user. Fixed by removing the whitelisted elements from the individual variable deps, not just the accumulated result. (Bitbake rev: dac12560ac8431ee24609f8de25cb1645572d350) Signed-off-by: Christopher Larson <chris_larson@mentor.com> Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
2012-04-03 23:23:49 +00:00
gendeps[dep] -= self.basewhitelist
newdeps |= gendeps[dep]
newdeps -= seen
bitbake: Remove whitelisted vars from non-task deps Though the value of variables in the BB_BASEHASH_WHITELIST is kept out of the checksums, dependency on them is not, at least for variables and non-task functions. In the code, the whitelist is removed from the overall task dep list, but not the individual variable deps. The result of this is that functions like sysroot_stage_all and oe_runmake end up with whitelisted variables like TERM listed in their dependencies, which means that doing a 'unset TERM' before building will result in all checksums for tasks that depend on those changing, and shared state reuse not behaving correctly. This is only really a potential issue for variables from the environment, as it's the existance/removal of the variable that's an issue, not its value, and the other whitelisted variables are set in our metadata. This which means in practical terms the only cases where this is likely to be an issue are in environments where one of the following are unset: TERM, LOGNAME, HOME, USER, PWD, SHELL. This may seem like an unlikely circumstance, but is in fact a real issue for those of us using autobuilders. Jenkins does not set TERM when executing shell, which means shared state archives produced by your jenkins server would not be fully reused by an actual user. Fixed by removing the whitelisted elements from the individual variable deps, not just the accumulated result. (Bitbake rev: dac12560ac8431ee24609f8de25cb1645572d350) Signed-off-by: Christopher Larson <chris_larson@mentor.com> Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
2012-04-03 23:23:49 +00:00
alldeps = sorted(seen)
for dep in alldeps:
data = data + dep
var = lookupcache[dep]
if var is not None:
data = data + str(var)
datahash = hashlib.md5(data.encode("utf-8")).hexdigest()
k = fn + "." + task
if not ignore_mismatch and k in self.basehash and self.basehash[k] != datahash:
bb.error("When reparsing %s, the basehash value changed from %s to %s. The metadata is not deterministic and this needs to be fixed." % (k, self.basehash[k], datahash))
self.basehash[k] = datahash
bitbake: Remove whitelisted vars from non-task deps Though the value of variables in the BB_BASEHASH_WHITELIST is kept out of the checksums, dependency on them is not, at least for variables and non-task functions. In the code, the whitelist is removed from the overall task dep list, but not the individual variable deps. The result of this is that functions like sysroot_stage_all and oe_runmake end up with whitelisted variables like TERM listed in their dependencies, which means that doing a 'unset TERM' before building will result in all checksums for tasks that depend on those changing, and shared state reuse not behaving correctly. This is only really a potential issue for variables from the environment, as it's the existance/removal of the variable that's an issue, not its value, and the other whitelisted variables are set in our metadata. This which means in practical terms the only cases where this is likely to be an issue are in environments where one of the following are unset: TERM, LOGNAME, HOME, USER, PWD, SHELL. This may seem like an unlikely circumstance, but is in fact a real issue for those of us using autobuilders. Jenkins does not set TERM when executing shell, which means shared state archives produced by your jenkins server would not be fully reused by an actual user. Fixed by removing the whitelisted elements from the individual variable deps, not just the accumulated result. (Bitbake rev: dac12560ac8431ee24609f8de25cb1645572d350) Signed-off-by: Christopher Larson <chris_larson@mentor.com> Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
2012-04-03 23:23:49 +00:00
taskdeps[task] = alldeps
self.taskdeps[fn] = taskdeps
self.gendeps[fn] = gendeps
self.lookupcache[fn] = lookupcache
return taskdeps
def finalise(self, fn, d, variant):
bitbake: bitbake: Initial multi-config support This patch adds the notion of supporting multiple configurations within a single build. To enable it, set a line in local.conf like: BBMULTICONFIG = "configA configB configC" This would tell bitbake that before it parses the base configuration, it should load conf/configA.conf and so on for each different configuration. These would contain lines like: MACHINE = "A" or other variables which can be set which can be built in the same build directory (or change TMPDIR not to conflict). One downside I've already discovered is that if we want to inherit this file right at the start of parsing, the only place you can put the configurations is in "cwd", since BBPATH isn't constructed until the layers are parsed and therefore using it as a preconf file isn't possible unless its located there. Execution of these targets takes the form "bitbake multiconfig:configA:core-image-minimal core-image-sato" so similar to our virtclass approach for native/nativesdk/multilib using BBCLASSEXTEND. Implementation wise, the implication is that instead of tasks being uniquely referenced with "recipename/fn:task" it now needs to be "configuration:recipename:task". We already started using "virtual" filenames for recipes when we implemented BBCLASSEXTEND and this patch adds a new prefix to these, "multiconfig:<configname>:" and hence avoid changes to a large part of the codebase thanks to this. databuilder has an internal array of data stores and uses the right one depending on the supplied virtual filename. That trick allows us to use the existing parsing code including the multithreading mostly unchanged as well as most of the cache code. For recipecache, we end up with a dict of these accessed by multiconfig (mc). taskdata and runqueue can only cope with one recipecache so for taskdata, we pass in each recipecache and have it compute the result and end up with an array of taskdatas. We can only have one runqueue so there extensive changes there. This initial implementation has some drawbacks: a) There are no inter-multi-configuration dependencies as yet b) There are no sstate optimisations. This means if the build uses the same object twice in say two different TMPDIRs, it will either load from an existing sstate cache at the start or build it twice. We can then in due course look at ways in which it would only build it once and then reuse it. This will likely need significant changes to the way sstate currently works to make that possible. (Bitbake rev: 5287991691578825c847bac2368e9b51c0ede3f0) Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
2016-08-16 16:47:06 +00:00
mc = d.getVar("__BBMULTICONFIG", False) or ""
if variant or mc:
fn = bb.cache.realfn2virtual(fn, variant, mc)
try:
taskdeps = self._build_data(fn, d)
except:
bb.warn("Error during finalise of %s" % fn)
raise
#Slow but can be useful for debugging mismatched basehashes
#for task in self.taskdeps[fn]:
# self.dump_sigtask(fn, task, d.getVar("STAMP"), False)
for task in taskdeps:
d.setVar("BB_BASEHASH_task-%s" % task, self.basehash[fn + "." + task])
def rundep_check(self, fn, recipename, task, dep, depname, dataCache):
# Return True if we should keep the dependency, False to drop it
# We only manipulate the dependencies for packages not in the whitelist
if self.twl and not self.twl.search(recipename):
# then process the actual dependencies
if self.twl.search(depname):
return False
return True
2012-06-18 15:45:35 +00:00
def read_taint(self, fn, task, stampbase):
taint = None
try:
with open(stampbase + '.' + task + '.taint', 'r') as taintf:
taint = taintf.read()
except IOError:
pass
return taint
def get_taskhash(self, fn, task, deps, dataCache):
k = fn + "." + task
data = dataCache.basetaskhash[k]
self.basehash[k] = data
self.runtaskdeps[k] = []
self.file_checksum_values[k] = []
recipename = dataCache.pkg_fn[fn]
for dep in sorted(deps, key=clean_basepath):
depname = dataCache.pkg_fn[self.pkgnameextract.search(dep).group('fn')]
if not self.rundep_check(fn, recipename, task, dep, depname, dataCache):
continue
if dep not in self.taskhash:
bb.fatal("%s is not in taskhash, caller isn't calling in dependency order?", dep)
data = data + self.taskhash[dep]
self.runtaskdeps[k].append(dep)
if task in dataCache.file_checksums[fn]:
if self.checksum_cache:
checksums = self.checksum_cache.get_checksums(dataCache.file_checksums[fn][task], recipename)
else:
checksums = bb.fetch2.get_file_checksums(dataCache.file_checksums[fn][task], recipename)
for (f,cs) in checksums:
self.file_checksum_values[k].append((f,cs))
if cs:
data = data + cs
2012-06-18 15:45:35 +00:00
taskdep = dataCache.task_deps[fn]
if 'nostamp' in taskdep and task in taskdep['nostamp']:
# Nostamp tasks need an implicit taint so that they force any dependent tasks to run
import uuid
taint = str(uuid.uuid4())
data = data + taint
self.taints[k] = "nostamp:" + taint
2012-06-18 15:45:35 +00:00
taint = self.read_taint(fn, task, dataCache.stamp[fn])
if taint:
data = data + taint
self.taints[k] = taint
logger.warning("%s is tainted from a forced run" % k)
2012-06-18 15:45:35 +00:00
h = hashlib.md5(data.encode("utf-8")).hexdigest()
self.taskhash[k] = h
#d.setVar("BB_TASKHASH_task-%s" % task, taskhash[task])
return h
def writeout_file_checksum_cache(self):
"""Write/update the file checksum cache onto disk"""
if self.checksum_cache:
self.checksum_cache.save_extras()
self.checksum_cache.save_merge()
else:
bb.fetch2.fetcher_parse_save()
bb.fetch2.fetcher_parse_done()
def dump_sigtask(self, fn, task, stampbase, runtime):
k = fn + "." + task
referencestamp = stampbase
if isinstance(runtime, str) and runtime.startswith("customfile"):
sigfile = stampbase
referencestamp = runtime[11:]
elif runtime and k in self.taskhash:
sigfile = stampbase + "." + task + ".sigdata" + "." + self.taskhash[k]
else:
sigfile = stampbase + "." + task + ".sigbasedata" + "." + self.basehash[k]
bb.utils.mkdirhier(os.path.dirname(sigfile))
data = {}
data['task'] = task
data['basewhitelist'] = self.basewhitelist
data['taskwhitelist'] = self.taskwhitelist
data['taskdeps'] = self.taskdeps[fn][task]
data['basehash'] = self.basehash[k]
data['gendeps'] = {}
data['varvals'] = {}
data['varvals'][task] = self.lookupcache[fn][task]
for dep in self.taskdeps[fn][task]:
if dep in self.basewhitelist:
continue
data['gendeps'][dep] = self.gendeps[fn][dep]
data['varvals'][dep] = self.lookupcache[fn][dep]
if runtime and k in self.taskhash:
data['runtaskdeps'] = self.runtaskdeps[k]
data['file_checksum_values'] = [(os.path.basename(f), cs) for f,cs in self.file_checksum_values[k]]
data['runtaskhashes'] = {}
for dep in data['runtaskdeps']:
data['runtaskhashes'][dep] = self.taskhash[dep]
data['taskhash'] = self.taskhash[k]
taint = self.read_taint(fn, task, referencestamp)
2012-06-18 15:45:35 +00:00
if taint:
data['taint'] = taint
if runtime and k in self.taints:
if 'nostamp:' in self.taints[k]:
data['taint'] = self.taints[k]
computed_basehash = calc_basehash(data)
if computed_basehash != self.basehash[k]:
bb.error("Basehash mismatch %s versus %s for %s" % (computed_basehash, self.basehash[k], k))
if runtime and k in self.taskhash:
computed_taskhash = calc_taskhash(data)
if computed_taskhash != self.taskhash[k]:
bb.error("Taskhash mismatch %s versus %s for %s" % (computed_taskhash, self.taskhash[k], k))
sigfile = sigfile.replace(self.taskhash[k], computed_taskhash)
fd, tmpfile = tempfile.mkstemp(dir=os.path.dirname(sigfile), prefix="sigtask.")
try:
with os.fdopen(fd, "wb") as stream:
p = pickle.dump(data, stream, -1)
stream.flush()
os.chmod(tmpfile, 0o664)
os.rename(tmpfile, sigfile)
except (OSError, IOError) as err:
try:
os.unlink(tmpfile)
except OSError:
pass
raise err
def dump_sigfn(self, fn, dataCaches, options):
if fn in self.taskdeps:
for task in self.taskdeps[fn]:
bitbake: bitbake: Initial multi-config support This patch adds the notion of supporting multiple configurations within a single build. To enable it, set a line in local.conf like: BBMULTICONFIG = "configA configB configC" This would tell bitbake that before it parses the base configuration, it should load conf/configA.conf and so on for each different configuration. These would contain lines like: MACHINE = "A" or other variables which can be set which can be built in the same build directory (or change TMPDIR not to conflict). One downside I've already discovered is that if we want to inherit this file right at the start of parsing, the only place you can put the configurations is in "cwd", since BBPATH isn't constructed until the layers are parsed and therefore using it as a preconf file isn't possible unless its located there. Execution of these targets takes the form "bitbake multiconfig:configA:core-image-minimal core-image-sato" so similar to our virtclass approach for native/nativesdk/multilib using BBCLASSEXTEND. Implementation wise, the implication is that instead of tasks being uniquely referenced with "recipename/fn:task" it now needs to be "configuration:recipename:task". We already started using "virtual" filenames for recipes when we implemented BBCLASSEXTEND and this patch adds a new prefix to these, "multiconfig:<configname>:" and hence avoid changes to a large part of the codebase thanks to this. databuilder has an internal array of data stores and uses the right one depending on the supplied virtual filename. That trick allows us to use the existing parsing code including the multithreading mostly unchanged as well as most of the cache code. For recipecache, we end up with a dict of these accessed by multiconfig (mc). taskdata and runqueue can only cope with one recipecache so for taskdata, we pass in each recipecache and have it compute the result and end up with an array of taskdatas. We can only have one runqueue so there extensive changes there. This initial implementation has some drawbacks: a) There are no inter-multi-configuration dependencies as yet b) There are no sstate optimisations. This means if the build uses the same object twice in say two different TMPDIRs, it will either load from an existing sstate cache at the start or build it twice. We can then in due course look at ways in which it would only build it once and then reuse it. This will likely need significant changes to the way sstate currently works to make that possible. (Bitbake rev: 5287991691578825c847bac2368e9b51c0ede3f0) Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
2016-08-16 16:47:06 +00:00
tid = fn + ":" + task
(mc, _, _) = bb.runqueue.split_tid(tid)
k = fn + "." + task
if k not in self.taskhash:
continue
bitbake: bitbake: Initial multi-config support This patch adds the notion of supporting multiple configurations within a single build. To enable it, set a line in local.conf like: BBMULTICONFIG = "configA configB configC" This would tell bitbake that before it parses the base configuration, it should load conf/configA.conf and so on for each different configuration. These would contain lines like: MACHINE = "A" or other variables which can be set which can be built in the same build directory (or change TMPDIR not to conflict). One downside I've already discovered is that if we want to inherit this file right at the start of parsing, the only place you can put the configurations is in "cwd", since BBPATH isn't constructed until the layers are parsed and therefore using it as a preconf file isn't possible unless its located there. Execution of these targets takes the form "bitbake multiconfig:configA:core-image-minimal core-image-sato" so similar to our virtclass approach for native/nativesdk/multilib using BBCLASSEXTEND. Implementation wise, the implication is that instead of tasks being uniquely referenced with "recipename/fn:task" it now needs to be "configuration:recipename:task". We already started using "virtual" filenames for recipes when we implemented BBCLASSEXTEND and this patch adds a new prefix to these, "multiconfig:<configname>:" and hence avoid changes to a large part of the codebase thanks to this. databuilder has an internal array of data stores and uses the right one depending on the supplied virtual filename. That trick allows us to use the existing parsing code including the multithreading mostly unchanged as well as most of the cache code. For recipecache, we end up with a dict of these accessed by multiconfig (mc). taskdata and runqueue can only cope with one recipecache so for taskdata, we pass in each recipecache and have it compute the result and end up with an array of taskdatas. We can only have one runqueue so there extensive changes there. This initial implementation has some drawbacks: a) There are no inter-multi-configuration dependencies as yet b) There are no sstate optimisations. This means if the build uses the same object twice in say two different TMPDIRs, it will either load from an existing sstate cache at the start or build it twice. We can then in due course look at ways in which it would only build it once and then reuse it. This will likely need significant changes to the way sstate currently works to make that possible. (Bitbake rev: 5287991691578825c847bac2368e9b51c0ede3f0) Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
2016-08-16 16:47:06 +00:00
if dataCaches[mc].basetaskhash[k] != self.basehash[k]:
bb.error("Bitbake's cached basehash does not match the one we just generated (%s)!" % k)
bitbake: bitbake: Initial multi-config support This patch adds the notion of supporting multiple configurations within a single build. To enable it, set a line in local.conf like: BBMULTICONFIG = "configA configB configC" This would tell bitbake that before it parses the base configuration, it should load conf/configA.conf and so on for each different configuration. These would contain lines like: MACHINE = "A" or other variables which can be set which can be built in the same build directory (or change TMPDIR not to conflict). One downside I've already discovered is that if we want to inherit this file right at the start of parsing, the only place you can put the configurations is in "cwd", since BBPATH isn't constructed until the layers are parsed and therefore using it as a preconf file isn't possible unless its located there. Execution of these targets takes the form "bitbake multiconfig:configA:core-image-minimal core-image-sato" so similar to our virtclass approach for native/nativesdk/multilib using BBCLASSEXTEND. Implementation wise, the implication is that instead of tasks being uniquely referenced with "recipename/fn:task" it now needs to be "configuration:recipename:task". We already started using "virtual" filenames for recipes when we implemented BBCLASSEXTEND and this patch adds a new prefix to these, "multiconfig:<configname>:" and hence avoid changes to a large part of the codebase thanks to this. databuilder has an internal array of data stores and uses the right one depending on the supplied virtual filename. That trick allows us to use the existing parsing code including the multithreading mostly unchanged as well as most of the cache code. For recipecache, we end up with a dict of these accessed by multiconfig (mc). taskdata and runqueue can only cope with one recipecache so for taskdata, we pass in each recipecache and have it compute the result and end up with an array of taskdatas. We can only have one runqueue so there extensive changes there. This initial implementation has some drawbacks: a) There are no inter-multi-configuration dependencies as yet b) There are no sstate optimisations. This means if the build uses the same object twice in say two different TMPDIRs, it will either load from an existing sstate cache at the start or build it twice. We can then in due course look at ways in which it would only build it once and then reuse it. This will likely need significant changes to the way sstate currently works to make that possible. (Bitbake rev: 5287991691578825c847bac2368e9b51c0ede3f0) Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
2016-08-16 16:47:06 +00:00
bb.error("The mismatched hashes were %s and %s" % (dataCaches[mc].basetaskhash[k], self.basehash[k]))
self.dump_sigtask(fn, task, dataCaches[mc].stamp[fn], True)
class SignatureGeneratorBasicHash(SignatureGeneratorBasic):
name = "basichash"
def stampfile(self, stampbase, fn, taskname, extrainfo, clean=False):
if taskname != "do_setscene" and taskname.endswith("_setscene"):
k = fn + "." + taskname[:-9]
else:
k = fn + "." + taskname
if clean:
h = "*"
elif k in self.taskhash:
h = self.taskhash[k]
else:
# If k is not in basehash, then error
h = self.basehash[k]
return ("%s.%s.%s.%s" % (stampbase, taskname, h, extrainfo)).rstrip('.')
def stampcleanmask(self, stampbase, fn, taskname, extrainfo):
return self.stampfile(stampbase, fn, taskname, extrainfo, clean=True)
2012-06-18 15:45:35 +00:00
def invalidate_task(self, task, d, fn):
bb.note("Tainting hash to force rebuild of task %s, %s" % (fn, task))
2012-06-18 15:45:35 +00:00
bb.build.write_taint(task, d, fn)
def dump_this_task(outfile, d):
import bb.parse
fn = d.getVar("BB_FILENAME")
task = "do_" + d.getVar("BB_CURRENTTASK")
referencestamp = bb.build.stamp_internal(task, d, None, True)
bb.parse.siggen.dump_sigtask(fn, task, outfile, "customfile:" + referencestamp)
def worddiff_str(oldstr, newstr):
diff = simplediff.diff(oldstr.split(' '), newstr.split(' '))
ret = []
for change, value in diff:
value = ' '.join(value)
if change == '=':
ret.append(value)
elif change == '+':
item = '{+%s+}' % value
ret.append(item)
elif change == '-':
item = '[-%s-]' % value
ret.append(item)
whitespace_note = ''
if oldstr != newstr and ' '.join(oldstr.split()) == ' '.join(newstr.split()):
whitespace_note = ' (whitespace changed)'
return '"%s"%s' % (' '.join(ret), whitespace_note)
def list_inline_diff(oldlist, newlist):
diff = simplediff.diff(oldlist, newlist)
ret = []
for change, value in diff:
value = ' '.join(value)
if change == '=':
ret.append("'%s'" % value)
elif change == '+':
item = "+'%s'" % value
ret.append(item)
elif change == '-':
item = "-'%s'" % value
ret.append(item)
return '[%s]' % (', '.join(ret))
def clean_basepath(a):
mc = None
if a.startswith("multiconfig:"):
_, mc, a = a.split(":", 2)
b = a.rsplit("/", 2)[1] + '/' + a.rsplit("/", 2)[2]
if a.startswith("virtual:"):
b = b + ":" + a.rsplit(":", 1)[0]
if mc:
b = b + ":multiconfig:" + mc
return b
def clean_basepaths(a):
b = {}
for x in a:
b[clean_basepath(x)] = a[x]
return b
def clean_basepaths_list(a):
b = []
for x in a:
b.append(clean_basepath(x))
return b
def compare_sigfiles(a, b, recursecb=None, collapsed=False):
output = []
with open(a, 'rb') as f:
p1 = pickle.Unpickler(f)
a_data = p1.load()
with open(b, 'rb') as f:
p2 = pickle.Unpickler(f)
b_data = p2.load()
def dict_diff(a, b, whitelist=set()):
sa = set(a.keys())
sb = set(b.keys())
common = sa & sb
changed = set()
for i in common:
if a[i] != b[i] and i not in whitelist:
changed.add(i)
added = sb - sa
removed = sa - sb
return changed, added, removed
def file_checksums_diff(a, b):
from collections import Counter
# Handle old siginfo format
if isinstance(a, dict):
a = [(os.path.basename(f), cs) for f, cs in a.items()]
if isinstance(b, dict):
b = [(os.path.basename(f), cs) for f, cs in b.items()]
# Compare lists, ensuring we can handle duplicate filenames if they exist
removedcount = Counter(a)
removedcount.subtract(b)
addedcount = Counter(b)
addedcount.subtract(a)
added = []
for x in b:
if addedcount[x] > 0:
addedcount[x] -= 1
added.append(x)
removed = []
changed = []
for x in a:
if removedcount[x] > 0:
removedcount[x] -= 1
for y in added:
if y[0] == x[0]:
changed.append((x[0], x[1], y[1]))
added.remove(y)
break
else:
removed.append(x)
added = [x[0] for x in added]
removed = [x[0] for x in removed]
return changed, added, removed
if 'basewhitelist' in a_data and a_data['basewhitelist'] != b_data['basewhitelist']:
output.append("basewhitelist changed from '%s' to '%s'" % (a_data['basewhitelist'], b_data['basewhitelist']))
if a_data['basewhitelist'] and b_data['basewhitelist']:
output.append("changed items: %s" % a_data['basewhitelist'].symmetric_difference(b_data['basewhitelist']))
if 'taskwhitelist' in a_data and a_data['taskwhitelist'] != b_data['taskwhitelist']:
output.append("taskwhitelist changed from '%s' to '%s'" % (a_data['taskwhitelist'], b_data['taskwhitelist']))
if a_data['taskwhitelist'] and b_data['taskwhitelist']:
output.append("changed items: %s" % a_data['taskwhitelist'].symmetric_difference(b_data['taskwhitelist']))
if a_data['taskdeps'] != b_data['taskdeps']:
output.append("Task dependencies changed from:\n%s\nto:\n%s" % (sorted(a_data['taskdeps']), sorted(b_data['taskdeps'])))
if a_data['basehash'] != b_data['basehash'] and not collapsed:
output.append("basehash changed from %s to %s" % (a_data['basehash'], b_data['basehash']))
changed, added, removed = dict_diff(a_data['gendeps'], b_data['gendeps'], a_data['basewhitelist'] & b_data['basewhitelist'])
if changed:
for dep in changed:
output.append("List of dependencies for variable %s changed from '%s' to '%s'" % (dep, a_data['gendeps'][dep], b_data['gendeps'][dep]))
if a_data['gendeps'][dep] and b_data['gendeps'][dep]:
output.append("changed items: %s" % a_data['gendeps'][dep].symmetric_difference(b_data['gendeps'][dep]))
if added:
for dep in added:
output.append("Dependency on variable %s was added" % (dep))
if removed:
for dep in removed:
output.append("Dependency on Variable %s was removed" % (dep))
changed, added, removed = dict_diff(a_data['varvals'], b_data['varvals'])
if changed:
for dep in changed:
oldval = a_data['varvals'][dep]
newval = b_data['varvals'][dep]
if newval and oldval and ('\n' in oldval or '\n' in newval):
diff = difflib.unified_diff(oldval.splitlines(), newval.splitlines(), lineterm='')
# Cut off the first two lines, since we aren't interested in
# the old/new filename (they are blank anyway in this case)
difflines = list(diff)[2:]
output.append("Variable %s value changed:\n%s" % (dep, '\n'.join(difflines)))
elif newval and oldval and (' ' in oldval or ' ' in newval):
output.append("Variable %s value changed:\n%s" % (dep, worddiff_str(oldval, newval)))
else:
output.append("Variable %s value changed from '%s' to '%s'" % (dep, oldval, newval))
if not 'file_checksum_values' in a_data:
a_data['file_checksum_values'] = {}
if not 'file_checksum_values' in b_data:
b_data['file_checksum_values'] = {}
changed, added, removed = file_checksums_diff(a_data['file_checksum_values'], b_data['file_checksum_values'])
if changed:
for f, old, new in changed:
output.append("Checksum for file %s changed from %s to %s" % (f, old, new))
if added:
for f in added:
output.append("Dependency on checksum of file %s was added" % (f))
if removed:
for f in removed:
output.append("Dependency on checksum of file %s was removed" % (f))
if not 'runtaskdeps' in a_data:
a_data['runtaskdeps'] = {}
if not 'runtaskdeps' in b_data:
b_data['runtaskdeps'] = {}
if not collapsed:
if len(a_data['runtaskdeps']) != len(b_data['runtaskdeps']):
changed = ["Number of task dependencies changed"]
else:
changed = []
for idx, task in enumerate(a_data['runtaskdeps']):
a = a_data['runtaskdeps'][idx]
b = b_data['runtaskdeps'][idx]
if a_data['runtaskhashes'][a] != b_data['runtaskhashes'][b]:
changed.append("%s with hash %s\n changed to\n%s with hash %s" % (a, a_data['runtaskhashes'][a], b, b_data['runtaskhashes'][b]))
if changed:
clean_a = clean_basepaths_list(a_data['runtaskdeps'])
clean_b = clean_basepaths_list(b_data['runtaskdeps'])
if clean_a != clean_b:
output.append("runtaskdeps changed:\n%s" % list_inline_diff(clean_a, clean_b))
else:
output.append("runtaskdeps changed:")
output.append("\n".join(changed))
if 'runtaskhashes' in a_data and 'runtaskhashes' in b_data:
a = a_data['runtaskhashes']
b = b_data['runtaskhashes']
siggen: compare runtaskdeps dictionary even when they have the same size * otherwise it reports lots of changed checksums just because it compares different tasks notice linux-libc-headers_3.1.bb.do_package_write: Task dependency hash changed from 42acced29debf54d598802474c5e9cbb to f438a54f995df77620d0727d6f4b4ce5 (for linux-libc-headers_3.1.bb.do_package_write and linux-gta04_git.bb.do_deploy) Task dependency hash changed from 61f8babe1d10c6e7fb1423112bb04e1e to 1b3e21ff106ecfcb7ddf76a1e29537bb (for linux-nokia900-meego_git.bb.do_deploy and linux-gta04_git.bb.do_package_write) Task dependency hash changed from 512f9d6686d760b318d8b11c8b589226 to 42acced29debf54d598802474c5e9cbb (for linux-nokia900-meego_git.bb.do_package_write and linux-libc-headers_3.1.bb.do_package_write) Task dependency hash changed from 153e91dfd1d2053fda7b98cc08d4b802 to 92a293bdd8ed234932b87a66025038c5 (for systemd-serialgetty.bb.do_package_write and systemd-serialgetty.bb.do_package_write) Task dependency hash changed from 730abebf9954794bb440c2f3239f79fe to 413eaebaff27a2fd16f5cf68c1f4ff17 (for systemd_git.bb.do_package_write and systemd_git.bb.do_package_write) with this patch: Dependency on task linux-nokia900-meego_git.bb.do_package_write was added Dependency on task linux-nokia900-meego_git.bb.do_deploy was added Dependency on task linux-gta04_git.bb.do_deploy was removed Dependency on task linux-gta04_git.bb.do_package_write was removed Hash for dependent task systemd_git.bb.do_package_write changed from 730abebf9954794bb440c2f3239f79fe to 413eaebaff27a2fd16f5cf68c1f4ff17 Hash for dependent task systemd-serialgetty.bb.do_package_write changed from 153e91dfd1d2053fda7b98cc08d4b802 to 92a293bdd8ed234932b87a66025038c5 * added test if there is different task with same hash then we don't show it as added/removed dependency, because bitbake doesn't care (Bitbake rev: ca52bf32b479811bd7fed41648bedcc06b00430b) Signed-off-by: Martin Jansa <Martin.Jansa@gmail.com> Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
2012-02-22 15:01:59 +00:00
changed, added, removed = dict_diff(a, b)
if added:
for dep in added:
bdep_found = False
if removed:
for bdep in removed:
if b[dep] == a[bdep]:
#output.append("Dependency on task %s was replaced by %s with same hash" % (dep, bdep))
bdep_found = True
if not bdep_found:
output.append("Dependency on task %s was added with hash %s" % (clean_basepath(dep), b[dep]))
siggen: compare runtaskdeps dictionary even when they have the same size * otherwise it reports lots of changed checksums just because it compares different tasks notice linux-libc-headers_3.1.bb.do_package_write: Task dependency hash changed from 42acced29debf54d598802474c5e9cbb to f438a54f995df77620d0727d6f4b4ce5 (for linux-libc-headers_3.1.bb.do_package_write and linux-gta04_git.bb.do_deploy) Task dependency hash changed from 61f8babe1d10c6e7fb1423112bb04e1e to 1b3e21ff106ecfcb7ddf76a1e29537bb (for linux-nokia900-meego_git.bb.do_deploy and linux-gta04_git.bb.do_package_write) Task dependency hash changed from 512f9d6686d760b318d8b11c8b589226 to 42acced29debf54d598802474c5e9cbb (for linux-nokia900-meego_git.bb.do_package_write and linux-libc-headers_3.1.bb.do_package_write) Task dependency hash changed from 153e91dfd1d2053fda7b98cc08d4b802 to 92a293bdd8ed234932b87a66025038c5 (for systemd-serialgetty.bb.do_package_write and systemd-serialgetty.bb.do_package_write) Task dependency hash changed from 730abebf9954794bb440c2f3239f79fe to 413eaebaff27a2fd16f5cf68c1f4ff17 (for systemd_git.bb.do_package_write and systemd_git.bb.do_package_write) with this patch: Dependency on task linux-nokia900-meego_git.bb.do_package_write was added Dependency on task linux-nokia900-meego_git.bb.do_deploy was added Dependency on task linux-gta04_git.bb.do_deploy was removed Dependency on task linux-gta04_git.bb.do_package_write was removed Hash for dependent task systemd_git.bb.do_package_write changed from 730abebf9954794bb440c2f3239f79fe to 413eaebaff27a2fd16f5cf68c1f4ff17 Hash for dependent task systemd-serialgetty.bb.do_package_write changed from 153e91dfd1d2053fda7b98cc08d4b802 to 92a293bdd8ed234932b87a66025038c5 * added test if there is different task with same hash then we don't show it as added/removed dependency, because bitbake doesn't care (Bitbake rev: ca52bf32b479811bd7fed41648bedcc06b00430b) Signed-off-by: Martin Jansa <Martin.Jansa@gmail.com> Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
2012-02-22 15:01:59 +00:00
if removed:
for dep in removed:
adep_found = False
if added:
for adep in added:
if b[adep] == a[dep]:
#output.append("Dependency on task %s was replaced by %s with same hash" % (adep, dep))
adep_found = True
if not adep_found:
output.append("Dependency on task %s was removed with hash %s" % (clean_basepath(dep), a[dep]))
siggen: compare runtaskdeps dictionary even when they have the same size * otherwise it reports lots of changed checksums just because it compares different tasks notice linux-libc-headers_3.1.bb.do_package_write: Task dependency hash changed from 42acced29debf54d598802474c5e9cbb to f438a54f995df77620d0727d6f4b4ce5 (for linux-libc-headers_3.1.bb.do_package_write and linux-gta04_git.bb.do_deploy) Task dependency hash changed from 61f8babe1d10c6e7fb1423112bb04e1e to 1b3e21ff106ecfcb7ddf76a1e29537bb (for linux-nokia900-meego_git.bb.do_deploy and linux-gta04_git.bb.do_package_write) Task dependency hash changed from 512f9d6686d760b318d8b11c8b589226 to 42acced29debf54d598802474c5e9cbb (for linux-nokia900-meego_git.bb.do_package_write and linux-libc-headers_3.1.bb.do_package_write) Task dependency hash changed from 153e91dfd1d2053fda7b98cc08d4b802 to 92a293bdd8ed234932b87a66025038c5 (for systemd-serialgetty.bb.do_package_write and systemd-serialgetty.bb.do_package_write) Task dependency hash changed from 730abebf9954794bb440c2f3239f79fe to 413eaebaff27a2fd16f5cf68c1f4ff17 (for systemd_git.bb.do_package_write and systemd_git.bb.do_package_write) with this patch: Dependency on task linux-nokia900-meego_git.bb.do_package_write was added Dependency on task linux-nokia900-meego_git.bb.do_deploy was added Dependency on task linux-gta04_git.bb.do_deploy was removed Dependency on task linux-gta04_git.bb.do_package_write was removed Hash for dependent task systemd_git.bb.do_package_write changed from 730abebf9954794bb440c2f3239f79fe to 413eaebaff27a2fd16f5cf68c1f4ff17 Hash for dependent task systemd-serialgetty.bb.do_package_write changed from 153e91dfd1d2053fda7b98cc08d4b802 to 92a293bdd8ed234932b87a66025038c5 * added test if there is different task with same hash then we don't show it as added/removed dependency, because bitbake doesn't care (Bitbake rev: ca52bf32b479811bd7fed41648bedcc06b00430b) Signed-off-by: Martin Jansa <Martin.Jansa@gmail.com> Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
2012-02-22 15:01:59 +00:00
if changed:
for dep in changed:
if not collapsed:
output.append("Hash for dependent task %s changed from %s to %s" % (clean_basepath(dep), a[dep], b[dep]))
if callable(recursecb):
recout = recursecb(dep, a[dep], b[dep])
if recout:
if collapsed:
output.extend(recout)
else:
# If a dependent hash changed, might as well print the line above and then defer to the changes in
# that hash since in all likelyhood, they're the same changes this task also saw.
output = [output[-1]] + recout
2012-06-18 15:45:35 +00:00
a_taint = a_data.get('taint', None)
b_taint = b_data.get('taint', None)
if a_taint != b_taint:
output.append("Taint (by forced/invalidated task) changed from %s to %s" % (a_taint, b_taint))
return output
2012-06-18 15:45:35 +00:00
def calc_basehash(sigdata):
task = sigdata['task']
basedata = sigdata['varvals'][task]
if basedata is None:
basedata = ''
alldeps = sigdata['taskdeps']
for dep in alldeps:
basedata = basedata + dep
val = sigdata['varvals'][dep]
if val is not None:
basedata = basedata + str(val)
return hashlib.md5(basedata.encode("utf-8")).hexdigest()
def calc_taskhash(sigdata):
data = sigdata['basehash']
for dep in sigdata['runtaskdeps']:
data = data + sigdata['runtaskhashes'][dep]
for c in sigdata['file_checksum_values']:
data = data + c[1]
if 'taint' in sigdata:
if 'nostamp:' in sigdata['taint']:
data = data + sigdata['taint'][8:]
else:
data = data + sigdata['taint']
return hashlib.md5(data.encode("utf-8")).hexdigest()
def dump_sigfile(a):
output = []
with open(a, 'rb') as f:
p1 = pickle.Unpickler(f)
a_data = p1.load()
output.append("basewhitelist: %s" % (a_data['basewhitelist']))
output.append("taskwhitelist: %s" % (a_data['taskwhitelist']))
output.append("Task dependencies: %s" % (sorted(a_data['taskdeps'])))
output.append("basehash: %s" % (a_data['basehash']))
for dep in a_data['gendeps']:
output.append("List of dependencies for variable %s is %s" % (dep, a_data['gendeps'][dep]))
for dep in a_data['varvals']:
output.append("Variable %s value is %s" % (dep, a_data['varvals'][dep]))
if 'runtaskdeps' in a_data:
output.append("Tasks this task depends on: %s" % (a_data['runtaskdeps']))
if 'file_checksum_values' in a_data:
output.append("This task depends on the checksums of files: %s" % (a_data['file_checksum_values']))
if 'runtaskhashes' in a_data:
for dep in a_data['runtaskhashes']:
output.append("Hash for dependent task %s is %s" % (dep, a_data['runtaskhashes'][dep]))
2012-06-18 15:45:35 +00:00
if 'taint' in a_data:
output.append("Tainted (by forced/invalidated task): %s" % a_data['taint'])
if 'task' in a_data:
computed_basehash = calc_basehash(a_data)
output.append("Computed base hash is %s and from file %s" % (computed_basehash, a_data['basehash']))
else:
output.append("Unable to compute base hash")
computed_taskhash = calc_taskhash(a_data)
output.append("Computed task hash is %s" % computed_taskhash)
return output