bitbake: refactor out codeparser cache into a separate class

We want to be able to reuse most of this functionality for the file
checksum cache.

(Bitbake rev: 0fe3cb1438d297f90dd0fc6b26362ecbff75c76d)

Signed-off-by: Paul Eggleton <paul.eggleton@linux.intel.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Paul Eggleton, 2012-05-23 00:23:31 +0100, committed by Richard Purdie
parent 644b30adfb
commit d7b818b51f
2 changed files with 172 additions and 137 deletions
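
For context on the reuse the commit message mentions: a subclass of the new
MultiProcessCache only has to define cache_file_name and CACHE_VERSION, plus
whatever accessors it wants on top of cachedata/cachedata_extras. A
hypothetical sketch of such a file checksum cache follows (the class name,
cache file name, and the bb.utils.md5_file() choice are illustrative
assumptions, not part of this commit):

import bb.utils
from bb.cache import MultiProcessCache

class FileChecksumCache(MultiProcessCache):
    cache_file_name = "local_file_checksum_cache.dat"  # assumed name
    CACHE_VERSION = 1

    def get_checksum(self, f):
        # Check the per-process extras first, then the shared cache loaded
        # by init_cache(); record misses in the extras so save_extras()
        # persists them for the parent to merge later.
        checksum = self.cachedata_extras[0].get(f)
        if not checksum:
            checksum = self.cachedata[0].get(f)
        if not checksum:
            checksum = bb.utils.md5_file(f)
            self.cachedata_extras[0][f] = checksum
        return checksum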

bitbake/lib/bb/cache.py

@@ -1,11 +1,12 @@
 # ex:ts=4:sw=4:sts=4:et
 # -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*-
 #
-# BitBake 'Event' implementation
+# BitBake Cache implementation
 #
 # Caching of bitbake variables before task execution
 # Copyright (C) 2006 Richard Purdie
+# Copyright (C) 2012 Intel Corporation
 # but small sections based on code from bin/bitbake:
 # Copyright (C) 2003, 2004 Chris Larson
@@ -703,4 +704,115 @@ class CacheData(object):
         for info in info_array:
             info.add_cacheData(self, fn)
+
+
+class MultiProcessCache(object):
+    """
+    BitBake multi-process cache implementation
+
+    Used by the codeparser & file checksum caches
+    """
+
+    def __init__(self):
+        self.cachefile = None
+        self.cachedata = self.create_cachedata()
+        self.cachedata_extras = self.create_cachedata()
+
+    def init_cache(self, d):
+        cachedir = (d.getVar("PERSISTENT_DIR", True) or
+                    d.getVar("CACHE", True))
+        if cachedir in [None, '']:
+            return
+        bb.utils.mkdirhier(cachedir)
+        self.cachefile = os.path.join(cachedir, self.__class__.cache_file_name)
+        logger.debug(1, "Using cache in '%s'", self.cachefile)
+
+        try:
+            p = pickle.Unpickler(file(self.cachefile, "rb"))
+            data, version = p.load()
+        except:
+            return
+
+        if version != self.__class__.CACHE_VERSION:
+            return
+
+        self.cachedata = data
+
+    def internSet(self, items):
+        new = set()
+        for i in items:
+            new.add(intern(i))
+        return new
+
+    def compress_keys(self, data):
+        # Override in subclasses if desired
+        return
+
+    def create_cachedata(self):
+        data = [{}]
+        return data
+
+    def save_extras(self, d):
+        if not self.cachefile:
+            return
+
+        glf = bb.utils.lockfile(self.cachefile + ".lock", shared=True)
+
+        i = os.getpid()
+        lf = None
+        while not lf:
+            lf = bb.utils.lockfile(self.cachefile + ".lock." + str(i), retry=False)
+            if not lf or os.path.exists(self.cachefile + "-" + str(i)):
+                if lf:
+                    bb.utils.unlockfile(lf)
+                lf = None
+                i = i + 1
+                continue
+
+            p = pickle.Pickler(file(self.cachefile + "-" + str(i), "wb"), -1)
+            p.dump([self.cachedata_extras, self.__class__.CACHE_VERSION])
+
+        bb.utils.unlockfile(lf)
+        bb.utils.unlockfile(glf)
+
+    def merge_data(self, source, dest):
+        for j in range(0,len(dest)):
+            for h in source[j]:
+                if h not in dest[j]:
+                    dest[j][h] = source[j][h]
+
+    def save_merge(self, d):
+        if not self.cachefile:
+            return
+
+        glf = bb.utils.lockfile(self.cachefile + ".lock")
+
+        try:
+            p = pickle.Unpickler(file(self.cachefile, "rb"))
+            data, version = p.load()
+        except (IOError, EOFError):
+            data, version = None, None
+
+        if version != self.__class__.CACHE_VERSION:
+            data = self.create_cachedata()
+
+        for f in [y for y in os.listdir(os.path.dirname(self.cachefile)) if y.startswith(os.path.basename(self.cachefile) + '-')]:
+            f = os.path.join(os.path.dirname(self.cachefile), f)
+            try:
+                p = pickle.Unpickler(file(f, "rb"))
+                extradata, version = p.load()
+            except (IOError, EOFError):
+                extradata, version = self.create_cachedata(), None
+
+            if version != self.__class__.CACHE_VERSION:
+                continue
+
+            self.merge_data(extradata, data)
+            os.unlink(f)
+
+        self.compress_keys(data)
+
+        p = pickle.Pickler(file(self.cachefile, "wb"), -1)
+        p.dump([data, self.__class__.CACHE_VERSION])
+
+        bb.utils.unlockfile(glf)
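
Taken together, the methods above define a multi-process lifecycle: the parent
loads the shared pickle once, each worker accumulates new entries in
cachedata_extras and dumps them to a per-PID sidecar file under a shared lock,
and the parent later folds the sidecars back into the main file under an
exclusive lock. A rough usage sketch, assuming d is a BitBake datastore with
PERSISTENT_DIR or CACHE set (this sequence is inferred from the code above,
not shown in the commit):

cache = CodeParserCache()   # any MultiProcessCache subclass works the same way
cache.init_cache(d)         # parent: load <cachedir>/bb_codeparser.dat if the
                            # version matches, else start empty

# ... forked workers parse and fill cache.cachedata_extras ...

cache.save_extras(d)        # worker: holding the shared ".lock", write the
                            # extras out to "<cachefile>-<pid>"
cache.save_merge(d)         # parent: holding the exclusive ".lock", merge every
                            # "<cachefile>-<pid>" into the main file and unlink it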

bitbake/lib/bb/codeparser.py

@@ -5,10 +5,10 @@ import os.path
 import bb.utils, bb.data
 from itertools import chain
 from pysh import pyshyacc, pyshlex, sherrors
+from bb.cache import MultiProcessCache
 
 logger = logging.getLogger('BitBake.CodeParser')
-PARSERCACHE_VERSION = 2
 
 try:
     import cPickle as pickle
@@ -32,133 +32,56 @@ def check_indent(codestr):
     return codestr
 
-pythonparsecache = {}
-shellparsecache = {}
-pythonparsecacheextras = {}
-shellparsecacheextras = {}
-
-
-def parser_cachefile(d):
-    cachedir = (d.getVar("PERSISTENT_DIR", True) or
-                d.getVar("CACHE", True))
-    if cachedir in [None, '']:
-        return None
-    bb.utils.mkdirhier(cachedir)
-    cachefile = os.path.join(cachedir, "bb_codeparser.dat")
-    logger.debug(1, "Using cache in '%s' for codeparser cache", cachefile)
-    return cachefile
+class CodeParserCache(MultiProcessCache):
+    cache_file_name = "bb_codeparser.dat"
+    CACHE_VERSION = 2
+
+    def __init__(self):
+        MultiProcessCache.__init__(self)
+        self.pythoncache = self.cachedata[0]
+        self.shellcache = self.cachedata[1]
+        self.pythoncacheextras = self.cachedata_extras[0]
+        self.shellcacheextras = self.cachedata_extras[1]
+
+    def init_cache(self, d):
+        MultiProcessCache.init_cache(self, d)
+
+        # cachedata gets re-assigned in the parent
+        self.pythoncache = self.cachedata[0]
+        self.shellcache = self.cachedata[1]
+
+    def compress_keys(self, data):
+        # When the dicts are originally created, python calls intern() on the set keys
+        # which significantly improves memory usage. Sadly the pickle/unpickle process
+        # doesn't call intern() on the keys and results in the same strings being duplicated
+        # in memory. This also means pickle will save the same string multiple times in
+        # the cache file. By interning the data here, the cache file shrinks dramatically
+        # meaning faster load times and the reloaded cache files also consume much less
+        # memory. This is worth any performance hit from this loops and the use of the
+        # intern() data storage.
+        # Python 3.x may behave better in this area
+        for h in data[0]:
+            data[0][h]["refs"] = self.internSet(data[0][h]["refs"])
+            data[0][h]["execs"] = self.internSet(data[0][h]["execs"])
+        for h in data[1]:
+            data[1][h]["execs"] = self.internSet(data[1][h]["execs"])
+        return
+
+    def create_cachedata(self):
+        data = [{}, {}]
+        return data
+
+codeparsercache = CodeParserCache()
 
 def parser_cache_init(d):
-    global pythonparsecache
-    global shellparsecache
-
-    cachefile = parser_cachefile(d)
-    if not cachefile:
-        return
-
-    try:
-        p = pickle.Unpickler(file(cachefile, "rb"))
-        data, version = p.load()
-    except:
-        return
-
-    if version != PARSERCACHE_VERSION:
-        return
-
-    pythonparsecache = data[0]
-    shellparsecache = data[1]
+    codeparsercache.init_cache(d)
 
 def parser_cache_save(d):
-    cachefile = parser_cachefile(d)
-    if not cachefile:
-        return
-
-    glf = bb.utils.lockfile(cachefile + ".lock", shared=True)
-
-    i = os.getpid()
-    lf = None
-    while not lf:
-        shellcache = {}
-        pythoncache = {}
-        lf = bb.utils.lockfile(cachefile + ".lock." + str(i), retry=False)
-        if not lf or os.path.exists(cachefile + "-" + str(i)):
-            if lf:
-                bb.utils.unlockfile(lf)
-            lf = None
-            i = i + 1
-            continue
-
-        shellcache = shellparsecacheextras
-        pythoncache = pythonparsecacheextras
-
-        p = pickle.Pickler(file(cachefile + "-" + str(i), "wb"), -1)
-        p.dump([[pythoncache, shellcache], PARSERCACHE_VERSION])
-
-    bb.utils.unlockfile(lf)
-    bb.utils.unlockfile(glf)
-
-def internSet(items):
-    new = set()
-    for i in items:
-        new.add(intern(i))
-    return new
+    codeparsercache.save_extras(d)
 
 def parser_cache_savemerge(d):
-    cachefile = parser_cachefile(d)
-    if not cachefile:
-        return
-
-    glf = bb.utils.lockfile(cachefile + ".lock")
-
-    try:
-        p = pickle.Unpickler(file(cachefile, "rb"))
-        data, version = p.load()
-    except (IOError, EOFError):
-        data, version = None, None
-
-    if version != PARSERCACHE_VERSION:
-        data = [{}, {}]
-
-    for f in [y for y in os.listdir(os.path.dirname(cachefile)) if y.startswith(os.path.basename(cachefile) + '-')]:
-        f = os.path.join(os.path.dirname(cachefile), f)
-        try:
-            p = pickle.Unpickler(file(f, "rb"))
-            extradata, version = p.load()
-        except (IOError, EOFError):
-            extradata, version = [{}, {}], None
-
-        if version != PARSERCACHE_VERSION:
-            continue
-
-        for h in extradata[0]:
-            if h not in data[0]:
-                data[0][h] = extradata[0][h]
-        for h in extradata[1]:
-            if h not in data[1]:
-                data[1][h] = extradata[1][h]
-
-        os.unlink(f)
-
-    # When the dicts are originally created, python calls intern() on the set keys
-    # which significantly improves memory usage. Sadly the pickle/unpickle process
-    # doesn't call intern() on the keys and results in the same strings being duplicated
-    # in memory. This also means pickle will save the same string multiple times in
-    # the cache file. By interning the data here, the cache file shrinks dramatically
-    # meaning faster load times and the reloaded cache files also consume much less
-    # memory. This is worth any performance hit from this loops and the use of the
-    # intern() data storage.
-    # Python 3.x may behave better in this area
-    for h in data[0]:
-        data[0][h]["refs"] = internSet(data[0][h]["refs"])
-        data[0][h]["execs"] = internSet(data[0][h]["execs"])
-    for h in data[1]:
-        data[1][h]["execs"] = internSet(data[1][h]["execs"])
-
-    p = pickle.Pickler(file(cachefile, "wb"), -1)
-    p.dump([data, PARSERCACHE_VERSION])
-
-    bb.utils.unlockfile(glf)
+    codeparsercache.save_merge(d)
 
 Logger = logging.getLoggerClass()
 class BufferedLogger(Logger):
@@ -235,14 +158,14 @@ class PythonParser():
     def parse_python(self, node):
         h = hash(str(node))
 
-        if h in pythonparsecache:
-            self.references = pythonparsecache[h]["refs"]
-            self.execs = pythonparsecache[h]["execs"]
+        if h in codeparsercache.pythoncache:
+            self.references = codeparsercache.pythoncache[h]["refs"]
+            self.execs = codeparsercache.pythoncache[h]["execs"]
             return
 
-        if h in pythonparsecacheextras:
-            self.references = pythonparsecacheextras[h]["refs"]
-            self.execs = pythonparsecacheextras[h]["execs"]
+        if h in codeparsercache.pythoncacheextras:
+            self.references = codeparsercache.pythoncacheextras[h]["refs"]
+            self.execs = codeparsercache.pythoncacheextras[h]["execs"]
             return
@@ -256,9 +179,9 @@ class PythonParser():
         self.references.update(self.var_references)
         self.references.update(self.var_execs)
 
-        pythonparsecacheextras[h] = {}
-        pythonparsecacheextras[h]["refs"] = self.references
-        pythonparsecacheextras[h]["execs"] = self.execs
+        codeparsercache.pythoncacheextras[h] = {}
+        codeparsercache.pythoncacheextras[h]["refs"] = self.references
+        codeparsercache.pythoncacheextras[h]["execs"] = self.execs
 
 class ShellParser():
     def __init__(self, name, log):
@@ -276,12 +199,12 @@ class ShellParser():
         h = hash(str(value))
 
-        if h in shellparsecache:
-            self.execs = shellparsecache[h]["execs"]
+        if h in codeparsercache.shellcache:
+            self.execs = codeparsercache.shellcache[h]["execs"]
             return self.execs
 
-        if h in shellparsecacheextras:
-            self.execs = shellparsecacheextras[h]["execs"]
+        if h in codeparsercache.shellcacheextras:
+            self.execs = codeparsercache.shellcacheextras[h]["execs"]
             return self.execs
 
         try:
@@ -293,8 +216,8 @@ class ShellParser():
             self.process_tokens(token)
         self.execs = set(cmd for cmd in self.allexecs if cmd not in self.funcdefs)
 
-        shellparsecacheextras[h] = {}
-        shellparsecacheextras[h]["execs"] = self.execs
+        codeparsercache.shellcacheextras[h] = {}
+        codeparsercache.shellcacheextras[h]["execs"] = self.execs
 
         return self.execs
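
Both parsers follow the same two-level lookup discipline: hash the code body,
consult the shared cache loaded at startup, fall back to this process's
extras, and only on a full miss do the expensive parse and record the result
in the extras; cachedata itself stays read-only until the next save_merge().
A standalone illustration of that pattern (the helper name and parse_fn
callback are hypothetical, not from the diff):

def lookup_or_parse(h, parse_fn):
    # Shared cache, populated by the last save_merge() run
    entry = codeparsercache.pythoncache.get(h)
    if entry is None:
        # Entries this process has parsed since startup
        entry = codeparsercache.pythoncacheextras.get(h)
    if entry is None:
        # Full miss: do the real work and record it for save_extras()
        entry = parse_fn()
        codeparsercache.pythoncacheextras[h] = entry
    return entry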