bitbake: refactor out codeparser cache into a separate class

We want to be able to reuse most of this functionality for the file
checksum cache.

(Bitbake rev: 0fe3cb1438d297f90dd0fc6b26362ecbff75c76d)

Signed-off-by: Paul Eggleton <paul.eggleton@linux.intel.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Paul Eggleton, 2012-05-23 00:23:31 +0100, committed by Richard Purdie
parent 644b30adfb
commit d7b818b51f
2 changed files with 172 additions and 137 deletions
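
For context on the reuse the commit message mentions: a subclass of the new
MultiProcessCache only has to define cache_file_name and CACHE_VERSION, plus
whatever accessors it wants on top of cachedata/cachedata_extras. A
hypothetical sketch of such a file checksum cache follows (the class name,
cache file name, and the bb.utils.md5_file() choice are illustrative
assumptions, not part of this commit):

import bb.utils
from bb.cache import MultiProcessCache

class FileChecksumCache(MultiProcessCache):
    cache_file_name = "local_file_checksum_cache.dat"  # assumed name
    CACHE_VERSION = 1

    def get_checksum(self, f):
        # Check the per-process extras first, then the shared cache loaded
        # by init_cache(); record misses in the extras so save_extras()
        # persists them for the parent to merge later.
        checksum = self.cachedata_extras[0].get(f)
        if not checksum:
            checksum = self.cachedata[0].get(f)
        if not checksum:
            checksum = bb.utils.md5_file(f)
            self.cachedata_extras[0][f] = checksum
        return checksum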

bitbake/lib/bb/cache.py

@@ -1,11 +1,12 @@
 # ex:ts=4:sw=4:sts=4:et
 # -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*-
 #
-# BitBake 'Event' implementation
+# BitBake Cache implementation
 #
 # Caching of bitbake variables before task execution
 # Copyright (C) 2006 Richard Purdie
+# Copyright (C) 2012 Intel Corporation
 # but small sections based on code from bin/bitbake:
 # Copyright (C) 2003, 2004 Chris Larson
@@ -703,4 +704,115 @@ class CacheData(object):
         for info in info_array:
             info.add_cacheData(self, fn)
+
+
+class MultiProcessCache(object):
+    """
+    BitBake multi-process cache implementation
+
+    Used by the codeparser & file checksum caches
+    """
+
+    def __init__(self):
+        self.cachefile = None
+        self.cachedata = self.create_cachedata()
+        self.cachedata_extras = self.create_cachedata()
+
+    def init_cache(self, d):
+        cachedir = (d.getVar("PERSISTENT_DIR", True) or
+                    d.getVar("CACHE", True))
+        if cachedir in [None, '']:
+            return
+        bb.utils.mkdirhier(cachedir)
+        self.cachefile = os.path.join(cachedir, self.__class__.cache_file_name)
+        logger.debug(1, "Using cache in '%s'", self.cachefile)
+
+        try:
+            p = pickle.Unpickler(file(self.cachefile, "rb"))
+            data, version = p.load()
+        except:
+            return
+
+        if version != self.__class__.CACHE_VERSION:
+            return
+
+        self.cachedata = data
+
+    def internSet(self, items):
+        new = set()
+        for i in items:
+            new.add(intern(i))
+        return new
+
+    def compress_keys(self, data):
+        # Override in subclasses if desired
+        return
+
+    def create_cachedata(self):
+        data = [{}]
+        return data
+
+    def save_extras(self, d):
+        if not self.cachefile:
+            return
+
+        glf = bb.utils.lockfile(self.cachefile + ".lock", shared=True)
+
+        i = os.getpid()
+        lf = None
+        while not lf:
+            lf = bb.utils.lockfile(self.cachefile + ".lock." + str(i), retry=False)
+            if not lf or os.path.exists(self.cachefile + "-" + str(i)):
+                if lf:
+                    bb.utils.unlockfile(lf)
+                lf = None
+                i = i + 1
+                continue
+
+            p = pickle.Pickler(file(self.cachefile + "-" + str(i), "wb"), -1)
+            p.dump([self.cachedata_extras, self.__class__.CACHE_VERSION])
+
+        bb.utils.unlockfile(lf)
+        bb.utils.unlockfile(glf)
+
+    def merge_data(self, source, dest):
+        for j in range(0,len(dest)):
+            for h in source[j]:
+                if h not in dest[j]:
+                    dest[j][h] = source[j][h]
+
+    def save_merge(self, d):
+        if not self.cachefile:
+            return
+
+        glf = bb.utils.lockfile(self.cachefile + ".lock")
+
+        try:
+            p = pickle.Unpickler(file(self.cachefile, "rb"))
+            data, version = p.load()
+        except (IOError, EOFError):
+            data, version = None, None
+
+        if version != self.__class__.CACHE_VERSION:
+            data = self.create_cachedata()
+
+        for f in [y for y in os.listdir(os.path.dirname(self.cachefile)) if y.startswith(os.path.basename(self.cachefile) + '-')]:
+            f = os.path.join(os.path.dirname(self.cachefile), f)
+            try:
+                p = pickle.Unpickler(file(f, "rb"))
+                extradata, version = p.load()
+            except (IOError, EOFError):
+                extradata, version = self.create_cachedata(), None
+
+            if version != self.__class__.CACHE_VERSION:
+                continue
+
+            self.merge_data(extradata, data)
+            os.unlink(f)
+
+        self.compress_keys(data)
+
+        p = pickle.Pickler(file(self.cachefile, "wb"), -1)
+        p.dump([data, self.__class__.CACHE_VERSION])
+
+        bb.utils.unlockfile(glf)
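
Taken together, the methods above define a multi-process lifecycle: the parent
loads the shared pickle once, each worker accumulates new entries in
cachedata_extras and dumps them to a per-PID sidecar file under a shared lock,
and the parent later folds the sidecars back into the main file under an
exclusive lock. A rough usage sketch, assuming d is a BitBake datastore with
PERSISTENT_DIR or CACHE set (this sequence is inferred from the code above,
not shown in the commit):

cache = CodeParserCache()   # any MultiProcessCache subclass works the same way
cache.init_cache(d)         # parent: load <cachedir>/bb_codeparser.dat if the
                            # version matches, else start empty

# ... forked workers parse and fill cache.cachedata_extras ...

cache.save_extras(d)        # worker: holding the shared ".lock", write the
                            # extras out to "<cachefile>-<pid>"
cache.save_merge(d)         # parent: holding the exclusive ".lock", merge every
                            # "<cachefile>-<pid>" into the main file and unlink it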

bitbake/lib/bb/codeparser.py

@@ -5,10 +5,10 @@ import os.path
 import bb.utils, bb.data
 from itertools import chain
 from pysh import pyshyacc, pyshlex, sherrors
+from bb.cache import MultiProcessCache
 
 logger = logging.getLogger('BitBake.CodeParser')
-PARSERCACHE_VERSION = 2
 
 try:
     import cPickle as pickle
@@ -32,133 +32,56 @@ def check_indent(codestr):
     return codestr
 
-pythonparsecache = {}
-shellparsecache = {}
-pythonparsecacheextras = {}
-shellparsecacheextras = {}
-
-
-def parser_cachefile(d):
-    cachedir = (d.getVar("PERSISTENT_DIR", True) or
-                d.getVar("CACHE", True))
-    if cachedir in [None, '']:
-        return None
-    bb.utils.mkdirhier(cachedir)
-    cachefile = os.path.join(cachedir, "bb_codeparser.dat")
-    logger.debug(1, "Using cache in '%s' for codeparser cache", cachefile)
-    return cachefile
+class CodeParserCache(MultiProcessCache):
+    cache_file_name = "bb_codeparser.dat"
+    CACHE_VERSION = 2
+
+    def __init__(self):
+        MultiProcessCache.__init__(self)
+        self.pythoncache = self.cachedata[0]
+        self.shellcache = self.cachedata[1]
+        self.pythoncacheextras = self.cachedata_extras[0]
+        self.shellcacheextras = self.cachedata_extras[1]
+
+    def init_cache(self, d):
+        MultiProcessCache.init_cache(self, d)
+
+        # cachedata gets re-assigned in the parent
+        self.pythoncache = self.cachedata[0]
+        self.shellcache = self.cachedata[1]
+
+    def compress_keys(self, data):
+        # When the dicts are originally created, python calls intern() on the set keys
+        # which significantly improves memory usage. Sadly the pickle/unpickle process
+        # doesn't call intern() on the keys and results in the same strings being duplicated
+        # in memory. This also means pickle will save the same string multiple times in
+        # the cache file. By interning the data here, the cache file shrinks dramatically
+        # meaning faster load times and the reloaded cache files also consume much less
+        # memory. This is worth any performance hit from this loops and the use of the
+        # intern() data storage.
+        # Python 3.x may behave better in this area
+        for h in data[0]:
+            data[0][h]["refs"] = self.internSet(data[0][h]["refs"])
+            data[0][h]["execs"] = self.internSet(data[0][h]["execs"])
+        for h in data[1]:
+            data[1][h]["execs"] = self.internSet(data[1][h]["execs"])
+        return
+
+    def create_cachedata(self):
+        data = [{}, {}]
+        return data
+
+codeparsercache = CodeParserCache()
 
 def parser_cache_init(d):
-    global pythonparsecache
-    global shellparsecache
-
-    cachefile = parser_cachefile(d)
-    if not cachefile:
-        return
-
-    try:
-        p = pickle.Unpickler(file(cachefile, "rb"))
-        data, version = p.load()
-    except:
-        return
-
-    if version != PARSERCACHE_VERSION:
-        return
-
-    pythonparsecache = data[0]
-    shellparsecache = data[1]
+    codeparsercache.init_cache(d)
 
 def parser_cache_save(d):
-    cachefile = parser_cachefile(d)
-    if not cachefile:
-        return
-
-    glf = bb.utils.lockfile(cachefile + ".lock", shared=True)
-
-    i = os.getpid()
-    lf = None
-    while not lf:
-        shellcache = {}
-        pythoncache = {}
-        lf = bb.utils.lockfile(cachefile + ".lock." + str(i), retry=False)
-        if not lf or os.path.exists(cachefile + "-" + str(i)):
-            if lf:
-                bb.utils.unlockfile(lf)
-            lf = None
-            i = i + 1
-            continue
-
-        shellcache = shellparsecacheextras
-        pythoncache = pythonparsecacheextras
-
-        p = pickle.Pickler(file(cachefile + "-" + str(i), "wb"), -1)
-        p.dump([[pythoncache, shellcache], PARSERCACHE_VERSION])
-
-    bb.utils.unlockfile(lf)
-    bb.utils.unlockfile(glf)
-
-def internSet(items):
-    new = set()
-    for i in items:
-        new.add(intern(i))
-    return new
+    codeparsercache.save_extras(d)
 
 def parser_cache_savemerge(d):
-    cachefile = parser_cachefile(d)
-    if not cachefile:
-        return
-
-    glf = bb.utils.lockfile(cachefile + ".lock")
-
-    try:
-        p = pickle.Unpickler(file(cachefile, "rb"))
-        data, version = p.load()
-    except (IOError, EOFError):
-        data, version = None, None
-
-    if version != PARSERCACHE_VERSION:
-        data = [{}, {}]
-
-    for f in [y for y in os.listdir(os.path.dirname(cachefile)) if y.startswith(os.path.basename(cachefile) + '-')]:
-        f = os.path.join(os.path.dirname(cachefile), f)
-        try:
-            p = pickle.Unpickler(file(f, "rb"))
-            extradata, version = p.load()
-        except (IOError, EOFError):
-            extradata, version = [{}, {}], None
-
-        if version != PARSERCACHE_VERSION:
-            continue
-
-        for h in extradata[0]:
-            if h not in data[0]:
-                data[0][h] = extradata[0][h]
-        for h in extradata[1]:
-            if h not in data[1]:
-                data[1][h] = extradata[1][h]
-
-        os.unlink(f)
-
-    # When the dicts are originally created, python calls intern() on the set keys
-    # which significantly improves memory usage. Sadly the pickle/unpickle process
-    # doesn't call intern() on the keys and results in the same strings being duplicated
-    # in memory. This also means pickle will save the same string multiple times in
-    # the cache file. By interning the data here, the cache file shrinks dramatically
-    # meaning faster load times and the reloaded cache files also consume much less
-    # memory. This is worth any performance hit from this loops and the use of the
-    # intern() data storage.
-    # Python 3.x may behave better in this area
-    for h in data[0]:
-        data[0][h]["refs"] = internSet(data[0][h]["refs"])
-        data[0][h]["execs"] = internSet(data[0][h]["execs"])
-    for h in data[1]:
-        data[1][h]["execs"] = internSet(data[1][h]["execs"])
-
-    p = pickle.Pickler(file(cachefile, "wb"), -1)
-    p.dump([data, PARSERCACHE_VERSION])
-
-    bb.utils.unlockfile(glf)
+    codeparsercache.save_merge(d)
 
 Logger = logging.getLoggerClass()
 class BufferedLogger(Logger):
@@ -235,14 +158,14 @@ class PythonParser():
     def parse_python(self, node):
         h = hash(str(node))
 
-        if h in pythonparsecache:
-            self.references = pythonparsecache[h]["refs"]
-            self.execs = pythonparsecache[h]["execs"]
+        if h in codeparsercache.pythoncache:
+            self.references = codeparsercache.pythoncache[h]["refs"]
+            self.execs = codeparsercache.pythoncache[h]["execs"]
             return
 
-        if h in pythonparsecacheextras:
-            self.references = pythonparsecacheextras[h]["refs"]
-            self.execs = pythonparsecacheextras[h]["execs"]
+        if h in codeparsercache.pythoncacheextras:
+            self.references = codeparsercache.pythoncacheextras[h]["refs"]
+            self.execs = codeparsercache.pythoncacheextras[h]["execs"]
             return
@@ -256,9 +179,9 @@ class PythonParser():
         self.references.update(self.var_references)
         self.references.update(self.var_execs)
 
-        pythonparsecacheextras[h] = {}
-        pythonparsecacheextras[h]["refs"] = self.references
-        pythonparsecacheextras[h]["execs"] = self.execs
+        codeparsercache.pythoncacheextras[h] = {}
+        codeparsercache.pythoncacheextras[h]["refs"] = self.references
+        codeparsercache.pythoncacheextras[h]["execs"] = self.execs
 
 class ShellParser():
     def __init__(self, name, log):
@@ -276,12 +199,12 @@ class ShellParser():
         h = hash(str(value))
 
-        if h in shellparsecache:
-            self.execs = shellparsecache[h]["execs"]
+        if h in codeparsercache.shellcache:
+            self.execs = codeparsercache.shellcache[h]["execs"]
             return self.execs
 
-        if h in shellparsecacheextras:
-            self.execs = shellparsecacheextras[h]["execs"]
+        if h in codeparsercache.shellcacheextras:
+            self.execs = codeparsercache.shellcacheextras[h]["execs"]
             return self.execs
 
         try:
@@ -293,8 +216,8 @@ class ShellParser():
             self.process_tokens(token)
         self.execs = set(cmd for cmd in self.allexecs if cmd not in self.funcdefs)
 
-        shellparsecacheextras[h] = {}
-        shellparsecacheextras[h]["execs"] = self.execs
+        codeparsercache.shellcacheextras[h] = {}
+        codeparsercache.shellcacheextras[h]["execs"] = self.execs
 
         return self.execs
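
Both parsers follow the same two-level lookup discipline: hash the code body,
consult the shared cache loaded at startup, fall back to this process's
extras, and only on a full miss do the expensive parse and record the result
in the extras; cachedata itself stays read-only until the next save_merge().
A standalone illustration of that pattern (the helper name and parse_fn
callback are hypothetical, not from the diff):

def lookup_or_parse(h, parse_fn):
    # Shared cache, populated by the last save_merge() run
    entry = codeparsercache.pythoncache.get(h)
    if entry is None:
        # Entries this process has parsed since startup
        entry = codeparsercache.pythoncacheextras.get(h)
    if entry is None:
        # Full miss: do the real work and record it for save_extras()
        entry = parse_fn()
        codeparsercache.pythoncacheextras[h] = entry
    return entry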