package: Add cachedpath optimisation
Currently, various standard library operations like os.walk(), os.path.isdir() and os.path.islink() each call stat or lstat which involves a syscall into the kernel. There is no caching since they could conceivably have changed on disk. The result is that for something like the do_package task of the kernel we're spending over two minutes making 868,000 individual stat calls for 23,000 files. This is suboptimal. This patch adds lib/oe/cachedpath.py which are a set of replacement functions for these operations which use cached stat data rather than hitting the kernel each time. It gives a nice performance improvement halving the build time of the kernel do_package. (From OE-Core rev: 556dee0c4d6d8a87c0cddbd2f60fe5917d009f18) Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
This commit is contained in:
parent
cec0102647
commit
add11fa1ab
|
@ -10,7 +10,7 @@ inherit utility-tasks
|
|||
inherit metadata_scm
|
||||
inherit logging
|
||||
|
||||
OE_IMPORTS += "os sys time oe.path oe.utils oe.data oe.package oe.packagegroup oe.sstatesig oe.lsb"
|
||||
OE_IMPORTS += "os sys time oe.path oe.utils oe.data oe.package oe.packagegroup oe.sstatesig oe.lsb oe.cachedpath"
|
||||
OE_IMPORTS[type] = "list"
|
||||
|
||||
def oe_import(d):
|
||||
|
|
|
@ -288,7 +288,7 @@ def copydebugsources(debugsrcdir, d):
|
|||
basepath = dvar
|
||||
for p in debugsrcdir.split("/"):
|
||||
basepath = basepath + "/" + p
|
||||
if not os.path.exists(basepath):
|
||||
if not cpath.exists(basepath):
|
||||
nosuchdir.append(basepath)
|
||||
bb.utils.mkdirhier(basepath)
|
||||
|
||||
|
@ -388,7 +388,7 @@ python package_do_split_locales() {
|
|||
|
||||
localedir = os.path.join(dvar + datadir, 'locale')
|
||||
|
||||
if not os.path.isdir(localedir):
|
||||
if not cpath.isdir(localedir):
|
||||
bb.debug(1, "No locale files in this package")
|
||||
return
|
||||
|
||||
|
@ -628,7 +628,7 @@ python fixup_perms () {
|
|||
continue
|
||||
|
||||
origin = dvar + dir
|
||||
if not (os.path.exists(origin) and os.path.isdir(origin) and not os.path.islink(origin)):
|
||||
if not (cpath.exists(origin) and cpath.isdir(origin) and not cpath.islink(origin)):
|
||||
continue
|
||||
|
||||
link = fs_perms_table[dir].link
|
||||
|
@ -654,7 +654,7 @@ python fixup_perms () {
|
|||
continue
|
||||
|
||||
origin = dvar + dir
|
||||
if not (os.path.exists(origin) and os.path.isdir(origin)):
|
||||
if not (cpath.exists(origin) and cpath.isdir(origin)):
|
||||
continue
|
||||
|
||||
fix_perms(origin, fs_perms_table[dir].mode, fs_perms_table[dir].uid, fs_perms_table[dir].gid, dir)
|
||||
|
@ -735,7 +735,7 @@ python split_and_strip_files () {
|
|||
baselibdir = os.path.abspath(dvar + os.sep + d.getVar("base_libdir", True))
|
||||
if (d.getVar('INHIBIT_PACKAGE_DEBUG_SPLIT', True) != '1') and \
|
||||
(d.getVar('INHIBIT_PACKAGE_STRIP', True) != '1'):
|
||||
for root, dirs, files in os.walk(dvar):
|
||||
for root, dirs, files in cpath.walk(dvar):
|
||||
for f in files:
|
||||
file = os.path.join(root, f)
|
||||
if file.endswith(".ko") and file.find("/lib/modules/") != -1:
|
||||
|
@ -749,18 +749,20 @@ python split_and_strip_files () {
|
|||
continue
|
||||
|
||||
try:
|
||||
ltarget = oe.path.realpath(file, dvar, False)
|
||||
s = os.lstat(ltarget)
|
||||
ltarget = cpath.realpath(file, dvar, False)
|
||||
s = cpath.lstat(ltarget)
|
||||
except OSError, (err, strerror):
|
||||
if err != errno.ENOENT:
|
||||
raise
|
||||
# Skip broken symlinks
|
||||
continue
|
||||
if not s:
|
||||
continue
|
||||
# Check its an excutable
|
||||
if (s[stat.ST_MODE] & stat.S_IXUSR) or (s[stat.ST_MODE] & stat.S_IXGRP) or (s[stat.ST_MODE] & stat.S_IXOTH) \
|
||||
or ((file.startswith(libdir) or file.startswith(baselibdir)) and ".so" in f):
|
||||
# If it's a symlink, and points to an ELF file, we capture the readlink target
|
||||
if os.path.islink(file):
|
||||
if cpath.islink(file):
|
||||
target = os.readlink(file)
|
||||
if isELF(ltarget):
|
||||
#bb.note("Sym: %s (%d)" % (ltarget, isELF(ltarget)))
|
||||
|
@ -918,8 +920,8 @@ python populate_packages () {
|
|||
for file in files:
|
||||
if os.path.isabs(file):
|
||||
file = '.' + file
|
||||
if not os.path.islink(file):
|
||||
if os.path.isdir(file):
|
||||
if not cpath.islink(file):
|
||||
if cpath.isdir(file):
|
||||
newfiles = [ os.path.join(file,x) for x in os.listdir(file) ]
|
||||
if newfiles:
|
||||
files += newfiles
|
||||
|
@ -929,7 +931,7 @@ python populate_packages () {
|
|||
if [ file ] != globbed:
|
||||
files += globbed
|
||||
continue
|
||||
if (not os.path.islink(file)) and (not os.path.exists(file)):
|
||||
if (not cpath.islink(file)) and (not cpath.exists(file)):
|
||||
continue
|
||||
if file in seen:
|
||||
continue
|
||||
|
@ -938,33 +940,33 @@ python populate_packages () {
|
|||
def mkdir(src, dest, p):
|
||||
src = os.path.join(src, p)
|
||||
dest = os.path.join(dest, p)
|
||||
bb.utils.mkdirhier(dest)
|
||||
fstat = os.stat(src)
|
||||
os.chmod(dest, fstat.st_mode)
|
||||
fstat = cpath.stat(src)
|
||||
os.mkdir(dest, fstat.st_mode)
|
||||
os.chown(dest, fstat.st_uid, fstat.st_gid)
|
||||
if p not in seen:
|
||||
seen.append(p)
|
||||
cpath.updatecache(dest)
|
||||
|
||||
def mkdir_recurse(src, dest, paths):
|
||||
if os.path.exists(dest + '/' + paths):
|
||||
if cpath.exists(dest + '/' + paths):
|
||||
return
|
||||
while paths.startswith("./"):
|
||||
paths = paths[2:]
|
||||
p = "."
|
||||
for c in paths.split("/"):
|
||||
p = os.path.join(p, c)
|
||||
if not os.path.exists(os.path.join(dest, p)):
|
||||
if not cpath.exists(os.path.join(dest, p)):
|
||||
mkdir(src, dest, p)
|
||||
|
||||
if os.path.isdir(file) and not os.path.islink(file):
|
||||
if cpath.isdir(file) and not cpath.islink(file):
|
||||
mkdir_recurse(dvar, root, file)
|
||||
continue
|
||||
|
||||
mkdir_recurse(dvar, root, os.path.dirname(file))
|
||||
fpath = os.path.join(root,file)
|
||||
if not os.path.islink(file):
|
||||
if not cpath.islink(file):
|
||||
os.link(file, fpath)
|
||||
fstat = os.stat(file)
|
||||
fstat = cpath.stat(file)
|
||||
os.chmod(fpath, fstat.st_mode)
|
||||
os.chown(fpath, fstat.st_uid, fstat.st_gid)
|
||||
continue
|
||||
|
@ -975,7 +977,7 @@ python populate_packages () {
|
|||
os.chdir(workdir)
|
||||
|
||||
unshipped = []
|
||||
for root, dirs, files in os.walk(dvar):
|
||||
for root, dirs, files in cpath.walk(dvar):
|
||||
dir = root[len(dvar):]
|
||||
if not dir:
|
||||
dir = os.sep
|
||||
|
@ -1009,8 +1011,8 @@ python package_fixsymlinks () {
|
|||
for path in pkgfiles[pkg]:
|
||||
rpath = path[len(inst_root):]
|
||||
pkg_files[pkg].append(rpath)
|
||||
rtarget = oe.path.realpath(path, inst_root, True, assume_dir = True)
|
||||
if not os.path.lexists(rtarget):
|
||||
rtarget = cpath.realpath(path, inst_root, True, assume_dir = True)
|
||||
if not cpath.lexists(rtarget):
|
||||
dangling_links[pkg].append(os.path.normpath(rtarget[len(inst_root):]))
|
||||
|
||||
newrdepends = {}
|
||||
|
@ -1394,7 +1396,7 @@ python package_do_shlibs() {
|
|||
renames = list()
|
||||
for file in pkgfiles[pkg]:
|
||||
soname = None
|
||||
if os.path.islink(file):
|
||||
if cpath.islink(file):
|
||||
continue
|
||||
if targetos == "darwin" or targetos == "darwin8":
|
||||
darwin_so(file)
|
||||
|
@ -1781,6 +1783,10 @@ python do_package () {
|
|||
# as any change to rpmdeps requires this to be rerun.
|
||||
# PACKAGE_BBCLASS_VERSION = "1"
|
||||
|
||||
# Init cachedpath
|
||||
global cpath
|
||||
cpath = oe.cachedpath.CachedPath()
|
||||
|
||||
###########################################################################
|
||||
# Sanity test the setup
|
||||
###########################################################################
|
||||
|
@ -1827,6 +1833,8 @@ python do_package () {
|
|||
# Split up PKGD into PKGDEST
|
||||
###########################################################################
|
||||
|
||||
cpath = oe.cachedpath.CachedPath()
|
||||
|
||||
for f in (d.getVar('PACKAGESPLITFUNCS', True) or '').split():
|
||||
bb.build.exec_func(f, d)
|
||||
|
||||
|
@ -1841,7 +1849,7 @@ python do_package () {
|
|||
pkgdest = d.getVar('PKGDEST', True)
|
||||
for pkg in packages:
|
||||
pkgfiles[pkg] = []
|
||||
for walkroot, dirs, files in os.walk(pkgdest + "/" + pkg):
|
||||
for walkroot, dirs, files in cpath.walk(pkgdest + "/" + pkg):
|
||||
for file in files:
|
||||
pkgfiles[pkg].append(walkroot + os.sep + file)
|
||||
|
||||
|
|
|
@ -0,0 +1,235 @@
|
|||
#
|
||||
# Based on standard python library functions but avoid
|
||||
# repeated stat calls. Its assumed the files will not change from under us
|
||||
# so we can cache stat calls.
|
||||
#
|
||||
|
||||
import os
|
||||
import errno
|
||||
import stat as statmod
|
||||
|
||||
class CachedPath(object):
|
||||
def __init__(self):
|
||||
self.statcache = {}
|
||||
self.lstatcache = {}
|
||||
self.normpathcache = {}
|
||||
return
|
||||
|
||||
def updatecache(self, x):
|
||||
x = self.normpath(x)
|
||||
if x in self.statcache:
|
||||
del self.statcache[x]
|
||||
if x in self.lstatcache:
|
||||
del self.lstatcache[x]
|
||||
|
||||
def normpath(self, path):
|
||||
if path in self.normpathcache:
|
||||
return self.normpathcache[path]
|
||||
newpath = os.path.normpath(path)
|
||||
self.normpathcache[path] = newpath
|
||||
return newpath
|
||||
|
||||
def _callstat(self, path):
|
||||
if path in self.statcache:
|
||||
return self.statcache[path]
|
||||
try:
|
||||
st = os.stat(path)
|
||||
self.statcache[path] = st
|
||||
return st
|
||||
except os.error:
|
||||
self.statcache[path] = False
|
||||
return False
|
||||
|
||||
# We might as well call lstat and then only
|
||||
# call stat as well in the symbolic link case
|
||||
# since this turns out to be much more optimal
|
||||
# in real world usage of this cache
|
||||
def callstat(self, path):
|
||||
path = self.normpath(path)
|
||||
self.calllstat(path)
|
||||
return self.statcache[path]
|
||||
|
||||
def calllstat(self, path):
|
||||
path = self.normpath(path)
|
||||
if path in self.lstatcache:
|
||||
return self.lstatcache[path]
|
||||
#bb.error("LStatpath:" + path)
|
||||
try:
|
||||
lst = os.lstat(path)
|
||||
self.lstatcache[path] = lst
|
||||
if not statmod.S_ISLNK(lst.st_mode):
|
||||
self.statcache[path] = lst
|
||||
else:
|
||||
self._callstat(path)
|
||||
return lst
|
||||
except (os.error, AttributeError):
|
||||
self.lstatcache[path] = False
|
||||
self.statcache[path] = False
|
||||
return False
|
||||
|
||||
# This follows symbolic links, so both islink() and isdir() can be true
|
||||
# for the same path ono systems that support symlinks
|
||||
def isfile(self, path):
|
||||
"""Test whether a path is a regular file"""
|
||||
st = self.callstat(path)
|
||||
if not st:
|
||||
return False
|
||||
return statmod.S_ISREG(st.st_mode)
|
||||
|
||||
# Is a path a directory?
|
||||
# This follows symbolic links, so both islink() and isdir()
|
||||
# can be true for the same path on systems that support symlinks
|
||||
def isdir(self, s):
|
||||
"""Return true if the pathname refers to an existing directory."""
|
||||
st = self.callstat(s)
|
||||
if not st:
|
||||
return False
|
||||
return statmod.S_ISDIR(st.st_mode)
|
||||
|
||||
def islink(self, path):
|
||||
"""Test whether a path is a symbolic link"""
|
||||
st = self.calllstat(path)
|
||||
if not st:
|
||||
return False
|
||||
return statmod.S_ISLNK(st.st_mode)
|
||||
|
||||
# Does a path exist?
|
||||
# This is false for dangling symbolic links on systems that support them.
|
||||
def exists(self, path):
|
||||
"""Test whether a path exists. Returns False for broken symbolic links"""
|
||||
if self.callstat(path):
|
||||
return True
|
||||
return False
|
||||
|
||||
def lexists(self, path):
|
||||
"""Test whether a path exists. Returns True for broken symbolic links"""
|
||||
if self.calllstat(path):
|
||||
return True
|
||||
return False
|
||||
|
||||
def stat(self, path):
|
||||
return self.callstat(path)
|
||||
|
||||
def lstat(self, path):
|
||||
return self.calllstat(path)
|
||||
|
||||
def walk(self, top, topdown=True, onerror=None, followlinks=False):
|
||||
# Matches os.walk, not os.path.walk()
|
||||
|
||||
# We may not have read permission for top, in which case we can't
|
||||
# get a list of the files the directory contains. os.path.walk
|
||||
# always suppressed the exception then, rather than blow up for a
|
||||
# minor reason when (say) a thousand readable directories are still
|
||||
# left to visit. That logic is copied here.
|
||||
try:
|
||||
# Note that listdir and error are globals in this module due
|
||||
# to earlier import-*.
|
||||
names = os.listdir(top)
|
||||
except error, err:
|
||||
if onerror is not None:
|
||||
onerror(err)
|
||||
return
|
||||
|
||||
dirs, nondirs = [], []
|
||||
for name in names:
|
||||
if self.isdir(os.path.join(top, name)):
|
||||
dirs.append(name)
|
||||
else:
|
||||
nondirs.append(name)
|
||||
|
||||
if topdown:
|
||||
yield top, dirs, nondirs
|
||||
for name in dirs:
|
||||
new_path = os.path.join(top, name)
|
||||
if followlinks or not self.islink(new_path):
|
||||
for x in self.walk(new_path, topdown, onerror, followlinks):
|
||||
yield x
|
||||
if not topdown:
|
||||
yield top, dirs, nondirs
|
||||
|
||||
## realpath() related functions
|
||||
def __is_path_below(self, file, root):
|
||||
return (file + os.path.sep).startswith(root)
|
||||
|
||||
def __realpath_rel(self, start, rel_path, root, loop_cnt, assume_dir):
|
||||
"""Calculates real path of symlink 'start' + 'rel_path' below
|
||||
'root'; no part of 'start' below 'root' must contain symlinks. """
|
||||
have_dir = True
|
||||
|
||||
for d in rel_path.split(os.path.sep):
|
||||
if not have_dir and not assume_dir:
|
||||
raise OSError(errno.ENOENT, "no such directory %s" % start)
|
||||
|
||||
if d == os.path.pardir: # '..'
|
||||
if len(start) >= len(root):
|
||||
# do not follow '..' before root
|
||||
start = os.path.dirname(start)
|
||||
else:
|
||||
# emit warning?
|
||||
pass
|
||||
else:
|
||||
(start, have_dir) = self.__realpath(os.path.join(start, d),
|
||||
root, loop_cnt, assume_dir)
|
||||
|
||||
assert(self.__is_path_below(start, root))
|
||||
|
||||
return start
|
||||
|
||||
def __realpath(self, file, root, loop_cnt, assume_dir):
|
||||
while self.islink(file) and len(file) >= len(root):
|
||||
if loop_cnt == 0:
|
||||
raise OSError(errno.ELOOP, file)
|
||||
|
||||
loop_cnt -= 1
|
||||
target = os.path.normpath(os.readlink(file))
|
||||
|
||||
if not os.path.isabs(target):
|
||||
tdir = os.path.dirname(file)
|
||||
assert(self.__is_path_below(tdir, root))
|
||||
else:
|
||||
tdir = root
|
||||
|
||||
file = self.__realpath_rel(tdir, target, root, loop_cnt, assume_dir)
|
||||
|
||||
try:
|
||||
is_dir = self.isdir(file)
|
||||
except:
|
||||
is_dir = False
|
||||
|
||||
return (file, is_dir)
|
||||
|
||||
def realpath(self, file, root, use_physdir = True, loop_cnt = 100, assume_dir = False):
|
||||
""" Returns the canonical path of 'file' with assuming a
|
||||
toplevel 'root' directory. When 'use_physdir' is set, all
|
||||
preceding path components of 'file' will be resolved first;
|
||||
this flag should be set unless it is guaranteed that there is
|
||||
no symlink in the path. When 'assume_dir' is not set, missing
|
||||
path components will raise an ENOENT error"""
|
||||
|
||||
root = os.path.normpath(root)
|
||||
file = os.path.normpath(file)
|
||||
|
||||
if not root.endswith(os.path.sep):
|
||||
# letting root end with '/' makes some things easier
|
||||
root = root + os.path.sep
|
||||
|
||||
if not self.__is_path_below(file, root):
|
||||
raise OSError(errno.EINVAL, "file '%s' is not below root" % file)
|
||||
|
||||
try:
|
||||
if use_physdir:
|
||||
file = self.__realpath_rel(root, file[(len(root) - 1):], root, loop_cnt, assume_dir)
|
||||
else:
|
||||
file = self.__realpath(file, root, loop_cnt, assume_dir)[0]
|
||||
except OSError, e:
|
||||
if e.errno == errno.ELOOP:
|
||||
# make ELOOP more readable; without catching it, there will
|
||||
# be printed a backtrace with 100s of OSError exceptions
|
||||
# else
|
||||
raise OSError(errno.ELOOP,
|
||||
"too much recursions while resolving '%s'; loop in '%s'" %
|
||||
(file, e.strerror))
|
||||
|
||||
raise
|
||||
|
||||
return file
|
Loading…
Reference in New Issue