package: Add cachedpath optimisation

Currently, various standard library operations like os.walk(),
os.path.isdir() and os.path.islink() each call stat or lstat which
involves a syscall into the kernel. There is no caching since they could
conceivably have changed on disk. The result is that for something like
the do_package task of the kernel we're spending over two minutes making
868,000 individual stat calls for 23,000 files. This is suboptimal.

This patch adds lib/oe/cachedpath.py which are a set of replacement
functions for these operations which use cached stat data rather than
hitting the kernel each time. It gives a nice performance improvement
halving the build time of the kernel do_package.

(From OE-Core rev: 556dee0c4d6d8a87c0cddbd2f60fe5917d009f18)

Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
This commit is contained in:
Richard Purdie 2013-03-14 17:26:20 +00:00
parent cec0102647
commit add11fa1ab
3 changed files with 268 additions and 25 deletions

View File

@ -10,7 +10,7 @@ inherit utility-tasks
inherit metadata_scm
inherit logging
OE_IMPORTS += "os sys time oe.path oe.utils oe.data oe.package oe.packagegroup oe.sstatesig oe.lsb"
OE_IMPORTS += "os sys time oe.path oe.utils oe.data oe.package oe.packagegroup oe.sstatesig oe.lsb oe.cachedpath"
OE_IMPORTS[type] = "list"
def oe_import(d):

View File

@ -288,7 +288,7 @@ def copydebugsources(debugsrcdir, d):
basepath = dvar
for p in debugsrcdir.split("/"):
basepath = basepath + "/" + p
if not os.path.exists(basepath):
if not cpath.exists(basepath):
nosuchdir.append(basepath)
bb.utils.mkdirhier(basepath)
@ -388,7 +388,7 @@ python package_do_split_locales() {
localedir = os.path.join(dvar + datadir, 'locale')
if not os.path.isdir(localedir):
if not cpath.isdir(localedir):
bb.debug(1, "No locale files in this package")
return
@ -628,7 +628,7 @@ python fixup_perms () {
continue
origin = dvar + dir
if not (os.path.exists(origin) and os.path.isdir(origin) and not os.path.islink(origin)):
if not (cpath.exists(origin) and cpath.isdir(origin) and not cpath.islink(origin)):
continue
link = fs_perms_table[dir].link
@ -654,7 +654,7 @@ python fixup_perms () {
continue
origin = dvar + dir
if not (os.path.exists(origin) and os.path.isdir(origin)):
if not (cpath.exists(origin) and cpath.isdir(origin)):
continue
fix_perms(origin, fs_perms_table[dir].mode, fs_perms_table[dir].uid, fs_perms_table[dir].gid, dir)
@ -735,7 +735,7 @@ python split_and_strip_files () {
baselibdir = os.path.abspath(dvar + os.sep + d.getVar("base_libdir", True))
if (d.getVar('INHIBIT_PACKAGE_DEBUG_SPLIT', True) != '1') and \
(d.getVar('INHIBIT_PACKAGE_STRIP', True) != '1'):
for root, dirs, files in os.walk(dvar):
for root, dirs, files in cpath.walk(dvar):
for f in files:
file = os.path.join(root, f)
if file.endswith(".ko") and file.find("/lib/modules/") != -1:
@ -749,18 +749,20 @@ python split_and_strip_files () {
continue
try:
ltarget = oe.path.realpath(file, dvar, False)
s = os.lstat(ltarget)
ltarget = cpath.realpath(file, dvar, False)
s = cpath.lstat(ltarget)
except OSError, (err, strerror):
if err != errno.ENOENT:
raise
# Skip broken symlinks
continue
if not s:
continue
# Check its an excutable
if (s[stat.ST_MODE] & stat.S_IXUSR) or (s[stat.ST_MODE] & stat.S_IXGRP) or (s[stat.ST_MODE] & stat.S_IXOTH) \
or ((file.startswith(libdir) or file.startswith(baselibdir)) and ".so" in f):
# If it's a symlink, and points to an ELF file, we capture the readlink target
if os.path.islink(file):
if cpath.islink(file):
target = os.readlink(file)
if isELF(ltarget):
#bb.note("Sym: %s (%d)" % (ltarget, isELF(ltarget)))
@ -918,8 +920,8 @@ python populate_packages () {
for file in files:
if os.path.isabs(file):
file = '.' + file
if not os.path.islink(file):
if os.path.isdir(file):
if not cpath.islink(file):
if cpath.isdir(file):
newfiles = [ os.path.join(file,x) for x in os.listdir(file) ]
if newfiles:
files += newfiles
@ -929,7 +931,7 @@ python populate_packages () {
if [ file ] != globbed:
files += globbed
continue
if (not os.path.islink(file)) and (not os.path.exists(file)):
if (not cpath.islink(file)) and (not cpath.exists(file)):
continue
if file in seen:
continue
@ -938,33 +940,33 @@ python populate_packages () {
def mkdir(src, dest, p):
src = os.path.join(src, p)
dest = os.path.join(dest, p)
bb.utils.mkdirhier(dest)
fstat = os.stat(src)
os.chmod(dest, fstat.st_mode)
fstat = cpath.stat(src)
os.mkdir(dest, fstat.st_mode)
os.chown(dest, fstat.st_uid, fstat.st_gid)
if p not in seen:
seen.append(p)
cpath.updatecache(dest)
def mkdir_recurse(src, dest, paths):
if os.path.exists(dest + '/' + paths):
if cpath.exists(dest + '/' + paths):
return
while paths.startswith("./"):
paths = paths[2:]
p = "."
for c in paths.split("/"):
p = os.path.join(p, c)
if not os.path.exists(os.path.join(dest, p)):
if not cpath.exists(os.path.join(dest, p)):
mkdir(src, dest, p)
if os.path.isdir(file) and not os.path.islink(file):
if cpath.isdir(file) and not cpath.islink(file):
mkdir_recurse(dvar, root, file)
continue
mkdir_recurse(dvar, root, os.path.dirname(file))
fpath = os.path.join(root,file)
if not os.path.islink(file):
if not cpath.islink(file):
os.link(file, fpath)
fstat = os.stat(file)
fstat = cpath.stat(file)
os.chmod(fpath, fstat.st_mode)
os.chown(fpath, fstat.st_uid, fstat.st_gid)
continue
@ -975,7 +977,7 @@ python populate_packages () {
os.chdir(workdir)
unshipped = []
for root, dirs, files in os.walk(dvar):
for root, dirs, files in cpath.walk(dvar):
dir = root[len(dvar):]
if not dir:
dir = os.sep
@ -1009,8 +1011,8 @@ python package_fixsymlinks () {
for path in pkgfiles[pkg]:
rpath = path[len(inst_root):]
pkg_files[pkg].append(rpath)
rtarget = oe.path.realpath(path, inst_root, True, assume_dir = True)
if not os.path.lexists(rtarget):
rtarget = cpath.realpath(path, inst_root, True, assume_dir = True)
if not cpath.lexists(rtarget):
dangling_links[pkg].append(os.path.normpath(rtarget[len(inst_root):]))
newrdepends = {}
@ -1394,7 +1396,7 @@ python package_do_shlibs() {
renames = list()
for file in pkgfiles[pkg]:
soname = None
if os.path.islink(file):
if cpath.islink(file):
continue
if targetos == "darwin" or targetos == "darwin8":
darwin_so(file)
@ -1781,6 +1783,10 @@ python do_package () {
# as any change to rpmdeps requires this to be rerun.
# PACKAGE_BBCLASS_VERSION = "1"
# Init cachedpath
global cpath
cpath = oe.cachedpath.CachedPath()
###########################################################################
# Sanity test the setup
###########################################################################
@ -1827,6 +1833,8 @@ python do_package () {
# Split up PKGD into PKGDEST
###########################################################################
cpath = oe.cachedpath.CachedPath()
for f in (d.getVar('PACKAGESPLITFUNCS', True) or '').split():
bb.build.exec_func(f, d)
@ -1841,7 +1849,7 @@ python do_package () {
pkgdest = d.getVar('PKGDEST', True)
for pkg in packages:
pkgfiles[pkg] = []
for walkroot, dirs, files in os.walk(pkgdest + "/" + pkg):
for walkroot, dirs, files in cpath.walk(pkgdest + "/" + pkg):
for file in files:
pkgfiles[pkg].append(walkroot + os.sep + file)

235
meta/lib/oe/cachedpath.py Normal file
View File

@ -0,0 +1,235 @@
#
# Based on standard python library functions but avoid
# repeated stat calls. Its assumed the files will not change from under us
# so we can cache stat calls.
#
import os
import errno
import stat as statmod
class CachedPath(object):
def __init__(self):
self.statcache = {}
self.lstatcache = {}
self.normpathcache = {}
return
def updatecache(self, x):
x = self.normpath(x)
if x in self.statcache:
del self.statcache[x]
if x in self.lstatcache:
del self.lstatcache[x]
def normpath(self, path):
if path in self.normpathcache:
return self.normpathcache[path]
newpath = os.path.normpath(path)
self.normpathcache[path] = newpath
return newpath
def _callstat(self, path):
if path in self.statcache:
return self.statcache[path]
try:
st = os.stat(path)
self.statcache[path] = st
return st
except os.error:
self.statcache[path] = False
return False
# We might as well call lstat and then only
# call stat as well in the symbolic link case
# since this turns out to be much more optimal
# in real world usage of this cache
def callstat(self, path):
path = self.normpath(path)
self.calllstat(path)
return self.statcache[path]
def calllstat(self, path):
path = self.normpath(path)
if path in self.lstatcache:
return self.lstatcache[path]
#bb.error("LStatpath:" + path)
try:
lst = os.lstat(path)
self.lstatcache[path] = lst
if not statmod.S_ISLNK(lst.st_mode):
self.statcache[path] = lst
else:
self._callstat(path)
return lst
except (os.error, AttributeError):
self.lstatcache[path] = False
self.statcache[path] = False
return False
# This follows symbolic links, so both islink() and isdir() can be true
# for the same path ono systems that support symlinks
def isfile(self, path):
"""Test whether a path is a regular file"""
st = self.callstat(path)
if not st:
return False
return statmod.S_ISREG(st.st_mode)
# Is a path a directory?
# This follows symbolic links, so both islink() and isdir()
# can be true for the same path on systems that support symlinks
def isdir(self, s):
"""Return true if the pathname refers to an existing directory."""
st = self.callstat(s)
if not st:
return False
return statmod.S_ISDIR(st.st_mode)
def islink(self, path):
"""Test whether a path is a symbolic link"""
st = self.calllstat(path)
if not st:
return False
return statmod.S_ISLNK(st.st_mode)
# Does a path exist?
# This is false for dangling symbolic links on systems that support them.
def exists(self, path):
"""Test whether a path exists. Returns False for broken symbolic links"""
if self.callstat(path):
return True
return False
def lexists(self, path):
"""Test whether a path exists. Returns True for broken symbolic links"""
if self.calllstat(path):
return True
return False
def stat(self, path):
return self.callstat(path)
def lstat(self, path):
return self.calllstat(path)
def walk(self, top, topdown=True, onerror=None, followlinks=False):
# Matches os.walk, not os.path.walk()
# We may not have read permission for top, in which case we can't
# get a list of the files the directory contains. os.path.walk
# always suppressed the exception then, rather than blow up for a
# minor reason when (say) a thousand readable directories are still
# left to visit. That logic is copied here.
try:
# Note that listdir and error are globals in this module due
# to earlier import-*.
names = os.listdir(top)
except error, err:
if onerror is not None:
onerror(err)
return
dirs, nondirs = [], []
for name in names:
if self.isdir(os.path.join(top, name)):
dirs.append(name)
else:
nondirs.append(name)
if topdown:
yield top, dirs, nondirs
for name in dirs:
new_path = os.path.join(top, name)
if followlinks or not self.islink(new_path):
for x in self.walk(new_path, topdown, onerror, followlinks):
yield x
if not topdown:
yield top, dirs, nondirs
## realpath() related functions
def __is_path_below(self, file, root):
return (file + os.path.sep).startswith(root)
def __realpath_rel(self, start, rel_path, root, loop_cnt, assume_dir):
"""Calculates real path of symlink 'start' + 'rel_path' below
'root'; no part of 'start' below 'root' must contain symlinks. """
have_dir = True
for d in rel_path.split(os.path.sep):
if not have_dir and not assume_dir:
raise OSError(errno.ENOENT, "no such directory %s" % start)
if d == os.path.pardir: # '..'
if len(start) >= len(root):
# do not follow '..' before root
start = os.path.dirname(start)
else:
# emit warning?
pass
else:
(start, have_dir) = self.__realpath(os.path.join(start, d),
root, loop_cnt, assume_dir)
assert(self.__is_path_below(start, root))
return start
def __realpath(self, file, root, loop_cnt, assume_dir):
while self.islink(file) and len(file) >= len(root):
if loop_cnt == 0:
raise OSError(errno.ELOOP, file)
loop_cnt -= 1
target = os.path.normpath(os.readlink(file))
if not os.path.isabs(target):
tdir = os.path.dirname(file)
assert(self.__is_path_below(tdir, root))
else:
tdir = root
file = self.__realpath_rel(tdir, target, root, loop_cnt, assume_dir)
try:
is_dir = self.isdir(file)
except:
is_dir = False
return (file, is_dir)
def realpath(self, file, root, use_physdir = True, loop_cnt = 100, assume_dir = False):
""" Returns the canonical path of 'file' with assuming a
toplevel 'root' directory. When 'use_physdir' is set, all
preceding path components of 'file' will be resolved first;
this flag should be set unless it is guaranteed that there is
no symlink in the path. When 'assume_dir' is not set, missing
path components will raise an ENOENT error"""
root = os.path.normpath(root)
file = os.path.normpath(file)
if not root.endswith(os.path.sep):
# letting root end with '/' makes some things easier
root = root + os.path.sep
if not self.__is_path_below(file, root):
raise OSError(errno.EINVAL, "file '%s' is not below root" % file)
try:
if use_physdir:
file = self.__realpath_rel(root, file[(len(root) - 1):], root, loop_cnt, assume_dir)
else:
file = self.__realpath(file, root, loop_cnt, assume_dir)[0]
except OSError, e:
if e.errno == errno.ELOOP:
# make ELOOP more readable; without catching it, there will
# be printed a backtrace with 100s of OSError exceptions
# else
raise OSError(errno.ELOOP,
"too much recursions while resolving '%s'; loop in '%s'" %
(file, e.strerror))
raise
return file