diff options
Diffstat (limited to 'waflib/extras/fast_partial.py')
m--------- | waflib | 0 | ||||
-rw-r--r-- | waflib/extras/fast_partial.py | 518 |
2 files changed, 0 insertions, 518 deletions
diff --git a/waflib b/waflib new file mode 160000 +Subproject 2314e236ca6e7d94a26c3c17091da0f25f5867f diff --git a/waflib/extras/fast_partial.py b/waflib/extras/fast_partial.py deleted file mode 100644 index d5b61448..00000000 --- a/waflib/extras/fast_partial.py +++ /dev/null @@ -1,518 +0,0 @@ -#! /usr/bin/env python -# encoding: utf-8 -# Thomas Nagy, 2017-2018 (ita) - -""" -A system for fast partial rebuilds - -Creating a large amount of task objects up front can take some time. -By making a few assumptions, it is possible to avoid posting creating -task objects for targets that are already up-to-date. - -On a silly benchmark the gain observed for 1M tasks can be 5m->10s -for a single file change. - -Usage:: - - def options(opt): - opt.load('fast_partial') - -Assumptions: -* Mostly for C/C++/Fortran targets with link tasks (object-only targets are not handled) -* For full project builds: no --targets and no pruning from subfolders -* The installation phase is ignored -* `use=` dependencies are specified up front even across build groups -* Task generator source files are not obtained from globs - -Implementation details: -* The first layer obtains file timestamps to recalculate file hashes only - when necessary (similar to md5_tstamp); the timestamps are then stored - in a dedicated pickle file -* A second layer associates each task generator to a file set to help - detecting changes. Task generators are to create their tasks only when - the related files have been modified. A specific db file is created - to store such data (5m -> 1m10) -* A third layer binds build context proxies onto task generators, replacing - the default context. While loading data for the full build uses more memory - (4GB -> 9GB), partial builds are then much faster (1m10 -> 13s) -* A fourth layer enables a 2-level cache on file signatures to - reduce the size of the main pickle file (13s -> 10s) -""" - -import os -from waflib import Build, Context, Errors, Logs, Task, TaskGen, Utils -from waflib.TaskGen import feature, after_method, taskgen_method -import waflib.Node - -DONE = 0 -DIRTY = 1 -NEEDED = 2 - -SKIPPABLE = ['cshlib', 'cxxshlib', 'cstlib', 'cxxstlib', 'cprogram', 'cxxprogram'] - -TSTAMP_DB = '.wafpickle_tstamp_db_file' - -SAVED_ATTRS = 'root node_sigs task_sigs imp_sigs raw_deps node_deps'.split() - -class bld_proxy(object): - def __init__(self, bld): - object.__setattr__(self, 'bld', bld) - - object.__setattr__(self, 'node_class', type('Nod3', (waflib.Node.Node,), {})) - self.node_class.__module__ = 'waflib.Node' - self.node_class.ctx = self - - object.__setattr__(self, 'root', self.node_class('', None)) - for x in SAVED_ATTRS: - if x != 'root': - object.__setattr__(self, x, {}) - - self.fix_nodes() - - def __setattr__(self, name, value): - bld = object.__getattribute__(self, 'bld') - setattr(bld, name, value) - - def __delattr__(self, name): - bld = object.__getattribute__(self, 'bld') - delattr(bld, name) - - def __getattribute__(self, name): - try: - return object.__getattribute__(self, name) - except AttributeError: - bld = object.__getattribute__(self, 'bld') - return getattr(bld, name) - - def __call__(self, *k, **kw): - return self.bld(*k, **kw) - - def fix_nodes(self): - for x in ('srcnode', 'path', 'bldnode'): - node = self.root.find_dir(getattr(self.bld, x).abspath()) - object.__setattr__(self, x, node) - - def set_key(self, store_key): - object.__setattr__(self, 'store_key', store_key) - - def fix_tg_path(self, *tgs): - # changing Node objects on task generators is possible - # yet, all Node objects must belong to the same parent - for tg in tgs: - tg.path = self.root.make_node(tg.path.abspath()) - - def restore(self): - dbfn = os.path.join(self.variant_dir, Context.DBFILE + self.store_key) - Logs.debug('rev_use: reading %s', dbfn) - try: - data = Utils.readf(dbfn, 'rb') - except (EnvironmentError, EOFError): - # handle missing file/empty file - Logs.debug('rev_use: Could not load the build cache %s (missing)', dbfn) - else: - try: - waflib.Node.pickle_lock.acquire() - waflib.Node.Nod3 = self.node_class - try: - data = Build.cPickle.loads(data) - except Exception as e: - Logs.debug('rev_use: Could not pickle the build cache %s: %r', dbfn, e) - else: - for x in SAVED_ATTRS: - object.__setattr__(self, x, data.get(x, {})) - finally: - waflib.Node.pickle_lock.release() - self.fix_nodes() - - def store(self): - data = {} - for x in Build.SAVED_ATTRS: - data[x] = getattr(self, x) - db = os.path.join(self.variant_dir, Context.DBFILE + self.store_key) - - try: - waflib.Node.pickle_lock.acquire() - waflib.Node.Nod3 = self.node_class - x = Build.cPickle.dumps(data, Build.PROTOCOL) - finally: - waflib.Node.pickle_lock.release() - - Logs.debug('rev_use: storing %s', db) - Utils.writef(db + '.tmp', x, m='wb') - try: - st = os.stat(db) - os.remove(db) - if not Utils.is_win32: - os.chown(db + '.tmp', st.st_uid, st.st_gid) - except (AttributeError, OSError): - pass - os.rename(db + '.tmp', db) - -class bld(Build.BuildContext): - def __init__(self, **kw): - super(bld, self).__init__(**kw) - self.hashes_md5_tstamp = {} - - def __call__(self, *k, **kw): - # this is one way of doing it, one could use a task generator method too - bld = kw['bld'] = bld_proxy(self) - ret = TaskGen.task_gen(*k, **kw) - self.task_gen_cache_names = {} - self.add_to_group(ret, group=kw.get('group')) - ret.bld = bld - bld.set_key(ret.path.abspath().replace(os.sep, '') + str(ret.idx)) - return ret - - def is_dirty(self): - return True - - def store_tstamps(self): - # Called after a build is finished - # For each task generator, record all files involved in task objects - # optimization: done only if there was something built - do_store = False - try: - f_deps = self.f_deps - except AttributeError: - f_deps = self.f_deps = {} - self.f_tstamps = {} - - allfiles = set() - for g in self.groups: - for tg in g: - try: - staleness = tg.staleness - except AttributeError: - staleness = DIRTY - - if staleness != DIRTY: - # DONE case: there was nothing built - # NEEDED case: the tg was brought in because of 'use' propagation - # but nothing really changed for them, there may be incomplete - # tasks (object files) and in this case it is best to let the next build - # figure out if an input/output file changed - continue - - do_cache = False - for tsk in tg.tasks: - if tsk.hasrun == Task.SUCCESS: - do_cache = True - pass - elif tsk.hasrun == Task.SKIPPED: - pass - else: - # one failed task, clear the cache for this tg - try: - del f_deps[(tg.path.abspath(), tg.idx)] - except KeyError: - pass - else: - # just store the new state because there is a change - do_store = True - - # skip the rest because there is no valid cache possible - break - else: - if not do_cache: - # all skipped, but is there anything in cache? - try: - f_deps[(tg.path.abspath(), tg.idx)] - except KeyError: - # probably cleared because a wscript file changed - # store it - do_cache = True - - if do_cache: - - # there was a rebuild, store the data structure too - tg.bld.store() - - # all tasks skipped but no cache - # or a successful task build - do_store = True - st = set() - for tsk in tg.tasks: - st.update(tsk.inputs) - st.update(self.node_deps.get(tsk.uid(), [])) - - # TODO do last/when loading the tgs? - lst = [] - for k in ('wscript', 'wscript_build'): - n = tg.path.find_node(k) - if n: - n.get_bld_sig() - lst.append(n.abspath()) - - lst.extend(sorted(x.abspath() for x in st)) - allfiles.update(lst) - f_deps[(tg.path.abspath(), tg.idx)] = lst - - for x in allfiles: - # f_tstamps has everything, while md5_tstamp can be relatively empty on partial builds - self.f_tstamps[x] = self.hashes_md5_tstamp[x][0] - - if do_store: - dbfn = os.path.join(self.variant_dir, TSTAMP_DB) - Logs.debug('rev_use: storing %s', dbfn) - dbfn_tmp = dbfn + '.tmp' - x = Build.cPickle.dumps([self.f_tstamps, f_deps], Build.PROTOCOL) - Utils.writef(dbfn_tmp, x, m='wb') - os.rename(dbfn_tmp, dbfn) - Logs.debug('rev_use: stored %s', dbfn) - - def store(self): - self.store_tstamps() - if self.producer.dirty: - Build.BuildContext.store(self) - - def compute_needed_tgs(self): - # assume the 'use' keys are not modified during the build phase - - dbfn = os.path.join(self.variant_dir, TSTAMP_DB) - Logs.debug('rev_use: Loading %s', dbfn) - try: - data = Utils.readf(dbfn, 'rb') - except (EnvironmentError, EOFError): - Logs.debug('rev_use: Could not load the build cache %s (missing)', dbfn) - self.f_deps = {} - self.f_tstamps = {} - else: - try: - self.f_tstamps, self.f_deps = Build.cPickle.loads(data) - except Exception as e: - Logs.debug('rev_use: Could not pickle the build cache %s: %r', dbfn, e) - self.f_deps = {} - self.f_tstamps = {} - else: - Logs.debug('rev_use: Loaded %s', dbfn) - - - # 1. obtain task generators that contain rebuilds - # 2. obtain the 'use' graph and its dual - stales = set() - reverse_use_map = Utils.defaultdict(list) - use_map = Utils.defaultdict(list) - - for g in self.groups: - for tg in g: - if tg.is_stale(): - stales.add(tg) - - try: - lst = tg.use = Utils.to_list(tg.use) - except AttributeError: - pass - else: - for x in lst: - try: - xtg = self.get_tgen_by_name(x) - except Errors.WafError: - pass - else: - use_map[tg].append(xtg) - reverse_use_map[xtg].append(tg) - - Logs.debug('rev_use: found %r stale tgs', len(stales)) - - # 3. dfs to post downstream tg as stale - visited = set() - def mark_down(tg): - if tg in visited: - return - visited.add(tg) - Logs.debug('rev_use: marking down %r as stale', tg.name) - tg.staleness = DIRTY - for x in reverse_use_map[tg]: - mark_down(x) - for tg in stales: - mark_down(tg) - - # 4. dfs to find ancestors tg to mark as needed - self.needed_tgs = needed_tgs = set() - def mark_needed(tg): - if tg in needed_tgs: - return - needed_tgs.add(tg) - if tg.staleness == DONE: - Logs.debug('rev_use: marking up %r as needed', tg.name) - tg.staleness = NEEDED - for x in use_map[tg]: - mark_needed(x) - for xx in visited: - mark_needed(xx) - - # so we have the whole tg trees to post in the set "needed" - # load their build trees - for tg in needed_tgs: - tg.bld.restore() - tg.bld.fix_tg_path(tg) - - # the stale ones should be fully build, while the needed ones - # may skip a few tasks, see create_compiled_task and apply_link_after below - Logs.debug('rev_use: amount of needed task gens: %r', len(needed_tgs)) - - def post_group(self): - # assumption: we can ignore the folder/subfolders cuts - def tgpost(tg): - try: - f = tg.post - except AttributeError: - pass - else: - f() - - if not self.targets or self.targets == '*': - for tg in self.groups[self.current_group]: - # this can cut quite a lot of tg objects - if tg in self.needed_tgs: - tgpost(tg) - else: - # default implementation - return Build.BuildContext.post_group() - - def get_build_iterator(self): - if not self.targets or self.targets == '*': - self.compute_needed_tgs() - return Build.BuildContext.get_build_iterator(self) - -@taskgen_method -def is_stale(self): - # assume no globs - self.staleness = DIRTY - - # 1. the case of always stale targets - if getattr(self, 'always_stale', False): - return True - - # 2. check if the db file exists - db = os.path.join(self.bld.variant_dir, Context.DBFILE) - try: - dbstat = os.stat(db).st_mtime - except OSError: - Logs.debug('rev_use: must post %r because this is a clean build') - return True - - # 3. check if the configuration changed - if os.stat(self.bld.bldnode.find_node('c4che/build.config.py').abspath()).st_mtime > dbstat: - Logs.debug('rev_use: must post %r because the configuration has changed', self.name) - return True - - # 3.a any tstamp data? - try: - f_deps = self.bld.f_deps - except AttributeError: - Logs.debug('rev_use: must post %r because there is no f_deps', self.name) - return True - - # 4. check if this is the first build (no cache) - try: - lst = f_deps[(self.path.abspath(), self.idx)] - except KeyError: - Logs.debug('rev_use: must post %r because there it has no cached data', self.name) - return True - - try: - cache = self.bld.cache_tstamp_rev_use - except AttributeError: - cache = self.bld.cache_tstamp_rev_use = {} - - # 5. check the timestamp of each dependency files listed is unchanged - f_tstamps = self.bld.f_tstamps - for x in lst: - try: - old_ts = f_tstamps[x] - except KeyError: - Logs.debug('rev_use: must post %r because %r is not in cache', self.name, x) - return True - - try: - try: - ts = cache[x] - except KeyError: - ts = cache[x] = os.stat(x).st_mtime - except OSError: - del f_deps[(self.path.abspath(), self.idx)] - Logs.debug('rev_use: must post %r because %r does not exist anymore', self.name, x) - return True - else: - if ts != old_ts: - Logs.debug('rev_use: must post %r because the timestamp on %r changed %r %r', self.name, x, old_ts, ts) - return True - - self.staleness = DONE - return False - -@taskgen_method -def create_compiled_task(self, name, node): - # skip the creation of object files - # assumption: object-only targets are not skippable - if self.staleness == NEEDED: - # only libraries/programs can skip object files - for x in SKIPPABLE: - if x in self.features: - return None - - out = '%s.%d.o' % (node.name, self.idx) - task = self.create_task(name, node, node.parent.find_or_declare(out)) - try: - self.compiled_tasks.append(task) - except AttributeError: - self.compiled_tasks = [task] - return task - -@feature(*SKIPPABLE) -@after_method('apply_link') -def apply_link_after(self): - # cprogram/cxxprogram might be unnecessary - if self.staleness != NEEDED: - return - for tsk in self.tasks: - tsk.hasrun = Task.SKIPPED - -def path_from(self, node): - # handle nodes of distinct types - if node.ctx is not self.ctx: - node = self.ctx.root.make_node(node.abspath()) - return self.default_path_from(node) -waflib.Node.Node.default_path_from = waflib.Node.Node.path_from -waflib.Node.Node.path_from = path_from - -def h_file(self): - # similar to md5_tstamp.py, but with 2-layer cache - # global_cache for the build context common for all task generators - # local_cache for the build context proxy (one by task generator) - # - # the global cache is not persistent - # the local cache is persistent and meant for partial builds - # - # assume all calls are made from a single thread - # - filename = self.abspath() - st = os.stat(filename) - - global_cache = self.ctx.bld.hashes_md5_tstamp - local_cache = self.ctx.hashes_md5_tstamp - - if filename in global_cache: - # value already calculated in this build - cval = global_cache[filename] - - # the value in global cache is assumed to be calculated once - # reverifying it could cause task generators - # to get distinct tstamp values, thus missing rebuilds - local_cache[filename] = cval - return cval[1] - - if filename in local_cache: - cval = local_cache[filename] - if cval[0] == st.st_mtime: - # correct value from a previous build - # put it in the global cache - global_cache[filename] = cval - return cval[1] - - ret = Utils.h_file(filename) - local_cache[filename] = global_cache[filename] = (st.st_mtime, ret) - return ret -waflib.Node.Node.h_file = h_file - |