From efdc5871d0ba68e364c2d7675a0b4b0965d0130c Mon Sep 17 00:00:00 2001 From: David Robillard Date: Thu, 2 Apr 2020 19:23:06 +0200 Subject: Update to waf 2.0.19 --- extras/clang_compilation_database.py | 172 ++++++++---- extras/javatest.py | 119 +++++++- extras/msvc_pdb.py | 46 +++ extras/pytest.py | 17 +- extras/wafcache.py | 524 +++++++++++++++++++++++++++++++++++ 5 files changed, 808 insertions(+), 70 deletions(-) create mode 100644 extras/msvc_pdb.py create mode 100644 extras/wafcache.py (limited to 'extras') diff --git a/extras/clang_compilation_database.py b/extras/clang_compilation_database.py index 4d9b5e2..1398b0a 100644 --- a/extras/clang_compilation_database.py +++ b/extras/clang_compilation_database.py @@ -1,6 +1,7 @@ #!/usr/bin/env python # encoding: utf-8 # Christoph Koke, 2013 +# Alibek Omarov, 2019 """ Writes the c and cpp compile commands into build/compile_commands.json @@ -8,14 +9,23 @@ see http://clang.llvm.org/docs/JSONCompilationDatabase.html Usage: - def configure(conf): - conf.load('compiler_cxx') - ... - conf.load('clang_compilation_database') + Load this tool in `options` to be able to generate database + by request in command-line and before build: + + $ waf clangdb + + def options(opt): + opt.load('clang_compilation_database') + + Otherwise, load only in `configure` to generate it always before build. + + def configure(conf): + conf.load('compiler_cxx') + ... + conf.load('clang_compilation_database') """ -import sys, os, json, shlex, pipes -from waflib import Logs, TaskGen, Task +from waflib import Logs, TaskGen, Task, Build, Scripting Task.Task.keep_last_cmd = True @@ -23,63 +33,103 @@ Task.Task.keep_last_cmd = True @TaskGen.after_method('process_use') def collect_compilation_db_tasks(self): "Add a compilation database entry for compiled tasks" - try: - clang_db = self.bld.clang_compilation_database_tasks - except AttributeError: - clang_db = self.bld.clang_compilation_database_tasks = [] - self.bld.add_post_fun(write_compilation_database) + if not isinstance(self.bld, ClangDbContext): + return tup = tuple(y for y in [Task.classes.get(x) for x in ('c', 'cxx')] if y) for task in getattr(self, 'compiled_tasks', []): if isinstance(task, tup): - clang_db.append(task) - -def write_compilation_database(ctx): - "Write the clang compilation database as JSON" - database_file = ctx.bldnode.make_node('compile_commands.json') - Logs.info('Build commands will be stored in %s', database_file.path_from(ctx.path)) - try: - root = json.load(database_file) - except IOError: - root = [] - clang_db = dict((x['file'], x) for x in root) - for task in getattr(ctx, 'clang_compilation_database_tasks', []): + self.bld.clang_compilation_database_tasks.append(task) + +class ClangDbContext(Build.BuildContext): + '''generates compile_commands.json by request''' + cmd = 'clangdb' + clang_compilation_database_tasks = [] + + def write_compilation_database(self): + """ + Write the clang compilation database as JSON + """ + database_file = self.bldnode.make_node('compile_commands.json') + Logs.info('Build commands will be stored in %s', database_file.path_from(self.path)) try: - cmd = task.last_cmd - except AttributeError: - continue - directory = getattr(task, 'cwd', ctx.variant_dir) - f_node = task.inputs[0] - filename = os.path.relpath(f_node.abspath(), directory) - entry = { - "directory": directory, - "arguments": cmd, - "file": filename, - } - clang_db[filename] = entry - root = list(clang_db.values()) - database_file.write(json.dumps(root, indent=2)) - -# Override the runnable_status function to do a dummy/dry run when the file doesn't need to be compiled. -# This will make sure compile_commands.json is always fully up to date. -# Previously you could end up with a partial compile_commands.json if the build failed. -for x in ('c', 'cxx'): - if x not in Task.classes: - continue - - t = Task.classes[x] - - def runnable_status(self): - def exec_command(cmd, **kw): - pass - - run_status = self.old_runnable_status() - if run_status == Task.SKIP_ME: - setattr(self, 'old_exec_command', getattr(self, 'exec_command', None)) - setattr(self, 'exec_command', exec_command) - self.run() - setattr(self, 'exec_command', getattr(self, 'old_exec_command', None)) - return run_status - - setattr(t, 'old_runnable_status', getattr(t, 'runnable_status', None)) - setattr(t, 'runnable_status', runnable_status) + root = database_file.read_json() + except IOError: + root = [] + clang_db = dict((x['file'], x) for x in root) + for task in self.clang_compilation_database_tasks: + try: + cmd = task.last_cmd + except AttributeError: + continue + f_node = task.inputs[0] + filename = f_node.path_from(task.get_cwd()) + entry = { + "directory": task.get_cwd().abspath(), + "arguments": cmd, + "file": filename, + } + clang_db[filename] = entry + root = list(clang_db.values()) + database_file.write_json(root) + + def execute(self): + """ + Build dry run + """ + self.restore() + + if not self.all_envs: + self.load_envs() + + self.recurse([self.run_dir]) + self.pre_build() + + # we need only to generate last_cmd, so override + # exec_command temporarily + def exec_command(self, *k, **kw): + return 0 + + for g in self.groups: + for tg in g: + try: + f = tg.post + except AttributeError: + pass + else: + f() + + if isinstance(tg, Task.Task): + lst = [tg] + else: lst = tg.tasks + for tsk in lst: + tup = tuple(y for y in [Task.classes.get(x) for x in ('c', 'cxx')] if y) + if isinstance(tsk, tup): + old_exec = tsk.exec_command + tsk.exec_command = exec_command + tsk.run() + tsk.exec_command = old_exec + + self.write_compilation_database() + +EXECUTE_PATCHED = False +def patch_execute(): + global EXECUTE_PATCHED + + if EXECUTE_PATCHED: + return + + def new_execute_build(self): + """ + Invoke clangdb command before build + """ + if type(self) == Build.BuildContext: + Scripting.run_command('clangdb') + + old_execute_build(self) + + old_execute_build = getattr(Build.BuildContext, 'execute_build', None) + setattr(Build.BuildContext, 'execute_build', new_execute_build) + EXECUTE_PATCHED = True + +patch_execute() diff --git a/extras/javatest.py b/extras/javatest.py index 979b8d8..f3c6cbf 100755 --- a/extras/javatest.py +++ b/extras/javatest.py @@ -1,6 +1,6 @@ #! /usr/bin/env python # encoding: utf-8 -# Federico Pellegrin, 2017 (fedepell) +# Federico Pellegrin, 2019 (fedepell) """ Provides Java Unit test support using :py:class:`waflib.Tools.waf_unit_test.utest` @@ -11,6 +11,10 @@ standard waf unit test environment. It has been tested with TestNG and JUnit but should be easily expandable to other frameworks given the flexibility of ut_str provided by the standard waf unit test environment. +The extra takes care also of managing non-java dependencies (ie. C/C++ libraries +using JNI or Python modules via JEP) and setting up the environment needed to run +them. + Example usage: def options(opt): @@ -20,15 +24,15 @@ def configure(conf): conf.load('java javatest') def build(bld): - + [ ... mainprog is built here ... ] bld(features = 'javac javatest', - srcdir = 'test/', - outdir = 'test', + srcdir = 'test/', + outdir = 'test', sourcepath = ['test'], - classpath = [ 'src' ], - basedir = 'test', + classpath = [ 'src' ], + basedir = 'test', use = ['JAVATEST', 'mainprog'], # mainprog is the program being tested in src/ ut_str = 'java -cp ${CLASSPATH} ${JTRUNNER} ${SRC}', jtest_source = bld.path.ant_glob('test/*.xml'), @@ -53,10 +57,91 @@ The runner class presence on the system is checked for at configuration stage. """ import os -from waflib import Task, TaskGen, Options +from waflib import Task, TaskGen, Options, Errors, Utils, Logs +from waflib.Tools import ccroot + +def _process_use_rec(self, name): + """ + Recursively process ``use`` for task generator with name ``name``.. + Used by javatest_process_use. + """ + if name in self.javatest_use_not or name in self.javatest_use_seen: + return + try: + tg = self.bld.get_tgen_by_name(name) + except Errors.WafError: + self.javatest_use_not.add(name) + return + + self.javatest_use_seen.append(name) + tg.post() + + for n in self.to_list(getattr(tg, 'use', [])): + _process_use_rec(self, n) @TaskGen.feature('javatest') -@TaskGen.after_method('apply_java', 'use_javac_files', 'set_classpath') +@TaskGen.after_method('process_source', 'apply_link', 'use_javac_files') +def javatest_process_use(self): + """ + Process the ``use`` attribute which contains a list of task generator names and store + paths that later is used to populate the unit test runtime environment. + """ + self.javatest_use_not = set() + self.javatest_use_seen = [] + self.javatest_libpaths = [] # strings or Nodes + self.javatest_pypaths = [] # strings or Nodes + self.javatest_dep_nodes = [] + + names = self.to_list(getattr(self, 'use', [])) + for name in names: + _process_use_rec(self, name) + + def extend_unique(lst, varlst): + ext = [] + for x in varlst: + if x not in lst: + ext.append(x) + lst.extend(ext) + + # Collect type specific info needed to construct a valid runtime environment + # for the test. + for name in self.javatest_use_seen: + tg = self.bld.get_tgen_by_name(name) + + # Python-Java embedding crosstools such as JEP + if 'py' in tg.features: + # Python dependencies are added to PYTHONPATH + pypath = getattr(tg, 'install_from', tg.path) + + if 'buildcopy' in tg.features: + # Since buildcopy is used we assume that PYTHONPATH in build should be used, + # not source + extend_unique(self.javatest_pypaths, [pypath.get_bld().abspath()]) + + # Add buildcopy output nodes to dependencies + extend_unique(self.javatest_dep_nodes, [o for task in getattr(tg, 'tasks', []) for o in getattr(task, 'outputs', [])]) + else: + # If buildcopy is not used, depend on sources instead + extend_unique(self.javatest_dep_nodes, tg.source) + extend_unique(self.javatest_pypaths, [pypath.abspath()]) + + + if getattr(tg, 'link_task', None): + # For tasks with a link_task (C, C++, D et.c.) include their library paths: + if not isinstance(tg.link_task, ccroot.stlink_task): + extend_unique(self.javatest_dep_nodes, tg.link_task.outputs) + extend_unique(self.javatest_libpaths, tg.link_task.env.LIBPATH) + + if 'pyext' in tg.features: + # If the taskgen is extending Python we also want to add the interpreter libpath. + extend_unique(self.javatest_libpaths, tg.link_task.env.LIBPATH_PYEXT) + else: + # Only add to libpath if the link task is not a Python extension + extend_unique(self.javatest_libpaths, [tg.link_task.outputs[0].parent.abspath()]) + + +@TaskGen.feature('javatest') +@TaskGen.after_method('apply_java', 'use_javac_files', 'set_classpath', 'javatest_process_use') def make_javatest(self): """ Creates a ``utest`` task with a populated environment for Java Unit test execution @@ -65,6 +150,9 @@ def make_javatest(self): tsk = self.create_task('utest') tsk.set_run_after(self.javac_task) + # Dependencies from recursive use analysis + tsk.dep_nodes.extend(self.javatest_dep_nodes) + # Put test input files as waf_unit_test relies on that for some prints and log generation # If jtest_source is there, this is specially useful for passing XML for TestNG # that contain test specification, use that as inputs, otherwise test sources @@ -97,6 +185,21 @@ def make_javatest(self): if not hasattr(self, 'ut_env'): self.ut_env = dict(os.environ) + def add_paths(var, lst): + # Add list of paths to a variable, lst can contain strings or nodes + lst = [ str(n) for n in lst ] + Logs.debug("ut: %s: Adding paths %s=%s", self, var, lst) + self.ut_env[var] = os.pathsep.join(lst) + os.pathsep + self.ut_env.get(var, '') + + add_paths('PYTHONPATH', self.javatest_pypaths) + + if Utils.is_win32: + add_paths('PATH', self.javatest_libpaths) + elif Utils.unversioned_sys_platform() == 'darwin': + add_paths('DYLD_LIBRARY_PATH', self.javatest_libpaths) + add_paths('LD_LIBRARY_PATH', self.javatest_libpaths) + else: + add_paths('LD_LIBRARY_PATH', self.javatest_libpaths) def configure(ctx): cp = ctx.env.CLASSPATH or '.' diff --git a/extras/msvc_pdb.py b/extras/msvc_pdb.py new file mode 100644 index 0000000..077656b --- /dev/null +++ b/extras/msvc_pdb.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python +# encoding: utf-8 +# Rafaƫl Kooi 2019 + +from waflib import TaskGen + +@TaskGen.feature('c', 'cxx', 'fc') +@TaskGen.after_method('propagate_uselib_vars') +def add_pdb_per_object(self): + """For msvc/fortran, specify a unique compile pdb per object, to work + around LNK4099. Flags are updated with a unique /Fd flag based on the + task output name. This is separate from the link pdb. + """ + if not hasattr(self, 'compiled_tasks'): + return + + link_task = getattr(self, 'link_task', None) + + for task in self.compiled_tasks: + if task.inputs and task.inputs[0].name.lower().endswith('.rc'): + continue + + add_pdb = False + for flagname in ('CFLAGS', 'CXXFLAGS', 'FCFLAGS'): + # several languages may be used at once + for flag in task.env[flagname]: + if flag[1:].lower() == 'zi': + add_pdb = True + break + + if add_pdb: + node = task.outputs[0].change_ext('.pdb') + pdb_flag = '/Fd:' + node.abspath() + + for flagname in ('CFLAGS', 'CXXFLAGS', 'FCFLAGS'): + buf = [pdb_flag] + for flag in task.env[flagname]: + if flag[1:3] == 'Fd' or flag[1:].lower() == 'fs' or flag[1:].lower() == 'mp': + continue + buf.append(flag) + task.env[flagname] = buf + + if link_task and not node in link_task.dep_nodes: + link_task.dep_nodes.append(node) + if not node in task.outputs: + task.outputs.append(node) diff --git a/extras/pytest.py b/extras/pytest.py index 7dd5a1a..fc9ad1c 100644 --- a/extras/pytest.py +++ b/extras/pytest.py @@ -40,6 +40,8 @@ the following environment variables for the `pytest` test runner: - `pytest_libpath` attribute is used to manually specify additional linker paths. +3. Java class search path (CLASSPATH) of any Java/Javalike dependency + Note: `pytest` cannot automatically determine the correct `PYTHONPATH` for `pyext` taskgens because the extension might be part of a Python package or used standalone: @@ -119,6 +121,7 @@ def pytest_process_use(self): self.pytest_use_seen = [] self.pytest_paths = [] # strings or Nodes self.pytest_libpaths = [] # strings or Nodes + self.pytest_javapaths = [] # strings or Nodes self.pytest_dep_nodes = [] names = self.to_list(getattr(self, 'use', [])) @@ -157,6 +160,17 @@ def pytest_process_use(self): extend_unique(self.pytest_dep_nodes, tg.source) extend_unique(self.pytest_paths, [pypath.abspath()]) + if 'javac' in tg.features: + # If a JAR is generated point to that, otherwise to directory + if getattr(tg, 'jar_task', None): + extend_unique(self.pytest_javapaths, [tg.jar_task.outputs[0].abspath()]) + else: + extend_unique(self.pytest_javapaths, [tg.path.get_bld()]) + + # And add respective dependencies if present + if tg.use_lst: + extend_unique(self.pytest_javapaths, tg.use_lst) + if getattr(tg, 'link_task', None): # For tasks with a link_task (C, C++, D et.c.) include their library paths: if not isinstance(tg.link_task, ccroot.stlink_task): @@ -212,8 +226,9 @@ def make_pytest(self): Logs.debug("ut: %s: Adding paths %s=%s", self, var, lst) self.ut_env[var] = os.pathsep.join(lst) + os.pathsep + self.ut_env.get(var, '') - # Prepend dependency paths to PYTHONPATH and LD_LIBRARY_PATH + # Prepend dependency paths to PYTHONPATH, CLASSPATH and LD_LIBRARY_PATH add_paths('PYTHONPATH', self.pytest_paths) + add_paths('CLASSPATH', self.pytest_javapaths) if Utils.is_win32: add_paths('PATH', self.pytest_libpaths) diff --git a/extras/wafcache.py b/extras/wafcache.py new file mode 100644 index 0000000..8b9567f --- /dev/null +++ b/extras/wafcache.py @@ -0,0 +1,524 @@ +#! /usr/bin/env python +# encoding: utf-8 +# Thomas Nagy, 2019 (ita) + +""" +Filesystem-based cache system to share and re-use build artifacts + +Cache access operations (copy to and from) are delegated to +independent pre-forked worker subprocesses. + +The following environment variables may be set: +* WAFCACHE: several possibilities: + - File cache: + absolute path of the waf cache (~/.cache/wafcache_user, + where `user` represents the currently logged-in user) + - URL to a cache server, for example: + export WAFCACHE=http://localhost:8080/files/ + in that case, GET/POST requests are made to urls of the form + http://localhost:8080/files/000000000/0 (cache management is then up to the server) + - GCS or S3 bucket + gs://my-bucket/ + s3://my-bucket/ +* WAFCACHE_NO_PUSH: if set, disables pushing to the cache +* WAFCACHE_VERBOSITY: if set, displays more detailed cache operations + +File cache specific options: + Files are copied using hard links by default; if the cache is located + onto another partition, the system switches to file copies instead. +* WAFCACHE_TRIM_MAX_FOLDER: maximum amount of tasks to cache (1M) +* WAFCACHE_EVICT_MAX_BYTES: maximum amount of cache size in bytes (10GB) +* WAFCACHE_EVICT_INTERVAL_MINUTES: minimum time interval to try + and trim the cache (3 minutess) +Usage:: + + def build(bld): + bld.load('wafcache') + ... + +To troubleshoot:: + + waf clean build --zones=wafcache +""" + +import atexit, base64, errno, fcntl, getpass, os, shutil, sys, time, traceback, urllib3 +try: + import subprocess32 as subprocess +except ImportError: + import subprocess + +base_cache = os.path.expanduser('~/.cache/') +if not os.path.isdir(base_cache): + base_cache = '/tmp/' +default_wafcache_dir = os.path.join(base_cache, 'wafcache_' + getpass.getuser()) + +CACHE_DIR = os.environ.get('WAFCACHE', default_wafcache_dir) +TRIM_MAX_FOLDERS = int(os.environ.get('WAFCACHE_TRIM_MAX_FOLDER', 1000000)) +EVICT_INTERVAL_MINUTES = int(os.environ.get('WAFCACHE_EVICT_INTERVAL_MINUTES', 3)) +EVICT_MAX_BYTES = int(os.environ.get('WAFCACHE_EVICT_MAX_BYTES', 10**10)) +WAFCACHE_NO_PUSH = 1 if os.environ.get('WAFCACHE_NO_PUSH') else 0 +WAFCACHE_VERBOSITY = 1 if os.environ.get('WAFCACHE_VERBOSITY') else 0 +OK = "ok" + +try: + import cPickle +except ImportError: + import pickle as cPickle + +if __name__ != '__main__': + from waflib import Task, Logs, Utils, Build + +def can_retrieve_cache(self): + """ + New method for waf Task classes + """ + if not self.outputs: + return False + + self.cached = False + + sig = self.signature() + ssig = Utils.to_hex(self.uid() + sig) + + files_to = [node.abspath() for node in self.outputs] + err = cache_command(ssig, [], files_to) + if err.startswith(OK): + if WAFCACHE_VERBOSITY: + Logs.pprint('CYAN', ' Fetched %r from cache' % files_to) + else: + Logs.debug('wafcache: fetched %r from cache', files_to) + else: + if WAFCACHE_VERBOSITY: + Logs.pprint('YELLOW', ' No cache entry %s' % files_to) + else: + Logs.debug('wafcache: No cache entry %s: %s', files_to, err) + return False + + self.cached = True + return True + +def put_files_cache(self): + """ + New method for waf Task classes + """ + if WAFCACHE_NO_PUSH or getattr(self, 'cached', None) or not self.outputs: + return + + bld = self.generator.bld + sig = self.signature() + ssig = Utils.to_hex(self.uid() + sig) + + files_from = [node.abspath() for node in self.outputs] + err = cache_command(ssig, files_from, []) + + if err.startswith(OK): + if WAFCACHE_VERBOSITY: + Logs.pprint('CYAN', ' Successfully uploaded %s to cache' % files_from) + else: + Logs.debug('wafcache: Successfully uploaded %r to cache', files_from) + else: + if WAFCACHE_VERBOSITY: + Logs.pprint('RED', ' Error caching step results %s: %s' % (files_from, err)) + else: + Logs.debug('wafcache: Error caching results %s: %s', files_from, err) + + bld.task_sigs[self.uid()] = self.cache_sig + +def hash_env_vars(self, env, vars_lst): + """ + Reimplement BuildContext.hash_env_vars so that the resulting hash does not depend on local paths + """ + if not env.table: + env = env.parent + if not env: + return Utils.SIG_NIL + + idx = str(id(env)) + str(vars_lst) + try: + cache = self.cache_env + except AttributeError: + cache = self.cache_env = {} + else: + try: + return self.cache_env[idx] + except KeyError: + pass + + v = str([env[a] for a in vars_lst]) + v = v.replace(self.srcnode.abspath().__repr__()[:-1], '') + m = Utils.md5() + m.update(v.encode()) + ret = m.digest() + + Logs.debug('envhash: %r %r', ret, v) + + cache[idx] = ret + + return ret + +def uid(self): + """ + Reimplement Task.uid() so that the signature does not depend on local paths + """ + try: + return self.uid_ + except AttributeError: + m = Utils.md5() + src = self.generator.bld.srcnode + up = m.update + up(self.__class__.__name__.encode()) + for x in self.inputs + self.outputs: + up(x.path_from(src).encode()) + self.uid_ = m.digest() + return self.uid_ + + +def make_cached(cls): + """ + Enable the waf cache for a given task class + """ + if getattr(cls, 'nocache', None) or getattr(cls, 'has_cache', False): + return + + m1 = getattr(cls, 'run', None) + def run(self): + if getattr(self, 'nocache', False): + return m1(self) + if self.can_retrieve_cache(): + return 0 + return m1(self) + cls.run = run + + m2 = getattr(cls, 'post_run', None) + def post_run(self): + if getattr(self, 'nocache', False): + return m2(self) + ret = m2(self) + self.put_files_cache() + if hasattr(self, 'chmod'): + for node in self.outputs: + os.chmod(node.abspath(), self.chmod) + return ret + cls.post_run = post_run + cls.has_cache = True + +process_pool = [] +def get_process(): + """ + Returns a worker process that can process waf cache commands + The worker process is assumed to be returned to the process pool when unused + """ + try: + return process_pool.pop() + except IndexError: + filepath = os.path.dirname(os.path.abspath(__file__)) + os.sep + 'wafcache.py' + cmd = [sys.executable, '-c', Utils.readf(filepath)] + return subprocess.Popen(cmd, stdout=subprocess.PIPE, stdin=subprocess.PIPE, bufsize=0) + +def atexit_pool(): + for k in process_pool: + try: + os.kill(k.pid, 9) + except OSError: + pass + else: + k.wait() +atexit.register(atexit_pool) + +def build(bld): + """ + Called during the build process to enable file caching + """ + if process_pool: + # already called once + return + + for x in range(bld.jobs): + process_pool.append(get_process()) + + Task.Task.can_retrieve_cache = can_retrieve_cache + Task.Task.put_files_cache = put_files_cache + Task.Task.uid = uid + Build.BuildContext.hash_env_vars = hash_env_vars + for x in reversed(list(Task.classes.values())): + make_cached(x) + +def cache_command(sig, files_from, files_to): + """ + Create a command for cache worker processes, returns a pickled + base64-encoded tuple containing the task signature, a list of files to + cache and a list of files files to get from cache (one of the lists + is assumed to be empty) + """ + proc = get_process() + + obj = base64.b64encode(cPickle.dumps([sig, files_from, files_to])) + proc.stdin.write(obj) + proc.stdin.write('\n'.encode()) + proc.stdin.flush() + obj = proc.stdout.readline() + if not obj: + raise OSError('Preforked sub-process %r died' % proc.pid) + process_pool.append(proc) + return cPickle.loads(base64.b64decode(obj)) + +try: + copyfun = os.link +except NameError: + copyfun = shutil.copy2 + +def atomic_copy(orig, dest): + """ + Copy files to the cache, the operation is atomic for a given file + """ + global copyfun + tmp = dest + '.tmp' + up = os.path.dirname(dest) + try: + os.makedirs(up) + except OSError: + pass + + try: + copyfun(orig, tmp) + except OSError as e: + if e.errno == errno.EXDEV: + copyfun = shutil.copy2 + copyfun(orig, tmp) + else: + raise + os.rename(tmp, dest) + +def lru_trim(): + """ + the cache folders take the form: + `CACHE_DIR/0b/0b180f82246d726ece37c8ccd0fb1cde2650d7bfcf122ec1f169079a3bfc0ab9` + they are listed in order of last access, and then removed + until the amount of folders is within TRIM_MAX_FOLDERS and the total space + taken by files is less than EVICT_MAX_BYTES + """ + lst = [] + for up in os.listdir(CACHE_DIR): + if len(up) == 2: + sub = os.path.join(CACHE_DIR, up) + for hval in os.listdir(sub): + path = os.path.join(sub, hval) + + size = 0 + for fname in os.listdir(path): + size += os.lstat(os.path.join(path, fname)).st_size + lst.append((os.stat(path).st_mtime, size, path)) + + lst.sort(key=lambda x: x[0]) + lst.reverse() + + tot = sum(x[1] for x in lst) + while tot > EVICT_MAX_BYTES or len(lst) > TRIM_MAX_FOLDERS: + _, tmp_size, path = lst.pop() + tot -= tmp_size + + tmp = path + '.tmp' + try: + shutil.rmtree(tmp) + except OSError: + pass + try: + os.rename(path, tmp) + except OSError: + sys.stderr.write('Could not rename %r to %r' % (path, tmp)) + else: + try: + shutil.rmtree(tmp) + except OSError: + sys.stderr.write('Could not remove %r' % tmp) + sys.stderr.write("Cache trimmed: %r bytes in %r folders left\n" % (tot, len(lst))) + + +def lru_evict(): + """ + Reduce the cache size + """ + lockfile = os.path.join(CACHE_DIR, 'all.lock') + try: + st = os.stat(lockfile) + except EnvironmentError as e: + if e.errno == errno.ENOENT: + with open(lockfile, 'w') as f: + f.write('') + return + else: + raise + + if st.st_mtime < time.time() - EVICT_INTERVAL_MINUTES * 60: + # check every EVICT_INTERVAL_MINUTES minutes if the cache is too big + # OCLOEXEC is unnecessary because no processes are spawned + fd = os.open(lockfile, os.O_RDWR | os.O_CREAT, 0o755) + try: + try: + fcntl.flock(fd, fcntl.LOCK_EX | fcntl.LOCK_NB) + except EnvironmentError: + sys.stderr.write('another process is running!\n') + pass + else: + # now dow the actual cleanup + lru_trim() + os.utime(lockfile, None) + finally: + os.close(fd) + +class netcache(object): + def __init__(self): + self.http = urllib3.PoolManager() + + def url_of(self, sig, i): + return "%s/%s/%s" % (CACHE_DIR, sig, i) + + def upload(self, file_path, sig, i): + url = self.url_of(sig, i) + with open(file_path, 'rb') as f: + file_data = f.read() + r = self.http.request('POST', url, timeout=60, + fields={ 'file': ('%s/%s' % (sig, i), file_data), }) + if r.status >= 400: + raise OSError("Invalid status %r %r" % (url, r.status)) + + def download(self, file_path, sig, i): + url = self.url_of(sig, i) + with self.http.request('GET', url, preload_content=False, timeout=60) as inf: + if inf.status >= 400: + raise OSError("Invalid status %r %r" % (url, inf.status)) + with open(file_path, 'wb') as out: + shutil.copyfileobj(inf, out) + + def copy_to_cache(self, sig, files_from, files_to): + try: + for i, x in enumerate(files_from): + if not os.path.islink(x): + self.upload(x, sig, i) + except Exception: + return traceback.format_exc() + return OK + + def copy_from_cache(self, sig, files_from, files_to): + try: + for i, x in enumerate(files_to): + self.download(x, sig, i) + except Exception: + return traceback.format_exc() + return OK + +class fcache(object): + def __init__(self): + if not os.path.exists(CACHE_DIR): + os.makedirs(CACHE_DIR) + if not os.path.exists(CACHE_DIR): + raise ValueError('Could not initialize the cache directory') + + def copy_to_cache(self, sig, files_from, files_to): + """ + Copy files to the cache, existing files are overwritten, + and the copy is atomic only for a given file, not for all files + that belong to a given task object + """ + try: + for i, x in enumerate(files_from): + dest = os.path.join(CACHE_DIR, sig[:2], sig, str(i)) + atomic_copy(x, dest) + except Exception: + return traceback.format_exc() + else: + # attempt trimming if caching was successful: + # we may have things to trim! + lru_evict() + return OK + + def copy_from_cache(self, sig, files_from, files_to): + """ + Copy files from the cache + """ + try: + for i, x in enumerate(files_to): + orig = os.path.join(CACHE_DIR, sig[:2], sig, str(i)) + atomic_copy(orig, x) + + # success! update the cache time + os.utime(os.path.join(CACHE_DIR, sig[:2], sig), None) + except Exception: + return traceback.format_exc() + return OK + +class bucket_cache(object): + def bucket_copy(self, source, target): + if CACHE_DIR.startswith('s3://'): + cmd = ['aws', 's3', 'cp', source, target] + else: + cmd = ['gsutil', 'cp', source, target] + proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + out, err = proc.communicate() + if proc.returncode: + raise OSError('Error copy %r to %r using: %r (exit %r):\n out:%s\n err:%s' % ( + source, target, cmd, proc.returncode, out.decode(), err.decode())) + + def copy_to_cache(self, sig, files_from, files_to): + try: + for i, x in enumerate(files_from): + dest = os.path.join(CACHE_DIR, sig[:2], sig, str(i)) + self.bucket_copy(x, dest) + except Exception: + return traceback.format_exc() + return OK + + def copy_from_cache(self, sig, files_from, files_to): + try: + for i, x in enumerate(files_to): + orig = os.path.join(CACHE_DIR, sig[:2], sig, str(i)) + self.bucket_copy(orig, x) + except EnvironmentError: + return traceback.format_exc() + return OK + +def loop(service): + """ + This function is run when this file is run as a standalone python script, + it assumes a parent process that will communicate the commands to it + as pickled-encoded tuples (one line per command) + + The commands are to copy files to the cache or copy files from the + cache to a target destination + """ + # one operation is performed at a single time by a single process + # therefore stdin never has more than one line + txt = sys.stdin.readline().strip() + if not txt: + # parent process probably ended + sys.exit(1) + ret = OK + + [sig, files_from, files_to] = cPickle.loads(base64.b64decode(txt)) + if files_from: + # TODO return early when pushing files upstream + ret = service.copy_to_cache(sig, files_from, files_to) + elif files_to: + # the build process waits for workers to (possibly) obtain files from the cache + ret = service.copy_from_cache(sig, files_from, files_to) + else: + ret = "Invalid command" + + obj = base64.b64encode(cPickle.dumps(ret)) + sys.stdout.write(obj.decode()) + sys.stdout.write('\n') + sys.stdout.flush() + +if __name__ == '__main__': + if CACHE_DIR.startswith('s3://') or CACHE_DIR.startswith('gs://'): + service = bucket_cache() + elif CACHE_DIR.startswith('http'): + service = netcache() + else: + service = fcache() + while 1: + try: + loop(service) + except KeyboardInterrupt: + break + -- cgit v1.2.1