Source code for scripts

from argparse import ArgumentParser
from runners.utils import load_yaml
import inspect
import textwrap
import os
from src import logging

def build_parser_for_yml_script():
    """
    Create the base ArgumentParser shared by the scripts.

    Automatic help is switched off on the parser; instead a ``-h/--help``
    switch is registered by hand so that its value is stored as a boolean
    under ``help`` rather than printing and exiting. A ``-y/--yml`` option
    accepts the path to a YAML file holding the script's arguments.

    Returns:
        argparse.ArgumentParser: Parser exposing the ``help`` and ``yml``
        destinations.
    """
    help_settings = dict(
        action='store_true',
        dest='help',
        help=" show this help message and exit. ",
    )
    yml_settings = dict(
        type=str,
        help=(
            " Path to .yml file containing command line arguments for the"
            " script. If the only key is 'jobs', then we assume it points to"
            " a list of jobs with parameters input_path and output_path."
            " Each job is executed one after the other. The structure of each"
            " .yml file is up to you. If this is given, all other arguments"
            " to the script are not required (as they should be defined"
            " solely within the .yml). "
        ),
    )

    base_parser = ArgumentParser(add_help=False)
    base_parser.add_argument('-h', '--help', **help_settings)
    base_parser.add_argument('-y', '--yml', **yml_settings)
    return base_parser
def sequential_job_execution(script_func, jobs):
    """
    Run every job in order, one call per job.

    Args:
        script_func (function): Callable invoked once for each job.
        jobs (list): Dictionaries of keyword arguments; each one is unpacked
            into a separate call of ``script_func``.
    """
    for keyword_arguments in jobs:
        script_func(**keyword_arguments)
def document_parser(script_name, reference):
    """
    Decorator factory that writes a parser-building function's docstring.

    The decorated function is called once, when the decorator is applied, to
    obtain its parser. The parser's formatted help is then embedded in a
    generated docstring (intended for Sphinx), which is assigned to the
    function's ``__doc__``. The function object itself is returned unchanged.

    Args:
        script_name (str): Name of the script; used in the
            ``python -m scripts.<script_name>`` usage line and in the module
            cross-reference.
        reference (str): Target of the ``:py:func:`` cross-reference placed in
            the generated docstring (e.g. the function the script runs).

    Returns:
        function: The decorator to apply to the parser-building function.
    """
    def decorator(parser_func):
        usage = parser_func().format_help()
        # The program name shown by argparse is 'sphinx-build' while the docs
        # are being generated; swap in the real invocation instead.
        usage = usage.replace(
            'sphinx-build', f'python -m scripts.{script_name} '
        )
        code_block = ' .. code-block:: none\n\n' + textwrap.indent(usage, '\t\t ')

        # NOTE(review): this template is kept exactly as it appears in this
        # listing, where it sits on a single line; confirm against the
        # upstream file whether it is meant to span several lines.
        parser_func.__doc__ = (
            f" Builds the parser for :py:mod:`scripts.{script_name}`. "
            f"{code_block} "
            f"Also see the arguments to :py:func:`{reference}`. "
            "Returns: :class:`argparse.ArgumentParser`: "
            "The parser for this script. "
        )
        return parser_func

    return decorator
def _unknown_args_to_dict(unknown_args):
    """Pair leftover ``--key value`` tokens from ``parse_known_args`` into a dict."""
    # Remove only a leading "--". Replacing every "--" would also corrupt
    # values that merely contain two dashes (e.g. "a--b").
    tokens = [
        token[2:] if token.startswith('--') else token
        for token in unknown_args
    ]
    # Even positions are treated as keys, odd positions as their values; a
    # trailing token without a partner is dropped.
    return dict(zip(tokens[:-1:2], tokens[1::2]))


def _job_to_argv(job):
    """Render a job dictionary as a list of command line tokens."""
    argv = []
    for key, val in job.items():
        flag = f'--{key}'
        if isinstance(val, bool):
            # Booleans become bare switches that are present only when True.
            if val:
                argv.append(flag)
        else:
            argv.extend((flag, str(val)))
    return argv


def _parse_script_args(cmd_parser, script_func, argv=None):
    """Parse ``argv`` with the script's parser into keyword arguments for the script."""
    known, unknown = cmd_parser.parse_known_args(argv)
    kwargs = vars(known)
    # Unrecognised options are only forwarded when the script can absorb
    # them through a ``**kwargs`` parameter.
    if inspect.getfullargspec(script_func).varkw is not None:
        kwargs.update(_unknown_args_to_dict(unknown))
    return kwargs


def cmd(script_func, parser_func, exec_func=sequential_job_execution):
    """
    Builds a parser for any script in the scripts/ directory.

    Scripts should have two main functions: 1) a function that actually runs
    the script and 2) a build_parser function that builds up an ArgumentParser
    with informative help text for the script. This function allows the
    command line arguments to be passed to the script either through the
    command line as normal or through a YAML file which has matching keyword
    arguments for the script. Positional arguments are discouraged.

    The arguments in the YAML file are checked by passing them back into the
    command line parser function before giving them to the script. This also
    allows for default values to be defined in the script argument parser.

    A script can be called multiple times using a YAML file by having a
    top-level key called 'jobs'. 'jobs' should contain a list where each item
    in the list is a set of arguments to be passed to the script one by one.

    For each script, simply add this like so::

        if __name__ == "__main__":
            cmd(script_func, parser_func)

    Then to run a script, simply do::

        python -m scripts.[script_name] --yml [path_to_yml_file] # for yml
        python -m scripts.[script_name] [--arg val] # for cmd line

    Nothing is executed when the ``DATA_DIRECTORY`` environment variable is
    unset or empty (a notice is logged instead), or when ``-h/--help`` is
    given (the help of both parsers is printed instead).

    Arguments:
        script_func (function): A function that will take in the arguments as
            keyword arguments and perform some action.
        parser_func (function): A function that will build up the argument
            parser for the script.
        exec_func (function): Called as
            ``exec_func(script_func, jobs, **extra_args)`` to run the
            collected jobs. ``extra_args`` are the top-level keys found next
            to 'jobs' in the YAML file, restricted to the names listed in
            ``inspect.getfullargspec(exec_func).args``. Defaults to
            :py:func:`sequential_job_execution`.
    """
    # first check if environment variables exist
    if not os.getenv('DATA_DIRECTORY'):
        # NOTE(review): the banner is kept exactly as it appears in this
        # listing, where the box is flattened; confirm the intended line
        # layout against the upstream file.
        logging.info(
            " ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
            " | It doesn't look like you sourced your environment variables!"
            " Make sure to |"
            " | run 'source setup/environment/[machine_name]_local.sh'"
            " before running scripts, |"
            " | as the scripts depend on the environment variables. \n"
            "| ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ "
        )
        return

    jobs = []

    yml_parser = build_parser_for_yml_script()
    cmd_parser = parser_func()
    args = vars(yml_parser.parse_known_args()[0])

    if args['help']:
        print('Usage via YML file.')
        yml_parser.print_help()
        if cmd_parser:
            print('\nDirect usage via command line arguments.')
            cmd_parser.print_help()
        return

    extra_args = {}

    if args['yml'] is None:
        # Plain command line usage: a single job built from sys.argv.
        # NOTE(review): unlike the YAML path below, this path requires
        # parser_func to return a parser.
        jobs.append(_parse_script_args(cmd_parser, script_func))
    else:
        yml_contents = load_yaml(args['yml'])

        if 'jobs' in yml_contents:
            yml_jobs = yml_contents.pop('jobs')
            # Whatever sits next to 'jobs' is a candidate argument for exec_func.
            extra_args = yml_contents
        else:
            yml_jobs = [yml_contents]

        for job in yml_jobs:
            if cmd_parser:
                # Round-trip the job through the script's parser so that its
                # type conversion and defaults apply.
                job_kwargs = _parse_script_args(
                    cmd_parser, script_func, _job_to_argv(job)
                )
                # Keys the parser did not produce keep their raw YAML value.
                # The job mapping itself is left untouched so that a mapping
                # object appearing more than once in the list is parsed the
                # same way every time.
                for key, val in job.items():
                    job_kwargs.setdefault(key, val)
            else:
                job_kwargs = job
            jobs.append(job_kwargs)

    accepted = inspect.getfullargspec(exec_func).args
    extra_args = {
        key: val for key, val in extra_args.items() if key in accepted
    }

    exec_func(script_func, jobs, **extra_args)