from argparse import ArgumentParser
from runners.utils import load_yaml
import inspect
import textwrap
import os
from src import logging
def build_parser_for_yml_script():
    """
    Creates the small parser shared by all scripts.

    The parser is built with ``add_help=False`` and registers its own
    ``-h``/``--help`` option as a plain ``store_true`` flag, so asking for help
    is merely recorded in the parsed namespace instead of printing and exiting
    immediately. The only other option is ``-y``/``--yml``.

    Returns:
        argparse.ArgumentParser: Parser exposing ``help`` (bool) and ``yml``
        (str, ``None`` when the option is not supplied).
    """
    help_flag_text = """
show this help message and exit.
"""
    yml_option_text = """
Path to .yml file containing command line arguments for the script. If the only key
is 'jobs', then we assume it points to a list of jobs with parameters
input_path and output_path. Each job is executed one after the other. The
structure of each .yml file is up to you. If this is given, all other arguments
to the script are not required (as they should be defined solely within the
.yml).
"""

    yml_parser = ArgumentParser(add_help=False)
    yml_parser.add_argument(
        '-h', '--help', dest='help', action='store_true', help=help_flag_text
    )
    yml_parser.add_argument('-y', '--yml', type=str, help=yml_option_text)
    return yml_parser
def sequential_job_execution(script_func, jobs):
    """
    Runs the jobs in order, making one call to ``script_func`` per job.

    Args:
        script_func (function): Callable invoked once for every job.
        jobs (list): Dictionaries of keyword arguments. Each dictionary is
            unpacked into its own ``script_func`` call; return values are
            discarded.
    """
    for job_kwargs in jobs:
        script_func(**job_kwargs)
def document_parser(script_name, reference):
    """
    Decorator factory that generates the docstring of a parser-building function.

    The decorated function is called once, at decoration time, to obtain its
    parser. The parser's formatted help is embedded as a literal code block in
    a freshly written docstring, which then replaces ``__doc__`` on the
    function. The function object itself is returned unwrapped.

    Args:
        script_name (str): Name of the script; used to build the
            ``scripts.<script_name>`` module reference and the
            ``python -m scripts.<script_name>`` usage line.
        reference (str): Dotted path of the function the generated docstring
            cross-references (e.g. the function the script runs).

    Returns:
        function: A decorator that takes the parser-building function, rewrites
        its ``__doc__`` and returns that same function.
    """
    def _attach_docstring(parser_func):
        # Every occurrence of 'sphinx-build' in the help output is rewritten to
        # the module invocation (presumably because that is the program name
        # argparse reports while the docs are being built -- confirm).
        usage = parser_func().format_help().replace(
            'sphinx-build', f'python -m scripts.{script_name} '
        )
        code_block = ' .. code-block:: none\n\n' + textwrap.indent(usage, '\t\t ')
        parser_func.__doc__ = f"""
Builds the parser for :py:mod:`scripts.{script_name}`.
{code_block}
Also see the arguments to :py:func:`{reference}`.
Returns:
:class:`argparse.ArgumentParser`: The parser for this script.
"""
        return parser_func

    return _attach_docstring
def _unknown_args_to_kwargs(tokens):
    """
    Converts tokens left over by ``parse_known_args`` into keyword arguments.

    Tokens are walked in order rather than paired by position, so a flag
    without a value cannot shift the key/value pairing of the options after it.

    * ``--key value`` becomes ``{'key': 'value'}`` (the value is left untouched,
      even if it contains ``--`` or starts with a single ``-``).
    * ``--key=value`` becomes ``{'key': 'value'}``.
    * ``--key`` with no value becomes ``{'key': True}``.
    * Tokens that are not preceded by a ``--key`` are ignored.

    Args:
        tokens (list): Unrecognized command line strings.

    Returns:
        dict: Mapping of option name (without the leading ``--``) to its value.
    """
    kwargs = {}
    pending = None  # option name still waiting for its value
    for token in tokens:
        if token.startswith('--'):
            if pending is not None:
                # The previous option never received a value: it was a flag.
                kwargs[pending] = True
                pending = None
            key, has_value, value = token[2:].partition('=')
            if not key:
                continue  # a bare '--' separator carries no option name
            if has_value:
                kwargs[key] = value
            else:
                pending = key
        elif pending is not None:
            kwargs[pending] = token
            pending = None
    if pending is not None:
        kwargs[pending] = True
    return kwargs


def _job_to_argv(job):
    """
    Renders a job dictionary as the equivalent list of command line tokens.

    ``True`` becomes a bare ``--key`` flag, ``False`` is omitted, and every
    other value becomes ``--key str(value)``.
    """
    argv = []
    for key, val in job.items():
        if isinstance(val, bool):
            if val:
                argv.append(f'--{key}')
        else:
            argv.extend((f'--{key}', str(val)))
    return argv


def _parse_job_args(cmd_parser, script_func, argv=None):
    """
    Parses ``argv`` (``sys.argv[1:]`` when ``None``) into a dict of arguments.

    Options the parser does not recognize are only forwarded when
    ``script_func`` declares ``**kwargs``; otherwise they are dropped.
    """
    namespace, leftovers = cmd_parser.parse_known_args(argv)
    parsed = vars(namespace)
    if inspect.getfullargspec(script_func).varkw is not None:
        parsed.update(_unknown_args_to_kwargs(leftovers))
    return parsed


def cmd(script_func, parser_func, exec_func=sequential_job_execution):
    """
    Builds a parser for any script in the scripts/ directory. Scripts should have two
    main functions: 1) a function that actually runs the script and 2) a build_parser
    function that builds up an ArgumentParser with informative help text for the script.
    This function allows the command line arguments to be passed to the script either
    through the command line as normal or through a YAML file which has matching keyword
    arguments for the script. Positional arguments are discouraged.

    The arguments in the YAML file are checked by passing them back into the command
    line parser function before giving them to the script. This also allows for default
    values to be defined in the script argument parser. YAML keys the parser does not
    know about are passed through with their YAML values.

    A script can be called multiple times using a YAML file by having a top-level key
    called 'jobs'. 'jobs' should contain a list where each item in the list is a
    set of arguments to be passed to the script one by one. Any other top-level keys
    next to 'jobs' are passed to ``exec_func`` as keyword arguments, but only those
    that ``exec_func`` names as regular parameters.

    If the ``DATA_DIRECTORY`` environment variable is unset or empty, a notice is
    logged and nothing is run.

    For each script, simply add this like so::

        if __name__ == "__main__":
            cmd(script_func, parser_func)

    Then to run a script, simply do::

        python -m scripts.[script_name] --yml [path_to_yml_file] # for yml
        python -m scripts.[script_name] [--arg val] # for cmd line

    Args:
        script_func (function): A function that will take in the arguments as keyword
            arguments and perform some action.
        parser_func (function): A function that will build up the argument parser for
            the script. It may return a falsy value when using a YAML file, in which
            case each job is passed to the script unchecked.
        exec_func (function): Called as ``exec_func(script_func, jobs, **extra_args)``
            to actually run the collected jobs. Defaults to
            :py:func:`sequential_job_execution`.
    """
    # first check if environment variables exist
    if not os.getenv('DATA_DIRECTORY'):
        logging.info(
            """
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
| It doesn't look like you sourced your environment variables! Make sure to |
| run 'source setup/environment/[machine_name]_local.sh' before running scripts, |
| as the scripts depend on the environment variables. |
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
"""
        )
        return

    yml_parser = build_parser_for_yml_script()
    cmd_parser = parser_func()
    yml_args = vars(yml_parser.parse_known_args()[0])

    if yml_args['help']:
        print('Usage via YML file.')
        yml_parser.print_help()
        if cmd_parser:
            print('\nDirect usage via command line arguments.')
            cmd_parser.print_help()
        return

    jobs = []
    extra_args = {}
    if yml_args['yml'] is None:
        # Plain command line usage: exactly one job, read from sys.argv.
        # NOTE: this mode requires parser_func to have returned a parser.
        jobs.append(_parse_job_args(cmd_parser, script_func))
    else:
        config = load_yaml(yml_args['yml'])
        if 'jobs' in config:
            yml_jobs = config.pop('jobs')
            # Whatever sits next to 'jobs' is meant for exec_func.
            extra_args = config
        else:
            yml_jobs = [config]

        for job in yml_jobs:
            if cmd_parser:
                # Round-trip the job through the parser so that types, choices
                # and defaults declared there are applied.
                job_args = _parse_job_args(
                    cmd_parser, script_func, _job_to_argv(job)
                )
                # Keys the parser did not produce keep their YAML values. The
                # job itself is left untouched: the same dict object can back
                # several jobs when the YAML file uses aliases.
                job_args.update(
                    {k: v for k, v in job.items() if k not in job_args}
                )
            else:
                job_args = job
            jobs.append(job_args)

    accepted = inspect.getfullargspec(exec_func).args
    extra_args = {k: v for k, v in extra_args.items() if k in accepted}
    exec_func(script_func, jobs, **extra_args)