import csv
import os
import shutil
from src.utils.parallel import parallel_process
from multiprocessing import cpu_count
from runners.utils import load_yaml, parse_yaml
from . import cmd, document_parser
import glob
import logging
import argparse
import yaml
import sys
def split_urbansound_by_fold(path_to_file, output_directory, input_directory, make_copy=False, 
    train_folds=[1, 2, 3, 4, 5, 6, 7, 8], val_folds=[9], test_folds=[10],
    path_to_urbansound_csv=None):
    """
    Places a single UrbanSound8K audio file into a train/validation/test split.

    The file's fold and class are looked up (by file name) in the UrbanSound8K
    metadata CSV. The file is then symlinked (or copied) to::

        output_directory/<train|validation|test>/<class>/<slice_file_name>

    Files that are not listed in the CSV, or whose fold is in none of the three
    fold lists, are skipped. An output file that already exists is left alone.
    
    Args:
        path_to_file (str): Path to the audio file that will be reorganized. Only
            its base name is used to look the file up in the metadata.
        output_directory (str): Root under which the train, validation and test 
            folders are created.
        input_directory (str): The root of the directory that the file comes from. 
            Not needed by this function; accepted so it can be called with the same 
            keyword arguments as the other organization functions.
        make_copy (bool, optional): Whether to use a symlink or to actually copy the file. 
            Defaults to False.
        train_folds (list, optional): Which folds belong to the train set. 
            Defaults to [1, 2, 3, 4, 5, 6, 7, 8].
        val_folds (list, optional): Which folds belong to the validation set. 
            Defaults to [9].
        test_folds (list, optional): Which folds belong to the test set. 
            Defaults to [10].
        path_to_urbansound_csv (str): Path to metadata/UrbanSound8K.csv. Must have
            the columns slice_file_name, fold and class. Defaults to None.
    
    Raises:
        ValueError: raises an error if the path to the csv isn't given.
    """
    if not path_to_urbansound_csv:
        raise ValueError("Path to urban sound CSV must be specified!")

    file_name = os.path.basename(path_to_file)
    metadata = _load_urbansound_metadata(path_to_urbansound_csv)
    if file_name not in metadata:
        # Not an UrbanSound8K slice (or not in this CSV): nothing to do.
        return
    fold, class_name = metadata[file_name]

    # The fold lists are only read, never mutated, so the list defaults are safe.
    if fold in train_folds:
        split_name = 'train'
    elif fold in val_folds:
        split_name = 'validation'
    elif fold in test_folds:
        split_name = 'test'
    else:
        return

    target_directory = os.path.join(output_directory, split_name, class_name)
    os.makedirs(target_directory, exist_ok=True)
    output_path = os.path.join(target_directory, file_name)
    if not os.path.exists(output_path):
        if make_copy:
            shutil.copyfile(path_to_file, output_path)
        else:
            # A relative link target would be resolved relative to the link's own
            # directory, so always link to the absolute location of the source.
            os.symlink(os.path.abspath(path_to_file), output_path)


# Parsed metadata keyed by CSV path, so that each process parses a given CSV
# once instead of once per audio file.
_URBANSOUND_METADATA_CACHE = {}


def _load_urbansound_metadata(path_to_urbansound_csv):
    """Returns a dict mapping slice_file_name to (fold as int, class name)."""
    if path_to_urbansound_csv not in _URBANSOUND_METADATA_CACHE:
        with open(path_to_urbansound_csv, 'r', newline='') as f:
            _URBANSOUND_METADATA_CACHE[path_to_urbansound_csv] = {
                row['slice_file_name']: (int(row['fold']), row['class'])
                for row in csv.DictReader(f)
            }
    return _URBANSOUND_METADATA_CACHE[path_to_urbansound_csv]
 
def split_folder_by_file(path_to_file, output_directory, input_directory, org_file, make_copy=False):
    """
    Reorganizes a directory using an organization file. The organization file should contain a 
    list of paths (one per line) that are relative to the input_directory. If path_to_file is 
    in the organization file, then it will be symlinked (or copied) to the same relative path 
    in output_directory. For example if organization file has an entry::

        path/to/my/file/0.wav

    And path to file looks like::

        input_directory/path/to/my/file/0.wav
    
    Then a new file will be created (or symlinked) at::

        output_directory/path/to/my/file/0.wav

    Files that are not listed in the organization file are skipped, and an output
    file that already exists is left alone.
    
    Args:
        path_to_file (str): Path to the audio file that will be reorganized.
        output_directory (str): Root of the directory that the file is copied (or 
            symlinked) into, at the same relative path it has under input_directory.
        input_directory (str): The root of the directory that the file comes from. Used
            to compute the path of the file relative to the input directory.
        org_file (str): Path to the file containing all of the file names that should be moved.
        make_copy (bool, optional): Whether to use a symlink or to actually copy the file. 
            Defaults to False.
    """
    with open(org_file, 'r') as f:
        # Blank lines are ignored; entries are normalized so that they compare
        # equal to what os.path.relpath produces below.
        files = {os.path.normpath(line.strip()) for line in f if line.strip()}

    # relpath (rather than splitting the string on input_directory) is correct
    # whether or not input_directory ends with a separator, and when the
    # directory name happens to occur more than once in the path.
    relative_path = os.path.relpath(path_to_file, input_directory)

    if relative_path in files:
        output_path = os.path.join(output_directory, relative_path)
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        if not os.path.exists(output_path):
            if make_copy:
                shutil.copyfile(path_to_file, output_path)
            else:
                # A relative link target would be resolved relative to the link's
                # own directory, so always link to the absolute source location.
                os.symlink(os.path.abspath(path_to_file), output_path)
 
def split_folder_by_class(path_to_file, output_directory, input_directory, make_copy=False):
    """Splits a folder by class which is indicated by the name of the file. 
    
    The mixture name is the name of the parent directory to the file. This function
    is used to organize datasets like musdb for consumption by Scaper for mixing
    new datasets.

    Takes a folder with audio file structure that looks like this::
    
        folder_input/
            mixture_one_name/
                vocals.wav
                bass.wav
                drums.wav
                other.wav
            mixture_two_name/
                vocals.wav
                bass.wav
                drums.wav
                other.wav
            ...

    and reorganizes it to a different folder like so::

        folder_output/
            vocals/
                mixture_one_name.wav
                mixture_two_name.wav
                ...
            bass/
                mixture_one_name.wav
                mixture_two_name.wav
                ...
            drums/
                mixture_one_name.wav
                mixture_two_name.wav
                ...
            other/
                mixture_one_name.wav
                mixture_two_name.wav
                ...
        
    so that it can be processed easily by Scaper. Notably, MUSDB has this folder 
    structure. By default this reorganization is done via symlinks so that the 
    entire dataset is not copied. An output file that already exists is left alone.

    Args:
        path_to_file (str): Path to the audio file that will be reorganized. Has form
            /path/to/mixture_name/source_name.ext
        output_directory (str): Where the file after swapping the mixture_name and source_name
            will be copied to.
        input_directory (str): The root of the directory that the file comes from. 
            Not needed by this function; accepted so it can be called with the same 
            keyword arguments as the other organization functions.
        make_copy (bool): Whether to use a symlink or to actually copy the file. 
            Defaults to False.
    """
    mixture_directory, file_name = os.path.split(path_to_file)
    class_name, ext = os.path.splitext(file_name)
    mixture_name = os.path.basename(mixture_directory)

    class_directory = os.path.join(output_directory, class_name)
    os.makedirs(class_directory, exist_ok=True)
    output_path = os.path.join(class_directory, mixture_name + ext)

    if not os.path.exists(output_path):
        if make_copy:
            shutil.copyfile(path_to_file, output_path)
        else:
            # A relative link target would be resolved relative to the link's own
            # directory, so always link to the absolute location of the source.
            os.symlink(os.path.abspath(path_to_file), output_path)
 
def reorganize(input_path, output_path, org_func, make_copy=False, 
               audio_extensions=['.wav', '.mp3', '.aac'], **kwargs):
    """
    Reorganizes the folders in the input path into the output path given an 
    organization function, passed in by org_func.

    Every audio file found anywhere under input_path (at any depth) is handed to
    the organization function, in parallel, as keyword arguments: path_to_file,
    output_directory, input_directory, make_copy, plus anything in kwargs.

    Args:
        input_path (str): Root of folder where all audio files will be reorganized.
        output_path (str): Root of folder where the reorganized files will be placed. 
        org_func (str): Organization function to use reorganize the dataset. Should 
            correspond to the name of a function in reorganize.py. A callable is
            also accepted and used as-is.
        make_copy (bool): Whether to use a symlink or to actually copy the file. 
            Defaults to False.
        audio_extensions (list, optional): Audio extensions to look for in the 
            input_path. Matching ones will be reorganized and placed into the output 
            directory via a symlink. Defaults to ['.wav', '.mp3', '.aac'].
        kwargs (dict): Additional keyword arguments that are passed to the org_func
            that is specified.

    Raises:
        AttributeError: if org_func is a name that is not defined in this module.
    """
    # Escape the root so that glob metacharacters in a directory name ('[', '*',
    # '?') are matched literally rather than interpreted as a pattern.
    root = glob.escape(str(input_path))

    paths_to_files = []
    for ext in audio_extensions:
        # '**' only descends through nested directories when recursive=True;
        # without it, it behaves like '*' and matches exactly one level.
        paths_to_files += glob.glob(f'{root}/**/*{ext}', recursive=True)

    args = [{
        'path_to_file': p,
        'output_directory': output_path,
        'input_directory': input_path,
        'make_copy': make_copy,
        **kwargs
    } for p in paths_to_files]

    if not callable(org_func):
        # Resolve the function by name from this module.
        org_func = getattr(sys.modules[__name__], org_func)

    parallel_process(
        args, 
        org_func,
        n_jobs=cpu_count(), 
        front_num=1, 
        use_kwargs=True
    )
 
# Builds the command line parser for this script. The flags mirror the keyword
# arguments of `reorganize` (input_path, output_path, org_func, make_copy,
# audio_extensions). No docstring is defined here on purpose: what
# `document_parser` does with the function is not visible from this file.
@document_parser('reorganize', 'scripts.reorganize.reorganize')
def build_parser():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--input_path', type=str, 
        help="""Root of folder where all audio files will be reorganized."""
    )
    parser.add_argument(
        '--output_path', type=str, 
        help="""Root of folder where all reorganized files will be placed."""
    )
    parser.add_argument(
        '--org_func', type=str,
        help="""Organization function to use reorganize the dataset. Should correspond
        to the name of a function in reorganize.py."""
    )
    # Flag without a value: present means copy, absent means symlink.
    parser.add_argument(
        '--make_copy', 
        action="store_true",
        help="""Whether to use a symlink or to actually copy the file.""",
    )
    # One or more extensions, e.g. `--audio_extensions .wav .flac`.
    parser.add_argument(
        '--audio_extensions', nargs='+', 
        help="""Audio extensions to look for in the input_path. Matching ones will
        be reorganize and placed into the output directory via a symlink.""",
        default=['.wav', '.mp3', '.aac']
    )
    return parser
 
# Script entry point: hand the `reorganize` function and the parser factory to
# the package's `cmd` helper. NOTE(review): presumably `cmd` parses the command
# line with `build_parser()` and calls `reorganize` with the result -- confirm
# against its definition in the package.
if __name__ == '__main__':
    cmd(reorganize, build_parser)