diff --git a/utils/exclude_path_keywords.yaml b/utils/exclude_path_keywords.yaml
new file mode 100644
index 0000000..f554a31
--- /dev/null
+++ b/utils/exclude_path_keywords.yaml
@@ -0,0 +1,5 @@
+exclude_paths:
+  containing :
+    - .ipynb_checkpoints
+    - .renku
+    - .git
\ No newline at end of file
diff --git a/utils/g5505_utils.py b/utils/g5505_utils.py
index b413271..f721578 100644
--- a/utils/g5505_utils.py
+++ b/utils/g5505_utils.py
@@ -1,3 +1,18 @@
+import sys
+import os
+
+try:
+    thisFilePath = os.path.abspath(__file__)
+except NameError:
+    print("Error: __file__ is not available. Ensure the script is being run from a file.")
+    print("[Notice] Path to DIMA package may not be resolved properly.")
+    thisFilePath = os.getcwd()  # Use current directory or specify a default
+
+dimaPath = os.path.normpath(os.path.join(thisFilePath, "..",'..','..'))  # First '..' drops the file name, the next two climb two directories
+
+if dimaPath not in sys.path:  # Avoid duplicate entries
+    sys.path.insert(0,dimaPath)
+
 import pandas as pd
 import os
 import sys
@@ -7,7 +22,7 @@ import logging
 import numpy as np
 import h5py
 import re
-
+import yaml
 
 def setup_logging(log_dir, log_filename):
     """Sets up logging to a specified directory and file.
@@ -292,6 +307,25 @@ def copy_directory_with_contraints(input_dir_path, output_dir_path,
     output_dir_path = os.path.normpath(output_dir_path)
     select_dir_keywords = [keyword.replace('/',os.sep) for keyword in select_dir_keywords]
 
+    # Load directory-exclusion keywords from the YAML file shipped next to this module.
+    # dimaPath is not used here: it points above this package, so joining it with
+    # 'utils/...' does not reach the file added at utils/exclude_path_keywords.yaml.
+    module_dir = thisFilePath if os.path.isdir(thisFilePath) else os.path.dirname(thisFilePath)
+    exclude_path_keywords = []
+    try:
+        with open(os.path.join(module_dir, 'exclude_path_keywords.yaml'), 'r') as stream:
+            exclude_path_dict = yaml.safe_load(stream)
+    except (OSError, yaml.YAMLError) as e:
+        # Best effort: a missing or malformed file disables exclusion instead of aborting the copy.
+        print(f"Warning. Unable to load YAML file: {e}")
+    else:
+        # Expected layout: {'exclude_paths': {'containing': [<keyword>, ...]}}
+        exclude_paths = exclude_path_dict.get('exclude_paths') if isinstance(exclude_path_dict, dict) else None
+        candidate_keywords = exclude_paths.get('containing') if isinstance(exclude_paths, dict) else None
+        if isinstance(candidate_keywords, list) and all(isinstance(keyword, str) for keyword in candidate_keywords):
+            # Drop empty strings: '' is a substring of every path and would exclude everything.
+            exclude_path_keywords = [keyword for keyword in candidate_keywords if keyword]
+
     date = created_at('%Y_%m').replace(":", "-")
     log_dir='logs/'
     setup_logging(log_dir, f"copy_directory_with_contraints_{date}.log")
@@ -302,8 +336,9 @@ def copy_directory_with_contraints(input_dir_path, output_dir_path,
     def file_is_selected(filename):
         return not select_file_keywords or any(keyword in filename for keyword in select_file_keywords)
 
+    # Exclude path keywords
+
 
-
     # Collect paths of directories, which are directly connected to the root dir and match select_dir_keywords
     paths = []
     if select_dir_keywords:
@@ -319,7 +354,11 @@ def copy_directory_with_contraints(input_dir_path, output_dir_path,
 
     for subpath in paths:
         for dirpath, _, filenames in os.walk(subpath,topdown=False):
-
+
+            # Exclude any dirpath containing a keyword in exclude_path_keywords
+            if any(excluded in dirpath for excluded in exclude_path_keywords):
+                continue
+
             # Ensure composite keywords e.g., / are contained in the path
             if select_dir_keywords and not any([keyword in dirpath for keyword in select_dir_keywords]):
                 continue