Add exclude paths configured through a YAML file

This commit is contained in:
2025-06-10 11:08:14 +02:00
parent 83cec97e83
commit ab897018d9
2 changed files with 41 additions and 3 deletions

View File

@ -0,0 +1,5 @@
# Keywords used to exclude paths: the loader added in this change skips any
# directory path that contains one of the strings listed below.
# NOTE(review): that loader reads the 'containing' key from the top-level
# mapping -- confirm whether 'containing' is nested under 'exclude_paths' in
# this file, in which case the lookup would not find it.
exclude_paths:
containing :
- .ipynb_checkpoints
- .renku
- .git

View File

@ -1,3 +1,18 @@
import sys
import os
# Locate this module on disk so the DIMA project root can be put on sys.path.
try:
    thisFilePath = os.path.abspath(__file__)
except NameError:
    # __file__ is undefined here (the lookup raised NameError): report it and
    # fall back to the current working directory as the starting point.
    print(
        "Error: __file__ is not available. Ensure the script is being run from a file.",
        "[Notice] Path to DIMA package may not be resolved properly.",
        sep="\n",
    )
    thisFilePath = os.getcwd()

# Three parent hops above thisFilePath, normalised, is taken as the project root.
dimaPath = os.path.normpath(os.path.join(thisFilePath, *([os.pardir] * 3)))

# Prepend the root only when it is missing, so sys.path gets no duplicate entry.
if not any(entry == dimaPath for entry in sys.path):
    sys.path.insert(0, dimaPath)
import pandas as pd
import os
import sys
@ -7,7 +22,7 @@ import logging
import numpy as np
import h5py
import re
import yaml
def setup_logging(log_dir, log_filename):
"""Sets up logging to a specified directory and file.
@ -292,6 +307,19 @@ def copy_directory_with_contraints(input_dir_path, output_dir_path,
output_dir_path = os.path.normpath(output_dir_path)
select_dir_keywords = [keyword.replace('/',os.sep) for keyword in select_dir_keywords]
try:
with open(os.path.join(dimaPath, 'utils/exclude_path_keywords.yaml'), 'r') as stream:
exclude_path_dict = yaml.safe_load(stream)
if isinstance(exclude_path_dict, dict):
exclude_path_keywords = exclude_path_dict.get('containing', [])
if not all(isinstance(keyword, str) for keyword in exclude_path_keywords):
exclude_path_keywords = []
else:
exclude_path_keywords = []
except (FileNotFoundError, yaml.YAMLError) as e:
print(f"Warning. Unable to load YAML file: {e}")
exclude_path_keywords = []
date = created_at('%Y_%m').replace(":", "-")
log_dir='logs/'
setup_logging(log_dir, f"copy_directory_with_contraints_{date}.log")
@ -302,6 +330,7 @@ def copy_directory_with_contraints(input_dir_path, output_dir_path,
def file_is_selected(filename):
return not select_file_keywords or any(keyword in filename for keyword in select_file_keywords)
# Exclude path keywords
# Collect paths of directories, which are directly connected to the root dir and match select_dir_keywords
@ -320,6 +349,10 @@ def copy_directory_with_contraints(input_dir_path, output_dir_path,
for dirpath, _, filenames in os.walk(subpath,topdown=False):
# Exclude any dirpath containing a keyword in exclude_path_keywords
if any(excluded in dirpath for excluded in exclude_path_keywords):
continue
# Ensure composite keywords e.g., <keyword>/<keyword> are contained in the path
if select_dir_keywords and not any([keyword in dirpath for keyword in select_dir_keywords]):
continue