Add exclude paths set through yaml file
This commit is contained in:
5
utils/exclude_path_keywords.yaml
Normal file
5
utils/exclude_path_keywords.yaml
Normal file
@ -0,0 +1,5 @@
|
||||
exclude_paths:
|
||||
containing :
|
||||
- .ipynb_checkpoints
|
||||
- .renku
|
||||
- .git
|
@ -1,3 +1,18 @@
|
||||
import sys
|
||||
import os
|
||||
|
||||
try:
|
||||
thisFilePath = os.path.abspath(__file__)
|
||||
except NameError:
|
||||
print("Error: __file__ is not available. Ensure the script is being run from a file.")
|
||||
print("[Notice] Path to DIMA package may not be resolved properly.")
|
||||
thisFilePath = os.getcwd() # Use current directory or specify a default
|
||||
|
||||
dimaPath = os.path.normpath(os.path.join(thisFilePath, "..",'..','..')) # Move up to project root
|
||||
|
||||
if dimaPath not in sys.path: # Avoid duplicate entries
|
||||
sys.path.insert(0,dimaPath)
|
||||
|
||||
import pandas as pd
|
||||
import os
|
||||
import sys
|
||||
@ -7,7 +22,7 @@ import logging
|
||||
import numpy as np
|
||||
import h5py
|
||||
import re
|
||||
|
||||
import yaml
|
||||
|
||||
def setup_logging(log_dir, log_filename):
|
||||
"""Sets up logging to a specified directory and file.
|
||||
@ -292,6 +307,19 @@ def copy_directory_with_contraints(input_dir_path, output_dir_path,
|
||||
output_dir_path = os.path.normpath(output_dir_path)
|
||||
select_dir_keywords = [keyword.replace('/',os.sep) for keyword in select_dir_keywords]
|
||||
|
||||
try:
|
||||
with open(os.path.join(dimaPath, 'utils/exclude_path_keywords.yaml'), 'r') as stream:
|
||||
exclude_path_dict = yaml.safe_load(stream)
|
||||
if isinstance(exclude_path_dict, dict):
|
||||
exclude_path_keywords = exclude_path_dict.get('containing', [])
|
||||
if not all(isinstance(keyword, str) for keyword in exclude_path_keywords):
|
||||
exclude_path_keywords = []
|
||||
else:
|
||||
exclude_path_keywords = []
|
||||
except (FileNotFoundError, yaml.YAMLError) as e:
|
||||
print(f"Warning. Unable to load YAML file: {e}")
|
||||
exclude_path_keywords = []
|
||||
|
||||
date = created_at('%Y_%m').replace(":", "-")
|
||||
log_dir='logs/'
|
||||
setup_logging(log_dir, f"copy_directory_with_contraints_{date}.log")
|
||||
@ -302,8 +330,9 @@ def copy_directory_with_contraints(input_dir_path, output_dir_path,
|
||||
|
||||
def file_is_selected(filename):
|
||||
return not select_file_keywords or any(keyword in filename for keyword in select_file_keywords)
|
||||
# Exclude path keywords
|
||||
|
||||
|
||||
|
||||
# Collect the paths of directories that are directly connected to the root dir and match select_dir_keywords
|
||||
paths = []
|
||||
if select_dir_keywords:
|
||||
@ -319,7 +348,11 @@ def copy_directory_with_contraints(input_dir_path, output_dir_path,
|
||||
for subpath in paths:
|
||||
|
||||
for dirpath, _, filenames in os.walk(subpath,topdown=False):
|
||||
|
||||
|
||||
# Exclude any dirpath containing a keyword in exclude_path_keywords
|
||||
if any(excluded in dirpath for excluded in exclude_path_keywords):
|
||||
continue
|
||||
|
||||
# Ensure composite keywords, e.g., <keyword>/<keyword>, are contained in the path
|
||||
if select_dir_keywords and not any([keyword in dirpath for keyword in select_dir_keywords]):
|
||||
continue
|
||||
|
Reference in New Issue
Block a user