From f3ff32e049ac4361c6cb02993513bd902bce0e7f Mon Sep 17 00:00:00 2001 From: Florez Ospina Juan Felipe Date: Sun, 22 Jun 2025 12:11:48 +0200 Subject: [PATCH] Update to pipelines/data_integration.py. Added feature to use environment variable MOUNT_DRIVE defined in .env file. --- pipelines/data_integration.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/pipelines/data_integration.py b/pipelines/data_integration.py index 4521d42..66a5fca 100644 --- a/pipelines/data_integration.py +++ b/pipelines/data_integration.py @@ -75,12 +75,28 @@ def load_config_and_setup_logging(yaml_config_file_path, log_dir): except yaml.YAMLError as exc: logging.error("Error loading YAML file: %s", exc) raise ValueError(f"Failed to load YAML file: {exc}") - + # Check if required keys are present missing_keys = [key for key in required_keys if key not in config_dict] if missing_keys: raise KeyError(f"Missing required keys in YAML configuration: {missing_keys}") + # Look for all placeholders like ${VAR_NAME} + input_dir = config_dict['input_file_directory'] + placeholders = re.findall(r'\$\{([^}^{]+)\}', input_dir) + + success = utils.load_env_from_root() + print(f'Success : {success}') + + for var in placeholders: + env_value = os.environ.get(var) + if env_value is None: + raise ValueError(f"Environment variable '{var}' is not set but used in the config.") + input_dir = input_dir.replace(f"${{{var}}}", env_value) + + config_dict['input_file_directory'] = input_dir + + # Check the instrument_datafolder required type and ensure the list is of at least length one. if isinstance(config_dict['instrument_datafolder'], list) and not len(config_dict['instrument_datafolder'])>=1: raise ValueError('Invalid value for key "instrument_datafolder". Expected a list of strings with at least one item.'