Implement record_data_lineage.py to be used as a parameterized decorator. This is to simplify provenance tracking on newly added file readers.
This commit is contained in:
84
src/meta_ops.py
Normal file
84
src/meta_ops.py
Normal file
@@ -0,0 +1,84 @@
|
||||
import sys
|
||||
import os
|
||||
|
||||
try:
|
||||
thisFilePath = os.path.abspath(__file__)
|
||||
except NameError:
|
||||
print("Error: __file__ is not available. Ensure the script is being run from a file.")
|
||||
print("[Notice] Path to DIMA package may not be resolved properly.")
|
||||
thisFilePath = os.getcwd() # Use current directory or specify a default
|
||||
|
||||
dimaPath = os.path.normpath(os.path.join(thisFilePath, "..",'..')) # Move up to project root
|
||||
|
||||
if dimaPath not in sys.path: # Avoid duplicate entries
|
||||
sys.path.append(dimaPath)
|
||||
|
||||
|
||||
import h5py
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import logging
|
||||
import datetime
|
||||
import yaml
|
||||
import json
|
||||
import copy
|
||||
|
||||
import utils.g5505_utils as utils
|
||||
#import src.hdf5_writer as hdf5_lib
|
||||
import inspect
|
||||
from functools import wraps
|
||||
|
||||
|
||||
def record_data_lineage(data_level: int = 0):
|
||||
"""Parameterized decorator to record data lineage information.
|
||||
`data_level` is a user-defined integer.
|
||||
Adds lineage metadata to dict returns or HDF5 group attributes."""
|
||||
|
||||
def decorator(function: callable):
|
||||
# Get relative path to the script where the function is defined
|
||||
tmpFunctionAbsPath = inspect.getfile(function)
|
||||
functionFileRelativePath = os.path.relpath(tmpFunctionAbsPath, dimaPath)
|
||||
func_signature = inspect.signature(function)
|
||||
|
||||
@wraps(function)
|
||||
def wrapper_func(*args, **kwargs):
|
||||
# Bind args/kwargs to the function signature
|
||||
bound_args = func_signature.bind(*args, **kwargs)
|
||||
bound_args.apply_defaults()
|
||||
|
||||
dest_file_path = bound_args.arguments.get('dest_file_obj_or_path')
|
||||
dest_group_name = bound_args.arguments.get('dest_group_name')
|
||||
|
||||
# If the file is already an h5py.File object, use its filename
|
||||
if isinstance(dest_file_path, h5py.File):
|
||||
dest_file_path = dest_file_path.filename
|
||||
|
||||
# Call the original function
|
||||
result = function(*args, **kwargs)
|
||||
|
||||
# Prepare lineage metadata
|
||||
data_lineage_metadata = {
|
||||
'data_level': data_level,
|
||||
'processing_script': functionFileRelativePath,
|
||||
'processing_date': utils.created_at(),
|
||||
}
|
||||
|
||||
# Case 1: dict result → inject metadata
|
||||
if isinstance(result, dict):
|
||||
if 'attributes_dict' not in result:
|
||||
result['attributes_dict'] = {}
|
||||
result['attributes_dict'].update(data_lineage_metadata)
|
||||
|
||||
# Case 2: HDF5 group → inject metadata safely
|
||||
elif dest_file_path and dest_group_name:
|
||||
if os.path.exists(dest_file_path) and dest_file_path.endswith('.h5'):
|
||||
with h5py.File(dest_file_path, mode='r+', track_order=True) as fobj:
|
||||
if dest_group_name in fobj:
|
||||
for key, value in data_lineage_metadata.items():
|
||||
fobj[dest_group_name].attrs[key] = value
|
||||
|
||||
return result
|
||||
|
||||
return wrapper_func
|
||||
|
||||
return decorator
|
||||
Reference in New Issue
Block a user