Files
SP2XR/meta_files/generate_config.py

45 lines
1.1 KiB
Python

import pandas as pd
import yaml
import os
def infer_general_dtype(dtype):
    """Map a pandas/NumPy dtype onto a coarse type label.

    The predicates are tried in a fixed order (integer, float, datetime)
    and the first one that matches decides the label.

    Args:
        dtype: A dtype object accepted by the ``pandas.api.types``
            predicates, e.g. one entry of ``DataFrame.dtypes``.

    Returns:
        ``"int"``, ``"float"`` or ``"datetime"`` for matching dtypes;
        ``"string"`` for everything else (including boolean and object
        dtypes, which none of the three predicates accept).
    """
    type_checks = pd.api.types
    labelled_predicates = (
        (type_checks.is_integer_dtype, "int"),
        (type_checks.is_float_dtype, "float"),
        (type_checks.is_datetime64_any_dtype, "datetime"),
    )
    for matches, label in labelled_predicates:
        if matches(dtype):
            return label
    # Nothing matched: treat the column as text.
    return "string"
def load_schema(input_file):
    """Read a data file and describe each column with a coarse type label.

    The reader is selected from the file extension, compared
    case-insensitively. CSV input is sampled (only the first 100 rows are
    parsed to infer dtypes), whereas Parquet input is loaded in full.

    Args:
        input_file: Path to a ``.csv`` or ``.parquet`` file.

    Returns:
        A dict mapping each column name to the label returned by
        ``infer_general_dtype`` for that column's dtype, in column order.

    Raises:
        ValueError: If the extension is neither ``.csv`` nor ``.parquet``.
    """
    _, suffix = os.path.splitext(input_file)
    suffix = suffix.lower()

    # Reject unknown formats up front, before touching the file.
    if suffix not in (".csv", ".parquet"):
        raise ValueError(f"Unsupported file format: {suffix}")

    if suffix == ".csv":
        frame = pd.read_csv(input_file, nrows=100)
    else:
        frame = pd.read_parquet(input_file)

    column_types = {}
    for column_name, column_dtype in frame.dtypes.items():
        column_types[column_name] = infer_general_dtype(column_dtype)
    return column_types
def generate_combined_config(pbp_file, hk_file, output_file="config.yaml"):
    """Write the schemas of two data files into a single YAML config.

    Each input is passed through ``load_schema``; the results are stored
    under the ``pbp_schema`` and ``hk_schema`` keys, in that order, and
    dumped to ``output_file``. A confirmation line is printed afterwards.

    Args:
        pbp_file: Path of the file whose schema goes under ``pbp_schema``.
        hk_file: Path of the file whose schema goes under ``hk_schema``.
        output_file: Destination YAML path; overwritten if it exists.

    Raises:
        ValueError: Propagated from ``load_schema`` for unsupported
            file extensions.
    """
    pbp_schema = load_schema(pbp_file)
    hk_schema = load_schema(hk_file)
    combined = {"pbp_schema": pbp_schema, "hk_schema": hk_schema}

    with open(output_file, "w") as stream:
        # sort_keys=False keeps the insertion order of sections and columns.
        yaml.dump(combined, stream, sort_keys=False)

    print(f"Unified config saved to: {output_file}")
# Example usage. Guarded so that it only runs when this file is executed as
# a script; importing the module no longer reads the hard-coded input files
# or writes config.yaml as a side effect.
if __name__ == "__main__":
    generate_combined_config("pbp_meta.parquet", "hk_meta.parquet")