Files
SP2XR/config/conversion_config_template.yaml
2025-11-21 15:00:26 +01:00

62 lines
1.9 KiB
YAML

# SP2XR CSV/ZIP to Parquet Conversion Configuration Template
#
# This file contains all parameters for converting raw SP2XR data files
# to time-indexed Parquet format.
#
# USAGE:
# 1. Update the paths and parameters below for your dataset
# 2. Run (from the repository root):
#      python scripts/sp2xr_csv2parquet.py --config config/conversion_config_template.yaml
#
# NOTES:
# - For local execution, the script auto-detects available CPU cores and memory
# - Output files are organized by date and hour: target_directory/date=YYYY-MM-DD/hour=HH/
# - Processing is parallelized using Dask for efficient handling of large datasets
# - You can monitor progress via the Dask dashboard (URL printed when script starts)
# --- Input / output locations ---
# Directory containing your raw SP2XR files (CSV or ZIP format).
# NOTE(review): this and the paths below are relative; presumably they are
# resolved against the directory the script is launched from - confirm.
source_directory: data/SP2XR_orig_files
# Output directory for the converted Parquet files. As described in the
# NOTES at the top of this file, output is organized beneath it as
# date=YYYY-MM-DD/hour=HH/.
target_directory: data/pbp_files_parquet_2
# Path to your data schema config file
# (generated by sp2xr_generate_config.py).
schema_config: config/new_data_schema_with_mapping.yaml
# Pattern used to select which raw files are processed:
#   "PbP" - particle-by-particle data
#   "hk"  - housekeeping data
# NOTE(review): how the pattern is matched against file names (substring,
# case sensitivity) is not visible here - confirm in the conversion script.
file_filter: PbP
# Number of files to process in each batch. Trade-off:
#   larger values  -> more memory usage, but potentially faster
#   smaller values -> less memory usage, but more overhead
chunk_size: 100
# Execution mode: "local" or "slurm"
#   local - use your local machine (laptop/desktop); as described in the
#           NOTES at the top of this file, available CPU cores and memory
#           are auto-detected
#   slurm - use a SLURM cluster (HPC environment); the slurm_* parameters
#           in this file apply only in this mode
execution_mode: local
# --- SLURM-specific parameters (ignored if execution_mode: local) ---
# Number of CPU cores per SLURM job.
slurm_cores: 64
# Memory per SLURM job, written as a size string (e.g., "128GB", "256GB").
slurm_memory: 128GB
# SLURM partition to use.
# Common values: "hourly", "daily", "general"
# The partition also determines the default walltime when slurm_walltime
# is not specified.
# NOTE(review): partition names are usually cluster-specific - check which
# ones your cluster provides.
slurm_partition: daily
# Walltime for the SLURM job, e.g. "01:00:00" for 1 hour or "23:59:00" for
# just under 1 day.
# Keep the quotes when you set a value: some YAML 1.1 parsers read an
# unquoted digits-and-colons scalar such as 23:59:00 as a base-60 integer
# instead of a string. NOTE(review): depends on the parser the script uses.
# If not specified (left as null), the default is based on slurm_partition:
#   - hourly:  00:59:00
#   - daily:   23:59:00
#   - general: 7-00:00:00
slurm_walltime: null