# SP2XR CSV/ZIP to Parquet Conversion Configuration Template
#
# This file contains all parameters for converting raw SP2XR data files
# to time-indexed Parquet format.
#
# USAGE:
#   1. Update the paths and parameters below for your dataset
#   2. Run: python scripts/sp2xr_csv2parquet.py --config conversion_config_template.yaml
#
# NOTES:
#   - For local execution, the script auto-detects available CPU cores and memory
#   - Output files are organized by date and hour: target_directory/date=YYYY-MM-DD/hour=HH/
#   - Processing is parallelized with Dask for efficient handling of large datasets
#   - You can monitor progress via the Dask dashboard (URL printed when the script starts)

# Directory containing your raw SP2XR files (CSV or ZIP format)
source_directory: data/SP2XR_orig_files

# Output directory for converted Parquet files
target_directory: data/pbp_files_parquet_2

# Path to your data schema config file (generated by sp2xr_generate_config.py)
schema_config: config/new_data_schema_with_mapping.yaml

# Pattern used to filter which files to process:
# "PbP" for particle-by-particle data, "hk" for housekeeping data
file_filter: PbP

# Number of files to process in each batch.
# Larger values = more memory usage but potentially faster;
# smaller values = less memory usage but more scheduling overhead.
chunk_size: 100

# Execution mode: "local" or "slurm"
# - local: use your local machine (laptop/desktop)
# - slurm: use a SLURM cluster (HPC environment)
execution_mode: local

# --- SLURM-specific parameters (ignored if execution_mode: local) ---

# Number of CPU cores per SLURM job
slurm_cores: 64

# Memory per SLURM job (e.g., "128GB", "256GB")
slurm_memory: 128GB

# SLURM partition to use.
# Common values: "hourly", "daily", "general"
slurm_partition: daily

# Walltime for the SLURM job (e.g., "01:00:00" for 1 hour, "23:59:00" for just under 1 day).
# If not specified, the default is chosen based on the partition:
# - hourly: 00:59:00
# - daily: 23:59:00
# - general: 7-00:00:00
slurm_walltime: null
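
# --- Example output layout and read-back (illustrative only; not read by the script) ---
# A minimal sketch of what the partitioned output described in the NOTES can look like
# with the settings above. The date/hour values are hypothetical examples; actual
# partitions follow the timestamps in your data, and part-file names depend on the writer.
#   data/pbp_files_parquet_2/date=2024-06-15/hour=07/<part files>.parquet
# A hive-partitioned tree like this can be read back with Dask, for example:
#   import dask.dataframe as dd
#   ddf = dd.read_parquet("data/pbp_files_parquet_2",
#                         filters=[("date", "==", "2024-06-15")])  # load one day's partitions
# Column names come from your schema config, so adjust any downstream selections accordingly.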