cleanup config organization

2025-09-30 00:23:12 +02:00
parent 40ba49a61f
commit cde421edda
12 changed files with 996 additions and 1152 deletions

View File

@@ -0,0 +1,121 @@
# SP2XR Complete Configuration Example
# ============================================================================
# METADATA - Optional documentation for this configuration
# ============================================================================
metadata:
campaign: "Example_Campaign"
instrument: "PSI SP2XR"
operator: "Barbara Bertozzi"
description: "Complete example configuration with all features enabled"
created: "2025-09-29"
# ============================================================================
# FILE PATHS - All input and output locations
# ============================================================================
paths:
input_pbp: data/pbp_files_parquet
input_hk: data/hk_files_parquet
output: data/SP2XR_processed_1min
instrument_config: config/my_instrument_settings.yaml
# ============================================================================
# WORKFLOW SETTINGS - Complete analysis pipeline
# ============================================================================
workflow:
# Analysis components
conc: true # Number and mass concentrations
BC_hist: true # BC mass distributions (dNdlogDmev, dMdlogDmev)
scatt_hist: true # Scattering size distributions (dNdlogDsc)
timelag_hist: false # Time-lag distributions for mixing state analysis
# Time and data management
dt: 60 # Time resolution (seconds): 1, 10, 60, 300, 3600
repartition: '1h' # Repartition frequency: '15min', '1h', '6h', '1d'
max_partition_size: "200MB" # Memory management
saving_schema: ['date', 'hour'] # Output partitioning scheme
# ============================================================================
# CLUSTER SETTINGS - HPC and local computing options
# ============================================================================
cluster:
# Execution mode
use_local: true # true for local, false for SLURM cluster
# Resource allocation
cores: 32 # Total CPU cores
processes: 16 # Worker processes (usually cores/2)
memory: 256GB # Total memory
# SLURM-specific settings
walltime: "4-00:00:00" # Job time limit (days-hours:minutes:seconds)
partition: general # SLURM partition: general, bigmem, gpu, etc.
log_dir: ./slurm_out # SLURM log directory
# ============================================================================
# TIME CHUNKING - Temporal data processing strategy
# ============================================================================
chunking:
freq: '7d' # Chunk frequency: '1d', '3d', '7d', '1M'
start_date: null # Start date (YYYY-MM-DD format)
end_date: null # End date (YYYY-MM-DD format)
# ============================================================================
# BLACK CARBON PROPERTIES - Physical and optical parameters
# ============================================================================
bc:
# Density settings
rho_eff: 1800 # Effective density (kg/m³)
type: constant_effective_density # Density model
# ============================================================================
# DETAILED HISTOGRAM SETTINGS - Size and mass distribution parameters
# ============================================================================
histo:
# Incandescence (BC mass) distributions
inc:
min_mass: 0.3 # Minimum mass (fg)
max_mass: 400 # Maximum mass (fg)
n_bins: 50 # Number of bins
#log_spacing: true # Logarithmic bin spacing
# Scattering (optical diameter) distributions
scatt:
min_D: 100 # Minimum diameter (nm)
max_D: 500 # Maximum diameter (nm)
n_bins: 20 # Number of bins
#log_spacing: true # Logarithmic bin spacing
# Time-lag distributions (mixing state analysis)
timelag:
min: -10 # Minimum time lag (ns)
max: 400 # Maximum time lag (ns)
n_bins: 100 # Number of bins
#log_spacing: false # Linear bin spacing for time-lag
# ============================================================================
# MIXING STATE ANALYSIS - Particle coating classification
# ============================================================================
mixing_state:
# Classification thresholds
threshold: 50 # Thick coating threshold (nm)
inc_scatt_ratio: 1.1 # Minimum incandescence/scattering ratio
# ============================================================================
# ADVANCED CALIBRATION PARAMETERS
# ============================================================================
calibration:
# Incandescence calibration
incandescence:
curve_type: "polynomial" # "polynomial", "powerlaw", or "spline"
parameters: [0.05, 2.047e-07, -1.2e-15] # Calibration coefficients
# Scattering calibration
scattering:
curve_type: "powerlaw" # "polynomial", "powerlaw", or "spline"
parameters: [17.22, 0.169, -1.494] # [a, b, c] for powerlaw
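
A minimal sketch of how a driver script might consume a config like the one above (PyYAML assumed; the loader name and config path are illustrative, not the package's API):

import yaml
from pathlib import Path

def load_run_config(path: str) -> dict:
    """Read the YAML run configuration into a plain dict."""
    with open(path, "r") as f:
        return yaml.safe_load(f)

cfg = load_run_config("config/example_config.yaml")   # illustrative path
dt = cfg["workflow"]["dt"]                             # time resolution in seconds
use_local = cfg["cluster"].get("use_local", False)     # local vs. SLURM execution
out_dir = Path(cfg["paths"]["output"])                 # processed-data destination
print(f"dt={dt}s, local cluster={use_local}, output -> {out_dir}")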

View File

@@ -1,252 +1,251 @@
'# INSTRUCTIONS':
- This config file contains both schema definitions and column mappings.
- 1. The *_schema sections define the data types for your input files.
- 2. The *_column_mapping sections map your file columns to canonical names.
- 3. Replace placeholder values (YOUR_COLUMN_NAME_FOR_*) with actual column names from your files.
- 4. If your file doesn't have a particular canonical column, set it to null or remove the line.
- 5. The output parquet files will use the canonical column names for consistency.
pbp_schema:
Time (sec): float
Packet Time Stamp: float
Flag: int
Dropped Records: int
Record Count: int
Record Size: int
Particle Time Stamp: float
Particle Flags: int
Scatter relPeak: float
Scatter Transit Time: int
Scatter Peak Time: int
Scatter FWHM: int
Scatter Size (nm): float
Incand relPeak: float
Incand Transit Time: float
Incand Peak Time: float
Incand FWHM: float
Incand Delay: float
Incand Mass (fg): float
Reserved: int
hk_schema:
Time Stamp: string
Time (sec): float
Time Stamp (UTC sec): float
Elapsed Time: float
Error Code: int
Packet Time Stamp: float
Laser TEC Temp (C): float
Crystal TEC Temp (C): int
Inlet Air Temp (C): float
Computer Heatsink Temp (C): float
Laser Heatsink Temp (C): float
Outlet Air Temp (C): float
YAG Output Monitor (V): float
Cavity Pressure (hPa): float
Laser Driver Power Monitor (uA): int
Laser Driver Current Limit Monitor (A): float
Laser Driver Current Monitor (A): float
Laser TEC Sense: float
Laser Over Temp (On/Off): int
+5V Laser Rail (V): float
' +5V Rail (V)': float
+12V Rail (V): float
High Voltage (V): float
Battery Temp (C): float
UPS Output (V): float
12V Iso Rail (V): float
5V Iso Rail (V): float
3.3V Iso Rail (V): float
Spare 22: int
Spare 23: int
408 Board Spare 0: int
408 Board Spare 1: int
408 Board Spare 2: int
408 Board Spare 3: int
408 Board Spare 4: int
Purge Flow Monitor (sccm): float
System Input Voltage (V): float
Board Temperature (C): float
408 Board Spare 8: int
408 Board Spare 9: int
408 Board Spare 10: int
408 Board Spare 11: int
408 Board Spare 12: int
408 Board Spare 13: int
408 Board Spare 14: int
408 Board Spare 15: int
Sheath Flow Controller Read (vccm): int
Sheath Flow Controller Read (sccm): int
Sheath Flow Controller Pressure (psia): float
Sheath Flow Controller Temperature (C): float
Sample Flow Controller Read (vccm): float
Sample Flow Controller Read (sccm): float
Sample Flow Controller Pressure (psia): float
Sample Flow Controller Temperature (C): float
Fan 1 (RPM): int
Fan 2 (RPM): int
Laser Fan (RPM): int
Spare tach: int
Threshold Crossing Events: int
Dual Qualified Scatter and Incand Particles: int
Qualified Scatter Only Particles: int
Qualified Incand Only Particles: int
Disqualified Due to Scatter Saturation: int
Disqualified Due to Scatter Transit Time Min: int
Disqualified Due to Scatter Transit Time Max: int
Disqualified Due to Scatter FWHM Min: int
Disqualified Due to Scatter FWHM Max: int
Scatter Inter Part Period Min Violation: int
Disqualified Due to Incand Saturation: int
Disqualified Due to Incand Transit Time Min: int
Disqualified Due to Incand Transit Time Max: int
Disqualified Due to Incand FWHM Min: int
Disqualified Due to Incand FWHM Max: int
Incand Inter Part Period Min Violation: int
Baseline Sizer Lo: int
Baseline Sizer Hi: int
Baseline Incand Lo: int
Baseline Incand Hi: int
Bandwidth Sizer Hi: int
Bandwidth Sizer Lo: int
Bandwidth Incand Lo: int
Bandwidth Incand Hi: int
ABD-0408 HK ADCs min: int
ABD-0436 HK ADCs min: int
ABD-0408 HK ADCs max: int
ABD-0436 HK ADCs max: int
Incand Particle Conc (cts/ccm): float
Scattering Particle Conc (cts/ccm): float
Incand Mass Conc (fg/sccm): float
Scattering Mass Conc (fg/sccm): float
Sheath Flow Set Point: int
Sample Flow Set Point: int
Laser Temp Set Point: int
Laser Current Set Point: float
Spare 4 Set Point: int
Spare 5 Set Point: int
PMT HV Set Point: float
Particle Density (g/ccm): float
PbP Packet Time: float
Scatter Bin 1: int
Scatter Bin 2: int
Scatter Bin 3: int
Scatter Bin 4: int
Scatter Bin 5: int
Scatter Bin 6: int
Scatter Bin 7: int
Scatter Bin 8: int
Scatter Bin 9: int
Scatter Bin 10: int
Scatter Bin 11: int
Scatter Bin 12: int
Scatter Bin 13: int
Scatter Bin 14: int
Scatter Bin 15: int
Scatter Bin 16: int
Scatter Bin 17: int
Scatter Bin 18: int
Scatter Bin 19: int
Scatter Bin 20: int
Incand Bin 1: int
Incand Bin 2: int
Incand Bin 3: int
Incand Bin 4: int
Incand Bin 5: int
Incand Bin 6: int
Incand Bin 7: int
Incand Bin 8: int
Incand Bin 9: int
Incand Bin 10: int
Incand Bin 11: int
Incand Bin 12: int
Incand Bin 13: int
Incand Bin 14: int
Incand Bin 15: int
Incand Bin 16: int
Incand Bin 17: int
Incand Bin 18: int
Incand Bin 19: int
Incand Bin 20: int
pbp_canonical_schema:
Time (sec): float
Packet Time Stamp: float
Flag: float
Dropped Records: float
Record Count: float
Record Size: float
Particle Time Stamp: float
Particle Flags: float
Scatter relPeak: float
Scatter Transit Time: float
Scatter Peak Time: float
Scatter FWHM: float
Scatter Size (nm): float
Incand relPeak: float
Incand Transit Time: float
Incand Peak Time: float
Incand FWHM: float
Incand Delay: float
Incand Mass (fg): float
Reserved: float
hk_canonical_schema:
Time Stamp: datetime
Time (sec): float
Sample Flow Controller Read (sccm): float
Sample Flow Controller Read (vccm): float
Time Stamp (UTC sec): float
Elapsed Time: float
Error Code: float
Packet Time Stamp: float
Laser TEC Temp (C): float
Crystal TEC Temp (C): float
Inlet Air Temp (C): float
Computer Heatsink Temp (C): float
Laser Heatsink Temp (C): float
Outlet Air Temp (C): float
YAG Output Monitor (V): float
Cavity Pressure (hPa): float
Laser Driver Power Monitor (uA): float
Laser Driver Current Limit Monitor (A): float
Laser Driver Current Monitor (A): float
pbp_column_mapping:
Time (sec): Time (sec)
Packet Time Stamp: Packet Time Stamp
Flag: Flag
Dropped Records: Dropped Records
Record Count: Record Count
Record Size: Record Size
Particle Time Stamp: Particle Time Stamp
Particle Flags: Particle Flags
Scatter relPeak: Scatter relPeak
Scatter Transit Time: Scatter Transit Time
Scatter Peak Time: Scatter Peak Time
Scatter FWHM: Scatter FWHM
Scatter Size (nm): Scatter Size (nm)
Incand relPeak: Incand relPeak
Incand Transit Time: Incand Transit Time
Incand Peak Time: Incand Peak Time
Incand FWHM: Incand FWHM
Incand Delay: Incand Delay
Incand Mass (fg): Incand Mass (fg)
Reserved: Reserved
hk_column_mapping:
Time Stamp: Time Stamp
Time (sec): Time (sec)
Sample Flow Controller Read (sccm): Sample Flow Controller Read (sccm)
Sample Flow Controller Read (vccm): Sample Flow Controller Read (vccm)
Time Stamp (UTC sec): Time Stamp (UTC sec)
Elapsed Time: Elapsed Time
Error Code: Error Code
Packet Time Stamp: Packet Time Stamp
Laser TEC Temp (C): Laser TEC Temp (C)
Crystal TEC Temp (C): Crystal TEC Temp (C)
Inlet Air Temp (C): Inlet Air Temp (C)
Computer Heatsink Temp (C): Computer Heatsink Temp (C)
Laser Heatsink Temp (C): Laser Heatsink Temp (C)
Outlet Air Temp (C): Outlet Air Temp (C)
YAG Output Monitor (V): YAG Output Monitor (V)
Cavity Pressure (hPa): Cavity Pressure (hPa)
Laser Driver Power Monitor (uA): Laser Driver Power Monitor (uA)
Laser Driver Current Limit Monitor (A): Laser Driver Current Limit Monitor (A)
Laser Driver Current Monitor (A): Laser Driver Current Monitor (A)
'# INSTRUCTIONS':
- This config file contains both schema definitions and column mappings.
- 1. The *_schema sections define the data types for your input files.
- 2. The *_column_mapping sections map your file columns to canonical names.
- 3. Update column mappings if your files use different column names.
- 4. If your file doesn't have a particular canonical column, set it to null or remove the line.
- 5. The output parquet files will use the canonical column names for consistency.
pbp_schema:
Time (sec): float
Packet Time Stamp: float
Flag: int
Dropped Records: int
Record Count: int
Record Size: int
Particle Time Stamp: float
Particle Flags: int
Scatter relPeak: float
Scatter Transit Time: int
Scatter Peak Time: int
Scatter FWHM: int
Scatter Size (nm): float
Incand relPeak: float
Incand Transit Time: float
Incand Peak Time: float
Incand FWHM: float
Incand Delay: float
Incand Mass (fg): float
Reserved: int
hk_schema:
Time Stamp: string
Time (sec): float
Time Stamp (UTC sec): float
Elapsed Time: float
Error Code: int
Packet Time Stamp: float
Laser TEC Temp (C): float
Crystal TEC Temp (C): int
Inlet Air Temp (C): float
Computer Heatsink Temp (C): float
Laser Heatsink Temp (C): float
Outlet Air Temp (C): float
YAG Output Monitor (V): float
Cavity Pressure (hPa): float
Laser Driver Power Monitor (uA): int
Laser Driver Current Limit Monitor (A): float
Laser Driver Current Monitor (A): float
Laser TEC Sense: float
Laser Over Temp (On/Off): int
+5V Laser Rail (V): float
' +5V Rail (V)': float
+12V Rail (V): float
High Voltage (V): float
Battery Temp (C): float
UPS Output (V): float
12V Iso Rail (V): float
5V Iso Rail (V): float
3.3V Iso Rail (V): float
Spare 22: int
Spare 23: int
408 Board Spare 0: int
408 Board Spare 1: int
408 Board Spare 2: int
408 Board Spare 3: int
408 Board Spare 4: int
Purge Flow Monitor (sccm): float
System Input Voltage (V): float
Board Temperature (C): float
408 Board Spare 8: int
408 Board Spare 9: int
408 Board Spare 10: int
408 Board Spare 11: int
408 Board Spare 12: int
408 Board Spare 13: int
408 Board Spare 14: int
408 Board Spare 15: int
Sheath Flow Controller Read (vccm): int
Sheath Flow Controller Read (sccm): int
Sheath Flow Controller Pressure (psia): float
Sheath Flow Controller Temperature (C): float
Sample Flow Controller Read (vccm): float
Sample Flow Controller Read (sccm): float
Sample Flow Controller Pressure (psia): float
Sample Flow Controller Temperature (C): float
Fan 1 (RPM): int
Fan 2 (RPM): int
Laser Fan (RPM): int
Spare tach: int
Threshold Crossing Events: int
Dual Qualified Scatter and Incand Particles: int
Qualified Scatter Only Particles: int
Qualified Incand Only Particles: int
Disqualified Due to Scatter Saturation: int
Disqualified Due to Scatter Transit Time Min: int
Disqualified Due to Scatter Transit Time Max: int
Disqualified Due to Scatter FWHM Min: int
Disqualified Due to Scatter FWHM Max: int
Scatter Inter Part Period Min Violation: int
Disqualified Due to Incand Saturation: int
Disqualified Due to Incand Transit Time Min: int
Disqualified Due to Incand Transit Time Max: int
Disqualified Due to Incand FWHM Min: int
Disqualified Due to Incand FWHM Max: int
Incand Inter Part Period Min Violation: int
Baseline Sizer Lo: int
Baseline Sizer Hi: int
Baseline Incand Lo: int
Baseline Incand Hi: int
Bandwidth Sizer Hi: int
Bandwidth Sizer Lo: int
Bandwidth Incand Lo: int
Bandwidth Incand Hi: int
ABD-0408 HK ADCs min: int
ABD-0436 HK ADCs min: int
ABD-0408 HK ADCs max: int
ABD-0436 HK ADCs max: int
Incand Particle Conc (cts/ccm): float
Scattering Particle Conc (cts/ccm): float
Incand Mass Conc (fg/sccm): float
Scattering Mass Conc (fg/sccm): float
Sheath Flow Set Point: int
Sample Flow Set Point: int
Laser Temp Set Point: int
Laser Current Set Point: float
Spare 4 Set Point: int
Spare 5 Set Point: int
PMT HV Set Point: float
Particle Density (g/ccm): float
PbP Packet Time: float
Scatter Bin 1: int
Scatter Bin 2: int
Scatter Bin 3: int
Scatter Bin 4: int
Scatter Bin 5: int
Scatter Bin 6: int
Scatter Bin 7: int
Scatter Bin 8: int
Scatter Bin 9: int
Scatter Bin 10: int
Scatter Bin 11: int
Scatter Bin 12: int
Scatter Bin 13: int
Scatter Bin 14: int
Scatter Bin 15: int
Scatter Bin 16: int
Scatter Bin 17: int
Scatter Bin 18: int
Scatter Bin 19: int
Scatter Bin 20: int
Incand Bin 1: int
Incand Bin 2: int
Incand Bin 3: int
Incand Bin 4: int
Incand Bin 5: int
Incand Bin 6: int
Incand Bin 7: int
Incand Bin 8: int
Incand Bin 9: int
Incand Bin 10: int
Incand Bin 11: int
Incand Bin 12: int
Incand Bin 13: int
Incand Bin 14: int
Incand Bin 15: int
Incand Bin 16: int
Incand Bin 17: int
Incand Bin 18: int
Incand Bin 19: int
Incand Bin 20: int
pbp_canonical_schema:
Time (sec): float
Packet Time Stamp: float
Flag: float
Dropped Records: float
Record Count: float
Record Size: float
Particle Time Stamp: float
Particle Flags: float
Scatter relPeak: float
Scatter Transit Time: float
Scatter Peak Time: float
Scatter FWHM: float
Scatter Size (nm): float
Incand relPeak: float
Incand Transit Time: float
Incand Peak Time: float
Incand FWHM: float
Incand Delay: float
Incand Mass (fg): float
Reserved: float
hk_canonical_schema:
Time Stamp: datetime
Time (sec): float
Sample Flow Controller Read (sccm): float
Sample Flow Controller Read (vccm): float
Time Stamp (UTC sec): float
Elapsed Time: float
Error Code: float
Packet Time Stamp: float
Laser TEC Temp (C): float
Crystal TEC Temp (C): float
Inlet Air Temp (C): float
Computer Heatsink Temp (C): float
Laser Heatsink Temp (C): float
Outlet Air Temp (C): float
YAG Output Monitor (V): float
Cavity Pressure (hPa): float
Laser Driver Power Monitor (uA): float
Laser Driver Current Limit Monitor (A): float
Laser Driver Current Monitor (A): float
pbp_column_mapping:
Time (sec): Time (sec)
Packet Time Stamp: Packet Time Stamp
Flag: Flag
Dropped Records: Dropped Records
Record Count: Record Count
Record Size: Record Size
Particle Time Stamp: Particle Time Stamp
Particle Flags: Particle Flags
Scatter relPeak: Scatter relPeak
Scatter Transit Time: Scatter Transit Time
Scatter Peak Time: Scatter Peak Time
Scatter FWHM: Scatter FWHM
Scatter Size (nm): Scatter Size (nm)
Incand relPeak: Incand relPeak
Incand Transit Time: Incand Transit Time
Incand Peak Time: Incand Peak Time
Incand FWHM: Incand FWHM
Incand Delay: Incand Delay
Incand Mass (fg): Incand Mass (fg)
Reserved: Reserved
hk_column_mapping:
Time Stamp: Time Stamp
Time (sec): Time (sec)
Sample Flow Controller Read (sccm): Sample Flow Controller Read (sccm)
Sample Flow Controller Read (vccm): Sample Flow Controller Read (vccm)
Time Stamp (UTC sec): Time Stamp (UTC sec)
Elapsed Time: Elapsed Time
Error Code: Error Code
Packet Time Stamp: Packet Time Stamp
Laser TEC Temp (C): Laser TEC Temp (C)
Crystal TEC Temp (C): Crystal TEC Temp (C)
Inlet Air Temp (C): Inlet Air Temp (C)
Computer Heatsink Temp (C): Computer Heatsink Temp (C)
Laser Heatsink Temp (C): Laser Heatsink Temp (C)
Outlet Air Temp (C): Outlet Air Temp (C)
YAG Output Monitor (V): YAG Output Monitor (V)
Cavity Pressure (hPa): Cavity Pressure (hPa)
Laser Driver Power Monitor (uA): Laser Driver Power Monitor (uA)
Laser Driver Current Limit Monitor (A): Laser Driver Current Limit Monitor (A)
Laser Driver Current Monitor (A): Laser Driver Current Monitor (A)
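
As a reference, a minimal sketch of how a *_column_mapping section like the one above could be applied when converting raw files to parquet (pandas assumed; the function name and input file are illustrative):

from __future__ import annotations
import pandas as pd
import yaml

def rename_to_canonical(df: pd.DataFrame, mapping: dict[str, str | None]) -> pd.DataFrame:
    """Rename raw-file columns to the canonical names, skipping unmapped or missing ones."""
    # The mapping goes canonical name -> raw column name, so invert it for DataFrame.rename
    reverse = {raw: canonical for canonical, raw in mapping.items() if raw and raw in df.columns}
    return df.rename(columns=reverse)

with open("config_schema_with_mapping.yaml") as f:     # generator's default output name
    cfg = yaml.safe_load(f)
hk_raw = pd.read_csv("hk_raw.csv")                     # illustrative raw HK file
hk = rename_to_canonical(hk_raw, cfg["hk_column_mapping"])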

View File

@@ -1,15 +1,20 @@
instrument_parameters:
ScattTransitMin: 10.0
ScattTransitMax: 65535.0
ScattFWHMMin: 30.0
ScattFWHMMax: 65535.0
ScattInterTimeMin: 10.0
IncTransitMin: 5.0
IncTransitMax: 65535.0
IncFWHMMin: 30.0
IncFWHMMax: 65535.0
IncInterTimeMin: 10.0
SaveRate: 1.0
Signal_saturation:
IncSatPoint: 1700000000.0
ScattSatPoint: 1700000000.0
metadata:
source_ini_file: C:\Users\Baccandr\Documents\SP2XR_code\SP2XR_code\tests\data\SP2XR_orig_files\20190508\20190508172218\20190508172218 Calibration 20181005.ini
generated_on: '2025-09-29T22:52:49.725057'
generated_by: sp2xr_generate_config.py
instrument_parameters:
ScattTransitMin: 10.0
ScattTransitMax: 65535.0
ScattFWHMMin: 30.0
ScattFWHMMax: 65535.0
ScattInterTimeMin: 10.0
IncTransitMin: 5.0
IncTransitMax: 65535.0
IncFWHMMin: 30.0
IncFWHMMax: 65535.0
IncInterTimeMin: 10.0
SaveRate: 1.0
Signal_saturation:
IncSatPoint: 1700000000.0
ScattSatPoint: 1700000000.0
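
A short sketch of how these thresholds might be used to flag qualifying scatter signals in a PbP table (column names taken from pbp_schema; the actual pipeline logic may differ):

import pandas as pd
import yaml

with open("config/my_instrument_settings.yaml") as f:  # path used in the example config above
    inst = yaml.safe_load(f)
p, sat = inst["instrument_parameters"], inst["Signal_saturation"]

def scatter_ok(pbp: pd.DataFrame) -> pd.Series:
    """Boolean mask of particles passing the scatter transit-time, FWHM and saturation checks."""
    return (
        pbp["Scatter Transit Time"].between(p["ScattTransitMin"], p["ScattTransitMax"])
        & pbp["Scatter FWHM"].between(p["ScattFWHMMin"], p["ScattFWHMMax"])
        & (pbp["Scatter relPeak"] < sat["ScattSatPoint"])
    )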

View File

@@ -1,266 +0,0 @@
[Custom]
Display Tab=TRUE
Display Names=<2>
Display Names 0=set 1
Display Names 1=set 2
Sets=<2>
Sets 0.Cluster.Graph 1=<8>
Sets 0.Cluster.Graph 1 0.Plot.Channel=+5V Laser Rail (V)
Sets 0.Cluster.Graph 1 0.Plot.Left/Right=FALSE
Sets 0.Cluster.Graph 1 1.Plot.Channel= +5V Rail (V)
Sets 0.Cluster.Graph 1 1.Plot.Left/Right=FALSE
Sets 0.Cluster.Graph 1 2.Plot.Channel=+12V Rail (V)
Sets 0.Cluster.Graph 1 2.Plot.Left/Right=FALSE
Sets 0.Cluster.Graph 1 3.Plot.Channel=3.3V Iso Rail (V)
Sets 0.Cluster.Graph 1 3.Plot.Left/Right=FALSE
Sets 0.Cluster.Graph 1 4.Plot.Channel=UPS Output (V)
Sets 0.Cluster.Graph 1 4.Plot.Left/Right=TRUE
Sets 0.Cluster.Graph 1 5.Plot.Channel=Inlet Air Temp (C)
Sets 0.Cluster.Graph 1 5.Plot.Left/Right=TRUE
Sets 0.Cluster.Graph 1 6.Plot.Channel=Crystal TEC Temp (C)
Sets 0.Cluster.Graph 1 6.Plot.Left/Right=TRUE
Sets 0.Cluster.Graph 1 7.Plot.Channel=Laser Heatsink Temp (C)
Sets 0.Cluster.Graph 1 7.Plot.Left/Right=TRUE
Sets 0.Cluster.Graph 2=<8>
Sets 0.Cluster.Graph 2 0.Plot.Channel=Laser TEC Temp (C)
Sets 0.Cluster.Graph 2 0.Plot.Left/Right=FALSE
Sets 0.Cluster.Graph 2 1.Plot.Channel=Crystal TEC Temp (C)
Sets 0.Cluster.Graph 2 1.Plot.Left/Right=FALSE
Sets 0.Cluster.Graph 2 2.Plot.Channel=Inlet Air Temp (C)
Sets 0.Cluster.Graph 2 2.Plot.Left/Right=FALSE
Sets 0.Cluster.Graph 2 3.Plot.Channel=Computer Heatsink Temp (C)
Sets 0.Cluster.Graph 2 3.Plot.Left/Right=FALSE
Sets 0.Cluster.Graph 2 4.Plot.Channel=Laser Heatsink Temp (C)
Sets 0.Cluster.Graph 2 4.Plot.Left/Right=FALSE
Sets 0.Cluster.Graph 2 5.Plot.Channel=Outlet Air Temp (C)
Sets 0.Cluster.Graph 2 5.Plot.Left/Right=FALSE
Sets 0.Cluster.Graph 2 6.Plot.Channel=Battery Temp (C)
Sets 0.Cluster.Graph 2 6.Plot.Left/Right=FALSE
Sets 0.Cluster.Graph 2 7.Plot.Channel=Laser TEC Sense
Sets 0.Cluster.Graph 2 7.Plot.Left/Right=TRUE
Sets 1.Cluster.Graph 1=<4>
Sets 1.Cluster.Graph 1 0.Plot.Channel=Threshold Crossing Events
Sets 1.Cluster.Graph 1 0.Plot.Left/Right=FALSE
Sets 1.Cluster.Graph 1 1.Plot.Channel=Dual Qualified Scatter and Incand Particles
Sets 1.Cluster.Graph 1 1.Plot.Left/Right=FALSE
Sets 1.Cluster.Graph 1 2.Plot.Channel=Qualified Scatter Only Particles
Sets 1.Cluster.Graph 1 2.Plot.Left/Right=FALSE
Sets 1.Cluster.Graph 1 3.Plot.Channel=Qualified Incand Only Particles
Sets 1.Cluster.Graph 1 3.Plot.Left/Right=FALSE
Sets 1.Cluster.Graph 2=<8>
Sets 1.Cluster.Graph 2 0.Plot.Channel=Baseline Sizer Lo
Sets 1.Cluster.Graph 2 0.Plot.Left/Right=FALSE
Sets 1.Cluster.Graph 2 1.Plot.Channel=Baseline Sizer Hi
Sets 1.Cluster.Graph 2 1.Plot.Left/Right=FALSE
Sets 1.Cluster.Graph 2 2.Plot.Channel=Baseline Incand Lo
Sets 1.Cluster.Graph 2 2.Plot.Left/Right=FALSE
Sets 1.Cluster.Graph 2 3.Plot.Channel=Baseline Incand Hi
Sets 1.Cluster.Graph 2 3.Plot.Left/Right=FALSE
Sets 1.Cluster.Graph 2 4.Plot.Channel=Bandwidth Sizer Hi
Sets 1.Cluster.Graph 2 4.Plot.Left/Right=TRUE
Sets 1.Cluster.Graph 2 5.Plot.Channel=Bandwidth Sizer Lo
Sets 1.Cluster.Graph 2 5.Plot.Left/Right=TRUE
Sets 1.Cluster.Graph 2 6.Plot.Channel=Bandwidth Incand Lo
Sets 1.Cluster.Graph 2 6.Plot.Left/Right=TRUE
Sets 1.Cluster.Graph 2 7.Plot.Channel=Bandwidth Incand Hi
Sets 1.Cluster.Graph 2 7.Plot.Left/Right=TRUE
[Raw Options]
byte 0: data mux=High Dynamic Range Traces
Raw Data Particle Selection=First Scatter
Scatter relPeak=0
Incand relPeak=0
Inter-raw Period (ms)=100
leader sample count=400
footer sample count=400
[Scatter Parameters]
Graph=Counts
X Mode=Size
Norm?=FALSE
Cumulative=FALSE
[Versions]
Instrument Name=SP2XR
SP2 Version=2.01.01.19
Acq Version=2.00.00.00
Last Date Updated=4/11/2019 6:24:36 AM
[Trigger Settings]
Scatter Transit Time Min=10
Scatter Transit Time Max=65535
Scatter FWHM Min=30
Scatter FWHM Max=65535
Scatter Inter Particle Time Min=10
Incand Transit Time Min=5
Incand Transit Time Max=65535
Incand FWHM Min=30
Incand FWHM Max=65535
Incand Inter Particle Time Min=10
Paired Particle Delay Max=10
Scatter Threshold Min=36100
Scatter Hysteresis Min=2703
Incand Threshold Min=50700
Incand Hysteresis Min=5394
Scatter Threshold Max=559000000
Scatter Hysteresis Max=0
Incand Threshold Max=2147483647
Incand Hysteresis Max=0
Forced Trigger=FALSE
Forced Trigger Interval(ms)=1000
[# Samples S]
# Samples S=0
[Program]
Data File Path=D:\DMT\SP2XR Data
Restart Files=FALSE
Graph 0 Left=YAG Output Monitor (V)
Graph 0 Right=Laser Driver Current Monitor (A)
Graph 1 Left=Scattering Particle Conc (cts/ccm)
Graph 1 Right=Incand Particle Conc (cts/ccm)
Control Cycle Time=0
NTP Server=
Write File?=FALSE
Graph Backgrounds=16448250
Graph 2 Left=Sheath Flow Controller Read (sccm)
Graph 2 Right=Sample Flow Controller Read (sccm)
Description=
Serial Number=0001
2 or 3 Graphs=TRUE
Time Range=12 Hours
OSDS Format=
Num to Avg=0
Global 2=0
Global 3=0
Global 4=0
Global 5=0
Shut Down Sequence=
Crisis Shut Down Seq=turn off pump and laser
Write SP2b Data File=TRUE
Write HK File=TRUE
Write Raw Binary Data=TRUE
TabChannelNum=0
OptimizeChannelNum=0
Write HDF5 File=TRUE
NumParticlesPerHDF5File=100000
Laser Temp Set=29
Laser Current Set=1.9
Spare 4 Set=0
Spare 5 Set=0
PMT HV Set=0.46
Interface Board Scaling=<24>
Interface Board Scaling 0=1/(0.000849+0.000261*ln(10000/(65536/VAR-1))+0.000000125*ln(10000/(65536/VAR-1))^3)-273.15
Interface Board Scaling 1=
Interface Board Scaling 2=(1.0 / (1.1135E-3 + 2.368E-4 * (ln(1E4 * (1-(VAR / (65536.0 * 1.001))) / (VAR / (65536.0 * 1.001)))) + 7.396E-8 * (ln(1E4 * (1-(VAR / (65536.0 * 1.001))) / (VAR / (65536.0 * 1.001))))^3)) - 273.15
Interface Board Scaling 3=(1.0 / (1.1135E-3 + 2.368E-4 * (ln(1E4 * (1-(VAR / (65536.0 * 1.001))) / (VAR / (65536.0 * 1.001)))) + 7.396E-8 * (ln(1E4 * (1-(VAR / (65536.0 * 1.001))) / (VAR / (65536.0 * 1.001))))^3)) - 273.15
Interface Board Scaling 4=(1.0 / (1.1135E-3 + 2.368E-4 * (ln(1E4 * (1-(VAR / (65536.0 * 1.001))) / (VAR / (65536.0 * 1.001)))) + 7.396E-8 * (ln(1E4 * (1-(VAR / (65536.0 * 1.001))) / (VAR / (65536.0 * 1.001))))^3)) - 273.15
Interface Board Scaling 5=(1.0 / (1.1135E-3 + 2.368E-4 * (ln(1E4 * (1-(VAR / (65536.0 * 1.001))) / (VAR / (65536.0 * 1.001)))) + 7.396E-8 * (ln(1E4 * (1-(VAR / (65536.0 * 1.001))) / (VAR / (65536.0 * 1.001))))^3)) - 273.15
Interface Board Scaling 6=0.0000625*VAR
Interface Board Scaling 7=VAR/72+105.55
Interface Board Scaling 8=0.125*VAR
Interface Board Scaling 9=0.000125*VAR
Interface Board Scaling 10=0.000125*VAR
Interface Board Scaling 11=2.01*0.0000625 *VAR
Interface Board Scaling 12=
Interface Board Scaling 13=0.000125*VAR
Interface Board Scaling 14=0.000125*VAR
Interface Board Scaling 15=4.57*0.0000625*VAR
Interface Board Scaling 16=0.0152587890625*VAR
Interface Board Scaling 17=1/(0.000894+0.00025*ln((1662.22* VAR)/(39897.3 - VAR))+0.0000002*ln((1662.22*VAR)/(39897.3 -VAR))^3)-273.15
Interface Board Scaling 18=(69.8+11.5) /11.5*0.0000625*VAR
Interface Board Scaling 19=4.57*0.0000625*VAR
Interface Board Scaling 20=0.000125*VAR
Interface Board Scaling 21=1.1*0.0000625 *VAR
Interface Board Scaling 22=
Interface Board Scaling 23=
ABD 0408 Scaling=<16>
ABD 0408 Scaling 0=
ABD 0408 Scaling 1=
ABD 0408 Scaling 2=
ABD 0408 Scaling 3=
ABD 0408 Scaling 4=
ABD 0408 Scaling 5=VAR*0.00625
ABD 0408 Scaling 6=7.98*(6.25E-5*VAR)
ABD 0408 Scaling 7=-84.962*(6.25E-5*VAR-1.8639)
ABD 0408 Scaling 8=
ABD 0408 Scaling 9=
ABD 0408 Scaling 10=
ABD 0408 Scaling 11=
ABD 0408 Scaling 12=
ABD 0408 Scaling 13=
ABD 0408 Scaling 14=
ABD 0408 Scaling 15=
Save Every Nth Particle=1
zip files=TRUE
Particle Density (g/cc)=1.8
Pump Start-Up State=FALSE
[Detector DAC Settings]
Scatter unused A=23790
Scatter unused B=65535
[Incand Parameters]
Graph=Counts
X Mode=Mass
Norm?=FALSE
Cumulative=FALSE
[Control]
Alarms=<3>
Alarms 0.Alarm.Name=TurnPumpON
Alarms 0.Alarm.Channel=Elapsed Time
Alarms 0.Alarm.Condition=>
Alarms 0.Alarm.Threshold=0
Alarms 0.Alarm.Action=Turn Pump On
Alarms 0.Alarm.Hysteresis=0
Alarms 0.Alarm.Target Channel=
Alarms 0.Alarm.Set Value=0
Alarms 0.Alarm.Min Time=0
Alarms 0.Alarm.Sequence=turn off pump and laser
Alarms 0.Alarm.Target Alarm=TurnPumpON
Alarms 1.Alarm.Name=Turn Laser On
Alarms 1.Alarm.Channel=Elapsed Time
Alarms 1.Alarm.Condition=>
Alarms 1.Alarm.Threshold=5
Alarms 1.Alarm.Action=Turn Laser On
Alarms 1.Alarm.Hysteresis=0
Alarms 1.Alarm.Target Channel=
Alarms 1.Alarm.Set Value=0
Alarms 1.Alarm.Min Time=0
Alarms 1.Alarm.Sequence=
Alarms 1.Alarm.Target Alarm=Turn Laser On
Alarms 2.Alarm.Name=StartRecording
Alarms 2.Alarm.Channel=Elapsed Time
Alarms 2.Alarm.Condition=>
Alarms 2.Alarm.Threshold=10
Alarms 2.Alarm.Action=Start Writing Data
Alarms 2.Alarm.Hysteresis=0
Alarms 2.Alarm.Target Channel=
Alarms 2.Alarm.Set Value=0
Alarms 2.Alarm.Min Time=0
Alarms 2.Alarm.Sequence=
Alarms 2.Alarm.Target Alarm=Turn Laser On
Sequences=<0>
Timers=<0>
[Pump]
Pump=TRUE
[# Samples I]
# Samples I=0
[SampleFlow]
SampleFlow (sccm)=30
[SheathFlow]
SheathFlow (sccm)=600
[Polling Interval]
HK Stream Interval (ms)=1000
PbP Stream Interval (ms)=1000
[Fans Settings]
Case Fan Mode=normal
Case Fan On Threshold=35
Case Fan Off Threshold=33
Laser Fan Mode=forced off
Laser Fan On Threshold=27
Laser Fan Off Threshold=24
[Channel Order]
Channel Order=<0>
Digits=<0>
[Streaming Data]
Port=0
Baud Rate=0
Channels=<0>
Bus=Serial Port
[Calculated Channels]
Calculated Channels=<0>
[Calculations]
Calculations=<0>
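
The instrument_parameters values in the YAML shown earlier come from the [Trigger Settings] section of an .ini file like this one. A rough sketch of that mapping (illustrative only; the package's read_xr_ini_file may work differently):

def trigger_settings_to_params(ini_path: str) -> dict[str, float]:
    """Map '[Trigger Settings]' keys such as 'Scatter Transit Time Min'
    to compact parameter names such as 'ScattTransitMin'."""
    rename = {
        "Scatter Transit Time Min": "ScattTransitMin",
        "Scatter Transit Time Max": "ScattTransitMax",
        "Scatter FWHM Min": "ScattFWHMMin",
        "Scatter FWHM Max": "ScattFWHMMax",
        "Scatter Inter Particle Time Min": "ScattInterTimeMin",
        "Incand Transit Time Min": "IncTransitMin",
        "Incand Transit Time Max": "IncTransitMax",
        "Incand FWHM Min": "IncFWHMMin",
        "Incand FWHM Max": "IncFWHMMax",
        "Incand Inter Particle Time Min": "IncInterTimeMin",
    }
    params, in_section = {}, False
    with open(ini_path, "r", encoding="utf-8", errors="ignore") as f:
        for line in f:
            line = line.strip()
            if line.startswith("["):
                in_section = line == "[Trigger Settings]"
                continue
            if in_section and "=" in line:
                key, _, value = line.partition("=")
                if key.strip() in rename:
                    params[rename[key.strip()]] = float(value)
    return params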

View File

@@ -1,159 +0,0 @@
pbp_schema:
Time (sec): float
Packet Time Stamp: float
Flag: float
Dropped Records: float
Record Count: float
Record Size: float
Particle Time Stamp: float
Particle Flags: float
Scatter relPeak: float
Scatter Transit Time: float
Scatter Peak Time: float
Scatter FWHM: float
Scatter Size (nm): float
Incand relPeak: float
Incand Transit Time: float
Incand Peak Time: float
Incand FWHM: float
Incand Delay: float
Incand Mass (fg): float
Reserved: float
hk_schema:
Time Stamp: datetime
Time (sec): float
Time Stamp (UTC sec): float
Elapsed Time: float
Error Code: float
Packet Time Stamp: float
Laser TEC Temp (C): float
Crystal TEC Temp (C): float
Inlet Air Temp (C): float
Computer Heatsink Temp (C): float
Laser Heatsink Temp (C): float
Outlet Air Temp (C): float
YAG Output Monitor (V): float
Cavity Pressure (hPa): float
Laser Driver Power Monitor (uA): float
Laser Driver Current Limit Monitor (A): float
Laser Driver Current Monitor (A): float
Laser TEC Sense: float
Laser Over Temp (On/Off): float
+5V Laser Rail (V): float
' +5V Rail (V)': float
+12V Rail (V): float
High Voltage (V): float
Battery Temp (C): float
UPS Output (V): float
12V Iso Rail (V): float
5V Iso Rail (V): float
3.3V Iso Rail (V): float
Spare 22: float
Spare 23: float
408 Board Spare 0: float
408 Board Spare 1: float
408 Board Spare 2: float
408 Board Spare 3: float
408 Board Spare 4: float
Purge Flow Monitor (sccm): float
System Input Voltage (V): float
Board Temperature (C): float
408 Board Spare 8: float
408 Board Spare 9: float
408 Board Spare 10: float
408 Board Spare 11: float
408 Board Spare 12: float
408 Board Spare 13: float
408 Board Spare 14: float
408 Board Spare 15: float
Sheath Flow Controller Read (vccm): float
Sheath Flow Controller Read (sccm): float
Sheath Flow Controller Pressure (psia): float
Sheath Flow Controller Temperature (C): float
Sample Flow Controller Read (vccm): float
Sample Flow Controller Read (sccm): float
Sample Flow Controller Pressure (psia): float
Sample Flow Controller Temperature (C): float
Fan 1 (RPM): float
Fan 2 (RPM): float
Laser Fan (RPM): float
Spare tach: float
Threshold Crossing Events: float
Dual Qualified Scatter and Incand Particles: float
Qualified Scatter Only Particles: float
Qualified Incand Only Particles: float
Disqualified Due to Scatter Saturation: float
Disqualified Due to Scatter Transit Time Min: float
Disqualified Due to Scatter Transit Time Max: float
Disqualified Due to Scatter FWHM Min: float
Disqualified Due to Scatter FWHM Max: float
Scatter Inter Part Period Min Violation: float
Disqualified Due to Incand Saturation: float
Disqualified Due to Incand Transit Time Min: float
Disqualified Due to Incand Transit Time Max: float
Disqualified Due to Incand FWHM Min: float
Disqualified Due to Incand FWHM Max: float
Incand Inter Part Period Min Violation: float
Baseline Sizer Lo: float
Baseline Sizer Hi: float
Baseline Incand Lo: float
Baseline Incand Hi: float
Bandwidth Sizer Hi: float
Bandwidth Sizer Lo: float
Bandwidth Incand Lo: float
Bandwidth Incand Hi: float
ABD-0408 HK ADCs min: float
ABD-0436 HK ADCs min: float
ABD-0408 HK ADCs max: float
ABD-0436 HK ADCs max: float
Incand Particle Conc (cts/ccm): float
Scattering Particle Conc (cts/ccm): float
Incand Mass Conc (fg/sccm): float
Scattering Mass Conc (fg/sccm): float
Sheath Flow Set Point: float
Sample Flow Set Point: float
Laser Temp Set Point: float
Laser Current Set Point: float
Spare 4 Set Point: float
Spare 5 Set Point: float
PMT HV Set Point: float
Particle Density (g/ccm): float
PbP Packet Time: float
Scatter Bin 1: float
Scatter Bin 2: float
Scatter Bin 3: float
Scatter Bin 4: float
Scatter Bin 5: float
Scatter Bin 6: float
Scatter Bin 7: float
Scatter Bin 8: float
Scatter Bin 9: float
Scatter Bin 10: float
Scatter Bin 11: float
Scatter Bin 12: float
Scatter Bin 13: float
Scatter Bin 14: float
Scatter Bin 15: float
Scatter Bin 16: float
Scatter Bin 17: float
Scatter Bin 18: float
Scatter Bin 19: float
Incand Bin 1: float
Incand Bin 2: float
Incand Bin 3: float
Incand Bin 4: float
Incand Bin 5: float
Incand Bin 6: float
Incand Bin 7: float
Incand Bin 8: float
Incand Bin 9: float
Incand Bin 10: float
Incand Bin 11: float
Incand Bin 12: float
Incand Bin 13: float
Incand Bin 14: float
Incand Bin 15: float
Incand Bin 16: float
Incand Bin 17: float
Incand Bin 18: float
Incand Bin 19: float

View File

@@ -1,228 +0,0 @@
from __future__ import annotations
import pandas as pd
import yaml
import os
from pathlib import Path
from typing import Any
def infer_general_dtype(dtype: Any) -> str:
"""Infer general data type from pandas dtype."""
if pd.api.types.is_integer_dtype(dtype):
return "int"
elif pd.api.types.is_float_dtype(dtype):
return "float"
elif pd.api.types.is_datetime64_any_dtype(dtype):
return "datetime"
else:
return "string"
def load_schema(input_file: str | Path) -> dict[str, str]:
"""Load schema from input file by inferring column types."""
ext = os.path.splitext(str(input_file))[1].lower()
if ext in [".csv", ".zip"]:
df = pd.read_csv(input_file, nrows=100)
elif ext == ".parquet":
df = pd.read_parquet(input_file)
else:
raise ValueError(f"Unsupported file format: {ext}")
schema = {col: infer_general_dtype(dtype) for col, dtype in df.dtypes.items()}
return schema
def get_canonical_schemas() -> dict[str, dict[str, str]]:
"""Return canonical column schemas for SP2XR data."""
pbp_canonical = {
"Time (sec)": "float",
"Packet Time Stamp": "float",
"Flag": "float",
"Dropped Records": "float",
"Record Count": "float",
"Record Size": "float",
"Particle Time Stamp": "float",
"Particle Flags": "float",
"Scatter relPeak": "float",
"Scatter Transit Time": "float",
"Scatter Peak Time": "float",
"Scatter FWHM": "float",
"Scatter Size (nm)": "float",
"Incand relPeak": "float",
"Incand Transit Time": "float",
"Incand Peak Time": "float",
"Incand FWHM": "float",
"Incand Delay": "float",
"Incand Mass (fg)": "float",
"Reserved": "float",
}
hk_canonical = {
"Time Stamp": "datetime",
"Time (sec)": "float",
"Sample Flow Controller Read (sccm)": "float",
"Sample Flow Controller Read (vccm)": "float",
# Core HK columns that are commonly used
"Time Stamp (UTC sec)": "float",
"Elapsed Time": "float",
"Error Code": "float",
"Packet Time Stamp": "float",
"Laser TEC Temp (C)": "float",
"Crystal TEC Temp (C)": "float",
"Inlet Air Temp (C)": "float",
"Computer Heatsink Temp (C)": "float",
"Laser Heatsink Temp (C)": "float",
"Outlet Air Temp (C)": "float",
"YAG Output Monitor (V)": "float",
"Cavity Pressure (hPa)": "float",
"Laser Driver Power Monitor (uA)": "float",
"Laser Driver Current Limit Monitor (A)": "float",
"Laser Driver Current Monitor (A)": "float",
# ... (other HK columns can be added as needed)
}
return {"pbp_canonical": pbp_canonical, "hk_canonical": hk_canonical}
def generate_combined_config(
pbp_file: str | Path, hk_file: str | Path, output_file: str = "config.yaml"
) -> None:
"""Generate config file with both schema definitions and column mappings."""
config = {
"pbp_schema": load_schema(pbp_file),
"hk_schema": load_schema(hk_file),
}
with open(output_file, "w") as f:
yaml.dump(config, f, sort_keys=False)
print(f"Unified config saved to: {output_file}")
def generate_mapping_template(
pbp_file: str | Path,
hk_file: str | Path,
output_file: str = "config_with_mapping.yaml",
) -> None:
"""
Generate enhanced config with column mapping templates.
This creates a config file that allows users to map their instrument-specific
column names to the canonical column names used in the main processing pipeline.
"""
# Load actual file schemas
pbp_schema = load_schema(pbp_file)
hk_schema = load_schema(hk_file)
# Get canonical schemas
canonical_schemas = get_canonical_schemas()
# Create column mapping templates
pbp_mapping = {}
hk_mapping = {}
# For PbP: map file columns to canonical columns
for canonical_col in canonical_schemas["pbp_canonical"]:
# Try to find exact match first
matching_file_col = None
for file_col in pbp_schema.keys():
if file_col.lower() == canonical_col.lower():
matching_file_col = file_col
break
# If exact match found, use it; otherwise leave as template
pbp_mapping[canonical_col] = (
matching_file_col
or f"YOUR_COLUMN_NAME_FOR_{canonical_col.replace(' ', '_').replace('(', '').replace(')', '').upper()}"
)
# For HK: map file columns to canonical columns
for canonical_col in canonical_schemas["hk_canonical"]:
matching_file_col = None
for file_col in hk_schema.keys():
if file_col.lower() == canonical_col.lower():
matching_file_col = file_col
break
hk_mapping[canonical_col] = (
matching_file_col
or f"YOUR_COLUMN_NAME_FOR_{canonical_col.replace(' ', '_').replace('(', '').replace(')', '').upper()}"
)
# Build enhanced config
config = {
"# INSTRUCTIONS": [
"This config file contains both schema definitions and column mappings.",
"1. The *_schema sections define the data types for your input files.",
"2. The *_column_mapping sections map your file columns to canonical names.",
"3. Replace placeholder values (YOUR_COLUMN_NAME_FOR_*) with actual column names from your files.",
"4. If your file doesn't have a particular canonical column, set it to null or remove the line.",
"5. The output parquet files will use the canonical column names for consistency.",
],
"pbp_schema": pbp_schema,
"hk_schema": hk_schema,
"pbp_canonical_schema": canonical_schemas["pbp_canonical"],
"hk_canonical_schema": canonical_schemas["hk_canonical"],
"pbp_column_mapping": pbp_mapping,
"hk_column_mapping": hk_mapping,
}
with open(output_file, "w") as f:
yaml.dump(config, f, sort_keys=False, default_flow_style=False)
print(f"Enhanced config with column mapping saved to: {output_file}")
print("\nNext steps:")
print(
"1. Open the config file and replace placeholder column mappings with your actual column names"
)
print(
"2. Remove or set to null any canonical columns that don't exist in your data"
)
print("3. Use this config file with the updated CSV to Parquet conversion process")
def apply_column_mapping(
df: pd.DataFrame, column_mapping: dict[str, str | None]
) -> pd.DataFrame:
"""
Apply column name mapping to standardize column names.
Parameters
----------
df : pd.DataFrame
Input dataframe with instrument-specific column names
column_mapping : dict[str, str | None]
Mapping from canonical names to file column names
Returns
-------
pd.DataFrame
DataFrame with standardized column names
"""
# Create reverse mapping: file_column_name -> canonical_name
reverse_mapping = {}
for canonical_name, file_column in column_mapping.items():
if (
file_column
and file_column in df.columns
and not file_column.startswith("YOUR_COLUMN_NAME_FOR_")
):
reverse_mapping[file_column] = canonical_name
# Rename columns using reverse mapping
df_renamed = df.rename(columns=reverse_mapping)
return df_renamed
# Example usage
if __name__ == "__main__":
# Legacy function for backward compatibility
# generate_combined_config("pbp_meta.parquet", "hk_meta.parquet")
# New enhanced function
pbp_tmp_file = "/data/user/bertoz_b/merlin6data/SP2XR_code/tests/data/mini_SP2XR_PbP_20190409110737_x0001.zip"
hk_tmp_file = "/data/user/bertoz_b/merlin6data/SP2XR_code/tests/data/mini_SP2XR_hk_20190409110737_x0001.zip"
generate_mapping_template(pbp_tmp_file, hk_tmp_file)

Binary file not shown.

Binary file not shown.

View File

@@ -0,0 +1,458 @@
#!/usr/bin/env python3
"""
Generate SP2XR configuration files by automatically detecting PbP and HK files in a directory.
This script scans a directory for SP2XR data files (CSV/ZIP/Parquet) and generates
configuration files with proper schemas and column mappings.
"""
from __future__ import annotations
import argparse
import pandas as pd
import yaml
import os
from pathlib import Path
from typing import Any
def infer_general_dtype(dtype: Any) -> str:
"""Infer general data type from pandas dtype."""
if pd.api.types.is_integer_dtype(dtype):
return "int"
elif pd.api.types.is_float_dtype(dtype):
return "float"
elif pd.api.types.is_datetime64_any_dtype(dtype):
return "datetime"
else:
return "string"
def find_sp2xr_files(directory: str | Path) -> tuple[list[Path], list[Path]]:
"""
Find PbP and HK files in the given directory and all subdirectories.
Parameters
----------
directory : str | Path
Directory to search for SP2XR files (searches recursively)
Returns
-------
tuple[list[Path], list[Path]]
Lists of PbP files and HK files found
"""
directory = Path(directory)
if not directory.exists():
raise FileNotFoundError(f"Directory not found: {directory}")
# Common SP2XR file patterns
pbp_patterns = ["*PbP*", "*pbp*", "*Pbp*"]
hk_patterns = ["*hk*", "*HK*", "*Hk*"]
file_extensions = ["*.csv", "*.zip", "*.parquet"]
pbp_files = []
hk_files = []
# Search for files matching patterns (including subdirectories)
for ext in file_extensions:
for pattern in pbp_patterns:
pbp_files.extend(directory.glob(f"**/{pattern}{ext}"))
for pattern in hk_patterns:
hk_files.extend(directory.glob(f"**/{pattern}{ext}"))
# Remove duplicates and sort
pbp_files = sorted(list(set(pbp_files)))
hk_files = sorted(list(set(hk_files)))
return pbp_files, hk_files
def load_schema(input_file: str | Path, nrows: int = 100) -> dict[str, str]:
"""
Load schema from input file by inferring column types.
Parameters
----------
input_file : str | Path
Path to the input file
nrows : int
Number of rows to read for type inference (for CSV files)
Returns
-------
dict[str, str]
Mapping of column names to data types
"""
ext = os.path.splitext(str(input_file))[1].lower()
if ext in [".csv", ".zip"]:
df = pd.read_csv(input_file, nrows=nrows)
elif ext == ".parquet":
# For parquet, read the file and keep only an empty frame that preserves the column schema
pf = pd.read_parquet(input_file, engine="pyarrow")
df = pf.head(0) # Empty dataframe with schema
else:
raise ValueError(f"Unsupported file format: {ext}")
schema = {col: infer_general_dtype(dtype) for col, dtype in df.dtypes.items()}
return schema
def get_canonical_schemas() -> dict[str, dict[str, str]]:
"""Return canonical column schemas for SP2XR data."""
pbp_canonical = {
"Time (sec)": "float",
"Packet Time Stamp": "float",
"Flag": "float",
"Dropped Records": "float",
"Record Count": "float",
"Record Size": "float",
"Particle Time Stamp": "float",
"Particle Flags": "float",
"Scatter relPeak": "float",
"Scatter Transit Time": "float",
"Scatter Peak Time": "float",
"Scatter FWHM": "float",
"Scatter Size (nm)": "float",
"Incand relPeak": "float",
"Incand Transit Time": "float",
"Incand Peak Time": "float",
"Incand FWHM": "float",
"Incand Delay": "float",
"Incand Mass (fg)": "float",
"Reserved": "float",
}
hk_canonical = {
"Time Stamp": "datetime",
"Time (sec)": "float",
"Sample Flow Controller Read (sccm)": "float",
"Sample Flow Controller Read (vccm)": "float",
"Time Stamp (UTC sec)": "float",
"Elapsed Time": "float",
"Error Code": "float",
"Packet Time Stamp": "float",
"Laser TEC Temp (C)": "float",
"Crystal TEC Temp (C)": "float",
"Inlet Air Temp (C)": "float",
"Computer Heatsink Temp (C)": "float",
"Laser Heatsink Temp (C)": "float",
"Outlet Air Temp (C)": "float",
"YAG Output Monitor (V)": "float",
"Cavity Pressure (hPa)": "float",
"Laser Driver Power Monitor (uA)": "float",
"Laser Driver Current Limit Monitor (A)": "float",
"Laser Driver Current Monitor (A)": "float",
}
return {"pbp_canonical": pbp_canonical, "hk_canonical": hk_canonical}
def generate_basic_config(
pbp_file: Path,
hk_file: Path,
schema_output: str = "config_schema.yaml",
ini_file: str = None,
instrument_output: str = None,
) -> None:
"""Generate basic config schema file with data type definitions only."""
print(f"Reading PbP schema from: {pbp_file}")
pbp_schema = load_schema(pbp_file)
print(f"Reading HK schema from: {hk_file}")
hk_schema = load_schema(hk_file)
config = {
"pbp_schema": pbp_schema,
"hk_schema": hk_schema,
}
# Create output directory if it doesn't exist
schema_path = Path(schema_output)
schema_path.parent.mkdir(parents=True, exist_ok=True)
with open(schema_output, "w") as f:
yaml.dump(config, f, sort_keys=False)
print(f"Data schema config saved to: {schema_output}")
# Generate separate instrument settings config from INI file
if ini_file:
# Determine instrument settings output filename
if instrument_output:
instrument_path = Path(instrument_output)
else:
instrument_path = (
schema_path.parent / f"{schema_path.stem}_instrument_settings.yaml"
)
try:
from sp2xr.helpers import export_xr_ini_to_yaml_with_source
export_xr_ini_to_yaml_with_source(ini_file, str(instrument_path))
print(f"Instrument settings config saved to: {instrument_path}")
except ImportError:
# Fallback to original function if new one doesn't exist yet
from sp2xr.helpers import export_xr_ini_to_yaml
export_xr_ini_to_yaml(ini_file, str(instrument_path))
print(f"Instrument settings config saved to: {instrument_path}")
except Exception as e:
print(f"Warning: Could not convert INI to YAML: {e}")
# Still reference the original INI file as fallback
config["calibration_file"] = ini_file
with open(schema_output, "w") as f:
yaml.dump(config, f, sort_keys=False)
print(f"Added INI file reference as fallback: {Path(ini_file).name}")
def generate_mapping_config(
pbp_file: Path,
hk_file: Path,
schema_output: str = "config_schema_with_mapping.yaml",
ini_file: str = None,
instrument_output: str = None,
) -> None:
"""Generate enhanced config schema with column mapping templates."""
print(f"Reading PbP schema from: {pbp_file}")
pbp_schema = load_schema(pbp_file)
print(f"Reading HK schema from: {hk_file}")
hk_schema = load_schema(hk_file)
# Get canonical schemas
canonical_schemas = get_canonical_schemas()
# Create column mapping templates
pbp_mapping = {}
hk_mapping = {}
# For PbP: map file columns to canonical columns
for canonical_col in canonical_schemas["pbp_canonical"]:
matching_file_col = None
for file_col in pbp_schema.keys():
if file_col.lower() == canonical_col.lower():
matching_file_col = file_col
break
pbp_mapping[canonical_col] = (
matching_file_col or canonical_col # Use canonical name as default
)
# For HK: map file columns to canonical columns
for canonical_col in canonical_schemas["hk_canonical"]:
matching_file_col = None
for file_col in hk_schema.keys():
if file_col.lower() == canonical_col.lower():
matching_file_col = file_col
break
hk_mapping[canonical_col] = (
matching_file_col or canonical_col # Use canonical name as default
)
# Build enhanced config
config = {
"# INSTRUCTIONS": [
"This config file contains both schema definitions and column mappings.",
"1. The *_schema sections define the data types for your input files.",
"2. The *_column_mapping sections map your file columns to canonical names.",
"3. Update column mappings if your files use different column names.",
"4. If your file doesn't have a particular canonical column, set it to null or remove the line.",
"5. The output parquet files will use the canonical column names for consistency.",
],
"pbp_schema": pbp_schema,
"hk_schema": hk_schema,
"pbp_canonical_schema": canonical_schemas["pbp_canonical"],
"hk_canonical_schema": canonical_schemas["hk_canonical"],
"pbp_column_mapping": pbp_mapping,
"hk_column_mapping": hk_mapping,
}
# Create output directory if it doesn't exist
schema_path = Path(schema_output)
schema_path.parent.mkdir(parents=True, exist_ok=True)
with open(schema_output, "w") as f:
yaml.dump(config, f, sort_keys=False, default_flow_style=False)
print(f"Enhanced data schema config with column mapping saved to: {schema_output}")
# Generate separate instrument settings config from INI file
if ini_file:
# Determine instrument settings output filename
if instrument_output:
instrument_path = Path(instrument_output)
else:
instrument_path = (
schema_path.parent / f"{schema_path.stem}_instrument_settings.yaml"
)
try:
from sp2xr.helpers import export_xr_ini_to_yaml_with_source
export_xr_ini_to_yaml_with_source(ini_file, str(instrument_path))
print(f"Instrument settings config saved to: {instrument_path}")
except ImportError:
# Fallback to original function if new one doesn't exist yet
from sp2xr.helpers import export_xr_ini_to_yaml
export_xr_ini_to_yaml(ini_file, str(instrument_path))
print(f"Instrument settings config saved to: {instrument_path}")
except Exception as e:
print(f"Warning: Could not convert INI to YAML: {e}")
# Still reference the original INI file as fallback
config["calibration_file"] = ini_file
with open(schema_output, "w") as f:
yaml.dump(config, f, sort_keys=False, default_flow_style=False)
print(f"Added INI file reference as fallback: {Path(ini_file).name}")
def parse_args():
"""Parse command line arguments."""
parser = argparse.ArgumentParser(
description="Generate SP2XR configuration files from data directory",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog="""
Examples:
# Generate basic config schema from files in current directory
python sp2xr_generate_config.py .
# Generate config schema from specific directory
python sp2xr_generate_config.py /path/to/sp2xr/data
# Generate config schema with column mapping support
python sp2xr_generate_config.py /path/to/data --mapping
# Specify custom schema and instrument settings filenames
python sp2xr_generate_config.py /path/to/data --schema-output my_schema.yaml --instrument-output my_settings.yaml
# Generate mapping config with custom names
python sp2xr_generate_config.py /path/to/data --mapping --schema-output campaign_schema.yaml --instrument-output campaign_settings.yaml
""",
)
parser.add_argument(
"directory", help="Directory containing SP2XR files (PbP and HK files)"
)
parser.add_argument(
"--schema-output",
"-s",
default="config_schema.yaml",
help="Output filename for data schema config (default: config_schema.yaml)",
)
parser.add_argument(
"--instrument-output",
"-i",
default=None,
help="Output filename for instrument settings config (default: {schema_output}_instrument_settings.yaml)",
)
parser.add_argument(
"--mapping",
"-m",
action="store_true",
help="Generate config with column mapping support (creates config_with_mapping.yaml)",
)
parser.add_argument(
"--pbp-file", help="Specify specific PbP file instead of auto-detection"
)
parser.add_argument(
"--hk-file", help="Specify specific HK file instead of auto-detection"
)
return parser.parse_args()
def main():
"""Main entry point."""
args = parse_args()
try:
# Use specific files if provided, otherwise auto-detect
if args.pbp_file and args.hk_file:
pbp_file = Path(args.pbp_file)
hk_file = Path(args.hk_file)
if not pbp_file.exists():
raise FileNotFoundError(f"PbP file not found: {pbp_file}")
if not hk_file.exists():
raise FileNotFoundError(f"HK file not found: {hk_file}")
else:
# Auto-detect files in directory
print(f"Searching for SP2XR files in: {args.directory}")
pbp_files, hk_files = find_sp2xr_files(args.directory)
if not pbp_files:
raise FileNotFoundError(
"No PbP files found. Looking for files with 'PbP', 'pbp', or 'Pbp' in the name."
)
if not hk_files:
raise FileNotFoundError(
"No HK files found. Looking for files with 'hk', 'HK', or 'Hk' in the name."
)
# Use the first file found for each type
pbp_file = pbp_files[0]
hk_file = hk_files[0]
print(f"Found {len(pbp_files)} PbP file(s), using: {pbp_file.name}")
print(f"Found {len(hk_files)} HK file(s), using: {hk_file.name}")
# Check for INI files in the directory
try:
from sp2xr.helpers import find_and_validate_ini_files
ini_file = find_and_validate_ini_files(str(args.directory))
if ini_file:
print(f"Found consistent INI calibration file: {Path(ini_file).name}")
else:
print("No INI calibration files found in directory")
except ValueError as e:
print(f"WARNING: {e}")
print("You should process data with different calibrations separately.")
except ImportError:
print("Could not import INI validation function")
# Generate configuration
if args.mapping:
schema_file = (
"config_schema_with_mapping.yaml"
if args.schema_output == "config_schema.yaml"
else args.schema_output
)
generate_mapping_config(
pbp_file,
hk_file,
schema_file,
ini_file if "ini_file" in locals() else None,
args.instrument_output,
)
else:
generate_basic_config(
pbp_file,
hk_file,
args.schema_output,
ini_file if "ini_file" in locals() else None,
args.instrument_output,
)
print("\nConfiguration generation completed successfully!")
except Exception as e:
print(f"Error: {e}")
return 1
return 0
if __name__ == "__main__":
exit(main())

View File

@@ -4,6 +4,7 @@ import re
import yaml
import argparse
import pandas as pd
import numpy as np
import dask.dataframe as dd
from pathlib import Path
from dask_jobqueue import SLURMCluster
@@ -43,7 +44,7 @@ def load_and_resolve_config(args):
"rho_eff": choose(args.BC_rho, base, "bc.rho_eff", None),
"BC_type": choose(args.BC_type, base, "bc.type", None),
"cluster": {
"use_local": choose(args.local, base, "cluster.use_local"),
"use_local": args.local or get(base, "cluster.use_local", False),
"cores": choose(args.cores, base, "cluster.cores", None),
"memory": choose(args.memory, base, "cluster.memory", None),
"walltime": choose(args.walltime, base, "cluster.walltime", None),
@@ -98,9 +99,9 @@ def load_and_resolve_config(args):
def initialize_cluster(config):
if config["cluster"].get("use_local", False):
return make_slurm_cluster(config)
else:
return make_local_cluster(config)
else:
return make_slurm_cluster(config)
def make_slurm_cluster(config):
@@ -137,14 +138,12 @@ def make_local_cluster(config):
total_cores = multiprocessing.cpu_count()
total_memory = psutil.virtual_memory().total # in bytes
# Use all cores or config override
cores = config["cluster"].get("cores", total_cores)
memory_limit = config["cluster"].get("memory")
# For local clusters, always auto-detect resources (ignore config values meant for SLURM)
cores = total_cores
memory_limit_bytes = int(total_memory * 0.8)
memory_limit = f"{memory_limit_bytes // (1024**3)}GB"
# If memory not provided, use 80% of total
if memory_limit is None:
memory_limit_bytes = int(total_memory * 0.8)
memory_limit = f"{memory_limit_bytes // (1024**3)}GB"
print(f"Auto-detected local resources: {cores} cores, {memory_limit} memory")
cluster = LocalCluster(
n_workers=cores,
@@ -161,7 +160,7 @@ def make_local_cluster(config):
timeout="300s",
)
print(f"Dask LOCAL dashboard: {client.dashboard_link}")
return client
return client, cluster
def extract_partitioned_datetimes(parquet_path: str) -> list[pd.Timestamp]:
@@ -288,7 +287,9 @@ def parse_args():
)
# cluster / resource knobs (all optional)
p.add_argument("--local", action="store_false", help="Run Local Cluster")
p.add_argument(
"--local", action="store_true", help="Use local cluster instead of SLURM"
)
p.add_argument("--cores", type=int, default=None, help="CPU cores per SLURM job")
p.add_argument("--memory", default=None, help="RAM per job")
p.add_argument("--walltime", default=None, help="SLURM wall-time")
@@ -419,7 +420,7 @@ def read_xr_ini_file(fname):
return default
params = {}
with open(fname, "r") as f:
with open(fname, "r", encoding="utf-8", errors="ignore") as f:
for line in f:
if "Disqualified" in line: # skip irrelevant lines
continue
@@ -433,6 +434,99 @@ def read_xr_ini_file(fname):
return params
def find_and_validate_ini_files(directory):
"""
Find all .ini files in a directory and validate they are consistent.
Parameters
----------
directory : str
Directory to search for .ini files
Returns
-------
str or None
Path to representative .ini file if all are consistent, None if none found
Raises
------
ValueError
If multiple different .ini files are found
"""
import os
import glob
# Find all .ini files recursively
ini_pattern = os.path.join(directory, "**", "*Calibration*.ini")
ini_files = glob.glob(ini_pattern, recursive=True)
if not ini_files:
return None
if len(ini_files) == 1:
return ini_files[0]
# Compare all .ini files to ensure they're identical
reference_params = None
reference_file = None
for ini_file in ini_files:
try:
params = read_xr_ini_file(ini_file)
if reference_params is None:
reference_params = params
reference_file = ini_file
else:
# Compare with reference
if params != reference_params:
raise ValueError(
f"Multiple different .ini files found in {directory}:\n"
f" Reference: {reference_file}\n"
f" Different: {ini_file}\n"
f"Please process data with different calibrations separately."
)
except Exception as e:
import warnings
warnings.warn(f"Could not read .ini file {ini_file}: {e}")
return reference_file
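A hedged usage sketch for the helper above; the directory is a placeholder and the calling code is an assumption, not taken from the repository.

try:
    ini_file = find_and_validate_ini_files("data/pbp_files_parquet")
    if ini_file is None:
        print("No *Calibration*.ini found; instrument defaults will be used")
except ValueError as err:
    print(f"Inconsistent calibration files: {err}")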
def export_xr_ini_to_yaml_with_source(ini_path, yaml_path):
"""
Convert an SP2-XR .ini file to a structured YAML configuration with source traceability.
- Groups instrument parameters under 'instrument_parameters'
- Pre-populates calibration and histogram sections for user editing
- Merges with existing YAML if present (preserving user edits)
- Includes source INI file path for traceability
"""
from datetime import datetime
ini_params = read_xr_ini_file(ini_path)
yaml_path = Path(yaml_path)
# Base structure with metadata
params = {
"metadata": {
"source_ini_file": str(Path(ini_path).resolve()),
"generated_on": datetime.now().isoformat(),
"generated_by": "sp2xr_generate_config.py",
},
"instrument_parameters": ini_params,
"Signal_saturation": {"IncSatPoint": 1.7e9, "ScattSatPoint": 1.7e9},
}
# Save YAML
with open(yaml_path, "w") as f:
yaml.dump(params, f, sort_keys=False)
print(
f"Parameters exported to {yaml_path} with structured sections and editable placeholders"
)
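A one-off conversion using this helper could look like the following; both paths are illustrative (the output name matches the paths.instrument_config entry used in the example configs).

export_xr_ini_to_yaml_with_source(
    "data/raw/SP2XR_Calibration.ini",      # hypothetical source .ini
    "config/my_instrument_settings.yaml",  # hypothetical destination YAML
)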
def export_xr_ini_to_yaml(ini_path, yaml_path):
"""
Convert an SP2-XR .ini file to a structured YAML configuration.
@@ -480,7 +574,7 @@ def export_xr_ini_to_yaml(ini_path, yaml_path):
yaml.dump(params, f, sort_keys=False)
print(
f"Parameters exported to {yaml_path} with structured sections and editable placeholders"
f"Parameters exported to {yaml_path} with structured sections and editable placeholders"
)
@@ -614,3 +708,41 @@ def extract_sp2xr_filename_parts(file_path: str | Path) -> tuple[str, str]:
folder_name = file_path_obj.parent.name
return file_name_cut, folder_name
def calculate_delta_sec(df):
"""
This function calculates the difference in seconds between the columns
'Time (sec)' and 'first_val' in the input DataFrame.
Parameters
----------
df : pandas DataFrame
The columns 'Time (sec)' and 'first_val' must be present in the DataFrame.
Returns
-------
pandas Series
Floored 'Time (sec)' values minus 'first_val', in seconds.
"""
return np.floor(df["Time (sec)"]) - df["first_val"]
def extract_datetime(df):
"""
This function extracts the datetime encoded in the SP2XR file name.
Parameters
----------
df : Pandas DataFrame
DataFrame containing the column 'path' with the original SP2XR file name.
Returns
-------
Pandas Series
Date and time parsed from the date token in the 'path' column.
"""
# return pd.to_datetime(df['orig_file_name'].split('_')[-2])
return pd.to_datetime(df["path"].split("_")[-2])

View File

@@ -1,159 +0,0 @@
pbp_schema:
Time (sec): float
Packet Time Stamp: float
Flag: float
Dropped Records: float
Record Count: float
Record Size: float
Particle Time Stamp: float
Particle Flags: float
Scatter relPeak: float
Scatter Transit Time: float
Scatter Peak Time: float
Scatter FWHM: float
Scatter Size (nm): float
Incand relPeak: float
Incand Transit Time: float
Incand Peak Time: float
Incand FWHM: float
Incand Delay: float
Incand Mass (fg): float
Reserved: float
hk_schema:
Time Stamp: datetime
Time (sec): float
Time Stamp (UTC sec): float
Elapsed Time: float
Error Code: float
Packet Time Stamp: float
Laser TEC Temp (C): float
Crystal TEC Temp (C): float
Inlet Air Temp (C): float
Computer Heatsink Temp (C): float
Laser Heatsink Temp (C): float
Outlet Air Temp (C): float
YAG Output Monitor (V): float
Cavity Pressure (hPa): float
Laser Driver Power Monitor (uA): float
Laser Driver Current Limit Monitor (A): float
Laser Driver Current Monitor (A): float
Laser TEC Sense: float
Laser Over Temp (On/Off): float
+5V Laser Rail (V): float
' +5V Rail (V)': float
+12V Rail (V): float
High Voltage (V): float
Battery Temp (C): float
UPS Output (V): float
12V Iso Rail (V): float
5V Iso Rail (V): float
3.3V Iso Rail (V): float
Spare 22: float
Spare 23: float
408 Board Spare 0: float
408 Board Spare 1: float
408 Board Spare 2: float
408 Board Spare 3: float
408 Board Spare 4: float
Purge Flow Monitor (sccm): float
System Input Voltage (V): float
Board Temperature (C): float
408 Board Spare 8: float
408 Board Spare 9: float
408 Board Spare 10: float
408 Board Spare 11: float
408 Board Spare 12: float
408 Board Spare 13: float
408 Board Spare 14: float
408 Board Spare 15: float
Sheath Flow Controller Read (vccm): float
Sheath Flow Controller Read (sccm): float
Sheath Flow Controller Pressure (psia): float
Sheath Flow Controller Temperature (C): float
Sample Flow Controller Read (vccm): float
Sample Flow Controller Read (sccm): float
Sample Flow Controller Pressure (psia): float
Sample Flow Controller Temperature (C): float
Fan 1 (RPM): float
Fan 2 (RPM): float
Laser Fan (RPM): float
Spare tach: float
Threshold Crossing Events: float
Dual Qualified Scatter and Incand Particles: float
Qualified Scatter Only Particles: float
Qualified Incand Only Particles: float
Disqualified Due to Scatter Saturation: float
Disqualified Due to Scatter Transit Time Min: float
Disqualified Due to Scatter Transit Time Max: float
Disqualified Due to Scatter FWHM Min: float
Disqualified Due to Scatter FWHM Max: float
Scatter Inter Part Period Min Violation: float
Disqualified Due to Incand Saturation: float
Disqualified Due to Incand Transit Time Min: float
Disqualified Due to Incand Transit Time Max: float
Disqualified Due to Incand FWHM Min: float
Disqualified Due to Incand FWHM Max: float
Incand Inter Part Period Min Violation: float
Baseline Sizer Lo: float
Baseline Sizer Hi: float
Baseline Incand Lo: float
Baseline Incand Hi: float
Bandwidth Sizer Hi: float
Bandwidth Sizer Lo: float
Bandwidth Incand Lo: float
Bandwidth Incand Hi: float
ABD-0408 HK ADCs min: float
ABD-0436 HK ADCs min: float
ABD-0408 HK ADCs max: float
ABD-0436 HK ADCs max: float
Incand Particle Conc (cts/ccm): float
Scattering Particle Conc (cts/ccm): float
Incand Mass Conc (fg/sccm): float
Scattering Mass Conc (fg/sccm): float
Sheath Flow Set Point: float
Sample Flow Set Point: float
Laser Temp Set Point: float
Laser Current Set Point: float
Spare 4 Set Point: float
Spare 5 Set Point: float
PMT HV Set Point: float
Particle Density (g/ccm): float
PbP Packet Time: float
Scatter Bin 1: float
Scatter Bin 2: float
Scatter Bin 3: float
Scatter Bin 4: float
Scatter Bin 5: float
Scatter Bin 6: float
Scatter Bin 7: float
Scatter Bin 8: float
Scatter Bin 9: float
Scatter Bin 10: float
Scatter Bin 11: float
Scatter Bin 12: float
Scatter Bin 13: float
Scatter Bin 14: float
Scatter Bin 15: float
Scatter Bin 16: float
Scatter Bin 17: float
Scatter Bin 18: float
Scatter Bin 19: float
Incand Bin 1: float
Incand Bin 2: float
Incand Bin 3: float
Incand Bin 4: float
Incand Bin 5: float
Incand Bin 6: float
Incand Bin 7: float
Incand Bin 8: float
Incand Bin 9: float
Incand Bin 10: float
Incand Bin 11: float
Incand Bin 12: float
Incand Bin 13: float
Incand Bin 14: float
Incand Bin 15: float
Incand Bin 16: float
Incand Bin 17: float
Incand Bin 18: float
Incand Bin 19: float
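Wherever these schema definitions live after this cleanup, a minimal sketch of applying them when reading raw files could look like this; the file location and the float-to-float64 mapping are assumptions.

import yaml
import pandas as pd

with open("config/schemas.yaml") as f:  # hypothetical new location of the schema
    schemas = yaml.safe_load(f)

pbp_dtypes = {col: "float64" for col, kind in schemas["pbp_schema"].items() if kind == "float"}
pbp = pd.read_csv("example_pbp_file.csv", dtype=pbp_dtypes)  # placeholder input file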

View File

@@ -1,59 +0,0 @@
paths:
input_pbp: /data/user/bertoz_b/merlin6data/SP2XR/data/NyA/SP2XR_pbp_parquet
input_hk: /data/user/bertoz_b/merlin6data/SP2XR/data/NyA/SP2XR_hk_parquet
output: tests/SP2XR_NyA_processed_data_60s
instrument_config: tests/instrument_config.yaml
workflow:
conc: true
BC_hist: true
scatt_hist: true
timelag_hist: false
dt: 60 # seconds
repartition: '1h'
max_partition_size: "200MB"
saving_schema: ['date']
cluster:
use_local: false
cores: 16
processes: 8
memory: 128GB
walltime: "2-00:59:00"
partition: general
log_dir: ./slurm_out
chunking:
freq: '5d'
start_date: '2020-08-24'
end_date: null
bc:
rho_eff: 1800
type: constant_effective_density
histo:
inc:
min_mass: 0.3
max_mass: 400
n_bins: 50
scatt:
min_D: 100
max_D: 500
n_bins: 20
timelag:
min: -10
max: 400
n_bins: 100
mixing_state:
threshold: 50
inc_scatt_ratio: 1.1
calibration:
incandescence:
curve_type: "polynomial"
parameters: [0.05, 2.0470000507725255e-07]
scattering:
curve_type: "powerlaw"
parameters: [17.21724257, 0.16908516, -1.49431104]
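Finally, a minimal sketch of consuming a run configuration shaped like the file removed above; the loader and path are assumptions, while the key names follow the YAML itself.

import yaml

with open("config/run_config.yaml") as f:  # hypothetical path to a config like the one above
    cfg = yaml.safe_load(f)

dt = cfg["workflow"]["dt"]                          # averaging window in seconds
use_local = cfg["cluster"].get("use_local", False)  # False -> SLURM cluster
rho_eff = cfg["bc"]["rho_eff"]                      # BC effective density (kg/m3)
print(f"dt={dt}s, local={use_local}, rho_eff={rho_eff} kg/m3")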