diff --git a/config/complete_example.yaml b/config/complete_example.yaml
new file mode 100644
index 0000000..652ad65
--- /dev/null
+++ b/config/complete_example.yaml
@@ -0,0 +1,121 @@
+# SP2XR Complete Configuration Example (paths, workflow, cluster, chunking, bc, histo, mixing_state, calibration)
+
+# ============================================================================
+# METADATA - Optional documentation for this configuration
+# ============================================================================
+metadata:
+  campaign: "Example_Campaign"
+  instrument: "PSI SP2XR"
+  operator: "Barbara Bertozzi"
+  description: "Complete example configuration with all features enabled"
+  created: "2025-09-29"  # Keep quoted so it stays a string, not a YAML date
+
+# ============================================================================
+# FILE PATHS - All input and output locations
+# ============================================================================
+paths:
+  input_pbp: data/pbp_files_parquet
+  input_hk: data/hk_files_parquet
+  output: data/SP2XR_processed_1min
+  instrument_config: config/my_instrument_settings.yaml
+
+
+# ============================================================================
+# WORKFLOW SETTINGS - Complete analysis pipeline
+# ============================================================================
+workflow:
+  # Analysis components
+  conc: true  # Number and mass concentrations
+  BC_hist: true  # BC mass distributions (dNdlogDmev, dMdlogDmev)
+  scatt_hist: true  # Scattering size distributions (dNdlogDsc)
+  timelag_hist: false  # Time-lag distributions for mixing state analysis (disabled in this example)
+
+  # Time and data management
+  dt: 60  # Time resolution (seconds): 1, 10, 60, 300, 3600
+  repartition: "1h"  # Repartition frequency: '15min', '1h', '6h', '1d'
+  max_partition_size: "200MB"  # Memory management
+  saving_schema: ["date", "hour"]  # Output partitioning scheme
+
+
+# ============================================================================
+# CLUSTER SETTINGS - HPC and local computing options
+# ============================================================================
+cluster:
+  # Execution mode
+  use_local: true  # true for local, false for SLURM cluster
+
+  # Resource allocation
+  cores: 32  # Total CPU cores
+  processes: 16  # Worker processes (usually cores/2)
+  memory: "256GB"  # Total memory (quoted like max_partition_size)
+
+  # SLURM-specific settings
+  walltime: "4-00:00:00"  # Job time limit (days-hours:minutes:seconds); keep quoted, digits-and-colons can be retyped by YAML 1.1 loaders
+  partition: general  # SLURM partition: general, bigmem, gpu, etc.
+  log_dir: ./slurm_out  # SLURM log directory
+
+
+# ============================================================================
+# TIME CHUNKING - Temporal data processing strategy
+# ============================================================================
+chunking:
+  freq: "7d"  # Chunk frequency: '1d', '3d', '7d', '1M'
+  start_date: null  # Start date (YYYY-MM-DD format); quote it when set, e.g. "2019-05-08", so it stays a string
+  end_date: null  # End date (YYYY-MM-DD format); quote it when set
+
+
+# ============================================================================
+# BLACK CARBON PROPERTIES - Physical and optical parameters
+# ============================================================================
+bc:
+  # Density settings
+  rho_eff: 1800  # Effective density (kg/m³)
+  type: constant_effective_density  # Density model
+
+# ============================================================================
+# DETAILED HISTOGRAM SETTINGS - Size and mass distribution parameters
+# ============================================================================
+histo:
+  # Incandescence (BC mass) distributions
+  inc:
+    min_mass: 0.3  # Minimum mass (fg)
+    max_mass: 400  # Maximum mass (fg)
+    n_bins: 50  # Number of bins
+    # log_spacing: true  # Logarithmic bin spacing
+
+  # Scattering (optical diameter) distributions
+  scatt:
+    min_D: 100  # Minimum diameter (nm)
+    max_D: 500  # Maximum diameter (nm)
+    n_bins: 20  # Number of bins
+    # log_spacing: true  # Logarithmic bin spacing
+
+  # Time-lag distributions (mixing state analysis)
+  timelag:
+    min: -10  # Minimum time lag (ns)
+    max: 400  # Maximum time lag (ns)
+    n_bins: 100  # Number of bins
+    # log_spacing: false  # Linear bin spacing for time-lag
+
+# ============================================================================
+# MIXING STATE ANALYSIS - Particle coating classification
+# ============================================================================
+mixing_state:
+  # Classification thresholds
+  threshold: 50  # Thick coating threshold (nm)
+  inc_scatt_ratio: 1.1  # Minimum incandescence/scattering ratio
+
+
+# ============================================================================
+# ADVANCED CALIBRATION PARAMETERS
+# ============================================================================
+calibration:
+  # Incandescence calibration
+  incandescence:
+    curve_type: "polynomial"  # "polynomial", "powerlaw", or "spline"
+    parameters: [0.05, 2.047e-07, -1.2e-15]  # Calibration coefficients
+
+  # Scattering calibration
+  scattering:
+    curve_type: "powerlaw"  # "polynomial", "powerlaw", or "spline"
+    parameters: [17.22, 0.169, -1.494]  # [a, b, c] for powerlaw
diff --git a/meta_files/config_with_mapping.yaml b/config/my_data_schema.yaml
similarity index 95%
rename from meta_files/config_with_mapping.yaml
rename to config/my_data_schema.yaml
index 1b3edf1..5acd583 100644
--- a/meta_files/config_with_mapping.yaml
+++ b/config/my_data_schema.yaml
@@ -1,252 +1,251 @@
-'# INSTRUCTIONS':
-- This config file contains both schema definitions and column mappings.
-- 1. The *_schema sections define the data types for your input files.
-- 2. The *_column_mapping sections map your file columns to canonical names.
-- 3. Replace placeholder values (YOUR_COLUMN_NAME_FOR_*) with actual column names
-  from your files.
-- 4. If your file doesn't have a particular canonical column, set it to null or remove
-  the line.
-- 5. The output parquet files will use the canonical column names for consistency.
-pbp_schema: - Time (sec): float - Packet Time Stamp: float - Flag: int - Dropped Records: int - Record Count: int - Record Size: int - Particle Time Stamp: float - Particle Flags: int - Scatter relPeak: float - Scatter Transit Time: int - Scatter Peak Time: int - Scatter FWHM: int - Scatter Size (nm): float - Incand relPeak: float - Incand Transit Time: float - Incand Peak Time: float - Incand FWHM: float - Incand Delay: float - Incand Mass (fg): float - Reserved: int -hk_schema: - Time Stamp: string - Time (sec): float - Time Stamp (UTC sec): float - Elapsed Time: float - Error Code: int - Packet Time Stamp: float - Laser TEC Temp (C): float - Crystal TEC Temp (C): int - Inlet Air Temp (C): float - Computer Heatsink Temp (C): float - Laser Heatsink Temp (C): float - Outlet Air Temp (C): float - YAG Output Monitor (V): float - Cavity Pressure (hPa): float - Laser Driver Power Monitor (uA): int - Laser Driver Current Limit Monitor (A): float - Laser Driver Current Monitor (A): float - Laser TEC Sense: float - Laser Over Temp (On/Off): int - +5V Laser Rail (V): float - ' +5V Rail (V)': float - +12V Rail (V): float - High Voltage (V): float - Battery Temp (C): float - UPS Output (V): float - 12V Iso Rail (V): float - 5V Iso Rail (V): float - 3.3V Iso Rail (V): float - Spare 22: int - Spare 23: int - 408 Board Spare 0: int - 408 Board Spare 1: int - 408 Board Spare 2: int - 408 Board Spare 3: int - 408 Board Spare 4: int - Purge Flow Monitor (sccm): float - System Input Voltage (V): float - Board Temperature (C): float - 408 Board Spare 8: int - 408 Board Spare 9: int - 408 Board Spare 10: int - 408 Board Spare 11: int - 408 Board Spare 12: int - 408 Board Spare 13: int - 408 Board Spare 14: int - 408 Board Spare 15: int - Sheath Flow Controller Read (vccm): int - Sheath Flow Controller Read (sccm): int - Sheath Flow Controller Pressure (psia): float - Sheath Flow Controller Temperature (C): float - Sample Flow Controller Read (vccm): float - Sample Flow Controller 
Read (sccm): float - Sample Flow Controller Pressure (psia): float - Sample Flow Controller Temperature (C): float - Fan 1 (RPM): int - Fan 2 (RPM): int - Laser Fan (RPM): int - Spare tach: int - Threshold Crossing Events: int - Dual Qualified Scatter and Incand Particles: int - Qualified Scatter Only Particles: int - Qualified Incand Only Particles: int - Disqualified Due to Scatter Saturation: int - Disqualified Due to Scatter Transit Time Min: int - Disqualified Due to Scatter Transit Time Max: int - Disqualified Due to Scatter FWHM Min: int - Disqualified Due to Scatter FWHM Max: int - Scatter Inter Part Period Min Violation: int - Disqualified Due to Incand Saturation: int - Disqualified Due to Incand Transit Time Min: int - Disqualified Due to Incand Transit Time Max: int - Disqualified Due to Incand FWHM Min: int - Disqualified Due to Incand FWHM Max: int - Incand Inter Part Period Min Violation: int - Baseline Sizer Lo: int - Baseline Sizer Hi: int - Baseline Incand Lo: int - Baseline Incand Hi: int - Bandwidth Sizer Hi: int - Bandwidth Sizer Lo: int - Bandwidth Incand Lo: int - Bandwidth Incand Hi: int - ABD-0408 HK ADCs min: int - ABD-0436 HK ADCs min: int - ABD-0408 HK ADCs max: int - ABD-0436 HK ADCs max: int - Incand Particle Conc (cts/ccm): float - Scattering Particle Conc (cts/ccm): float - Incand Mass Conc (fg/sccm): float - Scattering Mass Conc (fg/sccm): float - Sheath Flow Set Point: int - Sample Flow Set Point: int - Laser Temp Set Point: int - Laser Current Set Point: float - Spare 4 Set Point: int - Spare 5 Set Point: int - PMT HV Set Point: float - Particle Density (g/ccm): float - PbP Packet Time: float - Scatter Bin 1: int - Scatter Bin 2: int - Scatter Bin 3: int - Scatter Bin 4: int - Scatter Bin 5: int - Scatter Bin 6: int - Scatter Bin 7: int - Scatter Bin 8: int - Scatter Bin 9: int - Scatter Bin 10: int - Scatter Bin 11: int - Scatter Bin 12: int - Scatter Bin 13: int - Scatter Bin 14: int - Scatter Bin 15: int - Scatter Bin 16: int - 
Scatter Bin 17: int - Scatter Bin 18: int - Scatter Bin 19: int - Scatter Bin 20: int - Incand Bin 1: int - Incand Bin 2: int - Incand Bin 3: int - Incand Bin 4: int - Incand Bin 5: int - Incand Bin 6: int - Incand Bin 7: int - Incand Bin 8: int - Incand Bin 9: int - Incand Bin 10: int - Incand Bin 11: int - Incand Bin 12: int - Incand Bin 13: int - Incand Bin 14: int - Incand Bin 15: int - Incand Bin 16: int - Incand Bin 17: int - Incand Bin 18: int - Incand Bin 19: int - Incand Bin 20: int -pbp_canonical_schema: - Time (sec): float - Packet Time Stamp: float - Flag: float - Dropped Records: float - Record Count: float - Record Size: float - Particle Time Stamp: float - Particle Flags: float - Scatter relPeak: float - Scatter Transit Time: float - Scatter Peak Time: float - Scatter FWHM: float - Scatter Size (nm): float - Incand relPeak: float - Incand Transit Time: float - Incand Peak Time: float - Incand FWHM: float - Incand Delay: float - Incand Mass (fg): float - Reserved: float -hk_canonical_schema: - Time Stamp: datetime - Time (sec): float - Sample Flow Controller Read (sccm): float - Sample Flow Controller Read (vccm): float - Time Stamp (UTC sec): float - Elapsed Time: float - Error Code: float - Packet Time Stamp: float - Laser TEC Temp (C): float - Crystal TEC Temp (C): float - Inlet Air Temp (C): float - Computer Heatsink Temp (C): float - Laser Heatsink Temp (C): float - Outlet Air Temp (C): float - YAG Output Monitor (V): float - Cavity Pressure (hPa): float - Laser Driver Power Monitor (uA): float - Laser Driver Current Limit Monitor (A): float - Laser Driver Current Monitor (A): float -pbp_column_mapping: - Time (sec): Time (sec) - Packet Time Stamp: Packet Time Stamp - Flag: Flag - Dropped Records: Dropped Records - Record Count: Record Count - Record Size: Record Size - Particle Time Stamp: Particle Time Stamp - Particle Flags: Particle Flags - Scatter relPeak: Scatter relPeak - Scatter Transit Time: Scatter Transit Time - Scatter Peak Time: 
Scatter Peak Time - Scatter FWHM: Scatter FWHM - Scatter Size (nm): Scatter Size (nm) - Incand relPeak: Incand relPeak - Incand Transit Time: Incand Transit Time - Incand Peak Time: Incand Peak Time - Incand FWHM: Incand FWHM - Incand Delay: Incand Delay - Incand Mass (fg): Incand Mass (fg) - Reserved: Reserved -hk_column_mapping: - Time Stamp: Time Stamp - Time (sec): Time (sec) - Sample Flow Controller Read (sccm): Sample Flow Controller Read (sccm) - Sample Flow Controller Read (vccm): Sample Flow Controller Read (vccm) - Time Stamp (UTC sec): Time Stamp (UTC sec) - Elapsed Time: Elapsed Time - Error Code: Error Code - Packet Time Stamp: Packet Time Stamp - Laser TEC Temp (C): Laser TEC Temp (C) - Crystal TEC Temp (C): Crystal TEC Temp (C) - Inlet Air Temp (C): Inlet Air Temp (C) - Computer Heatsink Temp (C): Computer Heatsink Temp (C) - Laser Heatsink Temp (C): Laser Heatsink Temp (C) - Outlet Air Temp (C): Outlet Air Temp (C) - YAG Output Monitor (V): YAG Output Monitor (V) - Cavity Pressure (hPa): Cavity Pressure (hPa) - Laser Driver Power Monitor (uA): Laser Driver Power Monitor (uA) - Laser Driver Current Limit Monitor (A): Laser Driver Current Limit Monitor (A) - Laser Driver Current Monitor (A): Laser Driver Current Monitor (A) +'# INSTRUCTIONS': +- This config file contains both schema definitions and column mappings. +- 1. The *_schema sections define the data types for your input files. +- 2. The *_column_mapping sections map your file columns to canonical names. +- 3. Update column mappings if your files use different column names. +- 4. If your file doesn't have a particular canonical column, set it to null or remove + the line. +- 5. The output parquet files will use the canonical column names for consistency. 
+pbp_schema: + Time (sec): float + Packet Time Stamp: float + Flag: int + Dropped Records: int + Record Count: int + Record Size: int + Particle Time Stamp: float + Particle Flags: int + Scatter relPeak: float + Scatter Transit Time: int + Scatter Peak Time: int + Scatter FWHM: int + Scatter Size (nm): float + Incand relPeak: float + Incand Transit Time: float + Incand Peak Time: float + Incand FWHM: float + Incand Delay: float + Incand Mass (fg): float + Reserved: int +hk_schema: + Time Stamp: string + Time (sec): float + Time Stamp (UTC sec): float + Elapsed Time: float + Error Code: int + Packet Time Stamp: float + Laser TEC Temp (C): float + Crystal TEC Temp (C): int + Inlet Air Temp (C): float + Computer Heatsink Temp (C): float + Laser Heatsink Temp (C): float + Outlet Air Temp (C): float + YAG Output Monitor (V): float + Cavity Pressure (hPa): float + Laser Driver Power Monitor (uA): int + Laser Driver Current Limit Monitor (A): float + Laser Driver Current Monitor (A): float + Laser TEC Sense: float + Laser Over Temp (On/Off): int + +5V Laser Rail (V): float + ' +5V Rail (V)': float + +12V Rail (V): float + High Voltage (V): float + Battery Temp (C): float + UPS Output (V): float + 12V Iso Rail (V): float + 5V Iso Rail (V): float + 3.3V Iso Rail (V): float + Spare 22: int + Spare 23: int + 408 Board Spare 0: int + 408 Board Spare 1: int + 408 Board Spare 2: int + 408 Board Spare 3: int + 408 Board Spare 4: int + Purge Flow Monitor (sccm): float + System Input Voltage (V): float + Board Temperature (C): float + 408 Board Spare 8: int + 408 Board Spare 9: int + 408 Board Spare 10: int + 408 Board Spare 11: int + 408 Board Spare 12: int + 408 Board Spare 13: int + 408 Board Spare 14: int + 408 Board Spare 15: int + Sheath Flow Controller Read (vccm): int + Sheath Flow Controller Read (sccm): int + Sheath Flow Controller Pressure (psia): float + Sheath Flow Controller Temperature (C): float + Sample Flow Controller Read (vccm): float + Sample Flow Controller 
Read (sccm): float + Sample Flow Controller Pressure (psia): float + Sample Flow Controller Temperature (C): float + Fan 1 (RPM): int + Fan 2 (RPM): int + Laser Fan (RPM): int + Spare tach: int + Threshold Crossing Events: int + Dual Qualified Scatter and Incand Particles: int + Qualified Scatter Only Particles: int + Qualified Incand Only Particles: int + Disqualified Due to Scatter Saturation: int + Disqualified Due to Scatter Transit Time Min: int + Disqualified Due to Scatter Transit Time Max: int + Disqualified Due to Scatter FWHM Min: int + Disqualified Due to Scatter FWHM Max: int + Scatter Inter Part Period Min Violation: int + Disqualified Due to Incand Saturation: int + Disqualified Due to Incand Transit Time Min: int + Disqualified Due to Incand Transit Time Max: int + Disqualified Due to Incand FWHM Min: int + Disqualified Due to Incand FWHM Max: int + Incand Inter Part Period Min Violation: int + Baseline Sizer Lo: int + Baseline Sizer Hi: int + Baseline Incand Lo: int + Baseline Incand Hi: int + Bandwidth Sizer Hi: int + Bandwidth Sizer Lo: int + Bandwidth Incand Lo: int + Bandwidth Incand Hi: int + ABD-0408 HK ADCs min: int + ABD-0436 HK ADCs min: int + ABD-0408 HK ADCs max: int + ABD-0436 HK ADCs max: int + Incand Particle Conc (cts/ccm): float + Scattering Particle Conc (cts/ccm): float + Incand Mass Conc (fg/sccm): float + Scattering Mass Conc (fg/sccm): float + Sheath Flow Set Point: int + Sample Flow Set Point: int + Laser Temp Set Point: int + Laser Current Set Point: float + Spare 4 Set Point: int + Spare 5 Set Point: int + PMT HV Set Point: float + Particle Density (g/ccm): float + PbP Packet Time: float + Scatter Bin 1: int + Scatter Bin 2: int + Scatter Bin 3: int + Scatter Bin 4: int + Scatter Bin 5: int + Scatter Bin 6: int + Scatter Bin 7: int + Scatter Bin 8: int + Scatter Bin 9: int + Scatter Bin 10: int + Scatter Bin 11: int + Scatter Bin 12: int + Scatter Bin 13: int + Scatter Bin 14: int + Scatter Bin 15: int + Scatter Bin 16: int + 
Scatter Bin 17: int + Scatter Bin 18: int + Scatter Bin 19: int + Scatter Bin 20: int + Incand Bin 1: int + Incand Bin 2: int + Incand Bin 3: int + Incand Bin 4: int + Incand Bin 5: int + Incand Bin 6: int + Incand Bin 7: int + Incand Bin 8: int + Incand Bin 9: int + Incand Bin 10: int + Incand Bin 11: int + Incand Bin 12: int + Incand Bin 13: int + Incand Bin 14: int + Incand Bin 15: int + Incand Bin 16: int + Incand Bin 17: int + Incand Bin 18: int + Incand Bin 19: int + Incand Bin 20: int +pbp_canonical_schema: + Time (sec): float + Packet Time Stamp: float + Flag: float + Dropped Records: float + Record Count: float + Record Size: float + Particle Time Stamp: float + Particle Flags: float + Scatter relPeak: float + Scatter Transit Time: float + Scatter Peak Time: float + Scatter FWHM: float + Scatter Size (nm): float + Incand relPeak: float + Incand Transit Time: float + Incand Peak Time: float + Incand FWHM: float + Incand Delay: float + Incand Mass (fg): float + Reserved: float +hk_canonical_schema: + Time Stamp: datetime + Time (sec): float + Sample Flow Controller Read (sccm): float + Sample Flow Controller Read (vccm): float + Time Stamp (UTC sec): float + Elapsed Time: float + Error Code: float + Packet Time Stamp: float + Laser TEC Temp (C): float + Crystal TEC Temp (C): float + Inlet Air Temp (C): float + Computer Heatsink Temp (C): float + Laser Heatsink Temp (C): float + Outlet Air Temp (C): float + YAG Output Monitor (V): float + Cavity Pressure (hPa): float + Laser Driver Power Monitor (uA): float + Laser Driver Current Limit Monitor (A): float + Laser Driver Current Monitor (A): float +pbp_column_mapping: + Time (sec): Time (sec) + Packet Time Stamp: Packet Time Stamp + Flag: Flag + Dropped Records: Dropped Records + Record Count: Record Count + Record Size: Record Size + Particle Time Stamp: Particle Time Stamp + Particle Flags: Particle Flags + Scatter relPeak: Scatter relPeak + Scatter Transit Time: Scatter Transit Time + Scatter Peak Time: 
Scatter Peak Time + Scatter FWHM: Scatter FWHM + Scatter Size (nm): Scatter Size (nm) + Incand relPeak: Incand relPeak + Incand Transit Time: Incand Transit Time + Incand Peak Time: Incand Peak Time + Incand FWHM: Incand FWHM + Incand Delay: Incand Delay + Incand Mass (fg): Incand Mass (fg) + Reserved: Reserved +hk_column_mapping: + Time Stamp: Time Stamp + Time (sec): Time (sec) + Sample Flow Controller Read (sccm): Sample Flow Controller Read (sccm) + Sample Flow Controller Read (vccm): Sample Flow Controller Read (vccm) + Time Stamp (UTC sec): Time Stamp (UTC sec) + Elapsed Time: Elapsed Time + Error Code: Error Code + Packet Time Stamp: Packet Time Stamp + Laser TEC Temp (C): Laser TEC Temp (C) + Crystal TEC Temp (C): Crystal TEC Temp (C) + Inlet Air Temp (C): Inlet Air Temp (C) + Computer Heatsink Temp (C): Computer Heatsink Temp (C) + Laser Heatsink Temp (C): Laser Heatsink Temp (C) + Outlet Air Temp (C): Outlet Air Temp (C) + YAG Output Monitor (V): YAG Output Monitor (V) + Cavity Pressure (hPa): Cavity Pressure (hPa) + Laser Driver Power Monitor (uA): Laser Driver Power Monitor (uA) + Laser Driver Current Limit Monitor (A): Laser Driver Current Limit Monitor (A) + Laser Driver Current Monitor (A): Laser Driver Current Monitor (A) diff --git a/tests/instrument_config.yaml b/config/my_instrument_settings.yaml similarity index 55% rename from tests/instrument_config.yaml rename to config/my_instrument_settings.yaml index eae4592..f2f8f25 100644 --- a/tests/instrument_config.yaml +++ b/config/my_instrument_settings.yaml @@ -1,15 +1,20 @@ -instrument_parameters: - ScattTransitMin: 10.0 - ScattTransitMax: 65535.0 - ScattFWHMMin: 30.0 - ScattFWHMMax: 65535.0 - ScattInterTimeMin: 10.0 - IncTransitMin: 5.0 - IncTransitMax: 65535.0 - IncFWHMMin: 30.0 - IncFWHMMax: 65535.0 - IncInterTimeMin: 10.0 - SaveRate: 1.0 -Signal_saturation: - IncSatPoint: 1700000000.0 - ScattSatPoint: 1700000000.0 +metadata: + source_ini_file: 
C:\Users\Baccandr\Documents\SP2XR_code\SP2XR_code\tests\data\SP2XR_orig_files\20190508\20190508172218\20190508172218
+    Calibration 20181005.ini  # NOTE(review): absolute path from the generating machine; looks like provenance only - confirm nothing reads it
+  generated_on: '2025-09-29T22:52:49.725057'  # quoted, so loaders keep it a string instead of a timestamp
+  generated_by: sp2xr_generate_config.py
+instrument_parameters:  # values match the "[Trigger Settings]" limits of the SP2XR .ini (transit time, FWHM, inter-particle time)
+  ScattTransitMin: 10.0
+  ScattTransitMax: 65535.0
+  ScattFWHMMin: 30.0
+  ScattFWHMMax: 65535.0
+  ScattInterTimeMin: 10.0
+  IncTransitMin: 5.0
+  IncTransitMax: 65535.0
+  IncFWHMMin: 30.0
+  IncFWHMMax: 65535.0
+  IncInterTimeMin: 10.0
+  SaveRate: 1.0  # presumably the .ini "Save Every Nth Particle" value - TODO confirm
+Signal_saturation:  # NOTE(review): units/meaning of the *SatPoint values are not stated here - confirm with the consumer
+  IncSatPoint: 1700000000.0
+  ScattSatPoint: 1700000000.0
diff --git a/meta_files/20190417210227 Calibration 20181005.ini b/meta_files/20190417210227 Calibration 20181005.ini
deleted file mode 100644
index 096fba9..0000000
--- a/meta_files/20190417210227 Calibration 20181005.ini
+++ /dev/null
@@ -1,266 +0,0 @@
-[Custom]
-Display Tab=TRUE
-Display Names=<2>
-Display Names 0=set 1
-Display Names 1=set 2
-Sets=<2>
-Sets 0.Cluster.Graph 1=<8>
-Sets 0.Cluster.Graph 1 0.Plot.Channel=+5V Laser Rail (V)
-Sets 0.Cluster.Graph 1 0.Plot.Left/Right=FALSE
-Sets 0.Cluster.Graph 1 1.Plot.Channel= +5V Rail (V)
-Sets 0.Cluster.Graph 1 1.Plot.Left/Right=FALSE
-Sets 0.Cluster.Graph 1 2.Plot.Channel=+12V Rail (V)
-Sets 0.Cluster.Graph 1 2.Plot.Left/Right=FALSE
-Sets 0.Cluster.Graph 1 3.Plot.Channel=3.3V Iso Rail (V)
-Sets 0.Cluster.Graph 1 3.Plot.Left/Right=FALSE
-Sets 0.Cluster.Graph 1 4.Plot.Channel=UPS Output (V)
-Sets 0.Cluster.Graph 1 4.Plot.Left/Right=TRUE
-Sets 0.Cluster.Graph 1 5.Plot.Channel=Inlet Air Temp (C)
-Sets 0.Cluster.Graph 1 5.Plot.Left/Right=TRUE
-Sets 0.Cluster.Graph 1 6.Plot.Channel=Crystal TEC Temp (C)
-Sets 0.Cluster.Graph 1 6.Plot.Left/Right=TRUE
-Sets 0.Cluster.Graph 1 7.Plot.Channel=Laser Heatsink Temp (C)
-Sets 0.Cluster.Graph 1 7.Plot.Left/Right=TRUE
-Sets 0.Cluster.Graph 2=<8>
-Sets 0.Cluster.Graph 2 0.Plot.Channel=Laser TEC Temp (C)
-Sets 0.Cluster.Graph 2 0.Plot.Left/Right=FALSE
-Sets 0.Cluster.Graph 2 1.Plot.Channel=Crystal TEC Temp (C)
-Sets 0.Cluster.Graph 2 1.Plot.Left/Right=FALSE -Sets 0.Cluster.Graph 2 2.Plot.Channel=Inlet Air Temp (C) -Sets 0.Cluster.Graph 2 2.Plot.Left/Right=FALSE -Sets 0.Cluster.Graph 2 3.Plot.Channel=Computer Heatsink Temp (C) -Sets 0.Cluster.Graph 2 3.Plot.Left/Right=FALSE -Sets 0.Cluster.Graph 2 4.Plot.Channel=Laser Heatsink Temp (C) -Sets 0.Cluster.Graph 2 4.Plot.Left/Right=FALSE -Sets 0.Cluster.Graph 2 5.Plot.Channel=Outlet Air Temp (C) -Sets 0.Cluster.Graph 2 5.Plot.Left/Right=FALSE -Sets 0.Cluster.Graph 2 6.Plot.Channel=Battery Temp (C) -Sets 0.Cluster.Graph 2 6.Plot.Left/Right=FALSE -Sets 0.Cluster.Graph 2 7.Plot.Channel=Laser TEC Sense -Sets 0.Cluster.Graph 2 7.Plot.Left/Right=TRUE -Sets 1.Cluster.Graph 1=<4> -Sets 1.Cluster.Graph 1 0.Plot.Channel=Threshold Crossing Events -Sets 1.Cluster.Graph 1 0.Plot.Left/Right=FALSE -Sets 1.Cluster.Graph 1 1.Plot.Channel=Dual Qualified Scatter and Incand Particles -Sets 1.Cluster.Graph 1 1.Plot.Left/Right=FALSE -Sets 1.Cluster.Graph 1 2.Plot.Channel=Qualified Scatter Only Particles -Sets 1.Cluster.Graph 1 2.Plot.Left/Right=FALSE -Sets 1.Cluster.Graph 1 3.Plot.Channel=Qualified Incand Only Particles -Sets 1.Cluster.Graph 1 3.Plot.Left/Right=FALSE -Sets 1.Cluster.Graph 2=<8> -Sets 1.Cluster.Graph 2 0.Plot.Channel=Baseline Sizer Lo -Sets 1.Cluster.Graph 2 0.Plot.Left/Right=FALSE -Sets 1.Cluster.Graph 2 1.Plot.Channel=Baseline Sizer Hi -Sets 1.Cluster.Graph 2 1.Plot.Left/Right=FALSE -Sets 1.Cluster.Graph 2 2.Plot.Channel=Baseline Incand Lo -Sets 1.Cluster.Graph 2 2.Plot.Left/Right=FALSE -Sets 1.Cluster.Graph 2 3.Plot.Channel=Baseline Incand Hi -Sets 1.Cluster.Graph 2 3.Plot.Left/Right=FALSE -Sets 1.Cluster.Graph 2 4.Plot.Channel=Bandwidth Sizer Hi -Sets 1.Cluster.Graph 2 4.Plot.Left/Right=TRUE -Sets 1.Cluster.Graph 2 5.Plot.Channel=Bandwidth Sizer Lo -Sets 1.Cluster.Graph 2 5.Plot.Left/Right=TRUE -Sets 1.Cluster.Graph 2 6.Plot.Channel=Bandwidth Incand Lo -Sets 1.Cluster.Graph 2 6.Plot.Left/Right=TRUE -Sets 1.Cluster.Graph 2 
7.Plot.Channel=Bandwidth Incand Hi -Sets 1.Cluster.Graph 2 7.Plot.Left/Right=TRUE -[Raw Options] -byte 0: data mux=High Dynamic Range Traces -Raw Data Particle Selection=First Scatter -Scatter relPeak=0 -Incand relPeak=0 -Inter-raw Period (ms)=100 -leader sample count=400 -footer sample count=400 -[Scatter Parameters] -Graph=Counts -X Mode=Size -Norm?=FALSE -Cumulative=FALSE -[Versions] -Instrument Name=SP2XR -SP2 Version=2.01.01.19 -Acq Version=2.00.00.00 -Last Date Updated=4/11/2019 6:24:36 AM -[Trigger Settings] -Scatter Transit Time Min=10 -Scatter Transit Time Max=65535 -Scatter FWHM Min=30 -Scatter FWHM Max=65535 -Scatter Inter Particle Time Min=10 -Incand Transit Time Min=5 -Incand Transit Time Max=65535 -Incand FWHM Min=30 -Incand FWHM Max=65535 -Incand Inter Particle Time Min=10 -Paired Particle Delay Max=10 -Scatter Threshold Min=36100 -Scatter Hysteresis Min=2703 -Incand Threshold Min=50700 -Incand Hysteresis Min=5394 -Scatter Threshold Max=559000000 -Scatter Hysteresis Max=0 -Incand Threshold Max=2147483647 -Incand Hysteresis Max=0 -Forced Trigger=FALSE -Forced Trigger Interval(ms)=1000 -[# Samples S] -# Samples S=0 -[Program] -Data File Path=D:\DMT\SP2XR Data -Restart Files=FALSE -Graph 0 Left=YAG Output Monitor (V) -Graph 0 Right=Laser Driver Current Monitor (A) -Graph 1 Left=Scattering Particle Conc (cts/ccm) -Graph 1 Right=Incand Particle Conc (cts/ccm) -Control Cycle Time=0 -NTP Server= -Write File?=FALSE -Graph Backgrounds=16448250 -Graph 2 Left=Sheath Flow Controller Read (sccm) -Graph 2 Right=Sample Flow Controller Read (sccm) -Description= -Serial Number=0001 -2 or 3 Graphs=TRUE -Time Range=12 Hours -OSDS Format= -Num to Avg=0 -Global 2=0 -Global 3=0 -Global 4=0 -Global 5=0 -Shut Down Sequence= -Crisis Shut Down Seq=turn off pump and laser -Write SP2b Data File=TRUE -Write HK File=TRUE -Write Raw Binary Data=TRUE -TabChannelNum=0 -OptimizeChannelNum=0 -Write HDF5 File=TRUE -NumParticlesPerHDF5File=100000 -Laser Temp Set=29 -Laser Current 
Set=1.9 -Spare 4 Set=0 -Spare 5 Set=0 -PMT HV Set=0.46 -Interface Board Scaling=<24> -Interface Board Scaling 0=1/(0.000849+0.000261*ln(10000/(65536/VAR-1))+0.000000125*ln(10000/(65536/VAR-1))^3)-273.15 -Interface Board Scaling 1= -Interface Board Scaling 2=(1.0 / (1.1135E-3 + 2.368E-4 * (ln(1E4 * (1-(VAR / (65536.0 * 1.001))) / (VAR / (65536.0 * 1.001)))) + 7.396E-8 * (ln(1E4 * (1-(VAR / (65536.0 * 1.001))) / (VAR / (65536.0 * 1.001))))^3)) - 273.15 -Interface Board Scaling 3=(1.0 / (1.1135E-3 + 2.368E-4 * (ln(1E4 * (1-(VAR / (65536.0 * 1.001))) / (VAR / (65536.0 * 1.001)))) + 7.396E-8 * (ln(1E4 * (1-(VAR / (65536.0 * 1.001))) / (VAR / (65536.0 * 1.001))))^3)) - 273.15 -Interface Board Scaling 4=(1.0 / (1.1135E-3 + 2.368E-4 * (ln(1E4 * (1-(VAR / (65536.0 * 1.001))) / (VAR / (65536.0 * 1.001)))) + 7.396E-8 * (ln(1E4 * (1-(VAR / (65536.0 * 1.001))) / (VAR / (65536.0 * 1.001))))^3)) - 273.15 -Interface Board Scaling 5=(1.0 / (1.1135E-3 + 2.368E-4 * (ln(1E4 * (1-(VAR / (65536.0 * 1.001))) / (VAR / (65536.0 * 1.001)))) + 7.396E-8 * (ln(1E4 * (1-(VAR / (65536.0 * 1.001))) / (VAR / (65536.0 * 1.001))))^3)) - 273.15 -Interface Board Scaling 6=0.0000625*VAR -Interface Board Scaling 7=VAR/72+105.55 -Interface Board Scaling 8=0.125*VAR -Interface Board Scaling 9=0.000125*VAR -Interface Board Scaling 10=0.000125*VAR -Interface Board Scaling 11=2.01*0.0000625 *VAR -Interface Board Scaling 12= -Interface Board Scaling 13=0.000125*VAR -Interface Board Scaling 14=0.000125*VAR -Interface Board Scaling 15=4.57*0.0000625*VAR -Interface Board Scaling 16=0.0152587890625*VAR -Interface Board Scaling 17=1/(0.000894+0.00025*ln((1662.22* VAR)/(39897.3 - VAR))+0.0000002*ln((1662.22*VAR)/(39897.3 -VAR))^3)-273.15 -Interface Board Scaling 18=(69.8+11.5) /11.5*0.0000625*VAR -Interface Board Scaling 19=4.57*0.0000625*VAR -Interface Board Scaling 20=0.000125*VAR -Interface Board Scaling 21=1.1*0.0000625 *VAR -Interface Board Scaling 22= -Interface Board Scaling 23= -ABD 0408 Scaling=<16> -ABD 
0408 Scaling 0= -ABD 0408 Scaling 1= -ABD 0408 Scaling 2= -ABD 0408 Scaling 3= -ABD 0408 Scaling 4= -ABD 0408 Scaling 5=VAR*0.00625 -ABD 0408 Scaling 6=7.98*(6.25E-5*VAR) -ABD 0408 Scaling 7=-84.962*(6.25E-5*VAR-1.8639) -ABD 0408 Scaling 8= -ABD 0408 Scaling 9= -ABD 0408 Scaling 10= -ABD 0408 Scaling 11= -ABD 0408 Scaling 12= -ABD 0408 Scaling 13= -ABD 0408 Scaling 14= -ABD 0408 Scaling 15= -Save Every Nth Particle=1 -zip files=TRUE -Particle Density (g/cc)=1.8 -Pump Start-Up State=FALSE -[Detector DAC Settings] -Scatter unused A=23790 -Scatter unused B=65535 -[Incand Parameters] -Graph=Counts -X Mode=Mass -Norm?=FALSE -Cumulative=FALSE -[Control] -Alarms=<3> -Alarms 0.Alarm.Name=TurnPumpON -Alarms 0.Alarm.Channel=Elapsed Time -Alarms 0.Alarm.Condition=> -Alarms 0.Alarm.Threshold=0 -Alarms 0.Alarm.Action=Turn Pump On -Alarms 0.Alarm.Hysteresis=0 -Alarms 0.Alarm.Target Channel= -Alarms 0.Alarm.Set Value=0 -Alarms 0.Alarm.Min Time=0 -Alarms 0.Alarm.Sequence=turn off pump and laser -Alarms 0.Alarm.Target Alarm=TurnPumpON -Alarms 1.Alarm.Name=Turn Laser On -Alarms 1.Alarm.Channel=Elapsed Time -Alarms 1.Alarm.Condition=> -Alarms 1.Alarm.Threshold=5 -Alarms 1.Alarm.Action=Turn Laser On -Alarms 1.Alarm.Hysteresis=0 -Alarms 1.Alarm.Target Channel= -Alarms 1.Alarm.Set Value=0 -Alarms 1.Alarm.Min Time=0 -Alarms 1.Alarm.Sequence= -Alarms 1.Alarm.Target Alarm=Turn Laser On -Alarms 2.Alarm.Name=StartRecording -Alarms 2.Alarm.Channel=Elapsed Time -Alarms 2.Alarm.Condition=> -Alarms 2.Alarm.Threshold=10 -Alarms 2.Alarm.Action=Start Writing Data -Alarms 2.Alarm.Hysteresis=0 -Alarms 2.Alarm.Target Channel= -Alarms 2.Alarm.Set Value=0 -Alarms 2.Alarm.Min Time=0 -Alarms 2.Alarm.Sequence= -Alarms 2.Alarm.Target Alarm=Turn Laser On -Sequences=<0> -Timers=<0> -[Pump] -Pump=TRUE -[# Samples I] -# Samples I=0 -[SampleFlow] -SampleFlow (sccm)=30 -[SheathFlow] -SheathFlow (sccm)=600 -[Polling Interval] -HK Stream Interval (ms)=1000 -PbP Stream Interval (ms)=1000 -[Fans Settings] -Case Fan 
Mode=normal -Case Fan On Threshold=35 -Case Fan Off Threshold=33 -Laser Fan Mode=forced off -Laser Fan On Threshold=27 -Laser Fan Off Threshold=24 -[Channel Order] -Channel Order=<0> -Digits=<0> -[Streaming Data] -Port=0 -Baud Rate=0 -Channels=<0> -Bus=Serial Port -[Calculated Channels] -Calculated Channels=<0> -[Calculations] -Calculations=<0> diff --git a/meta_files/config.yaml b/meta_files/config.yaml deleted file mode 100644 index 701e851..0000000 --- a/meta_files/config.yaml +++ /dev/null @@ -1,159 +0,0 @@ -pbp_schema: - Time (sec): float - Packet Time Stamp: float - Flag: float - Dropped Records: float - Record Count: float - Record Size: float - Particle Time Stamp: float - Particle Flags: float - Scatter relPeak: float - Scatter Transit Time: float - Scatter Peak Time: float - Scatter FWHM: float - Scatter Size (nm): float - Incand relPeak: float - Incand Transit Time: float - Incand Peak Time: float - Incand FWHM: float - Incand Delay: float - Incand Mass (fg): float - Reserved: float -hk_schema: - Time Stamp: datetime - Time (sec): float - Time Stamp (UTC sec): float - Elapsed Time: float - Error Code: float - Packet Time Stamp: float - Laser TEC Temp (C): float - Crystal TEC Temp (C): float - Inlet Air Temp (C): float - Computer Heatsink Temp (C): float - Laser Heatsink Temp (C): float - Outlet Air Temp (C): float - YAG Output Monitor (V): float - Cavity Pressure (hPa): float - Laser Driver Power Monitor (uA): float - Laser Driver Current Limit Monitor (A): float - Laser Driver Current Monitor (A): float - Laser TEC Sense: float - Laser Over Temp (On/Off): float - +5V Laser Rail (V): float - ' +5V Rail (V)': float - +12V Rail (V): float - High Voltage (V): float - Battery Temp (C): float - UPS Output (V): float - 12V Iso Rail (V): float - 5V Iso Rail (V): float - 3.3V Iso Rail (V): float - Spare 22: float - Spare 23: float - 408 Board Spare 0: float - 408 Board Spare 1: float - 408 Board Spare 2: float - 408 Board Spare 3: float - 408 Board Spare 4: 
float - Purge Flow Monitor (sccm): float - System Input Voltage (V): float - Board Temperature (C): float - 408 Board Spare 8: float - 408 Board Spare 9: float - 408 Board Spare 10: float - 408 Board Spare 11: float - 408 Board Spare 12: float - 408 Board Spare 13: float - 408 Board Spare 14: float - 408 Board Spare 15: float - Sheath Flow Controller Read (vccm): float - Sheath Flow Controller Read (sccm): float - Sheath Flow Controller Pressure (psia): float - Sheath Flow Controller Temperature (C): float - Sample Flow Controller Read (vccm): float - Sample Flow Controller Read (sccm): float - Sample Flow Controller Pressure (psia): float - Sample Flow Controller Temperature (C): float - Fan 1 (RPM): float - Fan 2 (RPM): float - Laser Fan (RPM): float - Spare tach: float - Threshold Crossing Events: float - Dual Qualified Scatter and Incand Particles: float - Qualified Scatter Only Particles: float - Qualified Incand Only Particles: float - Disqualified Due to Scatter Saturation: float - Disqualified Due to Scatter Transit Time Min: float - Disqualified Due to Scatter Transit Time Max: float - Disqualified Due to Scatter FWHM Min: float - Disqualified Due to Scatter FWHM Max: float - Scatter Inter Part Period Min Violation: float - Disqualified Due to Incand Saturation: float - Disqualified Due to Incand Transit Time Min: float - Disqualified Due to Incand Transit Time Max: float - Disqualified Due to Incand FWHM Min: float - Disqualified Due to Incand FWHM Max: float - Incand Inter Part Period Min Violation: float - Baseline Sizer Lo: float - Baseline Sizer Hi: float - Baseline Incand Lo: float - Baseline Incand Hi: float - Bandwidth Sizer Hi: float - Bandwidth Sizer Lo: float - Bandwidth Incand Lo: float - Bandwidth Incand Hi: float - ABD-0408 HK ADCs min: float - ABD-0436 HK ADCs min: float - ABD-0408 HK ADCs max: float - ABD-0436 HK ADCs max: float - Incand Particle Conc (cts/ccm): float - Scattering Particle Conc (cts/ccm): float - Incand Mass Conc (fg/sccm): 
float - Scattering Mass Conc (fg/sccm): float - Sheath Flow Set Point: float - Sample Flow Set Point: float - Laser Temp Set Point: float - Laser Current Set Point: float - Spare 4 Set Point: float - Spare 5 Set Point: float - PMT HV Set Point: float - Particle Density (g/ccm): float - PbP Packet Time: float - Scatter Bin 1: float - Scatter Bin 2: float - Scatter Bin 3: float - Scatter Bin 4: float - Scatter Bin 5: float - Scatter Bin 6: float - Scatter Bin 7: float - Scatter Bin 8: float - Scatter Bin 9: float - Scatter Bin 10: float - Scatter Bin 11: float - Scatter Bin 12: float - Scatter Bin 13: float - Scatter Bin 14: float - Scatter Bin 15: float - Scatter Bin 16: float - Scatter Bin 17: float - Scatter Bin 18: float - Scatter Bin 19: float - Incand Bin 1: float - Incand Bin 2: float - Incand Bin 3: float - Incand Bin 4: float - Incand Bin 5: float - Incand Bin 6: float - Incand Bin 7: float - Incand Bin 8: float - Incand Bin 9: float - Incand Bin 10: float - Incand Bin 11: float - Incand Bin 12: float - Incand Bin 13: float - Incand Bin 14: float - Incand Bin 15: float - Incand Bin 16: float - Incand Bin 17: float - Incand Bin 18: float - Incand Bin 19: float diff --git a/meta_files/generate_config.py b/meta_files/generate_config.py deleted file mode 100644 index 41bafef..0000000 --- a/meta_files/generate_config.py +++ /dev/null @@ -1,228 +0,0 @@ -from __future__ import annotations - -import pandas as pd -import yaml -import os -from pathlib import Path -from typing import Any - - -def infer_general_dtype(dtype: Any) -> str: - """Infer general data type from pandas dtype.""" - if pd.api.types.is_integer_dtype(dtype): - return "int" - elif pd.api.types.is_float_dtype(dtype): - return "float" - elif pd.api.types.is_datetime64_any_dtype(dtype): - return "datetime" - else: - return "string" - - -def load_schema(input_file: str | Path) -> dict[str, str]: - """Load schema from input file by inferring column types.""" - ext = 
os.path.splitext(str(input_file))[1].lower() - - if ext in [".csv", ".zip"]: - df = pd.read_csv(input_file, nrows=100) - elif ext == ".parquet": - df = pd.read_parquet(input_file) - else: - raise ValueError(f"Unsupported file format: {ext}") - - schema = {col: infer_general_dtype(dtype) for col, dtype in df.dtypes.items()} - return schema - - -def get_canonical_schemas() -> dict[str, dict[str, str]]: - """Return canonical column schemas for SP2XR data.""" - pbp_canonical = { - "Time (sec)": "float", - "Packet Time Stamp": "float", - "Flag": "float", - "Dropped Records": "float", - "Record Count": "float", - "Record Size": "float", - "Particle Time Stamp": "float", - "Particle Flags": "float", - "Scatter relPeak": "float", - "Scatter Transit Time": "float", - "Scatter Peak Time": "float", - "Scatter FWHM": "float", - "Scatter Size (nm)": "float", - "Incand relPeak": "float", - "Incand Transit Time": "float", - "Incand Peak Time": "float", - "Incand FWHM": "float", - "Incand Delay": "float", - "Incand Mass (fg)": "float", - "Reserved": "float", - } - - hk_canonical = { - "Time Stamp": "datetime", - "Time (sec)": "float", - "Sample Flow Controller Read (sccm)": "float", - "Sample Flow Controller Read (vccm)": "float", - # Core HK columns that are commonly used - "Time Stamp (UTC sec)": "float", - "Elapsed Time": "float", - "Error Code": "float", - "Packet Time Stamp": "float", - "Laser TEC Temp (C)": "float", - "Crystal TEC Temp (C)": "float", - "Inlet Air Temp (C)": "float", - "Computer Heatsink Temp (C)": "float", - "Laser Heatsink Temp (C)": "float", - "Outlet Air Temp (C)": "float", - "YAG Output Monitor (V)": "float", - "Cavity Pressure (hPa)": "float", - "Laser Driver Power Monitor (uA)": "float", - "Laser Driver Current Limit Monitor (A)": "float", - "Laser Driver Current Monitor (A)": "float", - # ... 
(other HK columns can be added as needed) - } - - return {"pbp_canonical": pbp_canonical, "hk_canonical": hk_canonical} - - -def generate_combined_config( - pbp_file: str | Path, hk_file: str | Path, output_file: str = "config.yaml" -) -> None: - """Generate config file with both schema definitions and column mappings.""" - config = { - "pbp_schema": load_schema(pbp_file), - "hk_schema": load_schema(hk_file), - } - - with open(output_file, "w") as f: - yaml.dump(config, f, sort_keys=False) - - print(f"Unified config saved to: {output_file}") - - -def generate_mapping_template( - pbp_file: str | Path, - hk_file: str | Path, - output_file: str = "config_with_mapping.yaml", -) -> None: - """ - Generate enhanced config with column mapping templates. - - This creates a config file that allows users to map their instrument-specific - column names to the canonical column names used in the main processing pipeline. - """ - # Load actual file schemas - pbp_schema = load_schema(pbp_file) - hk_schema = load_schema(hk_file) - - # Get canonical schemas - canonical_schemas = get_canonical_schemas() - - # Create column mapping templates - pbp_mapping = {} - hk_mapping = {} - - # For PbP: map file columns to canonical columns - for canonical_col in canonical_schemas["pbp_canonical"]: - # Try to find exact match first - matching_file_col = None - for file_col in pbp_schema.keys(): - if file_col.lower() == canonical_col.lower(): - matching_file_col = file_col - break - - # If exact match found, use it; otherwise leave as template - pbp_mapping[canonical_col] = ( - matching_file_col - or f"YOUR_COLUMN_NAME_FOR_{canonical_col.replace(' ', '_').replace('(', '').replace(')', '').upper()}" - ) - - # For HK: map file columns to canonical columns - for canonical_col in canonical_schemas["hk_canonical"]: - matching_file_col = None - for file_col in hk_schema.keys(): - if file_col.lower() == canonical_col.lower(): - matching_file_col = file_col - break - - hk_mapping[canonical_col] = ( - 
matching_file_col - or f"YOUR_COLUMN_NAME_FOR_{canonical_col.replace(' ', '_').replace('(', '').replace(')', '').upper()}" - ) - - # Build enhanced config - config = { - "# INSTRUCTIONS": [ - "This config file contains both schema definitions and column mappings.", - "1. The *_schema sections define the data types for your input files.", - "2. The *_column_mapping sections map your file columns to canonical names.", - "3. Replace placeholder values (YOUR_COLUMN_NAME_FOR_*) with actual column names from your files.", - "4. If your file doesn't have a particular canonical column, set it to null or remove the line.", - "5. The output parquet files will use the canonical column names for consistency.", - ], - "pbp_schema": pbp_schema, - "hk_schema": hk_schema, - "pbp_canonical_schema": canonical_schemas["pbp_canonical"], - "hk_canonical_schema": canonical_schemas["hk_canonical"], - "pbp_column_mapping": pbp_mapping, - "hk_column_mapping": hk_mapping, - } - - with open(output_file, "w") as f: - yaml.dump(config, f, sort_keys=False, default_flow_style=False) - - print(f"Enhanced config with column mapping saved to: {output_file}") - print("\nNext steps:") - print( - "1. Open the config file and replace placeholder column mappings with your actual column names" - ) - print( - "2. Remove or set to null any canonical columns that don't exist in your data" - ) - print("3. Use this config file with the updated CSV to Parquet conversion process") - - -def apply_column_mapping( - df: pd.DataFrame, column_mapping: dict[str, str | None] -) -> pd.DataFrame: - """ - Apply column name mapping to standardize column names. 
- - Parameters - ---------- - df : pd.DataFrame - Input dataframe with instrument-specific column names - column_mapping : dict[str, str | None] - Mapping from canonical names to file column names - - Returns - ------- - pd.DataFrame - DataFrame with standardized column names - """ - # Create reverse mapping: file_column_name -> canonical_name - reverse_mapping = {} - for canonical_name, file_column in column_mapping.items(): - if ( - file_column - and file_column in df.columns - and not file_column.startswith("YOUR_COLUMN_NAME_FOR_") - ): - reverse_mapping[file_column] = canonical_name - - # Rename columns using reverse mapping - df_renamed = df.rename(columns=reverse_mapping) - - return df_renamed - - -# Example usage -if __name__ == "__main__": - # Legacy function for backward compatibility - # generate_combined_config("pbp_meta.parquet", "hk_meta.parquet") - - # New enhanced function - pbp_tmp_file = "/data/user/bertoz_b/merlin6data/SP2XR_code/tests/data/mini_SP2XR_PbP_20190409110737_x0001.zip" - hk_tmp_file = "/data/user/bertoz_b/merlin6data/SP2XR_code/tests/data/mini_SP2XR_hk_20190409110737_x0001.zip" - generate_mapping_template(pbp_tmp_file, hk_tmp_file) diff --git a/meta_files/hk_meta.parquet b/meta_files/hk_meta.parquet deleted file mode 100644 index 1fc1eb7..0000000 Binary files a/meta_files/hk_meta.parquet and /dev/null differ diff --git a/meta_files/pbp_meta.parquet b/meta_files/pbp_meta.parquet deleted file mode 100644 index 2c08a19..0000000 Binary files a/meta_files/pbp_meta.parquet and /dev/null differ diff --git a/scripts/sp2xr_generate_config.py b/scripts/sp2xr_generate_config.py new file mode 100644 index 0000000..901194f --- /dev/null +++ b/scripts/sp2xr_generate_config.py @@ -0,0 +1,458 @@ +#!/usr/bin/env python3 +""" +Generate SP2XR configuration files by automatically detecting PbP and HK files in a directory. 
+ +This script scans a directory for SP2XR data files (CSV/ZIP/Parquet) and generates +configuration files with proper schemas and column mappings. +""" + +from __future__ import annotations + +import argparse +import pandas as pd +import yaml +import os +from pathlib import Path +from typing import Any + + +def infer_general_dtype(dtype: Any) -> str: + """Infer general data type from pandas dtype.""" + if pd.api.types.is_integer_dtype(dtype): + return "int" + elif pd.api.types.is_float_dtype(dtype): + return "float" + elif pd.api.types.is_datetime64_any_dtype(dtype): + return "datetime" + else: + return "string" + + +def find_sp2xr_files(directory: str | Path) -> tuple[list[Path], list[Path]]: + """ + Find PbP and HK files in the given directory and all subdirectories. + + Parameters + ---------- + directory : str | Path + Directory to search for SP2XR files (searches recursively) + + Returns + ------- + tuple[list[Path], list[Path]] + Lists of PbP files and HK files found + """ + directory = Path(directory) + if not directory.exists(): + raise FileNotFoundError(f"Directory not found: {directory}") + + # Common SP2XR file patterns + pbp_patterns = ["*PbP*", "*pbp*", "*Pbp*"] + hk_patterns = ["*hk*", "*HK*", "*Hk*"] + file_extensions = ["*.csv", "*.zip", "*.parquet"] + + pbp_files = [] + hk_files = [] + + # Search for files matching patterns (including subdirectories) + for ext in file_extensions: + for pattern in pbp_patterns: + pbp_files.extend(directory.glob(f"**/{pattern}{ext}")) + for pattern in hk_patterns: + hk_files.extend(directory.glob(f"**/{pattern}{ext}")) + + # Remove duplicates and sort + pbp_files = sorted(list(set(pbp_files))) + hk_files = sorted(list(set(hk_files))) + + return pbp_files, hk_files + + +def load_schema(input_file: str | Path, nrows: int = 100) -> dict[str, str]: + """ + Load schema from input file by inferring column types. 
+ + Parameters + ---------- + input_file : str | Path + Path to the input file + nrows : int + Number of rows to read for type inference (for CSV files) + + Returns + ------- + dict[str, str] + Mapping of column names to data types + """ + ext = os.path.splitext(str(input_file))[1].lower() + + if ext in [".csv", ".zip"]: + df = pd.read_csv(input_file, nrows=nrows) + elif ext == ".parquet": + # For parquet, the whole file is loaded here; only the column dtypes are kept + pf = pd.read_parquet(input_file, engine="pyarrow") + df = pf.head(0) # Empty dataframe with schema + else: + raise ValueError(f"Unsupported file format: {ext}") + + schema = {col: infer_general_dtype(dtype) for col, dtype in df.dtypes.items()} + return schema + + +def get_canonical_schemas() -> dict[str, dict[str, str]]: + """Return canonical column schemas for SP2XR data.""" + pbp_canonical = { + "Time (sec)": "float", + "Packet Time Stamp": "float", + "Flag": "float", + "Dropped Records": "float", + "Record Count": "float", + "Record Size": "float", + "Particle Time Stamp": "float", + "Particle Flags": "float", + "Scatter relPeak": "float", + "Scatter Transit Time": "float", + "Scatter Peak Time": "float", + "Scatter FWHM": "float", + "Scatter Size (nm)": "float", + "Incand relPeak": "float", + "Incand Transit Time": "float", + "Incand Peak Time": "float", + "Incand FWHM": "float", + "Incand Delay": "float", + "Incand Mass (fg)": "float", + "Reserved": "float", + } + + hk_canonical = { + "Time Stamp": "datetime", + "Time (sec)": "float", + "Sample Flow Controller Read (sccm)": "float", + "Sample Flow Controller Read (vccm)": "float", + "Time Stamp (UTC sec)": "float", + "Elapsed Time": "float", + "Error Code": "float", + "Packet Time Stamp": "float", + "Laser TEC Temp (C)": "float", + "Crystal TEC Temp (C)": "float", + "Inlet Air Temp (C)": "float", + "Computer Heatsink Temp (C)": "float", + "Laser Heatsink Temp (C)": "float", + "Outlet Air Temp (C)": "float", + "YAG Output Monitor (V)": "float", + "Cavity
Pressure (hPa)": "float", + "Laser Driver Power Monitor (uA)": "float", + "Laser Driver Current Limit Monitor (A)": "float", + "Laser Driver Current Monitor (A)": "float", + } + + return {"pbp_canonical": pbp_canonical, "hk_canonical": hk_canonical} + + +def generate_basic_config( + pbp_file: Path, + hk_file: Path, + schema_output: str = "config_schema.yaml", + ini_file: str = None, + instrument_output: str = None, +) -> None: + """Generate basic config schema file with data type definitions only.""" + print(f"Reading PbP schema from: {pbp_file}") + pbp_schema = load_schema(pbp_file) + + print(f"Reading HK schema from: {hk_file}") + hk_schema = load_schema(hk_file) + + config = { + "pbp_schema": pbp_schema, + "hk_schema": hk_schema, + } + + # Create output directory if it doesn't exist + schema_path = Path(schema_output) + schema_path.parent.mkdir(parents=True, exist_ok=True) + + with open(schema_output, "w") as f: + yaml.dump(config, f, sort_keys=False) + + print(f"Data schema config saved to: {schema_output}") + + # Generate separate instrument settings config from INI file + if ini_file: + # Determine instrument settings output filename + if instrument_output: + instrument_path = Path(instrument_output) + else: + instrument_path = ( + schema_path.parent / f"{schema_path.stem}_instrument_settings.yaml" + ) + + try: + from sp2xr.helpers import export_xr_ini_to_yaml_with_source + + export_xr_ini_to_yaml_with_source(ini_file, str(instrument_path)) + print(f"Instrument settings config saved to: {instrument_path}") + except ImportError: + # Fallback to original function if new one doesn't exist yet + from sp2xr.helpers import export_xr_ini_to_yaml + + export_xr_ini_to_yaml(ini_file, str(instrument_path)) + print(f"Instrument settings config saved to: {instrument_path}") + except Exception as e: + print(f"Warning: Could not convert INI to YAML: {e}") + # Still reference the original INI file as fallback + config["calibration_file"] = ini_file + with open(schema_output, 
"w") as f: + yaml.dump(config, f, sort_keys=False) + print(f"Added INI file reference as fallback: {Path(ini_file).name}") + + +def generate_mapping_config( + pbp_file: Path, + hk_file: Path, + schema_output: str = "config_schema_with_mapping.yaml", + ini_file: str = None, + instrument_output: str = None, +) -> None: + """Generate enhanced config schema with column mapping templates.""" + print(f"Reading PbP schema from: {pbp_file}") + pbp_schema = load_schema(pbp_file) + + print(f"Reading HK schema from: {hk_file}") + hk_schema = load_schema(hk_file) + + # Get canonical schemas + canonical_schemas = get_canonical_schemas() + + # Create column mapping templates + pbp_mapping = {} + hk_mapping = {} + + # For PbP: map file columns to canonical columns + for canonical_col in canonical_schemas["pbp_canonical"]: + matching_file_col = None + for file_col in pbp_schema.keys(): + if file_col.lower() == canonical_col.lower(): + matching_file_col = file_col + break + + pbp_mapping[canonical_col] = ( + matching_file_col or canonical_col # Use canonical name as default + ) + + # For HK: map file columns to canonical columns + for canonical_col in canonical_schemas["hk_canonical"]: + matching_file_col = None + for file_col in hk_schema.keys(): + if file_col.lower() == canonical_col.lower(): + matching_file_col = file_col + break + + hk_mapping[canonical_col] = ( + matching_file_col or canonical_col # Use canonical name as default + ) + + # Build enhanced config + config = { + "# INSTRUCTIONS": [ + "This config file contains both schema definitions and column mappings.", + "1. The *_schema sections define the data types for your input files.", + "2. The *_column_mapping sections map your file columns to canonical names.", + "3. Update column mappings if your files use different column names.", + "4. If your file doesn't have a particular canonical column, set it to null or remove the line.", + "5. 
The output parquet files will use the canonical column names for consistency.", + ], + "pbp_schema": pbp_schema, + "hk_schema": hk_schema, + "pbp_canonical_schema": canonical_schemas["pbp_canonical"], + "hk_canonical_schema": canonical_schemas["hk_canonical"], + "pbp_column_mapping": pbp_mapping, + "hk_column_mapping": hk_mapping, + } + + # Create output directory if it doesn't exist + schema_path = Path(schema_output) + schema_path.parent.mkdir(parents=True, exist_ok=True) + + with open(schema_output, "w") as f: + yaml.dump(config, f, sort_keys=False, default_flow_style=False) + + print(f"Enhanced data schema config with column mapping saved to: {schema_output}") + + # Generate separate instrument settings config from INI file + if ini_file: + # Determine instrument settings output filename + if instrument_output: + instrument_path = Path(instrument_output) + else: + instrument_path = ( + schema_path.parent / f"{schema_path.stem}_instrument_settings.yaml" + ) + + try: + from sp2xr.helpers import export_xr_ini_to_yaml_with_source + + export_xr_ini_to_yaml_with_source(ini_file, str(instrument_path)) + print(f"Instrument settings config saved to: {instrument_path}") + except ImportError: + # Fallback to original function if new one doesn't exist yet + from sp2xr.helpers import export_xr_ini_to_yaml + + export_xr_ini_to_yaml(ini_file, str(instrument_path)) + print(f"Instrument settings config saved to: {instrument_path}") + except Exception as e: + print(f"Warning: Could not convert INI to YAML: {e}") + # Still reference the original INI file as fallback + config["calibration_file"] = ini_file + with open(schema_output, "w") as f: + yaml.dump(config, f, sort_keys=False, default_flow_style=False) + print(f"Added INI file reference as fallback: {Path(ini_file).name}") + + +def parse_args(): + """Parse command line arguments.""" + parser = argparse.ArgumentParser( + description="Generate SP2XR configuration files from data directory", + 
formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # Generate basic config schema from files in current directory + python sp2xr_generate_config.py . + + # Generate config schema from specific directory + python sp2xr_generate_config.py /path/to/sp2xr/data + + # Generate config schema with column mapping support + python sp2xr_generate_config.py /path/to/data --mapping + + # Specify custom schema and instrument settings filenames + python sp2xr_generate_config.py /path/to/data --schema-output my_schema.yaml --instrument-output my_settings.yaml + + # Generate mapping config with custom names + python sp2xr_generate_config.py /path/to/data --mapping --schema-output campaign_schema.yaml --instrument-output campaign_settings.yaml + """, + ) + + parser.add_argument( + "directory", help="Directory containing SP2XR files (PbP and HK files)" + ) + + parser.add_argument( + "--schema-output", + "-s", + default="config_schema.yaml", + help="Output filename for data schema config (default: config_schema.yaml)", + ) + + parser.add_argument( + "--instrument-output", + "-i", + default=None, + help="Output filename for instrument settings config (default: {schema_output}_instrument_settings.yaml)", + ) + + parser.add_argument( + "--mapping", + "-m", + action="store_true", + help="Generate config with column mapping support (creates config_with_mapping.yaml)", + ) + + parser.add_argument( + "--pbp-file", help="Specify specific PbP file instead of auto-detection" + ) + + parser.add_argument( + "--hk-file", help="Specify specific HK file instead of auto-detection" + ) + + return parser.parse_args() + + +def main(): + """Main entry point.""" + args = parse_args() + + try: + # Use specific files if provided, otherwise auto-detect + if args.pbp_file and args.hk_file: + pbp_file = Path(args.pbp_file) + hk_file = Path(args.hk_file) + + if not pbp_file.exists(): + raise FileNotFoundError(f"PbP file not found: {pbp_file}") + if not hk_file.exists(): + raise 
FileNotFoundError(f"HK file not found: {hk_file}") + + else: + # Auto-detect files in directory + print(f"Searching for SP2XR files in: {args.directory}") + pbp_files, hk_files = find_sp2xr_files(args.directory) + + if not pbp_files: + raise FileNotFoundError( + "No PbP files found. Looking for files with 'PbP', 'pbp', or 'Pbp' in the name." + ) + if not hk_files: + raise FileNotFoundError( + "No HK files found. Looking for files with 'hk', 'HK', or 'Hk' in the name." + ) + + # Use the first file found for each type + pbp_file = pbp_files[0] + hk_file = hk_files[0] + + print(f"Found {len(pbp_files)} PbP file(s), using: {pbp_file.name}") + print(f"Found {len(hk_files)} HK file(s), using: {hk_file.name}") + + # Check for INI files in the directory + try: + from sp2xr.helpers import find_and_validate_ini_files + + ini_file = find_and_validate_ini_files(str(args.directory)) + + if ini_file: + print(f"Found consistent INI calibration file: {Path(ini_file).name}") + else: + print("No INI calibration files found in directory") + + except ValueError as e: + print(f"WARNING: {e}") + print("You should process data with different calibrations separately.") + except ImportError: + print("Could not import INI validation function") + + # Generate configuration + if args.mapping: + schema_file = ( + "config_schema_with_mapping.yaml" + if args.schema_output == "config_schema.yaml" + else args.schema_output + ) + generate_mapping_config( + pbp_file, + hk_file, + schema_file, + ini_file if "ini_file" in locals() else None, + args.instrument_output, + ) + else: + generate_basic_config( + pbp_file, + hk_file, + args.schema_output, + ini_file if "ini_file" in locals() else None, + args.instrument_output, + ) + + print("\nConfiguration generation completed successfully!") + + except Exception as e: + print(f"Error: {e}") + return 1 + + return 0 + + +if __name__ == "__main__": + exit(main()) diff --git a/src/sp2xr/helpers.py b/src/sp2xr/helpers.py index ca68750..e11bbc6 100644 --- 
a/src/sp2xr/helpers.py +++ b/src/sp2xr/helpers.py @@ -4,6 +4,7 @@ import re import yaml import argparse import pandas as pd +import numpy as np import dask.dataframe as dd from pathlib import Path from dask_jobqueue import SLURMCluster @@ -43,7 +44,7 @@ def load_and_resolve_config(args): "rho_eff": choose(args.BC_rho, base, "bc.rho_eff", None), "BC_type": choose(args.BC_type, base, "bc.type", None), "cluster": { - "use_local": choose(args.local, base, "cluster.use_local"), + "use_local": args.local or get(base, "cluster.use_local", False), "cores": choose(args.cores, base, "cluster.cores", None), "memory": choose(args.memory, base, "cluster.memory", None), "walltime": choose(args.walltime, base, "cluster.walltime", None), @@ -98,9 +99,9 @@ def load_and_resolve_config(args): def initialize_cluster(config): if config["cluster"].get("use_local", False): - return make_slurm_cluster(config) - else: return make_local_cluster(config) + else: + return make_slurm_cluster(config) def make_slurm_cluster(config): @@ -137,14 +138,12 @@ def make_local_cluster(config): total_cores = multiprocessing.cpu_count() total_memory = psutil.virtual_memory().total # in bytes - # Use all cores or config override - cores = config["cluster"].get("cores", total_cores) - memory_limit = config["cluster"].get("memory") + # For local clusters, always auto-detect resources (ignore config values meant for SLURM) + cores = total_cores + memory_limit_bytes = int(total_memory * 0.8) + memory_limit = f"{memory_limit_bytes // (1024**3)}GB" - # If memory not provided, use 80% of total - if memory_limit is None: - memory_limit_bytes = int(total_memory * 0.8) - memory_limit = f"{memory_limit_bytes // (1024**3)}GB" + print(f"Auto-detected local resources: {cores} cores, {memory_limit} memory") cluster = LocalCluster( n_workers=cores, @@ -161,7 +160,7 @@ def make_local_cluster(config): timeout="300s", ) print(f"Dask LOCAL dashboard: {client.dashboard_link}") - return client + return client, cluster def 
extract_partitioned_datetimes(parquet_path: str) -> list[pd.Timestamp]: @@ -288,7 +287,9 @@ def parse_args(): ) # cluster / resource knobs (all optional) - p.add_argument("--local", action="store_false", help="Run Local Cluster") + p.add_argument( + "--local", action="store_true", help="Use local cluster instead of SLURM" + ) p.add_argument("--cores", type=int, default=None, help="CPU cores per SLURM job") p.add_argument("--memory", default=None, help="RAM per job") p.add_argument("--walltime", default=None, help="SLURM wall-time") @@ -419,7 +420,7 @@ def read_xr_ini_file(fname): return default params = {} - with open(fname, "r") as f: + with open(fname, "r", encoding="utf-8", errors="ignore") as f: for line in f: if "Disqualified" in line: # skip irrelevant lines continue @@ -433,6 +434,99 @@ def read_xr_ini_file(fname): return params +def find_and_validate_ini_files(directory): + """ + Find all .ini files in a directory and validate they are consistent. + + Parameters + ---------- + directory : str + Directory to search for .ini files + + Returns + ------- + str or None + Path to representative .ini file if all are consistent, None if none found + + Raises + ------ + ValueError + If multiple different .ini files are found + """ + import os + import glob + + # Find all .ini files recursively + ini_pattern = os.path.join(directory, "**", "*Calibration*.ini") + ini_files = glob.glob(ini_pattern, recursive=True) + + if not ini_files: + return None + + if len(ini_files) == 1: + return ini_files[0] + + # Compare all .ini files to ensure they're identical + reference_params = None + reference_file = None + + for ini_file in ini_files: + try: + params = read_xr_ini_file(ini_file) + + if reference_params is None: + reference_params = params + reference_file = ini_file + else: + # Compare with reference + if params != reference_params: + raise ValueError( + f"Multiple different .ini files found in {directory}:\n" + f" Reference: {reference_file}\n" + f" Different: 
{ini_file}\n" + f"Please process data with different calibrations separately." + ) + except Exception as e: + import warnings + + warnings.warn(f"Could not read .ini file {ini_file}: {e}") + + return reference_file + + +def export_xr_ini_to_yaml_with_source(ini_path, yaml_path): + """ + Convert an SP2-XR .ini file to a structured YAML configuration with source traceability. + - Groups instrument parameters under 'instrument_parameters' + - Pre-populates calibration and histogram sections for user editing + - Merges with existing YAML if present (preserving user edits) + - Includes source INI file path for traceability + """ + from datetime import datetime + + ini_params = read_xr_ini_file(ini_path) + yaml_path = Path(yaml_path) + + # Base structure with metadata + params = { + "metadata": { + "source_ini_file": str(Path(ini_path).resolve()), + "generated_on": datetime.now().isoformat(), + "generated_by": "sp2xr_generate_config.py", + }, + "instrument_parameters": ini_params, + "Signal_saturation": {"IncSatPoint": 1.7e9, "ScattSatPoint": 1.7e9}, + } + + # Save YAML + with open(yaml_path, "w") as f: + yaml.dump(params, f, sort_keys=False) + + print( + f"Parameters exported to {yaml_path} with structured sections and editable placeholders" + ) + + def export_xr_ini_to_yaml(ini_path, yaml_path): """ Convert an SP2-XR .ini file to a structured YAML configuration. 
@@ -480,7 +574,7 @@ def export_xr_ini_to_yaml(ini_path, yaml_path): yaml.dump(params, f, sort_keys=False) print( - f"✅ Parameters exported to {yaml_path} with structured sections and editable placeholders" + f"Parameters exported to {yaml_path} with structured sections and editable placeholders" ) @@ -614,3 +708,41 @@ def extract_sp2xr_filename_parts(file_path: str | Path) -> tuple[str, str]: folder_name = file_path_obj.parent.name return file_name_cut, folder_name + + +def calculate_delta_sec(df): + """ + This function calculates the difference in seconds between the columns + 'Time (sec)' and 'first_val' present in the input dataframe + + Parameters + ---------- + df : pandas dataframe + The columns 'Time (sec)' and 'first_val' must be present in the DataFrame. + + Returns + ------- + int + Floored seconds between the values in the two columns. + + """ + return np.floor(df["Time (sec)"]) - df["first_val"] + + +def extract_datetime(df): + """ + This function selects the datetime out of the SP2XR file name. + + Parameters + ---------- + df : Pandas DataFrame + DataFrame containing the column 'path'. + + Returns + ------- + Pandas Series + Date and time corresponding to the date present in the 'path' column.
+ + """ + # return pd.to_datetime(df['orig_file_name'].split('_')[-2]) + return pd.to_datetime(df["path"].split("_")[-2]) diff --git a/tests/config.yaml b/tests/config.yaml deleted file mode 100644 index 701e851..0000000 --- a/tests/config.yaml +++ /dev/null @@ -1,159 +0,0 @@ -pbp_schema: - Time (sec): float - Packet Time Stamp: float - Flag: float - Dropped Records: float - Record Count: float - Record Size: float - Particle Time Stamp: float - Particle Flags: float - Scatter relPeak: float - Scatter Transit Time: float - Scatter Peak Time: float - Scatter FWHM: float - Scatter Size (nm): float - Incand relPeak: float - Incand Transit Time: float - Incand Peak Time: float - Incand FWHM: float - Incand Delay: float - Incand Mass (fg): float - Reserved: float -hk_schema: - Time Stamp: datetime - Time (sec): float - Time Stamp (UTC sec): float - Elapsed Time: float - Error Code: float - Packet Time Stamp: float - Laser TEC Temp (C): float - Crystal TEC Temp (C): float - Inlet Air Temp (C): float - Computer Heatsink Temp (C): float - Laser Heatsink Temp (C): float - Outlet Air Temp (C): float - YAG Output Monitor (V): float - Cavity Pressure (hPa): float - Laser Driver Power Monitor (uA): float - Laser Driver Current Limit Monitor (A): float - Laser Driver Current Monitor (A): float - Laser TEC Sense: float - Laser Over Temp (On/Off): float - +5V Laser Rail (V): float - ' +5V Rail (V)': float - +12V Rail (V): float - High Voltage (V): float - Battery Temp (C): float - UPS Output (V): float - 12V Iso Rail (V): float - 5V Iso Rail (V): float - 3.3V Iso Rail (V): float - Spare 22: float - Spare 23: float - 408 Board Spare 0: float - 408 Board Spare 1: float - 408 Board Spare 2: float - 408 Board Spare 3: float - 408 Board Spare 4: float - Purge Flow Monitor (sccm): float - System Input Voltage (V): float - Board Temperature (C): float - 408 Board Spare 8: float - 408 Board Spare 9: float - 408 Board Spare 10: float - 408 Board Spare 11: float - 408 Board Spare 12: float 
- 408 Board Spare 13: float - 408 Board Spare 14: float - 408 Board Spare 15: float - Sheath Flow Controller Read (vccm): float - Sheath Flow Controller Read (sccm): float - Sheath Flow Controller Pressure (psia): float - Sheath Flow Controller Temperature (C): float - Sample Flow Controller Read (vccm): float - Sample Flow Controller Read (sccm): float - Sample Flow Controller Pressure (psia): float - Sample Flow Controller Temperature (C): float - Fan 1 (RPM): float - Fan 2 (RPM): float - Laser Fan (RPM): float - Spare tach: float - Threshold Crossing Events: float - Dual Qualified Scatter and Incand Particles: float - Qualified Scatter Only Particles: float - Qualified Incand Only Particles: float - Disqualified Due to Scatter Saturation: float - Disqualified Due to Scatter Transit Time Min: float - Disqualified Due to Scatter Transit Time Max: float - Disqualified Due to Scatter FWHM Min: float - Disqualified Due to Scatter FWHM Max: float - Scatter Inter Part Period Min Violation: float - Disqualified Due to Incand Saturation: float - Disqualified Due to Incand Transit Time Min: float - Disqualified Due to Incand Transit Time Max: float - Disqualified Due to Incand FWHM Min: float - Disqualified Due to Incand FWHM Max: float - Incand Inter Part Period Min Violation: float - Baseline Sizer Lo: float - Baseline Sizer Hi: float - Baseline Incand Lo: float - Baseline Incand Hi: float - Bandwidth Sizer Hi: float - Bandwidth Sizer Lo: float - Bandwidth Incand Lo: float - Bandwidth Incand Hi: float - ABD-0408 HK ADCs min: float - ABD-0436 HK ADCs min: float - ABD-0408 HK ADCs max: float - ABD-0436 HK ADCs max: float - Incand Particle Conc (cts/ccm): float - Scattering Particle Conc (cts/ccm): float - Incand Mass Conc (fg/sccm): float - Scattering Mass Conc (fg/sccm): float - Sheath Flow Set Point: float - Sample Flow Set Point: float - Laser Temp Set Point: float - Laser Current Set Point: float - Spare 4 Set Point: float - Spare 5 Set Point: float - PMT HV Set 
Point: float - Particle Density (g/ccm): float - PbP Packet Time: float - Scatter Bin 1: float - Scatter Bin 2: float - Scatter Bin 3: float - Scatter Bin 4: float - Scatter Bin 5: float - Scatter Bin 6: float - Scatter Bin 7: float - Scatter Bin 8: float - Scatter Bin 9: float - Scatter Bin 10: float - Scatter Bin 11: float - Scatter Bin 12: float - Scatter Bin 13: float - Scatter Bin 14: float - Scatter Bin 15: float - Scatter Bin 16: float - Scatter Bin 17: float - Scatter Bin 18: float - Scatter Bin 19: float - Incand Bin 1: float - Incand Bin 2: float - Incand Bin 3: float - Incand Bin 4: float - Incand Bin 5: float - Incand Bin 6: float - Incand Bin 7: float - Incand Bin 8: float - Incand Bin 9: float - Incand Bin 10: float - Incand Bin 11: float - Incand Bin 12: float - Incand Bin 13: float - Incand Bin 14: float - Incand Bin 15: float - Incand Bin 16: float - Incand Bin 17: float - Incand Bin 18: float - Incand Bin 19: float diff --git a/tests/run_config.yaml b/tests/run_config.yaml deleted file mode 100644 index 4c56b70..0000000 --- a/tests/run_config.yaml +++ /dev/null @@ -1,59 +0,0 @@ -paths: - input_pbp: /data/user/bertoz_b/merlin6data/SP2XR/data/NyA/SP2XR_pbp_parquet - input_hk: /data/user/bertoz_b/merlin6data/SP2XR/data/NyA/SP2XR_hk_parquet - output: tests/SP2XR_NyA_processed_data_60s - instrument_config: tests/instrument_config.yaml - -workflow: - conc: true - BC_hist: true - scatt_hist: true - timelag_hist: false - dt: 60 # seconds - repartition: '1h' - max_partition_size: "200MB" - saving_schema: ['date'] - -cluster: - use_local: false - cores: 16 - processes: 8 - memory: 128GB - walltime: "2-00:59:00" - partition: general - log_dir: ./slurm_out - -chunking: - freq: '5d' - start_date: '2020-08-24' - end_date: null - -bc: - rho_eff: 1800 - type: constant_effective_density - -histo: - inc: - min_mass: 0.3 - max_mass: 400 - n_bins: 50 - scatt: - min_D: 100 - max_D: 500 - n_bins: 20 - timelag: - min: -10 - max: 400 - n_bins: 100 - -mixing_state: - 
threshold: 50 - inc_scatt_ratio: 1.1 - -calibration: - incandescence: - curve_type: "polynomial" - parameters: [0.05, 2.0470000507725255e-07] - scattering: - curve_type: "powerlaw" - parameters: [17.21724257, 0.16908516, -1.49431104] \ No newline at end of file