style: Ruff auto‑fixes in SP2XR_toolkit
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -7,4 +7,5 @@ sp2xr/__pycache__/
|
||||
|
||||
core.*
|
||||
|
||||
src/*.egg-info/
|
||||
.venv/
|
||||
159
meta_files/config.yaml
Normal file
159
meta_files/config.yaml
Normal file
@@ -0,0 +1,159 @@
|
||||
pbp_schema:
|
||||
Time (sec): string
|
||||
Packet Time Stamp: string
|
||||
Flag: string
|
||||
Dropped Records: string
|
||||
Record Count: string
|
||||
Record Size: string
|
||||
Particle Time Stamp: string
|
||||
Particle Flags: string
|
||||
Scatter relPeak: string
|
||||
Scatter Transit Time: string
|
||||
Scatter Peak Time: string
|
||||
Scatter FWHM: string
|
||||
Scatter Size (nm): string
|
||||
Incand relPeak: string
|
||||
Incand Transit Time: string
|
||||
Incand Peak Time: string
|
||||
Incand FWHM: string
|
||||
Incand Delay: string
|
||||
Incand Mass (fg): string
|
||||
Reserved: string
|
||||
hk_schema:
|
||||
Time Stamp: string
|
||||
Time (sec): string
|
||||
Time Stamp (UTC sec): string
|
||||
Elapsed Time: string
|
||||
Error Code: string
|
||||
Packet Time Stamp: string
|
||||
Laser TEC Temp (C): string
|
||||
Crystal TEC Temp (C): string
|
||||
Inlet Air Temp (C): string
|
||||
Computer Heatsink Temp (C): string
|
||||
Laser Heatsink Temp (C): string
|
||||
Outlet Air Temp (C): string
|
||||
YAG Output Monitor (V): string
|
||||
Cavity Pressure (hPa): string
|
||||
Laser Driver Power Monitor (uA): string
|
||||
Laser Driver Current Limit Monitor (A): string
|
||||
Laser Driver Current Monitor (A): string
|
||||
Laser TEC Sense: string
|
||||
Laser Over Temp (On/Off): string
|
||||
+5V Laser Rail (V): string
|
||||
' +5V Rail (V)': string
|
||||
+12V Rail (V): string
|
||||
High Voltage (V): string
|
||||
Battery Temp (C): string
|
||||
UPS Output (V): string
|
||||
12V Iso Rail (V): string
|
||||
5V Iso Rail (V): string
|
||||
3.3V Iso Rail (V): string
|
||||
Spare 22: string
|
||||
Spare 23: string
|
||||
408 Board Spare 0: string
|
||||
408 Board Spare 1: string
|
||||
408 Board Spare 2: string
|
||||
408 Board Spare 3: string
|
||||
408 Board Spare 4: string
|
||||
Purge Flow Monitor (sccm): string
|
||||
System Input Voltage (V): string
|
||||
Board Temperature (C): string
|
||||
408 Board Spare 8: string
|
||||
408 Board Spare 9: string
|
||||
408 Board Spare 10: string
|
||||
408 Board Spare 11: string
|
||||
408 Board Spare 12: string
|
||||
408 Board Spare 13: string
|
||||
408 Board Spare 14: string
|
||||
408 Board Spare 15: string
|
||||
Sheath Flow Controller Read (vccm): string
|
||||
Sheath Flow Controller Read (sccm): string
|
||||
Sheath Flow Controller Pressure (psia): string
|
||||
Sheath Flow Controller Temperature (C): string
|
||||
Sample Flow Controller Read (vccm): string
|
||||
Sample Flow Controller Read (sccm): string
|
||||
Sample Flow Controller Pressure (psia): string
|
||||
Sample Flow Controller Temperature (C): string
|
||||
Fan 1 (RPM): string
|
||||
Fan 2 (RPM): string
|
||||
Laser Fan (RPM): string
|
||||
Spare tach: string
|
||||
Threshold Crossing Events: string
|
||||
Dual Qualified Scatter and Incand Particles: string
|
||||
Qualified Scatter Only Particles: string
|
||||
Qualified Incand Only Particles: string
|
||||
Disqualified Due to Scatter Saturation: string
|
||||
Disqualified Due to Scatter Transit Time Min: string
|
||||
Disqualified Due to Scatter Transit Time Max: string
|
||||
Disqualified Due to Scatter FWHM Min: string
|
||||
Disqualified Due to Scatter FWHM Max: string
|
||||
Scatter Inter Part Period Min Violation: string
|
||||
Disqualified Due to Incand Saturation: string
|
||||
Disqualified Due to Incand Transit Time Min: string
|
||||
Disqualified Due to Incand Transit Time Max: string
|
||||
Disqualified Due to Incand FWHM Min: string
|
||||
Disqualified Due to Incand FWHM Max: string
|
||||
Incand Inter Part Period Min Violation: string
|
||||
Baseline Sizer Lo: string
|
||||
Baseline Sizer Hi: string
|
||||
Baseline Incand Lo: string
|
||||
Baseline Incand Hi: string
|
||||
Bandwidth Sizer Hi: string
|
||||
Bandwidth Sizer Lo: string
|
||||
Bandwidth Incand Lo: string
|
||||
Bandwidth Incand Hi: string
|
||||
ABD-0408 HK ADCs min: string
|
||||
ABD-0436 HK ADCs min: string
|
||||
ABD-0408 HK ADCs max: string
|
||||
ABD-0436 HK ADCs max: string
|
||||
Incand Particle Conc (cts/ccm): string
|
||||
Scattering Particle Conc (cts/ccm): string
|
||||
Incand Mass Conc (fg/sccm): string
|
||||
Scattering Mass Conc (fg/sccm): string
|
||||
Sheath Flow Set Point: string
|
||||
Sample Flow Set Point: string
|
||||
Laser Temp Set Point: string
|
||||
Laser Current Set Point: string
|
||||
Spare 4 Set Point: string
|
||||
Spare 5 Set Point: string
|
||||
PMT HV Set Point: string
|
||||
Particle Density (g/ccm): string
|
||||
PbP Packet Time: string
|
||||
Scatter Bin 1: string
|
||||
Scatter Bin 2: string
|
||||
Scatter Bin 3: string
|
||||
Scatter Bin 4: string
|
||||
Scatter Bin 5: string
|
||||
Scatter Bin 6: string
|
||||
Scatter Bin 7: string
|
||||
Scatter Bin 8: string
|
||||
Scatter Bin 9: string
|
||||
Scatter Bin 10: string
|
||||
Scatter Bin 11: string
|
||||
Scatter Bin 12: string
|
||||
Scatter Bin 13: string
|
||||
Scatter Bin 14: string
|
||||
Scatter Bin 15: string
|
||||
Scatter Bin 16: string
|
||||
Scatter Bin 17: string
|
||||
Scatter Bin 18: string
|
||||
Scatter Bin 19: string
|
||||
Incand Bin 1: string
|
||||
Incand Bin 2: string
|
||||
Incand Bin 3: string
|
||||
Incand Bin 4: string
|
||||
Incand Bin 5: string
|
||||
Incand Bin 6: string
|
||||
Incand Bin 7: string
|
||||
Incand Bin 8: string
|
||||
Incand Bin 9: string
|
||||
Incand Bin 10: string
|
||||
Incand Bin 11: string
|
||||
Incand Bin 12: string
|
||||
Incand Bin 13: string
|
||||
Incand Bin 14: string
|
||||
Incand Bin 15: string
|
||||
Incand Bin 16: string
|
||||
Incand Bin 17: string
|
||||
Incand Bin 18: string
|
||||
Incand Bin 19: string
|
||||
1
meta_files/hk_meta.csv
Normal file
1
meta_files/hk_meta.csv
Normal file
@@ -0,0 +1 @@
|
||||
Time Stamp,Time (sec),Time Stamp (UTC sec),Elapsed Time,Error Code,Packet Time Stamp,Laser TEC Temp (C),Crystal TEC Temp (C),Inlet Air Temp (C),Computer Heatsink Temp (C),Laser Heatsink Temp (C),Outlet Air Temp (C),YAG Output Monitor (V),Cavity Pressure (hPa),Laser Driver Power Monitor (uA),Laser Driver Current Limit Monitor (A),Laser Driver Current Monitor (A),Laser TEC Sense,Laser Over Temp (On/Off),+5V Laser Rail (V), +5V Rail (V),+12V Rail (V),High Voltage (V),Battery Temp (C),UPS Output (V),12V Iso Rail (V),5V Iso Rail (V),3.3V Iso Rail (V),Spare 22,Spare 23,408 Board Spare 0,408 Board Spare 1,408 Board Spare 2,408 Board Spare 3,408 Board Spare 4,Purge Flow Monitor (sccm),System Input Voltage (V),Board Temperature (C),408 Board Spare 8,408 Board Spare 9,408 Board Spare 10,408 Board Spare 11,408 Board Spare 12,408 Board Spare 13,408 Board Spare 14,408 Board Spare 15,Sheath Flow Controller Read (vccm),Sheath Flow Controller Read (sccm),Sheath Flow Controller Pressure (psia),Sheath Flow Controller Temperature (C),Sample Flow Controller Read (vccm),Sample Flow Controller Read (sccm),Sample Flow Controller Pressure (psia),Sample Flow Controller Temperature (C),Fan 1 (RPM),Fan 2 (RPM),Laser Fan (RPM),Spare tach,Threshold Crossing Events,Dual Qualified Scatter and Incand Particles,Qualified Scatter Only Particles,Qualified Incand Only Particles,Disqualified Due to Scatter Saturation,Disqualified Due to Scatter Transit Time Min,Disqualified Due to Scatter Transit Time Max,Disqualified Due to Scatter FWHM Min,Disqualified Due to Scatter FWHM Max,Scatter Inter Part Period Min Violation,Disqualified Due to Incand Saturation,Disqualified Due to Incand Transit Time Min,Disqualified Due to Incand Transit Time Max,Disqualified Due to Incand FWHM Min,Disqualified Due to Incand FWHM Max,Incand Inter Part Period Min Violation,Baseline Sizer Lo,Baseline Sizer Hi,Baseline Incand Lo,Baseline Incand Hi,Bandwidth Sizer Hi,Bandwidth Sizer Lo,Bandwidth Incand Lo,Bandwidth Incand 
Hi,ABD-0408 HK ADCs min,ABD-0436 HK ADCs min,ABD-0408 HK ADCs max,ABD-0436 HK ADCs max,Incand Particle Conc (cts/ccm),Scattering Particle Conc (cts/ccm),Incand Mass Conc (fg/sccm),Scattering Mass Conc (fg/sccm),Sheath Flow Set Point,Sample Flow Set Point,Laser Temp Set Point,Laser Current Set Point,Spare 4 Set Point,Spare 5 Set Point,PMT HV Set Point,Particle Density (g/ccm),PbP Packet Time,Scatter Bin 1,Scatter Bin 2,Scatter Bin 3,Scatter Bin 4,Scatter Bin 5,Scatter Bin 6,Scatter Bin 7,Scatter Bin 8,Scatter Bin 9,Scatter Bin 10,Scatter Bin 11,Scatter Bin 12,Scatter Bin 13,Scatter Bin 14,Scatter Bin 15,Scatter Bin 16,Scatter Bin 17,Scatter Bin 18,Scatter Bin 19,Incand Bin 1,Incand Bin 2,Incand Bin 3,Incand Bin 4,Incand Bin 5,Incand Bin 6,Incand Bin 7,Incand Bin 8,Incand Bin 9,Incand Bin 10,Incand Bin 11,Incand Bin 12,Incand Bin 13,Incand Bin 14,Incand Bin 15,Incand Bin 16,Incand Bin 17,Incand Bin 18,Incand Bin 19
|
||||
|
277
meta_files/hk_meta.yaml
Normal file
277
meta_files/hk_meta.yaml
Normal file
@@ -0,0 +1,277 @@
|
||||
file_type: csv
|
||||
name: hk_meta
|
||||
columns:
|
||||
- name: Time Stamp
|
||||
type: string
|
||||
- name: Time (sec)
|
||||
type: string
|
||||
- name: Time Stamp (UTC sec)
|
||||
type: string
|
||||
- name: Elapsed Time
|
||||
type: string
|
||||
- name: Error Code
|
||||
type: string
|
||||
- name: Packet Time Stamp
|
||||
type: string
|
||||
- name: Laser TEC Temp (C)
|
||||
type: string
|
||||
- name: Crystal TEC Temp (C)
|
||||
type: string
|
||||
- name: Inlet Air Temp (C)
|
||||
type: string
|
||||
- name: Computer Heatsink Temp (C)
|
||||
type: string
|
||||
- name: Laser Heatsink Temp (C)
|
||||
type: string
|
||||
- name: Outlet Air Temp (C)
|
||||
type: string
|
||||
- name: YAG Output Monitor (V)
|
||||
type: string
|
||||
- name: Cavity Pressure (hPa)
|
||||
type: string
|
||||
- name: Laser Driver Power Monitor (uA)
|
||||
type: string
|
||||
- name: Laser Driver Current Limit Monitor (A)
|
||||
type: string
|
||||
- name: Laser Driver Current Monitor (A)
|
||||
type: string
|
||||
- name: Laser TEC Sense
|
||||
type: string
|
||||
- name: Laser Over Temp (On/Off)
|
||||
type: string
|
||||
- name: +5V Laser Rail (V)
|
||||
type: string
|
||||
- name: ' +5V Rail (V)'
|
||||
type: string
|
||||
- name: +12V Rail (V)
|
||||
type: string
|
||||
- name: High Voltage (V)
|
||||
type: string
|
||||
- name: Battery Temp (C)
|
||||
type: string
|
||||
- name: UPS Output (V)
|
||||
type: string
|
||||
- name: 12V Iso Rail (V)
|
||||
type: string
|
||||
- name: 5V Iso Rail (V)
|
||||
type: string
|
||||
- name: 3.3V Iso Rail (V)
|
||||
type: string
|
||||
- name: Spare 22
|
||||
type: string
|
||||
- name: Spare 23
|
||||
type: string
|
||||
- name: 408 Board Spare 0
|
||||
type: string
|
||||
- name: 408 Board Spare 1
|
||||
type: string
|
||||
- name: 408 Board Spare 2
|
||||
type: string
|
||||
- name: 408 Board Spare 3
|
||||
type: string
|
||||
- name: 408 Board Spare 4
|
||||
type: string
|
||||
- name: Purge Flow Monitor (sccm)
|
||||
type: string
|
||||
- name: System Input Voltage (V)
|
||||
type: string
|
||||
- name: Board Temperature (C)
|
||||
type: string
|
||||
- name: 408 Board Spare 8
|
||||
type: string
|
||||
- name: 408 Board Spare 9
|
||||
type: string
|
||||
- name: 408 Board Spare 10
|
||||
type: string
|
||||
- name: 408 Board Spare 11
|
||||
type: string
|
||||
- name: 408 Board Spare 12
|
||||
type: string
|
||||
- name: 408 Board Spare 13
|
||||
type: string
|
||||
- name: 408 Board Spare 14
|
||||
type: string
|
||||
- name: 408 Board Spare 15
|
||||
type: string
|
||||
- name: Sheath Flow Controller Read (vccm)
|
||||
type: string
|
||||
- name: Sheath Flow Controller Read (sccm)
|
||||
type: string
|
||||
- name: Sheath Flow Controller Pressure (psia)
|
||||
type: string
|
||||
- name: Sheath Flow Controller Temperature (C)
|
||||
type: string
|
||||
- name: Sample Flow Controller Read (vccm)
|
||||
type: string
|
||||
- name: Sample Flow Controller Read (sccm)
|
||||
type: string
|
||||
- name: Sample Flow Controller Pressure (psia)
|
||||
type: string
|
||||
- name: Sample Flow Controller Temperature (C)
|
||||
type: string
|
||||
- name: Fan 1 (RPM)
|
||||
type: string
|
||||
- name: Fan 2 (RPM)
|
||||
type: string
|
||||
- name: Laser Fan (RPM)
|
||||
type: string
|
||||
- name: Spare tach
|
||||
type: string
|
||||
- name: Threshold Crossing Events
|
||||
type: string
|
||||
- name: Dual Qualified Scatter and Incand Particles
|
||||
type: string
|
||||
- name: Qualified Scatter Only Particles
|
||||
type: string
|
||||
- name: Qualified Incand Only Particles
|
||||
type: string
|
||||
- name: Disqualified Due to Scatter Saturation
|
||||
type: string
|
||||
- name: Disqualified Due to Scatter Transit Time Min
|
||||
type: string
|
||||
- name: Disqualified Due to Scatter Transit Time Max
|
||||
type: string
|
||||
- name: Disqualified Due to Scatter FWHM Min
|
||||
type: string
|
||||
- name: Disqualified Due to Scatter FWHM Max
|
||||
type: string
|
||||
- name: Scatter Inter Part Period Min Violation
|
||||
type: string
|
||||
- name: Disqualified Due to Incand Saturation
|
||||
type: string
|
||||
- name: Disqualified Due to Incand Transit Time Min
|
||||
type: string
|
||||
- name: Disqualified Due to Incand Transit Time Max
|
||||
type: string
|
||||
- name: Disqualified Due to Incand FWHM Min
|
||||
type: string
|
||||
- name: Disqualified Due to Incand FWHM Max
|
||||
type: string
|
||||
- name: Incand Inter Part Period Min Violation
|
||||
type: string
|
||||
- name: Baseline Sizer Lo
|
||||
type: string
|
||||
- name: Baseline Sizer Hi
|
||||
type: string
|
||||
- name: Baseline Incand Lo
|
||||
type: string
|
||||
- name: Baseline Incand Hi
|
||||
type: string
|
||||
- name: Bandwidth Sizer Hi
|
||||
type: string
|
||||
- name: Bandwidth Sizer Lo
|
||||
type: string
|
||||
- name: Bandwidth Incand Lo
|
||||
type: string
|
||||
- name: Bandwidth Incand Hi
|
||||
type: string
|
||||
- name: ABD-0408 HK ADCs min
|
||||
type: string
|
||||
- name: ABD-0436 HK ADCs min
|
||||
type: string
|
||||
- name: ABD-0408 HK ADCs max
|
||||
type: string
|
||||
- name: ABD-0436 HK ADCs max
|
||||
type: string
|
||||
- name: Incand Particle Conc (cts/ccm)
|
||||
type: string
|
||||
- name: Scattering Particle Conc (cts/ccm)
|
||||
type: string
|
||||
- name: Incand Mass Conc (fg/sccm)
|
||||
type: string
|
||||
- name: Scattering Mass Conc (fg/sccm)
|
||||
type: string
|
||||
- name: Sheath Flow Set Point
|
||||
type: string
|
||||
- name: Sample Flow Set Point
|
||||
type: string
|
||||
- name: Laser Temp Set Point
|
||||
type: string
|
||||
- name: Laser Current Set Point
|
||||
type: string
|
||||
- name: Spare 4 Set Point
|
||||
type: string
|
||||
- name: Spare 5 Set Point
|
||||
type: string
|
||||
- name: PMT HV Set Point
|
||||
type: string
|
||||
- name: Particle Density (g/ccm)
|
||||
type: string
|
||||
- name: PbP Packet Time
|
||||
type: string
|
||||
- name: Scatter Bin 1
|
||||
type: string
|
||||
- name: Scatter Bin 2
|
||||
type: string
|
||||
- name: Scatter Bin 3
|
||||
type: string
|
||||
- name: Scatter Bin 4
|
||||
type: string
|
||||
- name: Scatter Bin 5
|
||||
type: string
|
||||
- name: Scatter Bin 6
|
||||
type: string
|
||||
- name: Scatter Bin 7
|
||||
type: string
|
||||
- name: Scatter Bin 8
|
||||
type: string
|
||||
- name: Scatter Bin 9
|
||||
type: string
|
||||
- name: Scatter Bin 10
|
||||
type: string
|
||||
- name: Scatter Bin 11
|
||||
type: string
|
||||
- name: Scatter Bin 12
|
||||
type: string
|
||||
- name: Scatter Bin 13
|
||||
type: string
|
||||
- name: Scatter Bin 14
|
||||
type: string
|
||||
- name: Scatter Bin 15
|
||||
type: string
|
||||
- name: Scatter Bin 16
|
||||
type: string
|
||||
- name: Scatter Bin 17
|
||||
type: string
|
||||
- name: Scatter Bin 18
|
||||
type: string
|
||||
- name: Scatter Bin 19
|
||||
type: string
|
||||
- name: Incand Bin 1
|
||||
type: string
|
||||
- name: Incand Bin 2
|
||||
type: string
|
||||
- name: Incand Bin 3
|
||||
type: string
|
||||
- name: Incand Bin 4
|
||||
type: string
|
||||
- name: Incand Bin 5
|
||||
type: string
|
||||
- name: Incand Bin 6
|
||||
type: string
|
||||
- name: Incand Bin 7
|
||||
type: string
|
||||
- name: Incand Bin 8
|
||||
type: string
|
||||
- name: Incand Bin 9
|
||||
type: string
|
||||
- name: Incand Bin 10
|
||||
type: string
|
||||
- name: Incand Bin 11
|
||||
type: string
|
||||
- name: Incand Bin 12
|
||||
type: string
|
||||
- name: Incand Bin 13
|
||||
type: string
|
||||
- name: Incand Bin 14
|
||||
type: string
|
||||
- name: Incand Bin 15
|
||||
type: string
|
||||
- name: Incand Bin 16
|
||||
type: string
|
||||
- name: Incand Bin 17
|
||||
type: string
|
||||
- name: Incand Bin 18
|
||||
type: string
|
||||
- name: Incand Bin 19
|
||||
type: string
|
||||
1
meta_files/pbp_meta.csv
Normal file
1
meta_files/pbp_meta.csv
Normal file
@@ -0,0 +1 @@
|
||||
Time (sec),Packet Time Stamp,Flag,Dropped Records,Record Count,Record Size,Particle Time Stamp,Particle Flags,Scatter relPeak,Scatter Transit Time,Scatter Peak Time,Scatter FWHM,Scatter Size (nm),Incand relPeak,Incand Transit Time,Incand Peak Time,Incand FWHM,Incand Delay,Incand Mass (fg),Reserved
|
||||
|
43
meta_files/pbp_meta.yaml
Normal file
43
meta_files/pbp_meta.yaml
Normal file
@@ -0,0 +1,43 @@
|
||||
file_type: csv
|
||||
name: pbp_meta
|
||||
columns:
|
||||
- name: Time (sec)
|
||||
type: string
|
||||
- name: Packet Time Stamp
|
||||
type: string
|
||||
- name: Flag
|
||||
type: string
|
||||
- name: Dropped Records
|
||||
type: string
|
||||
- name: Record Count
|
||||
type: string
|
||||
- name: Record Size
|
||||
type: string
|
||||
- name: Particle Time Stamp
|
||||
type: string
|
||||
- name: Particle Flags
|
||||
type: string
|
||||
- name: Scatter relPeak
|
||||
type: string
|
||||
- name: Scatter Transit Time
|
||||
type: string
|
||||
- name: Scatter Peak Time
|
||||
type: string
|
||||
- name: Scatter FWHM
|
||||
type: string
|
||||
- name: Scatter Size (nm)
|
||||
type: string
|
||||
- name: Incand relPeak
|
||||
type: string
|
||||
- name: Incand Transit Time
|
||||
type: string
|
||||
- name: Incand Peak Time
|
||||
type: string
|
||||
- name: Incand FWHM
|
||||
type: string
|
||||
- name: Incand Delay
|
||||
type: string
|
||||
- name: Incand Mass (fg)
|
||||
type: string
|
||||
- name: Reserved
|
||||
type: string
|
||||
44
meta_files/read.py
Normal file
44
meta_files/read.py
Normal file
@@ -0,0 +1,44 @@
|
||||
import pandas as pd
|
||||
import yaml
|
||||
import os
|
||||
|
||||
|
||||
def infer_dtype(dtype):
    """Translate a pandas/NumPy dtype into the coarse label used in the schema.

    Parameters
    ----------
    dtype : dtype-like
        Column dtype, e.g. ``df[col].dtype``.

    Returns
    -------
    str
        ``"int"``, ``"float"`` or ``"datetime"`` for the matching dtype
        family, ``"string"`` for anything else.
    """
    # Checked in order; the first predicate that accepts the dtype wins.
    type_checks = (
        (pd.api.types.is_integer_dtype, "int"),
        (pd.api.types.is_float_dtype, "float"),
        (pd.api.types.is_datetime64_any_dtype, "datetime"),
    )
    for matches, label in type_checks:
        if matches(dtype):
            return label
    return "string"
|
||||
|
||||
|
||||
def load_schema(input_file):
    """Build a ``{column name: type label}`` mapping for a CSV or Parquet file.

    Parameters
    ----------
    input_file : str
        Path of the file; the format is chosen from its extension
        (case-insensitive). For CSV only the first 100 rows are read.

    Returns
    -------
    dict
        Column names mapped to the label returned by ``infer_dtype``,
        in the column order of the file.

    Raises
    ------
    ValueError
        If the extension is neither ``.csv`` nor ``.parquet``.
    """
    suffix = os.path.splitext(input_file)[1].lower()
    if suffix not in (".csv", ".parquet"):
        raise ValueError(f"Unsupported file format: {suffix}")

    if suffix == ".csv":
        frame = pd.read_csv(input_file, nrows=100)
    else:
        frame = pd.read_parquet(input_file)

    schema = {}
    for column in frame.columns:
        schema[column] = infer_dtype(frame[column].dtype)
    return schema
|
||||
|
||||
|
||||
def generate_combined_config(pbp_file, hk_file, output_file="config.yaml"):
    """Write the PbP and HK column schemas into a single YAML file.

    Parameters
    ----------
    pbp_file : str
        File whose columns become the ``pbp_schema`` section.
    hk_file : str
        File whose columns become the ``hk_schema`` section.
    output_file : str, optional
        Destination YAML path (default ``"config.yaml"``).
    """
    pbp_schema = load_schema(pbp_file)
    hk_schema = load_schema(hk_file)
    combined = {"pbp_schema": pbp_schema, "hk_schema": hk_schema}

    # sort_keys=False keeps the sections and columns in insertion order.
    with open(output_file, "w") as handle:
        yaml.dump(combined, handle, sort_keys=False)

    print(f"Unified config saved to: {output_file}")
|
||||
|
||||
|
||||
# Example usage: runs only when this file is executed as a script, so that
# importing the module does not write config.yaml as a side effect.
# The input paths are resolved against the current working directory.
if __name__ == "__main__":
    generate_combined_config("pbp_meta.csv", "hk_meta.csv")
|
||||
29
pyproject.toml
Normal file
29
pyproject.toml
Normal file
@@ -0,0 +1,29 @@
|
||||
[project]
|
||||
name = "sp2xr"
|
||||
version = "0.0.0"
|
||||
description = "SP2-XR toolkit (placeholder until full v2 metadata)"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.9"
|
||||
dependencies = [
|
||||
"dask[dataframe]>=2024.6",
|
||||
"pandas>=2.2",
|
||||
"numpy>=1.26",
|
||||
"scipy>=1.11",
|
||||
"matplotlib>=3.8",
|
||||
"seaborn>=0.13",
|
||||
# add others as you discover they’re imported at top level
|
||||
]
|
||||
|
||||
[build-system]
|
||||
requires = ["setuptools>=68", "wheel"]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
||||
[tool.setuptools]
|
||||
# tell setuptools our code lives in src/
|
||||
package-dir = {"" = "src"}
|
||||
|
||||
[project.optional-dependencies]
|
||||
notebook = [
|
||||
"ipython>=8",
|
||||
"ipywidgets>=8",
|
||||
]
|
||||
16
src/sp2xr/__init__.py
Normal file
16
src/sp2xr/__init__.py
Normal file
@@ -0,0 +1,16 @@
|
||||
"""
Thin wrapper so `import sp2xr` keeps working
while we refactor the legacy monolith.
"""

from importlib import import_module

# 1 Load the legacy monolith (makes old symbols available)
_toolkit = import_module(".toolkit_legacy", package=__name__)

# Re-export legacy names, but never copy the attributes that identify the
# module itself: doing so would overwrite this package's own __name__,
# __doc__, __file__, __spec__, ... with those of toolkit_legacy.
_module_identity = frozenset(
    {
        "__name__",
        "__doc__",
        "__package__",
        "__loader__",
        "__spec__",
        "__file__",
        "__cached__",
        "__builtins__",
        "__path__",
    }
)
globals().update(
    {
        _key: _value
        for _key, _value in vars(_toolkit).items()
        if _key not in _module_identity
    }
)

# 2 Import new helpers you want public at package root
from .io import csv_to_parquet, read_csv_files_with_dask  # noqa: F401,E402

# Cleanup internal names
del import_module, _toolkit, _module_identity
|
||||
202
src/sp2xr/io.py
Normal file
202
src/sp2xr/io.py
Normal file
@@ -0,0 +1,202 @@
|
||||
import pandas as pd
|
||||
from pathlib import Path
|
||||
import os
|
||||
import re
|
||||
import zipfile
|
||||
import warnings
|
||||
import numpy as np
|
||||
import dask.dataframe as dd
|
||||
|
||||
from .toolkit_legacy import calculate_delta_sec, extract_datetime
|
||||
|
||||
|
||||
def csv_to_parquet(csv_path: Path, parquet_path: Path, **read_csv_kwargs) -> None:
    """
    Convert a CSV file into a Parquet file with the same content.

    Nothing is returned; the result is written to ``parquet_path``.

    Parameters
    ----------
    csv_path : Path | str
        Source CSV file.
    parquet_path : Path | str
        Output Parquet file. Missing parent directories are created.
    read_csv_kwargs : dict
        Additional keyword arguments passed through to pandas.read_csv().
    """
    source, destination = Path(csv_path), Path(parquet_path)

    # Read first: if the CSV cannot be loaded no output directory is created.
    frame = pd.read_csv(source, **read_csv_kwargs)

    destination.parent.mkdir(parents=True, exist_ok=True)
    frame.to_parquet(destination, index=False)
|
||||
|
||||
|
||||
def _read_hk_reference_row(hk_base):
    """Return the first row ("Time Stamp", "Time (sec)") of the reference HK file.

    ``<hk_base>.csv`` is preferred over ``<hk_base>.zip``. An empty DataFrame
    is returned when neither file exists, or when the file found is empty or
    is a corrupt zip archive.
    """
    for suffix in (".csv", ".zip"):
        candidate = f"{hk_base}{suffix}"
        if not os.path.exists(candidate):
            continue
        try:
            return pd.read_csv(
                candidate,
                nrows=1,
                parse_dates=["Time Stamp"],
                usecols=["Time Stamp", "Time (sec)"],
            )
        except pd.errors.EmptyDataError:
            return pd.DataFrame()
        except zipfile.BadZipFile:
            print(f"!! Bad file: {candidate}")
            return pd.DataFrame()
    return pd.DataFrame()


def _split_file_name(file_path):
    """Return ``(folder_name, file_tag)`` parsed from the file name.

    With the name split on "_", ``folder_name`` is the second-to-last token
    and ``file_tag`` is that token joined by "_" to the last token without
    its extension. Both "\\" and "/" are accepted as directory separators.
    """
    base_name = re.split(r"[\\/]", file_path)[-1]
    tokens = base_name.split("_")
    folder_name = tokens[-2]
    file_tag = folder_name + "_" + tokens[-1].split(".")[-2]
    return folder_name, file_tag


def read_csv_files_with_dask(file_path, meta_pbp, meta_hk, target_directory):
    """
    Read one SP2XR PbP or HK file with Dask and write it out as Parquet.

    The first row of the matching ``hk ... _x0001`` file supplies the time
    reference (``t0`` and ``first_val``) used to compute ``calculated_time``.
    The data are written to ``target_directory`` partitioned on ``date`` and
    ``hour``.

    Parameters
    ----------
    file_path : str
        Complete path of the file to read. The file kind is chosen by the
        substring "PbP" or "hk" in the path.
    meta_pbp : pandas DataFrame
        DataFrame whose columns/dtypes are passed as ``dtype`` when reading
        a PbP file.
    meta_hk : pandas DataFrame
        DataFrame whose columns/dtypes are passed as ``dtype`` when reading
        an HK file (``datetime64[ns]`` columns are excluded).
    target_directory : str
        Destination of the Parquet dataset.

    Returns
    -------
    Dask DataFrame
        Content of the file, indexed by ``calculated_time``. An empty pandas
        DataFrame is returned when the HK reference row is unavailable or the
        file cannot be read.

    Raises
    ------
    ValueError
        If ``file_path`` is empty, or contains neither "PbP" nor "hk".
    """
    if not file_path:
        raise ValueError("No CSV files found.")

    # Path of the first HK file (x0001) of the same acquisition, no extension.
    hk_0001 = re.sub(r"PbP", "hk", file_path)
    hk_0001 = re.sub(r"(_x)\d{4}", r"\g<1>0001", hk_0001)
    hk_0001 = re.sub(r"\.(csv|zip)$", "", hk_0001)

    tmp_hk = _read_hk_reference_row(hk_0001)
    if tmp_hk.empty:
        warnings.warn("tmp_hk empty or not existing")
        return pd.DataFrame()

    first_val, t0 = tmp_hk[["Time (sec)", "Time Stamp"]].values[0]

    if "PbP" in file_path:
        data_type = pd.Series(meta_pbp.dtypes.values, index=meta_pbp.columns).to_dict()
        try:
            df = dd.read_csv(file_path, dtype=data_type, blocksize=None)
            # TODO: the time lag (Incand Peak Time - Scatter Peak Time) is not
            # computed yet. It should only be calculated for particles having
            # both a scattering and an incandescence signal, and set to 0 for
            # incandescence-only particles.
            df = df.fillna(0)
        except zipfile.BadZipFile:
            print(f"!! Bad zip file: {file_path}")
            return pd.DataFrame()

    elif "hk" in file_path:
        data_type = pd.Series(meta_hk.dtypes.values, index=meta_hk.columns).to_dict()
        # "Time Stamp" is handled through parse_dates, not through dtype.
        filtered_dtype_dict = {
            key: value
            for key, value in data_type.items()
            if value != "datetime64[ns]"
        }
        try:
            df = dd.read_csv(
                file_path,
                dtype=filtered_dtype_dict,
                parse_dates=["Time Stamp"],
                blocksize=None,
                assume_missing=True,
            )
        except pd.errors.EmptyDataError:
            # Must precede ValueError: EmptyDataError is a ValueError subclass.
            return pd.DataFrame()
        except ValueError as e:
            if "Missing column provided to 'parse_dates'" in str(e):
                print(
                    f"Error for {file_path}: Missing column provided to 'parse_dates': 'Time Stamp'"
                )
            else:
                print(f"Error for {file_path}: {e}")
            return pd.DataFrame()
        except zipfile.BadZipFile:
            print(f"!! Bad zip file: {file_path}")
            return pd.DataFrame()

    else:
        raise ValueError(
            f"Cannot tell whether the file is PbP or hk from its path: {file_path}"
        )

    if len(df.columns) > 0:
        # Drop rows in which every column is NaN.
        df = df.loc[~df.isna().all(axis=1)]

        folder_name, file_name_cut = _split_file_name(file_path)

        df["path"] = str(file_path)
        df["first_val"] = first_val
        df["t0"] = t0
        df["file"] = file_name_cut
        df["folder_name"] = folder_name

        if "Time Stamp" in df.columns:
            df["Time Stamp"] = df["Time Stamp"].map_partitions(
                pd.to_datetime, meta=("Time Stamp", "datetime64[ns]")
            )

        df["delta_sec"] = df.map_partitions(
            calculate_delta_sec, meta=("delta_sec", "float64")
        )
        df["calculated_time"] = df["t0"] + dd.to_timedelta(df["delta_sec"], unit="s")
        df["file_datetime"] = df.apply(
            extract_datetime, axis=1, meta=("file_datetime", "datetime64[ns]")
        )
        # Lower-case aliases: "H" and "S" are deprecated since pandas 2.2.
        df["date_floored"] = df["calculated_time"].dt.floor("h")
        df["date"] = df["calculated_time"].dt.date.astype("date64[pyarrow]")
        df["hour"] = df["calculated_time"].dt.hour.astype("i8")
        df["floor_time"] = df["calculated_time"].dt.floor("s")
        df["Secs_2GB"] = df["Time (sec)"].apply(np.floor, meta=("Secs_2GB", "i8"))

        def name(part_idx):
            # Every partition file is named after the source file.
            return f"{file_name_cut}.parquet"

        df = df.set_index("calculated_time", drop=True, sort=False, sorted=True)

        df.to_parquet(
            path=target_directory,
            engine="pyarrow",
            partition_on=["date", "hour"],
            coerce_timestamps="us",
            allow_truncated_timestamps=True,
            name_function=name,
            write_index=True,
            append=False,
        )

    return df
|
||||
@@ -9,8 +9,6 @@ import dask.dataframe as dd
|
||||
# import dask.delayed
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import re
|
||||
import warnings
|
||||
from scipy.optimize import curve_fit
|
||||
from numpy.polynomial import Polynomial
|
||||
import matplotlib.pyplot as plt
|
||||
@@ -23,8 +21,14 @@ from dask import delayed
|
||||
|
||||
import time
|
||||
|
||||
import ipywidgets as widgets
|
||||
from IPython.display import display
|
||||
try:
|
||||
import ipywidgets as widgets
|
||||
except ImportError: # pragma: no cover
|
||||
widgets = None # interactive features will raise later if used
|
||||
try:
|
||||
from IPython.display import display, clear_output
|
||||
except ImportError: # pragma: no cover
|
||||
display = clear_output = None # or define no‑op fallbacks
|
||||
|
||||
|
||||
from matplotlib.backends.backend_pdf import PdfPages
|
||||
@@ -179,174 +183,6 @@ def calculate_delta_sec(df):
|
||||
return np.floor(df["Time (sec)"]) - df["first_val"]
|
||||
|
||||
|
||||
def _read_hk_reference_row(hk_base):
    """Return the first row ("Time Stamp", "Time (sec)") of the reference HK file.

    ``<hk_base>.csv`` is preferred over ``<hk_base>.zip``. An empty DataFrame
    is returned when neither file exists, or when the file found is empty or
    is a corrupt zip archive.
    """
    for suffix in (".csv", ".zip"):
        candidate = f"{hk_base}{suffix}"
        if not os.path.exists(candidate):
            continue
        try:
            return pd.read_csv(
                candidate,
                nrows=1,
                parse_dates=["Time Stamp"],
                usecols=["Time Stamp", "Time (sec)"],
            )
        except pd.errors.EmptyDataError:
            return pd.DataFrame()
        except zipfile.BadZipFile:
            print(f"!! Bad file: {candidate}")
            return pd.DataFrame()
    return pd.DataFrame()


def _split_file_name(file_path):
    """Return ``(folder_name, file_tag)`` parsed from the file name.

    With the name split on "_", ``folder_name`` is the second-to-last token
    and ``file_tag`` is that token joined by "_" to the last token without
    its extension. Both "\\" and "/" are accepted as directory separators.
    """
    base_name = re.split(r"[\\/]", file_path)[-1]
    tokens = base_name.split("_")
    folder_name = tokens[-2]
    file_tag = folder_name + "_" + tokens[-1].split(".")[-2]
    return folder_name, file_tag


@delayed
def read_csv_files_with_dask(file_path, meta_pbp, meta_hk, target_directory):
    """
    Read one SP2XR PbP or HK file with Dask and write it out as Parquet.

    The first row of the matching ``hk ... _x0001`` file supplies the time
    reference (``t0`` and ``first_val``) used to compute ``calculated_time``.
    The data are written to ``target_directory`` partitioned on ``date`` and
    ``hour``. Nothing is returned; if the HK reference row is unavailable a
    warning is issued, and if the file cannot be read nothing is written.

    Parameters
    ----------
    file_path : str
        Complete path of the file to read. The file kind is chosen by the
        substring "PbP" or "hk" in the path.
    meta_pbp : pandas DataFrame
        DataFrame whose columns/dtypes are passed as ``dtype`` when reading
        a PbP file.
    meta_hk : pandas DataFrame
        DataFrame whose columns/dtypes are passed as ``dtype`` when reading
        an HK file (``datetime64[ns]`` columns are excluded).
    target_directory : str
        Destination of the Parquet dataset.

    Raises
    ------
    ValueError
        If ``file_path`` is empty, or contains neither "PbP" nor "hk".
    """
    if not file_path:
        raise ValueError("No CSV files found.")

    # Path of the first HK file (x0001) of the same acquisition, no extension.
    hk_0001 = re.sub(r"PbP", "hk", file_path)
    hk_0001 = re.sub(r"(_x)\d{4}", r"\g<1>0001", hk_0001)
    hk_0001 = re.sub(r"\.(csv|zip)$", "", hk_0001)

    tmp_hk = _read_hk_reference_row(hk_0001)
    if tmp_hk.empty:
        warnings.warn("tmp_hk empty or not existing")
        return

    first_val, t0 = tmp_hk[["Time (sec)", "Time Stamp"]].values[0]

    if "PbP" in file_path:
        data_type = pd.Series(meta_pbp.dtypes.values, index=meta_pbp.columns).to_dict()
        try:
            df = dd.read_csv(file_path, dtype=data_type, blocksize=None)
            # TODO: the time lag (Incand Peak Time - Scatter Peak Time) is not
            # computed yet. It should only be calculated for particles having
            # both a scattering and an incandescence signal, and set to 0 for
            # incandescence-only particles.
            df = df.fillna(0)
        except zipfile.BadZipFile:
            print(f"!! Bad zip file: {file_path}")
            df = pd.DataFrame()

    elif "hk" in file_path:
        data_type = pd.Series(meta_hk.dtypes.values, index=meta_hk.columns).to_dict()
        # "Time Stamp" is handled through parse_dates, not through dtype.
        filtered_dtype_dict = {
            key: value
            for key, value in data_type.items()
            if value != "datetime64[ns]"
        }
        try:
            df = dd.read_csv(
                file_path,
                dtype=filtered_dtype_dict,
                parse_dates=["Time Stamp"],
                blocksize=None,
                assume_missing=True,
            )
        except pd.errors.EmptyDataError:
            # Must precede ValueError: EmptyDataError is a ValueError subclass.
            df = pd.DataFrame()
        except ValueError as e:
            if "Missing column provided to 'parse_dates'" in str(e):
                print(
                    f"Error for {file_path}: Missing column provided to 'parse_dates': 'Time Stamp'"
                )
            else:
                print(f"Error for {file_path}: {e}")
            df = pd.DataFrame()
        except zipfile.BadZipFile:
            print(f"!! Bad zip file: {file_path}")
            df = pd.DataFrame()

    else:
        raise ValueError(
            f"Cannot tell whether the file is PbP or hk from its path: {file_path}"
        )

    # An empty frame (failed read) has no columns and is skipped.
    if len(df.columns) > 0:
        # Drop rows in which every column is NaN.
        df = df.loc[~df.isna().all(axis=1)]

        folder_name, file_name_cut = _split_file_name(file_path)

        df["path"] = str(file_path)
        df["first_val"] = first_val
        df["t0"] = t0
        df["file"] = file_name_cut
        df["folder_name"] = folder_name

        if "Time Stamp" in df.columns:
            df["Time Stamp"] = df["Time Stamp"].map_partitions(
                pd.to_datetime, meta=("Time Stamp", "datetime64[ns]")
            )

        df["delta_sec"] = df.map_partitions(
            calculate_delta_sec, meta=("delta_sec", "float64")
        )
        df["calculated_time"] = df["t0"] + dd.to_timedelta(df["delta_sec"], unit="s")
        df["file_datetime"] = df.apply(
            extract_datetime, axis=1, meta=("file_datetime", "datetime64[ns]")
        )
        # Lower-case aliases: "H" and "S" are deprecated since pandas 2.2.
        df["date_floored"] = df["calculated_time"].dt.floor("h")
        df["date"] = df["calculated_time"].dt.date.astype("date64[pyarrow]")
        df["hour"] = df["calculated_time"].dt.hour.astype("i8")
        df["floor_time"] = df["calculated_time"].dt.floor("s")
        df["Secs_2GB"] = df["Time (sec)"].apply(np.floor, meta=("Secs_2GB", "i8"))

        def name(part_idx):
            # Every partition file is named after the source file.
            return f"{file_name_cut}.parquet"

        df = df.set_index("calculated_time", drop=True, sort=False, sorted=True)

        df.to_parquet(
            path=target_directory,
            engine="pyarrow",
            partition_on=["date", "hour"],
            coerce_timestamps="us",
            allow_truncated_timestamps=True,
            name_function=name,
            write_index=True,
            append=False,
        )
|
||||
|
||||
|
||||
# %% Functions to read sp2b files
|
||||
|
||||
|
||||
2
tests/test_import.py
Normal file
2
tests/test_import.py
Normal file
@@ -0,0 +1,2 @@
|
||||
def test_import_package():
    """Smoke test: importing the ``sp2xr`` package must not raise."""
    import sp2xr  # noqa: F401
|
||||
Reference in New Issue
Block a user