"""Tests that run ``process_sp2xr_file`` on the small zip fixtures in ``data/``."""

from pathlib import Path

from sp2xr.io import process_sp2xr_file

# from sp2xr.io import read_csv_files_with_dask

# Directory holding the fixture files, located next to this test module.
DATA = Path(__file__).parent / "data"


def test_read_real_pbp_zip(tmp_path):
    """Process the mini PbP zip and check the row count and parquet output."""
    archive = DATA / "mini_SP2XR_PbP_20190409110737_x0001.zip"
    config = DATA / "config.yaml"
    out_dir = tmp_path / "pq_out"

    result = process_sp2xr_file(
        file_path=str(archive),
        config_path=str(config),
        target_directory=str(out_dir),
    )

    # The fixture holds 50 lines, so 50 rows are expected back.
    assert len(result) == 50

    # At least one parquet file must exist somewhere under the output directory.
    parquet_files = list(out_dir.rglob("*.parquet"))
    assert parquet_files, "No parquet output"


def test_read_real_hk_zip(tmp_path):
    """Process the mini hk zip and check the row count and parquet output."""
    archive = DATA / "mini_SP2XR_hk_20190409110737_x0001.zip"
    config = DATA / "config.yaml"
    out_dir = tmp_path / "pq_out"

    result = process_sp2xr_file(
        file_path=str(archive),
        config_path=str(config),
        target_directory=str(out_dir),
    )

    # NOTE(review): 50 mirrors the PbP test; confirm it is the exact row count
    # of the hk fixture.
    assert len(result) == 50

    # At least one parquet file must exist somewhere under the output directory.
    parquet_files = list(out_dir.rglob("*.parquet"))
    assert parquet_files, "No parquet output"