37 lines
1.0 KiB
Python
37 lines
1.0 KiB
Python
from pathlib import Path
|
|
from sp2xr.io import process_sp2xr_file
|
|
|
|
# from sp2xr.io import read_csv_files_with_dask
|
|
|
|
DATA = Path(__file__).parent / "data"
|
|
|
|
|
|
def test_read_real_pbp_zip(tmp_path):
    """Process the mini PbP zip fixture and verify rows and parquet output.

    Calls ``process_sp2xr_file`` on the bundled PbP archive with the shared
    ``config.yaml`` and a target directory inside pytest's ``tmp_path``,
    then checks the returned object's length and that at least one
    ``*.parquet`` file exists under the target directory.
    """
    out_dir = tmp_path / "pq_out"
    pbp_archive = DATA / "mini_SP2XR_PbP_20190409110737_x0001.zip"
    yaml_config = DATA / "config.yaml"

    result = process_sp2xr_file(
        file_path=str(pbp_archive),
        config_path=str(yaml_config),
        target_directory=str(out_dir),
    )

    # The fixture holds 50 lines, so 50 rows are expected back.
    assert len(result) == 50

    # Search recursively: the output may sit in nested sub-directories.
    parquet_files = list(out_dir.rglob("*.parquet"))
    assert parquet_files, "No parquet output"
|
|
|
|
|
|
def test_read_real_hk_zip(tmp_path):
    """Process the mini hk zip fixture and verify rows and parquet output.

    Calls ``process_sp2xr_file`` on the bundled hk archive with the shared
    ``config.yaml`` and a target directory inside pytest's ``tmp_path``,
    then checks the returned object's length and that at least one
    ``*.parquet`` file exists under the target directory.
    """
    out_dir = tmp_path / "pq_out"
    hk_archive = DATA / "mini_SP2XR_hk_20190409110737_x0001.zip"
    yaml_config = DATA / "config.yaml"

    result = process_sp2xr_file(
        file_path=str(hk_archive),
        config_path=str(yaml_config),
        target_directory=str(out_dir),
    )

    # NOTE(review): 50 is the asserted row count; the earlier comment hinted
    # the exact number should be confirmed against the fixture — TODO confirm.
    assert len(result) == 50

    # Search recursively: the output may sit in nested sub-directories.
    parquet_files = list(out_dir.rglob("*.parquet"))
    assert parquet_files, "No parquet output"
|