import json
from pathlib import Path


class RawFile:
    """
    Generic raw file reader. Picks up settings from the .json master file.

    Currently supports: Moench03 (master files with detector type 'Moench'
    and 5000 analog samples).

    The object can be iterated over, yielding the result of one read() call
    of the underlying reader per step until a read comes back with zero
    frames, and it can be used in a with statement.
    """

    def __init__(self, fname):
        """
        Open the data file that belongs to the master file fname.

        fname: str or path-like object pointing to the .json master file.

        Raises ValueError if fname is not a .json file, if the detector
        described in the master file is not supported, or if the run
        information cannot be parsed from the file name.
        """
        self.findex = 0
        # Store a Path rather than the raw argument: _parse_fname relies on
        # .stem and .parent, which a plain string does not provide.
        self.fname = Path(fname)
        if self.fname.suffix != '.json':
            raise ValueError("Need a master file in json format")

        with open(self.fname) as f:
            self.master = json.load(f)

        # Figure out which file to open
        if self.master['Detector Type'] == 'Moench' and self.master['Analog Samples'] == 5000:
            # Imported here so that the compiled reader is only required on
            # the code path that actually opens a data file.
            from . import RawFileReader

            # TODO! pass settings to reader
            self._parse_fname()
            self.reader = RawFileReader(self.data_fname(0, 0))
        else:
            raise ValueError('unsupported file')

    def _parse_fname(self):
        """
        Split the master file name into self.base and self.run_id.

        The stem is split from the right on "_" into three parts. The first
        part, joined with the parent directory, becomes self.base and the
        last part is converted to the integer self.run_id.

        Raises ValueError if the stem has fewer than three parts or the run
        id is not an integer.
        """
        fname = Path(self.fname)
        try:
            base, _, run_id = fname.stem.rsplit("_", 2)
            self.base = fname.parent / base
            self.run_id = int(run_id)
        except ValueError as err:
            # Both a failed unpacking and a failed int() end up here; keep
            # the original error attached for debugging.
            raise ValueError(f"Could not parse master file name: {self.fname}") from err

    def data_fname(self, i, findex=0):
        """Return the Path of the data file with index i and file index findex."""
        return Path(f"{self.base}_d{i}_f{findex}_{self.run_id}.raw")

    def read(self):
        """Return the result of a single read() call on the underlying reader."""
        return self.reader.read()

    # Support iteration
    def __iter__(self):
        return self

    def __next__(self):
        """Return the next frame, stopping once a read yields zero frames."""
        frame = self.reader.read()
        if frame.shape[0] == 0:
            raise StopIteration
        # Hand the data to the caller; without this every step yields None.
        return frame

    # Support with statement
    def __enter__(self):
        return self

    def __exit__(self, exception_type, exception_value, traceback):
        # Nothing is released here, the reader object is left untouched.
        pass
+#include "arr_desc.h" + +#include //clang-format off typedef struct { PyObject_HEAD FILE *fp; // additional fields for size and decoder? int dtype; + bool read_header; } RawFileReader; //clang-format on @@ -14,19 +18,33 @@ typedef struct { // raises python exception if something goes wrong // returned object should mean file is open and ready to read static int RawFileReader_init(RawFileReader *self, PyObject *args, - PyObject *Py_UNUSED(kwds)) { + PyObject *kwds) { // Parse file name, accepts string or pathlike objects const char *fname = NULL; - PyObject *buf; + PyObject *fname_obj = NULL; + PyObject *fname_bytes = NULL; Py_ssize_t len; - if (!PyArg_ParseTuple(args, "O&", PyUnicode_FSConverter, &buf)) + // Should we read the header + self->read_header = false; + + static char *kwlist[] = {"fname", "header", NULL}; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|p", kwlist, &fname_obj, + &self->read_header)) { return -1; - PyBytes_AsStringAndSize(buf, &fname, &len); + } + + if (fname_obj != Py_None) + if (!PyUnicode_FSConverter(fname_obj, &fname_bytes)) + return -1; + + PyBytes_AsStringAndSize(fname_bytes, &fname, &len); + printf("%s\n read_header: %d\n", fname, self->read_header); self->fp = fopen(fname, "rb"); - // self->n_left = 0; + // Keep the return code to not return before releasing buffer int rc = 0; @@ -37,7 +55,7 @@ static int RawFileReader_init(RawFileReader *self, PyObject *args, rc = -1; } // Release buffer - Py_DECREF(buf); + Py_DECREF(fname_bytes); // Success or fail return rc; @@ -57,7 +75,7 @@ static PyObject *RawFileReader_read(RawFileReader *self, PyObject *args) { self->dtype = NPY_UINT16; const int ndim = 3; - Py_ssize_t n_frames = 1; //default number of frames to read + Py_ssize_t n_frames = 1; // default number of frames to read // Py_ssize_t moench_version = 3; // Py_ssize_t analog_digital = 0; if (!PyArg_ParseTuple(args, "|n", &n_frames)) @@ -67,9 +85,18 @@ static PyObject *RawFileReader_read(RawFileReader *self, PyObject *args) { 
PyObject *frames = PyArray_SimpleNew(ndim, dims, self->dtype); PyArray_FILLWBYTE((PyArrayObject *)frames, 0); + //Optional return the header + PyObject *header = NULL; + char* header_out = NULL; + if(self->read_header){ + header = PyArray_SimpleNewFromDescr(1, dims, frame_header_dt()); + header_out = PyArray_DATA((PyArrayObject *)header); + } + const size_t frame_size = 400 * 400 * 2; char *out_buf = PyArray_DATA((PyArrayObject *)frames); - int64_t n_read = read_raw(self->fp, n_frames, frame_size, out_buf); + int64_t n_read = + read_raw(self->fp, n_frames, frame_size, out_buf, header_out); if (n_read != n_frames) { // resize the array to match the number of read photons @@ -84,9 +111,23 @@ static PyObject *RawFileReader_read(RawFileReader *self, PyObject *args) { new_shape.len = 3; // resize the array to match the number of clusters read - PyArray_Resize((PyArrayObject *)frames, &new_shape, 3, NPY_ANYORDER); + PyArray_Resize((PyArrayObject *)frames, &new_shape, 1, NPY_ANYORDER); + + //if we also read header we need to reshape the header + if(self->read_header){ + new_shape.len = 1; + PyArray_Resize((PyArrayObject *)header, &new_shape, 1, NPY_ANYORDER); + } + } - return frames; + PyObject *ret = frames; + if(self->read_header){ + ret = PyTuple_Pack(2, frames, header); + Py_DECREF(header); + Py_DECREF(frames); + } + return ret; + } // List all methods in our ClusterFileReader class diff --git a/src/arr_desc.c b/src/arr_desc.c index 8186d36..84eeb43 100644 --- a/src/arr_desc.c +++ b/src/arr_desc.c @@ -24,3 +24,22 @@ PyArray_Descr *cluster_analysis_dt() { Py_DECREF(dict); return dtype; } + +PyArray_Descr *frame_header_dt() { + import_array(); + // Move this to a function that gets run on init?? 
+ PyObject *dtype_dict; + PyArray_Descr *dtype; + dtype_dict = Py_BuildValue( + "[(s, s), (s, s), (s, s), (s, s), (s, s), (s, s), (s, s), (s, s), (s, " + "s), (s, s), (s, s), (s, s), (s, s), (s, s)]", + "Frame Number", "u8", "SubFrame Number/ExpLength", "u4", + "Packet Number", "u4", "Bunch ID", "u8", "Timestamp", "u8", "Module Id", + "u2", "Row", "u2", "Column", "u2", "Reserved", "u2", "Debug", "u4", + "Round Robin Number", "u2", "Detector Type", "u1", "Header Version", + "u1", "Packets Caught Mask", "V64"); + + PyArray_DescrConverter(dtype_dict, &dtype); + Py_DECREF(dtype_dict); + return dtype; +} \ No newline at end of file diff --git a/src/arr_desc.h b/src/arr_desc.h index 3a4679d..82b2ef6 100644 --- a/src/arr_desc.h +++ b/src/arr_desc.h @@ -5,4 +5,6 @@ PyArray_Descr* cluster_dt(); -PyArray_Descr* cluster_analysis_dt(); \ No newline at end of file +PyArray_Descr* cluster_analysis_dt(); + +PyArray_Descr *frame_header_dt(); \ No newline at end of file diff --git a/src/creader_module.c b/src/creader_module.c index 3c082b9..68baa4d 100644 --- a/src/creader_module.c +++ b/src/creader_module.c @@ -59,6 +59,12 @@ static PyObject *get_cluster_dt(PyObject *Py_UNUSED(self), PyObject *args) { return (PyObject*)cluster_dt(); } +static PyObject *get_frame_header_dt(PyObject *Py_UNUSED(self), PyObject *args) { + if (!PyArg_ParseTuple(args, "")) + return NULL; + return (PyObject*)frame_header_dt(); +} + // Module docstring, shown as a part of help(creader) static char module_docstring[] = "C functions to read cluster files"; @@ -66,6 +72,7 @@ static char module_docstring[] = "C functions to read cluster files"; static PyMethodDef creader_methods[] = { {"clusterize", clusterize, METH_VARARGS, "Do some stuff"}, {"cluster_dt", get_cluster_dt, METH_VARARGS, "Do some stuff"}, + {"frame_header_dt", get_frame_header_dt, METH_VARARGS, "Do some stuff"}, {NULL, NULL, 0, NULL} /* Sentinel */ }; diff --git a/src/raw_reader.c b/src/raw_reader.c index b38ec5f..3e64475 100644 --- 
def test_references_on_read_with_header(data_path):
    """Both arrays returned by read() must be owned by the caller alone."""
    reader = RawFileReader(data_path / 'test_d0_f0_0.raw', header=True)
    frames, header = reader.read(100)
    # getrefcount reports one extra reference for its own argument
    assert sys.getrefcount(frames) == 2
    assert sys.getrefcount(header) == 2


def test_reading_frame_numbers(data_path):
    """Frame numbers in the header run from 201 up to and including 1200."""
    reader = RawFileReader(data_path / 'test_d0_f0_0.raw', header=True)
    frames, header = reader.read(1000)
    expected = np.arange(201, 1201, dtype=np.uint64)
    assert (header['Frame Number'] == expected).all()


def test_reading_more_files_than_available(data_path):
    """Asking for 1500 frames gives back the 1000 frames that could be read."""
    reader = RawFileReader(data_path / 'test_d0_f0_0.raw', header=True)
    frames, header = reader.read(1500)
    assert frames.shape == (1000, 400, 400)
    assert header.size == 1000