option for reading header

This commit is contained in:
Erik Frojdh 2023-06-02 16:52:14 +02:00
parent e43899cca8
commit 46a697cfb7
11 changed files with 198 additions and 16 deletions

creader/ClusterFile.py Normal file
@@ -0,0 +1,6 @@
from . import ClusterFileReader


class ClusterFile(ClusterFileReader):
    def __init__(self, fname):
        super().__init__(fname)
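A minimal usage sketch for the new wrapper, not part of the changeset; the file name is hypothetical and read() is assumed to come from the underlying ClusterFileReader extension class:

from creader import ClusterFile

# 'run_0.clust' is a made-up file name; read() is inherited from the
# ClusterFileReader C extension class (signature assumed here).
f = ClusterFile("run_0.clust")
clusters = f.read()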

creader/RawFile.py Normal file
@@ -0,0 +1,63 @@
import json
from pathlib import Path

from . import RawFileReader


class RawFile:
    """
    Generic Raw File reader. Picks up settings from .json master file
    Currently supports: Moench03 =)
    """

    def __init__(self, fname):
        self.findex = 0
        # Keep the master file name as a Path so _parse_fname can use .stem/.parent
        self.fname = Path(fname)
        if self.fname.suffix != '.json':
            raise ValueError("Need a master file in json format")

        with open(self.fname) as f:
            self.master = json.load(f)

        # Figure out which file to open
        if self.master['Detector Type'] == 'Moench' and self.master['Analog Samples'] == 5000:
            # TODO! pass settings to reader
            self._parse_fname()
            self.reader = RawFileReader(self.data_fname(0, 0))
        else:
            raise ValueError('unsupported file')

    def _parse_fname(self):
        try:
            base, _, run_id = self.fname.stem.rsplit("_", 2)
            self.base = self.fname.parent / base
            self.run_id = int(run_id)
        except ValueError:
            raise ValueError(f"Could not parse master file name: {self.fname}")

    def data_fname(self, i, findex=0):
        return Path(f"{self.base}_d{i}_f{findex}_{self.run_id}.raw")

    def read(self):
        return self.reader.read()

    # Support iteration
    def __iter__(self):
        return self

    def __next__(self):
        frame = self.reader.read()
        if frame.shape[0] == 0:
            raise StopIteration
        return frame

    # Support with statement
    def __enter__(self):
        return self

    def __exit__(self, exception_type, exception_value, traceback):
        pass
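A usage sketch for RawFile, not part of the changeset, assuming a master file that follows the <base>_master_<run_id>.json naming that _parse_fname() expects:

from pathlib import Path
from creader.RawFile import RawFile

# 'test_master_0.json' is a hypothetical master file; data_fname() then
# resolves the matching data file, e.g. test_d0_f0_0.raw.
f = RawFile(Path("test_master_0.json"))
print(f.data_fname(0, 0))

# Iteration stops when the reader returns an empty array.
for frame in f:
    print(frame.shape)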

View File

@@ -1,2 +1,5 @@
#Make everything from the C extension available
from _creader import *
+from .file_utils import open_file
+from .ClusterFile import ClusterFile

creader/file_utils.py Normal file
@@ -0,0 +1,14 @@
from .ClusterFile import ClusterFile
from .RawFile import RawFile
from pathlib import Path


def open_file(fname):
    """Convenience function to open files"""
    fname = Path(fname)
    if fname.suffix == '.clust':
        return ClusterFile(fname)
    elif fname.suffix == '.json':
        return RawFile(fname)
    else:
        raise ValueError('unsupported file type')
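A sketch of the new convenience entry point, not part of the changeset; the file names are made up:

from creader import open_file

f1 = open_file("run_0.clust")         # suffix '.clust' dispatches to ClusterFile
f2 = open_file("test_master_0.json")  # suffix '.json' dispatches to RawFile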

View File

@@ -1,12 +1,16 @@
#include "RawFileReader.h"
#include "data_types.h"
#include "raw_reader.h"
+#include "arr_desc.h"
+#include <stdbool.h>

//clang-format off
typedef struct {
    PyObject_HEAD FILE *fp;
    // additional fields for size and decoder?
    int dtype;
+    bool read_header;
} RawFileReader;
//clang-format on
@@ -14,19 +18,33 @@ typedef struct {
// raises python exception if something goes wrong
// returned object should mean file is open and ready to read
static int RawFileReader_init(RawFileReader *self, PyObject *args,
-                              PyObject *Py_UNUSED(kwds)) {
+                              PyObject *kwds) {
    // Parse file name, accepts string or pathlike objects
    const char *fname = NULL;
-    PyObject *buf;
+    PyObject *fname_obj = NULL;
+    PyObject *fname_bytes = NULL;
    Py_ssize_t len;

-    if (!PyArg_ParseTuple(args, "O&", PyUnicode_FSConverter, &buf))
+    // Should we read the header
+    self->read_header = false;
+    static char *kwlist[] = {"fname", "header", NULL};
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|p", kwlist, &fname_obj,
+                                     &self->read_header)) {
        return -1;
-    PyBytes_AsStringAndSize(buf, &fname, &len);
+    }
+    if (fname_obj != Py_None)
+        if (!PyUnicode_FSConverter(fname_obj, &fname_bytes))
+            return -1;
+    PyBytes_AsStringAndSize(fname_bytes, &fname, &len);
+
+    printf("%s\n read_header: %d\n", fname, self->read_header);

    self->fp = fopen(fname, "rb");
    // self->n_left = 0;

    // Keep the return code to not return before releasing buffer
    int rc = 0;
@@ -37,7 +55,7 @@ static int RawFileReader_init(RawFileReader *self, PyObject *args,
        rc = -1;
    }

    // Release buffer
-    Py_DECREF(buf);
+    Py_DECREF(fname_bytes);

    // Success or fail
    return rc;
@@ -57,7 +75,7 @@ static PyObject *RawFileReader_read(RawFileReader *self, PyObject *args) {
    self->dtype = NPY_UINT16;
    const int ndim = 3;
-    Py_ssize_t n_frames = 1; //default number of frames to read
+    Py_ssize_t n_frames = 1; // default number of frames to read
    // Py_ssize_t moench_version = 3;
    // Py_ssize_t analog_digital = 0;
    if (!PyArg_ParseTuple(args, "|n", &n_frames))
@@ -67,9 +85,18 @@ static PyObject *RawFileReader_read(RawFileReader *self, PyObject *args) {
    PyObject *frames = PyArray_SimpleNew(ndim, dims, self->dtype);
    PyArray_FILLWBYTE((PyArrayObject *)frames, 0);

+    //Optional return the header
+    PyObject *header = NULL;
+    char* header_out = NULL;
+    if(self->read_header){
+        header = PyArray_SimpleNewFromDescr(1, dims, frame_header_dt());
+        header_out = PyArray_DATA((PyArrayObject *)header);
+    }
+
    const size_t frame_size = 400 * 400 * 2;
    char *out_buf = PyArray_DATA((PyArrayObject *)frames);
-    int64_t n_read = read_raw(self->fp, n_frames, frame_size, out_buf);
+    int64_t n_read =
+        read_raw(self->fp, n_frames, frame_size, out_buf, header_out);

    if (n_read != n_frames) {
        // resize the array to match the number of read photons
@@ -84,9 +111,23 @@ static PyObject *RawFileReader_read(RawFileReader *self, PyObject *args) {
        new_shape.len = 3;

        // resize the array to match the number of clusters read
-        PyArray_Resize((PyArrayObject *)frames, &new_shape, 3, NPY_ANYORDER);
+        PyArray_Resize((PyArrayObject *)frames, &new_shape, 1, NPY_ANYORDER);
+
+        //if we also read header we need to reshape the header
+        if(self->read_header){
+            new_shape.len = 1;
+            PyArray_Resize((PyArrayObject *)header, &new_shape, 1, NPY_ANYORDER);
+        }
    }

-    return frames;
+    PyObject *ret = frames;
+    if(self->read_header){
+        ret = PyTuple_Pack(2, frames, header);
+        Py_DECREF(header);
+        Py_DECREF(frames);
+    }
+    return ret;
}

// List all methods in our ClusterFileReader class
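What the reader changes above enable from Python, sketched here rather than taken from the diff; the file name matches the test data further down and the keyword name comes from the kwlist:

from creader import RawFileReader

# With header=True, read() is expected to return a (frames, header) tuple;
# header is a structured array built from frame_header_dt().
r = RawFileReader("test_d0_f0_0.raw", header=True)
frames, header = r.read(10)
print(frames.shape)                # (n, 400, 400) with n <= 10
print(header["Frame Number"])      # one header entry per frame actually read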

View File

@@ -24,3 +24,22 @@ PyArray_Descr *cluster_analysis_dt() {
    Py_DECREF(dict);
    return dtype;
}
+
+PyArray_Descr *frame_header_dt() {
+    import_array();
+    // Move this to a function that gets run on init??
+    PyObject *dtype_dict;
+    PyArray_Descr *dtype;
+    dtype_dict = Py_BuildValue(
+        "[(s, s), (s, s), (s, s), (s, s), (s, s), (s, s), (s, s), (s, s), (s, "
+        "s), (s, s), (s, s), (s, s), (s, s), (s, s)]",
+        "Frame Number", "u8", "SubFrame Number/ExpLength", "u4",
+        "Packet Number", "u4", "Bunch ID", "u8", "Timestamp", "u8", "Module Id",
+        "u2", "Row", "u2", "Column", "u2", "Reserved", "u2", "Debug", "u4",
+        "Round Robin Number", "u2", "Detector Type", "u1", "Header Version",
+        "u1", "Packets Caught Mask", "V64");
+    PyArray_DescrConverter(dtype_dict, &dtype);
+    Py_DECREF(dtype_dict);
+    return dtype;
+}
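For reference, the same structured dtype written out in numpy; the field names and format codes are transcribed from the Py_BuildValue list above:

import numpy as np

frame_header_dt = np.dtype([
    ("Frame Number", "u8"),
    ("SubFrame Number/ExpLength", "u4"),
    ("Packet Number", "u4"),
    ("Bunch ID", "u8"),
    ("Timestamp", "u8"),
    ("Module Id", "u2"),
    ("Row", "u2"),
    ("Column", "u2"),
    ("Reserved", "u2"),
    ("Debug", "u4"),
    ("Round Robin Number", "u2"),
    ("Detector Type", "u1"),
    ("Header Version", "u1"),
    ("Packets Caught Mask", "V64"),
])

print(frame_header_dt.itemsize)  # 112 bytes per frame header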

View File

@@ -6,3 +6,5 @@
PyArray_Descr* cluster_dt();
PyArray_Descr* cluster_analysis_dt();
+
+PyArray_Descr *frame_header_dt();

View File

@@ -59,6 +59,12 @@ static PyObject *get_cluster_dt(PyObject *Py_UNUSED(self), PyObject *args) {
    return (PyObject*)cluster_dt();
}

+static PyObject *get_frame_header_dt(PyObject *Py_UNUSED(self), PyObject *args) {
+    if (!PyArg_ParseTuple(args, ""))
+        return NULL;
+    return (PyObject*)frame_header_dt();
+}
+
// Module docstring, shown as a part of help(creader)
static char module_docstring[] = "C functions to read cluster files";

@@ -66,6 +72,7 @@ static char module_docstring[] = "C functions to read cluster files";
static PyMethodDef creader_methods[] = {
    {"clusterize", clusterize, METH_VARARGS, "Do some stuff"},
    {"cluster_dt", get_cluster_dt, METH_VARARGS, "Do some stuff"},
+    {"frame_header_dt", get_frame_header_dt, METH_VARARGS, "Do some stuff"},
    {NULL, NULL, 0, NULL} /* Sentinel */
};
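The new module-level function can then be called from Python, assuming it is picked up by creader's star import of _creader:

import creader

dt = creader.frame_header_dt()   # numpy descriptor with the 14 fields above
print(dt.names)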

View File

@@ -2,8 +2,9 @@
#include "data_types.h"
#include <stdlib.h>
+#include <string.h>

-int64_t read_raw(FILE *fp, int64_t n_frames, size_t frame_size, char* out_buf) {
+int64_t read_raw(FILE *fp, int64_t n_frames, size_t frame_size, char* out_buf, Header* header_out) {

@@ -12,10 +13,14 @@ int64_t read_raw(FILE *fp, int64_t n_frames, size_t frame_size, char* out_buf) {
    int64_t frames_read = 0;
    while (frames_read < n_frames) {

-        // Read header, return on fail
+        // Read header to temp buffer, return on fail
        if (fread(&h, sizeof(Header), 1, fp) != 1) {
            break;
        }
+        if(header_out){
+            memcpy(header_out, &h, sizeof(Header));
+            header_out++;
+        }

        // Read frame to temporary buffer
        if (fread(tmp, frame_size, 1, fp) != 1) {
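A pure-Python sketch of the file layout that read_raw() walks, not part of the changeset and leaving out the Moench03 pixel decoding: each frame is a fixed-size header immediately followed by frame_size bytes of payload, and the header is copied out only when requested, mirroring the header_out branch above. The 112-byte header size is derived from the frame_header_dt() fields.

def read_raw_py(fp, n_frames, frame_size, header_size=112, want_header=False):
    """Collect (header, payload) byte pairs frame by frame, stopping early
    when the file runs out of data, like the C loop above."""
    payloads, headers = [], []
    for _ in range(n_frames):
        h = fp.read(header_size)
        if len(h) != header_size:
            break
        d = fp.read(frame_size)
        if len(d) != frame_size:
            break
        if want_header:
            headers.append(h)          # read_raw() memcpy's this into header_out
        payloads.append(d)             # the C code presumably decodes this into out_buf
    return (payloads, headers) if want_header else payloads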

View File

@@ -1,6 +1,8 @@
#pragma once
#include <stdint.h>
#include <stdio.h>
-int64_t read_raw(FILE *fp, int64_t n_frames, size_t frame_size, char* out_buf);
+#include "data_types.h"
+
+int64_t read_raw(FILE *fp, int64_t n_frames, size_t frame_size, char* out_buf, Header* header_out);

void decode_moench03(const uint16_t *buf, uint16_t *out_buf);

View File

@@ -9,3 +9,23 @@ def test_references_on_read(data_path):
    r = RawFileReader(fname)
    frames = r.read(10)
    assert sys.getrefcount(frames) == 2 #Over counts by one due to call by reference
+
+
+def test_references_on_read_with_header(data_path):
+    fname = data_path/'test_d0_f0_0.raw'
+    r = RawFileReader(fname, header = True)
+    frames, header = r.read(100)
+    assert sys.getrefcount(frames) == 2 #Over counts by one due to call by reference
+    assert sys.getrefcount(header) == 2
+
+
+def test_reading_frame_numbers(data_path):
+    fname = data_path/'test_d0_f0_0.raw'
+    r = RawFileReader(fname, header = True)
+    frames, header = r.read(1000)
+    assert (header['Frame Number'] == np.arange(201, 1201, dtype = np.uint64)).all()
+
+
+def test_reading_more_files_than_available(data_path):
+    fname = data_path/'test_d0_f0_0.raw'
+    r = RawFileReader(fname, header = True)
+    frames, header = r.read(1500)
+    assert frames.shape == (1000, 400, 400)
+    assert header.size == 1000