From 5eeeaf681a31eaebc1ce063e9201f7a168bbed73 Mon Sep 17 00:00:00 2001
From: Erik Frojdh
Date: Thu, 1 Jun 2023 17:53:24 +0200
Subject: [PATCH] reducing warnings, simplifying read

---
 setup.py                    |  2 +-
 src/ClusterReader.c         |  3 +-
 src/cluster_reader.c        | 33 ++++++++-----------
 src/creader_module.c        | 64 +++++++++++++++----------------------
 tests/test_ClusterReader.py | 48 +++++++++++++++++++++++++++-
 5 files changed, 89 insertions(+), 61 deletions(-)

diff --git a/setup.py b/setup.py
index 47d946e..83fd368 100644
--- a/setup.py
+++ b/setup.py
@@ -19,7 +19,7 @@ c_ext = setuptools.Extension("creader",
 c_ext.language = 'c'
 setuptools.setup(
     name= 'creader',
-    version = '2023.05.30',
+    version = '2023.6.1',
     description = 'Reading cluster files',
     ext_modules=[c_ext],
 )
diff --git a/src/ClusterReader.c b/src/ClusterReader.c
index db9be16..1fba613 100644
--- a/src/ClusterReader.c
+++ b/src/ClusterReader.c
@@ -48,8 +48,7 @@ static void ClusterFileReader_dealloc(ClusterFileReader *self) {
 }
 
 // read method
-static PyObject *ClusterFileReader_read(ClusterFileReader *self, PyObject *args,
-                                        PyObject *Py_UNUSED(kwds)) {
+static PyObject *ClusterFileReader_read(ClusterFileReader *self, PyObject *args) {
 
     const int ndim = 1;
     Py_ssize_t size = 0;
diff --git a/src/cluster_reader.c b/src/cluster_reader.c
index 1c2996b..0dad64c 100644
--- a/src/cluster_reader.c
+++ b/src/cluster_reader.c
@@ -1,23 +1,24 @@
 #include "cluster_reader.h"
+#include <assert.h>
 
 int read_clusters(FILE *fp, int64_t n_clusters, Cluster *buf, int *n_left) {
-#ifdef CR_VERBOSE
-    printf("Item size: %lu n_clusters: %lld, n_left: %d\n", sizeof(Cluster),
-           n_clusters, *n_left);
-#endif
-    int iframe = 0, nph = *n_left;
-    size_t n_read = 0, nph_read = 0, nn = *n_left, nr = 0;
-    // n_left=n_clusters;
+    int iframe = 0;
+    int nph = *n_left;
+
+    size_t nph_read = 0;
+    size_t nn = *n_left;
+    size_t nr = 0;
 
     // read photons left from previous frame
     if (nph) {
-        if (nph > n_clusters - nph_read)
-            nn = n_clusters - nph_read;
-        else
+        if (nph > n_clusters) {
+            // if we have more photons left in the frame than photons to read we
+            // read directly
+            nn = n_clusters;
+        } else {
             nn = nph;
-        // printf("* %d %d %d %d\n",iframe,nph,nn,n_left);
+        }
         nr += fread((void *)(buf + nph_read), sizeof(Cluster), nn, fp);
-        n_read += nr / sizeof(Cluster);
         nph_read += nn;
         *n_left = nph - nn;
     }
@@ -30,10 +31,7 @@ int read_clusters(FILE *fp, int64_t n_clusters, Cluster *buf, int *n_left) {
             else
                 nn = nph;
 
-            // printf("%d %d %d %d\n",iframe,nph,nr,n_left);
             nr += fread((void *)(buf + nph_read), sizeof(Cluster), nn, fp);
-            // printf("%d %d %d %d\n",iframe,nph,nr,n_left);
-            n_read += nr;
             nph_read += nn;
             *n_left = nph - nn;
         }
@@ -41,10 +39,7 @@ int read_clusters(FILE *fp, int64_t n_clusters, Cluster *buf, int *n_left) {
             break;
         }
     }
-    // size_t n_read = fread(buf, sizeof(Cluster), n_clusters, fp);
-#ifdef CR_VERBOSE
-    printf("Read: %zu items %zu left %d\n", nph_read, n_read, *n_left);
-#endif
+    assert(nph_read <= n_clusters); // sanity check in debug mode
 
     return nph_read;
 }
diff --git a/src/creader_module.c b/src/creader_module.c
index 8339288..0a731b0 100644
--- a/src/creader_module.c
+++ b/src/creader_module.c
@@ -3,13 +3,12 @@
 #include
 #include
+#include "ClusterReader.h"
 #include "arr_desc.h"
 #include "data_types.h"
-#include "ClusterReader.h"
+#include "cluster_reader.h"
 
-
-static PyObject *clusterize(PyObject *self, PyObject *args,
-                            PyObject *Py_UNUSED(kwds)) {
+static PyObject *clusterize(PyObject *Py_UNUSED(self), PyObject *args) {
     // // Create an uninitialized numpy array
     // PyArray_Descr *dtypeIn = cluster_dt();
@@ -19,22 +18,16 @@ static PyObject *clusterize(PyObject *self, PyObject *args,
     if (!PyArg_ParseTuple(args, "O", &cl_obj))
         return NULL;
 
-    if (cluster_dt==NULL){
-        printf("BYE\n");
-        exit(1);
-    }
-
     // Create a numpy array from the passed object, if possible numpy will
     // use the underlying buffer, otherwise it will create a copy, for example
     // if data type is different or we pass in a list. The
     // NPY_ARRAY_C_CONTIGUOUS flag ensures that we have contiguous memory.
     // function steals a reference to the data type so no need to deallocate
-    PyObject *cl_array = PyArray_FromArray((PyArrayObject *)cl_obj, cluster_dt(),
-                                           NPY_ARRAY_C_CONTIGUOUS);
+    PyObject *cl_array = PyArray_FromArray(
+        (PyArrayObject *)cl_obj, cluster_dt(), NPY_ARRAY_C_CONTIGUOUS);
     if (cl_array == NULL) {
-        PyErr_SetString(
-            PyExc_TypeError,
-            "Could not convert first argument to numpy array.");
+        PyErr_SetString(PyExc_TypeError,
+                        "Could not convert first argument to numpy array.");
         return NULL;
     }
 
@@ -42,15 +35,14 @@ static PyObject *clusterize(PyObject *self, PyObject *args,
     npy_intp *dims = PyArray_SHAPE((PyArrayObject *)cl_array);
     Py_ssize_t size = dims[0];
-
     Cluster *clusters = (Cluster *)(PyArray_DATA((PyArrayObject *)(cl_array)));
-    PyObject *cl_analysis = PyArray_SimpleNewFromDescr(ndim, dims, cluster_analysis_dt());
-    PyArray_FILLWBYTE((PyArrayObject *)cl_analysis, 0); //zero initialization
+    PyObject *cl_analysis =
+        PyArray_SimpleNewFromDescr(ndim, dims, cluster_analysis_dt());
+    PyArray_FILLWBYTE((PyArrayObject *)cl_analysis, 0); // zero initialization
 
     // // Get a pointer to the array memory
     ClusterAnalysis *buf = PyArray_DATA((PyArrayObject *)cl_analysis);
-
     int nc = analyze_clusters(size, clusters, buf);
     if (nc != size) {
         PyErr_SetString(PyExc_TypeError, "Parsed wrong size array!");
@@ -59,41 +51,37 @@ static PyObject *clusterize(PyObject *self, PyObject *args,
     return cl_analysis;
 }
 
+static PyObject *get_cluster_dt(PyObject *Py_UNUSED(self), PyObject *args) {
+    if (!PyArg_ParseTuple(args, ""))
+        return NULL;
+    return (PyObject*)cluster_dt();
+}
 
-static PyObject *get_cluster_dt(PyObject *self, PyObject *args,
-                                PyObject *Py_UNUSED(kwds)) {
-    return cluster_dt();
-    }
-
-//Module docstring, shown as a part of help(creader)
+// Module docstring, shown as a part of help(creader)
 static char module_docstring[] = "C functions to read cluster files";
 
-//Module methods
+// Module methods
 static PyMethodDef creader_methods[] = {
-    {"clusterize", clusterize, METH_VARARGS,
-     "Do some stuff"},
-    {"cluster_dt", get_cluster_dt, METH_VARARGS,
-     "Do some stuff"},
-    {NULL, NULL, 0, NULL} /* Sentinel */
+    {"clusterize", clusterize, METH_VARARGS, "Do some stuff"},
+    {"cluster_dt", get_cluster_dt, METH_VARARGS, "Do some stuff"},
+    {NULL, NULL, 0, NULL} /* Sentinel */
 };
 
-
-//Module defenition
+// Module definition
 static struct PyModuleDef creader_def = {
     PyModuleDef_HEAD_INIT,
     "creader",
     module_docstring,
     -1,
     creader_methods, // m_methods
-    NULL, // m_slots
-    NULL, // m_traverse
-    NULL, // m_clear
-    NULL  // m_free
+    NULL,            // m_slots
+    NULL,            // m_traverse
+    NULL,            // m_clear
+    NULL             // m_free
 };
 
-//Initialize module and add classes
+// Initialize module and add classes
 PyMODINIT_FUNC PyInit_creader(void) {
-
     PyObject *m = PyModule_Create(&creader_def);
 
     if (m == NULL)
diff --git a/tests/test_ClusterReader.py b/tests/test_ClusterReader.py
index 325b69d..261f01f 100644
--- a/tests/test_ClusterReader.py
+++ b/tests/test_ClusterReader.py
@@ -2,6 +2,7 @@ import pytest
 import os, sys
 from creader import ClusterFileReader
 from fixtures import data_path
+import numpy as np
 
 def test_references_on_read(data_path):
     fname= (data_path/'beam_En700eV_-40deg_300V_10us_d0_f0_100.clust').as_posix()
@@ -25,4 +26,49 @@ def test_resize_on_read(data_path):
     max_clusters = 10000000 #400MB initial allocation
     clusters = r.read(max_clusters)
     assert clusters.size == 481603
-    assert sys.getrefcount(clusters) == 2
\ No newline at end of file
+    assert sys.getrefcount(clusters) == 2
+
+def test_read_file_with_single_frame(data_path):
+    #File should contain one frame 135 with 97 clusters
+    fname= (data_path/'single_frame_97_clustrers.clust').as_posix()
+    r = ClusterFileReader(fname)
+    clusters = r.read(100)
+    assert clusters.size == 97
+    for i, c in enumerate(clusters):
+        assert c['x'] == i+1
+        assert c['y'] == i+200
+        assert (c['data'] == np.arange(i*9, (i+1)*9, dtype = np.int32)).all()
+
+def test_read_file_with_single_frame_in_chunks(data_path):
+    #File should contain one frame 135 with 97 clusters
+    fname= (data_path/'single_frame_97_clustrers.clust').as_posix()
+    r = ClusterFileReader(fname)
+    # clusters = r.read(5)
+    total_clusters = 0
+    while (clusters:=r.read(5)).size:
+        total_clusters += clusters.size
+    assert total_clusters == 97
+
+
+def test_read_file_with_37_frames(data_path):
+    #File should contain 37 frames with 5 clusters each
+    #Full spec in utils/write_test_data.py
+    fname= (data_path/'37frames_with_5_clusters.clust').as_posix()
+    r = ClusterFileReader(fname)
+    clusters = r.read(200)
+    assert clusters.size == 185
+    for i, c in enumerate(clusters):
+        assert c['x'] == i%5+1
+        assert c['y'] == i%5+1
+        assert (c['data'] == np.arange(i%5, (i%5)+9, dtype = np.int32)).all()
+
+def test_read_file_with_37_frames_in_chunks(data_path):
+    #File should contain 37 frames with 5 clusters each
+    #Full spec in utils/write_test_data.py
+    fname= (data_path/'37frames_with_5_clusters.clust').as_posix()
+    r = ClusterFileReader(fname)
+    total_clusters = 0
+    while (clusters:=r.read(7)).size:
+        total_clusters += clusters.size
+    assert total_clusters == 185
+    
\ No newline at end of file
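
A minimal usage sketch (not part of the patch) of the chunked-read pattern exercised by the new *_in_chunks tests; the file name and chunk size are placeholders, and the field names follow the structured array used in the tests:

    from creader import ClusterFileReader

    reader = ClusterFileReader("example.clust")  # placeholder path
    total_clusters = 0
    # read(n) returns at most n clusters per call and an empty array once the
    # file is exhausted, so the loop below stops at end of file
    while (clusters := reader.read(1000)).size:
        total_clusters += clusters.size
        first = clusters[0]
        x, y, data = first['x'], first['y'], first['data']  # 'data' holds the 9 pixel values per cluster
    print(total_clusters)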