reducing warnings, simplifying read

This commit is contained in:
Erik Frojdh 2023-06-01 17:53:24 +02:00
parent 9b2f8a2eb9
commit 5eeeaf681a
5 changed files with 89 additions and 61 deletions

View File

@ -19,7 +19,7 @@ c_ext = setuptools.Extension("creader",
c_ext.language = 'c'
setuptools.setup(
name= 'creader',
version = '2023.05.30',
version = '2023.6.1',
description = 'Reading cluster files',
ext_modules=[c_ext],
)

View File

@ -48,8 +48,7 @@ static void ClusterFileReader_dealloc(ClusterFileReader *self) {
}
// read method
static PyObject *ClusterFileReader_read(ClusterFileReader *self, PyObject *args,
PyObject *Py_UNUSED(kwds)) {
static PyObject *ClusterFileReader_read(ClusterFileReader *self, PyObject *args) {
const int ndim = 1;
Py_ssize_t size = 0;

View File

@ -1,23 +1,24 @@
#include "cluster_reader.h"
#include <assert.h>
int read_clusters(FILE *fp, int64_t n_clusters, Cluster *buf, int *n_left) {
#ifdef CR_VERBOSE
printf("Item size: %lu n_clusters: %lld, n_left: %d\n", sizeof(Cluster),
n_clusters, *n_left);
#endif
int iframe = 0, nph = *n_left;
size_t n_read = 0, nph_read = 0, nn = *n_left, nr = 0;
// n_left=n_clusters;
int iframe = 0;
int nph = *n_left;
size_t nph_read = 0;
size_t nn = *n_left;
size_t nr = 0;
// read photons left from previous frame
if (nph) {
if (nph > n_clusters - nph_read)
nn = n_clusters - nph_read;
else
if (nph > n_clusters) {
// if we have more photons left in the frame then photons to read we
// read directly
nn = n_clusters;
} else {
nn = nph;
// printf("* %d %d %d %d\n",iframe,nph,nn,n_left);
}
nr += fread((void *)(buf + nph_read), sizeof(Cluster), nn, fp);
n_read += nr / sizeof(Cluster);
nph_read += nn;
*n_left = nph - nn;
}
@ -30,10 +31,7 @@ int read_clusters(FILE *fp, int64_t n_clusters, Cluster *buf, int *n_left) {
else
nn = nph;
// printf("%d %d %d %d\n",iframe,nph,nr,n_left);
nr += fread((void *)(buf + nph_read), sizeof(Cluster), nn, fp);
// printf("%d %d %d %d\n",iframe,nph,nr,n_left);
n_read += nr;
nph_read += nn;
*n_left = nph - nn;
}
@ -41,10 +39,7 @@ int read_clusters(FILE *fp, int64_t n_clusters, Cluster *buf, int *n_left) {
break;
}
}
// size_t n_read = fread(buf, sizeof(Cluster), n_clusters, fp);
#ifdef CR_VERBOSE
printf("Read: %zu items %zu left %d\n", nph_read, n_read, *n_left);
#endif
assert(nph_read <= n_clusters); // sanity check in debug mode
return nph_read;
}

View File

@ -3,13 +3,12 @@
#include <Python.h>
#include <numpy/arrayobject.h>
#include "ClusterReader.h"
#include "arr_desc.h"
#include "data_types.h"
#include "ClusterReader.h"
#include "cluster_reader.h"
static PyObject *clusterize(PyObject *self, PyObject *args,
PyObject *Py_UNUSED(kwds)) {
static PyObject *clusterize(PyObject *Py_UNUSED(self), PyObject *args) {
// // Create an uninitialized numpy array
// PyArray_Descr *dtypeIn = cluster_dt();
@ -19,22 +18,16 @@ static PyObject *clusterize(PyObject *self, PyObject *args,
if (!PyArg_ParseTuple(args, "O", &cl_obj))
return NULL;
if (cluster_dt==NULL){
printf("BYE\n");
exit(1);
}
// Create a numpy array from the passed object, if possible numpy will
// use the underlying buffer, otherwise it will create a copy, for example
// if data type is different or we pass in a list. The
// NPY_ARRAY_C_CONTIGUOUS flag ensures that we have contiguous memory.
// function steals a reference to the data type so no need to deallocate
PyObject *cl_array = PyArray_FromArray((PyArrayObject *)cl_obj, cluster_dt(),
NPY_ARRAY_C_CONTIGUOUS);
PyObject *cl_array = PyArray_FromArray(
(PyArrayObject *)cl_obj, cluster_dt(), NPY_ARRAY_C_CONTIGUOUS);
if (cl_array == NULL) {
PyErr_SetString(
PyExc_TypeError,
"Could not convert first argument to numpy array.");
PyErr_SetString(PyExc_TypeError,
"Could not convert first argument to numpy array.");
return NULL;
}
@ -42,15 +35,14 @@ static PyObject *clusterize(PyObject *self, PyObject *args,
npy_intp *dims = PyArray_SHAPE((PyArrayObject *)cl_array);
Py_ssize_t size = dims[0];
Cluster *clusters = (Cluster *)(PyArray_DATA((PyArrayObject *)(cl_array)));
PyObject *cl_analysis = PyArray_SimpleNewFromDescr(ndim, dims, cluster_analysis_dt());
PyArray_FILLWBYTE((PyArrayObject *)cl_analysis, 0); //zero initialization
PyObject *cl_analysis =
PyArray_SimpleNewFromDescr(ndim, dims, cluster_analysis_dt());
PyArray_FILLWBYTE((PyArrayObject *)cl_analysis, 0); // zero initialization
// // Get a pointer to the array memory
ClusterAnalysis *buf = PyArray_DATA((PyArrayObject *)cl_analysis);
int nc = analyze_clusters(size, clusters, buf);
if (nc != size) {
PyErr_SetString(PyExc_TypeError, "Parsed wrong size array!");
@ -59,41 +51,37 @@ static PyObject *clusterize(PyObject *self, PyObject *args,
return cl_analysis;
}
/* Python-callable wrapper that exposes the cluster numpy dtype to Python.
 * Accepts no arguments; returns the PyArray_Descr built by cluster_dt(),
 * cast to a generic PyObject*.
 * NOTE(review): assumes cluster_dt() hands back a reference the caller
 * may return to Python (i.e. ownership transfers) -- confirm against
 * cluster_dt()'s definition. */
static PyObject *get_cluster_dt(PyObject *Py_UNUSED(self), PyObject *args) {
    /* Empty format string "": raise TypeError if any argument is passed. */
    if (!PyArg_ParseTuple(args, ""))
        return NULL;
    return (PyObject *)cluster_dt();
}
static PyObject *get_cluster_dt(PyObject *self, PyObject *args,
PyObject *Py_UNUSED(kwds)) {
return cluster_dt();
}
//Module docstring, shown as a part of help(creader)
// Module docstring, shown as a part of help(creader)
static char module_docstring[] = "C functions to read cluster files";
//Module methods
// Module methods
static PyMethodDef creader_methods[] = {
{"clusterize", clusterize, METH_VARARGS,
"Do some stuff"},
{"cluster_dt", get_cluster_dt, METH_VARARGS,
"Do some stuff"},
{NULL, NULL, 0, NULL} /* Sentinel */
{"clusterize", clusterize, METH_VARARGS, "Do some stuff"},
{"cluster_dt", get_cluster_dt, METH_VARARGS, "Do some stuff"},
{NULL, NULL, 0, NULL} /* Sentinel */
};
//Module definition
// Module definition
static struct PyModuleDef creader_def = {
PyModuleDef_HEAD_INIT,
"creader",
module_docstring,
-1,
creader_methods, // m_methods
NULL, // m_slots
NULL, // m_traverse
NULL, // m_clear
NULL // m_free
NULL, // m_slots
NULL, // m_traverse
NULL, // m_clear
NULL // m_free
};
//Initialize module and add classes
// Initialize module and add classes
PyMODINIT_FUNC PyInit_creader(void) {
PyObject *m = PyModule_Create(&creader_def);
if (m == NULL)

View File

@ -2,6 +2,7 @@ import pytest
import os, sys
from creader import ClusterFileReader
from fixtures import data_path
import numpy as np
def test_references_on_read(data_path):
fname= (data_path/'beam_En700eV_-40deg_300V_10us_d0_f0_100.clust').as_posix()
@ -25,4 +26,49 @@ def test_resize_on_read(data_path):
max_clusters = 10000000 #400MB initial allocation
clusters = r.read(max_clusters)
assert clusters.size == 481603
assert sys.getrefcount(clusters) == 2
assert sys.getrefcount(clusters) == 2
def test_read_file_with_single_frame(data_path):
    # The file should contain a single frame (frame 135) with 97 clusters.
    file_name = (data_path / 'single_frame_97_clustrers.clust').as_posix()
    reader = ClusterFileReader(file_name)
    result = reader.read(100)
    assert result.size == 97
    # Each cluster's fields follow a known pattern written by the generator.
    for index, cluster in enumerate(result):
        assert cluster['x'] == index + 1
        assert cluster['y'] == index + 200
        expected = np.arange(index * 9, (index + 1) * 9, dtype=np.int32)
        assert (cluster['data'] == expected).all()
def test_read_file_with_single_frame_in_chunks(data_path):
    # Same single-frame file (97 clusters); read it in chunks of 5 and
    # verify the chunk sizes sum to the full frame.
    file_name = (data_path / 'single_frame_97_clustrers.clust').as_posix()
    reader = ClusterFileReader(file_name)
    count = 0
    while True:
        chunk = reader.read(5)
        if not chunk.size:
            break
        count += chunk.size
    assert count == 97
def test_read_file_with_37_frames(data_path):
    # The file should contain 37 frames with 5 clusters each (185 total).
    # Full spec in utils/write_test_data.py
    file_name = (data_path / '37frames_with_5_clusters.clust').as_posix()
    reader = ClusterFileReader(file_name)
    result = reader.read(200)
    assert result.size == 185
    # Cluster fields repeat with period 5 (one pattern per frame position).
    for index, cluster in enumerate(result):
        position = index % 5
        assert cluster['x'] == position + 1
        assert cluster['y'] == position + 1
        expected = np.arange(position, position + 9, dtype=np.int32)
        assert (cluster['data'] == expected).all()
def test_read_file_with_37_frames_in_chunks(data_path):
    # 37 frames x 5 clusters each; a chunk size of 7 divides neither the
    # frame size nor the total, exercising the reader's carry-over logic.
    # Full spec in utils/write_test_data.py
    file_name = (data_path / '37frames_with_5_clusters.clust').as_posix()
    reader = ClusterFileReader(file_name)
    count = 0
    while True:
        chunk = reader.read(7)
        if not chunk.size:
            break
        count += chunk.size
    assert count == 185