From 5eeeaf681a31eaebc1ce063e9201f7a168bbed73 Mon Sep 17 00:00:00 2001
From: Erik Frojdh
Date: Thu, 1 Jun 2023 17:53:24 +0200
Subject: [PATCH] reducing warnings, simplifying read

---
 setup.py                    |  2 +-
 src/ClusterReader.c         |  3 +-
 src/cluster_reader.c        | 33 ++++++++-----------
 src/creader_module.c        | 64 +++++++++++++++----------------------
 tests/test_ClusterReader.py | 48 +++++++++++++++++++++++++++-
 5 files changed, 89 insertions(+), 61 deletions(-)

diff --git a/setup.py b/setup.py
index 47d946e..83fd368 100644
--- a/setup.py
+++ b/setup.py
@@ -19,7 +19,7 @@ c_ext = setuptools.Extension("creader",
 c_ext.language = 'c'
 setuptools.setup(
     name= 'creader',
-    version = '2023.05.30',
+    version = '2023.6.1',
     description = 'Reading cluster files',
     ext_modules=[c_ext],
 )
diff --git a/src/ClusterReader.c b/src/ClusterReader.c
index db9be16..1fba613 100644
--- a/src/ClusterReader.c
+++ b/src/ClusterReader.c
@@ -48,8 +48,7 @@ static void ClusterFileReader_dealloc(ClusterFileReader *self) {
 }
 
 // read method
-static PyObject *ClusterFileReader_read(ClusterFileReader *self, PyObject *args,
-                                        PyObject *Py_UNUSED(kwds)) {
+static PyObject *ClusterFileReader_read(ClusterFileReader *self, PyObject *args) {
 
     const int ndim = 1;
     Py_ssize_t size = 0;
diff --git a/src/cluster_reader.c b/src/cluster_reader.c
index 1c2996b..0dad64c 100644
--- a/src/cluster_reader.c
+++ b/src/cluster_reader.c
@@ -1,23 +1,24 @@
 #include "cluster_reader.h"
+#include <assert.h>
 
 int read_clusters(FILE *fp, int64_t n_clusters, Cluster *buf, int *n_left) {
-#ifdef CR_VERBOSE
-    printf("Item size: %lu n_clusters: %lld, n_left: %d\n", sizeof(Cluster),
-           n_clusters, *n_left);
-#endif
-    int iframe = 0, nph = *n_left;
-    size_t n_read = 0, nph_read = 0, nn = *n_left, nr = 0;
-    // n_left=n_clusters;
+    int iframe = 0;
+    int nph = *n_left;
+
+    size_t nph_read = 0;
+    size_t nn = *n_left;
+    size_t nr = 0;
 
     // read photons left from previous frame
     if (nph) {
-        if (nph > n_clusters - nph_read)
-            nn = n_clusters - nph_read;
-        else
+        if (nph > n_clusters) {
+            // if we have more photons left in the frame than photons to read we
+            // read directly
+            nn = n_clusters;
+        } else {
             nn = nph;
-        // printf("* %d %d %d %d\n",iframe,nph,nn,n_left);
+        }
         nr += fread((void *)(buf + nph_read), sizeof(Cluster), nn, fp);
-        n_read += nr / sizeof(Cluster);
         nph_read += nn;
         *n_left = nph - nn;
     }
@@ -30,10 +31,7 @@ int read_clusters(FILE *fp, int64_t n_clusters, Cluster *buf, int *n_left) {
             else
                 nn = nph;
 
-            // printf("%d %d %d %d\n",iframe,nph,nr,n_left);
             nr += fread((void *)(buf + nph_read), sizeof(Cluster), nn, fp);
-            // printf("%d %d %d %d\n",iframe,nph,nr,n_left);
-            n_read += nr;
             nph_read += nn;
             *n_left = nph - nn;
         }
@@ -41,10 +39,7 @@ int read_clusters(FILE *fp, int64_t n_clusters, Cluster *buf, int *n_left) {
             break;
         }
     }
-    // size_t n_read = fread(buf, sizeof(Cluster), n_clusters, fp);
-#ifdef CR_VERBOSE
-    printf("Read: %zu items %zu left %d\n", nph_read, n_read, *n_left);
-#endif
+    assert(nph_read <= n_clusters); // sanity check in debug mode
 
     return nph_read;
 }
diff --git a/src/creader_module.c b/src/creader_module.c
index 8339288..0a731b0 100644
--- a/src/creader_module.c
+++ b/src/creader_module.c
@@ -3,13 +3,12 @@
 #include
 #include
+#include "ClusterReader.h"
 #include "arr_desc.h"
 #include "data_types.h"
-#include "ClusterReader.h"
+#include "cluster_reader.h"
 
-
-static PyObject *clusterize(PyObject *self, PyObject *args,
-                            PyObject *Py_UNUSED(kwds)) {
+static PyObject *clusterize(PyObject *Py_UNUSED(self), PyObject *args) {
     // // Create an uninitialized numpy array
     // PyArray_Descr *dtypeIn = cluster_dt();
@@ -19,22 +18,16 @@ static PyObject *clusterize(PyObject *self, PyObject *args,
     if (!PyArg_ParseTuple(args, "O", &cl_obj))
         return NULL;
 
-    if (cluster_dt==NULL){
-        printf("BYE\n");
-        exit(1);
-    }
-
     // Create a numpy array from the passed object, if possible numpy will
     // use the underlying buffer, otherwise it will create a copy, for example
     // if data type is different or we pass in a list. The
     // NPY_ARRAY_C_CONTIGUOUS flag ensures that we have contiguous memory.
     // function steals a reference to the data type so no need to deallocate
-    PyObject *cl_array = PyArray_FromArray((PyArrayObject *)cl_obj, cluster_dt(),
-                                           NPY_ARRAY_C_CONTIGUOUS);
+    PyObject *cl_array = PyArray_FromArray(
+        (PyArrayObject *)cl_obj, cluster_dt(), NPY_ARRAY_C_CONTIGUOUS);
     if (cl_array == NULL) {
-        PyErr_SetString(
-            PyExc_TypeError,
-            "Could not convert first argument to numpy array.");
+        PyErr_SetString(PyExc_TypeError,
+                        "Could not convert first argument to numpy array.");
         return NULL;
     }
 
@@ -42,15 +35,14 @@ static PyObject *clusterize(PyObject *self, PyObject *args,
     npy_intp *dims = PyArray_SHAPE((PyArrayObject *)cl_array);
     Py_ssize_t size = dims[0];
-
     Cluster *clusters = (Cluster *)(PyArray_DATA((PyArrayObject *)(cl_array)));
-    PyObject *cl_analysis = PyArray_SimpleNewFromDescr(ndim, dims, cluster_analysis_dt());
-    PyArray_FILLWBYTE((PyArrayObject *)cl_analysis, 0); //zero initialization
+    PyObject *cl_analysis =
+        PyArray_SimpleNewFromDescr(ndim, dims, cluster_analysis_dt());
+    PyArray_FILLWBYTE((PyArrayObject *)cl_analysis, 0); // zero initialization
 
     // // Get a pointer to the array memory
     ClusterAnalysis *buf = PyArray_DATA((PyArrayObject *)cl_analysis);
-
     int nc = analyze_clusters(size, clusters, buf);
     if (nc != size) {
         PyErr_SetString(PyExc_TypeError, "Parsed wrong size array!");
@@ -59,41 +51,37 @@ static PyObject *clusterize(PyObject *self, PyObject *args,
     return cl_analysis;
 }
 
+static PyObject *get_cluster_dt(PyObject *Py_UNUSED(self), PyObject *args) {
+    if (!PyArg_ParseTuple(args, ""))
+        return NULL;
+    return (PyObject*)cluster_dt();
+}
 
-static PyObject *get_cluster_dt(PyObject *self, PyObject *args,
-                                PyObject *Py_UNUSED(kwds)) {
-    return cluster_dt();
-    }
-
-//Module docstring, shown as a part of help(creader)
+// Module docstring, shown as a part of help(creader)
 static char module_docstring[] = "C functions to read cluster files";
 
-//Module methods
+// Module methods
 static PyMethodDef creader_methods[] = {
-    {"clusterize", clusterize, METH_VARARGS,
-     "Do some stuff"},
-    {"cluster_dt", get_cluster_dt, METH_VARARGS,
-     "Do some stuff"},
-    {NULL, NULL, 0, NULL} /* Sentinel */
+    {"clusterize", clusterize, METH_VARARGS, "Do some stuff"},
+    {"cluster_dt", get_cluster_dt, METH_VARARGS, "Do some stuff"},
+    {NULL, NULL, 0, NULL} /* Sentinel */
 };
 
-
-//Module defenition
+// Module definition
 static struct PyModuleDef creader_def = {
     PyModuleDef_HEAD_INIT,
     "creader",
     module_docstring,
     -1,
     creader_methods, // m_methods
-    NULL, // m_slots
-    NULL, // m_traverse
-    NULL, // m_clear
-    NULL  // m_free
+    NULL,            // m_slots
+    NULL,            // m_traverse
+    NULL,            // m_clear
+    NULL             // m_free
 };
 
-//Initialize module and add classes
+// Initialize module and add classes
 PyMODINIT_FUNC PyInit_creader(void) {
-
     PyObject *m = PyModule_Create(&creader_def);
 
     if (m == NULL)
diff --git a/tests/test_ClusterReader.py b/tests/test_ClusterReader.py
index 325b69d..261f01f 100644
--- a/tests/test_ClusterReader.py
+++ b/tests/test_ClusterReader.py
@@ -2,6 +2,7 @@ import pytest
 import os, sys
 from creader import ClusterFileReader
 from fixtures import data_path
+import numpy as np
 
 def test_references_on_read(data_path):
     fname= (data_path/'beam_En700eV_-40deg_300V_10us_d0_f0_100.clust').as_posix()
@@ -25,4 +26,49 @@ def test_resize_on_read(data_path):
     max_clusters = 10000000 #400MB initial allocation
     clusters = r.read(max_clusters)
     assert clusters.size == 481603
-    assert sys.getrefcount(clusters) == 2
\ No newline at end of file
+    assert sys.getrefcount(clusters) == 2
+
+def test_read_file_with_single_frame(data_path):
+    #File should contain one frame 135 with 97 clusters
+    fname= (data_path/'single_frame_97_clustrers.clust').as_posix()
+    r = ClusterFileReader(fname)
+    clusters = r.read(100)
+    assert clusters.size == 97
+    for i, c in enumerate(clusters):
+        assert c['x'] == i+1
+        assert c['y'] == i+200
+        assert (c['data'] == np.arange(i*9, (i+1)*9, dtype = np.int32)).all()
+
+def test_read_file_with_single_frame_in_chunks(data_path):
+    #File should contain one frame 135 with 97 clusters
+    fname= (data_path/'single_frame_97_clustrers.clust').as_posix()
+    r = ClusterFileReader(fname)
+    # clusters = r.read(5)
+    total_clusters = 0
+    while (clusters:=r.read(5)).size:
+        total_clusters += clusters.size
+    assert total_clusters == 97
+
+
+def test_read_file_with_37_frames(data_path):
+    #File should contain 37 frames with 5 clusters each
+    #Full spec in utils/write_test_data.py
+    fname= (data_path/'37frames_with_5_clusters.clust').as_posix()
+    r = ClusterFileReader(fname)
+    clusters = r.read(200)
+    assert clusters.size == 185
+    for i, c in enumerate(clusters):
+        assert c['x'] == i%5+1
+        assert c['y'] == i%5+1
+        assert (c['data'] == np.arange(i%5, (i%5)+9, dtype = np.int32)).all()
+
+def test_read_file_with_37_frames_in_chunks(data_path):
+    #File should contain 37 frames with 5 clusters each
+    #Full spec in utils/write_test_data.py
+    fname= (data_path/'37frames_with_5_clusters.clust').as_posix()
+    r = ClusterFileReader(fname)
+    total_clusters = 0
+    while (clusters:=r.read(7)).size:
+        total_clusters += clusters.size
+    assert total_clusters == 185
+    
\ No newline at end of file
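
A minimal usage sketch (not part of the patch) of the chunked-read pattern exercised by the new *_in_chunks tests; the file name and chunk size are placeholders, and the field names follow the structured array used in the tests:

    from creader import ClusterFileReader

    reader = ClusterFileReader("example.clust")  # placeholder path
    total_clusters = 0
    # read(n) returns at most n clusters per call and an empty array once the
    # file is exhausted, so the loop below stops at end of file
    while (clusters := reader.read(1000)).size:
        total_clusters += clusters.size
        first = clusters[0]
        x, y, data = first['x'], first['y'], first['data']  # 'data' holds the 9 pixel values per cluster
    print(total_clusters)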