From 4efbb9a9145430ac62b9982d97edee553c5cb250 Mon Sep 17 00:00:00 2001
From: Erik Frojdh <erik.frojdh@gmail.com>
Date: Wed, 31 May 2023 13:31:35 +0200
Subject: [PATCH] restructure data types

---
 README.md                   |   1 -
 setup.py                    |   9 ++-
 src/ClusterReader.c         | 145 +++++++++++++++++-------------------
 src/ClusterReader.h         |   4 +-
 src/arr_desc.c              |  25 +++++++
 src/arr_desc.h              |   8 ++
 src/creader_module.c        |  79 +++++++++++++++++++-
 test.py                     |  10 +--
 tests/fixtures.py           |  11 +++
 tests/test_ClusterReader.py |  11 +++
 tests/test_functions.py     |  12 +++
 11 files changed, 225 insertions(+), 90 deletions(-)
 create mode 100644 src/arr_desc.c
 create mode 100644 src/arr_desc.h
 create mode 100644 tests/fixtures.py
 create mode 100644 tests/test_ClusterReader.py
 create mode 100644 tests/test_functions.py

diff --git a/README.md b/README.md
index 0886014..8f4f6d4 100644
--- a/README.md
+++ b/README.md
@@ -33,4 +33,3 @@ conda develop install .
 #or with pip
 pip install --editable .
 ```
-
diff --git a/setup.py b/setup.py
index aebd496..47d946e 100644
--- a/setup.py
+++ b/setup.py
@@ -4,7 +4,12 @@ import setuptools
 import numpy as np
 
 c_ext = setuptools.Extension("creader",
-                    sources = ["src/creader_module.c", "src/cluster_reader.c", "src/ClusterReader.c"],
+                    sources = [
+                        "src/creader_module.c", 
+                        "src/cluster_reader.c", 
+                        "src/ClusterReader.c",
+                        "src/arr_desc.c"
+                        ],
                     include_dirs=[
                             np.get_include(),"src/"
                             ],
@@ -14,7 +19,7 @@ c_ext = setuptools.Extension("creader",
 c_ext.language = 'c'
 setuptools.setup(
     name= 'creader',
-    version = '0.1',
+    version = '2023.05.30',
     description = 'Reading cluster files',
     ext_modules=[c_ext],
 )
diff --git a/src/ClusterReader.c b/src/ClusterReader.c
index e11674f..8ecb336 100644
--- a/src/ClusterReader.c
+++ b/src/ClusterReader.c
@@ -1,38 +1,27 @@
 #include "ClusterReader.h"
 #include "cluster_reader.h"
 #include "data_types.h"
+#include "arr_desc.h"
+
 
-#include <numpy/arrayobject.h>
 typedef struct {
     PyObject_HEAD 
     FILE *fp;
     int n_left;
 } ClusterFileReader;
 
-// Create a custom numpy data type that should reflect
-// our cluster data type.
-// TODO! Update with the actual cluster data type
-static PyArray_Descr *cluster_dt() {
-    PyObject *dtype_dict;
-    PyArray_Descr *dtype;
-    dtype_dict = Py_BuildValue("[(s, s),(s, s),(s, s, (i))]", "x", "u2", "y",
-                               "u2", "data", "i4", 9);
 
-    PyArray_DescrConverter(dtype_dict, &dtype);
-    Py_DECREF(dtype_dict);
-    return dtype;
-}
 
-static PyArray_Descr *cluster_analysis_dt() {
-    PyObject *dtype_dict;
-    PyArray_Descr *dtype;
-    dtype_dict = Py_BuildValue("[(s, s),(s, s),(s, s)]", "tot3", "i4", "tot2",
-                               "i4", "corner", "u4");
+// static PyArray_Descr *cluster_analysis_dt() {
+//     PyObject *dtype_dict;
+//     PyArray_Descr *dtype;
+//     dtype_dict = Py_BuildValue("[(s, s),(s, s),(s, s)]", "tot3", "i4", "tot2",
+//                                "i4", "corner", "u4");
 
-    PyArray_DescrConverter(dtype_dict, &dtype);
-    Py_DECREF(dtype_dict);
-    return dtype;
-}
+//     PyArray_DescrConverter(dtype_dict, &dtype);
+//     Py_DECREF(dtype_dict);
+//     return dtype;
+// }
 
 
 
@@ -84,12 +73,11 @@ static PyObject *ClusterFileReader_read(ClusterFileReader *self, PyObject *args,
     npy_intp dims[] = {size};
 
     // Create an uninitialized numpy array
-    PyArray_Descr *dtype = cluster_dt();
-    // PyObject *PyArray_SimpleNewFromDescr(int nd, npy_int const *dims,
-    // PyArray_Descr *descr)
-    PyObject *clusters = PyArray_SimpleNewFromDescr(ndim, dims, dtype);
+    PyObject *clusters = PyArray_SimpleNewFromDescr(ndim, dims, cluster_dt());
+    
+    // Fill with zeros
     PyArray_FILLWBYTE((PyArrayObject *)clusters,
-                      0); // zero initialization can be removed later
+                      0);
 
     // Get a pointer to the array memory
     void *buf = PyArray_DATA((PyArrayObject *)clusters);
@@ -117,77 +105,77 @@ static PyObject *ClusterFileReader_read(ClusterFileReader *self, PyObject *args,
     return clusters;
 }
 
-// read method
-static PyObject *ClusterFileReader_clusterize(ClusterFileReader *self,
-                                              PyObject *args,
-                                              PyObject *Py_UNUSED(kwds)) {
+// // read method
+// static PyObject *ClusterFileReader_clusterize(ClusterFileReader *self,
+//                                               PyObject *args,
+//                                               PyObject *Py_UNUSED(kwds)) {
 
-    // Create an uninitialized numpy array
-    PyArray_Descr *dtypeIn = cluster_dt();
-    PyArray_Descr *dtypeOut = cluster_analysis_dt();
+//     // Create an uninitialized numpy array
+//     PyArray_Descr *dtypeIn = cluster_dt();
+//     PyArray_Descr *dtypeOut = cluster_analysis_dt();
 
-    PyObject *c_obj;
-    if (!PyArg_ParseTuple(args, "O", &c_obj))
-        return NULL;
+    // PyObject *cl_obj;
+    // if (!PyArg_ParseTuple(args, "O", &cl_obj))
+    //     return NULL;
 
-    // Create two numpy arrays from the passed objects, if possible numpy will
-    // use the underlying buffer, otherwise it will create a copy, for example
-    // if data type is different or we pass in a list. The
-    // NPY_ARRAY_C_CONTIGUOUS flag ensures that we have contiguous memory.
-    PyObject *c_array = PyArray_FromArray((PyArrayObject *)c_obj, dtypeIn,
-                                          NPY_ARRAY_C_CONTIGUOUS);
+//     // Create two numpy arrays from the passed objects, if possible numpy will
+//     // use the underlying buffer, otherwise it will create a copy, for example
+//     // if data type is different or we pass in a list. The
+// //     // NPY_ARRAY_C_CONTIGUOUS flag ensures that we have contiguous memory.
+//     PyObject *cl_array = PyArray_FromArray((PyArrayObject *)cl_obj, cluster_dt,
+//                                           NPY_ARRAY_C_CONTIGUOUS);
 
-    // If parsing of a or b fails we throw an exception in Python
-    if (c_array == NULL) {
-        PyErr_SetString(
-            PyExc_TypeError,
-            "Could not convert one of the arguments to a numpy array.");
-        return NULL;
-    }
+//     // If parsing of a or b fails we throw an exception in Python
+//     if (cl_array == NULL) {
+//         PyErr_SetString(
+//             PyExc_TypeError,
+//             "Could not convert one of the arguments to a numpy array.");
+//         return NULL;
+//     }
 
-    const int ndim = PyArray_NDIM((PyArrayObject *)c_array);
+//     const int ndim = PyArray_NDIM((PyArrayObject *)c_array);
 
-    npy_intp *dims = PyArray_SHAPE((PyArrayObject *)c_array);
+//     npy_intp *dims = PyArray_SHAPE((PyArrayObject *)c_array);
 
-    Py_ssize_t size = dims[0];
-    // printf("%d size %d %d\n",ndim,size,sizeof(ClusterAnalysis));
-    // dims[0]=size;
+//     Py_ssize_t size = dims[0];
+//     // printf("%d size %d %d\n",ndim,size,sizeof(ClusterAnalysis));
+//     // dims[0]=size;
 
-    // Cluster *clusters = reinterpret_cast<Cluster *>(
-    // PyArray_DATA(reinterpret_cast<PyArrayObject *>(c_array)));
+//     // Cluster *clusters = reinterpret_cast<Cluster *>(
+//     // PyArray_DATA(reinterpret_cast<PyArrayObject *>(c_array)));
 
-    Cluster *clusters = (Cluster *)(PyArray_DATA((PyArrayObject *)(c_array)));
+//     Cluster *clusters = (Cluster *)(PyArray_DATA((PyArrayObject *)(c_array)));
 
-    // PyObject *PyArray_SimpleNewFromDescr(int nd, npy_int const *dims,
-    // PyArray_Descr *descr)
-    PyObject *clustersA = PyArray_SimpleNewFromDescr(ndim, dims, dtypeOut);
-    // PyArray_FILLWBYTE((PyArrayObject *)clustersA, 0); //zero initialization
-    // can be removed later
-    npy_intp *strides = PyArray_STRIDES(((PyArrayObject *)(clustersA)));
-    //  printf("strides %d %d\n", strides[0],sizeof(ClusterAnalysis));
+//     // PyObject *PyArray_SimpleNewFromDescr(int nd, npy_int const *dims,
+//     // PyArray_Descr *descr)
+//     PyObject *clustersA = PyArray_SimpleNewFromDescr(ndim, dims, dtypeOut);
+//     // PyArray_FILLWBYTE((PyArrayObject *)clustersA, 0); //zero initialization
+//     // can be removed later
+//     npy_intp *strides = PyArray_STRIDES(((PyArrayObject *)(clustersA)));
+//     //  printf("strides %d %d\n", strides[0],sizeof(ClusterAnalysis));
 
-    // Get a pointer to the array memory
-    ClusterAnalysis *buf = PyArray_DATA((PyArrayObject *)clustersA);
+//     // Get a pointer to the array memory
+//     ClusterAnalysis *buf = PyArray_DATA((PyArrayObject *)clustersA);
 
-    // Call the standalone C code to read clusters from file
-    // Here goes the looping, removing frame numbers etc.
-    int nc = analyze_clusters(size, clusters, buf);
-    // printf("%d %d\n",nc,size);
+//     // Call the standalone C code to read clusters from file
+//     // Here goes the looping, removing frame numbers etc.
+//     int nc = analyze_clusters(size, clusters, buf);
+//     // printf("%d %d\n",nc,size);
 
-    if (nc != size) {
+//     if (nc != size) {
 
-        PyErr_SetString(PyExc_TypeError, "Parsed wrong size array!");
-    }
+//         PyErr_SetString(PyExc_TypeError, "Parsed wrong size array!");
+//     }
 
-    return clustersA;
-}
+//     return clustersA;
+// }
 
 // List all methods in our ClusterFileReader class
 static PyMethodDef ClusterFileReader_methods[] = {
     {"read", (PyCFunction)ClusterFileReader_read, METH_VARARGS,
      "Read clusters"},
-    {"clusterize", (PyCFunction)ClusterFileReader_clusterize, METH_VARARGS,
-     "Analyze clusters"},
+    // {"clusterize", (PyCFunction)ClusterFileReader_clusterize, METH_VARARGS,
+    //  "Analyze clusters"},
     {NULL, NULL, 0, NULL} /* Sentinel */
 };
 
@@ -205,6 +193,7 @@ static PyTypeObject ClusterFileReaderType = {
 };
 
 void init_ClusterFileReader(PyObject *m){
+
     import_array();
     if (PyType_Ready(&ClusterFileReaderType) < 0)
         return NULL;
diff --git a/src/ClusterReader.h b/src/ClusterReader.h
index 90b66e9..54f3a91 100644
--- a/src/ClusterReader.h
+++ b/src/ClusterReader.h
@@ -1,5 +1,7 @@
 # pragma once
+#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
+#define PY_SSIZE_T_CLEAN
 #include <Python.h>
-
+#include <numpy/arrayobject.h>
 
 void init_ClusterFileReader(PyObject *m);
\ No newline at end of file
diff --git a/src/arr_desc.c b/src/arr_desc.c
new file mode 100644
index 0000000..ee45234
--- /dev/null
+++ b/src/arr_desc.c
@@ -0,0 +1,25 @@
+#include "arr_desc.h"
+
+// New reference to the cluster dtype (x:u2, y:u2, data:9 x i4); NULL + exception on error.
+PyArray_Descr *cluster_dt(void) {
+    import_array(); // NOTE(review): re-run on every call; once per file would do
+    PyArray_Descr *dtype = NULL;
+    PyObject *spec = Py_BuildValue("[(s, s),(s, s),(s, s, (i))]", "x", "u2", "y",
+                                   "u2", "data", "i4", 9);
+    if (spec == NULL || !PyArray_DescrConverter(spec, &dtype))
+        dtype = NULL; // the failing call has already set a Python exception
+    Py_XDECREF(spec);
+    return dtype;
+}
+
+// New reference to the analysis dtype (tot3:i4, tot2:i4, corner:u4); NULL + exception on error.
+PyArray_Descr *cluster_analysis_dt(void) {
+    import_array(); //TODO! Correct placement for this?
+    PyArray_Descr *dtype = NULL; // must start as NULL so a failed conversion is detectable
+    PyObject *spec = Py_BuildValue("[(s, s),(s, s),(s, s)]", "tot3", "i4", "tot2",
+                                   "i4", "corner", "u4");
+    if (spec == NULL || !PyArray_DescrConverter(spec, &dtype))
+        dtype = NULL; // the failing call has already set a Python exception
+    Py_XDECREF(spec);
+    return dtype;
+}
diff --git a/src/arr_desc.h b/src/arr_desc.h
new file mode 100644
index 0000000..3a4679d
--- /dev/null
+++ b/src/arr_desc.h
@@ -0,0 +1,8 @@
+#pragma once
+#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
+#define PY_SSIZE_T_CLEAN
+#include <Python.h>
+#include <numpy/arrayobject.h>
+
+PyArray_Descr *cluster_dt(void);          // numpy dtype used for cluster arrays
+PyArray_Descr *cluster_analysis_dt(void); // numpy dtype used for analysis arrays
\ No newline at end of file
diff --git a/src/creader_module.c b/src/creader_module.c
index 6fe04df..d4134ff 100644
--- a/src/creader_module.c
+++ b/src/creader_module.c
@@ -1,19 +1,89 @@
 #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
 #define PY_SSIZE_T_CLEAN
-
-#include "ClusterReader.h"
 #include <Python.h>
+#include <numpy/arrayobject.h>
+
+#include "arr_desc.h"
+#include "data_types.h"
+#include "ClusterReader.h"
+
+
+// clusterize(clusters): run analyze_clusters over every cluster of the input.
+// Returns a new array of the analysis dtype with the same shape as the input,
+// or NULL with a Python exception set on failure.
+static PyObject *clusterize(PyObject *Py_UNUSED(self), PyObject *args) {
+    PyObject *cl_obj;
+    if (!PyArg_ParseTuple(args, "O", &cl_obj))
+        return NULL;
+
+    PyArray_Descr *dt_in = cluster_dt();
+    if (dt_in == NULL)
+        return NULL;
+
+    // Accepts arrays, lists, ...; numpy reuses the buffer when it can and
+    // copies otherwise. PyArray_FromAny steals the reference to dt_in.
+    PyObject *cl_array =
+        PyArray_FromAny(cl_obj, dt_in, 0, 0, NPY_ARRAY_C_CONTIGUOUS, NULL);
+    if (cl_array == NULL) {
+        PyErr_SetString(
+            PyExc_TypeError,
+            "Could not convert first argument to numpy array.");
+        return NULL;
+    }
+
+    const int ndim = PyArray_NDIM((PyArrayObject *)cl_array);
+    npy_intp *dims = PyArray_SHAPE((PyArrayObject *)cl_array);
+    // Total element count, so 0-d and N-d inputs are handled as well as 1-d
+    const npy_intp size = PyArray_SIZE((PyArrayObject *)cl_array);
+
+    PyArray_Descr *dt_out = cluster_analysis_dt();
+    PyObject *cl_analysis = NULL;
+    if (dt_out != NULL) // PyArray_SimpleNewFromDescr steals the reference
+        cl_analysis = PyArray_SimpleNewFromDescr(ndim, dims, dt_out);
+    if (cl_analysis == NULL) {
+        Py_DECREF(cl_array);
+        return NULL;
+    }
+    PyArray_FILLWBYTE((PyArrayObject *)cl_analysis, 0); //zero initialization
+
+    Cluster *clusters = (Cluster *)PyArray_DATA((PyArrayObject *)cl_array);
+    ClusterAnalysis *buf = PyArray_DATA((PyArrayObject *)cl_analysis);
+    int nc = analyze_clusters(size, clusters, buf);
+    Py_DECREF(cl_array); // release our reference to the (possibly copied) input
+    if (nc != size) {
+        PyErr_SetString(PyExc_TypeError, "Parsed wrong size array!");
+        Py_DECREF(cl_analysis); // a set exception must be paired with NULL
+        return NULL;
+    }
+    return cl_analysis;
+}
+
+
+// cluster_dt(): return the numpy dtype used for cluster arrays (new reference).
+static PyObject *get_cluster_dt(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(args)) {
+    return (PyObject *)cluster_dt(); // NULL propagates the pending exception
+}
 
 //Module docstring, shown as a part of help(creader)
 static char module_docstring[] = "C functions to read cluster files";
 
+//Module-level functions exposed by creader; the strings are shown by help()
+static PyMethodDef creader_methods[] = {
+    {"clusterize", (PyCFunction)clusterize, METH_VARARGS,
+     "clusterize(clusters) -> array with one analysis record per cluster"},
+    {"cluster_dt", (PyCFunction)get_cluster_dt, METH_VARARGS,
+     "cluster_dt() -> numpy dtype used for cluster arrays"},
+    {NULL, NULL, 0, NULL}        /* Sentinel */
+};
+
+
 //Module defenition
 static struct PyModuleDef creader_def = {
     PyModuleDef_HEAD_INIT,
     "creader",
     module_docstring,
     -1,
-    NULL, // m_methods
+    creader_methods, // m_methods
     NULL, // m_slots
     NULL, // m_traverse
     NULL, // m_clear
@@ -22,9 +92,12 @@ static struct PyModuleDef creader_def = {
 
 //Initialize module and add classes
 PyMODINIT_FUNC PyInit_creader(void) {
+    
+
     PyObject *m = PyModule_Create(&creader_def);
     if (m == NULL)
         return NULL;
+    import_array();
 
     init_ClusterFileReader(m);
     return m;
diff --git a/test.py b/test.py
index 88bfc6b..af8cd32 100644
--- a/test.py
+++ b/test.py
@@ -1,4 +1,4 @@
-from creader import ClusterFileReader
+from creader import ClusterFileReader, clusterize
 import numpy as np
 
 # maxph=100000000
@@ -9,8 +9,8 @@ fpath = Path("/mnt/sls_det_storage/moench_data/Moench_LGAD_SIM_Nov22/moenchLGAD2
 # r = ClusterFileReader()
 
 r = ClusterFileReader(fpath.as_posix())
+a=r.read(maxph)
+# b=clusterize(a)
 
-# a=r.read(maxph)
-# b=r.clusterize(a)
-#v=int(maxph/100)
-#print(a[::v])
+# #v=int(maxph/100)
+# #print(a[::v])
diff --git a/tests/fixtures.py b/tests/fixtures.py
new file mode 100644
index 0000000..fe58c9d
--- /dev/null
+++ b/tests/fixtures.py
@@ -0,0 +1,11 @@
+import pytest
+import os
+from pathlib import Path
+
+@pytest.fixture
+def data_path():
+    """Return $CREADER_TEST_DATA as a Path; raise KeyError when it is unset."""
+    try:
+        return Path(os.environ['CREADER_TEST_DATA'])
+    except KeyError:
+        raise KeyError("CREADER_TEST_DATA needs to be set before running tests")
\ No newline at end of file
diff --git a/tests/test_ClusterReader.py b/tests/test_ClusterReader.py
new file mode 100644
index 0000000..e0f9d48
--- /dev/null
+++ b/tests/test_ClusterReader.py
@@ -0,0 +1,11 @@
+import pytest
+import os, sys
+from creader import ClusterFileReader
+from fixtures import data_path
+
+def test_references_on_read(data_path):
+    fname= (data_path/'beam_En700eV_-40deg_300V_10us_d0_f0_100.clust').as_posix()
+    r = ClusterFileReader(fname)
+    clusters = r.read(10)
+    assert sys.getrefcount(clusters) == 2 # 1 for the local name + 1 for getrefcount's own argument; more means read() leaked a reference
+
diff --git a/tests/test_functions.py b/tests/test_functions.py
new file mode 100644
index 0000000..32da387
--- /dev/null
+++ b/tests/test_functions.py
@@ -0,0 +1,12 @@
+import pytest
+from fixtures import data_path
+from creader import ClusterFileReader, clusterize
+import sys
+
+def test_references_on_clusterize(data_path):
+    fname= (data_path/'beam_En700eV_-40deg_300V_10us_d0_f0_100.clust').as_posix()
+    r = ClusterFileReader(fname)
+    clusters = r.read(10)
+    result = clusterize(clusters)
+    assert sys.getrefcount(clusters) == 2 # 1 for the local name + 1 for getrefcount's own argument; clusterize must not keep a reference to its input
+    assert sys.getrefcount(result) == 2 # same accounting for the array clusterize returned
\ No newline at end of file