From 4efbb9a9145430ac62b9982d97edee553c5cb250 Mon Sep 17 00:00:00 2001 From: Erik Frojdh Date: Wed, 31 May 2023 13:31:35 +0200 Subject: [PATCH] restructure data types --- README.md | 1 - setup.py | 9 ++- src/ClusterReader.c | 145 +++++++++++++++++------------------- src/ClusterReader.h | 4 +- src/arr_desc.c | 25 +++++++ src/arr_desc.h | 8 ++ src/creader_module.c | 79 +++++++++++++++++++- test.py | 10 +-- tests/fixtures.py | 11 +++ tests/test_ClusterReader.py | 11 +++ tests/test_functions.py | 12 +++ 11 files changed, 225 insertions(+), 90 deletions(-) create mode 100644 src/arr_desc.c create mode 100644 src/arr_desc.h create mode 100644 tests/fixtures.py create mode 100644 tests/test_ClusterReader.py create mode 100644 tests/test_functions.py diff --git a/README.md b/README.md index 0886014..8f4f6d4 100644 --- a/README.md +++ b/README.md @@ -33,4 +33,3 @@ conda develop install . #or with pip pip install --editable . ``` - diff --git a/setup.py b/setup.py index aebd496..47d946e 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,12 @@ import setuptools import numpy as np c_ext = setuptools.Extension("creader", - sources = ["src/creader_module.c", "src/cluster_reader.c", "src/ClusterReader.c"], + sources = [ + "src/creader_module.c", + "src/cluster_reader.c", + "src/ClusterReader.c", + "src/arr_desc.c" + ], include_dirs=[ np.get_include(),"src/" ], @@ -14,7 +19,7 @@ c_ext = setuptools.Extension("creader", c_ext.language = 'c' setuptools.setup( name= 'creader', - version = '0.1', + version = '2023.05.30', description = 'Reading cluster files', ext_modules=[c_ext], ) diff --git a/src/ClusterReader.c b/src/ClusterReader.c index e11674f..8ecb336 100644 --- a/src/ClusterReader.c +++ b/src/ClusterReader.c @@ -1,38 +1,27 @@ #include "ClusterReader.h" #include "cluster_reader.h" #include "data_types.h" +#include "arr_desc.h" + -#include typedef struct { PyObject_HEAD FILE *fp; int n_left; } ClusterFileReader; -// Create a custom numpy data type that should reflect -// our cluster data type. -// TODO! Update with the actual cluster data type -static PyArray_Descr *cluster_dt() { - PyObject *dtype_dict; - PyArray_Descr *dtype; - dtype_dict = Py_BuildValue("[(s, s),(s, s),(s, s, (i))]", "x", "u2", "y", - "u2", "data", "i4", 9); - PyArray_DescrConverter(dtype_dict, &dtype); - Py_DECREF(dtype_dict); - return dtype; -} -static PyArray_Descr *cluster_analysis_dt() { - PyObject *dtype_dict; - PyArray_Descr *dtype; - dtype_dict = Py_BuildValue("[(s, s),(s, s),(s, s)]", "tot3", "i4", "tot2", - "i4", "corner", "u4"); +// static PyArray_Descr *cluster_analysis_dt() { +// PyObject *dtype_dict; +// PyArray_Descr *dtype; +// dtype_dict = Py_BuildValue("[(s, s),(s, s),(s, s)]", "tot3", "i4", "tot2", +// "i4", "corner", "u4"); - PyArray_DescrConverter(dtype_dict, &dtype); - Py_DECREF(dtype_dict); - return dtype; -} +// PyArray_DescrConverter(dtype_dict, &dtype); +// Py_DECREF(dtype_dict); +// return dtype; +// } @@ -84,12 +73,11 @@ static PyObject *ClusterFileReader_read(ClusterFileReader *self, PyObject *args, npy_intp dims[] = {size}; // Create an uninitialized numpy array - PyArray_Descr *dtype = cluster_dt(); - // PyObject *PyArray_SimpleNewFromDescr(int nd, npy_int const *dims, - // PyArray_Descr *descr) - PyObject *clusters = PyArray_SimpleNewFromDescr(ndim, dims, dtype); + PyObject *clusters = PyArray_SimpleNewFromDescr(ndim, dims, cluster_dt()); + + // Fill with zeros PyArray_FILLWBYTE((PyArrayObject *)clusters, - 0); // zero initialization can be removed later + 0); // Get a pointer to the array memory void *buf = PyArray_DATA((PyArrayObject *)clusters); @@ -117,77 +105,77 @@ static PyObject *ClusterFileReader_read(ClusterFileReader *self, PyObject *args, return clusters; } -// read method -static PyObject *ClusterFileReader_clusterize(ClusterFileReader *self, - PyObject *args, - PyObject *Py_UNUSED(kwds)) { +// // read method +// static PyObject *ClusterFileReader_clusterize(ClusterFileReader *self, +// PyObject *args, +// PyObject *Py_UNUSED(kwds)) { - // Create an uninitialized numpy array - PyArray_Descr *dtypeIn = cluster_dt(); - PyArray_Descr *dtypeOut = cluster_analysis_dt(); +// // Create an uninitialized numpy array +// PyArray_Descr *dtypeIn = cluster_dt(); +// PyArray_Descr *dtypeOut = cluster_analysis_dt(); - PyObject *c_obj; - if (!PyArg_ParseTuple(args, "O", &c_obj)) - return NULL; + // PyObject *cl_obj; + // if (!PyArg_ParseTuple(args, "O", &cl_obj)) + // return NULL; - // Create two numpy arrays from the passed objects, if possible numpy will - // use the underlying buffer, otherwise it will create a copy, for example - // if data type is different or we pass in a list. The - // NPY_ARRAY_C_CONTIGUOUS flag ensures that we have contiguous memory. - PyObject *c_array = PyArray_FromArray((PyArrayObject *)c_obj, dtypeIn, - NPY_ARRAY_C_CONTIGUOUS); +// // Create two numpy arrays from the passed objects, if possible numpy will +// // use the underlying buffer, otherwise it will create a copy, for example +// // if data type is different or we pass in a list. The +// // // NPY_ARRAY_C_CONTIGUOUS flag ensures that we have contiguous memory. +// PyObject *cl_array = PyArray_FromArray((PyArrayObject *)cl_obj, cluster_dt, +// NPY_ARRAY_C_CONTIGUOUS); - // If parsing of a or b fails we throw an exception in Python - if (c_array == NULL) { - PyErr_SetString( - PyExc_TypeError, - "Could not convert one of the arguments to a numpy array."); - return NULL; - } +// // If parsing of a or b fails we throw an exception in Python +// if (cl_array == NULL) { +// PyErr_SetString( +// PyExc_TypeError, +// "Could not convert one of the arguments to a numpy array."); +// return NULL; +// } - const int ndim = PyArray_NDIM((PyArrayObject *)c_array); +// const int ndim = PyArray_NDIM((PyArrayObject *)c_array); - npy_intp *dims = PyArray_SHAPE((PyArrayObject *)c_array); +// npy_intp *dims = PyArray_SHAPE((PyArrayObject *)c_array); - Py_ssize_t size = dims[0]; - // printf("%d size %d %d\n",ndim,size,sizeof(ClusterAnalysis)); - // dims[0]=size; +// Py_ssize_t size = dims[0]; +// // printf("%d size %d %d\n",ndim,size,sizeof(ClusterAnalysis)); +// // dims[0]=size; - // Cluster *clusters = reinterpret_cast( - // PyArray_DATA(reinterpret_cast(c_array))); +// // Cluster *clusters = reinterpret_cast( +// // PyArray_DATA(reinterpret_cast(c_array))); - Cluster *clusters = (Cluster *)(PyArray_DATA((PyArrayObject *)(c_array))); +// Cluster *clusters = (Cluster *)(PyArray_DATA((PyArrayObject *)(c_array))); - // PyObject *PyArray_SimpleNewFromDescr(int nd, npy_int const *dims, - // PyArray_Descr *descr) - PyObject *clustersA = PyArray_SimpleNewFromDescr(ndim, dims, dtypeOut); - // PyArray_FILLWBYTE((PyArrayObject *)clustersA, 0); //zero initialization - // can be removed later - npy_intp *strides = PyArray_STRIDES(((PyArrayObject *)(clustersA))); - // printf("strides %d %d\n", strides[0],sizeof(ClusterAnalysis)); +// // PyObject *PyArray_SimpleNewFromDescr(int nd, npy_int const *dims, +// // PyArray_Descr *descr) +// PyObject *clustersA = PyArray_SimpleNewFromDescr(ndim, dims, dtypeOut); +// // PyArray_FILLWBYTE((PyArrayObject *)clustersA, 0); //zero initialization +// // can be removed later +// npy_intp *strides = PyArray_STRIDES(((PyArrayObject *)(clustersA))); +// // printf("strides %d %d\n", strides[0],sizeof(ClusterAnalysis)); - // Get a pointer to the array memory - ClusterAnalysis *buf = PyArray_DATA((PyArrayObject *)clustersA); +// // Get a pointer to the array memory +// ClusterAnalysis *buf = PyArray_DATA((PyArrayObject *)clustersA); - // Call the standalone C code to read clusters from file - // Here goes the looping, removing frame numbers etc. - int nc = analyze_clusters(size, clusters, buf); - // printf("%d %d\n",nc,size); +// // Call the standalone C code to read clusters from file +// // Here goes the looping, removing frame numbers etc. +// int nc = analyze_clusters(size, clusters, buf); +// // printf("%d %d\n",nc,size); - if (nc != size) { +// if (nc != size) { - PyErr_SetString(PyExc_TypeError, "Parsed wrong size array!"); - } +// PyErr_SetString(PyExc_TypeError, "Parsed wrong size array!"); +// } - return clustersA; -} +// return clustersA; +// } // List all methods in our ClusterFileReader class static PyMethodDef ClusterFileReader_methods[] = { {"read", (PyCFunction)ClusterFileReader_read, METH_VARARGS, "Read clusters"}, - {"clusterize", (PyCFunction)ClusterFileReader_clusterize, METH_VARARGS, - "Analyze clusters"}, + // {"clusterize", (PyCFunction)ClusterFileReader_clusterize, METH_VARARGS, + // "Analyze clusters"}, {NULL, NULL, 0, NULL} /* Sentinel */ }; @@ -205,6 +193,7 @@ static PyTypeObject ClusterFileReaderType = { }; void init_ClusterFileReader(PyObject *m){ + import_array(); if (PyType_Ready(&ClusterFileReaderType) < 0) return NULL; diff --git a/src/ClusterReader.h b/src/ClusterReader.h index 90b66e9..54f3a91 100644 --- a/src/ClusterReader.h +++ b/src/ClusterReader.h @@ -1,5 +1,7 @@ # pragma once +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION +#define PY_SSIZE_T_CLEAN #include - +#include void init_ClusterFileReader(PyObject *m); \ No newline at end of file diff --git a/src/arr_desc.c b/src/arr_desc.c new file mode 100644 index 0000000..ee45234 --- /dev/null +++ b/src/arr_desc.c @@ -0,0 +1,25 @@ +#include "arr_desc.h" + +PyArray_Descr* cluster_dt(){ + import_array(); + PyObject *dict; + PyArray_Descr *dtype = NULL; + dict = Py_BuildValue("[(s, s),(s, s),(s, s, (i))]", "x", "u2", "y", + "u2", "data", "i4", 9); + // return dict; + PyArray_DescrConverter(dict, &dtype); + Py_DECREF(dict); + return dtype; +} + +PyArray_Descr *cluster_analysis_dt() { + import_array(); //TODO! Correct placement for this? + PyObject *dict; + PyArray_Descr *dtype; + dict = Py_BuildValue("[(s, s),(s, s),(s, s)]", "tot3", "i4", "tot2", + "i4", "corner", "u4"); + + PyArray_DescrConverter(dict, &dtype); + Py_DECREF(dict); + return dtype; +} diff --git a/src/arr_desc.h b/src/arr_desc.h new file mode 100644 index 0000000..3a4679d --- /dev/null +++ b/src/arr_desc.h @@ -0,0 +1,8 @@ +#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION +#define PY_SSIZE_T_CLEAN +#include +#include + +PyArray_Descr* cluster_dt(); + +PyArray_Descr* cluster_analysis_dt(); \ No newline at end of file diff --git a/src/creader_module.c b/src/creader_module.c index 6fe04df..d4134ff 100644 --- a/src/creader_module.c +++ b/src/creader_module.c @@ -1,19 +1,89 @@ #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION #define PY_SSIZE_T_CLEAN - -#include "ClusterReader.h" #include +#include + +#include "arr_desc.h" +#include "data_types.h" +#include "ClusterReader.h" + + +static PyObject *clusterize(PyObject *self, PyObject *args, + PyObject *Py_UNUSED(kwds)) { + + // // Create an uninitialized numpy array + // PyArray_Descr *dtypeIn = cluster_dt(); + // PyArray_Descr *dtypeOut = cluster_analysis_dt(); + + PyObject *cl_obj; + if (!PyArg_ParseTuple(args, "O", &cl_obj)) + return NULL; + + if (cluster_dt==NULL){ + printf("BYE\n"); + exit(1); + } + + // Create a numpy array from the passed object, if possible numpy will + // use the underlying buffer, otherwise it will create a copy, for example + // if data type is different or we pass in a list. The + // NPY_ARRAY_C_CONTIGUOUS flag ensures that we have contiguous memory. + // function steals a reference to the data type so no need to deallocate + PyObject *cl_array = PyArray_FromArray((PyArrayObject *)cl_obj, cluster_dt(), + NPY_ARRAY_C_CONTIGUOUS); + if (cl_array == NULL) { + PyErr_SetString( + PyExc_TypeError, + "Could not convert first argument to numpy array."); + return NULL; + } + + const int ndim = PyArray_NDIM((PyArrayObject *)cl_array); + npy_intp *dims = PyArray_SHAPE((PyArrayObject *)cl_array); + Py_ssize_t size = dims[0]; + + + Cluster *clusters = (Cluster *)(PyArray_DATA((PyArrayObject *)(cl_array))); + PyObject *cl_analysis = PyArray_SimpleNewFromDescr(ndim, dims, cluster_analysis_dt()); + PyArray_FILLWBYTE((PyArrayObject *)cl_analysis, 0); //zero initialization + + // // Get a pointer to the array memory + ClusterAnalysis *buf = PyArray_DATA((PyArrayObject *)cl_analysis); + + + int nc = analyze_clusters(size, clusters, buf); + if (nc != size) { + PyErr_SetString(PyExc_TypeError, "Parsed wrong size array!"); + } + return cl_analysis; +} + + +static PyObject *get_cluster_dt(PyObject *self, PyObject *args, + PyObject *Py_UNUSED(kwds)) { + return cluster_dt(); + } //Module docstring, shown as a part of help(creader) static char module_docstring[] = "C functions to read cluster files"; +//Module methods +static PyMethodDef creader_methods[] = { + {"clusterize", clusterize, METH_VARARGS, + "Do some stuff"}, + {"cluster_dt", get_cluster_dt, METH_VARARGS, + "Do some stuff"}, + {NULL, NULL, 0, NULL} /* Sentinel */ +}; + + //Module defenition static struct PyModuleDef creader_def = { PyModuleDef_HEAD_INIT, "creader", module_docstring, -1, - NULL, // m_methods + creader_methods, // m_methods NULL, // m_slots NULL, // m_traverse NULL, // m_clear @@ -22,9 +92,12 @@ static struct PyModuleDef creader_def = { //Initialize module and add classes PyMODINIT_FUNC PyInit_creader(void) { + + PyObject *m = PyModule_Create(&creader_def); if (m == NULL) return NULL; + import_array(); init_ClusterFileReader(m); return m; diff --git a/test.py b/test.py index 88bfc6b..af8cd32 100644 --- a/test.py +++ b/test.py @@ -1,4 +1,4 @@ -from creader import ClusterFileReader +from creader import ClusterFileReader, clusterize import numpy as np # maxph=100000000 @@ -9,8 +9,8 @@ fpath = Path("/mnt/sls_det_storage/moench_data/Moench_LGAD_SIM_Nov22/moenchLGAD2 # r = ClusterFileReader() r = ClusterFileReader(fpath.as_posix()) +a=r.read(maxph) +# b=clusterize(a) -# a=r.read(maxph) -# b=r.clusterize(a) -#v=int(maxph/100) -#print(a[::v]) +# #v=int(maxph/100) +# #print(a[::v]) diff --git a/tests/fixtures.py b/tests/fixtures.py new file mode 100644 index 0000000..fe58c9d --- /dev/null +++ b/tests/fixtures.py @@ -0,0 +1,11 @@ +import pytest +import os +from pathlib import Path + +@pytest.fixture +def data_path(): + try: + p = os.environ['CREADER_TEST_DATA'] + except KeyError: + raise KeyError("CREADER_TEST_DATA needs to be set before running tests") + return Path(p) \ No newline at end of file diff --git a/tests/test_ClusterReader.py b/tests/test_ClusterReader.py new file mode 100644 index 0000000..e0f9d48 --- /dev/null +++ b/tests/test_ClusterReader.py @@ -0,0 +1,11 @@ +import pytest +import os, sys +from creader import ClusterFileReader +from fixtures import data_path + +def test_references_on_read(data_path): + fname= (data_path/'beam_En700eV_-40deg_300V_10us_d0_f0_100.clust').as_posix() + r = ClusterFileReader(fname) + clusters = r.read(10) + assert sys.getrefcount(clusters) == 2 #Over counts by one due to call by reference + diff --git a/tests/test_functions.py b/tests/test_functions.py new file mode 100644 index 0000000..32da387 --- /dev/null +++ b/tests/test_functions.py @@ -0,0 +1,12 @@ +import pytest +from fixtures import data_path +from creader import ClusterFileReader, clusterize +import sys + +def test_references_on_clusterize(data_path): + fname= (data_path/'beam_En700eV_-40deg_300V_10us_d0_f0_100.clust').as_posix() + r = ClusterFileReader(fname) + clusters = r.read(10) + result = clusterize(clusters) + assert sys.getrefcount(clusters) == 2 #Over counts by one due to call by reference + assert sys.getrefcount(result) == 2 \ No newline at end of file