restructure data types

This commit is contained in:
Erik Frojdh
2023-05-31 13:31:35 +02:00
parent 06b5e97111
commit 4efbb9a914
11 changed files with 225 additions and 90 deletions

View File

@ -33,4 +33,3 @@ conda develop install .
#or with pip #or with pip
pip install --editable . pip install --editable .
``` ```

View File

@ -4,7 +4,12 @@ import setuptools
import numpy as np import numpy as np
c_ext = setuptools.Extension("creader", c_ext = setuptools.Extension("creader",
sources = ["src/creader_module.c", "src/cluster_reader.c", "src/ClusterReader.c"], sources = [
"src/creader_module.c",
"src/cluster_reader.c",
"src/ClusterReader.c",
"src/arr_desc.c"
],
include_dirs=[ include_dirs=[
np.get_include(),"src/" np.get_include(),"src/"
], ],
@ -14,7 +19,7 @@ c_ext = setuptools.Extension("creader",
c_ext.language = 'c' c_ext.language = 'c'
setuptools.setup( setuptools.setup(
name= 'creader', name= 'creader',
version = '0.1', version = '2023.05.30',
description = 'Reading cluster files', description = 'Reading cluster files',
ext_modules=[c_ext], ext_modules=[c_ext],
) )

View File

@ -1,38 +1,27 @@
#include "ClusterReader.h" #include "ClusterReader.h"
#include "cluster_reader.h" #include "cluster_reader.h"
#include "data_types.h" #include "data_types.h"
#include "arr_desc.h"
#include <numpy/arrayobject.h>
typedef struct { typedef struct {
PyObject_HEAD PyObject_HEAD
FILE *fp; FILE *fp;
int n_left; int n_left;
} ClusterFileReader; } ClusterFileReader;
// Create a custom numpy data type that should reflect
// our cluster data type.
// TODO! Update with the actual cluster data type
static PyArray_Descr *cluster_dt() {
PyObject *dtype_dict;
PyArray_Descr *dtype;
dtype_dict = Py_BuildValue("[(s, s),(s, s),(s, s, (i))]", "x", "u2", "y",
"u2", "data", "i4", 9);
PyArray_DescrConverter(dtype_dict, &dtype);
Py_DECREF(dtype_dict);
return dtype;
}
static PyArray_Descr *cluster_analysis_dt() { // static PyArray_Descr *cluster_analysis_dt() {
PyObject *dtype_dict; // PyObject *dtype_dict;
PyArray_Descr *dtype; // PyArray_Descr *dtype;
dtype_dict = Py_BuildValue("[(s, s),(s, s),(s, s)]", "tot3", "i4", "tot2", // dtype_dict = Py_BuildValue("[(s, s),(s, s),(s, s)]", "tot3", "i4", "tot2",
"i4", "corner", "u4"); // "i4", "corner", "u4");
PyArray_DescrConverter(dtype_dict, &dtype); // PyArray_DescrConverter(dtype_dict, &dtype);
Py_DECREF(dtype_dict); // Py_DECREF(dtype_dict);
return dtype; // return dtype;
} // }
@ -84,12 +73,11 @@ static PyObject *ClusterFileReader_read(ClusterFileReader *self, PyObject *args,
npy_intp dims[] = {size}; npy_intp dims[] = {size};
// Create an uninitialized numpy array // Create an uninitialized numpy array
PyArray_Descr *dtype = cluster_dt(); PyObject *clusters = PyArray_SimpleNewFromDescr(ndim, dims, cluster_dt());
// PyObject *PyArray_SimpleNewFromDescr(int nd, npy_int const *dims,
// PyArray_Descr *descr) // Fill with zeros
PyObject *clusters = PyArray_SimpleNewFromDescr(ndim, dims, dtype);
PyArray_FILLWBYTE((PyArrayObject *)clusters, PyArray_FILLWBYTE((PyArrayObject *)clusters,
0); // zero initialization can be removed later 0);
// Get a pointer to the array memory // Get a pointer to the array memory
void *buf = PyArray_DATA((PyArrayObject *)clusters); void *buf = PyArray_DATA((PyArrayObject *)clusters);
@ -117,77 +105,77 @@ static PyObject *ClusterFileReader_read(ClusterFileReader *self, PyObject *args,
return clusters; return clusters;
} }
// read method // // read method
static PyObject *ClusterFileReader_clusterize(ClusterFileReader *self, // static PyObject *ClusterFileReader_clusterize(ClusterFileReader *self,
PyObject *args, // PyObject *args,
PyObject *Py_UNUSED(kwds)) { // PyObject *Py_UNUSED(kwds)) {
// Create an uninitialized numpy array // // Create an uninitialized numpy array
PyArray_Descr *dtypeIn = cluster_dt(); // PyArray_Descr *dtypeIn = cluster_dt();
PyArray_Descr *dtypeOut = cluster_analysis_dt(); // PyArray_Descr *dtypeOut = cluster_analysis_dt();
PyObject *c_obj; // PyObject *cl_obj;
if (!PyArg_ParseTuple(args, "O", &c_obj)) // if (!PyArg_ParseTuple(args, "O", &cl_obj))
return NULL; // return NULL;
// Create two numpy arrays from the passed objects, if possible numpy will // // Create two numpy arrays from the passed objects, if possible numpy will
// use the underlying buffer, otherwise it will create a copy, for example // // use the underlying buffer, otherwise it will create a copy, for example
// if data type is different or we pass in a list. The // // if data type is different or we pass in a list. The
// NPY_ARRAY_C_CONTIGUOUS flag ensures that we have contiguous memory. // // // NPY_ARRAY_C_CONTIGUOUS flag ensures that we have contiguous memory.
PyObject *c_array = PyArray_FromArray((PyArrayObject *)c_obj, dtypeIn, // PyObject *cl_array = PyArray_FromArray((PyArrayObject *)cl_obj, cluster_dt,
NPY_ARRAY_C_CONTIGUOUS); // NPY_ARRAY_C_CONTIGUOUS);
// If parsing of a or b fails we throw an exception in Python // // If parsing of a or b fails we throw an exception in Python
if (c_array == NULL) { // if (cl_array == NULL) {
PyErr_SetString( // PyErr_SetString(
PyExc_TypeError, // PyExc_TypeError,
"Could not convert one of the arguments to a numpy array."); // "Could not convert one of the arguments to a numpy array.");
return NULL; // return NULL;
} // }
const int ndim = PyArray_NDIM((PyArrayObject *)c_array); // const int ndim = PyArray_NDIM((PyArrayObject *)c_array);
npy_intp *dims = PyArray_SHAPE((PyArrayObject *)c_array); // npy_intp *dims = PyArray_SHAPE((PyArrayObject *)c_array);
Py_ssize_t size = dims[0]; // Py_ssize_t size = dims[0];
// printf("%d size %d %d\n",ndim,size,sizeof(ClusterAnalysis)); // // printf("%d size %d %d\n",ndim,size,sizeof(ClusterAnalysis));
// dims[0]=size; // // dims[0]=size;
// Cluster *clusters = reinterpret_cast<Cluster *>( // // Cluster *clusters = reinterpret_cast<Cluster *>(
// PyArray_DATA(reinterpret_cast<PyArrayObject *>(c_array))); // // PyArray_DATA(reinterpret_cast<PyArrayObject *>(c_array)));
Cluster *clusters = (Cluster *)(PyArray_DATA((PyArrayObject *)(c_array))); // Cluster *clusters = (Cluster *)(PyArray_DATA((PyArrayObject *)(c_array)));
// PyObject *PyArray_SimpleNewFromDescr(int nd, npy_int const *dims, // // PyObject *PyArray_SimpleNewFromDescr(int nd, npy_int const *dims,
// PyArray_Descr *descr) // // PyArray_Descr *descr)
PyObject *clustersA = PyArray_SimpleNewFromDescr(ndim, dims, dtypeOut); // PyObject *clustersA = PyArray_SimpleNewFromDescr(ndim, dims, dtypeOut);
// PyArray_FILLWBYTE((PyArrayObject *)clustersA, 0); //zero initialization // // PyArray_FILLWBYTE((PyArrayObject *)clustersA, 0); //zero initialization
// can be removed later // // can be removed later
npy_intp *strides = PyArray_STRIDES(((PyArrayObject *)(clustersA))); // npy_intp *strides = PyArray_STRIDES(((PyArrayObject *)(clustersA)));
// printf("strides %d %d\n", strides[0],sizeof(ClusterAnalysis)); // // printf("strides %d %d\n", strides[0],sizeof(ClusterAnalysis));
// Get a pointer to the array memory // // Get a pointer to the array memory
ClusterAnalysis *buf = PyArray_DATA((PyArrayObject *)clustersA); // ClusterAnalysis *buf = PyArray_DATA((PyArrayObject *)clustersA);
// Call the standalone C code to read clusters from file // // Call the standalone C code to read clusters from file
// Here goes the looping, removing frame numbers etc. // // Here goes the looping, removing frame numbers etc.
int nc = analyze_clusters(size, clusters, buf); // int nc = analyze_clusters(size, clusters, buf);
// printf("%d %d\n",nc,size); // // printf("%d %d\n",nc,size);
if (nc != size) { // if (nc != size) {
PyErr_SetString(PyExc_TypeError, "Parsed wrong size array!"); // PyErr_SetString(PyExc_TypeError, "Parsed wrong size array!");
} // }
return clustersA; // return clustersA;
} // }
// List all methods in our ClusterFileReader class // List all methods in our ClusterFileReader class
static PyMethodDef ClusterFileReader_methods[] = { static PyMethodDef ClusterFileReader_methods[] = {
{"read", (PyCFunction)ClusterFileReader_read, METH_VARARGS, {"read", (PyCFunction)ClusterFileReader_read, METH_VARARGS,
"Read clusters"}, "Read clusters"},
{"clusterize", (PyCFunction)ClusterFileReader_clusterize, METH_VARARGS, // {"clusterize", (PyCFunction)ClusterFileReader_clusterize, METH_VARARGS,
"Analyze clusters"}, // "Analyze clusters"},
{NULL, NULL, 0, NULL} /* Sentinel */ {NULL, NULL, 0, NULL} /* Sentinel */
}; };
@ -205,6 +193,7 @@ static PyTypeObject ClusterFileReaderType = {
}; };
void init_ClusterFileReader(PyObject *m){ void init_ClusterFileReader(PyObject *m){
import_array(); import_array();
if (PyType_Ready(&ClusterFileReaderType) < 0) if (PyType_Ready(&ClusterFileReaderType) < 0)
return NULL; return NULL;

View File

@ -1,5 +1,7 @@
# pragma once # pragma once
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#define PY_SSIZE_T_CLEAN
#include <Python.h> #include <Python.h>
#include <numpy/arrayobject.h>
void init_ClusterFileReader(PyObject *m); void init_ClusterFileReader(PyObject *m);

25
src/arr_desc.c Normal file
View File

@ -0,0 +1,25 @@
#include "arr_desc.h"
/*
 * Build the numpy structured dtype that describes one cluster record.
 *
 * Fields: "x" (u2), "y" (u2) and "data" (i4, 9 elements).
 *
 * Returns a new reference to the descriptor, or NULL with a Python
 * exception set if numpy could not be imported or the dtype could not
 * be built.
 */
PyArray_Descr *cluster_dt(void) {
    // import_array() returns NULL from this function if numpy fails to load.
    // NOTE(review): calling it on every invocation looks like a workaround
    // for the numpy API table not being shared between translation units
    // (PY_ARRAY_UNIQUE_SYMBOL / NO_IMPORT_ARRAY) -- confirm and move to init.
    import_array();

    PyObject *spec = Py_BuildValue("[(s, s),(s, s),(s, s, (i))]", "x", "u2", "y",
                                   "u2", "data", "i4", 9);
    if (spec == NULL)
        return NULL; // Py_BuildValue already set the exception

    PyArray_Descr *dtype = NULL;
    // The converter returns 0 on failure; make sure we never hand back a
    // half-initialised pointer in that case.
    if (!PyArray_DescrConverter(spec, &dtype))
        dtype = NULL;

    Py_DECREF(spec);
    return dtype;
}
PyArray_Descr *cluster_analysis_dt() {
import_array(); //TODO! Correct placement for this?
PyObject *dict;
PyArray_Descr *dtype;
dict = Py_BuildValue("[(s, s),(s, s),(s, s)]", "tot3", "i4", "tot2",
"i4", "corner", "u4");
PyArray_DescrConverter(dict, &dtype);
Py_DECREF(dict);
return dtype;
}

8
src/arr_desc.h Normal file
View File

@ -0,0 +1,8 @@
// Declarations of the numpy dtype factories shared by the creader sources.
#pragma once
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include <numpy/arrayobject.h>

// Structured dtype of one cluster: "x" (u2), "y" (u2), "data" (i4 x 9).
// Returns a new reference, or NULL on failure.
PyArray_Descr *cluster_dt(void);

// Structured dtype of one analysis result: "tot3" (i4), "tot2" (i4),
// "corner" (u4). Returns a new reference, or NULL on failure.
PyArray_Descr *cluster_analysis_dt(void);

View File

@ -1,19 +1,89 @@
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION #define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#define PY_SSIZE_T_CLEAN #define PY_SSIZE_T_CLEAN
#include "ClusterReader.h"
#include <Python.h> #include <Python.h>
#include <numpy/arrayobject.h>
#include "arr_desc.h"
#include "data_types.h"
#include "ClusterReader.h"
/*
 * creader.clusterize(clusters) -> numpy.ndarray
 *
 * Convert the single positional argument to a C-contiguous array of the
 * cluster dtype (numpy reuses the buffer when it can and copies otherwise,
 * e.g. for a list or a different dtype), run analyze_clusters() over it and
 * return a new array of the cluster-analysis dtype with the same shape.
 *
 * Returns a new reference, or NULL with a Python exception set:
 *   - TypeError if the argument cannot be converted to a cluster array,
 *   - TypeError if analyze_clusters() reports a different element count,
 *   - whatever numpy raised if a dtype or the output array cannot be created.
 */
static PyObject *clusterize(PyObject *Py_UNUSED(self), PyObject *args) {
    PyObject *cl_obj;
    if (!PyArg_ParseTuple(args, "O", &cl_obj))
        return NULL;

    // A NULL dtype would make numpy pick "any dtype", so fail explicitly.
    PyArray_Descr *dtype_in = cluster_dt();
    if (dtype_in == NULL) {
        if (!PyErr_Occurred())
            PyErr_SetString(PyExc_RuntimeError,
                            "Could not create the cluster data type.");
        return NULL;
    }

    // PyArray_FromAny accepts arbitrary objects (arrays, lists, ...), unlike
    // PyArray_FromArray which requires a real ndarray. It steals the
    // reference to dtype_in, so there is nothing to release here.
    PyObject *cl_array = PyArray_FromAny(cl_obj, dtype_in, 0, 0,
                                         NPY_ARRAY_C_CONTIGUOUS, NULL);
    if (cl_array == NULL) {
        PyErr_SetString(
            PyExc_TypeError,
            "Could not convert first argument to numpy array.");
        return NULL;
    }

    const int ndim = PyArray_NDIM((PyArrayObject *)cl_array);
    npy_intp *dims = PyArray_SHAPE((PyArrayObject *)cl_array);
    // Total element count: equals dims[0] for 1-d input and stays valid for
    // 0-d input, where dims[0] must not be read.
    Py_ssize_t size = PyArray_SIZE((PyArrayObject *)cl_array);

    PyArray_Descr *dtype_out = cluster_analysis_dt();
    if (dtype_out == NULL) {
        if (!PyErr_Occurred())
            PyErr_SetString(PyExc_RuntimeError,
                            "Could not create the cluster analysis data type.");
        Py_DECREF(cl_array);
        return NULL;
    }

    // Steals the reference to dtype_out.
    PyObject *cl_analysis = PyArray_SimpleNewFromDescr(ndim, dims, dtype_out);
    if (cl_analysis == NULL) {
        Py_DECREF(cl_array);
        return NULL;
    }
    PyArray_FILLWBYTE((PyArrayObject *)cl_analysis, 0); //zero initialization

    // Get pointers to the input and output array memory
    Cluster *clusters = (Cluster *)(PyArray_DATA((PyArrayObject *)(cl_array)));
    ClusterAnalysis *buf = PyArray_DATA((PyArrayObject *)cl_analysis);

    int nc = analyze_clusters(size, clusters, buf);

    // We own a reference to cl_array (either the caller's array or a
    // converted copy); drop it now that the data has been consumed.
    Py_DECREF(cl_array);

    if (nc != size) {
        // Returning an object while an exception is set is an error in the
        // C API, so release the result and signal failure with NULL.
        Py_DECREF(cl_analysis);
        PyErr_SetString(PyExc_TypeError, "Parsed wrong size array!");
        return NULL;
    }
    return cl_analysis;
}
/*
 * creader.cluster_dt() -> numpy.dtype
 *
 * Return the structured dtype used for cluster arrays. Any positional
 * arguments are ignored.
 *
 * Returns a new reference, or NULL with a Python exception set if the
 * dtype could not be created.
 */
static PyObject *get_cluster_dt(PyObject *Py_UNUSED(self),
                                PyObject *Py_UNUSED(args)) {
    // A PyArray_Descr is a Python object; the cast makes that explicit.
    return (PyObject *)cluster_dt();
}
//Module docstring, shown as a part of help(creader) //Module docstring, shown as a part of help(creader)
static char module_docstring[] = "C functions to read cluster files"; static char module_docstring[] = "C functions to read cluster files";
//Module methods
static PyMethodDef creader_methods[] = {
{"clusterize", clusterize, METH_VARARGS,
"Do some stuff"},
{"cluster_dt", get_cluster_dt, METH_VARARGS,
"Do some stuff"},
{NULL, NULL, 0, NULL} /* Sentinel */
};
//Module definition //Module definition
static struct PyModuleDef creader_def = { static struct PyModuleDef creader_def = {
PyModuleDef_HEAD_INIT, PyModuleDef_HEAD_INIT,
"creader", "creader",
module_docstring, module_docstring,
-1, -1,
NULL, // m_methods creader_methods, // m_methods
NULL, // m_slots NULL, // m_slots
NULL, // m_traverse NULL, // m_traverse
NULL, // m_clear NULL, // m_clear
@ -22,9 +92,12 @@ static struct PyModuleDef creader_def = {
//Initialize module and add classes //Initialize module and add classes
PyMODINIT_FUNC PyInit_creader(void) { PyMODINIT_FUNC PyInit_creader(void) {
PyObject *m = PyModule_Create(&creader_def); PyObject *m = PyModule_Create(&creader_def);
if (m == NULL) if (m == NULL)
return NULL; return NULL;
import_array();
init_ClusterFileReader(m); init_ClusterFileReader(m);
return m; return m;

10
test.py
View File

@ -1,4 +1,4 @@
from creader import ClusterFileReader from creader import ClusterFileReader, clusterize
import numpy as np import numpy as np
# maxph=100000000 # maxph=100000000
@ -9,8 +9,8 @@ fpath = Path("/mnt/sls_det_storage/moench_data/Moench_LGAD_SIM_Nov22/moenchLGAD2
# r = ClusterFileReader() # r = ClusterFileReader()
r = ClusterFileReader(fpath.as_posix()) r = ClusterFileReader(fpath.as_posix())
a=r.read(maxph)
# b=clusterize(a)
# a=r.read(maxph) # #v=int(maxph/100)
# b=r.clusterize(a) # #print(a[::v])
#v=int(maxph/100)
#print(a[::v])

11
tests/fixtures.py Normal file
View File

@ -0,0 +1,11 @@
import pytest
import os
from pathlib import Path
@pytest.fixture
def data_path():
    """Return the test-data directory named by ``CREADER_TEST_DATA``.

    Raises:
        KeyError: if the ``CREADER_TEST_DATA`` environment variable is
            not set.
    """
    try:
        p = os.environ['CREADER_TEST_DATA']
    except KeyError:
        # ``from None`` hides the bare KeyError('CREADER_TEST_DATA') so the
        # traceback shows only the message that tells the user what to do.
        raise KeyError(
            "CREADER_TEST_DATA needs to be set before running tests"
        ) from None
    return Path(p)

View File

@ -0,0 +1,11 @@
import pytest
import os, sys
from creader import ClusterFileReader
from fixtures import data_path
def test_references_on_read(data_path):
    """Check that ``read`` returns an array nobody else holds on to."""
    cluster_file = data_path / 'beam_En700eV_-40deg_300V_10us_d0_f0_100.clust'
    reader = ClusterFileReader(cluster_file.as_posix())
    clusters = reader.read(10)
    # getrefcount counts its own argument as well, so a single owner
    # (the local name ``clusters``) shows up as 2.
    expected_refs = 2
    assert sys.getrefcount(clusters) == expected_refs

12
tests/test_functions.py Normal file
View File

@ -0,0 +1,12 @@
import pytest
from fixtures import data_path
from creader import ClusterFileReader, clusterize
import sys
def test_references_on_clusterize(data_path):
    """Check that ``clusterize`` keeps no reference to its input or output."""
    cluster_file = data_path / 'beam_En700eV_-40deg_300V_10us_d0_f0_100.clust'
    reader = ClusterFileReader(cluster_file.as_posix())
    clusters = reader.read(10)
    result = clusterize(clusters)
    # getrefcount counts its own argument as well, so an object with a
    # single owner (the local name) shows up as 2.
    expected_refs = 2
    assert sys.getrefcount(clusters) == expected_refs
    assert sys.getrefcount(result) == expected_refs