added support for chunk read of clusters

This commit is contained in:
Erik Frojdh 2023-06-05 10:20:14 +02:00
parent 8aa1b6bcbd
commit 221f7e0c0d
4 changed files with 61 additions and 12 deletions

View File

@ -2,5 +2,22 @@
from . import ClusterFileReader
class ClusterFile(ClusterFileReader):
    """Iterable, context-manager flavoured wrapper around ClusterFileReader.

    Every positional and keyword argument is forwarded unchanged to
    ``ClusterFileReader.__init__``.

    Iterating over an instance yields the result of successive ``read()``
    calls (made without arguments) and stops the first time a read returns
    an object whose ``size`` is 0.
    """

    def __init__(self, *args, **kwargs):
        # Single constructor: an earlier ``__init__(self, fname)`` definition
        # was shadowed by this one and has been dropped as dead code.
        super().__init__(*args, **kwargs)

    def __iter__(self):
        # The object is its own iterator; state lives in the underlying reader.
        return self

    def __next__(self):
        clusters = self.read()
        # An empty read marks the end of iteration.
        if clusters.size == 0:
            raise StopIteration
        return clusters

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # No cleanup is performed here; returning None lets any exception
        # raised inside the ``with`` body propagate.
        # NOTE(review): the underlying file is not explicitly closed on exit
        # -- confirm whether the reader releases it elsewhere.
        pass

View File

@ -7,6 +7,7 @@
// Instance data of the ClusterFileReader Python type.
typedef struct {
// Python object header, followed by the C stream that init assigns from
// fopen(fname, "rb").
PyObject_HEAD FILE *fp;
// Set to 0 in init.
// NOTE(review): presumably a count of items still to be read -- confirm
// against the read implementation.
int n_left;
// Default read size: filled from the optional "chunk" argument of init and
// used by read() when the requested size is 0.
Py_ssize_t chunk;
} ClusterFileReader;
//clang-format on
@ -14,19 +15,38 @@ typedef struct {
// raises python exception if something goes wrong
// returned object should mean file is open and ready to read
static int ClusterFileReader_init(ClusterFileReader *self, PyObject *args,
PyObject *Py_UNUSED(kwds)) {
PyObject *kwds) {
// Parse file name, accepts string or pathlike objects
const char *fname = NULL;
PyObject *buf;
self->n_left = 0;
self->chunk = 0;
PyObject *fname_obj = NULL;
PyObject *fname_bytes = NULL;
Py_ssize_t len;
if (!PyArg_ParseTuple(args, "O&", PyUnicode_FSConverter, &buf))
static char *kwlist[] = {"fname", "chunk", NULL};
// if (!PyArg_ParseTuple(args, "O&", PyUnicode_FSConverter, &buf))
// return -1;
// PyBytes_AsStringAndSize(buf, &fname, &len);
if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|n", kwlist, &fname_obj,
&self->chunk)) {
return -1;
PyBytes_AsStringAndSize(buf, &fname, &len);
}
if (fname_obj != Py_None)
if (!PyUnicode_FSConverter(fname_obj, &fname_bytes))
return -1;
PyBytes_AsStringAndSize(fname_bytes, &fname, &len);
#ifdef CR_VERBOSE
printf("Opening: %s\n chunk: %lu\n", fname, self->chunk);
#endif
self->fp = fopen(fname, "rb");
self->n_left = 0;
// Keep the return code to not return before releasing buffer
int rc = 0;
@ -37,7 +57,7 @@ static int ClusterFileReader_init(ClusterFileReader *self, PyObject *args,
rc = -1;
}
// Release buffer
Py_DECREF(buf);
Py_DECREF(fname_bytes);
// Success or fail
return rc;
@ -63,11 +83,15 @@ static PyObject *ClusterFileReader_read(ClusterFileReader *self,
Py_ssize_t size = 0;
PyObject *noise_obj = NULL;
PyObject *noise_array = NULL;
if (!PyArg_ParseTuple(args, "n|O", &size, &noise_obj)) {
if (!PyArg_ParseTuple(args, "|nO", &size, &noise_obj)) {
PyErr_SetString(PyExc_TypeError, "Could not parse args.");
return NULL;
}
// Fall back on object default/config
if (size == 0)
size = self->chunk;
npy_intp dims[] = {size};
// If possible numpy will

View File

@ -41,8 +41,9 @@ static int RawFileReader_init(RawFileReader *self, PyObject *args,
return -1;
PyBytes_AsStringAndSize(fname_bytes, &fname, &len);
printf("%s\n read_header: %d\n", fname, self->read_header);
#ifdef CR_VERBOSE
printf("fname: %s\n read_header: %d\n", fname, self->read_header);
#endif
self->fp = fopen(fname, "rb");

View File

@ -115,3 +115,10 @@ def test_read_file_with_noise_mask(data_path):
r = ClusterFileReader(fname)
cl = r.read(85, noise_cut)
assert cl.size == 10
def test_chunk_config(data_path):
    """A reader built with ``chunk=5`` returns 5 clusters from a bare
    ``read()``, while an explicit size passed to ``read`` takes precedence."""
    # File contains total 70 clusters
    reader = ClusterFileReader(data_path / 'noise_test.clust', chunk=5)

    default_sized = reader.read()
    assert default_sized.size == 5

    explicit_sized = reader.read(10)
    assert explicit_sized.size == 10