diff --git a/NEWS b/NEWS index a7a97ed..c3cde87 100644 --- a/NEWS +++ b/NEWS @@ -1,5 +1,14 @@ #### H5PART 1.6.1 ############################################################ +Chunking in the H5Part API +-------------------------- + +There is now an H5Part equivalent H5PartSetChunkSize to the existing +H5BlockDefine3DChunkDims call. Both of these calls enable the chunking +mechanism in the underlying HDF5 layer. Chunking is used in combination +with the aligned open calls to pad datasets to alignment multiples +on disk. + All Steps Available on Write ---------------------------- @@ -17,7 +26,7 @@ Previously, a throttle factor of N meant that for P processors, the total number of P writes were divided into N batchs of P/N writes. To better accomodate round-robin lustre striping, the new policy is to -execute P/N batch of N writes. Thus, matching N to the number of stripes +execute P/N batches of N writes. Thus, matching N to the number of stripes results in the desirable 1-1 matching of writers to stripes. #### H5PART 1.6 ############################################################## diff --git a/src/H5Block.c b/src/H5Block.c index 5b32d9c..f19a57f 100644 --- a/src/H5Block.c +++ b/src/H5Block.c @@ -1201,7 +1201,7 @@ _H5Block_read_data ( type, f->block->memshape, f->block->diskshape, - f->xfer_prop, + f->dxfer_prop, data ); if ( herr < 0 ) return HANDLE_H5D_READ_ERR ( name, f->timestep ); @@ -1468,7 +1468,7 @@ _H5Block_write_data ( type, b->memshape, b->diskshape, - f->xfer_prop, + f->dxfer_prop, data ); if ( herr < 0 ) return HANDLE_H5D_WRITE_ERR ( name, f->timestep ); diff --git a/src/H5MultiBlock.c b/src/H5MultiBlock.c index 938fe13..7e7262c 100644 --- a/src/H5MultiBlock.c +++ b/src/H5MultiBlock.c @@ -916,7 +916,7 @@ _H5MultiBlock_read_data ( type, f->block->memshape, f->block->diskshape, - f->xfer_prop, + f->dxfer_prop, *data ); if ( herr < 0 ) return HANDLE_H5D_READ_ERR ( field_name, f->timestep ); @@ -1026,7 +1026,7 @@ _H5MultiBlock_write_data ( type, b->memshape, b->diskshape, - f->xfer_prop, + f->dxfer_prop, data ); if ( herr < 0 ) return HANDLE_H5D_WRITE_ERR ( name, f->timestep ); diff --git a/src/H5Part.c b/src/H5Part.c index 55c94ee..349f835 100644 --- a/src/H5Part.c +++ b/src/H5Part.c @@ -154,10 +154,10 @@ _H5Part_open_file ( strncpy ( f->groupname_step, H5PART_GROUPNAME_STEP, H5PART_STEPNAME_LEN ); f->stepno_width = 0; - f->xfer_prop = f->create_prop = H5P_DEFAULT; + f->dxfer_prop = f->dcreate_prop = f->fcreate_prop = H5P_DEFAULT; - f->access_prop = H5Pcreate (H5P_FILE_ACCESS); - if (f->access_prop < 0) { + f->faccess_prop = H5Pcreate (H5P_FILE_ACCESS); + if (f->faccess_prop < 0) { HANDLE_H5P_CREATE_ERR; goto error_cleanup; } @@ -187,7 +187,7 @@ _H5Part_open_file ( if (f->myproc == 0) { _H5Part_print_info ( "Selecting MPI-POSIX VFD" ); } - if (H5Pset_fapl_mpiposix ( f->access_prop, comm, 0 ) < 0) { + if (H5Pset_fapl_mpiposix ( f->faccess_prop, comm, 0 ) < 0) { HANDLE_H5P_SET_FAPL_ERR; goto error_cleanup; } @@ -196,7 +196,7 @@ _H5Part_open_file ( if (f->myproc == 0) { _H5Part_print_info ( "Selecting MPI-IO VFD" ); } - if (H5Pset_fapl_mpio ( f->access_prop, comm, info ) < 0) { + if (H5Pset_fapl_mpio ( f->faccess_prop, comm, info ) < 0) { HANDLE_H5P_SET_FAPL_ERR; goto error_cleanup; } @@ -208,12 +208,12 @@ _H5Part_open_file ( if (f->myproc == 0) { _H5Part_print_info ( "Using collective mode" ); } - f->xfer_prop = H5Pcreate (H5P_DATASET_XFER); - if (f->xfer_prop < 0) { + f->dxfer_prop = H5Pcreate (H5P_DATASET_XFER); + if (f->dxfer_prop < 0) { HANDLE_H5P_CREATE_ERR; goto error_cleanup; } - if (H5Pset_dxpl_mpio ( f->xfer_prop, H5FD_MPIO_COLLECTIVE ) < 0) { + if (H5Pset_dxpl_mpio ( f->dxfer_prop, H5FD_MPIO_COLLECTIVE ) < 0) { HANDLE_H5P_SET_DXPL_MPIO_ERR; goto error_cleanup; } @@ -241,21 +241,26 @@ _H5Part_open_file ( btree_bytes ); } - f->create_prop = H5Pcreate(H5P_FILE_CREATE); - H5Pset_istore_k (f->create_prop, btree_ik); + f->fcreate_prop = H5Pcreate(H5P_FILE_CREATE); + if ( f->fcreate_prop < 0 ) { + HANDLE_H5P_CREATE_ERR; + goto error_cleanup; + } + + H5Pset_istore_k (f->fcreate_prop, btree_ik); #ifndef H5_USE_16_API /* defer metadata cache flushing until file close */ H5AC_cache_config_t cache_config; cache_config.version = H5AC__CURR_CACHE_CONFIG_VERSION; - H5Pget_mdc_config (f->access_prop, &cache_config); + H5Pget_mdc_config (f->faccess_prop, &cache_config); cache_config.set_initial_size = 1; cache_config.initial_size = 16 * 1024 * 1024; cache_config.evictions_enabled = 0; cache_config.incr_mode = H5C_incr__off; cache_config.flash_incr_mode = H5C_flash_incr__off; cache_config.decr_mode = H5C_decr__off; - H5Pset_mdc_config (f->access_prop, &cache_config); + H5Pset_mdc_config (f->faccess_prop, &cache_config); #else // H5_USE_16_API _H5Part_print_info ( "Unable to defer metadata write: need HDF5 1.8"); @@ -274,16 +279,20 @@ _H5Part_open_file ( if ( align != 0 ) { if (f->myproc == 0) { - _H5Part_print_info ( "Setting HDF5 alignment to %ld bytes", align ); + _H5Part_print_info ( + "Setting HDF5 alignment to %ld bytes", + align ); } - if (H5Pset_alignment ( f->access_prop, 0, align ) < 0) { + if (H5Pset_alignment ( f->faccess_prop, 0, align ) < 0) { HANDLE_H5P_SET_FAPL_ERR; goto error_cleanup; } if (f->myproc == 0) { - _H5Part_print_info ( "Setting HDF5 meta block to %ld bytes", align ); + _H5Part_print_info ( + "Setting HDF5 meta block to %ld bytes", + align ); } - if (H5Pset_meta_block_size ( f->access_prop, align ) < 0) { + if (H5Pset_meta_block_size ( f->faccess_prop, align ) < 0) { HANDLE_H5P_SET_FAPL_ERR; goto error_cleanup; } @@ -297,24 +306,24 @@ _H5Part_open_file ( } if ( flags & H5PART_READ ) { - f->file = H5Fopen(filename, H5F_ACC_RDONLY, f->access_prop); + f->file = H5Fopen(filename, H5F_ACC_RDONLY, f->faccess_prop); } else if ( flags & H5PART_WRITE ){ - f->file = H5Fcreate (filename, H5F_ACC_TRUNC, f->create_prop, - f->access_prop); + f->file = H5Fcreate (filename, H5F_ACC_TRUNC, f->fcreate_prop, + f->faccess_prop); f->empty = 1; } else if ( flags & H5PART_APPEND ) { int fd = open(filename, O_RDONLY, 0); if ( (fd == -1) && (errno == ENOENT) ) { f->file = H5Fcreate(filename, H5F_ACC_TRUNC, - f->create_prop, f->access_prop); + f->fcreate_prop, f->faccess_prop); f->empty = 1; } else if (fd != -1) { close (fd); f->file = H5Fopen( - filename, H5F_ACC_RDWR, f->access_prop); + filename, H5F_ACC_RDWR, f->faccess_prop); /* The following function call returns an error, if f->file < 0. But we can safely ignore this. @@ -497,12 +506,23 @@ _H5Part_file_is_valid ( return H5PART_ERR_BADFD; } +/*! + \ingroup h5part_open + + Closes an open file. + + \return \c H5PART_SUCCESS or error code +*/ h5part_int64_t -_H5Part_close_hdf_ids ( +H5PartCloseFile ( H5PartFile *f /*!< [in] filehandle of the file to close */ ) { + SET_FNAME ( "H5PartCloseFile" ); herr_t r = 0; + _h5part_errno = H5PART_SUCCESS; + + CHECK_FILEHANDLE ( f ); if ( f->block && f->close_block ) { (*f->close_block) ( f ); @@ -538,51 +558,33 @@ _H5Part_close_hdf_ids ( if ( r < 0 ) HANDLE_H5S_CLOSE_ERR; f->memshape = 0; } - if( f->xfer_prop != H5P_DEFAULT ) { - r = H5Pclose( f->xfer_prop ); - if ( r < 0 ) HANDLE_H5P_CLOSE_ERR ( "f->xfer_prop" ); - f->xfer_prop = H5P_DEFAULT; + if( f->dxfer_prop != H5P_DEFAULT ) { + r = H5Pclose( f->dxfer_prop ); + if ( r < 0 ) HANDLE_H5P_CLOSE_ERR ( "f->dxfer_prop" ); + f->dxfer_prop = H5P_DEFAULT; } - if( f->access_prop != H5P_DEFAULT ) { - r = H5Pclose( f->access_prop ); - if ( r < 0 ) HANDLE_H5P_CLOSE_ERR ( "f->access_prop" ); - f->access_prop = H5P_DEFAULT; - } - if( f->create_prop != H5P_DEFAULT ) { - r = H5Pclose( f->create_prop ); - if ( r < 0 ) HANDLE_H5P_CLOSE_ERR ( "f->create_prop" ); - f->create_prop = H5P_DEFAULT; + if( f->dcreate_prop != H5P_DEFAULT ) { + r = H5Pclose( f->dcreate_prop ); + if ( r < 0 ) HANDLE_H5P_CLOSE_ERR ( "f->dcreate_prop" ); + f->dcreate_prop = H5P_DEFAULT; } - return H5PART_SUCCESS; -} - -/*! - \ingroup h5part_open - - Closes an open file. - - \return \c H5PART_SUCCESS or error code -*/ -h5part_int64_t -H5PartCloseFile ( - H5PartFile *f /*!< [in] filehandle of the file to close */ - ) { - - SET_FNAME ( "H5PartCloseFile" ); - herr_t r = 0; - _h5part_errno = H5PART_SUCCESS; - - CHECK_FILEHANDLE ( f ); - - r = _H5Part_close_hdf_ids ( f ); - if ( r < 0 ) return r; if ( f->file ) { r = H5Fclose( f->file ); if ( r < 0 ) HANDLE_H5F_CLOSE_ERR; f->file = 0; } + if( f->faccess_prop != H5P_DEFAULT ) { + r = H5Pclose( f->faccess_prop ); + if ( r < 0 ) HANDLE_H5P_CLOSE_ERR ( "f->faccess_prop" ); + f->faccess_prop = H5P_DEFAULT; + } + if( f->fcreate_prop != H5P_DEFAULT ) { + r = H5Pclose( f->fcreate_prop ); + if ( r < 0 ) HANDLE_H5P_CLOSE_ERR ( "f->fcreate_prop" ); + f->fcreate_prop = H5P_DEFAULT; + } /* free memory from H5PartFile struct */ if( f->pnparticles ) { @@ -896,6 +898,48 @@ H5PartSetNumParticlesStrided ( return H5PART_SUCCESS; } +/*! + \ingroup h5part_model + + Define the chunk \c size and enables chunking in the underlying + HDF5 layer. When combined with the \c align value in the + \ref H5PartOpenFileAlign or \ref H5PartOpenFileParallelAlign + function, this causes each group of \c size particles to be + padded on disk out to the nearest multiple of \c align bytes. + + Note that this policy wastes disk space, but can improve write + bandwidth on parallel filesystems that are sensitive to alignment + to stripe boundaries (e.g. lustre). + + \return \c H5PART_SUCCESS or error code +*/ +h5part_int64_t +H5PartSetChunkSize ( + H5PartFile *f, + const h5part_int64_t size + ) { + + SET_FNAME ( "H5PartSetChunkSize" ); + CHECK_FILEHANDLE( f ); + + if ( f->myproc == 0 ) + _H5Part_print_info ( + "Setting chunk size to %lld elements", + (long long)size ); + + if ( f->dcreate_prop == H5P_DEFAULT ) { + f->dcreate_prop = H5Pcreate (H5P_DATASET_CREATE); + if ( f->dcreate_prop < 0 ) return HANDLE_H5P_CREATE_ERR; + } + + hsize_t hsize = (hsize_t)size; + + herr_t herr = H5Pset_chunk ( f->dcreate_prop, 1, &hsize ); + if ( herr < 0 ) return HANDLE_H5P_SET_CHUNK_ERR; + + return H5PART_SUCCESS; +} + static void _normalize_dataset_name ( const char *name, @@ -953,23 +997,19 @@ _write_data ( "Dataset[%s] at timestep %lld " "already exists", name2, (long long)f->timestep ); } else { -#ifndef H5_USE_16_API - dataset_id = H5Dcreate2 ( - f->timegroup, - name2, - type, - f->shape, - H5P_DEFAULT, - H5P_DEFAULT, - H5P_DEFAULT ); -#else dataset_id = H5Dcreate ( f->timegroup, name2, type, f->shape, - H5P_DEFAULT ); +#ifndef H5_USE_16_API + H5P_DEFAULT, + f->dcreate_prop, + H5P_DEFAULT +#else + f->dcreate_prop #endif + ); if ( dataset_id < 0 ) return HANDLE_H5D_CREATE_ERR ( name2, f->timestep ); } @@ -984,7 +1024,7 @@ _write_data ( type, f->memshape, f->diskshape, - f->xfer_prop, + f->dxfer_prop, array ); #ifdef PARALLEL_IO @@ -3146,7 +3186,7 @@ _read_data ( complement to disk hyperslab) */ space_id, /* shape/size of data on disk (get hyperslab if needed) */ - f->xfer_prop, /* ignore... its for parallel reads */ + f->dxfer_prop, /* ignore... its for parallel reads */ array ); if ( herr < 0 ) return HANDLE_H5D_READ_ERR ( name2, f->timestep ); diff --git a/src/H5Part.h b/src/H5Part.h index 0baa461..756b87e 100644 --- a/src/H5Part.h +++ b/src/H5Part.h @@ -125,6 +125,12 @@ H5PartSetNumParticlesStrided ( const h5part_int64_t stride /*!< [in] Stride (e.g. number of fields in the particle array) */ ); +h5part_int64_t +H5PartSetChunkSize ( + H5PartFile *f, + const h5part_int64_t size + ); + h5part_int64_t H5PartWriteDataFloat64 ( H5PartFile *f, diff --git a/src/H5PartTypes.h b/src/H5PartTypes.h index 14dfdfc..0511524 100644 --- a/src/H5PartTypes.h +++ b/src/H5PartTypes.h @@ -49,9 +49,10 @@ struct H5PartFile { hid_t timegroup; hid_t shape; - hid_t xfer_prop; - hid_t create_prop; - hid_t access_prop; + hid_t dxfer_prop; + hid_t dcreate_prop; + hid_t faccess_prop; + hid_t fcreate_prop; /* the dataspace on disk for the current view */ hid_t diskshape; diff --git a/test/write.c b/test/write.c index c68eb28..70ad785 100644 --- a/test/write.c +++ b/test/write.c @@ -375,8 +375,6 @@ void test_write1(void) test_write_data32(file1, NPARTICLES, 1); test_write_file_attribs(file1, 0); - _H5Part_close_hdf_ids(file1); - test_open_objects(file1, 1); status = H5PartCloseFile(file1); RETURN(status, H5PART_SUCCESS, "H5PartCloseFile"); } @@ -410,10 +408,10 @@ void test_write3(void) h5part_int64_t status; TEST( "Opening file once, write-truncate, lustre filesyste, " - "MPI-POSIX VFD, 64KB alignment"); + "MPI-POSIX VFD, 1KB alignment"); file1 = OPENALIGN(FILENAME, H5PART_WRITE | H5PART_VFD_MPIPOSIX | H5PART_FS_LUSTRE, - 65536); + 1024); test_is_valid(file1); TEST("Redefining step name"); @@ -435,14 +433,14 @@ void test_write4(void) h5part_int64_t status; TEST( "Opening file twice, write-append + read-only, " - "lustre filesystem, MPI-IO Independent VFD, 64K alignment"); + "lustre filesystem, MPI-IO Independent VFD, 1K alignment"); file1 = OPENALIGN(FILENAME, H5PART_APPEND | H5PART_VFD_MPIIO_IND | H5PART_FS_LUSTRE, - 65536); + 1024); test_is_valid(file1); file2 = OPENALIGN(FILENAME, H5PART_READ | H5PART_VFD_MPIIO_IND | H5PART_FS_LUSTRE, - 65536); + 1024); test_is_valid(file2); TEST("Redefining step name"); @@ -452,6 +450,9 @@ void test_write4(void) status = H5PartDefineStepName(file2, LONGNAME, 16); RETURN(status, H5PART_SUCCESS, "H5PartDefineStepName"); + status = H5PartSetChunkSize(file1, NPARTICLES); + RETURN(status, H5PART_SUCCESS, "H5PartSetChunkSize"); + test_write_data64(file1, NPARTICLES, NTIMESTEPS-2); test_write_file_attribs(file1, 1);