From f70dce9acc1ea5dc295abad654d897063a7badd7 Mon Sep 17 00:00:00 2001 From: Achim Gsell Date: Thu, 16 Jul 2009 08:38:09 +0000 Subject: [PATCH] hash funktions added --- .gitattributes | 2 + src/h5_core/Makefile.am | 2 + src/h5_core/h5_hsearch.c | 212 +++++++++++++++++++++++++++++++ src/h5_core/h5_hsearch_private.h | 30 +++++ 4 files changed, 246 insertions(+) create mode 100644 src/h5_core/h5_hsearch.c create mode 100644 src/h5_core/h5_hsearch_private.h diff --git a/.gitattributes b/.gitattributes index 37f68fc..afb7bcb 100644 --- a/.gitattributes +++ b/.gitattributes @@ -391,6 +391,8 @@ src/h5_core/h5_fcmp.c -text src/h5_core/h5_fcmp_private.h -text src/h5_core/h5_hdf5.c -text src/h5_core/h5_hdf5_private.h -text +src/h5_core/h5_hsearch.c -text +src/h5_core/h5_hsearch_private.h -text src/h5_core/h5_maps.c -text src/h5_core/h5_maps.h -text src/h5_core/h5_mpi.c -text diff --git a/src/h5_core/Makefile.am b/src/h5_core/Makefile.am index 61b93e8..caad204 100644 --- a/src/h5_core/Makefile.am +++ b/src/h5_core/Makefile.am @@ -36,6 +36,7 @@ EXTRA_HEADERS = \ h5_errorhandling_private.h \ h5_fcmp_private.h \ h5_hdf5_private.h \ + h5_hsearch_private.h \ h5_maps.h \ h5_mpi_private.h \ h5_openclose.h \ @@ -85,6 +86,7 @@ libH5_core_a_SOURCES = \ h5_errorhandling.c \ h5_fcmp.c \ h5_hdf5.c \ + h5_hsearch.c \ h5_maps.c \ h5_mpi.c \ h5_openclose.c \ diff --git a/src/h5_core/h5_hsearch.c b/src/h5_core/h5_hsearch.c new file mode 100644 index 0000000..be4ded8 --- /dev/null +++ b/src/h5_core/h5_hsearch.c @@ -0,0 +1,212 @@ +/* Copyright (C) 1993,1995-1997,2002,2005,2007,2008 + Free Software Foundation, Inc. + This file is part of the GNU C Library. + Contributed by Ulrich Drepper , 1993. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+/* NOTE(review): the names of the two system headers were lost in this
+   copy of the patch.  <stdlib.h> and <string.h> are assumed (the code
+   needs size_t, NULL and memcmp) -- confirm against the repository.
+   <limits.h> is new and provides UINT_MAX for the size check in
+   h5_hcreate_r.  */
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "h5_core.h"
+#include "h5_core_private.h"
+
+/* [Aho,Sethi,Ullman] Compilers: Principles, Techniques and Tools, 1986
+   [Knuth]            The Art of Computer Programming, part 3 (6.4)  */
+
+/* The reentrant version has no static variables to maintain the state.
+   Instead the interface of all functions is extended to take an argument
+   which describes the current status.
+
+   One slot of the table: `used' holds the hash value of the stored key,
+   zero means that the slot is free.  */
+typedef struct _ENTRY {
+	unsigned int used;
+	h5_entry_t entry;
+} _ENTRY;
+
+/* For the double hash method used here the table size has to be a prime.
+   To correct the table size given by the user we need a prime test.
+   Returns non-zero if NUMBER has no odd divisor between 3 and its square
+   root; callers pass odd numbers only, so even divisors are not tried.
+   The loop bound is written as div <= number / div so that it cannot
+   overflow and so that 3 is recognised as a prime.  This trivial
+   algorithm is adequate because
+   a)  the code is (most probably) called a few times per program run and
+   b)  the number is small because the table must fit in the core  */
+static int
+isprime (unsigned int number) {
+	unsigned int div;
+
+	for (div = 3; div <= number / div; div += 2) {
+		if (number % div == 0)
+			return 0;
+	}
+	return 1;
+}
+
+/* Before using the hash table we must allocate memory for it.
+   NEL is rounded up to a prime, at least 3, and stored in HTAB->size.
+   One element more than this prime is allocated so that index zero can
+   stay unused, as explained in the comment for the search function.
+   The contents of the table are zeroed, so the field `used' of every
+   slot starts out as zero.
+
+   Returns H5_SUCCESS.  Returns H5_ERR if HTAB is NULL, if HTAB already
+   holds a table, or if the table size does not fit into an unsigned int.
+   NOTE(review): TRY is defined elsewhere; presumably it leaves the
+   function with an error code if the allocation fails -- confirm.  */
+int
+h5_hcreate_r (
+	h5_file_t * const f,
+	size_t nel,
+	struct hsearch_data *htab
+	) {
+	/* Test for correct arguments.  */
+	if (htab == NULL) {
+		h5_error_internal ( f, __FILE__, __func__, __LINE__ );
+		return H5_ERR;
+	}
+
+	/* There is still another table active. Return with error. */
+	if (htab->table != NULL) {
+		h5_error_internal ( f, __FILE__, __func__, __LINE__ );
+		return H5_ERR;
+	}
+
+	/* The second hash function of the search takes the hash value
+	   modulo size - 2.  With a size below 3 this modulus would wrap
+	   around and the probe index could leave the table. */
+	if (nel < 3)
+		nel = 3;
+
+	/* Change nel to the first prime number not smaller than nel.
+	   Give up before nel (and nel + 1 slots) no longer fit into the
+	   unsigned int fields of the table. */
+	for (nel |= 1; ; nel += 2) {
+		if (nel > UINT_MAX - 2) {
+			h5_error_internal ( f, __FILE__, __func__, __LINE__ );
+			return H5_ERR;
+		}
+		if (isprime ((unsigned int) nel))
+			break;
+	}
+
+	htab->size = (unsigned int) nel;
+	htab->filled = 0;
+
+	/* allocate memory and zero out */
+	TRY ( (htab->table = (_ENTRY *) _h5_calloc (
+		       f, htab->size + 1, sizeof (_ENTRY)) ) );
+
+	/* everything went alright */
+	return H5_SUCCESS;
+}
+
+/* After using the hash table it has to be destroyed.  The memory of the
+   table is handed to _h5_free and HTAB is marked as holding no table, so
+   it can be passed to h5_hcreate_r again.  Only the table itself is
+   released; the keys and data the entries point to are not touched.
+   A NULL HTAB is reported via h5_error_internal and otherwise ignored.  */
+void
+h5_hdestroy_r (
+	h5_file_t * const f,
+	struct hsearch_data *htab
+	) {
+	/* Test for correct arguments.  */
+	if (htab == NULL) {
+		h5_error_internal ( f, __FILE__, __func__, __LINE__ );
+		return;
+	}
+
+	/* Free used memory.  */
+	_h5_free ( f, htab->table );
+
+	/* the sign for an existing table is a value != NULL in table */
+	htab->table = NULL;
+}
+
+
+/* This is the search function.  It uses double hashing with open addressing.
+   The key is given by item.key and item.len: item.len bytes starting at
+   item.key, which do not have to be zero terminated.  The function for
+   generating a number from the key is simple but fast.  It can be replaced
+   by a more complex function like ajw (see [Aho,Sethi,Ullman]) if the
+   need is shown.
+
+   We use a trick to speed up the lookup.  The table is created by
+   h5_hcreate_r with one more element available.  This enables us to treat
+   index zero specially: it is never used, because the first hash index is
+   always at least one.  The hash value of a key is stored in the field
+   `used', where zero means not used.  Every other value means used.  The
+   used field can be used as a first fast comparison for equality of the
+   stored and the parameter value.  This helps to prevent unnecessary
+   expensive comparisons of the key bytes.
*/
+
+/* Look ITEM up in HTAB and, for H5_ENTER, store it if its key is absent.
+
+   F       is only passed on to h5_error_internal.
+   ITEM    key (ITEM.len bytes at ITEM.key) plus the associated data.
+   ACTION  H5_FIND looks the key up only, H5_ENTER also enters ITEM.
+   RETVAL  receives a pointer to the entry stored in the table, or NULL
+           if H5_ERR is returned.
+   HTAB    table set up by h5_hcreate_r.
+
+   Returns H5_SUCCESS if the key was found or ITEM was entered.  Returns
+   H5_ERR if the key was not found (H5_FIND), if the table is full
+   (H5_ENTER) or if HTAB holds no table.  ITEM is copied by value, so the
+   table stores the key and data pointers as given.  */
+int
+h5_hsearch_r (
+	h5_file_t * const f,
+	h5_entry_t item,
+	h5_action_t action,
+	h5_entry_t **retval,
+	struct hsearch_data *htab
+	) {
+	unsigned int hval;
+	unsigned int count;
+	unsigned int idx;
+
+	/* Without a table there is nothing to search and nothing to enter
+	   into; a size of zero would also be a modulo by zero below. */
+	if (htab == NULL || htab->table == NULL || htab->size == 0) {
+		h5_error_internal ( f, __FILE__, __func__, __LINE__ );
+		*retval = NULL;
+		return H5_ERR;
+	}
+
+	/* Compute a value for the given key. Perhaps use a better method. */
+	hval = item.len;
+	count = item.len;
+	while (count-- > 0) {
+		hval <<= 4;
+		hval += item.key[count];
+	}
+	/* Zero in the field `used' marks a free slot, so no key may hash to
+	   zero (an empty key would).  */
+	if (hval == 0)
+		++hval;
+
+	/* First hash function: simply take the modulus but prevent zero. */
+	idx = hval % htab->size + 1;
+
+	if (htab->table[idx].used) {
+		/* Second hash function, as suggested in [Knuth] */
+		const unsigned int hval2 = 1 + hval % (htab->size - 2);
+		const unsigned int first_idx = idx;
+
+		for (;;) {
+			_ENTRY *slot = &htab->table[idx];
+
+			/* The slot at idx is in use.  Compare the cheap hash
+			   value first, then the length: memcmp must neither
+			   read past a shorter stored key nor accept a stored
+			   key which merely starts with the searched one. */
+			if (slot->used == hval
+			    && slot->entry.len == item.len
+			    && (item.len == 0
+				|| memcmp (
+					item.key,
+					slot->entry.key,
+					item.len ) == 0)) {
+				*retval = &slot->entry;
+				return H5_SUCCESS;
+			}
+
+			/* Because SIZE is prime this guarantees to step
+			   through all available indices. */
+			if (idx <= hval2)
+				idx = htab->size + idx - hval2;
+			else
+				idx -= hval2;
+
+			/* Leave the loop unsuccessfully if we visited all
+			   entries or reached an empty bucket. */
+			if (idx == first_idx || !htab->table[idx].used)
+				break;
+		}
+	}
+
+	/* The key is not in the table.  idx names an empty bucket unless
+	   the table is full. */
+	if (action == H5_ENTER) {
+		/* If table is full and another entry should be entered return
+		   with error. */
+		if (htab->filled == htab->size) {
+			h5_error_internal ( f, __FILE__, __func__, __LINE__ );
+			*retval = NULL;
+			return H5_ERR;
+		}
+
+		htab->table[idx].used = hval;
+		htab->table[idx].entry = item;
+
+		++htab->filled;
+
+		*retval = &htab->table[idx].entry;
+		return H5_SUCCESS;
+	}
+
+	*retval = NULL;
+	return H5_ERR;
+}
diff --git a/src/h5_core/h5_hsearch_private.h b/src/h5_core/h5_hsearch_private.h
new file mode 100644
index 0000000..bee1cff
--- /dev/null
+++ b/src/h5_core/h5_hsearch_private.h
@@ -0,0 +1,38 @@
+#ifndef __H5_HSEARCH_PRIVATE_H
+#define __H5_HSEARCH_PRIVATE_H
+
+#include <stddef.h>	/* size_t */
+
+/* Action which shall be performed in the call to h5_hsearch_r. */
+typedef enum {
+	H5_FIND,	/* look the key up only */
+	H5_ENTER	/* look the key up, enter the item if it is absent */
+} h5_action_t;
+
+/* Item stored in the table.  The hash functions never look at data. */
+typedef struct h5_entry {
+	unsigned int len;	/* length of key in bytes */
+	char *key;
+	void *data;
+} h5_entry_t;
+
+/* State of one hash table.  table == NULL means that no table is
+   allocated; h5_hcreate_r allocates size + 1 slots.  */
+struct hsearch_data {
+	struct _ENTRY *table;
+	unsigned int size;
+	unsigned int filled;	/* number of entries entered so far */
+};
+
+/* Reentrant versions which can handle multiple hashing tables at the
+   same time.  The prototypes match the definitions in h5_hsearch.c.
+   NOTE(review): h5_file_t has to be declared before this header is
+   included -- confirm the include order.  */
+extern int h5_hcreate_r (h5_file_t * const f, size_t nel,
+			 struct hsearch_data *htab);
+extern int h5_hsearch_r (h5_file_t * const f, h5_entry_t item,
+			 h5_action_t action, h5_entry_t **retval,
+			 struct hsearch_data *htab);
+extern void h5_hdestroy_r (h5_file_t * const f, struct hsearch_data *htab);
+
+#endif