2008-01-06 23:13:01 +01:00
|
|
|
/*
 * Copyright 2005-2007 Gerald Schmidt.
 *
 * This file is part of Xml Copy Editor.
 *
 * Xml Copy Editor is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2 of the License.
 *
 * Xml Copy Editor is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Xml Copy Editor; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
|
|
|
|
|
2007-09-07 23:17:30 +02:00
|
|
|
#ifndef STRINGSET_H
|
|
|
|
#define STRINGSET_H
|
|
|
|
|
|
|
|
#include <string>
|
|
|
|
#include <stdexcept>
|
|
|
|
#include <cmath>
|
|
|
|
#include <climits>
|
2008-07-14 18:43:27 +02:00
|
|
|
#include <cstring>
|
2007-09-07 23:17:30 +02:00
|
|
|
#ifdef __WXMSW__
|
2007-09-08 00:25:30 +02:00
|
|
|
#include <mem.h>
|
2007-09-07 23:17:30 +02:00
|
|
|
#endif
|
|
|
|
|
|
|
|
/**
 * One entry of a singly linked chain of keys.
 *
 * Each node owns a private heap copy of its key: an array of `len` elements
 * of type T, copied with memcpy and not terminated in any way. `next` is a
 * plain link to another node; the destructor releases the key only and never
 * follows or deletes `next`.
 *
 * Copying a node duplicates the key storage (so both objects can be destroyed
 * independently) and copies the `next` link as-is.
 */
template<class T>
class StringSetNode
{
public:
    /**
     * Creates an unlinked node holding a copy of the given key.
     *
     * @param keyParameter first element of the key to copy; not read when
     *                     lenParameter is 0
     * @param lenParameter number of T elements in the key
     */
    StringSetNode ( T *keyParameter, size_t lenParameter )
        : next ( NULL )
        , key ( duplicate ( keyParameter, lenParameter ) )
        , len ( lenParameter )
    {
    }

    /** Deep-copies the key of `other`; the `next` link is copied verbatim. */
    StringSetNode ( const StringSetNode<T> &other )
        : next ( other.next )
        , key ( duplicate ( other.key, other.len ) )
        , len ( other.len )
    {
    }

    /** Replaces this node's key with a deep copy of the key of `other`. */
    StringSetNode<T> &operator= ( const StringSetNode<T> &other )
    {
        if ( this != &other )
        {
            // Allocate first so this node is untouched if allocation throws.
            T *replacement = duplicate ( other.key, other.len );
            delete[] key;
            key = replacement;
            len = other.len;
            next = other.next;
        }
        return *this;
    }

    ~StringSetNode()
    {
        delete[] key;
    }

    StringSetNode<T> *next; ///< following node in the chain, or NULL
    T *key;                 ///< owned array of `len` elements
    size_t len;             ///< number of elements in `key`

private:
    /** Returns a new[]-allocated copy of `count` elements starting at `source`. */
    static T *duplicate ( const T *source, size_t count )
    {
        T *copy = new T[count];
        // Skip memcpy for empty keys so a null source pointer is never passed to it.
        if ( count != 0 )
            memcpy ( copy, source, count * sizeof ( T ) );
        return copy;
    }
};
|
|
|
|
|
|
|
|
template<class T>
|
|
|
|
class StringSet
|
|
|
|
{
|
2008-01-06 23:13:01 +01:00
|
|
|
public:
|
|
|
|
StringSet (
|
|
|
|
int hashSizePower = 19
|
|
|
|
);
|
|
|
|
~StringSet();
|
|
|
|
StringSet ( const StringSet<T>& );
|
|
|
|
StringSet& operator= ( const StringSet<T>& );
|
|
|
|
inline bool empty();
|
|
|
|
inline int count();
|
|
|
|
StringSetNode<T> *insert ( std::basic_string<T> &s );
|
|
|
|
StringSetNode<T> *insert ( T *s, size_t len, uint32_t hash = UINT_MAX );
|
|
|
|
StringSetNode<T> *find ( std::basic_string<T> &s );
|
|
|
|
StringSetNode<T> *find ( T *s, size_t len, uint32_t hash = UINT_MAX );
|
|
|
|
void clear();
|
|
|
|
private:
|
|
|
|
bool emptyFlag;
|
|
|
|
uint32_t hashSize, hashMask, nodeCount;
|
|
|
|
StringSetNode<T> **table;
|
|
|
|
uint32_t hash ( const char *s, size_t len );
|
|
|
|
void allocateHashTable ( uint32_t hashSize );
|
2007-09-07 23:17:30 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
template<class T>
|
2007-09-08 00:25:30 +02:00
|
|
|
StringSet<T>::StringSet ( int hashSizePower )
|
2007-09-07 23:17:30 +02:00
|
|
|
{
|
2008-01-06 23:13:01 +01:00
|
|
|
if ( hashSizePower < 1 )
|
|
|
|
throw std::runtime_error ( "StringSet: invalid parameter" );
|
|
|
|
hashSize = ( int ) pow ( ( double ) 2, ( double ) hashSizePower );
|
|
|
|
hashMask = hashSize - 1;
|
|
|
|
nodeCount = 0;
|
|
|
|
allocateHashTable ( hashSize );
|
2007-09-07 23:17:30 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
template<class T>
|
|
|
|
StringSet<T>::~StringSet()
|
|
|
|
{
|
2008-01-06 23:13:01 +01:00
|
|
|
if ( !empty() )
|
|
|
|
clear();
|
|
|
|
delete[] table;
|
2007-09-07 23:17:30 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
template<class T>
|
2007-09-08 00:25:30 +02:00
|
|
|
StringSet<T>::StringSet ( const StringSet<T>& d )
|
2007-09-07 23:17:30 +02:00
|
|
|
{
|
2008-01-06 23:13:01 +01:00
|
|
|
hashSize = d.hashSize;
|
|
|
|
hashMask = hashSize - 1;
|
|
|
|
nodeCount = 0;
|
|
|
|
allocateHashTable ( hashSize );
|
|
|
|
StringSetNode<T> *np;
|
|
|
|
for ( uint32_t i = 0; i < d.hashSize; ++i )
|
|
|
|
for ( np = d.table[i]; np != NULL; np = np->next )
|
|
|
|
insert ( np->key, np->len, i );
|
2007-09-07 23:17:30 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
template<class T>
|
2007-09-08 00:25:30 +02:00
|
|
|
StringSet<T>& StringSet<T>::operator= ( const StringSet<T>& d )
|
2007-09-07 23:17:30 +02:00
|
|
|
{
|
2008-01-06 23:13:01 +01:00
|
|
|
if ( this != &d )
|
|
|
|
{
|
|
|
|
clear();
|
|
|
|
StringSetNode<T> *np;
|
|
|
|
for ( uint32_t i = 0; i < d.hashSize; ++i )
|
|
|
|
for ( np = d.table[i]; np != NULL; np = np->next )
|
|
|
|
insert ( np->key, np->len, i );
|
|
|
|
}
|
|
|
|
return *this;
|
2007-09-07 23:17:30 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// see 'one-at-a-time hash' (http://burtleburtle.net/bob/hash/doobs.html)
|
|
|
|
template<class T>
|
2007-09-08 00:25:30 +02:00
|
|
|
uint32_t StringSet<T>::hash ( const char *key, size_t len )
|
2007-09-07 23:17:30 +02:00
|
|
|
{
|
2008-01-06 23:13:01 +01:00
|
|
|
uint32_t hash;
|
|
|
|
for ( hash = 0; len--; ++key )
|
|
|
|
{
|
|
|
|
hash += *key;
|
|
|
|
hash += ( hash << 10 );
|
|
|
|
hash ^= ( hash >> 6 );
|
|
|
|
}
|
|
|
|
hash += ( hash << 3 );
|
|
|
|
hash ^= ( hash >> 11 );
|
|
|
|
hash += ( hash << 15 );
|
|
|
|
return ( hash & hashMask );
|
2007-09-07 23:17:30 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
template<class T>
|
2007-09-08 00:25:30 +02:00
|
|
|
StringSetNode<T> *StringSet<T>::find ( std::basic_string<T> &s )
|
2007-09-07 23:17:30 +02:00
|
|
|
{
|
2008-01-06 23:13:01 +01:00
|
|
|
return find ( ( T * ) s.data(), s.size() );
|
2007-09-07 23:17:30 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
template<class T>
|
2007-09-08 00:25:30 +02:00
|
|
|
StringSetNode<T> *StringSet<T>::find ( T *s, size_t len, uint32_t hashValue )
|
2007-09-07 23:17:30 +02:00
|
|
|
{
|
2008-01-06 23:13:01 +01:00
|
|
|
if ( hashValue == UINT_MAX )
|
|
|
|
hashValue = hash ( ( const char* ) s, len * sizeof ( T ) );
|
|
|
|
StringSetNode<T> *np;
|
|
|
|
for ( np = table[hashValue]; np != NULL; np = np->next )
|
|
|
|
if ( len == np->len && ( memcmp ( s, np->key, len ) == 0 ) )
|
|
|
|
return np;
|
|
|
|
return NULL;
|
2007-09-07 23:17:30 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
template<class T>
|
2007-09-08 00:25:30 +02:00
|
|
|
StringSetNode<T> *StringSet<T>::insert ( std::basic_string<T> &s )
|
2007-09-07 23:17:30 +02:00
|
|
|
{
|
2008-01-06 23:13:01 +01:00
|
|
|
return insert ( ( T * ) s.data(), s.size() );
|
2007-09-07 23:17:30 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
template<class T>
|
2007-09-08 00:25:30 +02:00
|
|
|
StringSetNode<T> *StringSet<T>::insert ( T *s, size_t len, uint32_t hashValue )
|
2007-09-07 23:17:30 +02:00
|
|
|
{
|
2008-01-06 23:13:01 +01:00
|
|
|
if ( hashValue == UINT_MAX )
|
|
|
|
hashValue = hash ( ( const char* ) s, len * sizeof ( T ) );
|
|
|
|
StringSetNode<T> *np;
|
|
|
|
if ( ( np = find ( s, len, hashValue ) ) == NULL )
|
|
|
|
{
|
|
|
|
np = new StringSetNode<T> ( s, len );
|
|
|
|
np->next = table[hashValue];
|
|
|
|
table[hashValue] = np;
|
|
|
|
++nodeCount;
|
|
|
|
}
|
|
|
|
return np;
|
2007-09-07 23:17:30 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
template<class T>
|
|
|
|
void StringSet<T>::clear()
|
|
|
|
{
|
2008-01-06 23:13:01 +01:00
|
|
|
if ( !nodeCount )
|
|
|
|
return;
|
|
|
|
StringSetNode<T> *np, *memory;
|
|
|
|
for ( uint32_t i = 0; i < hashSize; ++i )
|
|
|
|
{
|
|
|
|
for ( np = table[i]; np != NULL; np = memory )
|
|
|
|
{
|
|
|
|
memory = np->next;
|
|
|
|
delete np;
|
|
|
|
}
|
|
|
|
table[i] = NULL;
|
|
|
|
}
|
|
|
|
nodeCount = 0;
|
2007-09-07 23:17:30 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
template<class T>
|
|
|
|
int StringSet<T>::count()
|
|
|
|
{
|
2008-01-06 23:13:01 +01:00
|
|
|
return nodeCount;
|
2007-09-07 23:17:30 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
template<class T>
|
|
|
|
bool StringSet<T>::empty()
|
|
|
|
{
|
2008-01-06 23:13:01 +01:00
|
|
|
return ( nodeCount ) ? false : true;
|
2007-09-07 23:17:30 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
template<class T>
|
2007-09-08 00:25:30 +02:00
|
|
|
void StringSet<T>::allocateHashTable ( uint32_t hashSize )
|
2007-09-07 23:17:30 +02:00
|
|
|
{
|
2008-01-06 23:13:01 +01:00
|
|
|
table = new StringSetNode<T> *[hashSize];
|
|
|
|
memset ( table, 0, sizeof ( StringSetNode<T> * ) * hashSize );
|
2007-09-07 23:17:30 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
#endif
|