xml-copy-editor-code/src/stringset.h

198 lines
4.4 KiB
C
Raw Normal View History

#ifndef STRINGSET_H
#define STRINGSET_H
#include <string>
#include <stdexcept>
#include <cmath>
#include <climits>
#include <cstddef>
#include <cstring>
#include <stdint.h>
#ifdef __WXMSW__
#include <mem.h>
#endif
/// One entry of a hash chain: owns a private copy of a key and links to the
/// next entry in the same chain.
///
/// The key is held as a raw array of `len` elements of type T. No terminator
/// is appended. The key is duplicated with memcpy, so T is expected to be a
/// trivially copyable character type.
template<class T>
class StringSetNode
{
public:
    /// Copies `lenParameter` elements starting at `keyParameter` into a
    /// freshly allocated array owned by this node.
    ///
    /// @param keyParameter first element of the key; may be NULL only when
    ///                     lenParameter is 0
    /// @param lenParameter number of T elements (not bytes) in the key
    StringSetNode(const T *keyParameter, size_t lenParameter)
        : next(NULL), key(new T[lenParameter]), len(lenParameter)
    {
        // memcpy with a null source is undefined even for a zero length,
        // so skip the call entirely for empty keys.
        if (len != 0)
            memcpy(key, keyParameter, len * sizeof(T));
    }

    /// Releases the key array. `next` is not followed or deleted here.
    ~StringSetNode()
    {
        delete[] key;
    }

    StringSetNode<T> *next; ///< following node in the chain, NULL at the end
    T *key;                 ///< owned copy of the key, `len` elements long
    size_t len;             ///< key length in elements of T

private:
    // Declared but never defined: a member-wise copy would leave two nodes
    // pointing at the same key array, and both destructors would delete it.
    StringSetNode(const StringSetNode<T> &);
    StringSetNode<T> &operator=(const StringSetNode<T> &);
};
/// Set of length-delimited keys of element type T.
///
/// Keys live in StringSetNode objects that hang off an array of chain heads
/// (`table`). The array length is a power of two fixed at construction.
template<class T>
class StringSet
{
public:
    /// @param hashSizePower the table is created with 2^hashSizePower chain heads
    StringSet(int hashSizePower = 19);
    ~StringSet();
    StringSet(const StringSet<T>&);
    StringSet& operator=(const StringSet<T>&);

    /// True when the set holds no nodes.
    inline bool empty();
    /// Number of nodes currently stored.
    inline int count();

    /// Adds the key if absent; returns the node holding it either way.
    StringSetNode<T> *insert(std::basic_string<T> &s);
    /// As above for a raw buffer of `len` elements. `hash` is the index of the
    /// chain to use; the default UINT_MAX means "compute it from the key".
    StringSetNode<T> *insert(T *s, size_t len, uint32_t hash = UINT_MAX);

    /// Returns the node holding the key, or NULL when it is not present.
    StringSetNode<T> *find(std::basic_string<T> &s);
    /// As above for a raw buffer of `len` elements. `hash` is the index of the
    /// chain to search; the default UINT_MAX means "compute it from the key".
    StringSetNode<T> *find(T *s, size_t len, uint32_t hash = UINT_MAX);

    /// Deletes every node; the table itself is kept.
    void clear();

private:
    // NOTE(review): not read or written by any member function visible in
    // this header - confirm before relying on it.
    bool emptyFlag;
    uint32_t hashSize;   // number of chain heads in `table`
    uint32_t hashMask;   // hashSize - 1; hash() ANDs its result with this
    uint32_t nodeCount;  // nodes currently stored across all chains
    StringSetNode<T> **table;

    /// Hashes `len` bytes starting at `s` and reduces the result with hashMask.
    uint32_t hash(const char *s, size_t len);
    /// Allocates `table` with `hashSize` zeroed entries.
    void allocateHashTable(uint32_t hashSize);
};
/// Creates an empty set whose table has 2^hashSizePower chain heads.
///
/// @param hashSizePower exponent of the table size; accepted range is 1..31
/// @throws std::runtime_error when hashSizePower is outside that range
template<class T>
StringSet<T>::StringSet(int hashSizePower)
{
    // The table size is stored in a uint32_t, so 2^32 and above cannot be
    // represented; reject them instead of overflowing.
    if (hashSizePower < 1 || hashSizePower > 31)
        throw std::runtime_error("StringSet: invalid parameter");

    // Exact integer power of two; no floating-point round trip needed.
    hashSize = static_cast<uint32_t>(1) << hashSizePower;
    // Valid as a mask because hashSize is a power of two.
    hashMask = hashSize - 1;
    nodeCount = 0;
    allocateHashTable(hashSize);
}
/// Deletes all stored nodes (if any) and then the table of chain heads.
template<class T>
StringSet<T>::~StringSet()
{
    const bool holdsNodes = !empty();
    if (holdsNodes)
    {
        clear();
    }
    delete[] table;
}
/// Copy constructor: builds a table of the same size as `d` and re-inserts
/// every key of `d` into the chain with the same index it occupies in `d`.
template<class T>
StringSet<T>::StringSet(const StringSet<T>& d)
{
    nodeCount = 0;
    hashSize = d.hashSize;
    hashMask = hashSize - 1;
    allocateHashTable(hashSize);

    for (uint32_t bucket = 0; bucket < d.hashSize; ++bucket)
    {
        StringSetNode<T> *source = d.table[bucket];
        while (source != NULL)
        {
            // Both tables have the same size, so the source chain index is
            // passed as the hash and no rehashing is needed.
            insert(source->key, source->len, bucket);
            source = source->next;
        }
    }
}
/// Copy assignment: discards the current contents and copies every key of `d`.
///
/// The table is resized to match `d` when the two sizes differ, so that after
/// assignment this set has the same geometry as its source (as the copy
/// constructor already guarantees).
///
/// @param d set to copy from; self-assignment is a no-op
/// @return *this
template<class T>
StringSet<T>& StringSet<T>::operator=(const StringSet<T>& d)
{
    if (this == &d)
        return *this;

    clear();

    // The copy loop below passes d's chain indices straight to insert().
    // Those indices are only meaningful for a table of d's size: with a
    // smaller table they run past the end, with a larger one the keys land
    // in chains that find() would never probe. Adopt d's size first.
    if (hashSize != d.hashSize)
    {
        StringSetNode<T> **previousTable = table;
        // If the allocation throws, `table` still points at the old
        // (now empty) array and the object stays usable.
        allocateHashTable(d.hashSize);
        delete[] previousTable;
        hashSize = d.hashSize;
        hashMask = hashSize - 1;
    }

    for (uint32_t bucket = 0; bucket < d.hashSize; ++bucket)
    {
        for (StringSetNode<T> *source = d.table[bucket];
                source != NULL;
                source = source->next)
        {
            insert(source->key, source->len, bucket);
        }
    }
    return *this;
}
// see 'one-at-a-time hash' (http://burtleburtle.net/bob/hash/doobs.html)
//
// Mixes `len` bytes starting at `key` and returns the result ANDed with
// hashMask, i.e. an index into `table`. Bytes are read through a plain
// `char`, so the value added for each byte follows the platform's char
// signedness.
template<class T>
uint32_t StringSet<T>::hash(const char *key, size_t len)
{
    uint32_t mixed = 0;

    // Per-byte mixing step.
    for (size_t i = 0; i < len; ++i)
    {
        mixed += key[i];
        mixed += (mixed << 10);
        mixed ^= (mixed >> 6);
    }

    // Final mixing step applied once after all bytes are consumed.
    mixed += (mixed << 3);
    mixed ^= (mixed >> 11);
    mixed += (mixed << 15);

    return mixed & hashMask;
}
/// Looks up the contents of `s`; forwards to the buffer overload with the
/// default hash argument so the hash is computed from the key.
/// @return the node holding the key, or NULL when it is absent
template<class T>
StringSetNode<T> *StringSet<T>::find(std::basic_string<T> &s)
{
    T *characters = const_cast<T *>(s.data());
    const size_t length = s.size();
    return find(characters, length);
}
/// Searches one chain for a key equal to the `len` elements starting at `s`.
///
/// @param s         first element of the key to look for
/// @param len       key length in elements of T (not bytes)
/// @param hashValue index into the table of the chain to search; it is used
///                  unchecked, so it must be below the table size. UINT_MAX
///                  means "compute the index from the key".
/// @return the matching node, or NULL when the chain holds no equal key
template<class T>
StringSetNode<T> *StringSet<T>::find(T *s, size_t len, uint32_t hashValue)
{
    // memcmp counts bytes, not elements: for multi-byte T the whole key must
    // be compared, otherwise keys sharing only a prefix would test equal.
    const size_t keyBytes = len * sizeof(T);

    if (hashValue == UINT_MAX)
        hashValue = hash((const char*)s, keyBytes);

    StringSetNode<T> *np;
    for (np = table[hashValue]; np != NULL; np = np->next)
    {
        if (len != np->len)
            continue;
        // Two empty keys are equal; avoid handing memcmp a possibly null
        // pointer in that case.
        if (keyBytes == 0 || memcmp(s, np->key, keyBytes) == 0)
            return np;
    }
    return NULL;
}
/// Inserts the contents of `s`; forwards to the buffer overload with the
/// default hash argument so the hash is computed from the key.
/// @return the node holding the key (new or already present)
template<class T>
StringSetNode<T> *StringSet<T>::insert(std::basic_string<T> &s)
{
    T *characters = const_cast<T *>(s.data());
    const size_t length = s.size();
    return insert(characters, length);
}
/// Adds the `len` elements starting at `s` as a key unless an equal key is
/// already stored in the selected chain.
///
/// @param s         first element of the key
/// @param len       key length in elements of T
/// @param hashValue index into the table of the chain to use; UINT_MAX means
///                  "compute the index from the key"
/// @return the existing node when the key was found, otherwise the new node
template<class T>
StringSetNode<T> *StringSet<T>::insert(T *s, size_t len, uint32_t hashValue)
{
    if (hashValue == UINT_MAX)
    {
        hashValue = hash(reinterpret_cast<const char *>(s), len * sizeof(T));
    }

    StringSetNode<T> *existing = find(s, len, hashValue);
    if (existing != NULL)
    {
        return existing;
    }

    // Not present: the new node becomes the head of its chain.
    StringSetNode<T> *fresh = new StringSetNode<T>(s, len);
    fresh->next = table[hashValue];
    table[hashValue] = fresh;
    ++nodeCount;
    return fresh;
}
/// Deletes every node and resets all chain heads to NULL. The table itself
/// is kept, so the set can be reused afterwards. Does nothing when the set
/// is already empty.
template<class T>
void StringSet<T>::clear()
{
    if (nodeCount == 0)
    {
        return;
    }

    for (uint32_t bucket = 0; bucket < hashSize; ++bucket)
    {
        StringSetNode<T> *current = table[bucket];
        while (current != NULL)
        {
            // Read the link before the node is destroyed.
            StringSetNode<T> *following = current->next;
            delete current;
            current = following;
        }
        table[bucket] = NULL;
    }

    nodeCount = 0;
}
/// Number of nodes currently stored, converted from the internal unsigned
/// counter to int.
template<class T>
int StringSet<T>::count()
{
    const int stored = nodeCount;
    return stored;
}
/// True when no nodes are stored.
template<class T>
bool StringSet<T>::empty()
{
    return nodeCount == 0;
}
/// Allocates a new array of `hashSize` chain heads, stores it in `table` and
/// zero-fills it. Any array `table` pointed to before the call is not freed
/// here. The parameter shadows the member of the same name; the member is
/// not modified.
template<class T>
void StringSet<T>::allocateHashTable(uint32_t hashSize)
{
    const size_t tableBytes = sizeof(StringSetNode<T> *) * hashSize;
    table = new StringSetNode<T> *[hashSize];
    // All-zero bytes are used as the "empty chain" marker.
    memset(table, 0, tableBytes);
}
#endif