Removed previous spellcheck implementation

2008-01-21 23:07:21 +00:00 · 2008-01-21 23:07:21 +00:00 · 640bf19139
parent 9ef9fd5dea
commit 640bf19139
2 changed files with 0 additions and 334 deletions
--- a/src/spellcheck.cpp
+++ b/src/spellcheck.cpp
@ -1,283 +0,0 @@
 /*
 * Copyright 2005-2007 Gerald Schmidt.
 *
 * This file is part of Xml Copy Editor.
 *
 * Xml Copy Editor is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2 of the License.
 *
 * Xml Copy Editor is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Xml Copy Editor; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 #include "spellcheck.h"
 #include "casehandler.h"
 #include "contexthandler.h"
 #include "stringset.h"
 // Builds a spell checker over two shared word lists. Both shared pointers
 // are copied into the corresponding members; nothing else is initialised.
 Spellcheck::Spellcheck (
     boost::shared_ptr<StringSet<char> > dictionaryParameter,
     boost::shared_ptr<StringSet<char> > passiveDictionaryParameter )
 		: dictionary ( dictionaryParameter ),
 		  passiveDictionary ( passiveDictionaryParameter )
 {
 }
 // Intentionally empty: the shared_ptr members are destroyed automatically.
 Spellcheck::~Spellcheck()
 {
 }
 // Convenience overload: forwards the bytes and byte length of s to the
 // pointer-based checkWord and returns its verdict unchanged.
 bool Spellcheck::checkWord ( string &s )
 {
 	char *bytes = const_cast<char *> ( s.c_str() );
 	const size_t length = s.size();
 	return checkWord ( bytes, length );
 }
 // Proposes one correction for the word s. A fixed sequence of edits is
 // tried and the first accepted candidate is returned:
 //   1. swap two neighbouring bytes
 //   2. replace the byte at a position in 1..len-1 with a letter a-z
 //   3. split the word in two by inserting a space
 //   4. insert a letter a-z at a position in 1..len-1
 //   5. remove one byte
 // Positions holding a byte above 127 (part of a UTF-8 multibyte sequence)
 // are skipped in every strategy except the split. For strategies 1, 2, 4
 // and 5 a candidate must pass checkWord and be absent from the passive
 // dictionary; it is then handed to CaseHandler::adjustCase together with s.
 // Returns "---" when no strategy yields a candidate.
 string Spellcheck::getSuggestion (
     string &s )
 {
 	const size_t length = s.size();
 	string candidate;
 	// 1. transposition of neighbouring bytes
 	if ( length > 1 )
 	{
 		for ( size_t pos = 0; pos + 1 < length; ++pos )
 		{
 			if ( ( unsigned char ) s[pos] > 127 )
 				continue;
 			candidate = s;
 			candidate[pos] = s[pos + 1];
 			candidate[pos + 1] = s[pos];
 			if ( !checkWord ( candidate ) )
 				continue;
 			if ( passiveDictionary->find ( candidate ) != NULL )
 				continue;
 			CaseHandler::adjustCase ( candidate, s );
 			return candidate;
 		}
 	}
 	// 2. substitution (the first byte is never replaced)
 	for ( size_t pos = 1; pos < length; ++pos )
 	{
 		if ( ( unsigned char ) s[pos] > 127 )
 			continue;
 		for ( char letter = 'a'; letter <= 'z'; ++letter )
 		{
 			candidate = s;
 			candidate[pos] = letter;
 			if ( !checkWord ( candidate ) )
 				continue;
 			if ( passiveDictionary->find ( candidate ) != NULL )
 				continue;
 			CaseHandler::adjustCase ( candidate, s );
 			return candidate;
 		}
 	}
 	// 3. split into two words; no passive-dictionary or case handling here
 	if ( length > 2 )
 	{
 		char *raw = ( char * ) s.c_str();
 		for ( size_t split = 1; split < length; ++split )
 		{
 			const bool headKnown = checkWord ( raw, split );
 			if ( headKnown && checkWord ( raw + split, length - split ) )
 			{
 				candidate = s;
 				candidate.insert ( split, 1, ' ' );
 				return candidate;
 			}
 		}
 	}
 	// 4. insertion (never before the first byte, never after the last)
 	for ( size_t pos = 1; pos < length; ++pos )
 	{
 		if ( ( unsigned char ) s[pos] > 127 )
 			continue;
 		for ( char letter = 'a'; letter <= 'z'; ++letter )
 		{
 			candidate = s;
 			candidate.insert ( pos, 1, letter );
 			if ( !checkWord ( candidate ) )
 				continue;
 			if ( passiveDictionary->find ( candidate ) != NULL )
 				continue;
 			CaseHandler::adjustCase ( candidate, s );
 			return candidate;
 		}
 	}
 	// 5. deletion
 	if ( length > 2 )
 	{
 		for ( size_t pos = 0; pos < length; ++pos )
 		{
 			// leave UTF-8 multibyte sequences intact
 			if ( ( unsigned char ) s[pos] > 127 )
 				continue;
 			candidate = s;
 			candidate.erase ( pos, 1 );
 			if ( !checkWord ( candidate ) )
 				continue;
 			if ( passiveDictionary->find ( candidate ) != NULL )
 				continue;
 			CaseHandler::adjustCase ( candidate, s );
 			return candidate;
 		}
 	}
 	// sentinel: nothing found
 	return string ( "---" );
 }
 // Walks s word by word (via getWord) and appends one ContextMatch to v for
 // every word that checkWord rejects. Each match is obtained from
 // ContextHandler::getContext (contextRange is forwarded untouched), then
 // completed here: the suggestion for m.match is appended to m.replace,
 // elementCount is reset to 0 and offset is set to the word's byte offset
 // from the start of s.
 void Spellcheck::checkString (
     string &s,
     vector<ContextMatch> &v,
     int contextRange )
 {
 	char *start = ( char * ) s.c_str();
 	char *cursor = start;
 	char *word;
 	size_t wordLength;
 	while ( ( word = getWord ( &cursor, &wordLength ) ) != NULL )
 	{
 		// known words need no entry
 		if ( checkWord ( word, wordLength ) )
 			continue;
 		ContextMatch hit = ContextHandler::getContext (
 		                       word,
 		                       wordLength,
 		                       start,
 		                       contextRange );
 		// attach the proposed replacement
 		string proposal = getSuggestion ( hit.match );
 		hit.replace.append ( proposal );
 		hit.elementCount = 0;
 		hit.offset = word - start;
 		v.push_back ( hit );
 	}
 }
 // Reports whether the len bytes starting at s form a known word.
 // Words shorter than two bytes always pass. Otherwise the dictionary is
 // consulted up to three times: with the word as given, with everything
 // after the first byte lower-cased, and finally fully lower-cased.
 // Only ASCII bytes are lower-cased. Bytes above 127 belong to UTF-8
 // multibyte sequences and are left untouched: passing them to tolower as a
 // (possibly negative) char is undefined behaviour, and a single-byte locale
 // could otherwise rewrite them and corrupt the sequence.
 bool Spellcheck::checkWord ( char *s, size_t len )
 {
 	// pass empty strings and single-character words
 	if ( len < 2 )
 		return true;
 	string buffer ( s, len );
 	if ( dictionary->find ( buffer ) != NULL )
 		return true;
 	// lower-case with capital initial
 	for ( string::iterator it = buffer.begin() + 1; it != buffer.end(); ++it )
 	{
 		const unsigned char byte = ( unsigned char ) * it;
 		if ( byte < 128 )
 			*it = ( char ) tolower ( byte );
 	}
 	if ( dictionary->find ( buffer ) != NULL )
 		return true;
 	// lower-case throughout
 	const unsigned char first = ( unsigned char ) buffer[0];
 	if ( first < 128 )
 		buffer[0] = ( char ) tolower ( first );
 	if ( dictionary->find ( buffer ) != NULL )
 		return true;
 	return false;
 }
 // Classifies the character starting at s and stores its byte length in
 // *bytes (always at least 1). s must point into a NUL-terminated buffer.
 //
 // ASCII: only A-Z and a-z are word characters; everything else, including
 // '{' (123), is not.
 // UTF-8: the length is derived from the lead byte, so each multibyte
 // character is classified on its own instead of being merged with the
 // multibyte characters that follow it. A truncated sequence is cut short at
 // the first byte that is not a continuation byte, and a stray continuation
 // or invalid lead byte counts as a single byte. Multibyte characters are
 // word characters unless the lead byte marks one of the punctuation ranges
 // below.
 bool Spellcheck::isWordCharacter ( char *s, size_t *bytes )
 {
 	unsigned char *us = ( unsigned char * ) s;
 	const unsigned char lead = *us;
 	if ( lead < 128 )
 	{
 		*bytes = 1;
 		return ( lead >= 'A' && lead <= 'Z' ) ||
 		       ( lead >= 'a' && lead <= 'z' );
 	}
 	// expected sequence length according to the lead byte
 	size_t expected = 1;
 	if ( lead >= 0xC0 && lead < 0xE0 )
 		expected = 2;
 	else if ( lead >= 0xE0 && lead < 0xF0 )
 		expected = 3;
 	else if ( lead >= 0xF0 && lead < 0xF8 )
 		expected = 4;
 	// consume only continuation bytes (10xxxxxx) that are really present,
 	// which also keeps us from stepping over the terminating NUL
 	size_t consumed = 1;
 	while ( consumed < expected && ( us[consumed] & 0xC0 ) == 0x80 )
 		++consumed;
 	*bytes = consumed;
 	// Unicode punctuation marks
 	// Based on http://www1.tip.nl/~t876506/utf8tbl.html
 	const bool punctuation =
 	    ( lead == 226 && us[1] == 128 ) ||
 	    lead == 194 ||
 	    lead == 203;
 	return !punctuation;
 }
 // Tokeniser: finds the next word at or after *s.
 // Returns a pointer to the first byte of the word and stores its byte
 // length in *len, or returns NULL (leaving *s and *len untouched) when only
 // non-word characters remain. On success *s is advanced so that the next
 // call resumes after the word: past the character that ended it, or at the
 // terminating NUL when the word runs to the end of the buffer.
 // The returned word is not NUL-terminated; use *len.
 char *Spellcheck::getWord ( char **s, size_t *len )
 {
 	size_t bytes;
 	char *start = *s;
 	// skip everything that cannot begin a word
 	while ( *start && !isWordCharacter ( start, &bytes ) )
 		start += bytes;
 	if ( ! ( *start ) )
 		return NULL;
 	// start is a word character and bytes holds its length
 	char *end = start + bytes;
 	while ( *end )
 	{
 		if ( !isWordCharacter ( end, &bytes ) )
 		{
 			*len = end - start;
 			// resume after the character that terminated the word
 			*s = end + bytes;
 			return start;
 		}
 		end += bytes;
 	}
 	// the word extends to the end of the buffer
 	*len = end - start;
 	*s = end;
 	return start;
 }
--- a/src/spellcheck.h
+++ b/src/spellcheck.h
@ -1,51 +0,0 @@
 /*
 * Copyright 2005-2007 Gerald Schmidt.
 *
 * This file is part of Xml Copy Editor.
 *
 * Xml Copy Editor is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2 of the License.
 *
 * Xml Copy Editor is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Xml Copy Editor; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 #ifndef SPELLCHECK_H
 #define SPELLCHECK_H
 #include <set>
 #include <string>
 #include <vector>
 #include <boost/shared_ptr.hpp>
 #include "contexthandler.h"
 #include "stringset.h"
 using namespace std;
 // Dictionary-backed spell checker. checkString reports the words of a
 // text that are not in the dictionary; getSuggestion proposes a correction
 // for a single word.
 class Spellcheck
 {
 	public:
 		// Both word lists are held through shared_ptr copies of the arguments.
 		Spellcheck (
 		    boost::shared_ptr<StringSet<char> > dictionaryParameter,
 		    boost::shared_ptr<StringSet<char> > passiveDictionaryParameter );
 		~Spellcheck();
 		// True when s is accepted as a word. Not declared inline: the
 		// definition lives in spellcheck.cpp, and an inline function has to be
 		// defined in every translation unit that calls it.
 		bool checkWord ( string &s );
 		// Appends one ContextMatch to v for every unknown word found in s;
 		// contextRange is passed on to ContextHandler::getContext.
 		void checkString (
 		    string &s,
 		    vector<ContextMatch> &v,
 		    int contextRange );
 		// Returns a proposed correction for s, or "---" when none is found.
 		string getSuggestion ( string &s );
 	private:
 		// dictionary is searched by checkWord; passiveDictionary is searched
 		// by getSuggestion, which discards candidates found in it.
 		boost::shared_ptr<StringSet<char> > dictionary, passiveDictionary;
 		// Checks the len bytes starting at s (no NUL terminator required).
 		bool checkWord ( char *s, size_t len );
 		// Returns the next word at or after *s and advances *s past it;
 		// NULL when no word is left.
 		char *getWord ( char **s, size_t *len );
 		// Classifies the character at s and reports its byte length in *bytes.
 		// Defined out of line in spellcheck.cpp, hence not declared inline.
 		bool isWordCharacter ( char *s, size_t *bytes );
 };
 #endif