xml-copy-editor-code/src/housestyle.cpp

347 lines
9.0 KiB
C++
Raw Normal View History

2008-01-06 23:13:01 +01:00
/*
* Copyright 2005-2007 Gerald Schmidt.
2008-01-06 23:13:01 +01:00
*
* This file is part of Xml Copy Editor.
2008-01-06 23:13:01 +01:00
*
* Xml Copy Editor is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
2008-01-06 23:13:01 +01:00
*
* Xml Copy Editor is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
2008-01-06 23:13:01 +01:00
*
* You should have received a copy of the GNU General Public License
* along with Xml Copy Editor; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <cstring>
2007-09-07 23:17:30 +02:00
#include "housestyle.h"
#include "readfile.h"
2007-09-08 00:25:30 +02:00
// Constructs a checker from the supplied settings.
//
// Each argument is stored in the member of the matching name. The rule
// vector and the two dictionaries are newly allocated here; no rule or
// filter file is read by the constructor itself.
// The two Aspell path parameters exist only in __WXMSW__ builds.
HouseStyle::HouseStyle (
    int typeParameter,
    const std::string& bufferParameter,
    const std::string& ruleDirectoryParameter,
    const std::string& ruleFileParameter,
    const std::string& filterDirectoryParameter,
    const std::string& filterFileParameter,
    const std::string& pathSeparatorParameter,
#ifdef __WXMSW__
    const std::string& aspellDataPathParameter,
    const std::string& aspellDictPathParameter,
#endif
    int contextRangeParameter )
    : type ( typeParameter )
    , buffer ( bufferParameter )
    , ruleDirectory ( ruleDirectoryParameter )
    , ruleFile ( ruleFileParameter )
    , filterDirectory ( filterDirectoryParameter )
    , filterFile ( filterFileParameter )
    , pathSeparator ( pathSeparatorParameter )
#ifdef __WXMSW__
    , aspellDataPath ( aspellDataPathParameter )
    , aspellDictPath ( aspellDictPathParameter )
#endif
    , contextRange ( contextRangeParameter )
    , ruleVector ( new std::vector<boost::shared_ptr<Rule> > )
    , dictionary ( new StringSet<char> )
    , passiveDictionary ( new StringSet<char> )
{
    // all state is set up in the initializer list
}
2007-09-07 23:17:30 +02:00
// The destructor body is intentionally empty; nothing is released by hand.
HouseStyle::~HouseStyle()
{
}
2007-09-07 23:17:30 +02:00
2007-09-08 00:25:30 +02:00
// Filter collection is switched off (since v. 1.1.0.7): the function returns
// straight away and leaves fileName, excludeSet and *filterCount untouched.
// The previous implementation is retained below as a comment for reference.
void HouseStyle::collectFilter (
    std::string& fileName,
    std::set<std::string>& excludeSet,
    int *filterCount )
{
    // from v. 1.1.0.7: always ignore
    // (formerly: if ( type == HS_TYPE_SPELL || fileName == "(No filter)" ) )
    return;

    // ---- disabled implementation -------------------------------------
    // string filePath, buffer;
    // filePath = filterDirectory + pathSeparator + fileName;
    // if ( !ReadFile::run ( filePath, buffer ) )
    //     return;
    //
    // XmlFilterReader xfr;
    // if ( !xfr.parse ( buffer ) )
    // {
    //     std::string report = xfr.getLastError();
    //     throw runtime_error ( report.c_str() );
    // }
    //
    // std::map<std::string, std::map<std::string, std::set<std::string> > >
    //     temporaryMap;
    // std::map<std::string, std::map<std::string, std::set<std::string> > >::iterator
    //     temporaryMapIterator;
    // xfr.getFilterMap ( temporaryMap );
    // for ( temporaryMapIterator = temporaryMap.begin();
    //         temporaryMapIterator != temporaryMap.end();
    //         ++temporaryMapIterator )
    // {
    //     filterMap.insert ( *temporaryMapIterator );
    //     ( *filterCount ) ++;
    // }
    //
    // // add current file to exclude set
    // excludeSet.insert ( fileName );
    //
    // // fetch exclude vector
    // std::vector<std::string> localExcludeVector;
    // std::vector<std::string>::iterator excludeIterator;
    // xfr.getExcludeVector ( localExcludeVector );
    // for ( excludeIterator = localExcludeVector.begin();
    //         excludeIterator != localExcludeVector.end();
    //         excludeIterator++ )
    //     excludeSet.insert ( *excludeIterator );
    //
    // // fetch include vector
    // std::vector<std::string> includeVector;
    // std::vector<std::string>::iterator includeIterator;
    // xfr.getIncludeVector ( includeVector );
    // if ( includeVector.empty() )
    //     return;
    // for ( includeIterator = includeVector.begin();
    //         includeIterator != includeVector.end();
    //         includeIterator++ )
    // {
    //     if ( !excludeSet.count ( *includeIterator ) )
    //         collectFilter ( *includeIterator, excludeSet, filterCount );
    // }
    // -------------------------------------------------------------------
}
2007-09-08 00:25:30 +02:00
void HouseStyle::collectRules ( string& fileName,
boost::shared_ptr<std::vector<boost::shared_ptr<Rule> > > ruleVector,
std::set<string>& excludeSet,
2008-01-06 23:13:01 +01:00
int *ruleCount )
2007-09-07 23:17:30 +02:00
{
if (type == HS_TYPE_SPELL)
return;
2008-01-06 23:13:01 +01:00
std::string filePath, buffer;
filePath = ruleDirectory + pathSeparator + fileName;
if ( !ReadFile::run ( filePath, buffer ) )
return;
std::auto_ptr<XmlRuleReader> xrr ( new XmlRuleReader (
dictionary,
passiveDictionary,
ruleVector ) );
if ( !xrr->parse ( buffer ) )
{
std::string report = xrr->getIncorrectPatternReport();
if ( report != "" )
throw runtime_error ( report.c_str() );
else
throw runtime_error ( xrr->getLastError().c_str() );
}
// add current file to exclude set
excludeSet.insert ( fileName );
// fetch exclude vector
std::vector<std::string> localExcludeVector;
std::vector<std::string>::iterator excludeIterator;
xrr->getExcludeVector ( localExcludeVector );
for ( excludeIterator = localExcludeVector.begin();
excludeIterator != localExcludeVector.end();
excludeIterator++ )
excludeSet.insert ( *excludeIterator );
* ( ruleCount ) += xrr->getRuleCount();
// fetch include vector
std::vector<std::string> includeVector;
xrr->getIncludeVector ( includeVector );
std::vector<std::string>::iterator includeIterator;
for ( includeIterator = includeVector.begin();
includeIterator != includeVector.end();
includeIterator++ )
{
if ( !excludeSet.count ( *includeIterator ) )
collectRules ( *includeIterator, ruleVector, excludeSet, ruleCount );
}
2007-09-07 23:17:30 +02:00
}
2007-09-08 00:25:30 +02:00
2007-09-07 23:17:30 +02:00
bool HouseStyle::createReport()
{
if ( type == HS_TYPE_STYLE && !updateRules() )
2008-01-06 23:13:01 +01:00
{
error = "no rules found";
return false;
}
/*
2008-01-06 23:13:01 +01:00
updateFilter();
auto_ptr<HouseStyleReader> xtr ( new HouseStyleReader ( filterMap ) );
if ( !xtr->parse ( buffer ) )
{
error = "file is not well-formed";
return false;
}
*/
2008-01-06 23:13:01 +01:00
std::vector<std::pair<std::string, unsigned> > nodeVector;
//xtr->getNodeVector ( nodeVector );
nodeVector.push_back( make_pair ( buffer, 0 ) ); // new from 1.1.0.7
2008-01-06 23:13:01 +01:00
int ruleVectorsize, nodeVectorSize;
std::vector<ContextMatch> contextVector;
std::vector<ContextMatch>::iterator matchIterator;
ruleVectorsize = ruleVector->size();
nodeVectorSize = nodeVector.size();
WrapAspell *spellcheck = NULL;
try {
if (type == HS_TYPE_SPELL)
2009-01-17 23:52:54 +01:00
spellcheck = new WrapAspell(
2009-01-18 00:46:44 +01:00
ruleFile // carries lang information
2009-01-17 23:52:54 +01:00
#ifdef __WXMSW__
2009-01-18 00:46:44 +01:00
, aspellDataPath,
2009-01-17 23:52:54 +01:00
aspellDictPath
#endif
);
}
catch (...)
{
error = "Cannot initialise spellcheck";
return false;
}
2008-01-06 23:13:01 +01:00
std::string nodeBuffer;
unsigned elementCount;
for ( int j = 0; j < nodeVectorSize; ++j )
{
nodeBuffer = nodeVector.at ( j ).first;
elementCount = nodeVector.at ( j ).second;
if ( !nodeBuffer.size() )
continue;
// try spelling first
if ( type == HS_TYPE_SPELL && spellcheck )
{
spellcheck->checkString (
nodeBuffer,
contextVector,
contextRange );
for ( matchIterator = contextVector.begin();
matchIterator != contextVector.end();
matchIterator++ )
{
matchIterator->report = "Not in dictionary";
matchIterator->elementCount = elementCount;
matchVector.push_back ( *matchIterator );
}
contextVector.clear();
continue; // bail out before we reach style loop
}
// otherwise, proceed with style check
2008-01-06 23:13:01 +01:00
for ( int i = 0; i < ruleVectorsize; i++ )
{
if ( type == HS_TYPE_STYLE )
2008-01-06 23:13:01 +01:00
{
boost::shared_ptr<Rule> rule ( ruleVector->at ( i ) );
if ( rule->matchPatternGlobal (
nodeBuffer,
contextVector,
elementCount,
contextRange ) )
{
std::string report = rule->getReport();
for ( matchIterator = contextVector.begin();
matchIterator != contextVector.end();
matchIterator++ )
{
if ( rule->getAdjustCaseAttribute() )
CaseHandler::adjustCase (
matchIterator->replace,
matchIterator->match );
// tentative?
matchIterator->tentative =
( rule->getTentativeAttribute() ) ? true : false;
matchIterator->report = report;
matchVector.push_back ( *matchIterator );
}
contextVector.clear();
}
}
/*
// check spelling
else // if ( !dictionary->empty() )
{
spellcheck->checkString (
nodeBuffer,
contextVector,
contextRange );
2008-01-06 23:13:01 +01:00
for ( matchIterator = contextVector.begin();
matchIterator != contextVector.end();
matchIterator++ )
2008-01-06 23:13:01 +01:00
{
matchIterator->report = "Not in dictionary";
matchIterator->elementCount = elementCount;
2008-01-06 23:13:01 +01:00
matchVector.push_back ( *matchIterator );
}
contextVector.clear();
}
*/
2008-01-06 23:13:01 +01:00
}
}
2009-01-17 23:52:54 +01:00
delete spellcheck;
2008-01-06 23:13:01 +01:00
return true;
2007-09-07 23:17:30 +02:00
}
std::string HouseStyle::getLastError()
{
2008-01-06 23:13:01 +01:00
return error;
2007-09-07 23:17:30 +02:00
}
std::vector<ContextMatch> HouseStyle::getMatchVector()
{
2008-01-06 23:13:01 +01:00
return matchVector;
2007-09-07 23:17:30 +02:00
}
// Empties the rule vector and both dictionaries, then reloads them starting
// from ruleFile. Returns the rule count accumulated by collectRules().
int HouseStyle::updateRules()
{
    ruleVector->clear();
    dictionary->clear();
    passiveDictionary->clear();

    // files already visited (or excluded) while following includes
    std::set<std::string> visitedFiles;
    int total = 0;
    collectRules ( ruleFile, ruleVector, visitedFiles, &total );
    return total;
}
int HouseStyle::updateFilter()
{
2008-01-06 23:13:01 +01:00
filterMap.clear();
int filterCount = 0;
set<string> excludeSet;
collectFilter ( filterFile, excludeSet, &filterCount );
2007-09-07 23:17:30 +02:00
2008-01-06 23:13:01 +01:00
return filterCount;
2007-09-07 23:17:30 +02:00
}