/*
 * Copyright 2005-2007 Gerald Schmidt.
 *
 * This file is part of Xml Copy Editor.
 *
 * Xml Copy Editor is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2 of the License.
 *
 * Xml Copy Editor is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Xml Copy Editor; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 */
|
|
|
|
|
2007-09-07 23:17:30 +02:00
|
|
|
#include "housestyle.h"
|
|
|
|
#include "readfile.h"
|
|
|
|
|
2007-09-08 00:25:30 +02:00
|
|
|
// Builds a checker from its configuration.
//
// Every argument is copied into the member of the matching name; nothing is
// read from disk at this point. The rule vector and the two dictionaries are
// allocated empty.
//
//   typeParameter             check mode; compared against HS_TYPE_SPELL and
//                             HS_TYPE_STYLE elsewhere in this file
//   bufferParameter           document text that createReport() parses
//   ruleDirectoryParameter    directory prefix used when loading rule files
//   ruleFileParameter         root rule file (also handed to WrapAspell in
//                             spell mode)
//   filterDirectoryParameter  directory prefix used when loading filter files
//   filterFileParameter       root filter file
//   pathSeparatorParameter    string placed between directory and file name
//   contextRangeParameter     context range forwarded to the match routines
HouseStyle::HouseStyle (
    int typeParameter,
    const std::string& bufferParameter,
    const std::string& ruleDirectoryParameter,
    const std::string& ruleFileParameter,
    const std::string& filterDirectoryParameter,
    const std::string& filterFileParameter,
    const std::string& pathSeparatorParameter,
    int contextRangeParameter )
        : type ( typeParameter )
        , buffer ( bufferParameter )
        , ruleDirectory ( ruleDirectoryParameter )
        , ruleFile ( ruleFileParameter )
        , filterDirectory ( filterDirectoryParameter )
        , filterFile ( filterFileParameter )
        , pathSeparator ( pathSeparatorParameter )
        , contextRange ( contextRangeParameter )
        , ruleVector ( new std::vector<boost::shared_ptr<Rule> > )
        , dictionary ( new StringSet<char> )
        , passiveDictionary ( new StringSet<char> )
{
}
|
2007-09-07 23:17:30 +02:00
|
|
|
|
|
|
|
// Empty body: no explicit cleanup is performed here.
HouseStyle::~HouseStyle()
{
}
|
2007-09-07 23:17:30 +02:00
|
|
|
|
2007-09-08 00:25:30 +02:00
|
|
|
void HouseStyle::collectFilter (
|
|
|
|
std::string& fileName,
|
|
|
|
std::set<std::string>& excludeSet,
|
2008-01-06 23:13:01 +01:00
|
|
|
int *filterCount )
|
2007-09-07 23:17:30 +02:00
|
|
|
{
|
2008-01-22 00:05:47 +01:00
|
|
|
if ( type == HS_TYPE_SPELL || fileName == "(No filter)" )
|
2008-01-06 23:13:01 +01:00
|
|
|
return;
|
|
|
|
|
|
|
|
string filePath, buffer;
|
|
|
|
filePath = filterDirectory + pathSeparator + fileName;
|
|
|
|
|
|
|
|
if ( !ReadFile::run ( filePath, buffer ) )
|
|
|
|
return;
|
|
|
|
|
|
|
|
XmlFilterReader xfr;
|
|
|
|
if ( !xfr.parse ( buffer ) )
|
|
|
|
{
|
|
|
|
std::string report = xfr.getLastError();
|
|
|
|
throw runtime_error ( report.c_str() );
|
|
|
|
}
|
|
|
|
|
|
|
|
std::map<std::string, std::map<std::string, std::set<std::string> > >
|
|
|
|
temporaryMap;
|
|
|
|
std::map<std::string, std::map<std::string, std::set<std::string> > >::iterator
|
|
|
|
temporaryMapIterator;
|
|
|
|
xfr.getFilterMap ( temporaryMap );
|
|
|
|
|
|
|
|
for ( temporaryMapIterator = temporaryMap.begin();
|
|
|
|
temporaryMapIterator != temporaryMap.end();
|
|
|
|
++temporaryMapIterator )
|
|
|
|
{
|
|
|
|
filterMap.insert ( *temporaryMapIterator );
|
|
|
|
( *filterCount ) ++;
|
|
|
|
}
|
|
|
|
|
|
|
|
// add current file to exclude set
|
|
|
|
excludeSet.insert ( fileName );
|
|
|
|
|
|
|
|
// fetch exclude vector
|
|
|
|
std::vector<std::string> localExcludeVector;
|
|
|
|
std::vector<std::string>::iterator excludeIterator;
|
|
|
|
xfr.getExcludeVector ( localExcludeVector );
|
|
|
|
for ( excludeIterator = localExcludeVector.begin();
|
|
|
|
excludeIterator != localExcludeVector.end();
|
|
|
|
excludeIterator++ )
|
|
|
|
excludeSet.insert ( *excludeIterator );
|
|
|
|
|
|
|
|
// fetch include vector
|
|
|
|
std::vector<std::string> includeVector;
|
|
|
|
std::vector<std::string>::iterator includeIterator;
|
|
|
|
xfr.getIncludeVector ( includeVector );
|
|
|
|
|
|
|
|
if ( includeVector.empty() )
|
|
|
|
return;
|
|
|
|
|
|
|
|
for ( includeIterator = includeVector.begin();
|
|
|
|
includeIterator != includeVector.end();
|
|
|
|
includeIterator++ )
|
|
|
|
{
|
|
|
|
if ( !excludeSet.count ( *includeIterator ) )
|
|
|
|
collectFilter ( *includeIterator, excludeSet, filterCount );
|
|
|
|
}
|
2007-09-07 23:17:30 +02:00
|
|
|
}
|
2007-09-08 00:25:30 +02:00
|
|
|
|
|
|
|
void HouseStyle::collectRules ( string& fileName,
|
|
|
|
boost::shared_ptr<std::vector<boost::shared_ptr<Rule> > > ruleVector,
|
|
|
|
std::set<string>& excludeSet,
|
2008-01-06 23:13:01 +01:00
|
|
|
int *ruleCount )
|
2007-09-07 23:17:30 +02:00
|
|
|
{
|
2008-01-22 00:05:47 +01:00
|
|
|
if (type == HS_TYPE_SPELL)
|
|
|
|
return;
|
|
|
|
|
2008-01-06 23:13:01 +01:00
|
|
|
std::string filePath, buffer;
|
|
|
|
filePath = ruleDirectory + pathSeparator + fileName;
|
|
|
|
if ( !ReadFile::run ( filePath, buffer ) )
|
|
|
|
return;
|
|
|
|
|
|
|
|
std::auto_ptr<XmlRuleReader> xrr ( new XmlRuleReader (
|
|
|
|
dictionary,
|
|
|
|
passiveDictionary,
|
|
|
|
ruleVector ) );
|
|
|
|
if ( !xrr->parse ( buffer ) )
|
|
|
|
{
|
|
|
|
std::string report = xrr->getIncorrectPatternReport();
|
|
|
|
if ( report != "" )
|
|
|
|
throw runtime_error ( report.c_str() );
|
|
|
|
else
|
|
|
|
throw runtime_error ( xrr->getLastError().c_str() );
|
|
|
|
}
|
|
|
|
|
|
|
|
// add current file to exclude set
|
|
|
|
excludeSet.insert ( fileName );
|
|
|
|
|
|
|
|
// fetch exclude vector
|
|
|
|
std::vector<std::string> localExcludeVector;
|
|
|
|
std::vector<std::string>::iterator excludeIterator;
|
|
|
|
xrr->getExcludeVector ( localExcludeVector );
|
|
|
|
for ( excludeIterator = localExcludeVector.begin();
|
|
|
|
excludeIterator != localExcludeVector.end();
|
|
|
|
excludeIterator++ )
|
|
|
|
excludeSet.insert ( *excludeIterator );
|
|
|
|
|
|
|
|
* ( ruleCount ) += xrr->getRuleCount();
|
|
|
|
|
|
|
|
// fetch include vector
|
|
|
|
std::vector<std::string> includeVector;
|
|
|
|
xrr->getIncludeVector ( includeVector );
|
|
|
|
std::vector<std::string>::iterator includeIterator;
|
|
|
|
for ( includeIterator = includeVector.begin();
|
|
|
|
includeIterator != includeVector.end();
|
|
|
|
includeIterator++ )
|
|
|
|
{
|
|
|
|
if ( !excludeSet.count ( *includeIterator ) )
|
|
|
|
collectRules ( *includeIterator, ruleVector, excludeSet, ruleCount );
|
|
|
|
}
|
2007-09-07 23:17:30 +02:00
|
|
|
}
|
2007-09-08 00:25:30 +02:00
|
|
|
|
2007-09-07 23:17:30 +02:00
|
|
|
bool HouseStyle::createReport()
|
|
|
|
{
|
2008-01-22 00:05:47 +01:00
|
|
|
if ( type == HS_TYPE_STYLE && !updateRules() )
|
2008-01-06 23:13:01 +01:00
|
|
|
{
|
|
|
|
error = "no rules found";
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
updateFilter();
|
|
|
|
|
|
|
|
auto_ptr<HouseStyleReader> xtr ( new HouseStyleReader ( filterMap ) );
|
|
|
|
if ( !xtr->parse ( buffer ) )
|
|
|
|
{
|
|
|
|
error = "file is not well-formed";
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
std::vector<std::pair<std::string, unsigned> > nodeVector;
|
|
|
|
xtr->getNodeVector ( nodeVector );
|
2008-01-22 00:05:47 +01:00
|
|
|
|
2008-01-06 23:13:01 +01:00
|
|
|
int ruleVectorsize, nodeVectorSize;
|
|
|
|
|
|
|
|
std::vector<ContextMatch> contextVector;
|
|
|
|
std::vector<ContextMatch>::iterator matchIterator;
|
|
|
|
ruleVectorsize = ruleVector->size();
|
|
|
|
|
|
|
|
nodeVectorSize = nodeVector.size();
|
|
|
|
|
2008-01-22 00:05:47 +01:00
|
|
|
WrapAspell *spellcheck = NULL;
|
|
|
|
try {
|
|
|
|
if (type == HS_TYPE_SPELL)
|
|
|
|
spellcheck = new WrapAspell( ruleFile );
|
|
|
|
}
|
|
|
|
catch (...)
|
|
|
|
{
|
|
|
|
error = "Cannot initialise spellcheck";
|
|
|
|
return false;
|
|
|
|
}
|
2008-01-06 23:13:01 +01:00
|
|
|
|
|
|
|
std::string nodeBuffer;
|
|
|
|
unsigned elementCount;
|
|
|
|
for ( int j = 0; j < nodeVectorSize; ++j )
|
|
|
|
{
|
|
|
|
nodeBuffer = nodeVector.at ( j ).first;
|
|
|
|
elementCount = nodeVector.at ( j ).second;
|
|
|
|
|
|
|
|
if ( !nodeBuffer.size() )
|
|
|
|
continue;
|
|
|
|
|
2008-01-22 00:05:47 +01:00
|
|
|
// try spelling first
|
|
|
|
if ( type == HS_TYPE_SPELL && spellcheck )
|
|
|
|
{
|
|
|
|
spellcheck->checkString (
|
|
|
|
nodeBuffer,
|
|
|
|
contextVector,
|
|
|
|
contextRange );
|
|
|
|
|
|
|
|
for ( matchIterator = contextVector.begin();
|
|
|
|
matchIterator != contextVector.end();
|
|
|
|
matchIterator++ )
|
|
|
|
{
|
|
|
|
matchIterator->report = "Not in dictionary";
|
|
|
|
matchIterator->elementCount = elementCount;
|
|
|
|
matchVector.push_back ( *matchIterator );
|
|
|
|
}
|
|
|
|
contextVector.clear();
|
|
|
|
continue; // bail out before we reach style loop
|
|
|
|
}
|
|
|
|
|
|
|
|
// otherwise, proceed with style check
|
2008-01-06 23:13:01 +01:00
|
|
|
for ( int i = 0; i < ruleVectorsize; i++ )
|
|
|
|
{
|
2008-01-22 00:05:47 +01:00
|
|
|
if ( type == HS_TYPE_STYLE )
|
2008-01-06 23:13:01 +01:00
|
|
|
{
|
2008-01-22 00:05:47 +01:00
|
|
|
boost::shared_ptr<Rule> rule ( ruleVector->at ( i ) );
|
|
|
|
if ( rule->matchPatternGlobal (
|
|
|
|
nodeBuffer,
|
|
|
|
contextVector,
|
|
|
|
elementCount,
|
|
|
|
contextRange ) )
|
|
|
|
{
|
|
|
|
std::string report = rule->getReport();
|
|
|
|
|
|
|
|
for ( matchIterator = contextVector.begin();
|
|
|
|
matchIterator != contextVector.end();
|
|
|
|
matchIterator++ )
|
|
|
|
{
|
|
|
|
if ( rule->getAdjustCaseAttribute() )
|
|
|
|
CaseHandler::adjustCase (
|
|
|
|
matchIterator->replace,
|
|
|
|
matchIterator->match );
|
|
|
|
|
|
|
|
// tentative?
|
|
|
|
matchIterator->tentative =
|
|
|
|
( rule->getTentativeAttribute() ) ? true : false;
|
|
|
|
|
|
|
|
matchIterator->report = report;
|
|
|
|
|
|
|
|
matchVector.push_back ( *matchIterator );
|
|
|
|
}
|
|
|
|
contextVector.clear();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
// check spelling
|
|
|
|
else // if ( !dictionary->empty() )
|
|
|
|
{
|
|
|
|
spellcheck->checkString (
|
|
|
|
nodeBuffer,
|
|
|
|
contextVector,
|
|
|
|
contextRange );
|
2008-01-06 23:13:01 +01:00
|
|
|
|
|
|
|
for ( matchIterator = contextVector.begin();
|
2008-01-22 00:05:47 +01:00
|
|
|
matchIterator != contextVector.end();
|
|
|
|
matchIterator++ )
|
2008-01-06 23:13:01 +01:00
|
|
|
{
|
2008-01-22 00:05:47 +01:00
|
|
|
matchIterator->report = "Not in dictionary";
|
|
|
|
matchIterator->elementCount = elementCount;
|
2008-01-06 23:13:01 +01:00
|
|
|
|
|
|
|
matchVector.push_back ( *matchIterator );
|
|
|
|
}
|
|
|
|
contextVector.clear();
|
|
|
|
}
|
2008-01-22 00:05:47 +01:00
|
|
|
*/
|
2008-01-06 23:13:01 +01:00
|
|
|
}
|
|
|
|
}
|
2008-01-22 00:05:47 +01:00
|
|
|
delete spellcheck; // ok if NULL
|
2008-01-06 23:13:01 +01:00
|
|
|
return true;
|
2007-09-07 23:17:30 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
std::string HouseStyle::getLastError()
|
|
|
|
{
|
2008-01-06 23:13:01 +01:00
|
|
|
return error;
|
2007-09-07 23:17:30 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
std::vector<ContextMatch> HouseStyle::getMatchVector()
|
|
|
|
{
|
2008-01-06 23:13:01 +01:00
|
|
|
return matchVector;
|
2007-09-07 23:17:30 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
int HouseStyle::updateRules()
|
|
|
|
{
|
2008-01-06 23:13:01 +01:00
|
|
|
ruleVector->clear();
|
|
|
|
dictionary->clear();
|
|
|
|
passiveDictionary->clear();
|
|
|
|
|
|
|
|
int ruleCount = 0;
|
|
|
|
set<string> excludeSet;
|
|
|
|
collectRules ( ruleFile, ruleVector, excludeSet, &ruleCount );
|
|
|
|
return ruleCount;
|
2007-09-07 23:17:30 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
int HouseStyle::updateFilter()
|
|
|
|
{
|
2008-01-06 23:13:01 +01:00
|
|
|
filterMap.clear();
|
|
|
|
int filterCount = 0;
|
|
|
|
set<string> excludeSet;
|
|
|
|
collectFilter ( filterFile, excludeSet, &filterCount );
|
2007-09-07 23:17:30 +02:00
|
|
|
|
2008-01-06 23:13:01 +01:00
|
|
|
return filterCount;
|
2007-09-07 23:17:30 +02:00
|
|
|
}
|