2007-09-12 01:14:06 +02:00
|
|
|
/*
 * Copyright 2005-2007 Gerald Schmidt.
 *
 * This file is part of Xml Copy Editor.
 *
 * Xml Copy Editor is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; version 2 of the License.
 *
 * Xml Copy Editor is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Xml Copy Editor; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
|
|
|
|
|
2007-09-07 23:17:30 +02:00
|
|
|
#include <cstring>
#include <stdexcept>
#include <string>
#include <vector>
#include <expat.h>
#include "xmlrulereader.h"
#include "rule.h"
#include "stringset.h"
|
|
|
|
|
|
|
|
using namespace std;
|
|
|
|
|
2007-09-08 00:25:30 +02:00
|
|
|
// Builds the parser user-data object around the three shared containers
// handed in by the caller. The containers are stored, not copied, so
// whatever the parse callbacks add to them is visible to the caller.
RuleData::RuleData (
    boost::shared_ptr<StringSet<char> > dictionaryParameter,
    boost::shared_ptr<StringSet<char> > passiveDictionaryParameter,
    boost::shared_ptr<vector<boost::shared_ptr<Rule> > > ruleVectorParameter )
    : dictionary ( dictionaryParameter ),
      passiveDictionary ( passiveDictionaryParameter ),
      ruleVector ( ruleVectorParameter )
{
    // Nothing has been read yet: no rules counted, no dictionary term seen.
    ruleCount = 0;
    dictionaryFound = false;

    // Start with every per-element attribute flag cleared.
    initialiseAttributes();
}
|
|
|
|
|
2007-09-08 00:25:30 +02:00
|
|
|
// Nothing to release explicitly; members clean up after themselves.
RuleData::~RuleData()
{
}
|
|
|
|
|
|
|
|
// Creates the reader: allocates the RuleData user-data object around the
// caller's containers and wires the expat parser `p` to this class's
// element and character-data callbacks.
XmlRuleReader::XmlRuleReader (
    boost::shared_ptr<StringSet<char> > dictionaryParameter,
    boost::shared_ptr<StringSet<char> > passiveDictionaryParameter,
    boost::shared_ptr<vector<boost::shared_ptr<Rule> > > ruleVectorParameter )
    : ud ( new RuleData ( dictionaryParameter,
                          passiveDictionaryParameter,
                          ruleVectorParameter ) )
{
    // The callbacks only receive the user data, so give it the parser
    // handle (end() needs it to stop the parse) and a defined start state.
    ud->p = p;
    ud->setState ( STATE_UNKNOWN );

    // Register the callbacks and the user data they will be handed.
    XML_SetCharacterDataHandler ( p, characterdata );
    XML_SetElementHandler ( p, start, end );
    XML_SetUserData ( p, ud.get() );
}
|
|
|
|
|
|
|
|
// No explicit cleanup is performed here.
XmlRuleReader::~XmlRuleReader()
{
}
|
2007-09-07 23:17:30 +02:00
|
|
|
|
|
|
|
int XmlRuleReader::getRuleCount()
|
|
|
|
{
|
2007-09-08 00:25:30 +02:00
|
|
|
return ud->ruleCount;
|
2007-09-07 23:17:30 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// Returns the message recorded when building a rule threw an exception
// (the offending <find> pattern plus the exception text).
string XmlRuleReader::getIncorrectPatternReport()
{
    return ( ud->incorrectPatternReport );
}
|
|
|
|
|
2007-09-08 00:25:30 +02:00
|
|
|
// Copies the text of every <exclude> element collected so far into v,
// replacing v's previous contents.
void XmlRuleReader::getExcludeVector ( vector<string> &v )
{
    v = ( ud->excludeVector );
}
|
|
|
|
|
2007-09-08 00:25:30 +02:00
|
|
|
// Copies the text of every <include> element collected so far into v,
// replacing v's previous contents.
void XmlRuleReader::getIncludeVector ( vector<string> &v )
{
    v = ( ud->includeVector );
}
|
|
|
|
|
2007-09-08 00:25:30 +02:00
|
|
|
void XMLCALL XmlRuleReader::start (
|
|
|
|
void *data,
|
|
|
|
const XML_Char *el,
|
|
|
|
const XML_Char **attr )
|
2007-09-07 23:17:30 +02:00
|
|
|
{
|
2007-09-08 00:25:30 +02:00
|
|
|
RuleData *ud;
|
|
|
|
ud = ( RuleData * ) data;
|
|
|
|
|
|
|
|
if ( !strcmp ( el, "rule" ) )
|
|
|
|
ud->setState ( STATE_IN_RULE );
|
|
|
|
else if ( !strcmp ( el, "term" ) )
|
2007-09-07 23:17:30 +02:00
|
|
|
{
|
2007-09-08 00:25:30 +02:00
|
|
|
ud->setState ( STATE_IN_TERM );
|
|
|
|
while ( *attr )
|
|
|
|
{
|
|
|
|
if ( !strcmp ( *attr, "passive" ) && !strcmp ( * ( attr + 1 ), "true" ) )
|
|
|
|
ud->passive = true;
|
|
|
|
attr += 2;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else if ( !strcmp ( el, "find" ) )
|
2007-09-07 23:17:30 +02:00
|
|
|
{
|
2007-09-08 00:25:30 +02:00
|
|
|
while ( *attr )
|
|
|
|
{
|
|
|
|
if ( !strcmp ( *attr, "matchcase" ) && !strcmp ( * ( attr + 1 ), "true" ) )
|
|
|
|
ud->matchcase = true;
|
|
|
|
else if ( !strcmp ( *attr, "cipher" ) && !strcmp ( * ( attr + 1 ), "true" ) )
|
|
|
|
ud->cipher = true;
|
|
|
|
attr += 2;
|
|
|
|
}
|
|
|
|
ud->setState ( STATE_IN_FIND );
|
2007-09-07 23:17:30 +02:00
|
|
|
}
|
2007-09-08 00:25:30 +02:00
|
|
|
else if ( !strcmp ( el, "replace" ) )
|
2007-09-07 23:17:30 +02:00
|
|
|
{
|
2007-09-08 00:25:30 +02:00
|
|
|
while ( *attr )
|
|
|
|
{
|
|
|
|
if ( !strcmp ( *attr, "adjustcase" ) && !strcmp ( * ( attr + 1 ), "true" ) )
|
|
|
|
ud->adjustcase = true;
|
|
|
|
else if ( !strcmp ( *attr, "tentative" ) && !strcmp ( * ( attr + 1 ), "true" ) )
|
|
|
|
ud->tentative = true;
|
|
|
|
attr += 2;
|
|
|
|
}
|
|
|
|
ud->setState ( STATE_IN_REPLACE );
|
2007-09-07 23:17:30 +02:00
|
|
|
}
|
2007-09-08 00:25:30 +02:00
|
|
|
else if ( !strcmp ( el, "report" ) )
|
|
|
|
ud->setState ( STATE_IN_REPORT );
|
|
|
|
else if ( !strcmp ( el, "exclude" ) )
|
|
|
|
ud->setState ( STATE_IN_EXCLUDE );
|
|
|
|
else if ( !strcmp ( el, "include" ) )
|
|
|
|
ud->setState ( STATE_IN_INCLUDE );
|
|
|
|
else if ( !strcmp ( el, "title" ) )
|
|
|
|
ud->setState ( STATE_IN_TITLE );
|
|
|
|
else
|
|
|
|
ud->setState ( STATE_UNKNOWN );
|
2007-09-07 23:17:30 +02:00
|
|
|
}
|
|
|
|
|
2007-09-08 00:25:30 +02:00
|
|
|
// Expat end-element callback.
//   data - the RuleData registered as parser user data
//   el   - name of the element being closed
// Commits whatever text was accumulated for the element:
//   </term>              adds the term to the dictionary (and to the
//                        passive dictionary when flagged passive)
//   </rule>              builds a Rule from the collected find/replace
//                        text and appends it to the rule vector
//   </exclude>/</include> append the collected text to the matching vector
//   </dictionary>        counts as one rule if any term was stored
// If building a Rule throws, the message is stored in
// incorrectPatternReport and the parser is stopped (not resumable).
void XMLCALL XmlRuleReader::end ( void *data, const XML_Char *el )
{
    RuleData *ud = static_cast<RuleData *> ( data );

    if ( !strcmp ( el, "term" ) )
    {
        if ( ud->term != "" )
        {
            ud->dictionary->insert ( ud->term );
            ud->dictionaryFound = true;
            if ( ud->passive )
                ud->passiveDictionary->insert ( ud->term );

            ud->term = "";
        }
        // Clear the flag even when the term was empty; otherwise
        // <term passive="true"></term> would mark the *next* term passive.
        ud->passive = false;
        ud->setState ( STATE_UNKNOWN );
    }
    // handle end of rule
    else if ( !strcmp ( el, "rule" ) )
    {
        try
        {
            boost::shared_ptr<Rule> rule ( new Rule (
                                               ud->find,
                                               ud->matchcase,
                                               ud->replace ) );

            // Report text is "<title>" or "<title>: <report>".
            string report = ud->title;
            if ( ud->report != "" )
            {
                report += ": ";
                report += ud->report;
            }
            rule->setReport ( report );

            rule->setTentativeAttribute ( ud->tentative );
            rule->setAdjustCaseAttribute ( ud->adjustcase );
            ud->ruleVector->push_back ( rule );
            ++ ( ud->ruleCount );

            // Reset the per-rule buffers and flags for the next <rule>.
            ud->find = "";
            ud->replace = "";
            ud->report = "";
            ud->setState ( STATE_UNKNOWN );
            ud->initialiseAttributes();
        }
        catch ( const exception &e )
        {
            ud->incorrectPatternReport = "Cannot compile: " +
                                         ud->find +
                                         "\r\nError: " +
                                         e.what();
            XML_StopParser ( ud->p, XML_FALSE );
        }
    }
    else if ( !strcmp ( el, "find" ) )
        ud->setState ( STATE_UNKNOWN );
    else if ( !strcmp ( el, "replace" ) )
        ud->setState ( STATE_UNKNOWN );
    else if ( !strcmp ( el, "report" ) )
        ud->setState ( STATE_UNKNOWN );

    // handle excludes/includes
    else if ( !strcmp ( el, "exclude" ) )
    {
        ud->excludeVector.push_back ( ud->exclude );
        ud->exclude = "";
        ud->setState ( STATE_UNKNOWN );
    }
    else if ( !strcmp ( el, "include" ) )
    {
        ud->includeVector.push_back ( ud->include );
        ud->include = "";
        ud->setState ( STATE_UNKNOWN );
    }
    else if ( !strcmp ( el, "title" ) )
        ud->setState ( STATE_UNKNOWN );

    // count each dictionary as one rule
    else if ( !strcmp ( el, "dictionary" ) )
    {
        if ( ud->dictionaryFound )
            ++ ( ud->ruleCount );
    }
}
|
|
|
|
|
2007-09-08 00:25:30 +02:00
|
|
|
// Expat character-data callback.
//   data - the RuleData registered as parser user data
//   s    - text fragment; the given length is used, no terminator is assumed
//   len  - number of characters in s
// Appends the fragment to whichever buffer matches the current parse
// state. Text is appended rather than assigned, so an element's content
// may arrive over several calls. Text seen in any other state is dropped.
void XMLCALL XmlRuleReader::characterdata (
    void *data,
    const XML_Char *s,
    int len )
{
    RuleData *ud = static_cast<RuleData *> ( data );
    const int state = ud->getState();

    if ( state == STATE_IN_FIND )
        ud->find.append ( s, len );
    else if ( state == STATE_IN_REPLACE )
        ud->replace.append ( s, len );
    else if ( state == STATE_IN_REPORT )
        ud->report.append ( s, len );
    else if ( state == STATE_IN_EXCLUDE )
        ud->exclude.append ( s, len );
    else if ( state == STATE_IN_INCLUDE )
        ud->include.append ( s, len );
    else if ( state == STATE_IN_TITLE )
        ud->title.append ( s, len );
    else if ( state == STATE_IN_TERM )
        ud->term.append ( s, len );
}
|
|
|
|
|
|
|
|
void RuleData::initialiseAttributes()
|
|
|
|
{
|
2007-09-08 00:25:30 +02:00
|
|
|
matchcase = adjustcase = tentative = passive = false;
|
2007-09-07 23:17:30 +02:00
|
|
|
}
|