2007-09-12 01:14:06 +02:00
|
|
|
/*
|
|
|
|
* Copyright 2005-2007 Gerald Schmidt.
|
|
|
|
*
|
|
|
|
* This file is part of Xml Copy Editor.
|
|
|
|
*
|
|
|
|
* Xml Copy Editor is free software; you can redistribute it and/or modify
|
|
|
|
* it under the terms of the GNU General Public License as published by
|
|
|
|
* the Free Software Foundation; version 2 of the License.
|
|
|
|
*
|
|
|
|
* Xml Copy Editor is distributed in the hope that it will be useful,
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
* GNU General Public License for more details.
|
|
|
|
*
|
|
|
|
* You should have received a copy of the GNU General Public License
|
|
|
|
* along with Xml Copy Editor; if not, write to the Free Software
|
|
|
|
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
*/
|
|
|
|
|
2007-09-07 23:17:30 +02:00
|
|
|
#include <iostream>
|
|
|
|
#include <string>
|
|
|
|
#include <ctype.h>
|
|
|
|
#include <stdexcept>
|
|
|
|
#include "wrapregex.h"
|
|
|
|
#include "contexthandler.h"
|
|
|
|
|
|
|
|
using namespace std;
|
|
|
|
|
2007-09-08 00:25:30 +02:00
|
|
|
/*
 * Builds a regex wrapper around a compiled PCRE pattern.
 *
 * pattern               regular expression source; an empty pattern or the
 *                       literal ".*" leaves the object disabled, in which
 *                       case every other method is a no-op
 * matchCase             false adds PCRE_CASELESS; PCRE_UTF8 is always set
 * replaceParameter      replacement template, copied into `replace`
 * arrayLengthParameter  number of ints allocated for the PCRE output vector
 *
 * Throws runtime_error carrying the PCRE error text when the pattern does
 * not compile.
 */
WrapRegex::WrapRegex (
    const string& pattern,
    bool matchCase,
    const string& replaceParameter,
    const int arrayLengthParameter ) :
        replace ( replaceParameter ),
        arrayLength ( arrayLengthParameter )
{
    // Give every member that other methods read a defined value before any
    // early exit, so a disabled object never carries indeterminate pointers.
    disabled = true;
    returnValue = 0;
    matchArray = NULL;
    patternStructure = NULL;
    patternExtraStructure = NULL;

    if ( pattern.empty() || pattern == ".*" )
        return;

    matchArray = new int[arrayLength];

    // compile
    int optionsFlag = ( matchCase ) ? PCRE_UTF8 : PCRE_CASELESS | PCRE_UTF8;
    const char *errorPointer;
    int errorOffset;

    patternStructure = pcre_compile (
                           pattern.c_str(),
                           optionsFlag,
                           &errorPointer,
                           &errorOffset,
                           NULL );
    if ( patternStructure == NULL )
    {
        // The destructor does not run for an object whose constructor
        // throws, so the output vector has to be released here.
        delete[] matchArray;
        matchArray = NULL;
        throw runtime_error ( errorPointer );
    }

    // The study result is only passed on to pcre_exec; a NULL result is
    // accepted and any study error text is deliberately ignored.
    patternExtraStructure = pcre_study ( patternStructure, 0, &errorPointer );

    disabled = false;
}
|
|
|
|
|
|
|
|
/*
 * Releases the compiled pattern, the study data and the output vector.
 *
 * A disabled object is left untouched. The destructor never throws: an
 * exception escaping a destructor terminates the program, and neither
 * pcre_free nor delete[] reports failure by throwing.
 */
WrapRegex::~WrapRegex()
{
    if ( disabled )
        return;

    if ( patternStructure != NULL )
        pcre_free ( patternStructure );

    // pcre_study may have returned NULL in the constructor.
    if ( patternExtraStructure != NULL )
        pcre_free ( patternExtraStructure );

    delete[] matchArray;
}
|
|
|
|
|
2007-09-08 00:25:30 +02:00
|
|
|
/*
 * Public entry point for a global search over a std::string.
 *
 * Forwards the string's character data and size to matchPatternGlobal_,
 * which appends one ContextMatch per hit to matchVector.
 *
 * Returns the value reported by matchPatternGlobal_, or 0 when the object
 * is disabled.
 */
int WrapRegex::matchPatternGlobal (
    string &buffer,
    vector<ContextMatch> &matchVector,
    unsigned elementCount,
    int context )
{
    int found = 0;

    if ( !disabled )
    {
        found = matchPatternGlobal_ (
                    buffer.c_str(),
                    buffer.size(),
                    matchVector,
                    elementCount,
                    context );
    }

    return found;
}
|
|
|
|
|
2007-09-08 00:25:30 +02:00
|
|
|
/*
 * Returns a copy of `buffer` in which every match of the pattern has been
 * replaced by the interpolated `replace` template.
 *
 * buffer      text to search
 * matchCount  out-parameter, must not be NULL; receives the number of
 *             replacements made (0 when the object is disabled)
 *
 * As before, matching restarts on the remaining text after each hit, so
 * every search treats the current position as the start of the subject.
 * The remaining length is tracked explicitly instead of being recomputed
 * with strlen, so text after an embedded NUL is no longer dropped.
 */
string WrapRegex::replaceGlobal (
    const string& buffer,
    int *matchCount )
{
    *matchCount = 0;

    if ( disabled )
        return buffer;

    const char *s = buffer.c_str();
    const char *const end = s + buffer.size();

    string output;
    output.reserve ( buffer.size() );

    while ( ( returnValue = pcre_exec (
                                patternStructure,
                                patternExtraStructure,
                                s,
                                static_cast<int> ( end - s ),
                                0,
                                0,
                                matchArray,
                                arrayLength ) ) >= 0 )
    {
        ++ ( *matchCount );

        const int matchStart = matchArray[0];
        const int matchEnd = matchArray[1];

        // text before the hit, then the expanded replacement
        output.append ( s, matchStart );
        output.append ( getInterpolatedString_ (
                            const_cast<char *> ( s ),
                            const_cast<char *> ( replace.c_str() ) ) );

        s += matchEnd;

        if ( matchEnd == matchStart )
        {
            // An empty match does not consume anything; retrying at the
            // same position would find it again forever. Copy one whole
            // UTF-8 character through unchanged and continue behind it.
            if ( s == end )
                break;

            do
            {
                output += *s;
                ++s;
            }
            while ( s != end &&
                    ( static_cast<unsigned char> ( *s ) & 0xC0 ) == 0x80 );
        }
    }

    output.append ( s, end - s );
    return output;
}
|
|
|
|
|
|
|
|
int WrapRegex::matchPatternGlobal_ (
|
|
|
|
const char *buffer,
|
|
|
|
size_t buflen,
|
|
|
|
vector<ContextMatch> &matchVector,
|
|
|
|
unsigned elementCount,
|
|
|
|
int context )
|
|
|
|
{
|
|
|
|
if ( disabled )
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
char *s, *origin;
|
|
|
|
int matchcount;
|
|
|
|
size_t offset;
|
|
|
|
ContextMatch match;
|
|
|
|
|
|
|
|
s = origin = ( char * ) buffer;
|
|
|
|
matchcount = 0;
|
|
|
|
offset = 0;
|
|
|
|
|
|
|
|
while ( ( returnValue = pcre_exec (
|
|
|
|
patternStructure,
|
|
|
|
patternExtraStructure,
|
|
|
|
s,
|
|
|
|
buflen,
|
|
|
|
offset,
|
|
|
|
0,
|
|
|
|
matchArray,
|
|
|
|
arrayLength ) ) >= 0 )
|
2007-09-07 23:17:30 +02:00
|
|
|
{
|
2007-09-08 00:25:30 +02:00
|
|
|
++matchcount;
|
|
|
|
|
|
|
|
if ( context )
|
|
|
|
{
|
|
|
|
match = ContextHandler::getContext (
|
|
|
|
s + matchArray[0],
|
|
|
|
matchArray[1] - matchArray[0],
|
|
|
|
origin,
|
|
|
|
context );
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
match.prelog = match.postlog = "";
|
|
|
|
match.match.assign ( s + matchArray[0], matchArray[1] - matchArray[0] );
|
|
|
|
}
|
2007-09-07 23:17:30 +02:00
|
|
|
|
2007-09-08 00:25:30 +02:00
|
|
|
// record element and offset information
|
|
|
|
match.elementCount = elementCount;
|
|
|
|
match.offset = matchArray[0];
|
2007-09-07 23:17:30 +02:00
|
|
|
|
2007-09-08 00:25:30 +02:00
|
|
|
if ( replace != "" )
|
|
|
|
match.replace = getInterpolatedString_ ( s, ( char * ) replace.c_str() );
|
2007-09-07 23:17:30 +02:00
|
|
|
|
2007-09-08 00:25:30 +02:00
|
|
|
matchVector.push_back ( match );
|
|
|
|
|
|
|
|
if ( ( offset = matchArray[1] ) >= buflen )
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return matchcount;
|
2007-09-07 23:17:30 +02:00
|
|
|
}
|
|
|
|
|
2007-09-08 00:25:30 +02:00
|
|
|
/*
 * Expands the replacement template `source` for the most recent match.
 *
 * buffer  subject text of the last pcre_exec call; handed to
 *         getSubpattern_ to extract captured groups
 * source  NUL-terminated replacement template
 *
 * Recognised escapes:
 *   \<digits>  text of the captured group with that number
 *   \t         tab
 *   \n         newline
 * A backslash in front of anything else is copied through unchanged, and
 * a pair of backslashes is copied through as two backslashes.
 *
 * Returns the expanded string, or "" when the object is disabled.
 */
string WrapRegex::getInterpolatedString_ ( char *buffer, char *source )
{
    if ( disabled )
        return "";

    // Group numbers beyond this are never valid; capping the accumulator
    // keeps an absurdly long digit run from overflowing it.
    const unsigned maxSubpattern = 65535;

    string interpol_string;
    bool escapeState = false;

    for ( char *s = source; *s; ++s )
    {
        if ( *s != '\\' )
        {
            interpol_string += *s;
            // A backslash only affects the character right after it. Without
            // this reset, a stray "\q" left the flag set and turned a later
            // "\1" into literal text.
            escapeState = false;
            continue;
        }

        escapeState = !escapeState;

        if ( !escapeState )
        {
            // second backslash of a pair: kept as is
            interpol_string += *s;
            continue;
        }

        const char next = * ( s + 1 );

        // isdigit is only defined for unsigned char values and EOF; plain
        // char may be negative for UTF-8 bytes.
        if ( isdigit ( static_cast<unsigned char> ( next ) ) )
        {
            unsigned subpattern = 0;
            while ( isdigit ( static_cast<unsigned char> ( * ( s + 1 ) ) ) )
            {
                ++s;
                if ( subpattern <= maxSubpattern )
                    subpattern = subpattern * 10 + ( *s - '0' );
            }

            interpol_string += getSubpattern_ ( buffer, subpattern );
            escapeState = false;
        }
        else if ( next == 't' )
        {
            interpol_string += '\t';
            ++s;
            escapeState = false;
        }
        else if ( next == 'n' )
        {
            interpol_string += '\n';
            ++s;
            escapeState = false;
        }
        else
        {
            // unknown escape: keep the backslash, the next character is
            // handled by the following iteration
            interpol_string += *s;
        }
    }

    return interpol_string;
}
|
|
|
|
|
2007-09-08 00:25:30 +02:00
|
|
|
/*
 * Returns the text captured by group number `subpattern` in the most
 * recent match.
 *
 * s           subject text that was passed to the last pcre_exec call
 * subpattern  group number; 0 is the whole match
 *
 * Relies on the members `matchArray` and `returnValue` as left behind by
 * that pcre_exec call. Returns "" when the object is disabled or when PCRE
 * reports any error for the requested group.
 */
string WrapRegex::getSubpattern_ ( char *s, unsigned subpattern )
{
    if ( disabled )
        return "";

    const char *sub = NULL;
    const int length = pcre_get_substring (
                           s,
                           matchArray,
                           returnValue,
                           subpattern,
                           &sub );
    if ( length < 0 )
        return "";

    // pcre_get_substring returns the substring length, so build the string
    // from pointer + length; a capture containing a NUL byte stays intact.
    string subString ( sub, length );
    pcre_free_substring ( sub );
    return subString;
}
|