xml-copy-editor-code/src/wrapregex.cpp

265 lines
6.3 KiB
C++
Raw Normal View History

2009-11-06 18:06:12 +01:00
/*
* Copyright 2005-2007 Gerald Schmidt.
*
* This file is part of Xml Copy Editor.
*
* Xml Copy Editor is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* Xml Copy Editor is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Xml Copy Editor; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <iostream>
#include <string>
#include <ctype.h>
#include <stdexcept>
#include <cstring>
#include "wrapregex.h"
#include "contexthandler.h"
using namespace std;
/**
 * Compiles a PCRE pattern and allocates the offset vector used by pcre_exec.
 *
 * An empty pattern, or the catch-all ".*", puts the object into a disabled
 * state: nothing is compiled or allocated, and the other methods in this
 * file return early when they see the flag.
 *
 * @param pattern               regular expression source; compiled with
 *                              PCRE_UTF8
 * @param matchCase             when false, PCRE_CASELESS is added
 * @param replaceParameter      replacement template kept for later
 *                              interpolation
 * @param arrayLengthParameter  number of ints allocated for the pcre_exec
 *                              offset vector
 * @throws std::runtime_error   with PCRE's error text when the pattern does
 *                              not compile
 */
WrapRegex::WrapRegex (
    const string& pattern,
    bool matchCase,
    const string& replaceParameter,
    const int arrayLengthParameter ) :
    replace ( replaceParameter ),
    arrayLength ( arrayLengthParameter ),
    returnValue ( 0 )
{
    // Start from a known state so every exit path leaves the pointers defined.
    matchArray = NULL;
    patternStructure = NULL;
    patternExtraStructure = NULL;

    disabled = pattern.empty() || pattern == ".*";
    if ( disabled )
        return;

    matchArray = new int[arrayLength];

    // compile
    const int optionsFlag = ( matchCase ) ? PCRE_UTF8 : PCRE_CASELESS | PCRE_UTF8;
    const char *errorPointer = NULL;
    int errorOffset = 0;

    patternStructure = pcre_compile (
                           pattern.c_str(),
                           optionsFlag,
                           &errorPointer,
                           &errorOffset,
                           NULL );
    if ( patternStructure == NULL )
    {
        // The destructor does not run for a constructor that throws, so the
        // offset vector has to be released here or it would leak.
        delete[] matchArray;
        matchArray = NULL;
        throw runtime_error ( errorPointer );
    }

    // Studying is an optional optimisation; the result may legitimately be
    // NULL and is passed to pcre_exec as-is.
    patternExtraStructure = pcre_study ( patternStructure, 0, &errorPointer );
}
/**
 * Releases the compiled pattern, its study data and the offset vector.
 * A disabled object owns none of these, so nothing is freed for it.
 */
WrapRegex::~WrapRegex()
{
    if ( !disabled )
    {
        pcre_free ( patternStructure );
        pcre_free ( patternExtraStructure );
        delete[] matchArray;
    }
}
/**
 * Finds every match of the compiled pattern in a string.
 *
 * Forwards the string's character data and length to matchPatternGlobal_.
 *
 * @param buffer        text to search
 * @param matchVector   receives one ContextMatch per match
 * @param elementCount  value recorded in each match's elementCount field
 * @param context       passed through to matchPatternGlobal_
 * @return number of matches found; 0 when the object is disabled
 */
int WrapRegex::matchPatternGlobal (
    string &buffer,
    vector<ContextMatch> &matchVector,
    unsigned elementCount,
    int context )
{
    if ( !disabled )
    {
        const char *text = buffer.c_str();
        const size_t textLength = buffer.size();
        return matchPatternGlobal_ (
                   text,
                   textLength,
                   matchVector,
                   elementCount,
                   context );
    }
    return 0;
}
/**
 * Replaces every match of the compiled pattern in `buffer` with the
 * interpolated replacement template.
 *
 * The subject is scanned with a moving start offset (the same approach as
 * matchPatternGlobal_), so anchors and lookbehind see the whole subject
 * rather than a truncated tail. A zero-length match is followed by copying
 * one UTF-8 character verbatim, which guarantees the scan always advances.
 *
 * @param buffer      text to process
 * @param matchCount  out-parameter; set to the number of replacements made
 *                    (must not be NULL)
 * @return the text with all replacements applied; `buffer` unchanged when
 *         the object is disabled
 */
string WrapRegex::replaceGlobal (
    const string& buffer,
    int *matchCount )
{
    *matchCount = 0;

    if ( disabled )
        return buffer;

    const char *subject = buffer.c_str();
    // Use the string's own length: avoids a strlen per iteration and keeps
    // any bytes that follow an embedded NUL.
    const int subjectLength = static_cast<int> ( buffer.size() );
    int offset = 0;

    string output;
    output.reserve ( buffer.size() );

    while ( ( returnValue = pcre_exec (
                                patternStructure,
                                patternExtraStructure,
                                subject,
                                subjectLength,
                                offset,
                                0,
                                matchArray,
                                arrayLength ) ) >= 0 )
    {
        ++ ( *matchCount );

        const int matchStart = matchArray[0];
        const int matchEnd = matchArray[1];

        // Text between the previous match and this one is copied untouched.
        if ( matchStart > offset )
            output.append ( subject + offset, matchStart - offset );

        // Offsets in matchArray are relative to `subject`, which is what
        // the interpolation helper needs to extract subpatterns.
        output.append ( getInterpolatedString_ ( subject, replace.c_str() ) );
        offset = matchEnd;

        if ( matchEnd <= matchStart )
        {
            // Zero-length match: without a forced step the next pcre_exec
            // would find the same empty match again, forever.
            if ( offset >= subjectLength )
                break;

            // Step over one whole UTF-8 character (lead byte plus any
            // 10xxxxxx continuation bytes); PCRE_UTF8 requires the start
            // offset to sit on a character boundary.
            int next = offset + 1;
            while ( next < subjectLength &&
                    ( static_cast<unsigned char> ( subject[next] ) & 0xC0 ) == 0x80 )
                ++next;
            output.append ( subject + offset, next - offset );
            offset = next;
        }
    }

    // Whatever follows the last match is carried over unchanged.
    if ( offset < subjectLength )
        output.append ( subject + offset, subjectLength - offset );
    return output;
}
/**
 * Collects every match of the compiled pattern in a character buffer.
 *
 * For each match a ContextMatch is appended to `matchVector`. When
 * `context` is non-zero the entry comes from ContextHandler::getContext;
 * otherwise only the matched text is stored and prelog/postlog are empty.
 * If a replacement template is set, its interpolated form is stored in the
 * entry's `replace` field.
 *
 * Scanning stops once the resume position reaches `buflen`. After a
 * zero-length match the resume position is moved forward by one UTF-8
 * character so that the loop cannot report the same empty match forever.
 *
 * @param buffer        start of the text to search
 * @param buflen        length of the text in bytes
 * @param matchVector   receives one entry per match
 * @param elementCount  value recorded in each entry's elementCount field
 * @param context       non-zero to request surrounding context; forwarded
 *                      to ContextHandler::getContext
 * @return number of matches found; 0 when the object is disabled
 */
int WrapRegex::matchPatternGlobal_ (
    const char *buffer,
    size_t buflen,
    vector<ContextMatch> &matchVector,
    unsigned elementCount,
    int context )
{
    if ( disabled )
        return 0;

    int matchcount = 0;
    size_t offset = 0;
    ContextMatch match;

    while ( ( returnValue = pcre_exec (
                                patternStructure,
                                patternExtraStructure,
                                buffer,
                                static_cast<int> ( buflen ),
                                static_cast<int> ( offset ),
                                0,
                                matchArray,
                                arrayLength ) ) >= 0 )
    {
        ++matchcount;

        const int matchStart = matchArray[0];
        const int matchEnd = matchArray[1];
        const int matchLength = matchEnd - matchStart;

        if ( context )
        {
            match = ContextHandler::getContext (
                        buffer + matchStart,
                        matchLength,
                        buffer,
                        context );
        }
        else
        {
            match.prelog = match.postlog = "";
            match.match.assign ( buffer + matchStart, matchLength );
        }

        // record element and offset information
        match.elementCount = elementCount;
        match.offset = matchStart;

        if ( replace != "" )
            match.replace = getInterpolatedString_ ( buffer, replace.c_str() );

        matchVector.push_back ( match );

        offset = matchEnd;
        if ( matchLength <= 0 && offset < buflen )
        {
            // Zero-length match: resuming at the same offset would return
            // the same empty match again. Step over one UTF-8 character
            // (lead byte plus 10xxxxxx continuation bytes), since PCRE_UTF8
            // needs the start offset on a character boundary.
            ++offset;
            while ( offset < buflen &&
                    ( static_cast<unsigned char> ( buffer[offset] ) & 0xC0 ) == 0x80 )
                ++offset;
        }

        if ( offset >= buflen )
            break;
    }
    return matchcount;
}
2013-11-12 17:30:07 +01:00
/**
 * Expands a replacement template against the most recent match.
 *
 * Recognised escapes in `source`:
 *   \N   (one or more decimal digits) -> text of capture group N
 *   \t   -> tab
 *   \n   -> newline
 * A backslash followed by anything else is copied through literally, and
 * the second backslash of a `\\` pair does not start an escape, so `\\1`
 * stays as literal text.
 *
 * @param buffer  subject string the last pcre_exec call ran on; forwarded
 *                to getSubpattern_ to extract group text
 * @param source  NUL-terminated replacement template
 * @return the expanded string; empty when the object is disabled
 */
string WrapRegex::getInterpolatedString_ ( const char *buffer, const char *source )
{
    if ( disabled )
        return "";

    string interpolated;
    bool escapeState = false;

    for ( const char *cursor = source; *cursor; ++cursor )
    {
        if ( *cursor != '\\' )
        {
            interpolated += *cursor;
            // An ordinary character consumes any pending escape. Leaving
            // the flag set would make the next backslash look like the
            // second half of a `\\` pair and suppress a later \N.
            escapeState = false;
            continue;
        }

        escapeState = !escapeState;
        if ( !escapeState )
        {
            // Second backslash of a pair: literal.
            interpolated += *cursor;
            continue;
        }

        // <ctype.h> functions require a value representable as unsigned
        // char; a plain char holding a UTF-8 byte >= 0x80 may be negative.
        const unsigned char next = static_cast<unsigned char> ( cursor[1] );

        if ( isdigit ( next ) )
        {
            // Accumulate the group number in place. The cap keeps absurdly
            // long digit runs from overflowing; anything that large is not
            // a valid group and yields an empty substring.
            unsigned group = 0;
            const char *digit = cursor + 1;
            for ( ; isdigit ( static_cast<unsigned char> ( *digit ) ); ++digit )
            {
                if ( group < 100000 )
                    group = group * 10 + static_cast<unsigned> ( *digit - '0' );
            }
            interpolated += getSubpattern_ ( buffer, group );
            cursor = digit - 1; // the loop increment steps past the last digit
            escapeState = false;
        }
        else if ( next == 't' )
        {
            interpolated += '\t';
            ++cursor;
            escapeState = false;
        }
        else if ( next == 'n' )
        {
            interpolated += '\n';
            ++cursor;
            escapeState = false;
        }
        else
        {
            // Unrecognised escape: keep the backslash as-is.
            interpolated += *cursor;
        }
    }
    return interpolated;
}
2013-11-12 17:30:07 +01:00
/**
 * Returns the text captured by one group of the most recent match.
 *
 * Relies on `matchArray` and `returnValue` as left by the last pcre_exec
 * call, so `s` must be the subject string that call was given.
 *
 * @param s           subject string of the last pcre_exec call
 * @param subpattern  capture group number (0 is the whole match)
 * @return the captured text, or an empty string when the object is
 *         disabled or the substring cannot be obtained
 */
string WrapRegex::getSubpattern_ ( const char *s, unsigned subpattern )
{
    if ( disabled )
        return "";

    const char *sub = NULL;
    const int length = pcre_get_substring (
                           s,
                           matchArray,
                           returnValue,
                           static_cast<int> ( subpattern ),
                           &sub );

    // Every negative result is an error; `sub` is only valid on success.
    if ( length < 0 || sub == NULL )
        return "";

    // On success the return value is the substring length; using it keeps
    // captured text that contains NUL bytes intact.
    string subString ( sub, static_cast<size_t> ( length ) );
    pcre_free_substring ( sub );
    return subString;
}