xml-copy-editor-code/src/xmlencodinghandler.cpp

133 lines
3.6 KiB
C++
Raw Normal View History

2009-11-06 18:06:12 +01:00
/*
* Copyright 2005-2007 Gerald Schmidt.
*
* This file is part of Xml Copy Editor.
*
* Xml Copy Editor is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
2009-11-06 18:06:12 +01:00
*
* Xml Copy Editor is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Xml Copy Editor; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <cstring>
#include "xmlencodinghandler.h"
bool XmlEncodingHandler::hasDeclaration ( const std::string& utf8 )
{
size_t len = utf8.size();
if ( len < 6 ||
utf8[0] != '<' ||
utf8[1] != '?' ||
utf8[2] != 'x' ||
utf8[3] != 'm' ||
utf8[4] != 'l' )
return false;
return true;
}
std::string XmlEncodingHandler::get (
const std::string& utf8 )
{
if ( !hasDeclaration ( utf8 ) )
return "UTF-8";
size_t len = utf8.size();
std::string s;
for ( size_t i = 0; i < len; ++i )
{
s += utf8[i];
if ( utf8[i] == '>' )
break;
}
if ( s.find ( ">" ) == std::string::npos )
return "";
s = CaseHandler::lowerCase ( s );
if ( s.find ( "encoding=" ) == std::string::npos ||
s.find ( "utf-8" ) != std::string::npos )
return "UTF-8";
else if ( s.find ( "utf-16le" ) != std::string::npos )
return "UTF-16LE";
else if ( s.find ( "utf-16be" ) != std::string::npos )
return "UTF-16BE";
else if ( s.find ( "utf-16" ) != std::string::npos )
return "UTF-16";
else if ( s.find ( "iso-8859-1" ) != std::string::npos )
return "ISO-8859-1";
else if ( s.find ( "us-ascii" ) != std::string::npos )
return "US-ASCII";
else
return "";
}
bool XmlEncodingHandler::setUtf8 ( std::string& utf8, bool ignoreCurrentEncoding )
{
if ( !hasDeclaration ( utf8 ) )
return true;
if ( get ( utf8 ).empty() && !ignoreCurrentEncoding )
return false;
std::pair<int, int> p = getEncodingValueLimits ( utf8 );
if ( p.first == -1 || p.second == -1 )
return true; // attribute missing
utf8.replace ( p.first, p.second, "UTF-8" );
return true;
}
bool XmlEncodingHandler::set ( std::string& buffer, std::string& encoding )
{
std::pair<int, int> p = getEncodingValueLimits ( buffer );
if ( p.first == -1 || p.second == -1 )
return false; // attribute missing
buffer.replace ( p.first, p.second, encoding.c_str() );
return true;
}
std::pair<int, int>
XmlEncodingHandler::getEncodingValueLimits ( const std::string& utf8 )
{
if ( !hasDeclaration ( utf8 ) )
return make_pair ( -1, -1 );
char *buffer, *start, *end;
char delimit;
char anchor[] = "encoding=";
buffer = ( char * ) utf8.c_str();
start = strstr ( buffer, anchor );
if ( !start )
return make_pair ( -1, -1 );
start = start + strlen ( anchor );
if ( *start != '\'' && *start != '\"' )
return make_pair ( -1, -1 );
delimit = *start;
if ( strlen ( start ) < 2 )
return make_pair ( -1, -1 );
++start;
for ( end = start; *end && *end != delimit; ++end )
{
if (
*end == '?' || *end == '>' || *end == '<' ||
*end == ' ' || *end == '\t' || *end == '\n' )
{
return make_pair ( -1, -1 );
}
}
if ( !*end )
return make_pair ( -1, -1 );
return make_pair ( start - buffer, end - start );
}