#include "xmlencodinghandler.h"

namespace
{
    // Maps a lower-case substring of the XML declaration to the canonical
    // encoding name reported by XmlEncodingHandler::get().
    struct EncodingAlias
    {
        const char *needle;
        const char *name;
    };

    // Order matters: the more specific UTF-16 variants must be tested before
    // the plain "utf-16" substring, which they both contain.
    const EncodingAlias kEncodingAliases[] = {
        { "utf-16le", "UTF-16LE" },
        { "utf-16be", "UTF-16BE" },
        { "utf-16", "UTF-16" },
        { "iso-8859-1", "ISO-8859-1" },
        { "us-ascii", "US-ASCII" }
    };

    // True for characters that must not appear inside a quoted encoding value.
    // Hitting one before the closing quote means the declaration is malformed
    // (or the opening quote was never closed).
    bool isForbiddenInEncodingValue ( char c )
    {
        switch ( c )
        {
            case '?':
            case '>':
            case '<':
            case ' ':
            case '\t':
            case '\n':
            case '\r':
            case '\0':
                return true;
            default:
                return false;
        }
    }
}

// Returns true when the buffer starts with an XML declaration, i.e. the
// literal "<?xml" followed by whitespace. Requiring the whitespace keeps
// processing instructions such as "<?xml-stylesheet ...?>" from being
// mistaken for the declaration.
bool XmlEncodingHandler::hasDeclaration ( const std::string& utf8 )
{
    if ( utf8.size() < 6 || utf8.compare ( 0, 5, "<?xml" ) != 0 )
        return false;

    const char next = utf8[5];
    return next == ' ' || next == '\t' || next == '\r' || next == '\n';
}

// Reports the encoding named by the XML declaration at the start of the buffer.
//
// Returns:
//   "UTF-8"   - no declaration, no "encoding=" pseudo-attribute, or the
//               declaration mentions utf-8;
//   one of "UTF-16LE", "UTF-16BE", "UTF-16", "ISO-8859-1", "US-ASCII" when
//               the (case-insensitive) name occurs in the declaration;
//   ""        - the declaration is never closed by '>' or names an encoding
//               that is not in the list above.
std::string XmlEncodingHandler::get ( const std::string& utf8 )
{
    if ( !hasDeclaration ( utf8 ) )
        return "UTF-8";

    // The declaration runs up to and including the first '>'.
    const std::string::size_type close = utf8.find ( '>' );
    if ( close == std::string::npos )
        return "";

    // Kept as a named lvalue because it is passed to CaseHandler::lowerCase.
    std::string declaration = utf8.substr ( 0, close + 1 );
    declaration = CaseHandler::lowerCase ( declaration );

    if ( declaration.find ( "encoding=" ) == std::string::npos ||
            declaration.find ( "utf-8" ) != std::string::npos )
        return "UTF-8";

    const size_t aliasCount = sizeof ( kEncodingAliases ) / sizeof ( kEncodingAliases[0] );
    for ( size_t i = 0; i < aliasCount; ++i )
    {
        if ( declaration.find ( kEncodingAliases[i].needle ) != std::string::npos )
            return kEncodingAliases[i].name;
    }
    return "";
}

// Rewrites the encoding value of the XML declaration to "UTF-8".
//
// Returns true when nothing needed doing (no declaration, or no well-formed
// encoding pseudo-attribute) or when the value was replaced. Returns false,
// leaving the buffer untouched, when the current encoding is not recognised
// by get() and ignoreCurrentEncoding is false.
bool XmlEncodingHandler::setUtf8 ( std::string& utf8, bool ignoreCurrentEncoding )
{
    if ( !hasDeclaration ( utf8 ) )
        return true;

    if ( !ignoreCurrentEncoding && get ( utf8 ).empty() )
        return false;

    const std::pair<int, int> limits = getEncodingValueLimits ( utf8 );
    if ( limits.first == -1 || limits.second == -1 )
        return true; // attribute missing

    utf8.replace ( limits.first, limits.second, "UTF-8" );
    return true;
}

// Replaces the encoding value of the XML declaration in buffer with the given
// encoding name. Returns false (buffer untouched) when the buffer has no
// declaration or the declaration has no well-formed encoding pseudo-attribute.
bool XmlEncodingHandler::set ( std::string& buffer, std::string& encoding )
{
    const std::pair<int, int> limits = getEncodingValueLimits ( buffer );
    if ( limits.first == -1 || limits.second == -1 )
        return false; // attribute missing

    buffer.replace ( limits.first, limits.second, encoding );
    return true;
}

// Locates the quoted value of the encoding pseudo-attribute inside the XML
// declaration.
//
// Returns (offset, length) of the value, excluding the surrounding quotes,
// suitable for std::string::replace. Returns (-1, -1) when there is no
// declaration, no "encoding=" inside it, the value is not quoted with ' or ",
// the closing quote is missing, or the value contains a character that
// cannot be part of an encoding name.
std::pair<int, int> XmlEncodingHandler::getEncodingValueLimits ( const std::string& utf8 )
{
    const std::pair<int, int> notFound = std::make_pair ( -1, -1 );

    if ( !hasDeclaration ( utf8 ) )
        return notFound;

    // Only the declaration itself may carry the pseudo-attribute; without this
    // limit an encoding="..." attribute on a later element would be matched
    // (and subsequently overwritten by setUtf8/set).
    const std::string::size_type declarationEnd = utf8.find ( '>' );
    const std::string::size_type searchEnd =
        ( declarationEnd == std::string::npos ) ? utf8.size() : declarationEnd;

    const std::string anchor ( "encoding=" );
    const std::string::size_type anchorPos = utf8.find ( anchor );
    if ( anchorPos == std::string::npos || anchorPos >= searchEnd )
        return notFound;

    const std::string::size_type quotePos = anchorPos + anchor.size();
    if ( quotePos >= utf8.size() )
        return notFound;

    const char delimiter = utf8[quotePos];
    if ( delimiter != '\'' && delimiter != '"' )
        return notFound;

    const std::string::size_type valueStart = quotePos + 1;
    std::string::size_type valueEnd = valueStart;
    for ( ; valueEnd < utf8.size() && utf8[valueEnd] != delimiter; ++valueEnd )
    {
        if ( isForbiddenInEncodingValue ( utf8[valueEnd] ) )
            return notFound;
    }

    // Ran off the end of the buffer without meeting the closing quote.
    if ( valueEnd >= utf8.size() )
        return notFound;

    return std::make_pair ( static_cast<int> ( valueStart ),
                            static_cast<int> ( valueEnd - valueStart ) );
}