xml-copy-editor-code/src/xmlencodinghandler.cpp

108 lines
2.7 KiB
C++
Raw Normal View History

#include "xmlencodinghandler.h"
// Reports whether `utf8` begins with an XML declaration: the literal "<?xml"
// immediately followed by a whitespace character.
//
// The whitespace check matters. The XML grammar requires white space between
// "<?xml" and the version attribute, and without the check an ordinary
// processing instruction whose target merely starts with "xml" (for example
// "<?xml-stylesheet ...?>") would be mistaken for a declaration.
//
// Returns false for any input shorter than six characters.
bool XmlEncodingHandler::hasDeclaration(const std::string& utf8)
{
    // Five characters of prefix plus the mandatory separator.
    if (utf8.size() < 6)
        return false;

    if (utf8.compare(0, 5, "<?xml") != 0)
        return false;

    const char separator = utf8[5];
    return separator == ' ' || separator == '\t' ||
           separator == '\r' || separator == '\n';
}
// Identifies the encoding named by the XML declaration at the start of `utf8`.
//
// Returns:
//   "UTF-8"  when there is no declaration, when the declaration carries no
//            "encoding=" text, or when it mentions "utf-8";
//   one of "UTF-16LE", "UTF-16BE", "UTF-16", "ISO-8859-1", "US-ASCII" when
//            the corresponding name appears in the declaration;
//   ""       when the declaration is never closed by '>' or names an
//            encoding that is not in the list above.
//
// Only the text up to and including the first '>' is examined. It is passed
// through CaseHandler::lowerCase before matching (presumably so that the
// comparison is case-insensitive).
std::string XmlEncodingHandler::get(
    const std::string& utf8)
{
    if (!hasDeclaration(utf8))
        return "UTF-8";

    const std::string::size_type closing = utf8.find('>');
    if (closing == std::string::npos)
        return "";

    std::string head = utf8.substr(0, closing + 1);
    head = CaseHandler::lowerCase(head);

    if (head.find("encoding=") == std::string::npos)
        return "UTF-8";

    // Order is significant: the more specific UTF-16 variants must be tried
    // before the bare "utf-16" substring.
    struct Mapping
    {
        const char *needle;
        const char *canonical;
    };
    static const Mapping mappings[] =
    {
        { "utf-8",      "UTF-8"      },
        { "utf-16le",   "UTF-16LE"   },
        { "utf-16be",   "UTF-16BE"   },
        { "utf-16",     "UTF-16"     },
        { "iso-8859-1", "ISO-8859-1" },
        { "us-ascii",   "US-ASCII"   }
    };
    const size_t count = sizeof(mappings) / sizeof(mappings[0]);

    for (size_t i = 0; i < count; ++i)
    {
        if (head.find(mappings[i].needle) != std::string::npos)
            return mappings[i].canonical;
    }

    // Declared, but not an encoding this handler recognises.
    return "";
}
// Rewrites the value of the declaration's encoding attribute in `utf8` to
// "UTF-8", editing the string in place.
//
// Returns false only when the current encoding is not recognised by get()
// and `ignoreCurrentEncoding` is false; in that case `utf8` is untouched.
// Returns true otherwise, including when there is no declaration or no
// encoding attribute to rewrite (nothing is changed in those cases).
bool XmlEncodingHandler::setUtf8(std::string& utf8, bool ignoreCurrentEncoding)
{
    // Without a declaration there is nothing to relabel.
    if (!hasDeclaration(utf8))
        return true;

    const bool unrecognised = get(utf8).empty();
    if (unrecognised && !ignoreCurrentEncoding)
        return false;

    const std::pair<int, int> limits = getEncodingValueLimits(utf8);
    const bool attributePresent = limits.first != -1 && limits.second != -1;
    if (attributePresent)
        utf8.replace(limits.first, limits.second, "UTF-8");

    return true;
}
// Replaces the value of the declaration's encoding attribute in `buffer`
// with the text of `encoding`, editing `buffer` in place.
//
// Returns true when the value was replaced, and false when
// getEncodingValueLimits() could not locate an encoding value (in which case
// `buffer` is left unchanged).
bool XmlEncodingHandler::set(std::string& buffer, std::string& encoding)
{
    const std::pair<int, int> limits = getEncodingValueLimits(buffer);
    const int offset = limits.first;
    const int length = limits.second;

    const bool located = !(offset == -1 || length == -1);
    if (!located)
        return false; // attribute missing

    buffer.replace(offset, length, encoding.c_str());
    return true;
}
// Locates the quoted value of the encoding attribute inside the XML
// declaration at the start of `utf8`.
//
// Returns (offset, length): the index of the first character of the value
// (just past the opening quote) and the number of characters up to, but not
// including, the closing quote. The pair is suitable for passing straight to
// std::string::replace. Returns (-1, -1) when:
//   - `utf8` has no XML declaration;
//   - "encoding=" does not occur inside the declaration;
//   - the attribute value is not opened by ' or ";
//   - the value contains '?', '>', '<', a space, a tab, a newline or a NUL
//     before its closing quote, or is never closed.
//
// The match is confined to the declaration, i.e. it must start before the
// first '>' (the same cut-off get() uses). Searching the whole document
// would pick up an "encoding=" attribute on an ordinary element when the
// declaration has none, and callers would then overwrite document content.
std::pair<int, int>
XmlEncodingHandler::getEncodingValueLimits(const std::string& utf8)
{
    const std::pair<int, int> notFound(-1, -1);

    if (!hasDeclaration(utf8))
        return notFound;

    static const char anchor[] = "encoding=";
    const std::string::size_type anchorLength = sizeof(anchor) - 1;

    // npos when the declaration is unterminated; every index compares below
    // npos, so in that case the whole text remains eligible.
    const std::string::size_type declarationEnd = utf8.find('>');

    const std::string::size_type anchorPos = utf8.find(anchor);
    if (anchorPos == std::string::npos || anchorPos >= declarationEnd)
        return notFound;

    const std::string::size_type quotePos = anchorPos + anchorLength;
    if (quotePos >= utf8.size())
        return notFound;

    const char delimit = utf8[quotePos];
    if (delimit != '\'' && delimit != '\"')
        return notFound;

    const std::string::size_type valueStart = quotePos + 1;
    for (std::string::size_type i = valueStart; i < utf8.size(); ++i)
    {
        const char c = utf8[i];
        if (c == delimit)
        {
            return std::make_pair(
                       static_cast<int>(valueStart),
                       static_cast<int>(i - valueStart));
        }

        // Characters that cannot appear in an encoding name: hitting one
        // means the closing quote is missing or the value is malformed.
        if (c == '\0' ||
                c == '?' || c == '>' || c == '<' ||
                c == ' ' || c == '\t' || c == '\n')
            return notFound;
    }

    // Ran off the end of the text without finding the closing quote.
    return notFound;
}