xml-copy-editor-code/src/wraplibxml.cpp

518 lines
11 KiB
C++
Raw Normal View History

#include "wraplibxml.h"
#include <sstream>
#include <stdexcept>
#include <wx/wx.h>
WrapLibxml::WrapLibxml(bool netAccessParameter, const std::string& catalogPathParameter)
: netAccess(netAccessParameter), catalogPath(catalogPathParameter)
{
LIBXML_TEST_VERSION
xmlInitializeCatalog();
}
WrapLibxml::~WrapLibxml()
{
xsltCleanupGlobals();
xmlCleanupParser();
xmlCatalogCleanup();
}
bool WrapLibxml::validate(const std::string& fileName)
{
output = "";
xmlCleanupParser();
xmlParserCtxtPtr ctxt;
xmlDocPtr docPtr;
ctxt = xmlNewParserCtxt();
if (ctxt == NULL)
{
return false;
}
bool returnValue = false;
loadCatalog();
docPtr = xmlCtxtReadFile(
ctxt,
fileName.c_str(),
NULL,
(netAccess) ? XML_PARSE_DTDVALID : XML_PARSE_DTDVALID | XML_PARSE_NONET);
if (docPtr == NULL)
;
else if (ctxt->valid == 0)
;
else
returnValue = true;
xmlFreeDoc(docPtr);
xmlFreeParserCtxt(ctxt);
return returnValue;
}
bool WrapLibxml::validateRelaxNG(
const std::string& schemaFileName,
const std::string& fileName)
{
output = "";
xmlCleanupParser();
xmlRelaxNGValidCtxtPtr ctxtPtr;
xmlDocPtr docPtr;
xmlRelaxNGParserCtxtPtr schemaParserCtxtPtr;
xmlRelaxNGPtr schemaPtr;
schemaParserCtxtPtr = xmlRelaxNGNewParserCtxt(schemaFileName.c_str());
if (schemaParserCtxtPtr == NULL)
return false;
schemaPtr = xmlRelaxNGParse(schemaParserCtxtPtr);
if (schemaPtr == NULL)
{
xmlRelaxNGFreeParserCtxt(schemaParserCtxtPtr);
return false;
}
ctxtPtr = xmlRelaxNGNewValidCtxt(schemaPtr);
if (ctxtPtr == NULL)
{
xmlRelaxNGFree(schemaPtr);
xmlRelaxNGFreeParserCtxt(schemaParserCtxtPtr);
return false;
}
docPtr = xmlReadFile(fileName.c_str(), NULL,
(netAccess) ? XML_PARSE_DTDLOAD : XML_PARSE_DTDLOAD | XML_PARSE_NONET);
if (docPtr == NULL)
{
xmlRelaxNGFree(schemaPtr);
xmlRelaxNGFreeValidCtxt(ctxtPtr);
return false;
}
int res = xmlRelaxNGValidateDoc(ctxtPtr, docPtr);
bool returnValue = (res) ? false : true;
xmlFreeDoc(docPtr);
xmlRelaxNGFree(schemaPtr);
xmlRelaxNGFreeValidCtxt(ctxtPtr);
return returnValue;
}
bool WrapLibxml::validateW3CSchema(
const std::string& schemaFileName,
const std::string& fileName)
{
output = "";
xmlCleanupParser();
xmlSchemaValidCtxtPtr ctxtPtr;
xmlDocPtr docPtr;
xmlSchemaParserCtxtPtr schemaParserCtxtPtr;
xmlSchemaPtr schemaPtr;
schemaParserCtxtPtr = xmlSchemaNewParserCtxt(schemaFileName.c_str());
if (schemaParserCtxtPtr == NULL)
return false;
schemaPtr = xmlSchemaParse(schemaParserCtxtPtr);
if (schemaPtr == NULL)
{
xmlSchemaFreeParserCtxt(schemaParserCtxtPtr);
return false;
}
ctxtPtr = xmlSchemaNewValidCtxt(schemaPtr);
if (ctxtPtr == NULL)
{
xmlSchemaFree(schemaPtr);
xmlSchemaFreeParserCtxt(schemaParserCtxtPtr);
return false;
}
docPtr = xmlReadFile(fileName.c_str(), NULL,
(netAccess) ? XML_PARSE_DTDLOAD : XML_PARSE_DTDLOAD | XML_PARSE_NONET);
if (docPtr == NULL)
{
xmlSchemaFree(schemaPtr);
xmlSchemaFreeValidCtxt(ctxtPtr);
return false;
}
int res = xmlSchemaValidateDoc(ctxtPtr, docPtr);
bool returnValue = (res) ? false : true;
xmlFreeDoc(docPtr);
xmlSchemaFree(schemaPtr);
xmlSchemaFreeValidCtxt(ctxtPtr);
return returnValue;
}
bool WrapLibxml::parse(
const std::string& fileName,
bool indent,
bool resolveEntities)
{
output = "";
xmlCleanupParser();
xmlParserCtxtPtr ctxt;
xmlDocPtr docPtr;
ctxt = xmlNewParserCtxt();
if (ctxt == NULL)
{
return false;
}
loadCatalog();
int flags = XML_PARSE_DTDLOAD;
if (resolveEntities)
flags |= XML_PARSE_NOENT;
if (!netAccess)
flags |= XML_PARSE_NONET;
docPtr = xmlCtxtReadFile(
ctxt,
fileName.c_str(),
NULL,
flags);
if (docPtr == NULL)
{
xmlFreeParserCtxt(ctxt);
return false;
}
if (indent)
xmlKeepBlanksDefault(0);
xmlChar *buf = NULL;
int size;
// tbd: link output encoding to input encoding?
xmlDocDumpFormatMemoryEnc(
docPtr,
&buf,
&size,
"UTF-8",
indent);
if (buf)
{
output.append((const char *)buf);
free(buf);
}
bool returnValue = (!ctxt->errNo) ? true : false;
xmlFreeDoc(docPtr);
xmlFreeParserCtxt(ctxt);
return returnValue;
}
bool WrapLibxml::xpath(const std::string& path, const std::string& fileName)
{
output = "";
xmlCleanupParser();
xmlParserCtxtPtr ctxt;
xmlDocPtr docPtr;
ctxt = xmlNewParserCtxt();
if (ctxt == NULL)
{
return false;
}
loadCatalog();
docPtr = xmlCtxtReadFile(
ctxt,
fileName.c_str(),
NULL,
//(netAccess) ? XML_PARSE_DTDLOAD | XML_PARSE_NOENT : XML_PARSE_DTDLOAD | XML_PARSE_NONET | XML_PARSE_NOENT
XML_PARSE_NOENT | XML_PARSE_NONET | XML_PARSE_NSCLEAN
);
if (docPtr == NULL)
{
xmlFreeParserCtxt(ctxt);
return false;
}
xmlXPathContextPtr context;
xmlXPathObjectPtr result;
xmlNodeSetPtr nodeset;
context = xmlXPathNewContext(docPtr);
if (!context)
{
xmlFreeDoc(docPtr);
xmlFreeParserCtxt(ctxt);
return false;
}
// enable namespace prefixes
xmlXPathRegisterNs(context, (xmlChar *)"xhtml", (xmlChar *)"http://www.w3.org/1999/xhtml");
// add others as necessary!
result = xmlXPathEvalExpression((const xmlChar *)path.c_str(), context);
xmlKeepBlanksDefault(0);
bool xpathIsValid = (result) ? true : false;
while (result != NULL)
{
if(xmlXPathNodeSetIsEmpty(result->nodesetval))
break;
nodeset = result->nodesetval;
for (int i = 0; i < nodeset->nodeNr; i++)
{
xmlNodePtr node = nodeset->nodeTab[i];
xmlBufferPtr bufferPtr = xmlBufferCreate();
if (!node || !bufferPtr)
break;
xmlNodeDump(bufferPtr, NULL, node, 0, 1);
std::string nodeBuffer = (const char *)xmlBufferContent(bufferPtr);
output += nodeBuffer;
output += '\n';
}
break;
}
if (result)
xmlXPathFreeObject(result);
xmlXPathFreeContext(context);
xmlFreeDoc(docPtr);
xmlFreeParserCtxt(ctxt);
xmlCleanupParser();
return xpathIsValid;
}
bool WrapLibxml::xslt(
const std::string& styleFileName,
const std::string& fileName
)
{
xmlCleanupParser();
output = "";
xsltStylesheetPtr cur;
xmlDocPtr doc, res;
xmlSubstituteEntitiesDefault(1);
xmlLoadExtDtdDefaultValue = 1;
cur = xsltParseStylesheetFile((const xmlChar *)styleFileName.c_str());
if (!cur)
{
return false;
}
loadCatalog();
doc = xmlParseFile(fileName.c_str());
if (!doc)
{
xsltFreeStylesheet(cur);
return false;
}
// ensure entity warnings are treated as errors
if (!getLastError().empty())
{
xmlFreeDoc(doc);
xsltFreeStylesheet(cur);
return false;
}
res = xsltApplyStylesheet(cur, doc, NULL);
if (!res)
{
xmlFreeDoc(doc);
xsltFreeStylesheet(cur);
return false;
}
xmlChar *buf = NULL;
int size;
xmlDocDumpFormatMemoryEnc(res, &buf, &size, "UTF-8", 1);
if (buf)
{
output.append((char *)buf, size);
free(buf); // as above?
}
xsltFreeStylesheet(cur);
xmlFreeDoc(doc);
xmlFreeDoc(res);
return true;
}
bool WrapLibxml::bufferWellFormed(const std::string& buffer)
{
xmlCleanupParser();
xmlParserCtxtPtr ctxt = xmlNewParserCtxt();
if (!ctxt)
return false;
loadCatalog();
xmlDocPtr docPtr = xmlCtxtReadMemory(
ctxt,
buffer.c_str(),
buffer.size(),
"",
"UTF-8",
0);
bool returnValue = (docPtr) ? true : false;
xmlFreeDoc(docPtr);
xmlFreeParserCtxt(ctxt);
return returnValue;
}
int WrapLibxml::saveEncoding(
const std::string& buffer,
const std::string& fileName,
const std::string& encoding)
{
xmlCleanupParser();
xmlParserCtxtPtr ctxt = xmlNewParserCtxt();
if (!ctxt)
return -1;
xmlSubstituteEntitiesDefault(0);
loadCatalog();
xmlKeepBlanksDefault(1); // prevents single-line output
xmlDocPtr docPtr = xmlCtxtReadMemory(
ctxt,
buffer.c_str(),
buffer.size(),
"",
NULL,
XML_PARSE_DTDLOAD | XML_PARSE_DTDVALID | XML_PARSE_PEDANTIC//XML_PARSE_NONET//XML_PARSE_DTDLOAD//0//(netAccess) ? XML_PARSE_DTDLOAD | XML_PARSE_NOENT : XML_PARSE_DTDLOAD | XML_PARSE_NONET | XML_PARSE_NOENT//0
);
if (!docPtr)
{
xmlFreeParserCtxt(ctxt);
return -1;
}
int result = xmlSaveFileEnc(
fileName.c_str(),
docPtr,
encoding.c_str());
xmlFreeDoc(docPtr);
xmlFreeParserCtxt(ctxt);
return result;
}
int WrapLibxml::saveEncodingFromFile(
const std::string& fileNameSource,
const std::string& fileNameDestination,
const std::string& encoding)
{
xmlCleanupParser();
xmlParserCtxtPtr ctxt = xmlNewParserCtxt();
if (!ctxt)
return -1;
xmlSubstituteEntitiesDefault(0);
loadCatalog();
xmlKeepBlanksDefault(1); // prevents single-line output
xmlDocPtr docPtr = xmlCtxtReadFile(
ctxt,
fileNameSource.c_str(),
"UTF-8",
(netAccess) ? XML_PARSE_DTDLOAD : XML_PARSE_DTDLOAD | XML_PARSE_NONET);//XML_PARSE_NONET//XML_PARSE_DTDLOAD//0//(netAccess) ? XML_PARSE_DTDLOAD | XML_PARSE_NOENT : XML_PARSE_DTDLOAD | XML_PARSE_NONET | XML_PARSE_NOENT//0
if (!docPtr)
{
xmlFreeParserCtxt(ctxt);
return -1;
}
int result = xmlSaveFileEnc(
fileNameDestination.c_str(),
docPtr,
encoding.c_str());
// ensure entity warnings are treated as errors!
if (!getLastError().empty())
return -1;
xmlFreeDoc(docPtr);
xmlFreeParserCtxt(ctxt);
return result;
}
std::string WrapLibxml::getLastError()
{
xmlErrorPtr err = xmlGetLastError();
if (!err)
return "";
std::stringstream ss;
ss << "Error at line ";
ss << err->line;
if (err->int2)
{
ss << ", column ";
ss << err->int2;
}
ss << ": ";
ss << err->message;
return ss.str();
}
std::pair<int, int> WrapLibxml::getErrorPosition()
{
xmlErrorPtr err = xmlGetLastError();
if (!err)
return std::make_pair(1, 1);
return std::make_pair(
err->line,
err->int2);
}
std::string WrapLibxml::getOutput() { return output; }
void WrapLibxml::loadCatalog()
{
#ifdef __WXMSW__
xmlLoadCatalog(catalogPath.c_str());
#endif
}
std::string WrapLibxml::lookupPublicId(const std::string& id)
{
loadCatalog();
std::string ret;
char *s = (char *)xmlCatalogResolvePublic((const xmlChar *)id.c_str());
if (!s)
return ret;
ret = s;
free(s);
return ret;
}