xml-copy-editor-code/src/wraplibxml.cpp

680 lines
16 KiB
C++

/*
* Copyright 2005-2007 Gerald Schmidt.
*
* This file is part of Xml Copy Editor.
*
* Xml Copy Editor is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* Xml Copy Editor is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Xml Copy Editor; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "wraplibxml.h"
#include <sstream>
#include <stdexcept>
#ifdef ATTRIBUTE_PRINTF
#undef ATTRIBUTE_PRINTF
#endif
#include <wx/wx.h>
#include <wx/filesys.h>
#include <wx/uri.h>
#ifdef __WXMSW__ // Libxml supports utf8 file name on windows
#define CONV(url) ( ( const char * ) ( url ).utf8_str() )
#else
#define CONV(url) ( ( const char * ) ( url ).mb_str ( wxConvLocal ) )
#endif
static xmlCatalogPtr catalog = NULL;
class Initializer
{
public:
Initializer ( const wxString &catalogPath ) throw ()
{
xmlSetGenericErrorFunc ( xmlGenericErrorContext,
&Initializer::OnXmlGenericError );
LIBXML_TEST_VERSION
xmlInitializeCatalog();
xmlLoadCatalog ( catalogPath.mb_str() );
::catalog = xmlLoadACatalog ( catalogPath.mb_str() );
initGenericErrorDefaultFunc ( NULL );
}
~Initializer ()
{
xmlFreeCatalog ( ::catalog );
::catalog = NULL;
xsltCleanupGlobals();
xmlCatalogCleanup();
xmlCleanupParser();
}
static void XMLCDECL OnXmlGenericError (void *ctx, const char *msg, ...) throw()
{
va_list args;
size_t size = 128;
std::string buffer;
int chars;
for (;;)
{
buffer.resize ( size );
if ( buffer.size() < size )
throw std::runtime_error ( "Out of memory" );
va_start(args, msg);
chars = vsnprintf( (char *) buffer.c_str(), size, msg, args);
va_end(args);
if ( chars >= 0 && ( size_t ) chars < size )
{
buffer.resize ( chars );
throw std::runtime_error ( buffer );
}
if ( chars >= 0 )
size = chars + 1;
else
throw std::runtime_error (
std::string ( "Can't format message: " ) + msg );
}
}
};
void WrapLibxml::Init ( const wxString &catalogPath ) throw()
{
static Initializer dummy ( catalogPath );
}
WrapLibxml::WrapLibxml ( bool netAccessParameter )
: netAccess ( netAccessParameter )
{
WrapLibxml::Init();
}
WrapLibxml::~WrapLibxml()
{
}
bool WrapLibxml::validate ( const std::string& utf8DocBuf,
const wxString &docUrl )
{
output = "";
xmlParserCtxtPtr ctxt = NULL;
xmlDocPtr docPtr = NULL;
ctxt = xmlNewParserCtxt();
if ( ctxt == NULL )
{
nonParserError = _("Cannot create a parser context");
return false;
}
int flags = XML_PARSE_DTDVALID;
if ( !netAccess )
flags |= XML_PARSE_NONET;
docPtr = xmlCtxtReadMemory ( ctxt, utf8DocBuf.c_str(), utf8DocBuf.length(),
CONV ( docUrl ), "UTF-8", flags);
bool returnValue = docPtr != NULL && ctxt->valid != 0;
xmlFreeDoc ( docPtr );
xmlFreeParserCtxt ( ctxt );
return returnValue;
}
bool WrapLibxml::validateRelaxNG (
const wxString &schemaFileName,
const std::string &utf8DocBuf,
const wxString &docUrl )
{
output = "";
bool returnValue = false;
xmlParserCtxtPtr ctxt = NULL;
xmlDocPtr docPtr = NULL;
xmlRelaxNGValidCtxtPtr ctxtPtr = NULL;
xmlRelaxNGParserCtxtPtr rngParserCtxt = NULL;
xmlRelaxNGPtr schemaPtr = NULL;
do {
rngParserCtxt = xmlRelaxNGNewParserCtxt ( CONV ( schemaFileName ) );
if ( rngParserCtxt == NULL )
{
nonParserError = _("Cannot create an RNG parser context");
return false;
}
schemaPtr = xmlRelaxNGParse ( rngParserCtxt );
if ( schemaPtr == NULL )
break;
ctxtPtr = xmlRelaxNGNewValidCtxt ( schemaPtr );
if ( ctxtPtr == NULL )
{
nonParserError = _("Cannot create an RNG validation context");
break;
}
ctxt = xmlNewParserCtxt();
if ( ctxt == NULL )
{
nonParserError = _("Cannot create a parser context");
break;
}
int flags = XML_PARSE_DTDVALID;
if ( !netAccess )
flags |= XML_PARSE_NONET;
docPtr = xmlCtxtReadMemory ( ctxt, utf8DocBuf.c_str(),
utf8DocBuf.length(), CONV ( docUrl ), "UTF-8", flags );
if ( docPtr == NULL )
break;
int err = xmlRelaxNGValidateDoc ( ctxtPtr, docPtr );
returnValue = ( err ) ? false : true;
} while ( false );
xmlFreeDoc ( docPtr );
xmlFreeParserCtxt ( ctxt );
xmlRelaxNGFreeValidCtxt ( ctxtPtr );
xmlRelaxNGFree ( schemaPtr );
return returnValue;
}
bool WrapLibxml::validateW3CSchema (
const wxString &schemaFileName,
const std::string &utf8DocBuf,
const wxString &docUrl )
{
output = "";
bool returnValue = false;
xmlParserCtxtPtr ctxt = NULL;
xmlDocPtr docPtr = NULL;
xmlSchemaValidCtxtPtr ctxtPtr = NULL;
xmlSchemaParserCtxtPtr rngParserCtxt = NULL;
xmlSchemaPtr schemaPtr = NULL;
do {
rngParserCtxt = xmlSchemaNewParserCtxt ( CONV ( schemaFileName ) );
if ( rngParserCtxt == NULL )
return false;
schemaPtr = xmlSchemaParse ( rngParserCtxt );
if ( schemaPtr == NULL )
break;
ctxtPtr = xmlSchemaNewValidCtxt ( schemaPtr );
if ( ctxtPtr == NULL )
{
nonParserError = _("Cannot create a schema validation context");
break;
}
ctxt = xmlNewParserCtxt();
if ( ctxt == NULL )
{
nonParserError = _("Cannot create a parser context");
break;
}
int flags = XML_PARSE_DTDLOAD;
if ( !netAccess )
flags |= XML_PARSE_NONET;
docPtr = xmlCtxtReadMemory ( ctxt, utf8DocBuf.c_str(),
utf8DocBuf.length(), CONV ( docUrl ), "UTF-8", flags );
if ( docPtr == NULL )
break;
int res = xmlSchemaValidateDoc ( ctxtPtr, docPtr );
returnValue = ( res ) ? false : true;
} while ( false );
xmlFreeDoc ( docPtr );
xmlFreeParserCtxt ( ctxt );
xmlSchemaFree ( schemaPtr );
xmlSchemaFreeValidCtxt ( ctxtPtr );
return returnValue;
}
bool WrapLibxml::parse (
const std::string& utf8DocBuf,
const wxString &docUrl,
bool indent,
bool resolveEntities )
{
return parse ( utf8DocBuf.c_str(), utf8DocBuf.length(), docUrl,
indent, resolveEntities );
}
bool WrapLibxml::parse (
const wxString &docUrl,
bool indent,
bool resolveEntities )
{
return parse ( NULL, 0, docUrl, indent, resolveEntities );
}
bool WrapLibxml::parse (
const char *utf8DocBuf,
size_t utf8DocBufSize,
const wxString &docUrl,
bool indent,
bool resolveEntities )
{
output = "";
xmlParserCtxtPtr ctxt = NULL;
xmlDocPtr docPtr = NULL;
ctxt = xmlNewParserCtxt();
if ( ctxt == NULL )
{
nonParserError = _("Cannot create a parser context");
return false;
}
int flags = XML_PARSE_DTDLOAD;
if ( resolveEntities )
flags |= XML_PARSE_NOENT;
if ( !netAccess )
flags |= XML_PARSE_NONET;
if ( utf8DocBuf != NULL)
docPtr = xmlCtxtReadMemory ( ctxt, utf8DocBuf, utf8DocBufSize,
CONV ( docUrl ), "UTF-8", flags );
else
docPtr = xmlCtxtReadFile ( ctxt, CONV ( docUrl ), NULL, flags );
if ( docPtr == NULL )
{
xmlFreeParserCtxt ( ctxt );
return false;
}
xmlKeepBlanksDefault ( indent ? 0 : 1 );
xmlChar *buf = NULL;
int size;
// tbd: link output encoding to input encoding?
xmlDocDumpFormatMemoryEnc (
docPtr,
&buf,
&size,
"UTF-8",
indent );
if ( buf )
{
output.append ( ( const char * ) buf );
free ( buf );
}
bool returnValue = ( !ctxt->errNo ) ? true : false;
xmlFreeDoc ( docPtr );
xmlFreeParserCtxt ( ctxt );
return returnValue;
}
bool WrapLibxml::xpath ( const wxString &xpath, const std::string &utf8DocBuf,
const wxString &docUrl )
{
output = "";
xmlParserCtxtPtr ctxt = NULL;
xmlDocPtr docPtr = NULL;
xmlKeepBlanksDefault ( 0 );
ctxt = xmlNewParserCtxt();
if ( ctxt == NULL )
{
nonParserError = _("Cannot create a parser context");
return false;
}
docPtr = xmlCtxtReadMemory (
ctxt,
utf8DocBuf.c_str(),
utf8DocBuf.length(),
CONV ( docUrl ),
"UTF-8",
//(netAccess) ? XML_PARSE_DTDLOAD | XML_PARSE_NOENT : XML_PARSE_DTDLOAD | XML_PARSE_NONET | XML_PARSE_NOENT
XML_PARSE_NOENT | XML_PARSE_NONET | XML_PARSE_NSCLEAN
);
if ( docPtr == NULL )
{
xmlFreeParserCtxt ( ctxt );
return false;
}
xmlXPathContextPtr context = NULL;
xmlXPathObjectPtr result = NULL;
xmlNodeSetPtr nodeset = NULL;
context = xmlXPathNewContext ( docPtr );
if ( !context )
{
xmlFreeDoc ( docPtr );
xmlFreeParserCtxt ( ctxt );
return false;
}
// enable namespace prefixes
xmlXPathRegisterNs ( context, ( xmlChar * ) "xhtml", ( xmlChar * ) "http://www.w3.org/1999/xhtml" );
// add others as necessary!
result = xmlXPathEvalExpression (
// Since the encoding of the buffer is UTF-8
( const xmlChar * ) ( const char * ) xpath.utf8_str (),
context );
bool xpathIsValid = ( result ) ? true : false;
while ( result != NULL )
{
if ( xmlXPathNodeSetIsEmpty ( result->nodesetval ) )
break;
xmlBufferPtr bufferPtr = xmlBufferCreate();
if ( bufferPtr == NULL )
break;
nodeset = result->nodesetval;
for ( int i = 0; i < nodeset->nodeNr; i++ )
{
xmlNodePtr node = nodeset->nodeTab[i];
if ( !node )
break;
xmlNodeDump ( bufferPtr, NULL, node, 0, 1 );
output += ( const char * ) xmlBufferContent ( bufferPtr );
output += '\n';
xmlBufferEmpty ( bufferPtr );
}
xmlBufferFree ( bufferPtr );
break;
}
xmlXPathFreeObject ( result );
xmlXPathFreeContext ( context );
xmlFreeDoc ( docPtr );
xmlFreeParserCtxt ( ctxt );
return xpathIsValid;
}
bool WrapLibxml::xslt (
const wxString &styleFileName,
const std::string &utf8DocBuf,
const wxString &docUrl
)
{
return xslt ( styleFileName, utf8DocBuf.c_str(), utf8DocBuf.length(),
docUrl );
}
bool WrapLibxml::xslt (
const wxString &styleFileName,
const wxString &docUrl
)
{
return xslt ( styleFileName, NULL, 0, docUrl );
}
bool WrapLibxml::xslt (
const wxString &styleFileName,
const char *utf8DocBuf,
size_t utf8DocBufSize,
const wxString &docUrl
)
{
output = "";
bool ret = false;
xsltStylesheetPtr cur = NULL;
xmlParserCtxtPtr ctxt = NULL;
xmlDocPtr doc = NULL, res = NULL;
do {
cur = xsltParseStylesheetFile ( ( const xmlChar * )
CONV ( styleFileName ) );
if ( !cur )
{
nonParserError = _("Cannot parse stylesheet");
return false;
}
ctxt = xmlNewParserCtxt();
if ( !ctxt )
{
nonParserError = _("Cannot create a parser context");
break;
}
int flags = XML_PARSE_NOENT | XML_PARSE_DTDLOAD;
if ( !netAccess )
flags |= XML_PARSE_NONET;
if ( utf8DocBuf != NULL )
doc = xmlCtxtReadMemory ( ctxt, utf8DocBuf, utf8DocBufSize,
CONV ( docUrl ), "UTF-8", flags );
else
doc = xmlCtxtReadFile ( ctxt, CONV ( docUrl ), NULL, flags );
if ( !doc )
break;
res = xsltApplyStylesheet ( cur, doc, NULL );
if ( !res )
{
nonParserError = _("Cannot apply stylesheet");
break;
}
xmlChar *buf = NULL;
int size;
xmlDocDumpFormatMemoryEnc ( res, &buf, &size, "UTF-8", 1 );
if ( buf )
{
output.append ( ( char * ) buf, size );
xmlFree ( buf );
}
// ensure entity warnings are treated as errors
ret = !xmlGetLastError();
} while ( false );
xmlFreeDoc ( doc );
xmlFreeParserCtxt ( ctxt );
xmlFreeDoc ( res );
xsltFreeStylesheet ( cur );
return ret;
}
bool WrapLibxml::bufferWellFormed ( const std::string& buffer )
{
xmlParserCtxtPtr ctxt = xmlNewParserCtxt();
if ( !ctxt )
{
nonParserError = _("Cannot create a parser context");
return false;
}
int flags = XML_PARSE_DTDLOAD;
if ( !netAccess )
flags |= XML_PARSE_NONET;
xmlDocPtr docPtr = xmlCtxtReadMemory ( ctxt, buffer.c_str(), buffer.size(),
"", "UTF-8", flags );
bool returnValue = ( docPtr ) ? true : false;
xmlFreeDoc ( docPtr );
xmlFreeParserCtxt ( ctxt );
return returnValue;
}
int WrapLibxml::saveEncoding (
const std::string &utf8Buffer,
const wxString &fileNameSource,
const wxString &fileNameDestination,
wxMemoryBuffer *outputBuffer,
const wxString &encoding )
{
return saveEncoding ( utf8Buffer.c_str(), utf8Buffer.length(),
fileNameSource, fileNameDestination, outputBuffer, encoding );
}
int WrapLibxml::saveEncoding (
const wxString &fileNameSource,
const wxString &fileNameDestination,
const wxString &encoding )
{
return saveEncoding ( NULL, 0, fileNameSource, fileNameDestination,
NULL, encoding );
}
int WrapLibxml::saveEncoding (
const char *utf8Buffer,
size_t utf8BufferSize,
const wxString &fileNameSource,
const wxString &fileNameDestination,
wxMemoryBuffer *outputBuffer,
const wxString &encoding )
{
xmlParserCtxtPtr ctxt = xmlNewParserCtxt();
if ( !ctxt )
{
nonParserError = _("Cannot create a parser context");
return -1;
}
xmlDocPtr docPtr;
// Don't load DTD because additional namespace declarations will be added
// to every element when processing a docbook XML
int flags = XML_PARSE_PEDANTIC /*| XML_PARSE_DTDLOAD | XML_PARSE_DTDVALID*/;
if ( !netAccess )
flags |= XML_PARSE_NONET;
if ( utf8Buffer != NULL )
docPtr = xmlCtxtReadMemory ( ctxt, utf8Buffer, utf8BufferSize,
CONV ( fileNameSource ), "UTF-8", flags );
else
docPtr = xmlCtxtReadFile ( ctxt, CONV ( fileNameSource ), NULL, flags );
if ( !docPtr )
{
xmlFreeParserCtxt ( ctxt );
return -1;
}
int result;
if ( outputBuffer == NULL )
{
result = xmlSaveFileEnc (
CONV ( fileNameDestination ),
docPtr,
encoding.utf8_str() );
}
else
{
xmlChar *buffer;
xmlDocDumpFormatMemoryEnc ( docPtr, &buffer, &result,
encoding.utf8_str(), 1 );
outputBuffer->AppendData ( buffer, result );
xmlFree ( buffer );
}
xmlFreeDoc ( docPtr );
xmlFreeParserCtxt ( ctxt );
return result;
}
wxString WrapLibxml::getLastError()
{
xmlErrorPtr err = xmlGetLastError();
if ( !err )
return nonParserError;
wxString error ( err->message, wxConvLocal );
if ( err->int2 )
return wxString::Format ( _("Error at line %d, column %d: %s"),
err->line, err->int2, error.c_str() );
return wxString::Format ( _("Error at line %d: %s"),
err->line, error.c_str() );
}
std::pair<int, int> WrapLibxml::getErrorPosition()
{
xmlErrorPtr err = xmlGetLastError();
if ( !err )
return std::make_pair ( 1, 1 );
return std::make_pair (
err->line,
err->int2 );
}
std::string WrapLibxml::getOutput()
{
return output;
}
wxString WrapLibxml::catalogResolve
( const wxString &publicId
, const wxString &systemId
)
{
// According to 7.1.2. Resolution of External Identifiers
// from http://www.oasis-open.org/committees/entity/spec-2001-08-06.html,
// our catalog may not be used if the system catalog, which is specified
// in a delegateSystem entry, is out of date, such as the catalog for
// resolving public ID "-//OASIS//DTD DocBook XML V5.0//EN"
char *s = ( char * ) xmlACatalogResolve ( ::catalog,
( const xmlChar * ) ( const char *) publicId.utf8_str(),
( const xmlChar * ) ( const char *) systemId.utf8_str() );
if ( s == NULL )
{
#ifndef __WXMSW__
s = ( char * ) xmlCatalogResolve (
( const xmlChar * ) ( const char *) publicId.utf8_str(),
( const xmlChar * ) ( const char *) systemId.utf8_str() );
if ( s == NULL )
#endif
return wxEmptyString;
}
wxString url ( s, wxConvUTF8 );
xmlFree ( s );
wxFileName file = wxFileSystem::URLToFileName ( url );
if ( file.IsFileReadable() )
return file.GetFullPath();
return url;
}