From 51b1bd3829d8aa7a7b238f21c681fca0d71dd852 Mon Sep 17 00:00:00 2001 From: "Zane U. Ji" Date: Sun, 27 Apr 2014 20:35:09 +0800 Subject: [PATCH] Worked around file URL problems wxFileSystem::FileNameToURL http://trac.wxwidgets.org/ticket/16209 Both wxWidgets (<= 2.8.12) and Xerces-C++ have problems parsing file URLs when there are multi-byte characters --- src/wraplibxml.cpp | 138 ++++++++++++++++++++++++---------- src/wraplibxml.h | 6 ++ src/wrapxerces.cpp | 33 ++++++++ src/wrapxerces.h | 4 + src/xercescatalogresolver.cpp | 72 +++--------------- src/xmlpromptgenerator.cpp | 37 ++++----- 6 files changed, 167 insertions(+), 123 deletions(-) diff --git a/src/wraplibxml.cpp b/src/wraplibxml.cpp index 4c4e9bb..50d591e 100644 --- a/src/wraplibxml.cpp +++ b/src/wraplibxml.cpp @@ -130,9 +130,10 @@ bool WrapLibxml::validate ( const std::string& utf8DocBuf, int flags = XML_PARSE_DTDVALID; if ( !netAccess ) flags |= XML_PARSE_NONET; + xmlChar *url = xmlFileNameToURL ( docFileName ); docPtr = xmlCtxtReadMemory ( ctxt, utf8DocBuf.c_str(), utf8DocBuf.length(), - CONV ( wxFileSystem::FileNameToURL ( docFileName ) ), - "UTF-8", flags); + ( const char * ) url, "UTF-8", flags); + xmlFree ( url ); bool returnValue = docPtr != NULL && ctxt->valid != 0; @@ -157,8 +158,9 @@ bool WrapLibxml::validateRelaxNG ( xmlRelaxNGPtr schemaPtr = NULL; do { - rngParserCtxt = xmlRelaxNGNewParserCtxt ( - CONV ( wxFileSystem::FileNameToURL ( schemaFileName ) ) ); + xmlChar *url = xmlFileNameToURL ( schemaFileName ); + rngParserCtxt = xmlRelaxNGNewParserCtxt ( ( const char * ) url ); + xmlFree ( url ); if ( rngParserCtxt == NULL ) { nonParserError = _("Cannot create an RNG parser context"); @@ -184,14 +186,10 @@ bool WrapLibxml::validateRelaxNG ( int flags = XML_PARSE_DTDVALID; if ( !netAccess ) flags |= XML_PARSE_NONET; - docPtr = xmlCtxtReadMemory ( - ctxt, - utf8DocBuf.c_str(), - utf8DocBuf.length(), - CONV ( wxFileSystem::FileNameToURL ( docFileName ) ), - "UTF-8", - flags - ); + url = 
xmlFileNameToURL ( docFileName ); + docPtr = xmlCtxtReadMemory ( ctxt, utf8DocBuf.c_str(), + utf8DocBuf.length(), ( const char * ) url, "UTF-8", flags ); + xmlFree ( url ); if ( docPtr == NULL ) break; @@ -224,8 +222,9 @@ bool WrapLibxml::validateW3CSchema ( xmlSchemaPtr schemaPtr = NULL; do { - rngParserCtxt = xmlSchemaNewParserCtxt ( - CONV ( wxFileSystem::FileNameToURL ( schemaFileName ) ) ); + xmlChar *url = xmlFileNameToURL ( schemaFileName ); + rngParserCtxt = xmlSchemaNewParserCtxt ( ( const char * ) url ); + xmlFree ( url ); if ( rngParserCtxt == NULL ) return false; @@ -249,14 +248,10 @@ bool WrapLibxml::validateW3CSchema ( int flags = XML_PARSE_DTDLOAD; if ( !netAccess ) flags |= XML_PARSE_NONET; - docPtr = xmlCtxtReadMemory ( - ctxt, - utf8DocBuf.c_str(), - utf8DocBuf.length(), - CONV ( wxFileSystem::FileNameToURL ( docFileName ) ), - "UTF-8", - flags - ); + url = xmlFileNameToURL ( docFileName ); + docPtr = xmlCtxtReadMemory ( ctxt, utf8DocBuf.c_str(), + utf8DocBuf.length(), ( const char * ) url, "UTF-8", flags ); + xmlFree ( url ); if ( docPtr == NULL ) break; @@ -318,9 +313,12 @@ bool WrapLibxml::parse ( flags |= XML_PARSE_NONET; if ( utf8DocBuf != NULL) + { + xmlChar *url = xmlFileNameToURL ( docFileName ); docPtr = xmlCtxtReadMemory ( ctxt, utf8DocBuf, utf8DocBufSize, - CONV ( wxFileSystem::FileNameToURL ( docFileName ) ), - "UTF-8", flags ); + ( const char * ) url, "UTF-8", flags ); + xmlFree ( url ); + } else docPtr = xmlCtxtReadFile ( ctxt, CONV ( docFileName ), NULL, flags ); if ( docPtr == NULL ) @@ -373,16 +371,12 @@ bool WrapLibxml::xpath ( const wxString &xpath, const std::string &utf8DocBuf, return false; } - docPtr = xmlCtxtReadMemory ( - ctxt, - utf8DocBuf.c_str(), - utf8DocBuf.length(), - CONV ( wxFileSystem::FileNameToURL ( docFileName ) ), - "UTF-8", - //(netAccess) ? 
XML_PARSE_DTDLOAD | XML_PARSE_NOENT : XML_PARSE_DTDLOAD | XML_PARSE_NONET | XML_PARSE_NOENT - XML_PARSE_NOENT | XML_PARSE_NONET | XML_PARSE_NSCLEAN - ); - + //(netAccess) ? XML_PARSE_DTDLOAD | XML_PARSE_NOENT : XML_PARSE_DTDLOAD | XML_PARSE_NONET | XML_PARSE_NOENT + const static int flags = XML_PARSE_NOENT | XML_PARSE_NONET | XML_PARSE_NSCLEAN; + xmlChar *url = xmlFileNameToURL ( docFileName ); + docPtr = xmlCtxtReadMemory ( ctxt, utf8DocBuf.c_str(), utf8DocBuf.length(), + ( const char * ) url, "UTF-8", flags ); + xmlFree ( url ); if ( docPtr == NULL ) { xmlFreeParserCtxt ( ctxt ); @@ -496,9 +490,12 @@ bool WrapLibxml::xslt ( if ( !netAccess ) flags |= XML_PARSE_NONET; if ( utf8DocBuf != NULL ) + { + xmlChar *url = xmlFileNameToURL ( docFileName ); doc = xmlCtxtReadMemory ( ctxt, utf8DocBuf, utf8DocBufSize, - CONV ( wxFileSystem::FileNameToURL ( docFileName ) ), - "UTF-8", flags ); + ( const char * ) url, "UTF-8", flags ); + xmlFree ( url ); + } else doc = xmlCtxtReadFile ( ctxt, CONV ( docFileName ), NULL, flags ); if ( !doc ) @@ -597,9 +594,12 @@ int WrapLibxml::saveEncoding ( if ( !netAccess ) flags |= XML_PARSE_NONET; if ( utf8Buffer != NULL ) + { + xmlChar *url = xmlFileNameToURL ( fileNameSource ); docPtr = xmlCtxtReadMemory ( ctxt, utf8Buffer, utf8BufferSize, - CONV ( wxFileSystem::FileNameToURL ( fileNameSource ) ), - "UTF-8", flags ); + ( const char * ) url, "UTF-8", flags ); + xmlFree ( url ); + } else docPtr = xmlCtxtReadFile ( ctxt, CONV ( fileNameSource ), NULL, flags ); if ( !docPtr ) @@ -690,9 +690,69 @@ wxString WrapLibxml::catalogResolve wxString url ( s, wxConvUTF8 ); xmlFree ( s ); - wxFileName file = wxFileSystem::URLToFileName ( url ); + wxFileName file = URLToFileName ( url ); if ( file.IsFileReadable() ) return file.GetFullPath(); return url; } + +wxString WrapLibxml::FileNameToURL ( const wxString &fileName ) +{ + xmlChar *s = xmlFileNameToURL ( fileName ); + if ( !s ) + return wxEmptyString; + + wxString url = wxString::FromUTF8 ( ( char * 
) s ); + xmlFree ( s ); + + return url; +} + +xmlChar *WrapLibxml::xmlFileNameToURL ( const wxString &fileName ) +{ + if ( fileName.empty() ) + return NULL; + + wxFileName fn ( fileName ); + fn.Normalize(wxPATH_NORM_DOTS | wxPATH_NORM_TILDE | wxPATH_NORM_ABSOLUTE); + wxString url = fn.GetFullPath(wxPATH_NATIVE); + + return xmlPathToURI ( ( xmlChar * ) ( const char * ) url.utf8_str() ); +} + +wxFileName WrapLibxml::URLToFileName ( const wxString &url ) +{ +#if wxCHECK_VERSION(2,9,0) + return wxFileSystem::URLToFileName ( url ); +#else + xmlURIPtr uri = xmlParseURI ( url.utf8_str() ); + if ( !uri ) + return wxFileName ( url ); + + do { + if ( uri->scheme == NULL || strcmp (uri->scheme, "file" ) ) + break; + if ( uri->server && stricmp ( uri->server, "localhost") ) + break; + if ( uri->path == NULL || !*uri->path ) + break; + + char *path = uri->path; + // Does it begin with "/C:" ? + if ( *path == '/' && wxIsalpha ( path[1] ) && path[2] == ':') + path++; + + wxFileName file ( wxString ( path, wxConvUTF8 ) ); + + xmlFreeURI ( uri ); + + return file; + + } while ( false ); + + xmlFreeURI ( uri ); + + return wxFileName(); +#endif // wxCHECK_VERSION(2,9,0) +} diff --git a/src/wraplibxml.h b/src/wraplibxml.h index 81a72e5..ed9295e 100644 --- a/src/wraplibxml.h +++ b/src/wraplibxml.h @@ -30,11 +30,13 @@ #include #include #include +#include #include #include #include #include #include +#include class WrapLibxml { @@ -109,6 +111,10 @@ class WrapLibxml wxString catalogResolve ( const wxString &publicId, const wxString &systemId ); + + static wxString FileNameToURL ( const wxString &fileName ); + static xmlChar *xmlFileNameToURL ( const wxString &fileName ); + static wxFileName URLToFileName ( const wxString &url ); private: bool netAccess; std::string output; diff --git a/src/wrapxerces.cpp b/src/wrapxerces.cpp index 6d36793..2712471 100644 --- a/src/wrapxerces.cpp +++ b/src/wrapxerces.cpp @@ -18,6 +18,7 @@ */ #include "wrapxerces.h" +#include "pathresolver.h" #include 
#include @@ -26,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -245,3 +247,34 @@ void MySAX2Handler::logError ( const wxString &type, wxLogLevel level, mLevel = level; } } + +InputSource *WrapXerces::resolveEntity ( + const wxString &publicId, + const wxString &systemId, + const wxString &fileName +) +{ + XercesCatalogResolver cr; + InputSource *source = cr.resolveEntity + ( ( const XMLCh * ) WrapXerces::toString ( publicId ).GetData() + , ( const XMLCh * ) WrapXerces::toString ( systemId ).GetData() + ); + if ( source ) + return source; + + BOOST_STATIC_ASSERT ( sizeof( xmlChar ) == sizeof ( char ) ); + + // Xerces-C++ can't open a file URL when there are multi-byte characters. + // Let's use the file name instead. + wxString file = PathResolver::run ( systemId, fileName ); + if ( wxFileExists ( file ) ) + return new LocalFileInputSource ( + ( const XMLCh * ) WrapXerces::toString ( file ).GetData() ); + + wxString fileURL = WrapLibxml::FileNameToURL ( fileName ); + return new URLInputSource + ( ( const XMLCh * ) WrapXerces::toString ( fileURL ).GetData() + , ( const XMLCh * ) WrapXerces::toString ( systemId ).GetData() + , ( const XMLCh * ) WrapXerces::toString ( publicId ).GetData() + ); +} diff --git a/src/wrapxerces.h b/src/wrapxerces.h index 0d951d4..bad5e8d 100644 --- a/src/wrapxerces.h +++ b/src/wrapxerces.h @@ -124,6 +124,10 @@ class WrapXerces : private boost::noncopyable // Returns original value static bool enableNetwork ( bool enable = true ); + static xercesc::InputSource *resolveEntity ( + const wxString &publicId, + const wxString &systemId, + const wxString &fileName ); private: static const wxMBConv &getMBConv(); static XMLNetAccessor *mOriginalNetAccessor; diff --git a/src/xercescatalogresolver.cpp b/src/xercescatalogresolver.cpp index c8783f6..f5329cd 100644 --- a/src/xercescatalogresolver.cpp +++ b/src/xercescatalogresolver.cpp @@ -13,73 +13,21 @@ InputSource *XercesCatalogResolver::resolveEntity ( const XMLCh* 
const publicId, const XMLCh* const systemId ) { -/* the following _should_ work but doesn't always, so drop it for now -#ifndef __WXMSW__ - resolved = lookupPublicId ( narrowPublicId ); -#else - return NULL; - std::string stdPublicId = narrowPublicId; - - // on Windows, call libxml's own xmlcatalog binary - // because calling libxml from xerces causes a protection fault - - std::string narrowCommand = ""; - narrowCommand += myCatalogUtilityPath; - narrowCommand += " \""; - narrowCommand += myCatalogPath; - narrowCommand += "\" \""; - narrowCommand += narrowPublicId; - narrowCommand += "\""; - - wxString wideCommand = wxString ( - narrowCommand.c_str(), - wxConvUTF8, - narrowCommand.size() ); - - wxArrayString stringArray; - long ret = wxExecute ( - wideCommand, - stringArray, - wxEXEC_SYNC | wxEXEC_NODISABLE ); - if ( ret == -1 || stringArray.empty() ) - { - return NULL; - } - wxString returnValue = stringArray[0]; - - std::string narrowReturnValue = (const char *)returnValue.mb_str ( wxConvLocal ); - - Replace::run ( narrowReturnValue, "%20", " ", false ); - - char *s, *it; - s = (char *) narrowReturnValue.c_str(); - - for (char *scan = s; *scan; scan++ ) - if (*scan == '/') - *scan = '\\'; - - if ( strstr ( s, "No entry" ) ) - { - return NULL; - } - - it = strstr ( s, "\\\\\\" ); - if ( it ) - resolved = it + 3; - else - resolved = (const char *)s;//narrowReturnValue; -#endif -*/ wxString pubId, sysId, resolved; pubId = WrapXerces::toString ( publicId ); sysId = WrapXerces::toString ( systemId ); resolved = catalogResolve ( pubId, sysId ); - if ( resolved.empty() ) - return NULL; - - InputSource *source = new LocalFileInputSource ( + if ( !resolved.empty() ) + return new LocalFileInputSource ( ( const XMLCh * ) WrapXerces::toString ( resolved ).GetData() ); - return source; + // Xerces-C++ can't open a file URL when there are multi-byte characters. + // Parse the file URL here instead. 
+ wxFileName file = WrapLibxml::URLToFileName ( sysId ); + if ( file.IsFileReadable() ) + return new LocalFileInputSource ( + ( const XMLCh * ) WrapXerces::toString ( file.GetFullPath() ).GetData() ); + + return NULL; } diff --git a/src/xmlpromptgenerator.cpp b/src/xmlpromptgenerator.cpp index fd9f65b..744ef23 100644 --- a/src/xmlpromptgenerator.cpp +++ b/src/xmlpromptgenerator.cpp @@ -18,8 +18,6 @@ */ #include -#include -#include #include #include "xmlpromptgenerator.h" #include "xmlencodinghandler.h" @@ -42,7 +40,6 @@ #include #include #include -#include #include using namespace xercesc; @@ -312,27 +309,23 @@ int XMLCALL XmlPromptGenerator::externalentityrefhandler ( parser.setErrorHandler ( &handler ); parser.setEntityResolver ( &catalogResolver ); - wxString wideSystemId ( systemId, wxConvUTF8 ); // TODO: Apply encoding - wxString widePublicId ( publicId, wxConvUTF8 ); - std::auto_ptr source ( catalogResolver.resolveEntity - ( ( const XMLCh * ) WrapXerces::toString ( widePublicId ).GetData() - , ( const XMLCh * ) WrapXerces::toString ( wideSystemId ).GetData() - ) ); - if ( !source.get() ) - { - wxString fileURL = wxFileSystem::FileNameToURL ( d->basePath ); - source.reset ( new URLInputSource - ( ( const XMLCh * ) WrapXerces::toString ( fileURL ).GetData() - , ( const XMLCh * ) WrapXerces::toString ( wideSystemId ).GetData() - , ( const XMLCh * ) WrapXerces::toString ( widePublicId ).GetData() - ) ); - } - - if ( pThis->TestDestroy() ) - return XML_STATUS_ERROR; - Grammar *rootGrammar; try { + wxString wideSystemId = wxString::FromUTF8 ( systemId ); + std::auto_ptr source ( WrapXerces::resolveEntity + ( wxString::FromUTF8 ( publicId ) + , wideSystemId + , d->basePath + ) ); + if ( !source.get() ) + { + wxLogError ( _T("Can't open '%s'"), wideSystemId.c_str() ); + return XML_STATUS_ERROR; + } + + if ( pThis->TestDestroy() ) + return XML_STATUS_ERROR; + rootGrammar = parser.loadGrammar ( *source, Grammar::DTDGrammarType ); if ( !rootGrammar ) return 
XML_STATUS_ERROR;