Worked around file URL problems

wxFileSystem::FileNameToURL http://trac.wxwidgets.org/ticket/16209
Both wxWidgets (<= 2.8.12) and Xerces-C++ have problems parsing file URLs that contain multi-byte characters.
This commit is contained in:
Zane U. Ji 2014-04-27 20:35:09 +08:00
parent 7dbc742e57
commit 51b1bd3829
6 changed files with 167 additions and 123 deletions

View File

@ -130,9 +130,10 @@ bool WrapLibxml::validate ( const std::string& utf8DocBuf,
int flags = XML_PARSE_DTDVALID; int flags = XML_PARSE_DTDVALID;
if ( !netAccess ) if ( !netAccess )
flags |= XML_PARSE_NONET; flags |= XML_PARSE_NONET;
xmlChar *url = xmlFileNameToURL ( docFileName );
docPtr = xmlCtxtReadMemory ( ctxt, utf8DocBuf.c_str(), utf8DocBuf.length(), docPtr = xmlCtxtReadMemory ( ctxt, utf8DocBuf.c_str(), utf8DocBuf.length(),
CONV ( wxFileSystem::FileNameToURL ( docFileName ) ), ( const char * ) url, "UTF-8", flags);
"UTF-8", flags); xmlFree ( url );
bool returnValue = docPtr != NULL && ctxt->valid != 0; bool returnValue = docPtr != NULL && ctxt->valid != 0;
@ -157,8 +158,9 @@ bool WrapLibxml::validateRelaxNG (
xmlRelaxNGPtr schemaPtr = NULL; xmlRelaxNGPtr schemaPtr = NULL;
do { do {
rngParserCtxt = xmlRelaxNGNewParserCtxt ( xmlChar *url = xmlFileNameToURL ( schemaFileName );
CONV ( wxFileSystem::FileNameToURL ( schemaFileName ) ) ); rngParserCtxt = xmlRelaxNGNewParserCtxt ( ( const char * ) url );
xmlFree ( url );
if ( rngParserCtxt == NULL ) if ( rngParserCtxt == NULL )
{ {
nonParserError = _("Cannot create an RNG parser context"); nonParserError = _("Cannot create an RNG parser context");
@ -184,14 +186,10 @@ bool WrapLibxml::validateRelaxNG (
int flags = XML_PARSE_DTDVALID; int flags = XML_PARSE_DTDVALID;
if ( !netAccess ) if ( !netAccess )
flags |= XML_PARSE_NONET; flags |= XML_PARSE_NONET;
docPtr = xmlCtxtReadMemory ( url = xmlFileNameToURL ( docFileName );
ctxt, docPtr = xmlCtxtReadMemory ( ctxt, utf8DocBuf.c_str(),
utf8DocBuf.c_str(), utf8DocBuf.length(), ( const char * ) url, "UTF-8", flags );
utf8DocBuf.length(), xmlFree ( url );
CONV ( wxFileSystem::FileNameToURL ( docFileName ) ),
"UTF-8",
flags
);
if ( docPtr == NULL ) if ( docPtr == NULL )
break; break;
@ -224,8 +222,9 @@ bool WrapLibxml::validateW3CSchema (
xmlSchemaPtr schemaPtr = NULL; xmlSchemaPtr schemaPtr = NULL;
do { do {
rngParserCtxt = xmlSchemaNewParserCtxt ( xmlChar *url = xmlFileNameToURL ( schemaFileName );
CONV ( wxFileSystem::FileNameToURL ( schemaFileName ) ) ); rngParserCtxt = xmlSchemaNewParserCtxt ( ( const char * ) url );
xmlFree ( url );
if ( rngParserCtxt == NULL ) if ( rngParserCtxt == NULL )
return false; return false;
@ -249,14 +248,10 @@ bool WrapLibxml::validateW3CSchema (
int flags = XML_PARSE_DTDLOAD; int flags = XML_PARSE_DTDLOAD;
if ( !netAccess ) if ( !netAccess )
flags |= XML_PARSE_NONET; flags |= XML_PARSE_NONET;
docPtr = xmlCtxtReadMemory ( url = xmlFileNameToURL ( docFileName );
ctxt, docPtr = xmlCtxtReadMemory ( ctxt, utf8DocBuf.c_str(),
utf8DocBuf.c_str(), utf8DocBuf.length(), ( const char * ) url, "UTF-8", flags );
utf8DocBuf.length(), xmlFree ( url );
CONV ( wxFileSystem::FileNameToURL ( docFileName ) ),
"UTF-8",
flags
);
if ( docPtr == NULL ) if ( docPtr == NULL )
break; break;
@ -318,9 +313,12 @@ bool WrapLibxml::parse (
flags |= XML_PARSE_NONET; flags |= XML_PARSE_NONET;
if ( utf8DocBuf != NULL) if ( utf8DocBuf != NULL)
{
xmlChar *url = xmlFileNameToURL ( docFileName );
docPtr = xmlCtxtReadMemory ( ctxt, utf8DocBuf, utf8DocBufSize, docPtr = xmlCtxtReadMemory ( ctxt, utf8DocBuf, utf8DocBufSize,
CONV ( wxFileSystem::FileNameToURL ( docFileName ) ), ( const char * ) url, "UTF-8", flags );
"UTF-8", flags ); xmlFree ( url );
}
else else
docPtr = xmlCtxtReadFile ( ctxt, CONV ( docFileName ), NULL, flags ); docPtr = xmlCtxtReadFile ( ctxt, CONV ( docFileName ), NULL, flags );
if ( docPtr == NULL ) if ( docPtr == NULL )
@ -373,16 +371,12 @@ bool WrapLibxml::xpath ( const wxString &xpath, const std::string &utf8DocBuf,
return false; return false;
} }
docPtr = xmlCtxtReadMemory ( //(netAccess) ? XML_PARSE_DTDLOAD | XML_PARSE_NOENT : XML_PARSE_DTDLOAD | XML_PARSE_NONET | XML_PARSE_NOENT
ctxt, const static int flags = XML_PARSE_NOENT | XML_PARSE_NONET | XML_PARSE_NSCLEAN;
utf8DocBuf.c_str(), xmlChar *url = xmlFileNameToURL ( docFileName );
utf8DocBuf.length(), docPtr = xmlCtxtReadMemory ( ctxt, utf8DocBuf.c_str(), utf8DocBuf.length(),
CONV ( wxFileSystem::FileNameToURL ( docFileName ) ), ( const char * ) url, "UTF-8", flags );
"UTF-8", xmlFree ( url );
//(netAccess) ? XML_PARSE_DTDLOAD | XML_PARSE_NOENT : XML_PARSE_DTDLOAD | XML_PARSE_NONET | XML_PARSE_NOENT
XML_PARSE_NOENT | XML_PARSE_NONET | XML_PARSE_NSCLEAN
);
if ( docPtr == NULL ) if ( docPtr == NULL )
{ {
xmlFreeParserCtxt ( ctxt ); xmlFreeParserCtxt ( ctxt );
@ -496,9 +490,12 @@ bool WrapLibxml::xslt (
if ( !netAccess ) if ( !netAccess )
flags |= XML_PARSE_NONET; flags |= XML_PARSE_NONET;
if ( utf8DocBuf != NULL ) if ( utf8DocBuf != NULL )
{
xmlChar *url = xmlFileNameToURL ( docFileName );
doc = xmlCtxtReadMemory ( ctxt, utf8DocBuf, utf8DocBufSize, doc = xmlCtxtReadMemory ( ctxt, utf8DocBuf, utf8DocBufSize,
CONV ( wxFileSystem::FileNameToURL ( docFileName ) ), ( const char * ) url, "UTF-8", flags );
"UTF-8", flags ); xmlFree ( url );
}
else else
doc = xmlCtxtReadFile ( ctxt, CONV ( docFileName ), NULL, flags ); doc = xmlCtxtReadFile ( ctxt, CONV ( docFileName ), NULL, flags );
if ( !doc ) if ( !doc )
@ -597,9 +594,12 @@ int WrapLibxml::saveEncoding (
if ( !netAccess ) if ( !netAccess )
flags |= XML_PARSE_NONET; flags |= XML_PARSE_NONET;
if ( utf8Buffer != NULL ) if ( utf8Buffer != NULL )
{
xmlChar *url = xmlFileNameToURL ( fileNameSource );
docPtr = xmlCtxtReadMemory ( ctxt, utf8Buffer, utf8BufferSize, docPtr = xmlCtxtReadMemory ( ctxt, utf8Buffer, utf8BufferSize,
CONV ( wxFileSystem::FileNameToURL ( fileNameSource ) ), ( const char * ) url, "UTF-8", flags );
"UTF-8", flags ); xmlFree ( url );
}
else else
docPtr = xmlCtxtReadFile ( ctxt, CONV ( fileNameSource ), NULL, flags ); docPtr = xmlCtxtReadFile ( ctxt, CONV ( fileNameSource ), NULL, flags );
if ( !docPtr ) if ( !docPtr )
@ -690,9 +690,69 @@ wxString WrapLibxml::catalogResolve
wxString url ( s, wxConvUTF8 ); wxString url ( s, wxConvUTF8 );
xmlFree ( s ); xmlFree ( s );
wxFileName file = wxFileSystem::URLToFileName ( url ); wxFileName file = URLToFileName ( url );
if ( file.IsFileReadable() ) if ( file.IsFileReadable() )
return file.GetFullPath(); return file.GetFullPath();
return url; return url;
} }
// Converts a file name to a URL string.
//
// Delegates to xmlFileNameToURL() and decodes its UTF-8 result into a
// wxString. Returns wxEmptyString when xmlFileNameToURL() yields NULL.
wxString WrapLibxml::FileNameToURL ( const wxString &fileName )
{
    xmlChar *encoded = xmlFileNameToURL ( fileName );
    if ( encoded == NULL )
        return wxEmptyString;

    // Copy into a wxString first; the libxml buffer is released right after.
    wxString result = wxString::FromUTF8 ( ( char * ) encoded );
    xmlFree ( encoded );
    return result;
}
// Builds a URI for a file name using libxml2's xmlPathToURI().
//
// The name is first normalized with wxFileName (dots, tilde, absolute path)
// and rendered in the native path format, then passed to libxml2 as UTF-8.
// Returns NULL for an empty file name; otherwise returns whatever
// xmlPathToURI() returns. Callers in this file release the result with
// xmlFree().
xmlChar *WrapLibxml::xmlFileNameToURL ( const wxString &fileName )
{
    // Nothing to convert.
    if ( fileName.empty() )
        return NULL;

    wxFileName normalized ( fileName );
    normalized.Normalize ( wxPATH_NORM_ABSOLUTE | wxPATH_NORM_TILDE | wxPATH_NORM_DOTS );
    const wxString nativePath = normalized.GetFullPath ( wxPATH_NATIVE );

    // The UTF-8 buffer is a temporary: keep it inside the call expression so
    // that it outlives xmlPathToURI().
    return xmlPathToURI ( ( xmlChar * ) ( const char * ) nativePath.utf8_str() );
}
// Converts a "file:" URL to a wxFileName.
//
// With wxWidgets >= 2.9.0 this simply forwards to
// wxFileSystem::URLToFileName(). For older versions the URL is parsed with
// libxml2 instead:
//   - if the URL cannot be parsed, it is treated as a plain file name;
//   - if the scheme is not "file", the server is neither absent nor
//     "localhost", or the path is empty, an empty wxFileName is returned;
//   - a leading '/' in front of a drive specification ("/C:...") is dropped.
wxFileName WrapLibxml::URLToFileName ( const wxString &url )
{
#if wxCHECK_VERSION(2,9,0)
    return wxFileSystem::URLToFileName ( url );
#else
    xmlURIPtr uri = xmlParseURI ( url.utf8_str() );
    if ( !uri )
        return wxFileName ( url );

    wxFileName file; // stays empty unless the URL is an acceptable file URL
    do {
        // URI schemes are case-insensitive (RFC 3986, section 3.1).
        // xmlStrcasecmp is used instead of stricmp, which is not available
        // on every platform.
        if ( uri->scheme == NULL
                || xmlStrcasecmp ( ( const xmlChar * ) uri->scheme,
                                   ( const xmlChar * ) "file" ) )
            break;
        // Only local files are supported: no server, or "localhost".
        if ( uri->server
                && xmlStrcasecmp ( ( const xmlChar * ) uri->server,
                                   ( const xmlChar * ) "localhost" ) )
            break;
        if ( uri->path == NULL || !*uri->path )
            break;

        char *path = uri->path;
        // Does it begin with "/C:" ?
        if ( *path == '/' && wxIsalpha ( path[1] ) && path[2] == ':')
            path++;

        file = wxFileName ( wxString ( path, wxConvUTF8 ) );
    } while ( false );

    // Single release point for the parsed URI.
    xmlFreeURI ( uri );
    return file;
#endif // wxCHECK_VERSION(2,9,0)
}

View File

@ -30,11 +30,13 @@
#include <libxml/catalog.h> #include <libxml/catalog.h>
#include <libxml/xmlreader.h> #include <libxml/xmlreader.h>
#include <libxml/xmlmemory.h> #include <libxml/xmlmemory.h>
#include <libxml/uri.h>
#include <libxslt/xslt.h> #include <libxslt/xslt.h>
#include <libxslt/xsltInternals.h> #include <libxslt/xsltInternals.h>
#include <libxslt/transform.h> #include <libxslt/transform.h>
#include <libxslt/xsltutils.h> #include <libxslt/xsltutils.h>
#include <wx/wx.h> #include <wx/wx.h>
#include <wx/filename.h>
class WrapLibxml class WrapLibxml
{ {
@ -109,6 +111,10 @@ class WrapLibxml
wxString catalogResolve ( wxString catalogResolve (
const wxString &publicId, const wxString &publicId,
const wxString &systemId ); const wxString &systemId );
static wxString FileNameToURL ( const wxString &fileName );
static xmlChar *xmlFileNameToURL ( const wxString &fileName );
static wxFileName URLToFileName ( const wxString &url );
private: private:
bool netAccess; bool netAccess;
std::string output; std::string output;

View File

@ -18,6 +18,7 @@
*/ */
#include "wrapxerces.h" #include "wrapxerces.h"
#include "pathresolver.h"
#include <xercesc/parsers/XercesDOMParser.hpp> #include <xercesc/parsers/XercesDOMParser.hpp>
#include <xercesc/sax2/XMLReaderFactory.hpp> #include <xercesc/sax2/XMLReaderFactory.hpp>
@ -26,6 +27,7 @@
#include <xercesc/util/XMLUni.hpp> #include <xercesc/util/XMLUni.hpp>
#include <xercesc/framework/MemBufInputSource.hpp> #include <xercesc/framework/MemBufInputSource.hpp>
#include <xercesc/framework/LocalFileInputSource.hpp> #include <xercesc/framework/LocalFileInputSource.hpp>
#include <xercesc/framework/URLInputSource.hpp>
#include <sstream> #include <sstream>
#include <utility> #include <utility>
#include <stdexcept> #include <stdexcept>
@ -245,3 +247,34 @@ void MySAX2Handler::logError ( const wxString &type, wxLogLevel level,
mLevel = level; mLevel = level;
} }
} }
// Creates an InputSource for an external entity.
//
// Resolution order:
//   1. whatever XercesCatalogResolver::resolveEntity() returns for the
//      public/system identifiers, if non-NULL;
//   2. a LocalFileInputSource for the result of
//      PathResolver::run ( systemId, fileName ), if that file exists;
//   3. a URLInputSource built from the URL of fileName together with the
//      system and public identifiers.
//
// The returned object is allocated with new in cases 2 and 3.
// NOTE(review): presumably the caller owns the result in every case - confirm
// against XercesCatalogResolver.
InputSource *WrapXerces::resolveEntity (
    const wxString &publicId,
    const wxString &systemId,
    const wxString &fileName
)
{
    XercesCatalogResolver catalog;
    InputSource *fromCatalog = catalog.resolveEntity
        ( ( const XMLCh * ) WrapXerces::toString ( publicId ).GetData()
        , ( const XMLCh * ) WrapXerces::toString ( systemId ).GetData()
        );
    if ( fromCatalog != NULL )
        return fromCatalog;

    BOOST_STATIC_ASSERT ( sizeof( xmlChar ) == sizeof ( char ) );

    // Xerces-C++ can't open a file URL when there are multi-byte characters,
    // so prefer opening the entity by file name.
    wxString localPath = PathResolver::run ( systemId, fileName );
    if ( wxFileExists ( localPath ) )
    {
        return new LocalFileInputSource (
            ( const XMLCh * ) WrapXerces::toString ( localPath ).GetData() );
    }

    // Last resort: let Xerces-C++ resolve the system id against the URL of
    // fileName.
    wxString baseURL = WrapLibxml::FileNameToURL ( fileName );
    return new URLInputSource
        ( ( const XMLCh * ) WrapXerces::toString ( baseURL ).GetData()
        , ( const XMLCh * ) WrapXerces::toString ( systemId ).GetData()
        , ( const XMLCh * ) WrapXerces::toString ( publicId ).GetData()
        );
}

View File

@ -124,6 +124,10 @@ class WrapXerces : private boost::noncopyable
// Returns original value // Returns original value
static bool enableNetwork ( bool enable = true ); static bool enableNetwork ( bool enable = true );
static xercesc::InputSource *resolveEntity (
const wxString &publicId,
const wxString &systemId,
const wxString &fileName );
private: private:
static const wxMBConv &getMBConv(); static const wxMBConv &getMBConv();
static XMLNetAccessor *mOriginalNetAccessor; static XMLNetAccessor *mOriginalNetAccessor;

View File

@ -13,73 +13,21 @@ InputSource *XercesCatalogResolver::resolveEntity (
const XMLCh* const publicId, const XMLCh* const publicId,
const XMLCh* const systemId ) const XMLCh* const systemId )
{ {
/* the following _should_ work but doesn't always, so drop it for now
#ifndef __WXMSW__
resolved = lookupPublicId ( narrowPublicId );
#else
return NULL;
std::string stdPublicId = narrowPublicId;
// on Windows, call libxml's own xmlcatalog binary
// because calling libxml from xerces causes a protection fault
std::string narrowCommand = "";
narrowCommand += myCatalogUtilityPath;
narrowCommand += " \"";
narrowCommand += myCatalogPath;
narrowCommand += "\" \"";
narrowCommand += narrowPublicId;
narrowCommand += "\"";
wxString wideCommand = wxString (
narrowCommand.c_str(),
wxConvUTF8,
narrowCommand.size() );
wxArrayString stringArray;
long ret = wxExecute (
wideCommand,
stringArray,
wxEXEC_SYNC | wxEXEC_NODISABLE );
if ( ret == -1 || stringArray.empty() )
{
return NULL;
}
wxString returnValue = stringArray[0];
std::string narrowReturnValue = (const char *)returnValue.mb_str ( wxConvLocal );
Replace::run ( narrowReturnValue, "%20", " ", false );
char *s, *it;
s = (char *) narrowReturnValue.c_str();
for (char *scan = s; *scan; scan++ )
if (*scan == '/')
*scan = '\\';
if ( strstr ( s, "No entry" ) )
{
return NULL;
}
it = strstr ( s, "\\\\\\" );
if ( it )
resolved = it + 3;
else
resolved = (const char *)s;//narrowReturnValue;
#endif
*/
wxString pubId, sysId, resolved; wxString pubId, sysId, resolved;
pubId = WrapXerces::toString ( publicId ); pubId = WrapXerces::toString ( publicId );
sysId = WrapXerces::toString ( systemId ); sysId = WrapXerces::toString ( systemId );
resolved = catalogResolve ( pubId, sysId ); resolved = catalogResolve ( pubId, sysId );
if ( resolved.empty() ) if ( !resolved.empty() )
return NULL; return new LocalFileInputSource (
InputSource *source = new LocalFileInputSource (
( const XMLCh * ) WrapXerces::toString ( resolved ).GetData() ); ( const XMLCh * ) WrapXerces::toString ( resolved ).GetData() );
return source; // Xerces-C++ can't open a file URL when there are multi-byte characters.
// Parse the file URL here instead.
wxFileName file = WrapLibxml::URLToFileName ( systemId );
if ( file.IsFileReadable() )
return new LocalFileInputSource (
( const XMLCh * ) WrapXerces::toString ( file.GetFullPath() ).GetData() );
return NULL;
} }

View File

@ -18,8 +18,6 @@
*/ */
#include <wx/wx.h> #include <wx/wx.h>
#include <wx/filename.h>
#include <wx/filesys.h>
#include <stdexcept> #include <stdexcept>
#include "xmlpromptgenerator.h" #include "xmlpromptgenerator.h"
#include "xmlencodinghandler.h" #include "xmlencodinghandler.h"
@ -42,7 +40,6 @@
#include <xercesc/validators/schema/SchemaValidator.hpp> #include <xercesc/validators/schema/SchemaValidator.hpp>
#include <xercesc/validators/common/ContentSpecNode.hpp> #include <xercesc/validators/common/ContentSpecNode.hpp>
#include <xercesc/validators/schema/SchemaSymbols.hpp> #include <xercesc/validators/schema/SchemaSymbols.hpp>
#include <xercesc/framework/URLInputSource.hpp>
#include <xercesc/validators/DTD/DTDGrammar.hpp> #include <xercesc/validators/DTD/DTDGrammar.hpp>
using namespace xercesc; using namespace xercesc;
@ -312,27 +309,23 @@ int XMLCALL XmlPromptGenerator::externalentityrefhandler (
parser.setErrorHandler ( &handler ); parser.setErrorHandler ( &handler );
parser.setEntityResolver ( &catalogResolver ); parser.setEntityResolver ( &catalogResolver );
wxString wideSystemId ( systemId, wxConvUTF8 ); // TODO: Apply encoding
wxString widePublicId ( publicId, wxConvUTF8 );
std::auto_ptr<InputSource> source ( catalogResolver.resolveEntity
( ( const XMLCh * ) WrapXerces::toString ( widePublicId ).GetData()
, ( const XMLCh * ) WrapXerces::toString ( wideSystemId ).GetData()
) );
if ( !source.get() )
{
wxString fileURL = wxFileSystem::FileNameToURL ( d->basePath );
source.reset ( new URLInputSource
( ( const XMLCh * ) WrapXerces::toString ( fileURL ).GetData()
, ( const XMLCh * ) WrapXerces::toString ( wideSystemId ).GetData()
, ( const XMLCh * ) WrapXerces::toString ( widePublicId ).GetData()
) );
}
if ( pThis->TestDestroy() )
return XML_STATUS_ERROR;
Grammar *rootGrammar; Grammar *rootGrammar;
try { try {
wxString wideSystemId = wxString::FromUTF8 ( systemId );
std::auto_ptr<InputSource> source ( WrapXerces::resolveEntity
( wxString::FromUTF8 ( publicId )
, wideSystemId
, d->basePath
) );
if ( !source.get() )
{
wxLogError ( _T("Cann't open '%s'"), wideSystemId.c_str() );
return XML_STATUS_ERROR;
}
if ( pThis->TestDestroy() )
return XML_STATUS_ERROR;
rootGrammar = parser.loadGrammar ( *source, Grammar::DTDGrammarType ); rootGrammar = parser.loadGrammar ( *source, Grammar::DTDGrammarType );
if ( !rootGrammar ) if ( !rootGrammar )
return XML_STATUS_ERROR; return XML_STATUS_ERROR;