From 53106034677d463e2c772c89699405db0462537c Mon Sep 17 00:00:00 2001 From: "Zane U. Ji" Date: Fri, 25 Oct 2013 19:23:34 +0800 Subject: [PATCH] Fixed Unicode problems --- src/wrapxerces.cpp | 23 +++++++- src/wrapxerces.h | 13 +++- src/xmlcopyeditor.cpp | 118 +++++++++++++++++++------------------ src/xmlpromptgenerator.cpp | 2 +- src/xmlschemagenerator.cpp | 4 +- 5 files changed, 95 insertions(+), 65 deletions(-) diff --git a/src/wrapxerces.cpp b/src/wrapxerces.cpp index ae43294..f938f3c 100644 --- a/src/wrapxerces.cpp +++ b/src/wrapxerces.cpp @@ -60,7 +60,7 @@ WrapXerces::~WrapXerces() delete catalogResolver; } -bool WrapXerces::validate ( const std::string& fileName ) +bool WrapXerces::validate ( const wxString& fileName ) { SAX2XMLReader *parser = XMLReaderFactory::createXMLReader(); @@ -84,7 +84,7 @@ bool WrapXerces::validate ( const std::string& fileName ) try { - parser->parse ( fileName.c_str() ); + parser->parse ( (const XMLCh *) toString ( fileName ).GetData() ); } catch ( XMLException& e ) { @@ -221,3 +221,22 @@ wxString WrapXerces::toString ( const XMLCh *str ) { return wxString ( ( const char * ) str, getMBConv() ); } + +wxMemoryBuffer WrapXerces::toString ( const wxString &str ) +{ + const static XMLCh chNull = '\0'; // Xerces-C crashes when the file name is NULL. We'd better return something other than NULL. + wxMemoryBuffer buffer ( 0 ); + const size_t lenWC = str.length() + 1; // Plus '\0'. This is important. Otherwise we can call wxString::mb_str(getMBConv()). + size_t lenMB = getMBConv().FromWChar ( NULL, 0, str.c_str(), lenWC ); + if ( lenMB == wxCONV_FAILED ) + { + buffer.AppendData ( &chNull, sizeof chNull ); + return buffer; + } + + buffer.SetBufSize ( lenMB ); + lenMB = getMBConv().FromWChar ( ( char * ) buffer.GetData(), lenMB, str.c_str(), lenWC ); + buffer.SetDataLen ( lenMB ); + + return buffer; +} diff --git a/src/wrapxerces.h b/src/wrapxerces.h index 52e5f43..aa0752a 100644 --- a/src/wrapxerces.h +++ b/src/wrapxerces.h @@ -22,6 +22,7 @@ #include #include +#include #include #include @@ -41,14 +42,22 @@ class WrapXerces static void Init() throw (); WrapXerces(); virtual ~WrapXerces(); - bool validate ( const std::string& fileName ); + bool validate ( const wxString &fileName ); bool validateMemory ( const char *buffer, size_t len, const wxString &system, wxThread *thread = NULL ); const wxString &getLastError(); std::pair getErrorPosition(); - static const wxMBConv &getMBConv(); static wxString toString ( const XMLCh *str ); + // Convert Unicode string to const XMLCh * +//#if wxCHECK_VERSION(2,9,0) +// static wxCharTypeBuffer toString ( const wxString &str ); +//#else + static wxMemoryBuffer toString ( const wxString &str ); +//#endif + private: + static const wxMBConv &getMBConv(); + XercesCatalogResolver *catalogResolver; wxString lastError; std::pair errorPosition; diff --git a/src/xmlcopyeditor.cpp b/src/xmlcopyeditor.cpp index 7f07e23..4ed28df 100644 --- a/src/xmlcopyeditor.cpp +++ b/src/xmlcopyeditor.cpp @@ -3123,76 +3123,75 @@ bool MyFrame::openFile ( wxString& fileName, bool largeFile ) char *finalBuffer; size_t finalBufferLen; - // adjust for UTF-8 BOM - if ( docBuffer && - ( unsigned char ) docBuffer[0] == 0xEF && - ( unsigned char ) docBuffer[1] == 0xBB && - ( unsigned char ) docBuffer[2] == 0xBF ) + std::string encoding; + if ( docBufferLen >= 4 && // UTF-32 BE + ( unsigned char ) docBuffer[0] == 0x00 && + ( unsigned char ) docBuffer[1] == 0x00 && + ( unsigned char ) docBuffer[2] == 0xFE && + ( unsigned char ) docBuffer[3] == 0xFF ) + { + docBuffer += 4; + docBufferLen -= 4; + encoding = "UTF-32BE"; + } + else if ( docBufferLen >= 4 && // UTF-32 LE + ( unsigned char ) docBuffer[0] == 0xFF && + ( unsigned char ) docBuffer[1] == 0xFE && + ( unsigned char ) docBuffer[2] == 0x00 && + ( unsigned char ) docBuffer[3] == 0x00 ) + { + docBuffer += 4; + docBufferLen -= 4; + encoding = "UTF-32LE"; + } + else if ( docBufferLen >= 2 && //UTF-16 BE + ( unsigned char ) docBuffer[0] == 0xFE && + ( unsigned char ) docBuffer[1] == 0xFF ) + { + docBuffer += 2; + docBufferLen -= 2; + encoding = "UTF-16BE"; + } + else if ( docBufferLen >= 2 && //UTF-16 LE + ( unsigned char ) docBuffer[0] == 0xFF && + ( unsigned char ) docBuffer[1] == 0xFE ) + { + docBuffer += 2; + docBufferLen -= 2; + encoding = "UTF-16LE"; + } + else if ( docBufferLen >= 3 && //UTF-8 + ( unsigned char ) docBuffer[0] == 0xEF && + ( unsigned char ) docBuffer[1] == 0xBB && + ( unsigned char ) docBuffer[2] == 0xBF ) { docBuffer += 3; docBufferLen -= 3; - isUtf8 = true; + encoding = "UTF-8"; } - // no UTF-8 BOM found - std::string encoding; - if ( !isUtf8 || !binaryfile->getDataLen() ) + if ( encoding.empty() ) { XmlEncodingSpy es; es.parse ( docBuffer, docBufferLen ); encoding = es.getEncoding(); - if ( encoding == "UTF-8" || - encoding == "utf-8" || - encoding == "US-ASCII" || - encoding == "us-ascii" ) // US-ASCII is a subset of UTF-8 - isUtf8 = true; + if ( encoding.empty() ) // Expat couldn't parse file (e.g. UTF-32) + encoding = getApproximateEncoding ( docBuffer, docBufferLen ); } // convert buffer if not UTF-8 - int nBOM = 0; - if ( isUtf8 ) + if ( encoding == "UTF-8" || + encoding == "utf-8" || + encoding == "US-ASCII" || + encoding == "us-ascii" || // US-ASCII is a subset of UTF-8 + docBufferLen == 0 ) { finalBuffer = docBuffer; finalBufferLen = docBufferLen; + isUtf8 = true; } else { - // clear any other BOMs - - if ( docBuffer && // UTF-32 BE - ( unsigned char ) docBuffer[0] == 0x00 && - ( unsigned char ) docBuffer[1] == 0x00 && - ( unsigned char ) docBuffer[2] == 0xFE && - ( unsigned char ) docBuffer[3] == 0xFF ) - { - nBOM = 4; - } - else if ( docBuffer && // UTF-32 LE - ( unsigned char ) docBuffer[0] == 0xFF && - ( unsigned char ) docBuffer[1] == 0xFE && - ( unsigned char ) docBuffer[2] == 0x00 && - ( unsigned char ) docBuffer[3] == 0x00 ) - { - nBOM = 4; - } - else if ( docBuffer && //UTF-16 BE - ( unsigned char ) docBuffer[0] == 0xFE && - ( unsigned char ) docBuffer[1] == 0xFF ) - { - nBOM = 2; - } - else if ( docBuffer && //UTF-16 LE - ( unsigned char ) docBuffer[0] == 0xFF && - ( unsigned char ) docBuffer[1] == 0xFE ) - { - nBOM = 2; - } - - if ( !encoding.size() ) // Expat couldn't parse file (e.g. UTF-32) - { - encoding = getApproximateEncoding ( docBuffer + nBOM, docBufferLen - nBOM ); - } - wxString wideEncoding = wxString ( encoding.c_str(), wxConvLocal, @@ -3227,7 +3226,7 @@ bool MyFrame::openFile ( wxString& fileName, bool largeFile ) size_t nconv; char *buffer; size_t iconvBufferLeft, docBufferLeft; - iconvBufferLen = iconvBufferLeft = (docBufferLen - nBOM) * iconvLenMultiplier + 1; + iconvBufferLen = iconvBufferLeft = docBufferLen * iconvLenMultiplier + 1; docBufferLeft = docBufferLen; iconvBuffer.extend ( iconvBufferLen ); finalBuffer = buffer = iconvBuffer.data(); // buffer will be incremented by iconv @@ -3302,10 +3301,8 @@ bool MyFrame::openFile ( wxString& fileName, bool largeFile ) statusProgress ( _T ( "Parsing document..." ) ); std::auto_ptr we ( new WrapExpat() ); - bool optimisedParseSuccess = false; - // omit XML declaration - if ( !isUtf8 && finalBufferLen && + if ( !isUtf8 && finalBufferLen > 5 && finalBuffer[0] == '<' && finalBuffer[1] == '?' && finalBuffer[2] == 'x' && @@ -3323,6 +3320,7 @@ bool MyFrame::openFile ( wxString& fileName, bool largeFile ) } } + bool optimisedParseSuccess = false; if ( finalBuffer ) { optimisedParseSuccess = we->parse ( finalBuffer, finalBufferLen ); @@ -3998,6 +3996,11 @@ void MyFrame::OnValidateSchema ( wxCommandEvent& event ) { std::string rawBuffer, schemaLocation; getRawText ( doc, rawBuffer ); + if ( !XmlEncodingHandler::setUtf8 ( rawBuffer ) ) + { + encodingMessage(); + return; + } auto_ptr xsl ( new XmlSchemaLocator() ); xsl->parse ( rawBuffer.c_str() ); if ( ( xsl->getSchemaLocation() ) . empty() ) @@ -4034,8 +4037,7 @@ void MyFrame::OnValidateSchema ( wxCommandEvent& event ) doc->clearErrorIndicators(); std::auto_ptr validator ( new WrapXerces() ); - std::string fileNameLocal = ( const char * ) fileName.mb_str ( wxConvLocal ); - if ( !validator->validate ( fileNameLocal ) ) + if ( !validator->validate ( fileName ) ) { statusProgress ( wxEmptyString ); messagePane ( validator->getLastError(), CONST_WARNING ); diff --git a/src/xmlpromptgenerator.cpp b/src/xmlpromptgenerator.cpp index 27678fb..3fe8981 100644 --- a/src/xmlpromptgenerator.cpp +++ b/src/xmlpromptgenerator.cpp @@ -432,7 +432,7 @@ void XmlPromptGenerator::handleSchema ( parser->setValidationSchemaFullChecking ( true ); Grammar *rootGrammar = parser->loadGrammar - ( ( const XMLCh * ) ( const char * ) schemaPath.mb_str ( WrapXerces::getMBConv() ) + ( ( const XMLCh * ) WrapXerces::toString ( schemaPath ).GetData() , Grammar::SchemaGrammarType ); if ( !rootGrammar ) diff --git a/src/xmlschemagenerator.cpp b/src/xmlschemagenerator.cpp index 74d9022..80f3bb4 100644 --- a/src/xmlschemagenerator.cpp +++ b/src/xmlschemagenerator.cpp @@ -203,8 +203,8 @@ void XmlSchemaGenerator::generateData ( const wxString &elementName, continue; for ( attrItr = attrMap.begin(); attrItr != attrMap.end(); attrItr++ ) { - if ( attrs->getNamedItem ( ( const XMLCh * ) ( const char * ) - attrItr->first.mb_str ( WrapXerces::getMBConv() ) ) == NULL ) + if ( attrs->getNamedItem ( ( const XMLCh * ) + WrapXerces::toString ( attrItr->first ).GetData() ) == NULL ) { optAttrs.insert ( attrItr->first ); }