Fixed Unicode problems
This commit is contained in:
parent
a9916c901e
commit
5310603467
|
@ -60,7 +60,7 @@ WrapXerces::~WrapXerces()
|
||||||
delete catalogResolver;
|
delete catalogResolver;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool WrapXerces::validate ( const std::string& fileName )
|
bool WrapXerces::validate ( const wxString& fileName )
|
||||||
{
|
{
|
||||||
SAX2XMLReader *parser = XMLReaderFactory::createXMLReader();
|
SAX2XMLReader *parser = XMLReaderFactory::createXMLReader();
|
||||||
|
|
||||||
|
@ -84,7 +84,7 @@ bool WrapXerces::validate ( const std::string& fileName )
|
||||||
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
parser->parse ( fileName.c_str() );
|
parser->parse ( (const XMLCh *) toString ( fileName ).GetData() );
|
||||||
}
|
}
|
||||||
catch ( XMLException& e )
|
catch ( XMLException& e )
|
||||||
{
|
{
|
||||||
|
@ -221,3 +221,22 @@ wxString WrapXerces::toString ( const XMLCh *str )
|
||||||
{
|
{
|
||||||
return wxString ( ( const char * ) str, getMBConv() );
|
return wxString ( ( const char * ) str, getMBConv() );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
wxMemoryBuffer WrapXerces::toString ( const wxString &str )
|
||||||
|
{
|
||||||
|
const static XMLCh chNull = '\0'; // Xerces-C crashes when the file name is NULL. We'd better return something other than NULL.
|
||||||
|
wxMemoryBuffer buffer ( 0 );
|
||||||
|
const size_t lenWC = str.length() + 1; // Plus '\0'. This is important. Otherwise we can call wxString::mb_str(getMBConv()).
|
||||||
|
size_t lenMB = getMBConv().FromWChar ( NULL, 0, str.c_str(), lenWC );
|
||||||
|
if ( lenMB == wxCONV_FAILED )
|
||||||
|
{
|
||||||
|
buffer.AppendData ( &chNull, sizeof chNull );
|
||||||
|
return buffer;
|
||||||
|
}
|
||||||
|
|
||||||
|
buffer.SetBufSize ( lenMB );
|
||||||
|
lenMB = getMBConv().FromWChar ( ( char * ) buffer.GetData(), lenMB, str.c_str(), lenWC );
|
||||||
|
buffer.SetDataLen ( lenMB );
|
||||||
|
|
||||||
|
return buffer;
|
||||||
|
}
|
||||||
|
|
|
@ -22,6 +22,7 @@
|
||||||
|
|
||||||
#include <wx/wx.h>
|
#include <wx/wx.h>
|
||||||
#include <wx/strconv.h>
|
#include <wx/strconv.h>
|
||||||
|
#include <wx/buffer.h>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
|
||||||
|
@ -41,14 +42,22 @@ class WrapXerces
|
||||||
static void Init() throw ();
|
static void Init() throw ();
|
||||||
WrapXerces();
|
WrapXerces();
|
||||||
virtual ~WrapXerces();
|
virtual ~WrapXerces();
|
||||||
bool validate ( const std::string& fileName );
|
bool validate ( const wxString &fileName );
|
||||||
bool validateMemory ( const char *buffer, size_t len,
|
bool validateMemory ( const char *buffer, size_t len,
|
||||||
const wxString &system, wxThread *thread = NULL );
|
const wxString &system, wxThread *thread = NULL );
|
||||||
const wxString &getLastError();
|
const wxString &getLastError();
|
||||||
std::pair<int, int> getErrorPosition();
|
std::pair<int, int> getErrorPosition();
|
||||||
static const wxMBConv &getMBConv();
|
|
||||||
static wxString toString ( const XMLCh *str );
|
static wxString toString ( const XMLCh *str );
|
||||||
|
// Convert Unicode string to const XMLCh *
|
||||||
|
//#if wxCHECK_VERSION(2,9,0)
|
||||||
|
// static wxCharTypeBuffer<XMLCh> toString ( const wxString &str );
|
||||||
|
//#else
|
||||||
|
static wxMemoryBuffer toString ( const wxString &str );
|
||||||
|
//#endif
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
static const wxMBConv &getMBConv();
|
||||||
|
|
||||||
XercesCatalogResolver *catalogResolver;
|
XercesCatalogResolver *catalogResolver;
|
||||||
wxString lastError;
|
wxString lastError;
|
||||||
std::pair<int, int> errorPosition;
|
std::pair<int, int> errorPosition;
|
||||||
|
|
|
@ -3123,76 +3123,75 @@ bool MyFrame::openFile ( wxString& fileName, bool largeFile )
|
||||||
char *finalBuffer;
|
char *finalBuffer;
|
||||||
size_t finalBufferLen;
|
size_t finalBufferLen;
|
||||||
|
|
||||||
// adjust for UTF-8 BOM
|
std::string encoding;
|
||||||
if ( docBuffer &&
|
if ( docBufferLen >= 4 && // UTF-32 BE
|
||||||
( unsigned char ) docBuffer[0] == 0xEF &&
|
( unsigned char ) docBuffer[0] == 0x00 &&
|
||||||
( unsigned char ) docBuffer[1] == 0xBB &&
|
( unsigned char ) docBuffer[1] == 0x00 &&
|
||||||
( unsigned char ) docBuffer[2] == 0xBF )
|
( unsigned char ) docBuffer[2] == 0xFE &&
|
||||||
|
( unsigned char ) docBuffer[3] == 0xFF )
|
||||||
|
{
|
||||||
|
docBuffer += 4;
|
||||||
|
docBufferLen -= 4;
|
||||||
|
encoding = "UTF-32BE";
|
||||||
|
}
|
||||||
|
else if ( docBufferLen >= 4 && // UTF-32 LE
|
||||||
|
( unsigned char ) docBuffer[0] == 0xFF &&
|
||||||
|
( unsigned char ) docBuffer[1] == 0xFE &&
|
||||||
|
( unsigned char ) docBuffer[2] == 0x00 &&
|
||||||
|
( unsigned char ) docBuffer[3] == 0x00 )
|
||||||
|
{
|
||||||
|
docBuffer += 4;
|
||||||
|
docBufferLen -= 4;
|
||||||
|
encoding = "UTF-32LE";
|
||||||
|
}
|
||||||
|
else if ( docBufferLen >= 2 && //UTF-16 BE
|
||||||
|
( unsigned char ) docBuffer[0] == 0xFE &&
|
||||||
|
( unsigned char ) docBuffer[1] == 0xFF )
|
||||||
|
{
|
||||||
|
docBuffer += 2;
|
||||||
|
docBufferLen -= 2;
|
||||||
|
encoding = "UTF-16BE";
|
||||||
|
}
|
||||||
|
else if ( docBufferLen >= 2 && //UTF-16 LE
|
||||||
|
( unsigned char ) docBuffer[0] == 0xFF &&
|
||||||
|
( unsigned char ) docBuffer[1] == 0xFE )
|
||||||
|
{
|
||||||
|
docBuffer += 2;
|
||||||
|
docBufferLen -= 2;
|
||||||
|
encoding = "UTF-16LE";
|
||||||
|
}
|
||||||
|
else if ( docBufferLen >= 3 && //UTF-8
|
||||||
|
( unsigned char ) docBuffer[0] == 0xEF &&
|
||||||
|
( unsigned char ) docBuffer[1] == 0xBB &&
|
||||||
|
( unsigned char ) docBuffer[2] == 0xBF )
|
||||||
{
|
{
|
||||||
docBuffer += 3;
|
docBuffer += 3;
|
||||||
docBufferLen -= 3;
|
docBufferLen -= 3;
|
||||||
isUtf8 = true;
|
encoding = "UTF-8";
|
||||||
}
|
}
|
||||||
|
|
||||||
// no UTF-8 BOM found
|
if ( encoding.empty() )
|
||||||
std::string encoding;
|
|
||||||
if ( !isUtf8 || !binaryfile->getDataLen() )
|
|
||||||
{
|
{
|
||||||
XmlEncodingSpy es;
|
XmlEncodingSpy es;
|
||||||
es.parse ( docBuffer, docBufferLen );
|
es.parse ( docBuffer, docBufferLen );
|
||||||
encoding = es.getEncoding();
|
encoding = es.getEncoding();
|
||||||
if ( encoding == "UTF-8" ||
|
if ( encoding.empty() ) // Expat couldn't parse file (e.g. UTF-32)
|
||||||
encoding == "utf-8" ||
|
encoding = getApproximateEncoding ( docBuffer, docBufferLen );
|
||||||
encoding == "US-ASCII" ||
|
|
||||||
encoding == "us-ascii" ) // US-ASCII is a subset of UTF-8
|
|
||||||
isUtf8 = true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// convert buffer if not UTF-8
|
// convert buffer if not UTF-8
|
||||||
int nBOM = 0;
|
if ( encoding == "UTF-8" ||
|
||||||
if ( isUtf8 )
|
encoding == "utf-8" ||
|
||||||
|
encoding == "US-ASCII" ||
|
||||||
|
encoding == "us-ascii" || // US-ASCII is a subset of UTF-8
|
||||||
|
docBufferLen == 0 )
|
||||||
{
|
{
|
||||||
finalBuffer = docBuffer;
|
finalBuffer = docBuffer;
|
||||||
finalBufferLen = docBufferLen;
|
finalBufferLen = docBufferLen;
|
||||||
|
isUtf8 = true;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// clear any other BOMs
|
|
||||||
|
|
||||||
if ( docBuffer && // UTF-32 BE
|
|
||||||
( unsigned char ) docBuffer[0] == 0x00 &&
|
|
||||||
( unsigned char ) docBuffer[1] == 0x00 &&
|
|
||||||
( unsigned char ) docBuffer[2] == 0xFE &&
|
|
||||||
( unsigned char ) docBuffer[3] == 0xFF )
|
|
||||||
{
|
|
||||||
nBOM = 4;
|
|
||||||
}
|
|
||||||
else if ( docBuffer && // UTF-32 LE
|
|
||||||
( unsigned char ) docBuffer[0] == 0xFF &&
|
|
||||||
( unsigned char ) docBuffer[1] == 0xFE &&
|
|
||||||
( unsigned char ) docBuffer[2] == 0x00 &&
|
|
||||||
( unsigned char ) docBuffer[3] == 0x00 )
|
|
||||||
{
|
|
||||||
nBOM = 4;
|
|
||||||
}
|
|
||||||
else if ( docBuffer && //UTF-16 BE
|
|
||||||
( unsigned char ) docBuffer[0] == 0xFE &&
|
|
||||||
( unsigned char ) docBuffer[1] == 0xFF )
|
|
||||||
{
|
|
||||||
nBOM = 2;
|
|
||||||
}
|
|
||||||
else if ( docBuffer && //UTF-16 LE
|
|
||||||
( unsigned char ) docBuffer[0] == 0xFF &&
|
|
||||||
( unsigned char ) docBuffer[1] == 0xFE )
|
|
||||||
{
|
|
||||||
nBOM = 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( !encoding.size() ) // Expat couldn't parse file (e.g. UTF-32)
|
|
||||||
{
|
|
||||||
encoding = getApproximateEncoding ( docBuffer + nBOM, docBufferLen - nBOM );
|
|
||||||
}
|
|
||||||
|
|
||||||
wxString wideEncoding = wxString (
|
wxString wideEncoding = wxString (
|
||||||
encoding.c_str(),
|
encoding.c_str(),
|
||||||
wxConvLocal,
|
wxConvLocal,
|
||||||
|
@ -3227,7 +3226,7 @@ bool MyFrame::openFile ( wxString& fileName, bool largeFile )
|
||||||
size_t nconv;
|
size_t nconv;
|
||||||
char *buffer;
|
char *buffer;
|
||||||
size_t iconvBufferLeft, docBufferLeft;
|
size_t iconvBufferLeft, docBufferLeft;
|
||||||
iconvBufferLen = iconvBufferLeft = (docBufferLen - nBOM) * iconvLenMultiplier + 1;
|
iconvBufferLen = iconvBufferLeft = docBufferLen * iconvLenMultiplier + 1;
|
||||||
docBufferLeft = docBufferLen;
|
docBufferLeft = docBufferLen;
|
||||||
iconvBuffer.extend ( iconvBufferLen );
|
iconvBuffer.extend ( iconvBufferLen );
|
||||||
finalBuffer = buffer = iconvBuffer.data(); // buffer will be incremented by iconv
|
finalBuffer = buffer = iconvBuffer.data(); // buffer will be incremented by iconv
|
||||||
|
@ -3302,10 +3301,8 @@ bool MyFrame::openFile ( wxString& fileName, bool largeFile )
|
||||||
statusProgress ( _T ( "Parsing document..." ) );
|
statusProgress ( _T ( "Parsing document..." ) );
|
||||||
std::auto_ptr<WrapExpat> we ( new WrapExpat() );
|
std::auto_ptr<WrapExpat> we ( new WrapExpat() );
|
||||||
|
|
||||||
bool optimisedParseSuccess = false;
|
|
||||||
|
|
||||||
// omit XML declaration
|
// omit XML declaration
|
||||||
if ( !isUtf8 && finalBufferLen &&
|
if ( !isUtf8 && finalBufferLen > 5 &&
|
||||||
finalBuffer[0] == '<' &&
|
finalBuffer[0] == '<' &&
|
||||||
finalBuffer[1] == '?' &&
|
finalBuffer[1] == '?' &&
|
||||||
finalBuffer[2] == 'x' &&
|
finalBuffer[2] == 'x' &&
|
||||||
|
@ -3323,6 +3320,7 @@ bool MyFrame::openFile ( wxString& fileName, bool largeFile )
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool optimisedParseSuccess = false;
|
||||||
if ( finalBuffer )
|
if ( finalBuffer )
|
||||||
{
|
{
|
||||||
optimisedParseSuccess = we->parse ( finalBuffer, finalBufferLen );
|
optimisedParseSuccess = we->parse ( finalBuffer, finalBufferLen );
|
||||||
|
@ -3998,6 +3996,11 @@ void MyFrame::OnValidateSchema ( wxCommandEvent& event )
|
||||||
{
|
{
|
||||||
std::string rawBuffer, schemaLocation;
|
std::string rawBuffer, schemaLocation;
|
||||||
getRawText ( doc, rawBuffer );
|
getRawText ( doc, rawBuffer );
|
||||||
|
if ( !XmlEncodingHandler::setUtf8 ( rawBuffer ) )
|
||||||
|
{
|
||||||
|
encodingMessage();
|
||||||
|
return;
|
||||||
|
}
|
||||||
auto_ptr<XmlSchemaLocator> xsl ( new XmlSchemaLocator() );
|
auto_ptr<XmlSchemaLocator> xsl ( new XmlSchemaLocator() );
|
||||||
xsl->parse ( rawBuffer.c_str() );
|
xsl->parse ( rawBuffer.c_str() );
|
||||||
if ( ( xsl->getSchemaLocation() ) . empty() )
|
if ( ( xsl->getSchemaLocation() ) . empty() )
|
||||||
|
@ -4034,8 +4037,7 @@ void MyFrame::OnValidateSchema ( wxCommandEvent& event )
|
||||||
doc->clearErrorIndicators();
|
doc->clearErrorIndicators();
|
||||||
|
|
||||||
std::auto_ptr<WrapXerces> validator ( new WrapXerces() );
|
std::auto_ptr<WrapXerces> validator ( new WrapXerces() );
|
||||||
std::string fileNameLocal = ( const char * ) fileName.mb_str ( wxConvLocal );
|
if ( !validator->validate ( fileName ) )
|
||||||
if ( !validator->validate ( fileNameLocal ) )
|
|
||||||
{
|
{
|
||||||
statusProgress ( wxEmptyString );
|
statusProgress ( wxEmptyString );
|
||||||
messagePane ( validator->getLastError(), CONST_WARNING );
|
messagePane ( validator->getLastError(), CONST_WARNING );
|
||||||
|
|
|
@ -432,7 +432,7 @@ void XmlPromptGenerator::handleSchema (
|
||||||
parser->setValidationSchemaFullChecking ( true );
|
parser->setValidationSchemaFullChecking ( true );
|
||||||
|
|
||||||
Grammar *rootGrammar = parser->loadGrammar
|
Grammar *rootGrammar = parser->loadGrammar
|
||||||
( ( const XMLCh * ) ( const char * ) schemaPath.mb_str ( WrapXerces::getMBConv() )
|
( ( const XMLCh * ) WrapXerces::toString ( schemaPath ).GetData()
|
||||||
, Grammar::SchemaGrammarType
|
, Grammar::SchemaGrammarType
|
||||||
);
|
);
|
||||||
if ( !rootGrammar )
|
if ( !rootGrammar )
|
||||||
|
|
|
@ -203,8 +203,8 @@ void XmlSchemaGenerator::generateData ( const wxString &elementName,
|
||||||
continue;
|
continue;
|
||||||
for ( attrItr = attrMap.begin(); attrItr != attrMap.end(); attrItr++ )
|
for ( attrItr = attrMap.begin(); attrItr != attrMap.end(); attrItr++ )
|
||||||
{
|
{
|
||||||
if ( attrs->getNamedItem ( ( const XMLCh * ) ( const char * )
|
if ( attrs->getNamedItem ( ( const XMLCh * )
|
||||||
attrItr->first.mb_str ( WrapXerces::getMBConv() ) ) == NULL )
|
WrapXerces::toString ( attrItr->first ).GetData() ) == NULL )
|
||||||
{
|
{
|
||||||
optAttrs.insert ( attrItr->first );
|
optAttrs.insert ( attrItr->first );
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue