Fixed Unicode problems
This commit is contained in:
parent
a9916c901e
commit
5310603467
|
@ -60,7 +60,7 @@ WrapXerces::~WrapXerces()
|
|||
delete catalogResolver;
|
||||
}
|
||||
|
||||
bool WrapXerces::validate ( const std::string& fileName )
|
||||
bool WrapXerces::validate ( const wxString& fileName )
|
||||
{
|
||||
SAX2XMLReader *parser = XMLReaderFactory::createXMLReader();
|
||||
|
||||
|
@ -84,7 +84,7 @@ bool WrapXerces::validate ( const std::string& fileName )
|
|||
|
||||
try
|
||||
{
|
||||
parser->parse ( fileName.c_str() );
|
||||
parser->parse ( (const XMLCh *) toString ( fileName ).GetData() );
|
||||
}
|
||||
catch ( XMLException& e )
|
||||
{
|
||||
|
@ -221,3 +221,22 @@ wxString WrapXerces::toString ( const XMLCh *str )
|
|||
{
|
||||
return wxString ( ( const char * ) str, getMBConv() );
|
||||
}
|
||||
|
||||
// Converts a wxString into a byte buffer encoded with getMBConv(), including
// the terminating null character. Callers cast GetData() of the returned
// buffer to const XMLCh * and pass it to Xerces-C.
//
// If the conversion fails (in either the size-query pass or the actual
// conversion pass), the returned buffer contains a single null XMLCh instead
// of being empty, so that callers never hand a NULL pointer to Xerces-C.
wxMemoryBuffer WrapXerces::toString ( const wxString &str )
{
	const static XMLCh chNull = '\0'; // Xerces-C crashes when the file name is NULL. We'd better return something other than NULL.
	wxMemoryBuffer buffer ( 0 );

	// Count the terminating '\0' as part of the input so that the converted
	// output is null-terminated as well.
	const size_t lenWC = str.length() + 1;

	// First pass: ask the converter how many bytes the output needs.
	size_t lenMB = getMBConv().FromWChar ( NULL, 0, str.c_str(), lenWC );
	if ( lenMB != wxCONV_FAILED )
	{
		// Second pass: perform the conversion into the buffer.
		buffer.SetBufSize ( lenMB );
		lenMB = getMBConv().FromWChar ( ( char * ) buffer.GetData(), lenMB, str.c_str(), lenWC );
	}

	// Either pass may fail; never forward wxCONV_FAILED to SetDataLen().
	if ( lenMB == wxCONV_FAILED )
	{
		buffer.AppendData ( &chNull, sizeof chNull );
		return buffer;
	}

	buffer.SetDataLen ( lenMB );

	return buffer;
}
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
|
||||
#include <wx/wx.h>
|
||||
#include <wx/strconv.h>
|
||||
#include <wx/buffer.h>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
|
||||
|
@ -41,14 +42,22 @@ class WrapXerces
|
|||
static void Init() throw ();
|
||||
WrapXerces();
|
||||
virtual ~WrapXerces();
|
||||
bool validate ( const std::string& fileName );
|
||||
bool validate ( const wxString &fileName );
|
||||
bool validateMemory ( const char *buffer, size_t len,
|
||||
const wxString &system, wxThread *thread = NULL );
|
||||
const wxString &getLastError();
|
||||
std::pair<int, int> getErrorPosition();
|
||||
static const wxMBConv &getMBConv();
|
||||
static wxString toString ( const XMLCh *str );
|
||||
// Convert Unicode string to const XMLCh *
|
||||
//#if wxCHECK_VERSION(2,9,0)
|
||||
// static wxCharTypeBuffer<XMLCh> toString ( const wxString &str );
|
||||
//#else
|
||||
static wxMemoryBuffer toString ( const wxString &str );
|
||||
//#endif
|
||||
|
||||
private:
|
||||
static const wxMBConv &getMBConv();
|
||||
|
||||
XercesCatalogResolver *catalogResolver;
|
||||
wxString lastError;
|
||||
std::pair<int, int> errorPosition;
|
||||
|
|
|
@ -3123,76 +3123,75 @@ bool MyFrame::openFile ( wxString& fileName, bool largeFile )
|
|||
char *finalBuffer;
|
||||
size_t finalBufferLen;
|
||||
|
||||
// adjust for UTF-8 BOM
|
||||
if ( docBuffer &&
|
||||
( unsigned char ) docBuffer[0] == 0xEF &&
|
||||
( unsigned char ) docBuffer[1] == 0xBB &&
|
||||
( unsigned char ) docBuffer[2] == 0xBF )
|
||||
std::string encoding;
|
||||
if ( docBufferLen >= 4 && // UTF-32 BE
|
||||
( unsigned char ) docBuffer[0] == 0x00 &&
|
||||
( unsigned char ) docBuffer[1] == 0x00 &&
|
||||
( unsigned char ) docBuffer[2] == 0xFE &&
|
||||
( unsigned char ) docBuffer[3] == 0xFF )
|
||||
{
|
||||
docBuffer += 4;
|
||||
docBufferLen -= 4;
|
||||
encoding = "UTF-32BE";
|
||||
}
|
||||
else if ( docBufferLen >= 4 && // UTF-32 LE
|
||||
( unsigned char ) docBuffer[0] == 0xFF &&
|
||||
( unsigned char ) docBuffer[1] == 0xFE &&
|
||||
( unsigned char ) docBuffer[2] == 0x00 &&
|
||||
( unsigned char ) docBuffer[3] == 0x00 )
|
||||
{
|
||||
docBuffer += 4;
|
||||
docBufferLen -= 4;
|
||||
encoding = "UTF-32LE";
|
||||
}
|
||||
else if ( docBufferLen >= 2 && //UTF-16 BE
|
||||
( unsigned char ) docBuffer[0] == 0xFE &&
|
||||
( unsigned char ) docBuffer[1] == 0xFF )
|
||||
{
|
||||
docBuffer += 2;
|
||||
docBufferLen -= 2;
|
||||
encoding = "UTF-16BE";
|
||||
}
|
||||
else if ( docBufferLen >= 2 && //UTF-16 LE
|
||||
( unsigned char ) docBuffer[0] == 0xFF &&
|
||||
( unsigned char ) docBuffer[1] == 0xFE )
|
||||
{
|
||||
docBuffer += 2;
|
||||
docBufferLen -= 2;
|
||||
encoding = "UTF-16LE";
|
||||
}
|
||||
else if ( docBufferLen >= 3 && //UTF-8
|
||||
( unsigned char ) docBuffer[0] == 0xEF &&
|
||||
( unsigned char ) docBuffer[1] == 0xBB &&
|
||||
( unsigned char ) docBuffer[2] == 0xBF )
|
||||
{
|
||||
docBuffer += 3;
|
||||
docBufferLen -= 3;
|
||||
isUtf8 = true;
|
||||
encoding = "UTF-8";
|
||||
}
|
||||
|
||||
// no UTF-8 BOM found
|
||||
std::string encoding;
|
||||
if ( !isUtf8 || !binaryfile->getDataLen() )
|
||||
if ( encoding.empty() )
|
||||
{
|
||||
XmlEncodingSpy es;
|
||||
es.parse ( docBuffer, docBufferLen );
|
||||
encoding = es.getEncoding();
|
||||
if ( encoding == "UTF-8" ||
|
||||
encoding == "utf-8" ||
|
||||
encoding == "US-ASCII" ||
|
||||
encoding == "us-ascii" ) // US-ASCII is a subset of UTF-8
|
||||
isUtf8 = true;
|
||||
if ( encoding.empty() ) // Expat couldn't parse file (e.g. UTF-32)
|
||||
encoding = getApproximateEncoding ( docBuffer, docBufferLen );
|
||||
}
|
||||
|
||||
// convert buffer if not UTF-8
|
||||
int nBOM = 0;
|
||||
if ( isUtf8 )
|
||||
if ( encoding == "UTF-8" ||
|
||||
encoding == "utf-8" ||
|
||||
encoding == "US-ASCII" ||
|
||||
encoding == "us-ascii" || // US-ASCII is a subset of UTF-8
|
||||
docBufferLen == 0 )
|
||||
{
|
||||
finalBuffer = docBuffer;
|
||||
finalBufferLen = docBufferLen;
|
||||
isUtf8 = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
// clear any other BOMs
|
||||
|
||||
if ( docBuffer && // UTF-32 BE
|
||||
( unsigned char ) docBuffer[0] == 0x00 &&
|
||||
( unsigned char ) docBuffer[1] == 0x00 &&
|
||||
( unsigned char ) docBuffer[2] == 0xFE &&
|
||||
( unsigned char ) docBuffer[3] == 0xFF )
|
||||
{
|
||||
nBOM = 4;
|
||||
}
|
||||
else if ( docBuffer && // UTF-32 LE
|
||||
( unsigned char ) docBuffer[0] == 0xFF &&
|
||||
( unsigned char ) docBuffer[1] == 0xFE &&
|
||||
( unsigned char ) docBuffer[2] == 0x00 &&
|
||||
( unsigned char ) docBuffer[3] == 0x00 )
|
||||
{
|
||||
nBOM = 4;
|
||||
}
|
||||
else if ( docBuffer && //UTF-16 BE
|
||||
( unsigned char ) docBuffer[0] == 0xFE &&
|
||||
( unsigned char ) docBuffer[1] == 0xFF )
|
||||
{
|
||||
nBOM = 2;
|
||||
}
|
||||
else if ( docBuffer && //UTF-16 LE
|
||||
( unsigned char ) docBuffer[0] == 0xFF &&
|
||||
( unsigned char ) docBuffer[1] == 0xFE )
|
||||
{
|
||||
nBOM = 2;
|
||||
}
|
||||
|
||||
if ( !encoding.size() ) // Expat couldn't parse file (e.g. UTF-32)
|
||||
{
|
||||
encoding = getApproximateEncoding ( docBuffer + nBOM, docBufferLen - nBOM );
|
||||
}
|
||||
|
||||
wxString wideEncoding = wxString (
|
||||
encoding.c_str(),
|
||||
wxConvLocal,
|
||||
|
@ -3227,7 +3226,7 @@ bool MyFrame::openFile ( wxString& fileName, bool largeFile )
|
|||
size_t nconv;
|
||||
char *buffer;
|
||||
size_t iconvBufferLeft, docBufferLeft;
|
||||
iconvBufferLen = iconvBufferLeft = (docBufferLen - nBOM) * iconvLenMultiplier + 1;
|
||||
iconvBufferLen = iconvBufferLeft = docBufferLen * iconvLenMultiplier + 1;
|
||||
docBufferLeft = docBufferLen;
|
||||
iconvBuffer.extend ( iconvBufferLen );
|
||||
finalBuffer = buffer = iconvBuffer.data(); // buffer will be incremented by iconv
|
||||
|
@ -3302,10 +3301,8 @@ bool MyFrame::openFile ( wxString& fileName, bool largeFile )
|
|||
statusProgress ( _T ( "Parsing document..." ) );
|
||||
std::auto_ptr<WrapExpat> we ( new WrapExpat() );
|
||||
|
||||
bool optimisedParseSuccess = false;
|
||||
|
||||
// omit XML declaration
|
||||
if ( !isUtf8 && finalBufferLen &&
|
||||
if ( !isUtf8 && finalBufferLen > 5 &&
|
||||
finalBuffer[0] == '<' &&
|
||||
finalBuffer[1] == '?' &&
|
||||
finalBuffer[2] == 'x' &&
|
||||
|
@ -3323,6 +3320,7 @@ bool MyFrame::openFile ( wxString& fileName, bool largeFile )
|
|||
}
|
||||
}
|
||||
|
||||
bool optimisedParseSuccess = false;
|
||||
if ( finalBuffer )
|
||||
{
|
||||
optimisedParseSuccess = we->parse ( finalBuffer, finalBufferLen );
|
||||
|
@ -3998,6 +3996,11 @@ void MyFrame::OnValidateSchema ( wxCommandEvent& event )
|
|||
{
|
||||
std::string rawBuffer, schemaLocation;
|
||||
getRawText ( doc, rawBuffer );
|
||||
if ( !XmlEncodingHandler::setUtf8 ( rawBuffer ) )
|
||||
{
|
||||
encodingMessage();
|
||||
return;
|
||||
}
|
||||
auto_ptr<XmlSchemaLocator> xsl ( new XmlSchemaLocator() );
|
||||
xsl->parse ( rawBuffer.c_str() );
|
||||
if ( ( xsl->getSchemaLocation() ) . empty() )
|
||||
|
@ -4034,8 +4037,7 @@ void MyFrame::OnValidateSchema ( wxCommandEvent& event )
|
|||
doc->clearErrorIndicators();
|
||||
|
||||
std::auto_ptr<WrapXerces> validator ( new WrapXerces() );
|
||||
std::string fileNameLocal = ( const char * ) fileName.mb_str ( wxConvLocal );
|
||||
if ( !validator->validate ( fileNameLocal ) )
|
||||
if ( !validator->validate ( fileName ) )
|
||||
{
|
||||
statusProgress ( wxEmptyString );
|
||||
messagePane ( validator->getLastError(), CONST_WARNING );
|
||||
|
|
|
@ -432,7 +432,7 @@ void XmlPromptGenerator::handleSchema (
|
|||
parser->setValidationSchemaFullChecking ( true );
|
||||
|
||||
Grammar *rootGrammar = parser->loadGrammar
|
||||
( ( const XMLCh * ) ( const char * ) schemaPath.mb_str ( WrapXerces::getMBConv() )
|
||||
( ( const XMLCh * ) WrapXerces::toString ( schemaPath ).GetData()
|
||||
, Grammar::SchemaGrammarType
|
||||
);
|
||||
if ( !rootGrammar )
|
||||
|
|
|
@ -203,8 +203,8 @@ void XmlSchemaGenerator::generateData ( const wxString &elementName,
|
|||
continue;
|
||||
for ( attrItr = attrMap.begin(); attrItr != attrMap.end(); attrItr++ )
|
||||
{
|
||||
if ( attrs->getNamedItem ( ( const XMLCh * ) ( const char * )
|
||||
attrItr->first.mb_str ( WrapXerces::getMBConv() ) ) == NULL )
|
||||
if ( attrs->getNamedItem ( ( const XMLCh * )
|
||||
WrapXerces::toString ( attrItr->first ).GetData() ) == NULL )
|
||||
{
|
||||
optAttrs.insert ( attrItr->first );
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue