Improved encoding handling

This commit is contained in:
Zane U. Ji 2014-04-13 23:16:04 +08:00
parent 17f79c10c0
commit 8216a56d1e
29 changed files with 117 additions and 114 deletions

View File

@ -27,7 +27,10 @@ using namespace std;
HouseStyleReader::HouseStyleReader (
map<string, map<string, set<string> > > &m
) : WrapExpat ( true ), ud ( new HouseStyleReaderData() )
, const char *encoding /*= NULL*/
)
: WrapExpat ( encoding, true )
, ud ( new HouseStyleReaderData() )
{
ud->setState ( STATE_UNKNOWN );
ud->depth = ud->cutoffDepth = 0;

View File

@ -43,7 +43,8 @@ class HouseStyleReader : public WrapExpat
{
public:
HouseStyleReader (
map<string, map<string, set<string> > > &m
map<string, map<string, set<string> > > &m,
const char *encoding = NULL
);
virtual ~HouseStyleReader();
void getNodeVector ( vector<pair<string, unsigned> > &v );

View File

@ -13,18 +13,18 @@ DEFINE_EVENT_TYPE(wxEVT_COMMAND_VALIDATION_COMPLETED);
// Diff hunk for the ValidationThread constructor. The change shown here
// renames the `buffer` parameter to `utf8Buffer`; wherever two near-identical
// lines follow each other, the first is the removed line and the second is
// its replacement (same ordering as the other hunks of this commit).
//
// The constructor creates the thread object as joinable, clears the stop
// flag, rejects a NULL input buffer and stores the handler, buffer and system
// identifier in members for later use.
ValidationThread::ValidationThread (
wxEvtHandler *handler,
// removed: old parameter name
const char *buffer,
// added: new parameter name
const char *utf8Buffer,
const wxString &system )
: wxThread ( wxTHREAD_JOINABLE )
, mStopping ( false )
{
// removed / added pair of the same NULL guard
if ( buffer == NULL )
if ( utf8Buffer == NULL )
{
// NOTE(review): a bare `throw;` re-throws the exception currently being
// handled. If this constructor is not running inside a catch handler there
// is nothing to re-throw and std::terminate() is called, so a NULL buffer
// aborts the process instead of reporting an error. Presumably an actual
// exception object was intended - confirm and fix separately.
throw;
}
myEventHandler = handler;
// removed / added pair of the same assignment
// NOTE(review): whether myBuffer copies the bytes or only keeps the pointer
// depends on its declared type, which is not visible in this hunk.
myBuffer = buffer;
myBuffer = utf8Buffer;
mySystem = system;
myIsSucceeded = false;
}

View File

@ -13,7 +13,7 @@ class ValidationThread : public wxThread
public:
ValidationThread (
wxEvtHandler *handler,
const char *buffer,
const char *utf8Buffer,
const wxString &system );
virtual void *Entry();
void setBuffer ( const char *buffer, const char *system );

View File

@ -23,7 +23,7 @@
#include <iostream>
#include <sstream>
WrapExpat::WrapExpat ( bool nameSpaceAware, const char *encoding )
WrapExpat::WrapExpat ( const char *encoding, bool nameSpaceAware )
{
p = ( nameSpaceAware ) ? XML_ParserCreateNS ( encoding, ( XML_Char ) ':' ) : XML_ParserCreate ( encoding );
if ( p == 0 )

View File

@ -32,7 +32,7 @@ using namespace std;
class WrapExpat
{
public:
WrapExpat ( bool nameSpaceAware = false, const char *encoding = NULL );
WrapExpat ( const char *encoding = NULL, bool nameSpaceAware = false );
virtual ~WrapExpat();
bool parse ( const string &buffer, bool isFinal = true );
bool parse ( const char *buffer, size_t size, bool isFinal = true );

View File

@ -27,8 +27,10 @@
XmlAssociateDtd::XmlAssociateDtd (
const std::string& path,
const std::string& publicIdentifier,
size_t size ) :
d ( new DtdData() )
const char *encoding,
size_t size )
: WrapExpat ( encoding )
, d ( new DtdData() )
{
d->buffer.reserve ( size );
d->path = path;

View File

@ -38,6 +38,7 @@ class XmlAssociateDtd : public WrapExpat
XmlAssociateDtd (
const std::string& path = "",
const std::string& publicIdentifier = "",
const char *encoding = NULL,
size_t size = BUFSIZ );
virtual ~XmlAssociateDtd();
std::string getBuffer()

View File

@ -29,8 +29,10 @@
XmlAssociateXsd::XmlAssociateXsd (
const std::string& path,
size_t size ) :
d ( new AssociateXsdData() )
const char *encoding,
size_t size )
: WrapExpat ( encoding )
, d ( new AssociateXsdData() )
{
d->buffer.reserve ( size );
d->path = path;

View File

@ -40,6 +40,7 @@ class XmlAssociateXsd : public WrapExpat
public:
XmlAssociateXsd (
const std::string& path = "",
const char *encoding = NULL,
size_t size = BUFSIZ );
virtual ~XmlAssociateXsd();
std::string getBuffer()

View File

@ -24,8 +24,12 @@
#include <cstring>
#include "xmlassociatexsl.h"
XmlAssociateXsl::XmlAssociateXsl ( const std::string& path, size_t size ) :
d ( new XslData() )
XmlAssociateXsl::XmlAssociateXsl (
const std::string& path,
const char *encoding,
size_t size )
: WrapExpat ( encoding )
, d ( new XslData() )
{
d->buffer.reserve ( size );
d->path = path;

View File

@ -38,6 +38,7 @@ class XmlAssociateXsl : public WrapExpat
public:
XmlAssociateXsl (
const std::string& path = "",
const char *encoding = NULL,
size_t size = BUFSIZ );
virtual ~XmlAssociateXsl();
std::string getBuffer()

View File

@ -1354,23 +1354,16 @@ void MyFrame::OnCheckWellformedness ( wxCommandEvent& event )
if ( utf8Buffer.empty() )
return;
// handle unusual encodings
if ( !XmlEncodingHandler::setUtf8 ( utf8Buffer ) )
{
encodingMessage();
return;
}
doc->clearErrorIndicators();
statusProgress ( _ ( "Parse in progress..." ) );
// check for well-formedness
auto_ptr<WrapExpat> we ( new WrapExpat() );
if ( !we->parse ( utf8Buffer.c_str() ) )
WrapExpat we ( "UTF-8" );
if ( !we.parse ( utf8Buffer ) )
{
statusProgress ( wxEmptyString );
messagePane ( we->getLastError(), CONST_WARNING );
std::pair<int, int> posPair = we->getErrorPosition();
messagePane ( we.getLastError(), CONST_WARNING );
std::pair<int, int> posPair = we.getErrorPosition();
-- ( posPair.first );
int cursorPos =
doc->PositionFromLine ( posPair.first );
@ -3902,14 +3895,9 @@ void MyFrame::OnValidateSchema ( wxCommandEvent& event )
{
std::string rawBuffer;
getRawText ( doc, rawBuffer );
if ( !XmlEncodingHandler::setUtf8 ( rawBuffer ) )
{
encodingMessage();
return;
}
auto_ptr<XmlSchemaLocator> xsl ( new XmlSchemaLocator() );
xsl->parse ( rawBuffer.c_str() );
if ( ( xsl->getSchemaLocation() ) . empty() )
XmlSchemaLocator xsl ( "UTF-8" );
xsl.parse ( rawBuffer.c_str() );
if ( ( xsl.getSchemaLocation() ) . empty() )
{
OnValidateDTD ( event );
return;
@ -3966,11 +3954,6 @@ void MyFrame::OnCreateSchema ( wxCommandEvent& event )
std::string rawBufferUtf8;
getRawText ( doc, rawBufferUtf8 );
if ( !XmlEncodingHandler::setUtf8 ( rawBufferUtf8 ) )
{
encodingMessage();
return;
}
const static wxString types[] = { _ ( "W3C Schema" ), _ ( "DTD" ) };
const static wxString message = _ ( "Please choose a shema type");
@ -3981,11 +3964,11 @@ void MyFrame::OnCreateSchema ( wxCommandEvent& event )
Grammar::GrammarType type = ( dlg.GetSelection() == 0 ) ?
Grammar::SchemaGrammarType : Grammar::DTDGrammarType;
std::auto_ptr<XmlSchemaGenerator> gen ( new XmlSchemaGenerator() );
const wxString &schema = gen->generate(type, doc->getFullFileName(),
rawBufferUtf8.c_str(), rawBufferUtf8.size() );
XmlSchemaGenerator gen;
const wxString &schema = gen.generate(type, doc->getFullFileName(),
rawBufferUtf8.c_str(), rawBufferUtf8.size(), _T ( "UTF-8" ) );
if (schema.IsEmpty()) {
messagePane ( gen->getLastError(), CONST_WARNING );
messagePane ( gen.getLastError(), CONST_WARNING );
return;
}
newDocument ( schema );
@ -4089,7 +4072,7 @@ void MyFrame::OnXslt ( wxCommandEvent& event )
int id = event.GetId();
if ( id == ID_XSLT )
{
XslLocator xl;
XslLocator xl ( "UTF-8" );
xl.parse ( rawBufferUtf8 );
std::string location = xl.getXslLocation();
@ -4211,12 +4194,6 @@ void MyFrame::OnPrettyPrint ( wxCommandEvent& event )
std::string encoding = XmlEncodingHandler::get ( rawBufferUtf8 );
if ( !XmlEncodingHandler::setUtf8 ( rawBufferUtf8, true ) )
{
encodingMessage();
return;
}
statusProgress ( _ ( "Pretty-printing in progress..." ) );
wxString fileName = doc->getFullFileName();
@ -4242,14 +4219,6 @@ void MyFrame::OnPrettyPrint ( wxCommandEvent& event )
CONST_STOP );
else
{
/*
if (encoding != "UTF-8")
{
std::string output = getEncodedBuffer(rawBufferUtf8, encoding);
if (!output.empty())
rawBufferUtf8 = output;
}
*/
if ( encoding != "UTF-8" && !encoding.empty() )
{
XmlEncodingHandler::set ( rawBufferUtf8, encoding );
@ -4584,7 +4553,7 @@ bool MyFrame::saveFile ( XmlDoc *doc, wxString& fileName, bool checkLastModified
try
{
getRawText ( doc, utf8Buffer );
XmlEncodingSpy es;
XmlEncodingSpy es ( "UTF-8" );
es.parse ( utf8Buffer );
encoding = es.getEncoding();
wideEncoding = wxString ( encoding.c_str(), wxConvUTF8 );
@ -4629,13 +4598,13 @@ bool MyFrame::saveFile ( XmlDoc *doc, wxString& fileName, bool checkLastModified
}
else if ( encoding == "UTF-8" )
{
auto_ptr<WrapExpat> we ( new WrapExpat() );
WrapExpat we ( "UTF-8" );
if ( !we->parse ( utf8Buffer ) )
if ( !we.parse ( utf8Buffer ) )
{
//if ( we->isEncodingError() )
// ;
messagePane ( we->getLastError(), CONST_WARNING );
messagePane ( we.getLastError(), CONST_WARNING );
}
success = saveRawUtf8 ( fileNameLocal, utf8Buffer, true, isXml );
if ( success )
@ -5786,15 +5755,14 @@ void MyFrame::OnAssociate ( wxCommandEvent& event )
std::string utf8Buffer;
getRawText ( doc, utf8Buffer );
std::string origEncoding = XmlEncodingHandler::get ( utf8Buffer );
XmlEncodingHandler::setUtf8 ( utf8Buffer, true );
std::auto_ptr<WrapExpat> wellformedparser ( new WrapExpat() );
if ( !wellformedparser->parse ( utf8Buffer ) )
WrapExpat wellformedparser ( "UTF-8" );
if ( !wellformedparser.parse ( utf8Buffer ) )
{
wxString message;
message.Printf (
_ ( "Cannot associate %s: %s" ),
type.c_str(),
wellformedparser->getLastError().c_str() );
wellformedparser.getLastError().c_str() );
messagePane ( message, CONST_STOP );
return;
}
@ -5862,30 +5830,29 @@ void MyFrame::OnAssociate ( wxCommandEvent& event )
if ( id == ID_ASSOCIATE_W3C_SCHEMA )
{
std::auto_ptr<XmlAssociateXsd> parser ( new XmlAssociateXsd ( utf8Path ) );
if ( !parser->parse ( utf8Buffer ) )
XmlAssociateXsd parser ( utf8Path, "UTF-8" );
if ( !parser.parse ( utf8Buffer ) )
return;
modifiedBuffer = parser->getBuffer();
modifiedBuffer = parser.getBuffer();
}
else if ( id == ID_ASSOCIATE_DTD_SYSTEM || id == ID_ASSOCIATE_DTD_PUBLIC )
{
std::auto_ptr<XmlAssociateDtd> parser ( new XmlAssociateDtd (
utf8Path,
( auxiliaryBox ) ? ( const char * ) aux.mb_str ( wxConvUTF8 ) : "" ) );
if ( !parser->parse ( utf8Buffer ) )
XmlAssociateDtd parser ( utf8Path,
( auxiliaryBox ) ? ( const char * ) aux.mb_str ( wxConvUTF8 ) : "", "UTF-8" );
if ( !parser.parse ( utf8Buffer ) )
return;
modifiedBuffer = parser->getBuffer();
modifiedBuffer = parser.getBuffer();
}
else if ( id == ID_ASSOCIATE_XSL )
{
std::auto_ptr<XmlAssociateXsl> parser ( new XmlAssociateXsl (
utf8Path ) );
if ( !parser->parse ( utf8Buffer ) )
XmlAssociateXsl parser( utf8Path, "UTF-8" );
if ( !parser.parse ( utf8Buffer ) )
return;
modifiedBuffer = parser->getBuffer();
modifiedBuffer = parser.getBuffer();
}
else
return;
XmlEncodingHandler::set ( modifiedBuffer, origEncoding );
doc->SetTextRaw ( modifiedBuffer.c_str() );
doc->SetFocus();

View File

@ -1034,29 +1034,24 @@ void XmlCtrl::adjustNoColumnWidth()
// Diff hunk for the no-argument overload: obtains the control's text as a
// std::string and forwards its data pointer and byte length to
// updatePromptMaps ( const char *, size_t ).
//
// Old and new lines are interleaved below, which is why `bufferUtf8` appears
// to be declared twice; only one of the two variants exists on either side of
// the commit.
void XmlCtrl::updatePromptMaps()
{
// removed (next four lines): fetched the text as a wxString, converted it
// with wxConvUTF8, then passed it through XmlEncodingHandler::setUtf8
wxString buffer = GetText();
std::string bufferUtf8;
bufferUtf8 = ( const char * ) buffer.mb_str ( wxConvUTF8 );
XmlEncodingHandler::setUtf8 ( bufferUtf8, true );
// added: takes the text from myGetTextRaw() in a single step
// NOTE(review): presumably myGetTextRaw() already yields UTF-8 bytes, which
// would make the conversion above redundant - confirm against its definition.
std::string bufferUtf8 = myGetTextRaw();
// unchanged: hand the buffer and its length to the two-argument overload
updatePromptMaps ( bufferUtf8.c_str(), bufferUtf8.size() );
}
void XmlCtrl::updatePromptMaps ( const char *buffer, size_t bufferLen )
void XmlCtrl::updatePromptMaps ( const char *utf8Buffer, size_t bufferLen )
{
attributeMap.clear();
elementMap.clear();
elementStructureMap.clear();
std::auto_ptr<XmlPromptGenerator> xpg ( new XmlPromptGenerator (
basePath,
auxPath ) );
xpg->parse ( buffer, bufferLen );
xpg->getAttributeMap ( attributeMap );
xpg->getRequiredAttributeMap ( requiredAttributeMap );
xpg->getElementMap ( elementMap );
xpg->getElementStructureMap ( elementStructureMap );
xpg->getEntitySet ( entitySet );
grammarFound = xpg->getGrammarFound();
XmlPromptGenerator xpg ( basePath, auxPath, "UTF-8" );
xpg.parse ( utf8Buffer, bufferLen );
xpg.getAttributeMap ( attributeMap );
xpg.getRequiredAttributeMap ( requiredAttributeMap );
xpg.getElementMap ( elementMap );
xpg.getElementStructureMap ( elementStructureMap );
xpg.getEntitySet ( entitySet );
grammarFound = xpg.getGrammarFound();
entitySet.insert ( _T ( "amp" ) );
entitySet.insert ( _T ( "apos" ) );
entitySet.insert ( _T ( "quot" ) );
@ -1954,8 +1949,6 @@ bool XmlCtrl::backgroundValidate()
std::string bufferUtf8 = myGetTextRaw();
XmlEncodingHandler::setUtf8( bufferUtf8, true );
return backgroundValidate (
bufferUtf8.c_str(),
basePath,

View File

@ -115,7 +115,7 @@ class XmlCtrl: public wxStyledTextCtrl
bool zoomOnly = false );
void applyVisibilityState ( int state = SHOW_TAGS );
void updatePromptMaps();
void updatePromptMaps ( const char *buffer, size_t bufferLen );
void updatePromptMaps ( const char *utf8Buffer, size_t bufferLen );
void adjustCursor();
void adjustSelection();
void foldAll();

View File

@ -23,8 +23,9 @@
#include <expat.h>
#include "xmlencodingspy.h"
XmlEncodingSpy::XmlEncodingSpy() :
d ( new EncodingData() )
XmlEncodingSpy::XmlEncodingSpy ( const char *encoding )
: WrapExpat ( encoding )
, d ( new EncodingData() )
{
XML_SetUserData ( p, d.get() );
XML_SetXmlDeclHandler ( p, xmldeclhandler );

View File

@ -34,7 +34,7 @@ struct EncodingData : public ParserData
class XmlEncodingSpy : public WrapExpat
{
public:
XmlEncodingSpy();
XmlEncodingSpy ( const char *encoding = NULL );
virtual ~XmlEncodingSpy();
std::string getEncoding()
{

View File

@ -23,7 +23,9 @@
#include <expat.h>
#include "xmlparseschemans.h"
XmlParseSchemaNs::XmlParseSchemaNs() : WrapExpat ( false ), d ( new ParseSchemaNsData() )
XmlParseSchemaNs::XmlParseSchemaNs()
: WrapExpat ( NULL, false )
, d ( new ParseSchemaNsData() )
{
d->p = p; // set parser for XML_StopParser call
XML_SetUserData ( p, d.get() );

View File

@ -45,7 +45,11 @@ using namespace xercesc;
XmlPromptGenerator::XmlPromptGenerator (
const wxString& basePath,
const wxString& auxPath ) : d ( new PromptGeneratorData() )
const wxString& auxPath,
const char *encoding
)
: WrapExpat ( encoding )
, d ( new PromptGeneratorData() )
{
XML_SetUserData ( p, d.get() );
d->p = p;

View File

@ -53,7 +53,8 @@ class XmlPromptGenerator : public WrapExpat
public:
XmlPromptGenerator (
const wxString& basePath = wxEmptyString,
const wxString& auxPath = wxEmptyString );
const wxString& auxPath = wxEmptyString,
const char *encoding = NULL );
virtual ~XmlPromptGenerator();
void getAttributeMap (
std::map<wxString, std::map<wxString, std::set<wxString> > >

View File

@ -40,8 +40,13 @@ XmlSchemaGenerator::~XmlSchemaGenerator()
{
}
const wxString &XmlSchemaGenerator::generate ( Grammar::GrammarType grammarType,
const wxString &filepath, const char *buffer, size_t len )
const wxString &XmlSchemaGenerator::generate (
Grammar::GrammarType grammarType
, const wxString &filepath
, const char *buffer
, size_t len
, const wxString &encoding
)
{
mGrammarType = grammarType;
mElements.clear();
@ -56,6 +61,9 @@ const wxString &XmlSchemaGenerator::generate ( Grammar::GrammarType grammarType,
MemBufInputSource source ( ( const XMLByte * ) buffer, len,
filepath.mb_str( wxConvLocal ) );
if ( !wxIsEmpty ( encoding ) )
source.setEncoding ( (const XMLCh *)
WrapXerces::toString ( encoding ).GetData() );
try {
//XMLPlatformUtils::fgSSE2ok = false;
parser->parse ( source );

View File

@ -38,7 +38,8 @@ public:
virtual ~XmlSchemaGenerator();
const wxString &generate ( Grammar::GrammarType grammarType,
const wxString &filepath, const char *buffer, size_t len );
const wxString &filepath, const char *buffer, size_t len,
const wxString &encoding );
const wxString &getLastError() { return mLastError; }
static void addIndent ( wxString &str, size_t nIndent )

View File

@ -24,8 +24,9 @@
#include <expat.h>
#include "xmlschemalocator.h"
XmlSchemaLocator::XmlSchemaLocator() :
WrapExpat ( true ), d ( new SchemaLocatorData() )
XmlSchemaLocator::XmlSchemaLocator ( const char *encoding )
: WrapExpat ( encoding, true )
, d ( new SchemaLocatorData() )
{
d->parser = p;
XML_SetUserData ( p, d.get() );

View File

@ -34,7 +34,7 @@ struct SchemaLocatorData
class XmlSchemaLocator : public WrapExpat
{
public:
XmlSchemaLocator();
XmlSchemaLocator ( const char *encoding );
virtual ~XmlSchemaLocator();
std::string getSchemaLocation();
private:

View File

@ -24,8 +24,13 @@
#include <expat.h>
#include "xmlschemaparser.h"
XmlSchemaParser::XmlSchemaParser ( PromptGeneratorData *data, bool nameSpaceAware ) :
WrapExpat ( nameSpaceAware ), d ( new SchemaParserData )
XmlSchemaParser::XmlSchemaParser (
PromptGeneratorData *data,
const char *encoding,
bool nameSpaceAware
)
: WrapExpat ( encoding, nameSpaceAware )
, d ( new SchemaParserData )
{
d->promptData = data;
d->setState ( STATE_UNKNOWN );

View File

@ -35,7 +35,10 @@ struct SchemaParserData : public ParserData
class XmlSchemaParser : public WrapExpat
{
public:
XmlSchemaParser ( PromptGeneratorData *data, bool nameSpaceAware );
XmlSchemaParser (
PromptGeneratorData *data,
const char *encoding,
bool nameSpaceAware );
virtual ~XmlSchemaParser();
enum {
STATE_UNKNOWN,

View File

@ -23,7 +23,7 @@
#include "getword.h"
XmlWordCount::XmlWordCount ( const char *encoding )
: WrapExpat ( false, encoding )
: WrapExpat ( encoding, false )
, wcd ( new WordCountData() )
{
wcd->wordCount = 0;

View File

@ -24,7 +24,9 @@
#include <expat.h>
#include "xsllocator.h"
XslLocator::XslLocator() : d ( new XslLocatorData() )
XslLocator::XslLocator ( const char *encoding )
: WrapExpat ( encoding )
, d ( new XslLocatorData() )
{
d->parser = p;
XML_SetUserData ( p, d.get() );

View File

@ -34,7 +34,7 @@ struct XslLocatorData : public ParserData
class XslLocator : public WrapExpat
{
public:
XslLocator();
XslLocator ( const char * encoding = NULL );
virtual ~XslLocator();
std::string getXslLocation();
private: