/* * Copyright 2013 Zane U. Ji * * This file is part of Xml Copy Editor. * * Xml Copy Editor is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * Xml Copy Editor is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Xml Copy Editor; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include #ifndef WX_PRECOMP #include #include #endif #include "dtd2schema.h" #include "wrapxerces.h" #include "xmlschemagenerator.h" // for addIndent #include #include Dtd2Schema::Dtd2Schema() { } Dtd2Schema::~Dtd2Schema() { } const wxString &Dtd2Schema::convert ( const wxString &dtdFile ) { XercesDOMParser parser; MySAX2Handler errorHandler; DTDGrammar *grammar; try { parser.setErrorHandler ( &errorHandler ); grammar = ( DTDGrammar * ) parser.loadGrammar ( ( const XMLCh * ) WrapXerces::toString ( dtdFile ).GetData() , Grammar::DTDGrammarType ); } catch ( const SAXParseException &e ) { mErrors << wxString::Format ( _("Error at line %lld, column %lld: %s[br]"), e.getLineNumber(), e.getColumnNumber(), WrapXerces::toString ( e.getMessage() ).c_str() ); return mSchema; } mSchema << _T("") << wxTextFile::GetEOL() << _T(" elemEnum = grammar->getElemEnumerator(); while ( elemEnum.hasMoreElements() ) { elements << convertElement ( elemEnum.nextElement(), nIndent ); } const XMLCh *targetNameSpace = grammar->getTargetNamespace(); if ( targetNameSpace != NULL && *targetNameSpace != 0 ) { wxString targetNS = WrapXerces::toString ( targetNameSpace ); if ( mTargetNameSpace.empty() ) mTargetNameSpace = targetNS; else if ( targetNS != mTargetNameSpace ) mErrors << wxString::Format ( _("Target namespace is redefined: %s -> %s[br]"), targetNS.c_str(), mTargetNameSpace.c_str() ); } if ( !mTargetNameSpace.empty() ) mSchema << wxTextFile::GetEOL() << _T(" targetNamespace=\"") << mTargetNameSpace << _T("\"") << wxTextFile::GetEOL() << _T(" xmlns=\"") << mTargetNameSpace << _T("\""); wxString importNS; std::map::iterator itr; itr = mNameSpaceMap.find ( _T("xml") ); if ( itr != mNameSpaceMap.end() && itr->second.empty() ) itr->second = wxString::FromUTF8 ( (const char *) XML_XML_NAMESPACE ); for ( itr = mNameSpaceMap.begin(); itr != mNameSpaceMap.end(); ++itr ) { mSchema << wxTextFile::GetEOL() << _T(" xmlns:") << itr->first << _T("=\"") << itr->second << _T("\""); importNS << _T(" second.empty() ? itr->first : itr->second ) << _T("\"/>") << wxTextFile::GetEOL(); } mSchema << _T(">") << wxTextFile::GetEOL() << importNS << wxTextFile::GetEOL() << elements; #if 0 NameIdPoolEnumerator entityEnum = grammar->getEntityEnumerator(); while ( entityEnum.hasMoreElements() ) { mSchema << convertEntity ( entityEnum.nextElement(), nIndent ); } #endif NameIdPoolEnumerator notationEnum = grammar->getNotationEnumerator(); while ( notationEnum.hasMoreElements() ) { mSchema << convertNotation ( notationEnum.nextElement(), nIndent ); } mSchema << _T("") << wxTextFile::GetEOL(); return mSchema; } wxString Dtd2Schema::convertElement ( const DTDElementDecl &element , size_t nIndent ) { wxString schema; wxString elementName = WrapXerces::toString ( element.getBaseName() ); XmlSchemaGenerator::addIndent ( schema, nIndent ); schema << _T("getType() == ContentSpecNode::Leaf ) { const QName *qname = contentSpec->getElement(); if ( qname == NULL ) pcdata = true; else { const XMLCh *name = qname->getRawName(); pcdata = ( name == NULL || *name == 0 ); } } if ( pcdata ) { schema << _T("\" type=\"xs:string\"/>") << wxTextFile::GetEOL() << wxTextFile::GetEOL(); return schema; } } // else Complex type schema << _T("\">") << wxTextFile::GetEOL(); // Content XmlSchemaGenerator::addIndent ( schema, nIndent + 1 ); schema << _T("") << wxTextFile::GetEOL() << convertContent ( contentSpec, nIndent + 2 ); // Attributes const XMLAttDefList &attList = element.getAttDefList(); size_t i, count; count = attList.getAttDefCount(); for ( i = 0; i < count; i++ ) { schema << convertAttribute ( attList.getAttDef ( i ), nIndent + 2, elementName ); } XmlSchemaGenerator::addIndent ( schema, nIndent + 1); schema << _T("") << wxTextFile::GetEOL(); XmlSchemaGenerator::addIndent ( schema, nIndent ); schema << _T("") << wxTextFile::GetEOL() << wxTextFile::GetEOL(); return schema; } wxString Dtd2Schema::convertContent ( const ContentSpecNode *content , size_t nIndent , const ContentSpecNode *parent /* = NULL */ ) { wxString schema, suffix; if ( content == NULL ) return schema; ContentSpecNode::NodeTypes type = content->getType(); switch ( type ) { case ContentSpecNode::ZeroOrOne: if ( parent != NULL && parent->getType() == type ) { --nIndent; break; } XmlSchemaGenerator::addIndent ( schema, nIndent ); schema << _T("") << wxTextFile::GetEOL(); XmlSchemaGenerator::addIndent ( suffix, nIndent ); suffix << _T("") << wxTextFile::GetEOL(); break; case ContentSpecNode::ZeroOrMore: if ( parent != NULL && parent->getType() == type ) { --nIndent; break; } XmlSchemaGenerator::addIndent ( schema, nIndent ); schema << _T("") << wxTextFile::GetEOL(); XmlSchemaGenerator::addIndent ( suffix, nIndent ); suffix << _T("") << wxTextFile::GetEOL(); break; case ContentSpecNode::OneOrMore: if ( parent != NULL && parent->getType() == type ) { --nIndent; break; } XmlSchemaGenerator::addIndent ( schema, nIndent ); schema << _T("") << wxTextFile::GetEOL(); XmlSchemaGenerator::addIndent ( suffix, nIndent ); suffix << _T("") << wxTextFile::GetEOL(); break; case ContentSpecNode::Choice: if ( parent != NULL && parent->getType() == type && parent->getMinOccurs() == content->getMinOccurs() && parent->getMaxOccurs() == content->getMaxOccurs() ) { --nIndent; break; } XmlSchemaGenerator::addIndent ( schema, nIndent ); schema << _T("") << wxTextFile::GetEOL(); XmlSchemaGenerator::addIndent ( suffix, nIndent ); suffix << _T("") << wxTextFile::GetEOL(); break; case ContentSpecNode::Sequence: if ( parent != NULL && parent->getType() == type && parent->getMinOccurs() == content->getMinOccurs() && parent->getMaxOccurs() == content->getMaxOccurs() ) { --nIndent; break; } XmlSchemaGenerator::addIndent ( schema, nIndent ); schema << _T("") << wxTextFile::GetEOL(); XmlSchemaGenerator::addIndent ( suffix, nIndent ); suffix << _T("") << wxTextFile::GetEOL(); break; case ContentSpecNode::Any: XmlSchemaGenerator::addIndent ( schema, nIndent ); schema << _T("") << wxTextFile::GetEOL(); XmlSchemaGenerator::addIndent ( suffix, nIndent ); suffix << _T("") << wxTextFile::GetEOL(); break; case ContentSpecNode::All: if ( parent != NULL && parent->getType() == type && parent->getMinOccurs() == content->getMinOccurs() && parent->getMaxOccurs() == content->getMaxOccurs() ) { --nIndent; break; } XmlSchemaGenerator::addIndent ( schema, nIndent ); schema << _T("") << wxTextFile::GetEOL(); XmlSchemaGenerator::addIndent ( suffix, nIndent ); suffix << _T("") << wxTextFile::GetEOL(); break; default: { const QName *qnm = content->getElement(); if ( qnm == NULL ) { mErrors << _("Ignored content type: ") << type << _T("[br]"); break; } wxString name = WrapXerces::toString ( qnm->getRawName() ); if ( name.empty() ) {// #PCDATA wxASSERT ( type == ContentSpecNode::Leaf ); break; } XmlSchemaGenerator::addIndent ( schema, nIndent ); schema << _T("") << wxTextFile::GetEOL(); size_t index = name.Index ( ':' ); if ( index != wxString::npos ) { // Validate the name space wxString nameSpace = name.Left ( index ); if ( mNameSpaceMap.find ( nameSpace ) == mNameSpaceMap.end() ) { if ( nameSpace != _T("xml") ) mErrors << wxString::Format ( _("Unknown namespace: %s[br]"), nameSpace.c_str() ); mNameSpaceMap [ nameSpace ]; // Initialize it. } } break; } // default: } // switch schema << convertContent ( content->getFirst(), nIndent + 1, content ) << convertContent ( content->getSecond(), nIndent + 1, content ) << suffix; return schema; } wxString Dtd2Schema::convertOccurrence ( const ContentSpecNode *content ) { wxString schema; int count = content->getMinOccurs(); if (count < 0) schema << _T(" minOccurs=\"unbounded\""); else if ( count != 1 ) schema << _T(" minOccurs=\"") << count << _T("\""); count = content->getMaxOccurs(); if ( count < 0 ) schema << _T(" maxOccurs=\"unbounded\""); else if ( count != 1 ) schema << _T(" maxOccurs=\"") << count << _T("\""); return schema; } wxString Dtd2Schema::convertAttribute ( const XMLAttDef &att , size_t nIndent , const wxString &element ) { wxString schema; if ( att.getType() == XMLAttDef::Any_Any ) { XmlSchemaGenerator::addIndent ( schema, nIndent ); schema << _T("") << wxTextFile::GetEOL(); return schema; } wxString name = WrapXerces::toString ( att.getFullName() ); // targetNamespace? if ( name == _T("xmlns") ) { wxString ns = WrapXerces::toString ( att.getValue() ); if ( mTargetNameSpace.empty() ) mTargetNameSpace = ns; else if ( mTargetNameSpace != ns ) mErrors << wxString::Format ( _("Ignored namespace of %s: %s[br]"), element.c_str(), ns.c_str() ); return wxEmptyString; } // Reference? size_t index = name.Index ( ':' ); if ( index != wxString::npos ) { wxString ns = name.Left ( index ); if ( ns == _T("xmlns") ) // A new name space is defined { wxString url = WrapXerces::toString ( att.getValue() ); ns = name.substr ( index + 1 ); // A new name space or it's been referenced in a element content if ( mNameSpaceMap [ ns ].empty() ) mNameSpaceMap [ ns ] = url; else if ( mNameSpaceMap [ ns ] != url ) mErrors << wxString::Format ( _("Namespace redefined: %s -> %s[br]"), mNameSpaceMap [ ns ].c_str(), url.c_str() ); return wxEmptyString; } else if ( mNameSpaceMap.find ( ns ) == mNameSpaceMap.end() ) { if ( ns != _T("xml") ) mErrors << wxString::Format ( _("Unknown namespace: %s[br]"), ns.c_str() ); mNameSpaceMap [ ns ]; // Initialize it } XmlSchemaGenerator::addIndent ( schema, nIndent ); schema << _T("") << wxTextFile::GetEOL(); return schema; } XmlSchemaGenerator::addIndent ( schema, nIndent ); schema << _T("") << wxTextFile::GetEOL() << convertAttType ( att, nIndent + 1 ); XmlSchemaGenerator::addIndent ( schema, nIndent ); schema << _T("") << wxTextFile::GetEOL(); } else { schema << convertAttType ( att, nIndent + 1 ) << convertAttValue ( att ) << _T("/>") << wxTextFile::GetEOL(); } return schema; } wxString Dtd2Schema::convertAttType ( const XMLAttDef &att, size_t nIndent ) { wxString type; wxASSERT ( att.getType() != XMLAttDef::Any_Any ); // Enumerations are prefixed with a type in docbook DTDs if ( att.getEnumeration() != NULL ) //case XMLAttDef::Enumeration: { XmlSchemaGenerator::addIndent ( type, nIndent ); type << _T("") << wxTextFile::GetEOL(); XmlSchemaGenerator::addIndent ( type, nIndent + 1 ); type << _T("") << wxTextFile::GetEOL(); wxStringTokenizer tokens ( WrapXerces::toString ( att.getEnumeration() ), _T(" ") ); while ( tokens.HasMoreTokens() ) { XmlSchemaGenerator::addIndent ( type, nIndent + 2); type << _T("") << wxTextFile::GetEOL(); } XmlSchemaGenerator::addIndent ( type, nIndent + 1 ); type << _T("") << wxTextFile::GetEOL(); XmlSchemaGenerator::addIndent ( type, nIndent ); type << _T("") << wxTextFile::GetEOL(); return type; } switch ( att.getType() ) { case XMLAttDef::CData: type << _T(" type=\"xs:string\""); break; case XMLAttDef::ID: type << _T(" type=\"xs:ID\""); break; case XMLAttDef::IDRef: type << _T(" type=\"xs:IDREF\""); break; case XMLAttDef::IDRefs: type << _T(" type=\"xs:IDREFS\""); break; case XMLAttDef::Entity: type << _T(" type=\"xs:ENTITY\""); break; case XMLAttDef::Entities: type << _T(" type=\"xs:ENTITIES\""); break; case XMLAttDef::NmToken: type << _T(" type=\"xs:NMTOKEN\""); break; case XMLAttDef::NmTokens: type << _T(" type=\"xs:NMTOKENS\""); break; case XMLAttDef::Notation: type << _T(" type=\"xs:NOTATION\""); break; default: const XMLCh *type; type = att.getAttTypeString( att.getType() ); mErrors << wxString::Format ( _("Ignored attribute \"%s\"'s type: %s[br]"), WrapXerces::toString ( att.getFullName() ).c_str(), WrapXerces::toString ( type ).c_str() ); break; } return type; } wxString Dtd2Schema::convertAttValue ( const XMLAttDef &att) { wxString value; if ( att.getValue() != NULL ) { if ( att.getDefaultType() == XMLAttDef::Fixed ) value << _T(" fixed=\""); else value << _T(" default=\""); value << WrapXerces::toString ( att.getValue() ) << _T("\""); } switch ( att.getDefaultType() ) { case XMLAttDef::Default: case XMLAttDef::Implied: case XMLAttDef::Fixed: break; case XMLAttDef::Required: value << _T(" use=\"required\""); break; case XMLAttDef::Prohibited: value << _T(" use=\"prohibited\""); break; default: const XMLCh *defaultType; defaultType = XMLAttDef::getDefAttTypeString( att.getDefaultType() ); mErrors << wxString::Format ( _("Unknown default type of attribute \"%s\": %s[br]"), WrapXerces::toString ( att.getFullName() ).c_str(), WrapXerces::toString ( defaultType ).c_str() ); break; } return value; } wxString Dtd2Schema::convertEntity ( const DTDEntityDecl &entity , size_t nIndent ) { wxString schema; return schema; } wxString Dtd2Schema::convertNotation ( const XMLNotationDecl ¬ation , size_t nIndent ) { wxString schema; XmlSchemaGenerator::addIndent ( schema, nIndent ); schema << _T("") << wxTextFile::GetEOL() << wxTextFile::GetEOL(); return schema; } void Dtd2Schema::reset() { mErrors.clear(); mSchema.clear(); mTargetNameSpace.clear(); mNameSpaceMap.clear(); }