/* * Copyright 2012 Zane U. Ji. * * This file is part of Xml Copy Editor. * * Xml Copy Editor is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * Xml Copy Editor is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Xml Copy Editor; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include "xmlschemagenerator.h" #include "wrapxerces.h" #include #include #include #include #include #include #include "xercescatalogresolver.h" /* Upper bound on the capacity pre-allocated for the whole generated schema string (see generate). */ const static size_t maxReservedSchemaBuffer = 1024 * 1024; /* Capacity pre-allocated for each per-element schema fragment (see generateSchema and generateDTD). */ const static size_t maxElementSchemaBuffer = 1024; /* Constructor: stores the inline-simple-type flag and initialises the grammar type to SchemaGrammarType. */ XmlSchemaGenerator::XmlSchemaGenerator ( bool inlineSimpleType /*= true*/) : mInlineSimpleType ( inlineSimpleType ) , mGrammarType ( Grammar::SchemaGrammarType ) { } XmlSchemaGenerator::~XmlSchemaGenerator() { } /* Parses the in-memory XML document in buffer (len bytes) and builds schema or DTD text describing it, depending on grammarType. encoding is applied to the input source only when it is not empty. Returns a reference to mSchema. mElements and mSchema are cleared on entry; when parsing throws an XMLException or no document is produced, mLastError is set and mSchema is returned as it is at that point. */ const wxString &XmlSchemaGenerator::generate ( Grammar::GrammarType grammarType , const wxString &filepath , const char *buffer , size_t len , const wxString &encoding ) { mGrammarType = grammarType; mElements.clear(); mSchema.Clear(); XercesCatalogResolver catalogResolver; std::auto_ptr parser ( new XercesDOMParser() ); parser->setDoNamespaces ( true ); parser->setDoSchema ( true ); parser->setValidationSchemaFullChecking ( false ); parser->setEntityResolver ( &catalogResolver ); MemBufInputSource source ( ( const XMLByte * ) buffer, len, filepath.mb_str( wxConvLocal ) ); if ( !wxIsEmpty ( encoding ) ) source.setEncoding ( (const XMLCh *) WrapXerces::toString ( encoding ).GetData() ); try { //XMLPlatformUtils::fgSSE2ok = false; 
parser->parse ( source ); } catch ( XMLException& e ) { mLastError = WrapXerces::toString ( e.getMessage() ); return mSchema; } xercesc::DOMDocument *doc = parser->getDocument(); if ( doc == NULL ) { mLastError = _ ("Failed to load xml file."); return mSchema; } /* Reserve about a third of the input length for the output, capped at maxReservedSchemaBuffer. */ size_t size = len / 3; if ( size > maxReservedSchemaBuffer ) size = maxReservedSchemaBuffer; mSchema.Alloc ( size ); if ( mGrammarType == Grammar::SchemaGrammarType ) mSchema << _T("") << getEOL() << _T("") << getEOL(); xercesc::DOMElement *root = doc->getDocumentElement(); /* Collect all element nodes first, then analyse them; with inlined simple types in schema mode the fragments are written in a final pass once all data is available. */ if ( root != NULL ) { findAllElements ( *root ); generateData ( *root, 1 ); if ( mInlineSimpleType && mGrammarType == Grammar::SchemaGrammarType ) outputSchema ( *root ); } if ( mGrammarType == Grammar::SchemaGrammarType ) mSchema << _T("") << getEOL(); mSchema.Shrink(); return mSchema; } /* Recursively records every element node under its tag name in mElements. nIndent is only forwarded to the recursive calls. */ void XmlSchemaGenerator::findAllElements ( const DOMElement &element, size_t nIndent /*= 0*/) { wxString tagName = WrapXerces::toString ( element.getTagName() ); mElements[tagName].nodes.insert ( &element ); DOMElement *child = WrapXerces::getFirstElementChild ( element ); while ( child ) { findAllElements ( *child, nIndent ); child = WrapXerces::getNextElementSibling ( *child ); } } /* Walks the element tree and runs the per-name analysis for each tag name whose ElmtData name is still empty, then recurses into the element children. */ void XmlSchemaGenerator::generateData ( const DOMElement &element, size_t nIndent /*= 0*/) { wxString name = WrapXerces::toString ( element.getTagName() ); if ( mElements[name].name.empty() ) { // Only generate data once generateData ( name, nIndent ); } DOMElement *child = WrapXerces::getFirstElementChild ( element ); while ( child ) { generateData ( *child, nIndent ); child = WrapXerces::getNextElementSibling ( *child ); } } /* Aggregates, over all nodes recorded for elementName: child occurrence bounds and ordering constraints, the mixed-content flag, and attribute data. Afterwards the fragment is generated and appended to mSchema immediately for DTD output, or for schema output when simple types are not inlined. */ void XmlSchemaGenerator::generateData ( const wxString &elementName, size_t nIndent /*= 0*/) { ElmtData &data = mElements[elementName]; std::set::iterator elmtItr; data.name = elementName; //Content std::map &childMap = data.children; std::map::iterator itr; std::set precedence; elmtItr = data.nodes.begin(); for ( ; elmtItr != data.nodes.end(); 
++elmtItr ) { std::map countMap; DOMNode *child = ( **elmtItr ).getFirstChild(); for ( ; child != NULL; child = child->getNextSibling() ) { DOMNode::NodeType type = child->getNodeType(); if ( type != DOMNode::ELEMENT_NODE ) { if ( type == DOMNode::TEXT_NODE ) { // Check for mixed content wxString value = WrapXerces::toString ( child->getNodeValue() ); if ( !value.Trim().Trim ( false ).empty() ) data.mixed = true; } continue; } /* Every child name seen earlier within this parent node is recorded as a predecessor of the current child name. */ wxString name = WrapXerces::toString ( child->getNodeName() ); childMap[name].precedence.insert ( precedence.begin(), precedence.end() ); childMap[name].precedence.erase ( name ); // Don't depend on oneself precedence.insert ( name ); countMap[name] += 1; } precedence.clear(); /* Raise maxOccurs to the largest per-node count seen for each child name. */ std::map::iterator countItr = countMap.begin(); for ( ; countItr != countMap.end(); ++countItr ) { if ( childMap[countItr->first].maxOccurs < countItr->second ) childMap[countItr->first].maxOccurs = countItr->second; } /* Any known child name missing from this node gets minOccurs 0; the scan is skipped when the two maps have the same size. */ if ( childMap.size() == countMap.size() ) continue; for ( itr = childMap.begin(); itr != childMap.end(); ++itr ) { if ( countMap.find ( itr->first ) != countMap.end() ) continue; itr->second.minOccurs = 0; } } // Attribute std::map &attrMap = data.attrMap; std::set &optAttrs = data.optAttrs; std::map::iterator attrItr; /* Second pass over the same nodes: collect attribute names and decide which ones are optional. */ elmtItr = data.nodes.begin(); for ( ; elmtItr != data.nodes.end(); ++elmtItr ) { DOMNamedNodeMap *attrs = ( **elmtItr ).getAttributes(); if ( attrs == NULL ) { for ( attrItr = attrMap.begin(); attrItr != attrMap.end(); ++attrItr ) optAttrs.insert ( attrItr->first ); continue; } wxString name; DOMAttr *attr; size_t i = attrs->getLength(); while ( i-- > 0 ) { attr = ( DOMAttr* ) attrs->item ( i ); name = WrapXerces::toString ( attr->getName() ); /* Attributes that carry a prefix are logged and skipped. */ if ( attr->getPrefix() != NULL ) { wxLogDebug ( _T("Ignore: %s"), name.c_str() ); continue; } if ( elmtItr != data.nodes.begin() ) // Not the first node if ( attrMap.find ( name ) == attrMap.end() ) // Not in the map optAttrs.insert ( name ); /* When getSpecified() is true only the map key is created; otherwise the attribute value is stored and later emitted as the default value. */ if ( attr->getSpecified() ) attrMap[name]; // Initialize 
attribute map else attrMap[name] = attr->getValue(); } /* Known attributes that are absent from this node become optional; the scan is skipped when the sizes of attrMap and optAttrs already match. */ if ( attrMap.size() == optAttrs.size() ) continue; for ( attrItr = attrMap.begin(); attrItr != attrMap.end(); ++attrItr ) { if ( attrs->getNamedItem ( ( const XMLCh * ) WrapXerces::toString ( attrItr->first ).GetData() ) == NULL ) { optAttrs.insert ( attrItr->first ); } } } // Deal with sequence wxLogDebug ( _T("%s:"), elementName.c_str() ); data.useSequence = getSequence ( data.sequence, childMap ); // Now we have the data of the element if ( mGrammarType == Grammar::DTDGrammarType ) { generateDTD ( data, nIndent ); mSchema << data.schema; } else if ( !mInlineSimpleType ) { // Or wait until all data are available generateSchema ( data, nIndent ); mSchema << data.schema; } } /* Depth-first walk over the element tree: for each element whose ElmtData schema text is still empty, generates the fragment (schema or DTD, by grammar type) and appends it to mSchema. */ void XmlSchemaGenerator::outputSchema ( const DOMElement &element ) { wxString tagName = WrapXerces::toString ( element.getTagName() ); ElmtData &data = mElements[tagName]; if ( data.schema.empty() ) { if ( mGrammarType == Grammar::SchemaGrammarType ) generateSchema ( data, 1 ); else generateDTD ( data, 1 ); mSchema << data.schema; } DOMElement *child = WrapXerces::getFirstElementChild ( element ); while ( child ) { outputSchema ( *child ); child = WrapXerces::getNextElementSibling ( *child ); } } /* Builds the schema text for one element into data.schema, indented from nIndent. An element without children and attributes gets a single line only when simple types are not inlined; otherwise nothing is written for it here. */ void XmlSchemaGenerator::generateSchema ( ElmtData &data, size_t nIndent ) { wxString &schema = data.schema; if ( data.children.empty() && data.attrMap.empty() ) { if ( !mInlineSimpleType ) { addIndent ( schema, nIndent ); schema << _T("") << getEOL(); } return; } schema.Alloc ( maxElementSchemaBuffer ); addIndent ( schema, nIndent++ ); schema << _T("") << getEOL(); addIndent ( schema, nIndent++ ); if ( data.mixed ) schema << _T("") << getEOL(); else schema << _T("") << getEOL(); /* Compute the smallest minOccurs, the largest maxOccurs and the sum of minOccurs over all children. */ if ( !data.children.empty() ) { size_t minOccurs = 1, maxOccurs = 1, minTotal = 0; std::map::const_iterator itr; for ( itr = data.children.begin(); itr != data.children.end(); ++itr ) { if ( itr->second.minOccurs < minOccurs ) minOccurs = itr->second.minOccurs; if ( 
itr->second.maxOccurs > maxOccurs ) maxOccurs = itr->second.maxOccurs; minTotal += itr->second.minOccurs; } addIndent ( schema, nIndent++ ); if ( data.useSequence ) { schema << _T(" 1) schema << _T(" maxOccurs=\"unbounded\""); if ( minTotal == 0 ) schema << _T(" minOccurs=\"0\""); schema << _T(">") << getEOL(); /* Emit one entry per child name, in the order stored in data.sequence. */ std::vector::const_iterator seqItr; seqItr = data.sequence.begin(); for ( ; seqItr != data.sequence.end(); ++seqItr ) { const ChildData &child = data.children[*seqItr]; addIndent ( schema, nIndent ); if ( mInlineSimpleType ) { // Check if it's a simple type const ElmtData *childElmt = &mElements[*seqItr]; if ( childElmt->children.empty() && childElmt->attrMap.empty() ) { schema << _T("") << getEOL(); continue; } } schema << _T(" 1 ) { schema << _T(" maxOccurs=\"unbounded\""); } } schema << _T("/>") << getEOL(); } addIndent ( schema, --nIndent ); if ( data.useSequence ) { schema << _T("") << getEOL(); } else { schema << _T("") << getEOL(); } } // Child elements std::map::const_iterator attrItr; /* Attributes: a stored default value is written when present; otherwise an attribute that is not listed in optAttrs is marked as required. */ attrItr = data.attrMap.begin(); for ( ; attrItr != data.attrMap.end(); ++attrItr ) { addIndent ( schema, nIndent ); schema << _T("first << _T("\" type=\"xs:string\""); if ( attrItr->second != NULL ) { schema << _T(" default=\"") << WrapXerces::toString ( attrItr->second ) << _T("\""); } else if ( data.optAttrs.find ( attrItr->first ) == data.optAttrs.end() ) { schema << _T(" use=\"required\""); } schema << _T("/>") << getEOL(); } addIndent ( schema, --nIndent ); schema << _T("") << getEOL(); addIndent ( schema, --nIndent ); schema << _T("") << getEOL(); schema.Shrink(); } /* Builds the DTD declarations for one element into data.schema. The indent parameter is marked unused. */ void XmlSchemaGenerator::generateDTD ( ElmtData &data, size_t WXUNUSED ( nIndent ) ) { wxString &schema = data.schema; schema.Alloc ( maxElementSchemaBuffer ); schema << _T("::const_iterator seqItr; seqItr = data.sequence.begin(); if (data.useSequence) { for ( ; seqItr != data.sequence.end(); ++seqItr ) { schema << separator << *seqItr; separator = _T(", "); const ChildData &child = 
data.children[*seqItr]; if ( child.minOccurs == 0 ) schema << ( child.maxOccurs > 1 ? _T("*") : _T("?") ); else if ( child.maxOccurs > 1 ) schema << _T("+"); } schema << _T(")"); } else { /* NOTE(review): minTotal accumulates maxOccurs here, whereas generateSchema sums minOccurs for the analogous all-optional test; confirm which one is intended. */ size_t minTotal = 0; for ( ; seqItr != data.sequence.end(); ++seqItr ) { schema << separator << *seqItr; separator = _T(" | "); minTotal += data.children[*seqItr].maxOccurs; } schema << ( minTotal > 0 ? _T(")+") : _T(")*") ); } } schema << _T(">") << getEOL(); /* Attribute list: each attribute is written as CDATA followed by its stored default value, #REQUIRED, or #IMPLIED. */ if ( !data.attrMap.empty() ) { const static wxString indent = wxString ( getEOL() ) + _T(" "); schema << _T("::const_iterator attrItr; attrItr = data.attrMap.begin(); for ( ; attrItr != data.attrMap.end(); ++attrItr ) { schema << indent << attrItr->first << _T(" CDATA"); if ( attrItr->second != NULL ) // Has default value schema << _T(" \"") << WrapXerces::toString ( attrItr->second ) << _T("\""); else if ( data.optAttrs.find ( attrItr->first ) == data.optAttrs.end() ) schema << _T(" #REQUIRED"); else schema << _T(" #IMPLIED"); } schema << _T(">") << getEOL(); } schema.Shrink(); } /* Fills sequence with the names from elmtMap, ordered so that a name is placed after the predecessors recorded in its precedence set. Returns false when two names each list the other as a predecessor (flagged as deadlock), true otherwise. */ bool XmlSchemaGenerator::getSequence ( std::vector &sequence, const std::map &elmtMap ) { bool deadlock = false; sequence.clear(); std::vector::iterator seqItr, seqFindItr; std::set::const_iterator prevItr, prevEnd; std::map::const_iterator itr; bool retry; /* Repeat full passes over elmtMap while some name had to be postponed because one of its predecessors was not placed yet. */ do { retry = false; for ( itr = elmtMap.begin(); itr != elmtMap.end(); ++itr ) { /* Skip names that are already placed in the sequence. */ seqFindItr = std::find ( sequence.begin(), sequence.end(), itr->first ); if ( seqFindItr != sequence.end() ) continue; seqItr = sequence.begin(); prevItr = itr->second.precedence.begin(); prevEnd = itr->second.precedence.end(); for ( ; prevItr != prevEnd; ++prevItr ) { // Find last index of dependent elements seqFindItr = std::find ( sequence.begin(), sequence.end(), *prevItr ); if ( seqFindItr != sequence.end() ) { if ( seqItr < seqFindItr ) { seqItr = seqFindItr; } continue; } /* Predecessor not placed yet: postpone this name, unless the predecessor also lists this name as its own predecessor, which is flagged as a deadlock and does not block placement. */ const std::set &previous = elmtMap.find ( *prevItr )->second.precedence; if ( previous.find ( itr->first ) == previous.end() ) { // Not a 
deadlock retry = true; break; } else { deadlock = true; } } if ( prevItr != prevEnd ) continue; // The preceding doesn't exist /* seqItr refers to the latest predecessor found, or to begin() when none was found; it is advanced by one unless it equals end(), and the name is inserted at that position. */ if ( seqItr != sequence.end() ) { ++seqItr; } sequence.insert ( seqItr, itr->first ); wxLogDebug ( _T(" %s"), itr->first.c_str() ); } } while ( retry ); return !deadlock; }