531 lines
15 KiB
C++
531 lines
15 KiB
C++
/*
|
|
* Copyright 2012 Zane U. Ji.
|
|
*
|
|
* This file is part of Xml Copy Editor.
|
|
*
|
|
* Xml Copy Editor is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU General Public License
|
|
* as published by the Free Software Foundation; either version 2
|
|
* of the License, or (at your option) any later version.
|
|
*
|
|
* Xml Copy Editor is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with Xml Copy Editor; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
*/
|
|
|
|
#include "xmlschemagenerator.h"
|
|
#include "wrapxerces.h"
|
|
#include <memory>
|
|
#include <algorithm>
|
|
#include <xercesc/framework/MemBufInputSource.hpp>
|
|
#include <xercesc/parsers/XercesDOMParser.hpp>
|
|
#include <xercesc/dom/DOMNamedNodeMap.hpp>
|
|
#include <xercesc/dom/DOMAttr.hpp>
|
|
#include "xercescatalogresolver.h"
|
|
|
|
const static size_t maxReservedSchemaBuffer = 1024 * 1024;
|
|
const static size_t maxElementSchemaBuffer = 1024;
|
|
|
|
XmlSchemaGenerator::XmlSchemaGenerator ( bool inlineSimpleType /*= true*/)
|
|
: mInlineSimpleType ( inlineSimpleType )
|
|
, mGrammarType ( Grammar::SchemaGrammarType )
|
|
{
|
|
}
|
|
|
|
XmlSchemaGenerator::~XmlSchemaGenerator()
|
|
{
|
|
}
|
|
|
|
const wxString &XmlSchemaGenerator::generate (
|
|
Grammar::GrammarType grammarType
|
|
, const wxString &filepath
|
|
, const char *buffer
|
|
, size_t len
|
|
, const wxString &encoding
|
|
)
|
|
{
|
|
mGrammarType = grammarType;
|
|
mElements.clear();
|
|
mSchema.Clear();
|
|
|
|
XercesCatalogResolver catalogResolver;
|
|
std::auto_ptr<XercesDOMParser> parser ( new XercesDOMParser() );
|
|
parser->setDoNamespaces ( true );
|
|
parser->setDoSchema ( true );
|
|
parser->setValidationSchemaFullChecking ( false );
|
|
parser->setEntityResolver ( &catalogResolver );
|
|
|
|
MemBufInputSource source ( ( const XMLByte * ) buffer, len,
|
|
filepath.mb_str( wxConvLocal ) );
|
|
if ( !wxIsEmpty ( encoding ) )
|
|
source.setEncoding ( (const XMLCh *)
|
|
WrapXerces::toString ( encoding ).GetData() );
|
|
try {
|
|
//XMLPlatformUtils::fgSSE2ok = false;
|
|
parser->parse ( source );
|
|
}
|
|
catch ( XMLException& e )
|
|
{
|
|
mLastError = WrapXerces::toString ( e.getMessage() );
|
|
return mSchema;
|
|
}
|
|
|
|
xercesc::DOMDocument *doc = parser->getDocument();
|
|
if ( doc == NULL )
|
|
{
|
|
mLastError = _ ("Failed to load xml file.");
|
|
return mSchema;
|
|
}
|
|
|
|
size_t size = len / 3;
|
|
if ( size > maxReservedSchemaBuffer ) size = maxReservedSchemaBuffer;
|
|
mSchema.Alloc ( size );
|
|
|
|
if ( mGrammarType == Grammar::SchemaGrammarType )
|
|
mSchema << _T("<?xml version=\"1.0\" encoding=\"UTF-8\"?>")
|
|
<< getEOL()
|
|
<< _T("<xs:schema xmlns:xs=\"http://www.w3.org/2001/XMLSchema\" elementFormDefault=\"qualified\">")
|
|
<< getEOL();
|
|
|
|
xercesc::DOMElement *root = doc->getDocumentElement();
|
|
if ( root != NULL )
|
|
{
|
|
findAllElements ( *root );
|
|
generateData ( *root, 1 );
|
|
if ( mInlineSimpleType && mGrammarType == Grammar::SchemaGrammarType )
|
|
outputSchema ( *root );
|
|
}
|
|
|
|
if ( mGrammarType == Grammar::SchemaGrammarType )
|
|
mSchema << _T("</xs:schema>") << getEOL();
|
|
|
|
mSchema.Shrink();
|
|
|
|
return mSchema;
|
|
}
|
|
|
|
void XmlSchemaGenerator::findAllElements ( const DOMElement &element,
|
|
size_t nIndent /*= 0*/)
|
|
{
|
|
wxString tagName = WrapXerces::toString ( element.getTagName() );
|
|
mElements[tagName].nodes.insert ( &element );
|
|
|
|
DOMElement *child = WrapXerces::getFirstElementChild ( element );
|
|
while ( child )
|
|
{
|
|
findAllElements ( *child, nIndent );
|
|
child = WrapXerces::getNextElementSibling ( *child );
|
|
}
|
|
}
|
|
|
|
void XmlSchemaGenerator::generateData ( const DOMElement &element,
|
|
size_t nIndent /*= 0*/)
|
|
{
|
|
wxString name = WrapXerces::toString ( element.getTagName() );
|
|
if ( mElements[name].name.empty() )
|
|
{ // Only generate data once
|
|
generateData ( name, nIndent );
|
|
}
|
|
|
|
DOMElement *child = WrapXerces::getFirstElementChild ( element );
|
|
while ( child )
|
|
{
|
|
generateData ( *child, nIndent );
|
|
child = WrapXerces::getNextElementSibling ( *child );
|
|
}
|
|
}
|
|
|
|
void XmlSchemaGenerator::generateData ( const wxString &elementName,
|
|
size_t nIndent /*= 0*/)
|
|
{
|
|
ElmtData &data = mElements[elementName];
|
|
std::set<const DOMElement *>::iterator elmtItr;
|
|
|
|
data.name = elementName;
|
|
|
|
//Content
|
|
std::map<wxString, ChildData> &childMap = data.children;
|
|
std::map<wxString, ChildData>::iterator itr;
|
|
std::set<wxString> precedence;
|
|
elmtItr = data.nodes.begin();
|
|
for ( ; elmtItr != data.nodes.end(); ++elmtItr )
|
|
{
|
|
std::map<wxString, size_t> countMap;
|
|
DOMNode *child = ( **elmtItr ).getFirstChild();
|
|
for ( ; child != NULL; child = child->getNextSibling() )
|
|
{
|
|
DOMNode::NodeType type = child->getNodeType();
|
|
if ( type != DOMNode::ELEMENT_NODE )
|
|
{
|
|
if ( type == DOMNode::TEXT_NODE )
|
|
{ // Check for mixed content
|
|
wxString value = WrapXerces::toString ( child->getNodeValue() );
|
|
if ( !value.Trim().Trim ( false ).empty() )
|
|
data.mixed = true;
|
|
}
|
|
continue;
|
|
}
|
|
wxString name = WrapXerces::toString ( child->getNodeName() );
|
|
childMap[name].precedence.insert ( precedence.begin(), precedence.end() );
|
|
childMap[name].precedence.erase ( name ); // Don't depend on oneself
|
|
precedence.insert ( name );
|
|
countMap[name] += 1;
|
|
}
|
|
precedence.clear();
|
|
|
|
std::map<wxString, size_t>::iterator countItr = countMap.begin();
|
|
for ( ; countItr != countMap.end(); ++countItr )
|
|
{
|
|
if ( childMap[countItr->first].maxOccurs < countItr->second )
|
|
childMap[countItr->first].maxOccurs = countItr->second;
|
|
}
|
|
if ( childMap.size() == countMap.size() )
|
|
continue;
|
|
for ( itr = childMap.begin(); itr != childMap.end(); ++itr )
|
|
{
|
|
if ( countMap.find ( itr->first ) != countMap.end() )
|
|
continue;
|
|
itr->second.minOccurs = 0;
|
|
}
|
|
}
|
|
// Attribute
|
|
std::map<wxString, const XMLCh *> &attrMap = data.attrMap;
|
|
std::set<wxString> &optAttrs = data.optAttrs;
|
|
std::map<wxString, const XMLCh *>::iterator attrItr;
|
|
elmtItr = data.nodes.begin();
|
|
for ( ; elmtItr != data.nodes.end(); ++elmtItr )
|
|
{
|
|
DOMNamedNodeMap *attrs = ( **elmtItr ).getAttributes();
|
|
if ( attrs == NULL )
|
|
{
|
|
for ( attrItr = attrMap.begin(); attrItr != attrMap.end(); ++attrItr )
|
|
optAttrs.insert ( attrItr->first );
|
|
continue;
|
|
}
|
|
|
|
wxString name;
|
|
DOMAttr *attr;
|
|
size_t i = attrs->getLength();
|
|
while ( i-- > 0 )
|
|
{
|
|
attr = ( DOMAttr* ) attrs->item ( i );
|
|
name = WrapXerces::toString ( attr->getName() );
|
|
if ( attr->getPrefix() != NULL )
|
|
{
|
|
wxLogDebug ( _T("Ignore: %s"), name.c_str() );
|
|
continue;
|
|
}
|
|
if ( elmtItr != data.nodes.begin() ) // Not the first node
|
|
if ( attrMap.find ( name ) == attrMap.end() ) // Not in the map
|
|
optAttrs.insert ( name );
|
|
if ( attr->getSpecified() )
|
|
attrMap[name]; // Initialize attribute map
|
|
else
|
|
attrMap[name] = attr->getValue();
|
|
}
|
|
if ( attrMap.size() == optAttrs.size() )
|
|
continue;
|
|
for ( attrItr = attrMap.begin(); attrItr != attrMap.end(); ++attrItr )
|
|
{
|
|
if ( attrs->getNamedItem ( ( const XMLCh * )
|
|
WrapXerces::toString ( attrItr->first ).GetData() ) == NULL )
|
|
{
|
|
optAttrs.insert ( attrItr->first );
|
|
}
|
|
}
|
|
}
|
|
|
|
// Deal with sequence
|
|
wxLogDebug ( _T("%s:"), elementName.c_str() );
|
|
data.useSequence = getSequence ( data.sequence, childMap );
|
|
|
|
// Now we have the data of the element
|
|
if ( mGrammarType == Grammar::DTDGrammarType )
|
|
{
|
|
generateDTD ( data, nIndent );
|
|
mSchema << data.schema;
|
|
}
|
|
else if ( !mInlineSimpleType )
|
|
{ // Or wait until all data are available
|
|
generateSchema ( data, nIndent );
|
|
mSchema << data.schema;
|
|
}
|
|
}
|
|
|
|
void XmlSchemaGenerator::outputSchema ( const DOMElement &element )
|
|
{
|
|
wxString tagName = WrapXerces::toString ( element.getTagName() );
|
|
ElmtData &data = mElements[tagName];
|
|
if ( data.schema.empty() )
|
|
{
|
|
if ( mGrammarType == Grammar::SchemaGrammarType )
|
|
generateSchema ( data, 1 );
|
|
else
|
|
generateDTD ( data, 1 );
|
|
mSchema << data.schema;
|
|
}
|
|
|
|
DOMElement *child = WrapXerces::getFirstElementChild ( element );
|
|
while ( child )
|
|
{
|
|
outputSchema ( *child );
|
|
child = WrapXerces::getNextElementSibling ( *child );
|
|
}
|
|
}
|
|
|
|
void XmlSchemaGenerator::generateSchema ( ElmtData &data, size_t nIndent )
|
|
{
|
|
wxString &schema = data.schema;
|
|
|
|
if ( data.children.empty() && data.attrMap.empty() )
|
|
{
|
|
if ( !mInlineSimpleType )
|
|
{
|
|
addIndent ( schema, nIndent );
|
|
schema << _T("<xs:element name=\"") << data.name
|
|
<< _T("\" type=\"xs:string\"/>") << getEOL();
|
|
}
|
|
return;
|
|
}
|
|
|
|
schema.Alloc ( maxElementSchemaBuffer );
|
|
|
|
addIndent ( schema, nIndent++ );
|
|
schema << _T("<xs:element name=\"") << data.name << _T("\">") << getEOL();
|
|
addIndent ( schema, nIndent++ );
|
|
if ( data.mixed )
|
|
schema << _T("<xs:complexType mixed=\"true\">") << getEOL();
|
|
else
|
|
schema << _T("<xs:complexType>") << getEOL();
|
|
if ( !data.children.empty() )
|
|
{
|
|
size_t minOccurs = 1, maxOccurs = 1, minTotal = 0;
|
|
std::map<wxString, ChildData>::const_iterator itr;
|
|
for ( itr = data.children.begin(); itr != data.children.end(); ++itr )
|
|
{
|
|
if ( itr->second.minOccurs < minOccurs )
|
|
minOccurs = itr->second.minOccurs;
|
|
if ( itr->second.maxOccurs > maxOccurs )
|
|
maxOccurs = itr->second.maxOccurs;
|
|
minTotal += itr->second.minOccurs;
|
|
}
|
|
addIndent ( schema, nIndent++ );
|
|
if ( data.useSequence )
|
|
{
|
|
schema << _T("<xs:sequence");
|
|
}
|
|
else
|
|
{
|
|
schema << _T("<xs:choice maxOccurs=\"unbounded\"");
|
|
}
|
|
//if (minOccurs != 1) schema << _T(" minOccurs=\"") << minOccurs << _T("\"");
|
|
//if (maxOccurs > 1) schema << _T(" maxOccurs=\"unbounded\"");
|
|
if ( minTotal == 0 ) schema << _T(" minOccurs=\"0\"");
|
|
schema << _T(">") << getEOL();
|
|
|
|
std::vector<wxString>::const_iterator seqItr;
|
|
seqItr = data.sequence.begin();
|
|
for ( ; seqItr != data.sequence.end(); ++seqItr )
|
|
{
|
|
const ChildData &child = data.children[*seqItr];
|
|
addIndent ( schema, nIndent );
|
|
if ( mInlineSimpleType )
|
|
{ // Check if it's a simple type
|
|
const ElmtData *childElmt = &mElements[*seqItr];
|
|
if ( childElmt->children.empty()
|
|
&& childElmt->attrMap.empty() )
|
|
{
|
|
schema << _T("<xs:element name=\"") << *seqItr
|
|
<< _T("\" type=\"xs:string\"/>") << getEOL();
|
|
continue;
|
|
}
|
|
}
|
|
schema << _T("<xs:element ref=\"") << *seqItr << _T("\"");
|
|
if ( data.useSequence )
|
|
{
|
|
if ( child.minOccurs == 0 )
|
|
{
|
|
schema << _T(" minOccurs=\"0\"");
|
|
}
|
|
if ( child.maxOccurs > 1 )
|
|
{
|
|
schema << _T(" maxOccurs=\"unbounded\"");
|
|
}
|
|
}
|
|
schema << _T("/>") << getEOL();
|
|
}
|
|
|
|
addIndent ( schema, --nIndent );
|
|
if ( data.useSequence )
|
|
{
|
|
schema << _T("</xs:sequence>") << getEOL();
|
|
}
|
|
else
|
|
{
|
|
schema << _T("</xs:choice>") << getEOL();
|
|
}
|
|
} // Child elements
|
|
|
|
std::map<wxString, const XMLCh *>::const_iterator attrItr;
|
|
attrItr = data.attrMap.begin();
|
|
for ( ; attrItr != data.attrMap.end(); ++attrItr )
|
|
{
|
|
addIndent ( schema, nIndent );
|
|
schema << _T("<xs:attribute name=\"") << attrItr->first
|
|
<< _T("\" type=\"xs:string\"");
|
|
if ( attrItr->second != NULL )
|
|
{
|
|
schema << _T(" default=\"")
|
|
<< WrapXerces::toString ( attrItr->second ) << _T("\"");
|
|
}
|
|
else if ( data.optAttrs.find ( attrItr->first )
|
|
== data.optAttrs.end() )
|
|
{
|
|
schema << _T(" use=\"required\"");
|
|
}
|
|
schema << _T("/>") << getEOL();
|
|
}
|
|
|
|
addIndent ( schema, --nIndent );
|
|
schema << _T("</xs:complexType>") << getEOL();
|
|
addIndent ( schema, --nIndent );
|
|
schema << _T("</xs:element>") << getEOL();
|
|
|
|
schema.Shrink();
|
|
}
|
|
|
|
void XmlSchemaGenerator::generateDTD ( ElmtData &data, size_t WXUNUSED ( nIndent ) )
|
|
{
|
|
wxString &schema = data.schema;
|
|
schema.Alloc ( maxElementSchemaBuffer );
|
|
|
|
schema << _T("<!ELEMENT ") << data.name;
|
|
if (data.sequence.empty())
|
|
{
|
|
schema << _T(" (#PCDATA)");
|
|
}
|
|
else
|
|
{
|
|
const wxChar *separator = _T(" (");
|
|
std::vector<wxString>::const_iterator seqItr;
|
|
seqItr = data.sequence.begin();
|
|
if (data.useSequence)
|
|
{
|
|
for ( ; seqItr != data.sequence.end(); ++seqItr )
|
|
{
|
|
schema << separator << *seqItr;
|
|
separator = _T(", ");
|
|
const ChildData &child = data.children[*seqItr];
|
|
if ( child.minOccurs == 0 )
|
|
schema << ( child.maxOccurs > 1 ? _T("*") : _T("?") );
|
|
else if ( child.maxOccurs > 1 )
|
|
schema << _T("+");
|
|
}
|
|
schema << _T(")");
|
|
}
|
|
else
|
|
{
|
|
size_t minTotal = 0;
|
|
for ( ; seqItr != data.sequence.end(); ++seqItr )
|
|
{
|
|
schema << separator << *seqItr;
|
|
separator = _T(" | ");
|
|
minTotal += data.children[*seqItr].maxOccurs;
|
|
}
|
|
schema << ( minTotal > 0 ? _T(")+") : _T(")*") );
|
|
}
|
|
}
|
|
schema << _T(">") << getEOL();
|
|
|
|
if ( !data.attrMap.empty() )
|
|
{
|
|
const static wxString indent =
|
|
wxString ( getEOL() ) + _T(" ");
|
|
schema << _T("<!ATTLIST ") << data.name;
|
|
|
|
std::map<wxString, const XMLCh *>::const_iterator attrItr;
|
|
attrItr = data.attrMap.begin();
|
|
for ( ; attrItr != data.attrMap.end(); ++attrItr )
|
|
{
|
|
schema << indent << attrItr->first << _T(" CDATA");
|
|
if ( attrItr->second != NULL ) // Has default value
|
|
schema << _T(" \"") << WrapXerces::toString ( attrItr->second ) << _T("\"");
|
|
else if ( data.optAttrs.find ( attrItr->first ) == data.optAttrs.end() )
|
|
schema << _T(" #REQUIRED");
|
|
else
|
|
schema << _T(" #IMPLIED");
|
|
}
|
|
schema << _T(">") << getEOL();
|
|
}
|
|
schema.Shrink();
|
|
}
|
|
|
|
bool XmlSchemaGenerator::getSequence ( std::vector<wxString> &sequence,
|
|
const std::map<wxString, ChildData> &elmtMap )
|
|
{
|
|
bool deadlock = false;
|
|
|
|
sequence.clear();
|
|
|
|
std::vector<wxString>::iterator seqItr, seqFindItr;
|
|
std::set<wxString>::const_iterator prevItr, prevEnd;
|
|
std::map<wxString, ChildData>::const_iterator itr;
|
|
|
|
bool retry;
|
|
do
|
|
{
|
|
retry = false;
|
|
for ( itr = elmtMap.begin(); itr != elmtMap.end(); ++itr )
|
|
{
|
|
seqFindItr = std::find ( sequence.begin(), sequence.end(),
|
|
itr->first );
|
|
if ( seqFindItr != sequence.end() )
|
|
continue;
|
|
|
|
seqItr = sequence.begin();
|
|
prevItr = itr->second.precedence.begin();
|
|
prevEnd = itr->second.precedence.end();
|
|
for ( ; prevItr != prevEnd; ++prevItr )
|
|
{ // Find last index of dependent elements
|
|
seqFindItr = std::find ( sequence.begin(), sequence.end(),
|
|
*prevItr );
|
|
if ( seqFindItr != sequence.end() )
|
|
{
|
|
if ( seqItr < seqFindItr )
|
|
{
|
|
seqItr = seqFindItr;
|
|
}
|
|
continue;
|
|
}
|
|
const std::set<wxString> &previous =
|
|
elmtMap.find ( *prevItr )->second.precedence;
|
|
if ( previous.find ( itr->first ) == previous.end() )
|
|
{ // Not a deadlock
|
|
retry = true;
|
|
break;
|
|
}
|
|
else
|
|
{
|
|
deadlock = true;
|
|
}
|
|
}
|
|
if ( prevItr != prevEnd )
|
|
continue; // The preceding doesn't exist
|
|
|
|
if ( seqItr != sequence.end() )
|
|
{
|
|
++seqItr;
|
|
}
|
|
sequence.insert ( seqItr, itr->first );
|
|
wxLogDebug ( _T(" %s"), itr->first.c_str() );
|
|
}
|
|
} while ( retry );
|
|
|
|
return !deadlock;
|
|
}
|