Enabled XML Schema-based insertion helpers and validation-as-you-type.

This commit is contained in:
Gerald Schmidt 2008-01-06 22:14:06 +00:00
parent 55d20854af
commit 4c4536cdb4
4 changed files with 447 additions and 283 deletions

View File

@ -29,6 +29,20 @@
#include "catalogresolver.h" #include "catalogresolver.h"
#include "xmlschemaparser.h" #include "xmlschemaparser.h"
// Xerces-C req'd for Schema parsing
#define XERCES_TMPLSINC
#include <xercesc/util/NameIdPool.hpp>
#include <xercesc/util/PlatformUtils.hpp>
#include <xercesc/framework/XMLValidator.hpp>
#include <xercesc/parsers/SAXParser.hpp>
#include <xercesc/parsers/XercesDOMParser.hpp>
#include <xercesc/validators/schema/SchemaValidator.hpp>
#include <xercesc/validators/common/ContentSpecNode.hpp>
#include <xercesc/validators/schema/SchemaSymbols.hpp>
using namespace xercesc;
XmlPromptGenerator::XmlPromptGenerator ( XmlPromptGenerator::XmlPromptGenerator (
const std::string& catalogPath, const std::string& catalogPath,
const std::string& basePath, const std::string& basePath,
@ -39,8 +53,9 @@ XmlPromptGenerator::XmlPromptGenerator (
d->catalogPath = catalogPath; d->catalogPath = catalogPath;
d->auxPath = auxPath; d->auxPath = auxPath;
d->elementDeclRecurseLevel = 0; d->elementDeclRecurseLevel = 0;
d->rootElement = true; d->isRootElement = true;
d->dtdFound = false; d->grammarFound = false;
d->attributeValueCutoff = 12; // this prevents enums being stored in their thousands
XML_SetParamEntityParsing ( p, XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE ); XML_SetParamEntityParsing ( p, XML_PARAM_ENTITY_PARSING_UNLESS_STANDALONE );
XML_SetElementHandler ( p, starthandler, endhandler ); XML_SetElementHandler ( p, starthandler, endhandler );
XML_SetDoctypeDeclHandler ( p, doctypedeclstarthandler, doctypedeclendhandler ); XML_SetDoctypeDeclHandler ( p, doctypedeclstarthandler, doctypedeclendhandler );
@ -66,6 +81,19 @@ void XMLCALL XmlPromptGenerator::starthandler (
PromptGeneratorData *d; PromptGeneratorData *d;
d = ( PromptGeneratorData * ) data; d = ( PromptGeneratorData * ) data;
if (d->isRootElement)
{
d->rootElement = el;
handleSchema ( d, el, attr );
d->isRootElement = false;
if ( ! (d->elementMap.empty() ) )//if ( d->elementMap.size() == 1) // must be 1 for success
{
d->grammarFound = true;
XML_StopParser ( d->p, false );
return;
}
}
d->push ( el ); d->push ( el );
std::string parent, element; std::string parent, element;
@ -97,25 +125,9 @@ void XMLCALL XmlPromptGenerator::starthandler (
attributeName = *attr; attributeName = *attr;
attributeValue = * ( attr + 1 ); attributeValue = * ( attr + 1 );
if (d->attributeMap[element][attributeName].size() < d->attributeValueCutoff)
d->attributeMap[element][attributeName].insert ( attributeValue ); d->attributeMap[element][attributeName].insert ( attributeValue );
/*
// TBD: may not be xsi: check for http://www.w3.org/2001/XMLSchema-instance
while (d->rootElement && strstr((const char *)attr, "xsi:noNamespaceSchemaLocation"))
{
std::string schemaPath = PathResolver::run(attributeValue, d->auxPath);
std::string buffer;
if (!ReadFile::run(schemaPath, buffer))
break;
XmlSchemaParser xsp(d, true);
if (!xsp.parse(buffer))
break;
XML_StopParser(d->p, false);
return;
} }
*/
}
d->rootElement = false;
} }
void XMLCALL XmlPromptGenerator::endhandler ( void *data, const XML_Char *el ) void XMLCALL XmlPromptGenerator::endhandler ( void *data, const XML_Char *el )
@ -125,9 +137,9 @@ void XMLCALL XmlPromptGenerator::endhandler ( void *data, const XML_Char *el )
d->pop(); d->pop();
} }
bool XmlPromptGenerator::getDtdFound() bool XmlPromptGenerator::getGrammarFound()
{ {
return d->dtdFound; return d->grammarFound;
} }
void XmlPromptGenerator::getAttributeMap ( void XmlPromptGenerator::getAttributeMap (
@ -174,8 +186,8 @@ void XMLCALL XmlPromptGenerator::doctypedeclendhandler ( void *data )
d = ( PromptGeneratorData * ) data; d = ( PromptGeneratorData * ) data;
if ( !d->elementMap.empty() ) if ( !d->elementMap.empty() )
{ {
d->dtdFound = true; d->grammarFound = true;
XML_StopParser ( d->p, false ); XML_StopParser ( d->p, false ); // experimental
} }
} }
@ -287,12 +299,16 @@ int XMLCALL XmlPromptGenerator::externalentityrefhandler (
stdPublicId = publicId; stdPublicId = publicId;
std::string stdSystemId = CatalogResolver::lookupPublicId ( stdPublicId, d->catalogPath ); std::string stdSystemId = CatalogResolver::lookupPublicId ( stdPublicId, d->catalogPath );
if ( stdSystemId.empty() && systemId )
stdSystemId = systemId;
Replace::run ( stdSystemId, "file:///", "", false ); if ( !stdSystemId.empty() )
{
Replace::run ( stdSystemId, "file://", "", false );
Replace::run ( stdSystemId, "%20", " ", false ); Replace::run ( stdSystemId, "%20", " ", false );
}
else
{
if (systemId )
stdSystemId = systemId;
if ( base ) if ( base )
{ {
std::string test = PathResolver::run ( stdSystemId, base ); std::string test = PathResolver::run ( stdSystemId, base );
@ -301,6 +317,7 @@ int XMLCALL XmlPromptGenerator::externalentityrefhandler (
stdSystemId = test; stdSystemId = test;
} }
} }
}
if ( !stdSystemId.empty() ) if ( !stdSystemId.empty() )
{ {
@ -308,7 +325,7 @@ int XMLCALL XmlPromptGenerator::externalentityrefhandler (
} }
std::string encoding = XmlEncodingHandler::get ( buffer ); std::string encoding = XmlEncodingHandler::get ( buffer );
XML_Parser dtdParser = XML_ExternalEntityParserCreate ( d->p, context, encoding.c_str() );//"UTF-8"); XML_Parser dtdParser = XML_ExternalEntityParserCreate ( d->p, context, encoding.c_str() );
if ( !dtdParser ) if ( !dtdParser )
return false; return false;
@ -345,3 +362,145 @@ void XMLCALL XmlPromptGenerator::entitydeclhandler (
d->entitySet.insert ( entityName ); d->entitySet.insert ( entityName );
} }
} }
void XmlPromptGenerator::handleSchema (
PromptGeneratorData *d,
const XML_Char *el,
const XML_Char **attr )
{
// first check for XML Schema association
XML_Char **schemaAttr = ( XML_Char ** ) attr; // now redundant; could use attr
std::string path;
for ( ; d->isRootElement && *schemaAttr; schemaAttr += 2 )
{
// no namespace
if ( !strcmp ( ( const char * ) *schemaAttr, "xsi:noNamespaceSchemaLocation" ) )
{
path = ( const char * ) * ( schemaAttr + 1 );
break;
}
// with namespace -- check if this works
else if ( !strcmp ( ( const char * ) *schemaAttr, "xsi:schemaLocation" ) )
{
char *searchIterator;
for ( searchIterator = ( char * ) * ( schemaAttr + 1 ); *searchIterator && *searchIterator != ' ' && *searchIterator != '\t' && *searchIterator != '\n'; searchIterator++ )
;
if ( *searchIterator )
{
path = ( const char * ) ( searchIterator + 1 );
break;
}
}
}
if ( path.empty() )
{
return;
}
std::string schemaPath = PathResolver::run ( path, d->auxPath );
try
{
XMLPlatformUtils::Initialize();
}
catch ( const XMLException& toCatch )
{
XMLPlatformUtils::Terminate();
return;
}
XercesDOMParser *parser = new XercesDOMParser();
parser->setDoNamespaces ( true );
parser->setDoSchema ( true );
parser->setValidationSchemaFullChecking ( true );
Grammar *rootGrammar = parser->loadGrammar ( schemaPath.c_str(), Grammar::SchemaGrammarType );
if ( !rootGrammar )
{
delete parser;
return;
}
SchemaGrammar* grammar = ( SchemaGrammar* ) rootGrammar;
RefHash3KeysIdPoolEnumerator<SchemaElementDecl> elemEnum = grammar->getElemEnumerator();
if ( !elemEnum.hasMoreElements() )
{
delete grammar;
delete parser;
return;
}
while ( elemEnum.hasMoreElements() )
{
const SchemaElementDecl& curElem = elemEnum.nextElement();
std::string element;
std::set<std::string> children;
const QName *qnm = curElem.getElementName();
if ( qnm )
{
element = XMLString::transcode ( qnm->getRawName() ); // this includes any prefix:localname combinations
}
if ( element.empty() )
continue;
const XMLCh* fmtCntModel = curElem.getFormattedContentModel();
if ( fmtCntModel != NULL ) // tbd: this does not yet pick up prefix:localname combinations
{
size_t len;
char *s, *word;
s = ( char * ) XMLString::transcode ( fmtCntModel );
while ( ( word = GetWord::run ( &s, &len ) ) != NULL )
{
std::string currentValue ( word, len );
if ( currentValue.size() )
children.insert ( currentValue );
}
}
if ( !children.empty() )
d->elementMap.insert ( make_pair ( element, children ) );
// fetch attributes
if ( curElem.hasAttDefs() && ! ( curElem.getAttDefList().isEmpty() ) )
{
std::map<std::string, std::set<std::string> > attributeMap;
XMLAttDefList& attIter = curElem.getAttDefList();
for ( unsigned int i = 0; i < attIter.getAttDefCount(); i++ )
{
std::string attribute, attributeValue;
std::set<std::string> attributeValueSet;
XMLAttDef& attr = attIter.getAttDef ( i );
XMLAttDef::DefAttTypes ty = attr.getDefaultType();
if ( ty == XMLAttDef::Prohibited )
continue;
SchemaAttDef *pAttr = ( SchemaAttDef * ) &attr;
const QName *qnm = pAttr->getAttName();
if ( qnm )
{
attribute = XMLString::transcode ( qnm->getRawName() );
}
if ( attribute.empty() )
continue;
// Value
if ( pAttr->getValue() )
{
attributeValue = XMLString::transcode ( pAttr->getValue() );
attributeValueSet.insert ( attributeValue );
attributeMap.insert ( make_pair ( attribute, attributeValueSet ) );
}
}
if ( !attributeMap.empty() )
d->attributeMap.insert( make_pair ( element, attributeMap ) );
}
}
}

View File

@ -33,9 +33,10 @@ struct PromptGeneratorData : public ParserData
std::map<std::string, std::set<std::string> > elementMap; std::map<std::string, std::set<std::string> > elementMap;
std::map<std::string, std::set<std::string> > requiredAttributeMap; std::map<std::string, std::set<std::string> > requiredAttributeMap;
std::set<std::string> entitySet; std::set<std::string> entitySet;
std::string catalogPath, auxPath; std::string catalogPath, auxPath, rootElement;
int elementDeclRecurseLevel; int elementDeclRecurseLevel;
bool rootElement, dtdFound; bool isRootElement, grammarFound;
unsigned attributeValueCutoff;
XML_Parser p; XML_Parser p;
}; };
@ -56,7 +57,7 @@ public:
std::map<std::string, std::set<std::string> > &elementMap ); std::map<std::string, std::set<std::string> > &elementMap );
void getEntitySet ( void getEntitySet (
std::set<std::string> &entitySet ); std::set<std::string> &entitySet );
bool getDtdFound(); bool getGrammarFound();
private: private:
std::auto_ptr<PromptGeneratorData> d; std::auto_ptr<PromptGeneratorData> d;
static void XMLCALL starthandler ( static void XMLCALL starthandler (
@ -100,6 +101,10 @@ private:
const XML_Char *systemId, const XML_Char *systemId,
const XML_Char *publicId, const XML_Char *publicId,
const XML_Char *notationName ); const XML_Char *notationName );
static void handleSchema (
PromptGeneratorData *d,
const XML_Char *el,
const XML_Char **attr );
}; };
#endif #endif