XQuery/XML Schema to Instance

Motivation

edit

You want to generate a sample XML instance from an XML Schema file (XSD). This is useful, for example, if you would like to dynamically generate blank XForms instances for creating new documents from a schema.


Method

edit

[Note: See also the article XQuery/XQuery and XML Schema which has the same objective. If the author would like to contact me, I'm trying to get this code working to compare with the code I developed. ChrisWallace (talk) 15:09, 13 May 2009 (UTC)ChrisWallace]

Create an XQuery function that takes the URI of an XML Schema file (.xsd), along with a set of display parameters, and generates a sample XML instance. These parameters are:

  1. $schemaURI = the location of the .xsd file (e.g. db/cms/schemas/MySchema.xsd)
  2. $rootElementName = the root element for the sample XML file you wish to generate (i.e. doesn't have to be the root of the whole schema)
  3. $maxOccurances = for elements with a maxOccurs attribute greater than one, how many times should the element be repeated in the sample instance?
  4. $optionalElements = Should optional elements (i.e. minOccurs="0") be included? 'true' or 'false'
  5. $optionalAttributes = Should optional attributes (i.e. use="optional") be included? 'true' or 'false'
  6. $choiceStrategy = Where there is a choice between elements or groups of elements, should the sample include a random selection from the choices, or simply use the first choice? 'random' or 'first'

Call the function with the following:

xquery version "1.0";
(:     Query which calls the function     :)

import module namespace content ="/db/cms/modules/content" at "/db/cms/modules/content.xqm";

(: The query body is a plain expression. A "return" keyword is only valid as the
   final clause of a FLWOR expression, so it must not appear here on its own. :)
content:xsd-to-instance('/db/cms/content_types/schemas/Genericode.xsd','CodeList','1','true','true','random')

Notes

edit

The function currently cannot dynamically set the namespaces in the sample instance. Any assistance in getting this to work would be much appreciated.

The function requires that the xsd file use the xs namespace prefix (i.e. xs:element). Attempts to use a wildcard prefix in the XPath statements did not work for some reason (i.e. $xsdFile/*:schema/*:element). An alternative approach is to determine the prefix, assign it to a variable and then concatenate it into all the XPath statements (e.g. $xsdFile/concat($prefix,':schema/',$prefix,':element')), but that makes for some pretty ugly code. Another alternative is to use another function to reset whatever the xsd file prefix is to xs. This does work fine but adds a bit more code. Any more efficient alternative suggestions would be welcome.

The function uses two internal queries assigned to variables, $subElementsQuery and $attributesQuery, which are then evaluated using util:eval. This enables the recursive collection of sub-elements and attributes without having to call an external function. These two queries could just as easily have been declared as external functions.

XSD to Instance Function

edit
xquery version "1.0";
(:     Content Module     :)

module namespace content ="/db/cms/modules/content";
declare namespace request="http://exist-db.org/xquery/request";
declare namespace util="http://exist-db.org/xquery/util";

(: Function :)
(:~
 : Generates a sample XML instance for one element declared in an XML Schema.
 :
 : The schema is read with doc(), the content model of the requested element is
 : walked with two query strings evaluated through util:eval, and the function calls
 : itself for every sub-element it finds.
 :
 : @param $schemaURI          location of the .xsd document, passed to doc()
 : @param $rootElementName    name of the xs:element to generate
 : @param $maxOccurances      upper limit on repetitions of a repeatable element; cast with xs:integer
 : @param $optionalElements   'false' suppresses elements declared with minOccurs 0
 : @param $optionalAttributes 'true' includes attributes declared with use optional
 : @param $choiceStrategy     'random' picks a random branch of an xs:choice, anything else picks the first
 : @return the constructed element with its attributes, default or fixed value, and generated children
 :
 : NOTE(review): the schema must use the xs prefix, and a content model that refers
 : back to itself through required elements would presumably recurse without end.
 :)
declare function content:xsd-to-instance($schemaURI,$rootElementName,$maxOccurances,$optionalElements,$optionalAttributes,$choiceStrategy)
{
(: 
    TO DO: 
       - Handle substitution groups
       - Dynamically include namespaces
       - Handle any xsd file prefix (e.g. xs:element or xsd:element)
:)
(: Get the main xsd file :)
let $xsdFile := doc($schemaURI)
(: Determine the namespace prefix for the xsd file (e.g. xs, xsd or none) :)
let $xsdFileNamespacePrefix := substring-before(name($xsdFile/*[1]),':')
(: get the root element based on the root element name given in the function parameters :)
let $rootElement := $xsdFile//xs:element[@name = $rootElementName]
(: Gather the namespace prefixes and namespaces included in the xsd file :)
let $namespaces := let $prefixes := in-scope-prefixes($xsdFile/xs:schema)
                   return
                   <Namespaces>
                      {for $prefix in $prefixes
                       return
                       <Namespace prefix="{$prefix}" URI="{namespace-uri-for-prefix($prefix,$xsdFile/xs:schema)}"/>}
                   </Namespaces>
(: Determine the namespace prefix and namespace for the root element :)
let $rootElementNamespace := $xsdFile/xs:schema/@targetNamespace
let $rootElementNamespacePrefix := $namespaces/Namespace[@URI = $rootElementNamespace]/@prefix
(: If the root element is a complex type, locate the complex type.
   The [1] predicate guards against several declarations sharing the same name,
   since substring-after and substring-before accept at most one item. :)
let $rootElementType := substring-after($rootElement[1]/@type,':')
let $namespacePrefix := substring-before($rootElement[1]/@type,':')
(: Query string resolving the schema document that owns the namespace bound to $namespacePrefix.
   It is evaluated with util:eval so that it sees whatever $namespacePrefix is in scope at the call site. :)
let $schemaFromPrefixQuery := string("
                                      let $namespace := namespace-uri-for-prefix($namespacePrefix,$xsdFile/*[1])
                                      let $schemaLocation := if($namespace = $xsdFile/xs:schema/@targetNamespace or $namespace = '')
                                                             then $schemaURI
                                                             else $xsdFile//xs:import[@namespace = $namespace]/@schemaLocation
                                      let $schema := if($schemaLocation = $schemaURI)
                                                     then $xsdFile
                                                     else doc($schemaLocation)
                                      return
                                      $schema
                                    ")

let $rootElementTypeSchema := util:eval($schemaFromPrefixQuery)
let $complexType :=  if($rootElement/xs:complexType)
                     then $rootElement/xs:complexType
                     else if($namespacePrefix = 'xs' or $namespacePrefix = 'xsd')
                          then ()
                          else if($rootElementTypeSchema//xs:complexType[@name = $rootElementType])
                               then $rootElementTypeSchema//xs:complexType[@name = $rootElementType]
                               else()
(: Query to recursively drill down to find the appropriate elements.
   If the complex type is a choice, include one branch, chosen according to $choiceStrategy.
   If the complex type is a group, include the group sub-elements.
   If the complex type is an extension, include the base sub-elements followed by the extension sub-elements.
   Values taken from schema attributes are atomized with data() before being placed in the
   SubElement descriptor, otherwise the attribute nodes would become attributes of the
   wrapper elements instead of their text content. :)
let $subElementsQuery := string("
                                for $xsElement in $complexType/*
                                return
                                if(name($xsElement)='xs:all')
                                then let $complexType := $complexType/xs:all
                                     return util:eval($subElementsQuery)
                                else if(name($xsElement)='xs:sequence')
                                     then let $complexType := $complexType/xs:sequence
                                          return util:eval($subElementsQuery)
                                     else if(name($xsElement)='xs:choice')
                                          then let $choice := if($choiceStrategy = 'random')
                                                              then let $choiceCount := count($xsElement/*)
                                                                   return $choiceCount - util:random($choiceCount)
                                                              else 1
                                               return
                                               if(name($xsElement/*[$choice])='xs:element')
                                               then let $subElementName := if($xsElement/*[$choice]/@name) 
                                                                           then data($xsElement/*[$choice]/@name)
                                                                           else data(substring-after($xsElement/*[$choice]/@ref,':'))
                                                    let $namespace := namespace-uri-for-prefix($namespacePrefix,$xsdFile/*[1])
                                                    let $schemaLocation := if($namespace = $xsdFile/xs:schema/@targetNamespace or $namespace = '')
                                                                           then $schemaURI
                                                                           else $xsdFile//xs:import[@namespace = $namespace]/@schemaLocation
                                                    let $minOccurs := $xsElement/*[$choice]/@minOccurs
                                                    let $maxOccurs := $xsElement/*[$choice]/@maxOccurs
                                                    return
                                                    <SubElement>
                                                       <Name>{$subElementName}</Name>
                                                       <NamespacePrefix>{$namespacePrefix}</NamespacePrefix>
                                                       <Namespace>{$namespace}</Namespace>
                                                       <SchemaLocation>{data($schemaLocation)}</SchemaLocation>
                                                       <MinOccurs>{data($minOccurs)}</MinOccurs>
                                                       <MaxOccurs>{data($maxOccurs)}</MaxOccurs>
                                                    </SubElement>
                                               else if(name($xsElement/*[$choice])='xs:group')
                                                    then let $groupName := substring-after($xsElement/*[$choice]/@ref,':')
                                                         let $namespacePrefix := substring-before($xsElement/*[$choice]/@ref,':')
                                                         let $groupSchema := util:eval($schemaFromPrefixQuery)
                                                         let $complexType := $groupSchema//xs:group[@name = $groupName]
                                                         return util:eval($subElementsQuery)
                                                    else let $complexType := $xsElement/*[$choice]
                                                         return util:eval($subElementsQuery)
                                          else if(name($xsElement)='xs:group')
                                               then let $groupName := substring-after($xsElement/@ref,':')
                                                    let $namespacePrefix := substring-before($xsElement/@ref,':')
                                                    let $groupSchema := util:eval($schemaFromPrefixQuery)
                                                    let $complexType := $groupSchema//xs:group[@name = $groupName]
                                                    return util:eval($subElementsQuery)
                                               else if(name($xsElement)='xs:complexContent')
                                                    then let $complexType := $complexType/xs:complexContent
                                                         return util:eval($subElementsQuery)
                                                    else if(name($xsElement)='xs:extension')
                                                         then let $extension := let $complexType := $complexType/xs:extension
                                                                                return util:eval($subElementsQuery)
                                                              let $base := let $baseName := substring-after($xsElement/@base,':')
                                                                           let $namespacePrefix := substring-before($xsElement/@base,':')
                                                                           let $baseSchema := util:eval($schemaFromPrefixQuery)
                                                                           let $complexType := $baseSchema//xs:complexType[@name = $baseName]
                                                                           return util:eval($subElementsQuery)
                                                              return ($base, $extension)
                                                         else if(name($xsElement)='xs:element')
                                                              then let $subElementName := if($xsElement/@name) 
                                                                                          then data($xsElement/@name)
                                                                                          else data(substring-after($xsElement/@ref,':'))
                                                                   let $namespace := namespace-uri-for-prefix($namespacePrefix,$xsdFile/*[1])
                                                                   let $schemaLocation := if($namespace = $xsdFile/xs:schema/@targetNamespace or $namespace = '')
                                                                                          then $schemaURI
                                                                                          else $xsdFile//xs:import[@namespace = $namespace]/@schemaLocation
                                                                   let $minOccurs := $xsElement/@minOccurs
                                                                   let $maxOccurs := $xsElement/@maxOccurs
                                                                   return
                                                                   <SubElement>
                                                                        <Name>{$subElementName}</Name>
                                                                        <NamespacePrefix>{$namespacePrefix}</NamespacePrefix>
                                                                        <Namespace>{$namespace}</Namespace>
                                                                        <SchemaLocation>{data($schemaLocation)}</SchemaLocation>
                                                                        <MinOccurs>{data($minOccurs)}</MinOccurs>
                                                                        <MaxOccurs>{data($maxOccurs)}</MaxOccurs>
                                                                   </SubElement>
                                                              else()
                                ")
(: Employ the sub-elements query to gather the sub-elements :)
let $subElements := util:eval($subElementsQuery)
(: Query to recursively drill down to find the appropriate attributes :)
let $attributesQuery := string("
                                for $xsElement in $complexType/*
                                return
                                if(name($xsElement)='xs:attributeGroup')
                                then let $attributeGroupName := substring-after($xsElement/@ref,':')
                                     let $namespacePrefix := substring-before($xsElement/@ref,':')
                                     let $attributeGroupSchema := util:eval($schemaFromPrefixQuery)
                                     let $complexType := $attributeGroupSchema//xs:attributeGroup[@name = $attributeGroupName]
                                     return util:eval($attributesQuery)
                                else if(name($xsElement)='xs:complexContent')
                                then let $complexType := $complexType/xs:complexContent
                                     return util:eval($attributesQuery)
                                else if(name($xsElement)='xs:extension')
                                then let $extension := let $complexType := $complexType/xs:extension
                                                       return util:eval($attributesQuery)
                                     let $base := let $baseName := substring-after($xsElement/@base,':')
                                                  let $namespacePrefix := substring-before($xsElement/@base,':')
                                                  let $baseSchema := util:eval($schemaFromPrefixQuery)
                                                  let $complexType := $baseSchema//xs:complexType[@name = $baseName]
                                                  return util:eval($attributesQuery)
                                     return $base union $extension 
                                else if(name($xsElement)='xs:attribute')
                                then $xsElement
                                else()
                                ")
(: Employ the attributes query to gather the attributes :)
let $attributes := util:eval($attributesQuery)

return

(: Create the root element :)

element{if($rootElementNamespacePrefix) 
        then concat($rootElementNamespacePrefix,':',$rootElementName) 
        else $rootElementName
        }
        {
        (: for the time being, namespace attributes must be hard coded :)
        namespace gc {'http://www.test.com'}
        (: The following should dynamically insert namespace attributes with prefixes but does not work. 
           It would be great if someone could help figure this out. 
        for $namespace in $namespaces
        return
        namespace {$namespace/Namespace/@prefix} {$namespace/Namespace/@URI},
        :)
        
        ,(: Comma is important, separates the namespaces section from the attribute section in the element constructor :)

        (: Create the element's attributes if any :)
        for $attribute in $attributes
        let $attributeName := if($attribute/@name) 
                              then data($attribute/@name) 
                              else data($attribute/@ref)
        return
        (: Make sure there is an attribute before calling the attribute constructor :)
        if($attributeName)
        then if($attribute/@use = 'optional')
             then if($optionalAttributes eq 'true')
                  then attribute{$attributeName}
                      (: Insert default attribute value if any :)
                      {if($attribute/@default) then data($attribute/@default) else if($attribute/@fixed) then data($attribute/@fixed) else ()}
                  else()
             else if($attribute/@use = 'prohibited')
                  then ()
                  else attribute{$attributeName}
                      (: Insert default attribute value if any :)
                      {if($attribute/@default) then data($attribute/@default) else if($attribute/@fixed) then data($attribute/@fixed) else ()}
        else()
    
        ,(: Comma separates the attribute section from the element content section in the element constructor :)

        (: Insert default element value if any :)
        if($rootElement/@default) 
        then data($rootElement/@default) 
        else if($rootElement/@fixed) 
             then data($rootElement/@fixed)
             else

            (: Recursively create any sub-elements :)
            for $subElement in $subElements
            let $subElementName := $subElement/Name
            let $namespacePrefix := $subElement/NamespacePrefix
            let $schemaURI := $subElement/SchemaLocation

            (: The occurrence constraints are carried as the text of the MinOccurs and MaxOccurs
               children of the SubElement descriptor. XML Schema defaults both to 1 when the
               attribute is absent, and maxOccurs may hold the non-numeric value unbounded. :)
            let $minOccursText := normalize-space($subElement/MinOccurs)
            let $maxOccursText := normalize-space($subElement/MaxOccurs)
            let $minOccurs := if($minOccursText castable as xs:integer)
                              then xs:integer($minOccursText)
                              else 1
            let $maxOccurs := if($maxOccursText castable as xs:integer)
                              then xs:integer($maxOccursText)
                              else 1

            (: Set the number of element occurances based on the minOccurs and maxOccurs values if any :)
            let $occurances := if($minOccurs gt 0 and $minOccurs gt xs:integer($maxOccurances)) 
                               then $minOccurs
                               else if($minOccurs eq 0 and $optionalElements eq 'false')
                                    then 0
                                    else if($maxOccursText eq 'unbounded')
                                         then if($maxOccurances) 
                                              then xs:integer($maxOccurances) 
                                              else 2
                                         else if($maxOccurs gt 1)
                                              then if(xs:integer($maxOccurances) lt $maxOccurs)
                                                   then xs:integer($maxOccurances)
                                                   else $maxOccurs
                                              else 1
            return
            for $i in (1 to $occurances)
                return
                content:xsd-to-instance($schemaURI,$subElementName,$maxOccurances,$optionalElements,$optionalAttributes,$choiceStrategy)
}
};