XQuery/XQuery and XML Schema

Motivation

edit

In learning any modelling language, it is helpful to see sample instances of a formal model. This is equally true when developing an XML Schema. XML development tools such as oXygen and XMLSpy can generate a random instance of a supplied XML Schema, but it is also useful to have a web service that does this. The service can also be used for test data generation.

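The following is a fragment of a test XML Schema file (a single named group; the enclosing xs:schema element and the declarations of the xs and ord namespace prefixes are not shown):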

<!--
  TechnicalOrderVersionChoiceGroup: a reusable model group consisting of a single
  xs:choice between the technical order version elements listed below.
  Every alternative is declared with minOccurs="0", so the choice can also be
  satisfied by an empty alternative (no element present at all).
  All referenced types use the ord: prefix; its namespace binding, like that of
  xs:, is declared outside this fragment.
-->
<xs:group name="TechnicalOrderVersionChoiceGroup">
  <xs:choice>
    <xs:element name="ettcsMigrationTov" type="ord:ETTCSMigrationTechnicalOrderVersionType" minOccurs="0" />
    <xs:element name="ettcsNewSiteBuildTov" type="ord:ETTCSNewSiteBuildTechnicalOrderVersionType" minOccurs="0" />
    <xs:element name="ettcsNewCabinetTov" type="ord:ETTCSNewCabinetTechnicalOrderVersionType" minOccurs="0" />
    <xs:element name="ettcsSimpleTov" type="ord:ETTCSSimpleTechnicalOrderVersionType" minOccurs="0" />
    <xs:element name="lteNewCabinetTov" type="ord:LTENewCabinetTechnicalOrderVersionType" minOccurs="0" />
    <xs:element name="gsmoeTov" type="ord:GSMoETechnicalOrderVersionType" minOccurs="0" />
    <xs:element name="ettcsUpspeedTov" type="ord:ETTCSUpspeedTechnicalOrderVersionType" minOccurs="0" />
    <xs:element name="ettcsRehomeTov" type="ord:ETTCSRehomeTechnicalOrderVersionType" minOccurs="0" />
    <xs:element name="ettcsRebalanceTov" type="ord:ETTCSRebalanceTechnicalOrderVersionType" minOccurs="0" />
    <xs:element name="retTechnicalOrderVersion" type="ord:RETTechnicalOrderVersionType" minOccurs="0" />
    <xs:element name="ettcsNSBNGUNIEVCUplinkTechnicalOrderVersion" type="ord:ETTCSNSBNGUNIEVCUplinkTechnicalOrderVersionType" minOccurs="0" />
    <xs:element name="ettcsNSBNGUNIEVCAdditionalNode" type="ord:ETTCSNSBNGUNIEVCAdditionalNodeTechnicalOrderVersionType" minOccurs="0" />
    <xs:element name="eireTov" type="ord:EIRETechnicalOrderVersionType" minOccurs="0" />
    <!-- Granite Initiated Order Types -->
    <xs:element name="addEttcsMtsoCollectorCircuitTechnicalOrderVersion" type="ord:AddEttcsMtsoCollectorCircuitTechnicalOrderVersionType" minOccurs="0" />
    <xs:element name="r2BToR4BConversionTechnicalOrderVersion" type="ord:R2BToR4BConversionTechnicalOrderVersionType" minOccurs="0" />
    <xs:element name="eoSONETMtsoCollectorCircuitTechnicalOrderVersion" type="ord:EoSONETMtsoCollectorCircuitTechnicalOrderVersionType" minOccurs="0" />
    <xs:element name="disconnectEthernetEvcSegmentTechnicalOrderVersion" type="ord:DisconnectEthernetEvcSegmentTechnicalOrderVersionType" minOccurs="0" />
    <xs:element name="ettcOc3MtsoR4DTechnicalOrderVersion" type="ord:EttcOc3MtsoR4DTechnicalOrderVersionType" minOccurs="0" />
    <xs:element name="gSMoEMtsoTechnicalOrderVersion" type="ord:GSMoEMtsoTechnicalOrderVersionType" minOccurs="0" />
    <!-- NOTE(review): the next two element names end in "Type", unlike their siblings;
         possibly copied from the type name. Renaming would change instance documents, so confirm first. -->
    <xs:element name="disconnectLnsOewBaseSegmentTechnicalOrderVersionType" type="ord:DisconnectLnsOewBaseSegmentTechnicalOrderVersionType" minOccurs="0" />
    <xs:element name="disconnectLnsOewBaselinePathTechnicalOrderVersionType" type="ord:DisconnectLnsOewBaselinePathTechnicalOrderVersionType" minOccurs="0" />
    <xs:element name="discoEthernetSonetRehomeTov" type="ord:DiscoEthernetOverSonetForRehomeTechnicalOrderVersionType" minOccurs="0" />    
    <xs:element name="ethernetSonetRehomeMtsoTov" type="ord:EthernetOverSonetRehomeMtsoTechnicalOrderVersionType" minOccurs="0" />
    <!-- Iteration 17 - Two new Granite Initiated Order Types - for Aug2013 -->
    <xs:element name="gEttcsNSBNextGenUniEvcDualUplinkTov" type="ord:EttcsNSBNextGenUNIEVCDualUplinkTechnicalOrderVersionType" minOccurs="0" />
    <xs:element name="gEttcsNSBNextGenUniEvcMultiCabTov" type="ord:EttcsNSBNextGenUNIEVCMultiCabTechnicalOrderVersionType" minOccurs="0" />
    <!-- R12.1 INDOOR SMALL CELL EAMIS -->
    <xs:element name="smallCellEamis" type="ord:IndoorSmallCellEamisTechnicalOrderVersionType" minOccurs="0" />
    <!-- R13.1 Rapid Deployment-->
    <xs:element name="eoSonetOc12MtsoCollectorCircuitTov" type="ord:EoSonetOc12MtsoCollectorCircuitTechnicalOrderVersionType" minOccurs="0" />
    <xs:element name="indoorSmallCellBdslTov" type="ord:IndoorSmallCellBdslTechnicalOrderVersionType" minOccurs="0" />
    <xs:element name="indoorSmallCell3RdPartyTov" type="ord:IndoorSmallCell3RdPartyTechnicalOrderVersionType" minOccurs="0" />
    <xs:element name="indoorSmallCellSharedTransportTov" type="ord:IndoorSmallCellSharedTransportTechnicalOrderVersionType" minOccurs="0" />
    <!-- MLPPP WO Types -->
    <xs:element name="mlpppMigrationUMTS" type="ord:MLPPPMigrationUMTSTechnicalOrderVersionType" minOccurs="0" />    
    <xs:element name="mlpppMigrationDS1UMTS" type="ord:MLPPPMigrationDS1UMTSTechnicalOrderVersionType" minOccurs="0" />    
    <xs:element name="mlpppNewCabinetAddLTE" type="ord:MLPPPNewCabinetAddLTETechnicalOrderVersionType" minOccurs="0" />    
    <xs:element name="mlpppNewCabinetAddDS1LTE" type="ord:MLPPPNewCabinetAddDS1LTETechnicalOrderVersionType" minOccurs="0" />    
    <xs:element name="mlpppNewCabinetAddUMTS" type="ord:MLPPPNewCabinetAddUMTSTechnicalOrderVersionType" minOccurs="0" />    
    <xs:element name="mlpppNewCabinetAddDS1UMTS" type="ord:MLPPPNewCabinetAddDS1UMTSTechnicalOrderVersionType" minOccurs="0" />    
    <xs:element name="mlpppUpspeed" type="ord:MLPPPUpspeedTechnicalOrderVersionType" minOccurs="0" />    
    <xs:element name="mlpppUpspeedDS1" type="ord:MLPPPUpspeedDS1TechnicalOrderVersionType" minOccurs="0" />    
    <xs:element name="mlpppRehome" type="ord:MLPPPRehomeTechnicalOrderVersionType" minOccurs="0" />    
    <xs:element name="mlpppRehomeDS1" type="ord:MLPPPRehomeDS1TechnicalOrderVersionType" minOccurs="0" />    
    <xs:element name="mlpppRebalance" type="ord:MLPPPRebalanceTechnicalOrderVersionType" minOccurs="0" />    
    <xs:element name="mlpppRebalanceDS1" type="ord:MLPPPRebalanceDS1TechnicalOrderVersionType" minOccurs="0" />    
    <!-- ESCORE Initiated SmallCell WO Type - since R14.0 -->
    <xs:element name="smallcellTransport" type="ord:SmallCellTransportTechnicalOrderVersionType" minOccurs="0" />
    <xs:element name="smallcellSCR" type="ord:SmallCellSCRTechnicalOrderVersionType" minOccurs="0" />
    <xs:element name="smallcellSCS" type="ord:SmallCellSCSTechnicalOrderVersionType" minOccurs="0" />
    <xs:element name="smallcellAP" type="ord:SmallCellAPTechnicalOrderVersionType" minOccurs="0" /> <!-- Self install and Disconnect will use this same AP payload -->
    <!-- Since R1406 - Two new ETTCS automation IPv6 Types -->
    <xs:element name="ettcsIPv6Siad" type="ord:ETTCSIPv6MigrationSIADTechnicalOrderVersionType" minOccurs="0" />
    <xs:element name="ettcsIPv6ENodeB" type="ord:ETTCSIPv6MigrationENodeBTechnicalOrderVersionType" minOccurs="0" />
    <!-- Since R1404 - RD Types -->
    <xs:element name="ettcsUniversalEvcDownspeed" type="ord:EttcsUniversalEvcDownspeedTechnicalOrderVersionType" minOccurs="0" />
    <!-- NOTE(review): "3rdPartytTov" has an extra "t" compared with the type name "3rdParty";
         looks like a typo, but the name is part of the instance format, so confirm before changing. -->
    <xs:element name="outdoorSmallCell3rdPartytTov" type="ord:OutdoorSmallCell3rdPartyTechnicalOrderVersionType" minOccurs="0" />
    <xs:element name="outdoorSmallCellBdslTov" type="ord:OutdoorSmallCellBdslTechnicalOrderVersionType" minOccurs="0" />
    <!-- Since R1406 - ETTCS Site Migration -->
    <xs:element name="ettcsSiteMigrBackhaul" type="ord:ETTCSSiteMigrationBackhaulTechnicalOrderVersionType" minOccurs="0" />
    <xs:element name="ettcsSiteMigrShorthaul" type="ord:ETTCSSiteMigrationShorthaulTechnicalOrderVersionType" minOccurs="0" />
    <!-- Since R1410 - IUB Trunking Support -->
    <xs:element name="iubTrunkingTov" type="ord:IUBTrunkingTechnicalOrderVersionType" minOccurs="0" />
    <xs:element name="infrastructureTov" type="ord:InfrastructureTechnicalOrderVersionType" minOccurs="0" />
    <!-- NOTE(review): this is the only element name here that starts with an upper-case letter. -->
    <xs:element name="EOMWTargetTov" type="ord:ETTCSNSBNGUplinkEoMWTargetTechnicalOrderVersionType" minOccurs="0" />
  </xs:choice>
</xs:group>

XQuery to convert Simple XML Schema into an Instance Document

edit

We can use a recursive function to convert this XML Schema into an instance document. Here is an implementation of this function which covers some of the key XML Schema constructs:

declare namespace xs = "http://www.w3.org/2001/XMLSchema";
(:~
 : Recursively generates random instance content for one XML Schema component.
 :
 : Handled components: xs:complexType, xs:element, xs:attribute, xs:sequence,
 : xs:all and xs:choice. xs:annotation and every other component produce
 : the empty sequence.
 :
 : Known limitations (unchanged from the original behaviour):
 :   - @ref and @type are matched against @name as plain strings, so prefixed
 :     QNames that differ from the declared local name are not resolved;
 :   - occurrence constraints on xs:sequence / xs:all / xs:choice are ignored.
 :
 : @param $element a node from an XML Schema document
 : @return generated element/attribute nodes or atomic values, possibly empty
 :)
declare function local:process-element($element as node())  {
    typeswitch ($element)
        case element(xs:complexType) return
            (: In a schema, attribute declarations follow the content model, but in an
               element constructor attribute nodes must precede all other content
               (otherwise err:XQTY0024), so the attributes are generated first. :)
            for $sub-element in ($element/xs:attribute, $element/*[not(self::xs:attribute)])
            return local:process-element($sub-element)

        case element(xs:annotation) return
            ()

        case element(xs:element) return
            (: Occurrence bounds are cast to xs:integer so that the range
               expression below receives integer operands. :)
            let $min := xs:integer(($element/@minOccurs, 1)[1])
            let $max :=
                if ($element/@maxOccurs = "unbounded")
                then max(($min, 10))  (: cap "unbounded" at 10, but never below minOccurs :)
                else xs:integer(($element/@maxOccurs, 1)[1])
            let $count := $min + xs:integer(round(math:random() * ($max - $min)))
            (: Annotations carry no instance content; ignoring them here lets an annotated
               element that has a @type still receive a generated value. :)
            let $content-model := $element/*[not(self::xs:annotation)]
            (: "1 to $count" is empty when $count is 0, so no separate guard is needed :)
            for $i in 1 to $count
            return
                if ($element/@ref)
                then
                    (: reference to a global element declaration :)
                    local:process-element(root($element)/xs:schema/xs:element[@name = $element/@ref])
                else if (root($element)/xs:schema/xs:complexType[@name = $element/@type])
                then
                    (: named global complex type :)
                    element {$element/@name} {
                        local:process-element(root($element)/xs:schema/xs:complexType[@name = $element/@type])
                    }
                else
                    element {$element/@name} {
                        if ($content-model)
                        then
                            (: anonymous inline type :)
                            for $sub-element in $content-model
                            return local:process-element($sub-element)
                        else
                            (: simple type; an untyped element is treated as xs:string :)
                            local:process-type(($element/@type, "xs:string")[1])
                    }

        case element(xs:attribute) return
            (: required attributes are always generated, optional ones about half of the time :)
            if ($element/@use = "required" or math:random() > 0.5)
            then
                attribute {$element/@name} {
                    local:process-type($element/@type)
                }
            else ()

        case element(xs:sequence) return
            for $sub-element in $element/*
            return local:process-element($sub-element)

        case element(xs:all) return
            (: children of xs:all may appear in any order, so shuffle them :)
            for $sub-element in $element/*
            order by math:random()
            return local:process-element($sub-element)

        case element(xs:choice) return
            (: choose among the real alternatives only, never an xs:annotation :)
            let $options := $element/*[not(self::xs:annotation)]
            let $count := count($options)
            return
                if ($count = 0)
                then ()
                else
                    (: floor(...) + 1 gives 1..$count when math:random() is in [0,1);
                       min() guards against an index past the end should it ever return 1 :)
                    let $i := min(($count, 1 + xs:integer(floor(math:random() * $count))))
                    return local:process-element($options[$i])

        default return
            ()
};

This function uses computed element constructors (element {name} {content}) to build a new XML instance tree from the information in the XML Schema file, and the typeswitch expression to select the appropriate processing for each kind of schema component.

Type Instance Generation

edit

Some helper functions are required to generate random instances of (some of) the standard types.


(:~
 : Left-pads $string with copies of $char until it is $max characters long.
 : A $string that is already $max characters or longer is returned unchanged.
 :
 : Uses only standard functions (string-join over a range) instead of the
 : non-standard string-pad().
 :
 : @param $string the text to pad
 : @param $char   the padding text, normally a single character
 : @param $max    the desired minimum length of the result
 : @return the padded string
 :)
declare function local:pad($string as xs:string, $char as xs:string, $max as xs:integer) as xs:string {
   concat(
      (: the range is empty when no padding is needed, giving "" :)
      string-join((for $i in 1 to ($max - string-length($string)) return $char), ""),
      $string)
};

(:~
 : Left-pads a value with "0" to a width of two characters, e.g. 7 becomes "07".
 :
 : @param $string the value to pad; callers in this file pass numbers, so it is
 :                converted with string() before being handed to local:pad,
 :                whose first parameter is declared as xs:string
 : @return the zero-padded string
 :)
declare function local:pad2($string) {
   local:pad(string($string), "0", 2)
};

(:~
 : Returns a random integer between 0 and $n - 1 inclusive.
 : Assumes math:random() yields a value in [0,1); min() keeps the result in
 : range should it ever return exactly 1.
 :)
declare function local:random-below($n as xs:integer) as xs:integer {
   min(($n - 1, xs:integer(floor(math:random() * $n))))
};

(:~
 : Returns a random date as YYYY-MM-DD with a year from 1990 to 2010.
 : The day is limited to 1..28 so the result is a valid date in every month.
 :)
declare function local:random-date() as xs:string {
   string-join((
         string(1990 + local:random-below(21)),
         local:pad2(string(1 + local:random-below(12))),
         local:pad2(string(1 + local:random-below(28)))
      ), "-")
};

(:~ Returns a random time of day as HH:MM:SS. :)
declare function local:random-time() as xs:string {
   string-join((
         local:pad2(string(local:random-below(24))),
         local:pad2(string(local:random-below(60))),
         local:pad2(string(local:random-below(60)))
      ), ":")
};

(:~
 : Generates a random value for a built-in simple type.
 :
 : Only type names written with the "xs:" prefix are recognised: xs:string,
 : xs:NMTOKEN, xs:NCName, xs:integer, xs:decimal, xs:boolean, xs:date,
 : xs:time and xs:dateTime.
 :
 : @param $name the type name (a string or a @type attribute node)
 : @return a random value of that type, or the empty sequence for any other name
 :)
declare function local:process-type($name )   {
(: just simple types - xs namespace :)
   if ($name = ("xs:string","xs:NMTOKEN","xs:NCName"))
   then
      (: up to 11 characters drawn from A-D; NMTOKEN and NCName may not be empty :)
      let $min-length := if ($name = "xs:string") then 0 else 1
      let $length := $min-length + local:random-below(12 - $min-length)
      return
         string-join(
            (for $c in 1 to $length
             return ("A","B","C","D")[1 + local:random-below(4)]),
            "")
   else if ($name="xs:integer")
   then local:random-below(101)
   else if ($name="xs:decimal")
   (: cast so the value never serialises in exponent notation, which xs:decimal disallows :)
   then xs:decimal(math:random() * 100 - 50)
   else if ($name="xs:boolean")
   then ("true","false")[1 + local:random-below(2)]
   else if ($name="xs:date")
   then local:random-date()
   else if ($name="xs:time")
   then local:random-time()
   else if ($name="xs:dateTime")
   then concat(local:random-date(), "T", local:random-time())
   else ()
};


Script

edit

Here is a basic script which applies this function to the schema document named by the file parameter. The root of the generated instance is the global element named by the root parameter if one is supplied; otherwise it is the first global element in the schema.

(: Main query: loads the schema named by the "file" request parameter and generates a
   random instance, starting from the global element named by the "root" parameter or,
   if none is given, from the first global element declaration. :)
let $file := request:get-parameter("file", ())[1]
let $root := request:get-parameter("root", ())[1]
(: NOTE(review): $file comes straight from the request and is handed to doc(), so a
   caller can make the server fetch arbitrary documents. Consider restricting it to a
   known collection or a whitelist of schemas. :)
let $schema :=
   if (exists($file) and doc-available($file))
   then doc($file)
   else ()
let $start :=
   if ($root)
   then $schema/xs:schema/xs:element[@name = $root]
   else $schema/xs:schema/xs:element[1]
return
   (: fail with a clear message instead of a type error from passing () on :)
   if (empty($schema))
   then error(xs:QName("local:schema-not-found"),
              concat("Cannot load schema document: ", $file))
   else if (empty($start))
   then error(xs:QName("local:root-not-found"),
              concat("No matching global element declaration in schema: ", $root))
   else local:process-element($start[1])

Execute

Sample Output

edit
<Root>
    <Start>C</Start>
    <Any>
        <AnyA>BADADAD</AnyA>
        <AnyC>BA</AnyC>
        <AnyB>D</AnyB>
    </Any> 
    <Multiple>
        <Choice>
            <Third AttributeDecimal="8.937041402178778" AttributeTime="20:38:04" AttributeDateTime="1995-08-06T21:08:43"/>
        </Choice>
        <Unbounded>BDAACAAAA</Unbounded>
        <Ref>

            <Ref1>52</Ref1>
            <Ref2>1992-09-02</Ref2>
        </Ref>
        <WithRefType>
            <RefTypeA>BBBDD</RefTypeA>
            <RefTypeB>DAADCABDC</RefTypeB>
        </WithRefType>

    </Multiple>
    <Multiple>
        <Choice>
            <Second AttributeString="ADAB" AttributeDate="2005-12-16"/>
        </Choice>
        <Unbounded/>
        <Unbounded>CABBB</Unbounded>
        <Unbounded/>

        <Unbounded>B</Unbounded>
        <Unbounded>AACBABAABCB</Unbounded>
        <Ref>
            <Ref1>56</Ref1>
            <Ref2>1992-05-22</Ref2>
        </Ref>
        <WithRefType>

            <RefTypeA>CDAA</RefTypeA>
            <RefTypeB>CBDBDBDB</RefTypeB>
        </WithRefType>
        <Optional>CC</Optional>
    </Multiple>
    <Multiple>
        <Choice>

            <First AttributeInt="21" AttributeString="AABC" AttributeBoolean="true"/>
        </Choice>
        <Unbounded>ADADDDDDB</Unbounded>
        <Unbounded>DCDCBDDD</Unbounded>
        <Unbounded>CBAD</Unbounded>
        <Unbounded>DAAAD</Unbounded>
        <Ref>

            <Ref1>13</Ref1>
            <Ref2>1997-04-30</Ref2>
        </Ref>
        <WithRefType>
            <RefTypeA>CB</RefTypeA>
            <RefTypeB/>
        </WithRefType>

    </Multiple>
    <Multiple>
        <Choice>
            <Second AttributeString="DBDA" AttributeDate="1999-05-16"/>
        </Choice>
        <Unbounded>CCDDDDCD</Unbounded>
        <Unbounded>DDABACC</Unbounded>
        <Ref>

            <Ref1>44</Ref1>
            <Ref1>30</Ref1>
            <Ref1>57</Ref1>
            <Ref2>1997-01-26</Ref2>
        </Ref>
        <WithRefType>
            <RefTypeA>CBC</RefTypeA>

            <RefTypeB>CA</RefTypeB>
        </WithRefType>
    </Multiple>
    <End>95</End>
</Root>

To do

edit
  • full set of xml types
  • mixed
  • restriction (only enumeration so far)
  • Group
  • AttributeGroup
  • problem with missing attributes in a complexType
  • hinting for distributions
  • namespaces
  • randomisation configuration

References

edit