Last modified on 29 August 2014, at 11:21

XQuery/XQuery and XML Schema

Motivation

In learning any modelling language, it is helpful to see sample instances of any formal model. This is equally true when developing an XML Schema. XML Development tools like Oxygen and XML-Spy have tools to generate a random instance of a supplied XML schema, but it is useful to have a web service to do this. The service can also be used for test data generation.

The following fragment of a test XML Schema defines a named group containing a choice of optional elements:

<!-- Named model group offering a choice among optional (minOccurs="0")
     technical-order-version elements. The xs and ord prefixes are assumed
     to be bound on the enclosing schema element, which is not shown here. -->
<xs:group name="TechnicalOrderVersionChoiceGroup">
  <xs:choice>
    <xs:element name="ettcsMigrationTov" type="ord:ETTCSMigrationTechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="ettcsNewSiteBuildTov" type="ord:ETTCSNewSiteBuildTechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="ettcsNewCabinetTov" type="ord:ETTCSNewCabinetTechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="ettcsSimpleTov" type="ord:ETTCSSimpleTechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="lteNewCabinetTov" type="ord:LTENewCabinetTechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="gsmoeTov" type="ord:GSMoETechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="ettcsUpspeedTov" type="ord:ETTCSUpspeedTechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="ettcsRehomeTov" type="ord:ETTCSRehomeTechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="ettcsRebalanceTov" type="ord:ETTCSRebalanceTechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="retTechnicalOrderVersion" type="ord:RETTechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="ettcsNSBNGUNIEVCUplinkTechnicalOrderVersion" type="ord:ETTCSNSBNGUNIEVCUplinkTechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="ettcsNSBNGUNIEVCAdditionalNode" type="ord:ETTCSNSBNGUNIEVCAdditionalNodeTechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="eireTov" type="ord:EIRETechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="addEttcsMtsoCollectorCircuitTechnicalOrderVersion" type="ord:AddEttcsMtsoCollectorCircuitTechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="r2BToR4BConversionTechnicalOrderVersion" type="ord:R2BToR4BConversionTechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="eoSONETMtsoCollectorCircuitTechnicalOrderVersion" type="ord:EoSONETMtsoCollectorCircuitTechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="disconnectEthernetEvcSegmentTechnicalOrderVersion" type="ord:DisconnectEthernetEvcSegmentTechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="ettcOc3MtsoR4DTechnicalOrderVersion" type="ord:EttcOc3MtsoR4DTechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="gSMoEMtsoTechnicalOrderVersion" type="ord:GSMoEMtsoTechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="disconnectLnsOewBaseSegmentTechnicalOrderVersionType" type="ord:DisconnectLnsOewBaseSegmentTechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="disconnectLnsOewBaselinePathTechnicalOrderVersionType" type="ord:DisconnectLnsOewBaselinePathTechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="discoEthernetSonetRehomeTov" type="ord:DiscoEthernetOverSonetForRehomeTechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="ethernetSonetRehomeMtsoTov" type="ord:EthernetOverSonetRehomeMtsoTechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="gEttcsNSBNextGenUniEvcDualUplinkTov" type="ord:EttcsNSBNextGenUNIEVCDualUplinkTechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="gEttcsNSBNextGenUniEvcMultiCabTov" type="ord:EttcsNSBNextGenUNIEVCMultiCabTechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="smallCellEamis" type="ord:IndoorSmallCellEamisTechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="eoSonetOc12MtsoCollectorCircuitTov" type="ord:EoSonetOc12MtsoCollectorCircuitTechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="indoorSmallCellBdslTov" type="ord:IndoorSmallCellBdslTechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="indoorSmallCell3RdPartyTov" type="ord:IndoorSmallCell3RdPartyTechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="indoorSmallCellSharedTransportTov" type="ord:IndoorSmallCellSharedTransportTechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="mlpppMigrationUMTS" type="ord:MLPPPMigrationUMTSTechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="mlpppMigrationDS1UMTS" type="ord:MLPPPMigrationDS1UMTSTechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="mlpppNewCabinetAddLTE" type="ord:MLPPPNewCabinetAddLTETechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="mlpppNewCabinetAddDS1LTE" type="ord:MLPPPNewCabinetAddDS1LTETechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="mlpppNewCabinetAddUMTS" type="ord:MLPPPNewCabinetAddUMTSTechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="mlpppNewCabinetAddDS1UMTS" type="ord:MLPPPNewCabinetAddDS1UMTSTechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="mlpppUpspeed" type="ord:MLPPPUpspeedTechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="mlpppUpspeedDS1" type="ord:MLPPPUpspeedDS1TechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="mlpppRehome" type="ord:MLPPPRehomeTechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="mlpppRehomeDS1" type="ord:MLPPPRehomeDS1TechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="mlpppRebalance" type="ord:MLPPPRebalanceTechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="mlpppRebalanceDS1" type="ord:MLPPPRebalanceDS1TechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="smallcellTransport" type="ord:SmallCellTransportTechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="smallcellSCR" type="ord:SmallCellSCRTechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="smallcellSCS" type="ord:SmallCellSCSTechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="smallcellAP" type="ord:SmallCellAPTechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="ettcsIPv6Siad" type="ord:ETTCSIPv6MigrationSIADTechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="ettcsIPv6ENodeB" type="ord:ETTCSIPv6MigrationENodeBTechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="ettcsUniversalEvcDownspeed" type="ord:EttcsUniversalEvcDownspeedTechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="outdoorSmallCell3rdPartytTov" type="ord:OutdoorSmallCell3rdPartyTechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="outdoorSmallCellBdslTov" type="ord:OutdoorSmallCellBdslTechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="ettcsSiteMigrBackhaul" type="ord:ETTCSSiteMigrationBackhaulTechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="ettcsSiteMigrShorthaul" type="ord:ETTCSSiteMigrationShorthaulTechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="iubTrunkingTov" type="ord:IUBTrunkingTechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="infrastructureTov" type="ord:InfrastructureTechnicalOrderVersionType" minOccurs="0"/>
    <xs:element name="EOMWTargetTov" type="ord:ETTCSNSBNGUplinkEoMWTargetTechnicalOrderVersionType" minOccurs="0"/>
  </xs:choice>
</xs:group>

XQuery to convert Simple XML Schema into an Instance Document

We can use a recursive function to convert this XML Schema into an instance document. Here is an implementation of this function which covers some of the key XML Schema constructs:

declare namespace xs = "http://www.w3.org/2001/XMLSchema";
(:~
 : Recursively builds random instance content for one XML Schema component.
 :
 : Handles xs:element (including @ref and named or inline complex types),
 : xs:complexType, xs:attribute, xs:sequence, xs:all and xs:choice.
 : xs:annotation and every other component produce the empty sequence.
 : math:random() is assumed to return a double between 0 and 1.
 :
 : @param $element a schema component node belonging to an xs:schema document
 : @return the generated elements and attributes, or the empty sequence
 :)
declare function local:process-element($element as node())  {
     typeswitch ($element)
        case element(xs:complexType) return
              (: Attributes are emitted first: constructing an attribute after
                 child content inside an element constructor is an error, yet
                 schemas declare xs:attribute after the content model. :)
              for $sub-element in ($element/xs:attribute, $element/*[not(self::xs:attribute)])
              return local:process-element($sub-element)

        case element(xs:annotation) return
              ()

        case element(xs:element) return
              let $min := xs:integer(($element/@minOccurs, 1)[1])
              let $max :=
                    if ($element/@maxOccurs = "unbounded")
                    (: unbounded repetition is capped at 10, but never below $min :)
                    then max(($min, 10))
                    else xs:integer(($element/@maxOccurs, 1)[1])
              (: the range operator needs integers, so the rounded offset is cast :)
              let $count := $min + xs:integer(round(math:random() * ($max - $min)))
              let $schema := root($element)/xs:schema
              for $i in 1 to $count
              return
                 if ($element/@ref)
                 then
                    (: an unresolved @ref yields nothing instead of a type error :)
                    for $target in ($schema/xs:element[@name = $element/@ref])[1]
                    return local:process-element($target)
                 else
                    let $named-type := ($schema/xs:complexType[@name = $element/@type])[1]
                    return
                       element {$element/@name} {
                          if ($named-type)
                          then local:process-element($named-type)
                          else if ($element/*)
                          then
                             for $sub-element in $element/*
                             return local:process-element($sub-element)
                          (: no structure at all: fall back to a simple value :)
                          else local:process-type(($element/@type, "xs:string")[1])
                       }

        case element(xs:attribute) return
              (: optional attributes are included about half of the time :)
              if ($element/@use = "required" or math:random() > 0.5)
              then
                 attribute {$element/@name} {
                    (: untyped attributes default to xs:string, as elements do :)
                    local:process-type(($element/@type, "xs:string")[1])
                 }
              else ()

        case element(xs:sequence) return
              for $sub-element in $element/*
              return local:process-element($sub-element)

        case element(xs:all) return
              (: xs:all places no order on its children, so shuffle them :)
              for $sub-element in $element/*
              order by math:random()
              return local:process-element($sub-element)

        case element(xs:choice) return
              (: annotations are not alternatives, so they are excluded :)
              let $options := $element/*[not(self::xs:annotation)]
              return
                 if (empty($options))
                 then ()
                 else
                    (: clamp to 1 so a draw of exactly 0 still selects an option :)
                    let $i := max((1, xs:integer(ceiling(math:random() * count($options)))))
                    return local:process-element($options[$i])

        default return
              ()
};

This function uses the element construct to create a new XML instance tree based on the information in the XML Schema file and the typeswitch construct to select the appropriate processing of a given element type.

Type Instance Generation

Some helper functions are required to generate random instances of (some of) the standard types.


(:~
 : Left-pads a string with a fill string until it reaches a target length.
 :
 : Uses only standard functions instead of string-pad, which is not part of
 : the final XQuery Functions and Operators specification. A string that is
 : already at least $max characters long is returned unchanged.
 :
 : @param $string the text to pad
 : @param $char   the fill string, repeated once per missing character
 : @param $max    the desired minimum length
 : @return $string prefixed with as many copies of $char as are needed
 :)
declare function local:pad($string as xs:string, $char as xs:string, $max as xs:integer) as xs:string {
   string-join(
      (
         (: an empty range (nothing to pad) contributes no fill at all :)
         (for $i in 1 to $max - string-length($string) return $char),
         $string
      ),
      "")
};
 
(:~
 : Zero-pads a value to two characters, e.g. for month, day, hour or minute.
 :
 : Callers pass numbers as well as strings, while local:pad declares its
 : first parameter as xs:string, so the value is converted explicitly.
 :
 : @param $string the value to pad (a number or a string)
 : @return the string form of the value, left-padded with "0" to length 2
 :)
declare function local:pad2($string) {
   local:pad(string($string),"0",2)
};
 
(:~
 : Draws a random integer between $low and $high inclusive.
 : math:random() is assumed to return a double between 0 and 1; the result
 : is clamped so that a draw of exactly 1 cannot exceed $high.
 :)
declare function local:random-int($low as xs:integer, $high as xs:integer) as xs:integer {
   min(($high, $low + xs:integer(floor(math:random() * ($high - $low + 1)))))
};

(:~
 : Builds a random date string (YYYY-MM-DD) with a year from 1990 to 2010.
 : Days stop at 28 so that every generated date is valid in every month.
 :)
declare function local:random-date() as xs:string {
   string-join((string(local:random-int(1990, 2010)),
                local:pad2(string(local:random-int(1, 12))),
                local:pad2(string(local:random-int(1, 28)))), "-")
};

(:~ Builds a random time-of-day string (hh:mm:ss). :)
declare function local:random-time() as xs:string {
   string-join((local:pad2(string(local:random-int(0, 23))),
                local:pad2(string(local:random-int(0, 59))),
                local:pad2(string(local:random-int(0, 59)))), ":")
};

(:~
 : Generates a random value for a built-in simple type.
 :
 : Only types written with the xs prefix are recognised: xs:string,
 : xs:NMTOKEN, xs:NCName, xs:integer, xs:decimal, xs:boolean, xs:date,
 : xs:time and xs:dateTime.
 :
 : @param $name the type name as it appears in the schema, e.g. "xs:date"
 : @return a random value of that type, or the empty sequence if unsupported
 :)
declare function local:process-type($name )   {
(: just simple types - xs namespace :)
   if ($name = ("xs:string","xs:NMTOKEN","xs:NCName"))
   then  string-join(
              (: up to 11 letters drawn from a four-letter alphabet :)
              for $c  in  1 to  local:random-int(0, 11)
              return ("A","B","C","D")[local:random-int(1, 4)]
             , "")
   else if ($name="xs:integer")
   then  local:random-int(0, 100)
   else if ($name="xs:decimal")
   (: cast so the value never serialises in exponent notation :)
   then xs:decimal(math:random() * 100 - 50)
   else  if ($name="xs:boolean")
   then ("true","false")[local:random-int(1, 2)]
   else if ($name="xs:date")
   then local:random-date()
   else if ($name="xs:time")
   then local:random-time()
   else if ($name="xs:dateTime")
   then concat(local:random-date(), "T", local:random-time())
   else ()
};


Script

Here is a basic script which applies this function to the example schema. By default, the root of the generated instance is the first element in the schema or a named element if a root parameter is supplied.

(: Main query: loads the schema named by the "file" request parameter and
   generates an instance rooted at the global element named by "root", or at
   the first global element when no root is given. :)
let $file := request:get-parameter("file",())
let $root := request:get-parameter("root",())
(: NOTE(review): $file comes straight from the request and is passed to doc();
   consider restricting it to a known collection before exposing this service. :)
let $schema :=  doc($file)
let $start :=
  if ($root)
  then  ($schema/xs:schema/xs:element[@name=$root])[1]
  else  $schema/xs:schema/xs:element[1]
return
  if (empty($schema/xs:schema))
  then error(xs:QName("local:no-schema"),
             "The 'file' parameter does not identify an XML Schema document")
  else if (empty($start))
  then error(xs:QName("local:no-root-element"),
             "No matching global element declaration was found in the schema")
  else local:process-element($start)

Execute

Sample Output

<Root>
    <Start>C</Start>
    <Any>
        <AnyA>BADADAD</AnyA>
        <AnyC>BA</AnyC>
        <AnyB>D</AnyB>
    </Any> 
    <Multiple>
        <Choice>
            <Third AttributeDecimal="8.937041402178778" AttributeTime="20:38:04" AttributeDateTime="1995-08-06T21:08:43"/>
        </Choice>
        <Unbounded>BDAACAAAA</Unbounded>
        <Ref>
 
            <Ref1>52</Ref1>
            <Ref2>1992-09-02</Ref2>
        </Ref>
        <WithRefType>
            <RefTypeA>BBBDD</RefTypeA>
            <RefTypeB>DAADCABDC</RefTypeB>
        </WithRefType>
 
    </Multiple>
    <Multiple>
        <Choice>
            <Second AttributeString="ADAB" AttributeDate="2005-12-16"/>
        </Choice>
        <Unbounded/>
        <Unbounded>CABBB</Unbounded>
        <Unbounded/>
 
        <Unbounded>B</Unbounded>
        <Unbounded>AACBABAABCB</Unbounded>
        <Ref>
            <Ref1>56</Ref1>
            <Ref2>1992-05-22</Ref2>
        </Ref>
        <WithRefType>
 
            <RefTypeA>CDAA</RefTypeA>
            <RefTypeB>CBDBDBDB</RefTypeB>
        </WithRefType>
        <Optional>CC</Optional>
    </Multiple>
    <Multiple>
        <Choice>
 
            <First AttributeInt="21" AttributeString="AABC" AttributeBoolean="true"/>
        </Choice>
        <Unbounded>ADADDDDDB</Unbounded>
        <Unbounded>DCDCBDDD</Unbounded>
        <Unbounded>CBAD</Unbounded>
        <Unbounded>DAAAD</Unbounded>
        <Ref>
 
            <Ref1>13</Ref1>
            <Ref2>1997-04-30</Ref2>
        </Ref>
        <WithRefType>
            <RefTypeA>CB</RefTypeA>
            <RefTypeB/>
        </WithRefType>
 
    </Multiple>
    <Multiple>
        <Choice>
            <Second AttributeString="DBDA" AttributeDate="1999-05-16"/>
        </Choice>
        <Unbounded>CCDDDDCD</Unbounded>
        <Unbounded>DDABACC</Unbounded>
        <Ref>
 
            <Ref1>44</Ref1>
            <Ref1>30</Ref1>
            <Ref1>57</Ref1>
            <Ref2>1997-01-26</Ref2>
        </Ref>
        <WithRefType>
            <RefTypeA>CBC</RefTypeA>
 
            <RefTypeB>CA</RefTypeB>
        </WithRefType>
    </Multiple>
    <End>95</End>
</Root>

To do

  • full set of xml types
  • mixed
  • restriction (only enumeration so far)
  • Group
  • AttributeGroup
  • problem with missing attributes in a complexType
  • hinting for distributions
  • namespaces
  • randomisation configuration

References