XQuery/Wikibook list of code links

Motivation

This Wikibook contains links to code samples executed on a University server. We need to keep track of all these links so that we can ensure they remain live, run them all from a test bed, and support changes to the directory structure or filenames.

Approach

The script is similar to the index script at the beginning of the book: it first gets the list of pages in the book. It then fetches each page and extracts the anchor tags whose href contains ".xq". This match is a bit loose and includes some false positives. In the output, each page title links to its Wikibook page, and the actual URL of each code sample is listed beneath it.

xquery version "1.0";
import module namespace httpclient = "http://exist-db.org/xquery/httpclient";

declare option exist:serialize "method=xhtml media-type=text/html";

(:
 : Builds an XHTML index of the code-sample links found in a Wikibook.
 :
 : Request parameter:
 :   book - name of the Wikibooks category to scan (defaults to "XQuery").
 :
 : The category page is fetched to obtain the list of pages; each page is then
 : fetched in turn and every anchor whose href contains ".xq" is reported
 : under a link to that page. Pages with no matching anchors are omitted.
 :)
let $book := request:get-parameter("book", "XQuery")
let $base := "http://en.wikibooks.org"
(: Percent-encode the user-supplied book name so that spaces or reserved
   characters cannot corrupt the category URL. The trailing "?x" is kept from
   the original script; presumably a cache-buster - TODO confirm. :)
let $indexUrl := concat($base, "/wiki/Category:", encode-for-uri($book), "?x")
let $indexPage := httpclient:get(xs:anyURI($indexUrl), false(), ())/httpclient:body/*
let $pages := $indexPage//div[@class = "mw-category"]//li
return
<html>
   <head>
      <title>Index of {$book} code samples</title>
   </head>
   <body>
      <h1>Index of {$book} code samples</h1>
      <ul>
      {
         (: Only entries whose text has something after a "/" are considered,
            matching the original per-letter filter (which dropped entries
            with no "/" in their text). The per-letter loop itself was
            redundant because the results are sorted by title anyway. :)
         for $page in $pages[substring-after(., '/') ne '']
         let $title := string($page)
         (: a[1] guards against a cardinality error in concat() should a list
            item ever contain more than one anchor. :)
         let $url := concat($base, $page/a[1]/@href)
         (: Loose match: any href containing ".xq" counts, so some false
            positives are expected. :)
         let $refs := httpclient:get(xs:anyURI($url), false(), ())/httpclient:body//a[contains(@href, ".xq")]
         where exists($refs)
         order by $title
         return
            (: Emit the <li> directly: a <div> is not a valid child of <ul>. :)
            <li><a href="{$url}">{$title}</a>
               <ul>
               {
                  for $ref in $refs
                  return <li> {string($ref/@href)} </li>
               }
               </ul>
            </li>
      }
      </ul>
   </body>
</html>