XQuery/Caching and indexes

Motivation edit

The views of the data about individual teams or groups need to be supplemented with indexes to the resources for which those views are appropriate. Generating the indexes on demand is one approach, but it loads the SPARQL server. Given the batch nature of the DBpedia extract, it makes more sense to cache the index data and use the cache to generate an index page. (Triggering the cache refresh is another problem!)

Non-caching approach edit

The following script generates an index page with links to the HTML view and the timeline view of an artist's albums.

(: Serialise the result as XHTML and deliver it with media type text/html. :)
declare option exist:serialize "method=xhtml media-type=text/html";
(: SPARQL template: selects every ?group whose skos:subject is the
   Rock_and_Roll_Hall_of_Fame_inductees category. The main expression swaps
   that category name for the requested one before sending the query.
   The p: prefix is declared but not used by the pattern. :)
declare variable $query := "
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX p: <http://dbpedia.org/property/> 
SELECT * WHERE { 
      ?group  skos:subject <http://dbpedia.org/resource/Category:Rock_and_Roll_Hall_of_Fame_inductees>.
   }
 ";

(:~
 : Turns the local part of a DBpedia resource URI into a display name.
 :
 : Steps: percent-decode as UTF-8, drop any parenthesised qualifier such as
 : "(band)", turn underscores into spaces, then trim and collapse whitespace.
 : The final normalisation matters because removing "(band)" from
 : "Genesis_(band)" leaves "Genesis_", which would otherwise become
 : "Genesis " with a trailing space.
 :
 : @param $text the resource name, e.g. "Genesis_%28band%29"
 : @return the readable name, e.g. "Genesis"
 :)
declare function local:clean($text) {
    let $decoded := util:unescape-uri($text, "UTF-8")
    (: greedy on purpose, as before: removes from the first "(" to the last ")" :)
    let $unqualified := replace($decoded, "\(.*\)", "")
    let $spaced := replace($unqualified, "_", " ")
    return normalize-space($spaced)
};

 (: Requested Wikipedia category in its underscored form. When the parameter
    is absent, fall back to the category the query template is written for,
    rather than "" which gives a blank heading and a query for "Category:". :)
 let $category := request:get-parameter("category", "Rock_and_Roll_Hall_of_Fame_inductees")
 (: Human-readable form of the category for the page heading. :)
 let $categoryx := replace($category, "_", " ")
 (: NOTE(review): $category is spliced into the SPARQL text unvalidated, and
    fn:replace gives "$" and "\" special meaning in the replacement string. :)
 let $queryx := replace($query, "Rock_and_Roll_Hall_of_Fame_inductees", $category)
 let $sparql := concat("http://dbpedia.org/sparql?default-graph-uri=", escape-uri("http://dbpedia.org", true()),
                       "&amp;query=", escape-uri($queryx, true())
                      )
 (: The endpoint's reply is read as a document containing a result table. :)
 let $result := doc($sparql)
 return
 <html>
 <body>
   <h1>{$categoryx}</h1>
   <table border="1">
    {
      (: skip the first table row, which is not a result row :)
      for $row in $result/table//tr[position() > 1]
      let $resource := substring-after($row/td[1], "resource/")
      let $name := local:clean($resource)
      order by $name
      return
        <tr>
           <td>{$name}</td>
           <td>
           <a href="group2html.xq?group={$resource}">HTML</a>
           </td>
           <td>
           <a href="groupTimeline.xq?group={$resource}">Timeline</a>
           </td>
        </tr>
    }
   </table>
 </body>
 </html>

Index examples edit

Caching Approach edit

Two scripts are needed - one to generate the data to cache, the other to generate the index page. The approach is illustrated with an index to Rock and Roll groups based on the Wikipedia category Rock and Roll Hall of Fame inductees.

Generate the index data edit

This script generates an XML file. A further development would store the XML directly to the database but it could also be saved manually to the appropriate location. It is parameterised by a category.

(: SPARQL template: selects every ?group whose skos:subject is the
   Rock_and_Roll_Hall_of_Fame_inductees category. The main expression derives
   a per-category query from this text by replacing the category name.
   The p: prefix is declared but not used by the pattern. :)
declare variable $query := "
PREFIX skos: <http://www.w3.org/2004/02/skos/core#>
PREFIX p: <http://dbpedia.org/property/> 
SELECT * WHERE { 
      ?group  skos:subject <http://dbpedia.org/resource/Category:Rock_and_Roll_Hall_of_Fame_inductees>.
   }
 ";

(:~
 : Turns the local part of a DBpedia resource URI into a display name.
 :
 : Steps: percent-decode as UTF-8, drop any parenthesised qualifier such as
 : "(band)", turn underscores into spaces, then trim and collapse whitespace.
 : The final normalisation matters because removing "(band)" from
 : "Genesis_(band)" leaves "Genesis_", which would otherwise become
 : "Genesis " with a trailing space in the cached name attribute.
 :
 : @param $text the resource name, e.g. "Genesis_%28band%29"
 : @return the readable name, e.g. "Genesis"
 :)
declare function local:clean($text) {
    let $decoded := util:unescape-uri($text, "UTF-8")
    (: greedy on purpose, as before: removes from the first "(" to the last ")" :)
    let $unqualified := replace($decoded, "\(.*\)", "")
    let $spaced := replace($unqualified, "_", " ")
    return normalize-space($spaced)
};

(:~
 : Converts a table of rows into a sequence of <tuple> elements.
 :
 : The first tr supplies the column names through its th cells. Every later
 : tr yields one <tuple> whose children are named after the column at the
 : same position and carry the string value of the matching td.
 :
 : @param $table a table element whose tr children are the header and data rows
 : @return one <tuple> per data row
 :)
declare function local:table-to-seq($table ) {
   let $header := $table/tr[1]
   for $record in $table/tr[position() gt 1]
   return
     <tuple>{
         for $value at $col in $record/td
         let $column-name := $header/th[$col]
         return element { $column-name } { string($value) }
     }</tuple>
 };
 
(: Category to index, in its underscored Wikipedia form. :)
let $category := request:get-parameter("category","Rock_and_Roll_Hall_of_Fame_inductees")
(: Specialise the query template for the requested category. :)
let $queryx := replace($query,"Rock_and_Roll_Hall_of_Fame_inductees",$category)
(: Send the specialised query ($queryx). Sending the unmodified template would
   always return the Hall of Fame list while labelling it with $category. :)
let $sparql := concat("http://dbpedia.org/sparql?default-graph-uri=",escape-uri("http://dbpedia.org",true()),
                                      "&amp;query=",escape-uri($queryx,true())
                         )
let $result  := doc($sparql)/table
(: One <tuple> per result row, with children named after the result columns. :)
let $groups := local:table-to-seq($result)
return
<ResourceList category="{$category}">
   {
    for $group in $groups
    let $resource := substring-after($group/group,"resource/")
    let $name := local:clean($resource)
    order by $name
    return
        <resource id="{$resource}" name="{$name}"/>
   }
</ResourceList>

Note: I guess a better approach would be to use triples here, saved to a local triple store.

HTML index page edit

This script, groupList, uses the cached index data:

(: Serialise the result as XHTML and deliver it with media type text/html. :)
declare option exist:serialize "method=xhtml media-type=text/html";
(: Look up the cached index for the category anywhere in the database. :)
let $list := //ResourceList[@category="Rock_and_Roll_Hall_of_Fame_inductees"]
return
<html>
<body>
  <h1>Rock Groups</h1>
  <table border="1">
   {
    (: One table row per cached resource, sorted by display name.
       The for clause was previously corrupted by a stray copy of the
       "declare option" line pasted into its middle, so it did not parse. :)
    for $resource in $list/resource
    order by $resource/@name
    return
        <tr>
           <td>{string($resource/@name)}</td>
           <td>
           <a href="group2html.xq?group={$resource/@id}">HTML</a>
           </td>
           <td>
           <a href="groupTimeline.xq?group={$resource/@id}">Timeline</a>
           </td>
        </tr>
   }
   </table>
 </body>
 </html>


Execute edit

Rock and Roll groups