Nikolai Grigoriev Q expansion
How can I traverse a graph with XSL?
Specifically, hyperlinks encode a graph, as in the following four files:
root.html
<html version = "6">
<a href = "left_branch.html"/>
<a href = "right_branch.html"/>
</html>
left_branch.html
<html version = "2">
<a href = "leaf.html"/>
</html>
right_branch.html
<html version = "3">
<a href = "leaf.html"/>
</html>
leaf.html
<html version = "1"/>
There was a lot of discussion about the XSLT power, so I
felt kinda challenged to write a graph traversal
stylesheet. Given below are my humble results. The
stylesheet operates on files of the kind described in the
original letter by Jeff Lansing. It lists files reachable
from the top node, ordering them by the number of hyperlink
transitions required to get there from the root. (It is not
exactly the same as was requested; I have chosen the
simplest output, to avoid stylesheet overgrowth). The
output looks like this (in Jeff's case):
Level 1: left_branch.html right_branch.html
Level 2: leaf.html
The whole thing uses no extensions to XSLT REC, and has been
tested under SAXON and XT.
Sure enough, it isn't a complete graph traversal engine: it
may choke on files distributed in multiple directories, it
produces a spurious reference to root.html (since there is
no info about the initial path), etc. Nevertheless, I think
the possibility to traverse the graph has been proven, and
the rest of functionality is easy to incorporate.
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:output method="text" encoding="ISO-8859-1"/>
<xsl:param name="delimiter" select="'|'"/>
<xsl:variable name="separator"
select="concat ($delimiter, $delimiter)"/>
<!-- ===================================================== -->
<!-- Top template: get a list of links and start recursion -->
<!-- ===================================================== -->
<xsl:template match="/">
<!-- collect links for the first level -->
<xsl:variable name="links">
<xsl:apply-templates select="html"/>
</xsl:variable>
<!-- start recursion -->
<xsl:call-template name="visit-node-list">
<xsl:with-param name="level" select="1"/>
<xsl:with-param name="node-list" select="$links"/>
<xsl:with-param name="visited-nodes" select="$links"/>
</xsl:call-template>
</xsl:template>
<!-- ===================================================== -->
<!-- Extract unique unvisited links from a document. -->
<!-- Names of visited nodes are stored in a string; -->
<!-- $delimiter is prepended and appended to every entry -->
<!-- ===================================================== -->
<xsl:template match="html">
<xsl:param name="visited-nodes"/>
<xsl:message>Processing node <xsl:value-of
select="@version"/></xsl:message>
<xsl:for-each select=".//a">
<xsl:variable name="link" select="@href"/>
<xsl:variable name="link-record"
select="concat($delimiter, $link,
$delimiter)"/>
<!-- check if this is the first occurence of the $link -->
<xsl:if test="not (preceding::a[@href=$link]) and
not (contains($visited-nodes, $link-record))">
<xsl:value-of select="$link-record"/>
</xsl:if>
</xsl:for-each>
</xsl:template>
<!-- ===================================================== -->
<!-- Process all documents from a list. Invokes a document -->
<!-- enumeration template for a list, then recurses. -->
<!-- ===================================================== -->
<xsl:template name="visit-node-list">
<xsl:param name="level" select="1"/>
<xsl:param name="node-list"/>
<xsl:param name="visited-nodes"/>
<xsl:if test="contains ($node-list, $delimiter)">
<!-- print out level info -->
<xsl:text>Level </xsl:text>
<xsl:value-of select="$level"/>
<xsl:text>: </xsl:text>
<xsl:value-of
select="translate($node-list, $delimiter, ' ')"/>
<xsl:text>
</xsl:text>
<!-- collect links for the next level -->
<xsl:variable name="links">
<xsl:call-template
name="enumerate-links-from-list">
<xsl:with-param
name="node-list" select="$node-list"/>
<xsl:with-param
name="visited-nodes" select="$visited-nodes"/>
</xsl:call-template>
</xsl:variable>
<!-- recurse to next level -->
<xsl:call-template
name="visit-node-list">
<xsl:with-param
name="level" select="$level+1"/>
<xsl:with-param
name="node-list" select="$links"/>
<xsl:with-param
name="visited-nodes" select="concat($visited-nodes,
$links)"/>
</xsl:call-template>
</xsl:if>
</xsl:template>
<!-- ===================================================== -->
<!-- Enumerates all documents pointed out from a node list -->
<!-- ===================================================== -->
<xsl:template name="enumerate-links-from-list">
<xsl:param name="node-list"/>
<xsl:param name="visited-nodes"/>
<xsl:if test="contains($node-list, $delimiter)">
<!-- split the first record from the $node-list -->
<xsl:variable name="head-record">
<xsl:choose>
<xsl:when test="contains($node-list, $separator)">
<xsl:value-of
select="substring-before($node-list, $separator)"/>
</xsl:when>
<xsl:otherwise>
<xsl:value-of select="$node-list"/>
</xsl:otherwise>
</xsl:choose>
</xsl:variable>
<xsl:variable name="head"
select="translate($head-record, $delimiter, '')"/>
<!-- collect links from
the first document in the $node-list -->
<xsl:variable name="links">
<xsl:apply-templates select="document($head)/html">
<xsl:with-param
name="visited-nodes" select="$visited-nodes"/>
</xsl:apply-templates>
</xsl:variable>
<xsl:value-of select="$links"/>
<!-- recursion:
repeat the same for the rest of the $node-list -->
<xsl:call-template name="enumerate-links-from-list">
<xsl:with-param name="node-list"
select="substring-after($node-list, $separator)"/>
<xsl:with-param
name="visited-nodes" select="concat($visited-nodes,
$links)"/>
</xsl:call-template>
</xsl:if>
</xsl:template>
</xsl:stylesheet>
David Carlisle adds
<?xml version="1.0"?>
<xsl:stylesheet
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:template match="html">
<xsl:call-template name="bfirst">
<xsl:with-param name="todo" select="."/>
</xsl:call-template>
</xsl:template>
<xsl:template name="bfirst">
<xsl:param name="seen" select="/.."/>
<xsl:param name="todo"/>
<xsl:if test="not(string($todo[1]/@version) = $seen)">
<xsl:value-of select="$todo[1]/@version"/>
<xsl:text> </xsl:text>
</xsl:if>
<xsl:if test="count($todo) >0">
<xsl:call-template name="bfirst">
<xsl:with-param name="seen"
select="$seen|$todo[1]/@version"/>
<xsl:with-param name="todo"
select="$todo[position() > 1] |
document($todo[1]/a/@href)/html"/>
</xsl:call-template>
</xsl:if>
</xsl:template>
</xsl:stylesheet>
|