diff tokenise.xsl @ 37:ac3cd8de7a10

towards big rework of tokenisation
author Henry S. Thompson <ht@markup.co.uk>
date Tue, 25 Apr 2017 18:30:04 +0100
parents
children 468a6cf8bf0b
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tokenise.xsl	Tue Apr 25 18:30:04 2017 +0100
@@ -0,0 +1,138 @@
+<?xml version='1.0'?>
+<!DOCTYPE xsl:stylesheet SYSTEM "../../../lib/xml/xsl.dtd" >
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="3.0" xmlns:s="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:e="http://markup.co.uk/excel" exclude-result-prefixes="xs s e xf" xmlns="http://markup.co.uk/excel" xmlns:xf="http://www.w3.org/2005/xpath-functions">
+ <!-- Stylesheet parameters:
+        sheet-number: compared with s:sheet/@sheetId in workbook.xml to find
+                      the name of the sheet being processed
+        xlDir:        directory in which workbook.xml is looked up -->
+ <xsl:param name="sheet-number"/>
+ <xsl:param name="xlDir"/>
+ 
+ <!-- NOTE(review): e:cr, called from e:single, is not defined in this file;
+      presumably a2n.xsl supplies it (to be confirmed) -->
+ <xsl:include href="a2n.xsl"/>
+
+  <!-- Tokenising pattern: one top-level capture group per token kind, tried
+       in this order (the group number is what e:expand dispatches on):
+         1 double-quoted text             6 leading = or an opening paren
+         2 brace-delimited matrix         7 sheet or workbook qualifier, ending in !
+         3 comma                          8 cell reference such as $A$1
+         4 function name plus its (       9 name
+         5 closing paren                 10 any other single character
+       In group 7 the ! now follows the whole alternation.  It used to be
+       attached to the last alternative only, so quoted and [n]-prefixed
+       qualifiers were captured without the ! that e:expand looks for.
+       The closing brace of group 2 is escaped, since braces outside a
+       character class are regex metacharacters.
+       Keep the pattern on a single line: whitespace would become part of it. -->
+  <xsl:variable name="pat1">("[^"]*")|(\{[^}]+\})|(,)|([^=\-+*/();:,.$&lt;>^!]+(?:\.[^=\-+*/();:,.$&lt;>^!]+)*\()|([)])|(^=|\()|((?:(?:'[^']+')|(?:\[[0-9]+\][^!]*)|(?:[a-zA-Z_][a-zA-Z0-9._]*))!)|(\$?[A-Z]+\$?[0-9]+)|([a-zA-Z_\\][a-zA-Z0-9._]*)|(.)</xsl:variable>
+ <xsl:param name="pat" select="$pat1"/><!-- xsl:param for refinement debugging by passing in the pattern -->
+ 
+ <!-- Document element of the workbook.xml found under $xlDir -->
+ <xsl:variable name="workbook" select="document($xlDir || '/workbook.xml')/*"/>
+ <!-- @name of the sheet whose @sheetId equals the sheet-number parameter -->
+ <xsl:variable name="sheet-name" select="$workbook/s:sheets/s:sheet[$sheet-number=@sheetId]/@name"/>
+ 
+ <xsl:function name="e:lookup" as="xs:string*">
+  <!-- Look $name up among the workbook's s:definedName entries.
+       When the definition starts with the current sheet's name followed
+       by !, return what comes after that prefix; in every other case
+       (no such name, or a definition not starting with that prefix)
+       return the empty sequence. -->
+  <xsl:param name="name" as="xs:string" required="yes"/>
+  <xsl:variable name="qualifier" select="concat($sheet-name,'!')"/>
+  <xsl:variable name="entry" select="$workbook/s:definedNames/s:definedName[@name=$name]"/>
+  <xsl:if test="$entry and starts-with($entry,$qualifier)">
+   <xsl:sequence select="substring-after($entry,$qualifier)"/>
+  </xsl:if>
+ </xsl:function>
+
+ <xsl:function name="e:tokenise" as="element(*)*">
+  <!-- Tokenise a formula, recursively wrt variables.
+       The formula is split with analyze-string against $pat and the
+       resulting xf:group elements are handed to e:tok1 together with
+       their count and a start index of 1.
+       Output is composed of e:* as follows:
+       c: A list (function parameter) separator
+       e: An external (variable, cell or range) reference
+       f: A function name followed by an opening parenthesis
+       l: The beginning of the formula or an opening paren
+       m: A constant matrix
+       p: A close-paren
+       q: A text (delimited by double quotes)
+       r: A range reference
+       s: A single-cell reference
+       v: A variable name [should only occur inside e]
+       x: Amalgamated single characters not matched by anything else
+       The count and the start index are cast to xs:int because e:tok1
+       declares those parameters as xs:int and an xs:integer argument is
+       not converted down automatically.
+ -->
+  <xsl:param name="formula" as="xs:string" required="yes"/>
+  <!-- The row and column number of the cell whence the formula came -->
+  <xsl:param name="row" required="yes" as="xs:int"/>
+  <xsl:param name="col" required="yes" as="xs:int"/>
+  <xsl:sequence select="
+     let $tokens := analyze-string($formula,$pat)/xf:match/xf:group
+        return e:tok1($tokens,xs:int(count($tokens)),xs:int(1),$row,$col,())"/>
+ </xsl:function>
+ 
+ <xsl:function name="e:tok1" as="element(*)*">
+  <!-- Walk the token list from position $i, accumulating output in $soFar.
+       tokens: the xf:group elements to convert
+       n:      index of the last token; the walk stops once $i exceeds it
+       i:      index of the next token to convert
+       row, col: position of the originating cell, passed on to e:expand
+       soFar:  elements produced by earlier steps
+       Each step calls e:expand, takes the next index from ?1 of its result
+       and appends ?2 to $soFar.
+       n and i are declared xs:integer: the next index comes back as an
+       xs:integer and would not be accepted by an xs:int parameter.
+       xs:int arguments remain valid.  The index is cast when calling
+       e:expand so that an xs:int declaration there is satisfied too. -->
+  <xsl:param name="tokens" as="element(xf:group)*" required="yes"/>
+  <xsl:param name="n" required="yes" as="xs:integer"/>
+  <xsl:param name="i" required="yes" as="xs:integer"/>
+  <xsl:param name="row" required="yes" as="xs:int"/>
+  <xsl:param name="col" required="yes" as="xs:int"/>
+  <xsl:param name="soFar" required="yes" as="element(*)*"/>
+  <xsl:sequence select="
+    if ($i gt $n)
+          then $soFar
+          else
+            let $next := e:expand($tokens,xs:int($i),true(),$row,$col),
+                $j := $next?1,
+                $res := $next?2 return
+            e:tok1($tokens,$n,$j,$row,$col,($soFar,$res))"/>
+ </xsl:function>
+ 
+ <xsl:function name="e:expand" as="array(*)">
+  <!-- Convert the token at position $i, plus any following tokens that
+       belong with it, into output elements.
+       tokens: the xf:group elements produced by analyze-string
+       i:      1-based index of the token to convert
+       local:  false() when the token follows a qualifier naming another
+               sheet or workbook, true() otherwise
+       row, col: position of the originating cell, handed on to e:single
+               and to recursive e:tokenise calls
+       Returns a two-member array: ?1 is the index of the next unconsumed
+       token, ?2 the element(s) produced.  The declared type used to be
+       element(*)*, which the array-returning branches could not satisfy
+       although e:tok1 reads the result with ?1 and ?2.
+       $ext was used without being bound; it is now not($local).
+       Calls to e:single, e:range and e:external follow the signatures
+       those functions have in this stylesheet.
+       NOTE(review): e:amalgamate is not defined in this stylesheet; it has
+       to come from a2n.xsl or still be written, and is assumed to return
+       the same kind of array.
+       NOTE(review): e:range requires e:s operands; e:single is assumed to
+       deliver them via e:cr. -->
+  <xsl:param name="tokens" required="yes" as="element(xf:group)*"/>
+  <xsl:param name="i" required="yes" as="xs:integer"/>
+  <xsl:param name="local" required="yes" as="xs:boolean"/>
+  <xsl:param name="row" required="yes" as="xs:int"/>
+  <xsl:param name="col" required="yes" as="xs:int"/>
+  <xsl:sequence select="
+    let $t := $tokens[$i],
+        $r := $tokens[$i + 1],
+        $ext := not($local),
+        $here := xs:int($i)
+    return
+     if ($t/@nr=1) then e:exp1($here,'q',string($t))
+     else if ($t/@nr=2) then e:exp1($here,'m',string($t))
+     else if ($t/@nr=3) then e:exp1($here,'c',',')
+     else if ($t/@nr=4) then e:exp1($here,'f',string($t))
+     else if ($t/@nr=5) then e:exp1($here,'p',')')
+     else if ($t/@nr=6) then e:exp1($here,'l',string($t))
+     else if ($t/@nr=7)
+       then if (substring-before($t,'!')=('[0]',$sheet-name))
+              then (: it's a local reference after all :)
+               e:expand($tokens,$i+1,true(),$row,$col)
+              else let $inner := e:expand($tokens,$i+1,false(),$row,$col) return
+                    [$inner?1,e:external($t,$inner?2)]
+     else if ($t/@nr=10) then e:amalgamate($tokens,$i+1,string($t))
+     else if ($r[@nr=10 and .=':'] and $tokens[$i + 2]/@nr=(8,9))
+       then (: a range, takes priority; consumes reference, colon, reference :)
+          [$i+3,e:range(e:single($t,$row,$col,$ext),
+                        e:single($tokens[$i + 2],$row,$col,$ext))]
+     else if ($t/@nr=8) then [$i+1,e:single($t,$row,$col,$ext)]
+     else if ($t/@nr=9)
+       then if ($ext) then (: can't expand :) e:exp1($here,'v',string($t))
+       else let $defn := e:lookup(string($t)) return
+            if (empty($defn))
+              then (: no usable definition, keep the name itself :)
+                   e:exp1($here,'v',string($t))
+              else [$i+1,e:tokenise($defn[1],$row,$col)]
+     else (: shouldn't ever get here; consume the position and emit nothing :)
+          [$i+1,()]"/>
+ </xsl:function>
+ 
+ <xsl:function name="e:exp1" as="array(*)">
+  <!-- Build one output element and pair it with the index of the next token.
+       i:    index of the token just consumed
+       name: local name of the element to create in the
+             http://markup.co.uk/excel namespace
+       val:  text content of that element
+       Returns [$i+1, element].
+       The variable is declared as="element()" so that it holds the element
+       itself; without a type it would hold a document node wrapping the
+       element, which is not acceptable where element(*) is required.
+       i is declared xs:integer so that plain integer indexes (such as the
+       $i+1 returned here) are accepted; xs:int arguments remain valid. -->
+  <xsl:param name="i" as="xs:integer"/>
+  <xsl:param name="name" as="xs:string"/>
+  <xsl:param name="val" as="xs:string"/>
+  <xsl:variable name="elt" as="element()">
+   <xsl:element name="{$name}" namespace="http://markup.co.uk/excel">
+    <xsl:value-of select="$val"/>
+   </xsl:element>
+  </xsl:variable>
+  <xsl:sequence select="[$i+1,$elt]"/>
+ </xsl:function>
+ 
+ <xsl:function name="e:single" as="element(*)">
+  <!-- Turn one reference token into an element.
+       group:    the token; when its @nr is 9 it is first resolved with
+                 e:lookup, otherwise its string value is used directly
+       row, col: passed through to e:cr
+       external: whether the reference is to another sheet or workbook
+       An external token for which nothing was resolved is emitted as a v
+       element holding the token text; everything else is the result of
+       e:cr.
+       NOTE(review): e:cr is not defined in this file; presumably it comes
+       from a2n.xsl (to be confirmed). -->
+  <xsl:param name="group" as="element(xf:group)"/>
+  <xsl:param name="row" as="xs:integer"/>
+  <xsl:param name="col" as="xs:integer"/>
+  <xsl:param name="external" as="xs:boolean"/>
+  <xsl:variable name="ref" select="if (not($group/@nr=9)) then string($group)
+                                             else e:lookup($group)"/>
+  <xsl:choose>
+   <xsl:when test="$external and empty($ref)">
+    <v><xsl:value-of select="$group"/></v>
+   </xsl:when>
+   <xsl:otherwise>
+    <xsl:sequence select="e:cr($ref,$row,$col)"/>
+   </xsl:otherwise>
+  </xsl:choose>
+ </xsl:function>
+ 
+ <xsl:function name="e:range" as="element(e:r)">
+  <!-- Wrap two single-cell references in an r element: copies of $l and
+       $r, in that order, become its children. -->
+  <xsl:param name="l" as="element(e:s)" required="yes"/>
+  <xsl:param name="r" as="element(e:s)" required="yes"/>
+  <r>
+   <xsl:copy-of select="$l, $r"/>
+  </r>
+ </xsl:function>
+ 
+ <xsl:function name="e:external" as="element(e:e)">
+  <!-- Wrap a reference in an e element whose s attribute carries the
+       string value of the qualifier token $source; $ref becomes the
+       element's content. -->
+  <xsl:param name="source" as="element(xf:group)" required="yes"/>
+  <xsl:param name="ref" as="element(*)" required="yes"/>
+  <e>
+   <xsl:attribute name="s" select="$source"/>
+   <xsl:sequence select="$ref"/>
+  </e>
+ </xsl:function>
+</xsl:stylesheet>