view tokenise.xsl @ 51:793496d2d165

allow command line input
author Henry S. Thompson <ht@markup.co.uk>
date Tue, 16 May 2017 17:27:19 +0100
parents ac6d1ca099f7
children 9bb415e0adc9
line wrap: on
line source

<?xml version='1.0'?>
<!DOCTYPE xsl:stylesheet SYSTEM "../../../lib/xml/xsl.dtd" >
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="3.0" xmlns:s="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:e="http://markup.co.uk/excel" exclude-result-prefixes="xs s e xf" xmlns="http://markup.co.uk/excel" xmlns:xf="http://www.w3.org/2005/xpath-functions">
 <!-- Compared against s:sheet/@sheetId in workbook.xml to find the name
      of the sheet whose formulas are being tokenised -->
 <xsl:param name="sheet-number"/>
 <!-- Directory holding workbook.xml; '/workbook.xml' is appended to it -->
 <xsl:param name="xlDir"/>

  <!-- Tokenising patterns.  Each top-level capturing group is one token
       type; e:expand dispatches on the group number (@nr) as follows:
        1 double-quoted text            2 brace-delimited constant matrix
        3 comma                         4 function name plus opening paren
        5 close paren                   6 leading = or an opening paren
        7 sheet/external prefix ending in !
        8 cell reference                9 name (variable or true/false)
       10 any other single character
       pat1 and pat2 differ only in group 7.  Note that in pat1 the
       trailing ! is attached to the last alternative of group 7 only;
       pat2 is the default. -->
  <xsl:variable name="pat1">("[^"]*")|(\{[^}]+})|(,)|([^=\-+*/();:,.$&lt;>^!]+(?:\.[^=\-+*/();:,.$&lt;>^!]+)*\()|([)])|(^=|\()|((?:(?:'[^']+')|(?:\[[0-9]+\][^!]*)|(?:[a-zA-Z_][a-zA-Z0-9._]*)!))|(\$?[A-Z]+\$?[0-9]+)|([a-zA-Z_\\][a-zA-Z0-9._]*)|(.)</xsl:variable>
 <xsl:variable name="pat2">("[^"]*")|(\{[^}]+})|(,)|([^=\-+*/();:,.$&lt;>^!]+(?:\.[^=\-+*/();:,.$&lt;>^!]+)*\()|([)])|(^=|\()|((?:'[^']+'!)|(?:[\[0-9A-Za-z_][^=\-+*/();:,.$&lt;>^!]*!))|(\$?[A-Z]+\$?[0-9]+)|([a-zA-Z_\\][a-zA-Z0-9._]*)|(.)</xsl:variable>
 <xsl:param name="pat" select="$pat2"/><!-- xsl:param for refinement debugging by passing in the pattern -->
 
 <!-- Document element of workbook.xml found under $xlDir -->
 <xsl:variable name="workbook" select="document(concat($xlDir,'/workbook.xml'))/*"/>
 <!-- @name of the sheet whose @sheetId equals $sheet-number; e:expand
      compares sheet prefixes against it to recognise local references -->
 <xsl:variable name="sheet-name" select="$workbook/s:sheets/s:sheet[@sheetId=$sheet-number]/@name"/>
 
 <xsl:function name="e:lookup" as="xs:string*">
  <!-- Return the string value of the s:definedName in $workbook whose
       @name equals $name; the zero-length string if there is none -->
  <xsl:param name="name" as="xs:string" required="yes"/>
  <xsl:variable name="definition"
                select="$workbook/s:definedNames/s:definedName[@name=$name]"/>
  <xsl:sequence select="string($definition)"/>
 </xsl:function>

 <xsl:function name="e:tokenise" as="element(*)*">
  <!-- Tokenise a formula: split it with $pat, then hand the matched
       groups to e:tok1, which converts them one by one (expanding
       variable names recursively through e:expand).
       Intended output vocabulary (all e:*):
       b: Boolean constant
       c: A list (function parameter) separator
       e: An external (variable, cell or range) reference
       l: The beginning of the formula or an opening paren
       m: A constant matrix
       o: A function name followed by an opening parenthesis
       p: A close-paren
       q: A text (delimited by double quotes)
       r: A range reference (two children, either e or s or u (unsupported))
       s: A single-cell reference
       v: A variable name [should only occur inside e]
       x: Amalgamated single characters not matched by anything else
  -->
  <xsl:param name="formula" as="xs:string" required="yes"/>
  <!-- Row and column number of the cell the formula was taken from -->
  <xsl:param name="row" required="yes" as="xs:integer"/>
  <xsl:param name="col" required="yes" as="xs:integer"/>
  <!-- Debugging hook: change the test to true() to trace each formula -->
  <xsl:if test="false()">
   <xsl:message>tok: <xsl:value-of select="$formula"/></xsl:message>
  </xsl:if>
  <!-- Only the top-level groups of each match are kept; their @nr
       identifies which alternative of the pattern matched -->
  <xsl:variable name="groups" as="element(xf:group)*"
                select="analyze-string($formula,$pat)/xf:match/xf:group"/>
  <xsl:sequence select="e:tok1($groups,count($groups),1,$row,$col,())"/>
 </xsl:function>
 
 <xsl:function name="e:tok1" as="element(*)*">
  <!-- Recursive worker for e:tokenise.  Converts $tokens from position
       $i onward, appending each result to $soFar, and returns the
       accumulated elements once $i has passed $n (the token count).
       e:expand reports how far it got, so one step may consume more
       than one token. -->
  <xsl:param name="tokens" as="element(xf:group)*" required="yes"/>
  <xsl:param name="n" required="yes" as="xs:integer"/>
  <xsl:param name="i" required="yes" as="xs:integer"/>
  <xsl:param name="row" required="yes" as="xs:integer"/>
  <xsl:param name="col" required="yes" as="xs:integer"/>
  <xsl:param name="soFar" as="element(*)*"/>
  <!-- Only used by the trace message below -->
  <xsl:variable name="last" select="$soFar[last()]"/>
  <xsl:if test="false()"><xsl:message>tok1: <xsl:value-of select="$n"/>|<xsl:value-of select="$i"/>|<xsl:value-of select="if ($last instance of element()) then name($last) else 'bogus'"/>|<xsl:value-of select="string($last)"/></xsl:message></xsl:if>
  <xsl:choose>
   <xsl:when test="$i gt $n">
    <!-- All tokens consumed -->
    <xsl:sequence select="$soFar"/>
   </xsl:when>
   <xsl:otherwise>
    <!-- $step is [index of next unconsumed token, element(s) produced] -->
    <xsl:variable name="step" select="e:expand($tokens,$i,true(),$row,$col)"/>
    <xsl:sequence select="e:tok1($tokens,$n,$step?1,$row,$col,($soFar,$step?2))"/>
   </xsl:otherwise>
  </xsl:choose>
 </xsl:function>
 
 <xsl:function name="e:expand" as="array(*)">
  <!-- Convert the token at position $i (looking one token ahead for
       ranges) into e:* output.
       Returns a two-member array:
         ?1  index of the next token not yet consumed
         ?2  the element(s) produced
       $local is true() when the token is not under an external sheet
       prefix; only then are names expanded through e:lookup.
       $row and $col identify the cell the formula came from and are
       passed through to the cell-reference helpers. -->
  <xsl:param name="tokens" required="yes" as="element(xf:group)*"/>
  <xsl:param name="i" required="yes" as="xs:integer"/>
  <xsl:param name="local" required="yes" as="xs:boolean"/>
  <xsl:param name="row" required="yes" as="xs:integer"/>
  <xsl:param name="col" required="yes" as="xs:integer"/>
  <xsl:if test="false()"><xsl:message>exp: <xsl:value-of select="$tokens[$i]/@nr"/>:<xsl:value-of select="$tokens[$i]"/>,<xsl:value-of select="$tokens[$i+1]"/></xsl:message></xsl:if>
  <xsl:sequence select="
    let $t := $tokens[$i],
        $r := $tokens[$i + 1] return
     if ($t/@nr=1) then e:exp1($i,'q',string($t))
     else if ($t/@nr=2) then e:exp1($i,'m',string($t))
     else if ($t/@nr=3) then e:exp1($i,'c',',')
     else if ($t/@nr=4) then e:exp1($i,'o',string($t))
     else if ($t/@nr=5) then e:exp1($i,'p',')')
     else if ($t/@nr=6) then e:exp1($i,'l',string($t))
     else if ($t/@nr=7)
       then (: sheet prefix: drop the trailing ! and process what follows.
               NOTE(review): a quoted prefix keeps its quotes here, so it
               will not compare equal to $sheet-name; confirm intended :)
            let $xref := substring-before($t,'!') return
              if ($xref=('[0]',$sheet-name))
              then (: it's a local reference after all :)
               e:expand($tokens,$i+1,true(),$row,$col)
              else let $ext := e:expand($tokens,$i+1,false(),$row,$col) return
                    [$ext?1,e:external($xref,$ext?2)]
     else if ($t/@nr=10) then e:amalgamate($tokens,$i+1,string($t))
     else if ($r[@nr=10 and .=':'])
       then (: a range, takes priority; consumes left part, colon, right part :)
          [$i+3,e:range($tokens,$i,$local,$row,$col)]
     else if ($t/@nr=8) then [$i+1,e:single(string($t),$row,$col)]
     else if ($t/@nr=9)
       then if (matches($t,'^(true|false)$','i'))
            then (: e:exp1 takes (index, element name, content), so this
                    yields an e:b element holding TRUE or FALSE :)
                 e:exp1($i,'b',upper-case($t))
            else (: a variable name, I think :) if ($local)
            then let $sub := e:tokenise(e:lookup(string($t)),$row,$col) return
              [$i+1,$sub]
            else (: can't expand :) e:exp1($i,'v',string($t))
     else (: shouldn't ever get here; an empty result would violate the
             declared array(*) type, so fail with a clear diagnostic :)
          error(QName('http://markup.co.uk/excel','e:noToken'),
                concat('e:expand: no usable token at position ',$i))"/>
 </xsl:function>
 
 <xsl:function name="e:amalgamate" as="array(*)">
  <!-- Merge a run of consecutive type-10 tokens (single characters not
       matched by any other alternative) into one e:x element.
       $i      index of the next token to examine
       $soFar  the characters collected so far
       Returns [index of the first token not absorbed, the e:x element]. -->
  <xsl:param name="tokens" as="element(xf:group)*"/>
  <xsl:param name="i" as="xs:integer"/>
  <xsl:param name="soFar" as="xs:string"/>
  <xsl:choose>
   <!-- The predicate must be the variable $i: a bare i would be a test
        for a child element named i, which is never true -->
   <xsl:when test="$tokens[$i]/@nr=10">
    <xsl:sequence select="e:amalgamate($tokens,$i+1,concat($soFar,
                                                       string($tokens[$i])))"/>
   </xsl:when>
   <xsl:otherwise>
    <!-- End of the run (or of the tokens): emit what was collected -->
    <xsl:variable name="res">
     <x><xsl:value-of select="$soFar"/></x>
    </xsl:variable>
    <xsl:if test="false()"><xsl:message>amal: <xsl:value-of select="$res/."/></xsl:message></xsl:if>
    <xsl:sequence select="[$i,$res/*]"/>
   </xsl:otherwise>
  </xsl:choose>
 </xsl:function>
 
 <xsl:function name="e:exp1" as="array(*)">
  <!-- Build one element with local name $name (in the
       http://markup.co.uk/excel namespace) whose content is $val.
       Returns [$i+1, that element], i.e. exactly one token consumed. -->
  <xsl:param name="i" as="xs:integer"/>
  <xsl:param name="name" as="xs:string"/>
  <xsl:param name="val" as="xs:string"/>
  <!-- The element is built inside a temporary tree and selected from it -->
  <xsl:variable name="holder">
   <xsl:element name="{$name}" namespace="http://markup.co.uk/excel">
    <xsl:value-of select="$val"/>
   </xsl:element>
  </xsl:variable>
  <xsl:variable name="made" select="$holder/*"/>
  <xsl:if test="false()"><xsl:message>exp1: <xsl:value-of select="$name"/>|<xsl:value-of select="$val"/>|<xsl:value-of select="name($made)"/></xsl:message></xsl:if>
  <xsl:sequence select="[$i + 1, $made]"/>
 </xsl:function>
 
 <xsl:function name="e:single" as="element(e:s)">
  <!-- Single-cell reference: delegates to e:cr (not defined in this
       part of the stylesheet) with the reference text and the row and
       column of the cell holding the formula.
       Author's caveat: this is a _guess_ that being external doesn't
       matter, i.e. that an external relative ref can be copied and
       will change like a local one. -->
  <xsl:param name="val" as="xs:string"/>
  <xsl:param name="row" as="xs:integer"/>
  <xsl:param name="col" as="xs:integer"/>
  <xsl:variable name="cell" select="e:cr($val,$row,$col)"/>
  <xsl:sequence select="$cell"/>
 </xsl:function>
 
 <xsl:function name="e:range" as="element(e:r)">
  <!-- Build an e:r element for a range whose left endpoint is token $i
       and whose right endpoint is token $i+2 (token $i+1 being the
       colon).  Each endpoint is converted by e:rPart and copied in,
       left first. -->
  <xsl:param name="tokens" as="element(xf:group)*"/>
  <xsl:param name="i" as="xs:integer"/>
  <xsl:param name="local" as="xs:boolean"/>
  <xsl:param name="row" as="xs:integer"/>
  <xsl:param name="col" as="xs:integer"/>
  <r>
   <xsl:copy-of select="e:rPart($tokens[$i],$local,$row,$col)"/>
   <xsl:copy-of select="e:rPart($tokens[$i+2],$local,$row,$col)"/>
  </r>
 </xsl:function>
 
 <xsl:function name="e:rPart" as="element(*)">
  <!-- Convert one endpoint of a range.
       type 8 (cell reference)  gives the result of e:single
       type 9 (name), external  gives e:v holding the name
       type 9 (name), local     gives the name's expansion, provided it
                                is a single e:s, or a single e:e
                                containing an e:s; otherwise e:badRP
       anything else            gives e:badRP -->
  <xsl:param name="g" as="element(xf:group)"/>
  <xsl:param name="local" as="xs:boolean"/>
  <xsl:param name="row" as="xs:integer"/>
  <xsl:param name="col" as="xs:integer"/>
  <xsl:variable name="kind" select="$g/@nr"/>
  <xsl:variable name="text" select="string($g)"/>
  <xsl:choose>
   <xsl:when test="$kind=8">
    <xsl:sequence select="e:single($text,$row,$col)"/>
   </xsl:when>
   <xsl:when test="$kind=9 and not($local)">
    <!-- Names under an external prefix cannot be looked up here -->
    <xsl:sequence select="e:var($text)"/>
   </xsl:when>
   <xsl:when test="$kind=9">
    <xsl:variable name="expansion"
                  select="e:tokenise(e:lookup($text),$row,$col)"/>
    <xsl:choose>
     <xsl:when test="count($expansion)=1 and
                     $expansion[local-name()='s' or
                                (local-name()='e' and $expansion/e:s)]">
      <xsl:sequence select="$expansion"/>
     </xsl:when>
     <xsl:otherwise>
      <xsl:sequence select="e:badRP($text,$kind,$local,$row,$col,$expansion)"/>
     </xsl:otherwise>
    </xsl:choose>
   </xsl:when>
   <xsl:otherwise>
    <xsl:sequence select="e:badRP($text,$kind,$local,$row,$col,())"/>
   </xsl:otherwise>
  </xsl:choose>
 </xsl:function>
 
 <xsl:function name="e:var" as="element(e:v)">
  <!-- Wrap an unexpanded variable name in an e:v element -->
  <xsl:param name="name" as="xs:string"/>
  <v>
   <xsl:sequence select="$name"/>
  </v>
 </xsl:function>
 <xsl:function name="e:badRP" as="element(e:u)">
  <!-- Report a range endpoint that could not be converted, and return
       an e:u placeholder for it.
       $s     text of the offending token
       $t     its token type (the group number)
       $local whether it was being treated as a local reference
       $row, $col  the cell holding the formula
       $toks  what the token expanded to, if anything
       The placeholder records the cell (as built from e:n2a($col) and
       $row), $s, $t and $local as attributes, with the comma-joined
       string values of $toks as content. -->
  <xsl:param name="s" as="xs:string"/>
  <xsl:param name="t" as="xs:integer"/>
  <xsl:param name="local" as="xs:boolean"/>
  <xsl:param name="row" as="xs:integer"/>
  <xsl:param name="col" as="xs:integer"/>
  <xsl:param name="toks" as="element(*)*"/>
  <!-- Computed once and shared by the message and the placeholder -->
  <xsl:variable name="cell" select="concat(e:n2a($col),$row)"/>
  <xsl:variable name="expansion" select="string-join($toks,',')"/>
  <!-- Both opening parentheses are now closed at the end of the message -->
  <xsl:message>Bad range part in <xsl:value-of select="$cell"/>: <xsl:value-of select="$s"/> of type <xsl:value-of select="$t"/> (<xsl:value-of select="if ($local) then 'local' else 'external'"/>: (<xsl:value-of select="$expansion"/>))</xsl:message>
  <u r="{$cell}" s="{$s}" t="{$t}" local="{$local}">
   <xsl:value-of select="$expansion"/>
  </u>
 </xsl:function>
 
 <xsl:function name="e:external" as="element(e:e)">
  <!-- Wrap $ref in an e:e element whose x attribute carries the
       sheet/external prefix $xref -->
  <xsl:param name="xref" as="xs:string" required="yes"/>
  <xsl:param name="ref" as="element(*)" required="yes"/>
  <e>
   <xsl:attribute name="x" select="$xref"/>
   <xsl:copy-of select="$ref"/>
  </e>
 </xsl:function>
</xsl:stylesheet>