changeset 28:c56a2e6990bd

Convert tokenisation to a function, so that it can be made recursive
author Henry S. Thompson <>
date Tue, 11 Apr 2017 12:28:44 +0100
parents 8309dcfce613
children 87ed04a0fde2
files notes.txt refs.xsl
diffstat 2 files changed, 49 insertions(+), 28 deletions(-) [+]
line wrap: on
line diff
--- a/notes.txt	Mon Apr 10 17:29:24 2017 +0100
+++ b/notes.txt	Tue Apr 11 12:28:44 2017 +0100
@@ -103,6 +103,11 @@
+uses appearance a lot.  That needs to be harvested from styles.xml
+The kenneth_lay enron sample has _403_ numbered formats...
 Tried the largest sheet from the largest .xlsx I could find:
   fuse1k/'benjamin_rogers__1002__NYISO Price Information version 2'.xlsx
    -rw-r--r-- 1 ht None  6273325 Apr  3 16:22 '../benjamin_rogers__1002__NYISO Price Information version 2.xlsx'
--- a/refs.xsl	Mon Apr 10 17:29:24 2017 +0100
+++ b/refs.xsl	Tue Apr 11 12:28:44 2017 +0100
@@ -10,42 +10,58 @@
  <xsl:variable name="workbook" select="document(concat($xlDir,'/workbook.xml'))/*"/>
  <xsl:variable name="sheet-name" select="$workbook/s:sheets/s:sheet[@sheetId=$sheet-number]/@name"/>
+ <!-- e:tokenise: collect the references mentioned in one formula string.
+      $formula is matched against the global regex $pat (defined outside
+      this function) with analyze-string(); the resulting xf:group elements
+      are the tokens, told apart by their @nr attribute.
+      NOTE(review): the code appears to treat @nr 7 as the name in front of
+      a '!', @nr 8 and 9 as cell references and @nr 10 as punctuation
+      (':' or '!'); confirm against the definition of $pat.
+      Returns the empty sequence when no token has @nr 7, 8 or 9; otherwise
+      a single array with three members, each a sequence of strings:
+        ?1 singles:   every @nr 8 or 9 token that is neither preceded by an
+                      @nr 10 ':' or '!' token nor followed by an @nr 10 ':'
+        ?2 ranges:    for every @nr 10 ':' token, the tokens either side of
+                      it joined by ':', unless the token two places before
+                      the ':' is an @nr 10 '!'
+        ?3 externals: for every @nr 7 token, that token, '!' and the token
+                      two places on, followed by ':' and the token four
+                      places on when the token three places on is an
+                      @nr 10 ':'
+      '$' characters are removed from every result, except from the @nr 7
+      token itself in externals. -->
+ <xsl:function name="e:tokenise" as="array(xs:string*)*">
+  <xsl:param name="formula" as="xs:string" required="yes"/>
+  <xsl:sequence select="
+    let $tokens := analyze-string($formula,$pat)/xf:match/xf:group
+     return if ($tokens[@nr=(7,8,9)])
+             then 
+              let $n := count($tokens),
+                  $singles := for $i in (1 to $n)
+                     return if ($tokens[$i][@nr=(8,9)] and
+                                not($tokens[$i - 1][@nr=10 and
+                                .=(':','!')]) and
+                                not($tokens[$i + 1][@nr=10 and .=':']))
+                             then translate($tokens[$i],'$','')
+                             else (),
+                  $ranges := for $i in (1 to count($tokens))
+                     return if ($tokens[$i][@nr=10 and .=':' and
+                                not($i gt 2 and
+                                    $tokens[$i - 2][@nr=10 and .='!'])])
+                             then translate(concat($tokens[$i - 1],':',
+                                                   $tokens[$i + 1]),'$','')
+                             else (),
+                  $externals := for $i in (1 to count($tokens))
+                     return if ($tokens[$i][@nr=7])
+                             then 
+                              let $bit := concat($tokens[$i],'!',
+                                                 translate($tokens[$i + 2],
+                                                           '$',''))
+                               return if ((($i+3) le $n) and
+                                          $tokens[$i + 3][@nr=10 and .=':'])
+                                       then concat($bit,':',
+                                                   translate($tokens[$i + 4],
+                                                             '$',''))
+                                       else $bit
+                             else ()
+                  return [$singles,$ranges,$externals]
+             else ()"/>
+ </xsl:function>
  <xsl:template match="/">
   <refs sheetName="{$sheet-name}"><xsl:apply-templates select="//s:c"/></refs>
  <xsl:template match="s:c[s:f]">
-  <xsl:variable name="tokens" select="analyze-string(s:f/.,$pat)/xf:match/xf:group"/>
-  <xsl:if test="@r='A2'"><xsl:message><xsl:value-of select="$tokens/@nr"/></xsl:message>
-<xsl:message><xsl:value-of select="$tokens/."/></xsl:message>
+  <xsl:variable name="tokens" select="e:tokenise(s:f/.)"/>
+  <xsl:if test="@r='xxx'"><xsl:message>|</xsl:message>
-  <xsl:if test="$tokens[@nr=(7,8,9)]">
-   <xsl:variable name="n" select="count($tokens)"/>
-   <xsl:variable name="singles" select="for $i in (1 to $n)
-       return if ($tokens[$i][@nr=(8,9)] and
-                  not($tokens[$i - 1][@nr=10 and
-                      .=(':','!')]) and
-                  not($tokens[$i + 1][@nr=10 and .=':']))
-            then translate($tokens[$i],'$','')
-            else ()"/>
+  <xsl:if test="count($tokens)>0">
+   <xsl:variable name="singles" select="$tokens?1"/>
    <!-- Note that we don't bother to treat external ranges as ranges,
           since we're not going to try to detect cross-document refs -->
-   <xsl:variable name="ranges" select="for $i in (1 to count($tokens))
-          return if ($tokens[$i][@nr=10 and .=':' and
-                                not($i gt 2 and
-                                    $tokens[$i - 2][@nr=10 and .='!'])])
-            then translate(concat($tokens[$i - 1],':',$tokens[$i + 1]),'$','')
-            else ()"/>
-   <xsl:variable name="externals" select="for $i in (1 to count($tokens))
-          return if ($tokens[$i][@nr=7])
-            then 
-              let $bit := concat($tokens[$i],'!',
-                                 translate($tokens[$i + 2],'$',''))
-              return if ((($i+3) le $n) and
-                         $tokens[$i + 3][@nr=10 and .=':'])
-                then concat($bit,':',translate($tokens[$i + 4],'$',''))
-                else $bit
-            else ()"/>
+   <xsl:variable name="ranges" select="$tokens?2"/>
+   <xsl:variable name="externals" select="$tokens?3"/>
    <ref c="{@r}">
     <xsl:for-each select="distinct-values($singles)">
      <s><xsl:value-of select="."/></s>