changeset 30:16eff0d30d4d

tidied dereferencing, added simple (no recursion) coverage for variables in ranges
author Henry S. Thompson <ht@markup.co.uk>
date Tue, 11 Apr 2017 17:03:39 +0100
parents 87ed04a0fde2
children 6270bef9b5d4
files notes.txt refs.xsl
diffstat 2 files changed, 52 insertions(+), 35 deletions(-) [+]
line wrap: on
line diff
--- a/notes.txt	Tue Apr 11 14:33:14 2017 +0100
+++ b/notes.txt	Tue Apr 11 17:03:39 2017 +0100
@@ -60,12 +60,15 @@
 Using attributes to hold space-separated lists, as in
 refs.xsl output, is risky!  Fixed, see below.
 -----------
-Not handling variables as references.  Not catching external
-references to variables.  Not catching naked [n]! as external
-references.
- Fixed, but not dereferenced vars
+Not handling variables as references: FIXED.  Not catching external
+references to variables: FIXED (as externals).  Not catching naked [n]! as external
+references: FIXED.
+ Solo local vars are recursively dereferenced
  The definition table is in workbook.xml definedNames/definedName[@name=$name]/.
   Sheet name to filename mapping for locals is in workbook.xml sheets/sheet[@name=$sname]/@sheetId
+ Variables on the left or right of ranges are just looked up: if they are
+  complex, no recursion is done; the _semantics_ of this case are not clear
+  to me, need a real-life example...
 -----------
 Switch to default namespace in order to reduce size and improve
 readability, and to elements instead of attributes DONE
@@ -74,7 +77,7 @@
 distinct-values of all targets to all the cells which use them
 (likewise ranges) DONE. That really does mean we should move to elts for
 each ref or range, since at this point we want to compute vector
-representation as well, so we can identify projections
+representation as well DONE, so we can identify projections
 
 Slightly irritating that we'll have to serialise this as XML and then
 re-build it later...
@@ -102,12 +105,13 @@
 	  </values>
 	  </ddeItem>
        Whew!
+FIXED
 ----------
 http://upcommons.upc.edu/bitstream/handle/2117/100584/KDIR_2016_47_CR.pdf
 [downloaded]
 uses appearance a lot.  That needs to be harvested from styles.xml
 The kenneth_lay enron sample has _403_ numbered formats...
-
+----------
 Tried the largest sheet from the largest .xlsx I could find:
   fuse1k/'benjamin_rogers__1002__NYISO Price Information version 2'.xlsx
    -rw-r--r-- 1 ht None  6273325 Apr  3 16:22 '../benjamin_rogers__1002__NYISO Price Information version 2.xlsx'
--- a/refs.xsl	Tue Apr 11 14:33:14 2017 +0100
+++ b/refs.xsl	Tue Apr 11 17:03:39 2017 +0100
@@ -28,42 +28,55 @@
      return if ($tokens[@nr=(7,8,9)])
              then 
               let $n := count($tokens),
-                  $vars := for $i in (1 to $n)
-                     return if ($tokens[$i][@nr=9] and
-                                not($tokens[$i - 1][@nr=10 and
-                                .=(':','!')]) and
-                                not($tokens[$i + 1][@nr=10 and .=':']))
-                             then string($tokens[$i])
-                             else (),
+                  $vars := for $i in (1 to $n) return
+                         let $t := $tokens[$i],
+                             $l := $tokens[$i - 1],
+                             $r := $tokens[$i + 1] return    
+                          if ($t/@nr=9 and
+                              not($l[@nr=10 and
+                                     .=(':','!')]) and
+                              not($r[@nr=10 and .=':']))
+                            then string($t)
+                            else (),
                   $defns := for $var in $vars return e:lookup($var),
                   $recur := for $sub in $defns 
                               return if ($sub) then e:tokenise($defns) else (),
-                  $singles := for $i in (1 to $n)
-                     return if ($tokens[$i][@nr=8] and
-                                not($tokens[$i - 1][@nr=10 and
-                                .=(':','!')]) and
-                                not($tokens[$i + 1][@nr=10 and .=':']))
-                             then translate($tokens[$i],'$','')
+                  $singles := for $i in (1 to $n) return
+                            let $t := $tokens[$i],
+                                $l := $tokens[$i - 1],
+                                $r := $tokens[$i + 1] return
+                            if ($t/@nr=8 and
+                                not($l[@nr=10 and
+                                       .=(':','!')]) and
+                                not($r[@nr=10 and .=':']))
+                             then translate($t,'$','')
                              else (),
-                  $ranges := for $i in (1 to count($tokens))
-                     return if ($tokens[$i][@nr=10 and .=':' and
-                                not($i gt 2 and
-                                    $tokens[$i - 2][@nr=10 and .='!'])])
-                             then translate(concat($tokens[$i - 1],':',
-                                                   $tokens[$i + 1]),'$','')
+                  $ranges := for $i in (1 to count($tokens)) return
+                            let $t := $tokens[$i] return
+                            if ($t[@nr=10 and .=':' and
+                                   not($i gt 2 and
+                                       $tokens[$i - 2][@nr=10 and .='!'])])
+                             then let $l := $tokens[$i - 1],
+                                      $r := $tokens[$i + 1],
+                                      $l1 := if ($l/@nr=9) then e:lookup($l)
+                                             else $l,
+                                      $r1 := if ($r/@nr=9) then e:lookup($r)
+                                             else $r
+                                      return translate(concat($l1,':',$r1),
+                                                       '$','')
                              else (),
-                  $externals := for $i in (1 to count($tokens))
-                     return if ($tokens[$i][@nr=7])
+                  $externals := for $i in (1 to count($tokens)) return
+                            let $t := $tokens[$i] return
+                            if ($t/@nr=7)
                              then 
-                              let $bit := concat($tokens[$i],'!',
+                              let $bit := concat($t,'!',
                                                  translate($tokens[$i + 2],
-                                                           '$',''))
-                               return if ((($i+3) le $n) and
-                                          $tokens[$i + 3][@nr=10 and .=':'])
-                                       then concat($bit,':',
-                                                   translate($tokens[$i + 4],
-                                                             '$',''))
-                                       else $bit
+                                                           '$','')) return
+                               if ((($i+3) le $n) and
+                                   $tokens[$i + 3][@nr=10 and .=':'])
+                                then concat($bit,':',
+                                            translate($tokens[$i + 4],'$',''))
+                                else $bit
                              else ()
                   return [($singles,for $a in $recur return $a?1),
                           ($ranges,for $a in $recur return $a?2),