# HG changeset patch # User Henry S. Thompson # Date 1491926619 -3600 # Node ID 16eff0d30d4d51873d9ca070bfae09d1a5c3d9c2 # Parent 87ed04a0fde223955f2d8687cc6c6c79e7992e2c tidied dereferencing, added simple (no recursion) coverage for variables in ranges diff -r 87ed04a0fde2 -r 16eff0d30d4d notes.txt --- a/notes.txt Tue Apr 11 14:33:14 2017 +0100 +++ b/notes.txt Tue Apr 11 17:03:39 2017 +0100 @@ -60,12 +60,15 @@ Using attributes to hold space-separated lists is risky, as in refs.xsl output, is risky! Fixed, see below. ----------- -Not handling variables as references. Not catching external -references to variables. Not catching naked [n]! as external -references. - Fixed, but not dereferenced vars +Not handling variables as references FIXED. Not catching external +references to variables FIXED (as externals). Not catching naked [n]! as external +references FIXED + Solo local vars are recursively dereferenced The definition table is in workbook.xml definedNames/definedName[@name=$name]/. Sheet name to filename mapping for locals is in workbook.xml sheets/sheet[@name=$sname]/@sheetId + Variables on l or r of ranges are just looked up: if they are complex + no recursion is done: the _semantics_ of this case are not clear to + me, need a real-life example... ----------- Switch to default namespace in order to reduce size and improve readability, and to elements instead of attributes DONE @@ -74,7 +77,7 @@ distinct-values of all targets to all the cells which use them (likewise ranges) DONE. That really does mean we should move to elts for each ref or range, since at this point we want to compute vector -representation as well, so we can identify projections +representation as well DONE, so we can identify projections Slightly irritating that we'll have to serialise this as XML and then re-build it later... @@ -102,12 +105,13 @@ Whew! +FIXED ---------- http://upcommons.upc.edu/bitstream/handle/2117/100584/KDIR_2016_47_CR.pdf [downloaded] uses appearance a lot. That needs to be harvested from styles.xml The kenneth_lay enron sample has _403_ numbered formats... - +---------- Tried the largest sheet from the largest .xlsx I could find: fuse1k/'benjamin_rogers__1002__NYISO Price Information version 2'.xlsx -rw-r--r-- 1 ht None 6273325 Apr 3 16:22 '../benjamin_rogers__1002__NYISO Price Information version 2.xlsx' diff -r 87ed04a0fde2 -r 16eff0d30d4d refs.xsl --- a/refs.xsl Tue Apr 11 14:33:14 2017 +0100 +++ b/refs.xsl Tue Apr 11 17:03:39 2017 +0100 @@ -28,42 +28,55 @@ return if ($tokens[@nr=(7,8,9)]) then let $n := count($tokens), - $vars := for $i in (1 to $n) - return if ($tokens[$i][@nr=9] and - not($tokens[$i - 1][@nr=10 and - .=(':','!')]) and - not($tokens[$i + 1][@nr=10 and .=':'])) - then string($tokens[$i]) - else (), + $vars := for $i in (1 to $n) return + let $t := $tokens[$i], + $l := $tokens[$i - 1], + $r := $tokens[$i + 1] return + if ($t/@nr=9 and + not($l[@nr=10 and + .=(':','!')]) and + not($r[@nr=10 and .=':'])) + then string($t) + else (), $defns := for $var in $vars return e:lookup($var), $recur := for $sub in $defns return if ($sub) then e:tokenise($defns) else (), - $singles := for $i in (1 to $n) - return if ($tokens[$i][@nr=8] and - not($tokens[$i - 1][@nr=10 and - .=(':','!')]) and - not($tokens[$i + 1][@nr=10 and .=':'])) - then translate($tokens[$i],'$','') + $singles := for $i in (1 to $n) return + let $t := $tokens[$i], + $l := $tokens[$i - 1], + $r := $tokens[$i + 1] return + if ($t/@nr=8 and + not($l[@nr=10 and + .=(':','!')]) and + not($r[@nr=10 and .=':'])) + then translate($t,'$','') else (), - $ranges := for $i in (1 to count($tokens)) - return if ($tokens[$i][@nr=10 and .=':' and - not($i gt 2 and - $tokens[$i - 2][@nr=10 and .='!'])]) - then translate(concat($tokens[$i - 1],':', - $tokens[$i + 1]),'$','') + $ranges := for $i in (1 to count($tokens)) return + let $t := $tokens[$i] return + if ($t[@nr=10 and .=':' and + not($i gt 2 and + $tokens[$i - 2][@nr=10 and .='!'])]) + then let $l := $tokens[$i - 1], + $r := $tokens[$i + 1], + $l1 := if ($l/@nr=9) then e:lookup($l) + else $l, + $r1 := if ($r/@nr=9) then e:lookup($r) + else $r + return translate(concat($l1,':',$r1), + '$','') else (), - $externals := for $i in (1 to count($tokens)) - return if ($tokens[$i][@nr=7]) + $externals := for $i in (1 to count($tokens)) return + let $t := $tokens[$i] return + if ($t/@nr=7) then - let $bit := concat($tokens[$i],'!', + let $bit := concat($t,'!', translate($tokens[$i + 2], - '$','')) - return if ((($i+3) le $n) and - $tokens[$i + 3][@nr=10 and .=':']) - then concat($bit,':', - translate($tokens[$i + 4], - '$','')) - else $bit + '$','')) return + if ((($i+3) le $n) and + $tokens[$i + 3][@nr=10 and .=':']) + then concat($bit,':', + translate($tokens[$i + 4],'$','')) + else $bit else () return [($singles,for $a in $recur return $a?1), ($ranges,for $a in $recur return $a?2),