Mercurial > hg > ooxml
changeset 30:16eff0d30d4d
tidied dereferencing, added simple (no recursion) coverage for variables in ranges
author | Henry S. Thompson <ht@markup.co.uk> |
---|---|
date | Tue, 11 Apr 2017 17:03:39 +0100 |
parents | 87ed04a0fde2 |
children | 6270bef9b5d4 |
files | notes.txt refs.xsl |
diffstat | 2 files changed, 52 insertions(+), 35 deletions(-) [+] |
line wrap: on
line diff
--- a/notes.txt Tue Apr 11 14:33:14 2017 +0100 +++ b/notes.txt Tue Apr 11 17:03:39 2017 +0100 @@ -60,12 +60,15 @@ Using attributes to hold space-separated lists is risky, as in refs.xsl output, is risky! Fixed, see below. ----------- -Not handling variables as references. Not catching external -references to variables. Not catching naked [n]! as external -references. - Fixed, but not dereferenced vars +Not handling variables as references FIXED. Not catching external +references to variables FIXED (as externals). Not catching naked [n]! as external +references FIXED + Solo local vars are recursively dereferenced The definition table is in workbook.xml definedNames/definedName[@name=$name]/. Sheet name to filename mapping for locals is in workbook.xml sheets/sheet[@name=$sname]/@sheetId + Variables on l or r of ranges are just looked up: if they are complex + no recursion is done: the _semantics_ of this case are not clear to + me, need a real-life example... ----------- Switch to default namespace in order to reduce size and improve readability, and to elements instead of attributes DONE @@ -74,7 +77,7 @@ distinct-values of all targets to all the cells which use them (likewise ranges) DONE. That really does mean we should move to elts for each ref or range, since at this point we want to compute vector -representation as well, so we can identify projections +representation as well DONE, so we can identify projections Slightly irritating that we'll have to serialise this as XML and then re-build it later... @@ -102,12 +105,13 @@ </values> </ddeItem> Whew! +FIXED ---------- http://upcommons.upc.edu/bitstream/handle/2117/100584/KDIR_2016_47_CR.pdf [downloaded] uses appearance a lot. That needs to be harvested from styles.xml The kenneth_lay enron sample has _403_ numbered formats... - +---------- Tried the largest sheet from the largest .xlsx I could find: fuse1k/'benjamin_rogers__1002__NYISO Price Information version 2'.xlsx -rw-r--r-- 1 ht None 6273325 Apr 3 16:22 '../benjamin_rogers__1002__NYISO Price Information version 2.xlsx'
--- a/refs.xsl Tue Apr 11 14:33:14 2017 +0100 +++ b/refs.xsl Tue Apr 11 17:03:39 2017 +0100 @@ -28,42 +28,55 @@ return if ($tokens[@nr=(7,8,9)]) then let $n := count($tokens), - $vars := for $i in (1 to $n) - return if ($tokens[$i][@nr=9] and - not($tokens[$i - 1][@nr=10 and - .=(':','!')]) and - not($tokens[$i + 1][@nr=10 and .=':'])) - then string($tokens[$i]) - else (), + $vars := for $i in (1 to $n) return + let $t := $tokens[$i], + $l := $tokens[$i - 1], + $r := $tokens[$i + 1] return + if ($t/@nr=9 and + not($l[@nr=10 and + .=(':','!')]) and + not($r[@nr=10 and .=':'])) + then string($t) + else (), $defns := for $var in $vars return e:lookup($var), $recur := for $sub in $defns return if ($sub) then e:tokenise($defns) else (), - $singles := for $i in (1 to $n) - return if ($tokens[$i][@nr=8] and - not($tokens[$i - 1][@nr=10 and - .=(':','!')]) and - not($tokens[$i + 1][@nr=10 and .=':'])) - then translate($tokens[$i],'$','') + $singles := for $i in (1 to $n) return + let $t := $tokens[$i], + $l := $tokens[$i - 1], + $r := $tokens[$i + 1] return + if ($t/@nr=8 and + not($l[@nr=10 and + .=(':','!')]) and + not($r[@nr=10 and .=':'])) + then translate($t,'$','') else (), - $ranges := for $i in (1 to count($tokens)) - return if ($tokens[$i][@nr=10 and .=':' and - not($i gt 2 and - $tokens[$i - 2][@nr=10 and .='!'])]) - then translate(concat($tokens[$i - 1],':', - $tokens[$i + 1]),'$','') + $ranges := for $i in (1 to count($tokens)) return + let $t := $tokens[$i] return + if ($t[@nr=10 and .=':' and + not($i gt 2 and + $tokens[$i - 2][@nr=10 and .='!'])]) + then let $l := $tokens[$i - 1], + $r := $tokens[$i + 1], + $l1 := if ($l/@nr=9) then e:lookup($l) + else $l, + $r1 := if ($r/@nr=9) then e:lookup($r) + else $r + return translate(concat($l1,':',$r1), + '$','') else (), - $externals := for $i in (1 to count($tokens)) - return if ($tokens[$i][@nr=7]) + $externals := for $i in (1 to count($tokens)) return + let $t := $tokens[$i] return + if ($t/@nr=7) then - let $bit := concat($tokens[$i],'!', + let $bit := concat($t,'!', translate($tokens[$i + 2], - '$','')) - return if ((($i+3) le $n) and - $tokens[$i + 3][@nr=10 and .=':']) - then concat($bit,':', - translate($tokens[$i + 4], - '$','')) - else $bit + '$','')) return + if ((($i+3) le $n) and + $tokens[$i + 3][@nr=10 and .=':']) + then concat($bit,':', + translate($tokens[$i + 4],'$','')) + else $bit else () return [($singles,for $a in $recur return $a?1), ($ranges,for $a in $recur return $a?2),