Mercurial > hg > ooxml
changeset 23:bfa38afaea63
change to default ns
author | Henry S. Thompson <ht@markup.co.uk> |
---|---|
date | Thu, 06 Apr 2017 16:47:53 +0100 |
parents | ca98c74a7cb1 |
children | 87e0d620deea |
files | notes.txt refs.xsl |
diffstat | 2 files changed, 83 insertions(+), 6 deletions(-) [+] |
line wrap: on
line diff
--- a/notes.txt Wed Apr 05 11:57:00 2017 +0100 +++ b/notes.txt Thu Apr 06 16:47:53 2017 +0100 @@ -45,10 +45,74 @@ to wait for ascii.xsl or html.xsl. But only copy type in in rect if there was content before. ----------- +Using attributes to hold space-separated lists, as in +refs.xsl output, is risky! +----------- Not handling variables as references. Not catching external references to variables. Not catching naked [n]! as external references. Fixed, but not dereferenced vars The definition table is in workbook.xml definedNames/definedName[@name=$name]/. Sheet name to filename mapping for locals is in workbook.xml sheets/sheet[@name=$sname]/@sheetId - +----------- +Switch to default namespace in order to reduce size and improve readability +----------- +Should put another step after refs.xsl to compute a map from +distinct-values of all targets to all the cells which use them +(likewise ranges). That really does mean we should move to elts for +each ref or range, since at this point we want to compute vector +representation as well, so we can identify projections + +Slightly irritating that we'll have to serialise this as XML and then +re-build it later... +----------- + Overgenerating in kenneth_lay__19506: e.g. <e:ref c="E9" er="[1]!'.SPX' '.SPX'!"/> + from <f>[1]!'.SPX'</f> + Hmm. This cell displays in Excel as REUTERS|IDN!.SPX + The indirections work as follows: + in workbook.xml: + <externalReferences> + <externalReference r:id="rId3"/> + <externalReference r:id="rId4"/> + </externalReferences> + in _rels/workbook.xml.rels + <Relationship Id="rId3" Target="externalLinks/externalLink1.xml" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/externalLink"/> + in externalLinks/externalLink1.xml + <ddeLink ddeService="REUTER" ddeTopic="IDN"... + <ddeItems> + ... + <ddeItem advise="1" name=".SPX"> + <values> + <value> + <val>1264.96</val> + </value> + </values> + </ddeItem> + Whew! 
+---------- +Tried the largest sheet from the largest .xlsx I could find: + fuse1k/'benjamin_rogers__1002__NYISO Price Information version 2'.xlsx + -rw-r--r-- 1 ht None 6273325 Apr 3 16:22 '../benjamin_rogers__1002__NYISO Price Information version 2.xlsx' + -rw-r--r-- 1 ht None 23221149 Jan 1 1980 xl/worksheets/sheet3.xml + + > lxcount xl/worksheets/sheet3.xml | sort -k2nr + *Total* 1230217 + c 596032 + v 595876 + f 19201 + row 18985 + col 106 + + <dimension ref="A1:DY18985"/> + +Blew java out of the water :-( + java.lang.OutOfMemoryError: Java heap space + +Need to try again with more memory, if I remember how... + +The raw result is going to have 18985 x 102 ~= 2 million cells == +(assuming average cell size of 30 bytes and row overhead of 20) (* +18985 (+ 20 (* 102 30))) == 58,473,800 bytes, which is big but tolerable... +---------------- +Back to ranges - +
--- a/refs.xsl Wed Apr 05 11:57:00 2017 +0100 +++ b/refs.xsl Thu Apr 06 16:47:53 2017 +0100 @@ -1,11 +1,11 @@ <?xml version='1.0'?> <!DOCTYPE doc SYSTEM "../../../lib/xml/xsl.dtd" > -<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="3.0" xmlns:s="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:e="http://markup.co.uk/excel" exclude-result-prefixes="xs s" xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:xf="http://www.w3.org/2005/xpath-functions"> +<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="3.0" xmlns:s="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:e="http://markup.co.uk/excel" exclude-result-prefixes="xs s e xf" xmlns="http://markup.co.uk/excel" xmlns:xf="http://www.w3.org/2005/xpath-functions"> <xsl:variable name="pat1">("[^"]*")|(\{[^}]+})|(,)|([^=\-+*/();:,.$<>^!]+(?:\.[^=\-+*/();:,.$<>^!]+)*\()|([)])|(^=|\()|((?:'[^']+')|(?:\[[0-9]+\][^!]*))|(\$?[A-Z]+\$?[0-9]+)|([a-zA-Z_\\][a-zA-Z0-9._]*)|(.)</xsl:variable> <xsl:param name="pat" select="$pat1"/> <xsl:template match="/"> - <e:refs><xsl:apply-templates select="//s:c"/></e:refs> + <refs><xsl:apply-templates select="//s:c"/></refs> </xsl:template> <xsl:template match="s:c[s:f]"> @@ -16,9 +16,14 @@ <xsl:if test="$tokens[@nr=(7,8,9)]"> <xsl:variable name="n" select="count($tokens)"/> <xsl:variable name="singles" select="for $i in (1 to $n) - return if ($tokens[$i][@nr=(8,9)] and not($tokens[$i - 1][@nr=10 and .=(':','!')]) and not($tokens[$i + 1][@nr=10 and .=':'])) + return if ($tokens[$i][@nr=(8,9)] and + not($tokens[$i - 1][@nr=10 and + .=(':','!')]) and + not($tokens[$i + 1][@nr=10 and .=':'])) then translate($tokens[$i],'$','') else ()"/> + <!-- Note that we don't bother to treat external ranges as ranges, + since we're not going to try to detect cross-document refs --> <xsl:variable name="ranges" select="for $i 
in (1 to count($tokens)) return if ($tokens[$i][@nr=10 and .=':' and not($i gt 2 and @@ -30,11 +35,19 @@ then let $bit := concat($tokens[$i],'!', translate($tokens[$i + 2],'$','')) - return if ((($i+3) le $n) and $tokens[$i + 3][@nr=10 and .=':']) + return if ((($i+3) le $n) and + $tokens[$i + 3][@nr=10 and .=':']) then concat($bit,':',translate($tokens[$i + 4],'$','')) else $bit else ()"/> - <e:ref c="{@r}" r="{$singles}" rr="{$ranges}" er="{$externals}"/></xsl:if> + <ref c="{@r}"> + <!-- Assumes that space doesn't occur in variable names + Might occur in external names (who knows!) but I'm assuming we're + never going to split the value of @er back out... --> + <xsl:if test="count($singles)>0"><xsl:attribute name="r"><xsl:value-of select="distinct-values($singles)"/></xsl:attribute></xsl:if> + <xsl:if test="count($ranges)>0"><xsl:attribute name="rr"><xsl:value-of select="distinct-values($ranges)"/></xsl:attribute></xsl:if> + <xsl:if test="count($externals)>0"><xsl:attribute name="er"><xsl:value-of select="distinct-values($externals)"/></xsl:attribute></xsl:if> + </ref></xsl:if> </xsl:template> <xsl:template match="s:c"/>