Mercurial > hg > ooxml
changeset 28:c56a2e6990bd
Convert tokenisation to a function, so that it can be made recursive
author | Henry S. Thompson <ht@markup.co.uk> |
---|---|
date | Tue, 11 Apr 2017 12:28:44 +0100 |
parents | 8309dcfce613 |
children | 87ed04a0fde2 |
files | notes.txt refs.xsl |
diffstat | 2 files changed, 49 insertions(+), 28 deletions(-) [+] |
line wrap: on
line diff
--- a/notes.txt Mon Apr 10 17:29:24 2017 +0100 +++ b/notes.txt Tue Apr 11 12:28:44 2017 +0100 @@ -103,6 +103,11 @@ </ddeItem> Whew! ---------- +http://upcommons.upc.edu/bitstream/handle/2117/100584/KDIR_2016_47_CR.pdf +[downloaded] +uses appearance a lot. That needs to be harvested from styles.xml +The kenneth_lay enron sample has _403_ numbered formats... + Tried the largest sheet from the largest .xlsx I could find: fuse1k/'benjamin_rogers__1002__NYISO Price Information version 2'.xlsx -rw-r--r-- 1 ht None 6273325 Apr 3 16:22 '../benjamin_rogers__1002__NYISO Price Information version 2.xlsx'
--- a/refs.xsl Mon Apr 10 17:29:24 2017 +0100 +++ b/refs.xsl Tue Apr 11 12:28:44 2017 +0100 @@ -10,42 +10,58 @@ <xsl:variable name="workbook" select="document(concat($xlDir,'/workbook.xml'))/*"/> <xsl:variable name="sheet-name" select="$workbook/s:sheets/s:sheet[@sheetId=$sheet-number]/@name"/> + <xsl:function name="e:tokenise" as="array(xs:string*)*"> + <xsl:param name="formula" as="xs:string" required="yes"/> + <xsl:sequence select=" + let $tokens := analyze-string($formula,$pat)/xf:match/xf:group + return if ($tokens[@nr=(7,8,9)]) + then + let $n := count($tokens), + $singles := for $i in (1 to $n) + return if ($tokens[$i][@nr=(8,9)] and + not($tokens[$i - 1][@nr=10 and + .=(':','!')]) and + not($tokens[$i + 1][@nr=10 and .=':'])) + then translate($tokens[$i],'$','') + else (), + $ranges := for $i in (1 to count($tokens)) + return if ($tokens[$i][@nr=10 and .=':' and + not($i gt 2 and + $tokens[$i - 2][@nr=10 and .='!'])]) + then translate(concat($tokens[$i - 1],':', + $tokens[$i + 1]),'$','') + else (), + $externals := for $i in (1 to count($tokens)) + return if ($tokens[$i][@nr=7]) + then + let $bit := concat($tokens[$i],'!', + translate($tokens[$i + 2], + '$','')) + return if ((($i+3) le $n) and + $tokens[$i + 3][@nr=10 and .=':']) + then concat($bit,':', + translate($tokens[$i + 4], + '$','')) + else $bit + else () + return [$singles,$ranges,$externals] + else ()"/> + </xsl:function> + <xsl:template match="/"> <refs sheetName="{$sheet-name}"><xsl:apply-templates select="//s:c"/></refs> </xsl:template> <xsl:template match="s:c[s:f]"> - <xsl:variable name="tokens" select="analyze-string(s:f/.,$pat)/xf:match/xf:group"/> - <xsl:if test="@r='A2'"><xsl:message><xsl:value-of select="$tokens/@nr"/></xsl:message> -<xsl:message><xsl:value-of select="$tokens/."/></xsl:message> + <xsl:variable name="tokens" select="e:tokenise(s:f/.)"/> + <xsl:if test="@r='xxx'"><xsl:message>|</xsl:message> </xsl:if> - <xsl:if test="$tokens[@nr=(7,8,9)]"> - <xsl:variable name="n" 
select="count($tokens)"/> - <xsl:variable name="singles" select="for $i in (1 to $n) - return if ($tokens[$i][@nr=(8,9)] and - not($tokens[$i - 1][@nr=10 and - .=(':','!')]) and - not($tokens[$i + 1][@nr=10 and .=':'])) - then translate($tokens[$i],'$','') - else ()"/> + <xsl:if test="count($tokens)>0"> + <xsl:variable name="singles" select="$tokens?1"/> <!-- Note that we don't bother to treat external ranges as ranges, since we're not going to try to detect cross-document refs --> - <xsl:variable name="ranges" select="for $i in (1 to count($tokens)) - return if ($tokens[$i][@nr=10 and .=':' and - not($i gt 2 and - $tokens[$i - 2][@nr=10 and .='!'])]) - then translate(concat($tokens[$i - 1],':',$tokens[$i + 1]),'$','') - else ()"/> - <xsl:variable name="externals" select="for $i in (1 to count($tokens)) - return if ($tokens[$i][@nr=7]) - then - let $bit := concat($tokens[$i],'!', - translate($tokens[$i + 2],'$','')) - return if ((($i+3) le $n) and - $tokens[$i + 3][@nr=10 and .=':']) - then concat($bit,':',translate($tokens[$i + 4],'$','')) - else $bit - else ()"/> + <xsl:variable name="ranges" select="$tokens?2"/> + <xsl:variable name="externals" select="$tokens?3"/> <ref c="{@r}"> <xsl:for-each select="distinct-values($singles)"> <s><xsl:value-of select="."/></s>