changeset 27:8309dcfce613

preparing for variable deref
author Henry S. Thompson <ht@markup.co.uk>
date Mon, 10 Apr 2017 17:29:24 +0100
parents d2ca3ea1f5ae
children c56a2e6990bd
files format.xsl notes.txt rect.xsl refs.xsl visualise.xpl
diffstat 5 files changed, 53 insertions(+), 13 deletions(-) [+]
line wrap: on
line diff
--- a/format.xsl	Fri Apr 07 18:42:47 2017 +0100
+++ b/format.xsl	Mon Apr 10 17:29:24 2017 +0100
@@ -2,9 +2,9 @@
 <!DOCTYPE doc SYSTEM "../../../lib/xml/xsl.dtd" >
 <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="2.0" xmlns:s="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:e="http://markup.co.uk/excel" exclude-result-prefixes="xs s" xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
  <xsl:param name="elabDir"/>
- <xsl:variable name="xldir" select="replace(base-uri(),'/[^/]*/[^/]*$','')"/>
+ <xsl:param name="xlDir"/>
  <xsl:variable name="fmts" select="document(concat($elabDir,'/fmt.xml'))/fmts/fmt"/>
- <xsl:variable name="styles" select="document(concat($xldir,'/styles.xml'))/*"/>
+ <xsl:variable name="styles" select="document(concat($xlDir,'/styles.xml'))/*"/>
  <xsl:variable name="xfs" select="$styles//s:cellXfs/s:xf"/>
  <xsl:variable name="nfs" select="$styles//s:numFmts/s:numFmt"/>
 
--- a/notes.txt	Fri Apr 07 18:42:47 2017 +0100
+++ b/notes.txt	Mon Apr 10 17:29:24 2017 +0100
@@ -28,22 +28,34 @@
     'D6' --> <R rc='D' rr='6'/>
      and
     '$E5' --> <R ac='E' rr='5'/>
+No, in fact -- absolute vs. 'variable' isn't relevant for our purposes.
+What we probably _do_ want is to add to every reference a _relative_
+version, i.e. +/-columnDelta, +/-rowDelta
 --------
 Identifying dates is . . . tedious.  They will be ints or floats (?),
 with s="<int>", where the int is a 0-origin index into the list of
 <xf...numFmtId="<bin>".../>
 children of <cellXfs> in styles.xml, and bin is a built-in date format
 code, see 18.8.30 numFmt (Number Format) in ISO/IEC 29500-1:2016(E) ==
-C071691e.pdf
+C071691e.pdf  DONE
 ---------
 Decided to distinguish between type (num, date, str, err, ...) and
-class (cur, others to come?).  If non-standard code, just record that.
+class (cur(rency), others to come?).  If non-standard code, just record
+that.
+  The current pipe has two main steps, followed by an optional
+  prettifying step:
+    format.xsl (extracts type={bool,date,num,str,err}
+                         class={cur,[nothing else yet]}
+                         code={raw format code if not recognised})
+    rect.xsl   (fills in gaps, cuts down size, using only bdnse for
+                <t>[ype] with attrs c[lass]={c,...} and [co]d[e]=...)
+ For now, just using first letters of type, class DONE
 ----------
 Hmm, looking at real data (kenneth_lay__19506), I see _lots_ of cells
 with (numerical) formats, but no content.  Where do I throw those
 away?  Can throw away empty _rows_ in rect.xsl, but for _cells_ have
 to wait for ascii.xsl or html.xsl.  But only copy type in rect if
-there was content before.
+there was content before. DONE
 -----------
 Using attributes to hold space-separated lists, as in
 refs.xsl output, is risky!  Fixed, see below.
@@ -56,11 +68,11 @@
   Sheet name to filename mapping for locals is in workbook.xml sheets/sheet[@name=$sname]/@sheetId
 -----------
 Switch to default namespace in order to reduce size and improve
-readability, and to elements instead of attributes
+readability, and to elements instead of attributes DONE
 -----------
 Should put another step after refs.xsl to compute a map from
 distinct-values of all targets to all the cells which use them
-(likewise ranges).  That really does mean we should move to elts for
+(likewise ranges) DONE. That really does mean we should move to elts for
 each ref or range, since at this point we want to compute vector
 representation as well, so we can identify projections
 
--- a/rect.xsl	Fri Apr 07 18:42:47 2017 +0100
+++ b/rect.xsl	Mon Apr 10 17:29:24 2017 +0100
@@ -39,7 +39,6 @@
  </xsl:template>
 
  <xsl:template match="s:sheetData">
-  <xsl:message><xsl:value-of select="count($refs/*)"/></xsl:message>
   <xsl:if test="s:row">
    <xsl:variable name="dims" select="analyze-string($dim,'([A-Z]+)([0-9]+):([A-Z]+)([0-9]+)')"/> 
    <xsl:variable name="height" select="1+xs:integer($dims//xpf:group[@nr='4'])-xs:integer($dims//xpf:group[@nr='2'])"/>
@@ -59,7 +58,13 @@
         <xsl:variable name="reffed" select="key('ref',$r,$refs)"/>
        <c c="{$col}">
         <xsl:if test="$c/s:f"><xsl:attribute name="f">1</xsl:attribute></xsl:if>
-        <xsl:if test="$c/*"><t><xsl:value-of select="substring($c/@e:type,1,1)"/></t></xsl:if>
+        <xsl:if test="$c/*">
+         <t>
+          <xsl:if test="$c/@e:class"><xsl:attribute name="c"><xsl:value-of select="substring($c/@e:class,1,1)"/></xsl:attribute></xsl:if>
+          <xsl:if test="$c/@e:code"><xsl:attribute name="l"><xsl:value-of select="$c/@e:code"/></xsl:attribute></xsl:if>
+          <xsl:value-of select="substring($c/@e:type,1,1)"/>
+         </t>
+        </xsl:if>
         <xsl:if test="$reffed"><xsl:copy-of select="$reffed/e:r"/></xsl:if>
        </c>
       </xsl:for-each></xsl:if>
--- a/refs.xsl	Fri Apr 07 18:42:47 2017 +0100
+++ b/refs.xsl	Mon Apr 10 17:29:24 2017 +0100
@@ -1,16 +1,22 @@
 <?xml version='1.0'?>
 <!DOCTYPE doc SYSTEM "../../../lib/xml/xsl.dtd" >
 <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="3.0" xmlns:s="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:e="http://markup.co.uk/excel" exclude-result-prefixes="xs s e xf" xmlns="http://markup.co.uk/excel" xmlns:xf="http://www.w3.org/2005/xpath-functions">
+ <xsl:param name="sheet-number"/>
+ <xsl:param name="xlDir"/>
+
   <xsl:variable name="pat1">("[^"]*")|(\{[^}]+})|(,)|([^=\-+*/();:,.$&lt;>^!]+(?:\.[^=\-+*/();:,.$&lt;>^!]+)*\()|([)])|(^=|\()|((?:'[^']+')|(?:\[[0-9]+\][^!]*))|(\$?[A-Z]+\$?[0-9]+)|([a-zA-Z_\\][a-zA-Z0-9._]*)|(.)</xsl:variable>
- <xsl:param name="pat" select="$pat1"/>
+ <xsl:param name="pat" select="$pat1"/><!-- xsl:param for refinement debugging by passing in the pattern -->
+ 
+ <xsl:variable name="workbook" select="document(concat($xlDir,'/workbook.xml'))/*"/>
+ <xsl:variable name="sheet-name" select="$workbook/s:sheets/s:sheet[@sheetId=$sheet-number]/@name"/>
 
  <xsl:template match="/">
-  <refs><xsl:apply-templates select="//s:c"/></refs>
+  <refs sheetName="{$sheet-name}"><xsl:apply-templates select="//s:c"/></refs>
  </xsl:template>
  
  <xsl:template match="s:c[s:f]">
   <xsl:variable name="tokens" select="analyze-string(s:f/.,$pat)/xf:match/xf:group"/>
-  <xsl:if test="@r='xxx'"><xsl:message><xsl:value-of select="$tokens/@nr"/></xsl:message>
+  <xsl:if test="@r='A2'"><xsl:message><xsl:value-of select="$tokens/@nr"/></xsl:message>
 <xsl:message><xsl:value-of select="$tokens/."/></xsl:message>
   </xsl:if>
   <xsl:if test="$tokens[@nr=(7,8,9)]">
--- a/visualise.xpl	Fri Apr 07 18:42:47 2017 +0100
+++ b/visualise.xpl	Mon Apr 10 17:29:24 2017 +0100
@@ -1,6 +1,7 @@
 <?xml version='1.0'?>
 <!DOCTYPE p:pipeline SYSTEM "../../../WWW/XML/XProc/docs/schemas/xproc.dtd" >
-<p:pipeline xmlns:p="http://www.w3.org/ns/xproc" xmlns:x="http://www.w3.org/1999/xhtml" version="1.0" name="vis">
+<p:pipeline xmlns:p="http://www.w3.org/ns/xproc" xmlns:x="http://www.w3.org/1999/xhtml" version="1.0" name="vis" xmlns:xf="http://www.w3.org/2005/xpath-functions" xmlns:cx="http://xmlcalabash.com/ns/extensions">
+ <p:import href="http://xmlcalabash.com/extension/steps/library-1.0.xpl"/>
  
  <p:documentation xmlns="http://www.w3.org/1999/xhtml">
   <div>Run this e.g. as follows:
@@ -20,6 +21,19 @@
   </p:documentation>
  </p:option>
  
+ <p:variable name="base" select="base-uri()">
+  <p:pipe step="vis" port="source"/>
+ </p:variable>
+ <p:variable name="buPat" select="'^(.*/xl/)worksheets/sheet([^/]*)\.xml$'"/><!-- regex applied to the source base URI: group 1 = path up to and including xl/, group 2 = text between 'sheet' and '.xml'; the dot is escaped so only a literal '.xml' suffix matches -->
+ 
+ <!-- Will only work in XProc 3 :-( <p:variable name="baseParts" select="analyze-string($base,)"/>-->
+ 
+ <p:variable name="root" select="analyze-string($base,$buPat)/xf:match/xf:group[@nr=1]"/>
+ <p:variable name="sheet-number" select="analyze-string($base,$buPat)/xf:match/xf:group[@nr=2]"/>
+ <!--<cx:message>
+  <p:with-option name="message" select="concat($base,'|',$sheet-number,'|')"/>
+ </cx:message>-->
+ 
  <p:load name="ss1">
   <p:with-option name="href" select="concat($elabDir,'/format.xsl')"/>
   <p:documentation>
@@ -35,6 +49,7 @@
    <p:pipe step="ss1" port="result"/>
   </p:input>
   <p:with-param name="elabDir" select="$elabDir"/>
+  <p:with-param name="xlDir" select="$root"/>
  </p:xslt> 
  
  <p:load name="ssr">
@@ -51,6 +66,8 @@
   <p:input port="stylesheet">
      <p:pipe step="ssr" port="result"/>
     </p:input>
+  <p:with-param name="sheet-number" select="$sheet-number"/>
+  <p:with-param name="xlDir" select="$root"/>
  </p:xslt>
  
  <p:load name="ssr2">