# HG changeset patch # User Henry S. Thompson # Date 1493141404 -3600 # Node ID ac3cd8de7a1011dab6af0559bedf6362f5bb16cc # Parent ae605b77d1e4aadc2f44ff031d70aa2a1fc83acc towards big rework of tokenisation diff -r ae605b77d1e4 -r ac3cd8de7a10 notes.txt --- a/notes.txt Tue Apr 25 12:24:31 2017 +0100 +++ b/notes.txt Tue Apr 25 18:30:04 2017 +0100 @@ -1,3 +1,33 @@ +Tokenisation patterns, derived from parse.py, derived from + https://sites.google.com/site/e90e50/random-topics/tool-for-parsing-formulas-in-excel + and + parser_formule_with_textbox_v01_2003.xla + linked to therein + +1 ("[^"]*") q + A text (delimited by double quotes) +2 (\{[^}]+}) m + A constant matrix +3 (,) c + A list (function parameter) separator +4 ([^=\-+*/();:,.$<>^!]+(?:\.[^=\-+*/();:,.$<>^!]+)*\() f + A function name followed by an opening parenthesis +5 ([)]) p + A closing parenthesis +6 (^=|\() l + The beginning of the formula or an opening + parenthesis (not part of a function) +7 ((?:(?:'[^']+')|(?:\[[0-9]+\][^!]*)|(?:[a-zA-Z_][a-zA-Z0-9._]*)!)) n + A sheet name (either delimited by single quotes, or + bracketed number plus optional string, + or simple name (syntax is a _guess_)) +8 (\$?[A-Z]+\$?[0-9]+) s or r + A cell reference +9 ([a-zA-Z_\\][a-zA-Z0-9._]*) v + A name (always for a variable?) +10 (.) x + Single characters not matched by the previous patterns +---------- You can't depend on That is, it's _true_, but you can have a table with shared formulae @@ -65,7 +95,10 @@ references FIXED Solo local vars are recursively dereferenced The definition table is in workbook.xml definedNames/definedName[@name=$name]/. - Sheet name to filename mapping for locals is in workbook.xml sheets/sheet[@name=$sname]/@sheetId + Sheet name to filename mapping for locals is in workbook.xml + sheets/sheet[@name=$sname]/@sheetId + These appear in definedName, single-quoted if (iff?) the sheet name has spaces + (or other specials?) ??? Variables on l or r of ranges are just looked up: if they are complex no recursion is done: the _semantics_ of this case are not clear to me, need a real-life example... diff -r ae605b77d1e4 -r ac3cd8de7a10 refs.xsl --- a/refs.xsl Tue Apr 25 12:24:31 2017 +0100 +++ b/refs.xsl Tue Apr 25 18:30:04 2017 +0100 @@ -1,125 +1,6 @@ - + - - - - - - ("[^"]*")|(\{[^}]+})|(,)|([^=\-+*/();:,.$<>^!]+(?:\.[^=\-+*/();:,.$<>^!]+)*\()|([)])|(^=|\()|((?:'[^']+')|(?:\[[0-9]+\][^!]*))|(\$?[A-Z]+\$?[0-9]+)|([a-zA-Z_\\][a-zA-Z0-9._]*)|(.) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff -r ae605b77d1e4 -r ac3cd8de7a10 tokenise.xsl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tokenise.xsl Tue Apr 25 18:30:04 2017 +0100 @@ -0,0 +1,138 @@ + + + + + + + + + ("[^"]*")|(\{[^}]+})|(,)|([^=\-+*/();:,.$<>^!]+(?:\.[^=\-+*/();:,.$<>^!]+)*\()|([)])|(^=|\()|((?:(?:'[^']+')|(?:\[[0-9]+\][^!]*)|(?:[a-zA-Z_][a-zA-Z0-9._]*)!))|(\$?[A-Z]+\$?[0-9]+)|([a-zA-Z_\\][a-zA-Z0-9._]*)|(.) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +