Mercurial > hg > xemacs-beta
changeset 5247:02d875ebd1ea
Make Lisp reader errors more informative for overlong hex and non-Latin-1 octal character escapes
src/ChangeLog addition:
2010-08-21 Aidan Kehoe <kehoea@parhasard.net>
* lread.c (read_escape):
Make error messages better reflect the text that was encountered,
when overlong hex character escapes or non-Latin-1 octal character
escapes are encountered.
man/ChangeLog addition:
2010-08-21 Aidan Kehoe <kehoea@parhasard.net>
* lispref/objects.texi (Character Type):
Go into more detail here on the specific type of error provoked on
overlong hex character escapes and non-Latin-1 octal character
escapes; give details of why the latter may be encountered, and
what to do with such code.
author | Aidan Kehoe <kehoea@parhasard.net> |
---|---|
date | Sat, 21 Aug 2010 19:02:44 +0100 |
parents | 04811a268716 |
children | 9d8aaa5ac16e |
files | man/ChangeLog man/lispref/objects.texi src/ChangeLog src/lread.c |
diffstat | 4 files changed, 46 insertions(+), 17 deletions(-) [+] |
line wrap: on
line diff
--- a/man/ChangeLog Sun Aug 15 15:42:45 2010 +0100 +++ b/man/ChangeLog Sat Aug 21 19:02:44 2010 +0100 @@ -1,3 +1,11 @@ +2010-08-21 Aidan Kehoe <kehoea@parhasard.net> + + * lispref/objects.texi (Character Type): + Go into more detail here on the specific type of error provoked on + overlong hex character escapes and non-Latin-1 octal character + escapes; give details of why the latter may be encountered, and + what to do with such code. + 2010-06-13 Stephen J. Turnbull <stephen@xemacs.org> * external-widget.texi: Correct FSF address in permission notice.
--- a/man/lispref/objects.texi Sun Aug 15 15:42:45 2010 +0100 +++ b/man/lispref/objects.texi Sat Aug 21 19:02:44 2010 +0100 @@ -623,6 +623,8 @@ @cindex backslash in character constant @cindex octal character code @cindex hexadecimal character code +@cindex Overlong hex character escape +@cindex Non-ISO-8859-1 octal character escape Finally, there are two read syntaxes involving character codes. It is not possible to represent multibyte or wide characters in this @@ -643,14 +645,21 @@ @samp{?\001} for the character @kbd{C-a}, and @code{?\002} for the character @kbd{C-b}. The reader will finalize the character and start reading the next token when a non-octal-digit is encountered or three -octal digits are read. +octal digits are read. When a given character code is above +@code{#o377}, the Lisp reader signals an @code{invalid-read-syntax} +error. Such errors are typically provoked by code written for older +versions of GNU Emacs, where the absence of the #o octal syntax for +integers made the character syntax convenient for non-character +values. Those older versions of GNU Emacs are long obsolete, so +changing the code to use the #o integer escape is the best +solution. @pxref{Numbers}. The second consists of a question mark followed by a backslash, the character @samp{x}, and the character code in hexadecimal (up to two hexadecimal digits); thus, @samp{?\x41} for the character @kbd{A}, @samp{?\x1} for the character @kbd{C-a}, and @code{?\x2} for the character @kbd{C-b}. If more than two hexadecimal codes are given, the -reader signals an error. +reader signals an @code{invalid-read-syntax} error. @example @group
--- a/src/ChangeLog Sun Aug 15 15:42:45 2010 +0100 +++ b/src/ChangeLog Sat Aug 21 19:02:44 2010 +0100 @@ -1,3 +1,10 @@ +2010-08-21 Aidan Kehoe <kehoea@parhasard.net> + + * lread.c (read_escape): + Make error messages better reflect the text that was encountered, + when overlong hex character escapes or non-Latin-1 octal character + escapes are encountered. + 2010-08-15 Aidan Kehoe <kehoea@parhasard.net> * print.c (print_symbol):
--- a/src/lread.c Sun Aug 15 15:42:45 2010 +0100 +++ b/src/lread.c Sat Aug 21 19:02:44 2010 +0100 @@ -1818,8 +1818,12 @@ } } if (i >= 0400) - syntax_error ("Non-ISO-8859-1 character specified with octal escape", - make_int (i)); + { + read_syntax_error ((Ascbyte *) emacs_sprintf_malloc + (NULL, + "Non-ISO-8859-1 octal character escape, " + "?\\%.3o", i)); + } return i; } @@ -1827,13 +1831,23 @@ /* A hex escape, as in ANSI C, except that we only allow latin-1 characters to be read this way. What is "\x4e03" supposed to mean, anyways, if the internal representation is hidden? - This is also consistent with the treatment of octal escapes. */ + This is also consistent with the treatment of octal escapes. + + Note that we don't accept ?\XAB as specifying the character with + numeric value 171; it must be ?\xAB. */ { +#define OVERLONG_INFO "Overlong hex character escape, ?\\x" + REGISTER Ichar i = 0; REGISTER int count = 0; + Ascbyte seen[] = OVERLONG_INFO "\0\0\0\0\0"; + REGISTER Ascbyte *seenp = seen + sizeof (OVERLONG_INFO) - 1; + +#undef OVERLONG_INFO + while (++count <= 2) { - c = readchar (readcharfun); + c = readchar (readcharfun), *seenp = c, ++seenp; /* Remember, can't use isdigit(), isalpha() etc. on Ichars */ if (c >= '0' && c <= '9') i = (i << 4) + (c - '0'); else if (c >= 'a' && c <= 'f') i = (i << 4) + (c - 'a') + 10; @@ -1847,21 +1861,12 @@ if (count == 3) { - c = readchar (readcharfun); + c = readchar (readcharfun), *seenp = c, ++seenp; if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')) { - Lisp_Object args[2]; - - if (c >= '0' && c <= '9') i = (i << 4) + (c - '0'); - else if (c >= 'a' && c <= 'f') i = (i << 4) + (c - 'a') + 10; - else if (c >= 'A' && c <= 'F') i = (i << 4) + (c - 'A') + 10; - - args[0] = build_ascstring ("?\\x%x"); - args[1] = make_int (i); - syntax_error ("Overlong hex character escape", - Fformat (2, args)); + read_syntax_error (seen); } unreadchar (readcharfun, c); }