changeset 5247:02d875ebd1ea

Make Lisp reader errors more informative with over-long hex, octal characters src/ChangeLog addition: 2010-08-21 Aidan Kehoe <kehoea@parhasard.net> * lread.c (read_escape): Make error messages better reflect the text that was encountered, when overlong hex character escapes or non-Latin-1 octal character escapes are encountered. man/ChangeLog addition: 2010-08-21 Aidan Kehoe <kehoea@parhasard.net> * lispref/objects.texi (Character Type): Go into more detail here on the specific type of error provoked on overlong hex character escapes and non-Latin-1 octal character escapes; give details of why the latter may be encountered, and what to do with such code.
author Aidan Kehoe <kehoea@parhasard.net>
date Sat, 21 Aug 2010 19:02:44 +0100
parents 04811a268716
children 9d8aaa5ac16e
files man/ChangeLog man/lispref/objects.texi src/ChangeLog src/lread.c
diffstat 4 files changed, 46 insertions(+), 17 deletions(-) [+]
line wrap: on
line diff
--- a/man/ChangeLog	Sun Aug 15 15:42:45 2010 +0100
+++ b/man/ChangeLog	Sat Aug 21 19:02:44 2010 +0100
@@ -1,3 +1,11 @@
+2010-08-21  Aidan Kehoe  <kehoea@parhasard.net>
+
+	* lispref/objects.texi (Character Type):
+	Go into more detail here on the specific type of error provoked on
+	overlong hex character escapes and non-Latin-1 octal character
+	escapes; give details of why the latter may be encountered, and
+	what to do with such code.
+
 2010-06-13  Stephen J. Turnbull  <stephen@xemacs.org>
 
 	* external-widget.texi: Correct FSF address in permission notice.
--- a/man/lispref/objects.texi	Sun Aug 15 15:42:45 2010 +0100
+++ b/man/lispref/objects.texi	Sat Aug 21 19:02:44 2010 +0100
@@ -623,6 +623,8 @@
 @cindex backslash in character constant
 @cindex octal character code
 @cindex hexadecimal character code
+@cindex Overlong hex character escape
+@cindex Non-ISO-8859-1 octal character escape
 
   Finally, there are two read syntaxes involving character codes.
 It is not possible to represent multibyte or wide characters in this
@@ -643,14 +645,21 @@
 @samp{?\001} for the character @kbd{C-a}, and @code{?\002} for the
 character @kbd{C-b}.  The reader will finalize the character and start
 reading the next token when a non-octal-digit is encountered or three
-octal digits are read. 
+octal digits are read.  When a given character code is above
+@code{#o377}, the Lisp reader signals an @code{invalid-read-syntax}
+error.  Such errors are typically provoked by code written for older
+versions of GNU Emacs, where the absence of the #o octal syntax for
+integers made the character syntax convenient for non-character
+values.  Those older versions of GNU Emacs are long obsolete, so
+changing the code to use the #o integer escape is the best
+solution.  @xref{Numbers}.
 
   The second consists of a question mark followed by a backslash, the
 character @samp{x}, and the character code in hexadecimal (up to two
 hexadecimal digits); thus, @samp{?\x41} for the character @kbd{A},
 @samp{?\x1} for the character @kbd{C-a}, and @code{?\x2} for the
 character @kbd{C-b}.  If more than two hexadecimal codes are given, the
-reader signals an error.
+reader signals an @code{invalid-read-syntax} error.
 
 @example
 @group
--- a/src/ChangeLog	Sun Aug 15 15:42:45 2010 +0100
+++ b/src/ChangeLog	Sat Aug 21 19:02:44 2010 +0100
@@ -1,3 +1,10 @@
+2010-08-21  Aidan Kehoe  <kehoea@parhasard.net>
+
+	* lread.c (read_escape):
+	Make error messages better reflect the text that was encountered,
+	when overlong hex character escapes or non-Latin-1 octal character
+	escapes are encountered.
+
 2010-08-15  Aidan Kehoe  <kehoea@parhasard.net>
 
 	* print.c (print_symbol):
--- a/src/lread.c	Sun Aug 15 15:42:45 2010 +0100
+++ b/src/lread.c	Sat Aug 21 19:02:44 2010 +0100
@@ -1818,8 +1818,12 @@
 	      }
 	  }
 	if (i >= 0400)
-	  syntax_error ("Non-ISO-8859-1 character specified with octal escape",
-			make_int (i));
+	  {
+	    read_syntax_error ((Ascbyte *) emacs_sprintf_malloc
+			       (NULL,
+				"Non-ISO-8859-1 octal character escape, "
+				"?\\%.3o", i));
+	  }
 	return i;
       }
 
@@ -1827,13 +1831,23 @@
       /* A hex escape, as in ANSI C, except that we only allow latin-1
 	 characters to be read this way.  What is "\x4e03" supposed to
 	 mean, anyways, if the internal representation is hidden?
-         This is also consistent with the treatment of octal escapes. */
+         This is also consistent with the treatment of octal escapes.
+
+         Note that we don't accept ?\XAB as specifying the character with
+         numeric value 171; it must be ?\xAB. */
       {
+#define OVERLONG_INFO "Overlong hex character escape, ?\\x"
+
 	REGISTER Ichar i = 0;
 	REGISTER int count = 0;
+	Ascbyte seen[] = OVERLONG_INFO "\0\0\0\0\0";
+	REGISTER Ascbyte *seenp = seen + sizeof (OVERLONG_INFO) - 1;
+
+#undef OVERLONG_INFO
+
 	while (++count <= 2)
 	  {
-	    c = readchar (readcharfun);
+	    c = readchar (readcharfun), *seenp = c, ++seenp;
 	    /* Remember, can't use isdigit(), isalpha() etc. on Ichars */
 	    if      (c >= '0' && c <= '9')  i = (i << 4) + (c - '0');
 	    else if (c >= 'a' && c <= 'f')  i = (i << 4) + (c - 'a') + 10;
@@ -1847,21 +1861,12 @@
 
         if (count == 3)
           {
-            c = readchar (readcharfun);
+            c = readchar (readcharfun), *seenp = c, ++seenp;
             if ((c >= '0' && c <= '9') ||
                 (c >= 'a' && c <= 'f') ||
                 (c >= 'A' && c <= 'F'))
               {
-                Lisp_Object args[2];
-
-                if      (c >= '0' && c <= '9')  i = (i << 4) + (c - '0');
-                else if (c >= 'a' && c <= 'f')  i = (i << 4) + (c - 'a') + 10;
-                else if (c >= 'A' && c <= 'F')  i = (i << 4) + (c - 'A') + 10;
-
-                args[0] = build_ascstring ("?\\x%x");
-                args[1] = make_int (i);
-                syntax_error ("Overlong hex character escape",
-                              Fformat (2, args));
+		read_syntax_error (seen);
               }
             unreadchar (readcharfun, c);
           }