diff src/lread.c @ 3543:c136144fe765

[xemacs-hg @ 2006-08-04 22:55:04 by aidan] Raw strings, from Python via SXEmacs
author aidan
date Fri, 04 Aug 2006 22:55:19 +0000
parents d1754e7f0cea
children 5724b7632db3
line wrap: on
line diff
--- a/src/lread.c	Fri Aug 04 21:50:46 2006 +0000
+++ b/src/lread.c	Fri Aug 04 22:55:19 2006 +0000
@@ -1670,15 +1670,56 @@
 
   return val;
 }
+
+/* A Unicode escape, as in C# (though we only permit them in strings
+   and characters, not arbitrarily in the source code.) */
+static Ichar
+read_unicode_escape (Lisp_Object readcharfun, int unicode_hex_count)
+{
+  REGISTER Ichar i = 0, c;
+  REGISTER int count = 0;
+  Lisp_Object lisp_char;
+  while (++count <= unicode_hex_count)
+    {
+      c = readchar (readcharfun);
+      /* Remember, can't use isdigit(), isalpha() etc. on Ichars */
+      if      (c >= '0' && c <= '9')  i = (i << 4) + (c - '0');
+      else if (c >= 'a' && c <= 'f')  i = (i << 4) + (c - 'a') + 10;
+      else if (c >= 'A' && c <= 'F')  i = (i << 4) + (c - 'A') + 10;
+      else
+	{
+	  syntax_error ("Non-hex digit used for Unicode escape",
+			make_char (c));
+	  break;
+	}
+    }
+
+  lisp_char = Funicode_to_char(make_int(i), Qnil);
+
+  if (EQ(Qnil, lisp_char))
+    {
+      /* This is ugly and horrible and trashes the user's data, but
+	 it's what unicode.c does. In the future, unicode-to-char
+	 should not return nil.  */
+#ifdef MULE
+      i = make_ichar (Vcharset_japanese_jisx0208, 34 + 128, 46 + 128);
+#else
+      i = '~';
+#endif
+      return i;
+    }
+  else
+    {
+      return XCHAR(lisp_char);
+    }
+}
+
 
 static Ichar
 read_escape (Lisp_Object readcharfun)
 {
   /* This function can GC */
   Ichar c = readchar (readcharfun);
-  /* \u allows up to four hex digits, \U up to eight. Default to the
-     behaviour for \u, and change this value in the case that \U is seen. */
-  int unicode_hex_count = 4;
 
   if (c < 0)
     signal_error (Qend_of_file, 0, READCHARFUN_MAYBE (readcharfun));
@@ -1797,49 +1838,10 @@
       }
     case 'U':
       /* Post-Unicode-2.0: Up to eight hex chars */
-      unicode_hex_count = 8;
+      return read_unicode_escape(readcharfun, 8);
     case 'u':
-
-      /* A Unicode escape, as in C# (though we only permit them in strings
-	 and characters, not arbitrarily in the source code.) */
-      {
-	REGISTER Ichar i = 0;
-	REGISTER int count = 0;
-	Lisp_Object lisp_char;
-	while (++count <= unicode_hex_count)
-	  {
-	    c = readchar (readcharfun);
-	    /* Remember, can't use isdigit(), isalpha() etc. on Ichars */
-	    if      (c >= '0' && c <= '9')  i = (i << 4) + (c - '0');
-	    else if (c >= 'a' && c <= 'f')  i = (i << 4) + (c - 'a') + 10;
-            else if (c >= 'A' && c <= 'F')  i = (i << 4) + (c - 'A') + 10;
-	    else
-	      {
-		syntax_error ("Non-hex digit used for Unicode escape",
-			      make_char (c));
-		break;
-	      }
-	  }
-
-	lisp_char = Funicode_to_char(make_int(i), Qnil);
-
-	if (EQ(Qnil, lisp_char))
-	  {
-	    /* This is ugly and horrible and trashes the user's data, but
-	       it's what unicode.c does. In the future, unicode-to-char
-	       should not return nil.  */
-#ifdef MULE
-	    i = make_ichar (Vcharset_japanese_jisx0208, 34 + 128, 46 + 128);
-#else
-	    i = '~';
-#endif
-            return i;
-	  }
-	else
-	  {
-	    return XCHAR(lisp_char);
-	  }
-      }
+      /* Unicode-2.0 and before; four hex chars. */
+      return read_unicode_escape(readcharfun, 4);
 
     default:
 	return c;
@@ -2270,6 +2272,113 @@
 }
 #endif
 
+static Lisp_Object
+read_string (Lisp_Object readcharfun, Ichar delim, int raw, 
+	     int honor_unicode)
+{
+#ifdef I18N3
+  /* #### If the input stream is translating, then the string
+     should be marked as translatable by setting its
+     `string-translatable' property to t.  .el and .elc files
+     normally are translating input streams.  See Fgettext()
+     and print_internal(). */
+#endif
+  Ichar c;
+  int cancel = 0;
+
+  Lstream_rewind(XLSTREAM(Vread_buffer_stream));
+  while ((c = readchar(readcharfun)) >= 0 && c != delim)
+    {
+    if (c == '\\') 
+      {
+	if (raw) 
+	  {
+	    c = readchar(readcharfun);
+	    if (honor_unicode && ('u' == c || 'U' == c))
+	      {
+		c = read_unicode_escape(readcharfun,
+					'U' == c ? 8 : 4);
+	      }
+	    else
+	      {
+		/* For raw strings, insert the
+		   backslash and the next char, */
+		Lstream_put_ichar(XLSTREAM
+				  (Vread_buffer_stream),
+				  '\\');
+	      }
+	  } 
+	else
+	  /* otherwise, backslash escapes the next char. */
+	  c = read_escape(readcharfun);
+      }
+    /* c is -1 if \ newline has just been seen */
+    if (c == -1) 
+      {
+	if (Lstream_byte_count
+	  (XLSTREAM(Vread_buffer_stream)) ==
+	  0)
+	  cancel = 1;
+      } 
+    else
+      Lstream_put_ichar(XLSTREAM
+			 (Vread_buffer_stream),
+			 c);
+    QUIT;
+    }
+  if (c < 0)
+    return Fsignal(Qend_of_file,
+		   list1(READCHARFUN_MAYBE(readcharfun)));
+
+  /* If purifying, and string starts with \ newline,
+     return zero instead.  This is for doc strings
+     that we are really going to find in lib-src/DOC.nn.nn  */
+  if (purify_flag && NILP(Vinternal_doc_file_name)
+      && cancel)
+    return Qzero;
+
+  Lstream_flush(XLSTREAM(Vread_buffer_stream));
+  return make_string(resizing_buffer_stream_ptr
+		     (XLSTREAM(Vread_buffer_stream)),
+		     Lstream_byte_count(XLSTREAM(Vread_buffer_stream)));
+}
+
+static Lisp_Object
+read_raw_string (Lisp_Object readcharfun)
+{
+  Ichar c;
+  Ichar permit_unicode = 0; 
+
+  do {
+    c = reader_nextchar(readcharfun);
+    switch (c) {
+      /* #r:engine"my sexy raw string" -- raw string w/ flags*/
+      /* case ':': */
+      /* #ru"Hi there\u20AC \U000020AC" -- raw string, honouring Unicode. */
+    case 'u':
+    case 'U':
+      permit_unicode = c; 
+      continue;
+
+      /* #r"my raw string" -- raw string */
+    case '\"':
+      return read_string(readcharfun, '\"', 1, permit_unicode);
+      /* invalid syntax */
+    default:
+      {
+	if (permit_unicode)
+	  {
+	    unreadchar(readcharfun, permit_unicode);
+	  }
+	unreadchar(readcharfun, c);
+	return Fsignal(Qinvalid_read_syntax,
+		       list1(build_string
+			     ("unrecognized raw string syntax")));
+      }
+    }
+  } while (1);
+}
+
 /* Read the next Lisp object from the stream READCHARFUN and return it.
    If the return value is a cons whose car is Qunbound, then read1()
    encountered a misplaced token (e.g. a right bracket, right paren,
@@ -2509,6 +2618,8 @@
 	  case 'x': return read_integer (readcharfun, 16);
             /* #b010 => 2 -- binary constant syntax */
 	  case 'b': return read_integer (readcharfun, 2);
+	    /* #r"raw\stringt" -- raw string syntax */
+	  case 'r': return read_raw_string(readcharfun);
             /* #s(foobar key1 val1 key2 val2) -- structure syntax */
 	  case 's': return read_structure (readcharfun);
 	  case '<':
@@ -2654,48 +2765,8 @@
       }
 
     case '\"':
-      {
-	/* String */
-#ifdef I18N3
-	/* #### If the input stream is translating, then the string
-	   should be marked as translatable by setting its
-	   `string-translatable' property to t.  .el and .elc files
-	   normally are translating input streams.  See Fgettext()
-	   and print_internal(). */
-#endif
-	int cancel = 0;
-
-	Lstream_rewind (XLSTREAM (Vread_buffer_stream));
-	while ((c = readchar (readcharfun)) >= 0
-	       && c != '\"')
-	  {
-	    if (c == '\\')
-	      c = read_escape (readcharfun);
-	    /* c is -1 if \ newline has just been seen */
-	    if (c == -1)
-	      {
-		if (Lstream_byte_count (XLSTREAM (Vread_buffer_stream)) == 0)
-		  cancel = 1;
-	      }
-	    else
-	      Lstream_put_ichar (XLSTREAM (Vread_buffer_stream), c);
-	    QUIT;
-	  }
-	if (c < 0)
-	  return Fsignal (Qend_of_file, list1 (READCHARFUN_MAYBE (readcharfun)));
-
-	/* If purifying, and string starts with \ newline,
-	   return zero instead.  This is for doc strings
-	   that we are really going to find in lib-src/DOC.nn.nn  */
-	if (purify_flag && NILP (Vinternal_doc_file_name) && cancel)
-	  return Qzero;
-
-	Lstream_flush (XLSTREAM (Vread_buffer_stream));
-	return
-	  make_string
-	  (resizing_buffer_stream_ptr (XLSTREAM (Vread_buffer_stream)),
-	   Lstream_byte_count (XLSTREAM (Vread_buffer_stream)));
-      }
+      /* String */
+      return read_string(readcharfun, '\"', 0, 1);
 
     default:
       {