Mercurial > hg > xemacs-beta

--- a/lisp/ChangeLog	Mon Jan 21 22:51:21 2008 +0100
+++ b/lisp/ChangeLog	Mon Jan 21 22:54:43 2008 +0100
@@ -45,6 +45,11 @@
 	with XEmacs. Map FUNC across the int value charset ranges of
 	CHARSET.

+2008-01-21  Aidan Kehoe  <kehoea@parhasard.net>
+
+	* info.el (Info-suffix-list):
+	Support LZMA compression, as used--oddly--by Mandriva Linux.
+
 2008-01-17  Mike Sperber  <mike@xemacs.org>

 	* files.el (insert-directory): Bind `coding-system-for-read' to
--- a/lisp/info.el	Mon Jan 21 22:51:21 2008 +0100
+++ b/lisp/info.el	Mon Jan 21 22:54:43 2008 +0100
@@ -479,6 +479,9 @@
 			      (".info.zip" . "unzip -c %s")
 			      (".y" . "cat %s | unyabba")
 			      ("info.y" . "cat %s | unyabba")
+                              ;; Mandriva Linux uses lzma.
+                              (".lzma" . "unlzma --stdout %s")
+                              (".info.lzma" . "unlzma --stdout %s")
 			      ;; These ones are for MS-DOS filenames.
 			      (".inf" . nil)
 			      (".igz" . "gzip -dc %s")
--- a/nt/ChangeLog	Mon Jan 21 22:51:21 2008 +0100
+++ b/nt/ChangeLog	Mon Jan 21 22:54:43 2008 +0100
@@ -1,3 +1,13 @@
+2008-01-18  Vin Shelton  <acs@xemacs.org>
+
+	* xemacs.mak: Use debug version of Intel's libm, if appropriate.
+
+2008-01-17  Vin Shelton  <acs@xemacs.org>
+
+	* config.inc.samp: Added USE_INTEL_COMPILER to support the Intel
+	compiler.
+	* xemacs.mak: Use USE_INTEL_COMPILER.
+
 2007-10-15  Adrian Aichner  <adrian@xemacs.org>

 	* xemacs.mak (INFO_FILES): Sync nt/xemacs.mak and man/Makefile
--- a/nt/config.inc.samp	Mon Jan 21 22:51:21 2008 +0100
+++ b/nt/config.inc.samp	Mon Jan 21 22:54:43 2008 +0100
@@ -257,3 +257,6 @@
 # *AND CURRENTLY HAPPENS WITH VC++*, at least when using pdump.  Therefore,
 # be warned!
 USE_UNION_TYPE=0
+
+# Set this to build XEmacs with the Intel C Compiler.
+USE_INTEL_COMPILER=0
--- a/nt/xemacs.mak	Mon Jan 21 22:51:21 2008 +0100
+++ b/nt/xemacs.mak	Mon Jan 21 22:54:43 2008 +0100
@@ -195,8 +195,11 @@
 !if !defined(SUPPORT_EDIT_AND_CONTINUE)
 SUPPORT_EDIT_AND_CONTINUE=0
 !endif
+!if !defined(BUILD_FOR_SETUP_KIT)
+BUILD_FOR_SETUP_KIT=0
+!endif

-!if !defined(BUILD_FOR_SETUP_KIT) || "$(BUILD_FOR_SETUP_KIT)" == "0"
+!if !$(BUILD_FOR_SETUP_KIT)
 OK_TO_USE_MSVCRTD=1
 !else
 OK_TO_USE_MSVCRTD=0
@@ -658,6 +661,16 @@
 	$(OUTDIR)\ntheap.obj $(OUTDIR)\vm-limit.obj
 !endif

+!if $(USE_INTEL_COMPILER)
+CC=icl
+# Use static library if possible
+INTEL_LIBS=libircmt.lib libmmt.lib
+# Debugging requires DLL version of libm
+!if $(DEBUG_XEMACS)
+INTEL_LIBS=libircmt.lib libmmd.lib
+!endif
+!endif
+
 ########################### Process options related to compilation.

 #
@@ -1290,7 +1303,7 @@
 TEMACS_LIBS=$(LASTFILE) $(OPT_LIBS) \
  oldnames.lib kernel32.lib user32.lib gdi32.lib comdlg32.lib advapi32.lib \
  shell32.lib wsock32.lib netapi32.lib winmm.lib winspool.lib ole32.lib \
- mpr.lib uuid.lib imm32.lib $(LIBC_LIB)
+ mpr.lib uuid.lib imm32.lib $(INTEL_LIBS) $(LIBC_LIB)
 TEMACS_COMMON_LFLAGS=-nologo $(LIBRARIES) $(DEBUG_FLAGS_LINK) \
  -base:0x1000000 -stack:0x800000 $(TEMACS_ENTRYPOINT) -subsystem:windows \
  -heap:0x00100000 -nodefaultlib $(PROFILE_FLAGS) setargv.obj
--- a/src/ChangeLog	Mon Jan 21 22:51:21 2008 +0100
+++ b/src/ChangeLog	Mon Jan 21 22:54:43 2008 +0100
@@ -1,3 +1,50 @@
+2008-01-21  Aidan Kehoe  <kehoea@parhasard.net>
+
+	* elhash.c (Fputhash): Document the return value.
+	(Fclrhash): Ditto.
+
+2007-12-26  Aidan Kehoe  <kehoea@parhasard.net>
+
+	* casetab.c:
+	Extend and correct some case table documentation.
+	* search.c (search_buffer):
+	Correct a bug where only the first entry for a character in the
+	case equivalence table was examined in determining if the
+	Boyer-Moore search algorithm is appropriate.
+
+	If there are case mappings outside of the charset and row of the
+	characters specified in the search string, those case mappings can
+	be safely ignored (and Boyer-Moore search can be used) if we know
+	from the buffer statistics that the corresponding characters cannot
+	occur.
+
+	* search.c (boyer_moore):
+	Assert that we haven't been passed a string with varying
+	characters sets or rows within character sets. That's what
+	simple_search is for.
+
+	In the very rare event that a character in the search string has a
+	canonical case mapping that is not in the same character set and
+	row, don't try to search for the canonical character, search for
+	some other character that is in the the desired character set and
+	row. Assert that the case table isn't corrupt.
+
+	Do not search for any character case mappings that cannot possibly
+	occur in the buffer, given the buffer metadata about its
+	contents.
+
+2008-01-19  Aidan Kehoe  <kehoea@parhasard.net>
+
+	* dired.c (Ffile_attributes): If bignums are available, use them
+	for the file size when necessary. If they are not, be clearer
+	about the check for whether the file size can fit in a Lisp
+	integer.
+
+2008-01-18  Jerry James  <james@xemacs.org>
+
+	* device-x.c (x_init_device): Don't write to path or read from
+	format when neither has been initialized.
+
 2008-01-16  Aidan Kehoe  <kehoea@parhasard.net>

 	* elhash.c (internal_hash):
--- a/src/casetab.c	Mon Jan 21 22:51:21 2008 +0100
+++ b/src/casetab.c	Mon Jan 21 22:54:43 2008 +0100
@@ -48,13 +48,28 @@
    or vice versa, both characters will have the same entry in the canon
    table.

-   (4) `equiv' lists the "equivalence classes" defined by `canon'.  Imagine
+   (4) `eqv' lists the "equivalence classes" defined by `canon'.  Imagine
    that all characters are divided into groups having the same `canon'
-   entry; these groups are called "equivalence classes" and `equiv' lists
-   them by linking the characters in each equivalence class together in a
-   circular list.
+   entry; these groups are called "equivalence classes" and `eqv' lists them
+   by linking the characters in each equivalence class together in a
+   circular list. That is, to find out all all the members of a given char's
+   equivalence classe, you need something like the following code:

-   `canon' is used when doing case-insensitive comparisons.  `equiv' is
+    (let* ((char ?i)
+           (original-char char)
+           (standard-case-eqv (case-table-eqv (standard-case-table))))
+      (loop
+        with res = (list char)
+        until (eq (setq char (get-char-table char standard-case-eqv))
+                  original-char)
+        do (push char res)
+        finally return res))
+
+   (Where #'case-table-eqv doesn't yet exist, and probably never will, given
+   that the C code needs to keep it in a consistent state so Lisp can't mess
+   around with it.)
+
+   `canon' is used when doing case-insensitive comparisons.  `eqv' is
    used in the Boyer-Moore search code.
    */
--- a/src/device-x.c	Mon Jan 21 22:51:21 2008 +0100
+++ b/src/device-x.c	Mon Jan 21 22:54:43 2008 +0100
@@ -703,6 +703,10 @@
 	path = alloca_extbytes (strlen (data_dir) + 13 + strlen (locale) + 7);
 	format = "%sapp-defaults/%s/Emacs";
       }
+    else
+      {
+	goto no_data_directory;
+      }

     /*
      * The general form for $LANG is <language>_<country>.<encoding>.  Try
@@ -730,6 +734,7 @@
 	XrmCombineFileDatabase (path, &db, False);
     }

+  no_data_directory:
     xfree (locale, Extbyte*);
  }
 #endif /* MULE */
--- a/src/dired.c	Mon Jan 21 22:51:21 2008 +0100
+++ b/src/dired.c	Mon Jan 21 22:54:43 2008 +0100
@@ -824,7 +824,7 @@
   First integer has high-order 16 bits of time, second has low 16 bits.
  5. Last modification time, likewise.
  6. Last status change time, likewise.
- 7. Size in bytes. (-1, if number is out of range).
+ 7. Size in bytes. (-1, if number out of range and no bignum support.)
  8. File modes, as a string of ten letters or dashes as in ls -l.
  9. t iff file's gid would change if file were deleted and recreated.
 10. inode number.
@@ -900,11 +900,14 @@
   values[4] = make_time (s.st_atime);
   values[5] = make_time (s.st_mtime);
   values[6] = make_time (s.st_ctime);
-  values[7] = make_int ((EMACS_INT) s.st_size);
-  /* If the size is out of range, give back -1.  */
-  /* #### Fix when Emacs gets bignums! */
-  if (XINT (values[7]) != s.st_size)
-    values[7] = make_int (-1);
+
+#ifndef HAVE_BIGNUM
+  values[7] = make_integer (NUMBER_FITS_IN_AN_EMACS_INT (s.st_size) ?
+                            (EMACS_INT)s.st_size : -1);
+#else
+  values[7] = make_integer (s.st_size);
+#endif
+
   filemodestring (&s, modes);
   values[8] = make_string ((Ibyte *) modes, 10);
 #if defined (BSD4_2) || defined (BSD4_3)	/* file gid will be dir gid */
--- a/src/elhash.c	Mon Jan 21 22:51:21 2008 +0100
+++ b/src/elhash.c	Mon Jan 21 22:54:43 2008 +0100
@@ -1158,7 +1158,7 @@
 }

 DEFUN ("puthash", Fputhash, 3, 3, 0, /*
-Hash KEY to VALUE in HASH-TABLE.
+Hash KEY to VALUE in HASH-TABLE, and return VALUE.
 */
        (key, value, hash_table))
 {
@@ -1222,6 +1222,7 @@

 DEFUN ("clrhash", Fclrhash, 1, 1, 0, /*
 Remove all entries from HASH-TABLE, leaving it empty.
+Return HASH-TABLE.
 */
        (hash_table))
 {
--- a/src/search.c	Mon Jan 21 22:51:21 2008 +0100
+++ b/src/search.c	Mon Jan 21 22:54:43 2008 +0100
@@ -1340,11 +1340,14 @@
     {
       int charset_base = -1;
       int boyer_moore_ok = 1;
-      Ibyte *pat = 0;
       Ibyte *patbuf = alloca_ibytes (len * MAX_ICHAR_LEN);
-      pat = patbuf;
+      Ibyte *pat = patbuf;
+
 #ifdef MULE
-      /* &&#### needs some 8-bit work here */
+      int entirely_one_byte_p = buf->text->entirely_one_byte_p;
+      int nothing_greater_than_0xff =
+        buf->text->num_8_bit_fixed_chars == BUF_Z(buf) - BUF_BEG (buf);
+
       while (len > 0)
 	{
 	  Ibyte tmp_str[MAX_ICHAR_LEN];
@@ -1367,23 +1370,68 @@
 	  inv_bytelen = set_itext_ichar (tmp_str, inverse);
 	  new_bytelen = set_itext_ichar (tmp_str, translated);

-	  if (new_bytelen != orig_bytelen || inv_bytelen != orig_bytelen)
-	    boyer_moore_ok = 0;
-	  if (translated != c || inverse != c)
-	    {
-	      /* Keep track of which charset and character set row
-		 contains the characters that need translation.
-		 Zero out the bits corresponding to the last byte.
-	      */
-	      int charset_base_code = c & ~ICHAR_FIELD3_MASK;
-	      if (charset_base == -1)
-		charset_base = charset_base_code;
-	      else if (charset_base != charset_base_code)
-		/* If two different rows appear, needing translation, then
-		   we cannot use boyer_moore search.  See the comment at the
-		   head of boyer_moore(). */
-		boyer_moore_ok = 0;
-	    }
+          if (-1 == charset_base)
+            {
+              /* Keep track of which charset and character set row
+                 contains the characters that need translation.
+
+                 Zero out the bits corresponding to the last byte. */
+              charset_base = c & ~ICHAR_FIELD3_MASK;
+            }
+
+          if (boyer_moore_ok && (translated != c || inverse != c))
+            {
+	      Ichar starting_c = c;
+	      int charset_base_code;
+
+	      do
+		{
+		  c = TRANSLATE (inverse_trt, c);
+
+                  /* If a character cannot occur in the buffer, ignore
+                     it. */
+                  if (c > 0x7F && entirely_one_byte_p)
+                    continue;
+
+                  if (c > 0xFF && nothing_greater_than_0xff)
+                    continue;
+
+                  charset_base_code = c & ~ICHAR_FIELD3_MASK;
+
+                  if (charset_base_code != charset_base)
+                    {
+                      /* If two different rows, or two different charsets,
+                         appear, needing translation, then we cannot use
+                         boyer_moore search.  See the comment at the head of
+                         boyer_moore(). */
+                      boyer_moore_ok = 0;
+                      break;
+                    }
+                } while (c != starting_c);
+
+              if (boyer_moore_ok && (charset_base !=
+                                     (translated & ~ICHAR_FIELD3_MASK)))
+                {
+                  /* In the rare event that the CANON entry for this
+                     character is not in the desired set, choose one
+                     that is, from the equivalence set. It doesn't much
+                     matter which. */
+                  Ichar starting_ch = translated;
+                  do
+                    {
+                      translated = TRANSLATE (inverse_trt, translated);
+
+                      if (charset_base == (translated & ~ICHAR_FIELD3_MASK))
+                        break;
+
+                    } while (starting_ch != translated);
+
+                  assert (starting_ch != translated);
+
+                  new_bytelen = set_itext_ichar (tmp_str, translated);
+                }
+            }
+
 	  memcpy (pat, tmp_str, new_bytelen);
 	  pat += new_bytelen;
 	  base_pat += orig_bytelen;
@@ -1551,14 +1599,14 @@
    makes it possible to translate just the last byte of a character,
    and do so after just a simple test of the context.

-   If that criterion is not satisfied, do not call this function.  */
+   If that criterion is not satisfied, do not call this function.  You will
+   get an assertion failure. */

 static Charbpos
 boyer_moore (struct buffer *buf, Ibyte *base_pat, Bytecount len,
 	     Bytebpos pos, Bytebpos lim, EMACS_INT n, Lisp_Object trt,
 	     Lisp_Object inverse_trt, int USED_IF_MULE (charset_base))
 {
-  /* &&#### needs some 8-bit work here */
   /* #### Someone really really really needs to comment the workings
      of this junk somewhat better.

@@ -1602,6 +1650,13 @@
 #ifdef MULE
   Ibyte translate_prev_byte = 0;
   Ibyte translate_anteprev_byte = 0;
+  /* These need to be rethought in the event that the internal format
+     changes, or in the event that num_8_bit_fixed_chars disappears
+     (entirely_one_byte_p can be trivially worked out by checking is the
+     byte count equal to the char count.)  */
+  int buffer_entirely_one_byte_p = buf->text->entirely_one_byte_p;
+  int buffer_nothing_greater_than_0xff =
+    buf->text->num_8_bit_fixed_chars == BUF_Z(buf) - BUF_BEG (buf);
 #endif
 #ifdef C_ALLOCA
   EMACS_INT BM_tab_space[0400];
@@ -1684,20 +1739,47 @@
 	      while (!ibyte_first_byte_p (*charstart))
 		charstart--;
 	      untranslated = itext_ichar (charstart);
-	      if (charset_base == (untranslated & ~ICHAR_FIELD3_MASK))
-		{
-		  ch = TRANSLATE (trt, untranslated);
-		  if (!ibyte_first_byte_p (*ptr))
-		    {
-		      translate_prev_byte = ptr[-1];
-		      if (!ibyte_first_byte_p (translate_prev_byte))
-			translate_anteprev_byte = ptr[-2];
-		    }
-		}
-	      else
-		{
-		  this_translated = 0;
-		  ch = *ptr;
+
+              /* We shouldn't have been passed a string with varying
+                 character sets or rows. That's what simple_search is
+                 for.  */
+              assert (charset_base == (untranslated & ~ICHAR_FIELD3_MASK));
+
+              ch = TRANSLATE (trt, untranslated);
+              if (!ibyte_first_byte_p (*ptr))
+                {
+                  translate_prev_byte = ptr[-1];
+                  if (!ibyte_first_byte_p (translate_prev_byte))
+                    translate_anteprev_byte = ptr[-2];
+                }
+
+              if (charset_base != (ch & ~ICHAR_FIELD3_MASK))
+                {
+                  /* In the very rare event that the CANON entry for this
+                     character is not in the desired set, choose one that
+                     is, from the equivalence set. It doesn't much matter
+                     which, since we're building our own cheesy equivalence
+                     table instead of using that belonging to the case
+                     table directly.
+
+                     We can get here if search_buffer has worked out that
+                     the buffer is entirely single width. */
+                  Ichar starting_ch = ch;
+                  do
+                    {
+                      ch = TRANSLATE (inverse_trt, ch);
+                      if (charset_base == (ch & ~ICHAR_FIELD3_MASK))
+                        break;
+
+                    } while (starting_ch != ch);
+
+                  /* If starting_ch is equal to ch, the case table is
+                     corrupt. (Any mapping in the canon table should be
+                     reflected in the equivalence table, and we know from
+                     the canon table that untranslated maps to starting_ch
+                     and that untranslated has the correct value for
+                     charset_base.) */
+                  assert (starting_ch != ch);
 		}
 	    }
 	  else
@@ -1713,28 +1795,34 @@
 	  if (i == infinity)
 	    stride_for_teases = BM_tab[j];
 	  BM_tab[j] = dirlen - i;
-	  /* A translation table is accompanied by its inverse --
-	     see comment in casetab.c. */
+	  /* A translation table is accompanied by its inverse -- see
+	     comment in casetab.c. */
 	  if (this_translated)
 	    {
 	      Ichar starting_ch = ch;
 	      EMACS_INT starting_j = j;
-	      while (1)
+	      do
 		{
 		  ch = TRANSLATE (inverse_trt, ch);
-		  if (ch > 0400)
-		    j = ((unsigned char) ch | 0200);
-		  else
-		    j = (unsigned char) ch;
-
-		  /* For all the characters that map into CH,
-		     set up simple_translate to map the last byte
-		     into STARTING_J.  */
-		  simple_translate[j] = (Ibyte) starting_j;
-		  if (ch == starting_ch)
-		    break;
-		  BM_tab[j] = dirlen - i;
-		}
+
+                  if (ch > 0x7F && buffer_entirely_one_byte_p)
+                    continue;
+
+                  if (ch > 0xFF && buffer_nothing_greater_than_0xff)
+                    continue;
+
+                  if (ch > 0400)
+                    j = ((unsigned char) ch | 0200);
+                  else
+                    j = (unsigned char) ch;
+
+                  /* For all the characters that map into CH, set up
+                     simple_translate to map the last byte into
+                     STARTING_J.  */
+                  simple_translate[j] = (Ibyte) starting_j;
+                  BM_tab[j] = dirlen - i;
+
+		} while (ch != starting_ch);
 	    }
 #else
 	  EMACS_INT k;