Mercurial > hg > xemacs-beta
comparison man/internals/internals.texi @ 5178:97eb4942aec8
merge
author | Ben Wing <ben@xemacs.org> |
---|---|
date | Mon, 29 Mar 2010 21:28:13 -0500 |
parents | 8b2f75cecb89 f965e31a35f0 |
children | 3889ef128488 |
comparison
equal
deleted
inserted
replaced
5177:b785049378e3 | 5178:97eb4942aec8 |
---|---|
159 that has been formatted into ASCII lists and tables. | 159 that has been formatted into ASCII lists and tables. |
160 | 160 |
161 Note: to define these routines, put point after the end of the definition | 161 Note: to define these routines, put point after the end of the definition |
162 and type C-x C-e. | 162 and type C-x C-e. |
163 | 163 |
164 (defun list-to-texinfo (b e) | 164 (defun convert-list-to-texinfo (b e) |
165 "Convert the selected region from an ASCII list to a Texinfo list." | 165 "Convert the selected region from an ASCII list to a Texinfo list." |
166 (interactive "r") | 166 (interactive "r") |
167 (save-restriction | 167 (save-restriction |
168 (narrow-to-region b e) | 168 (narrow-to-region b e) |
169 (goto-char (point-min)) | 169 (goto-char (point-min)) |
170 (let ((dash-type "^ *-+ +") | 170 (let ((dash-type "^ *\\(-+\\|o\\) +") |
171 ;; allow single-letter numbering or roman numerals | 171 ;; allow single-letter numbering or roman numerals |
172 (letter-type "^ *[[(]?\\([a-zA-Z]\\|[IVXivx]+\\)[]).] +") | 172 (letter-type "^ *[[(]?\\([a-zA-Z]\\|[IVXivx]+\\)[]).] +") |
173 (num-type "^ *[[(]?[0-9]+[]).] +") | 173 (num-type "^ *[[(]?[0-9]+[]).] +") |
174 dash regexp) | 174 dash regexp) |
175 (save-excursion | 175 (save-excursion |
237 (insert-char ?\ (- min (current-column))) | 237 (insert-char ?\ (- min (current-column))) |
238 (beginning-of-line) | 238 (beginning-of-line) |
239 (forward-char min)) | 239 (forward-char min)) |
240 (kill-rectangle b (point)))))) | 240 (kill-rectangle b (point)))))) |
241 | 241 |
242 (defun table-to-texinfo (b e) | 242 (defun convert-table-to-texinfo (b e) |
243 "Convert the selected region from an ASCII table to a Texinfo table. | 243 "Convert the selected region from an ASCII table to a Texinfo table. |
244 Assumes entries are separated by a blank line, and the first sexp in | 244 Assumes entries are separated by a blank line, and the first sexp in |
245 each entry is the table heading." | 245 each entry is the table heading." |
246 (interactive "r") | 246 (interactive "r") |
247 (save-restriction | 247 (save-restriction |
281 If the region is active, do the region; otherwise, go from point to the end | 281 If the region is active, do the region; otherwise, go from point to the end |
282 of the buffer. This query-replaces for various kinds of conventions used | 282 of the buffer. This query-replaces for various kinds of conventions used |
283 in text: @code{} surrounded by ` and ' or followed by a (); @strong{} | 283 in text: @code{} surrounded by ` and ' or followed by a (); @strong{} |
284 surrounded by *'s; @file{} something that looks like a file name." | 284 surrounded by *'s; @file{} something that looks like a file name." |
285 (interactive) | 285 (interactive) |
286 (if (and (not no-narrow) (region-active-p)) | 286 (save-excursion |
287 (save-restriction | 287 (if (and (not no-narrow) (region-active-p)) |
288 (narrow-to-region (region-beginning) (region-end)) | 288 (save-restriction |
289 (convert-text-to-texinfo t)) | 289 (narrow-to-region (region-beginning) (region-end)) |
290 (let ((p (point)) | 290 (goto-char (region-beginning)) |
291 (case-replace nil)) | 291 (zmacs-deactivate-region) |
292 (query-replace-regexp "`\\([^']+\\)'\\([^']\\)" "@code{\\1}\\2" nil) | 292 (convert-text-to-texinfo t)) |
293 (goto-char p) | 293 (let ((p (point)) |
294 (query-replace-regexp "\\(\\Sw\\)\\*\\(\\(?:\\s_\\|\\sw\\)+\\)\\*\\([^A-Za-z.}]\\)" "\\1@strong{\\2}\\3" nil) | 294 (case-replace nil)) |
295 (goto-char p) | 295 (message "Point is %d" (point)) |
296 (query-replace-regexp "\\(\\(\\s_\\|\\sw\\)+()\\)\\([^}]\\)" "@code{\\1}\\3" nil) | 296 (query-replace-regexp "`\\([^']+\\)'\\([^']\\)" "@code{\\1}\\2" nil) |
297 (goto-char p) | 297 (goto-char p) |
298 (query-replace-regexp "\\(\\(\\s_\\|\\sw\\)+\\.[A-Za-z]+\\)\\([^A-Za-z.}]\\)" "@file{\\1}\\3" nil) | 298 (query-replace-regexp "\\(\\Sw\\)\\*\\(\\(?:\\s_\\|\\sw\\)+\\)\\*\\([^A-Za-z.}]\\)" "\\1@strong{\\2}\\3" nil) |
299 ))) | 299 (goto-char p) |
300 (query-replace-regexp "\\(\\(\\s_\\|\\sw\\)+()\\)\\([^}]\\)" "@code{\\1}\\3" nil) | |
301 (goto-char p) | |
302 (query-replace-regexp "\\(\\(\\s_\\|\\sw\\)+\\.[A-Za-z]+\\)\\([^A-Za-z.}]\\)" "@file{\\1}\\3" nil) | |
303 )))) | |
300 | 304 |
301 4. Adding new sections: | 305 4. Adding new sections: |
302 ----------------------- | 306 ----------------------- |
303 | 307 |
304 NOTE: These are in the form of macros. #### FIXME Convert them to | 308 NOTE: These are in the form of macros. #### FIXME Convert them to |
1236 XEmacs is a powerful, customizable text editor and development | 1240 XEmacs is a powerful, customizable text editor and development |
1237 environment. It began in 1991 as Lucid Emacs, which was in turn | 1241 environment. It began in 1991 as Lucid Emacs, which was in turn |
1238 derived from GNU Emacs, a program written by Richard Stallman of the | 1242 derived from GNU Emacs, a program written by Richard Stallman of the |
1239 Free Software Foundation. GNU Emacs dates back to 1985 and was | 1243 Free Software Foundation. GNU Emacs dates back to 1985 and was |
1240 modelled after Unipress Emacs, an editor written by James Gosling in | 1244 modelled after Unipress Emacs, an editor written by James Gosling in |
1241 1981 and based on a series of other "Emacs"-like editors, including | 1245 1981 and based on a series of other ``Emacs''-like editors, including |
1242 EINE (EINE Is Not EMACS), c. 1976, by Dan Weinreb, which ran on the | 1246 EINE (EINE Is Not EMACS), c. 1976, by Dan Weinreb, which ran on the |
1243 MIT Lisp Machine and was the first Emacs written in Lisp; ZWEI (ZWEI | 1247 MIT Lisp Machine and was the first Emacs written in Lisp; ZWEI (ZWEI |
1244 Was EINE Initially), c. 1978, by Dan Weinreb and Mike McMahon; Multics | 1248 Was EINE Initially), c. 1978, by Dan Weinreb and Mike McMahon; Multics |
1245 Emacs, c. 1978, by Bernie Greenberg, which was written in MacLisp and | 1249 Emacs, c. 1978, by Bernie Greenberg, which was written in MacLisp and |
1246 also used Lisp as its extension language; and ZMACS, c. 1980, a direct | 1250 also used Lisp as its extension language; and ZMACS, c. 1980, a direct |
1247 descendant of ZWEI that ran on the Symbolics LM-2, LMI LispM, and | 1251 descendant of ZWEI that ran on the Symbolics LM-2, LMI LispM, and |
1248 later, TI Explorer (1983-1989). These in turn were inspired by the | 1252 later, TI Explorer (1983-1989). These in turn were inspired by the |
1249 first Emacs, a package called EMACS, written in 1976 by Richard | 1253 first Emacs, a package called EMACS, written in 1976 by Richard |
1250 Stallman, Guy Steele, and Dave Moon. This was a merger of TECMAC and | 1254 Stallman, Guy Steele, and Dave Moon. This was a merger of TECMAC and |
1251 TMACS, a pair of "TECO-macro realtime editors" written by Guy Steele, | 1255 TMACS, a pair of ``TECO-macro realtime editors'' written by Guy Steele, |
1252 Dave Moon, Richard Greenblatt, Charles Frankston, et al., and added a | 1256 Dave Moon, Richard Greenblatt, Charles Frankston, et al., and added a |
1253 dynamic loader and Meta-key cmds. It ran under ITS (the Incompatible | 1257 dynamic loader and Meta-key cmds. It ran under ITS (the Incompatible |
1254 Timesharing System) on a DEC PDP 10 and under TWENEX on a Tops-20 and | 1258 Timesharing System) on a DEC PDP 10 and under TWENEX on a Tops-20 and |
1255 was written in TECO and PDP 10 assembly. ITS was one of the first | 1259 was written in TECO and PDP 10 assembly. ITS was one of the first |
1256 time-sharing operating systems and dates back well before Unix. ITS, | 1260 time-sharing operating systems and dates back well before Unix. ITS, |
1284 M. Stallman (RMS) and James Gosling (the creator of Java); its extension | 1288 M. Stallman (RMS) and James Gosling (the creator of Java); its extension |
1285 language was known as @dfn{Mocklisp}. This version of Emacs-in-C formed | 1289 language was known as @dfn{Mocklisp}. This version of Emacs-in-C formed |
1286 the basis for the early versions of GNU Emacs and also for Gosling's | 1290 the basis for the early versions of GNU Emacs and also for Gosling's |
1287 Unipress Emacs, a commercial product. Because of bad blood between the | 1291 Unipress Emacs, a commercial product. Because of bad blood between the |
1288 two over the issue of commercialism, RMS pretty much disowned this | 1292 two over the issue of commercialism, RMS pretty much disowned this |
1289 collaboration, referring to it as "Gosling Emacs". | 1293 collaboration, referring to it as ``Gosling Emacs''. |
1290 | 1294 |
1291 At this point we pick up with a time line of events. (A broader timeline | 1295 At this point we pick up with a time line of events. (A broader timeline |
1292 is available at @uref{http://www.jwz.org/doc/emacs-timeline.html, | 1296 is available at @uref{http://www.jwz.org/doc/emacs-timeline.html, |
1293 ``Emacs Timeline''}.) | 1297 ``Emacs Timeline''}.) |
1294 | 1298 |
1575 redisplay code, preliminary I18N support, code merged from GNU Emacs | 1579 redisplay code, preliminary I18N support, code merged from GNU Emacs |
1576 19.8 beta) | 1580 19.8 beta) |
1577 @item | 1581 @item |
1578 Version 19.9 released January 12, 1994. (Scrollbars, Athena.) | 1582 Version 19.9 released January 12, 1994. (Scrollbars, Athena.) |
1579 @item | 1583 @item |
1580 Version 19.10 released May 27, 1994. (Uses `configure'; code merged | 1584 Version 19.10 released May 27, 1994. (Uses @code{configure}; code merged |
1581 from GNU Emacs 19.23 beta and further merging with Epoch 4.0) Known as | 1585 from GNU Emacs 19.23 beta and further merging with Epoch 4.0) Known as |
1582 "Lucid Emacs" when shipped by Lucid, and as "XEmacs" when shipped by | 1586 ``Lucid Emacs'' when shipped by Lucid, and as ``XEmacs'' when shipped by |
1583 Sun; but Lucid went out of business a few days later and it's unclear | 1587 Sun; but Lucid went out of business a few days later and it's unclear |
1584 whether very many copies of 19.10 were released by Lucid. (Last release by | 1588 whether very many copies of 19.10 were released by Lucid. (Last release by |
1585 Jamie Zawinski.) | 1589 Jamie Zawinski.) |
1586 @end itemize | 1590 @end itemize |
1587 | 1591 |
1887 rewritten redisplay, TTY support, multi-device support, device and | 1891 rewritten redisplay, TTY support, multi-device support, device and |
1888 console objects, specifiers, glyphs, toolbars, horizontal scrollbars, | 1892 console objects, specifiers, glyphs, toolbars, horizontal scrollbars, |
1889 Lucid scrollbar widget, 3-d modeline, stay-up Lucid menus, resizable | 1893 Lucid scrollbar widget, 3-d modeline, stay-up Lucid menus, resizable |
1890 minibuffer, echo area is a true buffer, MD5 hashing support, expanded | 1894 minibuffer, echo area is a true buffer, MD5 hashing support, expanded |
1891 menubar, redone menu specification format (including menu filters), | 1895 menubar, redone menu specification format (including menu filters), |
1892 rewritten extents, renamed "screen" to "frame", misc-user events, | 1896 rewritten extents, renamed ``screen'' to ``frame'', misc-user events, |
1893 rewritten face code, rewritten mouse code, warnings system, CL | 1897 rewritten face code, rewritten mouse code, warnings system, CL |
1894 backquote syntax, critical C-g, code merging with GNU Emacs 19.28. | 1898 backquote syntax, critical C-g, code merging with GNU Emacs 19.28. |
1895 New packages Hyperbole, OOBR, hm--html-menus, viper, lazy-lock, | 1899 New packages Hyperbole, OOBR, hm--html-menus, viper, lazy-lock, |
1896 ksh-mode, rsz-minibuf.) | 1900 ksh-mode, rsz-minibuf.) |
1897 @item | 1901 @item |
1935 version 20.4 released February 28, 1998. | 1939 version 20.4 released February 28, 1998. |
1936 @item | 1940 @item |
1937 version 21.0.60 released December 10, 1998. (The version naming scheme was | 1941 version 21.0.60 released December 10, 1998. (The version naming scheme was |
1938 changed at this point: [a] the second version number is odd for stable | 1942 changed at this point: [a] the second version number is odd for stable |
1939 versions, even for beta versions; [b] a third version number is added, | 1943 versions, even for beta versions; [b] a third version number is added, |
1940 replacing the "beta xxx" ending for beta versions and allowing for | 1944 replacing the ``beta xxx'' ending for beta versions and allowing for |
1941 periodic maintenance releases for stable versions. Therefore, 21.0 was | 1945 periodic maintenance releases for stable versions. Therefore, 21.0 was |
1942 never "officially" released; similarly for 21.2, etc.) | 1946 never ``officially'' released; similarly for 21.2, etc.) |
1943 @item | 1947 @item |
1944 version 21.0.61 released January 4, 1999. | 1948 version 21.0.61 released January 4, 1999. |
1945 @item | 1949 @item |
1946 version 21.0.63 released February 3, 1999. | 1950 version 21.0.63 released February 3, 1999. |
1947 @item | 1951 @item |
1953 @item | 1957 @item |
1954 version 21.0.67 released March 25, 1999. | 1958 version 21.0.67 released March 25, 1999. |
1955 @item | 1959 @item |
1956 version 21.1.2 released May 14, 1999. (This is the followup to 21.0.67. | 1960 version 21.1.2 released May 14, 1999. (This is the followup to 21.0.67. |
1957 The second version number was bumped to indicate the beginning of the | 1961 The second version number was bumped to indicate the beginning of the |
1958 "stable" series.) | 1962 ``stable'' series.) |
1959 @item | 1963 @item |
1960 version 21.1.3 released June 26, 1999. | 1964 version 21.1.3 released June 26, 1999. |
1961 @item | 1965 @item |
1962 version 21.1.4 released July 8, 1999. | 1966 version 21.1.4 released July 8, 1999. |
1963 @item | 1967 @item |
2043 @item | 2047 @item |
2044 version 21.2.39 released December 31, 2000. | 2048 version 21.2.39 released December 31, 2000. |
2045 @item | 2049 @item |
2046 version 21.2.40 released January 8, 2001. | 2050 version 21.2.40 released January 8, 2001. |
2047 @item | 2051 @item |
2048 version 21.2.41 "Polyhymnia" released January 17, 2001. | 2052 version 21.2.41 ``Polyhymnia'' released January 17, 2001. |
2049 @item | 2053 @item |
2050 version 21.2.42 "Poseidon" released January 20, 2001. | 2054 version 21.2.42 ``Poseidon'' released January 20, 2001. |
2051 @item | 2055 @item |
2052 version 21.2.43 "Terspichore" released January 26, 2001. | 2056 version 21.2.43 ``Terspichore'' released January 26, 2001. |
2053 @item | 2057 @item |
2054 version 21.2.44 "Thalia" released February 8, 2001. | 2058 version 21.2.44 ``Thalia'' released February 8, 2001. |
2055 @item | 2059 @item |
2056 version 21.2.45 "Thelxepeia" released February 23, 2001. | 2060 version 21.2.45 ``Thelxepeia'' released February 23, 2001. |
2057 @item | 2061 @item |
2058 version 21.2.46 "Urania" released March 21, 2001. | 2062 version 21.2.46 ``Urania'' released March 21, 2001. |
2059 @item | 2063 @item |
2060 version 21.2.47 "Zephir" released April 14, 2001. | 2064 version 21.2.47 ``Zephir'' released April 14, 2001. |
2061 @item | 2065 @item |
2062 XEmacs 21.4.0 "Solid Vapor" released April 16, 2001. | 2066 XEmacs 21.4.0 ``Solid Vapor'' released April 16, 2001. |
2063 @item | 2067 @item |
2064 XEmacs 21.4.1 "Copyleft" released April 19, 2001. | 2068 XEmacs 21.4.1 ``Copyleft'' released April 19, 2001. |
2065 @item | 2069 @item |
2066 XEmacs 21.4.2 "Developer-Friendly Unix APIs" released May 10, 2001. | 2070 XEmacs 21.4.2 ``Developer-Friendly Unix APIs'' released May 10, 2001. |
2067 @item | 2071 @item |
2068 XEmacs 21.4.3 "Academic Rigor" released May 17, 2001. | 2072 XEmacs 21.4.3 ``Academic Rigor'' released May 17, 2001. |
2069 @item | 2073 @item |
2070 XEmacs 21.4.4 "Artificial Intelligence" released July 28, 2001. | 2074 XEmacs 21.4.4 ``Artificial Intelligence'' released July 28, 2001. |
2071 @item | 2075 @item |
2072 XEmacs 21.4.5 "Civil Service" released October 23, 2001. | 2076 XEmacs 21.4.5 ``Civil Service'' released October 23, 2001. |
2073 @item | 2077 @item |
2074 XEmacs 21.4.6 "Common Lisp" released December 17, 2001. | 2078 XEmacs 21.4.6 ``Common Lisp'' released December 17, 2001. |
2075 @item | 2079 @item |
2076 XEmacs 21.4.7 "Economic Science" released May 4, 2002. | 2080 XEmacs 21.4.7 ``Economic Science'' released May 4, 2002. |
2077 @item | 2081 @item |
2078 XEmacs 21.4.8 "Honest Recruiter" released May 9, 2002. | 2082 XEmacs 21.4.8 ``Honest Recruiter'' released May 9, 2002. |
2079 @item | 2083 @item |
2080 XEmacs 21.4.9 "Informed Management" released August 23, 2002. | 2084 XEmacs 21.4.9 ``Informed Management'' released August 23, 2002. |
2081 @item | 2085 @item |
2082 XEmacs 21.4.10 "Military Intelligence" released November 2, 2002. | 2086 XEmacs 21.4.10 ``Military Intelligence'' released November 2, 2002. |
2083 @item | 2087 @item |
2084 XEmacs 21.4.11 "Native Windows TTY Support" released January 3, 2003. | 2088 XEmacs 21.4.11 ``Native Windows TTY Support'' released January 3, 2003. |
2085 @item | 2089 @item |
2086 XEmacs 21.4.12 "Portable Code" released January 15, 2003. | 2090 XEmacs 21.4.12 ``Portable Code'' released January 15, 2003. |
2087 @item | 2091 @item |
2088 XEmacs 21.4.13 "Rational FORTRAN" released May 25, 2003. | 2092 XEmacs 21.4.13 ``Rational FORTRAN'' released May 25, 2003. |
2089 @item | 2093 @item |
2090 XEmacs 21.4.14 "Reasonable Discussion" released September 3, 2003. | 2094 XEmacs 21.4.14 ``Reasonable Discussion'' released September 3, 2003. |
2091 @item | 2095 @item |
2092 XEmacs 21.4.15 "Security Through Obscurity" released February 2, 2004. | 2096 XEmacs 21.4.15 ``Security Through Obscurity'' released February 2, 2004. |
2093 @item | 2097 @item |
2094 XEmacs 21.4.16 "Successful IPO" released December 5, 2004. | 2098 XEmacs 21.4.16 ``Successful IPO'' released December 5, 2004. |
2095 @item | 2099 @item |
2096 version 21.5.0 "alfalfa" released April 18, 2001. | 2100 version 21.5.0 ``alfalfa'' released April 18, 2001. |
2097 @item | 2101 @item |
2098 version 21.5.1 "anise" released May 9, 2001. | 2102 version 21.5.1 ``anise'' released May 9, 2001. |
2099 @item | 2103 @item |
2100 version 21.5.2 "artichoke" released July 28, 2001. | 2104 version 21.5.2 ``artichoke'' released July 28, 2001. |
2101 @item | 2105 @item |
2102 version 21.5.3 "asparagus" released September 7, 2001. | 2106 version 21.5.3 ``asparagus'' released September 7, 2001. |
2103 @item | 2107 @item |
2104 version 21.5.4 "bamboo" released January 8, 2002. | 2108 version 21.5.4 ``bamboo'' released January 8, 2002. |
2105 @item | 2109 @item |
2106 version 21.5.5 "beets" released March 5, 2002. | 2110 version 21.5.5 ``beets'' released March 5, 2002. |
2107 @item | 2111 @item |
2108 version 21.5.6 "bok choi" released April 5, 2002. | 2112 version 21.5.6 ``bok choi'' released April 5, 2002. |
2109 @item | 2113 @item |
2110 version 21.5.7 "broccoflower" released July 2, 2002. | 2114 version 21.5.7 ``broccoflower'' released July 2, 2002. |
2111 @item | 2115 @item |
2112 version 21.5.8 "broccoli" released July 27, 2002. | 2116 version 21.5.8 ``broccoli'' released July 27, 2002. |
2113 @item | 2117 @item |
2114 version 21.5.9 "brussels sprouts" released August 30, 2002. | 2118 version 21.5.9 ``brussels sprouts'' released August 30, 2002. |
2115 @item | 2119 @item |
2116 version 21.5.10 "burdock" released January 4, 2003. | 2120 version 21.5.10 ``burdock'' released January 4, 2003. |
2117 @item | 2121 @item |
2118 version 21.5.11 "cabbage" released February 16, 2003. | 2122 version 21.5.11 ``cabbage'' released February 16, 2003. |
2119 @item | 2123 @item |
2120 version 21.5.12 "carrot" released April 24, 2003. | 2124 version 21.5.12 ``carrot'' released April 24, 2003. |
2121 @item | 2125 @item |
2122 version 21.5.13 "cauliflower" released May 10, 2003. | 2126 version 21.5.13 ``cauliflower'' released May 10, 2003. |
2123 @item | 2127 @item |
2124 version 21.5.14 "cassava" released June 1, 2003. | 2128 version 21.5.14 ``cassava'' released June 1, 2003. |
2125 @item | 2129 @item |
2126 version 21.5.15 "celery" released September 3, 2003. | 2130 version 21.5.15 ``celery'' released September 3, 2003. |
2127 @item | 2131 @item |
2128 version 21.5.16 "celeriac" released September 26, 2003. | 2132 version 21.5.16 ``celeriac'' released September 26, 2003. |
2129 @item | 2133 @item |
2130 version 21.5.17 "chayote" released March 22, 2004. | 2134 version 21.5.17 ``chayote'' released March 22, 2004. |
2131 @item | 2135 @item |
2132 version 21.5.18 "chestnut" released October 22, 2004. | 2136 version 21.5.18 ``chestnut'' released October 22, 2004. |
2133 @end itemize | 2137 @end itemize |
2134 | 2138 |
2135 @node The XEmacs Split, XEmacs from the Outside, A History of Emacs, Top | 2139 @node The XEmacs Split, XEmacs from the Outside, A History of Emacs, Top |
2136 @chapter The XEmacs Split | 2140 @chapter The XEmacs Split |
2137 @cindex XEmacs split | 2141 @cindex XEmacs split |
2151 to cooperate a bit with RMS, and the two versions of Emacs will merge. In | 2155 to cooperate a bit with RMS, and the two versions of Emacs will merge. In |
2152 fact there have been six to seven major attempts at merging, each running | 2156 fact there have been six to seven major attempts at merging, each running |
2153 hundreds of messages long and all of them coming from the XEmacs side. All | 2157 hundreds of messages long and all of them coming from the XEmacs side. All |
2154 have failed because they have eventually come to the same conclusion, which | 2158 have failed because they have eventually come to the same conclusion, which |
2155 is that RMS has no real interest in cooperation at all. If you work with | 2159 is that RMS has no real interest in cooperation at all. If you work with |
2156 him, you have to do it his way -- "my way or the highway". Specifically: | 2160 him, you have to do it his way -- ``my way or the highway''. Specifically: |
2157 | 2161 |
2158 @enumerate | 2162 @enumerate |
2159 @item | 2163 @item |
2160 | 2164 |
2161 RMS insists on having legal papers signed for every bit of code that goes | 2165 RMS insists on having legal papers signed for every bit of code that goes |
4046 zero or more Kanji characters followed by zero or more | 4050 zero or more Kanji characters followed by zero or more |
4047 Hiragana characters. | 4051 Hiragana characters. |
4048 @end display | 4052 @end display |
4049 | 4053 |
4050 Then, the problem is that now we can't say that a sequence of | 4054 Then, the problem is that now we can't say that a sequence of |
4051 word-constituents makes up a word. For instance, both Hiragana "A" | 4055 word-constituents makes up a word. For instance, both Hiragana ``A'' |
4052 and Kanji "KAN" are word-constituents but the sequence of these two | 4056 and Kanji ``KAN'' are word-constituents but the sequence of these two |
4053 letters can't be a single word. | 4057 letters can't be a single word. |
4054 | 4058 |
4055 So, we introduced Sextword for Japanese letters. | 4059 So, we introduced Sextword for Japanese letters. |
4056 @end quotation | 4060 @end quotation |
4057 | 4061 |
5006 @item | 5010 @item |
5007 Any header-file declarations of the sort | 5011 Any header-file declarations of the sort |
5008 | 5012 |
5009 struct foobar; | 5013 struct foobar; |
5010 | 5014 |
5011 go into the "types" section of lisp.h. | 5015 go into the ``types'' section of @file{lisp.h}. |
5012 @end itemize | 5016 @end itemize |
5013 | 5017 |
5014 @node Writing New Modules, Working with Lisp Objects, Introduction to Writing C Code, Rules When Writing New C Code | 5018 @node Writing New Modules, Working with Lisp Objects, Introduction to Writing C Code, Rules When Writing New C Code |
5015 @section Writing New Modules | 5019 @section Writing New Modules |
5016 @cindex writing new modules | 5020 @cindex writing new modules |
5269 style now forbids passing pointers to @samp{Lisp_<Type>} structures into | 5273 style now forbids passing pointers to @samp{Lisp_<Type>} structures into |
5270 or out of a function; instead, a @samp{Lisp_Object} should be passed or | 5274 or out of a function; instead, a @samp{Lisp_Object} should be passed or |
5271 returned (created using @samp{wrap_<type>}, if necessary). | 5275 returned (created using @samp{wrap_<type>}, if necessary). |
5272 | 5276 |
5273 @c #### declaration | 5277 @c #### declaration |
5274 @item DECLARE_LRECORD (<type>, Lisp_<Type>) | 5278 @item DECLARE_LISP_OBJECT (<type>, Lisp_<Type>) |
5275 Declares an @samp{lrecord} for @samp{<Type>}, which is the unit of | 5279 Declares a Lisp object for @samp{<Type>}, which is the unit of |
5276 allocation. | 5280 allocation. |
5277 | 5281 |
5278 @item #define X<TYPE>(x) XRECORD (x, <type>, Lisp_<Type>) | 5282 @item #define X<TYPE>(x) XRECORD (x, <type>, Lisp_<Type>) |
5279 Turns a @code{Lisp_Object} into a pointer to @samp{struct Lisp_<Type>}. | 5283 Turns a @code{Lisp_Object} into a pointer to @samp{struct Lisp_<Type>}. |
5280 | 5284 |
5336 Here is a checklist of things to do when creating a new lisp object type | 5340 Here is a checklist of things to do when creating a new lisp object type |
5337 named @var{foo}: | 5341 named @var{foo}: |
5338 | 5342 |
5339 @enumerate | 5343 @enumerate |
5340 @item | 5344 @item |
5341 create @var{foo}.h | 5345 Create @var{foo}.h |
5342 @item | 5346 @item |
5343 create @var{foo}.c | 5347 Create @var{foo}.c |
5344 @item | 5348 @item |
5345 add definitions of @code{syms_of_@var{foo}}, etc. to @file{@var{foo}.c} | 5349 Add definitions of @code{syms_of_@var{foo}}, etc. to @file{@var{foo}.c} |
5346 @item | 5350 @item |
5347 add declarations of @code{syms_of_@var{foo}}, etc. to @file{symsinit.h} | 5351 Add declarations of @code{syms_of_@var{foo}}, etc. to @file{symsinit.h} |
5348 @item | 5352 @item |
5349 add calls to @code{syms_of_@var{foo}}, etc. to @file{emacs.c} | 5353 Add calls to @code{syms_of_@var{foo}}, etc. to @file{emacs.c} |
5350 @item | 5354 @item |
5351 add definitions of macros like @code{CHECK_@var{FOO}} and | 5355 Add definitions of macros like @code{CHECK_@var{FOO}} and |
5352 @code{@var{FOO}P} to @file{@var{foo}.h} | 5356 @code{@var{FOO}P} to @file{@var{foo}.h} |
5353 @item | 5357 @item |
5354 add the new type index to @code{enum lrecord_type} | 5358 Add the new type index to @code{enum lrecord_type} |
5355 @item | 5359 @item |
5356 add a DEFINE_LRECORD_IMPLEMENTATION call to @file{@var{foo}.c} | 5360 Add a @code{DEFINE_*_LISP_OBJECT()} to @file{@var{foo}.c} |
5357 @item | 5361 @item |
5358 add an INIT_LRECORD_IMPLEMENTATION call to @code{syms_of_@var{foo}.c} | 5362 Add an @code{INIT_LISP_OBJECT} call to @code{syms_of_@var{foo}.c} |
5359 @end enumerate | 5363 @end enumerate |
5360 | 5364 |
5361 | 5365 |
5362 @node Writing Lisp Primitives, Writing Good Comments, Working with Lisp Objects, Rules When Writing New C Code | 5366 @node Writing Lisp Primitives, Writing Good Comments, Working with Lisp Objects, Rules When Writing New C Code |
5363 @section Writing Lisp Primitives | 5367 @section Writing Lisp Primitives |
5664 correct it or flag it as incorrect, as described in the previous | 5668 correct it or flag it as incorrect, as described in the previous |
5665 paragraph. Whenever you work on a section of code, @emph{always} make | 5669 paragraph. Whenever you work on a section of code, @emph{always} make |
5666 sure to update any comments to be correct -- or, at the very least, flag | 5670 sure to update any comments to be correct -- or, at the very least, flag |
5667 them as incorrect. | 5671 them as incorrect. |
5668 | 5672 |
5669 To indicate a "todo" or other problem, use four pound signs -- | 5673 To indicate a ``todo'' or other problem, use four pound signs -- |
5670 i.e. @samp{####}. | 5674 i.e. @samp{####}. |
5671 | 5675 |
5672 @node Adding Global Lisp Variables, Writing Macros, Writing Good Comments, Rules When Writing New C Code | 5676 @node Adding Global Lisp Variables, Writing Macros, Writing Good Comments, Rules When Writing New C Code |
5673 @section Adding Global Lisp Variables | 5677 @section Adding Global Lisp Variables |
5674 @cindex global Lisp variables, adding | 5678 @cindex global Lisp variables, adding |
5836 functions a gcc bug, but the gcc maintainers disagree. | 5840 functions a gcc bug, but the gcc maintainers disagree. |
5837 | 5841 |
5838 @cindex inline functions, headers | 5842 @cindex inline functions, headers |
5839 @cindex header files, inline functions | 5843 @cindex header files, inline functions |
5840 Every header which contains inline functions, either directly by using | 5844 Every header which contains inline functions, either directly by using |
5841 @code{DECLARE_INLINE_HEADER} or indirectly by using @code{DECLARE_LRECORD} must | 5845 @code{DECLARE_INLINE_HEADER} or indirectly by using |
5842 be added to @file{inline.c}'s includes to make the optimization | 5846 @code{DECLARE_LISP_OBJECT} must be added to @file{inline.c}'s includes |
5843 described above work. (Optimization note: if all INLINE_HEADER | 5847 to make the optimization described above work. (Optimization note: if |
5844 functions are in fact inlined in all translation units, then the linker | 5848 all INLINE_HEADER functions are in fact inlined in all translation |
5845 can just discard @code{inline.o}, since it contains only unreferenced code). | 5849 units, then the linker can just discard @code{inline.o}, since it |
5850 contains only unreferenced code). | |
5846 | 5851 |
5847 The three golden rules of macros: | 5852 The three golden rules of macros: |
5848 | 5853 |
5849 @enumerate | 5854 @enumerate |
5850 @item | 5855 @item |
5851 Anything that's an lvalue can be evaluated more than once. | 5856 Anything that's an lvalue can be evaluated more than once. |
5852 @item | 5857 @item |
5853 Macros where anything else can be evaluated more than once should | 5858 Macros where anything else can be evaluated more than once should |
5854 have the word "unsafe" in their name (exceptions may be made for | 5859 have the word ``unsafe'' in their name (exceptions may be made for |
5855 large sets of macros that evaluate arguments of certain types more | 5860 large sets of macros that evaluate arguments of certain types more |
5856 than once, e.g. struct buffer * arguments, when clearly indicated in | 5861 than once, e.g. struct buffer * arguments, when clearly indicated in |
5857 the macro documentation). These macros are generally meant to be | 5862 the macro documentation). These macros are generally meant to be |
5858 called only by other macros that have already stored the calling | 5863 called only by other macros that have already stored the calling |
5859 values in temporary variables. | 5864 values in temporary variables. |
5881 Capitalize macros doing stuff obviously impossible with (C) | 5886 Capitalize macros doing stuff obviously impossible with (C) |
5882 functions, e.g. directly modifying arguments as if they were passed by | 5887 functions, e.g. directly modifying arguments as if they were passed by |
5883 reference. | 5888 reference. |
5884 @item | 5889 @item |
5885 Capitalize macros that evaluate @strong{any} argument more than once regardless | 5890 Capitalize macros that evaluate @strong{any} argument more than once regardless |
5886 of whether that's "allowed" (e.g. buffer arguments). | 5891 of whether that's ``allowed'' (e.g. buffer arguments). |
5887 @item | 5892 @item |
5888 Capitalize macros that directly access a field in a Lisp_Object or | 5893 Capitalize macros that directly access a field in a Lisp_Object or |
5889 its equivalent underlying structure. In such cases, access through the | 5894 its equivalent underlying structure. In such cases, access through the |
5890 Lisp_Object precedes the macro with an X, and access through the underlying | 5895 Lisp_Object precedes the macro with an X, and access through the underlying |
5891 structure doesn't. | 5896 structure doesn't. |
5936 a search-and-replace is done to change type names and such. Some people | 5941 a search-and-replace is done to change type names and such. Some people |
5937 disagree with such changes, and certainly if done without good reason | 5942 disagree with such changes, and certainly if done without good reason |
5938 will just lead to headaches. But it's important to keep the code clean | 5943 will just lead to headaches. But it's important to keep the code clean |
5939 and understandable, and consistent naming goes a long way towards this. | 5944 and understandable, and consistent naming goes a long way towards this. |
5940 | 5945 |
5941 An example of the right way to do this was the so-called "great integral | 5946 An example of the right way to do this was the so-called ``great integral |
5942 type renaming". | 5947 type renaming''. |
5943 | 5948 |
5944 @menu | 5949 @menu |
5945 * Great Integral Type Renaming:: | 5950 * Great Integral Type Renaming:: |
5946 * Text/Char Type Renaming:: | 5951 * Text/Char Type Renaming:: |
5947 @end menu | 5952 @end menu |
5964 @item | 5969 @item |
5965 All integral types that measure quantities of anything are signed. Some | 5970 All integral types that measure quantities of anything are signed. Some |
5966 people disagree vociferously with this, but their arguments are mostly | 5971 people disagree vociferously with this, but their arguments are mostly |
5967 theoretical, and are vastly outweighed by the practical headaches of | 5972 theoretical, and are vastly outweighed by the practical headaches of |
5968 mixing signed and unsigned values, and more importantly by the far | 5973 mixing signed and unsigned values, and more importantly by the far |
5969 increased likelihood of inadvertent bugs: Because of the broken "viral" | 5974 increased likelihood of inadvertent bugs: Because of the broken ``viral'' |
5970 nature of unsigned quantities in C (operations involving mixed | 5975 nature of unsigned quantities in C (operations involving mixed |
5971 signed/unsigned are done unsigned, when exactly the opposite is nearly | 5976 signed/unsigned are done unsigned, when exactly the opposite is nearly |
5972 always wanted), even a single error in declaring a quantity unsigned | 5977 always wanted), even a single error in declaring a quantity unsigned |
5973 that should be signed, or the even more subtle error of comparing | 5978 that should be signed, or the even more subtle error of comparing |
5974 signed and unsigned values and forgetting the necessary cast, can be | 5979 signed and unsigned values and forgetting the necessary cast, can be |
5975 catastrophic, as comparisons will yield wrong results. -Wsign-compare | 5980 catastrophic, as comparisons will yield wrong results. @samp{-Wsign-compare} |
5976 is turned on specifically to catch this, but this tends to result in a | 5981 is turned on specifically to catch this, but this tends to result in a |
5977 great number of warnings when mixing signed and unsigned, and the casts | 5982 great number of warnings when mixing signed and unsigned, and the casts |
5978 are annoying. More has been written on this elsewhere. | 5983 are annoying. More has been written on this elsewhere. |
5979 | 5984 |
5980 @item | 5985 @item |
5989 Type names should be relatively short (no more than 10 characters or | 5994 Type names should be relatively short (no more than 10 characters or |
5990 so), with the first letter capitalized and no underscores if they can at | 5995 so), with the first letter capitalized and no underscores if they can at |
5991 all be avoided. | 5996 all be avoided. |
5992 | 5997 |
5993 @item | 5998 @item |
5994 "count" == a zero-based measurement of some quantity. Includes sizes, | 5999 ``count'' == a zero-based measurement of some quantity. Includes sizes, |
5995 offsets, and indexes. | 6000 offsets, and indexes. |
5996 | 6001 |
5997 @item | 6002 @item |
5998 "bpos" == a one-based measurement of a position in a buffer. "Charbpos" | 6003 ``bpos'' == a one-based measurement of a position in a buffer. ``Charbpos'' |
5999 and "Bytebpos" count text in the buffer, rather than bytes in memory; | 6004 and ``Bytebpos'' count text in the buffer, rather than bytes in memory; |
6000 thus Bytebpos does not directly correspond to the memory representation. | 6005 thus Bytebpos does not directly correspond to the memory representation. |
6001 Use "Membpos" for this. | 6006 Use ``Membpos'' for this. |
6002 | 6007 |
6003 @item | 6008 @item |
6004 "Char" refers to internal-format characters, not to the C type "char", | 6009 ``Char'' refers to internal-format characters, not to the C type ``char'', |
6005 which is really a byte. | 6010 which is really a byte. |
6006 @end itemize | 6011 @end itemize |
6007 | 6012 |
6008 For the actual name changes, see the script below. | 6013 For the actual name changes, see the script below. |
6009 | 6014 |
6094 #endif | 6099 #endif |
6095 | 6100 |
6096 /* There have been some arguments over what the type should be that | 6101 /* There have been some arguments over what the type should be that |
6097 specifies a count of bytes in a data block to be written out or read in, | 6102 specifies a count of bytes in a data block to be written out or read in, |
6098 using @code{Lstream_read()}, @code{Lstream_write()}, and related functions. | 6103 using @code{Lstream_read()}, @code{Lstream_write()}, and related functions. |
6099 Originally it was long, which worked fine; Martin "corrected" these to | 6104 Originally it was long, which worked fine; Martin ``corrected'' these to |
6100 size_t and ssize_t on the grounds that this is theoretically cleaner and | 6105 size_t and ssize_t on the grounds that this is theoretically cleaner and |
6101 is in keeping with the C standards. Unfortunately, this practice is | 6106 is in keeping with the C standards. Unfortunately, this practice is |
6102 horribly error-prone due to design flaws in the way that mixed | 6107 horribly error-prone due to design flaws in the way that mixed |
6103 signed/unsigned arithmetic happens. In fact, by doing this change, | 6108 signed/unsigned arithmetic happens. In fact, by doing this change, |
6104 Martin introduced a subtle but fatal error that caused the operation of | 6109 Martin introduced a subtle but fatal error that caused the operation of |
6469 fixed---use the @code{Known-Bug-Expect-Failure} wrapper macro to mark | 6474 fixed---use the @code{Known-Bug-Expect-Failure} wrapper macro to mark |
6470 them. | 6475 them. |
6471 | 6476 |
6472 @deffn Macro Known-Bug-Expect-Failure body | 6477 @deffn Macro Known-Bug-Expect-Failure body |
6473 Arrange for failing tests in @var{body} to generate messages prefixed | 6478 Arrange for failing tests in @var{body} to generate messages prefixed |
6474 with "KNOWN BUG:" instead of "FAIL:". @var{body} is a @code{progn}-like | 6479 with ``KNOWN BUG:'' instead of ``FAIL:''. @var{body} is a @code{progn}-like |
6475 body, and may contain several tests. | 6480 body, and may contain several tests. |
6476 @end deffn | 6481 @end deffn |
6477 | 6482 |
6478 A lot of the tests we run push limits; suppress Ebola warning messages | 6483 A lot of the tests we run push limits; suppress Ebola warning messages |
6479 with the @code{Ignore-Ebola} wrapper macro. | 6484 with the @code{Ignore-Ebola} wrapper macro. |
6650 with added or deleted files.} If you are lucky, the operation will | 6655 with added or deleted files.} If you are lucky, the operation will |
6651 simply fail. If you are less lucky, it will proceed, but make the | 6656 simply fail. If you are less lucky, it will proceed, but make the |
6652 adds and deletes on the main line, which you do not want at all. | 6657 adds and deletes on the main line, which you do not want at all. |
6653 Therefore, you must undo all adds and deletes. To find out what is | 6658 Therefore, you must undo all adds and deletes. To find out what is |
6654 added and deleted, use something like @code{cvs -n update >&! | 6659 added and deleted, use something like @code{cvs -n update >&! |
6655 cvs.out}, which does a "dry run". (You did make a backup copy first, | 6660 cvs.out}, which does a ``dry run''. (You did make a backup copy first, |
6656 right? What if you forgot the @samp{-n}, for example, and weren't | 6661 right? What if you forgot the @samp{-n}, for example, and weren't |
6657 prepared for the sudden onslaught of merging action?) Take a look at | 6662 prepared for the sudden onslaught of merging action?) Take a look at |
6658 the output file @file{cvs.out} and check very carefully for newly | 6663 the output file @file{cvs.out} and check very carefully for newly |
6659 added files (marked with an @samp{A}) and newly removed files (marked | 6664 added files (marked with an @samp{A}) and newly removed files (marked |
6660 with an @samp{R}). Double check that your newly added files are in | 6665 with an @samp{R}). Double check that your newly added files are in |
6682 crw tag -b ben-mule-21-5 | 6687 crw tag -b ben-mule-21-5 |
6683 @end example | 6688 @end example |
6684 | 6689 |
6685 Note that this doesn't actually do anything to your local workspace! | 6690 Note that this doesn't actually do anything to your local workspace! |
6686 It basically just creates another tag in the repository, identical to | 6691 It basically just creates another tag in the repository, identical to |
6687 the branch point tag but internally marked as a "branch tag" rather | 6692 the branch point tag but internally marked as a ``branch tag'' rather |
6688 than a regular tag. | 6693 than a regular tag. |
6689 | 6694 |
6690 @item | 6695 @item |
6691 Now, move your workspace onto the branch: | 6696 Now, move your workspace onto the branch: |
6692 | 6697 |
7016 and when you add a new element, the array automatically resizes itself | 7021 and when you add a new element, the array automatically resizes itself |
7017 if it isn't big enough. Dynarrs are extensively used in the redisplay | 7022 if it isn't big enough. Dynarrs are extensively used in the redisplay |
7018 mechanism. | 7023 mechanism. |
7019 | 7024 |
7020 | 7025 |
7021 A "dynamic array" is a contiguous array of fixed-size elements where there | 7026 A ``dynamic array'' is a contiguous array of fixed-size elements where there |
7022 is no upper limit (except available memory) on the number of elements in the | 7027 is no upper limit (except available memory) on the number of elements in the |
7023 array. Because the elements are maintained contiguously, space is used | 7028 array. Because the elements are maintained contiguously, space is used |
7024 efficiently (no per-element pointers necessary) and random access to a | 7029 efficiently (no per-element pointers necessary) and random access to a |
7025 particular element is in constant time. At any one point, the block of memory | 7030 particular element is in constant time. At any one point, the block of memory |
7026 that holds the array has an upper limit; if this limit is exceeded, the | 7031 that holds the array has an upper limit; if this limit is exceeded, the |
7027 memory is realloc()ed into a new array that is twice as big. Assuming that | 7032 memory is @code{realloc()}ed into a new array that is twice as big. Assuming that |
7028 the time to grow the array is on the order of the new size of the array | 7033 the time to grow the array is on the order of the new size of the array |
7029 block, this scheme has a provably constant amortized time (i.e. average | 7034 block, this scheme has a provably constant amortized time (i.e. average |
7030 time over all additions). | 7035 time over all additions). |
7031 | 7036 |
7032 When you add elements or retrieve elements, pointers are used. Note that | 7037 When you add elements or retrieve elements, pointers are used. Note that |
7130 onto a linked list, so they can be efficiently reused. This data type | 7135 onto a linked list, so they can be efficiently reused. This data type |
7131 is not much used in XEmacs currently, because it's a fairly new | 7136 is not much used in XEmacs currently, because it's a fairly new |
7132 addition. | 7137 addition. |
7133 | 7138 |
7134 | 7139 |
7135 A "block-type object" is used to efficiently allocate and free blocks | 7140 A ``block-type object'' is used to efficiently allocate and free blocks |
7136 of a particular size. Freed blocks are remembered in a free list and | 7141 of a particular size. Freed blocks are remembered in a free list and |
7137 are reused as necessary to allocate new blocks, so as to avoid as | 7142 are reused as necessary to allocate new blocks, so as to avoid as |
7138 much as possible making calls to malloc() and free(). | 7143 much as possible making calls to @code{malloc()} and @code{free()}. |
7139 | 7144 |
7140 This is a container object. Declare a block-type object of a specific type | 7145 This is a container object. Declare a block-type object of a specific type |
7141 as follows: | 7146 as follows: |
7142 | 7147 |
7143 struct mytype_blocktype @{ | 7148 struct mytype_blocktype @{ |
7750 characters. No special allocation or garbage collection is necessary | 7755 characters. No special allocation or garbage collection is necessary |
7751 for such objects. Lisp objects of these types do not need to be | 7756 for such objects. Lisp objects of these types do not need to be |
7752 @code{GCPRO}ed. | 7757 @code{GCPRO}ed. |
7753 @end itemize | 7758 @end itemize |
7754 | 7759 |
7755 In the remaining two categories, the type is stored in the object | 7760 In the remaining two categories, the type is stored in the object |
7756 itself. The tag for all such objects is the generic @dfn{lrecord} | 7761 itself. The tag for all such objects is the generic @dfn{lrecord} |
7757 (Lisp_Type_Record) tag. The first bytes of the object's structure are an | 7762 (Lisp_Type_Record) tag. The first bytes of the object's structure are an |
7758 integer (actually a char) characterising the object's type and some | 7763 integer (actually a char) characterising the object's type and some |
7759 flags, in particular the mark bit used for garbage collection. A | 7764 flags, in particular the mark bit used for garbage collection. A |
7760 structure describing the type is accessible thru the | 7765 structure describing the type is accessible thru the |
8275 @code{this_one_is_unmarkable} in @code{alloc.c}). | 8280 @code{this_one_is_unmarkable} in @code{alloc.c}). |
8276 | 8281 |
8277 Now, the actual marking is feasible. We do so by once using the macro | 8282 Now, the actual marking is feasible. We do so by once using the macro |
8278 @code{MARK_RECORD_HEADER} to mark the object itself (actually the | 8283 @code{MARK_RECORD_HEADER} to mark the object itself (actually the |
8279 special flag in the lrecord header), and calling its special marker | 8284 special flag in the lrecord header), and calling its special marker |
8280 "method" @code{marker} if available. The marker method marks every | 8285 ``method'' @code{marker} if available. The marker method marks every |
8281 other object that is in reach from our current object. Note that these | 8286 other object that is in reach from our current object. Note that these |
8282 marker methods should not call @code{mark_object} recursively, but | 8287 marker methods should not call @code{mark_object} recursively, but |
8283 instead should return the next object from where further marking has to | 8288 instead should return the next object from where further marking has to |
8284 be performed. | 8289 be performed. |
8285 | 8290 |
8330 @code{sweep_conses}, @code{sweep_bit_vectors_1}, | 8335 @code{sweep_conses}, @code{sweep_bit_vectors_1}, |
8331 @code{sweep_compiled_functions}, @code{sweep_floats}, | 8336 @code{sweep_compiled_functions}, @code{sweep_floats}, |
8332 @code{sweep_symbols}, @code{sweep_extents}, @code{sweep_markers} and | 8337 @code{sweep_symbols}, @code{sweep_extents}, @code{sweep_markers} and |
8333 @code{sweep_events}. They are the fixed-size types cons, floats, | 8338 @code{sweep_events}. They are the fixed-size types cons, floats, |
8334 compiled-functions, symbol, marker, extent, and event stored in | 8339 compiled-functions, symbol, marker, extent, and event stored in |
8335 so-called "frob blocks", and therefore we can basically do the same on | 8340 so-called ``frob blocks'', and therefore we can basically do the same on |
8336 every type of object, using the same macros, especially defined only to | 8341 every type of object, using the same macros, especially defined only to |
8337 handle everything with respect to fixed-size blocks. The only fixed-size | 8342 handle everything with respect to fixed-size blocks. The only fixed-size |
8338 type that is not handled here is the fixed-size portion of strings, | 8343 type that is not handled here is the fixed-size portion of strings, |
8339 because we took special care of them earlier. | 8344 because we took special care of them earlier. |
8340 | 8345 |
8486 @node Integers and Characters, Allocation from Frob Blocks, Garbage Collection - Step by Step, Allocation of Objects in XEmacs Lisp | 8491 @node Integers and Characters, Allocation from Frob Blocks, Garbage Collection - Step by Step, Allocation of Objects in XEmacs Lisp |
8487 @section Integers and Characters | 8492 @section Integers and Characters |
8488 @cindex integers and characters | 8493 @cindex integers and characters |
8489 @cindex characters, integers and | 8494 @cindex characters, integers and |
8490 | 8495 |
8491 Integer and character Lisp objects are created from integers using the | 8496 Integer and character Lisp objects are created from integers using the |
8492 macros @code{XSETINT()} and @code{XSETCHAR()} or the equivalent | |
8493 functions @code{make_int()} and @code{make_char()}. (These are actually | 8497 functions @code{make_int()} and @code{make_char()}. (These are actually |
8494 macros on most systems.) These functions basically just do some moving | 8498 macros on most systems.) These functions basically just do some moving |
8495 of bits around, since the integral value of the object is stored | 8499 of bits around, since the integral value of the object is stored |
8496 directly in the @code{Lisp_Object}. | 8500 directly in the @code{Lisp_Object}. |
8497 | 8501 |
8498 @code{XSETINT()} and the like will truncate values given to them that | |
8499 are too big; i.e. you won't get the value you expected but the tag bits | |
8500 will at least be correct. | |
8501 | |
8502 @node Allocation from Frob Blocks, lrecords, Integers and Characters, Allocation of Objects in XEmacs Lisp | 8502 @node Allocation from Frob Blocks, lrecords, Integers and Characters, Allocation of Objects in XEmacs Lisp |
8503 @section Allocation from Frob Blocks | 8503 @section Allocation from Frob Blocks |
8504 @cindex allocation from frob blocks | 8504 @cindex allocation from frob blocks |
8505 @cindex frob blocks, allocation from | 8505 @cindex frob blocks, allocation from |
8506 | 8506 |
8507 The uninitialized memory required by a @code{Lisp_Object} of a particular type | 8507 The uninitialized memory required by a @code{Lisp_Object} of a |
8508 is allocated using | 8508 particular type is allocated using @code{ALLOCATE_FIXED_TYPE()}. This |
8509 @code{ALLOCATE_FIXED_TYPE()}. This only occurs inside of the | 8509 only occurs inside of the lowest-level object-creating functions in |
8510 lowest-level object-creating functions in @file{alloc.c}: | 8510 @file{alloc.c}: @code{Fcons()}, @code{make_float()}, |
8511 @code{Fcons()}, @code{make_float()}, @code{Fmake_byte_code()}, | 8511 @code{Fmake_byte_code()}, @code{Fmake_symbol()}, |
8512 @code{Fmake_symbol()}, @code{allocate_extent()}, | 8512 @code{allocate_extent()}, @code{allocate_event()}, |
8513 @code{allocate_event()}, @code{Fmake_marker()}, and | 8513 @code{Fmake_marker()}, and @code{make_uninit_string()}. The idea is |
8514 @code{make_uninit_string()}. The idea is that, for each type, there are | 8514 that, for each type, there are a number of frob blocks (each 2K in |
8515 a number of frob blocks (each 2K in size); each frob block is divided up | 8515 size); each frob block is divided up into object-sized chunks. Each |
8516 into object-sized chunks. Each frob block will have some of these | 8516 frob block will have some of these chunks that are currently assigned |
8517 chunks that are currently assigned to objects, and perhaps some that are | 8517 to objects, and perhaps some that are free. (If a frob block has |
8518 free. (If a frob block has nothing but free chunks, it is freed at the | 8518 nothing but free chunks, it is freed at the end of the garbage |
8519 end of the garbage collection cycle.) The free chunks are stored in a | 8519 collection cycle.) The free chunks are stored in a free list, which |
8520 free list, which is chained by storing a pointer in the first four bytes | 8520 is chained by storing a pointer in the first four bytes of the |
8521 of the chunk. (Except for the free chunks at the end of the last frob | 8521 chunk. (Except for the free chunks at the end of the last frob block, |
8522 block, which are handled using an index which points past the end of the | 8522 which are handled using an index which points past the end of the |
8523 last-allocated chunk in the last frob block.) | 8523 last-allocated chunk in the last frob block.) |
8524 @code{ALLOCATE_FIXED_TYPE()} first tries to retrieve a chunk from the | 8524 @code{ALLOCATE_FIXED_TYPE()} first tries to retrieve a chunk from the |
8525 free list; if that fails, it calls | 8525 free list; if that fails, it calls |
8526 @code{ALLOCATE_FIXED_TYPE_FROM_BLOCK()}, which looks at the end of the | 8526 @code{ALLOCATE_FIXED_TYPE_FROM_BLOCK()}, which looks at the end of the |
8527 last frob block for space, and creates a new frob block if there is | 8527 last frob block for space, and creates a new frob block if there is |
8528 none. (There are actually two versions of these macros, one of which is | 8528 none. (There are actually two versions of these macros, one of which |
8529 more defensive but less efficient and is used for error-checking.) | 8529 is more defensive but less efficient and is used for error-checking.) |
8530 | 8530 |
8531 @node lrecords, Low-level allocation, Allocation from Frob Blocks, Allocation of Objects in XEmacs Lisp | 8531 @node lrecords, Low-level allocation, Allocation from Frob Blocks, Allocation of Objects in XEmacs Lisp |
8532 @section lrecords | 8532 @section lrecords |
8533 @cindex lrecords | 8533 @cindex lrecords |
8534 | 8534 |
8535 [see @file{lrecord.h}] | 8535 [see @file{lrecord.h}] |
8536 | 8536 |
8537 @strong{This node needs updating for the ``new garbage collection | 8537 @strong{This node needs updating for the ``new garbage collection |
8538 algorithms'' (KKCC) and the ``incremental'' collector.} | 8538 algorithms'' (KKCC) and the ``incremental'' collector.} |
8539 | 8539 |
8540 All lrecords have at the beginning of their structure a @code{struct | 8540 All lrecords have at the beginning of their structure a @code{struct |
8541 lrecord_header}. This just contains a type number and some flags, | 8541 lrecord_header}. This just contains a type number and some flags, |
8542 including the mark bit. All builtin type numbers are defined as | 8542 including the mark bit. All builtin type numbers are defined as |
8543 constants in @code{enum lrecord_type}, to allow the compiler to generate | 8543 constants in @code{enum lrecord_type}, to allow the compiler to generate |
8544 more efficient code for @code{@var{type}P}. The type number, thru the | 8544 more efficient code for @code{@var{type}P}. The type number, thru the |
8545 @code{lrecord_implementations_table}, gives access to a @code{struct | 8545 @code{lrecord_implementations_table}, gives access to a @code{struct |
8546 lrecord_implementation}, which is a structure containing method pointers | 8546 lrecord_implementation}, which is a structure containing method pointers |
8547 and such. There is one of these for each type, and it is a global, | 8547 and such. There is one of these for each type, and it is a global, |
8548 constant, statically-declared structure that is declared in the | 8548 constant, statically-declared structure that is declared in the |
8549 @code{DEFINE_LRECORD_IMPLEMENTATION()} macro. | 8549 @code{DEFINE_*_LISP_OBJECT()} macro. |
8550 | 8550 |
8551 Simple lrecords (of type (b) above) just have a @code{struct | 8551 Frob-block lrecords just have a @code{struct lrecord_header} at their |
8552 lrecord_header} at their beginning. lcrecords, however, actually have a | 8552 beginning. lcrecords, however, actually have a |
8553 @code{struct lcrecord_header}. This, in turn, has a @code{struct | 8553 @code{struct old_lcrecord_header}. This, in turn, has a @code{struct |
8554 lrecord_header} at its beginning, so sanity is preserved; but it also | 8554 lrecord_header} at its beginning, so sanity is preserved; but it also |
8555 has a pointer used to chain all lcrecords together, and a special ID | 8555 has a pointer used to chain all lcrecords together. |
8556 field used to distinguish one lcrecord from another. (This field is used | |
8557 only for debugging and could be removed, but the space gain is not | |
8558 significant.) | |
8559 | 8556 |
8560 @strong{lcrecords are now obsolete when using the write-barrier-based | 8557 @strong{lcrecords are now obsolete when using the write-barrier-based |
8561 collector.} | 8558 collector.} |
8562 | 8559 |
8563 Simple lrecords are created using @code{ALLOCATE_FIXED_TYPE()}, just | 8560 Frob-block objects are created using @code{ALLOC_FROB_BLOCK_LISP_OBJECT()}. |
8564 like for other frob blocks. The only change is that the implementation | 8561 All this does is call @code{ALLOCATE_FIXED_TYPE()} to allocate an |
8565 pointer must be initialized correctly. (The implementation structure for | 8562 object, and @code{set_lheader_implementation()} to initialize the header. |
8566 an lrecord, or rather the pointer to it, is named @code{lrecord_float}, | 8563 |
8567 @code{lrecord_extent}, @code{lrecord_buffer}, etc.) | 8564 Normal objects (i.e. lcrecords) are created using |
8568 | 8565 @code{ALLOC_NORMAL_LISP_OBJECT()}, which takes a type name (resolved |
8569 lcrecords are created using @code{alloc_lcrecord()}. This takes a | 8566 internally to a structure named @code{lrecord_foo} for type |
8570 size to allocate and an implementation pointer. (The size needs to be | 8567 @code{foo}). If they are of variable size, however, they are created |
8571 passed because some lcrecords, such as window configurations, are of | 8568 with @code{ALLOC_SIZED_LISP_OBJECT()}, which takes a size to allocate |
8572 variable size.) This basically just @code{malloc()}s the storage, | 8569 in addition to a type. This basically just @code{malloc()}s the |
8573 initializes the @code{struct lcrecord_header}, and chains the lcrecord | 8570 storage, initializes the @code{struct lcrecord_header}, and chains the |
8574 onto the head of the list of all lcrecords, which is stored in the | 8571 lcrecord onto the head of the list of all lcrecords, which is stored |
8575 variable @code{all_lcrecords}. The calls to @code{alloc_lcrecord()} | 8572 in the variable @code{all_lcrecords}. The calls to the above |
8576 generally occur in the lowest-level allocation function for each lrecord | 8573 allocation macros generally occur in the lowest-level allocation |
8577 type. | 8574 function for each lrecord type. |
8578 | 8575 |
8579 Whenever you create an lrecord, you need to call either | 8576 Whenever you create a normal object, you need to call one of the |
8580 @code{DEFINE_LRECORD_IMPLEMENTATION()} or | 8577 @code{DEFINE_*_LISP_OBJECT()} macros. This needs to be |
8581 @code{DEFINE_LRECORD_SEQUENCE_IMPLEMENTATION()}. This needs to be | |
8582 specified in a @file{.c} file, at the top level. What this actually | 8578 specified in a @file{.c} file, at the top level. What this actually |
8583 does is define and initialize the implementation structure for the | 8579 does is define and initialize the implementation structure for the |
8584 lrecord. (And possibly declares a function @code{error_check_foo()} that | 8580 lrecord. (And possibly declares a function @code{error_check_foo()} that |
8585 implements the @code{XFOO()} macro when error-checking is enabled.) The | 8581 implements the @code{XFOO()} macro when error-checking is enabled.) The |
8586 arguments to the macros are the actual type name (this is used to | 8582 arguments to the macros are the actual type name (this is used to |
8593 are used to encapsulate type-specific information about the object, such | 8589 are used to encapsulate type-specific information about the object, such |
8594 as how to print it or mark it for garbage collection, so that it's easy | 8590 as how to print it or mark it for garbage collection, so that it's easy |
8595 to add new object types without having to add a specific case for each | 8591 to add new object types without having to add a specific case for each |
8596 new type in a bunch of different places. | 8592 new type in a bunch of different places. |
8597 | 8593 |
8598 The difference between @code{DEFINE_LRECORD_IMPLEMENTATION()} and | 8594 The various macros for defining Lisp objects are as follows: |
8599 @code{DEFINE_LRECORD_SEQUENCE_IMPLEMENTATION()} is that the former is | 8595 |
8600 used for fixed-size object types and the latter is for variable-size | 8596 @itemize @bullet |
8601 object types. Most object types are fixed-size; some complex | 8597 @item |
8602 types, however (e.g. window configurations), are variable-size. | 8598 @code{DEFINE_*_LISP_OBJECT} is for objects with constant size. (Either |
8603 Variable-size object types have an extra method, which is called | 8599 @code{DEFINE_DUMPABLE_LISP_OBJECT} for objects that can be saved in a |
8604 to determine the actual size of a particular object of that type. | 8600 dumped executable, or @code{DEFINE_NODUMP_LISP_OBJECT} for objects |
8605 (Currently this is only used for keeping allocation statistics.) | 8601 that cannot be saved -- e.g. that contain pointers to non-persistent |
8606 | 8602 external objects such as window-system windows.) |
8607 For the purpose of keeping allocation statistics, the allocation | 8603 |
8604 @item | |
8605 @code{DEFINE_*_SIZABLE_LISP_OBJECT} is for objects whose size varies. | |
8606 This includes some simple types such as vectors, bit vectors and | |
8607 opaque objects, as well as complex types, especially types such as | |
8608 specifiers, lstreams or coding systems that have subtypes and include | |
8609 subtype-specific data attached to the end of the structure. | |
8610 Variable-size objects have an extra method that returns the size of | |
8611 the object. This is not used at allocation (rather, the size is | |
8612 specified in the call to the allocation macro), but is used for | |
8613 operations such as copying a Lisp object, as well as for keeping | |
8614 allocation statistics. | |
8615 | |
8616 @item | |
8617 @code{DEFINE_*_FROB_BLOCK_LISP_OBJECT} is for objects that are | |
8618 allocated in large blocks (``frob blocks''), which are parceled up | |
8619 individually. Such objects need special handling in @file{alloc.c}. | |
8620 This does not apply to NEW_GC, because it does this automatically. | |
8621 | |
8622 @item | |
8623 @code{DEFINE_*_INTERNAL_LISP_OBJECT} is for ``internal'' objects that | |
8624 should never be visible on the Lisp level. This is a shorthand for | |
8625 the most common type of internal objects, which have no equal or hash | |
8626 method (since they generally won't appear in hash tables), no | |
8627 finalizer and @code{internal_object_printer()} as their print method | |
8628 (which prints that the object is internal and shouldn't be visible | |
8629 externally). For internal objects needing a finalizer, equal or hash | |
8630 method, or wanting to customize the print method, use the normal | |
8631 @code{DEFINE_*_LISP_OBJECT} mechanism for defining these objects. | |
8632 | |
8633 @item | |
8634 @code{DEFINE_*_GENERAL_LISP_OBJECT} is for objects that need to | |
8635 provide one of the less common methods that are omitted on most | |
8636 objects. These methods include the methods supporting the unified | |
8637 property interface using @code{get}, @code{put}, @code{remprop} and | |
8638 @code{object-plist}, and (for dumpable objects only) the | |
8639 @code{disksaver} method. | |
8640 | |
8641 @item | |
8642 @code{DEFINE_MODULE_*} is for objects defined in an external module. | |
8643 @end itemize | |
8644 | |
8645 @code{MAKE_LISP_OBJECT} and @code{MAKE_MODULE_LISP_OBJECT} are what | |
8646 underlies all of these; they define a structure containing pointers to | |
8647 object methods and other info such as the size of the structure | |
8648 containing the object. | |
8649 | |
8650 For the purpose of keeping allocation statistics, the allocation | |
8608 engine keeps a list of all the different types that exist. Note that, | 8651 engine keeps a list of all the different types that exist. Note that, |
8609 since @code{DEFINE_LRECORD_IMPLEMENTATION()} is a macro that is | 8652 since @code{DEFINE_*_LISP_OBJECT()} is a macro that is |
8610 specified at top-level, there is no way for it to initialize the global | 8653 specified at top-level, there is no way for it to initialize the |
8611 data structures containing type information, like | 8654 global data structures containing type information, like |
8612 @code{lrecord_implementations_table}. For this reason a call to | 8655 @code{lrecord_implementations_table}. For this reason a call to |
8613 @code{INIT_LRECORD_IMPLEMENTATION} must be added to the same source file | 8656 @code{INIT_LISP_OBJECT()} must be added to the same source |
8614 containing @code{DEFINE_LRECORD_IMPLEMENTATION}, but instead of to the | 8657 file containing @code{DEFINE_*_LISP_OBJECT()}, but instead of |
8615 top level, to one of the init functions, typically | 8658 to the top level, to one of the init functions, typically |
8616 @code{syms_of_@var{foo}()}. @code{INIT_LRECORD_IMPLEMENTATION} must be | 8659 @code{syms_of_@var{foo}()}. @code{INIT_LISP_OBJECT()} must |
8617 called before an object of this type is used. | 8660 be called before an object of this type is used. |
8618 | 8661 |
8619 The type number is also used to index into an array holding the number | 8662 The type number is also used to index into an array holding the number |
8620 of objects of each type and the total memory allocated for objects of | 8663 of objects of each type and the total memory allocated for objects of |
8621 that type. The statistics in this array are computed during the sweep | 8664 that type. The statistics in this array are computed during the sweep |
8622 stage. These statistics are returned by the call to | 8665 stage. These statistics are returned by the call to |
8623 @code{garbage-collect}. | 8666 @code{garbage-collect}. |
8624 | 8667 |
8625 Note that for every type defined with a @code{DEFINE_LRECORD_*()} | 8668 Note that for every type defined with a @code{DEFINE_*_LISP_OBJECT()} |
8626 macro, there needs to be a @code{DECLARE_LRECORD_IMPLEMENTATION()} | 8669 macro, there needs to be a @code{DECLARE_LISP_OBJECT()} somewhere in a |
8627 somewhere in a @file{.h} file, and this @file{.h} file needs to be | 8670 @file{.h} file, and this @file{.h} file needs to be included by |
8628 included by @file{inline.c}. | 8671 @file{inline.c}. |
8629 | 8672 |
8630 Furthermore, there should generally be a set of @code{XFOOBAR()}, | 8673 Furthermore, there should generally be a set of @code{XFOOBAR()}, |
8631 @code{FOOBARP()}, etc. macros in a @file{.h} (or occasionally @file{.c}) | 8674 @code{FOOBARP()}, etc. macros in a @file{.h} (or occasionally |
8632 file. To create one of these, copy an existing model and modify as | 8675 @file{.c}) file. To create one of these, copy an existing model and |
8633 necessary. | 8676 modify as necessary. |
8634 | 8677 |
8635 @strong{Please note:} If you define an lrecord in an external | 8678 @strong{Please note:} If you define an lrecord in an external |
8636 dynamically-loaded module, you must use @code{DECLARE_EXTERNAL_LRECORD}, | 8679 dynamically-loaded module, you must use |
8637 @code{DEFINE_EXTERNAL_LRECORD_IMPLEMENTATION}, and | 8680 @code{DECLARE_MODULE_LISP_OBJECT()}, |
8638 @code{DEFINE_EXTERNAL_LRECORD_SEQUENCE_IMPLEMENTATION} instead of the | 8681 @code{DEFINE_MODULE_*_LISP_OBJECT()}, and |
8639 non-EXTERNAL forms. These macros will dynamically add new type numbers | 8682 @code{INIT_MODULE_LISP_OBJECT()} instead of the non-MODULE |
8640 to the global enum that records them, whereas the non-EXTERNAL forms | 8683 forms. These macros will dynamically add new type numbers to the |
8641 assume that the programmer has already inserted the correct type numbers | 8684 global enum that records them, whereas the non-MODULE forms assume |
8642 into the enum's code at compile-time. | 8685 that the programmer has already inserted the correct type numbers into |
8686 the enum's code at compile-time. | |
8643 | 8687 |
8644 The various methods in the lrecord implementation structure are: | 8688 The various methods in the lrecord implementation structure are: |
8645 | 8689 |
8646 @enumerate | 8690 @enumerate |
8647 @item | 8691 @item |
8701 operating-system and window-system resources associated with the object | 8745 operating-system and window-system resources associated with the object |
8702 (e.g. pixmaps, fonts), etc. | 8746 (e.g. pixmaps, fonts), etc. |
8703 | 8747 |
8704 The finalize method can be NULL if nothing needs to be done. | 8748 The finalize method can be NULL if nothing needs to be done. |
8705 | 8749 |
8706 WARNING #1: The finalize method is also called at the end of the dump | |
8707 phase; this time with the for_disksave parameter set to non-zero. The | |
8708 object is @emph{not} about to disappear, so you have to make sure to | |
8709 @emph{not} free any extra @code{malloc()}ed memory if you're going to | |
8710 need it later. (Also, signal an error if there are any operating-system | |
8711 and window-system resources here, because they can't be dumped.) | |
8712 | |
8713 Finalize methods should, as a rule, set to zero any pointers after | 8750 Finalize methods should, as a rule, set to zero any pointers after |
8714 they've been freed, and check to make sure pointers are not zero before | 8751 they've been freed, and check to make sure pointers are not zero |
8715 freeing. Although I'm pretty sure that finalize methods are not called | 8752 before freeing. Although I'm pretty sure that finalize methods are |
8716 twice on the same object (except for the @code{for_disksave} proviso), | 8753 not called twice on the same object, we've gotten nastily burned in |
8717 we've gotten nastily burned in some cases by not doing this. | 8754 some cases by not doing this. |
8718 | 8755 |
8719 WARNING #2: The finalize method is @emph{only} called for | 8756 WARNING #1: The finalize method is @emph{only} called for |
8720 lcrecords, @emph{not} for simply lrecords. If you need a | 8757 normal objects, @emph{not} for frob-block objects. If you need a |
8721 finalize method for simple lrecords, you have to stick | 8758 finalize method for frob-block objects, you have to stick |
8722 it in the @code{ADDITIONAL_FREE_foo()} macro in @file{alloc.c}. | 8759 it in the @code{ADDITIONAL_FREE_foo()} macro in @file{alloc.c}. |
8723 | 8760 |
8724 WARNING #3: Things are in an @emph{extremely} bizarre state | 8761 WARNING #2: Things are in an @emph{extremely} bizarre state |
8725 when @code{ADDITIONAL_FREE_foo()} is called, so you have to | 8762 when @code{ADDITIONAL_FREE_foo()} is called, so you have to |
8726 be incredibly careful when writing one of these functions. | 8763 be incredibly careful when writing one of these functions. |
8727 See the comment in @code{gc_sweep()}. If you ever have to add | 8764 See the comment in @code{gc_sweep()}. If you ever have to add |
8728 one of these, consider using an lcrecord or dealing with | 8765 one of these, consider using a normal object or dealing with |
8729 the problem in a different fashion. | 8766 the problem in a different fashion. |
8759 To hash two or more values together into a single value, use | 8796 To hash two or more values together into a single value, use |
8760 @code{HASH2()}, @code{HASH3()}, @code{HASH4()}, etc. | 8797 @code{HASH2()}, @code{HASH3()}, @code{HASH4()}, etc. |
8761 | 8798 |
8762 @item | 8799 @item |
8763 @dfn{getprop}, @dfn{putprop}, @dfn{remprop}, and @dfn{plist} methods. | 8800 @dfn{getprop}, @dfn{putprop}, @dfn{remprop}, and @dfn{plist} methods. |
8764 These are used for object types that have properties. I don't feel like | 8801 These are used for object types that have properties, and are called |
8765 documenting them here. If you create one of these objects, you have to | 8802 when @code{get}, @code{put}, @code{remprop}, and @code{object-plist}, |
8766 use different macros to define them, | 8803 respectively, are called on the object. If you create one of these |
8767 i.e. @code{DEFINE_LRECORD_IMPLEMENTATION_WITH_PROPS()} or | 8804 objects, you have to use a different macro to define them, |
8768 @code{DEFINE_LRECORD_SEQUENCE_IMPLEMENTATION_WITH_PROPS()}. | 8805 i.e. @code{DEFINE_*_GENERAL_LISP_OBJECT()}. |
8769 | 8806 |
8770 @item | 8807 @item |
8771 A @dfn{size_in_bytes} method, when the object is of variable size. | 8808 A @dfn{size_in_bytes} method, when the object is of variable size. |
8772 (i.e. declared with a @code{_SEQUENCE_IMPLEMENTATION} macro.) This should | 8809 (i.e. declared with a @code{DEFINE_*_SIZABLE_*_LISP_OBJECT} macro.) |
8773 simply return the object's size in bytes, exactly as you might expect. | 8810 This should simply return the object's size in bytes, exactly as you |
8774 For an example, see the methods for window configurations and opaques. | 8811 might expect. For an example, see the methods for lstreams and opaques. |
8812 | |
8813 @item | |
8814 A @dfn{disksave} method. This is called at the end of the dump phase. | |
8815 It is used for objects that contain pointers or handles to objects | |
8816 created in external libraries, such as window-system windows or file | |
8817 handles. Such external objects cannot be dumped, so it is necessary | |
8818 to release them at dump time and arrange somehow or other for them to | |
8819 be resurrected if necessary later on. | |
8820 | |
8821 It seems that even non-dumpable objects may be around at dump time, | |
8822 and a disksaver may be provided. (In fact, the only object currently | |
8823 with a disksaver, lstream, is non-dumpable.) | |
8824 | |
8825 Objects rarely need to provide this method; most of the time it will | |
8826 be NULL. If you want to provide this method, you have to use the | |
8827 @code{DEFINE_*_GENERAL_LISP_OBJECT()} macro to define your object. | |
8775 @end enumerate | 8828 @end enumerate |
8776 | 8829 |
8777 @node Low-level allocation, Cons, lrecords, Allocation of Objects in XEmacs Lisp | 8830 @node Low-level allocation, Cons, lrecords, Allocation of Objects in XEmacs Lisp |
8778 @section Low-level allocation | 8831 @section Low-level allocation |
8779 @cindex low-level allocation | 8832 @cindex low-level allocation |
10004 complicated depending on how much information we cache. In addition to | 10057 complicated depending on how much information we cache. In addition to |
10005 the known region, we always cache the correct conversions for point, | 10058 the known region, we always cache the correct conversions for point, |
10006 BEGV, and ZV, and in addition to this we cache 16 positions where the | 10059 BEGV, and ZV, and in addition to this we cache 16 positions where the |
10007 conversion is known. We only look in the cache or update it when we | 10060 conversion is known. We only look in the cache or update it when we |
10008 need to move the known region more than a certain amount (currently 50 | 10061 need to move the known region more than a certain amount (currently 50 |
10009 chars), and then we throw away a "random" value and replace it with the | 10062 chars), and then we throw away a ``random'' value and replace it with the |
10010 newly calculated value. | 10063 newly calculated value. |
10011 | 10064 |
10012 Finally, we maintain an extra flag that tracks whether the buffer is | 10065 Finally, we maintain an extra flag that tracks whether the buffer is |
10013 entirely ASCII, to speed up the conversions even more. This flag is | 10066 entirely ASCII, to speed up the conversions even more. This flag is |
10014 actually of dubious value because in an entirely-ASCII buffer the known | 10067 actually of dubious value because in an entirely-ASCII buffer the known |
10040 track of a shifter value (0, 1, or 2) indicating how much to shift. | 10093 track of a shifter value (0, 1, or 2) indicating how much to shift. |
10041 Multiplying by 3 can be implemented by doubling and then adding the | 10094 Multiplying by 3 can be implemented by doubling and then adding the |
10042 original value. Dividing by 3, alas, cannot be implemented in any | 10095 original value. Dividing by 3, alas, cannot be implemented in any |
10043 simple shift/subtract method, as far as I know; so we just do a table | 10096 simple shift/subtract method, as far as I know; so we just do a table |
10044 lookup. For simplicity, we use a table of size 128K, which indexes the | 10097 lookup. For simplicity, we use a table of size 128K, which indexes the |
10045 "divide-by-3" values for the first 64K non-negative numbers. (Note that | 10098 ``divide-by-3'' values for the first 64K non-negative numbers. (Note that |
10046 we can increase the size up to 384K, i.e. indexing the first 192K | 10099 we can increase the size up to 384K, i.e. indexing the first 192K |
10047 non-negative numbers, while still using shorts in the array.) This also | 10100 non-negative numbers, while still using shorts in the array.) This also |
10048 means that the size of the known region can be at most 64K for | 10101 means that the size of the known region can be at most 64K for |
10049 width-three characters. | 10102 width-three characters. |
10050 @end quotation | 10103 @end quotation |
10070 @item | 10123 @item |
10071 the position of the gap | 10124 the position of the gap |
10072 @item | 10125 @item |
10073 the last value we computed | 10126 the last value we computed |
10074 @item | 10127 @item |
10075 a set of positions that are "far away" from previously computed positions | 10128 a set of positions that are ``far away'' from previously computed positions |
10076 (5000 chars currently; #### perhaps should be smaller) | 10129 (5000 chars currently; #### perhaps should be smaller) |
10077 @end itemize | 10130 @end itemize |
10078 | 10131 |
10079 For each position, we @code{CONSIDER()} it. This means: | 10132 For each position, we @code{CONSIDER()} it. This means: |
10080 | 10133 |
10096 the simple loop in FSF with the use of @code{bytecount_to_charcount()}, | 10149 the simple loop in FSF with the use of @code{bytecount_to_charcount()}, |
10097 @code{charcount_to_bytecount()}, @code{bytecount_to_charcount_down()}, or | 10150 @code{charcount_to_bytecount()}, @code{bytecount_to_charcount_down()}, or |
10098 @code{charcount_to_bytecount_down()}. (The latter two I added for this purpose.) | 10151 @code{charcount_to_bytecount_down()}. (The latter two I added for this purpose.) |
10099 These scan 4 or 8 bytes at a time through purely single-byte characters. | 10152 These scan 4 or 8 bytes at a time through purely single-byte characters. |
10100 | 10153 |
10101 If the amount we had to scan was more than our "far away" distance (5000 | 10154 If the amount we had to scan was more than our ``far away'' distance (5000 |
10102 characters, see above), then cache the new position. | 10155 characters, see above), then cache the new position. |
10103 | 10156 |
10104 #### Things to do: | 10157 #### Things to do: |
10105 | 10158 |
10106 @itemize @bullet | 10159 @itemize @bullet |
10107 @item | 10160 @item |
10108 Look at the most recent GNU Emacs to see whether anything has changed. | 10161 Look at the most recent GNU Emacs to see whether anything has changed. |
10109 @item | 10162 @item |
10110 Think about whether it makes sense to try to implement some sort of | 10163 Think about whether it makes sense to try to implement some sort of |
10111 known region or list of "known regions", like we had before. This would | 10164 known region or list of ``known regions'', like we had before. This would |
10112 be a region of entirely single-byte characters that we can check very | 10165 be a region of entirely single-byte characters that we can check very |
10113 quickly. (Previously I used a range of same-width characters of any | 10166 quickly. (Previously I used a range of same-width characters of any |
10114 size; but this adds extra complexity and slows down the scanning, and is | 10167 size; but this adds extra complexity and slows down the scanning, and is |
10115 probably not worth it.) As part of the scanning process in | 10168 probably not worth it.) As part of the scanning process in |
10116 @code{bytecount_to_charcount()} et al, we skip over chunks of entirely | 10169 @code{bytecount_to_charcount()} et al, we skip over chunks of entirely |
10324 In terms of reading the actual code, there are five optimizations | 10377 In terms of reading the actual code, there are five optimizations |
10325 (obfuscations, if you like) that have been done. | 10378 (obfuscations, if you like) that have been done. |
10326 | 10379 |
10327 @enumerate | 10380 @enumerate |
10328 @item | 10381 @item |
10329 An explicit "failure stack" has been substituted for recursion. | 10382 An explicit ``failure stack'' has been substituted for recursion. |
10330 | 10383 |
10331 @item | 10384 @item |
10332 The @code{match_1_operator}, @code{next_p}, and @code{next_b} functions | 10385 The @code{match_1_operator}, @code{next_p}, and @code{next_b} functions |
10333 are actually inlined into the @code{match} function for efficiency. | 10386 are actually inlined into the @code{match} function for efficiency. |
10334 Then the pointer movement is interspersed with the matching operations. | 10387 Then the pointer movement is interspersed with the matching operations. |
10337 If the operator uses buffer context, the buffer pointer movement is | 10390 If the operator uses buffer context, the buffer pointer movement is |
10338 sometimes implicit in the operations retrieving the context. | 10391 sometimes implicit in the operations retrieving the context. |
10339 | 10392 |
10340 @item | 10393 @item |
10341 Some cases are combined into short preparation for individual cases, and | 10394 Some cases are combined into short preparation for individual cases, and |
10342 a "fall-through" into combined code for several cases. | 10395 a ``fall-through'' into combined code for several cases. |
10343 | 10396 |
10344 @item | 10397 @item |
10345 The @code{pattern} type is not an explicit @samp{struct}. Instead, the | 10398 The @code{pattern} type is not an explicit @samp{struct}. Instead, the |
10346 data (including, @emph{e.g.}, @samp{range_table}) is inlined into the | 10399 data (including, @emph{e.g.}, @samp{range_table}) is inlined into the |
10347 compiled bytecode. This leads to bizarre code in the interpreter like | 10400 compiled bytecode. This leads to bizarre code in the interpreter like |
10356 @example | 10409 @example |
10357 ..., 'range', count, first_8_flags, second_8_flags, ..., next_op, ... | 10410 ..., 'range', count, first_8_flags, second_8_flags, ..., next_op, ... |
10358 @end example | 10411 @end example |
10359 @end enumerate | 10412 @end enumerate |
10360 | 10413 |
10361 But if you keep your eye on the "switch in a loop" structure, you | 10414 But if you keep your eye on the ``switch in a loop'' structure, you |
10362 should be able to understand the parts you need. | 10415 should be able to understand the parts you need. |
10363 | 10416 |
10364 @node Multilingual Support, Consoles; Devices; Frames; Windows, Text, Top | 10417 @node Multilingual Support, Consoles; Devices; Frames; Windows, Text, Top |
10365 @chapter Multilingual Support | 10418 @chapter Multilingual Support |
10366 @cindex Mule character sets and encodings | 10419 @cindex Mule character sets and encodings |
10818 a simple charset like ASCII, there is only one encoding normally used -- | 10871 a simple charset like ASCII, there is only one encoding normally used -- |
10819 each character is represented by a single byte, with the same value as | 10872 each character is represented by a single byte, with the same value as |
10820 its code point. For more complicated charsets, however, things are not | 10873 its code point. For more complicated charsets, however, things are not |
10821 so obvious. Unicode version 2, for example, is a large charset with | 10874 so obvious. Unicode version 2, for example, is a large charset with |
10822 thousands of characters, each indexed by a 16-bit number, often | 10875 thousands of characters, each indexed by a 16-bit number, often |
10823 represented in hex, e.g. 0x05D0 for the Hebrew letter "aleph". One | 10876 represented in hex, e.g. 0x05D0 for the Hebrew letter ``aleph''. One |
10824 obvious encoding uses two bytes per character (actually two encodings, | 10877 obvious encoding uses two bytes per character (actually two encodings, |
10825 depending on which of the two possible byte orderings is chosen). This | 10878 depending on which of the two possible byte orderings is chosen). This |
10826 encoding is convenient for internal processing of Unicode text; however, | 10879 encoding is convenient for internal processing of Unicode text; however, |
10827 it's incompatible with ASCII, so a different encoding, e.g. UTF-8, is | 10880 it's incompatible with ASCII, so a different encoding, e.g. UTF-8, is |
10828 usually used for external text, for example files or e-mail. UTF-8 | 10881 usually used for external text, for example files or e-mail. UTF-8 |
10839 | 10892 |
10840 In an ASCII or single-European-character-set world, life is very simple. | 10893 In an ASCII or single-European-character-set world, life is very simple. |
10841 There are 256 characters, and each character is represented using the | 10894 There are 256 characters, and each character is represented using the |
10842 numbers 0 through 255, which fit into a single byte. With a few | 10895 numbers 0 through 255, which fit into a single byte. With a few |
10843 exceptions (such as case-changing operations or syntax classes like | 10896 exceptions (such as case-changing operations or syntax classes like |
10844 'whitespace'), "text" is simply an array of indices into a font. You | 10897 @code{whitespace}), ``text'' is simply an array of indices into a font. You |
10845 can get different languages simply by choosing fonts with different | 10898 can get different languages simply by choosing fonts with different |
10846 8-bit character sets (ISO-8859-1, -2, special-symbol fonts, etc.), and | 10899 8-bit character sets (ISO-8859-1, -2, special-symbol fonts, etc.), and |
10847 everything will "just work" as long as anyone else receiving your text | 10900 everything will ``just work'' as long as anyone else receiving your text |
10848 uses a compatible font. | 10901 uses a compatible font. |
10849 | 10902 |
10850 In the multi-lingual world, however, it is much more complicated. There | 10903 In the multi-lingual world, however, it is much more complicated. There |
10851 are a great number of different characters which are organized in a | 10904 are a great number of different characters which are organized in a |
10852 complex fashion into various character sets. The representation to use | 10905 complex fashion into various character sets. The representation to use |
10892 text as possible. No operations should ever be performed on text encoded | 10945 text as possible. No operations should ever be performed on text encoded |
10893 in an external representation other than simple copying, because no | 10946 in an external representation other than simple copying, because no |
10894 assumptions can reliably be made about the format of this text. You | 10947 assumptions can reliably be made about the format of this text. You |
10895 cannot assume, for example, that the end of text is terminated by a null | 10948 cannot assume, for example, that the end of text is terminated by a null |
10896 byte. (For example, if the text is Unicode, it will have many null bytes | 10949 byte. (For example, if the text is Unicode, it will have many null bytes |
10897 in it.) You cannot find the next "slash" character by searching through | 10950 in it.) You cannot find the next ``slash'' character by searching through |
10898 the bytes until you find a byte that looks like a "slash" character, | 10951 the bytes until you find a byte that looks like a ``slash'' character, |
10899 because it might actually be the second byte of a Kanji character. | 10952 because it might actually be the second byte of a Kanji character. |
10900 Furthermore, all text in the internal representation must be converted, | 10953 Furthermore, all text in the internal representation must be converted, |
10901 even if it is known to be completely ASCII, because the external | 10954 even if it is known to be completely ASCII, because the external |
10902 representation may not be ASCII compatible (for example, if it is | 10955 representation may not be ASCII compatible (for example, if it is |
10903 Unicode). | 10956 Unicode). |
10923 the structures of a particular external encoding and the methods required | 10976 the structures of a particular external encoding and the methods required |
10924 to convert to and from this encoding. A facility exists to create coding | 10977 to convert to and from this encoding. A facility exists to create coding |
10925 system aliases, which in essence gives a single coding system two | 10978 system aliases, which in essence gives a single coding system two |
10926 different names. It is effectively used in XEmacs to provide a layer of | 10979 different names. It is effectively used in XEmacs to provide a layer of |
10927 abstraction on top of the actual coding systems. For example, the coding | 10980 abstraction on top of the actual coding systems. For example, the coding |
10928 system alias "file-name" points to whichever coding system is currently | 10981 system alias ``file-name'' points to whichever coding system is currently |
10929 used for encoding and decoding file names as passed to or retrieved from | 10982 used for encoding and decoding file names as passed to or retrieved from |
10930 system calls. In general, the actual encoding will differ from system to | 10983 system calls. In general, the actual encoding will differ from system to |
10931 system, and will also depend on the particular locale that the user is in. The use | 10984 system, and will also depend on the particular locale that the user is in. The use |
10932 of the file-name alias effectively hides that implementation detail on | 10985 of the file-name alias effectively hides that implementation detail on |
10933 top of that abstract interface layer which provides a unified set of | 10986 top of that abstract interface layer which provides a unified set of |
11434 C = plain char, when the base type is unsigned | 11487 C = plain char, when the base type is unsigned |
11435 U = unsigned | 11488 U = unsigned |
11436 S = signed | 11489 S = signed |
11437 @end example | 11490 @end example |
11438 | 11491 |
11439 (Formerly I had a comment saying that type (e) "should be replaced with | 11492 (Formerly I had a comment saying that type (e) ``should be replaced with |
11440 void *". However, there are in fact many places where an unsigned char | 11493 void *''. However, there are in fact many places where an unsigned char |
11441 * might be used -- e.g. for ease in pointer computation, since void * | 11494 * might be used -- e.g. for ease in pointer computation, since void * |
11442 doesn't allow this, and for compatibility with external APIs.) | 11495 doesn't allow this, and for compatibility with external APIs.) |
11443 | 11496 |
11444 Note that these typedefs are purely for documentation purposes; from | 11497 Note that these typedefs are purely for documentation purposes; from |
11445 the C code's perspective, they are exactly equivalent to @code{char *}, | 11498 the C code's perspective, they are exactly equivalent to @code{char *}, |
11456 @node Different Ways of Seeing Internal Text, Buffer Positions, Byte Types, Byte/Character Types; Buffer Positions; Other Typedefs | 11509 @node Different Ways of Seeing Internal Text, Buffer Positions, Byte Types, Byte/Character Types; Buffer Positions; Other Typedefs |
11457 @subsection Different Ways of Seeing Internal Text | 11510 @subsection Different Ways of Seeing Internal Text |
11458 @cindex different ways of seeing internal text | 11511 @cindex different ways of seeing internal text |
11459 | 11512 |
11460 There are various ways of representing internal text. The two primary | 11513 There are various ways of representing internal text. The two primary |
11461 ways are as an "array" of individual characters and as a | 11514 ways are as an ``array'' of individual characters and as a |
11462 "stream" of bytes. In the ASCII world, where there are only 256 | 11515 ``stream'' of bytes. In the ASCII world, where there are only 256 |
11463 characters at most, things are easy because each character fits into a | 11516 characters at most, things are easy because each character fits into a |
11464 byte. In general, however, this is not true -- see the above discussion | 11517 byte. In general, however, this is not true -- see the above discussion |
11465 of characters vs. encodings. | 11518 of characters vs. encodings. |
11466 | 11519 |
11467 In some cases, it's also important to distinguish between a stream | 11520 In some cases, it's also important to distinguish between a stream |
11468 representation as a series of bytes and as a series of textual units. | 11521 representation as a series of bytes and as a series of textual units. |
11469 This is particularly important with respect to Unicode. The UTF-16 representation | 11522 This is particularly important with respect to Unicode. The UTF-16 representation |
11470 (sometimes referred to, rather sloppily, as simply the "Unicode" format) | 11523 (sometimes referred to, rather sloppily, as simply the ``Unicode'' format) |
11471 represents text as a series of 16-bit units. Mostly, each unit | 11524 represents text as a series of 16-bit units. Mostly, each unit |
11472 corresponds to a single character, but not necessarily, as characters | 11525 corresponds to a single character, but not necessarily, as characters |
11473 outside of the range 0-65535 (the BMP or "Basic Multilingual Plane" of | 11526 outside of the range 0-65535 (the BMP or ``Basic Multilingual Plane'' of |
11474 Unicode) require two 16-bit units, through the mechanism of | 11527 Unicode) require two 16-bit units, through the mechanism of |
11475 "surrogates". When a series of 16-bit units is serialized into a byte | 11528 ``surrogates''. When a series of 16-bit units is serialized into a byte |
11476 stream, there are at least two possible representations, little-endian | 11529 stream, there are at least two possible representations, little-endian |
11477 and big-endian, and which one is used may depend on the native format of | 11530 and big-endian, and which one is used may depend on the native format of |
11478 16-bit integers in the CPU of the machine that XEmacs is running | 11531 16-bit integers in the CPU of the machine that XEmacs is running |
11479 on. (Similarly, UTF-32 is logically a representation with 32-bit textual | 11532 on. (Similarly, UTF-32 is logically a representation with 32-bit textual |
11480 units.) | 11533 units.) |
11487 @item | 11540 @item |
11488 UTF-16 has 2-byte (16-bit) units. | 11541 UTF-16 has 2-byte (16-bit) units. |
11489 @item | 11542 @item |
11490 UTF-32 has 4-byte (32-bit) units. | 11543 UTF-32 has 4-byte (32-bit) units. |
11491 @item | 11544 @item |
11492 XEmacs-internal encoding (the old "Mule" encoding) has 1-byte (8-bit) | 11545 XEmacs-internal encoding (the old ``Mule'' encoding) has 1-byte (8-bit) |
11493 units. | 11546 units. |
11494 @item | 11547 @item |
11495 UTF-7 technically has 7-bit units that are within the "mail-safe" range | 11548 UTF-7 technically has 7-bit units that are within the ``mail-safe'' range |
11496 (ASCII 32 - 126 plus a few control characters), but normally is encoded | 11549 (ASCII 32 - 126 plus a few control characters), but normally is encoded |
11497 in an 8-bit stream. (UTF-7 is also a modal encoding, since it has a | 11550 in an 8-bit stream. (UTF-7 is also a modal encoding, since it has a |
11498 normal mode where printable ASCII characters represent themselves and a | 11551 normal mode where printable ASCII characters represent themselves and a |
11499 shifted mode, introduced with a plus sign, where a base-64 encoding is | 11552 shifted mode, introduced with a plus sign, where a base-64 encoding is |
11500 used.) | 11553 used.) |
11555 @table @code | 11608 @table @code |
11556 @item Ibyte | 11609 @item Ibyte |
11557 The data in a buffer or string is logically made up of Ibyte objects, | 11610 The data in a buffer or string is logically made up of Ibyte objects, |
11558 where an Ibyte takes up the same amount of space as a char. (It is | 11611 where an Ibyte takes up the same amount of space as a char. (It is |
11559 declared differently, though, to catch invalid usages.) Strings stored | 11612 declared differently, though, to catch invalid usages.) Strings stored |
11560 using Ibytes are said to be in "internal format". The important | 11613 using Ibytes are said to be in ``internal format''. The important |
11561 characteristics of internal format are | 11614 characteristics of internal format are |
11562 | 11615 |
11563 @itemize @minus | 11616 @itemize @minus |
11564 @item | 11617 @item |
11565 ASCII characters are represented as a single Ibyte, in the range 0 - | 11618 ASCII characters are represented as a single Ibyte, in the range 0 - |
11608 | 11661 |
11609 This means that Ichar values are upwardly compatible with the standard | 11662 This means that Ichar values are upwardly compatible with the standard |
11610 8-bit representation of ASCII/ISO-8859-1. | 11663 8-bit representation of ASCII/ISO-8859-1. |
11611 | 11664 |
11612 @item Extbyte | 11665 @item Extbyte |
11613 Strings that go in or out of Emacs are in "external format", typedef'ed | 11666 Strings that go in or out of Emacs are in ``external format'', typedef'ed |
11614 as an array of char or a char *. There is more than one external format | 11667 as an array of char or a char *. There is more than one external format |
11615 (JIS, EUC, etc.) but they all have similar properties. They are modal | 11668 (JIS, EUC, etc.) but they all have similar properties. They are modal |
11616 encodings, which is to say that the meaning of particular bytes is not | 11669 encodings, which is to say that the meaning of particular bytes is not |
11617 fixed but depends on what "mode" the string is currently in (e.g. bytes | 11670 fixed but depends on what ``mode'' the string is currently in (e.g. bytes |
11618 in the range 0 - 0x7f might be interpreted as ASCII, or as Hiragana, or | 11671 in the range 0 - 0x7f might be interpreted as ASCII, or as Hiragana, or |
11619 as 2-byte Kanji, depending on the current mode). The mode starts out in | 11672 as 2-byte Kanji, depending on the current mode). The mode starts out in |
11620 ASCII/ISO-8859-1 and is switched using escape sequences -- for example, | 11673 ASCII/ISO-8859-1 and is switched using escape sequences -- for example, |
11621 in the JIS encoding, 'ESC $ B' switches to a mode where pairs of bytes | 11674 in the JIS encoding, 'ESC $ B' switches to a mode where pairs of bytes |
11622 in the range 0 - 0x7f are interpreted as Kanji characters. | 11675 in the range 0 - 0x7f are interpreted as Kanji characters. |
11642 | 11695 |
11643 There are three possible ways to specify positions in a buffer. All | 11696 There are three possible ways to specify positions in a buffer. All |
11644 of these are one-based: the beginning of the buffer is position or | 11697 of these are one-based: the beginning of the buffer is position or |
11645 index 1, and 0 is not a valid position. | 11698 index 1, and 0 is not a valid position. |
11646 | 11699 |
11647 As a "buffer position" (typedef Charbpos): | 11700 As a ``buffer position'' (typedef Charbpos): |
11648 | 11701 |
11649 This is an index specifying an offset in characters from the | 11702 This is an index specifying an offset in characters from the |
11650 beginning of the buffer. Note that buffer positions are | 11703 beginning of the buffer. Note that buffer positions are |
11651 logically @strong{between} characters, not on a character. The | 11704 logically @strong{between} characters, not on a character. The |
11652 difference between two buffer positions specifies the number of | 11705 difference between two buffer positions specifies the number of |
11653 characters between those positions. Buffer positions are the | 11706 characters between those positions. Buffer positions are the |
11654 only kind of position externally visible to the user. | 11707 only kind of position externally visible to the user. |
11655 | 11708 |
11656 As a "byte index" (typedef Bytebpos): | 11709 As a ``byte index'' (typedef Bytebpos): |
11657 | 11710 |
11658 This is an index over the bytes used to represent the characters | 11711 This is an index over the bytes used to represent the characters |
11659 in the buffer. If there is no Mule support, this is identical | 11712 in the buffer. If there is no Mule support, this is identical |
11660 to a buffer position, because each character is represented | 11713 to a buffer position, because each character is represented |
11661 using one byte. However, with Mule support, many characters | 11714 using one byte. However, with Mule support, many characters |
11662 require two or more bytes for their representation, and so a | 11715 require two or more bytes for their representation, and so a |
11663 byte index may be greater than the corresponding buffer | 11716 byte index may be greater than the corresponding buffer |
11664 position. | 11717 position. |
11665 | 11718 |
11666 As a "memory index" (typedef Membpos): | 11719 As a ``memory index'' (typedef Membpos): |
11667 | 11720 |
11668 This is the byte index adjusted for the gap. For positions | 11721 This is the byte index adjusted for the gap. For positions |
11669 before the gap, this is identical to the byte index. For | 11722 before the gap, this is identical to the byte index. For |
11670 positions after the gap, this is the byte index plus the gap | 11723 positions after the gap, this is the byte index plus the gap |
11671 size. There are two possible memory indices for the gap | 11724 size. There are two possible memory indices for the gap |
11672 position; the memory index at the beginning of the gap should | 11725 position; the memory index at the beginning of the gap should |
11673 always be used, except in code that deals with manipulating the | 11726 always be used, except in code that deals with manipulating the |
11674 gap, where both indices may be seen. The address of the | 11727 gap, where both indices may be seen. The address of the |
11675 character "at" (i.e. following) a particular position can be | 11728 character ``at'' (i.e. following) a particular position can be |
11676 obtained from the formula | 11729 obtained from the formula |
11677 | 11730 |
11678 buffer_start_address + memory_index(position) - 1 | 11731 buffer_start_address + memory_index(position) - 1 |
11679 | 11732 |
11680 except in the case of characters at the gap position. | 11733 except in the case of characters at the gap position. |
11779 use the buffer-level functions in buffer.h, which automatically know the | 11832 use the buffer-level functions in buffer.h, which automatically know the |
11780 correct format and handle the gap. | 11833 correct format and handle the gap. |
11781 | 11834 |
11782 Some terminology: | 11835 Some terminology: |
11783 | 11836 |
11784 "itext" appearing in the macros means "internal-format text" -- type | 11837 "itext" appearing in the macros means "internal-format text" -- type |
11785 @code{Ibyte *}. Operations on such pointers themselves, rather than on the | 11838 @code{Ibyte *}. Operations on such pointers themselves, rather than on the |
11786 text being pointed to, have "itext" instead of "itext" in the macro | 11839 text being pointed to, have "itext" instead of "itext" in the macro |
11787 name. "ichar" in the macro names means an Ichar -- the representation | 11840 name. "ichar" in the macro names means an Ichar -- the representation |
11788 of a character as a single integer rather than a series of bytes, as part | 11841 of a character as a single integer rather than a series of bytes, as part |
11789 of "itext". Many of the macros below are for converting between the | 11842 of "itext". Many of the macros below are for converting between the |
11988 @item | 12041 @item |
11989 (c) using the GCC extension (@{ ... @}). | 12042 (c) using the GCC extension (@{ ... @}). |
11990 @end itemize | 12043 @end itemize |
11991 | 12044 |
11992 Turned out that all of the above had bugs, all caused by GCC (hence the | 12045 Turned out that all of the above had bugs, all caused by GCC (hence the |
11993 comments about "those GCC wankers" and "ream gcc up the ass"). As for | 12046 comments about ``those GCC wankers'' and ``ream gcc up the ass''). As for |
11994 (a), some versions of GCC (especially on Intel platforms) had | 12047 (a), some versions of GCC (especially on Intel platforms) had |
11995 buggy implementations of @code{alloca()} that couldn't handle being called | 12048 buggy implementations of @code{alloca()} that couldn't handle being called |
11996 inside of a function call -- they just decremented the stack right in the | 12049 inside of a function call -- they just decremented the stack right in the |
11997 middle of pushing args. Oops, crash with stack trashing, very bad. (b) | 12050 middle of pushing args. Oops, crash with stack trashing, very bad. (b) |
11998 was an attempt to fix (a), and that led to further GCC crashes, esp. when | 12051 was an attempt to fix (a), and that led to further GCC crashes, esp. when |
12971 consistency. For example, the new Mule workspace contains Ibyte | 13024 consistency. For example, the new Mule workspace contains Ibyte |
12972 versions of the stdlib string functions. | 13025 versions of the stdlib string functions. |
12973 @item Extbyte, UExtbyte | 13026 @item Extbyte, UExtbyte |
12974 Pointer to text in some external format, which can be defined as all | 13027 Pointer to text in some external format, which can be defined as all |
12975 formats other than the internal one. The data representing a string | 13028 formats other than the internal one. The data representing a string |
12976 in "external" format (binary or any external encoding) is logically a | 13029 in ``external'' format (binary or any external encoding) is logically a |
12977 set of Extbytes. Extbyte is guaranteed to be just a char, so for | 13030 set of Extbytes. Extbyte is guaranteed to be just a char, so for |
12978 example strlen (Extbyte *) is OK. Extbyte is only a documentation | 13031 example strlen (Extbyte *) is OK. Extbyte is only a documentation |
12979 device for referring to external text. | 13032 device for referring to external text. |
12980 @item Ascbyte, UAscbyte | 13033 @item Ascbyte, UAscbyte |
12981 pure ASCII text, consisting of bytes in a string in entirely US-ASCII | 13034 pure ASCII text, consisting of bytes in a string in entirely US-ASCII |
13115 | 13168 |
13116 @node Mule-izing Code, , An Example of Mule-Aware Code, Coding for Mule | 13169 @node Mule-izing Code, , An Example of Mule-Aware Code, Coding for Mule |
13117 @subsection Mule-izing Code | 13170 @subsection Mule-izing Code |
13118 | 13171 |
13119 A lot of code is written without Mule in mind, and needs to be made | 13172 A lot of code is written without Mule in mind, and needs to be made |
13120 Mule-correct or "Mule-ized". There is really no substitute for | 13173 Mule-correct or ``Mule-ized''. There is really no substitute for |
13121 line-by-line analysis when doing this, but the following checklist can | 13174 line-by-line analysis when doing this, but the following checklist can |
13122 help: | 13175 help: |
13123 | 13176 |
13124 @itemize @bullet | 13177 @itemize @bullet |
13125 @item | 13178 @item |
13333 @item | 13386 @item |
13334 Look in the CRT sources! They come with VC++. See win32.c. | 13387 Look in the CRT sources! They come with VC++. See win32.c. |
13335 @end enumerate | 13388 @end enumerate |
13336 | 13389 |
13337 @node Locales, More about code pages, Microsoft Documentation, Microsoft Windows-Related Multilingual Issues | 13390 @node Locales, More about code pages, Microsoft Documentation, Microsoft Windows-Related Multilingual Issues |
13338 @subsection Locales, code pages, and other concepts of "language" | 13391 @subsection Locales, code pages, and other concepts of ``language'' |
13339 @cindex locales, code pages, and other concepts of "language" | 13392 @cindex locales, code pages, and other concepts of ``language'' |
13340 | 13393 |
13341 First, make sure you clearly understand the difference between the C | 13394 First, make sure you clearly understand the difference between the C |
13342 runtime library (CRT) and the Win32 API! See win32.c. | 13395 runtime library (CRT) and the Win32 API! See win32.c. |
13343 | 13396 |
13344 There are various different ways of representing the vague concept | 13397 There are various different ways of representing the vague concept |
13345 of "language", and it can be very confusing. So: | 13398 of ``language'', and it can be very confusing. So: |
13346 | 13399 |
13347 @itemize @bullet | 13400 @itemize @bullet |
13348 @item | 13401 @item |
13349 The CRT library has the concept of "locale", which is a | 13402 The CRT library has the concept of ``locale'', which is a |
13350 combination of language and country, and which controls the way | 13403 combination of language and country, and which controls the way |
13351 currency and dates are displayed, the encoding of data, etc. | 13404 currency and dates are displayed, the encoding of data, etc. |
13352 | 13405 |
13353 @item | 13406 @item |
13354 XEmacs has the concept of "language environment", more or less | 13407 XEmacs has the concept of ``language environment'', more or less |
13355 like a locale; although currently in most cases it just refers to | 13408 like a locale; although currently in most cases it just refers to |
13356 the language, and no sub-language distinctions are | 13409 the language, and no sub-language distinctions are |
13357 made. (Exceptions are with Chinese, which has different language | 13410 made. (Exceptions are with Chinese, which has different language |
13358 environments for Taiwan and mainland China, due to the different | 13411 environments for Taiwan and mainland China, due to the different |
13359 encodings and writing systems.) | 13412 encodings and writing systems.) |
13361 @item | 13414 @item |
13362 Windows has a number of different language concepts: | 13415 Windows has a number of different language concepts: |
13363 | 13416 |
13364 @enumerate | 13417 @enumerate |
13365 @item | 13418 @item |
13366 There are "languages" and "sublanguages", which correspond to | 13419 There are ``languages'' and ``sublanguages'', which correspond to |
13367 the languages and countries of the C library -- e.g. LANG_ENGLISH | 13420 the languages and countries of the C library -- e.g. LANG_ENGLISH |
13368 and SUBLANG_ENGLISH_US. These are identified by 8-bit integers, | 13421 and SUBLANG_ENGLISH_US. These are identified by 8-bit integers, |
13369 called the "primary language identifier" and "sublanguage | 13422 called the ``primary language identifier'' and ``sublanguage |
13370 identifier", respectively. These are combined into a 16-bit | 13423 identifier'', respectively. These are combined into a 16-bit |
13371 integer or "language identifier" by MAKELANGID(). | 13424 integer or ``language identifier'' by @code{MAKELANGID()}. |
13372 | 13425 |
13373 @item | 13426 @item |
13374 The language identifier in turn is combined with a "sort | 13427 The language identifier in turn is combined with a ``sort |
13375 identifier" (and optionally a "sort version") to yield a 32-bit | 13428 identifier'' (and optionally a ``sort version'') to yield a 32-bit |
13376 integer called a "locale identifier" (type LCID), which identifies | 13429 integer called a ``locale identifier'' (type LCID), which identifies |
13377 locales -- the primary means of distinguishing language/regional | 13430 locales -- the primary means of distinguishing language/regional |
13378 settings and similar to C library locales. | 13431 settings and similar to C library locales. |
13379 | 13432 |
13380 @item | 13433 @item |
13381 A "code page" combines the XEmacs concepts of "charset" and "coding | 13434 A ``code page'' combines the XEmacs concepts of ``charset'' and ``coding |
13382 system". It logically encompasses | 13435 system''. It logically encompasses |
13383 | 13436 |
13384 @itemize @minus | 13437 @itemize @minus |
13385 @item | 13438 @item |
13386 a set of supported characters | 13439 a set of supported characters |
13387 @item | 13440 @item |
13390 supported | 13443 supported |
13391 @item | 13444 @item |
13392 a way of encoding a series of characters into a string of bytes | 13445 a way of encoding a series of characters into a string of bytes |
13393 @end itemize | 13446 @end itemize |
13394 | 13447 |
13395 Note that the first two properties correspond to an XEmacs "charset" | 13448 Note that the first two properties correspond to an XEmacs ``charset'' |
13396 and the latter an XEmacs "coding system". | 13449 and the latter an XEmacs ``coding system''. |
13397 | 13450 |
13398 Traditional encodings are either simple one-byte encodings, or | 13451 Traditional encodings are either simple one-byte encodings, or |
13399 combination one-byte/two-byte encodings (aka MBCS encodings, where MBCS | 13452 combination one-byte/two-byte encodings (aka MBCS encodings, where MBCS |
13400 stands for "Multibyte Character Set") with the following properties: | 13453 stands for ``Multibyte Character Set'') with the following properties: |
13401 | 13454 |
13402 @itemize @minus | 13455 @itemize @minus |
13403 @item | 13456 @item |
13404 all characters are encoded as a one-byte or two-byte sequence | 13457 all characters are encoded as a one-byte or two-byte sequence |
13405 @item | 13458 @item |
13406 the encoding is stateless (non-modal) | 13459 the encoding is stateless (non-modal) |
13407 @item | 13460 @item |
13408 the lower 128 bytes are compatible with ASCII | 13461 the lower 128 bytes are compatible with ASCII |
13409 @item | 13462 @item |
13410 in the higher bytes, the value of the first byte ("lead byte") | 13463 in the higher bytes, the value of the first byte (``lead byte'') |
13411 determines whether a second byte follows | 13464 determines whether a second byte follows |
13412 @item | 13465 @item |
13413 the values used for second bytes may overlap those used for first | 13466 the values used for second bytes may overlap those used for first |
13414 bytes, and (in some encodings) include values in the low half; thus, | 13467 bytes, and (in some encodings) include values in the low half; thus, |
13415 moving backwards is hard, and pure-ASCII algorithms (e.g. finding the | 13468 moving backwards is hard, and pure-ASCII algorithms (e.g. finding the |
13427 Every Windows locale has four associated code pages: ANSI (an | 13480 Every Windows locale has four associated code pages: ANSI (an |
13428 international standard or some Microsoft-created approximation; the | 13481 international standard or some Microsoft-created approximation; the |
13429 native code page under Windows), OEM (a DOS encoding, still used in the | 13482 native code page under Windows), OEM (a DOS encoding, still used in the |
13430 FAT file system), Mac (an encoding used on the Macintosh) and EBCDIC (a | 13483 FAT file system), Mac (an encoding used on the Macintosh) and EBCDIC (a |
13431 non-ASCII-compatible encoding used on IBM mainframes, originally based | 13484 non-ASCII-compatible encoding used on IBM mainframes, originally based |
13432 on the BCD or "binary-coded decimal" encoding of numbers). All code | 13485 on the BCD or ``binary-coded decimal'' encoding of numbers). All code |
13433 pages associated with a locale follow (as far as I know) the properties | 13486 pages associated with a locale follow (as far as I know) the properties |
13434 listed above for traditional code pages. More than one locale can share | 13487 listed above for traditional code pages. More than one locale can share |
13435 a code page -- e.g. all the Western European languages, including | 13488 a code page -- e.g. all the Western European languages, including |
13436 English, do. | 13489 English, do. |
13437 | 13490 |
13438 @item | 13491 @item |
13439 Windows also has an "input locale identifier" (aka "keyboard | 13492 Windows also has an ``input locale identifier'' (aka ``keyboard |
13440 layout id") or HKL, which is a 32-bit integer composed of the | 13493 layout id'') or HKL, which is a 32-bit integer composed of the |
13441 16-bit language identifier and a 16-bit "device identifier", which | 13494 16-bit language identifier and a 16-bit ``device identifier'', which |
13442 originally specified a particular keyboard layout (e.g. the locale | 13495 originally specified a particular keyboard layout (e.g. the locale |
13443 "US English" can have the QWERTY layout, the Dvorak layout, etc.), | 13496 ``US English'' can have the QWERTY layout, the Dvorak layout, etc.), |
13444 but has been expanded to include speech-to-text converters and | 13497 but has been expanded to include speech-to-text converters and |
13445 other non-keyboard ways of inputting text. Note that both the HKL | 13498 other non-keyboard ways of inputting text. Note that both the HKL |
13446 and LCID share the language identifier in the lower 16 bits, and in | 13499 and LCID share the language identifier in the lower 16 bits, and in |
13447 both cases a 0 in the upper 16 bits means "default" (sort order or | 13500 both cases a 0 in the upper 16 bits means ``default'' (sort order or |
13448 device), providing a way to convert between HKL's, LCID's, and | 13501 device), providing a way to convert between HKL's, LCID's, and |
13449 language identifiers (i.e. language/sublanguage pairs). The | 13502 language identifiers (i.e. language/sublanguage pairs). The |
13450 default keyboard layout for a language is (as far as I can | 13503 default keyboard layout for a language is (as far as I can |
13451 determine) established using the Regional Settings control panel | 13504 determine) established using the Regional Settings control panel |
13452 applet, where you can add input locales as combinations of language | 13505 applet, where you can add input locales as combinations of language |
13460 | 13513 |
13461 @node More about code pages, More about locales, Locales, Microsoft Windows-Related Multilingual Issues | 13514 @node More about code pages, More about locales, Locales, Microsoft Windows-Related Multilingual Issues |
13462 @subsection More about code pages | 13515 @subsection More about code pages |
13463 @cindex more about code pages | 13516 @cindex more about code pages |
13464 | 13517 |
13465 Here is what MSDN says about code pages (article "Code Pages"): | 13518 Here is what MSDN says about code pages (article ``Code Pages''): |
13466 | 13519 |
13467 @quotation | 13520 @quotation |
13468 A code page is a character set, which can include numbers, | 13521 A code page is a character set, which can include numbers, |
13469 punctuation marks, and other glyphs. Different languages and locales | 13522 punctuation marks, and other glyphs. Different languages and locales |
13470 may use different code pages. For example, ANSI code page 1252 is | 13523 may use different code pages. For example, ANSI code page 1252 is |
13502 | 13555 |
13503 -- The "C" locale is defined by ANSI to correspond to the locale in | 13556 -- The "C" locale is defined by ANSI to correspond to the locale in |
13504 which C programs have traditionally executed. The code page for the | 13557 which C programs have traditionally executed. The code page for the |
13505 "C" locale (code page) corresponds to the ASCII character | 13558 "C" locale (code page) corresponds to the ASCII character |
13506 set. For example, in the "C" locale, islower returns true for the | 13559 set. For example, in the "C" locale, islower returns true for the |
13507 values 0x61 ?0x7A only. In another locale, islower may return true | 13560 values 0x61 to 0x7A only. In another locale, islower may return true |
13508 for these as well as other values, as defined by that locale. | 13561 for these as well as other values, as defined by that locale. |
13509 | 13562 |
13510 Under "Locale-Dependent Routines" we notice the following setlocale | 13563 Under ``Locale-Dependent Routines'' we notice the following setlocale |
13511 dependencies: | 13564 dependencies: |
13512 | 13565 |
13513 atof, atoi, atol (LC_NUMERIC) | 13566 atof, atoi, atol (LC_NUMERIC) |
13514 is Routines (LC_CTYPE) | 13567 is Routines (LC_CTYPE) |
13515 isleadbyte (LC_CTYPE) | 13568 isleadbyte (LC_CTYPE) |
13538 wcstombs (LC_CTYPE) | 13591 wcstombs (LC_CTYPE) |
13539 wctomb (LC_CTYPE) | 13592 wctomb (LC_CTYPE) |
13540 _wtoi/_wtol (LC_NUMERIC) | 13593 _wtoi/_wtol (LC_NUMERIC) |
13541 @end quotation | 13594 @end quotation |
13542 | 13595 |
13543 NOTE: The above documentation doesn't clearly explain the "locale code | 13596 NOTE: The above documentation doesn't clearly explain the ``locale code |
13544 page" and "multibyte code page". These are two different values, | 13597 page'' and ``multibyte code page''. These are two different values, |
13545 maintained respectively in the CRT global variables __lc_codepage and | 13598 maintained respectively in the CRT global variables __lc_codepage and |
13546 __mbcodepage. Calling e.g. setlocale (LC_ALL, "JAPANESE") sets @strong{ONLY} | 13599 __mbcodepage. Calling e.g. setlocale (LC_ALL, "JAPANESE") sets @strong{ONLY} |
13547 __lc_codepage to 932 (the code page for Japanese), and leaves | 13600 __lc_codepage to 932 (the code page for Japanese), and leaves |
13548 __mbcodepage unchanged (usually 1252, i.e. Windows-ANSI). You'd have to | 13601 __mbcodepage unchanged (usually 1252, i.e. Windows-ANSI). You'd have to |
13549 call _setmbcp() to change __mbcodepage. Figuring out from the | 13602 call _setmbcp() to change __mbcodepage. Figuring out from the |
13550 documentation which routines use which code page is not so obvious. But: | 13603 documentation which routines use which code page is not so obvious. But: |
13551 | 13604 |
13552 @itemize @bullet | 13605 @itemize @bullet |
13553 @item | 13606 @item |
13554 from "Interpretation of Multibyte-Character Sequences" it appears that | 13607 from ``Interpretation of Multibyte-Character Sequences'' it appears that |
13555 all "multibyte-character routines" use the multibyte code page except for | 13608 all ``multibyte-character routines'' use the multibyte code page except for |
13556 mblen(), _mbstrlen(), mbstowcs(), mbtowc(), wcstombs(), and wctomb(). | 13609 @code{mblen()}, @code{_mbstrlen()}, @code{mbstowcs()}, @code{mbtowc()}, @code{wcstombs()}, and @code{wctomb()}. |
13557 | 13610 |
13558 @item | 13611 @item |
13559 from "_setmbcp": "The multibyte code page also affects | 13612 from ``_setmbcp'': ``The multibyte code page also affects |
13560 multibyte-character processing by the following run-time library | 13613 multibyte-character processing by the following run-time library |
13561 routines: _exec functions _mktemp _stat _fullpath _spawn functions | 13614 routines: _exec functions _mktemp _stat _fullpath _spawn functions |
13562 _tempnam _makepath _splitpath tmpnam. In addition, all run-time library | 13615 _tempnam _makepath _splitpath tmpnam. In addition, all run-time library |
13563 routines that receive multibyte-character argv or envp program arguments | 13616 routines that receive multibyte-character argv or envp program arguments |
13564 as parameters (such as the _exec and _spawn families) process these | 13617 as parameters (such as the _exec and _spawn families) process these |
13565 strings according to the multibyte code page. Hence these routines are | 13618 strings according to the multibyte code page. Hence these routines are |
13566 also affected by a call to _setmbcp that changes the multibyte code | 13619 also affected by a call to _setmbcp that changes the multibyte code |
13567 page." | 13620 page.'' |
13568 @end itemize | 13621 @end itemize |
13569 | 13622 |
13570 Summary: from looking at the CRT source (which comes with VC++) and | 13623 Summary: from looking at the CRT source (which comes with VC++) and |
13571 carefully looking through the docs, it appears that: | 13624 carefully looking through the docs, it appears that: |
13572 | 13625 |
13573 @itemize @bullet | 13626 @itemize @bullet |
13574 @item | 13627 @item |
13575 the "locale code page" is used by all of the routines listed above | 13628 the ``locale code page'' is used by all of the routines listed above |
13576 under "Locale-Dependent Routines" (EXCEPT _mbccpy() and _mbclen()), | 13629 under ``Locale-Dependent Routines'' (EXCEPT @code{_mbccpy()} and @code{_mbclen()}), |
13577 as well as any other place that converts between multibyte and Unicode | 13630 as well as any other place that converts between multibyte and Unicode |
13578 strings, e.g. the startup code. | 13631 strings, e.g. the startup code. |
13579 @item | 13632 @item |
13580 the "multibyte code page" is used in all of the *mb*() routines | 13633 the ``multibyte code page'' is used in all of the @code{*mb*()} routines |
13581 except mblen(), _mbstrlen(), mbstowcs(), mbtowc(), wcstombs(), | 13634 except @code{mblen()}, @code{_mbstrlen()}, @code{mbstowcs()}, @code{mbtowc()}, @code{wcstombs()}, |
13582 and wctomb(); also _exec*(), _spawn*(), _mktemp(), _stat(), _fullpath(), | 13635 and @code{wctomb()}; also @code{_exec*()}, @code{_spawn*()}, @code{_mktemp()}, @code{_stat()}, @code{_fullpath()}, |
13583 _tempnam(), _makepath(), _splitpath(), tmpnam(), and similar functions | 13636 @code{_tempnam()}, @code{_makepath()}, @code{_splitpath()}, @code{tmpnam()}, and similar functions |
13584 without the leading underscore. | 13637 without the leading underscore. |
13585 @end itemize | 13638 @end itemize |
13586 | 13639 |
13587 @node More about locales, Unicode support under Windows, More about code pages, Microsoft Windows-Related Multilingual Issues | 13640 @node More about locales, Unicode support under Windows, More about code pages, Microsoft Windows-Related Multilingual Issues |
13588 @subsection More about locales | 13641 @subsection More about locales |
13591 In addition to the locale defined by the CRT, Windows (i.e. the Win32 API) | 13644 In addition to the locale defined by the CRT, Windows (i.e. the Win32 API) |
13592 defines various locales: | 13645 defines various locales: |
13593 | 13646 |
13594 @itemize @bullet | 13647 @itemize @bullet |
13595 @item | 13648 @item |
13596 The system-default locale is the locale defined under "Language | 13649 The system-default locale is the locale defined under ``Language |
13597 settings for the system" in the "Regional Options" control panel. This | 13650 settings for the system'' in the ``Regional Options'' control panel. This |
13598 is NOT user-specific, and changing it requires a reboot (at least under | 13651 is NOT user-specific, and changing it requires a reboot (at least under |
13599 Windows 2000). The ANSI code page of the system-default locale is | 13652 Windows 2000). The ANSI code page of the system-default locale is |
13600 returned by GetACP(), and you can specify this code page in calls | 13653 returned by @code{GetACP()}, and you can specify this code page in calls |
13601 e.g. to MultiByteToWideChar with the constant CP_ACP. | 13654 e.g. to MultiByteToWideChar with the constant CP_ACP. |
13602 | 13655 |
13603 @item | 13656 @item |
13604 The user-default locale is the locale defined under "Settings for the | 13657 The user-default locale is the locale defined under ``Settings for the |
13605 current user" in the "Regional Options" control panel. | 13658 current user'' in the ``Regional Options'' control panel. |
13606 | 13659 |
13607 @item | 13660 @item |
13608 There is a thread-local locale set by SetThreadLocale. #### What is this | 13661 There is a thread-local locale set by SetThreadLocale. #### What is this |
13609 used for? | 13662 used for? |
13610 @end itemize | 13663 @end itemize |
13611 | 13664 |
13612 The Win32 API has a bunch of multibyte functions -- all of those that | 13665 The Win32 API has a bunch of multibyte functions -- all of those that |
13613 end with ...A(), and on which we spend so much effort in | 13666 end with ...@code{A()}, and on which we spend so much effort in |
13614 intl-encap-win32.c. These appear to ALWAYS use the ANSI code page of | 13667 intl-encap-win32.c. These appear to ALWAYS use the ANSI code page of |
13615 the system-default locale (GetACP(), CP_ACP). Note that this applies | 13668 the system-default locale (@code{GetACP()}, CP_ACP). Note that this applies |
13616 also, for example, to the encoding of filenames in all file-handling | 13669 also, for example, to the encoding of filenames in all file-handling |
13617 routines, including the CRT ones such as open(), because they pass their | 13670 routines, including the CRT ones such as @code{open()}, because they pass their |
13618 args unchanged to the Win32 API. | 13671 args unchanged to the Win32 API. |
13619 | 13672 |
13620 @node Unicode support under Windows, The golden rules of writing Unicode-safe code, More about locales, Microsoft Windows-Related Multilingual Issues | 13673 @node Unicode support under Windows, The golden rules of writing Unicode-safe code, More about locales, Microsoft Windows-Related Multilingual Issues |
13621 @subsection Unicode support under Windows | 13674 @subsection Unicode support under Windows |
13622 @cindex unicode support under windows | 13675 @cindex unicode support under windows |
13630 table to convert the characters of that code page to and from Unicode, and | 13683 table to convert the characters of that code page to and from Unicode, and |
13631 the Win32 API itself probably (perhaps always) uses Unicode internally. | 13684 the Win32 API itself probably (perhaps always) uses Unicode internally. |
13632 | 13685 |
13633 Under Windows there are two different versions of all library routines that | 13686 Under Windows there are two different versions of all library routines that |
13634 accept or return text, those that handle Unicode text and those handling | 13687 accept or return text, those that handle Unicode text and those handling |
13635 "multibyte" text, i.e. variable-width ASCII-compatible text in some | 13688 ``multibyte'' text, i.e. variable-width ASCII-compatible text in some |
13636 national format such as EUC or Shift-JIS. Because Windows 95 basically | 13689 national format such as EUC or Shift-JIS. Because Windows 95 basically |
13637 doesn't support Unicode but Windows NT does, and Microsoft doesn't provide | 13690 doesn't support Unicode but Windows NT does, and Microsoft doesn't provide |
13638 any way of writing a single binary that will work on both systems and still | 13691 any way of writing a single binary that will work on both systems and still |
13639 use Unicode when it's available (although see below, Microsoft Layer for | 13692 use Unicode when it's available (although see below, Microsoft Layer for |
13640 Unicode), we need to provide a way of run-time conditionalizing so you | 13693 Unicode), we need to provide a way of run-time conditionalizing so you |
13641 could have one binary for both systems. "Unicode-splitting" refers to | 13694 could have one binary for both systems. ``Unicode-splitting'' refers to |
13642 writing code that will handle this properly. This means using | 13695 writing code that will handle this properly. This means using |
13643 Qmswindows_tstr as the external conversion format, calling the appropriate | 13696 Qmswindows_tstr as the external conversion format, calling the appropriate |
13644 qxe...() Unicode-split version of library functions, and doing other things | 13697 qxe...() Unicode-split version of library functions, and doing other things |
13645 in certain cases, e.g. when a qxe() function is not present. | 13698 in certain cases, e.g. when a @code{qxe()} function is not present. |
13646 | 13699 |
13647 Unicode support also requires that the various Windows APIs be | 13700 Unicode support also requires that the various Windows APIs be |
13648 "Unicode-encapsulated", so that they automatically call the ANSI or | 13701 ``Unicode-encapsulated'', so that they automatically call the ANSI or |
13649 Unicode version of the API call appropriately and handle the size | 13702 Unicode version of the API call appropriately and handle the size |
13650 differences in structures. What this means is: | 13703 differences in structures. What this means is: |
13651 | 13704 |
13652 @itemize @bullet | 13705 @itemize @bullet |
13653 @item | 13706 @item |
13654 first, note that Windows already provides a sort of encapsulation | 13707 first, note that Windows already provides a sort of encapsulation |
13655 of all APIs that deal with text. All such APIs are underlyingly | 13708 of all APIs that deal with text. All such APIs are underlyingly |
13656 provided in two versions, with an A or W suffix (ANSI or "wide" | 13709 provided in two versions, with an A or W suffix (ANSI or ``wide'' |
13657 i.e. Unicode), and the compile-time constant UNICODE controls which is | 13710 i.e. Unicode), and the compile-time constant UNICODE controls which is |
13658 selected by the unsuffixed API. Same thing happens with structures, and | 13711 selected by the unsuffixed API. Same thing happens with structures, and |
13659 also with types, where the generic types have names beginning with T -- | 13712 also with types, where the generic types have names beginning with T -- |
13660 TCHAR, LPTSTR, etc. Unfortunately, this is compile-time only, not | 13713 TCHAR, LPTSTR, etc. Unfortunately, this is compile-time only, not |
13661 run-time, so not sufficient. (Creating the necessary run-time encoding | 13714 run-time, so not sufficient. (Creating the necessary run-time encoding |
13670 such an API available internally.) | 13723 such an API available internally.) |
13671 | 13724 |
13672 @item | 13725 @item |
13673 what we do is provide an encapsulation of each standard Windows API call | 13726 what we do is provide an encapsulation of each standard Windows API call |
13674 that is split into A and W versions. current theory is to avoid all | 13727 that is split into A and W versions. current theory is to avoid all |
13675 preprocessor games; so we name the function with a prefix -- "qxe" | 13728 preprocessor games; so we name the function with a prefix -- ``qxe'' |
13676 currently -- and require callers to use the prefixed name. Callers need | 13729 currently -- and require callers to use the prefixed name. Callers need |
13677 to explicitly use the W version of all structures, and convert text | 13730 to explicitly use the W version of all structures, and convert text |
13678 themselves using Qmswindows_tstr. the qxe encapsulated version will | 13731 themselves using Qmswindows_tstr. the qxe encapsulated version will |
13679 automatically call the appropriate A or W version depending on whether | 13732 automatically call the appropriate A or W version depending on whether |
13680 we're running on 9x or NT (you can force use of the A calls on NT, | 13733 we're running on 9x or NT (you can force use of the A calls on NT, |
13730 purpose, to make the code easier to follow for someone who's not familiar | 13783 purpose, to make the code easier to follow for someone who's not familiar |
13731 with it. until our library is really complete and bug-free, we should | 13784 with it. until our library is really complete and bug-free, we should |
13732 think twice before doing this. | 13785 think twice before doing this. |
13733 | 13786 |
13734 According to Microsoft documentation, only the following functions are | 13787 According to Microsoft documentation, only the following functions are |
13735 provided under Windows 9x to support Unicode (see MSDN page "Windows | 13788 provided under Windows 9x to support Unicode (see MSDN page ``Windows |
13736 95/98/Me General Limitations"): | 13789 95/98/Me General Limitations''): |
13737 | 13790 |
13738 EnumResourceLanguagesW | 13791 EnumResourceLanguagesW |
13739 EnumResourceNamesW | 13792 EnumResourceNamesW |
13740 EnumResourceTypesW | 13793 EnumResourceTypesW |
13741 ExtTextOutW | 13794 ExtTextOutW |
13752 MessageBoxExW | 13805 MessageBoxExW |
13753 MultiByteToWideChar | 13806 MultiByteToWideChar |
13754 TextOutW | 13807 TextOutW |
13755 WideCharToMultiByte | 13808 WideCharToMultiByte |
13756 | 13809 |
13757 also maybe GetTextExtentExPoint? (KB Q125671 "Unicode Functions Supported | 13810 also maybe GetTextExtentExPoint? (KB Q125671 ``Unicode Functions Supported |
13758 by Windows 95") | 13811 by Windows 95'') |
13759 | 13812 |
13760 Q210341 says this in addition: | 13813 Q210341 says this in addition: |
13761 | 13814 |
13762 @quotation | 13815 @quotation |
13763 SUMMARY: | 13816 SUMMARY: |
13778 range beyond the 256 limitation of a one-byte representation. | 13831 range beyond the 256 limitation of a one-byte representation. |
13779 | 13832 |
13780 The Unicode standard offers application developers an opportunity to | 13833 The Unicode standard offers application developers an opportunity to |
13781 work with text without the limitations of character set based | 13834 work with text without the limitations of character set based |
13782 systems. For more information on the Unicode standard see the | 13835 systems. For more information on the Unicode standard see the |
13783 "References" section of this article. Windows NT is a fully Unicode | 13836 ``References'' section of this article. Windows NT is a fully Unicode |
13784 capable operating system so it may be desirable to write software that | 13837 capable operating system so it may be desirable to write software that |
13785 supports Unicode on Windows 95. | 13838 supports Unicode on Windows 95. |
13786 | 13839 |
13787 Even though Windows 95 and Windows 98 are not Unicode based, they do | 13840 Even though Windows 95 and Windows 98 are not Unicode based, they do |
13788 provide some limited Unicode functionality. Drawing of Unicode text is | 13841 provide some limited Unicode functionality. Drawing of Unicode text is |
13861 @itemize @bullet | 13914 @itemize @bullet |
13862 @item | 13915 @item |
13863 wmain() is completely supported, and appropriate Unicode-formatted argv | 13916 @code{wmain()} is completely supported, and appropriate Unicode-formatted argv |
13864 and envp will always be passed. | 13917 and envp will always be passed. |
13865 @item | 13918 @item |
13866 Likewise, wWinMain() is completely supported. (NOTE: The docs are not at | 13919 Likewise, @code{wWinMain()} is completely supported. (NOTE: The docs are not at |
13867 all clear on how these various entry points interact, and imply that | 13920 all clear on how these various entry points interact, and imply that |
13868 a windows-subsystem program "must" use WinMain(), while a console- | 13921 a windows-subsystem program ``must'' use @code{WinMain()}, while a console- |
13869 subsystem program "must" use main(), and a program compiled with UNICODE | 13922 subsystem program ``must'' use @code{main()}, and a program compiled with UNICODE |
13870 (which we don't, see above) "must" use the w*() versions, while a program | 13923 (which we don't, see above) ``must'' use the @code{w*()} versions, while a program |
13871 not compiled this way "must" use the plain versions. In fact it appears | 13924 not compiled this way ``must'' use the plain versions. In fact it appears |
13872 that the CRT provides four different compiler entry points, namely | 13925 that the CRT provides four different compiler entry points, namely |
13873 w?(main|WinMain)CRTStartup, and we simply choose the one we like using | 13926 w?(main|WinMain)CRTStartup, and we simply choose the one we like using |
13874 the appropriate link flag.) | 13927 the appropriate link flag.) |
13875 @item | 13928 @item |
13876 _wenviron, _wputenv | 13929 _wenviron, _wputenv |
17888 | +--------------------------------------------------------------------+ | | 17941 | +--------------------------------------------------------------------+ | |
17889 | | menubar | | | 17942 | | menubar | | |
17890 | ###################################################################### | | 17943 | ###################################################################### | |
17891 | # toolbar # | | 17944 | # toolbar # | |
17892 | #--------------------------------------------------------------------# | | 17945 | #--------------------------------------------------------------------# | |
17893 | # | gutter | # | | 17946 | # | internal border | # | |
17894 | # |--------------------------------------------------------------| # | | 17947 | # | +----------------------------------------------------------+ | # | |
17895 | # | | internal border width | | # | | 17948 | # | | gutter | | # | |
17896 | # | | ******************************************************** | | # | | 17949 | # | |-********************************************************-| | # | |
17897 |w# | | * |s|v* |s* | | #w| | 17950 |w# | | *@@| scrollbar |v* |s* | | #w| |
17898 |i# | | * |c|e* |c* | | #i| | 17951 |i# | | *-+-------------------------|e* |c* | | #i| |
17899 |n# | | * |r|r* |r* | | #n| | 17952 |n# | | *s| |r* |r* | | #n| |
17900 |d# | | * |o|t* |o* | | #d| | 17953 |d# | | *c| |t* |o* | | #d| |
17901 |o# | | * text area |l|.* text area |l* | | #o| | 17954 |o# | | *r| |.* text area |l* | | #o| |
17902 |w# | |i* |l| * |l*i| | #w| | 17955 |w# |i| *o| | * |l* |i| #w| |
17903 |-# | |n* |b|d* |b*n| | #-| | 17956 |-# |n| *l| text area |d* |b* |n| #-| |
17904 |m# | |t* |a|i* |a*t| | #m| | 17957 |m# |t| *l| |i* |a* |t| #m| |
17905 |a# | |.* |r|v* |r*.| | #a| | 17958 |a# |e| *b| |v* |r* |e| #a| |
17906 |n# t| | *-------------------------+-|i*----------------------+-* | |t #n| | 17959 |n# t|r| *a| |i*----------------------+-* |r|t #n| |
17907 |a# o|g|b* scrollbar | |d* scrollbar | *b|g|o #a| | 17960 |a# o|n|g*r| |d* scrollbar |@@*g|n|o #a| |
17908 |g# o|u|o*-------------------------+-|e*----------------------+-*o|u|o #g| | 17961 |g# o|a|u*-+-------------------------|e*----------------------+-*u|a|o #g| |
17909 |e# l|t|r* modeline |r* modeline *r|t|l #e| | 17962 |e# l|l|t* modeline |r* modeline *t|l|l #e| |
17910 |r# b|t|d********************************************************d|t|b #r| | 17963 |r# b| |t********************************************************t| |b #r| |
17911 | # a|e|e* =..texttexttex....= |s|v* |s*e|e|a # | | 17964 | # a|b|e* =..texttexttex....= |s|v* |s*e|b|a # | |
17912 |d# r|r|r*o m=..texttexttextt..=o m|c|e* |c*r|r|r #d| | 17965 |d# r|o|r*o m=..texttexttextt..=o m|c|e* |c*r|o|r #d| |
17913 |e# | | *u a=.exttexttextte...=u a|r|r* |r* | | #e| | 17966 |e# |r| *u a=.exttexttextte...=u a|r|r* |r* |r| #e| |
17914 |c# | |w*t r=....texttexttex..=t r|o|t* |o*w| | #c| | 17967 |c# |d| *t r=....texttexttex..=t r|o|t* |o* |d| #c| |
17915 |o# | |i*s g= etc. =s g|l|.* text area |l*i| | #o| | 17968 |o# |e| *s g= etc. =s g|l|.* text area |l* |e| #o| |
17916 |r# | |d*i i= =i i|l| * |l*d| | #r| | 17969 |r# |r| *i i= =i i|l| * |l* |r| #r| |
17917 |a# | |t*d n= =d n|b|d* |b*t| | #a| | 17970 |a# | | *d n= =d n|b|d* |b* | | #a| |
17918 |t# | |h*e = inner text area =e |a|i* |a*h| | #t| | 17971 |t# | | *e = inner text area =e |a|i* |a* | | #t| |
17919 |i# | | * = = |r|v* |r* | | #i| | 17972 |i# | | * = = |r|v* |r* | | #i| |
17920 |o# | | *---===================---+-|i*----------------------+-* | | #o| | 17973 |o# | | *---===================---+-|i*----------------------+-* | | #o| |
17921 |n# | | * scrollbar | |d* scrollbar | * | | #n| | 17974 |n# | | * scrollbar |@@|d* scrollbar |@@* | | #n| |
17922 | # | | *-------------------------+-|e*----------------------+-* | | # | | 17975 | # | | *-------------------------+-|e*----------------------+-* | | # | |
17923 | # | | * modeline |r* modeline * | | # | | 17976 | # | | * modeline |r* modeline * | | # | |
17924 | # | | ******************************************************** | | # | | 17977 | # | |-********************************************************-| | # | |
17925 | # | | * minibuffer * | | # | | 17978 | # | | gutter | | # | |
17926 | # | | ******************************************************** | | # | | 17979 | # | |-********************************************************-| | # | |
17927 | # | | internal border width | | # | | 17980 | # | |@@* minibuffer *@@| | # | |
17928 | # |--------------------------------------------------------------| # | | 17981 | # | +-********************************************************-+ | # | |
17929 | # | gutter | # | | 17982 | # | internal border | # | |
17930 | #--------------------------------------------------------------------# | | 17983 | #--------------------------------------------------------------------# | |
17931 | # toolbar # | | 17984 | # toolbar # | |
17932 | ###################################################################### | | 17985 | ###################################################################### | |
17933 | window manager decoration | | 17986 | window manager decoration | |
17934 +------------------------------------------------------------------------+ | 17987 +------------------------------------------------------------------------+ |
17935 | 17988 |
17936 # = boundary of client area; * = window boundaries, boundary of paned area | 17989 # = boundary of client area; * = window boundaries, boundary of paned area |
17937 = = boundary of inner text area; . = inside margin area | 17990 = = boundary of inner text area; . = inside margin area; @@ = dead boxes |
17938 @end example | 17991 @end example |
17939 | 17992 |
17940 Note in particular what happens at the corners, where a "corner box" | 17993 Note in particular what happens at the corners, where a ``corner box'' |
17941 occurs. Top and bottom toolbars take precedence over left and right | 17994 occurs. Top and bottom toolbars take precedence over left and right |
17942 toolbars, extending out horizontally into the corner boxes. Gutters | 17995 toolbars, extending out horizontally into the corner boxes. Gutters |
17943 work the same way. The corner box where the scrollbars meet, however, | 17996 work the same way. The corner box where the scrollbars meet, however, |
17944 is assigned to neither scrollbar, and is known as the "dead box"; it is | 17997 is assigned to neither scrollbar, and is known as the ``dead box''; it is |
17945 an area that must be cleared specially. | 17998 an area that must be cleared specially. There are similar dead boxes at |
17999 the bottom-right and bottom-left corners where the minibuffer and | |
18000 left/right gutters meet, but there is currently a bug in that these dead | |
18001 boxes are not explicitly cleared and may contain junk. | |
17946 | 18002 |
17947 @node The Frame, The Non-Client Area, Intro to Window and Frame Geometry, Window and Frame Geometry | 18003 @node The Frame, The Non-Client Area, Intro to Window and Frame Geometry, Window and Frame Geometry |
17948 @section The Frame | 18004 @section The Frame |
17949 | 18005 |
17950 The "top-level window area" is the entire area of a top-level window (or | 18006 The ``top-level window area'' is the entire area of a top-level window (or |
17951 "frame"). The "client area" (a term from MS Windows) is the area of a | 18007 ``frame''). The ``client area'' (a term from MS Windows) is the area of a |
17952 top-level window that XEmacs draws into and manages with redisplay. | 18008 top-level window that XEmacs draws into and manages with redisplay. |
17953 This includes the toolbar, scrollbars, gutters, dividers, text area, | 18009 This includes the toolbar, scrollbars, gutters, dividers, text area, |
17954 modeline and minibuffer. It does not include the menubar, title or | 18010 modeline and minibuffer. It does not include the menubar, title or |
17955 outer borders. The "non-client area" is the area of a top-level window | 18011 outer borders. The ``non-client area'' is the area of a top-level window |
17956 outside of the client area and includes the menubar, title and outer | 18012 outside of the client area and includes the menubar, title and outer |
17957 borders. Internally, all frame coordinates are relative to the client | 18013 borders. Internally, all frame coordinates are relative to the client |
17958 area. | 18014 area. |
17959 | 18015 |
17960 | 18016 |
17967 @item | 18023 @item |
17968 The outer layer is the window-manager decorations: The title and | 18024 The outer layer is the window-manager decorations: The title and |
17969 borders. These are controlled by the window manager, a separate process | 18025 borders. These are controlled by the window manager, a separate process |
17970 that controls the desktop, the location of icons, etc. When a process | 18026 that controls the desktop, the location of icons, etc. When a process |
17971 tries to create a window, the window manager intercepts this action and | 18027 tries to create a window, the window manager intercepts this action and |
17972 "reparents" the window, placing another window around it which contains | 18028 ``reparents'' the window, placing another window around it which contains |
17973 the window decorations, including the title bar, outer borders used for | 18029 the window decorations, including the title bar, outer borders used for |
17974 resizing, etc. The window manager also implements any actions involving | 18030 resizing, etc. The window manager also implements any actions involving |
17975 the decorations, such as the ability to resize a window by dragging its | 18031 the decorations, such as the ability to resize a window by dragging its |
17976 borders, move a window by dragging its title bar, etc. If there is no | 18032 borders, move a window by dragging its title bar, etc. If there is no |
17977 window manager or you kill it, windows will have no decorations (and | 18033 window manager or you kill it, windows will have no decorations (and |
17978 will lose them if they previously had any) and you will not be able to | 18034 will lose them if they previously had any) and you will not be able to |
17979 move or resize them. | 18035 move or resize them. |
17980 | 18036 |
17981 @item | 18037 @item |
17982 Inside of the window-manager decorations is the "shell", which is | 18038 Inside of the window-manager decorations is the ``shell'', which is |
17983 managed by the toolkit and widget libraries your program is linked with. | 18039 managed by the toolkit and widget libraries your program is linked with. |
17984 The code in @file{*-x.c} uses the Xt toolkit and various possible widget | 18040 The code in @file{*-x.c} uses the Xt toolkit and various possible widget |
17985 libraries built on top of Xt, such as Motif, Athena, the "Lucid" | 18041 libraries built on top of Xt, such as Motif, Athena, the ``Lucid'' |
17986 widgets, etc. Another possibility is GTK (@file{*-gtk.c}), which implements | 18042 widgets, etc. Another possibility is GTK (@file{*-gtk.c}), which implements |
17987 both the toolkit and widgets. Under Xt, the "shell" window is an | 18043 both the toolkit and widgets. Under Xt, the ``shell'' window is an |
17988 EmacsShell widget, containing an EmacsManager widget of the same size, | 18044 EmacsShell widget, containing an EmacsManager widget of the same size, |
17989 which in turn contains a menubar widget and an EmacsFrame widget, inside | 18045 which in turn contains a menubar widget and an EmacsFrame widget, inside |
17990 of which is the client area. (The division into EmacsShell and | 18046 of which is the client area. (The division into EmacsShell and |
17991 EmacsManager is due to the complex and screwy geometry-management system | 18047 EmacsManager is due to the complex and screwy geometry-management system |
17992 in Xt [and X more generally]. The EmacsShell handles negotiation with | 18048 in Xt [and X more generally]. The EmacsShell handles negotiation with |
17998 | 18054 |
17999 Under Windows, the non-client area is managed by the window system. | 18055 Under Windows, the non-client area is managed by the window system. |
18000 There is no division such as under X. Part of the window-system API | 18056 There is no division such as under X. Part of the window-system API |
18001 (@file{USER.DLL}) of Win32 includes functions to control the menubars, title, | 18057 (@file{USER.DLL}) of Win32 includes functions to control the menubars, title, |
18002 etc. and implements the move and resize behavior. There @strong{is} an | 18058 etc. and implements the move and resize behavior. There @strong{is} an |
18003 equivalent of the window manager, called the "shell", but it manages | 18059 equivalent of the window manager, called the ``shell'', but it manages |
18004 only the desktop, not the windows themselves. The normal shell under | 18060 only the desktop, not the windows themselves. The normal shell under |
18005 Windows is @file{EXPLORER.EXE}; if you kill this, you will lose the bar | 18061 Windows is @file{EXPLORER.EXE}; if you kill this, you will lose the bar |
18006 containing the "Start" menu and tray and such, but the windows | 18062 containing the ``Start'' menu and tray and such, but the windows |
18007 themselves will not be affected or lose their decorations. | 18063 themselves will not be affected or lose their decorations. |
18008 | 18064 |
18009 | 18065 |
18010 @node The Client Area, The Paned Area, The Non-Client Area, Window and Frame Geometry | 18066 @node The Client Area, The Paned Area, The Non-Client Area, Window and Frame Geometry |
18011 @section The Client Area | 18067 @section The Client Area |
18012 | 18068 |
18013 Inside of the client area are the toolbars, the gutters (where the buffer | 18069 Inside of the client area are the toolbars, the gutters (where the buffer |
18014 tabs are displayed), the minibuffer, the internal border width, and one | 18070 tabs are displayed), the minibuffer, the internal border width, and one |
18015 or more non-overlapping "windows" (this is old Emacs terminology, from | 18071 or more non-overlapping ``windows'' (this is old Emacs terminology, from |
18016 before the time when frames existed at all; the standard terminology for | 18072 before the time when frames existed at all; the standard terminology for |
18017 this would be "pane"). Each window can contain a modeline, horizontal | 18073 this would be ``pane''). Each window can contain a modeline, horizontal |
18018 and/or vertical scrollbars, and (for non-rightmost windows) a vertical | 18074 and/or vertical scrollbars, and (for non-rightmost windows) a vertical |
18019 divider, surrounding a text area. | 18075 divider, surrounding a text area. |
18020 | 18076 |
18021 The dimensions of the toolbars and gutters are determined by the formula | 18077 The dimensions of the toolbars and gutters are determined by the formula |
18022 (THICKNESS + 2 * BORDER-THICKNESS), where "thickness" is a cover term | 18078 (THICKNESS + 2 * BORDER-THICKNESS), where ``thickness'' is a cover term |
18023 for height or width, as appropriate. The height and width come from | 18079 for height or width, as appropriate. The height and width come from |
18024 @code{default-toolbar-height} and @code{default-toolbar-width} and the specific | 18080 @code{default-toolbar-height} and @code{default-toolbar-width} and the specific |
18025 versions of these (@code{top-toolbar-height}, @code{left-toolbar-width}, etc.). | 18081 versions of these (@code{top-toolbar-height}, @code{left-toolbar-width}, etc.). |
18026 The border thickness comes from @code{default-toolbar-border-height} and | 18082 The border thickness comes from @code{default-toolbar-border-height} and |
18027 @code{default-toolbar-border-width}, and the specific versions of these. The | 18083 @code{default-toolbar-border-width}, and the specific versions of these. The |
18042 | 18098 |
18043 | 18099 |
18044 @node The Paned Area, Text Areas, The Client Area, Window and Frame Geometry | 18100 @node The Paned Area, Text Areas, The Client Area, Window and Frame Geometry |
18045 @section The Paned Area | 18101 @section The Paned Area |
18046 | 18102 |
18047 The area occupied by the "windows" is called the paned area. Note that | 18103 The area occupied by the ``windows'' is called the paned area. |
18048 this includes the minibuffer, which is just another window but is | 18104 Unfortunately, because of the presence of the gutter @strong{between} the |
18049 special-cased in XEmacs. Each window can include a horizontal and/or | 18105 minibuffer and other windows, the bottom of the paned area is not |
18050 vertical scrollbar, a modeline and a vertical divider to its right, as | 18106 well-defined -- does it include the minibuffer (in which case it also |
18051 well as the text area. Only non-rightmost windows can include a | 18107 includes the bottom gutter, but none others) or does it not include |
18052 vertical divider. (The minibuffer normally does not include either | 18108 the minibuffer? (In which case not all windows are included.) It would |
18053 modeline or scrollbars.) | 18109 be cleaner to put the bottom gutter @strong{below} the minibuffer instead of |
18110 above it. | |
18111 | |
18112 Each window can include a horizontal and/or vertical scrollbar, a | |
18113 modeline and a vertical divider to its right, as well as the text area. | |
18114 Only non-rightmost windows can include a vertical divider. (The | |
18115 minibuffer normally does not include either modeline or scrollbars.) | |
18054 | 18116 |
18055 Note that, because the toolbars and gutters are controlled by | 18117 Note that, because the toolbars and gutters are controlled by |
18056 specifiers, and specifiers can have window-specific and buffer-specific | 18118 specifiers, and specifiers can have window-specific and buffer-specific |
18057 values, the size of the paned area can change depending on which window | 18119 values, the size of the paned area can change depending on which window |
18058 is selected: In other words, if the selected window or buffer changes, | 18120 is selected: In other words, if the selected window or buffer changes, |
18071 @code{horizontal-scrollbar-visible-p}, @code{vertical-scrollbar-visible-p}, | 18133 @code{horizontal-scrollbar-visible-p}, @code{vertical-scrollbar-visible-p}, |
18072 @code{vertical-divider-always-visible-p}, etc. | 18134 @code{vertical-divider-always-visible-p}, etc. |
18073 | 18135 |
18074 In addition, it is possible to set margins in the text area using the | 18136 In addition, it is possible to set margins in the text area using the |
18075 specifiers @code{left-margin-width} and @code{right-margin-width}. When this is | 18137 specifiers @code{left-margin-width} and @code{right-margin-width}. When this is |
18076 done, only the "inner text area" (the area inside of the margins) will | 18138 done, only the ``inner text area'' (the area inside of the margins) will |
18077 be used for normal display of text; the margins will be used for glyphs | 18139 be used for normal display of text; the margins will be used for glyphs |
18078 with a layout policy of @code{outside-margin} (as set on an extent containing | 18140 with a layout policy of @code{outside-margin} (as set on an extent containing |
18079 the glyph by @code{set-extent-begin-glyph-layout} or | 18141 the glyph by @code{set-extent-begin-glyph-layout} or |
18080 @code{set-extent-end-glyph-layout}). However, the calculation of the text | 18142 @code{set-extent-end-glyph-layout}). However, the calculation of the text |
18081 area size (e.g. in the function @code{window-text-area-width}) includes the | 18143 area size (e.g. in the function @code{window-text-area-width}) includes the |
18082 margins. Which margin is used depends on whether a glyph has been set | 18144 margins. Which margin is used depends on whether a glyph has been set |
18083 as the begin-glyph or end-glyph of an extent (@code{set-extent-begin-glyph} | 18145 as the begin-glyph or end-glyph of an extent (@code{set-extent-begin-glyph} |
18084 etc.), using the left and right margins, respectively. | 18146 etc.), using the left and right margins, respectively. |
18085 | 18147 |
18086 Technically, the margins outside of the inner text area are known as the | 18148 Technically, the margins outside of the inner text area are known as the |
18087 "outside margins". The "inside margins" are in the inner text area and | 18149 ``outside margins''. The ``inside margins'' are in the inner text area and |
18088 constitute the whitespace between the outside margins and the first or | 18150 constitute the whitespace between the outside margins and the first or |
18089 last non-whitespace character in a line; their width can vary from line | 18151 last non-whitespace character in a line; their width can vary from line |
18090 to line. Glyphs will be placed in the inside margin if their layout | 18152 to line. Glyphs will be placed in the inside margin if their layout |
18091 policy is @code{inside-margin} or @code{whitespace}, with @code{whitespace} glyphs on | 18153 policy is @code{inside-margin} or @code{whitespace}, with @code{whitespace} glyphs on |
18092 the inside and @code{inside-margin} glyphs on the outside. Inside-margin | 18154 the inside and @code{inside-margin} glyphs on the outside. Inside-margin |
18097 | 18159 |
18098 | 18160 |
18099 @node The Displayable Area, Which Functions Use Which?, Text Areas, Window and Frame Geometry | 18161 @node The Displayable Area, Which Functions Use Which?, Text Areas, Window and Frame Geometry |
18100 @section The Displayable Area | 18162 @section The Displayable Area |
18101 | 18163 |
18102 The "displayable area" is not so much an actual area as a convenient | 18164 The ``displayable area'' is not so much an actual area as a convenient |
18103 fiction. It is the area used to convert between pixel and character | 18165 fiction. It is the area used to convert between pixel and character |
18104 dimensions for frames. The character dimensions for a frame (e.g. as | 18166 dimensions for frames. The character dimensions for a frame (e.g. as |
18105 returned by @code{frame-width} and @code{frame-height} and set by | 18167 returned by @code{frame-width} and @code{frame-height} and set by |
18106 @code{set-frame-width} and @code{set-frame-height}) are determined from the | 18168 @code{set-frame-width} and @code{set-frame-height}) are determined from the |
18107 displayable area by dividing by the pixel size of the default font as | 18169 displayable area by dividing by the pixel size of the default font as |
18108 instantiated in the frame. (For proportional fonts, the "average" width | 18170 instantiated in the frame. (For proportional fonts, the ``average'' width |
18109 is used. Under Windows, this is a built-in property of the fonts. | 18171 is used. Under Windows, this is a built-in property of the fonts. |
18110 Under X, this is based on the width of the lowercase 'n', or if this is | 18172 Under X, this is based on the width of the lowercase 'n', or if this is |
18111 zero then the width of the default character. [We prefer 'n' to the | 18173 zero then the width of the default character. [We prefer 'n' to the |
18112 specified default character because many X fonts have a default | 18174 specified default character because many X fonts have a default |
18113 character with a zero or otherwise non-representative width.]) | 18175 character with a zero or otherwise non-representative width.]) |
18114 | 18176 |
18115 The displayable area is essentially the "theoretical" paned area of the | 18177 The displayable area is essentially the ``theoretical'' gutter area of the |
18116 frame excluding the rightmost and bottom-most scrollbars. In this | 18178 frame, excluding the rightmost and bottom-most scrollbars. That is, it |
18117 context, "theoretical" means that all calculations are based on | 18179 starts from the client (or ``total'') area and then excludes the |
18118 frame-level values for toolbar, gutter and scrollbar thicknesses. | 18180 ``theoretical'' toolbars and bottom-most/rightmost scrollbars, and the |
18119 Because these thicknesses are controlled by specifiers, and specifiers | 18181 internal border width. In this context, ``theoretical'' means that all |
18120 can have window-specific and buffer-specific values, these calculations | 18182 calculations are based on frame-level values for toolbar and scrollbar |
18121 may or may not reflect the actual size of the paned area or of the | 18183 thicknesses. Because these thicknesses are controlled by specifiers, |
18122 scrollbars when any particular window is selected. Note also that the | 18184 and specifiers can have window-specific and buffer-specific values, |
18123 "displayable area" may not even be contiguous! In particular, if the | 18185 these calculations may or may not reflect the actual size of the paned |
18124 frame-level value of the horizontal scrollbar height is non-zero, then | 18186 area or of the scrollbars when any particular window is selected. Note |
18125 the displayable area includes the paned area above and below the bottom | 18187 also that the ``displayable area'' may not even be contiguous! In |
18126 horizontal scrollbar but not the scrollbar itself. | 18188 particular, the gutters are included, but the bottom-most and rightmost |
18189 scrollbars are excluded even though they are inside of the gutters. | |
18190 Furthermore, if the frame-level value of the horizontal scrollbar height | |
18191 is non-zero, then the displayable area includes the paned area above and | |
18192 below the bottom horizontal scrollbar (i.e. the modeline and minibuffer) | |
18193 but not the scrollbar itself. | |
18127 | 18194 |
18128 As a further twist, the character-dimension calculations are adjusted so | 18195 As a further twist, the character-dimension calculations are adjusted so |
18129 that the truncation and continuation glyphs (see @code{truncation-glyph} and | 18196 that the truncation and continuation glyphs (see @code{truncation-glyph} and |
18130 @code{continuation-glyph}) count as a single character even if they are wider | 18197 @code{continuation-glyph}) count as a single character even if they are wider |
18131 than the default font width. (Technically, the character width is | 18198 than the default font width. (Technically, the character width is |
18134 width before dividing by the default-font width, and then adding 1 to | 18201 width before dividing by the default-font width, and then adding 1 to |
18135 the result.) (The ultimate motivation for this kludge as well as the | 18202 the result.) (The ultimate motivation for this kludge as well as the |
18136 subtraction of the scrollbars, but not the minibuffer or bottom-most | 18203 subtraction of the scrollbars, but not the minibuffer or bottom-most |
18137 modeline, is to maintain compatibility with TTY's.) | 18204 modeline, is to maintain compatibility with TTY's.) |
18138 | 18205 |
18139 Despite all these concerns and kludges, however, the "displayable area" | 18206 Despite all these concerns and kludges, however, the ``displayable area'' |
18140 concept works well in practice and mostly ensures that by default the | 18207 concept works well in practice and mostly ensures that by default the |
18141 frame will actually fit 79 characters + continuation/truncation glyph. | 18208 frame will actually fit 79 characters + continuation/truncation glyph. |
18142 | 18209 |
18143 | 18210 |
18144 @node Which Functions Use Which?, , The Displayable Area, Window and Frame Geometry | 18211 @node Which Functions Use Which?, , The Displayable Area, Window and Frame Geometry |
19783 @section Event Queues | 19850 @section Event Queues |
19784 @cindex event queues | 19851 @cindex event queues |
19785 @cindex queues, event | 19852 @cindex queues, event |
19786 | 19853 |
19787 There are two event queues here -- the command event queue (#### which | 19854 There are two event queues here -- the command event queue (#### which |
19788 should be called "deferred event queue" and is in my glyph ws) and the | 19855 should be called ``deferred event queue'' and is in my glyph ws) and the |
19789 dispatch event queue. (MS Windows actually has an extra dispatch queue | 19856 dispatch event queue. (MS Windows actually has an extra dispatch queue |
19790 for non-user events and uses the generic one only for user events. This | 19857 for non-user events and uses the generic one only for user events. This |
19791 is because user and non-user events in Windows come through the same | 19858 is because user and non-user events in Windows come through the same |
19792 place -- the window procedure -- but under X, it's possible to | 19859 place -- the window procedure -- but under X, it's possible to |
19793 selectively process events such that we take all the user events before | 19860 selectively process events such that we take all the user events before |
19888 | 19955 |
19889 @item handle_magic_event_cb | 19956 @item handle_magic_event_cb |
19890 XEmacs calls this with an event structure which contains window-system | 19957 XEmacs calls this with an event structure which contains window-system |
19891 dependent information that XEmacs doesn't need to know about, but which | 19958 dependent information that XEmacs doesn't need to know about, but which |
19892 must happen in order. If the @code{next_event_cb} never returns an | 19959 must happen in order. If the @code{next_event_cb} never returns an |
19893 event of type "magic", this will never be used. | 19960 event of type ``magic'', this will never be used. |
19894 | 19961 |
19895 @item format_magic_event_cb | 19962 @item format_magic_event_cb |
19896 Called with a magic event; print a representation of the innards of the | 19963 Called with a magic event; print a representation of the innards of the |
19897 event to @var{PSTREAM}. | 19964 event to @var{PSTREAM}. |
19898 | 19965 |
19920 @item select_process_cb | 19987 @item select_process_cb |
19921 @item unselect_process_cb | 19988 @item unselect_process_cb |
19922 These callbacks tell the underlying implementation to add or remove a | 19989 These callbacks tell the underlying implementation to add or remove a |
19923 file descriptor from the list of fds which are polled for | 19990 file descriptor from the list of fds which are polled for |
19924 inferior-process input. When input becomes available on the given | 19991 inferior-process input. When input becomes available on the given |
19925 process connection, an event of type "process" should be generated. | 19992 process connection, an event of type ``process'' should be generated. |
19926 | 19993 |
19927 @item select_console_cb | 19994 @item select_console_cb |
19928 @item unselect_console_cb | 19995 @item unselect_console_cb |
19929 These callbacks tell the underlying implementation to add or remove a | 19996 These callbacks tell the underlying implementation to add or remove a |
19930 console from the list of consoles which are polled for user-input. | 19997 console from the list of consoles which are polled for user-input. |
20048 @cindex focus handling | 20115 @cindex focus handling |
20049 | 20116 |
20050 Ben's capsule lecture on focus: | 20117 Ben's capsule lecture on focus: |
20051 | 20118 |
20052 In GNU Emacs @code{select-frame} never changes the window-manager frame | 20119 In GNU Emacs @code{select-frame} never changes the window-manager frame |
20053 focus. All it does is change the "selected frame". This is similar to | 20120 focus. All it does is change the ``selected frame''. This is similar to |
20054 what happens when we call @code{select-device} or @code{select-console}. | 20121 what happens when we call @code{select-device} or @code{select-console}. |
20055 Whenever an event comes in (including a keyboard event), its frame is | 20122 Whenever an event comes in (including a keyboard event), its frame is |
20056 selected; therefore, evaluating @code{select-frame} in @samp{*scratch*} | 20123 selected; therefore, evaluating @code{select-frame} in @samp{*scratch*} |
20057 won't cause any effects because the next received event (in the same | 20124 won't cause any effects because the next received event (in the same |
20058 frame) will cause a switch back to the frame displaying | 20125 frame) will cause a switch back to the frame displaying |
20083 minibuffer, you essentially want to temporarily switch the WM focus to | 20150 minibuffer, you essentially want to temporarily switch the WM focus to |
20084 the frame with the minibuffer, and switch it back when you exit the | 20151 the frame with the minibuffer, and switch it back when you exit the |
20085 minibuffer. | 20152 minibuffer. |
20086 | 20153 |
20087 GNU Emacs solves this with the crockish @code{redirect-frame-focus}, | 20154 GNU Emacs solves this with the crockish @code{redirect-frame-focus}, |
20088 which says "for keyboard events received from FRAME, act like they're | 20155 which says ``for keyboard events received from FRAME, act like they're |
20089 coming from FOCUS-FRAME". I think what this means is that, when a | 20156 coming from FOCUS-FRAME''. I think what this means is that, when a |
20090 keyboard event comes in and the event manager is about to select the | 20157 keyboard event comes in and the event manager is about to select the |
20091 event's frame, if that frame has its focus redirected, the redirected-to | 20158 event's frame, if that frame has its focus redirected, the redirected-to |
20092 frame is selected instead. That way, if you're in a minibufferless | 20159 frame is selected instead. That way, if you're in a minibufferless |
20093 frame and enter the minibuffer, then all Lisp functions that run see the | 20160 frame and enter the minibuffer, then all Lisp functions that run see the |
20094 selected frame as the minibuffer's frame rather than the minibufferless | 20161 selected frame as the minibuffer's frame rather than the minibufferless |
20098 There's also some weird logic that switches the redirected frame focus | 20165 There's also some weird logic that switches the redirected frame focus |
20099 from one frame to another if Lisp code explicitly calls | 20166 from one frame to another if Lisp code explicitly calls |
20100 @code{select-frame} (but not if @code{handle-switch-frame} is called), | 20167 @code{select-frame} (but not if @code{handle-switch-frame} is called), |
20101 and saves and restores the frame focus in window configurations, | 20168 and saves and restores the frame focus in window configurations, |
20102 etc. etc. All of this logic is heavily @code{#if 0}'d, with lots of | 20169 etc. etc. All of this logic is heavily @code{#if 0}'d, with lots of |
20103 comments saying "No, this approach doesn't seem to work, so I'm trying | 20170 comments saying ``No, this approach doesn't seem to work, so I'm trying |
20104 this ... is it reasonable? Well, I'm not sure ..." that are a red flag | 20171 this ... is it reasonable? Well, I'm not sure ...'' that are a red flag |
20105 indicating crockishness. | 20172 indicating crockishness. |
20106 | 20173 |
20107 Because of our way of doing things, we can avoid all this crock. | 20174 Because of our way of doing things, we can avoid all this crock. |
20108 Keyboard events never cause a select-frame (who cares what frame they're | 20175 Keyboard events never cause a select-frame (who cares what frame they're |
20109 associated with? They come from a console, only). We change the actual | 20176 associated with? They come from a console, only). We change the actual |
24882 return value should be an alist consisting of a list of all of the | 24949 return value should be an alist consisting of a list of all of the |
24883 defined subtypes for that coding system type along with a level of | 24950 defined subtypes for that coding system type along with a level of |
24884 likelihood and a list of additional properties indicating certain | 24951 likelihood and a list of additional properties indicating certain |
24885 features detected in the data. The extra properties returned are | 24952 features detected in the data. The extra properties returned are |
24886 defined entirely by the particular coding system type and are used | 24953 defined entirely by the particular coding system type and are used |
24887 only in the algorithm described below under "user control." However, | 24954 only in the algorithm described below under ``user control.'' However, |
24888 the levels of likelihood have a standard meaning as follows: | 24955 the levels of likelihood have a standard meaning as follows: |
24889 | 24956 |
24890 Level 4 means "near certainty" and typically indicates that a | 24957 Level 4 means ``near certainty'' and typically indicates that a |
24891 signature has been detected, usually at the beginning of the data, | 24958 signature has been detected, usually at the beginning of the data, |
24892 indicating that the data is encoded in this particular coding system | 24959 indicating that the data is encoded in this particular coding system |
24893 type. An example of this would be the byte order mark at the beginning | 24960 type. An example of this would be the byte order mark at the beginning |
24894 of UCS2 encoded data or the GZIP mark at the beginning of GZIP data. | 24961 of UCS2 encoded data or the GZIP mark at the beginning of GZIP data. |
24895 | 24962 |
24896 Level 3 means "highly likely" and indicates that tell-tale signs have | 24963 Level 3 means ``highly likely'' and indicates that tell-tale signs have |
24897 been discovered in the data that are characteristic of this particular | 24964 been discovered in the data that are characteristic of this particular |
24898 coding system type. Examples of this might be ISO 2022 escape | 24965 coding system type. Examples of this might be ISO 2022 escape |
24899 sequences or the current Unicode end of line markers at regular | 24966 sequences or the current Unicode end of line markers at regular |
24900 intervals. | 24967 intervals. |
24901 | 24968 |
24902 Level 2 means "strongly statistically likely" indicating that | 24969 Level 2 means ``strongly statistically likely'' indicating that |
24903 statistical analysis concludes that there's a high chance that this | 24970 statistical analysis concludes that there's a high chance that this |
24904 data is encoded according to this particular type. For example, this | 24971 data is encoded according to this particular type. For example, this |
24905 might mean that for UCS2 data, there is a high proportion of null bytes | 24972 might mean that for UCS2 data, there is a high proportion of null bytes |
24906 or other repeated bytes in the odd-numbered bytes of the data and a | 24973 or other repeated bytes in the odd-numbered bytes of the data and a |
24907 high variance in the even-numbered bytes of the data. For Shift-JIS, | 24974 high variance in the even-numbered bytes of the data. For Shift-JIS, |
24908 this might indicate that there were no illegal Shift-JIS sequences | 24975 this might indicate that there were no illegal Shift-JIS sequences |
24909 and a fairly high occurrence of common Shift-JIS characters. | 24976 and a fairly high occurrence of common Shift-JIS characters. |
24910 | 24977 |
24911 Level 1 means "weak statistical likelihood" meaning that there is some | 24978 Level 1 means ``weak statistical likelihood'' meaning that there is some |
24912 indication that the data is encoded in this coding system type. In | 24979 indication that the data is encoded in this coding system type. In |
24913 fact, there is a reasonable chance that it may be some other type as | 24980 fact, there is a reasonable chance that it may be some other type as |
24914 well. This means, for example, that no illegal sequences were | 24981 well. This means, for example, that no illegal sequences were |
24915 encountered and at least some data was encountered that is purposely | 24982 encountered and at least some data was encountered that is purposely |
24916 not in other coding system types. For Shift-JIS data, this might mean | 24983 not in other coding system types. For Shift-JIS data, this might mean |
24917 that some bytes in the range 128 to 159 were encountered in the data. | 24984 that some bytes in the range 128 to 159 were encountered in the data. |
24918 | 24985 |
24919 Level 0 means "neutral" which is to say that there's either not enough | 24986 Level 0 means ``neutral'' which is to say that there's either not enough |
24920 data to make any decision or that the data could well be interpreted | 24987 data to make any decision or that the data could well be interpreted |
24921 as this type (meaning no illegal sequences), but there is little or no | 24988 as this type (meaning no illegal sequences), but there is little or no |
24922 indication of anything particular to this particular type. | 24989 indication of anything particular to this particular type. |
24923 | 24990 |
24924 Level -1 means "weakly unlikely" meaning that some data was | 24991 Level -1 means ``weakly unlikely'' meaning that some data was |
24925 encountered that could conceivably be part of the coding system type | 24992 encountered that could conceivably be part of the coding system type |
24926 but is probably not. For example, excessively long line-lengths or | 24993 but is probably not. For example, excessively long line-lengths or |
24927 very rarely-encountered sequences. | 24994 very rarely-encountered sequences. |
24928 | 24995 |
24929 Level -2 means "strongly unlikely" meaning that typically a number | 24996 Level -2 means ``strongly unlikely'' meaning that typically a number |
24930 of illegal sequences were encountered. | 24997 of illegal sequences were encountered. |
24931 | 24998 |
24932 The algorithm to determine when to stop and indicate that the data has | 24999 The algorithm to determine when to stop and indicate that the data has |
24933 been detected as a particular coding system uses a priority list, | 25000 been detected as a particular coding system uses a priority list, |
24934 which is typically specified as part of the language environment | 25001 which is typically specified as part of the language environment |
24943 Japanese-language environment particular subtypes of ISO 2022 will be | 25010 Japanese-language environment particular subtypes of ISO 2022 will be |
24944 associated with the Japanese coding system version of those | 25011 associated with the Japanese coding system version of those |
24945 subtypes). It is perfectly legal and quite common in fact, to list the | 25012 subtypes). It is perfectly legal and quite common in fact, to list the |
24946 same subtype more than once in the priority list with successively | 25013 same subtype more than once in the priority list with successively |
24947 lower requirements. Other facts that can be listed in the priority | 25014 lower requirements. Other facts that can be listed in the priority |
24948 list for a subtype are "reject", meaning that the data should never be | 25015 list for a subtype are ``reject'', meaning that the data should never be |
24949 detected as this subtype, or "ask", meaning that if the data is | 25016 detected as this subtype, or ``ask'', meaning that if the data is |
24950 detected to be this subtype, the user will be asked whether they | 25017 detected to be this subtype, the user will be asked whether they |
24951 actually mean this. This latter property could be used, for example, | 25018 actually mean this. This latter property could be used, for example, |
24952 towards the bottom of the priority list. | 25019 towards the bottom of the priority list. |
24953 | 25020 |
24954 In addition there is a global variable which specifies the minimum | 25021 In addition there is a global variable which specifies the minimum |
24961 system, the subtype, the coding system and the associated level of | 25028 system, the subtype, the coding system and the associated level of |
24962 likelihood will be prominently displayed either in the echo area or in | 25029 likelihood will be prominently displayed either in the echo area or in |
24963 a status box somewhere. | 25030 a status box somewhere. |
24964 | 25031 |
24965 If no positive match is found according to the priority list, or if | 25032 If no positive match is found according to the priority list, or if |
24966 the matches that are found have the "ask" property on them, then the | 25033 the matches that are found have the ``ask'' property on them, then the |
24967 user will be presented with a list of choices of possible encodings | 25034 user will be presented with a list of choices of possible encodings |
24968 and asked to choose one. This list is typically sorted first by level | 25035 and asked to choose one. This list is typically sorted first by level |
24969 of likelihood, and then within this, by the order in which the | 25036 of likelihood, and then within this, by the order in which the |
24970 subtypes appear in the priority list. This list is displayed in a | 25037 subtypes appear in the priority list. This list is displayed in a |
24971 special kind of dialog box or other buffer allowing the user, in | 25038 special kind of dialog box or other buffer allowing the user, in |
24978 will be in the form of errors or warnings of various levels, some of | 25045 will be in the form of errors or warnings of various levels, some of |
24979 which may be severe enough to stop the decoding entirely, and some of | 25046 which may be severe enough to stop the decoding entirely, and some of |
24980 which may either indicate definitely malformed data but from which | 25047 which may either indicate definitely malformed data but from which |
24981 it's possible to recover, or simply data that appears rather | 25048 it's possible to recover, or simply data that appears rather |
24982 questionable. If any of these status values are reported during | 25049 questionable. If any of these status values are reported during |
24983 decoding, the user will be informed of this and asked "are you sure?" | 25050 decoding, the user will be informed of this and asked ``are you sure?'' |
24984 As part of the "are you sure" dialog box or question, the user can | 25051 As part of the ``are you sure'' dialog box or question, the user can |
24985 display the results of the decoding to make sure it's correct. If the | 25052 display the results of the decoding to make sure it's correct. If the |
24986 user says "no, they're not sure," then the same list of choices as | 25053 user says ``no, they're not sure,'' then the same list of choices as |
24987 previously mentioned will be presented. | 25054 previously mentioned will be presented. |
24988 | 25055 |
24989 @subheading RFC: Autodetection | 25056 @subheading RFC: Autodetection |
24990 | 25057 |
24991 Also appeared under heading "Implementation of Coding System Priority | 25058 Also appeared under heading "Implementation of Coding System Priority |
25201 | 25268 |
25202 @enumerate | 25269 @enumerate |
25203 @item | 25270 @item |
25204 Hopefully a system general enough to handle (2)--(4) will | 25271 Hopefully a system general enough to handle (2)--(4) will |
25205 handle these, too, but we should watch out for gotchas like | 25272 handle these, too, but we should watch out for gotchas like |
25206 Unicode "plane 14" tags which (I think _both_ Ben and Olivier | 25273 Unicode ``plane 14'' tags which (I think _both_ Ben and Olivier |
25207 will agree) have no place in the internal representation, and | 25274 will agree) have no place in the internal representation, and |
25208 thus must be treated as out-of-band control sequences. I | 25275 thus must be treated as out-of-band control sequences. I |
25209 don't know if all such gotchas will be as easy to dispose of. | 25276 don't know if all such gotchas will be as easy to dispose of. |
25210 | 25277 |
25211 @item | 25278 @item |
25242 | 25309 |
25243 Obviously, it can't be perfect if any autodecoding is done; | 25310 Obviously, it can't be perfect if any autodecoding is done; |
25244 people like Hrvoje should have an easily available option to | 25311 people like Hrvoje should have an easily available option to |
25245 revert to this default (or an optimized approximation which | 25312 revert to this default (or an optimized approximation which |
25246 wouldn't actually read the whole file into a buffer) or simply | 25313 wouldn't actually read the whole file into a buffer) or simply |
25247 display everything as binary (with the "font" for binary files | 25314 display everything as binary (with the ``font'' for binary files |
25248 being a user option). | 25315 being a user option). |
25249 | 25316 |
25250 @item | 25317 @item |
25251 This implies that we should be detecting conditions in the | 25318 This implies that we should be detecting conditions in the |
25252 tail of the file which violate the implicit assumptions of the | 25319 tail of the file which violate the implicit assumptions of the |
25351 | 25418 |
25352 Date: 11/1/1999 7:24 AM | 25419 Date: 11/1/1999 7:24 AM |
25353 | 25420 |
25354 Stephen, thank you very much for writing this up. I think it is a good start, | 25421 Stephen, thank you very much for writing this up. I think it is a good start, |
25355 and definitely moving in the direction I would like to see things going: more | 25422 and definitely moving in the direction I would like to see things going: more |
25356 proposals, less arguing. (aka "more light, less heat") However, I have some | 25423 proposals, less arguing. (aka ``more light, less heat'') However, I have some |
25357 suggestions for cleaning this up: | 25424 suggestions for cleaning this up: |
25358 | 25425 |
25359 You should try to make it more layered. For example, you might have one | 25426 You should try to make it more layered. For example, you might have one |
25360 section devoted to the workings of autodetection, which starts out like this | 25427 section devoted to the workings of autodetection, which starts out like this |
25361 (the section numbers below are totally arbitrary): | 25428 (the section numbers below are totally arbitrary): |