Mercurial > hg > xemacs-beta
comparison man/internals/internals.texi @ 5128:7be849cb8828 ben-lisp-object
merge
author | Ben Wing <ben@xemacs.org> |
---|---|
date | Sun, 07 Mar 2010 02:09:59 -0600 |
parents | a9c41067dd88 e0587c615e8b |
children | f965e31a35f0 |
comparison
equal
deleted
inserted
replaced
5127:a9c41067dd88 | 5128:7be849cb8828 |
---|---|
159 that has been formatted into ASCII lists and tables. | 159 that has been formatted into ASCII lists and tables. |
160 | 160 |
161 Note: to define these routines, put point after the end of the definition | 161 Note: to define these routines, put point after the end of the definition |
162 and type C-x C-e. | 162 and type C-x C-e. |
163 | 163 |
164 (defun list-to-texinfo (b e) | 164 (defun convert-list-to-texinfo (b e) |
165 "Convert the selected region from an ASCII list to a Texinfo list." | 165 "Convert the selected region from an ASCII list to a Texinfo list." |
166 (interactive "r") | 166 (interactive "r") |
167 (save-restriction | 167 (save-restriction |
168 (narrow-to-region b e) | 168 (narrow-to-region b e) |
169 (goto-char (point-min)) | 169 (goto-char (point-min)) |
170 (let ((dash-type "^ *-+ +") | 170 (let ((dash-type "^ *\\(-+\\|o\\) +") |
171 ;; allow single-letter numbering or roman numerals | 171 ;; allow single-letter numbering or roman numerals |
172 (letter-type "^ *[[(]?\\([a-zA-Z]\\|[IVXivx]+\\)[]).] +") | 172 (letter-type "^ *[[(]?\\([a-zA-Z]\\|[IVXivx]+\\)[]).] +") |
173 (num-type "^ *[[(]?[0-9]+[]).] +") | 173 (num-type "^ *[[(]?[0-9]+[]).] +") |
174 dash regexp) | 174 dash regexp) |
175 (save-excursion | 175 (save-excursion |
237 (insert-char ?\ (- min (current-column))) | 237 (insert-char ?\ (- min (current-column))) |
238 (beginning-of-line) | 238 (beginning-of-line) |
239 (forward-char min)) | 239 (forward-char min)) |
240 (kill-rectangle b (point)))))) | 240 (kill-rectangle b (point)))))) |
241 | 241 |
242 (defun table-to-texinfo (b e) | 242 (defun convert-table-to-texinfo (b e) |
243 "Convert the selected region from an ASCII table to a Texinfo table. | 243 "Convert the selected region from an ASCII table to a Texinfo table. |
244 Assumes entries are separated by a blank line, and the first sexp in | 244 Assumes entries are separated by a blank line, and the first sexp in |
245 each entry is the table heading." | 245 each entry is the table heading." |
246 (interactive "r") | 246 (interactive "r") |
247 (save-restriction | 247 (save-restriction |
281 If the region is active, do the region; otherwise, go from point to the end | 281 If the region is active, do the region; otherwise, go from point to the end |
282 of the buffer. This query-replaces for various kinds of conventions used | 282 of the buffer. This query-replaces for various kinds of conventions used |
283 in text: @code{} surrounded by ` and ' or followed by a (); @strong{} | 283 in text: @code{} surrounded by ` and ' or followed by a (); @strong{} |
284 surrounded by *'s; @file{} something that looks like a file name." | 284 surrounded by *'s; @file{} something that looks like a file name." |
285 (interactive) | 285 (interactive) |
286 (if (and (not no-narrow) (region-active-p)) | 286 (save-excursion |
287 (save-restriction | 287 (if (and (not no-narrow) (region-active-p)) |
288 (narrow-to-region (region-beginning) (region-end)) | 288 (save-restriction |
289 (convert-text-to-texinfo t)) | 289 (narrow-to-region (region-beginning) (region-end)) |
290 (let ((p (point)) | 290 (goto-char (region-beginning)) |
291 (case-replace nil)) | 291 (zmacs-deactivate-region) |
292 (query-replace-regexp "`\\([^']+\\)'\\([^']\\)" "@code{\\1}\\2" nil) | 292 (convert-text-to-texinfo t)) |
293 (goto-char p) | 293 (let ((p (point)) |
294 (query-replace-regexp "\\(\\Sw\\)\\*\\(\\(?:\\s_\\|\\sw\\)+\\)\\*\\([^A-Za-z.}]\\)" "\\1@strong{\\2}\\3" nil) | 294 (case-replace nil)) |
295 (goto-char p) | 295 (message "Point is %d" (point)) |
296 (query-replace-regexp "\\(\\(\\s_\\|\\sw\\)+()\\)\\([^}]\\)" "@code{\\1}\\3" nil) | 296 (query-replace-regexp "`\\([^']+\\)'\\([^']\\)" "@code{\\1}\\2" nil) |
297 (goto-char p) | 297 (goto-char p) |
298 (query-replace-regexp "\\(\\(\\s_\\|\\sw\\)+\\.[A-Za-z]+\\)\\([^A-Za-z.}]\\)" "@file{\\1}\\3" nil) | 298 (query-replace-regexp "\\(\\Sw\\)\\*\\(\\(?:\\s_\\|\\sw\\)+\\)\\*\\([^A-Za-z.}]\\)" "\\1@strong{\\2}\\3" nil) |
299 ))) | 299 (goto-char p) |
300 (query-replace-regexp "\\(\\(\\s_\\|\\sw\\)+()\\)\\([^}]\\)" "@code{\\1}\\3" nil) | |
301 (goto-char p) | |
302 (query-replace-regexp "\\(\\(\\s_\\|\\sw\\)+\\.[A-Za-z]+\\)\\([^A-Za-z.}]\\)" "@file{\\1}\\3" nil) | |
303 )))) | |
300 | 304 |
301 4. Adding new sections: | 305 4. Adding new sections: |
302 ----------------------- | 306 ----------------------- |
303 | 307 |
304 NOTE: These are in the form of macros. #### FIXME Convert them to | 308 NOTE: These are in the form of macros. #### FIXME Convert them to |
1236 XEmacs is a powerful, customizable text editor and development | 1240 XEmacs is a powerful, customizable text editor and development |
1237 environment. It began in 1991 as Lucid Emacs, which was in turn | 1241 environment. It began in 1991 as Lucid Emacs, which was in turn |
1238 derived from GNU Emacs, a program written by Richard Stallman of the | 1242 derived from GNU Emacs, a program written by Richard Stallman of the |
1239 Free Software Foundation. GNU Emacs dates back to 1985 and was | 1243 Free Software Foundation. GNU Emacs dates back to 1985 and was |
1240 modelled after Unipress Emacs, an editor written by James Gosling in | 1244 modelled after Unipress Emacs, an editor written by James Gosling in |
1241 1981 and based on a series of other "Emacs"-like editors, including | 1245 1981 and based on a series of other ``Emacs''-like editors, including |
1242 EINE (EINE Is Not EMACS), c. 1976, by Dan Weinreb, which ran on the | 1246 EINE (EINE Is Not EMACS), c. 1976, by Dan Weinreb, which ran on the |
1243 MIT Lisp Machine and was the first Emacs written in Lisp; ZWEI (ZWEI | 1247 MIT Lisp Machine and was the first Emacs written in Lisp; ZWEI (ZWEI |
1244 Was EINE Initially), c. 1978, by Dan Weinreb and Mike McMahon; Multics | 1248 Was EINE Initially), c. 1978, by Dan Weinreb and Mike McMahon; Multics |
1245 Emacs, c. 1978, by Bernie Greenberg, which was written in MacLisp and | 1249 Emacs, c. 1978, by Bernie Greenberg, which was written in MacLisp and |
1246 also used Lisp as its extension language; and ZMACS, c. 1980, a direct | 1250 also used Lisp as its extension language; and ZMACS, c. 1980, a direct |
1247 descendant of ZWEI that ran on the Symbolics LM-2, LMI LispM, and | 1251 descendant of ZWEI that ran on the Symbolics LM-2, LMI LispM, and |
1248 later, TI Explorer (1983-1989). These in turn were inspired by the | 1252 later, TI Explorer (1983-1989). These in turn were inspired by the |
1249 first Emacs, a package called EMACS, written in 1976 by Richard | 1253 first Emacs, a package called EMACS, written in 1976 by Richard |
1250 Stallman, Guy Steele, and Dave Moon. This was a merger of TECMAC and | 1254 Stallman, Guy Steele, and Dave Moon. This was a merger of TECMAC and |
1251 TMACS, a pair of "TECO-macro realtime editors" written by Guy Steele, | 1255 TMACS, a pair of ``TECO-macro realtime editors'' written by Guy Steele, |
1252 Dave Moon, Richard Greenblatt, Charles Frankston, et al., and added a | 1256 Dave Moon, Richard Greenblatt, Charles Frankston, et al., and added a |
1253 dynamic loader and Meta-key cmds. It ran under ITS (the Incompatible | 1257 dynamic loader and Meta-key cmds. It ran under ITS (the Incompatible |
1254 Timesharing System) on a DEC PDP 10 and under TWENEX on a Tops-20 and | 1258 Timesharing System) on a DEC PDP 10 and under TWENEX on a Tops-20 and |
1255 was written in TECO and PDP 10 assembly. ITS was one of the first | 1259 was written in TECO and PDP 10 assembly. ITS was one of the first |
1256 time-sharing operating systems and dates back well before Unix. ITS, | 1260 time-sharing operating systems and dates back well before Unix. ITS, |
1284 M. Stallman (RMS) and James Gosling (the creator of Java); its extension | 1288 M. Stallman (RMS) and James Gosling (the creator of Java); its extension |
1285 language was known as @dfn{Mocklisp}. This version of Emacs-in-C formed | 1289 language was known as @dfn{Mocklisp}. This version of Emacs-in-C formed |
1286 the basis for the early versions of GNU Emacs and also for Gosling's | 1290 the basis for the early versions of GNU Emacs and also for Gosling's |
1287 Unipress Emacs, a commercial product. Because of bad blood between the | 1291 Unipress Emacs, a commercial product. Because of bad blood between the |
1288 two over the issue of commercialism, RMS pretty much disowned this | 1292 two over the issue of commercialism, RMS pretty much disowned this |
1289 collaboration, referring to it as "Gosling Emacs". | 1293 collaboration, referring to it as ``Gosling Emacs''. |
1290 | 1294 |
1291 At this point we pick up with a time line of events. (A broader timeline | 1295 At this point we pick up with a time line of events. (A broader timeline |
1292 is available at @uref{http://www.jwz.org/doc/emacs-timeline.html, | 1296 is available at @uref{http://www.jwz.org/doc/emacs-timeline.html, |
1293 ``Emacs Timeline''}.) | 1297 ``Emacs Timeline''}.) |
1294 | 1298 |
1575 redisplay code, preliminary I18N support, code merged from GNU Emacs | 1579 redisplay code, preliminary I18N support, code merged from GNU Emacs |
1576 19.8 beta) | 1580 19.8 beta) |
1577 @item | 1581 @item |
1578 Version 19.9 released January 12, 1994. (Scrollbars, Athena.) | 1582 Version 19.9 released January 12, 1994. (Scrollbars, Athena.) |
1579 @item | 1583 @item |
1580 Version 19.10 released May 27, 1994. (Uses `configure'; code merged | 1584 Version 19.10 released May 27, 1994. (Uses @code{configure}; code merged |
1581 from GNU Emacs 19.23 beta and further merging with Epoch 4.0) Known as | 1585 from GNU Emacs 19.23 beta and further merging with Epoch 4.0) Known as |
1582 "Lucid Emacs" when shipped by Lucid, and as "XEmacs" when shipped by | 1586 ``Lucid Emacs'' when shipped by Lucid, and as ``XEmacs'' when shipped by |
1583 Sun; but Lucid went out of business a few days later and it's unclear whether | 1587 Sun; but Lucid went out of business a few days later and it's unclear whether |
1584 very many copies of 19.10 were released by Lucid. (Last release by | 1588 very many copies of 19.10 were released by Lucid. (Last release by |
1585 Jamie Zawinski.) | 1589 Jamie Zawinski.) |
1586 @end itemize | 1590 @end itemize |
1587 | 1591 |
1887 rewritten redisplay, TTY support, multi-device support, device and | 1891 rewritten redisplay, TTY support, multi-device support, device and |
1888 console objects, specifiers, glyphs, toolbars, horizontal scrollbars, | 1892 console objects, specifiers, glyphs, toolbars, horizontal scrollbars, |
1889 Lucid scrollbar widget, 3-d modeline, stay-up Lucid menus, resizable | 1893 Lucid scrollbar widget, 3-d modeline, stay-up Lucid menus, resizable |
1890 minibuffer, echo area is a true buffer, MD5 hashing support, expanded | 1894 minibuffer, echo area is a true buffer, MD5 hashing support, expanded |
1891 menubar, redone menu specification format (including menu filters), | 1895 menubar, redone menu specification format (including menu filters), |
1892 rewritten extents, renamed "screen" to "frame", misc-user events, | 1896 rewritten extents, renamed ``screen'' to ``frame'', misc-user events, |
1893 rewritten face code, rewritten mouse code, warnings system, CL | 1897 rewritten face code, rewritten mouse code, warnings system, CL |
1894 backquote syntax, critical C-g, code merging with GNU Emacs 19.28. | 1898 backquote syntax, critical C-g, code merging with GNU Emacs 19.28. |
1895 New packages Hyperbole, OOBR, hm--html-menus, viper, lazy-lock, | 1899 New packages Hyperbole, OOBR, hm--html-menus, viper, lazy-lock, |
1896 ksh-mode, rsz-minibuf.) | 1900 ksh-mode, rsz-minibuf.) |
1897 @item | 1901 @item |
1935 version 20.4 released February 28, 1998. | 1939 version 20.4 released February 28, 1998. |
1936 @item | 1940 @item |
1937 version 21.0.60 released December 10, 1998. (The version naming scheme was | 1941 version 21.0.60 released December 10, 1998. (The version naming scheme was |
1938 changed at this point: [a] the second version number is odd for stable | 1942 changed at this point: [a] the second version number is odd for stable |
1939 versions, even for beta versions; [b] a third version number is added, | 1943 versions, even for beta versions; [b] a third version number is added, |
1940 replacing the "beta xxx" ending for beta versions and allowing for | 1944 replacing the ``beta xxx'' ending for beta versions and allowing for |
1941 periodic maintenance releases for stable versions. Therefore, 21.0 was | 1945 periodic maintenance releases for stable versions. Therefore, 21.0 was |
1942 never "officially" released; similarly for 21.2, etc.) | 1946 never ``officially'' released; similarly for 21.2, etc.) |
1943 @item | 1947 @item |
1944 version 21.0.61 released January 4, 1999. | 1948 version 21.0.61 released January 4, 1999. |
1945 @item | 1949 @item |
1946 version 21.0.63 released February 3, 1999. | 1950 version 21.0.63 released February 3, 1999. |
1947 @item | 1951 @item |
1953 @item | 1957 @item |
1954 version 21.0.67 released March 25, 1999. | 1958 version 21.0.67 released March 25, 1999. |
1955 @item | 1959 @item |
1956 version 21.1.2 released May 14, 1999. (This is the followup to 21.0.67. | 1960 version 21.1.2 released May 14, 1999. (This is the followup to 21.0.67. |
1957 The second version number was bumped to indicate the beginning of the | 1961 The second version number was bumped to indicate the beginning of the |
1958 "stable" series.) | 1962 ``stable'' series.) |
1959 @item | 1963 @item |
1960 version 21.1.3 released June 26, 1999. | 1964 version 21.1.3 released June 26, 1999. |
1961 @item | 1965 @item |
1962 version 21.1.4 released July 8, 1999. | 1966 version 21.1.4 released July 8, 1999. |
1963 @item | 1967 @item |
2043 @item | 2047 @item |
2044 version 21.2.39 released December 31, 2000. | 2048 version 21.2.39 released December 31, 2000. |
2045 @item | 2049 @item |
2046 version 21.2.40 released January 8, 2001. | 2050 version 21.2.40 released January 8, 2001. |
2047 @item | 2051 @item |
2048 version 21.2.41 "Polyhymnia" released January 17, 2001. | 2052 version 21.2.41 ``Polyhymnia'' released January 17, 2001. |
2049 @item | 2053 @item |
2050 version 21.2.42 "Poseidon" released January 20, 2001. | 2054 version 21.2.42 ``Poseidon'' released January 20, 2001. |
2051 @item | 2055 @item |
2052 version 21.2.43 "Terspichore" released January 26, 2001. | 2056 version 21.2.43 ``Terspichore'' released January 26, 2001. |
2053 @item | 2057 @item |
2054 version 21.2.44 "Thalia" released February 8, 2001. | 2058 version 21.2.44 ``Thalia'' released February 8, 2001. |
2055 @item | 2059 @item |
2056 version 21.2.45 "Thelxepeia" released February 23, 2001. | 2060 version 21.2.45 ``Thelxepeia'' released February 23, 2001. |
2057 @item | 2061 @item |
2058 version 21.2.46 "Urania" released March 21, 2001. | 2062 version 21.2.46 ``Urania'' released March 21, 2001. |
2059 @item | 2063 @item |
2060 version 21.2.47 "Zephir" released April 14, 2001. | 2064 version 21.2.47 ``Zephir'' released April 14, 2001. |
2061 @item | 2065 @item |
2062 XEmacs 21.4.0 "Solid Vapor" released April 16, 2001. | 2066 XEmacs 21.4.0 ``Solid Vapor'' released April 16, 2001. |
2063 @item | 2067 @item |
2064 XEmacs 21.4.1 "Copyleft" released April 19, 2001. | 2068 XEmacs 21.4.1 ``Copyleft'' released April 19, 2001. |
2065 @item | 2069 @item |
2066 XEmacs 21.4.2 "Developer-Friendly Unix APIs" released May 10, 2001. | 2070 XEmacs 21.4.2 ``Developer-Friendly Unix APIs'' released May 10, 2001. |
2067 @item | 2071 @item |
2068 XEmacs 21.4.3 "Academic Rigor" released May 17, 2001. | 2072 XEmacs 21.4.3 ``Academic Rigor'' released May 17, 2001. |
2069 @item | 2073 @item |
2070 XEmacs 21.4.4 "Artificial Intelligence" released July 28, 2001. | 2074 XEmacs 21.4.4 ``Artificial Intelligence'' released July 28, 2001. |
2071 @item | 2075 @item |
2072 XEmacs 21.4.5 "Civil Service" released October 23, 2001. | 2076 XEmacs 21.4.5 ``Civil Service'' released October 23, 2001. |
2073 @item | 2077 @item |
2074 XEmacs 21.4.6 "Common Lisp" released December 17, 2001. | 2078 XEmacs 21.4.6 ``Common Lisp'' released December 17, 2001. |
2075 @item | 2079 @item |
2076 XEmacs 21.4.7 "Economic Science" released May 4, 2002. | 2080 XEmacs 21.4.7 ``Economic Science'' released May 4, 2002. |
2077 @item | 2081 @item |
2078 XEmacs 21.4.8 "Honest Recruiter" released May 9, 2002. | 2082 XEmacs 21.4.8 ``Honest Recruiter'' released May 9, 2002. |
2079 @item | 2083 @item |
2080 XEmacs 21.4.9 "Informed Management" released August 23, 2002. | 2084 XEmacs 21.4.9 ``Informed Management'' released August 23, 2002. |
2081 @item | 2085 @item |
2082 XEmacs 21.4.10 "Military Intelligence" released November 2, 2002. | 2086 XEmacs 21.4.10 ``Military Intelligence'' released November 2, 2002. |
2083 @item | 2087 @item |
2084 XEmacs 21.4.11 "Native Windows TTY Support" released January 3, 2003. | 2088 XEmacs 21.4.11 ``Native Windows TTY Support'' released January 3, 2003. |
2085 @item | 2089 @item |
2086 XEmacs 21.4.12 "Portable Code" released January 15, 2003. | 2090 XEmacs 21.4.12 ``Portable Code'' released January 15, 2003. |
2087 @item | 2091 @item |
2088 XEmacs 21.4.13 "Rational FORTRAN" released May 25, 2003. | 2092 XEmacs 21.4.13 ``Rational FORTRAN'' released May 25, 2003. |
2089 @item | 2093 @item |
2090 XEmacs 21.4.14 "Reasonable Discussion" released September 3, 2003. | 2094 XEmacs 21.4.14 ``Reasonable Discussion'' released September 3, 2003. |
2091 @item | 2095 @item |
2092 XEmacs 21.4.15 "Security Through Obscurity" released February 2, 2004. | 2096 XEmacs 21.4.15 ``Security Through Obscurity'' released February 2, 2004. |
2093 @item | 2097 @item |
2094 XEmacs 21.4.16 "Successful IPO" released December 5, 2004. | 2098 XEmacs 21.4.16 ``Successful IPO'' released December 5, 2004. |
2095 @item | 2099 @item |
2096 version 21.5.0 "alfalfa" released April 18, 2001. | 2100 version 21.5.0 ``alfalfa'' released April 18, 2001. |
2097 @item | 2101 @item |
2098 version 21.5.1 "anise" released May 9, 2001. | 2102 version 21.5.1 ``anise'' released May 9, 2001. |
2099 @item | 2103 @item |
2100 version 21.5.2 "artichoke" released July 28, 2001. | 2104 version 21.5.2 ``artichoke'' released July 28, 2001. |
2101 @item | 2105 @item |
2102 version 21.5.3 "asparagus" released September 7, 2001. | 2106 version 21.5.3 ``asparagus'' released September 7, 2001. |
2103 @item | 2107 @item |
2104 version 21.5.4 "bamboo" released January 8, 2002. | 2108 version 21.5.4 ``bamboo'' released January 8, 2002. |
2105 @item | 2109 @item |
2106 version 21.5.5 "beets" released March 5, 2002. | 2110 version 21.5.5 ``beets'' released March 5, 2002. |
2107 @item | 2111 @item |
2108 version 21.5.6 "bok choi" released April 5, 2002. | 2112 version 21.5.6 ``bok choi'' released April 5, 2002. |
2109 @item | 2113 @item |
2110 version 21.5.7 "broccoflower" released July 2, 2002. | 2114 version 21.5.7 ``broccoflower'' released July 2, 2002. |
2111 @item | 2115 @item |
2112 version 21.5.8 "broccoli" released July 27, 2002. | 2116 version 21.5.8 ``broccoli'' released July 27, 2002. |
2113 @item | 2117 @item |
2114 version 21.5.9 "brussels sprouts" released August 30, 2002. | 2118 version 21.5.9 ``brussels sprouts'' released August 30, 2002. |
2115 @item | 2119 @item |
2116 version 21.5.10 "burdock" released January 4, 2003. | 2120 version 21.5.10 ``burdock'' released January 4, 2003. |
2117 @item | 2121 @item |
2118 version 21.5.11 "cabbage" released February 16, 2003. | 2122 version 21.5.11 ``cabbage'' released February 16, 2003. |
2119 @item | 2123 @item |
2120 version 21.5.12 "carrot" released April 24, 2003. | 2124 version 21.5.12 ``carrot'' released April 24, 2003. |
2121 @item | 2125 @item |
2122 version 21.5.13 "cauliflower" released May 10, 2003. | 2126 version 21.5.13 ``cauliflower'' released May 10, 2003. |
2123 @item | 2127 @item |
2124 version 21.5.14 "cassava" released June 1, 2003. | 2128 version 21.5.14 ``cassava'' released June 1, 2003. |
2125 @item | 2129 @item |
2126 version 21.5.15 "celery" released September 3, 2003. | 2130 version 21.5.15 ``celery'' released September 3, 2003. |
2127 @item | 2131 @item |
2128 version 21.5.16 "celeriac" released September 26, 2003. | 2132 version 21.5.16 ``celeriac'' released September 26, 2003. |
2129 @item | 2133 @item |
2130 version 21.5.17 "chayote" released March 22, 2004. | 2134 version 21.5.17 ``chayote'' released March 22, 2004. |
2131 @item | 2135 @item |
2132 version 21.5.18 "chestnut" released October 22, 2004. | 2136 version 21.5.18 ``chestnut'' released October 22, 2004. |
2133 @end itemize | 2137 @end itemize |
2134 | 2138 |
2135 @node The XEmacs Split, XEmacs from the Outside, A History of Emacs, Top | 2139 @node The XEmacs Split, XEmacs from the Outside, A History of Emacs, Top |
2136 @chapter The XEmacs Split | 2140 @chapter The XEmacs Split |
2137 @cindex XEmacs split | 2141 @cindex XEmacs split |
2151 to cooperate a bit with RMS, and the two versions of Emacs will merge. In | 2155 to cooperate a bit with RMS, and the two versions of Emacs will merge. In |
2152 fact there have been six to seven major attempts at merging, each running | 2156 fact there have been six to seven major attempts at merging, each running |
2153 hundreds of messages long and all of them coming from the XEmacs side. All | 2157 hundreds of messages long and all of them coming from the XEmacs side. All |
2154 have failed because they have eventually come to the same conclusion, which | 2158 have failed because they have eventually come to the same conclusion, which |
2155 is that RMS has no real interest in cooperation at all. If you work with | 2159 is that RMS has no real interest in cooperation at all. If you work with |
2156 him, you have to do it his way -- "my way or the highway". Specifically: | 2160 him, you have to do it his way -- ``my way or the highway''. Specifically: |
2157 | 2161 |
2158 @enumerate | 2162 @enumerate |
2159 @item | 2163 @item |
2160 | 2164 |
2161 RMS insists on having legal papers signed for every bit of code that goes | 2165 RMS insists on having legal papers signed for every bit of code that goes |
4046 zero or more Kanji characters followed by zero or more | 4050 zero or more Kanji characters followed by zero or more |
4047 Hiragana characters. | 4051 Hiragana characters. |
4048 @end display | 4052 @end display |
4049 | 4053 |
4050 Then, the problem is that now we can't say that a sequence of | 4054 Then, the problem is that now we can't say that a sequence of |
4051 word-constituents makes up a word. For instance, both Hiragana "A" | 4055 word-constituents makes up a word. For instance, both Hiragana ``A'' |
4052 and Kanji "KAN" are word-constituents but the sequence of these two | 4056 and Kanji ``KAN'' are word-constituents but the sequence of these two |
4053 letters can't be a single word. | 4057 letters can't be a single word. |
4054 | 4058 |
4055 So, we introduced Sextword for Japanese letters. | 4059 So, we introduced Sextword for Japanese letters. |
4056 @end quotation | 4060 @end quotation |
4057 | 4061 |
5006 @item | 5010 @item |
5007 Any header-file declarations of the sort | 5011 Any header-file declarations of the sort |
5008 | 5012 |
5009 struct foobar; | 5013 struct foobar; |
5010 | 5014 |
5011 go into the "types" section of lisp.h. | 5015 go into the ``types'' section of @file{lisp.h}. |
5012 @end itemize | 5016 @end itemize |
5013 | 5017 |
5014 @node Writing New Modules, Working with Lisp Objects, Introduction to Writing C Code, Rules When Writing New C Code | 5018 @node Writing New Modules, Working with Lisp Objects, Introduction to Writing C Code, Rules When Writing New C Code |
5015 @section Writing New Modules | 5019 @section Writing New Modules |
5016 @cindex writing new modules | 5020 @cindex writing new modules |
5664 correct it or flag it as incorrect, as described in the previous | 5668 correct it or flag it as incorrect, as described in the previous |
5665 paragraph. Whenever you work on a section of code, @emph{always} make | 5669 paragraph. Whenever you work on a section of code, @emph{always} make |
5666 sure to update any comments to be correct -- or, at the very least, flag | 5670 sure to update any comments to be correct -- or, at the very least, flag |
5667 them as incorrect. | 5671 them as incorrect. |
5668 | 5672 |
5669 To indicate a "todo" or other problem, use four pound signs -- | 5673 To indicate a ``todo'' or other problem, use four pound signs -- |
5670 i.e. @samp{####}. | 5674 i.e. @samp{####}. |
5671 | 5675 |
5672 @node Adding Global Lisp Variables, Writing Macros, Writing Good Comments, Rules When Writing New C Code | 5676 @node Adding Global Lisp Variables, Writing Macros, Writing Good Comments, Rules When Writing New C Code |
5673 @section Adding Global Lisp Variables | 5677 @section Adding Global Lisp Variables |
5674 @cindex global Lisp variables, adding | 5678 @cindex global Lisp variables, adding |
5849 @enumerate | 5853 @enumerate |
5850 @item | 5854 @item |
5851 Anything that's an lvalue can be evaluated more than once. | 5855 Anything that's an lvalue can be evaluated more than once. |
5852 @item | 5856 @item |
5853 Macros where anything else can be evaluated more than once should | 5857 Macros where anything else can be evaluated more than once should |
5854 have the word "unsafe" in their name (exceptions may be made for | 5858 have the word ``unsafe'' in their name (exceptions may be made for |
5855 large sets of macros that evaluate arguments of certain types more | 5859 large sets of macros that evaluate arguments of certain types more |
5856 than once, e.g. struct buffer * arguments, when clearly indicated in | 5860 than once, e.g. struct buffer * arguments, when clearly indicated in |
5857 the macro documentation). These macros are generally meant to be | 5861 the macro documentation). These macros are generally meant to be |
5858 called only by other macros that have already stored the calling | 5862 called only by other macros that have already stored the calling |
5859 values in temporary variables. | 5863 values in temporary variables. |
5881 Capitalize macros doing stuff obviously impossible with (C) | 5885 Capitalize macros doing stuff obviously impossible with (C) |
5882 functions, e.g. directly modifying arguments as if they were passed by | 5886 functions, e.g. directly modifying arguments as if they were passed by |
5883 reference. | 5887 reference. |
5884 @item | 5888 @item |
5885 Capitalize macros that evaluate @strong{any} argument more than once regardless | 5889 Capitalize macros that evaluate @strong{any} argument more than once regardless |
5886 of whether that's "allowed" (e.g. buffer arguments). | 5890 of whether that's ``allowed'' (e.g. buffer arguments). |
5887 @item | 5891 @item |
5888 Capitalize macros that directly access a field in a Lisp_Object or | 5892 Capitalize macros that directly access a field in a Lisp_Object or |
5889 its equivalent underlying structure. In such cases, access through the | 5893 its equivalent underlying structure. In such cases, access through the |
5890 Lisp_Object precedes the macro with an X, and access through the underlying | 5894 Lisp_Object precedes the macro with an X, and access through the underlying |
5891 structure doesn't. | 5895 structure doesn't. |
5936 a search-and-replace is done to change type names and such. Some people | 5940 a search-and-replace is done to change type names and such. Some people |
5937 disagree with such changes, and certainly if done without good reason | 5941 disagree with such changes, and certainly if done without good reason |
5938 will just lead to headaches. But it's important to keep the code clean | 5942 will just lead to headaches. But it's important to keep the code clean |
5939 and understandable, and consistent naming goes a long way towards this. | 5943 and understandable, and consistent naming goes a long way towards this. |
5940 | 5944 |
5941 An example of the right way to do this was the so-called "great integral | 5945 An example of the right way to do this was the so-called ``great integral |
5942 type renaming". | 5946 type renaming''. |
5943 | 5947 |
5944 @menu | 5948 @menu |
5945 * Great Integral Type Renaming:: | 5949 * Great Integral Type Renaming:: |
5946 * Text/Char Type Renaming:: | 5950 * Text/Char Type Renaming:: |
5947 @end menu | 5951 @end menu |
5964 @item | 5968 @item |
5965 All integral types that measure quantities of anything are signed. Some | 5969 All integral types that measure quantities of anything are signed. Some |
5966 people disagree vociferously with this, but their arguments are mostly | 5970 people disagree vociferously with this, but their arguments are mostly |
5967 theoretical, and are vastly outweighed by the practical headaches of | 5971 theoretical, and are vastly outweighed by the practical headaches of |
5968 mixing signed and unsigned values, and more importantly by the far | 5972 mixing signed and unsigned values, and more importantly by the far |
5969 increased likelihood of inadvertent bugs: Because of the broken "viral" | 5973 increased likelihood of inadvertent bugs: Because of the broken ``viral'' |
5970 nature of unsigned quantities in C (operations involving mixed | 5974 nature of unsigned quantities in C (operations involving mixed |
5971 signed/unsigned are done unsigned, when exactly the opposite is nearly | 5975 signed/unsigned are done unsigned, when exactly the opposite is nearly |
5972 always wanted), even a single error in declaring a quantity unsigned | 5976 always wanted), even a single error in declaring a quantity unsigned |
5973 that should be signed, or even the even more subtle error of comparing | 5977 that should be signed, or even the even more subtle error of comparing |
5974 signed and unsigned values and forgetting the necessary cast, can be | 5978 signed and unsigned values and forgetting the necessary cast, can be |
5975 catastrophic, as comparisons will yield wrong results. -Wsign-compare | 5979 catastrophic, as comparisons will yield wrong results. @samp{-Wsign-compare} |
5976 is turned on specifically to catch this, but this tends to result in a | 5980 is turned on specifically to catch this, but this tends to result in a |
5977 great number of warnings when mixing signed and unsigned, and the casts | 5981 great number of warnings when mixing signed and unsigned, and the casts |
5978 are annoying. More has been written on this elsewhere. | 5982 are annoying. More has been written on this elsewhere. |
5979 | 5983 |
5980 @item | 5984 @item |
5989 Type names should be relatively short (no more than 10 characters or | 5993 Type names should be relatively short (no more than 10 characters or |
5990 so), with the first letter capitalized and no underscores if they can at | 5994 so), with the first letter capitalized and no underscores if they can at |
5991 all be avoided. | 5995 all be avoided. |
5992 | 5996 |
5993 @item | 5997 @item |
5994 "count" == a zero-based measurement of some quantity. Includes sizes, | 5998 ``count'' == a zero-based measurement of some quantity. Includes sizes, |
5995 offsets, and indexes. | 5999 offsets, and indexes. |
5996 | 6000 |
5997 @item | 6001 @item |
5998 "bpos" == a one-based measurement of a position in a buffer. "Charbpos" | 6002 ``bpos'' == a one-based measurement of a position in a buffer. ``Charbpos'' |
5999 and "Bytebpos" count text in the buffer, rather than bytes in memory; | 6003 and ``Bytebpos'' count text in the buffer, rather than bytes in memory; |
6000 thus Bytebpos does not directly correspond to the memory representation. | 6004 thus Bytebpos does not directly correspond to the memory representation. |
6001 Use "Membpos" for this. | 6005 Use ``Membpos'' for this. |
6002 | 6006 |
6003 @item | 6007 @item |
6004 "Char" refers to internal-format characters, not to the C type "char", | 6008 ``Char'' refers to internal-format characters, not to the C type ``char'', |
6005 which is really a byte. | 6009 which is really a byte. |
6006 @end itemize | 6010 @end itemize |
6007 | 6011 |
6008 For the actual name changes, see the script below. | 6012 For the actual name changes, see the script below. |
6009 | 6013 |
6094 #endif | 6098 #endif |
6095 | 6099 |
6096 /* There have been some arguments over what the type should be that | 6100 /* There have been some arguments over what the type should be that |
6097 specifies a count of bytes in a data block to be written out or read in, | 6101 specifies a count of bytes in a data block to be written out or read in, |
6098 using @code{Lstream_read()}, @code{Lstream_write()}, and related functions. | 6102 using @code{Lstream_read()}, @code{Lstream_write()}, and related functions. |
6099 Originally it was long, which worked fine; Martin "corrected" these to | 6103 Originally it was long, which worked fine; Martin ``corrected'' these to |
6100 size_t and ssize_t on the grounds that this is theoretically cleaner and | 6104 size_t and ssize_t on the grounds that this is theoretically cleaner and |
6101 is in keeping with the C standards. Unfortunately, this practice is | 6105 is in keeping with the C standards. Unfortunately, this practice is |
6102 horribly error-prone due to design flaws in the way that mixed | 6106 horribly error-prone due to design flaws in the way that mixed |
6103 signed/unsigned arithmetic happens. In fact, by doing this change, | 6107 signed/unsigned arithmetic happens. In fact, by doing this change, |
6104 Martin introduced a subtle but fatal error that caused the operation of | 6108 Martin introduced a subtle but fatal error that caused the operation of |
6469 fixed---use the @code{Known-Bug-Expect-Failure} wrapper macro to mark | 6473 fixed---use the @code{Known-Bug-Expect-Failure} wrapper macro to mark |
6470 them. | 6474 them. |
6471 | 6475 |
6472 @deffn Macro Known-Bug-Expect-Failure body | 6476 @deffn Macro Known-Bug-Expect-Failure body |
6473 Arrange for failing tests in @var{body} to generate messages prefixed | 6477 Arrange for failing tests in @var{body} to generate messages prefixed |
6474 with "KNOWN BUG:" instead of "FAIL:". @var{body} is a @code{progn}-like | 6478 with ``KNOWN BUG:'' instead of ``FAIL:''. @var{body} is a @code{progn}-like |
6475 body, and may contain several tests. | 6479 body, and may contain several tests. |
6476 @end deffn | 6480 @end deffn |
6477 | 6481 |
6478 A lot of the tests we run push limits; suppress Ebola warning messages | 6482 A lot of the tests we run push limits; suppress Ebola warning messages |
6479 with the @code{Ignore-Ebola} wrapper macro. | 6483 with the @code{Ignore-Ebola} wrapper macro. |
6650 with added or deleted files.} If you are lucky, the operation will | 6654 with added or deleted files.} If you are lucky, the operation will |
6651 simply fail. If you are less lucky, it will proceed, but make the | 6655 simply fail. If you are less lucky, it will proceed, but make the |
6652 adds and deletes on the main line, which you do not want at all. | 6656 adds and deletes on the main line, which you do not want at all. |
6653 Therefore, you must undo all adds and deletes. To find out what is | 6657 Therefore, you must undo all adds and deletes. To find out what is |
6654 added and deleted, use something like @code{cvs -n update >&! | 6658 added and deleted, use something like @code{cvs -n update >&! |
6655 cvs.out}, which does a "dry run". (You did make a backup copy first, | 6659 cvs.out}, which does a ``dry run''. (You did make a backup copy first, |
6656 right? What if you forgot the @samp{-n}, for example, and weren't | 6660 right? What if you forgot the @samp{-n}, for example, and weren't |
6657 prepared for the sudden onslaught of merging action?) Take a look at | 6661 prepared for the sudden onslaught of merging action?) Take a look at |
6658 the output file @file{cvs.out} and check very carefully for newly | 6662 the output file @file{cvs.out} and check very carefully for newly |
6659 added files (marked with an @samp{A}) and newly removed files (marked | 6663 added files (marked with an @samp{A}) and newly removed files (marked |
6660 with an @samp{R}). Double check that your newly added files are in | 6664 with an @samp{R}). Double check that your newly added files are in |
6682 crw tag -b ben-mule-21-5 | 6686 crw tag -b ben-mule-21-5 |
6683 @end example | 6687 @end example |
6684 | 6688 |
6685 Note that this doesn't actually do anything to your local workspace! | 6689 Note that this doesn't actually do anything to your local workspace! |
6686 It basically just creates another tag in the repository, identical to | 6690 It basically just creates another tag in the repository, identical to |
6687 the branch point tag but internally marked as a "branch tag" rather | 6691 the branch point tag but internally marked as a ``branch tag'' rather |
6688 than a regular tag. | 6692 than a regular tag. |
6689 | 6693 |
6690 @item | 6694 @item |
6691 Now, move your workspace onto the branch: | 6695 Now, move your workspace onto the branch: |
6692 | 6696 |
7016 and when you add a new element, the array automatically resizes itself | 7020 and when you add a new element, the array automatically resizes itself |
7017 if it isn't big enough. Dynarrs are extensively used in the redisplay | 7021 if it isn't big enough. Dynarrs are extensively used in the redisplay |
7018 mechanism. | 7022 mechanism. |
7019 | 7023 |
7020 | 7024 |
7021 A "dynamic array" is a contiguous array of fixed-size elements where there | 7025 A ``dynamic array'' is a contiguous array of fixed-size elements where there |
7022 is no upper limit (except available memory) on the number of elements in the | 7026 is no upper limit (except available memory) on the number of elements in the |
7023 array. Because the elements are maintained contiguously, space is used | 7027 array. Because the elements are maintained contiguously, space is used |
7024 efficiently (no per-element pointers necessary) and random access to a | 7028 efficiently (no per-element pointers necessary) and random access to a |
7025 particular element is in constant time. At any one point, the block of memory | 7029 particular element is in constant time. At any one point, the block of memory |
7026 that holds the array has an upper limit; if this limit is exceeded, the | 7030 that holds the array has an upper limit; if this limit is exceeded, the |
7027 memory is realloc()ed into a new array that is twice as big. Assuming that | 7031 memory is @code{realloc()}ed into a new array that is twice as big. Assuming that |
7028 the time to grow the array is on the order of the new size of the array | 7032 the time to grow the array is on the order of the new size of the array |
7029 block, this scheme has a provably constant amortized time (i.e. average | 7033 block, this scheme has a provably constant amortized time (i.e. average |
7030 time over all additions). | 7034 time over all additions). |
7031 | 7035 |
7032 When you add elements or retrieve elements, pointers are used. Note that | 7036 When you add elements or retrieve elements, pointers are used. Note that |
7130 onto a linked list, so they can be efficiently reused. This data type | 7134 onto a linked list, so they can be efficiently reused. This data type |
7131 is not much used in XEmacs currently, because it's a fairly new | 7135 is not much used in XEmacs currently, because it's a fairly new |
7132 addition. | 7136 addition. |
7133 | 7137 |
7134 | 7138 |
7135 A "block-type object" is used to efficiently allocate and free blocks | 7139 A ``block-type object'' is used to efficiently allocate and free blocks |
7136 of a particular size. Freed blocks are remembered in a free list and | 7140 of a particular size. Freed blocks are remembered in a free list and |
7137 are reused as necessary to allocate new blocks, so as to avoid as | 7141 are reused as necessary to allocate new blocks, so as to avoid as |
7138 much as possible making calls to malloc() and free(). | 7142 much as possible making calls to @code{malloc()} and @code{free()}. |
7139 | 7143 |
7140 This is a container object. Declare a block-type object of a specific type | 7144 This is a container object. Declare a block-type object of a specific type |
7141 as follows: | 7145 as follows: |
7142 | 7146 |
7143 struct mytype_blocktype @{ | 7147 struct mytype_blocktype @{ |
8275 @code{this_one_is_unmarkable} in @code{alloc.c}). | 8279 @code{this_one_is_unmarkable} in @code{alloc.c}). |
8276 | 8280 |
8277 Now, the actual marking is feasible. We do so by once using the macro | 8281 Now, the actual marking is feasible. We do so by once using the macro |
8278 @code{MARK_RECORD_HEADER} to mark the object itself (actually the | 8282 @code{MARK_RECORD_HEADER} to mark the object itself (actually the |
8279 special flag in the lrecord header), and calling its special marker | 8283 special flag in the lrecord header), and calling its special marker |
8280 "method" @code{marker} if available. The marker method marks every | 8284 ``method'' @code{marker} if available. The marker method marks every |
8281 other object that is in reach from our current object. Note that these | 8285 other object that is in reach from our current object. Note that these |
8282 marker methods should not call @code{mark_object} recursively, but | 8286 marker methods should not call @code{mark_object} recursively, but |
8283 instead should return the next object from where further marking has to | 8287 instead should return the next object from where further marking has to |
8284 be performed. | 8288 be performed. |
8285 | 8289 |
8330 @code{sweep_conses}, @code{sweep_bit_vectors_1}, | 8334 @code{sweep_conses}, @code{sweep_bit_vectors_1}, |
8331 @code{sweep_compiled_functions}, @code{sweep_floats}, | 8335 @code{sweep_compiled_functions}, @code{sweep_floats}, |
8332 @code{sweep_symbols}, @code{sweep_extents}, @code{sweep_markers} and | 8336 @code{sweep_symbols}, @code{sweep_extents}, @code{sweep_markers} and |
8333 @code{sweep_events}. They are the fixed-size types cons, floats, | 8337 @code{sweep_events}. They are the fixed-size types cons, floats, |
8334 compiled-functions, symbol, marker, extent, and event stored in | 8338 compiled-functions, symbol, marker, extent, and event stored in |
8335 so-called "frob blocks", and therefore we can basically do the same on | 8339 so-called ``frob blocks'', and therefore we can basically do the same on |
8336 every type of object, using the same macros, especially defined only to | 8340 every type of object, using the same macros, especially defined only to |
8337 handle everything with respect to fixed-size blocks. The only fixed-size | 8341 handle everything with respect to fixed-size blocks. The only fixed-size |
8338 type that is not handled here is the fixed-size portion of strings, | 8342 type that is not handled here is the fixed-size portion of strings, |
8339 because we took special care of them earlier. | 8343 because we took special care of them earlier. |
8340 | 8344 |
10055 complicated depending on how much information we cache. In addition to | 10059 complicated depending on how much information we cache. In addition to |
10056 the known region, we always cache the correct conversions for point, | 10060 the known region, we always cache the correct conversions for point, |
10057 BEGV, and ZV, and in addition to this we cache 16 positions where the | 10061 BEGV, and ZV, and in addition to this we cache 16 positions where the |
10058 conversion is known. We only look in the cache or update it when we | 10062 conversion is known. We only look in the cache or update it when we |
10059 need to move the known region more than a certain amount (currently 50 | 10063 need to move the known region more than a certain amount (currently 50 |
10060 chars), and then we throw away a "random" value and replace it with the | 10064 chars), and then we throw away a ``random'' value and replace it with the |
10061 newly calculated value. | 10065 newly calculated value. |
10062 | 10066 |
10063 Finally, we maintain an extra flag that tracks whether the buffer is | 10067 Finally, we maintain an extra flag that tracks whether the buffer is |
10064 entirely ASCII, to speed up the conversions even more. This flag is | 10068 entirely ASCII, to speed up the conversions even more. This flag is |
10065 actually of dubious value because in an entirely-ASCII buffer the known | 10069 actually of dubious value because in an entirely-ASCII buffer the known |
10091 track of a shifter value (0, 1, or 2) indicating how much to shift. | 10095 track of a shifter value (0, 1, or 2) indicating how much to shift. |
10092 Multiplying by 3 can be implemented by doubling and then adding the | 10096 Multiplying by 3 can be implemented by doubling and then adding the |
10093 original value. Dividing by 3, alas, cannot be implemented in any | 10097 original value. Dividing by 3, alas, cannot be implemented in any |
10094 simple shift/subtract method, as far as I know; so we just do a table | 10098 simple shift/subtract method, as far as I know; so we just do a table |
10095 lookup. For simplicity, we use a table of size 128K, which indexes the | 10099 lookup. For simplicity, we use a table of size 128K, which indexes the |
10096 "divide-by-3" values for the first 64K non-negative numbers. (Note that | 10100 ``divide-by-3'' values for the first 64K non-negative numbers. (Note that |
10097 we can increase the size up to 384K, i.e. indexing the first 192K | 10101 we can increase the size up to 384K, i.e. indexing the first 192K |
10098 non-negative numbers, while still using shorts in the array.) This also | 10102 non-negative numbers, while still using shorts in the array.) This also |
10099 means that the size of the known region can be at most 64K for | 10103 means that the size of the known region can be at most 64K for |
10100 width-three characters. | 10104 width-three characters. |
10101 @end quotation | 10105 @end quotation |
10121 @item | 10125 @item |
10122 the position of the gap | 10126 the position of the gap |
10123 @item | 10127 @item |
10124 the last value we computed | 10128 the last value we computed |
10125 @item | 10129 @item |
10126 a set of positions that are "far away" from previously computed positions | 10130 a set of positions that are ``far away'' from previously computed positions |
10127 (5000 chars currently; #### perhaps should be smaller) | 10131 (5000 chars currently; #### perhaps should be smaller) |
10128 @end itemize | 10132 @end itemize |
10129 | 10133 |
10130 For each position, we @code{CONSIDER()} it. This means: | 10134 For each position, we @code{CONSIDER()} it. This means: |
10131 | 10135 |
10147 the simple loop in FSF with the use of @code{bytecount_to_charcount()}, | 10151 the simple loop in FSF with the use of @code{bytecount_to_charcount()}, |
10148 @code{charcount_to_bytecount()}, @code{bytecount_to_charcount_down()}, or | 10152 @code{charcount_to_bytecount()}, @code{bytecount_to_charcount_down()}, or |
10149 @code{charcount_to_bytecount_down()}. (The latter two I added for this purpose.) | 10153 @code{charcount_to_bytecount_down()}. (The latter two I added for this purpose.) |
10150 These scan 4 or 8 bytes at a time through purely single-byte characters. | 10154 These scan 4 or 8 bytes at a time through purely single-byte characters. |
10151 | 10155 |
10152 If the amount we had to scan was more than our "far away" distance (5000 | 10156 If the amount we had to scan was more than our ``far away'' distance (5000 |
10153 characters, see above), then cache the new position. | 10157 characters, see above), then cache the new position. |
10154 | 10158 |
10155 #### Things to do: | 10159 #### Things to do: |
10156 | 10160 |
10157 @itemize @bullet | 10161 @itemize @bullet |
10158 @item | 10162 @item |
10159 Look at the most recent GNU Emacs to see whether anything has changed. | 10163 Look at the most recent GNU Emacs to see whether anything has changed. |
10160 @item | 10164 @item |
10161 Think about whether it makes sense to try to implement some sort of | 10165 Think about whether it makes sense to try to implement some sort of |
10162 known region or list of "known regions", like we had before. This would | 10166 known region or list of ``known regions'', like we had before. This would |
10163 be a region of entirely single-byte characters that we can check very | 10167 be a region of entirely single-byte characters that we can check very |
10164 quickly. (Previously I used a range of same-width characters of any | 10168 quickly. (Previously I used a range of same-width characters of any |
10165 size; but this adds extra complexity and slows down the scanning, and is | 10169 size; but this adds extra complexity and slows down the scanning, and is |
10166 probably not worth it.) As part of the scanning process in | 10170 probably not worth it.) As part of the scanning process in |
10167 @code{bytecount_to_charcount()} et al., we skip over chunks of entirely | 10171 @code{bytecount_to_charcount()} et al., we skip over chunks of entirely |
10375 In terms of reading the actual code, there are five optimizations | 10379 In terms of reading the actual code, there are five optimizations |
10376 (obfuscations, if you like) that have been done. | 10380 (obfuscations, if you like) that have been done. |
10377 | 10381 |
10378 @enumerate | 10382 @enumerate |
10379 @item | 10383 @item |
10380 An explicit "failure stack" has been substituted for recursion. | 10384 An explicit ``failure stack'' has been substituted for recursion. |
10381 | 10385 |
10382 @item | 10386 @item |
10383 The @code{match_1_operator}, @code{next_p}, and @code{next_b} functions | 10387 The @code{match_1_operator}, @code{next_p}, and @code{next_b} functions |
10384 are actually inlined into the @code{match} function for efficiency. | 10388 are actually inlined into the @code{match} function for efficiency. |
10385 Then the pointer movement is interspersed with the matching operations. | 10389 Then the pointer movement is interspersed with the matching operations. |
10388 If the operator uses buffer context, the buffer pointer movement is | 10392 If the operator uses buffer context, the buffer pointer movement is |
10389 sometimes implicit in the operations retrieving the context. | 10393 sometimes implicit in the operations retrieving the context. |
10390 | 10394 |
10391 @item | 10395 @item |
10392 Some cases are combined into short preparation for individual cases, and | 10396 Some cases are combined into short preparation for individual cases, and |
10393 a "fall-through" into combined code for several cases. | 10397 a ``fall-through'' into combined code for several cases. |
10394 | 10398 |
10395 @item | 10399 @item |
10396 The @code{pattern} type is not an explicit @samp{struct}. Instead, the | 10400 The @code{pattern} type is not an explicit @samp{struct}. Instead, the |
10397 data (including, @emph{e.g.}, @samp{range_table}) is inlined into the | 10401 data (including, @emph{e.g.}, @samp{range_table}) is inlined into the |
10398 compiled bytecode. This leads to bizarre code in the interpreter like | 10402 compiled bytecode. This leads to bizarre code in the interpreter like |
10407 @example | 10411 @example |
10408 ..., 'range', count, first_8_flags, second_8_flags, ..., next_op, ... | 10412 ..., 'range', count, first_8_flags, second_8_flags, ..., next_op, ... |
10409 @end example | 10413 @end example |
10410 @end enumerate | 10414 @end enumerate |
10411 | 10415 |
10412 But if you keep your eye on the "switch in a loop" structure, you | 10416 But if you keep your eye on the ``switch in a loop'' structure, you |
10413 should be able to understand the parts you need. | 10417 should be able to understand the parts you need. |
10414 | 10418 |
10415 @node Multilingual Support, Consoles; Devices; Frames; Windows, Text, Top | 10419 @node Multilingual Support, Consoles; Devices; Frames; Windows, Text, Top |
10416 @chapter Multilingual Support | 10420 @chapter Multilingual Support |
10417 @cindex Mule character sets and encodings | 10421 @cindex Mule character sets and encodings |
10869 a simple charset like ASCII, there is only one encoding normally used -- | 10873 a simple charset like ASCII, there is only one encoding normally used -- |
10870 each character is represented by a single byte, with the same value as | 10874 each character is represented by a single byte, with the same value as |
10871 its code point. For more complicated charsets, however, things are not | 10875 its code point. For more complicated charsets, however, things are not |
10872 so obvious. Unicode version 2, for example, is a large charset with | 10876 so obvious. Unicode version 2, for example, is a large charset with |
10873 thousands of characters, each indexed by a 16-bit number, often | 10877 thousands of characters, each indexed by a 16-bit number, often |
10874 represented in hex, e.g. 0x05D0 for the Hebrew letter "aleph". One | 10878 represented in hex, e.g. 0x05D0 for the Hebrew letter ``aleph''. One |
10875 obvious encoding uses two bytes per character (actually two encodings, | 10879 obvious encoding uses two bytes per character (actually two encodings, |
10876 depending on which of the two possible byte orderings is chosen). This | 10880 depending on which of the two possible byte orderings is chosen). This |
10877 encoding is convenient for internal processing of Unicode text; however, | 10881 encoding is convenient for internal processing of Unicode text; however, |
10878 it's incompatible with ASCII, so a different encoding, e.g. UTF-8, is | 10882 it's incompatible with ASCII, so a different encoding, e.g. UTF-8, is |
10879 usually used for external text, for example files or e-mail. UTF-8 | 10883 usually used for external text, for example files or e-mail. UTF-8 |
10890 | 10894 |
10891 In an ASCII or single-European-character-set world, life is very simple. | 10895 In an ASCII or single-European-character-set world, life is very simple. |
10892 There are 256 characters, and each character is represented using the | 10896 There are 256 characters, and each character is represented using the |
10893 numbers 0 through 255, which fit into a single byte. With a few | 10897 numbers 0 through 255, which fit into a single byte. With a few |
10894 exceptions (such as case-changing operations or syntax classes like | 10898 exceptions (such as case-changing operations or syntax classes like |
10895 'whitespace'), "text" is simply an array of indices into a font. You | 10899 @code{whitespace}), ``text'' is simply an array of indices into a font. You |
10896 can get different languages simply by choosing fonts with different | 10900 can get different languages simply by choosing fonts with different |
10897 8-bit character sets (ISO-8859-1, -2, special-symbol fonts, etc.), and | 10901 8-bit character sets (ISO-8859-1, -2, special-symbol fonts, etc.), and |
10898 everything will "just work" as long as anyone else receiving your text | 10902 everything will ``just work'' as long as anyone else receiving your text |
10899 uses a compatible font. | 10903 uses a compatible font. |
10900 | 10904 |
10901 In the multi-lingual world, however, it is much more complicated. There | 10905 In the multi-lingual world, however, it is much more complicated. There |
10902 are a great number of different characters which are organized in a | 10906 are a great number of different characters which are organized in a |
10903 complex fashion into various character sets. The representation to use | 10907 complex fashion into various character sets. The representation to use |
10943 text as possible. No operations should ever be performed on text encoded | 10947 text as possible. No operations should ever be performed on text encoded |
10944 in an external representation other than simple copying, because no | 10948 in an external representation other than simple copying, because no |
10945 assumptions can reliably be made about the format of this text. You | 10949 assumptions can reliably be made about the format of this text. You |
10946 cannot assume, for example, that the end of text is terminated by a null | 10950 cannot assume, for example, that the end of text is terminated by a null |
10947 byte. (For example, if the text is Unicode, it will have many null bytes | 10951 byte. (For example, if the text is Unicode, it will have many null bytes |
10948 in it.) You cannot find the next "slash" character by searching through | 10952 in it.) You cannot find the next ``slash'' character by searching through |
10949 the bytes until you find a byte that looks like a "slash" character, | 10953 the bytes until you find a byte that looks like a ``slash'' character, |
10950 because it might actually be the second byte of a Kanji character. | 10954 because it might actually be the second byte of a Kanji character. |
10951 Furthermore, all text in the internal representation must be converted, | 10955 Furthermore, all text in the internal representation must be converted, |
10952 even if it is known to be completely ASCII, because the external | 10956 even if it is known to be completely ASCII, because the external |
10953 representation may not be ASCII compatible (for example, if it is | 10957 representation may not be ASCII compatible (for example, if it is |
10954 Unicode). | 10958 Unicode). |
10974 the structures of a particular external encoding and the methods required | 10978 the structures of a particular external encoding and the methods required |
10975 to convert to and from this encoding. A facility exists to create coding | 10979 to convert to and from this encoding. A facility exists to create coding |
10976 system aliases, which in essence gives a single coding system two | 10980 system aliases, which in essence gives a single coding system two |
10977 different names. It is effectively used in XEmacs to provide a layer of | 10981 different names. It is effectively used in XEmacs to provide a layer of |
10978 abstraction on top of the actual coding systems. For example, the coding | 10982 abstraction on top of the actual coding systems. For example, the coding |
10979 system alias "file-name" points to whichever coding system is currently | 10983 system alias ``file-name'' points to whichever coding system is currently |
10980 used for encoding and decoding file names as passed to or retrieved from | 10984 used for encoding and decoding file names as passed to or retrieved from |
10981 system calls. In general, the actual encoding will differ from system to | 10985 system calls. In general, the actual encoding will differ from system to |
10982 system, and also on the particular locale that the user is in. The use | 10986 system, and also on the particular locale that the user is in. The use |
10983 of the file-name alias effectively hides that implementation detail on | 10987 of the file-name alias effectively hides that implementation detail on |
10984 top of that abstract interface layer which provides a unified set of | 10988 top of that abstract interface layer which provides a unified set of |
11485 C = plain char, when the base type is unsigned | 11489 C = plain char, when the base type is unsigned |
11486 U = unsigned | 11490 U = unsigned |
11487 S = signed | 11491 S = signed |
11488 @end example | 11492 @end example |
11489 | 11493 |
11490 (Formerly I had a comment saying that type (e) "should be replaced with | 11494 (Formerly I had a comment saying that type (e) ``should be replaced with |
11491 void *". However, there are in fact many places where an unsigned char | 11495 void *''. However, there are in fact many places where an unsigned char |
11492 * might be used -- e.g. for ease in pointer computation, since void * | 11496 * might be used -- e.g. for ease in pointer computation, since void * |
11493 doesn't allow this, and for compatibility with external APIs.) | 11497 doesn't allow this, and for compatibility with external APIs.) |
11494 | 11498 |
11495 Note that these typedefs are purely for documentation purposes; from | 11499 Note that these typedefs are purely for documentation purposes; from |
11496 the C code's perspective, they are exactly equivalent to @code{char *}, | 11500 the C code's perspective, they are exactly equivalent to @code{char *}, |
11507 @node Different Ways of Seeing Internal Text, Buffer Positions, Byte Types, Byte/Character Types; Buffer Positions; Other Typedefs | 11511 @node Different Ways of Seeing Internal Text, Buffer Positions, Byte Types, Byte/Character Types; Buffer Positions; Other Typedefs |
11508 @subsection Different Ways of Seeing Internal Text | 11512 @subsection Different Ways of Seeing Internal Text |
11509 @cindex different ways of seeing internal text | 11513 @cindex different ways of seeing internal text |
11510 | 11514 |
11511 There are various ways of representing internal text. The two primary | 11515 There are various ways of representing internal text. The two primary |
11512 ways are as an "array" of individual characters and as a | 11516 ways are as an ``array'' of individual characters and as a |
11513 "stream" of bytes. In the ASCII world, where there are only 256 | 11517 ``stream'' of bytes. In the ASCII world, where there are only 256 |
11514 characters at most, things are easy because each character fits into a | 11518 characters at most, things are easy because each character fits into a |
11515 byte. In general, however, this is not true -- see the above discussion | 11519 byte. In general, however, this is not true -- see the above discussion |
11516 of characters vs. encodings. | 11520 of characters vs. encodings. |
11517 | 11521 |
11518 In some cases, it's also important to distinguish between a stream | 11522 In some cases, it's also important to distinguish between a stream |
11519 representation as a series of bytes and as a series of textual units. | 11523 representation as a series of bytes and as a series of textual units. |
11520 This is particularly important wrt Unicode. The UTF-16 representation | 11524 This is particularly important wrt Unicode. The UTF-16 representation |
11521 (sometimes referred to, rather sloppily, as simply the "Unicode" format) | 11525 (sometimes referred to, rather sloppily, as simply the ``Unicode'' format) |
11522 represents text as a series of 16-bit units. Mostly, each unit | 11526 represents text as a series of 16-bit units. Mostly, each unit |
11523 corresponds to a single character, but not necessarily, as characters | 11527 corresponds to a single character, but not necessarily, as characters |
11524 outside of the range 0-65535 (the BMP or "Basic Multilingual Plane" of | 11528 outside of the range 0-65535 (the BMP or ``Basic Multilingual Plane'' of |
11525 Unicode) require two 16-bit units, through the mechanism of | 11529 Unicode) require two 16-bit units, through the mechanism of |
11526 "surrogates". When a series of 16-bit units is serialized into a byte | 11530 ``surrogates''. When a series of 16-bit units is serialized into a byte |
11527 stream, there are at least two possible representations, little-endian | 11531 stream, there are at least two possible representations, little-endian |
11528 and big-endian, and which one is used may depend on the native format of | 11532 and big-endian, and which one is used may depend on the native format of |
11529 16-bit integers in the CPU of the machine that XEmacs is running | 11533 16-bit integers in the CPU of the machine that XEmacs is running |
11530 on. (Similarly, UTF-32 is logically a representation with 32-bit textual | 11534 on. (Similarly, UTF-32 is logically a representation with 32-bit textual |
11531 units.) | 11535 units.) |
11538 @item | 11542 @item |
11539 UTF-16 has 2-byte (16-bit) units. | 11543 UTF-16 has 2-byte (16-bit) units. |
11540 @item | 11544 @item |
11541 UTF-32 has 4-byte (32-bit) units. | 11545 UTF-32 has 4-byte (32-bit) units. |
11542 @item | 11546 @item |
11543 XEmacs-internal encoding (the old "Mule" encoding) has 1-byte (8-bit) | 11547 XEmacs-internal encoding (the old ``Mule'' encoding) has 1-byte (8-bit) |
11544 units. | 11548 units. |
11545 @item | 11549 @item |
11546 UTF-7 technically has 7-bit units that are within the "mail-safe" range | 11550 UTF-7 technically has 7-bit units that are within the ``mail-safe'' range |
11547 (ASCII 32 - 126 plus a few control characters), but normally is encoded | 11551 (ASCII 32 - 126 plus a few control characters), but normally is encoded |
11548 in an 8-bit stream. (UTF-7 is also a modal encoding, since it has a | 11552 in an 8-bit stream. (UTF-7 is also a modal encoding, since it has a |
11549 normal mode where printable ASCII characters represent themselves and a | 11553 normal mode where printable ASCII characters represent themselves and a |
11550 shifted mode, introduced with a plus sign, where a base-64 encoding is | 11554 shifted mode, introduced with a plus sign, where a base-64 encoding is |
11551 used.) | 11555 used.) |
11606 @table @code | 11610 @table @code |
11607 @item Ibyte | 11611 @item Ibyte |
11608 The data in a buffer or string is logically made up of Ibyte objects, | 11612 The data in a buffer or string is logically made up of Ibyte objects, |
11609 where an Ibyte takes up the same amount of space as a char. (It is | 11613 where an Ibyte takes up the same amount of space as a char. (It is |
11610 declared differently, though, to catch invalid usages.) Strings stored | 11614 declared differently, though, to catch invalid usages.) Strings stored |
11611 using Ibytes are said to be in "internal format". The important | 11615 using Ibytes are said to be in ``internal format''. The important |
11612 characteristics of internal format are | 11616 characteristics of internal format are |
11613 | 11617 |
11614 @itemize @minus | 11618 @itemize @minus |
11615 @item | 11619 @item |
11616 ASCII characters are represented as a single Ibyte, in the range 0 - | 11620 ASCII characters are represented as a single Ibyte, in the range 0 - |
11659 | 11663 |
11660 This means that Ichar values are upwardly compatible with the standard | 11664 This means that Ichar values are upwardly compatible with the standard |
11661 8-bit representation of ASCII/ISO-8859-1. | 11665 8-bit representation of ASCII/ISO-8859-1. |
11662 | 11666 |
11663 @item Extbyte | 11667 @item Extbyte |
11664 Strings that go in or out of Emacs are in "external format", typedef'ed | 11668 Strings that go in or out of Emacs are in ``external format'', typedef'ed |
11665 as an array of char or a char *. There is more than one external format | 11669 as an array of char or a char *. There is more than one external format |
11666 (JIS, EUC, etc.) but they all have similar properties. They are modal | 11670 (JIS, EUC, etc.) but they all have similar properties. They are modal |
11667 encodings, which is to say that the meaning of particular bytes is not | 11671 encodings, which is to say that the meaning of particular bytes is not |
11668 fixed but depends on what "mode" the string is currently in (e.g. bytes | 11672 fixed but depends on what ``mode'' the string is currently in (e.g. bytes |
11669 in the range 0 - 0x7f might be interpreted as ASCII, or as Hiragana, or | 11673 in the range 0 - 0x7f might be interpreted as ASCII, or as Hiragana, or |
11670 as 2-byte Kanji, depending on the current mode). The mode starts out in | 11674 as 2-byte Kanji, depending on the current mode). The mode starts out in |
11671 ASCII/ISO-8859-1 and is switched using escape sequences -- for example, | 11675 ASCII/ISO-8859-1 and is switched using escape sequences -- for example, |
11672 in the JIS encoding, 'ESC $ B' switches to a mode where pairs of bytes | 11676 in the JIS encoding, 'ESC $ B' switches to a mode where pairs of bytes |
11673 in the range 0 - 0x7f are interpreted as Kanji characters. | 11677 in the range 0 - 0x7f are interpreted as Kanji characters. |
11693 | 11697 |
11694 There are three possible ways to specify positions in a buffer. All | 11698 There are three possible ways to specify positions in a buffer. All |
11695 of these are one-based: the beginning of the buffer is position or | 11699 of these are one-based: the beginning of the buffer is position or |
11696 index 1, and 0 is not a valid position. | 11700 index 1, and 0 is not a valid position. |
11697 | 11701 |
11698 As a "buffer position" (typedef Charbpos): | 11702 As a ``buffer position'' (typedef Charbpos): |
11699 | 11703 |
11700 This is an index specifying an offset in characters from the | 11704 This is an index specifying an offset in characters from the |
11701 beginning of the buffer. Note that buffer positions are | 11705 beginning of the buffer. Note that buffer positions are |
11702 logically @strong{between} characters, not on a character. The | 11706 logically @strong{between} characters, not on a character. The |
11703 difference between two buffer positions specifies the number of | 11707 difference between two buffer positions specifies the number of |
11704 characters between those positions. Buffer positions are the | 11708 characters between those positions. Buffer positions are the |
11705 only kind of position externally visible to the user. | 11709 only kind of position externally visible to the user. |
11706 | 11710 |
11707 As a "byte index" (typedef Bytebpos): | 11711 As a ``byte index'' (typedef Bytebpos): |
11708 | 11712 |
11709 This is an index over the bytes used to represent the characters | 11713 This is an index over the bytes used to represent the characters |
11710 in the buffer. If there is no Mule support, this is identical | 11714 in the buffer. If there is no Mule support, this is identical |
11711 to a buffer position, because each character is represented | 11715 to a buffer position, because each character is represented |
11712 using one byte. However, with Mule support, many characters | 11716 using one byte. However, with Mule support, many characters |
11713 require two or more bytes for their representation, and so a | 11717 require two or more bytes for their representation, and so a |
11714 byte index may be greater than the corresponding buffer | 11718 byte index may be greater than the corresponding buffer |
11715 position. | 11719 position. |
11716 | 11720 |
11717 As a "memory index" (typedef Membpos): | 11721 As a ``memory index'' (typedef Membpos): |
11718 | 11722 |
11719 This is the byte index adjusted for the gap. For positions | 11723 This is the byte index adjusted for the gap. For positions |
11720 before the gap, this is identical to the byte index. For | 11724 before the gap, this is identical to the byte index. For |
11721 positions after the gap, this is the byte index plus the gap | 11725 positions after the gap, this is the byte index plus the gap |
11722 size. There are two possible memory indices for the gap | 11726 size. There are two possible memory indices for the gap |
11723 position; the memory index at the beginning of the gap should | 11727 position; the memory index at the beginning of the gap should |
11724 always be used, except in code that deals with manipulating the | 11728 always be used, except in code that deals with manipulating the |
11725 gap, where both indices may be seen. The address of the | 11729 gap, where both indices may be seen. The address of the |
11726 character "at" (i.e. following) a particular position can be | 11730 character ``at'' (i.e. following) a particular position can be |
11727 obtained from the formula | 11731 obtained from the formula |
11728 | 11732 |
11729 buffer_start_address + memory_index(position) - 1 | 11733 buffer_start_address + memory_index(position) - 1 |
11730 | 11734 |
11731 except in the case of characters at the gap position. | 11735 except in the case of characters at the gap position. |
11830 use the buffer-level functions in buffer.h, which automatically know the | 11834 use the buffer-level functions in buffer.h, which automatically know the |
11831 correct format and handle the gap. | 11835 correct format and handle the gap. |
11832 | 11836 |
11833 Some terminology: | 11837 Some terminology: |
11834 | 11838 |
11835 "itext" appearing in the macros means "internal-format text" -- type | 11839 "itext" appearing in the macros means "internal-format text" -- type |
11836 @code{Ibyte *}. Operations on such pointers themselves, rather than on the | 11840 @code{Ibyte *}. Operations on such pointers themselves, rather than on the |
11837 text being pointed to, have "itext" instead of "itext" in the macro | 11841 text being pointed to, have "itext" instead of "itext" in the macro |
11838 name. "ichar" in the macro names means an Ichar -- the representation | 11842 name. "ichar" in the macro names means an Ichar -- the representation |
11839 of a character as a single integer rather than a series of bytes, as part | 11843 of a character as a single integer rather than a series of bytes, as part |
11840 of "itext". Many of the macros below are for converting between the | 11844 of "itext". Many of the macros below are for converting between the |
12039 @item | 12043 @item |
12040 (c) using the GCC extension (@{ ... @}). | 12044 (c) using the GCC extension (@{ ... @}). |
12041 @end itemize | 12045 @end itemize |
12042 | 12046 |
12043 Turned out that all of the above had bugs, all caused by GCC (hence the | 12047 Turned out that all of the above had bugs, all caused by GCC (hence the |
12044 comments about "those GCC wankers" and "ream gcc up the ass"). As for | 12048 comments about ``those GCC wankers'' and ``ream gcc up the ass''). As for |
12045 (a), some versions of GCC (especially on Intel platforms) had | 12049 (a), some versions of GCC (especially on Intel platforms) had |
12046 buggy implementations of @code{alloca()} that couldn't handle being called | 12050 buggy implementations of @code{alloca()} that couldn't handle being called |
12047 inside of a function call -- they just decremented the stack right in the | 12051 inside of a function call -- they just decremented the stack right in the |
12048 middle of pushing args. Oops, crash with stack trashing, very bad. (b) | 12052 middle of pushing args. Oops, crash with stack trashing, very bad. (b) |
12049 was an attempt to fix (a), and that led to further GCC crashes, esp. when | 12053 was an attempt to fix (a), and that led to further GCC crashes, esp. when |
13022 consistency. For example, the new Mule workspace contains Ibyte | 13026 consistency. For example, the new Mule workspace contains Ibyte |
13023 versions of the stdlib string functions. | 13027 versions of the stdlib string functions. |
13024 @item Extbyte, UExtbyte | 13028 @item Extbyte, UExtbyte |
13025 Pointer to text in some external format, which can be defined as all | 13029 Pointer to text in some external format, which can be defined as all |
13026 formats other than the internal one. The data representing a string | 13030 formats other than the internal one. The data representing a string |
13027 in "external" format (binary or any external encoding) is logically a | 13031 in ``external'' format (binary or any external encoding) is logically a |
13028 set of Extbytes. Extbyte is guaranteed to be just a char, so for | 13032 set of Extbytes. Extbyte is guaranteed to be just a char, so for |
13029 example strlen (Extbyte *) is OK. Extbyte is only a documentation | 13033 example strlen (Extbyte *) is OK. Extbyte is only a documentation |
13030 device for referring to external text. | 13034 device for referring to external text. |
13031 @item Ascbyte, UAscbyte | 13035 @item Ascbyte, UAscbyte |
13032 pure ASCII text, consisting of bytes in a string in entirely US-ASCII | 13036 pure ASCII text, consisting of bytes in a string in entirely US-ASCII |
13166 | 13170 |
13167 @node Mule-izing Code, , An Example of Mule-Aware Code, Coding for Mule | 13171 @node Mule-izing Code, , An Example of Mule-Aware Code, Coding for Mule |
13168 @subsection Mule-izing Code | 13172 @subsection Mule-izing Code |
13169 | 13173 |
13170 A lot of code is written without Mule in mind, and needs to be made | 13174 A lot of code is written without Mule in mind, and needs to be made |
13171 Mule-correct or "Mule-ized". There is really no substitute for | 13175 Mule-correct or ``Mule-ized''. There is really no substitute for |
13172 line-by-line analysis when doing this, but the following checklist can | 13176 line-by-line analysis when doing this, but the following checklist can |
13173 help: | 13177 help: |
13174 | 13178 |
13175 @itemize @bullet | 13179 @itemize @bullet |
13176 @item | 13180 @item |
13384 @item | 13388 @item |
13385 Look in the CRT sources! They come with VC++. See win32.c. | 13389 Look in the CRT sources! They come with VC++. See win32.c. |
13386 @end enumerate | 13390 @end enumerate |
13387 | 13391 |
13388 @node Locales, More about code pages, Microsoft Documentation, Microsoft Windows-Related Multilingual Issues | 13392 @node Locales, More about code pages, Microsoft Documentation, Microsoft Windows-Related Multilingual Issues |
13389 @subsection Locales, code pages, and other concepts of "language" | 13393 @subsection Locales, code pages, and other concepts of ``language'' |
13390 @cindex locales, code pages, and other concepts of "language" | 13394 @cindex locales, code pages, and other concepts of ``language'' |
13391 | 13395 |
13392 First, make sure you clearly understand the difference between the C | 13396 First, make sure you clearly understand the difference between the C |
13393 runtime library (CRT) and the Win32 API! See win32.c. | 13397 runtime library (CRT) and the Win32 API! See win32.c. |
13394 | 13398 |
13395 There are various different ways of representing the vague concept | 13399 There are various different ways of representing the vague concept |
13396 of "language", and it can be very confusing. So: | 13400 of ``language'', and it can be very confusing. So: |
13397 | 13401 |
13398 @itemize @bullet | 13402 @itemize @bullet |
13399 @item | 13403 @item |
13400 The CRT library has the concept of "locale", which is a | 13404 The CRT library has the concept of ``locale'', which is a |
13401 combination of language and country, and which controls the way | 13405 combination of language and country, and which controls the way |
13402 currency and dates are displayed, the encoding of data, etc. | 13406 currency and dates are displayed, the encoding of data, etc. |
13403 | 13407 |
13404 @item | 13408 @item |
13405 XEmacs has the concept of "language environment", more or less | 13409 XEmacs has the concept of ``language environment'', more or less |
13406 like a locale; although currently in most cases it just refers to | 13410 like a locale; although currently in most cases it just refers to |
13407 the language, and no sub-language distinctions are | 13411 the language, and no sub-language distinctions are |
13408 made. (Exceptions are with Chinese, which has different language | 13412 made. (Exceptions are with Chinese, which has different language |
13409 environments for Taiwan and mainland China, due to the different | 13413 environments for Taiwan and mainland China, due to the different |
13410 encodings and writing systems.) | 13414 encodings and writing systems.) |
13412 @item | 13416 @item |
13413 Windows has a number of different language concepts: | 13417 Windows has a number of different language concepts: |
13414 | 13418 |
13415 @enumerate | 13419 @enumerate |
13416 @item | 13420 @item |
13417 There are "languages" and "sublanguages", which correspond to | 13421 There are ``languages'' and ``sublanguages'', which correspond to |
13418 the languages and countries of the C library -- e.g. LANG_ENGLISH | 13422 the languages and countries of the C library -- e.g. LANG_ENGLISH |
13419 and SUBLANG_ENGLISH_US. These are identified by 8-bit integers, | 13423 and SUBLANG_ENGLISH_US. These are identified by 8-bit integers, |
13420 called the "primary language identifier" and "sublanguage | 13424 called the ``primary language identifier'' and ``sublanguage |
13421 identifier", respectively. These are combined into a 16-bit | 13425 identifier'', respectively. These are combined into a 16-bit |
13422 integer or "language identifier" by MAKELANGID(). | 13426 integer or ``language identifier'' by @code{MAKELANGID()}. |
13423 | 13427 |
13424 @item | 13428 @item |
13425 The language identifier in turn is combined with a "sort | 13429 The language identifier in turn is combined with a ``sort |
13426 identifier" (and optionally a "sort version") to yield a 32-bit | 13430 identifier'' (and optionally a ``sort version'') to yield a 32-bit |
13427 integer called a "locale identifier" (type LCID), which identifies | 13431 integer called a ``locale identifier'' (type LCID), which identifies |
13428 locales -- the primary means of distinguishing language/regional | 13432 locales -- the primary means of distinguishing language/regional |
13429 settings and similar to C library locales. | 13433 settings and similar to C library locales. |
13430 | 13434 |
13431 @item | 13435 @item |
13432 A "code page" combines the XEmacs concepts of "charset" and "coding | 13436 A ``code page'' combines the XEmacs concepts of ``charset'' and ``coding |
13433 system". It logically encompasses | 13437 system''. It logically encompasses |
13434 | 13438 |
13435 @itemize @minus | 13439 @itemize @minus |
13436 @item | 13440 @item |
13437 a set of supported characters | 13441 a set of supported characters |
13438 @item | 13442 @item |
13441 supported | 13445 supported |
13442 @item | 13446 @item |
13443 a way of encoding a series of characters into a string of bytes | 13447 a way of encoding a series of characters into a string of bytes |
13444 @end itemize | 13448 @end itemize |
13445 | 13449 |
13446 Note that the first two properties correspond to an XEmacs "charset" | 13450 Note that the first two properties correspond to an XEmacs ``charset'' |
13447 and the latter an XEmacs "coding system". | 13451 and the latter an XEmacs ``coding system''. |
13448 | 13452 |
13449 Traditional encodings are either simple one-byte encodings, or | 13453 Traditional encodings are either simple one-byte encodings, or |
13450 combination one-byte/two-byte encodings (aka MBCS encodings, where MBCS | 13454 combination one-byte/two-byte encodings (aka MBCS encodings, where MBCS |
13451 stands for "Multibyte Character Set") with the following properties: | 13455 stands for ``Multibyte Character Set'') with the following properties: |
13452 | 13456 |
13453 @itemize @minus | 13457 @itemize @minus |
13454 @item | 13458 @item |
13455 all characters are encoded as a one-byte or two-byte sequence | 13459 all characters are encoded as a one-byte or two-byte sequence |
13456 @item | 13460 @item |
13457 the encoding is stateless (non-modal) | 13461 the encoding is stateless (non-modal) |
13458 @item | 13462 @item |
13459 the lower 128 bytes are compatible with ASCII | 13463 the lower 128 bytes are compatible with ASCII |
13460 @item | 13464 @item |
13461 in the higher bytes, the value of the first byte ("lead byte") | 13465 in the higher bytes, the value of the first byte (``lead byte'') |
13462 determines whether a second byte follows | 13466 determines whether a second byte follows |
13463 @item | 13467 @item |
13464 the values used for second bytes may overlap those used for first | 13468 the values used for second bytes may overlap those used for first |
13465 bytes, and (in some encodings) include values in the low half; thus, | 13469 bytes, and (in some encodings) include values in the low half; thus, |
13466 moving backwards is hard, and pure-ASCII algorithms (e.g. finding the | 13470 moving backwards is hard, and pure-ASCII algorithms (e.g. finding the |
13478 Every Windows locale has four associated code pages: ANSI (an | 13482 Every Windows locale has four associated code pages: ANSI (an |
13479 international standard or some Microsoft-created approximation; the | 13483 international standard or some Microsoft-created approximation; the |
13480 native code page under Windows), OEM (a DOS encoding, still used in the | 13484 native code page under Windows), OEM (a DOS encoding, still used in the |
13481 FAT file system), Mac (an encoding used on the Macintosh) and EBCDIC (a | 13485 FAT file system), Mac (an encoding used on the Macintosh) and EBCDIC (a |
13482 non-ASCII-compatible encoding used on IBM mainframes, originally based | 13486 non-ASCII-compatible encoding used on IBM mainframes, originally based |
13483 on the BCD or "binary-coded decimal" encoding of numbers). All code | 13487 on the BCD or ``binary-coded decimal'' encoding of numbers). All code |
13484 pages associated with a locale follow (as far as I know) the properties | 13488 pages associated with a locale follow (as far as I know) the properties |
13485 listed above for traditional code pages. More than one locale can share | 13489 listed above for traditional code pages. More than one locale can share |
13486 a code page -- e.g. all the Western European languages, including | 13490 a code page -- e.g. all the Western European languages, including |
13487 English, do. | 13491 English, do. |
13488 | 13492 |
13489 @item | 13493 @item |
13490 Windows also has an "input locale identifier" (aka "keyboard | 13494 Windows also has an ``input locale identifier'' (aka ``keyboard |
13491 layout id") or HKL, which is a 32-bit integer composed of the | 13495 layout id'') or HKL, which is a 32-bit integer composed of the |
13492 16-bit language identifier and a 16-bit "device identifier", which | 13496 16-bit language identifier and a 16-bit ``device identifier'', which |
13493 originally specified a particular keyboard layout (e.g. the locale | 13497 originally specified a particular keyboard layout (e.g. the locale |
13494 "US English" can have the QWERTY layout, the Dvorak layout, etc.), | 13498 ``US English'' can have the QWERTY layout, the Dvorak layout, etc.), |
13495 but has been expanded to include speech-to-text converters and | 13499 but has been expanded to include speech-to-text converters and |
13496 other non-keyboard ways of inputting text. Note that both the HKL | 13500 other non-keyboard ways of inputting text. Note that both the HKL |
13497 and LCID share the language identifier in the lower 16 bits, and in | 13501 and LCID share the language identifier in the lower 16 bits, and in |
13498 both cases a 0 in the upper 16 bits means "default" (sort order or | 13502 both cases a 0 in the upper 16 bits means ``default'' (sort order or |
13499 device), providing a way to convert between HKL's, LCID's, and | 13503 device), providing a way to convert between HKL's, LCID's, and |
13500 language identifiers (i.e. language/sublanguage pairs). The | 13504 language identifiers (i.e. language/sublanguage pairs). The |
13501 default keyboard layout for a language is (as far as I can | 13505 default keyboard layout for a language is (as far as I can |
13502 determine) established using the Regional Settings control panel | 13506 determine) established using the Regional Settings control panel |
13503 applet, where you can add input locales as combinations of language | 13507 applet, where you can add input locales as combinations of language |
13511 | 13515 |
13512 @node More about code pages, More about locales, Locales, Microsoft Windows-Related Multilingual Issues | 13516 @node More about code pages, More about locales, Locales, Microsoft Windows-Related Multilingual Issues |
13513 @subsection More about code pages | 13517 @subsection More about code pages |
13514 @cindex more about code pages | 13518 @cindex more about code pages |
13515 | 13519 |
13516 Here is what MSDN says about code pages (article "Code Pages"): | 13520 Here is what MSDN says about code pages (article ``Code Pages''): |
13517 | 13521 |
13518 @quotation | 13522 @quotation |
13519 A code page is a character set, which can include numbers, | 13523 A code page is a character set, which can include numbers, |
13520 punctuation marks, and other glyphs. Different languages and locales | 13524 punctuation marks, and other glyphs. Different languages and locales |
13521 may use different code pages. For example, ANSI code page 1252 is | 13525 may use different code pages. For example, ANSI code page 1252 is |
13553 | 13557 |
13554 -- The "C" locale is defined by ANSI to correspond to the locale in | 13558 -- The "C" locale is defined by ANSI to correspond to the locale in |
13555 which C programs have traditionally executed. The code page for the | 13559 which C programs have traditionally executed. The code page for the |
13556 "C" locale (code page) corresponds to the ASCII character | 13560 "C" locale (code page) corresponds to the ASCII character |
13557 set. For example, in the "C" locale, islower returns true for the | 13561 set. For example, in the "C" locale, islower returns true for the |
13558 values 0x61 ?0x7A only. In another locale, islower may return true | 13562 values 0x61 to 0x7A only. In another locale, islower may return true |
13559 for these as well as other values, as defined by that locale. | 13563 for these as well as other values, as defined by that locale. |
13560 | 13564 |
13561 Under "Locale-Dependent Routines" we notice the following setlocale | 13565 Under ``Locale-Dependent Routines'' we notice the following setlocale |
13562 dependencies: | 13566 dependencies: |
13563 | 13567 |
13564 atof, atoi, atol (LC_NUMERIC) | 13568 atof, atoi, atol (LC_NUMERIC) |
13565 is Routines (LC_CTYPE) | 13569 is Routines (LC_CTYPE) |
13566 isleadbyte (LC_CTYPE) | 13570 isleadbyte (LC_CTYPE) |
13589 wcstombs (LC_CTYPE) | 13593 wcstombs (LC_CTYPE) |
13590 wctomb (LC_CTYPE) | 13594 wctomb (LC_CTYPE) |
13591 _wtoi/_wtol (LC_NUMERIC) | 13595 _wtoi/_wtol (LC_NUMERIC) |
13592 @end quotation | 13596 @end quotation |
13593 | 13597 |
13594 NOTE: The above documentation doesn't clearly explain the "locale code | 13598 NOTE: The above documentation doesn't clearly explain the ``locale code |
13595 page" and "multibyte code page". These are two different values, | 13599 page'' and ``multibyte code page''. These are two different values, |
13596 maintained respectively in the CRT global variables __lc_codepage and | 13600 maintained respectively in the CRT global variables __lc_codepage and |
13597 __mbcodepage. Calling e.g. setlocale (LC_ALL, "JAPANESE") sets @strong{ONLY} | 13601 __mbcodepage. Calling e.g. setlocale (LC_ALL, "JAPANESE") sets @strong{ONLY} |
13598 __lc_codepage to 932 (the code page for Japanese), and leaves | 13602 __lc_codepage to 932 (the code page for Japanese), and leaves |
13599 __mbcodepage unchanged (usually 1252, i.e. Windows-ANSI). You'd have to | 13603 __mbcodepage unchanged (usually 1252, i.e. Windows-ANSI). You'd have to |
13600 call _setmbcp() to change __mbcodepage. Figuring out from the | 13604 call _setmbcp() to change __mbcodepage. Figuring out from the |
13601 documentation which routines use which code page is not so obvious. But: | 13605 documentation which routines use which code page is not so obvious. But: |
13602 | 13606 |
13603 @itemize @bullet | 13607 @itemize @bullet |
13604 @item | 13608 @item |
13605 from "Interpretation of Multibyte-Character Sequences" it appears that | 13609 from ``Interpretation of Multibyte-Character Sequences'' it appears that |
13606 all "multibyte-character routines" use the multibyte code page except for | 13610 all ``multibyte-character routines'' use the multibyte code page except for |
13607 mblen(), _mbstrlen(), mbstowcs(), mbtowc(), wcstombs(), and wctomb(). | 13611 @code{mblen()}, @code{_mbstrlen()}, @code{mbstowcs()}, @code{mbtowc()}, @code{wcstombs()}, and @code{wctomb()}. |
13608 | 13612 |
13609 @item | 13613 @item |
13610 from "_setmbcp": "The multibyte code page also affects | 13614 from ``_setmbcp'': ``The multibyte code page also affects |
13611 multibyte-character processing by the following run-time library | 13615 multibyte-character processing by the following run-time library |
13612 routines: _exec functions _mktemp _stat _fullpath _spawn functions | 13616 routines: _exec functions _mktemp _stat _fullpath _spawn functions |
13613 _tempnam _makepath _splitpath tmpnam. In addition, all run-time library | 13617 _tempnam _makepath _splitpath tmpnam. In addition, all run-time library |
13614 routines that receive multibyte-character argv or envp program arguments | 13618 routines that receive multibyte-character argv or envp program arguments |
13615 as parameters (such as the _exec and _spawn families) process these | 13619 as parameters (such as the _exec and _spawn families) process these |
13616 strings according to the multibyte code page. Hence these routines are | 13620 strings according to the multibyte code page. Hence these routines are |
13617 also affected by a call to _setmbcp that changes the multibyte code | 13621 also affected by a call to _setmbcp that changes the multibyte code |
13618 page." | 13622 page.'' |
13619 @end itemize | 13623 @end itemize |
13620 | 13624 |
13621 Summary: from looking at the CRT source (which comes with VC++) and | 13625 Summary: from looking at the CRT source (which comes with VC++) and |
13622 carefully looking through the docs, it appears that: | 13626 carefully looking through the docs, it appears that: |
13623 | 13627 |
13624 @itemize @bullet | 13628 @itemize @bullet |
13625 @item | 13629 @item |
13626 the "locale code page" is used by all of the routines listed above | 13630 the ``locale code page'' is used by all of the routines listed above |
13627 under "Locale-Dependent Routines" (EXCEPT _mbccpy() and _mbclen()), | 13631 under ``Locale-Dependent Routines'' (EXCEPT @code{_mbccpy()} and @code{_mbclen()}), |
13628 as well as any other place that converts between multibyte and Unicode | 13632 as well as any other place that converts between multibyte and Unicode |
13629 strings, e.g. the startup code. | 13633 strings, e.g. the startup code. |
13630 @item | 13634 @item |
13631 the "multibyte code page" is used in all of the *mb*() routines | 13635 the ``multibyte code page'' is used in all of the @code{mb*()} routines |
13632 except mblen(), _mbstrlen(), mbstowcs(), mbtowc(), wcstombs(), | 13636 except @code{mblen()}, @code{_mbstrlen()}, @code{mbstowcs()}, @code{mbtowc()}, @code{wcstombs()}, |
13633 and wctomb(); also _exec*(), _spawn*(), _mktemp(), _stat(), _fullpath(), | 13637 and @code{wctomb()}; also @code{_exec*()}, @code{_spawn*()}, @code{_mktemp()}, @code{_stat()}, @code{_fullpath()}, |
13634 _tempnam(), _makepath(), _splitpath(), tmpnam(), and similar functions | 13638 @code{_tempnam()}, @code{_makepath()}, @code{_splitpath()}, @code{tmpnam()}, and similar functions |
13635 without the leading underscore. | 13639 without the leading underscore. |
13636 @end itemize | 13640 @end itemize |
13637 | 13641 |
13638 @node More about locales, Unicode support under Windows, More about code pages, Microsoft Windows-Related Multilingual Issues | 13642 @node More about locales, Unicode support under Windows, More about code pages, Microsoft Windows-Related Multilingual Issues |
13639 @subsection More about locales | 13643 @subsection More about locales |
13642 In addition to the locale defined by the CRT, Windows (i.e. the Win32 API) | 13646 In addition to the locale defined by the CRT, Windows (i.e. the Win32 API) |
13643 defines various locales: | 13647 defines various locales: |
13644 | 13648 |
13645 @itemize @bullet | 13649 @itemize @bullet |
13646 @item | 13650 @item |
13647 The system-default locale is the locale defined under "Language | 13651 The system-default locale is the locale defined under ``Language |
13648 settings for the system" in the "Regional Options" control panel. This | 13652 settings for the system'' in the ``Regional Options'' control panel. This |
13649 is NOT user-specific, and changing it requires a reboot (at least under | 13653 is NOT user-specific, and changing it requires a reboot (at least under |
13650 Windows 2000). The ANSI code page of the system-default locale is | 13654 Windows 2000). The ANSI code page of the system-default locale is |
13651 returned by GetACP(), and you can specify this code page in calls | 13655 returned by @code{GetACP()}, and you can specify this code page in calls |
13652 e.g. to MultiByteToWideChar with the constant CP_ACP. | 13656 e.g. to MultiByteToWideChar with the constant CP_ACP. |
13653 | 13657 |
13654 @item | 13658 @item |
13655 The user-default locale is the locale defined under "Settings for the | 13659 The user-default locale is the locale defined under ``Settings for the |
13656 current user" in the "Regional Options" control panel. | 13660 current user'' in the ``Regional Options'' control panel. |
13657 | 13661 |
13658 @item | 13662 @item |
13659 There is a thread-local locale set by SetThreadLocale. #### What is this | 13663 There is a thread-local locale set by SetThreadLocale. #### What is this |
13660 used for? | 13664 used for? |
13661 @end itemize | 13665 @end itemize |
13662 | 13666 |
13663 The Win32 API has a bunch of multibyte functions -- all of those that | 13667 The Win32 API has a bunch of multibyte functions -- all of those that |
13664 end with ...A(), and on which we spend so much effort in | 13668 end with ...@code{A()}, and on which we spend so much effort in |
13665 intl-encap-win32.c. These appear to ALWAYS use the ANSI code page of | 13669 intl-encap-win32.c. These appear to ALWAYS use the ANSI code page of |
13666 the system-default locale (GetACP(), CP_ACP). Note that this applies | 13670 the system-default locale (@code{GetACP()}, CP_ACP). Note that this applies |
13667 also, for example, to the encoding of filenames in all file-handling | 13671 also, for example, to the encoding of filenames in all file-handling |
13668 routines, including the CRT ones such as open(), because they pass their | 13672 routines, including the CRT ones such as @code{open()}, because they pass their |
13669 args unchanged to the Win32 API. | 13673 args unchanged to the Win32 API. |
13670 | 13674 |
13671 @node Unicode support under Windows, The golden rules of writing Unicode-safe code, More about locales, Microsoft Windows-Related Multilingual Issues | 13675 @node Unicode support under Windows, The golden rules of writing Unicode-safe code, More about locales, Microsoft Windows-Related Multilingual Issues |
13672 @subsection Unicode support under Windows | 13676 @subsection Unicode support under Windows |
13673 @cindex unicode support under windows | 13677 @cindex unicode support under windows |
13681 table to convert the characters of that code page to and from Unicode, and | 13685 table to convert the characters of that code page to and from Unicode, and |
13682 the Win32 API itself probably (perhaps always) uses Unicode internally. | 13686 the Win32 API itself probably (perhaps always) uses Unicode internally. |
13683 | 13687 |
13684 Under Windows there are two different versions of all library routines that | 13688 Under Windows there are two different versions of all library routines that |
13685 accept or return text, those that handle Unicode text and those handling | 13689 accept or return text, those that handle Unicode text and those handling |
13686 "multibyte" text, i.e. variable-width ASCII-compatible text in some | 13690 ``multibyte'' text, i.e. variable-width ASCII-compatible text in some |
13687 national format such as EUC or Shift-JIS. Because Windows 95 basically | 13691 national format such as EUC or Shift-JIS. Because Windows 95 basically |
13688 doesn't support Unicode but Windows NT does, and Microsoft doesn't provide | 13692 doesn't support Unicode but Windows NT does, and Microsoft doesn't provide |
13689 any way of writing a single binary that will work on both systems and still | 13693 any way of writing a single binary that will work on both systems and still |
13690 use Unicode when it's available (although see below, Microsoft Layer for | 13694 use Unicode when it's available (although see below, Microsoft Layer for |
13691 Unicode), we need to provide a way of run-time conditionalizing so you | 13695 Unicode), we need to provide a way of run-time conditionalizing so you |
13692 could have one binary for both systems. "Unicode-splitting" refers to | 13696 could have one binary for both systems. ``Unicode-splitting'' refers to |
13693 writing code that will handle this properly. This means using | 13697 writing code that will handle this properly. This means using |
13694 Qmswindows_tstr as the external conversion format, calling the appropriate | 13698 Qmswindows_tstr as the external conversion format, calling the appropriate |
13695 qxe...() Unicode-split version of library functions, and doing other things | 13699 qxe...() Unicode-split version of library functions, and doing other things |
13696 in certain cases, e.g. when a qxe() function is not present. | 13700 in certain cases, e.g. when a @code{qxe()} function is not present. |
13697 | 13701 |
13698 Unicode support also requires that the various Windows APIs be | 13702 Unicode support also requires that the various Windows APIs be |
13699 "Unicode-encapsulated", so that they automatically call the ANSI or | 13703 ``Unicode-encapsulated'', so that they automatically call the ANSI or |
13700 Unicode version of the API call appropriately and handle the size | 13704 Unicode version of the API call appropriately and handle the size |
13701 differences in structures. What this means is: | 13705 differences in structures. What this means is: |
13702 | 13706 |
13703 @itemize @bullet | 13707 @itemize @bullet |
13704 @item | 13708 @item |
13705 first, note that Windows already provides a sort of encapsulation | 13709 first, note that Windows already provides a sort of encapsulation |
13706 of all APIs that deal with text. All such APIs are underlyingly | 13710 of all APIs that deal with text. All such APIs are underlyingly |
13707 provided in two versions, with an A or W suffix (ANSI or "wide" | 13711 provided in two versions, with an A or W suffix (ANSI or ``wide'' |
13708 i.e. Unicode), and the compile-time constant UNICODE controls which is | 13712 i.e. Unicode), and the compile-time constant UNICODE controls which is |
13709 selected by the unsuffixed API. Same thing happens with structures, and | 13713 selected by the unsuffixed API. Same thing happens with structures, and |
13710 also with types, where the generic types have names beginning with T -- | 13714 also with types, where the generic types have names beginning with T -- |
13711 TCHAR, LPTSTR, etc. Unfortunately, this is compile-time only, not | 13715 TCHAR, LPTSTR, etc. Unfortunately, this is compile-time only, not |
13712 run-time, so not sufficient. (Creating the necessary run-time encoding | 13716 run-time, so not sufficient. (Creating the necessary run-time encoding |
13721 such an API available internally.) | 13725 such an API available internally.) |
13722 | 13726 |
13723 @item | 13727 @item |
13724 what we do is provide an encapsulation of each standard Windows API call | 13728 what we do is provide an encapsulation of each standard Windows API call |
13725 that is split into A and W versions. Current theory is to avoid all | 13729 that is split into A and W versions. Current theory is to avoid all |
13726 preprocessor games; so we name the function with a prefix -- "qxe" | 13730 preprocessor games; so we name the function with a prefix -- ``qxe'' |
13727 currently -- and require callers to use the prefixed name. Callers need | 13731 currently -- and require callers to use the prefixed name. Callers need |
13728 to explicitly use the W version of all structures, and convert text | 13732 to explicitly use the W version of all structures, and convert text |
13729 themselves using Qmswindows_tstr. The qxe encapsulated version will | 13733 themselves using Qmswindows_tstr. The qxe encapsulated version will |
13730 automatically call the appropriate A or W version depending on whether | 13734 automatically call the appropriate A or W version depending on whether |
13731 we're running on 9x or NT (you can force use of the A calls on NT, | 13735 we're running on 9x or NT (you can force use of the A calls on NT, |
13781 purpose, to make the code easier to follow for someone who's not familiar | 13785 purpose, to make the code easier to follow for someone who's not familiar |
13782 with it. Until our library is really complete and bug-free, we should | 13786 with it. Until our library is really complete and bug-free, we should |
13783 think twice before doing this. | 13787 think twice before doing this. |
13784 | 13788 |
13785 According to Microsoft documentation, only the following functions are | 13789 According to Microsoft documentation, only the following functions are |
13786 provided under Windows 9x to support Unicode (see MSDN page "Windows | 13790 provided under Windows 9x to support Unicode (see MSDN page ``Windows |
13787 95/98/Me General Limitations"): | 13791 95/98/Me General Limitations''): |
13788 | 13792 |
13789 EnumResourceLanguagesW | 13793 EnumResourceLanguagesW |
13790 EnumResourceNamesW | 13794 EnumResourceNamesW |
13791 EnumResourceTypesW | 13795 EnumResourceTypesW |
13792 ExtTextOutW | 13796 ExtTextOutW |
13803 MessageBoxExW | 13807 MessageBoxExW |
13804 MultiByteToWideChar | 13808 MultiByteToWideChar |
13805 TextOutW | 13809 TextOutW |
13806 WideCharToMultiByte | 13810 WideCharToMultiByte |
13807 | 13811 |
13808 also maybe GetTextExtentExPoint? (KB Q125671 "Unicode Functions Supported | 13812 also maybe GetTextExtentExPoint? (KB Q125671 ``Unicode Functions Supported |
13809 by Windows 95") | 13813 by Windows 95'') |
13810 | 13814 |
13811 Q210341 says this in addition: | 13815 Q210341 says this in addition: |
13812 | 13816 |
13813 @quotation | 13817 @quotation |
13814 SUMMARY: | 13818 SUMMARY: |
13829 range beyond the 256 limitation of a one-byte representation. | 13833 range beyond the 256 limitation of a one-byte representation. |
13830 | 13834 |
13831 The Unicode standard offers application developers an opportunity to | 13835 The Unicode standard offers application developers an opportunity to |
13832 work with text without the limitations of character set based | 13836 work with text without the limitations of character set based |
13833 systems. For more information on the Unicode standard see the | 13837 systems. For more information on the Unicode standard see the |
13834 "References" section of this article. Windows NT is a fully Unicode | 13838 ``References'' section of this article. Windows NT is a fully Unicode |
13835 capable operating system so it may be desirable to write software that | 13839 capable operating system so it may be desirable to write software that |
13836 supports Unicode on Windows 95. | 13840 supports Unicode on Windows 95. |
13837 | 13841 |
13838 Even though Windows 95 and Windows 98 are not Unicode based, they do | 13842 Even though Windows 95 and Windows 98 are not Unicode based, they do |
13839 provide some limited Unicode functionality. Drawing of Unicode text is | 13843 provide some limited Unicode functionality. Drawing of Unicode text is |
13912 @itemize @bullet | 13916 @itemize @bullet |
13913 @item | 13917 @item |
13914 wmain() is completely supported, and appropriate Unicode-formatted argv | 13918 wmain() is completely supported, and appropriate Unicode-formatted argv |
13915 and envp will always be passed. | 13919 and envp will always be passed. |
13916 @item | 13920 @item |
13917 Likewise, wWinMain() is completely supported. (NOTE: The docs are not at | 13921 Likewise, @code{wWinMain()} is completely supported. (NOTE: The docs are not at |
13918 all clear on how these various entry points interact, and imply that | 13922 all clear on how these various entry points interact, and imply that |
13919 a windows-subsystem program "must" use WinMain(), while a console- | 13923 a windows-subsystem program ``must'' use @code{WinMain()}, while a console- |
13920 subsystem program "must" use main(), and a program compiled with UNICODE | 13924 subsystem program ``must'' use @code{main()}, and a program compiled with UNICODE |
13921 (which we don't, see above) "must" use the w*() versions, while a program | 13925 (which we don't, see above) ``must'' use the @code{w*()} versions, while a program |
13922 not compiled this way "must" use the plain versions. In fact it appears | 13926 not compiled this way ``must'' use the plain versions. In fact it appears |
13923 that the CRT provides four different compiler entry points, namely | 13927 that the CRT provides four different compiler entry points, namely |
13924 w?(main|WinMain)CRTStartup, and we simply choose the one we like using | 13928 w?(main|WinMain)CRTStartup, and we simply choose the one we like using |
13925 the appropriate link flag.) | 13929 the appropriate link flag.) |
13926 @item | 13930 @item |
13927 _wenviron, _wputenv | 13931 _wenviron, _wputenv |
17939 | +--------------------------------------------------------------------+ | | 17943 | +--------------------------------------------------------------------+ | |
17940 | | menubar | | | 17944 | | menubar | | |
17941 | ###################################################################### | | 17945 | ###################################################################### | |
17942 | # toolbar # | | 17946 | # toolbar # | |
17943 | #--------------------------------------------------------------------# | | 17947 | #--------------------------------------------------------------------# | |
17944 | # | gutter | # | | 17948 | # | internal border | # | |
17945 | # |--------------------------------------------------------------| # | | 17949 | # | +----------------------------------------------------------+ | # | |
17946 | # | | internal border width | | # | | 17950 | # | | gutter | | # | |
17947 | # | | ******************************************************** | | # | | 17951 | # | |-********************************************************-| | # | |
17948 |w# | | * |s|v* |s* | | #w| | 17952 |w# | | *@| scrollbar |v* |s* | | #w| |
17949 |i# | | * |c|e* |c* | | #i| | 17953 |i# | | *-+-------------------------|e* |c* | | #i| |
17950 |n# | | * |r|r* |r* | | #n| | 17954 |n# | | *s| |r* |r* | | #n| |
17951 |d# | | * |o|t* |o* | | #d| | 17955 |d# | | *c| |t* |o* | | #d| |
17952 |o# | | * text area |l|.* text area |l* | | #o| | 17956 |o# | | *r| |.* text area |l* | | #o| |
17953 |w# | |i* |l| * |l*i| | #w| | 17957 |w# |i| *o| | * |l* |i| #w| |
17954 |-# | |n* |b|d* |b*n| | #-| | 17958 |-# |n| *l| text area |d* |b* |n| #-| |
17955 |m# | |t* |a|i* |a*t| | #m| | 17959 |m# |t| *l| |i* |a* |t| #m| |
17956 |a# | |.* |r|v* |r*.| | #a| | 17960 |a# |e| *b| |v* |r* |e| #a| |
17957 |n# t| | *-------------------------+-|i*----------------------+-* | |t #n| | 17961 |n# t|r| *a| |i*----------------------+-* |r|t #n| |
17958 |a# o|g|b* scrollbar | |d* scrollbar | *b|g|o #a| | 17962 |a# o|n|g*r| |d* scrollbar |@*g|n|o #a| |
17959 |g# o|u|o*-------------------------+-|e*----------------------+-*o|u|o #g| | 17963 |g# o|a|u*-+-------------------------|e*----------------------+-*u|a|o #g| |
17960 |e# l|t|r* modeline |r* modeline *r|t|l #e| | 17964 |e# l|l|t* modeline |r* modeline *t|l|l #e| |
17961 |r# b|t|d********************************************************d|t|b #r| | 17965 |r# b| |t********************************************************t| |b #r| |
17962 | # a|e|e* =..texttexttex....= |s|v* |s*e|e|a # | | 17966 | # a|b|e* =..texttexttex....= |s|v* |s*e|b|a # | |
17963 |d# r|r|r*o m=..texttexttextt..=o m|c|e* |c*r|r|r #d| | 17967 |d# r|o|r*o m=..texttexttextt..=o m|c|e* |c*r|o|r #d| |
17964 |e# | | *u a=.exttexttextte...=u a|r|r* |r* | | #e| | 17968 |e# |r| *u a=.exttexttextte...=u a|r|r* |r* |r| #e| |
17965 |c# | |w*t r=....texttexttex..=t r|o|t* |o*w| | #c| | 17969 |c# |d| *t r=....texttexttex..=t r|o|t* |o* |d| #c| |
17966 |o# | |i*s g= etc. =s g|l|.* text area |l*i| | #o| | 17970 |o# |e| *s g= etc. =s g|l|.* text area |l* |e| #o| |
17967 |r# | |d*i i= =i i|l| * |l*d| | #r| | 17971 |r# |r| *i i= =i i|l| * |l* |r| #r| |
17968 |a# | |t*d n= =d n|b|d* |b*t| | #a| | 17972 |a# | | *d n= =d n|b|d* |b* | | #a| |
17969 |t# | |h*e = inner text area =e |a|i* |a*h| | #t| | 17973 |t# | | *e = inner text area =e |a|i* |a* | | #t| |
17970 |i# | | * = = |r|v* |r* | | #i| | 17974 |i# | | * = = |r|v* |r* | | #i| |
17971 |o# | | *---===================---+-|i*----------------------+-* | | #o| | 17975 |o# | | *---===================---+-|i*----------------------+-* | | #o| |
17972 |n# | | * scrollbar | |d* scrollbar | * | | #n| | 17976 |n# | | * scrollbar |@|d* scrollbar |@* | | #n| |
17973 | # | | *-------------------------+-|e*----------------------+-* | | # | | 17977 | # | | *-------------------------+-|e*----------------------+-* | | # | |
17974 | # | | * modeline |r* modeline * | | # | | 17978 | # | | * modeline |r* modeline * | | # | |
17975 | # | | ******************************************************** | | # | | 17979 | # | |-********************************************************-| | # | |
17976 | # | | * minibuffer * | | # | | 17980 | # | | gutter | | # | |
17977 | # | | ******************************************************** | | # | | 17981 | # | |-********************************************************-| | # | |
17978 | # | | internal border width | | # | | 17982 | # | |@* minibuffer *@| | # | |
17979 | # |--------------------------------------------------------------| # | | 17983 | # | +-********************************************************-+ | # | |
17980 | # | gutter | # | | 17984 | # | internal border | # | |
17981 | #--------------------------------------------------------------------# | | 17985 | #--------------------------------------------------------------------# | |
17982 | # toolbar # | | 17986 | # toolbar # | |
17983 | ###################################################################### | | 17987 | ###################################################################### | |
17984 | window manager decoration | | 17988 | window manager decoration | |
17985 +------------------------------------------------------------------------+ | 17989 +------------------------------------------------------------------------+ |
17986 | 17990 |
17987 # = boundary of client area; * = window boundaries, boundary of paned area | 17991 # = boundary of client area; * = window boundaries, boundary of paned area |
17988 = = boundary of inner text area; . = inside margin area | 17992 = = boundary of inner text area; . = inside margin area; @ = dead boxes |
17989 @end example | 17993 @end example |
17990 | 17994 |
17991 Note in particular what happens at the corners, where a "corner box" | 17995 Note in particular what happens at the corners, where a ``corner box'' |
17992 occurs. Top and bottom toolbars take precedence over left and right | 17996 occurs. Top and bottom toolbars take precedence over left and right |
17993 toolbars, extending out horizontally into the corner boxes. Gutters | 17997 toolbars, extending out horizontally into the corner boxes. Gutters |
17994 work the same way. The corner box where the scrollbars meet, however, | 17998 work the same way. The corner box where the scrollbars meet, however, |
17995 is assigned to neither scrollbar, and is known as the "dead box"; it is | 17999 is assigned to neither scrollbar, and is known as the ``dead box''; it is |
17996 an area that must be cleared specially. | 18000 an area that must be cleared specially. There are similar dead boxes at |
18001 the bottom-right and bottom-left corners where the minibuffer and | |
18002 left/right gutters meet, but there is currently a bug in that these dead | |
18003 boxes are not explicitly cleared and may contain junk. | |
17997 | 18004 |
17998 @node The Frame, The Non-Client Area, Intro to Window and Frame Geometry, Window and Frame Geometry | 18005 @node The Frame, The Non-Client Area, Intro to Window and Frame Geometry, Window and Frame Geometry |
17999 @section The Frame | 18006 @section The Frame |
18000 | 18007 |
18001 The "top-level window area" is the entire area of a top-level window (or | 18008 The ``top-level window area'' is the entire area of a top-level window (or |
18002 "frame"). The "client area" (a term from MS Windows) is the area of a | 18009 ``frame''). The ``client area'' (a term from MS Windows) is the area of a |
18003 top-level window that XEmacs draws into and manages with redisplay. | 18010 top-level window that XEmacs draws into and manages with redisplay. |
18004 This includes the toolbar, scrollbars, gutters, dividers, text area, | 18011 This includes the toolbar, scrollbars, gutters, dividers, text area, |
18005 modeline and minibuffer. It does not include the menubar, title or | 18012 modeline and minibuffer. It does not include the menubar, title or |
18006 outer borders. The "non-client area" is the area of a top-level window | 18013 outer borders. The ``non-client area'' is the area of a top-level window |
18007 outside of the client area and includes the menubar, title and outer | 18014 outside of the client area and includes the menubar, title and outer |
18008 borders. Internally, all frame coordinates are relative to the client | 18015 borders. Internally, all frame coordinates are relative to the client |
18009 area. | 18016 area. |
18010 | 18017 |
18011 | 18018 |
18018 @item | 18025 @item |
18019 The outer layer is the window-manager decorations: The title and | 18026 The outer layer is the window-manager decorations: The title and |
18020 borders. These are controlled by the window manager, a separate process | 18027 borders. These are controlled by the window manager, a separate process |
18021 that controls the desktop, the location of icons, etc. When a process | 18028 that controls the desktop, the location of icons, etc. When a process |
18022 tries to create a window, the window manager intercepts this action and | 18029 tries to create a window, the window manager intercepts this action and |
18023 "reparents" the window, placing another window around it which contains | 18030 ``reparents'' the window, placing another window around it which contains |
18024 the window decorations, including the title bar, outer borders used for | 18031 the window decorations, including the title bar, outer borders used for |
18025 resizing, etc. The window manager also implements any actions involving | 18032 resizing, etc. The window manager also implements any actions involving |
18026 the decorations, such as the ability to resize a window by dragging its | 18033 the decorations, such as the ability to resize a window by dragging its |
18027 borders, move a window by dragging its title bar, etc. If there is no | 18034 borders, move a window by dragging its title bar, etc. If there is no |
18028 window manager or you kill it, windows will have no decorations (and | 18035 window manager or you kill it, windows will have no decorations (and |
18029 will lose them if they previously had any) and you will not be able to | 18036 will lose them if they previously had any) and you will not be able to |
18030 move or resize them. | 18037 move or resize them. |
18031 | 18038 |
18032 @item | 18039 @item |
18033 Inside of the window-manager decorations is the "shell", which is | 18040 Inside of the window-manager decorations is the ``shell'', which is |
18034 managed by the toolkit and widget libraries your program is linked with. | 18041 managed by the toolkit and widget libraries your program is linked with. |
18035 The code in @file{*-x.c} uses the Xt toolkit and various possible widget | 18042 The code in @file{*-x.c} uses the Xt toolkit and various possible widget |
18036 libraries built on top of Xt, such as Motif, Athena, the "Lucid" | 18043 libraries built on top of Xt, such as Motif, Athena, the ``Lucid'' |
18037 widgets, etc. Another possibility is GTK (@file{*-gtk.c}), which implements | 18044 widgets, etc. Another possibility is GTK (@file{*-gtk.c}), which implements |
18038 both the toolkit and widgets. Under Xt, the "shell" window is an | 18045 both the toolkit and widgets. Under Xt, the ``shell'' window is an |
18039 EmacsShell widget, containing an EmacsManager widget of the same size, | 18046 EmacsShell widget, containing an EmacsManager widget of the same size, |
18040 which in turn contains a menubar widget and an EmacsFrame widget, inside | 18047 which in turn contains a menubar widget and an EmacsFrame widget, inside |
18041 of which is the client area. (The division into EmacsShell and | 18048 of which is the client area. (The division into EmacsShell and |
18042 EmacsManager is due to the complex and screwy geometry-management system | 18049 EmacsManager is due to the complex and screwy geometry-management system |
18043 in Xt [and X more generally]. The EmacsShell handles negotiation with | 18050 in Xt [and X more generally]. The EmacsShell handles negotiation with |
18049 | 18056 |
18050 Under Windows, the non-client area is managed by the window system. | 18057 Under Windows, the non-client area is managed by the window system. |
18051 There is no division such as under X. Part of the window-system API | 18058 There is no division such as under X. Part of the window-system API |
18052 (@file{USER.DLL}) of Win32 includes functions to control the menubars, title, | 18059 (@file{USER.DLL}) of Win32 includes functions to control the menubars, title, |
18053 etc. and implements the move and resize behavior. There @strong{is} an | 18060 etc. and implements the move and resize behavior. There @strong{is} an |
18054 equivalent of the window manager, called the "shell", but it manages | 18061 equivalent of the window manager, called the ``shell'', but it manages |
18055 only the desktop, not the windows themselves. The normal shell under | 18062 only the desktop, not the windows themselves. The normal shell under |
18056 Windows is @file{EXPLORER.EXE}; if you kill this, you will lose the bar | 18063 Windows is @file{EXPLORER.EXE}; if you kill this, you will lose the bar |
18057 containing the "Start" menu and tray and such, but the windows | 18064 containing the ``Start'' menu and tray and such, but the windows |
18058 themselves will not be affected or lose their decorations. | 18065 themselves will not be affected or lose their decorations. |
18059 | 18066 |
18060 | 18067 |
18061 @node The Client Area, The Paned Area, The Non-Client Area, Window and Frame Geometry | 18068 @node The Client Area, The Paned Area, The Non-Client Area, Window and Frame Geometry |
18062 @section The Client Area | 18069 @section The Client Area |
18063 | 18070 |
18064 Inside of the client area are the toolbars, the gutters (where the buffer | 18071 Inside of the client area are the toolbars, the gutters (where the buffer |
18065 tabs are displayed), the minibuffer, the internal border width, and one | 18072 tabs are displayed), the minibuffer, the internal border width, and one |
18066 or more non-overlapping "windows" (this is old Emacs terminology, from | 18073 or more non-overlapping ``windows'' (this is old Emacs terminology, from |
18067 before the time when frames existed at all; the standard terminology for | 18074 before the time when frames existed at all; the standard terminology for |
18068 this would be "pane"). Each window can contain a modeline, horizontal | 18075 this would be ``pane''). Each window can contain a modeline, horizontal |
18069 and/or vertical scrollbars, and (for non-rightmost windows) a vertical | 18076 and/or vertical scrollbars, and (for non-rightmost windows) a vertical |
18070 divider, surrounding a text area. | 18077 divider, surrounding a text area. |
18071 | 18078 |
18072 The dimensions of the toolbars and gutters are determined by the formula | 18079 The dimensions of the toolbars and gutters are determined by the formula |
18073 (THICKNESS + 2 * BORDER-THICKNESS), where "thickness" is a cover term | 18080 (THICKNESS + 2 * BORDER-THICKNESS), where ``thickness'' is a cover term |
18074 for height or width, as appropriate. The height and width come from | 18081 for height or width, as appropriate. The height and width come from |
18075 @code{default-toolbar-height} and @code{default-toolbar-width} and the specific | 18082 @code{default-toolbar-height} and @code{default-toolbar-width} and the specific |
18076 versions of these (@code{top-toolbar-height}, @code{left-toolbar-width}, etc.). | 18083 versions of these (@code{top-toolbar-height}, @code{left-toolbar-width}, etc.). |
18077 The border thickness comes from @code{default-toolbar-border-height} and | 18084 The border thickness comes from @code{default-toolbar-border-height} and |
18078 @code{default-toolbar-border-width}, and the specific versions of these. The | 18085 @code{default-toolbar-border-width}, and the specific versions of these. The |
18093 | 18100 |
18094 | 18101 |
18095 @node The Paned Area, Text Areas, The Client Area, Window and Frame Geometry | 18102 @node The Paned Area, Text Areas, The Client Area, Window and Frame Geometry |
18096 @section The Paned Area | 18103 @section The Paned Area |
18097 | 18104 |
18098 The area occupied by the "windows" is called the paned area. Note that | 18105 The area occupied by the ``windows'' is called the paned area. |
18099 this includes the minibuffer, which is just another window but is | 18106 Unfortunately, because of the presence of the gutter @strong{between} the |
18100 special-cased in XEmacs. Each window can include a horizontal and/or | 18107 minibuffer and other windows, the bottom of the paned area is not |
18101 vertical scrollbar, a modeline and a vertical divider to its right, as | 18108 well-defined -- does it include the minibuffer (in which case it also |
18102 well as the text area. Only non-rightmost windows can include a | 18109 includes the bottom gutter, but no others) or does it not include |
18103 vertical divider. (The minibuffer normally does not include either | 18110 the minibuffer? (In which case not all windows are included.) It would |
18104 modeline or scrollbars.) | 18111 be cleaner to put the bottom gutter @strong{below} the minibuffer instead of |
18112 above it. | |
18113 | |
18114 Each window can include a horizontal and/or vertical scrollbar, a | |
18115 modeline and a vertical divider to its right, as well as the text area. | |
18116 Only non-rightmost windows can include a vertical divider. (The | |
18117 minibuffer normally does not include either modeline or scrollbars.) | |
18105 | 18118 |
18106 Note that, because the toolbars and gutters are controlled by | 18119 Note that, because the toolbars and gutters are controlled by |
18107 specifiers, and specifiers can have window-specific and buffer-specific | 18120 specifiers, and specifiers can have window-specific and buffer-specific |
18108 values, the size of the paned area can change depending on which window | 18121 values, the size of the paned area can change depending on which window |
18109 is selected: In other words, if the selected window or buffer changes, | 18122 is selected: In other words, if the selected window or buffer changes, |
18122 @code{horizontal-scrollbar-visible-p}, @code{vertical-scrollbar-visible-p}, | 18135 @code{horizontal-scrollbar-visible-p}, @code{vertical-scrollbar-visible-p}, |
18123 @code{vertical-divider-always-visible-p}, etc. | 18136 @code{vertical-divider-always-visible-p}, etc. |
18124 | 18137 |
18125 In addition, it is possible to set margins in the text area using the | 18138 In addition, it is possible to set margins in the text area using the |
18126 specifiers @code{left-margin-width} and @code{right-margin-width}. When this is | 18139 specifiers @code{left-margin-width} and @code{right-margin-width}. When this is |
18127 done, only the "inner text area" (the area inside of the margins) will | 18140 done, only the ``inner text area'' (the area inside of the margins) will |
18128 be used for normal display of text; the margins will be used for glyphs | 18141 be used for normal display of text; the margins will be used for glyphs |
18129 with a layout policy of @code{outside-margin} (as set on an extent containing | 18142 with a layout policy of @code{outside-margin} (as set on an extent containing |
18130 the glyph by @code{set-extent-begin-glyph-layout} or | 18143 the glyph by @code{set-extent-begin-glyph-layout} or |
18131 @code{set-extent-end-glyph-layout}). However, the calculation of the text | 18144 @code{set-extent-end-glyph-layout}). However, the calculation of the text |
18132 area size (e.g. in the function @code{window-text-area-width}) includes the | 18145 area size (e.g. in the function @code{window-text-area-width}) includes the |
18133 margins. Which margin is used depends on whether a glyph has been set | 18146 margins. Which margin is used depends on whether a glyph has been set |
18134 as the begin-glyph or end-glyph of an extent (@code{set-extent-begin-glyph} | 18147 as the begin-glyph or end-glyph of an extent (@code{set-extent-begin-glyph} |
18135 etc.), using the left and right margins, respectively. | 18148 etc.), using the left and right margins, respectively. |
18136 | 18149 |
18137 Technically, the margins outside of the inner text area are known as the | 18150 Technically, the margins outside of the inner text area are known as the |
18138 "outside margins". The "inside margins" are in the inner text area and | 18151 ``outside margins''. The ``inside margins'' are in the inner text area and |
18139 constitute the whitespace between the outside margins and the first or | 18152 constitute the whitespace between the outside margins and the first or |
18140 last non-whitespace character in a line; their width can vary from line | 18153 last non-whitespace character in a line; their width can vary from line |
18141 to line. Glyphs will be placed in the inside margin if their layout | 18154 to line. Glyphs will be placed in the inside margin if their layout |
18142 policy is @code{inside-margin} or @code{whitespace}, with @code{whitespace} glyphs on | 18155 policy is @code{inside-margin} or @code{whitespace}, with @code{whitespace} glyphs on |
18143 the inside and @code{inside-margin} glyphs on the outside. Inside-margin | 18156 the inside and @code{inside-margin} glyphs on the outside. Inside-margin |
18148 | 18161 |
18149 | 18162 |
18150 @node The Displayable Area, Which Functions Use Which?, Text Areas, Window and Frame Geometry | 18163 @node The Displayable Area, Which Functions Use Which?, Text Areas, Window and Frame Geometry |
18151 @section The Displayable Area | 18164 @section The Displayable Area |
18152 | 18165 |
18153 The "displayable area" is not so much an actual area as a convenient | 18166 The ``displayable area'' is not so much an actual area as a convenient |
18154 fiction. It is the area used to convert between pixel and character | 18167 fiction. It is the area used to convert between pixel and character |
18155 dimensions for frames. The character dimensions for a frame (e.g. as | 18168 dimensions for frames. The character dimensions for a frame (e.g. as |
18156 returned by @code{frame-width} and @code{frame-height} and set by | 18169 returned by @code{frame-width} and @code{frame-height} and set by |
18157 @code{set-frame-width} and @code{set-frame-height}) are determined from the | 18170 @code{set-frame-width} and @code{set-frame-height}) are determined from the |
18158 displayable area by dividing by the pixel size of the default font as | 18171 displayable area by dividing by the pixel size of the default font as |
18159 instantiated in the frame. (For proportional fonts, the "average" width | 18172 instantiated in the frame. (For proportional fonts, the ``average'' width |
18160 is used. Under Windows, this is a built-in property of the fonts. | 18173 is used. Under Windows, this is a built-in property of the fonts. |
18161 Under X, this is based on the width of the lowercase 'n', or if this is | 18174 Under X, this is based on the width of the lowercase 'n', or if this is |
18162 zero then the width of the default character. [We prefer 'n' to the | 18175 zero then the width of the default character. [We prefer 'n' to the |
18163 specified default character because many X fonts have a default | 18176 specified default character because many X fonts have a default |
18164 character with a zero or otherwise non-representative width.]) | 18177 character with a zero or otherwise non-representative width.]) |
18165 | 18178 |
18166 The displayable area is essentially the "theoretical" paned area of the | 18179 The displayable area is essentially the ``theoretical'' gutter area of the |
18167 frame excluding the rightmost and bottom-most scrollbars. In this | 18180 frame, excluding the rightmost and bottom-most scrollbars. That is, it |
18168 context, "theoretical" means that all calculations are based on | 18181 starts from the client (or ``total'') area and then excludes the |
18169 frame-level values for toolbar, gutter and scrollbar thicknesses. | 18182 ``theoretical'' toolbars and bottom-most/rightmost scrollbars, and the |
18170 Because these thicknesses are controlled by specifiers, and specifiers | 18183 internal border width. In this context, ``theoretical'' means that all |
18171 can have window-specific and buffer-specific values, these calculations | 18184 calculations are based on frame-level values for toolbar and scrollbar |
18172 may or may not reflect the actual size of the paned area or of the | 18185 thicknesses. Because these thicknesses are controlled by specifiers, |
18173 scrollbars when any particular window is selected. Note also that the | 18186 and specifiers can have window-specific and buffer-specific values, |
18174 "displayable area" may not even be contiguous! In particular, if the | 18187 these calculations may or may not reflect the actual size of the paned |
18175 frame-level value of the horizontal scrollbar height is non-zero, then | 18188 area or of the scrollbars when any particular window is selected. Note |
18176 the displayable area includes the paned area above and below the bottom | 18189 also that the ``displayable area'' may not even be contiguous! In |
18177 horizontal scrollbar but not the scrollbar itself. | 18190 particular, the gutters are included, but the bottom-most and rightmost |
18191 scrollbars are excluded even though they are inside of the gutters. | |
18192 Furthermore, if the frame-level value of the horizontal scrollbar height | |
18193 is non-zero, then the displayable area includes the paned area above and | |
18194 below the bottom horizontal scrollbar (i.e. the modeline and minibuffer) | |
18195 but not the scrollbar itself. | |
18178 | 18196 |
18179 As a further twist, the character-dimension calculations are adjusted so | 18197 As a further twist, the character-dimension calculations are adjusted so |
18180 that the truncation and continuation glyphs (see @code{truncation-glyph} and | 18198 that the truncation and continuation glyphs (see @code{truncation-glyph} and |
18181 @code{continuation-glyph}) count as a single character even if they are wider | 18199 @code{continuation-glyph}) count as a single character even if they are wider |
18182 than the default font width. (Technically, the character width is | 18200 than the default font width. (Technically, the character width is |
18185 width before dividing by the default-font width, and then adding 1 to | 18203 width before dividing by the default-font width, and then adding 1 to |
18186 the result.) (The ultimate motivation for this kludge as well as the | 18204 the result.) (The ultimate motivation for this kludge as well as the |
18187 subtraction of the scrollbars, but not the minibuffer or bottom-most | 18205 subtraction of the scrollbars, but not the minibuffer or bottom-most |
18188 modeline, is to maintain compatibility with TTY's.) | 18206 modeline, is to maintain compatibility with TTY's.) |
18189 | 18207 |
18190 Despite all these concerns and kludges, however, the "displayable area" | 18208 Despite all these concerns and kludges, however, the ``displayable area'' |
18191 concept works well in practice and mostly ensures that by default the | 18209 concept works well in practice and mostly ensures that by default the |
18192 frame will actually fit 79 characters + continuation/truncation glyph. | 18210 frame will actually fit 79 characters + continuation/truncation glyph. |
18193 | 18211 |
18194 | 18212 |
18195 @node Which Functions Use Which?, , The Displayable Area, Window and Frame Geometry | 18213 @node Which Functions Use Which?, , The Displayable Area, Window and Frame Geometry |
19834 @section Event Queues | 19852 @section Event Queues |
19835 @cindex event queues | 19853 @cindex event queues |
19836 @cindex queues, event | 19854 @cindex queues, event |
19837 | 19855 |
19838 There are two event queues here -- the command event queue (#### which | 19856 There are two event queues here -- the command event queue (#### which |
19839 should be called "deferred event queue" and is in my glyph workspace) and the | 19857 should be called ``deferred event queue'' and is in my glyph workspace) and the |
19840 dispatch event queue. (MS Windows actually has an extra dispatch queue | 19858 dispatch event queue. (MS Windows actually has an extra dispatch queue |
19841 for non-user events and uses the generic one only for user events. This | 19859 for non-user events and uses the generic one only for user events. This |
19842 is because user and non-user events in Windows come through the same | 19860 is because user and non-user events in Windows come through the same |
19843 place -- the window procedure -- but under X, it's possible to | 19861 place -- the window procedure -- but under X, it's possible to |
19844 selectively process events such that we take all the user events before | 19862 selectively process events such that we take all the user events before |
19939 | 19957 |
19940 @item handle_magic_event_cb | 19958 @item handle_magic_event_cb |
19941 XEmacs calls this with an event structure which contains window-system | 19959 XEmacs calls this with an event structure which contains window-system |
19942 dependent information that XEmacs doesn't need to know about, but which | 19960 dependent information that XEmacs doesn't need to know about, but which |
19943 must happen in order. If the @code{next_event_cb} never returns an | 19961 must happen in order. If the @code{next_event_cb} never returns an |
19944 event of type "magic", this will never be used. | 19962 event of type ``magic'', this will never be used. |
19945 | 19963 |
19946 @item format_magic_event_cb | 19964 @item format_magic_event_cb |
19947 Called with a magic event; print a representation of the innards of the | 19965 Called with a magic event; print a representation of the innards of the |
19948 event to @var{PSTREAM}. | 19966 event to @var{PSTREAM}. |
19949 | 19967 |
19971 @item select_process_cb | 19989 @item select_process_cb |
19972 @item unselect_process_cb | 19990 @item unselect_process_cb |
19973 These callbacks tell the underlying implementation to add or remove a | 19991 These callbacks tell the underlying implementation to add or remove a |
19974 file descriptor from the list of fds which are polled for | 19992 file descriptor from the list of fds which are polled for |
19975 inferior-process input. When input becomes available on the given | 19993 inferior-process input. When input becomes available on the given |
19976 process connection, an event of type "process" should be generated. | 19994 process connection, an event of type ``process'' should be generated. |
19977 | 19995 |
19978 @item select_console_cb | 19996 @item select_console_cb |
19979 @item unselect_console_cb | 19997 @item unselect_console_cb |
19980 These callbacks tell the underlying implementation to add or remove a | 19998 These callbacks tell the underlying implementation to add or remove a |
19981 console from the list of consoles which are polled for user-input. | 19999 console from the list of consoles which are polled for user-input. |
20099 @cindex focus handling | 20117 @cindex focus handling |
20100 | 20118 |
20101 Ben's capsule lecture on focus: | 20119 Ben's capsule lecture on focus: |
20102 | 20120 |
20103 In GNU Emacs @code{select-frame} never changes the window-manager frame | 20121 In GNU Emacs @code{select-frame} never changes the window-manager frame |
20104 focus. All it does is change the "selected frame". This is similar to | 20122 focus. All it does is change the ``selected frame''. This is similar to |
20105 what happens when we call @code{select-device} or @code{select-console}. | 20123 what happens when we call @code{select-device} or @code{select-console}. |
20106 Whenever an event comes in (including a keyboard event), its frame is | 20124 Whenever an event comes in (including a keyboard event), its frame is |
20107 selected; therefore, evaluating @code{select-frame} in @samp{*scratch*} | 20125 selected; therefore, evaluating @code{select-frame} in @samp{*scratch*} |
20108 won't cause any effects because the next received event (in the same | 20126 won't cause any effects because the next received event (in the same |
20109 frame) will cause a switch back to the frame displaying | 20127 frame) will cause a switch back to the frame displaying |
20134 minibuffer, you essentially want to temporarily switch the WM focus to | 20152 minibuffer, you essentially want to temporarily switch the WM focus to |
20135 the frame with the minibuffer, and switch it back when you exit the | 20153 the frame with the minibuffer, and switch it back when you exit the |
20136 minibuffer. | 20154 minibuffer. |
20137 | 20155 |
20138 GNU Emacs solves this with the crockish @code{redirect-frame-focus}, | 20156 GNU Emacs solves this with the crockish @code{redirect-frame-focus}, |
20139 which says "for keyboard events received from FRAME, act like they're | 20157 which says ``for keyboard events received from FRAME, act like they're |
20140 coming from FOCUS-FRAME". I think what this means is that, when a | 20158 coming from FOCUS-FRAME''. I think what this means is that, when a |
20141 keyboard event comes in and the event manager is about to select the | 20159 keyboard event comes in and the event manager is about to select the |
20142 event's frame, if that frame has its focus redirected, the redirected-to | 20160 event's frame, if that frame has its focus redirected, the redirected-to |
20143 frame is selected instead. That way, if you're in a minibufferless | 20161 frame is selected instead. That way, if you're in a minibufferless |
20144 frame and enter the minibuffer, then all Lisp functions that run see the | 20162 frame and enter the minibuffer, then all Lisp functions that run see the |
20145 selected frame as the minibuffer's frame rather than the minibufferless | 20163 selected frame as the minibuffer's frame rather than the minibufferless |
20149 There's also some weird logic that switches the redirected frame focus | 20167 There's also some weird logic that switches the redirected frame focus |
20150 from one frame to another if Lisp code explicitly calls | 20168 from one frame to another if Lisp code explicitly calls |
20151 @code{select-frame} (but not if @code{handle-switch-frame} is called), | 20169 @code{select-frame} (but not if @code{handle-switch-frame} is called), |
20152 and saves and restores the frame focus in window configurations, | 20170 and saves and restores the frame focus in window configurations, |
20153 etc. etc. All of this logic is heavily @code{#if 0}'d, with lots of | 20171 etc. etc. All of this logic is heavily @code{#if 0}'d, with lots of |
20154 comments saying "No, this approach doesn't seem to work, so I'm trying | 20172 comments saying ``No, this approach doesn't seem to work, so I'm trying |
20155 this ... is it reasonable? Well, I'm not sure ..." that are a red flag | 20173 this ... is it reasonable? Well, I'm not sure ...'' that are a red flag |
20156 indicating crockishness. | 20174 indicating crockishness. |
20157 | 20175 |
20158 Because of our way of doing things, we can avoid all this crock. | 20176 Because of our way of doing things, we can avoid all this crock. |
20159 Keyboard events never cause a select-frame (who cares what frame they're | 20177 Keyboard events never cause a select-frame (who cares what frame they're |
20160 associated with? They come from a console, only). We change the actual | 20178 associated with? They come from a console, only). We change the actual |
24933 return value should be an alist consisting of a list of all of the | 24951 return value should be an alist consisting of a list of all of the |
24934 defined subtypes for that coding system type along with a level of | 24952 defined subtypes for that coding system type along with a level of |
24935 likelihood and a list of additional properties indicating certain | 24953 likelihood and a list of additional properties indicating certain |
24936 features detected in the data. The extra properties returned are | 24954 features detected in the data. The extra properties returned are |
24937 defined entirely by the particular coding system type and are used | 24955 defined entirely by the particular coding system type and are used |
24938 only in the algorithm described below under "user control." However, | 24956 only in the algorithm described below under ``user control.'' However, |
24939 the levels of likelihood have a standard meaning as follows: | 24957 the levels of likelihood have a standard meaning as follows: |
24940 | 24958 |
24941 Level 4 means "near certainty" and typically indicates that a | 24959 Level 4 means ``near certainty'' and typically indicates that a |
24942 signature has been detected, usually at the beginning of the data, | 24960 signature has been detected, usually at the beginning of the data, |
24943 indicating that the data is encoded in this particular coding system | 24961 indicating that the data is encoded in this particular coding system |
24944 type. An example of this would be the byte order mark at the beginning | 24962 type. An example of this would be the byte order mark at the beginning |
24945 of UCS2 encoded data or the GZIP mark at the beginning of GZIP data. | 24963 of UCS2 encoded data or the GZIP mark at the beginning of GZIP data. |
24946 | 24964 |
24947 Level 3 means "highly likely" and indicates that tell-tale signs have | 24965 Level 3 means ``highly likely'' and indicates that tell-tale signs have |
24948 been discovered in the data that are characteristic of this particular | 24966 been discovered in the data that are characteristic of this particular |
24949 coding system type. Examples of this might be ISO 2022 escape | 24967 coding system type. Examples of this might be ISO 2022 escape |
24950 sequences or the current Unicode end of line markers at regular | 24968 sequences or the current Unicode end of line markers at regular |
24951 intervals. | 24969 intervals. |
24952 | 24970 |
24953 Level 2 means "strongly statistically likely" indicating that | 24971 Level 2 means ``strongly statistically likely'' indicating that |
24954 statistical analysis concludes that there's a high chance that this | 24972 statistical analysis concludes that there's a high chance that this |
24955 data is encoded according to this particular type. For example, this | 24973 data is encoded according to this particular type. For example, this |
24956 might mean that for UCS2 data, there is a high proportion of null bytes | 24974 might mean that for UCS2 data, there is a high proportion of null bytes |
24957 or other repeated bytes in the odd-numbered bytes of the data and a | 24975 or other repeated bytes in the odd-numbered bytes of the data and a |
24958 high variance in the even-numbered bytes of the data. For Shift-JIS, | 24976 high variance in the even-numbered bytes of the data. For Shift-JIS, |
24959 this might indicate that there were no illegal Shift-JIS sequences | 24977 this might indicate that there were no illegal Shift-JIS sequences |
24960 and a fairly high occurrence of common Shift-JIS characters. | 24978 and a fairly high occurrence of common Shift-JIS characters. |
24961 | 24979 |
24962 Level 1 means "weak statistical likelihood" meaning that there is some | 24980 Level 1 means ``weak statistical likelihood'' meaning that there is some |
24963 indication that the data is encoded in this coding system type. In | 24981 indication that the data is encoded in this coding system type. In |
24964 fact, there is a reasonable chance that it may be some other type as | 24982 fact, there is a reasonable chance that it may be some other type as |
24965 well. This means, for example, that no illegal sequences were | 24983 well. This means, for example, that no illegal sequences were |
24966 encountered and at least some data was encountered that is purposely | 24984 encountered and at least some data was encountered that is purposely |
24967 not in other coding system types. For Shift-JIS data, this might mean | 24985 not in other coding system types. For Shift-JIS data, this might mean |
24968 that some bytes in the range 128 to 159 were encountered in the data. | 24986 that some bytes in the range 128 to 159 were encountered in the data. |
24969 | 24987 |
24970 Level 0 means "neutral" which is to say that there's either not enough | 24988 Level 0 means ``neutral'' which is to say that there's either not enough |
24971 data to make any decision or that the data could well be interpreted | 24989 data to make any decision or that the data could well be interpreted |
24972 as this type (meaning no illegal sequences), but there is little or no | 24990 as this type (meaning no illegal sequences), but there is little or no |
24973 indication of anything specific to this particular type. | 24991 indication of anything specific to this particular type. |
24974 | 24992 |
24975 Level -1 means "weakly unlikely" meaning that some data was | 24993 Level -1 means ``weakly unlikely'' meaning that some data was |
24976 encountered that could conceivably be part of the coding system type | 24994 encountered that could conceivably be part of the coding system type |
24977 but is probably not. For example, excessively long line-lengths or | 24995 but is probably not. For example, excessively long line-lengths or |
24978 very rarely-encountered sequences. | 24996 very rarely-encountered sequences. |
24979 | 24997 |
24980 Level -2 means "strongly unlikely" meaning that typically a number | 24998 Level -2 means ``strongly unlikely'' meaning that typically a number |
24981 of illegal sequences were encountered. | 24999 of illegal sequences were encountered. |
24982 | 25000 |
24983 The algorithm to determine when to stop and indicate that the data has | 25001 The algorithm to determine when to stop and indicate that the data has |
24984 been detected as a particular coding system uses a priority list, | 25002 been detected as a particular coding system uses a priority list, |
24985 which is typically specified as part of the language environment | 25003 which is typically specified as part of the language environment |
24994 Japanese-language environment particular subtypes of ISO 2022 will be | 25012 Japanese-language environment particular subtypes of ISO 2022 will be |
24995 associated with the Japanese coding system version of those | 25013 associated with the Japanese coding system version of those |
24996 subtypes). It is perfectly legal, and quite common in fact, to list the | 25014 subtypes). It is perfectly legal, and quite common in fact, to list the |
24997 same subtype more than once in the priority list with successively | 25015 same subtype more than once in the priority list with successively |
24998 lower requirements. Other facts that can be listed in the priority | 25016 lower requirements. Other facts that can be listed in the priority |
24999 list for a subtype are "reject", meaning that the data should never be | 25017 list for a subtype are ``reject'', meaning that the data should never be |
25000 detected as this subtype, or "ask", meaning that if the data is | 25018 detected as this subtype, or ``ask'', meaning that if the data is |
25001 detected to be this subtype, the user will be asked whether they | 25019 detected to be this subtype, the user will be asked whether they |
25002 actually mean this. This latter property could be used, for example, | 25020 actually mean this. This latter property could be used, for example, |
25003 towards the bottom of the priority list. | 25021 towards the bottom of the priority list. |
25004 | 25022 |
25005 In addition there is a global variable which specifies the minimum | 25023 In addition there is a global variable which specifies the minimum |
25012 system, the subtype, the coding system and the associated level of | 25030 system, the subtype, the coding system and the associated level of |
25013 likelihood will be prominently displayed either in the echo area or in | 25031 likelihood will be prominently displayed either in the echo area or in |
25014 a status box somewhere. | 25032 a status box somewhere. |
25015 | 25033 |
25016 If no positive match is found according to the priority list, or if | 25034 If no positive match is found according to the priority list, or if |
25017 the matches that are found have the "ask" property on them, then the | 25035 the matches that are found have the ``ask'' property on them, then the |
25018 user will be presented with a list of choices of possible encodings | 25036 user will be presented with a list of choices of possible encodings |
25019 and asked to choose one. This list is typically sorted first by level | 25037 and asked to choose one. This list is typically sorted first by level |
25020 of likelihood, and then within this, by the order in which the | 25038 of likelihood, and then within this, by the order in which the |
25021 subtypes appear in the priority list. This list is displayed in a | 25039 subtypes appear in the priority list. This list is displayed in a |
25022 special kind of dialog box or other buffer allowing the user, in | 25040 special kind of dialog box or other buffer allowing the user, in |
25029 will be in the form of errors or warnings of various levels, some of | 25047 will be in the form of errors or warnings of various levels, some of |
25030 which may be severe enough to stop the decoding entirely, and some of | 25048 which may be severe enough to stop the decoding entirely, and some of |
25031 which may either indicate definitely malformed data but from which | 25049 which may either indicate definitely malformed data but from which |
25032 it's possible to recover, or simply data that appears rather | 25050 it's possible to recover, or simply data that appears rather |
25033 questionable. If any of these status values are reported during | 25051 questionable. If any of these status values are reported during |
25034 decoding, the user will be informed of this and asked "are you sure?" | 25052 decoding, the user will be informed of this and asked ``are you sure?'' |
25035 As part of the "are you sure" dialog box or question, the user can | 25053 As part of the ``are you sure'' dialog box or question, the user can |
25036 display the results of the decoding to make sure it's correct. If the | 25054 display the results of the decoding to make sure it's correct. If the |
25037 user says "no, they're not sure," then the same list of choices as | 25055 user says ``no, they're not sure,'' then the same list of choices as |
25038 previously mentioned will be presented. | 25056 previously mentioned will be presented. |
25039 | 25057 |
25040 @subheading RFC: Autodetection | 25058 @subheading RFC: Autodetection |
25041 | 25059 |
25042 Also appeared under heading "Implementation of Coding System Priority | 25060 Also appeared under heading "Implementation of Coding System Priority |
25252 | 25270 |
25253 @enumerate | 25271 @enumerate |
25254 @item | 25272 @item |
25255 Hopefully a system general enough to handle (2)--(4) will | 25273 Hopefully a system general enough to handle (2)--(4) will |
25256 handle these, too, but we should watch out for gotchas like | 25274 handle these, too, but we should watch out for gotchas like |
25257 Unicode "plane 14" tags which (I think _both_ Ben and Olivier | 25275 Unicode ``plane 14'' tags which (I think _both_ Ben and Olivier |
25258 will agree) have no place in the internal representation, and | 25276 will agree) have no place in the internal representation, and |
25259 thus must be treated as out-of-band control sequences. I | 25277 thus must be treated as out-of-band control sequences. I |
25260 don't know if all such gotchas will be as easy to dispose of. | 25278 don't know if all such gotchas will be as easy to dispose of. |
25261 | 25279 |
25262 @item | 25280 @item |
25293 | 25311 |
25294 Obviously, it can't be perfect if any autodecoding is done; | 25312 Obviously, it can't be perfect if any autodecoding is done; |
25295 people like Hrvoje should have an easily available option to | 25313 people like Hrvoje should have an easily available option to |
25296 revert to this default (or an optimized approximation which | 25314 revert to this default (or an optimized approximation which |
25297 doesn't actually read the whole file into a buffer) or simply | 25315 doesn't actually read the whole file into a buffer) or simply |
25298 display everything as binary (with the "font" for binary files | 25316 display everything as binary (with the ``font'' for binary files |
25299 being a user option). | 25317 being a user option). |
25300 | 25318 |
25301 @item | 25319 @item |
25302 This implies that we should be detecting conditions in the | 25320 This implies that we should be detecting conditions in the |
25303 tail of the file which violate the implicit assumptions of the | 25321 tail of the file which violate the implicit assumptions of the |
25402 | 25420 |
25403 Date: 11/1/1999 7:24 AM | 25421 Date: 11/1/1999 7:24 AM |
25404 | 25422 |
25405 Stephen, thank you very much for writing this up. I think it is a good start, | 25423 Stephen, thank you very much for writing this up. I think it is a good start, |
25406 and definitely moving in the direction I would like to see things going: more | 25424 and definitely moving in the direction I would like to see things going: more |
25407 proposals, less arguing. (aka "more light, less heat") However, I have some | 25425 proposals, less arguing. (aka ``more light, less heat'') However, I have some |
25408 suggestions for cleaning this up: | 25426 suggestions for cleaning this up: |
25409 | 25427 |
25410 You should try to make it more layered. For example, you might have one | 25428 You should try to make it more layered. For example, you might have one |
25411 section devoted to the workings of autodetection, which starts out like this | 25429 section devoted to the workings of autodetection, which starts out like this |
25412 (the section numbers below are totally arbitrary): | 25430 (the section numbers below are totally arbitrary): |