Mercurial > hg > xemacs-beta
comparison src/file-coding.c @ 3025:facf3239ba30
[xemacs-hg @ 2005-10-25 11:16:19 by ben]
rename new->new_, convert 'foo to `foo'
EmacsFrame.c, ExternalClient.c, ExternalShell.c, chartab.c, cmdloop.c, compiler.h, console.c, database.c, device-msw.c, device-x.c, device.c, doc.c, dragdrop.c, eval.c, event-msw.c, event-stream.c, events.c, extents.c, file-coding.c, fns.c, frame-tty.c, frame.c, gpmevent.c, gutter.c, hash.c, imgproc.c, indent.c, keymap.c, lisp-union.h, macros.c, malloc.c, marker.c, menubar-x.c, menubar.c, mule-charset.c, number.c, process.c, profile.h, ralloc.c, redisplay.c, select-common.h, select.c, syntax.c, sysfile.h, sysproc.h, systime.h, syswindows.h, toolbar.c, tooltalk.c, tparam.c, unexaix.c, unexalpha.c, unexconvex.c, unexec.c, unexhp9k800.c, unexmips.c, unicode.c, window.c: new -> new_.
'foo -> `foo'.
lwlib-internal.h: redo assert macros to follow lisp.h and not trigger warnings.
lwlib.c, xlwtabs.c: new -> new_.
author | ben |
---|---|
date | Tue, 25 Oct 2005 11:16:49 +0000 |
parents | b7f26b2f78bd |
children | 77f5a5135b3a 3742ea8250b5 |
comparison
equal
deleted
inserted
replaced
3024:b7f26b2f78bd | 3025:facf3239ba30 |
---|---|
1 /* Text encoding conversion functions; coding-system object. | 1 /* Text encoding conversion functions; coding-system object. |
2 #### rename me to coding-system.c or coding.c | 2 #### rename me to coding-system.c or coding.c |
3 Copyright (C) 1991, 1995 Free Software Foundation, Inc. | 3 Copyright (C) 1991, 1995 Free Software Foundation, Inc. |
4 Copyright (C) 1995 Sun Microsystems, Inc. | 4 Copyright (C) 1995 Sun Microsystems, Inc. |
5 Copyright (C) 2000, 2001, 2002, 2003 Ben Wing. | 5 Copyright (C) 2000, 2001, 2002, 2003, 2005 Ben Wing. |
6 | 6 |
7 This file is part of XEmacs. | 7 This file is part of XEmacs. |
8 | 8 |
9 XEmacs is free software; you can redistribute it and/or modify it | 9 XEmacs is free software; you can redistribute it and/or modify it |
10 under the terms of the GNU General Public License as published by the | 10 under the terms of the GNU General Public License as published by the |
404 } | 404 } |
405 | 405 |
406 DEFUN ("valid-coding-system-type-p", Fvalid_coding_system_type_p, 1, 1, 0, /* | 406 DEFUN ("valid-coding-system-type-p", Fvalid_coding_system_type_p, 1, 1, 0, /* |
407 Given a CODING-SYSTEM-TYPE, return non-nil if it is valid. | 407 Given a CODING-SYSTEM-TYPE, return non-nil if it is valid. |
408 Valid types depend on how XEmacs was compiled but may include | 408 Valid types depend on how XEmacs was compiled but may include |
409 'undecided, 'chain, 'integer, 'ccl, 'iso2022, 'big5, 'shift-jis, | 409 `undecided', `chain', `integer', `ccl', `iso2022', `big5', `shift-jis', |
410 'utf-16, 'ucs-4, 'utf-8, etc. | 410 `utf-16', `ucs-4', `utf-8', etc. |
411 */ | 411 */ |
412 (coding_system_type)) | 412 (coding_system_type)) |
413 { | 413 { |
414 return valid_coding_system_type_p (coding_system_type) ? Qt : Qnil; | 414 return valid_coding_system_type_p (coding_system_type) ? Qt : Qnil; |
415 } | 415 } |
1042 DEFUN ("make-coding-system", Fmake_coding_system, 2, 4, 0, /* | 1042 DEFUN ("make-coding-system", Fmake_coding_system, 2, 4, 0, /* |
1043 Register symbol NAME as a coding system. | 1043 Register symbol NAME as a coding system. |
1044 | 1044 |
1045 TYPE describes the conversion method used and should be one of | 1045 TYPE describes the conversion method used and should be one of |
1046 | 1046 |
1047 nil or 'undecided | 1047 nil or `undecided' |
1048 Automatic conversion. XEmacs attempts to detect the coding system | 1048 Automatic conversion. XEmacs attempts to detect the coding system |
1049 used in the file. | 1049 used in the file. |
1050 'chain | 1050 `chain' |
1051 Chain two or more coding systems together to make a combination coding | 1051 Chain two or more coding systems together to make a combination coding |
1052 system. | 1052 system. |
1053 'no-conversion | 1053 `no-conversion' |
1054 No conversion. Use this for binary files and such. On output, | 1054 No conversion. Use this for binary files and such. On output, |
1055 graphic characters that are not in ASCII or Latin-1 will be | 1055 graphic characters that are not in ASCII or Latin-1 will be |
1056 replaced by a ?. (For a no-conversion-encoded buffer, these | 1056 replaced by a ?. (For a no-conversion-encoded buffer, these |
1057 characters will only be present if you explicitly insert them.) | 1057 characters will only be present if you explicitly insert them.) |
1058 'convert-eol | 1058 `convert-eol' |
1059 Convert CRLF sequences or CR to LF. | 1059 Convert CRLF sequences or CR to LF. |
1060 'shift-jis | 1060 `shift-jis' |
1061 Shift-JIS (a Japanese encoding commonly used in PC operating systems). | 1061 Shift-JIS (a Japanese encoding commonly used in PC operating systems). |
1062 'unicode | 1062 `unicode' |
1063 Any Unicode encoding (UCS-4, UTF-8, UTF-16, etc.). | 1063 Any Unicode encoding (UCS-4, UTF-8, UTF-16, etc.). |
1064 'mswindows-unicode-to-multibyte | 1064 `mswindows-unicode-to-multibyte' |
1065 (MS Windows only) Converts from Windows Unicode to Windows Multibyte | 1065 (MS Windows only) Converts from Windows Unicode to Windows Multibyte |
1066 (any code page encoding) upon encoding, and the other way upon decoding. | 1066 (any code page encoding) upon encoding, and the other way upon decoding. |
1067 'mswindows-multibyte | 1067 `mswindows-multibyte' |
1068 Converts to or from Windows Multibyte (any code page encoding). | 1068 Converts to or from Windows Multibyte (any code page encoding). |
1069 This is resolved into a chain of `mswindows-unicode' and | 1069 This is resolved into a chain of `mswindows-unicode' and |
1070 `mswindows-unicode-to-multibyte'. | 1070 `mswindows-unicode-to-multibyte'. |
1071 'iso2022 | 1071 `iso2022' |
1072 Any ISO2022-compliant encoding. Among other things, this includes | 1072 Any ISO2022-compliant encoding. Among other things, this includes |
1073 JIS (the Japanese encoding commonly used for e-mail), EUC (the | 1073 JIS (the Japanese encoding commonly used for e-mail), EUC (the |
1074 standard Unix encoding for Japanese and other languages), and | 1074 standard Unix encoding for Japanese and other languages), and |
1075 Compound Text (the encoding used in X11). You can specify more | 1075 Compound Text (the encoding used in X11). You can specify more |
1076 specific information about the conversion with the PROPS argument. | 1076 specific information about the conversion with the PROPS argument. |
1077 'big5 | 1077 `big5' |
1078 Big5 (the encoding commonly used for Mandarin Chinese in Taiwan). | 1078 Big5 (the encoding commonly used for Mandarin Chinese in Taiwan). |
1079 'ccl | 1079 `ccl' |
1080 The conversion is performed using a user-written pseudo-code | 1080 The conversion is performed using a user-written pseudo-code |
1081 program. CCL (Code Conversion Language) is the name of this | 1081 program. CCL (Code Conversion Language) is the name of this |
1082 pseudo-code. | 1082 pseudo-code. |
1083 'gzip | 1083 `gzip' |
1084 GZIP compression format. | 1084 GZIP compression format. |
1085 'internal | 1085 `internal' |
1086 Write out or read in the raw contents of the memory representing | 1086 Write out or read in the raw contents of the memory representing |
1087 the buffer's text. This is primarily useful for debugging | 1087 the buffer's text. This is primarily useful for debugging |
1088 purposes, and is only enabled when XEmacs has been compiled with | 1088 purposes, and is only enabled when XEmacs has been compiled with |
1089 DEBUG_XEMACS defined (via the --debug configure option). | 1089 DEBUG_XEMACS defined (via the --debug configure option). |
1090 WARNING: Reading in a file using 'internal conversion can result | 1090 WARNING: Reading in a file using `internal' conversion can result |
1091 in an internal inconsistency in the memory representing a | 1091 in an internal inconsistency in the memory representing a |
1092 buffer's text, which will produce unpredictable results and may | 1092 buffer's text, which will produce unpredictable results and may |
1093 cause XEmacs to crash. Under normal circumstances you should | 1093 cause XEmacs to crash. Under normal circumstances you should |
1094 never use 'internal conversion. | 1094 never use `internal' conversion. |
1095 | 1095 |
1096 DESCRIPTION is a short English phrase describing the coding system, | 1096 DESCRIPTION is a short English phrase describing the coding system, |
1097 suitable for use as a menu item. (See also the `documentation' property | 1097 suitable for use as a menu item. (See also the `documentation' property |
1098 below.) | 1098 below.) |
1099 | 1099 |
1100 PROPS is a property list, describing the specific nature of the | 1100 PROPS is a property list, describing the specific nature of the |
1101 character set. Recognized properties are: | 1101 character set. Recognized properties are: |
1102 | 1102 |
1103 'mnemonic | 1103 `mnemonic' |
1104 String to be displayed in the modeline when this coding system is | 1104 String to be displayed in the modeline when this coding system is |
1105 active. | 1105 active. |
1106 | 1106 |
1107 'documentation | 1107 `documentation' |
1108 Detailed documentation on the coding system. | 1108 Detailed documentation on the coding system. |
1109 | 1109 |
1110 'eol-type | 1110 `eol-type' |
1111 End-of-line conversion to be used. It should be one of | 1111 End-of-line conversion to be used. It should be one of |
1112 | 1112 |
1113 nil | 1113 nil |
1114 Automatically detect the end-of-line type (LF, CRLF, | 1114 Automatically detect the end-of-line type (LF, CRLF, |
1115 or CR). Also generate subsidiary coding systems named | 1115 or CR). Also generate subsidiary coding systems named |
1116 `NAME-unix', `NAME-dos', and `NAME-mac', that are | 1116 `NAME-unix', `NAME-dos', and `NAME-mac', that are |
1117 identical to this coding system but have an EOL-TYPE | 1117 identical to this coding system but have an EOL-TYPE |
1118 value of 'lf, 'crlf, and 'cr, respectively. | 1118 value of `lf', `crlf', and `cr', respectively. |
1119 'lf | 1119 `lf' |
1120 The end of a line is marked externally using ASCII LF. | 1120 The end of a line is marked externally using ASCII LF. |
1121 Since this is also the way that XEmacs represents an | 1121 Since this is also the way that XEmacs represents an |
1122 end-of-line internally, specifying this option results | 1122 end-of-line internally, specifying this option results |
1123 in no end-of-line conversion. This is the standard | 1123 in no end-of-line conversion. This is the standard |
1124 format for Unix text files. | 1124 format for Unix text files. |
1125 'crlf | 1125 `crlf' |
1126 The end of a line is marked externally using ASCII | 1126 The end of a line is marked externally using ASCII |
1127 CRLF. This is the standard format for MS-DOS text | 1127 CRLF. This is the standard format for MS-DOS text |
1128 files. | 1128 files. |
1129 'cr | 1129 `cr' |
1130 The end of a line is marked externally using ASCII CR. | 1130 The end of a line is marked externally using ASCII CR. |
1131 This is the standard format for Macintosh text files. | 1131 This is the standard format for Macintosh text files. |
1132 t | 1132 t |
1133 Automatically detect the end-of-line type but do not | 1133 Automatically detect the end-of-line type but do not |
1134 generate subsidiary coding systems. (This value is | 1134 generate subsidiary coding systems. (This value is |
1135 converted to nil when stored internally, and | 1135 converted to nil when stored internally, and |
1136 `coding-system-property' will return nil.) | 1136 `coding-system-property' will return nil.) |
1137 | 1137 |
1138 'post-read-conversion | 1138 `post-read-conversion' |
1139 The value is a function to call after some text is inserted and | 1139 The value is a function to call after some text is inserted and |
1140 decoded by the coding system itself and before any functions in | 1140 decoded by the coding system itself and before any functions in |
1141 `after-change-functions' are called. (#### Not actually true in | 1141 `after-change-functions' are called. (#### Not actually true in |
1142 XEmacs. `after-change-functions' will be called twice if | 1142 XEmacs. `after-change-functions' will be called twice if |
1143 `post-read-conversion' changes something.) The argument of this | 1143 `post-read-conversion' changes something.) The argument of this |
1144 function is the same as for a function in | 1144 function is the same as for a function in |
1145 `after-insert-file-functions', i.e. LENGTH of the text inserted, | 1145 `after-insert-file-functions', i.e. LENGTH of the text inserted, |
1146 with point at the head of the text to be decoded. | 1146 with point at the head of the text to be decoded. |
1147 | 1147 |
1148 'pre-write-conversion | 1148 `pre-write-conversion' |
1149 The value is a function to call after all functions in | 1149 The value is a function to call after all functions in |
1150 `write-region-annotate-functions' and `buffer-file-format' are | 1150 `write-region-annotate-functions' and `buffer-file-format' are |
1151 called, and before the text is encoded by the coding system itself. | 1151 called, and before the text is encoded by the coding system itself. |
1152 The arguments to this function are the same as those of a function | 1152 The arguments to this function are the same as those of a function |
1153 in `write-region-annotate-functions', i.e. FROM and TO, specifying | 1153 in `write-region-annotate-functions', i.e. FROM and TO, specifying |
1156 | 1156 |
1157 | 1157 |
1158 The following properties are allowed for FSF compatibility but currently | 1158 The following properties are allowed for FSF compatibility but currently |
1159 ignored: | 1159 ignored: |
1160 | 1160 |
1161 'translation-table-for-decode | 1161 `translation-table-for-decode' |
1162 The value is a translation table to be applied on decoding. See | 1162 The value is a translation table to be applied on decoding. See |
1163 the function `make-translation-table' for the format of translation | 1163 the function `make-translation-table' for the format of translation |
1164 table. This is not applicable to CCL-based coding systems. | 1164 table. This is not applicable to CCL-based coding systems. |
1165 | 1165 |
1166 'translation-table-for-encode | 1166 `translation-table-for-encode' |
1167 The value is a translation table to be applied on encoding. This is | 1167 The value is a translation table to be applied on encoding. This is |
1168 not applicable to CCL-based coding systems. | 1168 not applicable to CCL-based coding systems. |
1169 | 1169 |
1170 'safe-chars | 1170 `safe-chars' |
1171 The value is a char table. If a character has non-nil value in it, | 1171 The value is a char table. If a character has non-nil value in it, |
1172 the character is safely supported by the coding system. This | 1172 the character is safely supported by the coding system. This |
1173 overrides the specification of safe-charsets. | 1173 overrides the specification of safe-charsets. |
1174 | 1174 |
1175 'safe-charsets | 1175 `safe-charsets' |
1176 The value is a list of charsets safely supported by the coding | 1176 The value is a list of charsets safely supported by the coding |
1177 system. The value t means that all charsets Emacs handles are | 1177 system. The value t means that all charsets Emacs handles are |
1178 supported. Even if some charset is not in this list, it doesn't | 1178 supported. Even if some charset is not in this list, it doesn't |
1179 mean that the charset can't be encoded in the coding system; | 1179 mean that the charset can't be encoded in the coding system; |
1180 it just means that some other receiver of text encoded | 1180 it just means that some other receiver of text encoded |
1181 in the coding system won't be able to handle that charset. | 1181 in the coding system won't be able to handle that charset. |
1182 | 1182 |
1183 'mime-charset | 1183 `mime-charset' |
1184 The value is a symbol whose name is the `MIME-charset' parameter of | 1184 The value is a symbol whose name is the `MIME-charset' parameter of |
1185 the coding system. | 1185 the coding system. |
1186 | 1186 |
1187 'valid-codes (meaningful only for a coding system based on CCL) | 1187 `valid-codes' (meaningful only for a coding system based on CCL) |
1188 The value is a list to indicate valid byte ranges of the encoded | 1188 The value is a list to indicate valid byte ranges of the encoded |
1189 file. Each element of the list is an integer or a cons of integers. | 1189 file. Each element of the list is an integer or a cons of integers. |
1190 In the former case, the integer value is a valid byte code. In the | 1190 In the former case, the integer value is a valid byte code. In the |
1191 latter case, the integers specify the range of valid byte codes. | 1191 latter case, the integers specify the range of valid byte codes. |
1192 | 1192 |
1193 | 1193 |
1194 | 1194 |
1195 The following additional property is recognized if TYPE is 'convert-eol: | 1195 The following additional property is recognized if TYPE is `convert-eol': |
1196 | 1196 |
1197 'subtype | 1197 `subtype' |
1198 One of `lf', `crlf', `cr' or nil (for autodetection). When decoding, | 1198 One of `lf', `crlf', `cr' or nil (for autodetection). When decoding, |
1199 the corresponding sequence will be converted to LF. When encoding, | 1199 the corresponding sequence will be converted to LF. When encoding, |
1200 the opposite happens. This coding system converts characters to | 1200 the opposite happens. This coding system converts characters to |
1201 characters. | 1201 characters. |
1202 | 1202 |
1203 | 1203 |
1204 | 1204 |
1205 The following additional properties are recognized if TYPE is 'iso2022: | 1205 The following additional properties are recognized if TYPE is `iso2022': |
1206 | 1206 |
1207 'charset-g0 | 1207 `charset-g0' |
1208 'charset-g1 | 1208 `charset-g1' |
1209 'charset-g2 | 1209 `charset-g2' |
1210 'charset-g3 | 1210 `charset-g3' |
1211 The character set initially designated to the G0 - G3 registers. | 1211 The character set initially designated to the G0 - G3 registers. |
1212 The value should be one of | 1212 The value should be one of |
1213 | 1213 |
1214 -- A charset object (designate that character set) | 1214 -- A charset object (designate that character set) |
1215 -- nil (do not ever use this register) | 1215 -- nil (do not ever use this register) |
1216 -- t (no character set is initially designated to | 1216 -- t (no character set is initially designated to |
1217 the register, but may be later on; this automatically | 1217 the register, but may be later on; this automatically |
1218 sets the corresponding `force-g*-on-output' property) | 1218 sets the corresponding `force-g*-on-output' property) |
1219 | 1219 |
1220 'force-g0-on-output | 1220 `force-g0-on-output' |
1221 'force-g1-on-output | 1221 `force-g1-on-output' |
1222 'force-g2-on-output | 1222 `force-g2-on-output' |
1223 'force-g3-on-output | 1223 `force-g3-on-output' |
1224 If non-nil, send an explicit designation sequence on output before | 1224 If non-nil, send an explicit designation sequence on output before |
1225 using the specified register. | 1225 using the specified register. |
1226 | 1226 |
1227 'short | 1227 `short' |
1228 If non-nil, use the short forms "ESC $ @", "ESC $ A", and | 1228 If non-nil, use the short forms "ESC $ @", "ESC $ A", and |
1229 "ESC $ B" on output in place of the full designation sequences | 1229 "ESC $ B" on output in place of the full designation sequences |
1230 "ESC $ ( @", "ESC $ ( A", and "ESC $ ( B". | 1230 "ESC $ ( @", "ESC $ ( A", and "ESC $ ( B". |
1231 | 1231 |
1232 'no-ascii-eol | 1232 `no-ascii-eol' |
1233 If non-nil, don't designate ASCII to G0 at each end of line on output. | 1233 If non-nil, don't designate ASCII to G0 at each end of line on output. |
1234 Setting this to non-nil also suppresses other state-resetting that | 1234 Setting this to non-nil also suppresses other state-resetting that |
1235 normally happens at the end of a line. | 1235 normally happens at the end of a line. |
1236 | 1236 |
1237 'no-ascii-cntl | 1237 `no-ascii-cntl' |
1238 If non-nil, don't designate ASCII to G0 before control chars on output. | 1238 If non-nil, don't designate ASCII to G0 before control chars on output. |
1239 | 1239 |
1240 'seven | 1240 `seven' |
1241 If non-nil, use 7-bit environment on output. Otherwise, use 8-bit | 1241 If non-nil, use 7-bit environment on output. Otherwise, use 8-bit |
1242 environment. | 1242 environment. |
1243 | 1243 |
1244 'lock-shift | 1244 `lock-shift' |
1245 If non-nil, use locking-shift (SO/SI) instead of single-shift | 1245 If non-nil, use locking-shift (SO/SI) instead of single-shift |
1246 or designation by escape sequence. | 1246 or designation by escape sequence. |
1247 | 1247 |
1248 'no-iso6429 | 1248 `no-iso6429' |
1249 If non-nil, don't use ISO6429's direction specification. | 1249 If non-nil, don't use ISO6429's direction specification. |
1250 | 1250 |
1251 'escape-quoted | 1251 `escape-quoted' |
1252 If non-nil, literal control characters that are the same as | 1252 If non-nil, literal control characters that are the same as |
1253 the beginning of a recognized ISO2022 or ISO6429 escape sequence | 1253 the beginning of a recognized ISO2022 or ISO6429 escape sequence |
1254 (in particular, ESC (0x1B), SO (0x0E), SI (0x0F), SS2 (0x8E), | 1254 (in particular, ESC (0x1B), SO (0x0E), SI (0x0F), SS2 (0x8E), |
1255 SS3 (0x8F), and CSI (0x9B)) are "quoted" with an escape character | 1255 SS3 (0x8F), and CSI (0x9B)) are "quoted" with an escape character |
1256 so that they can be properly distinguished from an escape sequence. | 1256 so that they can be properly distinguished from an escape sequence. |
1259 is a good choice for a quoting character because there are no | 1259 is a good choice for a quoting character because there are no |
1260 escape sequences whose second byte is a character from the Control-0 | 1260 escape sequences whose second byte is a character from the Control-0 |
1261 or Control-1 character sets; this is explicitly disallowed by the | 1261 or Control-1 character sets; this is explicitly disallowed by the |
1262 ISO2022 standard. | 1262 ISO2022 standard. |
1263 | 1263 |
1264 'input-charset-conversion | 1264 `input-charset-conversion' |
1265 A list of conversion specifications, specifying conversion of | 1265 A list of conversion specifications, specifying conversion of |
1266 characters in one charset to another when decoding is performed. | 1266 characters in one charset to another when decoding is performed. |
1267 Each specification is a list of two elements: the source charset, | 1267 Each specification is a list of two elements: the source charset, |
1268 and the destination charset. | 1268 and the destination charset. |
1269 | 1269 |
1270 'output-charset-conversion | 1270 `output-charset-conversion' |
1271 A list of conversion specifications, specifying conversion of | 1271 A list of conversion specifications, specifying conversion of |
1272 characters in one charset to another when encoding is performed. | 1272 characters in one charset to another when encoding is performed. |
1273 The form of each specification is the same as for | 1273 The form of each specification is the same as for |
1274 'input-charset-conversion. | 1274 `input-charset-conversion'. |
1275 | 1275 |
1276 | 1276 |
1277 | 1277 |
1278 The following additional properties are recognized (and required) | 1278 The following additional properties are recognized (and required) |
1279 if TYPE is 'ccl: | 1279 if TYPE is `ccl': |
1280 | 1280 |
1281 'decode | 1281 `decode' |
1282 CCL program used for decoding (converting to internal format). | 1282 CCL program used for decoding (converting to internal format). |
1283 | 1283 |
1284 'encode | 1284 `encode' |
1285 CCL program used for encoding (converting to external format). | 1285 CCL program used for encoding (converting to external format). |
1286 | 1286 |
1287 | 1287 |
1288 The following additional properties are recognized if TYPE is 'chain: | 1288 The following additional properties are recognized if TYPE is `chain': |
1289 | 1289 |
1290 'chain | 1290 `chain' |
1291 List of coding systems to be chained together, in decoding order. | 1291 List of coding systems to be chained together, in decoding order. |
1292 | 1292 |
1293 'canonicalize-after-coding | 1293 `canonicalize-after-coding' |
1294 Coding system to be returned by the detector routines in place of | 1294 Coding system to be returned by the detector routines in place of |
1295 this coding system. | 1295 this coding system. |
1296 | 1296 |
1297 | 1297 |
1298 | 1298 |
1299 The following additional properties are recognized if TYPE is 'unicode: | 1299 The following additional properties are recognized if TYPE is `unicode': |
1300 | 1300 |
1301 'type | 1301 `type' |
1302 One of `utf-16', `utf-8', `ucs-4', or `utf-7' (the latter is not | 1302 One of `utf-16', `utf-8', `ucs-4', or `utf-7' (the latter is not |
1303 yet implemented). `utf-16' is the basic two-byte encoding; | 1303 yet implemented). `utf-16' is the basic two-byte encoding; |
1304 `ucs-4' is the four-byte encoding; `utf-8' is an ASCII-compatible | 1304 `ucs-4' is the four-byte encoding; `utf-8' is an ASCII-compatible |
1305 variable-width 8-bit encoding; `utf-7' is a 7-bit encoding using | 1305 variable-width 8-bit encoding; `utf-7' is a 7-bit encoding using |
1306 only characters that will safely pass through all mail gateways. | 1306 only characters that will safely pass through all mail gateways. |
1307 [[ This should be \"transformation format\". There should also be | 1307 [[ This should be \"transformation format\". There should also be |
1308 `ucs-2' (or `bmp' -- no surrogates) and `utf-32' (range checked). ]] | 1308 `ucs-2' (or `bmp' -- no surrogates) and `utf-32' (range checked). ]] |
1309 | 1309 |
1310 'little-endian | 1310 `little-endian' |
1311 If non-nil, `utf-16' and `ucs-4' will write out the groups of two | 1311 If non-nil, `utf-16' and `ucs-4' will write out the groups of two |
1312 or four bytes little-endian instead of big-endian. This is required, | 1312 or four bytes little-endian instead of big-endian. This is required, |
1313 for example, under Windows. | 1313 for example, under Windows. |
1314 | 1314 |
1315 'need-bom | 1315 `need-bom' |
1316 If non-nil, a byte order mark (BOM, or Unicode FEFF) should be | 1316 If non-nil, a byte order mark (BOM, or Unicode FEFF) should be |
1317 written out at the beginning of the data. This serves both to | 1317 written out at the beginning of the data. This serves both to |
1318 identify the endianness of the following data and to mark the | 1318 identify the endianness of the following data and to mark the |
1319 data as Unicode (at least, this is how Windows uses it). | 1319 data as Unicode (at least, this is how Windows uses it). |
1320 [[ The correct term is \"signature\", since this technique may also | 1320 [[ The correct term is \"signature\", since this technique may also |
1321 be used with UTF-8. That is the term used in the standard. ]] | 1321 be used with UTF-8. That is the term used in the standard. ]] |
1322 | 1322 |
1323 | 1323 |
1324 The following additional properties are recognized if TYPE is | 1324 The following additional properties are recognized if TYPE is |
1325 'mswindows-multibyte: | 1325 `mswindows-multibyte': |
1326 | 1326 |
1327 'code-page | 1327 `code-page' |
1328 Either a number (specifying a particular code page) or one of the | 1328 Either a number (specifying a particular code page) or one of the |
1329 symbols `ansi', `oem', `mac', or `ebcdic', specifying the ANSI, | 1329 symbols `ansi', `oem', `mac', or `ebcdic', specifying the ANSI, |
1330 OEM, Macintosh, or EBCDIC code page associated with a particular | 1330 OEM, Macintosh, or EBCDIC code page associated with a particular |
1331 locale (given by the `locale' property). NOTE: EBCDIC code pages | 1331 locale (given by the `locale' property). NOTE: EBCDIC code pages |
1332 only exist in Windows 2000 and later. | 1332 only exist in Windows 2000 and later. |
1333 | 1333 |
1334 'locale | 1334 `locale' |
1335 If `code-page' is a symbol, this specifies the locale whose code | 1335 If `code-page' is a symbol, this specifies the locale whose code |
1336 page of the corresponding type should be used. This should be | 1336 page of the corresponding type should be used. This should be |
1337 one of the following: A cons of two strings, (LANGUAGE | 1337 one of the following: A cons of two strings, (LANGUAGE |
1338 . SUBLANGUAGE) (see `mswindows-set-current-locale'); a string (a | 1338 . SUBLANGUAGE) (see `mswindows-set-current-locale'); a string (a |
1339 language; SUBLANG_DEFAULT, i.e. the default sublanguage, is | 1339 language; SUBLANG_DEFAULT, i.e. the default sublanguage, is |
1342 `mswindows-current-locale', `mswindows-user-default-locale', or | 1342 `mswindows-current-locale', `mswindows-user-default-locale', or |
1343 `mswindows-system-default-locale', respectively. | 1343 `mswindows-system-default-locale', respectively. |
1344 | 1344 |
1345 | 1345 |
1346 | 1346 |
1347 The following additional properties are recognized if TYPE is 'undecided: | 1347 The following additional properties are recognized if TYPE is `undecided': |
1348 [[ Doesn't GNU use \"detect-*\" for the following two? ]] | 1348 [[ Doesn't GNU use \"detect-*\" for the following two? ]] |
1349 | 1349 |
1350 'do-eol | 1350 `do-eol' |
1351 Do EOL detection. | 1351 Do EOL detection. |
1352 | 1352 |
1353 'do-coding | 1353 `do-coding' |
1354 Do encoding detection. | 1354 Do encoding detection. |
1355 | 1355 |
1356 'coding-system | 1356 `coding-system' |
1357 If encoding detection is not done, use the specified coding system | 1357 If encoding detection is not done, use the specified coding system |
1358 to do decoding. This is used internally when implementing coding | 1358 to do decoding. This is used internally when implementing coding |
1359 systems with an EOL type that specifies autodetection (the default), | 1359 systems with an EOL type that specifies autodetection (the default), |
1360 so that the detector routines return the proper subsidiary. | 1360 so that the detector routines return the proper subsidiary. |
1361 | 1361 |
1362 | 1362 |
1363 | 1363 |
1364 The following additional property is recognized if TYPE is 'gzip: | 1364 The following additional property is recognized if TYPE is `gzip': |
1365 | 1365 |
1366 'level | 1366 `level' |
1367 Compression level: 0 through 9, or `default' (currently 6). | 1367 Compression level: 0 through 9, or `default' (currently 6). |
1368 | 1368 |
1369 */ | 1369 */ |
1370 (name, type, description, props)) | 1370 (name, type, description, props)) |
1371 { | 1371 { |
4062 } | 4062 } |
4063 | 4063 |
4064 DEFUN ("detect-coding-region", Fdetect_coding_region, 2, 3, 0, /* | 4064 DEFUN ("detect-coding-region", Fdetect_coding_region, 2, 3, 0, /* |
4065 Detect coding system of the text in the region between START and END. | 4065 Detect coding system of the text in the region between START and END. |
4066 Return a list of possible coding systems ordered by priority. | 4066 Return a list of possible coding systems ordered by priority. |
4067 If only ASCII characters are found, return 'undecided or one of | 4067 If only ASCII characters are found, return `undecided' or one of |
4068 its subsidiary coding systems according to a detected end-of-line | 4068 its subsidiary coding systems according to a detected end-of-line |
4069 type. Optional arg BUFFER defaults to the current buffer. | 4069 type. Optional arg BUFFER defaults to the current buffer. |
4070 */ | 4070 */ |
4071 (start, end, buffer)) | 4071 (start, end, buffer)) |
4072 { | 4072 { |