comparison src/file-coding.c @ 3025:facf3239ba30

[xemacs-hg @ 2005-10-25 11:16:19 by ben] rename new->new_, convert 'foo to `foo' EmacsFrame.c, ExternalClient.c, ExternalShell.c, chartab.c, cmdloop.c, compiler.h, console.c, database.c, device-msw.c, device-x.c, device.c, doc.c, dragdrop.c, eval.c, event-msw.c, event-stream.c, events.c, extents.c, file-coding.c, fns.c, frame-tty.c, frame.c, gpmevent.c, gutter.c, hash.c, imgproc.c, indent.c, keymap.c, lisp-union.h, macros.c, malloc.c, marker.c, menubar-x.c, menubar.c, mule-charset.c, number.c, process.c, profile.h, ralloc.c, redisplay.c, select-common.h, select.c, syntax.c, sysfile.h, sysproc.h, systime.h, syswindows.h, toolbar.c, tooltalk.c, tparam.c, unexaix.c, unexalpha.c, unexconvex.c, unexec.c, unexhp9k800.c, unexmips.c, unicode.c, window.c: new -> new_. 'foo -> `foo'. lwlib-internal.h: redo assert macros to follow lisp.h and not trigger warnings. lwlib.c, xlwtabs.c: new -> new_.
author ben
date Tue, 25 Oct 2005 11:16:49 +0000
parents b7f26b2f78bd
children 77f5a5135b3a 3742ea8250b5
comparison
equal deleted inserted replaced
3024:b7f26b2f78bd 3025:facf3239ba30
1 /* Text encoding conversion functions; coding-system object. 1 /* Text encoding conversion functions; coding-system object.
2 #### rename me to coding-system.c or coding.c 2 #### rename me to coding-system.c or coding.c
3 Copyright (C) 1991, 1995 Free Software Foundation, Inc. 3 Copyright (C) 1991, 1995 Free Software Foundation, Inc.
4 Copyright (C) 1995 Sun Microsystems, Inc. 4 Copyright (C) 1995 Sun Microsystems, Inc.
5 Copyright (C) 2000, 2001, 2002, 2003 Ben Wing. 5 Copyright (C) 2000, 2001, 2002, 2003, 2005 Ben Wing.
6 6
7 This file is part of XEmacs. 7 This file is part of XEmacs.
8 8
9 XEmacs is free software; you can redistribute it and/or modify it 9 XEmacs is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published by the 10 under the terms of the GNU General Public License as published by the
404 } 404 }
405 405
406 DEFUN ("valid-coding-system-type-p", Fvalid_coding_system_type_p, 1, 1, 0, /* 406 DEFUN ("valid-coding-system-type-p", Fvalid_coding_system_type_p, 1, 1, 0, /*
407 Given a CODING-SYSTEM-TYPE, return non-nil if it is valid. 407 Given a CODING-SYSTEM-TYPE, return non-nil if it is valid.
408 Valid types depend on how XEmacs was compiled but may include 408 Valid types depend on how XEmacs was compiled but may include
409 'undecided, 'chain, 'integer, 'ccl, 'iso2022, 'big5, 'shift-jis, 409 `undecided', `chain', `integer', `ccl', `iso2022', `big5', `shift-jis',
410 'utf-16, 'ucs-4, 'utf-8, etc. 410 `utf-16', `ucs-4', `utf-8', etc.
411 */ 411 */
412 (coding_system_type)) 412 (coding_system_type))
413 { 413 {
414 return valid_coding_system_type_p (coding_system_type) ? Qt : Qnil; 414 return valid_coding_system_type_p (coding_system_type) ? Qt : Qnil;
415 } 415 }
1042 DEFUN ("make-coding-system", Fmake_coding_system, 2, 4, 0, /* 1042 DEFUN ("make-coding-system", Fmake_coding_system, 2, 4, 0, /*
1043 Register symbol NAME as a coding system. 1043 Register symbol NAME as a coding system.
1044 1044
1045 TYPE describes the conversion method used and should be one of 1045 TYPE describes the conversion method used and should be one of
1046 1046
1047 nil or 'undecided 1047 nil or `undecided'
1048 Automatic conversion. XEmacs attempts to detect the coding system 1048 Automatic conversion. XEmacs attempts to detect the coding system
1049 used in the file. 1049 used in the file.
1050 'chain 1050 `chain'
1051 Chain two or more coding systems together to make a combination coding 1051 Chain two or more coding systems together to make a combination coding
1052 system. 1052 system.
1053 'no-conversion 1053 `no-conversion'
1054 No conversion. Use this for binary files and such. On output, 1054 No conversion. Use this for binary files and such. On output,
1055 graphic characters that are not in ASCII or Latin-1 will be 1055 graphic characters that are not in ASCII or Latin-1 will be
1056 replaced by a ?. (For a no-conversion-encoded buffer, these 1056 replaced by a ?. (For a no-conversion-encoded buffer, these
1057 characters will only be present if you explicitly insert them.) 1057 characters will only be present if you explicitly insert them.)
1058 'convert-eol 1058 `convert-eol'
1059 Convert CRLF sequences or CR to LF. 1059 Convert CRLF sequences or CR to LF.
1060 'shift-jis 1060 `shift-jis'
1061 Shift-JIS (a Japanese encoding commonly used in PC operating systems). 1061 Shift-JIS (a Japanese encoding commonly used in PC operating systems).
1062 'unicode 1062 `unicode'
1063 Any Unicode encoding (UCS-4, UTF-8, UTF-16, etc.). 1063 Any Unicode encoding (UCS-4, UTF-8, UTF-16, etc.).
1064 'mswindows-unicode-to-multibyte 1064 `mswindows-unicode-to-multibyte'
1065 (MS Windows only) Converts from Windows Unicode to Windows Multibyte 1065 (MS Windows only) Converts from Windows Unicode to Windows Multibyte
1066 (any code page encoding) upon encoding, and the other way upon decoding. 1066 (any code page encoding) upon encoding, and the other way upon decoding.
1067 'mswindows-multibyte 1067 `mswindows-multibyte'
1068 Converts to or from Windows Multibyte (any code page encoding). 1068 Converts to or from Windows Multibyte (any code page encoding).
1069 This is resolved into a chain of `mswindows-unicode' and 1069 This is resolved into a chain of `mswindows-unicode' and
1070 `mswindows-unicode-to-multibyte'. 1070 `mswindows-unicode-to-multibyte'.
1071 'iso2022 1071 `iso2022'
1072 Any ISO2022-compliant encoding. Among other things, this includes 1072 Any ISO2022-compliant encoding. Among other things, this includes
1073 JIS (the Japanese encoding commonly used for e-mail), EUC (the 1073 JIS (the Japanese encoding commonly used for e-mail), EUC (the
1074 standard Unix encoding for Japanese and other languages), and 1074 standard Unix encoding for Japanese and other languages), and
1075 Compound Text (the encoding used in X11). You can specify more 1075 Compound Text (the encoding used in X11). You can specify more
1076 specific information about the conversion with the PROPS argument. 1076 specific information about the conversion with the PROPS argument.
1077 'big5 1077 `big5'
1078 Big5 (the encoding commonly used for Mandarin Chinese in Taiwan). 1078 Big5 (the encoding commonly used for Mandarin Chinese in Taiwan).
1079 'ccl 1079 `ccl'
1080 The conversion is performed using a user-written pseudo-code 1080 The conversion is performed using a user-written pseudo-code
1081 program. CCL (Code Conversion Language) is the name of this 1081 program. CCL (Code Conversion Language) is the name of this
1082 pseudo-code. 1082 pseudo-code.
1083 'gzip 1083 `gzip'
1084 GZIP compression format. 1084 GZIP compression format.
1085 'internal 1085 `internal'
1086 Write out or read in the raw contents of the memory representing 1086 Write out or read in the raw contents of the memory representing
1087 the buffer's text. This is primarily useful for debugging 1087 the buffer's text. This is primarily useful for debugging
1088 purposes, and is only enabled when XEmacs has been compiled with 1088 purposes, and is only enabled when XEmacs has been compiled with
1089 DEBUG_XEMACS defined (via the --debug configure option). 1089 DEBUG_XEMACS defined (via the --debug configure option).
1090 WARNING: Reading in a file using 'internal conversion can result 1090 WARNING: Reading in a file using `internal' conversion can result
1091 in an internal inconsistency in the memory representing a 1091 in an internal inconsistency in the memory representing a
1092 buffer's text, which will produce unpredictable results and may 1092 buffer's text, which will produce unpredictable results and may
1093 cause XEmacs to crash. Under normal circumstances you should 1093 cause XEmacs to crash. Under normal circumstances you should
1094 never use 'internal conversion. 1094 never use `internal' conversion.
1095 1095
1096 DESCRIPTION is a short English phrase describing the coding system, 1096 DESCRIPTION is a short English phrase describing the coding system,
1097 suitable for use as a menu item. (See also the `documentation' property 1097 suitable for use as a menu item. (See also the `documentation' property
1098 below.) 1098 below.)
1099 1099
1100 PROPS is a property list, describing the specific nature of the 1100 PROPS is a property list, describing the specific nature of the
1101 character set. Recognized properties are: 1101 character set. Recognized properties are:
1102 1102
1103 'mnemonic 1103 `mnemonic'
1104 String to be displayed in the modeline when this coding system is 1104 String to be displayed in the modeline when this coding system is
1105 active. 1105 active.
1106 1106
1107 'documentation 1107 `documentation'
1108 Detailed documentation on the coding system. 1108 Detailed documentation on the coding system.
1109 1109
1110 'eol-type 1110 `eol-type'
1111 End-of-line conversion to be used. It should be one of 1111 End-of-line conversion to be used. It should be one of
1112 1112
1113 nil 1113 nil
1114 Automatically detect the end-of-line type (LF, CRLF, 1114 Automatically detect the end-of-line type (LF, CRLF,
1115 or CR). Also generate subsidiary coding systems named 1115 or CR). Also generate subsidiary coding systems named
1116 `NAME-unix', `NAME-dos', and `NAME-mac', that are 1116 `NAME-unix', `NAME-dos', and `NAME-mac', that are
1117 identical to this coding system but have an EOL-TYPE 1117 identical to this coding system but have an EOL-TYPE
1118 value of 'lf, 'crlf, and 'cr, respectively. 1118 value of `lf', `crlf', and `cr', respectively.
1119 'lf 1119 `lf'
1120 The end of a line is marked externally using ASCII LF. 1120 The end of a line is marked externally using ASCII LF.
1121 Since this is also the way that XEmacs represents an 1121 Since this is also the way that XEmacs represents an
1122 end-of-line internally, specifying this option results 1122 end-of-line internally, specifying this option results
1123 in no end-of-line conversion. This is the standard 1123 in no end-of-line conversion. This is the standard
1124 format for Unix text files. 1124 format for Unix text files.
1125 'crlf 1125 `crlf'
1126 The end of a line is marked externally using ASCII 1126 The end of a line is marked externally using ASCII
1127 CRLF. This is the standard format for MS-DOS text 1127 CRLF. This is the standard format for MS-DOS text
1128 files. 1128 files.
1129 'cr 1129 `cr'
1130 The end of a line is marked externally using ASCII CR. 1130 The end of a line is marked externally using ASCII CR.
1131 This is the standard format for Macintosh text files. 1131 This is the standard format for Macintosh text files.
1132 t 1132 t
1133 Automatically detect the end-of-line type but do not 1133 Automatically detect the end-of-line type but do not
1134 generate subsidiary coding systems. (This value is 1134 generate subsidiary coding systems. (This value is
1135 converted to nil when stored internally, and 1135 converted to nil when stored internally, and
1136 `coding-system-property' will return nil.) 1136 `coding-system-property' will return nil.)
1137 1137
1138 'post-read-conversion 1138 `post-read-conversion'
1139 The value is a function to call after some text is inserted and 1139 The value is a function to call after some text is inserted and
1140 decoded by the coding system itself and before any functions in 1140 decoded by the coding system itself and before any functions in
1141 `after-change-functions' are called. (#### Not actually true in 1141 `after-change-functions' are called. (#### Not actually true in
1142 XEmacs. `after-change-functions' will be called twice if 1142 XEmacs. `after-change-functions' will be called twice if
1143 `post-read-conversion' changes something.) The argument of this 1143 `post-read-conversion' changes something.) The argument of this
1144 function is the same as for a function in 1144 function is the same as for a function in
1145 `after-insert-file-functions', i.e. LENGTH of the text inserted, 1145 `after-insert-file-functions', i.e. LENGTH of the text inserted,
1146 with point at the head of the text to be decoded. 1146 with point at the head of the text to be decoded.
1147 1147
1148 'pre-write-conversion 1148 `pre-write-conversion'
1149 The value is a function to call after all functions in 1149 The value is a function to call after all functions in
1150 `write-region-annotate-functions' and `buffer-file-format' are 1150 `write-region-annotate-functions' and `buffer-file-format' are
1151 called, and before the text is encoded by the coding system itself. 1151 called, and before the text is encoded by the coding system itself.
1152 The arguments to this function are the same as those of a function 1152 The arguments to this function are the same as those of a function
1153 in `write-region-annotate-functions', i.e. FROM and TO, specifying 1153 in `write-region-annotate-functions', i.e. FROM and TO, specifying
1156 1156
1157 1157
1158 The following properties are allowed for FSF compatibility but currently 1158 The following properties are allowed for FSF compatibility but currently
1159 ignored: 1159 ignored:
1160 1160
1161 'translation-table-for-decode 1161 `translation-table-for-decode'
1162 The value is a translation table to be applied on decoding. See 1162 The value is a translation table to be applied on decoding. See
1163 the function `make-translation-table' for the format of translation 1163 the function `make-translation-table' for the format of translation
1164 table. This is not applicable to CCL-based coding systems. 1164 table. This is not applicable to CCL-based coding systems.
1165 1165
1166 'translation-table-for-encode 1166 `translation-table-for-encode'
1167 The value is a translation table to be applied on encoding. This is 1167 The value is a translation table to be applied on encoding. This is
1168 not applicable to CCL-based coding systems. 1168 not applicable to CCL-based coding systems.
1169 1169
1170 'safe-chars 1170 `safe-chars'
1171 The value is a char table. If a character has non-nil value in it, 1171 The value is a char table. If a character has non-nil value in it,
1172 the character is safely supported by the coding system. This 1172 the character is safely supported by the coding system. This
1173 overrides the specification of safe-charsets. 1173 overrides the specification of safe-charsets.
1174 1174
1175 'safe-charsets 1175 `safe-charsets'
1176 The value is a list of charsets safely supported by the coding 1176 The value is a list of charsets safely supported by the coding
1177 system. The value t means that all charsets Emacs handles are 1177 system. The value t means that all charsets Emacs handles are
1178 supported. Even if some charset is not in this list, it doesn't 1178 supported. Even if some charset is not in this list, it doesn't
1179 mean that the charset can't be encoded in the coding system; 1179 mean that the charset can't be encoded in the coding system;
1180 it just means that some other receiver of text encoded 1180 it just means that some other receiver of text encoded
1181 in the coding system won't be able to handle that charset. 1181 in the coding system won't be able to handle that charset.
1182 1182
1183 'mime-charset 1183 `mime-charset'
1184 The value is a symbol whose name is the `MIME-charset' parameter of 1184 The value is a symbol whose name is the `MIME-charset' parameter of
1185 the coding system. 1185 the coding system.
1186 1186
1187 'valid-codes (meaningful only for a coding system based on CCL) 1187 `valid-codes' (meaningful only for a coding system based on CCL)
1188 The value is a list to indicate valid byte ranges of the encoded 1188 The value is a list to indicate valid byte ranges of the encoded
1189 file. Each element of the list is an integer or a cons of integers. 1189 file. Each element of the list is an integer or a cons of integers.
1190 In the former case, the integer value is a valid byte code. In the 1190 In the former case, the integer value is a valid byte code. In the
1191 latter case, the integers specify the range of valid byte codes. 1191 latter case, the integers specify the range of valid byte codes.
1192 1192
1193 1193
1194 1194
1195 The following additional property is recognized if TYPE is 'convert-eol: 1195 The following additional property is recognized if TYPE is `convert-eol':
1196 1196
1197 'subtype 1197 `subtype'
1198 One of `lf', `crlf', `cr' or nil (for autodetection). When decoding, 1198 One of `lf', `crlf', `cr' or nil (for autodetection). When decoding,
1199 the corresponding sequence will be converted to LF. When encoding, 1199 the corresponding sequence will be converted to LF. When encoding,
1200 the opposite happens. This coding system converts characters to 1200 the opposite happens. This coding system converts characters to
1201 characters. 1201 characters.
1202 1202
1203 1203
1204 1204
1205 The following additional properties are recognized if TYPE is 'iso2022: 1205 The following additional properties are recognized if TYPE is `iso2022':
1206 1206
1207 'charset-g0 1207 `charset-g0'
1208 'charset-g1 1208 `charset-g1'
1209 'charset-g2 1209 `charset-g2'
1210 'charset-g3 1210 `charset-g3'
1211 The character set initially designated to the G0 - G3 registers. 1211 The character set initially designated to the G0 - G3 registers.
1212 The value should be one of 1212 The value should be one of
1213 1213
1214 -- A charset object (designate that character set) 1214 -- A charset object (designate that character set)
1215 -- nil (do not ever use this register) 1215 -- nil (do not ever use this register)
1216 -- t (no character set is initially designated to 1216 -- t (no character set is initially designated to
1217 the register, but may be later on; this automatically 1217 the register, but may be later on; this automatically
1218 sets the corresponding `force-g*-on-output' property) 1218 sets the corresponding `force-g*-on-output' property)
1219 1219
1220 'force-g0-on-output 1220 `force-g0-on-output'
1221 'force-g1-on-output 1221 `force-g1-on-output'
1222 'force-g2-on-output 1222 `force-g2-on-output'
1223 'force-g3-on-output 1223 `force-g3-on-output'
1224 If non-nil, send an explicit designation sequence on output before 1224 If non-nil, send an explicit designation sequence on output before
1225 using the specified register. 1225 using the specified register.
1226 1226
1227 'short 1227 `short'
1228 If non-nil, use the short forms "ESC $ @", "ESC $ A", and 1228 If non-nil, use the short forms "ESC $ @", "ESC $ A", and
1229 "ESC $ B" on output in place of the full designation sequences 1229 "ESC $ B" on output in place of the full designation sequences
1230 "ESC $ ( @", "ESC $ ( A", and "ESC $ ( B". 1230 "ESC $ ( @", "ESC $ ( A", and "ESC $ ( B".
1231 1231
1232 'no-ascii-eol 1232 `no-ascii-eol'
1233 If non-nil, don't designate ASCII to G0 at each end of line on output. 1233 If non-nil, don't designate ASCII to G0 at each end of line on output.
1234 Setting this to non-nil also suppresses other state-resetting that 1234 Setting this to non-nil also suppresses other state-resetting that
1235 normally happens at the end of a line. 1235 normally happens at the end of a line.
1236 1236
1237 'no-ascii-cntl 1237 `no-ascii-cntl'
1238 If non-nil, don't designate ASCII to G0 before control chars on output. 1238 If non-nil, don't designate ASCII to G0 before control chars on output.
1239 1239
1240 'seven 1240 `seven'
1241 If non-nil, use 7-bit environment on output. Otherwise, use 8-bit 1241 If non-nil, use 7-bit environment on output. Otherwise, use 8-bit
1242 environment. 1242 environment.
1243 1243
1244 'lock-shift 1244 `lock-shift'
1245 If non-nil, use locking-shift (SO/SI) instead of single-shift 1245 If non-nil, use locking-shift (SO/SI) instead of single-shift
1246 or designation by escape sequence. 1246 or designation by escape sequence.
1247 1247
1248 'no-iso6429 1248 `no-iso6429'
1249 If non-nil, don't use ISO6429's direction specification. 1249 If non-nil, don't use ISO6429's direction specification.
1250 1250
1251 'escape-quoted 1251 `escape-quoted'
1252 If non-nil, literal control characters that are the same as 1252 If non-nil, literal control characters that are the same as
1253 the beginning of a recognized ISO2022 or ISO6429 escape sequence 1253 the beginning of a recognized ISO2022 or ISO6429 escape sequence
1254 (in particular, ESC (0x1B), SO (0x0E), SI (0x0F), SS2 (0x8E), 1254 (in particular, ESC (0x1B), SO (0x0E), SI (0x0F), SS2 (0x8E),
1255 SS3 (0x8F), and CSI (0x9B)) are "quoted" with an escape character 1255 SS3 (0x8F), and CSI (0x9B)) are "quoted" with an escape character
1256 so that they can be properly distinguished from an escape sequence. 1256 so that they can be properly distinguished from an escape sequence.
1259 is a good choice for a quoting character because there are no 1259 is a good choice for a quoting character because there are no
1260 escape sequences whose second byte is a character from the Control-0 1260 escape sequences whose second byte is a character from the Control-0
1261 or Control-1 character sets; this is explicitly disallowed by the 1261 or Control-1 character sets; this is explicitly disallowed by the
1262 ISO2022 standard. 1262 ISO2022 standard.
1263 1263
1264 'input-charset-conversion 1264 `input-charset-conversion'
1265 A list of conversion specifications, specifying conversion of 1265 A list of conversion specifications, specifying conversion of
1266 characters in one charset to another when decoding is performed. 1266 characters in one charset to another when decoding is performed.
1267 Each specification is a list of two elements: the source charset, 1267 Each specification is a list of two elements: the source charset,
1268 and the destination charset. 1268 and the destination charset.
1269 1269
1270 'output-charset-conversion 1270 `output-charset-conversion'
1271 A list of conversion specifications, specifying conversion of 1271 A list of conversion specifications, specifying conversion of
1272 characters in one charset to another when encoding is performed. 1272 characters in one charset to another when encoding is performed.
1273 The form of each specification is the same as for 1273 The form of each specification is the same as for
1274 'input-charset-conversion. 1274 `input-charset-conversion'.
1275 1275
1276 1276
1277 1277
1278 The following additional properties are recognized (and required) 1278 The following additional properties are recognized (and required)
1279 if TYPE is 'ccl: 1279 if TYPE is `ccl':
1280 1280
1281 'decode 1281 `decode'
1282 CCL program used for decoding (converting to internal format). 1282 CCL program used for decoding (converting to internal format).
1283 1283
1284 'encode 1284 `encode'
1285 CCL program used for encoding (converting to external format). 1285 CCL program used for encoding (converting to external format).
1286 1286
1287 1287
1288 The following additional properties are recognized if TYPE is 'chain: 1288 The following additional properties are recognized if TYPE is `chain':
1289 1289
1290 'chain 1290 `chain'
1291 List of coding systems to be chained together, in decoding order. 1291 List of coding systems to be chained together, in decoding order.
1292 1292
1293 'canonicalize-after-coding 1293 `canonicalize-after-coding'
1294 Coding system to be returned by the detector routines in place of 1294 Coding system to be returned by the detector routines in place of
1295 this coding system. 1295 this coding system.
1296 1296
1297 1297
1298 1298
1299 The following additional properties are recognized if TYPE is 'unicode: 1299 The following additional properties are recognized if TYPE is `unicode':
1300 1300
1301 'type 1301 `type'
1302 One of `utf-16', `utf-8', `ucs-4', or `utf-7' (the latter is not 1302 One of `utf-16', `utf-8', `ucs-4', or `utf-7' (the latter is not
1303 yet implemented). `utf-16' is the basic two-byte encoding; 1303 yet implemented). `utf-16' is the basic two-byte encoding;
1304 `ucs-4' is the four-byte encoding; `utf-8' is an ASCII-compatible 1304 `ucs-4' is the four-byte encoding; `utf-8' is an ASCII-compatible
1305 variable-width 8-bit encoding; `utf-7' is a 7-bit encoding using 1305 variable-width 8-bit encoding; `utf-7' is a 7-bit encoding using
1306 only characters that will safely pass through all mail gateways. 1306 only characters that will safely pass through all mail gateways.
1307 [[ This should be \"transformation format\". There should also be 1307 [[ This should be \"transformation format\". There should also be
1308 `ucs-2' (or `bmp' -- no surrogates) and `utf-32' (range checked). ]] 1308 `ucs-2' (or `bmp' -- no surrogates) and `utf-32' (range checked). ]]
1309 1309
1310 'little-endian 1310 `little-endian'
1311 If non-nil, `utf-16' and `ucs-4' will write out the groups of two 1311 If non-nil, `utf-16' and `ucs-4' will write out the groups of two
1312 or four bytes little-endian instead of big-endian. This is required, 1312 or four bytes little-endian instead of big-endian. This is required,
1313 for example, under Windows. 1313 for example, under Windows.
1314 1314
1315 'need-bom 1315 `need-bom'
1316 If non-nil, a byte order mark (BOM, or Unicode FEFF) should be 1316 If non-nil, a byte order mark (BOM, or Unicode FEFF) should be
1317 written out at the beginning of the data. This serves both to 1317 written out at the beginning of the data. This serves both to
1318 identify the endianness of the following data and to mark the 1318 identify the endianness of the following data and to mark the
1319 data as Unicode (at least, this is how Windows uses it). 1319 data as Unicode (at least, this is how Windows uses it).
1320 [[ The correct term is \"signature\", since this technique may also 1320 [[ The correct term is \"signature\", since this technique may also
1321 be used with UTF-8. That is the term used in the standard. ]] 1321 be used with UTF-8. That is the term used in the standard. ]]
1322 1322
1323 1323
1324 The following additional properties are recognized if TYPE is 1324 The following additional properties are recognized if TYPE is
1325 'mswindows-multibyte: 1325 `mswindows-multibyte':
1326 1326
1327 'code-page 1327 `code-page'
1328 Either a number (specifying a particular code page) or one of the 1328 Either a number (specifying a particular code page) or one of the
1329 symbols `ansi', `oem', `mac', or `ebcdic', specifying the ANSI, 1329 symbols `ansi', `oem', `mac', or `ebcdic', specifying the ANSI,
1330 OEM, Macintosh, or EBCDIC code page associated with a particular 1330 OEM, Macintosh, or EBCDIC code page associated with a particular
1331 locale (given by the `locale' property). NOTE: EBCDIC code pages 1331 locale (given by the `locale' property). NOTE: EBCDIC code pages
1332 only exist in Windows 2000 and later. 1332 only exist in Windows 2000 and later.
1333 1333
1334 'locale 1334 `locale'
1335 If `code-page' is a symbol, this specifies the locale whose code 1335 If `code-page' is a symbol, this specifies the locale whose code
1336 page of the corresponding type should be used. This should be 1336 page of the corresponding type should be used. This should be
1337 one of the following: A cons of two strings, (LANGUAGE 1337 one of the following: A cons of two strings, (LANGUAGE
1338 . SUBLANGUAGE) (see `mswindows-set-current-locale'); a string (a 1338 . SUBLANGUAGE) (see `mswindows-set-current-locale'); a string (a
1339 language; SUBLANG_DEFAULT, i.e. the default sublanguage, is 1339 language; SUBLANG_DEFAULT, i.e. the default sublanguage, is
1342 `mswindows-current-locale', `mswindows-user-default-locale', or 1342 `mswindows-current-locale', `mswindows-user-default-locale', or
1343 `mswindows-system-default-locale', respectively. 1343 `mswindows-system-default-locale', respectively.
1344 1344
1345 1345
1346 1346
1347 The following additional properties are recognized if TYPE is 'undecided: 1347 The following additional properties are recognized if TYPE is `undecided':
1348 [[ Doesn't GNU use \"detect-*\" for the following two? ]] 1348 [[ Doesn't GNU use \"detect-*\" for the following two? ]]
1349 1349
1350 'do-eol 1350 `do-eol'
1351 Do EOL detection. 1351 Do EOL detection.
1352 1352
1353 'do-coding 1353 `do-coding'
1354 Do encoding detection. 1354 Do encoding detection.
1355 1355
1356 'coding-system 1356 `coding-system'
1357 If encoding detection is not done, use the specified coding system 1357 If encoding detection is not done, use the specified coding system
1358 to do decoding. This is used internally when implementing coding 1358 to do decoding. This is used internally when implementing coding
1359 systems with an EOL type that specifies autodetection (the default), 1359 systems with an EOL type that specifies autodetection (the default),
1360 so that the detector routines return the proper subsidiary. 1360 so that the detector routines return the proper subsidiary.
1361 1361
1362 1362
1363 1363
1364 The following additional property is recognized if TYPE is 'gzip: 1364 The following additional property is recognized if TYPE is `gzip':
1365 1365
1366 'level 1366 `level'
1367 Compression level: 0 through 9, or `default' (currently 6). 1367 Compression level: 0 through 9, or `default' (currently 6).
1368 1368
1369 */ 1369 */
1370 (name, type, description, props)) 1370 (name, type, description, props))
1371 { 1371 {
4062 } 4062 }
4063 4063
4064 DEFUN ("detect-coding-region", Fdetect_coding_region, 2, 3, 0, /* 4064 DEFUN ("detect-coding-region", Fdetect_coding_region, 2, 3, 0, /*
4065 Detect coding system of the text in the region between START and END. 4065 Detect coding system of the text in the region between START and END.
4066 Return a list of possible coding systems ordered by priority. 4066 Return a list of possible coding systems ordered by priority.
4067 If only ASCII characters are found, return 'undecided or one of 4067 If only ASCII characters are found, return `undecided' or one of
4068 its subsidiary coding systems according to a detected end-of-line 4068 its subsidiary coding systems according to a detected end-of-line
4069 type. Optional arg BUFFER defaults to the current buffer. 4069 type. Optional arg BUFFER defaults to the current buffer.
4070 */ 4070 */
4071 (start, end, buffer)) 4071 (start, end, buffer))
4072 { 4072 {