comparison src/unicode.c @ 1318:b531bf8658e9

[xemacs-hg @ 2003-02-21 06:56:46 by ben] redisplay fixes et al. PROBLEMS: Add comment about Cygwin, unexec and sysmalloc. Move some non-general stuff out of general. Make a section for x86. configure.in: Add check for broken alloca in funcalls. mule/mule-cmds.el: Alias file-name to native not vice-versa. Do set EOL of native but not of process output to fix various problems and be consistent with code-init.el. code-cmds.el: Return a name not a coding system. code-init.el: Reindent. Remove `file-name' since it should always be the same as native. unicode.el: Rename to load-unicode-mapping-table as suggested by the anonymous (but rather Turnbullian) comment in unicode.c. xemacs.dsp: Add /k to default build. alloc.c: Make gc_currently_forbidden static. config.h.in, lisp.h: Move some stuff to lisp.h. console-gtk.h, console-impl.h, console-msw.h, console-x.h, event-Xt.c, event-msw.c, redisplay-gtk.c, redisplay-msw.c, redisplay-output.c, redisplay-x.c, gtk-xemacs.c: Remove duplicated code to redraw exposed area. Add deadbox method needed by the generalized redraw code. Defer redrawing if already in redisplay. frame-msw.c, event-stream.c, frame.c: Add comments about calling Lisp. debug.c, general-slots.h: Move generalish symbols to general-slots.h. doprnt.c: reindent. lisp.h, dynarr.c: Add debug code for locking a dynarr to catch invalid mods. Use in redisplay.c. eval.c: file-coding.c: Define file-name as alias for native not vice-versa. frame-gtk.c, frame-x.c: Move Qwindow_id to general-slots. dialog-msw.c, glyphs-gtk.c, glyphs-msw.c, glyphs-widget.c, glyphs-x.c, gui.c, gui.h, menubar-msw.c, menubar.c: Ensure that various glyph functions that eval within redisplay protect the evals. Same for calls to internal_equal(). Modify various functions, e.g. gui_item_*(), to protect evals within redisplay, taking an in_redisplay parameter if it's possible for them to be called both inside and outside of redisplay. 
gutter.c: Defer specifier-changed updating till after redisplay, if necessary, since we need to enter redisplay to do it. gutter.c: Do nothing if in redisplay. lisp.h: Add version of alloca() for use in function calls. lisp.h: Add XCAD[D+]R up to 6 D's, and aliases X1ST, X2ND, etc. frame.c, frame.h, redisplay.c, redisplay.h, signal.c, toolbar.c: Redo critical-section code and move from frame.c to redisplay.c. Require that every place inside of redisplay catch errors itself, not at the edge of the critical section (thereby bypassing the rest of redisplay and leaving things in an inconsistent state). Introduce separate means of holding frame-size changes without entering a complete critical section. Introduce "post-redisplay" methods for deferring things till after redisplay. Abort if we enter redisplay reentrantly. Disable all quit checking in redisplay since it's too dangerous. Ensure that all calls to QUIT trigger an abort if unprotected. redisplay.c, scrollbar-gtk.c, scrollbar-x.c, scrollbar.c: Create enter/exit_redisplay_critical_section_maybe() for code that needs to ensure it's in a critical section but doesn't interfere with an existing critical section. sysdep.c: Use _wexecve() when under Windows NT for Unicode correctness. text.c, text.h: Add new_dfc() functions, which return an alloca()ed value rather than requiring an lvalue. (Not really used yet; used in another workspace, to come.) Add some macros for SIZED_EXTERNAL. Update the encoding aliases after close scrutiny of the X manual. unicode.c: Answer the anonymous but suspiciously Turnbullian questions. Rename parse-unicode-translation-table to load-unicode-mapping-table, as suggested.
author ben
date Fri, 21 Feb 2003 06:57:21 +0000
parents 70921960b980
children 969b7290edca
comparison
equal deleted inserted replaced
1317:d9d08dc5e617 1318:b531bf8658e9
52 Inquiring Minds Want To Know Dept: does the above WARNING mean that 52 Inquiring Minds Want To Know Dept: does the above WARNING mean that
53 _if_ it happens, then it will signal error, or then it will do 53 _if_ it happens, then it will signal error, or then it will do
54 something evil and unpredictable? Signaling an error is OK: for 54 something evil and unpredictable? Signaling an error is OK: for
55 all national standards, the national to Unicode map is an inclusion 55 all national standards, the national to Unicode map is an inclusion
56 (1-to-1). Any character set that does not behave that way is 56 (1-to-1). Any character set that does not behave that way is
57 broken according to the Unicode standard. */ 57 broken according to the Unicode standard.
58
59 Answer: You will get an abort(), since the purpose of the sledgehammer
60 routines is self-checking. The above problem with non-1-to-1 mapping
61 occurs in the Big5 tables, as provided by the Unicode Consortium. */
58 62
59 /* #define SLEDGEHAMMER_CHECK_UNICODE */ 63 /* #define SLEDGEHAMMER_CHECK_UNICODE */
60 64
61 /* We currently use the following format for tables: 65 /* We currently use the following format for tables:
62 66
162 Lisp_Object Qutf_8_bom; 166 Lisp_Object Qutf_8_bom;
163 167
164 #ifdef MULE 168 #ifdef MULE
165 169
166 /* #### Using ints for to_unicode is OK (as long as they are >= 32 bits). 170 /* #### Using ints for to_unicode is OK (as long as they are >= 32 bits).
167 However, shouldn't the shorts below be unsigned? */ 171 However, shouldn't the shorts below be unsigned?
172
173 Answer: Doesn't matter because the values being converted to are only
174 96x96. */
168 static int *to_unicode_blank_1; 175 static int *to_unicode_blank_1;
169 static int **to_unicode_blank_2; 176 static int **to_unicode_blank_2;
170 177
171 static short *from_unicode_blank_1; 178 static short *from_unicode_blank_1;
172 static short **from_unicode_blank_2; 179 static short **from_unicode_blank_2;
297 from_unicode_blank_3 = xnew_array (short **, 256); 304 from_unicode_blank_3 = xnew_array (short **, 256);
298 from_unicode_blank_4 = xnew_array (short ***, 256); 305 from_unicode_blank_4 = xnew_array (short ***, 256);
299 for (i = 0; i < 256; i++) 306 for (i = 0; i < 256; i++)
300 { 307 {
301 /* #### IMWTK: Why does using -1 here work? Simply because there are 308 /* #### IMWTK: Why does using -1 here work? Simply because there are
302 no existing 96x96 charsets? */ 309 no existing 96x96 charsets?
310
311 Answer: I don't understand the concern. -1 indicates there is no
312 entry for this particular codepoint, which is always the case for
313 blank tables. */
303 from_unicode_blank_1[i] = (short) -1; 314 from_unicode_blank_1[i] = (short) -1;
304 from_unicode_blank_2[i] = from_unicode_blank_1; 315 from_unicode_blank_2[i] = from_unicode_blank_1;
305 from_unicode_blank_3[i] = from_unicode_blank_2; 316 from_unicode_blank_3[i] = from_unicode_blank_2;
306 from_unicode_blank_4[i] = from_unicode_blank_3; 317 from_unicode_blank_4[i] = from_unicode_blank_3;
307 } 318 }
352 return 0; 363 return 0;
353 } 364 }
354 } 365 }
355 366
356 /* Allocate and blank the tables. 367 /* Allocate and blank the tables.
357 Loading them up is done by parse-unicode-translation-table. */ 368 Loading them up is done by load-unicode-mapping-table. */
358 void 369 void
359 init_charset_unicode_tables (Lisp_Object charset) 370 init_charset_unicode_tables (Lisp_Object charset)
360 { 371 {
361 if (XCHARSET_DIMENSION (charset) == 1) 372 if (XCHARSET_DIMENSION (charset) == 1)
362 { 373 {
1053 are followed by the remaining charsets, in some arbitrary order. 1064 are followed by the remaining charsets, in some arbitrary order.
1054 1065
1055 The language-specific precedence list is meant to be set as part of the 1066 The language-specific precedence list is meant to be set as part of the
1056 language environment initialization; the default precedence list is meant 1067 language environment initialization; the default precedence list is meant
1057 to be set by the user. 1068 to be set by the user.
1069
1070 #### NOTE: This interface may be changed.
1058 */ 1071 */
1059 ()) 1072 ())
1060 { 1073 {
1061 int i; 1074 int i;
1062 Lisp_Object list = Qnil; 1075 Lisp_Object list = Qnil;
1071 to have varying opinions about whether ISO Cyrillic, KOI8-R, or Windows 1084 to have varying opinions about whether ISO Cyrillic, KOI8-R, or Windows
1072 1251 should take precedence, and whether Big Five or CNS should take 1085 1251 should take precedence, and whether Big Five or CNS should take
1073 precedence, respectively. This means that users are sometimes going to 1086 precedence, respectively. This means that users are sometimes going to
1074 want to set Vlanguage_unicode_precedence_list. 1087 want to set Vlanguage_unicode_precedence_list.
1075 Furthermore, this should be language-local (buffer-local would be a 1088 Furthermore, this should be language-local (buffer-local would be a
1076 reasonable approximation). */ 1089 reasonable approximation).
1090
1091 Answer: You are right, this needs rethinking. */
1077 DEFUN ("set-language-unicode-precedence-list", 1092 DEFUN ("set-language-unicode-precedence-list",
1078 Fset_language_unicode_precedence_list, 1093 Fset_language_unicode_precedence_list,
1079 1, 1, 0, /* 1094 1, 1, 0, /*
1080 Set the language-specific precedence of charsets in Unicode decoding. 1095 Set the language-specific precedence of charsets in Unicode decoding.
1081 LIST is a list of charsets. 1096 LIST is a list of charsets.
1082 See `unicode-precedence-list' for more information. 1097 See `unicode-precedence-list' for more information.
1098
1099 #### NOTE: This interface may be changed.
1083 */ 1100 */
1084 (list)) 1101 (list))
1085 { 1102 {
1086 { 1103 {
1087 EXTERNAL_LIST_LOOP_2 (elt, list) 1104 EXTERNAL_LIST_LOOP_2 (elt, list)
1096 DEFUN ("language-unicode-precedence-list", 1113 DEFUN ("language-unicode-precedence-list",
1097 Flanguage_unicode_precedence_list, 1114 Flanguage_unicode_precedence_list,
1098 0, 0, 0, /* 1115 0, 0, 0, /*
1099 Return the language-specific precedence list used for Unicode decoding. 1116 Return the language-specific precedence list used for Unicode decoding.
1100 See `unicode-precedence-list' for more information. 1117 See `unicode-precedence-list' for more information.
1118
1119 #### NOTE: This interface may be changed.
1101 */ 1120 */
1102 ()) 1121 ())
1103 { 1122 {
1104 return Vlanguage_unicode_precedence_list; 1123 return Vlanguage_unicode_precedence_list;
1105 } 1124 }
1108 Fset_default_unicode_precedence_list, 1127 Fset_default_unicode_precedence_list,
1109 1, 1, 0, /* 1128 1, 1, 0, /*
1110 Set the default precedence list used for Unicode decoding. 1129 Set the default precedence list used for Unicode decoding.
1111 This is intended to be set by the user. See 1130 This is intended to be set by the user. See
1112 `unicode-precedence-list' for more information. 1131 `unicode-precedence-list' for more information.
1132
1133 #### NOTE: This interface may be changed.
1113 */ 1134 */
1114 (list)) 1135 (list))
1115 { 1136 {
1116 { 1137 {
1117 EXTERNAL_LIST_LOOP_2 (elt, list) 1138 EXTERNAL_LIST_LOOP_2 (elt, list)
1126 DEFUN ("default-unicode-precedence-list", 1147 DEFUN ("default-unicode-precedence-list",
1127 Fdefault_unicode_precedence_list, 1148 Fdefault_unicode_precedence_list,
1128 0, 0, 0, /* 1149 0, 0, 0, /*
1129 Return the default precedence list used for Unicode decoding. 1150 Return the default precedence list used for Unicode decoding.
1130 See `unicode-precedence-list' for more information. 1151 See `unicode-precedence-list' for more information.
1152
1153 #### NOTE: This interface may be changed.
1131 */ 1154 */
1132 ()) 1155 ())
1133 { 1156 {
1134 return Vdefault_unicode_precedence_list; 1157 return Vdefault_unicode_precedence_list;
1135 } 1158 }
1257 FILE *file = (FILE *) get_opaque_ptr (fulano); 1280 FILE *file = (FILE *) get_opaque_ptr (fulano);
1258 retry_fclose (file); 1281 retry_fclose (file);
1259 return Qnil; 1282 return Qnil;
1260 } 1283 }
1261 1284
1262 /* #### shouldn't this interface be called load-unicode-mapping-table 1285 DEFUN ("load-unicode-mapping-table", Fload_unicode_mapping_table,
1263 for consistency with Unicode Consortium terminology? */
1264 DEFUN ("parse-unicode-translation-table", Fparse_unicode_translation_table,
1265 2, 6, 0, /* 1286 2, 6, 0, /*
1266 Load Unicode tables with the Unicode mapping data in FILENAME for CHARSET. 1287 Load Unicode tables with the Unicode mapping data in FILENAME for CHARSET.
1267 Data is text, in the form of one translation per line -- charset 1288 Data is text, in the form of one translation per line -- charset
1268 codepoint followed by Unicode codepoint. Numbers are decimal or hex 1289 codepoint followed by Unicode codepoint. Numbers are decimal or hex
1269 \(preceded by 0x). Comments are marked with a #. Charset codepoints 1290 \(preceded by 0x). Comments are marked with a #. Charset codepoints
1329 ignore_first_column = 1; 1350 ignore_first_column = 1;
1330 else if (EQ (elt, Qbig5)) 1351 else if (EQ (elt, Qbig5))
1331 big5 = 1; 1352 big5 = 1;
1332 else 1353 else
1333 invalid_constant 1354 invalid_constant
1334 ("Unrecognized `parse-unicode-translation-table' flag", elt); 1355 ("Unrecognized `load-unicode-mapping-table' flag", elt);
1335 } 1356 }
1336 } 1357 }
1337 1358
1338 GCPRO1 (filename); 1359 GCPRO1 (filename);
1339 filename = Fexpand_file_name (filename, Qnil); 1360 filename = Fexpand_file_name (filename, Qnil);
2313 DEFSUBR (Flanguage_unicode_precedence_list); 2334 DEFSUBR (Flanguage_unicode_precedence_list);
2314 DEFSUBR (Fset_default_unicode_precedence_list); 2335 DEFSUBR (Fset_default_unicode_precedence_list);
2315 DEFSUBR (Fdefault_unicode_precedence_list); 2336 DEFSUBR (Fdefault_unicode_precedence_list);
2316 DEFSUBR (Fset_unicode_conversion); 2337 DEFSUBR (Fset_unicode_conversion);
2317 2338
2318 DEFSUBR (Fparse_unicode_translation_table); 2339 DEFSUBR (Fload_unicode_mapping_table);
2319 2340
2320 DEFSYMBOL (Qignore_first_column); 2341 DEFSYMBOL (Qignore_first_column);
2321 #endif /* MULE */ 2342 #endif /* MULE */
2322 2343
2323 DEFSUBR (Fchar_to_unicode); 2344 DEFSUBR (Fchar_to_unicode);