comparison src/unicode.c @ 4834:b3ea9c582280

Use new cygwin_conv_path API with Cygwin 1.7 for converting names between Win32 and POSIX, UTF-8-aware, with attendant changes elsewhere
author Ben Wing <ben@xemacs.org>
date Tue, 12 Jan 2010 01:38:04 -0600
parents c12b646d84ee
children 19a72041c5ed
comparison
equal deleted inserted replaced
4833:4dd2389173fc 4834:b3ea9c582280
1 /* Code to handle Unicode conversion. 1 /* Code to handle Unicode conversion.
2 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005 Ben Wing. 2 Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2010 Ben Wing.
3 3
4 This file is part of XEmacs. 4 This file is part of XEmacs.
5 5
6 XEmacs is free software; you can redistribute it and/or modify it 6 XEmacs is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the 7 under the terms of the GNU General Public License as published by the
3288 Vunicode_invalid_string = Qnil; 3288 Vunicode_invalid_string = Qnil;
3289 staticpro (&Vutf_8_invalid_string); 3289 staticpro (&Vutf_8_invalid_string);
3290 Vutf_8_invalid_string = Qnil; 3290 Vutf_8_invalid_string = Qnil;
3291 #endif /* MULE */ 3291 #endif /* MULE */
3292 } 3292 }
3293
3294 void
3295 complex_vars_of_unicode (void)
3296 {
3297 /* Create the coding system named by Qutf_8 here in C, via a single call to Fmake_coding_system_internal(). We used to define this in unicode.el. But we need it early for
3298 Cygwin 1.7 -- used in LOCAL_FILE_FORMAT_TO_TSTR() et al. */
3299 Fmake_coding_system_internal
3300 (Qutf_8, Qunicode, /* the symbols passed as the first two arguments */
3301 build_msg_string ("UTF-8"), /* short string argument; NOTE(review): presumably the description -- confirm against Fmake_coding_system_internal() */
3302 nconc2 (list4 (Qdocumentation, /* final argument: two lists of alternating symbol/value entries, combined by nconc2() */
3303 build_msg_string (
3304 "UTF-8 Unicode encoding -- ASCII-compatible 8-bit variable-width encoding\n"
3305 "sharing the following principles with the Mule-internal encoding:\n"
3306 "\n"
3307 " -- All ASCII characters (codepoints 0 through 127) are represented\n"
3308 " by themselves (i.e. using one byte, with the same value as the\n"
3309 " ASCII codepoint), and these bytes are disjoint from bytes\n"
3310 " representing non-ASCII characters.\n"
3311 "\n"
3312 " This means that any 8-bit clean application can safely process\n"
3313 " UTF-8-encoded text as it were ASCII, with no corruption (e.g. a\n"
3314 " '/' byte is always a slash character, never the second byte of\n"
3315 " some other character, as with Big5, so a pathname encoded in\n"
3316 " UTF-8 can safely be split up into components and reassembled\n"
3317 " again using standard ASCII processes).\n"
3318 "\n"
3319 " -- Leading bytes and non-leading bytes in the encoding of a\n"
3320 " character are disjoint, so moving backwards is easy.\n"
3321 "\n"
3322 " -- Given only the leading byte, you know how many following bytes\n"
3323 " are present.\n"
3324 ),
3325 Qmnemonic, build_string ("UTF8")), /* Qmnemonic is paired with the string "UTF8" */
3326 list2 (Qunicode_type, Qutf_8))); /* second list: Qunicode_type paired with Qutf_8 */
3327 }