comparison vendor/pear/net_idna2/Net/IDNA2.php @ 0:1e000243b222

vanilla 1.3.3 distro, I hope
author Charlie Root
date Thu, 04 Jan 2018 15:50:29 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1e000243b222
1 <?php
2
3 // {{{ license
4
5 /* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4 foldmethod=marker: */
6 //
7 // +----------------------------------------------------------------------+
8 // | This library is free software; you can redistribute it and/or modify |
9 // | it under the terms of the GNU Lesser General Public License as |
10 // | published by the Free Software Foundation; either version 2.1 of the |
11 // | License, or (at your option) any later version. |
12 // | |
13 // | This library is distributed in the hope that it will be useful, but |
14 // | WITHOUT ANY WARRANTY; without even the implied warranty of |
15 // | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
16 // | Lesser General Public License for more details. |
17 // | |
18 // | You should have received a copy of the GNU Lesser General Public |
19 // | License along with this library; if not, write to the Free Software |
20 // | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 |
21 // | USA. |
22 // +----------------------------------------------------------------------+
23 //
24
25 // }}}
26 require_once 'Net/IDNA2/Exception.php';
27 require_once 'Net/IDNA2/Exception/Nameprep.php';
28
29 /**
30 * Encode/decode Internationalized Domain Names.
31 *
32 * This class converts internationalized domain names (see RFC 3490 for
33 * details), as used by various registries worldwide, between their
34 * original (localized) form and the encoded form in which they are used
35 * in the DNS (Domain Name System).
36 *
37 * The class provides two public methods, encode() and decode(), which do exactly
38 * what you would expect them to do. You are allowed to use complete domain names,
39 * simple strings and complete email addresses as well. That means that you may
40 * use any of the following notations:
41 *
42 * - www.nörgler.com
43 * - xn--nrgler-wxa
44 * - xn--brse-5qa.xn--knrz-1ra.info
45 *
46 * Unicode input might be given as either UTF-8 string, UCS-4 string or UCS-4
47 * array. Unicode output is available in the same formats.
48 * You can select your preferred format via {@link setParams()}.
49 *
50 * ACE input and output is always expected to be ASCII.
51 *
52 * @package Net
53 * @author Markus Nix <mnix@docuverse.de>
54 * @author Matthias Sommerfeld <mso@phlylabs.de>
55 * @author Stefan Neufeind <pear.neufeind@speedpartner.de>
56 * @version $Id$
57 */
58 class Net_IDNA2
59 {
60 // {{{ npdata
61 /**
62 * These Unicode codepoints are
63 * mapped to nothing. See RFC 3454 for details.
64 *
65 * @static
66 * @var array
67 * @access private
68 */
69 private static $_np_map_nothing = array(
70 0xAD,
71 0x34F,
72 0x1806,
73 0x180B,
74 0x180C,
75 0x180D,
76 0x200B,
77 0x200C,
78 0x200D,
79 0x2060,
80 0xFE00,
81 0xFE01,
82 0xFE02,
83 0xFE03,
84 0xFE04,
85 0xFE05,
86 0xFE06,
87 0xFE07,
88 0xFE08,
89 0xFE09,
90 0xFE0A,
91 0xFE0B,
92 0xFE0C,
93 0xFE0D,
94 0xFE0E,
95 0xFE0F,
96 0xFEFF
97 );
98
99 /**
100 * Prohibited codepints
101 *
102 * @static
103 * @var array
104 * @access private
105 */
106 private static $_general_prohibited = array(
107 0,
108 1,
109 2,
110 3,
111 4,
112 5,
113 6,
114 7,
115 8,
116 9,
117 0xA,
118 0xB,
119 0xC,
120 0xD,
121 0xE,
122 0xF,
123 0x10,
124 0x11,
125 0x12,
126 0x13,
127 0x14,
128 0x15,
129 0x16,
130 0x17,
131 0x18,
132 0x19,
133 0x1A,
134 0x1B,
135 0x1C,
136 0x1D,
137 0x1E,
138 0x1F,
139 0x20,
140 0x21,
141 0x22,
142 0x23,
143 0x24,
144 0x25,
145 0x26,
146 0x27,
147 0x28,
148 0x29,
149 0x2A,
150 0x2B,
151 0x2C,
152 0x2F,
153 0x3B,
154 0x3C,
155 0x3D,
156 0x3E,
157 0x3F,
158 0x40,
159 0x5B,
160 0x5C,
161 0x5D,
162 0x5E,
163 0x5F,
164 0x60,
165 0x7B,
166 0x7C,
167 0x7D,
168 0x7E,
169 0x7F,
170 0x3002
171 );
172
173 /**
174 * Codepoints prohibited by Nameprep
175 * @static
176 * @var array
177 * @access private
178 */
179 private static $_np_prohibit = array(
180 0xA0,
181 0x1680,
182 0x2000,
183 0x2001,
184 0x2002,
185 0x2003,
186 0x2004,
187 0x2005,
188 0x2006,
189 0x2007,
190 0x2008,
191 0x2009,
192 0x200A,
193 0x200B,
194 0x202F,
195 0x205F,
196 0x3000,
197 0x6DD,
198 0x70F,
199 0x180E,
200 0x200C,
201 0x200D,
202 0x2028,
203 0x2029,
204 0xFEFF,
205 0xFFF9,
206 0xFFFA,
207 0xFFFB,
208 0xFFFC,
209 0xFFFE,
210 0xFFFF,
211 0x1FFFE,
212 0x1FFFF,
213 0x2FFFE,
214 0x2FFFF,
215 0x3FFFE,
216 0x3FFFF,
217 0x4FFFE,
218 0x4FFFF,
219 0x5FFFE,
220 0x5FFFF,
221 0x6FFFE,
222 0x6FFFF,
223 0x7FFFE,
224 0x7FFFF,
225 0x8FFFE,
226 0x8FFFF,
227 0x9FFFE,
228 0x9FFFF,
229 0xAFFFE,
230 0xAFFFF,
231 0xBFFFE,
232 0xBFFFF,
233 0xCFFFE,
234 0xCFFFF,
235 0xDFFFE,
236 0xDFFFF,
237 0xEFFFE,
238 0xEFFFF,
239 0xFFFFE,
240 0xFFFFF,
241 0x10FFFE,
242 0x10FFFF,
243 0xFFF9,
244 0xFFFA,
245 0xFFFB,
246 0xFFFC,
247 0xFFFD,
248 0x340,
249 0x341,
250 0x200E,
251 0x200F,
252 0x202A,
253 0x202B,
254 0x202C,
255 0x202D,
256 0x202E,
257 0x206A,
258 0x206B,
259 0x206C,
260 0x206D,
261 0x206E,
262 0x206F,
263 0xE0001
264 );
265
266 /**
267 * Codepoint ranges prohibited by nameprep
268 *
269 * @static
270 * @var array
271 * @access private
272 */
273 private static $_np_prohibit_ranges = array(
274 array(0x80, 0x9F ),
275 array(0x2060, 0x206F ),
276 array(0x1D173, 0x1D17A ),
277 array(0xE000, 0xF8FF ),
278 array(0xF0000, 0xFFFFD ),
279 array(0x100000, 0x10FFFD),
280 array(0xFDD0, 0xFDEF ),
281 array(0xD800, 0xDFFF ),
282 array(0x2FF0, 0x2FFB ),
283 array(0xE0020, 0xE007F )
284 );
285
286 /**
287 * Replacement mappings (casemapping, replacement sequences, ...)
288 *
289 * @static
290 * @var array
291 * @access private
292 */
293 private static $_np_replacemaps = array(
294 0x41 => array(0x61),
295 0x42 => array(0x62),
296 0x43 => array(0x63),
297 0x44 => array(0x64),
298 0x45 => array(0x65),
299 0x46 => array(0x66),
300 0x47 => array(0x67),
301 0x48 => array(0x68),
302 0x49 => array(0x69),
303 0x4A => array(0x6A),
304 0x4B => array(0x6B),
305 0x4C => array(0x6C),
306 0x4D => array(0x6D),
307 0x4E => array(0x6E),
308 0x4F => array(0x6F),
309 0x50 => array(0x70),
310 0x51 => array(0x71),
311 0x52 => array(0x72),
312 0x53 => array(0x73),
313 0x54 => array(0x74),
314 0x55 => array(0x75),
315 0x56 => array(0x76),
316 0x57 => array(0x77),
317 0x58 => array(0x78),
318 0x59 => array(0x79),
319 0x5A => array(0x7A),
320 0xB5 => array(0x3BC),
321 0xC0 => array(0xE0),
322 0xC1 => array(0xE1),
323 0xC2 => array(0xE2),
324 0xC3 => array(0xE3),
325 0xC4 => array(0xE4),
326 0xC5 => array(0xE5),
327 0xC6 => array(0xE6),
328 0xC7 => array(0xE7),
329 0xC8 => array(0xE8),
330 0xC9 => array(0xE9),
331 0xCA => array(0xEA),
332 0xCB => array(0xEB),
333 0xCC => array(0xEC),
334 0xCD => array(0xED),
335 0xCE => array(0xEE),
336 0xCF => array(0xEF),
337 0xD0 => array(0xF0),
338 0xD1 => array(0xF1),
339 0xD2 => array(0xF2),
340 0xD3 => array(0xF3),
341 0xD4 => array(0xF4),
342 0xD5 => array(0xF5),
343 0xD6 => array(0xF6),
344 0xD8 => array(0xF8),
345 0xD9 => array(0xF9),
346 0xDA => array(0xFA),
347 0xDB => array(0xFB),
348 0xDC => array(0xFC),
349 0xDD => array(0xFD),
350 0xDE => array(0xFE),
351 0xDF => array(0x73, 0x73),
352 0x100 => array(0x101),
353 0x102 => array(0x103),
354 0x104 => array(0x105),
355 0x106 => array(0x107),
356 0x108 => array(0x109),
357 0x10A => array(0x10B),
358 0x10C => array(0x10D),
359 0x10E => array(0x10F),
360 0x110 => array(0x111),
361 0x112 => array(0x113),
362 0x114 => array(0x115),
363 0x116 => array(0x117),
364 0x118 => array(0x119),
365 0x11A => array(0x11B),
366 0x11C => array(0x11D),
367 0x11E => array(0x11F),
368 0x120 => array(0x121),
369 0x122 => array(0x123),
370 0x124 => array(0x125),
371 0x126 => array(0x127),
372 0x128 => array(0x129),
373 0x12A => array(0x12B),
374 0x12C => array(0x12D),
375 0x12E => array(0x12F),
376 0x130 => array(0x69, 0x307),
377 0x132 => array(0x133),
378 0x134 => array(0x135),
379 0x136 => array(0x137),
380 0x139 => array(0x13A),
381 0x13B => array(0x13C),
382 0x13D => array(0x13E),
383 0x13F => array(0x140),
384 0x141 => array(0x142),
385 0x143 => array(0x144),
386 0x145 => array(0x146),
387 0x147 => array(0x148),
388 0x149 => array(0x2BC, 0x6E),
389 0x14A => array(0x14B),
390 0x14C => array(0x14D),
391 0x14E => array(0x14F),
392 0x150 => array(0x151),
393 0x152 => array(0x153),
394 0x154 => array(0x155),
395 0x156 => array(0x157),
396 0x158 => array(0x159),
397 0x15A => array(0x15B),
398 0x15C => array(0x15D),
399 0x15E => array(0x15F),
400 0x160 => array(0x161),
401 0x162 => array(0x163),
402 0x164 => array(0x165),
403 0x166 => array(0x167),
404 0x168 => array(0x169),
405 0x16A => array(0x16B),
406 0x16C => array(0x16D),
407 0x16E => array(0x16F),
408 0x170 => array(0x171),
409 0x172 => array(0x173),
410 0x174 => array(0x175),
411 0x176 => array(0x177),
412 0x178 => array(0xFF),
413 0x179 => array(0x17A),
414 0x17B => array(0x17C),
415 0x17D => array(0x17E),
416 0x17F => array(0x73),
417 0x181 => array(0x253),
418 0x182 => array(0x183),
419 0x184 => array(0x185),
420 0x186 => array(0x254),
421 0x187 => array(0x188),
422 0x189 => array(0x256),
423 0x18A => array(0x257),
424 0x18B => array(0x18C),
425 0x18E => array(0x1DD),
426 0x18F => array(0x259),
427 0x190 => array(0x25B),
428 0x191 => array(0x192),
429 0x193 => array(0x260),
430 0x194 => array(0x263),
431 0x196 => array(0x269),
432 0x197 => array(0x268),
433 0x198 => array(0x199),
434 0x19C => array(0x26F),
435 0x19D => array(0x272),
436 0x19F => array(0x275),
437 0x1A0 => array(0x1A1),
438 0x1A2 => array(0x1A3),
439 0x1A4 => array(0x1A5),
440 0x1A6 => array(0x280),
441 0x1A7 => array(0x1A8),
442 0x1A9 => array(0x283),
443 0x1AC => array(0x1AD),
444 0x1AE => array(0x288),
445 0x1AF => array(0x1B0),
446 0x1B1 => array(0x28A),
447 0x1B2 => array(0x28B),
448 0x1B3 => array(0x1B4),
449 0x1B5 => array(0x1B6),
450 0x1B7 => array(0x292),
451 0x1B8 => array(0x1B9),
452 0x1BC => array(0x1BD),
453 0x1C4 => array(0x1C6),
454 0x1C5 => array(0x1C6),
455 0x1C7 => array(0x1C9),
456 0x1C8 => array(0x1C9),
457 0x1CA => array(0x1CC),
458 0x1CB => array(0x1CC),
459 0x1CD => array(0x1CE),
460 0x1CF => array(0x1D0),
461 0x1D1 => array(0x1D2),
462 0x1D3 => array(0x1D4),
463 0x1D5 => array(0x1D6),
464 0x1D7 => array(0x1D8),
465 0x1D9 => array(0x1DA),
466 0x1DB => array(0x1DC),
467 0x1DE => array(0x1DF),
468 0x1E0 => array(0x1E1),
469 0x1E2 => array(0x1E3),
470 0x1E4 => array(0x1E5),
471 0x1E6 => array(0x1E7),
472 0x1E8 => array(0x1E9),
473 0x1EA => array(0x1EB),
474 0x1EC => array(0x1ED),
475 0x1EE => array(0x1EF),
476 0x1F0 => array(0x6A, 0x30C),
477 0x1F1 => array(0x1F3),
478 0x1F2 => array(0x1F3),
479 0x1F4 => array(0x1F5),
480 0x1F6 => array(0x195),
481 0x1F7 => array(0x1BF),
482 0x1F8 => array(0x1F9),
483 0x1FA => array(0x1FB),
484 0x1FC => array(0x1FD),
485 0x1FE => array(0x1FF),
486 0x200 => array(0x201),
487 0x202 => array(0x203),
488 0x204 => array(0x205),
489 0x206 => array(0x207),
490 0x208 => array(0x209),
491 0x20A => array(0x20B),
492 0x20C => array(0x20D),
493 0x20E => array(0x20F),
494 0x210 => array(0x211),
495 0x212 => array(0x213),
496 0x214 => array(0x215),
497 0x216 => array(0x217),
498 0x218 => array(0x219),
499 0x21A => array(0x21B),
500 0x21C => array(0x21D),
501 0x21E => array(0x21F),
502 0x220 => array(0x19E),
503 0x222 => array(0x223),
504 0x224 => array(0x225),
505 0x226 => array(0x227),
506 0x228 => array(0x229),
507 0x22A => array(0x22B),
508 0x22C => array(0x22D),
509 0x22E => array(0x22F),
510 0x230 => array(0x231),
511 0x232 => array(0x233),
512 0x345 => array(0x3B9),
513 0x37A => array(0x20, 0x3B9),
514 0x386 => array(0x3AC),
515 0x388 => array(0x3AD),
516 0x389 => array(0x3AE),
517 0x38A => array(0x3AF),
518 0x38C => array(0x3CC),
519 0x38E => array(0x3CD),
520 0x38F => array(0x3CE),
521 0x390 => array(0x3B9, 0x308, 0x301),
522 0x391 => array(0x3B1),
523 0x392 => array(0x3B2),
524 0x393 => array(0x3B3),
525 0x394 => array(0x3B4),
526 0x395 => array(0x3B5),
527 0x396 => array(0x3B6),
528 0x397 => array(0x3B7),
529 0x398 => array(0x3B8),
530 0x399 => array(0x3B9),
531 0x39A => array(0x3BA),
532 0x39B => array(0x3BB),
533 0x39C => array(0x3BC),
534 0x39D => array(0x3BD),
535 0x39E => array(0x3BE),
536 0x39F => array(0x3BF),
537 0x3A0 => array(0x3C0),
538 0x3A1 => array(0x3C1),
539 0x3A3 => array(0x3C3),
540 0x3A4 => array(0x3C4),
541 0x3A5 => array(0x3C5),
542 0x3A6 => array(0x3C6),
543 0x3A7 => array(0x3C7),
544 0x3A8 => array(0x3C8),
545 0x3A9 => array(0x3C9),
546 0x3AA => array(0x3CA),
547 0x3AB => array(0x3CB),
548 0x3B0 => array(0x3C5, 0x308, 0x301),
549 0x3C2 => array(0x3C3),
550 0x3D0 => array(0x3B2),
551 0x3D1 => array(0x3B8),
552 0x3D2 => array(0x3C5),
553 0x3D3 => array(0x3CD),
554 0x3D4 => array(0x3CB),
555 0x3D5 => array(0x3C6),
556 0x3D6 => array(0x3C0),
557 0x3D8 => array(0x3D9),
558 0x3DA => array(0x3DB),
559 0x3DC => array(0x3DD),
560 0x3DE => array(0x3DF),
561 0x3E0 => array(0x3E1),
562 0x3E2 => array(0x3E3),
563 0x3E4 => array(0x3E5),
564 0x3E6 => array(0x3E7),
565 0x3E8 => array(0x3E9),
566 0x3EA => array(0x3EB),
567 0x3EC => array(0x3ED),
568 0x3EE => array(0x3EF),
569 0x3F0 => array(0x3BA),
570 0x3F1 => array(0x3C1),
571 0x3F2 => array(0x3C3),
572 0x3F4 => array(0x3B8),
573 0x3F5 => array(0x3B5),
574 0x400 => array(0x450),
575 0x401 => array(0x451),
576 0x402 => array(0x452),
577 0x403 => array(0x453),
578 0x404 => array(0x454),
579 0x405 => array(0x455),
580 0x406 => array(0x456),
581 0x407 => array(0x457),
582 0x408 => array(0x458),
583 0x409 => array(0x459),
584 0x40A => array(0x45A),
585 0x40B => array(0x45B),
586 0x40C => array(0x45C),
587 0x40D => array(0x45D),
588 0x40E => array(0x45E),
589 0x40F => array(0x45F),
590 0x410 => array(0x430),
591 0x411 => array(0x431),
592 0x412 => array(0x432),
593 0x413 => array(0x433),
594 0x414 => array(0x434),
595 0x415 => array(0x435),
596 0x416 => array(0x436),
597 0x417 => array(0x437),
598 0x418 => array(0x438),
599 0x419 => array(0x439),
600 0x41A => array(0x43A),
601 0x41B => array(0x43B),
602 0x41C => array(0x43C),
603 0x41D => array(0x43D),
604 0x41E => array(0x43E),
605 0x41F => array(0x43F),
606 0x420 => array(0x440),
607 0x421 => array(0x441),
608 0x422 => array(0x442),
609 0x423 => array(0x443),
610 0x424 => array(0x444),
611 0x425 => array(0x445),
612 0x426 => array(0x446),
613 0x427 => array(0x447),
614 0x428 => array(0x448),
615 0x429 => array(0x449),
616 0x42A => array(0x44A),
617 0x42B => array(0x44B),
618 0x42C => array(0x44C),
619 0x42D => array(0x44D),
620 0x42E => array(0x44E),
621 0x42F => array(0x44F),
622 0x460 => array(0x461),
623 0x462 => array(0x463),
624 0x464 => array(0x465),
625 0x466 => array(0x467),
626 0x468 => array(0x469),
627 0x46A => array(0x46B),
628 0x46C => array(0x46D),
629 0x46E => array(0x46F),
630 0x470 => array(0x471),
631 0x472 => array(0x473),
632 0x474 => array(0x475),
633 0x476 => array(0x477),
634 0x478 => array(0x479),
635 0x47A => array(0x47B),
636 0x47C => array(0x47D),
637 0x47E => array(0x47F),
638 0x480 => array(0x481),
639 0x48A => array(0x48B),
640 0x48C => array(0x48D),
641 0x48E => array(0x48F),
642 0x490 => array(0x491),
643 0x492 => array(0x493),
644 0x494 => array(0x495),
645 0x496 => array(0x497),
646 0x498 => array(0x499),
647 0x49A => array(0x49B),
648 0x49C => array(0x49D),
649 0x49E => array(0x49F),
650 0x4A0 => array(0x4A1),
651 0x4A2 => array(0x4A3),
652 0x4A4 => array(0x4A5),
653 0x4A6 => array(0x4A7),
654 0x4A8 => array(0x4A9),
655 0x4AA => array(0x4AB),
656 0x4AC => array(0x4AD),
657 0x4AE => array(0x4AF),
658 0x4B0 => array(0x4B1),
659 0x4B2 => array(0x4B3),
660 0x4B4 => array(0x4B5),
661 0x4B6 => array(0x4B7),
662 0x4B8 => array(0x4B9),
663 0x4BA => array(0x4BB),
664 0x4BC => array(0x4BD),
665 0x4BE => array(0x4BF),
666 0x4C1 => array(0x4C2),
667 0x4C3 => array(0x4C4),
668 0x4C5 => array(0x4C6),
669 0x4C7 => array(0x4C8),
670 0x4C9 => array(0x4CA),
671 0x4CB => array(0x4CC),
672 0x4CD => array(0x4CE),
673 0x4D0 => array(0x4D1),
674 0x4D2 => array(0x4D3),
675 0x4D4 => array(0x4D5),
676 0x4D6 => array(0x4D7),
677 0x4D8 => array(0x4D9),
678 0x4DA => array(0x4DB),
679 0x4DC => array(0x4DD),
680 0x4DE => array(0x4DF),
681 0x4E0 => array(0x4E1),
682 0x4E2 => array(0x4E3),
683 0x4E4 => array(0x4E5),
684 0x4E6 => array(0x4E7),
685 0x4E8 => array(0x4E9),
686 0x4EA => array(0x4EB),
687 0x4EC => array(0x4ED),
688 0x4EE => array(0x4EF),
689 0x4F0 => array(0x4F1),
690 0x4F2 => array(0x4F3),
691 0x4F4 => array(0x4F5),
692 0x4F8 => array(0x4F9),
693 0x500 => array(0x501),
694 0x502 => array(0x503),
695 0x504 => array(0x505),
696 0x506 => array(0x507),
697 0x508 => array(0x509),
698 0x50A => array(0x50B),
699 0x50C => array(0x50D),
700 0x50E => array(0x50F),
701 0x531 => array(0x561),
702 0x532 => array(0x562),
703 0x533 => array(0x563),
704 0x534 => array(0x564),
705 0x535 => array(0x565),
706 0x536 => array(0x566),
707 0x537 => array(0x567),
708 0x538 => array(0x568),
709 0x539 => array(0x569),
710 0x53A => array(0x56A),
711 0x53B => array(0x56B),
712 0x53C => array(0x56C),
713 0x53D => array(0x56D),
714 0x53E => array(0x56E),
715 0x53F => array(0x56F),
716 0x540 => array(0x570),
717 0x541 => array(0x571),
718 0x542 => array(0x572),
719 0x543 => array(0x573),
720 0x544 => array(0x574),
721 0x545 => array(0x575),
722 0x546 => array(0x576),
723 0x547 => array(0x577),
724 0x548 => array(0x578),
725 0x549 => array(0x579),
726 0x54A => array(0x57A),
727 0x54B => array(0x57B),
728 0x54C => array(0x57C),
729 0x54D => array(0x57D),
730 0x54E => array(0x57E),
731 0x54F => array(0x57F),
732 0x550 => array(0x580),
733 0x551 => array(0x581),
734 0x552 => array(0x582),
735 0x553 => array(0x583),
736 0x554 => array(0x584),
737 0x555 => array(0x585),
738 0x556 => array(0x586),
739 0x587 => array(0x565, 0x582),
740 0x1E00 => array(0x1E01),
741 0x1E02 => array(0x1E03),
742 0x1E04 => array(0x1E05),
743 0x1E06 => array(0x1E07),
744 0x1E08 => array(0x1E09),
745 0x1E0A => array(0x1E0B),
746 0x1E0C => array(0x1E0D),
747 0x1E0E => array(0x1E0F),
748 0x1E10 => array(0x1E11),
749 0x1E12 => array(0x1E13),
750 0x1E14 => array(0x1E15),
751 0x1E16 => array(0x1E17),
752 0x1E18 => array(0x1E19),
753 0x1E1A => array(0x1E1B),
754 0x1E1C => array(0x1E1D),
755 0x1E1E => array(0x1E1F),
756 0x1E20 => array(0x1E21),
757 0x1E22 => array(0x1E23),
758 0x1E24 => array(0x1E25),
759 0x1E26 => array(0x1E27),
760 0x1E28 => array(0x1E29),
761 0x1E2A => array(0x1E2B),
762 0x1E2C => array(0x1E2D),
763 0x1E2E => array(0x1E2F),
764 0x1E30 => array(0x1E31),
765 0x1E32 => array(0x1E33),
766 0x1E34 => array(0x1E35),
767 0x1E36 => array(0x1E37),
768 0x1E38 => array(0x1E39),
769 0x1E3A => array(0x1E3B),
770 0x1E3C => array(0x1E3D),
771 0x1E3E => array(0x1E3F),
772 0x1E40 => array(0x1E41),
773 0x1E42 => array(0x1E43),
774 0x1E44 => array(0x1E45),
775 0x1E46 => array(0x1E47),
776 0x1E48 => array(0x1E49),
777 0x1E4A => array(0x1E4B),
778 0x1E4C => array(0x1E4D),
779 0x1E4E => array(0x1E4F),
780 0x1E50 => array(0x1E51),
781 0x1E52 => array(0x1E53),
782 0x1E54 => array(0x1E55),
783 0x1E56 => array(0x1E57),
784 0x1E58 => array(0x1E59),
785 0x1E5A => array(0x1E5B),
786 0x1E5C => array(0x1E5D),
787 0x1E5E => array(0x1E5F),
788 0x1E60 => array(0x1E61),
789 0x1E62 => array(0x1E63),
790 0x1E64 => array(0x1E65),
791 0x1E66 => array(0x1E67),
792 0x1E68 => array(0x1E69),
793 0x1E6A => array(0x1E6B),
794 0x1E6C => array(0x1E6D),
795 0x1E6E => array(0x1E6F),
796 0x1E70 => array(0x1E71),
797 0x1E72 => array(0x1E73),
798 0x1E74 => array(0x1E75),
799 0x1E76 => array(0x1E77),
800 0x1E78 => array(0x1E79),
801 0x1E7A => array(0x1E7B),
802 0x1E7C => array(0x1E7D),
803 0x1E7E => array(0x1E7F),
804 0x1E80 => array(0x1E81),
805 0x1E82 => array(0x1E83),
806 0x1E84 => array(0x1E85),
807 0x1E86 => array(0x1E87),
808 0x1E88 => array(0x1E89),
809 0x1E8A => array(0x1E8B),
810 0x1E8C => array(0x1E8D),
811 0x1E8E => array(0x1E8F),
812 0x1E90 => array(0x1E91),
813 0x1E92 => array(0x1E93),
814 0x1E94 => array(0x1E95),
815 0x1E96 => array(0x68, 0x331),
816 0x1E97 => array(0x74, 0x308),
817 0x1E98 => array(0x77, 0x30A),
818 0x1E99 => array(0x79, 0x30A),
819 0x1E9A => array(0x61, 0x2BE),
820 0x1E9B => array(0x1E61),
821 0x1EA0 => array(0x1EA1),
822 0x1EA2 => array(0x1EA3),
823 0x1EA4 => array(0x1EA5),
824 0x1EA6 => array(0x1EA7),
825 0x1EA8 => array(0x1EA9),
826 0x1EAA => array(0x1EAB),
827 0x1EAC => array(0x1EAD),
828 0x1EAE => array(0x1EAF),
829 0x1EB0 => array(0x1EB1),
830 0x1EB2 => array(0x1EB3),
831 0x1EB4 => array(0x1EB5),
832 0x1EB6 => array(0x1EB7),
833 0x1EB8 => array(0x1EB9),
834 0x1EBA => array(0x1EBB),
835 0x1EBC => array(0x1EBD),
836 0x1EBE => array(0x1EBF),
837 0x1EC0 => array(0x1EC1),
838 0x1EC2 => array(0x1EC3),
839 0x1EC4 => array(0x1EC5),
840 0x1EC6 => array(0x1EC7),
841 0x1EC8 => array(0x1EC9),
842 0x1ECA => array(0x1ECB),
843 0x1ECC => array(0x1ECD),
844 0x1ECE => array(0x1ECF),
845 0x1ED0 => array(0x1ED1),
846 0x1ED2 => array(0x1ED3),
847 0x1ED4 => array(0x1ED5),
848 0x1ED6 => array(0x1ED7),
849 0x1ED8 => array(0x1ED9),
850 0x1EDA => array(0x1EDB),
851 0x1EDC => array(0x1EDD),
852 0x1EDE => array(0x1EDF),
853 0x1EE0 => array(0x1EE1),
854 0x1EE2 => array(0x1EE3),
855 0x1EE4 => array(0x1EE5),
856 0x1EE6 => array(0x1EE7),
857 0x1EE8 => array(0x1EE9),
858 0x1EEA => array(0x1EEB),
859 0x1EEC => array(0x1EED),
860 0x1EEE => array(0x1EEF),
861 0x1EF0 => array(0x1EF1),
862 0x1EF2 => array(0x1EF3),
863 0x1EF4 => array(0x1EF5),
864 0x1EF6 => array(0x1EF7),
865 0x1EF8 => array(0x1EF9),
866 0x1F08 => array(0x1F00),
867 0x1F09 => array(0x1F01),
868 0x1F0A => array(0x1F02),
869 0x1F0B => array(0x1F03),
870 0x1F0C => array(0x1F04),
871 0x1F0D => array(0x1F05),
872 0x1F0E => array(0x1F06),
873 0x1F0F => array(0x1F07),
874 0x1F18 => array(0x1F10),
875 0x1F19 => array(0x1F11),
876 0x1F1A => array(0x1F12),
877 0x1F1B => array(0x1F13),
878 0x1F1C => array(0x1F14),
879 0x1F1D => array(0x1F15),
880 0x1F28 => array(0x1F20),
881 0x1F29 => array(0x1F21),
882 0x1F2A => array(0x1F22),
883 0x1F2B => array(0x1F23),
884 0x1F2C => array(0x1F24),
885 0x1F2D => array(0x1F25),
886 0x1F2E => array(0x1F26),
887 0x1F2F => array(0x1F27),
888 0x1F38 => array(0x1F30),
889 0x1F39 => array(0x1F31),
890 0x1F3A => array(0x1F32),
891 0x1F3B => array(0x1F33),
892 0x1F3C => array(0x1F34),
893 0x1F3D => array(0x1F35),
894 0x1F3E => array(0x1F36),
895 0x1F3F => array(0x1F37),
896 0x1F48 => array(0x1F40),
897 0x1F49 => array(0x1F41),
898 0x1F4A => array(0x1F42),
899 0x1F4B => array(0x1F43),
900 0x1F4C => array(0x1F44),
901 0x1F4D => array(0x1F45),
902 0x1F50 => array(0x3C5, 0x313),
903 0x1F52 => array(0x3C5, 0x313, 0x300),
904 0x1F54 => array(0x3C5, 0x313, 0x301),
905 0x1F56 => array(0x3C5, 0x313, 0x342),
906 0x1F59 => array(0x1F51),
907 0x1F5B => array(0x1F53),
908 0x1F5D => array(0x1F55),
909 0x1F5F => array(0x1F57),
910 0x1F68 => array(0x1F60),
911 0x1F69 => array(0x1F61),
912 0x1F6A => array(0x1F62),
913 0x1F6B => array(0x1F63),
914 0x1F6C => array(0x1F64),
915 0x1F6D => array(0x1F65),
916 0x1F6E => array(0x1F66),
917 0x1F6F => array(0x1F67),
918 0x1F80 => array(0x1F00, 0x3B9),
919 0x1F81 => array(0x1F01, 0x3B9),
920 0x1F82 => array(0x1F02, 0x3B9),
921 0x1F83 => array(0x1F03, 0x3B9),
922 0x1F84 => array(0x1F04, 0x3B9),
923 0x1F85 => array(0x1F05, 0x3B9),
924 0x1F86 => array(0x1F06, 0x3B9),
925 0x1F87 => array(0x1F07, 0x3B9),
926 0x1F88 => array(0x1F00, 0x3B9),
927 0x1F89 => array(0x1F01, 0x3B9),
928 0x1F8A => array(0x1F02, 0x3B9),
929 0x1F8B => array(0x1F03, 0x3B9),
930 0x1F8C => array(0x1F04, 0x3B9),
931 0x1F8D => array(0x1F05, 0x3B9),
932 0x1F8E => array(0x1F06, 0x3B9),
933 0x1F8F => array(0x1F07, 0x3B9),
934 0x1F90 => array(0x1F20, 0x3B9),
935 0x1F91 => array(0x1F21, 0x3B9),
936 0x1F92 => array(0x1F22, 0x3B9),
937 0x1F93 => array(0x1F23, 0x3B9),
938 0x1F94 => array(0x1F24, 0x3B9),
939 0x1F95 => array(0x1F25, 0x3B9),
940 0x1F96 => array(0x1F26, 0x3B9),
941 0x1F97 => array(0x1F27, 0x3B9),
942 0x1F98 => array(0x1F20, 0x3B9),
943 0x1F99 => array(0x1F21, 0x3B9),
944 0x1F9A => array(0x1F22, 0x3B9),
945 0x1F9B => array(0x1F23, 0x3B9),
946 0x1F9C => array(0x1F24, 0x3B9),
947 0x1F9D => array(0x1F25, 0x3B9),
948 0x1F9E => array(0x1F26, 0x3B9),
949 0x1F9F => array(0x1F27, 0x3B9),
950 0x1FA0 => array(0x1F60, 0x3B9),
951 0x1FA1 => array(0x1F61, 0x3B9),
952 0x1FA2 => array(0x1F62, 0x3B9),
953 0x1FA3 => array(0x1F63, 0x3B9),
954 0x1FA4 => array(0x1F64, 0x3B9),
955 0x1FA5 => array(0x1F65, 0x3B9),
956 0x1FA6 => array(0x1F66, 0x3B9),
957 0x1FA7 => array(0x1F67, 0x3B9),
958 0x1FA8 => array(0x1F60, 0x3B9),
959 0x1FA9 => array(0x1F61, 0x3B9),
960 0x1FAA => array(0x1F62, 0x3B9),
961 0x1FAB => array(0x1F63, 0x3B9),
962 0x1FAC => array(0x1F64, 0x3B9),
963 0x1FAD => array(0x1F65, 0x3B9),
964 0x1FAE => array(0x1F66, 0x3B9),
965 0x1FAF => array(0x1F67, 0x3B9),
966 0x1FB2 => array(0x1F70, 0x3B9),
967 0x1FB3 => array(0x3B1, 0x3B9),
968 0x1FB4 => array(0x3AC, 0x3B9),
969 0x1FB6 => array(0x3B1, 0x342),
970 0x1FB7 => array(0x3B1, 0x342, 0x3B9),
971 0x1FB8 => array(0x1FB0),
972 0x1FB9 => array(0x1FB1),
973 0x1FBA => array(0x1F70),
974 0x1FBB => array(0x1F71),
975 0x1FBC => array(0x3B1, 0x3B9),
976 0x1FBE => array(0x3B9),
977 0x1FC2 => array(0x1F74, 0x3B9),
978 0x1FC3 => array(0x3B7, 0x3B9),
979 0x1FC4 => array(0x3AE, 0x3B9),
980 0x1FC6 => array(0x3B7, 0x342),
981 0x1FC7 => array(0x3B7, 0x342, 0x3B9),
982 0x1FC8 => array(0x1F72),
983 0x1FC9 => array(0x1F73),
984 0x1FCA => array(0x1F74),
985 0x1FCB => array(0x1F75),
986 0x1FCC => array(0x3B7, 0x3B9),
987 0x1FD2 => array(0x3B9, 0x308, 0x300),
988 0x1FD3 => array(0x3B9, 0x308, 0x301),
989 0x1FD6 => array(0x3B9, 0x342),
990 0x1FD7 => array(0x3B9, 0x308, 0x342),
991 0x1FD8 => array(0x1FD0),
992 0x1FD9 => array(0x1FD1),
993 0x1FDA => array(0x1F76),
994 0x1FDB => array(0x1F77),
995 0x1FE2 => array(0x3C5, 0x308, 0x300),
996 0x1FE3 => array(0x3C5, 0x308, 0x301),
997 0x1FE4 => array(0x3C1, 0x313),
998 0x1FE6 => array(0x3C5, 0x342),
999 0x1FE7 => array(0x3C5, 0x308, 0x342),
1000 0x1FE8 => array(0x1FE0),
1001 0x1FE9 => array(0x1FE1),
1002 0x1FEA => array(0x1F7A),
1003 0x1FEB => array(0x1F7B),
1004 0x1FEC => array(0x1FE5),
1005 0x1FF2 => array(0x1F7C, 0x3B9),
1006 0x1FF3 => array(0x3C9, 0x3B9),
1007 0x1FF4 => array(0x3CE, 0x3B9),
1008 0x1FF6 => array(0x3C9, 0x342),
1009 0x1FF7 => array(0x3C9, 0x342, 0x3B9),
1010 0x1FF8 => array(0x1F78),
1011 0x1FF9 => array(0x1F79),
1012 0x1FFA => array(0x1F7C),
1013 0x1FFB => array(0x1F7D),
1014 0x1FFC => array(0x3C9, 0x3B9),
1015 0x20A8 => array(0x72, 0x73),
1016 0x2102 => array(0x63),
1017 0x2103 => array(0xB0, 0x63),
1018 0x2107 => array(0x25B),
1019 0x2109 => array(0xB0, 0x66),
1020 0x210B => array(0x68),
1021 0x210C => array(0x68),
1022 0x210D => array(0x68),
1023 0x2110 => array(0x69),
1024 0x2111 => array(0x69),
1025 0x2112 => array(0x6C),
1026 0x2115 => array(0x6E),
1027 0x2116 => array(0x6E, 0x6F),
1028 0x2119 => array(0x70),
1029 0x211A => array(0x71),
1030 0x211B => array(0x72),
1031 0x211C => array(0x72),
1032 0x211D => array(0x72),
1033 0x2120 => array(0x73, 0x6D),
1034 0x2121 => array(0x74, 0x65, 0x6C),
1035 0x2122 => array(0x74, 0x6D),
1036 0x2124 => array(0x7A),
1037 0x2126 => array(0x3C9),
1038 0x2128 => array(0x7A),
1039 0x212A => array(0x6B),
1040 0x212B => array(0xE5),
1041 0x212C => array(0x62),
1042 0x212D => array(0x63),
1043 0x2130 => array(0x65),
1044 0x2131 => array(0x66),
1045 0x2133 => array(0x6D),
1046 0x213E => array(0x3B3),
1047 0x213F => array(0x3C0),
1048 0x2145 => array(0x64),
1049 0x2160 => array(0x2170),
1050 0x2161 => array(0x2171),
1051 0x2162 => array(0x2172),
1052 0x2163 => array(0x2173),
1053 0x2164 => array(0x2174),
1054 0x2165 => array(0x2175),
1055 0x2166 => array(0x2176),
1056 0x2167 => array(0x2177),
1057 0x2168 => array(0x2178),
1058 0x2169 => array(0x2179),
1059 0x216A => array(0x217A),
1060 0x216B => array(0x217B),
1061 0x216C => array(0x217C),
1062 0x216D => array(0x217D),
1063 0x216E => array(0x217E),
1064 0x216F => array(0x217F),
1065 0x24B6 => array(0x24D0),
1066 0x24B7 => array(0x24D1),
1067 0x24B8 => array(0x24D2),
1068 0x24B9 => array(0x24D3),
1069 0x24BA => array(0x24D4),
1070 0x24BB => array(0x24D5),
1071 0x24BC => array(0x24D6),
1072 0x24BD => array(0x24D7),
1073 0x24BE => array(0x24D8),
1074 0x24BF => array(0x24D9),
1075 0x24C0 => array(0x24DA),
1076 0x24C1 => array(0x24DB),
1077 0x24C2 => array(0x24DC),
1078 0x24C3 => array(0x24DD),
1079 0x24C4 => array(0x24DE),
1080 0x24C5 => array(0x24DF),
1081 0x24C6 => array(0x24E0),
1082 0x24C7 => array(0x24E1),
1083 0x24C8 => array(0x24E2),
1084 0x24C9 => array(0x24E3),
1085 0x24CA => array(0x24E4),
1086 0x24CB => array(0x24E5),
1087 0x24CC => array(0x24E6),
1088 0x24CD => array(0x24E7),
1089 0x24CE => array(0x24E8),
1090 0x24CF => array(0x24E9),
1091 0x3371 => array(0x68, 0x70, 0x61),
1092 0x3373 => array(0x61, 0x75),
1093 0x3375 => array(0x6F, 0x76),
1094 0x3380 => array(0x70, 0x61),
1095 0x3381 => array(0x6E, 0x61),
1096 0x3382 => array(0x3BC, 0x61),
1097 0x3383 => array(0x6D, 0x61),
1098 0x3384 => array(0x6B, 0x61),
1099 0x3385 => array(0x6B, 0x62),
1100 0x3386 => array(0x6D, 0x62),
1101 0x3387 => array(0x67, 0x62),
1102 0x338A => array(0x70, 0x66),
1103 0x338B => array(0x6E, 0x66),
1104 0x338C => array(0x3BC, 0x66),
1105 0x3390 => array(0x68, 0x7A),
1106 0x3391 => array(0x6B, 0x68, 0x7A),
1107 0x3392 => array(0x6D, 0x68, 0x7A),
1108 0x3393 => array(0x67, 0x68, 0x7A),
1109 0x3394 => array(0x74, 0x68, 0x7A),
1110 0x33A9 => array(0x70, 0x61),
1111 0x33AA => array(0x6B, 0x70, 0x61),
1112 0x33AB => array(0x6D, 0x70, 0x61),
1113 0x33AC => array(0x67, 0x70, 0x61),
1114 0x33B4 => array(0x70, 0x76),
1115 0x33B5 => array(0x6E, 0x76),
1116 0x33B6 => array(0x3BC, 0x76),
1117 0x33B7 => array(0x6D, 0x76),
1118 0x33B8 => array(0x6B, 0x76),
1119 0x33B9 => array(0x6D, 0x76),
1120 0x33BA => array(0x70, 0x77),
1121 0x33BB => array(0x6E, 0x77),
1122 0x33BC => array(0x3BC, 0x77),
1123 0x33BD => array(0x6D, 0x77),
1124 0x33BE => array(0x6B, 0x77),
1125 0x33BF => array(0x6D, 0x77),
1126 0x33C0 => array(0x6B, 0x3C9),
1127 0x33C1 => array(0x6D, 0x3C9),
1128 /* 0x33C2 => array(0x61, 0x2E, 0x6D, 0x2E), */
1129 0x33C3 => array(0x62, 0x71),
1130 0x33C6 => array(0x63, 0x2215, 0x6B, 0x67),
1131 0x33C7 => array(0x63, 0x6F, 0x2E),
1132 0x33C8 => array(0x64, 0x62),
1133 0x33C9 => array(0x67, 0x79),
1134 0x33CB => array(0x68, 0x70),
1135 0x33CD => array(0x6B, 0x6B),
1136 0x33CE => array(0x6B, 0x6D),
1137 0x33D7 => array(0x70, 0x68),
1138 0x33D9 => array(0x70, 0x70, 0x6D),
1139 0x33DA => array(0x70, 0x72),
1140 0x33DC => array(0x73, 0x76),
1141 0x33DD => array(0x77, 0x62),
1142 0xFB00 => array(0x66, 0x66),
1143 0xFB01 => array(0x66, 0x69),
1144 0xFB02 => array(0x66, 0x6C),
1145 0xFB03 => array(0x66, 0x66, 0x69),
1146 0xFB04 => array(0x66, 0x66, 0x6C),
1147 0xFB05 => array(0x73, 0x74),
1148 0xFB06 => array(0x73, 0x74),
1149 0xFB13 => array(0x574, 0x576),
1150 0xFB14 => array(0x574, 0x565),
1151 0xFB15 => array(0x574, 0x56B),
1152 0xFB16 => array(0x57E, 0x576),
1153 0xFB17 => array(0x574, 0x56D),
1154 0xFF21 => array(0xFF41),
1155 0xFF22 => array(0xFF42),
1156 0xFF23 => array(0xFF43),
1157 0xFF24 => array(0xFF44),
1158 0xFF25 => array(0xFF45),
1159 0xFF26 => array(0xFF46),
1160 0xFF27 => array(0xFF47),
1161 0xFF28 => array(0xFF48),
1162 0xFF29 => array(0xFF49),
1163 0xFF2A => array(0xFF4A),
1164 0xFF2B => array(0xFF4B),
1165 0xFF2C => array(0xFF4C),
1166 0xFF2D => array(0xFF4D),
1167 0xFF2E => array(0xFF4E),
1168 0xFF2F => array(0xFF4F),
1169 0xFF30 => array(0xFF50),
1170 0xFF31 => array(0xFF51),
1171 0xFF32 => array(0xFF52),
1172 0xFF33 => array(0xFF53),
1173 0xFF34 => array(0xFF54),
1174 0xFF35 => array(0xFF55),
1175 0xFF36 => array(0xFF56),
1176 0xFF37 => array(0xFF57),
1177 0xFF38 => array(0xFF58),
1178 0xFF39 => array(0xFF59),
1179 0xFF3A => array(0xFF5A),
1180 0x10400 => array(0x10428),
1181 0x10401 => array(0x10429),
1182 0x10402 => array(0x1042A),
1183 0x10403 => array(0x1042B),
1184 0x10404 => array(0x1042C),
1185 0x10405 => array(0x1042D),
1186 0x10406 => array(0x1042E),
1187 0x10407 => array(0x1042F),
1188 0x10408 => array(0x10430),
1189 0x10409 => array(0x10431),
1190 0x1040A => array(0x10432),
1191 0x1040B => array(0x10433),
1192 0x1040C => array(0x10434),
1193 0x1040D => array(0x10435),
1194 0x1040E => array(0x10436),
1195 0x1040F => array(0x10437),
1196 0x10410 => array(0x10438),
1197 0x10411 => array(0x10439),
1198 0x10412 => array(0x1043A),
1199 0x10413 => array(0x1043B),
1200 0x10414 => array(0x1043C),
1201 0x10415 => array(0x1043D),
1202 0x10416 => array(0x1043E),
1203 0x10417 => array(0x1043F),
1204 0x10418 => array(0x10440),
1205 0x10419 => array(0x10441),
1206 0x1041A => array(0x10442),
1207 0x1041B => array(0x10443),
1208 0x1041C => array(0x10444),
1209 0x1041D => array(0x10445),
1210 0x1041E => array(0x10446),
1211 0x1041F => array(0x10447),
1212 0x10420 => array(0x10448),
1213 0x10421 => array(0x10449),
1214 0x10422 => array(0x1044A),
1215 0x10423 => array(0x1044B),
1216 0x10424 => array(0x1044C),
1217 0x10425 => array(0x1044D),
1218 0x1D400 => array(0x61),
1219 0x1D401 => array(0x62),
1220 0x1D402 => array(0x63),
1221 0x1D403 => array(0x64),
1222 0x1D404 => array(0x65),
1223 0x1D405 => array(0x66),
1224 0x1D406 => array(0x67),
1225 0x1D407 => array(0x68),
1226 0x1D408 => array(0x69),
1227 0x1D409 => array(0x6A),
1228 0x1D40A => array(0x6B),
1229 0x1D40B => array(0x6C),
1230 0x1D40C => array(0x6D),
1231 0x1D40D => array(0x6E),
1232 0x1D40E => array(0x6F),
1233 0x1D40F => array(0x70),
1234 0x1D410 => array(0x71),
1235 0x1D411 => array(0x72),
1236 0x1D412 => array(0x73),
1237 0x1D413 => array(0x74),
1238 0x1D414 => array(0x75),
1239 0x1D415 => array(0x76),
1240 0x1D416 => array(0x77),
1241 0x1D417 => array(0x78),
1242 0x1D418 => array(0x79),
1243 0x1D419 => array(0x7A),
1244 0x1D434 => array(0x61),
1245 0x1D435 => array(0x62),
1246 0x1D436 => array(0x63),
1247 0x1D437 => array(0x64),
1248 0x1D438 => array(0x65),
1249 0x1D439 => array(0x66),
1250 0x1D43A => array(0x67),
1251 0x1D43B => array(0x68),
1252 0x1D43C => array(0x69),
1253 0x1D43D => array(0x6A),
1254 0x1D43E => array(0x6B),
1255 0x1D43F => array(0x6C),
1256 0x1D440 => array(0x6D),
1257 0x1D441 => array(0x6E),
1258 0x1D442 => array(0x6F),
1259 0x1D443 => array(0x70),
1260 0x1D444 => array(0x71),
1261 0x1D445 => array(0x72),
1262 0x1D446 => array(0x73),
1263 0x1D447 => array(0x74),
1264 0x1D448 => array(0x75),
1265 0x1D449 => array(0x76),
1266 0x1D44A => array(0x77),
1267 0x1D44B => array(0x78),
1268 0x1D44C => array(0x79),
1269 0x1D44D => array(0x7A),
1270 0x1D468 => array(0x61),
1271 0x1D469 => array(0x62),
1272 0x1D46A => array(0x63),
1273 0x1D46B => array(0x64),
1274 0x1D46C => array(0x65),
1275 0x1D46D => array(0x66),
1276 0x1D46E => array(0x67),
1277 0x1D46F => array(0x68),
1278 0x1D470 => array(0x69),
1279 0x1D471 => array(0x6A),
1280 0x1D472 => array(0x6B),
1281 0x1D473 => array(0x6C),
1282 0x1D474 => array(0x6D),
1283 0x1D475 => array(0x6E),
1284 0x1D476 => array(0x6F),
1285 0x1D477 => array(0x70),
1286 0x1D478 => array(0x71),
1287 0x1D479 => array(0x72),
1288 0x1D47A => array(0x73),
1289 0x1D47B => array(0x74),
1290 0x1D47C => array(0x75),
1291 0x1D47D => array(0x76),
1292 0x1D47E => array(0x77),
1293 0x1D47F => array(0x78),
1294 0x1D480 => array(0x79),
1295 0x1D481 => array(0x7A),
1296 0x1D49C => array(0x61),
1297 0x1D49E => array(0x63),
1298 0x1D49F => array(0x64),
1299 0x1D4A2 => array(0x67),
1300 0x1D4A5 => array(0x6A),
1301 0x1D4A6 => array(0x6B),
1302 0x1D4A9 => array(0x6E),
1303 0x1D4AA => array(0x6F),
1304 0x1D4AB => array(0x70),
1305 0x1D4AC => array(0x71),
1306 0x1D4AE => array(0x73),
1307 0x1D4AF => array(0x74),
1308 0x1D4B0 => array(0x75),
1309 0x1D4B1 => array(0x76),
1310 0x1D4B2 => array(0x77),
1311 0x1D4B3 => array(0x78),
1312 0x1D4B4 => array(0x79),
1313 0x1D4B5 => array(0x7A),
1314 0x1D4D0 => array(0x61),
1315 0x1D4D1 => array(0x62),
1316 0x1D4D2 => array(0x63),
1317 0x1D4D3 => array(0x64),
1318 0x1D4D4 => array(0x65),
1319 0x1D4D5 => array(0x66),
1320 0x1D4D6 => array(0x67),
1321 0x1D4D7 => array(0x68),
1322 0x1D4D8 => array(0x69),
1323 0x1D4D9 => array(0x6A),
1324 0x1D4DA => array(0x6B),
1325 0x1D4DB => array(0x6C),
1326 0x1D4DC => array(0x6D),
1327 0x1D4DD => array(0x6E),
1328 0x1D4DE => array(0x6F),
1329 0x1D4DF => array(0x70),
1330 0x1D4E0 => array(0x71),
1331 0x1D4E1 => array(0x72),
1332 0x1D4E2 => array(0x73),
1333 0x1D4E3 => array(0x74),
1334 0x1D4E4 => array(0x75),
1335 0x1D4E5 => array(0x76),
1336 0x1D4E6 => array(0x77),
1337 0x1D4E7 => array(0x78),
1338 0x1D4E8 => array(0x79),
1339 0x1D4E9 => array(0x7A),
1340 0x1D504 => array(0x61),
1341 0x1D505 => array(0x62),
1342 0x1D507 => array(0x64),
1343 0x1D508 => array(0x65),
1344 0x1D509 => array(0x66),
1345 0x1D50A => array(0x67),
1346 0x1D50D => array(0x6A),
1347 0x1D50E => array(0x6B),
1348 0x1D50F => array(0x6C),
1349 0x1D510 => array(0x6D),
1350 0x1D511 => array(0x6E),
1351 0x1D512 => array(0x6F),
1352 0x1D513 => array(0x70),
1353 0x1D514 => array(0x71),
1354 0x1D516 => array(0x73),
1355 0x1D517 => array(0x74),
1356 0x1D518 => array(0x75),
1357 0x1D519 => array(0x76),
1358 0x1D51A => array(0x77),
1359 0x1D51B => array(0x78),
1360 0x1D51C => array(0x79),
1361 0x1D538 => array(0x61),
1362 0x1D539 => array(0x62),
1363 0x1D53B => array(0x64),
1364 0x1D53C => array(0x65),
1365 0x1D53D => array(0x66),
1366 0x1D53E => array(0x67),
1367 0x1D540 => array(0x69),
1368 0x1D541 => array(0x6A),
1369 0x1D542 => array(0x6B),
1370 0x1D543 => array(0x6C),
1371 0x1D544 => array(0x6D),
1372 0x1D546 => array(0x6F),
1373 0x1D54A => array(0x73),
1374 0x1D54B => array(0x74),
1375 0x1D54C => array(0x75),
1376 0x1D54D => array(0x76),
1377 0x1D54E => array(0x77),
1378 0x1D54F => array(0x78),
1379 0x1D550 => array(0x79),
1380 0x1D56C => array(0x61),
1381 0x1D56D => array(0x62),
1382 0x1D56E => array(0x63),
1383 0x1D56F => array(0x64),
1384 0x1D570 => array(0x65),
1385 0x1D571 => array(0x66),
1386 0x1D572 => array(0x67),
1387 0x1D573 => array(0x68),
1388 0x1D574 => array(0x69),
1389 0x1D575 => array(0x6A),
1390 0x1D576 => array(0x6B),
1391 0x1D577 => array(0x6C),
1392 0x1D578 => array(0x6D),
1393 0x1D579 => array(0x6E),
1394 0x1D57A => array(0x6F),
1395 0x1D57B => array(0x70),
1396 0x1D57C => array(0x71),
1397 0x1D57D => array(0x72),
1398 0x1D57E => array(0x73),
1399 0x1D57F => array(0x74),
1400 0x1D580 => array(0x75),
1401 0x1D581 => array(0x76),
1402 0x1D582 => array(0x77),
1403 0x1D583 => array(0x78),
1404 0x1D584 => array(0x79),
1405 0x1D585 => array(0x7A),
1406 0x1D5A0 => array(0x61),
1407 0x1D5A1 => array(0x62),
1408 0x1D5A2 => array(0x63),
1409 0x1D5A3 => array(0x64),
1410 0x1D5A4 => array(0x65),
1411 0x1D5A5 => array(0x66),
1412 0x1D5A6 => array(0x67),
1413 0x1D5A7 => array(0x68),
1414 0x1D5A8 => array(0x69),
1415 0x1D5A9 => array(0x6A),
1416 0x1D5AA => array(0x6B),
1417 0x1D5AB => array(0x6C),
1418 0x1D5AC => array(0x6D),
1419 0x1D5AD => array(0x6E),
1420 0x1D5AE => array(0x6F),
1421 0x1D5AF => array(0x70),
1422 0x1D5B0 => array(0x71),
1423 0x1D5B1 => array(0x72),
1424 0x1D5B2 => array(0x73),
1425 0x1D5B3 => array(0x74),
1426 0x1D5B4 => array(0x75),
1427 0x1D5B5 => array(0x76),
1428 0x1D5B6 => array(0x77),
1429 0x1D5B7 => array(0x78),
1430 0x1D5B8 => array(0x79),
1431 0x1D5B9 => array(0x7A),
1432 0x1D5D4 => array(0x61),
1433 0x1D5D5 => array(0x62),
1434 0x1D5D6 => array(0x63),
1435 0x1D5D7 => array(0x64),
1436 0x1D5D8 => array(0x65),
1437 0x1D5D9 => array(0x66),
1438 0x1D5DA => array(0x67),
1439 0x1D5DB => array(0x68),
1440 0x1D5DC => array(0x69),
1441 0x1D5DD => array(0x6A),
1442 0x1D5DE => array(0x6B),
1443 0x1D5DF => array(0x6C),
1444 0x1D5E0 => array(0x6D),
1445 0x1D5E1 => array(0x6E),
1446 0x1D5E2 => array(0x6F),
1447 0x1D5E3 => array(0x70),
1448 0x1D5E4 => array(0x71),
1449 0x1D5E5 => array(0x72),
1450 0x1D5E6 => array(0x73),
1451 0x1D5E7 => array(0x74),
1452 0x1D5E8 => array(0x75),
1453 0x1D5E9 => array(0x76),
1454 0x1D5EA => array(0x77),
1455 0x1D5EB => array(0x78),
1456 0x1D5EC => array(0x79),
1457 0x1D5ED => array(0x7A),
1458 0x1D608 => array(0x61),
1459 0x1D609 => array(0x62),
1460 0x1D60A => array(0x63),
1461 0x1D60B => array(0x64),
1462 0x1D60C => array(0x65),
1463 0x1D60D => array(0x66),
1464 0x1D60E => array(0x67),
1465 0x1D60F => array(0x68),
1466 0x1D610 => array(0x69),
1467 0x1D611 => array(0x6A),
1468 0x1D612 => array(0x6B),
1469 0x1D613 => array(0x6C),
1470 0x1D614 => array(0x6D),
1471 0x1D615 => array(0x6E),
1472 0x1D616 => array(0x6F),
1473 0x1D617 => array(0x70),
1474 0x1D618 => array(0x71),
1475 0x1D619 => array(0x72),
1476 0x1D61A => array(0x73),
1477 0x1D61B => array(0x74),
1478 0x1D61C => array(0x75),
1479 0x1D61D => array(0x76),
1480 0x1D61E => array(0x77),
1481 0x1D61F => array(0x78),
1482 0x1D620 => array(0x79),
1483 0x1D621 => array(0x7A),
1484 0x1D63C => array(0x61),
1485 0x1D63D => array(0x62),
1486 0x1D63E => array(0x63),
1487 0x1D63F => array(0x64),
1488 0x1D640 => array(0x65),
1489 0x1D641 => array(0x66),
1490 0x1D642 => array(0x67),
1491 0x1D643 => array(0x68),
1492 0x1D644 => array(0x69),
1493 0x1D645 => array(0x6A),
1494 0x1D646 => array(0x6B),
1495 0x1D647 => array(0x6C),
1496 0x1D648 => array(0x6D),
1497 0x1D649 => array(0x6E),
1498 0x1D64A => array(0x6F),
1499 0x1D64B => array(0x70),
1500 0x1D64C => array(0x71),
1501 0x1D64D => array(0x72),
1502 0x1D64E => array(0x73),
1503 0x1D64F => array(0x74),
1504 0x1D650 => array(0x75),
1505 0x1D651 => array(0x76),
1506 0x1D652 => array(0x77),
1507 0x1D653 => array(0x78),
1508 0x1D654 => array(0x79),
1509 0x1D655 => array(0x7A),
1510 0x1D670 => array(0x61),
1511 0x1D671 => array(0x62),
1512 0x1D672 => array(0x63),
1513 0x1D673 => array(0x64),
1514 0x1D674 => array(0x65),
1515 0x1D675 => array(0x66),
1516 0x1D676 => array(0x67),
1517 0x1D677 => array(0x68),
1518 0x1D678 => array(0x69),
1519 0x1D679 => array(0x6A),
1520 0x1D67A => array(0x6B),
1521 0x1D67B => array(0x6C),
1522 0x1D67C => array(0x6D),
1523 0x1D67D => array(0x6E),
1524 0x1D67E => array(0x6F),
1525 0x1D67F => array(0x70),
1526 0x1D680 => array(0x71),
1527 0x1D681 => array(0x72),
1528 0x1D682 => array(0x73),
1529 0x1D683 => array(0x74),
1530 0x1D684 => array(0x75),
1531 0x1D685 => array(0x76),
1532 0x1D686 => array(0x77),
1533 0x1D687 => array(0x78),
1534 0x1D688 => array(0x79),
1535 0x1D689 => array(0x7A),
1536 0x1D6A8 => array(0x3B1),
1537 0x1D6A9 => array(0x3B2),
1538 0x1D6AA => array(0x3B3),
1539 0x1D6AB => array(0x3B4),
1540 0x1D6AC => array(0x3B5),
1541 0x1D6AD => array(0x3B6),
1542 0x1D6AE => array(0x3B7),
1543 0x1D6AF => array(0x3B8),
1544 0x1D6B0 => array(0x3B9),
1545 0x1D6B1 => array(0x3BA),
1546 0x1D6B2 => array(0x3BB),
1547 0x1D6B3 => array(0x3BC),
1548 0x1D6B4 => array(0x3BD),
1549 0x1D6B5 => array(0x3BE),
1550 0x1D6B6 => array(0x3BF),
1551 0x1D6B7 => array(0x3C0),
1552 0x1D6B8 => array(0x3C1),
1553 0x1D6B9 => array(0x3B8),
1554 0x1D6BA => array(0x3C3),
1555 0x1D6BB => array(0x3C4),
1556 0x1D6BC => array(0x3C5),
1557 0x1D6BD => array(0x3C6),
1558 0x1D6BE => array(0x3C7),
1559 0x1D6BF => array(0x3C8),
1560 0x1D6C0 => array(0x3C9),
1561 0x1D6D3 => array(0x3C3),
1562 0x1D6E2 => array(0x3B1),
1563 0x1D6E3 => array(0x3B2),
1564 0x1D6E4 => array(0x3B3),
1565 0x1D6E5 => array(0x3B4),
1566 0x1D6E6 => array(0x3B5),
1567 0x1D6E7 => array(0x3B6),
1568 0x1D6E8 => array(0x3B7),
1569 0x1D6E9 => array(0x3B8),
1570 0x1D6EA => array(0x3B9),
1571 0x1D6EB => array(0x3BA),
1572 0x1D6EC => array(0x3BB),
1573 0x1D6ED => array(0x3BC),
1574 0x1D6EE => array(0x3BD),
1575 0x1D6EF => array(0x3BE),
1576 0x1D6F0 => array(0x3BF),
1577 0x1D6F1 => array(0x3C0),
1578 0x1D6F2 => array(0x3C1),
1579 0x1D6F3 => array(0x3B8),
1580 0x1D6F4 => array(0x3C3),
1581 0x1D6F5 => array(0x3C4),
1582 0x1D6F6 => array(0x3C5),
1583 0x1D6F7 => array(0x3C6),
1584 0x1D6F8 => array(0x3C7),
1585 0x1D6F9 => array(0x3C8),
1586 0x1D6FA => array(0x3C9),
1587 0x1D70D => array(0x3C3),
1588 0x1D71C => array(0x3B1),
1589 0x1D71D => array(0x3B2),
1590 0x1D71E => array(0x3B3),
1591 0x1D71F => array(0x3B4),
1592 0x1D720 => array(0x3B5),
1593 0x1D721 => array(0x3B6),
1594 0x1D722 => array(0x3B7),
1595 0x1D723 => array(0x3B8),
1596 0x1D724 => array(0x3B9),
1597 0x1D725 => array(0x3BA),
1598 0x1D726 => array(0x3BB),
1599 0x1D727 => array(0x3BC),
1600 0x1D728 => array(0x3BD),
1601 0x1D729 => array(0x3BE),
1602 0x1D72A => array(0x3BF),
1603 0x1D72B => array(0x3C0),
1604 0x1D72C => array(0x3C1),
1605 0x1D72D => array(0x3B8),
1606 0x1D72E => array(0x3C3),
1607 0x1D72F => array(0x3C4),
1608 0x1D730 => array(0x3C5),
1609 0x1D731 => array(0x3C6),
1610 0x1D732 => array(0x3C7),
1611 0x1D733 => array(0x3C8),
1612 0x1D734 => array(0x3C9),
1613 0x1D747 => array(0x3C3),
1614 0x1D756 => array(0x3B1),
1615 0x1D757 => array(0x3B2),
1616 0x1D758 => array(0x3B3),
1617 0x1D759 => array(0x3B4),
1618 0x1D75A => array(0x3B5),
1619 0x1D75B => array(0x3B6),
1620 0x1D75C => array(0x3B7),
1621 0x1D75D => array(0x3B8),
1622 0x1D75E => array(0x3B9),
1623 0x1D75F => array(0x3BA),
1624 0x1D760 => array(0x3BB),
1625 0x1D761 => array(0x3BC),
1626 0x1D762 => array(0x3BD),
1627 0x1D763 => array(0x3BE),
1628 0x1D764 => array(0x3BF),
1629 0x1D765 => array(0x3C0),
1630 0x1D766 => array(0x3C1),
1631 0x1D767 => array(0x3B8),
1632 0x1D768 => array(0x3C3),
1633 0x1D769 => array(0x3C4),
1634 0x1D76A => array(0x3C5),
1635 0x1D76B => array(0x3C6),
1636 0x1D76C => array(0x3C7),
1637 0x1D76D => array(0x3C8),
1638 0x1D76E => array(0x3C9),
1639 0x1D781 => array(0x3C3),
1640 0x1D790 => array(0x3B1),
1641 0x1D791 => array(0x3B2),
1642 0x1D792 => array(0x3B3),
1643 0x1D793 => array(0x3B4),
1644 0x1D794 => array(0x3B5),
1645 0x1D795 => array(0x3B6),
1646 0x1D796 => array(0x3B7),
1647 0x1D797 => array(0x3B8),
1648 0x1D798 => array(0x3B9),
1649 0x1D799 => array(0x3BA),
1650 0x1D79A => array(0x3BB),
1651 0x1D79B => array(0x3BC),
1652 0x1D79C => array(0x3BD),
1653 0x1D79D => array(0x3BE),
1654 0x1D79E => array(0x3BF),
1655 0x1D79F => array(0x3C0),
1656 0x1D7A0 => array(0x3C1),
1657 0x1D7A1 => array(0x3B8),
1658 0x1D7A2 => array(0x3C3),
1659 0x1D7A3 => array(0x3C4),
1660 0x1D7A4 => array(0x3C5),
1661 0x1D7A5 => array(0x3C6),
1662 0x1D7A6 => array(0x3C7),
1663 0x1D7A7 => array(0x3C8),
1664 0x1D7A8 => array(0x3C9),
1665 0x1D7BB => array(0x3C3),
1666 0x3F9 => array(0x3C3),
1667 0x1D2C => array(0x61),
1668 0x1D2D => array(0xE6),
1669 0x1D2E => array(0x62),
1670 0x1D30 => array(0x64),
1671 0x1D31 => array(0x65),
1672 0x1D32 => array(0x1DD),
1673 0x1D33 => array(0x67),
1674 0x1D34 => array(0x68),
1675 0x1D35 => array(0x69),
1676 0x1D36 => array(0x6A),
1677 0x1D37 => array(0x6B),
1678 0x1D38 => array(0x6C),
1679 0x1D39 => array(0x6D),
1680 0x1D3A => array(0x6E),
1681 0x1D3C => array(0x6F),
1682 0x1D3D => array(0x223),
1683 0x1D3E => array(0x70),
1684 0x1D3F => array(0x72),
1685 0x1D40 => array(0x74),
1686 0x1D41 => array(0x75),
1687 0x1D42 => array(0x77),
1688 0x213B => array(0x66, 0x61, 0x78),
1689 0x3250 => array(0x70, 0x74, 0x65),
1690 0x32CC => array(0x68, 0x67),
1691 0x32CE => array(0x65, 0x76),
1692 0x32CF => array(0x6C, 0x74, 0x64),
1693 0x337A => array(0x69, 0x75),
1694 0x33DE => array(0x76, 0x2215, 0x6D),
1695 0x33DF => array(0x61, 0x2215, 0x6D)
1696 );
1697
1698 /**
1699 * Normalization combining classes; code points not listed here
1700 * have combining class 0.
1701 *
1702 * @static
1703 * @var array
1704 * @access private
1705 */
1706 private static $_np_norm_combcls = array(
1707 0x334 => 1,
1708 0x335 => 1,
1709 0x336 => 1,
1710 0x337 => 1,
1711 0x338 => 1,
1712 0x93C => 7,
1713 0x9BC => 7,
1714 0xA3C => 7,
1715 0xABC => 7,
1716 0xB3C => 7,
1717 0xCBC => 7,
1718 0x1037 => 7,
1719 0x3099 => 8,
1720 0x309A => 8,
1721 0x94D => 9,
1722 0x9CD => 9,
1723 0xA4D => 9,
1724 0xACD => 9,
1725 0xB4D => 9,
1726 0xBCD => 9,
1727 0xC4D => 9,
1728 0xCCD => 9,
1729 0xD4D => 9,
1730 0xDCA => 9,
1731 0xE3A => 9,
1732 0xF84 => 9,
1733 0x1039 => 9,
1734 0x1714 => 9,
1735 0x1734 => 9,
1736 0x17D2 => 9,
1737 0x5B0 => 10,
1738 0x5B1 => 11,
1739 0x5B2 => 12,
1740 0x5B3 => 13,
1741 0x5B4 => 14,
1742 0x5B5 => 15,
1743 0x5B6 => 16,
1744 0x5B7 => 17,
1745 0x5B8 => 18,
1746 0x5B9 => 19,
1747 0x5BB => 20,
1748 0x5Bc => 21,
1749 0x5BD => 22,
1750 0x5BF => 23,
1751 0x5C1 => 24,
1752 0x5C2 => 25,
1753 0xFB1E => 26,
1754 0x64B => 27,
1755 0x64C => 28,
1756 0x64D => 29,
1757 0x64E => 30,
1758 0x64F => 31,
1759 0x650 => 32,
1760 0x651 => 33,
1761 0x652 => 34,
1762 0x670 => 35,
1763 0x711 => 36,
1764 0xC55 => 84,
1765 0xC56 => 91,
1766 0xE38 => 103,
1767 0xE39 => 103,
1768 0xE48 => 107,
1769 0xE49 => 107,
1770 0xE4A => 107,
1771 0xE4B => 107,
1772 0xEB8 => 118,
1773 0xEB9 => 118,
1774 0xEC8 => 122,
1775 0xEC9 => 122,
1776 0xECA => 122,
1777 0xECB => 122,
1778 0xF71 => 129,
1779 0xF72 => 130,
1780 0xF7A => 130,
1781 0xF7B => 130,
1782 0xF7C => 130,
1783 0xF7D => 130,
1784 0xF80 => 130,
1785 0xF74 => 132,
1786 0x321 => 202,
1787 0x322 => 202,
1788 0x327 => 202,
1789 0x328 => 202,
1790 0x31B => 216,
1791 0xF39 => 216,
1792 0x1D165 => 216,
1793 0x1D166 => 216,
1794 0x1D16E => 216,
1795 0x1D16F => 216,
1796 0x1D170 => 216,
1797 0x1D171 => 216,
1798 0x1D172 => 216,
1799 0x302A => 218,
1800 0x316 => 220,
1801 0x317 => 220,
1802 0x318 => 220,
1803 0x319 => 220,
1804 0x31C => 220,
1805 0x31D => 220,
1806 0x31E => 220,
1807 0x31F => 220,
1808 0x320 => 220,
1809 0x323 => 220,
1810 0x324 => 220,
1811 0x325 => 220,
1812 0x326 => 220,
1813 0x329 => 220,
1814 0x32A => 220,
1815 0x32B => 220,
1816 0x32C => 220,
1817 0x32D => 220,
1818 0x32E => 220,
1819 0x32F => 220,
1820 0x330 => 220,
1821 0x331 => 220,
1822 0x332 => 220,
1823 0x333 => 220,
1824 0x339 => 220,
1825 0x33A => 220,
1826 0x33B => 220,
1827 0x33C => 220,
1828 0x347 => 220,
1829 0x348 => 220,
1830 0x349 => 220,
1831 0x34D => 220,
1832 0x34E => 220,
1833 0x353 => 220,
1834 0x354 => 220,
1835 0x355 => 220,
1836 0x356 => 220,
1837 0x591 => 220,
1838 0x596 => 220,
1839 0x59B => 220,
1840 0x5A3 => 220,
1841 0x5A4 => 220,
1842 0x5A5 => 220,
1843 0x5A6 => 220,
1844 0x5A7 => 220,
1845 0x5AA => 220,
1846 0x655 => 220,
1847 0x656 => 220,
1848 0x6E3 => 220,
1849 0x6EA => 220,
1850 0x6ED => 220,
1851 0x731 => 220,
1852 0x734 => 220,
1853 0x737 => 220,
1854 0x738 => 220,
1855 0x739 => 220,
1856 0x73B => 220,
1857 0x73C => 220,
1858 0x73E => 220,
1859 0x742 => 220,
1860 0x744 => 220,
1861 0x746 => 220,
1862 0x748 => 220,
1863 0x952 => 220,
1864 0xF18 => 220,
1865 0xF19 => 220,
1866 0xF35 => 220,
1867 0xF37 => 220,
1868 0xFC6 => 220,
1869 0x193B => 220,
1870 0x20E8 => 220,
1871 0x1D17B => 220,
1872 0x1D17C => 220,
1873 0x1D17D => 220,
1874 0x1D17E => 220,
1875 0x1D17F => 220,
1876 0x1D180 => 220,
1877 0x1D181 => 220,
1878 0x1D182 => 220,
1879 0x1D18A => 220,
1880 0x1D18B => 220,
1881 0x59A => 222,
1882 0x5AD => 222,
1883 0x1929 => 222,
1884 0x302D => 222,
1885 0x302E => 224,
1886 0x302F => 224,
1887 0x1D16D => 226,
1888 0x5AE => 228,
1889 0x18A9 => 228,
1890 0x302B => 228,
1891 0x300 => 230,
1892 0x301 => 230,
1893 0x302 => 230,
1894 0x303 => 230,
1895 0x304 => 230,
1896 0x305 => 230,
1897 0x306 => 230,
1898 0x307 => 230,
1899 0x308 => 230,
1900 0x309 => 230,
1901 0x30A => 230,
1902 0x30B => 230,
1903 0x30C => 230,
1904 0x30D => 230,
1905 0x30E => 230,
1906 0x30F => 230,
1907 0x310 => 230,
1908 0x311 => 230,
1909 0x312 => 230,
1910 0x313 => 230,
1911 0x314 => 230,
1912 0x33D => 230,
1913 0x33E => 230,
1914 0x33F => 230,
1915 0x340 => 230,
1916 0x341 => 230,
1917 0x342 => 230,
1918 0x343 => 230,
1919 0x344 => 230,
1920 0x346 => 230,
1921 0x34A => 230,
1922 0x34B => 230,
1923 0x34C => 230,
1924 0x350 => 230,
1925 0x351 => 230,
1926 0x352 => 230,
1927 0x357 => 230,
1928 0x363 => 230,
1929 0x364 => 230,
1930 0x365 => 230,
1931 0x366 => 230,
1932 0x367 => 230,
1933 0x368 => 230,
1934 0x369 => 230,
1935 0x36A => 230,
1936 0x36B => 230,
1937 0x36C => 230,
1938 0x36D => 230,
1939 0x36E => 230,
1940 0x36F => 230,
1941 0x483 => 230,
1942 0x484 => 230,
1943 0x485 => 230,
1944 0x486 => 230,
1945 0x592 => 230,
1946 0x593 => 230,
1947 0x594 => 230,
1948 0x595 => 230,
1949 0x597 => 230,
1950 0x598 => 230,
1951 0x599 => 230,
1952 0x59C => 230,
1953 0x59D => 230,
1954 0x59E => 230,
1955 0x59F => 230,
1956 0x5A0 => 230,
1957 0x5A1 => 230,
1958 0x5A8 => 230,
1959 0x5A9 => 230,
1960 0x5AB => 230,
1961 0x5AC => 230,
1962 0x5AF => 230,
1963 0x5C4 => 230,
1964 0x610 => 230,
1965 0x611 => 230,
1966 0x612 => 230,
1967 0x613 => 230,
1968 0x614 => 230,
1969 0x615 => 230,
1970 0x653 => 230,
1971 0x654 => 230,
1972 0x657 => 230,
1973 0x658 => 230,
1974 0x6D6 => 230,
1975 0x6D7 => 230,
1976 0x6D8 => 230,
1977 0x6D9 => 230,
1978 0x6DA => 230,
1979 0x6DB => 230,
1980 0x6DC => 230,
1981 0x6DF => 230,
1982 0x6E0 => 230,
1983 0x6E1 => 230,
1984 0x6E2 => 230,
1985 0x6E4 => 230,
1986 0x6E7 => 230,
1987 0x6E8 => 230,
1988 0x6EB => 230,
1989 0x6EC => 230,
1990 0x730 => 230,
1991 0x732 => 230,
1992 0x733 => 230,
1993 0x735 => 230,
1994 0x736 => 230,
1995 0x73A => 230,
1996 0x73D => 230,
1997 0x73F => 230,
1998 0x740 => 230,
1999 0x741 => 230,
2000 0x743 => 230,
2001 0x745 => 230,
2002 0x747 => 230,
2003 0x749 => 230,
2004 0x74A => 230,
2005 0x951 => 230,
2006 0x953 => 230,
2007 0x954 => 230,
2008 0xF82 => 230,
2009 0xF83 => 230,
2010 0xF86 => 230,
2011 0xF87 => 230,
2012 0x170D => 230,
2013 0x193A => 230,
2014 0x20D0 => 230,
2015 0x20D1 => 230,
2016 0x20D4 => 230,
2017 0x20D5 => 230,
2018 0x20D6 => 230,
2019 0x20D7 => 230,
2020 0x20DB => 230,
2021 0x20DC => 230,
2022 0x20E1 => 230,
2023 0x20E7 => 230,
2024 0x20E9 => 230,
2025 0xFE20 => 230,
2026 0xFE21 => 230,
2027 0xFE22 => 230,
2028 0xFE23 => 230,
2029 0x1D185 => 230,
2030 0x1D186 => 230,
2031 0x1D187 => 230,
2032 0x1D189 => 230,
2033 0x1D188 => 230,
2034 0x1D1AA => 230,
2035 0x1D1AB => 230,
2036 0x1D1AC => 230,
2037 0x1D1AD => 230,
2038 0x315 => 232,
2039 0x31A => 232,
2040 0x302C => 232,
2041 0x35F => 233,
2042 0x362 => 233,
2043 0x35D => 234,
2044 0x35E => 234,
2045 0x360 => 234,
2046 0x361 => 234,
2047 0x345 => 240
2048 );
2049 // }}}
2050
2051 // {{{ properties
2052 /**
2053 * @var string
2054 * @access private
2055 */
2056 private $_punycode_prefix = 'xn--';
2057
2058 /**
2059 * @access private
2060 */
2061 private $_invalid_ucs = 0x80000000;
2062
2063 /**
2064 * @access private
2065 */
2066 private $_max_ucs = 0x10FFFF;
2067
2068 /**
2069 * @var int
2070 * @access private
2071 */
2072 private $_base = 36;
2073
2074 /**
2075 * @var int
2076 * @access private
2077 */
2078 private $_tmin = 1;
2079
2080 /**
2081 * @var int
2082 * @access private
2083 */
2084 private $_tmax = 26;
2085
2086 /**
2087 * @var int
2088 * @access private
2089 */
2090 private $_skew = 38;
2091
2092 /**
2093 * @var int
2094 * @access private
2095 */
2096 private $_damp = 700;
2097
2098 /**
2099 * @var int
2100 * @access private
2101 */
2102 private $_initial_bias = 72;
2103
2104 /**
2105 * @var int
2106 * @access private
2107 */
2108 private $_initial_n = 0x80;
2109
2110 /**
2111 * @var int
2112 * @access private
2113 */
2114 private $_slast;
2115
2116 /**
2117 * @access private
2118 */
2119 private $_sbase = 0xAC00;
2120
2121 /**
2122 * @access private
2123 */
2124 private $_lbase = 0x1100;
2125
2126 /**
2127 * @access private
2128 */
2129 private $_vbase = 0x1161;
2130
2131 /**
2132 * @access private
2133 */
2134 private $_tbase = 0x11a7;
2135
2136 /**
2137 * @var int
2138 * @access private
2139 */
2140 private $_lcount = 19;
2141
2142 /**
2143 * @var int
2144 * @access private
2145 */
2146 private $_vcount = 21;
2147
2148 /**
2149 * @var int
2150 * @access private
2151 */
2152 private $_tcount = 28;
2153
2154 /**
2155 * vcount * tcount
2156 *
2157 * @var int
2158 * @access private
2159 */
2160 private $_ncount = 588;
2161
2162 /**
2163 * lcount * tcount * vcount
2164 *
2165 * @var int
2166 * @access private
2167 */
2168 private $_scount = 11172;
2169
2170 /**
2171 * Default encoding for encode()'s input and decode()'s output is UTF-8;
2172 * Other possible encodings are ucs4_string and ucs4_array
2173 * See {@link setParams()} for how to select these
2174 *
2175 * @var string
2176 * @access private
2177 */
2178 private $_api_encoding = 'utf8';
2179
2180 /**
2181 * Overlong UTF-8 encodings are forbidden
2182 *
2183 * @var bool
2184 * @access private
2185 */
2186 private $_allow_overlong = false;
2187
2188 /**
2189 * Behave strict or not
2190 *
2191 * @var bool
2192 * @access private
2193 */
2194 private $_strict_mode = false;
2195
2196 /**
2197 * IDNA-version to use
2198 *
2199 * Values are "2003" and "2008".
2200 * Defaults to "2003", since that was the original version and for
2201 * compatibility with previous versions of this library.
2202 * If you need to encode "new" characters like the German "Eszett",
2203 * please switch to 2008 first before encoding.
2204 *
2205 * @var string
2206 * @access private
2207 */
2208 private $_version = '2003';
2209
2210 /**
2211 * Cached value indicating whether or not mbstring function overloading is
2212 * on for strlen
2213 *
2214 * This is cached for optimal performance.
2215 *
2216 * @var boolean
2217 * @see Net_IDNA2::_byteLength()
2218 */
2219 private static $_mb_string_overload = null;
2220 // }}}
2221
2222
2223 // {{{ constructor
/**
 * Constructor
 *
 * Computes $_slast from $_sbase and the l/v/t counts, applies the given
 * options through setParams() and, the first time an instance is created,
 * caches whether mbstring function overloading is active for strlen.
 *
 * @param array $options Options to initialise the object with; any value
 *                       that is not an array is ignored
 *
 * @throws InvalidArgumentException when setParams() rejects an option value
 * @access public
 * @see    setParams()
 */
public function __construct($options = null)
{
    $this->_slast = $this->_sbase + $this->_lcount * $this->_vcount * $this->_tcount;

    if (is_array($options)) {
        $this->setParams($options);
    }

    // populate mbstring overloading cache if not set
    if (self::$_mb_string_overload === null) {
        // ini_get() hands back a string (or false when the directive is
        // unknown). Cast it explicitly before the bit test: an empty or
        // otherwise non-numeric string would make the bitwise AND raise a
        // TypeError on PHP 8 instead of simply meaning "not overloaded".
        self::$_mb_string_overload = (extension_loaded('mbstring')
            && (((int) ini_get('mbstring.func_overload')) & 0x02) === 0x02);
    }
}
2246 // }}}
2247
2248
/**
 * Sets a new option value. Available options and values:
 *
 * [encoding - Encoding used for encode()'s input and decode()'s output:
 *         'utf8' (default), 'ucs4_string' or 'ucs4_array']
 * [overlong - Unicode does not allow unnecessarily long encodings of chars,
 *         to allow this, set this parameter to true, else to false;
 *         default is false.]
 * [strict - true: strict mode, good for registration purposes - Causes errors
 *         on failures; false: loose mode, ideal for "wildlife" applications
 *         by silently ignoring errors and returning the original input instead]
 * [version - IDNA version to use, '2003' (default) or '2008'; the integers
 *         2003 and 2008 are accepted as well and stored as strings]
 *
 * Options are applied in the order given. Processing stops at the first
 * unknown option name; options applied before it stay in effect.
 *
 * @param mixed $option Parameter to set (string: single parameter; array of Parameter => Value pairs)
 * @param mixed $value  Value to use (if parameter 1 is a string)
 *
 * @return boolean true on success, false when an unknown option name is met
 * @throws InvalidArgumentException when 'encoding' or 'version' gets an unsupported value
 * @access public
 */
public function setParams($option, $value = false)
{
    if (!is_array($option)) {
        $option = array($option => $value);
    }

    foreach ($option as $k => $v) {
        // Only used in error messages; arrays and objects cannot be
        // concatenated into a string without a notice or error.
        $shown = (is_array($v) || is_object($v)) ? gettype($v) : (string) $v;

        // Compare the key as a string so that an integer key such as 0 can
        // never loosely match one of the option names.
        switch ((string) $k) {
        case 'encoding':
            // Strict check: values like true or 0 must not slip through
            // and end up stored as the API encoding.
            if (!in_array($v, array('utf8', 'ucs4_string', 'ucs4_array'), true)) {
                throw new InvalidArgumentException('Set Parameter: Unknown parameter '.$shown.' for option '.$k);
            }
            $this->_api_encoding = $v;
            break;

        case 'overlong':
            $this->_allow_overlong = ($v) ? true : false;
            break;

        case 'strict':
            $this->_strict_mode = ($v) ? true : false;
            break;

        case 'version':
            // Integer years used to pass the former loose check; keep
            // accepting them, but always store the canonical string form.
            if (is_int($v)) {
                $v = (string) $v;
            }
            if (!in_array($v, array('2003', '2008'), true)) {
                throw new InvalidArgumentException('Set Parameter: Invalid parameter '.$shown.' for option '.$k);
            }
            $this->_version = $v;
            break;

        default:
            return false;
        }
    }

    return true;
}
2312
/**
 * Encode a domain name, e-mail address or URL into its ACE form.
 *
 * The input is first converted to a list of code points. It is then cut
 * into segments at the anchor characters '.', '/', ':', '?' and '@'; the
 * dot look-alikes U+3002, U+FF0E and U+FF61 are replaced by a plain '.'
 * and treated as anchors, too. Every segment is passed to _encode(); when
 * that yields a falsy result the segment is emitted as UTF-8 unchanged.
 *
 * Note: an anchor character at position 0 is not copied to the output,
 * and in strict mode any anchor character causes an exception.
 *
 * @param mixed  $decoded           Domain name (UTF-8 string, UCS-4 string
 *                                  or array of code points)
 * @param string $one_time_encoding Input encoding for this call only
 *                                  ('utf8', 'ucs4_string' or 'ucs4_array'),
 *                                  see {@link setParams()}; if not given
 *                                  the object's default encoding is used
 *
 * @return string Encoded domain name (ACE string); '' for empty input
 * @throws InvalidArgumentException on an unsupported input format or when
 *                                  an anchor character is met in strict mode
 * @access public
 */
public function encode($decoded, $one_time_encoding = false)
{
    // Bring the input into code point array form, honouring a per-call override
    $format = ($one_time_encoding) ? $one_time_encoding : $this->_api_encoding;
    if ($format == 'utf8') {
        $decoded = $this->_utf8_to_ucs4($decoded);
    } elseif ($format == 'ucs4_string') {
        $decoded = $this->_ucs4_string_to_ucs4($decoded);
    } elseif ($format != 'ucs4_array') {
        throw new InvalidArgumentException('Unsupported input format');
    }

    // Nothing to do for empty input
    if (empty($decoded)) {
        return '';
    }

    $dotLookalikes = array(0x3002, 0xFF0E, 0xFF61);
    $anchors       = array(0x2E, 0x2F, 0x3A, 0x3F, 0x40);

    // Index of the first code point of the segment currently being collected
    $segmentStart = 0;
    $ace          = '';

    foreach ($decoded as $pos => $codepoint) {
        if (in_array($codepoint, $dotLookalikes)) {
            // Normalise to the plain dot, then handle it like any anchor
            $decoded[$pos] = 0x2E;
        } elseif (!in_array($codepoint, $anchors)) {
            continue;
        }

        // Neither email addresses nor URLs allowed in strict mode
        if ($this->_strict_mode) {
            throw new InvalidArgumentException('Neither email addresses nor URLs are allowed in strict mode.');
        }

        // An anchor at the very beginning has no segment in front of it
        if ($pos) {
            $segment   = array_slice($decoded, $segmentStart, $pos - $segmentStart);
            $converted = $this->_encode($segment);
            $ace      .= ($converted) ? $converted : $this->_ucs4_to_utf8($segment);
            $ace      .= chr($decoded[$pos]);
        }
        $segmentStart = $pos + 1;
    }

    // At least one anchor was seen: append whatever follows the last one
    if ($segmentStart) {
        $total     = count($decoded);
        $segment   = array_slice($decoded, $segmentStart, $total - $segmentStart);
        $converted = $this->_encode($segment);

        return $ace . (($converted) ? $converted : $this->_ucs4_to_utf8($segment));
    }

    // No anchors at all: the whole input is a single segment
    $converted = $this->_encode($decoded);
    if ($converted) {
        return $converted;
    }

    return $this->_ucs4_to_utf8($decoded);
}
2402
/**
 * Decode an ACE domain name, e-mail address or URL.
 *
 * After trimming, the input is handled as one of three shapes:
 *  - it contains an '@' somewhere after the first character: the part
 *    behind the first '@' is decoded label by label;
 *  - otherwise it contains ':', '.' or '/': the host found by parse_url()
 *    (or, failing that, the whole input) is decoded label by label;
 *  - otherwise the whole input is handed to _decode() and its result is
 *    used as is.
 * In the label-wise cases a label for which _decode() returns a falsy
 * value is kept unchanged. The first two shapes are rejected in strict
 * mode.
 *
 * @param string $input             Domain name (ACE string)
 * @param string $one_time_encoding Output encoding for this call only
 *                                  ('utf8', 'ucs4_string' or 'ucs4_array'),
 *                                  see {@link setParams()}; if not given
 *                                  the object's default encoding is used
 *
 * @return mixed Decoded domain name in the requested output encoding
 * @throws InvalidArgumentException on an unknown encoding or when anything
 *                                  but a simple name is given in strict mode
 * @access public
 */
public function decode($input, $one_time_encoding = false)
{
    // Validate the optional per-call output encoding up front
    $knownFormats = array('utf8', 'ucs4_string', 'ucs4_array');
    if ($one_time_encoding && !in_array($one_time_encoding, $knownFormats)) {
        throw new InvalidArgumentException('Unknown encoding '.$one_time_encoding);
    }

    // Make sure to drop any newline characters around
    $input = trim($input);

    // An '@' at offset 0 yields 0 here and therefore does not count
    $isAddress  = (bool) strpos($input, '@');
    $isCompound = $isAddress || preg_match('![:\./]!', $input);

    // Addresses and URLs are a no-go in strict mode
    if ($isCompound && $this->_strict_mode) {
        throw new InvalidArgumentException('Only simple domain name parts can be handled in strict mode');
    }

    if (!$isCompound) {
        // A pure domain name string
        $return = $this->_decode($input);
    } elseif ($isAddress) {
        // Leave the mailbox alone, decode the domain behind the first '@'
        list($mailbox, $domain) = explode('@', $input, 2);
        $labels = explode('.', $domain);
        foreach ($labels as $idx => $label) {
            $plain = $this->_decode($label);
            if ($plain) {
                $labels[$idx] = $plain;
            }
        }
        $return = $mailbox . '@' . implode('.', $labels);
    } else {
        // A complete domain name, with or without scheme, path or parameters
        $parsed = parse_url($input);
        if (isset($parsed['host'])) {
            $labels = explode('.', $parsed['host']);
            foreach ($labels as $idx => $label) {
                $plain = $this->_decode($label);
                if ($plain) {
                    $labels[$idx] = $plain;
                }
            }
            $parsed['host'] = implode('.', $labels);
            if (isset($parsed['scheme'])) {
                $parsed['scheme'] .= (strtolower($parsed['scheme']) == 'mailto') ? ':' : '://';
            }
            $return = $this->_unparse_url($parsed);
        } else {
            // parse_url() found no host, fall back to splitting the raw input
            $labels = explode('.', $input);
            foreach ($labels as $idx => $label) {
                $plain = $this->_decode($label);
                if ($plain) {
                    $labels[$idx] = $plain;
                }
            }
            $return = implode('.', $labels);
        }
    }

    // The result is UTF-8 at this point; convert if another format is wanted
    $format = ($one_time_encoding) ? $one_time_encoding : $this->_api_encoding;
    if ($format == 'utf8') {
        return $return;
    }
    if ($format == 'ucs4_string') {
        return $this->_ucs4_to_ucs4_string($this->_utf8_to_ucs4($return));
    }
    if ($format == 'ucs4_array') {
        return $this->_utf8_to_ucs4($return);
    }

    throw new InvalidArgumentException('Unsupported output format');
}
2488
2489
2490 // {{{ private
/**
 * Opposite function to parse_url()
 *
 * Inspired by code from comments of php.net-documentation for parse_url()
 *
 * Every component is optional: missing components are simply left out of
 * the result. The scheme is emitted verbatim, so the caller has to append
 * the separator (':' or '://') to it beforehand.
 *
 * @param array $parts_arr parts (strings) as returned by parse_url()
 *
 * @return string
 * @access private
 */
private function _unparse_url($parts_arr)
{
    // Start from an empty string so that URLs without a scheme do not
    // append to an undefined variable
    $ret_url = '';

    if (!empty($parts_arr['scheme'])) {
        $ret_url .= $parts_arr['scheme'];
    }

    if (!empty($parts_arr['user'])) {
        $ret_url .= $parts_arr['user'];
        if (!empty($parts_arr['pass'])) {
            $ret_url .= ':' . $parts_arr['pass'];
        }
        $ret_url .= '@';
    }

    if (isset($parts_arr['host'])) {
        $ret_url .= $parts_arr['host'];
    }

    if (!empty($parts_arr['port'])) {
        $ret_url .= ':' . $parts_arr['port'];
    }

    // parse_url() omits 'path' for URLs such as "http://example.com"
    if (isset($parts_arr['path'])) {
        $ret_url .= $parts_arr['path'];
    }

    if (!empty($parts_arr['query'])) {
        $ret_url .= '?' . $parts_arr['query'];
    }

    if (!empty($parts_arr['fragment'])) {
        $ret_url .= '#' . $parts_arr['fragment'];
    }

    return $ret_url;
}
2526
/**
 * The actual encoding algorithm (Punycode with the ACE prefix).
 *
 * The input is rejected if it already starts with the Punycode prefix.
 * If it holds no code point above 0x7A nothing is encoded: strict mode
 * throws, otherwise false is returned. Otherwise the input is Nameprep'd,
 * its basic code points are copied to the output and the remaining code
 * points are appended as generalized variable-length integers.
 *
 * @param array $decoded UCS-4 code points (integers) which should be encoded
 *
 * @return string|false Encoded string; false if there is nothing to encode
 * @throws InvalidArgumentException if the input is already Punycode, or in
 *                                  strict mode if nothing is encodable
 * @access private
 */
private function _encode($decoded)
{
    // We cannot encode a domain name containing the Punycode prefix
    $prefix_len  = self::_byteLength($this->_punycode_prefix);
    $prefix_ucs4 = $this->_utf8_to_ucs4($this->_punycode_prefix);

    if ($prefix_ucs4 == array_slice($decoded, 0, $prefix_len)) {
        throw new InvalidArgumentException('This is already a punycode string');
    }

    // We will not try to encode strings consisting of basic code points only
    $encodable = false;
    foreach ($decoded as $v) {
        if ($v > 0x7a) {
            $encodable = true;
            break;
        }
    }
    if (!$encodable) {
        if ($this->_strict_mode) {
            throw new InvalidArgumentException('The given string does not contain encodable chars');
        }

        return false;
    }

    // Do NAMEPREP
    $decoded  = $this->_nameprep($decoded);
    $deco_len = count($decoded);

    // Empty array
    if (!$deco_len) {
        return false;
    }

    // How many chars have been consumed
    $codecount = 0;

    // Copy all basic code points to output.
    // NOTE(review): besides [0-9A-Za-z-] these ranges also admit 0x3A-0x3F
    // and 0x7B; presumably Nameprep rejects those before we get here - confirm.
    $encoded = '';
    for ($i = 0; $i < $deco_len; ++$i) {
        $test = $decoded[$i];
        if ((0x2F < $test && $test < 0x40)
            || (0x40 < $test && $test < 0x5B)
            || (0x60 < $test && $test <= 0x7B)
            || (0x2D == $test)
        ) {
            $encoded .= chr($test);
            $codecount++;
        }
    }

    // All codepoints were basic ones
    if ($codecount == $deco_len) {
        return $encoded;
    }

    // Start with the prefix; copy it to output
    $encoded = $this->_punycode_prefix . $encoded;

    // If we have basic code points in output, add an hyphen to the end
    if ($codecount) {
        $encoded .= '-';
    }

    // Now find and encode all non-basic code points
    $is_first = true;
    $cur_code = $this->_initial_n;
    $bias     = $this->_initial_bias;
    $delta    = 0;

    while ($codecount < $deco_len) {
        // Find the smallest code point >= the current code point and
        // remember the last occurrence of it in the input
        for ($i = 0, $next_code = $this->_max_ucs; $i < $deco_len; $i++) {
            if ($decoded[$i] >= $cur_code && $decoded[$i] <= $next_code) {
                $next_code = $decoded[$i];
            }
        }

        $delta   += ($next_code - $cur_code) * ($codecount + 1);
        $cur_code = $next_code;

        // Scan input again and encode all characters whose code point is $cur_code
        for ($i = 0; $i < $deco_len; $i++) {
            if ($decoded[$i] < $cur_code) {
                $delta++;
            } else if ($decoded[$i] == $cur_code) {
                // Emit $delta as a generalized variable-length integer
                for ($q = $delta, $k = $this->_base; 1; $k += $this->_base) {
                    $t = ($k <= $bias)?
                        $this->_tmin :
                        (($k >= $bias + $this->_tmax)? $this->_tmax : $k - $bias);

                    if ($q < $t) {
                        break;
                    }

                    $encoded .= $this->_encodeDigit($t + (($q - $t) % ($this->_base - $t)));

                    // Keep $q an integer: a float quotient would later be
                    // truncated implicitly by % and chr()
                    $q = (int) (($q - $t) / ($this->_base - $t));
                }

                $encoded .= $this->_encodeDigit($q);
                $bias     = $this->_adapt($delta, $codecount + 1, $is_first);
                $codecount++;
                $delta    = 0;
                $is_first = false;
            }
        }

        $delta++;
        $cur_code++;
    }

    return $encoded;
}
2657
/**
 * The actual decoding algorithm (Punycode with the ACE prefix).
 *
 * @param string $encoded Encoded string which should be decoded
 *
 * @return string|false Decoded string (UTF-8); false if the input does not
 *                      start with the Punycode prefix, has nothing after it,
 *                      or ends in the middle of an encoded integer
 * @throws Exception
 * @access private
 */
private function _decode($encoded)
{
    $prefix_pattern = '!^' . preg_quote($this->_punycode_prefix, '!') . '!';

    // We do need to find the Punycode prefix
    if (!preg_match($prefix_pattern, $encoded)) {
        return false;
    }

    $encode_test = preg_replace($prefix_pattern, '', $encoded);

    // If nothing left after removing the prefix, it is hopeless
    if (!$encode_test) {
        return false;
    }

    $prefix_len = self::_byteLength($this->_punycode_prefix);

    // Find last occurrence of the delimiter
    $delim_pos = strrpos($encoded, '-');

    // Everything between the prefix and the last delimiter are basic code
    // points which are copied literally
    $decoded = array();
    if ($delim_pos > $prefix_len) {
        for ($k = $prefix_len; $k < $delim_pos; ++$k) {
            $decoded[] = ord($encoded[$k]);
        }
    }

    $deco_len = count($decoded);
    $enco_len = self::_byteLength($encoded);

    // Wandering through the strings; init
    $is_first = true;
    $bias     = $this->_initial_bias;
    $idx      = 0;
    $char     = $this->_initial_n;

    for ($enco_idx = ($delim_pos)? ($delim_pos + 1) : 0; $enco_idx < $enco_len; ++$deco_len) {
        // Decode one generalized variable-length integer into $idx
        for ($old_idx = $idx, $w = 1, $k = $this->_base; 1 ; $k += $this->_base) {
            // The input ended in the middle of an integer: it is malformed,
            // report it as not decodable instead of reading past the end
            if ($enco_idx >= $enco_len) {
                return false;
            }

            $digit = $this->_decodeDigit($encoded[$enco_idx++]);
            $idx  += $digit * $w;

            $t = ($k <= $bias) ?
                $this->_tmin :
                (($k >= $bias + $this->_tmax)? $this->_tmax : ($k - $bias));

            if ($digit < $t) {
                break;
            }

            $w = (int)($w * ($this->_base - $t));
        }

        $bias     = $this->_adapt($idx - $old_idx, $deco_len + 1, $is_first);
        $is_first = false;
        $char    += (int) ($idx / ($deco_len + 1));
        $idx     %= ($deco_len + 1);

        if ($deco_len > 0) {
            // Make room for the decoded char
            for ($i = $deco_len; $i > $idx; $i--) {
                $decoded[$i] = $decoded[($i - 1)];
            }
        }

        $decoded[$idx++] = $char;
    }

    return $this->_ucs4_to_utf8($decoded);
}
2734
/**
 * Adapt the bias according to the current code point and position.
 *
 * The delta is first damped (by the damp factor on the first call, by 2
 * afterwards) and increased by its share per code point; it is then
 * reduced repeatedly until it drops to the threshold, counting one base
 * step per reduction.
 *
 * @param int     $delta    Delta that has just been encoded/decoded
 * @param int     $npoints  Number of code points handled so far
 * @param boolean $is_first Whether this is the first adaption for this string
 *
 * @return int New bias
 * @access private
 */
private function _adapt($delta, $npoints, $is_first)
{
    $divisor = $is_first ? $this->_damp : 2;

    $delta  = (int) ($delta / $divisor);
    $delta += (int) ($delta / $npoints);

    $span      = $this->_base - $this->_tmin;
    $threshold = ($span * $this->_tmax) / 2;

    $k = 0;
    while ($delta > $threshold) {
        $delta = (int) ($delta / $span);
        $k    += $this->_base;
    }

    return (int) ($k + ($span + 1) * $delta / ($delta + $this->_skew));
}
2756
/**
 * Encoding a certain digit.
 *
 * Values below 26 map to the lower case letters starting at 'a',
 * values from 26 on map to the characters starting at '0'.
 *
 * @param int $d One digit to encode
 *
 * @return string Encoded digit (a single character)
 * @access private
 */
private function _encodeDigit($d)
{
    // 97 is ord('a'); 22 is ord('0') - 26
    $offset = ($d < 26) ? 97 : 22;

    return chr($d + $offset);
}
2769
/**
 * Decode a certain digit.
 *
 * '0'-'9' yield 26-35, 'A'-'Z' and 'a'-'z' yield 0-25. The range tests
 * only check the upper bound, so other characters are not reliably mapped
 * to the base: any byte below '0', for instance, also passes the first test.
 *
 * @param string $cp One digit (character) to decode
 *
 * @return int Decoded digit
 * @access private
 */
private function _decodeDigit($cp)
{
    $code = ord($cp);

    if ($code - 48 < 10) {
        return $code - 22;
    }
    if ($code - 65 < 26) {
        return $code - 65;
    }
    if ($code - 97 < 26) {
        return $code - 97;
    }

    return $this->_base;
}
2783
/**
 * Do Nameprep according to RFC3491 and RFC3454.
 *
 * First pass: code points listed as "map to nothing" are dropped,
 * prohibited ones raise an exception, Hangul syllables are decomposed and,
 * for IDNA version 2003 only, code points with an entry in the replacement
 * map are replaced by their (canonically ordered) mapping.
 * Second pass: sequences are handed to _combine() to compose them again.
 *
 * @param array $input Unicode Characters (UCS-4 code points)
 *
 * @return array Unicode Characters, Nameprep'd
 * @throws Net_IDNA2_Exception_Nameprep on prohibited input
 * @access private
 */
private function _nameprep($input)
{
    $output = array();

    // Walking through the input array, performing the required steps on each of
    // the input chars and putting the result into the output array
    // While mapping required chars we apply the canonical ordering

    foreach ($input as $v) {
        // Map to nothing == skip that code point
        if (in_array($v, self::$_np_map_nothing)) {
            continue;
        }

        // Try to find prohibited input
        if (in_array($v, self::$_np_prohibit) || in_array($v, self::$_general_prohibited)) {
            throw new Net_IDNA2_Exception_Nameprep('Prohibited input U+' . sprintf('%08X', $v));
        }

        foreach (self::$_np_prohibit_ranges as $range) {
            if ($range[0] <= $v && $v <= $range[1]) {
                throw new Net_IDNA2_Exception_Nameprep('Prohibited input U+' . sprintf('%08X', $v));
            }
        }

        // Hangul syllable decomposition
        if (0xAC00 <= $v && $v <= 0xD7AF) {
            foreach ($this->_hangulDecompose($v) as $out) {
                $output[] = $out;
            }
        } else if (($this->_version == '2003') && isset(self::$_np_replacemaps[$v])) {
            // There's a decomposition mapping for that code point
            // Decompositions only in version 2003 (original) of IDNA
            foreach ($this->_applyCannonicalOrdering(self::$_np_replacemaps[$v]) as $out) {
                $output[] = $out;
            }
        } else {
            $output[] = $v;
        }
    }

    // Combine code points

    $last_class   = 0;
    $last_starter = 0;
    $out_len      = count($output);

    for ($i = 0; $i < $out_len; ++$i) {
        $class = $this->_getCombiningClass($output[$i]);

        if ((!$last_class || $last_class != $class) && $class) {
            // Try to match
            $seq_len = $i - $last_starter;
            $out     = $this->_combine(array_slice($output, $last_starter, $seq_len));

            // On match: Replace the last starter with the composed character and remove
            // the now redundant non-starter(s)
            if ($out) {
                $output[$last_starter] = $out;

                // _combine() normally returns a single code point (an integer).
                // count() must not be called on a scalar: it warns on PHP 7.2+
                // and throws a TypeError on PHP 8. A scalar counts as one element.
                $out_count = is_array($out) ? count($out) : 1;

                if ($out_count != $seq_len) {
                    for ($j = $i + 1; $j < $out_len; ++$j) {
                        $output[$j - 1] = $output[$j];
                    }

                    // NOTE(review): index $out_len is one past the last element,
                    // so this looks like a no-op - kept as is, confirm intent.
                    unset($output[$out_len]);
                }

                // Rewind the for loop by one, since there can be more possible compositions
                $i--;
                $out_len--;
                $last_class = ($i == $last_starter)? 0 : $this->_getCombiningClass($output[$i - 1]);

                continue;
            }
        }

        // The current class is 0
        if (!$class) {
            $last_starter = $i;
        }

        $last_class = $class;
    }

    return $output;
}
2880
/**
 * Decomposes a Hangul syllable
 * (see http://www.unicode.org/unicode/reports/tr15/#Hangul).
 *
 * @param integer $char 32bit UCS4 code point
 *
 * @return array Either the Hangul syllable decomposed into two or three
 *               code points, or the original 32bit value as a one value
 *               array if it is outside the syllable range
 * @access private
 */
private function _hangulDecompose($char)
{
    $sindex = $char - $this->_sbase;

    // Not a syllable at all: hand the code point back untouched
    if (!(0 <= $sindex && $sindex < $this->_scount)) {
        return array($char);
    }

    $leading  = (int)($this->_lbase + $sindex / $this->_ncount);
    $vowel    = (int)($this->_vbase + ($sindex % $this->_ncount) / $this->_tcount);
    $trailing = $this->_tbase + $sindex % $this->_tcount;

    $result = array($leading, $vowel);

    // A trailing part equal to the base means "no trailing part"
    if ($trailing != $this->_tbase) {
        $result[] = $trailing;
    }

    return $result;
}
2910
/**
 * Composes a Hangul syllable
 * (see http://www.unicode.org/unicode/reports/tr15/#Hangul).
 *
 * Walks the sequence once and merges a leading + vowel pair into an LV
 * syllable, and an LV syllable + trailing part into an LVT syllable.
 * Everything else is copied unchanged.
 *
 * @param array $input Decomposed UCS4 sequence
 *
 * @return array UCS4 sequence with syllables composed
 * @access private
 */
private function _hangulCompose($input)
{
    $inp_len = count($input);

    if (!$inp_len) {
        return array();
    }

    $result   = array();
    $last     = $input[0];
    $result[] = $last; // copy first char from input to output

    for ($i = 1; $i < $inp_len; ++$i) {
        $char = $input[$i];

        // Find out, whether two current characters from L and V
        $lindex = $last - $this->_lbase;

        if (0 <= $lindex && $lindex < $this->_lcount) {
            $vindex = $char - $this->_vbase;

            if (0 <= $vindex && $vindex < $this->_vcount) {
                // create syllable of form LV
                $last    = ($this->_sbase + ($lindex * $this->_vcount + $vindex) * $this->_tcount);
                $out_off = count($result) - 1;
                $result[$out_off] = $last; // reset last

                // discard char
                continue;
            }
        }

        // Find out, whether two current characters are LV and T
        $sindex = $last - $this->_sbase;

        if (0 <= $sindex && $sindex < $this->_scount && ($sindex % $this->_tcount) == 0) {
            $tindex = $char - $this->_tbase;

            // Both bounds are exclusive: index 0 is the base itself, which
            // _hangulDecompose() treats as "no trailing part", and the
            // largest index it can produce is _tcount - 1. Accepting 0 would
            // swallow the character, accepting _tcount would yield the next
            // LV syllable.
            if (0 < $tindex && $tindex < $this->_tcount) {
                // create syllable of form LVT
                $last   += $tindex;
                $out_off = count($result) - 1;
                $result[$out_off] = $last; // reset last

                // discard char
                continue;
            }
        }

        // if neither case was true, just add the character
        $last     = $char;
        $result[] = $char;
    }

    return $result;
}
2976
/**
 * Returns the combining class of a certain wide char.
 *
 * @param integer $char Wide char to check (32bit integer)
 *
 * @return integer Combining class as listed in the lookup table, else 0
 * @access private
 */
private function _getCombiningClass($char)
{
    if (isset(self::$_np_norm_combcls[$char])) {
        return self::$_np_norm_combcls[$char];
    }

    return 0;
}
2989
/**
 * Applies the canonical ordering of a decomposed UCS4 sequence.
 *
 * Code points with a non-zero combining class are moved leftward past
 * neighbours with a higher combining class. Passes are repeated until one
 * pass completes without moving anything.
 *
 * @param array $input Decomposed UCS4 sequence
 *
 * @return array Ordered UCS4 sequence
 * @access private
 */
private function _applyCannonicalOrdering($input)
{
    $total = count($input);

    do {
        $moved      = false;
        $prev_class = $this->_getCombiningClass($input[0]);

        for ($pos = 1; $pos < $total; ++$pos) {
            $cur_class = $this->_getCombiningClass($input[$pos]);

            if ($cur_class != 0 && $prev_class > $cur_class) {
                // Move item leftward until it fits
                for ($j = $pos; $j > 0; --$j) {
                    if ($this->_getCombiningClass($input[$j - 1]) <= $cur_class) {
                        break;
                    }

                    $tmp           = $input[$j];
                    $input[$j]     = $input[$j - 1];
                    $input[$j - 1] = $tmp;
                    $moved         = true;
                }

                // The element now at this position is the former predecessor
                $cur_class = $prev_class;
            }

            $prev_class = $cur_class;
        }
    } while ($moved);

    return $input;
}
3033
/**
 * Do composition of a sequence of starter and non-starter.
 *
 * Sequences of more than one element are first tried as Hangul: if
 * _hangulCompose() shortens the sequence, its result is returned.
 * Otherwise the replacement map is searched for an entry whose mapping
 * equals the sequence, and the source code point of the first such entry
 * is returned.
 *
 * @param array $input UCS4 Decomposed sequence
 *
 * @return array|int|false Composed Hangul sequence (array), the composed
 *                         code point (integer), or false if nothing matched
 * @access private
 */
private function _combine($input)
{
    $inp_len = count($input);

    // Is it a Hangul syllable?
    if ($inp_len != 1) {
        $hangul = $this->_hangulCompose($input);

        // This place is probably wrong
        if (count($hangul) != $inp_len) {
            return $hangul;
        }
    }

    foreach (self::$_np_replacemaps as $np_src => $np_target) {
        // Cheap rejects first: different first element or different length
        if ($np_target[0] != $input[0] || count($np_target) != $inp_len) {
            continue;
        }

        $matches = false;
        foreach ($input as $pos => $code) {
            $matches = ($code == $np_target[$pos]);
            if (!$matches) {
                break;
            }
        }

        if ($matches) {
            return $np_src;
        }
    }

    return false;
}
3083
/**
 * This converts an UTF-8 encoded string to its UCS-4 (array) representation
 * By talking about UCS-4 we mean arrays of 32bit integers representing
 * each of the "chars". This is due to PHP not being able to handle strings with
 * bit depth different from 8. This applies to the reverse method _ucs4_to_utf8(), too.
 * The following UTF-8 encodings are supported:
 *
 * bytes bits representation
 * 1      7   0xxxxxxx
 * 2     11   110xxxxx 10xxxxxx
 * 3     16   1110xxxx 10xxxxxx 10xxxxxx
 * 4     21   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 * 5     26   111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 * 6     31   1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
 *
 * Each x represents a bit that can be used to store character data.
 *
 * @param string $input utf8-encoded string
 *
 * @return array ucs4-encoded array
 * @throws UnexpectedValueException on malformed input, including input that
 *                                  ends in the middle of a multi-byte sequence
 * @throws OutOfRangeException      on code points outside the legal range,
 *                                  unless overlong sequences are allowed
 * @access private
 */
private function _utf8_to_ucs4($input)
{
    $output  = array();
    $out_len = 0;
    $inp_len = self::_byteLength($input);
    $mode    = 'next'; // 'next': expecting a start byte, 'add': expecting a continuation byte
    $test    = 'none'; // 'range': the next continuation byte still needs the range check

    for ($k = 0; $k < $inp_len; ++$k) {
        $v = ord($input[$k]); // Extract byte from input string

        if ($v < 128) { // We found an ASCII char - put into string as is
            if ('add' == $mode) {
                throw new UnexpectedValueException('Conversion from UTF-8 to UCS-4 failed: malformed input at byte '.$k);
            }
            $output[$out_len] = $v;
            ++$out_len;
            continue;
        }

        if ('next' == $mode) { // Try to find the next start byte; determine the width of the Unicode char
            $start_byte = $v;
            $mode       = 'add';
            $test       = 'range';

            // $next_byte tells, how many times subsequent bitmasks must rotate 6bits to the left
            if ($v >> 5 == 6) { // &110xxxxx 10xxxxx
                $next_byte = 0;
                $v         = ($v - 192) << 6;
            } elseif ($v >> 4 == 14) { // &1110xxxx 10xxxxxx 10xxxxxx
                $next_byte = 1;
                $v         = ($v - 224) << 12;
            } elseif ($v >> 3 == 30) { // &11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                $next_byte = 2;
                $v         = ($v - 240) << 18;
            } elseif ($v >> 2 == 62) { // &111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
                $next_byte = 3;
                $v         = ($v - 248) << 24;
            } elseif ($v >> 1 == 126) { // &1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
                $next_byte = 4;
                $v         = ($v - 252) << 30;
            } else {
                throw new UnexpectedValueException('This might be UTF-8, but I don\'t understand it at byte '.$k);
            }

            // Store the bits of the start byte; continuation bytes are added below
            $output[$out_len] = (int) $v;
            ++$out_len;
            continue;
        }

        if ('add' == $mode) {
            if (!$this->_allow_overlong && $test == 'range') {
                $test = 'none';
                if (($v < 0xA0 && $start_byte == 0xE0) || ($v < 0x90 && $start_byte == 0xF0) || ($v > 0x8F && $start_byte == 0xF4)) {
                    throw new OutOfRangeException('Bogus UTF-8 character detected (out of legal range) at byte '.$k);
                }
            }

            if ($v >> 6 == 2) { // Bit mask must be 10xxxxxx
                $v = ($v - 128) << ($next_byte * 6);
                $output[($out_len - 1)] += $v;
                --$next_byte;
            } else {
                throw new UnexpectedValueException('Conversion from UTF-8 to UCS-4 failed: malformed input at byte '.$k);
            }

            if ($next_byte < 0) {
                $mode = 'next';
            }
        }
    } // for

    // Still waiting for continuation bytes: the input was cut off mid-character
    if ('add' == $mode) {
        throw new UnexpectedValueException('Conversion from UTF-8 to UCS-4 failed: malformed input at byte '.$inp_len);
    }

    return $output;
}
3174
/**
 * Convert UCS-4 array into UTF-8 string
 *
 * Values below 128 are copied as single bytes. Larger values are written
 * as one lead byte followed by one to five continuation bytes of six bits
 * each, depending on their magnitude.
 *
 * @param array $input ucs4-encoded array
 *
 * @return string utf8-encoded string
 * @throws UnexpectedValueException if a value does not fit into six bytes
 * @access private
 */
private function _ucs4_to_utf8($input)
{
    $utf8 = '';

    foreach ($input as $cp) {
        // 7bit are transferred literally
        if ($cp < 128) {
            $utf8 .= chr($cp);
            continue;
        }

        // Pick the lead byte marker and the number of continuation bytes
        if ($cp < (1 << 11)) {
            $marker = 192;
            $extra  = 1;
        } elseif ($cp < (1 << 16)) {
            $marker = 224;
            $extra  = 2;
        } elseif ($cp < (1 << 21)) {
            $marker = 240;
            $extra  = 3;
        } elseif ($cp < (1 << 26)) {
            $marker = 248;
            $extra  = 4;
        } elseif ($cp < (1 << 31)) {
            $marker = 252;
            $extra  = 5;
        } else {
            throw new UnexpectedValueException('Conversion from UCS-4 to UTF-8 failed: malformed input');
        }

        // Lead byte carries the topmost bits ...
        $utf8 .= chr($marker + ($cp >> (6 * $extra)));

        // ... followed by six bits per continuation byte, highest group first
        for ($shift = 6 * ($extra - 1); $shift >= 0; $shift -= 6) {
            $utf8 .= chr(128 + (($cp >> $shift) & 63));
        }
    }

    return $utf8;
}
3231
/**
 * Convert UCS-4 array into UCS-4 string
 *
 * Every value is written as four bytes, most significant byte first. This
 * is the byte order _ucs4_string_to_ucs4() reads.
 *
 * @param array $input ucs4-encoded array
 *
 * @return string ucs4-encoded string (four bytes per value)
 * @access private
 */
private function _ucs4_to_ucs4_string($input)
{
    $output = '';

    // Take array values and split output to 4 bytes per value.
    // Shift first, then mask with 255 (&11111111), and turn each byte into
    // a character - concatenating the bare integers would emit decimal text.
    foreach ($input as $v) {
        $output .= chr(($v >> 24) & 255)
            . chr(($v >> 16) & 255)
            . chr(($v >> 8) & 255)
            . chr($v & 255);
    }

    return $output;
}
3251
/**
 * Convert UCS-4 string into UCS-4 array
 *
 * Every group of four bytes is read as one value, most significant byte
 * first.
 *
 * @param string $input ucs4-encoded string
 *
 * @return array ucs4-encoded array
 * @throws InvalidArgumentException if the input length is not a multiple of 4
 * @access private
 */
private function _ucs4_string_to_ucs4($input)
{
    $output = array();

    $inp_len = self::_byteLength($input);
    // Input length must be dividable by 4
    if ($inp_len % 4) {
        throw new InvalidArgumentException('Input UCS4 string is broken');
    }

    // Empty input - return empty output
    if (!$inp_len) {
        return $output;
    }

    for ($i = 0, $out_len = -1; $i < $inp_len; ++$i) {
        // Increment output position every 4 input bytes.
        // The parentheses matter: "!$i % 4" would negate $i first and thus
        // only ever start a new value at offset 0.
        if (($i % 4) == 0) {
            $out_len++;
            $output[$out_len] = 0;
        }
        $output[$out_len] += ord($input[$i]) << (8 * (3 - ($i % 4)));
    }

    return $output;
}
3286
/**
 * Echo hex representation of UCS4 sequence.
 *
 * Prints one line per element in the form "[key] => HEX", optionally
 * followed by the bit pattern of the value in parentheses.
 *
 * @param array   $input       UCS4 sequence
 * @param boolean $include_bit Include bitmask in output
 *
 * @return void
 * @static
 * @access private
 */
private static function _showHex($input, $include_bit = false)
{
    foreach ($input as $pos => $code) {
        $line = '[' . $pos . '] => ' . sprintf('%X', $code);

        if ($include_bit) {
            $line .= ' (' . self::_showBitmask($code) . ')';
        }

        echo $line . "\n";
    }
}
3309
/**
 * Gives you a bit representation of given Byte (8 bits), Word (16 bits) or DWord (32 bits)
 * Output width is automagically determined
 *
 * @param int $octet Value to render; values below 256 give 8 digits,
 *                   values below 65536 give 16 digits, larger ones 32
 *
 * @return string Bitmask-representation, most significant bit first
 * @static
 * @access private
 */
private static function _showBitmask($octet)
{
    // Choose the narrowest of the three widths that holds the value
    $width = 8;
    if ($octet >= (1 << 8)) {
        $width = 16;
    }
    if ($octet >= (1 << 16)) {
        $width = 32;
    }

    $bits = '';

    for ($shift = $width - 1; $shift >= 0; --$shift) {
        $bits .= (($octet & (1 << $shift)) != 0) ? '1' : '0';
    }

    return $bits;
}
3338
/**
 * Gets the length of a string in bytes even if mbstring function
 * overloading is turned on
 *
 * @param string $string the string for which to get the length.
 *
 * @return integer the length of the string in bytes.
 *
 * @see Net_IDNA2::$_mb_string_overload
 */
private static function _byteLength($string)
{
    // Without overloading, plain strlen() already counts bytes
    if (!self::$_mb_string_overload) {
        return strlen((string)$string);
    }

    // With overloading, ask mbstring explicitly for the 8bit length
    return mb_strlen($string, '8bit');
}
3356
// }}}
3358
3359 // {{{ factory
/**
 * Factory: creates a new Net_IDNA2 instance for the given parameters.
 *
 * @param array $params Set of parameters, passed on to the constructor
 *
 * @return Net_IDNA2
 * @access public
 */
public static function getInstance($params = array())
{
    $instance = new self($params);

    return $instance;
}
3372 // }}}
3373
3374 // {{{ singleton
/**
 * Returns a shared Net_IDNA2 instance for the given parameters,
 * only creating a new instance if no instance with the same
 * parameters currently exists.
 *
 * Instances are kept per serialized parameter set for the lifetime of
 * the request.
 *
 * @param array $params Set of parameters
 *
 * @return object Net_IDNA2
 * @access public
 */
public static function singleton($params = array())
{
    static $pool = array();

    $key = serialize($params);

    if (isset($pool[$key])) {
        return $pool[$key];
    }

    $pool[$key] = self::getInstance($params);

    return $pool[$key];
}
3399 // }}}
3400 }
3401
3402 ?>