Mercurial > hg > xemacs-beta
comparison lisp/descr-text.el @ 5268:09f8ed0933c7
Avoid byte compiler warnings, some needless consing, descr-text.el
lisp/ChangeLog addition:
2010-09-16 Aidan Kehoe <kehoea@parhasard.net>
* descr-text.el (unidata-initialize-unicodedata-database)
(unidata-initialize-unihan-database, describe-char-unicode-data)
(describe-char-unicode-data):
Wrap calls to the database functions with (with-fboundp ...),
avoiding byte compile warnings on builds without support for the
database functions.
(describe-char): (reduce #'max ...), not (apply #'max ...), no
need to cons needlessly.
(describe-char): Remove a redundant lambda wrapping
#'extent-properties.
(describe-char-unicode-data): Call #'nsubst when replacing "" with
nil in the result of #'split-string, instead of consing inside
mapcar.
author | Aidan Kehoe <kehoea@parhasard.net> |
---|---|
date | Thu, 16 Sep 2010 15:24:40 +0100 |
parents | b36d089cbed5 |
children | 174aed57a32a 308d34e9f07d |
comparison
equal
deleted
inserted
replaced
5267:668c73e222fd | 5268:09f8ed0933c7 |
---|---|
455 course of events UNIDATA-FILE-NAME is the value of | 455 course of events UNIDATA-FILE-NAME is the value of |
456 `unidata-default-file-name', which see. " | 456 `unidata-default-file-name', which see. " |
457 (check-argument-type #'file-readable-p unidata-file-name) | 457 (check-argument-type #'file-readable-p unidata-file-name) |
458 (unless unidata-database-format | 458 (unless unidata-database-format |
459 (error 'unimplemented "No (non-SQL) DB support available")) | 459 (error 'unimplemented "No (non-SQL) DB support available")) |
460 (let* ((database-format unidata-database-format) | 460 (with-fboundp '(open-database put-database close-database) |
461 (size (eighth (file-attributes unidata-file-name))) | 461 (let* ((database-format unidata-database-format) |
462 (database-file-name | 462 (size (eighth (file-attributes unidata-file-name))) |
463 (unidata-generate-database-file-name unidata-file-name | 463 (database-file-name |
464 size database-format)) | 464 (unidata-generate-database-file-name unidata-file-name |
465 (database-handle (open-database database-file-name database-format | 465 size database-format)) |
466 nil "rw+" #o644 'no-conversion-unix)) | 466 (database-handle (open-database database-file-name database-format |
467 (coding-system-for-read 'no-conversion-unix) | 467 nil "rw+" #o644 |
468 (buffer-size 32768) | 468 'no-conversion-unix)) |
469 (offset-start 0) | 469 (coding-system-for-read 'no-conversion-unix) |
470 (offset-end buffer-size) | 470 (buffer-size 32768) |
471 (range-information (make-range-table 'start-closed-end-closed)) | 471 (offset-start 0) |
472 (range-staging (make-hash-table :test 'equal)) | 472 (offset-end buffer-size) |
473 (message "Initializing UnicodeData database cache: ") | 473 (range-information (make-range-table 'start-closed-end-closed)) |
474 (loop-count 1) | 474 (range-staging (make-hash-table :test 'equal)) |
475 range-startinfo) | 475 (message "Initializing UnicodeData database cache: ") |
476 (with-temp-buffer | 476 (loop-count 1) |
477 range-startinfo) | |
478 (with-temp-buffer | |
479 (progress-feedback-with-label 'describe-char-unicodedata-file | |
480 "%s" 0 message) | |
481 (while (progn | |
482 (delete-region (point-min) (point-max)) | |
483 (insert-file-contents unidata-file-name nil | |
484 offset-start offset-end) | |
485 ;; If we've reached the end of the data, pass nil back to | |
486 ;; the while loop test. | |
487 (not (= (point-min) (point-max)))) | |
488 | |
489 (when (= buffer-size (- (point-max) (point-min))) | |
490 ;; If we're in the body of the file, and there's a trailing | |
491 ;; incomplete end-line, delete it, and adjust offset-end | |
492 ;; appropriately. | |
493 (goto-char (point-max)) | |
494 (search-backward "\n") | |
495 (forward-char) | |
496 (delete-region (point) (point-max)) | |
497 (setq offset-end (+ offset-start (- (point) (point-min))))) | |
498 | |
499 (progress-feedback-with-label 'describe-char-unicodedata-file | |
500 "%s" (truncate | |
501 (* (/ offset-start size) 100)) | |
502 (concat message | |
503 (make-string | |
504 (mod loop-count 39) ?.))) | |
505 (incf loop-count) | |
506 (goto-char (point-min)) | |
507 (while (re-search-forward | |
508 #r"^\([0-9A-F]\{4,6\}\);\([^;]*\);.*$" nil t) | |
509 (cond | |
510 ((and (> (- (match-end 2) (match-beginning 2)) 7) | |
511 (equal (substring (match-string 2) -7) | |
512 " First>")) | |
513 ;; Start of a range. Save the start info in range-staging. | |
514 (puthash (substring (match-string 2) 0 -7) | |
515 (list (string-to-int (match-string 1) 16) | |
516 (+ offset-start (1- (match-beginning 0)))) | |
517 range-staging)) | |
518 ((and (> (- (match-end 2) (match-beginning 2)) 7) | |
519 (equal (substring (match-string 2) -6) | |
520 " Last>")) | |
521 ;; End of a range. Combine with the start info, save it to the | |
522 ;; range-information range table. | |
523 (setq range-startinfo | |
524 (gethash (substring (match-string 2) 0 -6) range-staging)) | |
525 (assert range-startinfo nil | |
526 "Unexpected order for range information.") | |
527 (put-range-table | |
528 (first range-startinfo) | |
529 (string-to-int (match-string 1) 16) | |
530 (list (second range-startinfo) | |
531 (+ offset-start (1- (match-end 0)))) | |
532 range-information) | |
533 (remhash (substring (match-string 2) 0 -6) range-staging)) | |
534 (t | |
535 ;; Normal character. Save the associated information in the | |
536 ;; database directly. | |
537 (put-database (match-string 1) | |
538 (format "(%d %d)" | |
539 (+ offset-start (1- (match-beginning 0))) | |
540 (+ offset-start (1- (match-end 0)))) | |
541 database-handle)))) | |
542 (goto-char (point-min)) | |
543 (setq offset-start offset-end | |
544 offset-end (+ buffer-size offset-end)))) | |
545 ;; Save the range information as such in the database. | |
546 (put-database "range-information" | |
547 (let ((print-readably t)) | |
548 (prin1-to-string range-information)) | |
549 database-handle) | |
550 (close-database database-handle) | |
477 (progress-feedback-with-label 'describe-char-unicodedata-file | 551 (progress-feedback-with-label 'describe-char-unicodedata-file |
478 "%s" 0 message) | 552 "%s" 100 message) |
479 (while (progn | 553 database-file-name))) |
480 (delete-region (point-min) (point-max)) | |
481 (insert-file-contents unidata-file-name nil | |
482 offset-start offset-end) | |
483 ;; If we've reached the end of the data, pass nil back to | |
484 ;; the while loop test. | |
485 (not (= (point-min) (point-max)))) | |
486 | |
487 (when (= buffer-size (- (point-max) (point-min))) | |
488 ;; If we're in the body of the file, and there's a trailing | |
489 ;; incomplete end-line, delete it, and adjust offset-end | |
490 ;; appropriately. | |
491 (goto-char (point-max)) | |
492 (search-backward "\n") | |
493 (forward-char) | |
494 (delete-region (point) (point-max)) | |
495 (setq offset-end (+ offset-start (- (point) (point-min))))) | |
496 | |
497 (progress-feedback-with-label 'describe-char-unicodedata-file | |
498 "%s" (truncate | |
499 (* (/ offset-start size) 100)) | |
500 (concat message | |
501 (make-string | |
502 (mod loop-count 39) ?.))) | |
503 (incf loop-count) | |
504 (goto-char (point-min)) | |
505 (while (re-search-forward | |
506 #r"^\([0-9A-F]\{4,6\}\);\([^;]*\);.*$" nil t) | |
507 (cond | |
508 ((and (> (- (match-end 2) (match-beginning 2)) 7) | |
509 (equal (substring (match-string 2) -7) | |
510 " First>")) | |
511 ;; Start of a range. Save the start info in range-staging. | |
512 (puthash (substring (match-string 2) 0 -7) | |
513 (list (string-to-int (match-string 1) 16) | |
514 (+ offset-start (1- (match-beginning 0)))) | |
515 range-staging)) | |
516 ((and (> (- (match-end 2) (match-beginning 2)) 7) | |
517 (equal (substring (match-string 2) -6) | |
518 " Last>")) | |
519 ;; End of a range. Combine with the start info, save it to the | |
520 ;; range-information range table. | |
521 (setq range-startinfo | |
522 (gethash (substring (match-string 2) 0 -6) range-staging)) | |
523 (assert range-startinfo nil | |
524 "Unexpected order for range information.") | |
525 (put-range-table | |
526 (first range-startinfo) | |
527 (string-to-int (match-string 1) 16) | |
528 (list (second range-startinfo) | |
529 (+ offset-start (1- (match-end 0)))) | |
530 range-information) | |
531 (remhash (substring (match-string 2) 0 -6) range-staging)) | |
532 (t | |
533 ;; Normal character. Save the associated information in the | |
534 ;; database directly. | |
535 (put-database (match-string 1) | |
536 (format "(%d %d)" | |
537 (+ offset-start (1- (match-beginning 0))) | |
538 (+ offset-start (1- (match-end 0)))) | |
539 database-handle)))) | |
540 (goto-char (point-min)) | |
541 (setq offset-start offset-end | |
542 offset-end (+ buffer-size offset-end)))) | |
543 ;; Save the range information as such in the database. | |
544 (put-database "range-information" | |
545 (let ((print-readably t)) | |
546 (prin1-to-string range-information)) | |
547 database-handle) | |
548 (close-database database-handle) | |
549 (progress-feedback-with-label 'describe-char-unicodedata-file | |
550 "%s" 100 message) | |
551 database-file-name)) | |
552 | 554 |
553 (defun unidata-initialize-unihan-database (unihan-file-name) | 555 (defun unidata-initialize-unihan-database (unihan-file-name) |
554 "Init the berkeley or gdbm lookup table for UNIHAN-FILE-NAME. | 556 "Init the berkeley or gdbm lookup table for UNIHAN-FILE-NAME. |
555 | 557 |
556 The table is a (non-SQL) database with information on the file offset of | 558 The table is a (non-SQL) database with information on the file offset of |
560 slow, so creating this cache makes it reasonable to display Unihan info in | 562 slow, so creating this cache makes it reasonable to display Unihan info in |
561 the output of \\[universal-argument] \\[what-cursor-position] . " | 563 the output of \\[universal-argument] \\[what-cursor-position] . " |
562 (check-argument-type #'file-readable-p unihan-file-name) | 564 (check-argument-type #'file-readable-p unihan-file-name) |
563 (unless unidata-database-format | 565 (unless unidata-database-format |
564 (error 'unimplemented "No (non-SQL) DB support available")) | 566 (error 'unimplemented "No (non-SQL) DB support available")) |
565 (let* ((database-format unidata-database-format) | 567 (with-fboundp '(open-database put-database close-database) |
566 (size (eighth (file-attributes unihan-file-name))) | 568 (let* ((database-format unidata-database-format) |
567 (database-file-name | 569 (size (eighth (file-attributes unihan-file-name))) |
568 (unidata-generate-database-file-name unihan-file-name | 570 (database-file-name |
569 size database-format)) | 571 (unidata-generate-database-file-name unihan-file-name |
570 (database-handle (open-database database-file-name database-format | 572 size database-format)) |
571 nil "rw+" #o644 'no-conversion-unix)) | 573 (database-handle (open-database database-file-name database-format |
572 (coding-system-for-read 'no-conversion-unix) | 574 nil "rw+" #o644 |
573 (buffer-size 65536) | 575 'no-conversion-unix)) |
574 (offset-start 0) | 576 (coding-system-for-read 'no-conversion-unix) |
575 (offset-end buffer-size) | 577 (buffer-size 65536) |
576 (message "Initializing Unihan database cache: ") | 578 (offset-start 0) |
577 (loop-count 1) | 579 (offset-end buffer-size) |
578 trailing-unicode leading-unicode character-start character-end) | 580 (message "Initializing Unihan database cache: ") |
579 (with-temp-buffer | 581 (loop-count 1) |
582 trailing-unicode leading-unicode character-start character-end) | |
583 (with-temp-buffer | |
584 (progress-feedback-with-label 'describe-char-unihan-file | |
585 "%s" 0 message) | |
586 (while (progn | |
587 (delete-region (point-min) (point-max)) | |
588 (insert-file-contents unihan-file-name nil | |
589 offset-start offset-end) | |
590 ;; If we've reached the end of the data, return nil to the | |
591 ;; while. | |
592 (not (= (point-min) (point-max)))) | |
593 | |
594 (incf loop-count) | |
595 (progress-feedback-with-label 'describe-char-unihan-file | |
596 "%s" (truncate | |
597 (* (/ offset-start size) 100)) | |
598 (concat message | |
599 (make-string | |
600 (mod loop-count 44) ?.))) | |
601 (block 'dealing-with-chars | |
602 (when (= buffer-size (- (point-max) (point-min))) | |
603 ;; If we're in the body of the file, we need to delete the | |
604 ;; character info for the last character, and set offset-end | |
605 ;; appropriately. Otherwise, we may not be able to pick where | |
606 ;; the actual description of a character ends and begins. | |
607 ;; | |
608 ;; This breaks if any single Unihan character description is | |
609 ;; greater than the buffer size in length. | |
610 (goto-char (point-max)) | |
611 (beginning-of-line) | |
612 | |
613 (when (< (- (point-max) (point)) (eval-when-compile | |
614 (length "U+ABCDEF\t"))) | |
615 ;; If the character ID of the last line may have been cut off, | |
616 ;; we need to delete all of that line here. | |
617 (delete-region (point) (point-max)) | |
618 (forward-line -1)) | |
619 | |
620 (when (looking-at "^\\(U\\+[0-9A-F]\\{4,6\\}\\)\t") | |
621 (setq trailing-unicode (match-string 1) | |
622 trailing-unicode | |
623 (format "^%s\t" (regexp-quote trailing-unicode))) | |
624 | |
625 (end-of-line) | |
626 | |
627 ;; Go back until we hit a line that doesn't start with this | |
628 ;; character info. | |
629 (while (re-search-backward trailing-unicode nil t)) | |
630 | |
631 ;; The re-search-backward failed, so point is still at the end | |
632 ;; of the last match. Move to its beginning. | |
633 (beginning-of-line) | |
634 (delete-region (point) (point-max)) | |
635 (setq offset-end (+ offset-start (- (point) (point-min)))))) | |
636 (goto-char (point-min)) | |
637 (while t | |
638 (when (= (point) (point-max)) | |
639 ;; We're at the end of this part of the file. | |
640 (return-from 'dealing-with-chars)) | |
641 | |
642 (unless (re-search-forward "^\\(U\\+[0-9A-F]\\{4,6\\}\\)\t" | |
643 nil t) | |
644 ;; We're probably in the comments at the start of the | |
645 ;; file. No need to look for character info. | |
646 (return-from 'dealing-with-chars)) | |
647 | |
648 ;; Store where the character started. | |
649 (beginning-of-line) | |
650 (setq character-start (point)) | |
651 | |
652 (setq leading-unicode | |
653 (format "^%s\t" (regexp-quote (match-string 1)))) | |
654 | |
655 ;; Loop until we get past this entry. | |
656 (while (re-search-forward leading-unicode nil t)) | |
657 | |
658 ;; Now, store the information. | |
659 (setq leading-unicode | |
660 (string-to-number (substring leading-unicode 3) 16) | |
661 leading-unicode (format "%04X" leading-unicode) | |
662 character-end (prog2 (end-of-line) (point))) | |
663 (put-database leading-unicode | |
664 (format "(%d %d)" | |
665 (+ offset-start (1- character-start)) | |
666 (+ offset-start (1- character-end))) | |
667 database-handle) | |
668 (forward-line))) | |
669 (setq offset-start offset-end | |
670 offset-end (+ buffer-size offset-end)))) | |
671 (close-database database-handle) | |
580 (progress-feedback-with-label 'describe-char-unihan-file | 672 (progress-feedback-with-label 'describe-char-unihan-file |
581 "%s" 0 message) | 673 "%s" 100 |
582 (while (progn | 674 message) |
583 (delete-region (point-min) (point-max)) | 675 database-file-name))) |
584 (insert-file-contents unihan-file-name nil | |
585 offset-start offset-end) | |
586 ;; If we've reached the end of the data, return nil to the | |
587 ;; while. | |
588 (not (= (point-min) (point-max)))) | |
589 | |
590 (incf loop-count) | |
591 (progress-feedback-with-label 'describe-char-unihan-file | |
592 "%s" (truncate | |
593 (* (/ offset-start size) 100)) | |
594 (concat message | |
595 (make-string | |
596 (mod loop-count 44) ?.))) | |
597 (block 'dealing-with-chars | |
598 (when (= buffer-size (- (point-max) (point-min))) | |
599 ;; If we're in the body of the file, we need to delete the | |
600 ;; character info for the last character, and set offset-end | |
601 ;; appropriately. Otherwise, we may not be able to pick where | |
602 ;; the actual description of a character ends and | |
603 ;; begins. | |
604 ;; | |
605 ;; This breaks if any single Unihan character description is | |
606 ;; greater than the buffer size in length. | |
607 (goto-char (point-max)) | |
608 (beginning-of-line) | |
609 | |
610 (when (< (- (point-max) (point)) (eval-when-compile | |
611 (length "U+ABCDEF\t"))) | |
612 ;; If the character ID of the last line may have been cut off, | |
613 ;; we need to delete all of that line here. | |
614 (delete-region (point) (point-max)) | |
615 (forward-line -1)) | |
616 | |
617 (when (looking-at "^\\(U\\+[0-9A-F]\\{4,6\\}\\)\t") | |
618 (setq trailing-unicode (match-string 1) | |
619 trailing-unicode | |
620 (format "^%s\t" (regexp-quote trailing-unicode))) | |
621 | |
622 (end-of-line) | |
623 | |
624 ;; Go back until we hit a line that doesn't start with this | |
625 ;; character info. | |
626 (while (re-search-backward trailing-unicode nil t)) | |
627 | |
628 ;; The re-search-backward failed, so point is still at the end | |
629 ;; of the last match. Move to its beginning. | |
630 (beginning-of-line) | |
631 (delete-region (point) (point-max)) | |
632 (setq offset-end (+ offset-start (- (point) (point-min)))))) | |
633 (goto-char (point-min)) | |
634 (while t | |
635 (when (= (point) (point-max)) | |
636 ;; We're at the end of this part of the file. | |
637 (return-from 'dealing-with-chars)) | |
638 | |
639 (unless (re-search-forward "^\\(U\\+[0-9A-F]\\{4,6\\}\\)\t" | |
640 nil t) | |
641 ;; We're probably in the comments at the start of the file. No | |
642 ;; need to look for character info. | |
643 (return-from 'dealing-with-chars)) | |
644 | |
645 ;; Store where the character started. | |
646 (beginning-of-line) | |
647 (setq character-start (point)) | |
648 | |
649 (setq leading-unicode | |
650 (format "^%s\t" (regexp-quote (match-string 1)))) | |
651 | |
652 ;; Loop until we get past this entry. | |
653 (while (re-search-forward leading-unicode nil t)) | |
654 | |
655 ;; Now, store the information. | |
656 (setq leading-unicode | |
657 (string-to-number (substring leading-unicode 3) 16) | |
658 leading-unicode (format "%04X" leading-unicode) | |
659 character-end (prog2 (end-of-line) (point))) | |
660 (put-database leading-unicode | |
661 (format "(%d %d)" | |
662 (+ offset-start (1- character-start)) | |
663 (+ offset-start (1- character-end))) | |
664 database-handle) | |
665 (forward-line))) | |
666 (setq offset-start offset-end | |
667 offset-end (+ buffer-size offset-end)))) | |
668 (close-database database-handle) | |
669 (progress-feedback-with-label 'describe-char-unihan-file | |
670 "%s" 100 | |
671 message) | |
672 database-file-name)) | |
673 ;; End XEmacs additions. | 676 ;; End XEmacs additions. |
674 | 677 |
675 (defun describe-char-unicode-data (char) | 678 (defun describe-char-unicode-data (char) |
676 "Return a list of Unicode data for unicode CHAR. | 679 "Return a list of Unicode data for unicode CHAR. |
677 Each element is a list of a property description and the property value. | 680 Each element is a list of a property description and the property value. |
686 (when (characterp char) | 689 (when (characterp char) |
687 (setq char (encode-char char 'ucs))) | 690 (setq char (encode-char char 'ucs))) |
688 (with-temp-buffer | 691 (with-temp-buffer |
689 (let ((coding-system-for-read coding-system-for-read) | 692 (let ((coding-system-for-read coding-system-for-read) |
690 database-handle key lookup) | 693 database-handle key lookup) |
691 (if (and describe-char-use-cache | 694 (with-fboundp '(open-database get-database close-database) |
692 (prog1 | 695 (if (and describe-char-use-cache |
693 (setq database-handle | 696 (prog1 |
694 (open-database | 697 (setq database-handle |
695 (unidata-generate-database-file-name | 698 (open-database |
696 describe-char-unicodedata-file | 699 (unidata-generate-database-file-name |
697 (eighth (file-attributes | 700 describe-char-unicodedata-file |
698 describe-char-unicodedata-file)) | 701 (eighth (file-attributes |
699 unidata-database-format) | 702 describe-char-unicodedata-file)) |
700 unidata-database-format | 703 unidata-database-format) |
701 nil "r" | 704 unidata-database-format |
702 #o644 'no-conversion-unix)) | 705 nil "r" |
703 (unless database-handle | 706 #o644 'no-conversion-unix)) |
704 (warn "Could not open %s as a %s database" | 707 (unless database-handle |
705 (unidata-generate-database-file-name | 708 (warn "Could not open %s as a %s database" |
706 describe-char-unicodedata-file | 709 (unidata-generate-database-file-name |
707 (eighth (file-attributes | 710 describe-char-unicodedata-file |
708 describe-char-unicodedata-file)) | 711 (eighth (file-attributes |
709 unidata-database-format) | 712 describe-char-unicodedata-file)) |
710 unidata-database-format)))) | 713 unidata-database-format) |
711 (progn | 714 unidata-database-format)))) |
712 ;; Use the database info. | 715 (progn |
713 (setq coding-system-for-read 'no-conversion-unix | 716 ;; Use the database info. |
714 key (format "%04X" char) | 717 (setq coding-system-for-read 'no-conversion-unix |
715 lookup (get-database key database-handle)) | 718 key (format "%04X" char) |
716 (if lookup | 719 lookup (get-database key database-handle)) |
717 ;; Okay, we have information on that character in particular. | 720 (if lookup |
718 (progn (setq lookup (read lookup)) | 721 ;; Okay, we have information on that character in |
719 (insert-file-contents describe-char-unicodedata-file | 722 ;; particular. |
720 nil (first lookup) | 723 (progn (setq lookup (read lookup)) |
721 (second lookup))) | 724 (insert-file-contents describe-char-unicodedata-file |
722 ;; No information on that character in particular. Do we | 725 nil (first lookup) |
723 ;; have range information? If so, load and check for our | 726 (second lookup))) |
724 ;; desired character. | 727 ;; No information on that character in particular. Do we |
725 (setq lookup (get-database "range-information" database-handle) | 728 ;; have range information? If so, load and check for our |
726 lookup (if lookup (read lookup)) | 729 ;; desired character. |
727 lookup (if lookup (get-range-table char lookup))) | 730 (setq lookup (get-database "range-information" |
728 (when lookup | 731 database-handle) |
729 (insert-file-contents describe-char-unicodedata-file nil | 732 lookup (if lookup (read lookup)) |
730 (first lookup) (second lookup)))) | 733 lookup (if lookup (get-range-table char lookup))) |
731 (close-database database-handle)) | 734 (when lookup |
732 ;; Otherwise, insert the whole file (the FSF approach). | 735 (insert-file-contents describe-char-unicodedata-file nil |
733 (set-buffer (get-buffer-create " *Unicode Data*")) | 736 (first lookup) (second lookup)))) |
734 (when (zerop (buffer-size)) | 737 (close-database database-handle)) |
735 ;; Don't use -literally in case of DOS line endings. | 738 ;; Otherwise, insert the whole file (the FSF approach). |
736 (insert-file-contents describe-char-unicodedata-file)))) | 739 (set-buffer (get-buffer-create " *Unicode Data*")) |
740 (when (zerop (buffer-size)) | |
741 ;; Don't use -literally in case of DOS line endings. | |
742 (insert-file-contents describe-char-unicodedata-file))))) | |
737 (goto-char (point-min)) | 743 (goto-char (point-min)) |
738 (let ((hex (format "%04X" char)) | 744 (let ((hex (format "%04X" char)) |
739 found first last unihan-match unihan-info unihan-database-handle | 745 found first last unihan-match unihan-info unihan-database-handle |
740 (coding-system-for-read 'no-conversion-unix)) | 746 (coding-system-for-read 'no-conversion-unix)) |
741 (if (re-search-forward (concat "^" hex) nil t) | 747 (if (re-search-forward (concat "^" hex) nil t) |
753 (string-to-number (match-string 1) 16)))))) | 759 (string-to-number (match-string 1) 16)))))) |
754 (if (and first (>= char first) | 760 (if (and first (>= char first) |
755 last (<= char last)) | 761 last (<= char last)) |
756 (setq found t))) | 762 (setq found t))) |
757 (if found | 763 (if found |
758 (let ((fields (mapcar (lambda (elt) | 764 (let ((fields (cdr (nsubst nil "" (split-string |
759 (if (> (length elt) 0) | 765 (buffer-substring |
760 elt)) | 766 (line-beginning-position) |
761 (cdr (split-string | 767 (line-end-position)) ";") |
762 (buffer-substring | 768 :test 'equal)))) |
763 (line-beginning-position) | |
764 (line-end-position)) | |
765 ";"))))) | |
766 ;; The length depends on whether the last field was empty. | 769 ;; The length depends on whether the last field was empty. |
767 (unless (or (= 13 (length fields)) | 770 (unless (or (= 13 (length fields)) |
768 (= 14 (length fields))) | 771 (= 14 (length fields))) |
769 (error 'invalid-argument | 772 (error 'invalid-argument |
770 (format "Invalid contents in %s" | 773 (format "Invalid contents in %s" |
917 ;; the Unihan information, or tell the user that it's not | 920 ;; the Unihan information, or tell the user that it's not |
918 ;; available. | 921 ;; available. |
919 (if (and (> (length (nth 0 fields)) 13) | 922 (if (and (> (length (nth 0 fields)) 13) |
920 (equal "<CJK Ideograph" | 923 (equal "<CJK Ideograph" |
921 (substring (nth 0 fields) 0 14))) | 924 (substring (nth 0 fields) 0 14))) |
922 (if (and describe-char-unihan-file | 925 (with-fboundp '(open-database get-database close-database) |
923 (setq unihan-database-handle | 926 (if (and describe-char-unihan-file |
924 (open-database | 927 (setq unihan-database-handle |
925 (unidata-generate-database-file-name | 928 (open-database |
926 describe-char-unihan-file | 929 (unidata-generate-database-file-name |
927 (eighth (file-attributes | 930 describe-char-unihan-file |
928 describe-char-unihan-file)) | 931 (eighth (file-attributes |
929 unidata-database-format) | 932 describe-char-unihan-file)) |
930 unidata-database-format | 933 unidata-database-format) |
931 nil "r" #o644 'no-conversion-unix)) | 934 unidata-database-format |
932 (setq unihan-match | 935 nil "r" #o644 'no-conversion-unix)) |
933 (get-database (format "%04X" char) | 936 (setq unihan-match |
934 unihan-database-handle) | 937 (get-database (format "%04X" char) |
935 unihan-match | 938 unihan-database-handle) |
936 (and unihan-match (read unihan-match)))) | 939 unihan-match |
937 (with-temp-buffer | 940 (and unihan-match (read unihan-match)))) |
938 (insert-file-contents describe-char-unihan-file | 941 (with-temp-buffer |
939 nil (first unihan-match) | 942 (insert-file-contents describe-char-unihan-file |
940 (second unihan-match)) | 943 nil (first unihan-match) |
941 (goto-char (point-min)) | 944 (second unihan-match)) |
942 (while (re-search-forward | 945 (goto-char (point-min)) |
943 "^U\\+[0-9A-F]+\t\\(k[^\t]+\\)\t\\(.*\\)$" | 946 (while (re-search-forward |
944 nil t) | 947 "^U\\+[0-9A-F]+\t\\(k[^\t]+\\)\t\\(.*\\)$" |
945 (push | 948 nil t) |
946 (list | 949 (push |
947 (or (gethash | 950 (list |
948 (match-string 1) | 951 (or (gethash |
949 describe-char-unihan-field-descriptions) | 952 (match-string 1) |
950 (match-string 1)) | 953 describe-char-unihan-field-descriptions) |
951 (decode-coding-string (match-string 2) 'utf-8)) | 954 (match-string 1)) |
952 unihan-info)) | 955 (decode-coding-string (match-string 2) 'utf-8)) |
953 (close-database unihan-database-handle) | 956 unihan-info)) |
954 unihan-info) | 957 (close-database unihan-database-handle) |
958 unihan-info) | |
955 ;; It's a Han character, but Unihan.txt is not | 959 ;; It's a Han character, but Unihan.txt is not |
956 ;; available. Tell the user. | 960 ;; available. Tell the user. |
957 (list | 961 (list |
958 '("Unihan" | 962 '("Unihan" |
959 "No Unihan information available; is \ | 963 "No Unihan information available; is \ |
960 `describe-char-unihan-file' set, and its cache initialized?"))))))))))) | 964 `describe-char-unihan-file' set, and its cache initialized?")))))))))))) |
961 | 965 |
962 ;; Return information about how CHAR is displayed at the buffer | 966 ;; Return information about how CHAR is displayed at the buffer |
963 ;; position POS. If the selected frame is on a graphic display, | 967 ;; position POS. If the selected frame is on a graphic display, |
964 ;; return a cons (FONTNAME . GLYPH-CODE). Otherwise, return a string | 968 ;; return a cons (FONTNAME . GLYPH-CODE). Otherwise, return a string |
965 ;; describing the terminal codes for the character. | 969 ;; describing the terminal codes for the character. |
1028 (component-chars nil) | 1032 (component-chars nil) |
1029 (display-table | 1033 (display-table |
1030 (specifier-instance current-display-table (selected-window))) | 1034 (specifier-instance current-display-table (selected-window))) |
1031 (disp-table-entry (and display-table | 1035 (disp-table-entry (and display-table |
1032 (get-display-table char display-table))) | 1036 (get-display-table char display-table))) |
1033 (extents (mapcar #'(lambda (o) (extent-properties o)) | 1037 (extents (mapcar #'extent-properties (extents-at pos))) |
1034 (extents-at pos))) | |
1035 (char-description (single-key-description char)) | 1038 (char-description (single-key-description char)) |
1036 (text-props-desc | 1039 (text-props-desc |
1037 (let ((tmp-buf (generate-new-buffer " *text-props*"))) | 1040 (let ((tmp-buf (generate-new-buffer " *text-props*"))) |
1038 (unwind-protect | 1041 (unwind-protect |
1039 (progn | 1042 (progn |
1200 ,@(progn | 1203 ,@(progn |
1201 (setq unicodedata (and unicode | 1204 (setq unicodedata (and unicode |
1202 (describe-char-unicode-data unicode))) | 1205 (describe-char-unicode-data unicode))) |
1203 (if unicodedata | 1206 (if unicodedata |
1204 (cons (list "Unicode data" " ") unicodedata))))) | 1207 (cons (list "Unicode data" " ") unicodedata))))) |
1205 (setq max-width (apply #'max (mapcar #'(lambda (x) | 1208 (setq max-width |
1206 (if (cadr x) (length (car x)) 0)) | 1209 (reduce #'max (remove-if-not #'cadr item-list) :initial-value 0 |
1207 item-list))) | 1210 :key #'(lambda (object) (length (car object))))) |
1208 (when (and unicodedata (> max-width max-unicode-description-width)) | 1211 (when (and unicodedata (> max-width max-unicode-description-width)) |
1209 (setq max-width max-unicode-description-width) | 1212 (setq max-width max-unicode-description-width) |
1210 (with-temp-buffer | 1213 (with-temp-buffer |
1211 (let ((fill-column max-unicode-description-width) | 1214 (let ((fill-column max-unicode-description-width) |
1212 (indent-tabs-mode nil)) | 1215 (indent-tabs-mode nil)) |