Mercurial > hg > xemacs-beta
view tests/automated/query-coding-tests.el @ 4596:4fc32a3a086e
Fix a couple of bugs, #'query-coding-region, #'query-coding-string.
2009-02-04 Aidan Kehoe <kehoea@parhasard.net>
* coding.el (query-coding-region):
Revert this to being a defun, add a compiler macro without
needless binding.
(query-coding-string):
Correct a bug here, string indices are zero- not one-based.
* mule/general-late.el (unicode-query-coding-skip-chars-arg):
Correct the algorithm used to initialise this variable.
author | Aidan Kehoe <kehoea@parhasard.net> |
---|---|
date | Wed, 04 Feb 2009 12:14:38 +0000 |
parents | e6a7054a9c30 |
children | e0a8715fdb1f |
line wrap: on
line source
;; Copyright (C) 2008 Free Software Foundation, Inc. -*- coding: iso-8859-1 -*- ;; Author: Aidan Kehoe <kehoea@parhasard.net> ;; Maintainer: Aidan Kehoe <kehoea@parhasard.net> ;; Created: 2008 ;; Keywords: tests, query-coding-region ;; This file is part of XEmacs. ;; XEmacs is free software; you can redistribute it and/or modify it ;; under the terms of the GNU General Public License as published by ;; the Free Software Foundation; either version 2, or (at your option) ;; any later version. ;; XEmacs is distributed in the hope that it will be useful, but ;; WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;; General Public License for more details. ;; You should have received a copy of the GNU General Public License ;; along with XEmacs; see the file COPYING. If not, write to the Free ;; Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA ;; 02111-1307, USA. ;;; Synched up with: Not in FSF. ;;; Commentary: ;; Test the query-coding-region and query-coding-string implementations for ;; some well-known coding systems. (require 'bytecomp) (defun q-c-debug (&rest aerger) (let ((standard-output (get-buffer-create "query-coding-debug")) (fmt (condition-case nil (and (stringp (first aerger)) (apply #'format aerger)) (error nil)))) (if fmt (progn (princ (apply #'format aerger)) (terpri)) (princ "--> ") (let ((i 1)) (dolist (sgra aerger) (if (> i 1) (princ " ")) (princ (format "%d. " i)) (prin1 sgra) (incf i)) (terpri))))) ;; Comment this out if debugging: (defalias 'q-c-debug #'ignore) (when (featurep 'mule) (let ((ascii-chars-string (apply #'string (loop for i from #x0 to #x7f collect (int-to-char i)))) (latin-1-chars-string (apply #'string (loop for i from #x0 to #xff collect (int-to-char i)))) unix-coding-system text-conversion-error-signalled) (with-temp-buffer (insert ascii-chars-string) ;; First, check all the coding systems that are ASCII-transparent for ;; ASCII-transparency in the check. (dolist (coding-system (delete-duplicates (mapcar #'(lambda (coding-system) (unless (coding-system-alias-p coding-system) ;; We're only interested in the version with ;; Unix line endings right now. (setq unix-coding-system (subsidiary-coding-system (coding-system-base coding-system) 'lf)) (when (and ;; ASCII-transparent (equal ascii-chars-string (encode-coding-string ascii-chars-string unix-coding-system)) (not (memq (coding-system-type unix-coding-system) '(undecided chain)))) unix-coding-system))) (coding-system-list nil)) :test #'eq)) (q-c-debug "looking at coding system %S" (coding-system-name coding-system)) (multiple-value-bind (query-coding-succeeded query-coding-table) (query-coding-region (point-min) (point-max) coding-system) (Assert (eq t query-coding-succeeded)) (Assert (null query-coding-table))) (multiple-value-bind (query-coding-succeeded query-coding-table) (query-coding-string ascii-chars-string coding-system) (Assert (eq t query-coding-succeeded)) (Assert (null query-coding-table)))) (delete-region (point-min) (point-max)) ;; Check for success from the two Latin-1 coding systems (insert latin-1-chars-string) (multiple-value-bind (query-coding-succeeded query-coding-table) (query-coding-region (point-min) (point-max) 'iso-8859-1-unix) (Assert (eq t query-coding-succeeded)) (Assert (null query-coding-table))) (multiple-value-bind (query-coding-succeeded query-coding-table) (query-coding-string (buffer-string) 'iso-8859-1-unix) (Assert (eq t query-coding-succeeded)) (Assert (null query-coding-table))) (multiple-value-bind (query-coding-succeeded query-coding-table) (query-coding-string (buffer-string) 'iso-latin-1-with-esc-unix) (Assert (eq t query-coding-succeeded)) (Assert (null query-coding-table))) ;; Make it fail, check that it fails correctly (insert (decode-char 'ucs #x20AC)) ;; EURO SIGN (multiple-value-bind (query-coding-succeeded query-coding-table) (query-coding-region (point-min) (point-max) 'iso-8859-1-unix) (Assert (null query-coding-succeeded)) (Assert (equal query-coding-table #s(range-table type start-closed-end-open data ((257 258) t))))) (multiple-value-bind (query-coding-succeeded query-coding-table) (query-coding-region (point-min) (point-max) 'iso-latin-1-with-esc-unix) ;; Stupidly, this succeeds. The behaviour is compatible with ;; GNU, though, and we encourage people not to use ;; iso-latin-1-with-esc-unix anyway: (Assert query-coding-succeeded) (Assert (null query-coding-table))) ;; Check that it errors correctly. (setq text-conversion-error-signalled nil) (condition-case nil (query-coding-region (point-min) (point-max) 'iso-8859-1-unix nil t) (text-conversion-error (setq text-conversion-error-signalled t))) (Assert text-conversion-error-signalled) (setq text-conversion-error-signalled nil) (condition-case nil (query-coding-region (point-min) (point-max) 'iso-latin-1-with-esc-unix nil t) (text-conversion-error (setq text-conversion-error-signalled t))) (Assert (null text-conversion-error-signalled)) (delete-region (point-min) (point-max)) (insert latin-1-chars-string) (decode-coding-region (point-min) (point-max) 'windows-1252-unix) (goto-char (point-max)) ;; #'decode-coding-region just messed up point. (multiple-value-bind (query-coding-succeeded query-coding-table) (query-coding-region (point-min) (point-max) 'windows-1252-unix) (Assert (eq t query-coding-succeeded)) (Assert (null query-coding-table))) (insert ?\x80) (multiple-value-bind (query-coding-succeeded query-coding-table) (query-coding-region (point-min) (point-max) 'windows-1252-unix) (Assert (null query-coding-succeeded)) (Assert (equal query-coding-table #s(range-table type start-closed-end-open data ((257 258) t))))) ;; Try a similar approach with koi8-o, the koi8 variant with ;; support for Old Church Slavonic. (delete-region (point-min) (point-max)) (insert latin-1-chars-string) (decode-coding-region (point-min) (point-max) 'koi8-o-unix) (multiple-value-bind (query-coding-succeeded query-coding-table) (query-coding-region (point-min) (point-max) 'koi8-o-unix) (Assert (eq t query-coding-succeeded)) (Assert (null query-coding-table))) (multiple-value-bind (query-coding-succeeded query-coding-table) (query-coding-region (point-min) (point-max) 'escape-quoted) (Assert (eq t query-coding-succeeded)) (Assert (null query-coding-table))) (multiple-value-bind (query-coding-succeeded query-coding-table) (query-coding-region (point-min) (point-max) 'windows-1252-unix) (Assert (null query-coding-succeeded)) (Assert (equal query-coding-table #s(range-table type start-closed-end-open data ((129 131) t (132 133) t (139 140) t (141 146) t (155 156) t (157 161) t (162 170) t (173 176) t (178 187) t (189 192) t (193 257) t))))) (multiple-value-bind (query-coding-succeeded query-coding-table) (query-coding-region (point-min) (point-max) 'koi8-r-unix) (Assert (null query-coding-succeeded)) (Assert (equal query-coding-table #s(range-table type start-closed-end-open data ((129 154) t (155 161) t (162 164) t (165 177) t (178 180) t (181 192) t))))) ;; Check that the Unicode coding systems handle characters ;; without Unicode mappings. (delete-region (point-min) (point-max)) (insert latin-1-chars-string) (decode-coding-region (point-min) (point-max) 'greek-iso-8bit-with-esc) (dolist (coding-system '(utf-16-mac ucs-4-mac utf-16-little-endian-bom-dos ucs-4-dos utf-16-little-endian-mac utf-16-bom-unix utf-16-little-endian ucs-4 utf-16-dos ucs-4-little-endian-dos utf-16-bom-mac utf-16-bom utf-16-unix utf-32-unix utf-32-little-endian utf-32-dos utf-32 utf-32-little-endian-dos utf-8-bom utf-16-bom-dos ucs-4-unix utf-16-little-endian-bom-unix utf-8-bom-mac utf-32-little-endian-unix utf-16 utf-16-little-endian-dos utf-16-little-endian-bom-mac utf-8-bom-dos ucs-4-little-endian-mac utf-8-bom-unix utf-32-little-endian-mac utf-8-dos utf-8-unix utf-32-mac utf-8-mac utf-16-little-endian-unix ucs-4-little-endian ucs-4-little-endian-unix utf-8 utf-16-little-endian-bom)) (multiple-value-bind (query-coding-succeeded query-coding-table) (query-coding-region (point-min) (point-max) coding-system) (Assert (null query-coding-succeeded)) (Assert (equal query-coding-table #s(range-table type start-closed-end-open data ((173 174) t (209 210) t (254 255) t))))) (multiple-value-bind (query-coding-succeeded query-coding-table) (query-coding-region (point-min) 173 coding-system) (Assert (eq t query-coding-succeeded)) (Assert (null query-coding-table))) (multiple-value-bind (query-coding-succeeded query-coding-table) (query-coding-region 174 209 coding-system) (Assert (eq t query-coding-succeeded)) (Assert (null query-coding-table))) (multiple-value-bind (query-coding-succeeded query-coding-table) (query-coding-region 210 254 coding-system) (Assert (eq t query-coding-succeeded)) (Assert (null query-coding-table))) ;; Check that it errors correctly. (setq text-conversion-error-signalled nil) (condition-case nil (query-coding-region (point-min) (point-max) coding-system nil t) (text-conversion-error (setq text-conversion-error-signalled t))) (Assert text-conversion-error-signalled) (setq text-conversion-error-signalled nil) (condition-case nil (query-coding-region (point-min) 173 coding-system nil t) (text-conversion-error (setq text-conversion-error-signalled t))) (Assert (null text-conversion-error-signalled))) ;; Now to test #'encode-coding-char. Most of the functionality was ;; tested in the query-coding-region tests above, so we don't go into ;; as much detail here. (Assert (null (encode-coding-char (decode-char 'ucs #x20ac) 'iso-8859-1))) (Assert (equal "\x80" (encode-coding-char (decode-char 'ucs #x20ac) 'windows-1252))) (delete-region (point-min) (point-max)) ;; And #'unencodable-char-position. (insert latin-1-chars-string) (insert (decode-char 'ucs #x20ac)) (Assert (= 257 (unencodable-char-position (point-min) (point-max) 'iso-8859-1))) (Assert (equal '(257) (unencodable-char-position (point-min) (point-max) 'iso-8859-1 1))) ;; Compatiblity, sigh: (Assert (equal '(257) (unencodable-char-position (point-min) (point-max) 'iso-8859-1 0))) (dotimes (i 6) (insert (decode-char 'ucs #x20ac))) ;; Check if it stops at one: (Assert (equal '(257) (unencodable-char-position (point-min) (point-max) 'iso-8859-1 1))) ;; Check if it stops at four: (Assert (equal '(260 259 258 257) (unencodable-char-position (point-min) (point-max) 'iso-8859-1 4))) ;; Check whether it stops at seven: (Assert (equal '(263 262 261 260 259 258 257) (unencodable-char-position (point-min) (point-max) 'iso-8859-1 7))) ;; Check that it still stops at seven: (Assert (equal '(263 262 261 260 259 258 257) (unencodable-char-position (point-min) (point-max) 'iso-8859-1 2000))) ;; Now, #'check-coding-systems-region. ;; UTF-8 should certainly be able to encode these characters: (Assert (eq t (check-coding-systems-region (point-min) (point-max) '(utf-8)))) (Assert (equal '((iso-8859-1 257 258 259 260 261 262 263) (windows-1252 129 131 132 133 134 135 136 137 138 139 140 141 143 146 147 148 149 150 151 152 153 154 155 156 157 159 160)) (sort (check-coding-systems-region (point-min) (point-max) '(utf-8 iso-8859-1 windows-1252)) ;; (The sort is to make the algorithm irrelevant.) #'(lambda (left right) (string< (car left) (car right)))))) ;; Ensure that the indices are all decreased by one when passed a ;; string: (Assert (equal '((iso-8859-1 256 257 258 259 260 261 262) (windows-1252 128 130 131 132 133 134 135 136 137 138 139 140 142 145 146 147 148 149 150 151 152 153 154 155 156 158 159)) (sort (check-coding-systems-region (buffer-string) nil '(utf-8 iso-8859-1 windows-1252)) #'(lambda (left right) (string< (car left) (car right)))))))))