comparison tests/automated/query-coding-tests.el @ 4568:1d74a1d115ee

Add #'query-coding-region tests; do the work necessary to get them running. lisp/ChangeLog addition: 2008-12-28 Aidan Kehoe <kehoea@parhasard.net> * coding.el (default-query-coding-region): Declare using defun*, so we can #'return-from to it on encountering a safe-charsets value of t. Comment out a few debug messages. (query-coding-region): Correct the docstring, it deals with a region, not a string. (unencodable-char-position): Correct the implementation for non-nil COUNT, special-case a zero value for count, treat it as one. Don't rely on dynamic scope when calling the main lambda. * unicode.el (unicode-query-coding-region): Comment out some debug messages here. * mule/mule-coding.el (8-bit-fixed-query-coding-region): Comment out some debug messages here. * code-init.el (raw-text): Add a safe-charsets property to this coding system. * mule/korean.el (iso-2022-int-1): * mule/korean.el (euc-kr): * mule/korean.el (iso-2022-kr): Add safe-charsets properties for these coding systems. * mule/japanese.el (iso-2022-jp): * mule/japanese.el (jis7): * mule/japanese.el (jis8): * mule/japanese.el (shift-jis): * mule/japanese.el (iso-2022-jp-1978-irv): * mule/japanese.el (euc-jp): Add safe-charsets properties for all these coding systems. * mule/iso-with-esc.el: Add safe-charsets properties to all the coding systems in here. Comment on the downside of a safe-charsets value of t for iso-latin-1-with-esc. * mule/hebrew.el (ctext-hebrew): Add a safe-charsets property for this coding system. * mule/devanagari.el (in-is13194-devanagari): Add a safe-charsets property for this coding system. * mule/chinese.el (cn-gb-2312): * mule/chinese.el (hz-gb-2312): * mule/chinese.el (big5): Add safe-charsets properties for these coding systems. * mule/latin.el (iso-8859-14): Add an implementation for this, using #'make-8-bit-coding-system. * mule/mule-coding.el (ctext): * mule/mule-coding.el (iso-2022-8bit-ss2): * mule/mule-coding.el (iso-2022-7bit-ss2): * mule/mule-coding.el (iso-2022-jp-2): * mule/mule-coding.el (iso-2022-7bit): * mule/mule-coding.el (iso-2022-8): * mule/mule-coding.el (escape-quoted): * mule/mule-coding.el (iso-2022-lock): Add safe-charsets properties for all these coding systems. src/ChangeLog addition: 2008-12-28 Aidan Kehoe <kehoea@parhasard.net> * file-coding.c (Fmake_coding_system): Document our use of the safe-chars and safe-charsets properties, and the differences compared to GNU. (make_coding_system_1): Don't drop the safe-chars and safe-charsets properties. (Fcoding_system_property): Return the safe-chars and safe-charsets properties when asked for them. * file-coding.h (CODING_SYSTEM_SAFE_CHARSETS): * coding-system-slots.h: Make the safe-chars and safe-charsets slots available in these headers. tests/ChangeLog addition: 2008-12-28 Aidan Kehoe <kehoea@parhasard.net> * automated/query-coding-tests.el: New file, testing the functionality of #'query-coding-region and #'query-coding-string.
author Aidan Kehoe <kehoea@parhasard.net>
date Sun, 28 Dec 2008 14:46:24 +0000
parents
children e6a7054a9c30
comparison
equal deleted inserted replaced
4567:84d618b355f5 4568:1d74a1d115ee
1 ;; Copyright (C) 2008 Free Software Foundation, Inc. -*- coding: iso-8859-1 -*-
2
3 ;; Author: Aidan Kehoe <kehoea@parhasard.net>
4 ;; Maintainer: Aidan Kehoe <kehoea@parhasard.net>
5 ;; Created: 2008
6 ;; Keywords: tests, query-coding-region
7
8 ;; This file is part of XEmacs.
9
10 ;; XEmacs is free software; you can redistribute it and/or modify it
11 ;; under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation; either version 2, or (at your option)
13 ;; any later version.
14
15 ;; XEmacs is distributed in the hope that it will be useful, but
16 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 ;; General Public License for more details.
19
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with XEmacs; see the file COPYING. If not, write to the Free
22 ;; Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
23 ;; 02111-1307, USA.
24
25 ;;; Synched up with: Not in FSF.
26
27 ;;; Commentary:
28
29 ;; Test the query-coding-region and query-coding-string implementations for
30 ;; some well-known coding systems.
31
32 (require 'bytecomp)
33
34 (defun q-c-debug (&rest aerger)
35 (let ((standard-output (get-buffer-create "query-coding-debug"))
36 (fmt (condition-case nil
37 (and (stringp (first aerger))
38 (apply #'format aerger))
39 (error nil))))
40 (if fmt
41 (progn
42 (princ (apply #'format aerger))
43 (terpri))
44 (princ "--> ")
45 (let ((i 1))
46 (dolist (sgra aerger)
47 (if (> i 1) (princ " "))
48 (princ (format "%d. " i))
49 (prin1 sgra)
50 (incf i))
51 (terpri)))))
52
53 ;; Comment this out if debugging:
54 (defalias 'q-c-debug #'ignore)
55
56 (when (featurep 'mule)
57 (let ((ascii-chars-string (apply #'string
58 (loop for i from #x0 to #x7f
59 collect (int-to-char i))))
60 (latin-1-chars-string (apply #'string
61 (loop for i from #x0 to #xff
62 collect (int-to-char i))))
63 unix-coding-system text-conversion-error-signalled)
64 (with-temp-buffer
65 (insert ascii-chars-string)
66 ;; First, check all the coding systems that are ASCII-transparent for
67 ;; ASCII-transparency in the check.
68 (dolist (coding-system
69 (delete-duplicates
70 (mapcar #'(lambda (coding-system)
71 (unless (coding-system-alias-p coding-system)
72 ;; We're only interested in the version with
73 ;; Unix line endings right now.
74 (setq unix-coding-system
75 (subsidiary-coding-system
76 (coding-system-base coding-system) 'lf))
77 (when (and
78 ;; ASCII-transparent
79 (equal ascii-chars-string
80 (encode-coding-string
81 ascii-chars-string
82 unix-coding-system))
83 (not
84 (memq (coding-system-type
85 unix-coding-system)
86 '(undecided chain))))
87 unix-coding-system)))
88 (coding-system-list nil))
89 :test #'eq))
90 (q-c-debug "looking at coding system %S" (coding-system-name
91 coding-system))
92 (multiple-value-bind (query-coding-succeeded query-coding-table)
93 (query-coding-region (point-min) (point-max) coding-system)
94 (q-c-debug "checking type, coding-system, q-c-s, q-c-t %S"
95 (list (coding-system-type coding-system)
96 coding-system query-coding-succeeded
97 query-coding-table))
98 (unless (and (eq t query-coding-succeeded)
99 (null query-coding-table))
100 (q-c-debug "(eq t query-coding-succeeded) %S, (\
101 null query-coding-table) %S" (eq t query-coding-succeeded)
102 (null query-coding-table)))
103 (Assert (eq t query-coding-succeeded))
104 (Assert (null query-coding-table)))
105 (q-c-debug "testing the ASCII strings for %S" coding-system)
106 (multiple-value-bind (query-coding-succeeded query-coding-table)
107 (query-coding-string ascii-chars-string coding-system)
108 (unless (and (eq t query-coding-succeeded)
109 (null query-coding-table))
110 (q-c-debug "(eq t query-coding-succeeded) %S, (\
111 null query-coding-table) %S" (eq t query-coding-succeeded)
112 (null query-coding-table)))
113 (Assert (eq t query-coding-succeeded))
114 (Assert (null query-coding-table))))
115 (q-c-debug "past the loop through the coding systems")
116 (delete-region (point-min) (point-max))
117 ;; Check for success from the two Latin-1 coding systems
118 (insert latin-1-chars-string)
119 (q-c-debug "point is now %S" (point))
120 (multiple-value-bind (query-coding-succeeded query-coding-table)
121 (query-coding-region (point-min) (point-max) 'iso-8859-1-unix)
122 (Assert (eq t query-coding-succeeded))
123 (Assert (null query-coding-table)))
124 (q-c-debug "point is now %S" (point))
125 (multiple-value-bind (query-coding-succeeded query-coding-table)
126 (query-coding-string (buffer-string) 'iso-8859-1-unix)
127 (Assert (eq t query-coding-succeeded))
128 (Assert (null query-coding-table)))
129 (q-c-debug "point is now %S" (point))
130 (multiple-value-bind (query-coding-succeeded query-coding-table)
131 (query-coding-string (buffer-string) 'iso-latin-1-with-esc-unix)
132 (Assert (eq t query-coding-succeeded))
133 (Assert (null query-coding-table)))
134 (q-c-debug "point is now %S" (point))
135 ;; Make it fail, check that it fails correctly
136 (insert (decode-char 'ucs #x20AC)) ;; EURO SIGN
137 (multiple-value-bind (query-coding-succeeded query-coding-table)
138 (query-coding-region (point-min) (point-max) 'iso-8859-1-unix)
139 (unless (and (null query-coding-succeeded)
140 (equal query-coding-table
141 #s(range-table type start-closed-end-open data
142 ((257 258) t))))
143 (q-c-debug "dealing with %S" 'iso-8859-1-unix)
144 (q-c-debug "query-coding-succeeded not null, query-coding-table \
145 %S" query-coding-table))
146 (Assert (null query-coding-succeeded))
147 (Assert (equal query-coding-table
148 #s(range-table type start-closed-end-open data
149 ((257 258) t)))))
150 (multiple-value-bind (query-coding-succeeded query-coding-table)
151 (query-coding-region (point-min) (point-max)
152 'iso-latin-1-with-esc-unix)
153 ;; Stupidly, this succeeds. The behaviour is compatible with
154 ;; GNU, though, and we encourage people not to use
155 ;; iso-latin-1-with-esc-unix anyway:
156
157 (unless (and query-coding-succeeded
158 (null query-coding-table))
159 (q-c-debug "dealing with %S" 'iso-latin-1-with-esc-unix)
160 (q-c-debug "query-coding-succeeded %S, query-coding-table \
161 %S" query-coding-succeeded query-coding-table))
162 (Assert query-coding-succeeded)
163 (Assert (null query-coding-table)))
164 ;; Check that it errors correctly.
165 (setq text-conversion-error-signalled nil)
166 (condition-case nil
167 (query-coding-region (point-min) (point-max) 'iso-8859-1-unix nil t)
168 (text-conversion-error
169 (setq text-conversion-error-signalled t)))
170 (Assert text-conversion-error-signalled)
171 (setq text-conversion-error-signalled nil)
172 (condition-case nil
173 (query-coding-region (point-min) (point-max)
174 'iso-latin-1-with-esc-unix nil t)
175 (text-conversion-error
176 (setq text-conversion-error-signalled t)))
177 (Assert (null text-conversion-error-signalled))
178 (delete-region (point-min) (point-max))
179 (insert latin-1-chars-string)
180 (decode-coding-region (point-min) (point-max) 'windows-1252-unix)
181 (goto-char (point-max)) ;; #'decode-coding-region just messed up point.
182 (multiple-value-bind (query-coding-succeeded query-coding-table)
183 (query-coding-region (point-min) (point-max) 'windows-1252-unix)
184 (Assert (eq t query-coding-succeeded))
185 (Assert (null query-coding-table)))
186 (insert ?\x80)
187 (multiple-value-bind (query-coding-succeeded query-coding-table)
188 (query-coding-region (point-min) (point-max) 'windows-1252-unix)
189 (unless (and (null query-coding-succeeded)
190 (equal query-coding-table
191 #s(range-table type start-closed-end-open data
192 ((257 258) t))))
193 (q-c-debug "dealing with %S" 'windows-1252-unix)
194 (q-c-debug "query-coding-succeeded not null, query-coding-table \
195 %S" query-coding-table))
196 (Assert (null query-coding-succeeded))
197 (Assert (equal query-coding-table
198 #s(range-table type start-closed-end-open data
199 ((257 258) t)))))
200 ;; Try a similar approach with koi8-o, the koi8 variant with
201 ;; support for Old Church Slavonic.
202 (delete-region (point-min) (point-max))
203 (insert latin-1-chars-string)
204 (decode-coding-region (point-min) (point-max) 'koi8-o-unix)
205 (multiple-value-bind (query-coding-succeeded query-coding-table)
206 (query-coding-region (point-min) (point-max) 'koi8-o-unix)
207 (Assert (eq t query-coding-succeeded))
208 (Assert (null query-coding-table)))
209 (multiple-value-bind (query-coding-succeeded query-coding-table)
210 (query-coding-region (point-min) (point-max) 'escape-quoted)
211 (Assert (eq t query-coding-succeeded))
212 (Assert (null query-coding-table)))
213 (multiple-value-bind (query-coding-succeeded query-coding-table)
214 (query-coding-region (point-min) (point-max) 'windows-1252-unix)
215 (unless (and (null query-coding-succeeded)
216 (equal query-coding-table
217 #s(range-table type start-closed-end-open
218 data ((129 131) t (132 133) t
219 (139 140) t (141 146) t
220 (155 156) t (157 161) t
221 (162 170) t (173 176) t
222 (178 187) t (189 192) t
223 (193 257) t))))
224 (q-c-debug "query-coding-succeeded not null, query-coding-table \
225 %S" query-coding-table))
226 (Assert (null query-coding-succeeded))
227 (Assert (equal query-coding-table
228 #s(range-table type start-closed-end-open
229 data ((129 131) t (132 133) t (139 140) t
230 (141 146) t (155 156) t (157 161) t
231 (162 170) t (173 176) t (178 187) t
232 (189 192) t (193 257) t)))))
233 (multiple-value-bind (query-coding-succeeded query-coding-table)
234 (query-coding-region (point-min) (point-max) 'koi8-r-unix)
235 (Assert (null query-coding-succeeded))
236 (Assert (equal query-coding-table
237 #s(range-table type start-closed-end-open
238 data ((129 154) t (155 161) t (162 164) t
239 (165 177) t (178 180) t
240 (181 192) t)))))
241 ;; Check that the Unicode coding systems handle characters
242 ;; without Unicode mappings.
243 (delete-region (point-min) (point-max))
244 (insert latin-1-chars-string)
245 (decode-coding-region (point-min) (point-max) 'greek-iso-8bit-with-esc)
246 (dolist (coding-system
247 '(utf-16-mac ucs-4-mac utf-16-little-endian-bom-dos ucs-4-dos
248 utf-16-little-endian-mac utf-16-bom-unix
249 utf-16-little-endian ucs-4 utf-16-dos
250 ucs-4-little-endian-dos utf-16-bom-mac utf-16-bom
251 utf-16-unix utf-32-unix utf-32-little-endian
252 utf-32-dos utf-32 utf-32-little-endian-dos utf-8-bom
253 utf-16-bom-dos ucs-4-unix
254 utf-16-little-endian-bom-unix utf-8-bom-mac
255 utf-32-little-endian-unix utf-16
256 utf-16-little-endian-dos utf-16-little-endian-bom-mac
257 utf-8-bom-dos ucs-4-little-endian-mac utf-8-bom-unix
258 utf-32-little-endian-mac utf-8-dos utf-8-unix
259 utf-32-mac utf-8-mac utf-16-little-endian-unix
260 ucs-4-little-endian ucs-4-little-endian-unix utf-8
261 utf-16-little-endian-bom))
262 (multiple-value-bind (query-coding-succeeded query-coding-table)
263 (query-coding-region (point-min) (point-max) coding-system)
264 (Assert (null query-coding-succeeded))
265 (Assert (equal query-coding-table
266 #s(range-table type start-closed-end-open data
267 ((173 174) t (209 210) t
268 (254 255) t)))))
269 (multiple-value-bind (query-coding-succeeded query-coding-table)
270 (query-coding-region (point-min) 173 coding-system)
271 (Assert (eq t query-coding-succeeded))
272 (Assert (null query-coding-table)))
273 (multiple-value-bind (query-coding-succeeded query-coding-table)
274 (query-coding-region 174 209 coding-system)
275 (Assert (eq t query-coding-succeeded))
276 (Assert (null query-coding-table)))
277 (multiple-value-bind (query-coding-succeeded query-coding-table)
278 (query-coding-region 210 254 coding-system)
279 (Assert (eq t query-coding-succeeded))
280 (Assert (null query-coding-table)))
281 ;; Check that it errors correctly.
282 (setq text-conversion-error-signalled nil)
283 (condition-case nil
284 (query-coding-region (point-min) (point-max) coding-system nil t)
285 (text-conversion-error
286 (setq text-conversion-error-signalled t)))
287 (Assert text-conversion-error-signalled)
288 (setq text-conversion-error-signalled nil)
289 (condition-case nil
290 (query-coding-region (point-min) 173 coding-system nil t)
291 (text-conversion-error
292 (setq text-conversion-error-signalled t)))
293 (Assert (null text-conversion-error-signalled))))))