3767
|
1 ;;; latin.el --- Roman-alphabet languages -*- coding: iso-2022-7bit; -*-
|
464
|
2
|
3767
|
3 ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
|
|
4 ;; Licensed to the Free Software Foundation.
|
|
5 ;; Copyright (C) 1997 MORIOKA Tomohiko
|
|
6 ;; Copyright (C) 2001 Ben Wing.
|
|
7 ;; Copyright (C) 2002, 2005, 2006 Free Software Foundation
|
|
8
|
|
9 ;; Keywords: multilingual, latin, dumped
|
464
|
10
|
|
11 ;; This file is part of XEmacs.
|
|
12
|
|
13 ;; XEmacs is free software; you can redistribute it and/or modify it
|
|
14 ;; under the terms of the GNU General Public License as published by
|
|
15 ;; the Free Software Foundation; either version 2, or (at your option)
|
|
16 ;; any later version.
|
|
17
|
|
18 ;; XEmacs is distributed in the hope that it will be useful, but
|
|
19 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
20 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
21 ;; General Public License for more details.
|
|
22
|
|
23 ;; You should have received a copy of the GNU General Public License
|
|
24 ;; along with XEmacs; see the file COPYING. If not, write to the Free
|
|
25 ;; Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
|
|
26 ;; 02111-1307, USA.
|
|
27
|
|
28 ;;; Commentary:
|
|
29
|
3767
|
30 ;; For Roman-alphabet-using Europeans, eight coded character sets,
|
|
31 ;; ISO8859-1,2,3,4,9,14,15,16 are supported.
|
464
|
32
|
|
33 ;;; Code:
|
|
34
|
|
35 ;; Case table setup. We set up all the case tables using
|
|
36 ;; put-case-table-pair. The data for this comes from FSF Emacs 20.7
|
|
37 ;; (lisp/international/latin-*.el), written by several people and
|
|
38 ;; updated by Erik Naggum.
|
|
39
|
|
(defun setup-case-pairs (charset pairs)
  "Register the upper/lower case PAIRS of CHARSET in the standard case table.

PAIRS is a list of two-element lists (UPPER LOWER); each element is the
octet of a character within CHARSET.  Both octets are turned into
characters with `make-char' and handed to `put-case-table-pair'."
  (let ((case-table (standard-case-table)))
    (dolist (pair pairs)
      (put-case-table-pair (make-char charset (car pair))
                           (make-char charset (cadr pair))
                           case-table))))
|
|
46
|
|
47 ;; Latin-1's case is dealt with in iso8859-1.el, which see. Its syntax is
|
|
48 ;; initialised in syntax.c:complex_vars_of_syntax.
|
464
|
49
|
3767
|
50
|
|
51 ;; Latin-2 (ISO-8859-2). Central Europe; Czech, Slovak, Hungarian, Polish,
|
|
52 ;; Croatian, other languages.
|
|
53 ;;
|
|
54 ;; (Yes, it really is Central European. German written in Latin 2 and using
|
|
55 ;; only Umlaute and the sharp S in its non-ASCII repertoire is bit-for-bit
|
|
56 ;; identical with the same text in Latin-1.)
|
464
|
57
|
3767
|
58 ;; The default character syntax is now word. Pay attention to the
|
|
59 ;; exceptions in ISO-8859-2, copying them from ISO-8859-1.
|
|
;; Each entry is (LATIN-2-OCTET LATIN-1-OCTET): the ISO-8859-2 character
;; receives the syntax class currently held by the ISO-8859-1 character.
(let ((table (standard-syntax-table)))
  (dolist (entry '((#xA0 #xA0)    ; NO BREAK SPACE
                   (#xA2 #xB4)    ; BREVE, ACUTE ACCENT
                   (#xA4 #xA4)    ; CURRENCY SIGN
                   (#xA7 #xA7)    ; SECTION SIGN
                   (#xA8 #xA8)    ; DIAERESIS
                   (#xAD #xAD)    ; SOFT HYPHEN
                   (#xB0 #xB0)    ; DEGREE SIGN
                   (#xB2 #xB4)    ; OGONEK, ACUTE ACCENT
                   (#xB4 #xB4)    ; ACUTE ACCENT
                   (#xB7 #xB4)    ; CARON, ACUTE ACCENT
                   (#xB8 #xB8)    ; CEDILLA
                   (#xBD #xB4)    ; DOUBLE ACUTE ACCENT, ACUTE ACCENT
                   (#xD7 #xD7)    ; MULTIPLICATION SIGN
                   (#xF7 #xF7)    ; DIVISION SIGN
                   (#xFF #xB4)))  ; DOT ABOVE, ACUTE ACCENT
    (modify-syntax-entry
     (make-char 'latin-iso8859-2 (car entry))
     (string (char-syntax (make-char 'latin-iso8859-1 (cadr entry))))
     table)))
|
464
|
82
|
3767
|
83 ;; Case.
|
464
|
;; Upper/lower case pairs of ISO-8859-2, as (UPPER-OCTET LOWER-OCTET).
(setup-case-pairs
 'latin-iso8859-2
 '((#xA1 #xB1)    ; A WITH OGONEK
   (#xA3 #xB3)    ; L WITH STROKE
   (#xA5 #xB5)    ; L WITH CARON
   (#xA6 #xB6)    ; S WITH ACUTE
   (#xA9 #xB9)    ; S WITH CARON
   (#xAA #xBA)    ; S WITH CEDILLA
   (#xAB #xBB)    ; T WITH CARON
   (#xAC #xBC)    ; Z WITH ACUTE
   (#xAE #xBE)    ; Z WITH CARON
   (#xAF #xBF)    ; Z WITH DOT ABOVE
   (#xC0 #xE0)    ; R WITH ACUTE
   (#xC1 #xE1)    ; A WITH ACUTE
   (#xC2 #xE2)    ; A WITH CIRCUMFLEX
   (#xC3 #xE3)    ; A WITH BREVE
   (#xC4 #xE4)    ; A WITH DIAERESIS
   (#xC5 #xE5)    ; L WITH ACUTE
   (#xC6 #xE6)    ; C WITH ACUTE
   (#xC7 #xE7)    ; C WITH CEDILLA
   (#xC8 #xE8)    ; C WITH CARON
   (#xC9 #xE9)    ; E WITH ACUTE
   (#xCA #xEA)    ; E WITH OGONEK
   (#xCB #xEB)    ; E WITH DIAERESIS
   (#xCC #xEC)    ; E WITH CARON
   (#xCD #xED)    ; I WITH ACUTE
   (#xCE #xEE)    ; I WITH CIRCUMFLEX
   (#xCF #xEF)    ; D WITH CARON
   (#xD0 #xF0)    ; D WITH STROKE
   (#xD1 #xF1)    ; N WITH ACUTE
   (#xD2 #xF2)    ; N WITH CARON
   (#xD3 #xF3)    ; O WITH ACUTE
   (#xD4 #xF4)    ; O WITH CIRCUMFLEX
   (#xD5 #xF5)    ; O WITH DOUBLE ACUTE
   (#xD6 #xF6)    ; O WITH DIAERESIS
   (#xD8 #xF8)    ; R WITH CARON
   (#xD9 #xF9)    ; U WITH RING ABOVE
   (#xDA #xFA)    ; U WITH ACUTE
   (#xDB #xFB)    ; U WITH DOUBLE ACUTE
   (#xDC #xFC)    ; U WITH DIAERESIS
   (#xDD #xFD)    ; Y WITH ACUTE
   (#xDE #xFE)))  ; T WITH CEDILLA
|
464
|
126
|
3767
|
;; Coding system `iso-8859-2': an `iso2022'-type coding system whose
;; charset-g0 is ASCII and whose charset-g1 is the Latin-2 right-hand part.
(make-coding-system
 'iso-8859-2
 'iso2022
 "ISO-8859-2 (Latin-2)"
 '(charset-g0 ascii
   charset-g1 latin-iso8859-2
   charset-g2 t
   charset-g3 t
   mnemonic   "MIME/Ltn-2"))
|
|
134
|
|
135
|
|
136 ;;
|
|
137 ;; Latin-3 (ISO-8859-3). Esperanto, Maltese and Turkish. Obsolescent.
|
464
|
138
|
3767
|
139 ;; Initialise the non-word syntax codes in ISO-8859-3, copying them from
|
|
140 ;; ISO-8859-1.
|
|
;; Each entry is (LATIN-3-OCTET LATIN-1-OCTET): the ISO-8859-3 character
;; receives the syntax class currently held by the ISO-8859-1 character.
(let ((table (standard-syntax-table)))
  (dolist (entry '((#xA0 #xA0)    ; NO BREAK SPACE
                   (#xA2 #xB4)    ; BREVE, ACUTE ACCENT
                   (#xA3 #xA3)    ; POUND SIGN
                   (#xA4 #xA4)    ; CURRENCY SIGN
                   (#xA7 #xA7)    ; SECTION SIGN
                   (#xA8 #xA8)    ; DIAERESIS
                   (#xAD #xAD)    ; SOFT HYPHEN
                   (#xB0 #xB0)    ; DEGREE SIGN
                   (#xB2 #xB2)    ; SUPERSCRIPT TWO
                   (#xB3 #xB3)    ; SUPERSCRIPT THREE
                   (#xB4 #xB4)    ; ACUTE ACCENT
                   (#xB5 #xB5)    ; MICRO SIGN
                   (#xB7 #xB7)    ; MIDDLE DOT
                   (#xB8 #xB8)    ; CEDILLA
                   (#xBD #xBD)    ; VULGAR FRACTION ONE HALF
                   (#xD7 #xD7)    ; MULTIPLICATION SIGN
                   (#xF7 #xF7)    ; DIVISION SIGN
                   (#xFF #xB4)))  ; DOT ABOVE, ACUTE ACCENT
    (modify-syntax-entry
     (make-char 'latin-iso8859-3 (car entry))
     (string (char-syntax (make-char 'latin-iso8859-1 (cadr entry))))
     table)))
|
|
166
|
|
167 ;; Case.
|
464
|
;; Upper/lower case pairs of ISO-8859-3, as (UPPER-OCTET LOWER-OCTET).
(setup-case-pairs
 'latin-iso8859-3
 '((#xA1 #xB1)    ; H WITH STROKE
   (#xA6 #xB6)    ; H WITH CIRCUMFLEX
   (#xAA #xBA)    ; S WITH CEDILLA
   (#xAB #xBB)    ; G WITH BREVE
   (#xAC #xBC)    ; J WITH CIRCUMFLEX
   (#xAF #xBF)    ; Z WITH DOT ABOVE
   (#xC0 #xE0)    ; A WITH GRAVE
   (#xC1 #xE1)    ; A WITH ACUTE
   (#xC2 #xE2)    ; A WITH CIRCUMFLEX
   (#xC4 #xE4)    ; A WITH DIAERESIS
   (#xC5 #xE5)    ; C WITH DOT ABOVE
   (#xC6 #xE6)    ; C WITH CIRCUMFLEX
   (#xC7 #xE7)    ; C WITH CEDILLA
   (#xC8 #xE8)    ; E WITH GRAVE
   (#xC9 #xE9)    ; E WITH ACUTE
   (#xCA #xEA)    ; E WITH CIRCUMFLEX
   (#xCB #xEB)    ; E WITH DIAERESIS
   (#xCC #xEC)    ; I WITH GRAVE
   (#xCD #xED)    ; I WITH ACUTE
   (#xCE #xEE)    ; I WITH CIRCUMFLEX
   (#xCF #xEF)    ; I WITH DIAERESIS
   (#xD1 #xF1)    ; N WITH TILDE
   (#xD2 #xF2)    ; O WITH GRAVE
   (#xD3 #xF3)    ; O WITH ACUTE
   (#xD4 #xF4)    ; O WITH CIRCUMFLEX
   (#xD5 #xF5)    ; G WITH DOT ABOVE
   (#xD6 #xF6)    ; O WITH DIAERESIS
   (#xD8 #xF8)    ; G WITH CIRCUMFLEX
   (#xD9 #xF9)    ; U WITH GRAVE
   (#xDA #xFA)    ; U WITH ACUTE
   (#xDB #xFB)    ; U WITH CIRCUMFLEX
   (#xDC #xFC)    ; U WITH DIAERESIS
   (#xDD #xFD)    ; U WITH BREVE
   (#xDE #xFE)))  ; S WITH CIRCUMFLEX
|
464
|
204
|
3767
|
;; Coding system `iso-8859-3': an `iso2022'-type coding system whose
;; charset-g0 is ASCII and whose charset-g1 is the Latin-3 right-hand part.
(make-coding-system
 'iso-8859-3
 'iso2022
 "ISO-8859-3 (Latin-3)"
 '(charset-g0 ascii
   charset-g1 latin-iso8859-3
   charset-g2 t
   charset-g3 t
   mnemonic   "MIME/Ltn-3"))
|
|
212
|
|
213
|
|
214 ;; Latin-4 (ISO-8859-4)
|
|
215
|
|
216 ;; Estonian, Latvian, Lithuanian, Greenlandic, and Sami. Obsolescent.
|
464
|
217
|
3767
|
218 ;; The default character syntax is now word. Pay attention to the
|
|
219 ;; exceptions in ISO-8859-4, copying them from ISO-8859-1.
|
|
;; Each entry is (LATIN-4-OCTET LATIN-1-OCTET): the ISO-8859-4 character
;; receives the syntax class currently held by the ISO-8859-1 character.
;; Every non-letter of ISO-8859-4 has to appear here, because anything
;; left out keeps the default word syntax.
(loop
  for (latin-4 latin-1)
  in '((#xA0 #xA0)   ;; NO BREAK SPACE
       (#xA4 #xA4)   ;; CURRENCY SIGN
       (#xA7 #xA7)   ;; SECTION SIGN
       (#xA8 #xA8)   ;; DIAERESIS
       (#xAD #xAD)   ;; SOFT HYPHEN
       ;; MACRON sits at 0xAF in both ISO-8859-4 and ISO-8859-1; it was
       ;; previously missing from this list and so was treated as a word
       ;; constituent.
       (#xAF #xAF)   ;; MACRON
       (#xB0 #xB0)   ;; DEGREE SIGN
       (#xB2 #xB4)   ;; OGONEK, ACUTE ACCENT
       (#xB4 #xB4)   ;; ACUTE ACCENT
       (#xB7 #xB4)   ;; CARON, ACUTE ACCENT
       (#xB8 #xB8)   ;; CEDILLA
       (#xD7 #xD7)   ;; MULTIPLICATION SIGN
       (#xF7 #xF7)   ;; DIVISION SIGN
       (#xFF #xB4))  ;; DOT ABOVE, ACUTE ACCENT
  with syntax-table = (standard-syntax-table)
  do (modify-syntax-entry
      (make-char 'latin-iso8859-4 latin-4)
      (string (char-syntax (make-char 'latin-iso8859-1 latin-1)))
      syntax-table))
|
|
240
|
|
241 ;; Case.
|
464
|
;; Upper/lower case pairs of ISO-8859-4, as (UPPER-OCTET LOWER-OCTET).
(setup-case-pairs
 'latin-iso8859-4
 '((#xA1 #xB1)    ; A WITH OGONEK
   (#xA3 #xB3)    ; R WITH CEDILLA
   (#xA5 #xB5)    ; I WITH TILDE
   (#xA6 #xB6)    ; L WITH CEDILLA
   (#xA9 #xB9)    ; S WITH CARON
   (#xAA #xBA)    ; E WITH MACRON
   (#xAB #xBB)    ; G WITH CEDILLA
   (#xAC #xBC)    ; T WITH STROKE
   (#xAE #xBE)    ; Z WITH CARON
   (#xBD #xBF)    ; ENG
   (#xC0 #xE0)    ; A WITH MACRON
   (#xC1 #xE1)    ; A WITH ACUTE
   (#xC2 #xE2)    ; A WITH CIRCUMFLEX
   (#xC3 #xE3)    ; A WITH TILDE
   (#xC4 #xE4)    ; A WITH DIAERESIS
   (#xC5 #xE5)    ; A WITH RING ABOVE
   (#xC6 #xE6)    ; AE
   (#xC7 #xE7)    ; I WITH OGONEK
   (#xC8 #xE8)    ; C WITH CARON
   (#xC9 #xE9)    ; E WITH ACUTE
   (#xCA #xEA)    ; E WITH OGONEK
   (#xCB #xEB)    ; E WITH DIAERESIS
   (#xCC #xEC)    ; E WITH DOT ABOVE
   (#xCD #xED)    ; I WITH ACUTE
   (#xCE #xEE)    ; I WITH CIRCUMFLEX
   (#xCF #xEF)    ; I WITH MACRON
   (#xD0 #xF0)    ; D WITH STROKE
   (#xD1 #xF1)    ; N WITH CEDILLA
   (#xD2 #xF2)    ; O WITH MACRON
   (#xD3 #xF3)    ; K WITH CEDILLA
   (#xD4 #xF4)    ; O WITH CIRCUMFLEX
   (#xD5 #xF5)    ; O WITH TILDE
   (#xD6 #xF6)    ; O WITH DIAERESIS
   (#xD8 #xF8)    ; O WITH STROKE
   (#xD9 #xF9)    ; U WITH OGONEK
   (#xDA #xFA)    ; U WITH ACUTE
   (#xDB #xFB)    ; U WITH CIRCUMFLEX
   (#xDC #xFC)    ; U WITH DIAERESIS
   (#xDD #xFD)    ; U WITH TILDE
   (#xDE #xFE)))  ; U WITH MACRON
|
|
284
|
|
;; Coding system `iso-8859-4': an `iso2022'-type coding system whose
;; charset-g0 is ASCII and whose charset-g1 is the Latin-4 right-hand part.
(make-coding-system
 'iso-8859-4
 'iso2022
 "ISO-8859-4 (Latin-4)"
 '(charset-g0 ascii
   charset-g1 latin-iso8859-4
   charset-g2 t
   charset-g3 t
   mnemonic   "MIME/Ltn-4"))
|
|
292
|
|
293
|
|
294 ;; Latin-8 (ISO 8859-14) Celtic.
|
|
295
|
|
296 ;; Never widely used. Current-orthography Gaelic, both Irish and Scots, is
|
|
297 ;; easily written with Latin-1. Wikipedia says the same about Welsh.
|
|
298
|
|
;; Define the charset `latin-iso8859-14': a one-dimensional set of 96
;; characters, one column wide, written left to right.
(make-charset
 'latin-iso8859-14
 "Right-Hand Part of Latin Alphabet 8 (ISO/IEC 8859-14)"
 '(dimension  1
   registries ["ISO8859-14"]
   chars      96
   columns    1
   direction  l2r
   final      ?_
   graphic    1
   short-name "RHP of Latin-8"
   long-name  "RHP of Latin-8 (ISO 8859-14)"))
|
464
|
310
|
3767
|
311 ;;
|
|
312 ;; Character syntax defaults to word. The exceptions here are shared with Latin-1.
|
|
;; These octets get, in Latin-8, the syntax class of the character at the
;; same octet in Latin-1.
(loop
  with table = (standard-syntax-table)
  for octet in '(#xa0    ; NO BREAK SPACE
                 #xa3    ; POUND SIGN
                 #xa7    ; SECTION SIGN
                 #xa9    ; COPYRIGHT
                 #xad    ; SOFT HYPHEN
                 #xae    ; REGISTERED
                 #xb6)   ; PILCROW SIGN
  do (modify-syntax-entry
      (make-char 'latin-iso8859-14 octet)
      (string (char-syntax (make-char 'latin-iso8859-1 octet)))
      table))
|
|
323 ;; Case.
|
|
;; Upper/lower case pairs of ISO 8859-14, as (UPPER-OCTET LOWER-OCTET).
(setup-case-pairs
 'latin-iso8859-14
 '((#xA1 #xA2)    ; B WITH DOT ABOVE
   (#xA4 #xA5)    ; C WITH DOT ABOVE
   (#xA6 #xAB)    ; D WITH DOT ABOVE
   (#xA8 #xB8)    ; W WITH GRAVE
   (#xAA #xBA)    ; W WITH ACUTE
   (#xAC #xBC)    ; Y WITH GRAVE
   (#xAF #xFF)    ; Y WITH DIAERESIS
   (#xB0 #xB1)    ; F WITH DOT ABOVE
   (#xB2 #xB3)    ; G WITH DOT ABOVE
   (#xB4 #xB5)    ; M WITH DOT ABOVE
   (#xB7 #xB9)    ; P WITH DOT ABOVE
   (#xBB #xBF)    ; S WITH DOT ABOVE
   (#xBD #xBE)    ; W WITH DIAERESIS
   (#xC0 #xE0)    ; A WITH GRAVE
   (#xC1 #xE1)    ; A WITH ACUTE
   (#xC2 #xE2)    ; A WITH CIRCUMFLEX
   (#xC3 #xE3)    ; A WITH TILDE
   (#xC4 #xE4)    ; A WITH DIAERESIS
   (#xC5 #xE5)    ; A WITH RING ABOVE
   (#xC6 #xE6)    ; AE
   (#xC7 #xE7)    ; C WITH CEDILLA
   (#xC8 #xE8)    ; E WITH GRAVE
   (#xC9 #xE9)    ; E WITH ACUTE
   (#xCA #xEA)    ; E WITH CIRCUMFLEX
   (#xCB #xEB)    ; E WITH DIAERESIS
   (#xCC #xEC)    ; I WITH GRAVE
   (#xCD #xED)    ; I WITH ACUTE
   (#xCE #xEE)    ; I WITH CIRCUMFLEX
   (#xCF #xEF)    ; I WITH DIAERESIS
   (#xD0 #xF0)    ; W WITH CIRCUMFLEX
   (#xD1 #xF1)    ; N WITH TILDE
   (#xD2 #xF2)    ; O WITH GRAVE
   (#xD3 #xF3)    ; O WITH ACUTE
   (#xD4 #xF4)    ; O WITH CIRCUMFLEX
   (#xD5 #xF5)    ; O WITH TILDE
   (#xD6 #xF6)    ; O WITH DIAERESIS
   (#xD7 #xF7)    ; T WITH DOT ABOVE
   (#xD8 #xF8)    ; O WITH STROKE
   (#xD9 #xF9)    ; U WITH GRAVE
   (#xDA #xFA)    ; U WITH ACUTE
   (#xDB #xFB)    ; U WITH CIRCUMFLEX
   (#xDC #xFC)    ; U WITH DIAERESIS
   (#xDD #xFD)    ; Y WITH ACUTE
   (#xDE #xFE)))  ; Y WITH CIRCUMFLEX
|
464
|
370
|
3767
|
371
|
|
372 ;; The syntax table code for ISO 8859-15 and ISO 8859-16 requires that the
|
|
373 ;; guillemets not have parenthesis syntax, which they used to have in the
|
|
374 ;; past. See syntax.c:complex_vars_of_syntax.
|
|
;; Left-pointing guillemet (Latin-1 octet #xAB) must be neither an open
;; nor a close parenthesis.
(assert
 (not (memq (char-syntax (make-char 'latin-iso8859-1 #xAB)) '(?\( ?\))))
 t
 "This code assumes \xAB does not have parenthesis syntax. ")

;; Same requirement for the right-pointing guillemet (octet #xBB).
(assert
 (not (memq (char-syntax (make-char 'latin-iso8859-1 #xBB)) '(?\( ?\))))
 t
 "This code assumes \xBB does not have parenthesis syntax. ")
|
|
380
|
|
381
|
|
382 ;; Latin-9 (ISO-8859-15)
|
|
383 ;;
|
|
384 ;; Latin-1 plus Euro, plus a few accented characters for the sake of correct
|
|
385 ;; Finnish and French orthography. Only ever widely used on Unix.
|
2765
|
386
|
3767
|
387 ;;
|
|
388 ;; Based on Latin-1 and differences therefrom.
|
|
389 ;;
|
|
390 ;; First, initialise the syntax from the corresponding Latin-1 characters.
|
|
;; Walk the octets #xa0 through #xff inclusive, giving each Latin-9
;; character the syntax class of the Latin-1 character at the same octet.
(let ((table (standard-syntax-table))
      (octet #xa0))
  (while (<= octet #xff)
    (modify-syntax-entry
     (make-char 'latin-iso8859-15 octet)
     (string (char-syntax (make-char 'latin-iso8859-1 octet)))
     table)
    (setq octet (1+ octet))))
|
|
398
|
|
399 ;; Now, the exceptions. The Euro sign retains the syntax of CURRENCY SIGN.
|
|
;; The Latin-9 letters that replaced Latin-1 symbols get word syntax.
;; They are named by octet and built with `make-char' instead of being
;; written as ISO-2022-encoded character literals, so this form no longer
;; depends on the escape sequences in the file surviving intact.
(loop
  for octet in '(#xA6   ;; CAPITAL S WITH CARON
                 #xA8   ;; SMALL S WITH CARON
                 #xB4   ;; CAPITAL Z WITH CARON
                 #xB8   ;; SMALL Z WITH CARON
                 #xBC   ;; CAPITAL LIGATURE OE
                 #xBD   ;; SMALL LIGATURE OE
                 #xBE)  ;; CAPITAL Y WITH DIAERESIS
  with syntax-table = (standard-syntax-table)
  do (modify-syntax-entry
      (make-char 'latin-iso8859-15 octet) "w" syntax-table))
|
|
404
|
|
405 ;; Case.
|
2765
|
;; Upper/lower case pairs of ISO-8859-15, as (UPPER-OCTET LOWER-OCTET).
;; Entries marked with * carry that mark in the original table.
(setup-case-pairs
 'latin-iso8859-15
 '((#xA6 #xA8)    ; S WITH CARON *
   (#xB4 #xB8)    ; Z WITH CARON *
   (#xBC #xBD)    ; LATIN LIGATURE OE *
   (#xBE #xFF)    ; Y WITH DIAERESIS *
   (#xC0 #xE0)    ; A WITH GRAVE
   (#xC1 #xE1)    ; A WITH ACUTE
   (#xC2 #xE2)    ; A WITH CIRCUMFLEX
   (#xC3 #xE3)    ; A WITH TILDE
   (#xC4 #xE4)    ; A WITH DIAERESIS
   (#xC5 #xE5)    ; A WITH RING ABOVE
   (#xC6 #xE6)    ; AE
   (#xC7 #xE7)    ; C WITH CEDILLA
   (#xC8 #xE8)    ; E WITH GRAVE
   (#xC9 #xE9)    ; E WITH ACUTE
   (#xCA #xEA)    ; E WITH CIRCUMFLEX
   (#xCB #xEB)    ; E WITH DIAERESIS
   (#xCC #xEC)    ; I WITH GRAVE
   (#xCD #xED)    ; I WITH ACUTE
   (#xCE #xEE)    ; I WITH CIRCUMFLEX
   (#xCF #xEF)    ; I WITH DIAERESIS
   (#xD0 #xF0)    ; ETH
   (#xD1 #xF1)    ; N WITH TILDE
   (#xD2 #xF2)    ; O WITH GRAVE
   (#xD3 #xF3)    ; O WITH ACUTE
   (#xD4 #xF4)    ; O WITH CIRCUMFLEX
   (#xD5 #xF5)    ; O WITH TILDE
   (#xD6 #xF6)    ; O WITH DIAERESIS
   (#xD8 #xF8)    ; O WITH STROKE
   (#xD9 #xF9)    ; U WITH GRAVE
   (#xDA #xFA)    ; U WITH ACUTE
   (#xDB #xFB)    ; U WITH CIRCUMFLEX
   (#xDC #xFC)    ; U WITH DIAERESIS
   (#xDD #xFD)    ; Y WITH ACUTE
   (#xDE #xFE)))  ; THORN
|
|
442
|
|
;; Coding system `iso-8859-15': an `iso2022'-type coding system whose
;; charset-g0 is ASCII and whose charset-g1 is the Latin-9 right-hand part.
(make-coding-system
 'iso-8859-15
 'iso2022
 "ISO 4873 conforming 8-bit code (ASCII + Latin 9; aka Latin-1 with Euro)"
 '(mnemonic   "MIME/Ltn-9"           ; bletch
   eol-type   nil
   charset-g0 ascii
   charset-g1 latin-iso8859-15
   charset-g2 t
   charset-g3 t))
|
2765
|
452
|
3767
|
453 ;; end of ISO 8859-15.
|
|
454
|
|
455 ;;
|
|
456 ;; Latin-10 (ISO 8859-16).
|
|
457 ;;
|
|
458 ;; "South-Eastern European." Not, to my knowledge, ever widely used.
|
|
459
|
|
;; Define the charset `latin-iso8859-16': a one-dimensional set of 96
;; characters, one column wide, written left to right.
(make-charset
 'latin-iso8859-16
 "Right-Hand Part of Latin Alphabet 10 (ISO/IEC 8859-16)"
 '(dimension  1
   registries ["ISO8859-16"]
   chars      96
   columns    1
   direction  l2r
   final      ?f                     ; octet 06/06; cf ISO-IR 226
   graphic    1
   short-name "RHP of Latin-10"
   long-name  "RHP of Latin-10 (ISO 8859-16)"))
|
2765
|
471
|
3767
|
472 ;; Copy over the non-word syntax this charset has in common with Latin 1.
|
|
;; These octets get, in Latin-10, the syntax class of the character at the
;; same octet in Latin-1.
(loop
  with table = (standard-syntax-table)
  for octet in '(#xa0    ; NO BREAK SPACE
                 #xa7    ; SECTION SIGN
                 #xa9    ; COPYRIGHT
                 #xab    ; LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
                 #xad    ; SOFT HYPHEN
                 #xb0    ; DEGREE
                 #xb1    ; PLUS-MINUS SIGN
                 #xb6    ; PILCROW SIGN
                 #xb7    ; MIDDLE DOT
                 #xbb)   ; RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
  do (modify-syntax-entry
      (make-char 'latin-iso8859-16 octet)
      (string (char-syntax (make-char 'latin-iso8859-1 octet)))
      table))
|
|
486
|
|
;; Three Latin-10 characters sit at octets where Latin-1 has something
;; else, so each borrows its syntax from a different Latin-1 octet.
;; Entries are (LATIN-10-OCTET LATIN-1-OCTET):
;;   EURO SIGN                   takes the syntax of POUND SIGN;
;;   DOUBLE LOW-9 QUOTATION MARK takes that of LEFT-POINTING DOUBLE ANGLE
;;                               QUOTATION MARK;
;;   RIGHT DOUBLE QUOTATION MARK takes that of RIGHT-POINTING DOUBLE ANGLE
;;                               QUOTATION MARK.
(loop
  with table = (standard-syntax-table)
  for (latin-10 latin-1) in '((#xa4 #xa3)
                              (#xa5 #xab)
                              (#xb5 #xbb))
  do (modify-syntax-entry
      (make-char 'latin-iso8859-16 latin-10)
      (string (char-syntax (make-char 'latin-iso8859-1 latin-1)))
      table))
|
|
503
|
|
504 ;; Case.
|
2765
|
;; Upper/lower case pairs of ISO 8859-16, as (UPPER-OCTET LOWER-OCTET).
(setup-case-pairs
 'latin-iso8859-16
 '((#xA1 #xA2)   ;; A WITH OGONEK
   (#xA3 #xB3)   ;; L WITH STROKE
   (#xA6 #xA8)   ;; S WITH CARON
   (#xAA #xBA)   ;; S WITH COMMA BELOW
   (#xAC #xAE)   ;; Z WITH ACUTE
   (#xAF #xBF)   ;; Z WITH DOT ABOVE
   (#xB2 #xB9)   ;; C WITH CARON
   (#xB4 #xB8)   ;; Z WITH CARON
   ;; The OE ligature occupies 0xBC/0xBD here just as in ISO 8859-15; the
   ;; pair was previously missing from this table.
   (#xBC #xBD)   ;; LATIN LIGATURE OE
   (#xBE #xFF)   ;; Y WITH DIAERESIS
   (#xC0 #xE0)   ;; A WITH GRAVE
   (#xC1 #xE1)   ;; A WITH ACUTE
   (#xC2 #xE2)   ;; A WITH CIRCUMFLEX
   (#xC3 #xE3)   ;; A WITH BREVE
   (#xC4 #xE4)   ;; A WITH DIAERESIS
   (#xC5 #xE5)   ;; C WITH ACUTE
   (#xC6 #xE6)   ;; AE
   (#xC7 #xE7)   ;; C WITH CEDILLA
   (#xC8 #xE8)   ;; E WITH GRAVE
   (#xC9 #xE9)   ;; E WITH ACUTE
   (#xCA #xEA)   ;; E WITH CIRCUMFLEX
   (#xCB #xEB)   ;; E WITH DIAERESIS
   (#xCC #xEC)   ;; I WITH GRAVE
   (#xCD #xED)   ;; I WITH ACUTE
   (#xCE #xEE)   ;; I WITH CIRCUMFLEX
   (#xCF #xEF)   ;; I WITH DIAERESIS
   (#xD0 #xF0)   ;; D WITH STROKE
   (#xD1 #xF1)   ;; N WITH ACUTE
   (#xD2 #xF2)   ;; O WITH GRAVE
   (#xD3 #xF3)   ;; O WITH ACUTE
   (#xD4 #xF4)   ;; O WITH CIRCUMFLEX
   (#xD5 #xF5)   ;; O WITH DOUBLE ACUTE
   (#xD6 #xF6)   ;; O WITH DIAERESIS
   (#xD7 #xF7)   ;; S WITH ACUTE
   (#xD8 #xF8)   ;; U WITH DOUBLE ACUTE
   (#xD9 #xF9)   ;; U WITH GRAVE
   (#xDA #xFA)   ;; U WITH ACUTE
   (#xDB #xFB)   ;; U WITH CIRCUMFLEX
   (#xDC #xFC)   ;; U WITH DIAERESIS
   (#xDD #xFD)   ;; E WITH OGONEK
   (#xDE #xFE))) ;; T WITH COMMA BELOW
|
|
547
|
|
548 ;; Add a coding system for ISO 8859-16.
|
|
;; Coding system `iso-8859-16': an `iso2022'-type coding system whose
;; charset-g0 is ASCII and whose charset-g1 is the Latin-10 right-hand part.
(make-coding-system
 'iso-8859-16
 'iso2022
 "MIME ISO-8859-16"
 '(charset-g0 ascii
   charset-g1 latin-iso8859-16
   charset-g2 t                      ; grrr
   charset-g3 t                      ; grrr
   mnemonic   "MIME/Ltn-10"))
|
|
556
|
|
557 ;; end of ISO 8859-16.
|
|
558
|
|
559
|
|
560 (provide 'romanian)
|
|
561
|
|
562 ;; Czech support originally from czech.el
|
|
563 ;; Author: Milan Zamazal <pdm@zamazal.org>
|
|
564 ;; Maintainer (FSF): Pavel Jan,Am(Bk <Pavel@Janik.cz>
|
|
565 ;; Maintainer (for XEmacs): David Sauer <davids@penguin.cz>
|
|
566
|
|
567 (provide 'czech)
|
|
568
|
|
569 ;; Slovak support originally from slovak.el
|
|
570 ;; Authors: Tibor ,B)(Bimko <tibor.simko@fmph.uniba.sk>,
|
|
571 ;; Milan Zamazal <pdm@fi.muni.cz>
|
|
572 ;; Maintainer: Milan Zamazal <pdm@fi.muni.cz>
|
|
573
|
|
;; `slovenian' is kept for any existing (require 'slovenian) callers.
;; NOTE(review): the section above is headed "Slovak support", yet no
;; `slovak' feature was provided; it is added here -- confirm against
;; callers.
(provide 'slovenian)
(provide 'slovak)
|
|
575
|
|
576 ;; Latin-5 (ISO-8859-9)
|
|
577
|
|
578 ;; Turkish (more generally Turkic.) This is identical to Latin-1, with the
|
|
579 ;; exception that the Icelandic-specific letters have been replaced by
|
|
580 ;; Turkish-specific letters. As such, we can simply copy the Latin-1 syntax
|
|
581 ;; table.
|
|
582
|
|
;; Walk the octets #xA0 through #xFF inclusive, giving each Latin-5
;; character the syntax class of the Latin-1 character at the same octet.
(let ((table (standard-syntax-table))
      (octet #xA0))
  (while (<= octet #xFF)
    (modify-syntax-entry
     (make-char 'latin-iso8859-9 octet)
     (string (char-syntax (make-char 'latin-iso8859-1 octet)))
     table)
    (setq octet (1+ octet))))
|
|
590
|
|
591 ;; Case. #### Bug: this doesn't handle I WITH DOT ABOVE.
|
|
;; Upper/lower case pairs of ISO-8859-9, as (UPPER-OCTET LOWER-OCTET).
;; ISO-8859-9 differs from ISO-8859-1 only at 0xD0, 0xDD, 0xDE, 0xF0, 0xFD
;; and 0xFE, so E WITH CIRCUMFLEX, I WITH GRAVE and I WITH DIAERESIS are
;; present and need case pairs too; they were previously missing.
;; 0xDD/0xFD (dotted capital I / dotless small i) are still left out: they
;; are not a simple case pair.
(setup-case-pairs
 'latin-iso8859-9
 '((#xC0 #xE0)   ;; A WITH GRAVE
   (#xC1 #xE1)   ;; A WITH ACUTE
   (#xC2 #xE2)   ;; A WITH CIRCUMFLEX
   (#xC3 #xE3)   ;; A WITH TILDE
   (#xC4 #xE4)   ;; A WITH DIAERESIS
   (#xC5 #xE5)   ;; A WITH RING ABOVE
   (#xC6 #xE6)   ;; AE
   (#xC7 #xE7)   ;; C WITH CEDILLA
   (#xC8 #xE8)   ;; E WITH GRAVE
   (#xC9 #xE9)   ;; E WITH ACUTE
   (#xCA #xEA)   ;; E WITH CIRCUMFLEX
   (#xCB #xEB)   ;; E WITH DIAERESIS
   (#xCC #xEC)   ;; I WITH GRAVE
   (#xCD #xED)   ;; I WITH ACUTE
   (#xCE #xEE)   ;; I WITH CIRCUMFLEX
   (#xCF #xEF)   ;; I WITH DIAERESIS
   (#xD0 #xF0)   ;; G WITH BREVE
   (#xD1 #xF1)   ;; N WITH TILDE
   (#xD2 #xF2)   ;; O WITH GRAVE
   (#xD3 #xF3)   ;; O WITH ACUTE
   (#xD4 #xF4)   ;; O WITH CIRCUMFLEX
   (#xD5 #xF5)   ;; O WITH TILDE
   (#xD6 #xF6)   ;; O WITH DIAERESIS
   (#xD8 #xF8)   ;; O WITH STROKE
   (#xD9 #xF9)   ;; U WITH GRAVE
   (#xDA #xFA)   ;; U WITH ACUTE
   (#xDB #xFB)   ;; U WITH CIRCUMFLEX
   (#xDC #xFC)   ;; U WITH DIAERESIS
   (#xDE #xFE))) ;; S WITH CEDILLA
|
|
620
|
|
;; Coding system `iso-8859-9': an `iso2022'-type coding system whose
;; charset-g0 is ASCII and whose charset-g1 is the Latin-5 right-hand part.
(make-coding-system
 'iso-8859-9
 'iso2022
 "ISO-8859-9 (Latin-5)"
 '(charset-g0 ascii
   charset-g1 latin-iso8859-9
   charset-g2 t
   charset-g3 t
   mnemonic   "MIME/Ltn-5"))
|
|
628
|
|
629 ;; end of ISO-8859-9
|
|
630
|
|
631 ;; This is a utility function; we don't want it in the dumped XEmacs.
|
|
632
|
|
633 (fmakunbound 'setup-case-pairs)
|
2765
|
634
|
464
|
635
|
3767
|
636 ;; Language environments.
|
|
;; Each element of the table below has the shape
;;
;;   ((CHARSET CODESYS DEFAULT-INPUT NICE-CHARSET-1 NICE-CHARSET-2
;;     [SUPPORTED-LANGS])
;;    LANGENVS)
;;
;; A generic language environment named NICE-CHARSET-1 is registered for
;; every element, followed by one environment per entry of LANGENVS; each
;; such entry is (NAME LOCALE [TUTORIAL [SAMPLE-TEXT [INPUT-METHOD]]]).
;; An entry without INPUT-METHOD uses DEFAULT-INPUT.
;;
;; NOTE(review): several sample-text strings contain sequences such as
;; ",Ag(B".  The file declares iso-2022-7bit coding, so these look like
;; ISO-2022 designations whose ESC bytes are missing from this copy; they
;; are reproduced unchanged -- verify against the repository version.
;;
;; NOTE(review): when SUPPORTED-LANGS is a string it is not used in the
;; documentation built below -- confirm whether it was meant to be
;; appended.
(loop
  for ((charset codesys default-input nice-charset-1 nice-charset-2
        ;; supported-langs is a list if the doc string is replaced
        ;; entirely
        supported-langs)
       langenvs) in
  '(((latin-iso8859-1 iso-8859-1 "latin-1-prefix" "Latin-1" "ISO-8859-1"
" Danish, Dutch, English, Faeroese, Finnish, French, German, Icelandic,
Irish, Italian, Norwegian, Portuguese, Spanish, and Swedish.")
     (("Danish" "da")
      ("Dutch" "nl" "TUTORIAL.nl")
      ("Faeroese" "fo")
      ("Finnish" "fi")
      ("French" "fr" "TUTORIAL.fr" "Bonjour, ,Ag(Ba va?")
      ("German" "de" "TUTORIAL.de" "\
German (Deutsch Nord) Guten Tag
German (Deutsch S,A|(Bd) Gr,A|_(B Gott"
       "german-postfix")
      ("Icelandic" "is")
      ("Irish" "ga")
      ("Italian" "it")
      ("Norwegian" "no" "TUTORIAL.no")
      ("Portuguese" "pt" nil "Bem-vindo! Tudo bem?")
      ("Spanish" "es" "TUTORIAL.es" ",A!(BHola!")
      ("Swedish" "sv" "TUTORIAL.se" "Hej!")))
    ((latin-iso8859-15 iso-8859-15 "latin-1-prefix" ;; #### FIXME
      "Latin-9" "ISO-8859-15")
     ())
    ((latin-iso8859-2 iso-8859-2 "latin-2-prefix" "Latin-2" "ISO-8859-2"
" Albanian, Czech, English, German, Hungarian, Polish, Romanian,
Serbian, Croatian, Slovak, Slovene, Sorbian (upper and lower),
and Swedish.") ;; " added because fontification got screwed up, CVS-20061203.
     (("Albanian" "sq")
      ("Croatian" ("hrvatski" "hr") "TUTORIAL.hr")
      ("Czech" ("cs" "cz") "TUTORIAL.cs" "P,Bx(Bejeme v,Ba(Bm hezk,B}(B den!"
       "latin-2-postfix")
      ("Hungarian" ("hungarian" "hu"))
      ;; ISO 639-1 code for Polish is "pl" (was "po").
      ("Polish" "pl" "TUTORIAL.pl")
      ("Romanian" "ro" "TUTORIAL.ro" "Bun,Bc(B ziua, bine a,B~(Bi venit!"
       "latin-2-postfix")
      ("Serbian" "sr")
      ("Slovak" "sk" "TUTORIAL.sk" "Prajeme V,Ba(Bm pr,Bm(Bjemn,B}(B de,Br(B!"
       "latin-2-postfix")
      ("Slovenian" "sl" "TUTORIAL.sl" ",B.(Belimo vam uspe,B9(Ben dan!"
       "latin-2-postfix")
      ("Sorbian" nil)))
    ((latin-iso8859-3 iso-8859-3 "latin-3-prefix" "Latin-3" "ISO-8859-3"
" Afrikaans, Catalan, Dutch, English, Esperanto, French, Galician,
German, Italian, Maltese, Spanish, and Turkish.")
     (("Afrikaans" "af")
      ("Catalan" ("catalan" "ca"))
      ("Esperanto" "eo")
      ("Galician" "gl")
      ("Maltese" "mt")))
    ((latin-iso8859-4 iso-8859-4 "latin-4-prefix" "Latin-4" "ISO-8859-4"
" Danish, English, Estonian, Finnish, German, Greenlandic, Lappish,
Latvian, Lithuanian, and Norwegian.")
     (("Estonian" "et")
      ("Greenlandic" "kl")
      ("Lappish" "se")
      ("Latvian" "lv")
      ;; ISO 639-1 code for Lithuanian is "lt" (was "li").
      ("Lithuanian" "lt")))
    ((latin-iso8859-9 iso-8859-9 "latin-5-prefix" "Latin-5" "ISO-8859-9")
     (("Turkish" "tr"))))
  do
  (set-language-info-alist
   nice-charset-1
   `((charset ascii ,charset)
     (coding-system ,codesys)
     (coding-priority ,codesys)
     (native-coding-system ,codesys)
     ;; `consp', not `listp': an omitted SUPPORTED-LANGS is nil, which
     ;; `listp' accepts, and (car nil) would make the documentation nil
     ;; instead of the generic text.
     (documentation . ,(if (consp supported-langs)
                           (car supported-langs)
                         (format "\
Generic language environment for %s (%s)." nice-charset-1 nice-charset-2))))
   '("European"))
  (loop for (name locale tutorial sample-text input-method) in langenvs
    do
    (set-language-info-alist
     name
     `((charset ascii ,charset)
       (coding-system ,codesys)
       (coding-priority ,codesys)
       (native-coding-system ,codesys)
       ;; Optional properties are spliced in only when present.
       ,@(if locale `((locale . ,locale)))
       ,@(if tutorial `((tutorial . ,tutorial)))
       ,@(if sample-text `((sample-text . ,sample-text)))
       (input-method . ,(or input-method default-input))
       (documentation . ,(format "\
This language environment supports %s. " name)))
     '("European"))))
|
464
|
727
|
3767
|
728 ;;; latin.el ends here
|