Mercurial > hg > xemacs-beta
comparison lisp/utils/mail-extr.el @ 0:376386a54a3c r19-14
Import from CVS: tag r19-14
author | cvs |
---|---|
date | Mon, 13 Aug 2007 08:45:50 +0200 |
parents | |
children | ac2d302a0011 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:376386a54a3c |
---|---|
1 ;;; mail-extr.el --- extract full name and address from RFC 822 mail header. | |
2 | |
3 ;; Copyright (C) 1991, 1992, 1993, 1994 Free Software Foundation, Inc. | |
4 | |
5 ;; Author: Joe Wells <jbw@cs.bu.edu> | |
6 ;; Maintainer: Chuck Thompson <cthomp@xemacs.org> | |
7 ;; Version: 1.8 | |
8 ;; Keywords: mail | |
9 | |
10 ;; This file is part of XEmacs. | |
11 | |
12 ;; XEmacs is free software; you can redistribute it and/or modify it | |
13 ;; under the terms of the GNU General Public License as published by | |
14 ;; the Free Software Foundation; either version 2, or (at your option) | |
15 ;; any later version. | |
16 | |
17 ;; XEmacs is distributed in the hope that it will be useful, but | |
18 ;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
20 ;; General Public License for more details. | |
21 | |
22 ;; You should have received a copy of the GNU General Public License | |
23 ;; along with XEmacs; see the file COPYING. If not, write to the Free | |
24 ;; Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. | |
25 | |
26 ;;; Synched up with: Not synched with FSF but close to 19.28. | |
27 | |
28 ;;; Commentary: | |
29 | |
30 ;; The entry point of this code is | |
31 ;; | |
32 ;; mail-extract-address-components: (address) | |
33 ;; | |
34 ;; Given an RFC-822 ADDRESS, extract full name and canonical address. | |
35 ;; Returns a list of the form (FULL-NAME CANONICAL-ADDRESS). | |
36 ;; If no name can be extracted, FULL-NAME will be nil. | |
37 ;; ADDRESS may be a string or a buffer. If it is a buffer, the visible | |
38 ;; (narrowed) portion of the buffer will be interpreted as the address. | |
39 ;; (This feature exists so that the clever caller might be able to avoid | |
40 ;; consing a string.) | |
41 ;; If ADDRESS contains more than one RFC-822 address, only the first is | |
42 ;; returned. | |
43 ;; | |
44 ;; This code is more correct (and more heuristic) parser than the code in | |
45 ;; rfc822.el. And despite its size, it's fairly fast. | |
46 ;; | |
47 ;; There are two main benefits: | |
48 ;; | |
49 ;; 1. Higher probability of getting the correct full name for a human than | |
50 ;; any other package we know of. (On the other hand, it will cheerfully | |
51 ;; mangle non-human names/comments.) | |
52 ;; 2. Address part is put in a canonical form. | |
53 ;; | |
54 ;; The interface is not yet carved in stone; please give us suggestions. | |
55 ;; | |
56 ;; We have an extensive test-case collection of funny addresses if you want to | |
57 ;; work with the code. Developing this code requires frequent testing to | |
58 ;; make sure you're not breaking functionality. The test cases aren't included | |
59 ;; because they are over 100K. | |
60 ;; | |
61 ;; If you find an address that mail-extr fails on, please send it to the | |
62 ;; maintainer along with what you think the correct results should be. We do | |
63 ;; not consider it a bug if mail-extr mangles a comment that does not | |
64 ;; correspond to a real human full name, although we would prefer that | |
65 ;; mail-extr would return the comment as-is. | |
66 ;; | |
67 ;; Features: | |
68 ;; | |
69 ;; * Full name handling: | |
70 ;; | |
71 ;; * knows where full names can be found in an address. | |
72 ;; * avoids using empty comments and quoted text. | |
73 ;; * extracts full names from mailbox names. | |
74 ;; * recognizes common formats for comments after a full name. | |
75 ;; * puts a period and a space after each initial. | |
76 ;; * understands & referring to the mailbox name, capitalized. | |
77 ;; * strips name prefixes like "Prof.", etc. | |
78 ;; * understands what characters can occur in names (not just letters). | |
79 ;; * figures out middle initial from mailbox name. | |
80 ;; * removes funny nicknames. | |
81 ;; * keeps suffixes such as Jr., Sr., III, etc. | |
82 ;; * reorders "Last, First" type names. | |
83 ;; | |
84 ;; * Address handling: | |
85 ;; | |
86 ;; * parses rfc822 quoted text, comments, and domain literals. | |
87 ;; * parses rfc822 multi-line headers. | |
88 ;; * does something reasonable with rfc822 GROUP addresses. | |
89 ;; * handles many rfc822 noncompliant and garbage addresses. | |
90 ;; * canonicalizes addresses (after stripping comments/phrases outside <>). | |
91 ;; * converts ! addresses into .UUCP and %-style addresses. | |
92 ;; * converts rfc822 ROUTE addresses to %-style addresses. | |
93 ;; * truncates %-style addresses at leftmost fully qualified domain name. | |
94 ;; * handles local relative precedence of ! vs. % and @ (untested). | |
95 ;; | |
96 ;; It does almost no string creation. It primarily uses the built-in | |
97 ;; parsing routines with the appropriate syntax tables. This should | |
98 ;; result in greater speed. | |
99 ;; | |
100 ;; TODO: | |
101 ;; | |
102 ;; * handle all test cases. (This will take forever.) | |
103 ;; * software to pick the correct header to use (eg., "Senders-Name:"). | |
104 ;; * multiple addresses in the "From:" header (almost all of the necessary | |
105 ;; code is there). | |
106 ;; * flag to not treat `,' as an address separator. (This is useful when | |
107 ;; there is a "From:" header but no "Sender:" header, because then there | |
108 ;; is only allowed to be one address.) | |
109 ;; * mailbox name does not necessarily contain full name. | |
110 ;; * fixing capitalization when it's all upper or lowercase. (Hard!) | |
111 ;; * some of the domain literal handling is missing. (But I've never even | |
112 ;; seen one of these in a mail address, so maybe no big deal.) | |
113 ;; * arrange to have syntax tables byte-compiled. | |
114 ;; * speed hacks. | |
115 ;; * delete unused variables. | |
116 ;; * arrange for testing with different relative precedences of ! vs. @ | |
117 ;; and %. | |
118 ;; * insert documentation strings! | |
119 ;; * handle X.400-gatewayed addresses according to RFC 1148. | |
120 | |
121 ;;; Change Log: | |
122 ;; | |
123 ;; Thu Feb 17 17:57:33 1994 Jamie Zawinski (jwz@netscape.com) | |
124 ;; | |
125 ;; * merged with jbw's latest version | |
126 ;; | |
127 ;; Wed Feb 9 21:56:27 1994 Jamie Zawinski (jwz@netscape.com) | |
128 ;; | |
129 ;; * high-bit chars in comments weren't treated as word syntax | |
130 ;; | |
131 ;; Sat Feb 5 03:13:40 1994 Jamie Zawinski (jwz@netscape.com) | |
132 ;; | |
133 ;; * call replace-match with fixed-case arg | |
134 ;; | |
135 ;; Thu Dec 16 21:56:45 1993 Jamie Zawinski (jwz@netscape.com) | |
136 ;; | |
137 ;; * some more cleanup, doc, added provide | |
138 ;; | |
139 ;; Tue Mar 23 21:23:18 1993 Joe Wells (jbw at csd.bu.edu) | |
140 ;; | |
141 ;; * Made mail-full-name-prefixes a user-customizable variable. | |
142 ;; Allow passing the address as a buffer as well as as a string. | |
143 ;; Allow [ and ] as name characters (Finnish character set). | |
144 ;; | |
145 ;; Mon Mar 22 21:20:56 1993 Joe Wells (jbw at bigbird.bu.edu) | |
146 ;; | |
147 ;; * Handle "null" addresses. Handle = used for spacing in mailbox | |
148 ;; name. Fix bug in handling of ROUTE-ADDR-type addresses that are | |
149 ;; missing their brackets. Handle uppercase "JR". Extract full | |
150 ;; names from X.400 addresses encoded in RFC-822. Fix bug in | |
151 ;; handling of multiple addresses where first has trailing comment. | |
152 ;; Handle more kinds of telephone extension lead-ins. | |
153 ;; | |
154 ;; Mon Mar 22 20:16:57 1993 Joe Wells (jbw at bigbird.bu.edu) | |
155 ;; | |
156 ;; * Handle HZ encoding for embedding GB encoded chinese characters. | |
157 ;; | |
158 ;; Mon Mar 22 00:46:12 1993 Joe Wells (jbw at bigbird.bu.edu) | |
159 ;; | |
160 ;; * Fixed too broad matching of ham radio call signs. Fixed bug in | |
161 ;; handling an unmatched ' in a name string. Enhanced recognition | |
162 ;; of when . in the mailbox name terminates the name portion. | |
163 ;; Narrowed conversion of . to space to only the necessary | |
164 ;; situation. Deal with VMS's stupid date stamps. Handle a unique | |
165 ;; way of introducing an alternate address. Fixed spacing bug I | |
166 ;; introduced in switching last name order. Fixed bug in handling | |
167 ;; address with ! and % but no @. Narrowed the cases in which | |
168 ;; certain trailing words are discarded. | |
169 ;; | |
170 ;; Sun Mar 21 21:41:06 1993 Joe Wells (jbw at bigbird.bu.edu) | |
171 ;; | |
172 ;; * Fixed bugs in handling GROUP addresses. Certain words in the | |
173 ;; middle of a name no longer terminate it. Handle LISTSERV list | |
174 ;; names. Ignore comment field containing mailbox name. | |
175 ;; | |
176 ;; Sun Mar 21 14:39:38 1993 Joe Wells (jbw at bigbird.bu.edu) | |
177 ;; | |
178 ;; * Moved variant-method code back into main function. Handle | |
179 ;; underscores as spaces in comments. Handle leading nickname. Add | |
180 ;; flag to ignore single-word names. Other changes. | |
181 ;; | |
182 ;; Mon Feb 1 22:23:31 1993 Joe Wells (jbw at bigbird.bu.edu) | |
183 ;; | |
184 ;; * Added in changes by Rod Whitby and Jamie Zawinski. This | |
185 ;; includes the flag mail-extr-guess-middle-initial and the fix for | |
186 ;; handling multiple addresses correctly. | |
187 ;; | |
188 ;; Mon Apr 6 23:59:09 1992 Joe Wells (jbw at bigbird.bu.edu) | |
189 ;; | |
190 ;; * Cleaned up some more. Release version 1.0 to world. | |
191 ;; | |
192 ;; Sun Apr 5 19:39:08 1992 Joe Wells (jbw at bigbird.bu.edu) | |
193 ;; | |
194 ;; * Cleaned up full name extraction extensively. | |
195 ;; | |
196 ;; Sun Feb 2 14:45:24 1992 Joe Wells (jbw at bigbird.bu.edu) | |
197 ;; | |
198 ;; * Total rewrite. Integrated mail-canonicalize-address into | |
199 ;; mail-extract-address-components. Now handles GROUP addresses more | |
200 ;; or less correctly. Better handling of lots of different cases. | |
201 ;; | |
202 ;; Fri Jun 14 19:39:50 1991 | |
203 ;; * Created. | |
204 | |
205 ;;; Code: | |
206 | |
207 | |
208 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
209 ;; | |
210 ;; User configuration variable definitions. | |
211 ;; | |
212 | |
213 (defvar mail-extr-guess-middle-initial nil | |
214 "*Whether to try to guess middle initial from mail address. | |
215 If true, then when we see an address like \"John Smith <jqs@host.com>\" | |
216 we will assume that \"John Q. Smith\" is the fellow's name.") | |
217 | |
218 (defvar mail-extr-ignore-single-names t | |
219 "*Whether to ignore a name that is just a single word. | |
220 If true, then when we see an address like \"Idiot <dumb@stupid.com>\" | |
221 we will act as though we couldn't find a full name in the address.") | |
222 | |
223 ;; Matches a leading title that is not part of the name (does not | |
224 ;; contribute to uniquely identifying the person). | |
225 (defvar mail-extr-full-name-prefixes | |
226 (purecopy | |
227 "\\(Prof\\|D[Rr]\\|Mrs?\\|Rev\\|Rabbi\\|SysOp\\|LCDR\\)\\.?[ \t\n]") | |
228 "*Matches prefixes to the full name that identify a person's position. | |
229 These are stripped from the full name because they do not contribute to | |
230 uniquely identifying the person.") | |
231 | |
232 (defvar mail-extr-@-binds-tighter-than-! nil | |
233 "*Whether the local mail transport agent looks at ! before @.") | |
234 | |
235 (defvar mail-extr-mangle-uucp nil | |
236 "*Whether to throw away information in UUCP addresses | |
237 by translating things like \"foo!bar!baz@host\" into \"baz@bar.UUCP\".") | |
238 | |
239 ;;---------------------------------------------------------------------- | |
240 ;; what orderings are meaningful????? | |
241 ;;(defvar mail-operator-precedence-list '(?! ?% ?@)) | |
242 ;; Right operand of a % or a @ must be a domain name, period. No other | |
243 ;; operators allowed. Left operand of a @ is an address relative to that | |
244 ;; site. | |
245 | |
246 ;; Left operand of a ! must be a domain name. Right operand is an | |
247 ;; arbitrary address. | |
248 ;;---------------------------------------------------------------------- | |
249 | |
250 | |
251 | |
252 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
253 ;; | |
254 ;; Constant definitions. | |
255 ;; | |
256 | |
257 ;; Codes in | |
258 ;; Names in ISO 8859-1 Name | |
259 ;; ISO 10XXX ISO 8859-2 in | |
260 ;; ISO 6937 ISO 10646 RFC Swedish | |
261 ;; etc. Hex Oct 1345 TeX Split ASCII Description | |
262 ;; --------- ---------- ---- --- ----- ----- ------------------------------- | |
263 ;; %a E4 344 a: \"a ae { latin small a + diaeresis ä | |
264 ;; %o F6 366 o: \"o oe | latin small o + diaeresis ö | |
265 ;; @a E5 345 aa \oa aa } latin small a + ring above å | |
266 ;; %u FC 374 u: \"u ue ~ latin small u + diaeresis ü | |
267 ;; /e E9 351 e' \'e ` latin small e + acute é | |
268 ;; %A C4 304 A: \"A AE [ latin capital a + diaeresis Ä | |
269 ;; %O D6 326 O: \"O OE \ latin capital o + diaeresis Ö | |
270 ;; @A C5 305 AA \oA AA ] latin capital a + ring above Å | |
271 ;; %U DC 334 U: \"U UE ^ latin capital u + diaeresis Ü | |
272 ;; /E C9 311 E' \'E @ latin capital e + acute É | |
273 | |
274 ;; NOTE: @a and @A are not in ISO 8859-2 (the codes mentioned above invoke | |
275 ;; /l and /L). Some of this data was retrieved from | |
276 ;; listserv@jhuvm.hcf.jhu.edu. | |
277 | |
278 ;; Any character that can occur in a name, not counting characters that | |
279 ;; separate parts of a multipart name (hyphen and period). | |
280 ;; Yes, there are weird people with digits in their names. | |
281 ;; You will also notice the consideration for the | |
282 ;; Swedish/Finnish/Norwegian character set. | |
283 ;; #### (go to \376 instead of \377 to work around bug in search.c...) | |
284 (defconst mail-extr-all-letters-but-separators | |
285 (purecopy "][A-Za-z{|}'~0-9`\200-\376")) | |
286 | |
287 ;; Any character that can occur in a name in an RFC822 address including | |
288 ;; the separator (hyphen and possibly period) for multipart names. | |
289 ;; #### should . be in here? | |
290 (defconst mail-extr-all-letters | |
291 (purecopy (concat mail-extr-all-letters-but-separators "---"))) | |
292 | |
293 ;; Any character that can start a name. | |
294 ;; Keep this set as minimal as possible. | |
295 (defconst mail-extr-first-letters (purecopy "A-Za-z")) | |
296 | |
297 ;; Any character that can end a name. | |
298 ;; Keep this set as minimal as possible. | |
299 (defconst mail-extr-last-letters (purecopy "[A-Za-z`'.")) | |
300 | |
301 (defconst mail-extr-leading-garbage | |
302 (purecopy (format "[^%s]+" mail-extr-first-letters))) | |
303 | |
304 ;; (defconst mail-extr-non-name-chars | |
305 ;; (purecopy (concat "^" mail-extr-all-letters "."))) | |
306 ;; (defconst mail-extr-non-begin-name-chars | |
307 ;; (purecopy (concat "^" mail-extr-first-letters))) | |
308 ;; (defconst mail-extr-non-end-name-chars | |
309 ;; (purecopy (concat "^" mail-extr-last-letters))) | |
310 | |
311 ;; Matches an initial not followed by both a period and a space. | |
312 ;; (defconst mail-extr-bad-initials-pattern | |
313 ;; (purecopy | |
314 ;; (format "\\(\\([^%s]\\|\\`\\)[%s]\\)\\(\\.\\([^ ]\\)\\| \\|\\([^%s .]\\)\\|\\'\\)" | |
315 ;; mail-extr-all-letters mail-extr-first-letters mail-extr-all-letters))) | |
316 | |
317 ;; Matches periods used instead of spaces. Must not match the period | |
318 ;; following an initial. | |
319 (defconst mail-extr-bad-dot-pattern | |
320 (purecopy | |
321 (format "\\([%s][%s]\\)\\.+\\([%s]\\)" | |
322 mail-extr-all-letters | |
323 mail-extr-last-letters | |
324 mail-extr-first-letters))) | |
325 | |
326 ;; Matches an embedded or leading nickname that should be removed. | |
327 ;; (defconst mail-extr-nickname-pattern | |
328 ;; (purecopy | |
329 ;; (format "\\([ .]\\|\\`\\)[\"'`\[\(]\\([ .%s]+\\)[\]\"'\)] " | |
330 ;; mail-extr-all-letters))) | |
331 | |
332 ;; Matches the occurrence of a generational name suffix, and the last | |
333 ;; character of the preceding name. This is important because we want to | |
334 ;; keep such suffixes: they help to uniquely identify the person. | |
335 ;; *** Perhaps this should be a user-customizable variable. However, the | |
336 ;; *** regular expression is fairly tricky to alter, so maybe not. | |
337 (defconst mail-extr-full-name-suffix-pattern | |
338 (purecopy | |
339 (format | |
340 "\\(,? ?\\([JjSs][Rr]\\.?\\|V?I+V?\\)\\)\\([^%s]\\([^%s]\\|\\'\\)\\|\\'\\)" | |
341 mail-extr-all-letters mail-extr-all-letters))) | |
342 | |
343 (defconst mail-extr-roman-numeral-pattern (purecopy "V?I+V?\\b")) | |
344 | |
345 ;; Matches a trailing uppercase (with other characters possible) acronym. | |
346 ;; Must not match a trailing uppercase last name or trailing initial | |
347 (defconst mail-extr-weird-acronym-pattern | |
348 (purecopy "\\([A-Z]+[-_/]\\|[A-Z][A-Z][A-Z]?\\b\\)")) | |
349 | |
350 ;; Matches a mixed-case or lowercase name (not an initial). | |
351 ;; #### Match Latin1 lower case letters here too? | |
352 ;; (defconst mail-extr-mixed-case-name-pattern | |
353 ;; (purecopy | |
354 ;; (format | |
355 ;; "\\b\\([a-z][%s]*[%s]\\|[%s][%s]*[a-z][%s]*[%s]\\|[%s][%s]*[a-z]\\)" | |
356 ;; mail-extr-all-letters mail-extr-last-letters | |
357 ;; mail-extr-first-letters mail-extr-all-letters mail-extr-all-letters | |
358 ;; mail-extr-last-letters mail-extr-first-letters mail-extr-all-letters))) | |
359 | |
360 ;; Matches a trailing alternative address. | |
361 ;; #### Match Latin1 letters here too? | |
362 ;; #### Match _ before @ here too? | |
363 (defconst mail-extr-alternative-address-pattern | |
364 (purecopy "\\(aka *\\)?[a-zA-Z.]+[!@][a-zA-Z.]")) | |
365 | |
366 ;; Matches a variety of trailing comments not including comma-delimited | |
367 ;; comments. | |
368 (defconst mail-extr-trailing-comment-start-pattern | |
369 (purecopy " [-{]\\|--\\|[+@#></\;]")) | |
370 | |
371 ;; Matches a name (not an initial). | |
372 ;; This doesn't force a word boundary at the end because sometimes a | |
373 ;; comment is separated by a `-' with no preceding space. | |
374 (defconst mail-extr-name-pattern | |
375 (purecopy (format "\\b[%s][%s]*[%s]" | |
376 mail-extr-first-letters | |
377 mail-extr-all-letters | |
378 mail-extr-last-letters))) | |
379 | |
380 (defconst mail-extr-initial-pattern | |
381 (purecopy (format "\\b[%s]\\([. ]\\|\\b\\)" mail-extr-first-letters))) | |
382 | |
383 ;; Matches a single name before a comma. | |
384 ;; (defconst mail-extr-last-name-first-pattern | |
385 ;; (purecopy (concat "\\`" mail-extr-name-pattern ","))) | |
386 | |
387 ;; Matches telephone extensions. | |
388 (defconst mail-extr-telephone-extension-pattern | |
389 (purecopy | |
390 "\\(\\([Ee]xt\\|\\|[Tt]ph\\|[Tt]el\\|[Xx]\\).?\\)? *\\+?[0-9][- 0-9]+")) | |
391 | |
392 ;; Matches ham radio call signs. | |
393 ;; Help from: Mat Maessen N2NJZ <maessm@rpi.edu>, Mark Feit | |
394 ;; <mark@era.com>, Michael Covington <mcovingt@ai.uga.edu>. | |
395 ;; Examples: DX504 DX515 K5MRU K8DHK KA9WGN KA9WGN KD3FU KD6EUI KD6HBW | |
396 ;; KE9TV KF0NV N1API N3FU N3GZE N3IGS N4KCC N7IKQ N9HHU W4YHF W6ANK WA2SUH | |
397 ;; WB7VZI N2NJZ NR3G KJ4KK AB4UM AL7NI KH6OH WN3KBT N4TMI W1A N0NZO | |
398 (defconst mail-extr-ham-call-sign-pattern | |
399 (purecopy "\\b\\(DX[0-9]+\\|[AKNW][A-Z]?[0-9][A-Z][A-Z]?[A-Z]?\\)")) | |
400 | |
401 ;; Possible trailing suffixes: "\\(/\\(KT\\|A[AEG]\\|[R0-9]\\)\\)?" | |
402 ;; /KT == Temporary Technician (has CSC but not "real" license) | |
403 ;; /AA == Temporary Advanced | |
404 ;; /AE == Temporary Extra | |
405 ;; /AG == Temporary General | |
406 ;; /R == repeater | |
407 ;; /# == stations operating out of home district | |
408 ;; I don't include these in the regexp above because I can't imagine | |
409 ;; anyone putting them with their name in an e-mail address. | |
410 | |
411 ;; Matches normal single-part name | |
412 (defconst mail-extr-normal-name-pattern | |
413 (purecopy (format "\\b[%s][%s]+[%s]" | |
414 mail-extr-first-letters | |
415 mail-extr-all-letters-but-separators | |
416 mail-extr-last-letters))) | |
417 | |
418 ;; Matches a single word name. | |
419 ;; (defconst mail-extr-one-name-pattern | |
420 ;; (purecopy (concat "\\`" mail-extr-normal-name-pattern "\\'"))) | |
421 | |
422 ;; Matches normal two names with missing middle initial | |
423 ;; The first name is not allowed to have a hyphen because this can cause | |
424 ;; false matches where the "middle initial" is actually the first letter | |
425 ;; of the second part of the first name. | |
426 (defconst mail-extr-two-name-pattern | |
427 (purecopy | |
428 (concat "\\`\\(" mail-extr-normal-name-pattern | |
429 "\\|" mail-extr-initial-pattern | |
430 "\\) +\\(" mail-extr-name-pattern "\\)\\(,\\|\\'\\)"))) | |
431 | |
432 (defconst mail-extr-listserv-list-name-pattern | |
433 (purecopy "Multiple recipients of list \\([-A-Z]+\\)")) | |
434 | |
435 (defconst mail-extr-stupid-vms-date-stamp-pattern | |
436 (purecopy | |
437 "[0-9][0-9]-[JFMASOND][aepuco][nbrylgptvc]-[0-9][0-9][0-9][0-9] [0-9]+ *")) | |
438 | |
439 ;;; HZ -- GB (PRC Chinese character encoding) in ASCII embedding protocol | |
440 ;; | |
441 ;; In ASCII mode, a byte is interpreted as an ASCII character, unless a '~' is | |
442 ;; encountered. The character '~' is an escape character. By convention, it | |
443 ;; must be immediately followed ONLY by '~', '{' or '\n' (<LF>), with the | |
444 ;; following special meaning. | |
445 ;; | |
446 ;; o The escape sequence '~~' is interpreted as a '~'. | |
447 ;; o The escape-to-GB sequence '~{' switches the mode from ASCII to GB. | |
448 ;; o The escape sequence '~\n' is a line-continuation marker to be consumed | |
449 ;; with no output produced. | |
450 ;; | |
451 ;; In GB mode, characters are interpreted two bytes at a time as (pure) GB | |
452 ;; codes until the escape-from-GB code '~}' is read. This code switches the | |
453 ;; mode from GB back to ASCII. (Note that the escape-from-GB code '~}' | |
454 ;; ($7E7D) is outside the defined GB range.) | |
455 (defconst mail-extr-hz-embedded-gb-encoded-chinese-pattern | |
456 (purecopy "~{\\([^~].\\|~[^\}]\\)+~}")) | |
457 | |
458 ;; The leading optional lowercase letters are for a bastardized version of | |
459 ;; the encoding, as is the optional nature of the final slash. | |
460 (defconst mail-extr-x400-encoded-address-pattern | |
461 (purecopy "[a-z]?[a-z]?\\(/[A-Za-z]+\\(\\.[A-Za-z]+\\)?=[^/]+\\)+/?\\'")) | |
462 | |
463 (defconst mail-extr-x400-encoded-address-field-pattern-format | |
464 (purecopy "/%s=\\([^/]+\\)\\(/\\|\\'\\)")) | |
465 | |
466 (defconst mail-extr-x400-encoded-address-surname-pattern | |
467 ;; S stands for Surname (family name). | |
468 (purecopy | |
469 (format mail-extr-x400-encoded-address-field-pattern-format "[Ss]"))) | |
470 | |
471 (defconst mail-extr-x400-encoded-address-given-name-pattern | |
472 ;; G stands for Given name. | |
473 (purecopy | |
474 (format mail-extr-x400-encoded-address-field-pattern-format "[Gg]"))) | |
475 | |
476 (defconst mail-extr-x400-encoded-address-full-name-pattern | |
477 ;; PN stands for Personal Name. When used it represents the combination | |
478 ;; of the G and S fields. | |
479 ;; "The one system I used having this field asked it with the prompt | |
480 ;; `Personal Name'. But they mapped it into G and S on outgoing real | |
481 ;; X.400 addresses. As they mapped G and S into PN on incoming..." | |
482 (purecopy | |
483 (format mail-extr-x400-encoded-address-field-pattern-format "[Pp][Nn]"))) | |
484 | |
485 | |
486 | |
487 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
488 ;; | |
489 ;; Syntax tables used for quick parsing. | |
490 ;; | |
491 | |
492 (defconst mail-extr-address-syntax-table (make-syntax-table)) | |
493 (defconst mail-extr-address-comment-syntax-table (make-syntax-table)) | |
494 (defconst mail-extr-address-domain-literal-syntax-table (make-syntax-table)) | |
495 (defconst mail-extr-address-text-comment-syntax-table (make-syntax-table)) | |
496 (defconst mail-extr-address-text-syntax-table (make-syntax-table)) | |
497 (mapcar | |
498 (function | |
499 (lambda (pair) | |
500 (let ((syntax-table (symbol-value (car pair)))) | |
501 (mapcar | |
502 (function | |
503 (lambda (item) | |
504 (if (eq 2 (length item)) | |
505 ;; modifying syntax of a single character | |
506 (modify-syntax-entry (car item) (car (cdr item)) syntax-table) | |
507 ;; modifying syntax of a range of characters | |
508 (let ((char (nth 0 item)) | |
509 (bound (nth 1 item)) | |
510 (syntax (nth 2 item))) | |
511 (while (<= char bound) | |
512 (modify-syntax-entry char syntax syntax-table) | |
513 (setq char (1+ char))))))) | |
514 (cdr pair))))) | |
515 '((mail-extr-address-syntax-table | |
516 (?\000 ?\037 "w") ;control characters | |
517 (?\040 " ") ;SPC | |
518 (?! ?~ "w") ;printable characters | |
519 (?\177 "w") ;DEL | |
520 (?\200 ?\377 "w") ;high-bit-on characters | |
521 (?\240 " ") ;nobreakspace | |
522 (?\t " ") | |
523 (?\r " ") | |
524 (?\n " ") | |
525 (?\( ".") | |
526 (?\) ".") | |
527 (?< ".") | |
528 (?> ".") | |
529 (?@ ".") | |
530 (?, ".") | |
531 (?\; ".") | |
532 (?: ".") | |
533 (?\\ "\\") | |
534 (?\" "\"") | |
535 (?. ".") | |
536 (?\[ ".") | |
537 (?\] ".") | |
538 ;; % and ! aren't RFC822 characters, but it is convenient to pretend | |
539 (?% ".") | |
540 (?! ".") ;; this needs to be word-constituent when not in .UUCP mode | |
541 ) | |
542 (mail-extr-address-comment-syntax-table | |
543 (?\000 ?\377 "w") | |
544 (?\040 " ") | |
545 (?\240 " ") | |
546 (?\t " ") | |
547 (?\r " ") | |
548 (?\n " ") | |
549 (?\( "\(\)") | |
550 (?\) "\)\(") | |
551 (?\\ "\\")) | |
552 (mail-extr-address-domain-literal-syntax-table | |
553 (?\000 ?\377 "w") | |
554 (?\040 " ") | |
555 (?\240 " ") | |
556 (?\t " ") | |
557 (?\r " ") | |
558 (?\n " ") | |
559 (?\[ "\(\]") ;?????? | |
560 (?\] "\)\[") ;?????? | |
561 (?\\ "\\")) | |
562 (mail-extr-address-text-comment-syntax-table | |
563 (?\000 ?\377 "w") | |
564 (?\040 " ") | |
565 (?\240 " ") | |
566 (?\t " ") | |
567 (?\r " ") | |
568 (?\n " ") | |
569 (?\( "\(\)") | |
570 (?\) "\)\(") | |
571 (?\[ "\(\]") | |
572 (?\] "\)\[") | |
573 (?\{ "\(\}") | |
574 (?\} "\)\{") | |
575 (?\\ "\\") | |
576 (?\" "\"") | |
577 ;; (?\' "\)\`") | |
578 ;; (?\` "\(\'") | |
579 ) | |
580 (mail-extr-address-text-syntax-table | |
581 (?\000 ?\177 ".") | |
582 (?\200 ?\377 "w") | |
583 (?\040 " ") | |
584 (?\t " ") | |
585 (?\r " ") | |
586 (?\n " ") | |
587 (?A ?Z "w") | |
588 (?a ?z "w") | |
589 (?- "w") | |
590 (?\} "w") | |
591 (?\{ "w") | |
592 (?| "w") | |
593 (?\' "w") | |
594 (?~ "w") | |
595 (?0 ?9 "w")) | |
596 )) | |
597 | |
598 | |
599 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
600 ;; | |
601 ;; Utility functions and macros. | |
602 ;; | |
603 | |
604 (defmacro mail-extr-delete-char (n) | |
605 ;; in v19, delete-char is compiled as a function call, but delete-region | |
606 ;; is byte-coded, so it's much much faster. | |
607 (list 'delete-region '(point) (list '+ '(point) n))) | |
608 | |
609 (defmacro mail-extr-skip-whitespace-forward () | |
610 ;; v19 fn skip-syntax-forward is more tasteful, but not byte-coded. | |
611 '(skip-chars-forward " \t\n\r\240")) | |
612 | |
613 (defmacro mail-extr-skip-whitespace-backward () | |
614 ;; v19 fn skip-syntax-backward is more tasteful, but not byte-coded. | |
615 '(skip-chars-backward " \t\n\r\240")) | |
616 | |
617 | |
618 (defmacro mail-extr-undo-backslash-quoting (beg end) | |
619 (`(save-excursion | |
620 (save-restriction | |
621 (narrow-to-region (, beg) (, end)) | |
622 (goto-char (point-min)) | |
623 ;; undo \ quoting | |
624 (while (search-forward "\\" nil t) | |
625 (mail-extr-delete-char -1) | |
626 (or (eobp) | |
627 (forward-char 1)) | |
628 ))))) | |
629 | |
630 (defmacro mail-extr-nuke-char-at (pos) | |
631 (` (save-excursion | |
632 (goto-char (, pos)) | |
633 (mail-extr-delete-char 1) | |
634 (insert ?\ )))) | |
635 | |
636 (put 'mail-extr-nuke-outside-range | |
637 'edebug-form-spec '(symbolp &optional form form atom)) | |
638 | |
639 (defmacro mail-extr-nuke-outside-range (list-symbol | |
640 beg-symbol end-symbol | |
641 &optional no-replace) | |
642 ;; LIST-SYMBOL names a variable holding a list of buffer positions | |
643 ;; BEG-SYMBOL and END-SYMBOL name variables delimiting a range | |
644 ;; Each element of LIST-SYMBOL which lies outside of the range is | |
645 ;; deleted from the list. | |
646 ;; Unless NO-REPLACE is true, at each of the positions in LIST-SYMBOL | |
647 ;; which lie outside of the range, one character at that position is | |
648 ;; replaced with a SPC. | |
649 (or (memq no-replace '(t nil)) | |
650 (error "no-replace must be t or nil, evalable at macroexpand-time.")) | |
651 (` (let ((temp (, list-symbol)) | |
652 ch) | |
653 (while temp | |
654 (setq ch (car temp)) | |
655 (cond ((or (> ch (, end-symbol)) | |
656 (< ch (, beg-symbol))) | |
657 (,@ (if no-replace | |
658 nil | |
659 (` ((mail-extr-nuke-char-at ch))))) | |
660 (setcar temp nil))) | |
661 (setq temp (cdr temp))) | |
662 (setq (, list-symbol) (delq nil (, list-symbol)))))) | |
663 | |
664 (defun mail-extr-demarkerize (marker) | |
665 ;; if arg is a marker, destroys the marker, then returns the old value. | |
666 ;; otherwise returns the arg. | |
667 (if (markerp marker) | |
668 (let ((temp (marker-position marker))) | |
669 (set-marker marker nil) | |
670 temp) | |
671 marker)) | |
672 | |
673 (defun mail-extr-markerize (pos) | |
674 ;; coerces pos to a marker if non-nil. | |
675 (if (or (markerp pos) (null pos)) | |
676 pos | |
677 (copy-marker pos))) | |
678 | |
679 (defmacro mail-extr-last (list) | |
680 ;; Returns last element of LIST. | |
681 ;; Could be a subst. | |
682 (` (let ((list (, list))) | |
683 (while (not (null (cdr list))) | |
684 (setq list (cdr list))) | |
685 (car list)))) | |
686 | |
687 (defmacro mail-extr-safe-move-sexp (arg) | |
688 ;; Safely skip over one balanced sexp, if there is one. Return t if success. | |
689 (` (condition-case error | |
690 (progn | |
691 (goto-char (scan-sexps (point) (, arg))) | |
692 t) | |
693 (error | |
694 ;; #### kludge kludge kludge kludge kludge kludge kludge !!! | |
695 (if (string-equal (nth 1 error) "Unbalanced parentheses") | |
696 nil | |
697 (while t | |
698 (signal (car error) (cdr error)))))))) | |
699 | |
700 (or (fboundp 'buffer-disable-undo) ;; v18 compat | |
701 (fset 'buffer-disable-undo 'buffer-flush-undo)) | |
702 | |
703 | |
704 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
705 ;; | |
706 ;; The main function to grind addresses | |
707 ;; | |
708 | |
709 (defvar disable-initial-guessing-flag) ; dynamic assignment | |
710 (defvar cbeg) ; dynamic assignment | |
711 (defvar cend) ; dynamic assignment | |
712 | |
713 ;;;###autoload | |
714 (defun mail-extract-address-components (address) | |
715 "Given an RFC-822 ADDRESS, extract full name and canonical address. | |
716 Returns a list of the form (FULL-NAME CANONICAL-ADDRESS). | |
717 If no name can be extracted, FULL-NAME will be nil. | |
718 ADDRESS may be a string or a buffer. If it is a buffer, the visible | |
719 (narrowed) portion of the buffer will be interpreted as the address. | |
720 (This feature exists so that the clever caller might be able to avoid | |
721 consing a string.) | |
722 If ADDRESS contains more than one RFC-822 address, only the first is | |
723 returned. Some day this function may be extended to extract multiple | |
724 addresses, or perhaps return the position at which parsing stopped." | |
725 (let ((canonicalization-buffer (get-buffer-create " *canonical address*")) | |
726 (extraction-buffer (get-buffer-create " *extract address components*")) | |
727 char | |
728 ;; multiple-addresses | |
729 <-pos >-pos @-pos colon-pos comma-pos !-pos %-pos \;-pos | |
730 group-colon-pos group-\;-pos route-addr-colon-pos | |
731 record-pos-symbol | |
732 first-real-pos last-real-pos | |
733 phrase-beg phrase-end | |
734 cbeg cend ; dynamically set from -voodoo | |
735 quote-beg quote-end | |
736 atom-beg atom-end | |
737 mbox-beg mbox-end | |
738 \.-ends-name | |
739 temp | |
740 ;; name-suffix | |
741 fi mi li ; first, middle, last initial | |
742 saved-%-pos saved-!-pos saved-@-pos | |
743 domain-pos \.-pos insert-point | |
744 ;; mailbox-name-processed-flag | |
745 disable-initial-guessing-flag ; dynamically set from -voodoo | |
746 ) | |
747 | |
748 (save-excursion | |
749 (set-buffer extraction-buffer) | |
750 (fundamental-mode) | |
751 (kill-all-local-variables) | |
752 (buffer-disable-undo extraction-buffer) | |
753 (set-syntax-table mail-extr-address-syntax-table) | |
754 (widen) | |
755 (erase-buffer) | |
756 (setq case-fold-search nil) | |
757 | |
758 ;; Insert extra space at beginning to allow later replacement with < | |
759 ;; without having to move markers. | |
760 (insert ?\ ) | |
761 | |
762 ;; Insert the address itself. | |
763 (cond ((stringp address) | |
764 (insert address)) | |
765 ((bufferp address) | |
766 (insert-buffer-substring address)) | |
767 (t | |
768 (error "Illegal address: %s" address))) | |
769 | |
770 ;; stolen from rfc822.el | |
771 ;; Unfold multiple lines. | |
772 (goto-char (point-min)) | |
773 (while (re-search-forward "\\([^\\]\\(\\\\\\\\\\)*\\)\n[ \t]" nil t) | |
774 (replace-match "\\1 " t)) | |
775 | |
776 ;; first pass grabs useful information about address | |
777 (goto-char (point-min)) | |
778 (while (progn | |
779 (mail-extr-skip-whitespace-forward) | |
780 (not (eobp))) | |
781 (setq char (char-after (point))) | |
782 (or first-real-pos | |
783 (if (not (eq char ?\()) | |
784 (setq first-real-pos (point)))) | |
785 (cond | |
786 ;; comment | |
787 ((eq char ?\() | |
788 (set-syntax-table mail-extr-address-comment-syntax-table) | |
789 ;; only record the first non-empty comment's position | |
790 (if (and (not cbeg) | |
791 (save-excursion | |
792 (forward-char 1) | |
793 (mail-extr-skip-whitespace-forward) | |
794 (not (eq ?\) (char-after (point)))))) | |
795 (setq cbeg (point))) | |
796 ;; TODO: don't record if unbalanced | |
797 (or (mail-extr-safe-move-sexp 1) | |
798 (forward-char 1)) | |
799 (set-syntax-table mail-extr-address-syntax-table) | |
800 (if (and cbeg | |
801 (not cend)) | |
802 (setq cend (point)))) | |
803 ;; quoted text | |
804 ((eq char ?\") | |
805 ;; only record the first non-empty quote's position | |
806 (if (and (not quote-beg) | |
807 (save-excursion | |
808 (forward-char 1) | |
809 (mail-extr-skip-whitespace-forward) | |
810 (not (eq ?\" (char-after (point)))))) | |
811 (setq quote-beg (point))) | |
812 ;; TODO: don't record if unbalanced | |
813 (or (mail-extr-safe-move-sexp 1) | |
814 (forward-char 1)) | |
815 (if (and quote-beg | |
816 (not quote-end)) | |
817 (setq quote-end (point)))) | |
818 ;; domain literals | |
819 ((eq char ?\[) | |
820 (set-syntax-table mail-extr-address-domain-literal-syntax-table) | |
821 (or (mail-extr-safe-move-sexp 1) | |
822 (forward-char 1)) | |
823 (set-syntax-table mail-extr-address-syntax-table)) | |
824 ;; commas delimit addresses when outside < > pairs. | |
825 ((and (eq char ?,) | |
826 (or (and (null <-pos) | |
827 ;; Handle ROUTE-ADDR address that is missing its <. | |
828 (not (eq ?@ (char-after (1+ (point)))))) | |
829 (and >-pos | |
830 ;; handle weird munged addresses | |
831 ;; BUG FIX: This test was reversed. Thanks to the | |
832 ;; brilliant Rod Whitby <rwhitby@research.canon.oz.au> | |
833 ;; for discovering this! | |
834 (< (mail-extr-last <-pos) (car >-pos))))) | |
835 ;; It'd be great if some day this worked, but for now, punt. | |
836 ;; (setq multiple-addresses t) | |
837 ;; ;; *** Why do I want this: | |
838 ;; (mail-extr-delete-char 1) | |
839 ;; (narrow-to-region (point-min) (point)) | |
840 (delete-region (point) (point-max)) | |
841 (setq char ?\() ; HAVE I NO SHAME?? | |
842 ) | |
843 ;; record the position of various interesting chars, determine | |
844 ;; legality later. | |
845 ((setq record-pos-symbol | |
846 (cdr (assq char | |
847 '((?< . <-pos) (?> . >-pos) (?@ . @-pos) | |
848 (?: . colon-pos) (?, . comma-pos) (?! . !-pos) | |
849 (?% . %-pos) (?\; . \;-pos))))) | |
850 (set record-pos-symbol | |
851 (cons (point) (symbol-value record-pos-symbol))) | |
852 (forward-char 1)) | |
853 ((eq char ?.) | |
854 (forward-char 1)) | |
855 ((memq char '( | |
856 ;; comment terminator illegal | |
857 ?\) | |
858 ;; domain literal terminator illegal | |
859 ?\] | |
860 ;; \ allowed only within quoted strings, | |
861 ;; domain literals, and comments | |
862 ?\\ | |
863 )) | |
864 (mail-extr-nuke-char-at (point)) | |
865 (forward-char 1)) | |
866 (t | |
867 (forward-word 1))) | |
868 (or (eq char ?\() | |
869 ;; At the end of first address of a multiple address header. | |
870 (and (eq char ?,) | |
871 (eobp)) | |
872 (setq last-real-pos (point)))) | |
873 | |
874 ;; Use only the leftmost <, if any. Replace all others with spaces. | |
875 (while (cdr <-pos) | |
876 (mail-extr-nuke-char-at (car <-pos)) | |
877 (setq <-pos (cdr <-pos))) | |
878 | |
879 ;; Use only the rightmost >, if any. Replace all others with spaces. | |
880 (while (cdr >-pos) | |
881 (mail-extr-nuke-char-at (nth 1 >-pos)) | |
882 (setcdr >-pos (nthcdr 2 >-pos))) | |
883 | |
884 ;; If multiple @s and a :, but no < and >, insert around buffer. | |
885 ;; Example: @foo.bar.dom,@xxx.yyy.zzz:mailbox@aaa.bbb.ccc | |
886 ;; This commonly happens on the UUCP "From " line. Ugh. | |
887 (cond ((and (> (length @-pos) 1) | |
888 (eq 1 (length colon-pos)) ;TODO: check if between last two @s | |
889 (not \;-pos) | |
890 (not <-pos)) | |
891 (goto-char (point-min)) | |
892 (mail-extr-delete-char 1) | |
893 (setq <-pos (list (point))) | |
894 (insert ?<))) | |
895 | |
896 ;; If < but no >, insert > in rightmost possible position | |
897 (cond ((and <-pos | |
898 (null >-pos)) | |
899 (goto-char (point-max)) | |
900 (setq >-pos (list (point))) | |
901 (insert ?>))) | |
902 | |
903 ;; If > but no <, replace > with space. | |
904 (cond ((and >-pos | |
905 (null <-pos)) | |
906 (mail-extr-nuke-char-at (car >-pos)) | |
907 (setq >-pos nil))) | |
908 | |
909 ;; Turn >-pos and <-pos into non-lists | |
910 (setq >-pos (car >-pos) | |
911 <-pos (car <-pos)) | |
912 | |
913 ;; Trim other punctuation lists of items outside < > pair to handle | |
914 ;; stupid MTAs. | |
915 (cond (<-pos ; don't need to check >-pos also | |
916 ;; handle bozo software that violates RFC 822 by sticking | |
917 ;; punctuation marks outside of a < > pair | |
918 (mail-extr-nuke-outside-range @-pos <-pos >-pos t) | |
919 ;; RFC 822 says nothing about these two outside < >, but | |
920 ;; remove those positions from the lists to make things | |
921 ;; easier. | |
922 (mail-extr-nuke-outside-range !-pos <-pos >-pos t) | |
923 (mail-extr-nuke-outside-range %-pos <-pos >-pos t))) | |
924 | |
925 ;; Check for : that indicates GROUP list and for : part of | |
926 ;; ROUTE-ADDR spec. | |
927 ;; Can't possibly be more than two :. Nuke any extra. | |
928 (while colon-pos | |
929 (setq temp (car colon-pos) | |
930 colon-pos (cdr colon-pos)) | |
931 (cond ((and <-pos >-pos | |
932 (> temp <-pos) | |
933 (< temp >-pos)) | |
934 (if (or route-addr-colon-pos | |
935 (< (length @-pos) 2) | |
936 (> temp (car @-pos)) | |
937 (< temp (nth 1 @-pos))) | |
938 (mail-extr-nuke-char-at temp) | |
939 (setq route-addr-colon-pos temp))) | |
940 ((or (not <-pos) | |
941 (and <-pos | |
942 (< temp <-pos))) | |
943 (setq group-colon-pos temp)))) | |
944 | |
945 ;; Nuke any ; that is in or to the left of a < > pair or to the left | |
946 ;; of a GROUP starting :. Also, there may only be one ;. | |
947 (while \;-pos | |
948 (setq temp (car \;-pos) | |
949 \;-pos (cdr \;-pos)) | |
950 (cond ((and <-pos >-pos | |
951 (> temp <-pos) | |
952 (< temp >-pos)) | |
953 (mail-extr-nuke-char-at temp)) | |
954 ((and (or (not group-colon-pos) | |
955 (> temp group-colon-pos)) | |
956 (not group-\;-pos)) | |
957 (setq group-\;-pos temp)))) | |
958 | |
959 ;; Nuke unmatched GROUP syntax characters. | |
960 (cond ((and group-colon-pos (not group-\;-pos)) | |
961 ;; *** Do I really need to erase it? | |
962 (mail-extr-nuke-char-at group-colon-pos) | |
963 (setq group-colon-pos nil))) | |
964 (cond ((and group-\;-pos (not group-colon-pos)) | |
965 ;; *** Do I really need to erase it? | |
966 (mail-extr-nuke-char-at group-\;-pos) | |
967 (setq group-\;-pos nil))) | |
968 | |
969 ;; Handle junk like ";@host.company.dom" that sendmail adds. | |
970 ;; **** should I remember comment positions? | |
971 (cond | |
972 (group-\;-pos | |
973 ;; this is fine for now | |
974 (mail-extr-nuke-outside-range !-pos group-colon-pos group-\;-pos t) | |
975 (mail-extr-nuke-outside-range @-pos group-colon-pos group-\;-pos t) | |
976 (mail-extr-nuke-outside-range %-pos group-colon-pos group-\;-pos t) | |
977 (mail-extr-nuke-outside-range comma-pos group-colon-pos group-\;-pos t) | |
978 (and last-real-pos | |
979 (> last-real-pos (1+ group-\;-pos)) | |
980 (setq last-real-pos (1+ group-\;-pos))) | |
981 ;; *** This may be wrong: | |
982 (and cend | |
983 (> cend group-\;-pos) | |
984 (setq cend nil | |
985 cbeg nil)) | |
986 (and quote-end | |
987 (> quote-end group-\;-pos) | |
988 (setq quote-end nil | |
989 quote-beg nil)) | |
990 ;; This was both wrong and unnecessary: | |
991 ;;(narrow-to-region (point-min) group-\;-pos) | |
992 | |
993 ;; *** The entire handling of GROUP addresses seems rather lame. | |
994 ;; *** It deserves a complete rethink, except that these addresses | |
995 ;; *** are hardly ever seen. | |
996 )) | |
997 | |
998 ;; Any commas must be between < and : of ROUTE-ADDR. Nuke any | |
999 ;; others. | |
1000 ;; Hell, go ahead an nuke all of the commas. | |
1001 ;; **** This will cause problems when we start handling commas in | |
1002 ;; the PHRASE part .... no it won't ... yes it will ... ????? | |
1003 (mail-extr-nuke-outside-range comma-pos 1 1) | |
1004 | |
1005 ;; can only have multiple @s inside < >. The fact that some MTAs | |
1006 ;; put de-bracketed ROUTE-ADDRs in the UUCP-style "From " line is | |
1007 ;; handled above. | |
1008 | |
1009 ;; Locate PHRASE part of ROUTE-ADDR. | |
1010 (cond (<-pos | |
1011 (goto-char <-pos) | |
1012 (mail-extr-skip-whitespace-backward) | |
1013 (setq phrase-end (point)) | |
1014 (goto-char (or ;;group-colon-pos | |
1015 (point-min))) | |
1016 (mail-extr-skip-whitespace-forward) | |
1017 (if (< (point) phrase-end) | |
1018 (setq phrase-beg (point)) | |
1019 (setq phrase-end nil)))) | |
1020 | |
1021 ;; handle ROUTE-ADDRS with real ROUTEs. | |
1022 ;; If there are multiple @s, then we assume ROUTE-ADDR syntax, and | |
1023 ;; any % or ! must be semantically meaningless. | |
1024 ;; TODO: do this processing into canonicalization buffer | |
1025 (cond (route-addr-colon-pos | |
1026 (setq !-pos nil | |
1027 %-pos nil | |
1028 >-pos (copy-marker >-pos) | |
1029 route-addr-colon-pos (copy-marker route-addr-colon-pos)) | |
1030 (goto-char >-pos) | |
1031 (insert-before-markers ?X) | |
1032 (goto-char (car @-pos)) | |
1033 (while (setq @-pos (cdr @-pos)) | |
1034 (mail-extr-delete-char 1) | |
1035 (setq %-pos (cons (point-marker) %-pos)) | |
1036 (insert "%") | |
1037 (goto-char (1- >-pos)) | |
1038 (save-excursion | |
1039 (insert-buffer-substring extraction-buffer | |
1040 (car @-pos) route-addr-colon-pos) | |
1041 (delete-region (car @-pos) route-addr-colon-pos)) | |
1042 (or (cdr @-pos) | |
1043 (setq saved-@-pos (list (point))))) | |
1044 (setq @-pos saved-@-pos) | |
1045 (goto-char >-pos) | |
1046 (mail-extr-delete-char -1) | |
1047 (mail-extr-nuke-char-at route-addr-colon-pos) | |
1048 (mail-extr-demarkerize route-addr-colon-pos) | |
1049 (setq route-addr-colon-pos nil | |
1050 >-pos (mail-extr-demarkerize >-pos) | |
1051 %-pos (mapcar 'mail-extr-demarkerize %-pos)))) | |
1052 | |
1053 ;; de-listify @-pos | |
1054 (setq @-pos (car @-pos)) | |
1055 | |
1056 ;; TODO: remove comments in the middle of an address | |
1057 | |
1058 (set-buffer canonicalization-buffer) | |
1059 (fundamental-mode) | |
1060 (kill-all-local-variables) | |
1061 (buffer-disable-undo canonicalization-buffer) | |
1062 (set-syntax-table mail-extr-address-syntax-table) | |
1063 (setq case-fold-search nil) | |
1064 | |
1065 (widen) | |
1066 (erase-buffer) | |
1067 (insert-buffer-substring extraction-buffer) | |
1068 | |
1069 (if <-pos | |
1070 (narrow-to-region (progn | |
1071 (goto-char (1+ <-pos)) | |
1072 (mail-extr-skip-whitespace-forward) | |
1073 (point)) | |
1074 >-pos) | |
1075 (if (and first-real-pos last-real-pos) | |
1076 (narrow-to-region first-real-pos last-real-pos) | |
1077 ;; ****** Oh no! What if the address is completely empty! | |
1078 ;; *** Is this correct? | |
1079 (narrow-to-region (point-max) (point-max)) | |
1080 )) | |
1081 | |
1082 (and @-pos %-pos | |
1083 (mail-extr-nuke-outside-range %-pos (point-min) @-pos)) | |
1084 (and %-pos !-pos | |
1085 (mail-extr-nuke-outside-range !-pos (point-min) (car %-pos))) | |
1086 (and @-pos !-pos (not %-pos) | |
1087 (mail-extr-nuke-outside-range !-pos (point-min) @-pos)) | |
1088 | |
1089 ;; Error condition:?? (and %-pos (not @-pos)) | |
1090 | |
1091 ;; WARNING: THIS CODE IS DUPLICATED BELOW. | |
1092 (cond ((and %-pos | |
1093 (not @-pos)) | |
1094 (goto-char (car %-pos)) | |
1095 (mail-extr-delete-char 1) | |
1096 (setq @-pos (point)) | |
1097 (insert "@") | |
1098 (setq %-pos (cdr %-pos)))) | |
1099 | |
1100 (if mail-extr-mangle-uucp | |
1101 (cond (!-pos | |
1102 ;; **** I don't understand this save-restriction and the | |
1103 ;; narrow-to-region inside it. Why did I do that? | |
1104 (save-restriction | |
1105 (cond ((and @-pos | |
1106 mail-extr-@-binds-tighter-than-!) | |
1107 (goto-char @-pos) | |
1108 (setq %-pos (cons (point) %-pos) | |
1109 @-pos nil) | |
1110 (mail-extr-delete-char 1) | |
1111 (insert "%") | |
1112 (setq insert-point (point-max))) | |
1113 (mail-extr-@-binds-tighter-than-! | |
1114 (setq insert-point (point-max))) | |
1115 (%-pos | |
1116 (setq insert-point (mail-extr-last %-pos) | |
1117 saved-%-pos (mapcar 'mail-extr-markerize %-pos) | |
1118 %-pos nil | |
1119 @-pos (mail-extr-markerize @-pos))) | |
1120 (@-pos | |
1121 (setq insert-point @-pos) | |
1122 (setq @-pos (mail-extr-markerize @-pos))) | |
1123 (t | |
1124 (setq insert-point (point-max)))) | |
1125 (narrow-to-region (point-min) insert-point) | |
1126 (setq saved-!-pos (car !-pos)) | |
1127 (while !-pos | |
1128 (goto-char (point-max)) | |
1129 (cond ((and (not @-pos) | |
1130 (not (cdr !-pos))) | |
1131 (setq @-pos (point)) | |
1132 (insert-before-markers "@ ")) | |
1133 (t | |
1134 (setq %-pos (cons (point) %-pos)) | |
1135 (insert-before-markers "% "))) | |
1136 (backward-char 1) | |
1137 (insert-buffer-substring | |
1138 (current-buffer) | |
1139 (if (nth 1 !-pos) | |
1140 (1+ (nth 1 !-pos)) | |
1141 (point-min)) | |
1142 (car !-pos)) | |
1143 (mail-extr-delete-char 1) | |
1144 (or (save-excursion | |
1145 (mail-extr-safe-move-sexp -1) | |
1146 (mail-extr-skip-whitespace-backward) | |
1147 (eq ?. (preceding-char))) | |
1148 (insert-before-markers | |
1149 (if (save-excursion | |
1150 (mail-extr-skip-whitespace-backward) | |
1151 (eq ?. (preceding-char))) | |
1152 "" | |
1153 ".") | |
1154 "uucp")) | |
1155 (setq !-pos (cdr !-pos)))) | |
1156 (and saved-%-pos | |
1157 (setq %-pos (append (mapcar 'mail-extr-demarkerize | |
1158 saved-%-pos) | |
1159 %-pos))) | |
1160 (setq @-pos (mail-extr-demarkerize @-pos)) | |
1161 (narrow-to-region (1+ saved-!-pos) (point-max))))) | |
1162 | |
1163 ;; WARNING: THIS CODE IS DUPLICATED ABOVE. | |
1164 (cond ((and %-pos | |
1165 (not @-pos)) | |
1166 (goto-char (car %-pos)) | |
1167 (mail-extr-delete-char 1) | |
1168 (setq @-pos (point)) | |
1169 (insert "@") | |
1170 (setq %-pos (cdr %-pos)))) | |
1171 | |
1172 (setq %-pos (nreverse %-pos)) | |
1173 ;; RFC 1034 doesn't approve of this, oh well: | |
1174 (downcase-region (or (car %-pos) @-pos (point-max)) (point-max)) | |
1175 (cond (%-pos ; implies @-pos valid | |
1176 (setq temp %-pos) | |
1177 (catch 'truncated | |
1178 (while temp | |
1179 (goto-char (or (nth 1 temp) | |
1180 @-pos)) | |
1181 (mail-extr-skip-whitespace-backward) | |
1182 (save-excursion | |
1183 (mail-extr-safe-move-sexp -1) | |
1184 (setq domain-pos (point)) | |
1185 (mail-extr-skip-whitespace-backward) | |
1186 (setq \.-pos (eq ?. (preceding-char)))) | |
1187 (cond ((and \.-pos | |
1188 ;; #### string consing | |
1189 (let ((s (intern-soft | |
1190 (buffer-substring domain-pos (point)) | |
1191 all-top-level-domains))) | |
1192 (and s (get s 'domain-name)))) | |
1193 (narrow-to-region (point-min) (point)) | |
1194 (goto-char (car temp)) | |
1195 (mail-extr-delete-char 1) | |
1196 (setq @-pos (point)) | |
1197 (setcdr temp nil) | |
1198 (setq %-pos (delq @-pos %-pos)) | |
1199 (insert "@") | |
1200 (throw 'truncated t))) | |
1201 (setq temp (cdr temp)))))) | |
1202 (setq mbox-beg (point-min) | |
1203 mbox-end (if %-pos (car %-pos) | |
1204 (or @-pos | |
1205 (point-max)))) | |
1206 | |
1207 ;; Done canonicalizing address. | |
1208 | |
1209 (set-buffer extraction-buffer) | |
1210 | |
1211 ;; Decide what part of the address to search to find the full name. | |
1212 (cond ( | |
1213 ;; Example: "First M. Last" <fml@foo.bar.dom> | |
1214 (and phrase-beg | |
1215 (eq quote-beg phrase-beg) | |
1216 (<= quote-end phrase-end)) | |
1217 (narrow-to-region (1+ quote-beg) (1- quote-end)) | |
1218 (mail-extr-undo-backslash-quoting (point-min) (point-max))) | |
1219 | |
1220 ;; Example: First Last <fml@foo.bar.dom> | |
1221 (phrase-beg | |
1222 (narrow-to-region phrase-beg phrase-end)) | |
1223 | |
1224 ;; Example: fml@foo.bar.dom (First M. Last) | |
1225 (cbeg | |
1226 (narrow-to-region (1+ cbeg) (1- cend)) | |
1227 (mail-extr-undo-backslash-quoting (point-min) (point-max)) | |
1228 | |
1229 ;; Deal with spacing problems | |
1230 (goto-char (point-min)) | |
1231 ; (cond ((not (search-forward " " nil t)) | |
1232 ; (goto-char (point-min)) | |
1233 ; (cond ((search-forward "_" nil t) | |
1234 ; ;; Handle the *idiotic* use of underlines as spaces. | |
1235 ; ;; Example: fml@foo.bar.dom (First_M._Last) | |
1236 ; (goto-char (point-min)) | |
1237 ; (while (search-forward "_" nil t) | |
1238 ; (replace-match " " t))) | |
1239 ; ((search-forward "." nil t) | |
1240 ; ;; Fix . used as space | |
1241 ; ;; Example: danj1@cb.att.com (daniel.jacobson) | |
1242 ; (goto-char (point-min)) | |
1243 ; (while (re-search-forward mail-extr-bad-dot-pattern nil t) | |
1244 ; (replace-match "\\1 \\2" t)))))) | |
1245 ) | |
1246 | |
1247 ;; Otherwise we try to get the name from the mailbox portion | |
1248 ;; of the address. | |
1249 ;; Example: First_M_Last@foo.bar.dom | |
1250 (t | |
1251 ;; *** Work in canon buffer instead? No, can't. Hmm. | |
1252 (goto-char (point-max)) | |
1253 (narrow-to-region (point) (point)) | |
1254 (insert-buffer-substring canonicalization-buffer | |
1255 mbox-beg mbox-end) | |
1256 (goto-char (point-min)) | |
1257 | |
1258 ;; Example: First_Last.XXX@foo.bar.dom | |
1259 (setq \.-ends-name (re-search-forward "[_0-9]" nil t)) | |
1260 | |
1261 (goto-char (point-min)) | |
1262 | |
1263 (if (not mail-extr-mangle-uucp) | |
1264 (modify-syntax-entry ?! "w" (syntax-table))) | |
1265 | |
1266 (while (progn | |
1267 (mail-extr-skip-whitespace-forward) | |
1268 (not (eobp))) | |
1269 (setq char (char-after (point))) | |
1270 (cond | |
1271 ((eq char ?\") | |
1272 (setq quote-beg (point)) | |
1273 (or (mail-extr-safe-move-sexp 1) | |
1274 ;; TODO: handle this error condition!!!!! | |
1275 (forward-char 1)) | |
1276 ;; take into account deletions | |
1277 (setq quote-end (- (point) 2)) | |
1278 (save-excursion | |
1279 (backward-char 1) | |
1280 (mail-extr-delete-char 1) | |
1281 (goto-char quote-beg) | |
1282 (mail-extr-delete-char 1)) | |
1283 (mail-extr-undo-backslash-quoting quote-beg quote-end) | |
1284 (or (eq ?\ (char-after (point))) | |
1285 (insert " ")) | |
1286 ;; (setq mailbox-name-processed-flag t) | |
1287 (setq \.-ends-name t)) | |
1288 ((eq char ?.) | |
1289 (if (memq (char-after (1+ (point))) '(?_ ?=)) | |
1290 (progn | |
1291 (forward-char 1) | |
1292 (mail-extr-delete-char 1) | |
1293 (insert ?\ )) | |
1294 (if \.-ends-name | |
1295 (narrow-to-region (point-min) (point)) | |
1296 (mail-extr-delete-char 1) | |
1297 (insert " "))) | |
1298 ;; (setq mailbox-name-processed-flag t) | |
1299 ) | |
1300 ((memq (char-syntax char) '(?. ?\\)) | |
1301 (mail-extr-delete-char 1) | |
1302 (insert " ") | |
1303 ;; (setq mailbox-name-processed-flag t) | |
1304 ) | |
1305 (t | |
1306 (setq atom-beg (point)) | |
1307 (forward-word 1) | |
1308 (setq atom-end (point)) | |
1309 (goto-char atom-beg) | |
1310 (save-restriction | |
1311 (narrow-to-region atom-beg atom-end) | |
1312 (cond | |
1313 | |
1314 ;; Handle X.400 addresses encoded in RFC-822. | |
1315 ;; *** Shit! This has to handle the case where it is | |
1316 ;; *** embedded in a quote too! | |
1317 ;; *** Shit! The input is being broken up into atoms | |
1318 ;; *** by periods! | |
1319 ((looking-at mail-extr-x400-encoded-address-pattern) | |
1320 | |
1321 ;; Copy the contents of the individual fields that | |
1322 ;; might hold name data to the beginning. | |
1323 (mapcar | |
1324 (function | |
1325 (lambda (field-pattern) | |
1326 (cond | |
1327 ((save-excursion | |
1328 (re-search-forward field-pattern nil t)) | |
1329 (insert-buffer-substring (current-buffer) | |
1330 (match-beginning 1) | |
1331 (match-end 1)) | |
1332 (insert " "))))) | |
1333 (list mail-extr-x400-encoded-address-given-name-pattern | |
1334 mail-extr-x400-encoded-address-surname-pattern | |
1335 mail-extr-x400-encoded-address-full-name-pattern)) | |
1336 | |
1337 ;; Discard the rest, since it contains stuff like | |
1338 ;; routing information, not part of a name. | |
1339 (mail-extr-skip-whitespace-backward) | |
1340 (delete-region (point) (point-max)) | |
1341 | |
1342 ;; Handle periods used for spacing. | |
1343 (while (re-search-forward mail-extr-bad-dot-pattern nil t) | |
1344 (replace-match "\\1 \\2" t)) | |
1345 | |
1346 ;; (setq mailbox-name-processed-flag t) | |
1347 ) | |
1348 | |
1349 ;; Handle normal addresses. | |
1350 (t | |
1351 (goto-char (point-min)) | |
1352 ;; Handle _ and = used for spacing. | |
1353 (while (re-search-forward "\\([^_=]+\\)[_=]" nil t) | |
1354 (replace-match "\\1 " t) | |
1355 ;; (setq mailbox-name-processed-flag t) | |
1356 ) | |
1357 (goto-char (point-max)))))))) | |
1358 | |
1359 ;; undo the dirty deed | |
1360 (if (not mail-extr-mangle-uucp) | |
1361 (modify-syntax-entry ?! "." (syntax-table))) | |
1362 ;; | |
1363 ;; If we derived the name from the mailbox part of the address, | |
1364 ;; and we only got one word out of it, don't treat that as a | |
1365 ;; name. "foo@bar" --> (nil "foo@bar"), not ("foo" "foo@bar") | |
1366 ;; (if (not mailbox-name-processed-flag) | |
1367 ;; (delete-region (point-min) (point-max))) | |
1368 )) | |
1369 | |
1370 (set-syntax-table mail-extr-address-text-syntax-table) | |
1371 | |
1372 (mail-extr-voodoo mbox-beg mbox-end canonicalization-buffer) | |
1373 (goto-char (point-min)) | |
1374 | |
1375 ;; If name is "First Last" and userid is "F?L", then assume | |
1376 ;; the middle initial is the second letter in the userid. | |
1377 ;; Initial code by Jamie Zawinski <jwz@netscape.com> | |
1378 ;; *** Make it work when there's a suffix as well. | |
1379 (goto-char (point-min)) | |
1380 (cond ((and mail-extr-guess-middle-initial | |
1381 (not disable-initial-guessing-flag) | |
1382 (eq 3 (- mbox-end mbox-beg)) | |
1383 (progn | |
1384 (goto-char (point-min)) | |
1385 (looking-at mail-extr-two-name-pattern))) | |
1386 (setq fi (char-after (match-beginning 0)) | |
1387 li (char-after (match-beginning 3))) | |
1388 (save-excursion | |
1389 (set-buffer canonicalization-buffer) | |
1390 ;; char-equal is ignoring case here, so no need to upcase | |
1391 ;; or downcase. | |
1392 (let ((case-fold-search t)) | |
1393 (and (char-equal fi (char-after mbox-beg)) | |
1394 (char-equal li (char-after (1- mbox-end))) | |
1395 (setq mi (char-after (1+ mbox-beg)))))) | |
1396 (cond ((and mi | |
1397 ;; TODO: use better table than syntax table | |
1398 (eq ?w (char-syntax mi))) | |
1399 (goto-char (match-beginning 3)) | |
1400 (insert (upcase mi) ". "))))) | |
1401 | |
1402 ;; Nuke name if it is the same as mailbox name. | |
1403 (let ((buffer-length (- (point-max) (point-min))) | |
1404 (i 0) | |
1405 (names-match-flag t)) | |
1406 (cond ((and (> buffer-length 0) | |
1407 (eq buffer-length (- mbox-end mbox-beg))) | |
1408 (goto-char (point-max)) | |
1409 (insert-buffer-substring canonicalization-buffer | |
1410 mbox-beg mbox-end) | |
1411 (while (and names-match-flag | |
1412 (< i buffer-length)) | |
1413 (or (eq (downcase (char-after (+ i (point-min)))) | |
1414 (downcase | |
1415 (char-after (+ i buffer-length (point-min))))) | |
1416 (setq names-match-flag nil)) | |
1417 (setq i (1+ i))) | |
1418 (delete-region (+ (point-min) buffer-length) (point-max)) | |
1419 (if names-match-flag | |
1420 (narrow-to-region (point) (point)))))) | |
1421 | |
1422 ;; Nuke name if it's just one word. | |
1423 (goto-char (point-min)) | |
1424 (and mail-extr-ignore-single-names | |
1425 (not (re-search-forward "[- ]" nil t)) | |
1426 (narrow-to-region (point) (point))) | |
1427 | |
1428 ;; Result | |
1429 (list (if (not (= (point-min) (point-max))) | |
1430 (buffer-string)) | |
1431 (progn | |
1432 (set-buffer canonicalization-buffer) | |
1433 (if (not (= (point-min) (point-max))) | |
1434 (buffer-string)))) | |
1435 ))) | |
1436 | |
1437 (defun mail-extr-voodoo (mbox-beg mbox-end canonicalization-buffer) | |
1438 (let ((word-count 0) | |
1439 (case-fold-search nil) | |
1440 mixed-case-flag lower-case-flag ;;upper-case-flag | |
1441 suffix-flag last-name-comma-flag | |
1442 ;;cbeg cend | |
1443 initial | |
1444 begin-again-flag | |
1445 drop-this-word-if-trailing-flag | |
1446 drop-last-word-if-trailing-flag | |
1447 word-found-flag | |
1448 this-word-beg last-word-beg | |
1449 name-beg name-end | |
1450 name-done-flag | |
1451 ) | |
1452 (save-excursion | |
1453 (set-syntax-table mail-extr-address-text-syntax-table) | |
1454 | |
1455 ;; This was moved above. | |
1456 ;; Fix . used as space | |
1457 ;; But it belongs here because it occurs not only as | |
1458 ;; rypens@reks.uia.ac.be (Piet.Rypens) | |
1459 ;; but also as | |
1460 ;; "Piet.Rypens" <rypens@reks.uia.ac.be> | |
1461 ;;(goto-char (point-min)) | |
1462 ;;(while (re-search-forward mail-extr-bad-dot-pattern nil t) | |
1463 ;; (replace-match "\\1 \\2" t)) | |
1464 | |
1465 (cond ((not (search-forward " " nil t)) | |
1466 (goto-char (point-min)) | |
1467 (cond ((search-forward "_" nil t) | |
1468 ;; Handle the *idiotic* use of underlines as spaces. | |
1469 ;; Example: fml@foo.bar.dom (First_M._Last) | |
1470 (goto-char (point-min)) | |
1471 (while (search-forward "_" nil t) | |
1472 (replace-match " " t))) | |
1473 ((search-forward "." nil t) | |
1474 ;; Fix . used as space | |
1475 ;; Example: danj1@cb.att.com (daniel.jacobson) | |
1476 (goto-char (point-min)) | |
1477 (while (re-search-forward mail-extr-bad-dot-pattern nil t) | |
1478 (replace-match "\\1 \\2" t)))))) | |
1479 | |
1480 | |
1481 ;; Loop over the words (and other junk) in the name. | |
1482 (goto-char (point-min)) | |
1483 (while (not name-done-flag) | |
1484 | |
1485 (cond (word-found-flag | |
1486 ;; Last time through this loop we skipped over a word. | |
1487 (setq last-word-beg this-word-beg) | |
1488 (setq drop-last-word-if-trailing-flag | |
1489 drop-this-word-if-trailing-flag) | |
1490 (setq word-found-flag nil))) | |
1491 | |
1492 (cond (begin-again-flag | |
1493 ;; Last time through the loop we found something that | |
1494 ;; indicates we should pretend we are beginning again from | |
1495 ;; the start. | |
1496 (setq word-count 0) | |
1497 (setq last-word-beg nil) | |
1498 (setq drop-last-word-if-trailing-flag nil) | |
1499 (setq mixed-case-flag nil) | |
1500 (setq lower-case-flag nil) | |
1501 ;; (setq upper-case-flag nil) | |
1502 (setq begin-again-flag nil) | |
1503 )) | |
1504 | |
1505 ;; Initialize for this iteration of the loop. | |
1506 (mail-extr-skip-whitespace-forward) | |
1507 (if (eq word-count 0) (narrow-to-region (point) (point-max))) | |
1508 (setq this-word-beg (point)) | |
1509 (setq drop-this-word-if-trailing-flag nil) | |
1510 | |
1511 ;; Decide what to do based on what we are looking at. | |
1512 (cond | |
1513 | |
1514 ;; Delete title | |
1515 ((and (eq word-count 0) | |
1516 (looking-at mail-extr-full-name-prefixes)) | |
1517 (goto-char (match-end 0)) | |
1518 (narrow-to-region (point) (point-max))) | |
1519 | |
1520 ;; Stop after name suffix | |
1521 ((and (>= word-count 2) | |
1522 (looking-at mail-extr-full-name-suffix-pattern)) | |
1523 (mail-extr-skip-whitespace-backward) | |
1524 (setq suffix-flag (point)) | |
1525 (if (eq ?, (following-char)) | |
1526 (forward-char 1) | |
1527 (insert ?,)) | |
1528 ;; Enforce at least one space after comma | |
1529 (or (eq ?\ (following-char)) | |
1530 (insert ?\ )) | |
1531 (mail-extr-skip-whitespace-forward) | |
1532 (cond ((memq (following-char) '(?j ?J ?s ?S)) | |
1533 (capitalize-word 1) | |
1534 (if (eq (following-char) ?.) | |
1535 (forward-char 1) | |
1536 (insert ?.))) | |
1537 (t | |
1538 (upcase-word 1))) | |
1539 (setq word-found-flag t) | |
1540 (setq name-done-flag t)) | |
1541 | |
1542 ;; Handle SCA names | |
1543 ((looking-at "MKA \\(.+\\)") ; "Mundanely Known As" | |
1544 (goto-char (match-beginning 1)) | |
1545 (narrow-to-region (point) (point-max)) | |
1546 (setq begin-again-flag t)) | |
1547 | |
1548 ;; Check for initial last name followed by comma | |
1549 ((and (eq ?, (following-char)) | |
1550 (eq word-count 1)) | |
1551 (forward-char 1) | |
1552 (setq last-name-comma-flag t) | |
1553 (or (eq ?\ (following-char)) | |
1554 (insert ?\ ))) | |
1555 | |
1556 ;; Stop before trailing comma-separated comment | |
1557 ;; THIS CASE MUST BE AFTER THE PRECEDING CASES. | |
1558 ;; *** This case is redundant??? | |
1559 ;;((eq ?, (following-char)) | |
1560 ;; (setq name-done-flag t)) | |
1561 | |
1562 ;; Delete parenthesized/quoted comment/nickname | |
1563 ((memq (following-char) '(?\( ?\{ ?\[ ?\" ?\' ?\`)) | |
1564 (setq cbeg (point)) | |
1565 (set-syntax-table mail-extr-address-text-comment-syntax-table) | |
1566 (cond ((memq (following-char) '(?\' ?\`)) | |
1567 (or (search-forward "'" nil t | |
1568 (if (eq ?\' (following-char)) 2 1)) | |
1569 (mail-extr-delete-char 1))) | |
1570 (t | |
1571 (or (mail-extr-safe-move-sexp 1) | |
1572 (goto-char (point-max))))) | |
1573 (set-syntax-table mail-extr-address-text-syntax-table) | |
1574 (setq cend (point)) | |
1575 (cond | |
1576 ;; Handle case of entire name being quoted | |
1577 ((and (eq word-count 0) | |
1578 (looking-at " *\\'") | |
1579 (>= (- cend cbeg) 2)) | |
1580 (narrow-to-region (1+ cbeg) (1- cend)) | |
1581 (goto-char (point-min))) | |
1582 (t | |
1583 ;; Handle case of quoted initial | |
1584 (if (and (or (= 3 (- cend cbeg)) | |
1585 (and (= 4 (- cend cbeg)) | |
1586 (eq ?. (char-after (+ 2 cbeg))))) | |
1587 (not (looking-at " *\\'"))) | |
1588 (setq initial (char-after (1+ cbeg))) | |
1589 (setq initial nil)) | |
1590 (delete-region cbeg cend) | |
1591 (if initial | |
1592 (insert initial ". "))))) | |
1593 | |
1594 ;; Handle & substitution | |
1595 ((and (or (bobp) | |
1596 (eq ?\ (preceding-char))) | |
1597 (looking-at "&\\( \\|\\'\\)")) | |
1598 (mail-extr-delete-char 1) | |
1599 (capitalize-region | |
1600 (point) | |
1601 (progn | |
1602 (insert-buffer-substring canonicalization-buffer | |
1603 mbox-beg mbox-end) | |
1604 (point))) | |
1605 (setq disable-initial-guessing-flag t) | |
1606 (setq word-found-flag t)) | |
1607 | |
1608 ;; Handle *Stupid* VMS date stamps | |
1609 ((looking-at mail-extr-stupid-vms-date-stamp-pattern) | |
1610 (replace-match "" t)) | |
1611 | |
1612 ;; Handle Chinese characters. | |
1613 ((looking-at mail-extr-hz-embedded-gb-encoded-chinese-pattern) | |
1614 (goto-char (match-end 0)) | |
1615 (setq word-found-flag t)) | |
1616 | |
1617 ;; Skip initial garbage characters. | |
1618 ;; THIS CASE MUST BE AFTER THE PRECEDING CASES. | |
1619 ((and (eq word-count 0) | |
1620 (looking-at mail-extr-leading-garbage)) | |
1621 (goto-char (match-end 0)) | |
1622 ;; *** Skip backward over these??? | |
1623 ;; (skip-chars-backward "& \"") | |
1624 (narrow-to-region (point) (point-max))) | |
1625 | |
1626 ;; Various stopping points | |
1627 ((or | |
1628 | |
1629 ;; Stop before ALL CAPS acronyms, if preceded by mixed-case | |
1630 ;; words. Example: XT-DEM. | |
1631 (and (>= word-count 2) | |
1632 mixed-case-flag | |
1633 (looking-at mail-extr-weird-acronym-pattern) | |
1634 (not (looking-at mail-extr-roman-numeral-pattern))) | |
1635 | |
1636 ;; Stop before trailing alternative address | |
1637 (looking-at mail-extr-alternative-address-pattern) | |
1638 | |
1639 ;; Stop before trailing comment not introduced by comma | |
1640 ;; THIS CASE MUST BE AFTER AN EARLIER CASE. | |
1641 (looking-at mail-extr-trailing-comment-start-pattern) | |
1642 | |
1643 ;; Stop before telephone numbers | |
1644 (looking-at mail-extr-telephone-extension-pattern)) | |
1645 (setq name-done-flag t)) | |
1646 | |
1647 ;; Delete ham radio call signs | |
1648 ((looking-at mail-extr-ham-call-sign-pattern) | |
1649 (delete-region (match-beginning 0) (match-end 0))) | |
1650 | |
1651 ;; Fixup initials | |
1652 ((looking-at mail-extr-initial-pattern) | |
1653 (or (eq (following-char) (upcase (following-char))) | |
1654 (setq lower-case-flag t)) | |
1655 (forward-char 1) | |
1656 (if (eq ?. (following-char)) | |
1657 (forward-char 1) | |
1658 (insert ?.)) | |
1659 (or (eq ?\ (following-char)) | |
1660 (insert ?\ )) | |
1661 (setq word-found-flag t)) | |
1662 | |
1663 ;; Handle BITNET LISTSERV list names. | |
1664 ((and (eq word-count 0) | |
1665 (looking-at mail-extr-listserv-list-name-pattern)) | |
1666 (narrow-to-region (match-beginning 1) (match-end 1)) | |
1667 (setq word-found-flag t) | |
1668 (setq name-done-flag t)) | |
1669 | |
1670 ;; Regular name words | |
1671 ((looking-at mail-extr-name-pattern) | |
1672 (setq name-beg (point)) | |
1673 (setq name-end (match-end 0)) | |
1674 | |
1675 ;; Certain words will be dropped if they are at the end. | |
1676 (and (>= word-count 2) | |
1677 (not lower-case-flag) | |
1678 (or | |
1679 ;; A trailing 4-or-more letter lowercase words preceded by | |
1680 ;; mixed case or uppercase words will be dropped. | |
1681 (looking-at "[a-z][a-z][a-z][a-z]+[ \t]*\\'") | |
1682 ;; Drop a trailing word which is terminated with a period. | |
1683 (eq ?. (char-after (1- name-end)))) | |
1684 (setq drop-this-word-if-trailing-flag t)) | |
1685 | |
1686 ;; Set the flags that indicate whether we have seen a lowercase | |
1687 ;; word, a mixed case word, and an uppercase word. | |
1688 (if (re-search-forward "[a-z]" name-end t) | |
1689 (if (progn | |
1690 (goto-char name-beg) | |
1691 (re-search-forward "[A-Z]" name-end t)) | |
1692 (setq mixed-case-flag t) | |
1693 (setq lower-case-flag t)) | |
1694 ;; (setq upper-case-flag t) | |
1695 ) | |
1696 | |
1697 (goto-char name-end) | |
1698 (setq word-found-flag t)) | |
1699 | |
1700 (t | |
1701 (setq name-done-flag t) | |
1702 )) | |
1703 | |
1704 ;; Count any word that we skipped over. | |
1705 (if word-found-flag | |
1706 (setq word-count (1+ word-count)))) | |
1707 | |
1708 ;; If the last thing in the name is 2 or more periods, or one or more | |
1709 ;; other sentence terminators (but not a single period) then keep them | |
1710 ;; and the preceeding word. This is for the benefit of whole sentences | |
1711 ;; in the name field: it's better behavior than dropping the last word | |
1712 ;; of the sentence... | |
1713 (if (and (not suffix-flag) | |
1714 (looking-at "\\(\\.+\\|[?!;:.][?!;:.]+\\|[?!;:][?!;:.]*\\)\\'")) | |
1715 (goto-char (setq suffix-flag (point-max)))) | |
1716 | |
1717 ;; Drop everything after point and certain trailing words. | |
1718 (narrow-to-region (point-min) | |
1719 (or (and drop-last-word-if-trailing-flag | |
1720 last-word-beg) | |
1721 (point))) | |
1722 | |
1723 ;; Xerox's mailers SUCK!!!!!! | |
1724 ;; We simply refuse to believe that any last name is PARC or ADOC. | |
1725 ;; If it looks like that is the last name, that there is no meaningful | |
1726 ;; here at all. Actually I guess it would be best to map patterns | |
1727 ;; like foo.hoser@xerox.com into foo@hoser.xerox.com, but I don't | |
1728 ;; actually know that that is what's going on. | |
1729 (cond ((not suffix-flag) | |
1730 (goto-char (point-min)) | |
1731 (let ((case-fold-search t)) | |
1732 (if (looking-at "[-A-Za-z_]+[. ]\\(PARC\\|ADOC\\)\\'") | |
1733 (erase-buffer))))) | |
1734 | |
1735 ;; If last name first put it at end (but before suffix) | |
1736 (cond (last-name-comma-flag | |
1737 (goto-char (point-min)) | |
1738 (search-forward ",") | |
1739 (setq name-end (1- (point))) | |
1740 (goto-char (or suffix-flag (point-max))) | |
1741 (or (eq ?\ (preceding-char)) | |
1742 (insert ?\ )) | |
1743 (insert-buffer-substring (current-buffer) (point-min) name-end) | |
1744 (goto-char name-end) | |
1745 (skip-chars-forward "\t ,") | |
1746 (narrow-to-region (point) (point-max)))) | |
1747 | |
1748 ;; Delete leading and trailing junk characters. | |
1749 ;; *** This is probably completly unneeded now. | |
1750 ;;(goto-char (point-max)) | |
1751 ;;(skip-chars-backward mail-extr-non-end-name-chars) | |
1752 ;;(if (eq ?. (following-char)) | |
1753 ;; (forward-char 1)) | |
1754 ;;(narrow-to-region (point) | |
1755 ;; (progn | |
1756 ;; (goto-char (point-min)) | |
1757 ;; (skip-chars-forward mail-extr-non-begin-name-chars) | |
1758 ;; (point))) | |
1759 | |
1760 ;; Compress whitespace | |
1761 (goto-char (point-min)) | |
1762 (while (re-search-forward "[ \t\n]+" nil t) | |
1763 (replace-match (if (eobp) "" " ") t)) | |
1764 ))) | |
1765 | |
1766 | |
1767 | |
1768 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
1769 ;; | |
1770 ;; Table of top-level domain names. | |
1771 ;; | |
1772 ;; This is used during address canonicalization; be careful of format changes. | |
1773 ;; Keep in mind that the country abbreviations follow ISO-3166. There is | |
1774 ;; a U.S. FIPS that specifies a different set of two-letter country | |
1775 ;; abbreviations. | |
1776 | |
1777 (defconst all-top-level-domains | |
1778 (let ((ob (make-vector 509 0))) | |
1779 (mapcar | |
1780 (function | |
1781 (lambda (x) | |
1782 (put (intern (downcase (car x)) ob) | |
1783 'domain-name | |
1784 (if (nth 2 x) | |
1785 (format (nth 2 x) (nth 1 x)) | |
1786 (nth 1 x))))) | |
1787 '(("ag" "Antigua") | |
1788 ("ar" "Argentina" "Argentine Republic") | |
1789 ("arpa" t "Advanced Projects Research Agency") | |
1790 ("at" "Austria" "The Republic of %s") | |
1791 ("au" "Australia") | |
1792 ("bb" "Barbados") | |
1793 ("be" "Belgium" "The Kingdom of %s") | |
1794 ("bg" "Bulgaria") | |
1795 ("bitnet" t "Because It's Time NET") | |
1796 ("bo" "Bolivia" "Republic of %s") | |
1797 ("br" "Brazil" "The Federative Republic of %s") | |
1798 ("bs" "Bahamas") | |
1799 ("bz" "Belize") | |
1800 ("ca" "Canada") | |
1801 ("ch" "Switzerland" "The Swiss Confederation") | |
1802 ("cl" "Chile" "The Republic of %s") | |
1803 ("cn" "China" "The People's Republic of %s") | |
1804 ("co" "Columbia") | |
1805 ("com" t "Commercial") | |
1806 ("cr" "Costa Rica" "The Republic of %s") | |
1807 ("cs" "Czechoslovakia") | |
1808 ("de" "Germany") | |
1809 ("dk" "Denmark") | |
1810 ("dm" "Dominica") | |
1811 ("do" "Dominican Republic" "The %s") | |
1812 ("ec" "Ecuador" "The Republic of %s") | |
1813 ("edu" t "Educational") | |
1814 ("eg" "Egypt" "The Arab Republic of %s") | |
1815 ("es" "Spain" "The Kingdom of %s") | |
1816 ("fi" "Finland" "The Republic of %s") | |
1817 ("fj" "Fiji") | |
1818 ("fr" "France") | |
1819 ("gov" t "Government (U.S.A.)") | |
1820 ("gr" "Greece" "The Hellenic Republic (%s)") | |
1821 ("hk" "Hong Kong") | |
1822 ("hu" "Hungary" "The Hungarian People's Republic") ;??? | |
1823 ("ie" "Ireland") | |
1824 ("il" "Israel" "The State of %s") | |
1825 ("in" "India" "The Republic of %s") | |
1826 ("int" t "(something British, don't know what)") | |
1827 ("is" "Iceland" "The Republic of %s") | |
1828 ("it" "Italy" "The Italian Republic") | |
1829 ("jm" "Jamaica") | |
1830 ("jp" "Japan") | |
1831 ("kn" "St. Kitts and Nevis") | |
1832 ("kr" "South Korea") | |
1833 ("lc" "St. Lucia") | |
1834 ("lk" "Sri Lanka" "The Democratic Socialist Republic of %s") | |
1835 ("mil" t "Military (U.S.A.)") | |
1836 ("mx" "Mexico" "The United Mexican States") | |
1837 ("my" "Malaysia" "%s (changed to Myanmar?)") ;??? | |
1838 ("na" "Namibia") | |
1839 ("nato" t "North Atlantic Treaty Organization") | |
1840 ("net" t "Network") | |
1841 ("ni" "Nicaragua" "The Republic of %s") | |
1842 ("nl" "Netherlands" "The Kingdom of the %s") | |
1843 ("no" "Norway" "The Kingdom of %s") | |
1844 ("nz" "New Zealand") | |
1845 ("org" t "Organization") | |
1846 ("pe" "Peru") | |
1847 ("pg" "Papua New Guinea") | |
1848 ("ph" "Philippines" "The Republic of the %s") | |
1849 ("pl" "Poland") | |
1850 ("pr" "Puerto Rico") | |
1851 ("pt" "Portugal" "The Portugese Republic") | |
1852 ("py" "Paraguay") | |
1853 ("se" "Sweden" "The Kingdom of %s") | |
1854 ("sg" "Singapore" "The Republic of %s") | |
1855 ("sr" "Suriname") | |
1856 ("su" "Soviet Union") | |
1857 ("th" "Thailand" "The Kingdom of %s") | |
1858 ("tn" "Tunisia") | |
1859 ("tr" "Turkey" "The Republic of %s") | |
1860 ("tt" "Trinidad and Tobago") | |
1861 ("tw" "Taiwan") | |
1862 ("uk" "United Kingdom" "The %s of Great Britain") | |
1863 ("unter-dom" t "(something German)") | |
1864 ("us" "U.S.A." "The United States of America") | |
1865 ("uucp" t "Unix to Unix CoPy") | |
1866 ("uy" "Uruguay" "The Eastern Republic of %s") | |
1867 ("vc" "St. Vincent and the Grenadines") | |
1868 ("ve" "Venezuela" "The Republic of %s") | |
1869 ("yu" "Yugoslavia" "The Socialist Federal Republic of %s") | |
1870 ;; Also said to be Zambia ... (why not Zaire???) | |
1871 ("za" "South Africa" "The Republic of %s (or Zambia? Zaire?)") | |
1872 ("zw" "Zimbabwe" "Republic of %s") | |
1873 ;; fipnet | |
1874 )) | |
1875 ob)) | |
1876 | |
1877 ;;;###autoload | |
1878 (defun what-domain (x) | |
1879 "Prompts for a mail domain, and prints the country it corresponds to | |
1880 in the minibuffer." | |
1881 (interactive | |
1882 (let ((completion-ignore-case t)) | |
1883 (list (completing-read "Domain: " all-top-level-domains nil t)))) | |
1884 (or (setq x (intern-soft (downcase x) all-top-level-domains)) | |
1885 (error "no such domain")) | |
1886 (message "%s: %s" (upcase (symbol-name x)) (get x 'domain-name))) | |
1887 | |
1888 | |
1889 ;(let ((all nil)) | |
1890 ; (mapatoms #'(lambda (x) | |
1891 ; (if (and (boundp x) | |
1892 ; (string-match "^mail-extr-" (symbol-name x))) | |
1893 ; (setq all (cons x all))))) | |
1894 ; (setq all (sort all #'string-lessp)) | |
1895 ; (cons 'setq | |
1896 ; (apply 'nconc (mapcar #'(lambda (x) | |
1897 ; (list x (symbol-value x))) | |
1898 ; all)))) | |
1899 | |
1900 | |
1901 (provide 'mail-extr) | |
1902 | |
1903 ;;; mail-extr.el ends here |