0
|
1 ;;; mail-extr.el --- extract full name and address from RFC 822 mail header.
|
|
2
|
|
3 ;; Copyright (C) 1991, 1992, 1993, 1994 Free Software Foundation, Inc.
|
|
4
|
|
5 ;; Author: Joe Wells <jbw@cs.bu.edu>
|
|
6 ;; Maintainer: Chuck Thompson <cthomp@xemacs.org>
|
|
7 ;; Version: 1.8
|
|
8 ;; Keywords: mail
|
|
9
|
|
10 ;; This file is part of XEmacs.
|
|
11
|
|
12 ;; XEmacs is free software; you can redistribute it and/or modify it
|
|
13 ;; under the terms of the GNU General Public License as published by
|
|
14 ;; the Free Software Foundation; either version 2, or (at your option)
|
|
15 ;; any later version.
|
|
16
|
|
17 ;; XEmacs is distributed in the hope that it will be useful, but
|
|
18 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
19 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
20 ;; General Public License for more details.
|
|
21
|
|
22 ;; You should have received a copy of the GNU General Public License
|
16
|
23 ;; along with XEmacs; see the file COPYING. If not, write to the
|
|
24 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
25 ;; Boston, MA 02111-1307, USA.
|
0
|
26
|
|
27 ;;; Synched up with: Not synched with FSF but close to 19.28.
|
|
28
|
|
29 ;;; Commentary:
|
|
30
|
|
31 ;; The entry point of this code is
|
|
32 ;;
|
|
33 ;; mail-extract-address-components: (address)
|
|
34 ;;
|
|
35 ;; Given an RFC-822 ADDRESS, extract full name and canonical address.
|
|
36 ;; Returns a list of the form (FULL-NAME CANONICAL-ADDRESS).
|
|
37 ;; If no name can be extracted, FULL-NAME will be nil.
|
|
38 ;; ADDRESS may be a string or a buffer. If it is a buffer, the visible
|
|
39 ;; (narrowed) portion of the buffer will be interpreted as the address.
|
|
40 ;; (This feature exists so that the clever caller might be able to avoid
|
|
41 ;; consing a string.)
|
|
42 ;; If ADDRESS contains more than one RFC-822 address, only the first is
|
|
43 ;; returned.
|
|
44 ;;
|
|
45 ;; This code is a more correct (and more heuristic) parser than the code in
|
|
46 ;; rfc822.el. And despite its size, it's fairly fast.
|
|
47 ;;
|
|
48 ;; There are two main benefits:
|
|
49 ;;
|
|
50 ;; 1. Higher probability of getting the correct full name for a human than
|
|
51 ;; any other package we know of. (On the other hand, it will cheerfully
|
|
52 ;; mangle non-human names/comments.)
|
|
53 ;; 2. Address part is put in a canonical form.
|
|
54 ;;
|
|
55 ;; The interface is not yet carved in stone; please give us suggestions.
|
|
56 ;;
|
|
57 ;; We have an extensive test-case collection of funny addresses if you want to
|
|
58 ;; work with the code. Developing this code requires frequent testing to
|
|
59 ;; make sure you're not breaking functionality. The test cases aren't included
|
|
60 ;; because they are over 100K.
|
|
61 ;;
|
|
62 ;; If you find an address that mail-extr fails on, please send it to the
|
|
63 ;; maintainer along with what you think the correct results should be. We do
|
|
64 ;; not consider it a bug if mail-extr mangles a comment that does not
|
|
65 ;; correspond to a real human full name, although we would prefer that
|
|
66 ;; mail-extr would return the comment as-is.
|
|
67 ;;
|
|
68 ;; Features:
|
|
69 ;;
|
|
70 ;; * Full name handling:
|
|
71 ;;
|
|
72 ;; * knows where full names can be found in an address.
|
|
73 ;; * avoids using empty comments and quoted text.
|
|
74 ;; * extracts full names from mailbox names.
|
|
75 ;; * recognizes common formats for comments after a full name.
|
|
76 ;; * puts a period and a space after each initial.
|
|
77 ;; * understands & referring to the mailbox name, capitalized.
|
|
78 ;; * strips name prefixes like "Prof.", etc.
|
|
79 ;; * understands what characters can occur in names (not just letters).
|
|
80 ;; * figures out middle initial from mailbox name.
|
|
81 ;; * removes funny nicknames.
|
|
82 ;; * keeps suffixes such as Jr., Sr., III, etc.
|
|
83 ;; * reorders "Last, First" type names.
|
|
84 ;;
|
|
85 ;; * Address handling:
|
|
86 ;;
|
|
87 ;; * parses rfc822 quoted text, comments, and domain literals.
|
|
88 ;; * parses rfc822 multi-line headers.
|
|
89 ;; * does something reasonable with rfc822 GROUP addresses.
|
|
90 ;; * handles many rfc822 noncompliant and garbage addresses.
|
|
91 ;; * canonicalizes addresses (after stripping comments/phrases outside <>).
|
|
92 ;; * converts ! addresses into .UUCP and %-style addresses.
|
|
93 ;; * converts rfc822 ROUTE addresses to %-style addresses.
|
|
94 ;; * truncates %-style addresses at leftmost fully qualified domain name.
|
|
95 ;; * handles local relative precedence of ! vs. % and @ (untested).
|
|
96 ;;
|
|
97 ;; It does almost no string creation. It primarily uses the built-in
|
|
98 ;; parsing routines with the appropriate syntax tables. This should
|
|
99 ;; result in greater speed.
|
|
100 ;;
|
|
101 ;; TODO:
|
|
102 ;;
|
|
103 ;; * handle all test cases. (This will take forever.)
|
|
104 ;; * software to pick the correct header to use (eg., "Senders-Name:").
|
|
105 ;; * multiple addresses in the "From:" header (almost all of the necessary
|
|
106 ;; code is there).
|
|
107 ;; * flag to not treat `,' as an address separator. (This is useful when
|
|
108 ;; there is a "From:" header but no "Sender:" header, because then there
|
|
109 ;; is only allowed to be one address.)
|
|
110 ;; * mailbox name does not necessarily contain full name.
|
|
111 ;; * fixing capitalization when it's all upper or lowercase. (Hard!)
|
|
112 ;; * some of the domain literal handling is missing. (But I've never even
|
|
113 ;; seen one of these in a mail address, so maybe no big deal.)
|
|
114 ;; * arrange to have syntax tables byte-compiled.
|
|
115 ;; * speed hacks.
|
|
116 ;; * delete unused variables.
|
|
117 ;; * arrange for testing with different relative precedences of ! vs. @
|
|
118 ;; and %.
|
|
119 ;; * insert documentation strings!
|
|
120 ;; * handle X.400-gatewayed addresses according to RFC 1148.
|
|
121
|
|
122 ;;; Change Log:
|
|
123 ;;
|
|
124 ;; Thu Feb 17 17:57:33 1994 Jamie Zawinski (jwz@netscape.com)
|
|
125 ;;
|
|
126 ;; * merged with jbw's latest version
|
|
127 ;;
|
|
128 ;; Wed Feb 9 21:56:27 1994 Jamie Zawinski (jwz@netscape.com)
|
|
129 ;;
|
|
130 ;; * high-bit chars in comments weren't treated as word syntax
|
|
131 ;;
|
|
132 ;; Sat Feb 5 03:13:40 1994 Jamie Zawinski (jwz@netscape.com)
|
|
133 ;;
|
|
134 ;; * call replace-match with fixed-case arg
|
|
135 ;;
|
|
136 ;; Thu Dec 16 21:56:45 1993 Jamie Zawinski (jwz@netscape.com)
|
|
137 ;;
|
|
138 ;; * some more cleanup, doc, added provide
|
|
139 ;;
|
|
140 ;; Tue Mar 23 21:23:18 1993 Joe Wells (jbw at csd.bu.edu)
|
|
141 ;;
|
|
142 ;; * Made mail-full-name-prefixes a user-customizable variable.
|
2
|
143 ;; Allow passing the address as a buffer as well as a string.
|
0
|
144 ;; Allow [ and ] as name characters (Finnish character set).
|
|
145 ;;
|
|
146 ;; Mon Mar 22 21:20:56 1993 Joe Wells (jbw at bigbird.bu.edu)
|
|
147 ;;
|
|
148 ;; * Handle "null" addresses. Handle = used for spacing in mailbox
|
|
149 ;; name. Fix bug in handling of ROUTE-ADDR-type addresses that are
|
|
150 ;; missing their brackets. Handle uppercase "JR". Extract full
|
|
151 ;; names from X.400 addresses encoded in RFC-822. Fix bug in
|
|
152 ;; handling of multiple addresses where first has trailing comment.
|
|
153 ;; Handle more kinds of telephone extension lead-ins.
|
|
154 ;;
|
|
155 ;; Mon Mar 22 20:16:57 1993 Joe Wells (jbw at bigbird.bu.edu)
|
|
156 ;;
|
|
157 ;; * Handle HZ encoding for embedding GB encoded chinese characters.
|
|
158 ;;
|
|
159 ;; Mon Mar 22 00:46:12 1993 Joe Wells (jbw at bigbird.bu.edu)
|
|
160 ;;
|
|
161 ;; * Fixed too broad matching of ham radio call signs. Fixed bug in
|
|
162 ;; handling an unmatched ' in a name string. Enhanced recognition
|
|
163 ;; of when . in the mailbox name terminates the name portion.
|
|
164 ;; Narrowed conversion of . to space to only the necessary
|
|
165 ;; situation. Deal with VMS's stupid date stamps. Handle a unique
|
|
166 ;; way of introducing an alternate address. Fixed spacing bug I
|
|
167 ;; introduced in switching last name order. Fixed bug in handling
|
|
168 ;; address with ! and % but no @. Narrowed the cases in which
|
|
169 ;; certain trailing words are discarded.
|
|
170 ;;
|
|
171 ;; Sun Mar 21 21:41:06 1993 Joe Wells (jbw at bigbird.bu.edu)
|
|
172 ;;
|
|
173 ;; * Fixed bugs in handling GROUP addresses. Certain words in the
|
|
174 ;; middle of a name no longer terminate it. Handle LISTSERV list
|
|
175 ;; names. Ignore comment field containing mailbox name.
|
|
176 ;;
|
|
177 ;; Sun Mar 21 14:39:38 1993 Joe Wells (jbw at bigbird.bu.edu)
|
|
178 ;;
|
|
179 ;; * Moved variant-method code back into main function. Handle
|
|
180 ;; underscores as spaces in comments. Handle leading nickname. Add
|
|
181 ;; flag to ignore single-word names. Other changes.
|
|
182 ;;
|
|
183 ;; Mon Feb 1 22:23:31 1993 Joe Wells (jbw at bigbird.bu.edu)
|
|
184 ;;
|
|
185 ;; * Added in changes by Rod Whitby and Jamie Zawinski. This
|
|
186 ;; includes the flag mail-extr-guess-middle-initial and the fix for
|
|
187 ;; handling multiple addresses correctly.
|
|
188 ;;
|
|
189 ;; Mon Apr 6 23:59:09 1992 Joe Wells (jbw at bigbird.bu.edu)
|
|
190 ;;
|
|
191 ;; * Cleaned up some more. Release version 1.0 to world.
|
|
192 ;;
|
|
193 ;; Sun Apr 5 19:39:08 1992 Joe Wells (jbw at bigbird.bu.edu)
|
|
194 ;;
|
|
195 ;; * Cleaned up full name extraction extensively.
|
|
196 ;;
|
|
197 ;; Sun Feb 2 14:45:24 1992 Joe Wells (jbw at bigbird.bu.edu)
|
|
198 ;;
|
|
199 ;; * Total rewrite. Integrated mail-canonicalize-address into
|
|
200 ;; mail-extract-address-components. Now handles GROUP addresses more
|
|
201 ;; or less correctly. Better handling of lots of different cases.
|
|
202 ;;
|
|
203 ;; Fri Jun 14 19:39:50 1991
|
|
204 ;; * Created.
|
|
205
|
|
206 ;;; Code:
|
|
207
|
|
208
|
|
209 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
210 ;;
|
|
211 ;; User configuration variable definitions.
|
|
212 ;;
|
|
213
|
|
;; User options.  The leading `*' in each docstring marks the variable as
;; one the user is expected to customize.

;; Off by default: no middle initial is guessed.
(defvar mail-extr-guess-middle-initial nil
  "*Whether to try to guess middle initial from mail address.
If true, then when we see an address like \"John Smith <jqs@host.com>\"
we will assume that \"John Q. Smith\" is the fellow's name.")

;; On by default: a one-word name is treated as no name at all.
(defvar mail-extr-ignore-single-names t
  "*Whether to ignore a name that is just a single word.
If true, then when we see an address like \"Idiot <dumb@stupid.com>\"
we will act as though we couldn't find a full name in the address.")

;; Matches a leading title that is not part of the name (does not
;; contribute to uniquely identifying the person).
;; The regexp is: one of the listed titles, an optional period, then
;; exactly one space, tab or newline.
(defvar mail-extr-full-name-prefixes
  (purecopy
   "\\(Prof\\|D[Rr]\\|Mrs?\\|Rev\\|Rabbi\\|SysOp\\|LCDR\\)\\.?[ \t\n]")
  "*Matches prefixes to the full name that identify a person's position.
These are stripped from the full name because they do not contribute to
uniquely identifying the person.")

;; Off by default.
(defvar mail-extr-@-binds-tighter-than-! nil
  "*Whether the local mail transport agent looks at ! before @.")

;; Off by default: UUCP path information is kept.
(defvar mail-extr-mangle-uucp nil
  "*Whether to throw away information in UUCP addresses
by translating things like \"foo!bar!baz@host\" into \"baz@bar.UUCP\".")
|
|
239
|
|
240 ;;----------------------------------------------------------------------
|
|
241 ;; what orderings are meaningful?????
|
|
242 ;;(defvar mail-operator-precedence-list '(?! ?% ?@))
|
|
243 ;; Right operand of a % or a @ must be a domain name, period. No other
|
|
244 ;; operators allowed. Left operand of a @ is an address relative to that
|
|
245 ;; site.
|
|
246
|
|
247 ;; Left operand of a ! must be a domain name. Right operand is an
|
|
248 ;; arbitrary address.
|
|
249 ;;----------------------------------------------------------------------
|
|
250
|
|
251
|
|
252
|
|
253 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
254 ;;
|
|
255 ;; Constant definitions.
|
|
256 ;;
|
|
257
|
|
258 ;; Codes in
|
|
259 ;; Names in ISO 8859-1 Name
|
|
260 ;; ISO 10XXX ISO 8859-2 in
|
|
261 ;; ISO 6937 ISO 10646 RFC Swedish
|
|
262 ;; etc. Hex Oct 1345 TeX Split ASCII Description
|
|
263 ;; --------- ---------- ---- --- ----- ----- -------------------------------
|
|
264 ;; %a E4 344 a: \"a ae { latin small a + diaeresis ä
|
|
265 ;; %o F6 366 o: \"o oe | latin small o + diaeresis ö
|
|
266 ;; @a E5 345 aa \oa aa } latin small a + ring above å
|
|
267 ;; %u FC 374 u: \"u ue ~ latin small u + diaeresis ü
|
|
268 ;; /e E9 351 e' \'e ` latin small e + acute é
|
|
269 ;; %A C4 304 A: \"A AE [ latin capital a + diaeresis Ä
|
|
270 ;; %O D6 326 O: \"O OE \ latin capital o + diaeresis Ö
|
|
271 ;; @A C5 305 AA \oA AA ] latin capital a + ring above Å
|
|
272 ;; %U DC 334 U: \"U UE ^ latin capital u + diaeresis Ü
|
|
273 ;; /E C9 311 E' \'E @ latin capital e + acute É
|
|
274
|
|
275 ;; NOTE: @a and @A are not in ISO 8859-2 (the codes mentioned above invoke
|
|
276 ;; /l and /L). Some of this data was retrieved from
|
|
277 ;; listserv@jhuvm.hcf.jhu.edu.
|
|
278
|
|
279 ;; Any character that can occur in a name, not counting characters that
|
|
280 ;; separate parts of a multipart name (hyphen and period).
|
|
281 ;; Yes, there are weird people with digits in their names.
|
|
282 ;; You will also notice the consideration for the
|
|
283 ;; Swedish/Finnish/Norwegian character set.
|
|
284 ;; #### (go to \376 instead of \377 to work around bug in search.c...)
|
|
;; The values below (except `mail-extr-leading-garbage') are the *contents*
;; of a regexp character alternative, not complete regexps: the patterns in
;; this file splice them between `[' and `]' (or `[^' and `]') with `format'.
;; The `]' comes first in this one so that it is taken as a literal member
;; of the set in either position.
(defconst mail-extr-all-letters-but-separators
  (purecopy "][A-Za-z{|}'~0-9`\200-\376")
  "Characters that can occur in a name, not counting separators.
The separators left out are hyphen and period.
For use inside a regexp character alternative.")

;; Any character that can occur in a name in an RFC822 address including
;; the separator (hyphen and possibly period) for multipart names.
;; #### should . be in here?
;; The appended "---" is a range running from hyphen to hyphen, so it
;; contributes only the hyphen itself.
(defconst mail-extr-all-letters
  (purecopy (concat mail-extr-all-letters-but-separators "---"))
  "Characters that can occur in a name, including the hyphen separator.
For use inside a regexp character alternative.")

;; Any character that can start a name.
;; Keep this set as minimal as possible.
(defconst mail-extr-first-letters (purecopy "A-Za-z")
  "Characters that can start a name (ASCII letters only).
For use inside a regexp character alternative.")

;; Any character that can end a name.
;; Keep this set as minimal as possible.
;; Besides the ASCII letters this admits `[', backquote, apostrophe and
;; period.
(defconst mail-extr-last-letters (purecopy "[A-Za-z`'.")
  "Characters that can end a name.
For use inside a regexp character alternative.")

;; A complete regexp: one or more characters, none of which is in
;; `mail-extr-first-letters'.
(defconst mail-extr-leading-garbage
  (purecopy (format "[^%s]+" mail-extr-first-letters))
  "Regexp matching a run of characters that cannot start a name.")
|
|
304
|
|
305 ;; (defconst mail-extr-non-name-chars
|
|
306 ;; (purecopy (concat "^" mail-extr-all-letters ".")))
|
|
307 ;; (defconst mail-extr-non-begin-name-chars
|
|
308 ;; (purecopy (concat "^" mail-extr-first-letters)))
|
|
309 ;; (defconst mail-extr-non-end-name-chars
|
|
310 ;; (purecopy (concat "^" mail-extr-last-letters)))
|
|
311
|
|
312 ;; Matches an initial not followed by both a period and a space.
|
|
313 ;; (defconst mail-extr-bad-initials-pattern
|
|
314 ;; (purecopy
|
|
315 ;; (format "\\(\\([^%s]\\|\\`\\)[%s]\\)\\(\\.\\([^ ]\\)\\| \\|\\([^%s .]\\)\\|\\'\\)"
|
|
316 ;; mail-extr-all-letters mail-extr-first-letters mail-extr-all-letters)))
|
|
317
|
|
318 ;; Matches periods used instead of spaces. Must not match the period
|
|
319 ;; following an initial.
|
|
;; Shape of the regexp:
;;   group 1 - a character from `mail-extr-all-letters' followed by one
;;             from `mail-extr-last-letters'
;;   then    - one or more periods
;;   group 2 - a character from `mail-extr-first-letters'
;; Two characters are required before the periods, so a lone initial
;; followed by a period is not matched.
(defconst mail-extr-bad-dot-pattern
  (purecopy
   (format "\\([%s][%s]\\)\\.+\\([%s]\\)"
	   mail-extr-all-letters
	   mail-extr-last-letters
	   mail-extr-first-letters))
  "Regexp matching periods that were used in place of a space.
Group 1 is the two characters before the periods, group 2 the
character after them.")
|
|
326
|
|
327 ;; Matches an embedded or leading nickname that should be removed.
|
|
328 ;; (defconst mail-extr-nickname-pattern
|
|
329 ;; (purecopy
|
|
330 ;; (format "\\([ .]\\|\\`\\)[\"'`\[\(]\\([ .%s]+\\)[\]\"'\)] "
|
|
331 ;; mail-extr-all-letters)))
|
|
332
|
|
333 ;; Matches the occurrence of a generational name suffix, and the last
|
|
334 ;; character of the preceding name. This is important because we want to
|
|
335 ;; keep such suffixes: they help to uniquely identify the person.
|
|
336 ;; *** Perhaps this should be a user-customizable variable. However, the
|
|
337 ;; *** regular expression is fairly tricky to alter, so maybe not.
|
|
;; Shape of the regexp:
;;   group 1 - optional comma, optional space, then the suffix
;;   group 2 - the suffix itself: J or S followed by R (either case) with
;;             an optional period, or a roman numeral of the form V?I+V?
;;   group 3 - what follows: either end of text, or a character outside
;;             `mail-extr-all-letters' that is itself followed by another
;;             such character or by end of text (group 4)
(defconst mail-extr-full-name-suffix-pattern
  (purecopy
   (format
    "\\(,? ?\\([JjSs][Rr]\\.?\\|V?I+V?\\)\\)\\([^%s]\\([^%s]\\|\\'\\)\\|\\'\\)"
    mail-extr-all-letters mail-extr-all-letters))
  "Regexp matching a generational name suffix such as Jr., Sr. or III.")

;; An optional V, one or more I, an optional V, then a word boundary.
;; A bare "V" or anything containing X is not matched.
(defconst mail-extr-roman-numeral-pattern (purecopy "V?I+V?\\b")
  "Regexp matching a small roman numeral built from I and V.")

;; Matches a trailing uppercase (with other characters possible) acronym.
;; Must not match a trailing uppercase last name or trailing initial
;; Two alternatives: a run of capitals followed by `-', `_' or `/', or two
;; to three capitals ending at a word boundary.
(defconst mail-extr-weird-acronym-pattern
  (purecopy "\\([A-Z]+[-_/]\\|[A-Z][A-Z][A-Z]?\\b\\)")
  "Regexp matching an uppercase acronym.")
|
|
350
|
|
351 ;; Matches a mixed-case or lowercase name (not an initial).
|
|
352 ;; #### Match Latin1 lower case letters here too?
|
|
353 ;; (defconst mail-extr-mixed-case-name-pattern
|
|
354 ;; (purecopy
|
|
355 ;; (format
|
|
356 ;; "\\b\\([a-z][%s]*[%s]\\|[%s][%s]*[a-z][%s]*[%s]\\|[%s][%s]*[a-z]\\)"
|
|
357 ;; mail-extr-all-letters mail-extr-last-letters
|
|
358 ;; mail-extr-first-letters mail-extr-all-letters mail-extr-all-letters
|
|
359 ;; mail-extr-last-letters mail-extr-first-letters mail-extr-all-letters)))
|
|
360
|
|
361 ;; Matches a trailing alternative address.
|
|
362 ;; #### Match Latin1 letters here too?
|
|
363 ;; #### Match _ before @ here too?
|
|
;; Optional "aka" and spaces (group 1), one or more letters or periods,
;; a `!' or `@', then one more letter or period.  Only the start of the
;; address after the operator is consumed.
(defconst mail-extr-alternative-address-pattern
  (purecopy "\\(aka *\\)?[a-zA-Z.]+[!@][a-zA-Z.]")
  "Regexp matching the start of an alternative mail address.")

;; Matches a variety of trailing comments not including comma-delimited
;; comments.
;; Three alternatives: a space followed by `-' or `{'; two hyphens; or any
;; single one of + @ # > < / ;
(defconst mail-extr-trailing-comment-start-pattern
  (purecopy " [-{]\\|--\\|[+@#></\;]")
  "Regexp matching the start of a trailing comment.")

;; Matches a name (not an initial).
;; This doesn't force a word boundary at the end because sometimes a
;; comment is separated by a `-' with no preceding space.
;; Word boundary, one starting character, any number of name characters
;; (hyphen allowed), one ending character: at least two characters long.
(defconst mail-extr-name-pattern
  (purecopy (format "\\b[%s][%s]*[%s]"
		    mail-extr-first-letters
		    mail-extr-all-letters
		    mail-extr-last-letters))
  "Regexp matching a name of two or more characters.")

;; Word boundary, exactly one starting character, then (group 1) a period,
;; a space, or a word boundary.
(defconst mail-extr-initial-pattern
  (purecopy (format "\\b[%s]\\([. ]\\|\\b\\)" mail-extr-first-letters))
  "Regexp matching a single-letter initial.")
|
|
383
|
|
384 ;; Matches a single name before a comma.
|
|
385 ;; (defconst mail-extr-last-name-first-pattern
|
|
386 ;; (purecopy (concat "\\`" mail-extr-name-pattern ",")))
|
|
387
|
|
388 ;; Matches telephone extensions.
|
|
;; Shape of the regexp: an optional lead-in (group 1), optional spaces, an
;; optional `+', one digit, then one or more digits, spaces or hyphens.
;; The lead-in is one of Ext, Tph, Tel or X (first letter in either case)
;; followed by at most one arbitrary character.
;; NOTE(review): the lead-in alternation contains an empty branch
;; (`\\|\\|'), so group 1 can also match a single arbitrary character with
;; no keyword at all -- confirm that this is intended.
(defconst mail-extr-telephone-extension-pattern
  (purecopy
   "\\(\\([Ee]xt\\|\\|[Tt]ph\\|[Tt]el\\|[Xx]\\).?\\)? *\\+?[0-9][- 0-9]+")
  "Regexp matching a telephone number or extension.")
|
|
392
|
|
393 ;; Matches ham radio call signs.
|
|
394 ;; Help from: Mat Maessen N2NJZ <maessm@rpi.edu>, Mark Feit
|
|
395 ;; <mark@era.com>, Michael Covington <mcovingt@ai.uga.edu>.
|
|
396 ;; Examples: DX504 DX515 K5MRU K8DHK KA9WGN KA9WGN KD3FU KD6EUI KD6HBW
|
|
397 ;; KE9TV KF0NV N1API N3FU N3GZE N3IGS N4KCC N7IKQ N9HHU W4YHF W6ANK WA2SUH
|
|
398 ;; WB7VZI N2NJZ NR3G KJ4KK AB4UM AL7NI KH6OH WN3KBT N4TMI W1A N0NZO
|
|
;; After a word boundary, either "DX" plus one or more digits, or: one of
;; A, K, N, W; an optional capital; one digit; one to three capitals.
;; There is no anchor after the match.
(defconst mail-extr-ham-call-sign-pattern
  (purecopy "\\b\\(DX[0-9]+\\|[AKNW][A-Z]?[0-9][A-Z][A-Z]?[A-Z]?\\)")
  "Regexp matching a ham radio call sign.")
|
|
401
|
|
402 ;; Possible trailing suffixes: "\\(/\\(KT\\|A[AEG]\\|[R0-9]\\)\\)?"
|
|
403 ;; /KT == Temporary Technician (has CSC but not "real" license)
|
|
404 ;; /AA == Temporary Advanced
|
|
405 ;; /AE == Temporary Extra
|
|
406 ;; /AG == Temporary General
|
|
407 ;; /R == repeater
|
|
408 ;; /# == stations operating out of home district
|
|
409 ;; I don't include these in the regexp above because I can't imagine
|
|
410 ;; anyone putting them with their name in an e-mail address.
|
|
411
|
|
412 ;; Matches normal single-part name
|
|
;; Word boundary, one starting character, one or more characters from
;; `mail-extr-all-letters-but-separators' (so no hyphen in the middle),
;; one ending character: at least three characters long.
(defconst mail-extr-normal-name-pattern
  (purecopy (format "\\b[%s][%s]+[%s]"
		    mail-extr-first-letters
		    mail-extr-all-letters-but-separators
		    mail-extr-last-letters))
  "Regexp matching a single-part name of three or more characters.")
|
|
418
|
|
419 ;; Matches a single word name.
|
|
420 ;; (defconst mail-extr-one-name-pattern
|
|
421 ;; (purecopy (concat "\\`" mail-extr-normal-name-pattern "\\'")))
|
|
422
|
|
423 ;; Matches normal two names with missing middle initial
|
|
424 ;; The first name is not allowed to have a hyphen because this can cause
|
|
425 ;; false matches where the "middle initial" is actually the first letter
|
|
426 ;; of the second part of the first name.
|
|
;; Anchored at the start of the text.  Groups, in order:
;;   1 - the first name: `mail-extr-normal-name-pattern' or
;;       `mail-extr-initial-pattern'
;;   2 - the group that `mail-extr-initial-pattern' itself contains
;;   3 - the second name, per `mail-extr-name-pattern', after one or more
;;       spaces
;;   4 - the terminator: a comma or end of text
(defconst mail-extr-two-name-pattern
  (purecopy
   (concat "\\`\\(" mail-extr-normal-name-pattern
	   "\\|" mail-extr-initial-pattern
	   "\\) +\\(" mail-extr-name-pattern "\\)\\(,\\|\\'\\)"))
  "Regexp matching exactly two names with no middle initial.")
|
|
432
|
|
;; The literal text "Multiple recipients of list " followed by the list
;; name (group 1), which is made of capital letters and hyphens.
(defconst mail-extr-listserv-list-name-pattern
  (purecopy "Multiple recipients of list \\([-A-Z]+\\)")
  "Regexp matching a LISTSERV recipients phrase; group 1 is the list name.")

;; Two digits, hyphen, a three-letter month abbreviation (capital then two
;; lowercase letters drawn from fixed sets), hyphen, four digits, a space,
;; one or more digits, and any trailing spaces.
(defconst mail-extr-stupid-vms-date-stamp-pattern
  (purecopy
   "[0-9][0-9]-[JFMASOND][aepuco][nbrylgptvc]-[0-9][0-9][0-9][0-9] [0-9]+ *")
  "Regexp matching a VMS date stamp.")
|
|
439
|
|
440 ;;; HZ -- GB (PRC Chinese character encoding) in ASCII embedding protocol
|
|
441 ;;
|
|
442 ;; In ASCII mode, a byte is interpreted as an ASCII character, unless a '~' is
|
|
443 ;; encountered. The character '~' is an escape character. By convention, it
|
|
444 ;; must be immediately followed ONLY by '~', '{' or '\n' (<LF>), with the
|
|
445 ;; following special meaning.
|
|
446 ;;
|
|
447 ;; o The escape sequence '~~' is interpreted as a '~'.
|
|
448 ;; o The escape-to-GB sequence '~{' switches the mode from ASCII to GB.
|
|
449 ;; o The escape sequence '~\n' is a line-continuation marker to be consumed
|
|
450 ;; with no output produced.
|
|
451 ;;
|
|
452 ;; In GB mode, characters are interpreted two bytes at a time as (pure) GB
|
|
453 ;; codes until the escape-from-GB code '~}' is read. This code switches the
|
|
454 ;; mode from GB back to ASCII. (Note that the escape-from-GB code '~}'
|
|
455 ;; ($7E7D) is outside the defined GB range.)
|
|
;; "~{", then one or more two-character units, then "~}".  A unit is
;; either a non-`~' character followed by any character, or a `~' followed
;; by anything other than `}'.
(defconst mail-extr-hz-embedded-gb-encoded-chinese-pattern
  (purecopy "~{\\([^~].\\|~[^\}]\\)+~}")
  "Regexp matching a run of HZ-embedded GB-encoded text.")
|
|
458
|
|
459 ;; The leading optional lowercase letters are for a bastardized version of
|
|
460 ;; the encoding, as is the optional nature of the final slash.
|
|
;; Up to two leading lowercase letters, then one or more fields of the
;; form /KEY=VALUE or /KEY.SUBKEY=VALUE (keys are letters, values contain
;; no slash), an optional final slash, anchored at end of text.
(defconst mail-extr-x400-encoded-address-pattern
  (purecopy "[a-z]?[a-z]?\\(/[A-Za-z]+\\(\\.[A-Za-z]+\\)?=[^/]+\\)+/?\\'")
  "Regexp matching an X.400 address encoded in RFC-822 form.")

;; A `format' template, not a regexp: the %s is replaced by a regexp for
;; the field key.  In the result, group 1 is the field value (no slash in
;; it) and group 2 is the terminator, a slash or end of text.
(defconst mail-extr-x400-encoded-address-field-pattern-format
  (purecopy "/%s=\\([^/]+\\)\\(/\\|\\'\\)")
  "Format template producing a regexp for one X.400 address field.")

(defconst mail-extr-x400-encoded-address-surname-pattern
  ;; S stands for Surname (family name).
  (purecopy
   (format mail-extr-x400-encoded-address-field-pattern-format "[Ss]"))
  "Regexp matching the S field of an encoded X.400 address.")

(defconst mail-extr-x400-encoded-address-given-name-pattern
  ;; G stands for Given name.
  (purecopy
   (format mail-extr-x400-encoded-address-field-pattern-format "[Gg]"))
  "Regexp matching the G field of an encoded X.400 address.")

(defconst mail-extr-x400-encoded-address-full-name-pattern
  ;; PN stands for Personal Name.  When used it represents the combination
  ;; of the G and S fields.
  ;; "The one system I used having this field asked it with the prompt
  ;; `Personal Name'.  But they mapped it into G and S on outgoing real
  ;; X.400 addresses.  As they mapped G and S into PN on incoming..."
  (purecopy
   (format mail-extr-x400-encoded-address-field-pattern-format "[Pp][Nn]"))
  "Regexp matching the PN field of an encoded X.400 address.")
|
|
485
|
|
486
|
|
487
|
|
488 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
489 ;;
|
|
490 ;; Syntax tables used for quick parsing.
|
|
491 ;;
|
|
492
|
|
;; The five syntax tables used by the parser.  Each starts out as a fresh
;; table and is filled in by the loop below.
(defconst mail-extr-address-syntax-table (make-syntax-table))
(defconst mail-extr-address-comment-syntax-table (make-syntax-table))
(defconst mail-extr-address-domain-literal-syntax-table (make-syntax-table))
(defconst mail-extr-address-text-comment-syntax-table (make-syntax-table))
(defconst mail-extr-address-text-syntax-table (make-syntax-table))

;; Populate the tables.  Each element of the spec list is
;;   (TABLE-VARIABLE ENTRY...)
;; where an ENTRY is either (CHAR SYNTAX), setting one character, or
;; (FIRST LAST SYNTAX), setting every character from FIRST through LAST.
;; Entries are applied in order, so a later entry overrides an earlier
;; range that covered the same character.
(let ((table-specs
       '((mail-extr-address-syntax-table
	  (?\000 ?\037 "w")		;control characters
	  (?\040 " ")			;SPC
	  (?! ?~ "w")			;printable characters
	  (?\177 "w")			;DEL
	  (?\200 ?\377 "w")		;high-bit-on characters
	  (?\240 " ")			;nobreakspace
	  (?\t " ")
	  (?\r " ")
	  (?\n " ")
	  (?\( ".")
	  (?\) ".")
	  (?< ".")
	  (?> ".")
	  (?@ ".")
	  (?, ".")
	  (?\; ".")
	  (?: ".")
	  (?\\ "\\")
	  (?\" "\"")
	  (?. ".")
	  (?\[ ".")
	  (?\] ".")
	  ;; % and ! aren't RFC822 characters, but it is convenient to pretend
	  (?% ".")
	  (?! ".") ;; this needs to be word-constituent when not in .UUCP mode
	  )
	 (mail-extr-address-comment-syntax-table
	  (?\000 ?\377 "w")
	  (?\040 " ")
	  (?\240 " ")
	  (?\t " ")
	  (?\r " ")
	  (?\n " ")
	  (?\( "\(\)")
	  (?\) "\)\(")
	  (?\\ "\\"))
	 (mail-extr-address-domain-literal-syntax-table
	  (?\000 ?\377 "w")
	  (?\040 " ")
	  (?\240 " ")
	  (?\t " ")
	  (?\r " ")
	  (?\n " ")
	  (?\[ "\(\]")			;??????
	  (?\] "\)\[")			;??????
	  (?\\ "\\"))
	 (mail-extr-address-text-comment-syntax-table
	  (?\000 ?\377 "w")
	  (?\040 " ")
	  (?\240 " ")
	  (?\t " ")
	  (?\r " ")
	  (?\n " ")
	  (?\( "\(\)")
	  (?\) "\)\(")
	  (?\[ "\(\]")
	  (?\] "\)\[")
	  (?\{ "\(\}")
	  (?\} "\)\{")
	  (?\\ "\\")
	  (?\" "\"")
	  ;; (?\' "\)\`")
	  ;; (?\` "\(\'")
	  )
	 (mail-extr-address-text-syntax-table
	  (?\000 ?\177 ".")
	  (?\200 ?\377 "w")
	  (?\040 " ")
	  (?\t " ")
	  (?\r " ")
	  (?\n " ")
	  (?A ?Z "w")
	  (?a ?z "w")
	  (?- "w")
	  (?\} "w")
	  (?\{ "w")
	  (?| "w")
	  (?\' "w")
	  (?~ "w")
	  (?0 ?9 "w")))))
  (while table-specs
    (let ((table (symbol-value (car (car table-specs))))
	  (entries (cdr (car table-specs)))
	  entry)
      (while entries
	(setq entry (car entries))
	(if (not (eq 2 (length entry)))
	    ;; (FIRST LAST SYNTAX): walk the whole range.
	    (let ((code (nth 0 entry))
		  (upper (nth 1 entry))
		  (class (nth 2 entry)))
	      (while (<= code upper)
		(modify-syntax-entry code class table)
		(setq code (1+ code))))
	  ;; (CHAR SYNTAX): a single character.
	  (modify-syntax-entry (car entry) (car (cdr entry)) table))
	(setq entries (cdr entries))))
    (setq table-specs (cdr table-specs))))
|
|
598
|
|
599
|
|
600 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
601 ;;
|
|
602 ;; Utility functions and macros.
|
|
603 ;;
|
|
604
|
|
(defmacro mail-extr-delete-char (n)
  ;; Expands to (delete-region (point) (+ (point) N)): delete N characters
  ;; after point, or before point when N is negative.
  ;; in v19, delete-char is compiled as a function call, but delete-region
  ;; is byte-coded, so it's much much faster.
  (let ((here '(point)))
    (list 'delete-region here (list '+ here n))))
|
|
609
|
|
(defmacro mail-extr-skip-whitespace-forward ()
  ;; Expands to a `skip-chars-forward' over space, tab, newline, carriage
  ;; return and character \240.
  ;; v19 fn skip-syntax-forward is more tasteful, but not byte-coded.
  (list 'skip-chars-forward " \t\n\r\240"))
|
|
613
|
|
(defmacro mail-extr-skip-whitespace-backward ()
  ;; Expands to a `skip-chars-backward' over space, tab, newline, carriage
  ;; return and character \240.
  ;; v19 fn skip-syntax-backward is more tasteful, but not byte-coded.
  (list 'skip-chars-backward " \t\n\r\240"))
|
|
617
|
|
618
|
|
(defmacro mail-extr-undo-backslash-quoting (beg end)
  ;; Expands to code that removes backslash quoting between BEG and END:
  ;; each backslash found is deleted and the character after it is
  ;; stepped over untouched.  Point and narrowing are restored afterwards.
  ;; The expansion is built with `list' rather than backquote.
  (list 'save-excursion
	(list 'save-restriction
	      (list 'narrow-to-region beg end)
	      '(goto-char (point-min))
	      ;; undo \ quoting
	      '(while (search-forward "\\" nil t)
		 (mail-extr-delete-char -1)
		 (or (eobp)
		     (forward-char 1))))))
|
|
630
|
|
(defmacro mail-extr-nuke-char-at (pos)
  ;; Expands to code that replaces the character at POS with a space:
  ;; the character is deleted and a space inserted in its place, with
  ;; point restored afterwards.
  ;; The expansion is built with `list' rather than backquote.
  (list 'save-excursion
	(list 'goto-char pos)
	'(mail-extr-delete-char 1)
	'(insert ?\ )))
|
|
636
|
|
;; Tell edebug how to instrument calls to the macro below.
(put 'mail-extr-nuke-outside-range
     'edebug-form-spec '(symbolp &optional form form atom))

(defmacro mail-extr-nuke-outside-range (list-symbol
					beg-symbol end-symbol
					&optional no-replace)
  ;; LIST-SYMBOL names a variable holding a list of buffer positions.
  ;; BEG-SYMBOL and END-SYMBOL name variables delimiting a range.
  ;; Every position in the list that is below BEG-SYMBOL or above
  ;; END-SYMBOL is dropped from the list, and the variable is updated.
  ;; Unless NO-REPLACE is true, the character at each dropped position is
  ;; also overwritten with a SPC.
  ;; NO-REPLACE is inspected while the macro is being expanded, so it has
  ;; to be a literal t or nil.
  ;; The expansion binds the names `temp' and `ch', and is built with
  ;; `list' rather than backquote.
  (or (memq no-replace '(t nil))
      (error "no-replace must be t or nil, evalable at macroexpand-time."))
  (let ((nuke-forms (if no-replace
			nil
		      '((mail-extr-nuke-char-at ch)))))
    (list 'let (list (list 'temp list-symbol)
		     'ch)
	  (list 'while 'temp
		'(setq ch (car temp))
		(list 'cond
		      (append
		       (list (list 'or
				   (list '> 'ch end-symbol)
				   (list '< 'ch beg-symbol)))
		       nuke-forms
		       ;; Mark the cell; the marks are swept by delq below.
		       '((setcar temp nil))))
		'(setq temp (cdr temp)))
	  (list 'setq list-symbol (list 'delq nil list-symbol)))))
|
|
664
|
|
(defun mail-extr-demarkerize (marker)
  ;; When MARKER is a marker: return the position it held and detach it
  ;; from its buffer.  Anything else is returned unchanged.
  (cond ((markerp marker)
	 (prog1 (marker-position marker)
	   (set-marker marker nil)))
	(t marker)))
|
|
673
|
|
(defun mail-extr-markerize (pos)
  ;; Return POS as a marker.  nil stays nil and an existing marker is
  ;; returned as is; any other value is handed to `copy-marker'.
  (cond ((null pos) nil)
	((markerp pos) pos)
	(t (copy-marker pos))))
|
|
679
|
|
(defmacro mail-extr-last (list)
  ;; Expands to code returning the last element of LIST (nil for an
  ;; empty list).  LIST is evaluated once.
  ;; Could be a subst.
  ;; The expansion is built with `list' rather than backquote.
  (list 'let (list (list 'list list))
	'(while (not (null (cdr list)))
	   (setq list (cdr list)))
	'(car list)))
|
|
687
|
|
(defmacro mail-extr-safe-move-sexp (arg)
  ;; Safely skip over one balanced sexp, if there is one.  Return t if success.
  ;;
  ;; ARG is the count passed to `scan-sexps'; a negative value scans
  ;; backward.  The expansion returns nil, leaving point alone, in two
  ;; cases:
  ;;  - the sexp is unbalanced (the scan signals "Unbalanced parentheses");
  ;;  - there is nothing left to skip, in which case `scan-sexps' returns
  ;;    nil.  That nil used to be handed straight to `goto-char', which
  ;;    turned a harmless "no sexp here" into a wrong-type-argument error.
  ;; Any other error is passed on to the caller.
  ;;
  ;; The expansion binds the name `mail-extr-sexp-end', and is built with
  ;; `list' rather than backquote.
  (list 'condition-case 'error
	(list 'let (list (list 'mail-extr-sexp-end
			       (list 'scan-sexps '(point) arg)))
	      '(if mail-extr-sexp-end
		   (progn
		     (goto-char mail-extr-sexp-end)
		     t)
		 nil))
	'(error
	  ;; #### kludge kludge kludge kludge kludge kludge kludge !!!
	  ;; The unbalanced case is recognized by its message text.
	  (if (string-equal (nth 1 error) "Unbalanced parentheses")
	      nil
	    ;; Re-signal; presumably the loop is there in case `signal'
	    ;; ever returns -- TODO confirm.
	    (while t
	      (signal (car error) (cdr error)))))))
|
|
700
|
|
;; v18 compat: where `buffer-disable-undo' is not defined, make it an
;; alias for `buffer-flush-undo'.
(if (fboundp 'buffer-disable-undo)
    nil
  (fset 'buffer-disable-undo 'buffer-flush-undo))
|
|
703
|
|
704
|
|
705 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
706 ;;
|
|
707 ;; The main function to grind addresses
|
|
708 ;;
|
|
709
|
|
;; Value-less `defvar's: they only declare the variables as dynamically
;; bound and assign nothing.  `mail-extract-address-components' let-binds
;; all three; its own comments say `cbeg', `cend' and
;; `disable-initial-guessing-flag' are "dynamically set from -voodoo".
(defvar disable-initial-guessing-flag)	; dynamic assignment
(defvar cbeg)				; dynamic assignment
(defvar cend)				; dynamic assignment
|
|
713
|
|
;;;###autoload
(defun mail-extract-address-components (address)
  "Given an RFC-822 ADDRESS, extract full name and canonical address.
Returns a list of the form (FULL-NAME CANONICAL-ADDRESS).
If no name can be extracted, FULL-NAME will be nil.
ADDRESS may be a string or a buffer.  If it is a buffer, the visible
 (narrowed) portion of the buffer will be interpreted as the address.
 (This feature exists so that the clever caller might be able to avoid
 consing a string.)
If ADDRESS contains more than one RFC-822 address, only the first is
 returned.  Some day this function may be extended to extract multiple
 addresses, or perhaps return the position at which parsing stopped.
Signals an error if ADDRESS is neither a string nor a buffer."
  ;; Overview of the phases below:
  ;;  1. Copy ADDRESS into the hidden extraction buffer and unfold it.
  ;;  2. Scan it once, recording the positions of comments, quotes and
  ;;     the punctuation characters < > @ : , ! % ;
  ;;  3. Repair/normalize that punctuation (brackets, GROUP syntax,
  ;;     ROUTE-ADDR routes).
  ;;  4. Copy the result into the hidden canonicalization buffer and
  ;;     rewrite % and ! forms there to obtain the canonical address.
  ;;  5. Back in the extraction buffer, narrow to the text that should
  ;;     hold the full name and clean it up with `mail-extr-voodoo'.
  ;;  6. Return (FULL-NAME CANONICAL-ADDRESS).
  ;;
  ;; Both hidden buffers are created with `get-buffer-create' and are
  ;; erased on every call; they are not killed afterwards.
  ;;
  ;; NOTE: the position variables are updated through `set' and
  ;; `symbol-value' (see the first pass), and cbeg, cend and
  ;; disable-initial-guessing-flag are assigned by `mail-extr-voodoo'.
  ;; This only works with dynamic binding.
  (let ((canonicalization-buffer (get-buffer-create " *canonical address*"))
        (extraction-buffer (get-buffer-create " *extract address components*"))
        char
        ;; multiple-addresses
        <-pos >-pos @-pos colon-pos comma-pos !-pos %-pos \;-pos
        group-colon-pos group-\;-pos route-addr-colon-pos
        record-pos-symbol
        first-real-pos last-real-pos
        phrase-beg phrase-end
        cbeg cend                       ; dynamically set from -voodoo
        quote-beg quote-end
        atom-beg atom-end
        mbox-beg mbox-end
        \.-ends-name
        temp
        ;; name-suffix
        fi mi li                        ; first, middle, last initial
        saved-%-pos saved-!-pos saved-@-pos
        domain-pos \.-pos insert-point
        ;; mailbox-name-processed-flag
        disable-initial-guessing-flag   ; dynamically set from -voodoo
        )

    (save-excursion
      (set-buffer extraction-buffer)
      (fundamental-mode)
      (kill-all-local-variables)
      (buffer-disable-undo extraction-buffer)
      (set-syntax-table mail-extr-address-syntax-table)
      (widen)
      (erase-buffer)
      (setq case-fold-search nil)

      ;; Insert extra space at beginning to allow later replacement with <
      ;; without having to move markers.
      (insert ?\ )

      ;; Insert the address itself.
      (cond ((stringp address)
             (insert address))
            ((bufferp address)
             (insert-buffer-substring address))
            (t
             (error "Illegal address: %s" address)))

      ;; stolen from rfc822.el
      ;; Unfold multiple lines.
      (goto-char (point-min))
      (while (re-search-forward "\\([^\\]\\(\\\\\\\\\\)*\\)\n[ \t]" nil t)
        (replace-match "\\1 " t))

      ;; first pass grabs useful information about address
      (goto-char (point-min))
      (while (progn
               (mail-extr-skip-whitespace-forward)
               (not (eobp)))
        (setq char (char-after (point)))
        (or first-real-pos
            (if (not (eq char ?\())
                (setq first-real-pos (point))))
        (cond
         ;; comment
         ((eq char ?\()
          (set-syntax-table mail-extr-address-comment-syntax-table)
          ;; only record the first non-empty comment's position
          (if (and (not cbeg)
                   (save-excursion
                     (forward-char 1)
                     (mail-extr-skip-whitespace-forward)
                     (not (eq ?\) (char-after (point))))))
              (setq cbeg (point)))
          ;; TODO: don't record if unbalanced
          (or (mail-extr-safe-move-sexp 1)
              (forward-char 1))
          (set-syntax-table mail-extr-address-syntax-table)
          (if (and cbeg
                   (not cend))
              (setq cend (point))))
         ;; quoted text
         ((eq char ?\")
          ;; only record the first non-empty quote's position
          (if (and (not quote-beg)
                   (save-excursion
                     (forward-char 1)
                     (mail-extr-skip-whitespace-forward)
                     (not (eq ?\" (char-after (point))))))
              (setq quote-beg (point)))
          ;; TODO: don't record if unbalanced
          (or (mail-extr-safe-move-sexp 1)
              (forward-char 1))
          (if (and quote-beg
                   (not quote-end))
              (setq quote-end (point))))
         ;; domain literals
         ((eq char ?\[)
          (set-syntax-table mail-extr-address-domain-literal-syntax-table)
          (or (mail-extr-safe-move-sexp 1)
              (forward-char 1))
          (set-syntax-table mail-extr-address-syntax-table))
         ;; commas delimit addresses when outside < > pairs.
         ((and (eq char ?,)
               (or (and (null <-pos)
                        ;; Handle ROUTE-ADDR address that is missing its <.
                        (not (eq ?@ (char-after (1+ (point))))))
                   (and >-pos
                        ;; handle weird munged addresses
                        ;; BUG FIX: This test was reversed.  Thanks to the
                        ;; brilliant Rod Whitby <rwhitby@research.canon.oz.au>
                        ;; for discovering this!
                        (< (mail-extr-last <-pos) (car >-pos)))))
          ;; It'd be great if some day this worked, but for now, punt.
          ;; (setq multiple-addresses t)
          ;; ;; *** Why do I want this:
          ;; (mail-extr-delete-char 1)
          ;; (narrow-to-region (point-min) (point))
          (delete-region (point) (point-max))
          ;; Pretend we just saw a comment so that the `last-real-pos'
          ;; update at the bottom of this loop is skipped.
          (setq char ?\()               ; HAVE I NO SHAME??
          )
         ;; record the position of various interesting chars, determine
         ;; legality later.
         ((setq record-pos-symbol
                (cdr (assq char
                           '((?< . <-pos) (?> . >-pos) (?@ . @-pos)
                             (?: . colon-pos) (?, . comma-pos) (?! . !-pos)
                             (?% . %-pos) (?\; . \;-pos)))))
          ;; Push point onto the list held by the variable whose name
          ;; was just looked up.
          (set record-pos-symbol
               (cons (point) (symbol-value record-pos-symbol)))
          (forward-char 1))
         ((eq char ?.)
          (forward-char 1))
         ((memq char '(
                       ;; comment terminator illegal
                       ?\)
                       ;; domain literal terminator illegal
                       ?\]
                       ;; \ allowed only within quoted strings,
                       ;; domain literals, and comments
                       ?\\
                       ))
          (mail-extr-nuke-char-at (point))
          (forward-char 1))
         (t
          (forward-word 1)))
        (or (eq char ?\()
            ;; At the end of first address of a multiple address header.
            (and (eq char ?,)
                 (eobp))
            (setq last-real-pos (point))))

      ;; Use only the leftmost <, if any.  Replace all others with spaces.
      (while (cdr <-pos)
        (mail-extr-nuke-char-at (car <-pos))
        (setq <-pos (cdr <-pos)))

      ;; Use only the rightmost >, if any.  Replace all others with spaces.
      (while (cdr >-pos)
        (mail-extr-nuke-char-at (nth 1 >-pos))
        (setcdr >-pos (nthcdr 2 >-pos)))

      ;; If multiple @s and a :, but no < and >, insert around buffer.
      ;; Example: @foo.bar.dom,@xxx.yyy.zzz:mailbox@aaa.bbb.ccc
      ;; This commonly happens on the UUCP "From " line.  Ugh.
      (cond ((and (> (length @-pos) 1)
                  (eq 1 (length colon-pos)) ;TODO: check if between last two @s
                  (not \;-pos)
                  (not <-pos))
             (goto-char (point-min))
             (mail-extr-delete-char 1)
             (setq <-pos (list (point)))
             (insert ?<)))

      ;; If < but no >, insert > in rightmost possible position
      (cond ((and <-pos
                  (null >-pos))
             (goto-char (point-max))
             (setq >-pos (list (point)))
             (insert ?>)))

      ;; If > but no <, replace > with space.
      (cond ((and >-pos
                  (null <-pos))
             (mail-extr-nuke-char-at (car >-pos))
             (setq >-pos nil)))

      ;; Turn >-pos and <-pos into non-lists
      (setq >-pos (car >-pos)
            <-pos (car <-pos))

      ;; Trim other punctuation lists of items outside < > pair to handle
      ;; stupid MTAs.
      (cond (<-pos                      ; don't need to check >-pos also
             ;; handle bozo software that violates RFC 822 by sticking
             ;; punctuation marks outside of a < > pair
             (mail-extr-nuke-outside-range @-pos <-pos >-pos t)
             ;; RFC 822 says nothing about these two outside < >, but
             ;; remove those positions from the lists to make things
             ;; easier.
             (mail-extr-nuke-outside-range !-pos <-pos >-pos t)
             (mail-extr-nuke-outside-range %-pos <-pos >-pos t)))

      ;; Check for : that indicates GROUP list and for : part of
      ;; ROUTE-ADDR spec.
      ;; Can't possibly be more than two :.  Nuke any extra.
      (while colon-pos
        (setq temp (car colon-pos)
              colon-pos (cdr colon-pos))
        (cond ((and <-pos >-pos
                    (> temp <-pos)
                    (< temp >-pos))
               (if (or route-addr-colon-pos
                       (< (length @-pos) 2)
                       (> temp (car @-pos))
                       (< temp (nth 1 @-pos)))
                   (mail-extr-nuke-char-at temp)
                 (setq route-addr-colon-pos temp)))
              ((or (not <-pos)
                   (and <-pos
                        (< temp <-pos)))
               (setq group-colon-pos temp))))

      ;; Nuke any ; that is in or to the left of a < > pair or to the left
      ;; of a GROUP starting :.  Also, there may only be one ;.
      (while \;-pos
        (setq temp (car \;-pos)
              \;-pos (cdr \;-pos))
        (cond ((and <-pos >-pos
                    (> temp <-pos)
                    (< temp >-pos))
               (mail-extr-nuke-char-at temp))
              ((and (or (not group-colon-pos)
                        (> temp group-colon-pos))
                    (not group-\;-pos))
               (setq group-\;-pos temp))))

      ;; Nuke unmatched GROUP syntax characters.
      (cond ((and group-colon-pos (not group-\;-pos))
             ;; *** Do I really need to erase it?
             (mail-extr-nuke-char-at group-colon-pos)
             (setq group-colon-pos nil)))
      (cond ((and group-\;-pos (not group-colon-pos))
             ;; *** Do I really need to erase it?
             (mail-extr-nuke-char-at group-\;-pos)
             (setq group-\;-pos nil)))

      ;; Handle junk like ";@host.company.dom" that sendmail adds.
      ;; **** should I remember comment positions?
      (cond
       (group-\;-pos
        ;; this is fine for now
        (mail-extr-nuke-outside-range !-pos group-colon-pos group-\;-pos t)
        (mail-extr-nuke-outside-range @-pos group-colon-pos group-\;-pos t)
        (mail-extr-nuke-outside-range %-pos group-colon-pos group-\;-pos t)
        (mail-extr-nuke-outside-range comma-pos group-colon-pos group-\;-pos t)
        (and last-real-pos
             (> last-real-pos (1+ group-\;-pos))
             (setq last-real-pos (1+ group-\;-pos)))
        ;; *** This may be wrong:
        (and cend
             (> cend group-\;-pos)
             (setq cend nil
                   cbeg nil))
        (and quote-end
             (> quote-end group-\;-pos)
             (setq quote-end nil
                   quote-beg nil))
        ;; This was both wrong and unnecessary:
        ;;(narrow-to-region (point-min) group-\;-pos)

        ;; *** The entire handling of GROUP addresses seems rather lame.
        ;; *** It deserves a complete rethink, except that these addresses
        ;; *** are hardly ever seen.
        ))

      ;; Any commas must be between < and : of ROUTE-ADDR.  Nuke any
      ;; others.
      ;; Hell, go ahead and nuke all of the commas.
      ;; **** This will cause problems when we start handling commas in
      ;; the PHRASE part .... no it won't ... yes it will ... ?????
      (mail-extr-nuke-outside-range comma-pos 1 1)

      ;; can only have multiple @s inside < >.  The fact that some MTAs
      ;; put de-bracketed ROUTE-ADDRs in the UUCP-style "From " line is
      ;; handled above.

      ;; Locate PHRASE part of ROUTE-ADDR.
      (cond (<-pos
             (goto-char <-pos)
             (mail-extr-skip-whitespace-backward)
             (setq phrase-end (point))
             (goto-char (or ;;group-colon-pos
                         (point-min)))
             (mail-extr-skip-whitespace-forward)
             (if (< (point) phrase-end)
                 (setq phrase-beg (point))
               (setq phrase-end nil))))

      ;; handle ROUTE-ADDRS with real ROUTEs.
      ;; If there are multiple @s, then we assume ROUTE-ADDR syntax, and
      ;; any % or ! must be semantically meaningless.
      ;; TODO: do this processing into canonicalization buffer
      (cond (route-addr-colon-pos
             ;; >-pos and the colon position become markers here so they
             ;; follow the insertions/deletions below; they are turned
             ;; back into plain positions at the end of this clause.
             (setq !-pos nil
                   %-pos nil
                   >-pos (copy-marker >-pos)
                   route-addr-colon-pos (copy-marker route-addr-colon-pos))
             (goto-char >-pos)
             (insert-before-markers ?X)
             (goto-char (car @-pos))
             (while (setq @-pos (cdr @-pos))
               (mail-extr-delete-char 1)
               (setq %-pos (cons (point-marker) %-pos))
               (insert "%")
               (goto-char (1- >-pos))
               (save-excursion
                 (insert-buffer-substring extraction-buffer
                                          (car @-pos) route-addr-colon-pos)
                 (delete-region (car @-pos) route-addr-colon-pos))
               (or (cdr @-pos)
                   (setq saved-@-pos (list (point)))))
             (setq @-pos saved-@-pos)
             (goto-char >-pos)
             ;; Remove the temporary X inserted above.
             (mail-extr-delete-char -1)
             (mail-extr-nuke-char-at route-addr-colon-pos)
             (mail-extr-demarkerize route-addr-colon-pos)
             (setq route-addr-colon-pos nil
                   >-pos (mail-extr-demarkerize >-pos)
                   %-pos (mapcar 'mail-extr-demarkerize %-pos))))

      ;; de-listify @-pos
      (setq @-pos (car @-pos))

      ;; TODO: remove comments in the middle of an address

      ;; From here until "Done canonicalizing address" the current
      ;; buffer is the canonicalization buffer, which starts out as a
      ;; copy of the extraction buffer.
      (set-buffer canonicalization-buffer)
      (fundamental-mode)
      (kill-all-local-variables)
      (buffer-disable-undo canonicalization-buffer)
      (set-syntax-table mail-extr-address-syntax-table)
      (setq case-fold-search nil)

      (widen)
      (erase-buffer)
      (insert-buffer-substring extraction-buffer)

      (if <-pos
          (narrow-to-region (progn
                              (goto-char (1+ <-pos))
                              (mail-extr-skip-whitespace-forward)
                              (point))
                            >-pos)
        (if (and first-real-pos last-real-pos)
            (narrow-to-region first-real-pos last-real-pos)
          ;; ****** Oh no!  What if the address is completely empty!
          ;; *** Is this correct?
          (narrow-to-region (point-max) (point-max))
          ))

      (and @-pos %-pos
           (mail-extr-nuke-outside-range %-pos (point-min) @-pos))
      (and %-pos !-pos
           (mail-extr-nuke-outside-range !-pos (point-min) (car %-pos)))
      (and @-pos !-pos (not %-pos)
           (mail-extr-nuke-outside-range !-pos (point-min) @-pos))

      ;; Error condition:?? (and %-pos (not @-pos))

      ;; WARNING: THIS CODE IS DUPLICATED BELOW.
      (cond ((and %-pos
                  (not @-pos))
             (goto-char (car %-pos))
             (mail-extr-delete-char 1)
             (setq @-pos (point))
             (insert "@")
             (setq %-pos (cdr %-pos))))

      (if mail-extr-mangle-uucp
          (cond (!-pos
                 ;; **** I don't understand this save-restriction and the
                 ;; narrow-to-region inside it.  Why did I do that?
                 (save-restriction
                   (cond ((and @-pos
                               mail-extr-@-binds-tighter-than-!)
                          (goto-char @-pos)
                          (setq %-pos (cons (point) %-pos)
                                @-pos nil)
                          (mail-extr-delete-char 1)
                          (insert "%")
                          (setq insert-point (point-max)))
                         (mail-extr-@-binds-tighter-than-!
                          (setq insert-point (point-max)))
                         (%-pos
                          (setq insert-point (mail-extr-last %-pos)
                                saved-%-pos (mapcar 'mail-extr-markerize %-pos)
                                %-pos nil
                                @-pos (mail-extr-markerize @-pos)))
                         (@-pos
                          (setq insert-point @-pos)
                          (setq @-pos (mail-extr-markerize @-pos)))
                         (t
                          (setq insert-point (point-max))))
                   (narrow-to-region (point-min) insert-point)
                   (setq saved-!-pos (car !-pos))
                   (while !-pos
                     (goto-char (point-max))
                     (cond ((and (not @-pos)
                                 (not (cdr !-pos)))
                            (setq @-pos (point))
                            (insert-before-markers "@ "))
                           (t
                            (setq %-pos (cons (point) %-pos))
                            (insert-before-markers "% ")))
                     (backward-char 1)
                     (insert-buffer-substring
                      (current-buffer)
                      (if (nth 1 !-pos)
                          (1+ (nth 1 !-pos))
                        (point-min))
                      (car !-pos))
                     (mail-extr-delete-char 1)
                     (or (save-excursion
                           (mail-extr-safe-move-sexp -1)
                           (mail-extr-skip-whitespace-backward)
                           (eq ?. (preceding-char)))
                         (insert-before-markers
                          (if (save-excursion
                                (mail-extr-skip-whitespace-backward)
                                (eq ?. (preceding-char)))
                              ""
                            ".")
                          "uucp"))
                     (setq !-pos (cdr !-pos))))
                 (and saved-%-pos
                      (setq %-pos (append (mapcar 'mail-extr-demarkerize
                                                  saved-%-pos)
                                          %-pos)))
                 (setq @-pos (mail-extr-demarkerize @-pos))
                 (narrow-to-region (1+ saved-!-pos) (point-max)))))

      ;; WARNING: THIS CODE IS DUPLICATED ABOVE.
      (cond ((and %-pos
                  (not @-pos))
             (goto-char (car %-pos))
             (mail-extr-delete-char 1)
             (setq @-pos (point))
             (insert "@")
             (setq %-pos (cdr %-pos))))

      (setq %-pos (nreverse %-pos))
      ;; RFC 1034 doesn't approve of this, oh well:
      ;; Neither do we, sb/lmi
      ;; (downcase-region (or (car %-pos) @-pos (point-max)) (point-max))
      (cond (%-pos                      ; implies @-pos valid
             (setq temp %-pos)
             (catch 'truncated
               (while temp
                 (goto-char (or (nth 1 temp)
                                @-pos))
                 (mail-extr-skip-whitespace-backward)
                 (save-excursion
                   (mail-extr-safe-move-sexp -1)
                   (setq domain-pos (point))
                   (mail-extr-skip-whitespace-backward)
                   (setq \.-pos (eq ?. (preceding-char))))
                 (cond ((and \.-pos
                             ;; #### string consing
                             (let ((s (intern-soft
                                       (buffer-substring domain-pos (point))
                                       all-top-level-domains)))
                               (and s (get s 'domain-name))))
                        (narrow-to-region (point-min) (point))
                        (goto-char (car temp))
                        (mail-extr-delete-char 1)
                        (setq @-pos (point))
                        (setcdr temp nil)
                        (setq %-pos (delq @-pos %-pos))
                        (insert "@")
                        (throw 'truncated t)))
                 (setq temp (cdr temp))))))
      (setq mbox-beg (point-min)
            mbox-end (if %-pos (car %-pos)
                       (or @-pos
                           (point-max))))

      ;; Done canonicalizing address.

      (set-buffer extraction-buffer)

      ;; Decide what part of the address to search to find the full name.
      (cond
       ;; Example: "First M. Last" <fml@foo.bar.dom>
       ((and phrase-beg
             (eq quote-beg phrase-beg)
             (<= quote-end phrase-end))
        (narrow-to-region (1+ quote-beg) (1- quote-end))
        (mail-extr-undo-backslash-quoting (point-min) (point-max)))

       ;; Example: First Last <fml@foo.bar.dom>
       (phrase-beg
        (narrow-to-region phrase-beg phrase-end))

       ;; Example: fml@foo.bar.dom (First M. Last)
       (cbeg
        (narrow-to-region (1+ cbeg) (1- cend))
        (mail-extr-undo-backslash-quoting (point-min) (point-max))

        ;; Deal with spacing problems
        (goto-char (point-min))
;	(cond ((not (search-forward " " nil t))
;	       (goto-char (point-min))
;	       (cond ((search-forward "_" nil t)
;		      ;; Handle the *idiotic* use of underlines as spaces.
;		      ;; Example: fml@foo.bar.dom (First_M._Last)
;		      (goto-char (point-min))
;		      (while (search-forward "_" nil t)
;			(replace-match " " t)))
;		     ((search-forward "." nil t)
;		      ;; Fix . used as space
;		      ;; Example: danj1@cb.att.com (daniel.jacobson)
;		      (goto-char (point-min))
;		      (while (re-search-forward mail-extr-bad-dot-pattern nil t)
;			(replace-match "\\1 \\2" t))))))
        )

       ;; Otherwise we try to get the name from the mailbox portion
       ;; of the address.
       ;; Example: First_M_Last@foo.bar.dom
       (t
        ;; *** Work in canon buffer instead?  No, can't.  Hmm.
        (goto-char (point-max))
        (narrow-to-region (point) (point))
        (insert-buffer-substring canonicalization-buffer
                                 mbox-beg mbox-end)
        (goto-char (point-min))

        ;; Example: First_Last.XXX@foo.bar.dom
        (setq \.-ends-name (re-search-forward "[_0-9]" nil t))

        (goto-char (point-min))

        ;; When UUCP mangling is off, temporarily treat ! as a word
        ;; constituent.  The table modified is this buffer's current
        ;; syntax table, i.e. the object installed earlier with
        ;; `set-syntax-table' (not a copy), so the change MUST be
        ;; undone even on a non-local exit; otherwise later calls would
        ;; see the altered table.  Hence the `unwind-protect'.
        (if (not mail-extr-mangle-uucp)
            (modify-syntax-entry ?! "w" (syntax-table)))

        (unwind-protect
            (while (progn
                     (mail-extr-skip-whitespace-forward)
                     (not (eobp)))
              (setq char (char-after (point)))
              (cond
               ((eq char ?\")
                (setq quote-beg (point))
                (or (mail-extr-safe-move-sexp 1)
                    ;; TODO: handle this error condition!!!!!
                    (forward-char 1))
                ;; take into account deletions
                (setq quote-end (- (point) 2))
                (save-excursion
                  (backward-char 1)
                  (mail-extr-delete-char 1)
                  (goto-char quote-beg)
                  (mail-extr-delete-char 1))
                (mail-extr-undo-backslash-quoting quote-beg quote-end)
                (or (eq ?\  (char-after (point)))
                    (insert " "))
                ;; (setq mailbox-name-processed-flag t)
                (setq \.-ends-name t))
               ((eq char ?.)
                (if (memq (char-after (1+ (point))) '(?_ ?=))
                    (progn
                      (forward-char 1)
                      (mail-extr-delete-char 1)
                      (insert ?\ ))
                  (if \.-ends-name
                      (narrow-to-region (point-min) (point))
                    (mail-extr-delete-char 1)
                    (insert " ")))
                ;; (setq mailbox-name-processed-flag t)
                )
               ((memq (char-syntax char) '(?. ?\\))
                (mail-extr-delete-char 1)
                (insert " ")
                ;; (setq mailbox-name-processed-flag t)
                )
               (t
                (setq atom-beg (point))
                (forward-word 1)
                (setq atom-end (point))
                (goto-char atom-beg)
                (save-restriction
                  (narrow-to-region atom-beg atom-end)
                  (cond

                   ;; Handle X.400 addresses encoded in RFC-822.
                   ;; *** FIXME: This has to handle the case where it is
                   ;; *** embedded in a quote too!
                   ;; *** FIXME: The input is being broken up into atoms
                   ;; *** by periods!
                   ((looking-at mail-extr-x400-encoded-address-pattern)

                    ;; Copy the contents of the individual fields that
                    ;; might hold name data to the beginning.
                    ;; (`mapcar' is used only for its side effects.)
                    (mapcar
                     (function
                      (lambda (field-pattern)
                        (cond
                         ((save-excursion
                            (re-search-forward field-pattern nil t))
                          (insert-buffer-substring (current-buffer)
                                                   (match-beginning 1)
                                                   (match-end 1))
                          (insert " ")))))
                     (list mail-extr-x400-encoded-address-given-name-pattern
                           mail-extr-x400-encoded-address-surname-pattern
                           mail-extr-x400-encoded-address-full-name-pattern))

                    ;; Discard the rest, since it contains stuff like
                    ;; routing information, not part of a name.
                    (mail-extr-skip-whitespace-backward)
                    (delete-region (point) (point-max))

                    ;; Handle periods used for spacing.
                    (while (re-search-forward mail-extr-bad-dot-pattern nil t)
                      (replace-match "\\1 \\2" t))

                    ;; (setq mailbox-name-processed-flag t)
                    )

                   ;; Handle normal addresses.
                   (t
                    (goto-char (point-min))
                    ;; Handle _ and = used for spacing.
                    (while (re-search-forward "\\([^_=]+\\)[_=]" nil t)
                      (replace-match "\\1 " t)
                      ;; (setq mailbox-name-processed-flag t)
                      )
                    (goto-char (point-max))))))))

          ;; undo the dirty deed (cleanup form: runs on normal exit and
          ;; on error/quit alike)
          (if (not mail-extr-mangle-uucp)
              (modify-syntax-entry ?! "." (syntax-table))))
        ;;
        ;; If we derived the name from the mailbox part of the address,
        ;; and we only got one word out of it, don't treat that as a
        ;; name.  "foo@bar" --> (nil "foo@bar"), not ("foo" "foo@bar")
        ;; (if (not mailbox-name-processed-flag)
        ;;     (delete-region (point-min) (point-max)))
        ))

      (set-syntax-table mail-extr-address-text-syntax-table)

      ;; Clean up the candidate name in the (narrowed) extraction buffer.
      ;; The callee may also set cbeg, cend and
      ;; disable-initial-guessing-flag in our bindings.
      (mail-extr-voodoo mbox-beg mbox-end canonicalization-buffer)
      (goto-char (point-min))

      ;; If name is "First Last" and userid is "F?L", then assume
      ;; the middle initial is the second letter in the userid.
      ;; Initial code by Jamie Zawinski <jwz@netscape.com>
      ;; *** Make it work when there's a suffix as well.
      (goto-char (point-min))
      (cond ((and mail-extr-guess-middle-initial
                  (not disable-initial-guessing-flag)
                  (eq 3 (- mbox-end mbox-beg))
                  (progn
                    (goto-char (point-min))
                    (looking-at mail-extr-two-name-pattern)))
             (setq fi (char-after (match-beginning 0))
                   li (char-after (match-beginning 3)))
             (save-excursion
               (set-buffer canonicalization-buffer)
               ;; char-equal is ignoring case here, so no need to upcase
               ;; or downcase.
               (let ((case-fold-search t))
                 (and (char-equal fi (char-after mbox-beg))
                      (char-equal li (char-after (1- mbox-end)))
                      (setq mi (char-after (1+ mbox-beg))))))
             (cond ((and mi
                         ;; TODO: use better table than syntax table
                         (eq ?w (char-syntax mi)))
                    (goto-char (match-beginning 3))
                    (insert (upcase mi) ". ")))))

      ;; Nuke name if it is the same as mailbox name.
      ;; The mailbox text is appended after the name, compared with it
      ;; character by character ignoring case, and then removed again.
      (let ((buffer-length (- (point-max) (point-min)))
            (i 0)
            (names-match-flag t))
        (cond ((and (> buffer-length 0)
                    (eq buffer-length (- mbox-end mbox-beg)))
               (goto-char (point-max))
               (insert-buffer-substring canonicalization-buffer
                                        mbox-beg mbox-end)
               (while (and names-match-flag
                           (< i buffer-length))
                 (or (eq (downcase (char-after (+ i (point-min))))
                         (downcase
                          (char-after (+ i buffer-length (point-min)))))
                     (setq names-match-flag nil))
                 (setq i (1+ i)))
               (delete-region (+ (point-min) buffer-length) (point-max))
               (if names-match-flag
                   (narrow-to-region (point) (point))))))

      ;; Nuke name if it's just one word.
      (goto-char (point-min))
      (and mail-extr-ignore-single-names
           (not (re-search-forward "[- ]" nil t))
           (narrow-to-region (point) (point)))

      ;; Result: the visible text of the extraction buffer is the name,
      ;; the visible text of the canonicalization buffer is the address;
      ;; an empty region yields nil.
      (list (if (not (= (point-min) (point-max)))
                (buffer-string))
            (progn
              (set-buffer canonicalization-buffer)
              (if (not (= (point-min) (point-max)))
                  (buffer-string))))
      )))
|
|
1438
|
|
1439 (defun mail-extr-voodoo (mbox-beg mbox-end canonicalization-buffer)
|
|
1440 (let ((word-count 0)
|
|
1441 (case-fold-search nil)
|
|
1442 mixed-case-flag lower-case-flag ;;upper-case-flag
|
|
1443 suffix-flag last-name-comma-flag
|
|
1444 ;;cbeg cend
|
|
1445 initial
|
|
1446 begin-again-flag
|
|
1447 drop-this-word-if-trailing-flag
|
|
1448 drop-last-word-if-trailing-flag
|
|
1449 word-found-flag
|
|
1450 this-word-beg last-word-beg
|
|
1451 name-beg name-end
|
|
1452 name-done-flag
|
|
1453 )
|
|
1454 (save-excursion
|
|
1455 (set-syntax-table mail-extr-address-text-syntax-table)
|
|
1456
|
|
1457 ;; This was moved above.
|
|
1458 ;; Fix . used as space
|
|
1459 ;; But it belongs here because it occurs not only as
|
|
1460 ;; rypens@reks.uia.ac.be (Piet.Rypens)
|
|
1461 ;; but also as
|
|
1462 ;; "Piet.Rypens" <rypens@reks.uia.ac.be>
|
|
1463 ;;(goto-char (point-min))
|
|
1464 ;;(while (re-search-forward mail-extr-bad-dot-pattern nil t)
|
|
1465 ;; (replace-match "\\1 \\2" t))
|
|
1466
|
|
1467 (cond ((not (search-forward " " nil t))
|
|
1468 (goto-char (point-min))
|
|
1469 (cond ((search-forward "_" nil t)
|
|
1470 ;; Handle the *idiotic* use of underlines as spaces.
|
|
1471 ;; Example: fml@foo.bar.dom (First_M._Last)
|
|
1472 (goto-char (point-min))
|
|
1473 (while (search-forward "_" nil t)
|
|
1474 (replace-match " " t)))
|
|
1475 ((search-forward "." nil t)
|
|
1476 ;; Fix . used as space
|
|
1477 ;; Example: danj1@cb.att.com (daniel.jacobson)
|
|
1478 (goto-char (point-min))
|
|
1479 (while (re-search-forward mail-extr-bad-dot-pattern nil t)
|
|
1480 (replace-match "\\1 \\2" t))))))
|
|
1481
|
|
1482
|
|
1483 ;; Loop over the words (and other junk) in the name.
|
|
1484 (goto-char (point-min))
|
|
1485 (while (not name-done-flag)
|
|
1486
|
|
1487 (cond (word-found-flag
|
|
1488 ;; Last time through this loop we skipped over a word.
|
|
1489 (setq last-word-beg this-word-beg)
|
|
1490 (setq drop-last-word-if-trailing-flag
|
|
1491 drop-this-word-if-trailing-flag)
|
|
1492 (setq word-found-flag nil)))
|
|
1493
|
|
1494 (cond (begin-again-flag
|
|
1495 ;; Last time through the loop we found something that
|
|
1496 ;; indicates we should pretend we are beginning again from
|
|
1497 ;; the start.
|
|
1498 (setq word-count 0)
|
|
1499 (setq last-word-beg nil)
|
|
1500 (setq drop-last-word-if-trailing-flag nil)
|
|
1501 (setq mixed-case-flag nil)
|
|
1502 (setq lower-case-flag nil)
|
|
1503 ;; (setq upper-case-flag nil)
|
|
1504 (setq begin-again-flag nil)
|
|
1505 ))
|
|
1506
|
|
1507 ;; Initialize for this iteration of the loop.
|
|
1508 (mail-extr-skip-whitespace-forward)
|
|
1509 (if (eq word-count 0) (narrow-to-region (point) (point-max)))
|
|
1510 (setq this-word-beg (point))
|
|
1511 (setq drop-this-word-if-trailing-flag nil)
|
|
1512
|
|
1513 ;; Decide what to do based on what we are looking at.
|
|
1514 (cond
|
|
1515
|
|
1516 ;; Delete title
|
|
1517 ((and (eq word-count 0)
|
|
1518 (looking-at mail-extr-full-name-prefixes))
|
|
1519 (goto-char (match-end 0))
|
|
1520 (narrow-to-region (point) (point-max)))
|
|
1521
|
|
1522 ;; Stop after name suffix
|
|
1523 ((and (>= word-count 2)
|
|
1524 (looking-at mail-extr-full-name-suffix-pattern))
|
|
1525 (mail-extr-skip-whitespace-backward)
|
|
1526 (setq suffix-flag (point))
|
|
1527 (if (eq ?, (following-char))
|
|
1528 (forward-char 1)
|
|
1529 (insert ?,))
|
|
1530 ;; Enforce at least one space after comma
|
|
1531 (or (eq ?\ (following-char))
|
|
1532 (insert ?\ ))
|
|
1533 (mail-extr-skip-whitespace-forward)
|
|
1534 (cond ((memq (following-char) '(?j ?J ?s ?S))
|
|
1535 (capitalize-word 1)
|
|
1536 (if (eq (following-char) ?.)
|
|
1537 (forward-char 1)
|
|
1538 (insert ?.)))
|
|
1539 (t
|
|
1540 (upcase-word 1)))
|
|
1541 (setq word-found-flag t)
|
|
1542 (setq name-done-flag t))
|
|
1543
|
|
1544 ;; Handle SCA names
|
|
1545 ((looking-at "MKA \\(.+\\)") ; "Mundanely Known As"
|
|
1546 (goto-char (match-beginning 1))
|
|
1547 (narrow-to-region (point) (point-max))
|
|
1548 (setq begin-again-flag t))
|
|
1549
|
|
1550 ;; Check for initial last name followed by comma
|
|
1551 ((and (eq ?, (following-char))
|
|
1552 (eq word-count 1))
|
|
1553 (forward-char 1)
|
|
1554 (setq last-name-comma-flag t)
|
|
1555 (or (eq ?\ (following-char))
|
|
1556 (insert ?\ )))
|
|
1557
|
|
1558 ;; Stop before trailing comma-separated comment
|
|
1559 ;; THIS CASE MUST BE AFTER THE PRECEDING CASES.
|
|
1560 ;; *** This case is redundant???
|
|
1561 ;;((eq ?, (following-char))
|
|
1562 ;; (setq name-done-flag t))
|
|
1563
|
|
1564 ;; Delete parenthesized/quoted comment/nickname
|
|
1565 ((memq (following-char) '(?\( ?\{ ?\[ ?\" ?\' ?\`))
|
|
1566 (setq cbeg (point))
|
|
1567 (set-syntax-table mail-extr-address-text-comment-syntax-table)
|
|
1568 (cond ((memq (following-char) '(?\' ?\`))
|
|
1569 (or (search-forward "'" nil t
|
|
1570 (if (eq ?\' (following-char)) 2 1))
|
|
1571 (mail-extr-delete-char 1)))
|
|
1572 (t
|
|
1573 (or (mail-extr-safe-move-sexp 1)
|
|
1574 (goto-char (point-max)))))
|
|
1575 (set-syntax-table mail-extr-address-text-syntax-table)
|
|
1576 (setq cend (point))
|
|
1577 (cond
|
|
1578 ;; Handle case of entire name being quoted
|
|
1579 ((and (eq word-count 0)
|
|
1580 (looking-at " *\\'")
|
|
1581 (>= (- cend cbeg) 2))
|
|
1582 (narrow-to-region (1+ cbeg) (1- cend))
|
|
1583 (goto-char (point-min)))
|
|
1584 (t
|
|
1585 ;; Handle case of quoted initial
|
|
1586 (if (and (or (= 3 (- cend cbeg))
|
|
1587 (and (= 4 (- cend cbeg))
|
|
1588 (eq ?. (char-after (+ 2 cbeg)))))
|
|
1589 (not (looking-at " *\\'")))
|
|
1590 (setq initial (char-after (1+ cbeg)))
|
|
1591 (setq initial nil))
|
|
1592 (delete-region cbeg cend)
|
|
1593 (if initial
|
|
1594 (insert initial ". ")))))
|
|
1595
|
|
1596 ;; Handle & substitution
|
|
1597 ((and (or (bobp)
|
|
1598 (eq ?\ (preceding-char)))
|
|
1599 (looking-at "&\\( \\|\\'\\)"))
|
|
1600 (mail-extr-delete-char 1)
|
|
1601 (capitalize-region
|
|
1602 (point)
|
|
1603 (progn
|
|
1604 (insert-buffer-substring canonicalization-buffer
|
|
1605 mbox-beg mbox-end)
|
|
1606 (point)))
|
|
1607 (setq disable-initial-guessing-flag t)
|
|
1608 (setq word-found-flag t))
|
|
1609
|
|
1610 ;; Handle *Stupid* VMS date stamps
|
|
1611 ((looking-at mail-extr-stupid-vms-date-stamp-pattern)
|
|
1612 (replace-match "" t))
|
|
1613
|
|
1614 ;; Handle Chinese characters.
|
|
1615 ((looking-at mail-extr-hz-embedded-gb-encoded-chinese-pattern)
|
|
1616 (goto-char (match-end 0))
|
|
1617 (setq word-found-flag t))
|
|
1618
|
|
1619 ;; Skip initial garbage characters.
|
|
1620 ;; THIS CASE MUST BE AFTER THE PRECEDING CASES.
|
|
1621 ((and (eq word-count 0)
|
|
1622 (looking-at mail-extr-leading-garbage))
|
|
1623 (goto-char (match-end 0))
|
|
1624 ;; *** Skip backward over these???
|
|
1625 ;; (skip-chars-backward "& \"")
|
|
1626 (narrow-to-region (point) (point-max)))
|
|
1627
|
|
1628 ;; Various stopping points
|
|
1629 ((or
|
|
1630
|
|
1631 ;; Stop before ALL CAPS acronyms, if preceded by mixed-case
|
|
1632 ;; words. Example: XT-DEM.
|
|
1633 (and (>= word-count 2)
|
|
1634 mixed-case-flag
|
|
1635 (looking-at mail-extr-weird-acronym-pattern)
|
|
1636 (not (looking-at mail-extr-roman-numeral-pattern)))
|
|
1637
|
|
1638 ;; Stop before trailing alternative address
|
|
1639 (looking-at mail-extr-alternative-address-pattern)
|
|
1640
|
|
1641 ;; Stop before trailing comment not introduced by comma
|
|
1642 ;; THIS CASE MUST BE AFTER AN EARLIER CASE.
|
|
1643 (looking-at mail-extr-trailing-comment-start-pattern)
|
|
1644
|
|
1645 ;; Stop before telephone numbers
|
|
1646 (looking-at mail-extr-telephone-extension-pattern))
|
|
1647 (setq name-done-flag t))
|
|
1648
|
|
1649 ;; Delete ham radio call signs
|
|
1650 ((looking-at mail-extr-ham-call-sign-pattern)
|
|
1651 (delete-region (match-beginning 0) (match-end 0)))
|
|
1652
|
|
1653 ;; Fixup initials
|
|
1654 ((looking-at mail-extr-initial-pattern)
|
|
1655 (or (eq (following-char) (upcase (following-char)))
|
|
1656 (setq lower-case-flag t))
|
|
1657 (forward-char 1)
|
|
1658 (if (eq ?. (following-char))
|
|
1659 (forward-char 1)
|
|
1660 (insert ?.))
|
|
1661 (or (eq ?\ (following-char))
|
|
1662 (insert ?\ ))
|
|
1663 (setq word-found-flag t))
|
|
1664
|
|
1665 ;; Handle BITNET LISTSERV list names.
|
|
1666 ((and (eq word-count 0)
|
|
1667 (looking-at mail-extr-listserv-list-name-pattern))
|
|
1668 (narrow-to-region (match-beginning 1) (match-end 1))
|
|
1669 (setq word-found-flag t)
|
|
1670 (setq name-done-flag t))
|
|
1671
|
|
1672 ;; Regular name words
|
|
1673 ((looking-at mail-extr-name-pattern)
|
|
1674 (setq name-beg (point))
|
|
1675 (setq name-end (match-end 0))
|
|
1676
|
|
1677 ;; Certain words will be dropped if they are at the end.
|
|
1678 (and (>= word-count 2)
|
|
1679 (not lower-case-flag)
|
|
1680 (or
|
|
1681 ;; A trailing 4-or-more letter lowercase words preceded by
|
|
1682 ;; mixed case or uppercase words will be dropped.
|
|
1683 (looking-at "[a-z][a-z][a-z][a-z]+[ \t]*\\'")
|
|
1684 ;; Drop a trailing word which is terminated with a period.
|
|
1685 (eq ?. (char-after (1- name-end))))
|
|
1686 (setq drop-this-word-if-trailing-flag t))
|
|
1687
|
|
1688 ;; Set the flags that indicate whether we have seen a lowercase
|
|
1689 ;; word, a mixed case word, and an uppercase word.
|
|
1690 (if (re-search-forward "[a-z]" name-end t)
|
|
1691 (if (progn
|
|
1692 (goto-char name-beg)
|
|
1693 (re-search-forward "[A-Z]" name-end t))
|
|
1694 (setq mixed-case-flag t)
|
|
1695 (setq lower-case-flag t))
|
|
1696 ;; (setq upper-case-flag t)
|
|
1697 )
|
|
1698
|
|
1699 (goto-char name-end)
|
|
1700 (setq word-found-flag t))
|
|
1701
|
|
1702 (t
|
|
1703 (setq name-done-flag t)
|
|
1704 ))
|
|
1705
|
|
1706 ;; Count any word that we skipped over.
|
|
1707 (if word-found-flag
|
|
1708 (setq word-count (1+ word-count))))
|
|
1709
|
|
1710 ;; If the last thing in the name is 2 or more periods, or one or more
|
|
1711 ;; other sentence terminators (but not a single period) then keep them
|
30
|
1712 ;; and the preceding word. This is for the benefit of whole sentences
|
0
|
1713 ;; in the name field: it's better behavior than dropping the last word
|
|
1714 ;; of the sentence...
|
|
1715 (if (and (not suffix-flag)
|
|
1716 (looking-at "\\(\\.+\\|[?!;:.][?!;:.]+\\|[?!;:][?!;:.]*\\)\\'"))
|
|
1717 (goto-char (setq suffix-flag (point-max))))
|
|
1718
|
|
1719 ;; Drop everything after point and certain trailing words.
|
|
1720 (narrow-to-region (point-min)
|
|
1721 (or (and drop-last-word-if-trailing-flag
|
|
1722 last-word-beg)
|
|
1723 (point)))
|
|
1724
|
|
1725 ;; Xerox's mailers SUCK!!!!!!
|
|
1726 ;; We simply refuse to believe that any last name is PARC or ADOC.
|
|
1727 ;; If it looks like that is the last name, then there is no meaningful
|
|
1728 ;; name here at all. Actually I guess it would be best to map patterns
|
|
1729 ;; like foo.hoser@xerox.com into foo@hoser.xerox.com, but I don't
|
|
1730 ;; actually know that that is what's going on.
|
|
1731 (cond ((not suffix-flag)
|
|
1732 (goto-char (point-min))
|
|
1733 (let ((case-fold-search t))
|
|
1734 (if (looking-at "[-A-Za-z_]+[. ]\\(PARC\\|ADOC\\)\\'")
|
|
1735 (erase-buffer)))))
|
|
1736
|
|
1737 ;; If last name first put it at end (but before suffix)
|
|
1738 (cond (last-name-comma-flag
|
|
1739 (goto-char (point-min))
|
|
1740 (search-forward ",")
|
|
1741 (setq name-end (1- (point)))
|
|
1742 (goto-char (or suffix-flag (point-max)))
|
|
1743 (or (eq ?\ (preceding-char))
|
|
1744 (insert ?\ ))
|
|
1745 (insert-buffer-substring (current-buffer) (point-min) name-end)
|
|
1746 (goto-char name-end)
|
|
1747 (skip-chars-forward "\t ,")
|
|
1748 (narrow-to-region (point) (point-max))))
|
|
1749
|
|
1750 ;; Delete leading and trailing junk characters.
|
|
1751 ;; *** This is probably completely unneeded now.
|
|
1752 ;;(goto-char (point-max))
|
|
1753 ;;(skip-chars-backward mail-extr-non-end-name-chars)
|
|
1754 ;;(if (eq ?. (following-char))
|
|
1755 ;; (forward-char 1))
|
|
1756 ;;(narrow-to-region (point)
|
|
1757 ;; (progn
|
|
1758 ;; (goto-char (point-min))
|
|
1759 ;; (skip-chars-forward mail-extr-non-begin-name-chars)
|
|
1760 ;; (point)))
|
|
1761
|
|
1762 ;; Compress whitespace
|
|
1763 (goto-char (point-min))
|
|
1764 (while (re-search-forward "[ \t\n]+" nil t)
|
|
1765 (replace-match (if (eobp) "" " ") t))
|
|
1766 )))
|
|
1767
|
|
1768
|
|
1769
|
|
1770 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
1771 ;;
|
|
1772 ;; Table of top-level domain names.
|
|
1773 ;;
|
|
1774 ;; This is used during address canonicalization; be careful of format changes.
|
|
1775 ;; Keep in mind that the country abbreviations follow ISO-3166. There is
|
|
1776 ;; a U.S. FIPS that specifies a different set of two-letter country
|
|
1777 ;; abbreviations.
|
|
1778
|
|
(defconst all-top-level-domains
  (let ((ob (make-vector 509 0))
        ;; Each entry is (DOMAIN NAME [FORMAT]).  NAME is a string, or t
        ;; for domains that are not countries.  When FORMAT is present the
        ;; stored description is (format FORMAT NAME); otherwise it is NAME.
        (entries
         '(("ag" "Antigua")
           ("ar" "Argentina" "Argentine Republic")
           ("arpa" t "Advanced Research Projects Agency")
           ("at" "Austria" "The Republic of %s")
           ("au" "Australia")
           ("bb" "Barbados")
           ("be" "Belgium" "The Kingdom of %s")
           ("bg" "Bulgaria")
           ("bitnet" t "Because It's Time NET")
           ("bo" "Bolivia" "Republic of %s")
           ("br" "Brazil" "The Federative Republic of %s")
           ("bs" "Bahamas")
           ("bz" "Belize")
           ("ca" "Canada")
           ("ch" "Switzerland" "The Swiss Confederation")
           ("cl" "Chile" "The Republic of %s")
           ("cn" "China" "The People's Republic of %s")
           ("co" "Colombia")
           ("com" t "Commercial")
           ("cr" "Costa Rica" "The Republic of %s")
           ("cs" "Czechoslovakia")
           ("de" "Germany")
           ("dk" "Denmark")
           ("dm" "Dominica")
           ("do" "Dominican Republic" "The %s")
           ("ec" "Ecuador" "The Republic of %s")
           ("edu" t "Educational")
           ("eg" "Egypt" "The Arab Republic of %s")
           ("es" "Spain" "The Kingdom of %s")
           ("fi" "Finland" "The Republic of %s")
           ("fj" "Fiji")
           ("fr" "France")
           ("gov" t "Government (U.S.A.)")
           ("gr" "Greece" "The Hellenic Republic (%s)")
           ("hk" "Hong Kong")
           ("hr" "Croatia" "The Republic of %s")
           ("hu" "Hungary")
           ("ie" "Ireland")
           ("il" "Israel" "The State of %s")
           ("in" "India" "The Republic of %s")
           ("int" t "International Treaty Organizations")
           ("is" "Iceland" "The Republic of %s")
           ("it" "Italy" "The Italian Republic")
           ("jm" "Jamaica")
           ("jp" "Japan")
           ("kn" "St. Kitts and Nevis")
           ("kr" "South Korea")
           ("lc" "St. Lucia")
           ("lk" "Sri Lanka" "The Democratic Socialist Republic of %s")
           ("mil" t "Military (U.S.A.)")
           ("mx" "Mexico" "The United Mexican States")
           ;; Myanmar is "mm"; "my" is Malaysia only.
           ("my" "Malaysia")
           ("na" "Namibia")
           ("nato" t "North Atlantic Treaty Organization")
           ("net" t "Network")
           ("ni" "Nicaragua" "The Republic of %s")
           ("nl" "Netherlands" "The Kingdom of the %s")
           ("no" "Norway" "The Kingdom of %s")
           ("nz" "New Zealand")
           ("org" t "Organization")
           ("pe" "Peru")
           ("pg" "Papua New Guinea")
           ("ph" "Philippines" "The Republic of the %s")
           ("pl" "Poland")
           ("pr" "Puerto Rico")
           ("pt" "Portugal" "The Portuguese Republic")
           ("py" "Paraguay")
           ("se" "Sweden" "The Kingdom of %s")
           ("sg" "Singapore" "The Republic of %s")
           ("sr" "Suriname")
           ("su" "Soviet Union")
           ("th" "Thailand" "The Kingdom of %s")
           ("tn" "Tunisia")
           ("tr" "Turkey" "The Republic of %s")
           ("tt" "Trinidad and Tobago")
           ("tw" "Taiwan")
           ("uk" "United Kingdom"
            "The %s of Great Britain and Northern Ireland")
           ("unter-dom" t "(something German)")
           ("us" "U.S.A." "The United States of America")
           ("uucp" t "Unix to Unix CoPy")
           ("uy" "Uruguay" "The Eastern Republic of %s")
           ("vc" "St. Vincent and the Grenadines")
           ("ve" "Venezuela" "The Republic of %s")
           ("yu" "Yugoslavia" "The Socialist Federal Republic of %s")
           ;; Zambia is "zm" and Zaire was "zr"; "za" is South Africa only.
           ("za" "South Africa" "The Republic of %s")
           ("zw" "Zimbabwe" "Republic of %s")
           ;; fipnet
           ))
        entry)
    ;; Intern every domain (lower-cased) into the private obarray OB and
    ;; hang its description on the symbol's `domain-name' property.  A
    ;; plain loop is used because only the side effect matters; no result
    ;; list needs to be built.
    (while entries
      (setq entry (car entries)
            entries (cdr entries))
      (put (intern (downcase (car entry)) ob)
           'domain-name
           (if (nth 2 entry)
               (format (nth 2 entry) (nth 1 entry))
             (nth 1 entry))))
    ob)
  "Obarray of known top-level mail domain names.
Each symbol is named by a lower-case top-level domain (for example
\"edu\" or \"uk\") and carries a `domain-name' property whose value is a
human-readable description of that domain.  Country codes follow
ISO-3166.  Look entries up with `intern-soft'; the obarray can also be
used directly as a completion table.")
|
|
1879
|
|
1880 ;;;###autoload
|
|
(defun what-domain (x)
  "Display the description of the top-level mail domain X.
X is a domain name string such as \"edu\" or \"uk\"; letter case is
ignored.  Interactively, prompt for X with completion over
`all-top-level-domains'.  The result is shown in the echo area as
\"DOMAIN: description\".  Signal an error if X is not a known domain."
  (interactive
   (let ((completion-ignore-case t))
     (list (completing-read "Domain: " all-top-level-domains nil t))))
  ;; Keep X intact and bind the lookup result separately, so that the
  ;; error message can report exactly what the caller asked for.
  (let ((sym (intern-soft (downcase x) all-top-level-domains)))
    (or sym
        (error "No such domain: %s" x))
    (message "%s: %s" (upcase (symbol-name sym)) (get sym 'domain-name))))
|
|
1890
|
|
1891
|
|
1892 ;(let ((all nil))
|
|
1893 ; (mapatoms #'(lambda (x)
|
|
1894 ; (if (and (boundp x)
|
|
1895 ; (string-match "^mail-extr-" (symbol-name x)))
|
|
1896 ; (setq all (cons x all)))))
|
|
1897 ; (setq all (sort all #'string-lessp))
|
|
1898 ; (cons 'setq
|
|
1899 ; (apply 'nconc (mapcar #'(lambda (x)
|
|
1900 ; (list x (symbol-value x)))
|
|
1901 ; all))))
|
|
1902
|
|
1903
|
|
;; Declare the `mail-extr' feature as provided by this file.
1904 (provide 'mail-extr)
|
|
1905
|
|
1906 ;;; mail-extr.el ends here
|