comparison lisp/w3/w3-parse.el @ 2:ac2d302a0011 r19-15b2

Import from CVS: tag r19-15b2
author cvs
date Mon, 13 Aug 2007 08:46:35 +0200
parents 376386a54a3c
children 9ee227acff29
comparison
equal deleted inserted replaced
1:c0c6a60d29db 2:ac2d302a0011
1 ;; Created by: Joe Wells, jbw@csb.bu.edu 1 ;; Created by: Joe Wells, jbw@csb.bu.edu
2 ;; Created on: Sat Sep 30 17:25:40 1995 2 ;; Created on: Sat Sep 30 17:25:40 1995
3 ;; Filename: w3-parse.el 3 ;; Filename: w3-parse.el
4 ;; Purpose: Parse HTML and/or SGML for Emacs W3 browser. 4 ;; Purpose: Parse HTML and/or SGML for Emacs W3 browser.
5 5
6 ;; Copyright © 1995 Joseph Brian Wells 6 ;; Copyright © 1995, 1996 Joseph Brian Wells
7 ;; Copyright © 1993, 1994, 1995 by William M. Perry (wmperry@spry.com) 7 ;; Copyright © 1993, 1994, 1995 by William M. Perry (wmperry@cs.indiana.edu)
8 ;; 8 ;;
9 ;; This program is free software; you can redistribute it and/or modify 9 ;; This program is free software; you can redistribute it and/or modify
10 ;; it under the terms of the GNU General Public License as published by 10 ;; it under the terms of the GNU General Public License as published by
11 ;; the Free Software Foundation; either version 2 of the License, or 11 ;; the Free Software Foundation; either version 2 of the License, or
12 ;; (at your option) any later version. 12 ;; (at your option) any later version.
286 286
287 (let ((html-entities w3-html-entities)) 287 (let ((html-entities w3-html-entities))
288 (while html-entities 288 (while html-entities
289 (put (car (car html-entities)) 'html-entity-expansion 289 (put (car (car html-entities)) 'html-entity-expansion
290 (cons 'CDATA (if (integerp (cdr (car html-entities))) 290 (cons 'CDATA (if (integerp (cdr (car html-entities)))
291 (char-to-string (cdr (car html-entities))) 291 (char-to-string
292 (let ((c (cdr (car html-entities))))
293 (cond
294 ((and (> c 127) (boundp 'MULE))
295 (make-character lc-ltn1 c))
296 ;;((and (> c 127) (featurep 'mule))
297 ;; What???
298 ;;)
299 (t
300 c))))
292 (cdr (car html-entities))))) 301 (cdr (car html-entities)))))
293 (setq html-entities (cdr html-entities)))) 302 (setq html-entities (cdr html-entities))))
294 303
295 ;; These are handled differently than the normal HTML entities because 304 ;; These are handled differently than the normal HTML entities because
296 ;; we need to define the entities with 'nil instead of 'CDATA so 305 ;; we need to define the entities with 'nil instead of 'CDATA so
313 (cur nil)) 322 (cur nil))
314 (while html-entities 323 (while html-entities
315 (setq cur (car html-entities) 324 (setq cur (car html-entities)
316 html-entities (cdr html-entities)) 325 html-entities (cdr html-entities))
317 (put (nth 0 cur) 'html-entity-expansion 326 (put (nth 0 cur) 'html-entity-expansion
318 (cons 'nil (format "img src=\"%s/%s%s\" alt=\"%s\"" 327 (cons 'nil (format "<img src=\"%s/%s%s\" alt=\"%s\">"
319 w3-icon-directory 328 w3-icon-directory
320 (nth 1 cur) 329 (nth 1 cur)
321 (if w3-icon-format 330 (if w3-icon-format
322 (concat "." (symbol-name w3-icon-format)) 331 (concat "." (symbol-name w3-icon-format))
323 "") 332 "")
431 ;; larger than 255. I think in MULE it does. Is this true? 440 ;; larger than 255. I think in MULE it does. Is this true?
432 ;; Bill wants to call w3-resolve-numeric-entity here, but I think 441 ;; Bill wants to call w3-resolve-numeric-entity here, but I think
433 ;; that functionality belongs in char-to-string. 442 ;; that functionality belongs in char-to-string.
434 ;; The largest valid character in the I18N version of HTML is 65533. 443 ;; The largest valid character in the I18N version of HTML is 65533.
435 ;; <URL:ftp://ds.internic.net/internet-drafts/draft-ietf-html-i18n-01.txt> 444 ;; <URL:ftp://ds.internic.net/internet-drafts/draft-ietf-html-i18n-01.txt>
436 (insert (char-to-string w3-p-s-num))) 445 ;; wrongo! Apparently, mule doesn't do sane things with char-to-string
437 446 ;; -wmp 7/9/96
447 (insert (char-to-string
448 (cond
449 ((and (boundp 'MULE) (> w3-p-s-num 127))
450 (make-character lc-ltn1 w3-p-s-num))
451 ;;((and (featurep 'mule) (> w3-p-s-num 127))
452 ;;what??
453 ;;)
454 (t
455 w3-p-s-num)))))
438 ((looking-at "&#\\(re\\|rs\\|space\\|tab\\)[\ ;\n]?") ; \n should be \r 456 ((looking-at "&#\\(re\\|rs\\|space\\|tab\\)[\ ;\n]?") ; \n should be \r
439 (replace-match (assq (upcase (char-after (+ 3 (point)))) 457 (replace-match (assq (upcase (char-after (+ 3 (point))))
440 '(;; *** Strictly speaking, record end should be 458 '(;; *** Strictly speaking, record end should be
441 ;; carriage return. 459 ;; carriage return.
442 (?E . "\n") ; RE 460 (?E . "\n") ; RE
602 ;; using its content model. 620 ;; using its content model.
603 ;; 621 ;;
604 ;; NEW-STATE (optional, default *same) is the index of the state to 622 ;; NEW-STATE (optional, default *same) is the index of the state to
605 ;; move to after processing the element or one of these: 623 ;; move to after processing the element or one of these:
606 ;; *same: no state change occurs. 624 ;; *same: no state change occurs.
607 ;; *next: change the the current state + 1. 625 ;; *next: change the current state + 1.
608 ;; The initial state is 0. NEW-STATE does not matter if ACTION is 626 ;; The initial state is 0. NEW-STATE does not matter if ACTION is
609 ;; *close. 627 ;; *close.
610 ;; 628 ;;
611 ;; ERRORP (optional, default nil) if non-nil indicates this transition 629 ;; ERRORP (optional, default nil) if non-nil indicates this transition
612 ;; represents an error. The error message includes this value if it 630 ;; represents an error. The error message includes this value if it
1148 (eval-when-compile 1166 (eval-when-compile
1149 (w3-unfold-dtd 1167 (w3-unfold-dtd
1150 (w3-expand-parameters 1168 (w3-expand-parameters
1151 '( 1169 '(
1152 (%headempty . (link base meta range)) 1170 (%headempty . (link base meta range))
1171 (%headmisc . (script))
1153 (%head-deprecated . (nextid)) 1172 (%head-deprecated . (nextid))
1154 1173
1155 ;; client-side imagemaps 1174 ;; client-side imagemaps
1156 (%imagemaps . (area map)) 1175 (%imagemaps . (area map))
1157 ;; special action is taken for %text inside %body.content in the 1176 ;; special action is taken for %text inside %body.content in the
1158 ;; content model of each element. 1177 ;; content model of each element.
1159 (%body.content . (%heading %block hr div address %imagemaps)) 1178 (%body.content . (%heading %block hr div address %imagemaps))
1160 1179
1161 (%heading . (h1 h2 h3 h4 h5 h6)) 1180 (%heading . (h1 h2 h3 h4 h5 h6))
1162 1181
1163 ;; Netscape's CENTER, FONT, and BASEFONT are handled
1164 ;; non-standardly. In actual psuedo-HTML on the net, these are
1165 ;; used as both text-level constructs and block-level constructs.
1166 ;; They are the only items in both %block and %text in this
1167 ;; definition here.
1168 ;; *** Perhaps add BLINK here too? But no one uses that as a block
1169 ;; construct. What about NOBR?
1170 (%netscape-crud . (center font basefont))
1171
1172 ;; Emacs-w3 extensions 1182 ;; Emacs-w3 extensions
1173 (%emacsw3-crud-nonempty . (roach secret wired))
1174 (%emacsw3-crud . (pinhead flame cookie yogsothoth hype peek)) 1183 (%emacsw3-crud . (pinhead flame cookie yogsothoth hype peek))
1175 1184
1176 (%block . (p %list dl form %preformatted %netscape-crud font 1185 (%block . (p %list dl form %preformatted font
1177 %blockquote isindex fn table fig note 1186 %blockquote isindex fn table fig note
1178 %block-deprecated %block-obsoleted)) 1187 center %block-deprecated %block-obsoleted))
1179 (%list . (ul ol)) 1188 (%list . (ul ol))
1180 (%preformatted . (pre)) 1189 (%preformatted . (pre))
1181 (%blockquote . (bq)) 1190 (%blockquote . (bq))
1182 (%block-deprecated . (dir menu blockquote)) 1191 (%block-deprecated . (dir menu blockquote))
1183 (%block-obsoleted . (xmp listing)) 1192 (%block-obsoleted . (xmp listing))
1184 1193
1185 ;; Why is IMG in this list? 1194 ;; Why is IMG in this list?
1186 (%pre.exclusion . (*include img *discard tab math big small sub sup)) 1195 (%pre.exclusion . (*include img *discard tab math big small sub sup))
1187 1196
1188 (%text . (*data b %notmath %netscape-crud sub sup 1197 (%text . (*data b %notmath sub sup %emacsw3-crud))
1189 %emacsw3-crud %emacsw3-crud-nonempty))
1190 (%notmath . (%special %font %phrase %misc)) 1198 (%notmath . (%special %font %phrase %misc))
1191 (%font . (i u s strike tt big small)) ; B left out for MATH handling 1199 (%font . (i u s strike tt big small sub sup
1192 (%phrase . (em strong code samp kbd var cite blink)) 1200 roach secret wired)) ;; B left out for MATH
1193 ;; Don't know if this is right place for EMBED. 1201 (%phrase . (em strong dfn code samp kbd var cite blink))
1194 (%special . (a img br wbr nobr tab math embed)) 1202 (%special . (a img applet font br script map math tab))
1195 (%misc . (q lang au dfn person acronym abbrev ins del)) 1203 (%misc . (q lang au person acronym abbrev ins del))
1196 1204
1197 (%formula . (*data %math)) 1205 (%formula . (*data %math))
1198 (%math . (box above below %mathvec root sqrt array sub sup 1206 (%math . (box above below %mathvec root sqrt array sub sup
1199 %mathface)) 1207 %mathface))
1200 (%mathvec . (vec bar dot ddot hat tilde)) 1208 (%mathvec . (vec bar dot ddot hat tilde))
1256 nil 1264 nil
1257 (*include *same "after FRAMESET")) 1265 (*include *same "after FRAMESET"))
1258 ]) 1266 ])
1259 (end-tag-omissible . t)) 1267 (end-tag-omissible . t))
1260 ((head) 1268 ((head)
1261 (content-model . [((title isindex %headempty style %head-deprecated) 1269 (content-model . [((title isindex %headempty %headmisc
1270 style %head-deprecated)
1262 nil 1271 nil
1263 nil 1272 nil
1264 ;; *** Should only close if tag can 1273 ;; *** Should only close if tag can
1265 ;; legitimately follow head. So many can that 1274 ;; legitimately follow head. So many can that
1266 ;; I haven't bothered to enumerate them. 1275 ;; I haven't bothered to enumerate them.
1267 (*close))]) 1276 (*close))])
1268 (end-tag-omissible . t)) 1277 (end-tag-omissible . t))
1278 ;; SCRIPT - - (#PCDATA)
1279 ((script)
1280 (content-model . CDATA ; not official, but allows
1281 ; comment hiding of script
1282 ))
1269 ;; TITLE - - (#PCDATA) 1283 ;; TITLE - - (#PCDATA)
1270 ((title) 1284 ((title)
1271 (content-model . RCDATA ; not official 1285 (content-model . RCDATA ; not official
1272 ;; [((*data) include-space nil nil)] 1286 ;; [((*data) include-space nil nil)]
1273 )) 1287 ))
1274 ;; STYLE - O (#PCDATA) 1288 ;; STYLE - O (#PCDATA)
1275 ;; STYLE needs to be #PCDATA to allow omitted end tag. Bleagh. 1289 ;; STYLE needs to be #PCDATA to allow omitted end tag. Bleagh.
1276 ((style) 1290 ((style)
1277 (content-model . [((*data) 1291 (content-model . CDATA)
1278 include-space
1279 nil
1280 ;; *** Should only close if tag can
1281 ;; legitimately follow style. So many can that
1282 ;; I haven't bothered to enumerate them.
1283 (*close))])
1284 (end-tag-omissible . t)) 1292 (end-tag-omissible . t))
1285 ((body) 1293 ((body)
1286 (content-model . [((banner) nil nil (*retry *next)) 1294 (content-model . [((banner) nil nil (*retry *next))
1287 ((bodytext) nil nil (bodytext *next)) 1295 ((bodytext) nil nil (bodytext *next))
1288 (nil nil (((plaintext) *close)) nil)]) 1296 (nil nil (((plaintext) *close)) nil)])
1300 ;; caused by BQ's sharing of BODYTEXT. BQ 1308 ;; caused by BQ's sharing of BODYTEXT. BQ
1301 ;; should have its own BQTEXT. 1309 ;; should have its own BQTEXT.
1302 ((credit plaintext) *close)) 1310 ((credit plaintext) *close))
1303 nil)]) 1311 nil)])
1304 (end-tag-omissible . t)) 1312 (end-tag-omissible . t))
1305 ((div banner) 1313 ((div banner center)
1306 (content-model . [((%body.content) 1314 (content-model . [((%body.content)
1307 nil 1315 nil
1308 ;; Push <P> before data characters. Non-SGML. 1316 ;; Push <P> before data characters. Non-SGML.
1309 (((%text) p)) 1317 (((%text) p))
1310 nil)])) 1318 nil)]))
1448 ((credit) *close)) 1456 ((credit) *close))
1449 nil)]) 1457 nil)])
1450 (end-tag-omissible . t)) 1458 (end-tag-omissible . t))
1451 ((%emacsw3-crud) 1459 ((%emacsw3-crud)
1452 (content-model . EMPTY)) 1460 (content-model . EMPTY))
1453 ((%netscape-crud)
1454 ;; Special non-SGML treatment of Netscape's shit.
1455 (content-model . XINHERIT))
1456 ;; FORM - - %body.content -(FORM) +(INPUT|KEYGEN|SELECT|TEXTAREA) 1461 ;; FORM - - %body.content -(FORM) +(INPUT|KEYGEN|SELECT|TEXTAREA)
1457 ((form) 1462 ((form)
1458 ;; Same as BODY. Ugh! 1463 ;; Same as BODY. Ugh!
1459 (content-model . [((%body.content) 1464 (content-model . [((%body.content)
1460 nil 1465 nil
1513 ;; bad formatting if the A is *never* closed. 1518 ;; bad formatting if the A is *never* closed.
1514 ;;((p) *discard *same error) 1519 ;;((p) *discard *same error)
1515 ) 1520 )
1516 nil)]) 1521 nil)])
1517 (exclusions . (a))) 1522 (exclusions . (a)))
1518 ((b %font %phrase %misc nobr %emacsw3-crud-nonempty) 1523 ((b font %font %phrase %misc nobr)
1519 (content-model . [((%text) 1524 (content-model . [((%text)
1520 include-space 1525 include-space
1521 ((%in-text-ignore)) 1526 ((%in-text-ignore))
1522 nil)])) 1527 nil)]))
1523 ((plaintext) 1528 ((plaintext)