Mercurial > hg > xemacs-beta
comparison lisp/w3/w3-parse.el @ 2:ac2d302a0011 r19-15b2
Import from CVS: tag r19-15b2
author | cvs |
---|---|
date | Mon, 13 Aug 2007 08:46:35 +0200 |
parents | 376386a54a3c |
children | 9ee227acff29 |
comparison
equal
deleted
inserted
replaced
1:c0c6a60d29db | 2:ac2d302a0011 |
---|---|
1 ;; Created by: Joe Wells, jbw@csb.bu.edu | 1 ;; Created by: Joe Wells, jbw@csb.bu.edu |
2 ;; Created on: Sat Sep 30 17:25:40 1995 | 2 ;; Created on: Sat Sep 30 17:25:40 1995 |
3 ;; Filename: w3-parse.el | 3 ;; Filename: w3-parse.el |
4 ;; Purpose: Parse HTML and/or SGML for Emacs W3 browser. | 4 ;; Purpose: Parse HTML and/or SGML for Emacs W3 browser. |
5 | 5 |
6 ;; Copyright © 1995 Joseph Brian Wells | 6 ;; Copyright © 1995, 1996 Joseph Brian Wells |
7 ;; Copyright © 1993, 1994, 1995 by William M. Perry (wmperry@spry.com) | 7 ;; Copyright © 1993, 1994, 1995 by William M. Perry (wmperry@cs.indiana.edu) |
8 ;; | 8 ;; |
9 ;; This program is free software; you can redistribute it and/or modify | 9 ;; This program is free software; you can redistribute it and/or modify |
10 ;; it under the terms of the GNU General Public License as published by | 10 ;; it under the terms of the GNU General Public License as published by |
11 ;; the Free Software Foundation; either version 2 of the License, or | 11 ;; the Free Software Foundation; either version 2 of the License, or |
12 ;; (at your option) any later version. | 12 ;; (at your option) any later version. |
286 | 286 |
287 (let ((html-entities w3-html-entities)) | 287 (let ((html-entities w3-html-entities)) |
288 (while html-entities | 288 (while html-entities |
289 (put (car (car html-entities)) 'html-entity-expansion | 289 (put (car (car html-entities)) 'html-entity-expansion |
290 (cons 'CDATA (if (integerp (cdr (car html-entities))) | 290 (cons 'CDATA (if (integerp (cdr (car html-entities))) |
291 (char-to-string (cdr (car html-entities))) | 291 (char-to-string |
292 (let ((c (cdr (car html-entities)))) | |
293 (cond | |
294 ((and (> c 127) (boundp 'MULE)) | |
295 (make-character lc-ltn1 c)) | |
296 ;;((and (> c 127) (featurep 'mule)) | |
297 ;; What??? | |
298 ;;) | |
299 (t | |
300 c)))) | |
292 (cdr (car html-entities))))) | 301 (cdr (car html-entities))))) |
293 (setq html-entities (cdr html-entities)))) | 302 (setq html-entities (cdr html-entities)))) |
294 | 303 |
295 ;; These are handled differently than the normal HTML entities because | 304 ;; These are handled differently than the normal HTML entities because |
296 ;; we need to define the entities with 'nil instead of 'CDATA so | 305 ;; we need to define the entities with 'nil instead of 'CDATA so |
313 (cur nil)) | 322 (cur nil)) |
314 (while html-entities | 323 (while html-entities |
315 (setq cur (car html-entities) | 324 (setq cur (car html-entities) |
316 html-entities (cdr html-entities)) | 325 html-entities (cdr html-entities)) |
317 (put (nth 0 cur) 'html-entity-expansion | 326 (put (nth 0 cur) 'html-entity-expansion |
318 (cons 'nil (format "img src=\"%s/%s%s\" alt=\"%s\"" | 327 (cons 'nil (format "<img src=\"%s/%s%s\" alt=\"%s\">" |
319 w3-icon-directory | 328 w3-icon-directory |
320 (nth 1 cur) | 329 (nth 1 cur) |
321 (if w3-icon-format | 330 (if w3-icon-format |
322 (concat "." (symbol-name w3-icon-format)) | 331 (concat "." (symbol-name w3-icon-format)) |
323 "") | 332 "") |
431 ;; larger than 255. I think in MULE it does. Is this true? | 440 ;; larger than 255. I think in MULE it does. Is this true? |
432 ;; Bill wants to call w3-resolve-numeric-entity here, but I think | 441 ;; Bill wants to call w3-resolve-numeric-entity here, but I think |
433 ;; that functionality belongs in char-to-string. | 442 ;; that functionality belongs in char-to-string. |
434 ;; The largest valid character in the I18N version of HTML is 65533. | 443 ;; The largest valid character in the I18N version of HTML is 65533. |
435 ;; <URL:ftp://ds.internic.net/internet-drafts/draft-ietf-html-i18n-01.txt> | 444 ;; <URL:ftp://ds.internic.net/internet-drafts/draft-ietf-html-i18n-01.txt> |
436 (insert (char-to-string w3-p-s-num))) | 445 ;; wrongo! Apparently, mule doesn't do sane things with char-to-string |
437 | 446 ;; -wmp 7/9/96 |
447 (insert (char-to-string | |
448 (cond | |
449 ((and (boundp 'MULE) (> w3-p-s-num 127)) | |
450 (make-character lc-ltn1 w3-p-s-num)) | |
451 ;;((and (featurep 'mule) (> w3-p-s-num 127)) | |
452 ;;what?? | |
453 ;;) | |
454 (t | |
455 w3-p-s-num))))) | |
438 ((looking-at "&#\\(re\\|rs\\|space\\|tab\\)[\ ;\n]?") ; \n should be \r | 456 ((looking-at "&#\\(re\\|rs\\|space\\|tab\\)[\ ;\n]?") ; \n should be \r |
439 (replace-match (assq (upcase (char-after (+ 3 (point)))) | 457 (replace-match (assq (upcase (char-after (+ 3 (point)))) |
440 '(;; *** Strictly speaking, record end should be | 458 '(;; *** Strictly speaking, record end should be |
441 ;; carriage return. | 459 ;; carriage return. |
442 (?E . "\n") ; RE | 460 (?E . "\n") ; RE |
602 ;; using its content model. | 620 ;; using its content model. |
603 ;; | 621 ;; |
604 ;; NEW-STATE (optional, default *same) is the index of the state to | 622 ;; NEW-STATE (optional, default *same) is the index of the state to |
605 ;; move to after processing the element or one of these: | 623 ;; move to after processing the element or one of these: |
606 ;; *same: no state change occurs. | 624 ;; *same: no state change occurs. |
607 ;; *next: change the the current state + 1. | 625 ;; *next: change the current state + 1. |
608 ;; The initial state is 0. NEW-STATE does not matter if ACTION is | 626 ;; The initial state is 0. NEW-STATE does not matter if ACTION is |
609 ;; *close. | 627 ;; *close. |
610 ;; | 628 ;; |
611 ;; ERRORP (optional, default nil) if non-nil indicates this transition | 629 ;; ERRORP (optional, default nil) if non-nil indicates this transition |
612 ;; represents an error. The error message includes this value if it | 630 ;; represents an error. The error message includes this value if it |
1148 (eval-when-compile | 1166 (eval-when-compile |
1149 (w3-unfold-dtd | 1167 (w3-unfold-dtd |
1150 (w3-expand-parameters | 1168 (w3-expand-parameters |
1151 '( | 1169 '( |
1152 (%headempty . (link base meta range)) | 1170 (%headempty . (link base meta range)) |
1171 (%headmisc . (script)) | |
1153 (%head-deprecated . (nextid)) | 1172 (%head-deprecated . (nextid)) |
1154 | 1173 |
1155 ;; client-side imagemaps | 1174 ;; client-side imagemaps |
1156 (%imagemaps . (area map)) | 1175 (%imagemaps . (area map)) |
1157 ;; special action is taken for %text inside %body.content in the | 1176 ;; special action is taken for %text inside %body.content in the |
1158 ;; content model of each element. | 1177 ;; content model of each element. |
1159 (%body.content . (%heading %block hr div address %imagemaps)) | 1178 (%body.content . (%heading %block hr div address %imagemaps)) |
1160 | 1179 |
1161 (%heading . (h1 h2 h3 h4 h5 h6)) | 1180 (%heading . (h1 h2 h3 h4 h5 h6)) |
1162 | 1181 |
1163 ;; Netscape's CENTER, FONT, and BASEFONT are handled | |
1164 ;; non-standardly. In actual psuedo-HTML on the net, these are | |
1165 ;; used as both text-level constructs and block-level constructs. | |
1166 ;; They are the only items in both %block and %text in this | |
1167 ;; definition here. | |
1168 ;; *** Perhaps add BLINK here too? But no one uses that as a block | |
1169 ;; construct. What about NOBR? | |
1170 (%netscape-crud . (center font basefont)) | |
1171 | |
1172 ;; Emacs-w3 extensions | 1182 ;; Emacs-w3 extensions |
1173 (%emacsw3-crud-nonempty . (roach secret wired)) | |
1174 (%emacsw3-crud . (pinhead flame cookie yogsothoth hype peek)) | 1183 (%emacsw3-crud . (pinhead flame cookie yogsothoth hype peek)) |
1175 | 1184 |
1176 (%block . (p %list dl form %preformatted %netscape-crud font | 1185 (%block . (p %list dl form %preformatted font |
1177 %blockquote isindex fn table fig note | 1186 %blockquote isindex fn table fig note |
1178 %block-deprecated %block-obsoleted)) | 1187 center %block-deprecated %block-obsoleted)) |
1179 (%list . (ul ol)) | 1188 (%list . (ul ol)) |
1180 (%preformatted . (pre)) | 1189 (%preformatted . (pre)) |
1181 (%blockquote . (bq)) | 1190 (%blockquote . (bq)) |
1182 (%block-deprecated . (dir menu blockquote)) | 1191 (%block-deprecated . (dir menu blockquote)) |
1183 (%block-obsoleted . (xmp listing)) | 1192 (%block-obsoleted . (xmp listing)) |
1184 | 1193 |
1185 ;; Why is IMG in this list? | 1194 ;; Why is IMG in this list? |
1186 (%pre.exclusion . (*include img *discard tab math big small sub sup)) | 1195 (%pre.exclusion . (*include img *discard tab math big small sub sup)) |
1187 | 1196 |
1188 (%text . (*data b %notmath %netscape-crud sub sup | 1197 (%text . (*data b %notmath sub sup %emacsw3-crud)) |
1189 %emacsw3-crud %emacsw3-crud-nonempty)) | |
1190 (%notmath . (%special %font %phrase %misc)) | 1198 (%notmath . (%special %font %phrase %misc)) |
1191 (%font . (i u s strike tt big small)) ; B left out for MATH handling | 1199 (%font . (i u s strike tt big small sub sup |
1192 (%phrase . (em strong code samp kbd var cite blink)) | 1200 roach secret wired)) ;; B left out for MATH |
1193 ;; Don't know if this is right place for EMBED. | 1201 (%phrase . (em strong dfn code samp kbd var cite blink)) |
1194 (%special . (a img br wbr nobr tab math embed)) | 1202 (%special . (a img applet font br script map math tab)) |
1195 (%misc . (q lang au dfn person acronym abbrev ins del)) | 1203 (%misc . (q lang au person acronym abbrev ins del)) |
1196 | 1204 |
1197 (%formula . (*data %math)) | 1205 (%formula . (*data %math)) |
1198 (%math . (box above below %mathvec root sqrt array sub sup | 1206 (%math . (box above below %mathvec root sqrt array sub sup |
1199 %mathface)) | 1207 %mathface)) |
1200 (%mathvec . (vec bar dot ddot hat tilde)) | 1208 (%mathvec . (vec bar dot ddot hat tilde)) |
1256 nil | 1264 nil |
1257 (*include *same "after FRAMESET")) | 1265 (*include *same "after FRAMESET")) |
1258 ]) | 1266 ]) |
1259 (end-tag-omissible . t)) | 1267 (end-tag-omissible . t)) |
1260 ((head) | 1268 ((head) |
1261 (content-model . [((title isindex %headempty style %head-deprecated) | 1269 (content-model . [((title isindex %headempty %headmisc |
1270 style %head-deprecated) | |
1262 nil | 1271 nil |
1263 nil | 1272 nil |
1264 ;; *** Should only close if tag can | 1273 ;; *** Should only close if tag can |
1265 ;; legitimately follow head. So many can that | 1274 ;; legitimately follow head. So many can that |
1266 ;; I haven't bothered to enumerate them. | 1275 ;; I haven't bothered to enumerate them. |
1267 (*close))]) | 1276 (*close))]) |
1268 (end-tag-omissible . t)) | 1277 (end-tag-omissible . t)) |
1278 ;; SCRIPT - - (#PCDATA) | |
1279 ((script) | |
1280 (content-model . CDATA ; not official, but allows | |
1281 ; comment hiding of script | |
1282 )) | |
1269 ;; TITLE - - (#PCDATA) | 1283 ;; TITLE - - (#PCDATA) |
1270 ((title) | 1284 ((title) |
1271 (content-model . RCDATA ; not official | 1285 (content-model . RCDATA ; not official |
1272 ;; [((*data) include-space nil nil)] | 1286 ;; [((*data) include-space nil nil)] |
1273 )) | 1287 )) |
1274 ;; STYLE - O (#PCDATA) | 1288 ;; STYLE - O (#PCDATA) |
1275 ;; STYLE needs to be #PCDATA to allow omitted end tag. Bleagh. | 1289 ;; STYLE needs to be #PCDATA to allow omitted end tag. Bleagh. |
1276 ((style) | 1290 ((style) |
1277 (content-model . [((*data) | 1291 (content-model . CDATA) |
1278 include-space | |
1279 nil | |
1280 ;; *** Should only close if tag can | |
1281 ;; legitimately follow style. So many can that | |
1282 ;; I haven't bothered to enumerate them. | |
1283 (*close))]) | |
1284 (end-tag-omissible . t)) | 1292 (end-tag-omissible . t)) |
1285 ((body) | 1293 ((body) |
1286 (content-model . [((banner) nil nil (*retry *next)) | 1294 (content-model . [((banner) nil nil (*retry *next)) |
1287 ((bodytext) nil nil (bodytext *next)) | 1295 ((bodytext) nil nil (bodytext *next)) |
1288 (nil nil (((plaintext) *close)) nil)]) | 1296 (nil nil (((plaintext) *close)) nil)]) |
1300 ;; caused by BQ's sharing of BODYTEXT. BQ | 1308 ;; caused by BQ's sharing of BODYTEXT. BQ |
1301 ;; should have its own BQTEXT. | 1309 ;; should have its own BQTEXT. |
1302 ((credit plaintext) *close)) | 1310 ((credit plaintext) *close)) |
1303 nil)]) | 1311 nil)]) |
1304 (end-tag-omissible . t)) | 1312 (end-tag-omissible . t)) |
1305 ((div banner) | 1313 ((div banner center) |
1306 (content-model . [((%body.content) | 1314 (content-model . [((%body.content) |
1307 nil | 1315 nil |
1308 ;; Push <P> before data characters. Non-SGML. | 1316 ;; Push <P> before data characters. Non-SGML. |
1309 (((%text) p)) | 1317 (((%text) p)) |
1310 nil)])) | 1318 nil)])) |
1448 ((credit) *close)) | 1456 ((credit) *close)) |
1449 nil)]) | 1457 nil)]) |
1450 (end-tag-omissible . t)) | 1458 (end-tag-omissible . t)) |
1451 ((%emacsw3-crud) | 1459 ((%emacsw3-crud) |
1452 (content-model . EMPTY)) | 1460 (content-model . EMPTY)) |
1453 ((%netscape-crud) | |
1454 ;; Special non-SGML treatment of Netscape's shit. | |
1455 (content-model . XINHERIT)) | |
1456 ;; FORM - - %body.content -(FORM) +(INPUT|KEYGEN|SELECT|TEXTAREA) | 1461 ;; FORM - - %body.content -(FORM) +(INPUT|KEYGEN|SELECT|TEXTAREA) |
1457 ((form) | 1462 ((form) |
1458 ;; Same as BODY. Ugh! | 1463 ;; Same as BODY. Ugh! |
1459 (content-model . [((%body.content) | 1464 (content-model . [((%body.content) |
1460 nil | 1465 nil |
1513 ;; bad formatting if the A is *never* closed. | 1518 ;; bad formatting if the A is *never* closed. |
1514 ;;((p) *discard *same error) | 1519 ;;((p) *discard *same error) |
1515 ) | 1520 ) |
1516 nil)]) | 1521 nil)]) |
1517 (exclusions . (a))) | 1522 (exclusions . (a))) |
1518 ((b %font %phrase %misc nobr %emacsw3-crud-nonempty) | 1523 ((b font %font %phrase %misc nobr) |
1519 (content-model . [((%text) | 1524 (content-model . [((%text) |
1520 include-space | 1525 include-space |
1521 ((%in-text-ignore)) | 1526 ((%in-text-ignore)) |
1522 nil)])) | 1527 nil)])) |
1523 ((plaintext) | 1528 ((plaintext) |