Mercurial > hg > xemacs-beta
comparison lisp/w3/w3-parse.el @ 70:131b0175ea99 r20-0b30
Import from CVS: tag r20-0b30
author | cvs |
---|---|
date | Mon, 13 Aug 2007 09:02:59 +0200 |
parents | 1a767b41a199 |
children | 1ce6082ce73f |
comparison
equal
deleted
inserted
replaced
69:804d1389bcd6 | 70:131b0175ea99 |
---|---|
1 ;; Created by: Joe Wells, jbw@cs.bu.edu | 1 ;; Created by: Joe Wells, jbw@csb.bu.edu |
2 ;; Created on: Sat Sep 30 17:25:40 1995 | 2 ;; Created on: Sat Sep 30 17:25:40 1995 |
3 ;; Filename: w3-parse.el | 3 ;; Filename: w3-parse.el |
4 ;; Purpose: Parse HTML and/or SGML for Emacs W3 browser. | 4 ;; Purpose: Parse HTML and/or SGML for Emacs W3 browser. |
5 | 5 |
6 ;; Copyright © 1995, 1996, 1997 Joseph Brian Wells | 6 ;; Copyright © 1995, 1996 Joseph Brian Wells |
7 ;; Copyright © 1993, 1994, 1995 by William M. Perry (wmperry@cs.indiana.edu) | 7 ;; Copyright © 1993, 1994, 1995 by William M. Perry (wmperry@cs.indiana.edu) |
8 ;; | 8 ;; |
9 ;; This program is free software; you can redistribute it and/or modify | 9 ;; This program is free software; you can redistribute it and/or modify |
10 ;; it under the terms of the GNU General Public License as published by | 10 ;; it under the terms of the GNU General Public License as published by |
11 ;; the Free Software Foundation; either version 2 of the License, or | 11 ;; the Free Software Foundation; either version 2 of the License, or |
15 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | 15 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 16 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
17 ;; GNU General Public License for more details. | 17 ;; GNU General Public License for more details. |
18 ;; | 18 ;; |
19 ;; You should have received a copy of the GNU General Public License | 19 ;; You should have received a copy of the GNU General Public License |
20 ;; along with GNU Emacs; see the file COPYING. If not, write to the | 20 ;; along with this program; if not, write to the Free Software |
21 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | 21 ;; Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
22 ;; Boston, MA 02111-1307, USA. | |
23 ;; | 22 ;; |
24 ;; On November 13, 1995, the license was available at | 23 ;; On November 13, 1995, the license was available at |
25 ;; <URL:ftp://prep.ai.mit.edu/pub/gnu/COPYING-2.0>. It may still be | 24 ;; <URL:ftp://prep.ai.mit.edu/pub/gnu/COPYING-2.0>. It may still be |
26 ;; obtainable via that URL. | 25 ;; obtainable via that URL. |
27 | 26 |
56 ;; (remember this is really all one function). Some of the code which | 55 ;; (remember this is really all one function). Some of the code which |
57 ;; updates them is located inside the subfunctions. So that the compiler | 56 ;; updates them is located inside the subfunctions. So that the compiler |
58 ;; will not complain, these variables are defined with defvar. | 57 ;; will not complain, these variables are defined with defvar. |
59 | 58 |
60 (require 'w3-vars) | 59 (require 'w3-vars) |
61 (require 'mule-sysdp) | |
62 | 60 |
63 (eval-when-compile | 61 (eval-when-compile |
64 (defconst w3-p-s-var-list nil | 62 (defconst w3-p-s-var-list nil |
65 "A list of the scratch variables used by functions called by | 63 "A list of the scratch variables used by functions called by |
66 w3-parse-buffer which it is w3-parse-buffer's responsibility to | 64 w3-parse-buffer which it is w3-parse-buffer's responsibility to |
115 | 113 |
116 (defvar w3-p-d-open-element-stack) | 114 (defvar w3-p-d-open-element-stack) |
117 (put 'w3-p-d-open-element-stack 'variable-documentation | 115 (put 'w3-p-d-open-element-stack 'variable-documentation |
118 "A stack of the currently open elements, with the innermost enclosing | 116 "A stack of the currently open elements, with the innermost enclosing |
119 element on top and the outermost on bottom.") | 117 element on top and the outermost on bottom.") |
118 | |
119 (defvar w3-p-d-parse-tag-stream-tail-pointer) | |
120 (put 'w3-p-d-parse-tag-stream-tail-pointer 'variable-documentation | |
121 "Points to last cons cell in parse-tag stream. We add items to tail of | |
122 parse-tag-stream instead of head.") | |
120 | 123 |
121 (defvar w3-p-d-shortrefs) | 124 (defvar w3-p-d-shortrefs) |
122 (put 'w3-p-d-shortrefs 'variable-documentation | 125 (put 'w3-p-d-shortrefs 'variable-documentation |
123 "An alist of the magic entity reference strings in the current | 126 "An alist of the magic entity reference strings in the current |
124 between-tags region and their replacements. Each item is of the format | 127 between-tags region and their replacements. Each item is of the format |
262 (goto-char (point-min)) | 265 (goto-char (point-min)) |
263 (insert "\"") | 266 (insert "\"") |
264 (while (progn | 267 (while (progn |
265 (skip-chars-forward "^\"\\\t\n\r") | 268 (skip-chars-forward "^\"\\\t\n\r") |
266 (not (eobp))) | 269 (not (eobp))) |
267 (insert "\\" (cdr (assq (char-after (point)) '((?\" . "\"") | 270 (insert "\\" (cdr (assq (following-char) '((?\" . "\"") |
268 (?\\ . "\\") | 271 (?\\ . "\\") |
269 (?\t . "t") | 272 (?\t . "t") |
270 (?\n . "n") | 273 (?\n . "n") |
271 (?\r . "r"))))) | 274 (?\r . "r"))))) |
272 (delete-char 1)) | 275 (delete-char 1)) |
273 (insert "\"") | 276 (insert "\"") |
274 (buffer-string))) | 277 (buffer-string))) |
275 | 278 |
276 | 279 |
284 (let ((html-entities w3-html-entities)) | 287 (let ((html-entities w3-html-entities)) |
285 (while html-entities | 288 (while html-entities |
286 (put (car (car html-entities)) 'html-entity-expansion | 289 (put (car (car html-entities)) 'html-entity-expansion |
287 (cons 'CDATA (if (integerp (cdr (car html-entities))) | 290 (cons 'CDATA (if (integerp (cdr (car html-entities))) |
288 (char-to-string | 291 (char-to-string |
289 (mule-make-iso-character (cdr (car html-entities)))) | 292 (let ((c (cdr (car html-entities)))) |
293 (cond | |
294 ((and (> c 127) (boundp 'MULE)) | |
295 (make-character lc-ltn1 c)) | |
296 ;;((and (> c 127) (featurep 'mule)) | |
297 ;; What??? | |
298 ;;) | |
299 (t | |
300 c)))) | |
290 (cdr (car html-entities))))) | 301 (cdr (car html-entities))))) |
291 (setq html-entities (cdr html-entities)))) | 302 (setq html-entities (cdr html-entities)))) |
292 | 303 |
293 ;; These are handled differently than the normal HTML entities because | 304 ;; These are handled differently than the normal HTML entities because |
294 ;; we need to define the entities with 'nil instead of 'CDATA so | 305 ;; we need to define the entities with 'nil instead of 'CDATA so |
428 ;; char-to-string will hopefully do something useful with characters | 439 ;; char-to-string will hopefully do something useful with characters |
429 ;; larger than 255. I think in MULE it does. Is this true? | 440 ;; larger than 255. I think in MULE it does. Is this true? |
430 ;; Bill wants to call w3-resolve-numeric-entity here, but I think | 441 ;; Bill wants to call w3-resolve-numeric-entity here, but I think |
431 ;; that functionality belongs in char-to-string. | 442 ;; that functionality belongs in char-to-string. |
432 ;; The largest valid character in the I18N version of HTML is 65533. | 443 ;; The largest valid character in the I18N version of HTML is 65533. |
433 ;; ftp://ds.internic.net/internet-drafts/draft-ietf-html-i18n-01.txt | 444 ;; <URL:ftp://ds.internic.net/internet-drafts/draft-ietf-html-i18n-01.txt> |
434 ;; wrongo! Apparently, mule doesn't do sane things with char-to-string | 445 ;; wrongo! Apparently, mule doesn't do sane things with char-to-string |
435 ;; -wmp 7/9/96 | 446 ;; -wmp 7/9/96 |
436 (insert (char-to-string | 447 (insert (char-to-string |
437 (mule-make-iso-character w3-p-s-num)))) | 448 (cond |
449 ((and (boundp 'MULE) (> w3-p-s-num 127)) | |
450 (make-character lc-ltn1 w3-p-s-num)) | |
451 ;;((and (featurep 'mule) (> w3-p-s-num 127)) | |
452 ;;what?? | |
453 ;;) | |
454 (t | |
455 w3-p-s-num))))) | |
438 ((looking-at "&#\\(re\\|rs\\|space\\|tab\\)[\ ;\n]?") ; \n should be \r | 456 ((looking-at "&#\\(re\\|rs\\|space\\|tab\\)[\ ;\n]?") ; \n should be \r |
439 (replace-match (assq (upcase (char-after (+ 3 (point)))) | 457 (replace-match (assq (upcase (char-after (+ 3 (point)))) |
440 '(;; *** Strictly speaking, record end should be | 458 '(;; *** Strictly speaking, record end should be |
441 ;; carriage return. | 459 ;; carriage return. |
442 (?E . "\n") ; RE | 460 (?E . "\n") ; RE |
448 ;; doesn't get rescanned. | 466 ;; doesn't get rescanned. |
449 ;; *** Strictly speaking, we should issue a warning for &#foo; if foo | 467 ;; *** Strictly speaking, we should issue a warning for &#foo; if foo |
450 ;; is not a function character in the SGML declaration. | 468 ;; is not a function character in the SGML declaration. |
451 ) | 469 ) |
452 | 470 |
453 ((eq ?& (char-after (point))) | 471 ((eq ?& (following-char)) |
454 ;; We are either looking at an undefined reference or a & that does | 472 ;; We are either looking at an undefined reference or a & that does |
455 ;; not start a reference (in which case we should not have been called). | 473 ;; not start a reference (in which case we should not have been called). |
456 ;; Skip over the &. | 474 ;; Skip over the &. |
457 (forward-char 1)) | 475 (forward-char 1)) |
458 | 476 |
765 (concat "^&<" | 783 (concat "^&<" |
766 (w3-invalid-sgml-chars) | 784 (w3-invalid-sgml-chars) |
767 (if w3-p-d-null-end-tag-enabled "/" "") | 785 (if w3-p-d-null-end-tag-enabled "/" "") |
768 (if w3-p-d-in-parsed-marked-section "]" "") | 786 (if w3-p-d-in-parsed-marked-section "]" "") |
769 (or w3-p-d-shortref-chars "")))) | 787 (or w3-p-d-shortref-chars "")))) |
788 | |
789 ;; Modifies free variable: | |
790 ;; w3-p-d-parse-tag-stream-tail-pointer | |
791 (defsubst w3-add-display-item (tag value) | |
792 (setcdr w3-p-d-parse-tag-stream-tail-pointer | |
793 (list (cons tag value))) | |
794 (setq w3-p-d-parse-tag-stream-tail-pointer | |
795 (cdr w3-p-d-parse-tag-stream-tail-pointer))) | |
796 | |
770 ) | 797 ) |
771 | 798 |
772 (eval-when-compile | 799 (eval-when-compile |
773 (w3-p-s-var-def w3-p-s-overrides) | 800 (w3-p-s-var-def w3-p-s-overrides) |
774 (w3-p-s-var-def w3-p-s-undo-list) | 801 (w3-p-s-var-def w3-p-s-undo-list) |
779 ;; w3-p-d-current-element, w3-p-d-open-element-stack | 806 ;; w3-p-d-current-element, w3-p-d-open-element-stack |
780 ;; Destroys free variables: | 807 ;; Destroys free variables: |
781 ;; w3-p-s-overrides, w3-p-s-undo-list, w3-p-s-var | 808 ;; w3-p-s-overrides, w3-p-s-undo-list, w3-p-s-var |
782 (defsubst w3-open-element (tag attributes) | 809 (defsubst w3-open-element (tag attributes) |
783 | 810 |
811 ;; Send trailing data character item in the old current element to | |
812 ;; display engine. | |
813 (if (stringp (car-safe (w3-element-content w3-p-d-current-element))) | |
814 (w3-add-display-item | |
815 'text | |
816 (car-safe (w3-element-content w3-p-d-current-element)))) | |
817 | |
784 ;; Push new element on stack. | 818 ;; Push new element on stack. |
785 (setq w3-p-d-open-element-stack (cons w3-p-d-current-element | 819 (setq w3-p-d-open-element-stack (cons w3-p-d-current-element |
786 w3-p-d-open-element-stack)) | 820 w3-p-d-open-element-stack)) |
787 (setq w3-p-d-current-element (w3-fresh-element-for-tag tag)) | 821 (setq w3-p-d-current-element (w3-fresh-element-for-tag tag)) |
788 | 822 |
838 (if (eq 'XINHERIT (w3-element-content-model w3-p-d-current-element)) | 872 (if (eq 'XINHERIT (w3-element-content-model w3-p-d-current-element)) |
839 (w3-set-element-content-model | 873 (w3-set-element-content-model |
840 w3-p-d-current-element | 874 w3-p-d-current-element |
841 (w3-element-content-model (car w3-p-d-open-element-stack)))) | 875 (w3-element-content-model (car w3-p-d-open-element-stack)))) |
842 | 876 |
843 ) | 877 ;; Send the start-tag and attributes to the display engine. |
878 (if (memq tag '(plaintext style xmp textarea)) | |
879 ;; Garbage special-casing for old display engine. | |
880 ;; Nothing is sent until end-tag is found. | |
881 ;; The DTD will ensure no subelements of these elements. | |
882 nil | |
883 ;; Normal procedure. | |
884 (w3-add-display-item tag attributes))) | |
844 ) | 885 ) |
845 | 886 |
846 ;; The protocol for handing items to the display engine is as follows. | 887 ;; The protocol for handing items to the display engine is as follows. |
847 ;; | 888 ;; |
848 ;; For an element, send (START-TAG . ATTS), each member of the content, | 889 ;; For an element, send (START-TAG . ATTS), each member of the content, |
917 ;; any trailing data character item to display engine. | 958 ;; any trailing data character item to display engine. |
918 (setq w3-p-s-content (w3-element-content w3-p-d-current-element)) | 959 (setq w3-p-s-content (w3-element-content w3-p-d-current-element)) |
919 (cond ((null w3-p-s-content)) | 960 (cond ((null w3-p-s-content)) |
920 ((equal "\n" (car w3-p-s-content)) | 961 ((equal "\n" (car w3-p-s-content)) |
921 (setq w3-p-s-content (cdr w3-p-s-content))) | 962 (setq w3-p-s-content (cdr w3-p-s-content))) |
922 ) | 963 ((and (stringp (car w3-p-s-content)) |
964 ;; Garbage special-casing for old display engine. | |
965 (not (memq w3-p-s-end-tag | |
966 '(/plaintext /style /xmp /textarea)))) | |
967 (w3-add-display-item 'text (car w3-p-s-content)))) | |
923 | 968 |
924 (cond ;; *** Handle LISTING the way the old parser did. | 969 ;; Send the end-tag to the display engine, but only if the element is |
970 ;; allowed to have an end tag. | |
971 (cond ((memq w3-p-s-end-tag '(/plaintext /style /xmp /textarea)) | |
972 ;; Garbage special-casing for old display engine. | |
973 ;; Format old display engine expects for these elements: | |
974 ;; (START-TAG . ((data . DATA-CHARACTERS) . ATTRIBUTES)) | |
975 (w3-add-display-item | |
976 ;; Use the *start*-tag, not the end-tag. | |
977 (w3-element-name w3-p-d-current-element) | |
978 (cons (cons 'data | |
979 (condition-case nil | |
980 (mapconcat 'identity w3-p-s-content "") | |
981 (error "eeek! subelement content!"))) | |
982 (w3-element-attributes w3-p-d-current-element)))) | |
983 ;; *** Handle LISTING the way the old parser did. | |
925 ((eq 'EMPTY (w3-element-content-model w3-p-d-current-element)) | 984 ((eq 'EMPTY (w3-element-content-model w3-p-d-current-element)) |
926 ;; Do nothing, can't have an end tag. | 985 ;; Do nothing, can't have an end tag. |
927 ) | 986 ) |
928 (t | 987 (t |
929 ;; Normal case. | 988 ;; Normal case. |
989 (w3-add-display-item w3-p-s-end-tag nil) | |
930 (if (null w3-p-s-content) | 990 (if (null w3-p-s-content) |
931 (w3-debug-html | 991 (w3-debug-html |
932 :bad-style :outer | 992 :bad-style :outer |
933 ;; Don't warn for empty TD elements or empty A elements | 993 ;; Don't warn for empty TD elements or empty A elements |
934 ;; with no HREF attribute. | 994 ;; with no HREF attribute. |
1111 (%headmisc . (script)) | 1171 (%headmisc . (script)) |
1112 (%head-deprecated . (nextid)) | 1172 (%head-deprecated . (nextid)) |
1113 | 1173 |
1114 ;; client-side imagemaps | 1174 ;; client-side imagemaps |
1115 (%imagemaps . (area map)) | 1175 (%imagemaps . (area map)) |
1116 (%input.fields . (input select textarea keygen label)) | |
1117 ;; special action is taken for %text inside %body.content in the | 1176 ;; special action is taken for %text inside %body.content in the |
1118 ;; content model of each element. | 1177 ;; content model of each element. |
1119 (%body.content . (%heading %block style hr div address %imagemaps)) | 1178 (%body.content . (%heading %block hr div address %imagemaps)) |
1120 | 1179 |
1121 (%heading . (h1 h2 h3 h4 h5 h6)) | 1180 (%heading . (h1 h2 h3 h4 h5 h6)) |
1122 | 1181 |
1123 ;; Emacs-w3 extensions | 1182 ;; Emacs-w3 extensions |
1124 (%emacsw3-crud . (pinhead flame cookie yogsothoth hype peek)) | 1183 (%emacsw3-crud . (pinhead flame cookie yogsothoth hype peek)) |
1125 | 1184 |
1126 (%block . (p %list dl form %preformatted | 1185 (%block . (p %list dl form %preformatted font |
1127 %blockquote isindex fn table fig note | 1186 %blockquote isindex fn table fig note |
1128 multicol center %block-deprecated %block-obsoleted)) | 1187 center %block-deprecated %block-obsoleted)) |
1129 (%list . (ul ol)) | 1188 (%list . (ul ol)) |
1130 (%preformatted . (pre)) | 1189 (%preformatted . (pre)) |
1131 (%blockquote . (bq)) | 1190 (%blockquote . (bq)) |
1132 (%block-deprecated . (dir menu blockquote)) | 1191 (%block-deprecated . (dir menu blockquote)) |
1133 (%block-obsoleted . (xmp listing)) | 1192 (%block-obsoleted . (xmp listing)) |
1134 | 1193 |
1135 ;; Why is IMG in this list? | 1194 ;; Why is IMG in this list? |
1136 (%pre.exclusion . (*include img *discard tab math big small sub sup)) | 1195 (%pre.exclusion . (*include img *discard tab math big small sub sup)) |
1137 | 1196 |
1138 (%text . (*data b %notmath sub sup %emacsw3-crud %input.fields)) | 1197 (%text . (*data b %notmath sub sup %emacsw3-crud)) |
1139 (%notmath . (%special %font %phrase %misc)) | 1198 (%notmath . (%special %font %phrase %misc)) |
1140 (%font . (i u s strike tt big small sub sup font | 1199 (%font . (i u s strike tt big small sub sup |
1141 roach secret wired)) ;; B left out for MATH | 1200 roach secret wired)) ;; B left out for MATH |
1142 (%phrase . (em strong dfn code samp kbd var cite blink)) | 1201 (%phrase . (em strong dfn code samp kbd var cite blink)) |
1143 (%special . (a img applet object font basefont br script style map math tab span bdo)) | 1202 (%special . (a img applet font br script map math tab)) |
1144 (%misc . (q lang au person acronym abbrev ins del)) | 1203 (%misc . (q lang au person acronym abbrev ins del)) |
1145 | 1204 |
1146 (%formula . (*data %math)) | 1205 (%formula . (*data %math)) |
1147 (%math . (box above below %mathvec root sqrt array sub sup | 1206 (%math . (box above below %mathvec root sqrt array sub sup |
1148 %mathface)) | 1207 %mathface)) |
1216 ;; I haven't bothered to enumerate them. | 1275 ;; I haven't bothered to enumerate them. |
1217 (*close))]) | 1276 (*close))]) |
1218 (end-tag-omissible . t)) | 1277 (end-tag-omissible . t)) |
1219 ;; SCRIPT - - (#PCDATA) | 1278 ;; SCRIPT - - (#PCDATA) |
1220 ((script) | 1279 ((script) |
1221 (content-model . XCDATA ; not official, but allows | 1280 (content-model . CDATA ; not official, but allows |
1222 ; comment hiding of script, and also | 1281 ; comment hiding of script |
1223 ; idiots that use '</' in scripts. | |
1224 )) | 1282 )) |
1225 ;; TITLE - - (#PCDATA) | 1283 ;; TITLE - - (#PCDATA) |
1226 ((title) | 1284 ((title) |
1227 (content-model . RCDATA ; not official | 1285 (content-model . RCDATA ; not official |
1228 ;; [((*data) include-space nil nil)] | 1286 ;; [((*data) include-space nil nil)] |
1250 ;; caused by BQ's sharing of BODYTEXT. BQ | 1308 ;; caused by BQ's sharing of BODYTEXT. BQ |
1251 ;; should have its own BQTEXT. | 1309 ;; should have its own BQTEXT. |
1252 ((credit plaintext) *close)) | 1310 ((credit plaintext) *close)) |
1253 nil)]) | 1311 nil)]) |
1254 (end-tag-omissible . t)) | 1312 (end-tag-omissible . t)) |
1255 ((div banner center multicol) | 1313 ((div banner center) |
1256 (content-model . [((%body.content) | 1314 (content-model . [((%body.content) |
1257 nil | 1315 nil |
1258 ;; Push <P> before data characters. Non-SGML. | 1316 ;; Push <P> before data characters. Non-SGML. |
1259 (((%text) p)) | 1317 (((%text) p)) |
1260 nil)])) | 1318 nil)])) |
1267 ((%heading) | 1325 ((%heading) |
1268 (content-model . [((%text) | 1326 (content-model . [((%text) |
1269 include-space | 1327 include-space |
1270 ((%in-text-ignore)) | 1328 ((%in-text-ignore)) |
1271 nil)])) | 1329 nil)])) |
1272 ((span bdo) | |
1273 (content-model . [((%text) | |
1274 include-space | |
1275 nil | |
1276 nil)]) | |
1277 ) | |
1278 ((p) | 1330 ((p) |
1279 (content-model . [((%text) | 1331 (content-model . [((%text) |
1280 include-space | 1332 include-space |
1281 nil | 1333 nil |
1282 ;; *** Should only close if tag can | 1334 ;; *** Should only close if tag can |
1402 ;; Push <P> before data characters. Very non-SGML. | 1454 ;; Push <P> before data characters. Very non-SGML. |
1403 (((%text) p) | 1455 (((%text) p) |
1404 ((credit) *close)) | 1456 ((credit) *close)) |
1405 nil)]) | 1457 nil)]) |
1406 (end-tag-omissible . t)) | 1458 (end-tag-omissible . t)) |
1407 ((%emacsw3-crud basefont) | 1459 ((%emacsw3-crud) |
1408 (content-model . EMPTY)) | 1460 (content-model . EMPTY)) |
1409 ;; FORM - - %body.content -(FORM) +(INPUT|KEYGEN|SELECT|TEXTAREA) | 1461 ;; FORM - - %body.content -(FORM) +(INPUT|KEYGEN|SELECT|TEXTAREA) |
1410 ((form) | 1462 ((form) |
1411 ;; Same as BODY. Ugh! | 1463 ;; Same as BODY. Ugh! |
1412 (content-model . [((%body.content) | 1464 (content-model . [((%body.content) |
1484 ;; <URL:ftp://ds.internic.net/internet-drafts/draft-ietf-html-tables-03.txt> | 1536 ;; <URL:ftp://ds.internic.net/internet-drafts/draft-ietf-html-tables-03.txt> |
1485 ((table) | 1537 ((table) |
1486 (content-model . [(nil | 1538 (content-model . [(nil |
1487 nil | 1539 nil |
1488 (((caption) *include *next) | 1540 (((caption) *include *next) |
1489 ((%text) tr *same error) | |
1490 ((col colgroup thead tfoot tbody tr) *retry *next)) | 1541 ((col colgroup thead tfoot tbody tr) *retry *next)) |
1491 (*retry *next)) ;error handling | 1542 (*retry *next)) ;error handling |
1492 ((col colgroup) | 1543 ((col colgroup) |
1493 nil | 1544 nil |
1494 (((thead tfoot tbody tr) *retry *next)) | 1545 (((thead tfoot tbody tr) *retry *next)) |
1528 ((tfoot tbody) | 1579 ((tfoot tbody) |
1529 (content-model . [((tr) | 1580 (content-model . [((tr) |
1530 nil | 1581 nil |
1531 (((tbody) *close) | 1582 (((tbody) *close) |
1532 ;; error handling | 1583 ;; error handling |
1533 ((td th) tr *same error) | |
1534 ((%body.content) tr *same error)) | 1584 ((%body.content) tr *same error)) |
1535 nil)]) | 1585 nil)]) |
1536 (end-tag-omissible . t)) | 1586 (end-tag-omissible . t)) |
1537 ((tr) | 1587 ((tr) |
1538 (content-model . [((td th) | 1588 (content-model . [((td th) |
1539 nil | 1589 nil |
1540 (((tr tfoot tbody) *close) | 1590 (((tr tfoot tbody) *close) |
1541 ;; error handling | 1591 ;; error handling |
1542 ((%body.content %text) td *same error)) | 1592 ((%body.content) td *same error)) |
1543 nil)]) | 1593 nil)]) |
1544 (end-tag-omissible . t)) | 1594 (end-tag-omissible . t)) |
1545 ((td th) | 1595 ((td th) |
1546 ;; Arrgh! Another %body.content!!! Stupid!!! | 1596 ;; Arrgh! Another %body.content!!! Stupid!!! |
1547 (content-model . [((%body.content) | 1597 (content-model . [((%body.content) |
1664 nil)])) | 1714 nil)])) |
1665 ((frame) | 1715 ((frame) |
1666 (content-model . EMPTY)) | 1716 (content-model . EMPTY)) |
1667 ;; | 1717 ;; |
1668 ;; APPLET is a Java thing. | 1718 ;; APPLET is a Java thing. |
1669 ;; OBJECT is a cougar thing | |
1670 ;; <URL:http://java.sun.com/JDK-beta/filesinkit/README> | 1719 ;; <URL:http://java.sun.com/JDK-beta/filesinkit/README> |
1671 ((applet object) | 1720 ((applet) |
1672 ;; I really don't want to add another ANY content-model. | 1721 ;; I really don't want to add another ANY content-model. |
1673 (content-model . XINHERIT) | 1722 (content-model . XINHERIT) |
1674 (inclusions . (param))) | 1723 (inclusions . (param))) |
1675 ((param) | 1724 ((param) |
1676 (content-model . EMPTY)) | 1725 (content-model . EMPTY)) |
1698 (w3-p-s-var-def w3-p-s-state-transitions) | 1747 (w3-p-s-var-def w3-p-s-state-transitions) |
1699 (w3-p-s-var-def w3-p-s-transition) | 1748 (w3-p-s-var-def w3-p-s-transition) |
1700 (w3-p-s-var-def w3-p-s-tran-list) | 1749 (w3-p-s-var-def w3-p-s-tran-list) |
1701 (w3-p-s-var-def w3-p-s-content-model) | 1750 (w3-p-s-var-def w3-p-s-content-model) |
1702 (w3-p-s-var-def w3-p-s-except) | 1751 (w3-p-s-var-def w3-p-s-except) |
1703 (w3-p-s-var-def w3-p-s-baseobject) | |
1704 (w3-p-s-var-def w3-p-s-btdt) | |
1705 ;; Uses free variables: | 1752 ;; Uses free variables: |
1706 ;; w3-p-d-current-element, w3-p-d-exceptions | 1753 ;; w3-p-d-current-element, w3-p-d-exceptions |
1707 ;; Destroys free variables: | 1754 ;; Destroys free variables: |
1708 ;; w3-p-s-includep, w3-p-s-state-transitions, w3-p-s-transition, | 1755 ;; w3-p-s-includep, w3-p-s-state-transitions, w3-p-s-transition, |
1709 ;; w3-p-s-tran-list, w3-p-s-content-model, w3-p-s-except | 1756 ;; w3-p-s-tran-list, w3-p-s-content-model, w3-p-s-except |
1716 (w3-element-content-model w3-p-d-current-element))) | 1763 (w3-element-content-model w3-p-d-current-element))) |
1717 (or (and (memq w3-p-s-content-model | 1764 (or (and (memq w3-p-s-content-model |
1718 '(CDATA RCDATA XCDATA XXCDATA)) | 1765 '(CDATA RCDATA XCDATA XXCDATA)) |
1719 (memq tag-name '(*data *space))) | 1766 (memq tag-name '(*data *space))) |
1720 ;; *** Implement ANY. | 1767 ;; *** Implement ANY. |
1721 (error "impossible content model lossage")) | 1768 (error "impossible")) |
1722 (setq w3-p-s-includep t) | 1769 (setq w3-p-s-includep t) |
1723 ;; Exit loop. | 1770 ;; Exit loop. |
1724 nil) | 1771 nil) |
1725 (t | 1772 (t |
1726 ;; We have a complex content model. | 1773 ;; We have a complex content model. |
1896 (w3-open-element (car w3-p-s-transition) nil) | 1943 (w3-open-element (car w3-p-s-transition) nil) |
1897 ;; Now we loop and try again in the new element's | 1944 ;; Now we loop and try again in the new element's |
1898 ;; content-model. | 1945 ;; content-model. |
1899 t) | 1946 t) |
1900 (t | 1947 (t |
1901 (error "impossible transition"))))))) | 1948 (error "impossible"))))))) |
1902 | 1949 |
1903 ;; Empty while loop body. | 1950 ;; Empty while loop body. |
1904 ) | 1951 ) |
1905 | 1952 |
1906 ;; Return value to user indicating whether to include or discard item: | 1953 ;; Return value to user indicating whether to include or discard item: |
1936 ;; % % | 1983 ;; % % |
1937 ;; % This is the *ONLY* valid entry point in this file! % | 1984 ;; % This is the *ONLY* valid entry point in this file! % |
1938 ;; % DO NOT call any of the other functions! % | 1985 ;; % DO NOT call any of the other functions! % |
1939 ;; % % | 1986 ;; % % |
1940 ;; %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% | 1987 ;; %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% |
1941 (defun w3-parse-buffer (&optional buff) | 1988 (defun w3-parse-buffer (&optional buff nodraw) |
1942 "Parse contents of BUFF as HTML. | 1989 "Parse contents of BUFF as HTML. |
1943 BUFF defaults to the value of url-working-buffer. | 1990 BUFF defaults to the value of url-working-buffer. |
1944 Destructively alters contents of BUFF. | 1991 Destructively alters contents of BUFF. |
1992 Unless optional second argument NODRAW is non-nil, calls the display | |
1993 engine on the parsed HTML. | |
1945 Returns a data structure containing the parsed information." | 1994 Returns a data structure containing the parsed information." |
1946 | 1995 |
1947 (set-buffer (or buff url-working-buffer)) | 1996 (set-buffer (or buff url-working-buffer)) |
1948 (setq buff (current-buffer)) | 1997 (setq buff (current-buffer)) |
1949 (set-syntax-table w3-sgml-md-syntax-table) | 1998 (set-syntax-table w3-sgml-md-syntax-table) |
1960 (goto-char (point-min)) | 2009 (goto-char (point-min)) |
1961 | 2010 |
1962 ;; *** Should premunge line boundaries. | 2011 ;; *** Should premunge line boundaries. |
1963 ;; ******************** | 2012 ;; ******************** |
1964 | 2013 |
2014 ;; Prepare another buffer to draw in unless told not to. | |
2015 (if (not nodraw) | |
2016 (w3-prepare-draw-buffer-for-parse-buffer)) | |
2017 | |
1965 (let* ( | 2018 (let* ( |
1966 ;; Speed hack, see the variable doc string. | 2019 ;; Speed hack, see the variable doc string. |
1967 (gc-cons-threshold (if (> w3-gc-cons-threshold-multiplier 0) | 2020 (gc-cons-threshold (if (> w3-gc-cons-threshold-multiplier 0) |
1968 (* w3-gc-cons-threshold-multiplier | 2021 (* w3-gc-cons-threshold-multiplier |
1969 gc-cons-threshold) | 2022 gc-cons-threshold) |
1983 (format "Parsed %%3d%%%% of %d..." (- (point-max) (point-min))))) | 2036 (format "Parsed %%3d%%%% of %d..." (- (point-max) (point-min))))) |
1984 | 2037 |
1985 ;; Use a float value for 100 if possible, otherwise integer. | 2038 ;; Use a float value for 100 if possible, otherwise integer. |
1986 ;; Determine which we can use outside of the loop for speed. | 2039 ;; Determine which we can use outside of the loop for speed. |
1987 (one-hundred (funcall (if (fboundp 'float) 'float 'identity) 100)) | 2040 (one-hundred (funcall (if (fboundp 'float) 'float 'identity) 100)) |
2041 | |
2042 ;; Speed up checking whether to do incremental display. | |
2043 (w3-do-incremental-display (if nodraw nil w3-do-incremental-display)) | |
2044 | |
2045 ;; Used to convert parse tree to tag stream that old display | |
2046 ;; engine expects. Will change when display engine is rewritten. | |
2047 (parse-tag-stream '(*dummy)) | |
2048 | |
2049 ;; See doc string. | |
2050 (w3-p-d-parse-tag-stream-tail-pointer parse-tag-stream) | |
2051 | |
2052 ;; Points to cons cell in parse-tag-stream whose car is the last | |
2053 ;; item that has been sent to display engine. | |
2054 (parse-tag-stream-last-displayed-item parse-tag-stream) | |
1988 | 2055 |
1989 ;; The buffer which contains the HTML we are parsing. This | 2056 ;; The buffer which contains the HTML we are parsing. This |
1990 ;; variable is used to avoid using the more expensive | 2057 ;; variable is used to avoid using the more expensive |
1991 ;; save-excursion. | 2058 ;; save-excursion. |
1992 (parse-buffer (current-buffer)) | 2059 (parse-buffer (current-buffer)) |
2069 ;; Scratch variables used in this function | 2136 ;; Scratch variables used in this function |
2070 ref attr-name attr-value content-model content open-list | 2137 ref attr-name attr-value content-model content open-list |
2071 ) | 2138 ) |
2072 ;; Scratch variables used by macros and defsubsts we call. | 2139 ;; Scratch variables used by macros and defsubsts we call. |
2073 (w3-p-s-let-bindings | 2140 (w3-p-s-let-bindings |
2141 | |
2074 (w3-update-non-markup-chars) | 2142 (w3-update-non-markup-chars) |
2075 (setq w3-p-s-baseobject (url-generic-parse-url (url-view-url t))) | 2143 |
2076 ;; Main loop. Handle markup as follows: | 2144 ;; Main loop. Handle markup as follows: |
2077 ;; | 2145 ;; |
2078 ;; non-empty tag: Handle the region since the previous tag as PCDATA, | 2146 ;; non-empty tag: Handle the region since the previous tag as PCDATA, |
2079 ;; RCDATA, CDATA, if allowed by syntax. Then handle the tag. | 2147 ;; RCDATA, CDATA, if allowed by syntax. Then handle the tag. |
2080 ;; | 2148 ;; |
2103 (setq last-loop-start (point))) | 2171 (setq last-loop-start (point))) |
2104 | 2172 |
2105 ;; Display progress messages if asked and/or do incremental display | 2173 ;; Display progress messages if asked and/or do incremental display |
2106 ;; of results | 2174 ;; of results |
2107 (cond ((= 0 (% (setq loop-count (1+ loop-count)) 40)) | 2175 (cond ((= 0 (% (setq loop-count (1+ loop-count)) 40)) |
2176 (if w3-do-incremental-display | |
2177 (w3-pause)) | |
2108 (if status-message-format | 2178 (if status-message-format |
2109 (message status-message-format | 2179 (message status-message-format |
2110 ;; Percentage of buffer processed. | 2180 ;; Percentage of buffer processed. |
2111 (/ (* (point) one-hundred) (point-max)))))) | 2181 (/ (* (point) one-hundred) (point-max)))))) |
2112 | 2182 |
2115 | 2185 |
2116 ;; We are looking at a markup-starting character, and invalid | 2186 ;; We are looking at a markup-starting character, and invalid |
2117 ;; character, or end of buffer. | 2187 ;; character, or end of buffer. |
2118 (cond | 2188 (cond |
2119 | 2189 |
2120 ((eq ?< (char-after (point))) | 2190 ((= ?< (following-char)) |
2121 | 2191 |
2122 ;; We are looking at a tag, comment, markup declaration, SGML marked | 2192 ;; We are looking at a tag, comment, markup declaration, SGML marked |
2123 ;; section, SGML processing instruction, or non-markup "<". | 2193 ;; section, SGML processing instruction, or non-markup "<". |
2124 (forward-char) | 2194 (forward-char) |
2125 (cond | 2195 (cond |
2126 | 2196 |
2127 ((looking-at "/?\\([a-z][-a-z0-9.]*\\)") | 2197 ((looking-at "/?\\([a-z][-a-z0-9.]*\\)") |
2128 ;; We are looking at a non-empty tag. | 2198 ;; We are looking at a non-empty tag. |
2129 | 2199 |
2130 ;; Downcase it in the buffer, to save creation of a string | |
2131 (downcase-region (match-beginning 1) (match-end 1)) | |
2132 (setq w3-p-d-tag-name | 2200 (setq w3-p-d-tag-name |
2133 (intern (buffer-substring (match-beginning 1) | 2201 (intern (downcase (buffer-substring (match-beginning 1) |
2134 (match-end 1)))) | 2202 (match-end 1))))) |
2135 (setq w3-p-d-end-tag-p (eq ?/ (char-after (point))) | 2203 (setq w3-p-d-end-tag-p (= ?/ (following-char))) |
2136 between-tags-end (1- (point))) | 2204 (setq between-tags-end (1- (point))) |
2137 (goto-char (match-end 0)) | 2205 (goto-char (match-end 0)) |
2138 | 2206 |
2139 ;; Read the attributes from a start-tag. | 2207 ;; Read the attributes from a start-tag. |
2140 (if w3-p-d-end-tag-p | 2208 (or |
2141 (if (looking-at "[ \t\r\n/]*[<>]") | 2209 w3-p-d-end-tag-p |
2142 nil | |
2143 ;; This is in here to deal with those idiots who stick | |
2144 ;; attribute/value pairs on end tags. *sigh* | |
2145 (w3-debug-html "Evil attributes on end tag.") | |
2146 (skip-chars-forward "^>")) | |
2147 | 2210 |
2148 ;; Attribute values can be: | 2211 ;; Attribute values can be: |
2149 ;; "STRING" where STRING does not contain the double quote | 2212 ;; "STRING" where STRING does not contain the double quote |
2150 ;; 'STRING' where STRING does not contain the single quote | 2213 ;; 'STRING' where STRING does not contain the single quote |
2151 ;; name-start character, *name character | 2214 ;; name-start character, *name character |
2162 (concat | 2225 (concat |
2163 ;; Leading whitespace. | 2226 ;; Leading whitespace. |
2164 "[ \n\r\t]*" | 2227 "[ \n\r\t]*" |
2165 ;; The attribute name, possibly with a bad syntax | 2228 ;; The attribute name, possibly with a bad syntax |
2166 ;; component. | 2229 ;; component. |
2167 "\\([a-z_][-a-z0-9.]*\\(\\([_][-a-z0-9._]*\\)?\\)\\)" | 2230 "\\([a-z][-a-z0-9.]*\\(\\([_][-a-z0-9._]*\\)?\\)\\)" |
2168 ;; Trailing whitespace and perhaps an "=". | 2231 ;; Trailing whitespace and perhaps an "=". |
2169 "[ \n\r\t]*\\(\\(=[ \n\r\t]*\\)?\\)"))) | 2232 "[ \n\r\t]*\\(\\(=[ \n\r\t]*\\)?\\)"))) |
2170 | 2233 |
2171 (cond ((/= (match-beginning 2) (match-end 2)) | 2234 (cond ((/= (match-beginning 2) (match-end 2)) |
2172 (w3-debug-html | 2235 (w3-debug-html |
2173 :nocontext | 2236 :nocontext |
2174 (format "Bad attribute name syntax: %s" | 2237 (format "Bad attribute name syntax: %s" |
2175 (buffer-substring (match-beginning 1) | 2238 (buffer-substring (match-beginning 1) |
2176 (match-end 1)))))) | 2239 (match-end 1)))))) |
2177 | 2240 |
2178 ;; Downcase it in the buffer, to save creation of a string | |
2179 (downcase-region (match-beginning 1) (match-end 1)) | |
2180 (setq attr-name | 2241 (setq attr-name |
2181 (intern (buffer-substring (match-beginning 1) | 2242 (intern (downcase (buffer-substring (match-beginning 1) |
2182 (match-end 1)))) | 2243 (match-end 1))))) |
2183 (goto-char (match-end 0)) | 2244 (goto-char (match-end 0)) |
2184 (cond | 2245 (cond |
2185 ((< (match-beginning 4) (match-end 4)) | 2246 ((< (match-beginning 4) (match-end 4)) |
2186 ;; A value was specified (e.g. ATTRIBUTE=VALUE). | 2247 ;; A value was specified (e.g. ATTRIBUTE=VALUE). |
2187 (cond | 2248 (cond |
2190 (concat | 2251 (concat |
2191 ;; Literal with double quotes. | 2252 ;; Literal with double quotes. |
2192 "\"\\([^\"]*\\)\"" | 2253 "\"\\([^\"]*\\)\"" |
2193 "\\|" | 2254 "\\|" |
2194 ;; Literal with single quotes. | 2255 ;; Literal with single quotes. |
2195 "'\\([^']*\\)'" | 2256 "'\\([^']\\)*'" |
2196 "\\|" | 2257 "\\|" |
2197 ;; Handle bad HTML conflicting with NET-enabling | 2258 ;; Handle bad HTML conflicting with NET-enabling |
2198 ;; start-tags. | 2259 ;; start-tags. |
2199 "\\([-a-z0-9.]+/[-a-z0-9._/#]+\\)[ \t\n\r>]" | 2260 "\\([-a-z0-9.]+/[-a-z0-9._/#]+\\)[ \t\n\r>]" |
2200 "\\|" | 2261 "\\|" |
2223 (goto-char (point-min)) | 2284 (goto-char (point-min)) |
2224 (while (progn | 2285 (while (progn |
2225 (skip-chars-forward "^&") | 2286 (skip-chars-forward "^&") |
2226 (not (eobp))) | 2287 (not (eobp))) |
2227 (w3-expand-entity-at-point-maybe)) | 2288 (w3-expand-entity-at-point-maybe)) |
2228 (subst-char-in-region (point-min) (point-max) ?\t ? ) | 2289 (subst-char-in-region (point-min) (point-max) ?\t 32) |
2229 (subst-char-in-region (point-min) (point-max) ?\n ? )) | 2290 (subst-char-in-region (point-min) (point-max) ?\n 32)) |
2230 ;; Set this after we have changed the size of the | 2291 ;; Set this after we have changed the size of the |
2231 ;; attribute. | 2292 ;; attribute. |
2232 (setq attribute-value-end (1+ (point-max)))) | 2293 (setq attribute-value-end (1+ (point-max)))) |
2233 ((match-beginning 4) | 2294 ((match-beginning 4) |
2234 (setq attribute-value-end (match-end 4)) | 2295 (setq attribute-value-end (match-end 4)) |
2242 (match-end 3))) | 2303 (match-end 3))) |
2243 (w3-debug-html :nocontext | 2304 (w3-debug-html :nocontext |
2244 (format "Evil attribute value syntax: %s" | 2305 (format "Evil attribute value syntax: %s" |
2245 (buffer-substring (point-min) (point-max))))) | 2306 (buffer-substring (point-min) (point-max))))) |
2246 (t | 2307 (t |
2247 (error "impossible attribute value")))) | 2308 (error "impossible")))) |
2248 ((memq (char-after (point)) '(?\" ?')) | 2309 ((memq (following-char) '(?\" ?')) |
2249 ;; Missing terminating quote character. | 2310 ;; Missing terminating quote character. |
2250 (narrow-to-region (point) | 2311 (narrow-to-region (point) |
2251 (progn | 2312 (progn |
2252 (forward-char 1) | 2313 (forward-char 1) |
2253 (skip-chars-forward "^ \t\n\r'\"<>") | 2314 (skip-chars-forward "^ \t\n\r'\"=<>") |
2254 (setq attribute-value-end (point)))) | 2315 (setq attribute-value-end (point)))) |
2255 (w3-debug-html :nocontext | 2316 (w3-debug-html :nocontext |
2256 (format "Attribute value missing end quote: %s" | 2317 (format "Attribute value missing end quote: %s" |
2257 (buffer-substring (point-min) (point-max)))) | 2318 (buffer-substring (point-min) (point-max)))) |
2258 (narrow-to-region (1+ (point-min)) (point-max))) | 2319 (narrow-to-region (1+ (point-min)) (point-max))) |
2259 (t | 2320 (t |
2260 ;; We have a syntactically invalid attribute value. Let's | 2321 ;; We have a syntactically invalid attribute value. Let's |
2261 ;; make a best guess as to what the author intended. | 2322 ;; make a best guess as to what the author intended. |
2262 (narrow-to-region (point) | 2323 (narrow-to-region (point) |
2263 (progn | 2324 (progn |
2264 (skip-chars-forward "^ \t\n\r'\"<>") | 2325 (skip-chars-forward "^ \t\n\r'\"=<>") |
2265 (setq attribute-value-end (point)))) | 2326 (setq attribute-value-end (point)))) |
2266 (w3-debug-html :nocontext | 2327 (w3-debug-html :nocontext |
2267 (format "Bad attribute value syntax: %s" | 2328 (format "Bad attribute value syntax: %s" |
2268 (buffer-substring (point-min) (point-max)))))) | 2329 (buffer-substring (point-min) (point-max)))))) |
2269 ;; Now we have isolated the attribute value. We need to | 2330 ;; Now we have isolated the attribute value. We need to |
2276 ;; * smash case | 2337 ;; * smash case |
2277 ;; * remove leading/trailing whitespace | 2338 ;; * remove leading/trailing whitespace |
2278 ;; * smash multiple space sequences into single spaces | 2339 ;; * smash multiple space sequences into single spaces |
2279 ;; * verify the syntax of each token | 2340 ;; * verify the syntax of each token |
2280 (setq attr-value (buffer-substring (point-min) (point-max))) | 2341 (setq attr-value (buffer-substring (point-min) (point-max))) |
2281 (case attr-name | |
2282 (class | |
2283 (setq attr-value (split-string attr-value "[ ,]+"))) | |
2284 (align | |
2285 (if (string-match "^[ \t\r\n]*\\(.*\\)[ \t\r\n]*$" | |
2286 attr-value) | |
2287 (setq attr-value (downcase | |
2288 (substring attr-value | |
2289 (match-beginning 1) | |
2290 (match-end 1)))) | |
2291 (setq attr-value (downcase attr-value))) | |
2292 (setq attr-value (intern attr-value))) | |
2293 ((src href) | |
2294 ;; I should expand URLs here | |
2295 ) | |
2296 (otherwise nil) | |
2297 ) | |
2298 (widen) | 2342 (widen) |
2299 (goto-char attribute-value-end)) | 2343 (goto-char attribute-value-end)) |
2300 (t | 2344 (t |
2301 ;; No value was specified, in which case NAME should be | 2345 ;; No value was specified, in which case NAME should be |
2302 ;; taken as ATTRIBUTE=NAME where NAME is one of the | 2346 ;; taken as ATTRIBUTE=NAME where NAME is one of the |
2306 ;; is wrong. | 2350 ;; is wrong. |
2307 (setq attr-value (symbol-name attr-name)))) | 2351 (setq attr-value (symbol-name attr-name)))) |
2308 | 2352 |
2309 ;; Accumulate the attributes. | 2353 ;; Accumulate the attributes. |
2310 (setq tag-attributes (cons (cons attr-name attr-value) | 2354 (setq tag-attributes (cons (cons attr-name attr-value) |
2311 tag-attributes))) | 2355 tag-attributes)))) |
2312 | |
2313 (cond | |
2314 ((and (eq w3-p-d-tag-name 'base) | |
2315 (setq w3-p-s-baseobject | |
2316 (or (assq 'src tag-attributes) | |
2317 (assq 'href tag-attributes)))) | |
2318 (setq w3-p-s-baseobject (url-generic-parse-url | |
2319 (cdr w3-p-s-baseobject)))) | |
2320 ((setq w3-p-s-btdt (or (assq 'src tag-attributes) | |
2321 (assq 'href tag-attributes) | |
2322 (assq 'action tag-attributes))) | |
2323 (setcdr w3-p-s-btdt (url-expand-file-name (cdr w3-p-s-btdt) | |
2324 w3-p-s-baseobject)) | |
2325 (setq w3-p-s-btdt (if (url-have-visited-url (cdr w3-p-s-btdt)) | |
2326 ":visited" | |
2327 ":link")) | |
2328 (if (assq 'class tag-attributes) | |
2329 (setcdr (assq 'class tag-attributes) | |
2330 (cons w3-p-s-btdt | |
2331 (cdr (assq 'class tag-attributes)))) | |
2332 (setq tag-attributes (cons (cons 'class (list w3-p-s-btdt)) | |
2333 tag-attributes)))) | |
2334 ) | |
2335 (if (not (eq w3-p-d-tag-name 'input)) | |
2336 nil | |
2337 (setq w3-p-s-btdt (concat ":" | |
2338 (downcase | |
2339 (or (cdr-safe | |
2340 (assq 'type tag-attributes)) | |
2341 "text")))) | |
2342 (if (assq 'class tag-attributes) | |
2343 (setcdr (assq 'class tag-attributes) | |
2344 (cons w3-p-s-btdt | |
2345 (cdr (assq 'class tag-attributes)))) | |
2346 (setq tag-attributes (cons (cons 'class (list w3-p-s-btdt)) | |
2347 tag-attributes)))) | |
2348 ) | |
2349 | 2356 |
2350 ;; Process the end of the tag. | 2357 ;; Process the end of the tag. |
2351 (skip-chars-forward " \t\n\r") | 2358 (skip-chars-forward " \t\n\r") |
2352 (cond ((eq ?> (char-after (point))) | 2359 (cond ((= ?> (following-char)) |
2353 ;; Ordinary tag end. | 2360 ;; Ordinary tag end. |
2354 (forward-char 1)) | 2361 (forward-char 1)) |
2355 ((and (eq ?/ (char-after (point))) | 2362 ((and (= ?/ (following-char)) |
2356 (not w3-p-d-end-tag-p)) | 2363 (not w3-p-d-end-tag-p)) |
2357 ;; This is a NET-enabling start-tag. | 2364 ;; This is a NET-enabling start-tag. |
2358 (setq net-tag-p t) | 2365 (setq net-tag-p t) |
2359 (forward-char 1)) | 2366 (forward-char 1)) |
2360 ((eq ?< (char-after (point))) | 2367 ((= ?< (following-char)) |
2361 ;; *** Strictly speaking, the following text has to | 2368 ;; *** Strictly speaking, the following text has to |
2362 ;; lexically be STAGO or ETAGO, which means that it | 2369 ;; lexically be STAGO or ETAGO, which means that it |
2363 ;; can't match some other lexical unit. | 2370 ;; can't match some other lexical unit. |
2364 ;; Unclosed tag. | 2371 ;; Unclosed tag. |
2365 nil) | 2372 nil) |
2372 | 2379 |
2373 (setq tag-end (point))) | 2380 (setq tag-end (point))) |
2374 | 2381 |
2375 ((looking-at "/?>") | 2382 ((looking-at "/?>") |
2376 ;; We are looking at an empty tag (<>, </>). | 2383 ;; We are looking at an empty tag (<>, </>). |
2377 (setq w3-p-d-end-tag-p (eq ?/ (char-after (point)))) | 2384 (setq w3-p-d-end-tag-p (= ?/ (following-char))) |
2378 (setq w3-p-d-tag-name (if w3-p-d-end-tag-p | 2385 (setq w3-p-d-tag-name (if w3-p-d-end-tag-p |
2379 (w3-element-name w3-p-d-current-element) | 2386 (w3-element-name w3-p-d-current-element) |
2380 ;; *** Strictly speaking, if OMITTAG NO, then | 2387 ;; *** Strictly speaking, if OMITTAG NO, then |
2381 ;; we should use the most recently closed tag. | 2388 ;; we should use the most recently closed tag. |
2382 ;; But OMITTAG YES in HTML and I'm lazy. | 2389 ;; But OMITTAG YES in HTML and I'm lazy. |
2434 ;; parentheses somewhere inside the declaration. Handling | 2441 ;; parentheses somewhere inside the declaration. Handling |
2435 ;; this properly would require full parsing of markup | 2442 ;; this properly would require full parsing of markup |
2436 ;; declarations, a goal for the future. | 2443 ;; declarations, a goal for the future. |
2437 (w3-debug-html "Bad <! syntax.") | 2444 (w3-debug-html "Bad <! syntax.") |
2438 (skip-chars-forward "^>") | 2445 (skip-chars-forward "^>") |
2439 (if (eq ?> (char-after (point))) | 2446 (if (= ?> (following-char)) |
2440 (forward-char)))) | 2447 (forward-char)))) |
2441 (point)))) | 2448 (point)))) |
2442 | 2449 |
2443 ((looking-at "!\\\[\\(\\([ \t\n\r]*[a-z]+\\)+[ \t\n\r]*\\)\\\[") | 2450 ((looking-at "!\\\[\\(\\([ \t\n\r]*[a-z]+\\)+[ \t\n\r]*\\)\\\[") |
2444 ;; We are looking at a marked section. | 2451 ;; We are looking at a marked section. |
2461 (keyword (car-safe (cond ((memq 'IGNORE keywords)) | 2468 (keyword (car-safe (cond ((memq 'IGNORE keywords)) |
2462 ((memq 'CDATA keywords)) | 2469 ((memq 'CDATA keywords)) |
2463 ((memq 'RCDATA keywords)) | 2470 ((memq 'RCDATA keywords)) |
2464 ((memq 'INCLUDE keywords)) | 2471 ((memq 'INCLUDE keywords)) |
2465 ((memq 'TEMP keywords)))))) | 2472 ((memq 'TEMP keywords)))))) |
2466 (or (eq ?\[ (char-after (point))) | 2473 (or (= ?\[ (following-char)) |
2467 ;; I probably shouldn't even check this, since it is so | 2474 ;; I probably shouldn't even check this, since it is so |
2468 ;; impossible. | 2475 ;; impossible. |
2469 (error "impossible ??")) | 2476 (error "impossible")) |
2470 (forward-char 1) | 2477 (forward-char 1) |
2471 (delete-region (1- (match-beginning 0)) (point)) | 2478 (delete-region (1- (match-beginning 0)) (point)) |
2472 (cond ((eq 'IGNORE keyword) | 2479 (cond ((eq 'IGNORE keyword) |
2473 ;; Scan forward skipping over matching <![ ... ]]> | 2480 ;; Scan forward skipping over matching <![ ... ]]> |
2474 ;; until we find an unmatched "]]>". | 2481 ;; until we find an unmatched "]]>". |
2502 (backward-char 1) | 2509 (backward-char 1) |
2503 (delete-region | 2510 (delete-region |
2504 (point) | 2511 (point) |
2505 (progn | 2512 (progn |
2506 (skip-chars-forward "^>") | 2513 (skip-chars-forward "^>") |
2507 (if (eq ?> (char-after (point))) | 2514 (if (= ?> (following-char)) |
2508 (forward-char)) | 2515 (forward-char)) |
2509 (point)))) | 2516 (point)))) |
2510 (t | 2517 (t |
2511 ;; This < is not a markup character. Pretend we didn't notice | 2518 ;; This < is not a markup character. Pretend we didn't notice |
2512 ;; it at all. We have skipped over the < already, so just loop | 2519 ;; it at all. We have skipped over the < already, so just loop |
2513 ;; again. | 2520 ;; again. |
2514 ))) | 2521 ))) |
2515 | 2522 |
2516 ((eq ?& (char-after (point))) | 2523 ((= ?& (following-char)) |
2517 (w3-expand-entity-at-point-maybe)) | 2524 (w3-expand-entity-at-point-maybe)) |
2518 | 2525 |
2519 ((and (eq ?\] (char-after (point))) | 2526 ((and (= ?\] (following-char)) |
2520 w3-p-d-in-parsed-marked-section | 2527 w3-p-d-in-parsed-marked-section |
2521 (looking-at "]]>")) | 2528 (looking-at "]]>")) |
2522 ;; *** handle the end of a parsed marked section. | 2529 ;; *** handle the end of a parsed marked section. |
2523 (error "***unimplemented***")) | 2530 (error "***unimplemented***")) |
2524 | 2531 |
2525 ((and (eq ?/ (char-after (point))) | 2532 ((and (= ?/ (following-char)) |
2526 w3-p-d-null-end-tag-enabled) | 2533 w3-p-d-null-end-tag-enabled) |
2527 ;; We are looking at a null end tag. | 2534 ;; We are looking at a null end tag. |
2528 (setq w3-p-d-end-tag-p t) | 2535 (setq w3-p-d-end-tag-p t) |
2529 (setq between-tags-end (point)) | 2536 (setq between-tags-end (point)) |
2530 (setq tag-end (1+ (point))) | 2537 (setq tag-end (1+ (point))) |
2553 (goto-char pt))) | 2560 (goto-char pt))) |
2554 | 2561 |
2555 ((looking-at (eval-when-compile | 2562 ((looking-at (eval-when-compile |
2556 (concat "[" (w3-invalid-sgml-chars) "]"))) | 2563 (concat "[" (w3-invalid-sgml-chars) "]"))) |
2557 (w3-debug-html | 2564 (w3-debug-html |
2558 (format "Invalid SGML character: %c" (char-after (point)))) | 2565 (format "Invalid SGML character: %c" (following-char))) |
2559 (insert (or (cdr-safe (assq (char-after (point)) | 2566 (insert (or (cdr-safe (assq (following-char) |
2560 ;; These characters are apparently | 2567 ;; These characters are apparently |
2561 ;; from a Windows character set. | 2568 ;; from a Windows character set. |
2562 '((146 . "'") | 2569 '((146 . "'") |
2563 (153 . "TM")))) | 2570 (153 . "TM")))) |
2564 "")) | 2571 "")) |
2609 ;; w3-grok-tag-or-data, but it's not clear which will be | 2616 ;; w3-grok-tag-or-data, but it's not clear which will be |
2610 ;; faster in practice. | 2617 ;; faster in practice. |
2611 (or (setq content (w3-element-content w3-p-d-current-element)) | 2618 (or (setq content (w3-element-content w3-p-d-current-element)) |
2612 ;; *** Strictly speaking, in SGML the record end is | 2619 ;; *** Strictly speaking, in SGML the record end is |
2613 ;; carriage return, not line feed. | 2620 ;; carriage return, not line feed. |
2614 (if (eq ?\n (char-after between-tags-start)) | 2621 (if (= ?\n (char-after between-tags-start)) |
2615 (setq between-tags-start (1+ between-tags-start)))) | 2622 (setq between-tags-start (1+ between-tags-start)))) |
2616 (if (= between-tags-start (point)) | 2623 (if (= between-tags-start (point)) |
2617 ;; Do nothing. | 2624 ;; Do nothing. |
2618 nil | 2625 nil |
2619 ;; We are definitely going to add data characters to the | 2626 ;; We are definitely going to add data characters to the |
2620 ;; content. | 2627 ;; content. |
2628 ;; Protocol is that all but last data character item | |
2629 ;; must have been sent to display engine. | |
2630 (and content | |
2631 (stringp (car content)) | |
2632 ;; Gross, disgusting hack to deal with old interface | |
2633 ;; to display engine. Remove as soon as possible. | |
2634 (not (memq (w3-element-name w3-p-d-current-element) | |
2635 '(plaintext style xmp textarea))) | |
2636 (w3-add-display-item 'text (car content))) | |
2621 (cond | 2637 (cond |
2622 ((and (= ?\n (preceding-char)) | 2638 ((and (= ?\n (preceding-char)) |
2623 (/= between-tags-start (1- (point)))) | 2639 (/= between-tags-start (1- (point)))) |
2624 (setq content (cons (buffer-substring between-tags-start | 2640 (setq content (cons (buffer-substring between-tags-start |
2625 (1- (point))) | 2641 (1- (point))) |
2626 content)) | 2642 content)) |
2643 ;; Gross, disgusting hack to deal with old interface | |
2644 ;; to display engine. Remove as soon as possible. | |
2645 (or (memq (w3-element-name w3-p-d-current-element) | |
2646 '(plaintext style xmp textarea)) | |
2647 (w3-add-display-item 'text (car content))) | |
2627 (setq content (cons "\n" content))) | 2648 (setq content (cons "\n" content))) |
2628 (t | 2649 (t |
2629 (setq content (cons (buffer-substring between-tags-start | 2650 (setq content (cons (buffer-substring between-tags-start |
2630 (point)) | 2651 (point)) |
2631 content)))) | 2652 content)))) |
2738 (if (re-search-forward (if w3-p-d-null-end-tag-enabled | 2759 (if (re-search-forward (if w3-p-d-null-end-tag-enabled |
2739 "</[a-z>]\\|[/&]" | 2760 "</[a-z>]\\|[/&]" |
2740 "</[a-z>]\\|&") | 2761 "</[a-z>]\\|&") |
2741 nil 'move) | 2762 nil 'move) |
2742 (goto-char (match-beginning 0))) | 2763 (goto-char (match-beginning 0))) |
2743 (eq ?& (char-after (point)))) | 2764 (= ?& (following-char))) |
2744 (w3-expand-entity-at-point-maybe))))))) | 2765 (w3-expand-entity-at-point-maybe))))))) |
2745 (t | 2766 (t |
2746 ;; The element is illegal here. We'll just discard the start | 2767 ;; The element is illegal here. We'll just discard the start |
2747 ;; tag as though we never saw it. | 2768 ;; tag as though we never saw it. |
2748 )))) | 2769 )))) |
2751 (setq w3-p-d-end-tag-p nil) | 2772 (setq w3-p-d-end-tag-p nil) |
2752 (setq net-tag-p nil) | 2773 (setq net-tag-p nil) |
2753 (setq tag-attributes nil) | 2774 (setq tag-attributes nil) |
2754 (setq tag-end nil))) | 2775 (setq tag-end nil))) |
2755 | 2776 |
2777 ;; Hand items to the display engine. | |
2778 (cond ((not nodraw) | |
2779 (set-buffer w3-draw-buffer) | |
2780 (while (not (eq parse-tag-stream-last-displayed-item | |
2781 w3-p-d-parse-tag-stream-tail-pointer)) | |
2782 (setq parse-tag-stream-last-displayed-item | |
2783 (cdr parse-tag-stream-last-displayed-item)) | |
2784 ;; We call w3-handle-single-tag from only one spot so that it | |
2785 ;; is reasonable to inline it, since it is a big function. | |
2786 (w3-handle-single-tag | |
2787 (car (car parse-tag-stream-last-displayed-item)) | |
2788 (cdr (car parse-tag-stream-last-displayed-item)))) | |
2789 (set-buffer parse-buffer))) | |
2790 | |
2756 ;; End of main while loop. | 2791 ;; End of main while loop. |
2757 ) | 2792 ) |
2758 | 2793 |
2759 ;; We have finished parsing the buffer! | 2794 ;; We have finished parsing the buffer! |
2760 (if status-message-format | 2795 (if status-message-format |
2761 (message "%sdone" (format status-message-format 100))) | 2796 (message "%sdone" (format status-message-format 100))) |
2797 ;; Do this now so the user can see the full results before Emacs | |
2798 ;; goes off and garbage-collects for an hour. :-( | |
2799 (if w3-do-incremental-display | |
2800 (w3-pause)) | |
2762 | 2801 |
2763 ;; *** For debugging, save the true parse tree. | 2802 ;; *** For debugging, save the true parse tree. |
2764 ;; *** Make this look inside *DOCUMENT. | 2803 ;; *** Make this look inside *DOCUMENT. |
2765 (setq w3-last-parse-tree | 2804 (setq w3-last-parse-tree |
2766 (w3-element-content w3-p-d-current-element)) | 2805 (w3-element-content w3-p-d-current-element)) |
2767 | 2806 |
2768 (w3-element-content w3-p-d-current-element) | 2807 ;; Return the parse in the format expected, a stream of tags |
2808 ;; possibly with a buffer at the front. | |
2809 (if nodraw | |
2810 ;; Discard the *dummy item at start of list. | |
2811 (cdr parse-tag-stream) | |
2812 (cons w3-draw-buffer (cdr parse-tag-stream))) | |
2813 | |
2769 ))) | 2814 ))) |
2815 | |
2816 | |
2817 ;;; | |
2818 ;;; Initialization of display engine to accept parser output. | |
2819 ;;; | |
2820 | |
2821 (defun w3-prepare-draw-buffer-for-parse-buffer () | |
2822 (setq list-buffers-directory nil) | |
2823 (let ((buf (get-buffer-create (url-generate-new-buffer-name | |
2824 "Untitled"))) | |
2825 (info (mapcar (function (lambda (x) (cons x (symbol-value x)))) | |
2826 w3-persistent-variables))) | |
2827 (setq w3-draw-buffer buf) | |
2828 (save-excursion | |
2829 (set-window-buffer (selected-window) buf) | |
2830 (set-buffer buf) | |
2831 (setq w3-draw-buffer (current-buffer)) | |
2832 (erase-buffer) | |
2833 (buffer-disable-undo (current-buffer)) | |
2834 (mapcar (function (lambda (x) (set (car x) (cdr x)))) info) | |
2835 (setq w3-last-fill-pos (point)) | |
2836 (setq fill-column (min (- (or w3-strict-width (window-width)) | |
2837 w3-right-border) | |
2838 (or w3-maximum-line-length (window-width)))) | |
2839 (setq fill-prefix "") | |
2840 (w3-init-state)))) | |
2770 | 2841 |
2771 | 2842 |
2772 | 2843 |
2773 (provide 'w3-parse) | 2844 (provide 'w3-parse) |
2774 | 2845 |