Mercurial > hg > xemacs-beta
comparison lisp/w3/w3-parse.el @ 36:c53a95d3c46d r19-15b101
Import from CVS: tag r19-15b101
author | cvs |
---|---|
date | Mon, 13 Aug 2007 08:53:38 +0200 |
parents | ec9a17fef872 |
children | 1a767b41a199 |
comparison
equal
deleted
inserted
replaced
35:279432d5c479 | 36:c53a95d3c46d |
---|---|
262 (goto-char (point-min)) | 262 (goto-char (point-min)) |
263 (insert "\"") | 263 (insert "\"") |
264 (while (progn | 264 (while (progn |
265 (skip-chars-forward "^\"\\\t\n\r") | 265 (skip-chars-forward "^\"\\\t\n\r") |
266 (not (eobp))) | 266 (not (eobp))) |
267 (insert "\\" (cdr (assq (following-char) '((?\" . "\"") | 267 (insert "\\" (cdr (assq (char-after (point)) '((?\" . "\"") |
268 (?\\ . "\\") | 268 (?\\ . "\\") |
269 (?\t . "t") | 269 (?\t . "t") |
270 (?\n . "n") | 270 (?\n . "n") |
271 (?\r . "r"))))) | 271 (?\r . "r"))))) |
272 (delete-char 1)) | 272 (delete-char 1)) |
273 (insert "\"") | 273 (insert "\"") |
274 (buffer-string))) | 274 (buffer-string))) |
275 | 275 |
276 | 276 |
448 ;; doesn't get rescanned. | 448 ;; doesn't get rescanned. |
449 ;; *** Strictly speaking, we should issue a warning for &#foo; if foo | 449 ;; *** Strictly speaking, we should issue a warning for &#foo; if foo |
450 ;; is not a function character in the SGML declaration. | 450 ;; is not a function character in the SGML declaration. |
451 ) | 451 ) |
452 | 452 |
453 ((eq ?& (following-char)) | 453 ((eq ?& (char-after (point))) |
454 ;; We are either looking at an undefined reference or a & that does | 454 ;; We are either looking at an undefined reference or a & that does |
455 ;; not start a reference (in which case we should not have been called). | 455 ;; not start a reference (in which case we should not have been called). |
456 ;; Skip over the &. | 456 ;; Skip over the &. |
457 (forward-char 1)) | 457 (forward-char 1)) |
458 | 458 |
2114 | 2114 |
2115 ;; We are looking at a markup-starting character, and invalid | 2115 ;; We are looking at a markup-starting character, and invalid |
2116 ;; character, or end of buffer. | 2116 ;; character, or end of buffer. |
2117 (cond | 2117 (cond |
2118 | 2118 |
2119 ((= ?< (following-char)) | 2119 ((= ?< (char-after (point))) |
2120 | 2120 |
2121 ;; We are looking at a tag, comment, markup declaration, SGML marked | 2121 ;; We are looking at a tag, comment, markup declaration, SGML marked |
2122 ;; section, SGML processing instruction, or non-markup "<". | 2122 ;; section, SGML processing instruction, or non-markup "<". |
2123 (forward-char) | 2123 (forward-char) |
2124 (cond | 2124 (cond |
2129 ;; Downcase it in the buffer, to save creation of a string | 2129 ;; Downcase it in the buffer, to save creation of a string |
2130 (downcase-region (match-beginning 1) (match-end 1)) | 2130 (downcase-region (match-beginning 1) (match-end 1)) |
2131 (setq w3-p-d-tag-name | 2131 (setq w3-p-d-tag-name |
2132 (intern (buffer-substring (match-beginning 1) | 2132 (intern (buffer-substring (match-beginning 1) |
2133 (match-end 1)))) | 2133 (match-end 1)))) |
2134 (setq w3-p-d-end-tag-p (= ?/ (following-char))) | 2134 (setq w3-p-d-end-tag-p (eq ?/ (char-after (point))) |
2135 (setq between-tags-end (1- (point))) | 2135 between-tags-end (1- (point))) |
2136 (goto-char (match-end 0)) | 2136 (goto-char (match-end 0)) |
2137 | 2137 |
2138 ;; Read the attributes from a start-tag. | 2138 ;; Read the attributes from a start-tag. |
2139 (if w3-p-d-end-tag-p | 2139 (if w3-p-d-end-tag-p |
2140 (if (looking-at "[ \t\r\n/]*[<>]") | 2140 (if (looking-at "[ \t\r\n/]*[<>]") |
2242 (w3-debug-html :nocontext | 2242 (w3-debug-html :nocontext |
2243 (format "Evil attribute value syntax: %s" | 2243 (format "Evil attribute value syntax: %s" |
2244 (buffer-substring (point-min) (point-max))))) | 2244 (buffer-substring (point-min) (point-max))))) |
2245 (t | 2245 (t |
2246 (error "impossible attribute value")))) | 2246 (error "impossible attribute value")))) |
2247 ((memq (following-char) '(?\" ?')) | 2247 ((memq (char-after (point)) '(?\" ?')) |
2248 ;; Missing terminating quote character. | 2248 ;; Missing terminating quote character. |
2249 (narrow-to-region (point) | 2249 (narrow-to-region (point) |
2250 (progn | 2250 (progn |
2251 (forward-char 1) | 2251 (forward-char 1) |
2252 (skip-chars-forward "^ \t\n\r'\"<>") | 2252 (skip-chars-forward "^ \t\n\r'\"<>") |
2346 tag-attributes)))) | 2346 tag-attributes)))) |
2347 ) | 2347 ) |
2348 | 2348 |
2349 ;; Process the end of the tag. | 2349 ;; Process the end of the tag. |
2350 (skip-chars-forward " \t\n\r") | 2350 (skip-chars-forward " \t\n\r") |
2351 (cond ((= ?> (following-char)) | 2351 (cond ((eq ?> (char-after (point))) |
2352 ;; Ordinary tag end. | 2352 ;; Ordinary tag end. |
2353 (forward-char 1)) | 2353 (forward-char 1)) |
2354 ((and (= ?/ (following-char)) | 2354 ((and (eq ?/ (char-after (point))) |
2355 (not w3-p-d-end-tag-p)) | 2355 (not w3-p-d-end-tag-p)) |
2356 ;; This is a NET-enabling start-tag. | 2356 ;; This is a NET-enabling start-tag. |
2357 (setq net-tag-p t) | 2357 (setq net-tag-p t) |
2358 (forward-char 1)) | 2358 (forward-char 1)) |
2359 ((= ?< (following-char)) | 2359 ((eq ?< (char-after (point))) |
2360 ;; *** Strictly speaking, the following text has to | 2360 ;; *** Strictly speaking, the following text has to |
2361 ;; lexically be STAGO or ETAGO, which means that it | 2361 ;; lexically be STAGO or ETAGO, which means that it |
2362 ;; can't match some other lexical unit. | 2362 ;; can't match some other lexical unit. |
2363 ;; Unclosed tag. | 2363 ;; Unclosed tag. |
2364 nil) | 2364 nil) |
2371 | 2371 |
2372 (setq tag-end (point))) | 2372 (setq tag-end (point))) |
2373 | 2373 |
2374 ((looking-at "/?>") | 2374 ((looking-at "/?>") |
2375 ;; We are looking at an empty tag (<>, </>). | 2375 ;; We are looking at an empty tag (<>, </>). |
2376 (setq w3-p-d-end-tag-p (= ?/ (following-char))) | 2376 (setq w3-p-d-end-tag-p (eq ?/ (char-after (point)))) |
2377 (setq w3-p-d-tag-name (if w3-p-d-end-tag-p | 2377 (setq w3-p-d-tag-name (if w3-p-d-end-tag-p |
2378 (w3-element-name w3-p-d-current-element) | 2378 (w3-element-name w3-p-d-current-element) |
2379 ;; *** Strictly speaking, if OMITTAG NO, then | 2379 ;; *** Strictly speaking, if OMITTAG NO, then |
2380 ;; we should use the most recently closed tag. | 2380 ;; we should use the most recently closed tag. |
2381 ;; But OMITTAG YES in HTML and I'm lazy. | 2381 ;; But OMITTAG YES in HTML and I'm lazy. |
2433 ;; parentheses somewhere inside the declaration. Handling | 2433 ;; parentheses somewhere inside the declaration. Handling |
2434 ;; this properly would require full parsing of markup | 2434 ;; this properly would require full parsing of markup |
2435 ;; declarations, a goal for the future. | 2435 ;; declarations, a goal for the future. |
2436 (w3-debug-html "Bad <! syntax.") | 2436 (w3-debug-html "Bad <! syntax.") |
2437 (skip-chars-forward "^>") | 2437 (skip-chars-forward "^>") |
2438 (if (= ?> (following-char)) | 2438 (if (eq ?> (char-after (point))) |
2439 (forward-char)))) | 2439 (forward-char)))) |
2440 (point)))) | 2440 (point)))) |
2441 | 2441 |
2442 ((looking-at "!\\\[\\(\\([ \t\n\r]*[a-z]+\\)+[ \t\n\r]*\\)\\\[") | 2442 ((looking-at "!\\\[\\(\\([ \t\n\r]*[a-z]+\\)+[ \t\n\r]*\\)\\\[") |
2443 ;; We are looking at a marked section. | 2443 ;; We are looking at a marked section. |
2460 (keyword (car-safe (cond ((memq 'IGNORE keywords)) | 2460 (keyword (car-safe (cond ((memq 'IGNORE keywords)) |
2461 ((memq 'CDATA keywords)) | 2461 ((memq 'CDATA keywords)) |
2462 ((memq 'RCDATA keywords)) | 2462 ((memq 'RCDATA keywords)) |
2463 ((memq 'INCLUDE keywords)) | 2463 ((memq 'INCLUDE keywords)) |
2464 ((memq 'TEMP keywords)))))) | 2464 ((memq 'TEMP keywords)))))) |
2465 (or (= ?\[ (following-char)) | 2465 (or (eq ?\[ (char-after (point))) |
2466 ;; I probably shouldn't even check this, since it is so | 2466 ;; I probably shouldn't even check this, since it is so |
2467 ;; impossible. | 2467 ;; impossible. |
2468 (error "impossible ??")) | 2468 (error "impossible ??")) |
2469 (forward-char 1) | 2469 (forward-char 1) |
2470 (delete-region (1- (match-beginning 0)) (point)) | 2470 (delete-region (1- (match-beginning 0)) (point)) |
2501 (backward-char 1) | 2501 (backward-char 1) |
2502 (delete-region | 2502 (delete-region |
2503 (point) | 2503 (point) |
2504 (progn | 2504 (progn |
2505 (skip-chars-forward "^>") | 2505 (skip-chars-forward "^>") |
2506 (if (= ?> (following-char)) | 2506 (if (eq ?> (char-after (point))) |
2507 (forward-char)) | 2507 (forward-char)) |
2508 (point)))) | 2508 (point)))) |
2509 (t | 2509 (t |
2510 ;; This < is not a markup character. Pretend we didn't notice | 2510 ;; This < is not a markup character. Pretend we didn't notice |
2511 ;; it at all. We have skipped over the < already, so just loop | 2511 ;; it at all. We have skipped over the < already, so just loop |
2512 ;; again. | 2512 ;; again. |
2513 ))) | 2513 ))) |
2514 | 2514 |
2515 ((= ?& (following-char)) | 2515 ((eq ?& (char-after (point))) |
2516 (w3-expand-entity-at-point-maybe)) | 2516 (w3-expand-entity-at-point-maybe)) |
2517 | 2517 |
2518 ((and (= ?\] (following-char)) | 2518 ((and (eq ?\] (char-after (point))) |
2519 w3-p-d-in-parsed-marked-section | 2519 w3-p-d-in-parsed-marked-section |
2520 (looking-at "]]>")) | 2520 (looking-at "]]>")) |
2521 ;; *** handle the end of a parsed marked section. | 2521 ;; *** handle the end of a parsed marked section. |
2522 (error "***unimplemented***")) | 2522 (error "***unimplemented***")) |
2523 | 2523 |
2524 ((and (= ?/ (following-char)) | 2524 ((and (eq ?/ (char-after (point))) |
2525 w3-p-d-null-end-tag-enabled) | 2525 w3-p-d-null-end-tag-enabled) |
2526 ;; We are looking at a null end tag. | 2526 ;; We are looking at a null end tag. |
2527 (setq w3-p-d-end-tag-p t) | 2527 (setq w3-p-d-end-tag-p t) |
2528 (setq between-tags-end (point)) | 2528 (setq between-tags-end (point)) |
2529 (setq tag-end (1+ (point))) | 2529 (setq tag-end (1+ (point))) |
2552 (goto-char pt))) | 2552 (goto-char pt))) |
2553 | 2553 |
2554 ((looking-at (eval-when-compile | 2554 ((looking-at (eval-when-compile |
2555 (concat "[" (w3-invalid-sgml-chars) "]"))) | 2555 (concat "[" (w3-invalid-sgml-chars) "]"))) |
2556 (w3-debug-html | 2556 (w3-debug-html |
2557 (format "Invalid SGML character: %c" (following-char))) | 2557 (format "Invalid SGML character: %c" (char-after (point)))) |
2558 (insert (or (cdr-safe (assq (following-char) | 2558 (insert (or (cdr-safe (assq (char-after (point)) |
2559 ;; These characters are apparently | 2559 ;; These characters are apparently |
2560 ;; from a Windows character set. | 2560 ;; from a Windows character set. |
2561 '((146 . "'") | 2561 '((146 . "'") |
2562 (153 . "TM")))) | 2562 (153 . "TM")))) |
2563 "")) | 2563 "")) |
2737 (if (re-search-forward (if w3-p-d-null-end-tag-enabled | 2737 (if (re-search-forward (if w3-p-d-null-end-tag-enabled |
2738 "</[a-z>]\\|[/&]" | 2738 "</[a-z>]\\|[/&]" |
2739 "</[a-z>]\\|&") | 2739 "</[a-z>]\\|&") |
2740 nil 'move) | 2740 nil 'move) |
2741 (goto-char (match-beginning 0))) | 2741 (goto-char (match-beginning 0))) |
2742 (= ?& (following-char))) | 2742 (eq ?& (char-after (point)))) |
2743 (w3-expand-entity-at-point-maybe))))))) | 2743 (w3-expand-entity-at-point-maybe))))))) |
2744 (t | 2744 (t |
2745 ;; The element is illegal here. We'll just discard the start | 2745 ;; The element is illegal here. We'll just discard the start |
2746 ;; tag as though we never saw it. | 2746 ;; tag as though we never saw it. |
2747 )))) | 2747 )))) |