comparison lisp/w3/w3-parse.el @ 36:c53a95d3c46d r19-15b101

Import from CVS: tag r19-15b101
author cvs
date Mon, 13 Aug 2007 08:53:38 +0200
parents ec9a17fef872
children 1a767b41a199
comparison
equal deleted inserted replaced
35:279432d5c479 36:c53a95d3c46d
262 (goto-char (point-min)) 262 (goto-char (point-min))
263 (insert "\"") 263 (insert "\"")
264 (while (progn 264 (while (progn
265 (skip-chars-forward "^\"\\\t\n\r") 265 (skip-chars-forward "^\"\\\t\n\r")
266 (not (eobp))) 266 (not (eobp)))
267 (insert "\\" (cdr (assq (following-char) '((?\" . "\"") 267 (insert "\\" (cdr (assq (char-after (point)) '((?\" . "\"")
268 (?\\ . "\\") 268 (?\\ . "\\")
269 (?\t . "t") 269 (?\t . "t")
270 (?\n . "n") 270 (?\n . "n")
271 (?\r . "r"))))) 271 (?\r . "r")))))
272 (delete-char 1)) 272 (delete-char 1))
273 (insert "\"") 273 (insert "\"")
274 (buffer-string))) 274 (buffer-string)))
275 275
276 276
448 ;; doesn't get rescanned. 448 ;; doesn't get rescanned.
449 ;; *** Strictly speaking, we should issue a warning for &#foo; if foo 449 ;; *** Strictly speaking, we should issue a warning for &#foo; if foo
450 ;; is not a function character in the SGML declaration. 450 ;; is not a function character in the SGML declaration.
451 ) 451 )
452 452
453 ((eq ?& (following-char)) 453 ((eq ?& (char-after (point)))
454 ;; We are either looking at an undefined reference or a & that does 454 ;; We are either looking at an undefined reference or a & that does
455 ;; not start a reference (in which case we should not have been called). 455 ;; not start a reference (in which case we should not have been called).
456 ;; Skip over the &. 456 ;; Skip over the &.
457 (forward-char 1)) 457 (forward-char 1))
458 458
2114 2114
2115 ;; We are looking at a markup-starting character, and invalid 2115 ;; We are looking at a markup-starting character, and invalid
2116 ;; character, or end of buffer. 2116 ;; character, or end of buffer.
2117 (cond 2117 (cond
2118 2118
2119 ((= ?< (following-char)) 2119 ((= ?< (char-after (point)))
2120 2120
2121 ;; We are looking at a tag, comment, markup declaration, SGML marked 2121 ;; We are looking at a tag, comment, markup declaration, SGML marked
2122 ;; section, SGML processing instruction, or non-markup "<". 2122 ;; section, SGML processing instruction, or non-markup "<".
2123 (forward-char) 2123 (forward-char)
2124 (cond 2124 (cond
2129 ;; Downcase it in the buffer, to save creation of a string 2129 ;; Downcase it in the buffer, to save creation of a string
2130 (downcase-region (match-beginning 1) (match-end 1)) 2130 (downcase-region (match-beginning 1) (match-end 1))
2131 (setq w3-p-d-tag-name 2131 (setq w3-p-d-tag-name
2132 (intern (buffer-substring (match-beginning 1) 2132 (intern (buffer-substring (match-beginning 1)
2133 (match-end 1)))) 2133 (match-end 1))))
2134 (setq w3-p-d-end-tag-p (= ?/ (following-char))) 2134 (setq w3-p-d-end-tag-p (eq ?/ (char-after (point)))
2135 (setq between-tags-end (1- (point))) 2135 between-tags-end (1- (point)))
2136 (goto-char (match-end 0)) 2136 (goto-char (match-end 0))
2137 2137
2138 ;; Read the attributes from a start-tag. 2138 ;; Read the attributes from a start-tag.
2139 (if w3-p-d-end-tag-p 2139 (if w3-p-d-end-tag-p
2140 (if (looking-at "[ \t\r\n/]*[<>]") 2140 (if (looking-at "[ \t\r\n/]*[<>]")
2242 (w3-debug-html :nocontext 2242 (w3-debug-html :nocontext
2243 (format "Evil attribute value syntax: %s" 2243 (format "Evil attribute value syntax: %s"
2244 (buffer-substring (point-min) (point-max))))) 2244 (buffer-substring (point-min) (point-max)))))
2245 (t 2245 (t
2246 (error "impossible attribute value")))) 2246 (error "impossible attribute value"))))
2247 ((memq (following-char) '(?\" ?')) 2247 ((memq (char-after (point)) '(?\" ?'))
2248 ;; Missing terminating quote character. 2248 ;; Missing terminating quote character.
2249 (narrow-to-region (point) 2249 (narrow-to-region (point)
2250 (progn 2250 (progn
2251 (forward-char 1) 2251 (forward-char 1)
2252 (skip-chars-forward "^ \t\n\r'\"<>") 2252 (skip-chars-forward "^ \t\n\r'\"<>")
2346 tag-attributes)))) 2346 tag-attributes))))
2347 ) 2347 )
2348 2348
2349 ;; Process the end of the tag. 2349 ;; Process the end of the tag.
2350 (skip-chars-forward " \t\n\r") 2350 (skip-chars-forward " \t\n\r")
2351 (cond ((= ?> (following-char)) 2351 (cond ((eq ?> (char-after (point)))
2352 ;; Ordinary tag end. 2352 ;; Ordinary tag end.
2353 (forward-char 1)) 2353 (forward-char 1))
2354 ((and (= ?/ (following-char)) 2354 ((and (eq ?/ (char-after (point)))
2355 (not w3-p-d-end-tag-p)) 2355 (not w3-p-d-end-tag-p))
2356 ;; This is a NET-enabling start-tag. 2356 ;; This is a NET-enabling start-tag.
2357 (setq net-tag-p t) 2357 (setq net-tag-p t)
2358 (forward-char 1)) 2358 (forward-char 1))
2359 ((= ?< (following-char)) 2359 ((eq ?< (char-after (point)))
2360 ;; *** Strictly speaking, the following text has to 2360 ;; *** Strictly speaking, the following text has to
2361 ;; lexically be STAGO or ETAGO, which means that it 2361 ;; lexically be STAGO or ETAGO, which means that it
2362 ;; can't match some other lexical unit. 2362 ;; can't match some other lexical unit.
2363 ;; Unclosed tag. 2363 ;; Unclosed tag.
2364 nil) 2364 nil)
2371 2371
2372 (setq tag-end (point))) 2372 (setq tag-end (point)))
2373 2373
2374 ((looking-at "/?>") 2374 ((looking-at "/?>")
2375 ;; We are looking at an empty tag (<>, </>). 2375 ;; We are looking at an empty tag (<>, </>).
2376 (setq w3-p-d-end-tag-p (= ?/ (following-char))) 2376 (setq w3-p-d-end-tag-p (eq ?/ (char-after (point))))
2377 (setq w3-p-d-tag-name (if w3-p-d-end-tag-p 2377 (setq w3-p-d-tag-name (if w3-p-d-end-tag-p
2378 (w3-element-name w3-p-d-current-element) 2378 (w3-element-name w3-p-d-current-element)
2379 ;; *** Strictly speaking, if OMITTAG NO, then 2379 ;; *** Strictly speaking, if OMITTAG NO, then
2380 ;; we should use the most recently closed tag. 2380 ;; we should use the most recently closed tag.
2381 ;; But OMITTAG YES in HTML and I'm lazy. 2381 ;; But OMITTAG YES in HTML and I'm lazy.
2433 ;; parentheses somewhere inside the declaration. Handling 2433 ;; parentheses somewhere inside the declaration. Handling
2434 ;; this properly would require full parsing of markup 2434 ;; this properly would require full parsing of markup
2435 ;; declarations, a goal for the future. 2435 ;; declarations, a goal for the future.
2436 (w3-debug-html "Bad <! syntax.") 2436 (w3-debug-html "Bad <! syntax.")
2437 (skip-chars-forward "^>") 2437 (skip-chars-forward "^>")
2438 (if (= ?> (following-char)) 2438 (if (eq ?> (char-after (point)))
2439 (forward-char)))) 2439 (forward-char))))
2440 (point)))) 2440 (point))))
2441 2441
2442 ((looking-at "!\\\[\\(\\([ \t\n\r]*[a-z]+\\)+[ \t\n\r]*\\)\\\[") 2442 ((looking-at "!\\\[\\(\\([ \t\n\r]*[a-z]+\\)+[ \t\n\r]*\\)\\\[")
2443 ;; We are looking at a marked section. 2443 ;; We are looking at a marked section.
2460 (keyword (car-safe (cond ((memq 'IGNORE keywords)) 2460 (keyword (car-safe (cond ((memq 'IGNORE keywords))
2461 ((memq 'CDATA keywords)) 2461 ((memq 'CDATA keywords))
2462 ((memq 'RCDATA keywords)) 2462 ((memq 'RCDATA keywords))
2463 ((memq 'INCLUDE keywords)) 2463 ((memq 'INCLUDE keywords))
2464 ((memq 'TEMP keywords)))))) 2464 ((memq 'TEMP keywords))))))
2465 (or (= ?\[ (following-char)) 2465 (or (eq ?\[ (char-after (point)))
2466 ;; I probably shouldn't even check this, since it is so 2466 ;; I probably shouldn't even check this, since it is so
2467 ;; impossible. 2467 ;; impossible.
2468 (error "impossible ??")) 2468 (error "impossible ??"))
2469 (forward-char 1) 2469 (forward-char 1)
2470 (delete-region (1- (match-beginning 0)) (point)) 2470 (delete-region (1- (match-beginning 0)) (point))
2501 (backward-char 1) 2501 (backward-char 1)
2502 (delete-region 2502 (delete-region
2503 (point) 2503 (point)
2504 (progn 2504 (progn
2505 (skip-chars-forward "^>") 2505 (skip-chars-forward "^>")
2506 (if (= ?> (following-char)) 2506 (if (eq ?> (char-after (point)))
2507 (forward-char)) 2507 (forward-char))
2508 (point)))) 2508 (point))))
2509 (t 2509 (t
2510 ;; This < is not a markup character. Pretend we didn't notice 2510 ;; This < is not a markup character. Pretend we didn't notice
2511 ;; it at all. We have skipped over the < already, so just loop 2511 ;; it at all. We have skipped over the < already, so just loop
2512 ;; again. 2512 ;; again.
2513 ))) 2513 )))
2514 2514
2515 ((= ?& (following-char)) 2515 ((eq ?& (char-after (point)))
2516 (w3-expand-entity-at-point-maybe)) 2516 (w3-expand-entity-at-point-maybe))
2517 2517
2518 ((and (= ?\] (following-char)) 2518 ((and (eq ?\] (char-after (point)))
2519 w3-p-d-in-parsed-marked-section 2519 w3-p-d-in-parsed-marked-section
2520 (looking-at "]]>")) 2520 (looking-at "]]>"))
2521 ;; *** handle the end of a parsed marked section. 2521 ;; *** handle the end of a parsed marked section.
2522 (error "***unimplemented***")) 2522 (error "***unimplemented***"))
2523 2523
2524 ((and (= ?/ (following-char)) 2524 ((and (eq ?/ (char-after (point)))
2525 w3-p-d-null-end-tag-enabled) 2525 w3-p-d-null-end-tag-enabled)
2526 ;; We are looking at a null end tag. 2526 ;; We are looking at a null end tag.
2527 (setq w3-p-d-end-tag-p t) 2527 (setq w3-p-d-end-tag-p t)
2528 (setq between-tags-end (point)) 2528 (setq between-tags-end (point))
2529 (setq tag-end (1+ (point))) 2529 (setq tag-end (1+ (point)))
2552 (goto-char pt))) 2552 (goto-char pt)))
2553 2553
2554 ((looking-at (eval-when-compile 2554 ((looking-at (eval-when-compile
2555 (concat "[" (w3-invalid-sgml-chars) "]"))) 2555 (concat "[" (w3-invalid-sgml-chars) "]")))
2556 (w3-debug-html 2556 (w3-debug-html
2557 (format "Invalid SGML character: %c" (following-char))) 2557 (format "Invalid SGML character: %c" (char-after (point))))
2558 (insert (or (cdr-safe (assq (following-char) 2558 (insert (or (cdr-safe (assq (char-after (point))
2559 ;; These characters are apparently 2559 ;; These characters are apparently
2560 ;; from a Windows character set. 2560 ;; from a Windows character set.
2561 '((146 . "'") 2561 '((146 . "'")
2562 (153 . "TM")))) 2562 (153 . "TM"))))
2563 "")) 2563 ""))
2737 (if (re-search-forward (if w3-p-d-null-end-tag-enabled 2737 (if (re-search-forward (if w3-p-d-null-end-tag-enabled
2738 "</[a-z>]\\|[/&]" 2738 "</[a-z>]\\|[/&]"
2739 "</[a-z>]\\|&") 2739 "</[a-z>]\\|&")
2740 nil 'move) 2740 nil 'move)
2741 (goto-char (match-beginning 0))) 2741 (goto-char (match-beginning 0)))
2742 (= ?& (following-char))) 2742 (eq ?& (char-after (point))))
2743 (w3-expand-entity-at-point-maybe))))))) 2743 (w3-expand-entity-at-point-maybe)))))))
2744 (t 2744 (t
2745 ;; The element is illegal here. We'll just discard the start 2745 ;; The element is illegal here. We'll just discard the start
2746 ;; tag as though we never saw it. 2746 ;; tag as though we never saw it.
2747 )))) 2747 ))))