comparison lisp/psgml/psgml-parse.el @ 0:376386a54a3c r19-14

Import from CVS: tag r19-14
author cvs
date Mon, 13 Aug 2007 08:45:50 +0200
parents
children ac2d302a0011
comparison
equal deleted inserted replaced
-1:000000000000 0:376386a54a3c
1 ;;;; psgml-parse.el --- Parser for SGML-editing mode with parsing support
2 ;; $Id: psgml-parse.el,v 1.1.1.1 1996/12/18 03:35:21 steve Exp $
3
4 ;; Copyright (C) 1994, 1995 Lennart Staflin
5
6 ;; Author: Lennart Staflin <lenst@lysator.liu.se>
7 ;; Acknowledgment:
8 ;; The catalog parsing code was contributed by
9 ;; David Megginson <dmeggins@aix1.uottawa.CA>
10
11 ;; This program is free software; you can redistribute it and/or
12 ;; modify it under the terms of the GNU General Public License
13 ;; as published by the Free Software Foundation; either version 2
14 ;; of the License, or (at your option) any later version.
15 ;;
16 ;; This program is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 ;; GNU General Public License for more details.
20 ;;
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with this program; if not, write to the Free Software
23 ;; Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24
25
26 ;;;; Commentary:
27
28 ;; Part of major mode for editing the SGML document-markup language.
29
30
31 ;;;; Code:
32
33 (require 'psgml)
34
35 ;;; Interface to psgml-dtd
36 (eval-and-compile
37 (autoload 'sgml-do-usemap-element "psgml-dtd")
38 (autoload 'sgml-write-dtd "psgml-dtd")
39 (autoload 'sgml-check-dtd-subset "psgml-dtd")
40 )
41
42
43 ;;;; Advise to do-auto-fill
44
45 (defvar sgml-auto-fill-inhibit-function nil
46 "If non-nil, it should be a function of no arguments.
47 The functions is evaluated before the standard auto-fill function,
48 do-auto-fill, tries to fill a line. If the function returns a true
49 value the auto-fill is inhibited.")
50
51 ;;(defadvice do-auto-fill (around disable-auto-fill-hook activate)
52 ;; (or (and sgml-auto-fill-inhibit-function
53 ;; (funcall sgml-auto-fill-inhibit-function))
54 ;; ad-do-it))
55
56
57 ;;;; Variables
58
59 ;;; Hooks
60
61 (defvar sgml-open-element-hook nil
62 "The hook run by `sgml-open-element'.
63 Theses functions are called with two arguments, the first argument is
64 the opened element and the second argument is the attribute specification
65 list. It is probably best not to refer to the content or the end-tag of
66 the element.")
67
68 (defvar sgml-close-element-hook nil
69 "The hook run by `sgml-close-element'.
70 These functions are invoked with `sgml-current-tree' bound to the
71 element just parsed.")
72
73 (defvar sgml-doctype-parsed-hook nil
74 "This hook is caled after the doctype has been parsed.
75 It can be used to load any additional information into the DTD structure.")
76
77 (defvar sgml-sysid-resolve-functions nil
78 "This variable should contain a list of functions.
79 Each function should take one argument, the system identifier of an entity.
80 If the function can handle that identifier, it should insert the text
81 of the entity into the current buffer at point and return t. If the
82 system identifier is not handled the function should return nil.")
83
84 ;;; Internal variables
85
86 (defconst sgml-pcdata-token (intern "#PCDATA"))
87
88 (defvar sgml-computed-map nil
89 "Internal representation of entity search map.")
90
91 (defvar sgml-used-entity-map nil
92 "The value of `sgml-current-entity-map' used to compute the map in
93 `sgml-compute-map'.")
94
95 (defvar sgml-last-element nil
96 "Used to keep information about position in element structure between
97 commands.")
98
99 (defconst sgml-users-of-last-element
100 '(sgml-beginning-of-element
101 sgml-end-of-element
102 sgml-up-element
103 sgml-backward-up-element
104 sgml-backward-element
105 sgml-forward-element
106 sgml-down-element
107 sgml-show-context
108 sgml-next-data-field
109 )
110 "List of commands that set the sgml-last-element variable.")
111
112 (defvar sgml-parser-syntax nil
113 "Syntax table used during parsing.")
114
115 (defvar sgml-ecat-assoc nil
116 "Assoc list caching parsed ecats.")
117
118 (defvar sgml-catalog-assoc nil
119 "Assoc list caching parsed catalogs.")
120
121
122 ;;; Variables dynamically bound to affect parsing
123
124 (defvar sgml-throw-on-warning nil
125 "Set to a symbol other than nil to make sgml-log-warning throw to that symbol.")
126
127 (defvar sgml-throw-on-error nil
128 "Set to a symbol other than nil to make sgml-error throw to that symbol.")
129
130 (defvar sgml-show-warnings nil
131 "Set to t to show warnings.")
132
133 (defvar sgml-close-element-trap nil
134 "Can be nil for no trap, an element or t for any element.
135 Tested by sgml-close-element to see if the parse should be ended.")
136
137 (defvar sgml-goal 0
138 "Point in buffer to parse up to.")
139
140 (defvar sgml-shortref-handler (function sgml-handle-shortref)
141 "Function called by parser to handle a short reference.
142 Called with the entity as argument. The start and end of the
143 short reference is `sgml-markup-start' and point.")
144
145 (defvar sgml-data-function nil
146 "Function called with parsed data.")
147
148 (defvar sgml-entity-function nil
149 "Function called with entity referenced at current point in parse.")
150
151 (defvar sgml-pi-function nil
152 "Function called with parsed process instruction.")
153
154 (defvar sgml-signal-data-function nil
155 "Called when some data characters are conceptually parsed,
156 e.g. a data entity reference.")
157
158 (defvar sgml-throw-on-element-change nil
159 "Throw tag.")
160
161 ;;; Global variables active during parsing
162
163 (defvar sgml-parsing-dtd nil
164 "This variable is bound to `t' while parsing a DTD (subset).")
165
166 (defvar sgml-rs-ignore-pos nil
167 "Set to position of last parsing start in current buffer.")
168 (make-variable-buffer-local 'sgml-rs-ignore-pos)
169
170 (defvar sgml-dtd-info nil
171 "Holds the `sgml-dtd' structure describing the current DTD.")
172
173 (defvar sgml-current-omittag nil
174 "Value of `sgml-omittag' in main buffer. Valid during parsing.")
175
176 (defvar sgml-current-shorttag nil
177 "Value of `sgml-shorttag' in main buffer. Valid during parsing.")
178
179 (defvar sgml-current-localcat nil
180 "Value of `sgml-local-catalogs' in main buffer. Valid during parsing.")
181
182 (defvar sgml-current-local-ecat nil
183 "Value of `sgml-local-ecat-files' in main buffer. Valid during parsing.")
184
185 (defvar sgml-current-state nil
186 "Current state in content model or model type if CDATA, RCDATA or ANY.")
187
188 (defvar sgml-current-shortmap nil
189 "The current active short reference map.")
190
191 (defvar sgml-current-tree nil
192 "Current parse tree node, identifies open element.")
193
194 (defvar sgml-previous-tree nil
195 "Previous tree node in current tree.
196 This is nil if no previous node.")
197
198 (defvar sgml-markup-type nil
199 "Contains the type of markup parsed last.
200 The value is a symbol:
201 nil - pcdata or space
202 CDATA - CDATA or RCDATA
203 comment - comment declaration
204 doctype - doctype declaration
205 end-tag
206 ignored - ignored marked section
207 ms-end - marked section start, if not ignored
208 ms-start - marked section end, if not ignored
209 pi - processing instruction
210 sgml - SGML declaration
211 start-tag
212 entity - general entity reference
213 param - parameter reference
214 shortref- short reference
215 mdecl - markup declaration
216 ")
217
218 (defvar sgml-top-tree nil
219 "Root node of parse tree during parsing.")
220
221 (defvar sgml-markup-tree nil
222 "Tree node of markup parsed.
223 In case markup closed element this is different from sgml-current-tree.
224 Only valid after `sgml-parse-to'.")
225
226 (defvar sgml-markup-start nil
227 "Start point of markup beeing parsed.")
228
229 (defvar sgml-conref-flag nil
230 "This variable is set by `sgml-parse-attribute-specification-list'
231 if a CONREF attribute is parsed.")
232
233 (defvar sgml-no-elements nil
234 "Number of declared elements.")
235
236 ;;; Vars used in *param* buffers
237
238 (defvar sgml-previous-buffer nil)
239
240 (defvar sgml-current-eref nil
241 "This is the entity reference used to enter current entity.
242 If this is nil, then current entity is main buffer.")
243
244 (defvar sgml-scratch-buffer nil
245 "The global value of this variable is the first scratch buffer for
246 entities. The entity buffers can have a buffer local value for this variable
247 to point to the next scratch buffer.")
248
249 (defvar sgml-last-entity-buffer nil)
250
251 ;;; For loading DTD
252
253 (eval-and-compile
254 (defconst sgml-max-single-octet-number 250
255 "Octets greater than this is the first of a two octet coding."))
256
257 (defvar sgml-read-token-vector nil) ; Vector of symbols used to decode
258 ; token numbers.
259 (defvar sgml-read-nodes nil) ; Vector of nodes used when reading
260 ; a finite automaton.
261
262 ;; Buffer local variables
263
264 (defvar sgml-loaded-dtd nil
265 "File name corresponding to current DTD.")
266 (make-variable-buffer-local 'sgml-loaded-dtd)
267
268 (defvar sgml-current-element-name nil
269 "Name of current element for mode line display.")
270
271
272 ;;;; Build parser syntax table
273
274 (setq sgml-parser-syntax (make-syntax-table))
275
276 (let ((i 0))
277 (while (< i 256)
278 (modify-syntax-entry i " " sgml-parser-syntax)
279 (setq i (1+ i))))
280
281 (mapconcat (function (lambda (c)
282 (modify-syntax-entry c "w" sgml-parser-syntax)))
283 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrtsuvwxyz" "")
284 (mapconcat (function (lambda (c)
285 (modify-syntax-entry c "_" sgml-parser-syntax)))
286 "-.0123456789" "")
287 (mapconcat (function (lambda (c)
288 (modify-syntax-entry c "." sgml-parser-syntax)))
289 "</>&%#[]" ".")
290
291 ;;(progn (set-syntax-table sgml-parser-syntax) (describe-syntax))
292
293
294 (defmacro sgml-with-parser-syntax (&rest body)
295 (` (let ((normal-syntax-table (syntax-table)))
296 (set-syntax-table sgml-parser-syntax)
297 (unwind-protect
298 (progn (,@ body))
299 (set-syntax-table normal-syntax-table)))))
300
301
302 ;;;; State machine
303
304 ;; From the parsers POV a state is a mapping from tokens (in sgml it
305 ;; is primitive state tokens) to states. The pairs of the mapping is
306 ;; called moves.
307
308 ;; DFAs are always represented by the start state, which is a
309 ;; normal state. Normal states contain moves of two types:
310 ;; 1. moves for required tokens, 2. moves for optional tokens.
311 ;; By design these are keept in two different sets.
312 ;; [Alt: they could perhaps have been keept in one set but
313 ;; marked in different ways.]
314
315 ;; The &-model groups creates too big state machines, therefor
316 ;; there is a datastruture called &-node.
317
318 ;; A &-node is a specification for a dfa that has not been computed.
319 ;; It contains a set of dfas that all have to be traversed befor going
320 ;; to the next state. The &-nodes are only stored in moves and are
321 ;; not seen by the parser. When a move is taken the &-node is converted
322 ;; to a &-state.
323
324 ;; A &-state keeps track of which dfas still need to be
325 ;; traversed and the state of the current dfa.
326
327 ;; move = <token, node>
328
329 ;; node = normal-state | &-node
330
331 ;; &-node = <dfas, next>
332 ;; where: dfas is a set of normal-state
333 ;; next is a normal-state
334
335 ;; State = normal-state | &-state
336 ;; The parser only knows about the state type.
337
338 ;; normal-state = <opts, reqs>
339 ;; where: opts is a set of moves for optional tokens
340 ;; reqs is a set of moves for required tokens
341
342 ;; &-state = <substate, dfas, next>
343 ;; where: substate is a normal-state
344 ;; dfas is a set of states
345 ;; next is the next state
346
347 ;; The &-state is only used during the parsing.
348 ;; Primitiv functions to get data from parse state need
349 ;; to know both normal-state and &-state.
350
351
352 ;;; Representations:
353
354 ;;move: (token . node)
355
356 (defmacro sgml-make-move (token node)
357 (` (cons (, token) (, node))))
358
359 (defmacro sgml-move-token (x)
360 (` (car (, x))))
361
362 (defmacro sgml-move-dest (x)
363 (` (cdr (, x))))
364
365 ;; set of moves: list of moves
366
367 (defmacro sgml-add-move-to-set (token node set)
368 (`(cons (cons (, token) (, node)) (, set))))
369
370 (defmacro sgml-moves-lookup (token set)
371 (` (assq (, token) (, set))))
372
373 ;; normal-state: ('normal-state opts . reqs)
374
375 (defsubst sgml-make-state ()
376 (cons 'normal-state (cons nil nil)))
377
378 (defmacro sgml-normal-state-p (s)
379 (` (eq (car (, s)) 'normal-state)))
380
381 (defmacro sgml-state-opts (s)
382 (` (cadr (, s))))
383
384 (defmacro sgml-state-reqs (s)
385 (` (cddr (, s))))
386
387 (defmacro sgml-state-final-p (s)
388 (`(null (sgml-state-reqs (, s)))))
389
390 ;; adding moves
391 ;; *** Should these functions check for ambiguity?
392 ;; What if adding a optional move for a token that has a
393 ;; required move?
394 ;; What about the other way?
395
396 (defsubst sgml-add-opt-move (s token dest)
397 (or (sgml-moves-lookup token (sgml-state-opts s))
398 (setf (sgml-state-opts s)
399 (sgml-add-move-to-set token dest (sgml-state-opts s)))))
400
401 (defsubst sgml-add-req-move (s token dest)
402 (or (sgml-moves-lookup token (sgml-state-reqs s))
403 (setf (sgml-state-reqs s)
404 (sgml-add-move-to-set token dest (sgml-state-reqs s)))))
405
406 (defsubst sgml-make-primitive-content-token (token)
407 (let ((s1 (sgml-make-state))
408 (s2 (sgml-make-state)))
409 (sgml-add-req-move s1 token s2)
410 s1))
411
412 ;;&-state: (state next . dfas)
413
414 (defsubst sgml-make-&state (state dfas next)
415 (cons state (cons next dfas)))
416
417 (defsubst sgml-step-&state (state &state)
418 (cons state (cdr &state)))
419
420 (defsubst sgml-&state-substate (s)
421 (car s))
422
423 (defsubst sgml-&state-dfas (s)
424 (cddr s))
425
426 (defsubst sgml-&state-next (s)
427 (cadr s))
428
429
430 ;;&-node: (next . dfas)
431
432 (defsubst sgml-make-&node (dfas next)
433 (cons next dfas))
434
435 (defmacro sgml-&node-next (n)
436 (` (car (, n))))
437
438 (defmacro sgml-&node-dfas (n)
439 (` (cdr (, n))))
440
441
442 ;;; Using states
443
444 ;; get-move: State x Token --> State|nil
445
446 (defsubst sgml-get-move (state token)
447 "Return a new state or nil, after traversing TOKEN from STATE."
448 (cond
449 ((sgml-normal-state-p state)
450 (let ((c (or (sgml-moves-lookup token (sgml-state-opts state))
451 (sgml-moves-lookup token (sgml-state-reqs state)))))
452 (if c
453 (let ((dest (sgml-move-dest c)))
454 (if (sgml-normal-state-p dest)
455 dest
456 ;; dest is a &-node
457 (sgml-next-sub& (sgml-&node-dfas dest)
458 token
459 (sgml-&node-next dest)))))))
460 (t ;state is a &-state
461 (sgml-get-&move state token))))
462
463 (defun sgml-get-&move (state token)
464 ;; state is a &-state
465 (let ((m (sgml-get-move (sgml-&state-substate state) token)))
466 (cond (m (cons m (cdr state)))
467 ((sgml-state-final-p (sgml-&state-substate state))
468 (sgml-next-sub& (sgml-&state-dfas state)
469 token
470 (sgml-&state-next state))))))
471
472 (defun sgml-next-sub& (dfas token next)
473 "Compute the next state, choosing from DFAS and moving by TOKEN.
474 If this is not possible, but all DFAS are final, move by TOKEN in NEXT."
475 (let ((allfinal t)
476 (l dfas)
477 (res nil)
478 s1 s2)
479 (while (and l (not res))
480 (setq s1 (car l)
481 allfinal (and allfinal (sgml-state-final-p s1))
482 s2 (sgml-get-move s1 token)
483 res (and s2 (sgml-make-&state s2 (remq s1 dfas) next))
484 l (cdr l)))
485 (cond (res)
486 (allfinal (sgml-get-move next token)))))
487
488 (defsubst sgml-tokens-of-moves (moves)
489 (mapcar (function (lambda (m) (sgml-move-token m)))
490 moves))
491
492 (defun sgml-required-tokens (state)
493 (if (sgml-normal-state-p state)
494 (sgml-tokens-of-moves (sgml-state-reqs state))
495 (or (sgml-required-tokens (sgml-&state-substate state))
496 (loop for s in (sgml-&state-dfas state)
497 nconc (sgml-tokens-of-moves (sgml-state-reqs s)))
498 (sgml-tokens-of-moves (sgml-state-reqs (sgml-&state-next state))))))
499
500
501 (defsubst sgml-final (state)
502 (if (sgml-normal-state-p state)
503 (sgml-state-final-p state)
504 (sgml-final& state)))
505
506 (defun sgml-final& (state)
507 (and (sgml-final (sgml-&state-substate state))
508 (loop for s in (sgml-&state-dfas state)
509 always (sgml-state-final-p s))
510 (sgml-state-final-p (sgml-&state-next state))))
511
512 (defun sgml-optional-tokens (state)
513 (if (sgml-normal-state-p state)
514 (sgml-tokens-of-moves (sgml-state-opts state))
515 (nconc
516 (sgml-optional-tokens (sgml-&state-substate state))
517 (if (sgml-final (sgml-&state-substate state))
518 (loop for s in (sgml-&state-dfas state)
519 nconc (sgml-tokens-of-moves (sgml-state-opts s))))
520 (if (loop for s in (sgml-&state-dfas state)
521 always (sgml-state-final-p s))
522 (sgml-tokens-of-moves (sgml-state-opts (sgml-&state-next state)))))))
523
524
525 ;;;; Attribute Types
526
527 ;;; Basic Types
528 ;; name = string attribute names are lisp symbols
529 ;; attval = string attribute values are lisp strings
530
531 ;;; Attribute Declaration Type
532 ;; attdecl = <name, declared-value, default-value>
533
534 ;; This is the result of the ATTLIST declarations in the DTD.
535 ;; All attribute declarations for an element is the elements
536 ;; attlist.
537
538 ;;; Attribute Declaration Operations
539 ;; sgml-make-attdecl: name declared-value default-value -> attdecl
540 ;; sgml-attdecl-name: attdecl -> name
541 ;; sgml-attdecl-declared-value: attdecl -> declared-value
542 ;; sgml-attdecl-default-value: attdecl -> default-value
543
544 ;;; Attribute Declaration List Type
545 ;; attlist = attdecl*
546
547 ;;; Attribute Declaration List Operations
548 ;; sgml-lookup-attdecl: name x attlist -> attdecl
549
550 ;;; Declared Value Type
551 ;; declared-value = (token-group | notation | simpel)
552 ;; token-group = nametoken+
553 ;; notation = nametoken+
554 ;; simple = symbol lisp symbol corresponding to SGML type
555
556 ;;; Declared Value Operations
557 ;; sgml-declared-value-token-group: declared-value -> list of symbols
558 ;; sgml-declared-value-notation: declared-value -> list of symbols
559 ;; (empty list if not token-group/notation)
560
561 ;;; Default Value Type
562 ;; default-value = (required | implied | conref | specified )
563 ;; implied, conref = constant symbol
564 ;; specified = (fixed | normal)
565 ;; fixed, normal = attval
566
567 ;;; Default Value Operations
568 ;; sgml-default-value-attval: default-value -> (attval | nil)
569 ;; sgml-default-value-type-p: type x default-value -> cond
570
571 ;;; Attribute Specification Type
572 ;; attspec = <name, attval>
573
574 ;; This is the result of parsing an attribute specification.
575
576 ;; sgml-make-attspec: name x attval -> attspec
577 ;; sgml-attspec-name: attspec -> name
578 ;; sgml-attspec-attval: attspec -> attval
579
580
581 ;;; Attribute Specification List Type
582 ;; asl = attspec*
583
584 ;; aka. attribute value list
585
586
587 ;;; Code
588
589 ;;; attdecl representation = (name declared-value default-value)
590
591 (defun sgml-make-attdecl (name dcl-value default-value)
592 (list name dcl-value default-value))
593
594 (defun sgml-attdecl-name (attdecl)
595 (car attdecl))
596
597 (defun sgml-attdecl-declared-value (attdecl)
598 "The declared value of ATTDECL.
599 It may be a symbol or (name-token-group (NAME1 ... NAMEn))
600 or (notation (NOT1 ... NOTn))"
601 (cadr attdecl))
602
603 (defun sgml-attdecl-default-value (attdecl)
604 "The default value of ATTDECL.
605 The default value is either a symbol (required | implied | current |
606 conref) or a list with first element nil or symbol 'fixed' and second
607 element the value."
608 (caddr attdecl))
609
610
611 ;;; attlist representation = (attspec*)
612
613 (defun sgml-lookup-attdecl (name attlist)
614 "Return the attribute declaration for NAME in ATTLIST."
615 (assoc name attlist))
616
617 (defun sgml-attribute-with-declared-value (attlist declared-value)
618 "Find the first attribute in ATTLIST that has DECLARED-VALUE."
619 (let ((found nil))
620 (while (and attlist (not found))
621 (when (equal declared-value
622 (sgml-attdecl-declared-value (car attlist)))
623 (setq found (car attlist)))
624 (setq attlist (cdr attlist)))
625 found))
626
627
628 ;;; declared-value representation
629 ;; token-group = (name-token (symbol+))
630 ;; notation = (notation (symbol+))
631 ;; simple = symbol lisp symbol correspoinding to SGML type
632
633 (defun sgml-make-declared-value (type &optional names)
634 "Make a declared-value of TYPE.
635 TYPE should be a symbol. If TYPE is name-token-group or notation
636 NAMES should be a list of symbols."
637 (if (consp names)
638 (list type names)
639 type))
640
641 (defun sgml-declared-value-token-group (declared-value)
642 "Return the name token group for the DECLARED-VALUE.
643 This applies to name token groups. For other declared values nil is
644 returned."
645 (and (consp declared-value)
646 (eq 'name-token-group (car declared-value))
647 (cadr declared-value)))
648
649 (defun sgml-declared-value-notation (declared-value)
650 "Return the list of notation names for the DECLARED-VALUE.
651 This applies to notation declared value. For other declared values
652 nil is returned."
653 (and (consp declared-value)
654 (eq 'notation (car declared-value))
655 (cadr declared-value)))
656
657 ;;; default-value representation = symbol | ((nil | 'fixed) attval)
658
659 (defun sgml-make-default-value (type &optional attval)
660 (if attval
661 (list type attval)
662 type))
663
664 (defun sgml-default-value-attval (default-value)
665 "Return the actual default value of the declared DEFAULT-VALUE.
666 The actual value is a string. Return nil if no actual value."
667 (and (consp default-value)
668 (cadr default-value)))
669
670 (defun sgml-default-value-type-p (type default-value)
671 "Return true if DEFAULT-VALUE is of TYPE.
672 Where TYPE is a symbol, one of required, implied, conref, or fixed."
673 (or (eq type default-value)
674 (and (consp default-value)
675 (eq type (car default-value)))))
676
677
678 ;;; attspec representation = (symbol . string)
679
680 (defun sgml-make-attspec (name attval)
681 "Create an attspec from NAME and ATTVAL.
682 Special case, if ATTVAL is nil this is an implied attribute."
683 (cons name attval))
684
685 ;; sgml-attspec-name: attspec -> name
686 (defun sgml-attspec-name (attspec)
687 (car attspec))
688
689 ;; sgml-attspec-attval: attspec -> attval
690 (defun sgml-attspec-attval (attspec)
691 "Return the value of attribute specification ATTSPEC.
692 If ATTSPEC is nil, nil is returned."
693 (cdr attspec))
694
695 ;;; asl representaion = (attspec*)
696
697 (defun sgml-lookup-attspec (name asl)
698 (assoc name asl))
699
700
701 ;;;; Element content types
702
703 ;; The content of an element is defined as
704 ;; (125 declared content | 126 content model),
705 ;; 125 declared content = "CDATA" | "RCDATA" | "EMPTY"
706 ;; 126 content model = (127 model group | "ANY"),
707 ;; (65 ps+, 138 exceptions)?
708
709 ;; I represent a model group with the first state of a corresponding finite
710 ;; automaton (this is a cons). Exceptions are handled separately.
711 ;; The other content types are represented by symbols.
712
713 (defsubst sgml-model-group-p (model)
714 (consp model))
715
716 (defconst sgml-cdata 'CDATA)
717 (defconst sgml-rcdata 'RCDATA)
718 (defconst sgml-empty 'EMPTY)
719 (defconst sgml-any 'ANY)
720
721
722 ;;;; External identifier
723 ;; extid = (pubid? sysid? dir)
724 ;; Representation as (pubid sysid . dir)
725 ;; where pubid = nil | string
726 ;; sysid = nil | string
727 ;; dir = string
728
729 (defun sgml-make-extid (pubid sysid &optional dir)
730 (cons pubid (cons sysid (or dir default-directory))))
731
732 (defun sgml-extid-pubid (extid)
733 (car extid))
734
735 (defun sgml-extid-sysid (extid)
736 (if (consp (cdr extid))
737 (cadr extid)
738 (cdr extid)))
739
740 (defun sgml-extid-dir (extid)
741 "Directory where EXTID was declared"
742 (if (consp (cdr extid))
743 (cddr extid)
744 nil))
745
746 (defun sgml-extid-expand (file extid)
747 "Expand file name FILE in the context of EXTID."
748 (expand-file-name file (sgml-extid-dir extid)))
749
750 ;;;; DTD
751
752 ;; DTD = (doctype, eltypes, parameters, entities, shortmaps,
753 ;; notations, dependencies, merged)
754 ;; DTDsubset ~=~ DTD, but doctype is unused
755 ;;
756 ;; doctype = name
757 ;; eltypes = oblist
758 ;; parameters = entity*
759 ;; entities = entity*
760 ;; shortmaps = (name, shortmap)*
761 ;; dependencies = file*
762 ;; merged = Compiled-DTD? where Compiled-DTD = (file, DTD)
763
764 (defstruct (sgml-dtd
765 (:type vector)
766 (:constructor sgml-make-dtd (doctype)))
767 doctype ; STRING, name of doctype
768 (eltypes ; OBLIST, element types defined
769 (sgml-make-eltype-table))
770 (parameters ; ALIST
771 (sgml-make-entity-table))
772 (entities ; ALIST
773 (sgml-make-entity-table))
774 (shortmaps ; ALIST
775 (sgml-make-shortref-table))
776 (notations ; ??
777 nil)
778 (dependencies ; LIST
779 nil)
780 (merged ; (file . DTD)
781 nil)
782 (undef-entities ; LIST of entity names
783 nil))
784
785
786 ;;;; Element type objects
787
788 ;; An element type object contains the information about an element type
789 ;; obtained from parsing the DTD.
790
791 ;; An element type object is represented by a symbol in a special oblist.
792 ;; A table of element type objects is represented by a oblist.
793
794
795 ;;; Element type objects
796
797 (defun sgml-eltype-name (et)
798 (symbol-name et))
799
800 (define-compiler-macro sgml-eltype-name (et)
801 (`(symbol-name (, et))))
802
803 (defun sgml-eltype-defined (et)
804 (fboundp et))
805
806 (defun sgml-eltype-token (et)
807 "Return a token for the element type"
808 et)
809
810 (define-compiler-macro sgml-eltype-token (et)
811 et)
812
813 (defun sgml-token-eltype (token)
814 "Return the element type corresponding to TOKEN."
815 token)
816
817 (define-compiler-macro sgml-token-eltype (token)
818 token)
819
820 (defmacro sgml-prop-fields (&rest names)
821 (cons
822 'progn
823 (loop for n in names collect
824 (`(defmacro (, (intern (format "sgml-eltype-%s" n))) (et)
825 (list 'get et ''(, n)))))))
826
827 (sgml-prop-fields
828 ;;flags ; optional tags and mixed
829 ; (perhaps in value field)
830 ;;model ; Content type
831 ; (perhaps in function field)
832 attlist ; List of defined attributes
833 includes ; List of included elements
834 excludes ; List of excluded elements
835 shortmap ; Associated shortref map
836 ; nil if none and 'empty if #empty
837 )
838
839 (defmacro sgml-eltype-flags (et)
840 (` (symbol-value (, et))))
841
842 (defun sgml-eltype-model (et)
843 (if (fboundp et)
844 (symbol-function et)
845 sgml-any))
846
847 (defsetf sgml-eltype-model fset)
848
849
850 (defun sgml-eltype-stag-optional (et)
851 (oddp (sgml-eltype-flags et)))
852
853 (defun sgml-eltype-etag-optional (et)
854 (/= 0 (logand 2 (sgml-eltype-flags et))))
855
856 (defun sgml-eltype-mixed (et)
857 (< 3 (sgml-eltype-flags et)))
858 (define-compiler-macro sgml-eltype-mixed (et)
859 (`(< 3 (sgml-eltype-flags (, et)))))
860
861 (defsetf sgml-eltype-stag-optional (et) (f)
862 (list 'sgml-set-eltype-flag et 1 f))
863 (defsetf sgml-eltype-etag-optional (et) (f)
864 (list 'sgml-set-eltype-flag et 2 f))
865 (defsetf sgml-eltype-mixed (et) (f)
866 (list 'sgml-set-eltype-flag et 4 f))
867
868 (defun sgml-set-eltype-flag (et mask f)
869 (setf (sgml-eltype-flags et)
870 (logior (logand (if (boundp et)
871 (sgml-eltype-flags et)
872 0)
873 (lognot mask))
874 (if f mask 0))))
875
876 (defun sgml-maybe-put (sym prop val)
877 (when val (put sym prop val)))
878
879 (defsetf sgml-eltype-includes (et) (l)
880 (list 'sgml-maybe-put et ''includes l))
881
882 (defsetf sgml-eltype-excludes (et) (l)
883 (list 'sgml-maybe-put et ''excludes l))
884
885 (defmacro sgml-eltype-appdata (et prop)
886 "Get application data from element type ET with name PROP.
887 PROP should be a symbol, reserved names are: flags, model, attlist,
888 includes, excludes, conref-regexp, mixed, stag-optional, etag-optional."
889 (` (get (, et) (, prop))))
890
891 (defun sgml-eltype-all-miscdata (et)
892 (loop for p on (symbol-plist et) by (function cddr)
893 unless (memq (car p) '(model flags includes excludes))
894 nconc (list (car p) (cadr p))))
895
896 (defun sgml-eltype-set-all-miscdata (et miscdata)
897 (setf (symbol-plist et)
898 (nconc (symbol-plist et) miscdata)))
899
900 (defun sgml-make-eltype (name)
901 (let ((et (make-symbol name)))
902 (setf (sgml-eltype-flags et) 0)
903 et))
904
905
906 ;;; Element type tables
907
908 (defun sgml-make-eltype-table ()
909 "Make an empty table of element types."
910 (make-vector 73 0))
911
912 (defun sgml-eltype-table-empty (eltype-table)
913 (loop for x across eltype-table always (eq x 0)))
914
915 (defun sgml-merge-eltypes (eltypes1 eltypes2)
916 "Return the merge of two element type tables ELTYPES1 and ELTYPES2.
917 This may change ELTYPES1, ELTYPES2 is unchanged. Returns the new table."
918 (if (sgml-eltype-table-empty eltypes1)
919 eltypes2
920 (progn
921 (mapatoms
922 (function (lambda (sym)
923 (let ((et (intern (symbol-name sym) eltypes1)))
924 (unless (fboundp et) ; not yet defined by <!element
925 (when (fboundp sym)
926 (fset et (symbol-function sym)))
927 (when (boundp sym)
928 (set et (symbol-value sym))))
929 (setf (symbol-plist et)
930 (nconc (symbol-plist et)
931 (copy-list (symbol-plist sym)))))))
932 eltypes2)
933 eltypes1)))
934
935 (defun sgml-lookup-eltype (name &optional dtd)
936 "Lookup the element defintion for NAME (string)."
937 (intern name (sgml-dtd-eltypes (or dtd sgml-dtd-info))))
938
939 (defun sgml-eltype-completion-table (eltypes)
940 "Make a completion table from a list, ELTYPES, of element types."
941 (loop for et in eltypes as name = (sgml-eltype-name et)
942 if (boundp et)
943 collect (cons name name)))
944
945 (defun sgml-read-element-type (prompt dtd &optional default)
946 "Read an element type name.
947 PROMPT is displayed as a prompt and DTD should be the dtd to get the
948 element types from. Optional argument DEFAULT (string) will be used as
949 a default for the element type name."
950 (let ((name
951 (completing-read prompt
952 (sgml-dtd-eltypes dtd)
953 (function fboundp)
954 t
955 nil
956 nil)))
957 (when (equal name "")
958 (setq name (or default (error "Aborted"))))
959 (sgml-lookup-eltype name dtd)))
960
961 (defun sgml-map-eltypes (fn dtd &optional collect all)
962 (let ((*res* nil))
963 (mapatoms
964 (cond ((and collect all)
965 (function (lambda (a) (push (funcall fn a) *res*))))
966 (collect
967 (function (lambda (a) (when (boundp a)
968 (push (funcall fn a) *res*)))))
969 (all
970 (function (lambda (a) (funcall fn a))))
971 (t
972 (function (lambda (a) (when (boundp a) (funcall fn a))))))
973 (sgml-dtd-eltypes dtd))
974 (nreverse *res*)))
975
976 ;;;; Load a saved dtd
977
978 (defmacro sgml-char-int (ch)
979 (if (fboundp 'char-int)
980 (` (char-int (, ch)))
981 ch))
982
983 (defsubst sgml-read-octet ()
984 (prog1 (sgml-char-int (following-char))
985 (forward-char)))
986
987 (defsubst sgml-read-number ()
988 "Read a number.
989 A number is 1: an octet [0--sgml-max-singel-octet-number]
990 or 2: two octets (n,m) interpreted as (n-t-1)*256+m+t."
991 (if (> (following-char) sgml-max-single-octet-number)
992 (+ (* (- (following-char) (eval-when-compile
993 (1+ sgml-max-single-octet-number)))
994 256)
995 (prog1 (char-after (1+ (point)))
996 (forward-char 2))
997 sgml-max-single-octet-number)
998 (sgml-read-octet)))
999
1000 (defsubst sgml-read-peek ()
1001 (following-char))
1002
1003 (defun sgml-read-sexp ()
1004 (prog1
1005 (let ((standard-input (current-buffer)))
1006 (read))
1007 (skip-chars-forward " \t")
1008 (forward-char 1)))
1009
1010 (defsubst sgml-read-token ()
1011 (aref sgml-read-token-vector (sgml-read-number)))
1012
1013 (defsubst sgml-read-node-ref ()
1014 (aref sgml-read-nodes (sgml-read-octet)))
1015
1016 (defun sgml-read-model-seq ()
1017 (loop repeat (sgml-read-number) collect (sgml-read-model)))
1018
1019 (defun sgml-read-token-seq ()
1020 (loop repeat (sgml-read-number) collect (sgml-read-token)))
1021
1022 (defun sgml-read-moves ()
1023 (loop repeat (sgml-read-number)
1024 collect (sgml-make-move (sgml-read-token) (sgml-read-node-ref))))
1025
1026 (defun sgml-read-model ()
1027 (let* ((n (sgml-read-number))
1028 (sgml-read-nodes (make-vector n nil)))
1029 (loop for i below n do (aset sgml-read-nodes i (sgml-make-state)))
1030 (loop for e across sgml-read-nodes do
1031 (cond ((eq 255 (sgml-read-peek)) ; a &node
1032 (sgml-read-octet) ; skip
1033 (setf (sgml-&node-next e) (sgml-read-node-ref))
1034 (setf (sgml-&node-dfas e) (sgml-read-model-seq)))
1035 (t ; a normal-state
1036 (setf (sgml-state-opts e) (sgml-read-moves))
1037 (setf (sgml-state-reqs e) (sgml-read-moves)))))
1038 (aref sgml-read-nodes 0)))
1039
1040 (defun sgml-read-content ()
1041 (let ((c (sgml-read-octet)))
1042 (cond ((eq c 0) sgml-cdata)
1043 ((eq c 1) sgml-rcdata)
1044 ((eq c 2) sgml-empty)
1045 ((eq c 3) sgml-any)
1046 ((eq c 4) nil)
1047 ((eq c 128)
1048 (sgml-read-model)))))
1049
1050 (defun sgml-read-decode-flag (flag mask)
1051 (not (zerop (logand flag mask))))
1052
1053 (defun sgml-read-element (et)
1054 (sgml-eltype-set-all-miscdata et (sgml-read-sexp))
1055 (let ((flags (sgml-read-octet)))
1056 (unless (= flags 128)
1057 (setf (sgml-eltype-flags et) flags
1058 (sgml-eltype-model et) (sgml-read-content)
1059 (sgml-eltype-includes et) (sgml-read-token-seq)
1060 (sgml-eltype-excludes et) (sgml-read-token-seq)))))
1061
1062 (defun sgml-read-dtd ()
1063 "Decode the saved DTD in current buffer, return the DTD."
1064 (let ((gc-cons-threshold (max gc-cons-threshold 500000))
1065 temp dtd)
1066 (setq temp (sgml-read-sexp)) ; file-version
1067 (cond
1068 ((equal temp '(sgml-saved-dtd-version 5))
1069 ;; Doctype -- create dtd structure
1070 (setq dtd (sgml-make-dtd (sgml-read-sexp)))
1071 ;; Element type names -- read and create token vector
1072 (setq temp (sgml-read-number)) ; # eltypes
1073 (setq sgml-read-token-vector (make-vector (1+ temp) nil))
1074 (aset sgml-read-token-vector 0 sgml-pcdata-token)
1075 (loop for i from 1 to temp do
1076 (aset sgml-read-token-vector i
1077 (sgml-lookup-eltype (sgml-read-sexp) dtd)))
1078 ;; Element type descriptions
1079 (loop for i from 1 to (sgml-read-number) do
1080 (sgml-read-element (aref sgml-read-token-vector i)))
1081 (setf (sgml-dtd-parameters dtd) (sgml-read-sexp))
1082 (setf (sgml-dtd-entities dtd) (sgml-read-sexp))
1083 (setf (sgml-dtd-shortmaps dtd) (sgml-read-sexp))
1084 (setf (sgml-dtd-notations dtd) (sgml-read-sexp))
1085 (setf (sgml-dtd-dependencies dtd) (sgml-read-sexp)))
1086 ;; New version
1087 ((equal temp '(sgml-saved-dtd-version 6))
1088 (setq dtd (sgml-bdtd-read-dtd)))
1089 ;; Something else
1090 (t
1091 (error "Unknown file format for saved DTD: %s" temp)))
1092 dtd))
1093
1094 (defun sgml-load-dtd (file)
1095 "Load a saved DTD from FILE."
1096 (interactive
1097 (let ((tem (expand-file-name
1098 (or sgml-default-dtd-file
1099 (sgml-default-dtd-file)))))
1100 (list (read-file-name "Load DTD from: "
1101 (file-name-directory tem)
1102 tem
1103 t
1104 (file-name-nondirectory tem)))))
1105 (setq sgml-loaded-dtd nil) ; Allow reloading of DTD
1106 ;; Search for 'file' on the sgml-system-path [ndw]
1107 (let ((real-file (car (mapcan (function
1108 (lambda (dir)
1109 (let ((f (expand-file-name file dir)))
1110 (if (file-exists-p f)
1111 (list f)))))
1112 (cons "."
1113 ;; wing change -- add sgml-data-directory
1114 (append sgml-system-path
1115 (list sgml-data-directory)))))))
1116 (or real-file
1117 (error "Saved DTD file %s not found" file))
1118 (let ((cb (current-buffer))
1119 (tem nil)
1120 (dtd nil)
1121 (l (buffer-list))
1122 (find-file-type ; Allways binary
1123 (function (lambda (fname) 1))))
1124 ;; Search loaded buffer for a already loaded DTD
1125 (while (and l (null tem))
1126 (set-buffer (car l))
1127 (if (equal sgml-loaded-dtd real-file)
1128 (setq tem (current-buffer)))
1129 (setq l (cdr l)))
1130 (cond
1131 (tem ; loaded DTD found
1132 (setq dtd (sgml-pstate-dtd sgml-buffer-parse-state)))
1133 (t ; load DTD from file
1134 (set-buffer cb)
1135 (sgml-push-to-entity real-file)
1136 (message "Loading DTD from %s..." real-file)
1137 (setq dtd (sgml-read-dtd))
1138 (message "Loading DTD from %s...done" real-file)
1139 (sgml-pop-entity)))
1140 (set-buffer cb)
1141 (sgml-set-initial-state dtd)
1142 (setq sgml-default-dtd-file file)
1143 (setq sgml-loaded-dtd real-file))))
1144
1145 ;;;; Biniary coded DTD module
1146 ;;; Works on the binary coded compiled DTD (bdtd)
1147
1148 ;;; bdtd-load: cfile dtdfile ents -> bdtd
1149 ;;; bdtd-merge: bdtd dtd -> dtd?
1150 ;;; bdtd-read-dtd: bdtd -> dtd
1151
1152 ;;; Implement by letting bdtd be implicitly the current buffer and
1153 ;;; dtd implicit in sgml-dtd-info.
1154
1155 (defun sgml-bdtd-load (cfile dtdfile ents)
1156 "Load the compiled dtd from CFILE into the current buffer.
1157 If this file does not exists, is of an old version or out of date, a
1158 new compiled dtd will be creted from file DTDFILE and parameter entity
1159 settings in ENTS."
1160 ;;(Assume the current buffer is a scratch buffer and is empty)
1161 (sgml-debug "Trying to load compiled DTD from %s..." cfile)
1162 (or (and (file-readable-p cfile)
1163 (let ((find-file-type ; Allways binary
1164 (function (lambda (fname) 1))))
1165 ;; fifth arg to insert-file-contents is not available in early
1166 ;; v19.
1167 (insert-file-contents cfile nil nil nil))
1168 (equal '(sgml-saved-dtd-version 6) (sgml-read-sexp))
1169 (or (sgml-up-to-date-p cfile (sgml-read-sexp))
1170 (if (eq 'ask sgml-recompile-out-of-date-cdtd)
1171 (not (y-or-n-p
1172 "Compiled DTD is out of date, recompile? "))
1173 (not sgml-recompile-out-of-date-cdtd))))
1174 (sgml-compile-dtd dtdfile cfile ents)))
1175
1176 (defun sgml-up-to-date-p (file dependencies)
1177 "Check if FILE is newer than all files in the list DEPENDENCIES.
1178 If DEPENDENCIES contains the symbol `t', FILE is not considered newer."
1179 (if (memq t dependencies)
1180 nil
1181 (loop for f in dependencies
1182 always (file-newer-than-file-p file f))))
1183
1184 (defun sgml-compile-dtd (dtd-file to-file ents)
1185 "Construct a binary code compiled dtd from DTD-FILE and write it to TO-FILE.
1186 The dtd will be constructed with the parameter entities set according
1187 to ENTS. The bdtd will be left in the current buffer. The current
1188 buffer is assumend to be empty to start with."
1189 (sgml-log-message "Recompiling DTD file %s..." dtd-file)
1190 (let* ((sgml-dtd-info (sgml-make-dtd nil))
1191 (parameters (sgml-dtd-parameters sgml-dtd-info))
1192 (sgml-parsing-dtd t))
1193 (push dtd-file
1194 (sgml-dtd-dependencies sgml-dtd-info))
1195 (loop for (name . val) in ents
1196 do (sgml-entity-declare name parameters 'text val))
1197 (sgml-push-to-entity dtd-file)
1198 (sgml-check-dtd-subset)
1199 (sgml-pop-entity)
1200 (erase-buffer)
1201 (sgml-write-dtd sgml-dtd-info to-file)
1202 t))
1203
1204 (defun sgml-check-entities (params1 params2)
1205 "Check that PARAMS1 is compatible with PARAMS2."
1206 (block check-entities
1207 (sgml-map-entities
1208 (function (lambda (entity)
1209 (let ((other
1210 (sgml-lookup-entity (sgml-entity-name entity)
1211 params2)))
1212 (unless (or (null other)
1213 (equal entity other))
1214 (sgml-log-message
1215 "Parameter %s in complied DTD has wrong value;\
1216 is '%s' should be '%s'"
1217 (sgml-entity-name entity)
1218 (sgml-entity-text other)
1219 (sgml-entity-text entity))
1220 (return-from check-entities nil)))))
1221 params1)
1222 t))
1223
1224 (defun sgml-bdtd-merge ()
1225 "Merge the binary coded dtd in the current buffer with the current dtd.
1226 The current dtd is the variable sgml-dtd-info. Return t if mereged
1227 was successfull or nil if failed."
1228 (goto-char (point-min))
1229 (sgml-read-sexp) ; skip filev
1230 (let ((dependencies (sgml-read-sexp))
1231 (parameters (sgml-read-sexp))
1232 (gc-cons-threshold (max gc-cons-threshold 500000))
1233 temp)
1234 ;; Check comaptibility of parameters
1235 (and (sgml-check-entities (sgml-dtd-parameters sgml-dtd-info)
1236 parameters)
1237 (progn
1238 ;; Do the merger
1239 (sgml-message "Reading compiled DTD...")
1240 (sgml-merge-entity-tables (sgml-dtd-parameters sgml-dtd-info)
1241 parameters)
1242 (setf (sgml-dtd-dependencies sgml-dtd-info)
1243 (nconc (sgml-dtd-dependencies sgml-dtd-info)
1244 dependencies))
1245 ;; Doctype
1246 (setq temp (sgml-read-sexp))
1247 (when (and temp (null (sgml-dtd-doctype sgml-dtd-info)))
1248 (setf (sgml-dtd-doctype sgml-dtd-info) temp))
1249
1250 ;; Element type names -- read and create token vector
1251 (setq temp (sgml-read-number)) ; # eltypes
1252 (setq sgml-read-token-vector (make-vector (1+ temp) nil))
1253 (aset sgml-read-token-vector 0 sgml-pcdata-token)
1254 (loop for i from 1 to temp do
1255 (aset sgml-read-token-vector i
1256 (sgml-lookup-eltype (sgml-read-sexp))))
1257 ;; Element type descriptions
1258 (loop for i from 1 to (sgml-read-number) do
1259 (sgml-read-element (aref sgml-read-token-vector i)))
1260 (sgml-merge-entity-tables (sgml-dtd-entities sgml-dtd-info)
1261 (sgml-read-sexp))
1262 (sgml-merge-shortmaps (sgml-dtd-shortmaps sgml-dtd-info)
1263 (sgml-read-sexp))
1264 (setf (sgml-dtd-notations sgml-dtd-info) (sgml-read-sexp))
1265 t))))
1266
1267 (defun sgml-bdtd-read-dtd ()
1268 "Create and return a dtd from the binary coded dtd in the current buffer."
1269 (let ((sgml-dtd-info (sgml-make-dtd nil)))
1270 (sgml-bdtd-merge)
1271 sgml-dtd-info))
1272
1273 ;;;; Set markup type
1274
1275 (defsubst sgml-set-markup-type (type)
1276 "Set the type of the markup parsed to TYPE.
1277 The markup starts at position given by variable sgml-markup-start and
1278 ends at point."
1279 (when (and sgml-set-face
1280 (null sgml-current-eref))
1281 (sgml-set-face-for sgml-markup-start (point) type))
1282 (setq sgml-markup-type type))
1283
1284
1285 ;;;; Parsing delimiters
1286
1287 (eval-and-compile
1288 (defconst sgml-delimiters
1289 '("AND" "&"
1290 "COM" "--"
1291 "CRO" "&#"
1292 "DSC" "]"
1293 "DSO" "["
1294 "DTGC" "]"
1295 "DTGO" "["
1296 "ERO" "&"
1297 "ETAGO" "</"
1298 "GRPC" ")"
1299 "GRPO" "("
1300 "LIT" "\""
1301 "LITA" "'"
1302 "MDC" ">"
1303 "MDO" "<!"
1304 "MINUS" "-"
1305 "MSC" "]]"
1306 "NET" "/"
1307 "OPT" "?"
1308 "OR" "|"
1309 "PERO" "%"
1310 "PIC" ">"
1311 "PIO" "<?"
1312 "PLUS" "+"
1313 "REFC" ";"
1314 "REP" "*"
1315 "RNI" "#"
1316 "SEQ" ","
1317 "STAGO" "<"
1318 "TAGC" ">"
1319 "VI" "="
1320 ;; Some combinations
1321 "MS-START" "<![" ; MDO DSO
1322 "MS-END" "]]>" ; MSC MDC
1323 ;; Pseudo
1324 "NULL" ""
1325 )))
1326
1327
1328 (defmacro sgml-is-delim (delim &optional context move offset)
1329 "Macro for matching delimiters.
1330 Syntax: DELIM &optional CONTEXT MOVE
1331 where DELIM is the delimiter name (string or symbol),
1332 CONTEXT the contextual constraint, and
1333 MOVE is `nil', `move' or `check'.
1334
1335 Test if the text following point in current buffer matches the SGML
1336 delimiter DELIM. Also check the characters after the delimiter for
1337 CONTEXT. Applicable values for CONTEXT is
1338 `gi' -- name start or TAGC if SHORTTAG YES,
1339 `com' -- if COM or MDC,
1340 `nmstart' -- name start character,
1341 `stagc' -- TAGC if SHORTTAG YES,
1342 `digit' -- any Digit,
1343 string -- delimiter with that name,
1344 list -- any of the contextual constraints in the list."
1345
1346 (or offset (setq offset 0))
1347 (let ((ds (member (upcase (format "%s" delim))
1348 sgml-delimiters)))
1349 (assert ds)
1350 (setq delim (car ds)
1351 ds (cadr ds))
1352 (cond ((eq context 'gi)
1353 (setq context '(nmstart stagc)))
1354 ((eq context 'com)
1355 (setq context '("COM" "MDC")))
1356 ((null context)
1357 (setq context '(t)))
1358 ((not (listp context))
1359 (setq context (list context))))
1360 (`(if (and ; This and checks that characters
1361 ; of the delimiter
1362 (,@(loop for i from 0 below (length ds) collect
1363 (` (eq (, (aref ds i))
1364 (sgml-following-char (, (+ i offset)))))))
1365 (or
1366 (,@(loop
1367 for c in context collect ; context check
1368 (cond
1369 ((eq c 'nmstart) ; name start character
1370 (`(sgml-startnm-char
1371 (or (sgml-following-char (, (length ds))) 0))))
1372 ((eq c 'stagc)
1373 (`(and sgml-current-shorttag
1374 (sgml-is-delim "TAGC" nil nil (, (length ds))))))
1375 ((eq c 'digit)
1376 (`(memq (sgml-following-char (, (length ds)))
1377 '(?0 ?1 ?2 ?3 ?4 ?5 ?6 ?7 ?8 ?9))))
1378 ((stringp c)
1379 (`(sgml-is-delim (, c) nil nil (, (length ds)))))
1380 ((eq c t))
1381 (t (error "Context type: %s" c))))
1382 )))
1383
1384 (progn ; Do operations if delimiter found
1385 (,@ (if move (`((forward-char (, (length ds)))))))
1386 (,@ (if (not (eq move 'check))
1387 '(t))))
1388 (,@ (if (eq move 'check)
1389 (`((sgml-delimiter-parse-error (, delim))))))))))
1390
1391 (defmacro sgml-following-char (n)
1392 (cond ((zerop n) '(following-char))
1393 ((= n 1) '(char-after (1+ (point))))
1394 (t (` (char-after (+ (, n) (point)))))))
1395
1396 (defun sgml-delimiter-parse-error (delim)
1397 (sgml-parse-error "Delimiter %s (%s) expected"
1398 delim (cadr (member delim sgml-delimiters))))
1399
1400 (defmacro sgml-parse-delim (delim &optional context)
1401 (`(sgml-is-delim (, delim) (, context) move)))
1402
1403 (defmacro sgml-check-delim (delim &optional context)
1404 (`(sgml-is-delim (, delim) (, context) check)))
1405
1406 (defmacro sgml-skip-upto (delim)
1407 "Skip until the delimiter or first char of one of the delimiters.
1408 If DELIM is a string/symbol this is should be a delimiter role.
1409 Characters are skipped until the delimiter is recognized.
1410 If DELIM is a list of delimiters, skip until a character that is first
1411 in any of them."
1412 (cond
1413 ((consp delim)
1414 (list 'skip-chars-forward
1415 (concat "^"
1416 (loop for d in delim
1417 concat (let ((ds (member (upcase (format "%s" d))
1418 sgml-delimiters)))
1419 (assert ds)
1420 (let ((s (substring (cadr ds) 0 1)))
1421 (if (member s '("-" "\\"))
1422 (concat "\\" s)
1423 s)))))))
1424 (t
1425 (let ((ds (member (upcase (format "%s" delim))
1426 sgml-delimiters)))
1427 (assert ds)
1428 (setq ds (cadr ds))
1429 (if (= 1 (length ds))
1430 (list 'skip-chars-forward (concat "^" ds))
1431 (`(and (search-forward (, ds) nil t)
1432 (backward-char (, (length ds))))))))))
1433
1434
1435 ;;(macroexpand '(sgml-is-delim mdo))
1436 ;;(macroexpand '(sgml-parse-delim mdo))
1437 ;;(macroexpand '(sgml-check-delim mdo))
1438
1439
1440 ;;;; General lexical functions
1441 ;;; Naming conventions
1442 ;;; sgml-parse-xx try to parse xx, return nil if can't else return
1443 ;;; some propriate non-nil value.
1444 ;;; Except: for name/nametoken parsing, return 0 if can't.
1445 ;;; sgml-check-xx require xx, report error if can't parse. Return
1446 ;;; aproporiate value.
1447
1448 (defmacro sgml-parse-char (char)
1449 (` (cond ((eq (, char) (following-char))
1450 (forward-char 1)
1451 t))))
1452
1453 (defmacro sgml-parse-chars (char1 char2 &optional char3)
1454 "Parse two or three chars; return nil if can't"
1455 (if (null char3)
1456 (` (cond ((and (eq (, char1) (following-char))
1457 (eq (, char2) (char-after (1+ (point)))))
1458 (forward-char 2)
1459 t)))
1460 (` (cond ((and (eq (, char1) (following-char))
1461 (eq (, char2) (char-after (1+ (point))))
1462 (eq (, char3) (char-after (1+ (1+ (point))))))
1463 (forward-char 3)
1464 t)))))
1465
1466 (defun sgml-check-char (char)
1467 (cond ((not (sgml-parse-char char))
1468 (sgml-parse-error "Expecting %c" char))))
1469
1470 (defun sgml-parse-RE ()
1471 (or (sgml-parse-char ?\n)
1472 (sgml-parse-char ?\r)))
1473
1474 (defmacro sgml-startnm-char (c)
1475 (` (eq ?w (char-syntax (, c)))))
1476
1477 (defun sgml-startnm-char-next ()
1478 (and (not (eobp))
1479 (sgml-startnm-char (following-char))))
1480
1481 (defun sgml-name-char (c)
1482 (and c
1483 (or (sgml-startnm-char c)
1484 (eq ?_ (char-syntax c)))))
1485
1486 (defun sgml-is-end-tag ()
1487 (sgml-is-delim "ETAGO" gi))
1488
1489 (defsubst sgml-is-enabled-net ()
1490 (and (sgml-is-delim "NET")
1491 sgml-current-shorttag
1492 (sgml-tree-net-enabled sgml-current-tree)))
1493
1494 (defun sgml-is-start-tag ()
1495 (sgml-is-delim "STAGO" gi))
1496
1497 (defsubst sgml-parse-s (&optional shortmap)
1498 (if shortmap
1499 (or (/= 0 (skip-chars-forward " "))
1500 (/= 0 (skip-chars-forward "\t"))
1501 (sgml-parse-char ?\n)
1502 (sgml-parse-char ?\r))
1503 (/= 0 (skip-chars-forward " \t\n\r"))))
1504
1505 (defsubst sgml-parse-processing-instruction ()
1506 (if (sgml-parse-delim "PIO")
1507 (sgml-do-processing-instruction)))
1508
1509 (defun sgml-do-processing-instruction ()
1510 (let ((start (point)))
1511 (sgml-skip-upto "PIC")
1512 (when sgml-pi-function
1513 (funcall sgml-pi-function
1514 (buffer-substring-no-properties start (point)))))
1515 (sgml-check-delim "PIC")
1516 (sgml-set-markup-type 'pi)
1517 t)
1518
1519
1520 (defmacro sgml-general-case (string) (`(downcase (, string))))
1521 (defmacro sgml-entity-case (string) string)
1522
1523 (defun sgml-parse-name (&optional entity-name)
1524 (if (sgml-startnm-char-next)
1525 (let ((name (buffer-substring-no-properties
1526 (point)
1527 (progn (skip-syntax-forward "w_")
1528 (point)))))
1529 (if entity-name
1530 (sgml-entity-case name)
1531 (sgml-general-case name)))))
1532
1533 (define-compiler-macro sgml-parse-name (&whole form &optional entity-name)
1534 (cond
1535 ((memq entity-name '(nil t))
1536 (` (if (sgml-startnm-char-next)
1537 ((, (if entity-name 'sgml-entity-case 'sgml-general-case))
1538 (buffer-substring-no-properties (point)
1539 (progn (skip-syntax-forward "w_")
1540 (point)))))))
1541 (t
1542 form)))
1543
1544 (defun sgml-check-name (&optional entity-name)
1545 (or (sgml-parse-name entity-name)
1546 (sgml-parse-error "Name expected")))
1547
1548 (define-compiler-macro sgml-check-name (&optional entity-name)
1549 (`(or (, (if entity-name
1550 (`(sgml-parse-name (, entity-name)))
1551 '(sgml-parse-name)))
1552 (sgml-parse-error "Name expected"))))
1553
1554
1555 (defun sgml-parse-nametoken (&optional entity-name)
1556 "Parses a name token and returns a string or nil if no nametoken."
1557 (if (sgml-name-char (following-char))
1558 (let ((name (buffer-substring-no-properties
1559 (point)
1560 (progn (skip-syntax-forward "w_")
1561 (point)))))
1562 (if entity-name
1563 (sgml-entity-case name)
1564 (sgml-general-case name)))))
1565
1566 (defun sgml-check-nametoken ()
1567 (or (sgml-parse-nametoken)
1568 (sgml-parse-error "Name token expected")))
1569
1570 ;;(defun sgml-gname-symbol (string)
1571 ;; "Convert a string to a general name/nametoken/numbertoken."
1572 ;; (intern (sgml-general-case string)))
1573
1574 ;;(defun sgml-ename-symbol (string)
1575 ;; "Convert a string to an entity name."
1576 ;; (intern (sgml-entity-case string)))
1577
1578 (defsubst sgml-parse-general-entity-ref ()
1579 (if (sgml-parse-delim "ERO" nmstart)
1580 (sgml-do-general-entity-ref)))
1581
1582 (defun sgml-do-general-entity-ref ()
1583 (sgml-do-entity-ref
1584 (prog1 (sgml-parse-name t)
1585 (or (sgml-parse-delim "REFC")
1586 (sgml-parse-RE))
1587 (sgml-set-markup-type 'entity)))
1588 t)
1589
1590 (defun sgml-do-entity-ref (name)
1591 (let ((entity
1592 (sgml-lookup-entity name
1593 (sgml-dtd-entities sgml-dtd-info))))
1594 (cond ((and (null entity)
1595 sgml-warn-about-undefined-entities)
1596 (sgml-log-warning
1597 "Undefined entity %s" name))
1598 ((sgml-entity-data-p entity)
1599 (when sgml-signal-data-function
1600 (funcall sgml-signal-data-function))
1601 (cond
1602 (sgml-entity-function
1603 (funcall sgml-entity-function entity))
1604 (sgml-data-function
1605 (sgml-push-to-entity entity sgml-markup-start)
1606 (funcall sgml-data-function (buffer-string))
1607 (sgml-pop-entity))))
1608 (t
1609 (sgml-push-to-entity entity sgml-markup-start)))))
1610
1611 (defsubst sgml-parse-parameter-entity-ref ()
1612 "Parse and push to a parameter entity, return nil if no ref here."
1613 ;;(setq sgml-markup-start (point))
1614 (if (sgml-parse-delim "PERO" nmstart)
1615 (sgml-do-parameter-entity-ref)))
1616
1617 (defun sgml-do-parameter-entity-ref ()
1618 (let* ((name (sgml-parse-name t))
1619 (ent (sgml-lookup-entity name
1620 (sgml-dtd-parameters sgml-dtd-info))))
1621 (or (sgml-parse-delim "REFC")
1622 (sgml-parse-char ?\n))
1623 ;;(sgml-set-markup-type 'param)
1624 (cond (ent
1625 (sgml-push-to-entity ent sgml-markup-start 'param))
1626 (t
1627 (sgml-log-warning
1628 "Undefined parameter entity %s" name)))
1629 t))
1630
1631 (defun sgml-parse-comment ()
1632 (if (sgml-parse-delim "COM")
1633 (progn (sgml-skip-upto "COM")
1634 (sgml-check-delim "COM")
1635 t)))
1636
1637 (defun sgml-skip-cs ()
1638 "Skip over the separator used in the catalog.
1639 Return true if not at the end of the buffer."
1640 (while (or (sgml-parse-s)
1641 (sgml-parse-comment)))
1642 (not (eobp)))
1643
1644 (defsubst sgml-skip-ps ()
1645 "Move point forward stopping before a char that isn't a parameter separator."
1646 (while
1647 (or (sgml-parse-s)
1648 (if (eobp) (sgml-pop-entity))
1649 (sgml-parse-parameter-entity-ref)
1650 (sgml-parse-comment))))
1651
1652 (defsubst sgml-parse-ds ()
1653 ;71 ds = 5 s | EE | 60+ parameter entity reference
1654 ; | 91 comment declaration
1655 ; | 44 processing instruction
1656 ; | 93 marked section declaration ***
1657 (or (and (eobp) (sgml-pop-entity)) ;EE
1658 (sgml-parse-s) ;5 s
1659 ;;(sgml-parse-comment-declaration) ;91 comment declaration
1660 (sgml-parse-parameter-entity-ref)
1661 (sgml-parse-processing-instruction)))
1662
1663 (defun sgml-skip-ds ()
1664 (while (sgml-parse-ds)))
1665
1666 (defmacro sgml-parse-rni (&optional name)
1667 "Parse a RNI (#) return nil if none; with optional NAME,
1668 a RNI must be followed by NAME."
1669 (cond
1670 (name
1671 (` (if (sgml-parse-delim "RNI")
1672 (sgml-check-token (, name)))))
1673 (t '(sgml-parse-delim "RNI"))))
1674
1675 (defun sgml-check-token (name)
1676 (or (equal (sgml-check-name) name)
1677 (sgml-parse-error "Reserved name not expected")))
1678
1679 (defun sgml-parse-literal ()
1680 "Parse a literal and return a string, if no literal return nil."
1681 (let (lita start value)
1682 (cond ((or (sgml-parse-delim "LIT")
1683 (setq lita (sgml-parse-delim "LITA")))
1684 (setq start (point))
1685 (if lita
1686 (sgml-skip-upto "LITA")
1687 (sgml-skip-upto "LIT"))
1688 (setq value (buffer-substring-no-properties start (point)))
1689 (if lita
1690 (sgml-check-delim "LITA")
1691 (sgml-check-delim "LIT"))
1692 value))))
1693
1694 (defun sgml-check-literal ()
1695 (or (sgml-parse-literal)
1696 (sgml-parse-error "A litteral expected")))
1697
1698 (defun sgml-parse-minimum-literal ()
1699 "Parse a quoted SGML string and return it, if no string return nil."
1700 (cond
1701 ((memq (following-char) '(?\" ?\'))
1702 (let* ((qchar (following-char))
1703 (blanks " \t\r\n")
1704 (qskip (format "^%s%c" blanks qchar))
1705 (start (point))
1706 (value ; accumulates the literal value
1707 "")
1708 (spaced ""))
1709 (forward-char 1)
1710 (skip-chars-forward blanks)
1711 (while (not (sgml-parse-char qchar))
1712 (cond ((eobp)
1713 (goto-char start)
1714 (sgml-parse-error "Unterminated literal"))
1715 ((sgml-parse-s)
1716 (setq spaced " "))
1717 (t
1718 (setq value
1719 (concat value spaced
1720 (buffer-substring-no-properties
1721 (point)
1722 (progn (skip-chars-forward qskip)
1723 (point))))
1724 spaced ""))))
1725 value))))
1726
1727 (defun sgml-check-minimum-literal ()
1728 (or (sgml-parse-minimum-literal)
1729 (sgml-parse-error "A minimum literal expected")))
1730
1731 (defun sgml-parse-external ()
1732 "Leaves nil if no external id, or (pubid . sysid)"
1733 (sgml-skip-ps)
1734 (let* ((p (point))
1735 (token (sgml-parse-nametoken)))
1736 (cond
1737 (token
1738 (sgml-skip-ps)
1739 (cond ((member token '("public" "system"))
1740 (let* ((pubid ; the public id
1741 (if (string-equal token "public")
1742 (or (sgml-parse-minimum-literal)
1743 (sgml-parse-error "Public identifier expected"))))
1744 (sysid ; the system id
1745 (progn (sgml-skip-ps)
1746 (sgml-parse-literal))))
1747 (sgml-make-extid pubid sysid)))
1748 (t
1749 (goto-char p)
1750 nil))))))
1751
1752 (defun sgml-skip-tag ()
1753 (when (sgml-parse-char ?<)
1754 (sgml-parse-char ?/)
1755 (unless (search-forward-regexp
1756 "\\([^\"'<>/]\\|\"[^\"]*\"\\|'[^']*'\\)*"
1757 nil t)
1758 (sgml-error "Invalid tag"))
1759 (or (sgml-parse-char ?>)
1760 (sgml-parse-char ?/))))
1761
1762
1763 ;;;; Entity Manager
1764
1765 (defstruct (sgml-entity
1766 (:type list)
1767 (:constructor sgml-make-entity (name type text)))
1768 name ; Name of entity (string)
1769 type ; Type of entity CDATA NDATA PI SDATA
1770 text ; string or external
1771 )
1772
1773 (defun sgml-entity-data-p (entity)
1774 "True if ENTITY is a data entity, that is not a text entity."
1775 (not (eq (sgml-entity-type entity) 'text)))
1776
1777 (defun sgml-entity-marked-undefined-p (entity)
1778 (cdddr entity))
1779
1780 (defun sgml-entity-insert-text (entity &optional ptype)
1781 "Insert the text of ENTITY.
1782 PTYPE can be 'param if this is a parameter entity."
1783 (let ((text (sgml-entity-text entity)))
1784 (cond
1785 ((stringp text)
1786 (insert text))
1787 (t
1788 (unless (sgml-insert-external-entity text
1789 (or ptype
1790 (sgml-entity-type entity))
1791 (sgml-entity-name entity))
1792 ;; Mark entity as not found
1793 (setcdr (cddr entity) t) ;***
1794 )))))
1795
1796 (defun sgml-entity-file (entity &optional ptype)
1797 (sgml-external-file (sgml-entity-text entity)
1798 (or ptype (sgml-entity-type entity))
1799 (sgml-entity-name entity)))
1800
1801 ;;; Entity tables
1802 ;; Represented by a cons-cell whose car is the default entity (or nil)
1803 ;; and whose cdr is as an association list.
1804
1805 (defun sgml-make-entity-table ()
1806 (list nil))
1807
1808 (defun sgml-lookup-entity (name entity-table)
1809 (or (assoc name (cdr entity-table))
1810 (car entity-table)))
1811
1812 (defun sgml-entity-declare (name entity-table type text)
1813 "Declare an entity with name NAME in table ENTITY-TABLE.
1814 TYPE should be the type of the entity (text|cdata|ndata|sdata...).
1815 TEXT is the text of the entity, a string or an external identifier.
1816 If NAME is nil, this defines the default entity."
1817 (cond
1818 (name
1819 (unless (sgml-lookup-entity name entity-table)
1820 (sgml-debug "Declare entity %s %s as %S" name type text)
1821 (nconc entity-table
1822 (list (sgml-make-entity name type text)))))
1823 (t
1824 (unless (car entity-table)
1825 (sgml-debug "Declare default entity %s as %S" type text)
1826 (setcar entity-table (sgml-make-entity name type text))))))
1827
1828 (defun sgml-entity-completion-table (entity-table)
1829 "Make a completion table from the ENTITY-TABLE."
1830 (cdr entity-table))
1831
1832 (defun sgml-map-entities (fn entity-table &optional collect)
1833 (if collect
1834 (mapcar fn (cdr entity-table))
1835 (loop for e in (cdr entity-table) do (funcall fn e))))
1836
1837 (defun sgml-merge-entity-tables (tab1 tab2)
1838 "Merge entity table TAB2 into TAB1. TAB1 is modified."
1839 (nconc tab1 (cdr tab2))
1840 (setcar tab1 (or (car tab1) (car tab2))))
1841
1842
1843 ;;;; External identifyer resolve
1844
1845 (defun sgml-cache-catalog (file cache-var parser-fun)
1846 "Return parsed catalog.
1847 FILE is the file containing the catalog. Maintains a cache of parsed
1848 catalog files in variable CACHE-VAR. The parsing is done by function
1849 PARSER-FUN that should parse the current buffer and return the parsed
1850 repreaentation of the catalog."
1851 (setq file (expand-file-name file))
1852 (and
1853 (file-readable-p file)
1854 (let ((c (assoc file (symbol-value cache-var)))
1855 (modtime (elt (file-attributes file) 5)))
1856 (if (and c (equal (second c) modtime))
1857 (cddr c)
1858 (when c (set cache-var (delq c (symbol-value cache-var))))
1859 (let (new)
1860 (message "Loading %s ..." file)
1861 (sgml-push-to-entity file)
1862 (setq default-directory (file-name-directory file))
1863 (setq new (funcall parser-fun))
1864 (sgml-pop-entity)
1865 (push (cons file (cons modtime new)) (symbol-value cache-var))
1866 (message "Loading %s ... done" file)
1867 new)))))
1868
1869 (defun sgml-catalog-lookup (files pubid type name)
1870 "Look up the public identifier/entity name in catalogs.
1871 FILES is a list of catalogs to use. PUBID is the public identifier
1872 \(if any). TYPE is the entity type and NAME is the entity name."
1873 (cond ((eq type 'param)
1874 (setq name (format "%%%s" name)
1875 type 'entity))
1876 ((eq type 'dtd)
1877 (setq type 'doctype)))
1878
1879 (loop
1880 for f in files thereis
1881 (let ((cat (sgml-cache-catalog f 'sgml-catalog-assoc
1882 (function sgml-parse-catalog-buffer))))
1883 (or
1884 ;; Giv PUBLIC entries priority over ENTITY and DOCTYPE
1885 (if pubid
1886 (loop for (key cname file) in cat
1887 thereis (and (eq 'public key)
1888 (string= pubid cname)
1889 (file-readable-p file)
1890 file)))
1891 (loop for (key cname file) in cat
1892 thereis (and (eq type key)
1893 (or (null cname)
1894 (string= name cname))
1895 (file-readable-p file)
1896 file))))))
1897
1898 (defun sgml-search-catalog (func filter)
1899 (loop
1900 for files in (list sgml-local-catalogs sgml-catalog-files)
1901 thereis
1902 (loop for file in files thereis
1903 (loop for entry in (sgml-cache-catalog
1904 file 'sgml-catalog-assoc
1905 (function sgml-parse-catalog-buffer))
1906 when (or (null filter)
1907 (memq (car entry) filter))
1908 thereis (funcall func entry)))))
1909
1910 (defun sgml-path-lookup (extid type name)
1911 (let* ((pubid (sgml-extid-pubid extid))
1912 (sysid (sgml-extid-sysid extid))
1913 (subst (list '(?% ?%))))
1914 (when pubid
1915 (nconc subst (list (cons ?p (sgml-transliterate-file pubid)))
1916 (sgml-pubid-parts pubid))
1917 (setq pubid (sgml-canonize-pubid pubid)))
1918 (when sysid (nconc subst (list (cons ?s sysid))))
1919 (when name (nconc subst (list (cons ?n name))))
1920 (when type (nconc subst (list (cons ?y (cond ((eq type 'dtd) "dtd")
1921 ((eq type 'text) "text")
1922 ((eq type 'param) "parm")
1923 (t "sgml"))))))
1924 (sgml-debug "Ext. file subst. = %S" subst)
1925 (loop for cand in sgml-public-map
1926 thereis
1927 (and (setq cand (sgml-subst-expand cand subst))
1928 (file-readable-p
1929 (setq cand
1930 (sgml-extid-expand (substitute-in-file-name cand)
1931 extid)))
1932 (not (file-directory-p cand))
1933 cand))))
1934
1935 (defun sgml-external-file (extid &optional type name)
1936 "Return file name for entity with external identifier EXTID.
1937 Optional argument TYPE should be the type of entity and NAME should be
1938 the entity name."
1939 ;; extid is (pubid . sysid)
1940 (let ((pubid (sgml-extid-pubid extid)))
1941 (when pubid (setq pubid (sgml-canonize-pubid pubid)))
1942 (or (if sgml-system-identifiers-are-preferred
1943 (sgml-lookup-sysid-as-file extid))
1944 (sgml-catalog-lookup sgml-current-localcat pubid type name)
1945 (sgml-catalog-lookup sgml-catalog-files pubid type name)
1946 (if (not sgml-system-identifiers-are-preferred)
1947 (sgml-lookup-sysid-as-file extid))
1948 (sgml-path-lookup extid type name))))
1949
1950 (defun sgml-lookup-sysid-as-file (extid)
1951 (let ((sysid (sgml-extid-sysid extid)))
1952 (and sysid
1953 (loop for pat in sgml-public-map
1954 never (string-match "%[Ss]" pat))
1955 (file-readable-p (setq sysid (sgml-extid-expand sysid extid)))
1956 sysid)))
1957
1958 (defun sgml-insert-external-entity (extid &optional type name)
1959 "Insert the contents of an external entity at point.
1960 EXTID is the external identifier of the entity. Optional arguments TYPE
1961 is the entity type and NAME is the entity name, used to find the entity.
1962 Returns nil if entity is not found."
1963 (let* ((pubid (sgml-extid-pubid extid))
1964 (sysid (sgml-extid-sysid extid)))
1965 (or (if sysid
1966 (loop for fn in sgml-sysid-resolve-functions
1967 thereis (funcall fn sysid)))
1968 (let ((file (sgml-external-file extid type name)))
1969 (and file (insert-file-contents file)))
1970 (progn
1971 (sgml-log-warning "External entity %s not found" name)
1972 (when pubid
1973 (sgml-log-warning " Public identifier %s" pubid))
1974 (when sysid
1975 (sgml-log-warning " System identfier %s" sysid))
1976 nil))))
1977
1978
1979 ;; Parse a buffer full of catalogue entries.
1980 (defun sgml-parse-catalog-buffer ()
1981 "Parse all entries in a catalogue."
1982 (loop
1983 while (sgml-skip-cs)
1984 for type = (downcase (sgml-check-cat-literal))
1985 for class = (cdr (assoc type '(("public" . public) ("dtddecl" . public)
1986 ("entity" . name) ("linktype" . name)
1987 ("doctype" . name) ("sgmldecl" . noname)
1988 ("document" . noname))))
1989 when (not (null class))
1990 collect
1991 (let* ((name
1992 (cond ((eq class 'public)
1993 (sgml-skip-cs)
1994 (sgml-canonize-pubid (sgml-check-minimum-literal)))
1995 ((string= type "doctype")
1996 (sgml-general-case (sgml-check-cat-literal)))
1997 ((eq class 'name)
1998 (sgml-entity-case (sgml-check-cat-literal)))))
1999 (file
2000 (expand-file-name (sgml-check-cat-literal))))
2001 (list (intern type) name file))))
2002
2003
2004 (defun sgml-check-cat-literal ()
2005 "Read the next catalog token.
2006 Skips any leading spaces/comments."
2007 (sgml-skip-cs)
2008 (or (sgml-parse-literal)
2009 (buffer-substring-no-properties
2010 (point)
2011 (progn (skip-chars-forward "^ \r\n\t")
2012 (point)))))
2013
2014 (defconst sgml-formal-pubid-regexp
2015 (concat
2016 "^\\(+//\\|-//\\|\\)" ; Registered indicator [1]
2017 "\\(\\([^/]\\|/[^/]\\)+\\)" ; Owner [2]
2018 "//"
2019 "\\([^ ]+\\)" ; Text class [4]
2020 " "
2021 "\\(\\([^/]\\|/[^/]\\)*\\)" ; Text description [5]
2022 "//"
2023 "\\(\\([^/]\\|/[^/]\\)*\\)" ; Language [7]
2024 "\\(//" ; [9]
2025 "\\(\\([^/]\\|/[^/]\\)*\\)" ; Version [10]
2026 "\\)?"))
2027
2028 (defun sgml-pubid-parts (pubid)
2029 (nconc
2030 (if (string-match sgml-formal-pubid-regexp pubid)
2031 (nconc
2032 (list
2033 (cons ?o (sgml-transliterate-file (sgml-matched-string pubid 2)))
2034 (cons ?c (downcase (sgml-matched-string pubid 4)))
2035 (cons ?d (sgml-transliterate-file (sgml-matched-string pubid 5)))
2036 ;; t alias for d (%T used by sgmls)
2037 (cons ?t (sgml-transliterate-file (sgml-matched-string pubid 5)))
2038 (cons ?l (downcase (sgml-matched-string pubid 7))))
2039 (if (match-beginning 9)
2040 (list (cons ?v (sgml-transliterate-file
2041 (sgml-matched-string pubid 10)))))))))
2042
2043 (defun sgml-canonize-pubid (pubid)
2044 (if (string-match sgml-formal-pubid-regexp pubid)
2045 (concat
2046 (sgml-matched-string pubid 1) ; registered indicator
2047 (sgml-matched-string pubid 2) ; Owner
2048 "//"
2049 (upcase (sgml-matched-string pubid 4)) ; class
2050 " "
2051 (sgml-matched-string pubid 5) ; Text description
2052 "//"
2053 (upcase (sgml-matched-string pubid 7)) ; Language
2054 "//"
2055 (if (match-beginning 9)
2056 (sgml-matched-string pubid 10) ""))))
2057
2058 (defun sgml-transliterate-file (string)
2059 (mapconcat (function (lambda (c)
2060 (char-to-string
2061 (or (cdr-safe (assq c sgml-public-transliterations))
2062 c))))
2063 string ""))
2064
2065 (defun sgml-subst-expand-char (c parts)
2066 (cdr-safe (assq (downcase c) parts)))
2067
2068 (defun sgml-subst-expand (s parts)
2069 (loop for i from 0 to (1- (length s))
2070 as c = (aref s i)
2071 concat (if (eq c ?%)
2072 (or (sgml-subst-expand-char (aref s (incf i)) parts)
2073 (return nil))
2074 (char-to-string (aref s i)))))
2075
2076 (defun sgml-matched-string (string n &optional regexp noerror)
2077 (let ((res (if regexp
2078 (or (string-match regexp string)
2079 noerror
2080 (error "String match fail")))))
2081 (if (or (null regexp)
2082 (numberp res))
2083 (substring string (match-beginning n)
2084 (match-end n)))))
2085
2086 ;;;; Files for SGML declaration and DOCTYPE declaration
2087
2088 (defun sgml-declaration ()
2089 (or sgml-declaration
2090 (if sgml-doctype
2091 (sgml-in-file-eval sgml-doctype
2092 '(sgml-declaration)))
2093 (if sgml-parent-document
2094 (sgml-in-file-eval (car sgml-parent-document)
2095 '(sgml-declaration)))
2096 ;; *** check for sgmldecl comment
2097 (sgml-external-file nil 'sgmldecl)
2098 )
2099 )
2100
2101 (defun sgml-in-file-eval (file expr)
2102 (let ((cb (current-buffer)))
2103 (set-buffer (find-file-noselect file))
2104 (prog1 (eval expr)
2105 (set-buffer cb))))
2106
2107
2108 ;;;; Entity references and positions
2109
2110 (defstruct (sgml-eref
2111 (:constructor sgml-make-eref (entity start end))
2112 (:type list))
2113 entity
2114 start ; type: epos
2115 end)
2116
2117 (defun sgml-make-epos (eref pos)
2118 (cons eref pos))
2119
2120 (defun sgml-epos-eref (epos)
2121 (if (consp epos)
2122 (car epos)))
2123
2124 (defun sgml-epos-pos (epos)
2125 (if (consp epos)
2126 (cdr epos)
2127 epos))
2128
2129 (defun sgml-bpos-p (epos)
2130 (numberp epos))
2131
2132 (defun sgml-strict-epos-p (epos)
2133 (consp epos))
2134
2135 (defun sgml-epos (pos)
2136 "Convert a buffer position POS into an epos."
2137 (if sgml-current-eref
2138 (sgml-make-epos sgml-current-eref pos)
2139 pos))
2140
2141 (defun sgml-epos-erliest (epos)
2142 (while (consp epos)
2143 (setq epos (sgml-eref-start (sgml-epos-eref epos))))
2144 epos)
2145
2146 (defun sgml-epos-latest (epos)
2147 (while (consp epos)
2148 (setq epos (sgml-eref-end (sgml-epos-eref epos))))
2149 epos)
2150
2151 (defun sgml-epos-promote (epos)
2152 (while (and (consp epos)
2153 (= (cdr epos) 1))
2154 (setq epos (sgml-eref-start (car epos))))
2155 (sgml-epos-latest epos))
2156
2157
2158 ;;;; DTD repository
2159 ;;compiled-dtd: extid -> Compiled-DTD?
2160 ;;extid-cdtd-name: extid -> file?
2161 ;;up-to-date-p: (file, dependencies) -> cond
2162
2163 ;; Emacs Catalogues:
2164 ;; Syntax:
2165 ;; ecat ::= (cs | ecat-entry)*
2166 ;; cs ::= (s | comment)
2167 ;; ecat-entry ::= (pub-entry | file-entry)
2168 ;; pub-entry ::= ("PUBLIC", minimal literal, ent-spec?, cat literal)
2169 ;; pub-entry ::= ("FILE", literal, ent-spec?, cat literal)
2170 ;; ent-spec ::= ("[", (name, literal)*, "]")
2171
2172 ;; Parsed ecat = (eent*)
2173 ;; eent = (type ...)
2174 ;; = ('public pubid cfile . ents)
2175 ;; = ('file file cfile . ents)
2176
2177 (defun sgml-load-ecat (file)
2178 "Return ecat for FILE."
2179 (sgml-cache-catalog
2180 file 'sgml-ecat-assoc
2181 (function
2182 (lambda ()
2183 (let (new type ents from to name val)
2184 (while (progn (sgml-skip-cs)
2185 (setq type (sgml-parse-name)))
2186 (setq type (intern (downcase type)))
2187 (setq ents nil from nil)
2188 (sgml-skip-cs)
2189 (cond
2190 ((eq type 'public)
2191 (setq from (sgml-canonize-pubid (sgml-check-minimum-literal))))
2192 ((eq type 'file)
2193 (setq from (expand-file-name (sgml-check-cat-literal)))))
2194 (cond
2195 ((null from)
2196 (error "Syntax error in ECAT: %s" file))
2197 (t
2198 (sgml-skip-cs)
2199 (when (sgml-parse-char ?\[)
2200 (while (progn (sgml-skip-cs)
2201 (setq name (sgml-parse-name t)))
2202 (sgml-skip-cs)
2203 (setq val (sgml-check-literal))
2204 (push (cons name val) ents))
2205 (sgml-check-char ?\])
2206 (sgml-skip-cs))
2207 (setq to (expand-file-name (sgml-check-cat-literal)))
2208 (push (cons type (cons from (cons to ents)))
2209 new))))
2210 (nreverse new))))))
2211
2212 (defun sgml-ecat-lookup (files pubid file)
2213 "Return (file . ents) or nil."
2214 (let ((params (sgml-dtd-parameters sgml-dtd-info)))
2215 (loop
2216 for f in files
2217 do (sgml-debug "Search ECAT %s" f)
2218 thereis
2219 (loop
2220 for (type name cfile . ents) in (sgml-load-ecat f)
2221 thereis
2222 (if (and (cond ((eq type 'public) (equal name pubid))
2223 ((eq type 'file) (equal name file)))
2224 (loop for (name . val) in ents
2225 for entity = (sgml-lookup-entity name params)
2226 always (and entity
2227 (equal val (sgml-entity-text entity)))))
2228 (cons cfile ents))))))
2229
2230 ;;(let ((sgml-dtd-info (sgml-make-dtd nil)))
2231 ;; (sgml-ecat-lookup sgml-ecat-files
2232 ;; "-//lenst//DTD My DTD//EN//"
2233 ;; "/home/u5/lenst/src/psgml/bar.dtd"))
2234
2235
2236 ;;;; Merge compiled dtd
2237
2238 (defun sgml-try-merge-compiled-dtd (pubid file)
2239 (when pubid (setq pubid (sgml-canonize-pubid pubid)))
2240 (when file (setq file (expand-file-name file)))
2241 (sgml-debug "Find compiled dtd for %s %s" pubid file)
2242 (let ((ce (or (sgml-ecat-lookup sgml-current-local-ecat pubid file)
2243 (sgml-ecat-lookup sgml-ecat-files pubid file)))
2244 cfile dtd ents)
2245 (and ce
2246 (let ((cfile (car ce))
2247 (ents (cdr ce)))
2248 (sgml-debug "Found %s" cfile)
2249 (if (sgml-use-special-case)
2250 (sgml-try-merge-special-case pubid file cfile ents)
2251 (and (sgml-bdtd-load cfile file ents)
2252 (sgml-bdtd-merge)))))))
2253
2254 (defun sgml-use-special-case ()
2255 (and (null (sgml-dtd-merged sgml-dtd-info))
2256 (sgml-eltype-table-empty (sgml-dtd-eltypes sgml-dtd-info))
2257 (eq 'dtd (sgml-entity-type (sgml-eref-entity sgml-current-eref)))))
2258
2259 (defun sgml-try-merge-special-case (pubid file cfile ents)
2260 (let (cdtd)
2261 (sgml-debug "Merging special case")
2262 ;; Look for a compiled dtd in som other buffer
2263 (let ((cb (current-buffer)))
2264 (loop for b in (buffer-list)
2265 until
2266 (progn (set-buffer b)
2267 (and sgml-buffer-parse-state
2268 (let ((m (sgml-dtd-merged
2269 (sgml-pstate-dtd sgml-buffer-parse-state))))
2270 (and m
2271 (string-equal cfile (car m))
2272 (setq cdtd (cdr m)))))))
2273 (set-buffer cb))
2274 ;; Load a new compiled dtd
2275 (unless cdtd
2276 (and (sgml-bdtd-load cfile file ents)
2277 (setq cdtd (sgml-bdtd-read-dtd))))
2278 ;; Do the merger
2279 (cond
2280 ((and cdtd
2281 (sgml-check-entities (sgml-dtd-parameters sgml-dtd-info)
2282 (sgml-dtd-parameters cdtd)))
2283 (setf (sgml-dtd-eltypes sgml-dtd-info)
2284 (sgml-dtd-eltypes cdtd))
2285 (sgml-merge-entity-tables (sgml-dtd-entities sgml-dtd-info)
2286 (sgml-dtd-entities cdtd))
2287 (sgml-merge-entity-tables (sgml-dtd-parameters sgml-dtd-info)
2288 (sgml-dtd-parameters cdtd))
2289 (sgml-merge-shortmaps (sgml-dtd-shortmaps sgml-dtd-info)
2290 (sgml-dtd-shortmaps cdtd))
2291 (setf (sgml-dtd-dependencies sgml-dtd-info)
2292 (nconc (sgml-dtd-dependencies sgml-dtd-info)
2293 (sgml-dtd-dependencies cdtd)))
2294 (setf (sgml-dtd-merged sgml-dtd-info) (cons cfile cdtd))))))
2295
2296
2297 ;;;; Pushing and poping entities
2298
2299 (defun sgml-push-to-entity (entity &optional ref-start type)
2300 "Set current buffer to a buffer containing the entity ENTITY.
2301 ENTITY can also be a file name. Optional argument REF-START should be
2302 the start point of the entity reference. Optional argument TYPE,
2303 overrides the entity type in entity look up."
2304 (when ref-start
2305 (setq sgml-rs-ignore-pos ref-start))
2306 (unless (and sgml-scratch-buffer
2307 (buffer-name sgml-scratch-buffer))
2308 (setq sgml-scratch-buffer (generate-new-buffer " *entity*")))
2309 (let ((cb (current-buffer))
2310 (dd default-directory)
2311 ;;*** should eref be argument ?
2312 (eref (sgml-make-eref (if (stringp entity)
2313 (sgml-make-entity entity nil nil)
2314 entity)
2315 (sgml-epos (or ref-start (point)))
2316 (sgml-epos (point)))))
2317 (set-buffer sgml-scratch-buffer)
2318 (when (eq sgml-scratch-buffer (default-value 'sgml-scratch-buffer))
2319 (make-local-variable 'sgml-scratch-buffer)
2320 (setq sgml-scratch-buffer nil))
2321 (setq sgml-last-entity-buffer (current-buffer))
2322 (erase-buffer)
2323 (setq default-directory dd)
2324 (make-local-variable 'sgml-current-eref)
2325 (setq sgml-current-eref eref)
2326 (set-syntax-table sgml-parser-syntax)
2327 (make-local-variable 'sgml-previous-buffer)
2328 (setq sgml-previous-buffer cb)
2329 (setq sgml-rs-ignore-pos ; don't interpret beginning of buffer
2330 ; as #RS if internal entity.
2331 (if (or (stringp entity)
2332 (stringp (sgml-entity-text entity)))
2333 (point)
2334 0))
2335 (cond
2336 ((stringp entity) ; a file name
2337 (save-excursion (insert-file-contents entity))
2338 (setq default-directory (file-name-directory entity)))
2339 ((and sgml-parsing-dtd
2340 (consp (sgml-entity-text entity))) ; external id?
2341 (let ((file (sgml-entity-file entity type)))
2342 (sgml-debug "Push to %s = %s" (sgml-entity-text entity) file)
2343 (cond
2344 ((and file
2345 (sgml-try-merge-compiled-dtd (car (sgml-entity-text entity))
2346 file))
2347 (goto-char (point-max)))
2348 (file
2349 ;; fifth arg not available in early v19
2350 (erase-buffer)
2351 (insert-file-contents file nil nil nil)
2352 (setq default-directory (file-name-directory file))
2353 (goto-char (point-min))
2354 (push file (sgml-dtd-dependencies sgml-dtd-info)))
2355 (t
2356 (push t (sgml-dtd-dependencies sgml-dtd-info))
2357 (save-excursion (sgml-entity-insert-text entity type))))))
2358 (t
2359 (save-excursion (sgml-entity-insert-text entity type))))))
2360
2361 (defun sgml-pop-entity ()
2362 (cond ((and (boundp 'sgml-previous-buffer)
2363 (bufferp sgml-previous-buffer))
2364 (sgml-debug "Exit entity")
2365 (setq sgml-last-entity-buffer sgml-previous-buffer)
2366 (set-buffer sgml-previous-buffer)
2367 t)))
2368
2369 (defun sgml-goto-epos (epos)
2370 "Goto a position in an entity given by EPOS."
2371 (assert epos)
2372 (cond ((sgml-bpos-p epos)
2373 (goto-char epos))
2374 (t
2375 (let ((eref (sgml-epos-eref epos)))
2376 (sgml-cleanup-entities)
2377 (sgml-goto-epos (sgml-eref-end eref))
2378 (sgml-push-to-entity (sgml-eref-entity eref)
2379 (sgml-epos-pos (sgml-eref-start eref))))
2380 (goto-char (sgml-epos-pos epos)))))
2381
2382 (defun sgml-pop-all-entities ()
2383 (while (sgml-pop-entity)))
2384
2385 (defun sgml-cleanup-entities ()
2386 (let ((cb (current-buffer))
2387 (n 0))
2388 (while (and sgml-scratch-buffer (buffer-name sgml-scratch-buffer))
2389 (set-buffer sgml-scratch-buffer)
2390 (assert (not (eq sgml-scratch-buffer
2391 (default-value 'sgml-scratch-buffer))))
2392 (incf n))
2393 (while (> n 10)
2394 (set-buffer (prog1 sgml-previous-buffer
2395 (kill-buffer (current-buffer))))
2396 (decf n))
2397 (set-buffer cb)))
2398
2399 (defun sgml-any-open-param/file ()
2400 "Return true if there currently is a parameter or file open."
2401 (and (boundp 'sgml-previous-buffer)
2402 sgml-previous-buffer))
2403
2404
2405 ;;;; Parse tree
2406
2407 (defstruct (sgml-tree
2408 (:type vector)
2409 (:constructor sgml-make-tree
2410 (eltype stag-epos stag-len parent level
2411 excludes includes pstate net-enabled
2412 conref &optional shortmap pshortmap asl)))
2413 eltype ; element object
2414 ;;start ; start point in buffer
2415 ;;end ; end point in buffer
2416 stag-epos ; start-tag entity position
2417 etag-epos ; end-tag entity position
2418 stag-len ; length of start-tag
2419 etag-len ; length of end-tag
2420 parent ; parent tree
2421 level ; depth of this node
2422 excludes ; current excluded elements
2423 includes ; current included elements
2424 pstate ; state in parent
2425 next ; next sibling tree
2426 content ; child trees
2427 net-enabled ; if NET enabled (t this element,
2428 ; other non-nil, some parent)
2429 conref ; if conref attribute used
2430 shortmap ; shortmap at start of element
2431 pshortmap ; parents shortmap
2432 asl ; attribute specification list
2433 )
2434
2435
2436 ;;element-end (e):
2437 ;; If bpos-p (etag-epos (e)):
2438 ;; return etag-epos (e) + etag-len (e)
2439 ;; If etag-len (e) = 0: return promote (etag-epos (e))
2440 ;; else: return latest (etag-epos (e))
2441 (defun sgml-tree-end (tree)
2442 "Buffer position after end of TREE"
2443 (let ((epos (sgml-tree-etag-epos tree))
2444 (len (sgml-tree-etag-len tree)))
2445 (cond ((sgml-bpos-p epos)
2446 (+ epos len))
2447 ((zerop len)
2448 (sgml-epos-promote epos))
2449 (t
2450 (sgml-epos-latest epos)))))
2451
2452
2453 ;;;; (text) Element view of parse tree
2454
2455 (defmacro sgml-alias-fields (orig dest &rest fields)
2456 (let ((macs nil))
2457 (while fields
2458 (push
2459 (` (defmacro (, (intern (format "%s-%s" dest (car fields)))) (element)
2460 (, (format "Return %s field of ELEMENT." (car fields)))
2461 (list
2462 '(, (intern (format "%s-%s" orig (car fields))))
2463 element)))
2464 macs)
2465 (setq fields (cdr fields)))
2466 (cons 'progn macs)))
2467
2468 (sgml-alias-fields sgml-tree sgml-element
2469 eltype ; element object
2470 ;; start ; start point in buffer
2471 stag-epos
2472 etag-epos
2473 stag-len ; length of start-tag
2474 etag-len ; length of end-tag
2475 parent ; parent tree
2476 level ; depth of this node
2477 excludes ; current excluded elements
2478 includes ; current included elements
2479 pstate ; state in parent
2480 net-enabled ; if NET enabled
2481 )
2482
2483 (defun sgml-element-model (element)
2484 "Declared content or content model of ELEMENT."
2485 (sgml-eltype-model (sgml-tree-eltype element)))
2486
2487 (defun sgml-element-name (element)
2488 "Return name (symbol) of ELEMENT."
2489 (sgml-tree-eltype element))
2490
2491 (defun sgml-element-gi (element)
2492 "Return general identifier (string) of ELEMENT."
2493 (sgml-eltype-name (sgml-tree-eltype element)))
2494
2495 (defun sgml-element-appdata (element prop)
2496 "Return the application data named PROP associated with the type of ELEMENT."
2497 (sgml-eltype-appdata (sgml-tree-eltype element) prop))
2498
2499 (defmacro sgml-element-stag-optional (element)
2500 "True if start-tag of ELEMENT is omissible."
2501 (`(sgml-eltype-stag-optional (sgml-tree-eltype (, element)))))
2502
2503 (defun sgml-element-etag-optional (element)
2504 "True if end-tag of ELEMENT is omissible."
2505 (sgml-eltype-etag-optional (sgml-tree-eltype element)))
2506
2507 (define-compiler-macro sgml-element-etag-optional (element)
2508 "True if end-tag of ELEMENT is omissible."
2509 (`(sgml-eltype-etag-optional (sgml-tree-eltype (, element)))))
2510
2511 (defun sgml-element-attlist (element)
2512 "Return the attribute specification list of ELEMENT."
2513 (sgml-eltype-attlist (sgml-tree-eltype element)))
2514
2515 (defun sgml-element-mixed (element)
2516 "True if ELEMENT has mixed content."
2517 (sgml-eltype-mixed (sgml-tree-eltype element)))
2518
2519 (define-compiler-macro sgml-element-mixed (element)
2520 (`(sgml-eltype-mixed (sgml-tree-eltype (, element)))))
2521
2522 (defun sgml-element-start (element)
2523 "Position before start of ELEMENT."
2524 (sgml-epos-promote (sgml-tree-stag-epos element)))
2525
2526 (defun sgml-element-stag-end (element)
2527 "Position after start-tag of ELEMENT."
2528 (let ((epos (sgml-tree-stag-epos element))
2529 (len (sgml-tree-stag-len element)))
2530 (cond ((sgml-bpos-p epos)
2531 (+ epos len))
2532 ((zerop len)
2533 (sgml-epos-promote epos))
2534 (t
2535 (sgml-epos-latest epos)))))
2536
2537 (defun sgml-element-empty (element)
2538 "True if ELEMENT is empty."
2539 (or (eq sgml-empty (sgml-element-model element))
2540 (sgml-tree-conref element)))
2541
2542 (defun sgml-element-data-p (element)
2543 "True if ELEMENT can have data characters in its content."
2544 (or (sgml-element-mixed element)
2545 (eq sgml-cdata (sgml-element-model element))
2546 (eq sgml-rcdata (sgml-element-model element))))
2547
2548 (defun sgml-element-context-string (element)
2549 "Return string describing context of ELEMENT."
2550 (if (eq element sgml-top-tree)
2551 ""
2552 (format "in %s %s"
2553 (sgml-element-gi element)
2554 (sgml-element-context-string (sgml-tree-parent element)))))
2555
2556 ;;;; Display and Mode-line
2557
2558 (defun sgml-update-display ()
2559 (when (not (eq this-command 'keyboard-quit))
2560 ;; Don't let point be inside an invisible region
2561 (when (and (get-text-property (point) 'invisible)
2562 (eq (get-text-property (point) 'invisible)
2563 (get-text-property (1- (point)) 'invisible)))
2564 (setq sgml-last-element nil) ; May not be valid after point moved
2565 (if (memq this-command '(backward-char previous-line backward-word))
2566 (goto-char (or (previous-single-property-change (point) 'invisible)
2567 (point-min)))
2568 (goto-char (or (next-single-property-change (point) 'invisible)
2569 (point-max)))))
2570 (when (and (not (input-pending-p))
2571 (or sgml-live-element-indicator
2572 sgml-set-face))
2573 (let ((deactivate-mark nil)
2574 (sgml-suppress-warning t)
2575 (oldname sgml-current-element-name))
2576 (condition-case nil
2577 (save-excursion
2578 (cond ((and (memq this-command sgml-users-of-last-element)
2579 sgml-last-element)
2580 (setq sgml-current-element-name
2581 (sgml-element-gi sgml-last-element)))
2582
2583 (sgml-live-element-indicator
2584 (setq sgml-current-element-name "*error*")
2585 (save-excursion
2586 (sgml-parse-to (point) (function input-pending-p)))
2587 (unless (input-pending-p)
2588 (setq sgml-current-element-name
2589 (sgml-element-gi sgml-current-tree)))))
2590 (unless (input-pending-p)
2591 (force-mode-line-update)
2592 (when (and sgml-set-face
2593 (null
2594 (sgml-tree-etag-epos
2595 (sgml-pstate-top-tree sgml-buffer-parse-state))))
2596 (sit-for 0)
2597 (sgml-parse-until-end-of nil nil
2598 (function input-pending-p)
2599 t))))
2600 (error nil))))))
2601
2602
2603 (defun sgml-set-active-dtd-indicator (name)
2604 (set (make-local-variable 'sgml-active-dtd-indicator)
2605 (list (format " [%s" name)
2606 '(sgml-live-element-indicator ("/" sgml-current-element-name))
2607 "]"))
2608 (force-mode-line-update))
2609
2610 ;;;; Parser state
2611
2612 (defstruct (sgml-pstate
2613 (:constructor sgml-make-pstate (dtd top-tree)))
2614 dtd
2615 top-tree)
2616
2617 (defsubst sgml-excludes ()
2618 (sgml-tree-excludes sgml-current-tree))
2619
2620 (defsubst sgml-includes ()
2621 (sgml-tree-includes sgml-current-tree))
2622
2623 (defsubst sgml-current-mixed-p ()
2624 (sgml-element-mixed sgml-current-tree))
2625
2626 (defun sgml-set-initial-state (dtd)
2627 "Set initial state of parsing"
2628 (make-local-variable 'before-change-function)
2629 (setq before-change-function 'sgml-note-change-at)
2630 (set (make-local-variable 'after-change-function)
2631 'sgml-set-face-after-change)
2632 (sgml-set-active-dtd-indicator (sgml-dtd-doctype dtd))
2633 (let ((top-type ; Fake element type for the top
2634 ; node of the parse tree
2635 (sgml-make-eltype "#DOC") ; was "Document (no element)"
2636 ))
2637 (setf (sgml-eltype-model top-type)
2638 (sgml-make-primitive-content-token
2639 (sgml-eltype-token
2640 (sgml-lookup-eltype (sgml-dtd-doctype dtd) dtd))))
2641 (setq sgml-buffer-parse-state
2642 (sgml-make-pstate dtd
2643 (sgml-make-tree top-type
2644 0 0 nil 0 nil nil nil nil nil)))))
2645
2646 (defun sgml-set-parse-state (tree where)
2647 "Set parse state from TREE, either from start of TREE if WHERE is start
2648 or from after TREE if WHERE is after."
2649 (setq sgml-current-tree tree
2650 sgml-markup-tree tree)
2651 (let ((empty
2652 (sgml-element-empty tree)))
2653 (cond ((and (eq where 'start)
2654 (not empty))
2655 (setq sgml-current-state (sgml-element-model sgml-current-tree)
2656 sgml-current-shortmap (sgml-tree-shortmap sgml-current-tree)
2657 sgml-previous-tree nil)
2658 (setq sgml-markup-type
2659 (if (and (not (zerop (sgml-tree-stag-len tree)))
2660 (sgml-bpos-p (sgml-tree-stag-epos tree)))
2661 'start-tag)
2662 sgml-markup-start (sgml-element-start sgml-current-tree))
2663 (sgml-goto-epos (sgml-tree-stag-epos sgml-current-tree))
2664 (forward-char (sgml-tree-stag-len sgml-current-tree)))
2665 (t
2666 (setq sgml-current-state (sgml-tree-pstate sgml-current-tree)
2667 sgml-current-shortmap (sgml-tree-pshortmap sgml-current-tree)
2668 sgml-previous-tree sgml-current-tree)
2669 (sgml-goto-epos (sgml-tree-etag-epos sgml-current-tree))
2670 (forward-char (sgml-tree-etag-len sgml-current-tree))
2671 (setq sgml-markup-type (if empty 'start-tag 'end-tag)
2672 sgml-markup-start (- (point)
2673 (sgml-tree-etag-len sgml-current-tree)))
2674 (setq sgml-current-tree (sgml-tree-parent sgml-current-tree))))
2675 (assert sgml-current-state)))
2676
2677 (defsubst sgml-final-p (state)
2678 ;; Test if a state/model can be ended
2679 (or (not (sgml-model-group-p state))
2680 (sgml-final state)))
2681
2682 ;(defun sgml-current-element-contains-data ()
2683 ; "Retrun true if the current open element is either mixed or is (r)cdata."
2684 ; (or (eq sgml-cdata sgml-current-state)
2685 ; (eq sgml-rcdata sgml-current-state)
2686 ; (sgml-current-mixed-p)))
2687
2688 ;(defun sgml-current-element-content-class ()
2689 ; "Return a string describing the type of content in the current element.
2690 ;The type can be CDATA, RCDATA, ANY, #PCDATA or none."
2691 ; (cond ((eq sgml-cdata sgml-current-state)
2692 ; "CDATA")
2693 ; ((eq sgml-rcdata sgml-current-state)
2694 ; "RCDATA")
2695 ; ((eq sgml-any sgml-current-state)
2696 ; "ANY")
2697 ; ((sgml-current-mixed-p)
2698 ; "#PCDATA")
2699 ; (t "")))
2700
2701 (defun sgml-promoted-epos (start end)
2702 "Return an entity position for start of region START END.
2703 If region is empty, choose return an epos as high in the
2704 entity hierarchy as possible."
2705 ;; This does not work if the entity is entered by a shortref that
2706 ;; only is active in the current element.
2707 (let ((epos (sgml-epos start)))
2708 (when (= start end)
2709 (while (and (sgml-strict-epos-p epos)
2710 (= 1 (sgml-epos-pos epos)))
2711 (setq epos (sgml-eref-start (sgml-epos-eref epos)))))
2712 epos))
2713
2714 (defun sgml-open-element (eltype conref before-tag after-tag &optional asl)
2715 (unless (sgml-eltype-defined eltype)
2716 (setf (sgml-eltype-mixed eltype) t)
2717 (setf (sgml-eltype-etag-optional eltype) t)
2718 (when sgml-warn-about-undefined-elements
2719 (sgml-log-warning
2720 "Start-tag of undefined element %s; assume O O ANY"
2721 (sgml-eltype-name eltype))))
2722 (let* ((emap (sgml-eltype-shortmap eltype))
2723 (newmap (if emap
2724 (if (eq 'empty emap)
2725 nil
2726 (sgml-lookup-shortref-map
2727 (sgml-dtd-shortmaps sgml-dtd-info)
2728 emap))
2729 sgml-current-shortmap))
2730 (nt (sgml-make-tree
2731 eltype
2732 (sgml-promoted-epos before-tag after-tag) ; stag-epos
2733 (- after-tag before-tag) ; stag-len
2734 sgml-current-tree ; parent
2735 (1+ (sgml-tree-level sgml-current-tree)) ; level
2736 (append (sgml-eltype-excludes eltype) (sgml-excludes))
2737 (append (sgml-eltype-includes eltype) (sgml-includes))
2738 sgml-current-state
2739 (if (sgml-tree-net-enabled sgml-current-tree) 1)
2740 conref
2741 newmap
2742 sgml-current-shortmap
2743 asl)))
2744 ;; (let ((u (sgml-tree-content sgml-current-tree)))
2745 ;; (cond ((and u (> before-tag (sgml-element-start u)))
2746 ;; (while (and (sgml-tree-next u)
2747 ;; (> before-tag
2748 ;; (sgml-element-start (sgml-tree-next u))))
2749 ;; (setq u (sgml-tree-next u)))
2750 ;; (setf (sgml-tree-next u) nt))
2751 ;; (t
2752 ;; (setf (sgml-tree-content sgml-current-tree) nt))))
2753 ;; Install new node in tree
2754 (cond (sgml-previous-tree
2755 (setf (sgml-tree-next sgml-previous-tree) nt))
2756 (t
2757 (setf (sgml-tree-content sgml-current-tree) nt)))
2758 ;; Prune tree
2759 ;; *** all the way up? tree-end = nil?
2760 (setf (sgml-tree-next sgml-current-tree) nil)
2761 ;; Set new state
2762 (setq sgml-current-state (sgml-eltype-model eltype)
2763 sgml-current-shortmap newmap
2764 sgml-current-tree nt
2765 sgml-previous-tree nil)
2766 (assert sgml-current-state)
2767 (setq sgml-markup-tree sgml-current-tree)
2768 (run-hook-with-args 'sgml-open-element-hook sgml-current-tree asl)
2769 (when (sgml-element-empty sgml-current-tree)
2770 (sgml-close-element after-tag after-tag))))
2771
2772 (defun sgml-fake-open-element (tree el &optional state)
2773 (sgml-make-tree
2774 el 0 0
2775 tree
2776 0
2777 (append (sgml-eltype-excludes el) (sgml-tree-excludes tree))
2778 (append (sgml-eltype-includes el) (sgml-tree-includes tree))
2779 state
2780 nil
2781 nil))
2782
2783 (defun sgml-close-element (before-tag after-tag)
2784 (when (or (eq sgml-close-element-trap t)
2785 (eq sgml-close-element-trap sgml-current-tree))
2786 (setq sgml-goal (point)))
2787 (when sgml-throw-on-element-change
2788 (throw sgml-throw-on-element-change 'end))
2789 (setf (sgml-tree-etag-epos sgml-current-tree)
2790 ;;(sgml-promoted-epos before-tag after-tag)
2791 (sgml-epos before-tag))
2792 (setf (sgml-tree-etag-len sgml-current-tree) (- after-tag before-tag))
2793 (run-hooks 'sgml-close-element-hook)
2794 (setq sgml-markup-tree sgml-current-tree)
2795 (cond ((eq sgml-current-tree sgml-top-tree)
2796 (unless (eobp)
2797 (sgml-error "Parse ended")))
2798 (t
2799 (setq sgml-previous-tree sgml-current-tree
2800 sgml-current-state (sgml-tree-pstate sgml-current-tree)
2801 sgml-current-shortmap (sgml-tree-pshortmap sgml-current-tree)
2802 sgml-current-tree (sgml-tree-parent sgml-current-tree))
2803 (assert sgml-current-state))))
2804
2805 (defun sgml-fake-close-element (tree)
2806 (sgml-tree-parent tree))
2807
2808 (defun sgml-note-change-at (at &optional end)
2809 ;; Inform the cache that there have been some changes after AT
2810 (when sgml-buffer-parse-state
2811 (let ((u (sgml-pstate-top-tree sgml-buffer-parse-state)))
2812 (when u
2813 ;;(message "%d" at)
2814 (while
2815 (cond
2816 ((and (sgml-tree-next u) ; Change clearly in next element
2817 (> at (sgml-element-stag-end (sgml-tree-next u))))
2818 (setq u (sgml-tree-next u)))
2819 (t ;
2820 (setf (sgml-tree-next u) nil) ; Forget next element
2821 (cond
2822 ;; If change after this element and it is ended by an end
2823 ;; tag no pruning is done. If the end of the element is
2824 ;; implied changing the tag that implied it may change
2825 ;; the extent of the element.
2826 ((and (sgml-tree-etag-epos u)
2827 (> at (sgml-tree-end u))
2828 (or (> (sgml-tree-etag-len u) 0)
2829 (sgml-element-empty u)))
2830 nil)
2831 (t
2832 (setf (sgml-tree-etag-epos u) nil)
2833 (cond;; Enter into content if change is clearly in it
2834 ((and (sgml-tree-content u)
2835 (> at (sgml-element-stag-end (sgml-tree-content u))))
2836 (setq u (sgml-tree-content u)))
2837 ;; Check if element has no start tag,
2838 ;; then it must be pruned as a change could create
2839 ;; a valid start tag for the element.
2840 ((and (zerop (sgml-tree-stag-len u))
2841 (> at (sgml-element-start u)))
2842 ;; restart from to with new position
2843 ;; this can't loop forever as
2844 ;; position allways gets smaller
2845 (setq at (sgml-element-start u)
2846 u sgml-top-tree))
2847 (t
2848 (setf (sgml-tree-content u) nil))))))))))))
2849
2850 (defun sgml-list-implications (token type)
2851 "Return a list of the tags implied by a token TOKEN.
2852 TOKEN is a token, and the list elements are either tokens or `t'.
2853 Where the latter represents end-tags."
2854 (let ((state sgml-current-state)
2855 (tree sgml-current-tree)
2856 (temp nil)
2857 (imps nil))
2858 (while ; Until token accepted
2859 (cond
2860 ;; Test if accepted in state
2861 ((or (eq state sgml-any)
2862 (and (sgml-model-group-p state)
2863 (not (memq token (sgml-excludes)))
2864 (or (memq token (sgml-includes))
2865 (sgml-get-move state token))))
2866 nil)
2867 ;; Test if end tag implied
2868 ((or (eq state sgml-empty)
2869 (and (sgml-final-p state)
2870 (not (eq tree sgml-top-tree))))
2871 (unless (eq state sgml-empty) ; not realy implied
2872 (push t imps))
2873 (setq state (sgml-tree-pstate tree)
2874 tree (sgml-fake-close-element tree))
2875 t)
2876 ;; Test if start-tag can be implied
2877 ((and (setq temp (sgml-required-tokens state))
2878 (null (cdr temp)))
2879 (setq temp (car temp)
2880 tree (sgml-fake-open-element tree temp
2881 (sgml-get-move state temp))
2882 state (sgml-element-model tree))
2883 (push temp imps)
2884 t)
2885 ;; No implictions and not accepted
2886 (t
2887 (sgml-log-warning "Out of context %s" type)
2888 (setq imps nil))))
2889 ;; Return the implications in correct order
2890 (nreverse imps)))
2891
2892
2893 (defun sgml-eltypes-in-state (tree state)
2894 "Return list of element types (eltype) valid in STATE and TREE."
2895 (let* ((req ; Required tokens
2896 (if (sgml-model-group-p state)
2897 (sgml-required-tokens state)))
2898 (elems ; Normally valid tokens
2899 (if (sgml-model-group-p state)
2900 (nconc req
2901 (delq sgml-pcdata-token (sgml-optional-tokens state))))))
2902 ;; Modify for exceptions
2903 (loop for et in (sgml-tree-includes tree) ;*** Tokens or eltypes?
2904 unless (memq et elems) do (push et elems))
2905 (loop for et in (sgml-tree-excludes tree)
2906 do (setq elems (delq et elems)))
2907 ;; Check for omitable start-tags
2908 (when (and sgml-omittag-transparent
2909 (not (sgml-final-p state))
2910 req
2911 (null (cdr req)))
2912 (let ((et (sgml-token-eltype (car req))))
2913 (when (sgml-eltype-stag-optional et)
2914 (setq elems
2915 (nconc elems ; *** possibility of duplicates
2916 (sgml-eltypes-in-state
2917 (sgml-fake-open-element tree et)
2918 (sgml-eltype-model et)))))))
2919 elems))
2920
2921 (defun sgml-current-list-of-valid-eltypes ()
2922 "Returns a list of contextually valid element types (eltype)."
2923 (let ((elems (sgml-eltypes-in-state sgml-current-tree sgml-current-state))
2924 (tree sgml-current-tree)
2925 (state sgml-current-state))
2926 (when sgml-omittag-transparent
2927 (while (and tree
2928 (sgml-final-p state)
2929 (sgml-element-etag-optional tree))
2930 (setq state (sgml-tree-pstate tree)
2931 tree (sgml-tree-parent tree))
2932 (loop for e in (sgml-eltypes-in-state tree state) do
2933 (when (not (memq e elems))
2934 (setq elems (nconc elems (list e)))))))
2935 ;; *** Filter out elements that are undefined?
2936 (sort elems (function string-lessp))))
2937
2938 (defun sgml-current-list-of-endable-eltypes ()
2939 "Return a list of the element types endable in current state."
2940 (let* ((elems nil)
2941 (tree sgml-current-tree)
2942 (state sgml-current-state))
2943 (while
2944 (and (sgml-final-p state)
2945 (not (eq tree sgml-top-tree))
2946 (progn
2947 (setq elems
2948 (nconc elems (list (sgml-tree-eltype tree))))
2949 sgml-omittag)
2950 (sgml-eltype-etag-optional (sgml-tree-eltype tree)))
2951 (setq state (sgml-tree-pstate tree)
2952 tree (sgml-tree-parent tree)))
2953 elems))
2954
2955 ;;;; Logging of warnings
2956
2957 (defconst sgml-log-buffer-name "*SGML LOG*")
2958
2959 (defvar sgml-log-last-size 0)
2960
2961 (defun sgml-display-log ()
2962 (let ((buf (get-buffer sgml-log-buffer-name)))
2963 (when buf
2964 (display-buffer buf)
2965 (setq sgml-log-last-size (save-excursion (set-buffer buf)
2966 (point-max))))))
2967
2968 (defun sgml-log-warning (format &rest things)
2969 (when sgml-throw-on-warning
2970 (apply 'message format things)
2971 (throw sgml-throw-on-warning t))
2972 (when (or sgml-show-warnings sgml-parsing-dtd)
2973 (apply 'sgml-message format things)
2974 (apply 'sgml-log-message format things)))
2975
2976 (defun sgml-log-message (format &rest things)
2977 (let ((mess (apply 'format format things))
2978 (buf (get-buffer-create sgml-log-buffer-name))
2979 (cb (current-buffer)))
2980 (set-buffer buf)
2981 (goto-char (point-max))
2982 (insert mess "\n")
2983 (when (get-buffer-window buf)
2984 (setq sgml-log-last-size (point-max)))
2985 (set-buffer cb)))
2986
2987 (defun sgml-error (format &rest things)
2988 (when sgml-throw-on-error
2989 (throw sgml-throw-on-error nil))
2990 (while (and (boundp 'sgml-previous-buffer) sgml-previous-buffer)
2991 (when sgml-current-eref
2992 (sgml-log-message
2993 "Line %s in %S "
2994 (count-lines (point-min) (point))
2995 (sgml-entity-name (sgml-eref-entity sgml-current-eref))))
2996 (sgml-pop-entity))
2997 (apply 'sgml-log-warning format things)
2998 (apply 'error format things))
2999
3000 (defun sgml-parse-error (format &rest things)
3001 (apply 'sgml-error
3002 (concat format "; at: %s")
3003 (append things (list (buffer-substring-no-properties
3004 (point)
3005 (min (point-max) (+ (point) 12)))))))
3006
3007 (defun sgml-message (format &rest things)
3008 (let ((buf (get-buffer sgml-log-buffer-name)))
3009 (when (and buf
3010 (> (save-excursion (set-buffer buf)
3011 (point-max))
3012 sgml-log-last-size))
3013 (sgml-display-log)))
3014 (apply 'message format things))
3015
3016 (defun sgml-reset-log ()
3017 (let ((buf (get-buffer sgml-log-buffer-name)))
3018 (when buf
3019 (setq sgml-log-last-size
3020 (save-excursion (set-buffer buf)
3021 (point-max))))))
3022
3023 (defun sgml-clear-log ()
3024 (let ((b (get-buffer sgml-log-buffer-name)))
3025 (when b
3026 (delete-windows-on b)
3027 (kill-buffer b)
3028 (setq sgml-log-last-size 0))))
3029
3030 (defun sgml-show-or-clear-log ()
3031 "Show the *SGML LOG* buffer if it is not showing, or clear and
3032 remove it if it is showing."
3033 (interactive)
3034 (cond ((and (get-buffer sgml-log-buffer-name)
3035 (null (get-buffer-window sgml-log-buffer-name)))
3036 (sgml-display-log))
3037 (t
3038 (sgml-clear-log))))
3039
3040
3041
3042 ;;; This has noting to do with warnings...
3043
3044 (defvar sgml-lazy-time 0)
3045
3046 (defun sgml-lazy-message (&rest args)
3047 (unless (= sgml-lazy-time (second (current-time)))
3048 (apply 'message args)
3049 (setq sgml-lazy-time (second (current-time)))))
3050
3051 ;;;; Shortref maps
3052
3053 (eval-and-compile
3054 (defconst sgml-shortref-list
3055 '(
3056 "\t" ;&#TAB
3057 "\n" ;&#RE;
3058 "\001" ;&#RS;
3059 "\001B"
3060 "\001\n"
3061 "\001B\n"
3062 "B\n"
3063 " " ;&#SPACE;
3064 "BB"
3065 "\"" ;&#34;
3066 "#"
3067 "%"
3068 "'"
3069 "("
3070 ")"
3071 "*"
3072 "+"
3073 ","
3074 "-"
3075 "--"
3076 ":"
3077 ";"
3078 "="
3079 "@"
3080 "["
3081 "]"
3082 "^"
3083 "_"
3084 "{"
3085 "|"
3086 "}"
3087 "~")))
3088
3089 (eval-and-compile
3090 (defun sgml-shortref-index (string)
3091 (let ((pos (member string sgml-shortref-list))
3092 (len (length sgml-shortref-list)))
3093 (and pos (- len (length pos))) )))
3094
3095 (defun sgml-make-shortmap (pairs)
3096 "Create a shortreference map from PAIRS.
3097 Where PAIRS is a list of (delim . ename)."
3098 (let ((map
3099 (make-vector (1+ (length sgml-shortref-list))
3100 nil))
3101 index)
3102 (loop for p in pairs
3103 for delim = (car p)
3104 for name = (cdr p)
3105 do
3106 (setq index (sgml-shortref-index delim))
3107 (cond ((null index)
3108 (sgml-log-warning
3109 "Illegal short reference delimiter '%s'" delim))
3110 (t
3111 (aset map index name))))
3112 ;; Compute a suitable string for skip-chars-forward that
3113 ;; can be used to skip over pcdata
3114 (aset map
3115 (eval-when-compile (length sgml-shortref-list))
3116 (if (some (function
3117 (lambda (r) (aref map (sgml-shortref-index r))))
3118 '("\001B\n" "B\n" " " "BB"))
3119 "^<]/& \n\t\"#%'()*+,\\-:;=@[]\\^_{|}~"
3120 "^<]/&\n\t\"#%'()*+,\\-:;=@[]\\^_{|}~"))
3121 map))
3122
3123 (defun sgml-shortmap-skipstring (map)
3124 (if (bolp)
3125 ""
3126 (aref map (eval-when-compile (length sgml-shortref-list)))))
3127
3128
3129 (defconst sgml-shortref-oneassq
3130 (loop for d in sgml-shortref-list
3131 for c = (aref d 0)
3132 when (and (= 1 (length d))
3133 (/= 1 c) (/= 10 c))
3134 collect (cons c (sgml-shortref-index d))))
3135
3136 (defun sgml-parse-B ()
3137 (/= 0 (skip-chars-forward " \t")))
3138
3139 (defun sgml-deref-shortmap (map &optional nobol)
3140 "Identify shortref delimiter at point and return entity name.
3141 Also move point. Return nil, either if no shortref or undefined."
3142
3143 (macrolet
3144 ((delim (x) (` (aref map (, (sgml-shortref-index x))))))
3145 (let ((i (if nobol 1 0)))
3146 (while (numberp i)
3147 (setq i
3148 (cond
3149 ((and (bolp) (zerop i)) ; Either "\001" "\001B"
3150 ; "\001\n" "\001B\n"
3151 (cond ((sgml-parse-B) ; "\001B"
3152 (if (eolp)
3153 (delim "\001B\n")
3154 (delim "\001B")))
3155 ((sgml-parse-RE) (delim "\001\n"))
3156 ((delim "\001"))
3157 (t 1)))
3158 ((cond ((sgml-parse-char ?\t) (setq i (delim "\t")) t)
3159 ((sgml-parse-char ? ) (setq i (delim " ")) t))
3160 (cond ((sgml-parse-B) (setq i (delim "BB"))))
3161 (cond ((sgml-parse-char ?\n)
3162 (delim "B\n"))
3163 (t i)))
3164 ((sgml-parse-RE) (delim "\n"))
3165 ((sgml-parse-chars ?- ?-) (delim "--"))
3166 ;; The other one character delimiters
3167 ((setq i (assq (following-char) sgml-shortref-oneassq))
3168 (when i (forward-char 1))
3169 (aref map (cdr i))))))
3170 i)))
3171
3172 ;;; Table of shortref maps
3173
3174 (defun sgml-make-shortref-table ()
3175 (list nil))
3176
3177 (defun sgml-add-shortref-map (table name map)
3178 (nconc table (list (cons name map))))
3179
3180 (defun sgml-lookup-shortref-map (table name)
3181 (cdr (assoc name (cdr table))))
3182
3183 (defun sgml-merge-shortmaps (tab1 tab2)
3184 "Merge tables of short reference maps TAB2 into TAB1, modifying TAB1."
3185 (nconc tab1 (cdr tab2)))
3186
3187 ;;;; Parse markup declarations
3188
3189 (defun sgml-skip-until-dsc ()
3190 (while (progn
3191 (sgml-skip-upto ("DSO" "DSC" "LITA" "LIT" "COM"))
3192 (not (sgml-parse-delim "DSC")))
3193 (cond ((sgml-parse-literal))
3194 ((sgml-parse-delim "DSO")
3195 (sgml-skip-until-dsc))
3196 ((sgml-parse-comment))
3197 (t (forward-char 1)))))
3198
3199 (defun sgml-skip-upto-mdc ()
3200 "Move point forward until end of current markup declaration.
3201 Assumes starts with point inside a markup declaration."
3202 (while (progn
3203 (sgml-skip-upto ("DSO" "MDC" "LIT" "LITA" "COM"))
3204 (not (sgml-is-delim "MDC")))
3205 (cond ((sgml-parse-delim "DSO")
3206 (sgml-skip-until-dsc))
3207 ((sgml-parse-literal))
3208 ((sgml-parse-comment))
3209 (t (forward-char 1)))))
3210
3211 (defun sgml-do-sgml-declaration ()
3212 (sgml-skip-upto-mdc)
3213 (setq sgml-markup-type 'sgml))
3214
3215 (defun sgml-do-doctype ()
3216 (cond
3217 (sgml-dtd-info ; Has doctype already been defined
3218 (sgml-skip-upto-mdc))
3219 (t
3220 (let (sgml-markup-start)
3221 (message "Parsing doctype...")
3222 (sgml-setup-doctype (sgml-check-name)
3223 (sgml-parse-external))
3224 (message "Parsing doctype...done"))))
3225 (setq sgml-markup-type 'doctype))
3226
3227 (defun sgml-setup-doctype (docname external)
3228 (let ((sgml-parsing-dtd t))
3229 (setq sgml-no-elements 0)
3230 (setq sgml-dtd-info (sgml-make-dtd docname))
3231 ;;(setq sgml-dtd-shortmaps nil)
3232 (sgml-skip-ps)
3233 (cond
3234 ((sgml-parse-delim "DSO")
3235 (sgml-check-dtd-subset)
3236 (sgml-check-delim "DSC")))
3237 (cond (external
3238 (sgml-push-to-entity (sgml-make-entity docname 'dtd external))
3239 (unless (eobp)
3240 (sgml-check-dtd-subset)
3241 (unless (eobp)
3242 (sgml-parse-error "DTD subset ended")))
3243 (sgml-pop-entity)))
3244 ;;; (loop for map in sgml-dtd-shortmaps do
3245 ;;; (sgml-add-shortref-map
3246 ;;; (sgml-dtd-shortmaps sgml-dtd-info)
3247 ;;; (car map)
3248 ;;; (sgml-make-shortmap (cdr map))))
3249 (sgml-set-initial-state sgml-dtd-info)
3250 (run-hooks 'sgml-doctype-parsed-hook)))
3251
3252 (defun sgml-do-data (type &optional marked-section)
3253 "Move point forward until there is an end-tag open after point."
3254 (let ((start (point))
3255 (done nil)
3256 (eref sgml-current-eref)
3257 sgml-signal-data-function)
3258 (while (not done)
3259 (cond (marked-section
3260 (skip-chars-forward (if (eq type sgml-cdata) "^]" "^&]"))
3261 (when sgml-data-function
3262 (funcall sgml-data-function (buffer-substring-no-properties
3263 start (point))))
3264 (setq done (sgml-parse-delim "MS-END")))
3265 (t
3266 (skip-chars-forward (if (eq type sgml-cdata) "^</" "^</&"))
3267 (when sgml-data-function
3268 (funcall sgml-data-function (buffer-substring-no-properties start (point))))
3269 (setq done (or (sgml-is-delim "ETAGO" gi)
3270 (sgml-is-enabled-net)))))
3271 (setq start (point))
3272 (cond
3273 (done)
3274 ((eobp)
3275 (when (eq eref sgml-current-eref)
3276 (sgml-error "Unterminated %s %s"
3277 type (if marked-section "marked section")))
3278 (sgml-pop-entity)
3279 (setq start (point)))
3280 ((null sgml-data-function)
3281 (forward-char 1))
3282 ((sgml-parse-general-entity-ref)
3283 (setq start (point)))
3284 (t
3285 (forward-char 1))))))
3286
3287
3288 (defun sgml-do-marked-section ()
3289 (let ((status nil))
3290 (while (progn (sgml-skip-ps)
3291 (not (sgml-parse-char ?\[)))
3292 (push (sgml-check-name)
3293 status))
3294 (cond
3295 ((member "ignore" status)
3296 (sgml-skip-marked-section)
3297 (sgml-set-markup-type 'ignored))
3298 ((or (member "cdata" status)
3299 (member "rcdata" status))
3300 (when sgml-signal-data-function
3301 (funcall sgml-signal-data-function))
3302 (let ((type (if (member "cdata" status) sgml-cdata sgml-rcdata)))
3303 (sgml-do-data type t)
3304 (sgml-set-markup-type type)))
3305 (t
3306 (sgml-set-markup-type 'ms-start)))))
3307
3308 (defun sgml-skip-marked-section ()
3309 (while (progn
3310 (sgml-skip-upto ("MS-START" "MS-END"))
3311 (when (eobp) (sgml-error "Marked section unterminated"))
3312 (not (sgml-parse-delim "MS-END")))
3313 (cond ((sgml-parse-delim "MS-START")
3314 ;;(search-forward "[")
3315 (sgml-skip-marked-section))
3316 (t (forward-char 1)))))
3317
3318 (defun sgml-do-usemap ()
3319 (let (mapname associated)
3320 ;;(setq sgml-markup-type 'usemap)
3321 (unless (sgml-parse-rni "empty")
3322 (setq mapname (sgml-check-name)))
3323 (sgml-skip-ps)
3324 (cond
3325 ((sgml-is-delim "MDC")
3326 (sgml-debug "USEMAP %s" (if mapname mapname "#EMPTY"))
3327 (cond (sgml-dtd-info
3328 (setq sgml-current-shortmap
3329 (if mapname
3330 (or (sgml-lookup-shortref-map
3331 (sgml-dtd-shortmaps sgml-dtd-info)
3332 mapname)
3333 (sgml-error "Undefined shortref map %s" mapname)))))
3334 ;; If in prolog
3335 (t
3336 (sgml-log-warning
3337 "USEMAP without associated element type in prolog"))))
3338 (t
3339 ;; Should be handled by psgml-dtd
3340 (sgml-do-usemap-element mapname)))))
3341
3342 (defconst sgml-markup-declaration-table
3343 '(("sgml" . sgml-do-sgml-declaration)
3344 ("doctype" . sgml-do-doctype)
3345 ("element" . sgml-declare-element)
3346 ("entity" . sgml-declare-entity)
3347 ("usemap" . sgml-do-usemap)
3348 ("shortref" . sgml-declare-shortref)
3349 ("notation" . sgml-declare-notation)
3350 ("attlist" . sgml-declare-attlist)
3351 ("uselink" . sgml-skip-upto-mdc)
3352 ("linktype" . sgml-skip-upto-mdc)
3353 ("link" . sgml-skip-upto-mdc)
3354 ("idlink" . sgml-skip-upto-mdc)
3355 ))
3356
3357 (defun sgml-parse-markup-declaration (option)
3358 "Parse a markup declartion.
3359 OPTION can be `prolog' if parsing the prolog or `dtd' if parsing the
3360 dtd or `ignore' if the declaration is to be ignored."
3361 (cond
3362 ((sgml-parse-delim "MDO" (nmstart "COM" "MDC"))
3363 (cond
3364 ((sgml-startnm-char-next)
3365 (setq sgml-markup-type nil)
3366 (let* ((tok (sgml-parse-nametoken))
3367 (rut (assoc tok sgml-markup-declaration-table)))
3368 (when (and (not (memq option '(prolog ignore)))
3369 (member tok '("sgml" "doctype")))
3370 (sgml-error "%s declaration is only valid in prolog" tok))
3371 (when (and (not (memq option '(dtd ignore)))
3372 (member tok '("element" "entity" "attlist" "notation"
3373 "shortref")))
3374 (sgml-error "%s declaration is only valid in doctype" tok))
3375 (cond ((eq option 'ignore)
3376 (sgml-skip-upto-mdc))
3377 (rut (sgml-skip-ps)
3378 (funcall (cdr rut)))
3379 (t (sgml-parse-error
3380 "Illegal markup declaration %s" tok)))))
3381 (t
3382 (setq sgml-markup-type 'comment)))
3383 (sgml-skip-ps)
3384 (sgml-check-delim "MDC")
3385 (unless (eq option 'ignore) ; Set the markup type given
3386 (when sgml-markup-type
3387 (sgml-set-markup-type sgml-markup-type)))
3388 t)
3389 ((sgml-parse-delim "MS-START")
3390 (sgml-do-marked-section))))
3391
3392
3393 ;;;; Parsing attribute values
3394
3395 (defun sgml-parse-attribute-specification-list (&optional eltype)
3396 "Parse an attribute specification list.
3397 Optional argument ELTYPE, is used to resolve omitted name=.
3398 Returns a list of attspec (attribute specification)."
3399 (setq sgml-conref-flag nil)
3400 (let ((attlist (if eltype (sgml-eltype-attlist eltype)))
3401 name val asl attdecl)
3402 (while (setq name (progn (sgml-parse-s)
3403 (sgml-parse-nametoken)))
3404 (sgml-parse-s)
3405 (cond ((sgml-parse-delim "VI")
3406 (sgml-parse-s)
3407 (setq val (sgml-check-attribute-value-specification))
3408 (when eltype
3409 (or (setq attdecl (sgml-lookup-attdecl name attlist))
3410 (sgml-log-warning
3411 "Attribute %s not declared for element %s"
3412 name (sgml-eltype-name eltype)))))
3413 ((null eltype)
3414 (sgml-parse-error "Expecting a ="))
3415 ((progn
3416 (unless sgml-current-shorttag
3417 (sgml-log-warning
3418 "Must have attribute name when SHORTTAG NO"))
3419 (setq attdecl
3420 (sgml-find-attdecl-for-value (setq val name)
3421 eltype))))
3422 (t
3423 (sgml-log-warning
3424 "%s is not in any name group for element %s."
3425 val
3426 (sgml-eltype-name eltype))))
3427 ;; *** What happens when eltype is nil ??
3428 (when attdecl
3429 (push (sgml-make-attspec (sgml-attdecl-name attdecl) val)
3430 asl)
3431 (when (sgml-default-value-type-p 'conref
3432 (sgml-attdecl-default-value attdecl))
3433 (setq sgml-conref-flag t))))
3434 asl))
3435
3436 (defun sgml-check-attribute-value-specification ()
3437 (or (sgml-parse-literal)
3438 (sgml-parse-nametoken t) ; Not really a nametoken, but an
3439 ; undelimited literal
3440 (sgml-parse-error "Expecting an attribute value: literal or token")))
3441
3442 (defun sgml-find-attdecl-for-value (value eltype)
3443 "Find the attribute declaration of ELTYPE that has VALUE in its name group.
3444 VALUE is a string. Returns nil or an attdecl."
3445 (let ((al (sgml-eltype-attlist eltype))
3446 dv)
3447 (while (and al
3448 (or (atom (setq dv (sgml-attdecl-declared-value (car al))))
3449 (not (member value
3450 (sgml-declared-value-token-group dv)))))
3451 (setq al (cdr al)))
3452 (if al (car al))))
3453
3454
3455 ;;;; Parser driver
3456
3457 ;; The parser maintains a partial parse tree during the parse. This tree
3458 ;; can be inspected to find information, and also be used to restart the
3459 ;; parse. The parser also has a postition in the current content model.
3460 ;; (Called a state.) The parser is used for several things:
3461 ;; 1) To find the state the parser would be in at a point in the buffer.
3462 ;; (Point in emacs sense, I.e. between chararacters).
3463 ;; 2) Identify the element containing a character.
3464 ;; 3) Find end of an element.
3465 ;; 4) Find the next element.
3466 ;; 5) To find the previous element.
3467
3468 ;; These tasks are done by a combination of parsing and traversing
3469 ;; the partial parse tree. The primitive parse operation is to parse
3470 ;; until a goal point in the buffer has been passed. In addition to
3471 ;; this it is possible to "trap" closing of elements. Either for a
3472 ;; specific element or for any element. When the trap is sprung the
3473 ;; parse is ended. This is used to extend the parse tree. When the
3474 ;; trap is used the parser is usually called with the end of the
3475 ;; buffer as the goal point.
3476
3477 (defun sgml-need-dtd ()
3478 "Make sure that an eventual DTD is parsed or loaded."
3479 (sgml-cleanup-entities)
3480 (when (null sgml-buffer-parse-state) ; first parse in this buffer
3481 ;;(sgml-set-initial-state) ; fall back DTD
3482 (add-hook 'pre-command-hook 'sgml-reset-log)
3483 (make-local-variable 'sgml-auto-fill-inhibit-function)
3484 (setq sgml-auto-fill-inhibit-function (function sgml-in-prolog-p))
3485 (if sgml-default-dtd-file
3486 (sgml-load-dtd sgml-default-dtd-file)
3487 (sgml-load-doctype)))
3488 (setq sgml-dtd-info (sgml-pstate-dtd sgml-buffer-parse-state)
3489 sgml-top-tree (sgml-pstate-top-tree sgml-buffer-parse-state))
3490 (sgml-set-global))
3491
3492
3493 (defun sgml-load-doctype ()
3494 (cond
3495 ;; Case of doctype in another file
3496 ((or sgml-parent-document sgml-doctype)
3497 (let ((dtd
3498 (save-excursion ; get DTD from parent document
3499 (set-buffer (find-file-noselect
3500 (if (consp sgml-parent-document)
3501 (car sgml-parent-document)
3502 (or sgml-doctype sgml-parent-document))))
3503 (sgml-need-dtd)
3504 (sgml-pstate-dtd sgml-buffer-parse-state))))
3505 (sgml-set-initial-state dtd)
3506 (when (consp sgml-parent-document) ; modify DTD for child documents
3507 (sgml-modify-dtd (cdr sgml-parent-document)))))
3508
3509 ;; The doctype declaration should be in the current buffer
3510 (t
3511 (save-excursion (sgml-parse-prolog)))))
3512
3513
3514 (defun sgml-modify-dtd (modifier)
3515 (setq sgml-dtd-info (sgml-pstate-dtd sgml-buffer-parse-state)
3516 sgml-top-tree (sgml-pstate-top-tree sgml-buffer-parse-state))
3517 (sgml-set-global)
3518
3519 (while (stringp (cadr modifier)) ; Loop thru the context elements
3520 (let ((et (sgml-lookup-eltype (car modifier))))
3521 (sgml-open-element et nil (point-min) (point-min))
3522 (setq modifier (cdr modifier))))
3523
3524 (unless (stringp (car modifier))
3525 (error "wrong format of sgml-parent-document"))
3526
3527 (let* ((doctypename (car modifier))
3528 (et (sgml-lookup-eltype
3529 (sgml-general-case (if (symbolp doctypename)
3530 (symbol-name doctypename)
3531 doctypename)))))
3532
3533 (setq sgml-current-state
3534 (sgml-make-primitive-content-token et))
3535
3536 (when (consp (cadr modifier)) ; There are "seen" elements
3537 (sgml-open-element et nil (point-min) (point-min))
3538 (loop for seenel in (cadr modifier)
3539 do (setq sgml-current-state
3540 (sgml-get-move sgml-current-state
3541 (sgml-lookup-eltype seenel))))))
3542
3543 (let ((top (sgml-pstate-top-tree sgml-buffer-parse-state)))
3544 (setf (sgml-tree-includes top) (sgml-includes))
3545 (setf (sgml-tree-excludes top) (sgml-excludes))
3546 (setf (sgml-tree-shortmap top) sgml-current-shortmap)
3547 (setf (sgml-eltype-model (sgml-tree-eltype top))
3548 sgml-current-state)))
3549
3550
3551 (defun sgml-set-global ()
3552 (setq sgml-current-omittag sgml-omittag
3553 sgml-current-shorttag sgml-shorttag
3554 sgml-current-localcat sgml-local-catalogs
3555 sgml-current-local-ecat sgml-local-ecat-files))
3556
3557 (defun sgml-parse-prolog ()
3558 "Parse the document prolog to learn the DTD."
3559 (interactive)
3560 (sgml-clear-log)
3561 (message "Parsing prolog...")
3562 (sgml-cleanup-entities)
3563 (sgml-set-global)
3564 (setq sgml-dtd-info nil)
3565 (goto-char (point-min))
3566 (sgml-with-parser-syntax
3567 (while (progn (sgml-skip-ds)
3568 (setq sgml-markup-start (point))
3569 (and (sgml-parse-markup-declaration 'prolog)
3570 (null sgml-dtd-info))))
3571 (unless sgml-dtd-info ; Set up a default doctype
3572 (let ((docname (or sgml-default-doctype-name
3573 (if (sgml-parse-delim "STAGO" gi)
3574 (sgml-parse-name)))))
3575 (when docname
3576 (sgml-setup-doctype docname '(nil))))))
3577 (unless sgml-dtd-info
3578 (error "No document type defined by prolog"))
3579 (sgml-message "Parsing prolog...done"))
3580
3581 (defun sgml-parse-until-end-of (sgml-close-element-trap &optional
3582 cont extra-cond quiet)
3583 "Parse until the SGML-CLOSE-ELEMENT-TRAP has ended,
3584 or if it is t, any additional element has ended,
3585 or if nil, until end of buffer."
3586 (cond
3587 (cont (sgml-parse-continue (point-max)))
3588 (t (sgml-parse-to (point-max) extra-cond quiet)))
3589 (when (eobp) ; End of buffer, can imply
3590 ; end of any open element.
3591 (while (prog1 (not
3592 (or (eq sgml-close-element-trap t)
3593 (eq sgml-close-element-trap sgml-current-tree)
3594 (eq sgml-current-tree sgml-top-tree)))
3595 (sgml-implied-end-tag "buffer end" (point) (point))))))
3596
3597 (defun sgml-parse-to (sgml-goal &optional extra-cond quiet)
3598 "Parse until (at least) SGML-GOAL.
3599 Optional argument EXTRA-COND should be a function. This function is
3600 called in the parser loop, and the loop is exited if the function returns t.
3601 If third argument QUIT is non-nil, no \"Parsing...\" message will be displayed."
3602 (sgml-need-dtd)
3603 (sgml-find-start-point (min sgml-goal (point-max)))
3604 (assert sgml-current-tree)
3605 (let ((bigparse (and (not quiet) (> (- sgml-goal (point)) 10000))))
3606 (when bigparse
3607 (sgml-message "Parsing..."))
3608 (sgml-with-parser-syntax
3609 (sgml-parser-loop extra-cond))
3610 (when bigparse
3611 (sgml-message ""))))
3612
3613 (defun sgml-parse-continue (sgml-goal)
3614 "Parse until (at least) SGML-GOAL."
3615 (assert sgml-current-tree)
3616 (sgml-message "Parsing...")
3617 (sgml-with-parser-syntax
3618 (sgml-parser-loop nil))
3619 (sgml-message ""))
3620
3621 (defun sgml-reparse-buffer (shortref-fun)
3622 "Reparse the buffer and let SHORTREF-FUN take care of short references.
3623 SHORTREF-FUN is called with the entity as argument and `sgml-markup-start'
3624 pointing to start of short ref and point pointing to the end."
3625 (sgml-note-change-at (point-min))
3626 (let ((sgml-shortref-handler shortref-fun))
3627 (sgml-parse-until-end-of nil)))
3628
3629 (defun sgml-move-current-state (token)
3630 (setq sgml-current-state
3631 (or (sgml-get-move sgml-current-state token)
3632 sgml-current-state)))
3633
3634 (defun sgml-execute-implied (imps type)
3635 (loop for token in imps do
3636 (if (eq t token)
3637 (sgml-implied-end-tag type sgml-markup-start sgml-markup-start)
3638 (sgml-move-current-state token)
3639 (when sgml-throw-on-element-change
3640 (throw sgml-throw-on-element-change 'start))
3641 (sgml-open-element (sgml-token-eltype token)
3642 nil sgml-markup-start sgml-markup-start)
3643 (unless (and sgml-current-omittag
3644 (sgml-element-stag-optional sgml-current-tree))
3645 (sgml-log-warning
3646 "%s start-tag implied by %s; not minimizable"
3647 (sgml-eltype-name (sgml-token-eltype token))
3648 type)))))
3649
3650 (defun sgml-do-move (token type)
3651 (sgml-execute-implied (sgml-list-implications token type) type)
3652 (unless (eq sgml-any sgml-current-state)
3653 (sgml-move-current-state token)))
3654
3655 (defun sgml-pcdata-move ()
3656 "Moify parser state to reflect parsed data."
3657 (sgml-do-move sgml-pcdata-token "data character"))
3658
3659 (defsubst sgml-parse-pcdata ()
3660 (/= 0
3661 (if sgml-current-shortmap
3662 (skip-chars-forward (sgml-shortmap-skipstring sgml-current-shortmap))
3663 (skip-chars-forward "^<]/&"))))
3664
3665 (defsubst sgml-do-pcdata ()
3666 ;; Parse pcdata
3667 (sgml-pcdata-move)
3668 ;;*** assume sgml-markup-start = point
3669 ;;*** should perhaps handle &#nn;?
3670 (forward-char 1)
3671 (sgml-parse-pcdata)
3672 (when sgml-data-function
3673 (funcall sgml-data-function (buffer-substring-no-properties
3674 sgml-markup-start
3675 (point))))
3676 (sgml-set-markup-type nil))
3677
3678 (defun sgml-parser-loop (extra-cond)
3679 (let (tem
3680 (sgml-signal-data-function (function sgml-pcdata-move)))
3681 (while (and (eq sgml-current-tree sgml-top-tree)
3682 (or (< (point) sgml-goal) sgml-current-eref)
3683 (progn (setq sgml-markup-start (point)
3684 sgml-markup-type nil)
3685 (or (sgml-parse-s)
3686 (sgml-parse-markup-declaration 'prolog)
3687 (sgml-parse-processing-instruction)))))
3688 (while (and (or (< (point) sgml-goal) sgml-current-eref)
3689 (not (if extra-cond (funcall extra-cond))))
3690 (assert sgml-current-tree)
3691 (setq sgml-markup-start (point)
3692 sgml-markup-type nil)
3693 (cond
3694 ((eobp) (sgml-pop-entity))
3695 ((and (or (eq sgml-current-state sgml-cdata)
3696 (eq sgml-current-state sgml-rcdata)))
3697 (if (or (sgml-parse-delim "ETAGO" gi)
3698 (sgml-is-enabled-net))
3699 (sgml-do-end-tag)
3700 (sgml-do-data sgml-current-state)))
3701 ((and sgml-current-shortmap
3702 (or (setq tem (sgml-deref-shortmap sgml-current-shortmap
3703 (eq (point)
3704 sgml-rs-ignore-pos)))
3705 ;; Restore position, to consider the delim for S+ or data
3706 (progn (goto-char sgml-markup-start)
3707 nil)))
3708 (setq sgml-rs-ignore-pos (point))
3709 (funcall sgml-shortref-handler tem))
3710 ((and (not (sgml-current-mixed-p))
3711 (sgml-parse-s sgml-current-shortmap)))
3712 ((or (sgml-parse-delim "ETAGO" gi)
3713 (sgml-is-enabled-net))
3714 (sgml-do-end-tag))
3715 ((sgml-parse-delim "STAGO" gi)
3716 (sgml-do-start-tag))
3717 ((sgml-parse-general-entity-ref))
3718 ((sgml-parse-markup-declaration nil))
3719 ((sgml-parse-delim "MS-END") ; end of marked section
3720 (sgml-set-markup-type 'ms-end))
3721 ((sgml-parse-processing-instruction))
3722 (t
3723 (sgml-do-pcdata))))))
3724
3725 (defun sgml-handle-shortref (name)
3726 (sgml-set-markup-type 'shortref)
3727 (sgml-do-entity-ref name))
3728
3729 (defun sgml-do-start-tag ()
3730 ;; Assume point after STAGO
3731 (when sgml-throw-on-element-change
3732 (throw sgml-throw-on-element-change 'start))
3733 (setq sgml-conref-flag nil)
3734 (let (temp net-enabled et asl)
3735 (setq et (if (sgml-is-delim "TAGC") ; empty start-tag
3736 (sgml-do-empty-start-tag)
3737 (sgml-lookup-eltype (sgml-check-name))))
3738 (unless (sgml-parse-delim "TAGC") ; optimize common case
3739 (setq asl (sgml-parse-attribute-specification-list et))
3740 (or
3741 (if (sgml-parse-delim "NET")
3742 (prog1 (setq net-enabled t)
3743 (or sgml-current-shorttag
3744 (sgml-log-warning
3745 "NET enabling start-tag is not allowed with SHORTTAG NO"))))
3746 (sgml-check-tag-close)))
3747 (sgml-set-markup-type 'start-tag)
3748 (cond ((and sgml-ignore-undefined-elements
3749 (not (sgml-eltype-defined et)))
3750 (when sgml-warn-about-undefined-elements
3751 (sgml-log-warning
3752 "Start-tag of undefined element %s; ignored"
3753 (sgml-eltype-name et))))
3754 (t
3755 (sgml-do-move (sgml-eltype-token et)
3756 (format "%s start-tag" (sgml-eltype-name et)))
3757 (sgml-open-element et sgml-conref-flag
3758 sgml-markup-start (point) asl)
3759 (when net-enabled
3760 (setf (sgml-tree-net-enabled sgml-current-tree) t))))))
3761
3762
3763 (defun sgml-do-empty-start-tag ()
3764 "Return eltype to use if empty start tag"
3765 (cond
3766 ;; Document element if no element is open
3767 ((eq sgml-current-tree sgml-top-tree)
3768 (sgml-lookup-eltype
3769 (sgml-dtd-doctype sgml-dtd-info)))
3770 ;; If omittag use current open element
3771 (sgml-current-omittag
3772 (sgml-tree-eltype sgml-current-tree))
3773 ;; Find the eltype of the last closed element.
3774 ;; If element has a left sibling then use that
3775 (sgml-previous-tree
3776 (sgml-tree-eltype sgml-previous-tree))
3777 ;; No sibling, last closed must be found in enclosing element
3778 (t
3779 (loop named outer
3780 for current = sgml-current-tree then (sgml-tree-parent current)
3781 for parent = (sgml-tree-parent current)
3782 do;; Search for a parent with a child before current
3783 (when (eq parent sgml-top-tree)
3784 (sgml-error "No previously closed element"))
3785 (unless (eq current (sgml-tree-content parent))
3786 ;; Search content of u for element before current
3787 (loop for c = (sgml-tree-content parent) then (sgml-tree-next c)
3788 do (when (eq current (sgml-tree-next c))
3789 (return-from outer (sgml-tree-eltype c)))))))))
3790
3791
3792 (defun sgml-do-end-tag ()
3793 "Assume point after </ or at / in a NET"
3794 (let ((gi "Null") ; Name of element to end or "NET"
3795 et ; Element type of end tag
3796 (found ; Set to true when found element to end
3797 t))
3798 (cond ((sgml-parse-delim "TAGC") ; empty end-tag
3799 (setq et (sgml-tree-eltype sgml-current-tree)))
3800 ((sgml-parse-delim "NET"))
3801 (t
3802 (setq et (sgml-lookup-eltype (sgml-check-name)))
3803 (sgml-parse-s)
3804 (sgml-check-tag-close)))
3805 (sgml-set-markup-type 'end-tag) ; This will create the overlay for
3806 ; the end-tag before the element
3807 ; is closed
3808 (when et
3809 (setq gi (sgml-eltype-name et))
3810 (setq found ; check if there is an open element
3811 ; with the right eltype
3812 (loop for u = sgml-current-tree then (sgml-tree-parent u)
3813 while u
3814 thereis (eq et (sgml-tree-eltype u))))
3815 (unless found
3816 (sgml-log-warning
3817 "End-tag %s does not end any open element; ignored"
3818 gi)))
3819 (when found
3820 (setq found nil)
3821 (while (not found) ; Loop until correct element to
3822 ; end is found
3823 (unless (sgml-final-p sgml-current-state)
3824 (sgml-log-warning
3825 "%s element can't end here, need one of %s; %s end-tag out of context"
3826 (sgml-element-gi sgml-current-tree)
3827 (sgml-required-tokens sgml-current-state)
3828 gi))
3829 (when (eq sgml-current-tree sgml-top-tree)
3830 (sgml-error "%s end-tag ended document and parse" gi))
3831 (setq found
3832 (or (eq et (sgml-tree-eltype sgml-current-tree))
3833 (and (null et) ; Null end-tag
3834 (eq t (sgml-tree-net-enabled sgml-current-tree)))))
3835 (unless found
3836 (sgml-implied-end-tag (format "%s end-tag" gi)
3837 sgml-markup-start sgml-markup-start)))
3838 (sgml-close-element sgml-markup-start (point)))))
3839
3840 (defun sgml-is-goal-after-start (goal tree)
3841 (and tree
3842 ;;(not (zerop (sgml-tree-stag-len tree)))
3843 (> goal (sgml-element-start tree))))
3844
3845 (defun sgml-find-start-point (goal)
3846 (let ((u sgml-top-tree))
3847 (while
3848 (cond
3849 ((sgml-is-goal-after-start goal (sgml-tree-next u))
3850 (setq u (sgml-tree-next u)))
3851 ((and (sgml-tree-etag-epos u)
3852 (if (> (sgml-tree-etag-len u) 0) ; if threre is an end-tag
3853 (>= goal (sgml-tree-end u)) ; precisely after is after
3854 (> goal (sgml-tree-end u)))) ; else it could possibly
3855 ; become part of the element
3856 (sgml-set-parse-state u 'after)
3857 nil)
3858 ((sgml-is-goal-after-start goal (sgml-tree-content u))
3859 (setq u (sgml-tree-content u)))
3860 (t
3861 (sgml-set-parse-state u 'start)
3862 nil)))
3863 )
3864 )
3865
3866
3867 (defun sgml-check-tag-close ()
3868 (or
3869 (sgml-parse-delim "TAGC")
3870 (if (or (sgml-is-delim "STAGO" gi)
3871 (sgml-is-delim "ETAGO" gi))
3872 (or sgml-current-shorttag
3873 (sgml-log-warning
3874 "Unclosed tag is not allowed with SHORTTAG NO")
3875 t))
3876 (sgml-error "Invalid character in markup %c"
3877 (following-char))))
3878
3879 (defun sgml-implied-end-tag (type start end)
3880 (cond ((eq sgml-current-tree sgml-top-tree)
3881 (unless (= start (point-max))
3882 (sgml-error
3883 "document ended by %s" type)))
3884 ((not
3885 (and sgml-current-omittag
3886 (sgml-element-etag-optional sgml-current-tree)))
3887 (sgml-log-warning
3888 "%s end-tag implied by %s; not minimizable"
3889 (sgml-element-gi sgml-current-tree)
3890 type)))
3891 (sgml-close-element start end))
3892
3893
3894 ;;;; Parsing tasks and extending the element view of the parse tree
3895
3896 (defun sgml-find-context-of (pos)
3897 "Find the parser context for POS, returns the parse tree.
3898 Also sets sgml-current-tree and sgml-current-state. If POS is in
3899 markup, sgml-markup-type will be a symbol identifying the markup
3900 type. It will be nil otherwise."
3901 (save-excursion
3902 (sgml-parse-to pos)
3903 (cond ((and (> (point) pos)
3904 sgml-markup-type)
3905 ;;(setq sgml-current-state sgml-markup-type)
3906 (cond ((memq sgml-markup-type '(start-tag end-tag))
3907 (setq sgml-current-tree sgml-markup-tree))))
3908 (t
3909 (setq sgml-markup-type nil)))
3910 sgml-current-tree))
3911
3912 (defun sgml-parse-to-here ()
3913 "Find context of point.
3914 See documentation of sgml-find-context-of."
3915 (sgml-find-context-of (point)))
3916
3917 (defun sgml-find-element-of (pos)
3918 "Find the element containing character at POS."
3919 (when (eq pos (point-max))
3920 (error "End of buffer"))
3921 (save-excursion
3922 (sgml-parse-to (1+ pos)) ; Ensures that the element is
3923 ; in the tree.
3924 ;; Find p in u:
3925 ;; assert p >= start(u)
3926 ;; if next(u) and p >= start(next(u)): find p in next(u)
3927 ;; else if end(u) and p >= end(u): in parent(u) unless u is top
3928 ;; else if content:
3929 ;; if p < start(content(u)): in u
3930 ;; else find p in content(u)
3931 ;; else: in u
3932 (let ((u sgml-top-tree))
3933 (while ; pos >= start(u)
3934 (cond ((and (sgml-tree-next u)
3935 (>= pos (sgml-element-start (sgml-tree-next u))))
3936 (setq u (sgml-tree-next u))) ; continue searching next node
3937 ((and (sgml-tree-etag-epos u)
3938 (>= pos (sgml-tree-end u)))
3939 (setq u (sgml-tree-parent u)) ; must be parent node
3940 nil)
3941 ((and (sgml-tree-content u)
3942 (>= pos (sgml-element-start (sgml-tree-content u))))
3943 (setq u (sgml-tree-content u))))) ; search content
3944 u)))
3945
3946 (defun sgml-find-previous-element (pos &optional in-element)
3947 "Find the element before POS and return it, error if non found.
3948 If in IN-ELEMENT is given look for previous element in IN-ELEMENT else
3949 look in current element. If this element has no content elements but
3950 end at POS, it will be returned as previous element."
3951 (save-excursion
3952 ;; Parse to point; now the previous element is in the parse tree
3953 (sgml-parse-to pos)
3954 ;; containing element may be given or obtained from parser
3955 (or in-element (setq in-element sgml-current-tree))
3956 ;; in-element is the containing element
3957 (let* ((c ; this is the content of the
3958 ; containing element
3959 (sgml-tree-content in-element)))
3960 (while
3961 (cond
3962 ((null c) ; If c = Nil: no previous element.
3963 ;; But maybe the containing element ends at pos too.
3964 (cond ((= pos (sgml-element-end in-element))
3965 (setq c in-element))) ; Previous is parent!
3966 nil)
3967 ((<= pos (sgml-element-start c)) ; Pos before first content el
3968 (setq c nil)) ; No, previous element.
3969 ((null (sgml-tree-next c)) nil) ; No next, c must be the prev el
3970 ((>= (sgml-element-start (sgml-tree-next c)) pos)
3971 nil)
3972 (t
3973 (setq c (sgml-tree-next c)))))
3974 (or c
3975 (error "No previous element in %s element"
3976 (sgml-element-gi in-element))))))
3977
3978 (defun sgml-find-element-after (pos &optional in-element)
3979 "Find the first element starting after POS.
3980 Returns parse tree; error if no element after POS."
3981 (setq in-element (or in-element
3982 (save-excursion (sgml-find-context-of pos))))
3983 (or
3984 ;; First try to find element after POS in IN-ELEMENT/current element
3985 (let ((c ; content of in-element
3986 (sgml-element-content in-element)))
3987 (while (and c
3988 (> pos (sgml-element-start c)))
3989 (setq c (sgml-element-next c)))
3990 c)
3991 ;; If there is no more elements IN-ELEMENT/current element try
3992 ;; to identify the element containing the character after POS.
3993 ;; If this element starts at POS, use it for element after POS.
3994 (let ((el (sgml-find-element-of pos)))
3995 (if (and el (= pos (sgml-element-start el)))
3996 el))
3997 (progn
3998 (sgml-message "") ; force display of log buffer
3999 (error "No more elements in %s element"
4000 (sgml-element-gi in-element)))))
4001
4002 (defun sgml-element-content (element)
4003 "First element in content of ELEMENT, or nil."
4004 (when (null (or (sgml-tree-content element)
4005 (sgml-tree-etag-epos element)))
4006 (save-excursion (sgml-parse-until-end-of t)))
4007 (sgml-tree-content element))
4008
4009 (defun sgml-element-next (element)
4010 "Next sibling of ELEMENT."
4011 (unless (sgml-tree-etag-epos element)
4012 (save-excursion (sgml-parse-until-end-of element)))
4013 (unless (or (sgml-tree-next element)
4014 (sgml-tree-etag-epos (sgml-tree-parent element)))
4015 (save-excursion (sgml-parse-until-end-of t)))
4016 (sgml-tree-next element))
4017
4018 (defun sgml-element-etag-start (element)
4019 "Last position in content of ELEMENT and start of end-tag, if any."
4020 (unless (sgml-tree-etag-epos element)
4021 (save-excursion
4022 (sgml-parse-until-end-of element)))
4023 (assert (sgml-tree-etag-epos element))
4024 (sgml-epos-promote (sgml-tree-etag-epos element)))
4025
4026 (defun sgml-element-end (element)
4027 "First position after ELEMENT."
4028 (sgml-element-etag-start element) ; make end be defined
4029 (sgml-tree-end element))
4030
4031 (defun sgml-read-element-name (prompt)
4032 (sgml-parse-to-here)
4033 (cond (sgml-markup-type
4034 (error "No elements allowed in markup"))
4035 ((and ;;sgml-buffer-eltype-map
4036 (not (eq sgml-current-state sgml-any)))
4037 (let ((tab
4038 (mapcar (function (lambda (x) (cons (symbol-name x) nil)))
4039 (sgml-current-list-of-valid-eltypes))))
4040 (cond ((null tab)
4041 (error "No element valid at this point"))
4042 (t
4043 (completing-read prompt tab nil t
4044 (and (null (cdr tab)) (caar tab)))))))
4045 (t
4046 (read-from-minibuffer prompt))))
4047
4048 (defun sgml-element-attribute-specification-list (element)
4049 "Return the attribute specification list for ELEMENT.
4050 This is a list of (attname value) lists."
4051 ;;; (if (> (sgml-element-stag-len element) 2)
4052 ;;; (save-excursion
4053 ;;; (sgml-with-parser-syntax
4054 ;;; (sgml-goto-epos (sgml-element-stag-epos element))
4055 ;;; (sgml-check-delim "STAGO")
4056 ;;; (sgml-check-name)
4057 ;;; (prog1 (sgml-parse-attribute-specification-list
4058 ;;; (sgml-element-eltype element))
4059 ;;; (sgml-pop-all-entities)))))
4060 (sgml-tree-asl element))
4061
4062 (defun sgml-find-attribute-element ()
4063 "Return the element to which an attribute editing command should be applied."
4064 (let ((el (sgml-find-element-of (point))))
4065 (save-excursion
4066 (sgml-parse-to (point))
4067 ;; If after a start-tag of an empty element return that element
4068 ;; instead of current element
4069 (if (eq sgml-markup-type 'start-tag)
4070 sgml-markup-tree ; the element of the start-tag
4071 el))))
4072
4073
4074 (defun sgml-element-attval (element attribute)
4075 "Return the value of the ATTRIBUTE in ELEMENT, string or nil."
4076 (let ((asl (sgml-element-attribute-specification-list element))
4077 (def (sgml-attdecl-default-value
4078 (sgml-lookup-attdecl attribute (sgml-element-attlist element)))))
4079 (or (sgml-attspec-attval (sgml-lookup-attspec attribute asl))
4080 (sgml-default-value-attval def))))
4081
4082
4083 (defun sgml-cohere-name (x)
4084 "Convert X into a string where X can be a string, a symbol or an element."
4085 (cond ((stringp x) x)
4086 ((symbolp x) (symbol-name x))
4087 (t (sgml-element-gi x))))
4088
4089 (defun sgml-start-tag-of (element)
4090 "Return the start-tag for ELEMENT."
4091 (format "<%s>" (sgml-cohere-name element)))
4092
4093 (defun sgml-end-tag-of (element)
4094 "Return the end-tag for ELEMENT (token or element)."
4095 (format "</%s>" (sgml-cohere-name element)))
4096
4097 (defun sgml-top-element ()
4098 "Return the document element."
4099 (sgml-element-content (sgml-find-context-of (point-min))))
4100
4101 (defun sgml-off-top-p (element)
4102 "True if ELEMENT is the pseudo element above the document element."
4103 (null (sgml-tree-parent element)))
4104
4105 (defun sgml-safe-context-of (pos)
4106 (let ((sgml-throw-on-error 'parse-error))
4107 (catch sgml-throw-on-error
4108 (sgml-find-context-of pos))))
4109
4110 (defun sgml-safe-element-at (pos)
4111 (let ((sgml-throw-on-error 'parse-error))
4112 (catch sgml-throw-on-error
4113 (if (= pos (point-max))
4114 (sgml-find-context-of pos)
4115 (sgml-find-element-of pos)))))
4116
4117 (defun sgml-in-prolog-p ()
4118 (let ((el (sgml-safe-context-of (point))))
4119 (or (null el)
4120 (sgml-off-top-p el))))
4121
4122
4123 ;;;; Provide
4124
4125 (provide 'psgml-parse)
4126
4127 ;;; psgml-parse.el ends here