0
|
1 /* Prepare TeX index dribble output into an actual index.
|
|
2
|
|
3 Version 1.45
|
|
4
|
|
5 Copyright (C) 1987, 1991, 1992 Free Software Foundation, Inc.
|
|
6
|
|
7 This program is free software; you can redistribute it and/or modify
|
|
8 it under the terms of the GNU General Public License as published by
|
|
9 the Free Software Foundation; either version 2, or (at your option)
|
|
10 any later version.
|
|
11
|
|
12 This program is distributed in the hope that it will be useful,
|
|
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
15 GNU General Public License for more details.
|
|
16
|
|
17 You should have received a copy of the GNU General Public License
|
|
18 along with this program; if not, write to the Free Software
|
|
19 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307. */
|
|
20
|
|
21
|
|
22 #if defined (HAVE_CONFIG_H)
|
|
23 #include "config.h"
|
|
24 #endif
|
|
25
|
|
26 #include <stdio.h>
|
|
27 #include <ctype.h>
|
|
28 #include <errno.h>
|
|
29 #include "getopt.h"
|
|
30
|
|
31 #define TEXINDEX_VERSION_STRING "GNU Texindex 2.0 for Texinfo release 3.4"
|
|
32
|
|
33 #if defined (emacs)
|
|
34 # include "../src/config.h"
|
|
35 /* Some s/os.h files redefine these. */
|
|
36 # undef read
|
|
37 # undef close
|
|
38 # undef write
|
|
39 # undef open
|
|
40 #endif
|
|
41
|
|
42 #if defined (STDC_HEADERS)
|
|
43 # include <stdlib.h>
|
|
44 # undef HAVE_MEMSET
|
|
45 # define HAVE_MEMSET
|
|
46 # undef HAVE_STRCHR
|
|
47 # define HAVE_STRCHR
|
|
48 #else /* !STDC_HEADERS */
|
|
49 char *getenv (), *malloc (), *realloc ();
|
|
50 #endif /* !STDC_HEADERS */
|
|
51
|
|
52 #if defined (HAVE_STRING_H)
|
|
53 # include <string.h>
|
|
54 #endif /* HAVE_STRING_H */
|
|
55
|
|
56 #if !defined (HAVE_STRCHR)
|
|
57 char *strrchr ();
|
|
58 #endif /* !HAVE_STRCHR */
|
|
59
|
|
60 #if defined (HAVE_UNISTD_H)
|
|
61 # include <unistd.h>
|
|
62 #else /* !HAVE_UNISTD_H */
|
|
63 off_t lseek ();
|
|
64 #endif /* !HAVE_UNISTD_H */
|
|
65
|
|
66 #if !defined (HAVE_MEMSET)
|
|
67 #undef memset
|
|
68 #define memset(ptr, ignore, count) bzero (ptr, count)
|
|
69 #endif
|
|
70
|
|
71
|
|
72 char *mktemp ();
|
|
73
|
|
74 #if defined (VMS)
|
|
75 # include <file.h>
|
|
76 # define TI_NO_ERROR ((1 << 28) | 1)
|
|
77 # define TI_FATAL_ERROR ((1 << 28) | 4)
|
|
78 # define unlink delete
|
|
79 #else /* !VMS */
|
|
80
|
|
81 # if defined (HAVE_SYS_FCNTL_H)
|
|
82 # include <sys/types.h>
|
|
83 # include <sys/fcntl.h>
|
|
84 # endif /* HAVE_SYS_FCNTL_H */
|
|
85
|
|
86 # if defined (_AIX) || !defined (_POSIX_VERSION)
|
|
87 # include <sys/file.h>
|
|
88 # else /* !AIX && _POSIX_VERSION */
|
|
89 # if !defined (HAVE_SYS_FCNTL_H)
|
|
90 # include <fcntl.h>
|
|
91 # endif /* !HAVE_FCNTL_H */
|
|
92 # endif /* !_AIX && _POSIX_VERSION */
|
|
93 # define TI_NO_ERROR 0
|
|
94 # define TI_FATAL_ERROR 1
|
|
95 #endif /* !VMS */
|
|
96
|
|
97 #if !defined (SEEK_SET)
|
|
98 # define SEEK_SET 0
|
|
99 # define SEEK_CUR 1
|
|
100 # define SEEK_END 2
|
|
101 #endif /* !SEEK_SET */
|
|
102
|
|
103 #if !defined (errno)
|
|
104 extern int errno;
|
|
105 #endif
|
|
106 char *strerror ();
|
|
107
|
|
108 /* When sorting in core, this structure describes one line
|
|
109 and the position and length of its first keyfield. */
|
|
110 struct lineinfo
|
|
111 {
|
|
112 char *text; /* The actual text of the line. */
|
|
113 union {
|
|
114 char *text; /* The start of the key (for textual comparison). */
|
|
115 long number; /* The numeric value (for numeric comparison). */
|
|
116 } key;
|
|
117 long keylen; /* Length of KEY field. */
|
|
118 };
|
|
119
|
|
120 /* This structure describes a field to use as a sort key. */
|
|
121 struct keyfield
|
|
122 {
|
|
123 int startwords; /* Number of words to skip. */
|
|
124 int startchars; /* Number of additional chars to skip. */
|
|
125 int endwords; /* Number of words to ignore at end. */
|
|
126 int endchars; /* Ditto for characters of last word. */
|
|
127 char ignore_blanks; /* Non-zero means ignore spaces and tabs. */
|
|
128 char fold_case; /* Non-zero means case doesn't matter. */
|
|
129 char reverse; /* Non-zero means compare in reverse order. */
|
|
130 char numeric; /* Non-zeros means field is ASCII numeric. */
|
|
131 char positional; /* Sort according to file position. */
|
|
132 char braced; /* Count balanced-braced groupings as fields. */
|
|
133 };
|
|
134
|
|
135 /* Vector of keyfields to use. */
|
|
136 struct keyfield keyfields[3];
|
|
137
|
|
138 /* Number of keyfields stored in that vector. */
|
|
139 int num_keyfields = 3;
|
|
140
|
|
141 /* Vector of input file names, terminated with a null pointer. */
|
|
142 char **infiles;
|
|
143
|
|
144 /* Vector of corresponding output file names, or NULL, meaning default it
|
|
145 (add an `s' to the end). */
|
|
146 char **outfiles;
|
|
147
|
|
148 /* Length of `infiles'. */
|
|
149 int num_infiles;
|
|
150
|
|
151 /* Pointer to the array of pointers to lines being sorted. */
|
|
152 char **linearray;
|
|
153
|
|
154 /* The allocated length of `linearray'. */
|
|
155 long nlines;
|
|
156
|
|
157 /* Directory to use for temporary files. On Unix, it ends with a slash. */
|
|
158 char *tempdir;
|
|
159
|
|
160 /* Start of filename to use for temporary files. */
|
|
161 char *tempbase;
|
|
162
|
|
163 /* Number of last temporary file. */
|
|
164 int tempcount;
|
|
165
|
|
166 /* Number of last temporary file already deleted.
|
|
167 Temporary files are deleted by `flush_tempfiles' in order of creation. */
|
|
168 int last_deleted_tempcount;
|
|
169
|
|
170 /* During in-core sort, this points to the base of the data block
|
|
171 which contains all the lines of data. */
|
|
172 char *text_base;
|
|
173
|
|
174 /* Additional command switches .*/
|
|
175
|
|
176 /* Nonzero means do not delete tempfiles -- for debugging. */
|
|
177 int keep_tempfiles;
|
|
178
|
|
179 /* The name this program was run with. */
|
|
180 char *program_name;
|
|
181
|
|
182 /* Forward declarations of functions in this file. */
|
|
183
|
|
184 struct linebuffer;
|
|
185 void decode_command (int, char **);
|
|
186 void sort_in_core (char *, long, char *);
|
|
187 void sort_offline (char *, int, long, char *);
|
|
188 char **parsefile (char *, char **, char *, long);
|
|
189 char *find_field (struct keyfield *, char *, long *);
|
|
190 char *find_pos (char *, int, int, int);
|
|
191 long find_value (char *, long);
|
|
192 char *find_braced_pos (char *, int, int, int);
|
|
193 char *find_braced_end (char *);
|
|
194 void writelines (char **, int, FILE *);
|
|
195 int compare_field (struct keyfield *, char *, long, long, char *, long, long);
|
|
196 int compare_full (const void *, const void *);
|
|
197 long readline (struct linebuffer *, FILE *);
|
|
198 int merge_files (char **, int, char *);
|
|
199 int merge_direct (char **, int, char *);
|
|
200 void pfatal_with_name (char *);
|
|
201 void fatal (char *, char *);
|
|
202 void error (char *, char *);
|
|
203 void *xmalloc (int), *xrealloc (void *, int);
|
|
204 char *concat (char *, char *, char *);
|
|
205 char *maketempname (int);
|
|
206 void flush_tempfiles (int);
|
|
207 char *tempcopy (int);
|
|
208 static void memory_error (char *callers_name, int bytes_wanted);
|
|
209
|
|
210 #define MAX_IN_CORE_SORT 500000
|
|
211
|
|
212 void
|
|
213 main (argc, argv)
|
|
214 int argc;
|
|
215 char **argv;
|
|
216 {
|
|
217 int i;
|
|
218
|
|
219 tempcount = 0;
|
|
220 last_deleted_tempcount = 0;
|
|
221
|
|
222 program_name = strrchr (argv[0], '/');
|
|
223 if (program_name != (char *)NULL)
|
|
224 program_name++;
|
|
225 else
|
|
226 program_name = argv[0];
|
|
227
|
|
228 /* Describe the kind of sorting to do. */
|
|
229 /* The first keyfield uses the first braced field and folds case. */
|
|
230 keyfields[0].braced = 1;
|
|
231 keyfields[0].fold_case = 1;
|
|
232 keyfields[0].endwords = -1;
|
|
233 keyfields[0].endchars = -1;
|
|
234
|
|
235 /* The second keyfield uses the second braced field, numerically. */
|
|
236 keyfields[1].braced = 1;
|
|
237 keyfields[1].numeric = 1;
|
|
238 keyfields[1].startwords = 1;
|
|
239 keyfields[1].endwords = -1;
|
|
240 keyfields[1].endchars = -1;
|
|
241
|
|
242 /* The third keyfield (which is ignored while discarding duplicates)
|
|
243 compares the whole line. */
|
|
244 keyfields[2].endwords = -1;
|
|
245 keyfields[2].endchars = -1;
|
|
246
|
|
247 decode_command (argc, argv);
|
|
248
|
|
249 tempbase = mktemp (concat ("txiXXXXXX", "", ""));
|
|
250
|
|
251 /* Process input files completely, one by one. */
|
|
252
|
|
253 for (i = 0; i < num_infiles; i++)
|
|
254 {
|
|
255 int desc;
|
|
256 long ptr;
|
|
257 char *outfile;
|
|
258
|
|
259 desc = open (infiles[i], O_RDONLY, 0);
|
|
260 if (desc < 0)
|
|
261 pfatal_with_name (infiles[i]);
|
|
262 lseek (desc, (off_t) 0, SEEK_END);
|
|
263 ptr = (long) lseek (desc, (off_t) 0, SEEK_CUR);
|
|
264
|
|
265 close (desc);
|
|
266
|
|
267 outfile = outfiles[i];
|
|
268 if (!outfile)
|
|
269 {
|
|
270 outfile = concat (infiles[i], "s", "");
|
|
271 }
|
|
272
|
|
273 if (ptr < MAX_IN_CORE_SORT)
|
|
274 /* Sort a small amount of data. */
|
|
275 sort_in_core (infiles[i], ptr, outfile);
|
|
276 else
|
|
277 sort_offline (infiles[i], 1, ptr, outfile);
|
|
278 }
|
|
279
|
|
280 flush_tempfiles (tempcount);
|
|
281 exit (TI_NO_ERROR);
|
|
282 }
|
|
283
|
|
284 typedef struct
|
|
285 {
|
|
286 char *long_name;
|
|
287 char *short_name;
|
|
288 int *variable_ref;
|
|
289 int variable_value;
|
|
290 char *arg_name;
|
|
291 char *doc_string;
|
|
292 } TEXINDEX_OPTION;
|
|
293
|
|
294 TEXINDEX_OPTION texindex_options[] = {
|
|
295 { "--keep", "-k", &keep_tempfiles, 1, (char *)NULL,
|
|
296 "Keep temporary files around after processing" },
|
|
297 { "--no-keep", 0, &keep_tempfiles, 0, (char *)NULL,
|
|
298 "Do not keep temporary files around after processing (default)" },
|
|
299 { "--output", "-o", (int *)NULL, 0, "FILE",
|
|
300 "Send output to FILE" },
|
|
301 { "--version", (char *)NULL, (int *)NULL, 0, (char *)NULL,
|
|
302 "Show version information" },
|
|
303 { "--help", "-h", (int *)NULL, 0, (char *)NULL, "Produce this listing" },
|
|
304 { (char *)NULL, (char *)NULL, (int *)NULL, 0, (char *)NULL }
|
|
305 };
|
|
306
|
|
307 static void
|
|
308 usage (int result_value)
|
|
309 {
|
|
310 register int i;
|
|
311
|
|
312 fprintf (stderr, "Usage: %s [OPTIONS] FILE...\n", program_name);
|
|
313 fprintf (stderr, " Generate a permuted index for the TeX files given.\n");
|
|
314 fprintf (stderr, " Usually FILE... is `foo.\?\?' for the source file `foo.tex'.\n");
|
|
315 fprintf (stderr, " The OPTIONS are:\n");
|
|
316
|
|
317 for (i = 0; texindex_options[i].long_name; i++)
|
|
318 {
|
|
319 fprintf (stderr, " %s %s",
|
|
320 texindex_options[i].long_name,
|
|
321 texindex_options[i].arg_name ?
|
|
322 texindex_options[i].arg_name : "");
|
|
323
|
|
324 if (texindex_options[i].short_name)
|
|
325 fprintf (stderr, " \n or %s %s",
|
|
326 texindex_options[i].short_name,
|
|
327 texindex_options[i].arg_name ?
|
|
328 texindex_options[i].arg_name : "");
|
|
329 fprintf (stderr, "\t%s\n", texindex_options[i].doc_string);
|
|
330 }
|
|
331 exit (result_value);
|
|
332 }
|
|
333
|
|
334 /* Decode the command line arguments to set the parameter variables
|
|
335 and set up the vector of keyfields and the vector of input files. */
|
|
336
|
|
337 void
|
|
338 decode_command (argc, argv)
|
|
339 int argc;
|
|
340 char **argv;
|
|
341 {
|
|
342 int arg_index = 1;
|
|
343 char **ip;
|
|
344 char **op;
|
|
345
|
|
346 /* Store default values into parameter variables. */
|
|
347
|
|
348 tempdir = getenv ("TMPDIR");
|
|
349 #ifdef VMS
|
|
350 if (tempdir == NULL)
|
|
351 tempdir = "sys$scratch:";
|
|
352 #else
|
|
353 if (tempdir == NULL)
|
|
354 tempdir = "/tmp/";
|
|
355 else
|
|
356 tempdir = concat (tempdir, "/", "");
|
|
357 #endif
|
|
358
|
|
359 keep_tempfiles = 0;
|
|
360
|
|
361 /* Allocate ARGC input files, which must be enough. */
|
|
362
|
|
363 infiles = (char **) xmalloc (argc * sizeof (char *));
|
|
364 outfiles = (char **) xmalloc (argc * sizeof (char *));
|
|
365 ip = infiles;
|
|
366 op = outfiles;
|
|
367
|
|
368 while (arg_index < argc)
|
|
369 {
|
|
370 char *arg = argv[arg_index++];
|
|
371
|
|
372 if (*arg == '-')
|
|
373 {
|
|
374 if (strcmp (arg, "--version") == 0)
|
|
375 {
|
|
376 fprintf (stderr, "%s\n", TEXINDEX_VERSION_STRING);
|
|
377 exit (0);
|
|
378 }
|
|
379 else if ((strcmp (arg, "--keep") == 0) ||
|
|
380 (strcmp (arg, "-k") == 0))
|
|
381 {
|
|
382 keep_tempfiles = 1;
|
|
383 }
|
|
384 else if ((strcmp (arg, "--help") == 0) ||
|
|
385 (strcmp (arg, "-h") == 0))
|
|
386 {
|
|
387 usage (0);
|
|
388 }
|
|
389 else if ((strcmp (arg, "--output") == 0) ||
|
|
390 (strcmp (arg, "-o") == 0))
|
|
391 {
|
|
392 if (argv[arg_index] != (char *)NULL)
|
|
393 {
|
|
394 arg_index++;
|
|
395 if (op > outfiles)
|
|
396 *(op - 1) = argv[arg_index];
|
|
397 }
|
|
398 else
|
|
399 usage (1);
|
|
400 }
|
|
401 else
|
|
402 usage (1);
|
|
403 }
|
|
404 else
|
|
405 {
|
|
406 *ip++ = arg;
|
|
407 *op++ = (char *)NULL;
|
|
408 }
|
|
409 }
|
|
410 for ( ; optind < argc; optind++)
|
|
411 {
|
|
412 *ip++ = argv[optind];
|
|
413 *op++ = NULL;
|
|
414 }
|
|
415
|
|
416 /* Record number of keyfields and terminate list of filenames. */
|
|
417 num_infiles = ip - infiles;
|
|
418 *ip = (char *)NULL;
|
|
419 if (num_infiles == 0)
|
|
420 usage (1);
|
|
421 }
|
|
422
|
|
423 /* Return a name for a temporary file. */
|
|
424
|
|
425 char *
|
|
426 maketempname (count)
|
|
427 int count;
|
|
428 {
|
|
429 char tempsuffix[10];
|
|
430 sprintf (tempsuffix, "%d", count);
|
|
431 return concat (tempdir, tempbase, tempsuffix);
|
|
432 }
|
|
433
|
|
434 /* Delete all temporary files up to TO_COUNT. */
|
|
435
|
|
436 void
|
|
437 flush_tempfiles (to_count)
|
|
438 int to_count;
|
|
439 {
|
|
440 if (keep_tempfiles)
|
|
441 return;
|
|
442 while (last_deleted_tempcount < to_count)
|
|
443 unlink (maketempname (++last_deleted_tempcount));
|
|
444 }
|
|
445
|
|
446 /* Copy the input file open on IDESC into a temporary file
|
|
447 and return the temporary file name. */
|
|
448
|
|
449 #define BUFSIZE 1024
|
|
450
|
|
451 char *
|
|
452 tempcopy (idesc)
|
|
453 int idesc;
|
|
454 {
|
|
455 char *outfile = maketempname (++tempcount);
|
|
456 int odesc;
|
|
457 char buffer[BUFSIZE];
|
|
458
|
|
459 odesc = open (outfile, O_WRONLY | O_CREAT, 0666);
|
|
460
|
|
461 if (odesc < 0)
|
|
462 pfatal_with_name (outfile);
|
|
463
|
|
464 while (1)
|
|
465 {
|
|
466 int nread = read (idesc, buffer, BUFSIZE);
|
|
467 write (odesc, buffer, nread);
|
|
468 if (!nread)
|
|
469 break;
|
|
470 }
|
|
471
|
|
472 close (odesc);
|
|
473
|
|
474 return outfile;
|
|
475 }
|
|
476
|
|
477 /* Compare LINE1 and LINE2 according to the specified set of keyfields. */
|
|
478
|
|
479 int
|
|
480 compare_full (const void *arg1, const void *arg2)
|
|
481 {
|
|
482 char **line1, **line2;
|
|
483 int i;
|
|
484
|
|
485 line1 = (char **) arg1;
|
|
486 line2 = (char **) arg2;
|
|
487
|
|
488 /* Compare using the first keyfield;
|
|
489 if that does not distinguish the lines, try the second keyfield;
|
|
490 and so on. */
|
|
491
|
|
492 for (i = 0; i < num_keyfields; i++)
|
|
493 {
|
|
494 long length1, length2;
|
|
495 char *start1 = find_field (&keyfields[i], *line1, &length1);
|
|
496 char *start2 = find_field (&keyfields[i], *line2, &length2);
|
|
497 int tem = compare_field (&keyfields[i], start1, length1, *line1 - text_base,
|
|
498 start2, length2, *line2 - text_base);
|
|
499 if (tem)
|
|
500 {
|
|
501 if (keyfields[i].reverse)
|
|
502 return -tem;
|
|
503 return tem;
|
|
504 }
|
|
505 }
|
|
506
|
|
507 return 0; /* Lines match exactly. */
|
|
508 }
|
|
509
|
|
510 /* Compare LINE1 and LINE2, described by structures
|
|
511 in which the first keyfield is identified in advance.
|
|
512 For positional sorting, assumes that the order of the lines in core
|
|
513 reflects their nominal order. */
|
|
514
|
|
515 static int
|
|
516 compare_prepared (const void *arg1, const void *arg2)
|
|
517 {
|
|
518 struct lineinfo *line1, *line2;
|
|
519 int i;
|
|
520 int tem;
|
|
521 char *text1, *text2;
|
|
522
|
|
523 line1 = (struct lineinfo *) arg1;
|
|
524 line2 = (struct lineinfo *) arg2;
|
|
525
|
|
526 /* Compare using the first keyfield, which has been found for us already. */
|
|
527 if (keyfields->positional)
|
|
528 {
|
|
529 if (line1->text - text_base > line2->text - text_base)
|
|
530 tem = 1;
|
|
531 else
|
|
532 tem = -1;
|
|
533 }
|
|
534 else if (keyfields->numeric)
|
|
535 tem = line1->key.number - line2->key.number;
|
|
536 else
|
|
537 tem = compare_field (keyfields, line1->key.text, line1->keylen, 0,
|
|
538 line2->key.text, line2->keylen, 0);
|
|
539 if (tem)
|
|
540 {
|
|
541 if (keyfields->reverse)
|
|
542 return -tem;
|
|
543 return tem;
|
|
544 }
|
|
545
|
|
546 text1 = line1->text;
|
|
547 text2 = line2->text;
|
|
548
|
|
549 /* Compare using the second keyfield;
|
|
550 if that does not distinguish the lines, try the third keyfield;
|
|
551 and so on. */
|
|
552
|
|
553 for (i = 1; i < num_keyfields; i++)
|
|
554 {
|
|
555 long length1, length2;
|
|
556 char *start1 = find_field (&keyfields[i], text1, &length1);
|
|
557 char *start2 = find_field (&keyfields[i], text2, &length2);
|
|
558 int tem = compare_field (&keyfields[i], start1, length1, text1 - text_base,
|
|
559 start2, length2, text2 - text_base);
|
|
560 if (tem)
|
|
561 {
|
|
562 if (keyfields[i].reverse)
|
|
563 return -tem;
|
|
564 return tem;
|
|
565 }
|
|
566 }
|
|
567
|
|
568 return 0; /* Lines match exactly. */
|
|
569 }
|
|
570
|
|
571 /* Like compare_full but more general.
|
|
572 You can pass any strings, and you can say how many keyfields to use.
|
|
573 POS1 and POS2 should indicate the nominal positional ordering of
|
|
574 the two lines in the input. */
|
|
575
|
|
576 static int
|
|
577 compare_general (char *str1, char *str2, long pos1, long pos2,
|
|
578 int use_keyfields)
|
|
579 {
|
|
580 int i;
|
|
581
|
|
582 /* Compare using the first keyfield;
|
|
583 if that does not distinguish the lines, try the second keyfield;
|
|
584 and so on. */
|
|
585
|
|
586 for (i = 0; i < use_keyfields; i++)
|
|
587 {
|
|
588 long length1, length2;
|
|
589 char *start1 = find_field (&keyfields[i], str1, &length1);
|
|
590 char *start2 = find_field (&keyfields[i], str2, &length2);
|
|
591 int tem = compare_field (&keyfields[i], start1, length1, pos1,
|
|
592 start2, length2, pos2);
|
|
593 if (tem)
|
|
594 {
|
|
595 if (keyfields[i].reverse)
|
|
596 return -tem;
|
|
597 return tem;
|
|
598 }
|
|
599 }
|
|
600
|
|
601 return 0; /* Lines match exactly. */
|
|
602 }
|
|
603
|
|
604 /* Find the start and length of a field in STR according to KEYFIELD.
|
|
605 A pointer to the starting character is returned, and the length
|
|
606 is stored into the int that LENGTHPTR points to. */
|
|
607
|
|
608 char *
|
|
609 find_field (keyfield, str, lengthptr)
|
|
610 struct keyfield *keyfield;
|
|
611 char *str;
|
|
612 long *lengthptr;
|
|
613 {
|
|
614 char *start;
|
|
615 char *end;
|
|
616 char *(*fun) ();
|
|
617
|
|
618 if (keyfield->braced)
|
|
619 fun = find_braced_pos;
|
|
620 else
|
|
621 fun = find_pos;
|
|
622
|
|
623 start = (*fun) (str, keyfield->startwords, keyfield->startchars,
|
|
624 keyfield->ignore_blanks);
|
|
625 if (keyfield->endwords < 0)
|
|
626 {
|
|
627 if (keyfield->braced)
|
|
628 end = find_braced_end (start);
|
|
629 else
|
|
630 {
|
|
631 end = start;
|
|
632 while (*end && *end != '\n')
|
|
633 end++;
|
|
634 }
|
|
635 }
|
|
636 else
|
|
637 {
|
|
638 end = (*fun) (str, keyfield->endwords, keyfield->endchars, 0);
|
|
639 if (end - str < start - str)
|
|
640 end = start;
|
|
641 }
|
|
642 *lengthptr = end - start;
|
|
643 return start;
|
|
644 }
|
|
645
|
|
646 /* Return a pointer to a specified place within STR,
|
|
647 skipping (from the beginning) WORDS words and then CHARS chars.
|
|
648 If IGNORE_BLANKS is nonzero, we skip all blanks
|
|
649 after finding the specified word. */
|
|
650
|
|
651 char *
|
|
652 find_pos (str, words, chars, ignore_blanks)
|
|
653 char *str;
|
|
654 int words, chars;
|
|
655 int ignore_blanks;
|
|
656 {
|
|
657 int i;
|
|
658 char *p = str;
|
|
659
|
|
660 for (i = 0; i < words; i++)
|
|
661 {
|
|
662 char c;
|
|
663 /* Find next bunch of nonblanks and skip them. */
|
|
664 while ((c = *p) == ' ' || c == '\t')
|
|
665 p++;
|
|
666 while ((c = *p) && c != '\n' && !(c == ' ' || c == '\t'))
|
|
667 p++;
|
|
668 if (!*p || *p == '\n')
|
|
669 return p;
|
|
670 }
|
|
671
|
|
672 while (*p == ' ' || *p == '\t')
|
|
673 p++;
|
|
674
|
|
675 for (i = 0; i < chars; i++)
|
|
676 {
|
|
677 if (!*p || *p == '\n')
|
|
678 break;
|
|
679 p++;
|
|
680 }
|
|
681 return p;
|
|
682 }
|
|
683
|
|
684 /* Like find_pos but assumes that each field is surrounded by braces
|
|
685 and that braces within fields are balanced. */
|
|
686
|
|
687 char *
|
|
688 find_braced_pos (str, words, chars, ignore_blanks)
|
|
689 char *str;
|
|
690 int words, chars;
|
|
691 int ignore_blanks;
|
|
692 {
|
|
693 int i;
|
|
694 int bracelevel;
|
|
695 char *p = str;
|
|
696 char c;
|
|
697
|
|
698 for (i = 0; i < words; i++)
|
|
699 {
|
|
700 bracelevel = 1;
|
|
701 while ((c = *p++) != '{' && c != '\n' && c)
|
|
702 /* Do nothing. */ ;
|
|
703 if (c != '{')
|
|
704 return p - 1;
|
|
705 while (bracelevel)
|
|
706 {
|
|
707 c = *p++;
|
|
708 if (c == '{')
|
|
709 bracelevel++;
|
|
710 if (c == '}')
|
|
711 bracelevel--;
|
|
712 if (c == 0 || c == '\n')
|
|
713 return p - 1;
|
|
714 }
|
|
715 }
|
|
716
|
|
717 while ((c = *p++) != '{' && c != '\n' && c)
|
|
718 /* Do nothing. */ ;
|
|
719
|
|
720 if (c != '{')
|
|
721 return p - 1;
|
|
722
|
|
723 if (ignore_blanks)
|
|
724 while ((c = *p) == ' ' || c == '\t')
|
|
725 p++;
|
|
726
|
|
727 for (i = 0; i < chars; i++)
|
|
728 {
|
|
729 if (!*p || *p == '\n')
|
|
730 break;
|
|
731 p++;
|
|
732 }
|
|
733 return p;
|
|
734 }
|
|
735
|
|
736 /* Find the end of the balanced-brace field which starts at STR.
|
|
737 The position returned is just before the closing brace. */
|
|
738
|
|
739 char *
|
|
740 find_braced_end (str)
|
|
741 char *str;
|
|
742 {
|
|
743 int bracelevel;
|
|
744 char *p = str;
|
|
745 char c;
|
|
746
|
|
747 bracelevel = 1;
|
|
748 while (bracelevel)
|
|
749 {
|
|
750 c = *p++;
|
|
751 if (c == '{')
|
|
752 bracelevel++;
|
|
753 if (c == '}')
|
|
754 bracelevel--;
|
|
755 if (c == 0 || c == '\n')
|
|
756 return p - 1;
|
|
757 }
|
|
758 return p - 1;
|
|
759 }
|
|
760
|
|
761 long
|
|
762 find_value (start, length)
|
|
763 char *start;
|
|
764 long length;
|
|
765 {
|
|
766 while (length != 0L)
|
|
767 {
|
|
768 if (isdigit (*start))
|
|
769 return atol (start);
|
|
770 length--;
|
|
771 start++;
|
|
772 }
|
|
773 return 0l;
|
|
774 }
|
|
775
|
|
776 /* Vector used to translate characters for comparison.
|
|
777 This is how we make all alphanumerics follow all else,
|
|
778 and ignore case in the first sorting. */
|
|
779 int char_order[256];
|
|
780
|
|
781 static void
|
|
782 init_char_order (void)
|
|
783 {
|
|
784 int i;
|
|
785 for (i = 1; i < 256; i++)
|
|
786 char_order[i] = i;
|
|
787
|
|
788 for (i = '0'; i <= '9'; i++)
|
|
789 char_order[i] += 512;
|
|
790
|
|
791 for (i = 'a'; i <= 'z'; i++)
|
|
792 {
|
|
793 char_order[i] = 512 + i;
|
|
794 char_order[i + 'A' - 'a'] = 512 + i;
|
|
795 }
|
|
796 }
|
|
797
|
|
798 /* Compare two fields (each specified as a start pointer and a character count)
|
|
799 according to KEYFIELD.
|
|
800 The sign of the value reports the relation between the fields. */
|
|
801
|
|
802 int
|
|
803 compare_field (keyfield, start1, length1, pos1, start2, length2, pos2)
|
|
804 struct keyfield *keyfield;
|
|
805 char *start1;
|
|
806 long length1;
|
|
807 long pos1;
|
|
808 char *start2;
|
|
809 long length2;
|
|
810 long pos2;
|
|
811 {
|
|
812 if (keyfields->positional)
|
|
813 {
|
|
814 if (pos1 > pos2)
|
|
815 return 1;
|
|
816 else
|
|
817 return -1;
|
|
818 }
|
|
819 if (keyfield->numeric)
|
|
820 {
|
|
821 long value = find_value (start1, length1) - find_value (start2, length2);
|
|
822 if (value > 0)
|
|
823 return 1;
|
|
824 if (value < 0)
|
|
825 return -1;
|
|
826 return 0;
|
|
827 }
|
|
828 else
|
|
829 {
|
|
830 char *p1 = start1;
|
|
831 char *p2 = start2;
|
|
832 char *e1 = start1 + length1;
|
|
833 char *e2 = start2 + length2;
|
|
834
|
|
835 while (1)
|
|
836 {
|
|
837 int c1, c2;
|
|
838
|
|
839 if (p1 == e1)
|
|
840 c1 = 0;
|
|
841 else
|
|
842 c1 = *p1++;
|
|
843 if (p2 == e2)
|
|
844 c2 = 0;
|
|
845 else
|
|
846 c2 = *p2++;
|
|
847
|
|
848 if (char_order[c1] != char_order[c2])
|
|
849 return char_order[c1] - char_order[c2];
|
|
850 if (!c1)
|
|
851 break;
|
|
852 }
|
|
853
|
|
854 /* Strings are equal except possibly for case. */
|
|
855 p1 = start1;
|
|
856 p2 = start2;
|
|
857 while (1)
|
|
858 {
|
|
859 int c1, c2;
|
|
860
|
|
861 if (p1 == e1)
|
|
862 c1 = 0;
|
|
863 else
|
|
864 c1 = *p1++;
|
|
865 if (p2 == e2)
|
|
866 c2 = 0;
|
|
867 else
|
|
868 c2 = *p2++;
|
|
869
|
|
870 if (c1 != c2)
|
|
871 /* Reverse sign here so upper case comes out last. */
|
|
872 return c2 - c1;
|
|
873 if (!c1)
|
|
874 break;
|
|
875 }
|
|
876
|
|
877 return 0;
|
|
878 }
|
|
879 }
|
|
880
|
|
881 /* A `struct linebuffer' is a structure which holds a line of text.
|
|
882 `readline' reads a line from a stream into a linebuffer
|
|
883 and works regardless of the length of the line. */
|
|
884
|
|
885 struct linebuffer
|
|
886 {
|
|
887 long size;
|
|
888 char *buffer;
|
|
889 };
|
|
890
|
|
891 /* Initialize LINEBUFFER for use. */
|
|
892
|
|
893 static void
|
|
894 initbuffer (struct linebuffer *linebuffer)
|
|
895 {
|
|
896 linebuffer->size = 200;
|
|
897 linebuffer->buffer = (char *) xmalloc (200);
|
|
898 }
|
|
899
|
|
900 /* Read a line of text from STREAM into LINEBUFFER.
|
|
901 Return the length of the line. */
|
|
902
|
|
903 long
|
|
904 readline (linebuffer, stream)
|
|
905 struct linebuffer *linebuffer;
|
|
906 FILE *stream;
|
|
907 {
|
|
908 char *buffer = linebuffer->buffer;
|
|
909 char *p = linebuffer->buffer;
|
|
910 char *end = p + linebuffer->size;
|
|
911
|
|
912 while (1)
|
|
913 {
|
|
914 int c = getc (stream);
|
|
915 if (p == end)
|
|
916 {
|
|
917 buffer = (char *) xrealloc (buffer, linebuffer->size *= 2);
|
|
918 p += buffer - linebuffer->buffer;
|
|
919 end += buffer - linebuffer->buffer;
|
|
920 linebuffer->buffer = buffer;
|
|
921 }
|
|
922 if (c < 0 || c == '\n')
|
|
923 {
|
|
924 *p = 0;
|
|
925 break;
|
|
926 }
|
|
927 *p++ = c;
|
|
928 }
|
|
929
|
|
930 return p - buffer;
|
|
931 }
|
|
932
|
|
933 /* Sort an input file too big to sort in core. */
|
|
934
|
|
935 void
|
|
936 sort_offline (infile, nfiles, total, outfile)
|
|
937 char *infile;
|
|
938 int nfiles;
|
|
939 long total;
|
|
940 char *outfile;
|
|
941 {
|
|
942 /* More than enough. */
|
|
943 int ntemps = 2 * (total + MAX_IN_CORE_SORT - 1) / MAX_IN_CORE_SORT;
|
|
944 char **tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
|
|
945 FILE *istream = fopen (infile, "r");
|
|
946 int i;
|
|
947 struct linebuffer lb;
|
|
948 long linelength;
|
|
949 int failure = 0;
|
|
950
|
|
951 initbuffer (&lb);
|
|
952
|
|
953 /* Read in one line of input data. */
|
|
954
|
|
955 linelength = readline (&lb, istream);
|
|
956
|
|
957 if (lb.buffer[0] != '\\' && lb.buffer[0] != '@')
|
|
958 {
|
|
959 error ("%s: not a texinfo index file", infile);
|
|
960 return;
|
|
961 }
|
|
962
|
|
963 /* Split up the input into `ntemps' temporary files, or maybe fewer,
|
|
964 and put the new files' names into `tempfiles' */
|
|
965
|
|
966 for (i = 0; i < ntemps; i++)
|
|
967 {
|
|
968 char *outname = maketempname (++tempcount);
|
|
969 FILE *ostream = fopen (outname, "w");
|
|
970 long tempsize = 0;
|
|
971
|
|
972 if (!ostream)
|
|
973 pfatal_with_name (outname);
|
|
974 tempfiles[i] = outname;
|
|
975
|
|
976 /* Copy lines into this temp file as long as it does not make file
|
|
977 "too big" or until there are no more lines. */
|
|
978
|
|
979 while (tempsize + linelength + 1 <= MAX_IN_CORE_SORT)
|
|
980 {
|
|
981 tempsize += linelength + 1;
|
|
982 fputs (lb.buffer, ostream);
|
|
983 putc ('\n', ostream);
|
|
984
|
|
985 /* Read another line of input data. */
|
|
986
|
|
987 linelength = readline (&lb, istream);
|
|
988 if (!linelength && feof (istream))
|
|
989 break;
|
|
990
|
|
991 if (lb.buffer[0] != '\\' && lb.buffer[0] != '@')
|
|
992 {
|
|
993 error ("%s: not a texinfo index file", infile);
|
|
994 failure = 1;
|
|
995 goto fail;
|
|
996 }
|
|
997 }
|
|
998 fclose (ostream);
|
|
999 if (feof (istream))
|
|
1000 break;
|
|
1001 }
|
|
1002
|
|
1003 free (lb.buffer);
|
|
1004
|
|
1005 fail:
|
|
1006 /* Record number of temp files we actually needed. */
|
|
1007
|
|
1008 ntemps = i;
|
|
1009
|
|
1010 /* Sort each tempfile into another tempfile.
|
|
1011 Delete the first set of tempfiles and put the names of the second
|
|
1012 into `tempfiles'. */
|
|
1013
|
|
1014 for (i = 0; i < ntemps; i++)
|
|
1015 {
|
|
1016 char *newtemp = maketempname (++tempcount);
|
|
1017 sort_in_core (tempfiles[i], MAX_IN_CORE_SORT, newtemp);
|
|
1018 if (!keep_tempfiles)
|
|
1019 unlink (tempfiles[i]);
|
|
1020 tempfiles[i] = newtemp;
|
|
1021 }
|
|
1022
|
|
1023 if (failure)
|
|
1024 return;
|
|
1025
|
|
1026 /* Merge the tempfiles together and indexify. */
|
|
1027
|
|
1028 merge_files (tempfiles, ntemps, outfile);
|
|
1029 }
|
|
1030
|
|
1031 /* Sort INFILE, whose size is TOTAL,
|
|
1032 assuming that is small enough to be done in-core,
|
|
1033 then indexify it and send the output to OUTFILE (or to stdout). */
|
|
1034
|
|
1035 void
|
|
1036 sort_in_core (infile, total, outfile)
|
|
1037 char *infile;
|
|
1038 long total;
|
|
1039 char *outfile;
|
|
1040 {
|
|
1041 char **nextline;
|
|
1042 char *data = (char *) xmalloc (total + 1);
|
|
1043 char *file_data;
|
|
1044 long file_size;
|
|
1045 int i;
|
|
1046 FILE *ostream = stdout;
|
|
1047 struct lineinfo *lineinfo;
|
|
1048
|
|
1049 /* Read the contents of the file into the moby array `data'. */
|
|
1050
|
|
1051 int desc = open (infile, O_RDONLY, 0);
|
|
1052
|
|
1053 if (desc < 0)
|
|
1054 fatal ("failure reopening %s", infile);
|
|
1055 for (file_size = 0;;)
|
|
1056 {
|
|
1057 i = read (desc, data + file_size, total - file_size);
|
|
1058 if (i <= 0)
|
|
1059 break;
|
|
1060 file_size += i;
|
|
1061 }
|
|
1062 file_data = data;
|
|
1063 data[file_size] = 0;
|
|
1064
|
|
1065 close (desc);
|
|
1066
|
|
1067 if (file_size > 0 && data[0] != '\\' && data[0] != '@')
|
|
1068 {
|
|
1069 error ("%s: not a texinfo index file", infile);
|
|
1070 return;
|
|
1071 }
|
|
1072
|
|
1073 init_char_order ();
|
|
1074
|
|
1075 /* Sort routines want to know this address. */
|
|
1076
|
|
1077 text_base = data;
|
|
1078
|
|
1079 /* Create the array of pointers to lines, with a default size
|
|
1080 frequently enough. */
|
|
1081
|
|
1082 nlines = total / 50;
|
|
1083 if (!nlines)
|
|
1084 nlines = 2;
|
|
1085 linearray = (char **) xmalloc (nlines * sizeof (char *));
|
|
1086
|
|
1087 /* `nextline' points to the next free slot in this array.
|
|
1088 `nlines' is the allocated size. */
|
|
1089
|
|
1090 nextline = linearray;
|
|
1091
|
|
1092 /* Parse the input file's data, and make entries for the lines. */
|
|
1093
|
|
1094 nextline = parsefile (infile, nextline, file_data, file_size);
|
|
1095 if (nextline == 0)
|
|
1096 {
|
|
1097 error ("%s: not a texinfo index file", infile);
|
|
1098 return;
|
|
1099 }
|
|
1100
|
|
1101 /* Sort the lines. */
|
|
1102
|
|
1103 /* If we have enough space, find the first keyfield of each line in advance.
|
|
1104 Make a `struct lineinfo' for each line, which records the keyfield
|
|
1105 as well as the line, and sort them. */
|
|
1106
|
|
1107 lineinfo = (struct lineinfo *) malloc ((nextline - linearray) * sizeof (struct lineinfo));
|
|
1108
|
|
1109 if (lineinfo)
|
|
1110 {
|
|
1111 struct lineinfo *lp;
|
|
1112 char **p;
|
|
1113
|
|
1114 for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
|
|
1115 {
|
|
1116 lp->text = *p;
|
|
1117 lp->key.text = find_field (keyfields, *p, &lp->keylen);
|
|
1118 if (keyfields->numeric)
|
|
1119 lp->key.number = find_value (lp->key.text, lp->keylen);
|
|
1120 }
|
|
1121
|
|
1122 qsort (lineinfo, nextline - linearray, sizeof (struct lineinfo),
|
|
1123 compare_prepared);
|
|
1124
|
|
1125 for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
|
|
1126 *p = lp->text;
|
|
1127
|
|
1128 free (lineinfo);
|
|
1129 }
|
|
1130 else
|
|
1131 qsort (linearray, nextline - linearray, sizeof (char *), compare_full);
|
|
1132
|
|
1133 /* Open the output file. */
|
|
1134
|
|
1135 if (outfile)
|
|
1136 {
|
|
1137 ostream = fopen (outfile, "w");
|
|
1138 if (!ostream)
|
|
1139 pfatal_with_name (outfile);
|
|
1140 }
|
|
1141
|
|
1142 writelines (linearray, nextline - linearray, ostream);
|
|
1143 if (outfile)
|
|
1144 fclose (ostream);
|
|
1145
|
|
1146 free (linearray);
|
|
1147 free (data);
|
|
1148 }
|
|
1149
|
|
1150 /* Parse an input string in core into lines.
|
|
1151 DATA is the input string, and SIZE is its length.
|
|
1152 Data goes in LINEARRAY starting at NEXTLINE.
|
|
1153 The value returned is the first entry in LINEARRAY still unused.
|
|
1154 Value 0 means input file contents are invalid. */
|
|
1155
|
|
1156 char **
|
|
1157 parsefile (filename, nextline, data, size)
|
|
1158 char *filename;
|
|
1159 char **nextline;
|
|
1160 char *data;
|
|
1161 long size;
|
|
1162 {
|
|
1163 char *p, *end;
|
|
1164 char **line = nextline;
|
|
1165
|
|
1166 p = data;
|
|
1167 end = p + size;
|
|
1168 *end = 0;
|
|
1169
|
|
1170 while (p != end)
|
|
1171 {
|
|
1172 if (p[0] != '\\' && p[0] != '@')
|
|
1173 return 0;
|
|
1174
|
|
1175 *line = p;
|
|
1176 while (*p && *p != '\n')
|
|
1177 p++;
|
|
1178 if (p != end)
|
|
1179 p++;
|
|
1180
|
|
1181 line++;
|
|
1182 if (line == linearray + nlines)
|
|
1183 {
|
|
1184 char **old = linearray;
|
|
1185 linearray = (char **) xrealloc (linearray, sizeof (char *) * (nlines *= 4));
|
|
1186 line += linearray - old;
|
|
1187 }
|
|
1188 }
|
|
1189
|
|
1190 return line;
|
|
1191 }
|
|
1192
|
|
1193 /* Indexification is a filter applied to the sorted lines
|
|
1194 as they are being written to the output file.
|
|
1195 Multiple entries for the same name, with different page numbers,
|
|
1196 get combined into a single entry with multiple page numbers.
|
|
1197 The first braced field, which is used for sorting, is discarded.
|
|
1198 However, its first character is examined, folded to lower case,
|
|
1199 and if it is different from that in the previous line fed to us
|
|
1200 a \initial line is written with one argument, the new initial.
|
|
1201
|
|
1202 If an entry has four braced fields, then the second and third
|
|
1203 constitute primary and secondary names.
|
|
1204 In this case, each change of primary name
|
|
1205 generates a \primary line which contains only the primary name,
|
|
1206 and in between these are \secondary lines which contain
|
|
1207 just a secondary name and page numbers. */
|
|
1208
|
|
1209 /* The last primary name we wrote a \primary entry for.
|
|
1210 If only one level of indexing is being done, this is the last name seen. */
|
|
1211 char *lastprimary;
|
|
1212 /* Length of storage allocated for lastprimary. */
|
|
1213 int lastprimarylength;
|
|
1214
|
|
1215 /* Similar, for the secondary name. */
|
|
1216 char *lastsecondary;
|
|
1217 int lastsecondarylength;
|
|
1218
|
|
1219 /* Zero if we are not in the middle of writing an entry.
|
|
1220 One if we have written the beginning of an entry but have not
|
|
1221 yet written any page numbers into it.
|
|
1222 Greater than one if we have written the beginning of an entry
|
|
1223 plus at least one page number. */
|
|
1224 int pending;
|
|
1225
|
|
1226 /* The initial (for sorting purposes) of the last primary entry written.
|
|
1227 When this changes, a \initial {c} line is written */
|
|
1228
|
|
1229 char *lastinitial;
|
|
1230
|
|
1231 int lastinitiallength;
|
|
1232
|
|
1233 /* When we need a string of length 1 for the value of lastinitial,
|
|
1234 store it here. */
|
|
1235
|
|
1236 char lastinitial1[2];
|
|
1237
|
|
1238 /* Initialize static storage for writing an index. */
|
|
1239
|
|
1240 static void
|
|
1241 init_index (void)
|
|
1242 {
|
|
1243 pending = 0;
|
|
1244 lastinitial = lastinitial1;
|
|
1245 lastinitial1[0] = 0;
|
|
1246 lastinitial1[1] = 0;
|
|
1247 lastinitiallength = 0;
|
|
1248 lastprimarylength = 100;
|
|
1249 lastprimary = (char *) xmalloc (lastprimarylength + 1);
|
|
1250 memset (lastprimary, '\0', lastprimarylength + 1);
|
|
1251 lastsecondarylength = 100;
|
|
1252 lastsecondary = (char *) xmalloc (lastsecondarylength + 1);
|
|
1253 memset (lastsecondary, '\0', lastsecondarylength + 1);
|
|
1254 }
|
|
1255
|
|
1256 /* Indexify. Merge entries for the same name,
|
|
1257 insert headers for each initial character, etc. */
|
|
1258
|
|
1259 static void
|
|
1260 indexify (char *line, FILE *ostream)
|
|
1261 {
|
|
1262 char *primary, *secondary, *pagenumber;
|
|
1263 int primarylength, secondarylength = 0, pagelength;
|
|
1264 int nosecondary;
|
|
1265 int initiallength;
|
|
1266 char *initial;
|
|
1267 char initial1[2];
|
|
1268 register char *p;
|
|
1269
|
|
1270 /* First, analyze the parts of the entry fed to us this time. */
|
|
1271
|
|
1272 p = find_braced_pos (line, 0, 0, 0);
|
|
1273 if (*p == '{')
|
|
1274 {
|
|
1275 initial = p;
|
|
1276 /* Get length of inner pair of braces starting at `p',
|
|
1277 including that inner pair of braces. */
|
|
1278 initiallength = find_braced_end (p + 1) + 1 - p;
|
|
1279 }
|
|
1280 else
|
|
1281 {
|
|
1282 initial = initial1;
|
|
1283 initial1[0] = *p;
|
|
1284 initial1[1] = 0;
|
|
1285 initiallength = 1;
|
|
1286
|
|
1287 if (initial1[0] >= 'a' && initial1[0] <= 'z')
|
|
1288 initial1[0] -= 040;
|
|
1289 }
|
|
1290
|
|
1291 pagenumber = find_braced_pos (line, 1, 0, 0);
|
|
1292 pagelength = find_braced_end (pagenumber) - pagenumber;
|
|
1293 if (pagelength == 0)
|
|
1294 abort ();
|
|
1295
|
|
1296 primary = find_braced_pos (line, 2, 0, 0);
|
|
1297 primarylength = find_braced_end (primary) - primary;
|
|
1298
|
|
1299 secondary = find_braced_pos (line, 3, 0, 0);
|
|
1300 nosecondary = !*secondary;
|
|
1301 if (!nosecondary)
|
|
1302 secondarylength = find_braced_end (secondary) - secondary;
|
|
1303
|
|
1304 /* If the primary is different from before, make a new primary entry. */
|
|
1305 if (strncmp (primary, lastprimary, primarylength))
|
|
1306 {
|
|
1307 /* Close off current secondary entry first, if one is open. */
|
|
1308 if (pending)
|
|
1309 {
|
|
1310 fputs ("}\n", ostream);
|
|
1311 pending = 0;
|
|
1312 }
|
|
1313
|
|
1314 /* If this primary has a different initial, include an entry for
|
|
1315 the initial. */
|
|
1316 if (initiallength != lastinitiallength ||
|
|
1317 strncmp (initial, lastinitial, initiallength))
|
|
1318 {
|
|
1319 fprintf (ostream, "\\initial {");
|
|
1320 fwrite (initial, 1, initiallength, ostream);
|
|
1321 fputs ("}\n", ostream);
|
|
1322 if (initial == initial1)
|
|
1323 {
|
|
1324 lastinitial = lastinitial1;
|
|
1325 *lastinitial1 = *initial1;
|
|
1326 }
|
|
1327 else
|
|
1328 {
|
|
1329 lastinitial = initial;
|
|
1330 }
|
|
1331 lastinitiallength = initiallength;
|
|
1332 }
|
|
1333
|
|
1334 /* Make the entry for the primary. */
|
|
1335 if (nosecondary)
|
|
1336 fputs ("\\entry {", ostream);
|
|
1337 else
|
|
1338 fputs ("\\primary {", ostream);
|
|
1339 fwrite (primary, primarylength, 1, ostream);
|
|
1340 if (nosecondary)
|
|
1341 {
|
|
1342 fputs ("}{", ostream);
|
|
1343 pending = 1;
|
|
1344 }
|
|
1345 else
|
|
1346 fputs ("}\n", ostream);
|
|
1347
|
|
1348 /* Record name of most recent primary. */
|
|
1349 if (lastprimarylength < primarylength)
|
|
1350 {
|
|
1351 lastprimarylength = primarylength + 100;
|
|
1352 lastprimary = (char *) xrealloc (lastprimary,
|
|
1353 1 + lastprimarylength);
|
|
1354 }
|
|
1355 strncpy (lastprimary, primary, primarylength);
|
|
1356 lastprimary[primarylength] = 0;
|
|
1357
|
|
1358 /* There is no current secondary within this primary, now. */
|
|
1359 lastsecondary[0] = 0;
|
|
1360 }
|
|
1361
|
|
1362 /* Should not have an entry with no subtopic following one with a subtopic. */
|
|
1363
|
|
1364 if (nosecondary && *lastsecondary)
|
|
1365 error ("entry %s follows an entry with a secondary name", line);
|
|
1366
|
|
1367 /* Start a new secondary entry if necessary. */
|
|
1368 if (!nosecondary && strncmp (secondary, lastsecondary, secondarylength))
|
|
1369 {
|
|
1370 if (pending)
|
|
1371 {
|
|
1372 fputs ("}\n", ostream);
|
|
1373 pending = 0;
|
|
1374 }
|
|
1375
|
|
1376 /* Write the entry for the secondary. */
|
|
1377 fputs ("\\secondary {", ostream);
|
|
1378 fwrite (secondary, secondarylength, 1, ostream);
|
|
1379 fputs ("}{", ostream);
|
|
1380 pending = 1;
|
|
1381
|
|
1382 /* Record name of most recent secondary. */
|
|
1383 if (lastsecondarylength < secondarylength)
|
|
1384 {
|
|
1385 lastsecondarylength = secondarylength + 100;
|
|
1386 lastsecondary = (char *) xrealloc (lastsecondary,
|
|
1387 1 + lastsecondarylength);
|
|
1388 }
|
|
1389 strncpy (lastsecondary, secondary, secondarylength);
|
|
1390 lastsecondary[secondarylength] = 0;
|
|
1391 }
|
|
1392
|
|
1393 /* Here to add one more page number to the current entry. */
|
|
1394 if (pending++ != 1)
|
|
1395 fputs (", ", ostream); /* Punctuate first, if this is not the first. */
|
|
1396 fwrite (pagenumber, pagelength, 1, ostream);
|
|
1397 }
|
|
1398
|
|
1399 /* Close out any unfinished output entry. */
|
|
1400
|
|
1401 static void
|
|
1402 finish_index (FILE *ostream)
|
|
1403 {
|
|
1404 if (pending)
|
|
1405 fputs ("}\n", ostream);
|
|
1406 free (lastprimary);
|
|
1407 free (lastsecondary);
|
|
1408 }
|
|
1409
|
|
1410 /* Copy the lines in the sorted order.
|
|
1411 Each line is copied out of the input file it was found in. */
|
|
1412
|
|
1413 void
|
|
1414 writelines (linearray, nlines, ostream)
|
|
1415 char **linearray;
|
|
1416 int nlines;
|
|
1417 FILE *ostream;
|
|
1418 {
|
|
1419 char **stop_line = linearray + nlines;
|
|
1420 char **next_line;
|
|
1421
|
|
1422 init_index ();
|
|
1423
|
|
1424 /* Output the text of the lines, and free the buffer space. */
|
|
1425
|
|
1426 for (next_line = linearray; next_line != stop_line; next_line++)
|
|
1427 {
|
|
1428 /* If -u was specified, output the line only if distinct from previous one. */
|
|
1429 if (next_line == linearray
|
|
1430 /* Compare previous line with this one, using only the
|
|
1431 explicitly specd keyfields. */
|
|
1432 || compare_general (*(next_line - 1), *next_line, 0L, 0L, num_keyfields - 1))
|
|
1433 {
|
|
1434 char *p = *next_line;
|
|
1435 char c;
|
|
1436
|
|
1437 while ((c = *p++) && c != '\n')
|
|
1438 /* Do nothing. */ ;
|
|
1439 *(p - 1) = 0;
|
|
1440 indexify (*next_line, ostream);
|
|
1441 }
|
|
1442 }
|
|
1443
|
|
1444 finish_index (ostream);
|
|
1445 }
|
|
1446
|
|
1447 /* Assume (and optionally verify) that each input file is sorted;
|
|
1448 merge them and output the result.
|
|
1449 Returns nonzero if any input file fails to be sorted.
|
|
1450
|
|
1451 This is the high-level interface that can handle an unlimited
|
|
1452 number of files. */
|
|
1453
|
|
1454 #define MAX_DIRECT_MERGE 10
|
|
1455
|
|
1456 int
|
|
1457 merge_files (infiles, nfiles, outfile)
|
|
1458 char **infiles;
|
|
1459 int nfiles;
|
|
1460 char *outfile;
|
|
1461 {
|
|
1462 char **tempfiles;
|
|
1463 int ntemps;
|
|
1464 int i;
|
|
1465 int value = 0;
|
|
1466 int start_tempcount = tempcount;
|
|
1467
|
|
1468 if (nfiles <= MAX_DIRECT_MERGE)
|
|
1469 return merge_direct (infiles, nfiles, outfile);
|
|
1470
|
|
1471 /* Merge groups of MAX_DIRECT_MERGE input files at a time,
|
|
1472 making a temporary file to hold each group's result. */
|
|
1473
|
|
1474 ntemps = (nfiles + MAX_DIRECT_MERGE - 1) / MAX_DIRECT_MERGE;
|
|
1475 tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
|
|
1476 for (i = 0; i < ntemps; i++)
|
|
1477 {
|
|
1478 int nf = MAX_DIRECT_MERGE;
|
|
1479 if (i + 1 == ntemps)
|
|
1480 nf = nfiles - i * MAX_DIRECT_MERGE;
|
|
1481 tempfiles[i] = maketempname (++tempcount);
|
|
1482 value |= merge_direct (&infiles[i * MAX_DIRECT_MERGE], nf, tempfiles[i]);
|
|
1483 }
|
|
1484
|
|
1485 /* All temporary files that existed before are no longer needed
|
|
1486 since their contents have been merged into our new tempfiles.
|
|
1487 So delete them. */
|
|
1488 flush_tempfiles (start_tempcount);
|
|
1489
|
|
1490 /* Now merge the temporary files we created. */
|
|
1491
|
|
1492 merge_files (tempfiles, ntemps, outfile);
|
|
1493
|
|
1494 free (tempfiles);
|
|
1495
|
|
1496 return value;
|
|
1497 }
|
|
1498
|
|
1499 /* Assume (and optionally verify) that each input file is sorted;
|
|
1500 merge them and output the result.
|
|
1501 Returns nonzero if any input file fails to be sorted.
|
|
1502
|
|
1503 This version of merging will not work if the number of
|
|
1504 input files gets too high. Higher level functions
|
|
1505 use it only with a bounded number of input files. */
|
|
1506
|
|
1507 int
|
|
1508 merge_direct (infiles, nfiles, outfile)
|
|
1509 char **infiles;
|
|
1510 int nfiles;
|
|
1511 char *outfile;
|
|
1512 {
|
|
1513 struct linebuffer *lb1, *lb2;
|
|
1514 struct linebuffer **thisline, **prevline;
|
|
1515 FILE **streams;
|
|
1516 int i;
|
|
1517 int nleft;
|
|
1518 int lossage = 0;
|
|
1519 int *file_lossage;
|
|
1520 struct linebuffer *prev_out = 0;
|
|
1521 FILE *ostream = stdout;
|
|
1522
|
|
1523 if (outfile)
|
|
1524 {
|
|
1525 ostream = fopen (outfile, "w");
|
|
1526 }
|
|
1527 if (!ostream)
|
|
1528 pfatal_with_name (outfile);
|
|
1529
|
|
1530 init_index ();
|
|
1531
|
|
1532 if (nfiles == 0)
|
|
1533 {
|
|
1534 if (outfile)
|
|
1535 fclose (ostream);
|
|
1536 return 0;
|
|
1537 }
|
|
1538
|
|
1539 /* For each file, make two line buffers.
|
|
1540 Also, for each file, there is an element of `thisline'
|
|
1541 which points at any time to one of the file's two buffers,
|
|
1542 and an element of `prevline' which points to the other buffer.
|
|
1543 `thisline' is supposed to point to the next available line from the file,
|
|
1544 while `prevline' holds the last file line used,
|
|
1545 which is remembered so that we can verify that the file is properly sorted. */
|
|
1546
|
|
1547 /* lb1 and lb2 contain one buffer each per file. */
|
|
1548 lb1 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
|
|
1549 lb2 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
|
|
1550
|
|
1551 /* thisline[i] points to the linebuffer holding the next available line in file i,
|
|
1552 or is zero if there are no lines left in that file. */
|
|
1553 thisline = (struct linebuffer **)
|
|
1554 xmalloc (nfiles * sizeof (struct linebuffer *));
|
|
1555 /* prevline[i] points to the linebuffer holding the last used line
|
|
1556 from file i. This is just for verifying that file i is properly
|
|
1557 sorted. */
|
|
1558 prevline = (struct linebuffer **)
|
|
1559 xmalloc (nfiles * sizeof (struct linebuffer *));
|
|
1560 /* streams[i] holds the input stream for file i. */
|
|
1561 streams = (FILE **) xmalloc (nfiles * sizeof (FILE *));
|
|
1562 /* file_lossage[i] is nonzero if we already know file i is not
|
|
1563 properly sorted. */
|
|
1564 file_lossage = (int *) xmalloc (nfiles * sizeof (int));
|
|
1565
|
|
1566 /* Allocate and initialize all that storage. */
|
|
1567
|
|
1568 for (i = 0; i < nfiles; i++)
|
|
1569 {
|
|
1570 initbuffer (&lb1[i]);
|
|
1571 initbuffer (&lb2[i]);
|
|
1572 thisline[i] = &lb1[i];
|
|
1573 prevline[i] = &lb2[i];
|
|
1574 file_lossage[i] = 0;
|
|
1575 streams[i] = fopen (infiles[i], "r");
|
|
1576 if (!streams[i])
|
|
1577 pfatal_with_name (infiles[i]);
|
|
1578
|
|
1579 readline (thisline[i], streams[i]);
|
|
1580 }
|
|
1581
|
|
1582 /* Keep count of number of files not at eof. */
|
|
1583 nleft = nfiles;
|
|
1584
|
|
1585 while (nleft)
|
|
1586 {
|
|
1587 struct linebuffer *best = 0;
|
|
1588 struct linebuffer *exch;
|
|
1589 int bestfile = -1;
|
|
1590 int i;
|
|
1591
|
|
1592 /* Look at the next avail line of each file; choose the least one. */
|
|
1593
|
|
1594 for (i = 0; i < nfiles; i++)
|
|
1595 {
|
|
1596 if (thisline[i] &&
|
|
1597 (!best ||
|
|
1598 0 < compare_general (best->buffer, thisline[i]->buffer,
|
|
1599 (long) bestfile, (long) i, num_keyfields)))
|
|
1600 {
|
|
1601 best = thisline[i];
|
|
1602 bestfile = i;
|
|
1603 }
|
|
1604 }
|
|
1605
|
|
1606 /* Output that line, unless it matches the previous one and we
|
|
1607 don't want duplicates. */
|
|
1608
|
|
1609 if (!(prev_out &&
|
|
1610 !compare_general (prev_out->buffer,
|
|
1611 best->buffer, 0L, 1L, num_keyfields - 1)))
|
|
1612 indexify (best->buffer, ostream);
|
|
1613 prev_out = best;
|
|
1614
|
|
1615 /* Now make the line the previous of its file, and fetch a new
|
|
1616 line from that file. */
|
|
1617
|
|
1618 exch = prevline[bestfile];
|
|
1619 prevline[bestfile] = thisline[bestfile];
|
|
1620 thisline[bestfile] = exch;
|
|
1621
|
|
1622 while (1)
|
|
1623 {
|
|
1624 /* If the file has no more, mark it empty. */
|
|
1625
|
|
1626 if (feof (streams[bestfile]))
|
|
1627 {
|
|
1628 thisline[bestfile] = 0;
|
|
1629 /* Update the number of files still not empty. */
|
|
1630 nleft--;
|
|
1631 break;
|
|
1632 }
|
|
1633 readline (thisline[bestfile], streams[bestfile]);
|
|
1634 if (thisline[bestfile]->buffer[0] || !feof (streams[bestfile]))
|
|
1635 break;
|
|
1636 }
|
|
1637 }
|
|
1638
|
|
1639 finish_index (ostream);
|
|
1640
|
|
1641 /* Free all storage and close all input streams. */
|
|
1642
|
|
1643 for (i = 0; i < nfiles; i++)
|
|
1644 {
|
|
1645 fclose (streams[i]);
|
|
1646 free (lb1[i].buffer);
|
|
1647 free (lb2[i].buffer);
|
|
1648 }
|
|
1649 free (file_lossage);
|
|
1650 free (lb1);
|
|
1651 free (lb2);
|
|
1652 free (thisline);
|
|
1653 free (prevline);
|
|
1654 free (streams);
|
|
1655
|
|
1656 if (outfile)
|
|
1657 fclose (ostream);
|
|
1658
|
|
1659 return lossage;
|
|
1660 }
|
|
1661
|
|
1662 /* Print error message and exit. */
|
|
1663
|
|
1664 void
|
|
1665 fatal (format, arg)
|
|
1666 char *format, *arg;
|
|
1667 {
|
|
1668 error (format, arg);
|
|
1669 exit (TI_FATAL_ERROR);
|
|
1670 }
|
|
1671
|
|
1672 /* Print error message. FORMAT is printf control string, ARG is arg for it. */
|
|
1673 void
|
|
1674 error (format, arg)
|
|
1675 char *format, *arg;
|
|
1676 {
|
|
1677 printf ("%s: ", program_name);
|
|
1678 printf (format, arg);
|
|
1679 if (format[strlen (format) -1] != '\n')
|
|
1680 printf ("\n");
|
|
1681 }
|
|
1682
|
|
1683 /* unused */
|
|
1684 #if 0
|
|
1685 static void
|
|
1686 perror_with_name (char *name)
|
|
1687 {
|
|
1688 char *s;
|
|
1689
|
|
1690 s = strerror (errno);
|
|
1691 printf ("%s: ", program_name);
|
|
1692 printf ("%s; for file `%s'.\n", s, name);
|
|
1693 }
|
|
1694 #endif
|
|
1695
|
|
1696 void
|
|
1697 pfatal_with_name (name)
|
|
1698 char *name;
|
|
1699 {
|
|
1700 char *s;
|
|
1701
|
|
1702 s = strerror (errno);
|
|
1703 printf ("%s: ", program_name);
|
|
1704 printf ("%s; for file `%s'.\n", s, name);
|
|
1705 exit (TI_FATAL_ERROR);
|
|
1706 }
|
|
1707
|
|
1708 /* Return a newly-allocated string whose contents concatenate those of
|
|
1709 S1, S2, S3. */
|
|
1710
|
|
1711 char *
|
|
1712 concat (s1, s2, s3)
|
|
1713 char *s1, *s2, *s3;
|
|
1714 {
|
|
1715 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
|
|
1716 char *result = (char *) xmalloc (len1 + len2 + len3 + 1);
|
|
1717
|
|
1718 strcpy (result, s1);
|
|
1719 strcpy (result + len1, s2);
|
|
1720 strcpy (result + len1 + len2, s3);
|
|
1721 *(result + len1 + len2 + len3) = 0;
|
|
1722
|
|
1723 return result;
|
|
1724 }
|
|
1725
|
|
1726 #if !defined (HAVE_STRERROR)
|
|
1727 extern char *sys_errlist[];
|
|
1728 extern int sys_nerr;
|
|
1729
|
|
1730 char *
|
|
1731 strerror (num)
|
|
1732 int num;
|
|
1733 {
|
|
1734 if (num >= sys_nerr)
|
|
1735 return ("");
|
|
1736 else
|
|
1737 return (sys_errlist[num]);
|
|
1738 }
|
|
1739 #endif /* !HAVE_STRERROR */
|
|
1740
|
|
1741 #if !defined (HAVE_STRCHR)
|
|
1742 char *
|
|
1743 strrchr (string, character)
|
|
1744 char *string;
|
|
1745 int character;
|
|
1746 {
|
|
1747 register int i;
|
|
1748
|
|
1749 for (i = strlen (string) - 1; i > -1; i--)
|
|
1750 if (string[i] == character)
|
|
1751 return (string + i);
|
|
1752
|
|
1753 return ((char *)NULL);
|
|
1754 }
|
|
1755 #endif /* HAVE_STRCHR */
|
|
1756
|
|
1757 /* Just like malloc, but kills the program in case of fatal error. */
|
|
1758 void *
|
|
1759 xmalloc (nbytes)
|
|
1760 int nbytes;
|
|
1761 {
|
|
1762 void *temp = (void *) malloc (nbytes);
|
|
1763
|
|
1764 if (nbytes && temp == (void *)NULL)
|
|
1765 memory_error ("xmalloc", nbytes);
|
|
1766
|
|
1767 return (temp);
|
|
1768 }
|
|
1769
|
|
1770 /* Like realloc (), but barfs if there isn't enough memory. */
|
|
1771 void *
|
|
1772 xrealloc (pointer, nbytes)
|
|
1773 void *pointer;
|
|
1774 int nbytes;
|
|
1775 {
|
|
1776 void *temp;
|
|
1777
|
|
1778 if (!pointer)
|
|
1779 temp = (void *)xmalloc (nbytes);
|
|
1780 else
|
|
1781 temp = (void *)realloc (pointer, nbytes);
|
|
1782
|
|
1783 if (nbytes && !temp)
|
|
1784 memory_error ("xrealloc", nbytes);
|
|
1785
|
|
1786 return (temp);
|
|
1787 }
|
|
1788
|
|
1789 static void
|
|
1790 memory_error (char *callers_name, int bytes_wanted)
|
|
1791 {
|
|
1792 char printable_string[80];
|
|
1793
|
|
1794 sprintf (printable_string,
|
|
1795 "Virtual memory exhausted in %s ()! Needed %d bytes.",
|
|
1796 callers_name, bytes_wanted);
|
|
1797
|
|
1798 error (printable_string, 0);
|
|
1799 abort ();
|
|
1800 }
|
|
1801
|