Mercurial > hg > xemacs-beta
comparison man/texindex.c @ 0:376386a54a3c r19-14
Import from CVS: tag r19-14
author | cvs |
---|---|
date | Mon, 13 Aug 2007 08:45:50 +0200 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:376386a54a3c |
---|---|
1 /* Prepare TeX index dribble output into an actual index. | |
2 | |
3 Version 1.45 | |
4 | |
5 Copyright (C) 1987, 1991, 1992 Free Software Foundation, Inc. | |
6 | |
7 This program is free software; you can redistribute it and/or modify | |
8 it under the terms of the GNU General Public License as published by | |
9 the Free Software Foundation; either version 2, or (at your option) | |
10 any later version. | |
11 | |
12 This program is distributed in the hope that it will be useful, | |
13 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 GNU General Public License for more details. | |
16 | |
17 You should have received a copy of the GNU General Public License | |
18 along with this program; if not, write to the Free Software | |
19 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307. */ | |
20 | |
21 | |
22 #if defined (HAVE_CONFIG_H) | |
23 #include "config.h" | |
24 #endif | |
25 | |
26 #include <stdio.h> | |
27 #include <ctype.h> | |
28 #include <errno.h> | |
29 #include "getopt.h" | |
30 | |
31 #define TEXINDEX_VERSION_STRING "GNU Texindex 2.0 for Texinfo release 3.4" | |
32 | |
33 #if defined (emacs) | |
34 # include "../src/config.h" | |
35 /* Some s/os.h files redefine these. */ | |
36 # undef read | |
37 # undef close | |
38 # undef write | |
39 # undef open | |
40 #endif | |
41 | |
42 #if defined (STDC_HEADERS) | |
43 # include <stdlib.h> | |
44 # undef HAVE_MEMSET | |
45 # define HAVE_MEMSET | |
46 # undef HAVE_STRCHR | |
47 # define HAVE_STRCHR | |
48 #else /* !STDC_HEADERS */ | |
49 char *getenv (), *malloc (), *realloc (); | |
50 #endif /* !STDC_HEADERS */ | |
51 | |
52 #if defined (HAVE_STRING_H) | |
53 # include <string.h> | |
54 #endif /* HAVE_STRING_H */ | |
55 | |
56 #if !defined (HAVE_STRCHR) | |
57 char *strrchr (); | |
58 #endif /* !HAVE_STRCHR */ | |
59 | |
60 #if defined (HAVE_UNISTD_H) | |
61 # include <unistd.h> | |
62 #else /* !HAVE_UNISTD_H */ | |
63 off_t lseek (); | |
64 #endif /* !HAVE_UNISTD_H */ | |
65 | |
66 #if !defined (HAVE_MEMSET) | |
67 #undef memset | |
68 #define memset(ptr, ignore, count) bzero (ptr, count) | |
69 #endif | |
70 | |
71 | |
72 char *mktemp (); | |
73 | |
74 #if defined (VMS) | |
75 # include <file.h> | |
76 # define TI_NO_ERROR ((1 << 28) | 1) | |
77 # define TI_FATAL_ERROR ((1 << 28) | 4) | |
78 # define unlink delete | |
79 #else /* !VMS */ | |
80 | |
81 # if defined (HAVE_SYS_FCNTL_H) | |
82 # include <sys/types.h> | |
83 # include <sys/fcntl.h> | |
84 # endif /* HAVE_SYS_FCNTL_H */ | |
85 | |
86 # if defined (_AIX) || !defined (_POSIX_VERSION) | |
87 # include <sys/file.h> | |
88 # else /* !AIX && _POSIX_VERSION */ | |
89 # if !defined (HAVE_SYS_FCNTL_H) | |
90 # include <fcntl.h> | |
91 # endif /* !HAVE_FCNTL_H */ | |
92 # endif /* !_AIX && _POSIX_VERSION */ | |
93 # define TI_NO_ERROR 0 | |
94 # define TI_FATAL_ERROR 1 | |
95 #endif /* !VMS */ | |
96 | |
97 #if !defined (SEEK_SET) | |
98 # define SEEK_SET 0 | |
99 # define SEEK_CUR 1 | |
100 # define SEEK_END 2 | |
101 #endif /* !SEEK_SET */ | |
102 | |
103 #if !defined (errno) | |
104 extern int errno; | |
105 #endif | |
106 char *strerror (); | |
107 | |
108 /* When sorting in core, this structure describes one line | |
109 and the position and length of its first keyfield. */ | |
110 struct lineinfo | |
111 { | |
112 char *text; /* The actual text of the line. */ | |
113 union { | |
114 char *text; /* The start of the key (for textual comparison). */ | |
115 long number; /* The numeric value (for numeric comparison). */ | |
116 } key; | |
117 long keylen; /* Length of KEY field. */ | |
118 }; | |
119 | |
120 /* This structure describes a field to use as a sort key. */ | |
121 struct keyfield | |
122 { | |
123 int startwords; /* Number of words to skip. */ | |
124 int startchars; /* Number of additional chars to skip. */ | |
125 int endwords; /* Number of words to ignore at end. */ | |
126 int endchars; /* Ditto for characters of last word. */ | |
127 char ignore_blanks; /* Non-zero means ignore spaces and tabs. */ | |
128 char fold_case; /* Non-zero means case doesn't matter. */ | |
129 char reverse; /* Non-zero means compare in reverse order. */ | |
130 char numeric; /* Non-zeros means field is ASCII numeric. */ | |
131 char positional; /* Sort according to file position. */ | |
132 char braced; /* Count balanced-braced groupings as fields. */ | |
133 }; | |
134 | |
135 /* Vector of keyfields to use. */ | |
136 struct keyfield keyfields[3]; | |
137 | |
138 /* Number of keyfields stored in that vector. */ | |
139 int num_keyfields = 3; | |
140 | |
141 /* Vector of input file names, terminated with a null pointer. */ | |
142 char **infiles; | |
143 | |
144 /* Vector of corresponding output file names, or NULL, meaning default it | |
145 (add an `s' to the end). */ | |
146 char **outfiles; | |
147 | |
148 /* Length of `infiles'. */ | |
149 int num_infiles; | |
150 | |
151 /* Pointer to the array of pointers to lines being sorted. */ | |
152 char **linearray; | |
153 | |
154 /* The allocated length of `linearray'. */ | |
155 long nlines; | |
156 | |
157 /* Directory to use for temporary files. On Unix, it ends with a slash. */ | |
158 char *tempdir; | |
159 | |
160 /* Start of filename to use for temporary files. */ | |
161 char *tempbase; | |
162 | |
163 /* Number of last temporary file. */ | |
164 int tempcount; | |
165 | |
166 /* Number of last temporary file already deleted. | |
167 Temporary files are deleted by `flush_tempfiles' in order of creation. */ | |
168 int last_deleted_tempcount; | |
169 | |
170 /* During in-core sort, this points to the base of the data block | |
171 which contains all the lines of data. */ | |
172 char *text_base; | |
173 | |
174 /* Additional command switches .*/ | |
175 | |
176 /* Nonzero means do not delete tempfiles -- for debugging. */ | |
177 int keep_tempfiles; | |
178 | |
179 /* The name this program was run with. */ | |
180 char *program_name; | |
181 | |
182 /* Forward declarations of functions in this file. */ | |
183 | |
184 struct linebuffer; | |
185 void decode_command (int, char **); | |
186 void sort_in_core (char *, long, char *); | |
187 void sort_offline (char *, int, long, char *); | |
188 char **parsefile (char *, char **, char *, long); | |
189 char *find_field (struct keyfield *, char *, long *); | |
190 char *find_pos (char *, int, int, int); | |
191 long find_value (char *, long); | |
192 char *find_braced_pos (char *, int, int, int); | |
193 char *find_braced_end (char *); | |
194 void writelines (char **, int, FILE *); | |
195 int compare_field (struct keyfield *, char *, long, long, char *, long, long); | |
196 int compare_full (const void *, const void *); | |
197 long readline (struct linebuffer *, FILE *); | |
198 int merge_files (char **, int, char *); | |
199 int merge_direct (char **, int, char *); | |
200 void pfatal_with_name (char *); | |
201 void fatal (char *, char *); | |
202 void error (char *, char *); | |
203 void *xmalloc (int), *xrealloc (void *, int); | |
204 char *concat (char *, char *, char *); | |
205 char *maketempname (int); | |
206 void flush_tempfiles (int); | |
207 char *tempcopy (int); | |
208 static void memory_error (char *callers_name, int bytes_wanted); | |
209 | |
210 #define MAX_IN_CORE_SORT 500000 | |
211 | |
212 void | |
213 main (argc, argv) | |
214 int argc; | |
215 char **argv; | |
216 { | |
217 int i; | |
218 | |
219 tempcount = 0; | |
220 last_deleted_tempcount = 0; | |
221 | |
222 program_name = strrchr (argv[0], '/'); | |
223 if (program_name != (char *)NULL) | |
224 program_name++; | |
225 else | |
226 program_name = argv[0]; | |
227 | |
228 /* Describe the kind of sorting to do. */ | |
229 /* The first keyfield uses the first braced field and folds case. */ | |
230 keyfields[0].braced = 1; | |
231 keyfields[0].fold_case = 1; | |
232 keyfields[0].endwords = -1; | |
233 keyfields[0].endchars = -1; | |
234 | |
235 /* The second keyfield uses the second braced field, numerically. */ | |
236 keyfields[1].braced = 1; | |
237 keyfields[1].numeric = 1; | |
238 keyfields[1].startwords = 1; | |
239 keyfields[1].endwords = -1; | |
240 keyfields[1].endchars = -1; | |
241 | |
242 /* The third keyfield (which is ignored while discarding duplicates) | |
243 compares the whole line. */ | |
244 keyfields[2].endwords = -1; | |
245 keyfields[2].endchars = -1; | |
246 | |
247 decode_command (argc, argv); | |
248 | |
249 tempbase = mktemp (concat ("txiXXXXXX", "", "")); | |
250 | |
251 /* Process input files completely, one by one. */ | |
252 | |
253 for (i = 0; i < num_infiles; i++) | |
254 { | |
255 int desc; | |
256 long ptr; | |
257 char *outfile; | |
258 | |
259 desc = open (infiles[i], O_RDONLY, 0); | |
260 if (desc < 0) | |
261 pfatal_with_name (infiles[i]); | |
262 lseek (desc, (off_t) 0, SEEK_END); | |
263 ptr = (long) lseek (desc, (off_t) 0, SEEK_CUR); | |
264 | |
265 close (desc); | |
266 | |
267 outfile = outfiles[i]; | |
268 if (!outfile) | |
269 { | |
270 outfile = concat (infiles[i], "s", ""); | |
271 } | |
272 | |
273 if (ptr < MAX_IN_CORE_SORT) | |
274 /* Sort a small amount of data. */ | |
275 sort_in_core (infiles[i], ptr, outfile); | |
276 else | |
277 sort_offline (infiles[i], 1, ptr, outfile); | |
278 } | |
279 | |
280 flush_tempfiles (tempcount); | |
281 exit (TI_NO_ERROR); | |
282 } | |
283 | |
284 typedef struct | |
285 { | |
286 char *long_name; | |
287 char *short_name; | |
288 int *variable_ref; | |
289 int variable_value; | |
290 char *arg_name; | |
291 char *doc_string; | |
292 } TEXINDEX_OPTION; | |
293 | |
294 TEXINDEX_OPTION texindex_options[] = { | |
295 { "--keep", "-k", &keep_tempfiles, 1, (char *)NULL, | |
296 "Keep temporary files around after processing" }, | |
297 { "--no-keep", 0, &keep_tempfiles, 0, (char *)NULL, | |
298 "Do not keep temporary files around after processing (default)" }, | |
299 { "--output", "-o", (int *)NULL, 0, "FILE", | |
300 "Send output to FILE" }, | |
301 { "--version", (char *)NULL, (int *)NULL, 0, (char *)NULL, | |
302 "Show version information" }, | |
303 { "--help", "-h", (int *)NULL, 0, (char *)NULL, "Produce this listing" }, | |
304 { (char *)NULL, (char *)NULL, (int *)NULL, 0, (char *)NULL } | |
305 }; | |
306 | |
307 static void | |
308 usage (int result_value) | |
309 { | |
310 register int i; | |
311 | |
312 fprintf (stderr, "Usage: %s [OPTIONS] FILE...\n", program_name); | |
313 fprintf (stderr, " Generate a permuted index for the TeX files given.\n"); | |
314 fprintf (stderr, " Usually FILE... is `foo.\?\?' for the source file `foo.tex'.\n"); | |
315 fprintf (stderr, " The OPTIONS are:\n"); | |
316 | |
317 for (i = 0; texindex_options[i].long_name; i++) | |
318 { | |
319 fprintf (stderr, " %s %s", | |
320 texindex_options[i].long_name, | |
321 texindex_options[i].arg_name ? | |
322 texindex_options[i].arg_name : ""); | |
323 | |
324 if (texindex_options[i].short_name) | |
325 fprintf (stderr, " \n or %s %s", | |
326 texindex_options[i].short_name, | |
327 texindex_options[i].arg_name ? | |
328 texindex_options[i].arg_name : ""); | |
329 fprintf (stderr, "\t%s\n", texindex_options[i].doc_string); | |
330 } | |
331 exit (result_value); | |
332 } | |
333 | |
334 /* Decode the command line arguments to set the parameter variables | |
335 and set up the vector of keyfields and the vector of input files. */ | |
336 | |
337 void | |
338 decode_command (argc, argv) | |
339 int argc; | |
340 char **argv; | |
341 { | |
342 int arg_index = 1; | |
343 char **ip; | |
344 char **op; | |
345 | |
346 /* Store default values into parameter variables. */ | |
347 | |
348 tempdir = getenv ("TMPDIR"); | |
349 #ifdef VMS | |
350 if (tempdir == NULL) | |
351 tempdir = "sys$scratch:"; | |
352 #else | |
353 if (tempdir == NULL) | |
354 tempdir = "/tmp/"; | |
355 else | |
356 tempdir = concat (tempdir, "/", ""); | |
357 #endif | |
358 | |
359 keep_tempfiles = 0; | |
360 | |
361 /* Allocate ARGC input files, which must be enough. */ | |
362 | |
363 infiles = (char **) xmalloc (argc * sizeof (char *)); | |
364 outfiles = (char **) xmalloc (argc * sizeof (char *)); | |
365 ip = infiles; | |
366 op = outfiles; | |
367 | |
368 while (arg_index < argc) | |
369 { | |
370 char *arg = argv[arg_index++]; | |
371 | |
372 if (*arg == '-') | |
373 { | |
374 if (strcmp (arg, "--version") == 0) | |
375 { | |
376 fprintf (stderr, "%s\n", TEXINDEX_VERSION_STRING); | |
377 exit (0); | |
378 } | |
379 else if ((strcmp (arg, "--keep") == 0) || | |
380 (strcmp (arg, "-k") == 0)) | |
381 { | |
382 keep_tempfiles = 1; | |
383 } | |
384 else if ((strcmp (arg, "--help") == 0) || | |
385 (strcmp (arg, "-h") == 0)) | |
386 { | |
387 usage (0); | |
388 } | |
389 else if ((strcmp (arg, "--output") == 0) || | |
390 (strcmp (arg, "-o") == 0)) | |
391 { | |
392 if (argv[arg_index] != (char *)NULL) | |
393 { | |
394 arg_index++; | |
395 if (op > outfiles) | |
396 *(op - 1) = argv[arg_index]; | |
397 } | |
398 else | |
399 usage (1); | |
400 } | |
401 else | |
402 usage (1); | |
403 } | |
404 else | |
405 { | |
406 *ip++ = arg; | |
407 *op++ = (char *)NULL; | |
408 } | |
409 } | |
410 for ( ; optind < argc; optind++) | |
411 { | |
412 *ip++ = argv[optind]; | |
413 *op++ = NULL; | |
414 } | |
415 | |
416 /* Record number of keyfields and terminate list of filenames. */ | |
417 num_infiles = ip - infiles; | |
418 *ip = (char *)NULL; | |
419 if (num_infiles == 0) | |
420 usage (1); | |
421 } | |
422 | |
423 /* Return a name for a temporary file. */ | |
424 | |
425 char * | |
426 maketempname (count) | |
427 int count; | |
428 { | |
429 char tempsuffix[10]; | |
430 sprintf (tempsuffix, "%d", count); | |
431 return concat (tempdir, tempbase, tempsuffix); | |
432 } | |
433 | |
434 /* Delete all temporary files up to TO_COUNT. */ | |
435 | |
436 void | |
437 flush_tempfiles (to_count) | |
438 int to_count; | |
439 { | |
440 if (keep_tempfiles) | |
441 return; | |
442 while (last_deleted_tempcount < to_count) | |
443 unlink (maketempname (++last_deleted_tempcount)); | |
444 } | |
445 | |
446 /* Copy the input file open on IDESC into a temporary file | |
447 and return the temporary file name. */ | |
448 | |
449 #define BUFSIZE 1024 | |
450 | |
451 char * | |
452 tempcopy (idesc) | |
453 int idesc; | |
454 { | |
455 char *outfile = maketempname (++tempcount); | |
456 int odesc; | |
457 char buffer[BUFSIZE]; | |
458 | |
459 odesc = open (outfile, O_WRONLY | O_CREAT, 0666); | |
460 | |
461 if (odesc < 0) | |
462 pfatal_with_name (outfile); | |
463 | |
464 while (1) | |
465 { | |
466 int nread = read (idesc, buffer, BUFSIZE); | |
467 write (odesc, buffer, nread); | |
468 if (!nread) | |
469 break; | |
470 } | |
471 | |
472 close (odesc); | |
473 | |
474 return outfile; | |
475 } | |
476 | |
477 /* Compare LINE1 and LINE2 according to the specified set of keyfields. */ | |
478 | |
479 int | |
480 compare_full (const void *arg1, const void *arg2) | |
481 { | |
482 char **line1, **line2; | |
483 int i; | |
484 | |
485 line1 = (char **) arg1; | |
486 line2 = (char **) arg2; | |
487 | |
488 /* Compare using the first keyfield; | |
489 if that does not distinguish the lines, try the second keyfield; | |
490 and so on. */ | |
491 | |
492 for (i = 0; i < num_keyfields; i++) | |
493 { | |
494 long length1, length2; | |
495 char *start1 = find_field (&keyfields[i], *line1, &length1); | |
496 char *start2 = find_field (&keyfields[i], *line2, &length2); | |
497 int tem = compare_field (&keyfields[i], start1, length1, *line1 - text_base, | |
498 start2, length2, *line2 - text_base); | |
499 if (tem) | |
500 { | |
501 if (keyfields[i].reverse) | |
502 return -tem; | |
503 return tem; | |
504 } | |
505 } | |
506 | |
507 return 0; /* Lines match exactly. */ | |
508 } | |
509 | |
510 /* Compare LINE1 and LINE2, described by structures | |
511 in which the first keyfield is identified in advance. | |
512 For positional sorting, assumes that the order of the lines in core | |
513 reflects their nominal order. */ | |
514 | |
515 static int | |
516 compare_prepared (const void *arg1, const void *arg2) | |
517 { | |
518 struct lineinfo *line1, *line2; | |
519 int i; | |
520 int tem; | |
521 char *text1, *text2; | |
522 | |
523 line1 = (struct lineinfo *) arg1; | |
524 line2 = (struct lineinfo *) arg2; | |
525 | |
526 /* Compare using the first keyfield, which has been found for us already. */ | |
527 if (keyfields->positional) | |
528 { | |
529 if (line1->text - text_base > line2->text - text_base) | |
530 tem = 1; | |
531 else | |
532 tem = -1; | |
533 } | |
534 else if (keyfields->numeric) | |
535 tem = line1->key.number - line2->key.number; | |
536 else | |
537 tem = compare_field (keyfields, line1->key.text, line1->keylen, 0, | |
538 line2->key.text, line2->keylen, 0); | |
539 if (tem) | |
540 { | |
541 if (keyfields->reverse) | |
542 return -tem; | |
543 return tem; | |
544 } | |
545 | |
546 text1 = line1->text; | |
547 text2 = line2->text; | |
548 | |
549 /* Compare using the second keyfield; | |
550 if that does not distinguish the lines, try the third keyfield; | |
551 and so on. */ | |
552 | |
553 for (i = 1; i < num_keyfields; i++) | |
554 { | |
555 long length1, length2; | |
556 char *start1 = find_field (&keyfields[i], text1, &length1); | |
557 char *start2 = find_field (&keyfields[i], text2, &length2); | |
558 int tem = compare_field (&keyfields[i], start1, length1, text1 - text_base, | |
559 start2, length2, text2 - text_base); | |
560 if (tem) | |
561 { | |
562 if (keyfields[i].reverse) | |
563 return -tem; | |
564 return tem; | |
565 } | |
566 } | |
567 | |
568 return 0; /* Lines match exactly. */ | |
569 } | |
570 | |
571 /* Like compare_full but more general. | |
572 You can pass any strings, and you can say how many keyfields to use. | |
573 POS1 and POS2 should indicate the nominal positional ordering of | |
574 the two lines in the input. */ | |
575 | |
576 static int | |
577 compare_general (char *str1, char *str2, long pos1, long pos2, | |
578 int use_keyfields) | |
579 { | |
580 int i; | |
581 | |
582 /* Compare using the first keyfield; | |
583 if that does not distinguish the lines, try the second keyfield; | |
584 and so on. */ | |
585 | |
586 for (i = 0; i < use_keyfields; i++) | |
587 { | |
588 long length1, length2; | |
589 char *start1 = find_field (&keyfields[i], str1, &length1); | |
590 char *start2 = find_field (&keyfields[i], str2, &length2); | |
591 int tem = compare_field (&keyfields[i], start1, length1, pos1, | |
592 start2, length2, pos2); | |
593 if (tem) | |
594 { | |
595 if (keyfields[i].reverse) | |
596 return -tem; | |
597 return tem; | |
598 } | |
599 } | |
600 | |
601 return 0; /* Lines match exactly. */ | |
602 } | |
603 | |
604 /* Find the start and length of a field in STR according to KEYFIELD. | |
605 A pointer to the starting character is returned, and the length | |
606 is stored into the int that LENGTHPTR points to. */ | |
607 | |
608 char * | |
609 find_field (keyfield, str, lengthptr) | |
610 struct keyfield *keyfield; | |
611 char *str; | |
612 long *lengthptr; | |
613 { | |
614 char *start; | |
615 char *end; | |
616 char *(*fun) (); | |
617 | |
618 if (keyfield->braced) | |
619 fun = find_braced_pos; | |
620 else | |
621 fun = find_pos; | |
622 | |
623 start = (*fun) (str, keyfield->startwords, keyfield->startchars, | |
624 keyfield->ignore_blanks); | |
625 if (keyfield->endwords < 0) | |
626 { | |
627 if (keyfield->braced) | |
628 end = find_braced_end (start); | |
629 else | |
630 { | |
631 end = start; | |
632 while (*end && *end != '\n') | |
633 end++; | |
634 } | |
635 } | |
636 else | |
637 { | |
638 end = (*fun) (str, keyfield->endwords, keyfield->endchars, 0); | |
639 if (end - str < start - str) | |
640 end = start; | |
641 } | |
642 *lengthptr = end - start; | |
643 return start; | |
644 } | |
645 | |
646 /* Return a pointer to a specified place within STR, | |
647 skipping (from the beginning) WORDS words and then CHARS chars. | |
648 If IGNORE_BLANKS is nonzero, we skip all blanks | |
649 after finding the specified word. */ | |
650 | |
651 char * | |
652 find_pos (str, words, chars, ignore_blanks) | |
653 char *str; | |
654 int words, chars; | |
655 int ignore_blanks; | |
656 { | |
657 int i; | |
658 char *p = str; | |
659 | |
660 for (i = 0; i < words; i++) | |
661 { | |
662 char c; | |
663 /* Find next bunch of nonblanks and skip them. */ | |
664 while ((c = *p) == ' ' || c == '\t') | |
665 p++; | |
666 while ((c = *p) && c != '\n' && !(c == ' ' || c == '\t')) | |
667 p++; | |
668 if (!*p || *p == '\n') | |
669 return p; | |
670 } | |
671 | |
672 while (*p == ' ' || *p == '\t') | |
673 p++; | |
674 | |
675 for (i = 0; i < chars; i++) | |
676 { | |
677 if (!*p || *p == '\n') | |
678 break; | |
679 p++; | |
680 } | |
681 return p; | |
682 } | |
683 | |
684 /* Like find_pos but assumes that each field is surrounded by braces | |
685 and that braces within fields are balanced. */ | |
686 | |
687 char * | |
688 find_braced_pos (str, words, chars, ignore_blanks) | |
689 char *str; | |
690 int words, chars; | |
691 int ignore_blanks; | |
692 { | |
693 int i; | |
694 int bracelevel; | |
695 char *p = str; | |
696 char c; | |
697 | |
698 for (i = 0; i < words; i++) | |
699 { | |
700 bracelevel = 1; | |
701 while ((c = *p++) != '{' && c != '\n' && c) | |
702 /* Do nothing. */ ; | |
703 if (c != '{') | |
704 return p - 1; | |
705 while (bracelevel) | |
706 { | |
707 c = *p++; | |
708 if (c == '{') | |
709 bracelevel++; | |
710 if (c == '}') | |
711 bracelevel--; | |
712 if (c == 0 || c == '\n') | |
713 return p - 1; | |
714 } | |
715 } | |
716 | |
717 while ((c = *p++) != '{' && c != '\n' && c) | |
718 /* Do nothing. */ ; | |
719 | |
720 if (c != '{') | |
721 return p - 1; | |
722 | |
723 if (ignore_blanks) | |
724 while ((c = *p) == ' ' || c == '\t') | |
725 p++; | |
726 | |
727 for (i = 0; i < chars; i++) | |
728 { | |
729 if (!*p || *p == '\n') | |
730 break; | |
731 p++; | |
732 } | |
733 return p; | |
734 } | |
735 | |
736 /* Find the end of the balanced-brace field which starts at STR. | |
737 The position returned is just before the closing brace. */ | |
738 | |
739 char * | |
740 find_braced_end (str) | |
741 char *str; | |
742 { | |
743 int bracelevel; | |
744 char *p = str; | |
745 char c; | |
746 | |
747 bracelevel = 1; | |
748 while (bracelevel) | |
749 { | |
750 c = *p++; | |
751 if (c == '{') | |
752 bracelevel++; | |
753 if (c == '}') | |
754 bracelevel--; | |
755 if (c == 0 || c == '\n') | |
756 return p - 1; | |
757 } | |
758 return p - 1; | |
759 } | |
760 | |
761 long | |
762 find_value (start, length) | |
763 char *start; | |
764 long length; | |
765 { | |
766 while (length != 0L) | |
767 { | |
768 if (isdigit (*start)) | |
769 return atol (start); | |
770 length--; | |
771 start++; | |
772 } | |
773 return 0l; | |
774 } | |
775 | |
776 /* Vector used to translate characters for comparison. | |
777 This is how we make all alphanumerics follow all else, | |
778 and ignore case in the first sorting. */ | |
779 int char_order[256]; | |
780 | |
781 static void | |
782 init_char_order (void) | |
783 { | |
784 int i; | |
785 for (i = 1; i < 256; i++) | |
786 char_order[i] = i; | |
787 | |
788 for (i = '0'; i <= '9'; i++) | |
789 char_order[i] += 512; | |
790 | |
791 for (i = 'a'; i <= 'z'; i++) | |
792 { | |
793 char_order[i] = 512 + i; | |
794 char_order[i + 'A' - 'a'] = 512 + i; | |
795 } | |
796 } | |
797 | |
798 /* Compare two fields (each specified as a start pointer and a character count) | |
799 according to KEYFIELD. | |
800 The sign of the value reports the relation between the fields. */ | |
801 | |
802 int | |
803 compare_field (keyfield, start1, length1, pos1, start2, length2, pos2) | |
804 struct keyfield *keyfield; | |
805 char *start1; | |
806 long length1; | |
807 long pos1; | |
808 char *start2; | |
809 long length2; | |
810 long pos2; | |
811 { | |
812 if (keyfields->positional) | |
813 { | |
814 if (pos1 > pos2) | |
815 return 1; | |
816 else | |
817 return -1; | |
818 } | |
819 if (keyfield->numeric) | |
820 { | |
821 long value = find_value (start1, length1) - find_value (start2, length2); | |
822 if (value > 0) | |
823 return 1; | |
824 if (value < 0) | |
825 return -1; | |
826 return 0; | |
827 } | |
828 else | |
829 { | |
830 char *p1 = start1; | |
831 char *p2 = start2; | |
832 char *e1 = start1 + length1; | |
833 char *e2 = start2 + length2; | |
834 | |
835 while (1) | |
836 { | |
837 int c1, c2; | |
838 | |
839 if (p1 == e1) | |
840 c1 = 0; | |
841 else | |
842 c1 = *p1++; | |
843 if (p2 == e2) | |
844 c2 = 0; | |
845 else | |
846 c2 = *p2++; | |
847 | |
848 if (char_order[c1] != char_order[c2]) | |
849 return char_order[c1] - char_order[c2]; | |
850 if (!c1) | |
851 break; | |
852 } | |
853 | |
854 /* Strings are equal except possibly for case. */ | |
855 p1 = start1; | |
856 p2 = start2; | |
857 while (1) | |
858 { | |
859 int c1, c2; | |
860 | |
861 if (p1 == e1) | |
862 c1 = 0; | |
863 else | |
864 c1 = *p1++; | |
865 if (p2 == e2) | |
866 c2 = 0; | |
867 else | |
868 c2 = *p2++; | |
869 | |
870 if (c1 != c2) | |
871 /* Reverse sign here so upper case comes out last. */ | |
872 return c2 - c1; | |
873 if (!c1) | |
874 break; | |
875 } | |
876 | |
877 return 0; | |
878 } | |
879 } | |
880 | |
881 /* A `struct linebuffer' is a structure which holds a line of text. | |
882 `readline' reads a line from a stream into a linebuffer | |
883 and works regardless of the length of the line. */ | |
884 | |
885 struct linebuffer | |
886 { | |
887 long size; | |
888 char *buffer; | |
889 }; | |
890 | |
891 /* Initialize LINEBUFFER for use. */ | |
892 | |
893 static void | |
894 initbuffer (struct linebuffer *linebuffer) | |
895 { | |
896 linebuffer->size = 200; | |
897 linebuffer->buffer = (char *) xmalloc (200); | |
898 } | |
899 | |
900 /* Read a line of text from STREAM into LINEBUFFER. | |
901 Return the length of the line. */ | |
902 | |
903 long | |
904 readline (linebuffer, stream) | |
905 struct linebuffer *linebuffer; | |
906 FILE *stream; | |
907 { | |
908 char *buffer = linebuffer->buffer; | |
909 char *p = linebuffer->buffer; | |
910 char *end = p + linebuffer->size; | |
911 | |
912 while (1) | |
913 { | |
914 int c = getc (stream); | |
915 if (p == end) | |
916 { | |
917 buffer = (char *) xrealloc (buffer, linebuffer->size *= 2); | |
918 p += buffer - linebuffer->buffer; | |
919 end += buffer - linebuffer->buffer; | |
920 linebuffer->buffer = buffer; | |
921 } | |
922 if (c < 0 || c == '\n') | |
923 { | |
924 *p = 0; | |
925 break; | |
926 } | |
927 *p++ = c; | |
928 } | |
929 | |
930 return p - buffer; | |
931 } | |
932 | |
933 /* Sort an input file too big to sort in core. */ | |
934 | |
935 void | |
936 sort_offline (infile, nfiles, total, outfile) | |
937 char *infile; | |
938 int nfiles; | |
939 long total; | |
940 char *outfile; | |
941 { | |
942 /* More than enough. */ | |
943 int ntemps = 2 * (total + MAX_IN_CORE_SORT - 1) / MAX_IN_CORE_SORT; | |
944 char **tempfiles = (char **) xmalloc (ntemps * sizeof (char *)); | |
945 FILE *istream = fopen (infile, "r"); | |
946 int i; | |
947 struct linebuffer lb; | |
948 long linelength; | |
949 int failure = 0; | |
950 | |
951 initbuffer (&lb); | |
952 | |
953 /* Read in one line of input data. */ | |
954 | |
955 linelength = readline (&lb, istream); | |
956 | |
957 if (lb.buffer[0] != '\\' && lb.buffer[0] != '@') | |
958 { | |
959 error ("%s: not a texinfo index file", infile); | |
960 return; | |
961 } | |
962 | |
963 /* Split up the input into `ntemps' temporary files, or maybe fewer, | |
964 and put the new files' names into `tempfiles' */ | |
965 | |
966 for (i = 0; i < ntemps; i++) | |
967 { | |
968 char *outname = maketempname (++tempcount); | |
969 FILE *ostream = fopen (outname, "w"); | |
970 long tempsize = 0; | |
971 | |
972 if (!ostream) | |
973 pfatal_with_name (outname); | |
974 tempfiles[i] = outname; | |
975 | |
976 /* Copy lines into this temp file as long as it does not make file | |
977 "too big" or until there are no more lines. */ | |
978 | |
979 while (tempsize + linelength + 1 <= MAX_IN_CORE_SORT) | |
980 { | |
981 tempsize += linelength + 1; | |
982 fputs (lb.buffer, ostream); | |
983 putc ('\n', ostream); | |
984 | |
985 /* Read another line of input data. */ | |
986 | |
987 linelength = readline (&lb, istream); | |
988 if (!linelength && feof (istream)) | |
989 break; | |
990 | |
991 if (lb.buffer[0] != '\\' && lb.buffer[0] != '@') | |
992 { | |
993 error ("%s: not a texinfo index file", infile); | |
994 failure = 1; | |
995 goto fail; | |
996 } | |
997 } | |
998 fclose (ostream); | |
999 if (feof (istream)) | |
1000 break; | |
1001 } | |
1002 | |
1003 free (lb.buffer); | |
1004 | |
1005 fail: | |
1006 /* Record number of temp files we actually needed. */ | |
1007 | |
1008 ntemps = i; | |
1009 | |
1010 /* Sort each tempfile into another tempfile. | |
1011 Delete the first set of tempfiles and put the names of the second | |
1012 into `tempfiles'. */ | |
1013 | |
1014 for (i = 0; i < ntemps; i++) | |
1015 { | |
1016 char *newtemp = maketempname (++tempcount); | |
1017 sort_in_core (tempfiles[i], MAX_IN_CORE_SORT, newtemp); | |
1018 if (!keep_tempfiles) | |
1019 unlink (tempfiles[i]); | |
1020 tempfiles[i] = newtemp; | |
1021 } | |
1022 | |
1023 if (failure) | |
1024 return; | |
1025 | |
1026 /* Merge the tempfiles together and indexify. */ | |
1027 | |
1028 merge_files (tempfiles, ntemps, outfile); | |
1029 } | |
1030 | |
1031 /* Sort INFILE, whose size is TOTAL, | |
1032 assuming that is small enough to be done in-core, | |
1033 then indexify it and send the output to OUTFILE (or to stdout). */ | |
1034 | |
1035 void | |
1036 sort_in_core (infile, total, outfile) | |
1037 char *infile; | |
1038 long total; | |
1039 char *outfile; | |
1040 { | |
1041 char **nextline; | |
1042 char *data = (char *) xmalloc (total + 1); | |
1043 char *file_data; | |
1044 long file_size; | |
1045 int i; | |
1046 FILE *ostream = stdout; | |
1047 struct lineinfo *lineinfo; | |
1048 | |
1049 /* Read the contents of the file into the moby array `data'. */ | |
1050 | |
1051 int desc = open (infile, O_RDONLY, 0); | |
1052 | |
1053 if (desc < 0) | |
1054 fatal ("failure reopening %s", infile); | |
1055 for (file_size = 0;;) | |
1056 { | |
1057 i = read (desc, data + file_size, total - file_size); | |
1058 if (i <= 0) | |
1059 break; | |
1060 file_size += i; | |
1061 } | |
1062 file_data = data; | |
1063 data[file_size] = 0; | |
1064 | |
1065 close (desc); | |
1066 | |
1067 if (file_size > 0 && data[0] != '\\' && data[0] != '@') | |
1068 { | |
1069 error ("%s: not a texinfo index file", infile); | |
1070 return; | |
1071 } | |
1072 | |
1073 init_char_order (); | |
1074 | |
1075 /* Sort routines want to know this address. */ | |
1076 | |
1077 text_base = data; | |
1078 | |
1079 /* Create the array of pointers to lines, with a default size | |
1080 frequently enough. */ | |
1081 | |
1082 nlines = total / 50; | |
1083 if (!nlines) | |
1084 nlines = 2; | |
1085 linearray = (char **) xmalloc (nlines * sizeof (char *)); | |
1086 | |
1087 /* `nextline' points to the next free slot in this array. | |
1088 `nlines' is the allocated size. */ | |
1089 | |
1090 nextline = linearray; | |
1091 | |
1092 /* Parse the input file's data, and make entries for the lines. */ | |
1093 | |
1094 nextline = parsefile (infile, nextline, file_data, file_size); | |
1095 if (nextline == 0) | |
1096 { | |
1097 error ("%s: not a texinfo index file", infile); | |
1098 return; | |
1099 } | |
1100 | |
1101 /* Sort the lines. */ | |
1102 | |
1103 /* If we have enough space, find the first keyfield of each line in advance. | |
1104 Make a `struct lineinfo' for each line, which records the keyfield | |
1105 as well as the line, and sort them. */ | |
1106 | |
1107 lineinfo = (struct lineinfo *) malloc ((nextline - linearray) * sizeof (struct lineinfo)); | |
1108 | |
1109 if (lineinfo) | |
1110 { | |
1111 struct lineinfo *lp; | |
1112 char **p; | |
1113 | |
1114 for (lp = lineinfo, p = linearray; p != nextline; lp++, p++) | |
1115 { | |
1116 lp->text = *p; | |
1117 lp->key.text = find_field (keyfields, *p, &lp->keylen); | |
1118 if (keyfields->numeric) | |
1119 lp->key.number = find_value (lp->key.text, lp->keylen); | |
1120 } | |
1121 | |
1122 qsort (lineinfo, nextline - linearray, sizeof (struct lineinfo), | |
1123 compare_prepared); | |
1124 | |
1125 for (lp = lineinfo, p = linearray; p != nextline; lp++, p++) | |
1126 *p = lp->text; | |
1127 | |
1128 free (lineinfo); | |
1129 } | |
1130 else | |
1131 qsort (linearray, nextline - linearray, sizeof (char *), compare_full); | |
1132 | |
1133 /* Open the output file. */ | |
1134 | |
1135 if (outfile) | |
1136 { | |
1137 ostream = fopen (outfile, "w"); | |
1138 if (!ostream) | |
1139 pfatal_with_name (outfile); | |
1140 } | |
1141 | |
1142 writelines (linearray, nextline - linearray, ostream); | |
1143 if (outfile) | |
1144 fclose (ostream); | |
1145 | |
1146 free (linearray); | |
1147 free (data); | |
1148 } | |
1149 | |
1150 /* Parse an input string in core into lines. | |
1151 DATA is the input string, and SIZE is its length. | |
1152 Data goes in LINEARRAY starting at NEXTLINE. | |
1153 The value returned is the first entry in LINEARRAY still unused. | |
1154 Value 0 means input file contents are invalid. */ | |
1155 | |
1156 char ** | |
1157 parsefile (filename, nextline, data, size) | |
1158 char *filename; | |
1159 char **nextline; | |
1160 char *data; | |
1161 long size; | |
1162 { | |
1163 char *p, *end; | |
1164 char **line = nextline; | |
1165 | |
1166 p = data; | |
1167 end = p + size; | |
1168 *end = 0; | |
1169 | |
1170 while (p != end) | |
1171 { | |
1172 if (p[0] != '\\' && p[0] != '@') | |
1173 return 0; | |
1174 | |
1175 *line = p; | |
1176 while (*p && *p != '\n') | |
1177 p++; | |
1178 if (p != end) | |
1179 p++; | |
1180 | |
1181 line++; | |
1182 if (line == linearray + nlines) | |
1183 { | |
1184 char **old = linearray; | |
1185 linearray = (char **) xrealloc (linearray, sizeof (char *) * (nlines *= 4)); | |
1186 line += linearray - old; | |
1187 } | |
1188 } | |
1189 | |
1190 return line; | |
1191 } | |
1192 | |
1193 /* Indexification is a filter applied to the sorted lines | |
1194 as they are being written to the output file. | |
1195 Multiple entries for the same name, with different page numbers, | |
1196 get combined into a single entry with multiple page numbers. | |
1197 The first braced field, which is used for sorting, is discarded. | |
1198 However, its first character is examined, folded to lower case, | |
1199 and if it is different from that in the previous line fed to us | |
1200 a \initial line is written with one argument, the new initial. | |
1201 | |
1202 If an entry has four braced fields, then the second and third | |
1203 constitute primary and secondary names. | |
1204 In this case, each change of primary name | |
1205 generates a \primary line which contains only the primary name, | |
1206 and in between these are \secondary lines which contain | |
1207 just a secondary name and page numbers. */ | |
1208 | |
1209 /* The last primary name we wrote a \primary entry for. | |
1210 If only one level of indexing is being done, this is the last name seen. */ | |
1211 char *lastprimary; | |
1212 /* Length of storage allocated for lastprimary. */ | |
1213 int lastprimarylength; | |
1214 | |
1215 /* Similar, for the secondary name. */ | |
1216 char *lastsecondary; | |
1217 int lastsecondarylength; | |
1218 | |
1219 /* Zero if we are not in the middle of writing an entry. | |
1220 One if we have written the beginning of an entry but have not | |
1221 yet written any page numbers into it. | |
1222 Greater than one if we have written the beginning of an entry | |
1223 plus at least one page number. */ | |
1224 int pending; | |
1225 | |
1226 /* The initial (for sorting purposes) of the last primary entry written. | |
1227 When this changes, a \initial {c} line is written */ | |
1228 | |
1229 char *lastinitial; | |
1230 | |
1231 int lastinitiallength; | |
1232 | |
1233 /* When we need a string of length 1 for the value of lastinitial, | |
1234 store it here. */ | |
1235 | |
1236 char lastinitial1[2]; | |
1237 | |
1238 /* Initialize static storage for writing an index. */ | |
1239 | |
1240 static void | |
1241 init_index (void) | |
1242 { | |
1243 pending = 0; | |
1244 lastinitial = lastinitial1; | |
1245 lastinitial1[0] = 0; | |
1246 lastinitial1[1] = 0; | |
1247 lastinitiallength = 0; | |
1248 lastprimarylength = 100; | |
1249 lastprimary = (char *) xmalloc (lastprimarylength + 1); | |
1250 memset (lastprimary, '\0', lastprimarylength + 1); | |
1251 lastsecondarylength = 100; | |
1252 lastsecondary = (char *) xmalloc (lastsecondarylength + 1); | |
1253 memset (lastsecondary, '\0', lastsecondarylength + 1); | |
1254 } | |
1255 | |
1256 /* Indexify. Merge entries for the same name, | |
1257 insert headers for each initial character, etc. */ | |
1258 | |
1259 static void | |
1260 indexify (char *line, FILE *ostream) | |
1261 { | |
1262 char *primary, *secondary, *pagenumber; | |
1263 int primarylength, secondarylength = 0, pagelength; | |
1264 int nosecondary; | |
1265 int initiallength; | |
1266 char *initial; | |
1267 char initial1[2]; | |
1268 register char *p; | |
1269 | |
1270 /* First, analyze the parts of the entry fed to us this time. */ | |
1271 | |
1272 p = find_braced_pos (line, 0, 0, 0); | |
1273 if (*p == '{') | |
1274 { | |
1275 initial = p; | |
1276 /* Get length of inner pair of braces starting at `p', | |
1277 including that inner pair of braces. */ | |
1278 initiallength = find_braced_end (p + 1) + 1 - p; | |
1279 } | |
1280 else | |
1281 { | |
1282 initial = initial1; | |
1283 initial1[0] = *p; | |
1284 initial1[1] = 0; | |
1285 initiallength = 1; | |
1286 | |
1287 if (initial1[0] >= 'a' && initial1[0] <= 'z') | |
1288 initial1[0] -= 040; | |
1289 } | |
1290 | |
1291 pagenumber = find_braced_pos (line, 1, 0, 0); | |
1292 pagelength = find_braced_end (pagenumber) - pagenumber; | |
1293 if (pagelength == 0) | |
1294 abort (); | |
1295 | |
1296 primary = find_braced_pos (line, 2, 0, 0); | |
1297 primarylength = find_braced_end (primary) - primary; | |
1298 | |
1299 secondary = find_braced_pos (line, 3, 0, 0); | |
1300 nosecondary = !*secondary; | |
1301 if (!nosecondary) | |
1302 secondarylength = find_braced_end (secondary) - secondary; | |
1303 | |
1304 /* If the primary is different from before, make a new primary entry. */ | |
1305 if (strncmp (primary, lastprimary, primarylength)) | |
1306 { | |
1307 /* Close off current secondary entry first, if one is open. */ | |
1308 if (pending) | |
1309 { | |
1310 fputs ("}\n", ostream); | |
1311 pending = 0; | |
1312 } | |
1313 | |
1314 /* If this primary has a different initial, include an entry for | |
1315 the initial. */ | |
1316 if (initiallength != lastinitiallength || | |
1317 strncmp (initial, lastinitial, initiallength)) | |
1318 { | |
1319 fprintf (ostream, "\\initial {"); | |
1320 fwrite (initial, 1, initiallength, ostream); | |
1321 fputs ("}\n", ostream); | |
1322 if (initial == initial1) | |
1323 { | |
1324 lastinitial = lastinitial1; | |
1325 *lastinitial1 = *initial1; | |
1326 } | |
1327 else | |
1328 { | |
1329 lastinitial = initial; | |
1330 } | |
1331 lastinitiallength = initiallength; | |
1332 } | |
1333 | |
1334 /* Make the entry for the primary. */ | |
1335 if (nosecondary) | |
1336 fputs ("\\entry {", ostream); | |
1337 else | |
1338 fputs ("\\primary {", ostream); | |
1339 fwrite (primary, primarylength, 1, ostream); | |
1340 if (nosecondary) | |
1341 { | |
1342 fputs ("}{", ostream); | |
1343 pending = 1; | |
1344 } | |
1345 else | |
1346 fputs ("}\n", ostream); | |
1347 | |
1348 /* Record name of most recent primary. */ | |
1349 if (lastprimarylength < primarylength) | |
1350 { | |
1351 lastprimarylength = primarylength + 100; | |
1352 lastprimary = (char *) xrealloc (lastprimary, | |
1353 1 + lastprimarylength); | |
1354 } | |
1355 strncpy (lastprimary, primary, primarylength); | |
1356 lastprimary[primarylength] = 0; | |
1357 | |
1358 /* There is no current secondary within this primary, now. */ | |
1359 lastsecondary[0] = 0; | |
1360 } | |
1361 | |
1362 /* Should not have an entry with no subtopic following one with a subtopic. */ | |
1363 | |
1364 if (nosecondary && *lastsecondary) | |
1365 error ("entry %s follows an entry with a secondary name", line); | |
1366 | |
1367 /* Start a new secondary entry if necessary. */ | |
1368 if (!nosecondary && strncmp (secondary, lastsecondary, secondarylength)) | |
1369 { | |
1370 if (pending) | |
1371 { | |
1372 fputs ("}\n", ostream); | |
1373 pending = 0; | |
1374 } | |
1375 | |
1376 /* Write the entry for the secondary. */ | |
1377 fputs ("\\secondary {", ostream); | |
1378 fwrite (secondary, secondarylength, 1, ostream); | |
1379 fputs ("}{", ostream); | |
1380 pending = 1; | |
1381 | |
1382 /* Record name of most recent secondary. */ | |
1383 if (lastsecondarylength < secondarylength) | |
1384 { | |
1385 lastsecondarylength = secondarylength + 100; | |
1386 lastsecondary = (char *) xrealloc (lastsecondary, | |
1387 1 + lastsecondarylength); | |
1388 } | |
1389 strncpy (lastsecondary, secondary, secondarylength); | |
1390 lastsecondary[secondarylength] = 0; | |
1391 } | |
1392 | |
1393 /* Here to add one more page number to the current entry. */ | |
1394 if (pending++ != 1) | |
1395 fputs (", ", ostream); /* Punctuate first, if this is not the first. */ | |
1396 fwrite (pagenumber, pagelength, 1, ostream); | |
1397 } | |
1398 | |
1399 /* Close out any unfinished output entry. */ | |
1400 | |
1401 static void | |
1402 finish_index (FILE *ostream) | |
1403 { | |
1404 if (pending) | |
1405 fputs ("}\n", ostream); | |
1406 free (lastprimary); | |
1407 free (lastsecondary); | |
1408 } | |
1409 | |
1410 /* Copy the lines in the sorted order. | |
1411 Each line is copied out of the input file it was found in. */ | |
1412 | |
1413 void | |
1414 writelines (linearray, nlines, ostream) | |
1415 char **linearray; | |
1416 int nlines; | |
1417 FILE *ostream; | |
1418 { | |
1419 char **stop_line = linearray + nlines; | |
1420 char **next_line; | |
1421 | |
1422 init_index (); | |
1423 | |
1424 /* Output the text of the lines, and free the buffer space. */ | |
1425 | |
1426 for (next_line = linearray; next_line != stop_line; next_line++) | |
1427 { | |
1428 /* If -u was specified, output the line only if distinct from previous one. */ | |
1429 if (next_line == linearray | |
1430 /* Compare previous line with this one, using only the | |
1431 explicitly specd keyfields. */ | |
1432 || compare_general (*(next_line - 1), *next_line, 0L, 0L, num_keyfields - 1)) | |
1433 { | |
1434 char *p = *next_line; | |
1435 char c; | |
1436 | |
1437 while ((c = *p++) && c != '\n') | |
1438 /* Do nothing. */ ; | |
1439 *(p - 1) = 0; | |
1440 indexify (*next_line, ostream); | |
1441 } | |
1442 } | |
1443 | |
1444 finish_index (ostream); | |
1445 } | |
1446 | |
1447 /* Assume (and optionally verify) that each input file is sorted; | |
1448 merge them and output the result. | |
1449 Returns nonzero if any input file fails to be sorted. | |
1450 | |
1451 This is the high-level interface that can handle an unlimited | |
1452 number of files. */ | |
1453 | |
1454 #define MAX_DIRECT_MERGE 10 | |
1455 | |
1456 int | |
1457 merge_files (infiles, nfiles, outfile) | |
1458 char **infiles; | |
1459 int nfiles; | |
1460 char *outfile; | |
1461 { | |
1462 char **tempfiles; | |
1463 int ntemps; | |
1464 int i; | |
1465 int value = 0; | |
1466 int start_tempcount = tempcount; | |
1467 | |
1468 if (nfiles <= MAX_DIRECT_MERGE) | |
1469 return merge_direct (infiles, nfiles, outfile); | |
1470 | |
1471 /* Merge groups of MAX_DIRECT_MERGE input files at a time, | |
1472 making a temporary file to hold each group's result. */ | |
1473 | |
1474 ntemps = (nfiles + MAX_DIRECT_MERGE - 1) / MAX_DIRECT_MERGE; | |
1475 tempfiles = (char **) xmalloc (ntemps * sizeof (char *)); | |
1476 for (i = 0; i < ntemps; i++) | |
1477 { | |
1478 int nf = MAX_DIRECT_MERGE; | |
1479 if (i + 1 == ntemps) | |
1480 nf = nfiles - i * MAX_DIRECT_MERGE; | |
1481 tempfiles[i] = maketempname (++tempcount); | |
1482 value |= merge_direct (&infiles[i * MAX_DIRECT_MERGE], nf, tempfiles[i]); | |
1483 } | |
1484 | |
1485 /* All temporary files that existed before are no longer needed | |
1486 since their contents have been merged into our new tempfiles. | |
1487 So delete them. */ | |
1488 flush_tempfiles (start_tempcount); | |
1489 | |
1490 /* Now merge the temporary files we created. */ | |
1491 | |
1492 merge_files (tempfiles, ntemps, outfile); | |
1493 | |
1494 free (tempfiles); | |
1495 | |
1496 return value; | |
1497 } | |
1498 | |
1499 /* Assume (and optionally verify) that each input file is sorted; | |
1500 merge them and output the result. | |
1501 Returns nonzero if any input file fails to be sorted. | |
1502 | |
1503 This version of merging will not work if the number of | |
1504 input files gets too high. Higher level functions | |
1505 use it only with a bounded number of input files. */ | |
1506 | |
1507 int | |
1508 merge_direct (infiles, nfiles, outfile) | |
1509 char **infiles; | |
1510 int nfiles; | |
1511 char *outfile; | |
1512 { | |
1513 struct linebuffer *lb1, *lb2; | |
1514 struct linebuffer **thisline, **prevline; | |
1515 FILE **streams; | |
1516 int i; | |
1517 int nleft; | |
1518 int lossage = 0; | |
1519 int *file_lossage; | |
1520 struct linebuffer *prev_out = 0; | |
1521 FILE *ostream = stdout; | |
1522 | |
1523 if (outfile) | |
1524 { | |
1525 ostream = fopen (outfile, "w"); | |
1526 } | |
1527 if (!ostream) | |
1528 pfatal_with_name (outfile); | |
1529 | |
1530 init_index (); | |
1531 | |
1532 if (nfiles == 0) | |
1533 { | |
1534 if (outfile) | |
1535 fclose (ostream); | |
1536 return 0; | |
1537 } | |
1538 | |
1539 /* For each file, make two line buffers. | |
1540 Also, for each file, there is an element of `thisline' | |
1541 which points at any time to one of the file's two buffers, | |
1542 and an element of `prevline' which points to the other buffer. | |
1543 `thisline' is supposed to point to the next available line from the file, | |
1544 while `prevline' holds the last file line used, | |
1545 which is remembered so that we can verify that the file is properly sorted. */ | |
1546 | |
1547 /* lb1 and lb2 contain one buffer each per file. */ | |
1548 lb1 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer)); | |
1549 lb2 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer)); | |
1550 | |
1551 /* thisline[i] points to the linebuffer holding the next available line in file i, | |
1552 or is zero if there are no lines left in that file. */ | |
1553 thisline = (struct linebuffer **) | |
1554 xmalloc (nfiles * sizeof (struct linebuffer *)); | |
1555 /* prevline[i] points to the linebuffer holding the last used line | |
1556 from file i. This is just for verifying that file i is properly | |
1557 sorted. */ | |
1558 prevline = (struct linebuffer **) | |
1559 xmalloc (nfiles * sizeof (struct linebuffer *)); | |
1560 /* streams[i] holds the input stream for file i. */ | |
1561 streams = (FILE **) xmalloc (nfiles * sizeof (FILE *)); | |
1562 /* file_lossage[i] is nonzero if we already know file i is not | |
1563 properly sorted. */ | |
1564 file_lossage = (int *) xmalloc (nfiles * sizeof (int)); | |
1565 | |
1566 /* Allocate and initialize all that storage. */ | |
1567 | |
1568 for (i = 0; i < nfiles; i++) | |
1569 { | |
1570 initbuffer (&lb1[i]); | |
1571 initbuffer (&lb2[i]); | |
1572 thisline[i] = &lb1[i]; | |
1573 prevline[i] = &lb2[i]; | |
1574 file_lossage[i] = 0; | |
1575 streams[i] = fopen (infiles[i], "r"); | |
1576 if (!streams[i]) | |
1577 pfatal_with_name (infiles[i]); | |
1578 | |
1579 readline (thisline[i], streams[i]); | |
1580 } | |
1581 | |
1582 /* Keep count of number of files not at eof. */ | |
1583 nleft = nfiles; | |
1584 | |
1585 while (nleft) | |
1586 { | |
1587 struct linebuffer *best = 0; | |
1588 struct linebuffer *exch; | |
1589 int bestfile = -1; | |
1590 int i; | |
1591 | |
1592 /* Look at the next avail line of each file; choose the least one. */ | |
1593 | |
1594 for (i = 0; i < nfiles; i++) | |
1595 { | |
1596 if (thisline[i] && | |
1597 (!best || | |
1598 0 < compare_general (best->buffer, thisline[i]->buffer, | |
1599 (long) bestfile, (long) i, num_keyfields))) | |
1600 { | |
1601 best = thisline[i]; | |
1602 bestfile = i; | |
1603 } | |
1604 } | |
1605 | |
1606 /* Output that line, unless it matches the previous one and we | |
1607 don't want duplicates. */ | |
1608 | |
1609 if (!(prev_out && | |
1610 !compare_general (prev_out->buffer, | |
1611 best->buffer, 0L, 1L, num_keyfields - 1))) | |
1612 indexify (best->buffer, ostream); | |
1613 prev_out = best; | |
1614 | |
1615 /* Now make the line the previous of its file, and fetch a new | |
1616 line from that file. */ | |
1617 | |
1618 exch = prevline[bestfile]; | |
1619 prevline[bestfile] = thisline[bestfile]; | |
1620 thisline[bestfile] = exch; | |
1621 | |
1622 while (1) | |
1623 { | |
1624 /* If the file has no more, mark it empty. */ | |
1625 | |
1626 if (feof (streams[bestfile])) | |
1627 { | |
1628 thisline[bestfile] = 0; | |
1629 /* Update the number of files still not empty. */ | |
1630 nleft--; | |
1631 break; | |
1632 } | |
1633 readline (thisline[bestfile], streams[bestfile]); | |
1634 if (thisline[bestfile]->buffer[0] || !feof (streams[bestfile])) | |
1635 break; | |
1636 } | |
1637 } | |
1638 | |
1639 finish_index (ostream); | |
1640 | |
1641 /* Free all storage and close all input streams. */ | |
1642 | |
1643 for (i = 0; i < nfiles; i++) | |
1644 { | |
1645 fclose (streams[i]); | |
1646 free (lb1[i].buffer); | |
1647 free (lb2[i].buffer); | |
1648 } | |
1649 free (file_lossage); | |
1650 free (lb1); | |
1651 free (lb2); | |
1652 free (thisline); | |
1653 free (prevline); | |
1654 free (streams); | |
1655 | |
1656 if (outfile) | |
1657 fclose (ostream); | |
1658 | |
1659 return lossage; | |
1660 } | |
1661 | |
1662 /* Print error message and exit. */ | |
1663 | |
1664 void | |
1665 fatal (format, arg) | |
1666 char *format, *arg; | |
1667 { | |
1668 error (format, arg); | |
1669 exit (TI_FATAL_ERROR); | |
1670 } | |
1671 | |
1672 /* Print error message. FORMAT is printf control string, ARG is arg for it. */ | |
1673 void | |
1674 error (format, arg) | |
1675 char *format, *arg; | |
1676 { | |
1677 printf ("%s: ", program_name); | |
1678 printf (format, arg); | |
1679 if (format[strlen (format) -1] != '\n') | |
1680 printf ("\n"); | |
1681 } | |
1682 | |
1683 /* unused */ | |
1684 #if 0 | |
1685 static void | |
1686 perror_with_name (char *name) | |
1687 { | |
1688 char *s; | |
1689 | |
1690 s = strerror (errno); | |
1691 printf ("%s: ", program_name); | |
1692 printf ("%s; for file `%s'.\n", s, name); | |
1693 } | |
1694 #endif | |
1695 | |
1696 void | |
1697 pfatal_with_name (name) | |
1698 char *name; | |
1699 { | |
1700 char *s; | |
1701 | |
1702 s = strerror (errno); | |
1703 printf ("%s: ", program_name); | |
1704 printf ("%s; for file `%s'.\n", s, name); | |
1705 exit (TI_FATAL_ERROR); | |
1706 } | |
1707 | |
1708 /* Return a newly-allocated string whose contents concatenate those of | |
1709 S1, S2, S3. */ | |
1710 | |
1711 char * | |
1712 concat (s1, s2, s3) | |
1713 char *s1, *s2, *s3; | |
1714 { | |
1715 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3); | |
1716 char *result = (char *) xmalloc (len1 + len2 + len3 + 1); | |
1717 | |
1718 strcpy (result, s1); | |
1719 strcpy (result + len1, s2); | |
1720 strcpy (result + len1 + len2, s3); | |
1721 *(result + len1 + len2 + len3) = 0; | |
1722 | |
1723 return result; | |
1724 } | |
1725 | |
1726 #if !defined (HAVE_STRERROR) | |
1727 extern char *sys_errlist[]; | |
1728 extern int sys_nerr; | |
1729 | |
1730 char * | |
1731 strerror (num) | |
1732 int num; | |
1733 { | |
1734 if (num >= sys_nerr) | |
1735 return (""); | |
1736 else | |
1737 return (sys_errlist[num]); | |
1738 } | |
1739 #endif /* !HAVE_STRERROR */ | |
1740 | |
1741 #if !defined (HAVE_STRCHR) | |
1742 char * | |
1743 strrchr (string, character) | |
1744 char *string; | |
1745 int character; | |
1746 { | |
1747 register int i; | |
1748 | |
1749 for (i = strlen (string) - 1; i > -1; i--) | |
1750 if (string[i] == character) | |
1751 return (string + i); | |
1752 | |
1753 return ((char *)NULL); | |
1754 } | |
1755 #endif /* HAVE_STRCHR */ | |
1756 | |
1757 /* Just like malloc, but kills the program in case of fatal error. */ | |
1758 void * | |
1759 xmalloc (nbytes) | |
1760 int nbytes; | |
1761 { | |
1762 void *temp = (void *) malloc (nbytes); | |
1763 | |
1764 if (nbytes && temp == (void *)NULL) | |
1765 memory_error ("xmalloc", nbytes); | |
1766 | |
1767 return (temp); | |
1768 } | |
1769 | |
1770 /* Like realloc (), but barfs if there isn't enough memory. */ | |
1771 void * | |
1772 xrealloc (pointer, nbytes) | |
1773 void *pointer; | |
1774 int nbytes; | |
1775 { | |
1776 void *temp; | |
1777 | |
1778 if (!pointer) | |
1779 temp = (void *)xmalloc (nbytes); | |
1780 else | |
1781 temp = (void *)realloc (pointer, nbytes); | |
1782 | |
1783 if (nbytes && !temp) | |
1784 memory_error ("xrealloc", nbytes); | |
1785 | |
1786 return (temp); | |
1787 } | |
1788 | |
1789 static void | |
1790 memory_error (char *callers_name, int bytes_wanted) | |
1791 { | |
1792 char printable_string[80]; | |
1793 | |
1794 sprintf (printable_string, | |
1795 "Virtual memory exhausted in %s ()! Needed %d bytes.", | |
1796 callers_name, bytes_wanted); | |
1797 | |
1798 error (printable_string, 0); | |
1799 abort (); | |
1800 } | |
1801 |