view lib-src/hexl.c @ 4407:4ee73bbe4f8e

Always use boyer_moore in ASCII or Latin-1 buffers with ASCII search strings. 2007-12-26 Aidan Kehoe <kehoea@parhasard.net> * casetab.c: Extend and correct some case table documentation. * search.c (search_buffer): Correct a bug where only the first entry for a character in the case equivalence table was examined in determining if the Boyer-Moore search algorithm is appropriate. If there are case mappings outside of the charset and row of the characters specified in the search string, those case mappings can be safely ignored (and Boyer-Moore search can be used) if we know from the buffer statistics that the corresponding characters cannot occur. * search.c (boyer_moore): Assert that we haven't been passed a string with varying characters sets or rows within character sets. That's what simple_search is for. In the very rare event that a character in the search string has a canonical case mapping that is not in the same character set and row, don't try to search for the canonical character, search for some other character that is in the the desired character set and row. Assert that the case table isn't corrupt. Do not search for any character case mappings that cannot possibly occur in the buffer, given the buffer metadata about its contents.
author Aidan Kehoe <kehoea@parhasard.net>
date Wed, 26 Dec 2007 17:30:16 +0100
parents abe6d1db359e
children 061f4f90f874 06dd936cde16
line wrap: on
line source

/* Synched up with: FSF 19.28. */

#include <config.h>

#include <stdio.h>
#include <ctype.h>
#ifdef WIN32_NATIVE
#include <io.h>
#include <fcntl.h>
#endif

#if __STDC__ || defined(STDC_HEADERS)
#include <stdlib.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <string.h>
#endif

#define DEFAULT_GROUPING	0x01
#define DEFAULT_BASE		16

#undef TRUE
#undef FALSE
#define TRUE  (1)
#define FALSE (0)

int base = DEFAULT_BASE, un_flag = FALSE, iso_flag = FALSE, endian = 1;
int group_by = DEFAULT_GROUPING;
char *progname;

void usage (void);

int
main (int argc, char *argv[])
{
  register long address;
  char string[18];
  FILE *fp;
  
  progname = *argv++; --argc;
  
  /*
  ** -hex		hex dump
  ** -oct		Octal dump
  ** -group-by-8-bits
  ** -group-by-16-bits
  ** -group-by-32-bits
  ** -group-by-64-bits
  ** -iso		iso character set.
  ** -big-endian	Big Endian
  ** -little-endian	Little Endian
  ** -un || -de	from hexl format to binary.
  ** --		End switch list.
  ** <filename>	dump filename
  ** -		(as filename == stdin)
  */
    
  while (*argv && *argv[0] == '-' && (*argv)[1])
    {
      /* A switch! */
      if (!strcmp (*argv, "--"))
	{
	  --argc; argv++;
	  break;
	}
      else if (!strcmp (*argv, "-un") || !strcmp (*argv, "-de"))
	{
	  un_flag = TRUE;
	  --argc; argv++;
	}
      else if (!strcmp (*argv, "-hex"))
	{
	  base = 16;
	  --argc; argv++;
	}
      else if (!strcmp (*argv, "-iso"))
	{
	  iso_flag = TRUE;
	  --argc; argv++;
	}
      else if (!strcmp (*argv, "-oct"))
	{
	  base = 8;
	  --argc; argv++;
	}
      else if (!strcmp (*argv, "-big-endian"))
	{
	  endian = 1;
	  --argc; argv++;
	}
      else if (!strcmp (*argv, "-little-endian"))
	{
	  endian = 0;
	  --argc; argv++;
	}
      else if (!strcmp (*argv, "-group-by-8-bits"))
	{
	  group_by = 0x00;
	  --argc; argv++;
	}
      else if (!strcmp (*argv, "-group-by-16-bits"))
	{
	  group_by = 0x01;
	  --argc; argv++;
	}
      else if (!strcmp (*argv, "-group-by-32-bits"))
	{
	  group_by = 0x03;
	  --argc; argv++;
	}
      else if (!strcmp (*argv, "-group-by-64-bits"))
	{
	  group_by = 0x07;
	  endian = 0;
	  --argc; argv++;
	}
      else
	{
	  (void) fprintf (stderr, "%s: invalid switch: \"%s\".\n", progname,
		   *argv);
	  usage ();
	}
    }

  do
    {
      if (*argv == NULL)
	fp = stdin;
      else
	{
	  char *filename = *argv++;

	  if (!strcmp (filename, "-"))
	    fp = stdin;
	  else if ((fp = fopen (filename, "r")) == NULL)
	    {
	      perror (filename);
	      continue;
	    }
	}

      if (un_flag)
	{
	  char buf[18];

#ifdef WIN32_NATIVE
	  _setmode (_fileno (stdout), O_BINARY);
#endif
	  for (;;)
	    {
	      register int i, c = 0, d;

#define hexchar(x) (isdigit (x) ? x - '0' : x - 'a' + 10)

	      fread (buf, 1, 10, fp); /* skip 10 bytes */

	      for (i=0; i < 16; ++i)
		{
		  if ((c = getc (fp)) == ' ' || c == EOF)
		    break;

		  d = getc (fp);
		  c = hexchar (c) * 0x10 + hexchar (d);
		  putchar (c);

		  if ((i&group_by) == group_by)
		    getc (fp);
		}

	      if (c == ' ')
		{
		  while ((c = getc (fp)) != '\n' && c != EOF)
		    ;

		  if (c == EOF)
		    break;
		}
	      else
		{
		  if (i < 16)
		    break;

		  fread (buf, 1, 18, fp); /* skip 18 bytes */
		}
	    }
	}
      else
	{
#ifdef WIN32_NATIVE
	  _setmode (_fileno (fp), O_BINARY);
#endif
	  address = 0;
	  string[0] = ' ';
	  string[17] = '\0';
	  for (;;)
	    {
	      register int i, c = 0;

	      for (i=0; i < 16; ++i)
		{
		  if ((c = getc (fp)) == EOF)
		    {
		      if (!i)
			break;

		      fputs ("  ", stdout);
		      string[i+1] = '\0';
		    }
		  else
		    {
		      if (!i)
			(void) printf ("%08lx: ", address);

		      if (iso_flag)
			string[i+1] =
			  (c < 0x20 || (c >= 0x7F && c < 0xa0)) ? '.' :c;
		      else
			string[i+1] = (c < 0x20 || c >= 0x7F) ? '.' : c;

		      (void) printf ("%02x", c);
		    }

		  if ((i&group_by) == group_by)
		    putchar (' ');
		}

	      if (i)
		puts (string);

	      if (c == EOF)
		break;

	      address += 0x10;

	    }
	}

      if (fp != stdin)
	(void) fclose (fp);

    } while (*argv != NULL);
  return 0;
}

void
usage (void)
{
  fprintf (stderr, "Usage: %s [-de] [-iso]\n", progname);
  exit (1);
}