view lib-src/hexl.c @ 4604:e0a8715fdb1f

Support new IGNORE-INVALID-SEQUENCESP argument, #'query-coding-region. lisp/ChangeLog addition: 2009-02-07 Aidan Kehoe <kehoea@parhasard.net> * coding.el (query-coding-clear-highlights): Rename the BUFFER argument to BUFFER-OR-STRING, describe it as possibly being a string in its documentation. (default-query-coding-region): Add a new IGNORE-INVALID-SEQUENCESP argument, document that this function does not support it. Bind case-fold-search to nil, we don't want this to influence what the function thinks is encodable or not. (query-coding-region): Add a new IGNORE-INVALID-SEQUENCESP argument, document what it does; reflect this new argument in the associated compiler macro. (query-coding-string): Add a new IGNORE-INVALID-SEQUENCESP argument, document what it does. Support the HIGHLIGHT argument correctly. * unicode.el (unicode-query-coding-region): Add a new IGNORE-INVALID-SEQUENCESP argument, document what it does, implement this. Document a potential problem. Use #'query-coding-clear-highlights instead of reimplementing it ourselves. Remove some debugging messages. * mule/arabic.el (iso-8859-6): * mule/cyrillic.el (iso-8859-5): * mule/greek.el (iso-8859-7): * mule/hebrew.el (iso-8859-8): * mule/latin.el (iso-8859-2): * mule/latin.el (iso-8859-3): * mule/latin.el (iso-8859-4): * mule/latin.el (iso-8859-14): * mule/latin.el (iso-8859-15): * mule/latin.el (iso-8859-16): * mule/latin.el (iso-8859-9): * mule/latin.el (windows-1252): * mule/mule-coding.el (iso-8859-1): Avoid the assumption that characters not given an explicit mapping in these coding systems map to the ISO 8859-1 characters corresponding to the octets on disk; this makes it much more reasonable to implement the IGNORE-INVALID-SEQUENCESP argument to query-coding-region. * mule/mule-cmds.el (set-language-info): Correct the docstring. * mule/mule-cmds.el (finish-set-language-environment): Treat invalid Unicode sequences produced from invalid-sequence-coding-system and corresponding to control characters the same as control characters in redisplay. * mule/mule-cmds.el: Document that encode-coding-char is available in coding.el * mule/mule-coding.el (make-8-bit-generate-helper): Change to return the both the encode-program generated and the relevant non-ASCII charset; update the docstring to reflect this. * mule/mule-coding.el (make-8-bit-generate-encode-program-and-skip-chars-strings): Rename this function; have it return skip-chars-strings as well as the encode program. Have these skip-chars-strings use ranges for charsets, where possible. * mule/mule-coding.el (make-8-bit-create-decode-encode-tables): Revise this to allow people to specify explicitly characters that should be undefined (= corresponding to keys in unicode-error-default-translation-table), and treating unspecified octets above #x7f as undefined by default. * mule/mule-coding.el (8-bit-fixed-query-coding-region): Add a new IGNORE-INVALID-SEQUENCESP argument, implement support for it using the 8-bit-fixed-invalid-sequences-skip-chars coding system property; remove some debugging messages. * mule/mule-coding.el (make-8-bit-coding-system): This function is dumped, autoloading it makes no sense. Document what happens when characters above #x7f are not specified, implement this. * mule/vietnamese.el: Correct spelling. tests/ChangeLog addition: 2009-02-07 Aidan Kehoe <kehoea@parhasard.net> * automated/query-coding-tests.el: Add FAILING-CASE arguments to the Assert calls, making #'q-c-debug mostly unnecessary. Remove #'q-c-debug. Add new tests that use the IGNORE-INVALID-SEQUENCESP argument to #'query-coding-region; rework the existing ones to respect it.
author Aidan Kehoe <kehoea@parhasard.net>
date Sat, 07 Feb 2009 17:13:37 +0000
parents abe6d1db359e
children 061f4f90f874 06dd936cde16
line wrap: on
line source

/* Synched up with: FSF 19.28. */

#include <config.h>

#include <stdio.h>
#include <ctype.h>
#ifdef WIN32_NATIVE
#include <io.h>
#include <fcntl.h>
#endif

#if __STDC__ || defined(STDC_HEADERS)
#include <stdlib.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <string.h>
#endif

#define DEFAULT_GROUPING	0x01
#define DEFAULT_BASE		16

#undef TRUE
#undef FALSE
#define TRUE  (1)
#define FALSE (0)

int base = DEFAULT_BASE, un_flag = FALSE, iso_flag = FALSE, endian = 1;
int group_by = DEFAULT_GROUPING;
char *progname;

void usage (void);

int
main (int argc, char *argv[])
{
  register long address;
  char string[18];
  FILE *fp;
  
  progname = *argv++; --argc;
  
  /*
  ** -hex		hex dump
  ** -oct		Octal dump
  ** -group-by-8-bits
  ** -group-by-16-bits
  ** -group-by-32-bits
  ** -group-by-64-bits
  ** -iso		iso character set.
  ** -big-endian	Big Endian
  ** -little-endian	Little Endian
  ** -un || -de	from hexl format to binary.
  ** --		End switch list.
  ** <filename>	dump filename
  ** -		(as filename == stdin)
  */
    
  while (*argv && *argv[0] == '-' && (*argv)[1])
    {
      /* A switch! */
      if (!strcmp (*argv, "--"))
	{
	  --argc; argv++;
	  break;
	}
      else if (!strcmp (*argv, "-un") || !strcmp (*argv, "-de"))
	{
	  un_flag = TRUE;
	  --argc; argv++;
	}
      else if (!strcmp (*argv, "-hex"))
	{
	  base = 16;
	  --argc; argv++;
	}
      else if (!strcmp (*argv, "-iso"))
	{
	  iso_flag = TRUE;
	  --argc; argv++;
	}
      else if (!strcmp (*argv, "-oct"))
	{
	  base = 8;
	  --argc; argv++;
	}
      else if (!strcmp (*argv, "-big-endian"))
	{
	  endian = 1;
	  --argc; argv++;
	}
      else if (!strcmp (*argv, "-little-endian"))
	{
	  endian = 0;
	  --argc; argv++;
	}
      else if (!strcmp (*argv, "-group-by-8-bits"))
	{
	  group_by = 0x00;
	  --argc; argv++;
	}
      else if (!strcmp (*argv, "-group-by-16-bits"))
	{
	  group_by = 0x01;
	  --argc; argv++;
	}
      else if (!strcmp (*argv, "-group-by-32-bits"))
	{
	  group_by = 0x03;
	  --argc; argv++;
	}
      else if (!strcmp (*argv, "-group-by-64-bits"))
	{
	  group_by = 0x07;
	  endian = 0;
	  --argc; argv++;
	}
      else
	{
	  (void) fprintf (stderr, "%s: invalid switch: \"%s\".\n", progname,
		   *argv);
	  usage ();
	}
    }

  do
    {
      if (*argv == NULL)
	fp = stdin;
      else
	{
	  char *filename = *argv++;

	  if (!strcmp (filename, "-"))
	    fp = stdin;
	  else if ((fp = fopen (filename, "r")) == NULL)
	    {
	      perror (filename);
	      continue;
	    }
	}

      if (un_flag)
	{
	  char buf[18];

#ifdef WIN32_NATIVE
	  _setmode (_fileno (stdout), O_BINARY);
#endif
	  for (;;)
	    {
	      register int i, c = 0, d;

#define hexchar(x) (isdigit (x) ? x - '0' : x - 'a' + 10)

	      fread (buf, 1, 10, fp); /* skip 10 bytes */

	      for (i=0; i < 16; ++i)
		{
		  if ((c = getc (fp)) == ' ' || c == EOF)
		    break;

		  d = getc (fp);
		  c = hexchar (c) * 0x10 + hexchar (d);
		  putchar (c);

		  if ((i&group_by) == group_by)
		    getc (fp);
		}

	      if (c == ' ')
		{
		  while ((c = getc (fp)) != '\n' && c != EOF)
		    ;

		  if (c == EOF)
		    break;
		}
	      else
		{
		  if (i < 16)
		    break;

		  fread (buf, 1, 18, fp); /* skip 18 bytes */
		}
	    }
	}
      else
	{
#ifdef WIN32_NATIVE
	  _setmode (_fileno (fp), O_BINARY);
#endif
	  address = 0;
	  string[0] = ' ';
	  string[17] = '\0';
	  for (;;)
	    {
	      register int i, c = 0;

	      for (i=0; i < 16; ++i)
		{
		  if ((c = getc (fp)) == EOF)
		    {
		      if (!i)
			break;

		      fputs ("  ", stdout);
		      string[i+1] = '\0';
		    }
		  else
		    {
		      if (!i)
			(void) printf ("%08lx: ", address);

		      if (iso_flag)
			string[i+1] =
			  (c < 0x20 || (c >= 0x7F && c < 0xa0)) ? '.' :c;
		      else
			string[i+1] = (c < 0x20 || c >= 0x7F) ? '.' : c;

		      (void) printf ("%02x", c);
		    }

		  if ((i&group_by) == group_by)
		    putchar (' ');
		}

	      if (i)
		puts (string);

	      if (c == EOF)
		break;

	      address += 0x10;

	    }
	}

      if (fp != stdin)
	(void) fclose (fp);

    } while (*argv != NULL);
  return 0;
}

void
usage (void)
{
  fprintf (stderr, "Usage: %s [-de] [-iso]\n", progname);
  exit (1);
}