/* Conversion of files between different charsets and usages.
   Copyright (C) 1990, 1993 Free Software Foundation, Inc.
   Francois Pinard <pinard@iro.umontreal.ca>, 1993.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#include "recode.h"

/* Maximum number of charset values.  This is the capacity of
   charset_array; find_charset reports a fatal error once it is full.  */
#define MAX_CHARSETS 200

/* Hash table size for charset names.  This is the number of buckets in
   hash_table, and the modulus used by hash_string.  */
#define HASH_TABLE_SIZE 997

/* Maximum number of characters per 10646 symbol.  The static buffer
   returned by code_to_symbol holds that many characters plus a NUL.  */
#define MAX_SYMBOL_SIZE 9

/* Known character sets.  Each charset name or alias, in its cleaned up
   form, owns one entry of a chained hash table.  The elements of
   hash_table are the bucket heads; find_charset and declare_alias
   allocate the further entries of a bucket and link them through NEXT.  */

struct hash
  {
    const char *name;		/* cleaned up name or alias, or NULL if unused */
    CHARSET *charset;		/* associated charset */
    struct hash *next;		/* next entry of the same bucket, or NULL */
  };
/* Bucket heads, indexed by the value of hash_string.  */
struct hash hash_table[HASH_TABLE_SIZE];
/* Storage for the charsets themselves, handed out in order by
   find_charset.  */
CHARSET charset_array[MAX_CHARSETS];
/* Number of entries of charset_array already in use.  */
int number_of_charsets;

/* Array of strings ready for argmatch: every cleaned up charset name and
   alias found in the hash table, followed by a NULL sentinel.  It is
   built by make_argmatch_array and searched by clean_charset_name.  */
static const char **argmatch_array;

#include "charset.h"

/*--------------------------------------.
| Prepare charsets for initialization.  |
`--------------------------------------*/

void
prepare_charset_initialization (void)
{
  int counter;

  for (counter = 0; counter < HASH_TABLE_SIZE; counter++)
    {
      hash_table[counter].name = NULL;
      hash_table[counter].next = NULL;
    }
  number_of_charsets = 0;
}

/*---------------------------------------------------------------.
| Return a newly allocated, canonical spelling of charset NAME.  |
`---------------------------------------------------------------*/

/* Upper case letters are folded to lower case, lower case letters and
   digits are kept as is, and any other character is dropped.  The result
   comes from xmalloc and belongs to the caller.  */

static char *
cleanup_charset_name (const char *name)
{
  char *cleaned = xmalloc (strlen (name) + 1);
  char *tail = cleaned;
  const unsigned char *cursor;

  for (cursor = (const unsigned char *) name; *cursor != '\0'; cursor++)
    {
      int letter = *cursor;

      if (letter >= 'A' && letter <= 'Z')
        letter += 'a' - 'A';
      else if (!((letter >= 'a' && letter <= 'z')
                 || (letter >= '0' && letter <= '9')))
        continue;		/* neither a letter nor a digit */

      *tail++ = letter;
    }
  *tail = '\0';

  return cleaned;
}

/*-------------------------------------------------.
| Map STRING to a bucket index of the hash table.  |
`-------------------------------------------------*/

/* A plain multiplicative hash, reduced modulo HASH_TABLE_SIZE after each
   character.  It is naive, yet sufficient for our purpose.  */

static int
hash_string (const char *string)
{
  const unsigned char *cursor = (const unsigned char *) string;
  int bucket = 0;

  while (*cursor != '\0')
    {
      bucket = bucket * 31 + (int) *cursor++;
      bucket %= HASH_TABLE_SIZE;
    }
  return bucket;
}

/*--------------------------------------------------------------------------.
| Return the charset from its NAME or alias name.  If it does not already   |
| exist, add a new charset entry and initialize it with a brand new value.  |
`--------------------------------------------------------------------------*/

CHARSET *
find_charset (const char *name)
{
  char *hashname;
  struct hash *hash;
  CHARSET *charset;

  /* Search the whole hash bucket and return any match.  */

  hashname = cleanup_charset_name (name);
  for (hash = hash_table + hash_string (hashname);
       hash->name;
       hash = hash->next)
    {
      if (strcmp (hashname, hash->name) == 0)
	{
	  free (hashname);
	  return hash->charset;
	}
      if (!hash->next)
	break;
    }

  /* A new charset has to be created.  */

  if (number_of_charsets == MAX_CHARSETS)
    error (EXIT_FAILURE, 0, "MAX_CHARSETS is too small");

  charset = charset_array + number_of_charsets++;

  /* If the current slot is already used, create an overflow entry and
     initialize it enough so it could be taken for the current slot.  */

  if (hash->name)
    {
      hash->next = (struct hash *) xmalloc (sizeof (struct hash));
      hash = hash->next;
      hash->next = NULL;
    }

  /* Initialize the current slot with the new charset.  */

  hash->name = hashname;
  hash->charset = charset;

  charset->name = name;
  charset->ignore = 0;
  charset->table = NULL;

  return charset;
}

/*-------------------------------------------------------------------------.
| Have NAME as an alternate charset name for OLD_NAME.  Create OLD_NAME if |
| it does not exist already.						   |
`-------------------------------------------------------------------------*/

void
declare_alias (const char *name, const char *old_name)
{
  char *hashname;
  struct hash *hash;
  CHARSET *old_charset;

  /* Find the old value.  */

  old_charset = find_charset (old_name);

  /* Search the whole hash bucket.  */

  hashname = cleanup_charset_name (name);
  for (hash = hash_table + hash_string (hashname);
       hash->name;
       hash = hash->next)
    {
      if (strcmp (hashname, hash->name) == 0)
	{
	  if (hash->charset != old_charset)
	    error (EXIT_FAILURE, 0, "Charset %s already exists and is not %s",
		   name, old_name);
	  free (hashname);
	  return;
	}
      if (!hash->next)
	break;
    }

  /* If the current slot is already used, create an overflow entry and
     initialize it enough so it could be taken for the current slot.  */

  if (hash->name)
    {
      hash->next = (struct hash *) xmalloc (sizeof (struct hash));
      hash = hash->next;
      hash->next = NULL;
    }

  /* Initialize the current slot with the new charset.  */

  hash->name = hashname;
  hash->charset = old_charset;
}

/*------------------------------------------.
| Construct the string array for argmatch.  |
`------------------------------------------*/

void
make_argmatch_array (void)
{
  struct hash *hash;		/* cursor in charsets */
  int number;			/* number of strings */
  int counter;			/* all purpose counter */

  /* Count how many strings we need.  */

  number = 0;
  for (counter = 0; counter < HASH_TABLE_SIZE; counter++)
    for (hash = hash_table + counter;
	 hash && hash->name;
	 hash = hash->next)
      number++;

  /* Allocate the argmatch array, with place for a NULL sentinel.  */

  argmatch_array
    = (const char **) xmalloc ((number + 1) * sizeof (const char *));

  /* Fill in the array.  */

  number = 0;
  for (counter = 0; counter < HASH_TABLE_SIZE; counter++)
    for (hash = hash_table + counter;
	 hash && hash->name;
	 hash = hash->next)
      argmatch_array[number++] = hash->name;

  argmatch_array[number] = NULL;
}

/*------------------------------------------------------------------.
| Resolve NAME, possibly abbreviated, to its cleaned up full form.  |
`------------------------------------------------------------------*/

/* A NULL or empty NAME stands for DEFAULT_CHARSET when that macro is
   defined.  The value returned is an element of argmatch_array, which
   make_argmatch_array should have built beforehand.  An ambiguous or
   unknown name is diagnosed, then usage is called with EXIT_FAILURE.  */

const char *
clean_charset_name (const char *name)
{
  char *key;
  int match;

  /* Let usage know that a charset is being decoded.  */

  decoding_charset_flag = 1;

  /* Replace a missing name.  */

#ifdef DEFAULT_CHARSET
  if (name == NULL || *name == '\0')
    name = DEFAULT_CHARSET;
#else
  if (name == NULL)
    name = "";
#endif

  /* Match the cleaned up name against all known ones.  */

  key = cleanup_charset_name (name);
  match = argmatch (key, argmatch_array);
  free (key);

  /* Diagnose a failed match.  NOTE(review): the cases below are written
     as if usage (EXIT_FAILURE) never returns -- confirm.  */

  switch (match)
    {
    case -2:
      error (0, 0, "Ambiguous charset `%s'", name);
      usage (EXIT_FAILURE);

    case -1:
      error (0, 0, "Unknown charset `%s'", name);
      usage (EXIT_FAILURE);
    }

  /* Decoding is over.  */

  decoding_charset_flag = 0;
  return argmatch_array[match];
}

/*-------------------------------------------------------------------.
| qsort comparator: order by true charset name, then by entry name.  |
`-------------------------------------------------------------------*/

static int
compare_struct_hash (const void *void_first, const void *void_second)
{
  const struct hash *first = (const struct hash *) void_first;
  const struct hash *second = (const struct hash *) void_second;
  int order = strcmp (first->charset->name, second->charset->name);

  /* Entries of a same charset are ordered on their own name.  */

  return order != 0 ? order : strcmp (first->name, second->name);
}

/*------------------------------.
| List all available charsets.  |
`------------------------------*/

void
list_all_charsets (void)
{
  struct hash *array;
  struct hash *hash;
  int number;
  int counter;

  /* Count how many charsets we have.  */

  number = 0;
  for (counter = 0; counter < HASH_TABLE_SIZE; counter++)
    for (hash = hash_table + counter;
	 hash && hash->name;
	 hash = hash->next)
      number++;

  /* Allocate a structure to hold them.  */

  array = (struct hash *) xmalloc (number * sizeof (struct hash));

  /* Copy all charsets in it.  */

  number = 0;
  for (counter = 0; counter < HASH_TABLE_SIZE; counter++)
    for (hash = hash_table + counter;
	 hash && hash->name;
	 hash = hash->next)
      array[number++] = *hash;

  /* Sort it.  */

  qsort (array, number, sizeof (struct hash), compare_struct_hash);

  /* Print it, one line per charset, giving the true charset name first,
     followed by all its alias in lexicographic order.  */

  for (hash = array; hash < array + number; hash++)
    {

      /* Begin a new line with the true charset name when it changes.  */

      if (hash == array || hash->charset->name != (hash - 1)->charset->name)
	{
	  if (hash != array)
	    printf ("\n");
	  printf ("%s", hash->charset->name);
	}

      /* Print the charset name or alias in its cleaned up form.  */

      printf (" %s", hash->name);
    }
  printf ("\n");

  /* Release the work array.  */

  free (array);
}

/*-------------------------------------------------------------------.
| Return the 10646 symbol of CODE in CHARSET, or NULL if undefined.  |
`-------------------------------------------------------------------*/

/* The table of CHARSET is indexed by CODE / 32.  Each entry is either
   NULL or a string holding, for each of 32 codes, a field of
   CHARSET->size characters.  A field starting with a space means no
   symbol; otherwise the symbol is the field with its spaces removed.
   The result lives in a static buffer, overwritten by the next call.
   NOTE(review): nothing here checks CODE against the table bounds, nor
   CHARSET->size against MAX_SYMBOL_SIZE -- presumably the callers and
   the tables guarantee both; confirm.  */

static char *
code_to_symbol (CHARSET *charset, int code)
{
  static char symbol[MAX_SYMBOL_SIZE + 1];
  const char *row;		/* string for the 32 codes around CODE */
  const char *field;		/* field of CODE within ROW */
  char *out;			/* cursor in SYMBOL */
  int index;			/* cursor in FIELD */

  row = (*charset->table)[code / 32];
  if (row == NULL)
    return NULL;

  field = row + charset->size * (code % 32);
  if (field[0] == ' ')
    return NULL;

  out = symbol;
  for (index = 0; index < charset->size; index++)
    if (field[index] != ' ')
      *out++ = field[index];
  *out = '\0';

  return symbol;
}

/*------------------------------------------------------------------.
| Print the symbol of CODE in CHARSET, blank padded to full width.  |
`------------------------------------------------------------------*/

/* Exactly CHARSET->size characters are written on standard output: the
   symbol, if any, followed by as many spaces as needed.  */

static void
print_symbol (CHARSET *charset, int code)
{
  const char *text = code_to_symbol (charset, code);
  int column = 0;

  if (text != NULL)
    {
      while (text[column] != '\0' && column < charset->size)
        {
          putchar (text[column]);
          column++;
        }
    }

  while (column < charset->size)
    {
      putchar (' ');
      column++;
    }
}

/*------------------------------------------------------.
| Print a full CHARSET description on standard output.  |
`------------------------------------------------------*/

/* After a heading line, one line is printed for each code from 0 to 255
   having a symbol.  It gives the code in decimal, octal and hexadecimal,
   the 10646 symbol, and the character name whenever symbol_to_charname
   returns one.  An empty line precedes the first line printed, and each
   line printed right after a run of codes without a symbol.  It is a
   fatal error for CHARSET to have no table.  */

void
list_full_charset (CHARSET *charset)
{
  int insert_white;		/* insert a white line before printing */
  int code;			/* code counter */
  const char *symbol;		/* symbol for code */
  const char *charname;		/* charname for code */

  /* Ensure we have a double table to play with.  */

  if (!charset->table)
    error (EXIT_FAILURE, 0,
	   "Cannot list `%s', no 10646 names available for this charset",
	   charset->name);

  /* Print the long table.  */

  printf ("dec  oct hex    ch   %s\n", charset->name);
  insert_white = 1;

  for (code = 0; code < 256; code++)
    {
      symbol = code_to_symbol (charset, code);
      if (!symbol)
	{
	  insert_white = 1;
	  continue;
	}

      if (insert_white)
	{
	  printf ("\n");
	  insert_white = 0;
	}

      /* A precision alone yields the leading zeroes.  The `0' flag is
	 ignored whenever a precision is given, so it is not written.  */

      printf ("%3d  %.3o  %.2x    ", code, code, code);

      /* print_symbol calls code_to_symbol again for the same code, so the
	 static buffer SYMBOL points to is rewritten with the same text.  */

      print_symbol (charset, code);

      charname = symbol_to_charname (symbol);
      if (charname)
	printf ("   %s", charname);
      printf ("\n");
    }
}

/*------------------------------------------------------------------.
| Print a concise, tabular CHARSET description on standard output.  |
`------------------------------------------------------------------*/

/* After a line giving the charset name, each half of the code space
   (0..127, then 128..255) is printed as 16 lines of 8 columns, unless
   the four table entries of that half are all NULL.  Each cell gives a
   code, formatted according to list_format, then its symbol.  Codes
   progress by 1 going down and by 16 going right.  An unexpected
   list_format value is handled like DECIMAL_FORMAT.  It is a fatal error
   for CHARSET to have no table.  */

void
list_concise_charset (CHARSET *charset)
{
  DOUBLE_TABLE *table;		/* double table */
  int half;			/* half 0, half 1 of the table */
  const char *format;		/* format string */
  int counter;			/* table entry, then line first code */
  int counter2;			/* code offset of the column */
  int code;			/* code value */

  /* Ensure we have a double table to play with.  */

  if (!charset->table)
    error (EXIT_FAILURE, 0,
	   "Cannot list `%s', no 10646 names available for this charset",
	   charset->name);
  table = charset->table;

  printf ("%s\n", charset->name);

  /* Select format for numbers.  A precision alone yields the leading
     zeroes: the `0' flag is ignored whenever a precision is given.  The
     default case ensures FORMAT is always set before it is used.  */

  switch (list_format)
    {
    case FULL_FORMAT:
      return;			/* cannot happen */

    case OCTAL_FORMAT:
      format = "%.3o";
      break;

    case HEXADECIMAL_FORMAT:
      format = "%.2x";
      break;

    case NO_FORMAT:
    case DECIMAL_FORMAT:
    default:
      format = "%3d";
      break;
    }

  /* Print both halves of the table.  */

  for (half = 0; half < 2; half++)
    {

      /* Skip printing this half if it is empty, that is, if none of its
	 four table entries is set.  */

      for (counter = 4 * half; counter < 4 * half + 4; counter++)
	if ((*table)[counter])
	  break;
      if (counter == 4 * half + 4)
	continue;

      /* Print this half, one line for each of the first 16 codes.  */

      printf ("\n");
      for (counter = 128 * half; counter < 128 * half + 16; counter++)
	for (counter2 = 0; counter2 < 128; counter2 += 16)
	  {
	    if (counter2 > 0)
	      printf ("  ");

	    code = counter + counter2;
	    printf (format, code);
	    printf (" ");
	    print_symbol (charset, code);

	    /* The eighth column ends the line.  */

	    if (counter2 == 112)
	      printf ("\n");
	  }
    }
}
