/* This file is part of Malaga, a system for Natural Language Analysis.
 * Copyright (C) 1995-1999 Bjoern Beutel
 *
 * Bjoern Beutel
 * Universitaet Erlangen-Nuernberg
 * Abteilung fuer Computerlinguistik
 * Bismarckstrasse 12
 * D-91054 Erlangen
 * e-mail: malaga@linguistik.uni-erlangen.de 
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

/* description ==============================================================*/

/* This module compiles malaga symbol files. */

/* includes =================================================================*/

#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include "basic.h"
#include "pools.h"
#include "values.h"
#include "scanner.h"
#include "files.h"
#include "malaga_files.h"
#include "symbols.h"

#undef GLOBAL
#define GLOBAL

#include "sym_compiler.h"

/* types ====================================================================*/

/* A node of the symbol table. The table is a binary tree whose nodes are
 * ordered by symbol name (see "find_symbol_node"). */
typedef struct SYMBOL_NODE_T
{
  struct SYMBOL_NODE_T *left_son; /* sons whose names are ordered before <name> */
  struct SYMBOL_NODE_T *right_son; /* sons whose names are ordered after <name> */
  string_t name; /* name of the node; stored in <string_pool> */
  bool_t atomic; /* TRUE iff the symbol is atomic */
  symbol_t symbol; /* symbol which this node represents */
  symbol_entry_t *symbol_ptr; /* pointer to the symbol's entry in <symbol_pool> */
} symbol_node_t;

/* variables ================================================================*/

LOCAL symbol_node_t *symbol_tree = NULL; /* root node of the symbol tree */

LOCAL pool_t symbol_pool; /* symbol entries, one per entered symbol */
LOCAL pool_t value_pool; /* lists of atomic symbols */
LOCAL pool_t string_pool; /* symbol names */

/* functions ================================================================*/

LOCAL void free_symbols (symbol_node_t **node)
/* Release every node of the symbol tree whose root is <*node>.
 * Both subtrees are released before the root itself is passed to "free_mem".
 * Nothing happens if the tree is empty (<*node> == NULL). */
{
  if (*node == NULL)
    return;

  free_symbols (&(*node)->left_son);
  free_symbols (&(*node)->right_son);
  free_mem (node);
}

/*---------------------------------------------------------------------------*/

LOCAL symbol_node_t *find_symbol_node (string_t name, bool_t new_symbol)
/* Look up <name> in the symbol tree; names are compared with
 * "strcmp_no_case".
 * If <new_symbol> == TRUE, <name> must not be in the tree yet: a fresh node
 * is linked into the tree and returned. Only its son links are initialised;
 * all other fields are left for the caller to fill in.
 * If <new_symbol> == FALSE, <name> must already be in the tree and its node
 * is returned.
 * A violated expectation is reported via "error".
 * NOTE: this function relies on "error" not returning (presumably it is
 * declared that way elsewhere -- confirm). */
{
  symbol_node_t **link = &symbol_tree; /* link being examined; may be NULL */
  symbol_node_t *node;

  /* Walk down the tree until <name> is found or an empty link is reached. */
  while (*link != NULL)
  {
    int_t order;

    node = *link;
    order = strcmp_no_case (name, node->name);
    if (order == 0)
    {
      /* A node with this name exists. */
      if (! new_symbol)
        return node;
      error ("symbol \"%s\" is already defined", name);
    }
    else if (order < 0)
      link = &node->left_son;
    else
      link = &node->right_son;
  }

  /* <name> is not in the tree. */
  if (! new_symbol)
    error ("symbol \"%s\" is not defined", name);

  /* Create a leaf node and hook it into the empty link. */
  node = new_mem (sizeof (symbol_node_t));
  node->left_son = NULL;
  node->right_son = NULL;
  *link = node;
  return node;
}

/*---------------------------------------------------------------------------*/

LOCAL symbol_t find_atomic_symbol (string_t name)
/* Return the symbol that has been entered under <name>.
 * It is an error if <name> is not defined or if the symbol is not atomic. */
{
  symbol_node_t *found = find_symbol_node (name, FALSE);

  if (! found->atomic)
    error ("symbol \"%s\" is not atomic", name);

  return found->symbol;
}

/*---------------------------------------------------------------------------*/

LOCAL void enter_symbol (string_t name, value_t atoms)
/* Define <name> as a new symbol and record it in the symbol tree and in the
 * pools. The new symbol's number is the current item count of <symbol_pool>.
 * If <atoms> == NULL, the symbol is atomic; its atom list is then built on
 * the value stack from the symbol itself (this resets <top>).
 * Otherwise <atoms> is the symbol's atom list.
 * It is an error if the symbol limit is reached or if <name> is already
 * defined. */
{
  symbol_entry_t entry; /* pool entry that is assembled for the new symbol */
  symbol_node_t *new_node;
  symbol_t number = pool_items (symbol_pool);

  if (number >= SYMBOL_MAX)
    error ("too many symbols");

  /* Create the tree node and copy the name into <string_pool>. */
  new_node = find_symbol_node (name, TRUE);
  new_node->name = copy_string_to_pool (string_pool, name, &entry.name);
  new_node->symbol = number;
  new_node->atomic = (atoms == NULL);

  if (new_node->atomic)
  {
    /* An atomic symbol gets a list with itself as the only element. */
    top = 0;
    push_symbol_value (number);
    build_list (1);
    atoms = value_stack[0];
  }

  /* Store the atom list, then the completed entry. */
  copy_value_to_pool (value_pool, atoms, &entry.atoms);
  new_node->symbol_ptr = (symbol_entry_t *) copy_to_pool (symbol_pool,
                                                          &entry, 1, NULL);
}

/*---------------------------------------------------------------------------*/

LOCAL void parse_atom_list (value_t *atom_list_ptr)
/* STACK EFFECTS: (nothing) -> <list>.
 * Parse an atom list: '<', one or more identifiers separated by ',', and '>'.
 * The value stack is reset (<top> = 0) before parsing, so the resulting list
 * is in <value_stack>[0]; it is also stored in <*atom_list_ptr>.
 * It is an error if an identifier is not a defined atomic symbol, if a symbol
 * occurs twice, if the list has fewer than 2 atoms, or if an already entered
 * symbol has the same atom list (as judged by "compare_atom_lists"). */
{
  int_t i;

  top = 0;
  test_token ('<');
  for (;;)
  {
    read_next_token ();
    test_token (TOK_IDENT);
    push_symbol_value (find_atomic_symbol (token_name));

    /* The atom just pushed must differ from every atom pushed before it. */
    i = 0;
    while (i < top - 1)
    {
      if (values_equal (value_stack[top - 1], value_stack[i]))
        error ("symbol \"%s\" twice in atom list", token_name);
      i++;
    }

    read_next_token (); /* Read after <token_name>. */
    if (next_token != ',')
      break;
  }
  parse_token ('>');

  if (top < 2)
    error ("atom list must contain at least 2 atoms");

  /* Combine all atoms on the stack into a single list. */
  build_list (top);

  /* No symbol entered so far may have an identical atom list. */
  for (i = 0; i < pool_items (symbol_pool); i++)
  {
    symbol_entry_t *old_entry = (symbol_entry_t *) pool_item (symbol_pool, i);
    value_t old_atoms = (value_t) pool_item (value_pool, old_entry->atoms);

    if (compare_atom_lists (value_stack[0], old_atoms) == 0)
      error ("atom list is same as for \"%s\"",
             (string_t) pool_item (string_pool, old_entry->name));
  }

  *atom_list_ptr = value_stack[0];
}

/*---------------------------------------------------------------------------*/

LOCAL void parse_symbols (void)
/* Parse the current symbol source up to its end (<next_token> == EOF).
 * Each statement is either an include statement (TOK_INCLUDE, a string, ';'),
 * which is parsed recursively, or a symbol definition (an identifier,
 * optionally TOK_ASSIGN with an atom list, ';'), which is entered via
 * "enter_symbol". */
{
  while (next_token != EOF)
  {
    if (next_token != TOK_INCLUDE) /* Read a symbol. */
    {
      value_t atoms = NULL; /* stays NULL if no atom list is given */
      string_t name_copy;

      /* Keep a private copy of the name while further tokens are read. */
      test_token (TOK_IDENT);
      name_copy = new_string (token_name, NULL);

      read_next_token ();
      if (next_token == TOK_ASSIGN)
      {
        read_next_token ();
        parse_atom_list (&atoms);
      }

      enter_symbol (name_copy, atoms);
      free_mem (&name_copy);
      parse_token (';');
    }
    else /* Include a new file. */
    {
      string_t included_file;

      read_next_token ();
      test_token (TOK_STRING);

      /* The path is made absolute with respect to the current file name. */
      included_file = absolute_path (token_string, current_file_name ());
      begin_include (included_file);
      parse_symbols ();
      end_include ();
      free_mem (&included_file);
      parse_token (';');
    }
  }
}

/*---------------------------------------------------------------------------*/

LOCAL void write_symbols (string_t file_name)
/* Write the compiled symbol table to the file <file_name>.
 * The file consists of a symbol header followed by the contents of
 * <symbol_pool>, <value_pool> and <string_pool>, in this order. */
{
  symbol_header_t header;
  FILE *out;

  out = open_stream (file_name, "wb");

  /* Fill in the symbol file header; the sizes are the pools' item counts. */
  set_header (&header.common_header, SYMBOL_FILE, SYMBOL_CODE_VERSION);
  header.symbols_size = pool_items (symbol_pool);
  header.values_size = pool_items (value_pool);
  header.strings_size = pool_items (string_pool);

  /* The header comes first ... */
  write_vector (&header, sizeof (header), 1, out, file_name);

  /* ... followed by the three tables. */
  write_pool (symbol_pool, out, file_name);
  write_pool (value_pool, out, file_name);
  write_pool (string_pool, out, file_name);

  close_stream (&out, file_name);
}

/*---------------------------------------------------------------------------*/

LOCAL void init_sym_compiler (void)
/* Set up this module: create the three pools that collect the compiled
 * symbol table. */
{
  /* One item per symbol entry. */
  symbol_pool = new_pool (sizeof (symbol_entry_t));

  /* Atom lists. NOTE(review): the item size is that of a symbol, while
   * "copy_value_to_pool" stores values here -- presumably a value cell has
   * the same size; confirm. */
  value_pool = new_pool (sizeof (symbol_t));

  /* Symbol names, stored character by character. */
  string_pool = new_pool (sizeof (char));
}
  
/*---------------------------------------------------------------------------*/

LOCAL void terminate_sym_compiler (void)
/* Shut down this module: release the three pools and the symbol tree. */
{
  /* Release the pools first ... */
  free_pool (&symbol_pool);
  free_pool (&value_pool);
  free_pool (&string_pool);

  /* ... then the tree nodes. */
  free_symbols (&symbol_tree);
}
  
/*---------------------------------------------------------------------------*/

GLOBAL void compile_symbol_file (string_t source_file, 
				 string_t object_file,
				 string_t old_symbol_file)
/* Compile the symbol file <source_file> and write the result to
 * <object_file>.
 * If <old_symbol_file> != NULL, all symbols of that file are entered first,
 * so they are part of the new file. Otherwise the standard symbols are
 * entered first. */
{
  init_sym_compiler ();

  if (old_symbol_file == NULL)
  {
    /* Enter the standard symbols in the same order as in "values.h". */
    static string_t standard_symbols[] =
    {
      "nil", "yes", "no", "symbol", "string", "number", "list", "record",
      NULL /* end marker */
    };
    string_t *name_ptr;

    for (name_ptr = standard_symbols; *name_ptr != NULL; name_ptr++)
      enter_symbol (*name_ptr, NULL);
  }
  else
  {
    symbol_t old_symbol;

    /* Enter the symbols from "old_symbol_file". */
    init_symbols (old_symbol_file);
    for (old_symbol = 0; old_symbol < number_of_symbols (); old_symbol++)
    {
      value_t old_atoms = get_atoms (old_symbol);

      /* An atom list with at most one element denotes an atomic symbol;
       * "enter_symbol" expects NULL for those. */
      if (get_list_length (old_atoms) < 2)
        old_atoms = NULL;
      enter_symbol (get_symbol_name (old_symbol), old_atoms);
    }
    terminate_symbols ();
  }

  /* Parse the source file. */
  begin_include (source_file);
  parse_symbols ();
  if (next_token != EOF)
    error ("symbol definition expected");
  end_include ();

  write_symbols (object_file);

  terminate_sym_compiler ();
}

/* end of file ==============================================================*/
