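/*
 * Segment-class transition matrix generator.
 *
 * Reads corpus files, trains the models with IIS and writes the
 * resulting tables out as C source.
 *
 * generate transition matrix
 */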
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include "../include/segclass.h"
#include "iis.h"

/* Argument given to iis_create() for every transition-matrix row, and the
 * number of lambda values dumped per row. */
#define MAX_FEATURE 300
/* Number of lambda values dumped for the segment-structure model.
 * proc_corpus() creates that model with MAX_FEATURE + 40, i.e. the same
 * value. */
#define MAX_SEG_STRUCT_FEATURE 340

/* Transition matrix: one IIS input set per preceding segment class.
 * The features stored in is[prev] describe the segment that follows prev. */
struct matrix {
  struct input_set *is[SEG_SIZE];
};

/* Segment-structure model: a single IIS input set that is fed by
 * add_seg_struct_info(). */
struct seg_struct_info {
  struct input_set *is;
};

/* Small fixed-capacity list of feature indices. */
struct array {
  int len;   /* number of valid entries in f[] */
  int f[16]; /* feature indices */
};

/*
 * Register one extra sample with the input set `is` whose feature list is
 * every index 0 .. SEG_SIZE-1, using a weight of 0.01.
 * NOTE(review): presumably this keeps every class from being completely
 * unseen during training -- confirm against iis.c.
 */
static void
add_dummy_line(struct input_set *is)
{
  int all_classes[SEG_SIZE];
  int k = 0;

  while (k < SEG_SIZE) {
    all_classes[k] = k;
    k++;
  }
  iis_set_features(is, all_classes, SEG_SIZE, 0.01);
}

/*
 * Allocate the transition matrix and create one IIS input set (with
 * MAX_FEATURE as its size argument) for each of the SEG_SIZE rows.
 *
 * Never returns NULL: this is a build-time tool, so an allocation failure
 * prints a diagnostic and terminates the process.
 */
static struct matrix *
init_matrix(void)
{
  struct matrix *m = malloc(sizeof *m);
  int i;

  if (!m) {
    perror("init_matrix: malloc");
    exit(EXIT_FAILURE);
  }
  for (i = 0; i < SEG_SIZE; i++) {
    m->is[i] = iis_create(MAX_FEATURE);
    /* NOTE(review): assumes iis_create() reports failure with NULL. */
    if (!m->is[i]) {
      fprintf(stderr, "init_matrix: iis_create failed for row %d\n", i);
      exit(EXIT_FAILURE);
    }
  }
  return m;
}

/*
 * Read one line of a class-sequence corpus from fp and store the segment
 * classes it names into classes[].
 *
 * The line is split on blanks, tabs, CR and LF.  Every second non-empty
 * token (the 2nd, 4th, ...) is converted with anthy_seg_class_by_name();
 * the other tokens are skipped.  The sequence is bracketed with SEG_HEAD
 * at index 0 and SEG_TAIL at the end.
 *
 * Returns the number of entries written to classes[] (always >= 2), or 0
 * on end of file or read error.  One call consumes at most 1023
 * characters, so at most 256 classes plus the two sentinels are stored;
 * the caller's array must be at least that large.
 */
static int
read_sentence(FILE *fp, int *classes)
{
  char buf[1024];
  char *str, *tok;
  int nr;
  int x = 0; /* count of non-empty tokens seen so far */

  if (!fgets(buf, sizeof buf, fp)) {
    return 0;
  }
  classes[0] = SEG_HEAD;
  nr = 1;
  str = buf;
  while ((tok = strsep(&str, " \t\n\r"))) {
    int sc;
    if (tok[0] == '\0') {
      /* consecutive delimiters yield empty tokens */
      continue;
    }
    x++;
    if (x % 2) {
      /* odd-numbered tokens are not class names */
      continue;
    }
    sc = anthy_seg_class_by_name(tok);
    /* Progress trace goes to stderr: stdout is the default destination
     * of the generated source and must not be polluted. */
    fprintf(stderr, "(%d)", sc);
    classes[nr] = sc;
    nr++;
  }
  classes[nr] = SEG_TAIL;
  nr++;
  return nr;
}

/*
 * Feed every adjacent pair of classes[0..nr-1] into the matrix: for a
 * pair (from, to) the single feature `to` is added with weight 1.0 to the
 * input set of row `from`.  Pairs in which either value is >= SEG_SIZE
 * are ignored.
 */
static void
proc_sentence(struct matrix *m, int *classes, int nr)
{
  int pos;

  for (pos = 1; pos < nr; pos++) {
    int from = classes[pos - 1];
    int to = classes[pos];

    if (to < SEG_SIZE && from < SEG_SIZE) {
      iis_set_features(m->is[from], &to, 1, 1.0);
    }
  }
}

/*
 * Train every row of the matrix: add the dummy sample, initialise
 * lambda/delta, then run iis_iterate() with 0.001 as its second argument.
 */
static void
do_iis(struct matrix *m)
{
  int row = 0;

  while (row < SEG_SIZE) {
    struct input_set *set = m->is[row];

    add_dummy_line(set);
    iis_init_lambda_and_delta(set);
    iis_iterate(set, 0.001);
    row++;
  }
}

/*
 * Write the lambda of features 0 .. MAX_FEATURE-1 of `is` to ofp, each
 * formatted with "%f" and followed by a comma.  No newline is written.
 */
static void
dump_row(FILE *ofp, struct input_set *is)
{
  int feature = 0;

  while (feature < MAX_FEATURE) {
    fprintf(ofp, "%f,", iis_get_lambda(is, feature));
    feature++;
  }
}

/*
 * Write the trained matrix to ofp as C source:
 *   - a #define of MAX_FEATURE,
 *   - g_z[]      : iis_get_z() of each of the SEG_SIZE rows,
 *   - g_lambda[] : the rows' lambdas, one row per output line, each line
 *                  prefixed with a comment holding the row number.
 */
static void
dump_matrix(FILE *ofp, struct matrix *m)
{
  int row;

  fprintf(ofp, "#define MAX_FEATURE %d\n\n", MAX_FEATURE);

  /* normalisation terms */
  fputs("static const double g_z[] = {\n", ofp);
  for (row = 0; row < SEG_SIZE; row++) {
    double z = iis_get_z(m->is[row]);
    fprintf(ofp, "%f,", z);
  }
  fputs("\n};\n\n", ofp);

  /* weights */
  fputs("static const double g_lambda[] = {\n", ofp);
  for (row = 0; row < SEG_SIZE; row++) {
    fprintf(ofp, "/* %d */", row);
    dump_row(ofp, m->is[row]);
    fputs("\n", ofp);
  }
  fputs("};\n", ofp);
}

/*
 * Determine the format of corpus file `fn` from its first line.
 *
 * Returns  1 if the first line starts with "segments:",
 *          0 for any other first line,
 *         -1 if the file cannot be opened or has no first line.
 *
 * The file is closed again before returning on every path.
 */
static int
get_corpus_type(char *fn)
{
  char buf[1024];
  int type;
  FILE *fp = fopen(fn, "r");

  if (!fp) {
    return -1;
  }
  if (!fgets(buf, sizeof buf, fp)) {
    type = -1;
  } else if (!strncmp("segments:", buf, 9)) {
    type = 1;
  } else {
    type = 0;
  }
  fclose(fp);
  return type;
}

/*
 * Weight given to the sentence introduced by `line`.
 * Every sentence currently gets the same weight; `line` is not inspected.
 */
static double
get_sentence_weight(const char *line)
{
  const double uniform_weight = 1.0;

  (void)line;
  return uniform_weight;
}

/*
 * Parse a comma separated list of integers from `s` into `features`.
 *
 * `s` is modified in place (strtok).  Each token is converted with
 * atoi(), so trailing non-digit text in a token is ignored.  If `s`
 * contains no token at all the result is an empty list.  At most as many
 * values as features->f can hold are stored; any further values are
 * dropped with a warning on stderr.
 */
static void
parse_features(struct array *features, char *s)
{
  const int capacity = (int)(sizeof features->f / sizeof features->f[0]);
  char *tok;

  features->len = 0;
  for (tok = strtok(s, ","); tok; tok = strtok(NULL, ",")) {
    if (features->len >= capacity) {
      fprintf(stderr,
              "parse_features: more than %d features, rest ignored\n",
              capacity);
      break;
    }
    features->f[features->len] = atoi(tok);
    features->len++;
  }
}

/*
 * Add one sample to the segment-structure model.
 *
 * The sample's feature list is *cur_features followed by one extra
 * feature, pc + 300, that encodes the preceding class pc (300 equals
 * MAX_FEATURE, so it does not collide with ordinary feature indices
 * below 300).  *cur_features itself is not modified.
 *
 * The list is built in a local buffer with one more slot than
 * cur_features->f, so appending is safe even when the input list is full.
 */
static void
add_seg_struct_info(struct seg_struct_info *si,
                    int pc,
                    struct array *cur_features,
                    double weight)
{
  int f[sizeof cur_features->f / sizeof cur_features->f[0] + 1];
  const int capacity = (int)(sizeof f / sizeof f[0]) - 1;
  int len = cur_features->len;
  int i;

  /* Defend against a corrupt length so the copy stays inside both arrays. */
  if (len < 0) {
    len = 0;
  } else if (len > capacity) {
    len = capacity;
  }
  for (i = 0; i < len; i++) {
    f[i] = cur_features->f[i];
  }
  f[len] = pc + 300;
  iis_set_features(si->is, f, len + 1, weight);
}

/*
 * Read a "segments:" style corpus from fp and feed both models.
 *
 * Line handling:
 *   "segments:..."            starts a sentence: the current class becomes
 *                             SEG_HEAD and the sentence weight is fetched.
 *   "indep_word..." / "eos..." the class is taken from "seg_class=<n>"
 *                             (SEG_BUNSETSU if absent) and the feature
 *                             list from "features=<a,b,...>".  The list is
 *                             added to row `pc` (the previous class) of m
 *                             and, together with pc, to si.
 *   anything else             ignored; the previous class is kept.
 *
 * When a line has no "features=" field the list parsed from the most
 * recent line that had one is reused (empty before the first such line).
 * Samples whose previous class is outside the matrix are skipped, in the
 * same way proc_sentence() skips out-of-range classes.
 */
static void
read_morph_file(struct seg_struct_info *si, struct matrix *m, FILE *fp)
{
  const int nr_classes = (int)(sizeof m->is / sizeof m->is[0]);
  char buf[1024];
  int pc = 0;
  int cc = 0; /* starts equal to pc so unmatched leading lines are no-ops */
  struct array features;
  double weight = 1.0;

  /* Start with an empty list so it is never read uninitialised. */
  features.len = 0;

  while (fgets(buf, sizeof buf, fp)) {
    if (!strncmp(buf, "segments:", 9)) {
      cc = SEG_HEAD;
      weight = get_sentence_weight(buf);
    } else if (!strncmp(buf, "indep_word", 10) ||
               !strncmp(buf, "eos", 3)) {
      char *s;

      /* seg_class must be read first: parse_features() modifies buf */
      s = strstr(buf, "seg_class=");
      cc = SEG_BUNSETSU;
      if (s) {
        cc = atoi(s + 10);
      }
      s = strstr(buf, "features=");
      if (s) {
        parse_features(&features, s + 9);
      }
      if (pc >= 0 && pc < nr_classes) {
        /* transition model: features of this segment given previous class */
        iis_set_features(m->is[pc], features.f, features.len, weight);
        /* segment-structure model */
        add_seg_struct_info(si, pc, &features, weight);
      }
    }
    /* the current class is the previous class of the next line */
    pc = cc;
  }
}

/*
 * Read one corpus file into the models.
 *
 * The format is detected with get_corpus_type(): a file whose first line
 * starts with "segments:" goes through read_morph_file(); any other file
 * is read as class sequences, one sentence per read_sentence() call.
 * A file that cannot be opened or has no first line contributes nothing;
 * a warning is printed to stderr so the omission does not go unnoticed.
 */
static void
read_file(struct seg_struct_info *si, struct matrix *m, char *fn)
{
  int classes[1000];
  int nr;
  FILE *ifp;
  int mode = get_corpus_type(fn);

  if (mode < 0) {
    fprintf(stderr, "warning: cannot read corpus '%s', skipped\n", fn);
    return;
  }
  ifp = fopen(fn, "r");
  if (!ifp) {
    fprintf(stderr, "warning: cannot open corpus '%s', skipped\n", fn);
    return;
  }
  if (mode) {
    /* corpus with per-segment feature lines */
    read_morph_file(si, m, ifp);
  } else {
    /* corpus with one class sequence per line */
    while ((nr = read_sentence(ifp, classes))) {
      proc_sentence(m, classes, nr);
    }
  }
  fclose(ifp);
}

/*
 * Write the segment-structure model to ofp as C source: a #define of
 * MAX_SEG_STRUCT_FEATURE, the scalar g_seg_z, and g_seg_lambda[] holding
 * the lambda of features 0 .. MAX_SEG_STRUCT_FEATURE-1.
 */
static void
dump_seg_struct_info_array(FILE *ofp, struct seg_struct_info *si)
{
  int feature = 0;

  fprintf(ofp, "#define MAX_SEG_STRUCT_FEATURE %d\n\n", MAX_SEG_STRUCT_FEATURE);
  fprintf(ofp, "static const double g_seg_z = %f;\n", iis_get_z(si->is));
  fputs("static const double g_seg_lambda[] = {\n", ofp);
  while (feature < MAX_SEG_STRUCT_FEATURE) {
    fprintf(ofp, "%f,", iis_get_lambda(si->is, feature));
    feature++;
  }
  fputs("};\n", ofp);
}

static void
proc_corpus(int nr_fn, char **fns, FILE *ofp)
{
  int i;
  struct matrix *m;
  struct seg_struct_info si;
  /**/
  m = init_matrix();
  si.is = iis_create(MAX_FEATURE + 40);
  for (i = 0; i < nr_fn; i++) {
    read_file(&si, m, fns[i]);
  }
  /**/
  do_iis(m);
  iis_init_lambda_and_delta(si.is);
  iis_iterate(si.is, 0.001);
  /**/
  fprintf(ofp, "#ifndef CAND_INFO\n");
  dump_matrix(ofp, m);
  fprintf(ofp, "#else\n");
  dump_seg_struct_info_array(ofp, &si);
  fprintf(ofp, "#endif\n");
}

/*
 * Usage: <prog> [-o OUTPUT] CORPUS...
 *
 * Every argument other than "-o OUTPUT" is treated as a corpus file.  The
 * generated source is written to OUTPUT, or to stdout when no usable -o
 * option is given.  If -o is given several times the last usable one
 * wins.
 *
 * Returns 0 on success and EXIT_FAILURE when memory cannot be allocated
 * or the output file cannot be closed cleanly.
 */
int
main(int argc, char **argv)
{
  FILE *ofp = NULL;
  int i;
  int nr_input = 0;
  char **input_files;

  /* argc entries are always enough; never request a zero-sized block */
  input_files = malloc(sizeof *input_files * (size_t)(argc > 0 ? argc : 1));
  if (!input_files) {
    perror("malloc");
    return EXIT_FAILURE;
  }

  for (i = 1; i < argc; i++) {
    char *arg = argv[i];
    if (!strcmp(arg, "-o")) {
      FILE *fp;
      if (i + 1 >= argc) {
        /* argv[argc] is NULL and must not reach fopen() */
        fprintf(stderr, "warning: -o requires a file name, ignored\n");
        continue;
      }
      fp = fopen(argv[i + 1], "w");
      if (fp) {
        if (ofp) {
          fclose(ofp);
        }
        ofp = fp;
      } else {
        /* keep the fall-back to stdout, but say so */
        perror(argv[i + 1]);
      }
      i++;
    } else {
      input_files[nr_input] = arg;
      nr_input++;
    }
  }
  if (!ofp) {
    ofp = stdout;
  }
  proc_corpus(nr_input, input_files, ofp);
  free(input_files);

  /* closing flushes; a failure here means the output is incomplete */
  if (ofp != stdout && fclose(ofp) != 0) {
    perror("fclose");
    return EXIT_FAILURE;
  }
  return 0;
}
