/*
    ras - Redundant Archive System
    Copyright (C) 1999  Nick Cleaton

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA

    Nick Cleaton <nick@cleaton.net>
*/
/*
 * The magic of Lagrange polynomial interpolation...
 *
 * Given (x1,y1), (x2,y2)...,(xn,yn) and x0, you want to find y0.
 * This is hardly unique unless you specify that you want a polynomial in x
 * fitted to the given points and evaluated at x0.  If the polynomial is
 * constrained to be of degree <= n-1 (in the unknown x), then it is unique.
 *
 * We use this (bytewise) to make sumfiles by letting the bytes of the
 * segfiles be y1,y2, ...yn (the x values being 0,1,...n-1) and 
 * evaluating the polynomial at other x values (n to 255) to get the
 * bytes of the segfiles. The segfile of sum ID m is generated using the
 * x value (m + segcount).
 *
 * The easy way to find this polynomial is to use a set of degree n-1 basis
 * polynomials bi(x) which are 1 at a single xi and 0 at all the other
 * xj, j != i.  (Needless to say, this is only possible if xi != xj.)
 * Then you can just evaluate y = b1(x)*y1 + ... + bn(x)*yn.
 *
 * The zeroness is assured by including the factors (x-x1), ... (x-xn)
 * in the numerator of the polynomial bi(x), omitting only (x-xi).
 * This makes a polynomial of degree n-1 in x, as desired.  Ensuring
 * that bi(xi) == 1 requires dividing by the appropriate constant,
 * (xi-x1)*...*(xi-xn), the numerator evaluated at xi, again omitting the
 * (xi-xi) term.
 *
 * In more concrete terms,
 *
 *                   (x0-x2)*(x0-x3)*(x0-x4)*...*(x0-xn)
 * y0 = y1 * -------------------------------------------
 *                   (x1-x2)*(x1-x3)*(x1-x4)*...*(x1-xn)
 *
 *           (x0-x1)*        (x0-x3)*(x0-x4)*...*(x0-xn)
 *    + y2 * -------------------------------------------
 *           (x2-x0)*        (x2-x3)*(x2-x4)*...*(x2-xn)
 * 
 *    + ...
 *
 *           (x0-x1)*(x0-x2)*(x0-x3)*(x0-x4)*...
 *    + yn * -------------------------------------------
 *           (xn-x0)*(xn-x2)*(xn-x3)*(xn-x4)*...
 *
 * It turns out to be easier in practice to pre-compute (x0-x1)*...*(x0-xn)
 * and then add the appropriate (x0-xi) term to the denominator to cancel
 * the unneeded numerator term.
 *
 * For this arithmetic to work a finite field is needed. F_256 is
 * convenient, as each number occupies one byte (the field_t type).
 * Multiplication is done through log and antilog tables, f_log[] and
 * f_exp[].  The f_exp[] * table is double-sized so that the sum of two
 * logs can be looked up in it without range reduction, although larger
 * accumulations require reduction modulo the order of the multiplicative
 * group, namely 256-1 = 255.
 *
 * Most compilers can optimize division and remainder by a power
 * of 2, but this one-off is usually handled by a general division,
 * which is slow on most machines.  There is a much faster version,
 * using the fact that x*FIELD_SIZE == x (mod FIELD_SIZE-1).  Thus,
 * x == (x%FIELD_SIZE) + (x/FIELD_SIZE) (mod FIELD_SIZE-1), using
 * truncating integer division.  This obviously reduces the range of
 * the output x.  It can't reduce it below 0..FIELD_SIZE-1, but
 * the antilog table is already big enough for that.
 *
 * The smallest x which is mapped to FIELD_SIZE or larger under this
 * reduction is 2*FIELD_SIZE-1.  The smallest x which is reduced to
 * *that* or larger than FIELD_SIZE^2 - 1.  So adding up
 * FIELD_SIZE+1 entries with all values of FIELD_SIZE-1 will require a
 * third iteration to achieve complete reduction, but less than that (all
 * that we ever do here) requires only two.
 */

#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include "common.h"
#include "utils.h"
#include "field.h"

#define FIELD_SIZE ((size_t)256)
#define FIELD_POLY 0x169

/*
 * The possible primitive polynomials of degree 8 are
 * 0x11d, 0x12b, 0x12d, 0x14d, 0x15f, 0x163, 0x165, 0x169,
 * 0x171, 0x187, 0x18d, 0x1a9, 0x1c3, 0x1cf, 0x1e7, 0x1f5
 * Other polynomials (irreducible but not primitive) are possible
 * if you use something other than x as the generator.
 */

static field_t *f_exp;
static field_t *f_log;
static field_t *f_mult;

/**************************************************************************/

/*
 * Initialize the f_exp and f_log arrays and the multiplicative lookup
 * table.
 */
void init_field()
{
   unsigned i, j, x;
   field_t prod;

   f_exp = Malloc( 2*FIELD_SIZE           * sizeof(*f_exp)  );
   f_log = Malloc( FIELD_SIZE             * sizeof(*f_log)  );
   f_mult = Malloc( FIELD_SIZE*FIELD_SIZE * sizeof(*f_mult) );
   
   /*
   ** Fill in the log and antilog tables
   */
   x = 1;
   for (i = 0; i < FIELD_SIZE-1; i++) {
      f_exp[i] = x;
      f_exp[i+FIELD_SIZE-1] = x;
      f_log[x] = i;
      x <<= 1;
      if (x & FIELD_SIZE)
	 x ^= FIELD_POLY;
   }
   /* x should be 1 here */
   f_exp[2*FIELD_SIZE-2] = f_exp[0];
   f_exp[2*FIELD_SIZE-1] = f_exp[1];
   f_log[0] = i;   /* Bogus value, FIELD_SIZE-1 */

   /*
   ** Fill in the multiplication table
   */
   for( i=0 ; i<FIELD_SIZE ; i++ )
   {  f_mult[FIELD_SIZE*i + 0] = 0;
      f_mult[FIELD_SIZE*0 + i] = 0;
   }
   for( j=1 ; j<FIELD_SIZE ; j++ )
   {  for( i=j ; i<FIELD_SIZE ; i++ )
      {  prod = f_exp[ f_log[i] + f_log[j] ];
         f_mult[FIELD_SIZE*i + j] = prod;
	 f_mult[FIELD_SIZE*j + i] = prod;
      }
   }
}

/**************************************************************************/

/* Just to make the application clear... */
#define f_add(x,y) ((x)^(y))
#define f_sub(x,y) f_add(x,y)

/*
 * This computes the coefficients used in Lagrange polynomial 
 * interpolation, returning the vector of b1(xtarget), b2(xtarget), ...,
 * bn(xtarget).
 * The y value at xtarget can be computed by taking the dot product of
 * the returned coefficient vector with the y values.
 */
field_t *Makevector(size_t segcount, int *gotpoints, int wantpoint)
{
   field_t *vec;
   field_t xi, xj;
   int i, j;
   unsigned long numer, denom;
   
   vec = Malloc(segcount * sizeof(*vec));

   /* First, accumulate the numerator, Prod(wantpoint-gotpoints[i],i=0..n) */
   numer = 0;
   for (i = 0; i < segcount; i++)
      numer += f_log[f_sub(gotpoints[i],wantpoint)];
   /* Preliminary partial reduction */
   numer = (numer%FIELD_SIZE) + (numer/FIELD_SIZE);

   /* Then, for each coefficient, compute the corresponding denominator */
   for (i = 0; i < segcount; i++) {
      xi = gotpoints[i];
      denom = 0;
      for (j = 0; j < segcount; j++) {
         xj = (i == j) ? wantpoint : gotpoints[j];
         if (xi == xj)
         {  fprintf(stderr,PROGNAME
	                   ": internal fault: indistinct xi in Makevector\n");
	    exit_because_of_error(INTERNAL_FAILURE);
	 }
         denom += f_log[f_sub(xi,xj)];
      }
      denom = (denom%FIELD_SIZE)+(denom/FIELD_SIZE);
      /* 0 <= denom < 2*FIELD_SIZE-1. */

      /* Now find numer/denom.  In log form, that's a subtract. */
      denom = numer + 2*FIELD_SIZE-2 - denom;
      denom = (denom%FIELD_SIZE)+(denom/FIELD_SIZE);
      denom = (denom%FIELD_SIZE)+(denom/FIELD_SIZE);

      vec[i] = f_exp[denom];
   }

   return vec;
}

/**************************************************************************/

void free_field()
{
   free( f_mult );
   free( f_log );
   free( f_exp );
}

/**************************************************************************/

void dotproduct(size_t segcount, uchar *inbufbase, size_t inbufoffset,
                                   uchar *outbuf, field_t *vector, size_t bytes)
{
   int i, n;
   unsigned char *src, *dst, *mult;
   
   /*  Start by making the output vector[0] multiplied by the first input  */
   mult = f_mult + (vector[0] * FIELD_SIZE);
   /*  mult now points to the base of a table of size FIELD_SIZE for
       multiplication by vector[0].                        */
   src = inbufbase;
   dst = outbuf;
   n = bytes;
   while(n--)
      *dst++ = mult[ *src++ ];

   /*  Continue by adding vector[i] multiplied by the ith input to the
       output for i>0 
   */
   for( i=1 ; i<segcount ; i++ )
   {  mult = f_mult + (vector[i] * FIELD_SIZE);
      /*  mult now points to the base of a table of size FIELD_SIZE for
          multiplication by vector[i].                        */
      src = inbufbase + (i * inbufoffset);
      dst = outbuf;
      n = bytes;
      while(n--)
         *dst++ ^= mult[ *src++ ];
   }
}

/**************************************************************************/

