/* ---------------------------------------------------------------------
 *
 * -- Automatically Tuned Linear Algebra Software (ATLAS)
 *    (C) Copyright 1999 All Rights Reserved
 *
 * -- ATLAS routine -- Version 2.0 -- December 25, 1999
 *
 * -- Suggestions,  comments,  bugs reports should be sent to the follo-
 *    wing e-mail address: atlas@cs.utk.edu
 *
 *  Author         : Antoine P. Petitet
 * University of Tennessee - Innovative Computing Laboratory
 * Knoxville TN, 37996-1301, USA.
 *
 * ---------------------------------------------------------------------
 *
 * -- Copyright notice and Licensing terms:
 *
 * Redistribution  and  use in  source and binary forms, with or without
 * modification, are  permitted provided  that the following  conditions
 * are met:
 *
 * 1) Redistributions  of  source  code  must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2) Redistributions in binary form must reproduce  the above copyright
 *    notice,  this list of  conditions and the  following disclaimer in
 *    the documentation and/or other materials provided with the distri-
 *    bution.
 * 3) All advertising materials mentioning features or use of this soft-
 *    ware must display the folowing acknowledgement:
 *    This product includes software developed by the ATLAS group of the
 *    University of Tennesee, Knoxville and its contributors.
 * 4) The names of the  University of Tennessee,  Knoxville,  the  ATLAS
 *    group, or the names of its contributors may not be used to endorse
 *    or  promote products derived  from  this software without specific
 *    prior written permission.
 *
 * -- Disclaimer:
 *
 * The University of Tennessee, Knoxville,  the ATLAS group,  or the au-
 * thors make  no representations about the suitability of this software
 * for any purpose.  This software is provided ``as is'' without express
 * or implied warranty.
 *
 * ---------------------------------------------------------------------
 */
/*
 * Include files
 */
#include <stddef.h>

#include "atlas_refmisc.h"
#include "atlas_reflevel3.h"

void ATL_zrefgemm
(
   const enum ATLAS_TRANS     TRANSA,
   const enum ATLAS_TRANS     TRANSB,
   const int                  M,
   const int                  N,
   const int                  K,
   const double               * ALPHA,
   const double               * A,
   const int                  LDA,
   const double               * B,
   const int                  LDB,
   const double               * BETA,
   double                     * C,
   const int                  LDC
)
{
/*
 * Purpose
 * =======
 *
 * ATL_zrefgemm  performs one of the matrix-matrix operations
 *
 *    C := alpha * op( A ) * op( B ) + beta * C,
 *
 * where op( X ) is one of
 *
 *    op( X ) = X   or   op( X ) = X'   or   op( X ) = conjg( X' ).
 *
 * Alpha and beta are scalars, and A, B and C are matrices, with op( A )
 * an m by k matrix,  op( B )  a  k by n matrix and  C an m by n matrix.
 *
 * All complex values are stored as consecutive (real, imaginary) pairs
 * of doubles; matrices are stored column by column, successive columns
 * being LDx complex entries apart.
 *
 * Arguments
 * =========
 *
 * TRANSA  (input)                       const enum ATLAS_TRANS
 *         On entry, TRANSA  specifies the form of op( A ) to be used in
 *         the matrix multiplication as follows:
 *
 *            TRANSA = AtlasNoTrans    op( A ) = A,
 *
 *            TRANSA = AtlasTrans      op( A ) = A',
 *
 *            TRANSA = AtlasConjTrans  op( A ) = conjg( A' ).
 *
 *         Unchanged on exit.
 *
 * TRANSB  (input)                       const enum ATLAS_TRANS
 *         On entry, TRANSB  specifies the form of op( B ) to be used in
 *         the matrix multiplication as follows:
 *
 *            TRANSB = AtlasNoTrans    op( B ) = B,
 *
 *            TRANSB = AtlasTrans      op( B ) = B',
 *
 *            TRANSB = AtlasConjTrans  op( B ) = conjg( B' ).
 *
 *         Unchanged on exit.
 *
 * M       (input)                       const int
 *         On entry,  M  specifies  the  number  of rows  of the  matrix
 *         op( A )  and  of the  matrix  C.  M  must  be at least  zero.
 *         Unchanged on exit.
 *
 * N       (input)                       const int
 *         On entry,  N  specifies  the number  of columns of the matrix
 *         op( B )  and the number of columns of the matrix C. N must be
 *         at least zero. Unchanged on exit.
 *
 * K       (input)                       const int
 *         On entry,  K  specifies  the  number of columns of the matrix
 *         op( A ) and the number of rows  of the matrix op( B ). K must
 *         be at least  zero. Unchanged on exit.
 *
 * ALPHA   (input)                       const double *
 *         On entry, ALPHA points to the scalar alpha, stored as the two
 *         doubles ( real, imaginary ).  When  ALPHA is supplied as zero
 *         then the elements of the matrices  A  and  B  need not be set
 *         on input. Unchanged on exit.
 *
 * A       (input)                       const double *
 *         On entry,  A  points  to an array of size equal to or greater
 *         than   LDA * ka * sizeof( double[2] ),   where  ka  is k when
 *         TRANSA = AtlasNoTrans, and is m otherwise. Before  entry with
 *         TRANSA = AtlasNoTrans, the leading m by k part of the array A
 *         must contain the matrix  A, otherwise the leading k by m part
 *         of the array A must contain the matrix A. Unchanged on exit.
 *
 * LDA     (input)                       const int
 *         On entry, LDA  specifies the leading dimension of A as decla-
 *         red  in  the  calling  (sub) program.  LDA  must be  at least
 *         MAX( 1, m )  when  TRANSA = AtlasNoTrans,   and   MAX( 1, k )
 *         otherwise. Unchanged on exit.
 *
 * B       (input)                       const double *
 *         On entry,  B  points  to an array of size equal to or greater
 *         than   LDB * kb * sizeof( double[2] ),   where  kb  is n when
 *         TRANSB = AtlasNoTrans, and is k otherwise. Before  entry with
 *         TRANSB = AtlasNoTrans, the leading k by n part of the array B
 *         must contain the matrix  B, otherwise the leading n by k part
 *         of the array B must contain the matrix B. Unchanged on exit.
 *
 * LDB     (input)                       const int
 *         On entry, LDB  specifies the leading dimension of B as decla-
 *         red  in  the  calling  (sub) program.  LDB  must be  at least
 *         MAX( 1, k )  when  TRANSB = AtlasNoTrans,   and   MAX( 1, n )
 *         otherwise. Unchanged on exit.
 *
 * BETA    (input)                       const double *
 *         On entry,  BETA  points to the scalar beta, stored as the two
 *         doubles ( real, imaginary ).  When  BETA  is supplied as zero
 *         then  the  elements of the matrix C  need not be set on input
 *         (they are overwritten without being read). Unchanged on exit.
 *
 * C       (input/output)                double *
 *         On entry,  C  points  to an array of size equal to or greater
 *         than   LDC * n * sizeof( double[2] ). Before  entry, the lea-
 *         ding  m by n  part of the array C must contain the matrix  C,
 *         except when beta is zero, in which case C need not be  set on
 *         entry. On exit, the array C is overwritten by the  m by n ma-
 *         trix ( alpha*op( A )*op( B ) + beta*C ).
 *
 * LDC     (input)                       const int
 *         On entry, LDC  specifies the leading dimension of C as decla-
 *         red  in  the  calling  (sub) program.  LDC  must be  at least
 *         MAX( 1, m ). Unchanged on exit.
 *
 * Further Details
 * ===============
 *
 * No argument checking is performed.  C is first scaled by beta (or set
 * to zero when beta is zero),  then alpha * op( A ) * op( B ) is  accu-
 * mulated into it.  If TRANSA or TRANSB is none of the three enumerated
 * values, C is only scaled by beta and no product is added.
 *
 * Array offsets are counted in doubles (two per complex entry)  and are
 * kept in size_t,  so that  2 * LDx * dimension  cannot overflow an int
 * for large matrices.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   const size_t               lda2 = ( (size_t)(LDA) << 1 ),
                              ldb2 = ( (size_t)(LDB) << 1 ),
                              ldc2 = ( (size_t)(LDC) << 1 );
   size_t                     iai, iail, ibj, iblj, icij, incbj, incbl,
                              jal, jcj;
   int                        conja, conjb, i, j, l;
   double                     blj_i, blj_r, t0_i, t0_r;
/* ..
 * .. Executable Statements ..
 *
 */
/*
 * Quick return: nothing to compute, or C would be left unchanged.
 */
   if( ( M == 0 ) || ( N == 0 ) ||
       ( ( Mdzero( ALPHA[0], ALPHA[1] ) || ( K == 0 ) ) &&
         Mdone( BETA[0], BETA[1] ) ) ) return;
/*
 * C := beta * C.  When beta is zero C is overwritten without being read
 * so that uninitialized input (possibly NaN or Inf) does not propagate.
 */
   if( Mdzero( BETA[0], BETA[1] ) )
   {
      for( j = 0, jcj = 0; j < N; j++, jcj += ldc2 )
      {
         for( i = 0, icij = jcj; i < M; i++, icij += 2 )
         {
            Mset( ATL_dZERO, ATL_dZERO, C[icij], C[icij+1] );
         }
      }
   }
   else if( !Mdone( BETA[0], BETA[1] ) )
   {
      for( j = 0, jcj = 0; j < N; j++, jcj += ldc2 )
      {
         for( i = 0, icij = jcj; i < M; i++, icij += 2 )
         {
            Mdscl( BETA[0], BETA[1], C[icij], C[icij+1] );
         }
      }
   }

   if( Mdzero( ALPHA[0], ALPHA[1] ) ) return;
/*
 * Entry ( l, j ) of op( B ) lives at offset  l * incbl + j * incbj in B:
 * stepping along l is contiguous when B is not transposed,  and  strides
 * by a full column of B otherwise.
 */
   if(      TRANSB == AtlasNoTrans   )
   {
      incbl = 2;    incbj = ldb2; conjb = 0;
   }
   else if( TRANSB == AtlasTrans     )
   {
      incbl = ldb2; incbj = 2;    conjb = 0;
   }
   else if( TRANSB == AtlasConjTrans )
   {
      incbl = ldb2; incbj = 2;    conjb = 1;
   }
   else
   {
      return;
   }

   conja = ( TRANSA == AtlasConjTrans );

   if( TRANSA == AtlasNoTrans )
   {
/*
 * op( A ) = A:  for each column j of C and each l,  add  column l  of A
 * scaled by t0 = alpha * op( B )( l, j ) to column j of C.
 */
      for( j = 0,      ibj  = 0,     jcj  = 0;
           j < N; j++, ibj += incbj, jcj += ldc2 )
      {
         for( l = 0,      jal  = 0,    iblj  = ibj;
              l < K; l++, jal += lda2, iblj += incbl )
         {
            blj_r = B[iblj];
            blj_i = ( conjb ? -B[iblj+1] : B[iblj+1] );
            Mmul( ALPHA[0], ALPHA[1], blj_r, blj_i, t0_r, t0_i );
            for( i = 0,      iail  = jal, icij  = jcj;
                 i < M; i++, iail += 2,   icij += 2 )
            {
               Mmla( A[iail], A[iail+1], t0_r, t0_i, C[icij], C[icij+1] );
            }
         }
      }
   }
   else if( ( TRANSA == AtlasTrans ) || conja )
   {
/*
 * op( A ) = A' or conjg( A' ):  row i of op( A ) is column i of A, so
 * C( i, j ) += alpha * t0,  with  t0  the dot product of that column
 * with column j of op( B ).  One inner loop per conjugation pattern
 * keeps the flag tests out of the innermost loop.
 */
      for( j = 0,      ibj  = 0,     jcj  = 0;
           j < N; j++, ibj += incbj, jcj += ldc2 )
      {
         for( i = 0,      icij  = jcj, iai  = 0;
              i < M; i++, icij += 2,   iai += lda2 )
         {
            Mset( ATL_dZERO, ATL_dZERO, t0_r, t0_i );

            if( conja && conjb )
            {
               for( l = 0,      iail  = iai, iblj  = ibj;
                    l < K; l++, iail += 2,   iblj += incbl )
               {
                  Mmla( B[iblj], -B[iblj+1], A[iail], -A[iail+1],
                        t0_r, t0_i );
               }
            }
            else if( conja )
            {
               for( l = 0,      iail  = iai, iblj  = ibj;
                    l < K; l++, iail += 2,   iblj += incbl )
               {
                  Mmla( A[iail], -A[iail+1], B[iblj], B[iblj+1],
                        t0_r, t0_i );
               }
            }
            else if( conjb )
            {
               for( l = 0,      iail  = iai, iblj  = ibj;
                    l < K; l++, iail += 2,   iblj += incbl )
               {
                  Mmla( A[iail], A[iail+1], B[iblj], -B[iblj+1],
                        t0_r, t0_i );
               }
            }
            else
            {
               for( l = 0,      iail  = iai, iblj  = ibj;
                    l < K; l++, iail += 2,   iblj += incbl )
               {
                  Mmla( A[iail], A[iail+1], B[iblj], B[iblj+1],
                        t0_r, t0_i );
               }
            }

            Mmla( ALPHA[0], ALPHA[1], t0_r, t0_i, C[icij], C[icij+1] );
         }
      }
   }
/*
 * End of ATL_zrefgemm
 */
}
