#include "tools.h"
void Cpscol2row2(trans, N, nvec, X, IX, JX, descX, Y, IY, JY, descY)
char *trans;
int N;
int nvec;
float *X;
int IX;
int JX;
int *descX;
float *Y;
int IY;
int JY;
int  *descY;
{
   void Cblacs_gridinfo();
   void Csgesd2d();
   void Csgerv2d();
   void Cscpycvec2cvec();
   void Cscpyrvec2rvec();
   void Cscpycvec2rvec();
   void Cscpyrvec2cvec();
   void Cscpy();
   void CscpyTrans();

   int ctxt, nprow, npcol, myrow, mycol, rsrc, cdest, xrow, xcol, yrow, ycol;
   int nblocks, lcm, rblkskip, cblkskip, mydist;
   int i, j, k, istart, iskip, nb, kb, LOCp, LOCq;
   float *x, *y, *work, *sptr;
   CPYPTR pack, unpack;

/*
 * Get some commonly used info
 */
   ctxt = descX[CTXT_];
   nb   = descX[MB_];    /* remember, descX[MB_] == descY[NB_] */
   Cblacs_gridinfo(ctxt, &nprow, &npcol, &myrow, &mycol);
   Cinfog2l(IX, JX, descX, nprow, npcol, myrow, mycol, &i, &j, &xrow, &xcol);
   x = &X[ i+j*descX[LLD_] ];
   Cinfog2l(IY, JY, descY, nprow, npcol, myrow, mycol, &i, &j, &yrow, &ycol);
   y = &Y[ i+j*descY[LLD_] ];
   lcm = Clcm(nprow, npcol);
   rblkskip = lcm / npcol;
   cblkskip = lcm / nprow;
/*
 * Get workspace
 */
   LOCp = Cnumroc2(N, IX, nb, myrow, descX[RSRC_], nprow);
   LOCq = Cnumroc2(N, JY, nb, mycol, descY[CSRC_], npcol);
   i = k = 0;
   if (mycol == xcol) k = ( ((LOCp + nb-1)/nb + cblkskip-1) / cblkskip );
   if (myrow == yrow) i = ( ((LOCq + nb-1)/nb + rblkskip-1) / rblkskip );
   if (k < i) k = nb * nvec * i;
   else k *= nb * nvec;
   work = (float *) malloc(k * sizeof(*x));
/*
 * See if we can use specially designed vector pack/unpack, or if we must
 * use general (un)packing routine.  Also, allow the user to perform either
 * tranpose or hermition transpose.
 */
   if (nvec == 1)
   {
      pack = Cscpycvec2cvec;
      unpack = Cscpycvec2rvec;
   }
   else
   {
      pack = Cscpy;
      if (*trans == 't')
         unpack = CscpyTrans;
      else
         unpack = Cscpy;
   }

/*
 * If the first block is partial, handle it seperately
 */
   kb = nb - (IX % nb);
   if (kb > N) kb = N;
   if (kb != nb)
   {
      if ( (myrow==yrow) && (mycol == ycol) )
      {
         if ( (myrow != xrow) || (mycol != xcol) )
         {
            Csgerv2d(ctxt, kb, nvec, work, kb, xrow, xcol);
            unpack(kb, nvec, work, kb, y, descY[LLD_]);
         }
         else
         {
            unpack(kb, nvec, x, descX[LLD_], y, descY[LLD_]);
            x += kb;
            LOCp -= kb;
         }
         y += kb*descY[LLD_];
         LOCq -= kb;
      }
      else if ( (myrow==xrow) && (mycol==xcol) )
      {
         Csgesd2d(ctxt, kb, nvec, x, descX[LLD_], yrow, ycol);
         x += kb;
         LOCp -= kb;
      }
      IX += kb;
      JY += kb;
      xrow = (xrow + 1) % nprow;
      ycol = (ycol + 1) % npcol;
      N -= kb;
   }
/*
 * If I'm in the process column owning X, the source vector
 */
   if (mycol == xcol)
   {
/*
 *    Figure my distance from process owning first element of sub( X ).
 *    The process in yrow the same distance from the process owning the
 *    first element of sub( Y ) will want my first block.
 */
      istart = 0;
      mydist = (nprow + myrow - xrow) % nprow;
      cdest = (ycol + mydist) % npcol;
      iskip = nb * cblkskip;
/*
 *    Loop over all possible destination processes
 */
      for(k=0; k < cblkskip; k++)
      {
/*
 *       If I'm not the destination process
 */
         if ( (myrow != yrow) || (mycol != cdest) )
         {
/*
 *          Pack all relavent blocks into work
 */
            for (j=0, i=istart; i < LOCp; i += iskip, j += nb*nvec)
            {
               kb = LOCp - i;
               if (kb > nb) kb = nb;
               pack(kb, nvec, &x[i], descX[LLD_], &work[j], kb);
            }
/*
 *          Send appropriate blocks of X, if any.  Note that we send nb-kb extra
 *          rows if nb != kb, but it makes computation easier for receiving node.
 */
            if (j > 0) Csgesd2d(ctxt, j, 1, work, j, yrow, cdest);
         }
/*
 *       If I'm both source and destination, save where to start copying from
 */
         else sptr = &x[istart];
/*
 *       Increment where we start packing from, and go on to next destination
 *       process
 */
         istart += nb;
         cdest = (cdest + nprow) % npcol;
      }
   }
/*
 * If I'm part of the process row owning sub( Y )
 */
   if (myrow == yrow)
   {
/*
 *    Figure my distance from process owning first element of sub( Y ).
 *    The process in xcol the same distance from the process owning the
 *    first element of sub( X ) will have my first block.
 */
      istart = 0;
      mydist = (npcol + mycol - ycol) % npcol;
      rsrc = (xrow + mydist) % nprow;
      iskip = nb * rblkskip;
      for(k=0; k < rblkskip; k++)
      {
         if ( (myrow != rsrc) || (mycol != xcol) )
         {
/*
 *          Figure how much data to receive, and if there is any, receive it
 */
            nblocks = (LOCq - istart + nb-1) / nb;
            j = nvec * nb * ( (nblocks+rblkskip-1) / rblkskip );
            if (j > 0) Csgerv2d(ctxt, j, 1, work, j, rsrc, xcol);
/*
 *          Copy X's data to Y
 */
            for (i=istart, j=0; i < LOCq; i += iskip, j += nb*nvec)
            {
               kb = LOCq - i;
               if (kb > nb) kb = nb;
               unpack(kb, nvec, &work[j], kb, &y[ i*descY[LLD_] ], descY[LLD_]);
            }
         }
/*
 *       If I'm both source and destination, just copy
 */
         else
         {
            for (i=istart; i < LOCq; i += iskip)
            {
               kb = LOCq - i;
               if (kb > nb) kb = nb;
               unpack(kb, nvec, sptr, descX[LLD_], &y[ i*descY[LLD_] ],
                      descY[LLD_]);
               sptr += nb * cblkskip;
            }
         }
/*
 *       Increment where we start packing from, and go on to next destination
 *       process
 */
         istart += nb;
         rsrc = (rsrc + npcol) % nprow;
      }
   }
   free(work);
}
void Cscpycvec2cvec(M, N, src, lds, dest, ldd)
int M;
int N;
float *src;
int lds;
float *dest;
int  ldd;
{
   F_VOID_FCT scopy_();
   int ione=1;
   scopy_(&M, src, &ione, dest, &ione);
}

void Cscpyrvec2rvec(M, N, src, lds, dest, ldd)
int M;
int N;
float *src;
int lds;
float *dest;
int  ldd;
{
   F_VOID_FCT scopy_();
   scopy_(&N, src, &lds, dest, &ldd);
}

void Cscpycvec2rvec(M, N, src, lds, dest, ldd)
int M;
int N;
float *src;
int lds;
float *dest;
int  ldd;
{
   F_VOID_FCT scopy_();
   int ione=1;
   scopy_(&M, src, &ione, dest, &ldd);
}

void Cscpyrvec2cvec(M, N, src, lds, dest, ldd)
int M;
int N;
float *src;
int lds;
float *dest;
int  ldd;
{
   F_VOID_FCT scopy_();
   int ione=1;
   scopy_(&N, src, &lds, dest, &ione);
}

void Cscpy(M, N, src, lds, dest, ldd)
int M;
int N;
float *src;
int lds;
float *dest;
int  ldd;
{
   int i, j;
   for (j=0; j < N; j++)
   {
      for (i=0; i < M; i++) dest[i] = src[i];
      src += lds;
      dest += ldd;
   }
}

void CscpyTrans(M, N, src, lds, dest, ldd)
int M;
int N;
float *src;
int lds;
float *dest;
int  ldd;
/*
 * Transposes while copying
 */
{
   int i, j, k;
   for (j=0; j < N; j++)
   {
      k = j * lds;
      for (i=0; i < M; i++) dest[i*ldd+j] = src[i+k];
   }
}

