/**************************************************************************
*                                                                         *
*  Author      : Dr. Thomas Brandes, GMD, I1.HR                           *
*  Copyright   : GMD St. Augustin, Germany                                *
*  Date        : Aug 92                                                   *
*  Last Update : Apr 93                                                   *
*                                                                         *
*  This Module is part of the DALIB                                       *
*                                                                         *
*  Module      : section1.c                                               *
*                                                                         *
*  Function    : Sending/Receiving a section of a distributed array       *
*                                                                         *
*  Export :    ONLY INTERNAL USE IN DALIB                                 *
*                                                                         *
*  void dalib_setup_sectionk (size, N1, x1, y1, ..., Nk, xk, yk)          *
*  int size N1, x1, y1, N2, x2, ..., Nk, xk, yk;                          *
*                                                                         *
*  void dalib_send_sectionk (to, a)                        k = 1,...,4    *
*  int to; unsigned char *a;                                              *
*                                                                         *
*  void dalib_recv_sectionk (from, a)                      k = 1,...,4    *
*  int from; unsigned char *a;                                            *
*                                                                         *
*  void dalib_copy_sectionk (to, a)                        k = 1,...,4    *
*  unsigned char *to; unsigned char *a;                                   *
*                                                                         *
**************************************************************************/

# undef DEBUG

#include "system.h"

     /*********************************************************
     *                                                        *
     *  GLOBALS for whole array and considered section        *
     *                                                        *
     *********************************************************/

# define DIM 4       /* maximal supported dimension */

/* global data of the distributed array
   (filled in by the dalib_setup_sectionk routines) */

int a_N [DIM];       /* extent of the array in every dimension */
int a_rank, a_size;  /* number of dimensions in use; size of one element
                        (offsets and lengths are scaled by a_size) */

/* global data of the section in the distributed array */

int sc_x[DIM];      /* lower bounds of the section */
int sc_y[DIM];      /* upper bounds of the section */
int sc_n[DIM];      /* shape of the section */

int sc_size;        /* number of elements of the whole section */

/* global data for looping through the section
   (computed by dalib_section_loops) */

int sc_ptr;       /* offset to first element in section */
int sc_length;    /* length of one contiguous run, sc_times[0] * a_size */

int sc_times[DIM];  /* iterations of the corresponding loop */
int sc_inc  [DIM];  /* pointer increment for the loop       */
int sc_loops;       /* number of loops for traversing section; after
                       dalib_section_loops the innermost contiguous
                       run (index 0) is no longer counted           */

     /*********************************************************
     *                                                        *
     *  Computing the loops of a given section                *
     *                                                        *
     *********************************************************/

/* Derives the traversal description of the current section from the
   globals a_rank, a_size, a_N, sc_x and sc_y (bounds are expected
   1-based on entry and are rewritten 0-based).

   Results:  sc_n, sc_size           shape and element count
             sc_ptr                  offset of the first element * a_size
             sc_length               sc_times[0] * a_size
             sc_times[1..sc_loops],
             sc_inc  [1..sc_loops]   remaining loops, increments * a_size
             sc_loops                loops left besides the innermost run

   An extent <= 0 in any dimension is reported and ends the program
   with exit (-1).                                                    */

void dalib_section_loops ()

{ int d, cur, k, bad;

#ifdef DEBUG
  printf ("dalib_section_loops called with rank = %d\n",a_rank);
#endif

  /* shift the bounds to base 0 and derive the extent per dimension */

  bad     = 0;
  sc_size = 1;
  for (d = 0; d < a_rank; d++)
    { sc_x[d] -= 1;
      sc_y[d] -= 1;
      sc_n[d]  = sc_y[d] - sc_x[d] + 1;
#ifdef DEBUG
  printf ("dalib_section_loops , i = %d, x = %d, y = %d, n = %d\n",
           d, sc_x[d], sc_y[d], sc_n[d]);
#endif
      if (sc_n[d] < 1)
        { printf ("Section of rank = %d, dim %d illegal: %d - %d\n",
                  a_rank, d+1, sc_x[d], sc_y[d]);
          bad++;
        }
      sc_size *= sc_n[d];
    }
  if (bad != 0) exit (-1);

  /* one loop per dimension to start with:
     loop d runs n(d+1) times with stride N1 * ... * Nd (1 for d == 0) */

  for (d = 0; d < a_rank; d++)
    { sc_times[d] = sc_n[d];
      sc_inc  [d] = (d == 0) ? 1 : sc_inc[d-1] * a_N[d-1];
    }

  /* merge or drop loops wherever that is possible */

  sc_loops = a_rank;
  cur      = 1;
  while (cur < sc_loops)
    { if (sc_times[cur-1] * sc_inc[cur-1] == sc_inc[cur])
        { /* previous loop covers its full extent: fold this one into it */
          sc_times[cur-1] *= sc_times[cur];
        }
      else if (sc_times[cur] != 1)
        { /* loop has to stay, look at the next one */
          cur++;
          continue;
        }

      /* loop cur was folded or iterates only once: close the gap */

      for (k = cur; k < sc_loops - 1; k++)
        { sc_times[k] = sc_times[k+1];
          sc_inc  [k] = sc_inc  [k+1];
        }
      sc_loops--;
    }

  /* offset of the first element, e.g. (x3 * N2 + x2) * N1 + x1 */

  sc_ptr = 0;
  d = a_rank;
  while (d-- > 0)
    sc_ptr = sc_ptr * a_N[d] + sc_x[d];

  /* loop 0 has increment 1 and becomes the contiguous run, so it is
     no longer counted; everything else is scaled by the element size */

  sc_loops -= 1;
  sc_length = sc_times[0] * a_size;
  sc_ptr    = sc_ptr * a_size;
  for (d = sc_loops; d >= 1; d--)
    sc_inc[d] *= a_size;

#ifdef DEBUG
   printf ("dalib_section_loops has %d loops\n", sc_loops);
   for (d = 1; d <= sc_loops; d++)
      printf ("inc of loop %d = %d\n", d, sc_inc[d]);
#endif

} /* dalib_section_loops */
  

     /*********************************************************
     *                                                        *
     *  [ 1  2  3  4  my_low ... my_up  .......   ]           *
     *                                                        *
     *  N is the number of all elements                       *
     *                                                        *
     *  (x:y) is the subsection of (my_low:my_up)             *
     *                                                        *
     *********************************************************/

/* Records extent N and section bounds x:y for dimension dim
   (dim counts from 1) without any translation of the bounds. */

void set_range (dim, N, x, y)
int dim, N, x, y;

{  int slot;

   slot = dim - 1;      /* the global tables are indexed from 0 */

   a_N [slot] = N;
   sc_x[slot] = x;
   sc_y[slot] = y;
}

/* Records dimension dim (counted from 1) relative to the part
   first:last of 1:N that is computed from pcb.i and pcb.p:
   the bounds x:y are shifted so that first maps to 1, and the
   extent stored in a_N is the size of that part.
   NOTE(review): pcb.i / pcb.p look like own process number (from 1)
   and number of processes - confirm against system.h.             */

void localize_range (dim, N, x, y)
int dim, N, x, y;

{  int first, last, slot;

   first = ((pcb.i - 1) * N) / pcb.p + 1;
   last  = (pcb.i * N) / pcb.p;

   slot  = dim - 1;

   a_N [slot] = last - first + 1;
   sc_x[slot] = x - first + 1;
   sc_y[slot] = y - first + 1;
}
      
     /*********************************************************
     *                                                        *
     *  SetUp of a local section                              *
     *                                                        *
     *  consider section a(x1:y1,x2:y2,x3:y3,x4:y4) of        *
     *                   whole array a(N1, N2, N3, N4)        *
     *                                                        *
     *  for i4, i3, i2, i1 ...                                *
     *                                                        *
     *  for   times   inc             length                  *
     *    4    n4     N3*N2*N1                                *
     *    3    n3     N2*N1                                   *
     *    2    n2     N1                                      *
     *    1    n1     1                  1                    *
     *                                                        *
     *  inc is the element stride of each loop as set up by   *
     *  dalib_section_loops before adjacent loops are merged  *
     *                                                        *
     *********************************************************/

/* Describes section (x1:y1) of a one-dimensional array with N1
   elements of size `size` in the section globals.              */

void dalib_setup_section1 (size, N1, x1, y1)
int size, N1, x1, y1;

{ /* element size and rank of the array */

  a_size = size;
  a_rank = 1;

  /* the single dimension is translated by localize_range */

  localize_range (1, N1, x1, y1);

  /* derive offset, run length and loops from the bounds */

  dalib_section_loops ();

#ifdef DEBUG
  printf ("sec: %d ready setup section1: %d elems, len = %d, ptr = %d\n", 
           pcb.i, sc_n[0], sc_length, sc_ptr);
#endif
}

/* Describes section (x1:y1,x2:y2) of an array (N1,N2) with elements
   of size `size` in the section globals.                           */

void dalib_setup_section2 (size, N1, x1, y1, N2, x2, y2)
int size, N1, x1, y1, N2, x2, y2;

{ /* element size and rank of the array */

  a_size = size;
  a_rank = 2;

  /* only the last dimension is translated by localize_range,
     the first one is taken over unchanged                    */

  localize_range (2, N2, x2, y2);
  set_range (1, N1, x1, y1);

  /* derive offset, run length and loops from the bounds */

  dalib_section_loops ();

#ifdef DEBUG
   printf ("sec: %d setup section2: %d x %d needs %d loops\n",
           pcb.i, sc_n[0], sc_n[1], sc_loops);
#endif
}

/* Describes section (x1:y1,x2:y2,x3:y3) of an array (N1,N2,N3) with
   elements of size `size` in the section globals.                  */

void dalib_setup_section3 (size, N1, x1, y1, N2, x2, y2, N3, x3, y3)
int size, N1, x1, y1, N2, x2, y2, N3, x3, y3;

{ /* element size and rank of the array */

  a_size = size;
  a_rank = 3;

  /* only the last dimension is translated by localize_range,
     the leading ones are taken over unchanged                */

  localize_range (3, N3, x3, y3);
  set_range (2, N2, x2, y2);
  set_range (1, N1, x1, y1);

  /* derive offset, run length and loops from the bounds */

  dalib_section_loops ();

#ifdef DEBUG
   printf ("sec: %d setup section3: %d x %d x %d, %d loops\n",
           pcb.i, sc_n[0], sc_n[1], sc_n[2], sc_loops);
#endif
}

/* Describes section (x1:y1,x2:y2,x3:y3,x4:y4) of an array
   (N1,N2,N3,N4) with elements of size `size` in the section globals. */

void dalib_setup_section4 (size, N1, x1, y1, N2, x2, y2, N3, x3, y3, N4, x4, y4)
int size, N1, x1, y1, N2, x2, y2, N3, x3, y3, N4, x4, y4;

{ /* element size and rank of the array */

  a_size = size;
  a_rank = 4;

  /* only the last dimension is translated by localize_range,
     the leading ones are taken over unchanged                */

  localize_range (4, N4, x4, y4);
  set_range (3, N3, x3, y3);
  set_range (2, N2, x2, y2);
  set_range (1, N1, x1, y1);

  /* derive offset, run length and loops from the bounds */

  dalib_section_loops ();

#ifdef DEBUG
   printf ("sec: %d setup section4: %d x %d x %d x %d, %d loops\n",
           pcb.i, sc_n[0], sc_n[1], sc_n[2], sc_n[3], sc_loops);
#endif
}

     /*********************************************************
     *                                                        *
     *  Send of a local section                               *
     *                                                        *
     *********************************************************/

/* Sends the section set up by dalib_setup_section1 to process `to`.
   a is the start of the array; sc_length bytes beginning at
   a + sc_ptr are handed to asend in one piece.                     */

void dalib_send_section1 (to, a)
int to;
unsigned char *a;

{ int me;

  me = dalib_pid_ ();      /* own id, passed to asend as the sender */

#ifdef DEBUG
  printf ("sec: %d sends local (%d-%d) to %d, length = %d, ptr = %d\n",
           me, sc_x[0], sc_y[0], to, sc_length, sc_ptr);
#endif

  asend (me, to, a + sc_ptr, sc_length);
}

/* Sends the section set up by dalib_setup_section2 to process `to`.
   a is the start of the array.  With sc_loops == 0 the section is
   one run and goes out directly via asend; otherwise the
   sc_times[1] runs of sc_length bytes are collected in a buffer
   that is then sent as a whole.                                    */

void dalib_send_section2 (to, a)
int to;
unsigned char *a;

{ int me, row;
  unsigned char *src;

  me = dalib_pid_ ();

#ifdef DEBUG
  printf ("sec: %d send2 (%d-%d,%d-%d) to %d, len = %d, lps = %d, ptr = %d\n",
           me, sc_x[0], sc_y[0], sc_x[1], sc_y[1], to,
           sc_length, sc_loops, sc_ptr);
#endif

  src = a + sc_ptr;

  if (sc_loops == 0)
    { /* contiguous section: no packing necessary */
#ifdef DEBUG
      printf ("sec: %d now send2, %dx%d, continguous, ptr= %d\n",
              me, sc_n[0], sc_n[1], sc_ptr);
#endif 
      asend (me, to, src, sc_length);
      return;
    }

  /* strided section: pack run by run */

  dalib_create_buffer (sc_size * a_size, 0);
#ifdef DEBUG
  printf ("sec: %d now send2, %dx%d, 1 lps (%d), inc = %d, ptr= %d\n",
          me,sc_n[0],sc_n[1],sc_times[1],sc_inc[1],sc_ptr);
#endif 
  row = 0;
  while (row < sc_times[1])
    { dalib_fill_buffer (src, sc_length);
      src += sc_inc[1];
      row++;
    }
  dalib_send_buffer (to);
  dalib_destroy_buffer ();
}

/* Sends the section set up by dalib_setup_section3 to process `to`.
   a is the start of the array.  A contiguous section (sc_loops == 0)
   goes out directly via asend; otherwise all runs of sc_length bytes
   are packed into a buffer, following one or two loops, and the
   buffer is sent as a whole.                                        */

void dalib_send_section3 (to, a)
int to;
unsigned char *a;

{ int me, outer, inner;
  unsigned char *plane, *row;

  me = dalib_pid_ ();

#ifdef DEBUG
  printf ("%d sends local (%d-%d,%d-%d,%d-%d) to %d, length = %d, ptr = %d\n",
           me,sc_x[0], sc_y[0], sc_x[1], sc_y[1], 
           sc_x[2],sc_y[2], to, sc_length, sc_ptr);
#endif

  plane = a + sc_ptr;

  if (sc_loops == 0)
    { /* contiguous section: no packing necessary */
#ifdef DEBUG
      printf ("%d send section3, %dx%dx%d, continguous \n",
              me, sc_n[0], sc_n[1], sc_n[2]);
#endif 
      asend (me, to, plane, sc_length);
      return;
    }

  dalib_create_buffer (sc_size * a_size, 0);

  if (sc_loops == 1)
    {
#ifdef DEBUG
      printf ("%d send section3, %dx%dx%d, 1 lps (%d), inc = (%d)\n",
              me, sc_n[0], sc_n[1], sc_n[2],
              sc_times[1], sc_inc[1]);
#endif 
      for (inner = 0; inner < sc_times[1]; inner++)
        { dalib_fill_buffer (plane, sc_length);
          plane += sc_inc[1];
        }
    }
   else /* two loops */
    {
#ifdef DEBUG
      printf ("%d send section3, %dx%dx%d, 2 lps (%d,%d), inc = (%d,%d)\n",
              me, sc_n[0], sc_n[1], sc_n[2],
              sc_times[2], sc_times[1], sc_inc[2],sc_inc[1]);
#endif 
      for (outer = 0; outer < sc_times[2]; outer++)
        { /* the inner loop works on a copy, so no rewind is needed */
          row = plane;
          for (inner = 0; inner < sc_times[1]; inner++)
            { dalib_fill_buffer (row, sc_length);
              row += sc_inc[1];
            }
          plane += sc_inc[2];
        }
    }

  dalib_send_buffer (to);
  dalib_destroy_buffer ();
}

/* Sends the section set up by dalib_setup_section4 to process `to`.
   a is the start of the array.  A contiguous section (sc_loops == 0)
   goes out directly via asend; otherwise all runs of sc_length bytes
   are packed into a buffer, following one, two or three loops, and
   the buffer is sent as a whole.                                    */

void dalib_send_section4 (to, a)
int to;
unsigned char *a;

{ int me;
  int l3, l2, l1;
  unsigned char *cube, *plane, *row;

  me = dalib_pid_ ();

#ifdef DEBUG
  printf ("sec: %d sends4 (%d-%d,%d-%d,%d-%d,%d-%d) to %d, len=%d, ptr=%d, loops = %d\n",
           me,sc_x[0], sc_y[0], sc_x[1], sc_y[1], 
           sc_x[2],sc_y[2], sc_x[3], sc_y[3], to, sc_length, sc_ptr, sc_loops);
#endif

  cube = a + sc_ptr;

  if (sc_loops == 0)
    { /* contiguous section: no packing necessary */
#ifdef DEBUG
      printf ("sec: %d now send4, %dx%dx%dx%d, continguous \n",
              me, sc_n[0], sc_n[1], sc_n[2], sc_n[3]);
#endif
      asend (me, to, cube, sc_length);
      return;
    }

  dalib_create_buffer (sc_size * a_size, 0);

  if (sc_loops == 1)
    {
#ifdef DEBUG
      printf ("sec: %d now send4, %dx%dx%dx%d, 1 loop (%d), inc = (%d)\n",
              me, sc_n[0], sc_n[1], sc_n[2], sc_n[3],
              sc_times[1], sc_inc[1]);
#endif
      for (l1 = 0; l1 < sc_times[1]; l1++)
        { dalib_fill_buffer (cube, sc_length);
          cube += sc_inc[1];
        }
    }
   else if (sc_loops == 2)
    {
#ifdef DEBUG
      printf ("sec: %d now send4, %dx%dx%dx%d, 2 lps (%d,%d), inc = (%d,%d)\n",
              me, sc_n[0], sc_n[1], sc_n[2], sc_n[3],
              sc_times[2], sc_times[1], sc_inc[2],sc_inc[1]);
#endif
      for (l2 = 0; l2 < sc_times[2]; l2++)
        { row = cube;
          for (l1 = 0; l1 < sc_times[1]; l1++)
            { dalib_fill_buffer (row, sc_length);
              row += sc_inc[1];
            }
          cube += sc_inc[2];
        }
    }
   else /* three loops */
    {
#ifdef DEBUG
      printf ("sec: %d now send4, %dx%dx%dx%d, 3lps(%d,%d,%d),inc=(%d,%d,%d)\n",
              me, sc_n[0], sc_n[1], sc_n[2], sc_n[3],
              sc_times[3], sc_times[2], sc_times[1], 
              sc_inc[3], sc_inc[2],sc_inc[1]);
#endif
      /* every level works on its own pointer, so no rewind is needed */
      for (l3 = 0; l3 < sc_times[3]; l3++)
        { plane = cube;
          for (l2 = 0; l2 < sc_times[2]; l2++)
            { row = plane;
              for (l1 = 0; l1 < sc_times[1]; l1++)
                { dalib_fill_buffer (row, sc_length);
                  row += sc_inc[1];
                }
              plane += sc_inc[2];
            }
          cube += sc_inc[3];
        }
    }

  dalib_send_buffer (to);
  dalib_destroy_buffer ();
}

     /*********************************************************
     *                                                        *
     *  Receive of a local section                            *
     *                                                        *
     *********************************************************/

/* Receives the section set up by dalib_setup_section1 from process
   `from`.  a is the start of the array; sc_length bytes are received
   via areceive directly into a + sc_ptr.                            */

void dalib_recv_section1 (from, a)
int from;
unsigned char *a;

{ int to;
  unsigned char *ptr;

  /* receive section */

  to = dalib_pid_ ();      /* own id, passed to areceive as the receiver */

#ifdef DEBUG
  /* a rank-1 section only has bounds in dimension 0 */
  printf ("sec: %d recvs local (%d-%d) from %d, length = %d ptr = %d\n",
           to, sc_x[0], sc_y[0], from, sc_length, sc_ptr);
#endif

  ptr    = a + sc_ptr;
  areceive (to, from, ptr, sc_length);
}

/* Receives the section set up by dalib_setup_section2 from process
   `from`.  a is the start of the array.  With sc_loops == 0 the
   section is one run and is received directly via areceive;
   otherwise a buffer is received and its sc_times[1] runs of
   sc_length bytes are distributed into the array.                  */

void dalib_recv_section2 (from, a)
int from;
unsigned char *a;

{ int i, to;
  unsigned char *ptr;

  /* receive section */

  to = dalib_pid_ ();

#ifdef DEBUG
  printf ("sec: %d recvs2 (%d-%d,%d-%d) from %d, len = %d lps = %d ptr = %d\n",
           to, sc_x[0], sc_y[0], sc_x[1], sc_y[1], from, 
           sc_length, sc_loops, sc_ptr);
#endif

  ptr    = a + sc_ptr;
  if (sc_loops == 0)
    { /* contiguous section */
#ifdef DEBUG
      /* "sec:" shows the local process, as in the message above */
      printf ("sec: %d now recvs2, %dx%d, continguous, ptr = %d \n",
              to, sc_n[0], sc_n[1], sc_ptr);
#endif
      areceive (to, from, ptr, sc_length);
    }
   else
    { dalib_create_buffer (sc_size * a_size, 1);
#ifdef DEBUG
      printf ("sec: %d now recvs2, %dx%d, 1 lps (%d), inc = (%d)\n",
              to, sc_n[0], sc_n[1],
              sc_times[1], sc_inc[1]);
#endif
      dalib_recv_buffer (from);
      /* unpack run by run, stepping sc_inc[1] through the array */
      for (i=0;i<sc_times[1];i++)
       {  dalib_get_buffer (ptr, sc_length);
          ptr += sc_inc[1];
       }
      dalib_destroy_buffer ();
    }
}

/* Receives the section set up by dalib_setup_section3 from process
   `from`.  a is the start of the array.  A contiguous section
   (sc_loops == 0) is received directly via areceive; otherwise a
   buffer is received and its runs of sc_length bytes are distributed
   into the array, following one or two loops.                       */

void dalib_recv_section3 (from, a)
int from;
unsigned char *a;

{ int i, j, to;
  unsigned char *ptr;

  /* recv section */

  to = dalib_pid_ ();

#ifdef DEBUG
  printf ("sec: %d recvs (%d-%d,%d-%d,%d-%d) from %d, len = %d ptr = %d\n",
           to, sc_x[0], sc_y[0], sc_x[1], sc_y[1], sc_x[2], sc_y[2],
           from, sc_length, sc_ptr);
#endif

  ptr    = a + sc_ptr;

  if (sc_loops == 0)
    { /* contiguous section */
#ifdef DEBUG
      /* "sec:" shows the local process, as in the message above */
      printf ("sec: %d now recvs3, %dx%dx%d, continguous, ptr = %d \n",
              to, sc_n[0], sc_n[1], sc_n[2], sc_ptr);
#endif
      areceive (to, from, ptr, sc_length);
    }
   else if (sc_loops == 1)
    { dalib_create_buffer (sc_size * a_size, 1);
      dalib_recv_buffer (from);
#ifdef DEBUG
      /* exactly one argument per conversion: three extents only */
      printf ("sec: %d now recvs3, %dx%dx%d, 1 lps (%d), inc = (%d)\n",
              to, sc_n[0], sc_n[1], sc_n[2],
              sc_times[1], sc_inc[1]);
#endif 
      for (i=0;i<sc_times[1];i++)
       {  dalib_get_buffer (ptr, sc_length);
          ptr += sc_inc[1];
       }
      dalib_destroy_buffer ();
    }
   else /* sc_loops == 2 */
    { dalib_create_buffer (sc_size * a_size, 1);
      dalib_recv_buffer (from);
#ifdef DEBUG
      /* exactly one argument per conversion: three extents only */
      printf ("sec: %d now recvs3, %dx%dx%d, 2 lps (%d,%d), inc = (%d,%d)\n",
              to, sc_n[0], sc_n[1], sc_n[2],
              sc_times[2], sc_times[1], sc_inc[2], sc_inc[1]);
#endif 
      for (i=0;i<sc_times[2];i++)
       { for (j=0;j<sc_times[1];j++)
          {  dalib_get_buffer (ptr, sc_length);
             ptr += sc_inc[1];
          }
         /* back to the start of the inner loop, then one outer step */
         ptr -= sc_times[1] * sc_inc[1];
         ptr += sc_inc[2];
       }
      dalib_destroy_buffer ();
    }
}

/* Receives the section set up by dalib_setup_section4 from process
   `from`.  a is the start of the array.  A contiguous section
   (sc_loops == 0) is received directly via areceive; otherwise a
   buffer is received and its runs of sc_length bytes are distributed
   into the array, following one, two or three loops.                */

void dalib_recv_section4 (from, a)
int from;
unsigned char *a;

{ int me;
  int l3, l2, l1;
  unsigned char *cube, *plane, *row;

  me = dalib_pid_ ();

#ifdef DEBUG
  printf ("sec: %d recvs4 (%d-%d,%d-%d,%d-%d,%d-%d) from %d, len=%d, ptr=%d\n",
           me, sc_x[0], sc_y[0], sc_x[1], sc_y[1],
           sc_x[2],sc_y[2], sc_x[3], sc_y[3], from, sc_length, sc_ptr);
#endif

  cube = a + sc_ptr;

  if (sc_loops == 0)
    { /* contiguous section: no unpacking necessary */
#ifdef DEBUG
      printf ("sec: %d now rec4, %dx%dx%dx%d, continguous \n",
              me, sc_n[0], sc_n[1], sc_n[2], sc_n[3]);
#endif
      areceive (me, from, cube, sc_length);
      return;
    }

  dalib_create_buffer (sc_size * a_size, 1);
  dalib_recv_buffer (from);

  if (sc_loops == 1)
    {
#ifdef DEBUG
      printf ("sec: %d now rec4, %dx%dx%dx%d, 1 loop (%d), inc = (%d)\n",
              me, sc_n[0], sc_n[1], sc_n[2], sc_n[3],
              sc_times[1], sc_inc[1]);
#endif
      for (l1 = 0; l1 < sc_times[1]; l1++)
        { dalib_get_buffer (cube, sc_length);
          cube += sc_inc[1];
        }
    }
   else if (sc_loops == 2)
    {
#ifdef DEBUG
      printf ("sec: %d now rec4, %dx%dx%dx%d, 2 lps (%d,%d), inc = (%d,%d)\n",
              me, sc_n[0], sc_n[1], sc_n[2], sc_n[3],
              sc_times[2], sc_times[1], sc_inc[2],sc_inc[1]);
#endif
      for (l2 = 0; l2 < sc_times[2]; l2++)
        { row = cube;
          for (l1 = 0; l1 < sc_times[1]; l1++)
            { dalib_get_buffer (row, sc_length);
              row += sc_inc[1];
            }
          cube += sc_inc[2];
        }
    }
   else /* three loops */
    {
#ifdef DEBUG
      printf ("sec: %d recv4, %dx%dx%dx%d, 3 lps(%d,%d,%d), inc=(%d,%d,%d)\n",
              me, sc_n[0], sc_n[1], sc_n[2], sc_n[3],
              sc_times[3], sc_times[2], sc_times[1],
              sc_inc[3], sc_inc[2],sc_inc[1]);
#endif
      /* every level works on its own pointer, so no rewind is needed */
      for (l3 = 0; l3 < sc_times[3]; l3++)
        { plane = cube;
          for (l2 = 0; l2 < sc_times[2]; l2++)
            { row = plane;
              for (l1 = 0; l1 < sc_times[1]; l1++)
                { dalib_get_buffer (row, sc_length);
                  row += sc_inc[1];
                }
              plane += sc_inc[2];
            }
          cube += sc_inc[3];
        }
    }

  dalib_destroy_buffer ();
}

     /*********************************************************
     *                                                        *
     *  Copy of a local section to a new array                *
     *                                                        *
     *********************************************************/

/* Copies the section set up by dalib_setup_section1 out of array a:
   sc_length bytes starting at a + sc_ptr are passed to dalib_memcpy
   with `to` as first argument.
   NOTE(review): dalib_memcpy is assumed to take (destination,
   source, length) - confirm against its definition.               */

void dalib_copy_section1 (to, a)
unsigned char *to;
unsigned char *a;

{ unsigned char *ptr;

  /* copy section */

#ifdef DEBUG
  int from;
  from = pcb.i;
  /* to is an address, so it is printed with %p */
  printf ("sec: %d copies local (%d-%d) to %p, length = %d, ptr = %d\n",
           from, sc_x[0], sc_y[0], (void *) to, sc_length, sc_ptr);
#endif

  ptr    = a + sc_ptr;
  dalib_memcpy (to, ptr, sc_length);
}

/* Copies the section set up by dalib_setup_section2 out of array a
   to `to`.  With sc_loops == 0 the section is one run of sc_length
   bytes; otherwise sc_times[1] runs, sc_inc[1] apart in a, are
   stored one after the other at `to`.
   NOTE(review): dalib_memcpy is assumed to take (destination,
   source, length) - confirm against its definition.               */

void dalib_copy_section2 (to, a)
unsigned char *to;
unsigned char *a;

{ int i;
  unsigned char *ptr, *to_ptr;

  /* copy section */

#ifdef DEBUG
  int from;
  from = pcb.i;
  /* to is an address, so it is printed with %p */
  printf ("sec: %d copy2 (%d-%d,%d-%d) to %p, len = %d, lps = %d, ptr = %d\n",
           from, sc_x[0], sc_y[0], sc_x[1], sc_y[1], (void *) to,
           sc_length, sc_loops, sc_ptr);
#endif

  ptr    = a + sc_ptr;

  if (sc_loops == 0)
    { /* contiguous section */
#ifdef DEBUG
      printf ("sec: %d now copy2, %dx%d, continguous, ptr= %d\n",
              from, sc_n[0], sc_n[1], sc_ptr);
#endif 
      dalib_memcpy (to, ptr, sc_length);
    }
   else
    { to_ptr = to;
#ifdef DEBUG
      printf ("sec: %d now copy2, %dx%d, 1 lps (%d), inc = %d, ptr= %d\n",
              from,sc_n[0],sc_n[1],sc_times[1],sc_inc[1],sc_ptr);
#endif 
      for (i=0;i<sc_times[1];i++)
       {  dalib_memcpy (to_ptr, ptr, sc_length);
          ptr += sc_inc[1];         /* next run in the array      */
          to_ptr += sc_length;      /* target is filled densely   */
       }
    }
}

/* Copies the section set up by dalib_setup_section3 out of array a
   to `to`.  A contiguous section (sc_loops == 0) is one run of
   sc_length bytes; otherwise the runs are visited with one or two
   loops and stored one after the other at `to`.
   NOTE(review): dalib_memcpy is assumed to take (destination,
   source, length) - confirm against its definition.               */

void dalib_copy_section3 (to, a)
unsigned char *to;
unsigned char *a;

{ int i, j, from;
  unsigned char *ptr, *to_ptr;

  /* copy section */

  /* NOTE(review): from is only used for DEBUG output; the call is
     kept because dalib_pid_ is defined outside this file */
  from = dalib_pid_ ();

#ifdef DEBUG
  /* to is an address, so it is printed with %p */
  printf ("%d copy3 local (%d-%d,%d-%d,%d-%d) to %p, length = %d, ptr = %d\n",
           from,sc_x[0], sc_y[0], sc_x[1], sc_y[1], 
           sc_x[2],sc_y[2], (void *) to, sc_length, sc_ptr);
#endif

  ptr    = a + sc_ptr;

  if (sc_loops == 0)
    { /* contiguous section */
#ifdef DEBUG
      printf ("%d copy section3, %dx%dx%d, continguous \n",
              from, sc_n[0], sc_n[1], sc_n[2]);
#endif 
      dalib_memcpy (to, ptr, sc_length);
    }
   else if (sc_loops == 1)
    { to_ptr = to;
#ifdef DEBUG
      printf ("%d copy section3, %dx%dx%d, 1 lps (%d), inc = (%d)\n",
              from, sc_n[0], sc_n[1], sc_n[2],
              sc_times[1], sc_inc[1]);
#endif 
      for (i=0;i<sc_times[1];i++)
       {  dalib_memcpy (to_ptr, ptr, sc_length);
          ptr += sc_inc[1];
          to_ptr += sc_length;
       }
    }
   else /* sc_loops == 2 */ 
    { to_ptr = to;
#ifdef DEBUG
      printf ("%d copy section3, %dx%dx%d, 2 lps (%d,%d), inc = (%d,%d)\n",
              from, sc_n[0], sc_n[1], sc_n[2],
              sc_times[2], sc_times[1], sc_inc[2],sc_inc[1]);
#endif 
      for (i=0;i<sc_times[2];i++)
       { for (j=0;j<sc_times[1];j++)
          {  dalib_memcpy (to_ptr, ptr, sc_length);
             ptr += sc_inc[1];
             to_ptr += sc_length;
          }
         /* back to the start of the inner loop, then one outer step */
         ptr -= sc_times[1] * sc_inc[1];
         ptr += sc_inc[2];
       }
    }
}

/* Copies the section set up by dalib_setup_section4 out of array a
   to `to`.  A contiguous section (sc_loops == 0) is one run of
   sc_length bytes; otherwise the runs are visited with one, two or
   three loops and stored one after the other at `to`.
   NOTE(review): dalib_memcpy is assumed to take (destination,
   source, length) - confirm against its definition.               */

void dalib_copy_section4 (to, a)
unsigned char *to;
unsigned char *a;

{ int from;
  unsigned char *ptr, *to_ptr;
  int i, j, k;

  /* copy section */

  /* NOTE(review): from is only used for DEBUG output; the call is
     kept because dalib_pid_ is defined outside this file */
  from = dalib_pid_ ();

#ifdef DEBUG
  /* to is an address, so it is printed with %p */
  printf ("sec: %d copy4 (%d-%d,%d-%d,%d-%d,%d-%d) to %p, len=%d, ptr=%d, loops = %d\n",
           from,sc_x[0], sc_y[0], sc_x[1], sc_y[1], 
           sc_x[2],sc_y[2], sc_x[3], sc_y[3], (void *) to, sc_length, sc_ptr, sc_loops);
#endif

  ptr    = a + sc_ptr;

  if (sc_loops == 0)
    { /* contiguous section */
#ifdef DEBUG
      printf ("sec: %d now copy4, %dx%dx%dx%d, continguous \n",
              from, sc_n[0], sc_n[1], sc_n[2], sc_n[3]);
#endif
      dalib_memcpy (to, ptr, sc_length);
    }
   else if (sc_loops == 1)
    { to_ptr = to;
#ifdef DEBUG
      printf ("sec: %d now copy4, %dx%dx%dx%d, 1 loop (%d), inc = (%d)\n",
              from, sc_n[0], sc_n[1], sc_n[2], sc_n[3],
              sc_times[1], sc_inc[1]);
#endif
      for (i=0;i<sc_times[1];i++)
       {  dalib_memcpy (to_ptr, ptr, sc_length);
          ptr += sc_inc[1];
          to_ptr += sc_length;
       }
    }
   else if (sc_loops == 2) 
    { to_ptr = to;
#ifdef DEBUG
      printf ("sec: %d now copy4, %dx%dx%dx%d, 2 lps (%d,%d), inc = (%d,%d)\n",
              from, sc_n[0], sc_n[1], sc_n[2], sc_n[3],
              sc_times[2], sc_times[1], sc_inc[2],sc_inc[1]);
#endif
      for (i=0;i<sc_times[2];i++)
       { for (j=0;j<sc_times[1];j++)
          {  dalib_memcpy (to_ptr, ptr, sc_length);
             ptr += sc_inc[1];
             to_ptr += sc_length;
          }
         /* back to the start of the inner loop, then one outer step */
         ptr -= sc_times[1] * sc_inc[1];
         ptr += sc_inc[2];
       }
    }
   else /* sc_loops == 3 */
    { to_ptr = to;
#ifdef DEBUG
      printf ("sec: %d now copy4, %dx%dx%dx%d, 3lps(%d,%d,%d),inc=(%d,%d,%d)\n",
              from, sc_n[0], sc_n[1], sc_n[2], sc_n[3],
              sc_times[3], sc_times[2], sc_times[1], 
              sc_inc[3], sc_inc[2],sc_inc[1]);
#endif
      for (i=0;i<sc_times[3];i++)
       { for (j=0;j<sc_times[2];j++)
          { for (k=0;k<sc_times[1];k++)
             {  dalib_memcpy (to_ptr, ptr, sc_length);
                ptr += sc_inc[1];
                to_ptr += sc_length;
             }
            /* rewind the innermost loop, advance the middle one */
            ptr -= sc_times[1] * sc_inc[1];
            ptr += sc_inc[2];
          }
          /* rewind the middle loop, advance the outermost one */
          ptr -= sc_times[2] * sc_inc[2];
          ptr += sc_inc[3];
       }
    }
}
