xref: /petsc/src/mat/utils/matstash.c (revision 6849ba73f22fecb8f92ef896a42e4e8bd4cd6965)
12d5177cdSBarry Smith 
270f55243SBarry Smith #include "src/mat/matimpl.h"
39417f4adSLois Curfman McInnes 
43eda8832SBarry Smith /*
50ae3cd3bSBarry Smith        The input to the stash is ALWAYS in MatScalar precision, and the
60ae3cd3bSBarry Smith     internal storage and output is also in MatScalar.
73eda8832SBarry Smith */
8bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE   10000
94c1ff481SSatish Balay 
109417f4adSLois Curfman McInnes /*
118798bf22SSatish Balay   MatStashCreate_Private - Creates a stash,currently used for all the parallel
124c1ff481SSatish Balay   matrix implementations. The stash is where elements of a matrix destined
134c1ff481SSatish Balay   to be stored on other processors are kept until matrix assembly is done.
149417f4adSLois Curfman McInnes 
154c1ff481SSatish Balay   This is a simple minded stash. Simply adds entries to end of stash.
164c1ff481SSatish Balay 
174c1ff481SSatish Balay   Input Parameters:
184c1ff481SSatish Balay   comm - communicator, required for scatters.
194c1ff481SSatish Balay   bs   - stash block size. used when stashing blocks of values
204c1ff481SSatish Balay 
214c1ff481SSatish Balay   Output Parameters:
224c1ff481SSatish Balay   stash    - the newly created stash
239417f4adSLois Curfman McInnes */
244a2ae208SSatish Balay #undef __FUNCT__
254a2ae208SSatish Balay #define __FUNCT__ "MatStashCreate_Private"
26dfbe8321SBarry Smith PetscErrorCode MatStashCreate_Private(MPI_Comm comm,int bs,MatStash *stash)
279417f4adSLois Curfman McInnes {
28dfbe8321SBarry Smith   PetscErrorCode ierr;
29dfbe8321SBarry Smith   int max,*opt,nopt;
30f1af5d2fSBarry Smith   PetscTruth flg;
31bc5ccf88SSatish Balay 
323a40ed3dSBarry Smith   PetscFunctionBegin;
33bc5ccf88SSatish Balay   /* Require 2 tags,get the second using PetscCommGetNewTag() */
34752ec6e0SSatish Balay   stash->comm = comm;
35752ec6e0SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
36a2d1c673SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
37a2d1c673SSatish Balay   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
38a2d1c673SSatish Balay   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
39bc5ccf88SSatish Balay 
40434d7ff9SSatish Balay   nopt = stash->size;
4182502324SSatish Balay   ierr = PetscMalloc(nopt*sizeof(int),&opt);CHKERRQ(ierr);
42b0a32e0cSBarry Smith   ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
43434d7ff9SSatish Balay   if (flg) {
44434d7ff9SSatish Balay     if (nopt == 1)                max = opt[0];
45434d7ff9SSatish Balay     else if (nopt == stash->size) max = opt[stash->rank];
46434d7ff9SSatish Balay     else if (stash->rank < nopt)  max = opt[stash->rank];
47f4ab19daSSatish Balay     else                          max = 0; /* Use default */
48434d7ff9SSatish Balay     stash->umax = max;
49434d7ff9SSatish Balay   } else {
50434d7ff9SSatish Balay     stash->umax = 0;
51434d7ff9SSatish Balay   }
52606d414cSSatish Balay   ierr = PetscFree(opt);CHKERRQ(ierr);
534c1ff481SSatish Balay   if (bs <= 0) bs = 1;
54a2d1c673SSatish Balay 
554c1ff481SSatish Balay   stash->bs       = bs;
569417f4adSLois Curfman McInnes   stash->nmax     = 0;
57434d7ff9SSatish Balay   stash->oldnmax  = 0;
589417f4adSLois Curfman McInnes   stash->n        = 0;
594c1ff481SSatish Balay   stash->reallocs = -1;
609417f4adSLois Curfman McInnes   stash->idx      = 0;
619417f4adSLois Curfman McInnes   stash->idy      = 0;
62bc5ccf88SSatish Balay   stash->array    = 0;
639417f4adSLois Curfman McInnes 
64bc5ccf88SSatish Balay   stash->send_waits  = 0;
65bc5ccf88SSatish Balay   stash->recv_waits  = 0;
66a2d1c673SSatish Balay   stash->send_status = 0;
67bc5ccf88SSatish Balay   stash->nsends      = 0;
68bc5ccf88SSatish Balay   stash->nrecvs      = 0;
69bc5ccf88SSatish Balay   stash->svalues     = 0;
70bc5ccf88SSatish Balay   stash->rvalues     = 0;
71bc5ccf88SSatish Balay   stash->rmax        = 0;
72a2d1c673SSatish Balay   stash->nprocs      = 0;
73a2d1c673SSatish Balay   stash->nprocessed  = 0;
743a40ed3dSBarry Smith   PetscFunctionReturn(0);
759417f4adSLois Curfman McInnes }
769417f4adSLois Curfman McInnes 
774c1ff481SSatish Balay /*
788798bf22SSatish Balay    MatStashDestroy_Private - Destroy the stash
794c1ff481SSatish Balay */
804a2ae208SSatish Balay #undef __FUNCT__
814a2ae208SSatish Balay #define __FUNCT__ "MatStashDestroy_Private"
82dfbe8321SBarry Smith PetscErrorCode MatStashDestroy_Private(MatStash *stash)
839417f4adSLois Curfman McInnes {
84dfbe8321SBarry Smith   PetscErrorCode ierr;
85a2d1c673SSatish Balay 
86bc5ccf88SSatish Balay   PetscFunctionBegin;
87606d414cSSatish Balay   if (stash->array) {
88606d414cSSatish Balay     ierr = PetscFree(stash->array);CHKERRQ(ierr);
89606d414cSSatish Balay     stash->array = 0;
90606d414cSSatish Balay   }
91bc5ccf88SSatish Balay   PetscFunctionReturn(0);
92bc5ccf88SSatish Balay }
93bc5ccf88SSatish Balay 
944c1ff481SSatish Balay /*
958798bf22SSatish Balay    MatStashScatterEnd_Private - This is called as the fial stage of
964c1ff481SSatish Balay    scatter. The final stages of messagepassing is done here, and
974c1ff481SSatish Balay    all the memory used for messagepassing is cleanedu up. This
984c1ff481SSatish Balay    routine also resets the stash, and deallocates the memory used
994c1ff481SSatish Balay    for the stash. It also keeps track of the current memory usage
1004c1ff481SSatish Balay    so that the same value can be used the next time through.
1014c1ff481SSatish Balay */
1024a2ae208SSatish Balay #undef __FUNCT__
1034a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterEnd_Private"
104dfbe8321SBarry Smith PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
105bc5ccf88SSatish Balay {
106*6849ba73SBarry Smith   PetscErrorCode ierr;
107*6849ba73SBarry Smith   int         nsends=stash->nsends,bs2,oldnmax;
108a2d1c673SSatish Balay   MPI_Status  *send_status;
109a2d1c673SSatish Balay 
1103a40ed3dSBarry Smith   PetscFunctionBegin;
111a2d1c673SSatish Balay   /* wait on sends */
112a2d1c673SSatish Balay   if (nsends) {
11382502324SSatish Balay     ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
114a2d1c673SSatish Balay     ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
115606d414cSSatish Balay     ierr = PetscFree(send_status);CHKERRQ(ierr);
116a2d1c673SSatish Balay   }
117a2d1c673SSatish Balay 
118c0c58ca7SSatish Balay   /* Now update nmaxold to be app 10% more than max n used, this way the
119434d7ff9SSatish Balay      wastage of space is reduced the next time this stash is used.
120434d7ff9SSatish Balay      Also update the oldmax, only if it increases */
121b9b97703SBarry Smith   if (stash->n) {
12294b769a5SSatish Balay     bs2      = stash->bs*stash->bs;
1238a9378f0SSatish Balay     oldnmax  = ((int)(stash->n * 1.1) + 5)*bs2;
124434d7ff9SSatish Balay     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
125b9b97703SBarry Smith   }
126434d7ff9SSatish Balay 
127d07ff455SSatish Balay   stash->nmax       = 0;
128d07ff455SSatish Balay   stash->n          = 0;
1294c1ff481SSatish Balay   stash->reallocs   = -1;
130bc5ccf88SSatish Balay   stash->rmax       = 0;
131a2d1c673SSatish Balay   stash->nprocessed = 0;
132bc5ccf88SSatish Balay 
133bc5ccf88SSatish Balay   if (stash->array) {
134606d414cSSatish Balay     ierr         = PetscFree(stash->array);CHKERRQ(ierr);
135bc5ccf88SSatish Balay     stash->array = 0;
136bc5ccf88SSatish Balay     stash->idx   = 0;
137bc5ccf88SSatish Balay     stash->idy   = 0;
138bc5ccf88SSatish Balay   }
139606d414cSSatish Balay   if (stash->send_waits) {
140606d414cSSatish Balay     ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
141606d414cSSatish Balay     stash->send_waits = 0;
142606d414cSSatish Balay   }
143606d414cSSatish Balay   if (stash->recv_waits) {
144606d414cSSatish Balay     ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
145606d414cSSatish Balay     stash->recv_waits = 0;
146606d414cSSatish Balay   }
147606d414cSSatish Balay   if (stash->svalues) {
148606d414cSSatish Balay     ierr = PetscFree(stash->svalues);CHKERRQ(ierr);
149606d414cSSatish Balay     stash->svalues = 0;
150606d414cSSatish Balay   }
151606d414cSSatish Balay   if (stash->rvalues) {
152606d414cSSatish Balay     ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
153606d414cSSatish Balay     stash->rvalues = 0;
154606d414cSSatish Balay   }
155606d414cSSatish Balay   if (stash->nprocs) {
156b22afee1SSatish Balay     ierr = PetscFree(stash->nprocs);CHKERRQ(ierr);
157606d414cSSatish Balay     stash->nprocs = 0;
158606d414cSSatish Balay   }
159bc5ccf88SSatish Balay 
1603a40ed3dSBarry Smith   PetscFunctionReturn(0);
1619417f4adSLois Curfman McInnes }
1629417f4adSLois Curfman McInnes 
1634c1ff481SSatish Balay /*
1648798bf22SSatish Balay    MatStashGetInfo_Private - Gets the relavant statistics of the stash
1654c1ff481SSatish Balay 
1664c1ff481SSatish Balay    Input Parameters:
1674c1ff481SSatish Balay    stash    - the stash
16894b769a5SSatish Balay    nstash   - the size of the stash. Indicates the number of values stored.
1694c1ff481SSatish Balay    reallocs - the number of additional mallocs incurred.
1704c1ff481SSatish Balay 
1714c1ff481SSatish Balay */
1724a2ae208SSatish Balay #undef __FUNCT__
1734a2ae208SSatish Balay #define __FUNCT__ "MatStashGetInfo_Private"
174dfbe8321SBarry Smith PetscErrorCode MatStashGetInfo_Private(MatStash *stash,int *nstash,int *reallocs)
17597530c3fSBarry Smith {
17694b769a5SSatish Balay   int bs2 = stash->bs*stash->bs;
17794b769a5SSatish Balay 
1783a40ed3dSBarry Smith   PetscFunctionBegin;
1791ecfd215SBarry Smith   if (nstash) *nstash   = stash->n*bs2;
1801ecfd215SBarry Smith   if (reallocs) {
181434d7ff9SSatish Balay     if (stash->reallocs < 0) *reallocs = 0;
182434d7ff9SSatish Balay     else                     *reallocs = stash->reallocs;
1831ecfd215SBarry Smith   }
184bc5ccf88SSatish Balay   PetscFunctionReturn(0);
185bc5ccf88SSatish Balay }
1864c1ff481SSatish Balay 
1874c1ff481SSatish Balay 
1884c1ff481SSatish Balay /*
1898798bf22SSatish Balay    MatStashSetInitialSize_Private - Sets the initial size of the stash
1904c1ff481SSatish Balay 
1914c1ff481SSatish Balay    Input Parameters:
1924c1ff481SSatish Balay    stash  - the stash
1934c1ff481SSatish Balay    max    - the value that is used as the max size of the stash.
1944c1ff481SSatish Balay             this value is used while allocating memory.
1954c1ff481SSatish Balay */
1964a2ae208SSatish Balay #undef __FUNCT__
1974a2ae208SSatish Balay #define __FUNCT__ "MatStashSetInitialSize_Private"
198dfbe8321SBarry Smith PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,int max)
199bc5ccf88SSatish Balay {
200bc5ccf88SSatish Balay   PetscFunctionBegin;
201434d7ff9SSatish Balay   stash->umax = max;
2023a40ed3dSBarry Smith   PetscFunctionReturn(0);
20397530c3fSBarry Smith }
20497530c3fSBarry Smith 
2058798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called
2064c1ff481SSatish Balay    when the space in the stash is not sufficient to add the new values
2074c1ff481SSatish Balay    being inserted into the stash.
2084c1ff481SSatish Balay 
2094c1ff481SSatish Balay    Input Parameters:
2104c1ff481SSatish Balay    stash - the stash
2114c1ff481SSatish Balay    incr  - the minimum increase requested
2124c1ff481SSatish Balay 
2134c1ff481SSatish Balay    Notes:
2144c1ff481SSatish Balay    This routine doubles the currently used memory.
2154c1ff481SSatish Balay  */
2164a2ae208SSatish Balay #undef __FUNCT__
2174a2ae208SSatish Balay #define __FUNCT__ "MatStashExpand_Private"
218*6849ba73SBarry Smith static PetscErrorCode MatStashExpand_Private(MatStash *stash,int incr)
2199417f4adSLois Curfman McInnes {
220*6849ba73SBarry Smith   PetscErrorCode ierr;
221*6849ba73SBarry Smith   int       *n_idx,*n_idy,newnmax,bs2;
2223eda8832SBarry Smith   MatScalar *n_array;
2239417f4adSLois Curfman McInnes 
2243a40ed3dSBarry Smith   PetscFunctionBegin;
2259417f4adSLois Curfman McInnes   /* allocate a larger stash */
22694b769a5SSatish Balay   bs2     = stash->bs*stash->bs;
227c481ceb5SSatish Balay   if (!stash->oldnmax && !stash->nmax) { /* new stash */
228434d7ff9SSatish Balay     if (stash->umax)                  newnmax = stash->umax/bs2;
229434d7ff9SSatish Balay     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
230c481ceb5SSatish Balay   } else if (!stash->nmax) { /* resuing stash */
231434d7ff9SSatish Balay     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
232434d7ff9SSatish Balay     else                              newnmax = stash->oldnmax/bs2;
233434d7ff9SSatish Balay   } else                              newnmax = stash->nmax*2;
2344c1ff481SSatish Balay   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
235d07ff455SSatish Balay 
236b0a32e0cSBarry Smith   ierr  = PetscMalloc((newnmax)*(2*sizeof(int)+bs2*sizeof(MatScalar)),&n_array);CHKERRQ(ierr);
237a2d1c673SSatish Balay   n_idx = (int*)(n_array + bs2*newnmax);
238d07ff455SSatish Balay   n_idy = (int*)(n_idx + newnmax);
2393eda8832SBarry Smith   ierr  = PetscMemcpy(n_array,stash->array,bs2*stash->nmax*sizeof(MatScalar));CHKERRQ(ierr);
240549d3d68SSatish Balay   ierr  = PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(int));CHKERRQ(ierr);
241549d3d68SSatish Balay   ierr  = PetscMemcpy(n_idy,stash->idy,stash->nmax*sizeof(int));CHKERRQ(ierr);
242606d414cSSatish Balay   if (stash->array) {ierr = PetscFree(stash->array);CHKERRQ(ierr);}
243d07ff455SSatish Balay   stash->array   = n_array;
244d07ff455SSatish Balay   stash->idx     = n_idx;
245d07ff455SSatish Balay   stash->idy     = n_idy;
246d07ff455SSatish Balay   stash->nmax    = newnmax;
247bc5ccf88SSatish Balay   stash->reallocs++;
248bc5ccf88SSatish Balay   PetscFunctionReturn(0);
249bc5ccf88SSatish Balay }
250bc5ccf88SSatish Balay /*
2518798bf22SSatish Balay   MatStashValuesRow_Private - inserts values into the stash. This function
2524c1ff481SSatish Balay   expects the values to be roworiented. Multiple columns belong to the same row
2534c1ff481SSatish Balay   can be inserted with a single call to this function.
2544c1ff481SSatish Balay 
2554c1ff481SSatish Balay   Input Parameters:
2564c1ff481SSatish Balay   stash  - the stash
2574c1ff481SSatish Balay   row    - the global row correspoiding to the values
2584c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
2594c1ff481SSatish Balay   idxn   - the global column indices corresponding to each of the values.
2604c1ff481SSatish Balay   values - the values inserted
261bc5ccf88SSatish Balay */
2624a2ae208SSatish Balay #undef __FUNCT__
2634a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRow_Private"
264dfbe8321SBarry Smith PetscErrorCode MatStashValuesRow_Private(MatStash *stash,int row,int n,const int idxn[],const MatScalar values[])
265bc5ccf88SSatish Balay {
266dfbe8321SBarry Smith   PetscErrorCode ierr;
267dfbe8321SBarry Smith   int i;
268bc5ccf88SSatish Balay 
269bc5ccf88SSatish Balay   PetscFunctionBegin;
2704c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
2714c1ff481SSatish Balay   if ((stash->n + n) > stash->nmax) {
2728798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
2739417f4adSLois Curfman McInnes   }
2744c1ff481SSatish Balay   for (i=0; i<n; i++) {
2759417f4adSLois Curfman McInnes     stash->idx[stash->n]   = row;
276a2d1c673SSatish Balay     stash->idy[stash->n]   = idxn[i];
2770ae3cd3bSBarry Smith     stash->array[stash->n] = values[i];
278a2d1c673SSatish Balay     stash->n++;
2799417f4adSLois Curfman McInnes   }
280a2d1c673SSatish Balay   PetscFunctionReturn(0);
281a2d1c673SSatish Balay }
2824c1ff481SSatish Balay /*
2838798bf22SSatish Balay   MatStashValuesCol_Private - inserts values into the stash. This function
2844c1ff481SSatish Balay   expects the values to be columnoriented. Multiple columns belong to the same row
2854c1ff481SSatish Balay   can be inserted with a single call to this function.
286a2d1c673SSatish Balay 
2874c1ff481SSatish Balay   Input Parameters:
2884c1ff481SSatish Balay   stash   - the stash
2894c1ff481SSatish Balay   row     - the global row correspoiding to the values
2904c1ff481SSatish Balay   n       - the number of elements inserted. All elements belong to the above row.
2914c1ff481SSatish Balay   idxn    - the global column indices corresponding to each of the values.
2924c1ff481SSatish Balay   values  - the values inserted
2934c1ff481SSatish Balay   stepval - the consecutive values are sepated by a distance of stepval.
2944c1ff481SSatish Balay             this happens because the input is columnoriented.
2954c1ff481SSatish Balay */
2964a2ae208SSatish Balay #undef __FUNCT__
2974a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesCol_Private"
298dfbe8321SBarry Smith PetscErrorCode MatStashValuesCol_Private(MatStash *stash,int row,int n,const int idxn[],const MatScalar values[],int stepval)
299a2d1c673SSatish Balay {
300dfbe8321SBarry Smith   PetscErrorCode ierr;
301dfbe8321SBarry Smith   int i;
302a2d1c673SSatish Balay 
3034c1ff481SSatish Balay   PetscFunctionBegin;
3044c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
3054c1ff481SSatish Balay   if ((stash->n + n) > stash->nmax) {
3068798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
3074c1ff481SSatish Balay   }
3084c1ff481SSatish Balay   for (i=0; i<n; i++) {
3094c1ff481SSatish Balay     stash->idx[stash->n]   = row;
3104c1ff481SSatish Balay     stash->idy[stash->n]   = idxn[i];
3110ae3cd3bSBarry Smith     stash->array[stash->n] = values[i*stepval];
3124c1ff481SSatish Balay     stash->n++;
3134c1ff481SSatish Balay   }
3144c1ff481SSatish Balay   PetscFunctionReturn(0);
3154c1ff481SSatish Balay }
3164c1ff481SSatish Balay 
3174c1ff481SSatish Balay /*
3188798bf22SSatish Balay   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
3194c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3204c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3214c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3224c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3234c1ff481SSatish Balay 
3244c1ff481SSatish Balay   Input Parameters:
3254c1ff481SSatish Balay   stash  - the stash
3264c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3274c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3284c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3294c1ff481SSatish Balay            values. Each block is of size bs*bs.
3304c1ff481SSatish Balay   values - the values inserted
3314c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
3324c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
3334c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3344c1ff481SSatish Balay */
3354a2ae208SSatish Balay #undef __FUNCT__
3364a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRowBlocked_Private"
337dfbe8321SBarry Smith PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,int row,int n,const int idxn[],const MatScalar values[],int rmax,int cmax,int idx)
3384c1ff481SSatish Balay {
339dfbe8321SBarry Smith   PetscErrorCode ierr;
340dfbe8321SBarry Smith   int i,j,k,bs2,bs=stash->bs;
341f15d580aSBarry Smith   const MatScalar *vals;
342f15d580aSBarry Smith   MatScalar       *array;
343a2d1c673SSatish Balay 
344a2d1c673SSatish Balay   PetscFunctionBegin;
345a2d1c673SSatish Balay   bs2 = bs*bs;
3464c1ff481SSatish Balay   if ((stash->n+n) > stash->nmax) {
3478798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
348a2d1c673SSatish Balay   }
3494c1ff481SSatish Balay   for (i=0; i<n; i++) {
350a2d1c673SSatish Balay     stash->idx[stash->n]   = row;
351a2d1c673SSatish Balay     stash->idy[stash->n] = idxn[i];
352a2d1c673SSatish Balay     /* Now copy over the block of values. Store the values column oriented.
353a2d1c673SSatish Balay        This enables inserting multiple blocks belonging to a row with a single
354a2d1c673SSatish Balay        funtion call */
355a2d1c673SSatish Balay     array = stash->array + bs2*stash->n;
356a2d1c673SSatish Balay     vals  = values + idx*bs2*n + bs*i;
357a2d1c673SSatish Balay     for (j=0; j<bs; j++) {
3580ae3cd3bSBarry Smith       for (k=0; k<bs; k++) {array[k*bs] = vals[k];}
359a2d1c673SSatish Balay       array += 1;
360a2d1c673SSatish Balay       vals  += cmax*bs;
361a2d1c673SSatish Balay     }
3624c1ff481SSatish Balay     stash->n++;
3634c1ff481SSatish Balay   }
3644c1ff481SSatish Balay   PetscFunctionReturn(0);
3654c1ff481SSatish Balay }
3664c1ff481SSatish Balay 
3674c1ff481SSatish Balay /*
3688798bf22SSatish Balay   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
3694c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3704c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3714c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3724c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3734c1ff481SSatish Balay 
3744c1ff481SSatish Balay   Input Parameters:
3754c1ff481SSatish Balay   stash  - the stash
3764c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3774c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3784c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3794c1ff481SSatish Balay            values. Each block is of size bs*bs.
3804c1ff481SSatish Balay   values - the values inserted
3814c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
3824c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
3834c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3844c1ff481SSatish Balay */
3854a2ae208SSatish Balay #undef __FUNCT__
3864a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesColBlocked_Private"
387dfbe8321SBarry Smith PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,int row,int n,const int idxn[],const MatScalar values[],int rmax,int cmax,int idx)
3884c1ff481SSatish Balay {
389dfbe8321SBarry Smith   PetscErrorCode ierr;
390dfbe8321SBarry Smith   int i,j,k,bs2,bs=stash->bs;
391f15d580aSBarry Smith   const MatScalar *vals;
392f15d580aSBarry Smith   MatScalar       *array;
3934c1ff481SSatish Balay 
3944c1ff481SSatish Balay   PetscFunctionBegin;
3954c1ff481SSatish Balay   bs2 = bs*bs;
3964c1ff481SSatish Balay   if ((stash->n+n) > stash->nmax) {
3978798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
3984c1ff481SSatish Balay   }
3994c1ff481SSatish Balay   for (i=0; i<n; i++) {
4004c1ff481SSatish Balay     stash->idx[stash->n]   = row;
4014c1ff481SSatish Balay     stash->idy[stash->n] = idxn[i];
4024c1ff481SSatish Balay     /* Now copy over the block of values. Store the values column oriented.
4034c1ff481SSatish Balay      This enables inserting multiple blocks belonging to a row with a single
4044c1ff481SSatish Balay      funtion call */
405a2d1c673SSatish Balay     array = stash->array + bs2*stash->n;
406a2d1c673SSatish Balay     vals  = values + idx*bs + bs2*rmax*i;
407a2d1c673SSatish Balay     for (j=0; j<bs; j++) {
4080ae3cd3bSBarry Smith       for (k=0; k<bs; k++) {array[k] = vals[k];}
409a2d1c673SSatish Balay       array += bs;
410a2d1c673SSatish Balay       vals  += rmax*bs;
411a2d1c673SSatish Balay     }
412a2d1c673SSatish Balay     stash->n++;
4139417f4adSLois Curfman McInnes   }
4143a40ed3dSBarry Smith   PetscFunctionReturn(0);
4159417f4adSLois Curfman McInnes }
4164c1ff481SSatish Balay /*
4178798bf22SSatish Balay   MatStashScatterBegin_Private - Initiates the transfer of values to the
4184c1ff481SSatish Balay   correct owners. This function goes through the stash, and check the
4194c1ff481SSatish Balay   owners of each stashed value, and sends the values off to the owner
4204c1ff481SSatish Balay   processors.
421bc5ccf88SSatish Balay 
4224c1ff481SSatish Balay   Input Parameters:
4234c1ff481SSatish Balay   stash  - the stash
4244c1ff481SSatish Balay   owners - an array of size 'no-of-procs' which gives the ownership range
4254c1ff481SSatish Balay            for each node.
4264c1ff481SSatish Balay 
4274c1ff481SSatish Balay   Notes: The 'owners' array in the cased of the blocked-stash has the
4284c1ff481SSatish Balay   ranges specified blocked global indices, and for the regular stash in
4294c1ff481SSatish Balay   the proper global indices.
4304c1ff481SSatish Balay */
4314a2ae208SSatish Balay #undef __FUNCT__
4324a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterBegin_Private"
433dfbe8321SBarry Smith PetscErrorCode MatStashScatterBegin_Private(MatStash *stash,int *owners)
434bc5ccf88SSatish Balay {
435a2d1c673SSatish Balay   int         *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
436ccae9161SBarry Smith   int         size=stash->size,*nprocs,nsends,nreceives;
437*6849ba73SBarry Smith   PetscErrorCode ierr;
438*6849ba73SBarry Smith   int         nmax,count,*sindices,*rindices,i,j,idx;
4393eda8832SBarry Smith   MatScalar   *rvalues,*svalues;
440bc5ccf88SSatish Balay   MPI_Comm    comm = stash->comm;
441bc5ccf88SSatish Balay   MPI_Request *send_waits,*recv_waits;
442bc5ccf88SSatish Balay 
443bc5ccf88SSatish Balay   PetscFunctionBegin;
444bc5ccf88SSatish Balay 
4454c1ff481SSatish Balay   bs2   = stash->bs*stash->bs;
446bc5ccf88SSatish Balay   /*  first count number of contributors to each processor */
44782502324SSatish Balay   ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
448549d3d68SSatish Balay   ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
44982502324SSatish Balay   ierr  = PetscMalloc((stash->n+1)*sizeof(int),&owner);CHKERRQ(ierr);
450a2d1c673SSatish Balay 
451bc5ccf88SSatish Balay   for (i=0; i<stash->n; i++) {
452bc5ccf88SSatish Balay     idx = stash->idx[i];
453bc5ccf88SSatish Balay     for (j=0; j<size; j++) {
4544c1ff481SSatish Balay       if (idx >= owners[j] && idx < owners[j+1]) {
455c1dc657dSBarry Smith         nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; break;
456bc5ccf88SSatish Balay       }
457bc5ccf88SSatish Balay     }
458bc5ccf88SSatish Balay   }
459c1dc657dSBarry Smith   nsends = 0;  for (i=0; i<size; i++) { nsends += nprocs[2*i+1];}
460bc5ccf88SSatish Balay 
461bc5ccf88SSatish Balay   /* inform other processors of number of messages and max length*/
462c1dc657dSBarry Smith   ierr = PetscMaxSum(comm,nprocs,&nmax,&nreceives);CHKERRQ(ierr);
463c1dc657dSBarry Smith 
464bc5ccf88SSatish Balay   /* post receives:
465bc5ccf88SSatish Balay      since we don't know how long each individual message is we
466bc5ccf88SSatish Balay      allocate the largest needed buffer for each receive. Potentially
467bc5ccf88SSatish Balay      this is a lot of wasted space.
468bc5ccf88SSatish Balay   */
469b0a32e0cSBarry Smith   ierr     = PetscMalloc((nreceives+1)*(nmax+1)*(bs2*sizeof(MatScalar)+2*sizeof(int)),&rvalues);CHKERRQ(ierr);
470a2d1c673SSatish Balay   rindices = (int*)(rvalues + bs2*nreceives*nmax);
471b0a32e0cSBarry Smith   ierr     = PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
472bc5ccf88SSatish Balay   for (i=0,count=0; i<nreceives; i++) {
4733eda8832SBarry Smith     ierr = MPI_Irecv(rvalues+bs2*nmax*i,bs2*nmax,MPIU_MATSCALAR,MPI_ANY_SOURCE,tag1,comm,
474bc5ccf88SSatish Balay                      recv_waits+count++);CHKERRQ(ierr);
4750ae3cd3bSBarry Smith     ierr = MPI_Irecv(rindices+2*nmax*i,2*nmax,MPI_INT,MPI_ANY_SOURCE,tag2,comm,recv_waits+count++);CHKERRQ(ierr);
476bc5ccf88SSatish Balay   }
477bc5ccf88SSatish Balay 
478bc5ccf88SSatish Balay   /* do sends:
479bc5ccf88SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
480bc5ccf88SSatish Balay          the ith processor
481bc5ccf88SSatish Balay   */
48282502324SSatish Balay   ierr     = PetscMalloc((stash->n+1)*(bs2*sizeof(MatScalar)+2*sizeof(int)),&svalues);CHKERRQ(ierr);
483a2d1c673SSatish Balay   sindices = (int*)(svalues + bs2*stash->n);
484b0a32e0cSBarry Smith   ierr     = PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
48582502324SSatish Balay   ierr     = PetscMalloc(2*size*sizeof(int),&startv);CHKERRQ(ierr);
486bc5ccf88SSatish Balay   starti   = startv + size;
487a2d1c673SSatish Balay   /* use 2 sends the first with all_a, the next with all_i and all_j */
488bc5ccf88SSatish Balay   startv[0]  = 0; starti[0] = 0;
489bc5ccf88SSatish Balay   for (i=1; i<size; i++) {
490c1dc657dSBarry Smith     startv[i] = startv[i-1] + nprocs[2*i-2];
491c1dc657dSBarry Smith     starti[i] = starti[i-1] + nprocs[2*i-2]*2;
492bc5ccf88SSatish Balay   }
493bc5ccf88SSatish Balay   for (i=0; i<stash->n; i++) {
494bc5ccf88SSatish Balay     j = owner[i];
495a2d1c673SSatish Balay     if (bs2 == 1) {
496bc5ccf88SSatish Balay       svalues[startv[j]]              = stash->array[i];
497a2d1c673SSatish Balay     } else {
4984c1ff481SSatish Balay       int       k;
4993eda8832SBarry Smith       MatScalar *buf1,*buf2;
5004c1ff481SSatish Balay       buf1 = svalues+bs2*startv[j];
5014c1ff481SSatish Balay       buf2 = stash->array+bs2*i;
5024c1ff481SSatish Balay       for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; }
503a2d1c673SSatish Balay     }
504bc5ccf88SSatish Balay     sindices[starti[j]]               = stash->idx[i];
505c1dc657dSBarry Smith     sindices[starti[j]+nprocs[2*j]]   = stash->idy[i];
506bc5ccf88SSatish Balay     startv[j]++;
507bc5ccf88SSatish Balay     starti[j]++;
508bc5ccf88SSatish Balay   }
509bc5ccf88SSatish Balay   startv[0] = 0;
510c1dc657dSBarry Smith   for (i=1; i<size; i++) { startv[i] = startv[i-1] + nprocs[2*i-2];}
511bc5ccf88SSatish Balay   for (i=0,count=0; i<size; i++) {
512c1dc657dSBarry Smith     if (nprocs[2*i+1]) {
513c1dc657dSBarry Smith       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nprocs[2*i],MPIU_MATSCALAR,i,tag1,comm,
514bc5ccf88SSatish Balay                        send_waits+count++);CHKERRQ(ierr);
515c1dc657dSBarry Smith       ierr = MPI_Isend(sindices+2*startv[i],2*nprocs[2*i],MPI_INT,i,tag2,comm,
516bc5ccf88SSatish Balay                        send_waits+count++);CHKERRQ(ierr);
517bc5ccf88SSatish Balay     }
518bc5ccf88SSatish Balay   }
519606d414cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
520606d414cSSatish Balay   ierr = PetscFree(startv);CHKERRQ(ierr);
521a2d1c673SSatish Balay   /* This memory is reused in scatter end  for a different purpose*/
522a2d1c673SSatish Balay   for (i=0; i<2*size; i++) nprocs[i] = -1;
523a2d1c673SSatish Balay   stash->nprocs      = nprocs;
524a2d1c673SSatish Balay 
525bc5ccf88SSatish Balay   stash->svalues    = svalues;    stash->rvalues    = rvalues;
526bc5ccf88SSatish Balay   stash->nsends     = nsends;     stash->nrecvs     = nreceives;
527bc5ccf88SSatish Balay   stash->send_waits = send_waits; stash->recv_waits = recv_waits;
528bc5ccf88SSatish Balay   stash->rmax       = nmax;
529bc5ccf88SSatish Balay   PetscFunctionReturn(0);
530bc5ccf88SSatish Balay }
531bc5ccf88SSatish Balay 
532a2d1c673SSatish Balay /*
5338798bf22SSatish Balay    MatStashScatterGetMesg_Private - This function waits on the receives posted
5348798bf22SSatish Balay    in the function MatStashScatterBegin_Private() and returns one message at
5354c1ff481SSatish Balay    a time to the calling function. If no messages are left, it indicates this
5364c1ff481SSatish Balay    by setting flg = 0, else it sets flg = 1.
5374c1ff481SSatish Balay 
5384c1ff481SSatish Balay    Input Parameters:
5394c1ff481SSatish Balay    stash - the stash
5404c1ff481SSatish Balay 
5414c1ff481SSatish Balay    Output Parameters:
5424c1ff481SSatish Balay    nvals - the number of entries in the current message.
5434c1ff481SSatish Balay    rows  - an array of row indices (or blocked indices) corresponding to the values
5444c1ff481SSatish Balay    cols  - an array of columnindices (or blocked indices) corresponding to the values
5454c1ff481SSatish Balay    vals  - the values
5464c1ff481SSatish Balay    flg   - 0 indicates no more message left, and the current call has no values associated.
5474c1ff481SSatish Balay            1 indicates that the current call successfully received a message, and the
5484c1ff481SSatish Balay              other output parameters nvals,rows,cols,vals are set appropriately.
549a2d1c673SSatish Balay */
5504a2ae208SSatish Balay #undef __FUNCT__
5514a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterGetMesg_Private"
552dfbe8321SBarry Smith PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,int *nvals,int **rows,int** cols,MatScalar **vals,int *flg)
553bc5ccf88SSatish Balay {
554*6849ba73SBarry Smith   PetscErrorCode ierr;
555*6849ba73SBarry Smith   int         i,*flg_v,i1,i2,*rindices,bs2;
556a2d1c673SSatish Balay   MPI_Status  recv_status;
557b0a32e0cSBarry Smith   PetscTruth  match_found = PETSC_FALSE;
558bc5ccf88SSatish Balay 
559bc5ccf88SSatish Balay   PetscFunctionBegin;
560bc5ccf88SSatish Balay 
561a2d1c673SSatish Balay   *flg = 0; /* When a message is discovered this is reset to 1 */
562a2d1c673SSatish Balay   /* Return if no more messages to process */
563a2d1c673SSatish Balay   if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); }
564a2d1c673SSatish Balay 
565a2d1c673SSatish Balay   flg_v = stash->nprocs;
5664c1ff481SSatish Balay   bs2   = stash->bs*stash->bs;
567a2d1c673SSatish Balay   /* If a matching pair of receieves are found, process them, and return the data to
568a2d1c673SSatish Balay      the calling function. Until then keep receiving messages */
569a2d1c673SSatish Balay   while (!match_found) {
570a2d1c673SSatish Balay     ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
571a2d1c673SSatish Balay     /* Now pack the received message into a structure which is useable by others */
572a2d1c673SSatish Balay     if (i % 2) {
573a2d1c673SSatish Balay       ierr = MPI_Get_count(&recv_status,MPI_INT,nvals);CHKERRQ(ierr);
574c1dc657dSBarry Smith       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
575a2d1c673SSatish Balay       *nvals = *nvals/2; /* This message has both row indices and col indices */
576a2d1c673SSatish Balay     } else {
5773eda8832SBarry Smith       ierr = MPI_Get_count(&recv_status,MPIU_MATSCALAR,nvals);CHKERRQ(ierr);
578c1dc657dSBarry Smith       flg_v[2*recv_status.MPI_SOURCE] = i/2;
579a2d1c673SSatish Balay       *nvals = *nvals/bs2;
580bc5ccf88SSatish Balay     }
581a2d1c673SSatish Balay 
582a2d1c673SSatish Balay     /* Check if we have both the messages from this proc */
583c1dc657dSBarry Smith     i1 = flg_v[2*recv_status.MPI_SOURCE];
584c1dc657dSBarry Smith     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
585a2d1c673SSatish Balay     if (i1 != -1 && i2 != -1) {
586a2d1c673SSatish Balay       rindices    = (int*)(stash->rvalues + bs2*stash->rmax*stash->nrecvs);
587a2d1c673SSatish Balay       *rows       = rindices + 2*i2*stash->rmax;
588a2d1c673SSatish Balay       *cols       = *rows + *nvals;
589a2d1c673SSatish Balay       *vals       = stash->rvalues + i1*bs2*stash->rmax;
590a2d1c673SSatish Balay       *flg        = 1;
591a2d1c673SSatish Balay       stash->nprocessed ++;
59235d8aa7fSBarry Smith       match_found = PETSC_TRUE;
593bc5ccf88SSatish Balay     }
594bc5ccf88SSatish Balay   }
595bc5ccf88SSatish Balay   PetscFunctionReturn(0);
596bc5ccf88SSatish Balay }
597