xref: /petsc/src/mat/utils/matstash.c (revision b087b6d67c111367ed6b45cf2d6309e4e2fcfd8d)
1be1d678aSKris Buschelman #define PETSCMAT_DLL
22d5177cdSBarry Smith 
370f55243SBarry Smith #include "src/mat/matimpl.h"
475cae7c1SHong Zhang #include "src/mat/utils/matstashspace.h"
55bd3b8fbSHong Zhang 
63eda8832SBarry Smith /*
70ae3cd3bSBarry Smith        The input to the stash is ALWAYS in MatScalar precision, and the
80ae3cd3bSBarry Smith     internal storage and output is also in MatScalar.
93eda8832SBarry Smith */
10bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE   10000
114c1ff481SSatish Balay 
129417f4adSLois Curfman McInnes /*
138798bf22SSatish Balay   MatStashCreate_Private - Creates a stash,currently used for all the parallel
144c1ff481SSatish Balay   matrix implementations. The stash is where elements of a matrix destined
154c1ff481SSatish Balay   to be stored on other processors are kept until matrix assembly is done.
169417f4adSLois Curfman McInnes 
174c1ff481SSatish Balay   This is a simple minded stash. Simply adds entries to end of stash.
184c1ff481SSatish Balay 
194c1ff481SSatish Balay   Input Parameters:
204c1ff481SSatish Balay   comm - communicator, required for scatters.
214c1ff481SSatish Balay   bs   - stash block size. used when stashing blocks of values
224c1ff481SSatish Balay 
234c1ff481SSatish Balay   Output Parameters:
244c1ff481SSatish Balay   stash    - the newly created stash
259417f4adSLois Curfman McInnes */
264a2ae208SSatish Balay #undef __FUNCT__
274a2ae208SSatish Balay #define __FUNCT__ "MatStashCreate_Private"
28c1ac3661SBarry Smith PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
299417f4adSLois Curfman McInnes {
30dfbe8321SBarry Smith   PetscErrorCode ierr;
31c1ac3661SBarry Smith   PetscInt       max,*opt,nopt;
32f1af5d2fSBarry Smith   PetscTruth     flg;
33bc5ccf88SSatish Balay 
343a40ed3dSBarry Smith   PetscFunctionBegin;
35bc5ccf88SSatish Balay   /* Require 2 tags,get the second using PetscCommGetNewTag() */
36752ec6e0SSatish Balay   stash->comm = comm;
37752ec6e0SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr);
38a2d1c673SSatish Balay   ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr);
39a2d1c673SSatish Balay   ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr);
40a2d1c673SSatish Balay   ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr);
41bc5ccf88SSatish Balay 
42434d7ff9SSatish Balay   nopt = stash->size;
43d7d82daaSBarry Smith   ierr = PetscMalloc(nopt*sizeof(PetscInt),&opt);CHKERRQ(ierr);
44b0a32e0cSBarry Smith   ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr);
45434d7ff9SSatish Balay   if (flg) {
46434d7ff9SSatish Balay     if (nopt == 1)                max = opt[0];
47434d7ff9SSatish Balay     else if (nopt == stash->size) max = opt[stash->rank];
48434d7ff9SSatish Balay     else if (stash->rank < nopt)  max = opt[stash->rank];
49f4ab19daSSatish Balay     else                          max = 0; /* Use default */
50434d7ff9SSatish Balay     stash->umax = max;
51434d7ff9SSatish Balay   } else {
52434d7ff9SSatish Balay     stash->umax = 0;
53434d7ff9SSatish Balay   }
54606d414cSSatish Balay   ierr = PetscFree(opt);CHKERRQ(ierr);
554c1ff481SSatish Balay   if (bs <= 0) bs = 1;
56a2d1c673SSatish Balay 
574c1ff481SSatish Balay   stash->bs       = bs;
589417f4adSLois Curfman McInnes   stash->nmax     = 0;
59434d7ff9SSatish Balay   stash->oldnmax  = 0;
609417f4adSLois Curfman McInnes   stash->n        = 0;
614c1ff481SSatish Balay   stash->reallocs = -1;
6275cae7c1SHong Zhang   stash->space_head = 0;
6375cae7c1SHong Zhang   stash->space      = 0;
649417f4adSLois Curfman McInnes 
65bc5ccf88SSatish Balay   stash->send_waits  = 0;
66bc5ccf88SSatish Balay   stash->recv_waits  = 0;
67a2d1c673SSatish Balay   stash->send_status = 0;
68bc5ccf88SSatish Balay   stash->nsends      = 0;
69bc5ccf88SSatish Balay   stash->nrecvs      = 0;
70bc5ccf88SSatish Balay   stash->svalues     = 0;
71bc5ccf88SSatish Balay   stash->rvalues     = 0;
72563fb871SSatish Balay   stash->rindices    = 0;
73a2d1c673SSatish Balay   stash->nprocs      = 0;
74a2d1c673SSatish Balay   stash->nprocessed  = 0;
753a40ed3dSBarry Smith   PetscFunctionReturn(0);
769417f4adSLois Curfman McInnes }
779417f4adSLois Curfman McInnes 
784c1ff481SSatish Balay /*
798798bf22SSatish Balay    MatStashDestroy_Private - Destroy the stash
804c1ff481SSatish Balay */
814a2ae208SSatish Balay #undef __FUNCT__
824a2ae208SSatish Balay #define __FUNCT__ "MatStashDestroy_Private"
83dfbe8321SBarry Smith PetscErrorCode MatStashDestroy_Private(MatStash *stash)
849417f4adSLois Curfman McInnes {
85dfbe8321SBarry Smith   PetscErrorCode ierr;
86a2d1c673SSatish Balay 
87bc5ccf88SSatish Balay   PetscFunctionBegin;
8875cae7c1SHong Zhang   if (stash->space_head){
8975cae7c1SHong Zhang     ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr);
9075cae7c1SHong Zhang     stash->space_head = 0;
9182740460SHong Zhang     stash->space      = 0;
9275cae7c1SHong Zhang   }
93bc5ccf88SSatish Balay   PetscFunctionReturn(0);
94bc5ccf88SSatish Balay }
95bc5ccf88SSatish Balay 
964c1ff481SSatish Balay /*
978798bf22SSatish Balay    MatStashScatterEnd_Private - This is called as the fial stage of
984c1ff481SSatish Balay    scatter. The final stages of messagepassing is done here, and
994c1ff481SSatish Balay    all the memory used for messagepassing is cleanedu up. This
1004c1ff481SSatish Balay    routine also resets the stash, and deallocates the memory used
1014c1ff481SSatish Balay    for the stash. It also keeps track of the current memory usage
1024c1ff481SSatish Balay    so that the same value can be used the next time through.
1034c1ff481SSatish Balay */
1044a2ae208SSatish Balay #undef __FUNCT__
1054a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterEnd_Private"
106dfbe8321SBarry Smith PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
107bc5ccf88SSatish Balay {
1086849ba73SBarry Smith   PetscErrorCode ierr;
1095bd3b8fbSHong Zhang   PetscInt       nsends=stash->nsends,bs2,oldnmax;
110a2d1c673SSatish Balay   MPI_Status     *send_status;
111a2d1c673SSatish Balay 
1123a40ed3dSBarry Smith   PetscFunctionBegin;
113a2d1c673SSatish Balay   /* wait on sends */
114a2d1c673SSatish Balay   if (nsends) {
11582502324SSatish Balay     ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
116a2d1c673SSatish Balay     ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr);
117606d414cSSatish Balay     ierr = PetscFree(send_status);CHKERRQ(ierr);
118a2d1c673SSatish Balay   }
119a2d1c673SSatish Balay 
120c0c58ca7SSatish Balay   /* Now update nmaxold to be app 10% more than max n used, this way the
121434d7ff9SSatish Balay      wastage of space is reduced the next time this stash is used.
122434d7ff9SSatish Balay      Also update the oldmax, only if it increases */
123b9b97703SBarry Smith   if (stash->n) {
12494b769a5SSatish Balay     bs2      = stash->bs*stash->bs;
1258a9378f0SSatish Balay     oldnmax  = ((int)(stash->n * 1.1) + 5)*bs2;
126434d7ff9SSatish Balay     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
127b9b97703SBarry Smith   }
128434d7ff9SSatish Balay 
129d07ff455SSatish Balay   stash->nmax       = 0;
130d07ff455SSatish Balay   stash->n          = 0;
1314c1ff481SSatish Balay   stash->reallocs   = -1;
132a2d1c673SSatish Balay   stash->nprocessed = 0;
13375cae7c1SHong Zhang   if (stash->space_head){
13475cae7c1SHong Zhang     ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr);
13575cae7c1SHong Zhang     stash->space_head = 0;
13682740460SHong Zhang     stash->space      = 0;
13775cae7c1SHong Zhang   }
138606d414cSSatish Balay   ierr = PetscFree(stash->send_waits);CHKERRQ(ierr);
139606d414cSSatish Balay   stash->send_waits = 0;
140606d414cSSatish Balay   ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr);
141606d414cSSatish Balay   stash->recv_waits = 0;
142606d414cSSatish Balay   ierr = PetscFree(stash->svalues);CHKERRQ(ierr);
143606d414cSSatish Balay   stash->svalues = 0;
144606d414cSSatish Balay   ierr = PetscFree(stash->rvalues);CHKERRQ(ierr);
145606d414cSSatish Balay   stash->rvalues = 0;
146563fb871SSatish Balay   ierr = PetscFree(stash->rindices);CHKERRQ(ierr);
147563fb871SSatish Balay   stash->rindices = 0;
148b22afee1SSatish Balay   ierr = PetscFree(stash->nprocs);CHKERRQ(ierr);
149606d414cSSatish Balay   stash->nprocs = 0;
1503a40ed3dSBarry Smith   PetscFunctionReturn(0);
1519417f4adSLois Curfman McInnes }
1529417f4adSLois Curfman McInnes 
1534c1ff481SSatish Balay /*
1548798bf22SSatish Balay    MatStashGetInfo_Private - Gets the relavant statistics of the stash
1554c1ff481SSatish Balay 
1564c1ff481SSatish Balay    Input Parameters:
1574c1ff481SSatish Balay    stash    - the stash
15894b769a5SSatish Balay    nstash   - the size of the stash. Indicates the number of values stored.
1594c1ff481SSatish Balay    reallocs - the number of additional mallocs incurred.
1604c1ff481SSatish Balay 
1614c1ff481SSatish Balay */
1624a2ae208SSatish Balay #undef __FUNCT__
1634a2ae208SSatish Balay #define __FUNCT__ "MatStashGetInfo_Private"
164c1ac3661SBarry Smith PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
16597530c3fSBarry Smith {
166c1ac3661SBarry Smith   PetscInt bs2 = stash->bs*stash->bs;
16794b769a5SSatish Balay 
1683a40ed3dSBarry Smith   PetscFunctionBegin;
1691ecfd215SBarry Smith   if (nstash) *nstash   = stash->n*bs2;
1701ecfd215SBarry Smith   if (reallocs) {
171434d7ff9SSatish Balay     if (stash->reallocs < 0) *reallocs = 0;
172434d7ff9SSatish Balay     else                     *reallocs = stash->reallocs;
1731ecfd215SBarry Smith   }
174bc5ccf88SSatish Balay   PetscFunctionReturn(0);
175bc5ccf88SSatish Balay }
1764c1ff481SSatish Balay 
1774c1ff481SSatish Balay /*
1788798bf22SSatish Balay    MatStashSetInitialSize_Private - Sets the initial size of the stash
1794c1ff481SSatish Balay 
1804c1ff481SSatish Balay    Input Parameters:
1814c1ff481SSatish Balay    stash  - the stash
1824c1ff481SSatish Balay    max    - the value that is used as the max size of the stash.
1834c1ff481SSatish Balay             this value is used while allocating memory.
1844c1ff481SSatish Balay */
1854a2ae208SSatish Balay #undef __FUNCT__
1864a2ae208SSatish Balay #define __FUNCT__ "MatStashSetInitialSize_Private"
187c1ac3661SBarry Smith PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
188bc5ccf88SSatish Balay {
189bc5ccf88SSatish Balay   PetscFunctionBegin;
190434d7ff9SSatish Balay   stash->umax = max;
1913a40ed3dSBarry Smith   PetscFunctionReturn(0);
19297530c3fSBarry Smith }
19397530c3fSBarry Smith 
1948798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called
1954c1ff481SSatish Balay    when the space in the stash is not sufficient to add the new values
1964c1ff481SSatish Balay    being inserted into the stash.
1974c1ff481SSatish Balay 
1984c1ff481SSatish Balay    Input Parameters:
1994c1ff481SSatish Balay    stash - the stash
2004c1ff481SSatish Balay    incr  - the minimum increase requested
2014c1ff481SSatish Balay 
2024c1ff481SSatish Balay    Notes:
2034c1ff481SSatish Balay    This routine doubles the currently used memory.
2044c1ff481SSatish Balay  */
2054a2ae208SSatish Balay #undef __FUNCT__
2064a2ae208SSatish Balay #define __FUNCT__ "MatStashExpand_Private"
207c1ac3661SBarry Smith static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
2089417f4adSLois Curfman McInnes {
2096849ba73SBarry Smith   PetscErrorCode ierr;
2105bd3b8fbSHong Zhang   PetscInt       newnmax,bs2= stash->bs*stash->bs;
2119417f4adSLois Curfman McInnes 
2123a40ed3dSBarry Smith   PetscFunctionBegin;
2139417f4adSLois Curfman McInnes   /* allocate a larger stash */
214c481ceb5SSatish Balay   if (!stash->oldnmax && !stash->nmax) { /* new stash */
215434d7ff9SSatish Balay     if (stash->umax)                  newnmax = stash->umax/bs2;
216434d7ff9SSatish Balay     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
217c481ceb5SSatish Balay   } else if (!stash->nmax) { /* resuing stash */
218434d7ff9SSatish Balay     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
219434d7ff9SSatish Balay     else                              newnmax = stash->oldnmax/bs2;
220434d7ff9SSatish Balay   } else                              newnmax = stash->nmax*2;
2214c1ff481SSatish Balay   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;
222d07ff455SSatish Balay 
22375cae7c1SHong Zhang   /* Get a MatStashSpace and attach it to stash */
22475cae7c1SHong Zhang   ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr);
225*b087b6d6SSatish Balay   if (!stash->space_head) { /* new stash or resuing stash->oldnmax */
226*b087b6d6SSatish Balay     stash->space_head = stash->space;
22775cae7c1SHong Zhang   }
228*b087b6d6SSatish Balay 
229bc5ccf88SSatish Balay   stash->reallocs++;
23075cae7c1SHong Zhang   stash->nmax = newnmax;
231bc5ccf88SSatish Balay   PetscFunctionReturn(0);
232bc5ccf88SSatish Balay }
233bc5ccf88SSatish Balay /*
2348798bf22SSatish Balay   MatStashValuesRow_Private - inserts values into the stash. This function
2354c1ff481SSatish Balay   expects the values to be roworiented. Multiple columns belong to the same row
2364c1ff481SSatish Balay   can be inserted with a single call to this function.
2374c1ff481SSatish Balay 
2384c1ff481SSatish Balay   Input Parameters:
2394c1ff481SSatish Balay   stash  - the stash
2404c1ff481SSatish Balay   row    - the global row correspoiding to the values
2414c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
2424c1ff481SSatish Balay   idxn   - the global column indices corresponding to each of the values.
2434c1ff481SSatish Balay   values - the values inserted
244bc5ccf88SSatish Balay */
2454a2ae208SSatish Balay #undef __FUNCT__
2464a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRow_Private"
247c1ac3661SBarry Smith PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[])
248bc5ccf88SSatish Balay {
249dfbe8321SBarry Smith   PetscErrorCode     ierr;
25075cae7c1SHong Zhang   PetscInt           i,k;
25175cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
252bc5ccf88SSatish Balay 
253bc5ccf88SSatish Balay   PetscFunctionBegin;
2544c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
25575cae7c1SHong Zhang   if (!space || space->local_remaining < n){
2568798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
2579417f4adSLois Curfman McInnes   }
25875cae7c1SHong Zhang   space = stash->space;
25975cae7c1SHong Zhang   k     = space->local_used;
2604c1ff481SSatish Balay   for (i=0; i<n; i++) {
26175cae7c1SHong Zhang     space->idx[k] = row;
26275cae7c1SHong Zhang     space->idy[k] = idxn[i];
26375cae7c1SHong Zhang     space->val[k] = values[i];
26475cae7c1SHong Zhang     k++;
2659417f4adSLois Curfman McInnes   }
2665bd3b8fbSHong Zhang   stash->n               += n;
26775cae7c1SHong Zhang   space->local_used      += n;
26875cae7c1SHong Zhang   space->local_remaining -= n;
269a2d1c673SSatish Balay   PetscFunctionReturn(0);
270a2d1c673SSatish Balay }
27175cae7c1SHong Zhang 
2724c1ff481SSatish Balay /*
2738798bf22SSatish Balay   MatStashValuesCol_Private - inserts values into the stash. This function
2744c1ff481SSatish Balay   expects the values to be columnoriented. Multiple columns belong to the same row
2754c1ff481SSatish Balay   can be inserted with a single call to this function.
276a2d1c673SSatish Balay 
2774c1ff481SSatish Balay   Input Parameters:
2784c1ff481SSatish Balay   stash   - the stash
2794c1ff481SSatish Balay   row     - the global row correspoiding to the values
2804c1ff481SSatish Balay   n       - the number of elements inserted. All elements belong to the above row.
2814c1ff481SSatish Balay   idxn    - the global column indices corresponding to each of the values.
2824c1ff481SSatish Balay   values  - the values inserted
2834c1ff481SSatish Balay   stepval - the consecutive values are sepated by a distance of stepval.
2844c1ff481SSatish Balay             this happens because the input is columnoriented.
2854c1ff481SSatish Balay */
2864a2ae208SSatish Balay #undef __FUNCT__
2874a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesCol_Private"
288c1ac3661SBarry Smith PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt stepval)
289a2d1c673SSatish Balay {
290dfbe8321SBarry Smith   PetscErrorCode     ierr;
29175cae7c1SHong Zhang   PetscInt           i,k;
29275cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
293a2d1c673SSatish Balay 
2944c1ff481SSatish Balay   PetscFunctionBegin;
2954c1ff481SSatish Balay   /* Check and see if we have sufficient memory */
29675cae7c1SHong Zhang   if (!space || space->local_remaining < n){
2978798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
2984c1ff481SSatish Balay   }
29975cae7c1SHong Zhang   space = stash->space;
30075cae7c1SHong Zhang   k = space->local_used;
3014c1ff481SSatish Balay   for (i=0; i<n; i++) {
30275cae7c1SHong Zhang     space->idx[k] = row;
30375cae7c1SHong Zhang     space->idy[k] = idxn[i];
30475cae7c1SHong Zhang     space->val[k] = values[i*stepval];
30575cae7c1SHong Zhang     k++;
3064c1ff481SSatish Balay   }
3075bd3b8fbSHong Zhang   stash->n               += n;
30875cae7c1SHong Zhang   space->local_used      += n;
30975cae7c1SHong Zhang   space->local_remaining -= n;
3104c1ff481SSatish Balay   PetscFunctionReturn(0);
3114c1ff481SSatish Balay }
3124c1ff481SSatish Balay 
3134c1ff481SSatish Balay /*
3148798bf22SSatish Balay   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
3154c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3164c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3174c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3184c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3194c1ff481SSatish Balay 
3204c1ff481SSatish Balay   Input Parameters:
3214c1ff481SSatish Balay   stash  - the stash
3224c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3234c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3244c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3254c1ff481SSatish Balay            values. Each block is of size bs*bs.
3264c1ff481SSatish Balay   values - the values inserted
3274c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
3284c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
3294c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3304c1ff481SSatish Balay */
3314a2ae208SSatish Balay #undef __FUNCT__
3324a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRowBlocked_Private"
333c1ac3661SBarry Smith PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
3344c1ff481SSatish Balay {
335dfbe8321SBarry Smith   PetscErrorCode     ierr;
33675cae7c1SHong Zhang   PetscInt           i,j,k,bs2,bs=stash->bs,l;
337f15d580aSBarry Smith   const MatScalar    *vals;
338f15d580aSBarry Smith   MatScalar          *array;
33975cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
340a2d1c673SSatish Balay 
341a2d1c673SSatish Balay   PetscFunctionBegin;
34275cae7c1SHong Zhang   if (!space || space->local_remaining < n){
3438798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
344a2d1c673SSatish Balay   }
34575cae7c1SHong Zhang   space = stash->space;
34675cae7c1SHong Zhang   l     = space->local_used;
34775cae7c1SHong Zhang   bs2   = bs*bs;
3484c1ff481SSatish Balay   for (i=0; i<n; i++) {
34975cae7c1SHong Zhang     space->idx[l] = row;
35075cae7c1SHong Zhang     space->idy[l] = idxn[i];
35175cae7c1SHong Zhang     /* Now copy over the block of values. Store the values column oriented.
35275cae7c1SHong Zhang        This enables inserting multiple blocks belonging to a row with a single
35375cae7c1SHong Zhang        funtion call */
35475cae7c1SHong Zhang     array = space->val + bs2*l;
35575cae7c1SHong Zhang     vals  = values + idx*bs2*n + bs*i;
35675cae7c1SHong Zhang     for (j=0; j<bs; j++) {
35775cae7c1SHong Zhang       for (k=0; k<bs; k++) array[k*bs] = vals[k];
35875cae7c1SHong Zhang       array++;
35975cae7c1SHong Zhang       vals  += cmax*bs;
36075cae7c1SHong Zhang     }
36175cae7c1SHong Zhang     l++;
362a2d1c673SSatish Balay   }
3635bd3b8fbSHong Zhang   stash->n               += n;
36475cae7c1SHong Zhang   space->local_used      += n;
36575cae7c1SHong Zhang   space->local_remaining -= n;
3664c1ff481SSatish Balay   PetscFunctionReturn(0);
3674c1ff481SSatish Balay }
3684c1ff481SSatish Balay 
3694c1ff481SSatish Balay /*
3708798bf22SSatish Balay   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
3714c1ff481SSatish Balay   This function expects the values to be roworiented. Multiple columns belong
3724c1ff481SSatish Balay   to the same block-row can be inserted with a single call to this function.
3734c1ff481SSatish Balay   This function extracts the sub-block of values based on the dimensions of
3744c1ff481SSatish Balay   the original input block, and the row,col values corresponding to the blocks.
3754c1ff481SSatish Balay 
3764c1ff481SSatish Balay   Input Parameters:
3774c1ff481SSatish Balay   stash  - the stash
3784c1ff481SSatish Balay   row    - the global block-row correspoiding to the values
3794c1ff481SSatish Balay   n      - the number of elements inserted. All elements belong to the above row.
3804c1ff481SSatish Balay   idxn   - the global block-column indices corresponding to each of the blocks of
3814c1ff481SSatish Balay            values. Each block is of size bs*bs.
3824c1ff481SSatish Balay   values - the values inserted
3834c1ff481SSatish Balay   rmax   - the number of block-rows in the original block.
3844c1ff481SSatish Balay   cmax   - the number of block-columsn on the original block.
3854c1ff481SSatish Balay   idx    - the index of the current block-row in the original block.
3864c1ff481SSatish Balay */
3874a2ae208SSatish Balay #undef __FUNCT__
3884a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesColBlocked_Private"
389c1ac3661SBarry Smith PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
3904c1ff481SSatish Balay {
391dfbe8321SBarry Smith   PetscErrorCode  ierr;
39275cae7c1SHong Zhang   PetscInt        i,j,k,bs2,bs=stash->bs,l;
393f15d580aSBarry Smith   const MatScalar *vals;
394f15d580aSBarry Smith   MatScalar       *array;
39575cae7c1SHong Zhang   PetscMatStashSpace space=stash->space;
3964c1ff481SSatish Balay 
3974c1ff481SSatish Balay   PetscFunctionBegin;
39875cae7c1SHong Zhang   if (!space || space->local_remaining < n){
3998798bf22SSatish Balay     ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr);
4004c1ff481SSatish Balay   }
40175cae7c1SHong Zhang   space = stash->space;
40275cae7c1SHong Zhang   l     = space->local_used;
40375cae7c1SHong Zhang   bs2   = bs*bs;
4044c1ff481SSatish Balay   for (i=0; i<n; i++) {
40575cae7c1SHong Zhang     space->idx[l] = row;
40675cae7c1SHong Zhang     space->idy[l] = idxn[i];
40775cae7c1SHong Zhang     /* Now copy over the block of values. Store the values column oriented.
40875cae7c1SHong Zhang      This enables inserting multiple blocks belonging to a row with a single
40975cae7c1SHong Zhang      funtion call */
41075cae7c1SHong Zhang     array = space->val + bs2*l;
41175cae7c1SHong Zhang     vals  = values + idx*bs2*n + bs*i;
41275cae7c1SHong Zhang     for (j=0; j<bs; j++) {
41375cae7c1SHong Zhang       for (k=0; k<bs; k++) {array[k] = vals[k];}
41475cae7c1SHong Zhang       array += bs;
41575cae7c1SHong Zhang       vals  += rmax*bs;
41675cae7c1SHong Zhang     }
4175bd3b8fbSHong Zhang     l++;
418a2d1c673SSatish Balay   }
4195bd3b8fbSHong Zhang   stash->n               += n;
42075cae7c1SHong Zhang   space->local_used      += n;
42175cae7c1SHong Zhang   space->local_remaining -= n;
4223a40ed3dSBarry Smith   PetscFunctionReturn(0);
4239417f4adSLois Curfman McInnes }
/*
  MatStashScatterBegin_Private - Initiates the transfer of values to the
  correct owners. This function goes through the stash, checks the
  owner of each stashed value, and sends the values off to the owner
  processors.

  Input Parameters:
  stash  - the stash
  owners - an array of size 'no-of-procs' which gives the ownership range
           for each node.

  Notes: The 'owners' array in the case of the blocked-stash has the
  ranges specified in blocked global indices, and for the regular stash in
  the proper global indices.
*/
4394a2ae208SSatish Balay #undef __FUNCT__
4404a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterBegin_Private"
441c1ac3661SBarry Smith PetscErrorCode MatStashScatterBegin_Private(MatStash *stash,PetscInt *owners)
442bc5ccf88SSatish Balay {
443c1ac3661SBarry Smith   PetscInt       *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
444fe09c992SBarry Smith   PetscInt       size=stash->size,nsends;
4456849ba73SBarry Smith   PetscErrorCode ierr;
44675cae7c1SHong Zhang   PetscInt       count,*sindices,**rindices,i,j,idx,lastidx,l;
447563fb871SSatish Balay   MatScalar      **rvalues,*svalues;
448bc5ccf88SSatish Balay   MPI_Comm       comm = stash->comm;
449563fb871SSatish Balay   MPI_Request    *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
450fe09c992SBarry Smith   PetscMPIInt    *nprocs,*nlengths,nreceives;
4515bd3b8fbSHong Zhang   PetscInt       *sp_idx,*sp_idy;
4525bd3b8fbSHong Zhang   MatScalar      *sp_val;
4535bd3b8fbSHong Zhang   PetscMatStashSpace space,space_next;
454bc5ccf88SSatish Balay 
455bc5ccf88SSatish Balay   PetscFunctionBegin;
4564c1ff481SSatish Balay   bs2 = stash->bs*stash->bs;
45775cae7c1SHong Zhang 
458bc5ccf88SSatish Balay   /*  first count number of contributors to each processor */
459fe09c992SBarry Smith   ierr  = PetscMalloc(2*size*sizeof(PetscMPIInt),&nprocs);CHKERRQ(ierr);
460fe09c992SBarry Smith   ierr  = PetscMemzero(nprocs,2*size*sizeof(PetscMPIInt));CHKERRQ(ierr);
461c1ac3661SBarry Smith   ierr  = PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr);
462a2d1c673SSatish Balay 
463563fb871SSatish Balay   nlengths = nprocs+size;
46475cae7c1SHong Zhang   i = j    = 0;
4657357eb19SBarry Smith   lastidx  = -1;
4665bd3b8fbSHong Zhang   space    = stash->space_head;
46775cae7c1SHong Zhang   while (space != PETSC_NULL){
46875cae7c1SHong Zhang     space_next = space->next;
4695bd3b8fbSHong Zhang     sp_idx     = space->idx;
47075cae7c1SHong Zhang     for (l=0; l<space->local_used; l++){
4717357eb19SBarry Smith       /* if indices are NOT locally sorted, need to start search at the beginning */
4725bd3b8fbSHong Zhang       if (lastidx > (idx = sp_idx[l])) j = 0;
4737357eb19SBarry Smith       lastidx = idx;
4747357eb19SBarry Smith       for (; j<size; j++) {
4754c1ff481SSatish Balay         if (idx >= owners[j] && idx < owners[j+1]) {
476563fb871SSatish Balay           nlengths[j]++; owner[i] = j; break;
477bc5ccf88SSatish Balay         }
478bc5ccf88SSatish Balay       }
47975cae7c1SHong Zhang       i++;
48075cae7c1SHong Zhang     }
48175cae7c1SHong Zhang     space = space_next;
482bc5ccf88SSatish Balay   }
483563fb871SSatish Balay   /* Now check what procs get messages - and compute nsends. */
484563fb871SSatish Balay   for (i=0, nsends=0 ; i<size; i++) {
485563fb871SSatish Balay     if (nlengths[i]) { nprocs[i] = 1; nsends ++;}
486563fb871SSatish Balay   }
487bc5ccf88SSatish Balay 
488563fb871SSatish Balay   { int  *onodes,*olengths;
489563fb871SSatish Balay   /* Determine the number of messages to expect, their lengths, from from-ids */
490563fb871SSatish Balay   ierr = PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);CHKERRQ(ierr);
491563fb871SSatish Balay   ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr);
492563fb871SSatish Balay   /* since clubbing row,col - lengths are multiplied by 2 */
493563fb871SSatish Balay   for (i=0; i<nreceives; i++) olengths[i] *=2;
494563fb871SSatish Balay   ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr);
495563fb871SSatish Balay   /* values are size 'bs2' lengths (and remove earlier factor 2 */
496563fb871SSatish Balay   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
497563fb871SSatish Balay   ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr);
498563fb871SSatish Balay   ierr = PetscFree(onodes);CHKERRQ(ierr);
499563fb871SSatish Balay   ierr = PetscFree(olengths);CHKERRQ(ierr);
500bc5ccf88SSatish Balay   }
501bc5ccf88SSatish Balay 
502bc5ccf88SSatish Balay   /* do sends:
503bc5ccf88SSatish Balay       1) starts[i] gives the starting index in svalues for stuff going to
504bc5ccf88SSatish Balay          the ith processor
505bc5ccf88SSatish Balay   */
506c1ac3661SBarry Smith   ierr     = PetscMalloc((stash->n+1)*(bs2*sizeof(MatScalar)+2*sizeof(PetscInt)),&svalues);CHKERRQ(ierr);
507c1ac3661SBarry Smith   sindices = (PetscInt*)(svalues + bs2*stash->n);
508b0a32e0cSBarry Smith   ierr     = PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
509c1ac3661SBarry Smith   ierr     = PetscMalloc(2*size*sizeof(PetscInt),&startv);CHKERRQ(ierr);
510bc5ccf88SSatish Balay   starti   = startv + size;
511a2d1c673SSatish Balay   /* use 2 sends the first with all_a, the next with all_i and all_j */
512bc5ccf88SSatish Balay   startv[0]  = 0; starti[0] = 0;
513bc5ccf88SSatish Balay   for (i=1; i<size; i++) {
514563fb871SSatish Balay     startv[i] = startv[i-1] + nlengths[i-1];
515563fb871SSatish Balay     starti[i] = starti[i-1] + nlengths[i-1]*2;
516bc5ccf88SSatish Balay   }
51775cae7c1SHong Zhang 
51875cae7c1SHong Zhang   i     = 0;
5195bd3b8fbSHong Zhang   space = stash->space_head;
52075cae7c1SHong Zhang   while (space != PETSC_NULL){
52175cae7c1SHong Zhang     space_next = space->next;
5225bd3b8fbSHong Zhang     sp_idx = space->idx;
5235bd3b8fbSHong Zhang     sp_idy = space->idy;
5245bd3b8fbSHong Zhang     sp_val = space->val;
52575cae7c1SHong Zhang     for (l=0; l<space->local_used; l++){
526bc5ccf88SSatish Balay       j = owner[i];
527a2d1c673SSatish Balay       if (bs2 == 1) {
5285bd3b8fbSHong Zhang         svalues[startv[j]] = sp_val[l];
529a2d1c673SSatish Balay       } else {
530c1ac3661SBarry Smith         PetscInt  k;
5313eda8832SBarry Smith         MatScalar *buf1,*buf2;
5324c1ff481SSatish Balay         buf1 = svalues+bs2*startv[j];
533*b087b6d6SSatish Balay         buf2 = space->val + bs2*l;
5344c1ff481SSatish Balay         for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; }
535a2d1c673SSatish Balay       }
5365bd3b8fbSHong Zhang       sindices[starti[j]]             = sp_idx[l];
5375bd3b8fbSHong Zhang       sindices[starti[j]+nlengths[j]] = sp_idy[l];
538bc5ccf88SSatish Balay       startv[j]++;
539bc5ccf88SSatish Balay       starti[j]++;
54075cae7c1SHong Zhang       i++;
54175cae7c1SHong Zhang     }
54275cae7c1SHong Zhang     space = space_next;
543bc5ccf88SSatish Balay   }
544bc5ccf88SSatish Balay   startv[0] = 0;
545563fb871SSatish Balay   for (i=1; i<size; i++) { startv[i] = startv[i-1] + nlengths[i-1];}
546e5d0e772SSatish Balay 
547bc5ccf88SSatish Balay   for (i=0,count=0; i<size; i++) {
548563fb871SSatish Balay     if (nprocs[i]) {
549563fb871SSatish Balay       ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr);
550563fb871SSatish Balay       ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_MATSCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr);
551bc5ccf88SSatish Balay     }
552b85c94c3SSatish Balay   }
5536cf91177SBarry Smith #if defined(PETSC_USE_INFO)
554ae15b995SBarry Smith   ierr = PetscInfo1(0,"No of messages: %d \n",nsends);CHKERRQ(ierr);
555e5d0e772SSatish Balay   for (i=0; i<size; i++) {
556e5d0e772SSatish Balay     if (nprocs[i]) {
557ae15b995SBarry Smith       ierr = PetscInfo2(0,"Mesg_to: %d: size: %d \n",i,nlengths[i]*bs2*sizeof(MatScalar)+2*sizeof(PetscInt));CHKERRQ(ierr);
558e5d0e772SSatish Balay     }
559e5d0e772SSatish Balay   }
560e5d0e772SSatish Balay #endif
561606d414cSSatish Balay   ierr = PetscFree(owner);CHKERRQ(ierr);
562606d414cSSatish Balay   ierr = PetscFree(startv);CHKERRQ(ierr);
563a2d1c673SSatish Balay   /* This memory is reused in scatter end  for a different purpose*/
564a2d1c673SSatish Balay   for (i=0; i<2*size; i++) nprocs[i] = -1;
565a2d1c673SSatish Balay   stash->nprocs = nprocs;
566a2d1c673SSatish Balay 
567563fb871SSatish Balay   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
568563fb871SSatish Balay   ierr  = PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
569563fb871SSatish Balay 
570563fb871SSatish Balay   for (i=0; i<nreceives; i++) {
571563fb871SSatish Balay     recv_waits[2*i]   = recv_waits1[i];
572563fb871SSatish Balay     recv_waits[2*i+1] = recv_waits2[i];
573563fb871SSatish Balay   }
574563fb871SSatish Balay   stash->recv_waits = recv_waits;
575563fb871SSatish Balay   ierr = PetscFree(recv_waits1);CHKERRQ(ierr);
576563fb871SSatish Balay   ierr = PetscFree(recv_waits2);CHKERRQ(ierr);
577563fb871SSatish Balay 
578bc5ccf88SSatish Balay   stash->svalues    = svalues;    stash->rvalues     = rvalues;
579563fb871SSatish Balay   stash->rindices   = rindices;   stash->send_waits  = send_waits;
580bc5ccf88SSatish Balay   stash->nsends     = nsends;     stash->nrecvs      = nreceives;
581bc5ccf88SSatish Balay   PetscFunctionReturn(0);
582bc5ccf88SSatish Balay }
583bc5ccf88SSatish Balay 
584a2d1c673SSatish Balay /*
5858798bf22SSatish Balay    MatStashScatterGetMesg_Private - This function waits on the receives posted
5868798bf22SSatish Balay    in the function MatStashScatterBegin_Private() and returns one message at
5874c1ff481SSatish Balay    a time to the calling function. If no messages are left, it indicates this
5884c1ff481SSatish Balay    by setting flg = 0, else it sets flg = 1.
5894c1ff481SSatish Balay 
5904c1ff481SSatish Balay    Input Parameters:
5914c1ff481SSatish Balay    stash - the stash
5924c1ff481SSatish Balay 
5934c1ff481SSatish Balay    Output Parameters:
5944c1ff481SSatish Balay    nvals - the number of entries in the current message.
5954c1ff481SSatish Balay    rows  - an array of row indices (or blocked indices) corresponding to the values
5964c1ff481SSatish Balay    cols  - an array of columnindices (or blocked indices) corresponding to the values
5974c1ff481SSatish Balay    vals  - the values
5984c1ff481SSatish Balay    flg   - 0 indicates no more message left, and the current call has no values associated.
5994c1ff481SSatish Balay            1 indicates that the current call successfully received a message, and the
6004c1ff481SSatish Balay              other output parameters nvals,rows,cols,vals are set appropriately.
601a2d1c673SSatish Balay */
6024a2ae208SSatish Balay #undef __FUNCT__
6034a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterGetMesg_Private"
604c1ac3661SBarry Smith PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,MatScalar **vals,PetscInt *flg)
605bc5ccf88SSatish Balay {
6066849ba73SBarry Smith   PetscErrorCode ierr;
607fe09c992SBarry Smith   PetscMPIInt    i,*flg_v,i1,i2;
608fe09c992SBarry Smith   PetscInt       bs2;
609a2d1c673SSatish Balay   MPI_Status     recv_status;
610b0a32e0cSBarry Smith   PetscTruth     match_found = PETSC_FALSE;
611bc5ccf88SSatish Balay 
612bc5ccf88SSatish Balay   PetscFunctionBegin;
613bc5ccf88SSatish Balay 
614a2d1c673SSatish Balay   *flg = 0; /* When a message is discovered this is reset to 1 */
615a2d1c673SSatish Balay   /* Return if no more messages to process */
616a2d1c673SSatish Balay   if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); }
617a2d1c673SSatish Balay 
618a2d1c673SSatish Balay   flg_v = stash->nprocs;
6194c1ff481SSatish Balay   bs2   = stash->bs*stash->bs;
620a2d1c673SSatish Balay   /* If a matching pair of receieves are found, process them, and return the data to
621a2d1c673SSatish Balay      the calling function. Until then keep receiving messages */
622a2d1c673SSatish Balay   while (!match_found) {
623a2d1c673SSatish Balay     ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr);
624a2d1c673SSatish Balay     /* Now pack the received message into a structure which is useable by others */
625a2d1c673SSatish Balay     if (i % 2) {
6263eda8832SBarry Smith       ierr = MPI_Get_count(&recv_status,MPIU_MATSCALAR,nvals);CHKERRQ(ierr);
627c1dc657dSBarry Smith       flg_v[2*recv_status.MPI_SOURCE] = i/2;
628a2d1c673SSatish Balay       *nvals = *nvals/bs2;
629563fb871SSatish Balay     } else {
630563fb871SSatish Balay       ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr);
631563fb871SSatish Balay       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
632563fb871SSatish Balay       *nvals = *nvals/2; /* This message has both row indices and col indices */
633bc5ccf88SSatish Balay     }
634a2d1c673SSatish Balay 
635a2d1c673SSatish Balay     /* Check if we have both the messages from this proc */
636c1dc657dSBarry Smith     i1 = flg_v[2*recv_status.MPI_SOURCE];
637c1dc657dSBarry Smith     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
638a2d1c673SSatish Balay     if (i1 != -1 && i2 != -1) {
639563fb871SSatish Balay       *rows       = stash->rindices[i2];
640a2d1c673SSatish Balay       *cols       = *rows + *nvals;
641563fb871SSatish Balay       *vals       = stash->rvalues[i1];
642a2d1c673SSatish Balay       *flg        = 1;
643a2d1c673SSatish Balay       stash->nprocessed ++;
64435d8aa7fSBarry Smith       match_found = PETSC_TRUE;
645bc5ccf88SSatish Balay     }
646bc5ccf88SSatish Balay   }
647bc5ccf88SSatish Balay   PetscFunctionReturn(0);
648bc5ccf88SSatish Balay }
649