1be1d678aSKris Buschelman #define PETSCMAT_DLL 22d5177cdSBarry Smith 370f55243SBarry Smith #include "src/mat/matimpl.h" 475cae7c1SHong Zhang #include "src/mat/utils/matstashspace.h" 55bd3b8fbSHong Zhang 63eda8832SBarry Smith /* 70ae3cd3bSBarry Smith The input to the stash is ALWAYS in MatScalar precision, and the 80ae3cd3bSBarry Smith internal storage and output is also in MatScalar. 93eda8832SBarry Smith */ 10bc5ccf88SSatish Balay #define DEFAULT_STASH_SIZE 10000 114c1ff481SSatish Balay 129417f4adSLois Curfman McInnes /* 138798bf22SSatish Balay MatStashCreate_Private - Creates a stash,currently used for all the parallel 144c1ff481SSatish Balay matrix implementations. The stash is where elements of a matrix destined 154c1ff481SSatish Balay to be stored on other processors are kept until matrix assembly is done. 169417f4adSLois Curfman McInnes 174c1ff481SSatish Balay This is a simple minded stash. Simply adds entries to end of stash. 184c1ff481SSatish Balay 194c1ff481SSatish Balay Input Parameters: 204c1ff481SSatish Balay comm - communicator, required for scatters. 214c1ff481SSatish Balay bs - stash block size. 
used when stashing blocks of values 224c1ff481SSatish Balay 234c1ff481SSatish Balay Output Parameters: 244c1ff481SSatish Balay stash - the newly created stash 259417f4adSLois Curfman McInnes */ 264a2ae208SSatish Balay #undef __FUNCT__ 274a2ae208SSatish Balay #define __FUNCT__ "MatStashCreate_Private" 28c1ac3661SBarry Smith PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash) 299417f4adSLois Curfman McInnes { 30dfbe8321SBarry Smith PetscErrorCode ierr; 31c1ac3661SBarry Smith PetscInt max,*opt,nopt; 32f1af5d2fSBarry Smith PetscTruth flg; 33bc5ccf88SSatish Balay 343a40ed3dSBarry Smith PetscFunctionBegin; 35bc5ccf88SSatish Balay /* Require 2 tags,get the second using PetscCommGetNewTag() */ 36752ec6e0SSatish Balay stash->comm = comm; 37752ec6e0SSatish Balay ierr = PetscCommGetNewTag(stash->comm,&stash->tag1);CHKERRQ(ierr); 38a2d1c673SSatish Balay ierr = PetscCommGetNewTag(stash->comm,&stash->tag2);CHKERRQ(ierr); 39a2d1c673SSatish Balay ierr = MPI_Comm_size(stash->comm,&stash->size);CHKERRQ(ierr); 40a2d1c673SSatish Balay ierr = MPI_Comm_rank(stash->comm,&stash->rank);CHKERRQ(ierr); 41bc5ccf88SSatish Balay 42434d7ff9SSatish Balay nopt = stash->size; 43d7d82daaSBarry Smith ierr = PetscMalloc(nopt*sizeof(PetscInt),&opt);CHKERRQ(ierr); 44b0a32e0cSBarry Smith ierr = PetscOptionsGetIntArray(PETSC_NULL,"-matstash_initial_size",opt,&nopt,&flg);CHKERRQ(ierr); 45434d7ff9SSatish Balay if (flg) { 46434d7ff9SSatish Balay if (nopt == 1) max = opt[0]; 47434d7ff9SSatish Balay else if (nopt == stash->size) max = opt[stash->rank]; 48434d7ff9SSatish Balay else if (stash->rank < nopt) max = opt[stash->rank]; 49f4ab19daSSatish Balay else max = 0; /* Use default */ 50434d7ff9SSatish Balay stash->umax = max; 51434d7ff9SSatish Balay } else { 52434d7ff9SSatish Balay stash->umax = 0; 53434d7ff9SSatish Balay } 54606d414cSSatish Balay ierr = PetscFree(opt);CHKERRQ(ierr); 554c1ff481SSatish Balay if (bs <= 0) bs = 1; 56a2d1c673SSatish Balay 574c1ff481SSatish Balay 
stash->bs = bs; 589417f4adSLois Curfman McInnes stash->nmax = 0; 59434d7ff9SSatish Balay stash->oldnmax = 0; 609417f4adSLois Curfman McInnes stash->n = 0; 614c1ff481SSatish Balay stash->reallocs = -1; 6275cae7c1SHong Zhang stash->space_head = 0; 6375cae7c1SHong Zhang stash->space = 0; 649417f4adSLois Curfman McInnes 65bc5ccf88SSatish Balay stash->send_waits = 0; 66bc5ccf88SSatish Balay stash->recv_waits = 0; 67a2d1c673SSatish Balay stash->send_status = 0; 68bc5ccf88SSatish Balay stash->nsends = 0; 69bc5ccf88SSatish Balay stash->nrecvs = 0; 70bc5ccf88SSatish Balay stash->svalues = 0; 71bc5ccf88SSatish Balay stash->rvalues = 0; 72563fb871SSatish Balay stash->rindices = 0; 73a2d1c673SSatish Balay stash->nprocs = 0; 74a2d1c673SSatish Balay stash->nprocessed = 0; 753a40ed3dSBarry Smith PetscFunctionReturn(0); 769417f4adSLois Curfman McInnes } 779417f4adSLois Curfman McInnes 784c1ff481SSatish Balay /* 798798bf22SSatish Balay MatStashDestroy_Private - Destroy the stash 804c1ff481SSatish Balay */ 814a2ae208SSatish Balay #undef __FUNCT__ 824a2ae208SSatish Balay #define __FUNCT__ "MatStashDestroy_Private" 83dfbe8321SBarry Smith PetscErrorCode MatStashDestroy_Private(MatStash *stash) 849417f4adSLois Curfman McInnes { 85dfbe8321SBarry Smith PetscErrorCode ierr; 86a2d1c673SSatish Balay 87bc5ccf88SSatish Balay PetscFunctionBegin; 8875cae7c1SHong Zhang if (stash->space_head){ 8975cae7c1SHong Zhang ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr); 9075cae7c1SHong Zhang stash->space_head = 0; 9182740460SHong Zhang stash->space = 0; 9275cae7c1SHong Zhang } 93bc5ccf88SSatish Balay PetscFunctionReturn(0); 94bc5ccf88SSatish Balay } 95bc5ccf88SSatish Balay 964c1ff481SSatish Balay /* 978798bf22SSatish Balay MatStashScatterEnd_Private - This is called as the fial stage of 984c1ff481SSatish Balay scatter. The final stages of messagepassing is done here, and 994c1ff481SSatish Balay all the memory used for messagepassing is cleanedu up. 
This 1004c1ff481SSatish Balay routine also resets the stash, and deallocates the memory used 1014c1ff481SSatish Balay for the stash. It also keeps track of the current memory usage 1024c1ff481SSatish Balay so that the same value can be used the next time through. 1034c1ff481SSatish Balay */ 1044a2ae208SSatish Balay #undef __FUNCT__ 1054a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterEnd_Private" 106dfbe8321SBarry Smith PetscErrorCode MatStashScatterEnd_Private(MatStash *stash) 107bc5ccf88SSatish Balay { 1086849ba73SBarry Smith PetscErrorCode ierr; 1095bd3b8fbSHong Zhang PetscInt nsends=stash->nsends,bs2,oldnmax; 110a2d1c673SSatish Balay MPI_Status *send_status; 111a2d1c673SSatish Balay 1123a40ed3dSBarry Smith PetscFunctionBegin; 113a2d1c673SSatish Balay /* wait on sends */ 114a2d1c673SSatish Balay if (nsends) { 11582502324SSatish Balay ierr = PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr); 116a2d1c673SSatish Balay ierr = MPI_Waitall(2*nsends,stash->send_waits,send_status);CHKERRQ(ierr); 117606d414cSSatish Balay ierr = PetscFree(send_status);CHKERRQ(ierr); 118a2d1c673SSatish Balay } 119a2d1c673SSatish Balay 120c0c58ca7SSatish Balay /* Now update nmaxold to be app 10% more than max n used, this way the 121434d7ff9SSatish Balay wastage of space is reduced the next time this stash is used. 
122434d7ff9SSatish Balay Also update the oldmax, only if it increases */ 123b9b97703SBarry Smith if (stash->n) { 12494b769a5SSatish Balay bs2 = stash->bs*stash->bs; 1258a9378f0SSatish Balay oldnmax = ((int)(stash->n * 1.1) + 5)*bs2; 126434d7ff9SSatish Balay if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax; 127b9b97703SBarry Smith } 128434d7ff9SSatish Balay 129d07ff455SSatish Balay stash->nmax = 0; 130d07ff455SSatish Balay stash->n = 0; 1314c1ff481SSatish Balay stash->reallocs = -1; 132a2d1c673SSatish Balay stash->nprocessed = 0; 13375cae7c1SHong Zhang if (stash->space_head){ 13475cae7c1SHong Zhang ierr = PetscMatStashSpaceDestroy(stash->space_head);CHKERRQ(ierr); 13575cae7c1SHong Zhang stash->space_head = 0; 13682740460SHong Zhang stash->space = 0; 13775cae7c1SHong Zhang } 138606d414cSSatish Balay ierr = PetscFree(stash->send_waits);CHKERRQ(ierr); 139606d414cSSatish Balay stash->send_waits = 0; 140606d414cSSatish Balay ierr = PetscFree(stash->recv_waits);CHKERRQ(ierr); 141606d414cSSatish Balay stash->recv_waits = 0; 142606d414cSSatish Balay ierr = PetscFree(stash->svalues);CHKERRQ(ierr); 143606d414cSSatish Balay stash->svalues = 0; 144606d414cSSatish Balay ierr = PetscFree(stash->rvalues);CHKERRQ(ierr); 145606d414cSSatish Balay stash->rvalues = 0; 146563fb871SSatish Balay ierr = PetscFree(stash->rindices);CHKERRQ(ierr); 147563fb871SSatish Balay stash->rindices = 0; 148b22afee1SSatish Balay ierr = PetscFree(stash->nprocs);CHKERRQ(ierr); 149606d414cSSatish Balay stash->nprocs = 0; 1503a40ed3dSBarry Smith PetscFunctionReturn(0); 1519417f4adSLois Curfman McInnes } 1529417f4adSLois Curfman McInnes 1534c1ff481SSatish Balay /* 1548798bf22SSatish Balay MatStashGetInfo_Private - Gets the relavant statistics of the stash 1554c1ff481SSatish Balay 1564c1ff481SSatish Balay Input Parameters: 1574c1ff481SSatish Balay stash - the stash 15894b769a5SSatish Balay nstash - the size of the stash. Indicates the number of values stored. 
1594c1ff481SSatish Balay reallocs - the number of additional mallocs incurred. 1604c1ff481SSatish Balay 1614c1ff481SSatish Balay */ 1624a2ae208SSatish Balay #undef __FUNCT__ 1634a2ae208SSatish Balay #define __FUNCT__ "MatStashGetInfo_Private" 164c1ac3661SBarry Smith PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs) 16597530c3fSBarry Smith { 166c1ac3661SBarry Smith PetscInt bs2 = stash->bs*stash->bs; 16794b769a5SSatish Balay 1683a40ed3dSBarry Smith PetscFunctionBegin; 1691ecfd215SBarry Smith if (nstash) *nstash = stash->n*bs2; 1701ecfd215SBarry Smith if (reallocs) { 171434d7ff9SSatish Balay if (stash->reallocs < 0) *reallocs = 0; 172434d7ff9SSatish Balay else *reallocs = stash->reallocs; 1731ecfd215SBarry Smith } 174bc5ccf88SSatish Balay PetscFunctionReturn(0); 175bc5ccf88SSatish Balay } 1764c1ff481SSatish Balay 1774c1ff481SSatish Balay /* 1788798bf22SSatish Balay MatStashSetInitialSize_Private - Sets the initial size of the stash 1794c1ff481SSatish Balay 1804c1ff481SSatish Balay Input Parameters: 1814c1ff481SSatish Balay stash - the stash 1824c1ff481SSatish Balay max - the value that is used as the max size of the stash. 1834c1ff481SSatish Balay this value is used while allocating memory. 1844c1ff481SSatish Balay */ 1854a2ae208SSatish Balay #undef __FUNCT__ 1864a2ae208SSatish Balay #define __FUNCT__ "MatStashSetInitialSize_Private" 187c1ac3661SBarry Smith PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max) 188bc5ccf88SSatish Balay { 189bc5ccf88SSatish Balay PetscFunctionBegin; 190434d7ff9SSatish Balay stash->umax = max; 1913a40ed3dSBarry Smith PetscFunctionReturn(0); 19297530c3fSBarry Smith } 19397530c3fSBarry Smith 1948798bf22SSatish Balay /* MatStashExpand_Private - Expand the stash. This function is called 1954c1ff481SSatish Balay when the space in the stash is not sufficient to add the new values 1964c1ff481SSatish Balay being inserted into the stash. 
1974c1ff481SSatish Balay 1984c1ff481SSatish Balay Input Parameters: 1994c1ff481SSatish Balay stash - the stash 2004c1ff481SSatish Balay incr - the minimum increase requested 2014c1ff481SSatish Balay 2024c1ff481SSatish Balay Notes: 2034c1ff481SSatish Balay This routine doubles the currently used memory. 2044c1ff481SSatish Balay */ 2054a2ae208SSatish Balay #undef __FUNCT__ 2064a2ae208SSatish Balay #define __FUNCT__ "MatStashExpand_Private" 207c1ac3661SBarry Smith static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr) 2089417f4adSLois Curfman McInnes { 2096849ba73SBarry Smith PetscErrorCode ierr; 2105bd3b8fbSHong Zhang PetscInt newnmax,bs2= stash->bs*stash->bs; 2119417f4adSLois Curfman McInnes 2123a40ed3dSBarry Smith PetscFunctionBegin; 2139417f4adSLois Curfman McInnes /* allocate a larger stash */ 214c481ceb5SSatish Balay if (!stash->oldnmax && !stash->nmax) { /* new stash */ 215434d7ff9SSatish Balay if (stash->umax) newnmax = stash->umax/bs2; 216434d7ff9SSatish Balay else newnmax = DEFAULT_STASH_SIZE/bs2; 217c481ceb5SSatish Balay } else if (!stash->nmax) { /* resuing stash */ 218434d7ff9SSatish Balay if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2; 219434d7ff9SSatish Balay else newnmax = stash->oldnmax/bs2; 220434d7ff9SSatish Balay } else newnmax = stash->nmax*2; 2214c1ff481SSatish Balay if (newnmax < (stash->nmax + incr)) newnmax += 2*incr; 222d07ff455SSatish Balay 22375cae7c1SHong Zhang /* Get a MatStashSpace and attach it to stash */ 22475cae7c1SHong Zhang ierr = PetscMatStashSpaceGet(bs2,newnmax,&stash->space);CHKERRQ(ierr); 225*b087b6d6SSatish Balay if (!stash->space_head) { /* new stash or resuing stash->oldnmax */ 226*b087b6d6SSatish Balay stash->space_head = stash->space; 22775cae7c1SHong Zhang } 228*b087b6d6SSatish Balay 229bc5ccf88SSatish Balay stash->reallocs++; 23075cae7c1SHong Zhang stash->nmax = newnmax; 231bc5ccf88SSatish Balay PetscFunctionReturn(0); 232bc5ccf88SSatish Balay } 233bc5ccf88SSatish Balay /* 
2348798bf22SSatish Balay MatStashValuesRow_Private - inserts values into the stash. This function 2354c1ff481SSatish Balay expects the values to be roworiented. Multiple columns belong to the same row 2364c1ff481SSatish Balay can be inserted with a single call to this function. 2374c1ff481SSatish Balay 2384c1ff481SSatish Balay Input Parameters: 2394c1ff481SSatish Balay stash - the stash 2404c1ff481SSatish Balay row - the global row correspoiding to the values 2414c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 2424c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 2434c1ff481SSatish Balay values - the values inserted 244bc5ccf88SSatish Balay */ 2454a2ae208SSatish Balay #undef __FUNCT__ 2464a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRow_Private" 247c1ac3661SBarry Smith PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[]) 248bc5ccf88SSatish Balay { 249dfbe8321SBarry Smith PetscErrorCode ierr; 25075cae7c1SHong Zhang PetscInt i,k; 25175cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 252bc5ccf88SSatish Balay 253bc5ccf88SSatish Balay PetscFunctionBegin; 2544c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 25575cae7c1SHong Zhang if (!space || space->local_remaining < n){ 2568798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 2579417f4adSLois Curfman McInnes } 25875cae7c1SHong Zhang space = stash->space; 25975cae7c1SHong Zhang k = space->local_used; 2604c1ff481SSatish Balay for (i=0; i<n; i++) { 26175cae7c1SHong Zhang space->idx[k] = row; 26275cae7c1SHong Zhang space->idy[k] = idxn[i]; 26375cae7c1SHong Zhang space->val[k] = values[i]; 26475cae7c1SHong Zhang k++; 2659417f4adSLois Curfman McInnes } 2665bd3b8fbSHong Zhang stash->n += n; 26775cae7c1SHong Zhang space->local_used += n; 26875cae7c1SHong Zhang space->local_remaining -= n; 
269a2d1c673SSatish Balay PetscFunctionReturn(0); 270a2d1c673SSatish Balay } 27175cae7c1SHong Zhang 2724c1ff481SSatish Balay /* 2738798bf22SSatish Balay MatStashValuesCol_Private - inserts values into the stash. This function 2744c1ff481SSatish Balay expects the values to be columnoriented. Multiple columns belong to the same row 2754c1ff481SSatish Balay can be inserted with a single call to this function. 276a2d1c673SSatish Balay 2774c1ff481SSatish Balay Input Parameters: 2784c1ff481SSatish Balay stash - the stash 2794c1ff481SSatish Balay row - the global row correspoiding to the values 2804c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 2814c1ff481SSatish Balay idxn - the global column indices corresponding to each of the values. 2824c1ff481SSatish Balay values - the values inserted 2834c1ff481SSatish Balay stepval - the consecutive values are sepated by a distance of stepval. 2844c1ff481SSatish Balay this happens because the input is columnoriented. 
2854c1ff481SSatish Balay */ 2864a2ae208SSatish Balay #undef __FUNCT__ 2874a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesCol_Private" 288c1ac3661SBarry Smith PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt stepval) 289a2d1c673SSatish Balay { 290dfbe8321SBarry Smith PetscErrorCode ierr; 29175cae7c1SHong Zhang PetscInt i,k; 29275cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 293a2d1c673SSatish Balay 2944c1ff481SSatish Balay PetscFunctionBegin; 2954c1ff481SSatish Balay /* Check and see if we have sufficient memory */ 29675cae7c1SHong Zhang if (!space || space->local_remaining < n){ 2978798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 2984c1ff481SSatish Balay } 29975cae7c1SHong Zhang space = stash->space; 30075cae7c1SHong Zhang k = space->local_used; 3014c1ff481SSatish Balay for (i=0; i<n; i++) { 30275cae7c1SHong Zhang space->idx[k] = row; 30375cae7c1SHong Zhang space->idy[k] = idxn[i]; 30475cae7c1SHong Zhang space->val[k] = values[i*stepval]; 30575cae7c1SHong Zhang k++; 3064c1ff481SSatish Balay } 3075bd3b8fbSHong Zhang stash->n += n; 30875cae7c1SHong Zhang space->local_used += n; 30975cae7c1SHong Zhang space->local_remaining -= n; 3104c1ff481SSatish Balay PetscFunctionReturn(0); 3114c1ff481SSatish Balay } 3124c1ff481SSatish Balay 3134c1ff481SSatish Balay /* 3148798bf22SSatish Balay MatStashValuesRowBlocked_Private - inserts blocks of values into the stash. 3154c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 3164c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 3174c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 3184c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 
3194c1ff481SSatish Balay 3204c1ff481SSatish Balay Input Parameters: 3214c1ff481SSatish Balay stash - the stash 3224c1ff481SSatish Balay row - the global block-row correspoiding to the values 3234c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3244c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 3254c1ff481SSatish Balay values. Each block is of size bs*bs. 3264c1ff481SSatish Balay values - the values inserted 3274c1ff481SSatish Balay rmax - the number of block-rows in the original block. 3284c1ff481SSatish Balay cmax - the number of block-columsn on the original block. 3294c1ff481SSatish Balay idx - the index of the current block-row in the original block. 3304c1ff481SSatish Balay */ 3314a2ae208SSatish Balay #undef __FUNCT__ 3324a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesRowBlocked_Private" 333c1ac3661SBarry Smith PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 3344c1ff481SSatish Balay { 335dfbe8321SBarry Smith PetscErrorCode ierr; 33675cae7c1SHong Zhang PetscInt i,j,k,bs2,bs=stash->bs,l; 337f15d580aSBarry Smith const MatScalar *vals; 338f15d580aSBarry Smith MatScalar *array; 33975cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 340a2d1c673SSatish Balay 341a2d1c673SSatish Balay PetscFunctionBegin; 34275cae7c1SHong Zhang if (!space || space->local_remaining < n){ 3438798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 344a2d1c673SSatish Balay } 34575cae7c1SHong Zhang space = stash->space; 34675cae7c1SHong Zhang l = space->local_used; 34775cae7c1SHong Zhang bs2 = bs*bs; 3484c1ff481SSatish Balay for (i=0; i<n; i++) { 34975cae7c1SHong Zhang space->idx[l] = row; 35075cae7c1SHong Zhang space->idy[l] = idxn[i]; 35175cae7c1SHong Zhang /* Now copy over the block of values. Store the values column oriented. 
35275cae7c1SHong Zhang This enables inserting multiple blocks belonging to a row with a single 35375cae7c1SHong Zhang funtion call */ 35475cae7c1SHong Zhang array = space->val + bs2*l; 35575cae7c1SHong Zhang vals = values + idx*bs2*n + bs*i; 35675cae7c1SHong Zhang for (j=0; j<bs; j++) { 35775cae7c1SHong Zhang for (k=0; k<bs; k++) array[k*bs] = vals[k]; 35875cae7c1SHong Zhang array++; 35975cae7c1SHong Zhang vals += cmax*bs; 36075cae7c1SHong Zhang } 36175cae7c1SHong Zhang l++; 362a2d1c673SSatish Balay } 3635bd3b8fbSHong Zhang stash->n += n; 36475cae7c1SHong Zhang space->local_used += n; 36575cae7c1SHong Zhang space->local_remaining -= n; 3664c1ff481SSatish Balay PetscFunctionReturn(0); 3674c1ff481SSatish Balay } 3684c1ff481SSatish Balay 3694c1ff481SSatish Balay /* 3708798bf22SSatish Balay MatStashValuesColBlocked_Private - inserts blocks of values into the stash. 3714c1ff481SSatish Balay This function expects the values to be roworiented. Multiple columns belong 3724c1ff481SSatish Balay to the same block-row can be inserted with a single call to this function. 3734c1ff481SSatish Balay This function extracts the sub-block of values based on the dimensions of 3744c1ff481SSatish Balay the original input block, and the row,col values corresponding to the blocks. 3754c1ff481SSatish Balay 3764c1ff481SSatish Balay Input Parameters: 3774c1ff481SSatish Balay stash - the stash 3784c1ff481SSatish Balay row - the global block-row correspoiding to the values 3794c1ff481SSatish Balay n - the number of elements inserted. All elements belong to the above row. 3804c1ff481SSatish Balay idxn - the global block-column indices corresponding to each of the blocks of 3814c1ff481SSatish Balay values. Each block is of size bs*bs. 3824c1ff481SSatish Balay values - the values inserted 3834c1ff481SSatish Balay rmax - the number of block-rows in the original block. 3844c1ff481SSatish Balay cmax - the number of block-columsn on the original block. 
3854c1ff481SSatish Balay idx - the index of the current block-row in the original block. 3864c1ff481SSatish Balay */ 3874a2ae208SSatish Balay #undef __FUNCT__ 3884a2ae208SSatish Balay #define __FUNCT__ "MatStashValuesColBlocked_Private" 389c1ac3661SBarry Smith PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const MatScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx) 3904c1ff481SSatish Balay { 391dfbe8321SBarry Smith PetscErrorCode ierr; 39275cae7c1SHong Zhang PetscInt i,j,k,bs2,bs=stash->bs,l; 393f15d580aSBarry Smith const MatScalar *vals; 394f15d580aSBarry Smith MatScalar *array; 39575cae7c1SHong Zhang PetscMatStashSpace space=stash->space; 3964c1ff481SSatish Balay 3974c1ff481SSatish Balay PetscFunctionBegin; 39875cae7c1SHong Zhang if (!space || space->local_remaining < n){ 3998798bf22SSatish Balay ierr = MatStashExpand_Private(stash,n);CHKERRQ(ierr); 4004c1ff481SSatish Balay } 40175cae7c1SHong Zhang space = stash->space; 40275cae7c1SHong Zhang l = space->local_used; 40375cae7c1SHong Zhang bs2 = bs*bs; 4044c1ff481SSatish Balay for (i=0; i<n; i++) { 40575cae7c1SHong Zhang space->idx[l] = row; 40675cae7c1SHong Zhang space->idy[l] = idxn[i]; 40775cae7c1SHong Zhang /* Now copy over the block of values. Store the values column oriented. 
40875cae7c1SHong Zhang This enables inserting multiple blocks belonging to a row with a single 40975cae7c1SHong Zhang funtion call */ 41075cae7c1SHong Zhang array = space->val + bs2*l; 41175cae7c1SHong Zhang vals = values + idx*bs2*n + bs*i; 41275cae7c1SHong Zhang for (j=0; j<bs; j++) { 41375cae7c1SHong Zhang for (k=0; k<bs; k++) {array[k] = vals[k];} 41475cae7c1SHong Zhang array += bs; 41575cae7c1SHong Zhang vals += rmax*bs; 41675cae7c1SHong Zhang } 4175bd3b8fbSHong Zhang l++; 418a2d1c673SSatish Balay } 4195bd3b8fbSHong Zhang stash->n += n; 42075cae7c1SHong Zhang space->local_used += n; 42175cae7c1SHong Zhang space->local_remaining -= n; 4223a40ed3dSBarry Smith PetscFunctionReturn(0); 4239417f4adSLois Curfman McInnes } 4244c1ff481SSatish Balay /* 4258798bf22SSatish Balay MatStashScatterBegin_Private - Initiates the transfer of values to the 4264c1ff481SSatish Balay correct owners. This function goes through the stash, and check the 4274c1ff481SSatish Balay owners of each stashed value, and sends the values off to the owner 4284c1ff481SSatish Balay processors. 429bc5ccf88SSatish Balay 4304c1ff481SSatish Balay Input Parameters: 4314c1ff481SSatish Balay stash - the stash 4324c1ff481SSatish Balay owners - an array of size 'no-of-procs' which gives the ownership range 4334c1ff481SSatish Balay for each node. 4344c1ff481SSatish Balay 4354c1ff481SSatish Balay Notes: The 'owners' array in the cased of the blocked-stash has the 4364c1ff481SSatish Balay ranges specified blocked global indices, and for the regular stash in 4374c1ff481SSatish Balay the proper global indices. 
4384c1ff481SSatish Balay */ 4394a2ae208SSatish Balay #undef __FUNCT__ 4404a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterBegin_Private" 441c1ac3661SBarry Smith PetscErrorCode MatStashScatterBegin_Private(MatStash *stash,PetscInt *owners) 442bc5ccf88SSatish Balay { 443c1ac3661SBarry Smith PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2; 444fe09c992SBarry Smith PetscInt size=stash->size,nsends; 4456849ba73SBarry Smith PetscErrorCode ierr; 44675cae7c1SHong Zhang PetscInt count,*sindices,**rindices,i,j,idx,lastidx,l; 447563fb871SSatish Balay MatScalar **rvalues,*svalues; 448bc5ccf88SSatish Balay MPI_Comm comm = stash->comm; 449563fb871SSatish Balay MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2; 450fe09c992SBarry Smith PetscMPIInt *nprocs,*nlengths,nreceives; 4515bd3b8fbSHong Zhang PetscInt *sp_idx,*sp_idy; 4525bd3b8fbSHong Zhang MatScalar *sp_val; 4535bd3b8fbSHong Zhang PetscMatStashSpace space,space_next; 454bc5ccf88SSatish Balay 455bc5ccf88SSatish Balay PetscFunctionBegin; 4564c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 45775cae7c1SHong Zhang 458bc5ccf88SSatish Balay /* first count number of contributors to each processor */ 459fe09c992SBarry Smith ierr = PetscMalloc(2*size*sizeof(PetscMPIInt),&nprocs);CHKERRQ(ierr); 460fe09c992SBarry Smith ierr = PetscMemzero(nprocs,2*size*sizeof(PetscMPIInt));CHKERRQ(ierr); 461c1ac3661SBarry Smith ierr = PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);CHKERRQ(ierr); 462a2d1c673SSatish Balay 463563fb871SSatish Balay nlengths = nprocs+size; 46475cae7c1SHong Zhang i = j = 0; 4657357eb19SBarry Smith lastidx = -1; 4665bd3b8fbSHong Zhang space = stash->space_head; 46775cae7c1SHong Zhang while (space != PETSC_NULL){ 46875cae7c1SHong Zhang space_next = space->next; 4695bd3b8fbSHong Zhang sp_idx = space->idx; 47075cae7c1SHong Zhang for (l=0; l<space->local_used; l++){ 4717357eb19SBarry Smith /* if indices are NOT locally sorted, need to start search at the beginning */ 4725bd3b8fbSHong 
Zhang if (lastidx > (idx = sp_idx[l])) j = 0; 4737357eb19SBarry Smith lastidx = idx; 4747357eb19SBarry Smith for (; j<size; j++) { 4754c1ff481SSatish Balay if (idx >= owners[j] && idx < owners[j+1]) { 476563fb871SSatish Balay nlengths[j]++; owner[i] = j; break; 477bc5ccf88SSatish Balay } 478bc5ccf88SSatish Balay } 47975cae7c1SHong Zhang i++; 48075cae7c1SHong Zhang } 48175cae7c1SHong Zhang space = space_next; 482bc5ccf88SSatish Balay } 483563fb871SSatish Balay /* Now check what procs get messages - and compute nsends. */ 484563fb871SSatish Balay for (i=0, nsends=0 ; i<size; i++) { 485563fb871SSatish Balay if (nlengths[i]) { nprocs[i] = 1; nsends ++;} 486563fb871SSatish Balay } 487bc5ccf88SSatish Balay 488563fb871SSatish Balay { int *onodes,*olengths; 489563fb871SSatish Balay /* Determine the number of messages to expect, their lengths, from from-ids */ 490563fb871SSatish Balay ierr = PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);CHKERRQ(ierr); 491563fb871SSatish Balay ierr = PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);CHKERRQ(ierr); 492563fb871SSatish Balay /* since clubbing row,col - lengths are multiplied by 2 */ 493563fb871SSatish Balay for (i=0; i<nreceives; i++) olengths[i] *=2; 494563fb871SSatish Balay ierr = PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);CHKERRQ(ierr); 495563fb871SSatish Balay /* values are size 'bs2' lengths (and remove earlier factor 2 */ 496563fb871SSatish Balay for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2; 497563fb871SSatish Balay ierr = PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);CHKERRQ(ierr); 498563fb871SSatish Balay ierr = PetscFree(onodes);CHKERRQ(ierr); 499563fb871SSatish Balay ierr = PetscFree(olengths);CHKERRQ(ierr); 500bc5ccf88SSatish Balay } 501bc5ccf88SSatish Balay 502bc5ccf88SSatish Balay /* do sends: 503bc5ccf88SSatish Balay 1) starts[i] gives the starting index in svalues for stuff going to 
504bc5ccf88SSatish Balay the ith processor 505bc5ccf88SSatish Balay */ 506c1ac3661SBarry Smith ierr = PetscMalloc((stash->n+1)*(bs2*sizeof(MatScalar)+2*sizeof(PetscInt)),&svalues);CHKERRQ(ierr); 507c1ac3661SBarry Smith sindices = (PetscInt*)(svalues + bs2*stash->n); 508b0a32e0cSBarry Smith ierr = PetscMalloc(2*(nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr); 509c1ac3661SBarry Smith ierr = PetscMalloc(2*size*sizeof(PetscInt),&startv);CHKERRQ(ierr); 510bc5ccf88SSatish Balay starti = startv + size; 511a2d1c673SSatish Balay /* use 2 sends the first with all_a, the next with all_i and all_j */ 512bc5ccf88SSatish Balay startv[0] = 0; starti[0] = 0; 513bc5ccf88SSatish Balay for (i=1; i<size; i++) { 514563fb871SSatish Balay startv[i] = startv[i-1] + nlengths[i-1]; 515563fb871SSatish Balay starti[i] = starti[i-1] + nlengths[i-1]*2; 516bc5ccf88SSatish Balay } 51775cae7c1SHong Zhang 51875cae7c1SHong Zhang i = 0; 5195bd3b8fbSHong Zhang space = stash->space_head; 52075cae7c1SHong Zhang while (space != PETSC_NULL){ 52175cae7c1SHong Zhang space_next = space->next; 5225bd3b8fbSHong Zhang sp_idx = space->idx; 5235bd3b8fbSHong Zhang sp_idy = space->idy; 5245bd3b8fbSHong Zhang sp_val = space->val; 52575cae7c1SHong Zhang for (l=0; l<space->local_used; l++){ 526bc5ccf88SSatish Balay j = owner[i]; 527a2d1c673SSatish Balay if (bs2 == 1) { 5285bd3b8fbSHong Zhang svalues[startv[j]] = sp_val[l]; 529a2d1c673SSatish Balay } else { 530c1ac3661SBarry Smith PetscInt k; 5313eda8832SBarry Smith MatScalar *buf1,*buf2; 5324c1ff481SSatish Balay buf1 = svalues+bs2*startv[j]; 533*b087b6d6SSatish Balay buf2 = space->val + bs2*l; 5344c1ff481SSatish Balay for (k=0; k<bs2; k++){ buf1[k] = buf2[k]; } 535a2d1c673SSatish Balay } 5365bd3b8fbSHong Zhang sindices[starti[j]] = sp_idx[l]; 5375bd3b8fbSHong Zhang sindices[starti[j]+nlengths[j]] = sp_idy[l]; 538bc5ccf88SSatish Balay startv[j]++; 539bc5ccf88SSatish Balay starti[j]++; 54075cae7c1SHong Zhang i++; 54175cae7c1SHong Zhang } 54275cae7c1SHong Zhang 
space = space_next; 543bc5ccf88SSatish Balay } 544bc5ccf88SSatish Balay startv[0] = 0; 545563fb871SSatish Balay for (i=1; i<size; i++) { startv[i] = startv[i-1] + nlengths[i-1];} 546e5d0e772SSatish Balay 547bc5ccf88SSatish Balay for (i=0,count=0; i<size; i++) { 548563fb871SSatish Balay if (nprocs[i]) { 549563fb871SSatish Balay ierr = MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);CHKERRQ(ierr); 550563fb871SSatish Balay ierr = MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_MATSCALAR,i,tag2,comm,send_waits+count++);CHKERRQ(ierr); 551bc5ccf88SSatish Balay } 552b85c94c3SSatish Balay } 5536cf91177SBarry Smith #if defined(PETSC_USE_INFO) 554ae15b995SBarry Smith ierr = PetscInfo1(0,"No of messages: %d \n",nsends);CHKERRQ(ierr); 555e5d0e772SSatish Balay for (i=0; i<size; i++) { 556e5d0e772SSatish Balay if (nprocs[i]) { 557ae15b995SBarry Smith ierr = PetscInfo2(0,"Mesg_to: %d: size: %d \n",i,nlengths[i]*bs2*sizeof(MatScalar)+2*sizeof(PetscInt));CHKERRQ(ierr); 558e5d0e772SSatish Balay } 559e5d0e772SSatish Balay } 560e5d0e772SSatish Balay #endif 561606d414cSSatish Balay ierr = PetscFree(owner);CHKERRQ(ierr); 562606d414cSSatish Balay ierr = PetscFree(startv);CHKERRQ(ierr); 563a2d1c673SSatish Balay /* This memory is reused in scatter end for a different purpose*/ 564a2d1c673SSatish Balay for (i=0; i<2*size; i++) nprocs[i] = -1; 565a2d1c673SSatish Balay stash->nprocs = nprocs; 566a2d1c673SSatish Balay 567563fb871SSatish Balay /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */ 568563fb871SSatish Balay ierr = PetscMalloc((nreceives+1)*2*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr); 569563fb871SSatish Balay 570563fb871SSatish Balay for (i=0; i<nreceives; i++) { 571563fb871SSatish Balay recv_waits[2*i] = recv_waits1[i]; 572563fb871SSatish Balay recv_waits[2*i+1] = recv_waits2[i]; 573563fb871SSatish Balay } 574563fb871SSatish Balay stash->recv_waits = recv_waits; 575563fb871SSatish Balay ierr = 
PetscFree(recv_waits1);CHKERRQ(ierr); 576563fb871SSatish Balay ierr = PetscFree(recv_waits2);CHKERRQ(ierr); 577563fb871SSatish Balay 578bc5ccf88SSatish Balay stash->svalues = svalues; stash->rvalues = rvalues; 579563fb871SSatish Balay stash->rindices = rindices; stash->send_waits = send_waits; 580bc5ccf88SSatish Balay stash->nsends = nsends; stash->nrecvs = nreceives; 581bc5ccf88SSatish Balay PetscFunctionReturn(0); 582bc5ccf88SSatish Balay } 583bc5ccf88SSatish Balay 584a2d1c673SSatish Balay /* 5858798bf22SSatish Balay MatStashScatterGetMesg_Private - This function waits on the receives posted 5868798bf22SSatish Balay in the function MatStashScatterBegin_Private() and returns one message at 5874c1ff481SSatish Balay a time to the calling function. If no messages are left, it indicates this 5884c1ff481SSatish Balay by setting flg = 0, else it sets flg = 1. 5894c1ff481SSatish Balay 5904c1ff481SSatish Balay Input Parameters: 5914c1ff481SSatish Balay stash - the stash 5924c1ff481SSatish Balay 5934c1ff481SSatish Balay Output Parameters: 5944c1ff481SSatish Balay nvals - the number of entries in the current message. 5954c1ff481SSatish Balay rows - an array of row indices (or blocked indices) corresponding to the values 5964c1ff481SSatish Balay cols - an array of column indices (or blocked indices) corresponding to the values 5974c1ff481SSatish Balay vals - the values 5984c1ff481SSatish Balay flg - 0 indicates no more messages left, and the current call has no values associated. 5994c1ff481SSatish Balay 1 indicates that the current call successfully received a message, and the 6004c1ff481SSatish Balay other output parameters nvals,rows,cols,vals are set appropriately. 
601a2d1c673SSatish Balay */ 6024a2ae208SSatish Balay #undef __FUNCT__ 6034a2ae208SSatish Balay #define __FUNCT__ "MatStashScatterGetMesg_Private" 604c1ac3661SBarry Smith PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt** cols,MatScalar **vals,PetscInt *flg) 605bc5ccf88SSatish Balay { 6066849ba73SBarry Smith PetscErrorCode ierr; 607fe09c992SBarry Smith PetscMPIInt i,*flg_v,i1,i2; 608fe09c992SBarry Smith PetscInt bs2; 609a2d1c673SSatish Balay MPI_Status recv_status; 610b0a32e0cSBarry Smith PetscTruth match_found = PETSC_FALSE; 611bc5ccf88SSatish Balay 612bc5ccf88SSatish Balay PetscFunctionBegin; 613bc5ccf88SSatish Balay 614a2d1c673SSatish Balay *flg = 0; /* When a message is discovered this is reset to 1 */ 615a2d1c673SSatish Balay /* Return if no more messages to process */ 616a2d1c673SSatish Balay if (stash->nprocessed == stash->nrecvs) { PetscFunctionReturn(0); } 617a2d1c673SSatish Balay 618a2d1c673SSatish Balay flg_v = stash->nprocs; 6194c1ff481SSatish Balay bs2 = stash->bs*stash->bs; 620a2d1c673SSatish Balay /* If a matching pair of receieves are found, process them, and return the data to 621a2d1c673SSatish Balay the calling function. 
Until then keep receiving messages */ 622a2d1c673SSatish Balay while (!match_found) { 623a2d1c673SSatish Balay ierr = MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);CHKERRQ(ierr); 624a2d1c673SSatish Balay /* Now pack the received message into a structure which is useable by others */ 625a2d1c673SSatish Balay if (i % 2) { 6263eda8832SBarry Smith ierr = MPI_Get_count(&recv_status,MPIU_MATSCALAR,nvals);CHKERRQ(ierr); 627c1dc657dSBarry Smith flg_v[2*recv_status.MPI_SOURCE] = i/2; 628a2d1c673SSatish Balay *nvals = *nvals/bs2; 629563fb871SSatish Balay } else { 630563fb871SSatish Balay ierr = MPI_Get_count(&recv_status,MPIU_INT,nvals);CHKERRQ(ierr); 631563fb871SSatish Balay flg_v[2*recv_status.MPI_SOURCE+1] = i/2; 632563fb871SSatish Balay *nvals = *nvals/2; /* This message has both row indices and col indices */ 633bc5ccf88SSatish Balay } 634a2d1c673SSatish Balay 635a2d1c673SSatish Balay /* Check if we have both the messages from this proc */ 636c1dc657dSBarry Smith i1 = flg_v[2*recv_status.MPI_SOURCE]; 637c1dc657dSBarry Smith i2 = flg_v[2*recv_status.MPI_SOURCE+1]; 638a2d1c673SSatish Balay if (i1 != -1 && i2 != -1) { 639563fb871SSatish Balay *rows = stash->rindices[i2]; 640a2d1c673SSatish Balay *cols = *rows + *nvals; 641563fb871SSatish Balay *vals = stash->rvalues[i1]; 642a2d1c673SSatish Balay *flg = 1; 643a2d1c673SSatish Balay stash->nprocessed ++; 64435d8aa7fSBarry Smith match_found = PETSC_TRUE; 645bc5ccf88SSatish Balay } 646bc5ccf88SSatish Balay } 647bc5ccf88SSatish Balay PetscFunctionReturn(0); 648bc5ccf88SSatish Balay } 649