/*
   Support for the parallel SBAIJ matrix vector multiply
*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>

/*
   MatSetUpMultiply_MPISBAIJ - Builds the communication data structures needed by the
   parallel symmetric block AIJ matrix-vector product.

   It (1) determines the set of off-process block columns actually referenced by the
   local off-diagonal part B and compacts B's column indices to a dense local numbering,
   (2) records that global-to-local map in sbaij->garray, and (3) creates the vectors and
   VecScatter contexts used by the multiply:
     - lvec/Mvctx: classic ghost-value gather (kept for other uses, e.g. MatDiagonalScale)
     - slvec0/slvec1/sMvctx: the SBAIJ-specific combined scatter; each process's slice of
       slvec0 is laid out as [local x part (mbs*bs entries) | ghost part (ec*bs entries)],
       and slvec1a/slvec1b (and slvec0b) are sequential vectors aliasing those two pieces.

   Collective on mat.
*/
PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ*)mat->data;
  Mat_SeqBAIJ    *B     = (Mat_SeqBAIJ*)(sbaij->B->data);   /* local off-diagonal part */
  PetscErrorCode ierr;
  PetscInt       Nbs = sbaij->Nbs,i,j,*aj = B->j,ec = 0,*garray,*sgarray;
  PetscInt       bs = mat->rmap->bs,*stmp,mbs=sbaij->mbs, vec_size,nt;
  IS             from,to;
  Vec            gvec;
  PetscMPIInt    rank = sbaij->rank,lsize;
  PetscInt       *owners = sbaij->rangebs,*ec_owner,k;     /* block-row ownership ranges */
  const PetscInt *sowners;
  PetscScalar    *ptr;
#if defined(PETSC_USE_CTABLE)
  PetscTable         gid1_lid1; /* one-based gid to lid table */
  PetscTablePosition tpos;
  PetscInt           gid,lid;
#else
  PetscInt *indices;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* use a hash table: count the distinct block columns referenced by B (ec) and
     assign each a provisional one-based local id */
  ierr = PetscTableCreate(mbs,Nbs+1,&gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt data,gid1 = aj[B->i[i]+j] + 1;   /* shift by one: table cannot store key/value 0 */
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr);}
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc1(ec,&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--; lid--;                 /* undo the one-based shift */
    garray[lid] = gid;
  }
  /* sort so local ids are in increasing global order, then rebuild the table
     with the final (sorted) local ids */
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);}
  /* compact out the extra columns in B: rewrite aj[] from global to local block-column ids */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid--;
      aj[B->i[i]+j] = lid;
    }
  }
  ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr);
  /* sgarray gets 2*ec slots: [shifted x indices | b indices] for the combined scatter below;
     ec_owner[i] records which process owns ghost block column garray[i] */
  ierr = PetscMalloc2(2*ec,&sgarray,ec,&ec_owner);CHKERRQ(ierr);
  for (i=j=0; i<ec; i++) {
    while (garray[i]>=owners[j+1]) j++;   /* garray is sorted, so j only advances */
    ec_owner[i] = j;
  }
#else
  /* For the first stab we make an array as long as the number of columns */
  /* mark those columns that are in sbaij->B */
  ierr = PetscCalloc1(Nbs,&indices);CHKERRQ(ierr);
  for (i=0; i<mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form arrays of columns we need */
  ierr = PetscMalloc1(ec,&garray);CHKERRQ(ierr);
  ierr = PetscMalloc2(2*ec,&sgarray,ec,&ec_owner);CHKERRQ(ierr);

  /* sweep owner by owner so garray comes out sorted and ec_owner is filled in one pass */
  ec = 0;
  for (j=0; j<sbaij->size; j++) {
    for (i=owners[j]; i<owners[j+1]; i++) {
      if (indices[i]) {
        garray[ec]   = i;
        ec_owner[ec] = j;
        ec++;
      }
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) indices[garray[i]] = i;

  /* compact out the extra columns in B */
  for (i=0; i<mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
  }
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif
  /* B now has ec block columns; rebuild its column layout accordingly */
  B->nbs = ec;
  ierr = PetscLayoutDestroy(&sbaij->B->cmap);CHKERRQ(ierr);
  ierr = PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sbaij->B),ec*mat->rmap->bs,ec*mat->rmap->bs,mat->rmap->bs,&sbaij->B->cmap);CHKERRQ(ierr);

  ierr = VecScatterDestroy(&sbaij->sMvctx);CHKERRQ(ierr);
  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather */
  /* stmp has 2*ec slots because it is reused below for the combined sto index set */
  ierr = PetscMalloc1(2*ec,&stmp);CHKERRQ(ierr);
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
  for (i=0; i<ec; i++) stmp[i] = i;
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr);

  /* generate the scatter context
     -- Mvctx and lvec are not used by MatMult_MPISBAIJ(),
        but have other uses, such as in MatDiagonalScale_MPISBAIJ */
  ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr);
  ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&gvec);CHKERRQ(ierr);

  sbaij->garray = garray;

  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr);

  ierr = ISDestroy(&from);CHKERRQ(ierr);
  ierr = ISDestroy(&to);CHKERRQ(ierr);

  /* create parallel vector that is used by SBAIJ matrix to scatter from/into;
     local layout: [x part (mbs blocks) | ghost part (ec blocks)] */
  lsize = (mbs + ec)*bs;
  ierr = VecCreateMPI(PetscObjectComm((PetscObject)mat),lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr);
  ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr);
  ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr);

  ierr = VecGetOwnershipRanges(sbaij->slvec0,&sowners);CHKERRQ(ierr);

  /* x index in the IS sfrom: shift each global block column into the slvec0 numbering,
     which has (mbs+ec)*bs entries per process instead of mbs*bs */
  for (i=0; i<ec; i++) {
    j          = ec_owner[i];
    sgarray[i] = garray[i] + (sowners[j]/bs - owners[j]);
  }
  /* b index in the IS sfrom: this process's own ghost slots, right after its x part */
  k = sowners[rank]/bs + mbs;
  for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j;
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,sgarray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);

  /* x index in the IS sto */
  k = sowners[rank]/bs + mbs;
  for (i=0; i<ec; i++) stmp[i] = (k + i);
  /* b index in the IS sto */
  for (i=ec; i<2*ec; i++) stmp[i] = sgarray[i-ec];

  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr);

  ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr);
  ierr = VecScatterViewFromOptions(sbaij->sMvctx,(PetscObject)mat,"-matmult_vecscatter_view");CHKERRQ(ierr);

  /* carve slvec1 (and slvec0) into sequential sub-vectors that alias the x part
     (first bs*mbs entries) and the ghost part (remaining nt-bs*mbs entries) */
  ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);

  ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);

  ierr = PetscFree(stmp);CHKERRQ(ierr);

  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->sMvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec0b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1a);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr);

  ierr = PetscLogObjectMemory((PetscObject)mat,ec*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(&from);CHKERRQ(ierr);
  ierr = ISDestroy(&to);CHKERRQ(ierr);
  ierr = PetscFree2(sgarray,ec_owner);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
     Takes the local part of an already assembled MPISBAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix vector multiply.
190 Thus certain data-structures must be rebuilt. 191 192 Kind of slow! But that's what application programmers get when 193 they are sloppy. 194 */ 195 PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A) 196 { 197 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)A->data; 198 Mat B = baij->B,Bnew; 199 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 200 PetscErrorCode ierr; 201 PetscInt i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray; 202 PetscInt k,bs=A->rmap->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->rmap->n; 203 MatScalar *a = Bbaij->a; 204 PetscScalar *atmp; 205 #if defined(PETSC_USE_REAL_MAT_SINGLE) 206 PetscInt l; 207 #endif 208 209 PetscFunctionBegin; 210 #if defined(PETSC_USE_REAL_MAT_SINGLE) 211 ierr = PetscMalloc1(A->rmap->bs,&atmp);CHKERRQ(ierr); 212 #endif 213 /* free stuff related to matrix-vec multiply */ 214 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 215 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 216 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 217 218 ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); 219 ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 220 ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); 221 ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); 222 ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 223 224 if (baij->colmap) { 225 #if defined(PETSC_USE_CTABLE) 226 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 227 #else 228 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 229 ierr = PetscLogObjectMemory((PetscObject)A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr); 230 #endif 231 } 232 233 /* make sure that B is assembled so we can access its values */ 234 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 235 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 236 237 /* invent new B and copy stuff over */ 238 ierr = PetscMalloc1(mbs,&nz);CHKERRQ(ierr); 239 for (i=0; i<mbs; i++) { 240 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 241 } 242 ierr = 
MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr); 243 ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 244 ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr); 245 ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr); 246 ierr = PetscFree(nz);CHKERRQ(ierr); 247 248 if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */ 249 ((Mat_SeqSBAIJ*)Bnew->data)->nonew = Bbaij->nonew; 250 } 251 252 /* 253 Ensure that B's nonzerostate is monotonically increasing. 254 Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call? 255 */ 256 Bnew->nonzerostate = B->nonzerostate; 257 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 258 for (i=0; i<mbs; i++) { 259 rvals[0] = bs*i; 260 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 261 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 262 col = garray[Bbaij->j[j]]*bs; 263 for (k=0; k<bs; k++) { 264 #if defined(PETSC_USE_REAL_MAT_SINGLE) 265 for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l]; 266 #else 267 atmp = a+j*bs2 + k*bs; 268 #endif 269 ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 270 col++; 271 } 272 } 273 } 274 #if defined(PETSC_USE_REAL_MAT_SINGLE) 275 ierr = PetscFree(atmp);CHKERRQ(ierr); 276 #endif 277 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 278 279 baij->garray = NULL; 280 281 ierr = PetscFree(rvals);CHKERRQ(ierr); 282 ierr = PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 283 ierr = MatDestroy(&B);CHKERRQ(ierr); 284 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew);CHKERRQ(ierr); 285 286 baij->B = Bnew; 287 288 A->was_assembled = PETSC_FALSE; 289 PetscFunctionReturn(0); 290 } 291