1 2 /* 3 Support for the parallel SBAIJ matrix vector multiply 4 */ 5 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 6 7 PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat) 8 { 9 Mat_MPISBAIJ *sbaij = (Mat_MPISBAIJ*)mat->data; 10 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(sbaij->B->data); 11 PetscErrorCode ierr; 12 PetscInt Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ec = 0,*garray,*sgarray; 13 PetscInt bs = mat->rmap->bs,*stmp,mbs=sbaij->mbs, vec_size,nt; 14 IS from,to; 15 Vec gvec; 16 PetscMPIInt rank =sbaij->rank,lsize,size=sbaij->size; 17 PetscInt *owners=sbaij->rangebs,*ec_owner,k; 18 const PetscInt *sowners; 19 PetscScalar *ptr; 20 21 PetscFunctionBegin; 22 ierr = VecScatterDestroy(&sbaij->sMvctx);CHKERRQ(ierr); 23 24 /* For the first stab we make an array as long as the number of columns */ 25 /* mark those columns that are in sbaij->B */ 26 ierr = PetscCalloc1(Nbs,&indices);CHKERRQ(ierr); 27 for (i=0; i<mbs; i++) { 28 for (j=0; j<B->ilen[i]; j++) { 29 if (!indices[aj[B->i[i] + j]]) ec++; 30 indices[aj[B->i[i] + j]] = 1; 31 } 32 } 33 34 /* form arrays of columns we need */ 35 ierr = PetscMalloc1(ec,&garray);CHKERRQ(ierr); 36 ierr = PetscMalloc2(2*ec,&sgarray,ec,&ec_owner);CHKERRQ(ierr); 37 38 ec = 0; 39 for (j=0; j<size; j++) { 40 for (i=owners[j]; i<owners[j+1]; i++) { 41 if (indices[i]) { 42 garray[ec] = i; 43 ec_owner[ec] = j; 44 ec++; 45 } 46 } 47 } 48 49 /* make indices now point into garray */ 50 for (i=0; i<ec; i++) indices[garray[i]] = i; 51 52 /* compact out the extra columns in B */ 53 for (i=0; i<mbs; i++) { 54 for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 55 } 56 B->nbs = ec; 57 58 sbaij->B->cmap->n = sbaij->B->cmap->N = ec*mat->rmap->bs; 59 60 ierr = PetscLayoutSetUp((sbaij->B->cmap));CHKERRQ(ierr); 61 ierr = PetscFree(indices);CHKERRQ(ierr); 62 63 /* create local vector that is used to scatter into */ 64 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr); 65 66 /* create two temporary index sets for building scatter-gather */ 67 ierr = PetscMalloc1(2*ec,&stmp);CHKERRQ(ierr); 68 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 69 for (i=0; i<ec; i++) stmp[i] = i; 70 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr); 71 72 /* generate the scatter context 73 -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but usefule for some applications */ 74 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr); 75 ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr); 76 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 77 78 sbaij->garray = garray; 79 80 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->Mvctx);CHKERRQ(ierr); 81 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->lvec);CHKERRQ(ierr); 82 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr); 83 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr); 84 85 ierr = ISDestroy(&from);CHKERRQ(ierr); 86 ierr = ISDestroy(&to);CHKERRQ(ierr); 87 88 /* create parallel vector that is used by SBAIJ matrix to scatter from/into */ 89 lsize = (mbs + ec)*bs; 90 ierr = VecCreateMPI(PetscObjectComm((PetscObject)mat),lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr); 91 ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr); 92 ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr); 93 94 ierr = VecGetOwnershipRanges(sbaij->slvec0,&sowners);CHKERRQ(ierr); 95 96 /* x index in the IS sfrom */ 97 for (i=0; i<ec; i++) { 98 j = ec_owner[i]; 99 sgarray[i] = garray[i] + (sowners[j]/bs - owners[j]); 100 } 101 /* b index in the IS sfrom */ 102 k = sowners[rank]/bs + mbs; 103 for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j; 104 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,sgarray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 105 106 /* x index in the IS sto */ 107 k = sowners[rank]/bs + mbs; 108 for (i=0; i<ec; i++) stmp[i] = (k + i); 109 /* b index in the IS sto */ 110 for (i=ec; i<2*ec; i++) stmp[i] = sgarray[i-ec]; 111 112 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr); 113 114 ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr); 115 116 ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr); 117 ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr); 118 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr); 119 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr); 120 ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr); 121 122 ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr); 123 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr); 124 ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr); 125 126 ierr = PetscFree(stmp);CHKERRQ(ierr); 127 ierr = MPI_Barrier(PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 128 129 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->sMvctx);CHKERRQ(ierr); 130 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec0);CHKERRQ(ierr); 131 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1);CHKERRQ(ierr); 132 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec0b);CHKERRQ(ierr); 133 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1a);CHKERRQ(ierr); 134 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1b);CHKERRQ(ierr); 135 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr); 136 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr); 137 138 ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 139 ierr = ISDestroy(&from);CHKERRQ(ierr); 140 ierr = ISDestroy(&to);CHKERRQ(ierr); 141 ierr = PetscFree2(sgarray,ec_owner);CHKERRQ(ierr); 142 PetscFunctionReturn(0); 143 } 144 145 PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat mat) 146 { 147 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 148 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 149 PetscErrorCode ierr; 150 PetscInt i,j,*aj = B->j,ec = 0,*garray; 151 PetscInt bs = mat->rmap->bs,*stmp; 152 IS from,to; 153 Vec gvec; 154 #if defined(PETSC_USE_CTABLE) 155 PetscTable gid1_lid1; 156 PetscTablePosition tpos; 157 PetscInt gid,lid; 158 #else 159 PetscInt Nbs = baij->Nbs,*indices; 160 #endif 161 162 PetscFunctionBegin; 163 #if defined(PETSC_USE_CTABLE) 164 /* use a table - Mark Adams */ 165 PetscTableCreate(B->mbs,baij->Nbs+1,&gid1_lid1); 166 for (i=0; i<B->mbs; i++) { 167 for (j=0; j<B->ilen[i]; j++) { 168 PetscInt data,gid1 = aj[B->i[i]+j] + 1; 169 ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); 170 if (!data) { 171 /* one based table */ 172 ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr); 173 } 174 } 175 } 176 /* form array of columns we need */ 177 ierr = PetscMalloc1(ec,&garray);CHKERRQ(ierr); 178 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 179 while (tpos) { 180 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 181 gid--; lid--; 182 garray[lid] = gid; 183 } 184 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 185 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 186 for (i=0; i<ec; i++) { 187 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 188 } 189 /* compact out the extra columns in B */ 190 for (i=0; i<B->mbs; i++) { 191 for (j=0; j<B->ilen[i]; j++) { 192 PetscInt gid1 = aj[B->i[i] + j] + 1; 193 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 194 lid--; 195 aj[B->i[i]+j] = lid; 196 } 197 } 198 B->nbs = ec; 199 200 baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs; 201 202 ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr); 203 ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr); 204 #else 205 /* For the first stab we make an array as long as the number of columns */ 206 /* mark those columns that are in baij->B */ 207 ierr = PetscCalloc1(Nbs,&indices);CHKERRQ(ierr); 208 for (i=0; i<B->mbs; i++) { 209 for (j=0; j<B->ilen[i]; j++) { 210 if (!indices[aj[B->i[i] + j]]) ec++; 211 indices[aj[B->i[i] + j]] = 1; 212 } 213 } 214 215 /* form array of columns we need */ 216 ierr = PetscMalloc1(ec,&garray);CHKERRQ(ierr); 217 ec = 0; 218 for (i=0; i<Nbs; i++) { 219 if (indices[i]) { 220 garray[ec++] = i; 221 } 222 } 223 224 /* make indices now point into garray */ 225 for (i=0; i<ec; i++) indices[garray[i]] = i; 226 227 /* compact out the extra columns in B */ 228 for (i=0; i<B->mbs; i++) { 229 for (j=0; j<B->ilen[i]; j++) { 230 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 231 } 232 } 233 B->nbs = ec; 234 235 baij->B->cmap->n = ec*mat->rmap->bs; 236 237 ierr = PetscFree(indices);CHKERRQ(ierr); 238 #endif 239 240 /* create local vector that is used to scatter into */ 241 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 242 243 /* create two temporary index sets for building scatter-gather */ 244 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 245 246 ierr = PetscMalloc1(ec,&stmp);CHKERRQ(ierr); 247 for (i=0; i<ec; i++) stmp[i] = i; 248 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr); 249 250 /* create temporary global vector to generate scatter context */ 251 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr); 252 ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 253 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 254 255 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->Mvctx);CHKERRQ(ierr); 256 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->lvec);CHKERRQ(ierr); 257 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr); 258 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr); 259 260 baij->garray = garray; 261 262 ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 263 ierr = ISDestroy(&from);CHKERRQ(ierr); 264 ierr = ISDestroy(&to);CHKERRQ(ierr); 265 PetscFunctionReturn(0); 266 } 267 268 /* 269 Takes the local part of an already assembled MPISBAIJ matrix 270 and disassembles it. This is to allow new nonzeros into the matrix 271 that require more communication in the matrix vector multiply. 272 Thus certain data-structures must be rebuilt. 273 274 Kind of slow! But that's what application programmers get when 275 they are sloppy. 276 */ 277 PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A) 278 { 279 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)A->data; 280 Mat B = baij->B,Bnew; 281 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 282 PetscErrorCode ierr; 283 PetscInt i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray; 284 PetscInt k,bs=A->rmap->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->rmap->n; 285 MatScalar *a = Bbaij->a; 286 PetscScalar *atmp; 287 #if defined(PETSC_USE_REAL_MAT_SINGLE) 288 PetscInt l; 289 #endif 290 291 PetscFunctionBegin; 292 #if defined(PETSC_USE_REAL_MAT_SINGLE) 293 ierr = PetscMalloc1(A->rmap->bs,&atmp); 294 #endif 295 /* free stuff related to matrix-vec multiply */ 296 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 297 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 298 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 299 300 ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); 301 ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 302 ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); 303 ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); 304 ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 305 306 if (baij->colmap) { 307 #if defined(PETSC_USE_CTABLE) 308 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 309 #else 310 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 311 ierr = PetscLogObjectMemory((PetscObject)A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr); 312 #endif 313 } 314 315 /* make sure that B is assembled so we can access its values */ 316 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 317 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 318 319 /* invent new B and copy stuff over */ 320 ierr = PetscMalloc1(mbs,&nz);CHKERRQ(ierr); 321 for (i=0; i<mbs; i++) { 322 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 323 } 324 ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr); 325 ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 326 ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr); 327 ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr); 328 ierr = PetscFree(nz);CHKERRQ(ierr); 329 330 ((Mat_SeqSBAIJ*)Bnew->data)->nonew = Bbaij->nonew; /* Inherit insertion error options. */ 331 /* 332 Ensure that B's nonzerostate is monotonically increasing. 333 Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call? 334 */ 335 Bnew->nonzerostate = B->nonzerostate; 336 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 337 for (i=0; i<mbs; i++) { 338 rvals[0] = bs*i; 339 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 340 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 341 col = garray[Bbaij->j[j]]*bs; 342 for (k=0; k<bs; k++) { 343 #if defined(PETSC_USE_REAL_MAT_SINGLE) 344 for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l]; 345 #else 346 atmp = a+j*bs2 + k*bs; 347 #endif 348 ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 349 col++; 350 } 351 } 352 } 353 #if defined(PETSC_USE_REAL_MAT_SINGLE) 354 ierr = PetscFree(atmp);CHKERRQ(ierr); 355 #endif 356 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 357 358 baij->garray = 0; 359 360 ierr = PetscFree(rvals);CHKERRQ(ierr); 361 ierr = PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 362 ierr = MatDestroy(&B);CHKERRQ(ierr); 363 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew);CHKERRQ(ierr); 364 365 baij->B = Bnew; 366 367 A->was_assembled = PETSC_FALSE; 368 PetscFunctionReturn(0); 369 } 370