#define PETSCMAT_DLL

/*
   Support for the parallel SBAIJ matrix vector multiply
*/
#include "src/mat/impls/sbaij/mpi/mpisbaij.h"

EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);


#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPISBAIJ"
/*
   MatSetUpMultiply_MPISBAIJ - Builds the communication structures needed by the
   MPISBAIJ matrix-vector product.

   Input Parameter:
.  mat - the MPISBAIJ matrix; its off-diagonal part sbaij->B must already be assembled

   Notes:
   This routine
     - determines which global block columns are referenced by the local
       off-diagonal part B, records them in sbaij->garray, and compacts the
       column indices of B to the local numbering 0..ec-1;
     - creates sbaij->lvec and sbaij->Mvctx (the conventional BAIJ-style
       gather of needed off-process entries);
     - creates the SBAIJ-specific work vectors slvec0/slvec1 (of local length
       (mbs+ec)*bs), the sub-vectors slvec0b/slvec1a/slvec1b sharing their
       storage, and the two-way scatter sMvctx used by MatMult for the
       symmetric format.

   Collective on mat->comm (creates MPI vectors/scatters and ends with a barrier).
*/
PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ*)mat->data;
  Mat_SeqBAIJ    *B     = (Mat_SeqBAIJ*)(sbaij->B->data);
  PetscErrorCode ierr;
  PetscInt       Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ec = 0,*garray,*sgarray;
  PetscInt       bs = mat->bs,*stmp,mbs=sbaij->mbs, vec_size,nt;
  IS             from,to;
  Vec            gvec;
  PetscMPIInt    rank=sbaij->rank,lsize,size=sbaij->size;
  PetscInt       *owners=sbaij->rowners,*sowners,*ec_owner,k;
  PetscMap       vecmap;
  PetscScalar    *ptr;

  PetscFunctionBegin;
  /* discard any communication structures from a previous assembly */
  if (sbaij->lvec) {
    ierr = VecDestroy(sbaij->lvec);CHKERRQ(ierr);
    sbaij->lvec = 0;
  }
  if (sbaij->Mvctx) {
    ierr = VecScatterDestroy(sbaij->Mvctx);CHKERRQ(ierr);
    sbaij->Mvctx = 0;
  }

  /* For the first stab we make an array as long as the number of columns */
  /* mark those columns that are in sbaij->B */
  ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      /* ec counts the distinct block columns referenced by B */
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j] ] = 1;
    }
  }

  /* form arrays of columns we need; sgarray holds 2*ec scatter indices
     followed by ec owner ranks (hence the 3*ec allocation) */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ierr = PetscMalloc((3*ec+1)*sizeof(PetscInt),&sgarray);CHKERRQ(ierr);
  ec_owner = sgarray + 2*ec;

  /* walk the ownership ranges in rank order so garray is sorted and
     ec_owner[i] records which rank owns block column garray[i];
     owners = sbaij->rowners is assumed to hold the block-row ownership
     ranges per rank -- TODO confirm against Mat_MPISBAIJ definition */
  ec = 0;
  for (j=0; j<size; j++){
    for (i=owners[j]; i<owners[j+1]; i++){
      if (indices[i]) {
        garray[ec]   = i;
        ec_owner[ec] = j;
        ec++;
      }
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) indices[garray[i]] = i;

  /* compact out the extra columns in B: global block column -> local index */
  for (i=0; i<mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
  }
  B->nbs      = ec;
  sbaij->B->n = ec*mat->bs;
  ierr = PetscFree(indices);CHKERRQ(ierr);

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather
     (stmp is sized 2*ec+1 because it is reused below for the sMvctx sets) */
  ierr = PetscMalloc((2*ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) stmp[i] = bs*garray[i];
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&from);CHKERRQ(ierr);

  for (i=0; i<ec; i++) { stmp[i] = bs*i; }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);

  /* generate the scatter context
     -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but useful for some applications */
  ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);
  ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterPostRecvs(gvec,sbaij->lvec,INSERT_VALUES,SCATTER_FORWARD,sbaij->Mvctx);CHKERRQ(ierr);

  sbaij->garray = garray;
  ierr = PetscLogObjectParent(mat,sbaij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);

  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);

  /* create parallel vectors that are used by the SBAIJ matrix to scatter from/into;
     each local part holds mbs*bs "x" entries followed by ec*bs "b" entries */
  lsize = (mbs + ec)*bs;
  ierr = VecCreateMPI(mat->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr);
  ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr);
  ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr);

  /* sowners: per-rank global starting indices of slvec0 (in scalar units) */
  ierr = VecGetPetscMap(sbaij->slvec0,&vecmap);CHKERRQ(ierr);
  ierr = PetscMapGetGlobalRange(vecmap,&sowners);CHKERRQ(ierr);

  /* x index in the IS sfrom: shift each needed block column from the
     matrix numbering into the slvec0 numbering of its owner rank */
  for (i=0; i<ec; i++) {
    j = ec_owner[i];
    sgarray[i] = garray[i] + (sowners[j]/bs - owners[j]);
  }
  /* b index in the IS sfrom: this rank's "b" section starts after its mbs
     "x" blocks */
  k = sowners[rank]/bs + mbs;
  for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j;

  for (i=0; i<2*ec; i++) stmp[i] = bs*sgarray[i];
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&from);CHKERRQ(ierr);

  /* x index in the IS sto */
  k = sowners[rank]/bs + mbs;
  for (i=0; i<ec; i++) stmp[i] = bs*(k + i);
  /* b index in the IS sto */
  for (i=ec; i<2*ec; i++) stmp[i] = bs*sgarray[i-ec];

  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&to);CHKERRQ(ierr);

  /* generate the SBAIJ scatter context */
  ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr);

  /*
     Post the receives for the first matrix vector product. We synchronize after
     this on the chance that the user immediately calls MatMult() after assembling
     the matrix.
  */
  ierr = VecScatterPostRecvs(sbaij->slvec0,sbaij->slvec1,INSERT_VALUES,SCATTER_FORWARD,sbaij->sMvctx);CHKERRQ(ierr);

  /* expose the "x" part (slvec1a) and "b" parts (slvec0b/slvec1b) of the work
     vectors as sequential vectors sharing the same storage */
  ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);

  ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);

  ierr = PetscFree(stmp);CHKERRQ(ierr);
  ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);

  ierr = PetscLogObjectParent(mat,sbaij->sMvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec0b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec1a);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec1b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);

  ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);
  /* sgarray (and the ec_owner tail it contains) is purely local scratch */
  ierr = PetscFree(sgarray);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm"
/*
   MatSetUpMultiply_MPISBAIJ_2comm - BAIJ-style variant of the setup: builds only
   garray, the compacted column indices of baij->B, and the single gather
   (lvec/Mvctx) used by a conventional two-sided matrix-vector product.

   Input Parameter:
.  mat - the MPISBAIJ matrix; its off-diagonal part baij->B must already be assembled

   Notes:
   With PETSC_USE_CTABLE the global-to-local column map is kept in a hash
   table (memory proportional to the number of referenced columns) instead of
   an array of length Nbs.

   Collective on mat->comm.
*/
PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)(baij->B->data);
  PetscErrorCode ierr;
  PetscInt       i,j,*aj = B->j,ec = 0,*garray;
  PetscInt       bs = mat->bs,*stmp;
  IS             from,to;
  Vec            gvec;
#if defined (PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  PetscInt           gid,lid;
#else
  PetscInt       Nbs = baij->Nbs,*indices;
#endif

  PetscFunctionBegin;

#if defined (PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  /* NOTE(review): return code of PetscTableCreate is not checked here */
  PetscTableCreate(B->mbs,&gid1_lid1);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt data,gid1 = aj[B->i[i]+j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--; lid--;
    garray[lid] = gid;
  }
  /* sort, then rebuild the table so local indices follow the sorted order */
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid --;
      aj[B->i[i]+j] = lid;
    }
  }
  B->nbs     = ec;
  baij->B->n = ec*mat->bs;
  ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr);
  /* Mark Adams */
#else
  /* For the first stab we make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j] ] = 1;
    }
  }

  /* form array of columns we need */
  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ec = 0;
  for (i=0; i<Nbs; i++) {
    if (indices[i]) {
      garray[ec++] = i;
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  B->nbs     = ec;
  baij->B->n = ec*mat->bs;
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather;
     garray is scaled to scalar indices in place, then restored */
  for (i=0; i<ec; i++) {
    garray[i] = bs*garray[i];
  }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    garray[i] = garray[i]/bs;
  }

  ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) { stmp[i] = bs*i; }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr);
  ierr = PetscFree(stmp);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  /* this is inefficient, but otherwise we must do either
     1) save garray until the first actual scatter when the vector is known or
     2) have another way of generating a scatter context without a vector.*/
  ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr);

  /* generate the scatter context */
  ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);

  /*
     Post the receives for the first matrix vector product. We synchronize after
     this on the chance that the user immediately calls MatMult() after assembling
     the matrix.
  */
  ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
  ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr);

  ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);
  baij->garray = garray;
  ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(from);CHKERRQ(ierr);
  ierr = ISDestroy(to);CHKERRQ(ierr);
  ierr = VecDestroy(gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}


/*
     Takes the local part of an already assembled MPISBAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix vector multiply.
   Thus certain data-structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
323 */ 324 #undef __FUNCT__ 325 #define __FUNCT__ "DisAssemble_MPISBAIJ" 326 PetscErrorCode DisAssemble_MPISBAIJ(Mat A) 327 { 328 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)A->data; 329 Mat B = baij->B,Bnew; 330 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 331 PetscErrorCode ierr; 332 PetscInt i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray; 333 PetscInt k,bs=A->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->m; 334 MatScalar *a = Bbaij->a; 335 PetscScalar *atmp; 336 #if defined(PETSC_USE_MAT_SINGLE) 337 PetscInt l; 338 #endif 339 340 PetscFunctionBegin; 341 #if defined(PETSC_USE_MAT_SINGLE) 342 ierr = PetscMalloc(A->bs*sizeof(PetscScalar),&atmp); 343 #endif 344 /* free stuff related to matrix-vec multiply */ 345 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 346 ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0; 347 ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0; 348 if (baij->colmap) { 349 #if defined (PETSC_USE_CTABLE) 350 ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr); 351 #else 352 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 353 baij->colmap = 0; 354 ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr); 355 #endif 356 } 357 358 /* make sure that B is assembled so we can access its values */ 359 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 360 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 361 362 /* invent new B and copy stuff over */ 363 ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr); 364 for (i=0; i<mbs; i++) { 365 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 366 } 367 ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr); 368 ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 369 ierr = MatSetType(Bnew,B->type_name);CHKERRQ(ierr); 370 ierr = MatSeqBAIJSetPreallocation(Bnew,B->bs,0,nz);CHKERRQ(ierr); 371 ierr = PetscFree(nz);CHKERRQ(ierr); 372 373 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 374 for (i=0; 
i<mbs; i++) { 375 rvals[0] = bs*i; 376 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 377 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 378 col = garray[Bbaij->j[j]]*bs; 379 for (k=0; k<bs; k++) { 380 #if defined(PETSC_USE_MAT_SINGLE) 381 for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l]; 382 #else 383 atmp = a+j*bs2 + k*bs; 384 #endif 385 ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 386 col++; 387 } 388 } 389 } 390 #if defined(PETSC_USE_MAT_SINGLE) 391 ierr = PetscFree(atmp);CHKERRQ(ierr); 392 #endif 393 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 394 baij->garray = 0; 395 ierr = PetscFree(rvals);CHKERRQ(ierr); 396 ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 397 ierr = MatDestroy(B);CHKERRQ(ierr); 398 ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr); 399 baij->B = Bnew; 400 A->was_assembled = PETSC_FALSE; 401 PetscFunctionReturn(0); 402 } 403 404 405 406