1 2 /* 3 Support for the parallel SBAIJ matrix vector multiply 4 */ 5 #include "src/mat/impls/sbaij/mpi/mpisbaij.h" 6 7 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,int,const int [],int,const int [],const PetscScalar [],InsertMode); 8 9 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ" 12 PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat) 13 { 14 Mat_MPISBAIJ *sbaij = (Mat_MPISBAIJ*)mat->data; 15 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(sbaij->B->data); 16 PetscErrorCode ierr; 17 int Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ec = 0,*garray,*sgarray; 18 int bs = sbaij->bs,*stmp,mbs=sbaij->mbs, vec_size,nt; 19 IS from,to; 20 Vec gvec; 21 int rank=sbaij->rank,lsize,size=sbaij->size; 22 int *owners=sbaij->rowners,*sowners,*ec_owner,k; 23 PetscMap vecmap; 24 PetscScalar *ptr; 25 26 PetscFunctionBegin; 27 if (sbaij->lvec) { 28 ierr = VecDestroy(sbaij->lvec);CHKERRQ(ierr); 29 sbaij->lvec = 0; 30 } 31 if (sbaij->Mvctx) { 32 ierr = VecScatterDestroy(sbaij->Mvctx);CHKERRQ(ierr); 33 sbaij->Mvctx = 0; 34 } 35 36 /* For the first stab we make an array as long as the number of columns */ 37 /* mark those columns that are in sbaij->B */ 38 ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr); 39 ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr); 40 for (i=0; i<mbs; i++) { 41 for (j=0; j<B->ilen[i]; j++) { 42 if (!indices[aj[B->i[i] + j]]) ec++; 43 indices[aj[B->i[i] + j] ] = 1; 44 } 45 } 46 47 /* form arrays of columns we need */ 48 ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr); 49 ierr = PetscMalloc((3*ec+1)*sizeof(int),&sgarray);CHKERRQ(ierr); 50 ec_owner = sgarray + 2*ec; 51 52 ec = 0; 53 for (j=0; j<size; j++){ 54 for (i=owners[j]; i<owners[j+1]; i++){ 55 if (indices[i]) { 56 garray[ec] = i; 57 ec_owner[ec] = j; 58 ec++; 59 } 60 } 61 } 62 63 /* make indices now point into garray */ 64 for (i=0; i<ec; i++) indices[garray[i]] = i; 65 66 /* compact out the extra columns in B */ 67 for (i=0; i<mbs; i++) { 68 for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 69 } 70 B->nbs = ec; 71 sbaij->B->n = ec*B->bs; 72 ierr = PetscFree(indices);CHKERRQ(ierr); 73 74 /* create local vector that is used to scatter into */ 75 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr); 76 77 /* create two temporary index sets for building scatter-gather */ 78 ierr = PetscMalloc((2*ec+1)*sizeof(int),&stmp);CHKERRQ(ierr); 79 for (i=0; i<ec; i++) stmp[i] = bs*garray[i]; 80 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&from);CHKERRQ(ierr); 81 82 for (i=0; i<ec; i++) { stmp[i] = bs*i; } 83 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr); 84 85 /* generate the scatter context 86 -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but usefule for some applications */ 87 ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr); 88 ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr); 89 ierr = VecScatterPostRecvs(gvec,sbaij->lvec,INSERT_VALUES,SCATTER_FORWARD,sbaij->Mvctx);CHKERRQ(ierr); 90 91 sbaij->garray = garray; 92 PetscLogObjectParent(mat,sbaij->Mvctx); 93 PetscLogObjectParent(mat,sbaij->lvec); 94 PetscLogObjectParent(mat,from); 95 PetscLogObjectParent(mat,to); 96 97 ierr = ISDestroy(from);CHKERRQ(ierr); 98 ierr = ISDestroy(to);CHKERRQ(ierr); 99 100 /* create parallel vector that is used by SBAIJ matrix to scatter from/into */ 101 lsize = (mbs + ec)*bs; 102 ierr = VecCreateMPI(mat->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr); 103 ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr); 104 ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr); 105 106 ierr = VecGetPetscMap(sbaij->slvec0,&vecmap);CHKERRQ(ierr); 107 ierr = PetscMapGetGlobalRange(vecmap,&sowners);CHKERRQ(ierr); 108 109 /* x index in the IS sfrom */ 110 for (i=0; i<ec; i++) { 111 j = ec_owner[i]; 112 sgarray[i] = garray[i] + (sowners[j]/bs - owners[j]); 113 } 114 /* b index in the IS sfrom */ 115 k = sowners[rank]/bs + mbs; 116 for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j; 117 118 for (i=0; i<2*ec; i++) stmp[i] = bs*sgarray[i]; 119 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&from);CHKERRQ(ierr); 120 121 /* x index in the IS sto */ 122 k = sowners[rank]/bs + mbs; 123 for (i=0; i<ec; i++) stmp[i] = bs*(k + i); 124 /* b index in the IS sto */ 125 for (i=ec; i<2*ec; i++) stmp[i] = bs*sgarray[i-ec]; 126 127 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&to);CHKERRQ(ierr); 128 129 /* gnerate the SBAIJ scatter context */ 130 ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr); 131 132 /* 133 Post the receives for the first matrix vector product. We sync-chronize after 134 this on the chance that the user immediately calls MatMult() after assemblying 135 the matrix. 136 */ 137 ierr = VecScatterPostRecvs(sbaij->slvec0,sbaij->slvec1,INSERT_VALUES,SCATTER_FORWARD,sbaij->sMvctx);CHKERRQ(ierr); 138 139 ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr); 140 ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr); 141 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr); 142 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr); 143 ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr); 144 145 ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr); 146 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr); 147 ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr); 148 149 ierr = PetscFree(stmp);CHKERRQ(ierr); 150 ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr); 151 152 PetscLogObjectParent(mat,sbaij->sMvctx); 153 PetscLogObjectParent(mat,sbaij->slvec0); 154 PetscLogObjectParent(mat,sbaij->slvec1); 155 PetscLogObjectParent(mat,sbaij->slvec0b); 156 PetscLogObjectParent(mat,sbaij->slvec1a); 157 PetscLogObjectParent(mat,sbaij->slvec1b); 158 PetscLogObjectParent(mat,from); 159 PetscLogObjectParent(mat,to); 160 161 PetscLogObjectMemory(mat,(ec+1)*sizeof(int)); 162 ierr = ISDestroy(from);CHKERRQ(ierr); 163 ierr = ISDestroy(to);CHKERRQ(ierr); 164 ierr = VecDestroy(gvec);CHKERRQ(ierr); 165 ierr = PetscFree(sgarray);CHKERRQ(ierr); 166 167 PetscFunctionReturn(0); 168 } 169 170 #undef __FUNCT__ 171 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm" 172 PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat mat) 173 { 174 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 175 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 176 PetscErrorCode ierr; 177 int i,j,*aj = B->j,ec = 0,*garray; 178 int bs = baij->bs,*stmp; 179 IS from,to; 180 Vec gvec; 181 #if defined (PETSC_USE_CTABLE) 182 PetscTable gid1_lid1; 183 PetscTablePosition tpos; 184 int gid,lid; 185 #else 186 int Nbs = baij->Nbs,*indices; 187 #endif 188 189 PetscFunctionBegin; 190 191 #if defined (PETSC_USE_CTABLE) 192 /* use a table - Mark Adams */ 193 PetscTableCreate(B->mbs,&gid1_lid1); 194 for (i=0; i<B->mbs; i++) { 195 for (j=0; j<B->ilen[i]; j++) { 196 int data,gid1 = aj[B->i[i]+j] + 1; 197 ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); 198 if (!data) { 199 /* one based table */ 200 ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr); 201 } 202 } 203 } 204 /* form array of columns we need */ 205 ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr); 206 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 207 while (tpos) { 208 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 209 gid--; lid--; 210 garray[lid] = gid; 211 } 212 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 213 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 214 for (i=0; i<ec; i++) { 215 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr); 216 } 217 /* compact out the extra columns in B */ 218 for (i=0; i<B->mbs; i++) { 219 for (j=0; j<B->ilen[i]; j++) { 220 int gid1 = aj[B->i[i] + j] + 1; 221 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 222 lid --; 223 aj[B->i[i]+j] = lid; 224 } 225 } 226 B->nbs = ec; 227 baij->B->n = ec*B->bs; 228 ierr = PetscTableDelete(gid1_lid1);CHKERRQ(ierr); 229 /* Mark Adams */ 230 #else 231 /* For the first stab we make an array as long as the number of columns */ 232 /* mark those columns that are in baij->B */ 233 ierr = PetscMalloc((Nbs+1)*sizeof(int),&indices);CHKERRQ(ierr); 234 ierr = PetscMemzero(indices,Nbs*sizeof(int));CHKERRQ(ierr); 235 for (i=0; i<B->mbs; i++) { 236 for (j=0; j<B->ilen[i]; j++) { 237 if (!indices[aj[B->i[i] + j]]) ec++; 238 indices[aj[B->i[i] + j] ] = 1; 239 } 240 } 241 242 /* form array of columns we need */ 243 ierr = PetscMalloc((ec+1)*sizeof(int),&garray);CHKERRQ(ierr); 244 ec = 0; 245 for (i=0; i<Nbs; i++) { 246 if (indices[i]) { 247 garray[ec++] = i; 248 } 249 } 250 251 /* make indices now point into garray */ 252 for (i=0; i<ec; i++) { 253 indices[garray[i]] = i; 254 } 255 256 /* compact out the extra columns in B */ 257 for (i=0; i<B->mbs; i++) { 258 for (j=0; j<B->ilen[i]; j++) { 259 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 260 } 261 } 262 B->nbs = ec; 263 baij->B->n = ec*B->bs; 264 ierr = PetscFree(indices);CHKERRQ(ierr); 265 #endif 266 267 /* create local vector that is used to scatter into */ 268 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 269 270 /* create two temporary index sets for building scatter-gather */ 271 for (i=0; i<ec; i++) { 272 garray[i] = bs*garray[i]; 273 } 274 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr); 275 for (i=0; i<ec; i++) { 276 garray[i] = garray[i]/bs; 277 } 278 279 ierr = PetscMalloc((ec+1)*sizeof(int),&stmp);CHKERRQ(ierr); 280 for (i=0; i<ec; i++) { stmp[i] = bs*i; } 281 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr); 282 ierr = PetscFree(stmp);CHKERRQ(ierr); 283 284 /* create temporary global vector to generate scatter context */ 285 /* this is inefficient, but otherwise we must do either 286 1) save garray until the first actual scatter when the vector is known or 287 2) have another way of generating a scatter context without a vector.*/ 288 ierr = VecCreateMPI(mat->comm,mat->n,mat->N,&gvec);CHKERRQ(ierr); 289 290 /* gnerate the scatter context */ 291 ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 292 293 /* 294 Post the receives for the first matrix vector product. We sync-chronize after 295 this on the chance that the user immediately calls MatMult() after assemblying 296 the matrix. 297 */ 298 ierr = VecScatterPostRecvs(gvec,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr); 299 ierr = MPI_Barrier(mat->comm);CHKERRQ(ierr); 300 301 PetscLogObjectParent(mat,baij->Mvctx); 302 PetscLogObjectParent(mat,baij->lvec); 303 PetscLogObjectParent(mat,from); 304 PetscLogObjectParent(mat,to); 305 baij->garray = garray; 306 PetscLogObjectMemory(mat,(ec+1)*sizeof(int)); 307 ierr = ISDestroy(from);CHKERRQ(ierr); 308 ierr = ISDestroy(to);CHKERRQ(ierr); 309 ierr = VecDestroy(gvec);CHKERRQ(ierr); 310 PetscFunctionReturn(0); 311 } 312 313 314 /* 315 Takes the local part of an already assembled MPIBAIJ matrix 316 and disassembles it. This is to allow new nonzeros into the matrix 317 that require more communication in the matrix vector multiply. 318 Thus certain data-structures must be rebuilt. 319 320 Kind of slow! But that's what application programmers get when 321 they are sloppy. 322 */ 323 #undef __FUNCT__ 324 #define __FUNCT__ "DisAssemble_MPISBAIJ" 325 PetscErrorCode DisAssemble_MPISBAIJ(Mat A) 326 { 327 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)A->data; 328 Mat B = baij->B,Bnew; 329 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 330 PetscErrorCode ierr; 331 int i,j,mbs=Bbaij->mbs,n = A->N,col,*garray=baij->garray; 332 int k,bs=baij->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->m; 333 MatScalar *a = Bbaij->a; 334 PetscScalar *atmp; 335 #if defined(PETSC_USE_MAT_SINGLE) 336 int l; 337 #endif 338 339 PetscFunctionBegin; 340 #if defined(PETSC_USE_MAT_SINGLE) 341 ierr = PetscMalloc(baij->bs*sizeof(PetscScalar),&atmp); 342 #endif 343 /* free stuff related to matrix-vec multiply */ 344 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 345 ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); baij->lvec = 0; 346 ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0; 347 if (baij->colmap) { 348 #if defined (PETSC_USE_CTABLE) 349 ierr = PetscTableDelete(baij->colmap); baij->colmap = 0;CHKERRQ(ierr); 350 #else 351 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 352 baij->colmap = 0; 353 PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(int)); 354 #endif 355 } 356 357 /* make sure that B is assembled so we can access its values */ 358 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 359 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 360 361 /* invent new B and copy stuff over */ 362 ierr = PetscMalloc(mbs*sizeof(int),&nz);CHKERRQ(ierr); 363 for (i=0; i<mbs; i++) { 364 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 365 } 366 ierr = MatCreate(PETSC_COMM_SELF,m,n,m,n,&Bnew);CHKERRQ(ierr); 367 ierr = MatSetType(Bnew,B->type_name);CHKERRQ(ierr); 368 ierr = MatSeqBAIJSetPreallocation(Bnew,baij->bs,0,nz);CHKERRQ(ierr); 369 ierr = PetscFree(nz);CHKERRQ(ierr); 370 371 ierr = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr); 372 for (i=0; i<mbs; i++) { 373 rvals[0] = bs*i; 374 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 375 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 376 col = garray[Bbaij->j[j]]*bs; 377 for (k=0; k<bs; k++) { 378 #if defined(PETSC_USE_MAT_SINGLE) 379 for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l]; 380 #else 381 atmp = a+j*bs2 + k*bs; 382 #endif 383 ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 384 col++; 385 } 386 } 387 } 388 #if defined(PETSC_USE_MAT_SINGLE) 389 ierr = PetscFree(atmp);CHKERRQ(ierr); 390 #endif 391 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 392 baij->garray = 0; 393 ierr = PetscFree(rvals);CHKERRQ(ierr); 394 PetscLogObjectMemory(A,-ec*sizeof(int)); 395 ierr = MatDestroy(B);CHKERRQ(ierr); 396 PetscLogObjectParent(A,Bnew); 397 baij->B = Bnew; 398 A->was_assembled = PETSC_FALSE; 399 PetscFunctionReturn(0); 400 } 401 402 403 404