1 #define PETSCMAT_DLL 2 3 /* 4 Support for the parallel SBAIJ matrix vector multiply 5 */ 6 #include "../src/mat/impls/sbaij/mpi/mpisbaij.h" 7 8 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 9 10 11 #undef __FUNCT__ 12 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ" 13 PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat) 14 { 15 Mat_MPISBAIJ *sbaij = (Mat_MPISBAIJ*)mat->data; 16 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(sbaij->B->data); 17 PetscErrorCode ierr; 18 PetscInt Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ec = 0,*garray,*sgarray; 19 PetscInt bs = mat->rmap->bs,*stmp,mbs=sbaij->mbs, vec_size,nt; 20 IS from,to; 21 Vec gvec; 22 PetscMPIInt rank=sbaij->rank,lsize,size=sbaij->size; 23 PetscInt *owners=sbaij->rangebs,*sowners,*ec_owner,k; 24 PetscScalar *ptr; 25 26 PetscFunctionBegin; 27 if (sbaij->sMvctx) { 28 /* This two lines should be in DisAssemble_MPISBAIJ(). Don't know why it causes crash there? */ 29 ierr = VecScatterDestroy(sbaij->sMvctx);CHKERRQ(ierr); 30 sbaij->sMvctx = 0; 31 } 32 33 /* For the first stab we make an array as long as the number of columns */ 34 /* mark those columns that are in sbaij->B */ 35 ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr); 36 ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr); 37 for (i=0; i<mbs; i++) { 38 for (j=0; j<B->ilen[i]; j++) { 39 if (!indices[aj[B->i[i] + j]]) ec++; 40 indices[aj[B->i[i] + j] ] = 1; 41 } 42 } 43 44 /* form arrays of columns we need */ 45 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 46 ierr = PetscMalloc((3*ec+1)*sizeof(PetscInt),&sgarray);CHKERRQ(ierr); 47 ec_owner = sgarray + 2*ec; 48 49 ec = 0; 50 for (j=0; j<size; j++){ 51 for (i=owners[j]; i<owners[j+1]; i++){ 52 if (indices[i]) { 53 garray[ec] = i; 54 ec_owner[ec] = j; 55 ec++; 56 } 57 } 58 } 59 60 /* make indices now point into garray */ 61 for (i=0; i<ec; i++) indices[garray[i]] = i; 62 63 /* compact out the extra columns in B */ 64 for (i=0; i<mbs; i++) { 65 for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 66 } 67 B->nbs = ec; 68 sbaij->B->cmap->n = sbaij->B->cmap->N = ec*mat->rmap->bs; 69 ierr = PetscMapSetUp((sbaij->B->cmap));CHKERRQ(ierr); 70 ierr = PetscFree(indices);CHKERRQ(ierr); 71 72 /* create local vector that is used to scatter into */ 73 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr); 74 75 /* create two temporary index sets for building scatter-gather */ 76 ierr = PetscMalloc((2*ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr); 77 for (i=0; i<ec; i++) stmp[i] = bs*garray[i]; 78 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&from);CHKERRQ(ierr); 79 80 for (i=0; i<ec; i++) { stmp[i] = bs*i; } 81 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr); 82 83 /* generate the scatter context 84 -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but usefule for some applications */ 85 ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr); 86 ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr); 87 ierr = VecDestroy(gvec);CHKERRQ(ierr); 88 89 sbaij->garray = garray; 90 ierr = PetscLogObjectParent(mat,sbaij->Mvctx);CHKERRQ(ierr); 91 ierr = PetscLogObjectParent(mat,sbaij->lvec);CHKERRQ(ierr); 92 ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr); 93 ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr); 94 95 ierr = ISDestroy(from);CHKERRQ(ierr); 96 ierr = ISDestroy(to);CHKERRQ(ierr); 97 98 /* create parallel vector that is used by SBAIJ matrix to scatter from/into */ 99 lsize = (mbs + ec)*bs; 100 ierr = VecCreateMPI(((PetscObject)mat)->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr); 101 ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr); 102 ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr); 103 104 sowners = sbaij->slvec0->map->range; 105 106 /* x index in the IS sfrom */ 107 for (i=0; i<ec; i++) { 108 j = ec_owner[i]; 109 sgarray[i] = garray[i] + (sowners[j]/bs - owners[j]); 110 } 111 /* b index in the IS sfrom */ 112 k = sowners[rank]/bs + mbs; 113 for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j; 114 115 for (i=0; i<2*ec; i++) stmp[i] = bs*sgarray[i]; 116 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&from);CHKERRQ(ierr); 117 118 /* x index in the IS sto */ 119 k = sowners[rank]/bs + mbs; 120 for (i=0; i<ec; i++) stmp[i] = bs*(k + i); 121 /* b index in the IS sto */ 122 for (i=ec; i<2*ec; i++) stmp[i] = bs*sgarray[i-ec]; 123 124 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,&to);CHKERRQ(ierr); 125 126 ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr); 127 128 ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr); 129 ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr); 130 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr); 131 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr); 132 ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr); 133 134 ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr); 135 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr); 136 ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr); 137 138 ierr = PetscFree(stmp);CHKERRQ(ierr); 139 ierr = MPI_Barrier(((PetscObject)mat)->comm);CHKERRQ(ierr); 140 141 ierr = PetscLogObjectParent(mat,sbaij->sMvctx);CHKERRQ(ierr); 142 ierr = PetscLogObjectParent(mat,sbaij->slvec0);CHKERRQ(ierr); 143 ierr = PetscLogObjectParent(mat,sbaij->slvec1);CHKERRQ(ierr); 144 ierr = PetscLogObjectParent(mat,sbaij->slvec0b);CHKERRQ(ierr); 145 ierr = PetscLogObjectParent(mat,sbaij->slvec1a);CHKERRQ(ierr); 146 ierr = PetscLogObjectParent(mat,sbaij->slvec1b);CHKERRQ(ierr); 147 ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr); 148 ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr); 149 150 ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 151 ierr = ISDestroy(from);CHKERRQ(ierr); 152 ierr = ISDestroy(to);CHKERRQ(ierr); 153 ierr = PetscFree(sgarray);CHKERRQ(ierr); 154 PetscFunctionReturn(0); 155 } 156 157 #undef __FUNCT__ 158 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm" 159 PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat mat) 160 { 161 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 162 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 163 PetscErrorCode ierr; 164 PetscInt i,j,*aj = B->j,ec = 0,*garray; 165 PetscInt bs = mat->rmap->bs,*stmp; 166 IS from,to; 167 Vec gvec; 168 #if defined (PETSC_USE_CTABLE) 169 PetscTable gid1_lid1; 170 PetscTablePosition tpos; 171 PetscInt gid,lid; 172 #else 173 PetscInt Nbs = baij->Nbs,*indices; 174 #endif 175 176 PetscFunctionBegin; 177 #if defined (PETSC_USE_CTABLE) 178 /* use a table - Mark Adams */ 179 PetscTableCreate(B->mbs,&gid1_lid1); 180 for (i=0; i<B->mbs; i++) { 181 for (j=0; j<B->ilen[i]; j++) { 182 PetscInt data,gid1 = aj[B->i[i]+j] + 1; 183 ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); 184 if (!data) { 185 /* one based table */ 186 ierr = PetscTableAdd(gid1_lid1,gid1,++ec);CHKERRQ(ierr); 187 } 188 } 189 } 190 /* form array of columns we need */ 191 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 192 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 193 while (tpos) { 194 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 195 gid--; lid--; 196 garray[lid] = gid; 197 } 198 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 199 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 200 for (i=0; i<ec; i++) { 201 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1);CHKERRQ(ierr); 202 } 203 /* compact out the extra columns in B */ 204 for (i=0; i<B->mbs; i++) { 205 for (j=0; j<B->ilen[i]; j++) { 206 PetscInt gid1 = aj[B->i[i] + j] + 1; 207 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 208 lid --; 209 aj[B->i[i]+j] = lid; 210 } 211 } 212 B->nbs = ec; 213 baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs; 214 ierr = PetscMapSetUp((baij->B->cmap));CHKERRQ(ierr); 215 ierr = PetscTableDestroy(gid1_lid1);CHKERRQ(ierr); 216 /* Mark Adams */ 217 #else 218 /* For the first stab we make an array as long as the number of columns */ 219 /* mark those columns that are in baij->B */ 220 ierr = PetscMalloc((Nbs+1)*sizeof(PetscInt),&indices);CHKERRQ(ierr); 221 ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr); 222 for (i=0; i<B->mbs; i++) { 223 for (j=0; j<B->ilen[i]; j++) { 224 if (!indices[aj[B->i[i] + j]]) ec++; 225 indices[aj[B->i[i] + j] ] = 1; 226 } 227 } 228 229 /* form array of columns we need */ 230 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&garray);CHKERRQ(ierr); 231 ec = 0; 232 for (i=0; i<Nbs; i++) { 233 if (indices[i]) { 234 garray[ec++] = i; 235 } 236 } 237 238 /* make indices now point into garray */ 239 for (i=0; i<ec; i++) { 240 indices[garray[i]] = i; 241 } 242 243 /* compact out the extra columns in B */ 244 for (i=0; i<B->mbs; i++) { 245 for (j=0; j<B->ilen[i]; j++) { 246 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 247 } 248 } 249 B->nbs = ec; 250 baij->B->cmap->n = ec*mat->rmap->bs; 251 ierr = PetscFree(indices);CHKERRQ(ierr); 252 #endif 253 254 /* create local vector that is used to scatter into */ 255 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 256 257 /* create two temporary index sets for building scatter-gather */ 258 for (i=0; i<ec; i++) { 259 garray[i] = bs*garray[i]; 260 } 261 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,&from);CHKERRQ(ierr); 262 for (i=0; i<ec; i++) { 263 garray[i] = garray[i]/bs; 264 } 265 266 ierr = PetscMalloc((ec+1)*sizeof(PetscInt),&stmp);CHKERRQ(ierr); 267 for (i=0; i<ec; i++) { stmp[i] = bs*i; } 268 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,&to);CHKERRQ(ierr); 269 ierr = PetscFree(stmp);CHKERRQ(ierr); 270 271 /* create temporary global vector to generate scatter context */ 272 ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr); 273 ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 274 ierr = VecDestroy(gvec);CHKERRQ(ierr); 275 276 ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr); 277 ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr); 278 ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr); 279 ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr); 280 baij->garray = garray; 281 ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 282 ierr = ISDestroy(from);CHKERRQ(ierr); 283 ierr = ISDestroy(to);CHKERRQ(ierr); 284 PetscFunctionReturn(0); 285 } 286 287 /* 288 Takes the local part of an already assembled MPISBAIJ matrix 289 and disassembles it. This is to allow new nonzeros into the matrix 290 that require more communication in the matrix vector multiply. 291 Thus certain data-structures must be rebuilt. 292 293 Kind of slow! But that's what application programmers get when 294 they are sloppy. 295 */ 296 #undef __FUNCT__ 297 #define __FUNCT__ "DisAssemble_MPISBAIJ" 298 PetscErrorCode DisAssemble_MPISBAIJ(Mat A) 299 { 300 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)A->data; 301 Mat B = baij->B,Bnew; 302 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 303 PetscErrorCode ierr; 304 PetscInt i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray; 305 PetscInt k,bs=A->rmap->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->rmap->n; 306 MatScalar *a = Bbaij->a; 307 PetscScalar *atmp; 308 #if defined(PETSC_USE_MAT_SINGLE) 309 PetscInt l; 310 #endif 311 312 PetscFunctionBegin; 313 #if defined(PETSC_USE_MAT_SINGLE) 314 ierr = PetscMalloc(A->rmap->bs*sizeof(PetscScalar),&atmp); 315 #endif 316 /* free stuff related to matrix-vec multiply */ 317 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 318 ierr = VecDestroy(baij->lvec);CHKERRQ(ierr); 319 baij->lvec = 0; 320 ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr); 321 baij->Mvctx = 0; 322 323 ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr); 324 ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr); 325 baij->slvec0 = 0; 326 ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr); 327 ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr); 328 ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr); 329 baij->slvec1 = 0; 330 331 if (baij->colmap) { 332 #if defined (PETSC_USE_CTABLE) 333 ierr = PetscTableDestroy(baij->colmap); baij->colmap = 0;CHKERRQ(ierr); 334 #else 335 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 336 baij->colmap = 0; 337 ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr); 338 #endif 339 } 340 341 /* make sure that B is assembled so we can access its values */ 342 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 343 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 344 345 /* invent new B and copy stuff over */ 346 ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr); 347 for (i=0; i<mbs; i++) { 348 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 349 } 350 ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr); 351 ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 352 ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr); 353 ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr); 354 ierr = PetscFree(nz);CHKERRQ(ierr); 355 356 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 357 for (i=0; i<mbs; i++) { 358 rvals[0] = bs*i; 359 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 360 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 361 col = garray[Bbaij->j[j]]*bs; 362 for (k=0; k<bs; k++) { 363 #if defined(PETSC_USE_MAT_SINGLE) 364 for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l]; 365 #else 366 atmp = a+j*bs2 + k*bs; 367 #endif 368 ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 369 col++; 370 } 371 } 372 } 373 #if defined(PETSC_USE_MAT_SINGLE) 374 ierr = PetscFree(atmp);CHKERRQ(ierr); 375 #endif 376 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 377 baij->garray = 0; 378 ierr = PetscFree(rvals);CHKERRQ(ierr); 379 ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 380 ierr = MatDestroy(B);CHKERRQ(ierr); 381 ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr); 382 baij->B = Bnew; 383 A->was_assembled = PETSC_FALSE; 384 PetscFunctionReturn(0); 385 } 386 387 388 389