1 2 /* 3 Support for the parallel SBAIJ matrix vector multiply 4 */ 5 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 6 7 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode); 8 9 10 #undef __FUNCT__ 11 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ" 12 PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat) 13 { 14 Mat_MPISBAIJ *sbaij = (Mat_MPISBAIJ*)mat->data; 15 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(sbaij->B->data); 16 PetscErrorCode ierr; 17 PetscInt Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ec = 0,*garray,*sgarray; 18 PetscInt bs = mat->rmap->bs,*stmp,mbs=sbaij->mbs, vec_size,nt; 19 IS from,to; 20 Vec gvec; 21 PetscMPIInt rank =sbaij->rank,lsize,size=sbaij->size; 22 PetscInt *owners=sbaij->rangebs,*ec_owner,k; 23 const PetscInt *sowners; 24 PetscScalar *ptr; 25 26 PetscFunctionBegin; 27 ierr = VecScatterDestroy(&sbaij->sMvctx);CHKERRQ(ierr); 28 29 /* For the first stab we make an array as long as the number of columns */ 30 /* mark those columns that are in sbaij->B */ 31 ierr = PetscCalloc1(Nbs,&indices);CHKERRQ(ierr); 32 for (i=0; i<mbs; i++) { 33 for (j=0; j<B->ilen[i]; j++) { 34 if (!indices[aj[B->i[i] + j]]) ec++; 35 indices[aj[B->i[i] + j]] = 1; 36 } 37 } 38 39 /* form arrays of columns we need */ 40 ierr = PetscMalloc1(ec,&garray);CHKERRQ(ierr); 41 ierr = PetscMalloc2(2*ec,&sgarray,ec,&ec_owner);CHKERRQ(ierr); 42 43 ec = 0; 44 for (j=0; j<size; j++) { 45 for (i=owners[j]; i<owners[j+1]; i++) { 46 if (indices[i]) { 47 garray[ec] = i; 48 ec_owner[ec] = j; 49 ec++; 50 } 51 } 52 } 53 54 /* make indices now point into garray */ 55 for (i=0; i<ec; i++) indices[garray[i]] = i; 56 57 /* compact out the extra columns in B */ 58 for (i=0; i<mbs; i++) { 59 for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 60 } 61 B->nbs = ec; 62 63 sbaij->B->cmap->n = sbaij->B->cmap->N = ec*mat->rmap->bs; 64 65 ierr = PetscLayoutSetUp((sbaij->B->cmap));CHKERRQ(ierr); 66 ierr = PetscFree(indices);CHKERRQ(ierr); 67 68 /* create local vector that is used to scatter into */ 69 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr); 70 71 /* create two temporary index sets for building scatter-gather */ 72 ierr = PetscMalloc1(2*ec,&stmp);CHKERRQ(ierr); 73 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 74 for (i=0; i<ec; i++) stmp[i] = i; 75 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr); 76 77 /* generate the scatter context 78 -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but usefule for some applications */ 79 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr); 80 ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr); 81 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 82 83 sbaij->garray = garray; 84 85 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->Mvctx);CHKERRQ(ierr); 86 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->lvec);CHKERRQ(ierr); 87 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr); 88 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr); 89 90 ierr = ISDestroy(&from);CHKERRQ(ierr); 91 ierr = ISDestroy(&to);CHKERRQ(ierr); 92 93 /* create parallel vector that is used by SBAIJ matrix to scatter from/into */ 94 lsize = (mbs + ec)*bs; 95 ierr = VecCreateMPI(PetscObjectComm((PetscObject)mat),lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr); 96 ierr = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr); 97 ierr = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr); 98 99 ierr = VecGetOwnershipRanges(sbaij->slvec0,&sowners);CHKERRQ(ierr); 100 101 /* x index in the IS sfrom */ 102 for (i=0; i<ec; i++) { 103 j = ec_owner[i]; 104 sgarray[i] = garray[i] + (sowners[j]/bs - owners[j]); 105 } 106 /* b index in the IS sfrom */ 107 k = sowners[rank]/bs + mbs; 108 for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j; 109 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,sgarray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 110 111 /* x index in the IS sto */ 112 k = sowners[rank]/bs + mbs; 113 for (i=0; i<ec; i++) stmp[i] = (k + i); 114 /* b index in the IS sto */ 115 for (i=ec; i<2*ec; i++) stmp[i] = sgarray[i-ec]; 116 117 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr); 118 119 ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr); 120 121 ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr); 122 ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr); 123 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr); 124 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr); 125 ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr); 126 127 ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr); 128 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr); 129 ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr); 130 131 ierr = PetscFree(stmp);CHKERRQ(ierr); 132 ierr = MPI_Barrier(PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 133 134 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->sMvctx);CHKERRQ(ierr); 135 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec0);CHKERRQ(ierr); 136 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1);CHKERRQ(ierr); 137 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec0b);CHKERRQ(ierr); 138 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1a);CHKERRQ(ierr); 139 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)sbaij->slvec1b);CHKERRQ(ierr); 140 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr); 141 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr); 142 143 ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 144 ierr = ISDestroy(&from);CHKERRQ(ierr); 145 ierr = ISDestroy(&to);CHKERRQ(ierr); 146 ierr = PetscFree2(sgarray,ec_owner);CHKERRQ(ierr); 147 PetscFunctionReturn(0); 148 } 149 150 #undef __FUNCT__ 151 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm" 152 PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat mat) 153 { 154 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 155 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 156 PetscErrorCode ierr; 157 PetscInt i,j,*aj = B->j,ec = 0,*garray; 158 PetscInt bs = mat->rmap->bs,*stmp; 159 IS from,to; 160 Vec gvec; 161 #if defined(PETSC_USE_CTABLE) 162 PetscTable gid1_lid1; 163 PetscTablePosition tpos; 164 PetscInt gid,lid; 165 #else 166 PetscInt Nbs = baij->Nbs,*indices; 167 #endif 168 169 PetscFunctionBegin; 170 #if defined(PETSC_USE_CTABLE) 171 /* use a table - Mark Adams */ 172 PetscTableCreate(B->mbs,baij->Nbs+1,&gid1_lid1); 173 for (i=0; i<B->mbs; i++) { 174 for (j=0; j<B->ilen[i]; j++) { 175 PetscInt data,gid1 = aj[B->i[i]+j] + 1; 176 ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); 177 if (!data) { 178 /* one based table */ 179 ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr); 180 } 181 } 182 } 183 /* form array of columns we need */ 184 ierr = PetscMalloc1(ec,&garray);CHKERRQ(ierr); 185 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 186 while (tpos) { 187 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 188 gid--; lid--; 189 garray[lid] = gid; 190 } 191 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 192 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 193 for (i=0; i<ec; i++) { 194 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 195 } 196 /* compact out the extra columns in B */ 197 for (i=0; i<B->mbs; i++) { 198 for (j=0; j<B->ilen[i]; j++) { 199 PetscInt gid1 = aj[B->i[i] + j] + 1; 200 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 201 lid--; 202 aj[B->i[i]+j] = lid; 203 } 204 } 205 B->nbs = ec; 206 207 baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs; 208 209 ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr); 210 ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr); 211 #else 212 /* For the first stab we make an array as long as the number of columns */ 213 /* mark those columns that are in baij->B */ 214 ierr = PetscCalloc1(Nbs,&indices);CHKERRQ(ierr); 215 for (i=0; i<B->mbs; i++) { 216 for (j=0; j<B->ilen[i]; j++) { 217 if (!indices[aj[B->i[i] + j]]) ec++; 218 indices[aj[B->i[i] + j]] = 1; 219 } 220 } 221 222 /* form array of columns we need */ 223 ierr = PetscMalloc1(ec,&garray);CHKERRQ(ierr); 224 ec = 0; 225 for (i=0; i<Nbs; i++) { 226 if (indices[i]) { 227 garray[ec++] = i; 228 } 229 } 230 231 /* make indices now point into garray */ 232 for (i=0; i<ec; i++) indices[garray[i]] = i; 233 234 /* compact out the extra columns in B */ 235 for (i=0; i<B->mbs; i++) { 236 for (j=0; j<B->ilen[i]; j++) { 237 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 238 } 239 } 240 B->nbs = ec; 241 242 baij->B->cmap->n = ec*mat->rmap->bs; 243 244 ierr = PetscFree(indices);CHKERRQ(ierr); 245 #endif 246 247 /* create local vector that is used to scatter into */ 248 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 249 250 /* create two temporary index sets for building scatter-gather */ 251 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 252 253 ierr = PetscMalloc1(ec,&stmp);CHKERRQ(ierr); 254 for (i=0; i<ec; i++) stmp[i] = i; 255 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr); 256 257 /* create temporary global vector to generate scatter context */ 258 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr); 259 ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 260 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 261 262 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->Mvctx);CHKERRQ(ierr); 263 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->lvec);CHKERRQ(ierr); 264 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr); 265 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr); 266 267 baij->garray = garray; 268 269 ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 270 ierr = ISDestroy(&from);CHKERRQ(ierr); 271 ierr = ISDestroy(&to);CHKERRQ(ierr); 272 PetscFunctionReturn(0); 273 } 274 275 /* 276 Takes the local part of an already assembled MPISBAIJ matrix 277 and disassembles it. This is to allow new nonzeros into the matrix 278 that require more communication in the matrix vector multiply. 279 Thus certain data-structures must be rebuilt. 280 281 Kind of slow! But that's what application programmers get when 282 they are sloppy. 283 */ 284 #undef __FUNCT__ 285 #define __FUNCT__ "MatDisAssemble_MPISBAIJ" 286 PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A) 287 { 288 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)A->data; 289 Mat B = baij->B,Bnew; 290 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 291 PetscErrorCode ierr; 292 PetscInt i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray; 293 PetscInt k,bs=A->rmap->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->rmap->n; 294 MatScalar *a = Bbaij->a; 295 PetscScalar *atmp; 296 #if defined(PETSC_USE_REAL_MAT_SINGLE) 297 PetscInt l; 298 #endif 299 300 PetscFunctionBegin; 301 #if defined(PETSC_USE_REAL_MAT_SINGLE) 302 ierr = PetscMalloc1(A->rmap->bs,&atmp); 303 #endif 304 /* free stuff related to matrix-vec multiply */ 305 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 306 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 307 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 308 309 ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); 310 ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 311 ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); 312 ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); 313 ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 314 315 if (baij->colmap) { 316 #if defined(PETSC_USE_CTABLE) 317 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 318 #else 319 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 320 ierr = PetscLogObjectMemory((PetscObject)A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr); 321 #endif 322 } 323 324 /* make sure that B is assembled so we can access its values */ 325 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 326 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 327 328 /* invent new B and copy stuff over */ 329 ierr = PetscMalloc1(mbs,&nz);CHKERRQ(ierr); 330 for (i=0; i<mbs; i++) { 331 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 332 } 333 ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr); 334 ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 335 ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr); 336 ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr); 337 ierr = PetscFree(nz);CHKERRQ(ierr); 338 339 ((Mat_SeqSBAIJ*)Bnew->data)->nonew = Bbaij->nonew; /* Inherit insertion error options. */ 340 /* 341 Ensure that B's nonzerostate is monotonically increasing. 342 Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call? 343 */ 344 Bnew->nonzerostate = B->nonzerostate; 345 ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr); 346 for (i=0; i<mbs; i++) { 347 rvals[0] = bs*i; 348 for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1; 349 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 350 col = garray[Bbaij->j[j]]*bs; 351 for (k=0; k<bs; k++) { 352 #if defined(PETSC_USE_REAL_MAT_SINGLE) 353 for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l]; 354 #else 355 atmp = a+j*bs2 + k*bs; 356 #endif 357 ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 358 col++; 359 } 360 } 361 } 362 #if defined(PETSC_USE_REAL_MAT_SINGLE) 363 ierr = PetscFree(atmp);CHKERRQ(ierr); 364 #endif 365 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 366 367 baij->garray = 0; 368 369 ierr = PetscFree(rvals);CHKERRQ(ierr); 370 ierr = PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 371 ierr = MatDestroy(&B);CHKERRQ(ierr); 372 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew);CHKERRQ(ierr); 373 374 baij->B = Bnew; 375 376 A->was_assembled = PETSC_FALSE; 377 PetscFunctionReturn(0); 378 } 379