
/*
   Support for the parallel SBAIJ matrix vector multiply
*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>

extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);


#undef __FUNCT__
#define __FUNCT__ "MatSetUpMultiply_MPISBAIJ"
/*
   MatSetUpMultiply_MPISBAIJ - Builds the communication structures needed by the
   parallel SBAIJ matrix-vector product.

   Input Parameter:
.  mat - the MPISBAIJ matrix; mat->data must be a Mat_MPISBAIJ whose off-diagonal
         part sbaij->B is stored as a Mat_SeqBAIJ

   Side effects (all stored back into the Mat_MPISBAIJ context):
+  garray  - sorted global block-column indices of the nonzero columns of B
.  lvec    - sequential work vector of length ec*bs
.  Mvctx   - scatter from a global vector into lvec (not used by MatMult_MPISBAIJ
             itself, per the comment below, but kept for other consumers)
.  slvec0/slvec1        - parallel work vectors of local length (mbs+ec)*bs that
                          stack the local x-part (first mbs*bs entries) and the
                          needed off-process part (last ec*bs entries)
.  slvec0b/slvec1a/slvec1b - sequential views aliasing pieces of slvec0/slvec1
-  sMvctx  - scatter between slvec0 and slvec1 used by the symmetric multiply

   Also compacts the column indices of B (aj) to the local numbering 0..ec-1 and
   resets B's column layout accordingly.
*/
PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ*)mat->data;
  Mat_SeqBAIJ    *B     = (Mat_SeqBAIJ*)(sbaij->B->data);
  PetscErrorCode ierr;
  PetscInt       Nbs = sbaij->Nbs,i,j,*indices,*aj = B->j,ec = 0,*garray,*sgarray;
  PetscInt       bs = mat->rmap->bs,*stmp,mbs=sbaij->mbs, vec_size,nt;
  IS             from,to;
  Vec            gvec;
  PetscMPIInt    rank=sbaij->rank,lsize,size=sbaij->size;
  PetscInt       *owners=sbaij->rangebs,*sowners,*ec_owner,k;
  PetscScalar    *ptr;

  PetscFunctionBegin;
  /* discard any scatter left over from a previous assembly */
  ierr = VecScatterDestroy(&sbaij->sMvctx);CHKERRQ(ierr);

  /* For the first stab we make an array as long as the number of columns */
  /* mark those columns that are in sbaij->B; ec counts the distinct ones */
  ierr = PetscMalloc(Nbs*sizeof(PetscInt),&indices);CHKERRQ(ierr);
  ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr);
  for (i=0; i<mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j] ] = 1;
    }
  }

  /* form arrays of columns we need; sgarray gets 2*ec slots (x part + b part below),
     ec_owner[i] records which rank owns global block column garray[i] */
  ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr);
  ierr = PetscMalloc2(2*ec,PetscInt,&sgarray,ec,PetscInt,&ec_owner);CHKERRQ(ierr);

  /* walking owner by owner yields garray sorted ascending (owners[] partitions 0..Nbs) */
  ec = 0;
  for (j=0; j<size; j++) {
    for (i=owners[j]; i<owners[j+1]; i++) {
      if (indices[i]) {
        garray[ec]   = i;
        ec_owner[ec] = j;
        ec++;
      }
    }
  }

  /* make indices now point into garray (global block col -> local compact index) */
  for (i=0; i<ec; i++) indices[garray[i]] = i;

  /* compact out the extra columns in B: rewrite aj in the local 0..ec-1 numbering */
  for (i=0; i<mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
  }
  B->nbs            = ec;
  sbaij->B->cmap->n = sbaij->B->cmap->N = ec*mat->rmap->bs;
  ierr = PetscLayoutSetUp((sbaij->B->cmap));CHKERRQ(ierr);
  ierr = PetscFree(indices);CHKERRQ(ierr);

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&sbaij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather
     (stmp is sized 2*ec because it is reused for the sMvctx ISs below) */
  ierr = PetscMalloc(2*ec*sizeof(PetscInt),&stmp);CHKERRQ(ierr);
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
  for (i=0; i<ec; i++) { stmp[i] = i; }
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr);

  /* generate the scatter context
     -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but useful for some applications */
  ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,1,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr);
  ierr = VecScatterCreate(gvec,from,sbaij->lvec,to,&sbaij->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&gvec);CHKERRQ(ierr);

  sbaij->garray = garray;

  ierr = PetscLogObjectParent(mat,sbaij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);

  ierr = ISDestroy(&from);CHKERRQ(ierr);
  ierr = ISDestroy(&to);CHKERRQ(ierr);

  /* create parallel vector that is used by SBAIJ matrix to scatter from/into;
     local length stacks the mbs owned blocks followed by the ec ghost blocks
     NOTE(review): lsize is PetscMPIInt while (mbs+ec)*bs is PetscInt arithmetic --
     could narrow for very large local sizes; confirm against PETSc conventions */
  lsize = (mbs + ec)*bs;
  ierr  = VecCreateMPI(((PetscObject)mat)->comm,lsize,PETSC_DETERMINE,&sbaij->slvec0);CHKERRQ(ierr);
  ierr  = VecDuplicate(sbaij->slvec0,&sbaij->slvec1);CHKERRQ(ierr);
  ierr  = VecGetSize(sbaij->slvec0,&vec_size);CHKERRQ(ierr);

  /* ownership ranges (in scalar entries) of the stacked vector slvec0 */
  sowners = sbaij->slvec0->map->range;

  /* x index in the IS sfrom: shift each needed global block column from the
     x numbering into the stacked slvec0 numbering of its owner */
  for (i=0; i<ec; i++) {
    j          = ec_owner[i];
    sgarray[i] = garray[i] + (sowners[j]/bs - owners[j]);
  }
  /* b index in the IS sfrom: this rank's ghost segment starts after its mbs owned blocks */
  k = sowners[rank]/bs + mbs;
  for (i=ec,j=0; i< 2*ec; i++,j++) sgarray[i] = k + j;

  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,sgarray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);

  /* x index in the IS sto */
  k = sowners[rank]/bs + mbs;
  for (i=0; i<ec; i++) stmp[i] = (k + i);
  /* b index in the IS sto */
  for (i=ec; i<2*ec; i++) stmp[i] = sgarray[i-ec];

  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,2*ec,stmp,PETSC_COPY_VALUES,&to);CHKERRQ(ierr);

  ierr = VecScatterCreate(sbaij->slvec0,from,sbaij->slvec1,to,&sbaij->sMvctx);CHKERRQ(ierr);

  /* sequential views aliasing the owned part (first bs*mbs entries) and the
     ghost part (remaining nt-bs*mbs entries) of the stacked vectors */
  ierr = VecGetLocalSize(sbaij->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,ptr,&sbaij->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(sbaij->slvec1,&ptr);CHKERRQ(ierr);

  ierr = VecGetArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,ptr+bs*mbs,&sbaij->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(sbaij->slvec0,&ptr);CHKERRQ(ierr);

  ierr = PetscFree(stmp);CHKERRQ(ierr);
  /* NOTE(review): this barrier looks like a synchronization leftover -- the
     collective calls above already synchronize; confirm before removing */
  ierr = MPI_Barrier(((PetscObject)mat)->comm);CHKERRQ(ierr);

  ierr = PetscLogObjectParent(mat,sbaij->sMvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec0b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec1a);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,sbaij->slvec1b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr);

  ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(&from);CHKERRQ(ierr);
  ierr = ISDestroy(&to);CHKERRQ(ierr);
  ierr = PetscFree2(sgarray,ec_owner);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
__FUNCT__ 148 #define __FUNCT__ "MatSetUpMultiply_MPISBAIJ_2comm" 149 PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat mat) 150 { 151 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data; 152 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 153 PetscErrorCode ierr; 154 PetscInt i,j,*aj = B->j,ec = 0,*garray; 155 PetscInt bs = mat->rmap->bs,*stmp; 156 IS from,to; 157 Vec gvec; 158 #if defined (PETSC_USE_CTABLE) 159 PetscTable gid1_lid1; 160 PetscTablePosition tpos; 161 PetscInt gid,lid; 162 #else 163 PetscInt Nbs = baij->Nbs,*indices; 164 #endif 165 166 PetscFunctionBegin; 167 #if defined (PETSC_USE_CTABLE) 168 /* use a table - Mark Adams */ 169 PetscTableCreate(B->mbs,baij->Nbs+1,&gid1_lid1); 170 for (i=0; i<B->mbs; i++) { 171 for (j=0; j<B->ilen[i]; j++) { 172 PetscInt data,gid1 = aj[B->i[i]+j] + 1; 173 ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); 174 if (!data) { 175 /* one based table */ 176 ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr); 177 } 178 } 179 } 180 /* form array of columns we need */ 181 ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr); 182 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 183 while (tpos) { 184 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 185 gid--; lid--; 186 garray[lid] = gid; 187 } 188 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 189 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 190 for (i=0; i<ec; i++) { 191 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 192 } 193 /* compact out the extra columns in B */ 194 for (i=0; i<B->mbs; i++) { 195 for (j=0; j<B->ilen[i]; j++) { 196 PetscInt gid1 = aj[B->i[i] + j] + 1; 197 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 198 lid --; 199 aj[B->i[i]+j] = lid; 200 } 201 } 202 B->nbs = ec; 203 baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs; 204 ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr); 205 ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr); 206 
#else 207 /* For the first stab we make an array as long as the number of columns */ 208 /* mark those columns that are in baij->B */ 209 ierr = PetscMalloc(Nbs*sizeof(PetscInt),&indices);CHKERRQ(ierr); 210 ierr = PetscMemzero(indices,Nbs*sizeof(PetscInt));CHKERRQ(ierr); 211 for (i=0; i<B->mbs; i++) { 212 for (j=0; j<B->ilen[i]; j++) { 213 if (!indices[aj[B->i[i] + j]]) ec++; 214 indices[aj[B->i[i] + j] ] = 1; 215 } 216 } 217 218 /* form array of columns we need */ 219 ierr = PetscMalloc(ec*sizeof(PetscInt),&garray);CHKERRQ(ierr); 220 ec = 0; 221 for (i=0; i<Nbs; i++) { 222 if (indices[i]) { 223 garray[ec++] = i; 224 } 225 } 226 227 /* make indices now point into garray */ 228 for (i=0; i<ec; i++) { 229 indices[garray[i]] = i; 230 } 231 232 /* compact out the extra columns in B */ 233 for (i=0; i<B->mbs; i++) { 234 for (j=0; j<B->ilen[i]; j++) { 235 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 236 } 237 } 238 B->nbs = ec; 239 baij->B->cmap->n = ec*mat->rmap->bs; 240 ierr = PetscFree(indices);CHKERRQ(ierr); 241 #endif 242 243 /* create local vector that is used to scatter into */ 244 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 245 246 /* create two temporary index sets for building scatter-gather */ 247 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 248 249 ierr = PetscMalloc(ec*sizeof(PetscInt),&stmp);CHKERRQ(ierr); 250 for (i=0; i<ec; i++) { stmp[i] = i; } 251 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr); 252 253 /* create temporary global vector to generate scatter context */ 254 ierr = VecCreateMPIWithArray(((PetscObject)mat)->comm,1,mat->cmap->n,mat->cmap->N,PETSC_NULL,&gvec);CHKERRQ(ierr); 255 ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 256 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 257 258 ierr = PetscLogObjectParent(mat,baij->Mvctx);CHKERRQ(ierr); 259 ierr = PetscLogObjectParent(mat,baij->lvec);CHKERRQ(ierr); 260 ierr = 
PetscLogObjectParent(mat,from);CHKERRQ(ierr); 261 ierr = PetscLogObjectParent(mat,to);CHKERRQ(ierr); 262 baij->garray = garray; 263 ierr = PetscLogObjectMemory(mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 264 ierr = ISDestroy(&from);CHKERRQ(ierr); 265 ierr = ISDestroy(&to);CHKERRQ(ierr); 266 PetscFunctionReturn(0); 267 } 268 269 /* 270 Takes the local part of an already assembled MPISBAIJ matrix 271 and disassembles it. This is to allow new nonzeros into the matrix 272 that require more communication in the matrix vector multiply. 273 Thus certain data-structures must be rebuilt. 274 275 Kind of slow! But that's what application programmers get when 276 they are sloppy. 277 */ 278 #undef __FUNCT__ 279 #define __FUNCT__ "MatDisAssemble_MPISBAIJ" 280 PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A) 281 { 282 Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)A->data; 283 Mat B = baij->B,Bnew; 284 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 285 PetscErrorCode ierr; 286 PetscInt i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray; 287 PetscInt k,bs=A->rmap->bs,bs2=baij->bs2,*rvals,*nz,ec,m=A->rmap->n; 288 MatScalar *a = Bbaij->a; 289 PetscScalar *atmp; 290 #if defined(PETSC_USE_REAL_MAT_SINGLE) 291 PetscInt l; 292 #endif 293 294 PetscFunctionBegin; 295 #if defined(PETSC_USE_REAL_MAT_SINGLE) 296 ierr = PetscMalloc(A->rmap->bs*sizeof(PetscScalar),&atmp); 297 #endif 298 /* free stuff related to matrix-vec multiply */ 299 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 300 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); 301 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); 302 303 ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr); 304 ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr); 305 ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr); 306 ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr); 307 ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr); 308 309 if (baij->colmap) { 310 #if defined (PETSC_USE_CTABLE) 311 ierr = 
PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 312 #else 313 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 314 ierr = PetscLogObjectMemory(A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr); 315 #endif 316 } 317 318 /* make sure that B is assembled so we can access its values */ 319 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 320 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 321 322 /* invent new B and copy stuff over */ 323 ierr = PetscMalloc(mbs*sizeof(PetscInt),&nz);CHKERRQ(ierr); 324 for (i=0; i<mbs; i++) { 325 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 326 } 327 ierr = MatCreate(PETSC_COMM_SELF,&Bnew);CHKERRQ(ierr); 328 ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 329 ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr); 330 ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr); 331 ((Mat_SeqSBAIJ*)Bnew->data)->nonew = Bbaij->nonew; /* Inherit insertion error options. */ 332 ierr = PetscFree(nz);CHKERRQ(ierr); 333 334 ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr); 335 for (i=0; i<mbs; i++) { 336 rvals[0] = bs*i; 337 for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; } 338 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 339 col = garray[Bbaij->j[j]]*bs; 340 for (k=0; k<bs; k++) { 341 #if defined(PETSC_USE_REAL_MAT_SINGLE) 342 for (l=0; l<bs; l++) atmp[l] = a[j*bs2+l]; 343 #else 344 atmp = a+j*bs2 + k*bs; 345 #endif 346 ierr = MatSetValues_SeqSBAIJ(Bnew,bs,rvals,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 347 col++; 348 } 349 } 350 } 351 #if defined(PETSC_USE_REAL_MAT_SINGLE) 352 ierr = PetscFree(atmp);CHKERRQ(ierr); 353 #endif 354 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 355 baij->garray = 0; 356 ierr = PetscFree(rvals);CHKERRQ(ierr); 357 ierr = PetscLogObjectMemory(A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 358 ierr = MatDestroy(&B);CHKERRQ(ierr); 359 ierr = PetscLogObjectParent(A,Bnew);CHKERRQ(ierr); 360 baij->B = Bnew; 361 A->was_assembled = PETSC_FALSE; 362 PetscFunctionReturn(0); 363 } 364 365 366 
367