/*
     Support for the parallel BAIJ matrix vector multiply
*/
#include <../src/mat/impls/baij/mpi/mpibaij.h>
#include <petsc/private/isimpl.h>    /* needed because accesses data structure of ISLocalToGlobalMapping directly */

/*
   MatSetUpMultiply_MPIBAIJ - Builds the communication structures needed for the
   parallel block matrix-vector product.

   It (1) determines which global block columns actually appear in the
   off-diagonal part baij->B, (2) compacts B's column indices from global block
   numbering down to a dense local numbering (recording the local->global map in
   garray), and (3) creates the VecScatter (baij->Mvctx) and local work vector
   (baij->lvec) that MatMult() uses to gather the needed off-process vector
   entries.

   Input Parameter:
.  mat - the MPIBAIJ matrix; baij->B->j must still hold GLOBAL block-column
         indices on entry (they hold local compacted indices on exit)

   Note:
   Ownership of garray transfers to the matrix (stored in baij->garray).
*/
PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ    *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ    *B    = (Mat_SeqBAIJ*)(baij->B->data);
  PetscErrorCode ierr;
  PetscInt       i,j,*aj = B->j,ec = 0,*garray;  /* ec counts distinct off-process block columns referenced */
  PetscInt       bs = mat->rmap->bs,*stmp;
  IS             from,to;
  Vec            gvec;
#if defined(PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;  /* hash table: 1-based global block col -> 1-based local index */
  PetscTablePosition tpos;
  PetscInt           gid,lid;
#else
  PetscInt Nbs = baij->Nbs,*indices;  /* dense marker array over all Nbs global block columns */
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  /* Pass 1: record each distinct global block column of B in the table.
     Keys and values are stored 1-based so that 0 can mean "not found". */
  ierr = PetscTableCreate(B->mbs,baij->Nbs+1,&gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt data,gid1 = aj[B->i[i]+j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc1(ec,&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--; lid--;                 /* convert back to 0-based */
    garray[lid] = gid;
  }
  /* sort so local indices enumerate the needed global columns in ascending order,
     then rebuild the table with the sorted (final) local numbering */
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B */
  /* rewrite B's column indices in place: global block column -> local index */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid--;
      aj[B->i[i]+j] = lid;
    }
  }
  B->nbs = ec;
  /* shrink B's column layout to the ec compacted block columns */
  ierr = PetscLayoutDestroy(&baij->B->cmap);CHKERRQ(ierr);
  ierr = PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B),ec*mat->rmap->bs,ec*mat->rmap->bs,mat->rmap->bs,&baij->B->cmap);CHKERRQ(ierr);
  ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr);
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  ierr = PetscCalloc1(Nbs,&indices);CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need */
  /* sweeping 0..Nbs-1 yields garray already sorted */
  ierr = PetscMalloc1(ec,&garray);CHKERRQ(ierr);
  ec   = 0;
  for (i=0; i<Nbs; i++) {
    if (indices[i]) {
      garray[ec++] = i;
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  B->nbs = ec;
  /* shrink B's column layout to the ec compacted block columns */
  ierr = PetscLayoutDestroy(&baij->B->cmap);CHKERRQ(ierr);
  ierr = PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B),ec*mat->rmap->bs,ec*mat->rmap->bs,mat->rmap->bs,&baij->B->cmap);CHKERRQ(ierr);
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather */
  /* "from": the needed global block columns; "to": 0..ec-1 locally */
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);

  ierr = PetscMalloc1(ec,&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) stmp[i] = i;
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  /* NULL array: only the layout matters for building the scatter */
  ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr);

  ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterViewFromOptions(baij->Mvctx,(PetscObject)mat,"-matmult_vecscatter_view");CHKERRQ(ierr);

  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr);

  baij->garray = garray;

  ierr = PetscLogObjectMemory((PetscObject)mat,ec*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(&from);CHKERRQ(ierr);
  ierr = ISDestroy(&to);CHKERRQ(ierr);
  ierr = VecDestroy(&gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
     Takes the local part of an already assembled MPIBAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix vector multiply.
   Thus certain data-structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
140 */ 141 PetscErrorCode MatDisAssemble_MPIBAIJ(Mat A) 142 { 143 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 144 Mat B = baij->B,Bnew; 145 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 146 PetscErrorCode ierr; 147 PetscInt i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray; 148 PetscInt bs2 = baij->bs2,*nz,ec,m = A->rmap->n; 149 MatScalar *a = Bbaij->a; 150 MatScalar *atmp; 151 152 PetscFunctionBegin; 153 /* free stuff related to matrix-vec multiply */ 154 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 155 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); baij->lvec = NULL; 156 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = NULL; 157 if (baij->colmap) { 158 #if defined(PETSC_USE_CTABLE) 159 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 160 #else 161 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 162 ierr = PetscLogObjectMemory((PetscObject)A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr); 163 #endif 164 } 165 166 /* make sure that B is assembled so we can access its values */ 167 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 168 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 169 170 /* invent new B and copy stuff over */ 171 ierr = PetscMalloc1(mbs,&nz);CHKERRQ(ierr); 172 for (i=0; i<mbs; i++) { 173 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 174 } 175 ierr = MatCreate(PetscObjectComm((PetscObject)B),&Bnew);CHKERRQ(ierr); 176 ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 177 ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr); 178 ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr); 179 if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */ 180 ((Mat_SeqBAIJ*)Bnew->data)->nonew = Bbaij->nonew; 181 } 182 183 ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 184 /* 185 Ensure that B's nonzerostate is monotonically increasing. 
186 Or should this follow the MatSetValuesBlocked() loop to preserve B's nonzerstate across a MatDisAssemble() call? 187 */ 188 Bnew->nonzerostate = B->nonzerostate; 189 190 for (i=0; i<mbs; i++) { 191 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 192 col = garray[Bbaij->j[j]]; 193 atmp = a + j*bs2; 194 ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 195 } 196 } 197 ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 198 199 ierr = PetscFree(nz);CHKERRQ(ierr); 200 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 201 ierr = PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 202 ierr = MatDestroy(&B);CHKERRQ(ierr); 203 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew);CHKERRQ(ierr); 204 205 baij->B = Bnew; 206 A->was_assembled = PETSC_FALSE; 207 A->assembled = PETSC_FALSE; 208 PetscFunctionReturn(0); 209 } 210 211 /* ugly stuff added for Glenn someday we should fix this up */ 212 213 static PetscInt *uglyrmapd = NULL,*uglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */ 214 static Vec uglydd = NULL,uglyoo = NULL; /* work vectors used to scale the two parts of the local matrix */ 215 216 PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale) 217 { 218 Mat_MPIBAIJ *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */ 219 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)ina->B->data; 220 PetscErrorCode ierr; 221 PetscInt bs = inA->rmap->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices; 222 PetscInt *r_rmapd,*r_rmapo; 223 224 PetscFunctionBegin; 225 ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr); 226 ierr = MatGetSize(ina->A,NULL,&n);CHKERRQ(ierr); 227 ierr = PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapd);CHKERRQ(ierr); 228 nt = 0; 229 for (i=0; i<inA->rmap->mapping->n; i++) { 230 if (inA->rmap->mapping->indices[i]*bs >= cstart && inA->rmap->mapping->indices[i]*bs < cend) { 231 nt++; 232 
r_rmapd[i] = inA->rmap->mapping->indices[i] + 1; 233 } 234 } 235 if (nt*bs != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt*bs %D n %D",nt*bs,n); 236 ierr = PetscMalloc1(n+1,&uglyrmapd);CHKERRQ(ierr); 237 for (i=0; i<inA->rmap->mapping->n; i++) { 238 if (r_rmapd[i]) { 239 for (j=0; j<bs; j++) { 240 uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j; 241 } 242 } 243 } 244 ierr = PetscFree(r_rmapd);CHKERRQ(ierr); 245 ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr); 246 247 ierr = PetscCalloc1(ina->Nbs+1,&lindices);CHKERRQ(ierr); 248 for (i=0; i<B->nbs; i++) { 249 lindices[garray[i]] = i+1; 250 } 251 no = inA->rmap->mapping->n - nt; 252 ierr = PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapo);CHKERRQ(ierr); 253 nt = 0; 254 for (i=0; i<inA->rmap->mapping->n; i++) { 255 if (lindices[inA->rmap->mapping->indices[i]]) { 256 nt++; 257 r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]]; 258 } 259 } 260 if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n); 261 ierr = PetscFree(lindices);CHKERRQ(ierr); 262 ierr = PetscMalloc1(nt*bs+1,&uglyrmapo);CHKERRQ(ierr); 263 for (i=0; i<inA->rmap->mapping->n; i++) { 264 if (r_rmapo[i]) { 265 for (j=0; j<bs; j++) { 266 uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j; 267 } 268 } 269 } 270 ierr = PetscFree(r_rmapo);CHKERRQ(ierr); 271 ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr); 272 PetscFunctionReturn(0); 273 } 274 275 PetscErrorCode MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale) 276 { 277 /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */ 278 PetscErrorCode ierr; 279 280 PetscFunctionBegin; 281 ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr); 282 PetscFunctionReturn(0); 283 } 284 285 PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale) 286 { 287 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */ 288 PetscErrorCode ierr; 289 PetscInt n,i; 290 PetscScalar *d,*o; 291 const PetscScalar 
*s; 292 293 PetscFunctionBegin; 294 if (!uglyrmapd) { 295 ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr); 296 } 297 298 ierr = VecGetArrayRead(scale,&s);CHKERRQ(ierr); 299 300 ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr); 301 ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr); 302 for (i=0; i<n; i++) { 303 d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ 304 } 305 ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr); 306 /* column scale "diagonal" portion of local matrix */ 307 ierr = MatDiagonalScale(a->A,NULL,uglydd);CHKERRQ(ierr); 308 309 ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr); 310 ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr); 311 for (i=0; i<n; i++) { 312 o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ 313 } 314 ierr = VecRestoreArrayRead(scale,&s);CHKERRQ(ierr); 315 ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr); 316 /* column scale "off-diagonal" portion of local matrix */ 317 ierr = MatDiagonalScale(a->B,NULL,uglyoo);CHKERRQ(ierr); 318 PetscFunctionReturn(0); 319 } 320