/*
   Support for the parallel BAIJ matrix vector multiply
*/
#include <../src/mat/impls/baij/mpi/mpibaij.h>
#include <petsc/private/isimpl.h>     /* needed because accesses data structure of ISLocalToGlobalMapping directly */

/*
   MatSetUpMultiply_MPIBAIJ - Builds the communication data structures needed to
   apply the off-diagonal part B of an MPIBAIJ matrix during MatMult():

     garray      - sorted global block-column indices of the nonzero columns of B
     baij->lvec  - sequential work vector that receives the off-process entries
     baij->Mvctx - scatter context gathering those entries from a global vector

   Side effects: the column indices of B (B->j) are compacted so they index into
   garray (local numbering 0..ec-1) instead of the global block-column space,
   B->nbs is set to ec, and B's column layout is rebuilt with the compacted size.

   Two implementations of the "find the ec distinct columns of B" step:
   a hash table (PETSC_USE_CTABLE, memory O(ec)) or a dense marker array
   of length Nbs (memory O(global block columns)).
*/
PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ        *baij = (Mat_MPIBAIJ*)mat->data;
  Mat_SeqBAIJ        *B    = (Mat_SeqBAIJ*)(baij->B->data);
  PetscErrorCode     ierr;
  PetscInt           i,j,*aj = B->j,ec = 0,*garray;
  PetscInt           bs = mat->rmap->bs,*stmp;
  IS                 from,to;
  Vec                gvec;
#if defined(PETSC_USE_CTABLE)
  PetscTable         gid1_lid1;
  PetscTablePosition tpos;
  PetscInt           gid,lid;
#else
  PetscInt           Nbs = baij->Nbs,*indices;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  ierr = PetscTableCreate(B->mbs,baij->Nbs+1,&gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      /* keys/values are stored 1-based because the table uses 0 for "absent" */
      PetscInt data,gid1 = aj[B->i[i]+j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr);
      if (!data) {
        /* one based table */
        ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr);
      }
    }
  }
  /* form array of columns we need */
  ierr = PetscMalloc1(ec,&garray);CHKERRQ(ierr);
  ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr);
  while (tpos) {
    ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr);
    gid--; lid--;               /* convert back from the 1-based table encoding */
    garray[lid] = gid;
  }
  /* sort, then rebuild the table so that local ids follow the sorted order */
  ierr = PetscSortInt(ec,garray);CHKERRQ(ierr);
  ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr);
  for (i=0; i<ec; i++) {
    ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr);
  }
  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr);
      lid--;
      aj[B->i[i]+j] = lid;      /* B->j now indexes into garray */
    }
  }
  B->nbs = ec;
  ierr = PetscLayoutDestroy(&baij->B->cmap);CHKERRQ(ierr);
  ierr = PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B),ec*mat->rmap->bs,ec*mat->rmap->bs,mat->rmap->bs,&baij->B->cmap);CHKERRQ(ierr);
  ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr);
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  ierr = PetscCalloc1(Nbs,&indices);CHKERRQ(ierr);
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need; scanning indices[] in order yields garray sorted */
  ierr = PetscMalloc1(ec,&garray);CHKERRQ(ierr);
  ec   = 0;
  for (i=0; i<Nbs; i++) {
    if (indices[i]) {
      garray[ec++] = i;
    }
  }

  /* make indices now point into garray */
  for (i=0; i<ec; i++) {
    indices[garray[i]] = i;
  }

  /* compact out the extra columns in B */
  for (i=0; i<B->mbs; i++) {
    for (j=0; j<B->ilen[i]; j++) {
      aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
    }
  }
  B->nbs = ec;
  ierr = PetscLayoutDestroy(&baij->B->cmap);CHKERRQ(ierr);
  ierr = PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B),ec*mat->rmap->bs,ec*mat->rmap->bs,mat->rmap->bs,&baij->B->cmap);CHKERRQ(ierr);
  ierr = PetscFree(indices);CHKERRQ(ierr);
#endif

  /* create local vector that is used to scatter into */
  ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr);

  /* create two temporary index sets for building scatter-gather */
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);

  ierr = PetscMalloc1(ec,&stmp);CHKERRQ(ierr);
  for (i=0; i<ec; i++) stmp[i] = i;    /* destination: 0..ec-1 blocks of lvec */
  ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr);

  /* create temporary global vector to generate scatter context */
  /* NULL array: only the layout is needed to build the scatter, never the values */
  ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr);

  ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr);

  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr);

  baij->garray = garray;               /* ownership transfers to the matrix */

  ierr = PetscLogObjectMemory((PetscObject)mat,ec*sizeof(PetscInt));CHKERRQ(ierr);
  ierr = ISDestroy(&from);CHKERRQ(ierr);
  ierr = ISDestroy(&to);CHKERRQ(ierr);
  ierr = VecDestroy(&gvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
     Takes the local part of an already assembled MPIBAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix vector multiply.
   Thus certain data-structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
139 */ 140 PetscErrorCode MatDisAssemble_MPIBAIJ(Mat A) 141 { 142 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 143 Mat B = baij->B,Bnew; 144 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 145 PetscErrorCode ierr; 146 PetscInt i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray; 147 PetscInt bs2 = baij->bs2,*nz,ec,m = A->rmap->n; 148 MatScalar *a = Bbaij->a; 149 MatScalar *atmp; 150 151 PetscFunctionBegin; 152 /* free stuff related to matrix-vec multiply */ 153 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 154 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); baij->lvec = NULL; 155 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = NULL; 156 if (baij->colmap) { 157 #if defined(PETSC_USE_CTABLE) 158 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 159 #else 160 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 161 ierr = PetscLogObjectMemory((PetscObject)A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr); 162 #endif 163 } 164 165 /* make sure that B is assembled so we can access its values */ 166 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 167 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 168 169 /* invent new B and copy stuff over */ 170 ierr = PetscMalloc1(mbs,&nz);CHKERRQ(ierr); 171 for (i=0; i<mbs; i++) { 172 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 173 } 174 ierr = MatCreate(PetscObjectComm((PetscObject)B),&Bnew);CHKERRQ(ierr); 175 ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 176 ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr); 177 ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr); 178 if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */ 179 ((Mat_SeqBAIJ*)Bnew->data)->nonew = Bbaij->nonew; 180 } 181 182 ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 183 /* 184 Ensure that B's nonzerostate is monotonically increasing. 
185 Or should this follow the MatSetValuesBlocked() loop to preserve B's nonzerstate across a MatDisAssemble() call? 186 */ 187 Bnew->nonzerostate = B->nonzerostate; 188 189 for (i=0; i<mbs; i++) { 190 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 191 col = garray[Bbaij->j[j]]; 192 atmp = a + j*bs2; 193 ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 194 } 195 } 196 ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 197 198 ierr = PetscFree(nz);CHKERRQ(ierr); 199 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 200 ierr = PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 201 ierr = MatDestroy(&B);CHKERRQ(ierr); 202 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew);CHKERRQ(ierr); 203 204 baij->B = Bnew; 205 A->was_assembled = PETSC_FALSE; 206 A->assembled = PETSC_FALSE; 207 PetscFunctionReturn(0); 208 } 209 210 /* ugly stuff added for Glenn someday we should fix this up */ 211 212 static PetscInt *uglyrmapd = NULL,*uglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */ 213 static Vec uglydd = NULL,uglyoo = NULL; /* work vectors used to scale the two parts of the local matrix */ 214 215 PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale) 216 { 217 Mat_MPIBAIJ *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */ 218 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)ina->B->data; 219 PetscErrorCode ierr; 220 PetscInt bs = inA->rmap->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices; 221 PetscInt *r_rmapd,*r_rmapo; 222 223 PetscFunctionBegin; 224 ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr); 225 ierr = MatGetSize(ina->A,NULL,&n);CHKERRQ(ierr); 226 ierr = PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapd);CHKERRQ(ierr); 227 nt = 0; 228 for (i=0; i<inA->rmap->mapping->n; i++) { 229 if (inA->rmap->mapping->indices[i]*bs >= cstart && inA->rmap->mapping->indices[i]*bs < cend) { 230 nt++; 231 
r_rmapd[i] = inA->rmap->mapping->indices[i] + 1; 232 } 233 } 234 if (nt*bs != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt*bs %D n %D",nt*bs,n); 235 ierr = PetscMalloc1(n+1,&uglyrmapd);CHKERRQ(ierr); 236 for (i=0; i<inA->rmap->mapping->n; i++) { 237 if (r_rmapd[i]) { 238 for (j=0; j<bs; j++) { 239 uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j; 240 } 241 } 242 } 243 ierr = PetscFree(r_rmapd);CHKERRQ(ierr); 244 ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr); 245 246 ierr = PetscCalloc1(ina->Nbs+1,&lindices);CHKERRQ(ierr); 247 for (i=0; i<B->nbs; i++) { 248 lindices[garray[i]] = i+1; 249 } 250 no = inA->rmap->mapping->n - nt; 251 ierr = PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapo);CHKERRQ(ierr); 252 nt = 0; 253 for (i=0; i<inA->rmap->mapping->n; i++) { 254 if (lindices[inA->rmap->mapping->indices[i]]) { 255 nt++; 256 r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]]; 257 } 258 } 259 if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n); 260 ierr = PetscFree(lindices);CHKERRQ(ierr); 261 ierr = PetscMalloc1(nt*bs+1,&uglyrmapo);CHKERRQ(ierr); 262 for (i=0; i<inA->rmap->mapping->n; i++) { 263 if (r_rmapo[i]) { 264 for (j=0; j<bs; j++) { 265 uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j; 266 } 267 } 268 } 269 ierr = PetscFree(r_rmapo);CHKERRQ(ierr); 270 ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr); 271 PetscFunctionReturn(0); 272 } 273 274 PetscErrorCode MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale) 275 { 276 /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */ 277 PetscErrorCode ierr; 278 279 PetscFunctionBegin; 280 ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr); 281 PetscFunctionReturn(0); 282 } 283 284 PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale) 285 { 286 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */ 287 PetscErrorCode ierr; 288 PetscInt n,i; 289 PetscScalar *d,*o; 290 const PetscScalar 
*s; 291 292 PetscFunctionBegin; 293 if (!uglyrmapd) { 294 ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr); 295 } 296 297 ierr = VecGetArrayRead(scale,&s);CHKERRQ(ierr); 298 299 ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr); 300 ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr); 301 for (i=0; i<n; i++) { 302 d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ 303 } 304 ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr); 305 /* column scale "diagonal" portion of local matrix */ 306 ierr = MatDiagonalScale(a->A,NULL,uglydd);CHKERRQ(ierr); 307 308 ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr); 309 ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr); 310 for (i=0; i<n; i++) { 311 o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ 312 } 313 ierr = VecRestoreArrayRead(scale,&s);CHKERRQ(ierr); 314 ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr); 315 /* column scale "off-diagonal" portion of local matrix */ 316 ierr = MatDiagonalScale(a->B,NULL,uglyoo);CHKERRQ(ierr); 317 PetscFunctionReturn(0); 318 } 319