1 2 /* 3 Support for the parallel BAIJ matrix vector multiply 4 */ 5 #include <../src/mat/impls/baij/mpi/mpibaij.h> 6 #include <petsc/private/isimpl.h> /* needed because accesses data structure of ISLocalToGlobalMapping directly */ 7 8 PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat) 9 { 10 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)mat->data; 11 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)(baij->B->data); 12 PetscErrorCode ierr; 13 PetscInt i,j,*aj = B->j,ec = 0,*garray; 14 PetscInt bs = mat->rmap->bs,*stmp; 15 IS from,to; 16 Vec gvec; 17 #if defined(PETSC_USE_CTABLE) 18 PetscTable gid1_lid1; 19 PetscTablePosition tpos; 20 PetscInt gid,lid; 21 #else 22 PetscInt Nbs = baij->Nbs,*indices; 23 #endif 24 25 PetscFunctionBegin; 26 #if defined(PETSC_USE_CTABLE) 27 /* use a table - Mark Adams */ 28 ierr = PetscTableCreate(B->mbs,baij->Nbs+1,&gid1_lid1);CHKERRQ(ierr); 29 for (i=0; i<B->mbs; i++) { 30 for (j=0; j<B->ilen[i]; j++) { 31 PetscInt data,gid1 = aj[B->i[i]+j] + 1; 32 ierr = PetscTableFind(gid1_lid1,gid1,&data);CHKERRQ(ierr); 33 if (!data) { 34 /* one based table */ 35 ierr = PetscTableAdd(gid1_lid1,gid1,++ec,INSERT_VALUES);CHKERRQ(ierr); 36 } 37 } 38 } 39 /* form array of columns we need */ 40 ierr = PetscMalloc1(ec+1,&garray);CHKERRQ(ierr); 41 ierr = PetscTableGetHeadPosition(gid1_lid1,&tpos);CHKERRQ(ierr); 42 while (tpos) { 43 ierr = PetscTableGetNext(gid1_lid1,&tpos,&gid,&lid);CHKERRQ(ierr); 44 gid--; lid--; 45 garray[lid] = gid; 46 } 47 ierr = PetscSortInt(ec,garray);CHKERRQ(ierr); 48 ierr = PetscTableRemoveAll(gid1_lid1);CHKERRQ(ierr); 49 for (i=0; i<ec; i++) { 50 ierr = PetscTableAdd(gid1_lid1,garray[i]+1,i+1,INSERT_VALUES);CHKERRQ(ierr); 51 } 52 /* compact out the extra columns in B */ 53 for (i=0; i<B->mbs; i++) { 54 for (j=0; j<B->ilen[i]; j++) { 55 PetscInt gid1 = aj[B->i[i] + j] + 1; 56 ierr = PetscTableFind(gid1_lid1,gid1,&lid);CHKERRQ(ierr); 57 lid--; 58 aj[B->i[i]+j] = lid; 59 } 60 } 61 B->nbs = ec; 62 baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs; 63 64 ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr); 65 ierr = PetscTableDestroy(&gid1_lid1);CHKERRQ(ierr); 66 #else 67 /* Make an array as long as the number of columns */ 68 /* mark those columns that are in baij->B */ 69 ierr = PetscCalloc1(Nbs+1,&indices);CHKERRQ(ierr); 70 for (i=0; i<B->mbs; i++) { 71 for (j=0; j<B->ilen[i]; j++) { 72 if (!indices[aj[B->i[i] + j]]) ec++; 73 indices[aj[B->i[i] + j]] = 1; 74 } 75 } 76 77 /* form array of columns we need */ 78 ierr = PetscMalloc1(ec+1,&garray);CHKERRQ(ierr); 79 ec = 0; 80 for (i=0; i<Nbs; i++) { 81 if (indices[i]) { 82 garray[ec++] = i; 83 } 84 } 85 86 /* make indices now point into garray */ 87 for (i=0; i<ec; i++) { 88 indices[garray[i]] = i; 89 } 90 91 /* compact out the extra columns in B */ 92 for (i=0; i<B->mbs; i++) { 93 for (j=0; j<B->ilen[i]; j++) { 94 aj[B->i[i] + j] = indices[aj[B->i[i] + j]]; 95 } 96 } 97 B->nbs = ec; 98 baij->B->cmap->n = baij->B->cmap->N = ec*mat->rmap->bs; 99 100 ierr = PetscLayoutSetUp((baij->B->cmap));CHKERRQ(ierr); 101 ierr = PetscFree(indices);CHKERRQ(ierr); 102 #endif 103 104 /* create local vector that is used to scatter into */ 105 ierr = VecCreateSeq(PETSC_COMM_SELF,ec*bs,&baij->lvec);CHKERRQ(ierr); 106 107 /* create two temporary index sets for building scatter-gather */ 108 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 109 110 ierr = PetscMalloc1(ec+1,&stmp);CHKERRQ(ierr); 111 for (i=0; i<ec; i++) stmp[i] = i; 112 ierr = ISCreateBlock(PETSC_COMM_SELF,bs,ec,stmp,PETSC_OWN_POINTER,&to);CHKERRQ(ierr); 113 114 /* create temporary global vector to generate scatter context */ 115 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),1,mat->cmap->n,mat->cmap->N,NULL,&gvec);CHKERRQ(ierr); 116 117 ierr = VecScatterCreate(gvec,from,baij->lvec,to,&baij->Mvctx);CHKERRQ(ierr); 118 119 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->Mvctx);CHKERRQ(ierr); 120 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)baij->lvec);CHKERRQ(ierr); 121 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)from);CHKERRQ(ierr); 122 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)to);CHKERRQ(ierr); 123 124 baij->garray = garray; 125 126 ierr = PetscLogObjectMemory((PetscObject)mat,(ec+1)*sizeof(PetscInt));CHKERRQ(ierr); 127 ierr = ISDestroy(&from);CHKERRQ(ierr); 128 ierr = ISDestroy(&to);CHKERRQ(ierr); 129 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 130 PetscFunctionReturn(0); 131 } 132 133 /* 134 Takes the local part of an already assembled MPIBAIJ matrix 135 and disassembles it. This is to allow new nonzeros into the matrix 136 that require more communication in the matrix vector multiply. 137 Thus certain data-structures must be rebuilt. 138 139 Kind of slow! But that's what application programmers get when 140 they are sloppy. 141 */ 142 PetscErrorCode MatDisAssemble_MPIBAIJ(Mat A) 143 { 144 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ*)A->data; 145 Mat B = baij->B,Bnew; 146 Mat_SeqBAIJ *Bbaij = (Mat_SeqBAIJ*)B->data; 147 PetscErrorCode ierr; 148 PetscInt i,j,mbs=Bbaij->mbs,n = A->cmap->N,col,*garray=baij->garray; 149 PetscInt bs2 = baij->bs2,*nz,ec,m = A->rmap->n; 150 MatScalar *a = Bbaij->a; 151 MatScalar *atmp; 152 153 154 PetscFunctionBegin; 155 /* free stuff related to matrix-vec multiply */ 156 ierr = VecGetSize(baij->lvec,&ec);CHKERRQ(ierr); /* needed for PetscLogObjectMemory below */ 157 ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr); baij->lvec = 0; 158 ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr); baij->Mvctx = 0; 159 if (baij->colmap) { 160 #if defined(PETSC_USE_CTABLE) 161 ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr); 162 #else 163 ierr = PetscFree(baij->colmap);CHKERRQ(ierr); 164 ierr = PetscLogObjectMemory((PetscObject)A,-Bbaij->nbs*sizeof(PetscInt));CHKERRQ(ierr); 165 #endif 166 } 167 168 /* make sure that B is assembled so we can access its values */ 169 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 170 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 171 172 /* invent new B and copy stuff over */ 173 ierr = PetscMalloc1(mbs,&nz);CHKERRQ(ierr); 174 for (i=0; i<mbs; i++) { 175 nz[i] = Bbaij->i[i+1]-Bbaij->i[i]; 176 } 177 ierr = MatCreate(PetscObjectComm((PetscObject)B),&Bnew);CHKERRQ(ierr); 178 ierr = MatSetSizes(Bnew,m,n,m,n);CHKERRQ(ierr); 179 ierr = MatSetType(Bnew,((PetscObject)B)->type_name);CHKERRQ(ierr); 180 ierr = MatSeqBAIJSetPreallocation(Bnew,B->rmap->bs,0,nz);CHKERRQ(ierr); 181 182 ((Mat_SeqBAIJ*)Bnew->data)->nonew = Bbaij->nonew; /* Inherit insertion error options. */ 183 184 ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 185 /* 186 Ensure that B's nonzerostate is monotonically increasing. 187 Or should this follow the MatSetValuesBlocked() loop to preserve B's nonzerstate across a MatDisAssemble() call? 188 */ 189 Bnew->nonzerostate = B->nonzerostate; 190 191 for (i=0; i<mbs; i++) { 192 for (j=Bbaij->i[i]; j<Bbaij->i[i+1]; j++) { 193 col = garray[Bbaij->j[j]]; 194 atmp = a + j*bs2; 195 ierr = MatSetValuesBlocked_SeqBAIJ(Bnew,1,&i,1,&col,atmp,B->insertmode);CHKERRQ(ierr); 196 } 197 } 198 ierr = MatSetOption(Bnew,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 199 200 ierr = PetscFree(nz);CHKERRQ(ierr); 201 ierr = PetscFree(baij->garray);CHKERRQ(ierr); 202 ierr = PetscLogObjectMemory((PetscObject)A,-ec*sizeof(PetscInt));CHKERRQ(ierr); 203 ierr = MatDestroy(&B);CHKERRQ(ierr); 204 ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)Bnew);CHKERRQ(ierr); 205 206 baij->B = Bnew; 207 A->was_assembled = PETSC_FALSE; 208 A->assembled = PETSC_FALSE; 209 PetscFunctionReturn(0); 210 } 211 212 /* ugly stuff added for Glenn someday we should fix this up */ 213 214 static PetscInt *uglyrmapd = 0,*uglyrmapo = 0; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */ 215 static Vec uglydd = 0,uglyoo = 0; /* work vectors used to scale the two parts of the local matrix */ 216 217 218 PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA,Vec scale) 219 { 220 Mat_MPIBAIJ *ina = (Mat_MPIBAIJ*) inA->data; /*access private part of matrix */ 221 Mat_SeqBAIJ *B = (Mat_SeqBAIJ*)ina->B->data; 222 PetscErrorCode ierr; 223 PetscInt bs = inA->rmap->bs,i,n,nt,j,cstart,cend,no,*garray = ina->garray,*lindices; 224 PetscInt *r_rmapd,*r_rmapo; 225 226 PetscFunctionBegin; 227 ierr = MatGetOwnershipRange(inA,&cstart,&cend);CHKERRQ(ierr); 228 ierr = MatGetSize(ina->A,NULL,&n);CHKERRQ(ierr); 229 ierr = PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapd);CHKERRQ(ierr); 230 nt = 0; 231 for (i=0; i<inA->rmap->mapping->n; i++) { 232 if (inA->rmap->mapping->indices[i]*bs >= cstart && inA->rmap->mapping->indices[i]*bs < cend) { 233 nt++; 234 r_rmapd[i] = inA->rmap->mapping->indices[i] + 1; 235 } 236 } 237 if (nt*bs != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt*bs %D n %D",nt*bs,n); 238 ierr = PetscMalloc1(n+1,&uglyrmapd);CHKERRQ(ierr); 239 for (i=0; i<inA->rmap->mapping->n; i++) { 240 if (r_rmapd[i]) { 241 for (j=0; j<bs; j++) { 242 uglyrmapd[(r_rmapd[i]-1)*bs+j-cstart] = i*bs + j; 243 } 244 } 245 } 246 ierr = PetscFree(r_rmapd);CHKERRQ(ierr); 247 ierr = VecCreateSeq(PETSC_COMM_SELF,n,&uglydd);CHKERRQ(ierr); 248 249 ierr = PetscCalloc1(ina->Nbs+1,&lindices);CHKERRQ(ierr); 250 for (i=0; i<B->nbs; i++) { 251 lindices[garray[i]] = i+1; 252 } 253 no = inA->rmap->mapping->n - nt; 254 ierr = PetscCalloc1(inA->rmap->mapping->n+1,&r_rmapo);CHKERRQ(ierr); 255 nt = 0; 256 for (i=0; i<inA->rmap->mapping->n; i++) { 257 if (lindices[inA->rmap->mapping->indices[i]]) { 258 nt++; 259 r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]]; 260 } 261 } 262 if (nt > no) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Hmm nt %D no %D",nt,n); 263 ierr = PetscFree(lindices);CHKERRQ(ierr); 264 ierr = PetscMalloc1(nt*bs+1,&uglyrmapo);CHKERRQ(ierr); 265 for (i=0; i<inA->rmap->mapping->n; i++) { 266 if (r_rmapo[i]) { 267 for (j=0; j<bs; j++) { 268 uglyrmapo[(r_rmapo[i]-1)*bs+j] = i*bs + j; 269 } 270 } 271 } 272 ierr = PetscFree(r_rmapo);CHKERRQ(ierr); 273 ierr = VecCreateSeq(PETSC_COMM_SELF,nt*bs,&uglyoo);CHKERRQ(ierr); 274 PetscFunctionReturn(0); 275 } 276 277 PetscErrorCode MatMPIBAIJDiagonalScaleLocal(Mat A,Vec scale) 278 { 279 /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal */ 280 PetscErrorCode ierr; 281 282 PetscFunctionBegin; 283 ierr = PetscTryMethod(A,"MatDiagonalScaleLocal_C",(Mat,Vec),(A,scale));CHKERRQ(ierr); 284 PetscFunctionReturn(0); 285 } 286 287 PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat A,Vec scale) 288 { 289 Mat_MPIBAIJ *a = (Mat_MPIBAIJ*) A->data; /*access private part of matrix */ 290 PetscErrorCode ierr; 291 PetscInt n,i; 292 PetscScalar *d,*o,*s; 293 294 PetscFunctionBegin; 295 if (!uglyrmapd) { 296 ierr = MatMPIBAIJDiagonalScaleLocalSetUp(A,scale);CHKERRQ(ierr); 297 } 298 299 ierr = VecGetArray(scale,&s);CHKERRQ(ierr); 300 301 ierr = VecGetLocalSize(uglydd,&n);CHKERRQ(ierr); 302 ierr = VecGetArray(uglydd,&d);CHKERRQ(ierr); 303 for (i=0; i<n; i++) { 304 d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ 305 } 306 ierr = VecRestoreArray(uglydd,&d);CHKERRQ(ierr); 307 /* column scale "diagonal" portion of local matrix */ 308 ierr = MatDiagonalScale(a->A,NULL,uglydd);CHKERRQ(ierr); 309 310 ierr = VecGetLocalSize(uglyoo,&n);CHKERRQ(ierr); 311 ierr = VecGetArray(uglyoo,&o);CHKERRQ(ierr); 312 for (i=0; i<n; i++) { 313 o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ 314 } 315 ierr = VecRestoreArray(scale,&s);CHKERRQ(ierr); 316 ierr = VecRestoreArray(uglyoo,&o);CHKERRQ(ierr); 317 /* column scale "off-diagonal" portion of local matrix */ 318 ierr = MatDiagonalScale(a->B,NULL,uglyoo);CHKERRQ(ierr); 319 PetscFunctionReturn(0); 320 } 321