/*
   Support for the parallel BAIJ matrix vector multiply
*/
#include <../src/mat/impls/baij/mpi/mpibaij.h>
#include <petsc/private/isimpl.h> /* needed because we access the ISLocalToGlobalMapping data structure directly */

PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     i, j, *aj = B->j, ec = 0, *garray;
  PetscInt     bs = mat->rmap->bs, *stmp;
  IS           from, to;
  Vec          gvec;
#if defined(PETSC_USE_CTABLE)
  PetscHMapI    gid1_lid1 = NULL;
  PetscHashIter tpos;
  PetscInt      gid, lid;
#else
  PetscInt Nbs = baij->Nbs, *indices;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  PetscCall(PetscHMapICreateWithSize(B->mbs, &gid1_lid1));
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      PetscInt data, gid1 = aj[B->i[i] + j] + 1;
      PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &data));
      if (!data) {
        /* one based table */
        PetscCall(PetscHMapISet(gid1_lid1, gid1, ++ec));
      }
    }
  }
  /* form array of columns we need */
  PetscCall(PetscMalloc1(ec, &garray));
  PetscHashIterBegin(gid1_lid1, tpos);
  while (!PetscHashIterAtEnd(gid1_lid1, tpos)) {
    PetscHashIterGetKey(gid1_lid1, tpos, gid);
    PetscHashIterGetVal(gid1_lid1, tpos, lid);
    PetscHashIterNext(gid1_lid1, tpos);
    gid--;
    lid--;
    garray[lid] = gid;
  }
  PetscCall(PetscSortInt(ec, garray));
  PetscCall(PetscHMapIClear(gid1_lid1));
  for (i = 0; i < ec; i++) PetscCall(PetscHMapISet(gid1_lid1, garray[i] + 1, i + 1));
  /* compact out the extra columns in B */
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &lid));
      lid--;
      aj[B->i[i] + j] = lid;
    }
  }
  B->nbs = ec;
  PetscCall(PetscLayoutDestroy(&baij->B->cmap));
  PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &baij->B->cmap));
  PetscCall(PetscHMapIDestroy(&gid1_lid1));
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  PetscCall(PetscCalloc1(Nbs, &indices));
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need */
  PetscCall(PetscMalloc1(ec, &garray));
  ec = 0;
  for (i = 0; i < Nbs; i++) {
    if (indices[i]) garray[ec++] = i;
  }

  /* make indices now point into garray */
  for (i = 0; i < ec; i++) indices[garray[i]] = i;

  /* compact out the extra columns in B */
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
  }
  B->nbs = ec;
  PetscCall(PetscLayoutDestroy(&baij->B->cmap));
  PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &baij->B->cmap));
  PetscCall(PetscFree(indices));
#endif

  /* create local vector that is used to scatter into */
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec * bs, &baij->lvec));

  /* create two temporary index sets for building scatter-gather */
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, garray, PETSC_COPY_VALUES, &from));

  PetscCall(PetscMalloc1(ec, &stmp));
  for (i = 0; i < ec; i++) stmp[i] = i;
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, stmp, PETSC_OWN_POINTER, &to));

  /* create temporary global vector to generate scatter context */
  PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec));

  PetscCall(VecScatterCreate(gvec, from, baij->lvec, to, &baij->Mvctx));
  PetscCall(VecScatterViewFromOptions(baij->Mvctx, (PetscObject)mat, "-matmult_vecscatter_view"));

  baij->garray = garray;

  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));
  PetscCall(VecDestroy(&gvec));
  PetscFunctionReturn(PETSC_SUCCESS);
}
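
/*
   Illustrative sketch (not part of PETSc, excluded from compilation): roughly how the
   objects built above are consumed during a matrix-vector product. The multiply by the
   on-process "diagonal" block is overlapped with the scatter of the needed ghost values;
   the helper name below is hypothetical, the struct members are the ones set up above.
*/
#if 0
static PetscErrorCode MatMultSketch_MPIBAIJ(Mat mat, Vec xx, Vec yy)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  /* start moving the off-process entries of xx (the columns listed in garray) into lvec */
  PetscCall(VecScatterBegin(baij->Mvctx, xx, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  /* multiply by the "diagonal" (on-process) block while the messages are in flight */
  PetscCall(MatMult(baij->A, xx, yy));
  PetscCall(VecScatterEnd(baij->Mvctx, xx, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  /* add the "off-diagonal" block applied to the gathered ghost values */
  PetscCall(MatMultAdd(baij->B, baij->lvec, yy, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif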

/*
   Takes the local part of an already assembled MPIBAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix vector multiply.
   Thus certain data-structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
*/
PetscErrorCode MatDisAssemble_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
  Mat          B    = baij->B, Bnew;
  Mat_SeqBAIJ *Bbaij;
  PetscInt     i, j, mbs, n = A->cmap->N, col, *garray = baij->garray;
  PetscInt     bs2 = baij->bs2, *nz = NULL, m = A->rmap->n;
  MatScalar   *a, *atmp;

  PetscFunctionBegin;
  /* free stuff related to matrix-vec multiply */
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  if (baij->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
    PetscCall(PetscFree(baij->colmap));
#endif
  }

  /* make sure that B is assembled so we can access its values */
  if (B) {
    PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
    Bbaij = (Mat_SeqBAIJ *)B->data;
    mbs   = Bbaij->mbs;
    a     = Bbaij->a;

    /* invent new B and copy stuff over */
    PetscCall(PetscMalloc1(mbs, &nz));
    for (i = 0; i < mbs; i++) nz[i] = Bbaij->i[i + 1] - Bbaij->i[i];
    PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Bnew));
    PetscCall(MatSetSizes(Bnew, m, n, m, n));
    PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name));
    PetscCall(MatSeqBAIJSetPreallocation(Bnew, B->rmap->bs, 0, nz));
    /*
      Ensure that B's nonzerostate is monotonically increasing.
      Or should this follow the MatSetValuesBlocked() loop to preserve B's nonzerostate across a MatDisAssemble() call?
    */
    Bnew->nonzerostate = B->nonzerostate;
    if (Bbaij->nonew >= 0) { /* inherit insertion error options (if positive) */
      ((Mat_SeqBAIJ *)Bnew->data)->nonew = Bbaij->nonew;
    }

    PetscCall(MatSetOption(Bnew, MAT_ROW_ORIENTED, PETSC_FALSE));
    for (i = 0; i < mbs; i++) {
      for (j = Bbaij->i[i]; j < Bbaij->i[i + 1]; j++) {
        col  = garray[Bbaij->j[j]];
        atmp = a + j * bs2;
        PetscCall(MatSetValuesBlocked_SeqBAIJ(Bnew, 1, &i, 1, &col, atmp, B->insertmode));
      }
    }
    PetscCall(MatSetOption(Bnew, MAT_ROW_ORIENTED, Bbaij->roworiented));

    PetscCall(PetscFree(nz));
    PetscCall(MatDestroy(&B));

    baij->B = Bnew;
  }
  PetscCall(PetscFree(baij->garray));
  A->was_assembled = PETSC_FALSE;
  A->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
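
/*
   Illustrative sketch (not part of PETSc, excluded from compilation): the disassemble
   path above is reached when an application inserts, after assembly, an entry in a
   column this process has never communicated for; `row` and `newcol` below are
   hypothetical indices in such a column.
*/
#if 0
PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
/* a brand-new off-process coupling invalidates garray, lvec, and Mvctx,
   so the insertion path calls MatDisAssemble_MPIBAIJ() internally */
PetscCall(MatSetValue(A, row, newcol, 1.0, INSERT_VALUES));
PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); /* rebuilds the multiply data structures */
PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
#endif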

/* ugly stuff added for Glenn; someday we should fix this up */

static PetscInt *uglyrmapd = NULL, *uglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
static Vec       uglydd = NULL, uglyoo = NULL;        /* work vectors used to scale the two parts of the local matrix */

static PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA, Vec scale)
{
  Mat_MPIBAIJ *ina = (Mat_MPIBAIJ *)inA->data; /* access private part of matrix */
  Mat_SeqBAIJ *B   = (Mat_SeqBAIJ *)ina->B->data;
  PetscInt     bs = inA->rmap->bs, i, n, nt, j, cstart, cend, no, *garray = ina->garray, *lindices;
  PetscInt    *r_rmapd, *r_rmapo;

  PetscFunctionBegin;
  PetscCall(MatGetOwnershipRange(inA, &cstart, &cend));
  PetscCall(MatGetSize(ina->A, NULL, &n));
  PetscCall(PetscCalloc1(inA->rmap->mapping->n, &r_rmapd));
  nt = 0;
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (inA->rmap->mapping->indices[i] * bs >= cstart && inA->rmap->mapping->indices[i] * bs < cend) {
      nt++;
      r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
    }
  }
  PetscCheck(nt * bs == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt*bs %" PetscInt_FMT " n %" PetscInt_FMT, nt * bs, n);
  PetscCall(PetscMalloc1(n, &uglyrmapd));
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (r_rmapd[i]) {
      for (j = 0; j < bs; j++) uglyrmapd[(r_rmapd[i] - 1) * bs + j - cstart] = i * bs + j;
    }
  }
  PetscCall(PetscFree(r_rmapd));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, n, &uglydd));

  PetscCall(PetscCalloc1(ina->Nbs, &lindices));
  for (i = 0; i < B->nbs; i++) lindices[garray[i]] = i + 1;
  no = inA->rmap->mapping->n - nt;
  PetscCall(PetscCalloc1(inA->rmap->mapping->n, &r_rmapo));
  nt = 0;
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (lindices[inA->rmap->mapping->indices[i]]) {
      nt++;
      r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
    }
  }
  PetscCheck(nt <= no, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT, nt, no);
  PetscCall(PetscFree(lindices));
  PetscCall(PetscMalloc1(nt * bs, &uglyrmapo));
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (r_rmapo[i]) {
      for (j = 0; j < bs; j++) uglyrmapo[(r_rmapo[i] - 1) * bs + j] = i * bs + j;
    }
  }
  PetscCall(PetscFree(r_rmapo));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, nt * bs, &uglyoo));
  PetscFunctionReturn(PETSC_SUCCESS);
}
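
/*
   Illustrative invariant (excluded from compilation): after the setup above, an array
   `s` indexed by the local (ISLocalToGlobalMapping) point ordering can be split into
   the orderings of the two local matrices. The loop-bound names below are hypothetical;
   this is exactly what MatDiagonalScaleLocal_MPIBAIJ() does next.
*/
#if 0
for (i = 0; i < n_diagonal_points; i++) d[i] = s[uglyrmapd[i]];    /* "diagonal" part ordering */
for (i = 0; i < n_offdiagonal_points; i++) o[i] = s[uglyrmapo[i]]; /* "off-diagonal" part ordering */
#endif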

PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat A, Vec scale)
{
  Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data; /* access private part of matrix */
  PetscInt           n, i;
  PetscScalar       *d, *o;
  const PetscScalar *s;

  PetscFunctionBegin;
  if (!uglyrmapd) PetscCall(MatMPIBAIJDiagonalScaleLocalSetUp(A, scale));

  PetscCall(VecGetArrayRead(scale, &s));

  PetscCall(VecGetLocalSize(uglydd, &n));
  PetscCall(VecGetArray(uglydd, &d));
  for (i = 0; i < n; i++) d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
  PetscCall(VecRestoreArray(uglydd, &d));
  /* column scale "diagonal" portion of local matrix */
  PetscCall(MatDiagonalScale(a->A, NULL, uglydd));

  PetscCall(VecGetLocalSize(uglyoo, &n));
  PetscCall(VecGetArray(uglyoo, &o));
  for (i = 0; i < n; i++) o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
  PetscCall(VecRestoreArrayRead(scale, &s));
  PetscCall(VecRestoreArray(uglyoo, &o));
  /* column scale "off-diagonal" portion of local matrix */
  PetscCall(MatDiagonalScale(a->B, NULL, uglyoo));
  PetscCall(PetscFree(uglyrmapd));
  PetscCall(PetscFree(uglyrmapo));
  PetscCall(VecDestroy(&uglydd));
  PetscCall(VecDestroy(&uglyoo));
  PetscFunctionReturn(PETSC_SUCCESS);
}
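
/*
   Illustrative usage (not part of this file, excluded from compilation): an application
   reaches MatDiagonalScaleLocal_MPIBAIJ() through the public MatDiagonalScaleLocal() API
   on a matrix with a local-to-global mapping set, passing a sequential vector of scale
   factors for all local points including ghosts; `A`, `mapping`, and `localScale` below
   are hypothetical objects assumed to be set up elsewhere.
*/
#if 0
PetscCall(MatSetLocalToGlobalMapping(A, mapping, mapping));
/* ... insert values and assemble A ... */
PetscCall(MatDiagonalScaleLocal(A, localScale)); /* dispatches to MatDiagonalScaleLocal_MPIBAIJ() for MPIBAIJ */
#endif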