/*
   Support for the parallel BAIJ matrix-vector multiply
*/
#include <../src/mat/impls/baij/mpi/mpibaij.h>
#include <petsc/private/isimpl.h> /* needed because this file accesses the data structure of ISLocalToGlobalMapping directly */

PetscErrorCode MatSetUpMultiply_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)(baij->B->data);
  PetscInt     i, j, *aj = B->j, ec = 0, *garray;
  PetscInt     bs = mat->rmap->bs, *stmp;
  IS           from, to;
  Vec          gvec;
#if defined(PETSC_USE_CTABLE)
  PetscHMapI    gid1_lid1 = NULL;
  PetscHashIter tpos;
  PetscInt      gid, lid;
#else
  PetscInt Nbs = baij->Nbs, *indices;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* use a table - Mark Adams */
  PetscCall(PetscHMapICreateWithSize(B->mbs, &gid1_lid1));
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      PetscInt data, gid1 = aj[B->i[i] + j] + 1;
      PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &data));
      if (!data) {
        /* one based table */
        PetscCall(PetscHMapISet(gid1_lid1, gid1, ++ec));
      }
    }
  }
  /* form array of columns we need */
  PetscCall(PetscMalloc1(ec, &garray));
  PetscHashIterBegin(gid1_lid1, tpos);
  while (!PetscHashIterAtEnd(gid1_lid1, tpos)) {
    PetscHashIterGetKey(gid1_lid1, tpos, gid);
    PetscHashIterGetVal(gid1_lid1, tpos, lid);
    PetscHashIterNext(gid1_lid1, tpos);
    gid--;
    lid--;
    garray[lid] = gid;
  }
  PetscCall(PetscSortInt(ec, garray));
  PetscCall(PetscHMapIClear(gid1_lid1));
  for (i = 0; i < ec; i++) PetscCall(PetscHMapISet(gid1_lid1, garray[i] + 1, i + 1));
  /* compact out the extra columns in B */
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &lid));
      lid--;
      aj[B->i[i] + j] = lid;
    }
  }
  B->nbs = ec;
  PetscCall(PetscLayoutDestroy(&baij->B->cmap));
  PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &baij->B->cmap));
  PetscCall(PetscHMapIDestroy(&gid1_lid1));
#else
  /* Make an array as long as the number of columns */
  /* mark those columns that are in baij->B */
  PetscCall(PetscCalloc1(Nbs, &indices));
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form array of columns we need */
  PetscCall(PetscMalloc1(ec, &garray));
  ec = 0;
  for (i = 0; i < Nbs; i++) {
    if (indices[i]) garray[ec++] = i;
  }

  /* make indices now point into garray */
  for (i = 0; i < ec; i++) indices[garray[i]] = i;

  /* compact out the extra columns in B */
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
  }
  B->nbs = ec;
  PetscCall(PetscLayoutDestroy(&baij->B->cmap));
  PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)baij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &baij->B->cmap));
  PetscCall(PetscFree(indices));
#endif

  /* create local vector that is used to scatter into */
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec * bs, &baij->lvec));

  /* create two temporary index sets for building scatter-gather */
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, garray, PETSC_COPY_VALUES, &from));

  PetscCall(PetscMalloc1(ec, &stmp));
  for (i = 0; i < ec; i++) stmp[i] = i;
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, stmp, PETSC_OWN_POINTER, &to));

  /* create temporary global vector to generate scatter context */
  PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec));

  PetscCall(VecScatterCreate(gvec, from, baij->lvec, to, &baij->Mvctx));
  PetscCall(VecScatterViewFromOptions(baij->Mvctx, (PetscObject)mat, "-matmult_vecscatter_view"));

  baij->garray = garray;

  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));
  PetscCall(VecDestroy(&gvec));
  PetscFunctionReturn(PETSC_SUCCESS);
}
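
/*
   For orientation: the scatter (baij->Mvctx) and local work vector (baij->lvec)
   built above are consumed by the parallel multiply. A rough sketch of the
   pattern, following MatMult_MPIBAIJ (see mpibaij.c for the authoritative
   version):

     PetscCall(VecScatterBegin(baij->Mvctx, xx, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
     PetscCall(MatMult(baij->A, xx, yy));                // diagonal block; overlaps the communication
     PetscCall(VecScatterEnd(baij->Mvctx, xx, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
     PetscCall(MatMultAdd(baij->B, baij->lvec, yy, yy)); // off-diagonal block applied to gathered ghost values
*/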

/*
   Takes the local part of an already assembled MPIBAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix-vector multiply.
   Thus certain data structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
*/
PetscErrorCode MatDisAssemble_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
  Mat          B    = baij->B, Bnew;
  Mat_SeqBAIJ *Bbaij;
  PetscInt     i, j, mbs, n = A->cmap->N, col, *garray = baij->garray;
  PetscInt     bs2 = baij->bs2, *nz = NULL, m = A->rmap->n;
  MatScalar   *a, *atmp;

  PetscFunctionBegin;
  /* free stuff related to matrix-vector multiply */
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  if (baij->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
    PetscCall(PetscFree(baij->colmap));
#endif
  }

  /* make sure that B is assembled so we can access its values */
  if (B) {
    PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
    Bbaij = (Mat_SeqBAIJ *)B->data;
    mbs   = Bbaij->mbs;
    a     = Bbaij->a;

    /* invent new B and copy stuff over */
    PetscCall(PetscMalloc1(mbs, &nz));
    for (i = 0; i < mbs; i++) nz[i] = Bbaij->i[i + 1] - Bbaij->i[i];
    PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Bnew));
    PetscCall(MatSetSizes(Bnew, m, n, m, n));
    PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name));
    PetscCall(MatSeqBAIJSetPreallocation(Bnew, B->rmap->bs, 0, nz));
    /*
      Ensure that B's nonzerostate is monotonically increasing.
      Or should this follow the MatSetValuesBlocked() loop to preserve B's nonzerostate across a MatDisAssemble() call?
    */
    Bnew->nonzerostate = B->nonzerostate;
    if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if nonnegative). */
      ((Mat_SeqBAIJ *)Bnew->data)->nonew = Bbaij->nonew;
    }

    PetscCall(MatSetOption(Bnew, MAT_ROW_ORIENTED, PETSC_FALSE));
    for (i = 0; i < mbs; i++) {
      for (j = Bbaij->i[i]; j < Bbaij->i[i + 1]; j++) {
        col  = garray[Bbaij->j[j]];
        atmp = a + j * bs2;
        PetscCall(MatSetValuesBlocked_SeqBAIJ(Bnew, 1, &i, 1, &col, atmp, B->insertmode));
      }
    }
    PetscCall(MatSetOption(Bnew, MAT_ROW_ORIENTED, PETSC_TRUE));

    PetscCall(PetscFree(nz));
    PetscCall(MatDestroy(&B));

    baij->B = Bnew;
  }
  PetscCall(PetscFree(baij->garray));
  A->was_assembled = PETSC_FALSE;
  A->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
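
/*
   Illustrative example (hypothetical sizes, not taken from the code above):
   suppose bs = 1 and the off-diagonal part B on some process references global
   block columns {3, 7, 9}. MatSetUpMultiply_MPIBAIJ() then builds
   garray = {3, 7, 9} and compacts B's column indices as 3 -> 0, 7 -> 1, 9 -> 2.
   The disassembly above inverts that map: each compacted column j of B is
   translated back to the global column garray[j] before its block of values is
   reinserted into Bnew.
*/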

/* ugly stuff added for Glenn; someday we should fix this up */

static PetscInt *uglyrmapd = NULL, *uglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */
static Vec       uglydd = NULL, uglyoo = NULL;        /* work vectors used to scale the two parts of the local matrix */

PetscErrorCode MatMPIBAIJDiagonalScaleLocalSetUp(Mat inA, Vec scale)
{
  Mat_MPIBAIJ *ina = (Mat_MPIBAIJ *)inA->data; /* access private part of the matrix */
  Mat_SeqBAIJ *B   = (Mat_SeqBAIJ *)ina->B->data;
  PetscInt     bs = inA->rmap->bs, i, n, nt, j, cstart, cend, no, *garray = ina->garray, *lindices;
  PetscInt    *r_rmapd, *r_rmapo;

  PetscFunctionBegin;
  PetscCall(MatGetOwnershipRange(inA, &cstart, &cend));
  PetscCall(MatGetSize(ina->A, NULL, &n));
  PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapd));
  nt = 0;
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (inA->rmap->mapping->indices[i] * bs >= cstart && inA->rmap->mapping->indices[i] * bs < cend) {
      nt++;
      r_rmapd[i] = inA->rmap->mapping->indices[i] + 1;
    }
  }
  PetscCheck(nt * bs == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt*bs %" PetscInt_FMT " n %" PetscInt_FMT, nt * bs, n);
  PetscCall(PetscMalloc1(n + 1, &uglyrmapd));
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (r_rmapd[i]) {
      for (j = 0; j < bs; j++) uglyrmapd[(r_rmapd[i] - 1) * bs + j - cstart] = i * bs + j;
    }
  }
  PetscCall(PetscFree(r_rmapd));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, n, &uglydd));

  PetscCall(PetscCalloc1(ina->Nbs + 1, &lindices));
  for (i = 0; i < B->nbs; i++) lindices[garray[i]] = i + 1;
  no = inA->rmap->mapping->n - nt;
  PetscCall(PetscCalloc1(inA->rmap->mapping->n + 1, &r_rmapo));
  nt = 0;
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (lindices[inA->rmap->mapping->indices[i]]) {
      nt++;
      r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]];
    }
  }
  PetscCheck(nt <= no, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT, nt, no);
  PetscCall(PetscFree(lindices));
  PetscCall(PetscMalloc1(nt * bs + 1, &uglyrmapo));
  for (i = 0; i < inA->rmap->mapping->n; i++) {
    if (r_rmapo[i]) {
      for (j = 0; j < bs; j++) uglyrmapo[(r_rmapo[i] - 1) * bs + j] = i * bs + j;
    }
  }
  PetscCall(PetscFree(r_rmapo));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, nt * bs, &uglyoo));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMPIBAIJDiagonalScaleLocal(Mat A, Vec scale)
{
  /* This routine should really be abandoned as it duplicates MatDiagonalScaleLocal() */

  PetscFunctionBegin;
  PetscTryMethod(A, "MatDiagonalScaleLocal_C", (Mat, Vec), (A, scale));
  PetscFunctionReturn(PETSC_SUCCESS);
}
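
/*
   The implementation below relies on the two maps built in
   MatMPIBAIJDiagonalScaleLocalSetUp(): for each entry k of the "diagonal" work
   vector uglydd, uglyrmapd[k] gives the position in the locally indexed scale
   vector holding its value, and uglyrmapo does the same for the "off-diagonal"
   work vector uglyoo. Together they split a vector given in the local (ghosted)
   ordering into the pieces that column-scale a->A and a->B respectively.
*/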

PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat A, Vec scale)
{
  Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data; /* access private part of the matrix */
  PetscInt           n, i;
  PetscScalar       *d, *o;
  const PetscScalar *s;

  PetscFunctionBegin;
  if (!uglyrmapd) PetscCall(MatMPIBAIJDiagonalScaleLocalSetUp(A, scale));

  PetscCall(VecGetArrayRead(scale, &s));

  PetscCall(VecGetLocalSize(uglydd, &n));
  PetscCall(VecGetArray(uglydd, &d));
  for (i = 0; i < n; i++) d[i] = s[uglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */
  PetscCall(VecRestoreArray(uglydd, &d));
  /* column scale "diagonal" portion of local matrix */
  PetscCall(MatDiagonalScale(a->A, NULL, uglydd));

  PetscCall(VecGetLocalSize(uglyoo, &n));
  PetscCall(VecGetArray(uglyoo, &o));
  for (i = 0; i < n; i++) o[i] = s[uglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */
  PetscCall(VecRestoreArrayRead(scale, &s));
  PetscCall(VecRestoreArray(uglyoo, &o));
  /* column scale "off-diagonal" portion of local matrix */
  PetscCall(MatDiagonalScale(a->B, NULL, uglyoo));
  PetscFunctionReturn(PETSC_SUCCESS);
}
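
/*
   Minimal usage sketch (hypothetical driver code, not part of this file):
   column-scale an assembled MATMPIBAIJ matrix by a vector given in the local
   ordering established with MatSetLocalToGlobalMapping():

     Mat A;      // assembled MATMPIBAIJ with a local-to-global mapping set
     Vec lscale; // sequential vector with bs entries per local point of the mapping
     PetscCall(MatMPIBAIJDiagonalScaleLocal(A, lscale));
*/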