/*
   Support for the parallel SBAIJ matrix vector multiply
*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>

/*
   MatSetUpMultiply_MPISBAIJ - Builds the communication machinery needed by the
   parallel SBAIJ matrix-vector product.

   It gathers the distinct global block columns referenced by the off-diagonal
   part B, stores them sorted in sbaij->garray, renumbers B's column indices to
   the compact local numbering 0..ec-1, and creates:

     - lvec / Mvctx:   a sequential work vector (ec*bs entries) and the scatter
                       that fills it with the needed off-process x entries.
                       As noted below, these are not used by MatMult_MPISBAIJ()
                       itself but by e.g. MatDiagonalScale_MPISBAIJ().
     - slvec0 / slvec1 / sMvctx: parallel work vectors of local size
                       (mbs+ec)*bs and the scatter used by the symmetric
                       multiply; the index sets carry both the x entries to
                       fetch and the slots for off-process contributions to b.
     - slvec1a / slvec1b / slvec0b: sequential vectors aliasing the owned part
                       (first bs*mbs entries) and the ghost part of the arrays
                       of slvec1/slvec0.

   Collective on mat.
*/
PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ *)mat->data;
  Mat_SeqBAIJ    *B     = (Mat_SeqBAIJ *)sbaij->B->data;
  PetscInt        i, j, *aj = B->j, ec = 0, *garray, *sgarray;
  PetscInt        bs = mat->rmap->bs, *stmp, mbs = sbaij->mbs, vec_size, nt;
  IS              from, to;
  Vec             gvec;
  PetscMPIInt     rank = sbaij->rank, lsize;
  PetscInt       *owners = sbaij->rangebs, *ec_owner, k; /* rangebs: per-rank ownership ranges in block units — see the owners[] scans below */
  const PetscInt *sowners;
  PetscScalar    *ptr;
#if defined(PETSC_USE_CTABLE)
  PetscHMapI    gid1_lid1 = NULL; /* one-based gid to lid table; 1-based so a value of 0 means "not seen yet" */
  PetscHashIter tpos;
  PetscInt      gid, lid;
#else
  PetscInt  Nbs = sbaij->Nbs;
  PetscInt *indices;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* count the distinct global block columns of B, assigning each new one the next 1-based local id */
  PetscCall(PetscHMapICreateWithSize(mbs, &gid1_lid1));
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      PetscInt data, gid1 = aj[B->i[i] + j] + 1;
      PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &data));
      if (!data) PetscCall(PetscHMapISet(gid1_lid1, gid1, ++ec));
    }
  }
  /* form array of columns we need */
  PetscCall(PetscMalloc1(ec, &garray));
  PetscHashIterBegin(gid1_lid1, tpos);
  while (!PetscHashIterAtEnd(gid1_lid1, tpos)) {
    PetscHashIterGetKey(gid1_lid1, tpos, gid);
    PetscHashIterGetVal(gid1_lid1, tpos, lid);
    PetscHashIterNext(gid1_lid1, tpos);
    gid--; /* back to 0-based global block column */
    lid--; /* back to 0-based local id */
    garray[lid] = gid;
  }
  /* sort garray, then rebuild the map so local ids follow the sorted order */
  PetscCall(PetscSortInt(ec, garray));
  PetscCall(PetscHMapIClear(gid1_lid1));
  for (i = 0; i < ec; i++) PetscCall(PetscHMapISet(gid1_lid1, garray[i] + 1, i + 1));
  /* compact out the extra columns in B: rewrite B's column indices from global to local numbering */
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &lid));
      lid--;
      aj[B->i[i] + j] = lid;
    }
  }
  PetscCall(PetscHMapIDestroy(&gid1_lid1));
  /* ec_owner[i] = rank owning global block column garray[i]; garray is sorted, so j only advances */
  PetscCall(PetscMalloc2(2 * ec, &sgarray, ec, &ec_owner));
  for (i = j = 0; i < ec; i++) {
    while (garray[i] >= owners[j + 1]) j++;
    ec_owner[i] = j;
  }
#else
  /* For the first stab we make an array as long as the number of columns */
  /* mark those columns that are in sbaij->B */
  PetscCall(PetscCalloc1(Nbs, &indices));
  for (i = 0; i < mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form arrays of columns we need */
  PetscCall(PetscMalloc1(ec, &garray));
  PetscCall(PetscMalloc2(2 * ec, &sgarray, ec, &ec_owner));

  /* walking ranks in order yields garray sorted and ec_owner[i] = owning rank of garray[i] */
  ec = 0;
  for (j = 0; j < sbaij->size; j++) {
    for (i = owners[j]; i < owners[j + 1]; i++) {
      if (indices[i]) {
        garray[ec]   = i;
        ec_owner[ec] = j;
        ec++;
      }
    }
  }

  /* make indices now point into garray */
  for (i = 0; i < ec; i++) indices[garray[i]] = i;

  /* compact out the extra columns in B */
  for (i = 0; i < mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
  }
  PetscCall(PetscFree(indices));
#endif
  /* B now has ec block columns; rebuild its column layout to match */
  B->nbs = ec;
  PetscCall(PetscLayoutDestroy(&sbaij->B->cmap));
  PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sbaij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &sbaij->B->cmap));

  PetscCall(VecScatterDestroy(&sbaij->sMvctx));
  /* create local vector that is used to scatter into */
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec * bs, &sbaij->lvec));

  /* create two temporary index sets for building scatter-gather */
  PetscCall(PetscMalloc1(2 * ec, &stmp));
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, garray, PETSC_COPY_VALUES, &from));
  for (i = 0; i < ec; i++) stmp[i] = i;
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, stmp, PETSC_COPY_VALUES, &to));

  /* generate the scatter context
     -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but have other uses, such as in MatDiagonalScale_MPISBAIJ */
  PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec));
  PetscCall(VecScatterCreate(gvec, from, sbaij->lvec, to, &sbaij->Mvctx));
  PetscCall(VecDestroy(&gvec));

  sbaij->garray = garray;

  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));

  /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
  lsize = (mbs + ec) * bs; /* owned part followed by ghost part */
  PetscCall(VecCreateMPI(PetscObjectComm((PetscObject)mat), lsize, PETSC_DETERMINE, &sbaij->slvec0));
  PetscCall(VecDuplicate(sbaij->slvec0, &sbaij->slvec1));
  PetscCall(VecGetSize(sbaij->slvec0, &vec_size));

  PetscCall(VecGetOwnershipRanges(sbaij->slvec0, &sowners));

  /* x index in the IS sfrom: shift each needed global block column into slvec0's
     global block numbering, which interleaves every rank's (mbs+ec) blocks */
  for (i = 0; i < ec; i++) {
    j          = ec_owner[i];
    sgarray[i] = garray[i] + (sowners[j] / bs - owners[j]);
  }
  /* b index in the IS sfrom: this rank's ghost slots, which start after its mbs owned blocks */
  k = sowners[rank] / bs + mbs;
  for (i = ec, j = 0; i < 2 * ec; i++, j++) sgarray[i] = k + j;
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, 2 * ec, sgarray, PETSC_COPY_VALUES, &from));

  /* x index in the IS sto */
  k = sowners[rank] / bs + mbs;
  for (i = 0; i < ec; i++) stmp[i] = (k + i);
  /* b index in the IS sto */
  for (i = ec; i < 2 * ec; i++) stmp[i] = sgarray[i - ec];

  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, 2 * ec, stmp, PETSC_COPY_VALUES, &to));

  PetscCall(VecScatterCreate(sbaij->slvec0, from, sbaij->slvec1, to, &sbaij->sMvctx));
  PetscCall(VecScatterViewFromOptions(sbaij->sMvctx, (PetscObject)mat, "-matmult_vecscatter_view"));

  /* slvec1a/slvec1b alias the owned (first bs*mbs entries) and ghost parts of slvec1's array;
     slvec0b aliases the ghost part of slvec0's array. No extra storage is allocated. */
  PetscCall(VecGetLocalSize(sbaij->slvec1, &nt));
  PetscCall(VecGetArray(sbaij->slvec1, &ptr));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, ptr, &sbaij->slvec1a));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, PetscSafePointerPlusOffset(ptr, bs * mbs), &sbaij->slvec1b));
  PetscCall(VecRestoreArray(sbaij->slvec1, &ptr));

  PetscCall(VecGetArray(sbaij->slvec0, &ptr));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, PetscSafePointerPlusOffset(ptr, bs * mbs), &sbaij->slvec0b));
  PetscCall(VecRestoreArray(sbaij->slvec0, &ptr));

  PetscCall(PetscFree(stmp));

  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));
  PetscCall(PetscFree2(sgarray, ec_owner));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Takes the local part of an already assembled MPISBAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix vector multiply.
   Thus certain data-structures must be rebuilt.

   Concretely: B's column indices are in the compact local numbering set up by
   MatSetUpMultiply_MPISBAIJ(); this routine rebuilds B as a matrix with the
   full N global block columns (mapping indices back through garray) and frees
   the multiply machinery (lvec, scatters, slvec*, colmap, garray) so it can be
   regenerated after new assembly.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
*/
PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *baij  = (Mat_MPISBAIJ *)A->data;
  Mat           B     = baij->B, Bnew;
  Mat_SeqBAIJ  *Bbaij = (Mat_SeqBAIJ *)B->data;
  PetscInt      i, j, mbs = Bbaij->mbs, n = A->cmap->N, col, *garray = baij->garray;
  PetscInt      k, bs = A->rmap->bs, bs2 = baij->bs2, *rvals, *nz, m = A->rmap->n;
  MatScalar    *a = Bbaij->a;
  PetscScalar  *atmp;
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscInt l;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  /* MatScalar and PetscScalar differ in precision here, so values are staged through atmp */
  PetscCall(PetscMalloc1(A->rmap->bs, &atmp));
#endif
  /* free stuff related to matrix-vec multiply */
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));

  PetscCall(VecDestroy(&baij->slvec0));
  PetscCall(VecDestroy(&baij->slvec0b));
  PetscCall(VecDestroy(&baij->slvec1));
  PetscCall(VecDestroy(&baij->slvec1a));
  PetscCall(VecDestroy(&baij->slvec1b));

  if (baij->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
    PetscCall(PetscFree(baij->colmap));
#endif
  }

  /* make sure that B is assembled so we can access its values */
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  /* invent new B and copy stuff over: same row counts, but n (= A->cmap->N) columns */
  PetscCall(PetscMalloc1(mbs, &nz));
  for (i = 0; i < mbs; i++) nz[i] = Bbaij->i[i + 1] - Bbaij->i[i];
  PetscCall(MatCreate(PETSC_COMM_SELF, &Bnew));
  PetscCall(MatSetSizes(Bnew, m, n, m, n));
  PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name));
  PetscCall(MatSeqBAIJSetPreallocation(Bnew, B->rmap->bs, 0, nz));
  PetscCall(PetscFree(nz));

  if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */
    ((Mat_SeqSBAIJ *)Bnew->data)->nonew = Bbaij->nonew;
  }

  /*
   Ensure that B's nonzerostate is monotonically increasing.
   Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call?
   */
  Bnew->nonzerostate = B->nonzerostate;
  /* copy each stored block into Bnew one point-row at a time, translating block columns via garray */
  PetscCall(PetscMalloc1(bs, &rvals));
  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * i; /* point rows of block row i */
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = Bbaij->i[i]; j < Bbaij->i[i + 1]; j++) {
      col = garray[Bbaij->j[j]] * bs; /* first point column of the block, in global numbering */
      for (k = 0; k < bs; k++) {
#if defined(PETSC_USE_REAL_MAT_SINGLE)
        /* NOTE(review): unlike the branch below, this copy does not offset by k * bs,
           so it appears to re-copy the block's first column for every k — verify the
           intended behavior when PETSC_USE_REAL_MAT_SINGLE is enabled */
        for (l = 0; l < bs; l++) atmp[l] = a[j * bs2 + l];
#else
        atmp = a + j * bs2 + k * bs; /* column k of block j (blocks stored column-wise within bs2 entries) */
#endif
        PetscCall(MatSetValues_SeqSBAIJ(Bnew, bs, rvals, 1, &col, atmp, B->insertmode));
        col++;
      }
    }
  }
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscCall(PetscFree(atmp));
#endif
  /* garray is invalidated by disassembly; it is rebuilt on the next MatSetUpMultiply_MPISBAIJ() */
  PetscCall(PetscFree(baij->garray));

  baij->garray = NULL;

  PetscCall(PetscFree(rvals));
  PetscCall(MatDestroy(&B));

  baij->B = Bnew;

  A->was_assembled = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}