/*
   Support for the parallel SBAIJ matrix vector multiply
*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>

PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *sbaij = (Mat_MPISBAIJ *)mat->data;
  Mat_SeqBAIJ    *B     = (Mat_SeqBAIJ *)sbaij->B->data;
  PetscInt        i, j, *aj = B->j, ec = 0, *garray, *sgarray;
  PetscInt        bs = mat->rmap->bs, *stmp, mbs = sbaij->mbs, vec_size, nt;
  IS              from, to;
  Vec             gvec;
  PetscMPIInt     rank = sbaij->rank;
  PetscInt       *owners = sbaij->rangebs, *ec_owner, k;
  const PetscInt *sowners;
  PetscScalar    *ptr;
#if defined(PETSC_USE_CTABLE)
  PetscHMapI    gid1_lid1 = NULL; /* one-based gid to lid table */
  PetscHashIter tpos;
  PetscInt      gid, lid;
#else
  PetscInt  Nbs = sbaij->Nbs;
  PetscInt *indices;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapICreateWithSize(mbs, &gid1_lid1));
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      PetscInt data, gid1 = aj[B->i[i] + j] + 1;
      PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &data));
      if (!data) PetscCall(PetscHMapISet(gid1_lid1, gid1, ++ec));
    }
  }
  /* form array of columns we need */
  PetscCall(PetscMalloc1(ec, &garray));
  PetscHashIterBegin(gid1_lid1, tpos);
  while (!PetscHashIterAtEnd(gid1_lid1, tpos)) {
    PetscHashIterGetKey(gid1_lid1, tpos, gid);
    PetscHashIterGetVal(gid1_lid1, tpos, lid);
    PetscHashIterNext(gid1_lid1, tpos);
    gid--;
    lid--;
    garray[lid] = gid;
  }
  PetscCall(PetscSortInt(ec, garray));
  PetscCall(PetscHMapIClear(gid1_lid1));
  for (i = 0; i < ec; i++) PetscCall(PetscHMapISet(gid1_lid1, garray[i] + 1, i + 1));
  /* compact out the extra columns in B */
  for (i = 0; i < B->mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      PetscInt gid1 = aj[B->i[i] + j] + 1;
      PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &lid));
      lid--;
      aj[B->i[i] + j] = lid;
    }
  }
  PetscCall(PetscHMapIDestroy(&gid1_lid1));
  PetscCall(PetscMalloc2(2 * ec, &sgarray, ec, &ec_owner));
  for (i = j = 0; i < ec; i++) {
    while (garray[i] >= owners[j + 1]) j++;
    ec_owner[i] = j;
  }
#else
  /* For the first stab we make an array as long as the number of columns */
  /* mark those columns that are in sbaij->B */
  PetscCall(PetscCalloc1(Nbs, &indices));
  for (i = 0; i < mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) {
      if (!indices[aj[B->i[i] + j]]) ec++;
      indices[aj[B->i[i] + j]] = 1;
    }
  }

  /* form arrays of columns we need */
  PetscCall(PetscMalloc1(ec, &garray));
  PetscCall(PetscMalloc2(2 * ec, &sgarray, ec, &ec_owner));

  ec = 0;
  for (j = 0; j < sbaij->size; j++) {
    for (i = owners[j]; i < owners[j + 1]; i++) {
      if (indices[i]) {
        garray[ec]   = i;
        ec_owner[ec] = j;
        ec++;
      }
    }
  }

  /* make indices now point into garray */
  for (i = 0; i < ec; i++) indices[garray[i]] = i;

  /* compact out the extra columns in B */
  for (i = 0; i < mbs; i++) {
    for (j = 0; j < B->ilen[i]; j++) aj[B->i[i] + j] = indices[aj[B->i[i] + j]];
  }
  PetscCall(PetscFree(indices));
#endif
  B->nbs = ec;
  PetscCall(PetscLayoutDestroy(&sbaij->B->cmap));
  PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sbaij->B), ec * mat->rmap->bs, ec * mat->rmap->bs, mat->rmap->bs, &sbaij->B->cmap));

  PetscCall(VecScatterDestroy(&sbaij->sMvctx));
  /* create local vector that is used to scatter into */
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec * bs, &sbaij->lvec));
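
  /*
     At this point garray[0..ec-1] lists, in increasing order, the global block
     columns of the off-diagonal part B that this process needs but does not
     own, and B's column indices have been compacted to index into garray.
     The index sets below are phrased in terms of this local-to-global mapping.
  */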
  /* create two temporary index sets for building scatter-gather */
  PetscCall(PetscMalloc1(2 * ec, &stmp));
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, garray, PETSC_COPY_VALUES, &from));
  for (i = 0; i < ec; i++) stmp[i] = i;
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, ec, stmp, PETSC_COPY_VALUES, &to));

  /* generate the scatter context
     -- Mvctx and lvec are not used by MatMult_MPISBAIJ(), but have other uses, such as in MatDiagonalScale_MPISBAIJ */
  PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec));
  PetscCall(VecScatterCreate(gvec, from, sbaij->lvec, to, &sbaij->Mvctx));
  PetscCall(VecDestroy(&gvec));

  sbaij->garray = garray;

  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));

  /* create parallel vector that is used by SBAIJ matrix to scatter from/into */
  PetscCall(VecCreateMPI(PetscObjectComm((PetscObject)mat), (mbs + ec) * bs, PETSC_DETERMINE, &sbaij->slvec0));
  PetscCall(VecDuplicate(sbaij->slvec0, &sbaij->slvec1));
  PetscCall(VecGetSize(sbaij->slvec0, &vec_size));

  PetscCall(VecGetOwnershipRanges(sbaij->slvec0, &sowners));

  /* x index in the IS sfrom */
  for (i = 0; i < ec; i++) {
    j          = ec_owner[i];
    sgarray[i] = garray[i] + (sowners[j] / bs - owners[j]);
  }
  /* b index in the IS sfrom */
  k = sowners[rank] / bs + mbs;
  for (i = ec, j = 0; i < 2 * ec; i++, j++) sgarray[i] = k + j;
  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, 2 * ec, sgarray, PETSC_COPY_VALUES, &from));

  /* x index in the IS sto */
  k = sowners[rank] / bs + mbs;
  for (i = 0; i < ec; i++) stmp[i] = (k + i);
  /* b index in the IS sto */
  for (i = ec; i < 2 * ec; i++) stmp[i] = sgarray[i - ec];

  PetscCall(ISCreateBlock(PETSC_COMM_SELF, bs, 2 * ec, stmp, PETSC_COPY_VALUES, &to));

  PetscCall(VecScatterCreate(sbaij->slvec0, from, sbaij->slvec1, to, &sbaij->sMvctx));
  PetscCall(VecScatterViewFromOptions(sbaij->sMvctx, (PetscObject)mat, "-matmult_vecscatter_view"));

  PetscCall(VecGetLocalSize(sbaij->slvec1, &nt));
  PetscCall(VecGetArray(sbaij->slvec1, &ptr));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, bs * mbs, ptr, &sbaij->slvec1a));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, PetscSafePointerPlusOffset(ptr, bs * mbs), &sbaij->slvec1b));
  PetscCall(VecRestoreArray(sbaij->slvec1, &ptr));

  PetscCall(VecGetArray(sbaij->slvec0, &ptr));
  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nt - bs * mbs, PetscSafePointerPlusOffset(ptr, bs * mbs), &sbaij->slvec0b));
  PetscCall(VecRestoreArray(sbaij->slvec0, &ptr));

  PetscCall(PetscFree(stmp));

  PetscCall(ISDestroy(&from));
  PetscCall(ISDestroy(&to));
  PetscCall(PetscFree2(sgarray, ec_owner));
  PetscFunctionReturn(PETSC_SUCCESS);
}
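
/*
   A sketch of the layout built above (these conventions are relied on by the
   SBAIJ multiply routines, not enforced here): slvec0 and slvec1 each have
   (mbs + ec)*bs local entries; slvec1a aliases the first mbs*bs entries of
   slvec1 (the locally owned block rows), while slvec0b/slvec1b alias the
   trailing ec*bs entries (the ghost blocks listed in garray). The single
   scatter sMvctx carries both kinds of traffic at once: the "x" entries
   gather ghost values of the input vector, and the "b" entries return the
   off-process contributions arising from the transpose of the off-diagonal
   block, as needed for the symmetric multiply.
*/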
/*
     Takes the local part of an already assembled MPISBAIJ matrix
   and disassembles it. This is to allow new nonzeros into the matrix
   that require more communication in the matrix vector multiply.
   Thus certain data-structures must be rebuilt.

   Kind of slow! But that's what application programmers get when
   they are sloppy.
*/
PetscErrorCode MatDisAssemble_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *baij  = (Mat_MPISBAIJ *)A->data;
  Mat           B     = baij->B, Bnew;
  Mat_SeqBAIJ  *Bbaij = (Mat_SeqBAIJ *)B->data;
  PetscInt      i, j, mbs = Bbaij->mbs, n = A->cmap->N, col, *garray = baij->garray;
  PetscInt      k, bs = A->rmap->bs, bs2 = baij->bs2, *rvals, *nz, m = A->rmap->n;
  MatScalar    *a = Bbaij->a;
  PetscScalar  *atmp;
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscInt l;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscCall(PetscMalloc1(A->rmap->bs, &atmp));
#endif
  /* free stuff related to matrix-vec multiply */
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));

  PetscCall(VecDestroy(&baij->slvec0));
  PetscCall(VecDestroy(&baij->slvec0b));
  PetscCall(VecDestroy(&baij->slvec1));
  PetscCall(VecDestroy(&baij->slvec1a));
  PetscCall(VecDestroy(&baij->slvec1b));

  if (baij->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
    PetscCall(PetscFree(baij->colmap));
#endif
  }

  /* make sure that B is assembled so we can access its values */
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  /* invent new B and copy stuff over */
  PetscCall(PetscMalloc1(mbs, &nz));
  for (i = 0; i < mbs; i++) nz[i] = Bbaij->i[i + 1] - Bbaij->i[i];
  PetscCall(MatCreate(PETSC_COMM_SELF, &Bnew));
  PetscCall(MatSetSizes(Bnew, m, n, m, n));
  PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name));
  PetscCall(MatSeqBAIJSetPreallocation(Bnew, B->rmap->bs, 0, nz));
  PetscCall(PetscFree(nz));

  if (Bbaij->nonew >= 0) { /* Inherit insertion error options (if positive). */
    ((Mat_SeqSBAIJ *)Bnew->data)->nonew = Bbaij->nonew;
  }

  /*
     Ensure that B's nonzerostate is monotonically increasing.
     Or should this follow the MatSetValues() loop to preserve B's nonzerostate across a MatDisAssemble() call?
  */
  Bnew->nonzerostate = B->nonzerostate;
  PetscCall(PetscMalloc1(bs, &rvals));
  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * i;
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = Bbaij->i[i]; j < Bbaij->i[i + 1]; j++) {
      col = garray[Bbaij->j[j]] * bs;
      for (k = 0; k < bs; k++) {
#if defined(PETSC_USE_REAL_MAT_SINGLE)
        /* copy column k of block j, converting MatScalar entries to PetscScalar */
        for (l = 0; l < bs; l++) atmp[l] = a[j * bs2 + k * bs + l];
#else
        atmp = a + j * bs2 + k * bs;
#endif
        PetscCall(MatSetValues_SeqSBAIJ(Bnew, bs, rvals, 1, &col, atmp, B->insertmode));
        col++;
      }
    }
  }
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  PetscCall(PetscFree(atmp));
#endif
  PetscCall(PetscFree(baij->garray));

  baij->garray = NULL;

  PetscCall(PetscFree(rvals));
  PetscCall(MatDestroy(&B));

  baij->B = Bnew;

  A->was_assembled = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}