1 /* 2 Support for the parallel SELL matrix vector multiply 3 */ 4 #include <../src/mat/impls/sell/mpi/mpisell.h> 5 #include <petsc/private/isimpl.h> /* needed because accesses data structure of ISLocalToGlobalMapping directly */ 6 7 /* 8 Takes the local part of an already assembled MPISELL matrix 9 and disassembles it. This is to allow new nonzeros into the matrix 10 that require more communication in the matrix vector multiply. 11 Thus certain data-structures must be rebuilt. 12 13 Kind of slow! But that's what application programmers get when 14 they are sloppy. 15 */ 16 PetscErrorCode MatDisAssemble_MPISELL(Mat A) 17 { 18 Mat_MPISELL *sell = (Mat_MPISELL *)A->data; 19 Mat B = sell->B, Bnew; 20 Mat_SeqSELL *Bsell = (Mat_SeqSELL *)B->data; 21 PetscInt i, j, totalslices, N = A->cmap->N, row; 22 PetscBool isnonzero; 23 24 PetscFunctionBegin; 25 /* free stuff related to matrix-vec multiply */ 26 PetscCall(VecDestroy(&sell->lvec)); 27 PetscCall(VecScatterDestroy(&sell->Mvctx)); 28 if (sell->colmap) { 29 #if defined(PETSC_USE_CTABLE) 30 PetscCall(PetscHMapIDestroy(&sell->colmap)); 31 #else 32 PetscCall(PetscFree(sell->colmap)); 33 #endif 34 } 35 36 /* make sure that B is assembled so we can access its values */ 37 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 38 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 39 40 /* invent new B and copy stuff over */ 41 PetscCall(MatCreate(PETSC_COMM_SELF, &Bnew)); 42 PetscCall(MatSetSizes(Bnew, B->rmap->n, N, B->rmap->n, N)); 43 PetscCall(MatSetBlockSizesFromMats(Bnew, A, A)); 44 PetscCall(MatSetType(Bnew, ((PetscObject)B)->type_name)); 45 PetscCall(MatSeqSELLSetPreallocation(Bnew, 0, Bsell->rlen)); 46 if (Bsell->nonew >= 0) { /* Inherit insertion error options (if positive). */ 47 ((Mat_SeqSELL *)Bnew->data)->nonew = Bsell->nonew; 48 } 49 50 /* 51 Ensure that B's nonzerostate is monotonically increasing. 52 Or should this follow the MatSetValues() loop to preserve B's nonzerstate across a MatDisAssemble() call? 53 */ 54 Bnew->nonzerostate = B->nonzerostate; 55 56 totalslices = PetscCeilInt(B->rmap->n, Bsell->sliceheight); 57 for (i = 0; i < totalslices; i++) { /* loop over slices */ 58 for (j = Bsell->sliidx[i], row = 0; j < Bsell->sliidx[i + 1]; j++, row = (row + 1) % Bsell->sliceheight) { 59 isnonzero = (PetscBool)((j - Bsell->sliidx[i]) / Bsell->sliceheight < Bsell->rlen[Bsell->sliceheight * i + row]); 60 if (isnonzero) PetscCall(MatSetValue(Bnew, Bsell->sliceheight * i + row, sell->garray[Bsell->colidx[j]], Bsell->val[j], B->insertmode)); 61 } 62 } 63 64 PetscCall(PetscFree(sell->garray)); 65 PetscCall(MatDestroy(&B)); 66 67 sell->B = Bnew; 68 A->was_assembled = PETSC_FALSE; 69 A->assembled = PETSC_FALSE; 70 PetscFunctionReturn(PETSC_SUCCESS); 71 } 72 73 PetscErrorCode MatSetUpMultiply_MPISELL(Mat mat) 74 { 75 Mat_MPISELL *sell = (Mat_MPISELL *)mat->data; 76 Mat_SeqSELL *B = (Mat_SeqSELL *)sell->B->data; 77 PetscInt i, j, *bcolidx = B->colidx, ec = 0, *garray, totalslices; 78 IS from, to; 79 Vec gvec; 80 PetscBool isnonzero; 81 #if defined(PETSC_USE_CTABLE) 82 PetscHMapI gid1_lid1 = NULL; 83 PetscHashIter tpos; 84 PetscInt gid, lid; 85 #else 86 PetscInt N = mat->cmap->N, *indices; 87 #endif 88 89 PetscFunctionBegin; 90 totalslices = PetscCeilInt(sell->B->rmap->n, B->sliceheight); 91 92 /* ec counts the number of columns that contain nonzeros */ 93 #if defined(PETSC_USE_CTABLE) 94 /* use a table */ 95 PetscCall(PetscHMapICreateWithSize(sell->B->rmap->n, &gid1_lid1)); 96 for (i = 0; i < totalslices; i++) { /* loop over slices */ 97 for (j = B->sliidx[i]; j < B->sliidx[i + 1]; j++) { 98 isnonzero = (PetscBool)((j - B->sliidx[i]) / B->sliceheight < B->rlen[i * B->sliceheight + j % B->sliceheight]); 99 if (isnonzero) { /* check the mask bit */ 100 PetscInt data, gid1 = bcolidx[j] + 1; 101 102 PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &data)); 103 /* one based table */ 104 if (!data) PetscCall(PetscHMapISet(gid1_lid1, gid1, ++ec)); 105 } 106 } 107 } 108 109 /* form array of columns we need */ 110 PetscCall(PetscMalloc1(ec, &garray)); 111 PetscHashIterBegin(gid1_lid1, tpos); 112 while (!PetscHashIterAtEnd(gid1_lid1, tpos)) { 113 PetscHashIterGetKey(gid1_lid1, tpos, gid); 114 PetscHashIterGetVal(gid1_lid1, tpos, lid); 115 PetscHashIterNext(gid1_lid1, tpos); 116 gid--; 117 lid--; 118 garray[lid] = gid; 119 } 120 PetscCall(PetscSortInt(ec, garray)); /* sort, and rebuild */ 121 PetscCall(PetscHMapIClear(gid1_lid1)); 122 for (i = 0; i < ec; i++) PetscCall(PetscHMapISet(gid1_lid1, garray[i] + 1, i + 1)); 123 124 /* compact out the extra columns in B */ 125 for (i = 0; i < totalslices; i++) { /* loop over slices */ 126 for (j = B->sliidx[i]; j < B->sliidx[i + 1]; j++) { 127 isnonzero = (PetscBool)((j - B->sliidx[i]) / B->sliceheight < B->rlen[i * B->sliceheight + j % B->sliceheight]); 128 if (isnonzero) { 129 PetscInt gid1 = bcolidx[j] + 1; 130 PetscCall(PetscHMapIGetWithDefault(gid1_lid1, gid1, 0, &lid)); 131 lid--; 132 bcolidx[j] = lid; 133 } 134 } 135 } 136 PetscCall(PetscLayoutDestroy(&sell->B->cmap)); 137 PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sell->B), ec, ec, 1, &sell->B->cmap)); 138 PetscCall(PetscHMapIDestroy(&gid1_lid1)); 139 #else 140 /* Make an array as long as the number of columns */ 141 PetscCall(PetscCalloc1(N, &indices)); 142 /* mark those columns that are in sell->B */ 143 for (i = 0; i < totalslices; i++) { /* loop over slices */ 144 for (j = B->sliidx[i]; j < B->sliidx[i + 1]; j++) { 145 isnonzero = (PetscBool)((j - B->sliidx[i]) / B->sliceheight < B->rlen[i * B->sliceheight + j % B->sliceheight]); 146 if (isnonzero) { 147 if (!indices[bcolidx[j]]) ec++; 148 indices[bcolidx[j]] = 1; 149 } 150 } 151 } 152 153 /* form array of columns we need */ 154 PetscCall(PetscMalloc1(ec, &garray)); 155 ec = 0; 156 for (i = 0; i < N; i++) { 157 if (indices[i]) garray[ec++] = i; 158 } 159 160 /* make indices now point into garray */ 161 for (i = 0; i < ec; i++) indices[garray[i]] = i; 162 163 /* compact out the extra columns in B */ 164 for (i = 0; i < totalslices; i++) { /* loop over slices */ 165 for (j = B->sliidx[i]; j < B->sliidx[i + 1]; j++) { 166 isnonzero = (PetscBool)((j - B->sliidx[i]) / B->sliceheight < B->rlen[i * B->sliceheight + j % B->sliceheight]); 167 if (isnonzero) bcolidx[j] = indices[bcolidx[j]]; 168 } 169 } 170 PetscCall(PetscLayoutDestroy(&sell->B->cmap)); 171 PetscCall(PetscLayoutCreateFromSizes(PetscObjectComm((PetscObject)sell->B), ec, ec, 1, &sell->B->cmap)); 172 PetscCall(PetscFree(indices)); 173 #endif 174 /* create local vector that is used to scatter into */ 175 PetscCall(VecCreateSeq(PETSC_COMM_SELF, ec, &sell->lvec)); 176 /* create two temporary Index sets for build scatter gather */ 177 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ec, garray, PETSC_COPY_VALUES, &from)); 178 PetscCall(ISCreateStride(PETSC_COMM_SELF, ec, 0, 1, &to)); 179 180 /* create temporary global vector to generate scatter context */ 181 /* This does not allocate the array's memory so is efficient */ 182 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat), 1, mat->cmap->n, mat->cmap->N, NULL, &gvec)); 183 184 /* generate the scatter context */ 185 PetscCall(VecScatterCreate(gvec, from, sell->lvec, to, &sell->Mvctx)); 186 PetscCall(VecScatterViewFromOptions(sell->Mvctx, (PetscObject)mat, "-matmult_vecscatter_view")); 187 188 sell->garray = garray; 189 190 PetscCall(ISDestroy(&from)); 191 PetscCall(ISDestroy(&to)); 192 PetscCall(VecDestroy(&gvec)); 193 PetscFunctionReturn(PETSC_SUCCESS); 194 } 195 196 /* ugly stuff added for Glenn someday we should fix this up */ 197 static PetscInt *auglyrmapd = NULL, *auglyrmapo = NULL; /* mapping from the local ordering to the "diagonal" and "off-diagonal" parts of the local matrix */ 198 static Vec auglydd = NULL, auglyoo = NULL; /* work vectors used to scale the two parts of the local matrix */ 199 200 static PetscErrorCode MatMPISELLDiagonalScaleLocalSetUp(Mat inA, Vec scale) 201 { 202 Mat_MPISELL *ina = (Mat_MPISELL *)inA->data; /*access private part of matrix */ 203 PetscInt i, n, nt, cstart, cend, no, *garray = ina->garray, *lindices; 204 PetscInt *r_rmapd, *r_rmapo; 205 206 PetscFunctionBegin; 207 PetscCall(MatGetOwnershipRange(inA, &cstart, &cend)); 208 PetscCall(MatGetSize(ina->A, NULL, &n)); 209 PetscCall(PetscCalloc1(inA->rmap->mapping->n, &r_rmapd)); 210 nt = 0; 211 for (i = 0; i < inA->rmap->mapping->n; i++) { 212 if (inA->rmap->mapping->indices[i] >= cstart && inA->rmap->mapping->indices[i] < cend) { 213 nt++; 214 r_rmapd[i] = inA->rmap->mapping->indices[i] + 1; 215 } 216 } 217 PetscCheck(nt == n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " n %" PetscInt_FMT, nt, n); 218 PetscCall(PetscMalloc1(n, &auglyrmapd)); 219 for (i = 0; i < inA->rmap->mapping->n; i++) { 220 if (r_rmapd[i]) auglyrmapd[(r_rmapd[i] - 1) - cstart] = i; 221 } 222 PetscCall(PetscFree(r_rmapd)); 223 PetscCall(VecCreateSeq(PETSC_COMM_SELF, n, &auglydd)); 224 PetscCall(PetscCalloc1(inA->cmap->N, &lindices)); 225 for (i = 0; i < ina->B->cmap->n; i++) lindices[garray[i]] = i + 1; 226 no = inA->rmap->mapping->n - nt; 227 PetscCall(PetscCalloc1(inA->rmap->mapping->n, &r_rmapo)); 228 nt = 0; 229 for (i = 0; i < inA->rmap->mapping->n; i++) { 230 if (lindices[inA->rmap->mapping->indices[i]]) { 231 nt++; 232 r_rmapo[i] = lindices[inA->rmap->mapping->indices[i]]; 233 } 234 } 235 PetscCheck(nt <= no, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Hmm nt %" PetscInt_FMT " no %" PetscInt_FMT, nt, n); 236 PetscCall(PetscFree(lindices)); 237 PetscCall(PetscMalloc1(nt, &auglyrmapo)); 238 for (i = 0; i < inA->rmap->mapping->n; i++) { 239 if (r_rmapo[i]) auglyrmapo[(r_rmapo[i] - 1)] = i; 240 } 241 PetscCall(PetscFree(r_rmapo)); 242 PetscCall(VecCreateSeq(PETSC_COMM_SELF, nt, &auglyoo)); 243 PetscFunctionReturn(PETSC_SUCCESS); 244 } 245 246 PetscErrorCode MatDiagonalScaleLocal_MPISELL(Mat A, Vec scale) 247 { 248 Mat_MPISELL *a = (Mat_MPISELL *)A->data; /*access private part of matrix */ 249 PetscInt n, i; 250 PetscScalar *d, *o; 251 const PetscScalar *s; 252 253 PetscFunctionBegin; 254 if (!auglyrmapd) PetscCall(MatMPISELLDiagonalScaleLocalSetUp(A, scale)); 255 PetscCall(VecGetArrayRead(scale, &s)); 256 PetscCall(VecGetLocalSize(auglydd, &n)); 257 PetscCall(VecGetArray(auglydd, &d)); 258 for (i = 0; i < n; i++) d[i] = s[auglyrmapd[i]]; /* copy "diagonal" (true local) portion of scale into dd vector */ 259 PetscCall(VecRestoreArray(auglydd, &d)); 260 /* column scale "diagonal" portion of local matrix */ 261 PetscCall(MatDiagonalScale(a->A, NULL, auglydd)); 262 PetscCall(VecGetLocalSize(auglyoo, &n)); 263 PetscCall(VecGetArray(auglyoo, &o)); 264 for (i = 0; i < n; i++) o[i] = s[auglyrmapo[i]]; /* copy "off-diagonal" portion of scale into oo vector */ 265 PetscCall(VecRestoreArrayRead(scale, &s)); 266 PetscCall(VecRestoreArray(auglyoo, &o)); 267 /* column scale "off-diagonal" portion of local matrix */ 268 PetscCall(MatDiagonalScale(a->B, NULL, auglyoo)); 269 PetscFunctionReturn(PETSC_SUCCESS); 270 } 271