1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/mat/impls/baij/seq/baij.h> 3 #include <../src/mat/impls/sell/seq/sell.h> 4 #include <petsc/private/isimpl.h> 5 6 /* 7 This routine is shared by SeqAIJ and SeqBAIJ matrices, 8 since it operators only on the nonzero structure of the elements or blocks. 9 */ 10 PetscErrorCode MatFDColoringCreate_SeqXAIJ(Mat mat, ISColoring iscoloring, MatFDColoring c) { 11 PetscInt bs, nis = iscoloring->n, m = mat->rmap->n; 12 PetscBool isBAIJ, isSELL; 13 14 PetscFunctionBegin; 15 /* set default brows and bcols for speedup inserting the dense matrix into sparse Jacobian */ 16 PetscCall(MatGetBlockSize(mat, &bs)); 17 PetscCall(PetscObjectBaseTypeCompare((PetscObject)mat, MATSEQBAIJ, &isBAIJ)); 18 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSELL, &isSELL)); 19 if (isBAIJ) { 20 c->brows = m; 21 c->bcols = 1; 22 } else { /* seqaij matrix */ 23 /* bcols is chosen s.t. dy-array takes 50% of memory space as mat */ 24 PetscReal mem; 25 PetscInt nz, brows, bcols; 26 if (isSELL) { 27 Mat_SeqSELL *spA = (Mat_SeqSELL *)mat->data; 28 nz = spA->nz; 29 } else { 30 Mat_SeqAIJ *spA = (Mat_SeqAIJ *)mat->data; 31 nz = spA->nz; 32 } 33 34 bs = 1; /* only bs=1 is supported for SeqAIJ matrix */ 35 mem = nz * (sizeof(PetscScalar) + sizeof(PetscInt)) + 3 * m * sizeof(PetscInt); 36 bcols = (PetscInt)(0.5 * mem / (m * sizeof(PetscScalar))); 37 brows = 1000 / bcols; 38 if (bcols > nis) bcols = nis; 39 if (brows == 0 || brows > m) brows = m; 40 c->brows = brows; 41 c->bcols = bcols; 42 } 43 44 c->M = mat->rmap->N / bs; /* set total rows, columns and local rows */ 45 c->N = mat->cmap->N / bs; 46 c->m = mat->rmap->N / bs; 47 c->rstart = 0; 48 c->ncolors = nis; 49 c->ctype = iscoloring->ctype; 50 PetscFunctionReturn(0); 51 } 52 53 /* 54 Reorder Jentry such that blocked brows*bols of entries from dense matrix are inserted into Jacobian for improved cache performance 55 Input Parameters: 56 + mat - the matrix containing the nonzero structure of the Jacobian 57 . color - the coloring context 58 - nz - number of local non-zeros in mat 59 */ 60 PetscErrorCode MatFDColoringSetUpBlocked_AIJ_Private(Mat mat, MatFDColoring c, PetscInt nz) { 61 PetscInt i, j, nrows, nbcols, brows = c->brows, bcols = c->bcols, mbs = c->m, nis = c->ncolors; 62 PetscInt *color_start, *row_start, *nrows_new, nz_new, row_end; 63 64 PetscFunctionBegin; 65 if (brows < 1 || brows > mbs) brows = mbs; 66 PetscCall(PetscMalloc2(bcols + 1, &color_start, bcols, &row_start)); 67 PetscCall(PetscCalloc1(nis, &nrows_new)); 68 PetscCall(PetscMalloc1(bcols * mat->rmap->n, &c->dy)); 69 PetscCall(PetscLogObjectMemory((PetscObject)c, bcols * mat->rmap->n * sizeof(PetscScalar))); 70 71 nz_new = 0; 72 nbcols = 0; 73 color_start[bcols] = 0; 74 75 if (c->htype[0] == 'd') { /* ---- c->htype == 'ds', use MatEntry --------*/ 76 MatEntry *Jentry_new, *Jentry = c->matentry; 77 78 PetscCall(PetscMalloc1(nz, &Jentry_new)); 79 for (i = 0; i < nis; i += bcols) { /* loop over colors */ 80 if (i + bcols > nis) { 81 color_start[nis - i] = color_start[bcols]; 82 bcols = nis - i; 83 } 84 85 color_start[0] = color_start[bcols]; 86 for (j = 0; j < bcols; j++) { 87 color_start[j + 1] = c->nrows[i + j] + color_start[j]; 88 row_start[j] = 0; 89 } 90 91 row_end = brows; 92 if (row_end > mbs) row_end = mbs; 93 94 while (row_end <= mbs) { /* loop over block rows */ 95 for (j = 0; j < bcols; j++) { /* loop over block columns */ 96 nrows = c->nrows[i + j]; 97 nz = color_start[j]; 98 while (row_start[j] < nrows) { 99 if (Jentry[nz].row >= row_end) { 100 color_start[j] = nz; 101 break; 102 } else { /* copy Jentry[nz] to Jentry_new[nz_new] */ 103 Jentry_new[nz_new].row = Jentry[nz].row + j * mbs; /* index in dy-array */ 104 Jentry_new[nz_new].col = Jentry[nz].col; 105 Jentry_new[nz_new].valaddr = Jentry[nz].valaddr; 106 nz_new++; 107 nz++; 108 row_start[j]++; 109 } 110 } 111 } 112 if (row_end == mbs) break; 113 row_end += brows; 114 if (row_end > mbs) row_end = mbs; 115 } 116 nrows_new[nbcols++] = nz_new; 117 } 118 PetscCall(PetscFree(Jentry)); 119 c->matentry = Jentry_new; 120 } else { /* --------- c->htype == 'wp', use MatEntry2 ------------------*/ 121 MatEntry2 *Jentry2_new, *Jentry2 = c->matentry2; 122 123 PetscCall(PetscMalloc1(nz, &Jentry2_new)); 124 for (i = 0; i < nis; i += bcols) { /* loop over colors */ 125 if (i + bcols > nis) { 126 color_start[nis - i] = color_start[bcols]; 127 bcols = nis - i; 128 } 129 130 color_start[0] = color_start[bcols]; 131 for (j = 0; j < bcols; j++) { 132 color_start[j + 1] = c->nrows[i + j] + color_start[j]; 133 row_start[j] = 0; 134 } 135 136 row_end = brows; 137 if (row_end > mbs) row_end = mbs; 138 139 while (row_end <= mbs) { /* loop over block rows */ 140 for (j = 0; j < bcols; j++) { /* loop over block columns */ 141 nrows = c->nrows[i + j]; 142 nz = color_start[j]; 143 while (row_start[j] < nrows) { 144 if (Jentry2[nz].row >= row_end) { 145 color_start[j] = nz; 146 break; 147 } else { /* copy Jentry2[nz] to Jentry2_new[nz_new] */ 148 Jentry2_new[nz_new].row = Jentry2[nz].row + j * mbs; /* index in dy-array */ 149 Jentry2_new[nz_new].valaddr = Jentry2[nz].valaddr; 150 nz_new++; 151 nz++; 152 row_start[j]++; 153 } 154 } 155 } 156 if (row_end == mbs) break; 157 row_end += brows; 158 if (row_end > mbs) row_end = mbs; 159 } 160 nrows_new[nbcols++] = nz_new; 161 } 162 PetscCall(PetscFree(Jentry2)); 163 c->matentry2 = Jentry2_new; 164 } /* ---------------------------------------------*/ 165 166 PetscCall(PetscFree2(color_start, row_start)); 167 168 for (i = nbcols - 1; i > 0; i--) nrows_new[i] -= nrows_new[i - 1]; 169 PetscCall(PetscFree(c->nrows)); 170 c->nrows = nrows_new; 171 PetscFunctionReturn(0); 172 } 173 174 PetscErrorCode MatFDColoringSetUp_SeqXAIJ(Mat mat, ISColoring iscoloring, MatFDColoring c) { 175 PetscInt i, n, nrows, mbs = c->m, j, k, m, ncols, col, nis = iscoloring->n, *rowhit, bs, bs2, *spidx, nz, tmp; 176 const PetscInt *is, *row, *ci, *cj; 177 PetscBool isBAIJ, isSELL; 178 const PetscScalar *A_val; 179 PetscScalar **valaddrhit; 180 MatEntry *Jentry; 181 MatEntry2 *Jentry2; 182 183 PetscFunctionBegin; 184 PetscCall(ISColoringGetIS(iscoloring, PETSC_OWN_POINTER, PETSC_IGNORE, &c->isa)); 185 186 PetscCall(MatGetBlockSize(mat, &bs)); 187 PetscCall(PetscObjectBaseTypeCompare((PetscObject)mat, MATSEQBAIJ, &isBAIJ)); 188 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSELL, &isSELL)); 189 if (isBAIJ) { 190 Mat_SeqBAIJ *spA = (Mat_SeqBAIJ *)mat->data; 191 192 A_val = spA->a; 193 nz = spA->nz; 194 } else if (isSELL) { 195 Mat_SeqSELL *spA = (Mat_SeqSELL *)mat->data; 196 197 A_val = spA->val; 198 nz = spA->nz; 199 bs = 1; /* only bs=1 is supported for SeqSELL matrix */ 200 } else { 201 Mat_SeqAIJ *spA = (Mat_SeqAIJ *)mat->data; 202 203 A_val = spA->a; 204 nz = spA->nz; 205 bs = 1; /* only bs=1 is supported for SeqAIJ matrix */ 206 } 207 208 PetscCall(PetscMalloc2(nis, &c->ncolumns, nis, &c->columns)); 209 PetscCall(PetscMalloc1(nis, &c->nrows)); /* nrows is freeed separately from ncolumns and columns */ 210 PetscCall(PetscLogObjectMemory((PetscObject)c, 3 * nis * sizeof(PetscInt))); 211 212 if (c->htype[0] == 'd') { 213 PetscCall(PetscMalloc1(nz, &Jentry)); 214 PetscCall(PetscLogObjectMemory((PetscObject)c, nz * sizeof(MatEntry))); 215 c->matentry = Jentry; 216 } else if (c->htype[0] == 'w') { 217 PetscCall(PetscMalloc1(nz, &Jentry2)); 218 PetscCall(PetscLogObjectMemory((PetscObject)c, nz * sizeof(MatEntry2))); 219 c->matentry2 = Jentry2; 220 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "htype is not supported"); 221 222 if (isBAIJ) { 223 PetscCall(MatGetColumnIJ_SeqBAIJ_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL)); 224 } else if (isSELL) { 225 PetscCall(MatGetColumnIJ_SeqSELL_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL)); 226 } else { 227 PetscCall(MatGetColumnIJ_SeqAIJ_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL)); 228 } 229 230 PetscCall(PetscCalloc1(c->m, &rowhit)); 231 PetscCall(PetscMalloc1(c->m, &valaddrhit)); 232 233 nz = 0; 234 for (i = 0; i < nis; i++) { /* loop over colors */ 235 PetscCall(ISGetLocalSize(c->isa[i], &n)); 236 PetscCall(ISGetIndices(c->isa[i], &is)); 237 238 c->ncolumns[i] = n; 239 c->columns[i] = (PetscInt *)is; 240 /* note: we know that c->isa is going to be around as long at the c->columns values */ 241 PetscCall(ISRestoreIndices(c->isa[i], &is)); 242 243 /* fast, crude version requires O(N*N) work */ 244 bs2 = bs * bs; 245 nrows = 0; 246 for (j = 0; j < n; j++) { /* loop over columns */ 247 col = is[j]; 248 tmp = ci[col]; 249 row = cj + tmp; 250 m = ci[col + 1] - tmp; 251 nrows += m; 252 for (k = 0; k < m; k++) { /* loop over columns marking them in rowhit */ 253 rowhit[*row] = col + 1; 254 valaddrhit[*row++] = (PetscScalar *)&A_val[bs2 * spidx[tmp + k]]; 255 } 256 } 257 c->nrows[i] = nrows; /* total num of rows for this color */ 258 259 if (c->htype[0] == 'd') { 260 for (j = 0; j < mbs; j++) { /* loop over rows */ 261 if (rowhit[j]) { 262 Jentry[nz].row = j; /* local row index */ 263 Jentry[nz].col = rowhit[j] - 1; /* local column index */ 264 Jentry[nz].valaddr = valaddrhit[j]; /* address of mat value for this entry */ 265 nz++; 266 rowhit[j] = 0.0; /* zero rowhit for reuse */ 267 } 268 } 269 } else { /* c->htype == 'wp' */ 270 for (j = 0; j < mbs; j++) { /* loop over rows */ 271 if (rowhit[j]) { 272 Jentry2[nz].row = j; /* local row index */ 273 Jentry2[nz].valaddr = valaddrhit[j]; /* address of mat value for this entry */ 274 nz++; 275 rowhit[j] = 0.0; /* zero rowhit for reuse */ 276 } 277 } 278 } 279 } 280 281 if (c->bcols > 1) { /* reorder Jentry for faster MatFDColoringApply() */ 282 PetscCall(MatFDColoringSetUpBlocked_AIJ_Private(mat, c, nz)); 283 } 284 285 if (isBAIJ) { 286 PetscCall(MatRestoreColumnIJ_SeqBAIJ_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL)); 287 PetscCall(PetscMalloc1(bs * mat->rmap->n, &c->dy)); 288 PetscCall(PetscLogObjectMemory((PetscObject)c, bs * mat->rmap->n * sizeof(PetscScalar))); 289 } else if (isSELL) { 290 PetscCall(MatRestoreColumnIJ_SeqSELL_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL)); 291 } else { 292 PetscCall(MatRestoreColumnIJ_SeqAIJ_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL)); 293 } 294 PetscCall(PetscFree(rowhit)); 295 PetscCall(PetscFree(valaddrhit)); 296 PetscCall(ISColoringRestoreIS(iscoloring, PETSC_OWN_POINTER, &c->isa)); 297 298 PetscCall(VecCreateGhost(PetscObjectComm((PetscObject)mat), mat->rmap->n, PETSC_DETERMINE, 0, NULL, &c->vscale)); 299 PetscCall(PetscInfo(c, "ncolors %" PetscInt_FMT ", brows %" PetscInt_FMT " and bcols %" PetscInt_FMT " are used.\n", c->ncolors, c->brows, c->bcols)); 300 PetscFunctionReturn(0); 301 } 302