1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/mat/impls/baij/seq/baij.h> 3 #include <../src/mat/impls/sell/seq/sell.h> 4 #include <petsc/private/isimpl.h> 5 6 /* 7 This routine is shared by SeqAIJ and SeqBAIJ matrices, 8 since it operators only on the nonzero structure of the elements or blocks. 9 */ 10 PetscErrorCode MatFDColoringCreate_SeqXAIJ(Mat mat, ISColoring iscoloring, MatFDColoring c) { 11 PetscInt bs, nis = iscoloring->n, m = mat->rmap->n; 12 PetscBool isBAIJ, isSELL; 13 14 PetscFunctionBegin; 15 /* set default brows and bcols for speedup inserting the dense matrix into sparse Jacobian */ 16 PetscCall(MatGetBlockSize(mat, &bs)); 17 PetscCall(PetscObjectBaseTypeCompare((PetscObject)mat, MATSEQBAIJ, &isBAIJ)); 18 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSELL, &isSELL)); 19 if (isBAIJ) { 20 c->brows = m; 21 c->bcols = 1; 22 } else { /* seqaij matrix */ 23 /* bcols is chosen s.t. dy-array takes 50% of memory space as mat */ 24 PetscReal mem; 25 PetscInt nz, brows, bcols; 26 if (isSELL) { 27 Mat_SeqSELL *spA = (Mat_SeqSELL *)mat->data; 28 nz = spA->nz; 29 } else { 30 Mat_SeqAIJ *spA = (Mat_SeqAIJ *)mat->data; 31 nz = spA->nz; 32 } 33 34 bs = 1; /* only bs=1 is supported for SeqAIJ matrix */ 35 mem = nz * (sizeof(PetscScalar) + sizeof(PetscInt)) + 3 * m * sizeof(PetscInt); 36 bcols = (PetscInt)(0.5 * mem / (m * sizeof(PetscScalar))); 37 brows = 1000 / bcols; 38 if (bcols > nis) bcols = nis; 39 if (brows == 0 || brows > m) brows = m; 40 c->brows = brows; 41 c->bcols = bcols; 42 } 43 44 c->M = mat->rmap->N / bs; /* set total rows, columns and local rows */ 45 c->N = mat->cmap->N / bs; 46 c->m = mat->rmap->N / bs; 47 c->rstart = 0; 48 c->ncolors = nis; 49 c->ctype = iscoloring->ctype; 50 PetscFunctionReturn(0); 51 } 52 53 /* 54 Reorder Jentry such that blocked brows*bols of entries from dense matrix are inserted into Jacobian for improved cache performance 55 Input Parameters: 56 + mat - the matrix containing the nonzero structure of the Jacobian 57 . color - the coloring context 58 - nz - number of local non-zeros in mat 59 */ 60 PetscErrorCode MatFDColoringSetUpBlocked_AIJ_Private(Mat mat, MatFDColoring c, PetscInt nz) { 61 PetscInt i, j, nrows, nbcols, brows = c->brows, bcols = c->bcols, mbs = c->m, nis = c->ncolors; 62 PetscInt *color_start, *row_start, *nrows_new, nz_new, row_end; 63 64 PetscFunctionBegin; 65 if (brows < 1 || brows > mbs) brows = mbs; 66 PetscCall(PetscMalloc2(bcols + 1, &color_start, bcols, &row_start)); 67 PetscCall(PetscCalloc1(nis, &nrows_new)); 68 PetscCall(PetscMalloc1(bcols * mat->rmap->n, &c->dy)); 69 70 nz_new = 0; 71 nbcols = 0; 72 color_start[bcols] = 0; 73 74 if (c->htype[0] == 'd') { /* ---- c->htype == 'ds', use MatEntry --------*/ 75 MatEntry *Jentry_new, *Jentry = c->matentry; 76 77 PetscCall(PetscMalloc1(nz, &Jentry_new)); 78 for (i = 0; i < nis; i += bcols) { /* loop over colors */ 79 if (i + bcols > nis) { 80 color_start[nis - i] = color_start[bcols]; 81 bcols = nis - i; 82 } 83 84 color_start[0] = color_start[bcols]; 85 for (j = 0; j < bcols; j++) { 86 color_start[j + 1] = c->nrows[i + j] + color_start[j]; 87 row_start[j] = 0; 88 } 89 90 row_end = brows; 91 if (row_end > mbs) row_end = mbs; 92 93 while (row_end <= mbs) { /* loop over block rows */ 94 for (j = 0; j < bcols; j++) { /* loop over block columns */ 95 nrows = c->nrows[i + j]; 96 nz = color_start[j]; 97 while (row_start[j] < nrows) { 98 if (Jentry[nz].row >= row_end) { 99 color_start[j] = nz; 100 break; 101 } else { /* copy Jentry[nz] to Jentry_new[nz_new] */ 102 Jentry_new[nz_new].row = Jentry[nz].row + j * mbs; /* index in dy-array */ 103 Jentry_new[nz_new].col = Jentry[nz].col; 104 Jentry_new[nz_new].valaddr = Jentry[nz].valaddr; 105 nz_new++; 106 nz++; 107 row_start[j]++; 108 } 109 } 110 } 111 if (row_end == mbs) break; 112 row_end += brows; 113 if (row_end > mbs) row_end = mbs; 114 } 115 nrows_new[nbcols++] = nz_new; 116 } 117 PetscCall(PetscFree(Jentry)); 118 c->matentry = Jentry_new; 119 } else { /* --------- c->htype == 'wp', use MatEntry2 ------------------*/ 120 MatEntry2 *Jentry2_new, *Jentry2 = c->matentry2; 121 122 PetscCall(PetscMalloc1(nz, &Jentry2_new)); 123 for (i = 0; i < nis; i += bcols) { /* loop over colors */ 124 if (i + bcols > nis) { 125 color_start[nis - i] = color_start[bcols]; 126 bcols = nis - i; 127 } 128 129 color_start[0] = color_start[bcols]; 130 for (j = 0; j < bcols; j++) { 131 color_start[j + 1] = c->nrows[i + j] + color_start[j]; 132 row_start[j] = 0; 133 } 134 135 row_end = brows; 136 if (row_end > mbs) row_end = mbs; 137 138 while (row_end <= mbs) { /* loop over block rows */ 139 for (j = 0; j < bcols; j++) { /* loop over block columns */ 140 nrows = c->nrows[i + j]; 141 nz = color_start[j]; 142 while (row_start[j] < nrows) { 143 if (Jentry2[nz].row >= row_end) { 144 color_start[j] = nz; 145 break; 146 } else { /* copy Jentry2[nz] to Jentry2_new[nz_new] */ 147 Jentry2_new[nz_new].row = Jentry2[nz].row + j * mbs; /* index in dy-array */ 148 Jentry2_new[nz_new].valaddr = Jentry2[nz].valaddr; 149 nz_new++; 150 nz++; 151 row_start[j]++; 152 } 153 } 154 } 155 if (row_end == mbs) break; 156 row_end += brows; 157 if (row_end > mbs) row_end = mbs; 158 } 159 nrows_new[nbcols++] = nz_new; 160 } 161 PetscCall(PetscFree(Jentry2)); 162 c->matentry2 = Jentry2_new; 163 } /* ---------------------------------------------*/ 164 165 PetscCall(PetscFree2(color_start, row_start)); 166 167 for (i = nbcols - 1; i > 0; i--) nrows_new[i] -= nrows_new[i - 1]; 168 PetscCall(PetscFree(c->nrows)); 169 c->nrows = nrows_new; 170 PetscFunctionReturn(0); 171 } 172 173 PetscErrorCode MatFDColoringSetUp_SeqXAIJ(Mat mat, ISColoring iscoloring, MatFDColoring c) { 174 PetscInt i, n, nrows, mbs = c->m, j, k, m, ncols, col, nis = iscoloring->n, *rowhit, bs, bs2, *spidx, nz, tmp; 175 const PetscInt *is, *row, *ci, *cj; 176 PetscBool isBAIJ, isSELL; 177 const PetscScalar *A_val; 178 PetscScalar **valaddrhit; 179 MatEntry *Jentry; 180 MatEntry2 *Jentry2; 181 182 PetscFunctionBegin; 183 PetscCall(ISColoringGetIS(iscoloring, PETSC_OWN_POINTER, PETSC_IGNORE, &c->isa)); 184 185 PetscCall(MatGetBlockSize(mat, &bs)); 186 PetscCall(PetscObjectBaseTypeCompare((PetscObject)mat, MATSEQBAIJ, &isBAIJ)); 187 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSELL, &isSELL)); 188 if (isBAIJ) { 189 Mat_SeqBAIJ *spA = (Mat_SeqBAIJ *)mat->data; 190 191 A_val = spA->a; 192 nz = spA->nz; 193 } else if (isSELL) { 194 Mat_SeqSELL *spA = (Mat_SeqSELL *)mat->data; 195 196 A_val = spA->val; 197 nz = spA->nz; 198 bs = 1; /* only bs=1 is supported for SeqSELL matrix */ 199 } else { 200 Mat_SeqAIJ *spA = (Mat_SeqAIJ *)mat->data; 201 202 A_val = spA->a; 203 nz = spA->nz; 204 bs = 1; /* only bs=1 is supported for SeqAIJ matrix */ 205 } 206 207 PetscCall(PetscMalloc2(nis, &c->ncolumns, nis, &c->columns)); 208 PetscCall(PetscMalloc1(nis, &c->nrows)); /* nrows is freeed separately from ncolumns and columns */ 209 210 if (c->htype[0] == 'd') { 211 PetscCall(PetscMalloc1(nz, &Jentry)); 212 c->matentry = Jentry; 213 } else if (c->htype[0] == 'w') { 214 PetscCall(PetscMalloc1(nz, &Jentry2)); 215 c->matentry2 = Jentry2; 216 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "htype is not supported"); 217 218 if (isBAIJ) { 219 PetscCall(MatGetColumnIJ_SeqBAIJ_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL)); 220 } else if (isSELL) { 221 PetscCall(MatGetColumnIJ_SeqSELL_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL)); 222 } else { 223 PetscCall(MatGetColumnIJ_SeqAIJ_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL)); 224 } 225 226 PetscCall(PetscCalloc1(c->m, &rowhit)); 227 PetscCall(PetscMalloc1(c->m, &valaddrhit)); 228 229 nz = 0; 230 for (i = 0; i < nis; i++) { /* loop over colors */ 231 PetscCall(ISGetLocalSize(c->isa[i], &n)); 232 PetscCall(ISGetIndices(c->isa[i], &is)); 233 234 c->ncolumns[i] = n; 235 c->columns[i] = (PetscInt *)is; 236 /* note: we know that c->isa is going to be around as long at the c->columns values */ 237 PetscCall(ISRestoreIndices(c->isa[i], &is)); 238 239 /* fast, crude version requires O(N*N) work */ 240 bs2 = bs * bs; 241 nrows = 0; 242 for (j = 0; j < n; j++) { /* loop over columns */ 243 col = is[j]; 244 tmp = ci[col]; 245 row = cj + tmp; 246 m = ci[col + 1] - tmp; 247 nrows += m; 248 for (k = 0; k < m; k++) { /* loop over columns marking them in rowhit */ 249 rowhit[*row] = col + 1; 250 valaddrhit[*row++] = (PetscScalar *)&A_val[bs2 * spidx[tmp + k]]; 251 } 252 } 253 c->nrows[i] = nrows; /* total num of rows for this color */ 254 255 if (c->htype[0] == 'd') { 256 for (j = 0; j < mbs; j++) { /* loop over rows */ 257 if (rowhit[j]) { 258 Jentry[nz].row = j; /* local row index */ 259 Jentry[nz].col = rowhit[j] - 1; /* local column index */ 260 Jentry[nz].valaddr = valaddrhit[j]; /* address of mat value for this entry */ 261 nz++; 262 rowhit[j] = 0.0; /* zero rowhit for reuse */ 263 } 264 } 265 } else { /* c->htype == 'wp' */ 266 for (j = 0; j < mbs; j++) { /* loop over rows */ 267 if (rowhit[j]) { 268 Jentry2[nz].row = j; /* local row index */ 269 Jentry2[nz].valaddr = valaddrhit[j]; /* address of mat value for this entry */ 270 nz++; 271 rowhit[j] = 0.0; /* zero rowhit for reuse */ 272 } 273 } 274 } 275 } 276 277 if (c->bcols > 1) { /* reorder Jentry for faster MatFDColoringApply() */ 278 PetscCall(MatFDColoringSetUpBlocked_AIJ_Private(mat, c, nz)); 279 } 280 281 if (isBAIJ) { 282 PetscCall(MatRestoreColumnIJ_SeqBAIJ_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL)); 283 PetscCall(PetscMalloc1(bs * mat->rmap->n, &c->dy)); 284 } else if (isSELL) { 285 PetscCall(MatRestoreColumnIJ_SeqSELL_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL)); 286 } else { 287 PetscCall(MatRestoreColumnIJ_SeqAIJ_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL)); 288 } 289 PetscCall(PetscFree(rowhit)); 290 PetscCall(PetscFree(valaddrhit)); 291 PetscCall(ISColoringRestoreIS(iscoloring, PETSC_OWN_POINTER, &c->isa)); 292 293 PetscCall(VecCreateGhost(PetscObjectComm((PetscObject)mat), mat->rmap->n, PETSC_DETERMINE, 0, NULL, &c->vscale)); 294 PetscCall(PetscInfo(c, "ncolors %" PetscInt_FMT ", brows %" PetscInt_FMT " and bcols %" PetscInt_FMT " are used.\n", c->ncolors, c->brows, c->bcols)); 295 PetscFunctionReturn(0); 296 } 297