1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/mat/impls/baij/seq/baij.h> 3 #include <../src/mat/impls/sell/seq/sell.h> 4 #include <petsc/private/isimpl.h> 5 6 /* 7 This routine is shared by SeqAIJ and SeqBAIJ matrices, 8 since it operators only on the nonzero structure of the elements or blocks. 9 */ 10 PetscErrorCode MatFDColoringCreate_SeqXAIJ(Mat mat, ISColoring iscoloring, MatFDColoring c) 11 { 12 PetscInt bs, nis = iscoloring->n, m = mat->rmap->n; 13 PetscBool isBAIJ, isSELL; 14 15 PetscFunctionBegin; 16 /* set default brows and bcols for speedup inserting the dense matrix into sparse Jacobian */ 17 PetscCall(MatGetBlockSize(mat, &bs)); 18 PetscCall(PetscObjectBaseTypeCompare((PetscObject)mat, MATSEQBAIJ, &isBAIJ)); 19 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSELL, &isSELL)); 20 if (isBAIJ) { 21 c->brows = m; 22 c->bcols = 1; 23 } else { /* seqaij matrix */ 24 /* bcols is chosen s.t. dy-array takes 50% of memory space as mat */ 25 PetscReal mem; 26 PetscInt nz, brows, bcols; 27 if (isSELL) { 28 Mat_SeqSELL *spA = (Mat_SeqSELL *)mat->data; 29 nz = spA->nz; 30 } else { 31 Mat_SeqAIJ *spA = (Mat_SeqAIJ *)mat->data; 32 nz = spA->nz; 33 } 34 35 bs = 1; /* only bs=1 is supported for SeqAIJ matrix */ 36 mem = nz * (sizeof(PetscScalar) + sizeof(PetscInt)) + 3 * m * sizeof(PetscInt); 37 bcols = (PetscInt)(0.5 * mem / (m * sizeof(PetscScalar))); 38 if (!bcols) bcols = 1; 39 brows = 1000 / bcols; 40 if (bcols > nis) bcols = nis; 41 if (brows == 0 || brows > m) brows = m; 42 c->brows = brows; 43 c->bcols = bcols; 44 } 45 46 c->M = mat->rmap->N / bs; /* set total rows, columns and local rows */ 47 c->N = mat->cmap->N / bs; 48 c->m = mat->rmap->N / bs; 49 c->rstart = 0; 50 c->ncolors = nis; 51 c->ctype = iscoloring->ctype; 52 PetscFunctionReturn(PETSC_SUCCESS); 53 } 54 55 /* 56 Reorder Jentry such that blocked brows*bols of entries from dense matrix are inserted into Jacobian for improved cache performance 57 Input Parameters: 58 + mat - the matrix containing the nonzero structure of the Jacobian 59 . color - the coloring context 60 - nz - number of local non-zeros in mat 61 */ 62 PetscErrorCode MatFDColoringSetUpBlocked_AIJ_Private(Mat mat, MatFDColoring c, PetscInt nz) 63 { 64 PetscInt i, j, nrows, nbcols, brows = c->brows, bcols = c->bcols, mbs = c->m, nis = c->ncolors; 65 PetscInt *color_start, *row_start, *nrows_new, nz_new, row_end; 66 67 PetscFunctionBegin; 68 if (brows < 1 || brows > mbs) brows = mbs; 69 PetscCall(PetscMalloc2(bcols + 1, &color_start, bcols, &row_start)); 70 PetscCall(PetscCalloc1(nis, &nrows_new)); 71 PetscCall(PetscMalloc1(bcols * mat->rmap->n, &c->dy)); 72 73 nz_new = 0; 74 nbcols = 0; 75 color_start[bcols] = 0; 76 77 if (c->htype[0] == 'd') { /* ---- c->htype == 'ds', use MatEntry --------*/ 78 MatEntry *Jentry_new, *Jentry = c->matentry; 79 80 PetscCall(PetscMalloc1(nz, &Jentry_new)); 81 for (i = 0; i < nis; i += bcols) { /* loop over colors */ 82 if (i + bcols > nis) { 83 color_start[nis - i] = color_start[bcols]; 84 bcols = nis - i; 85 } 86 87 color_start[0] = color_start[bcols]; 88 for (j = 0; j < bcols; j++) { 89 color_start[j + 1] = c->nrows[i + j] + color_start[j]; 90 row_start[j] = 0; 91 } 92 93 row_end = brows; 94 if (row_end > mbs) row_end = mbs; 95 96 while (row_end <= mbs) { /* loop over block rows */ 97 for (j = 0; j < bcols; j++) { /* loop over block columns */ 98 nrows = c->nrows[i + j]; 99 nz = color_start[j]; 100 while (row_start[j] < nrows) { 101 if (Jentry[nz].row >= row_end) { 102 color_start[j] = nz; 103 break; 104 } else { /* copy Jentry[nz] to Jentry_new[nz_new] */ 105 Jentry_new[nz_new].row = Jentry[nz].row + j * mbs; /* index in dy-array */ 106 Jentry_new[nz_new].col = Jentry[nz].col; 107 Jentry_new[nz_new].valaddr = Jentry[nz].valaddr; 108 nz_new++; 109 nz++; 110 row_start[j]++; 111 } 112 } 113 } 114 if (row_end == mbs) break; 115 row_end += brows; 116 if (row_end > mbs) row_end = mbs; 117 } 118 nrows_new[nbcols++] = nz_new; 119 } 120 PetscCall(PetscFree(Jentry)); 121 c->matentry = Jentry_new; 122 } else { /* --------- c->htype == 'wp', use MatEntry2 ------------------*/ 123 MatEntry2 *Jentry2_new, *Jentry2 = c->matentry2; 124 125 PetscCall(PetscMalloc1(nz, &Jentry2_new)); 126 for (i = 0; i < nis; i += bcols) { /* loop over colors */ 127 if (i + bcols > nis) { 128 color_start[nis - i] = color_start[bcols]; 129 bcols = nis - i; 130 } 131 132 color_start[0] = color_start[bcols]; 133 for (j = 0; j < bcols; j++) { 134 color_start[j + 1] = c->nrows[i + j] + color_start[j]; 135 row_start[j] = 0; 136 } 137 138 row_end = brows; 139 if (row_end > mbs) row_end = mbs; 140 141 while (row_end <= mbs) { /* loop over block rows */ 142 for (j = 0; j < bcols; j++) { /* loop over block columns */ 143 nrows = c->nrows[i + j]; 144 nz = color_start[j]; 145 while (row_start[j] < nrows) { 146 if (Jentry2[nz].row >= row_end) { 147 color_start[j] = nz; 148 break; 149 } else { /* copy Jentry2[nz] to Jentry2_new[nz_new] */ 150 Jentry2_new[nz_new].row = Jentry2[nz].row + j * mbs; /* index in dy-array */ 151 Jentry2_new[nz_new].valaddr = Jentry2[nz].valaddr; 152 nz_new++; 153 nz++; 154 row_start[j]++; 155 } 156 } 157 } 158 if (row_end == mbs) break; 159 row_end += brows; 160 if (row_end > mbs) row_end = mbs; 161 } 162 nrows_new[nbcols++] = nz_new; 163 } 164 PetscCall(PetscFree(Jentry2)); 165 c->matentry2 = Jentry2_new; 166 } /* ---------------------------------------------*/ 167 168 PetscCall(PetscFree2(color_start, row_start)); 169 170 for (i = nbcols - 1; i > 0; i--) nrows_new[i] -= nrows_new[i - 1]; 171 PetscCall(PetscFree(c->nrows)); 172 c->nrows = nrows_new; 173 PetscFunctionReturn(PETSC_SUCCESS); 174 } 175 176 PetscErrorCode MatFDColoringSetUp_SeqXAIJ(Mat mat, ISColoring iscoloring, MatFDColoring c) 177 { 178 PetscInt i, n, nrows, mbs = c->m, j, k, m, ncols, col, nis = iscoloring->n, *rowhit, bs, bs2, *spidx, nz, tmp; 179 const PetscInt *is, *row, *ci, *cj; 180 PetscBool isBAIJ, isSELL; 181 const PetscScalar *A_val; 182 PetscScalar **valaddrhit; 183 MatEntry *Jentry; 184 MatEntry2 *Jentry2; 185 186 PetscFunctionBegin; 187 PetscCall(ISColoringGetIS(iscoloring, PETSC_OWN_POINTER, PETSC_IGNORE, &c->isa)); 188 189 PetscCall(MatGetBlockSize(mat, &bs)); 190 PetscCall(PetscObjectBaseTypeCompare((PetscObject)mat, MATSEQBAIJ, &isBAIJ)); 191 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATSEQSELL, &isSELL)); 192 if (isBAIJ) { 193 Mat_SeqBAIJ *spA = (Mat_SeqBAIJ *)mat->data; 194 195 A_val = spA->a; 196 nz = spA->nz; 197 } else if (isSELL) { 198 Mat_SeqSELL *spA = (Mat_SeqSELL *)mat->data; 199 200 A_val = spA->val; 201 nz = spA->nz; 202 bs = 1; /* only bs=1 is supported for SeqSELL matrix */ 203 } else { 204 Mat_SeqAIJ *spA = (Mat_SeqAIJ *)mat->data; 205 206 A_val = spA->a; 207 nz = spA->nz; 208 bs = 1; /* only bs=1 is supported for SeqAIJ matrix */ 209 } 210 211 PetscCall(PetscMalloc2(nis, &c->ncolumns, nis, &c->columns)); 212 PetscCall(PetscMalloc1(nis, &c->nrows)); /* nrows is freed separately from ncolumns and columns */ 213 214 if (c->htype[0] == 'd') { 215 PetscCall(PetscMalloc1(nz, &Jentry)); 216 c->matentry = Jentry; 217 } else if (c->htype[0] == 'w') { 218 PetscCall(PetscMalloc1(nz, &Jentry2)); 219 c->matentry2 = Jentry2; 220 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "htype is not supported"); 221 222 if (isBAIJ) { 223 PetscCall(MatGetColumnIJ_SeqBAIJ_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL)); 224 } else if (isSELL) { 225 PetscCall(MatGetColumnIJ_SeqSELL_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL)); 226 } else { 227 PetscCall(MatGetColumnIJ_SeqAIJ_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL)); 228 } 229 230 PetscCall(PetscCalloc1(c->m, &rowhit)); 231 PetscCall(PetscMalloc1(c->m, &valaddrhit)); 232 233 nz = 0; 234 for (i = 0; i < nis; i++) { /* loop over colors */ 235 PetscCall(ISGetLocalSize(c->isa[i], &n)); 236 PetscCall(ISGetIndices(c->isa[i], &is)); 237 238 c->ncolumns[i] = n; 239 c->columns[i] = (PetscInt *)is; 240 /* note: we know that c->isa is going to be around as long at the c->columns values */ 241 PetscCall(ISRestoreIndices(c->isa[i], &is)); 242 243 /* fast, crude version requires O(N*N) work */ 244 bs2 = bs * bs; 245 nrows = 0; 246 for (j = 0; j < n; j++) { /* loop over columns */ 247 col = is[j]; 248 tmp = ci[col]; 249 row = cj + tmp; 250 m = ci[col + 1] - tmp; 251 nrows += m; 252 for (k = 0; k < m; k++) { /* loop over columns marking them in rowhit */ 253 rowhit[*row] = col + 1; 254 valaddrhit[*row++] = (PetscScalar *)&A_val[bs2 * spidx[tmp + k]]; 255 } 256 } 257 c->nrows[i] = nrows; /* total num of rows for this color */ 258 259 if (c->htype[0] == 'd') { 260 for (j = 0; j < mbs; j++) { /* loop over rows */ 261 if (rowhit[j]) { 262 Jentry[nz].row = j; /* local row index */ 263 Jentry[nz].col = rowhit[j] - 1; /* local column index */ 264 Jentry[nz].valaddr = valaddrhit[j]; /* address of mat value for this entry */ 265 nz++; 266 rowhit[j] = 0.0; /* zero rowhit for reuse */ 267 } 268 } 269 } else { /* c->htype == 'wp' */ 270 for (j = 0; j < mbs; j++) { /* loop over rows */ 271 if (rowhit[j]) { 272 Jentry2[nz].row = j; /* local row index */ 273 Jentry2[nz].valaddr = valaddrhit[j]; /* address of mat value for this entry */ 274 nz++; 275 rowhit[j] = 0.0; /* zero rowhit for reuse */ 276 } 277 } 278 } 279 } 280 281 if (c->bcols > 1) { /* reorder Jentry for faster MatFDColoringApply() */ 282 PetscCall(MatFDColoringSetUpBlocked_AIJ_Private(mat, c, nz)); 283 } 284 285 if (isBAIJ) { 286 PetscCall(MatRestoreColumnIJ_SeqBAIJ_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL)); 287 PetscCall(PetscMalloc1(bs * mat->rmap->n, &c->dy)); 288 } else if (isSELL) { 289 PetscCall(MatRestoreColumnIJ_SeqSELL_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL)); 290 } else { 291 PetscCall(MatRestoreColumnIJ_SeqAIJ_Color(mat, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &ci, &cj, &spidx, NULL)); 292 } 293 PetscCall(PetscFree(rowhit)); 294 PetscCall(PetscFree(valaddrhit)); 295 PetscCall(ISColoringRestoreIS(iscoloring, PETSC_OWN_POINTER, &c->isa)); 296 297 PetscCall(VecCreateGhost(PetscObjectComm((PetscObject)mat), mat->rmap->n, PETSC_DETERMINE, 0, NULL, &c->vscale)); 298 PetscCall(PetscInfo(c, "ncolors %" PetscInt_FMT ", brows %" PetscInt_FMT " and bcols %" PetscInt_FMT " are used.\n", c->ncolors, c->brows, c->bcols)); 299 PetscFunctionReturn(PETSC_SUCCESS); 300 } 301