1 #include <../src/mat/impls/sell/mpi/mpisell.h> 2 #include <../src/mat/impls/aij/mpi/mpiaij.h> 3 #include <../src/mat/impls/baij/mpi/mpibaij.h> 4 #include <petsc/private/isimpl.h> 5 6 PetscErrorCode MatFDColoringApply_BAIJ(Mat J, MatFDColoring coloring, Vec x1, void *sctx) 7 { 8 PetscErrorCode (*f)(void *, Vec, Vec, void *) = (PetscErrorCode(*)(void *, Vec, Vec, void *))coloring->f; 9 PetscInt k, cstart, cend, l, row, col, nz, spidx, i, j; 10 PetscScalar dx = 0.0, *w3_array, *dy_i, *dy = coloring->dy; 11 PetscScalar *vscale_array; 12 const PetscScalar *xx; 13 PetscReal epsilon = coloring->error_rel, umin = coloring->umin, unorm; 14 Vec w1 = coloring->w1, w2 = coloring->w2, w3, vscale = coloring->vscale; 15 void *fctx = coloring->fctx; 16 PetscInt ctype = coloring->ctype, nxloc, nrows_k; 17 PetscScalar *valaddr; 18 MatEntry *Jentry = coloring->matentry; 19 MatEntry2 *Jentry2 = coloring->matentry2; 20 const PetscInt ncolors = coloring->ncolors, *ncolumns = coloring->ncolumns, *nrows = coloring->nrows; 21 PetscInt bs = J->rmap->bs; 22 23 PetscFunctionBegin; 24 PetscCall(VecBindToCPU(x1, PETSC_TRUE)); 25 /* (1) Set w1 = F(x1) */ 26 if (!coloring->fset) { 27 PetscCall(PetscLogEventBegin(MAT_FDColoringFunction, coloring, 0, 0, 0)); 28 PetscCall((*f)(sctx, x1, w1, fctx)); 29 PetscCall(PetscLogEventEnd(MAT_FDColoringFunction, coloring, 0, 0, 0)); 30 } else { 31 coloring->fset = PETSC_FALSE; 32 } 33 34 /* (2) Compute vscale = 1./dx - the local scale factors, including ghost points */ 35 PetscCall(VecGetLocalSize(x1, &nxloc)); 36 if (coloring->htype[0] == 'w') { 37 /* vscale = dx is a constant scalar */ 38 PetscCall(VecNorm(x1, NORM_2, &unorm)); 39 dx = 1.0 / (PetscSqrtReal(1.0 + unorm) * epsilon); 40 } else { 41 PetscCall(VecGetArrayRead(x1, &xx)); 42 PetscCall(VecGetArray(vscale, &vscale_array)); 43 for (col = 0; col < nxloc; col++) { 44 dx = xx[col]; 45 if (PetscAbsScalar(dx) < umin) { 46 if (PetscRealPart(dx) >= 0.0) dx = umin; 47 else if (PetscRealPart(dx) < 0.0) dx = -umin; 48 } 49 dx *= epsilon; 50 vscale_array[col] = 1.0 / dx; 51 } 52 PetscCall(VecRestoreArrayRead(x1, &xx)); 53 PetscCall(VecRestoreArray(vscale, &vscale_array)); 54 } 55 if (ctype == IS_COLORING_GLOBAL && coloring->htype[0] == 'd') { 56 PetscCall(VecGhostUpdateBegin(vscale, INSERT_VALUES, SCATTER_FORWARD)); 57 PetscCall(VecGhostUpdateEnd(vscale, INSERT_VALUES, SCATTER_FORWARD)); 58 } 59 60 /* (3) Loop over each color */ 61 if (!coloring->w3) { 62 PetscCall(VecDuplicate(x1, &coloring->w3)); 63 /* Vec is used intensively in particular piece of scalar CPU code; won't benefit from bouncing back and forth to the GPU */ 64 PetscCall(VecBindToCPU(coloring->w3, PETSC_TRUE)); 65 } 66 w3 = coloring->w3; 67 68 PetscCall(VecGetOwnershipRange(x1, &cstart, &cend)); /* used by ghosted vscale */ 69 if (vscale) PetscCall(VecGetArray(vscale, &vscale_array)); 70 nz = 0; 71 for (k = 0; k < ncolors; k++) { 72 coloring->currentcolor = k; 73 74 /* 75 (3-1) Loop over each column associated with color 76 adding the perturbation to the vector w3 = x1 + dx. 77 */ 78 PetscCall(VecCopy(x1, w3)); 79 dy_i = dy; 80 for (i = 0; i < bs; i++) { /* Loop over a block of columns */ 81 PetscCall(VecGetArray(w3, &w3_array)); 82 if (ctype == IS_COLORING_GLOBAL) w3_array -= cstart; /* shift pointer so global index can be used */ 83 if (coloring->htype[0] == 'w') { 84 for (l = 0; l < ncolumns[k]; l++) { 85 col = i + bs * coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */ 86 w3_array[col] += 1.0 / dx; 87 if (i) w3_array[col - 1] -= 1.0 / dx; /* resume original w3[col-1] */ 88 } 89 } else { /* htype == 'ds' */ 90 vscale_array -= cstart; /* shift pointer so global index can be used */ 91 for (l = 0; l < ncolumns[k]; l++) { 92 col = i + bs * coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */ 93 w3_array[col] += 1.0 / vscale_array[col]; 94 if (i) w3_array[col - 1] -= 1.0 / vscale_array[col - 1]; /* resume original w3[col-1] */ 95 } 96 vscale_array += cstart; 97 } 98 if (ctype == IS_COLORING_GLOBAL) w3_array += cstart; 99 PetscCall(VecRestoreArray(w3, &w3_array)); 100 101 /* 102 (3-2) Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 103 w2 = F(x1 + dx) - F(x1) 104 */ 105 PetscCall(PetscLogEventBegin(MAT_FDColoringFunction, 0, 0, 0, 0)); 106 PetscCall(VecPlaceArray(w2, dy_i)); /* place w2 to the array dy_i */ 107 PetscCall((*f)(sctx, w3, w2, fctx)); 108 PetscCall(PetscLogEventEnd(MAT_FDColoringFunction, 0, 0, 0, 0)); 109 PetscCall(VecAXPY(w2, -1.0, w1)); 110 PetscCall(VecResetArray(w2)); 111 dy_i += nxloc; /* points to dy+i*nxloc */ 112 } 113 114 /* 115 (3-3) Loop over rows of vector, putting results into Jacobian matrix 116 */ 117 nrows_k = nrows[k]; 118 if (coloring->htype[0] == 'w') { 119 for (l = 0; l < nrows_k; l++) { 120 row = bs * Jentry2[nz].row; /* local row index */ 121 valaddr = Jentry2[nz++].valaddr; 122 spidx = 0; 123 dy_i = dy; 124 for (i = 0; i < bs; i++) { /* column of the block */ 125 for (j = 0; j < bs; j++) { /* row of the block */ 126 valaddr[spidx++] = dy_i[row + j] * dx; 127 } 128 dy_i += nxloc; /* points to dy+i*nxloc */ 129 } 130 } 131 } else { /* htype == 'ds' */ 132 for (l = 0; l < nrows_k; l++) { 133 row = bs * Jentry[nz].row; /* local row index */ 134 col = bs * Jentry[nz].col; /* local column index */ 135 valaddr = Jentry[nz++].valaddr; 136 spidx = 0; 137 dy_i = dy; 138 for (i = 0; i < bs; i++) { /* column of the block */ 139 for (j = 0; j < bs; j++) { /* row of the block */ 140 valaddr[spidx++] = dy_i[row + j] * vscale_array[col + i]; 141 } 142 dy_i += nxloc; /* points to dy+i*nxloc */ 143 } 144 } 145 } 146 } 147 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 148 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 149 if (vscale) PetscCall(VecRestoreArray(vscale, &vscale_array)); 150 151 coloring->currentcolor = -1; 152 PetscCall(VecBindToCPU(x1, PETSC_FALSE)); 153 PetscFunctionReturn(0); 154 } 155 156 /* this is declared PETSC_EXTERN because it is used by MatFDColoringUseDM() which is in the DM library */ 157 PetscErrorCode MatFDColoringApply_AIJ(Mat J, MatFDColoring coloring, Vec x1, void *sctx) 158 { 159 PetscErrorCode (*f)(void *, Vec, Vec, void *) = (PetscErrorCode(*)(void *, Vec, Vec, void *))coloring->f; 160 PetscInt k, cstart, cend, l, row, col, nz; 161 PetscScalar dx = 0.0, *y, *w3_array; 162 const PetscScalar *xx; 163 PetscScalar *vscale_array; 164 PetscReal epsilon = coloring->error_rel, umin = coloring->umin, unorm; 165 Vec w1 = coloring->w1, w2 = coloring->w2, w3, vscale = coloring->vscale; 166 void *fctx = coloring->fctx; 167 ISColoringType ctype = coloring->ctype; 168 PetscInt nxloc, nrows_k; 169 MatEntry *Jentry = coloring->matentry; 170 MatEntry2 *Jentry2 = coloring->matentry2; 171 const PetscInt ncolors = coloring->ncolors, *ncolumns = coloring->ncolumns, *nrows = coloring->nrows; 172 PetscBool alreadyboundtocpu; 173 174 PetscFunctionBegin; 175 PetscCall(VecBoundToCPU(x1, &alreadyboundtocpu)); 176 PetscCall(VecBindToCPU(x1, PETSC_TRUE)); 177 PetscCheck(!(ctype == IS_COLORING_LOCAL) || !(J->ops->fdcoloringapply == MatFDColoringApply_AIJ), PetscObjectComm((PetscObject)J), PETSC_ERR_SUP, "Must call MatColoringUseDM() with IS_COLORING_LOCAL"); 178 /* (1) Set w1 = F(x1) */ 179 if (!coloring->fset) { 180 PetscCall(PetscLogEventBegin(MAT_FDColoringFunction, 0, 0, 0, 0)); 181 PetscCall((*f)(sctx, x1, w1, fctx)); 182 PetscCall(PetscLogEventEnd(MAT_FDColoringFunction, 0, 0, 0, 0)); 183 } else { 184 coloring->fset = PETSC_FALSE; 185 } 186 187 /* (2) Compute vscale = 1./dx - the local scale factors, including ghost points */ 188 if (coloring->htype[0] == 'w') { 189 /* vscale = 1./dx is a constant scalar */ 190 PetscCall(VecNorm(x1, NORM_2, &unorm)); 191 dx = 1.0 / (PetscSqrtReal(1.0 + unorm) * epsilon); 192 } else { 193 PetscCall(VecGetLocalSize(x1, &nxloc)); 194 PetscCall(VecGetArrayRead(x1, &xx)); 195 PetscCall(VecGetArray(vscale, &vscale_array)); 196 for (col = 0; col < nxloc; col++) { 197 dx = xx[col]; 198 if (PetscAbsScalar(dx) < umin) { 199 if (PetscRealPart(dx) >= 0.0) dx = umin; 200 else if (PetscRealPart(dx) < 0.0) dx = -umin; 201 } 202 dx *= epsilon; 203 vscale_array[col] = 1.0 / dx; 204 } 205 PetscCall(VecRestoreArrayRead(x1, &xx)); 206 PetscCall(VecRestoreArray(vscale, &vscale_array)); 207 } 208 if (ctype == IS_COLORING_GLOBAL && coloring->htype[0] == 'd') { 209 PetscCall(VecGhostUpdateBegin(vscale, INSERT_VALUES, SCATTER_FORWARD)); 210 PetscCall(VecGhostUpdateEnd(vscale, INSERT_VALUES, SCATTER_FORWARD)); 211 } 212 213 /* (3) Loop over each color */ 214 if (!coloring->w3) { PetscCall(VecDuplicate(x1, &coloring->w3)); } 215 w3 = coloring->w3; 216 217 PetscCall(VecGetOwnershipRange(x1, &cstart, &cend)); /* used by ghosted vscale */ 218 if (vscale) PetscCall(VecGetArray(vscale, &vscale_array)); 219 nz = 0; 220 221 if (coloring->bcols > 1) { /* use blocked insertion of Jentry */ 222 PetscInt i, m = J->rmap->n, nbcols, bcols = coloring->bcols; 223 PetscScalar *dy = coloring->dy, *dy_k; 224 225 nbcols = 0; 226 for (k = 0; k < ncolors; k += bcols) { 227 /* 228 (3-1) Loop over each column associated with color 229 adding the perturbation to the vector w3 = x1 + dx. 230 */ 231 232 dy_k = dy; 233 if (k + bcols > ncolors) bcols = ncolors - k; 234 for (i = 0; i < bcols; i++) { 235 coloring->currentcolor = k + i; 236 237 PetscCall(VecCopy(x1, w3)); 238 PetscCall(VecGetArray(w3, &w3_array)); 239 if (ctype == IS_COLORING_GLOBAL) w3_array -= cstart; /* shift pointer so global index can be used */ 240 if (coloring->htype[0] == 'w') { 241 for (l = 0; l < ncolumns[k + i]; l++) { 242 col = coloring->columns[k + i][l]; /* local column (in global index!) of the matrix we are probing for */ 243 w3_array[col] += 1.0 / dx; 244 } 245 } else { /* htype == 'ds' */ 246 vscale_array -= cstart; /* shift pointer so global index can be used */ 247 for (l = 0; l < ncolumns[k + i]; l++) { 248 col = coloring->columns[k + i][l]; /* local column (in global index!) of the matrix we are probing for */ 249 w3_array[col] += 1.0 / vscale_array[col]; 250 } 251 vscale_array += cstart; 252 } 253 if (ctype == IS_COLORING_GLOBAL) w3_array += cstart; 254 PetscCall(VecRestoreArray(w3, &w3_array)); 255 256 /* 257 (3-2) Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 258 w2 = F(x1 + dx) - F(x1) 259 */ 260 PetscCall(PetscLogEventBegin(MAT_FDColoringFunction, 0, 0, 0, 0)); 261 PetscCall(VecPlaceArray(w2, dy_k)); /* place w2 to the array dy_i */ 262 PetscCall((*f)(sctx, w3, w2, fctx)); 263 PetscCall(PetscLogEventEnd(MAT_FDColoringFunction, 0, 0, 0, 0)); 264 PetscCall(VecAXPY(w2, -1.0, w1)); 265 PetscCall(VecResetArray(w2)); 266 dy_k += m; /* points to dy+i*nxloc */ 267 } 268 269 /* 270 (3-3) Loop over block rows of vector, putting results into Jacobian matrix 271 */ 272 nrows_k = nrows[nbcols++]; 273 274 if (coloring->htype[0] == 'w') { 275 for (l = 0; l < nrows_k; l++) { 276 row = Jentry2[nz].row; /* local row index */ 277 /* The 'useless' ifdef is due to a bug in NVIDIA nvc 21.11, which triggers a segfault on this line. We write it in 278 another way, and it seems work. See https://lists.mcs.anl.gov/pipermail/petsc-users/2021-December/045158.html 279 */ 280 #if defined(PETSC_USE_COMPLEX) 281 PetscScalar *tmp = Jentry2[nz].valaddr; 282 *tmp = dy[row] * dx; 283 #else 284 *(Jentry2[nz].valaddr) = dy[row] * dx; 285 #endif 286 nz++; 287 } 288 } else { /* htype == 'ds' */ 289 for (l = 0; l < nrows_k; l++) { 290 row = Jentry[nz].row; /* local row index */ 291 #if defined(PETSC_USE_COMPLEX) /* See https://lists.mcs.anl.gov/pipermail/petsc-users/2021-December/045158.html */ 292 PetscScalar *tmp = Jentry[nz].valaddr; 293 *tmp = dy[row] * vscale_array[Jentry[nz].col]; 294 #else 295 *(Jentry[nz].valaddr) = dy[row] * vscale_array[Jentry[nz].col]; 296 #endif 297 nz++; 298 } 299 } 300 } 301 } else { /* bcols == 1 */ 302 for (k = 0; k < ncolors; k++) { 303 coloring->currentcolor = k; 304 305 /* 306 (3-1) Loop over each column associated with color 307 adding the perturbation to the vector w3 = x1 + dx. 308 */ 309 PetscCall(VecCopy(x1, w3)); 310 PetscCall(VecGetArray(w3, &w3_array)); 311 if (ctype == IS_COLORING_GLOBAL) w3_array -= cstart; /* shift pointer so global index can be used */ 312 if (coloring->htype[0] == 'w') { 313 for (l = 0; l < ncolumns[k]; l++) { 314 col = coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */ 315 w3_array[col] += 1.0 / dx; 316 } 317 } else { /* htype == 'ds' */ 318 vscale_array -= cstart; /* shift pointer so global index can be used */ 319 for (l = 0; l < ncolumns[k]; l++) { 320 col = coloring->columns[k][l]; /* local column (in global index!) of the matrix we are probing for */ 321 w3_array[col] += 1.0 / vscale_array[col]; 322 } 323 vscale_array += cstart; 324 } 325 if (ctype == IS_COLORING_GLOBAL) w3_array += cstart; 326 PetscCall(VecRestoreArray(w3, &w3_array)); 327 328 /* 329 (3-2) Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations) 330 w2 = F(x1 + dx) - F(x1) 331 */ 332 PetscCall(PetscLogEventBegin(MAT_FDColoringFunction, 0, 0, 0, 0)); 333 PetscCall((*f)(sctx, w3, w2, fctx)); 334 PetscCall(PetscLogEventEnd(MAT_FDColoringFunction, 0, 0, 0, 0)); 335 PetscCall(VecAXPY(w2, -1.0, w1)); 336 337 /* 338 (3-3) Loop over rows of vector, putting results into Jacobian matrix 339 */ 340 nrows_k = nrows[k]; 341 PetscCall(VecGetArray(w2, &y)); 342 if (coloring->htype[0] == 'w') { 343 for (l = 0; l < nrows_k; l++) { 344 row = Jentry2[nz].row; /* local row index */ 345 #if defined(PETSC_USE_COMPLEX) /* See https://lists.mcs.anl.gov/pipermail/petsc-users/2021-December/045158.html */ 346 PetscScalar *tmp = Jentry2[nz].valaddr; 347 *tmp = y[row] * dx; 348 #else 349 *(Jentry2[nz].valaddr) = y[row] * dx; 350 #endif 351 nz++; 352 } 353 } else { /* htype == 'ds' */ 354 for (l = 0; l < nrows_k; l++) { 355 row = Jentry[nz].row; /* local row index */ 356 #if defined(PETSC_USE_COMPLEX) /* See https://lists.mcs.anl.gov/pipermail/petsc-users/2021-December/045158.html */ 357 PetscScalar *tmp = Jentry[nz].valaddr; 358 *tmp = y[row] * vscale_array[Jentry[nz].col]; 359 #else 360 *(Jentry[nz].valaddr) = y[row] * vscale_array[Jentry[nz].col]; 361 #endif 362 nz++; 363 } 364 } 365 PetscCall(VecRestoreArray(w2, &y)); 366 } 367 } 368 369 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 370 if (J->offloadmask != PETSC_OFFLOAD_UNALLOCATED) J->offloadmask = PETSC_OFFLOAD_CPU; 371 #endif 372 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 373 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 374 if (vscale) PetscCall(VecRestoreArray(vscale, &vscale_array)); 375 coloring->currentcolor = -1; 376 if (!alreadyboundtocpu) PetscCall(VecBindToCPU(x1, PETSC_FALSE)); 377 PetscFunctionReturn(0); 378 } 379 380 PetscErrorCode MatFDColoringSetUp_MPIXAIJ(Mat mat, ISColoring iscoloring, MatFDColoring c) 381 { 382 PetscMPIInt size, *ncolsonproc, *disp, nn; 383 PetscInt i, n, nrows, nrows_i, j, k, m, ncols, col, *rowhit, cstart, cend, colb; 384 const PetscInt *is, *A_ci, *A_cj, *B_ci, *B_cj, *row = NULL, *ltog = NULL; 385 PetscInt nis = iscoloring->n, nctot, *cols, tmp = 0; 386 ISLocalToGlobalMapping map = mat->cmap->mapping; 387 PetscInt ctype = c->ctype, *spidxA, *spidxB, nz, bs, bs2, spidx; 388 Mat A, B; 389 PetscScalar *A_val, *B_val, **valaddrhit; 390 MatEntry *Jentry; 391 MatEntry2 *Jentry2; 392 PetscBool isBAIJ, isSELL; 393 PetscInt bcols = c->bcols; 394 #if defined(PETSC_USE_CTABLE) 395 PetscHMapI colmap = NULL; 396 #else 397 PetscInt *colmap = NULL; /* local col number of off-diag col */ 398 #endif 399 400 PetscFunctionBegin; 401 if (ctype == IS_COLORING_LOCAL) { 402 PetscCheck(map, PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_INCOMP, "When using ghosted differencing matrix must have local to global mapping provided with MatSetLocalToGlobalMapping"); 403 PetscCall(ISLocalToGlobalMappingGetIndices(map, <og)); 404 } 405 406 PetscCall(MatGetBlockSize(mat, &bs)); 407 PetscCall(PetscObjectBaseTypeCompare((PetscObject)mat, MATMPIBAIJ, &isBAIJ)); 408 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISELL, &isSELL)); 409 if (isBAIJ) { 410 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 411 Mat_SeqBAIJ *spA, *spB; 412 A = baij->A; 413 spA = (Mat_SeqBAIJ *)A->data; 414 A_val = spA->a; 415 B = baij->B; 416 spB = (Mat_SeqBAIJ *)B->data; 417 B_val = spB->a; 418 nz = spA->nz + spB->nz; /* total nonzero entries of mat */ 419 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 420 colmap = baij->colmap; 421 PetscCall(MatGetColumnIJ_SeqBAIJ_Color(A, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &A_ci, &A_cj, &spidxA, NULL)); 422 PetscCall(MatGetColumnIJ_SeqBAIJ_Color(B, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &B_ci, &B_cj, &spidxB, NULL)); 423 424 if (ctype == IS_COLORING_GLOBAL && c->htype[0] == 'd') { /* create vscale for storing dx */ 425 PetscInt *garray; 426 PetscCall(PetscMalloc1(B->cmap->n, &garray)); 427 for (i = 0; i < baij->B->cmap->n / bs; i++) { 428 for (j = 0; j < bs; j++) garray[i * bs + j] = bs * baij->garray[i] + j; 429 } 430 PetscCall(VecCreateGhost(PetscObjectComm((PetscObject)mat), mat->cmap->n, PETSC_DETERMINE, B->cmap->n, garray, &c->vscale)); 431 PetscCall(VecBindToCPU(c->vscale, PETSC_TRUE)); 432 PetscCall(PetscFree(garray)); 433 } 434 } else if (isSELL) { 435 Mat_MPISELL *sell = (Mat_MPISELL *)mat->data; 436 Mat_SeqSELL *spA, *spB; 437 A = sell->A; 438 spA = (Mat_SeqSELL *)A->data; 439 A_val = spA->val; 440 B = sell->B; 441 spB = (Mat_SeqSELL *)B->data; 442 B_val = spB->val; 443 nz = spA->nz + spB->nz; /* total nonzero entries of mat */ 444 if (!sell->colmap) { 445 /* Allow access to data structures of local part of matrix 446 - creates aij->colmap which maps global column number to local number in part B */ 447 PetscCall(MatCreateColmap_MPISELL_Private(mat)); 448 } 449 colmap = sell->colmap; 450 PetscCall(MatGetColumnIJ_SeqSELL_Color(A, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &A_ci, &A_cj, &spidxA, NULL)); 451 PetscCall(MatGetColumnIJ_SeqSELL_Color(B, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &B_ci, &B_cj, &spidxB, NULL)); 452 453 bs = 1; /* only bs=1 is supported for non MPIBAIJ matrix */ 454 455 if (ctype == IS_COLORING_GLOBAL && c->htype[0] == 'd') { /* create vscale for storing dx */ 456 PetscCall(VecCreateGhost(PetscObjectComm((PetscObject)mat), mat->cmap->n, PETSC_DETERMINE, B->cmap->n, sell->garray, &c->vscale)); 457 PetscCall(VecBindToCPU(c->vscale, PETSC_TRUE)); 458 } 459 } else { 460 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 461 Mat_SeqAIJ *spA, *spB; 462 A = aij->A; 463 spA = (Mat_SeqAIJ *)A->data; 464 A_val = spA->a; 465 B = aij->B; 466 spB = (Mat_SeqAIJ *)B->data; 467 B_val = spB->a; 468 nz = spA->nz + spB->nz; /* total nonzero entries of mat */ 469 if (!aij->colmap) { 470 /* Allow access to data structures of local part of matrix 471 - creates aij->colmap which maps global column number to local number in part B */ 472 PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 473 } 474 colmap = aij->colmap; 475 PetscCall(MatGetColumnIJ_SeqAIJ_Color(A, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &A_ci, &A_cj, &spidxA, NULL)); 476 PetscCall(MatGetColumnIJ_SeqAIJ_Color(B, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &B_ci, &B_cj, &spidxB, NULL)); 477 478 bs = 1; /* only bs=1 is supported for non MPIBAIJ matrix */ 479 480 if (ctype == IS_COLORING_GLOBAL && c->htype[0] == 'd') { /* create vscale for storing dx */ 481 PetscCall(VecCreateGhost(PetscObjectComm((PetscObject)mat), mat->cmap->n, PETSC_DETERMINE, B->cmap->n, aij->garray, &c->vscale)); 482 PetscCall(VecBindToCPU(c->vscale, PETSC_TRUE)); 483 } 484 } 485 486 m = mat->rmap->n / bs; 487 cstart = mat->cmap->rstart / bs; 488 cend = mat->cmap->rend / bs; 489 490 PetscCall(PetscMalloc2(nis, &c->ncolumns, nis, &c->columns)); 491 PetscCall(PetscMalloc1(nis, &c->nrows)); 492 493 if (c->htype[0] == 'd') { 494 PetscCall(PetscMalloc1(nz, &Jentry)); 495 c->matentry = Jentry; 496 } else if (c->htype[0] == 'w') { 497 PetscCall(PetscMalloc1(nz, &Jentry2)); 498 c->matentry2 = Jentry2; 499 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "htype is not supported"); 500 501 PetscCall(PetscMalloc2(m + 1, &rowhit, m + 1, &valaddrhit)); 502 nz = 0; 503 PetscCall(ISColoringGetIS(iscoloring, PETSC_OWN_POINTER, PETSC_IGNORE, &c->isa)); 504 505 if (ctype == IS_COLORING_GLOBAL) { 506 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat), &size)); 507 PetscCall(PetscMalloc2(size, &ncolsonproc, size, &disp)); 508 } 509 510 for (i = 0; i < nis; i++) { /* for each local color */ 511 PetscCall(ISGetLocalSize(c->isa[i], &n)); 512 PetscCall(ISGetIndices(c->isa[i], &is)); 513 514 c->ncolumns[i] = n; /* local number of columns of this color on this process */ 515 c->columns[i] = (PetscInt *)is; 516 517 if (ctype == IS_COLORING_GLOBAL) { 518 /* Determine nctot, the total (parallel) number of columns of this color */ 519 /* ncolsonproc[j]: local ncolumns on proc[j] of this color */ 520 PetscCall(PetscMPIIntCast(n, &nn)); 521 PetscCallMPI(MPI_Allgather(&nn, 1, MPI_INT, ncolsonproc, 1, MPI_INT, PetscObjectComm((PetscObject)mat))); 522 nctot = 0; 523 for (j = 0; j < size; j++) nctot += ncolsonproc[j]; 524 if (!nctot) PetscCall(PetscInfo(mat, "Coloring of matrix has some unneeded colors with no corresponding rows\n")); 525 526 disp[0] = 0; 527 for (j = 1; j < size; j++) disp[j] = disp[j - 1] + ncolsonproc[j - 1]; 528 529 /* Get cols, the complete list of columns for this color on each process */ 530 PetscCall(PetscMalloc1(nctot + 1, &cols)); 531 PetscCallMPI(MPI_Allgatherv((void *)is, n, MPIU_INT, cols, ncolsonproc, disp, MPIU_INT, PetscObjectComm((PetscObject)mat))); 532 } else if (ctype == IS_COLORING_LOCAL) { 533 /* Determine local number of columns of this color on this process, including ghost points */ 534 nctot = n; 535 cols = (PetscInt *)is; 536 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Not provided for this MatFDColoring type"); 537 538 /* Mark all rows affect by these columns */ 539 PetscCall(PetscArrayzero(rowhit, m)); 540 bs2 = bs * bs; 541 nrows_i = 0; 542 for (j = 0; j < nctot; j++) { /* loop over columns*/ 543 if (ctype == IS_COLORING_LOCAL) { 544 col = ltog[cols[j]]; 545 } else { 546 col = cols[j]; 547 } 548 if (col >= cstart && col < cend) { /* column is in A, diagonal block of mat */ 549 tmp = A_ci[col - cstart]; 550 row = A_cj + tmp; 551 nrows = A_ci[col - cstart + 1] - tmp; 552 nrows_i += nrows; 553 554 /* loop over columns of A marking them in rowhit */ 555 for (k = 0; k < nrows; k++) { 556 /* set valaddrhit for part A */ 557 spidx = bs2 * spidxA[tmp + k]; 558 valaddrhit[*row] = &A_val[spidx]; 559 rowhit[*row++] = col - cstart + 1; /* local column index */ 560 } 561 } else { /* column is in B, off-diagonal block of mat */ 562 #if defined(PETSC_USE_CTABLE) 563 PetscCall(PetscHMapIGetWithDefault(colmap, col + 1, 0, &colb)); 564 colb--; 565 #else 566 colb = colmap[col] - 1; /* local column index */ 567 #endif 568 if (colb == -1) { 569 nrows = 0; 570 } else { 571 colb = colb / bs; 572 tmp = B_ci[colb]; 573 row = B_cj + tmp; 574 nrows = B_ci[colb + 1] - tmp; 575 } 576 nrows_i += nrows; 577 /* loop over columns of B marking them in rowhit */ 578 for (k = 0; k < nrows; k++) { 579 /* set valaddrhit for part B */ 580 spidx = bs2 * spidxB[tmp + k]; 581 valaddrhit[*row] = &B_val[spidx]; 582 rowhit[*row++] = colb + 1 + cend - cstart; /* local column index */ 583 } 584 } 585 } 586 c->nrows[i] = nrows_i; 587 588 if (c->htype[0] == 'd') { 589 for (j = 0; j < m; j++) { 590 if (rowhit[j]) { 591 Jentry[nz].row = j; /* local row index */ 592 Jentry[nz].col = rowhit[j] - 1; /* local column index */ 593 Jentry[nz].valaddr = valaddrhit[j]; /* address of mat value for this entry */ 594 nz++; 595 } 596 } 597 } else { /* c->htype == 'wp' */ 598 for (j = 0; j < m; j++) { 599 if (rowhit[j]) { 600 Jentry2[nz].row = j; /* local row index */ 601 Jentry2[nz].valaddr = valaddrhit[j]; /* address of mat value for this entry */ 602 nz++; 603 } 604 } 605 } 606 if (ctype == IS_COLORING_GLOBAL) PetscCall(PetscFree(cols)); 607 } 608 if (ctype == IS_COLORING_GLOBAL) PetscCall(PetscFree2(ncolsonproc, disp)); 609 610 if (bcols > 1) { /* reorder Jentry for faster MatFDColoringApply() */ 611 PetscCall(MatFDColoringSetUpBlocked_AIJ_Private(mat, c, nz)); 612 } 613 614 if (isBAIJ) { 615 PetscCall(MatRestoreColumnIJ_SeqBAIJ_Color(A, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &A_ci, &A_cj, &spidxA, NULL)); 616 PetscCall(MatRestoreColumnIJ_SeqBAIJ_Color(B, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &B_ci, &B_cj, &spidxB, NULL)); 617 PetscCall(PetscMalloc1(bs * mat->rmap->n, &c->dy)); 618 } else if (isSELL) { 619 PetscCall(MatRestoreColumnIJ_SeqSELL_Color(A, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &A_ci, &A_cj, &spidxA, NULL)); 620 PetscCall(MatRestoreColumnIJ_SeqSELL_Color(B, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &B_ci, &B_cj, &spidxB, NULL)); 621 } else { 622 PetscCall(MatRestoreColumnIJ_SeqAIJ_Color(A, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &A_ci, &A_cj, &spidxA, NULL)); 623 PetscCall(MatRestoreColumnIJ_SeqAIJ_Color(B, 0, PETSC_FALSE, PETSC_FALSE, &ncols, &B_ci, &B_cj, &spidxB, NULL)); 624 } 625 626 PetscCall(ISColoringRestoreIS(iscoloring, PETSC_OWN_POINTER, &c->isa)); 627 PetscCall(PetscFree2(rowhit, valaddrhit)); 628 629 if (ctype == IS_COLORING_LOCAL) PetscCall(ISLocalToGlobalMappingRestoreIndices(map, <og)); 630 PetscCall(PetscInfo(c, "ncolors %" PetscInt_FMT ", brows %" PetscInt_FMT " and bcols %" PetscInt_FMT " are used.\n", c->ncolors, c->brows, c->bcols)); 631 PetscFunctionReturn(0); 632 } 633 634 PetscErrorCode MatFDColoringCreate_MPIXAIJ(Mat mat, ISColoring iscoloring, MatFDColoring c) 635 { 636 PetscInt bs, nis = iscoloring->n, m = mat->rmap->n; 637 PetscBool isBAIJ, isSELL; 638 639 PetscFunctionBegin; 640 /* set default brows and bcols for speedup inserting the dense matrix into sparse Jacobian; 641 bcols is chosen s.t. dy-array takes 50% of memory space as mat */ 642 PetscCall(MatGetBlockSize(mat, &bs)); 643 PetscCall(PetscObjectBaseTypeCompare((PetscObject)mat, MATMPIBAIJ, &isBAIJ)); 644 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISELL, &isSELL)); 645 if (isBAIJ || m == 0) { 646 c->brows = m; 647 c->bcols = 1; 648 } else if (isSELL) { 649 /* bcols is chosen s.t. dy-array takes 50% of local memory space as mat */ 650 Mat_MPISELL *sell = (Mat_MPISELL *)mat->data; 651 Mat_SeqSELL *spA, *spB; 652 Mat A, B; 653 PetscInt nz, brows, bcols; 654 PetscReal mem; 655 656 bs = 1; /* only bs=1 is supported for MPISELL matrix */ 657 658 A = sell->A; 659 spA = (Mat_SeqSELL *)A->data; 660 B = sell->B; 661 spB = (Mat_SeqSELL *)B->data; 662 nz = spA->nz + spB->nz; /* total local nonzero entries of mat */ 663 mem = nz * (sizeof(PetscScalar) + sizeof(PetscInt)) + 3 * m * sizeof(PetscInt); 664 bcols = (PetscInt)(0.5 * mem / (m * sizeof(PetscScalar))); 665 brows = 1000 / bcols; 666 if (bcols > nis) bcols = nis; 667 if (brows == 0 || brows > m) brows = m; 668 c->brows = brows; 669 c->bcols = bcols; 670 } else { /* mpiaij matrix */ 671 /* bcols is chosen s.t. dy-array takes 50% of local memory space as mat */ 672 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 673 Mat_SeqAIJ *spA, *spB; 674 Mat A, B; 675 PetscInt nz, brows, bcols; 676 PetscReal mem; 677 678 bs = 1; /* only bs=1 is supported for MPIAIJ matrix */ 679 680 A = aij->A; 681 spA = (Mat_SeqAIJ *)A->data; 682 B = aij->B; 683 spB = (Mat_SeqAIJ *)B->data; 684 nz = spA->nz + spB->nz; /* total local nonzero entries of mat */ 685 mem = nz * (sizeof(PetscScalar) + sizeof(PetscInt)) + 3 * m * sizeof(PetscInt); 686 bcols = (PetscInt)(0.5 * mem / (m * sizeof(PetscScalar))); 687 brows = 1000 / bcols; 688 if (bcols > nis) bcols = nis; 689 if (brows == 0 || brows > m) brows = m; 690 c->brows = brows; 691 c->bcols = bcols; 692 } 693 694 c->M = mat->rmap->N / bs; /* set the global rows and columns and local rows */ 695 c->N = mat->cmap->N / bs; 696 c->m = mat->rmap->n / bs; 697 c->rstart = mat->rmap->rstart / bs; 698 c->ncolors = nis; 699 PetscFunctionReturn(0); 700 } 701 702 /*@C 703 704 MatFDColoringSetValues - takes a matrix in compressed color format and enters the matrix into a PETSc `Mat` 705 706 Collective 707 708 Input Parameters: 709 + J - the sparse matrix 710 . coloring - created with `MatFDColoringCreate()` and a local coloring 711 - y - column major storage of matrix values with one color of values per column, the number of rows of y should match 712 the number of local rows of J and the number of columns is the number of colors. 713 714 Level: intermediate 715 716 Notes: the matrix in compressed color format may come from an automatic differentiation code 717 718 The code will be slightly faster if `MatFDColoringSetBlockSize`(coloring,`PETSC_DEFAULT`,nc); is called immediately after creating the coloring 719 720 .seealso: `MatFDColoringCreate()`, `ISColoring`, `ISColoringCreate()`, `ISColoringSetType()`, `IS_COLORING_LOCAL`, `MatFDColoringSetBlockSize()` 721 @*/ 722 PetscErrorCode MatFDColoringSetValues(Mat J, MatFDColoring coloring, const PetscScalar *y) 723 { 724 MatEntry2 *Jentry2; 725 PetscInt row, i, nrows_k, l, ncolors, nz = 0, bcols, nbcols = 0; 726 const PetscInt *nrows; 727 PetscBool eq; 728 729 PetscFunctionBegin; 730 PetscValidHeaderSpecific(J, MAT_CLASSID, 1); 731 PetscValidHeaderSpecific(coloring, MAT_FDCOLORING_CLASSID, 2); 732 PetscCall(PetscObjectCompareId((PetscObject)J, coloring->matid, &eq)); 733 PetscCheck(eq, PetscObjectComm((PetscObject)J), PETSC_ERR_ARG_WRONG, "Matrix used with MatFDColoringSetValues() must be that used with MatFDColoringCreate()"); 734 Jentry2 = coloring->matentry2; 735 nrows = coloring->nrows; 736 ncolors = coloring->ncolors; 737 bcols = coloring->bcols; 738 739 for (i = 0; i < ncolors; i += bcols) { 740 nrows_k = nrows[nbcols++]; 741 for (l = 0; l < nrows_k; l++) { 742 row = Jentry2[nz].row; /* local row index */ 743 *(Jentry2[nz++].valaddr) = y[row]; 744 } 745 y += bcols * coloring->m; 746 } 747 PetscCall(MatAssemblyBegin(J, MAT_FINAL_ASSEMBLY)); 748 PetscCall(MatAssemblyEnd(J, MAT_FINAL_ASSEMBLY)); 749 PetscFunctionReturn(0); 750 } 751