1 #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/ 2 #include <../src/mat/utils/freespace.h> 3 4 /*@ 5 MatMAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATMAIJ` matrix 6 7 Not Collective, but if the `MATMAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel 8 9 Input Parameter: 10 . A - the `MATMAIJ` matrix 11 12 Output Parameter: 13 . B - the `MATAIJ` matrix 14 15 Level: advanced 16 17 Note: 18 The reference count on the `MATAIJ` matrix is not increased so you should not destroy it. 19 20 .seealso: [](ch_matrices), `Mat`, `MATMAIJ`, `MATAIJ`, `MatCreateMAIJ()` 21 @*/ 22 PetscErrorCode MatMAIJGetAIJ(Mat A, Mat *B) 23 { 24 PetscBool ismpimaij, isseqmaij; 25 26 PetscFunctionBegin; 27 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIMAIJ, &ismpimaij)); 28 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQMAIJ, &isseqmaij)); 29 if (ismpimaij) { 30 Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 31 32 *B = b->A; 33 } else if (isseqmaij) { 34 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 35 36 *B = b->AIJ; 37 } else { 38 *B = A; 39 } 40 PetscFunctionReturn(PETSC_SUCCESS); 41 } 42 43 /*@ 44 MatMAIJRedimension - Get a new `MATMAIJ` matrix with the same action, but for a different block size 45 46 Logically Collective 47 48 Input Parameters: 49 + A - the `MATMAIJ` matrix 50 - dof - the block size for the new matrix 51 52 Output Parameter: 53 . B - the new `MATMAIJ` matrix 54 55 Level: advanced 56 57 .seealso: [](ch_matrices), `Mat`, `MATMAIJ`, `MatCreateMAIJ()` 58 @*/ 59 PetscErrorCode MatMAIJRedimension(Mat A, PetscInt dof, Mat *B) 60 { 61 Mat Aij = NULL; 62 63 PetscFunctionBegin; 64 PetscValidLogicalCollectiveInt(A, dof, 2); 65 PetscCall(MatMAIJGetAIJ(A, &Aij)); 66 PetscCall(MatCreateMAIJ(Aij, dof, B)); 67 PetscFunctionReturn(PETSC_SUCCESS); 68 } 69 70 static PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 71 { 72 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 73 74 PetscFunctionBegin; 75 PetscCall(MatDestroy(&b->AIJ)); 76 PetscCall(PetscFree(A->data)); 77 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaijcusparse_C", NULL)); 78 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaij_C", NULL)); 79 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaij_seqmaij_C", NULL)); 80 PetscFunctionReturn(PETSC_SUCCESS); 81 } 82 83 static PetscErrorCode MatSetUp_MAIJ(Mat A) 84 { 85 PetscFunctionBegin; 86 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Must use MatCreateMAIJ() to create MAIJ matrices"); 87 } 88 89 static PetscErrorCode MatView_SeqMAIJ(Mat A, PetscViewer viewer) 90 { 91 Mat B; 92 93 PetscFunctionBegin; 94 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 95 PetscCall(MatView(B, viewer)); 96 PetscCall(MatDestroy(&B)); 97 PetscFunctionReturn(PETSC_SUCCESS); 98 } 99 100 static PetscErrorCode MatView_MPIMAIJ(Mat A, PetscViewer viewer) 101 { 102 Mat B; 103 104 PetscFunctionBegin; 105 PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B)); 106 PetscCall(MatView(B, viewer)); 107 PetscCall(MatDestroy(&B)); 108 PetscFunctionReturn(PETSC_SUCCESS); 109 } 110 111 static PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 112 { 113 Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 114 115 PetscFunctionBegin; 116 PetscCall(MatDestroy(&b->AIJ)); 117 PetscCall(MatDestroy(&b->OAIJ)); 118 PetscCall(MatDestroy(&b->A)); 119 PetscCall(VecScatterDestroy(&b->ctx)); 120 PetscCall(VecDestroy(&b->w)); 121 PetscCall(PetscFree(A->data)); 122 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaijcusparse_C", NULL)); 123 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaij_C", NULL)); 124 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_mpiaij_mpimaij_C", NULL)); 125 PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL)); 126 PetscFunctionReturn(PETSC_SUCCESS); 127 } 128 129 /*MC 130 MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 131 multicomponent problems, interpolating or restricting each component the same way independently. 132 The matrix type is based on `MATSEQAIJ` for sequential matrices, and `MATMPIAIJ` for distributed matrices. 133 134 Operations provided: 135 .vb 136 MatMult() 137 MatMultTranspose() 138 MatMultAdd() 139 MatMultTransposeAdd() 140 .ve 141 142 Level: advanced 143 144 .seealso: [](ch_matrices), `Mat`, `MATAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MatCreateMAIJ()` 145 M*/ 146 147 PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A) 148 { 149 Mat_MPIMAIJ *b; 150 PetscMPIInt size; 151 152 PetscFunctionBegin; 153 PetscCall(PetscNew(&b)); 154 A->data = (void *)b; 155 156 PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps))); 157 158 A->ops->setup = MatSetUp_MAIJ; 159 160 b->AIJ = NULL; 161 b->dof = 0; 162 b->OAIJ = NULL; 163 b->ctx = NULL; 164 b->w = NULL; 165 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 166 if (size == 1) { 167 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQMAIJ)); 168 } else { 169 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIMAIJ)); 170 } 171 A->preallocated = PETSC_TRUE; 172 A->assembled = PETSC_TRUE; 173 PetscFunctionReturn(PETSC_SUCCESS); 174 } 175 176 #if PetscHasAttribute(always_inline) 177 #define PETSC_FORCE_INLINE __attribute__((always_inline)) 178 #else 179 #define PETSC_FORCE_INLINE 180 #endif 181 182 #if defined(__clang__) 183 #define PETSC_PRAGMA_UNROLL _Pragma("unroll") 184 #else 185 #define PETSC_PRAGMA_UNROLL 186 #endif 187 188 enum { 189 MAT_SEQMAIJ_MAX_TEMPLATE_SIZE = 18 190 }; 191 192 // try as hard as possible to get these "template"s inlined, GCC apparently does take 'inline' 193 // keyword into account for these... 194 PETSC_FORCE_INLINE static inline PetscErrorCode MatMult_MatMultAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N) 195 { 196 const PetscBool mult_add = yy == NULL ? PETSC_FALSE : PETSC_TRUE; 197 const Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 198 const Mat baij = b->AIJ; 199 const Mat_SeqAIJ *a = (Mat_SeqAIJ *)baij->data; 200 const PetscInt m = baij->rmap->n; 201 const PetscInt nz = a->nz; 202 const PetscInt *idx = a->j; 203 const PetscInt *ii = a->i; 204 const PetscScalar *v = a->a; 205 PetscInt nonzerorow = 0; 206 const PetscScalar *x; 207 PetscScalar *z; 208 209 PetscFunctionBegin; 210 PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE); 211 if (mult_add && yy != zz) PetscCall(VecCopy(yy, zz)); 212 PetscCall(VecGetArrayRead(xx, &x)); 213 if (mult_add) { 214 PetscCall(VecGetArray(zz, &z)); 215 } else { 216 PetscCall(VecGetArrayWrite(zz, &z)); 217 } 218 219 for (PetscInt i = 0; i < m; ++i) { 220 PetscInt jrow = ii[i]; 221 const PetscInt n = ii[i + 1] - jrow; 222 // leave a line so clang-format does not align these decls 223 PetscScalar sum[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE] = {0}; 224 225 nonzerorow += n > 0; 226 for (PetscInt j = 0; j < n; ++j, ++jrow) { 227 const PetscScalar v_jrow = v[jrow]; 228 const PetscInt N_idx_jrow = N * idx[jrow]; 229 230 PETSC_PRAGMA_UNROLL 231 for (int k = 0; k < N; ++k) sum[k] += v_jrow * x[N_idx_jrow + k]; 232 } 233 234 PETSC_PRAGMA_UNROLL 235 for (int k = 0; k < N; ++k) { 236 const PetscInt z_idx = N * i + k; 237 238 if (mult_add) { 239 z[z_idx] += sum[k]; 240 } else { 241 z[z_idx] = sum[k]; 242 } 243 } 244 } 245 PetscCall(PetscLogFlops(2 * N * nz - (mult_add ? 0 : (N * nonzerorow)))); 246 PetscCall(VecRestoreArrayRead(xx, &x)); 247 if (mult_add) { 248 PetscCall(VecRestoreArray(zz, &z)); 249 } else { 250 PetscCall(VecRestoreArrayWrite(zz, &z)); 251 } 252 PetscFunctionReturn(PETSC_SUCCESS); 253 } 254 255 PETSC_FORCE_INLINE static inline PetscErrorCode MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N) 256 { 257 const PetscBool mult_add = yy == NULL ? PETSC_FALSE : PETSC_TRUE; 258 const Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 259 const Mat baij = b->AIJ; 260 const Mat_SeqAIJ *a = (Mat_SeqAIJ *)baij->data; 261 const PetscInt m = baij->rmap->n; 262 const PetscInt nz = a->nz; 263 const PetscInt *a_j = a->j; 264 const PetscInt *a_i = a->i; 265 const PetscScalar *a_a = a->a; 266 const PetscScalar *x; 267 PetscScalar *z; 268 269 PetscFunctionBegin; 270 PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE); 271 if (mult_add) { 272 if (yy != zz) PetscCall(VecCopy(yy, zz)); 273 } else { 274 PetscCall(VecSet(zz, 0.0)); 275 } 276 PetscCall(VecGetArrayRead(xx, &x)); 277 PetscCall(VecGetArray(zz, &z)); 278 279 for (PetscInt i = 0; i < m; i++) { 280 const PetscInt a_ii = a_i[i]; 281 const PetscInt *idx = PetscSafePointerPlusOffset(a_j, a_ii); 282 const PetscScalar *v = PetscSafePointerPlusOffset(a_a, a_ii); 283 const PetscInt n = a_i[i + 1] - a_ii; 284 PetscScalar alpha[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE]; 285 286 PETSC_PRAGMA_UNROLL 287 for (int k = 0; k < N; ++k) alpha[k] = x[N * i + k]; 288 for (PetscInt j = 0; j < n; ++j) { 289 const PetscInt N_idx_j = N * idx[j]; 290 const PetscScalar v_j = v[j]; 291 292 PETSC_PRAGMA_UNROLL 293 for (int k = 0; k < N; ++k) z[N_idx_j + k] += alpha[k] * v_j; 294 } 295 } 296 297 PetscCall(PetscLogFlops(2 * N * nz)); 298 PetscCall(VecRestoreArrayRead(xx, &x)); 299 PetscCall(VecRestoreArray(zz, &z)); 300 PetscFunctionReturn(PETSC_SUCCESS); 301 } 302 303 #define MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(N) \ 304 static PetscErrorCode PetscConcat(MatMult_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy) \ 305 { \ 306 PetscFunctionBegin; \ 307 PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, N)); \ 308 PetscFunctionReturn(PETSC_SUCCESS); \ 309 } \ 310 static PetscErrorCode PetscConcat(MatMultTranspose_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy) \ 311 { \ 312 PetscFunctionBegin; \ 313 PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, N)); \ 314 PetscFunctionReturn(PETSC_SUCCESS); \ 315 } \ 316 static PetscErrorCode PetscConcat(MatMultAdd_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy, Vec zz) \ 317 { \ 318 PetscFunctionBegin; \ 319 PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, N)); \ 320 PetscFunctionReturn(PETSC_SUCCESS); \ 321 } \ 322 static PetscErrorCode PetscConcat(MatMultTransposeAdd_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy, Vec zz) \ 323 { \ 324 PetscFunctionBegin; \ 325 PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, N)); \ 326 PetscFunctionReturn(PETSC_SUCCESS); \ 327 } 328 329 // clang-format off 330 MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(2) 331 MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(3) 332 MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(4) 333 MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(5) 334 MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(6) 335 MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(7) 336 MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(8) 337 MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(9) 338 MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(10) 339 MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(11) 340 MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(16) 341 MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(18) 342 // clang-format on 343 344 #undef MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE 345 346 static PetscErrorCode MatMult_SeqMAIJ_N(Mat A, Vec xx, Vec yy) 347 { 348 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 349 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 350 const PetscScalar *x, *v; 351 PetscScalar *y, *sums; 352 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 353 PetscInt n, i, jrow, j, dof = b->dof, k; 354 355 PetscFunctionBegin; 356 PetscCall(VecGetArrayRead(xx, &x)); 357 PetscCall(VecSet(yy, 0.0)); 358 PetscCall(VecGetArray(yy, &y)); 359 idx = a->j; 360 v = a->a; 361 ii = a->i; 362 363 for (i = 0; i < m; i++) { 364 jrow = ii[i]; 365 n = ii[i + 1] - jrow; 366 sums = y + dof * i; 367 for (j = 0; j < n; j++) { 368 for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k]; 369 jrow++; 370 } 371 } 372 373 PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 374 PetscCall(VecRestoreArrayRead(xx, &x)); 375 PetscCall(VecRestoreArray(yy, &y)); 376 PetscFunctionReturn(PETSC_SUCCESS); 377 } 378 379 static PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz) 380 { 381 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 382 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 383 const PetscScalar *x, *v; 384 PetscScalar *y, *sums; 385 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 386 PetscInt n, i, jrow, j, dof = b->dof, k; 387 388 PetscFunctionBegin; 389 if (yy != zz) PetscCall(VecCopy(yy, zz)); 390 PetscCall(VecGetArrayRead(xx, &x)); 391 PetscCall(VecGetArray(zz, &y)); 392 idx = a->j; 393 v = a->a; 394 ii = a->i; 395 396 for (i = 0; i < m; i++) { 397 jrow = ii[i]; 398 n = ii[i + 1] - jrow; 399 sums = y + dof * i; 400 for (j = 0; j < n; j++) { 401 for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k]; 402 jrow++; 403 } 404 } 405 406 PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 407 PetscCall(VecRestoreArrayRead(xx, &x)); 408 PetscCall(VecRestoreArray(zz, &y)); 409 PetscFunctionReturn(PETSC_SUCCESS); 410 } 411 412 static PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A, Vec xx, Vec yy) 413 { 414 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 415 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 416 const PetscScalar *x, *v, *alpha; 417 PetscScalar *y; 418 const PetscInt m = b->AIJ->rmap->n, *idx, dof = b->dof; 419 PetscInt n, i, k; 420 421 PetscFunctionBegin; 422 PetscCall(VecGetArrayRead(xx, &x)); 423 PetscCall(VecSet(yy, 0.0)); 424 PetscCall(VecGetArray(yy, &y)); 425 for (i = 0; i < m; i++) { 426 idx = PetscSafePointerPlusOffset(a->j, a->i[i]); 427 v = PetscSafePointerPlusOffset(a->a, a->i[i]); 428 n = a->i[i + 1] - a->i[i]; 429 alpha = x + dof * i; 430 while (n-- > 0) { 431 for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v); 432 idx++; 433 v++; 434 } 435 } 436 PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 437 PetscCall(VecRestoreArrayRead(xx, &x)); 438 PetscCall(VecRestoreArray(yy, &y)); 439 PetscFunctionReturn(PETSC_SUCCESS); 440 } 441 442 static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz) 443 { 444 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 445 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 446 const PetscScalar *x, *v, *alpha; 447 PetscScalar *y; 448 const PetscInt m = b->AIJ->rmap->n, *idx, dof = b->dof; 449 PetscInt n, i, k; 450 451 PetscFunctionBegin; 452 if (yy != zz) PetscCall(VecCopy(yy, zz)); 453 PetscCall(VecGetArrayRead(xx, &x)); 454 PetscCall(VecGetArray(zz, &y)); 455 for (i = 0; i < m; i++) { 456 idx = a->j + a->i[i]; 457 v = a->a + a->i[i]; 458 n = a->i[i + 1] - a->i[i]; 459 alpha = x + dof * i; 460 while (n-- > 0) { 461 for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v); 462 idx++; 463 v++; 464 } 465 } 466 PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 467 PetscCall(VecRestoreArrayRead(xx, &x)); 468 PetscCall(VecRestoreArray(zz, &y)); 469 PetscFunctionReturn(PETSC_SUCCESS); 470 } 471 472 static PetscErrorCode MatMult_MPIMAIJ_dof(Mat A, Vec xx, Vec yy) 473 { 474 Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 475 476 PetscFunctionBegin; 477 /* start the scatter */ 478 PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 479 PetscCall((*b->AIJ->ops->mult)(b->AIJ, xx, yy)); 480 PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 481 PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, yy, yy)); 482 PetscFunctionReturn(PETSC_SUCCESS); 483 } 484 485 static PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A, Vec xx, Vec yy) 486 { 487 Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 488 489 PetscFunctionBegin; 490 PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w)); 491 PetscCall((*b->AIJ->ops->multtranspose)(b->AIJ, xx, yy)); 492 PetscCall(VecScatterBegin(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE)); 493 PetscCall(VecScatterEnd(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE)); 494 PetscFunctionReturn(PETSC_SUCCESS); 495 } 496 497 static PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz) 498 { 499 Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 500 501 PetscFunctionBegin; 502 /* start the scatter */ 503 PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 504 PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, yy, zz)); 505 PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 506 PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz)); 507 PetscFunctionReturn(PETSC_SUCCESS); 508 } 509 510 static PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz) 511 { 512 Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 513 514 PetscFunctionBegin; 515 PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w)); 516 PetscCall((*b->AIJ->ops->multtransposeadd)(b->AIJ, xx, yy, zz)); 517 PetscCall(VecScatterBegin(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE)); 518 PetscCall(VecScatterEnd(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE)); 519 PetscFunctionReturn(PETSC_SUCCESS); 520 } 521 522 static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C) 523 { 524 Mat_Product *product = C->product; 525 526 PetscFunctionBegin; 527 if (product->type == MATPRODUCT_PtAP) { 528 C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ; 529 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices", MatProductTypes[product->type]); 530 PetscFunctionReturn(PETSC_SUCCESS); 531 } 532 533 static PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C) 534 { 535 Mat_Product *product = C->product; 536 PetscBool flg = PETSC_FALSE; 537 Mat A = product->A, P = product->B; 538 PetscInt alg = 1; /* set default algorithm */ 539 #if !defined(PETSC_HAVE_HYPRE) 540 const char *algTypes[4] = {"scalable", "nonscalable", "allatonce", "allatonce_merged"}; 541 PetscInt nalg = 4; 542 #else 543 const char *algTypes[5] = {"scalable", "nonscalable", "allatonce", "allatonce_merged", "hypre"}; 544 PetscInt nalg = 5; 545 #endif 546 547 PetscFunctionBegin; 548 PetscCheck(product->type == MATPRODUCT_PtAP, PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices", MatProductTypes[product->type]); 549 550 /* PtAP */ 551 /* Check matrix local sizes */ 552 PetscCheck(A->rmap->rstart == P->rmap->rstart && A->rmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")", 553 A->rmap->rstart, A->rmap->rend, P->rmap->rstart, P->rmap->rend); 554 PetscCheck(A->cmap->rstart == P->rmap->rstart && A->cmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")", 555 A->cmap->rstart, A->cmap->rend, P->rmap->rstart, P->rmap->rend); 556 557 /* Set the default algorithm */ 558 PetscCall(PetscStrcmp(C->product->alg, "default", &flg)); 559 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 560 561 /* Get runtime option */ 562 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 563 PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatPtAP", algTypes, nalg, algTypes[alg], &alg, &flg)); 564 if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg])); 565 PetscOptionsEnd(); 566 567 PetscCall(PetscStrcmp(C->product->alg, "allatonce", &flg)); 568 if (flg) { 569 C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 570 PetscFunctionReturn(PETSC_SUCCESS); 571 } 572 573 PetscCall(PetscStrcmp(C->product->alg, "allatonce_merged", &flg)); 574 if (flg) { 575 C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 576 PetscFunctionReturn(PETSC_SUCCESS); 577 } 578 579 /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */ 580 PetscCall(PetscInfo((PetscObject)A, "Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n")); 581 PetscCall(MatConvert(P, MATMPIAIJ, MAT_INPLACE_MATRIX, &P)); 582 PetscCall(MatProductSetFromOptions(C)); 583 PetscFunctionReturn(PETSC_SUCCESS); 584 } 585 586 static PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A, Mat PP, Mat C) 587 { 588 /* This routine requires testing -- first draft only */ 589 Mat_SeqMAIJ *pp = (Mat_SeqMAIJ *)PP->data; 590 Mat P = pp->AIJ; 591 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 592 Mat_SeqAIJ *p = (Mat_SeqAIJ *)P->data; 593 Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 594 const PetscInt *ai = a->i, *aj = a->j, *pi = p->i, *pj = p->j, *pJ, *pjj; 595 const PetscInt *ci = c->i, *cj = c->j, *cjj; 596 const PetscInt am = A->rmap->N, cn = C->cmap->N, cm = C->rmap->N, ppdof = pp->dof; 597 PetscInt i, j, k, pshift, poffset, anzi, pnzi, apnzj, nextap, pnzj, prow, crow, *apj, *apjdense; 598 const MatScalar *aa = a->a, *pa = p->a, *pA, *paj; 599 MatScalar *ca = c->a, *caj, *apa; 600 601 PetscFunctionBegin; 602 /* Allocate temporary array for storage of one row of A*P */ 603 PetscCall(PetscCalloc3(cn, &apa, cn, &apj, cn, &apjdense)); 604 605 /* Clear old values in C */ 606 PetscCall(PetscArrayzero(ca, ci[cm])); 607 608 for (i = 0; i < am; i++) { 609 /* Form sparse row of A*P */ 610 anzi = ai[i + 1] - ai[i]; 611 apnzj = 0; 612 for (j = 0; j < anzi; j++) { 613 /* Get offset within block of P */ 614 pshift = *aj % ppdof; 615 /* Get block row of P */ 616 prow = *aj++ / ppdof; /* integer division */ 617 pnzj = pi[prow + 1] - pi[prow]; 618 pjj = pj + pi[prow]; 619 paj = pa + pi[prow]; 620 for (k = 0; k < pnzj; k++) { 621 poffset = pjj[k] * ppdof + pshift; 622 if (!apjdense[poffset]) { 623 apjdense[poffset] = -1; 624 apj[apnzj++] = poffset; 625 } 626 apa[poffset] += (*aa) * paj[k]; 627 } 628 PetscCall(PetscLogFlops(2.0 * pnzj)); 629 aa++; 630 } 631 632 /* Sort the j index array for quick sparse axpy. */ 633 /* Note: a array does not need sorting as it is in dense storage locations. */ 634 PetscCall(PetscSortInt(apnzj, apj)); 635 636 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 637 prow = i / ppdof; /* integer division */ 638 pshift = i % ppdof; 639 poffset = pi[prow]; 640 pnzi = pi[prow + 1] - poffset; 641 /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 642 pJ = pj + poffset; 643 pA = pa + poffset; 644 for (j = 0; j < pnzi; j++) { 645 crow = (*pJ) * ppdof + pshift; 646 cjj = cj + ci[crow]; 647 caj = ca + ci[crow]; 648 pJ++; 649 /* Perform sparse axpy operation. Note cjj includes apj. */ 650 for (k = 0, nextap = 0; nextap < apnzj; k++) { 651 if (cjj[k] == apj[nextap]) caj[k] += (*pA) * apa[apj[nextap++]]; 652 } 653 PetscCall(PetscLogFlops(2.0 * apnzj)); 654 pA++; 655 } 656 657 /* Zero the current row info for A*P */ 658 for (j = 0; j < apnzj; j++) { 659 apa[apj[j]] = 0.; 660 apjdense[apj[j]] = 0; 661 } 662 } 663 664 /* Assemble the final matrix and clean up */ 665 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 666 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 667 PetscCall(PetscFree3(apa, apj, apjdense)); 668 PetscFunctionReturn(PETSC_SUCCESS); 669 } 670 671 static PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A, Mat PP, PetscReal fill, Mat C) 672 { 673 PetscFreeSpaceList free_space = NULL, current_space = NULL; 674 Mat_SeqMAIJ *pp = (Mat_SeqMAIJ *)PP->data; 675 Mat P = pp->AIJ; 676 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *p = (Mat_SeqAIJ *)P->data, *c; 677 PetscInt *pti, *ptj, *ptJ; 678 PetscInt *ci, *cj, *ptadenserow, *ptasparserow, *denserow, *sparserow, *ptaj; 679 const PetscInt an = A->cmap->N, am = A->rmap->N, pn = P->cmap->N, pm = P->rmap->N, ppdof = pp->dof; 680 PetscInt i, j, k, dof, pshift, ptnzi, arow, anzj, ptanzi, prow, pnzj, cnzi, cn; 681 MatScalar *ca; 682 const PetscInt *pi = p->i, *pj = p->j, *pjj, *ai = a->i, *aj = a->j, *ajj; 683 684 PetscFunctionBegin; 685 /* Get ij structure of P^T */ 686 PetscCall(MatGetSymbolicTranspose_SeqAIJ(P, &pti, &ptj)); 687 688 cn = pn * ppdof; 689 /* Allocate ci array, arrays for fill computation and */ 690 /* free space for accumulating nonzero column info */ 691 PetscCall(PetscMalloc1(cn + 1, &ci)); 692 ci[0] = 0; 693 694 /* Work arrays for rows of P^T*A */ 695 PetscCall(PetscMalloc4(an, &ptadenserow, an, &ptasparserow, cn, &denserow, cn, &sparserow)); 696 PetscCall(PetscArrayzero(ptadenserow, an)); 697 PetscCall(PetscArrayzero(denserow, cn)); 698 699 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 700 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 701 /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 702 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(ai[am] / pm, pn), &free_space)); 703 current_space = free_space; 704 705 /* Determine symbolic info for each row of C: */ 706 for (i = 0; i < pn; i++) { 707 ptnzi = pti[i + 1] - pti[i]; 708 ptJ = ptj + pti[i]; 709 for (dof = 0; dof < ppdof; dof++) { 710 ptanzi = 0; 711 /* Determine symbolic row of PtA: */ 712 for (j = 0; j < ptnzi; j++) { 713 /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 714 arow = ptJ[j] * ppdof + dof; 715 /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 716 anzj = ai[arow + 1] - ai[arow]; 717 ajj = aj + ai[arow]; 718 for (k = 0; k < anzj; k++) { 719 if (!ptadenserow[ajj[k]]) { 720 ptadenserow[ajj[k]] = -1; 721 ptasparserow[ptanzi++] = ajj[k]; 722 } 723 } 724 } 725 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 726 ptaj = ptasparserow; 727 cnzi = 0; 728 for (j = 0; j < ptanzi; j++) { 729 /* Get offset within block of P */ 730 pshift = *ptaj % ppdof; 731 /* Get block row of P */ 732 prow = (*ptaj++) / ppdof; /* integer division */ 733 /* P has same number of nonzeros per row as the compressed form */ 734 pnzj = pi[prow + 1] - pi[prow]; 735 pjj = pj + pi[prow]; 736 for (k = 0; k < pnzj; k++) { 737 /* Locations in C are shifted by the offset within the block */ 738 /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 739 if (!denserow[pjj[k] * ppdof + pshift]) { 740 denserow[pjj[k] * ppdof + pshift] = -1; 741 sparserow[cnzi++] = pjj[k] * ppdof + pshift; 742 } 743 } 744 } 745 746 /* sort sparserow */ 747 PetscCall(PetscSortInt(cnzi, sparserow)); 748 749 /* If free space is not available, make more free space */ 750 /* Double the amount of total space in the list */ 751 if (current_space->local_remaining < cnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), ¤t_space)); 752 753 /* Copy data into free space, and zero out denserows */ 754 PetscCall(PetscArraycpy(current_space->array, sparserow, cnzi)); 755 756 current_space->array += cnzi; 757 current_space->local_used += cnzi; 758 current_space->local_remaining -= cnzi; 759 760 for (j = 0; j < ptanzi; j++) ptadenserow[ptasparserow[j]] = 0; 761 for (j = 0; j < cnzi; j++) denserow[sparserow[j]] = 0; 762 763 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 764 /* For now, we will recompute what is needed. */ 765 ci[i * ppdof + 1 + dof] = ci[i * ppdof + dof] + cnzi; 766 } 767 } 768 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 769 /* Allocate space for cj, initialize cj, and */ 770 /* destroy list of free space and other temporary array(s) */ 771 PetscCall(PetscMalloc1(ci[cn] + 1, &cj)); 772 PetscCall(PetscFreeSpaceContiguous(&free_space, cj)); 773 PetscCall(PetscFree4(ptadenserow, ptasparserow, denserow, sparserow)); 774 775 /* Allocate space for ca */ 776 PetscCall(PetscCalloc1(ci[cn] + 1, &ca)); 777 778 /* put together the new matrix */ 779 PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), cn, cn, ci, cj, ca, NULL, C)); 780 PetscCall(MatSetBlockSize(C, pp->dof)); 781 782 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 783 /* Since these are PETSc arrays, change flags to free them as necessary. */ 784 c = (Mat_SeqAIJ *)(C->data); 785 c->free_a = PETSC_TRUE; 786 c->free_ij = PETSC_TRUE; 787 c->nonew = 0; 788 789 C->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 790 C->ops->productnumeric = MatProductNumeric_PtAP; 791 792 /* Clean up. */ 793 PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(P, &pti, &ptj)); 794 PetscFunctionReturn(PETSC_SUCCESS); 795 } 796 797 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C) 798 { 799 Mat_Product *product = C->product; 800 Mat A = product->A, P = product->B; 801 802 PetscFunctionBegin; 803 PetscCall(MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A, P, product->fill, C)); 804 PetscFunctionReturn(PETSC_SUCCESS); 805 } 806 807 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, Mat); 808 809 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, Mat C) 810 { 811 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 812 813 PetscFunctionBegin; 814 815 PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, C)); 816 PetscFunctionReturn(PETSC_SUCCESS); 817 } 818 819 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, PetscReal, Mat); 820 821 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, PetscReal fill, Mat C) 822 { 823 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 824 825 PetscFunctionBegin; 826 PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, fill, C)); 827 C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce; 828 PetscFunctionReturn(PETSC_SUCCESS); 829 } 830 831 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, Mat); 832 833 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, Mat C) 834 { 835 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 836 837 PetscFunctionBegin; 838 839 PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, C)); 840 PetscFunctionReturn(PETSC_SUCCESS); 841 } 842 843 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, PetscReal, Mat); 844 845 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, PetscReal fill, Mat C) 846 { 847 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 848 849 PetscFunctionBegin; 850 851 PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, fill, C)); 852 C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged; 853 PetscFunctionReturn(PETSC_SUCCESS); 854 } 855 856 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C) 857 { 858 Mat_Product *product = C->product; 859 Mat A = product->A, P = product->B; 860 PetscBool flg; 861 862 PetscFunctionBegin; 863 PetscCall(PetscStrcmp(product->alg, "allatonce", &flg)); 864 if (flg) { 865 PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A, P, product->fill, C)); 866 C->ops->productnumeric = MatProductNumeric_PtAP; 867 PetscFunctionReturn(PETSC_SUCCESS); 868 } 869 870 PetscCall(PetscStrcmp(product->alg, "allatonce_merged", &flg)); 871 if (flg) { 872 PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A, P, product->fill, C)); 873 C->ops->productnumeric = MatProductNumeric_PtAP; 874 PetscFunctionReturn(PETSC_SUCCESS); 875 } 876 877 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Mat Product Algorithm is not supported"); 878 } 879 880 PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 881 { 882 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 883 Mat a = b->AIJ, B; 884 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)a->data; 885 PetscInt m, n, i, ncols, *ilen, nmax = 0, *icols, j, k, ii, dof = b->dof; 886 PetscInt *cols; 887 PetscScalar *vals; 888 889 PetscFunctionBegin; 890 PetscCall(MatGetSize(a, &m, &n)); 891 PetscCall(PetscMalloc1(dof * m, &ilen)); 892 for (i = 0; i < m; i++) { 893 nmax = PetscMax(nmax, aij->ilen[i]); 894 for (j = 0; j < dof; j++) ilen[dof * i + j] = aij->ilen[i]; 895 } 896 PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 897 PetscCall(MatSetSizes(B, dof * m, dof * n, dof * m, dof * n)); 898 PetscCall(MatSetType(B, newtype)); 899 PetscCall(MatSeqAIJSetPreallocation(B, 0, ilen)); 900 PetscCall(PetscFree(ilen)); 901 PetscCall(PetscMalloc1(nmax, &icols)); 902 ii = 0; 903 for (i = 0; i < m; i++) { 904 PetscCall(MatGetRow_SeqAIJ(a, i, &ncols, &cols, &vals)); 905 for (j = 0; j < dof; j++) { 906 for (k = 0; k < ncols; k++) icols[k] = dof * cols[k] + j; 907 PetscCall(MatSetValues_SeqAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES)); 908 ii++; 909 } 910 PetscCall(MatRestoreRow_SeqAIJ(a, i, &ncols, &cols, &vals)); 911 } 912 PetscCall(PetscFree(icols)); 913 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 914 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 915 916 if (reuse == MAT_INPLACE_MATRIX) { 917 PetscCall(MatHeaderReplace(A, &B)); 918 } else { 919 *newmat = B; 920 } 921 PetscFunctionReturn(PETSC_SUCCESS); 922 } 923 924 #include <../src/mat/impls/aij/mpi/mpiaij.h> 925 926 PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 927 { 928 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)A->data; 929 Mat MatAIJ = ((Mat_SeqMAIJ *)maij->AIJ->data)->AIJ, B; 930 Mat MatOAIJ = ((Mat_SeqMAIJ *)maij->OAIJ->data)->AIJ; 931 Mat_SeqAIJ *AIJ = (Mat_SeqAIJ *)MatAIJ->data; 932 Mat_SeqAIJ *OAIJ = (Mat_SeqAIJ *)MatOAIJ->data; 933 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)maij->A->data; 934 PetscInt dof = maij->dof, i, j, *dnz = NULL, *onz = NULL, nmax = 0, onmax = 0; 935 PetscInt *oicols = NULL, *icols = NULL, ncols, *cols = NULL, oncols, *ocols = NULL; 936 PetscInt rstart, cstart, *garray, ii, k; 937 PetscScalar *vals, *ovals; 938 939 PetscFunctionBegin; 940 PetscCall(PetscMalloc2(A->rmap->n, &dnz, A->rmap->n, &onz)); 941 for (i = 0; i < A->rmap->n / dof; i++) { 942 nmax = PetscMax(nmax, AIJ->ilen[i]); 943 onmax = PetscMax(onmax, OAIJ->ilen[i]); 944 for (j = 0; j < dof; j++) { 945 dnz[dof * i + j] = AIJ->ilen[i]; 946 onz[dof * i + j] = OAIJ->ilen[i]; 947 } 948 } 949 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 950 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 951 PetscCall(MatSetType(B, newtype)); 952 PetscCall(MatMPIAIJSetPreallocation(B, 0, dnz, 0, onz)); 953 PetscCall(MatSetBlockSize(B, dof)); 954 PetscCall(PetscFree2(dnz, onz)); 955 956 PetscCall(PetscMalloc2(nmax, &icols, onmax, &oicols)); 957 rstart = dof * maij->A->rmap->rstart; 958 cstart = dof * maij->A->cmap->rstart; 959 garray = mpiaij->garray; 960 961 ii = rstart; 962 for (i = 0; i < A->rmap->n / dof; i++) { 963 PetscCall(MatGetRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals)); 964 PetscCall(MatGetRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals)); 965 for (j = 0; j < dof; j++) { 966 for (k = 0; k < ncols; k++) icols[k] = cstart + dof * cols[k] + j; 967 for (k = 0; k < oncols; k++) oicols[k] = dof * garray[ocols[k]] + j; 968 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES)); 969 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, oncols, oicols, ovals, INSERT_VALUES)); 970 ii++; 971 } 972 PetscCall(MatRestoreRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals)); 973 PetscCall(MatRestoreRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals)); 974 } 975 PetscCall(PetscFree2(icols, oicols)); 976 977 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 978 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 979 980 if (reuse == MAT_INPLACE_MATRIX) { 981 PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 982 ((PetscObject)A)->refct = 1; 983 984 PetscCall(MatHeaderReplace(A, &B)); 985 986 ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 987 } else { 988 *newmat = B; 989 } 990 PetscFunctionReturn(PETSC_SUCCESS); 991 } 992 993 static PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat) 994 { 995 Mat A; 996 997 PetscFunctionBegin; 998 PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A)); 999 PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat)); 1000 PetscCall(MatDestroy(&A)); 1001 PetscFunctionReturn(PETSC_SUCCESS); 1002 } 1003 1004 static PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *submat[]) 1005 { 1006 Mat A; 1007 1008 PetscFunctionBegin; 1009 PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A)); 1010 PetscCall(MatCreateSubMatrices(A, n, irow, icol, scall, submat)); 1011 PetscCall(MatDestroy(&A)); 1012 PetscFunctionReturn(PETSC_SUCCESS); 1013 } 1014 1015 /*@ 1016 MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 1017 operations for multicomponent problems. It interpolates each component the same 1018 way independently. The matrix type is based on `MATSEQAIJ` for sequential matrices, 1019 and `MATMPIAIJ` for distributed matrices. 1020 1021 Collective 1022 1023 Input Parameters: 1024 + A - the `MATAIJ` matrix describing the action on blocks 1025 - dof - the block size (number of components per node) 1026 1027 Output Parameter: 1028 . maij - the new `MATMAIJ` matrix 1029 1030 Level: advanced 1031 1032 .seealso: [](ch_matrices), `Mat`, `MATAIJ`, `MATMAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()` 1033 @*/ 1034 PetscErrorCode MatCreateMAIJ(Mat A, PetscInt dof, Mat *maij) 1035 { 1036 PetscInt n; 1037 Mat B; 1038 PetscBool flg; 1039 #if defined(PETSC_HAVE_CUDA) 1040 /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */ 1041 PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE; 1042 #endif 1043 1044 PetscFunctionBegin; 1045 dof = PetscAbs(dof); 1046 PetscCall(PetscObjectReference((PetscObject)A)); 1047 1048 if (dof == 1) *maij = A; 1049 else { 1050 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1051 /* propagate vec type */ 1052 PetscCall(MatSetVecType(B, A->defaultvectype)); 1053 PetscCall(MatSetSizes(B, dof * A->rmap->n, dof * A->cmap->n, dof * A->rmap->N, dof * A->cmap->N)); 1054 PetscCall(PetscLayoutSetBlockSize(B->rmap, dof)); 1055 PetscCall(PetscLayoutSetBlockSize(B->cmap, dof)); 1056 PetscCall(PetscLayoutSetUp(B->rmap)); 1057 PetscCall(PetscLayoutSetUp(B->cmap)); 1058 1059 B->assembled = PETSC_TRUE; 1060 1061 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &flg)); 1062 if (flg) { 1063 Mat_SeqMAIJ *b; 1064 1065 PetscCall(MatSetType(B, MATSEQMAIJ)); 1066 1067 B->ops->setup = NULL; 1068 B->ops->destroy = MatDestroy_SeqMAIJ; 1069 B->ops->view = MatView_SeqMAIJ; 1070 1071 b = (Mat_SeqMAIJ *)B->data; 1072 b->dof = dof; 1073 b->AIJ = A; 1074 1075 if (dof == 2) { 1076 B->ops->mult = MatMult_SeqMAIJ_2; 1077 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 1078 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 1079 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 1080 } else if (dof == 3) { 1081 B->ops->mult = MatMult_SeqMAIJ_3; 1082 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 1083 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 1084 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 1085 } else if (dof == 4) { 1086 B->ops->mult = MatMult_SeqMAIJ_4; 1087 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 1088 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 1089 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 1090 } else if (dof == 5) { 1091 B->ops->mult = MatMult_SeqMAIJ_5; 1092 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 1093 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 1094 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 1095 } else if (dof == 6) { 1096 B->ops->mult = MatMult_SeqMAIJ_6; 1097 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 1098 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 1099 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 1100 } else if (dof == 7) { 1101 B->ops->mult = MatMult_SeqMAIJ_7; 1102 B->ops->multadd = MatMultAdd_SeqMAIJ_7; 1103 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 1104 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 1105 } else if (dof == 8) { 1106 B->ops->mult = MatMult_SeqMAIJ_8; 1107 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 1108 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 1109 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 1110 } else if (dof == 9) { 1111 B->ops->mult = MatMult_SeqMAIJ_9; 1112 B->ops->multadd = MatMultAdd_SeqMAIJ_9; 1113 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 1114 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 1115 } else if (dof == 10) { 1116 B->ops->mult = MatMult_SeqMAIJ_10; 1117 B->ops->multadd = MatMultAdd_SeqMAIJ_10; 1118 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 1119 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 1120 } else if (dof == 11) { 1121 B->ops->mult = MatMult_SeqMAIJ_11; 1122 B->ops->multadd = MatMultAdd_SeqMAIJ_11; 1123 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; 1124 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; 1125 } else if (dof == 16) { 1126 B->ops->mult = MatMult_SeqMAIJ_16; 1127 B->ops->multadd = MatMultAdd_SeqMAIJ_16; 1128 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 1129 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 1130 } else if (dof == 18) { 1131 B->ops->mult = MatMult_SeqMAIJ_18; 1132 B->ops->multadd = MatMultAdd_SeqMAIJ_18; 1133 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 1134 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 1135 } else { 1136 B->ops->mult = MatMult_SeqMAIJ_N; 1137 B->ops->multadd = MatMultAdd_SeqMAIJ_N; 1138 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N; 1139 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N; 1140 } 1141 #if defined(PETSC_HAVE_CUDA) 1142 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaijcusparse_C", MatConvert_SeqMAIJ_SeqAIJ)); 1143 #endif 1144 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaij_C", MatConvert_SeqMAIJ_SeqAIJ)); 1145 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqmaij_C", MatProductSetFromOptions_SeqAIJ_SeqMAIJ)); 1146 } else { 1147 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 1148 Mat_MPIMAIJ *b; 1149 IS from, to; 1150 Vec gvec; 1151 1152 PetscCall(MatSetType(B, MATMPIMAIJ)); 1153 1154 B->ops->setup = NULL; 1155 B->ops->destroy = MatDestroy_MPIMAIJ; 1156 B->ops->view = MatView_MPIMAIJ; 1157 1158 b = (Mat_MPIMAIJ *)B->data; 1159 b->dof = dof; 1160 b->A = A; 1161 1162 PetscCall(MatCreateMAIJ(mpiaij->A, -dof, &b->AIJ)); 1163 PetscCall(MatCreateMAIJ(mpiaij->B, -dof, &b->OAIJ)); 1164 1165 PetscCall(VecGetSize(mpiaij->lvec, &n)); 1166 PetscCall(VecCreate(PETSC_COMM_SELF, &b->w)); 1167 PetscCall(VecSetSizes(b->w, n * dof, n * dof)); 1168 PetscCall(VecSetBlockSize(b->w, dof)); 1169 PetscCall(VecSetType(b->w, VECSEQ)); 1170 1171 /* create two temporary Index sets for build scatter gather */ 1172 PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)A), dof, n, mpiaij->garray, PETSC_COPY_VALUES, &from)); 1173 PetscCall(ISCreateStride(PETSC_COMM_SELF, n * dof, 0, 1, &to)); 1174 1175 /* create temporary global vector to generate scatter context */ 1176 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), dof, dof * A->cmap->n, dof * A->cmap->N, NULL, &gvec)); 1177 1178 /* generate the scatter context */ 1179 PetscCall(VecScatterCreate(gvec, from, b->w, to, &b->ctx)); 1180 1181 PetscCall(ISDestroy(&from)); 1182 PetscCall(ISDestroy(&to)); 1183 PetscCall(VecDestroy(&gvec)); 1184 1185 B->ops->mult = MatMult_MPIMAIJ_dof; 1186 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 1187 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 1188 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 1189 1190 #if defined(PETSC_HAVE_CUDA) 1191 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaijcusparse_C", MatConvert_MPIMAIJ_MPIAIJ)); 1192 #endif 1193 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaij_C", MatConvert_MPIMAIJ_MPIAIJ)); 1194 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpimaij_C", MatProductSetFromOptions_MPIAIJ_MPIMAIJ)); 1195 } 1196 B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ; 1197 B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ; 1198 PetscCall(MatSetUp(B)); 1199 #if defined(PETSC_HAVE_CUDA) 1200 /* temporary until we have CUDA implementation of MAIJ */ 1201 { 1202 PetscBool flg; 1203 if (convert) { 1204 PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, MATAIJCUSPARSE, "")); 1205 if (flg) PetscCall(MatConvert(B, ((PetscObject)A)->type_name, MAT_INPLACE_MATRIX, &B)); 1206 } 1207 } 1208 #endif 1209 *maij = B; 1210 } 1211 PetscFunctionReturn(PETSC_SUCCESS); 1212 } 1213