#include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/
#include <../src/mat/utils/freespace.h>

/*@
  MatMAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATMAIJ` matrix

  Not Collective, but if the `MATMAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel

  Input Parameter:
. A - the `MATMAIJ` matrix

  Output Parameter:
. B - the `MATAIJ` matrix

  Level: advanced

  Note:
  The reference count on the `MATAIJ` matrix is not increased, so you should not destroy it.

.seealso: [](ch_matrices), `Mat`, `MATMAIJ`, `MATAIJ`, `MatCreateMAIJ()`
@*/
PetscErrorCode MatMAIJGetAIJ(Mat A, Mat *B)
{
  PetscBool ismpimaij, isseqmaij;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIMAIJ, &ismpimaij));
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQMAIJ, &isseqmaij));
  if (ismpimaij) {
    Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

    *B = b->A;
  } else if (isseqmaij) {
    Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;

    *B = b->AIJ;
  } else {
    *B = A;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
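
/*
  Usage sketch (illustrative only; `maij` is a caller-created MATMAIJ matrix, not a name from this
  file). Per the Note above, the returned matrix is a borrowed reference:

    Mat aij;
    PetscCall(MatMAIJGetAIJ(maij, &aij));                 // do not destroy aij
    PetscCall(MatView(aij, PETSC_VIEWER_STDOUT_WORLD));
*/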

/*@
  MatMAIJRedimension - Get a new `MATMAIJ` matrix with the same action, but for a different block size

  Logically Collective

  Input Parameters:
+ A   - the `MATMAIJ` matrix
- dof - the block size for the new matrix

  Output Parameter:
. B - the new `MATMAIJ` matrix

  Level: advanced

.seealso: [](ch_matrices), `Mat`, `MATMAIJ`, `MatCreateMAIJ()`
@*/
PetscErrorCode MatMAIJRedimension(Mat A, PetscInt dof, Mat *B)
{
  Mat Aij = NULL;

  PetscFunctionBegin;
  PetscValidLogicalCollectiveInt(A, dof, 2);
  PetscCall(MatMAIJGetAIJ(A, &Aij));
  PetscCall(MatCreateMAIJ(Aij, dof, B));
  PetscFunctionReturn(PETSC_SUCCESS);
}
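
/*
  Usage sketch (illustrative only; `maij3` is an assumed existing MATMAIJ with dof = 3): rebuild the
  same blockwise action for a different number of components.

    Mat maij5;
    PetscCall(MatMAIJRedimension(maij3, 5, &maij5));   // same AIJ action, now 5 components per node
    // ... use maij5 ...
    PetscCall(MatDestroy(&maij5));
*/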

static PetscErrorCode MatDestroy_SeqMAIJ(Mat A)
{
  Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDestroy(&b->AIJ));
  PetscCall(PetscFree(A->data));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaij_seqmaij_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSetUp_MAIJ(Mat A)
{
  PetscFunctionBegin;
  SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Must use MatCreateMAIJ() to create MAIJ matrices");
}

static PetscErrorCode MatView_SeqMAIJ(Mat A, PetscViewer viewer)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
  PetscCall(MatView(B, viewer));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatView_MPIMAIJ(Mat A, PetscViewer viewer)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B));
  PetscCall(MatView(B, viewer));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDestroy_MPIMAIJ(Mat A)
{
  Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDestroy(&b->AIJ));
  PetscCall(MatDestroy(&b->OAIJ));
  PetscCall(MatDestroy(&b->A));
  PetscCall(VecScatterDestroy(&b->ctx));
  PetscCall(VecDestroy(&b->w));
  PetscCall(PetscFree(A->data));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_mpiaij_mpimaij_C", NULL));
  PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
  MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
  multicomponent problems, interpolating or restricting each component the same way independently.
  The matrix type is based on `MATSEQAIJ` for sequential matrices, and `MATMPIAIJ` for distributed matrices.

  Operations provided:
.vb
    MatMult()
    MatMultTranspose()
    MatMultAdd()
    MatMultTransposeAdd()
.ve

  Level: advanced

.seealso: [](ch_matrices), `Mat`, `MATAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MatCreateMAIJ()`
M*/

PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
{
  Mat_MPIMAIJ *b;
  PetscMPIInt  size;

  PetscFunctionBegin;
  PetscCall(PetscNew(&b));
  A->data = (void *)b;

  PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps)));

  A->ops->setup = MatSetUp_MAIJ;

  b->AIJ  = NULL;
  b->dof  = 0;
  b->OAIJ = NULL;
  b->ctx  = NULL;
  b->w    = NULL;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQMAIJ));
  } else {
    PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIMAIJ));
  }
  A->preallocated = PETSC_TRUE;
  A->assembled    = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
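
/*
  Illustration of the blockwise action (not part of the implementation): for an underlying AIJ
  matrix A and block size dof, the MAIJ matrix applies A independently to each component of
  interleaved node data,

    y[dof*i + k] = sum_j A(i,j) * x[dof*j + k]   for k = 0, ..., dof-1.

  For example, with dof = 2 and the 2x2 AIJ matrix A = [[2, 0], [1, 3]], the MAIJ matrix acts as
  the 4x4 matrix

    [2 0 0 0]
    [0 2 0 0]
    [1 0 3 0]
    [0 1 0 3]

  on the interleaved vector (x0_comp0, x0_comp1, x1_comp0, x1_comp1). This is exactly what the
  multiply kernels below compute.
*/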

#if PetscHasAttribute(always_inline)
  #define PETSC_FORCE_INLINE __attribute__((always_inline))
#else
  #define PETSC_FORCE_INLINE
#endif

#if defined(__clang__)
  #define PETSC_PRAGMA_UNROLL _Pragma("unroll")
#else
  #define PETSC_PRAGMA_UNROLL
#endif

enum {
  MAT_SEQMAIJ_MAX_TEMPLATE_SIZE = 18
};

// try as hard as possible to get these "template"s inlined; GCC apparently does take the 'inline'
// keyword into account for these...
PETSC_FORCE_INLINE static inline PetscErrorCode MatMult_MatMultAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N)
{
  const PetscBool    mult_add   = yy == NULL ? PETSC_FALSE : PETSC_TRUE;
  const Mat_SeqMAIJ *b          = (Mat_SeqMAIJ *)A->data;
  const Mat          baij       = b->AIJ;
  const Mat_SeqAIJ  *a          = (Mat_SeqAIJ *)baij->data;
  const PetscInt     m          = baij->rmap->n;
  const PetscInt     nz         = a->nz;
  const PetscInt    *idx        = a->j;
  const PetscInt    *ii         = a->i;
  const PetscScalar *v          = a->a;
  PetscInt           nonzerorow = 0;
  const PetscScalar *x;
  PetscScalar       *z;

  PetscFunctionBegin;
  PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE);
  if (mult_add && yy != zz) PetscCall(VecCopy(yy, zz));
  PetscCall(VecGetArrayRead(xx, &x));
  if (mult_add) {
    PetscCall(VecGetArray(zz, &z));
  } else {
    PetscCall(VecGetArrayWrite(zz, &z));
  }

  for (PetscInt i = 0; i < m; ++i) {
    PetscInt       jrow = ii[i];
    const PetscInt n    = ii[i + 1] - jrow;
    // leave a line so clang-format does not align these decls
    PetscScalar sum[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE] = {0};

    nonzerorow += n > 0;
    for (PetscInt j = 0; j < n; ++j, ++jrow) {
      const PetscScalar v_jrow     = v[jrow];
      const PetscInt    N_idx_jrow = N * idx[jrow];

      PETSC_PRAGMA_UNROLL
      for (int k = 0; k < N; ++k) sum[k] += v_jrow * x[N_idx_jrow + k];
    }

    PETSC_PRAGMA_UNROLL
    for (int k = 0; k < N; ++k) {
      const PetscInt z_idx = N * i + k;

      if (mult_add) {
        z[z_idx] += sum[k];
      } else {
        z[z_idx] = sum[k];
      }
    }
  }
  PetscCall(PetscLogFlops(2 * N * nz - (mult_add ? 0 : (N * nonzerorow))));
  PetscCall(VecRestoreArrayRead(xx, &x));
  if (mult_add) {
    PetscCall(VecRestoreArray(zz, &z));
  } else {
    PetscCall(VecRestoreArrayWrite(zz, &z));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_FORCE_INLINE static inline PetscErrorCode MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N)
{
  const PetscBool    mult_add = yy == NULL ? PETSC_FALSE : PETSC_TRUE;
  const Mat_SeqMAIJ *b        = (Mat_SeqMAIJ *)A->data;
  const Mat          baij     = b->AIJ;
  const Mat_SeqAIJ  *a        = (Mat_SeqAIJ *)baij->data;
  const PetscInt     m        = baij->rmap->n;
  const PetscInt     nz       = a->nz;
  const PetscInt    *a_j      = a->j;
  const PetscInt    *a_i      = a->i;
  const PetscScalar *a_a      = a->a;
  const PetscScalar *x;
  PetscScalar       *z;

  PetscFunctionBegin;
  PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE);
  if (mult_add) {
    if (yy != zz) PetscCall(VecCopy(yy, zz));
  } else {
    PetscCall(VecSet(zz, 0.0));
  }
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArray(zz, &z));

  for (PetscInt i = 0; i < m; i++) {
    const PetscInt     a_ii = a_i[i];
    const PetscInt    *idx  = a_j + a_ii;
    const PetscScalar *v    = a_a + a_ii;
    const PetscInt     n    = a_i[i + 1] - a_ii;
    PetscScalar        alpha[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE];

    PETSC_PRAGMA_UNROLL
    for (int k = 0; k < N; ++k) alpha[k] = x[N * i + k];
    for (PetscInt j = 0; j < n; ++j) {
      const PetscInt    N_idx_j = N * idx[j];
      const PetscScalar v_j     = v[j];

      PETSC_PRAGMA_UNROLL
      for (int k = 0; k < N; ++k) z[N_idx_j + k] += alpha[k] * v_j;
    }
  }

  PetscCall(PetscLogFlops(2 * N * nz));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(zz, &z));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#define MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(N) \
  static PetscErrorCode PetscConcat(MatMult_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy) \
  { \
    PetscFunctionBegin; \
    PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, N)); \
    PetscFunctionReturn(PETSC_SUCCESS); \
  } \
  static PetscErrorCode PetscConcat(MatMultTranspose_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy) \
  { \
    PetscFunctionBegin; \
    PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, N)); \
    PetscFunctionReturn(PETSC_SUCCESS); \
  } \
  static PetscErrorCode PetscConcat(MatMultAdd_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy, Vec zz) \
  { \
    PetscFunctionBegin; \
    PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, N)); \
    PetscFunctionReturn(PETSC_SUCCESS); \
  } \
  static PetscErrorCode PetscConcat(MatMultTransposeAdd_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy, Vec zz) \
  { \
    PetscFunctionBegin; \
    PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, N)); \
    PetscFunctionReturn(PETSC_SUCCESS); \
  }

// clang-format off
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(2)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(3)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(4)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(5)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(6)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(7)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(8)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(9)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(10)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(11)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(16)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(18)
// clang-format on

#undef MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE
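
/*
  For reference (illustrative expansion only, not additional code): instantiating the macro above
  with N = 2 defines, among others,

    static PetscErrorCode MatMult_SeqMAIJ_2(Mat A, Vec xx, Vec yy)
    {
      PetscFunctionBegin;
      PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, 2));
      PetscFunctionReturn(PETSC_SUCCESS);
    }

  Because N is a compile-time constant at the call site, the inner component loops can be fully
  unrolled once the "template" is inlined.
*/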

static PetscErrorCode MatMult_SeqMAIJ_N(Mat A, Vec xx, Vec yy)
{
  Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
  const PetscScalar *x, *v;
  PetscScalar       *y, *sums;
  const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
  PetscInt           n, i, jrow, j, dof = b->dof, k;

  PetscFunctionBegin;
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecSet(yy, 0.0));
  PetscCall(VecGetArray(yy, &y));
  idx = a->j;
  v   = a->a;
  ii  = a->i;

  for (i = 0; i < m; i++) {
    jrow = ii[i];
    n    = ii[i + 1] - jrow;
    sums = y + dof * i;
    for (j = 0; j < n; j++) {
      for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k];
      jrow++;
    }
  }

  PetscCall(PetscLogFlops(2.0 * dof * a->nz));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(yy, &y));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
  const PetscScalar *x, *v;
  PetscScalar       *y, *sums;
  const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
  PetscInt           n, i, jrow, j, dof = b->dof, k;

  PetscFunctionBegin;
  if (yy != zz) PetscCall(VecCopy(yy, zz));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArray(zz, &y));
  idx = a->j;
  v   = a->a;
  ii  = a->i;

  for (i = 0; i < m; i++) {
    jrow = ii[i];
    n    = ii[i + 1] - jrow;
    sums = y + dof * i;
    for (j = 0; j < n; j++) {
      for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k];
      jrow++;
    }
  }

  PetscCall(PetscLogFlops(2.0 * dof * a->nz));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(zz, &y));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A, Vec xx, Vec yy)
{
  Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
  const PetscScalar *x, *v, *alpha;
  PetscScalar       *y;
  const PetscInt     m = b->AIJ->rmap->n, *idx, dof = b->dof;
  PetscInt           n, i, k;

  PetscFunctionBegin;
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecSet(yy, 0.0));
  PetscCall(VecGetArray(yy, &y));
  for (i = 0; i < m; i++) {
    idx   = a->j + a->i[i];
    v     = a->a + a->i[i];
    n     = a->i[i + 1] - a->i[i];
    alpha = x + dof * i;
    while (n-- > 0) {
      for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v);
      idx++;
      v++;
    }
  }
  PetscCall(PetscLogFlops(2.0 * dof * a->nz));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(yy, &y));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
  const PetscScalar *x, *v, *alpha;
  PetscScalar       *y;
  const PetscInt     m = b->AIJ->rmap->n, *idx, dof = b->dof;
  PetscInt           n, i, k;

  PetscFunctionBegin;
  if (yy != zz) PetscCall(VecCopy(yy, zz));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArray(zz, &y));
  for (i = 0; i < m; i++) {
    idx   = a->j + a->i[i];
    v     = a->a + a->i[i];
    n     = a->i[i + 1] - a->i[i];
    alpha = x + dof * i;
    while (n-- > 0) {
      for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v);
      idx++;
      v++;
    }
  }
  PetscCall(PetscLogFlops(2.0 * dof * a->nz));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(zz, &y));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMult_MPIMAIJ_dof(Mat A, Vec xx, Vec yy)
{
  Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

  PetscFunctionBegin;
  /* start the scatter */
  PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*b->AIJ->ops->mult)(b->AIJ, xx, yy));
  PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, yy, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A, Vec xx, Vec yy)
{
  Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w));
  PetscCall((*b->AIJ->ops->multtranspose)(b->AIJ, xx, yy));
  PetscCall(VecScatterBegin(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

  PetscFunctionBegin;
  /* start the scatter */
  PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, yy, zz));
  PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w));
  PetscCall((*b->AIJ->ops->multtransposeadd)(b->AIJ, xx, yy, zz));
  PetscCall(VecScatterBegin(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
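
/*
  Note on the forward kernels above: the scatter of the needed off-process entries of xx into the
  work vector w is started, the on-process (diagonal block) multiply runs while those messages are
  in flight, and only then is the scatter completed and the off-process (off-diagonal block)
  contribution added. This overlap of communication and computation mirrors the standard MPIAIJ
  MatMult().
*/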

static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C)
{
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  if (product->type == MATPRODUCT_PtAP) {
    C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ;
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices", MatProductTypes[product->type]);
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C)
{
  Mat_Product *product = C->product;
  PetscBool    flg     = PETSC_FALSE;
  Mat          A = product->A, P = product->B;
  PetscInt     alg = 1; /* set default algorithm */
#if !defined(PETSC_HAVE_HYPRE)
  const char *algTypes[4] = {"scalable", "nonscalable", "allatonce", "allatonce_merged"};
  PetscInt    nalg        = 4;
#else
  const char *algTypes[5] = {"scalable", "nonscalable", "allatonce", "allatonce_merged", "hypre"};
  PetscInt    nalg        = 5;
#endif

  PetscFunctionBegin;
  PetscCheck(product->type == MATPRODUCT_PtAP, PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices", MatProductTypes[product->type]);

  /* PtAP */
  /* Check matrix local sizes */
  PetscCheck(A->rmap->rstart == P->rmap->rstart && A->rmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ", %" PetscInt_FMT ")", A->rmap->rstart, A->rmap->rend, P->rmap->rstart, P->rmap->rend);
  PetscCheck(A->cmap->rstart == P->rmap->rstart && A->cmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ", %" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, P->rmap->rstart, P->rmap->rend);

  /* Set the default algorithm */
  PetscCall(PetscStrcmp(C->product->alg, "default", &flg));
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

  /* Get runtime option */
  PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
  PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatPtAP", algTypes, nalg, algTypes[alg], &alg, &flg));
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
  PetscOptionsEnd();

  PetscCall(PetscStrcmp(C->product->alg, "allatonce", &flg));
  if (flg) {
    C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscStrcmp(C->product->alg, "allatonce_merged", &flg));
  if (flg) {
    C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */
  PetscCall(PetscInfo((PetscObject)A, "Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n"));
  PetscCall(MatConvert(P, MATMPIAIJ, MAT_INPLACE_MATRIX, &P));
  PetscCall(MatProductSetFromOptions(C));
  PetscFunctionReturn(PETSC_SUCCESS);
}
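
/*
  Run-time usage sketch (option name taken from the PetscOptionsEList() call above; the program
  name is a placeholder):

    ./myapp -mat_product_algorithm allatonce

  selects the all-at-once PtAP algorithm for MPIAIJ/MPIMAIJ products configured through
  MatProductSetFromOptions().
*/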

static PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A, Mat PP, Mat C)
{
  /* This routine requires testing -- first draft only */
  Mat_SeqMAIJ     *pp = (Mat_SeqMAIJ *)PP->data;
  Mat              P  = pp->AIJ;
  Mat_SeqAIJ      *a  = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ      *p  = (Mat_SeqAIJ *)P->data;
  Mat_SeqAIJ      *c  = (Mat_SeqAIJ *)C->data;
  const PetscInt  *ai = a->i, *aj = a->j, *pi = p->i, *pj = p->j, *pJ, *pjj;
  const PetscInt  *ci = c->i, *cj = c->j, *cjj;
  const PetscInt   am = A->rmap->N, cn = C->cmap->N, cm = C->rmap->N, ppdof = pp->dof;
  PetscInt         i, j, k, pshift, poffset, anzi, pnzi, apnzj, nextap, pnzj, prow, crow, *apj, *apjdense;
  const MatScalar *aa = a->a, *pa = p->a, *pA, *paj;
  MatScalar       *ca = c->a, *caj, *apa;

  PetscFunctionBegin;
  /* Allocate temporary array for storage of one row of A*P */
  PetscCall(PetscCalloc3(cn, &apa, cn, &apj, cn, &apjdense));

  /* Clear old values in C */
  PetscCall(PetscArrayzero(ca, ci[cm]));

  for (i = 0; i < am; i++) {
    /* Form sparse row of A*P */
    anzi  = ai[i + 1] - ai[i];
    apnzj = 0;
    for (j = 0; j < anzi; j++) {
      /* Get offset within block of P */
      pshift = *aj % ppdof;
      /* Get block row of P */
      prow = *aj++ / ppdof; /* integer division */
      pnzj = pi[prow + 1] - pi[prow];
      pjj  = pj + pi[prow];
      paj  = pa + pi[prow];
      for (k = 0; k < pnzj; k++) {
        poffset = pjj[k] * ppdof + pshift;
        if (!apjdense[poffset]) {
          apjdense[poffset] = -1;
          apj[apnzj++]      = poffset;
        }
        apa[poffset] += (*aa) * paj[k];
      }
      PetscCall(PetscLogFlops(2.0 * pnzj));
      aa++;
    }

    /* Sort the j index array for quick sparse axpy. */
    /* Note: a array does not need sorting as it is in dense storage locations. */
    PetscCall(PetscSortInt(apnzj, apj));

    /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
    prow    = i / ppdof; /* integer division */
    pshift  = i % ppdof;
    poffset = pi[prow];
    pnzi    = pi[prow + 1] - poffset;
    /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
    pJ = pj + poffset;
    pA = pa + poffset;
    for (j = 0; j < pnzi; j++) {
      crow = (*pJ) * ppdof + pshift;
      cjj  = cj + ci[crow];
      caj  = ca + ci[crow];
      pJ++;
      /* Perform sparse axpy operation. Note cjj includes apj. */
      for (k = 0, nextap = 0; nextap < apnzj; k++) {
        if (cjj[k] == apj[nextap]) caj[k] += (*pA) * apa[apj[nextap++]];
      }
      PetscCall(PetscLogFlops(2.0 * apnzj));
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j = 0; j < apnzj; j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscFree3(apa, apj, apjdense));
  PetscFunctionReturn(PETSC_SUCCESS);
}
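
/*
  Index-mapping example (illustrative only): with ppdof = 2, scalar row i = 5 of the expanded
  matrix corresponds to block row prow = 5 / 2 = 2 of P and component pshift = 5 % 2 = 1, matching
  the prow/pshift computations in the routine above; the same mapping is used symbolically below.
*/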

static PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A, Mat PP, PetscReal fill, Mat C)
{
  PetscFreeSpaceList free_space = NULL, current_space = NULL;
  Mat_SeqMAIJ       *pp = (Mat_SeqMAIJ *)PP->data;
  Mat                P  = pp->AIJ;
  Mat_SeqAIJ        *a  = (Mat_SeqAIJ *)A->data, *p = (Mat_SeqAIJ *)P->data, *c;
  PetscInt          *pti, *ptj, *ptJ;
  PetscInt          *ci, *cj, *ptadenserow, *ptasparserow, *denserow, *sparserow, *ptaj;
  const PetscInt     an = A->cmap->N, am = A->rmap->N, pn = P->cmap->N, pm = P->rmap->N, ppdof = pp->dof;
  PetscInt           i, j, k, dof, pshift, ptnzi, arow, anzj, ptanzi, prow, pnzj, cnzi, cn;
  MatScalar         *ca;
  const PetscInt    *pi = p->i, *pj = p->j, *pjj, *ai = a->i, *aj = a->j, *ajj;

  PetscFunctionBegin;
  /* Get ij structure of P^T */
  PetscCall(MatGetSymbolicTranspose_SeqAIJ(P, &pti, &ptj));

  cn = pn * ppdof;
  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  PetscCall(PetscMalloc1(cn + 1, &ci));
  ci[0] = 0;

  /* Work arrays for rows of P^T*A */
  PetscCall(PetscMalloc4(an, &ptadenserow, an, &ptasparserow, cn, &denserow, cn, &sparserow));
  PetscCall(PetscArrayzero(ptadenserow, an));
  PetscCall(PetscArrayzero(denserow, cn));

  /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
  /* This should be reasonable if sparsity of PtAP is similar to that of A. */
  /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
  PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(ai[am] / pm, pn), &free_space));
  current_space = free_space;

  /* Determine symbolic info for each row of C: */
  for (i = 0; i < pn; i++) {
    ptnzi = pti[i + 1] - pti[i];
    ptJ   = ptj + pti[i];
    for (dof = 0; dof < ppdof; dof++) {
      ptanzi = 0;
      /* Determine symbolic row of PtA: */
      for (j = 0; j < ptnzi; j++) {
        /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
        arow = ptJ[j] * ppdof + dof;
        /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
        anzj = ai[arow + 1] - ai[arow];
        ajj  = aj + ai[arow];
        for (k = 0; k < anzj; k++) {
          if (!ptadenserow[ajj[k]]) {
            ptadenserow[ajj[k]]    = -1;
            ptasparserow[ptanzi++] = ajj[k];
          }
        }
      }
      /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
      ptaj = ptasparserow;
      cnzi = 0;
      for (j = 0; j < ptanzi; j++) {
        /* Get offset within block of P */
        pshift = *ptaj % ppdof;
        /* Get block row of P */
        prow = (*ptaj++) / ppdof; /* integer division */
        /* P has same number of nonzeros per row as the compressed form */
        pnzj = pi[prow + 1] - pi[prow];
        pjj  = pj + pi[prow];
        for (k = 0; k < pnzj; k++) {
          /* Locations in C are shifted by the offset within the block */
          /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
          if (!denserow[pjj[k] * ppdof + pshift]) {
            denserow[pjj[k] * ppdof + pshift] = -1;
            sparserow[cnzi++]                 = pjj[k] * ppdof + pshift;
          }
        }
      }

      /* sort sparserow */
      PetscCall(PetscSortInt(cnzi, sparserow));

      /* If free space is not available, make more free space */
      /* Double the amount of total space in the list */
      if (current_space->local_remaining < cnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), &current_space));

      /* Copy data into free space, and zero out denserows */
      PetscCall(PetscArraycpy(current_space->array, sparserow, cnzi));

      current_space->array += cnzi;
      current_space->local_used += cnzi;
      current_space->local_remaining -= cnzi;

      for (j = 0; j < ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
      for (j = 0; j < cnzi; j++) denserow[sparserow[j]] = 0;

      /* Aside: Perhaps we should save the pta info for the numerical factorization. */
      /* For now, we will recompute what is needed. */
      ci[i * ppdof + 1 + dof] = ci[i * ppdof + dof] + cnzi;
    }
  }
  /* nnz is now stored in ci[ptm], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  PetscCall(PetscMalloc1(ci[cn] + 1, &cj));
  PetscCall(PetscFreeSpaceContiguous(&free_space, cj));
  PetscCall(PetscFree4(ptadenserow, ptasparserow, denserow, sparserow));

  /* Allocate space for ca */
  PetscCall(PetscCalloc1(ci[cn] + 1, &ca));

  /* put together the new matrix */
  PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), cn, cn, ci, cj, ca, NULL, C));
  PetscCall(MatSetBlockSize(C, pp->dof));

  /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change flags to free them as necessary. */
  c          = (Mat_SeqAIJ *)C->data;
  c->free_a  = PETSC_TRUE;
  c->free_ij = PETSC_TRUE;
  c->nonew   = 0;

  C->ops->ptapnumeric    = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
  C->ops->productnumeric = MatProductNumeric_PtAP;

  /* Clean up. */
  PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(P, &pti, &ptj));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A, P = product->B;

  PetscFunctionBegin;
  PetscCall(MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A, P, product->fill, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, Mat);

PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, Mat C)
{
  Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;

  PetscFunctionBegin;
  PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, PetscReal, Mat);

PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, PetscReal fill, Mat C)
{
  Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;

  PetscFunctionBegin;
  PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, fill, C));
  C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, Mat);

PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, Mat C)
{
  Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;

  PetscFunctionBegin;
  PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, PetscReal, Mat);

PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, PetscReal fill, Mat C)
{
  Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;

  PetscFunctionBegin;
  PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, fill, C));
  C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A, P = product->B;
  PetscBool    flg;

  PetscFunctionBegin;
  PetscCall(PetscStrcmp(product->alg, "allatonce", &flg));
  if (flg) {
    PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A, P, product->fill, C));
    C->ops->productnumeric = MatProductNumeric_PtAP;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscStrcmp(product->alg, "allatonce_merged", &flg));
  if (flg) {
    PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A, P, product->fill, C));
    C->ops->productnumeric = MatProductNumeric_PtAP;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Mat Product Algorithm is not supported");
}

PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_SeqMAIJ *b   = (Mat_SeqMAIJ *)A->data;
  Mat          a   = b->AIJ, B;
  Mat_SeqAIJ  *aij = (Mat_SeqAIJ *)a->data;
  PetscInt     m, n, i, ncols, *ilen, nmax = 0, *icols, j, k, ii, dof = b->dof;
  PetscInt    *cols;
  PetscScalar *vals;

  PetscFunctionBegin;
  PetscCall(MatGetSize(a, &m, &n));
  PetscCall(PetscMalloc1(dof * m, &ilen));
  for (i = 0; i < m; i++) {
    nmax = PetscMax(nmax, aij->ilen[i]);
    for (j = 0; j < dof; j++) ilen[dof * i + j] = aij->ilen[i];
  }
  PetscCall(MatCreate(PETSC_COMM_SELF, &B));
  PetscCall(MatSetSizes(B, dof * m, dof * n, dof * m, dof * n));
  PetscCall(MatSetType(B, newtype));
  PetscCall(MatSeqAIJSetPreallocation(B, 0, ilen));
  PetscCall(PetscFree(ilen));
  PetscCall(PetscMalloc1(nmax, &icols));
  ii = 0;
  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow_SeqAIJ(a, i, &ncols, &cols, &vals));
    for (j = 0; j < dof; j++) {
      for (k = 0; k < ncols; k++) icols[k] = dof * cols[k] + j;
      PetscCall(MatSetValues_SeqAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES));
      ii++;
    }
    PetscCall(MatRestoreRow_SeqAIJ(a, i, &ncols, &cols, &vals));
  }
  PetscCall(PetscFree(icols));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
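
/*
  Usage sketch (illustrative only; `maij` is a caller-owned MATSEQMAIJ matrix, not a name from this
  file): build the expanded AIJ representation when an operation is not implemented for MAIJ.

    Mat aij;
    PetscCall(MatConvert(maij, MATSEQAIJ, MAT_INITIAL_MATRIX, &aij));
    // ... use aij ...
    PetscCall(MatDestroy(&aij));

  MatConvert() reaches MatConvert_SeqMAIJ_SeqAIJ() through the "MatConvert_seqmaij_seqaij_C"
  function composed on the matrix in MatCreateMAIJ() below.
*/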

#include <../src/mat/impls/aij/mpi/mpiaij.h>

PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIMAIJ *maij    = (Mat_MPIMAIJ *)A->data;
  Mat          MatAIJ  = ((Mat_SeqMAIJ *)maij->AIJ->data)->AIJ, B;
  Mat          MatOAIJ = ((Mat_SeqMAIJ *)maij->OAIJ->data)->AIJ;
  Mat_SeqAIJ  *AIJ     = (Mat_SeqAIJ *)MatAIJ->data;
  Mat_SeqAIJ  *OAIJ    = (Mat_SeqAIJ *)MatOAIJ->data;
  Mat_MPIAIJ  *mpiaij  = (Mat_MPIAIJ *)maij->A->data;
  PetscInt     dof = maij->dof, i, j, *dnz = NULL, *onz = NULL, nmax = 0, onmax = 0;
  PetscInt    *oicols = NULL, *icols = NULL, ncols, *cols = NULL, oncols, *ocols = NULL;
  PetscInt     rstart, cstart, *garray, ii, k;
  PetscScalar *vals, *ovals;

  PetscFunctionBegin;
  PetscCall(PetscMalloc2(A->rmap->n, &dnz, A->rmap->n, &onz));
  for (i = 0; i < A->rmap->n / dof; i++) {
    nmax  = PetscMax(nmax, AIJ->ilen[i]);
    onmax = PetscMax(onmax, OAIJ->ilen[i]);
    for (j = 0; j < dof; j++) {
      dnz[dof * i + j] = AIJ->ilen[i];
      onz[dof * i + j] = OAIJ->ilen[i];
    }
  }
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
  PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(MatSetType(B, newtype));
  PetscCall(MatMPIAIJSetPreallocation(B, 0, dnz, 0, onz));
  PetscCall(MatSetBlockSize(B, dof));
  PetscCall(PetscFree2(dnz, onz));

  PetscCall(PetscMalloc2(nmax, &icols, onmax, &oicols));
  rstart = dof * maij->A->rmap->rstart;
  cstart = dof * maij->A->cmap->rstart;
  garray = mpiaij->garray;

  ii = rstart;
  for (i = 0; i < A->rmap->n / dof; i++) {
    PetscCall(MatGetRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals));
    PetscCall(MatGetRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals));
    for (j = 0; j < dof; j++) {
      for (k = 0; k < ncols; k++) icols[k] = cstart + dof * cols[k] + j;
      for (k = 0; k < oncols; k++) oicols[k] = dof * garray[ocols[k]] + j;
      PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES));
      PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, oncols, oicols, ovals, INSERT_VALUES));
      ii++;
    }
    PetscCall(MatRestoreRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals));
    PetscCall(MatRestoreRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals));
  }
  PetscCall(PetscFree2(icols, oicols));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */

    ((PetscObject)A)->refct = 1;
    PetscCall(MatHeaderReplace(A, &B));
    ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat)
{
  Mat A;

  PetscFunctionBegin;
  PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
  PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat));
  PetscCall(MatDestroy(&A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *submat[])
{
  Mat A;

  PetscFunctionBegin;
  PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
  PetscCall(MatCreateSubMatrices(A, n, irow, icol, scall, submat));
  PetscCall(MatDestroy(&A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateMAIJ - Creates a matrix type providing restriction and interpolation
  operations for multicomponent problems. It interpolates each component the same
  way independently. The matrix type is based on `MATSEQAIJ` for sequential matrices,
  and `MATMPIAIJ` for distributed matrices.

  Collective

  Input Parameters:
+ A   - the `MATAIJ` matrix describing the action on blocks
- dof - the block size (number of components per node)

  Output Parameter:
. maij - the new `MATMAIJ` matrix

  Level: advanced

  Operations provided:
.vb
    MatMult()
    MatMultTranspose()
    MatMultAdd()
    MatMultTransposeAdd()
    MatView()
.ve

.seealso: [](ch_matrices), `Mat`, `MATAIJ`, `MATMAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`
@*/
PetscErrorCode MatCreateMAIJ(Mat A, PetscInt dof, Mat *maij)
{
  PetscInt  n;
  Mat       B;
  PetscBool flg;
#if defined(PETSC_HAVE_CUDA)
  /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */
  PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE;
#endif

  PetscFunctionBegin;
  dof = PetscAbs(dof);
  PetscCall(PetscObjectReference((PetscObject)A));

  if (dof == 1) *maij = A;
  else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    /* propagate vec type */
    PetscCall(MatSetVecType(B, A->defaultvectype));
    PetscCall(MatSetSizes(B, dof * A->rmap->n, dof * A->cmap->n, dof * A->rmap->N, dof * A->cmap->N));
    PetscCall(PetscLayoutSetBlockSize(B->rmap, dof));
    PetscCall(PetscLayoutSetBlockSize(B->cmap, dof));
    PetscCall(PetscLayoutSetUp(B->rmap));
    PetscCall(PetscLayoutSetUp(B->cmap));

    B->assembled = PETSC_TRUE;

    PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &flg));
    if (flg) {
      Mat_SeqMAIJ *b;

      PetscCall(MatSetType(B, MATSEQMAIJ));

      B->ops->setup   = NULL;
      B->ops->destroy = MatDestroy_SeqMAIJ;
      B->ops->view    = MatView_SeqMAIJ;

      b      = (Mat_SeqMAIJ *)B->data;
      b->dof = dof;
      b->AIJ = A;

      if (dof == 2) {
        B->ops->mult             = MatMult_SeqMAIJ_2;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_2;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_2;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2;
      } else if (dof == 3) {
        B->ops->mult             = MatMult_SeqMAIJ_3;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_3;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_3;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3;
      } else if (dof == 4) {
        B->ops->mult             = MatMult_SeqMAIJ_4;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_4;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_4;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4;
      } else if (dof == 5) {
        B->ops->mult             = MatMult_SeqMAIJ_5;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_5;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_5;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5;
      } else if (dof == 6) {
        B->ops->mult             = MatMult_SeqMAIJ_6;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_6;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_6;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6;
      } else if (dof == 7) {
        B->ops->mult             = MatMult_SeqMAIJ_7;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_7;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_7;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7;
      } else if (dof == 8) {
        B->ops->mult             = MatMult_SeqMAIJ_8;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_8;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_8;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8;
      } else if (dof == 9) {
        B->ops->mult             = MatMult_SeqMAIJ_9;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_9;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_9;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9;
      } else if (dof == 10) {
        B->ops->mult             = MatMult_SeqMAIJ_10;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_10;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_10;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10;
      } else if (dof == 11) {
        B->ops->mult             = MatMult_SeqMAIJ_11;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_11;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_11;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11;
      } else if (dof == 16) {
        B->ops->mult             = MatMult_SeqMAIJ_16;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_16;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_16;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16;
      } else if (dof == 18) {
        B->ops->mult             = MatMult_SeqMAIJ_18;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_18;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_18;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18;
      } else {
        B->ops->mult             = MatMult_SeqMAIJ_N;
        B->ops->multadd          = MatMultAdd_SeqMAIJ_N;
        B->ops->multtranspose    = MatMultTranspose_SeqMAIJ_N;
        B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N;
      }
#if defined(PETSC_HAVE_CUDA)
      PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaijcusparse_C", MatConvert_SeqMAIJ_SeqAIJ));
#endif
      PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaij_C", MatConvert_SeqMAIJ_SeqAIJ));
      PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqmaij_C", MatProductSetFromOptions_SeqAIJ_SeqMAIJ));
    } else {
      Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ *)A->data;
      Mat_MPIMAIJ *b;
      IS           from, to;
      Vec          gvec;

      PetscCall(MatSetType(B, MATMPIMAIJ));

      B->ops->setup   = NULL;
      B->ops->destroy = MatDestroy_MPIMAIJ;
      B->ops->view    = MatView_MPIMAIJ;

      b      = (Mat_MPIMAIJ *)B->data;
      b->dof = dof;
      b->A   = A;

      PetscCall(MatCreateMAIJ(mpiaij->A, -dof, &b->AIJ));
      PetscCall(MatCreateMAIJ(mpiaij->B, -dof, &b->OAIJ));

      PetscCall(VecGetSize(mpiaij->lvec, &n));
      PetscCall(VecCreate(PETSC_COMM_SELF, &b->w));
      PetscCall(VecSetSizes(b->w, n * dof, n * dof));
      PetscCall(VecSetBlockSize(b->w, dof));
      PetscCall(VecSetType(b->w, VECSEQ));

      /* create two temporary index sets needed to build the scatter */
      PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)A), dof, n, mpiaij->garray, PETSC_COPY_VALUES, &from));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, n * dof, 0, 1, &to));

      /* create temporary global vector to generate scatter context */
      PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), dof, dof * A->cmap->n, dof * A->cmap->N, NULL, &gvec));

      /* generate the scatter context */
      PetscCall(VecScatterCreate(gvec, from, b->w, to, &b->ctx));

      PetscCall(ISDestroy(&from));
      PetscCall(ISDestroy(&to));
      PetscCall(VecDestroy(&gvec));

      B->ops->mult             = MatMult_MPIMAIJ_dof;
      B->ops->multtranspose    = MatMultTranspose_MPIMAIJ_dof;
      B->ops->multadd          = MatMultAdd_MPIMAIJ_dof;
      B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof;

#if defined(PETSC_HAVE_CUDA)
      PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaijcusparse_C", MatConvert_MPIMAIJ_MPIAIJ));
#endif
      PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaij_C", MatConvert_MPIMAIJ_MPIAIJ));
      PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpimaij_C", MatProductSetFromOptions_MPIAIJ_MPIMAIJ));
    }
    B->ops->createsubmatrix   = MatCreateSubMatrix_MAIJ;
    B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ;
    PetscCall(MatSetUp(B));
#if defined(PETSC_HAVE_CUDA)
    /* temporary until we have a CUDA implementation of MAIJ */
    {
      PetscBool flg;
      if (convert) {
        PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, MATAIJCUSPARSE, ""));
        if (flg) PetscCall(MatConvert(B, ((PetscObject)A)->type_name, MAT_INPLACE_MATRIX, &B));
      }
    }
#endif
    *maij = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
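
/*
  Usage sketch for MatCreateMAIJ() (illustrative only; `interp` is an assumed caller-assembled AIJ
  interpolation matrix, and `xc`/`xf` are vectors sized for the interleaved dof = 3 layout):

    Mat maij;
    PetscCall(MatCreateMAIJ(interp, 3, &maij));   // apply interp to each of 3 components
    PetscCall(MatMult(maij, xc, xf));
    PetscCall(MatDestroy(&maij));

  Note that MatCreateMAIJ() takes its own reference on the AIJ matrix, so the caller may destroy
  its reference to `interp` independently of `maij`.
*/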