1 #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/ 2 #include <../src/mat/utils/freespace.h> 3 4 /*@ 5 MatMAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATMAIJ` matrix 6 7 Not Collective, but if the `MATMAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel 8 9 Input Parameter: 10 . A - the `MATMAIJ` matrix 11 12 Output Parameter: 13 . B - the `MATAIJ` matrix 14 15 Level: advanced 16 17 Note: 18 The reference count on the `MATAIJ` matrix is not increased so you should not destroy it. 19 20 .seealso: [](ch_matrices), `Mat`, `MATMAIJ`, `MATAIJ`, `MatCreateMAIJ()` 21 @*/ 22 PetscErrorCode MatMAIJGetAIJ(Mat A, Mat *B) 23 { 24 PetscBool ismpimaij, isseqmaij; 25 26 PetscFunctionBegin; 27 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIMAIJ, &ismpimaij)); 28 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQMAIJ, &isseqmaij)); 29 if (ismpimaij) { 30 Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 31 32 *B = b->A; 33 } else if (isseqmaij) { 34 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 35 36 *B = b->AIJ; 37 } else { 38 *B = A; 39 } 40 PetscFunctionReturn(PETSC_SUCCESS); 41 } 42 43 /*@ 44 MatMAIJRedimension - Get a new `MATMAIJ` matrix with the same action, but for a different block size 45 46 Logically Collective 47 48 Input Parameters: 49 + A - the `MATMAIJ` matrix 50 - dof - the block size for the new matrix 51 52 Output Parameter: 53 . B - the new `MATMAIJ` matrix 54 55 Level: advanced 56 57 .seealso: [](ch_matrices), `Mat`, `MATMAIJ`, `MatCreateMAIJ()` 58 @*/ 59 PetscErrorCode MatMAIJRedimension(Mat A, PetscInt dof, Mat *B) 60 { 61 Mat Aij = NULL; 62 63 PetscFunctionBegin; 64 PetscValidLogicalCollectiveInt(A, dof, 2); 65 PetscCall(MatMAIJGetAIJ(A, &Aij)); 66 PetscCall(MatCreateMAIJ(Aij, dof, B)); 67 PetscFunctionReturn(PETSC_SUCCESS); 68 } 69 70 static PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 71 { 72 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 73 74 PetscFunctionBegin; 75 PetscCall(MatDestroy(&b->AIJ)); 76 PetscCall(PetscFree(A->data)); 77 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaijcusparse_C", NULL)); 78 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaij_C", NULL)); 79 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaij_seqmaij_C", NULL)); 80 PetscFunctionReturn(PETSC_SUCCESS); 81 } 82 83 static PetscErrorCode MatSetUp_MAIJ(Mat A) 84 { 85 PetscFunctionBegin; 86 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Must use MatCreateMAIJ() to create MAIJ matrices"); 87 } 88 89 static PetscErrorCode MatView_SeqMAIJ(Mat A, PetscViewer viewer) 90 { 91 Mat B; 92 93 PetscFunctionBegin; 94 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 95 PetscCall(MatView(B, viewer)); 96 PetscCall(MatDestroy(&B)); 97 PetscFunctionReturn(PETSC_SUCCESS); 98 } 99 100 static PetscErrorCode MatView_MPIMAIJ(Mat A, PetscViewer viewer) 101 { 102 Mat B; 103 104 PetscFunctionBegin; 105 PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B)); 106 PetscCall(MatView(B, viewer)); 107 PetscCall(MatDestroy(&B)); 108 PetscFunctionReturn(PETSC_SUCCESS); 109 } 110 111 static PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 112 { 113 Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 114 115 PetscFunctionBegin; 116 PetscCall(MatDestroy(&b->AIJ)); 117 PetscCall(MatDestroy(&b->OAIJ)); 118 PetscCall(MatDestroy(&b->A)); 119 PetscCall(VecScatterDestroy(&b->ctx)); 120 PetscCall(VecDestroy(&b->w)); 121 PetscCall(PetscFree(A->data)); 122 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaijcusparse_C", NULL)); 123 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaij_C", NULL)); 124 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_mpiaij_mpimaij_C", NULL)); 125 PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL)); 126 PetscFunctionReturn(PETSC_SUCCESS); 127 } 128 129 /*MC 130 MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for 131 multicomponent problems, interpolating or restricting each component the same way independently. 132 The matrix type is based on `MATSEQAIJ` for sequential matrices, and `MATMPIAIJ` for distributed matrices. 133 134 Operations provided: 135 .vb 136 MatMult() 137 MatMultTranspose() 138 MatMultAdd() 139 MatMultTransposeAdd() 140 .ve 141 142 Level: advanced 143 144 .seealso: [](ch_matrices), `Mat`, `MATAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MatCreateMAIJ()` 145 M*/ 146 147 PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A) 148 { 149 Mat_MPIMAIJ *b; 150 PetscMPIInt size; 151 152 PetscFunctionBegin; 153 PetscCall(PetscNew(&b)); 154 A->data = (void *)b; 155 156 PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps))); 157 158 A->ops->setup = MatSetUp_MAIJ; 159 160 b->AIJ = NULL; 161 b->dof = 0; 162 b->OAIJ = NULL; 163 b->ctx = NULL; 164 b->w = NULL; 165 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 166 if (size == 1) { 167 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQMAIJ)); 168 } else { 169 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIMAIJ)); 170 } 171 A->preallocated = PETSC_TRUE; 172 A->assembled = PETSC_TRUE; 173 PetscFunctionReturn(PETSC_SUCCESS); 174 } 175 176 #if PetscHasAttribute(always_inline) 177 #define PETSC_FORCE_INLINE __attribute__((always_inline)) 178 #else 179 #define PETSC_FORCE_INLINE 180 #endif 181 182 #if defined(__clang__) 183 #define PETSC_PRAGMA_UNROLL _Pragma("unroll") 184 #else 185 #define PETSC_PRAGMA_UNROLL 186 #endif 187 188 enum { 189 MAT_SEQMAIJ_MAX_TEMPLATE_SIZE = 18 190 }; 191 192 // try as hard as possible to get these "template"s inlined, GCC apparently does take 'inline' 193 // keyword into account for these... 194 PETSC_FORCE_INLINE static inline PetscErrorCode MatMult_MatMultAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N) 195 { 196 const PetscBool mult_add = yy == NULL ? PETSC_FALSE : PETSC_TRUE; 197 const Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 198 const Mat baij = b->AIJ; 199 const Mat_SeqAIJ *a = (Mat_SeqAIJ *)baij->data; 200 const PetscInt m = baij->rmap->n; 201 const PetscInt nz = a->nz; 202 const PetscInt *idx = a->j; 203 const PetscInt *ii = a->i; 204 const PetscScalar *v = a->a; 205 PetscInt nonzerorow = 0; 206 const PetscScalar *x; 207 PetscScalar *z; 208 209 PetscFunctionBegin; 210 PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE); 211 if (mult_add && yy != zz) PetscCall(VecCopy(yy, zz)); 212 PetscCall(VecGetArrayRead(xx, &x)); 213 if (mult_add) { 214 PetscCall(VecGetArray(zz, &z)); 215 } else { 216 PetscCall(VecGetArrayWrite(zz, &z)); 217 } 218 219 for (PetscInt i = 0; i < m; ++i) { 220 PetscInt jrow = ii[i]; 221 const PetscInt n = ii[i + 1] - jrow; 222 // leave a line so clang-format does not align these decls 223 PetscScalar sum[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE] = {0}; 224 225 nonzerorow += n > 0; 226 for (PetscInt j = 0; j < n; ++j, ++jrow) { 227 const PetscScalar v_jrow = v[jrow]; 228 const PetscInt N_idx_jrow = N * idx[jrow]; 229 230 PETSC_PRAGMA_UNROLL 231 for (int k = 0; k < N; ++k) sum[k] += v_jrow * x[N_idx_jrow + k]; 232 } 233 234 PETSC_PRAGMA_UNROLL 235 for (int k = 0; k < N; ++k) { 236 const PetscInt z_idx = N * i + k; 237 238 if (mult_add) { 239 z[z_idx] += sum[k]; 240 } else { 241 z[z_idx] = sum[k]; 242 } 243 } 244 } 245 PetscCall(PetscLogFlops(2 * N * nz - (mult_add ? 0 : (N * nonzerorow)))); 246 PetscCall(VecRestoreArrayRead(xx, &x)); 247 if (mult_add) { 248 PetscCall(VecRestoreArray(zz, &z)); 249 } else { 250 PetscCall(VecRestoreArrayWrite(zz, &z)); 251 } 252 PetscFunctionReturn(PETSC_SUCCESS); 253 } 254 255 PETSC_FORCE_INLINE static inline PetscErrorCode MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N) 256 { 257 const PetscBool mult_add = yy == NULL ? PETSC_FALSE : PETSC_TRUE; 258 const Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 259 const Mat baij = b->AIJ; 260 const Mat_SeqAIJ *a = (Mat_SeqAIJ *)baij->data; 261 const PetscInt m = baij->rmap->n; 262 const PetscInt nz = a->nz; 263 const PetscInt *a_j = a->j; 264 const PetscInt *a_i = a->i; 265 const PetscScalar *a_a = a->a; 266 const PetscScalar *x; 267 PetscScalar *z; 268 269 PetscFunctionBegin; 270 PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE); 271 if (mult_add) { 272 if (yy != zz) PetscCall(VecCopy(yy, zz)); 273 } else { 274 PetscCall(VecSet(zz, 0.0)); 275 } 276 PetscCall(VecGetArrayRead(xx, &x)); 277 PetscCall(VecGetArray(zz, &z)); 278 279 for (PetscInt i = 0; i < m; i++) { 280 const PetscInt a_ii = a_i[i]; 281 const PetscInt *idx = PetscSafePointerPlusOffset(a_j, a_ii); 282 const PetscScalar *v = PetscSafePointerPlusOffset(a_a, a_ii); 283 const PetscInt n = a_i[i + 1] - a_ii; 284 PetscScalar alpha[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE]; 285 286 PETSC_PRAGMA_UNROLL 287 for (int k = 0; k < N; ++k) alpha[k] = x[N * i + k]; 288 for (PetscInt j = 0; j < n; ++j) { 289 const PetscInt N_idx_j = N * idx[j]; 290 const PetscScalar v_j = v[j]; 291 292 PETSC_PRAGMA_UNROLL 293 for (int k = 0; k < N; ++k) z[N_idx_j + k] += alpha[k] * v_j; 294 } 295 } 296 297 PetscCall(PetscLogFlops(2 * N * nz)); 298 PetscCall(VecRestoreArrayRead(xx, &x)); 299 PetscCall(VecRestoreArray(zz, &z)); 300 PetscFunctionReturn(PETSC_SUCCESS); 301 } 302 303 #define MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(N) \ 304 static PetscErrorCode PetscConcat(MatMult_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy) \ 305 { \ 306 PetscFunctionBegin; \ 307 PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, N)); \ 308 PetscFunctionReturn(PETSC_SUCCESS); \ 309 } \ 310 static PetscErrorCode PetscConcat(MatMultTranspose_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy) \ 311 { \ 312 PetscFunctionBegin; \ 313 PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, N)); \ 314 PetscFunctionReturn(PETSC_SUCCESS); \ 315 } \ 316 static PetscErrorCode PetscConcat(MatMultAdd_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy, Vec zz) \ 317 { \ 318 PetscFunctionBegin; \ 319 PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, N)); \ 320 PetscFunctionReturn(PETSC_SUCCESS); \ 321 } \ 322 static PetscErrorCode PetscConcat(MatMultTransposeAdd_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy, Vec zz) \ 323 { \ 324 PetscFunctionBegin; \ 325 PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, N)); \ 326 PetscFunctionReturn(PETSC_SUCCESS); \ 327 } 328 329 MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(2) 330 MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(3) 331 MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(4) 332 MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(5) 333 MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(6) 334 MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(7) 335 MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(8) 336 MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(9) 337 MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(10) 338 MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(11) 339 MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(16) 340 MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(18) 341 342 #undef MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE 343 344 static PetscErrorCode MatMult_SeqMAIJ_N(Mat A, Vec xx, Vec yy) 345 { 346 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 347 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 348 const PetscScalar *x, *v; 349 PetscScalar *y, *sums; 350 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 351 PetscInt n, i, jrow, j, dof = b->dof, k; 352 353 PetscFunctionBegin; 354 PetscCall(VecGetArrayRead(xx, &x)); 355 PetscCall(VecSet(yy, 0.0)); 356 PetscCall(VecGetArray(yy, &y)); 357 idx = a->j; 358 v = a->a; 359 ii = a->i; 360 361 for (i = 0; i < m; i++) { 362 jrow = ii[i]; 363 n = ii[i + 1] - jrow; 364 sums = y + dof * i; 365 for (j = 0; j < n; j++) { 366 for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k]; 367 jrow++; 368 } 369 } 370 371 PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 372 PetscCall(VecRestoreArrayRead(xx, &x)); 373 PetscCall(VecRestoreArray(yy, &y)); 374 PetscFunctionReturn(PETSC_SUCCESS); 375 } 376 377 static PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz) 378 { 379 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 380 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 381 const PetscScalar *x, *v; 382 PetscScalar *y, *sums; 383 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 384 PetscInt n, i, jrow, j, dof = b->dof, k; 385 386 PetscFunctionBegin; 387 if (yy != zz) PetscCall(VecCopy(yy, zz)); 388 PetscCall(VecGetArrayRead(xx, &x)); 389 PetscCall(VecGetArray(zz, &y)); 390 idx = a->j; 391 v = a->a; 392 ii = a->i; 393 394 for (i = 0; i < m; i++) { 395 jrow = ii[i]; 396 n = ii[i + 1] - jrow; 397 sums = y + dof * i; 398 for (j = 0; j < n; j++) { 399 for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k]; 400 jrow++; 401 } 402 } 403 404 PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 405 PetscCall(VecRestoreArrayRead(xx, &x)); 406 PetscCall(VecRestoreArray(zz, &y)); 407 PetscFunctionReturn(PETSC_SUCCESS); 408 } 409 410 static PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A, Vec xx, Vec yy) 411 { 412 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 413 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 414 const PetscScalar *x, *v, *alpha; 415 PetscScalar *y; 416 const PetscInt m = b->AIJ->rmap->n, *idx, dof = b->dof; 417 PetscInt n, i, k; 418 419 PetscFunctionBegin; 420 PetscCall(VecGetArrayRead(xx, &x)); 421 PetscCall(VecSet(yy, 0.0)); 422 PetscCall(VecGetArray(yy, &y)); 423 for (i = 0; i < m; i++) { 424 idx = PetscSafePointerPlusOffset(a->j, a->i[i]); 425 v = PetscSafePointerPlusOffset(a->a, a->i[i]); 426 n = a->i[i + 1] - a->i[i]; 427 alpha = x + dof * i; 428 while (n-- > 0) { 429 for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v); 430 idx++; 431 v++; 432 } 433 } 434 PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 435 PetscCall(VecRestoreArrayRead(xx, &x)); 436 PetscCall(VecRestoreArray(yy, &y)); 437 PetscFunctionReturn(PETSC_SUCCESS); 438 } 439 440 static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz) 441 { 442 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 443 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 444 const PetscScalar *x, *v, *alpha; 445 PetscScalar *y; 446 const PetscInt m = b->AIJ->rmap->n, *idx, dof = b->dof; 447 PetscInt n, i, k; 448 449 PetscFunctionBegin; 450 if (yy != zz) PetscCall(VecCopy(yy, zz)); 451 PetscCall(VecGetArrayRead(xx, &x)); 452 PetscCall(VecGetArray(zz, &y)); 453 for (i = 0; i < m; i++) { 454 idx = a->j + a->i[i]; 455 v = a->a + a->i[i]; 456 n = a->i[i + 1] - a->i[i]; 457 alpha = x + dof * i; 458 while (n-- > 0) { 459 for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v); 460 idx++; 461 v++; 462 } 463 } 464 PetscCall(PetscLogFlops(2.0 * dof * a->nz)); 465 PetscCall(VecRestoreArrayRead(xx, &x)); 466 PetscCall(VecRestoreArray(zz, &y)); 467 PetscFunctionReturn(PETSC_SUCCESS); 468 } 469 470 static PetscErrorCode MatMult_MPIMAIJ_dof(Mat A, Vec xx, Vec yy) 471 { 472 Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 473 474 PetscFunctionBegin; 475 /* start the scatter */ 476 PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 477 PetscCall((*b->AIJ->ops->mult)(b->AIJ, xx, yy)); 478 PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 479 PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, yy, yy)); 480 PetscFunctionReturn(PETSC_SUCCESS); 481 } 482 483 static PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A, Vec xx, Vec yy) 484 { 485 Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 486 487 PetscFunctionBegin; 488 PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w)); 489 PetscCall((*b->AIJ->ops->multtranspose)(b->AIJ, xx, yy)); 490 PetscCall(VecScatterBegin(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE)); 491 PetscCall(VecScatterEnd(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE)); 492 PetscFunctionReturn(PETSC_SUCCESS); 493 } 494 495 static PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz) 496 { 497 Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 498 499 PetscFunctionBegin; 500 /* start the scatter */ 501 PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 502 PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, yy, zz)); 503 PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 504 PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz)); 505 PetscFunctionReturn(PETSC_SUCCESS); 506 } 507 508 static PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz) 509 { 510 Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 511 512 PetscFunctionBegin; 513 PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w)); 514 PetscCall((*b->AIJ->ops->multtransposeadd)(b->AIJ, xx, yy, zz)); 515 PetscCall(VecScatterBegin(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE)); 516 PetscCall(VecScatterEnd(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE)); 517 PetscFunctionReturn(PETSC_SUCCESS); 518 } 519 520 static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C) 521 { 522 Mat_Product *product = C->product; 523 524 PetscFunctionBegin; 525 PetscCheck(product->type == MATPRODUCT_PtAP, PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices", MatProductTypes[product->type]); 526 C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ; 527 PetscFunctionReturn(PETSC_SUCCESS); 528 } 529 530 static PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C) 531 { 532 Mat_Product *product = C->product; 533 PetscBool flg = PETSC_FALSE; 534 Mat A = product->A, P = product->B; 535 PetscInt alg = 1; /* set default algorithm */ 536 #if !defined(PETSC_HAVE_HYPRE) 537 const char *algTypes[4] = {"scalable", "nonscalable", "allatonce", "allatonce_merged"}; 538 PetscInt nalg = 4; 539 #else 540 const char *algTypes[5] = {"scalable", "nonscalable", "allatonce", "allatonce_merged", "hypre"}; 541 PetscInt nalg = 5; 542 #endif 543 544 PetscFunctionBegin; 545 PetscCheck(product->type == MATPRODUCT_PtAP, PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices", MatProductTypes[product->type]); 546 547 /* PtAP */ 548 /* Check matrix local sizes */ 549 PetscCheck(A->rmap->rstart == P->rmap->rstart && A->rmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")", 550 A->rmap->rstart, A->rmap->rend, P->rmap->rstart, P->rmap->rend); 551 PetscCheck(A->cmap->rstart == P->rmap->rstart && A->cmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")", 552 A->cmap->rstart, A->cmap->rend, P->rmap->rstart, P->rmap->rend); 553 554 /* Set the default algorithm */ 555 PetscCall(PetscStrcmp(C->product->alg, "default", &flg)); 556 if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg])); 557 558 /* Get runtime option */ 559 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 560 PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatPtAP", algTypes, nalg, algTypes[alg], &alg, &flg)); 561 if (flg) PetscCall(MatProductSetAlgorithm(C, algTypes[alg])); 562 PetscOptionsEnd(); 563 564 PetscCall(PetscStrcmp(C->product->alg, "allatonce", &flg)); 565 if (flg) { 566 C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 567 PetscFunctionReturn(PETSC_SUCCESS); 568 } 569 570 PetscCall(PetscStrcmp(C->product->alg, "allatonce_merged", &flg)); 571 if (flg) { 572 C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ; 573 PetscFunctionReturn(PETSC_SUCCESS); 574 } 575 576 /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */ 577 PetscCall(PetscInfo(A, "Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n")); 578 PetscCall(MatConvert(P, MATMPIAIJ, MAT_INPLACE_MATRIX, &P)); 579 PetscCall(MatProductSetFromOptions(C)); 580 PetscFunctionReturn(PETSC_SUCCESS); 581 } 582 583 static PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A, Mat PP, Mat C) 584 { 585 /* This routine requires testing -- first draft only */ 586 Mat_SeqMAIJ *pp = (Mat_SeqMAIJ *)PP->data; 587 Mat P = pp->AIJ; 588 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 589 Mat_SeqAIJ *p = (Mat_SeqAIJ *)P->data; 590 Mat_SeqAIJ *c = (Mat_SeqAIJ *)C->data; 591 const PetscInt *ai = a->i, *aj = a->j, *pi = p->i, *pj = p->j, *pJ, *pjj; 592 const PetscInt *ci = c->i, *cj = c->j, *cjj; 593 const PetscInt am = A->rmap->N, cn = C->cmap->N, cm = C->rmap->N, ppdof = pp->dof; 594 PetscInt i, j, k, pshift, poffset, anzi, pnzi, apnzj, nextap, pnzj, prow, crow, *apj, *apjdense; 595 const MatScalar *aa = a->a, *pa = p->a, *pA, *paj; 596 MatScalar *ca = c->a, *caj, *apa; 597 598 PetscFunctionBegin; 599 /* Allocate temporary array for storage of one row of A*P */ 600 PetscCall(PetscCalloc3(cn, &apa, cn, &apj, cn, &apjdense)); 601 602 /* Clear old values in C */ 603 PetscCall(PetscArrayzero(ca, ci[cm])); 604 605 for (i = 0; i < am; i++) { 606 /* Form sparse row of A*P */ 607 anzi = ai[i + 1] - ai[i]; 608 apnzj = 0; 609 for (j = 0; j < anzi; j++) { 610 /* Get offset within block of P */ 611 pshift = *aj % ppdof; 612 /* Get block row of P */ 613 prow = *aj++ / ppdof; /* integer division */ 614 pnzj = pi[prow + 1] - pi[prow]; 615 pjj = pj + pi[prow]; 616 paj = pa + pi[prow]; 617 for (k = 0; k < pnzj; k++) { 618 poffset = pjj[k] * ppdof + pshift; 619 if (!apjdense[poffset]) { 620 apjdense[poffset] = -1; 621 apj[apnzj++] = poffset; 622 } 623 apa[poffset] += (*aa) * paj[k]; 624 } 625 PetscCall(PetscLogFlops(2.0 * pnzj)); 626 aa++; 627 } 628 629 /* Sort the j index array for quick sparse axpy. */ 630 /* Note: a array does not need sorting as it is in dense storage locations. */ 631 PetscCall(PetscSortInt(apnzj, apj)); 632 633 /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */ 634 prow = i / ppdof; /* integer division */ 635 pshift = i % ppdof; 636 poffset = pi[prow]; 637 pnzi = pi[prow + 1] - poffset; 638 /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */ 639 pJ = pj + poffset; 640 pA = pa + poffset; 641 for (j = 0; j < pnzi; j++) { 642 crow = (*pJ) * ppdof + pshift; 643 cjj = cj + ci[crow]; 644 caj = ca + ci[crow]; 645 pJ++; 646 /* Perform sparse axpy operation. Note cjj includes apj. */ 647 for (k = 0, nextap = 0; nextap < apnzj; k++) { 648 if (cjj[k] == apj[nextap]) caj[k] += (*pA) * apa[apj[nextap++]]; 649 } 650 PetscCall(PetscLogFlops(2.0 * apnzj)); 651 pA++; 652 } 653 654 /* Zero the current row info for A*P */ 655 for (j = 0; j < apnzj; j++) { 656 apa[apj[j]] = 0.; 657 apjdense[apj[j]] = 0; 658 } 659 } 660 661 /* Assemble the final matrix and clean up */ 662 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 663 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 664 PetscCall(PetscFree3(apa, apj, apjdense)); 665 PetscFunctionReturn(PETSC_SUCCESS); 666 } 667 668 static PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A, Mat PP, PetscReal fill, Mat C) 669 { 670 PetscFreeSpaceList free_space = NULL, current_space = NULL; 671 Mat_SeqMAIJ *pp = (Mat_SeqMAIJ *)PP->data; 672 Mat P = pp->AIJ; 673 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data, *p = (Mat_SeqAIJ *)P->data, *c; 674 PetscInt *pti, *ptj, *ptJ; 675 PetscInt *ci, *cj, *ptadenserow, *ptasparserow, *denserow, *sparserow, *ptaj; 676 const PetscInt an = A->cmap->N, am = A->rmap->N, pn = P->cmap->N, pm = P->rmap->N, ppdof = pp->dof; 677 PetscInt i, j, k, dof, pshift, ptnzi, arow, anzj, ptanzi, prow, pnzj, cnzi, cn; 678 MatScalar *ca; 679 const PetscInt *pi = p->i, *pj = p->j, *pjj, *ai = a->i, *aj = a->j, *ajj; 680 681 PetscFunctionBegin; 682 /* Get ij structure of P^T */ 683 PetscCall(MatGetSymbolicTranspose_SeqAIJ(P, &pti, &ptj)); 684 685 cn = pn * ppdof; 686 /* Allocate ci array, arrays for fill computation and */ 687 /* free space for accumulating nonzero column info */ 688 PetscCall(PetscMalloc1(cn + 1, &ci)); 689 ci[0] = 0; 690 691 /* Work arrays for rows of P^T*A */ 692 PetscCall(PetscMalloc4(an, &ptadenserow, an, &ptasparserow, cn, &denserow, cn, &sparserow)); 693 PetscCall(PetscArrayzero(ptadenserow, an)); 694 PetscCall(PetscArrayzero(denserow, cn)); 695 696 /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */ 697 /* This should be reasonable if sparsity of PtAP is similar to that of A. */ 698 /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */ 699 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(ai[am] / pm, pn), &free_space)); 700 current_space = free_space; 701 702 /* Determine symbolic info for each row of C: */ 703 for (i = 0; i < pn; i++) { 704 ptnzi = pti[i + 1] - pti[i]; 705 ptJ = ptj + pti[i]; 706 for (dof = 0; dof < ppdof; dof++) { 707 ptanzi = 0; 708 /* Determine symbolic row of PtA: */ 709 for (j = 0; j < ptnzi; j++) { 710 /* Expand ptJ[j] by block size and shift by dof to get the right row of A */ 711 arow = ptJ[j] * ppdof + dof; 712 /* Nonzeros of P^T*A will be in same locations as any element of A in that row */ 713 anzj = ai[arow + 1] - ai[arow]; 714 ajj = aj + ai[arow]; 715 for (k = 0; k < anzj; k++) { 716 if (!ptadenserow[ajj[k]]) { 717 ptadenserow[ajj[k]] = -1; 718 ptasparserow[ptanzi++] = ajj[k]; 719 } 720 } 721 } 722 /* Using symbolic info for row of PtA, determine symbolic info for row of C: */ 723 ptaj = ptasparserow; 724 cnzi = 0; 725 for (j = 0; j < ptanzi; j++) { 726 /* Get offset within block of P */ 727 pshift = *ptaj % ppdof; 728 /* Get block row of P */ 729 prow = (*ptaj++) / ppdof; /* integer division */ 730 /* P has same number of nonzeros per row as the compressed form */ 731 pnzj = pi[prow + 1] - pi[prow]; 732 pjj = pj + pi[prow]; 733 for (k = 0; k < pnzj; k++) { 734 /* Locations in C are shifted by the offset within the block */ 735 /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */ 736 if (!denserow[pjj[k] * ppdof + pshift]) { 737 denserow[pjj[k] * ppdof + pshift] = -1; 738 sparserow[cnzi++] = pjj[k] * ppdof + pshift; 739 } 740 } 741 } 742 743 /* sort sparserow */ 744 PetscCall(PetscSortInt(cnzi, sparserow)); 745 746 /* If free space is not available, make more free space */ 747 /* Double the amount of total space in the list */ 748 if (current_space->local_remaining < cnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), ¤t_space)); 749 750 /* Copy data into free space, and zero out denserows */ 751 PetscCall(PetscArraycpy(current_space->array, sparserow, cnzi)); 752 753 current_space->array += cnzi; 754 current_space->local_used += cnzi; 755 current_space->local_remaining -= cnzi; 756 757 for (j = 0; j < ptanzi; j++) ptadenserow[ptasparserow[j]] = 0; 758 for (j = 0; j < cnzi; j++) denserow[sparserow[j]] = 0; 759 760 /* Aside: Perhaps we should save the pta info for the numerical factorization. */ 761 /* For now, we will recompute what is needed. */ 762 ci[i * ppdof + 1 + dof] = ci[i * ppdof + dof] + cnzi; 763 } 764 } 765 /* nnz is now stored in ci[ptm], column indices are in the list of free space */ 766 /* Allocate space for cj, initialize cj, and */ 767 /* destroy list of free space and other temporary array(s) */ 768 PetscCall(PetscMalloc1(ci[cn], &cj)); 769 PetscCall(PetscFreeSpaceContiguous(&free_space, cj)); 770 PetscCall(PetscFree4(ptadenserow, ptasparserow, denserow, sparserow)); 771 772 /* Allocate space for ca */ 773 PetscCall(PetscCalloc1(ci[cn], &ca)); 774 775 /* put together the new matrix */ 776 PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), cn, cn, ci, cj, ca, NULL, C)); 777 PetscCall(MatSetBlockSize(C, pp->dof)); 778 779 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 780 /* Since these are PETSc arrays, change flags to free them as necessary. */ 781 c = (Mat_SeqAIJ *)C->data; 782 c->free_a = PETSC_TRUE; 783 c->free_ij = PETSC_TRUE; 784 c->nonew = 0; 785 786 C->ops->ptapnumeric = MatPtAPNumeric_SeqAIJ_SeqMAIJ; 787 C->ops->productnumeric = MatProductNumeric_PtAP; 788 789 /* Clean up. */ 790 PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(P, &pti, &ptj)); 791 PetscFunctionReturn(PETSC_SUCCESS); 792 } 793 794 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C) 795 { 796 Mat_Product *product = C->product; 797 Mat A = product->A, P = product->B; 798 799 PetscFunctionBegin; 800 PetscCall(MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A, P, product->fill, C)); 801 PetscFunctionReturn(PETSC_SUCCESS); 802 } 803 804 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, Mat); 805 806 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, Mat C) 807 { 808 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 809 810 PetscFunctionBegin; 811 PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, C)); 812 PetscFunctionReturn(PETSC_SUCCESS); 813 } 814 815 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, PetscReal, Mat); 816 817 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, PetscReal fill, Mat C) 818 { 819 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 820 821 PetscFunctionBegin; 822 PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, fill, C)); 823 C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce; 824 PetscFunctionReturn(PETSC_SUCCESS); 825 } 826 827 PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, Mat); 828 829 PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, Mat C) 830 { 831 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 832 833 PetscFunctionBegin; 834 PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, C)); 835 PetscFunctionReturn(PETSC_SUCCESS); 836 } 837 838 PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, PetscReal, Mat); 839 840 PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, PetscReal fill, Mat C) 841 { 842 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data; 843 844 PetscFunctionBegin; 845 PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, fill, C)); 846 C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged; 847 PetscFunctionReturn(PETSC_SUCCESS); 848 } 849 850 PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C) 851 { 852 Mat_Product *product = C->product; 853 Mat A = product->A, P = product->B; 854 PetscBool flg; 855 856 PetscFunctionBegin; 857 PetscCall(PetscStrcmp(product->alg, "allatonce", &flg)); 858 if (flg) { 859 PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A, P, product->fill, C)); 860 C->ops->productnumeric = MatProductNumeric_PtAP; 861 PetscFunctionReturn(PETSC_SUCCESS); 862 } 863 864 PetscCall(PetscStrcmp(product->alg, "allatonce_merged", &flg)); 865 PetscCheck(flg, PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Mat Product Algorithm is not supported"); 866 PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A, P, product->fill, C)); 867 C->ops->productnumeric = MatProductNumeric_PtAP; 868 PetscFunctionReturn(PETSC_SUCCESS); 869 } 870 871 PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 872 { 873 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 874 Mat a = b->AIJ, B; 875 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)a->data; 876 PetscInt m, n, i, ncols, *ilen, nmax = 0, *icols, j, k, ii, dof = b->dof; 877 PetscInt *cols; 878 PetscScalar *vals; 879 880 PetscFunctionBegin; 881 PetscCall(MatGetSize(a, &m, &n)); 882 PetscCall(PetscMalloc1(dof * m, &ilen)); 883 for (i = 0; i < m; i++) { 884 nmax = PetscMax(nmax, aij->ilen[i]); 885 for (j = 0; j < dof; j++) ilen[dof * i + j] = aij->ilen[i]; 886 } 887 PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 888 PetscCall(MatSetSizes(B, dof * m, dof * n, dof * m, dof * n)); 889 PetscCall(MatSetType(B, newtype)); 890 PetscCall(MatSeqAIJSetPreallocation(B, 0, ilen)); 891 PetscCall(PetscFree(ilen)); 892 PetscCall(PetscMalloc1(nmax, &icols)); 893 ii = 0; 894 for (i = 0; i < m; i++) { 895 PetscCall(MatGetRow_SeqAIJ(a, i, &ncols, &cols, &vals)); 896 for (j = 0; j < dof; j++) { 897 for (k = 0; k < ncols; k++) icols[k] = dof * cols[k] + j; 898 PetscCall(MatSetValues_SeqAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES)); 899 ii++; 900 } 901 PetscCall(MatRestoreRow_SeqAIJ(a, i, &ncols, &cols, &vals)); 902 } 903 PetscCall(PetscFree(icols)); 904 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 905 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 906 907 if (reuse == MAT_INPLACE_MATRIX) { 908 PetscCall(MatHeaderReplace(A, &B)); 909 } else { 910 *newmat = B; 911 } 912 PetscFunctionReturn(PETSC_SUCCESS); 913 } 914 915 #include <../src/mat/impls/aij/mpi/mpiaij.h> 916 917 PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 918 { 919 Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)A->data; 920 Mat MatAIJ = ((Mat_SeqMAIJ *)maij->AIJ->data)->AIJ, B; 921 Mat MatOAIJ = ((Mat_SeqMAIJ *)maij->OAIJ->data)->AIJ; 922 Mat_SeqAIJ *AIJ = (Mat_SeqAIJ *)MatAIJ->data; 923 Mat_SeqAIJ *OAIJ = (Mat_SeqAIJ *)MatOAIJ->data; 924 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)maij->A->data; 925 PetscInt dof = maij->dof, i, j, *dnz = NULL, *onz = NULL, nmax = 0, onmax = 0; 926 PetscInt *oicols = NULL, *icols = NULL, ncols, *cols = NULL, oncols, *ocols = NULL; 927 PetscInt rstart, cstart, *garray, ii, k; 928 PetscScalar *vals, *ovals; 929 930 PetscFunctionBegin; 931 PetscCall(PetscMalloc2(A->rmap->n, &dnz, A->rmap->n, &onz)); 932 for (i = 0; i < A->rmap->n / dof; i++) { 933 nmax = PetscMax(nmax, AIJ->ilen[i]); 934 onmax = PetscMax(onmax, OAIJ->ilen[i]); 935 for (j = 0; j < dof; j++) { 936 dnz[dof * i + j] = AIJ->ilen[i]; 937 onz[dof * i + j] = OAIJ->ilen[i]; 938 } 939 } 940 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 941 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 942 PetscCall(MatSetType(B, newtype)); 943 PetscCall(MatMPIAIJSetPreallocation(B, 0, dnz, 0, onz)); 944 PetscCall(MatSetBlockSize(B, dof)); 945 PetscCall(PetscFree2(dnz, onz)); 946 947 PetscCall(PetscMalloc2(nmax, &icols, onmax, &oicols)); 948 rstart = dof * maij->A->rmap->rstart; 949 cstart = dof * maij->A->cmap->rstart; 950 garray = mpiaij->garray; 951 952 ii = rstart; 953 for (i = 0; i < A->rmap->n / dof; i++) { 954 PetscCall(MatGetRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals)); 955 PetscCall(MatGetRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals)); 956 for (j = 0; j < dof; j++) { 957 for (k = 0; k < ncols; k++) icols[k] = cstart + dof * cols[k] + j; 958 for (k = 0; k < oncols; k++) oicols[k] = dof * garray[ocols[k]] + j; 959 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES)); 960 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, oncols, oicols, ovals, INSERT_VALUES)); 961 ii++; 962 } 963 PetscCall(MatRestoreRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals)); 964 PetscCall(MatRestoreRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals)); 965 } 966 PetscCall(PetscFree2(icols, oicols)); 967 968 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 969 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 970 971 if (reuse == MAT_INPLACE_MATRIX) { 972 PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 973 ((PetscObject)A)->refct = 1; 974 975 PetscCall(MatHeaderReplace(A, &B)); 976 977 ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 978 } else { 979 *newmat = B; 980 } 981 PetscFunctionReturn(PETSC_SUCCESS); 982 } 983 984 static PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat) 985 { 986 Mat A; 987 988 PetscFunctionBegin; 989 PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A)); 990 PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat)); 991 PetscCall(MatDestroy(&A)); 992 PetscFunctionReturn(PETSC_SUCCESS); 993 } 994 995 static PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat, PetscInt n, const IS irow[], const IS icol[], MatReuse scall, Mat *submat[]) 996 { 997 Mat A; 998 999 PetscFunctionBegin; 1000 PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A)); 1001 PetscCall(MatCreateSubMatrices(A, n, irow, icol, scall, submat)); 1002 PetscCall(MatDestroy(&A)); 1003 PetscFunctionReturn(PETSC_SUCCESS); 1004 } 1005 1006 /*@ 1007 MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 1008 operations for multicomponent problems. It interpolates each component the same 1009 way independently. The matrix type is based on `MATSEQAIJ` for sequential matrices, 1010 and `MATMPIAIJ` for distributed matrices. 1011 1012 Collective 1013 1014 Input Parameters: 1015 + A - the `MATAIJ` matrix describing the action on blocks 1016 - dof - the block size (number of components per node) 1017 1018 Output Parameter: 1019 . maij - the new `MATMAIJ` matrix 1020 1021 Level: advanced 1022 1023 .seealso: [](ch_matrices), `Mat`, `MATAIJ`, `MATMAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()` 1024 @*/ 1025 PetscErrorCode MatCreateMAIJ(Mat A, PetscInt dof, Mat *maij) 1026 { 1027 PetscInt n; 1028 Mat B; 1029 PetscBool flg; 1030 #if defined(PETSC_HAVE_CUDA) 1031 /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */ 1032 PetscBool convert = dof < 0 ? PETSC_FALSE : PETSC_TRUE; 1033 #endif 1034 1035 PetscFunctionBegin; 1036 dof = PetscAbs(dof); 1037 PetscCall(PetscObjectReference((PetscObject)A)); 1038 1039 if (dof == 1) *maij = A; 1040 else { 1041 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1042 /* propagate vec type */ 1043 PetscCall(MatSetVecType(B, A->defaultvectype)); 1044 PetscCall(MatSetSizes(B, dof * A->rmap->n, dof * A->cmap->n, dof * A->rmap->N, dof * A->cmap->N)); 1045 PetscCall(PetscLayoutSetBlockSize(B->rmap, dof)); 1046 PetscCall(PetscLayoutSetBlockSize(B->cmap, dof)); 1047 PetscCall(PetscLayoutSetUp(B->rmap)); 1048 PetscCall(PetscLayoutSetUp(B->cmap)); 1049 1050 B->assembled = PETSC_TRUE; 1051 1052 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &flg)); 1053 if (flg) { 1054 Mat_SeqMAIJ *b; 1055 1056 PetscCall(MatSetType(B, MATSEQMAIJ)); 1057 1058 B->ops->setup = NULL; 1059 B->ops->destroy = MatDestroy_SeqMAIJ; 1060 B->ops->view = MatView_SeqMAIJ; 1061 1062 b = (Mat_SeqMAIJ *)B->data; 1063 b->dof = dof; 1064 b->AIJ = A; 1065 1066 if (dof == 2) { 1067 B->ops->mult = MatMult_SeqMAIJ_2; 1068 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 1069 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 1070 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 1071 } else if (dof == 3) { 1072 B->ops->mult = MatMult_SeqMAIJ_3; 1073 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 1074 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 1075 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 1076 } else if (dof == 4) { 1077 B->ops->mult = MatMult_SeqMAIJ_4; 1078 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 1079 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 1080 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 1081 } else if (dof == 5) { 1082 B->ops->mult = MatMult_SeqMAIJ_5; 1083 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 1084 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 1085 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 1086 } else if (dof == 6) { 1087 B->ops->mult = MatMult_SeqMAIJ_6; 1088 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 1089 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 1090 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 1091 } else if (dof == 7) { 1092 B->ops->mult = MatMult_SeqMAIJ_7; 1093 B->ops->multadd = MatMultAdd_SeqMAIJ_7; 1094 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 1095 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 1096 } else if (dof == 8) { 1097 B->ops->mult = MatMult_SeqMAIJ_8; 1098 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 1099 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 1100 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 1101 } else if (dof == 9) { 1102 B->ops->mult = MatMult_SeqMAIJ_9; 1103 B->ops->multadd = MatMultAdd_SeqMAIJ_9; 1104 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 1105 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 1106 } else if (dof == 10) { 1107 B->ops->mult = MatMult_SeqMAIJ_10; 1108 B->ops->multadd = MatMultAdd_SeqMAIJ_10; 1109 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 1110 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 1111 } else if (dof == 11) { 1112 B->ops->mult = MatMult_SeqMAIJ_11; 1113 B->ops->multadd = MatMultAdd_SeqMAIJ_11; 1114 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; 1115 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; 1116 } else if (dof == 16) { 1117 B->ops->mult = MatMult_SeqMAIJ_16; 1118 B->ops->multadd = MatMultAdd_SeqMAIJ_16; 1119 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 1120 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 1121 } else if (dof == 18) { 1122 B->ops->mult = MatMult_SeqMAIJ_18; 1123 B->ops->multadd = MatMultAdd_SeqMAIJ_18; 1124 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 1125 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 1126 } else { 1127 B->ops->mult = MatMult_SeqMAIJ_N; 1128 B->ops->multadd = MatMultAdd_SeqMAIJ_N; 1129 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N; 1130 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N; 1131 } 1132 #if defined(PETSC_HAVE_CUDA) 1133 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaijcusparse_C", MatConvert_SeqMAIJ_SeqAIJ)); 1134 #endif 1135 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaij_C", MatConvert_SeqMAIJ_SeqAIJ)); 1136 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqmaij_C", MatProductSetFromOptions_SeqAIJ_SeqMAIJ)); 1137 } else { 1138 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 1139 Mat_MPIMAIJ *b; 1140 IS from, to; 1141 Vec gvec; 1142 1143 PetscCall(MatSetType(B, MATMPIMAIJ)); 1144 1145 B->ops->setup = NULL; 1146 B->ops->destroy = MatDestroy_MPIMAIJ; 1147 B->ops->view = MatView_MPIMAIJ; 1148 1149 b = (Mat_MPIMAIJ *)B->data; 1150 b->dof = dof; 1151 b->A = A; 1152 1153 PetscCall(MatCreateMAIJ(mpiaij->A, -dof, &b->AIJ)); 1154 PetscCall(MatCreateMAIJ(mpiaij->B, -dof, &b->OAIJ)); 1155 1156 PetscCall(VecGetSize(mpiaij->lvec, &n)); 1157 PetscCall(VecCreate(PETSC_COMM_SELF, &b->w)); 1158 PetscCall(VecSetSizes(b->w, n * dof, n * dof)); 1159 PetscCall(VecSetBlockSize(b->w, dof)); 1160 PetscCall(VecSetType(b->w, VECSEQ)); 1161 1162 /* create two temporary Index sets for build scatter gather */ 1163 PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)A), dof, n, mpiaij->garray, PETSC_COPY_VALUES, &from)); 1164 PetscCall(ISCreateStride(PETSC_COMM_SELF, n * dof, 0, 1, &to)); 1165 1166 /* create temporary global vector to generate scatter context */ 1167 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), dof, dof * A->cmap->n, dof * A->cmap->N, NULL, &gvec)); 1168 1169 /* generate the scatter context */ 1170 PetscCall(VecScatterCreate(gvec, from, b->w, to, &b->ctx)); 1171 1172 PetscCall(ISDestroy(&from)); 1173 PetscCall(ISDestroy(&to)); 1174 PetscCall(VecDestroy(&gvec)); 1175 1176 B->ops->mult = MatMult_MPIMAIJ_dof; 1177 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 1178 B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 1179 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 1180 1181 #if defined(PETSC_HAVE_CUDA) 1182 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaijcusparse_C", MatConvert_MPIMAIJ_MPIAIJ)); 1183 #endif 1184 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaij_C", MatConvert_MPIMAIJ_MPIAIJ)); 1185 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpimaij_C", MatProductSetFromOptions_MPIAIJ_MPIMAIJ)); 1186 } 1187 B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ; 1188 B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ; 1189 PetscCall(MatSetUp(B)); 1190 #if defined(PETSC_HAVE_CUDA) 1191 /* temporary until we have CUDA implementation of MAIJ */ 1192 { 1193 PetscBool flg; 1194 if (convert) { 1195 PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, MATAIJCUSPARSE, "")); 1196 if (flg) PetscCall(MatConvert(B, ((PetscObject)A)->type_name, MAT_INPLACE_MATRIX, &B)); 1197 } 1198 } 1199 #endif 1200 *maij = B; 1201 } 1202 PetscFunctionReturn(PETSC_SUCCESS); 1203 } 1204