1 #include <../src/mat/impls/maij/maij.h> /*I "petscmat.h" I*/ 2 #include <../src/mat/utils/freespace.h> 3 4 /*@ 5 MatMAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATMAIJ` matrix 6 7 Not Collective, but if the `MATMAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel 8 9 Input Parameter: 10 . A - the `MATMAIJ` matrix 11 12 Output Parameter: 13 . B - the `MATAIJ` matrix 14 15 Level: advanced 16 17 Note: 18 The reference count on the `MATAIJ` matrix is not increased so you should not destroy it. 19 20 .seealso: [](ch_matrices), `Mat`, `MATMAIJ`, `MATAIJ`, `MatCreateMAIJ()` 21 @*/ 22 PetscErrorCode MatMAIJGetAIJ(Mat A, Mat *B) 23 { 24 PetscBool ismpimaij, isseqmaij; 25 26 PetscFunctionBegin; 27 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIMAIJ, &ismpimaij)); 28 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQMAIJ, &isseqmaij)); 29 if (ismpimaij) { 30 Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 31 32 *B = b->A; 33 } else if (isseqmaij) { 34 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 35 36 *B = b->AIJ; 37 } else { 38 *B = A; 39 } 40 PetscFunctionReturn(PETSC_SUCCESS); 41 } 42 43 /*@ 44 MatMAIJRedimension - Get a new `MATMAIJ` matrix with the same action, but for a different block size 45 46 Logically Collective 47 48 Input Parameters: 49 + A - the `MATMAIJ` matrix 50 - dof - the block size for the new matrix 51 52 Output Parameter: 53 . 
B - the new `MATMAIJ` matrix 54 55 Level: advanced 56 57 .seealso: [](ch_matrices), `Mat`, `MATMAIJ`, `MatCreateMAIJ()` 58 @*/ 59 PetscErrorCode MatMAIJRedimension(Mat A, PetscInt dof, Mat *B) 60 { 61 Mat Aij = NULL; 62 63 PetscFunctionBegin; 64 PetscValidLogicalCollectiveInt(A, dof, 2); 65 PetscCall(MatMAIJGetAIJ(A, &Aij)); 66 PetscCall(MatCreateMAIJ(Aij, dof, B)); 67 PetscFunctionReturn(PETSC_SUCCESS); 68 } 69 70 static PetscErrorCode MatDestroy_SeqMAIJ(Mat A) 71 { 72 Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data; 73 74 PetscFunctionBegin; 75 PetscCall(MatDestroy(&b->AIJ)); 76 PetscCall(PetscFree(A->data)); 77 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaijcusparse_C", NULL)); 78 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqmaij_seqaij_C", NULL)); 79 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_seqaij_seqmaij_C", NULL)); 80 PetscFunctionReturn(PETSC_SUCCESS); 81 } 82 83 static PetscErrorCode MatSetUp_MAIJ(Mat A) 84 { 85 PetscFunctionBegin; 86 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Must use MatCreateMAIJ() to create MAIJ matrices"); 87 } 88 89 static PetscErrorCode MatView_SeqMAIJ(Mat A, PetscViewer viewer) 90 { 91 Mat B; 92 93 PetscFunctionBegin; 94 PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B)); 95 PetscCall(MatView(B, viewer)); 96 PetscCall(MatDestroy(&B)); 97 PetscFunctionReturn(PETSC_SUCCESS); 98 } 99 100 static PetscErrorCode MatView_MPIMAIJ(Mat A, PetscViewer viewer) 101 { 102 Mat B; 103 104 PetscFunctionBegin; 105 PetscCall(MatConvert(A, MATMPIAIJ, MAT_INITIAL_MATRIX, &B)); 106 PetscCall(MatView(B, viewer)); 107 PetscCall(MatDestroy(&B)); 108 PetscFunctionReturn(PETSC_SUCCESS); 109 } 110 111 static PetscErrorCode MatDestroy_MPIMAIJ(Mat A) 112 { 113 Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data; 114 115 PetscFunctionBegin; 116 PetscCall(MatDestroy(&b->AIJ)); 117 PetscCall(MatDestroy(&b->OAIJ)); 118 PetscCall(MatDestroy(&b->A)); 119 
PetscCall(VecScatterDestroy(&b->ctx)); /* scatter that gathers ghost values for the off-diagonal block */
  PetscCall(VecDestroy(&b->w));          /* work vector holding the scattered ghost values */
  PetscCall(PetscFree(A->data));
  /* remove the composed conversion/product hooks installed at creation time */
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaijcusparse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpimaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatProductSetFromOptions_mpiaij_mpimaij_C", NULL));
  PetscCall(PetscObjectChangeTypeName((PetscObject)A, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
  MATMAIJ - MATMAIJ = "maij" - A matrix type to be used for restriction and interpolation operations for
  multicomponent problems, interpolating or restricting each component the same way independently.
  The matrix type is based on `MATSEQAIJ` for sequential matrices, and `MATMPIAIJ` for distributed matrices.

  Operations provided:
.vb
    MatMult()
    MatMultTranspose()
    MatMultAdd()
    MatMultTransposeAdd()
.ve

  Level: advanced

.seealso: [](ch_matrices), `Mat`, `MATAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()`, `MatCreateMAIJ()`
M*/

/* Bare constructor: only registers the type name and blocks generic MatSetUp();
   real setup happens in MatCreateMAIJ() */
PETSC_EXTERN PetscErrorCode MatCreate_MAIJ(Mat A)
{
  Mat_MPIMAIJ *b;
  PetscMPIInt  size;

  PetscFunctionBegin;
  PetscCall(PetscNew(&b));
  A->data = (void *)b;

  /* no operations are valid until MatCreateMAIJ() fills them in */
  PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps)));

  A->ops->setup = MatSetUp_MAIJ;

  b->AIJ  = NULL;
  b->dof  = 0;
  b->OAIJ = NULL;
  b->ctx  = NULL;
  b->w    = NULL;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQMAIJ));
  } else {
    PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIMAIJ));
  }
  A->preallocated = PETSC_TRUE;
  A->assembled    = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if PetscHasAttribute(always_inline)
  #define PETSC_FORCE_INLINE __attribute__((always_inline))
#else
  #define PETSC_FORCE_INLINE
#endif

#if defined(__clang__)
  #define PETSC_PRAGMA_UNROLL _Pragma("unroll")
#else
  #define PETSC_PRAGMA_UNROLL
#endif

/* largest dof for which a specialized (compile-time N) kernel is instantiated */
enum {
  MAT_SEQMAIJ_MAX_TEMPLATE_SIZE = 18
};

// try as hard as possible to get these "template"s inlined, GCC apparently does take 'inline'
// keyword into account for these...
/* Shared kernel for MatMult (yy == NULL) and MatMultAdd (yy != NULL) with compile-time block size N.
   Each scalar nonzero of the underlying AIJ matrix acts on N consecutive vector entries. */
PETSC_FORCE_INLINE static inline PetscErrorCode MatMult_MatMultAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N)
{
  const PetscBool    mult_add = yy == NULL ? PETSC_FALSE : PETSC_TRUE;
  const Mat_SeqMAIJ *b        = (Mat_SeqMAIJ *)A->data;
  const Mat          baij     = b->AIJ;
  const Mat_SeqAIJ  *a        = (Mat_SeqAIJ *)baij->data;
  const PetscInt     m        = baij->rmap->n;
  const PetscInt     nz       = a->nz;
  const PetscInt    *idx      = a->j;
  const PetscInt    *ii       = a->i;
  const PetscScalar *v        = a->a;
  PetscInt           nonzerorow = 0;
  const PetscScalar *x;
  PetscScalar       *z;

  PetscFunctionBegin;
  PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE);
  if (mult_add && yy != zz) PetscCall(VecCopy(yy, zz));
  PetscCall(VecGetArrayRead(xx, &x));
  if (mult_add) {
    PetscCall(VecGetArray(zz, &z));
  } else {
    PetscCall(VecGetArrayWrite(zz, &z));
  }

  for (PetscInt i = 0; i < m; ++i) {
    PetscInt       jrow = ii[i];
    const PetscInt n    = ii[i + 1] - jrow;
    // leave a line so clang-format does not align these decls
    PetscScalar sum[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE] = {0};

    nonzerorow += n > 0;
    for (PetscInt j = 0; j < n; ++j, ++jrow) {
      const PetscScalar v_jrow     = v[jrow];
      const PetscInt    N_idx_jrow = N * idx[jrow];

      PETSC_PRAGMA_UNROLL
      for (int k = 0; k < N; ++k) sum[k] += v_jrow * x[N_idx_jrow + k];
    }

    PETSC_PRAGMA_UNROLL
    for (int k = 0; k < N; ++k) {
      const PetscInt z_idx = N * i + k;

      if (mult_add)
{
        z[z_idx] += sum[k];
      } else {
        z[z_idx] = sum[k];
      }
    }
  }
  /* pure mult writes (not adds) into z, so subtract one op per written entry of a nonzero row */
  PetscCall(PetscLogFlops(2 * N * nz - (mult_add ? 0 : (N * nonzerorow))));
  PetscCall(VecRestoreArrayRead(xx, &x));
  if (mult_add) {
    PetscCall(VecRestoreArray(zz, &z));
  } else {
    PetscCall(VecRestoreArrayWrite(zz, &z));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Shared kernel for MatMultTranspose (yy == NULL) and MatMultTransposeAdd (yy != NULL)
   with compile-time block size N; scatters alpha[k]*v into z along column indices */
PETSC_FORCE_INLINE static inline PetscErrorCode MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(Mat A, Vec xx, Vec yy, Vec zz, int N)
{
  const PetscBool    mult_add = yy == NULL ? PETSC_FALSE : PETSC_TRUE;
  const Mat_SeqMAIJ *b        = (Mat_SeqMAIJ *)A->data;
  const Mat          baij     = b->AIJ;
  const Mat_SeqAIJ  *a        = (Mat_SeqAIJ *)baij->data;
  const PetscInt     m        = baij->rmap->n;
  const PetscInt     nz       = a->nz;
  const PetscInt    *a_j      = a->j;
  const PetscInt    *a_i      = a->i;
  const PetscScalar *a_a      = a->a;
  const PetscScalar *x;
  PetscScalar       *z;

  PetscFunctionBegin;
  PetscAssert(N <= MAT_SEQMAIJ_MAX_TEMPLATE_SIZE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "%s() called with N = %d > max size %d", PETSC_FUNCTION_NAME, N, MAT_SEQMAIJ_MAX_TEMPLATE_SIZE);
  if (mult_add) {
    if (yy != zz) PetscCall(VecCopy(yy, zz));
  } else {
    /* transpose product accumulates into scattered locations, so z must start zeroed */
    PetscCall(VecSet(zz, 0.0));
  }
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArray(zz, &z));

  for (PetscInt i = 0; i < m; i++) {
    const PetscInt     a_ii = a_i[i];
    const PetscInt    *idx  = PetscSafePointerPlusOffset(a_j, a_ii);
    const PetscScalar *v    = PetscSafePointerPlusOffset(a_a, a_ii);
    const PetscInt     n    = a_i[i + 1] - a_ii;
    PetscScalar        alpha[MAT_SEQMAIJ_MAX_TEMPLATE_SIZE];

    PETSC_PRAGMA_UNROLL
    for (int k = 0; k < N; ++k) alpha[k] = x[N * i + k];
    for (PetscInt j = 0; j < n; ++j) {
      const PetscInt    N_idx_j = N * idx[j];
      const PetscScalar v_j     = v[j];

      PETSC_PRAGMA_UNROLL
      for (int k = 0; k < N; ++k) z[N_idx_j + k] += alpha[k] * v_j;
    }
  }

  PetscCall(PetscLogFlops(2 * N * nz));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(zz, &z));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Instantiate the four MatOps entry points for a fixed block size N by delegating
   to the inline templates above */
#define MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(N) \
  static PetscErrorCode PetscConcat(MatMult_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy) \
  { \
    PetscFunctionBegin; \
    PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, NULL, yy, N)); \
    PetscFunctionReturn(PETSC_SUCCESS); \
  } \
  static PetscErrorCode PetscConcat(MatMultTranspose_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy) \
  { \
    PetscFunctionBegin; \
    PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, NULL, yy, N)); \
    PetscFunctionReturn(PETSC_SUCCESS); \
  } \
  static PetscErrorCode PetscConcat(MatMultAdd_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy, Vec zz) \
  { \
    PetscFunctionBegin; \
    PetscCall(MatMult_MatMultAdd_SeqMAIJ_Template(A, xx, yy, zz, N)); \
    PetscFunctionReturn(PETSC_SUCCESS); \
  } \
  static PetscErrorCode PetscConcat(MatMultTransposeAdd_SeqMAIJ_, N)(Mat A, Vec xx, Vec yy, Vec zz) \
  { \
    PetscFunctionBegin; \
    PetscCall(MatMultTranspose_MatMultTransposeAdd_SeqMAIJ_Template(A, xx, yy, zz, N)); \
    PetscFunctionReturn(PETSC_SUCCESS); \
  }

MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(2)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(3)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(4)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(5)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(6)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(7)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(8)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(9)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(10)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(11)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(16)
MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE(18)

#undef MAT_SEQ_MAIJ_INSTANTIATE_MATMULT_MATMULTADD_TEMPLATE

/* Runtime-dof fallback MatMult used when dof has no specialized instantiation above */
static PetscErrorCode MatMult_SeqMAIJ_N(Mat A, Vec xx, Vec yy)
{
  Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
  const PetscScalar *x, *v;
  PetscScalar       *y, *sums;
  const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
  PetscInt           n, i, jrow, j, dof = b->dof, k;

  PetscFunctionBegin;
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecSet(yy, 0.0));
  PetscCall(VecGetArray(yy, &y));
  idx = a->j;
  v   = a->a;
  ii  = a->i;

  for (i = 0; i < m; i++) {
    jrow = ii[i];
    n    = ii[i + 1] - jrow;
    sums = y + dof * i; /* accumulate directly into the dof entries for block row i */
    for (j = 0; j < n; j++) {
      for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k];
      jrow++;
    }
  }

  PetscCall(PetscLogFlops(2.0 * dof * a->nz));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(yy, &y));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Runtime-dof fallback MatMultAdd: zz = yy + A*xx */
static PetscErrorCode MatMultAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
  const PetscScalar *x, *v;
  PetscScalar       *y, *sums;
  const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
  PetscInt           n, i, jrow, j, dof = b->dof, k;

  PetscFunctionBegin;
  if (yy != zz) PetscCall(VecCopy(yy, zz));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArray(zz, &y));
  idx = a->j;
  v   = a->a;
  ii  = a->i;

  for (i = 0; i < m; i++) {
    jrow = ii[i];
    n    = ii[i + 1] - jrow;
    sums = y + dof * i;
    for (j = 0; j < n; j++) {
      for (k = 0; k < dof; k++) sums[k] += v[jrow] * x[dof * idx[jrow] + k];
      jrow++;
    }
  }

  PetscCall(PetscLogFlops(2.0 * dof * a->nz));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(zz, &y));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Runtime-dof fallback MatMultTranspose */
static PetscErrorCode MatMultTranspose_SeqMAIJ_N(Mat A, Vec xx, Vec yy)
{
Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
  const PetscScalar *x, *v, *alpha;
  PetscScalar       *y;
  const PetscInt     m = b->AIJ->rmap->n, *idx, dof = b->dof;
  PetscInt           n, i, k;

  PetscFunctionBegin;
  PetscCall(VecGetArrayRead(xx, &x));
  /* transpose product scatters into y, so it must start zeroed */
  PetscCall(VecSet(yy, 0.0));
  PetscCall(VecGetArray(yy, &y));
  for (i = 0; i < m; i++) {
    idx   = PetscSafePointerPlusOffset(a->j, a->i[i]);
    v     = PetscSafePointerPlusOffset(a->a, a->i[i]);
    n     = a->i[i + 1] - a->i[i];
    alpha = x + dof * i;
    while (n-- > 0) {
      for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v);
      idx++;
      v++;
    }
  }
  PetscCall(PetscLogFlops(2.0 * dof * a->nz));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(yy, &y));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Runtime-dof fallback MatMultTransposeAdd: zz = yy + A^T*xx */
static PetscErrorCode MatMultTransposeAdd_SeqMAIJ_N(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_SeqMAIJ       *b = (Mat_SeqMAIJ *)A->data;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
  const PetscScalar *x, *v, *alpha;
  PetscScalar       *y;
  const PetscInt     m = b->AIJ->rmap->n, *idx, dof = b->dof;
  PetscInt           n, i, k;

  PetscFunctionBegin;
  if (yy != zz) PetscCall(VecCopy(yy, zz));
  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArray(zz, &y));
  for (i = 0; i < m; i++) {
    idx   = a->j + a->i[i];
    v     = a->a + a->i[i];
    n     = a->i[i + 1] - a->i[i];
    alpha = x + dof * i;
    while (n-- > 0) {
      for (k = 0; k < dof; k++) y[dof * (*idx) + k] += alpha[k] * (*v);
      idx++;
      v++;
    }
  }
  PetscCall(PetscLogFlops(2.0 * dof * a->nz));
  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(zz, &y));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Parallel mult: overlap the ghost-value scatter with the local (diagonal-block) product,
   then add in the off-diagonal contribution once the scatter completes */
static PetscErrorCode MatMult_MPIMAIJ_dof(Mat A, Vec xx, Vec yy)
{
  Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

  PetscFunctionBegin;
  /* start the scatter */
  PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*b->AIJ->ops->mult)(b->AIJ, xx, yy));
  PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, yy, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Parallel transpose mult: compute both local transposes, then reverse-scatter-add the
   off-diagonal contribution back to its owners */
static PetscErrorCode MatMultTranspose_MPIMAIJ_dof(Mat A, Vec xx, Vec yy)
{
  Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w));
  PetscCall((*b->AIJ->ops->multtranspose)(b->AIJ, xx, yy));
  PetscCall(VecScatterBegin(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(b->ctx, b->w, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Parallel mult-add, same overlap pattern as MatMult_MPIMAIJ_dof */
static PetscErrorCode MatMultAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

  PetscFunctionBegin;
  /* start the scatter */
  PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, yy, zz));
  PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Parallel transpose mult-add, same pattern as MatMultTranspose_MPIMAIJ_dof */
static PetscErrorCode MatMultTransposeAdd_MPIMAIJ_dof(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIMAIJ *b = (Mat_MPIMAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall((*b->OAIJ->ops->multtranspose)(b->OAIJ, xx, b->w));
  PetscCall((*b->AIJ->ops->multtransposeadd)(b->AIJ, xx, yy, zz));
  PetscCall(VecScatterBegin(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(b->ctx, b->w, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Only PtAP is supported for SeqAIJ * SeqMAIJ products */
static PetscErrorCode MatProductSetFromOptions_SeqAIJ_SeqMAIJ(Mat C)
{
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  if (product->type ==
MATPRODUCT_PtAP) {
    C->ops->productsymbolic = MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ;
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for SeqAIJ and SeqMAIJ matrices", MatProductTypes[product->type]);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Select the PtAP algorithm for MPIAIJ * MPIMAIJ; only MATPRODUCT_PtAP is supported.
   Falls back to converting P to MPIAIJ when the chosen algorithm has no MAIJ implementation. */
static PetscErrorCode MatProductSetFromOptions_MPIAIJ_MPIMAIJ(Mat C)
{
  Mat_Product *product = C->product;
  PetscBool    flg     = PETSC_FALSE;
  Mat          A = product->A, P = product->B;
  PetscInt     alg = 1; /* set default algorithm: "nonscalable" */
#if !defined(PETSC_HAVE_HYPRE)
  const char *algTypes[4] = {"scalable", "nonscalable", "allatonce", "allatonce_merged"};
  PetscInt    nalg        = 4;
#else
  const char *algTypes[5] = {"scalable", "nonscalable", "allatonce", "allatonce_merged", "hypre"};
  PetscInt    nalg        = 5;
#endif

  PetscFunctionBegin;
  PetscCheck(product->type == MATPRODUCT_PtAP, PETSC_COMM_SELF, PETSC_ERR_SUP, "Mat Product type %s is not supported for MPIAIJ and MPIMAIJ matrices", MatProductTypes[product->type]);

  /* PtAP */
  /* Check matrix local sizes */
  PetscCheck(A->rmap->rstart == P->rmap->rstart && A->rmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Arow (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->rmap->rstart, A->rmap->rend, P->rmap->rstart, P->rmap->rend);
  PetscCheck(A->cmap->rstart == P->rmap->rstart && A->cmap->rend == P->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, Acol (%" PetscInt_FMT ", %" PetscInt_FMT ") != Prow (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->cmap->rstart, A->cmap->rend, P->rmap->rstart, P->rmap->rend);

  /* Set the default algorithm */
  PetscCall(PetscStrcmp(C->product->alg, "default", &flg));
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));

  /* Get runtime option */
  PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
  PetscCall(PetscOptionsEList("-mat_product_algorithm", "Algorithmic approach", "MatPtAP", algTypes, nalg, algTypes[alg], &alg, &flg));
  if (flg) PetscCall(MatProductSetAlgorithm(C, (MatProductAlgorithm)algTypes[alg]));
  PetscOptionsEnd();

  PetscCall(PetscStrcmp(C->product->alg, "allatonce", &flg));
  if (flg) {
    C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscStrcmp(C->product->alg, "allatonce_merged", &flg));
  if (flg) {
    C->ops->productsymbolic = MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* Convert P from MAIJ to AIJ matrix since implementation not available for MAIJ */
  PetscCall(PetscInfo((PetscObject)A, "Converting from MAIJ to AIJ matrix since implementation not available for MAIJ\n"));
  PetscCall(MatConvert(P, MATMPIAIJ, MAT_INPLACE_MATRIX, &P));
  PetscCall(MatProductSetFromOptions(C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of C = P^T*A*P where P is SeqMAIJ: works row-by-row on the compressed
   (scalar) form of P, replaying each P row 'dof' times with a column shift */
static PetscErrorCode MatPtAPNumeric_SeqAIJ_SeqMAIJ(Mat A, Mat PP, Mat C)
{
  /* This routine requires testing -- first draft only */
  Mat_SeqMAIJ     *pp = (Mat_SeqMAIJ *)PP->data;
  Mat              P  = pp->AIJ;
  Mat_SeqAIJ      *a  = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ      *p  = (Mat_SeqAIJ *)P->data;
  Mat_SeqAIJ      *c  = (Mat_SeqAIJ *)C->data;
  const PetscInt  *ai = a->i, *aj = a->j, *pi = p->i, *pj = p->j, *pJ, *pjj;
  const PetscInt  *ci = c->i, *cj = c->j, *cjj;
  const PetscInt   am = A->rmap->N, cn = C->cmap->N, cm = C->rmap->N, ppdof = pp->dof;
  PetscInt         i, j, k, pshift, poffset, anzi, pnzi, apnzj, nextap, pnzj, prow, crow, *apj, *apjdense;
  const MatScalar *aa = a->a, *pa = p->a, *pA, *paj;
  MatScalar       *ca = c->a, *caj, *apa;

  PetscFunctionBegin;
  /* Allocate temporary array for storage of one row of A*P */
PetscCall(PetscCalloc3(cn, &apa, cn, &apj, cn, &apjdense));

  /* Clear old values in C */
  PetscCall(PetscArrayzero(ca, ci[cm]));

  for (i = 0; i < am; i++) {
    /* Form sparse row of A*P */
    anzi  = ai[i + 1] - ai[i];
    apnzj = 0;
    for (j = 0; j < anzi; j++) {
      /* Get offset within block of P */
      pshift = *aj % ppdof;
      /* Get block row of P */
      prow = *aj++ / ppdof; /* integer division */
      pnzj = pi[prow + 1] - pi[prow];
      pjj  = pj + pi[prow];
      paj  = pa + pi[prow];
      for (k = 0; k < pnzj; k++) {
        poffset = pjj[k] * ppdof + pshift;
        if (!apjdense[poffset]) {
          apjdense[poffset] = -1;
          apj[apnzj++]      = poffset;
        }
        apa[poffset] += (*aa) * paj[k];
      }
      PetscCall(PetscLogFlops(2.0 * pnzj));
      aa++;
    }

    /* Sort the j index array for quick sparse axpy. */
    /* Note: a array does not need sorting as it is in dense storage locations. */
    PetscCall(PetscSortInt(apnzj, apj));

    /* Compute P^T*A*P using outer product (P^T)[:,j]*(A*P)[j,:]. */
    prow    = i / ppdof; /* integer division */
    pshift  = i % ppdof;
    poffset = pi[prow];
    pnzi    = pi[prow + 1] - poffset;
    /* Reset pJ and pA so we can traverse the same row of P 'dof' times. */
    pJ = pj + poffset;
    pA = pa + poffset;
    for (j = 0; j < pnzi; j++) {
      crow = (*pJ) * ppdof + pshift;
      cjj  = cj + ci[crow];
      caj  = ca + ci[crow];
      pJ++;
      /* Perform sparse axpy operation. Note cjj includes apj. */
      for (k = 0, nextap = 0; nextap < apnzj; k++) {
        if (cjj[k] == apj[nextap]) caj[k] += (*pA) * apa[apj[nextap++]];
      }
      PetscCall(PetscLogFlops(2.0 * apnzj));
      pA++;
    }

    /* Zero the current row info for A*P */
    for (j = 0; j < apnzj; j++) {
      apa[apj[j]]      = 0.;
      apjdense[apj[j]] = 0;
    }
  }

  /* Assemble the final matrix and clean up */
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscFree3(apa, apj, apjdense));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic phase of C = P^T*A*P where P is SeqMAIJ: builds the sparsity pattern of C
   using the compressed (scalar) form of P, expanding block rows by 'dof' shifts */
static PetscErrorCode MatPtAPSymbolic_SeqAIJ_SeqMAIJ(Mat A, Mat PP, PetscReal fill, Mat C)
{
  PetscFreeSpaceList free_space = NULL, current_space = NULL;
  Mat_SeqMAIJ       *pp = (Mat_SeqMAIJ *)PP->data;
  Mat                P  = pp->AIJ;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)A->data, *p = (Mat_SeqAIJ *)P->data, *c;
  PetscInt          *pti, *ptj, *ptJ;
  PetscInt          *ci, *cj, *ptadenserow, *ptasparserow, *denserow, *sparserow, *ptaj;
  const PetscInt     an = A->cmap->N, am = A->rmap->N, pn = P->cmap->N, pm = P->rmap->N, ppdof = pp->dof;
  PetscInt           i, j, k, dof, pshift, ptnzi, arow, anzj, ptanzi, prow, pnzj, cnzi, cn;
  MatScalar         *ca;
  const PetscInt    *pi = p->i, *pj = p->j, *pjj, *ai = a->i, *aj = a->j, *ajj;

  PetscFunctionBegin;
  /* Get ij structure of P^T */
  PetscCall(MatGetSymbolicTranspose_SeqAIJ(P, &pti, &ptj));

  cn = pn * ppdof;
  /* Allocate ci array, arrays for fill computation and */
  /* free space for accumulating nonzero column info */
  PetscCall(PetscMalloc1(cn + 1, &ci));
  ci[0] = 0;

  /* Work arrays for rows of P^T*A */
  PetscCall(PetscMalloc4(an, &ptadenserow, an, &ptasparserow, cn, &denserow, cn, &sparserow));
  PetscCall(PetscArrayzero(ptadenserow, an));
  PetscCall(PetscArrayzero(denserow, cn));

  /* Set initial free space to be nnz(A) scaled by aspect ratio of P. */
  /* This should be reasonable if sparsity of PtAP is similar to that of A. */
  /* Note, aspect ratio of P is the same as the aspect ratio of SeqAIJ inside P */
  PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(ai[am] / pm, pn), &free_space));
  current_space = free_space;

  /* Determine symbolic info for each row of C: */
  for (i = 0; i < pn; i++) {
    ptnzi = pti[i + 1] - pti[i];
    ptJ   = ptj + pti[i];
    for (dof = 0; dof < ppdof; dof++) {
      ptanzi = 0;
      /* Determine symbolic row of PtA: */
      for (j = 0; j < ptnzi; j++) {
        /* Expand ptJ[j] by block size and shift by dof to get the right row of A */
        arow = ptJ[j] * ppdof + dof;
        /* Nonzeros of P^T*A will be in same locations as any element of A in that row */
        anzj = ai[arow + 1] - ai[arow];
        ajj  = aj + ai[arow];
        for (k = 0; k < anzj; k++) {
          if (!ptadenserow[ajj[k]]) {
            ptadenserow[ajj[k]]    = -1;
            ptasparserow[ptanzi++] = ajj[k];
          }
        }
      }
      /* Using symbolic info for row of PtA, determine symbolic info for row of C: */
      ptaj = ptasparserow;
      cnzi = 0;
      for (j = 0; j < ptanzi; j++) {
        /* Get offset within block of P */
        pshift = *ptaj % ppdof;
        /* Get block row of P */
        prow = (*ptaj++) / ppdof; /* integer division */
        /* P has same number of nonzeros per row as the compressed form */
        pnzj = pi[prow + 1] - pi[prow];
        pjj  = pj + pi[prow];
        for (k = 0; k < pnzj; k++) {
          /* Locations in C are shifted by the offset within the block */
          /* Note: we cannot use PetscLLAdd here because of the additional offset for the write location */
          if (!denserow[pjj[k] * ppdof + pshift]) {
            denserow[pjj[k] * ppdof + pshift] = -1;
            sparserow[cnzi++]                 = pjj[k] * ppdof + pshift;
          }
        }
      }

      /* sort sparserow */
      PetscCall(PetscSortInt(cnzi, sparserow));

      /* If free space is not available, make more free space */
      /* Double the amount of total space in the list */
      if (current_space->local_remaining < cnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(cnzi, current_space->total_array_size), &current_space));

      /* Copy data into free space, and zero out denserows */
      PetscCall(PetscArraycpy(current_space->array, sparserow, cnzi));

      current_space->array += cnzi;
      current_space->local_used += cnzi;
      current_space->local_remaining -= cnzi;

      for (j = 0; j < ptanzi; j++) ptadenserow[ptasparserow[j]] = 0;
      for (j = 0; j < cnzi; j++) denserow[sparserow[j]] = 0;

      /* Aside: Perhaps we should save the pta info for the numerical factorization. */
      /* For now, we will recompute what is needed. */
      ci[i * ppdof + 1 + dof] = ci[i * ppdof + dof] + cnzi;
    }
  }
  /* nnz is now stored in ci[ptm], column indices are in the list of free space */
  /* Allocate space for cj, initialize cj, and */
  /* destroy list of free space and other temporary array(s) */
  PetscCall(PetscMalloc1(ci[cn] + 1, &cj));
  PetscCall(PetscFreeSpaceContiguous(&free_space, cj));
  PetscCall(PetscFree4(ptadenserow, ptasparserow, denserow, sparserow));

  /* Allocate space for ca */
  PetscCall(PetscCalloc1(ci[cn] + 1, &ca));

  /* put together the new matrix */
  PetscCall(MatSetSeqAIJWithArrays_private(PetscObjectComm((PetscObject)A), cn, cn, ci, cj, ca, NULL, C));
  PetscCall(MatSetBlockSize(C, pp->dof));

  /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
  /* Since these are PETSc arrays, change flags to free them as necessary. */
  c          = (Mat_SeqAIJ *)C->data;
  c->free_a  = PETSC_TRUE;
  c->free_ij = PETSC_TRUE;
  c->nonew   = 0;

  C->ops->ptapnumeric    = MatPtAPNumeric_SeqAIJ_SeqMAIJ;
  C->ops->productnumeric = MatProductNumeric_PtAP;

  /* Clean up. */
  PetscCall(MatRestoreSymbolicTranspose_SeqAIJ(P, &pti, &ptj));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Product-API entry point that dispatches to the SeqAIJ/SeqMAIJ symbolic PtAP */
PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_SeqAIJ_SeqMAIJ(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A, P = product->B;

  PetscFunctionBegin;
  PetscCall(MatPtAPSymbolic_SeqAIJ_SeqMAIJ(A, P, product->fill, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, Mat);

/* All-at-once numeric PtAP: delegate to the generic MPIXAIJ kernel using the compressed P and its dof */
PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, Mat C)
{
  Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;

  PetscFunctionBegin;
  PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(Mat, Mat, PetscInt, PetscReal, Mat);

/* All-at-once symbolic PtAP: delegate, then install the matching numeric routine */
PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(Mat A, Mat P, PetscReal fill, Mat C)
{
  Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;

  PetscFunctionBegin;
  PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce(A, maij->A, maij->dof, fill, C));
  C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, Mat);

/* Merged all-at-once numeric PtAP: delegate to the generic MPIXAIJ kernel */
PETSC_INTERN PetscErrorCode MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, Mat C)
{
  Mat_MPIMAIJ *maij = (Mat_MPIMAIJ *)P->data;

  PetscFunctionBegin;
  PetscCall(MatPtAPNumeric_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(Mat, Mat, PetscInt, PetscReal, Mat);

/* Merged all-at-once symbolic PtAP: delegate, then install the matching numeric routine */
PETSC_INTERN PetscErrorCode MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(Mat A, Mat P, PetscReal fill, Mat C)
{
  Mat_MPIMAIJ *maij =
(Mat_MPIMAIJ *)P->data;

  PetscFunctionBegin;
  PetscCall(MatPtAPSymbolic_MPIAIJ_MPIXAIJ_allatonce_merged(A, maij->A, maij->dof, fill, C));
  C->ops->ptapnumeric = MatPtAPNumeric_MPIAIJ_MPIMAIJ_allatonce_merged;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Product-API entry point dispatching on the selected all-at-once algorithm variant */
PETSC_INTERN PetscErrorCode MatProductSymbolic_PtAP_MPIAIJ_MPIMAIJ(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A, P = product->B;
  PetscBool    flg;

  PetscFunctionBegin;
  PetscCall(PetscStrcmp(product->alg, "allatonce", &flg));
  if (flg) {
    PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce(A, P, product->fill, C));
    C->ops->productnumeric = MatProductNumeric_PtAP;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscStrcmp(product->alg, "allatonce_merged", &flg));
  if (flg) {
    PetscCall(MatPtAPSymbolic_MPIAIJ_MPIMAIJ_allatonce_merged(A, P, product->fill, C));
    C->ops->productnumeric = MatProductNumeric_PtAP;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_SUP, "Mat Product Algorithm is not supported");
}

/* Expand a SeqMAIJ matrix into an explicit SeqAIJ matrix: each scalar entry of the
   compressed matrix becomes 'dof' entries on the diagonal of a dof x dof block */
PETSC_INTERN PetscErrorCode MatConvert_SeqMAIJ_SeqAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_SeqMAIJ *b = (Mat_SeqMAIJ *)A->data;
  Mat          a = b->AIJ, B;
  Mat_SeqAIJ  *aij = (Mat_SeqAIJ *)a->data;
  PetscInt     m, n, i, ncols, *ilen, nmax = 0, *icols, j, k, ii, dof = b->dof;
  PetscInt    *cols;
  PetscScalar *vals;

  PetscFunctionBegin;
  PetscCall(MatGetSize(a, &m, &n));
  /* each compressed row yields dof expanded rows with the same nonzero count */
  PetscCall(PetscMalloc1(dof * m, &ilen));
  for (i = 0; i < m; i++) {
    nmax = PetscMax(nmax, aij->ilen[i]);
    for (j = 0; j < dof; j++) ilen[dof * i + j] = aij->ilen[i];
  }
  PetscCall(MatCreate(PETSC_COMM_SELF, &B));
  PetscCall(MatSetSizes(B, dof * m, dof * n, dof * m, dof * n));
  PetscCall(MatSetType(B, newtype));
  PetscCall(MatSeqAIJSetPreallocation(B, 0, ilen));
  PetscCall(PetscFree(ilen));
  PetscCall(PetscMalloc1(nmax, &icols));
  ii = 0;
  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow_SeqAIJ(a, i, &ncols, &cols, &vals));
    for (j = 0; j < dof; j++) {
      /* shift the compressed column indices to component j of each block */
      for (k = 0; k < ncols; k++) icols[k] = dof * cols[k] + j;
      PetscCall(MatSetValues_SeqAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES));
      ii++;
    }
    PetscCall(MatRestoreRow_SeqAIJ(a, i, &ncols, &cols, &vals));
  }
  PetscCall(PetscFree(icols));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <../src/mat/impls/aij/mpi/mpiaij.h>

/* Expand an MPIMAIJ matrix into an explicit MPIAIJ matrix, handling both the
   diagonal (AIJ) and off-diagonal (OAIJ) compressed blocks */
PETSC_INTERN PetscErrorCode MatConvert_MPIMAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIMAIJ *maij    = (Mat_MPIMAIJ *)A->data;
  Mat          MatAIJ  = ((Mat_SeqMAIJ *)maij->AIJ->data)->AIJ, B;
  Mat          MatOAIJ = ((Mat_SeqMAIJ *)maij->OAIJ->data)->AIJ;
  Mat_SeqAIJ  *AIJ     = (Mat_SeqAIJ *)MatAIJ->data;
  Mat_SeqAIJ  *OAIJ    = (Mat_SeqAIJ *)MatOAIJ->data;
  Mat_MPIAIJ  *mpiaij  = (Mat_MPIAIJ *)maij->A->data;
  PetscInt     dof = maij->dof, i, j, *dnz = NULL, *onz = NULL, nmax = 0, onmax = 0;
  PetscInt    *oicols = NULL, *icols = NULL, ncols, *cols = NULL, oncols, *ocols = NULL;
  PetscInt     rstart, cstart, *garray, ii, k;
  PetscScalar *vals, *ovals;

  PetscFunctionBegin;
  /* per-row diagonal/off-diagonal nonzero counts for preallocation */
  PetscCall(PetscMalloc2(A->rmap->n, &dnz, A->rmap->n, &onz));
  for (i = 0; i < A->rmap->n / dof; i++) {
    nmax  = PetscMax(nmax, AIJ->ilen[i]);
    onmax = PetscMax(onmax, OAIJ->ilen[i]);
    for (j = 0; j < dof; j++) {
      dnz[dof * i + j] = AIJ->ilen[i];
      onz[dof * i + j] = OAIJ->ilen[i];
    }
  }
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
  PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
  PetscCall(MatSetType(B, newtype));
PetscCall(MatMPIAIJSetPreallocation(B, 0, dnz, 0, onz)); 948 PetscCall(MatSetBlockSize(B, dof)); 949 PetscCall(PetscFree2(dnz, onz)); 950 951 PetscCall(PetscMalloc2(nmax, &icols, onmax, &oicols)); 952 rstart = dof * maij->A->rmap->rstart; 953 cstart = dof * maij->A->cmap->rstart; 954 garray = mpiaij->garray; 955 956 ii = rstart; 957 for (i = 0; i < A->rmap->n / dof; i++) { 958 PetscCall(MatGetRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals)); 959 PetscCall(MatGetRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals)); 960 for (j = 0; j < dof; j++) { 961 for (k = 0; k < ncols; k++) icols[k] = cstart + dof * cols[k] + j; 962 for (k = 0; k < oncols; k++) oicols[k] = dof * garray[ocols[k]] + j; 963 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, ncols, icols, vals, INSERT_VALUES)); 964 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, oncols, oicols, ovals, INSERT_VALUES)); 965 ii++; 966 } 967 PetscCall(MatRestoreRow_SeqAIJ(MatAIJ, i, &ncols, &cols, &vals)); 968 PetscCall(MatRestoreRow_SeqAIJ(MatOAIJ, i, &oncols, &ocols, &ovals)); 969 } 970 PetscCall(PetscFree2(icols, oicols)); 971 972 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 973 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 974 975 if (reuse == MAT_INPLACE_MATRIX) { 976 PetscInt refct = ((PetscObject)A)->refct; /* save ((PetscObject)A)->refct */ 977 ((PetscObject)A)->refct = 1; 978 979 PetscCall(MatHeaderReplace(A, &B)); 980 981 ((PetscObject)A)->refct = refct; /* restore ((PetscObject)A)->refct */ 982 } else { 983 *newmat = B; 984 } 985 PetscFunctionReturn(PETSC_SUCCESS); 986 } 987 988 static PetscErrorCode MatCreateSubMatrix_MAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat) 989 { 990 Mat A; 991 992 PetscFunctionBegin; 993 PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A)); 994 PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat)); 995 PetscCall(MatDestroy(&A)); 996 PetscFunctionReturn(PETSC_SUCCESS); 997 } 998 999 static PetscErrorCode MatCreateSubMatrices_MAIJ(Mat mat, PetscInt n, const IS irow[], const 
IS icol[], MatReuse scall, Mat *submat[]) 1000 { 1001 Mat A; 1002 1003 PetscFunctionBegin; 1004 PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A)); 1005 PetscCall(MatCreateSubMatrices(A, n, irow, icol, scall, submat)); 1006 PetscCall(MatDestroy(&A)); 1007 PetscFunctionReturn(PETSC_SUCCESS); 1008 } 1009 1010 /*@ 1011 MatCreateMAIJ - Creates a matrix type providing restriction and interpolation 1012 operations for multicomponent problems. It interpolates each component the same 1013 way independently. The matrix type is based on `MATSEQAIJ` for sequential matrices, 1014 and `MATMPIAIJ` for distributed matrices. 1015 1016 Collective 1017 1018 Input Parameters: 1019 + A - the `MATAIJ` matrix describing the action on blocks 1020 - dof - the block size (number of components per node) 1021 1022 Output Parameter: 1023 . maij - the new `MATMAIJ` matrix 1024 1025 Level: advanced 1026 1027 .seealso: [](ch_matrices), `Mat`, `MATAIJ`, `MATMAIJ`, `MatMAIJGetAIJ()`, `MatMAIJRedimension()` 1028 @*/ 1029 PetscErrorCode MatCreateMAIJ(Mat A, PetscInt dof, Mat *maij) 1030 { 1031 PetscInt n; 1032 Mat B; 1033 PetscBool flg; 1034 #if defined(PETSC_HAVE_CUDA) 1035 /* hack to prevent conversion to AIJ format for CUDA when used inside a parallel MAIJ */ 1036 PetscBool convert = dof < 0 ? 
PETSC_FALSE : PETSC_TRUE; 1037 #endif 1038 1039 PetscFunctionBegin; 1040 dof = PetscAbs(dof); 1041 PetscCall(PetscObjectReference((PetscObject)A)); 1042 1043 if (dof == 1) *maij = A; 1044 else { 1045 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1046 /* propagate vec type */ 1047 PetscCall(MatSetVecType(B, A->defaultvectype)); 1048 PetscCall(MatSetSizes(B, dof * A->rmap->n, dof * A->cmap->n, dof * A->rmap->N, dof * A->cmap->N)); 1049 PetscCall(PetscLayoutSetBlockSize(B->rmap, dof)); 1050 PetscCall(PetscLayoutSetBlockSize(B->cmap, dof)); 1051 PetscCall(PetscLayoutSetUp(B->rmap)); 1052 PetscCall(PetscLayoutSetUp(B->cmap)); 1053 1054 B->assembled = PETSC_TRUE; 1055 1056 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &flg)); 1057 if (flg) { 1058 Mat_SeqMAIJ *b; 1059 1060 PetscCall(MatSetType(B, MATSEQMAIJ)); 1061 1062 B->ops->setup = NULL; 1063 B->ops->destroy = MatDestroy_SeqMAIJ; 1064 B->ops->view = MatView_SeqMAIJ; 1065 1066 b = (Mat_SeqMAIJ *)B->data; 1067 b->dof = dof; 1068 b->AIJ = A; 1069 1070 if (dof == 2) { 1071 B->ops->mult = MatMult_SeqMAIJ_2; 1072 B->ops->multadd = MatMultAdd_SeqMAIJ_2; 1073 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_2; 1074 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_2; 1075 } else if (dof == 3) { 1076 B->ops->mult = MatMult_SeqMAIJ_3; 1077 B->ops->multadd = MatMultAdd_SeqMAIJ_3; 1078 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_3; 1079 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_3; 1080 } else if (dof == 4) { 1081 B->ops->mult = MatMult_SeqMAIJ_4; 1082 B->ops->multadd = MatMultAdd_SeqMAIJ_4; 1083 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_4; 1084 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_4; 1085 } else if (dof == 5) { 1086 B->ops->mult = MatMult_SeqMAIJ_5; 1087 B->ops->multadd = MatMultAdd_SeqMAIJ_5; 1088 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_5; 1089 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_5; 1090 } else if (dof == 6) { 1091 
B->ops->mult = MatMult_SeqMAIJ_6; 1092 B->ops->multadd = MatMultAdd_SeqMAIJ_6; 1093 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_6; 1094 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_6; 1095 } else if (dof == 7) { 1096 B->ops->mult = MatMult_SeqMAIJ_7; 1097 B->ops->multadd = MatMultAdd_SeqMAIJ_7; 1098 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_7; 1099 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_7; 1100 } else if (dof == 8) { 1101 B->ops->mult = MatMult_SeqMAIJ_8; 1102 B->ops->multadd = MatMultAdd_SeqMAIJ_8; 1103 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_8; 1104 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_8; 1105 } else if (dof == 9) { 1106 B->ops->mult = MatMult_SeqMAIJ_9; 1107 B->ops->multadd = MatMultAdd_SeqMAIJ_9; 1108 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_9; 1109 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_9; 1110 } else if (dof == 10) { 1111 B->ops->mult = MatMult_SeqMAIJ_10; 1112 B->ops->multadd = MatMultAdd_SeqMAIJ_10; 1113 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_10; 1114 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_10; 1115 } else if (dof == 11) { 1116 B->ops->mult = MatMult_SeqMAIJ_11; 1117 B->ops->multadd = MatMultAdd_SeqMAIJ_11; 1118 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_11; 1119 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_11; 1120 } else if (dof == 16) { 1121 B->ops->mult = MatMult_SeqMAIJ_16; 1122 B->ops->multadd = MatMultAdd_SeqMAIJ_16; 1123 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_16; 1124 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_16; 1125 } else if (dof == 18) { 1126 B->ops->mult = MatMult_SeqMAIJ_18; 1127 B->ops->multadd = MatMultAdd_SeqMAIJ_18; 1128 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_18; 1129 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_18; 1130 } else { 1131 B->ops->mult = MatMult_SeqMAIJ_N; 1132 B->ops->multadd = MatMultAdd_SeqMAIJ_N; 1133 B->ops->multtranspose = MatMultTranspose_SeqMAIJ_N; 
1134 B->ops->multtransposeadd = MatMultTransposeAdd_SeqMAIJ_N; 1135 } 1136 #if defined(PETSC_HAVE_CUDA) 1137 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaijcusparse_C", MatConvert_SeqMAIJ_SeqAIJ)); 1138 #endif 1139 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_seqmaij_seqaij_C", MatConvert_SeqMAIJ_SeqAIJ)); 1140 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_seqaij_seqmaij_C", MatProductSetFromOptions_SeqAIJ_SeqMAIJ)); 1141 } else { 1142 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 1143 Mat_MPIMAIJ *b; 1144 IS from, to; 1145 Vec gvec; 1146 1147 PetscCall(MatSetType(B, MATMPIMAIJ)); 1148 1149 B->ops->setup = NULL; 1150 B->ops->destroy = MatDestroy_MPIMAIJ; 1151 B->ops->view = MatView_MPIMAIJ; 1152 1153 b = (Mat_MPIMAIJ *)B->data; 1154 b->dof = dof; 1155 b->A = A; 1156 1157 PetscCall(MatCreateMAIJ(mpiaij->A, -dof, &b->AIJ)); 1158 PetscCall(MatCreateMAIJ(mpiaij->B, -dof, &b->OAIJ)); 1159 1160 PetscCall(VecGetSize(mpiaij->lvec, &n)); 1161 PetscCall(VecCreate(PETSC_COMM_SELF, &b->w)); 1162 PetscCall(VecSetSizes(b->w, n * dof, n * dof)); 1163 PetscCall(VecSetBlockSize(b->w, dof)); 1164 PetscCall(VecSetType(b->w, VECSEQ)); 1165 1166 /* create two temporary Index sets for build scatter gather */ 1167 PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)A), dof, n, mpiaij->garray, PETSC_COPY_VALUES, &from)); 1168 PetscCall(ISCreateStride(PETSC_COMM_SELF, n * dof, 0, 1, &to)); 1169 1170 /* create temporary global vector to generate scatter context */ 1171 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A), dof, dof * A->cmap->n, dof * A->cmap->N, NULL, &gvec)); 1172 1173 /* generate the scatter context */ 1174 PetscCall(VecScatterCreate(gvec, from, b->w, to, &b->ctx)); 1175 1176 PetscCall(ISDestroy(&from)); 1177 PetscCall(ISDestroy(&to)); 1178 PetscCall(VecDestroy(&gvec)); 1179 1180 B->ops->mult = MatMult_MPIMAIJ_dof; 1181 B->ops->multtranspose = MatMultTranspose_MPIMAIJ_dof; 1182 
B->ops->multadd = MatMultAdd_MPIMAIJ_dof; 1183 B->ops->multtransposeadd = MatMultTransposeAdd_MPIMAIJ_dof; 1184 1185 #if defined(PETSC_HAVE_CUDA) 1186 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaijcusparse_C", MatConvert_MPIMAIJ_MPIAIJ)); 1187 #endif 1188 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpimaij_mpiaij_C", MatConvert_MPIMAIJ_MPIAIJ)); 1189 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpimaij_C", MatProductSetFromOptions_MPIAIJ_MPIMAIJ)); 1190 } 1191 B->ops->createsubmatrix = MatCreateSubMatrix_MAIJ; 1192 B->ops->createsubmatrices = MatCreateSubMatrices_MAIJ; 1193 PetscCall(MatSetUp(B)); 1194 #if defined(PETSC_HAVE_CUDA) 1195 /* temporary until we have CUDA implementation of MAIJ */ 1196 { 1197 PetscBool flg; 1198 if (convert) { 1199 PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &flg, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, MATAIJCUSPARSE, "")); 1200 if (flg) PetscCall(MatConvert(B, ((PetscObject)A)->type_name, MAT_INPLACE_MATRIX, &B)); 1201 } 1202 } 1203 #endif 1204 *maij = B; 1205 } 1206 PetscFunctionReturn(PETSC_SUCCESS); 1207 } 1208