1 /* 2 Defines the basic matrix operations for the KAIJ matrix storage format. 3 This format is used to evaluate matrices of the form: 4 5 [I \otimes S + A \otimes T] 6 7 where 8 S is a dense (p \times q) matrix 9 T is a dense (p \times q) matrix 10 A is an AIJ (n \times n) matrix 11 I is the identity matrix 12 13 The resulting matrix is (np \times nq) 14 15 We provide: 16 MatMult() 17 MatMultAdd() 18 MatInvertBlockDiagonal() 19 and 20 MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*) 21 22 This single directory handles both the sequential and parallel codes 23 */ 24 25 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/ 26 #include <../src/mat/utils/freespace.h> 27 #include <petsc/private/vecimpl.h> 28 29 /*@ 30 MatKAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix 31 32 Not Collective, but if the `MATKAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel 33 34 Input Parameter: 35 . A - the `MATKAIJ` matrix 36 37 Output Parameter: 38 . B - the `MATAIJ` matrix 39 40 Level: advanced 41 42 Note: 43 The reference count on the `MATAIJ` matrix is not increased so you should not destroy it. 44 45 .seealso: [](ch_matrices), `Mat`, `MatCreateKAIJ()`, `MATKAIJ`, `MATAIJ` 46 @*/ 47 PetscErrorCode MatKAIJGetAIJ(Mat A, Mat *B) 48 { 49 PetscBool ismpikaij, isseqkaij; 50 51 PetscFunctionBegin; 52 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij)); 53 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQKAIJ, &isseqkaij)); 54 if (ismpikaij) { 55 Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 56 57 *B = b->A; 58 } else if (isseqkaij) { 59 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 60 61 *B = b->AIJ; 62 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix passed in is not of type KAIJ"); 63 PetscFunctionReturn(PETSC_SUCCESS); 64 } 65 66 /*@C 67 MatKAIJGetS - Get the `S` matrix describing the shift action of the `MATKAIJ` matrix 68 69 Not Collective; the entire `S` is stored and returned independently on all processes. 70 71 Input Parameter: 72 . A - the `MATKAIJ` matrix 73 74 Output Parameters: 75 + m - the number of rows in `S` 76 . n - the number of columns in `S` 77 - S - the S matrix, in form of a scalar array in column-major format 78 79 Level: advanced 80 81 Note: 82 All output parameters are optional (pass `NULL` if not desired) 83 84 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()` 85 @*/ 86 PetscErrorCode MatKAIJGetS(Mat A, PetscInt *m, PetscInt *n, PetscScalar *S[]) 87 { 88 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 89 90 PetscFunctionBegin; 91 if (m) *m = b->p; 92 if (n) *n = b->q; 93 if (S) *S = b->S; 94 PetscFunctionReturn(PETSC_SUCCESS); 95 } 96 97 /*@C 98 MatKAIJGetSRead - Get a read-only pointer to the `S` matrix describing the shift action of the `MATKAIJ` matrix 99 100 Not Collective; the entire `S` is stored and returned independently on all processes. 101 102 Input Parameter: 103 . A - the `MATKAIJ` matrix 104 105 Output Parameters: 106 + m - the number of rows in `S` 107 . n - the number of columns in `S` 108 - S - the S matrix, in form of a scalar array in column-major format 109 110 Level: advanced 111 112 Note: 113 All output parameters are optional (pass `NULL` if not desired) 114 115 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()` 116 @*/ 117 PetscErrorCode MatKAIJGetSRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar *S[]) 118 { 119 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 120 121 PetscFunctionBegin; 122 if (m) *m = b->p; 123 if (n) *n = b->q; 124 if (S) *S = b->S; 125 PetscFunctionReturn(PETSC_SUCCESS); 126 } 127 128 /*@C 129 MatKAIJRestoreS - Restore array obtained with `MatKAIJGetS()` 130 131 Not Collective 132 133 Input Parameters: 134 + A - the `MATKAIJ` matrix 135 - S - location of pointer to array obtained with `MatKAIJGetS()` 136 137 Level: advanced 138 139 Note: 140 This routine zeros the array pointer to prevent accidental reuse after it has been restored. 141 If `NULL` is passed, it will not attempt to zero the array pointer. 142 143 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()` 144 @*/ 145 PetscErrorCode MatKAIJRestoreS(Mat A, PetscScalar *S[]) 146 { 147 PetscFunctionBegin; 148 if (S) *S = NULL; 149 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 150 PetscFunctionReturn(PETSC_SUCCESS); 151 } 152 153 /*@C 154 MatKAIJRestoreSRead - Restore array obtained with `MatKAIJGetSRead()` 155 156 Not Collective 157 158 Input Parameters: 159 + A - the `MATKAIJ` matrix 160 - S - location of pointer to array obtained with `MatKAIJGetS()` 161 162 Level: advanced 163 164 Note: 165 This routine zeros the array pointer to prevent accidental reuse after it has been restored. 166 If `NULL` is passed, it will not attempt to zero the array pointer. 167 168 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()` 169 @*/ 170 PetscErrorCode MatKAIJRestoreSRead(Mat A, const PetscScalar *S[]) 171 { 172 PetscFunctionBegin; 173 if (S) *S = NULL; 174 PetscFunctionReturn(PETSC_SUCCESS); 175 } 176 177 /*@C 178 MatKAIJGetT - Get the transformation matrix `T` associated with the `MATKAIJ` matrix 179 180 Not Collective; the entire `T` is stored and returned independently on all processes 181 182 Input Parameter: 183 . A - the `MATKAIJ` matrix 184 185 Output Parameters: 186 + m - the number of rows in `T` 187 . n - the number of columns in `T` 188 - T - the T matrix, in form of a scalar array in column-major format 189 190 Level: advanced 191 192 Note: 193 All output parameters are optional (pass `NULL` if not desired) 194 195 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()` 196 @*/ 197 PetscErrorCode MatKAIJGetT(Mat A, PetscInt *m, PetscInt *n, PetscScalar *T[]) 198 { 199 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 200 201 PetscFunctionBegin; 202 if (m) *m = b->p; 203 if (n) *n = b->q; 204 if (T) *T = b->T; 205 PetscFunctionReturn(PETSC_SUCCESS); 206 } 207 208 /*@C 209 MatKAIJGetTRead - Get a read-only pointer to the transformation matrix `T` associated with the `MATKAIJ` matrix 210 211 Not Collective; the entire `T` is stored and returned independently on all processes 212 213 Input Parameter: 214 . A - the `MATKAIJ` matrix 215 216 Output Parameters: 217 + m - the number of rows in `T` 218 . n - the number of columns in `T` 219 - T - the T matrix, in form of a scalar array in column-major format 220 221 Level: advanced 222 223 Note: 224 All output parameters are optional (pass `NULL` if not desired) 225 226 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()` 227 @*/ 228 PetscErrorCode MatKAIJGetTRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar *T[]) 229 { 230 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 231 232 PetscFunctionBegin; 233 if (m) *m = b->p; 234 if (n) *n = b->q; 235 if (T) *T = b->T; 236 PetscFunctionReturn(PETSC_SUCCESS); 237 } 238 239 /*@C 240 MatKAIJRestoreT - Restore array obtained with `MatKAIJGetT()` 241 242 Not Collective 243 244 Input Parameters: 245 + A - the `MATKAIJ` matrix 246 - T - location of pointer to array obtained with `MatKAIJGetS()` 247 248 Level: advanced 249 250 Note: 251 This routine zeros the array pointer to prevent accidental reuse after it has been restored. 252 If `NULL` is passed, it will not attempt to zero the array pointer. 253 254 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()` 255 @*/ 256 PetscErrorCode MatKAIJRestoreT(Mat A, PetscScalar *T[]) 257 { 258 PetscFunctionBegin; 259 if (T) *T = NULL; 260 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 261 PetscFunctionReturn(PETSC_SUCCESS); 262 } 263 264 /*@C 265 MatKAIJRestoreTRead - Restore array obtained with `MatKAIJGetTRead()` 266 267 Not Collective 268 269 Input Parameters: 270 + A - the `MATKAIJ` matrix 271 - T - location of pointer to array obtained with `MatKAIJGetS()` 272 273 Level: advanced 274 275 Note: 276 This routine zeros the array pointer to prevent accidental reuse after it has been restored. 277 If `NULL` is passed, it will not attempt to zero the array pointer. 278 279 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()` 280 @*/ 281 PetscErrorCode MatKAIJRestoreTRead(Mat A, const PetscScalar *T[]) 282 { 283 PetscFunctionBegin; 284 if (T) *T = NULL; 285 PetscFunctionReturn(PETSC_SUCCESS); 286 } 287 288 /*@ 289 MatKAIJSetAIJ - Set the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix 290 291 Logically Collective; if the `MATAIJ` matrix is parallel, the `MATKAIJ` matrix is also parallel 292 293 Input Parameters: 294 + A - the `MATKAIJ` matrix 295 - B - the `MATAIJ` matrix 296 297 Level: advanced 298 299 Notes: 300 This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed. 301 302 Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix. 303 304 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()` 305 @*/ 306 PetscErrorCode MatKAIJSetAIJ(Mat A, Mat B) 307 { 308 PetscMPIInt size; 309 PetscBool flg; 310 311 PetscFunctionBegin; 312 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 313 if (size == 1) { 314 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQAIJ, &flg)); 315 PetscCheck(flg, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MatKAIJSetAIJ() with MATSEQKAIJ does not support %s as the AIJ mat", ((PetscObject)B)->type_name); 316 Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data; 317 a->AIJ = B; 318 } else { 319 Mat_MPIKAIJ *a = (Mat_MPIKAIJ *)A->data; 320 a->A = B; 321 } 322 PetscCall(PetscObjectReference((PetscObject)B)); 323 PetscFunctionReturn(PETSC_SUCCESS); 324 } 325 326 /*@ 327 MatKAIJSetS - Set the `S` matrix describing the shift action of the `MATKAIJ` matrix 328 329 Logically Collective; the entire `S` is stored independently on all processes. 330 331 Input Parameters: 332 + A - the `MATKAIJ` matrix 333 . p - the number of rows in `S` 334 . q - the number of columns in `S` 335 - S - the S matrix, in form of a scalar array in column-major format 336 337 Level: advanced 338 339 Notes: 340 The dimensions `p` and `q` must match those of the transformation matrix `T` associated with the `MATKAIJ` matrix. 341 342 The `S` matrix is copied, so the user can destroy this array. 343 344 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJSetT()`, `MatKAIJSetAIJ()` 345 @*/ 346 PetscErrorCode MatKAIJSetS(Mat A, PetscInt p, PetscInt q, const PetscScalar S[]) 347 { 348 Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data; 349 350 PetscFunctionBegin; 351 PetscCall(PetscFree(a->S)); 352 if (S) { 353 PetscCall(PetscMalloc1(p * q, &a->S)); 354 PetscCall(PetscMemcpy(a->S, S, p * q * sizeof(PetscScalar))); 355 } else a->S = NULL; 356 357 a->p = p; 358 a->q = q; 359 PetscFunctionReturn(PETSC_SUCCESS); 360 } 361 362 /*@ 363 MatKAIJGetScaledIdentity - Check if both `S` and `T` are scaled identities. 364 365 Logically Collective. 366 367 Input Parameter: 368 . A - the `MATKAIJ` matrix 369 370 Output Parameter: 371 . identity - the Boolean value 372 373 Level: advanced 374 375 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetT()` 376 @*/ 377 PetscErrorCode MatKAIJGetScaledIdentity(Mat A, PetscBool *identity) 378 { 379 Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data; 380 PetscInt i, j; 381 382 PetscFunctionBegin; 383 if (a->p != a->q) { 384 *identity = PETSC_FALSE; 385 PetscFunctionReturn(PETSC_SUCCESS); 386 } else *identity = PETSC_TRUE; 387 if (!a->isTI || a->S) { 388 for (i = 0; i < a->p && *identity; i++) { 389 for (j = 0; j < a->p && *identity; j++) { 390 if (i != j) { 391 if (a->S && PetscAbsScalar(a->S[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE; 392 if (a->T && PetscAbsScalar(a->T[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE; 393 } else { 394 if (a->S && PetscAbsScalar(a->S[i * (a->p + 1)] - a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE; 395 if (a->T && PetscAbsScalar(a->T[i * (a->p + 1)] - a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE; 396 } 397 } 398 } 399 } 400 PetscFunctionReturn(PETSC_SUCCESS); 401 } 402 403 /*@ 404 MatKAIJSetT - Set the transformation matrix `T` associated with the `MATKAIJ` matrix 405 406 Logically Collective; the entire `T` is stored independently on all processes. 407 408 Input Parameters: 409 + A - the `MATKAIJ` matrix 410 . p - the number of rows in `S` 411 . q - the number of columns in `S` 412 - T - the `T` matrix, in form of a scalar array in column-major format 413 414 Level: advanced 415 416 Notes: 417 The dimensions `p` and `q` must match those of the shift matrix `S` associated with the `MATKAIJ` matrix. 418 419 The `T` matrix is copied, so the user can destroy this array. 420 421 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJSetS()`, `MatKAIJSetAIJ()` 422 @*/ 423 PetscErrorCode MatKAIJSetT(Mat A, PetscInt p, PetscInt q, const PetscScalar T[]) 424 { 425 PetscInt i, j; 426 Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data; 427 PetscBool isTI = PETSC_FALSE; 428 429 PetscFunctionBegin; 430 /* check if T is an identity matrix */ 431 if (T && (p == q)) { 432 isTI = PETSC_TRUE; 433 for (i = 0; i < p; i++) { 434 for (j = 0; j < q; j++) { 435 if (i == j) { 436 /* diagonal term must be 1 */ 437 if (T[i + j * p] != 1.0) isTI = PETSC_FALSE; 438 } else { 439 /* off-diagonal term must be 0 */ 440 if (T[i + j * p] != 0.0) isTI = PETSC_FALSE; 441 } 442 } 443 } 444 } 445 a->isTI = isTI; 446 447 PetscCall(PetscFree(a->T)); 448 if (T && (!isTI)) { 449 PetscCall(PetscMalloc1(p * q, &a->T)); 450 PetscCall(PetscMemcpy(a->T, T, p * q * sizeof(PetscScalar))); 451 } else a->T = NULL; 452 453 a->p = p; 454 a->q = q; 455 PetscFunctionReturn(PETSC_SUCCESS); 456 } 457 458 static PetscErrorCode MatDestroy_SeqKAIJ(Mat A) 459 { 460 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 461 462 PetscFunctionBegin; 463 PetscCall(MatDestroy(&b->AIJ)); 464 PetscCall(PetscFree(b->S)); 465 PetscCall(PetscFree(b->T)); 466 PetscCall(PetscFree(b->ibdiag)); 467 PetscCall(PetscFree5(b->sor.w, b->sor.y, b->sor.work, b->sor.t, b->sor.arr)); 468 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", NULL)); 469 PetscCall(PetscFree(A->data)); 470 PetscFunctionReturn(PETSC_SUCCESS); 471 } 472 473 static PetscErrorCode MatKAIJ_build_AIJ_OAIJ(Mat A) 474 { 475 Mat_MPIKAIJ *a; 476 Mat_MPIAIJ *mpiaij; 477 PetscScalar *T; 478 PetscInt i, j; 479 PetscObjectState state; 480 481 PetscFunctionBegin; 482 a = (Mat_MPIKAIJ *)A->data; 483 mpiaij = (Mat_MPIAIJ *)a->A->data; 484 485 PetscCall(PetscObjectStateGet((PetscObject)a->A, &state)); 486 if (state == a->state) { 487 /* The existing AIJ and KAIJ members are up-to-date, so simply exit. */ 488 PetscFunctionReturn(PETSC_SUCCESS); 489 } else { 490 PetscCall(MatDestroy(&a->AIJ)); 491 PetscCall(MatDestroy(&a->OAIJ)); 492 if (a->isTI) { 493 /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL. 494 * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will 495 * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix 496 * to pass in. */ 497 PetscCall(PetscMalloc1(a->p * a->q, &T)); 498 for (i = 0; i < a->p; i++) { 499 for (j = 0; j < a->q; j++) { 500 if (i == j) T[i + j * a->p] = 1.0; 501 else T[i + j * a->p] = 0.0; 502 } 503 } 504 } else T = a->T; 505 PetscCall(MatCreateKAIJ(mpiaij->A, a->p, a->q, a->S, T, &a->AIJ)); 506 PetscCall(MatCreateKAIJ(mpiaij->B, a->p, a->q, NULL, T, &a->OAIJ)); 507 if (a->isTI) PetscCall(PetscFree(T)); 508 a->state = state; 509 } 510 PetscFunctionReturn(PETSC_SUCCESS); 511 } 512 513 static PetscErrorCode MatSetUp_KAIJ(Mat A) 514 { 515 PetscInt n; 516 PetscMPIInt size; 517 Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ *)A->data; 518 519 PetscFunctionBegin; 520 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 521 if (size == 1) { 522 PetscCall(MatSetSizes(A, seqkaij->p * seqkaij->AIJ->rmap->n, seqkaij->q * seqkaij->AIJ->cmap->n, seqkaij->p * seqkaij->AIJ->rmap->N, seqkaij->q * seqkaij->AIJ->cmap->N)); 523 PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p)); 524 PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q)); 525 PetscCall(PetscLayoutSetUp(A->rmap)); 526 PetscCall(PetscLayoutSetUp(A->cmap)); 527 } else { 528 Mat_MPIKAIJ *a; 529 Mat_MPIAIJ *mpiaij; 530 IS from, to; 531 Vec gvec; 532 533 a = (Mat_MPIKAIJ *)A->data; 534 mpiaij = (Mat_MPIAIJ *)a->A->data; 535 PetscCall(MatSetSizes(A, a->p * a->A->rmap->n, a->q * a->A->cmap->n, a->p * a->A->rmap->N, a->q * a->A->cmap->N)); 536 PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p)); 537 PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q)); 538 PetscCall(PetscLayoutSetUp(A->rmap)); 539 PetscCall(PetscLayoutSetUp(A->cmap)); 540 541 PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); 542 543 PetscCall(VecGetSize(mpiaij->lvec, &n)); 544 PetscCall(VecCreate(PETSC_COMM_SELF, &a->w)); 545 PetscCall(VecSetSizes(a->w, n * a->q, n * a->q)); 546 PetscCall(VecSetBlockSize(a->w, a->q)); 547 PetscCall(VecSetType(a->w, VECSEQ)); 548 549 /* create two temporary Index sets for build scatter gather */ 550 PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)a->A), a->q, n, mpiaij->garray, PETSC_COPY_VALUES, &from)); 551 PetscCall(ISCreateStride(PETSC_COMM_SELF, n * a->q, 0, 1, &to)); 552 553 /* create temporary global vector to generate scatter context */ 554 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A), a->q, a->q * a->A->cmap->n, a->q * a->A->cmap->N, NULL, &gvec)); 555 556 /* generate the scatter context */ 557 PetscCall(VecScatterCreate(gvec, from, a->w, to, &a->ctx)); 558 559 PetscCall(ISDestroy(&from)); 560 PetscCall(ISDestroy(&to)); 561 PetscCall(VecDestroy(&gvec)); 562 } 563 564 A->assembled = PETSC_TRUE; 565 PetscFunctionReturn(PETSC_SUCCESS); 566 } 567 568 static PetscErrorCode MatView_KAIJ(Mat A, PetscViewer viewer) 569 { 570 PetscViewerFormat format; 571 Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data; 572 Mat B; 573 PetscInt i; 574 PetscBool ismpikaij; 575 576 PetscFunctionBegin; 577 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij)); 578 PetscCall(PetscViewerGetFormat(viewer, &format)); 579 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) { 580 PetscCall(PetscViewerASCIIPrintf(viewer, "S and T have %" PetscInt_FMT " rows and %" PetscInt_FMT " columns\n", a->p, a->q)); 581 582 /* Print appropriate details for S. */ 583 if (!a->S) { 584 PetscCall(PetscViewerASCIIPrintf(viewer, "S is NULL\n")); 585 } else if (format == PETSC_VIEWER_ASCII_IMPL) { 586 PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of S are ")); 587 for (i = 0; i < (a->p * a->q); i++) { 588 #if defined(PETSC_USE_COMPLEX) 589 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->S[i]), (double)PetscImaginaryPart(a->S[i]))); 590 #else 591 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->S[i]))); 592 #endif 593 } 594 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 595 } 596 597 /* Print appropriate details for T. */ 598 if (a->isTI) { 599 PetscCall(PetscViewerASCIIPrintf(viewer, "T is the identity matrix\n")); 600 } else if (!a->T) { 601 PetscCall(PetscViewerASCIIPrintf(viewer, "T is NULL\n")); 602 } else if (format == PETSC_VIEWER_ASCII_IMPL) { 603 PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of T are ")); 604 for (i = 0; i < (a->p * a->q); i++) { 605 #if defined(PETSC_USE_COMPLEX) 606 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->T[i]), (double)PetscImaginaryPart(a->T[i]))); 607 #else 608 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->T[i]))); 609 #endif 610 } 611 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 612 } 613 614 /* Now print details for the AIJ matrix, using the AIJ viewer. */ 615 PetscCall(PetscViewerASCIIPrintf(viewer, "Now viewing the associated AIJ matrix:\n")); 616 if (ismpikaij) { 617 Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 618 PetscCall(MatView(b->A, viewer)); 619 } else { 620 PetscCall(MatView(a->AIJ, viewer)); 621 } 622 623 } else { 624 /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */ 625 PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &B)); 626 PetscCall(MatView(B, viewer)); 627 PetscCall(MatDestroy(&B)); 628 } 629 PetscFunctionReturn(PETSC_SUCCESS); 630 } 631 632 static PetscErrorCode MatDestroy_MPIKAIJ(Mat A) 633 { 634 Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 635 636 PetscFunctionBegin; 637 PetscCall(MatDestroy(&b->AIJ)); 638 PetscCall(MatDestroy(&b->OAIJ)); 639 PetscCall(MatDestroy(&b->A)); 640 PetscCall(VecScatterDestroy(&b->ctx)); 641 PetscCall(VecDestroy(&b->w)); 642 PetscCall(PetscFree(b->S)); 643 PetscCall(PetscFree(b->T)); 644 PetscCall(PetscFree(b->ibdiag)); 645 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", NULL)); 646 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", NULL)); 647 PetscCall(PetscFree(A->data)); 648 PetscFunctionReturn(PETSC_SUCCESS); 649 } 650 651 /* zz = yy + Axx */ 652 static PetscErrorCode MatMultAdd_SeqKAIJ(Mat A, Vec xx, Vec yy, Vec zz) 653 { 654 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 655 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 656 const PetscScalar *s = b->S, *t = b->T; 657 const PetscScalar *x, *v, *bx; 658 PetscScalar *y, *sums; 659 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 660 PetscInt n, i, jrow, j, l, p = b->p, q = b->q, k; 661 662 PetscFunctionBegin; 663 if (!yy) { 664 PetscCall(VecSet(zz, 0.0)); 665 } else { 666 PetscCall(VecCopy(yy, zz)); 667 } 668 if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(PETSC_SUCCESS); 669 670 PetscCall(VecGetArrayRead(xx, &x)); 671 PetscCall(VecGetArray(zz, &y)); 672 idx = a->j; 673 v = a->a; 674 ii = a->i; 675 676 if (b->isTI) { 677 for (i = 0; i < m; i++) { 678 jrow = ii[i]; 679 n = ii[i + 1] - jrow; 680 sums = y + p * i; 681 for (j = 0; j < n; j++) { 682 for (k = 0; k < p; k++) sums[k] += v[jrow + j] * x[q * idx[jrow + j] + k]; 683 } 684 } 685 PetscCall(PetscLogFlops(3.0 * (a->nz) * p)); 686 } else if (t) { 687 for (i = 0; i < m; i++) { 688 jrow = ii[i]; 689 n = ii[i + 1] - jrow; 690 sums = y + p * i; 691 for (j = 0; j < n; j++) { 692 for (k = 0; k < p; k++) { 693 for (l = 0; l < q; l++) sums[k] += v[jrow + j] * t[k + l * p] * x[q * idx[jrow + j] + l]; 694 } 695 } 696 } 697 /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do), 698 * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T), 699 * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter 700 * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */ 701 PetscCall(PetscLogFlops((2.0 * p * q - p) * m + 2.0 * p * a->nz)); 702 } 703 if (s) { 704 for (i = 0; i < m; i++) { 705 sums = y + p * i; 706 bx = x + q * i; 707 if (i < b->AIJ->cmap->n) { 708 for (j = 0; j < q; j++) { 709 for (k = 0; k < p; k++) sums[k] += s[k + j * p] * bx[j]; 710 } 711 } 712 } 713 PetscCall(PetscLogFlops(2.0 * m * p * q)); 714 } 715 716 PetscCall(VecRestoreArrayRead(xx, &x)); 717 PetscCall(VecRestoreArray(zz, &y)); 718 PetscFunctionReturn(PETSC_SUCCESS); 719 } 720 721 static PetscErrorCode MatMult_SeqKAIJ(Mat A, Vec xx, Vec yy) 722 { 723 PetscFunctionBegin; 724 PetscCall(MatMultAdd_SeqKAIJ(A, xx, NULL, yy)); 725 PetscFunctionReturn(PETSC_SUCCESS); 726 } 727 728 #include <petsc/private/kernels/blockinvert.h> 729 730 static PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A, const PetscScalar **values) 731 { 732 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 733 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 734 const PetscScalar *S = b->S; 735 const PetscScalar *T = b->T; 736 const PetscScalar *v = a->a; 737 const PetscInt p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i; 738 PetscInt i, j, *v_pivots, dof, dof2; 739 PetscScalar *diag, aval, *v_work; 740 741 PetscFunctionBegin; 742 PetscCheck(p == q, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Block size must be square to calculate inverse."); 743 PetscCheck(S || T || b->isTI, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Cannot invert a zero matrix."); 744 745 dof = p; 746 dof2 = dof * dof; 747 748 if (b->ibdiagvalid) { 749 if (values) *values = b->ibdiag; 750 PetscFunctionReturn(PETSC_SUCCESS); 751 } 752 if (!b->ibdiag) PetscCall(PetscMalloc1(dof2 * m, &b->ibdiag)); 753 if (values) *values = b->ibdiag; 754 diag = b->ibdiag; 755 756 PetscCall(PetscMalloc2(dof, &v_work, dof, &v_pivots)); 757 for (i = 0; i < m; i++) { 758 if (S) { 759 PetscCall(PetscMemcpy(diag, S, dof2 * sizeof(PetscScalar))); 760 } else { 761 PetscCall(PetscMemzero(diag, dof2 * sizeof(PetscScalar))); 762 } 763 if (b->isTI) { 764 aval = 0; 765 for (j = ii[i]; j < ii[i + 1]; j++) 766 if (idx[j] == i) aval = v[j]; 767 for (j = 0; j < dof; j++) diag[j + dof * j] += aval; 768 } else if (T) { 769 aval = 0; 770 for (j = ii[i]; j < ii[i + 1]; j++) 771 if (idx[j] == i) aval = v[j]; 772 for (j = 0; j < dof2; j++) diag[j] += aval * T[j]; 773 } 774 PetscCall(PetscKernel_A_gets_inverse_A(dof, diag, v_pivots, v_work, PETSC_FALSE, NULL)); 775 diag += dof2; 776 } 777 PetscCall(PetscFree2(v_work, v_pivots)); 778 779 b->ibdiagvalid = PETSC_TRUE; 780 PetscFunctionReturn(PETSC_SUCCESS); 781 } 782 783 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A, Mat *B) 784 { 785 Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ *)A->data; 786 787 PetscFunctionBegin; 788 *B = kaij->AIJ; 789 PetscFunctionReturn(PETSC_SUCCESS); 790 } 791 792 static PetscErrorCode MatConvert_KAIJ_AIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 793 { 794 Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data; 795 Mat AIJ, OAIJ, B; 796 PetscInt *d_nnz, *o_nnz = NULL, nz, i, j, m, d; 797 const PetscInt p = a->p, q = a->q; 798 PetscBool ismpikaij, missing; 799 800 PetscFunctionBegin; 801 if (reuse != MAT_REUSE_MATRIX) { 802 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij)); 803 if (ismpikaij) { 804 Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 805 AIJ = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ; 806 OAIJ = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ; 807 } else { 808 AIJ = a->AIJ; 809 OAIJ = NULL; 810 } 811 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 812 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 813 PetscCall(MatSetType(B, MATAIJ)); 814 PetscCall(MatGetSize(AIJ, &m, NULL)); 815 PetscCall(MatMissingDiagonal(AIJ, &missing, &d)); /* assumption that all successive rows will have a missing diagonal */ 816 if (!missing || !a->S) d = m; 817 PetscCall(PetscMalloc1(m * p, &d_nnz)); 818 for (i = 0; i < m; ++i) { 819 PetscCall(MatGetRow_SeqAIJ(AIJ, i, &nz, NULL, NULL)); 820 for (j = 0; j < p; ++j) d_nnz[i * p + j] = nz * q + (i >= d) * q; 821 PetscCall(MatRestoreRow_SeqAIJ(AIJ, i, &nz, NULL, NULL)); 822 } 823 if (OAIJ) { 824 PetscCall(PetscMalloc1(m * p, &o_nnz)); 825 for (i = 0; i < m; ++i) { 826 PetscCall(MatGetRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL)); 827 for (j = 0; j < p; ++j) o_nnz[i * p + j] = nz * q; 828 PetscCall(MatRestoreRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL)); 829 } 830 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 831 } else { 832 PetscCall(MatSeqAIJSetPreallocation(B, 0, d_nnz)); 833 } 834 PetscCall(PetscFree(d_nnz)); 835 PetscCall(PetscFree(o_nnz)); 836 } else B = *newmat; 837 PetscCall(MatConvert_Basic(A, newtype, MAT_REUSE_MATRIX, &B)); 838 if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &B)); 839 else *newmat = B; 840 PetscFunctionReturn(PETSC_SUCCESS); 841 } 842 843 static PetscErrorCode MatSOR_SeqKAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 844 { 845 Mat_SeqKAIJ *kaij = (Mat_SeqKAIJ *)A->data; 846 Mat_SeqAIJ *a = (Mat_SeqAIJ *)kaij->AIJ->data; 847 const PetscScalar *aa = a->a, *T = kaij->T, *v; 848 const PetscInt m = kaij->AIJ->rmap->n, *ai = a->i, *aj = a->j, p = kaij->p, q = kaij->q, *diag, *vi; 849 const PetscScalar *b, *xb, *idiag; 850 PetscScalar *x, *work, *workt, *w, *y, *arr, *t, *arrt; 851 PetscInt i, j, k, i2, bs, bs2, nz; 852 853 PetscFunctionBegin; 854 its = its * lits; 855 PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat"); 856 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 857 PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift"); 858 PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts"); 859 PetscCheck(p == q, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSOR for KAIJ: No support for non-square dense blocks"); 860 bs = p; 861 bs2 = bs * bs; 862 863 if (!m) PetscFunctionReturn(PETSC_SUCCESS); 864 865 if (!kaij->ibdiagvalid) PetscCall(MatInvertBlockDiagonal_SeqKAIJ(A, NULL)); 866 idiag = kaij->ibdiag; 867 diag = a->diag; 868 869 if (!kaij->sor.setup) { 870 PetscCall(PetscMalloc5(bs, &kaij->sor.w, bs, &kaij->sor.y, m * bs, &kaij->sor.work, m * bs, &kaij->sor.t, m * bs2, &kaij->sor.arr)); 871 kaij->sor.setup = PETSC_TRUE; 872 } 873 y = kaij->sor.y; 874 w = kaij->sor.w; 875 work = kaij->sor.work; 876 t = kaij->sor.t; 877 arr = kaij->sor.arr; 878 879 PetscCall(VecGetArray(xx, &x)); 880 PetscCall(VecGetArrayRead(bb, &b)); 881 882 if (flag & SOR_ZERO_INITIAL_GUESS) { 883 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 884 PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); /* x[0:bs] <- D^{-1} b[0:bs] */ 885 PetscCall(PetscMemcpy(t, b, bs * sizeof(PetscScalar))); 886 i2 = bs; 887 idiag += bs2; 888 for (i = 1; i < m; i++) { 889 v = aa + ai[i]; 890 vi = aj + ai[i]; 891 nz = diag[i] - ai[i]; 892 893 if (T) { /* b - T (Arow * x) */ 894 PetscCall(PetscMemzero(w, bs * sizeof(PetscScalar))); 895 for (j = 0; j < nz; j++) { 896 for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k]; 897 } 898 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs, w, T, &t[i2]); 899 for (k = 0; k < bs; k++) t[i2 + k] += b[i2 + k]; 900 } else if (kaij->isTI) { 901 PetscCall(PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar))); 902 for (j = 0; j < nz; j++) { 903 for (k = 0; k < bs; k++) t[i2 + k] -= v[j] * x[vi[j] * bs + k]; 904 } 905 } else { 906 PetscCall(PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar))); 907 } 908 909 PetscKernel_w_gets_Ar_times_v(bs, bs, t + i2, idiag, y); 910 for (j = 0; j < bs; j++) x[i2 + j] = omega * y[j]; 911 912 idiag += bs2; 913 i2 += bs; 914 } 915 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 916 PetscCall(PetscLogFlops(1.0 * bs2 * a->nz)); 917 xb = t; 918 } else xb = b; 919 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 920 idiag = kaij->ibdiag + bs2 * (m - 1); 921 i2 = bs * (m - 1); 922 PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar))); 923 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 924 i2 -= bs; 925 idiag -= bs2; 926 for (i = m - 2; i >= 0; i--) { 927 v = aa + diag[i] + 1; 928 vi = aj + diag[i] + 1; 929 nz = ai[i + 1] - diag[i] - 1; 930 931 if (T) { /* FIXME: This branch untested */ 932 PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar))); 933 /* copy all rows of x that are needed into contiguous space */ 934 workt = work; 935 for (j = 0; j < nz; j++) { 936 PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 937 workt += bs; 938 } 939 arrt = arr; 940 for (j = 0; j < nz; j++) { 941 PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 942 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 943 arrt += bs2; 944 } 945 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 946 } else if (kaij->isTI) { 947 PetscCall(PetscMemcpy(w, t + i2, bs * sizeof(PetscScalar))); 948 for (j = 0; j < nz; j++) { 949 for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k]; 950 } 951 } 952 953 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); /* RHS incorrect for omega != 1.0 */ 954 for (j = 0; j < bs; j++) x[i2 + j] = (1.0 - omega) * x[i2 + j] + omega * y[j]; 955 956 idiag -= bs2; 957 i2 -= bs; 958 } 959 PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz))); 960 } 961 its--; 962 } 963 while (its--) { /* FIXME: This branch not updated */ 964 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 965 i2 = 0; 966 idiag = kaij->ibdiag; 967 for (i = 0; i < m; i++) { 968 PetscCall(PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar))); 969 970 v = aa + ai[i]; 971 vi = aj + ai[i]; 972 nz = diag[i] - ai[i]; 973 workt = work; 974 for (j = 0; j < nz; j++) { 975 PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 976 workt += bs; 977 } 978 arrt = arr; 979 if (T) { 980 for (j = 0; j < nz; j++) { 981 PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 982 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 983 arrt += bs2; 984 } 985 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 986 } else if (kaij->isTI) { 987 for (j = 0; j < nz; j++) { 988 PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar))); 989 for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 990 arrt += bs2; 991 } 992 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 993 } 994 PetscCall(PetscMemcpy(t + i2, w, bs * sizeof(PetscScalar))); 995 996 v = aa + diag[i] + 1; 997 vi = aj + diag[i] + 1; 998 nz = ai[i + 1] - diag[i] - 1; 999 workt = work; 1000 for (j = 0; j < nz; j++) { 1001 PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 1002 workt += bs; 1003 } 1004 arrt = arr; 1005 if (T) { 1006 for (j = 0; j < nz; j++) { 1007 PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 1008 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 1009 arrt += bs2; 1010 } 1011 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1012 } else if (kaij->isTI) { 1013 for (j = 0; j < nz; j++) { 1014 PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar))); 1015 for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 1016 arrt += bs2; 1017 } 1018 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1019 } 1020 1021 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); 1022 for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j); 1023 1024 idiag += bs2; 1025 i2 += bs; 1026 } 1027 xb = t; 1028 } else xb = b; 1029 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1030 idiag = kaij->ibdiag + bs2 * (m - 1); 1031 i2 = bs * (m - 1); 1032 if (xb == b) { 1033 for (i = m - 1; i >= 0; i--) { 1034 PetscCall(PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar))); 1035 1036 v = aa + ai[i]; 1037 vi = aj + ai[i]; 1038 nz = diag[i] - ai[i]; 1039 workt = work; 1040 for (j = 0; j < nz; j++) { 1041 PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 1042 workt += bs; 1043 } 1044 arrt = arr; 1045 if (T) { 1046 for (j = 0; j < nz; j++) { 1047 PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 1048 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 1049 arrt += bs2; 1050 } 1051 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1052 } else if (kaij->isTI) { 1053 for (j = 0; j < nz; j++) { 1054 PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar))); 1055 for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 1056 arrt += bs2; 1057 } 1058 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1059 } 1060 1061 v = aa + diag[i] + 1; 1062 vi = aj + diag[i] + 1; 1063 nz = ai[i + 1] - diag[i] - 1; 1064 workt = work; 1065 for (j = 0; j < nz; j++) { 1066 PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 1067 workt += bs; 1068 } 1069 arrt = arr; 1070 if (T) { 1071 for (j = 0; j < nz; j++) { 1072 PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 1073 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 1074 arrt += bs2; 1075 } 1076 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1077 } else if (kaij->isTI) { 1078 for (j = 0; j < nz; j++) { 1079 PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar))); 1080 for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 1081 arrt += bs2; 1082 } 1083 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1084 } 1085 1086 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); 1087 for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j); 1088 } 1089 } else { 1090 for (i = m - 1; i >= 0; i--) { 1091 PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar))); 1092 v = aa + diag[i] + 1; 1093 vi = aj + diag[i] + 1; 1094 nz = ai[i + 1] - diag[i] - 1; 1095 workt = work; 1096 for (j = 0; j < nz; j++) { 1097 PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 1098 workt += bs; 1099 } 1100 arrt = arr; 1101 if (T) { 1102 for (j = 0; j < nz; j++) { 1103 PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 1104 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 1105 arrt += bs2; 1106 } 1107 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1108 } else if (kaij->isTI) { 1109 for (j = 0; j < nz; j++) { 1110 PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar))); 1111 for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 1112 arrt += bs2; 1113 } 1114 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1115 } 1116 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); 1117 for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j); 1118 } 1119 } 1120 PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz))); 1121 } 1122 } 1123 1124 PetscCall(VecRestoreArray(xx, &x)); 1125 PetscCall(VecRestoreArrayRead(bb, &b)); 1126 PetscFunctionReturn(PETSC_SUCCESS); 1127 } 1128 1129 /*===================================================================================*/ 1130 1131 static PetscErrorCode MatMultAdd_MPIKAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1132 { 1133 Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 1134 1135 PetscFunctionBegin; 1136 if (!yy) { 1137 PetscCall(VecSet(zz, 0.0)); 1138 } else { 1139 PetscCall(VecCopy(yy, zz)); 1140 } 1141 PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */ 1142 /* start the scatter */ 1143 PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 1144 PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, zz, zz)); 1145 PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 1146 PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz)); 1147 PetscFunctionReturn(PETSC_SUCCESS); 1148 } 1149 1150 static PetscErrorCode MatMult_MPIKAIJ(Mat A, Vec xx, Vec yy) 1151 { 1152 PetscFunctionBegin; 1153 PetscCall(MatMultAdd_MPIKAIJ(A, xx, NULL, yy)); 1154 PetscFunctionReturn(PETSC_SUCCESS); 1155 } 1156 1157 static PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A, const PetscScalar **values) 1158 { 1159 Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 1160 1161 PetscFunctionBegin; 1162 PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ is up to date. */ 1163 PetscCall((*b->AIJ->ops->invertblockdiagonal)(b->AIJ, values)); 1164 PetscFunctionReturn(PETSC_SUCCESS); 1165 } 1166 1167 static PetscErrorCode MatGetRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values) 1168 { 1169 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 1170 PetscBool diag = PETSC_FALSE; 1171 PetscInt nzaij, nz, *colsaij, *idx, i, j, p = b->p, q = b->q, r = row / p, s = row % p, c; 1172 PetscScalar *vaij, *v, *S = b->S, *T = b->T; 1173 1174 PetscFunctionBegin; 1175 PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1176 b->getrowactive = PETSC_TRUE; 1177 PetscCheck(row >= 0 && row < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row); 1178 1179 if ((!S) && (!T) && (!b->isTI)) { 1180 if (ncols) *ncols = 0; 1181 if (cols) *cols = NULL; 1182 if (values) *values = NULL; 1183 PetscFunctionReturn(PETSC_SUCCESS); 1184 } 1185 1186 if (T || b->isTI) { 1187 PetscCall(MatGetRow_SeqAIJ(b->AIJ, r, &nzaij, &colsaij, &vaij)); 1188 c = nzaij; 1189 for (i = 0; i < nzaij; i++) { 1190 /* check if this row contains a diagonal entry */ 1191 if (colsaij[i] == r) { 1192 diag = PETSC_TRUE; 1193 c = i; 1194 } 1195 } 1196 } else nzaij = c = 0; 1197 1198 /* calculate size of row */ 1199 nz = 0; 1200 if (S) nz += q; 1201 if (T || b->isTI) nz += (diag && S ? (nzaij - 1) * q : nzaij * q); 1202 1203 if (cols || values) { 1204 PetscCall(PetscMalloc2(nz, &idx, nz, &v)); 1205 for (i = 0; i < q; i++) { 1206 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1207 v[i] = 0.0; 1208 } 1209 if (b->isTI) { 1210 for (i = 0; i < nzaij; i++) { 1211 for (j = 0; j < q; j++) { 1212 idx[i * q + j] = colsaij[i] * q + j; 1213 v[i * q + j] = (j == s ? vaij[i] : 0); 1214 } 1215 } 1216 } else if (T) { 1217 for (i = 0; i < nzaij; i++) { 1218 for (j = 0; j < q; j++) { 1219 idx[i * q + j] = colsaij[i] * q + j; 1220 v[i * q + j] = vaij[i] * T[s + j * p]; 1221 } 1222 } 1223 } 1224 if (S) { 1225 for (j = 0; j < q; j++) { 1226 idx[c * q + j] = r * q + j; 1227 v[c * q + j] += S[s + j * p]; 1228 } 1229 } 1230 } 1231 1232 if (ncols) *ncols = nz; 1233 if (cols) *cols = idx; 1234 if (values) *values = v; 1235 PetscFunctionReturn(PETSC_SUCCESS); 1236 } 1237 1238 static PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1239 { 1240 PetscFunctionBegin; 1241 PetscCall(PetscFree2(*idx, *v)); 1242 ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE; 1243 PetscFunctionReturn(PETSC_SUCCESS); 1244 } 1245 1246 static PetscErrorCode MatGetRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values) 1247 { 1248 Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 1249 Mat AIJ = b->A; 1250 PetscBool diag = PETSC_FALSE; 1251 Mat MatAIJ, MatOAIJ; 1252 const PetscInt rstart = A->rmap->rstart, rend = A->rmap->rend, p = b->p, q = b->q, *garray; 1253 PetscInt nz, *idx, ncolsaij = 0, ncolsoaij = 0, *colsaij, *colsoaij, r, s, c, i, j, lrow; 1254 PetscScalar *v, *vals, *ovals, *S = b->S, *T = b->T; 1255 1256 PetscFunctionBegin; 1257 PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */ 1258 MatAIJ = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ; 1259 MatOAIJ = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ; 1260 PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1261 b->getrowactive = PETSC_TRUE; 1262 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1263 lrow = row - rstart; 1264 1265 if ((!S) && (!T) && (!b->isTI)) { 1266 if (ncols) *ncols = 0; 1267 if (cols) *cols = NULL; 1268 if (values) *values = NULL; 1269 PetscFunctionReturn(PETSC_SUCCESS); 1270 } 1271 1272 r = lrow / p; 1273 s = lrow % p; 1274 1275 if (T || b->isTI) { 1276 PetscCall(MatMPIAIJGetSeqAIJ(AIJ, NULL, NULL, &garray)); 1277 PetscCall(MatGetRow_SeqAIJ(MatAIJ, lrow / p, &ncolsaij, &colsaij, &vals)); 1278 PetscCall(MatGetRow_SeqAIJ(MatOAIJ, lrow / p, &ncolsoaij, &colsoaij, &ovals)); 1279 1280 c = ncolsaij + ncolsoaij; 1281 for (i = 0; i < ncolsaij; i++) { 1282 /* check if this row contains a diagonal entry */ 1283 if (colsaij[i] == r) { 1284 diag = PETSC_TRUE; 1285 c = i; 1286 } 1287 } 1288 } else c = 0; 1289 1290 /* calculate size of row */ 1291 nz = 0; 1292 if (S) nz += q; 1293 if (T || b->isTI) nz += (diag && S ? (ncolsaij + ncolsoaij - 1) * q : (ncolsaij + ncolsoaij) * q); 1294 1295 if (cols || values) { 1296 PetscCall(PetscMalloc2(nz, &idx, nz, &v)); 1297 for (i = 0; i < q; i++) { 1298 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1299 v[i] = 0.0; 1300 } 1301 if (b->isTI) { 1302 for (i = 0; i < ncolsaij; i++) { 1303 for (j = 0; j < q; j++) { 1304 idx[i * q + j] = (colsaij[i] + rstart / p) * q + j; 1305 v[i * q + j] = (j == s ? vals[i] : 0.0); 1306 } 1307 } 1308 for (i = 0; i < ncolsoaij; i++) { 1309 for (j = 0; j < q; j++) { 1310 idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j; 1311 v[(i + ncolsaij) * q + j] = (j == s ? ovals[i] : 0.0); 1312 } 1313 } 1314 } else if (T) { 1315 for (i = 0; i < ncolsaij; i++) { 1316 for (j = 0; j < q; j++) { 1317 idx[i * q + j] = (colsaij[i] + rstart / p) * q + j; 1318 v[i * q + j] = vals[i] * T[s + j * p]; 1319 } 1320 } 1321 for (i = 0; i < ncolsoaij; i++) { 1322 for (j = 0; j < q; j++) { 1323 idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j; 1324 v[(i + ncolsaij) * q + j] = ovals[i] * T[s + j * p]; 1325 } 1326 } 1327 } 1328 if (S) { 1329 for (j = 0; j < q; j++) { 1330 idx[c * q + j] = (r + rstart / p) * q + j; 1331 v[c * q + j] += S[s + j * p]; 1332 } 1333 } 1334 } 1335 1336 if (ncols) *ncols = nz; 1337 if (cols) *cols = idx; 1338 if (values) *values = v; 1339 PetscFunctionReturn(PETSC_SUCCESS); 1340 } 1341 1342 static PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1343 { 1344 PetscFunctionBegin; 1345 PetscCall(PetscFree2(*idx, *v)); 1346 ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE; 1347 PetscFunctionReturn(PETSC_SUCCESS); 1348 } 1349 1350 static PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat) 1351 { 1352 Mat A; 1353 1354 PetscFunctionBegin; 1355 PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A)); 1356 PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat)); 1357 PetscCall(MatDestroy(&A)); 1358 PetscFunctionReturn(PETSC_SUCCESS); 1359 } 1360 1361 /*@C 1362 MatCreateKAIJ - Creates a matrix of type `MATKAIJ`. 1363 1364 Collective 1365 1366 Input Parameters: 1367 + A - the `MATAIJ` matrix 1368 . p - number of rows in `S` and `T` 1369 . q - number of columns in `S` and `T` 1370 . S - the `S` matrix (can be `NULL`), stored as a `PetscScalar` array (column-major) 1371 - T - the `T` matrix (can be `NULL`), stored as a `PetscScalar` array (column-major) 1372 1373 Output Parameter: 1374 . kaij - the new `MATKAIJ` matrix 1375 1376 Level: advanced 1377 1378 Notes: 1379 The created matrix is of the following form\: 1380 .vb 1381 [I \otimes S + A \otimes T] 1382 .ve 1383 where 1384 .vb 1385 S is a dense (p \times q) matrix 1386 T is a dense (p \times q) matrix 1387 A is a `MATAIJ` (n \times n) matrix 1388 I is the identity matrix 1389 .ve 1390 The resulting matrix is (np \times nq) 1391 1392 `S` and `T` are always stored independently on all processes as `PetscScalar` arrays in 1393 column-major format. 1394 1395 This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed. 1396 1397 Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix. 1398 1399 Developer Notes: 1400 In the `MATMPIKAIJ` case, the internal 'AIJ' and 'OAIJ' sequential KAIJ matrices are kept up to date by tracking the object state 1401 of the AIJ matrix 'A' that describes the blockwise action of the `MATMPIKAIJ` matrix and, if the object state has changed, lazily 1402 rebuilding 'AIJ' and 'OAIJ' just before executing operations with the `MATMPIKAIJ` matrix. If new types of operations are added, 1403 routines implementing those must also ensure these are rebuilt when needed (by calling the internal MatKAIJ_build_AIJ_OAIJ() routine). 1404 1405 .seealso: [](ch_matrices), `Mat`, `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MATKAIJ` 1406 @*/ 1407 PetscErrorCode MatCreateKAIJ(Mat A, PetscInt p, PetscInt q, const PetscScalar S[], const PetscScalar T[], Mat *kaij) 1408 { 1409 PetscFunctionBegin; 1410 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), kaij)); 1411 PetscCall(MatSetType(*kaij, MATKAIJ)); 1412 PetscCall(MatKAIJSetAIJ(*kaij, A)); 1413 PetscCall(MatKAIJSetS(*kaij, p, q, S)); 1414 PetscCall(MatKAIJSetT(*kaij, p, q, T)); 1415 PetscCall(MatSetUp(*kaij)); 1416 PetscFunctionReturn(PETSC_SUCCESS); 1417 } 1418 1419 /*MC 1420 MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form 1421 [I \otimes S + A \otimes T], 1422 where 1423 .vb 1424 S is a dense (p \times q) matrix, 1425 T is a dense (p \times q) matrix, 1426 A is an AIJ (n \times n) matrix, 1427 and I is the identity matrix. 1428 .ve 1429 The resulting matrix is (np \times nq). 1430 1431 S and T are always stored independently on all processes as `PetscScalar` arrays in column-major format. 1432 1433 Level: advanced 1434 1435 Note: 1436 A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b, 1437 where x and b are column vectors containing the row-major representations of X and B. 1438 1439 .seealso: [](ch_matrices), `Mat`, `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MatCreateKAIJ()` 1440 M*/ 1441 1442 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A) 1443 { 1444 Mat_MPIKAIJ *b; 1445 PetscMPIInt size; 1446 1447 PetscFunctionBegin; 1448 PetscCall(PetscNew(&b)); 1449 A->data = (void *)b; 1450 1451 PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps))); 1452 1453 b->w = NULL; 1454 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 1455 if (size == 1) { 1456 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQKAIJ)); 1457 A->ops->destroy = MatDestroy_SeqKAIJ; 1458 A->ops->mult = MatMult_SeqKAIJ; 1459 A->ops->multadd = MatMultAdd_SeqKAIJ; 1460 A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ; 1461 A->ops->getrow = MatGetRow_SeqKAIJ; 1462 A->ops->restorerow = MatRestoreRow_SeqKAIJ; 1463 A->ops->sor = MatSOR_SeqKAIJ; 1464 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", MatConvert_KAIJ_AIJ)); 1465 } else { 1466 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIKAIJ)); 1467 A->ops->destroy = MatDestroy_MPIKAIJ; 1468 A->ops->mult = MatMult_MPIKAIJ; 1469 A->ops->multadd = MatMultAdd_MPIKAIJ; 1470 A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ; 1471 A->ops->getrow = MatGetRow_MPIKAIJ; 1472 A->ops->restorerow = MatRestoreRow_MPIKAIJ; 1473 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", MatGetDiagonalBlock_MPIKAIJ)); 1474 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", MatConvert_KAIJ_AIJ)); 1475 } 1476 A->ops->setup = MatSetUp_KAIJ; 1477 A->ops->view = MatView_KAIJ; 1478 A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ; 1479 PetscFunctionReturn(PETSC_SUCCESS); 1480 } 1481