1 2 /* 3 Defines the basic matrix operations for the KAIJ matrix storage format. 4 This format is used to evaluate matrices of the form: 5 6 [I \otimes S + A \otimes T] 7 8 where 9 S is a dense (p \times q) matrix 10 T is a dense (p \times q) matrix 11 A is an AIJ (n \times n) matrix 12 I is the identity matrix 13 14 The resulting matrix is (np \times nq) 15 16 We provide: 17 MatMult() 18 MatMultAdd() 19 MatInvertBlockDiagonal() 20 and 21 MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*) 22 23 This single directory handles both the sequential and parallel codes 24 */ 25 26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/ 27 #include <../src/mat/utils/freespace.h> 28 #include <petsc/private/vecimpl.h> 29 30 /*@C 31 MatKAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix 32 33 Not Collective, but if the `MATKAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel 34 35 Input Parameter: 36 . A - the `MATKAIJ` matrix 37 38 Output Parameter: 39 . B - the `MATAIJ` matrix 40 41 Level: advanced 42 43 Note: 44 The reference count on the `MATAIJ` matrix is not increased so you should not destroy it. 45 46 .seealso: [](chapter_matrices), `Mat`, `MatCreateKAIJ()`, `MATKAIJ`, `MATAIJ` 47 @*/ 48 PetscErrorCode MatKAIJGetAIJ(Mat A, Mat *B) 49 { 50 PetscBool ismpikaij, isseqkaij; 51 52 PetscFunctionBegin; 53 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij)); 54 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQKAIJ, &isseqkaij)); 55 if (ismpikaij) { 56 Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 57 58 *B = b->A; 59 } else if (isseqkaij) { 60 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 61 62 *B = b->AIJ; 63 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix passed in is not of type KAIJ"); 64 PetscFunctionReturn(PETSC_SUCCESS); 65 } 66 67 /*@C 68 MatKAIJGetS - Get the `S` matrix describing the shift action of the `MATKAIJ` matrix 69 70 Not Collective; the entire `S` is stored and returned independently on all processes. 71 72 Input Parameter: 73 . A - the `MATKAIJ` matrix 74 75 Output Parameters: 76 + m - the number of rows in `S` 77 . n - the number of columns in `S` 78 - S - the S matrix, in form of a scalar array in column-major format 79 80 Level: advanced 81 82 Note: 83 All output parameters are optional (pass `NULL` if not desired) 84 85 .seealso: [](chapter_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()` 86 @*/ 87 PetscErrorCode MatKAIJGetS(Mat A, PetscInt *m, PetscInt *n, PetscScalar **S) 88 { 89 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 90 PetscFunctionBegin; 91 if (m) *m = b->p; 92 if (n) *n = b->q; 93 if (S) *S = b->S; 94 PetscFunctionReturn(PETSC_SUCCESS); 95 } 96 97 /*@C 98 MatKAIJGetSRead - Get a read-only pointer to the `S` matrix describing the shift action of the `MATKAIJ` matrix 99 100 Not Collective; the entire `S` is stored and returned independently on all processes. 101 102 Input Parameter: 103 . A - the `MATKAIJ` matrix 104 105 Output Parameters: 106 + m - the number of rows in `S` 107 . n - the number of columns in `S` 108 - S - the S matrix, in form of a scalar array in column-major format 109 110 Level: advanced 111 112 Note: 113 All output parameters are optional (pass `NULL` if not desired) 114 115 .seealso: [](chapter_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()` 116 @*/ 117 PetscErrorCode MatKAIJGetSRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar **S) 118 { 119 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 120 PetscFunctionBegin; 121 if (m) *m = b->p; 122 if (n) *n = b->q; 123 if (S) *S = b->S; 124 PetscFunctionReturn(PETSC_SUCCESS); 125 } 126 127 /*@C 128 MatKAIJRestoreS - Restore array obtained with `MatKAIJGetS()` 129 130 Not Collective 131 132 Input Parameters: 133 + A - the `MATKAIJ` matrix 134 - S - location of pointer to array obtained with `MatKAIJGetS()` 135 136 Level: advanced 137 138 Note: 139 This routine zeros the array pointer to prevent accidental reuse after it has been restored. 140 If `NULL` is passed, it will not attempt to zero the array pointer. 141 142 .seealso: [](chapter_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()` 143 @*/ 144 PetscErrorCode MatKAIJRestoreS(Mat A, PetscScalar **S) 145 { 146 PetscFunctionBegin; 147 if (S) *S = NULL; 148 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 149 PetscFunctionReturn(PETSC_SUCCESS); 150 } 151 152 /*@C 153 MatKAIJRestoreSRead - Restore array obtained with `MatKAIJGetSRead()` 154 155 Not Collective 156 157 Input Parameters: 158 + A - the `MATKAIJ` matrix 159 - S - location of pointer to array obtained with `MatKAIJGetS()` 160 161 Level: advanced 162 163 Note: 164 This routine zeros the array pointer to prevent accidental reuse after it has been restored. 165 If `NULL` is passed, it will not attempt to zero the array pointer. 166 167 .seealso: [](chapter_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()` 168 @*/ 169 PetscErrorCode MatKAIJRestoreSRead(Mat A, const PetscScalar **S) 170 { 171 PetscFunctionBegin; 172 if (S) *S = NULL; 173 PetscFunctionReturn(PETSC_SUCCESS); 174 } 175 176 /*@C 177 MatKAIJGetT - Get the transformation matrix `T` associated with the `MATKAIJ` matrix 178 179 Not Collective; the entire `T` is stored and returned independently on all processes 180 181 Input Parameter: 182 . A - the `MATKAIJ` matrix 183 184 Output Parameters: 185 + m - the number of rows in `T` 186 . n - the number of columns in `T` 187 - T - the T matrix, in form of a scalar array in column-major format 188 189 Level: advanced 190 191 Note: 192 All output parameters are optional (pass `NULL` if not desired) 193 194 .seealso: [](chapter_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()` 195 @*/ 196 PetscErrorCode MatKAIJGetT(Mat A, PetscInt *m, PetscInt *n, PetscScalar **T) 197 { 198 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 199 PetscFunctionBegin; 200 if (m) *m = b->p; 201 if (n) *n = b->q; 202 if (T) *T = b->T; 203 PetscFunctionReturn(PETSC_SUCCESS); 204 } 205 206 /*@C 207 MatKAIJGetTRead - Get a read-only pointer to the transformation matrix `T` associated with the `MATKAIJ` matrix 208 209 Not Collective; the entire `T` is stored and returned independently on all processes 210 211 Input Parameter: 212 . A - the `MATKAIJ` matrix 213 214 Output Parameters: 215 + m - the number of rows in `T` 216 . n - the number of columns in `T` 217 - T - the T matrix, in form of a scalar array in column-major format 218 219 Level: advanced 220 221 Note: 222 All output parameters are optional (pass `NULL` if not desired) 223 224 .seealso: [](chapter_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()` 225 @*/ 226 PetscErrorCode MatKAIJGetTRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar **T) 227 { 228 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 229 PetscFunctionBegin; 230 if (m) *m = b->p; 231 if (n) *n = b->q; 232 if (T) *T = b->T; 233 PetscFunctionReturn(PETSC_SUCCESS); 234 } 235 236 /*@C 237 MatKAIJRestoreT - Restore array obtained with `MatKAIJGetT()` 238 239 Not Collective 240 241 Input Parameters: 242 + A - the `MATKAIJ` matrix 243 - T - location of pointer to array obtained with `MatKAIJGetS()` 244 245 Level: advanced 246 247 Note: 248 This routine zeros the array pointer to prevent accidental reuse after it has been restored. 249 If `NULL` is passed, it will not attempt to zero the array pointer. 250 251 .seealso: [](chapter_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()` 252 @*/ 253 PetscErrorCode MatKAIJRestoreT(Mat A, PetscScalar **T) 254 { 255 PetscFunctionBegin; 256 if (T) *T = NULL; 257 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 258 PetscFunctionReturn(PETSC_SUCCESS); 259 } 260 261 /*@C 262 MatKAIJRestoreTRead - Restore array obtained with `MatKAIJGetTRead()` 263 264 Not Collective 265 266 Input Parameters: 267 + A - the `MATKAIJ` matrix 268 - T - location of pointer to array obtained with `MatKAIJGetS()` 269 270 Level: advanced 271 272 Note: 273 This routine zeros the array pointer to prevent accidental reuse after it has been restored. 274 If `NULL` is passed, it will not attempt to zero the array pointer. 275 276 .seealso: [](chapter_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()` 277 @*/ 278 PetscErrorCode MatKAIJRestoreTRead(Mat A, const PetscScalar **T) 279 { 280 PetscFunctionBegin; 281 if (T) *T = NULL; 282 PetscFunctionReturn(PETSC_SUCCESS); 283 } 284 285 /*@ 286 MatKAIJSetAIJ - Set the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix 287 288 Logically Collective; if the `MATAIJ` matrix is parallel, the `MATKAIJ` matrix is also parallel 289 290 Input Parameters: 291 + A - the `MATKAIJ` matrix 292 - B - the `MATAIJ` matrix 293 294 Level: advanced 295 296 Notes: 297 This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed. 298 299 Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix. 300 301 .seealso: [](chapter_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()` 302 @*/ 303 PetscErrorCode MatKAIJSetAIJ(Mat A, Mat B) 304 { 305 PetscMPIInt size; 306 PetscBool flg; 307 308 PetscFunctionBegin; 309 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 310 if (size == 1) { 311 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQAIJ, &flg)); 312 PetscCheck(flg, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MatKAIJSetAIJ() with MATSEQKAIJ does not support %s as the AIJ mat", ((PetscObject)B)->type_name); 313 Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data; 314 a->AIJ = B; 315 } else { 316 Mat_MPIKAIJ *a = (Mat_MPIKAIJ *)A->data; 317 a->A = B; 318 } 319 PetscCall(PetscObjectReference((PetscObject)B)); 320 PetscFunctionReturn(PETSC_SUCCESS); 321 } 322 323 /*@C 324 MatKAIJSetS - Set the `S` matrix describing the shift action of the `MATKAIJ` matrix 325 326 Logically Collective; the entire `S` is stored independently on all processes. 327 328 Input Parameters: 329 + A - the `MATKAIJ` matrix 330 . p - the number of rows in `S` 331 . q - the number of columns in `S` 332 - S - the S matrix, in form of a scalar array in column-major format 333 334 Level: advanced 335 336 Notes: 337 The dimensions `p` and `q` must match those of the transformation matrix `T` associated with the `MATKAIJ` matrix. 338 339 The `S` matrix is copied, so the user can destroy this array. 340 341 .seealso: [](chapter_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJSetT()`, `MatKAIJSetAIJ()` 342 @*/ 343 PetscErrorCode MatKAIJSetS(Mat A, PetscInt p, PetscInt q, const PetscScalar S[]) 344 { 345 Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data; 346 347 PetscFunctionBegin; 348 PetscCall(PetscFree(a->S)); 349 if (S) { 350 PetscCall(PetscMalloc1(p * q, &a->S)); 351 PetscCall(PetscMemcpy(a->S, S, p * q * sizeof(PetscScalar))); 352 } else a->S = NULL; 353 354 a->p = p; 355 a->q = q; 356 PetscFunctionReturn(PETSC_SUCCESS); 357 } 358 359 /*@C 360 MatKAIJGetScaledIdentity - Check if both `S` and `T` are scaled identities. 361 362 Logically Collective. 363 364 Input Parameter: 365 . A - the `MATKAIJ` matrix 366 367 Output Parameter: 368 . identity - the Boolean value 369 370 Level: Advanced 371 372 .seealso: [](chapter_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetT()` 373 @*/ 374 PetscErrorCode MatKAIJGetScaledIdentity(Mat A, PetscBool *identity) 375 { 376 Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data; 377 PetscInt i, j; 378 379 PetscFunctionBegin; 380 if (a->p != a->q) { 381 *identity = PETSC_FALSE; 382 PetscFunctionReturn(PETSC_SUCCESS); 383 } else *identity = PETSC_TRUE; 384 if (!a->isTI || a->S) { 385 for (i = 0; i < a->p && *identity; i++) { 386 for (j = 0; j < a->p && *identity; j++) { 387 if (i != j) { 388 if (a->S && PetscAbsScalar(a->S[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE; 389 if (a->T && PetscAbsScalar(a->T[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE; 390 } else { 391 if (a->S && PetscAbsScalar(a->S[i * (a->p + 1)] - a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE; 392 if (a->T && PetscAbsScalar(a->T[i * (a->p + 1)] - a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE; 393 } 394 } 395 } 396 } 397 PetscFunctionReturn(PETSC_SUCCESS); 398 } 399 400 /*@C 401 MatKAIJSetT - Set the transformation matrix `T` associated with the `MATKAIJ` matrix 402 403 Logically Collective; the entire `T` is stored independently on all processes. 404 405 Input Parameters: 406 + A - the `MATKAIJ` matrix 407 . p - the number of rows in `S` 408 . q - the number of columns in `S` 409 - T - the `T` matrix, in form of a scalar array in column-major format 410 411 Level: Advanced 412 413 Notes: 414 The dimensions `p` and `q` must match those of the shift matrix `S` associated with the `MATKAIJ` matrix. 415 416 The `T` matrix is copied, so the user can destroy this array. 417 418 .seealso: [](chapter_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJSetS()`, `MatKAIJSetAIJ()` 419 @*/ 420 PetscErrorCode MatKAIJSetT(Mat A, PetscInt p, PetscInt q, const PetscScalar T[]) 421 { 422 PetscInt i, j; 423 Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data; 424 PetscBool isTI = PETSC_FALSE; 425 426 PetscFunctionBegin; 427 /* check if T is an identity matrix */ 428 if (T && (p == q)) { 429 isTI = PETSC_TRUE; 430 for (i = 0; i < p; i++) { 431 for (j = 0; j < q; j++) { 432 if (i == j) { 433 /* diagonal term must be 1 */ 434 if (T[i + j * p] != 1.0) isTI = PETSC_FALSE; 435 } else { 436 /* off-diagonal term must be 0 */ 437 if (T[i + j * p] != 0.0) isTI = PETSC_FALSE; 438 } 439 } 440 } 441 } 442 a->isTI = isTI; 443 444 PetscCall(PetscFree(a->T)); 445 if (T && (!isTI)) { 446 PetscCall(PetscMalloc1(p * q, &a->T)); 447 PetscCall(PetscMemcpy(a->T, T, p * q * sizeof(PetscScalar))); 448 } else a->T = NULL; 449 450 a->p = p; 451 a->q = q; 452 PetscFunctionReturn(PETSC_SUCCESS); 453 } 454 455 static PetscErrorCode MatDestroy_SeqKAIJ(Mat A) 456 { 457 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 458 459 PetscFunctionBegin; 460 PetscCall(MatDestroy(&b->AIJ)); 461 PetscCall(PetscFree(b->S)); 462 PetscCall(PetscFree(b->T)); 463 PetscCall(PetscFree(b->ibdiag)); 464 PetscCall(PetscFree5(b->sor.w, b->sor.y, b->sor.work, b->sor.t, b->sor.arr)); 465 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", NULL)); 466 PetscCall(PetscFree(A->data)); 467 PetscFunctionReturn(PETSC_SUCCESS); 468 } 469 470 static PetscErrorCode MatKAIJ_build_AIJ_OAIJ(Mat A) 471 { 472 Mat_MPIKAIJ *a; 473 Mat_MPIAIJ *mpiaij; 474 PetscScalar *T; 475 PetscInt i, j; 476 PetscObjectState state; 477 478 PetscFunctionBegin; 479 a = (Mat_MPIKAIJ *)A->data; 480 mpiaij = (Mat_MPIAIJ *)a->A->data; 481 482 PetscCall(PetscObjectStateGet((PetscObject)a->A, &state)); 483 if (state == a->state) { 484 /* The existing AIJ and KAIJ members are up-to-date, so simply exit. */ 485 PetscFunctionReturn(PETSC_SUCCESS); 486 } else { 487 PetscCall(MatDestroy(&a->AIJ)); 488 PetscCall(MatDestroy(&a->OAIJ)); 489 if (a->isTI) { 490 /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL. 491 * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will 492 * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix 493 * to pass in. */ 494 PetscCall(PetscMalloc1(a->p * a->q, &T)); 495 for (i = 0; i < a->p; i++) { 496 for (j = 0; j < a->q; j++) { 497 if (i == j) T[i + j * a->p] = 1.0; 498 else T[i + j * a->p] = 0.0; 499 } 500 } 501 } else T = a->T; 502 PetscCall(MatCreateKAIJ(mpiaij->A, a->p, a->q, a->S, T, &a->AIJ)); 503 PetscCall(MatCreateKAIJ(mpiaij->B, a->p, a->q, NULL, T, &a->OAIJ)); 504 if (a->isTI) PetscCall(PetscFree(T)); 505 a->state = state; 506 } 507 508 PetscFunctionReturn(PETSC_SUCCESS); 509 } 510 511 static PetscErrorCode MatSetUp_KAIJ(Mat A) 512 { 513 PetscInt n; 514 PetscMPIInt size; 515 Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ *)A->data; 516 517 PetscFunctionBegin; 518 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 519 if (size == 1) { 520 PetscCall(MatSetSizes(A, seqkaij->p * seqkaij->AIJ->rmap->n, seqkaij->q * seqkaij->AIJ->cmap->n, seqkaij->p * seqkaij->AIJ->rmap->N, seqkaij->q * seqkaij->AIJ->cmap->N)); 521 PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p)); 522 PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q)); 523 PetscCall(PetscLayoutSetUp(A->rmap)); 524 PetscCall(PetscLayoutSetUp(A->cmap)); 525 } else { 526 Mat_MPIKAIJ *a; 527 Mat_MPIAIJ *mpiaij; 528 IS from, to; 529 Vec gvec; 530 531 a = (Mat_MPIKAIJ *)A->data; 532 mpiaij = (Mat_MPIAIJ *)a->A->data; 533 PetscCall(MatSetSizes(A, a->p * a->A->rmap->n, a->q * a->A->cmap->n, a->p * a->A->rmap->N, a->q * a->A->cmap->N)); 534 PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p)); 535 PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q)); 536 PetscCall(PetscLayoutSetUp(A->rmap)); 537 PetscCall(PetscLayoutSetUp(A->cmap)); 538 539 PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); 540 541 PetscCall(VecGetSize(mpiaij->lvec, &n)); 542 PetscCall(VecCreate(PETSC_COMM_SELF, &a->w)); 543 PetscCall(VecSetSizes(a->w, n * a->q, n * a->q)); 544 PetscCall(VecSetBlockSize(a->w, a->q)); 545 PetscCall(VecSetType(a->w, VECSEQ)); 546 547 /* create two temporary Index sets for build scatter gather */ 548 PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)a->A), a->q, n, mpiaij->garray, PETSC_COPY_VALUES, &from)); 549 PetscCall(ISCreateStride(PETSC_COMM_SELF, n * a->q, 0, 1, &to)); 550 551 /* create temporary global vector to generate scatter context */ 552 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A), a->q, a->q * a->A->cmap->n, a->q * a->A->cmap->N, NULL, &gvec)); 553 554 /* generate the scatter context */ 555 PetscCall(VecScatterCreate(gvec, from, a->w, to, &a->ctx)); 556 557 PetscCall(ISDestroy(&from)); 558 PetscCall(ISDestroy(&to)); 559 PetscCall(VecDestroy(&gvec)); 560 } 561 562 A->assembled = PETSC_TRUE; 563 PetscFunctionReturn(PETSC_SUCCESS); 564 } 565 566 static PetscErrorCode MatView_KAIJ(Mat A, PetscViewer viewer) 567 { 568 PetscViewerFormat format; 569 Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data; 570 Mat B; 571 PetscInt i; 572 PetscBool ismpikaij; 573 574 PetscFunctionBegin; 575 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij)); 576 PetscCall(PetscViewerGetFormat(viewer, &format)); 577 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) { 578 PetscCall(PetscViewerASCIIPrintf(viewer, "S and T have %" PetscInt_FMT " rows and %" PetscInt_FMT " columns\n", a->p, a->q)); 579 580 /* Print appropriate details for S. */ 581 if (!a->S) { 582 PetscCall(PetscViewerASCIIPrintf(viewer, "S is NULL\n")); 583 } else if (format == PETSC_VIEWER_ASCII_IMPL) { 584 PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of S are ")); 585 for (i = 0; i < (a->p * a->q); i++) { 586 #if defined(PETSC_USE_COMPLEX) 587 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->S[i]), (double)PetscImaginaryPart(a->S[i]))); 588 #else 589 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->S[i]))); 590 #endif 591 } 592 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 593 } 594 595 /* Print appropriate details for T. */ 596 if (a->isTI) { 597 PetscCall(PetscViewerASCIIPrintf(viewer, "T is the identity matrix\n")); 598 } else if (!a->T) { 599 PetscCall(PetscViewerASCIIPrintf(viewer, "T is NULL\n")); 600 } else if (format == PETSC_VIEWER_ASCII_IMPL) { 601 PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of T are ")); 602 for (i = 0; i < (a->p * a->q); i++) { 603 #if defined(PETSC_USE_COMPLEX) 604 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->T[i]), (double)PetscImaginaryPart(a->T[i]))); 605 #else 606 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->T[i]))); 607 #endif 608 } 609 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 610 } 611 612 /* Now print details for the AIJ matrix, using the AIJ viewer. */ 613 PetscCall(PetscViewerASCIIPrintf(viewer, "Now viewing the associated AIJ matrix:\n")); 614 if (ismpikaij) { 615 Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 616 PetscCall(MatView(b->A, viewer)); 617 } else { 618 PetscCall(MatView(a->AIJ, viewer)); 619 } 620 621 } else { 622 /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */ 623 PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &B)); 624 PetscCall(MatView(B, viewer)); 625 PetscCall(MatDestroy(&B)); 626 } 627 PetscFunctionReturn(PETSC_SUCCESS); 628 } 629 630 static PetscErrorCode MatDestroy_MPIKAIJ(Mat A) 631 { 632 Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 633 634 PetscFunctionBegin; 635 PetscCall(MatDestroy(&b->AIJ)); 636 PetscCall(MatDestroy(&b->OAIJ)); 637 PetscCall(MatDestroy(&b->A)); 638 PetscCall(VecScatterDestroy(&b->ctx)); 639 PetscCall(VecDestroy(&b->w)); 640 PetscCall(PetscFree(b->S)); 641 PetscCall(PetscFree(b->T)); 642 PetscCall(PetscFree(b->ibdiag)); 643 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", NULL)); 644 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", NULL)); 645 PetscCall(PetscFree(A->data)); 646 PetscFunctionReturn(PETSC_SUCCESS); 647 } 648 649 /* zz = yy + Axx */ 650 static PetscErrorCode MatMultAdd_SeqKAIJ(Mat A, Vec xx, Vec yy, Vec zz) 651 { 652 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 653 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 654 const PetscScalar *s = b->S, *t = b->T; 655 const PetscScalar *x, *v, *bx; 656 PetscScalar *y, *sums; 657 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 658 PetscInt n, i, jrow, j, l, p = b->p, q = b->q, k; 659 660 PetscFunctionBegin; 661 if (!yy) { 662 PetscCall(VecSet(zz, 0.0)); 663 } else { 664 PetscCall(VecCopy(yy, zz)); 665 } 666 if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(PETSC_SUCCESS); 667 668 PetscCall(VecGetArrayRead(xx, &x)); 669 PetscCall(VecGetArray(zz, &y)); 670 idx = a->j; 671 v = a->a; 672 ii = a->i; 673 674 if (b->isTI) { 675 for (i = 0; i < m; i++) { 676 jrow = ii[i]; 677 n = ii[i + 1] - jrow; 678 sums = y + p * i; 679 for (j = 0; j < n; j++) { 680 for (k = 0; k < p; k++) sums[k] += v[jrow + j] * x[q * idx[jrow + j] + k]; 681 } 682 } 683 PetscCall(PetscLogFlops(3.0 * (a->nz) * p)); 684 } else if (t) { 685 for (i = 0; i < m; i++) { 686 jrow = ii[i]; 687 n = ii[i + 1] - jrow; 688 sums = y + p * i; 689 for (j = 0; j < n; j++) { 690 for (k = 0; k < p; k++) { 691 for (l = 0; l < q; l++) sums[k] += v[jrow + j] * t[k + l * p] * x[q * idx[jrow + j] + l]; 692 } 693 } 694 } 695 /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do), 696 * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T), 697 * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter 698 * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */ 699 PetscCall(PetscLogFlops((2.0 * p * q - p) * m + 2.0 * p * a->nz)); 700 } 701 if (s) { 702 for (i = 0; i < m; i++) { 703 sums = y + p * i; 704 bx = x + q * i; 705 if (i < b->AIJ->cmap->n) { 706 for (j = 0; j < q; j++) { 707 for (k = 0; k < p; k++) sums[k] += s[k + j * p] * bx[j]; 708 } 709 } 710 } 711 PetscCall(PetscLogFlops(2.0 * m * p * q)); 712 } 713 714 PetscCall(VecRestoreArrayRead(xx, &x)); 715 PetscCall(VecRestoreArray(zz, &y)); 716 PetscFunctionReturn(PETSC_SUCCESS); 717 } 718 719 static PetscErrorCode MatMult_SeqKAIJ(Mat A, Vec xx, Vec yy) 720 { 721 PetscFunctionBegin; 722 PetscCall(MatMultAdd_SeqKAIJ(A, xx, NULL, yy)); 723 PetscFunctionReturn(PETSC_SUCCESS); 724 } 725 726 #include <petsc/private/kernels/blockinvert.h> 727 728 static PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A, const PetscScalar **values) 729 { 730 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 731 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 732 const PetscScalar *S = b->S; 733 const PetscScalar *T = b->T; 734 const PetscScalar *v = a->a; 735 const PetscInt p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i; 736 PetscInt i, j, *v_pivots, dof, dof2; 737 PetscScalar *diag, aval, *v_work; 738 739 PetscFunctionBegin; 740 PetscCheck(p == q, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Block size must be square to calculate inverse."); 741 PetscCheck(S || T || b->isTI, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Cannot invert a zero matrix."); 742 743 dof = p; 744 dof2 = dof * dof; 745 746 if (b->ibdiagvalid) { 747 if (values) *values = b->ibdiag; 748 PetscFunctionReturn(PETSC_SUCCESS); 749 } 750 if (!b->ibdiag) PetscCall(PetscMalloc1(dof2 * m, &b->ibdiag)); 751 if (values) *values = b->ibdiag; 752 diag = b->ibdiag; 753 754 PetscCall(PetscMalloc2(dof, &v_work, dof, &v_pivots)); 755 for (i = 0; i < m; i++) { 756 if (S) { 757 PetscCall(PetscMemcpy(diag, S, dof2 * sizeof(PetscScalar))); 758 } else { 759 PetscCall(PetscMemzero(diag, dof2 * sizeof(PetscScalar))); 760 } 761 if (b->isTI) { 762 aval = 0; 763 for (j = ii[i]; j < ii[i + 1]; j++) 764 if (idx[j] == i) aval = v[j]; 765 for (j = 0; j < dof; j++) diag[j + dof * j] += aval; 766 } else if (T) { 767 aval = 0; 768 for (j = ii[i]; j < ii[i + 1]; j++) 769 if (idx[j] == i) aval = v[j]; 770 for (j = 0; j < dof2; j++) diag[j] += aval * T[j]; 771 } 772 PetscCall(PetscKernel_A_gets_inverse_A(dof, diag, v_pivots, v_work, PETSC_FALSE, NULL)); 773 diag += dof2; 774 } 775 PetscCall(PetscFree2(v_work, v_pivots)); 776 777 b->ibdiagvalid = PETSC_TRUE; 778 PetscFunctionReturn(PETSC_SUCCESS); 779 } 780 781 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A, Mat *B) 782 { 783 Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ *)A->data; 784 785 PetscFunctionBegin; 786 *B = kaij->AIJ; 787 PetscFunctionReturn(PETSC_SUCCESS); 788 } 789 790 static PetscErrorCode MatConvert_KAIJ_AIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 791 { 792 Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data; 793 Mat AIJ, OAIJ, B; 794 PetscInt *d_nnz, *o_nnz = NULL, nz, i, j, m, d; 795 const PetscInt p = a->p, q = a->q; 796 PetscBool ismpikaij, missing; 797 798 PetscFunctionBegin; 799 if (reuse != MAT_REUSE_MATRIX) { 800 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij)); 801 if (ismpikaij) { 802 Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 803 AIJ = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ; 804 OAIJ = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ; 805 } else { 806 AIJ = a->AIJ; 807 OAIJ = NULL; 808 } 809 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 810 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 811 PetscCall(MatSetType(B, MATAIJ)); 812 PetscCall(MatGetSize(AIJ, &m, NULL)); 813 PetscCall(MatMissingDiagonal(AIJ, &missing, &d)); /* assumption that all successive rows will have a missing diagonal */ 814 if (!missing || !a->S) d = m; 815 PetscCall(PetscMalloc1(m * p, &d_nnz)); 816 for (i = 0; i < m; ++i) { 817 PetscCall(MatGetRow_SeqAIJ(AIJ, i, &nz, NULL, NULL)); 818 for (j = 0; j < p; ++j) d_nnz[i * p + j] = nz * q + (i >= d) * q; 819 PetscCall(MatRestoreRow_SeqAIJ(AIJ, i, &nz, NULL, NULL)); 820 } 821 if (OAIJ) { 822 PetscCall(PetscMalloc1(m * p, &o_nnz)); 823 for (i = 0; i < m; ++i) { 824 PetscCall(MatGetRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL)); 825 for (j = 0; j < p; ++j) o_nnz[i * p + j] = nz * q; 826 PetscCall(MatRestoreRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL)); 827 } 828 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 829 } else { 830 PetscCall(MatSeqAIJSetPreallocation(B, 0, d_nnz)); 831 } 832 PetscCall(PetscFree(d_nnz)); 833 PetscCall(PetscFree(o_nnz)); 834 } else B = *newmat; 835 PetscCall(MatConvert_Basic(A, newtype, MAT_REUSE_MATRIX, &B)); 836 if (reuse == MAT_INPLACE_MATRIX) { 837 PetscCall(MatHeaderReplace(A, &B)); 838 } else *newmat = B; 839 PetscFunctionReturn(PETSC_SUCCESS); 840 } 841 842 static PetscErrorCode MatSOR_SeqKAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 843 { 844 Mat_SeqKAIJ *kaij = (Mat_SeqKAIJ *)A->data; 845 Mat_SeqAIJ *a = (Mat_SeqAIJ *)kaij->AIJ->data; 846 const PetscScalar *aa = a->a, *T = kaij->T, *v; 847 const PetscInt m = kaij->AIJ->rmap->n, *ai = a->i, *aj = a->j, p = kaij->p, q = kaij->q, *diag, *vi; 848 const PetscScalar *b, *xb, *idiag; 849 PetscScalar *x, *work, *workt, *w, *y, *arr, *t, *arrt; 850 PetscInt i, j, k, i2, bs, bs2, nz; 851 852 PetscFunctionBegin; 853 its = its * lits; 854 PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat"); 855 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 856 PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift"); 857 PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts"); 858 PetscCheck(p == q, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSOR for KAIJ: No support for non-square dense blocks"); 859 bs = p; 860 bs2 = bs * bs; 861 862 if (!m) PetscFunctionReturn(PETSC_SUCCESS); 863 864 if (!kaij->ibdiagvalid) PetscCall(MatInvertBlockDiagonal_SeqKAIJ(A, NULL)); 865 idiag = kaij->ibdiag; 866 diag = a->diag; 867 868 if (!kaij->sor.setup) { 869 PetscCall(PetscMalloc5(bs, &kaij->sor.w, bs, &kaij->sor.y, m * bs, &kaij->sor.work, m * bs, &kaij->sor.t, m * bs2, &kaij->sor.arr)); 870 kaij->sor.setup = PETSC_TRUE; 871 } 872 y = kaij->sor.y; 873 w = kaij->sor.w; 874 work = kaij->sor.work; 875 t = kaij->sor.t; 876 arr = kaij->sor.arr; 877 878 PetscCall(VecGetArray(xx, &x)); 879 PetscCall(VecGetArrayRead(bb, &b)); 880 881 if (flag & SOR_ZERO_INITIAL_GUESS) { 882 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 883 PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); /* x[0:bs] <- D^{-1} b[0:bs] */ 884 PetscCall(PetscMemcpy(t, b, bs * sizeof(PetscScalar))); 885 i2 = bs; 886 idiag += bs2; 887 for (i = 1; i < m; i++) { 888 v = aa + ai[i]; 889 vi = aj + ai[i]; 890 nz = diag[i] - ai[i]; 891 892 if (T) { /* b - T (Arow * x) */ 893 PetscCall(PetscMemzero(w, bs * sizeof(PetscScalar))); 894 for (j = 0; j < nz; j++) { 895 for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k]; 896 } 897 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs, w, T, &t[i2]); 898 for (k = 0; k < bs; k++) t[i2 + k] += b[i2 + k]; 899 } else if (kaij->isTI) { 900 PetscCall(PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar))); 901 for (j = 0; j < nz; j++) { 902 for (k = 0; k < bs; k++) t[i2 + k] -= v[j] * x[vi[j] * bs + k]; 903 } 904 } else { 905 PetscCall(PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar))); 906 } 907 908 PetscKernel_w_gets_Ar_times_v(bs, bs, t + i2, idiag, y); 909 for (j = 0; j < bs; j++) x[i2 + j] = omega * y[j]; 910 911 idiag += bs2; 912 i2 += bs; 913 } 914 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 915 PetscCall(PetscLogFlops(1.0 * bs2 * a->nz)); 916 xb = t; 917 } else xb = b; 918 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 919 idiag = kaij->ibdiag + bs2 * (m - 1); 920 i2 = bs * (m - 1); 921 PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar))); 922 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 923 i2 -= bs; 924 idiag -= bs2; 925 for (i = m - 2; i >= 0; i--) { 926 v = aa + diag[i] + 1; 927 vi = aj + diag[i] + 1; 928 nz = ai[i + 1] - diag[i] - 1; 929 930 if (T) { /* FIXME: This branch untested */ 931 PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar))); 932 /* copy all rows of x that are needed into contiguous space */ 933 workt = work; 934 for (j = 0; j < nz; j++) { 935 PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 936 workt += bs; 937 } 938 arrt = arr; 939 for (j = 0; j < nz; j++) { 940 PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 941 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 942 arrt += bs2; 943 } 944 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 945 } else if (kaij->isTI) { 946 PetscCall(PetscMemcpy(w, t + i2, bs * sizeof(PetscScalar))); 947 for (j = 0; j < nz; j++) { 948 for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k]; 949 } 950 } 951 952 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); /* RHS incorrect for omega != 1.0 */ 953 for (j = 0; j < bs; j++) x[i2 + j] = (1.0 - omega) * x[i2 + j] + omega * y[j]; 954 955 idiag -= bs2; 956 i2 -= bs; 957 } 958 PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz))); 959 } 960 its--; 961 } 962 while (its--) { /* FIXME: This branch not updated */ 963 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 964 i2 = 0; 965 idiag = kaij->ibdiag; 966 for (i = 0; i < m; i++) { 967 PetscCall(PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar))); 968 969 v = aa + ai[i]; 970 vi = aj + ai[i]; 971 nz = diag[i] - ai[i]; 972 workt = work; 973 for (j = 0; j < nz; j++) { 974 PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 975 workt += bs; 976 } 977 arrt = arr; 978 if (T) { 979 for (j = 0; j < nz; j++) { 980 PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 981 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 982 arrt += bs2; 983 } 984 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 985 } else if (kaij->isTI) { 986 for (j = 0; j < nz; j++) { 987 PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar))); 988 for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 989 arrt += bs2; 990 } 991 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 992 } 993 PetscCall(PetscMemcpy(t + i2, w, bs * sizeof(PetscScalar))); 994 995 v = aa + diag[i] + 1; 996 vi = aj + diag[i] + 1; 997 nz = ai[i + 1] - diag[i] - 1; 998 workt = work; 999 for (j = 0; j < nz; j++) { 1000 PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 1001 workt += bs; 1002 } 1003 arrt = arr; 1004 if (T) { 1005 for (j = 0; j < nz; j++) { 1006 PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 1007 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 1008 arrt += bs2; 1009 } 1010 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1011 } else if (kaij->isTI) { 1012 for (j = 0; j < nz; j++) { 1013 PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar))); 1014 for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 1015 arrt += bs2; 1016 } 1017 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1018 } 1019 1020 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); 1021 for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j); 1022 1023 idiag += bs2; 1024 i2 += bs; 1025 } 1026 xb = t; 1027 } else xb = b; 1028 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1029 idiag = kaij->ibdiag + bs2 * (m - 1); 1030 i2 = bs * (m - 1); 1031 if (xb == b) { 1032 for (i = m - 1; i >= 0; i--) { 1033 PetscCall(PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar))); 1034 1035 v = aa + ai[i]; 1036 vi = aj + ai[i]; 1037 nz = diag[i] - ai[i]; 1038 workt = work; 1039 for (j = 0; j < nz; j++) { 1040 PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 1041 workt += bs; 1042 } 1043 arrt = arr; 1044 if (T) { 1045 for (j = 0; j < nz; j++) { 1046 PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 1047 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 1048 arrt += bs2; 1049 } 1050 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1051 } else if (kaij->isTI) { 1052 for (j = 0; j < nz; j++) { 1053 PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar))); 1054 for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 1055 arrt += bs2; 1056 } 1057 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1058 } 1059 1060 v = aa + diag[i] + 1; 1061 vi = aj + diag[i] + 1; 1062 nz = ai[i + 1] - diag[i] - 1; 1063 workt = work; 1064 for (j = 0; j < nz; j++) { 1065 PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 1066 workt += bs; 1067 } 1068 arrt = arr; 1069 if (T) { 1070 for (j = 0; j < nz; j++) { 1071 PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 1072 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 1073 arrt += bs2; 1074 } 1075 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1076 } else if (kaij->isTI) { 1077 for (j = 0; j < nz; j++) { 1078 PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar))); 1079 for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 1080 arrt += bs2; 1081 } 1082 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1083 } 1084 1085 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); 1086 for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j); 1087 } 1088 } else { 1089 for (i = m - 1; i >= 0; i--) { 1090 PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar))); 1091 v = aa + diag[i] + 1; 1092 vi = aj + diag[i] + 1; 1093 nz = ai[i + 1] - diag[i] - 1; 1094 workt = work; 1095 for (j = 0; j < nz; j++) { 1096 PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 1097 workt += bs; 1098 } 1099 arrt = arr; 1100 if (T) { 1101 for (j = 0; j < nz; j++) { 1102 PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 1103 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 1104 arrt += bs2; 1105 } 1106 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1107 } else if (kaij->isTI) { 1108 for (j = 0; j < nz; j++) { 1109 PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar))); 1110 for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 1111 arrt += bs2; 1112 } 1113 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1114 } 1115 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); 1116 for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j); 1117 } 1118 } 1119 PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz))); 1120 } 1121 } 1122 1123 PetscCall(VecRestoreArray(xx, &x)); 1124 PetscCall(VecRestoreArrayRead(bb, &b)); 1125 PetscFunctionReturn(PETSC_SUCCESS); 1126 } 1127 1128 /*===================================================================================*/ 1129 1130 static PetscErrorCode MatMultAdd_MPIKAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1131 { 1132 Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 1133 1134 PetscFunctionBegin; 1135 if (!yy) { 1136 PetscCall(VecSet(zz, 0.0)); 1137 } else { 1138 PetscCall(VecCopy(yy, zz)); 1139 } 1140 PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */ 1141 /* start the scatter */ 1142 PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 1143 PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, zz, zz)); 1144 PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 1145 PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz)); 1146 PetscFunctionReturn(PETSC_SUCCESS); 1147 } 1148 1149 static PetscErrorCode MatMult_MPIKAIJ(Mat A, Vec xx, Vec yy) 1150 { 1151 PetscFunctionBegin; 1152 PetscCall(MatMultAdd_MPIKAIJ(A, xx, NULL, yy)); 1153 PetscFunctionReturn(PETSC_SUCCESS); 1154 } 1155 1156 static PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A, const PetscScalar **values) 1157 { 1158 Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 1159 1160 PetscFunctionBegin; 1161 PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ is up to date. */ 1162 PetscCall((*b->AIJ->ops->invertblockdiagonal)(b->AIJ, values)); 1163 PetscFunctionReturn(PETSC_SUCCESS); 1164 } 1165 1166 static PetscErrorCode MatGetRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values) 1167 { 1168 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 1169 PetscBool diag = PETSC_FALSE; 1170 PetscInt nzaij, nz, *colsaij, *idx, i, j, p = b->p, q = b->q, r = row / p, s = row % p, c; 1171 PetscScalar *vaij, *v, *S = b->S, *T = b->T; 1172 1173 PetscFunctionBegin; 1174 PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1175 b->getrowactive = PETSC_TRUE; 1176 PetscCheck(row >= 0 && row < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row); 1177 1178 if ((!S) && (!T) && (!b->isTI)) { 1179 if (ncols) *ncols = 0; 1180 if (cols) *cols = NULL; 1181 if (values) *values = NULL; 1182 PetscFunctionReturn(PETSC_SUCCESS); 1183 } 1184 1185 if (T || b->isTI) { 1186 PetscCall(MatGetRow_SeqAIJ(b->AIJ, r, &nzaij, &colsaij, &vaij)); 1187 c = nzaij; 1188 for (i = 0; i < nzaij; i++) { 1189 /* check if this row contains a diagonal entry */ 1190 if (colsaij[i] == r) { 1191 diag = PETSC_TRUE; 1192 c = i; 1193 } 1194 } 1195 } else nzaij = c = 0; 1196 1197 /* calculate size of row */ 1198 nz = 0; 1199 if (S) nz += q; 1200 if (T || b->isTI) nz += (diag && S ? (nzaij - 1) * q : nzaij * q); 1201 1202 if (cols || values) { 1203 PetscCall(PetscMalloc2(nz, &idx, nz, &v)); 1204 for (i = 0; i < q; i++) { 1205 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1206 v[i] = 0.0; 1207 } 1208 if (b->isTI) { 1209 for (i = 0; i < nzaij; i++) { 1210 for (j = 0; j < q; j++) { 1211 idx[i * q + j] = colsaij[i] * q + j; 1212 v[i * q + j] = (j == s ? vaij[i] : 0); 1213 } 1214 } 1215 } else if (T) { 1216 for (i = 0; i < nzaij; i++) { 1217 for (j = 0; j < q; j++) { 1218 idx[i * q + j] = colsaij[i] * q + j; 1219 v[i * q + j] = vaij[i] * T[s + j * p]; 1220 } 1221 } 1222 } 1223 if (S) { 1224 for (j = 0; j < q; j++) { 1225 idx[c * q + j] = r * q + j; 1226 v[c * q + j] += S[s + j * p]; 1227 } 1228 } 1229 } 1230 1231 if (ncols) *ncols = nz; 1232 if (cols) *cols = idx; 1233 if (values) *values = v; 1234 PetscFunctionReturn(PETSC_SUCCESS); 1235 } 1236 1237 static PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1238 { 1239 PetscFunctionBegin; 1240 if (nz) *nz = 0; 1241 PetscCall(PetscFree2(*idx, *v)); 1242 ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE; 1243 PetscFunctionReturn(PETSC_SUCCESS); 1244 } 1245 1246 static PetscErrorCode MatGetRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values) 1247 { 1248 Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 1249 Mat AIJ = b->A; 1250 PetscBool diag = PETSC_FALSE; 1251 Mat MatAIJ, MatOAIJ; 1252 const PetscInt rstart = A->rmap->rstart, rend = A->rmap->rend, p = b->p, q = b->q, *garray; 1253 PetscInt nz, *idx, ncolsaij = 0, ncolsoaij = 0, *colsaij, *colsoaij, r, s, c, i, j, lrow; 1254 PetscScalar *v, *vals, *ovals, *S = b->S, *T = b->T; 1255 1256 PetscFunctionBegin; 1257 PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */ 1258 MatAIJ = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ; 1259 MatOAIJ = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ; 1260 PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1261 b->getrowactive = PETSC_TRUE; 1262 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1263 lrow = row - rstart; 1264 1265 if ((!S) && (!T) && (!b->isTI)) { 1266 if (ncols) *ncols = 0; 1267 if (cols) *cols = NULL; 1268 if (values) *values = NULL; 1269 PetscFunctionReturn(PETSC_SUCCESS); 1270 } 1271 1272 r = lrow / p; 1273 s = lrow % p; 1274 1275 if (T || b->isTI) { 1276 PetscCall(MatMPIAIJGetSeqAIJ(AIJ, NULL, NULL, &garray)); 1277 PetscCall(MatGetRow_SeqAIJ(MatAIJ, lrow / p, &ncolsaij, &colsaij, &vals)); 1278 PetscCall(MatGetRow_SeqAIJ(MatOAIJ, lrow / p, &ncolsoaij, &colsoaij, &ovals)); 1279 1280 c = ncolsaij + ncolsoaij; 1281 for (i = 0; i < ncolsaij; i++) { 1282 /* check if this row contains a diagonal entry */ 1283 if (colsaij[i] == r) { 1284 diag = PETSC_TRUE; 1285 c = i; 1286 } 1287 } 1288 } else c = 0; 1289 1290 /* calculate size of row */ 1291 nz = 0; 1292 if (S) nz += q; 1293 if (T || b->isTI) nz += (diag && S ? (ncolsaij + ncolsoaij - 1) * q : (ncolsaij + ncolsoaij) * q); 1294 1295 if (cols || values) { 1296 PetscCall(PetscMalloc2(nz, &idx, nz, &v)); 1297 for (i = 0; i < q; i++) { 1298 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1299 v[i] = 0.0; 1300 } 1301 if (b->isTI) { 1302 for (i = 0; i < ncolsaij; i++) { 1303 for (j = 0; j < q; j++) { 1304 idx[i * q + j] = (colsaij[i] + rstart / p) * q + j; 1305 v[i * q + j] = (j == s ? vals[i] : 0.0); 1306 } 1307 } 1308 for (i = 0; i < ncolsoaij; i++) { 1309 for (j = 0; j < q; j++) { 1310 idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j; 1311 v[(i + ncolsaij) * q + j] = (j == s ? ovals[i] : 0.0); 1312 } 1313 } 1314 } else if (T) { 1315 for (i = 0; i < ncolsaij; i++) { 1316 for (j = 0; j < q; j++) { 1317 idx[i * q + j] = (colsaij[i] + rstart / p) * q + j; 1318 v[i * q + j] = vals[i] * T[s + j * p]; 1319 } 1320 } 1321 for (i = 0; i < ncolsoaij; i++) { 1322 for (j = 0; j < q; j++) { 1323 idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j; 1324 v[(i + ncolsaij) * q + j] = ovals[i] * T[s + j * p]; 1325 } 1326 } 1327 } 1328 if (S) { 1329 for (j = 0; j < q; j++) { 1330 idx[c * q + j] = (r + rstart / p) * q + j; 1331 v[c * q + j] += S[s + j * p]; 1332 } 1333 } 1334 } 1335 1336 if (ncols) *ncols = nz; 1337 if (cols) *cols = idx; 1338 if (values) *values = v; 1339 PetscFunctionReturn(PETSC_SUCCESS); 1340 } 1341 1342 static PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1343 { 1344 PetscFunctionBegin; 1345 PetscCall(PetscFree2(*idx, *v)); 1346 ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE; 1347 PetscFunctionReturn(PETSC_SUCCESS); 1348 } 1349 1350 static PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat) 1351 { 1352 Mat A; 1353 1354 PetscFunctionBegin; 1355 PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A)); 1356 PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat)); 1357 PetscCall(MatDestroy(&A)); 1358 PetscFunctionReturn(PETSC_SUCCESS); 1359 } 1360 1361 /*@C 1362 MatCreateKAIJ - Creates a matrix of type `MATKAIJ` to be used for matrices of the following form 1363 .vb 1364 [I \otimes S + A \otimes T] 1365 .ve 1366 where 1367 .vb 1368 S is a dense (p \times q) matrix 1369 T is a dense (p \times q) matrix 1370 A is a `MATAIJ` (n \times n) matrix 1371 I is the identity matrix 1372 .ve 1373 The resulting matrix is (np \times nq) 1374 1375 `S` and `T` are always stored independently on all processes as `PetscScalar` arrays in column-major format. 1376 1377 Collective 1378 1379 Input Parameters: 1380 + A - the `MATAIJ` matrix 1381 . p - number of rows in `S` and `T` 1382 . q - number of columns in `S` and `T` 1383 . S - the `S` matrix (can be `NULL`), stored as a `PetscScalar` array (column-major) 1384 - T - the `T` matrix (can be `NULL`), stored as a `PetscScalar` array (column-major) 1385 1386 Output Parameter: 1387 . kaij - the new `MATKAIJ` matrix 1388 1389 Level: advanced 1390 1391 Notes: 1392 This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed. 1393 1394 Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix. 1395 1396 Developer Note: 1397 In the `MATMPIKAIJ` case, the internal 'AIJ' and 'OAIJ' sequential KAIJ matrices are kept up to date by tracking the object state 1398 of the AIJ matrix 'A' that describes the blockwise action of the `MATMPIKAIJ` matrix and, if the object state has changed, lazily 1399 rebuilding 'AIJ' and 'OAIJ' just before executing operations with the `MATMPIKAIJ` matrix. If new types of operations are added, 1400 routines implementing those must also ensure these are rebuilt when needed (by calling the internal MatKAIJ_build_AIJ_OAIJ() routine). 1401 1402 .seealso: [](chapter_matrices), `Mat`, `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MATKAIJ` 1403 @*/ 1404 PetscErrorCode MatCreateKAIJ(Mat A, PetscInt p, PetscInt q, const PetscScalar S[], const PetscScalar T[], Mat *kaij) 1405 { 1406 PetscFunctionBegin; 1407 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), kaij)); 1408 PetscCall(MatSetType(*kaij, MATKAIJ)); 1409 PetscCall(MatKAIJSetAIJ(*kaij, A)); 1410 PetscCall(MatKAIJSetS(*kaij, p, q, S)); 1411 PetscCall(MatKAIJSetT(*kaij, p, q, T)); 1412 PetscCall(MatSetUp(*kaij)); 1413 PetscFunctionReturn(PETSC_SUCCESS); 1414 } 1415 1416 /*MC 1417 MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form 1418 [I \otimes S + A \otimes T], 1419 where 1420 .vb 1421 S is a dense (p \times q) matrix, 1422 T is a dense (p \times q) matrix, 1423 A is an AIJ (n \times n) matrix, 1424 and I is the identity matrix. 1425 .ve 1426 The resulting matrix is (np \times nq). 1427 1428 S and T are always stored independently on all processes as `PetscScalar` arrays in column-major format. 1429 1430 Level: advanced 1431 1432 Note: 1433 A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b, 1434 where x and b are column vectors containing the row-major representations of X and B. 1435 1436 .seealso: [](chapter_matrices), `Mat`, `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MatCreateKAIJ()` 1437 M*/ 1438 1439 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A) 1440 { 1441 Mat_MPIKAIJ *b; 1442 PetscMPIInt size; 1443 1444 PetscFunctionBegin; 1445 PetscCall(PetscNew(&b)); 1446 A->data = (void *)b; 1447 1448 PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps))); 1449 1450 b->w = NULL; 1451 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 1452 if (size == 1) { 1453 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQKAIJ)); 1454 A->ops->destroy = MatDestroy_SeqKAIJ; 1455 A->ops->mult = MatMult_SeqKAIJ; 1456 A->ops->multadd = MatMultAdd_SeqKAIJ; 1457 A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ; 1458 A->ops->getrow = MatGetRow_SeqKAIJ; 1459 A->ops->restorerow = MatRestoreRow_SeqKAIJ; 1460 A->ops->sor = MatSOR_SeqKAIJ; 1461 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", MatConvert_KAIJ_AIJ)); 1462 } else { 1463 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIKAIJ)); 1464 A->ops->destroy = MatDestroy_MPIKAIJ; 1465 A->ops->mult = MatMult_MPIKAIJ; 1466 A->ops->multadd = MatMultAdd_MPIKAIJ; 1467 A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ; 1468 A->ops->getrow = MatGetRow_MPIKAIJ; 1469 A->ops->restorerow = MatRestoreRow_MPIKAIJ; 1470 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", MatGetDiagonalBlock_MPIKAIJ)); 1471 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", MatConvert_KAIJ_AIJ)); 1472 } 1473 A->ops->setup = MatSetUp_KAIJ; 1474 A->ops->view = MatView_KAIJ; 1475 A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ; 1476 PetscFunctionReturn(PETSC_SUCCESS); 1477 } 1478