1 /* 2 Defines the basic matrix operations for the KAIJ matrix storage format. 3 This format is used to evaluate matrices of the form: 4 5 [I \otimes S + A \otimes T] 6 7 where 8 S is a dense (p \times q) matrix 9 T is a dense (p \times q) matrix 10 A is an AIJ (n \times n) matrix 11 I is the identity matrix 12 13 The resulting matrix is (np \times nq) 14 15 We provide: 16 MatMult() 17 MatMultAdd() 18 MatInvertBlockDiagonal() 19 and 20 MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*) 21 22 This single directory handles both the sequential and parallel codes 23 */ 24 25 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/ 26 #include <../src/mat/utils/freespace.h> 27 #include <petsc/private/vecimpl.h> 28 29 /*@ 30 MatKAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix 31 32 Not Collective, but if the `MATKAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel 33 34 Input Parameter: 35 . A - the `MATKAIJ` matrix 36 37 Output Parameter: 38 . B - the `MATAIJ` matrix 39 40 Level: advanced 41 42 Note: 43 The reference count on the `MATAIJ` matrix is not increased so you should not destroy it. 
44 45 .seealso: [](ch_matrices), `Mat`, `MatCreateKAIJ()`, `MATKAIJ`, `MATAIJ` 46 @*/ 47 PetscErrorCode MatKAIJGetAIJ(Mat A, Mat *B) 48 { 49 PetscBool ismpikaij, isseqkaij; 50 51 PetscFunctionBegin; 52 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij)); 53 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQKAIJ, &isseqkaij)); 54 if (ismpikaij) { 55 Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 56 57 *B = b->A; 58 } else if (isseqkaij) { 59 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 60 61 *B = b->AIJ; 62 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix passed in is not of type KAIJ"); 63 PetscFunctionReturn(PETSC_SUCCESS); 64 } 65 66 /*@C 67 MatKAIJGetS - Get the `S` matrix describing the shift action of the `MATKAIJ` matrix 68 69 Not Collective; the entire `S` is stored and returned independently on all processes. 70 71 Input Parameter: 72 . A - the `MATKAIJ` matrix 73 74 Output Parameters: 75 + m - the number of rows in `S` 76 . n - the number of columns in `S` 77 - S - the S matrix, in form of a scalar array in column-major format 78 79 Level: advanced 80 81 Note: 82 All output parameters are optional (pass `NULL` if not desired) 83 84 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()` 85 @*/ 86 PetscErrorCode MatKAIJGetS(Mat A, PetscInt *m, PetscInt *n, PetscScalar *S[]) 87 { 88 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 89 90 PetscFunctionBegin; 91 if (m) *m = b->p; 92 if (n) *n = b->q; 93 if (S) *S = b->S; 94 PetscFunctionReturn(PETSC_SUCCESS); 95 } 96 97 /*@C 98 MatKAIJGetSRead - Get a read-only pointer to the `S` matrix describing the shift action of the `MATKAIJ` matrix 99 100 Not Collective; the entire `S` is stored and returned independently on all processes. 101 102 Input Parameter: 103 . A - the `MATKAIJ` matrix 104 105 Output Parameters: 106 + m - the number of rows in `S` 107 . 
n - the number of columns in `S` 108 - S - the S matrix, in form of a scalar array in column-major format 109 110 Level: advanced 111 112 Note: 113 All output parameters are optional (pass `NULL` if not desired) 114 115 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()` 116 @*/ 117 PetscErrorCode MatKAIJGetSRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar *S[]) 118 { 119 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 120 121 PetscFunctionBegin; 122 if (m) *m = b->p; 123 if (n) *n = b->q; 124 if (S) *S = b->S; 125 PetscFunctionReturn(PETSC_SUCCESS); 126 } 127 128 /*@C 129 MatKAIJRestoreS - Restore array obtained with `MatKAIJGetS()` 130 131 Not Collective 132 133 Input Parameters: 134 + A - the `MATKAIJ` matrix 135 - S - location of pointer to array obtained with `MatKAIJGetS()` 136 137 Level: advanced 138 139 Note: 140 This routine zeros the array pointer to prevent accidental reuse after it has been restored. 141 If `NULL` is passed, it will not attempt to zero the array pointer. 142 143 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()` 144 @*/ 145 PetscErrorCode MatKAIJRestoreS(Mat A, PetscScalar *S[]) 146 { 147 PetscFunctionBegin; 148 if (S) *S = NULL; 149 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 150 PetscFunctionReturn(PETSC_SUCCESS); 151 } 152 153 /*@C 154 MatKAIJRestoreSRead - Restore array obtained with `MatKAIJGetSRead()` 155 156 Not Collective 157 158 Input Parameters: 159 + A - the `MATKAIJ` matrix 160 - S - location of pointer to array obtained with `MatKAIJGetS()` 161 162 Level: advanced 163 164 Note: 165 This routine zeros the array pointer to prevent accidental reuse after it has been restored. 166 If `NULL` is passed, it will not attempt to zero the array pointer. 
167 168 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()` 169 @*/ 170 PetscErrorCode MatKAIJRestoreSRead(Mat A, const PetscScalar *S[]) 171 { 172 PetscFunctionBegin; 173 if (S) *S = NULL; 174 PetscFunctionReturn(PETSC_SUCCESS); 175 } 176 177 /*@C 178 MatKAIJGetT - Get the transformation matrix `T` associated with the `MATKAIJ` matrix 179 180 Not Collective; the entire `T` is stored and returned independently on all processes 181 182 Input Parameter: 183 . A - the `MATKAIJ` matrix 184 185 Output Parameters: 186 + m - the number of rows in `T` 187 . n - the number of columns in `T` 188 - T - the T matrix, in form of a scalar array in column-major format 189 190 Level: advanced 191 192 Note: 193 All output parameters are optional (pass `NULL` if not desired) 194 195 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()` 196 @*/ 197 PetscErrorCode MatKAIJGetT(Mat A, PetscInt *m, PetscInt *n, PetscScalar *T[]) 198 { 199 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 200 201 PetscFunctionBegin; 202 if (m) *m = b->p; 203 if (n) *n = b->q; 204 if (T) *T = b->T; 205 PetscFunctionReturn(PETSC_SUCCESS); 206 } 207 208 /*@C 209 MatKAIJGetTRead - Get a read-only pointer to the transformation matrix `T` associated with the `MATKAIJ` matrix 210 211 Not Collective; the entire `T` is stored and returned independently on all processes 212 213 Input Parameter: 214 . A - the `MATKAIJ` matrix 215 216 Output Parameters: 217 + m - the number of rows in `T` 218 . 
n - the number of columns in `T` 219 - T - the T matrix, in form of a scalar array in column-major format 220 221 Level: advanced 222 223 Note: 224 All output parameters are optional (pass `NULL` if not desired) 225 226 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()` 227 @*/ 228 PetscErrorCode MatKAIJGetTRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar *T[]) 229 { 230 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 231 232 PetscFunctionBegin; 233 if (m) *m = b->p; 234 if (n) *n = b->q; 235 if (T) *T = b->T; 236 PetscFunctionReturn(PETSC_SUCCESS); 237 } 238 239 /*@C 240 MatKAIJRestoreT - Restore array obtained with `MatKAIJGetT()` 241 242 Not Collective 243 244 Input Parameters: 245 + A - the `MATKAIJ` matrix 246 - T - location of pointer to array obtained with `MatKAIJGetS()` 247 248 Level: advanced 249 250 Note: 251 This routine zeros the array pointer to prevent accidental reuse after it has been restored. 252 If `NULL` is passed, it will not attempt to zero the array pointer. 253 254 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()` 255 @*/ 256 PetscErrorCode MatKAIJRestoreT(Mat A, PetscScalar *T[]) 257 { 258 PetscFunctionBegin; 259 if (T) *T = NULL; 260 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 261 PetscFunctionReturn(PETSC_SUCCESS); 262 } 263 264 /*@C 265 MatKAIJRestoreTRead - Restore array obtained with `MatKAIJGetTRead()` 266 267 Not Collective 268 269 Input Parameters: 270 + A - the `MATKAIJ` matrix 271 - T - location of pointer to array obtained with `MatKAIJGetS()` 272 273 Level: advanced 274 275 Note: 276 This routine zeros the array pointer to prevent accidental reuse after it has been restored. 277 If `NULL` is passed, it will not attempt to zero the array pointer. 
278 279 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()` 280 @*/ 281 PetscErrorCode MatKAIJRestoreTRead(Mat A, const PetscScalar *T[]) 282 { 283 PetscFunctionBegin; 284 if (T) *T = NULL; 285 PetscFunctionReturn(PETSC_SUCCESS); 286 } 287 288 /*@ 289 MatKAIJSetAIJ - Set the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix 290 291 Logically Collective; if the `MATAIJ` matrix is parallel, the `MATKAIJ` matrix is also parallel 292 293 Input Parameters: 294 + A - the `MATKAIJ` matrix 295 - B - the `MATAIJ` matrix 296 297 Level: advanced 298 299 Notes: 300 This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed. 301 302 Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix. 303 304 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()` 305 @*/ 306 PetscErrorCode MatKAIJSetAIJ(Mat A, Mat B) 307 { 308 PetscMPIInt size; 309 PetscBool flg; 310 311 PetscFunctionBegin; 312 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 313 if (size == 1) { 314 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQAIJ, &flg)); 315 PetscCheck(flg, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MatKAIJSetAIJ() with MATSEQKAIJ does not support %s as the AIJ mat", ((PetscObject)B)->type_name); 316 Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data; 317 a->AIJ = B; 318 } else { 319 Mat_MPIKAIJ *a = (Mat_MPIKAIJ *)A->data; 320 a->A = B; 321 } 322 PetscCall(PetscObjectReference((PetscObject)B)); 323 PetscFunctionReturn(PETSC_SUCCESS); 324 } 325 326 /*@ 327 MatKAIJSetS - Set the `S` matrix describing the shift action of the `MATKAIJ` matrix 328 329 Logically Collective; the entire `S` is stored independently on all processes. 330 331 Input Parameters: 332 + A - the `MATKAIJ` matrix 333 . p - the number of rows in `S` 334 . 
q - the number of columns in `S` 335 - S - the S matrix, in form of a scalar array in column-major format 336 337 Level: advanced 338 339 Notes: 340 The dimensions `p` and `q` must match those of the transformation matrix `T` associated with the `MATKAIJ` matrix. 341 342 The `S` matrix is copied, so the user can destroy this array. 343 344 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJSetT()`, `MatKAIJSetAIJ()` 345 @*/ 346 PetscErrorCode MatKAIJSetS(Mat A, PetscInt p, PetscInt q, const PetscScalar S[]) 347 { 348 Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data; 349 350 PetscFunctionBegin; 351 PetscCall(PetscFree(a->S)); 352 if (S) { 353 PetscCall(PetscMalloc1(p * q, &a->S)); 354 PetscCall(PetscArraycpy(a->S, S, p * q)); 355 } else a->S = NULL; 356 357 a->p = p; 358 a->q = q; 359 PetscFunctionReturn(PETSC_SUCCESS); 360 } 361 362 /*@ 363 MatKAIJGetScaledIdentity - Check if both `S` and `T` are scaled identities. 364 365 Logically Collective. 366 367 Input Parameter: 368 . A - the `MATKAIJ` matrix 369 370 Output Parameter: 371 . 
identity - the Boolean value 372 373 Level: advanced 374 375 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetT()` 376 @*/ 377 PetscErrorCode MatKAIJGetScaledIdentity(Mat A, PetscBool *identity) 378 { 379 Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data; 380 PetscInt i, j; 381 382 PetscFunctionBegin; 383 if (a->p != a->q) { 384 *identity = PETSC_FALSE; 385 PetscFunctionReturn(PETSC_SUCCESS); 386 } else *identity = PETSC_TRUE; 387 if (!a->isTI || a->S) { 388 for (i = 0; i < a->p && *identity; i++) { 389 for (j = 0; j < a->p && *identity; j++) { 390 if (i != j) { 391 if (a->S && PetscAbsScalar(a->S[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE; 392 if (a->T && PetscAbsScalar(a->T[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE; 393 } else { 394 if (a->S && PetscAbsScalar(a->S[i * (a->p + 1)] - a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE; 395 if (a->T && PetscAbsScalar(a->T[i * (a->p + 1)] - a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE; 396 } 397 } 398 } 399 } 400 PetscFunctionReturn(PETSC_SUCCESS); 401 } 402 403 /*@ 404 MatKAIJSetT - Set the transformation matrix `T` associated with the `MATKAIJ` matrix 405 406 Logically Collective; the entire `T` is stored independently on all processes. 407 408 Input Parameters: 409 + A - the `MATKAIJ` matrix 410 . p - the number of rows in `S` 411 . q - the number of columns in `S` 412 - T - the `T` matrix, in form of a scalar array in column-major format 413 414 Level: advanced 415 416 Notes: 417 The dimensions `p` and `q` must match those of the shift matrix `S` associated with the `MATKAIJ` matrix. 418 419 The `T` matrix is copied, so the user can destroy this array. 
420 421 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJSetS()`, `MatKAIJSetAIJ()` 422 @*/ 423 PetscErrorCode MatKAIJSetT(Mat A, PetscInt p, PetscInt q, const PetscScalar T[]) 424 { 425 PetscInt i, j; 426 Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data; 427 PetscBool isTI = PETSC_FALSE; 428 429 PetscFunctionBegin; 430 /* check if T is an identity matrix */ 431 if (T && (p == q)) { 432 isTI = PETSC_TRUE; 433 for (i = 0; i < p; i++) { 434 for (j = 0; j < q; j++) { 435 if (i == j) { 436 /* diagonal term must be 1 */ 437 if (T[i + j * p] != 1.0) isTI = PETSC_FALSE; 438 } else { 439 /* off-diagonal term must be 0 */ 440 if (T[i + j * p] != 0.0) isTI = PETSC_FALSE; 441 } 442 } 443 } 444 } 445 a->isTI = isTI; 446 447 PetscCall(PetscFree(a->T)); 448 if (T && (!isTI)) { 449 PetscCall(PetscMalloc1(p * q, &a->T)); 450 PetscCall(PetscArraycpy(a->T, T, p * q)); 451 } else a->T = NULL; 452 453 a->p = p; 454 a->q = q; 455 PetscFunctionReturn(PETSC_SUCCESS); 456 } 457 458 static PetscErrorCode MatDestroy_SeqKAIJ(Mat A) 459 { 460 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 461 462 PetscFunctionBegin; 463 PetscCall(MatDestroy(&b->AIJ)); 464 PetscCall(PetscFree(b->S)); 465 PetscCall(PetscFree(b->T)); 466 PetscCall(PetscFree(b->ibdiag)); 467 PetscCall(PetscFree5(b->sor.w, b->sor.y, b->sor.work, b->sor.t, b->sor.arr)); 468 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", NULL)); 469 PetscCall(PetscFree(A->data)); 470 PetscFunctionReturn(PETSC_SUCCESS); 471 } 472 473 static PetscErrorCode MatKAIJ_build_AIJ_OAIJ(Mat A) 474 { 475 Mat_MPIKAIJ *a; 476 Mat_MPIAIJ *mpiaij; 477 PetscScalar *T; 478 PetscInt i, j; 479 PetscObjectState state; 480 481 PetscFunctionBegin; 482 a = (Mat_MPIKAIJ *)A->data; 483 mpiaij = (Mat_MPIAIJ *)a->A->data; 484 485 PetscCall(PetscObjectStateGet((PetscObject)a->A, &state)); 486 if (state == a->state) { 487 /* The existing AIJ and KAIJ members are up-to-date, so simply exit. 
*/
    PetscFunctionReturn(PETSC_SUCCESS);
  } else {
    PetscCall(MatDestroy(&a->AIJ));
    PetscCall(MatDestroy(&a->OAIJ));
    if (a->isTI) {
      /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
       * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
       * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
       * to pass in. */
      PetscCall(PetscMalloc1(a->p * a->q, &T));
      for (i = 0; i < a->p; i++) {
        for (j = 0; j < a->q; j++) {
          if (i == j) T[i + j * a->p] = 1.0;
          else T[i + j * a->p] = 0.0;
        }
      }
    } else T = a->T;
    PetscCall(MatCreateKAIJ(mpiaij->A, a->p, a->q, a->S, T, &a->AIJ));
    /* The off-diagonal part gets no shift S: the identity term applies to diagonal blocks only. */
    PetscCall(MatCreateKAIJ(mpiaij->B, a->p, a->q, NULL, T, &a->OAIJ));
    if (a->isTI) PetscCall(PetscFree(T)); /* temporary identity no longer needed */
    a->state = state;                     /* remember the AIJ state this rebuild was based on */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set sizes and layouts of a KAIJ matrix and, in parallel, build the
   sequential submatrices and the scatter needed by the multiply routines. */
static PetscErrorCode MatSetUp_KAIJ(Mat A)
{
  PetscInt     n;
  PetscMPIInt  size;
  Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ *)A->data;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* Overall sizes are the AIJ sizes scaled by the dense block dimensions p x q. */
    PetscCall(MatSetSizes(A, seqkaij->p * seqkaij->AIJ->rmap->n, seqkaij->q * seqkaij->AIJ->cmap->n, seqkaij->p * seqkaij->AIJ->rmap->N, seqkaij->q * seqkaij->AIJ->cmap->N));
    PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p));
    PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q));
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
  } else {
    Mat_MPIKAIJ *a;
    Mat_MPIAIJ  *mpiaij;
    IS           from, to;
    Vec          gvec;

    a      = (Mat_MPIKAIJ *)A->data;
    mpiaij = (Mat_MPIAIJ *)a->A->data;
    PetscCall(MatSetSizes(A, a->p * a->A->rmap->n, a->q * a->A->cmap->n, a->p * a->A->rmap->N, a->q * a->A->cmap->N));
    /* NOTE(review): seqkaij aliases the same A->data as a here; this relies on
       p and q living at matching offsets in Mat_SeqKAIJ and Mat_MPIKAIJ —
       confirm the two struct layouts stay in sync. */
    PetscCall(PetscLayoutSetBlockSize(A->rmap, 
seqkaij->p));
    PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q));
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));

    PetscCall(MatKAIJ_build_AIJ_OAIJ(A));

    /* Local work vector sized for the ghosted columns: q entries per ghost point. */
    PetscCall(VecGetSize(mpiaij->lvec, &n));
    PetscCall(VecCreate(PETSC_COMM_SELF, &a->w));
    PetscCall(VecSetSizes(a->w, n * a->q, n * a->q));
    PetscCall(VecSetBlockSize(a->w, a->q));
    PetscCall(VecSetType(a->w, VECSEQ));

    /* create two temporary Index sets for build scatter gather */
    PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)a->A), a->q, n, mpiaij->garray, PETSC_COPY_VALUES, &from));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, n * a->q, 0, 1, &to));

    /* create temporary global vector to generate scatter context */
    PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A), a->q, a->q * a->A->cmap->n, a->q * a->A->cmap->N, NULL, &gvec));

    /* generate the scatter context */
    PetscCall(VecScatterCreate(gvec, from, a->w, to, &a->ctx));

    PetscCall(ISDestroy(&from));
    PetscCall(ISDestroy(&to));
    PetscCall(VecDestroy(&gvec));
  }

  A->assembled = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* View a KAIJ matrix: for info/impl ASCII formats print S, T, and the
   underlying AIJ matrix; for anything else convert to AIJ and view that. */
static PetscErrorCode MatView_KAIJ(Mat A, PetscViewer viewer)
{
  PetscViewerFormat format;
  Mat_SeqKAIJ      *a = (Mat_SeqKAIJ *)A->data;
  Mat               B;
  PetscInt          i;
  PetscBool         ismpikaij;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
  PetscCall(PetscViewerGetFormat(viewer, &format));
  if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
    PetscCall(PetscViewerASCIIPrintf(viewer, "S and T have %" PetscInt_FMT " rows and %" PetscInt_FMT " columns\n", a->p, a->q));

    /* Print appropriate details for S.
*/
    if (!a->S) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "S is NULL\n"));
    } else if (format == PETSC_VIEWER_ASCII_IMPL) {
      /* Dump the raw column-major entries of S (complex builds print re/im pairs). */
      PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of S are "));
      for (i = 0; i < (a->p * a->q); i++) {
#if defined(PETSC_USE_COMPLEX)
        PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->S[i]), (double)PetscImaginaryPart(a->S[i])));
#else
        PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->S[i])));
#endif
      }
      PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
    }

    /* Print appropriate details for T. */
    if (a->isTI) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "T is the identity matrix\n"));
    } else if (!a->T) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "T is NULL\n"));
    } else if (format == PETSC_VIEWER_ASCII_IMPL) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of T are "));
      for (i = 0; i < (a->p * a->q); i++) {
#if defined(PETSC_USE_COMPLEX)
        PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->T[i]), (double)PetscImaginaryPart(a->T[i])));
#else
        PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->T[i])));
#endif
      }
      PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
    }

    /* Now print details for the AIJ matrix, using the AIJ viewer. */
    PetscCall(PetscViewerASCIIPrintf(viewer, "Now viewing the associated AIJ matrix:\n"));
    if (ismpikaij) {
      Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
      PetscCall(MatView(b->A, viewer));
    } else {
      PetscCall(MatView(a->AIJ, viewer));
    }

  } else {
    /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that.
*/
    PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &B));
    PetscCall(MatView(B, viewer));
    PetscCall(MatDestroy(&B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release all memory held by a parallel KAIJ matrix. */
static PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
{
  Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDestroy(&b->AIJ));
  PetscCall(MatDestroy(&b->OAIJ));
  PetscCall(MatDestroy(&b->A));
  PetscCall(VecScatterDestroy(&b->ctx));
  PetscCall(VecDestroy(&b->w));
  PetscCall(PetscFree(b->S));
  PetscCall(PetscFree(b->T));
  PetscCall(PetscFree(b->ibdiag));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", NULL));
  PetscCall(PetscFree(A->data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* zz = yy + Axx */
static PetscErrorCode MatMultAdd_SeqKAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_SeqKAIJ       *b = (Mat_SeqKAIJ *)A->data;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
  const PetscScalar *s = b->S, *t = b->T;
  const PetscScalar *x, *v, *bx;
  PetscScalar       *y, *sums;
  const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
  PetscInt           n, i, jrow, j, l, p = b->p, q = b->q, k;

  PetscFunctionBegin;
  /* Seed zz with yy, or with zero when called as a plain multiply (yy == NULL). */
  if (!yy) {
    PetscCall(VecSet(zz, 0.0));
  } else {
    PetscCall(VecCopy(yy, zz));
  }
  /* No S, no T, and T not the identity: the operator contributes nothing. */
  if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArray(zz, &y));
  idx = a->j; /* CSR column indices of the AIJ matrix */
  v   = a->a; /* CSR values */
  ii  = a->i; /* CSR row offsets */

  if (b->isTI) {
    /* T == I: each AIJ entry scales a q-vector of x directly, no dense T product. */
    for (i = 0; i < m; i++) {
      jrow = ii[i];
      n    = ii[i + 1] - jrow;
      sums = y + p * i;
      for (j = 0; j < n; j++) {
        for (k = 0; k < p; k++) sums[k] += v[jrow + j] * x[q * idx[jrow + j] + k];
      }
    }
    PetscCall(PetscLogFlops(3.0 * (a->nz) * p));
  } else if (t) {
    /* General T: apply the dense p x q block T scaled by each AIJ entry. */
    for (i = 0; i < m; i++) {
      jrow = ii[i];
      n    = ii[i + 1] - jrow;
      sums 
= y + p * i;
      for (j = 0; j < n; j++) {
        for (k = 0; k < p; k++) {
          for (l = 0; l < q; l++) sums[k] += v[jrow + j] * t[k + l * p] * x[q * idx[jrow + j] + l];
        }
      }
    }
    /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do),
     * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T),
     * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter
     * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */
    PetscCall(PetscLogFlops((2.0 * p * q - p) * m + 2.0 * p * a->nz));
  }
  if (s) {
    /* Add the I \otimes S contribution: one dense p x q block per local diagonal point. */
    for (i = 0; i < m; i++) {
      sums = y + p * i;
      bx   = x + q * i;
      if (i < b->AIJ->cmap->n) {
        for (j = 0; j < q; j++) {
          for (k = 0; k < p; k++) sums[k] += s[k + j * p] * bx[j];
        }
      }
    }
    PetscCall(PetscLogFlops(2.0 * m * p * q));
  }

  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(zz, &y));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* yy = A xx, implemented as a MultAdd with no addend. */
static PetscErrorCode MatMult_SeqKAIJ(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultAdd_SeqKAIJ(A, xx, NULL, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <petsc/private/kernels/blockinvert.h>

/* Invert the p x p diagonal blocks S + a_ii T, caching the inverses in b->ibdiag. */
static PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A, const PetscScalar **values)
{
  Mat_SeqKAIJ       *b = (Mat_SeqKAIJ *)A->data;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
  const PetscScalar *S = b->S;
  const PetscScalar *T = b->T;
  const PetscScalar *v = a->a;
  const PetscInt     p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
  PetscInt           i, j, *v_pivots, dof, dof2;
  PetscScalar       *diag, aval, *v_work;

  PetscFunctionBegin;
  PetscCheck(p == q, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Block size must be square to calculate 
inverse.");
  PetscCheck(S || T || b->isTI, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Cannot invert a zero matrix.");

  dof  = p;
  dof2 = dof * dof;

  /* Return the cached inverses when they are still valid. */
  if (b->ibdiagvalid) {
    if (values) *values = b->ibdiag;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  if (!b->ibdiag) PetscCall(PetscMalloc1(dof2 * m, &b->ibdiag));
  if (values) *values = b->ibdiag;
  diag = b->ibdiag;

  PetscCall(PetscMalloc2(dof, &v_work, dof, &v_pivots));
  for (i = 0; i < m; i++) {
    /* Build the dense block S + a_ii T (a_ii = 0 when the AIJ row stores no diagonal). */
    if (S) {
      PetscCall(PetscArraycpy(diag, S, dof2));
    } else {
      PetscCall(PetscArrayzero(diag, dof2));
    }
    if (b->isTI) {
      /* T is the identity: only the block's diagonal is shifted by a_ii. */
      aval = 0;
      for (j = ii[i]; j < ii[i + 1]; j++)
        if (idx[j] == i) aval = v[j];
      for (j = 0; j < dof; j++) diag[j + dof * j] += aval;
    } else if (T) {
      aval = 0;
      for (j = ii[i]; j < ii[i + 1]; j++)
        if (idx[j] == i) aval = v[j];
      for (j = 0; j < dof2; j++) diag[j] += aval * T[j];
    }
    /* Invert the block in place using the dense kernel. */
    PetscCall(PetscKernel_A_gets_inverse_A(dof, diag, v_pivots, v_work, PETSC_FALSE, NULL));
    diag += dof2;
  }
  PetscCall(PetscFree2(v_work, v_pivots));

  b->ibdiagvalid = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* The "diagonal block" of a parallel KAIJ matrix is its sequential KAIJ part. */
static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A, Mat *B)
{
  Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ *)A->data;

  PetscFunctionBegin;
  *B = kaij->AIJ;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Convert a (sequential or parallel) KAIJ matrix to AIJ: preallocate from the
   block structure, then copy values through MatConvert_Basic(). */
static PetscErrorCode MatConvert_KAIJ_AIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_SeqKAIJ    *a = (Mat_SeqKAIJ *)A->data;
  Mat             AIJ, OAIJ, B;
  PetscInt       *d_nnz, *o_nnz = NULL, nz, i, j, m, d;
  const PetscInt  p = a->p, q = a->q;
  PetscBool       ismpikaij, diagDense;
  const PetscInt *aijdiag;

  PetscFunctionBegin;
  if (reuse != MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
    if (ismpikaij) {
      Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
      AIJ = ((Mat_SeqKAIJ 
*)b->AIJ->data)->AIJ;
      OAIJ = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ;
    } else {
      AIJ  = a->AIJ;
      OAIJ = NULL;
    }
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(B, MATAIJ));
    PetscCall(MatGetSize(AIJ, &m, NULL));

    /*
      Determine the first row of AIJ with missing diagonal and assume all successive rows also have a missing diagonal
    */
    PetscCall(MatGetDiagonalMarkers_SeqAIJ(AIJ, &aijdiag, &diagDense));
    if (diagDense || !a->S) d = m;
    else {
      Mat_SeqAIJ *aij = (Mat_SeqAIJ *)AIJ->data;

      for (d = 0; d < m; d++) {
        if (aijdiag[d] == aij->i[d + 1]) break;
      }
    }
    /* Each AIJ nonzero expands to a q-wide block; rows at/after d get one extra
       q-wide block because S contributes a diagonal block that AIJ does not store. */
    PetscCall(PetscMalloc1(m * p, &d_nnz));
    for (i = 0; i < m; ++i) {
      PetscCall(MatGetRow_SeqAIJ(AIJ, i, &nz, NULL, NULL));
      for (j = 0; j < p; ++j) d_nnz[i * p + j] = nz * q + (i >= d) * q;
      PetscCall(MatRestoreRow_SeqAIJ(AIJ, i, &nz, NULL, NULL));
    }
    if (OAIJ) {
      /* Parallel case: also count off-diagonal-part nonzeros for preallocation. */
      PetscCall(PetscMalloc1(m * p, &o_nnz));
      for (i = 0; i < m; ++i) {
        PetscCall(MatGetRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL));
        for (j = 0; j < p; ++j) o_nnz[i * p + j] = nz * q;
        PetscCall(MatRestoreRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL));
      }
      PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    } else {
      PetscCall(MatSeqAIJSetPreallocation(B, 0, d_nnz));
    }
    PetscCall(PetscFree(d_nnz));
    PetscCall(PetscFree(o_nnz));
  } else B = *newmat;
  PetscCall(MatConvert_Basic(A, newtype, MAT_REUSE_MATRIX, &B));
  if (reuse == MAT_INPLACE_MATRIX) PetscCall(MatHeaderReplace(A, &B));
  else *newmat = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Block SOR/Gauss-Seidel relaxation for KAIJ; square p x p blocks only. */
static PetscErrorCode MatSOR_SeqKAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ *)A->data;
  Mat_SeqAIJ        *a    = (Mat_SeqAIJ *)kaij->AIJ->data;
  const PetscScalar *aa = 
a->a, *T = kaij->T, *v; 860 const PetscInt m = kaij->AIJ->rmap->n, *ai = a->i, *aj = a->j, p = kaij->p, q = kaij->q, *diag, *vi; 861 const PetscScalar *b, *xb, *idiag; 862 PetscScalar *x, *work, *workt, *w, *y, *arr, *t, *arrt; 863 PetscInt i, j, k, i2, bs, bs2, nz; 864 865 PetscFunctionBegin; 866 its = its * lits; 867 PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat"); 868 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 869 PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift"); 870 PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts"); 871 PetscCheck(p == q, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSOR for KAIJ: No support for non-square dense blocks"); 872 bs = p; 873 bs2 = bs * bs; 874 875 if (!m) PetscFunctionReturn(PETSC_SUCCESS); 876 877 if (!kaij->ibdiagvalid) PetscCall(MatInvertBlockDiagonal_SeqKAIJ(A, NULL)); 878 idiag = kaij->ibdiag; 879 PetscCall(MatGetDiagonalMarkers_SeqAIJ(kaij->AIJ, &diag, NULL)); 880 881 if (!kaij->sor.setup) { 882 PetscCall(PetscMalloc5(bs, &kaij->sor.w, bs, &kaij->sor.y, m * bs, &kaij->sor.work, m * bs, &kaij->sor.t, m * bs2, &kaij->sor.arr)); 883 kaij->sor.setup = PETSC_TRUE; 884 } 885 y = kaij->sor.y; 886 w = kaij->sor.w; 887 work = kaij->sor.work; 888 t = kaij->sor.t; 889 arr = kaij->sor.arr; 890 891 PetscCall(VecGetArray(xx, &x)); 892 PetscCall(VecGetArrayRead(bb, &b)); 893 894 if (flag & SOR_ZERO_INITIAL_GUESS) { 895 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 896 PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); /* x[0:bs] <- D^{-1} b[0:bs] */ 897 PetscCall(PetscArraycpy(t, b, bs)); 898 i2 = bs; 899 idiag += bs2; 900 for (i = 1; i < m; i++) { 901 v = aa + ai[i]; 902 vi = aj + ai[i]; 903 nz = diag[i] - ai[i]; 
904 905 if (T) { /* b - T (Arow * x) */ 906 PetscCall(PetscArrayzero(w, bs)); 907 for (j = 0; j < nz; j++) { 908 for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k]; 909 } 910 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs, w, T, &t[i2]); 911 for (k = 0; k < bs; k++) t[i2 + k] += b[i2 + k]; 912 } else if (kaij->isTI) { 913 PetscCall(PetscArraycpy(t + i2, b + i2, bs)); 914 for (j = 0; j < nz; j++) { 915 for (k = 0; k < bs; k++) t[i2 + k] -= v[j] * x[vi[j] * bs + k]; 916 } 917 } else { 918 PetscCall(PetscArraycpy(t + i2, b + i2, bs)); 919 } 920 921 PetscKernel_w_gets_Ar_times_v(bs, bs, t + i2, idiag, y); 922 for (j = 0; j < bs; j++) x[i2 + j] = omega * y[j]; 923 924 idiag += bs2; 925 i2 += bs; 926 } 927 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 928 PetscCall(PetscLogFlops(1.0 * bs2 * a->nz)); 929 xb = t; 930 } else xb = b; 931 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 932 idiag = kaij->ibdiag + bs2 * (m - 1); 933 i2 = bs * (m - 1); 934 PetscCall(PetscArraycpy(w, xb + i2, bs)); 935 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 936 i2 -= bs; 937 idiag -= bs2; 938 for (i = m - 2; i >= 0; i--) { 939 v = aa + diag[i] + 1; 940 vi = aj + diag[i] + 1; 941 nz = ai[i + 1] - diag[i] - 1; 942 943 if (T) { /* FIXME: This branch untested */ 944 PetscCall(PetscArraycpy(w, xb + i2, bs)); 945 /* copy all rows of x that are needed into contiguous space */ 946 workt = work; 947 for (j = 0; j < nz; j++) { 948 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 949 workt += bs; 950 } 951 arrt = arr; 952 for (j = 0; j < nz; j++) { 953 PetscCall(PetscArraycpy(arrt, T, bs2)); 954 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 955 arrt += bs2; 956 } 957 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 958 } else if (kaij->isTI) { 959 PetscCall(PetscArraycpy(w, t + i2, bs)); 960 for (j = 0; j < nz; j++) { 961 for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k]; 962 } 963 } 964 965 
PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); /* RHS incorrect for omega != 1.0 */ 966 for (j = 0; j < bs; j++) x[i2 + j] = (1.0 - omega) * x[i2 + j] + omega * y[j]; 967 968 idiag -= bs2; 969 i2 -= bs; 970 } 971 PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz))); 972 } 973 its--; 974 } 975 while (its--) { /* FIXME: This branch not updated */ 976 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 977 i2 = 0; 978 idiag = kaij->ibdiag; 979 for (i = 0; i < m; i++) { 980 PetscCall(PetscArraycpy(w, b + i2, bs)); 981 982 v = aa + ai[i]; 983 vi = aj + ai[i]; 984 nz = diag[i] - ai[i]; 985 workt = work; 986 for (j = 0; j < nz; j++) { 987 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 988 workt += bs; 989 } 990 arrt = arr; 991 if (T) { 992 for (j = 0; j < nz; j++) { 993 PetscCall(PetscArraycpy(arrt, T, bs2)); 994 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 995 arrt += bs2; 996 } 997 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 998 } else if (kaij->isTI) { 999 for (j = 0; j < nz; j++) { 1000 PetscCall(PetscArrayzero(arrt, bs2)); 1001 for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 1002 arrt += bs2; 1003 } 1004 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1005 } 1006 PetscCall(PetscArraycpy(t + i2, w, bs)); 1007 1008 v = aa + diag[i] + 1; 1009 vi = aj + diag[i] + 1; 1010 nz = ai[i + 1] - diag[i] - 1; 1011 workt = work; 1012 for (j = 0; j < nz; j++) { 1013 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 1014 workt += bs; 1015 } 1016 arrt = arr; 1017 if (T) { 1018 for (j = 0; j < nz; j++) { 1019 PetscCall(PetscArraycpy(arrt, T, bs2)); 1020 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 1021 arrt += bs2; 1022 } 1023 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1024 } else if (kaij->isTI) { 1025 for (j = 0; j < nz; j++) { 1026 PetscCall(PetscArrayzero(arrt, bs2)); 1027 for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 1028 arrt += bs2; 1029 } 1030 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs 
* nz, w, arr, work); 1031 } 1032 1033 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); 1034 for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j); 1035 1036 idiag += bs2; 1037 i2 += bs; 1038 } 1039 xb = t; 1040 } else xb = b; 1041 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1042 idiag = kaij->ibdiag + bs2 * (m - 1); 1043 i2 = bs * (m - 1); 1044 if (xb == b) { 1045 for (i = m - 1; i >= 0; i--) { 1046 PetscCall(PetscArraycpy(w, b + i2, bs)); 1047 1048 v = aa + ai[i]; 1049 vi = aj + ai[i]; 1050 nz = diag[i] - ai[i]; 1051 workt = work; 1052 for (j = 0; j < nz; j++) { 1053 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 1054 workt += bs; 1055 } 1056 arrt = arr; 1057 if (T) { 1058 for (j = 0; j < nz; j++) { 1059 PetscCall(PetscArraycpy(arrt, T, bs2)); 1060 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 1061 arrt += bs2; 1062 } 1063 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1064 } else if (kaij->isTI) { 1065 for (j = 0; j < nz; j++) { 1066 PetscCall(PetscArrayzero(arrt, bs2)); 1067 for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 1068 arrt += bs2; 1069 } 1070 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1071 } 1072 1073 v = aa + diag[i] + 1; 1074 vi = aj + diag[i] + 1; 1075 nz = ai[i + 1] - diag[i] - 1; 1076 workt = work; 1077 for (j = 0; j < nz; j++) { 1078 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 1079 workt += bs; 1080 } 1081 arrt = arr; 1082 if (T) { 1083 for (j = 0; j < nz; j++) { 1084 PetscCall(PetscArraycpy(arrt, T, bs2)); 1085 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 1086 arrt += bs2; 1087 } 1088 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1089 } else if (kaij->isTI) { 1090 for (j = 0; j < nz; j++) { 1091 PetscCall(PetscArrayzero(arrt, bs2)); 1092 for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 1093 arrt += bs2; 1094 } 1095 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1096 } 1097 1098 
PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); 1099 for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j); 1100 } 1101 } else { 1102 for (i = m - 1; i >= 0; i--) { 1103 PetscCall(PetscArraycpy(w, xb + i2, bs)); 1104 v = aa + diag[i] + 1; 1105 vi = aj + diag[i] + 1; 1106 nz = ai[i + 1] - diag[i] - 1; 1107 workt = work; 1108 for (j = 0; j < nz; j++) { 1109 PetscCall(PetscArraycpy(workt, x + bs * (*vi++), bs)); 1110 workt += bs; 1111 } 1112 arrt = arr; 1113 if (T) { 1114 for (j = 0; j < nz; j++) { 1115 PetscCall(PetscArraycpy(arrt, T, bs2)); 1116 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 1117 arrt += bs2; 1118 } 1119 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1120 } else if (kaij->isTI) { 1121 for (j = 0; j < nz; j++) { 1122 PetscCall(PetscArrayzero(arrt, bs2)); 1123 for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 1124 arrt += bs2; 1125 } 1126 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1127 } 1128 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); 1129 for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j); 1130 } 1131 } 1132 PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz))); 1133 } 1134 } 1135 1136 PetscCall(VecRestoreArray(xx, &x)); 1137 PetscCall(VecRestoreArrayRead(bb, &b)); 1138 PetscFunctionReturn(PETSC_SUCCESS); 1139 } 1140 1141 /*===================================================================================*/ 1142 1143 static PetscErrorCode MatMultAdd_MPIKAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1144 { 1145 Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 1146 1147 PetscFunctionBegin; 1148 if (!yy) { 1149 PetscCall(VecSet(zz, 0.0)); 1150 } else { 1151 PetscCall(VecCopy(yy, zz)); 1152 } 1153 PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. 
                                        */
  /* start the scatter of off-process entries of xx into b->w so it overlaps the local product */
  PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
  /* on-process part: zz += AIJ * xx (AIJ is the sequential KAIJ diagonal block) */
  PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, zz, zz));
  PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
  /* off-process part: zz += OAIJ * w, using the freshly scattered ghost values */
  PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* yy = [I \otimes S + A \otimes T] xx for the MPI case: MatMultAdd with a NULL addend */
static PetscErrorCode MatMult_MPIKAIJ(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultAdd_MPIKAIJ(A, xx, NULL, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Invert the block diagonal by delegating to the local (sequential KAIJ) diagonal block */
static PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A, const PetscScalar **values)
{
  Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ is up to date. */
  PetscCall((*b->AIJ->ops->invertblockdiagonal)(b->AIJ, values));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Extract one row of the (np x nq) KAIJ matrix, sequential case.
   KAIJ row `row` corresponds to AIJ row r = row/p and to row s = row%p of the dense
   blocks S and T. idx/v are allocated here and freed by MatRestoreRow_SeqKAIJ(). */
static PetscErrorCode MatGetRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values)
{
  Mat_SeqKAIJ *b    = (Mat_SeqKAIJ *)A->data;
  PetscBool    diag = PETSC_FALSE;
  PetscInt     nzaij, nz, *colsaij, *idx, i, j, p = b->p, q = b->q, r = row / p, s = row % p, c;
  PetscScalar *vaij, *v, *S = b->S, *T = b->T;

  PetscFunctionBegin;
  PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  b->getrowactive = PETSC_TRUE;
  PetscCheck(row >= 0 && row < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row);

  /* S == T == NULL and T is not the implicit identity: every row is empty */
  if ((!S) && (!T) && (!b->isTI)) {
    if (ncols) *ncols = 0;
    if (cols) *cols = NULL;
    if (values) *values = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  if (T || b->isTI) {
    PetscCall(MatGetRow_SeqAIJ(b->AIJ, r, &nzaij, &colsaij, &vaij));
    c = nzaij; /* position where the S block will be placed; moved if a diagonal entry exists */
    for (i = 0; i < nzaij; i++) {
      /* check if this row contains a diagonal entry */
if (colsaij[i] == r) { 1204 diag = PETSC_TRUE; 1205 c = i; 1206 } 1207 } 1208 } else nzaij = c = 0; 1209 1210 /* calculate size of row */ 1211 nz = 0; 1212 if (S) nz += q; 1213 if (T || b->isTI) nz += (diag && S ? (nzaij - 1) * q : nzaij * q); 1214 1215 if (cols || values) { 1216 PetscCall(PetscMalloc2(nz, &idx, nz, &v)); 1217 for (i = 0; i < q; i++) { 1218 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1219 v[i] = 0.0; 1220 } 1221 if (b->isTI) { 1222 for (i = 0; i < nzaij; i++) { 1223 for (j = 0; j < q; j++) { 1224 idx[i * q + j] = colsaij[i] * q + j; 1225 v[i * q + j] = (j == s ? vaij[i] : 0); 1226 } 1227 } 1228 } else if (T) { 1229 for (i = 0; i < nzaij; i++) { 1230 for (j = 0; j < q; j++) { 1231 idx[i * q + j] = colsaij[i] * q + j; 1232 v[i * q + j] = vaij[i] * T[s + j * p]; 1233 } 1234 } 1235 } 1236 if (S) { 1237 for (j = 0; j < q; j++) { 1238 idx[c * q + j] = r * q + j; 1239 v[c * q + j] += S[s + j * p]; 1240 } 1241 } 1242 } 1243 1244 if (ncols) *ncols = nz; 1245 if (cols) *cols = idx; 1246 if (values) *values = v; 1247 PetscFunctionReturn(PETSC_SUCCESS); 1248 } 1249 1250 static PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1251 { 1252 PetscFunctionBegin; 1253 PetscCall(PetscFree2(*idx, *v)); 1254 ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE; 1255 PetscFunctionReturn(PETSC_SUCCESS); 1256 } 1257 1258 static PetscErrorCode MatGetRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values) 1259 { 1260 Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 1261 Mat AIJ = b->A; 1262 PetscBool diag = PETSC_FALSE; 1263 Mat MatAIJ, MatOAIJ; 1264 const PetscInt rstart = A->rmap->rstart, rend = A->rmap->rend, p = b->p, q = b->q, *garray; 1265 PetscInt nz, *idx, ncolsaij = 0, ncolsoaij = 0, *colsaij, *colsoaij, r, s, c, i, j, lrow; 1266 PetscScalar *v, *vals, *ovals, *S = b->S, *T = b->T; 1267 1268 PetscFunctionBegin; 1269 
PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */ 1270 MatAIJ = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ; 1271 MatOAIJ = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ; 1272 PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1273 b->getrowactive = PETSC_TRUE; 1274 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1275 lrow = row - rstart; 1276 1277 if ((!S) && (!T) && (!b->isTI)) { 1278 if (ncols) *ncols = 0; 1279 if (cols) *cols = NULL; 1280 if (values) *values = NULL; 1281 PetscFunctionReturn(PETSC_SUCCESS); 1282 } 1283 1284 r = lrow / p; 1285 s = lrow % p; 1286 1287 if (T || b->isTI) { 1288 PetscCall(MatMPIAIJGetSeqAIJ(AIJ, NULL, NULL, &garray)); 1289 PetscCall(MatGetRow_SeqAIJ(MatAIJ, lrow / p, &ncolsaij, &colsaij, &vals)); 1290 PetscCall(MatGetRow_SeqAIJ(MatOAIJ, lrow / p, &ncolsoaij, &colsoaij, &ovals)); 1291 1292 c = ncolsaij + ncolsoaij; 1293 for (i = 0; i < ncolsaij; i++) { 1294 /* check if this row contains a diagonal entry */ 1295 if (colsaij[i] == r) { 1296 diag = PETSC_TRUE; 1297 c = i; 1298 } 1299 } 1300 } else c = 0; 1301 1302 /* calculate size of row */ 1303 nz = 0; 1304 if (S) nz += q; 1305 if (T || b->isTI) nz += (diag && S ? (ncolsaij + ncolsoaij - 1) * q : (ncolsaij + ncolsoaij) * q); 1306 1307 if (cols || values) { 1308 PetscCall(PetscMalloc2(nz, &idx, nz, &v)); 1309 for (i = 0; i < q; i++) { 1310 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1311 v[i] = 0.0; 1312 } 1313 if (b->isTI) { 1314 for (i = 0; i < ncolsaij; i++) { 1315 for (j = 0; j < q; j++) { 1316 idx[i * q + j] = (colsaij[i] + rstart / p) * q + j; 1317 v[i * q + j] = (j == s ? vals[i] : 0.0); 1318 } 1319 } 1320 for (i = 0; i < ncolsoaij; i++) { 1321 for (j = 0; j < q; j++) { 1322 idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j; 1323 v[(i + ncolsaij) * q + j] = (j == s ? 
            ovals[i] : 0.0);
        }
      }
    } else if (T) {
      /* on-process columns: each local AIJ entry scales row s of T */
      for (i = 0; i < ncolsaij; i++) {
        for (j = 0; j < q; j++) {
          idx[i * q + j] = (colsaij[i] + rstart / p) * q + j;
          v[i * q + j]   = vals[i] * T[s + j * p];
        }
      }
      /* off-process columns: map back to global indices through garray */
      for (i = 0; i < ncolsoaij; i++) {
        for (j = 0; j < q; j++) {
          idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j;
          v[(i + ncolsaij) * q + j]   = ovals[i] * T[s + j * p];
        }
      }
    }
    if (S) {
      /* add row s of S into the diagonal block (position c) */
      for (j = 0; j < q; j++) {
        idx[c * q + j] = (r + rstart / p) * q + j;
        v[c * q + j] += S[s + j * p];
      }
    }
  }

  if (ncols) *ncols = nz;
  if (cols) *cols = idx;
  if (values) *values = v;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Free the row storage allocated by MatGetRow_MPIKAIJ() and clear the active flag.
   NOTE(review): A->data is cast to Mat_SeqKAIJ although this is the MPI type;
   presumably the two structs share a common leading layout -- confirm in kaij.h. */
static PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  PetscFunctionBegin;
  PetscCall(PetscFree2(*idx, *v));
  ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Submatrix extraction: convert the KAIJ matrix to AIJ and extract from that temporary copy */
static PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat)
{
  Mat A;

  PetscFunctionBegin;
  PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
  PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat));
  PetscCall(MatDestroy(&A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatCreateKAIJ - Creates a matrix of type `MATKAIJ`.

  Collective

  Input Parameters:
+ A - the `MATAIJ` matrix
. p - number of rows in `S` and `T`
. q - number of columns in `S` and `T`
. S - the `S` matrix (can be `NULL`), stored as a `PetscScalar` array (column-major)
- T - the `T` matrix (can be `NULL`), stored as a `PetscScalar` array (column-major)

  Output Parameter:
.
kaij - the new `MATKAIJ` matrix 1387 1388 Level: advanced 1389 1390 Notes: 1391 The created matrix is of the following form\: 1392 .vb 1393 [I \otimes S + A \otimes T] 1394 .ve 1395 where 1396 .vb 1397 S is a dense (p \times q) matrix 1398 T is a dense (p \times q) matrix 1399 A is a `MATAIJ` (n \times n) matrix 1400 I is the identity matrix 1401 .ve 1402 The resulting matrix is (np \times nq) 1403 1404 `S` and `T` are always stored independently on all processes as `PetscScalar` arrays in 1405 column-major format. 1406 1407 This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed. 1408 1409 Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix. 1410 1411 Developer Notes: 1412 In the `MATMPIKAIJ` case, the internal 'AIJ' and 'OAIJ' sequential KAIJ matrices are kept up to date by tracking the object state 1413 of the AIJ matrix 'A' that describes the blockwise action of the `MATMPIKAIJ` matrix and, if the object state has changed, lazily 1414 rebuilding 'AIJ' and 'OAIJ' just before executing operations with the `MATMPIKAIJ` matrix. If new types of operations are added, 1415 routines implementing those must also ensure these are rebuilt when needed (by calling the internal MatKAIJ_build_AIJ_OAIJ() routine). 

.seealso: [](ch_matrices), `Mat`, `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MATKAIJ`
@*/
PetscErrorCode MatCreateKAIJ(Mat A, PetscInt p, PetscInt q, const PetscScalar S[], const PetscScalar T[], Mat *kaij)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), kaij));
  PetscCall(MatSetType(*kaij, MATKAIJ));
  PetscCall(MatKAIJSetAIJ(*kaij, A)); /* increases the reference count on A (see Notes above) */
  PetscCall(MatKAIJSetS(*kaij, p, q, S));
  PetscCall(MatKAIJSetT(*kaij, p, q, T));
  PetscCall(MatSetUp(*kaij));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
  MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form
  [I \otimes S + A \otimes T],
  where
.vb
  S is a dense (p \times q) matrix,
  T is a dense (p \times q) matrix,
  A is an AIJ (n \times n) matrix,
  and I is the identity matrix.
.ve
  The resulting matrix is (np \times nq).

  S and T are always stored independently on all processes as `PetscScalar` arrays in column-major format.

  Level: advanced

  Note:
  A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b,
  where x and b are column vectors containing the row-major representations of X and B.

.seealso: [](ch_matrices), `Mat`, `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MatCreateKAIJ()`
M*/

/* Constructor registered for MATKAIJ: picks the sequential or parallel flavor from the
   communicator size and installs the corresponding entries of the operation table. */
PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
{
  Mat_MPIKAIJ *b;
  PetscMPIInt  size;

  PetscFunctionBegin;
  /* one Mat_MPIKAIJ allocation serves both cases -- NOTE(review): this relies on
     Mat_SeqKAIJ being a prefix of Mat_MPIKAIJ; confirm the layout in kaij.h */
  PetscCall(PetscNew(&b));
  A->data = (void *)b;

  /* start from an empty operations table; only the ops set below are supported */
  PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps)));

  b->w = NULL;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) { /* sequential flavor */
    PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQKAIJ));
    A->ops->destroy             = MatDestroy_SeqKAIJ;
    A->ops->mult                = MatMult_SeqKAIJ;
    A->ops->multadd             = MatMultAdd_SeqKAIJ;
    A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
    A->ops->getrow              = MatGetRow_SeqKAIJ;
    A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
    A->ops->sor                 = MatSOR_SeqKAIJ;
    PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", MatConvert_KAIJ_AIJ));
  } else { /* parallel flavor (no SOR support in the MPI case) */
    PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIKAIJ));
    A->ops->destroy             = MatDestroy_MPIKAIJ;
    A->ops->mult                = MatMult_MPIKAIJ;
    A->ops->multadd             = MatMultAdd_MPIKAIJ;
    A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
    A->ops->getrow              = MatGetRow_MPIKAIJ;
    A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
    PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", MatGetDiagonalBlock_MPIKAIJ));
    PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", MatConvert_KAIJ_AIJ));
  }
  /* operations shared by both flavors */
  A->ops->setup           = MatSetUp_KAIJ;
  A->ops->view            = MatView_KAIJ;
  A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
  PetscFunctionReturn(PETSC_SUCCESS);
}