/*
  Defines the basic matrix operations for the KAIJ matrix storage format.
  This format is used to evaluate matrices of the form:

          [I \otimes S + A \otimes T]

  where
        S is a dense (p \times q) matrix
        T is a dense (p \times q) matrix
        A is an AIJ  (n \times n) matrix
        I is the identity matrix

  The resulting matrix is (np \times nq)

  We provide:
     MatMult()
     MatMultAdd()
     MatInvertBlockDiagonal()
  and
     MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)

  This single directory handles both the sequential and parallel codes
*/

#include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/
#include <../src/mat/utils/freespace.h>
#include <petsc/private/vecimpl.h>

/*@C
  MatKAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix

  Not Collective, but if the `MATKAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel

  Input Parameter:
. A - the `MATKAIJ` matrix

  Output Parameter:
. B - the `MATAIJ` matrix

  Level: advanced

  Note:
  The reference count on the `MATAIJ` matrix is not increased so you should not destroy it.

.seealso: [](ch_matrices), `Mat`, `MatCreateKAIJ()`, `MATKAIJ`, `MATAIJ`
@*/
PetscErrorCode MatKAIJGetAIJ(Mat A, Mat *B)
{
  PetscBool ismpikaij, isseqkaij;

  PetscFunctionBegin;
  /* The AIJ matrix lives in a different member of the sequential and parallel
     data structures, so dispatch on the concrete KAIJ type. */
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQKAIJ, &isseqkaij));
  if (ismpikaij) {
    Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;

    *B = b->A;
  } else if (isseqkaij) {
    Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;

    *B = b->AIJ;
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix passed in is not of type KAIJ");
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatKAIJGetS - Get the `S` matrix describing the shift action of the `MATKAIJ` matrix

  Not Collective; the entire `S` is stored and returned independently on all processes.

  Input Parameter:
. A - the `MATKAIJ` matrix

  Output Parameters:
+ m - the number of rows in `S`
. n - the number of columns in `S`
- S - the S matrix, in form of a scalar array in column-major format

  Level: advanced

  Note:
  All output parameters are optional (pass `NULL` if not desired)

.seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
@*/
PetscErrorCode MatKAIJGetS(Mat A, PetscInt *m, PetscInt *n, PetscScalar **S)
{
  Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
  PetscFunctionBegin;
  /* NOTE(review): no type check here (unlike MatKAIJGetAIJ()); this cast presumably
     relies on Mat_SeqKAIJ and Mat_MPIKAIJ sharing the same leading members
     (S, T, isTI, p, q) -- confirm against kaij.h. */
  if (m) *m = b->p;
  if (n) *n = b->q;
  if (S) *S = b->S;
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*@C
  MatKAIJGetSRead - Get a read-only pointer to the `S` matrix describing the shift action of the `MATKAIJ` matrix

  Not Collective; the entire `S` is stored and returned independently on all processes.

  Input Parameter:
. A - the `MATKAIJ` matrix

  Output Parameters:
+ m - the number of rows in `S`
. n - the number of columns in `S`
- S - the S matrix, in form of a scalar array in column-major format

  Level: advanced

  Note:
  All output parameters are optional (pass `NULL` if not desired)

.seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
@*/
PetscErrorCode MatKAIJGetSRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar **S)
{
  Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
  PetscFunctionBegin;
  /* Identical to MatKAIJGetS() except the returned pointer is const-qualified;
     the caller must not modify S through it. */
  if (m) *m = b->p;
  if (n) *n = b->q;
  if (S) *S = b->S;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatKAIJRestoreS - Restore array obtained with `MatKAIJGetS()`

  Not Collective

  Input Parameters:
+ A - the `MATKAIJ` matrix
- S - location of pointer to array obtained with `MatKAIJGetS()`

  Level: advanced

  Note:
  This routine zeros the array pointer to prevent accidental reuse after it has been restored.
  If `NULL` is passed, it will not attempt to zero the array pointer.

.seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()`
@*/
PetscErrorCode MatKAIJRestoreS(Mat A, PetscScalar **S)
{
  PetscFunctionBegin;
  if (S) *S = NULL;
  /* The writable accessor may have modified S, so bump the object state to
     invalidate anything cached from the old values. */
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*@C
  MatKAIJRestoreSRead - Restore array obtained with `MatKAIJGetSRead()`

  Not Collective

  Input Parameters:
+ A - the `MATKAIJ` matrix
- S - location of pointer to array obtained with `MatKAIJGetSRead()`

  Level: advanced

  Note:
  This routine zeros the array pointer to prevent accidental reuse after it has been restored.
  If `NULL` is passed, it will not attempt to zero the array pointer.

.seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()`
@*/
PetscErrorCode MatKAIJRestoreSRead(Mat A, const PetscScalar **S)
{
  PetscFunctionBegin;
  /* Read-only access: no state increase needed, just invalidate the caller's pointer. */
  if (S) *S = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatKAIJGetT - Get the transformation matrix `T` associated with the `MATKAIJ` matrix

  Not Collective; the entire `T` is stored and returned independently on all processes

  Input Parameter:
. A - the `MATKAIJ` matrix

  Output Parameters:
+ m - the number of rows in `T`
. n - the number of columns in `T`
- T - the T matrix, in form of a scalar array in column-major format

  Level: advanced

  Note:
  All output parameters are optional (pass `NULL` if not desired)

.seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
@*/
PetscErrorCode MatKAIJGetT(Mat A, PetscInt *m, PetscInt *n, PetscScalar **T)
{
  Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
  PetscFunctionBegin;
  /* NOTE(review): T may be NULL when T is the identity (see MatKAIJSetT(), which
     stores no array in that case). Callers should check b->isTI semantics. */
  if (m) *m = b->p;
  if (n) *n = b->q;
  if (T) *T = b->T;
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*@C
  MatKAIJGetTRead - Get a read-only pointer to the transformation matrix `T` associated with the `MATKAIJ` matrix

  Not Collective; the entire `T` is stored and returned independently on all processes

  Input Parameter:
. A - the `MATKAIJ` matrix

  Output Parameters:
+ m - the number of rows in `T`
. n - the number of columns in `T`
- T - the T matrix, in form of a scalar array in column-major format

  Level: advanced

  Note:
  All output parameters are optional (pass `NULL` if not desired)

.seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
@*/
PetscErrorCode MatKAIJGetTRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar **T)
{
  Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
  PetscFunctionBegin;
  /* Identical to MatKAIJGetT() except the returned pointer is const-qualified. */
  if (m) *m = b->p;
  if (n) *n = b->q;
  if (T) *T = b->T;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatKAIJRestoreT - Restore array obtained with `MatKAIJGetT()`

  Not Collective

  Input Parameters:
+ A - the `MATKAIJ` matrix
- T - location of pointer to array obtained with `MatKAIJGetT()`

  Level: advanced

  Note:
  This routine zeros the array pointer to prevent accidental reuse after it has been restored.
  If `NULL` is passed, it will not attempt to zero the array pointer.

.seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()`
@*/
PetscErrorCode MatKAIJRestoreT(Mat A, PetscScalar **T)
{
  PetscFunctionBegin;
  if (T) *T = NULL;
  /* The writable accessor may have modified T, so bump the object state to
     invalidate anything cached from the old values. */
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*@C
  MatKAIJRestoreTRead - Restore array obtained with `MatKAIJGetTRead()`

  Not Collective

  Input Parameters:
+ A - the `MATKAIJ` matrix
- T - location of pointer to array obtained with `MatKAIJGetTRead()`

  Level: advanced

  Note:
  This routine zeros the array pointer to prevent accidental reuse after it has been restored.
  If `NULL` is passed, it will not attempt to zero the array pointer.

.seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()`
@*/
PetscErrorCode MatKAIJRestoreTRead(Mat A, const PetscScalar **T)
{
  PetscFunctionBegin;
  /* Read-only access: no state increase needed, just invalidate the caller's pointer. */
  if (T) *T = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatKAIJSetAIJ - Set the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix

  Logically Collective; if the `MATAIJ` matrix is parallel, the `MATKAIJ` matrix is also parallel

  Input Parameters:
+ A - the `MATKAIJ` matrix
- B - the `MATAIJ` matrix

  Level: advanced

  Notes:
  This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed.

  Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix.

.seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`
@*/
PetscErrorCode MatKAIJSetAIJ(Mat A, Mat B)
{
  PetscMPIInt size;
  PetscBool   flg;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* In the sequential case only MATSEQAIJ is supported as the inner matrix. */
    PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQAIJ, &flg));
    PetscCheck(flg, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MatKAIJSetAIJ() with MATSEQKAIJ does not support %s as the AIJ mat", ((PetscObject)B)->type_name);
    Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
    a->AIJ = B;
  } else {
    /* NOTE(review): no type check on B in the parallel branch, and any previously
       set matrix is overwritten without being dereferenced -- looks like a
       potential reference leak if called twice; confirm intended usage. */
    Mat_MPIKAIJ *a = (Mat_MPIKAIJ *)A->data;
    a->A = B;
  }
  /* Take a reference so the caller may destroy B (see Notes above). */
  PetscCall(PetscObjectReference((PetscObject)B));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*@C
  MatKAIJSetS - Set the `S` matrix describing the shift action of the `MATKAIJ` matrix

  Logically Collective; the entire `S` is stored independently on all processes.

  Input Parameters:
+ A - the `MATKAIJ` matrix
. p - the number of rows in `S`
. q - the number of columns in `S`
- S - the S matrix, in form of a scalar array in column-major format

  Level: advanced

  Notes:
  The dimensions `p` and `q` must match those of the transformation matrix `T` associated with the `MATKAIJ` matrix.

  The `S` matrix is copied, so the user can destroy this array.

.seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJSetT()`, `MatKAIJSetAIJ()`
@*/
PetscErrorCode MatKAIJSetS(Mat A, PetscInt p, PetscInt q, const PetscScalar S[])
{
  Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;

  PetscFunctionBegin;
  /* Drop any previously stored S, then deep-copy the new one (a NULL S is
     stored as NULL, meaning "no shift"). */
  PetscCall(PetscFree(a->S));
  if (S) {
    PetscCall(PetscMalloc1(p * q, &a->S));
    PetscCall(PetscMemcpy(a->S, S, p * q * sizeof(PetscScalar)));
  } else a->S = NULL;

  /* p and q are shared with T; callers must keep them consistent (see Notes). */
  a->p = p;
  a->q = q;
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*@C
  MatKAIJGetScaledIdentity - Check if both `S` and `T` are scaled identities.

  Logically Collective.

  Input Parameter:
. A - the `MATKAIJ` matrix

  Output Parameter:
. identity - the Boolean value

  Level: advanced

.seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetT()`
@*/
PetscErrorCode MatKAIJGetScaledIdentity(Mat A, PetscBool *identity)
{
  Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
  PetscInt     i, j;

  PetscFunctionBegin;
  /* A non-square block can never be a (scaled) identity. */
  if (a->p != a->q) {
    *identity = PETSC_FALSE;
    PetscFunctionReturn(PETSC_SUCCESS);
  } else *identity = PETSC_TRUE;
  /* If T is the stored identity (isTI) and there is no S, the answer is
     trivially true; otherwise scan both blocks. */
  if (!a->isTI || a->S) {
    for (i = 0; i < a->p && *identity; i++) {
      for (j = 0; j < a->p && *identity; j++) {
        if (i != j) {
          /* Off-diagonal entries must vanish (to within PETSC_SMALL). */
          if (a->S && PetscAbsScalar(a->S[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
          if (a->T && PetscAbsScalar(a->T[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
        } else {
          /* All diagonal entries must equal the first one (uniform scaling). */
          if (a->S && PetscAbsScalar(a->S[i * (a->p + 1)] - a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
          if (a->T && PetscAbsScalar(a->T[i * (a->p + 1)] - a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*@C
  MatKAIJSetT - Set the transformation matrix `T` associated with the `MATKAIJ` matrix

  Logically Collective; the entire `T` is stored independently on all processes.

  Input Parameters:
+ A - the `MATKAIJ` matrix
. p - the number of rows in `T`
. q - the number of columns in `T`
- T - the `T` matrix, in form of a scalar array in column-major format

  Level: advanced

  Notes:
  The dimensions `p` and `q` must match those of the shift matrix `S` associated with the `MATKAIJ` matrix.

  The `T` matrix is copied, so the user can destroy this array.

.seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJSetS()`, `MatKAIJSetAIJ()`
@*/
PetscErrorCode MatKAIJSetT(Mat A, PetscInt p, PetscInt q, const PetscScalar T[])
{
  PetscInt     i, j;
  Mat_SeqKAIJ *a    = (Mat_SeqKAIJ *)A->data;
  PetscBool    isTI = PETSC_FALSE;

  PetscFunctionBegin;
  /* check if T is an identity matrix */
  if (T && (p == q)) {
    isTI = PETSC_TRUE;
    for (i = 0; i < p; i++) {
      for (j = 0; j < q; j++) {
        if (i == j) {
          /* diagonal term must be 1 */
          if (T[i + j * p] != 1.0) isTI = PETSC_FALSE;
        } else {
          /* off-diagonal term must be 0 */
          if (T[i + j * p] != 0.0) isTI = PETSC_FALSE;
        }
      }
    }
  }
  a->isTI = isTI;

  /* When T is exactly the identity we store no array at all (a->T == NULL,
     a->isTI == PETSC_TRUE); the mult kernels use a cheaper code path then. */
  PetscCall(PetscFree(a->T));
  if (T && (!isTI)) {
    PetscCall(PetscMalloc1(p * q, &a->T));
    PetscCall(PetscMemcpy(a->T, T, p * q * sizeof(PetscScalar)));
  } else a->T = NULL;

  a->p = p;
  a->q = q;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destroy the sequential KAIJ matrix: release the inner AIJ matrix, the dense
   S/T blocks, the cached inverted block diagonal, and the SOR work arrays. */
static PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
{
  Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDestroy(&b->AIJ));
  PetscCall(PetscFree(b->S));
  PetscCall(PetscFree(b->T));
  PetscCall(PetscFree(b->ibdiag));
  PetscCall(PetscFree5(b->sor.w, b->sor.y, b->sor.work, b->sor.t, b->sor.arr));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", NULL));
  PetscCall(PetscFree(A->data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* (Re)build the sequential KAIJ matrices a->AIJ (diagonal part) and a->OAIJ
   (off-diagonal part) of a parallel KAIJ matrix from the diagonal/off-diagonal
   blocks of the inner MPIAIJ matrix. The inner matrix's object state is cached
   in a->state so the rebuild is skipped when nothing has changed. */
static PetscErrorCode MatKAIJ_build_AIJ_OAIJ(Mat A)
{
  Mat_MPIKAIJ     *a;
  Mat_MPIAIJ      *mpiaij;
  PetscScalar     *T;
  PetscInt         i, j;
  PetscObjectState state;

  PetscFunctionBegin;
  a      = (Mat_MPIKAIJ *)A->data;
  mpiaij = (Mat_MPIAIJ *)a->A->data;

  PetscCall(PetscObjectStateGet((PetscObject)a->A, &state));
  if (state == a->state) {
    /* The existing AIJ and KAIJ members are up-to-date, so simply exit. */
    PetscFunctionReturn(PETSC_SUCCESS);
  } else {
    PetscCall(MatDestroy(&a->AIJ));
    PetscCall(MatDestroy(&a->OAIJ));
    if (a->isTI) {
      /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
       * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
       * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
       * to pass in. */
      PetscCall(PetscMalloc1(a->p * a->q, &T));
      for (i = 0; i < a->p; i++) {
        for (j = 0; j < a->q; j++) {
          if (i == j) T[i + j * a->p] = 1.0;
          else T[i + j * a->p] = 0.0;
        }
      }
    } else T = a->T;
    PetscCall(MatCreateKAIJ(mpiaij->A, a->p, a->q, a->S, T, &a->AIJ));
    /* The off-diagonal part carries no shift S (identity action only applies on-diagonal). */
    PetscCall(MatCreateKAIJ(mpiaij->B, a->p, a->q, NULL, T, &a->OAIJ));
    if (a->isTI) PetscCall(PetscFree(T));
    a->state = state;
  }

  PetscFunctionReturn(PETSC_SUCCESS);
}
/* Set up the KAIJ matrix: derive the (np x nq) sizes and block layout from the
   inner AIJ matrix, and in parallel additionally build the sequential
   diagonal/off-diagonal KAIJ pieces plus the scatter used by MatMult. */
static PetscErrorCode MatSetUp_KAIJ(Mat A)
{
  PetscInt     n;
  PetscMPIInt  size;
  Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ *)A->data;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* Overall sizes are the inner AIJ sizes scaled by the dense block dimensions p, q. */
    PetscCall(MatSetSizes(A, seqkaij->p * seqkaij->AIJ->rmap->n, seqkaij->q * seqkaij->AIJ->cmap->n, seqkaij->p * seqkaij->AIJ->rmap->N, seqkaij->q * seqkaij->AIJ->cmap->N));
    PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p));
    PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q));
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
  } else {
    Mat_MPIKAIJ *a;
    Mat_MPIAIJ  *mpiaij;
    IS           from, to;
    Vec          gvec;

    a      = (Mat_MPIKAIJ *)A->data;
    mpiaij = (Mat_MPIAIJ *)a->A->data;
    PetscCall(MatSetSizes(A, a->p * a->A->rmap->n, a->q * a->A->cmap->n, a->p * a->A->rmap->N, a->q * a->A->cmap->N));
    /* NOTE(review): seqkaij->p/q are read here even though A->data is a
       Mat_MPIKAIJ; presumably both structs share the same leading members so
       this aliases a->p/a->q -- confirm against kaij.h. */
    PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p));
    PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q));
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));

    PetscCall(MatKAIJ_build_AIJ_OAIJ(A));

    /* Local work vector holding the q-wide ghost values needed by the
       off-diagonal multiply; sized from the MPIAIJ ghost vector. */
    PetscCall(VecGetSize(mpiaij->lvec, &n));
    PetscCall(VecCreate(PETSC_COMM_SELF, &a->w));
    PetscCall(VecSetSizes(a->w, n * a->q, n * a->q));
    PetscCall(VecSetBlockSize(a->w, a->q));
    PetscCall(VecSetType(a->w, VECSEQ));

    /* create two temporary Index sets for build scatter gather */
    PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)a->A), a->q, n, mpiaij->garray, PETSC_COPY_VALUES, &from));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, n * a->q, 0, 1, &to));

    /* create temporary global vector to generate scatter context */
    PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A), a->q, a->q * a->A->cmap->n, a->q * a->A->cmap->N, NULL, &gvec));

    /* generate the scatter context */
    PetscCall(VecScatterCreate(gvec, from, a->w, to, &a->ctx));

    PetscCall(ISDestroy(&from));
    PetscCall(ISDestroy(&to));
    PetscCall(VecDestroy(&gvec));
  }

  A->assembled = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* View a KAIJ matrix. For INFO/IMPL formats, print S, T, and the inner AIJ
   matrix directly; for all other formats, convert to AIJ and view that. */
static PetscErrorCode MatView_KAIJ(Mat A, PetscViewer viewer)
{
  PetscViewerFormat format;
  Mat_SeqKAIJ      *a = (Mat_SeqKAIJ *)A->data;
  Mat               B;
  PetscInt          i;
  PetscBool         ismpikaij;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
  PetscCall(PetscViewerGetFormat(viewer, &format));
  if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
    /* NOTE(review): a->p/a->q are read through Mat_SeqKAIJ even in the MPI
       case; presumably valid because the structs share leading members. */
    PetscCall(PetscViewerASCIIPrintf(viewer, "S and T have %" PetscInt_FMT " rows and %" PetscInt_FMT " columns\n", a->p, a->q));

    /* Print appropriate details for S. */
    if (!a->S) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "S is NULL\n"));
    } else if (format == PETSC_VIEWER_ASCII_IMPL) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of S are "));
      for (i = 0; i < (a->p * a->q); i++) {
#if defined(PETSC_USE_COMPLEX)
        PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->S[i]), (double)PetscImaginaryPart(a->S[i])));
#else
        PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->S[i])));
#endif
      }
      PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
    }

    /* Print appropriate details for T. */
    if (a->isTI) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "T is the identity matrix\n"));
    } else if (!a->T) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "T is NULL\n"));
    } else if (format == PETSC_VIEWER_ASCII_IMPL) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of T are "));
      for (i = 0; i < (a->p * a->q); i++) {
#if defined(PETSC_USE_COMPLEX)
        PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->T[i]), (double)PetscImaginaryPart(a->T[i])));
#else
        PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->T[i])));
#endif
      }
      PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
    }

    /* Now print details for the AIJ matrix, using the AIJ viewer. */
    PetscCall(PetscViewerASCIIPrintf(viewer, "Now viewing the associated AIJ matrix:\n"));
    if (ismpikaij) {
      Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
      PetscCall(MatView(b->A, viewer));
    } else {
      PetscCall(MatView(a->AIJ, viewer));
    }

  } else {
    /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */
    PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &B));
    PetscCall(MatView(B, viewer));
    PetscCall(MatDestroy(&B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destroy the parallel KAIJ matrix: release the sequential diagonal and
   off-diagonal KAIJ pieces, the inner MPIAIJ matrix, the ghost scatter and
   work vector, the dense S/T blocks, and the cached block-diagonal inverse. */
static PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
{
  Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDestroy(&b->AIJ));
  PetscCall(MatDestroy(&b->OAIJ));
  PetscCall(MatDestroy(&b->A));
  PetscCall(VecScatterDestroy(&b->ctx));
  PetscCall(VecDestroy(&b->w));
  PetscCall(PetscFree(b->S));
  PetscCall(PetscFree(b->T));
  PetscCall(PetscFree(b->ibdiag));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", NULL));
  PetscCall(PetscFree(A->data));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/* zz = yy + Axx */
static PetscErrorCode MatMultAdd_SeqKAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_SeqKAIJ       *b = (Mat_SeqKAIJ *)A->data;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
  const PetscScalar *s = b->S, *t = b->T;
  const PetscScalar *x, *v, *bx;
  PetscScalar       *y, *sums;
  const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
  PetscInt           n, i, jrow, j, l, p = b->p, q = b->q, k;

  PetscFunctionBegin;
  /* Seed the result: zz = yy, or zz = 0 when no yy is given (plain MatMult). */
  if (!yy) {
    PetscCall(VecSet(zz, 0.0));
  } else {
    PetscCall(VecCopy(yy, zz));
  }
  /* S == T == 0 means the operator is the zero matrix; nothing more to add. */
  if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArray(zz, &y));
  idx = a->j;
  v   = a->a;
  ii  = a->i;

  if (b->isTI) {
    /* T is the identity: (A \otimes I) x, skipping the inner T multiply. */
    for (i = 0; i < m; i++) {
      jrow = ii[i];
      n    = ii[i + 1] - jrow;
      sums = y + p * i;
      for (j = 0; j < n; j++) {
        for (k = 0; k < p; k++) sums[k] += v[jrow + j] * x[q * idx[jrow + j] + k];
      }
    }
    PetscCall(PetscLogFlops(3.0 * (a->nz) * p));
  } else if (t) {
    /* General T: (A \otimes T) x, applying the dense p x q block per AIJ entry. */
    for (i = 0; i < m; i++) {
      jrow = ii[i];
      n    = ii[i + 1] - jrow;
      sums = y + p * i;
      for (j = 0; j < n; j++) {
        for (k = 0; k < p; k++) {
          for (l = 0; l < q; l++) sums[k] += v[jrow + j] * t[k + l * p] * x[q * idx[jrow + j] + l];
        }
      }
    }
    /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do),
     * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T),
     * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter
     * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */
    PetscCall(PetscLogFlops((2.0 * p * q - p) * m + 2.0 * p * a->nz));
  }
  if (s) {
    /* Add the shift contribution (I \otimes S) x on the square part of the matrix. */
    for (i = 0; i < m; i++) {
      sums = y + p * i;
      bx   = x + q * i;
      if (i < b->AIJ->cmap->n) {
        for (j = 0; j < q; j++) {
          for (k = 0; k < p; k++) sums[k] += s[k + j * p] * bx[j];
        }
      }
    }
    PetscCall(PetscLogFlops(2.0 * m * p * q));
  }

  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(zz, &y));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* yy = A xx, implemented as MatMultAdd with a zero initial vector. */
static PetscErrorCode MatMult_SeqKAIJ(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultAdd_SeqKAIJ(A, xx, NULL, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <petsc/private/kernels/blockinvert.h>

/* Compute (and cache in b->ibdiag) the inverses of the p x p diagonal blocks
   S + a_ii T of the KAIJ matrix; optionally return a pointer to the cache. */
static PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A, const PetscScalar **values)
{
  Mat_SeqKAIJ       *b = (Mat_SeqKAIJ *)A->data;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
  const PetscScalar *S = b->S;
  const PetscScalar *T = b->T;
  const PetscScalar *v = a->a;
  const PetscInt     p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
  PetscInt           i, j, *v_pivots, dof, dof2;
  PetscScalar       *diag, aval, *v_work;

  PetscFunctionBegin;
  PetscCheck(p == q, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Block size must be square to calculate inverse.");
  PetscCheck(S || T || b->isTI, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Cannot invert a zero matrix.");

  dof  = p;
  dof2 = dof * dof;

  /* Return the cached inverse when still valid. */
  if (b->ibdiagvalid) {
    if (values) *values = b->ibdiag;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  if (!b->ibdiag) PetscCall(PetscMalloc1(dof2 * m, &b->ibdiag));
  if (values) *values = b->ibdiag;
  diag = b->ibdiag;

  PetscCall(PetscMalloc2(dof, &v_work, dof, &v_pivots));
  for (i = 0; i < m; i++) {
    /* Start from S (or zero), then add a_ii * T (or a_ii * I when T is the identity). */
    if (S) {
      PetscCall(PetscMemcpy(diag, S, dof2 * sizeof(PetscScalar)));
    } else {
      PetscCall(PetscMemzero(diag, dof2 * sizeof(PetscScalar)));
    }
    if (b->isTI) {
      /* aval stays 0 if row i has no stored diagonal entry. */
      aval = 0;
      for (j = ii[i]; j < ii[i + 1]; j++)
        if (idx[j] == i) aval = v[j];
      for (j = 0; j < dof; j++) diag[j + dof * j] += aval;
    } else if (T) {
      aval = 0;
      for (j = ii[i]; j < ii[i + 1]; j++)
        if (idx[j] == i) aval = v[j];
      for (j = 0; j < dof2; j++) diag[j] += aval * T[j];
    }
    PetscCall(PetscKernel_A_gets_inverse_A(dof, diag, v_pivots, v_work, PETSC_FALSE, NULL));
    diag += dof2;
  }
  PetscCall(PetscFree2(v_work, v_pivots));

  b->ibdiagvalid = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return the local (diagonal-block) sequential KAIJ matrix of a parallel KAIJ
   matrix; no reference is taken. */
static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A, Mat *B)
{
  Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ *)A->data;

  PetscFunctionBegin;
  *B = kaij->AIJ;
  PetscFunctionReturn(PETSC_SUCCESS);
}
/* Convert a KAIJ matrix (sequential or parallel) to AIJ: preallocate the
   target from the inner AIJ sparsity scaled by the dense block dimensions,
   then fill it entry-by-entry via MatConvert_Basic(). */
static PetscErrorCode MatConvert_KAIJ_AIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_SeqKAIJ    *a = (Mat_SeqKAIJ *)A->data;
  Mat             AIJ, OAIJ, B;
  PetscInt       *d_nnz, *o_nnz = NULL, nz, i, j, m, d;
  const PetscInt  p = a->p, q = a->q;
  PetscBool       ismpikaij, missing;

  PetscFunctionBegin;
  if (reuse != MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
    if (ismpikaij) {
      /* Pull the inner sequential AIJ matrices out of the diagonal and
         off-diagonal sequential KAIJ pieces. */
      Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
      AIJ  = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ;
      OAIJ = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ;
    } else {
      AIJ  = a->AIJ;
      OAIJ = NULL;
    }
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(B, MATAIJ));
    PetscCall(MatGetSize(AIJ, &m, NULL));
    PetscCall(MatMissingDiagonal(AIJ, &missing, &d)); /* assumption that all successive rows will have a missing diagonal */
    /* With no missing diagonal, or no S contribution, no extra diagonal slots are needed. */
    if (!missing || !a->S) d = m;
    /* Each AIJ nonzero expands to a p x q dense block; rows at/after the first
       missing diagonal get q extra slots for the S (shift) entries. */
    PetscCall(PetscMalloc1(m * p, &d_nnz));
    for (i = 0; i < m; ++i) {
      PetscCall(MatGetRow_SeqAIJ(AIJ, i, &nz, NULL, NULL));
      for (j = 0; j < p; ++j) d_nnz[i * p + j] = nz * q + (i >= d) * q;
      PetscCall(MatRestoreRow_SeqAIJ(AIJ, i, &nz, NULL, NULL));
    }
    if (OAIJ) {
      PetscCall(PetscMalloc1(m * p, &o_nnz));
      for (i = 0; i < m; ++i) {
        PetscCall(MatGetRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL));
        for (j = 0; j < p; ++j) o_nnz[i * p + j] = nz * q;
        PetscCall(MatRestoreRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL));
      }
      PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    } else {
      PetscCall(MatSeqAIJSetPreallocation(B, 0, d_nnz));
    }
    PetscCall(PetscFree(d_nnz));
    PetscCall(PetscFree(o_nnz));
  } else B = *newmat;
  PetscCall(MatConvert_Basic(A, newtype, MAT_REUSE_MATRIX, &B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &B));
  } else *newmat = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* NOTE(review): MatSOR_SeqKAIJ continues beyond this chunk; only its opening
   is visible here and it is left otherwise untouched. */
static PetscErrorCode MatSOR_SeqKAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ *)A->data;
  Mat_SeqAIJ        *a    = (Mat_SeqAIJ *)kaij->AIJ->data;
  const PetscScalar *aa = a->a, *T = kaij->T, *v;
  const PetscInt     m = kaij->AIJ->rmap->n, *ai = a->i, *aj = a->j, p = kaij->p, q = kaij->q, *diag, *vi;
  const PetscScalar *b, *xb, *idiag;
  PetscScalar       *x, *work, *workt, *w, *y,
*arr, *t, *arrt; 849 PetscInt i, j, k, i2, bs, bs2, nz; 850 851 PetscFunctionBegin; 852 its = its * lits; 853 PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat"); 854 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 855 PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift"); 856 PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts"); 857 PetscCheck(p == q, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSOR for KAIJ: No support for non-square dense blocks"); 858 bs = p; 859 bs2 = bs * bs; 860 861 if (!m) PetscFunctionReturn(PETSC_SUCCESS); 862 863 if (!kaij->ibdiagvalid) PetscCall(MatInvertBlockDiagonal_SeqKAIJ(A, NULL)); 864 idiag = kaij->ibdiag; 865 diag = a->diag; 866 867 if (!kaij->sor.setup) { 868 PetscCall(PetscMalloc5(bs, &kaij->sor.w, bs, &kaij->sor.y, m * bs, &kaij->sor.work, m * bs, &kaij->sor.t, m * bs2, &kaij->sor.arr)); 869 kaij->sor.setup = PETSC_TRUE; 870 } 871 y = kaij->sor.y; 872 w = kaij->sor.w; 873 work = kaij->sor.work; 874 t = kaij->sor.t; 875 arr = kaij->sor.arr; 876 877 PetscCall(VecGetArray(xx, &x)); 878 PetscCall(VecGetArrayRead(bb, &b)); 879 880 if (flag & SOR_ZERO_INITIAL_GUESS) { 881 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 882 PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); /* x[0:bs] <- D^{-1} b[0:bs] */ 883 PetscCall(PetscMemcpy(t, b, bs * sizeof(PetscScalar))); 884 i2 = bs; 885 idiag += bs2; 886 for (i = 1; i < m; i++) { 887 v = aa + ai[i]; 888 vi = aj + ai[i]; 889 nz = diag[i] - ai[i]; 890 891 if (T) { /* b - T (Arow * x) */ 892 PetscCall(PetscMemzero(w, bs * sizeof(PetscScalar))); 893 for (j = 0; j < nz; j++) { 894 for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k]; 895 } 896 
PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs, w, T, &t[i2]); 897 for (k = 0; k < bs; k++) t[i2 + k] += b[i2 + k]; 898 } else if (kaij->isTI) { 899 PetscCall(PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar))); 900 for (j = 0; j < nz; j++) { 901 for (k = 0; k < bs; k++) t[i2 + k] -= v[j] * x[vi[j] * bs + k]; 902 } 903 } else { 904 PetscCall(PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar))); 905 } 906 907 PetscKernel_w_gets_Ar_times_v(bs, bs, t + i2, idiag, y); 908 for (j = 0; j < bs; j++) x[i2 + j] = omega * y[j]; 909 910 idiag += bs2; 911 i2 += bs; 912 } 913 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 914 PetscCall(PetscLogFlops(1.0 * bs2 * a->nz)); 915 xb = t; 916 } else xb = b; 917 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 918 idiag = kaij->ibdiag + bs2 * (m - 1); 919 i2 = bs * (m - 1); 920 PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar))); 921 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 922 i2 -= bs; 923 idiag -= bs2; 924 for (i = m - 2; i >= 0; i--) { 925 v = aa + diag[i] + 1; 926 vi = aj + diag[i] + 1; 927 nz = ai[i + 1] - diag[i] - 1; 928 929 if (T) { /* FIXME: This branch untested */ 930 PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar))); 931 /* copy all rows of x that are needed into contiguous space */ 932 workt = work; 933 for (j = 0; j < nz; j++) { 934 PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 935 workt += bs; 936 } 937 arrt = arr; 938 for (j = 0; j < nz; j++) { 939 PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 940 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 941 arrt += bs2; 942 } 943 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 944 } else if (kaij->isTI) { 945 PetscCall(PetscMemcpy(w, t + i2, bs * sizeof(PetscScalar))); 946 for (j = 0; j < nz; j++) { 947 for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k]; 948 } 949 } 950 951 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); 
/* RHS incorrect for omega != 1.0 */ 952 for (j = 0; j < bs; j++) x[i2 + j] = (1.0 - omega) * x[i2 + j] + omega * y[j]; 953 954 idiag -= bs2; 955 i2 -= bs; 956 } 957 PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz))); 958 } 959 its--; 960 } 961 while (its--) { /* FIXME: This branch not updated */ 962 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 963 i2 = 0; 964 idiag = kaij->ibdiag; 965 for (i = 0; i < m; i++) { 966 PetscCall(PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar))); 967 968 v = aa + ai[i]; 969 vi = aj + ai[i]; 970 nz = diag[i] - ai[i]; 971 workt = work; 972 for (j = 0; j < nz; j++) { 973 PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 974 workt += bs; 975 } 976 arrt = arr; 977 if (T) { 978 for (j = 0; j < nz; j++) { 979 PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 980 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 981 arrt += bs2; 982 } 983 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 984 } else if (kaij->isTI) { 985 for (j = 0; j < nz; j++) { 986 PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar))); 987 for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 988 arrt += bs2; 989 } 990 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 991 } 992 PetscCall(PetscMemcpy(t + i2, w, bs * sizeof(PetscScalar))); 993 994 v = aa + diag[i] + 1; 995 vi = aj + diag[i] + 1; 996 nz = ai[i + 1] - diag[i] - 1; 997 workt = work; 998 for (j = 0; j < nz; j++) { 999 PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 1000 workt += bs; 1001 } 1002 arrt = arr; 1003 if (T) { 1004 for (j = 0; j < nz; j++) { 1005 PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 1006 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 1007 arrt += bs2; 1008 } 1009 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1010 } else if (kaij->isTI) { 1011 for (j = 0; j < nz; j++) { 1012 PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar))); 1013 for (k = 0; k < bs; k++) arrt[k 
+ bs * k] = v[j]; 1014 arrt += bs2; 1015 } 1016 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1017 } 1018 1019 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); 1020 for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j); 1021 1022 idiag += bs2; 1023 i2 += bs; 1024 } 1025 xb = t; 1026 } else xb = b; 1027 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1028 idiag = kaij->ibdiag + bs2 * (m - 1); 1029 i2 = bs * (m - 1); 1030 if (xb == b) { 1031 for (i = m - 1; i >= 0; i--) { 1032 PetscCall(PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar))); 1033 1034 v = aa + ai[i]; 1035 vi = aj + ai[i]; 1036 nz = diag[i] - ai[i]; 1037 workt = work; 1038 for (j = 0; j < nz; j++) { 1039 PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 1040 workt += bs; 1041 } 1042 arrt = arr; 1043 if (T) { 1044 for (j = 0; j < nz; j++) { 1045 PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 1046 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 1047 arrt += bs2; 1048 } 1049 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1050 } else if (kaij->isTI) { 1051 for (j = 0; j < nz; j++) { 1052 PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar))); 1053 for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 1054 arrt += bs2; 1055 } 1056 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1057 } 1058 1059 v = aa + diag[i] + 1; 1060 vi = aj + diag[i] + 1; 1061 nz = ai[i + 1] - diag[i] - 1; 1062 workt = work; 1063 for (j = 0; j < nz; j++) { 1064 PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 1065 workt += bs; 1066 } 1067 arrt = arr; 1068 if (T) { 1069 for (j = 0; j < nz; j++) { 1070 PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 1071 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 1072 arrt += bs2; 1073 } 1074 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1075 } else if (kaij->isTI) { 1076 for (j = 0; j < nz; j++) { 1077 
              PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
              for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
              arrt += bs2;
            }
            PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
          }

          PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
          for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);
          /* NOTE(review): i2 and idiag are not advanced inside this loop, unlike the
             first-iteration backward sweep above which decrements both each pass; this
             looks wrong for m > 1 -- see the FIXME on this branch, confirm before fixing */
        }
      } else {
        for (i = m - 1; i >= 0; i--) {
          PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar)));
          v     = aa + diag[i] + 1;
          vi    = aj + diag[i] + 1;
          nz    = ai[i + 1] - diag[i] - 1;
          workt = work;
          /* gather the rows of x referenced by this row's upper-triangular entries
             into contiguous workspace */
          for (j = 0; j < nz; j++) {
            PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar)));
            workt += bs;
          }
          arrt = arr;
          if (T) {
            /* build the dense blocks a_ij * T, then w -= [blocks] * work */
            for (j = 0; j < nz; j++) {
              PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar)));
              for (k = 0; k < bs2; k++) arrt[k] *= v[j];
              arrt += bs2;
            }
            PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
          } else if (kaij->isTI) {
            /* T is the identity: the dense blocks are a_ij * I */
            for (j = 0; j < nz; j++) {
              PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar)));
              for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j];
              arrt += bs2;
            }
            PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work);
          }
          PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y);
          for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j);
          /* NOTE(review): i2/idiag again not advanced here -- same concern as the
             FIXME ("This branch not updated") above */
        }
      }
      PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz)));
    }
  }

  PetscCall(VecRestoreArray(xx, &x));
  PetscCall(VecRestoreArrayRead(bb, &b));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*===================================================================================*/

/* zz = [I \otimes S + A \otimes T] xx + yy (or just the product when yy == NULL) for the
   parallel KAIJ format. The on-process (diagonal-block) multiply is overlapped with the
   scatter that brings in the off-process entries of xx needed by the off-diagonal part. */
static PetscErrorCode MatMultAdd_MPIKAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;

  PetscFunctionBegin;
  if (!yy) {
    PetscCall(VecSet(zz, 0.0)); /* no addend: plain multiply */
  } else {
    PetscCall(VecCopy(yy, zz));
  }
  PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */
  /* start the scatter */
  PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, zz, zz)); /* on-process part while the scatter is in flight */
  PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz)); /* off-process part on the scattered values */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* yy = [I \otimes S + A \otimes T] xx, implemented as MatMultAdd with no addend */
static PetscErrorCode MatMult_MPIKAIJ(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultAdd_MPIKAIJ(A, xx, NULL, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Invert the point-block diagonal by delegating to the sequential on-process KAIJ matrix */
static PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A, const PetscScalar **values)
{
  Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ is up to date. */
  PetscCall((*b->AIJ->ops->invertblockdiagonal)(b->AIJ, values));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Expand one row of the (np x nq) KAIJ matrix from its compact S/T/A representation.
   Row `row` maps to block-row r = row / p of A and local row s = row % p within the
   p x q dense blocks. The returned cols/values arrays are allocated here and released
   in MatRestoreRow_SeqKAIJ(). */
static PetscErrorCode MatGetRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values)
{
  Mat_SeqKAIJ *b    = (Mat_SeqKAIJ *)A->data;
  PetscBool    diag = PETSC_FALSE;
  PetscInt     nzaij, nz, *colsaij, *idx, i, j, p = b->p, q = b->q, r = row / p, s = row % p, c;
  PetscScalar *vaij, *v, *S = b->S, *T = b->T;

  PetscFunctionBegin;
  PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  b->getrowactive = PETSC_TRUE;
  PetscCheck(row >= 0 && row < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row);

  /* S == NULL, T == NULL and T is not the identity: the whole matrix is zero */
  if ((!S) && (!T) && (!b->isTI)) {
    if (ncols) *ncols = 0;
    if (cols) *cols = NULL;
    if (values) *values = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  if (T || b->isTI) {
    PetscCall(MatGetRow_SeqAIJ(b->AIJ, r, &nzaij,
                               &colsaij, &vaij));
    /* c will index the q-column group where the S block is added: the position of the
       diagonal entry of A if this row has one, otherwise just past all T contributions */
    c = nzaij;
    for (i = 0; i < nzaij; i++) {
      /* check if this row contains a diagonal entry */
      if (colsaij[i] == r) {
        diag = PETSC_TRUE;
        c    = i;
      }
    }
  } else nzaij = c = 0;

  /* calculate size of row */
  nz = 0;
  if (S) nz += q;
  /* when A has a diagonal entry and S is present, the S block overlaps one T block */
  if (T || b->isTI) nz += (diag && S ? (nzaij - 1) * q : nzaij * q);

  if (cols || values) {
    PetscCall(PetscMalloc2(nz, &idx, nz, &v));
    for (i = 0; i < q; i++) {
      /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
      v[i] = 0.0;
    }
    if (b->isTI) {
      /* identity T: entry (s,j) of each p x q block is a_ir on the diagonal, 0 elsewhere */
      for (i = 0; i < nzaij; i++) {
        for (j = 0; j < q; j++) {
          idx[i * q + j] = colsaij[i] * q + j;
          v[i * q + j]   = (j == s ? vaij[i] : 0);
        }
      }
    } else if (T) {
      /* general T: row s of a_ir * T, T stored column-major */
      for (i = 0; i < nzaij; i++) {
        for (j = 0; j < q; j++) {
          idx[i * q + j] = colsaij[i] * q + j;
          v[i * q + j]   = vaij[i] * T[s + j * p];
        }
      }
    }
    if (S) {
      /* add row s of S into the diagonal-block column group */
      for (j = 0; j < q; j++) {
        idx[c * q + j] = r * q + j;
        v[c * q + j] += S[s + j * p];
      }
    }
  }

  if (ncols) *ncols = nz;
  if (cols) *cols = idx;
  if (values) *values = v;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release the arrays allocated by MatGetRow_SeqKAIJ() and clear the active flag.
   NOTE(review): *idx and *v are dereferenced unconditionally; a caller that passed
   NULL for cols/values to MatGetRow would crash here -- confirm all callers request both. */
static PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  PetscFunctionBegin;
  PetscCall(PetscFree2(*idx, *v));
  ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Parallel analogue of MatGetRow_SeqKAIJ(): expands one locally owned row of the MPI KAIJ
   matrix. Column indices are produced in global numbering, using rstart / p to offset the
   diagonal-part indices and garray[] to map off-diagonal (OAIJ) indices back to global. */
static PetscErrorCode MatGetRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values)
{
  Mat_MPIKAIJ    *b    = (Mat_MPIKAIJ *)A->data;
  Mat             AIJ  = b->A;
  PetscBool       diag = PETSC_FALSE;
  Mat             MatAIJ, MatOAIJ;
  const PetscInt  rstart = A->rmap->rstart, rend = A->rmap->rend, p = b->p, q = b->q, *garray;
  PetscInt        nz, *idx, ncolsaij = 0, ncolsoaij = 0, *colsaij, *colsoaij, r, s, c, i, j, lrow;
  PetscScalar    *v, *vals, *ovals, *S = b->S, *T = b->T;

  PetscFunctionBegin;
  PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */
  MatAIJ  = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ;
  MatOAIJ = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ;
  PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  b->getrowactive = PETSC_TRUE;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
  lrow = row - rstart;

  /* S == NULL, T == NULL and T is not the identity: the whole matrix is zero */
  if ((!S) && (!T) && (!b->isTI)) {
    if (ncols) *ncols = 0;
    if (cols) *cols = NULL;
    if (values) *values = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  r = lrow / p; /* local block row of A */
  s = lrow % p; /* row within the p x q dense blocks */

  if (T || b->isTI) {
    PetscCall(MatMPIAIJGetSeqAIJ(AIJ, NULL, NULL, &garray));
    PetscCall(MatGetRow_SeqAIJ(MatAIJ, lrow / p, &ncolsaij, &colsaij, &vals));
    PetscCall(MatGetRow_SeqAIJ(MatOAIJ, lrow / p, &ncolsoaij, &colsoaij, &ovals));

    /* c indexes the q-column group where the S block is added (diagonal entry of A
       if present, otherwise just past all T contributions) */
    c = ncolsaij + ncolsoaij;
    for (i = 0; i < ncolsaij; i++) {
      /* check if this row contains a diagonal entry */
      if (colsaij[i] == r) {
        diag = PETSC_TRUE;
        c    = i;
      }
    }
  } else c = 0;

  /* calculate size of row */
  nz = 0;
  if (S) nz += q;
  if (T || b->isTI) nz += (diag && S ? (ncolsaij + ncolsoaij - 1) * q : (ncolsaij + ncolsoaij) * q);

  if (cols || values) {
    PetscCall(PetscMalloc2(nz, &idx, nz, &v));
    for (i = 0; i < q; i++) {
      /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
      v[i] = 0.0;
    }
    if (b->isTI) {
      /* identity T: diagonal (on-process) part, global column = (local col + rstart/p) * q + j */
      for (i = 0; i < ncolsaij; i++) {
        for (j = 0; j < q; j++) {
          idx[i * q + j] = (colsaij[i] + rstart / p) * q + j;
          v[i * q + j]   = (j == s ? vals[i] : 0.0);
        }
      }
      /* off-diagonal part, mapped to global numbering via garray */
      for (i = 0; i < ncolsoaij; i++) {
        for (j = 0; j < q; j++) {
          idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j;
          v[(i + ncolsaij) * q + j]   = (j == s ? ovals[i] : 0.0);
        }
      }
    } else if (T) {
      /* general T: row s of a_ir * T (T column-major), diagonal part first */
      for (i = 0; i < ncolsaij; i++) {
        for (j = 0; j < q; j++) {
          idx[i * q + j] = (colsaij[i] + rstart / p) * q + j;
          v[i * q + j]   = vals[i] * T[s + j * p];
        }
      }
      for (i = 0; i < ncolsoaij; i++) {
        for (j = 0; j < q; j++) {
          idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j;
          v[(i + ncolsaij) * q + j]   = ovals[i] * T[s + j * p];
        }
      }
    }
    if (S) {
      /* add row s of S into the diagonal-block column group */
      for (j = 0; j < q; j++) {
        idx[c * q + j] = (r + rstart / p) * q + j;
        v[c * q + j] += S[s + j * p];
      }
    }
  }

  if (ncols) *ncols = nz;
  if (cols) *cols = idx;
  if (values) *values = v;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release the arrays allocated by MatGetRow_MPIKAIJ() and clear the active flag.
   NOTE(review): like the Seq variant, *idx and *v are dereferenced unconditionally --
   confirm callers always request both cols and values. */
static PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  PetscFunctionBegin;
  PetscCall(PetscFree2(*idx, *v));
  ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Extract a submatrix by first converting the whole KAIJ matrix to AIJ (a full
   expansion), then delegating to MatCreateSubMatrix() on the temporary AIJ copy. */
static PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat)
{
  Mat A;

  PetscFunctionBegin;
  PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
  PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat));
  PetscCall(MatDestroy(&A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatCreateKAIJ - Creates a matrix of type `MATKAIJ`.

  Collective

  Input Parameters:
+ A - the `MATAIJ` matrix
. p - number of rows in `S` and `T`
. q - number of columns in `S` and `T`
.
S - the `S` matrix (can be `NULL`), stored as a `PetscScalar` array (column-major)
- T - the `T` matrix (can be `NULL`), stored as a `PetscScalar` array (column-major)

  Output Parameter:
. kaij - the new `MATKAIJ` matrix

  Level: advanced

  Notes:
  The created matrix is of the following form\:
.vb
    [I \otimes S + A \otimes T]
.ve
  where
.vb
    S is a dense (p \times q) matrix
    T is a dense (p \times q) matrix
    A is a `MATAIJ` (n \times n) matrix
    I is the identity matrix
.ve
  The resulting matrix is (np \times nq)

  `S` and `T` are always stored independently on all processes as `PetscScalar` arrays in
  column-major format.

  This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed.

  Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix.

  Developer Notes:
  In the `MATMPIKAIJ` case, the internal 'AIJ' and 'OAIJ' sequential KAIJ matrices are kept up to date by tracking the object state
  of the AIJ matrix 'A' that describes the blockwise action of the `MATMPIKAIJ` matrix and, if the object state has changed, lazily
  rebuilding 'AIJ' and 'OAIJ' just before executing operations with the `MATMPIKAIJ` matrix. If new types of operations are added,
  routines implementing those must also ensure these are rebuilt when needed (by calling the internal MatKAIJ_build_AIJ_OAIJ() routine).

.seealso: [](ch_matrices), `Mat`, `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MATKAIJ`
@*/
PetscErrorCode MatCreateKAIJ(Mat A, PetscInt p, PetscInt q, const PetscScalar S[], const PetscScalar T[], Mat *kaij)
{
  PetscFunctionBegin;
  /* convenience wrapper: create, set type, attach A/S/T, then set up */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), kaij));
  PetscCall(MatSetType(*kaij, MATKAIJ));
  PetscCall(MatKAIJSetAIJ(*kaij, A));
  PetscCall(MatKAIJSetS(*kaij, p, q, S));
  PetscCall(MatKAIJSetT(*kaij, p, q, T));
  PetscCall(MatSetUp(*kaij));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
  MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form
  [I \otimes S + A \otimes T],
  where
.vb
  S is a dense (p \times q) matrix,
  T is a dense (p \times q) matrix,
  A is an AIJ (n \times n) matrix,
  and I is the identity matrix.
.ve
  The resulting matrix is (np \times nq).

  S and T are always stored independently on all processes as `PetscScalar` arrays in column-major format.

  Level: advanced

  Note:
  A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b,
  where x and b are column vectors containing the row-major representations of X and B.

.seealso: [](ch_matrices), `Mat`, `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MatCreateKAIJ()`
M*/

/* Type constructor registered for MATKAIJ: picks the sequential or parallel variant
   based on the communicator size and installs the corresponding operation table. */
PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
{
  Mat_MPIKAIJ *b;
  PetscMPIInt  size;

  PetscFunctionBegin;
  PetscCall(PetscNew(&b));
  A->data = (void *)b;

  /* start from an empty ops table; only the operations set below are supported */
  PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps)));

  b->w = NULL; /* scatter work vector, created later (parallel case only) */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQKAIJ));
    A->ops->destroy             = MatDestroy_SeqKAIJ;
    A->ops->mult                = MatMult_SeqKAIJ;
    A->ops->multadd             = MatMultAdd_SeqKAIJ;
    A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
    A->ops->getrow              = MatGetRow_SeqKAIJ;
    A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
    A->ops->sor                 = MatSOR_SeqKAIJ;
    PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", MatConvert_KAIJ_AIJ));
  } else {
    PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIKAIJ));
    A->ops->destroy             = MatDestroy_MPIKAIJ;
    A->ops->mult                = MatMult_MPIKAIJ;
    A->ops->multadd             = MatMultAdd_MPIKAIJ;
    A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
    A->ops->getrow              = MatGetRow_MPIKAIJ;
    A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
    PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", MatGetDiagonalBlock_MPIKAIJ));
    PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", MatConvert_KAIJ_AIJ));
  }
  /* operations shared by the sequential and parallel variants */
  A->ops->setup           = MatSetUp_KAIJ;
  A->ops->view            = MatView_KAIJ;
  A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
  PetscFunctionReturn(PETSC_SUCCESS);
}