1 /* 2 Defines the basic matrix operations for the KAIJ matrix storage format. 3 This format is used to evaluate matrices of the form: 4 5 [I \otimes S + A \otimes T] 6 7 where 8 S is a dense (p \times q) matrix 9 T is a dense (p \times q) matrix 10 A is an AIJ (n \times n) matrix 11 I is the identity matrix 12 13 The resulting matrix is (np \times nq) 14 15 We provide: 16 MatMult() 17 MatMultAdd() 18 MatInvertBlockDiagonal() 19 and 20 MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*) 21 22 This single directory handles both the sequential and parallel codes 23 */ 24 25 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/ 26 #include <../src/mat/utils/freespace.h> 27 #include <petsc/private/vecimpl.h> 28 29 /*@C 30 MatKAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix 31 32 Not Collective, but if the `MATKAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel 33 34 Input Parameter: 35 . A - the `MATKAIJ` matrix 36 37 Output Parameter: 38 . B - the `MATAIJ` matrix 39 40 Level: advanced 41 42 Note: 43 The reference count on the `MATAIJ` matrix is not increased so you should not destroy it. 
44 45 .seealso: [](ch_matrices), `Mat`, `MatCreateKAIJ()`, `MATKAIJ`, `MATAIJ` 46 @*/ 47 PetscErrorCode MatKAIJGetAIJ(Mat A, Mat *B) 48 { 49 PetscBool ismpikaij, isseqkaij; 50 51 PetscFunctionBegin; 52 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij)); 53 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQKAIJ, &isseqkaij)); 54 if (ismpikaij) { 55 Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 56 57 *B = b->A; 58 } else if (isseqkaij) { 59 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 60 61 *B = b->AIJ; 62 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix passed in is not of type KAIJ"); 63 PetscFunctionReturn(PETSC_SUCCESS); 64 } 65 66 /*@C 67 MatKAIJGetS - Get the `S` matrix describing the shift action of the `MATKAIJ` matrix 68 69 Not Collective; the entire `S` is stored and returned independently on all processes. 70 71 Input Parameter: 72 . A - the `MATKAIJ` matrix 73 74 Output Parameters: 75 + m - the number of rows in `S` 76 . n - the number of columns in `S` 77 - S - the S matrix, in form of a scalar array in column-major format 78 79 Level: advanced 80 81 Note: 82 All output parameters are optional (pass `NULL` if not desired) 83 84 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()` 85 @*/ 86 PetscErrorCode MatKAIJGetS(Mat A, PetscInt *m, PetscInt *n, PetscScalar **S) 87 { 88 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 89 90 PetscFunctionBegin; 91 if (m) *m = b->p; 92 if (n) *n = b->q; 93 if (S) *S = b->S; 94 PetscFunctionReturn(PETSC_SUCCESS); 95 } 96 97 /*@C 98 MatKAIJGetSRead - Get a read-only pointer to the `S` matrix describing the shift action of the `MATKAIJ` matrix 99 100 Not Collective; the entire `S` is stored and returned independently on all processes. 101 102 Input Parameter: 103 . A - the `MATKAIJ` matrix 104 105 Output Parameters: 106 + m - the number of rows in `S` 107 . 
n - the number of columns in `S` 108 - S - the S matrix, in form of a scalar array in column-major format 109 110 Level: advanced 111 112 Note: 113 All output parameters are optional (pass `NULL` if not desired) 114 115 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()` 116 @*/ 117 PetscErrorCode MatKAIJGetSRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar **S) 118 { 119 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 120 121 PetscFunctionBegin; 122 if (m) *m = b->p; 123 if (n) *n = b->q; 124 if (S) *S = b->S; 125 PetscFunctionReturn(PETSC_SUCCESS); 126 } 127 128 /*@C 129 MatKAIJRestoreS - Restore array obtained with `MatKAIJGetS()` 130 131 Not Collective 132 133 Input Parameters: 134 + A - the `MATKAIJ` matrix 135 - S - location of pointer to array obtained with `MatKAIJGetS()` 136 137 Level: advanced 138 139 Note: 140 This routine zeros the array pointer to prevent accidental reuse after it has been restored. 141 If `NULL` is passed, it will not attempt to zero the array pointer. 142 143 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()` 144 @*/ 145 PetscErrorCode MatKAIJRestoreS(Mat A, PetscScalar **S) 146 { 147 PetscFunctionBegin; 148 if (S) *S = NULL; 149 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 150 PetscFunctionReturn(PETSC_SUCCESS); 151 } 152 153 /*@C 154 MatKAIJRestoreSRead - Restore array obtained with `MatKAIJGetSRead()` 155 156 Not Collective 157 158 Input Parameters: 159 + A - the `MATKAIJ` matrix 160 - S - location of pointer to array obtained with `MatKAIJGetS()` 161 162 Level: advanced 163 164 Note: 165 This routine zeros the array pointer to prevent accidental reuse after it has been restored. 166 If `NULL` is passed, it will not attempt to zero the array pointer. 
167 168 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()` 169 @*/ 170 PetscErrorCode MatKAIJRestoreSRead(Mat A, const PetscScalar **S) 171 { 172 PetscFunctionBegin; 173 if (S) *S = NULL; 174 PetscFunctionReturn(PETSC_SUCCESS); 175 } 176 177 /*@C 178 MatKAIJGetT - Get the transformation matrix `T` associated with the `MATKAIJ` matrix 179 180 Not Collective; the entire `T` is stored and returned independently on all processes 181 182 Input Parameter: 183 . A - the `MATKAIJ` matrix 184 185 Output Parameters: 186 + m - the number of rows in `T` 187 . n - the number of columns in `T` 188 - T - the T matrix, in form of a scalar array in column-major format 189 190 Level: advanced 191 192 Note: 193 All output parameters are optional (pass `NULL` if not desired) 194 195 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()` 196 @*/ 197 PetscErrorCode MatKAIJGetT(Mat A, PetscInt *m, PetscInt *n, PetscScalar **T) 198 { 199 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 200 201 PetscFunctionBegin; 202 if (m) *m = b->p; 203 if (n) *n = b->q; 204 if (T) *T = b->T; 205 PetscFunctionReturn(PETSC_SUCCESS); 206 } 207 208 /*@C 209 MatKAIJGetTRead - Get a read-only pointer to the transformation matrix `T` associated with the `MATKAIJ` matrix 210 211 Not Collective; the entire `T` is stored and returned independently on all processes 212 213 Input Parameter: 214 . A - the `MATKAIJ` matrix 215 216 Output Parameters: 217 + m - the number of rows in `T` 218 . 
n - the number of columns in `T` 219 - T - the T matrix, in form of a scalar array in column-major format 220 221 Level: advanced 222 223 Note: 224 All output parameters are optional (pass `NULL` if not desired) 225 226 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()` 227 @*/ 228 PetscErrorCode MatKAIJGetTRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar **T) 229 { 230 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 231 232 PetscFunctionBegin; 233 if (m) *m = b->p; 234 if (n) *n = b->q; 235 if (T) *T = b->T; 236 PetscFunctionReturn(PETSC_SUCCESS); 237 } 238 239 /*@C 240 MatKAIJRestoreT - Restore array obtained with `MatKAIJGetT()` 241 242 Not Collective 243 244 Input Parameters: 245 + A - the `MATKAIJ` matrix 246 - T - location of pointer to array obtained with `MatKAIJGetS()` 247 248 Level: advanced 249 250 Note: 251 This routine zeros the array pointer to prevent accidental reuse after it has been restored. 252 If `NULL` is passed, it will not attempt to zero the array pointer. 253 254 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()` 255 @*/ 256 PetscErrorCode MatKAIJRestoreT(Mat A, PetscScalar **T) 257 { 258 PetscFunctionBegin; 259 if (T) *T = NULL; 260 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 261 PetscFunctionReturn(PETSC_SUCCESS); 262 } 263 264 /*@C 265 MatKAIJRestoreTRead - Restore array obtained with `MatKAIJGetTRead()` 266 267 Not Collective 268 269 Input Parameters: 270 + A - the `MATKAIJ` matrix 271 - T - location of pointer to array obtained with `MatKAIJGetS()` 272 273 Level: advanced 274 275 Note: 276 This routine zeros the array pointer to prevent accidental reuse after it has been restored. 277 If `NULL` is passed, it will not attempt to zero the array pointer. 
278 279 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()` 280 @*/ 281 PetscErrorCode MatKAIJRestoreTRead(Mat A, const PetscScalar **T) 282 { 283 PetscFunctionBegin; 284 if (T) *T = NULL; 285 PetscFunctionReturn(PETSC_SUCCESS); 286 } 287 288 /*@ 289 MatKAIJSetAIJ - Set the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix 290 291 Logically Collective; if the `MATAIJ` matrix is parallel, the `MATKAIJ` matrix is also parallel 292 293 Input Parameters: 294 + A - the `MATKAIJ` matrix 295 - B - the `MATAIJ` matrix 296 297 Level: advanced 298 299 Notes: 300 This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed. 301 302 Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix. 303 304 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()` 305 @*/ 306 PetscErrorCode MatKAIJSetAIJ(Mat A, Mat B) 307 { 308 PetscMPIInt size; 309 PetscBool flg; 310 311 PetscFunctionBegin; 312 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 313 if (size == 1) { 314 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQAIJ, &flg)); 315 PetscCheck(flg, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MatKAIJSetAIJ() with MATSEQKAIJ does not support %s as the AIJ mat", ((PetscObject)B)->type_name); 316 Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data; 317 a->AIJ = B; 318 } else { 319 Mat_MPIKAIJ *a = (Mat_MPIKAIJ *)A->data; 320 a->A = B; 321 } 322 PetscCall(PetscObjectReference((PetscObject)B)); 323 PetscFunctionReturn(PETSC_SUCCESS); 324 } 325 326 /*@C 327 MatKAIJSetS - Set the `S` matrix describing the shift action of the `MATKAIJ` matrix 328 329 Logically Collective; the entire `S` is stored independently on all processes. 330 331 Input Parameters: 332 + A - the `MATKAIJ` matrix 333 . p - the number of rows in `S` 334 . 
q - the number of columns in `S` 335 - S - the S matrix, in form of a scalar array in column-major format 336 337 Level: advanced 338 339 Notes: 340 The dimensions `p` and `q` must match those of the transformation matrix `T` associated with the `MATKAIJ` matrix. 341 342 The `S` matrix is copied, so the user can destroy this array. 343 344 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJSetT()`, `MatKAIJSetAIJ()` 345 @*/ 346 PetscErrorCode MatKAIJSetS(Mat A, PetscInt p, PetscInt q, const PetscScalar S[]) 347 { 348 Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data; 349 350 PetscFunctionBegin; 351 PetscCall(PetscFree(a->S)); 352 if (S) { 353 PetscCall(PetscMalloc1(p * q, &a->S)); 354 PetscCall(PetscMemcpy(a->S, S, p * q * sizeof(PetscScalar))); 355 } else a->S = NULL; 356 357 a->p = p; 358 a->q = q; 359 PetscFunctionReturn(PETSC_SUCCESS); 360 } 361 362 /*@C 363 MatKAIJGetScaledIdentity - Check if both `S` and `T` are scaled identities. 364 365 Logically Collective. 366 367 Input Parameter: 368 . A - the `MATKAIJ` matrix 369 370 Output Parameter: 371 . 
identity - the Boolean value 372 373 Level: advanced 374 375 .seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetT()` 376 @*/ 377 PetscErrorCode MatKAIJGetScaledIdentity(Mat A, PetscBool *identity) 378 { 379 Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data; 380 PetscInt i, j; 381 382 PetscFunctionBegin; 383 if (a->p != a->q) { 384 *identity = PETSC_FALSE; 385 PetscFunctionReturn(PETSC_SUCCESS); 386 } else *identity = PETSC_TRUE; 387 if (!a->isTI || a->S) { 388 for (i = 0; i < a->p && *identity; i++) { 389 for (j = 0; j < a->p && *identity; j++) { 390 if (i != j) { 391 if (a->S && PetscAbsScalar(a->S[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE; 392 if (a->T && PetscAbsScalar(a->T[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE; 393 } else { 394 if (a->S && PetscAbsScalar(a->S[i * (a->p + 1)] - a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE; 395 if (a->T && PetscAbsScalar(a->T[i * (a->p + 1)] - a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE; 396 } 397 } 398 } 399 } 400 PetscFunctionReturn(PETSC_SUCCESS); 401 } 402 403 /*@C 404 MatKAIJSetT - Set the transformation matrix `T` associated with the `MATKAIJ` matrix 405 406 Logically Collective; the entire `T` is stored independently on all processes. 407 408 Input Parameters: 409 + A - the `MATKAIJ` matrix 410 . p - the number of rows in `S` 411 . q - the number of columns in `S` 412 - T - the `T` matrix, in form of a scalar array in column-major format 413 414 Level: advanced 415 416 Notes: 417 The dimensions `p` and `q` must match those of the shift matrix `S` associated with the `MATKAIJ` matrix. 418 419 The `T` matrix is copied, so the user can destroy this array. 
.seealso: [](ch_matrices), `Mat`, `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJSetS()`, `MatKAIJSetAIJ()`
@*/
PetscErrorCode MatKAIJSetT(Mat A, PetscInt p, PetscInt q, const PetscScalar T[])
{
  PetscInt     i, j;
  Mat_SeqKAIJ *a    = (Mat_SeqKAIJ *)A->data;
  PetscBool    isTI = PETSC_FALSE;

  PetscFunctionBegin;
  /* check if T is an identity matrix */
  if (T && (p == q)) {
    isTI = PETSC_TRUE;
    for (i = 0; i < p; i++) {
      for (j = 0; j < q; j++) {
        if (i == j) {
          /* diagonal term must be 1 */
          if (T[i + j * p] != 1.0) isTI = PETSC_FALSE;
        } else {
          /* off-diagonal term must be 0 */
          if (T[i + j * p] != 0.0) isTI = PETSC_FALSE;
        }
      }
    }
  }
  a->isTI = isTI;

  /* When T is the identity we store only the isTI flag and keep a->T NULL;
     the multiply kernels below special-case isTI for speed. */
  PetscCall(PetscFree(a->T));
  if (T && (!isTI)) {
    PetscCall(PetscMalloc1(p * q, &a->T));
    PetscCall(PetscMemcpy(a->T, T, p * q * sizeof(PetscScalar)));
  } else a->T = NULL;

  a->p = p;
  a->q = q;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destroy the sequential KAIJ matrix: release the owned AIJ matrix, the dense S/T blocks,
   the cached inverted block diagonal, and the SOR work arrays. */
static PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
{
  Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDestroy(&b->AIJ));
  PetscCall(PetscFree(b->S));
  PetscCall(PetscFree(b->T));
  PetscCall(PetscFree(b->ibdiag));
  PetscCall(PetscFree5(b->sor.w, b->sor.y, b->sor.work, b->sor.t, b->sor.arr));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", NULL));
  PetscCall(PetscFree(A->data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* (Re)build the sequential KAIJ submatrices AIJ (diagonal part) and OAIJ (off-diagonal part)
   of a parallel KAIJ matrix from the diagonal/off-diagonal parts of the underlying MPIAIJ
   matrix. Skipped when the cached object state shows the members are already current. */
static PetscErrorCode MatKAIJ_build_AIJ_OAIJ(Mat A)
{
  Mat_MPIKAIJ     *a;
  Mat_MPIAIJ      *mpiaij;
  PetscScalar     *T;
  PetscInt         i, j;
  PetscObjectState state;

  PetscFunctionBegin;
  a      = (Mat_MPIKAIJ *)A->data;
  mpiaij = (Mat_MPIAIJ *)a->A->data;

  PetscCall(PetscObjectStateGet((PetscObject)a->A, &state));
  if (state == a->state) {
    /* The existing AIJ and KAIJ members are up-to-date, so simply exit. */
    PetscFunctionReturn(PETSC_SUCCESS);
  } else {
    PetscCall(MatDestroy(&a->AIJ));
    PetscCall(MatDestroy(&a->OAIJ));
    if (a->isTI) {
      /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
       * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
       * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
       * to pass in. */
      PetscCall(PetscMalloc1(a->p * a->q, &T));
      for (i = 0; i < a->p; i++) {
        for (j = 0; j < a->q; j++) {
          if (i == j) T[i + j * a->p] = 1.0;
          else T[i + j * a->p] = 0.0;
        }
      }
    } else T = a->T;
    PetscCall(MatCreateKAIJ(mpiaij->A, a->p, a->q, a->S, T, &a->AIJ));
    PetscCall(MatCreateKAIJ(mpiaij->B, a->p, a->q, NULL, T, &a->OAIJ));
    if (a->isTI) PetscCall(PetscFree(T));
    a->state = state;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set up the KAIJ matrix: fix the (np x nq) sizes/layouts from the underlying AIJ matrix,
   and in parallel additionally build the AIJ/OAIJ submatrices and the scatter context used
   to gather off-process vector entries during MatMult. */
static PetscErrorCode MatSetUp_KAIJ(Mat A)
{
  PetscInt     n;
  PetscMPIInt  size;
  Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ *)A->data;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    PetscCall(MatSetSizes(A, seqkaij->p * seqkaij->AIJ->rmap->n, seqkaij->q * seqkaij->AIJ->cmap->n, seqkaij->p * seqkaij->AIJ->rmap->N, seqkaij->q * seqkaij->AIJ->cmap->N));
    PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p));
    PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q));
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
  } else {
    Mat_MPIKAIJ *a;
    Mat_MPIAIJ  *mpiaij;
    IS           from, to;
    Vec          gvec;

    a      = (Mat_MPIKAIJ *)A->data;
    mpiaij = (Mat_MPIAIJ *)a->A->data;
    PetscCall(MatSetSizes(A, a->p * a->A->rmap->n, a->q * a->A->cmap->n, a->p * a->A->rmap->N, a->q * a->A->cmap->N));
    /* NOTE(review): seqkaij here is A->data viewed as Mat_SeqKAIJ even though A is parallel;
       this assumes Mat_MPIKAIJ and Mat_SeqKAIJ share the p/q fields at the same offsets — confirm. */
    PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p));
    PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q));
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));

    PetscCall(MatKAIJ_build_AIJ_OAIJ(A));

    /* Local work vector holding q entries per off-process column of the MPIAIJ matrix. */
    PetscCall(VecGetSize(mpiaij->lvec, &n));
    PetscCall(VecCreate(PETSC_COMM_SELF, &a->w));
    PetscCall(VecSetSizes(a->w, n * a->q, n * a->q));
    PetscCall(VecSetBlockSize(a->w, a->q));
    PetscCall(VecSetType(a->w, VECSEQ));

    /* create two temporary Index sets for build scatter gather */
    PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)a->A), a->q, n, mpiaij->garray, PETSC_COPY_VALUES, &from));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, n * a->q, 0, 1, &to));

    /* create temporary global vector to generate scatter context */
    PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A), a->q, a->q * a->A->cmap->n, a->q * a->A->cmap->N, NULL, &gvec));

    /* generate the scatter context */
    PetscCall(VecScatterCreate(gvec, from, a->w, to, &a->ctx));

    PetscCall(ISDestroy(&from));
    PetscCall(ISDestroy(&to));
    PetscCall(VecDestroy(&gvec));
  }

  A->assembled = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* View the KAIJ matrix: for INFO/IMPL ASCII formats print S, T, and the underlying AIJ matrix
   directly; for all other formats convert to AIJ and view that. */
static PetscErrorCode MatView_KAIJ(Mat A, PetscViewer viewer)
{
  PetscViewerFormat format;
  Mat_SeqKAIJ      *a = (Mat_SeqKAIJ *)A->data;
  Mat               B;
  PetscInt          i;
  PetscBool         ismpikaij;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
  PetscCall(PetscViewerGetFormat(viewer, &format));
  if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
    PetscCall(PetscViewerASCIIPrintf(viewer, "S and T have %" PetscInt_FMT " rows and %" PetscInt_FMT " columns\n", a->p, a->q));

    /* Print appropriate details for S. */
    if (!a->S) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "S is NULL\n"));
    } else if (format == PETSC_VIEWER_ASCII_IMPL) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of S are "));
      for (i = 0; i < (a->p * a->q); i++) {
#if defined(PETSC_USE_COMPLEX)
        PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->S[i]), (double)PetscImaginaryPart(a->S[i])));
#else
        PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->S[i])));
#endif
      }
      PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
    }

    /* Print appropriate details for T. */
    if (a->isTI) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "T is the identity matrix\n"));
    } else if (!a->T) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "T is NULL\n"));
    } else if (format == PETSC_VIEWER_ASCII_IMPL) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of T are "));
      for (i = 0; i < (a->p * a->q); i++) {
#if defined(PETSC_USE_COMPLEX)
        PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->T[i]), (double)PetscImaginaryPart(a->T[i])));
#else
        PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->T[i])));
#endif
      }
      PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
    }

    /* Now print details for the AIJ matrix, using the AIJ viewer. */
    PetscCall(PetscViewerASCIIPrintf(viewer, "Now viewing the associated AIJ matrix:\n"));
    if (ismpikaij) {
      Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
      PetscCall(MatView(b->A, viewer));
    } else {
      PetscCall(MatView(a->AIJ, viewer));
    }
  } else {
    /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */
    PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &B));
    PetscCall(MatView(B, viewer));
    PetscCall(MatDestroy(&B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destroy the parallel KAIJ matrix: release the cached sequential submatrices, the owned
   MPIAIJ matrix, the scatter machinery, the dense S/T blocks, and the cached block-diagonal
   inverse. */
static PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
{
  Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDestroy(&b->AIJ));
  PetscCall(MatDestroy(&b->OAIJ));
  PetscCall(MatDestroy(&b->A));
  PetscCall(VecScatterDestroy(&b->ctx));
  PetscCall(VecDestroy(&b->w));
  PetscCall(PetscFree(b->S));
  PetscCall(PetscFree(b->T));
  PetscCall(PetscFree(b->ibdiag));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", NULL));
  PetscCall(PetscFree(A->data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* zz = yy + Axx */
/* Sequential KAIJ multiply-add: applies [I \otimes S + A \otimes T] to xx. A NULL yy means
   zz = Axx. The T part has fast paths for T == identity (isTI) and is skipped entirely when
   S, T, and isTI are all unset (zero matrix). */
static PetscErrorCode MatMultAdd_SeqKAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_SeqKAIJ       *b = (Mat_SeqKAIJ *)A->data;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
  const PetscScalar *s = b->S, *t = b->T;
  const PetscScalar *x, *v, *bx;
  PetscScalar       *y, *sums;
  const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
  PetscInt           n, i, jrow, j, l, p = b->p, q = b->q, k;

  PetscFunctionBegin;
  if (!yy) {
    PetscCall(VecSet(zz, 0.0));
  } else {
    PetscCall(VecCopy(yy, zz));
  }
  if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArray(zz, &y));
  idx = a->j;
  v   = a->a;
  ii  = a->i;

  if (b->isTI) {
    /* T is the identity: (A \otimes I) x, one scalar multiply per block entry. */
    for (i = 0; i < m; i++) {
      jrow = ii[i];
      n    = ii[i + 1] - jrow;
      sums = y + p * i;
      for (j = 0; j < n; j++) {
        for (k = 0; k < p; k++) sums[k] += v[jrow + j] * x[q * idx[jrow + j] + k];
      }
    }
    PetscCall(PetscLogFlops(3.0 * (a->nz) * p));
  } else if (t) {
    /* General T: (A \otimes T) x, dense p-by-q block applied per AIJ nonzero. */
    for (i = 0; i < m; i++) {
      jrow = ii[i];
      n    = ii[i + 1] - jrow;
      sums = y + p * i;
      for (j = 0; j < n; j++) {
        for (k = 0; k < p; k++) {
          for (l = 0; l < q; l++) sums[k] += v[jrow + j] * t[k + l * p] * x[q * idx[jrow + j] + l];
        }
      }
    }
    /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do),
     * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T),
     * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter
     * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */
    PetscCall(PetscLogFlops((2.0 * p * q - p) * m + 2.0 * p * a->nz));
  }
  if (s) {
    /* Shift part: (I \otimes S) x, applied only to rows with a matching column block. */
    for (i = 0; i < m; i++) {
      sums = y + p * i;
      bx   = x + q * i;
      if (i < b->AIJ->cmap->n) {
        for (j = 0; j < q; j++) {
          for (k = 0; k < p; k++) sums[k] += s[k + j * p] * bx[j];
        }
      }
    }
    PetscCall(PetscLogFlops(2.0 * m * p * q));
  }

  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(zz, &y));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* yy = Axx, implemented as a multiply-add with a NULL additive term. */
static PetscErrorCode MatMult_SeqKAIJ(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultAdd_SeqKAIJ(A, xx, NULL, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <petsc/private/kernels/blockinvert.h>

/* Compute and cache the inverses of the p-by-p diagonal blocks S + a_ii T, used by
   point-block Jacobi and the SOR kernel below. Requires square blocks (p == q). */
static PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A, const PetscScalar **values)
{
  Mat_SeqKAIJ       *b = (Mat_SeqKAIJ *)A->data;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
  const PetscScalar *S = b->S;
  const PetscScalar *T = b->T;
  const PetscScalar *v = a->a;
  const PetscInt     p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
  PetscInt           i, j, *v_pivots, dof, dof2;
  PetscScalar       *diag, aval, *v_work;

  PetscFunctionBegin;
  PetscCheck(p == q, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Block size must be square to calculate inverse.");
  PetscCheck(S || T || b->isTI, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Cannot invert a zero matrix.");

  dof  = p;
  dof2 = dof * dof;

  /* Reuse the cached inverse when it is still valid. */
  if (b->ibdiagvalid) {
    if (values) *values = b->ibdiag;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  if (!b->ibdiag) PetscCall(PetscMalloc1(dof2 * m, &b->ibdiag));
  if (values) *values = b->ibdiag;
  diag = b->ibdiag;

  PetscCall(PetscMalloc2(dof, &v_work, dof, &v_pivots));
  for (i = 0; i < m; i++) {
    /* Start the block from S (or zero), then add a_ii * T (or a_ii * I when isTI). */
    if (S) {
      PetscCall(PetscMemcpy(diag, S, dof2 * sizeof(PetscScalar)));
    } else {
      PetscCall(PetscMemzero(diag, dof2 * sizeof(PetscScalar)));
    }
    if (b->isTI) {
      aval = 0;
      for (j = ii[i]; j < ii[i + 1]; j++)
        if (idx[j] == i) aval = v[j];
      for (j = 0; j < dof; j++) diag[j + dof * j] += aval;
    } else if (T) {
      aval = 0;
      for (j = ii[i]; j < ii[i + 1]; j++)
        if (idx[j] == i) aval = v[j];
      for (j = 0; j < dof2; j++) diag[j] += aval * T[j];
    }
    PetscCall(PetscKernel_A_gets_inverse_A(dof, diag, v_pivots, v_work, PETSC_FALSE, NULL));
    diag += dof2;
  }
  PetscCall(PetscFree2(v_work, v_pivots));

  b->ibdiagvalid = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return the on-process diagonal block (the sequential KAIJ submatrix) of a parallel KAIJ
   matrix. The reference count is not increased. */
static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A, Mat *B)
{
  Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ *)A->data;

  PetscFunctionBegin;
  *B = kaij->AIJ;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Convert a (seq or mpi) KAIJ matrix to AIJ: preallocate the target from per-row nonzero
   counts of the underlying AIJ parts (each AIJ nonzero expands to a p-by-q dense block,
   plus a diagonal block when S is present), then fill via MatConvert_Basic(). */
static PetscErrorCode MatConvert_KAIJ_AIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_SeqKAIJ   *a = (Mat_SeqKAIJ *)A->data;
  Mat            AIJ, OAIJ, B;
  PetscInt      *d_nnz, *o_nnz = NULL, nz, i, j, m, d;
  const PetscInt p = a->p, q = a->q;
  PetscBool      ismpikaij, missing;

  PetscFunctionBegin;
  if (reuse != MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
    if (ismpikaij) {
      Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;

      AIJ  = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ;
      OAIJ = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ;
    } else {
      AIJ  = a->AIJ;
      OAIJ = NULL;
    }
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(B, MATAIJ));
    PetscCall(MatGetSize(AIJ, &m, NULL));
    PetscCall(MatMissingDiagonal(AIJ, &missing, &d)); /* assumption that all successive rows will have a missing diagonal */
    if (!missing || !a->S) d = m;
    PetscCall(PetscMalloc1(m * p, &d_nnz));
    for (i = 0; i < m; ++i) {
      PetscCall(MatGetRow_SeqAIJ(AIJ, i, &nz, NULL, NULL));
      /* rows at/after the first missing diagonal get q extra entries for the S block */
      for (j = 0; j < p; ++j) d_nnz[i * p + j] = nz * q + (i >= d) * q;
      PetscCall(MatRestoreRow_SeqAIJ(AIJ, i, &nz, NULL, NULL));
    }
    if (OAIJ) {
      PetscCall(PetscMalloc1(m * p, &o_nnz));
      for (i = 0; i < m; ++i) {
        PetscCall(MatGetRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL));
        for (j = 0; j < p; ++j) o_nnz[i * p + j] = nz * q;
        PetscCall(MatRestoreRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL));
      }
      PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    } else {
      PetscCall(MatSeqAIJSetPreallocation(B, 0, d_nnz));
    }
    PetscCall(PetscFree(d_nnz));
    PetscCall(PetscFree(o_nnz));
  } else B = *newmat;
  PetscCall(MatConvert_Basic(A, newtype, MAT_REUSE_MATRIX, &B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &B));
  } else *newmat = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSOR_SeqKAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ *)A->data;
  Mat_SeqAIJ        *a    = (Mat_SeqAIJ *)kaij->AIJ->data;
  const PetscScalar *aa = a->a, *T = kaij->T, *v;
  const PetscInt     m = kaij->AIJ->rmap->n, *ai = a->i, *aj = a->j, p = kaij->p, q = kaij->q, *diag, *vi;
  const PetscScalar *b, *xb, *idiag;
  PetscScalar       *x, *work, *workt, *w, *y,
*arr, *t, *arrt; 852 PetscInt i, j, k, i2, bs, bs2, nz; 853 854 PetscFunctionBegin; 855 its = its * lits; 856 PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat"); 857 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 858 PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift"); 859 PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts"); 860 PetscCheck(p == q, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSOR for KAIJ: No support for non-square dense blocks"); 861 bs = p; 862 bs2 = bs * bs; 863 864 if (!m) PetscFunctionReturn(PETSC_SUCCESS); 865 866 if (!kaij->ibdiagvalid) PetscCall(MatInvertBlockDiagonal_SeqKAIJ(A, NULL)); 867 idiag = kaij->ibdiag; 868 diag = a->diag; 869 870 if (!kaij->sor.setup) { 871 PetscCall(PetscMalloc5(bs, &kaij->sor.w, bs, &kaij->sor.y, m * bs, &kaij->sor.work, m * bs, &kaij->sor.t, m * bs2, &kaij->sor.arr)); 872 kaij->sor.setup = PETSC_TRUE; 873 } 874 y = kaij->sor.y; 875 w = kaij->sor.w; 876 work = kaij->sor.work; 877 t = kaij->sor.t; 878 arr = kaij->sor.arr; 879 880 PetscCall(VecGetArray(xx, &x)); 881 PetscCall(VecGetArrayRead(bb, &b)); 882 883 if (flag & SOR_ZERO_INITIAL_GUESS) { 884 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 885 PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); /* x[0:bs] <- D^{-1} b[0:bs] */ 886 PetscCall(PetscMemcpy(t, b, bs * sizeof(PetscScalar))); 887 i2 = bs; 888 idiag += bs2; 889 for (i = 1; i < m; i++) { 890 v = aa + ai[i]; 891 vi = aj + ai[i]; 892 nz = diag[i] - ai[i]; 893 894 if (T) { /* b - T (Arow * x) */ 895 PetscCall(PetscMemzero(w, bs * sizeof(PetscScalar))); 896 for (j = 0; j < nz; j++) { 897 for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k]; 898 } 899 
PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs, w, T, &t[i2]); 900 for (k = 0; k < bs; k++) t[i2 + k] += b[i2 + k]; 901 } else if (kaij->isTI) { 902 PetscCall(PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar))); 903 for (j = 0; j < nz; j++) { 904 for (k = 0; k < bs; k++) t[i2 + k] -= v[j] * x[vi[j] * bs + k]; 905 } 906 } else { 907 PetscCall(PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar))); 908 } 909 910 PetscKernel_w_gets_Ar_times_v(bs, bs, t + i2, idiag, y); 911 for (j = 0; j < bs; j++) x[i2 + j] = omega * y[j]; 912 913 idiag += bs2; 914 i2 += bs; 915 } 916 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 917 PetscCall(PetscLogFlops(1.0 * bs2 * a->nz)); 918 xb = t; 919 } else xb = b; 920 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 921 idiag = kaij->ibdiag + bs2 * (m - 1); 922 i2 = bs * (m - 1); 923 PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar))); 924 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 925 i2 -= bs; 926 idiag -= bs2; 927 for (i = m - 2; i >= 0; i--) { 928 v = aa + diag[i] + 1; 929 vi = aj + diag[i] + 1; 930 nz = ai[i + 1] - diag[i] - 1; 931 932 if (T) { /* FIXME: This branch untested */ 933 PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar))); 934 /* copy all rows of x that are needed into contiguous space */ 935 workt = work; 936 for (j = 0; j < nz; j++) { 937 PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 938 workt += bs; 939 } 940 arrt = arr; 941 for (j = 0; j < nz; j++) { 942 PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 943 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 944 arrt += bs2; 945 } 946 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 947 } else if (kaij->isTI) { 948 PetscCall(PetscMemcpy(w, t + i2, bs * sizeof(PetscScalar))); 949 for (j = 0; j < nz; j++) { 950 for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k]; 951 } 952 } 953 954 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); 
/* RHS incorrect for omega != 1.0 */ 955 for (j = 0; j < bs; j++) x[i2 + j] = (1.0 - omega) * x[i2 + j] + omega * y[j]; 956 957 idiag -= bs2; 958 i2 -= bs; 959 } 960 PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz))); 961 } 962 its--; 963 } 964 while (its--) { /* FIXME: This branch not updated */ 965 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 966 i2 = 0; 967 idiag = kaij->ibdiag; 968 for (i = 0; i < m; i++) { 969 PetscCall(PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar))); 970 971 v = aa + ai[i]; 972 vi = aj + ai[i]; 973 nz = diag[i] - ai[i]; 974 workt = work; 975 for (j = 0; j < nz; j++) { 976 PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 977 workt += bs; 978 } 979 arrt = arr; 980 if (T) { 981 for (j = 0; j < nz; j++) { 982 PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 983 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 984 arrt += bs2; 985 } 986 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 987 } else if (kaij->isTI) { 988 for (j = 0; j < nz; j++) { 989 PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar))); 990 for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 991 arrt += bs2; 992 } 993 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 994 } 995 PetscCall(PetscMemcpy(t + i2, w, bs * sizeof(PetscScalar))); 996 997 v = aa + diag[i] + 1; 998 vi = aj + diag[i] + 1; 999 nz = ai[i + 1] - diag[i] - 1; 1000 workt = work; 1001 for (j = 0; j < nz; j++) { 1002 PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 1003 workt += bs; 1004 } 1005 arrt = arr; 1006 if (T) { 1007 for (j = 0; j < nz; j++) { 1008 PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 1009 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 1010 arrt += bs2; 1011 } 1012 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1013 } else if (kaij->isTI) { 1014 for (j = 0; j < nz; j++) { 1015 PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar))); 1016 for (k = 0; k < bs; k++) 
arrt[k + bs * k] = v[j]; 1017 arrt += bs2; 1018 } 1019 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1020 } 1021 1022 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); 1023 for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j); 1024 1025 idiag += bs2; 1026 i2 += bs; 1027 } 1028 xb = t; 1029 } else xb = b; 1030 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1031 idiag = kaij->ibdiag + bs2 * (m - 1); 1032 i2 = bs * (m - 1); 1033 if (xb == b) { 1034 for (i = m - 1; i >= 0; i--) { 1035 PetscCall(PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar))); 1036 1037 v = aa + ai[i]; 1038 vi = aj + ai[i]; 1039 nz = diag[i] - ai[i]; 1040 workt = work; 1041 for (j = 0; j < nz; j++) { 1042 PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 1043 workt += bs; 1044 } 1045 arrt = arr; 1046 if (T) { 1047 for (j = 0; j < nz; j++) { 1048 PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 1049 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 1050 arrt += bs2; 1051 } 1052 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1053 } else if (kaij->isTI) { 1054 for (j = 0; j < nz; j++) { 1055 PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar))); 1056 for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 1057 arrt += bs2; 1058 } 1059 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1060 } 1061 1062 v = aa + diag[i] + 1; 1063 vi = aj + diag[i] + 1; 1064 nz = ai[i + 1] - diag[i] - 1; 1065 workt = work; 1066 for (j = 0; j < nz; j++) { 1067 PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 1068 workt += bs; 1069 } 1070 arrt = arr; 1071 if (T) { 1072 for (j = 0; j < nz; j++) { 1073 PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 1074 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 1075 arrt += bs2; 1076 } 1077 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1078 } else if (kaij->isTI) { 1079 for (j = 0; j < nz; j++) { 
1080 PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar))); 1081 for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 1082 arrt += bs2; 1083 } 1084 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1085 } 1086 1087 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); 1088 for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j); 1089 } 1090 } else { 1091 for (i = m - 1; i >= 0; i--) { 1092 PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar))); 1093 v = aa + diag[i] + 1; 1094 vi = aj + diag[i] + 1; 1095 nz = ai[i + 1] - diag[i] - 1; 1096 workt = work; 1097 for (j = 0; j < nz; j++) { 1098 PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 1099 workt += bs; 1100 } 1101 arrt = arr; 1102 if (T) { 1103 for (j = 0; j < nz; j++) { 1104 PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 1105 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 1106 arrt += bs2; 1107 } 1108 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1109 } else if (kaij->isTI) { 1110 for (j = 0; j < nz; j++) { 1111 PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar))); 1112 for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 1113 arrt += bs2; 1114 } 1115 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1116 } 1117 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); 1118 for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j); 1119 } 1120 } 1121 PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz))); 1122 } 1123 } 1124 1125 PetscCall(VecRestoreArray(xx, &x)); 1126 PetscCall(VecRestoreArrayRead(bb, &b)); 1127 PetscFunctionReturn(PETSC_SUCCESS); 1128 } 1129 1130 /*===================================================================================*/ 1131 1132 static PetscErrorCode MatMultAdd_MPIKAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1133 { 1134 Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 1135 1136 PetscFunctionBegin; 1137 if (!yy) { 1138 PetscCall(VecSet(zz, 0.0)); 1139 } else 
{ 1140 PetscCall(VecCopy(yy, zz)); 1141 } 1142 PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */ 1143 /* start the scatter */ 1144 PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 1145 PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, zz, zz)); 1146 PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 1147 PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz)); 1148 PetscFunctionReturn(PETSC_SUCCESS); 1149 } 1150 1151 static PetscErrorCode MatMult_MPIKAIJ(Mat A, Vec xx, Vec yy) 1152 { 1153 PetscFunctionBegin; 1154 PetscCall(MatMultAdd_MPIKAIJ(A, xx, NULL, yy)); 1155 PetscFunctionReturn(PETSC_SUCCESS); 1156 } 1157 1158 static PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A, const PetscScalar **values) 1159 { 1160 Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 1161 1162 PetscFunctionBegin; 1163 PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ is up to date. */ 1164 PetscCall((*b->AIJ->ops->invertblockdiagonal)(b->AIJ, values)); 1165 PetscFunctionReturn(PETSC_SUCCESS); 1166 } 1167 1168 static PetscErrorCode MatGetRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values) 1169 { 1170 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 1171 PetscBool diag = PETSC_FALSE; 1172 PetscInt nzaij, nz, *colsaij, *idx, i, j, p = b->p, q = b->q, r = row / p, s = row % p, c; 1173 PetscScalar *vaij, *v, *S = b->S, *T = b->T; 1174 1175 PetscFunctionBegin; 1176 PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1177 b->getrowactive = PETSC_TRUE; 1178 PetscCheck(row >= 0 && row < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row); 1179 1180 if ((!S) && (!T) && (!b->isTI)) { 1181 if (ncols) *ncols = 0; 1182 if (cols) *cols = NULL; 1183 if (values) *values = NULL; 1184 PetscFunctionReturn(PETSC_SUCCESS); 1185 } 1186 1187 if (T || b->isTI) { 1188 PetscCall(MatGetRow_SeqAIJ(b->AIJ, r, &nzaij, 
&colsaij, &vaij)); 1189 c = nzaij; 1190 for (i = 0; i < nzaij; i++) { 1191 /* check if this row contains a diagonal entry */ 1192 if (colsaij[i] == r) { 1193 diag = PETSC_TRUE; 1194 c = i; 1195 } 1196 } 1197 } else nzaij = c = 0; 1198 1199 /* calculate size of row */ 1200 nz = 0; 1201 if (S) nz += q; 1202 if (T || b->isTI) nz += (diag && S ? (nzaij - 1) * q : nzaij * q); 1203 1204 if (cols || values) { 1205 PetscCall(PetscMalloc2(nz, &idx, nz, &v)); 1206 for (i = 0; i < q; i++) { 1207 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1208 v[i] = 0.0; 1209 } 1210 if (b->isTI) { 1211 for (i = 0; i < nzaij; i++) { 1212 for (j = 0; j < q; j++) { 1213 idx[i * q + j] = colsaij[i] * q + j; 1214 v[i * q + j] = (j == s ? vaij[i] : 0); 1215 } 1216 } 1217 } else if (T) { 1218 for (i = 0; i < nzaij; i++) { 1219 for (j = 0; j < q; j++) { 1220 idx[i * q + j] = colsaij[i] * q + j; 1221 v[i * q + j] = vaij[i] * T[s + j * p]; 1222 } 1223 } 1224 } 1225 if (S) { 1226 for (j = 0; j < q; j++) { 1227 idx[c * q + j] = r * q + j; 1228 v[c * q + j] += S[s + j * p]; 1229 } 1230 } 1231 } 1232 1233 if (ncols) *ncols = nz; 1234 if (cols) *cols = idx; 1235 if (values) *values = v; 1236 PetscFunctionReturn(PETSC_SUCCESS); 1237 } 1238 1239 static PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1240 { 1241 PetscFunctionBegin; 1242 PetscCall(PetscFree2(*idx, *v)); 1243 ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE; 1244 PetscFunctionReturn(PETSC_SUCCESS); 1245 } 1246 1247 static PetscErrorCode MatGetRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values) 1248 { 1249 Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 1250 Mat AIJ = b->A; 1251 PetscBool diag = PETSC_FALSE; 1252 Mat MatAIJ, MatOAIJ; 1253 const PetscInt rstart = A->rmap->rstart, rend = A->rmap->rend, p = b->p, q = b->q, *garray; 1254 PetscInt nz, *idx, ncolsaij = 0, ncolsoaij = 0, *colsaij, 
*colsoaij, r, s, c, i, j, lrow; 1255 PetscScalar *v, *vals, *ovals, *S = b->S, *T = b->T; 1256 1257 PetscFunctionBegin; 1258 PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */ 1259 MatAIJ = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ; 1260 MatOAIJ = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ; 1261 PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1262 b->getrowactive = PETSC_TRUE; 1263 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1264 lrow = row - rstart; 1265 1266 if ((!S) && (!T) && (!b->isTI)) { 1267 if (ncols) *ncols = 0; 1268 if (cols) *cols = NULL; 1269 if (values) *values = NULL; 1270 PetscFunctionReturn(PETSC_SUCCESS); 1271 } 1272 1273 r = lrow / p; 1274 s = lrow % p; 1275 1276 if (T || b->isTI) { 1277 PetscCall(MatMPIAIJGetSeqAIJ(AIJ, NULL, NULL, &garray)); 1278 PetscCall(MatGetRow_SeqAIJ(MatAIJ, lrow / p, &ncolsaij, &colsaij, &vals)); 1279 PetscCall(MatGetRow_SeqAIJ(MatOAIJ, lrow / p, &ncolsoaij, &colsoaij, &ovals)); 1280 1281 c = ncolsaij + ncolsoaij; 1282 for (i = 0; i < ncolsaij; i++) { 1283 /* check if this row contains a diagonal entry */ 1284 if (colsaij[i] == r) { 1285 diag = PETSC_TRUE; 1286 c = i; 1287 } 1288 } 1289 } else c = 0; 1290 1291 /* calculate size of row */ 1292 nz = 0; 1293 if (S) nz += q; 1294 if (T || b->isTI) nz += (diag && S ? (ncolsaij + ncolsoaij - 1) * q : (ncolsaij + ncolsoaij) * q); 1295 1296 if (cols || values) { 1297 PetscCall(PetscMalloc2(nz, &idx, nz, &v)); 1298 for (i = 0; i < q; i++) { 1299 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1300 v[i] = 0.0; 1301 } 1302 if (b->isTI) { 1303 for (i = 0; i < ncolsaij; i++) { 1304 for (j = 0; j < q; j++) { 1305 idx[i * q + j] = (colsaij[i] + rstart / p) * q + j; 1306 v[i * q + j] = (j == s ? 
vals[i] : 0.0); 1307 } 1308 } 1309 for (i = 0; i < ncolsoaij; i++) { 1310 for (j = 0; j < q; j++) { 1311 idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j; 1312 v[(i + ncolsaij) * q + j] = (j == s ? ovals[i] : 0.0); 1313 } 1314 } 1315 } else if (T) { 1316 for (i = 0; i < ncolsaij; i++) { 1317 for (j = 0; j < q; j++) { 1318 idx[i * q + j] = (colsaij[i] + rstart / p) * q + j; 1319 v[i * q + j] = vals[i] * T[s + j * p]; 1320 } 1321 } 1322 for (i = 0; i < ncolsoaij; i++) { 1323 for (j = 0; j < q; j++) { 1324 idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j; 1325 v[(i + ncolsaij) * q + j] = ovals[i] * T[s + j * p]; 1326 } 1327 } 1328 } 1329 if (S) { 1330 for (j = 0; j < q; j++) { 1331 idx[c * q + j] = (r + rstart / p) * q + j; 1332 v[c * q + j] += S[s + j * p]; 1333 } 1334 } 1335 } 1336 1337 if (ncols) *ncols = nz; 1338 if (cols) *cols = idx; 1339 if (values) *values = v; 1340 PetscFunctionReturn(PETSC_SUCCESS); 1341 } 1342 1343 static PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1344 { 1345 PetscFunctionBegin; 1346 PetscCall(PetscFree2(*idx, *v)); 1347 ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE; 1348 PetscFunctionReturn(PETSC_SUCCESS); 1349 } 1350 1351 static PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat) 1352 { 1353 Mat A; 1354 1355 PetscFunctionBegin; 1356 PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A)); 1357 PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat)); 1358 PetscCall(MatDestroy(&A)); 1359 PetscFunctionReturn(PETSC_SUCCESS); 1360 } 1361 1362 /*@C 1363 MatCreateKAIJ - Creates a matrix of type `MATKAIJ`. 1364 1365 Collective 1366 1367 Input Parameters: 1368 + A - the `MATAIJ` matrix 1369 . p - number of rows in `S` and `T` 1370 . q - number of columns in `S` and `T` 1371 . 
S - the `S` matrix (can be `NULL`), stored as a `PetscScalar` array (column-major) 1372 - T - the `T` matrix (can be `NULL`), stored as a `PetscScalar` array (column-major) 1373 1374 Output Parameter: 1375 . kaij - the new `MATKAIJ` matrix 1376 1377 Level: advanced 1378 1379 Notes: 1380 The created matrix is of the following form\: 1381 .vb 1382 [I \otimes S + A \otimes T] 1383 .ve 1384 where 1385 .vb 1386 S is a dense (p \times q) matrix 1387 T is a dense (p \times q) matrix 1388 A is a `MATAIJ` (n \times n) matrix 1389 I is the identity matrix 1390 .ve 1391 The resulting matrix is (np \times nq) 1392 1393 `S` and `T` are always stored independently on all processes as `PetscScalar` arrays in 1394 column-major format. 1395 1396 This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed. 1397 1398 Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix. 1399 1400 Developer Notes: 1401 In the `MATMPIKAIJ` case, the internal 'AIJ' and 'OAIJ' sequential KAIJ matrices are kept up to date by tracking the object state 1402 of the AIJ matrix 'A' that describes the blockwise action of the `MATMPIKAIJ` matrix and, if the object state has changed, lazily 1403 rebuilding 'AIJ' and 'OAIJ' just before executing operations with the `MATMPIKAIJ` matrix. If new types of operations are added, 1404 routines implementing those must also ensure these are rebuilt when needed (by calling the internal MatKAIJ_build_AIJ_OAIJ() routine). 
1405 1406 .seealso: [](ch_matrices), `Mat`, `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MATKAIJ` 1407 @*/ 1408 PetscErrorCode MatCreateKAIJ(Mat A, PetscInt p, PetscInt q, const PetscScalar S[], const PetscScalar T[], Mat *kaij) 1409 { 1410 PetscFunctionBegin; 1411 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), kaij)); 1412 PetscCall(MatSetType(*kaij, MATKAIJ)); 1413 PetscCall(MatKAIJSetAIJ(*kaij, A)); 1414 PetscCall(MatKAIJSetS(*kaij, p, q, S)); 1415 PetscCall(MatKAIJSetT(*kaij, p, q, T)); 1416 PetscCall(MatSetUp(*kaij)); 1417 PetscFunctionReturn(PETSC_SUCCESS); 1418 } 1419 1420 /*MC 1421 MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form 1422 [I \otimes S + A \otimes T], 1423 where 1424 .vb 1425 S is a dense (p \times q) matrix, 1426 T is a dense (p \times q) matrix, 1427 A is an AIJ (n \times n) matrix, 1428 and I is the identity matrix. 1429 .ve 1430 The resulting matrix is (np \times nq). 1431 1432 S and T are always stored independently on all processes as `PetscScalar` arrays in column-major format. 1433 1434 Level: advanced 1435 1436 Note: 1437 A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b, 1438 where x and b are column vectors containing the row-major representations of X and B. 
1439 1440 .seealso: [](ch_matrices), `Mat`, `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MatCreateKAIJ()` 1441 M*/ 1442 1443 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A) 1444 { 1445 Mat_MPIKAIJ *b; 1446 PetscMPIInt size; 1447 1448 PetscFunctionBegin; 1449 PetscCall(PetscNew(&b)); 1450 A->data = (void *)b; 1451 1452 PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps))); 1453 1454 b->w = NULL; 1455 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 1456 if (size == 1) { 1457 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQKAIJ)); 1458 A->ops->destroy = MatDestroy_SeqKAIJ; 1459 A->ops->mult = MatMult_SeqKAIJ; 1460 A->ops->multadd = MatMultAdd_SeqKAIJ; 1461 A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ; 1462 A->ops->getrow = MatGetRow_SeqKAIJ; 1463 A->ops->restorerow = MatRestoreRow_SeqKAIJ; 1464 A->ops->sor = MatSOR_SeqKAIJ; 1465 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", MatConvert_KAIJ_AIJ)); 1466 } else { 1467 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIKAIJ)); 1468 A->ops->destroy = MatDestroy_MPIKAIJ; 1469 A->ops->mult = MatMult_MPIKAIJ; 1470 A->ops->multadd = MatMultAdd_MPIKAIJ; 1471 A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ; 1472 A->ops->getrow = MatGetRow_MPIKAIJ; 1473 A->ops->restorerow = MatRestoreRow_MPIKAIJ; 1474 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", MatGetDiagonalBlock_MPIKAIJ)); 1475 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", MatConvert_KAIJ_AIJ)); 1476 } 1477 A->ops->setup = MatSetUp_KAIJ; 1478 A->ops->view = MatView_KAIJ; 1479 A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ; 1480 PetscFunctionReturn(PETSC_SUCCESS); 1481 } 1482