1 2 /* 3 Defines the basic matrix operations for the KAIJ matrix storage format. 4 This format is used to evaluate matrices of the form: 5 6 [I \otimes S + A \otimes T] 7 8 where 9 S is a dense (p \times q) matrix 10 T is a dense (p \times q) matrix 11 A is an AIJ (n \times n) matrix 12 I is the identity matrix 13 14 The resulting matrix is (np \times nq) 15 16 We provide: 17 MatMult() 18 MatMultAdd() 19 MatInvertBlockDiagonal() 20 and 21 MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*) 22 23 This single directory handles both the sequential and parallel codes 24 */ 25 26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/ 27 #include <../src/mat/utils/freespace.h> 28 #include <petsc/private/vecimpl.h> 29 30 /*@C 31 MatKAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix 32 33 Not Collective, but if the `MATKAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel 34 35 Input Parameter: 36 . A - the `MATKAIJ` matrix 37 38 Output Parameter: 39 . B - the `MATAIJ` matrix 40 41 Level: advanced 42 43 Note: 44 The reference count on the `MATAIJ` matrix is not increased so you should not destroy it. 
.seealso: `MatCreateKAIJ()`, `MATKAIJ`, `MATAIJ`
@*/
PetscErrorCode MatKAIJGetAIJ(Mat A, Mat *B)
{
  PetscBool ismpikaij, isseqkaij;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQKAIJ, &isseqkaij));
  if (ismpikaij) {
    Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;

    *B = b->A;
  } else if (isseqkaij) {
    Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;

    *B = b->AIJ;
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix passed in is not of type KAIJ");
  /* Note: no reference is taken on *B here, hence the caller must not destroy it (see the man page above) */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatKAIJGetS - Get the S matrix describing the shift action of the `MATKAIJ` matrix

  Not Collective; the entire S is stored and returned independently on all processes.

  Input Parameter:
. A - the `MATKAIJ` matrix

  Output Parameters:
+ m - the number of rows in S
. n - the number of columns in S
- S - the S matrix, in form of a scalar array in column-major format

  Note:
  All output parameters are optional (pass NULL if not desired)

  Level: advanced

.seealso: `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
@*/
PetscErrorCode MatKAIJGetS(Mat A, PetscInt *m, PetscInt *n, PetscScalar **S)
{
  Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
  PetscFunctionBegin;
  if (m) *m = b->p;
  if (n) *n = b->q;
  if (S) *S = b->S; /* direct pointer to the internal storage, not a copy */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the `MATKAIJ` matrix

  Not Collective; the entire S is stored and returned independently on all processes.

  Input Parameter:
. A - the `MATKAIJ` matrix

  Output Parameters:
+ m - the number of rows in S
. n - the number of columns in S
- S - the S matrix, in form of a scalar array in column-major format

  Note:
  All output parameters are optional (pass NULL if not desired)

  Level: advanced

.seealso: `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
@*/
PetscErrorCode MatKAIJGetSRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar **S)
{
  Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
  PetscFunctionBegin;
  if (m) *m = b->p;
  if (n) *n = b->q;
  if (S) *S = b->S;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatKAIJRestoreS - Restore array obtained with `MatKAIJGetS()`

  Not collective

  Input Parameter:
. A - the `MATKAIJ` matrix

  Output Parameter:
. S - location of pointer to array obtained with `MatKAIJGetS()`

  Note:
  This routine zeros the array pointer to prevent accidental reuse after it has been restored.
  If NULL is passed, it will not attempt to zero the array pointer.

  Level: advanced

.seealso: `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()`
@*/
PetscErrorCode MatKAIJRestoreS(Mat A, PetscScalar **S)
{
  PetscFunctionBegin;
  if (S) *S = NULL;
  /* the caller had write access to S, so the matrix contents may have changed */
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatKAIJRestoreSRead - Restore array obtained with `MatKAIJGetSRead()`

  Not collective

  Input Parameter:
. A - the `MATKAIJ` matrix

  Output Parameter:
. S - location of pointer to array obtained with `MatKAIJGetSRead()`

  Note:
  This routine zeros the array pointer to prevent accidental reuse after it has been restored.
  If NULL is passed, it will not attempt to zero the array pointer.

  Level: advanced

.seealso: `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()`
@*/
PetscErrorCode MatKAIJRestoreSRead(Mat A, const PetscScalar **S)
{
  PetscFunctionBegin;
  if (S) *S = NULL;
  /* read-only access: no object state increase needed, unlike MatKAIJRestoreS() */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatKAIJGetT - Get the transformation matrix T associated with the `MATKAIJ` matrix

  Not Collective; the entire T is stored and returned independently on all processes

  Input Parameter:
. A - the `MATKAIJ` matrix

  Output Parameters:
+ m - the number of rows in T
. n - the number of columns in T
- T - the T matrix, in form of a scalar array in column-major format

  Note:
  All output parameters are optional (pass NULL if not desired)

  Level: advanced

.seealso: `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
@*/
PetscErrorCode MatKAIJGetT(Mat A, PetscInt *m, PetscInt *n, PetscScalar **T)
{
  Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
  PetscFunctionBegin;
  if (m) *m = b->p;
  if (n) *n = b->q;
  if (T) *T = b->T; /* NULL when T is the identity (isTI) or unset */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the `MATKAIJ` matrix

  Not Collective; the entire T is stored and returned independently on all processes

  Input Parameter:
. A - the `MATKAIJ` matrix

  Output Parameters:
+ m - the number of rows in T
. n - the number of columns in T
- T - the T matrix, in form of a scalar array in column-major format

  Note: All output parameters are optional (pass NULL if not desired)

  Level: advanced

.seealso: `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()`
@*/
PetscErrorCode MatKAIJGetTRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar **T)
{
  Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
  PetscFunctionBegin;
  if (m) *m = b->p;
  if (n) *n = b->q;
  if (T) *T = b->T;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatKAIJRestoreT - Restore array obtained with `MatKAIJGetT()`

  Not collective

  Input Parameter:
. A - the `MATKAIJ` matrix

  Output Parameter:
. T - location of pointer to array obtained with `MatKAIJGetT()`

  Note:
  This routine zeros the array pointer to prevent accidental reuse after it has been restored.
  If NULL is passed, it will not attempt to zero the array pointer.

  Level: advanced

.seealso: `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()`
@*/
PetscErrorCode MatKAIJRestoreT(Mat A, PetscScalar **T)
{
  PetscFunctionBegin;
  if (T) *T = NULL;
  /* the caller had write access to T, so the matrix contents may have changed */
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatKAIJRestoreTRead - Restore array obtained with `MatKAIJGetTRead()`

  Not collective

  Input Parameter:
. A - the `MATKAIJ` matrix

  Output Parameter:
. T - location of pointer to array obtained with `MatKAIJGetTRead()`

  Note:
  This routine zeros the array pointer to prevent accidental reuse after it has been restored.
  If NULL is passed, it will not attempt to zero the array pointer.

  Level: advanced

.seealso: `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()`
@*/
PetscErrorCode MatKAIJRestoreTRead(Mat A, const PetscScalar **T)
{
  PetscFunctionBegin;
  if (T) *T = NULL;
  /* read-only access: no object state increase needed, unlike MatKAIJRestoreT() */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatKAIJSetAIJ - Set the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix

  Logically Collective; if the `MATAIJ` matrix is parallel, the `MATKAIJ` matrix is also parallel

  Input Parameters:
+ A - the `MATKAIJ` matrix
- B - the `MATAIJ` matrix

  Notes:
  This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed.

  Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix.

  Level: advanced

.seealso: `MATKAIJ`, `MatKAIJGetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`
@*/
PetscErrorCode MatKAIJSetAIJ(Mat A, Mat B)
{
  PetscMPIInt size;
  PetscBool   flg;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* sequential case: only MATSEQAIJ is supported as the blockwise matrix */
    PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQAIJ, &flg));
    PetscCheck(flg, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MatKAIJSetAIJ() with MATSEQKAIJ does not support %s as the AIJ mat", ((PetscObject)B)->type_name);
    Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
    a->AIJ = B;
  } else {
    Mat_MPIKAIJ *a = (Mat_MPIKAIJ *)A->data;
    a->A = B;
  }
  PetscCall(PetscObjectReference((PetscObject)B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatKAIJSetS - Set the S matrix describing the shift action of the `MATKAIJ` matrix

  Logically Collective; the entire S is stored independently on all processes.

  Input Parameters:
+ A - the `MATKAIJ` matrix
. p - the number of rows in S
.
q - the number of columns in S
- S - the S matrix, in form of a scalar array in column-major format

  Notes:
  The dimensions p and q must match those of the transformation matrix T associated with the `MATKAIJ` matrix.

  The S matrix is copied, so the user can destroy this array.

  Level: advanced

.seealso: `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJSetT()`, `MatKAIJSetAIJ()`
@*/
PetscErrorCode MatKAIJSetS(Mat A, PetscInt p, PetscInt q, const PetscScalar S[])
{
  Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;

  PetscFunctionBegin;
  /* replace any previously set S with a private copy of the new one */
  PetscCall(PetscFree(a->S));
  if (S) {
    PetscCall(PetscMalloc1(p * q, &a->S));
    PetscCall(PetscMemcpy(a->S, S, p * q * sizeof(PetscScalar)));
  } else a->S = NULL;

  a->p = p;
  a->q = q;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatKAIJGetScaledIdentity - Check if both S and T are scaled identities.

  Logically Collective.

  Input Parameter:
. A - the `MATKAIJ` matrix

  Output Parameter:
. identity - the Boolean value

  Level: Advanced

.seealso: `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetT()`
@*/
PetscErrorCode MatKAIJGetScaledIdentity(Mat A, PetscBool *identity)
{
  Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
  PetscInt     i, j;

  PetscFunctionBegin;
  /* a non-square block can never be a scaled identity */
  if (a->p != a->q) {
    *identity = PETSC_FALSE;
    PetscFunctionReturn(PETSC_SUCCESS);
  } else *identity = PETSC_TRUE;
  /* if T is the identity (isTI) and S is absent there is nothing to scan */
  if (!a->isTI || a->S) {
    for (i = 0; i < a->p && *identity; i++) {
      for (j = 0; j < a->p && *identity; j++) {
        if (i != j) {
          /* off-diagonal entries must be (numerically) zero */
          if (a->S && PetscAbsScalar(a->S[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
          if (a->T && PetscAbsScalar(a->T[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
        } else {
          /* all diagonal entries must equal the first one (i.e. a single scale factor) */
          if (a->S && PetscAbsScalar(a->S[i * (a->p + 1)] - a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
          if (a->T && PetscAbsScalar(a->T[i * (a->p + 1)] - a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatKAIJSetT - Set the transformation matrix T associated with the `MATKAIJ` matrix

  Logically Collective; the entire T is stored independently on all processes.

  Input Parameters:
+ A - the `MATKAIJ` matrix
. p - the number of rows in T
. q - the number of columns in T
- T - the T matrix, in form of a scalar array in column-major format

  Notes:
  The dimensions p and q must match those of the shift matrix S associated with the `MATKAIJ` matrix.

  The T matrix is copied, so the user can destroy this array.

  Level: Advanced

.seealso: `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJSetS()`, `MatKAIJSetAIJ()`
@*/
PetscErrorCode MatKAIJSetT(Mat A, PetscInt p, PetscInt q, const PetscScalar T[])
{
  PetscInt     i, j;
  Mat_SeqKAIJ *a    = (Mat_SeqKAIJ *)A->data;
  PetscBool    isTI = PETSC_FALSE;

  PetscFunctionBegin;
  /* check if T is an identity matrix */
  if (T && (p == q)) {
    isTI = PETSC_TRUE;
    for (i = 0; i < p; i++) {
      for (j = 0; j < q; j++) {
        if (i == j) {
          /* diagonal term must be 1 */
          if (T[i + j * p] != 1.0) isTI = PETSC_FALSE;
        } else {
          /* off-diagonal term must be 0 */
          if (T[i + j * p] != 0.0) isTI = PETSC_FALSE;
        }
      }
    }
  }
  a->isTI = isTI;

  /* when T is exactly the identity we store only the isTI flag, not the array */
  PetscCall(PetscFree(a->T));
  if (T && (!isTI)) {
    PetscCall(PetscMalloc1(p * q, &a->T));
    PetscCall(PetscMemcpy(a->T, T, p * q * sizeof(PetscScalar)));
  } else a->T = NULL;

  a->p = p;
  a->q = q;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destroy the sequential KAIJ matrix: releases the inner AIJ matrix, the S/T arrays,
   the cached inverted block diagonal, and the SOR work arrays. */
static PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
{
  Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDestroy(&b->AIJ));
  PetscCall(PetscFree(b->S));
  PetscCall(PetscFree(b->T));
  PetscCall(PetscFree(b->ibdiag));
  PetscCall(PetscFree5(b->sor.w, b->sor.y, b->sor.work, b->sor.t, b->sor.arr));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", NULL));
  PetscCall(PetscFree(A->data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* (Re)build the sequential KAIJ matrices for the diagonal (AIJ) and off-diagonal (OAIJ)
   parts of the parallel KAIJ matrix, skipping the work when the cached object state of
   the inner parallel AIJ matrix shows they are already current. */
static PetscErrorCode MatKAIJ_build_AIJ_OAIJ(Mat A)
{
  Mat_MPIKAIJ     *a;
  Mat_MPIAIJ      *mpiaij;
  PetscScalar     *T;
  PetscInt         i, j;
  PetscObjectState state;

  PetscFunctionBegin;
  a      = (Mat_MPIKAIJ *)A->data;
  mpiaij = (Mat_MPIAIJ *)a->A->data;

  PetscCall(PetscObjectStateGet((PetscObject)a->A, &state));
  if (state == a->state) {
    /* The existing AIJ and KAIJ members are up-to-date, so simply exit.
     */
    PetscFunctionReturn(PETSC_SUCCESS);
  } else {
    PetscCall(MatDestroy(&a->AIJ));
    PetscCall(MatDestroy(&a->OAIJ));
    if (a->isTI) {
      /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
       * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
       * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
       * to pass in. */
      PetscCall(PetscMalloc1(a->p * a->q, &T));
      for (i = 0; i < a->p; i++) {
        for (j = 0; j < a->q; j++) {
          if (i == j) T[i + j * a->p] = 1.0;
          else T[i + j * a->p] = 0.0;
        }
      }
    } else T = a->T;
    /* the off-diagonal part gets no S contribution: the shift I \otimes S acts only on the diagonal block */
    PetscCall(MatCreateKAIJ(mpiaij->A, a->p, a->q, a->S, T, &a->AIJ));
    PetscCall(MatCreateKAIJ(mpiaij->B, a->p, a->q, NULL, T, &a->OAIJ));
    if (a->isTI) PetscCall(PetscFree(T));
    a->state = state;
  }

  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set up the KAIJ matrix: sizes/layouts are derived from the inner AIJ matrix scaled by
   the block dimensions p (rows) and q (columns); in parallel, also builds the sequential
   sub-KAIJ matrices and the scatter used to gather ghost values for MatMult. */
static PetscErrorCode MatSetUp_KAIJ(Mat A)
{
  PetscInt     n;
  PetscMPIInt  size;
  Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ *)A->data;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    PetscCall(MatSetSizes(A, seqkaij->p * seqkaij->AIJ->rmap->n, seqkaij->q * seqkaij->AIJ->cmap->n, seqkaij->p * seqkaij->AIJ->rmap->N, seqkaij->q * seqkaij->AIJ->cmap->N));
    PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p));
    PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q));
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
  } else {
    Mat_MPIKAIJ *a;
    Mat_MPIAIJ  *mpiaij;
    IS           from, to;
    Vec          gvec;

    a      = (Mat_MPIKAIJ *)A->data;
    mpiaij = (Mat_MPIAIJ *)a->A->data;
    PetscCall(MatSetSizes(A, a->p * a->A->rmap->n, a->q * a->A->cmap->n, a->p * a->A->rmap->N, a->q * a->A->cmap->N));
    /* NOTE(review): seqkaij aliases A->data here; p/q live in the common part of the KAIJ structs */
    PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p));
    PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q));
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));

    PetscCall(MatKAIJ_build_AIJ_OAIJ(A));

    /* work vector holding the gathered ghost values, q entries per ghost column of the inner AIJ matrix */
    PetscCall(VecGetSize(mpiaij->lvec, &n));
    PetscCall(VecCreate(PETSC_COMM_SELF, &a->w));
    PetscCall(VecSetSizes(a->w, n * a->q, n * a->q));
    PetscCall(VecSetBlockSize(a->w, a->q));
    PetscCall(VecSetType(a->w, VECSEQ));

    /* create two temporary Index sets for build scatter gather */
    PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)a->A), a->q, n, mpiaij->garray, PETSC_COPY_VALUES, &from));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, n * a->q, 0, 1, &to));

    /* create temporary global vector to generate scatter context */
    PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A), a->q, a->q * a->A->cmap->n, a->q * a->A->cmap->N, NULL, &gvec));

    /* generate the scatter context */
    PetscCall(VecScatterCreate(gvec, from, a->w, to, &a->ctx));

    PetscCall(ISDestroy(&from));
    PetscCall(ISDestroy(&to));
    PetscCall(VecDestroy(&gvec));
  }

  A->assembled = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* View the KAIJ matrix: for INFO/INFO_DETAIL/IMPL ASCII formats, print the S and T blocks
   and then view the inner AIJ matrix; for all other formats, convert to AIJ and view that. */
static PetscErrorCode MatView_KAIJ(Mat A, PetscViewer viewer)
{
  PetscViewerFormat format;
  Mat_SeqKAIJ      *a = (Mat_SeqKAIJ *)A->data;
  Mat               B;
  PetscInt          i;
  PetscBool         ismpikaij;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
  PetscCall(PetscViewerGetFormat(viewer, &format));
  if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
    PetscCall(PetscViewerASCIIPrintf(viewer, "S and T have %" PetscInt_FMT " rows and %" PetscInt_FMT " columns\n", a->p, a->q));

    /* Print appropriate details for S. */
    if (!a->S) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "S is NULL\n"));
    } else if (format == PETSC_VIEWER_ASCII_IMPL) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of S are "));
      for (i = 0; i < (a->p * a->q); i++) {
#if defined(PETSC_USE_COMPLEX)
        PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->S[i]), (double)PetscImaginaryPart(a->S[i])));
#else
        PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->S[i])));
#endif
      }
      PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
    }

    /* Print appropriate details for T. */
    if (a->isTI) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "T is the identity matrix\n"));
    } else if (!a->T) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "T is NULL\n"));
    } else if (format == PETSC_VIEWER_ASCII_IMPL) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of T are "));
      for (i = 0; i < (a->p * a->q); i++) {
#if defined(PETSC_USE_COMPLEX)
        PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->T[i]), (double)PetscImaginaryPart(a->T[i])));
#else
        PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->T[i])));
#endif
      }
      PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
    }

    /* Now print details for the AIJ matrix, using the AIJ viewer. */
    PetscCall(PetscViewerASCIIPrintf(viewer, "Now viewing the associated AIJ matrix:\n"));
    if (ismpikaij) {
      Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
      PetscCall(MatView(b->A, viewer));
    } else {
      PetscCall(MatView(a->AIJ, viewer));
    }

  } else {
    /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that.
     */
    PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &B));
    PetscCall(MatView(B, viewer));
    PetscCall(MatDestroy(&B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destroy the parallel KAIJ matrix: releases the sequential sub-KAIJ matrices, the inner
   parallel AIJ matrix, the ghost scatter/work vector, the S/T arrays, and the cached
   inverted block diagonal. */
static PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
{
  Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDestroy(&b->AIJ));
  PetscCall(MatDestroy(&b->OAIJ));
  PetscCall(MatDestroy(&b->A));
  PetscCall(VecScatterDestroy(&b->ctx));
  PetscCall(VecDestroy(&b->w));
  PetscCall(PetscFree(b->S));
  PetscCall(PetscFree(b->T));
  PetscCall(PetscFree(b->ibdiag));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", NULL));
  PetscCall(PetscFree(A->data));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* --------------------------------------------------------------------------------------*/

/* zz = yy + Axx, where A = [I \otimes S + A_aij \otimes T]; when yy is NULL computes zz = Axx.
   Three specialized paths: T == identity (isTI), general dense T, and the I \otimes S shift. */
static PetscErrorCode MatMultAdd_SeqKAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_SeqKAIJ       *b = (Mat_SeqKAIJ *)A->data;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
  const PetscScalar *s = b->S, *t = b->T;
  const PetscScalar *x, *v, *bx;
  PetscScalar       *y, *sums;
  const PetscInt     m = b->AIJ->rmap->n, *idx, *ii;
  PetscInt           n, i, jrow, j, l, p = b->p, q = b->q, k;

  PetscFunctionBegin;
  if (!yy) {
    PetscCall(VecSet(zz, 0.0));
  } else {
    PetscCall(VecCopy(yy, zz));
  }
  /* with no S, no T, and T not the identity, the operator is zero: zz = yy already holds */
  if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(VecGetArrayRead(xx, &x));
  PetscCall(VecGetArray(zz, &y));
  idx = a->j;
  v   = a->a;
  ii  = a->i;

  if (b->isTI) {
    /* T is the identity: each AIJ entry scales a q-vector of x directly */
    for (i = 0; i < m; i++) {
      jrow = ii[i];
      n    = ii[i + 1] - jrow;
      sums = y + p * i;
      for (j = 0; j < n; j++) {
        for (k = 0; k < p; k++) sums[k] += v[jrow + j] * x[q * idx[jrow + j] + k];
      }
    }
    PetscCall(PetscLogFlops(3.0 * (a->nz) * p));
  } else if (t) {
    /* general dense T: apply the p x q block v[jrow+j] * T to each x sub-vector */
    for (i = 0; i < m; i++) {
      jrow = ii[i];
      n    = ii[i + 1] - jrow;
      sums = y + p * i;
      for (j = 0; j < n; j++) {
        for (k = 0; k < p; k++) {
          for (l = 0; l < q; l++) sums[k] += v[jrow + j] * t[k + l * p] * x[q * idx[jrow + j] + l];
        }
      }
    }
    /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do),
     * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T),
     * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter
     * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */
    PetscCall(PetscLogFlops((2.0 * p * q - p) * m + 2.0 * p * a->nz));
  }
  if (s) {
    /* add the I \otimes S shift; only rows with a matching column block (i < cmap->n) get a contribution */
    for (i = 0; i < m; i++) {
      sums = y + p * i;
      bx   = x + q * i;
      if (i < b->AIJ->cmap->n) {
        for (j = 0; j < q; j++) {
          for (k = 0; k < p; k++) sums[k] += s[k + j * p] * bx[j];
        }
      }
    }
    PetscCall(PetscLogFlops(2.0 * m * p * q));
  }

  PetscCall(VecRestoreArrayRead(xx, &x));
  PetscCall(VecRestoreArray(zz, &y));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* yy = Axx, implemented as MatMultAdd with a NULL addend */
static PetscErrorCode MatMult_SeqKAIJ(Mat A, Vec xx, Vec yy)
{
  PetscFunctionBegin;
  PetscCall(MatMultAdd_SeqKAIJ(A, xx, NULL, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <petsc/private/kernels/blockinvert.h>

/* Compute (and cache in b->ibdiag) the inverses of the p x p diagonal blocks S + a_ii * T;
   requires square blocks (p == q). The result is cached until ibdiagvalid is cleared. */
static PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A, const PetscScalar **values)
{
  Mat_SeqKAIJ       *b = (Mat_SeqKAIJ *)A->data;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)b->AIJ->data;
  const PetscScalar *S = b->S;
  const PetscScalar *T = b->T;
  const PetscScalar *v = a->a;
  const PetscInt     p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
  PetscInt           i, j, *v_pivots, dof, dof2;
  PetscScalar       *diag, aval, *v_work;

  PetscFunctionBegin;
  PetscCheck(p == q, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Block size must be square to calculate inverse.");
  PetscCheck(S || T || b->isTI, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Cannot invert a zero matrix.");

  dof  = p;
  dof2 = dof * dof;

  if (b->ibdiagvalid) {
    /* cached inverse is still valid; reuse it */
    if (values) *values = b->ibdiag;
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  if (!b->ibdiag) { PetscCall(PetscMalloc1(dof2 * m, &b->ibdiag)); }
  if (values) *values = b->ibdiag;
  diag = b->ibdiag;

  PetscCall(PetscMalloc2(dof, &v_work, dof, &v_pivots));
  for (i = 0; i < m; i++) {
    /* start from S (or zero), then add a_ii * T (or a_ii * I when T is the identity) */
    if (S) {
      PetscCall(PetscMemcpy(diag, S, dof2 * sizeof(PetscScalar)));
    } else {
      PetscCall(PetscMemzero(diag, dof2 * sizeof(PetscScalar)));
    }
    if (b->isTI) {
      aval = 0;
      for (j = ii[i]; j < ii[i + 1]; j++)
        if (idx[j] == i) aval = v[j];
      for (j = 0; j < dof; j++) diag[j + dof * j] += aval;
    } else if (T) {
      aval = 0;
      for (j = ii[i]; j < ii[i + 1]; j++)
        if (idx[j] == i) aval = v[j];
      for (j = 0; j < dof2; j++) diag[j] += aval * T[j];
    }
    PetscCall(PetscKernel_A_gets_inverse_A(dof, diag, v_pivots, v_work, PETSC_FALSE, NULL));
    diag += dof2;
  }
  PetscCall(PetscFree2(v_work, v_pivots));

  b->ibdiagvalid = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return the sequential KAIJ matrix acting on the diagonal block of the parallel KAIJ matrix;
   no reference is taken, so the caller must not destroy it */
static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A, Mat *B)
{
  Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ *)A->data;

  PetscFunctionBegin;
  *B = kaij->AIJ;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Convert a (sequential or parallel) KAIJ matrix to AIJ: preallocate from the row
   nonzero counts of the inner AIJ matrices scaled by the block sizes, then fill via
   MatConvert_Basic(). */
static PetscErrorCode MatConvert_KAIJ_AIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_SeqKAIJ   *a = (Mat_SeqKAIJ *)A->data;
  Mat            AIJ, OAIJ, B;
  PetscInt      *d_nnz, *o_nnz = NULL, nz, i, j, m, d;
  const PetscInt p = a->p, q = a->q;
  PetscBool      ismpikaij, missing;

  PetscFunctionBegin;
  if (reuse != MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ,
                                    &ismpikaij));
    if (ismpikaij) {
      /* parallel: take the inner sequential AIJ matrices of the diagonal and off-diagonal sub-KAIJ matrices */
      Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;

      AIJ  = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ;
      OAIJ = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ;
    } else {
      AIJ  = a->AIJ;
      OAIJ = NULL;
    }
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(B, MATAIJ));
    PetscCall(MatGetSize(AIJ, &m, NULL));
    PetscCall(MatMissingDiagonal(AIJ, &missing, &d)); /* assumption that all successive rows will have a missing diagonal */
    if (!missing || !a->S) d = m;
    PetscCall(PetscMalloc1(m * p, &d_nnz));
    for (i = 0; i < m; ++i) {
      /* each AIJ nonzero expands to a p x q dense block; rows past the first missing diagonal need q extra slots for S */
      PetscCall(MatGetRow_SeqAIJ(AIJ, i, &nz, NULL, NULL));
      for (j = 0; j < p; ++j) d_nnz[i * p + j] = nz * q + (i >= d) * q;
      PetscCall(MatRestoreRow_SeqAIJ(AIJ, i, &nz, NULL, NULL));
    }
    if (OAIJ) {
      PetscCall(PetscMalloc1(m * p, &o_nnz));
      for (i = 0; i < m; ++i) {
        PetscCall(MatGetRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL));
        for (j = 0; j < p; ++j) o_nnz[i * p + j] = nz * q;
        PetscCall(MatRestoreRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL));
      }
      PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    } else {
      PetscCall(MatSeqAIJSetPreallocation(B, 0, d_nnz));
    }
    PetscCall(PetscFree(d_nnz));
    PetscCall(PetscFree(o_nnz));
  } else B = *newmat;
  PetscCall(MatConvert_Basic(A, newtype, MAT_REUSE_MATRIX, &B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &B));
  } else *newmat = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* SOR/Gauss-Seidel relaxation for the sequential KAIJ matrix; requires square blocks (p == q)
   and uses the inverted block diagonals computed by MatInvertBlockDiagonal_SeqKAIJ(). */
static PetscErrorCode MatSOR_SeqKAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ *)A->data;
  Mat_SeqAIJ        *a    = (Mat_SeqAIJ *)kaij->AIJ->data;
  const PetscScalar *aa = a->a, *T = kaij->T, *v;
  const PetscInt     m = kaij->AIJ->rmap->n, *ai = a->i, *aj = a->j, p = kaij->p, q = kaij->q,
*diag, *vi; 857 const PetscScalar *b, *xb, *idiag; 858 PetscScalar *x, *work, *workt, *w, *y, *arr, *t, *arrt; 859 PetscInt i, j, k, i2, bs, bs2, nz; 860 861 PetscFunctionBegin; 862 its = its * lits; 863 PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat"); 864 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 865 PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift"); 866 PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts"); 867 PetscCheck(p == q, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSOR for KAIJ: No support for non-square dense blocks"); 868 bs = p; 869 bs2 = bs * bs; 870 871 if (!m) PetscFunctionReturn(PETSC_SUCCESS); 872 873 if (!kaij->ibdiagvalid) PetscCall(MatInvertBlockDiagonal_SeqKAIJ(A, NULL)); 874 idiag = kaij->ibdiag; 875 diag = a->diag; 876 877 if (!kaij->sor.setup) { 878 PetscCall(PetscMalloc5(bs, &kaij->sor.w, bs, &kaij->sor.y, m * bs, &kaij->sor.work, m * bs, &kaij->sor.t, m * bs2, &kaij->sor.arr)); 879 kaij->sor.setup = PETSC_TRUE; 880 } 881 y = kaij->sor.y; 882 w = kaij->sor.w; 883 work = kaij->sor.work; 884 t = kaij->sor.t; 885 arr = kaij->sor.arr; 886 887 PetscCall(VecGetArray(xx, &x)); 888 PetscCall(VecGetArrayRead(bb, &b)); 889 890 if (flag & SOR_ZERO_INITIAL_GUESS) { 891 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 892 PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); /* x[0:bs] <- D^{-1} b[0:bs] */ 893 PetscCall(PetscMemcpy(t, b, bs * sizeof(PetscScalar))); 894 i2 = bs; 895 idiag += bs2; 896 for (i = 1; i < m; i++) { 897 v = aa + ai[i]; 898 vi = aj + ai[i]; 899 nz = diag[i] - ai[i]; 900 901 if (T) { /* b - T (Arow * x) */ 902 PetscCall(PetscMemzero(w, bs * sizeof(PetscScalar))); 903 for (j = 0; j < nz; j++) { 904 for (k = 0; k 
< bs; k++) w[k] -= v[j] * x[vi[j] * bs + k]; 905 } 906 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs, w, T, &t[i2]); 907 for (k = 0; k < bs; k++) t[i2 + k] += b[i2 + k]; 908 } else if (kaij->isTI) { 909 PetscCall(PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar))); 910 for (j = 0; j < nz; j++) { 911 for (k = 0; k < bs; k++) t[i2 + k] -= v[j] * x[vi[j] * bs + k]; 912 } 913 } else { 914 PetscCall(PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar))); 915 } 916 917 PetscKernel_w_gets_Ar_times_v(bs, bs, t + i2, idiag, y); 918 for (j = 0; j < bs; j++) x[i2 + j] = omega * y[j]; 919 920 idiag += bs2; 921 i2 += bs; 922 } 923 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 924 PetscCall(PetscLogFlops(1.0 * bs2 * a->nz)); 925 xb = t; 926 } else xb = b; 927 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 928 idiag = kaij->ibdiag + bs2 * (m - 1); 929 i2 = bs * (m - 1); 930 PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar))); 931 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 932 i2 -= bs; 933 idiag -= bs2; 934 for (i = m - 2; i >= 0; i--) { 935 v = aa + diag[i] + 1; 936 vi = aj + diag[i] + 1; 937 nz = ai[i + 1] - diag[i] - 1; 938 939 if (T) { /* FIXME: This branch untested */ 940 PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar))); 941 /* copy all rows of x that are needed into contiguous space */ 942 workt = work; 943 for (j = 0; j < nz; j++) { 944 PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 945 workt += bs; 946 } 947 arrt = arr; 948 for (j = 0; j < nz; j++) { 949 PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 950 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 951 arrt += bs2; 952 } 953 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 954 } else if (kaij->isTI) { 955 PetscCall(PetscMemcpy(w, t + i2, bs * sizeof(PetscScalar))); 956 for (j = 0; j < nz; j++) { 957 for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k]; 958 } 959 } 960 
961 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); /* RHS incorrect for omega != 1.0 */ 962 for (j = 0; j < bs; j++) x[i2 + j] = (1.0 - omega) * x[i2 + j] + omega * y[j]; 963 964 idiag -= bs2; 965 i2 -= bs; 966 } 967 PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz))); 968 } 969 its--; 970 } 971 while (its--) { /* FIXME: This branch not updated */ 972 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 973 i2 = 0; 974 idiag = kaij->ibdiag; 975 for (i = 0; i < m; i++) { 976 PetscCall(PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar))); 977 978 v = aa + ai[i]; 979 vi = aj + ai[i]; 980 nz = diag[i] - ai[i]; 981 workt = work; 982 for (j = 0; j < nz; j++) { 983 PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 984 workt += bs; 985 } 986 arrt = arr; 987 if (T) { 988 for (j = 0; j < nz; j++) { 989 PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 990 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 991 arrt += bs2; 992 } 993 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 994 } else if (kaij->isTI) { 995 for (j = 0; j < nz; j++) { 996 PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar))); 997 for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 998 arrt += bs2; 999 } 1000 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1001 } 1002 PetscCall(PetscMemcpy(t + i2, w, bs * sizeof(PetscScalar))); 1003 1004 v = aa + diag[i] + 1; 1005 vi = aj + diag[i] + 1; 1006 nz = ai[i + 1] - diag[i] - 1; 1007 workt = work; 1008 for (j = 0; j < nz; j++) { 1009 PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 1010 workt += bs; 1011 } 1012 arrt = arr; 1013 if (T) { 1014 for (j = 0; j < nz; j++) { 1015 PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 1016 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 1017 arrt += bs2; 1018 } 1019 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1020 } else if (kaij->isTI) { 1021 for (j = 0; j < nz; j++) { 1022 PetscCall(PetscMemzero(arrt, 
bs2 * sizeof(PetscScalar))); 1023 for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 1024 arrt += bs2; 1025 } 1026 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1027 } 1028 1029 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); 1030 for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j); 1031 1032 idiag += bs2; 1033 i2 += bs; 1034 } 1035 xb = t; 1036 } else xb = b; 1037 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1038 idiag = kaij->ibdiag + bs2 * (m - 1); 1039 i2 = bs * (m - 1); 1040 if (xb == b) { 1041 for (i = m - 1; i >= 0; i--) { 1042 PetscCall(PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar))); 1043 1044 v = aa + ai[i]; 1045 vi = aj + ai[i]; 1046 nz = diag[i] - ai[i]; 1047 workt = work; 1048 for (j = 0; j < nz; j++) { 1049 PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 1050 workt += bs; 1051 } 1052 arrt = arr; 1053 if (T) { 1054 for (j = 0; j < nz; j++) { 1055 PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 1056 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 1057 arrt += bs2; 1058 } 1059 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1060 } else if (kaij->isTI) { 1061 for (j = 0; j < nz; j++) { 1062 PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar))); 1063 for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 1064 arrt += bs2; 1065 } 1066 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1067 } 1068 1069 v = aa + diag[i] + 1; 1070 vi = aj + diag[i] + 1; 1071 nz = ai[i + 1] - diag[i] - 1; 1072 workt = work; 1073 for (j = 0; j < nz; j++) { 1074 PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 1075 workt += bs; 1076 } 1077 arrt = arr; 1078 if (T) { 1079 for (j = 0; j < nz; j++) { 1080 PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 1081 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 1082 arrt += bs2; 1083 } 1084 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1085 
} else if (kaij->isTI) { 1086 for (j = 0; j < nz; j++) { 1087 PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar))); 1088 for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 1089 arrt += bs2; 1090 } 1091 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1092 } 1093 1094 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); 1095 for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j); 1096 } 1097 } else { 1098 for (i = m - 1; i >= 0; i--) { 1099 PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar))); 1100 v = aa + diag[i] + 1; 1101 vi = aj + diag[i] + 1; 1102 nz = ai[i + 1] - diag[i] - 1; 1103 workt = work; 1104 for (j = 0; j < nz; j++) { 1105 PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 1106 workt += bs; 1107 } 1108 arrt = arr; 1109 if (T) { 1110 for (j = 0; j < nz; j++) { 1111 PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 1112 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 1113 arrt += bs2; 1114 } 1115 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1116 } else if (kaij->isTI) { 1117 for (j = 0; j < nz; j++) { 1118 PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar))); 1119 for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 1120 arrt += bs2; 1121 } 1122 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1123 } 1124 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); 1125 for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j); 1126 } 1127 } 1128 PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz))); 1129 } 1130 } 1131 1132 PetscCall(VecRestoreArray(xx, &x)); 1133 PetscCall(VecRestoreArrayRead(bb, &b)); 1134 PetscFunctionReturn(PETSC_SUCCESS); 1135 } 1136 1137 /*===================================================================================*/ 1138 1139 static PetscErrorCode MatMultAdd_MPIKAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1140 { 1141 Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 1142 1143 PetscFunctionBegin; 
1144 if (!yy) { 1145 PetscCall(VecSet(zz, 0.0)); 1146 } else { 1147 PetscCall(VecCopy(yy, zz)); 1148 } 1149 PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */ 1150 /* start the scatter */ 1151 PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 1152 PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, zz, zz)); 1153 PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 1154 PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz)); 1155 PetscFunctionReturn(PETSC_SUCCESS); 1156 } 1157 1158 static PetscErrorCode MatMult_MPIKAIJ(Mat A, Vec xx, Vec yy) 1159 { 1160 PetscFunctionBegin; 1161 PetscCall(MatMultAdd_MPIKAIJ(A, xx, NULL, yy)); 1162 PetscFunctionReturn(PETSC_SUCCESS); 1163 } 1164 1165 static PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A, const PetscScalar **values) 1166 { 1167 Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 1168 1169 PetscFunctionBegin; 1170 PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ is up to date. 
*/ 1171 PetscCall((*b->AIJ->ops->invertblockdiagonal)(b->AIJ, values)); 1172 PetscFunctionReturn(PETSC_SUCCESS); 1173 } 1174 1175 /* ----------------------------------------------------------------*/ 1176 1177 static PetscErrorCode MatGetRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values) 1178 { 1179 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 1180 PetscBool diag = PETSC_FALSE; 1181 PetscInt nzaij, nz, *colsaij, *idx, i, j, p = b->p, q = b->q, r = row / p, s = row % p, c; 1182 PetscScalar *vaij, *v, *S = b->S, *T = b->T; 1183 1184 PetscFunctionBegin; 1185 PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1186 b->getrowactive = PETSC_TRUE; 1187 PetscCheck(row >= 0 && row < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row); 1188 1189 if ((!S) && (!T) && (!b->isTI)) { 1190 if (ncols) *ncols = 0; 1191 if (cols) *cols = NULL; 1192 if (values) *values = NULL; 1193 PetscFunctionReturn(PETSC_SUCCESS); 1194 } 1195 1196 if (T || b->isTI) { 1197 PetscCall(MatGetRow_SeqAIJ(b->AIJ, r, &nzaij, &colsaij, &vaij)); 1198 c = nzaij; 1199 for (i = 0; i < nzaij; i++) { 1200 /* check if this row contains a diagonal entry */ 1201 if (colsaij[i] == r) { 1202 diag = PETSC_TRUE; 1203 c = i; 1204 } 1205 } 1206 } else nzaij = c = 0; 1207 1208 /* calculate size of row */ 1209 nz = 0; 1210 if (S) nz += q; 1211 if (T || b->isTI) nz += (diag && S ? (nzaij - 1) * q : nzaij * q); 1212 1213 if (cols || values) { 1214 PetscCall(PetscMalloc2(nz, &idx, nz, &v)); 1215 for (i = 0; i < q; i++) { 1216 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1217 v[i] = 0.0; 1218 } 1219 if (b->isTI) { 1220 for (i = 0; i < nzaij; i++) { 1221 for (j = 0; j < q; j++) { 1222 idx[i * q + j] = colsaij[i] * q + j; 1223 v[i * q + j] = (j == s ? 
vaij[i] : 0); 1224 } 1225 } 1226 } else if (T) { 1227 for (i = 0; i < nzaij; i++) { 1228 for (j = 0; j < q; j++) { 1229 idx[i * q + j] = colsaij[i] * q + j; 1230 v[i * q + j] = vaij[i] * T[s + j * p]; 1231 } 1232 } 1233 } 1234 if (S) { 1235 for (j = 0; j < q; j++) { 1236 idx[c * q + j] = r * q + j; 1237 v[c * q + j] += S[s + j * p]; 1238 } 1239 } 1240 } 1241 1242 if (ncols) *ncols = nz; 1243 if (cols) *cols = idx; 1244 if (values) *values = v; 1245 PetscFunctionReturn(PETSC_SUCCESS); 1246 } 1247 1248 static PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1249 { 1250 PetscFunctionBegin; 1251 if (nz) *nz = 0; 1252 PetscCall(PetscFree2(*idx, *v)); 1253 ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE; 1254 PetscFunctionReturn(PETSC_SUCCESS); 1255 } 1256 1257 static PetscErrorCode MatGetRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values) 1258 { 1259 Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 1260 Mat AIJ = b->A; 1261 PetscBool diag = PETSC_FALSE; 1262 Mat MatAIJ, MatOAIJ; 1263 const PetscInt rstart = A->rmap->rstart, rend = A->rmap->rend, p = b->p, q = b->q, *garray; 1264 PetscInt nz, *idx, ncolsaij = 0, ncolsoaij = 0, *colsaij, *colsoaij, r, s, c, i, j, lrow; 1265 PetscScalar *v, *vals, *ovals, *S = b->S, *T = b->T; 1266 1267 PetscFunctionBegin; 1268 PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. 
*/ 1269 MatAIJ = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ; 1270 MatOAIJ = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ; 1271 PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1272 b->getrowactive = PETSC_TRUE; 1273 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1274 lrow = row - rstart; 1275 1276 if ((!S) && (!T) && (!b->isTI)) { 1277 if (ncols) *ncols = 0; 1278 if (cols) *cols = NULL; 1279 if (values) *values = NULL; 1280 PetscFunctionReturn(PETSC_SUCCESS); 1281 } 1282 1283 r = lrow / p; 1284 s = lrow % p; 1285 1286 if (T || b->isTI) { 1287 PetscCall(MatMPIAIJGetSeqAIJ(AIJ, NULL, NULL, &garray)); 1288 PetscCall(MatGetRow_SeqAIJ(MatAIJ, lrow / p, &ncolsaij, &colsaij, &vals)); 1289 PetscCall(MatGetRow_SeqAIJ(MatOAIJ, lrow / p, &ncolsoaij, &colsoaij, &ovals)); 1290 1291 c = ncolsaij + ncolsoaij; 1292 for (i = 0; i < ncolsaij; i++) { 1293 /* check if this row contains a diagonal entry */ 1294 if (colsaij[i] == r) { 1295 diag = PETSC_TRUE; 1296 c = i; 1297 } 1298 } 1299 } else c = 0; 1300 1301 /* calculate size of row */ 1302 nz = 0; 1303 if (S) nz += q; 1304 if (T || b->isTI) nz += (diag && S ? (ncolsaij + ncolsoaij - 1) * q : (ncolsaij + ncolsoaij) * q); 1305 1306 if (cols || values) { 1307 PetscCall(PetscMalloc2(nz, &idx, nz, &v)); 1308 for (i = 0; i < q; i++) { 1309 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1310 v[i] = 0.0; 1311 } 1312 if (b->isTI) { 1313 for (i = 0; i < ncolsaij; i++) { 1314 for (j = 0; j < q; j++) { 1315 idx[i * q + j] = (colsaij[i] + rstart / p) * q + j; 1316 v[i * q + j] = (j == s ? vals[i] : 0.0); 1317 } 1318 } 1319 for (i = 0; i < ncolsoaij; i++) { 1320 for (j = 0; j < q; j++) { 1321 idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j; 1322 v[(i + ncolsaij) * q + j] = (j == s ? 
ovals[i] : 0.0); 1323 } 1324 } 1325 } else if (T) { 1326 for (i = 0; i < ncolsaij; i++) { 1327 for (j = 0; j < q; j++) { 1328 idx[i * q + j] = (colsaij[i] + rstart / p) * q + j; 1329 v[i * q + j] = vals[i] * T[s + j * p]; 1330 } 1331 } 1332 for (i = 0; i < ncolsoaij; i++) { 1333 for (j = 0; j < q; j++) { 1334 idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j; 1335 v[(i + ncolsaij) * q + j] = ovals[i] * T[s + j * p]; 1336 } 1337 } 1338 } 1339 if (S) { 1340 for (j = 0; j < q; j++) { 1341 idx[c * q + j] = (r + rstart / p) * q + j; 1342 v[c * q + j] += S[s + j * p]; 1343 } 1344 } 1345 } 1346 1347 if (ncols) *ncols = nz; 1348 if (cols) *cols = idx; 1349 if (values) *values = v; 1350 PetscFunctionReturn(PETSC_SUCCESS); 1351 } 1352 1353 static PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1354 { 1355 PetscFunctionBegin; 1356 PetscCall(PetscFree2(*idx, *v)); 1357 ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE; 1358 PetscFunctionReturn(PETSC_SUCCESS); 1359 } 1360 1361 static PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat) 1362 { 1363 Mat A; 1364 1365 PetscFunctionBegin; 1366 PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A)); 1367 PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat)); 1368 PetscCall(MatDestroy(&A)); 1369 PetscFunctionReturn(PETSC_SUCCESS); 1370 } 1371 1372 /* ---------------------------------------------------------------------------------- */ 1373 /*@C 1374 MatCreateKAIJ - Creates a matrix of type `MATKAIJ` to be used for matrices of the following form: 1375 1376 [I \otimes S + A \otimes T] 1377 1378 where 1379 S is a dense (p \times q) matrix 1380 T is a dense (p \times q) matrix 1381 A is an `MATAIJ` (n \times n) matrix 1382 I is the identity matrix 1383 The resulting matrix is (np \times nq) 1384 1385 S and T are always stored independently on all processes as `PetscScalar` arrays in column-major format. 
1386 1387 Collective 1388 1389 Input Parameters: 1390 + A - the `MATAIJ` matrix 1391 . p - number of rows in S and T 1392 . q - number of columns in S and T 1393 . S - the S matrix (can be NULL), stored as a `PetscScalar` array (column-major) 1394 - T - the T matrix (can be NULL), stored as a `PetscScalar` array (column-major) 1395 1396 Output Parameter: 1397 . kaij - the new `MATKAIJ` matrix 1398 1399 Notes: 1400 This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed. 1401 1402 Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix. 1403 1404 Developer Note: 1405 In the `MATMPIKAIJ` case, the internal 'AIJ' and 'OAIJ' sequential KAIJ matrices are kept up to date by tracking the object state 1406 of the AIJ matrix 'A' that describes the blockwise action of the MATMPIKAIJ matrix and, if the object state has changed, lazily 1407 rebuilding 'AIJ' and 'OAIJ' just before executing operations with the MATMPIKAIJ matrix. If new types of operations are added, 1408 routines implementing those must also ensure these are rebuilt when needed (by calling the internal MatKAIJ_build_AIJ_OAIJ() routine). 
1409 1410 Level: advanced 1411 1412 .seealso: `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MATKAIJ` 1413 @*/ 1414 PetscErrorCode MatCreateKAIJ(Mat A, PetscInt p, PetscInt q, const PetscScalar S[], const PetscScalar T[], Mat *kaij) 1415 { 1416 PetscFunctionBegin; 1417 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), kaij)); 1418 PetscCall(MatSetType(*kaij, MATKAIJ)); 1419 PetscCall(MatKAIJSetAIJ(*kaij, A)); 1420 PetscCall(MatKAIJSetS(*kaij, p, q, S)); 1421 PetscCall(MatKAIJSetT(*kaij, p, q, T)); 1422 PetscCall(MatSetUp(*kaij)); 1423 PetscFunctionReturn(PETSC_SUCCESS); 1424 } 1425 1426 /*MC 1427 MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form 1428 [I \otimes S + A \otimes T], 1429 where 1430 S is a dense (p \times q) matrix, 1431 T is a dense (p \times q) matrix, 1432 A is an AIJ (n \times n) matrix, 1433 and I is the identity matrix. 1434 The resulting matrix is (np \times nq). 1435 1436 S and T are always stored independently on all processes as `PetscScalar` arrays in column-major format. 1437 1438 Note: 1439 A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b, 1440 where x and b are column vectors containing the row-major representations of X and B. 
1441 1442 Level: advanced 1443 1444 .seealso: `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MatCreateKAIJ()` 1445 M*/ 1446 1447 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A) 1448 { 1449 Mat_MPIKAIJ *b; 1450 PetscMPIInt size; 1451 1452 PetscFunctionBegin; 1453 PetscCall(PetscNew(&b)); 1454 A->data = (void *)b; 1455 1456 PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps))); 1457 1458 b->w = NULL; 1459 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 1460 if (size == 1) { 1461 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQKAIJ)); 1462 A->ops->destroy = MatDestroy_SeqKAIJ; 1463 A->ops->mult = MatMult_SeqKAIJ; 1464 A->ops->multadd = MatMultAdd_SeqKAIJ; 1465 A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ; 1466 A->ops->getrow = MatGetRow_SeqKAIJ; 1467 A->ops->restorerow = MatRestoreRow_SeqKAIJ; 1468 A->ops->sor = MatSOR_SeqKAIJ; 1469 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", MatConvert_KAIJ_AIJ)); 1470 } else { 1471 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIKAIJ)); 1472 A->ops->destroy = MatDestroy_MPIKAIJ; 1473 A->ops->mult = MatMult_MPIKAIJ; 1474 A->ops->multadd = MatMultAdd_MPIKAIJ; 1475 A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ; 1476 A->ops->getrow = MatGetRow_MPIKAIJ; 1477 A->ops->restorerow = MatRestoreRow_MPIKAIJ; 1478 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", MatGetDiagonalBlock_MPIKAIJ)); 1479 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", MatConvert_KAIJ_AIJ)); 1480 } 1481 A->ops->setup = MatSetUp_KAIJ; 1482 A->ops->view = MatView_KAIJ; 1483 A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ; 1484 PetscFunctionReturn(PETSC_SUCCESS); 1485 } 1486