1 2 /* 3 Defines the basic matrix operations for the KAIJ matrix storage format. 4 This format is used to evaluate matrices of the form: 5 6 [I \otimes S + A \otimes T] 7 8 where 9 S is a dense (p \times q) matrix 10 T is a dense (p \times q) matrix 11 A is an AIJ (n \times n) matrix 12 I is the identity matrix 13 14 The resulting matrix is (np \times nq) 15 16 We provide: 17 MatMult() 18 MatMultAdd() 19 MatInvertBlockDiagonal() 20 and 21 MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*) 22 23 This single directory handles both the sequential and parallel codes 24 */ 25 26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/ 27 #include <../src/mat/utils/freespace.h> 28 #include <petsc/private/vecimpl.h> 29 30 /*@C 31 MatKAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix 32 33 Not Collective, but if the `MATKAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel 34 35 Input Parameter: 36 . A - the `MATKAIJ` matrix 37 38 Output Parameter: 39 . B - the `MATAIJ` matrix 40 41 Level: advanced 42 43 Note: 44 The reference count on the `MATAIJ` matrix is not increased so you should not destroy it. 
45 46 .seealso: `MatCreateKAIJ()`, `MATKAIJ`, `MATAIJ` 47 @*/ 48 PetscErrorCode MatKAIJGetAIJ(Mat A, Mat *B) { 49 PetscBool ismpikaij, isseqkaij; 50 51 PetscFunctionBegin; 52 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij)); 53 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQKAIJ, &isseqkaij)); 54 if (ismpikaij) { 55 Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 56 57 *B = b->A; 58 } else if (isseqkaij) { 59 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 60 61 *B = b->AIJ; 62 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix passed in is not of type KAIJ"); 63 PetscFunctionReturn(0); 64 } 65 66 /*@C 67 MatKAIJGetS - Get the S matrix describing the shift action of the `MATKAIJ` matrix 68 69 Not Collective; the entire S is stored and returned independently on all processes. 70 71 Input Parameter: 72 . A - the `MATKAIJ` matrix 73 74 Output Parameters: 75 + m - the number of rows in S 76 . n - the number of columns in S 77 - S - the S matrix, in form of a scalar array in column-major format 78 79 Note: 80 All output parameters are optional (pass NULL if not desired) 81 82 Level: advanced 83 84 .seealso: `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()` 85 @*/ 86 PetscErrorCode MatKAIJGetS(Mat A, PetscInt *m, PetscInt *n, PetscScalar **S) { 87 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 88 PetscFunctionBegin; 89 if (m) *m = b->p; 90 if (n) *n = b->q; 91 if (S) *S = b->S; 92 PetscFunctionReturn(0); 93 } 94 95 /*@C 96 MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the `MATKAIJ` matrix 97 98 Not Collective; the entire S is stored and returned independently on all processes. 99 100 Input Parameter: 101 . A - the `MATKAIJ` matrix 102 103 Output Parameters: 104 + m - the number of rows in S 105 . 
n - the number of columns in S 106 - S - the S matrix, in form of a scalar array in column-major format 107 108 Note: 109 All output parameters are optional (pass NULL if not desired) 110 111 Level: advanced 112 113 .seealso: `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()` 114 @*/ 115 PetscErrorCode MatKAIJGetSRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar **S) { 116 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 117 PetscFunctionBegin; 118 if (m) *m = b->p; 119 if (n) *n = b->q; 120 if (S) *S = b->S; 121 PetscFunctionReturn(0); 122 } 123 124 /*@C 125 MatKAIJRestoreS - Restore array obtained with `MatKAIJGetS()` 126 127 Not collective 128 129 Input Parameter: 130 . A - the `MATKAIJ` matrix 131 132 Output Parameter: 133 . S - location of pointer to array obtained with `MatKAIJGetS()` 134 135 Note: 136 This routine zeros the array pointer to prevent accidental reuse after it has been restored. 137 If NULL is passed, it will not attempt to zero the array pointer. 138 139 Level: advanced 140 141 .seealso: `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()` 142 @*/ 143 PetscErrorCode MatKAIJRestoreS(Mat A, PetscScalar **S) { 144 PetscFunctionBegin; 145 if (S) *S = NULL; 146 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 147 PetscFunctionReturn(0); 148 } 149 150 /*@C 151 MatKAIJRestoreSRead - Restore array obtained with `MatKAIJGetSRead()` 152 153 Not collective 154 155 Input Parameter: 156 . A - the `MATKAIJ` matrix 157 158 Output Parameter: 159 . S - location of pointer to array obtained with `MatKAIJGetS()` 160 161 Note: 162 This routine zeros the array pointer to prevent accidental reuse after it has been restored. 163 If NULL is passed, it will not attempt to zero the array pointer. 
164 165 Level: advanced 166 167 .seealso: `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()` 168 @*/ 169 PetscErrorCode MatKAIJRestoreSRead(Mat A, const PetscScalar **S) { 170 PetscFunctionBegin; 171 if (S) *S = NULL; 172 PetscFunctionReturn(0); 173 } 174 175 /*@C 176 MatKAIJGetT - Get the transformation matrix T associated with the `MATKAIJ` matrix 177 178 Not Collective; the entire T is stored and returned independently on all processes 179 180 Input Parameter: 181 . A - the `MATKAIJ` matrix 182 183 Output Parameters: 184 + m - the number of rows in T 185 . n - the number of columns in T 186 - T - the T matrix, in form of a scalar array in column-major format 187 188 Note: 189 All output parameters are optional (pass NULL if not desired) 190 191 Level: advanced 192 193 .seealso: `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()` 194 @*/ 195 PetscErrorCode MatKAIJGetT(Mat A, PetscInt *m, PetscInt *n, PetscScalar **T) { 196 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 197 PetscFunctionBegin; 198 if (m) *m = b->p; 199 if (n) *n = b->q; 200 if (T) *T = b->T; 201 PetscFunctionReturn(0); 202 } 203 204 /*@C 205 MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the `MATKAIJ` matrix 206 207 Not Collective; the entire T is stored and returned independently on all processes 208 209 Input Parameter: 210 . A - the `MATKAIJ` matrix 211 212 Output Parameters: 213 + m - the number of rows in T 214 . 
n - the number of columns in T 215 - T - the T matrix, in form of a scalar array in column-major format 216 217 Note: All output parameters are optional (pass NULL if not desired) 218 219 Level: advanced 220 221 .seealso: `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()` 222 @*/ 223 PetscErrorCode MatKAIJGetTRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar **T) { 224 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 225 PetscFunctionBegin; 226 if (m) *m = b->p; 227 if (n) *n = b->q; 228 if (T) *T = b->T; 229 PetscFunctionReturn(0); 230 } 231 232 /*@C 233 MatKAIJRestoreT - Restore array obtained with `MatKAIJGetT()` 234 235 Not collective 236 237 Input Parameter: 238 . A - the `MATKAIJ` matrix 239 240 Output Parameter: 241 . T - location of pointer to array obtained with `MatKAIJGetS()` 242 243 Note: 244 This routine zeros the array pointer to prevent accidental reuse after it has been restored. 245 If NULL is passed, it will not attempt to zero the array pointer. 246 247 Level: advanced 248 249 .seealso: `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()` 250 @*/ 251 PetscErrorCode MatKAIJRestoreT(Mat A, PetscScalar **T) { 252 PetscFunctionBegin; 253 if (T) *T = NULL; 254 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 255 PetscFunctionReturn(0); 256 } 257 258 /*@C 259 MatKAIJRestoreTRead - Restore array obtained with `MatKAIJGetTRead()` 260 261 Not collective 262 263 Input Parameter: 264 . A - the `MATKAIJ` matrix 265 266 Output Parameter: 267 . T - location of pointer to array obtained with `MatKAIJGetS()` 268 269 Note: 270 This routine zeros the array pointer to prevent accidental reuse after it has been restored. 271 If NULL is passed, it will not attempt to zero the array pointer. 
272 273 Level: advanced 274 275 .seealso: `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()` 276 @*/ 277 PetscErrorCode MatKAIJRestoreTRead(Mat A, const PetscScalar **T) { 278 PetscFunctionBegin; 279 if (T) *T = NULL; 280 PetscFunctionReturn(0); 281 } 282 283 /*@ 284 MatKAIJSetAIJ - Set the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix 285 286 Logically Collective; if the `MATAIJ` matrix is parallel, the `MATKAIJ` matrix is also parallel 287 288 Input Parameters: 289 + A - the `MATKAIJ` matrix 290 - B - the `MATAIJ` matrix 291 292 Notes: 293 This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed. 294 295 Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix. 296 297 Level: advanced 298 299 .seealso: `MATKAIJ`, `MatKAIJGetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()` 300 @*/ 301 PetscErrorCode MatKAIJSetAIJ(Mat A, Mat B) { 302 PetscMPIInt size; 303 PetscBool flg; 304 305 PetscFunctionBegin; 306 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 307 if (size == 1) { 308 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQAIJ, &flg)); 309 PetscCheck(flg, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MatKAIJSetAIJ() with MATSEQKAIJ does not support %s as the AIJ mat", ((PetscObject)B)->type_name); 310 Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data; 311 a->AIJ = B; 312 } else { 313 Mat_MPIKAIJ *a = (Mat_MPIKAIJ *)A->data; 314 a->A = B; 315 } 316 PetscCall(PetscObjectReference((PetscObject)B)); 317 PetscFunctionReturn(0); 318 } 319 320 /*@C 321 MatKAIJSetS - Set the S matrix describing the shift action of the `MATKAIJ` matrix 322 323 Logically Collective; the entire S is stored independently on all processes. 324 325 Input Parameters: 326 + A - the `MATKAIJ` matrix 327 . p - the number of rows in S 328 . 
q - the number of columns in S 329 - S - the S matrix, in form of a scalar array in column-major format 330 331 Notes: 332 The dimensions p and q must match those of the transformation matrix T associated with the `MATKAIJ` matrix. 333 334 The S matrix is copied, so the user can destroy this array. 335 336 Level: advanced 337 338 .seealso: `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJSetT()`, `MatKAIJSetAIJ()` 339 @*/ 340 PetscErrorCode MatKAIJSetS(Mat A, PetscInt p, PetscInt q, const PetscScalar S[]) { 341 Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data; 342 343 PetscFunctionBegin; 344 PetscCall(PetscFree(a->S)); 345 if (S) { 346 PetscCall(PetscMalloc1(p * q, &a->S)); 347 PetscCall(PetscMemcpy(a->S, S, p * q * sizeof(PetscScalar))); 348 } else a->S = NULL; 349 350 a->p = p; 351 a->q = q; 352 PetscFunctionReturn(0); 353 } 354 355 /*@C 356 MatKAIJGetScaledIdentity - Check if both S and T are scaled identities. 357 358 Logically Collective. 359 360 Input Parameter: 361 . A - the `MATKAIJ` matrix 362 363 Output Parameter: 364 . 
identity - the Boolean value 365 366 Level: Advanced 367 368 .seealso: `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetT()` 369 @*/ 370 PetscErrorCode MatKAIJGetScaledIdentity(Mat A, PetscBool *identity) { 371 Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data; 372 PetscInt i, j; 373 374 PetscFunctionBegin; 375 if (a->p != a->q) { 376 *identity = PETSC_FALSE; 377 PetscFunctionReturn(0); 378 } else *identity = PETSC_TRUE; 379 if (!a->isTI || a->S) { 380 for (i = 0; i < a->p && *identity; i++) { 381 for (j = 0; j < a->p && *identity; j++) { 382 if (i != j) { 383 if (a->S && PetscAbsScalar(a->S[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE; 384 if (a->T && PetscAbsScalar(a->T[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE; 385 } else { 386 if (a->S && PetscAbsScalar(a->S[i * (a->p + 1)] - a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE; 387 if (a->T && PetscAbsScalar(a->T[i * (a->p + 1)] - a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE; 388 } 389 } 390 } 391 } 392 PetscFunctionReturn(0); 393 } 394 395 /*@C 396 MatKAIJSetT - Set the transformation matrix T associated with the `MATKAIJ` matrix 397 398 Logically Collective; the entire T is stored independently on all processes. 399 400 Input Parameters: 401 + A - the `MATKAIJ` matrix 402 . p - the number of rows in S 403 . q - the number of columns in S 404 - T - the T matrix, in form of a scalar array in column-major format 405 406 Notes: 407 The dimensions p and q must match those of the shift matrix S associated with the `MATKAIJ` matrix. 408 409 The T matrix is copied, so the user can destroy this array. 
410 411 Level: Advanced 412 413 .seealso: `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJSetS()`, `MatKAIJSetAIJ()` 414 @*/ 415 PetscErrorCode MatKAIJSetT(Mat A, PetscInt p, PetscInt q, const PetscScalar T[]) { 416 PetscInt i, j; 417 Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data; 418 PetscBool isTI = PETSC_FALSE; 419 420 PetscFunctionBegin; 421 /* check if T is an identity matrix */ 422 if (T && (p == q)) { 423 isTI = PETSC_TRUE; 424 for (i = 0; i < p; i++) { 425 for (j = 0; j < q; j++) { 426 if (i == j) { 427 /* diagonal term must be 1 */ 428 if (T[i + j * p] != 1.0) isTI = PETSC_FALSE; 429 } else { 430 /* off-diagonal term must be 0 */ 431 if (T[i + j * p] != 0.0) isTI = PETSC_FALSE; 432 } 433 } 434 } 435 } 436 a->isTI = isTI; 437 438 PetscCall(PetscFree(a->T)); 439 if (T && (!isTI)) { 440 PetscCall(PetscMalloc1(p * q, &a->T)); 441 PetscCall(PetscMemcpy(a->T, T, p * q * sizeof(PetscScalar))); 442 } else a->T = NULL; 443 444 a->p = p; 445 a->q = q; 446 PetscFunctionReturn(0); 447 } 448 449 PetscErrorCode MatDestroy_SeqKAIJ(Mat A) { 450 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 451 452 PetscFunctionBegin; 453 PetscCall(MatDestroy(&b->AIJ)); 454 PetscCall(PetscFree(b->S)); 455 PetscCall(PetscFree(b->T)); 456 PetscCall(PetscFree(b->ibdiag)); 457 PetscCall(PetscFree5(b->sor.w, b->sor.y, b->sor.work, b->sor.t, b->sor.arr)); 458 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", NULL)); 459 PetscCall(PetscFree(A->data)); 460 PetscFunctionReturn(0); 461 } 462 463 PETSC_INTERN PetscErrorCode MatKAIJ_build_AIJ_OAIJ(Mat A) { 464 Mat_MPIKAIJ *a; 465 Mat_MPIAIJ *mpiaij; 466 PetscScalar *T; 467 PetscInt i, j; 468 PetscObjectState state; 469 470 PetscFunctionBegin; 471 a = (Mat_MPIKAIJ *)A->data; 472 mpiaij = (Mat_MPIAIJ *)a->A->data; 473 474 PetscCall(PetscObjectStateGet((PetscObject)a->A, &state)); 475 if (state == a->state) { 476 /* The existing AIJ and KAIJ members are up-to-date, so simply exit. 
*/ 477 PetscFunctionReturn(0); 478 } else { 479 PetscCall(MatDestroy(&a->AIJ)); 480 PetscCall(MatDestroy(&a->OAIJ)); 481 if (a->isTI) { 482 /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL. 483 * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will 484 * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix 485 * to pass in. */ 486 PetscCall(PetscMalloc1(a->p * a->q, &T)); 487 for (i = 0; i < a->p; i++) { 488 for (j = 0; j < a->q; j++) { 489 if (i == j) T[i + j * a->p] = 1.0; 490 else T[i + j * a->p] = 0.0; 491 } 492 } 493 } else T = a->T; 494 PetscCall(MatCreateKAIJ(mpiaij->A, a->p, a->q, a->S, T, &a->AIJ)); 495 PetscCall(MatCreateKAIJ(mpiaij->B, a->p, a->q, NULL, T, &a->OAIJ)); 496 if (a->isTI) PetscCall(PetscFree(T)); 497 a->state = state; 498 } 499 500 PetscFunctionReturn(0); 501 } 502 503 PetscErrorCode MatSetUp_KAIJ(Mat A) { 504 PetscInt n; 505 PetscMPIInt size; 506 Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ *)A->data; 507 508 PetscFunctionBegin; 509 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 510 if (size == 1) { 511 PetscCall(MatSetSizes(A, seqkaij->p * seqkaij->AIJ->rmap->n, seqkaij->q * seqkaij->AIJ->cmap->n, seqkaij->p * seqkaij->AIJ->rmap->N, seqkaij->q * seqkaij->AIJ->cmap->N)); 512 PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p)); 513 PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q)); 514 PetscCall(PetscLayoutSetUp(A->rmap)); 515 PetscCall(PetscLayoutSetUp(A->cmap)); 516 } else { 517 Mat_MPIKAIJ *a; 518 Mat_MPIAIJ *mpiaij; 519 IS from, to; 520 Vec gvec; 521 522 a = (Mat_MPIKAIJ *)A->data; 523 mpiaij = (Mat_MPIAIJ *)a->A->data; 524 PetscCall(MatSetSizes(A, a->p * a->A->rmap->n, a->q * a->A->cmap->n, a->p * a->A->rmap->N, a->q * a->A->cmap->N)); 525 PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p)); 526 
PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q)); 527 PetscCall(PetscLayoutSetUp(A->rmap)); 528 PetscCall(PetscLayoutSetUp(A->cmap)); 529 530 PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); 531 532 PetscCall(VecGetSize(mpiaij->lvec, &n)); 533 PetscCall(VecCreate(PETSC_COMM_SELF, &a->w)); 534 PetscCall(VecSetSizes(a->w, n * a->q, n * a->q)); 535 PetscCall(VecSetBlockSize(a->w, a->q)); 536 PetscCall(VecSetType(a->w, VECSEQ)); 537 538 /* create two temporary Index sets for build scatter gather */ 539 PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)a->A), a->q, n, mpiaij->garray, PETSC_COPY_VALUES, &from)); 540 PetscCall(ISCreateStride(PETSC_COMM_SELF, n * a->q, 0, 1, &to)); 541 542 /* create temporary global vector to generate scatter context */ 543 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A), a->q, a->q * a->A->cmap->n, a->q * a->A->cmap->N, NULL, &gvec)); 544 545 /* generate the scatter context */ 546 PetscCall(VecScatterCreate(gvec, from, a->w, to, &a->ctx)); 547 548 PetscCall(ISDestroy(&from)); 549 PetscCall(ISDestroy(&to)); 550 PetscCall(VecDestroy(&gvec)); 551 } 552 553 A->assembled = PETSC_TRUE; 554 PetscFunctionReturn(0); 555 } 556 557 PetscErrorCode MatView_KAIJ(Mat A, PetscViewer viewer) { 558 PetscViewerFormat format; 559 Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data; 560 Mat B; 561 PetscInt i; 562 PetscBool ismpikaij; 563 564 PetscFunctionBegin; 565 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij)); 566 PetscCall(PetscViewerGetFormat(viewer, &format)); 567 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) { 568 PetscCall(PetscViewerASCIIPrintf(viewer, "S and T have %" PetscInt_FMT " rows and %" PetscInt_FMT " columns\n", a->p, a->q)); 569 570 /* Print appropriate details for S. 
*/ 571 if (!a->S) { 572 PetscCall(PetscViewerASCIIPrintf(viewer, "S is NULL\n")); 573 } else if (format == PETSC_VIEWER_ASCII_IMPL) { 574 PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of S are ")); 575 for (i = 0; i < (a->p * a->q); i++) { 576 #if defined(PETSC_USE_COMPLEX) 577 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->S[i]), (double)PetscImaginaryPart(a->S[i]))); 578 #else 579 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->S[i]))); 580 #endif 581 } 582 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 583 } 584 585 /* Print appropriate details for T. */ 586 if (a->isTI) { 587 PetscCall(PetscViewerASCIIPrintf(viewer, "T is the identity matrix\n")); 588 } else if (!a->T) { 589 PetscCall(PetscViewerASCIIPrintf(viewer, "T is NULL\n")); 590 } else if (format == PETSC_VIEWER_ASCII_IMPL) { 591 PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of T are ")); 592 for (i = 0; i < (a->p * a->q); i++) { 593 #if defined(PETSC_USE_COMPLEX) 594 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->T[i]), (double)PetscImaginaryPart(a->T[i]))); 595 #else 596 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->T[i]))); 597 #endif 598 } 599 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 600 } 601 602 /* Now print details for the AIJ matrix, using the AIJ viewer. */ 603 PetscCall(PetscViewerASCIIPrintf(viewer, "Now viewing the associated AIJ matrix:\n")); 604 if (ismpikaij) { 605 Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 606 PetscCall(MatView(b->A, viewer)); 607 } else { 608 PetscCall(MatView(a->AIJ, viewer)); 609 } 610 611 } else { 612 /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. 
*/ 613 PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &B)); 614 PetscCall(MatView(B, viewer)); 615 PetscCall(MatDestroy(&B)); 616 } 617 PetscFunctionReturn(0); 618 } 619 620 PetscErrorCode MatDestroy_MPIKAIJ(Mat A) { 621 Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 622 623 PetscFunctionBegin; 624 PetscCall(MatDestroy(&b->AIJ)); 625 PetscCall(MatDestroy(&b->OAIJ)); 626 PetscCall(MatDestroy(&b->A)); 627 PetscCall(VecScatterDestroy(&b->ctx)); 628 PetscCall(VecDestroy(&b->w)); 629 PetscCall(PetscFree(b->S)); 630 PetscCall(PetscFree(b->T)); 631 PetscCall(PetscFree(b->ibdiag)); 632 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", NULL)); 633 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", NULL)); 634 PetscCall(PetscFree(A->data)); 635 PetscFunctionReturn(0); 636 } 637 638 /* --------------------------------------------------------------------------------------*/ 639 640 /* zz = yy + Axx */ 641 PetscErrorCode MatMultAdd_SeqKAIJ(Mat A, Vec xx, Vec yy, Vec zz) { 642 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 643 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 644 const PetscScalar *s = b->S, *t = b->T; 645 const PetscScalar *x, *v, *bx; 646 PetscScalar *y, *sums; 647 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 648 PetscInt n, i, jrow, j, l, p = b->p, q = b->q, k; 649 650 PetscFunctionBegin; 651 if (!yy) { 652 PetscCall(VecSet(zz, 0.0)); 653 } else { 654 PetscCall(VecCopy(yy, zz)); 655 } 656 if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0); 657 658 PetscCall(VecGetArrayRead(xx, &x)); 659 PetscCall(VecGetArray(zz, &y)); 660 idx = a->j; 661 v = a->a; 662 ii = a->i; 663 664 if (b->isTI) { 665 for (i = 0; i < m; i++) { 666 jrow = ii[i]; 667 n = ii[i + 1] - jrow; 668 sums = y + p * i; 669 for (j = 0; j < n; j++) { 670 for (k = 0; k < p; k++) sums[k] += v[jrow + j] * x[q * idx[jrow + j] + k]; 671 } 672 } 673 PetscCall(PetscLogFlops(3.0 * (a->nz) * p)); 674 } else if (t) { 675 for (i = 0; i < m; i++) { 676 jrow = 
ii[i]; 677 n = ii[i + 1] - jrow; 678 sums = y + p * i; 679 for (j = 0; j < n; j++) { 680 for (k = 0; k < p; k++) { 681 for (l = 0; l < q; l++) sums[k] += v[jrow + j] * t[k + l * p] * x[q * idx[jrow + j] + l]; 682 } 683 } 684 } 685 /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do), 686 * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T), 687 * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter 688 * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */ 689 PetscCall(PetscLogFlops((2.0 * p * q - p) * m + 2.0 * p * a->nz)); 690 } 691 if (s) { 692 for (i = 0; i < m; i++) { 693 sums = y + p * i; 694 bx = x + q * i; 695 if (i < b->AIJ->cmap->n) { 696 for (j = 0; j < q; j++) { 697 for (k = 0; k < p; k++) sums[k] += s[k + j * p] * bx[j]; 698 } 699 } 700 } 701 PetscCall(PetscLogFlops(2.0 * m * p * q)); 702 } 703 704 PetscCall(VecRestoreArrayRead(xx, &x)); 705 PetscCall(VecRestoreArray(zz, &y)); 706 PetscFunctionReturn(0); 707 } 708 709 PetscErrorCode MatMult_SeqKAIJ(Mat A, Vec xx, Vec yy) { 710 PetscFunctionBegin; 711 PetscCall(MatMultAdd_SeqKAIJ(A, xx, PETSC_NULL, yy)); 712 PetscFunctionReturn(0); 713 } 714 715 #include <petsc/private/kernels/blockinvert.h> 716 717 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A, const PetscScalar **values) { 718 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 719 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 720 const PetscScalar *S = b->S; 721 const PetscScalar *T = b->T; 722 const PetscScalar *v = a->a; 723 const PetscInt p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i; 724 PetscInt i, j, *v_pivots, dof, dof2; 725 PetscScalar *diag, aval, *v_work; 726 727 PetscFunctionBegin; 728 PetscCheck(p == q, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Block size must be square to calculate 
inverse."); 729 PetscCheck(S || T || b->isTI, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Cannot invert a zero matrix."); 730 731 dof = p; 732 dof2 = dof * dof; 733 734 if (b->ibdiagvalid) { 735 if (values) *values = b->ibdiag; 736 PetscFunctionReturn(0); 737 } 738 if (!b->ibdiag) { 739 PetscCall(PetscMalloc1(dof2 * m, &b->ibdiag)); 740 PetscCall(PetscLogObjectMemory((PetscObject)A, dof2 * m * sizeof(PetscScalar))); 741 } 742 if (values) *values = b->ibdiag; 743 diag = b->ibdiag; 744 745 PetscCall(PetscMalloc2(dof, &v_work, dof, &v_pivots)); 746 for (i = 0; i < m; i++) { 747 if (S) { 748 PetscCall(PetscMemcpy(diag, S, dof2 * sizeof(PetscScalar))); 749 } else { 750 PetscCall(PetscMemzero(diag, dof2 * sizeof(PetscScalar))); 751 } 752 if (b->isTI) { 753 aval = 0; 754 for (j = ii[i]; j < ii[i + 1]; j++) 755 if (idx[j] == i) aval = v[j]; 756 for (j = 0; j < dof; j++) diag[j + dof * j] += aval; 757 } else if (T) { 758 aval = 0; 759 for (j = ii[i]; j < ii[i + 1]; j++) 760 if (idx[j] == i) aval = v[j]; 761 for (j = 0; j < dof2; j++) diag[j] += aval * T[j]; 762 } 763 PetscCall(PetscKernel_A_gets_inverse_A(dof, diag, v_pivots, v_work, PETSC_FALSE, NULL)); 764 diag += dof2; 765 } 766 PetscCall(PetscFree2(v_work, v_pivots)); 767 768 b->ibdiagvalid = PETSC_TRUE; 769 PetscFunctionReturn(0); 770 } 771 772 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A, Mat *B) { 773 Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ *)A->data; 774 775 PetscFunctionBegin; 776 *B = kaij->AIJ; 777 PetscFunctionReturn(0); 778 } 779 780 static PetscErrorCode MatConvert_KAIJ_AIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) { 781 Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data; 782 Mat AIJ, OAIJ, B; 783 PetscInt *d_nnz, *o_nnz = NULL, nz, i, j, m, d; 784 const PetscInt p = a->p, q = a->q; 785 PetscBool ismpikaij, missing; 786 787 PetscFunctionBegin; 788 if (reuse != MAT_REUSE_MATRIX) { 789 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij)); 790 if (ismpikaij) { 791 
Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 792 AIJ = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ; 793 OAIJ = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ; 794 } else { 795 AIJ = a->AIJ; 796 OAIJ = NULL; 797 } 798 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 799 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 800 PetscCall(MatSetType(B, MATAIJ)); 801 PetscCall(MatGetSize(AIJ, &m, NULL)); 802 PetscCall(MatMissingDiagonal(AIJ, &missing, &d)); /* assumption that all successive rows will have a missing diagonal */ 803 if (!missing || !a->S) d = m; 804 PetscCall(PetscMalloc1(m * p, &d_nnz)); 805 for (i = 0; i < m; ++i) { 806 PetscCall(MatGetRow_SeqAIJ(AIJ, i, &nz, NULL, NULL)); 807 for (j = 0; j < p; ++j) d_nnz[i * p + j] = nz * q + (i >= d) * q; 808 PetscCall(MatRestoreRow_SeqAIJ(AIJ, i, &nz, NULL, NULL)); 809 } 810 if (OAIJ) { 811 PetscCall(PetscMalloc1(m * p, &o_nnz)); 812 for (i = 0; i < m; ++i) { 813 PetscCall(MatGetRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL)); 814 for (j = 0; j < p; ++j) o_nnz[i * p + j] = nz * q; 815 PetscCall(MatRestoreRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL)); 816 } 817 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 818 } else { 819 PetscCall(MatSeqAIJSetPreallocation(B, 0, d_nnz)); 820 } 821 PetscCall(PetscFree(d_nnz)); 822 PetscCall(PetscFree(o_nnz)); 823 } else B = *newmat; 824 PetscCall(MatConvert_Basic(A, newtype, MAT_REUSE_MATRIX, &B)); 825 if (reuse == MAT_INPLACE_MATRIX) { 826 PetscCall(MatHeaderReplace(A, &B)); 827 } else *newmat = B; 828 PetscFunctionReturn(0); 829 } 830 831 PetscErrorCode MatSOR_SeqKAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) { 832 Mat_SeqKAIJ *kaij = (Mat_SeqKAIJ *)A->data; 833 Mat_SeqAIJ *a = (Mat_SeqAIJ *)kaij->AIJ->data; 834 const PetscScalar *aa = a->a, *T = kaij->T, *v; 835 const PetscInt m = kaij->AIJ->rmap->n, *ai = a->i, *aj = a->j, p = kaij->p, q = kaij->q, *diag, *vi; 836 const PetscScalar *b, *xb, *idiag; 837 
PetscScalar *x, *work, *workt, *w, *y, *arr, *t, *arrt; 838 PetscInt i, j, k, i2, bs, bs2, nz; 839 840 PetscFunctionBegin; 841 its = its * lits; 842 PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat"); 843 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 844 PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift"); 845 PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts"); 846 PetscCheck(p == q, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSOR for KAIJ: No support for non-square dense blocks"); 847 bs = p; 848 bs2 = bs * bs; 849 850 if (!m) PetscFunctionReturn(0); 851 852 if (!kaij->ibdiagvalid) PetscCall(MatInvertBlockDiagonal_SeqKAIJ(A, NULL)); 853 idiag = kaij->ibdiag; 854 diag = a->diag; 855 856 if (!kaij->sor.setup) { 857 PetscCall(PetscMalloc5(bs, &kaij->sor.w, bs, &kaij->sor.y, m * bs, &kaij->sor.work, m * bs, &kaij->sor.t, m * bs2, &kaij->sor.arr)); 858 kaij->sor.setup = PETSC_TRUE; 859 } 860 y = kaij->sor.y; 861 w = kaij->sor.w; 862 work = kaij->sor.work; 863 t = kaij->sor.t; 864 arr = kaij->sor.arr; 865 866 PetscCall(VecGetArray(xx, &x)); 867 PetscCall(VecGetArrayRead(bb, &b)); 868 869 if (flag & SOR_ZERO_INITIAL_GUESS) { 870 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 871 PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); /* x[0:bs] <- D^{-1} b[0:bs] */ 872 PetscCall(PetscMemcpy(t, b, bs * sizeof(PetscScalar))); 873 i2 = bs; 874 idiag += bs2; 875 for (i = 1; i < m; i++) { 876 v = aa + ai[i]; 877 vi = aj + ai[i]; 878 nz = diag[i] - ai[i]; 879 880 if (T) { /* b - T (Arow * x) */ 881 PetscCall(PetscMemzero(w, bs * sizeof(PetscScalar))); 882 for (j = 0; j < nz; j++) { 883 for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k]; 884 } 885 
PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs, w, T, &t[i2]); 886 for (k = 0; k < bs; k++) t[i2 + k] += b[i2 + k]; 887 } else if (kaij->isTI) { 888 PetscCall(PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar))); 889 for (j = 0; j < nz; j++) { 890 for (k = 0; k < bs; k++) t[i2 + k] -= v[j] * x[vi[j] * bs + k]; 891 } 892 } else { 893 PetscCall(PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar))); 894 } 895 896 PetscKernel_w_gets_Ar_times_v(bs, bs, t + i2, idiag, y); 897 for (j = 0; j < bs; j++) x[i2 + j] = omega * y[j]; 898 899 idiag += bs2; 900 i2 += bs; 901 } 902 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 903 PetscCall(PetscLogFlops(1.0 * bs2 * a->nz)); 904 xb = t; 905 } else xb = b; 906 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 907 idiag = kaij->ibdiag + bs2 * (m - 1); 908 i2 = bs * (m - 1); 909 PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar))); 910 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 911 i2 -= bs; 912 idiag -= bs2; 913 for (i = m - 2; i >= 0; i--) { 914 v = aa + diag[i] + 1; 915 vi = aj + diag[i] + 1; 916 nz = ai[i + 1] - diag[i] - 1; 917 918 if (T) { /* FIXME: This branch untested */ 919 PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar))); 920 /* copy all rows of x that are needed into contiguous space */ 921 workt = work; 922 for (j = 0; j < nz; j++) { 923 PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 924 workt += bs; 925 } 926 arrt = arr; 927 for (j = 0; j < nz; j++) { 928 PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 929 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 930 arrt += bs2; 931 } 932 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 933 } else if (kaij->isTI) { 934 PetscCall(PetscMemcpy(w, t + i2, bs * sizeof(PetscScalar))); 935 for (j = 0; j < nz; j++) { 936 for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k]; 937 } 938 } 939 940 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); 
/* RHS incorrect for omega != 1.0 */ 941 for (j = 0; j < bs; j++) x[i2 + j] = (1.0 - omega) * x[i2 + j] + omega * y[j]; 942 943 idiag -= bs2; 944 i2 -= bs; 945 } 946 PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz))); 947 } 948 its--; 949 } 950 while (its--) { /* FIXME: This branch not updated */ 951 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 952 i2 = 0; 953 idiag = kaij->ibdiag; 954 for (i = 0; i < m; i++) { 955 PetscCall(PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar))); 956 957 v = aa + ai[i]; 958 vi = aj + ai[i]; 959 nz = diag[i] - ai[i]; 960 workt = work; 961 for (j = 0; j < nz; j++) { 962 PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 963 workt += bs; 964 } 965 arrt = arr; 966 if (T) { 967 for (j = 0; j < nz; j++) { 968 PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 969 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 970 arrt += bs2; 971 } 972 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 973 } else if (kaij->isTI) { 974 for (j = 0; j < nz; j++) { 975 PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar))); 976 for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 977 arrt += bs2; 978 } 979 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 980 } 981 PetscCall(PetscMemcpy(t + i2, w, bs * sizeof(PetscScalar))); 982 983 v = aa + diag[i] + 1; 984 vi = aj + diag[i] + 1; 985 nz = ai[i + 1] - diag[i] - 1; 986 workt = work; 987 for (j = 0; j < nz; j++) { 988 PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 989 workt += bs; 990 } 991 arrt = arr; 992 if (T) { 993 for (j = 0; j < nz; j++) { 994 PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 995 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 996 arrt += bs2; 997 } 998 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 999 } else if (kaij->isTI) { 1000 for (j = 0; j < nz; j++) { 1001 PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar))); 1002 for (k = 0; k < bs; k++) arrt[k + bs * k] 
= v[j]; 1003 arrt += bs2; 1004 } 1005 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1006 } 1007 1008 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); 1009 for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j); 1010 1011 idiag += bs2; 1012 i2 += bs; 1013 } 1014 xb = t; 1015 } else xb = b; 1016 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1017 idiag = kaij->ibdiag + bs2 * (m - 1); 1018 i2 = bs * (m - 1); 1019 if (xb == b) { 1020 for (i = m - 1; i >= 0; i--) { 1021 PetscCall(PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar))); 1022 1023 v = aa + ai[i]; 1024 vi = aj + ai[i]; 1025 nz = diag[i] - ai[i]; 1026 workt = work; 1027 for (j = 0; j < nz; j++) { 1028 PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 1029 workt += bs; 1030 } 1031 arrt = arr; 1032 if (T) { 1033 for (j = 0; j < nz; j++) { 1034 PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 1035 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 1036 arrt += bs2; 1037 } 1038 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1039 } else if (kaij->isTI) { 1040 for (j = 0; j < nz; j++) { 1041 PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar))); 1042 for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 1043 arrt += bs2; 1044 } 1045 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1046 } 1047 1048 v = aa + diag[i] + 1; 1049 vi = aj + diag[i] + 1; 1050 nz = ai[i + 1] - diag[i] - 1; 1051 workt = work; 1052 for (j = 0; j < nz; j++) { 1053 PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 1054 workt += bs; 1055 } 1056 arrt = arr; 1057 if (T) { 1058 for (j = 0; j < nz; j++) { 1059 PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 1060 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 1061 arrt += bs2; 1062 } 1063 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1064 } else if (kaij->isTI) { 1065 for (j = 0; j < nz; j++) { 1066 
PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar))); 1067 for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 1068 arrt += bs2; 1069 } 1070 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1071 } 1072 1073 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); 1074 for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j); 1075 } 1076 } else { 1077 for (i = m - 1; i >= 0; i--) { 1078 PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar))); 1079 v = aa + diag[i] + 1; 1080 vi = aj + diag[i] + 1; 1081 nz = ai[i + 1] - diag[i] - 1; 1082 workt = work; 1083 for (j = 0; j < nz; j++) { 1084 PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 1085 workt += bs; 1086 } 1087 arrt = arr; 1088 if (T) { 1089 for (j = 0; j < nz; j++) { 1090 PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 1091 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 1092 arrt += bs2; 1093 } 1094 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1095 } else if (kaij->isTI) { 1096 for (j = 0; j < nz; j++) { 1097 PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar))); 1098 for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 1099 arrt += bs2; 1100 } 1101 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1102 } 1103 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); 1104 for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j); 1105 } 1106 } 1107 PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz))); 1108 } 1109 } 1110 1111 PetscCall(VecRestoreArray(xx, &x)); 1112 PetscCall(VecRestoreArrayRead(bb, &b)); 1113 PetscFunctionReturn(0); 1114 } 1115 1116 /*===================================================================================*/ 1117 1118 PetscErrorCode MatMultAdd_MPIKAIJ(Mat A, Vec xx, Vec yy, Vec zz) { 1119 Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 1120 1121 PetscFunctionBegin; 1122 if (!yy) { 1123 PetscCall(VecSet(zz, 0.0)); 1124 } else { 1125 PetscCall(VecCopy(yy, 
zz)); 1126 } 1127 PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */ 1128 /* start the scatter */ 1129 PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 1130 PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, zz, zz)); 1131 PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 1132 PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz)); 1133 PetscFunctionReturn(0); 1134 } 1135 1136 PetscErrorCode MatMult_MPIKAIJ(Mat A, Vec xx, Vec yy) { 1137 PetscFunctionBegin; 1138 PetscCall(MatMultAdd_MPIKAIJ(A, xx, PETSC_NULL, yy)); 1139 PetscFunctionReturn(0); 1140 } 1141 1142 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A, const PetscScalar **values) { 1143 Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 1144 1145 PetscFunctionBegin; 1146 PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ is up to date. */ 1147 PetscCall((*b->AIJ->ops->invertblockdiagonal)(b->AIJ, values)); 1148 PetscFunctionReturn(0); 1149 } 1150 1151 /* ----------------------------------------------------------------*/ 1152 1153 PetscErrorCode MatGetRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values) { 1154 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 1155 PetscErrorCode diag = PETSC_FALSE; 1156 PetscInt nzaij, nz, *colsaij, *idx, i, j, p = b->p, q = b->q, r = row / p, s = row % p, c; 1157 PetscScalar *vaij, *v, *S = b->S, *T = b->T; 1158 1159 PetscFunctionBegin; 1160 PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1161 b->getrowactive = PETSC_TRUE; 1162 PetscCheck(row >= 0 && row < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row); 1163 1164 if ((!S) && (!T) && (!b->isTI)) { 1165 if (ncols) *ncols = 0; 1166 if (cols) *cols = NULL; 1167 if (values) *values = NULL; 1168 PetscFunctionReturn(0); 1169 } 1170 1171 if (T || b->isTI) { 1172 PetscCall(MatGetRow_SeqAIJ(b->AIJ, r, &nzaij, &colsaij, &vaij)); 1173 
c = nzaij; 1174 for (i = 0; i < nzaij; i++) { 1175 /* check if this row contains a diagonal entry */ 1176 if (colsaij[i] == r) { 1177 diag = PETSC_TRUE; 1178 c = i; 1179 } 1180 } 1181 } else nzaij = c = 0; 1182 1183 /* calculate size of row */ 1184 nz = 0; 1185 if (S) nz += q; 1186 if (T || b->isTI) nz += (diag && S ? (nzaij - 1) * q : nzaij * q); 1187 1188 if (cols || values) { 1189 PetscCall(PetscMalloc2(nz, &idx, nz, &v)); 1190 for (i = 0; i < q; i++) { 1191 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1192 v[i] = 0.0; 1193 } 1194 if (b->isTI) { 1195 for (i = 0; i < nzaij; i++) { 1196 for (j = 0; j < q; j++) { 1197 idx[i * q + j] = colsaij[i] * q + j; 1198 v[i * q + j] = (j == s ? vaij[i] : 0); 1199 } 1200 } 1201 } else if (T) { 1202 for (i = 0; i < nzaij; i++) { 1203 for (j = 0; j < q; j++) { 1204 idx[i * q + j] = colsaij[i] * q + j; 1205 v[i * q + j] = vaij[i] * T[s + j * p]; 1206 } 1207 } 1208 } 1209 if (S) { 1210 for (j = 0; j < q; j++) { 1211 idx[c * q + j] = r * q + j; 1212 v[c * q + j] += S[s + j * p]; 1213 } 1214 } 1215 } 1216 1217 if (ncols) *ncols = nz; 1218 if (cols) *cols = idx; 1219 if (values) *values = v; 1220 PetscFunctionReturn(0); 1221 } 1222 1223 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) { 1224 PetscFunctionBegin; 1225 if (nz) *nz = 0; 1226 PetscCall(PetscFree2(*idx, *v)); 1227 ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE; 1228 PetscFunctionReturn(0); 1229 } 1230 1231 PetscErrorCode MatGetRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values) { 1232 Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 1233 Mat AIJ = b->A; 1234 PetscBool diag = PETSC_FALSE; 1235 Mat MatAIJ, MatOAIJ; 1236 const PetscInt rstart = A->rmap->rstart, rend = A->rmap->rend, p = b->p, q = b->q, *garray; 1237 PetscInt nz, *idx, ncolsaij = 0, ncolsoaij = 0, *colsaij, *colsoaij, r, s, c, i, j, lrow; 1238 PetscScalar 
*v, *vals, *ovals, *S = b->S, *T = b->T; 1239 1240 PetscFunctionBegin; 1241 PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */ 1242 MatAIJ = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ; 1243 MatOAIJ = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ; 1244 PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1245 b->getrowactive = PETSC_TRUE; 1246 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1247 lrow = row - rstart; 1248 1249 if ((!S) && (!T) && (!b->isTI)) { 1250 if (ncols) *ncols = 0; 1251 if (cols) *cols = NULL; 1252 if (values) *values = NULL; 1253 PetscFunctionReturn(0); 1254 } 1255 1256 r = lrow / p; 1257 s = lrow % p; 1258 1259 if (T || b->isTI) { 1260 PetscCall(MatMPIAIJGetSeqAIJ(AIJ, NULL, NULL, &garray)); 1261 PetscCall(MatGetRow_SeqAIJ(MatAIJ, lrow / p, &ncolsaij, &colsaij, &vals)); 1262 PetscCall(MatGetRow_SeqAIJ(MatOAIJ, lrow / p, &ncolsoaij, &colsoaij, &ovals)); 1263 1264 c = ncolsaij + ncolsoaij; 1265 for (i = 0; i < ncolsaij; i++) { 1266 /* check if this row contains a diagonal entry */ 1267 if (colsaij[i] == r) { 1268 diag = PETSC_TRUE; 1269 c = i; 1270 } 1271 } 1272 } else c = 0; 1273 1274 /* calculate size of row */ 1275 nz = 0; 1276 if (S) nz += q; 1277 if (T || b->isTI) nz += (diag && S ? (ncolsaij + ncolsoaij - 1) * q : (ncolsaij + ncolsoaij) * q); 1278 1279 if (cols || values) { 1280 PetscCall(PetscMalloc2(nz, &idx, nz, &v)); 1281 for (i = 0; i < q; i++) { 1282 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1283 v[i] = 0.0; 1284 } 1285 if (b->isTI) { 1286 for (i = 0; i < ncolsaij; i++) { 1287 for (j = 0; j < q; j++) { 1288 idx[i * q + j] = (colsaij[i] + rstart / p) * q + j; 1289 v[i * q + j] = (j == s ? 
vals[i] : 0.0); 1290 } 1291 } 1292 for (i = 0; i < ncolsoaij; i++) { 1293 for (j = 0; j < q; j++) { 1294 idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j; 1295 v[(i + ncolsaij) * q + j] = (j == s ? ovals[i] : 0.0); 1296 } 1297 } 1298 } else if (T) { 1299 for (i = 0; i < ncolsaij; i++) { 1300 for (j = 0; j < q; j++) { 1301 idx[i * q + j] = (colsaij[i] + rstart / p) * q + j; 1302 v[i * q + j] = vals[i] * T[s + j * p]; 1303 } 1304 } 1305 for (i = 0; i < ncolsoaij; i++) { 1306 for (j = 0; j < q; j++) { 1307 idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j; 1308 v[(i + ncolsaij) * q + j] = ovals[i] * T[s + j * p]; 1309 } 1310 } 1311 } 1312 if (S) { 1313 for (j = 0; j < q; j++) { 1314 idx[c * q + j] = (r + rstart / p) * q + j; 1315 v[c * q + j] += S[s + j * p]; 1316 } 1317 } 1318 } 1319 1320 if (ncols) *ncols = nz; 1321 if (cols) *cols = idx; 1322 if (values) *values = v; 1323 PetscFunctionReturn(0); 1324 } 1325 1326 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) { 1327 PetscFunctionBegin; 1328 PetscCall(PetscFree2(*idx, *v)); 1329 ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE; 1330 PetscFunctionReturn(0); 1331 } 1332 1333 PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat) { 1334 Mat A; 1335 1336 PetscFunctionBegin; 1337 PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A)); 1338 PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat)); 1339 PetscCall(MatDestroy(&A)); 1340 PetscFunctionReturn(0); 1341 } 1342 1343 /* ---------------------------------------------------------------------------------- */ 1344 /*@C 1345 MatCreateKAIJ - Creates a matrix of type `MATKAIJ` to be used for matrices of the following form: 1346 1347 [I \otimes S + A \otimes T] 1348 1349 where 1350 S is a dense (p \times q) matrix 1351 T is a dense (p \times q) matrix 1352 A is an `MATAIJ` (n \times n) matrix 1353 I is the identity matrix 1354 The resulting matrix is 
(np \times nq)

  S and T are always stored independently on all processes as `PetscScalar` arrays in column-major format.

  Collective

  Input Parameters:
+ A - the `MATAIJ` matrix
. p - number of rows in S and T
. q - number of columns in S and T
. S - the S matrix (can be NULL), stored as a `PetscScalar` array (column-major)
- T - the T matrix (can be NULL), stored as a `PetscScalar` array (column-major)

  Output Parameter:
. kaij - the new `MATKAIJ` matrix

  Notes:
  This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed.

  Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix.

  Developer Note:
  In the `MATMPIKAIJ` case, the internal 'AIJ' and 'OAIJ' sequential KAIJ matrices are kept up to date by tracking the object state
  of the AIJ matrix 'A' that describes the blockwise action of the MATMPIKAIJ matrix and, if the object state has changed, lazily
  rebuilding 'AIJ' and 'OAIJ' just before executing operations with the MATMPIKAIJ matrix. If new types of operations are added,
  routines implementing those must also ensure these are rebuilt when needed (by calling the internal MatKAIJ_build_AIJ_OAIJ() routine).

  Level: advanced

.seealso: `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MATKAIJ`
@*/
PetscErrorCode MatCreateKAIJ(Mat A, PetscInt p, PetscInt q, const PetscScalar S[], const PetscScalar T[], Mat *kaij) {
  PetscFunctionBegin;
  /* Build an empty KAIJ shell, then attach the blockwise AIJ action and the S/T blocks. */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), kaij));
  PetscCall(MatSetType(*kaij, MATKAIJ));
  PetscCall(MatKAIJSetAIJ(*kaij, A));
  PetscCall(MatKAIJSetS(*kaij, p, q, S));
  PetscCall(MatKAIJSetT(*kaij, p, q, T));
  PetscCall(MatSetUp(*kaij));
  PetscFunctionReturn(0);
}

/*MC
  MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form
    [I \otimes S + A \otimes T],
  where
    S is a dense (p \times q) matrix,
    T is a dense (p \times q) matrix,
    A is an AIJ  (n \times n) matrix,
    and I is the identity matrix.
  The resulting matrix is (np \times nq).

  S and T are always stored independently on all processes as `PetscScalar` arrays in column-major format.

  Note:
  A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b,
  where x and b are column vectors containing the row-major representations of X and B.

  Level: advanced

.seealso: `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MatCreateKAIJ()`
M*/

/* Type constructor for MATKAIJ: allocates the context and installs either the sequential
   or the parallel operation table depending on the communicator size. */
PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A) {
  Mat_MPIKAIJ *ctx;
  PetscMPIInt  nproc;

  PetscFunctionBegin;
  /* Attach a fresh (zeroed) context and wipe the inherited operations table. */
  PetscCall(PetscNewLog(A, &ctx));
  A->data = (void *)ctx;

  PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps)));

  ctx->w = NULL;

  /* Operations shared by the sequential and the parallel flavor. */
  A->ops->setup           = MatSetUp_KAIJ;
  A->ops->view            = MatView_KAIJ;
  A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &nproc));
  if (nproc == 1) {
    PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQKAIJ));
    A->ops->destroy             = MatDestroy_SeqKAIJ;
    A->ops->mult                = MatMult_SeqKAIJ;
    A->ops->multadd             = MatMultAdd_SeqKAIJ;
    A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
    A->ops->getrow              = MatGetRow_SeqKAIJ;
    A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
    A->ops->sor                 = MatSOR_SeqKAIJ;
    PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", MatConvert_KAIJ_AIJ));
  } else {
    PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIKAIJ));
    A->ops->destroy             = MatDestroy_MPIKAIJ;
    A->ops->mult                = MatMult_MPIKAIJ;
    A->ops->multadd             = MatMultAdd_MPIKAIJ;
    A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
    A->ops->getrow              = MatGetRow_MPIKAIJ;
    A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
    PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", MatGetDiagonalBlock_MPIKAIJ));
    PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", MatConvert_KAIJ_AIJ));
  }
  PetscFunctionReturn(0);
}