/*
  Defines the basic matrix operations for the KAIJ matrix storage format.
  This format is used to evaluate matrices of the form:

        [I \otimes S + A \otimes T]

  where
    S is a dense (p \times q) matrix
    T is a dense (p \times q) matrix
    A is an AIJ  (n \times n) matrix
    I is the identity matrix

  The resulting matrix is (np \times nq)

  We provide:
     MatMult()
     MatMultAdd()
     MatInvertBlockDiagonal()
  and
     MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)

  This single directory handles both the sequential and parallel codes
*/

#include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/
#include <../src/mat/utils/freespace.h>
#include <petsc/private/vecimpl.h>

/*@C
   MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix

   Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel

   Input Parameter:
.  A - the KAIJ matrix

   Output Parameter:
.  B - the AIJ matrix

   Level: advanced

   Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.

.seealso: `MatCreateKAIJ()`
@*/
PetscErrorCode MatKAIJGetAIJ(Mat A, Mat *B) {
  PetscBool ismpikaij, isseqkaij;

  PetscFunctionBegin;
  /* The AIJ matrix is stored in a different struct member for the sequential
     (Mat_SeqKAIJ.AIJ) and parallel (Mat_MPIKAIJ.A) implementations, so dispatch on type. */
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQKAIJ, &isseqkaij));
  if (ismpikaij) {
    Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;

    *B = b->A;
  } else if (isseqkaij) {
    Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;

    *B = b->AIJ;
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix passed in is not of type KAIJ");
  PetscFunctionReturn(0);
}

/*@C
   MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix

   Not Collective; the entire S is stored and returned independently on all processes.

   Input Parameter:
. 
A - the KAIJ matrix

   Output Parameters:
+  m - the number of rows in S
.  n - the number of columns in S
-  S - the S matrix, in form of a scalar array in column-major format

   Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)

   Level: advanced

.seealso: `MatCreateKAIJ()`, `MatGetBlockSizes()`
@*/
PetscErrorCode MatKAIJGetS(Mat A, PetscInt *m, PetscInt *n, PetscScalar **S) {
  Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
  PetscFunctionBegin;
  /* Every output is optional; fill only the ones the caller requested. */
  if (m) *m = b->p;
  if (n) *n = b->q;
  if (S) *S = b->S;
  PetscFunctionReturn(0);
}

/*@C
   MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix

   Not Collective; the entire S is stored and returned independently on all processes.

   Input Parameter:
.  A - the KAIJ matrix

   Output Parameters:
+  m - the number of rows in S
.  n - the number of columns in S
-  S - the S matrix, in form of a scalar array in column-major format

   Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)

   Level: advanced

.seealso: `MatCreateKAIJ()`, `MatGetBlockSizes()`
@*/
PetscErrorCode MatKAIJGetSRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar **S) {
  Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
  PetscFunctionBegin;
  /* Identical to MatKAIJGetS() except the array pointer is const-qualified. */
  if (m) *m = b->p;
  if (n) *n = b->q;
  if (S) *S = b->S;
  PetscFunctionReturn(0);
}

/*@C
   MatKAIJRestoreS - Restore array obtained with MatKAIJGetS()

   Not collective

   Input Parameter:
.  A - the KAIJ matrix

   Output Parameter:
.  S - location of pointer to array obtained with MatKAIJGetS()

   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
   If NULL is passed, it will not attempt to zero the array pointer.
   Level: advanced

.seealso: `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()`
@*/
PetscErrorCode MatKAIJRestoreS(Mat A, PetscScalar **S) {
  PetscFunctionBegin;
  if (S) *S = NULL;
  /* The caller had write access to S, so bump the object state to invalidate caches. */
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  PetscFunctionReturn(0);
}

/*@C
   MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead()

   Not collective

   Input Parameter:
.  A - the KAIJ matrix

   Output Parameter:
.  S - location of pointer to array obtained with MatKAIJGetSRead()

   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
   If NULL is passed, it will not attempt to zero the array pointer.

   Level: advanced
.seealso: `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()`
@*/
PetscErrorCode MatKAIJRestoreSRead(Mat A, const PetscScalar **S) {
  PetscFunctionBegin;
  /* Read-only access: no object state increase is needed here. */
  if (S) *S = NULL;
  PetscFunctionReturn(0);
}

/*@C
   MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix

   Not Collective; the entire T is stored and returned independently on all processes

   Input Parameter:
.  A - the KAIJ matrix

   Output Parameters:
+  m - the number of rows in T
. 
n - the number of columns in T
-  T - the T matrix, in form of a scalar array in column-major format

   Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)

   Level: advanced

.seealso: `MatCreateKAIJ()`, `MatGetBlockSizes()`
@*/
PetscErrorCode MatKAIJGetT(Mat A, PetscInt *m, PetscInt *n, PetscScalar **T) {
  Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
  PetscFunctionBegin;
  /* Every output is optional; fill only the ones the caller requested.
     Note that T may be NULL when the transformation is the identity (see isTI). */
  if (m) *m = b->p;
  if (n) *n = b->q;
  if (T) *T = b->T;
  PetscFunctionReturn(0);
}

/*@C
   MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix

   Not Collective; the entire T is stored and returned independently on all processes

   Input Parameter:
.  A - the KAIJ matrix

   Output Parameters:
+  m - the number of rows in T
.  n - the number of columns in T
-  T - the T matrix, in form of a scalar array in column-major format

   Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)

   Level: advanced

.seealso: `MatCreateKAIJ()`, `MatGetBlockSizes()`
@*/
PetscErrorCode MatKAIJGetTRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar **T) {
  Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;
  PetscFunctionBegin;
  /* Identical to MatKAIJGetT() except the array pointer is const-qualified. */
  if (m) *m = b->p;
  if (n) *n = b->q;
  if (T) *T = b->T;
  PetscFunctionReturn(0);
}

/*@C
   MatKAIJRestoreT - Restore array obtained with MatKAIJGetT()

   Not collective

   Input Parameter:
.  A - the KAIJ matrix

   Output Parameter:
.  T - location of pointer to array obtained with MatKAIJGetT()

   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
   If NULL is passed, it will not attempt to zero the array pointer.
   Level: advanced

.seealso: `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()`
@*/
PetscErrorCode MatKAIJRestoreT(Mat A, PetscScalar **T) {
  PetscFunctionBegin;
  if (T) *T = NULL;
  /* The caller had write access to T, so bump the object state to invalidate caches. */
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  PetscFunctionReturn(0);
}

/*@C
   MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead()

   Not collective

   Input Parameter:
.  A - the KAIJ matrix

   Output Parameter:
.  T - location of pointer to array obtained with MatKAIJGetTRead()

   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
   If NULL is passed, it will not attempt to zero the array pointer.

   Level: advanced
.seealso: `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()`
@*/
PetscErrorCode MatKAIJRestoreTRead(Mat A, const PetscScalar **T) {
  PetscFunctionBegin;
  /* Read-only access: no object state increase is needed here. */
  if (T) *T = NULL;
  PetscFunctionReturn(0);
}

/*@
   MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix

   Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel

   Input Parameters:
+  A - the KAIJ matrix
-  B - the AIJ matrix

   Notes:
   This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
   Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
   Level: advanced

.seealso: `MatKAIJGetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`
@*/
PetscErrorCode MatKAIJSetAIJ(Mat A, Mat B) {
  PetscMPIInt size;
  PetscBool flg;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* The sequential implementation only supports a MATSEQAIJ inner matrix. */
    PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQAIJ, &flg));
    PetscCheck(flg, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MatKAIJSetAIJ() with MATSEQKAIJ does not support %s as the AIJ mat", ((PetscObject)B)->type_name);
    Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
    a->AIJ = B;
  } else {
    Mat_MPIKAIJ *a = (Mat_MPIKAIJ *)A->data;
    a->A = B;
  }
  /* NOTE(review): any previously-set AIJ matrix is overwritten above without being
     destroyed, so calling this twice would leak a reference -- confirm intended usage. */
  PetscCall(PetscObjectReference((PetscObject)B));
  PetscFunctionReturn(0);
}

/*@C
   MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix

   Logically Collective; the entire S is stored independently on all processes.

   Input Parameters:
+  A - the KAIJ matrix
.  p - the number of rows in S
.  q - the number of columns in S
-  S - the S matrix, in form of a scalar array in column-major format

   Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix.
   The S matrix is copied, so the user can destroy this array.

   Level: Advanced

.seealso: `MatKAIJGetS()`, `MatKAIJSetT()`, `MatKAIJSetAIJ()`
@*/
PetscErrorCode MatKAIJSetS(Mat A, PetscInt p, PetscInt q, const PetscScalar S[]) {
  Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;

  PetscFunctionBegin;
  /* Drop any previously stored S, then deep-copy the caller's array (S may be NULL to clear). */
  PetscCall(PetscFree(a->S));
  if (S) {
    PetscCall(PetscMalloc1(p * q, &a->S));
    PetscCall(PetscMemcpy(a->S, S, p * q * sizeof(PetscScalar)));
  } else a->S = NULL;

  a->p = p;
  a->q = q;
  PetscFunctionReturn(0);
}

/*@C
   MatKAIJGetScaledIdentity - Check if both S and T are scaled identities.

   Logically Collective.

   Input Parameter:
. 
A - the KAIJ matrix

   Output Parameter:
.  identity - the Boolean value

   Level: Advanced

.seealso: `MatKAIJGetS()`, `MatKAIJGetT()`
@*/
PetscErrorCode MatKAIJGetScaledIdentity(Mat A, PetscBool *identity) {
  Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
  PetscInt i, j;

  PetscFunctionBegin;
  /* Non-square blocks can never be (scaled) identities. */
  if (a->p != a->q) {
    *identity = PETSC_FALSE;
    PetscFunctionReturn(0);
  } else *identity = PETSC_TRUE;
  /* If T is the identity (isTI) and S is absent, there is nothing left to check. */
  if (!a->isTI || a->S) {
    for (i = 0; i < a->p && *identity; i++) {
      for (j = 0; j < a->p && *identity; j++) {
        if (i != j) {
          /* off-diagonal entries of both S and T must vanish (within PETSC_SMALL) */
          if (a->S && PetscAbsScalar(a->S[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
          if (a->T && PetscAbsScalar(a->T[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
        } else {
          /* all diagonal entries must equal the first diagonal entry (uniform scaling) */
          if (a->S && PetscAbsScalar(a->S[i * (a->p + 1)] - a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
          if (a->T && PetscAbsScalar(a->T[i * (a->p + 1)] - a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/*@C
   MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix

   Logically Collective; the entire T is stored independently on all processes.

   Input Parameters:
+  A - the KAIJ matrix
.  p - the number of rows in T
.  q - the number of columns in T
-  T - the T matrix, in form of a scalar array in column-major format

   Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix.
   The T matrix is copied, so the user can destroy this array.
   Level: Advanced

.seealso: `MatKAIJGetT()`, `MatKAIJSetS()`, `MatKAIJSetAIJ()`
@*/
PetscErrorCode MatKAIJSetT(Mat A, PetscInt p, PetscInt q, const PetscScalar T[]) {
  PetscInt i, j;
  Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
  PetscBool isTI = PETSC_FALSE;

  PetscFunctionBegin;
  /* check if T is an identity matrix; if so we store only the isTI flag and no array */
  if (T && (p == q)) {
    isTI = PETSC_TRUE;
    for (i = 0; i < p; i++) {
      for (j = 0; j < q; j++) {
        if (i == j) {
          /* diagonal term must be 1 */
          if (T[i + j * p] != 1.0) isTI = PETSC_FALSE;
        } else {
          /* off-diagonal term must be 0 */
          if (T[i + j * p] != 0.0) isTI = PETSC_FALSE;
        }
      }
    }
  }
  a->isTI = isTI;

  /* Drop any previously stored T; copy the new array only when it is a non-identity. */
  PetscCall(PetscFree(a->T));
  if (T && (!isTI)) {
    PetscCall(PetscMalloc1(p * q, &a->T));
    PetscCall(PetscMemcpy(a->T, T, p * q * sizeof(PetscScalar)));
  } else a->T = NULL;

  a->p = p;
  a->q = q;
  PetscFunctionReturn(0);
}

/* Destroy the sequential KAIJ implementation data: inner AIJ matrix, dense S/T arrays,
   inverted block diagonal, and the SOR work buffers allocated in MatSOR_SeqKAIJ(). */
PetscErrorCode MatDestroy_SeqKAIJ(Mat A) {
  Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatDestroy(&b->AIJ));
  PetscCall(PetscFree(b->S));
  PetscCall(PetscFree(b->T));
  PetscCall(PetscFree(b->ibdiag));
  PetscCall(PetscFree5(b->sor.w, b->sor.y, b->sor.work, b->sor.t, b->sor.arr));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", NULL));
  PetscCall(PetscFree(A->data));
  PetscFunctionReturn(0);
}

/* (Re)build the sequential on-diagonal (AIJ) and off-diagonal (OAIJ) KAIJ submatrices of a
   parallel KAIJ matrix from the diagonal/off-diagonal parts of the inner MPIAIJ matrix.
   Uses the inner matrix's PetscObjectState to skip the rebuild when nothing changed. */
PETSC_INTERN PetscErrorCode MatKAIJ_build_AIJ_OAIJ(Mat A) {
  Mat_MPIKAIJ *a;
  Mat_MPIAIJ *mpiaij;
  PetscScalar *T;
  PetscInt i, j;
  PetscObjectState state;

  PetscFunctionBegin;
  a = (Mat_MPIKAIJ *)A->data;
  mpiaij = (Mat_MPIAIJ *)a->A->data;

  PetscCall(PetscObjectStateGet((PetscObject)a->A, &state));
  if (state == a->state) {
    /* The existing AIJ and KAIJ members are up-to-date, so simply exit. 
*/
    PetscFunctionReturn(0);
  } else {
    PetscCall(MatDestroy(&a->AIJ));
    PetscCall(MatDestroy(&a->OAIJ));
    if (a->isTI) {
      /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
       * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
       * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
       * to pass in. */
      PetscCall(PetscMalloc1(a->p * a->q, &T));
      for (i = 0; i < a->p; i++) {
        for (j = 0; j < a->q; j++) {
          if (i == j) T[i + j * a->p] = 1.0;
          else T[i + j * a->p] = 0.0;
        }
      }
    } else T = a->T;
    /* S only applies to the diagonal block; the off-diagonal block gets NULL for S. */
    PetscCall(MatCreateKAIJ(mpiaij->A, a->p, a->q, a->S, T, &a->AIJ));
    PetscCall(MatCreateKAIJ(mpiaij->B, a->p, a->q, NULL, T, &a->OAIJ));
    if (a->isTI) PetscCall(PetscFree(T));
    a->state = state;
  }

  PetscFunctionReturn(0);
}

/* Set sizes/layouts of the KAIJ matrix from the inner AIJ matrix scaled by the block
   dimensions (p rows, q columns per block); in parallel, additionally build the sequential
   submatrices and the scatter context used to gather ghost values for MatMult. */
PetscErrorCode MatSetUp_KAIJ(Mat A) {
  PetscInt n;
  PetscMPIInt size;
  /* NOTE(review): in the parallel branch below A->data is a Mat_MPIKAIJ; reading p/q
     through this Mat_SeqKAIJ pointer presumably relies on both structs sharing a common
     header layout -- confirm against kaij.h. */
  Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ *)A->data;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    PetscCall(MatSetSizes(A, seqkaij->p * seqkaij->AIJ->rmap->n, seqkaij->q * seqkaij->AIJ->cmap->n, seqkaij->p * seqkaij->AIJ->rmap->N, seqkaij->q * seqkaij->AIJ->cmap->N));
    PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p));
    PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q));
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
  } else {
    Mat_MPIKAIJ *a;
    Mat_MPIAIJ *mpiaij;
    IS from, to;
    Vec gvec;

    a = (Mat_MPIKAIJ *)A->data;
    mpiaij = (Mat_MPIAIJ *)a->A->data;
    PetscCall(MatSetSizes(A, a->p * a->A->rmap->n, a->q * a->A->cmap->n, a->p * a->A->rmap->N, a->q * a->A->cmap->N));
    PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p));
    PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q));
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));

    PetscCall(MatKAIJ_build_AIJ_OAIJ(A));

    /* Work vector holding the gathered off-process entries, q values per ghost column. */
    PetscCall(VecGetSize(mpiaij->lvec, &n));
    PetscCall(VecCreate(PETSC_COMM_SELF, &a->w));
    PetscCall(VecSetSizes(a->w, n * a->q, n * a->q));
    PetscCall(VecSetBlockSize(a->w, a->q));
    PetscCall(VecSetType(a->w, VECSEQ));

    /* create two temporary Index sets for build scatter gather */
    PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)a->A), a->q, n, mpiaij->garray, PETSC_COPY_VALUES, &from));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, n * a->q, 0, 1, &to));

    /* create temporary global vector to generate scatter context */
    PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A), a->q, a->q * a->A->cmap->n, a->q * a->A->cmap->N, NULL, &gvec));

    /* generate the scatter context */
    PetscCall(VecScatterCreate(gvec, from, a->w, to, &a->ctx));

    PetscCall(ISDestroy(&from));
    PetscCall(ISDestroy(&to));
    PetscCall(VecDestroy(&gvec));
  }

  A->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/* View a KAIJ matrix: for INFO/IMPL ASCII formats print S, T, and the inner AIJ matrix
   directly; for every other format convert to AIJ and view the converted matrix. */
PetscErrorCode MatView_KAIJ(Mat A, PetscViewer viewer) {
  PetscViewerFormat format;
  Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
  Mat B;
  PetscInt i;
  PetscBool ismpikaij;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
  PetscCall(PetscViewerGetFormat(viewer, &format));
  if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
    PetscCall(PetscViewerASCIIPrintf(viewer, "S and T have %" PetscInt_FMT " rows and %" PetscInt_FMT " columns\n", a->p, a->q));

    /* Print appropriate details for S. 
*/
    if (!a->S) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "S is NULL\n"));
    } else if (format == PETSC_VIEWER_ASCII_IMPL) {
      /* S entries are only dumped in the IMPL format (column-major order). */
      PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of S are "));
      for (i = 0; i < (a->p * a->q); i++) {
#if defined(PETSC_USE_COMPLEX)
        PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->S[i]), (double)PetscImaginaryPart(a->S[i])));
#else
        PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->S[i])));
#endif
      }
      PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
    }

    /* Print appropriate details for T. */
    if (a->isTI) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "T is the identity matrix\n"));
    } else if (!a->T) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "T is NULL\n"));
    } else if (format == PETSC_VIEWER_ASCII_IMPL) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of T are "));
      for (i = 0; i < (a->p * a->q); i++) {
#if defined(PETSC_USE_COMPLEX)
        PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->T[i]), (double)PetscImaginaryPart(a->T[i])));
#else
        PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->T[i])));
#endif
      }
      PetscCall(PetscViewerASCIIPrintf(viewer, "\n"));
    }

    /* Now print details for the AIJ matrix, using the AIJ viewer. */
    PetscCall(PetscViewerASCIIPrintf(viewer, "Now viewing the associated AIJ matrix:\n"));
    if (ismpikaij) {
      Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
      PetscCall(MatView(b->A, viewer));
    } else {
      PetscCall(MatView(a->AIJ, viewer));
    }

  } else {
    /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. 
*/ 596 PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &B)); 597 PetscCall(MatView(B, viewer)); 598 PetscCall(MatDestroy(&B)); 599 } 600 PetscFunctionReturn(0); 601 } 602 603 PetscErrorCode MatDestroy_MPIKAIJ(Mat A) { 604 Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 605 606 PetscFunctionBegin; 607 PetscCall(MatDestroy(&b->AIJ)); 608 PetscCall(MatDestroy(&b->OAIJ)); 609 PetscCall(MatDestroy(&b->A)); 610 PetscCall(VecScatterDestroy(&b->ctx)); 611 PetscCall(VecDestroy(&b->w)); 612 PetscCall(PetscFree(b->S)); 613 PetscCall(PetscFree(b->T)); 614 PetscCall(PetscFree(b->ibdiag)); 615 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", NULL)); 616 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", NULL)); 617 PetscCall(PetscFree(A->data)); 618 PetscFunctionReturn(0); 619 } 620 621 /* --------------------------------------------------------------------------------------*/ 622 623 /* zz = yy + Axx */ 624 PetscErrorCode MatMultAdd_SeqKAIJ(Mat A, Vec xx, Vec yy, Vec zz) { 625 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 626 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 627 const PetscScalar *s = b->S, *t = b->T; 628 const PetscScalar *x, *v, *bx; 629 PetscScalar *y, *sums; 630 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 631 PetscInt n, i, jrow, j, l, p = b->p, q = b->q, k; 632 633 PetscFunctionBegin; 634 if (!yy) { 635 PetscCall(VecSet(zz, 0.0)); 636 } else { 637 PetscCall(VecCopy(yy, zz)); 638 } 639 if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0); 640 641 PetscCall(VecGetArrayRead(xx, &x)); 642 PetscCall(VecGetArray(zz, &y)); 643 idx = a->j; 644 v = a->a; 645 ii = a->i; 646 647 if (b->isTI) { 648 for (i = 0; i < m; i++) { 649 jrow = ii[i]; 650 n = ii[i + 1] - jrow; 651 sums = y + p * i; 652 for (j = 0; j < n; j++) { 653 for (k = 0; k < p; k++) { sums[k] += v[jrow + j] * x[q * idx[jrow + j] + k]; } 654 } 655 } 656 PetscCall(PetscLogFlops(3.0 * (a->nz) * p)); 657 } else if (t) { 658 for (i = 0; i < m; i++) { 659 
jrow = ii[i]; 660 n = ii[i + 1] - jrow; 661 sums = y + p * i; 662 for (j = 0; j < n; j++) { 663 for (k = 0; k < p; k++) { 664 for (l = 0; l < q; l++) { sums[k] += v[jrow + j] * t[k + l * p] * x[q * idx[jrow + j] + l]; } 665 } 666 } 667 } 668 /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do), 669 * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T), 670 * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter 671 * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */ 672 PetscCall(PetscLogFlops((2.0 * p * q - p) * m + 2.0 * p * a->nz)); 673 } 674 if (s) { 675 for (i = 0; i < m; i++) { 676 sums = y + p * i; 677 bx = x + q * i; 678 if (i < b->AIJ->cmap->n) { 679 for (j = 0; j < q; j++) { 680 for (k = 0; k < p; k++) { sums[k] += s[k + j * p] * bx[j]; } 681 } 682 } 683 } 684 PetscCall(PetscLogFlops(2.0 * m * p * q)); 685 } 686 687 PetscCall(VecRestoreArrayRead(xx, &x)); 688 PetscCall(VecRestoreArray(zz, &y)); 689 PetscFunctionReturn(0); 690 } 691 692 PetscErrorCode MatMult_SeqKAIJ(Mat A, Vec xx, Vec yy) { 693 PetscFunctionBegin; 694 PetscCall(MatMultAdd_SeqKAIJ(A, xx, PETSC_NULL, yy)); 695 PetscFunctionReturn(0); 696 } 697 698 #include <petsc/private/kernels/blockinvert.h> 699 700 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A, const PetscScalar **values) { 701 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 702 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 703 const PetscScalar *S = b->S; 704 const PetscScalar *T = b->T; 705 const PetscScalar *v = a->a; 706 const PetscInt p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i; 707 PetscInt i, j, *v_pivots, dof, dof2; 708 PetscScalar *diag, aval, *v_work; 709 710 PetscFunctionBegin; 711 PetscCheck(p == q, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Block size must be square 
to calculate inverse.");
  PetscCheck(S || T || b->isTI, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Cannot invert a zero matrix.");

  dof = p;
  dof2 = dof * dof;

  /* Return the cached inverse when it is still valid. */
  if (b->ibdiagvalid) {
    if (values) *values = b->ibdiag;
    PetscFunctionReturn(0);
  }
  if (!b->ibdiag) {
    PetscCall(PetscMalloc1(dof2 * m, &b->ibdiag));
    PetscCall(PetscLogObjectMemory((PetscObject)A, dof2 * m * sizeof(PetscScalar)));
  }
  if (values) *values = b->ibdiag;
  diag = b->ibdiag;

  PetscCall(PetscMalloc2(dof, &v_work, dof, &v_pivots));
  for (i = 0; i < m; i++) {
    /* Start each diagonal block from S (or zero), then add a_ii * T (or a_ii * I). */
    if (S) {
      PetscCall(PetscMemcpy(diag, S, dof2 * sizeof(PetscScalar)));
    } else {
      PetscCall(PetscMemzero(diag, dof2 * sizeof(PetscScalar)));
    }
    if (b->isTI) {
      aval = 0;
      /* aval stays 0 if row i has no stored diagonal entry. */
      for (j = ii[i]; j < ii[i + 1]; j++)
        if (idx[j] == i) aval = v[j];
      for (j = 0; j < dof; j++) diag[j + dof * j] += aval;
    } else if (T) {
      aval = 0;
      for (j = ii[i]; j < ii[i + 1]; j++)
        if (idx[j] == i) aval = v[j];
      for (j = 0; j < dof2; j++) diag[j] += aval * T[j];
    }
    PetscCall(PetscKernel_A_gets_inverse_A(dof, diag, v_pivots, v_work, PETSC_FALSE, NULL));
    diag += dof2;
  }
  PetscCall(PetscFree2(v_work, v_pivots));

  b->ibdiagvalid = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/* Return the sequential diagonal-block KAIJ submatrix of a parallel KAIJ matrix
   (no reference count increase). */
static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A, Mat *B) {
  Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ *)A->data;

  PetscFunctionBegin;
  *B = kaij->AIJ;
  PetscFunctionReturn(0);
}

/* Convert a (Seq/MPI) KAIJ matrix to AIJ: preallocate from the inner AIJ sparsity
   (nz*q entries per block row, plus q for an S-induced diagonal where the AIJ
   diagonal is missing), then fill via MatConvert_Basic(). */
static PetscErrorCode MatConvert_KAIJ_AIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) {
  Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data;
  Mat AIJ, OAIJ, B;
  PetscInt *d_nnz, *o_nnz = NULL, nz, i, j, m, d;
  const PetscInt p = a->p, q = a->q;
  PetscBool ismpikaij, missing;

  PetscFunctionBegin;
  if (reuse != MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij));
    if (ismpikaij) {
      Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
      AIJ = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ;
      OAIJ = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ;
    } else {
      AIJ = a->AIJ;
      OAIJ = NULL;
    }
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetType(B, MATAIJ));
    PetscCall(MatGetSize(AIJ, &m, NULL));
    PetscCall(MatMissingDiagonal(AIJ, &missing, &d)); /* assumption that all successive rows will have a missing diagonal */
    if (!missing || !a->S) d = m;
    PetscCall(PetscMalloc1(m * p, &d_nnz));
    for (i = 0; i < m; ++i) {
      PetscCall(MatGetRow_SeqAIJ(AIJ, i, &nz, NULL, NULL));
      /* rows at/after the first missing diagonal get q extra entries for the S term */
      for (j = 0; j < p; ++j) d_nnz[i * p + j] = nz * q + (i >= d) * q;
      PetscCall(MatRestoreRow_SeqAIJ(AIJ, i, &nz, NULL, NULL));
    }
    if (OAIJ) {
      PetscCall(PetscMalloc1(m * p, &o_nnz));
      for (i = 0; i < m; ++i) {
        PetscCall(MatGetRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL));
        for (j = 0; j < p; ++j) o_nnz[i * p + j] = nz * q;
        PetscCall(MatRestoreRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL));
      }
      PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    } else {
      PetscCall(MatSeqAIJSetPreallocation(B, 0, d_nnz));
    }
    PetscCall(PetscFree(d_nnz));
    PetscCall(PetscFree(o_nnz));
  } else B = *newmat;
  PetscCall(MatConvert_Basic(A, newtype, MAT_REUSE_MATRIX, &B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &B));
  } else *newmat = B;
  PetscFunctionReturn(0);
}

/* SOR/Gauss-Seidel relaxation for a sequential KAIJ matrix (square p x p blocks only). */
PetscErrorCode MatSOR_SeqKAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) {
  Mat_SeqKAIJ *kaij = (Mat_SeqKAIJ *)A->data;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)kaij->AIJ->data;
  const PetscScalar *aa = a->a, *T = kaij->T, *v;
  const PetscInt m = kaij->AIJ->rmap->n, *ai = a->i, *aj = a->j, p = kaij->p, q = kaij->q, *diag, *vi;
  const PetscScalar *b, *xb, *idiag;
PetscScalar *x, *work, *workt, *w, *y, *arr, *t, *arrt; 821 PetscInt i, j, k, i2, bs, bs2, nz; 822 823 PetscFunctionBegin; 824 its = its * lits; 825 PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat"); 826 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 827 PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift"); 828 PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts"); 829 PetscCheck(p == q, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSOR for KAIJ: No support for non-square dense blocks"); 830 bs = p; 831 bs2 = bs * bs; 832 833 if (!m) PetscFunctionReturn(0); 834 835 if (!kaij->ibdiagvalid) PetscCall(MatInvertBlockDiagonal_SeqKAIJ(A, NULL)); 836 idiag = kaij->ibdiag; 837 diag = a->diag; 838 839 if (!kaij->sor.setup) { 840 PetscCall(PetscMalloc5(bs, &kaij->sor.w, bs, &kaij->sor.y, m * bs, &kaij->sor.work, m * bs, &kaij->sor.t, m * bs2, &kaij->sor.arr)); 841 kaij->sor.setup = PETSC_TRUE; 842 } 843 y = kaij->sor.y; 844 w = kaij->sor.w; 845 work = kaij->sor.work; 846 t = kaij->sor.t; 847 arr = kaij->sor.arr; 848 849 PetscCall(VecGetArray(xx, &x)); 850 PetscCall(VecGetArrayRead(bb, &b)); 851 852 if (flag & SOR_ZERO_INITIAL_GUESS) { 853 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 854 PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); /* x[0:bs] <- D^{-1} b[0:bs] */ 855 PetscCall(PetscMemcpy(t, b, bs * sizeof(PetscScalar))); 856 i2 = bs; 857 idiag += bs2; 858 for (i = 1; i < m; i++) { 859 v = aa + ai[i]; 860 vi = aj + ai[i]; 861 nz = diag[i] - ai[i]; 862 863 if (T) { /* b - T (Arow * x) */ 864 PetscCall(PetscMemzero(w, bs * sizeof(PetscScalar))); 865 for (j = 0; j < nz; j++) { 866 for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k]; 867 } 868 
PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs, w, T, &t[i2]); 869 for (k = 0; k < bs; k++) t[i2 + k] += b[i2 + k]; 870 } else if (kaij->isTI) { 871 PetscCall(PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar))); 872 for (j = 0; j < nz; j++) { 873 for (k = 0; k < bs; k++) t[i2 + k] -= v[j] * x[vi[j] * bs + k]; 874 } 875 } else { 876 PetscCall(PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar))); 877 } 878 879 PetscKernel_w_gets_Ar_times_v(bs, bs, t + i2, idiag, y); 880 for (j = 0; j < bs; j++) x[i2 + j] = omega * y[j]; 881 882 idiag += bs2; 883 i2 += bs; 884 } 885 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 886 PetscCall(PetscLogFlops(1.0 * bs2 * a->nz)); 887 xb = t; 888 } else xb = b; 889 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 890 idiag = kaij->ibdiag + bs2 * (m - 1); 891 i2 = bs * (m - 1); 892 PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar))); 893 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 894 i2 -= bs; 895 idiag -= bs2; 896 for (i = m - 2; i >= 0; i--) { 897 v = aa + diag[i] + 1; 898 vi = aj + diag[i] + 1; 899 nz = ai[i + 1] - diag[i] - 1; 900 901 if (T) { /* FIXME: This branch untested */ 902 PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar))); 903 /* copy all rows of x that are needed into contiguous space */ 904 workt = work; 905 for (j = 0; j < nz; j++) { 906 PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 907 workt += bs; 908 } 909 arrt = arr; 910 for (j = 0; j < nz; j++) { 911 PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 912 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 913 arrt += bs2; 914 } 915 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 916 } else if (kaij->isTI) { 917 PetscCall(PetscMemcpy(w, t + i2, bs * sizeof(PetscScalar))); 918 for (j = 0; j < nz; j++) { 919 for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k]; 920 } 921 } 922 923 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); 
/* RHS incorrect for omega != 1.0 */ 924 for (j = 0; j < bs; j++) x[i2 + j] = (1.0 - omega) * x[i2 + j] + omega * y[j]; 925 926 idiag -= bs2; 927 i2 -= bs; 928 } 929 PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz))); 930 } 931 its--; 932 } 933 while (its--) { /* FIXME: This branch not updated */ 934 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 935 i2 = 0; 936 idiag = kaij->ibdiag; 937 for (i = 0; i < m; i++) { 938 PetscCall(PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar))); 939 940 v = aa + ai[i]; 941 vi = aj + ai[i]; 942 nz = diag[i] - ai[i]; 943 workt = work; 944 for (j = 0; j < nz; j++) { 945 PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 946 workt += bs; 947 } 948 arrt = arr; 949 if (T) { 950 for (j = 0; j < nz; j++) { 951 PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 952 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 953 arrt += bs2; 954 } 955 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 956 } else if (kaij->isTI) { 957 for (j = 0; j < nz; j++) { 958 PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar))); 959 for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 960 arrt += bs2; 961 } 962 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 963 } 964 PetscCall(PetscMemcpy(t + i2, w, bs * sizeof(PetscScalar))); 965 966 v = aa + diag[i] + 1; 967 vi = aj + diag[i] + 1; 968 nz = ai[i + 1] - diag[i] - 1; 969 workt = work; 970 for (j = 0; j < nz; j++) { 971 PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 972 workt += bs; 973 } 974 arrt = arr; 975 if (T) { 976 for (j = 0; j < nz; j++) { 977 PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 978 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 979 arrt += bs2; 980 } 981 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 982 } else if (kaij->isTI) { 983 for (j = 0; j < nz; j++) { 984 PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar))); 985 for (k = 0; k < bs; k++) arrt[k + bs * k] = 
v[j]; 986 arrt += bs2; 987 } 988 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 989 } 990 991 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); 992 for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j); 993 994 idiag += bs2; 995 i2 += bs; 996 } 997 xb = t; 998 } else xb = b; 999 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1000 idiag = kaij->ibdiag + bs2 * (m - 1); 1001 i2 = bs * (m - 1); 1002 if (xb == b) { 1003 for (i = m - 1; i >= 0; i--) { 1004 PetscCall(PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar))); 1005 1006 v = aa + ai[i]; 1007 vi = aj + ai[i]; 1008 nz = diag[i] - ai[i]; 1009 workt = work; 1010 for (j = 0; j < nz; j++) { 1011 PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 1012 workt += bs; 1013 } 1014 arrt = arr; 1015 if (T) { 1016 for (j = 0; j < nz; j++) { 1017 PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 1018 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 1019 arrt += bs2; 1020 } 1021 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1022 } else if (kaij->isTI) { 1023 for (j = 0; j < nz; j++) { 1024 PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar))); 1025 for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 1026 arrt += bs2; 1027 } 1028 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1029 } 1030 1031 v = aa + diag[i] + 1; 1032 vi = aj + diag[i] + 1; 1033 nz = ai[i + 1] - diag[i] - 1; 1034 workt = work; 1035 for (j = 0; j < nz; j++) { 1036 PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 1037 workt += bs; 1038 } 1039 arrt = arr; 1040 if (T) { 1041 for (j = 0; j < nz; j++) { 1042 PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 1043 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 1044 arrt += bs2; 1045 } 1046 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1047 } else if (kaij->isTI) { 1048 for (j = 0; j < nz; j++) { 1049 PetscCall(PetscMemzero(arrt, 
bs2 * sizeof(PetscScalar))); 1050 for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 1051 arrt += bs2; 1052 } 1053 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1054 } 1055 1056 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); 1057 for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j); 1058 } 1059 } else { 1060 for (i = m - 1; i >= 0; i--) { 1061 PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar))); 1062 v = aa + diag[i] + 1; 1063 vi = aj + diag[i] + 1; 1064 nz = ai[i + 1] - diag[i] - 1; 1065 workt = work; 1066 for (j = 0; j < nz; j++) { 1067 PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 1068 workt += bs; 1069 } 1070 arrt = arr; 1071 if (T) { 1072 for (j = 0; j < nz; j++) { 1073 PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 1074 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 1075 arrt += bs2; 1076 } 1077 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1078 } else if (kaij->isTI) { 1079 for (j = 0; j < nz; j++) { 1080 PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar))); 1081 for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 1082 arrt += bs2; 1083 } 1084 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1085 } 1086 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); 1087 for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j); 1088 } 1089 } 1090 PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz))); 1091 } 1092 } 1093 1094 PetscCall(VecRestoreArray(xx, &x)); 1095 PetscCall(VecRestoreArrayRead(bb, &b)); 1096 PetscFunctionReturn(0); 1097 } 1098 1099 /*===================================================================================*/ 1100 1101 PetscErrorCode MatMultAdd_MPIKAIJ(Mat A, Vec xx, Vec yy, Vec zz) { 1102 Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 1103 1104 PetscFunctionBegin; 1105 if (!yy) { 1106 PetscCall(VecSet(zz, 0.0)); 1107 } else { 1108 PetscCall(VecCopy(yy, zz)); 1109 } 1110 
PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */ 1111 /* start the scatter */ 1112 PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 1113 PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, zz, zz)); 1114 PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 1115 PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz)); 1116 PetscFunctionReturn(0); 1117 } 1118 1119 PetscErrorCode MatMult_MPIKAIJ(Mat A, Vec xx, Vec yy) { 1120 PetscFunctionBegin; 1121 PetscCall(MatMultAdd_MPIKAIJ(A, xx, PETSC_NULL, yy)); 1122 PetscFunctionReturn(0); 1123 } 1124 1125 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A, const PetscScalar **values) { 1126 Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 1127 1128 PetscFunctionBegin; 1129 PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ is up to date. */ 1130 PetscCall((*b->AIJ->ops->invertblockdiagonal)(b->AIJ, values)); 1131 PetscFunctionReturn(0); 1132 } 1133 1134 /* ----------------------------------------------------------------*/ 1135 1136 PetscErrorCode MatGetRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values) { 1137 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 1138 PetscErrorCode diag = PETSC_FALSE; 1139 PetscInt nzaij, nz, *colsaij, *idx, i, j, p = b->p, q = b->q, r = row / p, s = row % p, c; 1140 PetscScalar *vaij, *v, *S = b->S, *T = b->T; 1141 1142 PetscFunctionBegin; 1143 PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1144 b->getrowactive = PETSC_TRUE; 1145 PetscCheck(row >= 0 && row < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row); 1146 1147 if ((!S) && (!T) && (!b->isTI)) { 1148 if (ncols) *ncols = 0; 1149 if (cols) *cols = NULL; 1150 if (values) *values = NULL; 1151 PetscFunctionReturn(0); 1152 } 1153 1154 if (T || b->isTI) { 1155 PetscCall(MatGetRow_SeqAIJ(b->AIJ, r, &nzaij, &colsaij, &vaij)); 1156 c = nzaij; 1157 
    for (i = 0; i < nzaij; i++) {
      /* check if this row contains a diagonal entry */
      if (colsaij[i] == r) {
        diag = PETSC_TRUE;
        c = i;
      }
    }
  } else nzaij = c = 0;

  /* calculate size of row; when the AIJ row has a diagonal entry AND S is present,
     the S block merges into that entry's block, so one block of q columns is shared */
  nz = 0;
  if (S) nz += q;
  if (T || b->isTI) nz += (diag && S ? (nzaij - 1) * q : nzaij * q);

  if (cols || values) {
    PetscCall(PetscMalloc2(nz, &idx, nz, &v));
    for (i = 0; i < q; i++) {
      /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
      v[i] = 0.0;
    }
    if (b->isTI) {
      /* T is the identity: block (r, colsaij[i]) is vaij[i] * I, so only column j == s of each block row is nonzero */
      for (i = 0; i < nzaij; i++) {
        for (j = 0; j < q; j++) {
          idx[i * q + j] = colsaij[i] * q + j;
          v[i * q + j] = (j == s ? vaij[i] : 0);
        }
      }
    } else if (T) {
      /* general T: row s of the dense block vaij[i] * T (T stored column-major, p x q) */
      for (i = 0; i < nzaij; i++) {
        for (j = 0; j < q; j++) {
          idx[i * q + j] = colsaij[i] * q + j;
          v[i * q + j] = vaij[i] * T[s + j * p];
        }
      }
    }
    if (S) {
      /* add row s of S into the diagonal block (slot c computed above) */
      for (j = 0; j < q; j++) {
        idx[c * q + j] = r * q + j;
        v[c * q + j] += S[s + j * p];
      }
    }
  }

  if (ncols) *ncols = nz;
  if (cols) *cols = idx;
  if (values) *values = v;
  PetscFunctionReturn(0);
}

/* Free the arrays handed out by MatGetRow_SeqKAIJ and clear the re-entrancy guard */
PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
  PetscFunctionBegin;
  if (nz) *nz = 0;
  PetscCall(PetscFree2(*idx, *v));
  ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Parallel counterpart of MatGetRow_SeqKAIJ: expands local row 'row' (global numbering)
   from the diagonal-block part (MatAIJ) and the off-diagonal part (MatOAIJ, with
   global block columns recovered through garray). */
PetscErrorCode MatGetRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values) {
  Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data;
  Mat AIJ = b->A;
  PetscBool diag = PETSC_FALSE;
  Mat MatAIJ, MatOAIJ;
  const PetscInt rstart = A->rmap->rstart, rend = A->rmap->rend, p = b->p, q = b->q, *garray;
  PetscInt nz, *idx, ncolsaij = 0, ncolsoaij = 0, *colsaij, *colsoaij, r, s, c, i, j, lrow;
  PetscScalar *v, *vals, *ovals, *S = b->S, *T = b->T;

  PetscFunctionBegin;
  PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */
  MatAIJ = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ;
  MatOAIJ = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ;
  PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  b->getrowactive = PETSC_TRUE;
  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
  lrow = row - rstart;

  if ((!S) && (!T) && (!b->isTI)) {
    /* S and T both absent: the whole matrix (and hence this row) is zero */
    if (ncols) *ncols = 0;
    if (cols) *cols = NULL;
    if (values) *values = NULL;
    PetscFunctionReturn(0);
  }

  /* block row and intra-block row of the requested global row */
  r = lrow / p;
  s = lrow % p;

  if (T || b->isTI) {
    PetscCall(MatMPIAIJGetSeqAIJ(AIJ, NULL, NULL, &garray));
    PetscCall(MatGetRow_SeqAIJ(MatAIJ, lrow / p, &ncolsaij, &colsaij, &vals));
    PetscCall(MatGetRow_SeqAIJ(MatOAIJ, lrow / p, &ncolsoaij, &colsoaij, &ovals));

    /* c = slot of the diagonal entry (for merging S); defaults to one past the end */
    c = ncolsaij + ncolsoaij;
    for (i = 0; i < ncolsaij; i++) {
      /* check if this row contains a diagonal entry */
      if (colsaij[i] == r) {
        diag = PETSC_TRUE;
        c = i;
      }
    }
  } else c = 0;

  /* calculate size of row (diagonal block shared with S when both are present) */
  nz = 0;
  if (S) nz += q;
  if (T || b->isTI) nz += (diag && S ? (ncolsaij + ncolsoaij - 1) * q : (ncolsaij + ncolsoaij) * q);

  if (cols || values) {
    PetscCall(PetscMalloc2(nz, &idx, nz, &v));
    for (i = 0; i < q; i++) {
      /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
      v[i] = 0.0;
    }
    if (b->isTI) {
      /* T is the identity: only column j == s of each block row is nonzero */
      for (i = 0; i < ncolsaij; i++) {
        for (j = 0; j < q; j++) {
          idx[i * q + j] = (colsaij[i] + rstart / p) * q + j;
          v[i * q + j] = (j == s ? vals[i] : 0.0);
        }
      }
      /* off-diagonal part: map local columns back to global block columns via garray */
      for (i = 0; i < ncolsoaij; i++) {
        for (j = 0; j < q; j++) {
          idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j;
          v[(i + ncolsaij) * q + j] = (j == s ? ovals[i] : 0.0);
        }
      }
    } else if (T) {
      /* general T: row s of each dense block a_ij * T (T stored column-major, p x q) */
      for (i = 0; i < ncolsaij; i++) {
        for (j = 0; j < q; j++) {
          idx[i * q + j] = (colsaij[i] + rstart / p) * q + j;
          v[i * q + j] = vals[i] * T[s + j * p];
        }
      }
      for (i = 0; i < ncolsoaij; i++) {
        for (j = 0; j < q; j++) {
          idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j;
          v[(i + ncolsaij) * q + j] = ovals[i] * T[s + j * p];
        }
      }
    }
    if (S) {
      /* add row s of S into the (global) diagonal block */
      for (j = 0; j < q; j++) {
        idx[c * q + j] = (r + rstart / p) * q + j;
        v[c * q + j] += S[s + j * p];
      }
    }
  }

  if (ncols) *ncols = nz;
  if (cols) *cols = idx;
  if (values) *values = v;
  PetscFunctionReturn(0);
}

/* Free the arrays handed out by MatGetRow_MPIKAIJ and clear the re-entrancy guard.
   NOTE(review): A->data is a Mat_MPIKAIJ here, but the cast below reads it as
   Mat_SeqKAIJ to reach getrowactive -- presumably the two structs keep that field
   at the same offset; confirm against kaij.h. Also note the Seq variant additionally
   zeros *nz while this one does not -- verify whether callers rely on that. */
PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
  PetscFunctionBegin;
  PetscCall(PetscFree2(*idx, *v));
  ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Extract a submatrix by first converting the whole KAIJ matrix to AIJ, then
   delegating to the AIJ submatrix extraction; the temporary AIJ copy is destroyed. */
PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat) {
  Mat A;

  PetscFunctionBegin;
  PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A));
  PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat));
  PetscCall(MatDestroy(&A));
  PetscFunctionReturn(0);
}

/* ---------------------------------------------------------------------------------- */
/*@C
  MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form:

    [I \otimes S + A \otimes T]

  where
  S is a dense (p \times q) matrix
  T is a dense (p \times q) matrix
  A is an AIJ (n \times n) matrix
  I is the identity matrix
  The resulting matrix is (np \times nq)
1338 1339 S and T are always stored independently on all processes as PetscScalar arrays in column-major format. 1340 1341 Collective 1342 1343 Input Parameters: 1344 + A - the AIJ matrix 1345 . p - number of rows in S and T 1346 . q - number of columns in S and T 1347 . S - the S matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major) 1348 - T - the T matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major) 1349 1350 Output Parameter: 1351 . kaij - the new KAIJ matrix 1352 1353 Notes: 1354 This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed. 1355 Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix. 1356 1357 Developer Notes: 1358 In the MATMPIKAIJ case, the internal 'AIJ' and 'OAIJ' sequential KAIJ matrices are kept up to date by tracking the object state 1359 of the AIJ matrix 'A' that describes the blockwise action of the MATMPIKAIJ matrix and, if the object state has changed, lazily 1360 rebuilding 'AIJ' and 'OAIJ' just before executing operations with the MATMPIKAIJ matrix. If new types of operations are added, 1361 routines implementing those must also ensure these are rebuilt when needed (by calling the internal MatKAIJ_build_AIJ_OAIJ() routine). 

  Level: advanced

.seealso: `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MATKAIJ`
@*/
PetscErrorCode MatCreateKAIJ(Mat A, PetscInt p, PetscInt q, const PetscScalar S[], const PetscScalar T[], Mat *kaij) {
  PetscFunctionBegin;
  /* standard PETSc two-phase construction: create an empty Mat on A's communicator,
     set its type, then hand over the pieces (A, S, T) before MatSetUp() */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), kaij));
  PetscCall(MatSetType(*kaij, MATKAIJ));
  PetscCall(MatKAIJSetAIJ(*kaij, A)); /* increases the reference count on A (see Notes above) */
  PetscCall(MatKAIJSetS(*kaij, p, q, S));
  PetscCall(MatKAIJSetT(*kaij, p, q, T));
  PetscCall(MatSetUp(*kaij));
  PetscFunctionReturn(0);
}

/*MC
  MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form
    [I \otimes S + A \otimes T],
  where
  S is a dense (p \times q) matrix,
  T is a dense (p \times q) matrix,
  A is an AIJ (n \times n) matrix,
  and I is the identity matrix.
  The resulting matrix is (np \times nq).

  S and T are always stored independently on all processes as PetscScalar arrays in column-major format.

  Notes:
  A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b,
  where x and b are column vectors containing the row-major representations of X and B.

  Level: advanced

.seealso: `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MatCreateKAIJ()`
M*/

/* Shared constructor for MATSEQKAIJ and MATMPIKAIJ: allocates the implementation
   context, then installs the ops table appropriate for the communicator size. */
PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A) {
  Mat_MPIKAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* A Mat_MPIKAIJ is allocated in both the sequential and the parallel case;
     NOTE(review): the sequential code paths then cast A->data to Mat_SeqKAIJ --
     presumably the two structs share a common prefix; confirm against kaij.h. */
  PetscCall(PetscNewLog(A, &b));
  A->data = (void *)b;

  /* start from a clean ops table; only the operations set below are supported */
  PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps)));

  b->w = NULL;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* sequential flavor */
    PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQKAIJ));
    A->ops->destroy = MatDestroy_SeqKAIJ;
    A->ops->mult = MatMult_SeqKAIJ;
    A->ops->multadd = MatMultAdd_SeqKAIJ;
    A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
    A->ops->getrow = MatGetRow_SeqKAIJ;
    A->ops->restorerow = MatRestoreRow_SeqKAIJ;
    A->ops->sor = MatSOR_SeqKAIJ;
    PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", MatConvert_KAIJ_AIJ));
  } else {
    /* parallel flavor */
    PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIKAIJ));
    A->ops->destroy = MatDestroy_MPIKAIJ;
    A->ops->mult = MatMult_MPIKAIJ;
    A->ops->multadd = MatMultAdd_MPIKAIJ;
    A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
    A->ops->getrow = MatGetRow_MPIKAIJ;
    A->ops->restorerow = MatRestoreRow_MPIKAIJ;
    PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", MatGetDiagonalBlock_MPIKAIJ));
    PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", MatConvert_KAIJ_AIJ));
  }
  /* operations common to both flavors */
  A->ops->setup = MatSetUp_KAIJ;
  A->ops->view = MatView_KAIJ;
  A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
  PetscFunctionReturn(0);
}