1 2 /* 3 Defines the basic matrix operations for the KAIJ matrix storage format. 4 This format is used to evaluate matrices of the form: 5 6 [I \otimes S + A \otimes T] 7 8 where 9 S is a dense (p \times q) matrix 10 T is a dense (p \times q) matrix 11 A is an AIJ (n \times n) matrix 12 I is the identity matrix 13 14 The resulting matrix is (np \times nq) 15 16 We provide: 17 MatMult() 18 MatMultAdd() 19 MatInvertBlockDiagonal() 20 and 21 MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*) 22 23 This single directory handles both the sequential and parallel codes 24 */ 25 26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/ 27 #include <../src/mat/utils/freespace.h> 28 #include <petsc/private/vecimpl.h> 29 30 /*@C 31 MatKAIJGetAIJ - Get the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix 32 33 Not Collective, but if the `MATKAIJ` matrix is parallel, the `MATAIJ` matrix is also parallel 34 35 Input Parameter: 36 . A - the `MATKAIJ` matrix 37 38 Output Parameter: 39 . B - the `MATAIJ` matrix 40 41 Level: advanced 42 43 Note: 44 The reference count on the `MATAIJ` matrix is not increased so you should not destroy it. 45 46 .seealso: `MatCreateKAIJ()`, `MATKAIJ`, `MATAIJ` 47 @*/ 48 PetscErrorCode MatKAIJGetAIJ(Mat A, Mat *B) { 49 PetscBool ismpikaij, isseqkaij; 50 51 PetscFunctionBegin; 52 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij)); 53 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQKAIJ, &isseqkaij)); 54 if (ismpikaij) { 55 Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 56 57 *B = b->A; 58 } else if (isseqkaij) { 59 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 60 61 *B = b->AIJ; 62 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix passed in is not of type KAIJ"); 63 PetscFunctionReturn(0); 64 } 65 66 /*@C 67 MatKAIJGetS - Get the S matrix describing the shift action of the `MATKAIJ` matrix 68 69 Not Collective; the entire S is stored and returned independently on all processes. 70 71 Input Parameter: 72 . A - the `MATKAIJ` matrix 73 74 Output Parameters: 75 + m - the number of rows in S 76 . n - the number of columns in S 77 - S - the S matrix, in form of a scalar array in column-major format 78 79 Note: 80 All output parameters are optional (pass NULL if not desired) 81 82 Level: advanced 83 84 .seealso: `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()` 85 @*/ 86 PetscErrorCode MatKAIJGetS(Mat A, PetscInt *m, PetscInt *n, PetscScalar **S) { 87 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 88 PetscFunctionBegin; 89 if (m) *m = b->p; 90 if (n) *n = b->q; 91 if (S) *S = b->S; 92 PetscFunctionReturn(0); 93 } 94 95 /*@C 96 MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the `MATKAIJ` matrix 97 98 Not Collective; the entire S is stored and returned independently on all processes. 99 100 Input Parameter: 101 . A - the `MATKAIJ` matrix 102 103 Output Parameters: 104 + m - the number of rows in S 105 . n - the number of columns in S 106 - S - the S matrix, in form of a scalar array in column-major format 107 108 Note: 109 All output parameters are optional (pass NULL if not desired) 110 111 Level: advanced 112 113 .seealso: `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()` 114 @*/ 115 PetscErrorCode MatKAIJGetSRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar **S) { 116 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 117 PetscFunctionBegin; 118 if (m) *m = b->p; 119 if (n) *n = b->q; 120 if (S) *S = b->S; 121 PetscFunctionReturn(0); 122 } 123 124 /*@C 125 MatKAIJRestoreS - Restore array obtained with `MatKAIJGetS()` 126 127 Not collective 128 129 Input Parameter: 130 . A - the `MATKAIJ` matrix 131 132 Output Parameter: 133 . S - location of pointer to array obtained with `MatKAIJGetS()` 134 135 Note: 136 This routine zeros the array pointer to prevent accidental reuse after it has been restored. 137 If NULL is passed, it will not attempt to zero the array pointer. 138 139 Level: advanced 140 141 .seealso: `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()` 142 @*/ 143 PetscErrorCode MatKAIJRestoreS(Mat A, PetscScalar **S) { 144 PetscFunctionBegin; 145 if (S) *S = NULL; 146 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 147 PetscFunctionReturn(0); 148 } 149 150 /*@C 151 MatKAIJRestoreSRead - Restore array obtained with `MatKAIJGetSRead()` 152 153 Not collective 154 155 Input Parameter: 156 . A - the `MATKAIJ` matrix 157 158 Output Parameter: 159 . S - location of pointer to array obtained with `MatKAIJGetS()` 160 161 Note: 162 This routine zeros the array pointer to prevent accidental reuse after it has been restored. 163 If NULL is passed, it will not attempt to zero the array pointer. 164 165 Level: advanced 166 167 .seealso: `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()` 168 @*/ 169 PetscErrorCode MatKAIJRestoreSRead(Mat A, const PetscScalar **S) { 170 PetscFunctionBegin; 171 if (S) *S = NULL; 172 PetscFunctionReturn(0); 173 } 174 175 /*@C 176 MatKAIJGetT - Get the transformation matrix T associated with the `MATKAIJ` matrix 177 178 Not Collective; the entire T is stored and returned independently on all processes 179 180 Input Parameter: 181 . A - the `MATKAIJ` matrix 182 183 Output Parameters: 184 + m - the number of rows in T 185 . n - the number of columns in T 186 - T - the T matrix, in form of a scalar array in column-major format 187 188 Note: 189 All output parameters are optional (pass NULL if not desired) 190 191 Level: advanced 192 193 .seealso: `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()` 194 @*/ 195 PetscErrorCode MatKAIJGetT(Mat A, PetscInt *m, PetscInt *n, PetscScalar **T) { 196 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 197 PetscFunctionBegin; 198 if (m) *m = b->p; 199 if (n) *n = b->q; 200 if (T) *T = b->T; 201 PetscFunctionReturn(0); 202 } 203 204 /*@C 205 MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the `MATKAIJ` matrix 206 207 Not Collective; the entire T is stored and returned independently on all processes 208 209 Input Parameter: 210 . A - the `MATKAIJ` matrix 211 212 Output Parameters: 213 + m - the number of rows in T 214 . n - the number of columns in T 215 - T - the T matrix, in form of a scalar array in column-major format 216 217 Note: All output parameters are optional (pass NULL if not desired) 218 219 Level: advanced 220 221 .seealso: `MATKAIJ`, `MatCreateKAIJ()`, `MatGetBlockSizes()` 222 @*/ 223 PetscErrorCode MatKAIJGetTRead(Mat A, PetscInt *m, PetscInt *n, const PetscScalar **T) { 224 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 225 PetscFunctionBegin; 226 if (m) *m = b->p; 227 if (n) *n = b->q; 228 if (T) *T = b->T; 229 PetscFunctionReturn(0); 230 } 231 232 /*@C 233 MatKAIJRestoreT - Restore array obtained with `MatKAIJGetT()` 234 235 Not collective 236 237 Input Parameter: 238 . A - the `MATKAIJ` matrix 239 240 Output Parameter: 241 . T - location of pointer to array obtained with `MatKAIJGetS()` 242 243 Note: 244 This routine zeros the array pointer to prevent accidental reuse after it has been restored. 245 If NULL is passed, it will not attempt to zero the array pointer. 246 247 Level: advanced 248 249 .seealso: `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()` 250 @*/ 251 PetscErrorCode MatKAIJRestoreT(Mat A, PetscScalar **T) { 252 PetscFunctionBegin; 253 if (T) *T = NULL; 254 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 255 PetscFunctionReturn(0); 256 } 257 258 /*@C 259 MatKAIJRestoreTRead - Restore array obtained with `MatKAIJGetTRead()` 260 261 Not collective 262 263 Input Parameter: 264 . A - the `MATKAIJ` matrix 265 266 Output Parameter: 267 . T - location of pointer to array obtained with `MatKAIJGetS()` 268 269 Note: 270 This routine zeros the array pointer to prevent accidental reuse after it has been restored. 271 If NULL is passed, it will not attempt to zero the array pointer. 272 273 Level: advanced 274 275 .seealso: `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()` 276 @*/ 277 PetscErrorCode MatKAIJRestoreTRead(Mat A, const PetscScalar **T) { 278 PetscFunctionBegin; 279 if (T) *T = NULL; 280 PetscFunctionReturn(0); 281 } 282 283 /*@ 284 MatKAIJSetAIJ - Set the `MATAIJ` matrix describing the blockwise action of the `MATKAIJ` matrix 285 286 Logically Collective; if the `MATAIJ` matrix is parallel, the `MATKAIJ` matrix is also parallel 287 288 Input Parameters: 289 + A - the `MATKAIJ` matrix 290 - B - the `MATAIJ` matrix 291 292 Notes: 293 This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed. 294 295 Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix. 296 297 Level: advanced 298 299 .seealso: `MATKAIJ`, `MatKAIJGetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()` 300 @*/ 301 PetscErrorCode MatKAIJSetAIJ(Mat A, Mat B) { 302 PetscMPIInt size; 303 PetscBool flg; 304 305 PetscFunctionBegin; 306 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 307 if (size == 1) { 308 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQAIJ, &flg)); 309 PetscCheck(flg, PetscObjectComm((PetscObject)B), PETSC_ERR_SUP, "MatKAIJSetAIJ() with MATSEQKAIJ does not support %s as the AIJ mat", ((PetscObject)B)->type_name); 310 Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data; 311 a->AIJ = B; 312 } else { 313 Mat_MPIKAIJ *a = (Mat_MPIKAIJ *)A->data; 314 a->A = B; 315 } 316 PetscCall(PetscObjectReference((PetscObject)B)); 317 PetscFunctionReturn(0); 318 } 319 320 /*@C 321 MatKAIJSetS - Set the S matrix describing the shift action of the `MATKAIJ` matrix 322 323 Logically Collective; the entire S is stored independently on all processes. 324 325 Input Parameters: 326 + A - the `MATKAIJ` matrix 327 . p - the number of rows in S 328 . q - the number of columns in S 329 - S - the S matrix, in form of a scalar array in column-major format 330 331 Notes: 332 The dimensions p and q must match those of the transformation matrix T associated with the `MATKAIJ` matrix. 333 334 The S matrix is copied, so the user can destroy this array. 335 336 Level: advanced 337 338 .seealso: `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJSetT()`, `MatKAIJSetAIJ()` 339 @*/ 340 PetscErrorCode MatKAIJSetS(Mat A, PetscInt p, PetscInt q, const PetscScalar S[]) { 341 Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data; 342 343 PetscFunctionBegin; 344 PetscCall(PetscFree(a->S)); 345 if (S) { 346 PetscCall(PetscMalloc1(p * q, &a->S)); 347 PetscCall(PetscMemcpy(a->S, S, p * q * sizeof(PetscScalar))); 348 } else a->S = NULL; 349 350 a->p = p; 351 a->q = q; 352 PetscFunctionReturn(0); 353 } 354 355 /*@C 356 MatKAIJGetScaledIdentity - Check if both S and T are scaled identities. 357 358 Logically Collective. 359 360 Input Parameter: 361 . A - the `MATKAIJ` matrix 362 363 Output Parameter: 364 . identity - the Boolean value 365 366 Level: Advanced 367 368 .seealso: `MATKAIJ`, `MatKAIJGetS()`, `MatKAIJGetT()` 369 @*/ 370 PetscErrorCode MatKAIJGetScaledIdentity(Mat A, PetscBool *identity) { 371 Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data; 372 PetscInt i, j; 373 374 PetscFunctionBegin; 375 if (a->p != a->q) { 376 *identity = PETSC_FALSE; 377 PetscFunctionReturn(0); 378 } else *identity = PETSC_TRUE; 379 if (!a->isTI || a->S) { 380 for (i = 0; i < a->p && *identity; i++) { 381 for (j = 0; j < a->p && *identity; j++) { 382 if (i != j) { 383 if (a->S && PetscAbsScalar(a->S[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE; 384 if (a->T && PetscAbsScalar(a->T[i + j * a->p]) > PETSC_SMALL) *identity = PETSC_FALSE; 385 } else { 386 if (a->S && PetscAbsScalar(a->S[i * (a->p + 1)] - a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE; 387 if (a->T && PetscAbsScalar(a->T[i * (a->p + 1)] - a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE; 388 } 389 } 390 } 391 } 392 PetscFunctionReturn(0); 393 } 394 395 /*@C 396 MatKAIJSetT - Set the transformation matrix T associated with the `MATKAIJ` matrix 397 398 Logically Collective; the entire T is stored independently on all processes. 399 400 Input Parameters: 401 + A - the `MATKAIJ` matrix 402 . p - the number of rows in S 403 . q - the number of columns in S 404 - T - the T matrix, in form of a scalar array in column-major format 405 406 Notes: 407 The dimensions p and q must match those of the shift matrix S associated with the `MATKAIJ` matrix. 408 409 The T matrix is copied, so the user can destroy this array. 410 411 Level: Advanced 412 413 .seealso: `MATKAIJ`, `MatKAIJGetT()`, `MatKAIJSetS()`, `MatKAIJSetAIJ()` 414 @*/ 415 PetscErrorCode MatKAIJSetT(Mat A, PetscInt p, PetscInt q, const PetscScalar T[]) { 416 PetscInt i, j; 417 Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data; 418 PetscBool isTI = PETSC_FALSE; 419 420 PetscFunctionBegin; 421 /* check if T is an identity matrix */ 422 if (T && (p == q)) { 423 isTI = PETSC_TRUE; 424 for (i = 0; i < p; i++) { 425 for (j = 0; j < q; j++) { 426 if (i == j) { 427 /* diagonal term must be 1 */ 428 if (T[i + j * p] != 1.0) isTI = PETSC_FALSE; 429 } else { 430 /* off-diagonal term must be 0 */ 431 if (T[i + j * p] != 0.0) isTI = PETSC_FALSE; 432 } 433 } 434 } 435 } 436 a->isTI = isTI; 437 438 PetscCall(PetscFree(a->T)); 439 if (T && (!isTI)) { 440 PetscCall(PetscMalloc1(p * q, &a->T)); 441 PetscCall(PetscMemcpy(a->T, T, p * q * sizeof(PetscScalar))); 442 } else a->T = NULL; 443 444 a->p = p; 445 a->q = q; 446 PetscFunctionReturn(0); 447 } 448 449 PetscErrorCode MatDestroy_SeqKAIJ(Mat A) { 450 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 451 452 PetscFunctionBegin; 453 PetscCall(MatDestroy(&b->AIJ)); 454 PetscCall(PetscFree(b->S)); 455 PetscCall(PetscFree(b->T)); 456 PetscCall(PetscFree(b->ibdiag)); 457 PetscCall(PetscFree5(b->sor.w, b->sor.y, b->sor.work, b->sor.t, b->sor.arr)); 458 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", NULL)); 459 PetscCall(PetscFree(A->data)); 460 PetscFunctionReturn(0); 461 } 462 463 PETSC_INTERN PetscErrorCode MatKAIJ_build_AIJ_OAIJ(Mat A) { 464 Mat_MPIKAIJ *a; 465 Mat_MPIAIJ *mpiaij; 466 PetscScalar *T; 467 PetscInt i, j; 468 PetscObjectState state; 469 470 PetscFunctionBegin; 471 a = (Mat_MPIKAIJ *)A->data; 472 mpiaij = (Mat_MPIAIJ *)a->A->data; 473 474 PetscCall(PetscObjectStateGet((PetscObject)a->A, &state)); 475 if (state == a->state) { 476 /* The existing AIJ and KAIJ members are up-to-date, so simply exit. */ 477 PetscFunctionReturn(0); 478 } else { 479 PetscCall(MatDestroy(&a->AIJ)); 480 PetscCall(MatDestroy(&a->OAIJ)); 481 if (a->isTI) { 482 /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL. 483 * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will 484 * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix 485 * to pass in. */ 486 PetscCall(PetscMalloc1(a->p * a->q, &T)); 487 for (i = 0; i < a->p; i++) { 488 for (j = 0; j < a->q; j++) { 489 if (i == j) T[i + j * a->p] = 1.0; 490 else T[i + j * a->p] = 0.0; 491 } 492 } 493 } else T = a->T; 494 PetscCall(MatCreateKAIJ(mpiaij->A, a->p, a->q, a->S, T, &a->AIJ)); 495 PetscCall(MatCreateKAIJ(mpiaij->B, a->p, a->q, NULL, T, &a->OAIJ)); 496 if (a->isTI) PetscCall(PetscFree(T)); 497 a->state = state; 498 } 499 500 PetscFunctionReturn(0); 501 } 502 503 PetscErrorCode MatSetUp_KAIJ(Mat A) { 504 PetscInt n; 505 PetscMPIInt size; 506 Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ *)A->data; 507 508 PetscFunctionBegin; 509 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 510 if (size == 1) { 511 PetscCall(MatSetSizes(A, seqkaij->p * seqkaij->AIJ->rmap->n, seqkaij->q * seqkaij->AIJ->cmap->n, seqkaij->p * seqkaij->AIJ->rmap->N, seqkaij->q * seqkaij->AIJ->cmap->N)); 512 PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p)); 513 PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q)); 514 PetscCall(PetscLayoutSetUp(A->rmap)); 515 PetscCall(PetscLayoutSetUp(A->cmap)); 516 } else { 517 Mat_MPIKAIJ *a; 518 Mat_MPIAIJ *mpiaij; 519 IS from, to; 520 Vec gvec; 521 522 a = (Mat_MPIKAIJ *)A->data; 523 mpiaij = (Mat_MPIAIJ *)a->A->data; 524 PetscCall(MatSetSizes(A, a->p * a->A->rmap->n, a->q * a->A->cmap->n, a->p * a->A->rmap->N, a->q * a->A->cmap->N)); 525 PetscCall(PetscLayoutSetBlockSize(A->rmap, seqkaij->p)); 526 PetscCall(PetscLayoutSetBlockSize(A->cmap, seqkaij->q)); 527 PetscCall(PetscLayoutSetUp(A->rmap)); 528 PetscCall(PetscLayoutSetUp(A->cmap)); 529 530 PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); 531 532 PetscCall(VecGetSize(mpiaij->lvec, &n)); 533 PetscCall(VecCreate(PETSC_COMM_SELF, &a->w)); 534 PetscCall(VecSetSizes(a->w, n * a->q, n * a->q)); 535 PetscCall(VecSetBlockSize(a->w, a->q)); 536 PetscCall(VecSetType(a->w, VECSEQ)); 537 538 /* create two temporary Index sets for build scatter gather */ 539 PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)a->A), a->q, n, mpiaij->garray, PETSC_COPY_VALUES, &from)); 540 PetscCall(ISCreateStride(PETSC_COMM_SELF, n * a->q, 0, 1, &to)); 541 542 /* create temporary global vector to generate scatter context */ 543 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A), a->q, a->q * a->A->cmap->n, a->q * a->A->cmap->N, NULL, &gvec)); 544 545 /* generate the scatter context */ 546 PetscCall(VecScatterCreate(gvec, from, a->w, to, &a->ctx)); 547 548 PetscCall(ISDestroy(&from)); 549 PetscCall(ISDestroy(&to)); 550 PetscCall(VecDestroy(&gvec)); 551 } 552 553 A->assembled = PETSC_TRUE; 554 PetscFunctionReturn(0); 555 } 556 557 PetscErrorCode MatView_KAIJ(Mat A, PetscViewer viewer) { 558 PetscViewerFormat format; 559 Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data; 560 Mat B; 561 PetscInt i; 562 PetscBool ismpikaij; 563 564 PetscFunctionBegin; 565 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij)); 566 PetscCall(PetscViewerGetFormat(viewer, &format)); 567 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) { 568 PetscCall(PetscViewerASCIIPrintf(viewer, "S and T have %" PetscInt_FMT " rows and %" PetscInt_FMT " columns\n", a->p, a->q)); 569 570 /* Print appropriate details for S. */ 571 if (!a->S) { 572 PetscCall(PetscViewerASCIIPrintf(viewer, "S is NULL\n")); 573 } else if (format == PETSC_VIEWER_ASCII_IMPL) { 574 PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of S are ")); 575 for (i = 0; i < (a->p * a->q); i++) { 576 #if defined(PETSC_USE_COMPLEX) 577 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->S[i]), (double)PetscImaginaryPart(a->S[i]))); 578 #else 579 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->S[i]))); 580 #endif 581 } 582 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 583 } 584 585 /* Print appropriate details for T. */ 586 if (a->isTI) { 587 PetscCall(PetscViewerASCIIPrintf(viewer, "T is the identity matrix\n")); 588 } else if (!a->T) { 589 PetscCall(PetscViewerASCIIPrintf(viewer, "T is NULL\n")); 590 } else if (format == PETSC_VIEWER_ASCII_IMPL) { 591 PetscCall(PetscViewerASCIIPrintf(viewer, "Entries of T are ")); 592 for (i = 0; i < (a->p * a->q); i++) { 593 #if defined(PETSC_USE_COMPLEX) 594 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e %18.16e ", (double)PetscRealPart(a->T[i]), (double)PetscImaginaryPart(a->T[i]))); 595 #else 596 PetscCall(PetscViewerASCIIPrintf(viewer, "%18.16e ", (double)PetscRealPart(a->T[i]))); 597 #endif 598 } 599 PetscCall(PetscViewerASCIIPrintf(viewer, "\n")); 600 } 601 602 /* Now print details for the AIJ matrix, using the AIJ viewer. */ 603 PetscCall(PetscViewerASCIIPrintf(viewer, "Now viewing the associated AIJ matrix:\n")); 604 if (ismpikaij) { 605 Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 606 PetscCall(MatView(b->A, viewer)); 607 } else { 608 PetscCall(MatView(a->AIJ, viewer)); 609 } 610 611 } else { 612 /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */ 613 PetscCall(MatConvert(A, MATAIJ, MAT_INITIAL_MATRIX, &B)); 614 PetscCall(MatView(B, viewer)); 615 PetscCall(MatDestroy(&B)); 616 } 617 PetscFunctionReturn(0); 618 } 619 620 PetscErrorCode MatDestroy_MPIKAIJ(Mat A) { 621 Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 622 623 PetscFunctionBegin; 624 PetscCall(MatDestroy(&b->AIJ)); 625 PetscCall(MatDestroy(&b->OAIJ)); 626 PetscCall(MatDestroy(&b->A)); 627 PetscCall(VecScatterDestroy(&b->ctx)); 628 PetscCall(VecDestroy(&b->w)); 629 PetscCall(PetscFree(b->S)); 630 PetscCall(PetscFree(b->T)); 631 PetscCall(PetscFree(b->ibdiag)); 632 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", NULL)); 633 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", NULL)); 634 PetscCall(PetscFree(A->data)); 635 PetscFunctionReturn(0); 636 } 637 638 /* --------------------------------------------------------------------------------------*/ 639 640 /* zz = yy + Axx */ 641 PetscErrorCode MatMultAdd_SeqKAIJ(Mat A, Vec xx, Vec yy, Vec zz) { 642 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 643 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 644 const PetscScalar *s = b->S, *t = b->T; 645 const PetscScalar *x, *v, *bx; 646 PetscScalar *y, *sums; 647 const PetscInt m = b->AIJ->rmap->n, *idx, *ii; 648 PetscInt n, i, jrow, j, l, p = b->p, q = b->q, k; 649 650 PetscFunctionBegin; 651 if (!yy) { 652 PetscCall(VecSet(zz, 0.0)); 653 } else { 654 PetscCall(VecCopy(yy, zz)); 655 } 656 if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0); 657 658 PetscCall(VecGetArrayRead(xx, &x)); 659 PetscCall(VecGetArray(zz, &y)); 660 idx = a->j; 661 v = a->a; 662 ii = a->i; 663 664 if (b->isTI) { 665 for (i = 0; i < m; i++) { 666 jrow = ii[i]; 667 n = ii[i + 1] - jrow; 668 sums = y + p * i; 669 for (j = 0; j < n; j++) { 670 for (k = 0; k < p; k++) sums[k] += v[jrow + j] * x[q * idx[jrow + j] + k]; 671 } 672 } 673 PetscCall(PetscLogFlops(3.0 * (a->nz) * p)); 674 } else if (t) { 675 for (i = 0; i < m; i++) { 676 jrow = ii[i]; 677 n = ii[i + 1] - jrow; 678 sums = y + p * i; 679 for (j = 0; j < n; j++) { 680 for (k = 0; k < p; k++) { 681 for (l = 0; l < q; l++) sums[k] += v[jrow + j] * t[k + l * p] * x[q * idx[jrow + j] + l]; 682 } 683 } 684 } 685 /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do), 686 * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T), 687 * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter 688 * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */ 689 PetscCall(PetscLogFlops((2.0 * p * q - p) * m + 2.0 * p * a->nz)); 690 } 691 if (s) { 692 for (i = 0; i < m; i++) { 693 sums = y + p * i; 694 bx = x + q * i; 695 if (i < b->AIJ->cmap->n) { 696 for (j = 0; j < q; j++) { 697 for (k = 0; k < p; k++) sums[k] += s[k + j * p] * bx[j]; 698 } 699 } 700 } 701 PetscCall(PetscLogFlops(2.0 * m * p * q)); 702 } 703 704 PetscCall(VecRestoreArrayRead(xx, &x)); 705 PetscCall(VecRestoreArray(zz, &y)); 706 PetscFunctionReturn(0); 707 } 708 709 PetscErrorCode MatMult_SeqKAIJ(Mat A, Vec xx, Vec yy) { 710 PetscFunctionBegin; 711 PetscCall(MatMultAdd_SeqKAIJ(A, xx, PETSC_NULL, yy)); 712 PetscFunctionReturn(0); 713 } 714 715 #include <petsc/private/kernels/blockinvert.h> 716 717 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A, const PetscScalar **values) { 718 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 719 Mat_SeqAIJ *a = (Mat_SeqAIJ *)b->AIJ->data; 720 const PetscScalar *S = b->S; 721 const PetscScalar *T = b->T; 722 const PetscScalar *v = a->a; 723 const PetscInt p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i; 724 PetscInt i, j, *v_pivots, dof, dof2; 725 PetscScalar *diag, aval, *v_work; 726 727 PetscFunctionBegin; 728 PetscCheck(p == q, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Block size must be square to calculate inverse."); 729 PetscCheck(S || T || b->isTI, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MATKAIJ: Cannot invert a zero matrix."); 730 731 dof = p; 732 dof2 = dof * dof; 733 734 if (b->ibdiagvalid) { 735 if (values) *values = b->ibdiag; 736 PetscFunctionReturn(0); 737 } 738 if (!b->ibdiag) { PetscCall(PetscMalloc1(dof2 * m, &b->ibdiag)); } 739 if (values) *values = b->ibdiag; 740 diag = b->ibdiag; 741 742 PetscCall(PetscMalloc2(dof, &v_work, dof, &v_pivots)); 743 for (i = 0; i < m; i++) { 744 if (S) { 745 PetscCall(PetscMemcpy(diag, S, dof2 * sizeof(PetscScalar))); 746 } else { 747 PetscCall(PetscMemzero(diag, dof2 * sizeof(PetscScalar))); 748 } 749 if (b->isTI) { 750 aval = 0; 751 for (j = ii[i]; j < ii[i + 1]; j++) 752 if (idx[j] == i) aval = v[j]; 753 for (j = 0; j < dof; j++) diag[j + dof * j] += aval; 754 } else if (T) { 755 aval = 0; 756 for (j = ii[i]; j < ii[i + 1]; j++) 757 if (idx[j] == i) aval = v[j]; 758 for (j = 0; j < dof2; j++) diag[j] += aval * T[j]; 759 } 760 PetscCall(PetscKernel_A_gets_inverse_A(dof, diag, v_pivots, v_work, PETSC_FALSE, NULL)); 761 diag += dof2; 762 } 763 PetscCall(PetscFree2(v_work, v_pivots)); 764 765 b->ibdiagvalid = PETSC_TRUE; 766 PetscFunctionReturn(0); 767 } 768 769 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A, Mat *B) { 770 Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ *)A->data; 771 772 PetscFunctionBegin; 773 *B = kaij->AIJ; 774 PetscFunctionReturn(0); 775 } 776 777 static PetscErrorCode MatConvert_KAIJ_AIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) { 778 Mat_SeqKAIJ *a = (Mat_SeqKAIJ *)A->data; 779 Mat AIJ, OAIJ, B; 780 PetscInt *d_nnz, *o_nnz = NULL, nz, i, j, m, d; 781 const PetscInt p = a->p, q = a->q; 782 PetscBool ismpikaij, missing; 783 784 PetscFunctionBegin; 785 if (reuse != MAT_REUSE_MATRIX) { 786 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIKAIJ, &ismpikaij)); 787 if (ismpikaij) { 788 Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 789 AIJ = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ; 790 OAIJ = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ; 791 } else { 792 AIJ = a->AIJ; 793 OAIJ = NULL; 794 } 795 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 796 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 797 PetscCall(MatSetType(B, MATAIJ)); 798 PetscCall(MatGetSize(AIJ, &m, NULL)); 799 PetscCall(MatMissingDiagonal(AIJ, &missing, &d)); /* assumption that all successive rows will have a missing diagonal */ 800 if (!missing || !a->S) d = m; 801 PetscCall(PetscMalloc1(m * p, &d_nnz)); 802 for (i = 0; i < m; ++i) { 803 PetscCall(MatGetRow_SeqAIJ(AIJ, i, &nz, NULL, NULL)); 804 for (j = 0; j < p; ++j) d_nnz[i * p + j] = nz * q + (i >= d) * q; 805 PetscCall(MatRestoreRow_SeqAIJ(AIJ, i, &nz, NULL, NULL)); 806 } 807 if (OAIJ) { 808 PetscCall(PetscMalloc1(m * p, &o_nnz)); 809 for (i = 0; i < m; ++i) { 810 PetscCall(MatGetRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL)); 811 for (j = 0; j < p; ++j) o_nnz[i * p + j] = nz * q; 812 PetscCall(MatRestoreRow_SeqAIJ(OAIJ, i, &nz, NULL, NULL)); 813 } 814 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 815 } else { 816 PetscCall(MatSeqAIJSetPreallocation(B, 0, d_nnz)); 817 } 818 PetscCall(PetscFree(d_nnz)); 819 PetscCall(PetscFree(o_nnz)); 820 } else B = *newmat; 821 PetscCall(MatConvert_Basic(A, newtype, MAT_REUSE_MATRIX, &B)); 822 if (reuse == MAT_INPLACE_MATRIX) { 823 PetscCall(MatHeaderReplace(A, &B)); 824 } else *newmat = B; 825 PetscFunctionReturn(0); 826 } 827 828 PetscErrorCode MatSOR_SeqKAIJ(Mat A, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) { 829 Mat_SeqKAIJ *kaij = (Mat_SeqKAIJ *)A->data; 830 Mat_SeqAIJ *a = (Mat_SeqAIJ *)kaij->AIJ->data; 831 const PetscScalar *aa = a->a, *T = kaij->T, *v; 832 const PetscInt m = kaij->AIJ->rmap->n, *ai = a->i, *aj = a->j, p = kaij->p, q = kaij->q, *diag, *vi; 833 const PetscScalar *b, *xb, *idiag; 834 PetscScalar *x, *work, *workt, *w, *y, *arr, *t, *arrt; 835 PetscInt i, j, k, i2, bs, bs2, nz; 836 837 PetscFunctionBegin; 838 its = its * lits; 839 PetscCheck(!(flag & SOR_EISENSTAT), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support yet for Eisenstat"); 840 PetscCheck(its > 0, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive", its, lits); 841 PetscCheck(!fshift, PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for diagonal shift"); 842 PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER), PETSC_COMM_SELF, PETSC_ERR_SUP, "No support for applying upper or lower triangular parts"); 843 PetscCheck(p == q, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatSOR for KAIJ: No support for non-square dense blocks"); 844 bs = p; 845 bs2 = bs * bs; 846 847 if (!m) PetscFunctionReturn(0); 848 849 if (!kaij->ibdiagvalid) PetscCall(MatInvertBlockDiagonal_SeqKAIJ(A, NULL)); 850 idiag = kaij->ibdiag; 851 diag = a->diag; 852 853 if (!kaij->sor.setup) { 854 PetscCall(PetscMalloc5(bs, &kaij->sor.w, bs, &kaij->sor.y, m * bs, &kaij->sor.work, m * bs, &kaij->sor.t, m * bs2, &kaij->sor.arr)); 855 kaij->sor.setup = PETSC_TRUE; 856 } 857 y = kaij->sor.y; 858 w = kaij->sor.w; 859 work = kaij->sor.work; 860 t = kaij->sor.t; 861 arr = kaij->sor.arr; 862 863 PetscCall(VecGetArray(xx, &x)); 864 PetscCall(VecGetArrayRead(bb, &b)); 865 866 if (flag & SOR_ZERO_INITIAL_GUESS) { 867 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 868 PetscKernel_w_gets_Ar_times_v(bs, bs, b, idiag, x); /* x[0:bs] <- D^{-1} b[0:bs] */ 869 PetscCall(PetscMemcpy(t, b, bs * sizeof(PetscScalar))); 870 i2 = bs; 871 idiag += bs2; 872 for (i = 1; i < m; i++) { 873 v = aa + ai[i]; 874 vi = aj + ai[i]; 875 nz = diag[i] - ai[i]; 876 877 if (T) { /* b - T (Arow * x) */ 878 PetscCall(PetscMemzero(w, bs * sizeof(PetscScalar))); 879 for (j = 0; j < nz; j++) { 880 for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k]; 881 } 882 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs, w, T, &t[i2]); 883 for (k = 0; k < bs; k++) t[i2 + k] += b[i2 + k]; 884 } else if (kaij->isTI) { 885 PetscCall(PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar))); 886 for (j = 0; j < nz; j++) { 887 for (k = 0; k < bs; k++) t[i2 + k] -= v[j] * x[vi[j] * bs + k]; 888 } 889 } else { 890 PetscCall(PetscMemcpy(t + i2, b + i2, bs * sizeof(PetscScalar))); 891 } 892 893 PetscKernel_w_gets_Ar_times_v(bs, bs, t + i2, idiag, y); 894 for (j = 0; j < bs; j++) x[i2 + j] = omega * y[j]; 895 896 idiag += bs2; 897 i2 += bs; 898 } 899 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 900 PetscCall(PetscLogFlops(1.0 * bs2 * a->nz)); 901 xb = t; 902 } else xb = b; 903 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 904 idiag = kaij->ibdiag + bs2 * (m - 1); 905 i2 = bs * (m - 1); 906 PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar))); 907 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, x + i2); 908 i2 -= bs; 909 idiag -= bs2; 910 for (i = m - 2; i >= 0; i--) { 911 v = aa + diag[i] + 1; 912 vi = aj + diag[i] + 1; 913 nz = ai[i + 1] - diag[i] - 1; 914 915 if (T) { /* FIXME: This branch untested */ 916 PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar))); 917 /* copy all rows of x that are needed into contiguous space */ 918 workt = work; 919 for (j = 0; j < nz; j++) { 920 PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 921 workt += bs; 922 } 923 arrt = arr; 924 for (j = 0; j < nz; j++) { 925 PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 926 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 927 arrt += bs2; 928 } 929 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 930 } else if (kaij->isTI) { 931 PetscCall(PetscMemcpy(w, t + i2, bs * sizeof(PetscScalar))); 932 for (j = 0; j < nz; j++) { 933 for (k = 0; k < bs; k++) w[k] -= v[j] * x[vi[j] * bs + k]; 934 } 935 } 936 937 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); /* RHS incorrect for omega != 1.0 */ 938 for (j = 0; j < bs; j++) x[i2 + j] = (1.0 - omega) * x[i2 + j] + omega * y[j]; 939 940 idiag -= bs2; 941 i2 -= bs; 942 } 943 PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz))); 944 } 945 its--; 946 } 947 while (its--) { /* FIXME: This branch not updated */ 948 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 949 i2 = 0; 950 idiag = kaij->ibdiag; 951 for (i = 0; i < m; i++) { 952 PetscCall(PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar))); 953 954 v = aa + ai[i]; 955 vi = aj + ai[i]; 956 nz = diag[i] - ai[i]; 957 workt = work; 958 for (j = 0; j < nz; j++) { 959 PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 960 workt += bs; 961 } 962 arrt = arr; 963 if (T) { 964 for (j = 0; j < nz; j++) { 965 PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 966 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 967 arrt += bs2; 968 } 969 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 970 } else if (kaij->isTI) { 971 for (j = 0; j < nz; j++) { 972 PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar))); 973 for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 974 arrt += bs2; 975 } 976 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 977 } 978 PetscCall(PetscMemcpy(t + i2, w, bs * sizeof(PetscScalar))); 979 980 v = aa + diag[i] + 1; 981 vi = aj + diag[i] + 1; 982 nz = ai[i + 1] - diag[i] - 1; 983 workt = work; 984 for (j = 0; j < nz; j++) { 985 PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 986 workt += bs; 987 } 988 arrt = arr; 989 if (T) { 990 for (j = 0; j < nz; j++) { 991 PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 992 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 993 arrt += bs2; 994 } 995 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 996 } else if (kaij->isTI) { 997 for (j = 0; j < nz; j++) { 998 PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar))); 999 for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 1000 arrt += bs2; 1001 } 1002 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1003 } 1004 1005 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); 1006 for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j); 1007 1008 idiag += bs2; 1009 i2 += bs; 1010 } 1011 xb = t; 1012 } else xb = b; 1013 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1014 idiag = kaij->ibdiag + bs2 * (m - 1); 1015 i2 = bs * (m - 1); 1016 if (xb == b) { 1017 for (i = m - 1; i >= 0; i--) { 1018 PetscCall(PetscMemcpy(w, b + i2, bs * sizeof(PetscScalar))); 1019 1020 v = aa + ai[i]; 1021 vi = aj + ai[i]; 1022 nz = diag[i] - ai[i]; 1023 workt = work; 1024 for (j = 0; j < nz; j++) { 1025 PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 1026 workt += bs; 1027 } 1028 arrt = arr; 1029 if (T) { 1030 for (j = 0; j < nz; j++) { 1031 PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 1032 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 1033 arrt += bs2; 1034 } 1035 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1036 } else if (kaij->isTI) { 1037 for (j = 0; j < nz; j++) { 1038 PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar))); 1039 for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 1040 arrt += bs2; 1041 } 1042 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1043 } 1044 1045 v = aa + diag[i] + 1; 1046 vi = aj + diag[i] + 1; 1047 nz = ai[i + 1] - diag[i] - 1; 1048 workt = work; 1049 for (j = 0; j < nz; j++) { 1050 PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 1051 workt += bs; 1052 } 1053 arrt = arr; 1054 if (T) { 1055 for (j = 0; j < nz; j++) { 1056 PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 1057 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 1058 arrt += bs2; 1059 } 1060 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1061 } else if (kaij->isTI) { 1062 for (j = 0; j < nz; j++) { 1063 PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar))); 1064 for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 1065 arrt += bs2; 1066 } 1067 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1068 } 1069 1070 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); 1071 for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j); 1072 } 1073 } else { 1074 for (i = m - 1; i >= 0; i--) { 1075 PetscCall(PetscMemcpy(w, xb + i2, bs * sizeof(PetscScalar))); 1076 v = aa + diag[i] + 1; 1077 vi = aj + diag[i] + 1; 1078 nz = ai[i + 1] - diag[i] - 1; 1079 workt = work; 1080 for (j = 0; j < nz; j++) { 1081 PetscCall(PetscMemcpy(workt, x + bs * (*vi++), bs * sizeof(PetscScalar))); 1082 workt += bs; 1083 } 1084 arrt = arr; 1085 if (T) { 1086 for (j = 0; j < nz; j++) { 1087 PetscCall(PetscMemcpy(arrt, T, bs2 * sizeof(PetscScalar))); 1088 for (k = 0; k < bs2; k++) arrt[k] *= v[j]; 1089 arrt += bs2; 1090 } 1091 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1092 } else if (kaij->isTI) { 1093 for (j = 0; j < nz; j++) { 1094 PetscCall(PetscMemzero(arrt, bs2 * sizeof(PetscScalar))); 1095 for (k = 0; k < bs; k++) arrt[k + bs * k] = v[j]; 1096 arrt += bs2; 1097 } 1098 PetscKernel_w_gets_w_minus_Ar_times_v(bs, bs * nz, w, arr, work); 1099 } 1100 PetscKernel_w_gets_Ar_times_v(bs, bs, w, idiag, y); 1101 for (j = 0; j < bs; j++) *(x + i2 + j) = (1.0 - omega) * *(x + i2 + j) + omega * *(y + j); 1102 } 1103 } 1104 PetscCall(PetscLogFlops(1.0 * bs2 * (a->nz))); 1105 } 1106 } 1107 1108 PetscCall(VecRestoreArray(xx, &x)); 1109 PetscCall(VecRestoreArrayRead(bb, &b)); 1110 PetscFunctionReturn(0); 1111 } 1112 1113 /*===================================================================================*/ 1114 1115 PetscErrorCode MatMultAdd_MPIKAIJ(Mat A, Vec xx, Vec yy, Vec zz) { 1116 Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 1117 1118 PetscFunctionBegin; 1119 if (!yy) { 1120 PetscCall(VecSet(zz, 0.0)); 1121 } else { 1122 PetscCall(VecCopy(yy, zz)); 1123 } 1124 PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */ 1125 /* start the scatter */ 1126 PetscCall(VecScatterBegin(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 1127 PetscCall((*b->AIJ->ops->multadd)(b->AIJ, xx, zz, zz)); 1128 PetscCall(VecScatterEnd(b->ctx, xx, b->w, INSERT_VALUES, SCATTER_FORWARD)); 1129 PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ, b->w, zz, zz)); 1130 PetscFunctionReturn(0); 1131 } 1132 1133 PetscErrorCode MatMult_MPIKAIJ(Mat A, Vec xx, Vec yy) { 1134 PetscFunctionBegin; 1135 PetscCall(MatMultAdd_MPIKAIJ(A, xx, PETSC_NULL, yy)); 1136 PetscFunctionReturn(0); 1137 } 1138 1139 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A, const PetscScalar **values) { 1140 Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 1141 1142 PetscFunctionBegin; 1143 PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ is up to date. */ 1144 PetscCall((*b->AIJ->ops->invertblockdiagonal)(b->AIJ, values)); 1145 PetscFunctionReturn(0); 1146 } 1147 1148 /* ----------------------------------------------------------------*/ 1149 1150 PetscErrorCode MatGetRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values) { 1151 Mat_SeqKAIJ *b = (Mat_SeqKAIJ *)A->data; 1152 PetscErrorCode diag = PETSC_FALSE; 1153 PetscInt nzaij, nz, *colsaij, *idx, i, j, p = b->p, q = b->q, r = row / p, s = row % p, c; 1154 PetscScalar *vaij, *v, *S = b->S, *T = b->T; 1155 1156 PetscFunctionBegin; 1157 PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1158 b->getrowactive = PETSC_TRUE; 1159 PetscCheck(row >= 0 && row < A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range", row); 1160 1161 if ((!S) && (!T) && (!b->isTI)) { 1162 if (ncols) *ncols = 0; 1163 if (cols) *cols = NULL; 1164 if (values) *values = NULL; 1165 PetscFunctionReturn(0); 1166 } 1167 1168 if (T || b->isTI) { 1169 PetscCall(MatGetRow_SeqAIJ(b->AIJ, r, &nzaij, &colsaij, &vaij)); 1170 c = nzaij; 1171 for (i = 0; i < nzaij; i++) { 1172 /* check if this row contains a diagonal entry */ 1173 if (colsaij[i] == r) { 1174 diag = PETSC_TRUE; 1175 c = i; 1176 } 1177 } 1178 } else nzaij = c = 0; 1179 1180 /* calculate size of row */ 1181 nz = 0; 1182 if (S) nz += q; 1183 if (T || b->isTI) nz += (diag && S ? (nzaij - 1) * q : nzaij * q); 1184 1185 if (cols || values) { 1186 PetscCall(PetscMalloc2(nz, &idx, nz, &v)); 1187 for (i = 0; i < q; i++) { 1188 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1189 v[i] = 0.0; 1190 } 1191 if (b->isTI) { 1192 for (i = 0; i < nzaij; i++) { 1193 for (j = 0; j < q; j++) { 1194 idx[i * q + j] = colsaij[i] * q + j; 1195 v[i * q + j] = (j == s ? vaij[i] : 0); 1196 } 1197 } 1198 } else if (T) { 1199 for (i = 0; i < nzaij; i++) { 1200 for (j = 0; j < q; j++) { 1201 idx[i * q + j] = colsaij[i] * q + j; 1202 v[i * q + j] = vaij[i] * T[s + j * p]; 1203 } 1204 } 1205 } 1206 if (S) { 1207 for (j = 0; j < q; j++) { 1208 idx[c * q + j] = r * q + j; 1209 v[c * q + j] += S[s + j * p]; 1210 } 1211 } 1212 } 1213 1214 if (ncols) *ncols = nz; 1215 if (cols) *cols = idx; 1216 if (values) *values = v; 1217 PetscFunctionReturn(0); 1218 } 1219 1220 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) { 1221 PetscFunctionBegin; 1222 if (nz) *nz = 0; 1223 PetscCall(PetscFree2(*idx, *v)); 1224 ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE; 1225 PetscFunctionReturn(0); 1226 } 1227 1228 PetscErrorCode MatGetRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *ncols, PetscInt **cols, PetscScalar **values) { 1229 Mat_MPIKAIJ *b = (Mat_MPIKAIJ *)A->data; 1230 Mat AIJ = b->A; 1231 PetscBool diag = PETSC_FALSE; 1232 Mat MatAIJ, MatOAIJ; 1233 const PetscInt rstart = A->rmap->rstart, rend = A->rmap->rend, p = b->p, q = b->q, *garray; 1234 PetscInt nz, *idx, ncolsaij = 0, ncolsoaij = 0, *colsaij, *colsoaij, r, s, c, i, j, lrow; 1235 PetscScalar *v, *vals, *ovals, *S = b->S, *T = b->T; 1236 1237 PetscFunctionBegin; 1238 PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */ 1239 MatAIJ = ((Mat_SeqKAIJ *)b->AIJ->data)->AIJ; 1240 MatOAIJ = ((Mat_SeqKAIJ *)b->OAIJ->data)->AIJ; 1241 PetscCheck(!b->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1242 b->getrowactive = PETSC_TRUE; 1243 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1244 lrow = row - rstart; 1245 1246 if ((!S) && (!T) && (!b->isTI)) { 1247 if (ncols) *ncols = 0; 1248 if (cols) *cols = NULL; 1249 if (values) *values = NULL; 1250 PetscFunctionReturn(0); 1251 } 1252 1253 r = lrow / p; 1254 s = lrow % p; 1255 1256 if (T || b->isTI) { 1257 PetscCall(MatMPIAIJGetSeqAIJ(AIJ, NULL, NULL, &garray)); 1258 PetscCall(MatGetRow_SeqAIJ(MatAIJ, lrow / p, &ncolsaij, &colsaij, &vals)); 1259 PetscCall(MatGetRow_SeqAIJ(MatOAIJ, lrow / p, &ncolsoaij, &colsoaij, &ovals)); 1260 1261 c = ncolsaij + ncolsoaij; 1262 for (i = 0; i < ncolsaij; i++) { 1263 /* check if this row contains a diagonal entry */ 1264 if (colsaij[i] == r) { 1265 diag = PETSC_TRUE; 1266 c = i; 1267 } 1268 } 1269 } else c = 0; 1270 1271 /* calculate size of row */ 1272 nz = 0; 1273 if (S) nz += q; 1274 if (T || b->isTI) nz += (diag && S ? (ncolsaij + ncolsoaij - 1) * q : (ncolsaij + ncolsoaij) * q); 1275 1276 if (cols || values) { 1277 PetscCall(PetscMalloc2(nz, &idx, nz, &v)); 1278 for (i = 0; i < q; i++) { 1279 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1280 v[i] = 0.0; 1281 } 1282 if (b->isTI) { 1283 for (i = 0; i < ncolsaij; i++) { 1284 for (j = 0; j < q; j++) { 1285 idx[i * q + j] = (colsaij[i] + rstart / p) * q + j; 1286 v[i * q + j] = (j == s ? vals[i] : 0.0); 1287 } 1288 } 1289 for (i = 0; i < ncolsoaij; i++) { 1290 for (j = 0; j < q; j++) { 1291 idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j; 1292 v[(i + ncolsaij) * q + j] = (j == s ? ovals[i] : 0.0); 1293 } 1294 } 1295 } else if (T) { 1296 for (i = 0; i < ncolsaij; i++) { 1297 for (j = 0; j < q; j++) { 1298 idx[i * q + j] = (colsaij[i] + rstart / p) * q + j; 1299 v[i * q + j] = vals[i] * T[s + j * p]; 1300 } 1301 } 1302 for (i = 0; i < ncolsoaij; i++) { 1303 for (j = 0; j < q; j++) { 1304 idx[(i + ncolsaij) * q + j] = garray[colsoaij[i]] * q + j; 1305 v[(i + ncolsaij) * q + j] = ovals[i] * T[s + j * p]; 1306 } 1307 } 1308 } 1309 if (S) { 1310 for (j = 0; j < q; j++) { 1311 idx[c * q + j] = (r + rstart / p) * q + j; 1312 v[c * q + j] += S[s + j * p]; 1313 } 1314 } 1315 } 1316 1317 if (ncols) *ncols = nz; 1318 if (cols) *cols = idx; 1319 if (values) *values = v; 1320 PetscFunctionReturn(0); 1321 } 1322 1323 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) { 1324 PetscFunctionBegin; 1325 PetscCall(PetscFree2(*idx, *v)); 1326 ((Mat_SeqKAIJ *)A->data)->getrowactive = PETSC_FALSE; 1327 PetscFunctionReturn(0); 1328 } 1329 1330 PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat, IS isrow, IS iscol, MatReuse cll, Mat *newmat) { 1331 Mat A; 1332 1333 PetscFunctionBegin; 1334 PetscCall(MatConvert(mat, MATAIJ, MAT_INITIAL_MATRIX, &A)); 1335 PetscCall(MatCreateSubMatrix(A, isrow, iscol, cll, newmat)); 1336 PetscCall(MatDestroy(&A)); 1337 PetscFunctionReturn(0); 1338 } 1339 1340 /* ---------------------------------------------------------------------------------- */ 1341 /*@C 1342 MatCreateKAIJ - Creates a matrix of type `MATKAIJ` to be used for matrices of the following form: 1343 1344 [I \otimes S + A \otimes T] 1345 1346 where 1347 S is a dense (p \times q) matrix 1348 T is a dense (p \times q) matrix 1349 A is an `MATAIJ` (n \times n) matrix 1350 I is the identity matrix 1351 The resulting matrix is (np \times nq) 1352 1353 S and T are always stored independently on all processes as `PetscScalar` arrays in column-major format. 1354 1355 Collective 1356 1357 Input Parameters: 1358 + A - the `MATAIJ` matrix 1359 . p - number of rows in S and T 1360 . q - number of columns in S and T 1361 . S - the S matrix (can be NULL), stored as a `PetscScalar` array (column-major) 1362 - T - the T matrix (can be NULL), stored as a `PetscScalar` array (column-major) 1363 1364 Output Parameter: 1365 . kaij - the new `MATKAIJ` matrix 1366 1367 Notes: 1368 This function increases the reference count on the `MATAIJ` matrix, so the user is free to destroy the matrix if it is not needed. 1369 1370 Changes to the entries of the `MATAIJ` matrix will immediately affect the `MATKAIJ` matrix. 1371 1372 Developer Note: 1373 In the `MATMPIKAIJ` case, the internal 'AIJ' and 'OAIJ' sequential KAIJ matrices are kept up to date by tracking the object state 1374 of the AIJ matrix 'A' that describes the blockwise action of the MATMPIKAIJ matrix and, if the object state has changed, lazily 1375 rebuilding 'AIJ' and 'OAIJ' just before executing operations with the MATMPIKAIJ matrix. If new types of operations are added, 1376 routines implementing those must also ensure these are rebuilt when needed (by calling the internal MatKAIJ_build_AIJ_OAIJ() routine). 1377 1378 Level: advanced 1379 1380 .seealso: `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MATKAIJ` 1381 @*/ 1382 PetscErrorCode MatCreateKAIJ(Mat A, PetscInt p, PetscInt q, const PetscScalar S[], const PetscScalar T[], Mat *kaij) { 1383 PetscFunctionBegin; 1384 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), kaij)); 1385 PetscCall(MatSetType(*kaij, MATKAIJ)); 1386 PetscCall(MatKAIJSetAIJ(*kaij, A)); 1387 PetscCall(MatKAIJSetS(*kaij, p, q, S)); 1388 PetscCall(MatKAIJSetT(*kaij, p, q, T)); 1389 PetscCall(MatSetUp(*kaij)); 1390 PetscFunctionReturn(0); 1391 } 1392 1393 /*MC 1394 MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form 1395 [I \otimes S + A \otimes T], 1396 where 1397 S is a dense (p \times q) matrix, 1398 T is a dense (p \times q) matrix, 1399 A is an AIJ (n \times n) matrix, 1400 and I is the identity matrix. 1401 The resulting matrix is (np \times nq). 1402 1403 S and T are always stored independently on all processes as `PetscScalar` arrays in column-major format. 1404 1405 Note: 1406 A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b, 1407 where x and b are column vectors containing the row-major representations of X and B. 1408 1409 Level: advanced 1410 1411 .seealso: `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MatCreateKAIJ()` 1412 M*/ 1413 1414 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A) { 1415 Mat_MPIKAIJ *b; 1416 PetscMPIInt size; 1417 1418 PetscFunctionBegin; 1419 PetscCall(PetscNew(&b)); 1420 A->data = (void *)b; 1421 1422 PetscCall(PetscMemzero(A->ops, sizeof(struct _MatOps))); 1423 1424 b->w = NULL; 1425 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 1426 if (size == 1) { 1427 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATSEQKAIJ)); 1428 A->ops->destroy = MatDestroy_SeqKAIJ; 1429 A->ops->mult = MatMult_SeqKAIJ; 1430 A->ops->multadd = MatMultAdd_SeqKAIJ; 1431 A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ; 1432 A->ops->getrow = MatGetRow_SeqKAIJ; 1433 A->ops->restorerow = MatRestoreRow_SeqKAIJ; 1434 A->ops->sor = MatSOR_SeqKAIJ; 1435 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_seqkaij_seqaij_C", MatConvert_KAIJ_AIJ)); 1436 } else { 1437 PetscCall(PetscObjectChangeTypeName((PetscObject)A, MATMPIKAIJ)); 1438 A->ops->destroy = MatDestroy_MPIKAIJ; 1439 A->ops->mult = MatMult_MPIKAIJ; 1440 A->ops->multadd = MatMultAdd_MPIKAIJ; 1441 A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ; 1442 A->ops->getrow = MatGetRow_MPIKAIJ; 1443 A->ops->restorerow = MatRestoreRow_MPIKAIJ; 1444 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatGetDiagonalBlock_C", MatGetDiagonalBlock_MPIKAIJ)); 1445 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatConvert_mpikaij_mpiaij_C", MatConvert_KAIJ_AIJ)); 1446 } 1447 A->ops->setup = MatSetUp_KAIJ; 1448 A->ops->view = MatView_KAIJ; 1449 A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ; 1450 PetscFunctionReturn(0); 1451 } 1452