/*
  Defines the basic matrix operations for the KAIJ matrix storage format.
  This format is used to evaluate matrices of the form:

        [I \otimes S + A \otimes T]

  where
        S is a dense (p \times q) matrix
        T is a dense (p \times q) matrix
        A is an AIJ  (n \times n) matrix
        I is the identity matrix

  The resulting matrix is (np \times nq)

  We provide:
     MatMult()
     MatMultAdd()
     MatInvertBlockDiagonal()
  and
     MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)

  This single directory handles both the sequential and parallel codes
*/

#include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/
#include <../src/mat/utils/freespace.h>
#include <petsc/private/vecimpl.h>

/*@C
   MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix

   Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel

   Input Parameter:
.  A - the KAIJ matrix

   Output Parameter:
.  B - the AIJ matrix

   Level: advanced

   Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.

.seealso: `MatCreateKAIJ()`
@*/
PetscErrorCode MatKAIJGetAIJ(Mat A,Mat *B)
{
  PetscBool ismpikaij,isseqkaij;

  PetscFunctionBegin;
  /* The MPI and sequential KAIJ structs store the underlying AIJ matrix in different
     members (A vs AIJ), so dispatch on the concrete type of the input matrix. */
  PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij));
  PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij));
  if (ismpikaij) {
    Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;

    *B = b->A;
  } else if (isseqkaij) {
    Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;

    *B = b->AIJ;
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ");
  PetscFunctionReturn(0);
}

/*@C
   MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix

   Not Collective; the entire S is stored and returned independently on all processes.

   Input Parameter:
.  A - the KAIJ matrix

   Output Parameters:
+  m - the number of rows in S
.  n - the number of columns in S
-  S - the S matrix, in form of a scalar array in column-major format

   Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)

   Level: advanced

.seealso: `MatCreateKAIJ()`, `MatGetBlockSizes()`
@*/
PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S)
{
  Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
  PetscFunctionBegin;
  if (m) *m = b->p;
  if (n) *n = b->q;
  if (S) *S = b->S;  /* may be NULL if no shift matrix has been set */
  PetscFunctionReturn(0);
}

/*@C
   MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix

   Not Collective; the entire S is stored and returned independently on all processes.

   Input Parameter:
.  A - the KAIJ matrix

   Output Parameters:
+  m - the number of rows in S
.  n - the number of columns in S
-  S - the S matrix, in form of a scalar array in column-major format

   Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)

   Level: advanced

.seealso: `MatCreateKAIJ()`, `MatGetBlockSizes()`
@*/
PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S)
{
  Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
  PetscFunctionBegin;
  if (m) *m = b->p;
  if (n) *n = b->q;
  if (S) *S = b->S;
  PetscFunctionReturn(0);
}

/*@C
  MatKAIJRestoreS - Restore array obtained with MatKAIJGetS()

  Not collective

  Input Parameter:
. A - the KAIJ matrix

  Output Parameter:
. S - location of pointer to array obtained with MatKAIJGetS()

  Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
  If NULL is passed, it will not attempt to zero the array pointer.

  Level: advanced
.seealso: `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()`
@*/
PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S)
{
  PetscFunctionBegin;
  if (S) *S = NULL;
  /* The caller had write access to S, so bump the object state to invalidate
     anything cached from the previous values (contrast with MatKAIJRestoreSRead()). */
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  PetscFunctionReturn(0);
}

/*@C
  MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead()

  Not collective

  Input Parameter:
. A - the KAIJ matrix

  Output Parameter:
. S - location of pointer to array obtained with MatKAIJGetSRead()

  Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
  If NULL is passed, it will not attempt to zero the array pointer.

  Level: advanced
.seealso: `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()`
@*/
PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S)
{
  PetscFunctionBegin;
  /* read-only access: no object-state increase needed */
  if (S) *S = NULL;
  PetscFunctionReturn(0);
}

/*@C
   MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix

   Not Collective; the entire T is stored and returned independently on all processes

   Input Parameter:
.  A - the KAIJ matrix

   Output Parameters:
+  m - the number of rows in T
.  n - the number of columns in T
-  T - the T matrix, in form of a scalar array in column-major format

   Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)

   Level: advanced

.seealso: `MatCreateKAIJ()`, `MatGetBlockSizes()`
@*/
PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T)
{
  Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
  PetscFunctionBegin;
  if (m) *m = b->p;
  if (n) *n = b->q;
  if (T) *T = b->T;  /* NULL when T was never set, or when T is the identity (isTI, see MatKAIJSetT()) */
  PetscFunctionReturn(0);
}

/*@C
   MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix

   Not Collective; the entire T is stored and returned independently on all processes

   Input Parameter:
.  A - the KAIJ matrix

   Output Parameters:
+  m - the number of rows in T
.  n - the number of columns in T
-  T - the T matrix, in form of a scalar array in column-major format

   Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)

   Level: advanced

.seealso: `MatCreateKAIJ()`, `MatGetBlockSizes()`
@*/
PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T)
{
  Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
  PetscFunctionBegin;
  if (m) *m = b->p;
  if (n) *n = b->q;
  if (T) *T = b->T;
  PetscFunctionReturn(0);
}

/*@C
  MatKAIJRestoreT - Restore array obtained with MatKAIJGetT()

  Not collective

  Input Parameter:
. A - the KAIJ matrix

  Output Parameter:
. T - location of pointer to array obtained with MatKAIJGetT()

  Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
  If NULL is passed, it will not attempt to zero the array pointer.

  Level: advanced
.seealso: `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()`
@*/
PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T)
{
  PetscFunctionBegin;
  if (T) *T = NULL;
  /* Writable access may have modified T; invalidate cached state. */
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  PetscFunctionReturn(0);
}

/*@C
  MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead()

  Not collective

  Input Parameter:
. A - the KAIJ matrix

  Output Parameter:
. T - location of pointer to array obtained with MatKAIJGetTRead()

  Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
  If NULL is passed, it will not attempt to zero the array pointer.

  Level: advanced
.seealso: `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()`
@*/
PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T)
{
  PetscFunctionBegin;
  if (T) *T = NULL;
  PetscFunctionReturn(0);
}

/*@
   MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix

   Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel

   Input Parameters:
+  A - the KAIJ matrix
-  B - the AIJ matrix

   Notes:
   This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
   Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.

   Level: advanced

.seealso: `MatKAIJGetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`
@*/
PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B)
{
  PetscMPIInt size;
  PetscBool   flg;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
  if (size == 1) {
    /* Sequential case requires B to be MATSEQAIJ exactly */
    PetscCall(PetscObjectTypeCompare((PetscObject)B,MATSEQAIJ,&flg));
    PetscCheck(flg,PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MatKAIJSetAIJ() with MATSEQKAIJ does not support %s as the AIJ mat",((PetscObject)B)->type_name);
    Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
    /* NOTE(review): a previously set AIJ matrix is overwritten here without a MatDestroy();
       verify callers only invoke this once per KAIJ matrix, otherwise a reference leaks. */
    a->AIJ = B;
  } else {
    Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data;
    a->A = B;
  }
  /* KAIJ shares (does not copy) B, so take a reference on behalf of A */
  PetscCall(PetscObjectReference((PetscObject)B));
  PetscFunctionReturn(0);
}

/*@C
   MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix

   Logically Collective; the entire S is stored independently on all processes.

   Input Parameters:
+  A - the KAIJ matrix
.  p - the number of rows in S
.  q - the number of columns in S
-  S - the S matrix, in form of a scalar array in column-major format

   Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix.
   The S matrix is copied, so the user can destroy this array.

   Level: Advanced

.seealso: `MatKAIJGetS()`, `MatKAIJSetT()`, `MatKAIJSetAIJ()`
@*/
PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[])
{
  Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;

  PetscFunctionBegin;
  /* Drop any previously stored S (PetscFree() of NULL is a no-op), then deep-copy the new one */
  PetscCall(PetscFree(a->S));
  if (S) {
    PetscCall(PetscMalloc1(p*q,&a->S));
    PetscCall(PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar)));
  } else a->S = NULL;

  a->p = p;
  a->q = q;
  PetscFunctionReturn(0);
}

/*@C
   MatKAIJGetScaledIdentity - Check if both S and T are scaled identities.

   Logically Collective.

   Input Parameter:
.  A - the KAIJ matrix

   Output Parameter:
.  identity - the Boolean value

   Level: Advanced

.seealso: `MatKAIJGetS()`, `MatKAIJGetT()`
@*/
PetscErrorCode MatKAIJGetScaledIdentity(Mat A,PetscBool* identity)
{
  Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
  PetscInt    i,j;

  PetscFunctionBegin;
  /* Non-square blocks can never be (scaled) identities */
  if (a->p != a->q) {
    *identity = PETSC_FALSE;
    PetscFunctionReturn(0);
  } else *identity = PETSC_TRUE;
  /* If T is the exact identity (isTI) and S is absent, the answer is trivially true;
     otherwise scan both dense blocks, bailing out as soon as *identity flips to false. */
  if (!a->isTI || a->S) {
    for (i=0; i<a->p && *identity; i++) {
      for (j=0; j<a->p && *identity; j++) {
        if (i != j) {
          /* off-diagonal entries must be (numerically) zero */
          if (a->S && PetscAbsScalar(a->S[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
          if (a->T && PetscAbsScalar(a->T[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
        } else {
          /* all diagonal entries must equal the first diagonal entry (a common scale) */
          if (a->S && PetscAbsScalar(a->S[i*(a->p+1)]-a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
          if (a->T && PetscAbsScalar(a->T[i*(a->p+1)]-a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/*@C
   MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix

   Logically Collective; the entire T is stored independently on all processes.

   Input Parameters:
+  A - the KAIJ matrix
.  p - the number of rows in T
.  q - the number of columns in T
-  T - the T matrix, in form of a scalar array in column-major format

   Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix.
   The T matrix is copied, so the user can destroy this array.

   Level: Advanced

.seealso: `MatKAIJGetT()`, `MatKAIJSetS()`, `MatKAIJSetAIJ()`
@*/
PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[])
{
  PetscInt    i,j;
  Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
  PetscBool   isTI = PETSC_FALSE;

  PetscFunctionBegin;
  /* check if T is an identity matrix */
  if (T && (p == q)) {
    isTI = PETSC_TRUE;
    for (i=0; i<p; i++) {
      for (j=0; j<q; j++) {
        if (i == j) {
          /* diagonal term must be 1 */
          if (T[i+j*p] != 1.0) isTI = PETSC_FALSE;
        } else {
          /* off-diagonal term must be 0 */
          if (T[i+j*p] != 0.0) isTI = PETSC_FALSE;
        }
      }
    }
  }
  a->isTI = isTI;

  /* When T is the identity it is stored implicitly (a->T == NULL, a->isTI == PETSC_TRUE)
     so the multiply kernels can use the cheaper identity path. */
  PetscCall(PetscFree(a->T));
  if (T && (!isTI)) {
    PetscCall(PetscMalloc1(p*q,&a->T));
    PetscCall(PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar)));
  } else a->T = NULL;

  a->p = p;
  a->q = q;
  PetscFunctionReturn(0);
}

/* Destroy the sequential KAIJ matrix: release the shared AIJ matrix, the dense S/T blocks,
   the cached inverted block diagonal, and the SOR work arrays. */
PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
{
  Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDestroy(&b->AIJ));
  PetscCall(PetscFree(b->S));
  PetscCall(PetscFree(b->T));
  PetscCall(PetscFree(b->ibdiag));
  PetscCall(PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqkaij_seqaij_C",NULL));
  PetscCall(PetscFree(A->data));
  PetscFunctionReturn(0);
}

/* (Re)build the sequential KAIJ matrices a->AIJ (diagonal part) and a->OAIJ (off-diagonal
   part) of a parallel KAIJ matrix from the diagonal/off-diagonal parts of the underlying
   MPIAIJ matrix. Uses the object state of a->A to skip the rebuild when nothing changed. */
PETSC_INTERN PetscErrorCode MatKAIJ_build_AIJ_OAIJ(Mat A)
{
  Mat_MPIKAIJ      *a;
  Mat_MPIAIJ       *mpiaij;
  PetscScalar      *T;
  PetscInt         i,j;
  PetscObjectState state;

  PetscFunctionBegin;
  a      = (Mat_MPIKAIJ*)A->data;
  mpiaij = (Mat_MPIAIJ*)a->A->data;

  PetscCall(PetscObjectStateGet((PetscObject)a->A,&state));
  if (state == a->state) {
    /* The existing AIJ and KAIJ members are up-to-date, so simply exit. */
    PetscFunctionReturn(0);
  } else {
    PetscCall(MatDestroy(&a->AIJ));
    PetscCall(MatDestroy(&a->OAIJ));
    if (a->isTI) {
      /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
       * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
       * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
       * to pass in. */
      PetscCall(PetscMalloc1(a->p*a->q,&T));
      for (i=0; i<a->p; i++) {
        for (j=0; j<a->q; j++) {
          if (i==j) T[i+j*a->p] = 1.0;
          else      T[i+j*a->p] = 0.0;
        }
      }
    } else T = a->T;
    PetscCall(MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ));
    PetscCall(MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ));
    if (a->isTI) PetscCall(PetscFree(T));
    a->state = state;
  }

  PetscFunctionReturn(0);
}

/* Set up the KAIJ matrix: establish global sizes/layouts from the underlying AIJ matrix,
   and in parallel also build the local submatrices and the scatter used by MatMult. */
PetscErrorCode MatSetUp_KAIJ(Mat A)
{
  PetscInt    n;
  PetscMPIInt size;
  Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ*)A->data;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
  if (size == 1) {
    PetscCall(MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N));
    PetscCall(PetscLayoutSetBlockSize(A->rmap,seqkaij->p));
    PetscCall(PetscLayoutSetBlockSize(A->cmap,seqkaij->q));
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
  } else {
    Mat_MPIKAIJ *a;
    Mat_MPIAIJ  *mpiaij;
    IS          from,to;
    Vec         gvec;

    a      = (Mat_MPIKAIJ*)A->data;
    mpiaij = (Mat_MPIAIJ*)a->A->data;
    PetscCall(MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N));
    /* NOTE(review): seqkaij aliases the same A->data as the Mat_MPIKAIJ above; this relies on
       the p/q fields occupying the same offsets in both structs -- confirm against kaij.h. */
    PetscCall(PetscLayoutSetBlockSize(A->rmap,seqkaij->p));
    PetscCall(PetscLayoutSetBlockSize(A->cmap,seqkaij->q));
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));

    PetscCall(MatKAIJ_build_AIJ_OAIJ(A));

    /* Work vector holding the q-sized ghost blocks gathered from other ranks */
    PetscCall(VecGetSize(mpiaij->lvec,&n));
    PetscCall(VecCreate(PETSC_COMM_SELF,&a->w));
    PetscCall(VecSetSizes(a->w,n*a->q,n*a->q));
    PetscCall(VecSetBlockSize(a->w,a->q));
    PetscCall(VecSetType(a->w,VECSEQ));

    /* create two temporary Index sets for build scatter gather */
    PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from));
    PetscCall(ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to));

    /* create temporary global vector to generate scatter context */
    PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec));

    /* generate the scatter context */
    PetscCall(VecScatterCreate(gvec,from,a->w,to,&a->ctx));

    PetscCall(ISDestroy(&from));
    PetscCall(ISDestroy(&to));
    PetscCall(VecDestroy(&gvec));
  }

  A->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/* View a KAIJ matrix. For INFO/IMPL ASCII formats print S, T and the underlying AIJ matrix
   directly; for all other formats convert to AIJ and view the converted matrix. */
PetscErrorCode MatView_KAIJ(Mat A,PetscViewer viewer)
{
  PetscViewerFormat format;
  Mat_SeqKAIJ       *a = (Mat_SeqKAIJ*)A->data;
  Mat               B;
  PetscInt          i;
  PetscBool         ismpikaij;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij));
  PetscCall(PetscViewerGetFormat(viewer,&format));
  if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
    PetscCall(PetscViewerASCIIPrintf(viewer,"S and T have %" PetscInt_FMT " rows and %" PetscInt_FMT " columns\n",a->p,a->q));

    /* Print appropriate details for S. */
    if (!a->S) {
      PetscCall(PetscViewerASCIIPrintf(viewer,"S is NULL\n"));
    } else if (format == PETSC_VIEWER_ASCII_IMPL) {
      PetscCall(PetscViewerASCIIPrintf(viewer,"Entries of S are "));
      for (i=0; i<(a->p * a->q); i++) {
#if defined(PETSC_USE_COMPLEX)
        PetscCall(PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->S[i]),(double)PetscImaginaryPart(a->S[i])));
#else
        PetscCall(PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->S[i])));
#endif
      }
      PetscCall(PetscViewerASCIIPrintf(viewer,"\n"));
    }

    /* Print appropriate details for T. */
    if (a->isTI) {
      PetscCall(PetscViewerASCIIPrintf(viewer,"T is the identity matrix\n"));
    } else if (!a->T) {
      PetscCall(PetscViewerASCIIPrintf(viewer,"T is NULL\n"));
    } else if (format == PETSC_VIEWER_ASCII_IMPL) {
      PetscCall(PetscViewerASCIIPrintf(viewer,"Entries of T are "));
      for (i=0; i<(a->p * a->q); i++) {
#if defined(PETSC_USE_COMPLEX)
        PetscCall(PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->T[i]),(double)PetscImaginaryPart(a->T[i])));
#else
        PetscCall(PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->T[i])));
#endif
      }
      PetscCall(PetscViewerASCIIPrintf(viewer,"\n"));
    }

    /* Now print details for the AIJ matrix, using the AIJ viewer. */
    PetscCall(PetscViewerASCIIPrintf(viewer,"Now viewing the associated AIJ matrix:\n"));
    if (ismpikaij) {
      Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
      PetscCall(MatView(b->A,viewer));
    } else {
      PetscCall(MatView(a->AIJ,viewer));
    }

  } else {
    /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */
    PetscCall(MatConvert(A,MATAIJ,MAT_INITIAL_MATRIX,&B));
    PetscCall(MatView(B,viewer));
    PetscCall(MatDestroy(&B));
  }
  PetscFunctionReturn(0);
}

/* Destroy the parallel KAIJ matrix: release the derived local KAIJ matrices, the underlying
   MPIAIJ matrix, the scatter context and work vector, and the dense S/T blocks. */
PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
{
  Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDestroy(&b->AIJ));
  PetscCall(MatDestroy(&b->OAIJ));
  PetscCall(MatDestroy(&b->A));
  PetscCall(VecScatterDestroy(&b->ctx));
  PetscCall(VecDestroy(&b->w));
  PetscCall(PetscFree(b->S));
  PetscCall(PetscFree(b->T));
  PetscCall(PetscFree(b->ibdiag));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpikaij_mpiaij_C",NULL));
  PetscCall(PetscFree(A->data));
  PetscFunctionReturn(0);
}

/* --------------------------------------------------------------------------------------*/

/* zz = yy + Axx */
PetscErrorCode MatMultAdd_SeqKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqKAIJ       *b = (Mat_SeqKAIJ*)A->data;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
  const PetscScalar *s = b->S, *t = b->T;
  const PetscScalar *x,*v,*bx;
  PetscScalar       *y,*sums;
  const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
  PetscInt          n,i,jrow,j,l,p=b->p,q=b->q,k;

  PetscFunctionBegin;
  /* Initialize zz: zero if no yy is supplied (plain MatMult), else copy yy in */
  if (!yy) {
    PetscCall(VecSet(zz,0.0));
  } else {
    PetscCall(VecCopy(yy,zz));
  }
  /* S == T == 0: the operator is the zero matrix, zz already holds the answer */
  if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0);

  PetscCall(VecGetArrayRead(xx,&x));
  PetscCall(VecGetArray(zz,&y));
  idx = a->j;
  v   = a->a;
  ii  = a->i;

  if (b->isTI) {
    /* T is the identity: each AIJ entry scales a q-block of x directly */
    for (i=0; i<m; i++) {
      jrow = ii[i];
      n    = ii[i+1] - jrow;
      sums = y + p*i;
      for (j=0; j<n; j++) {
        for (k=0; k<p; k++) {
          sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k];
        }
      }
    }
    PetscCall(PetscLogFlops(3.0*(a->nz)*p));
  } else if (t) {
    /* General T: apply the dense p x q block T for every AIJ nonzero */
    for (i=0; i<m; i++) {
      jrow = ii[i];
      n    = ii[i+1] - jrow;
      sums = y +
p*i; 683 for (j=0; j<n; j++) { 684 for (k=0; k<p; k++) { 685 for (l=0; l<q; l++) { 686 sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l]; 687 } 688 } 689 } 690 } 691 /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do), 692 * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T), 693 * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter 694 * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */ 695 PetscCall(PetscLogFlops((2.0*p*q-p)*m+2.0*p*a->nz)); 696 } 697 if (s) { 698 for (i=0; i<m; i++) { 699 sums = y + p*i; 700 bx = x + q*i; 701 if (i < b->AIJ->cmap->n) { 702 for (j=0; j<q; j++) { 703 for (k=0; k<p; k++) { 704 sums[k] += s[k+j*p]*bx[j]; 705 } 706 } 707 } 708 } 709 PetscCall(PetscLogFlops(2.0*m*p*q)); 710 } 711 712 PetscCall(VecRestoreArrayRead(xx,&x)); 713 PetscCall(VecRestoreArray(zz,&y)); 714 PetscFunctionReturn(0); 715 } 716 717 PetscErrorCode MatMult_SeqKAIJ(Mat A,Vec xx,Vec yy) 718 { 719 PetscFunctionBegin; 720 PetscCall(MatMultAdd_SeqKAIJ(A,xx,PETSC_NULL,yy)); 721 PetscFunctionReturn(0); 722 } 723 724 #include <petsc/private/kernels/blockinvert.h> 725 726 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A,const PetscScalar **values) 727 { 728 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 729 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 730 const PetscScalar *S = b->S; 731 const PetscScalar *T = b->T; 732 const PetscScalar *v = a->a; 733 const PetscInt p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i; 734 PetscInt i,j,*v_pivots,dof,dof2; 735 PetscScalar *diag,aval,*v_work; 736 737 PetscFunctionBegin; 738 PetscCheck(p == q,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse."); 739 PetscCheck(S || T || b->isTI,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot 
invert a zero matrix."); 740 741 dof = p; 742 dof2 = dof*dof; 743 744 if (b->ibdiagvalid) { 745 if (values) *values = b->ibdiag; 746 PetscFunctionReturn(0); 747 } 748 if (!b->ibdiag) { 749 PetscCall(PetscMalloc1(dof2*m,&b->ibdiag)); 750 PetscCall(PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar))); 751 } 752 if (values) *values = b->ibdiag; 753 diag = b->ibdiag; 754 755 PetscCall(PetscMalloc2(dof,&v_work,dof,&v_pivots)); 756 for (i=0; i<m; i++) { 757 if (S) { 758 PetscCall(PetscMemcpy(diag,S,dof2*sizeof(PetscScalar))); 759 } else { 760 PetscCall(PetscMemzero(diag,dof2*sizeof(PetscScalar))); 761 } 762 if (b->isTI) { 763 aval = 0; 764 for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 765 for (j=0; j<dof; j++) diag[j+dof*j] += aval; 766 } else if (T) { 767 aval = 0; 768 for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 769 for (j=0; j<dof2; j++) diag[j] += aval*T[j]; 770 } 771 PetscCall(PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL)); 772 diag += dof2; 773 } 774 PetscCall(PetscFree2(v_work,v_pivots)); 775 776 b->ibdiagvalid = PETSC_TRUE; 777 PetscFunctionReturn(0); 778 } 779 780 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B) 781 { 782 Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data; 783 784 PetscFunctionBegin; 785 *B = kaij->AIJ; 786 PetscFunctionReturn(0); 787 } 788 789 static PetscErrorCode MatConvert_KAIJ_AIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 790 { 791 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 792 Mat AIJ,OAIJ,B; 793 PetscInt *d_nnz,*o_nnz = NULL,nz,i,j,m,d; 794 const PetscInt p = a->p,q = a->q; 795 PetscBool ismpikaij,missing; 796 797 PetscFunctionBegin; 798 if (reuse != MAT_REUSE_MATRIX) { 799 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij)); 800 if (ismpikaij) { 801 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 802 AIJ = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ; 803 OAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ; 804 } else { 805 AIJ = a->AIJ; 806 OAIJ = NULL; 807 } 808 
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
    PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
    PetscCall(MatSetType(B,MATAIJ));
    PetscCall(MatGetSize(AIJ,&m,NULL));
    PetscCall(MatMissingDiagonal(AIJ,&missing,&d)); /* assumption that all successive rows will have a missing diagonal */
    if (!missing || !a->S) d = m;
    PetscCall(PetscMalloc1(m*p,&d_nnz));
    for (i = 0; i < m; ++i) {
      PetscCall(MatGetRow_SeqAIJ(AIJ,i,&nz,NULL,NULL));
      /* each AIJ nonzero expands to a q-wide dense block; rows >= d get an extra block for the S diagonal */
      for (j = 0; j < p; ++j) d_nnz[i*p + j] = nz*q + (i >= d)*q;
      PetscCall(MatRestoreRow_SeqAIJ(AIJ,i,&nz,NULL,NULL));
    }
    if (OAIJ) {
      PetscCall(PetscMalloc1(m*p,&o_nnz));
      for (i = 0; i < m; ++i) {
        PetscCall(MatGetRow_SeqAIJ(OAIJ,i,&nz,NULL,NULL));
        for (j = 0; j < p; ++j) o_nnz[i*p + j] = nz*q;
        PetscCall(MatRestoreRow_SeqAIJ(OAIJ,i,&nz,NULL,NULL));
      }
      PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
    } else {
      PetscCall(MatSeqAIJSetPreallocation(B,0,d_nnz));
    }
    PetscCall(PetscFree(d_nnz));
    PetscCall(PetscFree(o_nnz));
  } else B = *newmat;
  PetscCall(MatConvert_Basic(A,newtype,MAT_REUSE_MATRIX,&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A,&B));
  } else *newmat = B;
  PetscFunctionReturn(0);
}

/* Block SOR/Gauss-Seidel relaxation for the sequential KAIJ matrix. Only square dense
   blocks (p == q) are supported; the inverted block diagonals come from
   MatInvertBlockDiagonal_SeqKAIJ() and work arrays are cached in kaij->sor. */
PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ*) A->data;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)kaij->AIJ->data;
  const PetscScalar *aa = a->a, *T = kaij->T, *v;
  const PetscInt    m = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi;
  const PetscScalar *b, *xb, *idiag;
  PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
  PetscInt          i, j, k, i2, bs, bs2, nz;

  PetscFunctionBegin;
  its = its*lits;
  PetscCheck(!(flag & SOR_EISENSTAT),PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
  PetscCheck(its > 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits);
  PetscCheck(!fshift,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift");
  PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER),PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts");
  PetscCheck(p == q,PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: No support for non-square dense blocks");
  bs  = p;
  bs2 = bs*bs;

  if (!m) PetscFunctionReturn(0);

  if (!kaij->ibdiagvalid) PetscCall(MatInvertBlockDiagonal_SeqKAIJ(A,NULL));
  idiag = kaij->ibdiag;
  diag  = a->diag;

  /* Lazily allocate the SOR work arrays (freed in MatDestroy_SeqKAIJ) */
  if (!kaij->sor.setup) {
    PetscCall(PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr));
    kaij->sor.setup = PETSC_TRUE;
  }
  y    = kaij->sor.y;
  w    = kaij->sor.w;
  work = kaij->sor.work;
  t    = kaij->sor.t;
  arr  = kaij->sor.arr;

  PetscCall(VecGetArray(xx,&x));
  PetscCall(VecGetArrayRead(bb,&b));

  if (flag & SOR_ZERO_INITIAL_GUESS) {
    if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
      PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); /* x[0:bs] <- D^{-1} b[0:bs] */
      PetscCall(PetscMemcpy(t,b,bs*sizeof(PetscScalar)));
      i2     = bs;
      idiag += bs2;
      for (i=1; i<m; i++) {
        v  = aa + ai[i];
        vi = aj + ai[i];
        nz = diag[i] - ai[i];  /* nonzeros strictly below the diagonal in row i */

        if (T) { /* b - T (Arow * x) */
          PetscCall(PetscMemzero(w,bs*sizeof(PetscScalar)));
          for (j=0; j<nz; j++) {
            for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
          }
          PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]);
          for (k=0; k<bs; k++) t[i2+k] += b[i2+k];
        } else if (kaij->isTI) {
          PetscCall(PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar)));
          for (j=0; j<nz; j++) {
            for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k];
          }
        } else {
          /* no T contribution at all: right-hand side block is just b */
          PetscCall(PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar)));
        }

        PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y);
        for (j=0; j<bs; j++) x[i2+j] = omega * y[j];

        idiag += bs2;
        i2    += bs;
      }
      /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
      PetscCall(PetscLogFlops(1.0*bs2*a->nz));
      xb = t;
    } else xb = b;
    if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
      idiag = kaij->ibdiag+bs2*(m-1);
      i2    = bs * (m-1);
      PetscCall(PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar)));
      PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
      i2    -= bs;
      idiag -= bs2;
      for (i=m-2; i>=0; i--) {
        v  = aa + diag[i] + 1 ;
        vi = aj + diag[i] + 1;
        nz = ai[i+1] - diag[i] - 1;  /* nonzeros strictly above the diagonal in row i */

        if (T) { /* FIXME: This branch untested */
          PetscCall(PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar)));
          /* copy all rows of x that are needed into contiguous space */
          workt = work;
          for (j=0; j<nz; j++) {
            PetscCall(PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar)));
            workt += bs;
          }
          arrt = arr;
          for (j=0; j<nz; j++) {
            PetscCall(PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar)));
            for (k=0; k<bs2; k++) arrt[k] *= v[j];
            arrt += bs2;
          }
          PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
        } else if (kaij->isTI) {
          PetscCall(PetscMemcpy(w,t+i2,bs*sizeof(PetscScalar)));
          for (j=0; j<nz; j++) {
            for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
          }
        }

        PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */
        for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j];

        idiag -= bs2;
        i2    -= bs;
      }
      PetscCall(PetscLogFlops(1.0*bs2*(a->nz)));
    }
    its--;
  }
  while (its--) { /* FIXME: This branch not updated */
    if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
      i2    = 0;
      idiag = kaij->ibdiag;
      for (i=0; i<m; i++) {
        PetscCall(PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar)));

        /* lower-triangular part of row i */
        v  = aa + ai[i];
        vi = aj + ai[i];
        nz = diag[i] - ai[i];
        workt = work;
        for (j=0; j<nz; j++) {
          PetscCall(PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar)));
          workt += bs;
        }
        arrt = arr;
        if (T) {
          for (j=0; j<nz; j++) {
            PetscCall(PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar)));
            for (k=0; k<bs2; k++) arrt[k] *= v[j];
            arrt += bs2;
          }
          PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
        } else if (kaij->isTI) {
          for (j=0; j<nz; j++) {
            PetscCall(PetscMemzero(arrt,bs2*sizeof(PetscScalar)));
            for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
            arrt += bs2;
          }
          PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
        }
        PetscCall(PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar)));

        /* upper-triangular part of row i */
        v  = aa + diag[i] + 1;
        vi = aj + diag[i] + 1;
        nz = ai[i+1] - diag[i] - 1;
        workt = work;
        for (j=0; j<nz; j++) {
          PetscCall(PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar)));
          workt += bs;
        }
        arrt = arr;
        if (T) {
          for (j=0; j<nz; j++) {
            PetscCall(PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar)));
            for (k=0; k<bs2; k++) arrt[k] *= v[j];
            arrt += bs2;
          }
          PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
        } else if (kaij->isTI) {
          for (j=0; j<nz; j++) {
            PetscCall(PetscMemzero(arrt,bs2*sizeof(PetscScalar)));
            for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
            arrt += bs2;
          }
          PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
        }

        PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
        for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);

        idiag += bs2;
        i2    += bs;
      }
      xb = t;
    }
    else xb = b;
    if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
      idiag = kaij->ibdiag+bs2*(m-1);
      i2    = bs * (m-1);
      if (xb == b) {
        /* no forward sweep preceded: must subtract both triangular parts */
        for (i=m-1; i>=0; i--) {
          PetscCall(PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar)));

          v  = aa + ai[i];
          vi = aj + ai[i];
          nz = diag[i] - ai[i];
          workt = work;
          for (j=0; j<nz; j++) {
            PetscCall(PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar)));
            workt += bs;
          }
          arrt = arr;
          if (T) {
            for (j=0; j<nz; j++) {
              PetscCall(PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar)));
              for (k=0; k<bs2; k++) arrt[k] *= v[j];
              arrt += bs2;
            }
            PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
          } else if (kaij->isTI) {
            for (j=0; j<nz; j++) {
              PetscCall(PetscMemzero(arrt,bs2*sizeof(PetscScalar)));
              for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
              arrt += bs2;
            }
            PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
          }

          v  = aa + diag[i] + 1;
          vi = aj + diag[i] + 1;
          nz = ai[i+1] - diag[i] - 1;
          workt = work;
          for (j=0; j<nz; j++) {
            PetscCall(PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar)));
            workt += bs;
          }
          arrt = arr;
          if (T) {
            for (j=0; j<nz; j++) {
              PetscCall(PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar)));
              for (k=0; k<bs2; k++) arrt[k] *= v[j];
              arrt += bs2;
            }
            PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
          } else if (kaij->isTI) {
            for (j=0; j<nz; j++) {
              PetscCall(PetscMemzero(arrt,bs2*sizeof(PetscScalar)));
              for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
              arrt += bs2;
            }
            PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
          }

          PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
          for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
        }
      } else {
        /* forward sweep already produced t; only the upper-triangular part remains */
        for (i=m-1; i>=0; i--) {
          PetscCall(PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar)));
          v  = aa + diag[i] + 1;
          vi = aj + diag[i] + 1;
          nz = ai[i+1] - diag[i] - 1;
          workt = work;
          for (j=0; j<nz; j++) {
            PetscCall(PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar)));
            workt += bs;
          }
          arrt = arr;
          if (T) {
            for (j=0; j<nz; j++) {
              PetscCall(PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar)));
              for (k=0; k<bs2; k++) arrt[k] *= v[j];
              arrt += bs2;
            }
            PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
          } else if (kaij->isTI) {
            for (j=0; j<nz; j++) {
              PetscCall(PetscMemzero(arrt,bs2*sizeof(PetscScalar)));
              for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
              arrt += bs2;
            }
            PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
          }
          PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
          for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
        }
      }
      PetscCall(PetscLogFlops(1.0*bs2*(a->nz)));
    }
  }

  PetscCall(VecRestoreArray(xx,&x));
  PetscCall(VecRestoreArrayRead(bb,&b));
  PetscFunctionReturn(0);
}

/*===================================================================================*/

PetscErrorCode MatMultAdd_MPIKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;

  PetscFunctionBegin;
  if (!yy) {
    PetscCall(VecSet(zz,0.0));
  } else {
    PetscCall(VecCopy(yy,zz));
  }
  PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date.
*/ 1141 /* start the scatter */ 1142 PetscCall(VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD)); 1143 PetscCall((*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz)); 1144 PetscCall(VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD)); 1145 PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz)); 1146 PetscFunctionReturn(0); 1147 } 1148 1149 PetscErrorCode MatMult_MPIKAIJ(Mat A,Vec xx,Vec yy) 1150 { 1151 PetscFunctionBegin; 1152 PetscCall(MatMultAdd_MPIKAIJ(A,xx,PETSC_NULL,yy)); 1153 PetscFunctionReturn(0); 1154 } 1155 1156 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A,const PetscScalar **values) 1157 { 1158 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 1159 1160 PetscFunctionBegin; 1161 PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ is up to date. */ 1162 PetscCall((*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values)); 1163 PetscFunctionReturn(0); 1164 } 1165 1166 /* ----------------------------------------------------------------*/ 1167 1168 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 1169 { 1170 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*) A->data; 1171 PetscErrorCode diag = PETSC_FALSE; 1172 PetscInt nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c; 1173 PetscScalar *vaij,*v,*S=b->S,*T=b->T; 1174 1175 PetscFunctionBegin; 1176 PetscCheck(!b->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1177 b->getrowactive = PETSC_TRUE; 1178 PetscCheck(row >= 0 && row < A->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range",row); 1179 1180 if ((!S) && (!T) && (!b->isTI)) { 1181 if (ncols) *ncols = 0; 1182 if (cols) *cols = NULL; 1183 if (values) *values = NULL; 1184 PetscFunctionReturn(0); 1185 } 1186 1187 if (T || b->isTI) { 1188 PetscCall(MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij)); 1189 c = nzaij; 1190 for (i=0; i<nzaij; i++) { 1191 /* check if this row contains a diagonal entry */ 1192 if (colsaij[i] == r) { 1193 diag = PETSC_TRUE; 
1194 c = i; 1195 } 1196 } 1197 } else nzaij = c = 0; 1198 1199 /* calculate size of row */ 1200 nz = 0; 1201 if (S) nz += q; 1202 if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q); 1203 1204 if (cols || values) { 1205 PetscCall(PetscMalloc2(nz,&idx,nz,&v)); 1206 for (i=0; i<q; i++) { 1207 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1208 v[i] = 0.0; 1209 } 1210 if (b->isTI) { 1211 for (i=0; i<nzaij; i++) { 1212 for (j=0; j<q; j++) { 1213 idx[i*q+j] = colsaij[i]*q+j; 1214 v[i*q+j] = (j==s ? vaij[i] : 0); 1215 } 1216 } 1217 } else if (T) { 1218 for (i=0; i<nzaij; i++) { 1219 for (j=0; j<q; j++) { 1220 idx[i*q+j] = colsaij[i]*q+j; 1221 v[i*q+j] = vaij[i]*T[s+j*p]; 1222 } 1223 } 1224 } 1225 if (S) { 1226 for (j=0; j<q; j++) { 1227 idx[c*q+j] = r*q+j; 1228 v[c*q+j] += S[s+j*p]; 1229 } 1230 } 1231 } 1232 1233 if (ncols) *ncols = nz; 1234 if (cols) *cols = idx; 1235 if (values) *values = v; 1236 PetscFunctionReturn(0); 1237 } 1238 1239 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1240 { 1241 PetscFunctionBegin; 1242 if (nz) *nz = 0; 1243 PetscCall(PetscFree2(*idx,*v)); 1244 ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 1245 PetscFunctionReturn(0); 1246 } 1247 1248 PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 1249 { 1250 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*) A->data; 1251 Mat AIJ = b->A; 1252 PetscBool diag = PETSC_FALSE; 1253 Mat MatAIJ,MatOAIJ; 1254 const PetscInt rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray; 1255 PetscInt nz,*idx,ncolsaij = 0,ncolsoaij = 0,*colsaij,*colsoaij,r,s,c,i,j,lrow; 1256 PetscScalar *v,*vals,*ovals,*S=b->S,*T=b->T; 1257 1258 PetscFunctionBegin; 1259 PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. 
*/ 1260 MatAIJ = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ; 1261 MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ; 1262 PetscCheck(!b->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1263 b->getrowactive = PETSC_TRUE; 1264 PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1265 lrow = row - rstart; 1266 1267 if ((!S) && (!T) && (!b->isTI)) { 1268 if (ncols) *ncols = 0; 1269 if (cols) *cols = NULL; 1270 if (values) *values = NULL; 1271 PetscFunctionReturn(0); 1272 } 1273 1274 r = lrow/p; 1275 s = lrow%p; 1276 1277 if (T || b->isTI) { 1278 PetscCall(MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray)); 1279 PetscCall(MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals)); 1280 PetscCall(MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals)); 1281 1282 c = ncolsaij + ncolsoaij; 1283 for (i=0; i<ncolsaij; i++) { 1284 /* check if this row contains a diagonal entry */ 1285 if (colsaij[i] == r) { 1286 diag = PETSC_TRUE; 1287 c = i; 1288 } 1289 } 1290 } else c = 0; 1291 1292 /* calculate size of row */ 1293 nz = 0; 1294 if (S) nz += q; 1295 if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q); 1296 1297 if (cols || values) { 1298 PetscCall(PetscMalloc2(nz,&idx,nz,&v)); 1299 for (i=0; i<q; i++) { 1300 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1301 v[i] = 0.0; 1302 } 1303 if (b->isTI) { 1304 for (i=0; i<ncolsaij; i++) { 1305 for (j=0; j<q; j++) { 1306 idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 1307 v[i*q+j] = (j==s ? vals[i] : 0.0); 1308 } 1309 } 1310 for (i=0; i<ncolsoaij; i++) { 1311 for (j=0; j<q; j++) { 1312 idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 1313 v[(i+ncolsaij)*q+j] = (j==s ? 
ovals[i]: 0.0); 1314 } 1315 } 1316 } else if (T) { 1317 for (i=0; i<ncolsaij; i++) { 1318 for (j=0; j<q; j++) { 1319 idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 1320 v[i*q+j] = vals[i]*T[s+j*p]; 1321 } 1322 } 1323 for (i=0; i<ncolsoaij; i++) { 1324 for (j=0; j<q; j++) { 1325 idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 1326 v[(i+ncolsaij)*q+j] = ovals[i]*T[s+j*p]; 1327 } 1328 } 1329 } 1330 if (S) { 1331 for (j=0; j<q; j++) { 1332 idx[c*q+j] = (r+rstart/p)*q+j; 1333 v[c*q+j] += S[s+j*p]; 1334 } 1335 } 1336 } 1337 1338 if (ncols) *ncols = nz; 1339 if (cols) *cols = idx; 1340 if (values) *values = v; 1341 PetscFunctionReturn(0); 1342 } 1343 1344 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1345 { 1346 PetscFunctionBegin; 1347 PetscCall(PetscFree2(*idx,*v)); 1348 ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 1349 PetscFunctionReturn(0); 1350 } 1351 1352 PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 1353 { 1354 Mat A; 1355 1356 PetscFunctionBegin; 1357 PetscCall(MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A)); 1358 PetscCall(MatCreateSubMatrix(A,isrow,iscol,cll,newmat)); 1359 PetscCall(MatDestroy(&A)); 1360 PetscFunctionReturn(0); 1361 } 1362 1363 /* ---------------------------------------------------------------------------------- */ 1364 /*@C 1365 MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form: 1366 1367 [I \otimes S + A \otimes T] 1368 1369 where 1370 S is a dense (p \times q) matrix 1371 T is a dense (p \times q) matrix 1372 A is an AIJ (n \times n) matrix 1373 I is the identity matrix 1374 The resulting matrix is (np \times nq) 1375 1376 S and T are always stored independently on all processes as PetscScalar arrays in column-major format. 1377 1378 Collective 1379 1380 Input Parameters: 1381 + A - the AIJ matrix 1382 . p - number of rows in S and T 1383 . q - number of columns in S and T 1384 . 
S - the S matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major) 1385 - T - the T matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major) 1386 1387 Output Parameter: 1388 . kaij - the new KAIJ matrix 1389 1390 Notes: 1391 This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed. 1392 Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix. 1393 1394 Developer Notes: 1395 In the MATMPIKAIJ case, the internal 'AIJ' and 'OAIJ' sequential KAIJ matrices are kept up to date by tracking the object state 1396 of the AIJ matrix 'A' that describes the blockwise action of the MATMPIKAIJ matrix and, if the object state has changed, lazily 1397 rebuilding 'AIJ' and 'OAIJ' just before executing operations with the MATMPIKAIJ matrix. If new types of operations are added, 1398 routines implementing those must also ensure these are rebuilt when needed (by calling the internal MatKAIJ_build_AIJ_OAIJ() routine). 1399 1400 Level: advanced 1401 1402 .seealso: `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MATKAIJ` 1403 @*/ 1404 PetscErrorCode MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij) 1405 { 1406 PetscFunctionBegin; 1407 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),kaij)); 1408 PetscCall(MatSetType(*kaij,MATKAIJ)); 1409 PetscCall(MatKAIJSetAIJ(*kaij,A)); 1410 PetscCall(MatKAIJSetS(*kaij,p,q,S)); 1411 PetscCall(MatKAIJSetT(*kaij,p,q,T)); 1412 PetscCall(MatSetUp(*kaij)); 1413 PetscFunctionReturn(0); 1414 } 1415 1416 /*MC 1417 MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form 1418 [I \otimes S + A \otimes T], 1419 where 1420 S is a dense (p \times q) matrix, 1421 T is a dense (p \times q) matrix, 1422 A is an AIJ (n \times n) matrix, 1423 and I is the identity matrix. 1424 The resulting matrix is (np \times nq). 
1425 1426 S and T are always stored independently on all processes as PetscScalar arrays in column-major format. 1427 1428 Notes: 1429 A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b, 1430 where x and b are column vectors containing the row-major representations of X and B. 1431 1432 Level: advanced 1433 1434 .seealso: `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MatCreateKAIJ()` 1435 M*/ 1436 1437 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A) 1438 { 1439 Mat_MPIKAIJ *b; 1440 PetscMPIInt size; 1441 1442 PetscFunctionBegin; 1443 PetscCall(PetscNewLog(A,&b)); 1444 A->data = (void*)b; 1445 1446 PetscCall(PetscMemzero(A->ops,sizeof(struct _MatOps))); 1447 1448 b->w = NULL; 1449 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 1450 if (size == 1) { 1451 PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ)); 1452 A->ops->destroy = MatDestroy_SeqKAIJ; 1453 A->ops->mult = MatMult_SeqKAIJ; 1454 A->ops->multadd = MatMultAdd_SeqKAIJ; 1455 A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ; 1456 A->ops->getrow = MatGetRow_SeqKAIJ; 1457 A->ops->restorerow = MatRestoreRow_SeqKAIJ; 1458 A->ops->sor = MatSOR_SeqKAIJ; 1459 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqkaij_seqaij_C",MatConvert_KAIJ_AIJ)); 1460 } else { 1461 PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ)); 1462 A->ops->destroy = MatDestroy_MPIKAIJ; 1463 A->ops->mult = MatMult_MPIKAIJ; 1464 A->ops->multadd = MatMultAdd_MPIKAIJ; 1465 A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ; 1466 A->ops->getrow = MatGetRow_MPIKAIJ; 1467 A->ops->restorerow = MatRestoreRow_MPIKAIJ; 1468 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ)); 1469 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpikaij_mpiaij_C",MatConvert_KAIJ_AIJ)); 1470 
} 1471 A->ops->setup = MatSetUp_KAIJ; 1472 A->ops->view = MatView_KAIJ; 1473 A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ; 1474 PetscFunctionReturn(0); 1475 } 1476