1 2 /* 3 Defines the basic matrix operations for the KAIJ matrix storage format. 4 This format is used to evaluate matrices of the form: 5 6 [I \otimes S + A \otimes T] 7 8 where 9 S is a dense (p \times q) matrix 10 T is a dense (p \times q) matrix 11 A is an AIJ (n \times n) matrix 12 I is the identity matrix 13 14 The resulting matrix is (np \times nq) 15 16 We provide: 17 MatMult() 18 MatMultAdd() 19 MatInvertBlockDiagonal() 20 and 21 MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*) 22 23 This single directory handles both the sequential and parallel codes 24 */ 25 26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/ 27 #include <../src/mat/utils/freespace.h> 28 #include <petsc/private/vecimpl.h> 29 30 /*@C 31 MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix 32 33 Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel 34 35 Input Parameter: 36 . A - the KAIJ matrix 37 38 Output Parameter: 39 . B - the AIJ matrix 40 41 Level: advanced 42 43 Notes: The reference count on the AIJ matrix is not increased so you should not destroy it. 44 45 .seealso: `MatCreateKAIJ()` 46 @*/ 47 PetscErrorCode MatKAIJGetAIJ(Mat A,Mat *B) 48 { 49 PetscBool ismpikaij,isseqkaij; 50 51 PetscFunctionBegin; 52 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij)); 53 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij)); 54 if (ismpikaij) { 55 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 56 57 *B = b->A; 58 } else if (isseqkaij) { 59 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 60 61 *B = b->AIJ; 62 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ"); 63 PetscFunctionReturn(0); 64 } 65 66 /*@C 67 MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix 68 69 Not Collective; the entire S is stored and returned independently on all processes. 70 71 Input Parameter: 72 . 
A - the KAIJ matrix 73 74 Output Parameters: 75 + m - the number of rows in S 76 . n - the number of columns in S 77 - S - the S matrix, in form of a scalar array in column-major format 78 79 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 80 81 Level: advanced 82 83 .seealso: `MatCreateKAIJ()`, `MatGetBlockSizes()` 84 @*/ 85 PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S) 86 { 87 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 88 PetscFunctionBegin; 89 if (m) *m = b->p; 90 if (n) *n = b->q; 91 if (S) *S = b->S; 92 PetscFunctionReturn(0); 93 } 94 95 /*@C 96 MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix 97 98 Not Collective; the entire S is stored and returned independently on all processes. 99 100 Input Parameter: 101 . A - the KAIJ matrix 102 103 Output Parameters: 104 + m - the number of rows in S 105 . n - the number of columns in S 106 - S - the S matrix, in form of a scalar array in column-major format 107 108 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 109 110 Level: advanced 111 112 .seealso: `MatCreateKAIJ()`, `MatGetBlockSizes()` 113 @*/ 114 PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S) 115 { 116 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 117 PetscFunctionBegin; 118 if (m) *m = b->p; 119 if (n) *n = b->q; 120 if (S) *S = b->S; 121 PetscFunctionReturn(0); 122 } 123 124 /*@C 125 MatKAIJRestoreS - Restore array obtained with MatKAIJGetS() 126 127 Not collective 128 129 Input Parameter: 130 . A - the KAIJ matrix 131 132 Output Parameter: 133 . S - location of pointer to array obtained with MatKAIJGetS() 134 135 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 136 If NULL is passed, it will not attempt to zero the array pointer. 
137 138 Level: advanced 139 .seealso: `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()` 140 @*/ 141 PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S) 142 { 143 PetscFunctionBegin; 144 if (S) *S = NULL; 145 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 146 PetscFunctionReturn(0); 147 } 148 149 /*@C 150 MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead() 151 152 Not collective 153 154 Input Parameter: 155 . A - the KAIJ matrix 156 157 Output Parameter: 158 . S - location of pointer to array obtained with MatKAIJGetS() 159 160 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 161 If NULL is passed, it will not attempt to zero the array pointer. 162 163 Level: advanced 164 .seealso: `MatKAIJGetS()`, `MatKAIJGetSRead()`, `MatKAIJRestoreSRead()` 165 @*/ 166 PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S) 167 { 168 PetscFunctionBegin; 169 if (S) *S = NULL; 170 PetscFunctionReturn(0); 171 } 172 173 /*@C 174 MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix 175 176 Not Collective; the entire T is stored and returned independently on all processes 177 178 Input Parameter: 179 . A - the KAIJ matrix 180 181 Output Parameters: 182 + m - the number of rows in T 183 . 
n - the number of columns in T 184 - T - the T matrix, in form of a scalar array in column-major format 185 186 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 187 188 Level: advanced 189 190 .seealso: `MatCreateKAIJ()`, `MatGetBlockSizes()` 191 @*/ 192 PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T) 193 { 194 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 195 PetscFunctionBegin; 196 if (m) *m = b->p; 197 if (n) *n = b->q; 198 if (T) *T = b->T; 199 PetscFunctionReturn(0); 200 } 201 202 /*@C 203 MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix 204 205 Not Collective; the entire T is stored and returned independently on all processes 206 207 Input Parameter: 208 . A - the KAIJ matrix 209 210 Output Parameters: 211 + m - the number of rows in T 212 . n - the number of columns in T 213 - T - the T matrix, in form of a scalar array in column-major format 214 215 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 216 217 Level: advanced 218 219 .seealso: `MatCreateKAIJ()`, `MatGetBlockSizes()` 220 @*/ 221 PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T) 222 { 223 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 224 PetscFunctionBegin; 225 if (m) *m = b->p; 226 if (n) *n = b->q; 227 if (T) *T = b->T; 228 PetscFunctionReturn(0); 229 } 230 231 /*@C 232 MatKAIJRestoreT - Restore array obtained with MatKAIJGetT() 233 234 Not collective 235 236 Input Parameter: 237 . A - the KAIJ matrix 238 239 Output Parameter: 240 . T - location of pointer to array obtained with MatKAIJGetS() 241 242 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 243 If NULL is passed, it will not attempt to zero the array pointer. 
244 245 Level: advanced 246 .seealso: `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()` 247 @*/ 248 PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T) 249 { 250 PetscFunctionBegin; 251 if (T) *T = NULL; 252 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 253 PetscFunctionReturn(0); 254 } 255 256 /*@C 257 MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead() 258 259 Not collective 260 261 Input Parameter: 262 . A - the KAIJ matrix 263 264 Output Parameter: 265 . T - location of pointer to array obtained with MatKAIJGetS() 266 267 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 268 If NULL is passed, it will not attempt to zero the array pointer. 269 270 Level: advanced 271 .seealso: `MatKAIJGetT()`, `MatKAIJGetTRead()`, `MatKAIJRestoreTRead()` 272 @*/ 273 PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T) 274 { 275 PetscFunctionBegin; 276 if (T) *T = NULL; 277 PetscFunctionReturn(0); 278 } 279 280 /*@ 281 MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix 282 283 Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel 284 285 Input Parameters: 286 + A - the KAIJ matrix 287 - B - the AIJ matrix 288 289 Notes: 290 This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed. 291 Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix. 
292 293 Level: advanced 294 295 .seealso: `MatKAIJGetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()` 296 @*/ 297 PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B) 298 { 299 PetscMPIInt size; 300 PetscBool flg; 301 302 PetscFunctionBegin; 303 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 304 if (size == 1) { 305 PetscCall(PetscObjectTypeCompare((PetscObject)B,MATSEQAIJ,&flg)); 306 PetscCheck(flg,PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MatKAIJSetAIJ() with MATSEQKAIJ does not support %s as the AIJ mat",((PetscObject)B)->type_name); 307 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 308 a->AIJ = B; 309 } else { 310 Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data; 311 a->A = B; 312 } 313 PetscCall(PetscObjectReference((PetscObject)B)); 314 PetscFunctionReturn(0); 315 } 316 317 /*@C 318 MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix 319 320 Logically Collective; the entire S is stored independently on all processes. 321 322 Input Parameters: 323 + A - the KAIJ matrix 324 . p - the number of rows in S 325 . q - the number of columns in S 326 - S - the S matrix, in form of a scalar array in column-major format 327 328 Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix. 329 The S matrix is copied, so the user can destroy this array. 330 331 Level: Advanced 332 333 .seealso: `MatKAIJGetS()`, `MatKAIJSetT()`, `MatKAIJSetAIJ()` 334 @*/ 335 PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[]) 336 { 337 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 338 339 PetscFunctionBegin; 340 PetscCall(PetscFree(a->S)); 341 if (S) { 342 PetscCall(PetscMalloc1(p*q,&a->S)); 343 PetscCall(PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar))); 344 } else a->S = NULL; 345 346 a->p = p; 347 a->q = q; 348 PetscFunctionReturn(0); 349 } 350 351 /*@C 352 MatKAIJGetScaledIdentity - Check if both S and T are scaled identities. 353 354 Logically Collective. 355 356 Input Parameter: 357 . 
A - the KAIJ matrix 358 359 Output Parameter: 360 . identity - the Boolean value 361 362 Level: Advanced 363 364 .seealso: `MatKAIJGetS()`, `MatKAIJGetT()` 365 @*/ 366 PetscErrorCode MatKAIJGetScaledIdentity(Mat A,PetscBool* identity) 367 { 368 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 369 PetscInt i,j; 370 371 PetscFunctionBegin; 372 if (a->p != a->q) { 373 *identity = PETSC_FALSE; 374 PetscFunctionReturn(0); 375 } else *identity = PETSC_TRUE; 376 if (!a->isTI || a->S) { 377 for (i=0; i<a->p && *identity; i++) { 378 for (j=0; j<a->p && *identity; j++) { 379 if (i != j) { 380 if (a->S && PetscAbsScalar(a->S[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE; 381 if (a->T && PetscAbsScalar(a->T[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE; 382 } else { 383 if (a->S && PetscAbsScalar(a->S[i*(a->p+1)]-a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE; 384 if (a->T && PetscAbsScalar(a->T[i*(a->p+1)]-a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE; 385 } 386 } 387 } 388 } 389 PetscFunctionReturn(0); 390 } 391 392 /*@C 393 MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix 394 395 Logically Collective; the entire T is stored independently on all processes. 396 397 Input Parameters: 398 + A - the KAIJ matrix 399 . p - the number of rows in S 400 . q - the number of columns in S 401 - T - the T matrix, in form of a scalar array in column-major format 402 403 Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix. 404 The T matrix is copied, so the user can destroy this array. 
405 406 Level: Advanced 407 408 .seealso: `MatKAIJGetT()`, `MatKAIJSetS()`, `MatKAIJSetAIJ()` 409 @*/ 410 PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[]) 411 { 412 PetscInt i,j; 413 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 414 PetscBool isTI = PETSC_FALSE; 415 416 PetscFunctionBegin; 417 /* check if T is an identity matrix */ 418 if (T && (p == q)) { 419 isTI = PETSC_TRUE; 420 for (i=0; i<p; i++) { 421 for (j=0; j<q; j++) { 422 if (i == j) { 423 /* diagonal term must be 1 */ 424 if (T[i+j*p] != 1.0) isTI = PETSC_FALSE; 425 } else { 426 /* off-diagonal term must be 0 */ 427 if (T[i+j*p] != 0.0) isTI = PETSC_FALSE; 428 } 429 } 430 } 431 } 432 a->isTI = isTI; 433 434 PetscCall(PetscFree(a->T)); 435 if (T && (!isTI)) { 436 PetscCall(PetscMalloc1(p*q,&a->T)); 437 PetscCall(PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar))); 438 } else a->T = NULL; 439 440 a->p = p; 441 a->q = q; 442 PetscFunctionReturn(0); 443 } 444 445 PetscErrorCode MatDestroy_SeqKAIJ(Mat A) 446 { 447 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 448 449 PetscFunctionBegin; 450 PetscCall(MatDestroy(&b->AIJ)); 451 PetscCall(PetscFree(b->S)); 452 PetscCall(PetscFree(b->T)); 453 PetscCall(PetscFree(b->ibdiag)); 454 PetscCall(PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr)); 455 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqkaij_seqaij_C",NULL)); 456 PetscCall(PetscFree(A->data)); 457 PetscFunctionReturn(0); 458 } 459 460 PETSC_INTERN PetscErrorCode MatKAIJ_build_AIJ_OAIJ(Mat A) 461 { 462 Mat_MPIKAIJ *a; 463 Mat_MPIAIJ *mpiaij; 464 PetscScalar *T; 465 PetscInt i,j; 466 PetscObjectState state; 467 468 PetscFunctionBegin; 469 a = (Mat_MPIKAIJ*)A->data; 470 mpiaij = (Mat_MPIAIJ*)a->A->data; 471 472 PetscCall(PetscObjectStateGet((PetscObject)a->A,&state)); 473 if (state == a->state) { 474 /* The existing AIJ and KAIJ members are up-to-date, so simply exit. 
*/ 475 PetscFunctionReturn(0); 476 } else { 477 PetscCall(MatDestroy(&a->AIJ)); 478 PetscCall(MatDestroy(&a->OAIJ)); 479 if (a->isTI) { 480 /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL. 481 * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will 482 * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix 483 * to pass in. */ 484 PetscCall(PetscMalloc1(a->p*a->q,&T)); 485 for (i=0; i<a->p; i++) { 486 for (j=0; j<a->q; j++) { 487 if (i==j) T[i+j*a->p] = 1.0; 488 else T[i+j*a->p] = 0.0; 489 } 490 } 491 } else T = a->T; 492 PetscCall(MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ)); 493 PetscCall(MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ)); 494 if (a->isTI) { 495 PetscCall(PetscFree(T)); 496 } 497 a->state = state; 498 } 499 500 PetscFunctionReturn(0); 501 } 502 503 PetscErrorCode MatSetUp_KAIJ(Mat A) 504 { 505 PetscInt n; 506 PetscMPIInt size; 507 Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ*)A->data; 508 509 PetscFunctionBegin; 510 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 511 if (size == 1) { 512 PetscCall(MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N)); 513 PetscCall(PetscLayoutSetBlockSize(A->rmap,seqkaij->p)); 514 PetscCall(PetscLayoutSetBlockSize(A->cmap,seqkaij->q)); 515 PetscCall(PetscLayoutSetUp(A->rmap)); 516 PetscCall(PetscLayoutSetUp(A->cmap)); 517 } else { 518 Mat_MPIKAIJ *a; 519 Mat_MPIAIJ *mpiaij; 520 IS from,to; 521 Vec gvec; 522 523 a = (Mat_MPIKAIJ*)A->data; 524 mpiaij = (Mat_MPIAIJ*)a->A->data; 525 PetscCall(MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N)); 526 PetscCall(PetscLayoutSetBlockSize(A->rmap,seqkaij->p)); 527 PetscCall(PetscLayoutSetBlockSize(A->cmap,seqkaij->q)); 528 
PetscCall(PetscLayoutSetUp(A->rmap)); 529 PetscCall(PetscLayoutSetUp(A->cmap)); 530 531 PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); 532 533 PetscCall(VecGetSize(mpiaij->lvec,&n)); 534 PetscCall(VecCreate(PETSC_COMM_SELF,&a->w)); 535 PetscCall(VecSetSizes(a->w,n*a->q,n*a->q)); 536 PetscCall(VecSetBlockSize(a->w,a->q)); 537 PetscCall(VecSetType(a->w,VECSEQ)); 538 539 /* create two temporary Index sets for build scatter gather */ 540 PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from)); 541 PetscCall(ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to)); 542 543 /* create temporary global vector to generate scatter context */ 544 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec)); 545 546 /* generate the scatter context */ 547 PetscCall(VecScatterCreate(gvec,from,a->w,to,&a->ctx)); 548 549 PetscCall(ISDestroy(&from)); 550 PetscCall(ISDestroy(&to)); 551 PetscCall(VecDestroy(&gvec)); 552 } 553 554 A->assembled = PETSC_TRUE; 555 PetscFunctionReturn(0); 556 } 557 558 PetscErrorCode MatView_KAIJ(Mat A,PetscViewer viewer) 559 { 560 PetscViewerFormat format; 561 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 562 Mat B; 563 PetscInt i; 564 PetscBool ismpikaij; 565 566 PetscFunctionBegin; 567 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij)); 568 PetscCall(PetscViewerGetFormat(viewer,&format)); 569 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) { 570 PetscCall(PetscViewerASCIIPrintf(viewer,"S and T have %" PetscInt_FMT " rows and %" PetscInt_FMT " columns\n",a->p,a->q)); 571 572 /* Print appropriate details for S. 
*/ 573 if (!a->S) { 574 PetscCall(PetscViewerASCIIPrintf(viewer,"S is NULL\n")); 575 } else if (format == PETSC_VIEWER_ASCII_IMPL) { 576 PetscCall(PetscViewerASCIIPrintf(viewer,"Entries of S are ")); 577 for (i=0; i<(a->p * a->q); i++) { 578 #if defined(PETSC_USE_COMPLEX) 579 PetscCall(PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->S[i]),(double)PetscImaginaryPart(a->S[i]))); 580 #else 581 PetscCall(PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->S[i]))); 582 #endif 583 } 584 PetscCall(PetscViewerASCIIPrintf(viewer,"\n")); 585 } 586 587 /* Print appropriate details for T. */ 588 if (a->isTI) { 589 PetscCall(PetscViewerASCIIPrintf(viewer,"T is the identity matrix\n")); 590 } else if (!a->T) { 591 PetscCall(PetscViewerASCIIPrintf(viewer,"T is NULL\n")); 592 } else if (format == PETSC_VIEWER_ASCII_IMPL) { 593 PetscCall(PetscViewerASCIIPrintf(viewer,"Entries of T are ")); 594 for (i=0; i<(a->p * a->q); i++) { 595 #if defined(PETSC_USE_COMPLEX) 596 PetscCall(PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->T[i]),(double)PetscImaginaryPart(a->T[i]))); 597 #else 598 PetscCall(PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->T[i]))); 599 #endif 600 } 601 PetscCall(PetscViewerASCIIPrintf(viewer,"\n")); 602 } 603 604 /* Now print details for the AIJ matrix, using the AIJ viewer. */ 605 PetscCall(PetscViewerASCIIPrintf(viewer,"Now viewing the associated AIJ matrix:\n")); 606 if (ismpikaij) { 607 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 608 PetscCall(MatView(b->A,viewer)); 609 } else { 610 PetscCall(MatView(a->AIJ,viewer)); 611 } 612 613 } else { 614 /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. 
*/ 615 PetscCall(MatConvert(A,MATAIJ,MAT_INITIAL_MATRIX,&B)); 616 PetscCall(MatView(B,viewer)); 617 PetscCall(MatDestroy(&B)); 618 } 619 PetscFunctionReturn(0); 620 } 621 622 PetscErrorCode MatDestroy_MPIKAIJ(Mat A) 623 { 624 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 625 626 PetscFunctionBegin; 627 PetscCall(MatDestroy(&b->AIJ)); 628 PetscCall(MatDestroy(&b->OAIJ)); 629 PetscCall(MatDestroy(&b->A)); 630 PetscCall(VecScatterDestroy(&b->ctx)); 631 PetscCall(VecDestroy(&b->w)); 632 PetscCall(PetscFree(b->S)); 633 PetscCall(PetscFree(b->T)); 634 PetscCall(PetscFree(b->ibdiag)); 635 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",NULL)); 636 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpikaij_mpiaij_C",NULL)); 637 PetscCall(PetscFree(A->data)); 638 PetscFunctionReturn(0); 639 } 640 641 /* --------------------------------------------------------------------------------------*/ 642 643 /* zz = yy + Axx */ 644 PetscErrorCode MatMultAdd_SeqKAIJ(Mat A,Vec xx,Vec yy,Vec zz) 645 { 646 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 647 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 648 const PetscScalar *s = b->S, *t = b->T; 649 const PetscScalar *x,*v,*bx; 650 PetscScalar *y,*sums; 651 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 652 PetscInt n,i,jrow,j,l,p=b->p,q=b->q,k; 653 654 PetscFunctionBegin; 655 if (!yy) { 656 PetscCall(VecSet(zz,0.0)); 657 } else { 658 PetscCall(VecCopy(yy,zz)); 659 } 660 if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0); 661 662 PetscCall(VecGetArrayRead(xx,&x)); 663 PetscCall(VecGetArray(zz,&y)); 664 idx = a->j; 665 v = a->a; 666 ii = a->i; 667 668 if (b->isTI) { 669 for (i=0; i<m; i++) { 670 jrow = ii[i]; 671 n = ii[i+1] - jrow; 672 sums = y + p*i; 673 for (j=0; j<n; j++) { 674 for (k=0; k<p; k++) { 675 sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k]; 676 } 677 } 678 } 679 PetscCall(PetscLogFlops(3.0*(a->nz)*p)); 680 } else if (t) { 681 for (i=0; i<m; i++) { 682 jrow = ii[i]; 683 n = ii[i+1] - jrow; 684 sums = y + 
p*i; 685 for (j=0; j<n; j++) { 686 for (k=0; k<p; k++) { 687 for (l=0; l<q; l++) { 688 sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l]; 689 } 690 } 691 } 692 } 693 /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do), 694 * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T), 695 * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter 696 * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */ 697 PetscCall(PetscLogFlops((2.0*p*q-p)*m+2.0*p*a->nz)); 698 } 699 if (s) { 700 for (i=0; i<m; i++) { 701 sums = y + p*i; 702 bx = x + q*i; 703 if (i < b->AIJ->cmap->n) { 704 for (j=0; j<q; j++) { 705 for (k=0; k<p; k++) { 706 sums[k] += s[k+j*p]*bx[j]; 707 } 708 } 709 } 710 } 711 PetscCall(PetscLogFlops(2.0*m*p*q)); 712 } 713 714 PetscCall(VecRestoreArrayRead(xx,&x)); 715 PetscCall(VecRestoreArray(zz,&y)); 716 PetscFunctionReturn(0); 717 } 718 719 PetscErrorCode MatMult_SeqKAIJ(Mat A,Vec xx,Vec yy) 720 { 721 PetscFunctionBegin; 722 PetscCall(MatMultAdd_SeqKAIJ(A,xx,PETSC_NULL,yy)); 723 PetscFunctionReturn(0); 724 } 725 726 #include <petsc/private/kernels/blockinvert.h> 727 728 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A,const PetscScalar **values) 729 { 730 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 731 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 732 const PetscScalar *S = b->S; 733 const PetscScalar *T = b->T; 734 const PetscScalar *v = a->a; 735 const PetscInt p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i; 736 PetscInt i,j,*v_pivots,dof,dof2; 737 PetscScalar *diag,aval,*v_work; 738 739 PetscFunctionBegin; 740 PetscCheck(p == q,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse."); 741 PetscCheck(S || T || b->isTI,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot 
invert a zero matrix."); 742 743 dof = p; 744 dof2 = dof*dof; 745 746 if (b->ibdiagvalid) { 747 if (values) *values = b->ibdiag; 748 PetscFunctionReturn(0); 749 } 750 if (!b->ibdiag) { 751 PetscCall(PetscMalloc1(dof2*m,&b->ibdiag)); 752 PetscCall(PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar))); 753 } 754 if (values) *values = b->ibdiag; 755 diag = b->ibdiag; 756 757 PetscCall(PetscMalloc2(dof,&v_work,dof,&v_pivots)); 758 for (i=0; i<m; i++) { 759 if (S) { 760 PetscCall(PetscMemcpy(diag,S,dof2*sizeof(PetscScalar))); 761 } else { 762 PetscCall(PetscMemzero(diag,dof2*sizeof(PetscScalar))); 763 } 764 if (b->isTI) { 765 aval = 0; 766 for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 767 for (j=0; j<dof; j++) diag[j+dof*j] += aval; 768 } else if (T) { 769 aval = 0; 770 for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 771 for (j=0; j<dof2; j++) diag[j] += aval*T[j]; 772 } 773 PetscCall(PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL)); 774 diag += dof2; 775 } 776 PetscCall(PetscFree2(v_work,v_pivots)); 777 778 b->ibdiagvalid = PETSC_TRUE; 779 PetscFunctionReturn(0); 780 } 781 782 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B) 783 { 784 Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data; 785 786 PetscFunctionBegin; 787 *B = kaij->AIJ; 788 PetscFunctionReturn(0); 789 } 790 791 static PetscErrorCode MatConvert_KAIJ_AIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 792 { 793 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 794 Mat AIJ,OAIJ,B; 795 PetscInt *d_nnz,*o_nnz = NULL,nz,i,j,m,d; 796 const PetscInt p = a->p,q = a->q; 797 PetscBool ismpikaij,missing; 798 799 PetscFunctionBegin; 800 if (reuse != MAT_REUSE_MATRIX) { 801 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij)); 802 if (ismpikaij) { 803 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 804 AIJ = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ; 805 OAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ; 806 } else { 807 AIJ = a->AIJ; 808 OAIJ = NULL; 809 } 810 
PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 811 PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N)); 812 PetscCall(MatSetType(B,MATAIJ)); 813 PetscCall(MatGetSize(AIJ,&m,NULL)); 814 PetscCall(MatMissingDiagonal(AIJ,&missing,&d)); /* assumption that all successive rows will have a missing diagonal */ 815 if (!missing || !a->S) d = m; 816 PetscCall(PetscMalloc1(m*p,&d_nnz)); 817 for (i = 0; i < m; ++i) { 818 PetscCall(MatGetRow_SeqAIJ(AIJ,i,&nz,NULL,NULL)); 819 for (j = 0; j < p; ++j) d_nnz[i*p + j] = nz*q + (i >= d)*q; 820 PetscCall(MatRestoreRow_SeqAIJ(AIJ,i,&nz,NULL,NULL)); 821 } 822 if (OAIJ) { 823 PetscCall(PetscMalloc1(m*p,&o_nnz)); 824 for (i = 0; i < m; ++i) { 825 PetscCall(MatGetRow_SeqAIJ(OAIJ,i,&nz,NULL,NULL)); 826 for (j = 0; j < p; ++j) o_nnz[i*p + j] = nz*q; 827 PetscCall(MatRestoreRow_SeqAIJ(OAIJ,i,&nz,NULL,NULL)); 828 } 829 PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz)); 830 } else { 831 PetscCall(MatSeqAIJSetPreallocation(B,0,d_nnz)); 832 } 833 PetscCall(PetscFree(d_nnz)); 834 PetscCall(PetscFree(o_nnz)); 835 } else B = *newmat; 836 PetscCall(MatConvert_Basic(A,newtype,MAT_REUSE_MATRIX,&B)); 837 if (reuse == MAT_INPLACE_MATRIX) { 838 PetscCall(MatHeaderReplace(A,&B)); 839 } else *newmat = B; 840 PetscFunctionReturn(0); 841 } 842 843 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 844 { 845 Mat_SeqKAIJ *kaij = (Mat_SeqKAIJ*) A->data; 846 Mat_SeqAIJ *a = (Mat_SeqAIJ*)kaij->AIJ->data; 847 const PetscScalar *aa = a->a, *T = kaij->T, *v; 848 const PetscInt m = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi; 849 const PetscScalar *b, *xb, *idiag; 850 PetscScalar *x, *work, *workt, *w, *y, *arr, *t, *arrt; 851 PetscInt i, j, k, i2, bs, bs2, nz; 852 853 PetscFunctionBegin; 854 its = its*lits; 855 PetscCheck(!(flag & SOR_EISENSTAT),PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 856 PetscCheck(its 
> 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits); 857 PetscCheck(!fshift,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift"); 858 PetscCheck(!(flag & SOR_APPLY_UPPER) && !(flag & SOR_APPLY_LOWER),PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts"); 859 PetscCheck(p == q,PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: No support for non-square dense blocks"); 860 else {bs = p; bs2 = bs*bs; } 861 862 if (!m) PetscFunctionReturn(0); 863 864 if (!kaij->ibdiagvalid) PetscCall(MatInvertBlockDiagonal_SeqKAIJ(A,NULL)); 865 idiag = kaij->ibdiag; 866 diag = a->diag; 867 868 if (!kaij->sor.setup) { 869 PetscCall(PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr)); 870 kaij->sor.setup = PETSC_TRUE; 871 } 872 y = kaij->sor.y; 873 w = kaij->sor.w; 874 work = kaij->sor.work; 875 t = kaij->sor.t; 876 arr = kaij->sor.arr; 877 878 PetscCall( VecGetArray(xx,&x)); 879 PetscCall(VecGetArrayRead(bb,&b)); 880 881 if (flag & SOR_ZERO_INITIAL_GUESS) { 882 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 883 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); /* x[0:bs] <- D^{-1} b[0:bs] */ 884 PetscCall(PetscMemcpy(t,b,bs*sizeof(PetscScalar))); 885 i2 = bs; 886 idiag += bs2; 887 for (i=1; i<m; i++) { 888 v = aa + ai[i]; 889 vi = aj + ai[i]; 890 nz = diag[i] - ai[i]; 891 892 if (T) { /* b - T (Arow * x) */ 893 PetscCall(PetscMemzero(w,bs*sizeof(PetscScalar))); 894 for (j=0; j<nz; j++) { 895 for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 896 } 897 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]); 898 for (k=0; k<bs; k++) t[i2+k] += b[i2+k]; 899 } else if (kaij->isTI) { 900 PetscCall(PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar))); 901 for (j=0; j<nz; j++) { 902 for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k]; 903 } 904 } else { 905 
PetscCall(PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar))); 906 } 907 908 PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y); 909 for (j=0; j<bs; j++) x[i2+j] = omega * y[j]; 910 911 idiag += bs2; 912 i2 += bs; 913 } 914 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 915 PetscCall(PetscLogFlops(1.0*bs2*a->nz)); 916 xb = t; 917 } else xb = b; 918 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 919 idiag = kaij->ibdiag+bs2*(m-1); 920 i2 = bs * (m-1); 921 PetscCall(PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar))); 922 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 923 i2 -= bs; 924 idiag -= bs2; 925 for (i=m-2; i>=0; i--) { 926 v = aa + diag[i] + 1 ; 927 vi = aj + diag[i] + 1; 928 nz = ai[i+1] - diag[i] - 1; 929 930 if (T) { /* FIXME: This branch untested */ 931 PetscCall(PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar))); 932 /* copy all rows of x that are needed into contiguous space */ 933 workt = work; 934 for (j=0; j<nz; j++) { 935 PetscCall(PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar))); 936 workt += bs; 937 } 938 arrt = arr; 939 for (j=0; j<nz; j++) { 940 PetscCall(PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar))); 941 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 942 arrt += bs2; 943 } 944 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 945 } else if (kaij->isTI) { 946 PetscCall(PetscMemcpy(w,t+i2,bs*sizeof(PetscScalar))); 947 for (j=0; j<nz; j++) { 948 for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 949 } 950 } 951 952 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */ 953 for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j]; 954 955 idiag -= bs2; 956 i2 -= bs; 957 } 958 PetscCall(PetscLogFlops(1.0*bs2*(a->nz))); 959 } 960 its--; 961 } 962 while (its--) { /* FIXME: This branch not updated */ 963 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 964 i2 = 0; 965 idiag = kaij->ibdiag; 966 for (i=0; i<m; i++) { 967 
PetscCall(PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar))); 968 969 v = aa + ai[i]; 970 vi = aj + ai[i]; 971 nz = diag[i] - ai[i]; 972 workt = work; 973 for (j=0; j<nz; j++) { 974 PetscCall(PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar))); 975 workt += bs; 976 } 977 arrt = arr; 978 if (T) { 979 for (j=0; j<nz; j++) { 980 PetscCall(PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar))); 981 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 982 arrt += bs2; 983 } 984 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 985 } else if (kaij->isTI) { 986 for (j=0; j<nz; j++) { 987 PetscCall(PetscMemzero(arrt,bs2*sizeof(PetscScalar))); 988 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 989 arrt += bs2; 990 } 991 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 992 } 993 PetscCall(PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar))); 994 995 v = aa + diag[i] + 1; 996 vi = aj + diag[i] + 1; 997 nz = ai[i+1] - diag[i] - 1; 998 workt = work; 999 for (j=0; j<nz; j++) { 1000 PetscCall(PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar))); 1001 workt += bs; 1002 } 1003 arrt = arr; 1004 if (T) { 1005 for (j=0; j<nz; j++) { 1006 PetscCall(PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar))); 1007 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 1008 arrt += bs2; 1009 } 1010 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 1011 } else if (kaij->isTI) { 1012 for (j=0; j<nz; j++) { 1013 PetscCall(PetscMemzero(arrt,bs2*sizeof(PetscScalar))); 1014 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 1015 arrt += bs2; 1016 } 1017 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 1018 } 1019 1020 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 1021 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 1022 1023 idiag += bs2; 1024 i2 += bs; 1025 } 1026 xb = t; 1027 } 1028 else xb = b; 1029 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1030 idiag = kaij->ibdiag+bs2*(m-1); 1031 i2 = bs * (m-1); 1032 if (xb == b) { 1033 for (i=m-1; i>=0; i--) { 1034 
PetscCall(PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar))); 1035 1036 v = aa + ai[i]; 1037 vi = aj + ai[i]; 1038 nz = diag[i] - ai[i]; 1039 workt = work; 1040 for (j=0; j<nz; j++) { 1041 PetscCall(PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar))); 1042 workt += bs; 1043 } 1044 arrt = arr; 1045 if (T) { 1046 for (j=0; j<nz; j++) { 1047 PetscCall(PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar))); 1048 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 1049 arrt += bs2; 1050 } 1051 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 1052 } else if (kaij->isTI) { 1053 for (j=0; j<nz; j++) { 1054 PetscCall(PetscMemzero(arrt,bs2*sizeof(PetscScalar))); 1055 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 1056 arrt += bs2; 1057 } 1058 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 1059 } 1060 1061 v = aa + diag[i] + 1; 1062 vi = aj + diag[i] + 1; 1063 nz = ai[i+1] - diag[i] - 1; 1064 workt = work; 1065 for (j=0; j<nz; j++) { 1066 PetscCall(PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar))); 1067 workt += bs; 1068 } 1069 arrt = arr; 1070 if (T) { 1071 for (j=0; j<nz; j++) { 1072 PetscCall(PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar))); 1073 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 1074 arrt += bs2; 1075 } 1076 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 1077 } else if (kaij->isTI) { 1078 for (j=0; j<nz; j++) { 1079 PetscCall(PetscMemzero(arrt,bs2*sizeof(PetscScalar))); 1080 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 1081 arrt += bs2; 1082 } 1083 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 1084 } 1085 1086 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 1087 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 1088 } 1089 } else { 1090 for (i=m-1; i>=0; i--) { 1091 PetscCall(PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar))); 1092 v = aa + diag[i] + 1; 1093 vi = aj + diag[i] + 1; 1094 nz = ai[i+1] - diag[i] - 1; 1095 workt = work; 1096 for (j=0; j<nz; j++) { 1097 PetscCall(PetscMemcpy(workt,x + 
bs*(*vi++),bs*sizeof(PetscScalar))); 1098 workt += bs; 1099 } 1100 arrt = arr; 1101 if (T) { 1102 for (j=0; j<nz; j++) { 1103 PetscCall(PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar))); 1104 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 1105 arrt += bs2; 1106 } 1107 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 1108 } else if (kaij->isTI) { 1109 for (j=0; j<nz; j++) { 1110 PetscCall(PetscMemzero(arrt,bs2*sizeof(PetscScalar))); 1111 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 1112 arrt += bs2; 1113 } 1114 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 1115 } 1116 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 1117 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 1118 } 1119 } 1120 PetscCall(PetscLogFlops(1.0*bs2*(a->nz))); 1121 } 1122 } 1123 1124 PetscCall(VecRestoreArray(xx,&x)); 1125 PetscCall(VecRestoreArrayRead(bb,&b)); 1126 PetscFunctionReturn(0); 1127 } 1128 1129 /*===================================================================================*/ 1130 1131 PetscErrorCode MatMultAdd_MPIKAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1132 { 1133 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 1134 1135 PetscFunctionBegin; 1136 if (!yy) { 1137 PetscCall(VecSet(zz,0.0)); 1138 } else { 1139 PetscCall(VecCopy(yy,zz)); 1140 } 1141 PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. 
*/ 1142 /* start the scatter */ 1143 PetscCall(VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD)); 1144 PetscCall((*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz)); 1145 PetscCall(VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD)); 1146 PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz)); 1147 PetscFunctionReturn(0); 1148 } 1149 1150 PetscErrorCode MatMult_MPIKAIJ(Mat A,Vec xx,Vec yy) 1151 { 1152 PetscFunctionBegin; 1153 PetscCall(MatMultAdd_MPIKAIJ(A,xx,PETSC_NULL,yy)); 1154 PetscFunctionReturn(0); 1155 } 1156 1157 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A,const PetscScalar **values) 1158 { 1159 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 1160 1161 PetscFunctionBegin; 1162 PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ is up to date. */ 1163 PetscCall((*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values)); 1164 PetscFunctionReturn(0); 1165 } 1166 1167 /* ----------------------------------------------------------------*/ 1168 1169 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 1170 { 1171 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*) A->data; 1172 PetscErrorCode diag = PETSC_FALSE; 1173 PetscInt nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c; 1174 PetscScalar *vaij,*v,*S=b->S,*T=b->T; 1175 1176 PetscFunctionBegin; 1177 PetscCheck(!b->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1178 b->getrowactive = PETSC_TRUE; 1179 PetscCheck(row >= 0 && row < A->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range",row); 1180 1181 if ((!S) && (!T) && (!b->isTI)) { 1182 if (ncols) *ncols = 0; 1183 if (cols) *cols = NULL; 1184 if (values) *values = NULL; 1185 PetscFunctionReturn(0); 1186 } 1187 1188 if (T || b->isTI) { 1189 PetscCall(MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij)); 1190 c = nzaij; 1191 for (i=0; i<nzaij; i++) { 1192 /* check if this row contains a diagonal entry */ 1193 if (colsaij[i] == r) { 1194 diag = PETSC_TRUE; 
1195 c = i; 1196 } 1197 } 1198 } else nzaij = c = 0; 1199 1200 /* calculate size of row */ 1201 nz = 0; 1202 if (S) nz += q; 1203 if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q); 1204 1205 if (cols || values) { 1206 PetscCall(PetscMalloc2(nz,&idx,nz,&v)); 1207 for (i=0; i<q; i++) { 1208 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1209 v[i] = 0.0; 1210 } 1211 if (b->isTI) { 1212 for (i=0; i<nzaij; i++) { 1213 for (j=0; j<q; j++) { 1214 idx[i*q+j] = colsaij[i]*q+j; 1215 v[i*q+j] = (j==s ? vaij[i] : 0); 1216 } 1217 } 1218 } else if (T) { 1219 for (i=0; i<nzaij; i++) { 1220 for (j=0; j<q; j++) { 1221 idx[i*q+j] = colsaij[i]*q+j; 1222 v[i*q+j] = vaij[i]*T[s+j*p]; 1223 } 1224 } 1225 } 1226 if (S) { 1227 for (j=0; j<q; j++) { 1228 idx[c*q+j] = r*q+j; 1229 v[c*q+j] += S[s+j*p]; 1230 } 1231 } 1232 } 1233 1234 if (ncols) *ncols = nz; 1235 if (cols) *cols = idx; 1236 if (values) *values = v; 1237 PetscFunctionReturn(0); 1238 } 1239 1240 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1241 { 1242 PetscFunctionBegin; 1243 if (nz) *nz = 0; 1244 PetscCall(PetscFree2(*idx,*v)); 1245 ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 1246 PetscFunctionReturn(0); 1247 } 1248 1249 PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 1250 { 1251 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*) A->data; 1252 Mat AIJ = b->A; 1253 PetscBool diag = PETSC_FALSE; 1254 Mat MatAIJ,MatOAIJ; 1255 const PetscInt rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray; 1256 PetscInt nz,*idx,ncolsaij = 0,ncolsoaij = 0,*colsaij,*colsoaij,r,s,c,i,j,lrow; 1257 PetscScalar *v,*vals,*ovals,*S=b->S,*T=b->T; 1258 1259 PetscFunctionBegin; 1260 PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. 
*/ 1261 MatAIJ = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ; 1262 MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ; 1263 PetscCheck(!b->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1264 b->getrowactive = PETSC_TRUE; 1265 PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1266 lrow = row - rstart; 1267 1268 if ((!S) && (!T) && (!b->isTI)) { 1269 if (ncols) *ncols = 0; 1270 if (cols) *cols = NULL; 1271 if (values) *values = NULL; 1272 PetscFunctionReturn(0); 1273 } 1274 1275 r = lrow/p; 1276 s = lrow%p; 1277 1278 if (T || b->isTI) { 1279 PetscCall(MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray)); 1280 PetscCall(MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals)); 1281 PetscCall(MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals)); 1282 1283 c = ncolsaij + ncolsoaij; 1284 for (i=0; i<ncolsaij; i++) { 1285 /* check if this row contains a diagonal entry */ 1286 if (colsaij[i] == r) { 1287 diag = PETSC_TRUE; 1288 c = i; 1289 } 1290 } 1291 } else c = 0; 1292 1293 /* calculate size of row */ 1294 nz = 0; 1295 if (S) nz += q; 1296 if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q); 1297 1298 if (cols || values) { 1299 PetscCall(PetscMalloc2(nz,&idx,nz,&v)); 1300 for (i=0; i<q; i++) { 1301 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1302 v[i] = 0.0; 1303 } 1304 if (b->isTI) { 1305 for (i=0; i<ncolsaij; i++) { 1306 for (j=0; j<q; j++) { 1307 idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 1308 v[i*q+j] = (j==s ? vals[i] : 0.0); 1309 } 1310 } 1311 for (i=0; i<ncolsoaij; i++) { 1312 for (j=0; j<q; j++) { 1313 idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 1314 v[(i+ncolsaij)*q+j] = (j==s ? 
ovals[i]: 0.0); 1315 } 1316 } 1317 } else if (T) { 1318 for (i=0; i<ncolsaij; i++) { 1319 for (j=0; j<q; j++) { 1320 idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 1321 v[i*q+j] = vals[i]*T[s+j*p]; 1322 } 1323 } 1324 for (i=0; i<ncolsoaij; i++) { 1325 for (j=0; j<q; j++) { 1326 idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 1327 v[(i+ncolsaij)*q+j] = ovals[i]*T[s+j*p]; 1328 } 1329 } 1330 } 1331 if (S) { 1332 for (j=0; j<q; j++) { 1333 idx[c*q+j] = (r+rstart/p)*q+j; 1334 v[c*q+j] += S[s+j*p]; 1335 } 1336 } 1337 } 1338 1339 if (ncols) *ncols = nz; 1340 if (cols) *cols = idx; 1341 if (values) *values = v; 1342 PetscFunctionReturn(0); 1343 } 1344 1345 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1346 { 1347 PetscFunctionBegin; 1348 PetscCall(PetscFree2(*idx,*v)); 1349 ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 1350 PetscFunctionReturn(0); 1351 } 1352 1353 PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 1354 { 1355 Mat A; 1356 1357 PetscFunctionBegin; 1358 PetscCall(MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A)); 1359 PetscCall(MatCreateSubMatrix(A,isrow,iscol,cll,newmat)); 1360 PetscCall(MatDestroy(&A)); 1361 PetscFunctionReturn(0); 1362 } 1363 1364 /* ---------------------------------------------------------------------------------- */ 1365 /*@C 1366 MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form: 1367 1368 [I \otimes S + A \otimes T] 1369 1370 where 1371 S is a dense (p \times q) matrix 1372 T is a dense (p \times q) matrix 1373 A is an AIJ (n \times n) matrix 1374 I is the identity matrix 1375 The resulting matrix is (np \times nq) 1376 1377 S and T are always stored independently on all processes as PetscScalar arrays in column-major format. 1378 1379 Collective 1380 1381 Input Parameters: 1382 + A - the AIJ matrix 1383 . p - number of rows in S and T 1384 . q - number of columns in S and T 1385 . 
S - the S matrix (can be NULL), stored as a PetscScalar array (column-major)
- T - the T matrix (can be NULL), stored as a PetscScalar array (column-major)

  Output Parameter:
. kaij - the new KAIJ matrix

  Notes:
  This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
  Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.

  Developer Notes:
  In the MATMPIKAIJ case, the internal 'AIJ' and 'OAIJ' sequential KAIJ matrices are kept up to date by tracking the object state
  of the AIJ matrix 'A' that describes the blockwise action of the MATMPIKAIJ matrix and, if the object state has changed, lazily
  rebuilding 'AIJ' and 'OAIJ' just before executing operations with the MATMPIKAIJ matrix. If new types of operations are added,
  routines implementing those must also ensure these are rebuilt when needed (by calling the internal MatKAIJ_build_AIJ_OAIJ() routine).

  Level: advanced

.seealso: `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MATKAIJ`
@*/
PetscErrorCode MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij)
{
  PetscFunctionBegin;
  /* Create an empty matrix on A's communicator, then configure it as KAIJ:
     the type is set first so the MatKAIJSet* setters dispatch correctly. */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A),kaij));
  PetscCall(MatSetType(*kaij,MATKAIJ));
  PetscCall(MatKAIJSetAIJ(*kaij,A));
  PetscCall(MatKAIJSetS(*kaij,p,q,S));
  PetscCall(MatKAIJSetT(*kaij,p,q,T));
  PetscCall(MatSetUp(*kaij));
  PetscFunctionReturn(0);
}

/*MC
  MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form
    [I \otimes S + A \otimes T],
  where
    S is a dense (p \times q) matrix,
    T is a dense (p \times q) matrix,
    A is an AIJ (n \times n) matrix,
    and I is the identity matrix.
  The resulting matrix is (np \times nq).

  S and T are always stored independently on all processes as PetscScalar arrays in column-major format.

  Notes:
  A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b,
  where x and b are column vectors containing the row-major representations of X and B.

  Level: advanced

.seealso: `MatKAIJSetAIJ()`, `MatKAIJSetS()`, `MatKAIJSetT()`, `MatKAIJGetAIJ()`, `MatKAIJGetS()`, `MatKAIJGetT()`, `MatCreateKAIJ()`
M*/

/*
  Constructor registered for MATKAIJ: allocates the (shared seq/MPI) data
  structure, zeroes the operations table, and installs either the SeqKAIJ or
  MPIKAIJ implementations depending on the communicator size.
*/
PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
{
  Mat_MPIKAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(PetscNewLog(A,&b));
  A->data = (void*)b;

  /* start from a clean slate: only the operations set below are supported */
  PetscCall(PetscMemzero(A->ops,sizeof(struct _MatOps)));

  b->w = NULL;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
  if (size == 1) {
    /* single process: sequential KAIJ implementations */
    PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ));
    A->ops->destroy             = MatDestroy_SeqKAIJ;
    A->ops->mult                = MatMult_SeqKAIJ;
    A->ops->multadd             = MatMultAdd_SeqKAIJ;
    A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
    A->ops->getrow              = MatGetRow_SeqKAIJ;
    A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
    A->ops->sor                 = MatSOR_SeqKAIJ;
    PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqkaij_seqaij_C",MatConvert_KAIJ_AIJ));
  } else {
    /* multiple processes: parallel KAIJ implementations (no SOR available) */
    PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ));
    A->ops->destroy             = MatDestroy_MPIKAIJ;
    A->ops->mult                = MatMult_MPIKAIJ;
    A->ops->multadd             = MatMultAdd_MPIKAIJ;
    A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
    A->ops->getrow              = MatGetRow_MPIKAIJ;
    A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
    PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ));
    PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpikaij_mpiaij_C",MatConvert_KAIJ_AIJ));
  }
  /* operations shared by the sequential and parallel variants */
  A->ops->setup           = MatSetUp_KAIJ;
  A->ops->view            = MatView_KAIJ;
  A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
  PetscFunctionReturn(0);
}