1 2 /* 3 Defines the basic matrix operations for the KAIJ matrix storage format. 4 This format is used to evaluate matrices of the form: 5 6 [I \otimes S + A \otimes T] 7 8 where 9 S is a dense (p \times q) matrix 10 T is a dense (p \times q) matrix 11 A is an AIJ (n \times n) matrix 12 I is the identity matrix 13 14 The resulting matrix is (np \times nq) 15 16 We provide: 17 MatMult() 18 MatMultAdd() 19 MatInvertBlockDiagonal() 20 and 21 MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*) 22 23 This single directory handles both the sequential and parallel codes 24 */ 25 26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/ 27 #include <../src/mat/utils/freespace.h> 28 #include <petsc/private/vecimpl.h> 29 30 /*@C 31 MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix 32 33 Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel 34 35 Input Parameter: 36 . A - the KAIJ matrix 37 38 Output Parameter: 39 . B - the AIJ matrix 40 41 Level: advanced 42 43 Notes: The reference count on the AIJ matrix is not increased so you should not destroy it. 44 45 .seealso: MatCreateKAIJ() 46 @*/ 47 PetscErrorCode MatKAIJGetAIJ(Mat A,Mat *B) 48 { 49 PetscBool ismpikaij,isseqkaij; 50 51 PetscFunctionBegin; 52 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij)); 53 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij)); 54 if (ismpikaij) { 55 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 56 57 *B = b->A; 58 } else if (isseqkaij) { 59 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 60 61 *B = b->AIJ; 62 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ"); 63 PetscFunctionReturn(0); 64 } 65 66 /*@C 67 MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix 68 69 Not Collective; the entire S is stored and returned independently on all processes. 70 71 Input Parameter: 72 . A - the KAIJ matrix 73 74 Output Parameters: 75 + m - the number of rows in S 76 . n - the number of columns in S 77 - S - the S matrix, in form of a scalar array in column-major format 78 79 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 80 81 Level: advanced 82 83 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 84 @*/ 85 PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S) 86 { 87 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 88 PetscFunctionBegin; 89 if (m) *m = b->p; 90 if (n) *n = b->q; 91 if (S) *S = b->S; 92 PetscFunctionReturn(0); 93 } 94 95 /*@C 96 MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix 97 98 Not Collective; the entire S is stored and returned independently on all processes. 99 100 Input Parameter: 101 . A - the KAIJ matrix 102 103 Output Parameters: 104 + m - the number of rows in S 105 . n - the number of columns in S 106 - S - the S matrix, in form of a scalar array in column-major format 107 108 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 109 110 Level: advanced 111 112 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 113 @*/ 114 PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S) 115 { 116 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 117 PetscFunctionBegin; 118 if (m) *m = b->p; 119 if (n) *n = b->q; 120 if (S) *S = b->S; 121 PetscFunctionReturn(0); 122 } 123 124 /*@C 125 MatKAIJRestoreS - Restore array obtained with MatKAIJGetS() 126 127 Not collective 128 129 Input Parameter: 130 . A - the KAIJ matrix 131 132 Output Parameter: 133 . S - location of pointer to array obtained with MatKAIJGetS() 134 135 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 136 If NULL is passed, it will not attempt to zero the array pointer. 137 138 Level: advanced 139 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead() 140 @*/ 141 PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S) 142 { 143 PetscFunctionBegin; 144 if (S) *S = NULL; 145 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 146 PetscFunctionReturn(0); 147 } 148 149 /*@C 150 MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead() 151 152 Not collective 153 154 Input Parameter: 155 . A - the KAIJ matrix 156 157 Output Parameter: 158 . S - location of pointer to array obtained with MatKAIJGetS() 159 160 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 161 If NULL is passed, it will not attempt to zero the array pointer. 162 163 Level: advanced 164 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead() 165 @*/ 166 PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S) 167 { 168 PetscFunctionBegin; 169 if (S) *S = NULL; 170 PetscFunctionReturn(0); 171 } 172 173 /*@C 174 MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix 175 176 Not Collective; the entire T is stored and returned independently on all processes 177 178 Input Parameter: 179 . A - the KAIJ matrix 180 181 Output Parameters: 182 + m - the number of rows in T 183 . n - the number of columns in T 184 - T - the T matrix, in form of a scalar array in column-major format 185 186 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 187 188 Level: advanced 189 190 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 191 @*/ 192 PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T) 193 { 194 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 195 PetscFunctionBegin; 196 if (m) *m = b->p; 197 if (n) *n = b->q; 198 if (T) *T = b->T; 199 PetscFunctionReturn(0); 200 } 201 202 /*@C 203 MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix 204 205 Not Collective; the entire T is stored and returned independently on all processes 206 207 Input Parameter: 208 . A - the KAIJ matrix 209 210 Output Parameters: 211 + m - the number of rows in T 212 . n - the number of columns in T 213 - T - the T matrix, in form of a scalar array in column-major format 214 215 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 216 217 Level: advanced 218 219 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 220 @*/ 221 PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T) 222 { 223 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 224 PetscFunctionBegin; 225 if (m) *m = b->p; 226 if (n) *n = b->q; 227 if (T) *T = b->T; 228 PetscFunctionReturn(0); 229 } 230 231 /*@C 232 MatKAIJRestoreT - Restore array obtained with MatKAIJGetT() 233 234 Not collective 235 236 Input Parameter: 237 . A - the KAIJ matrix 238 239 Output Parameter: 240 . T - location of pointer to array obtained with MatKAIJGetS() 241 242 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 243 If NULL is passed, it will not attempt to zero the array pointer. 244 245 Level: advanced 246 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead() 247 @*/ 248 PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T) 249 { 250 PetscFunctionBegin; 251 if (T) *T = NULL; 252 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 253 PetscFunctionReturn(0); 254 } 255 256 /*@C 257 MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead() 258 259 Not collective 260 261 Input Parameter: 262 . A - the KAIJ matrix 263 264 Output Parameter: 265 . T - location of pointer to array obtained with MatKAIJGetS() 266 267 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 268 If NULL is passed, it will not attempt to zero the array pointer. 269 270 Level: advanced 271 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead() 272 @*/ 273 PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T) 274 { 275 PetscFunctionBegin; 276 if (T) *T = NULL; 277 PetscFunctionReturn(0); 278 } 279 280 /*@ 281 MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix 282 283 Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel 284 285 Input Parameters: 286 + A - the KAIJ matrix 287 - B - the AIJ matrix 288 289 Notes: 290 This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed. 291 Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix. 292 293 Level: advanced 294 295 .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT() 296 @*/ 297 PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B) 298 { 299 PetscMPIInt size; 300 PetscBool flg; 301 302 PetscFunctionBegin; 303 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 304 if (size == 1) { 305 PetscCall(PetscObjectTypeCompare((PetscObject)B,MATSEQAIJ,&flg)); 306 PetscCheck(flg,PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MatKAIJSetAIJ() with MATSEQKAIJ does not support %s as the AIJ mat",((PetscObject)B)->type_name); 307 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 308 a->AIJ = B; 309 } else { 310 Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data; 311 a->A = B; 312 } 313 PetscCall(PetscObjectReference((PetscObject)B)); 314 PetscFunctionReturn(0); 315 } 316 317 /*@C 318 MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix 319 320 Logically Collective; the entire S is stored independently on all processes. 321 322 Input Parameters: 323 + A - the KAIJ matrix 324 . p - the number of rows in S 325 . q - the number of columns in S 326 - S - the S matrix, in form of a scalar array in column-major format 327 328 Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix. 329 The S matrix is copied, so the user can destroy this array. 330 331 Level: Advanced 332 333 .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ() 334 @*/ 335 PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[]) 336 { 337 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 338 339 PetscFunctionBegin; 340 PetscCall(PetscFree(a->S)); 341 if (S) { 342 PetscCall(PetscMalloc1(p*q,&a->S)); 343 PetscCall(PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar))); 344 } else a->S = NULL; 345 346 a->p = p; 347 a->q = q; 348 PetscFunctionReturn(0); 349 } 350 351 /*@C 352 MatKAIJGetScaledIdentity - Check if both S and T are scaled identities. 353 354 Logically Collective. 355 356 Input Parameter: 357 . A - the KAIJ matrix 358 359 Output Parameter: 360 . identity - the Boolean value 361 362 Level: Advanced 363 364 .seealso: MatKAIJGetS(), MatKAIJGetT() 365 @*/ 366 PetscErrorCode MatKAIJGetScaledIdentity(Mat A,PetscBool* identity) 367 { 368 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 369 PetscInt i,j; 370 371 PetscFunctionBegin; 372 if (a->p != a->q) { 373 *identity = PETSC_FALSE; 374 PetscFunctionReturn(0); 375 } else *identity = PETSC_TRUE; 376 if (!a->isTI || a->S) { 377 for (i=0; i<a->p && *identity; i++) { 378 for (j=0; j<a->p && *identity; j++) { 379 if (i != j) { 380 if (a->S && PetscAbsScalar(a->S[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE; 381 if (a->T && PetscAbsScalar(a->T[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE; 382 } else { 383 if (a->S && PetscAbsScalar(a->S[i*(a->p+1)]-a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE; 384 if (a->T && PetscAbsScalar(a->T[i*(a->p+1)]-a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE; 385 } 386 } 387 } 388 } 389 PetscFunctionReturn(0); 390 } 391 392 /*@C 393 MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix 394 395 Logically Collective; the entire T is stored independently on all processes. 396 397 Input Parameters: 398 + A - the KAIJ matrix 399 . p - the number of rows in S 400 . q - the number of columns in S 401 - T - the T matrix, in form of a scalar array in column-major format 402 403 Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix. 404 The T matrix is copied, so the user can destroy this array. 405 406 Level: Advanced 407 408 .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ() 409 @*/ 410 PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[]) 411 { 412 PetscInt i,j; 413 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 414 PetscBool isTI = PETSC_FALSE; 415 416 PetscFunctionBegin; 417 /* check if T is an identity matrix */ 418 if (T && (p == q)) { 419 isTI = PETSC_TRUE; 420 for (i=0; i<p; i++) { 421 for (j=0; j<q; j++) { 422 if (i == j) { 423 /* diagonal term must be 1 */ 424 if (T[i+j*p] != 1.0) isTI = PETSC_FALSE; 425 } else { 426 /* off-diagonal term must be 0 */ 427 if (T[i+j*p] != 0.0) isTI = PETSC_FALSE; 428 } 429 } 430 } 431 } 432 a->isTI = isTI; 433 434 PetscCall(PetscFree(a->T)); 435 if (T && (!isTI)) { 436 PetscCall(PetscMalloc1(p*q,&a->T)); 437 PetscCall(PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar))); 438 } else a->T = NULL; 439 440 a->p = p; 441 a->q = q; 442 PetscFunctionReturn(0); 443 } 444 445 PetscErrorCode MatDestroy_SeqKAIJ(Mat A) 446 { 447 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 448 449 PetscFunctionBegin; 450 PetscCall(MatDestroy(&b->AIJ)); 451 PetscCall(PetscFree(b->S)); 452 PetscCall(PetscFree(b->T)); 453 PetscCall(PetscFree(b->ibdiag)); 454 PetscCall(PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr)); 455 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqkaij_seqaij_C",NULL)); 456 PetscCall(PetscFree(A->data)); 457 PetscFunctionReturn(0); 458 } 459 460 PETSC_INTERN PetscErrorCode MatKAIJ_build_AIJ_OAIJ(Mat A) 461 { 462 Mat_MPIKAIJ *a; 463 Mat_MPIAIJ *mpiaij; 464 PetscScalar *T; 465 PetscInt i,j; 466 PetscObjectState state; 467 468 PetscFunctionBegin; 469 a = (Mat_MPIKAIJ*)A->data; 470 mpiaij = (Mat_MPIAIJ*)a->A->data; 471 472 PetscCall(PetscObjectStateGet((PetscObject)a->A,&state)); 473 if (state == a->state) { 474 /* The existing AIJ and KAIJ members are up-to-date, so simply exit. */ 475 PetscFunctionReturn(0); 476 } else { 477 PetscCall(MatDestroy(&a->AIJ)); 478 PetscCall(MatDestroy(&a->OAIJ)); 479 if (a->isTI) { 480 /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL. 481 * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will 482 * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix 483 * to pass in. */ 484 PetscCall(PetscMalloc1(a->p*a->q,&T)); 485 for (i=0; i<a->p; i++) { 486 for (j=0; j<a->q; j++) { 487 if (i==j) T[i+j*a->p] = 1.0; 488 else T[i+j*a->p] = 0.0; 489 } 490 } 491 } else T = a->T; 492 PetscCall(MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ)); 493 PetscCall(MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ)); 494 if (a->isTI) { 495 PetscCall(PetscFree(T)); 496 } 497 a->state = state; 498 } 499 500 PetscFunctionReturn(0); 501 } 502 503 PetscErrorCode MatSetUp_KAIJ(Mat A) 504 { 505 PetscInt n; 506 PetscMPIInt size; 507 Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ*)A->data; 508 509 PetscFunctionBegin; 510 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 511 if (size == 1) { 512 PetscCall(MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N)); 513 PetscCall(PetscLayoutSetBlockSize(A->rmap,seqkaij->p)); 514 PetscCall(PetscLayoutSetBlockSize(A->cmap,seqkaij->q)); 515 PetscCall(PetscLayoutSetUp(A->rmap)); 516 PetscCall(PetscLayoutSetUp(A->cmap)); 517 } else { 518 Mat_MPIKAIJ *a; 519 Mat_MPIAIJ *mpiaij; 520 IS from,to; 521 Vec gvec; 522 523 a = (Mat_MPIKAIJ*)A->data; 524 mpiaij = (Mat_MPIAIJ*)a->A->data; 525 PetscCall(MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N)); 526 PetscCall(PetscLayoutSetBlockSize(A->rmap,seqkaij->p)); 527 PetscCall(PetscLayoutSetBlockSize(A->cmap,seqkaij->q)); 528 PetscCall(PetscLayoutSetUp(A->rmap)); 529 PetscCall(PetscLayoutSetUp(A->cmap)); 530 531 PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); 532 533 PetscCall(VecGetSize(mpiaij->lvec,&n)); 534 PetscCall(VecCreate(PETSC_COMM_SELF,&a->w)); 535 PetscCall(VecSetSizes(a->w,n*a->q,n*a->q)); 536 PetscCall(VecSetBlockSize(a->w,a->q)); 537 PetscCall(VecSetType(a->w,VECSEQ)); 538 539 /* create two temporary Index sets for build scatter gather */ 540 PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from)); 541 PetscCall(ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to)); 542 543 /* create temporary global vector to generate scatter context */ 544 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec)); 545 546 /* generate the scatter context */ 547 PetscCall(VecScatterCreate(gvec,from,a->w,to,&a->ctx)); 548 549 PetscCall(ISDestroy(&from)); 550 PetscCall(ISDestroy(&to)); 551 PetscCall(VecDestroy(&gvec)); 552 } 553 554 A->assembled = PETSC_TRUE; 555 PetscFunctionReturn(0); 556 } 557 558 PetscErrorCode MatView_KAIJ(Mat A,PetscViewer viewer) 559 { 560 PetscViewerFormat format; 561 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 562 Mat B; 563 PetscInt i; 564 PetscBool ismpikaij; 565 566 PetscFunctionBegin; 567 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij)); 568 PetscCall(PetscViewerGetFormat(viewer,&format)); 569 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) { 570 PetscCall(PetscViewerASCIIPrintf(viewer,"S and T have %" PetscInt_FMT " rows and %" PetscInt_FMT " columns\n",a->p,a->q)); 571 572 /* Print appropriate details for S. */ 573 if (!a->S) { 574 PetscCall(PetscViewerASCIIPrintf(viewer,"S is NULL\n")); 575 } else if (format == PETSC_VIEWER_ASCII_IMPL) { 576 PetscCall(PetscViewerASCIIPrintf(viewer,"Entries of S are ")); 577 for (i=0; i<(a->p * a->q); i++) { 578 #if defined(PETSC_USE_COMPLEX) 579 PetscCall(PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->S[i]),(double)PetscImaginaryPart(a->S[i]))); 580 #else 581 PetscCall(PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->S[i]))); 582 #endif 583 } 584 PetscCall(PetscViewerASCIIPrintf(viewer,"\n")); 585 } 586 587 /* Print appropriate details for T. */ 588 if (a->isTI) { 589 PetscCall(PetscViewerASCIIPrintf(viewer,"T is the identity matrix\n")); 590 } else if (!a->T) { 591 PetscCall(PetscViewerASCIIPrintf(viewer,"T is NULL\n")); 592 } else if (format == PETSC_VIEWER_ASCII_IMPL) { 593 PetscCall(PetscViewerASCIIPrintf(viewer,"Entries of T are ")); 594 for (i=0; i<(a->p * a->q); i++) { 595 #if defined(PETSC_USE_COMPLEX) 596 PetscCall(PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->T[i]),(double)PetscImaginaryPart(a->T[i]))); 597 #else 598 PetscCall(PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->T[i]))); 599 #endif 600 } 601 PetscCall(PetscViewerASCIIPrintf(viewer,"\n")); 602 } 603 604 /* Now print details for the AIJ matrix, using the AIJ viewer. */ 605 PetscCall(PetscViewerASCIIPrintf(viewer,"Now viewing the associated AIJ matrix:\n")); 606 if (ismpikaij) { 607 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 608 PetscCall(MatView(b->A,viewer)); 609 } else { 610 PetscCall(MatView(a->AIJ,viewer)); 611 } 612 613 } else { 614 /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */ 615 PetscCall(MatConvert(A,MATAIJ,MAT_INITIAL_MATRIX,&B)); 616 PetscCall(MatView(B,viewer)); 617 PetscCall(MatDestroy(&B)); 618 } 619 PetscFunctionReturn(0); 620 } 621 622 PetscErrorCode MatDestroy_MPIKAIJ(Mat A) 623 { 624 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 625 626 PetscFunctionBegin; 627 PetscCall(MatDestroy(&b->AIJ)); 628 PetscCall(MatDestroy(&b->OAIJ)); 629 PetscCall(MatDestroy(&b->A)); 630 PetscCall(VecScatterDestroy(&b->ctx)); 631 PetscCall(VecDestroy(&b->w)); 632 PetscCall(PetscFree(b->S)); 633 PetscCall(PetscFree(b->T)); 634 PetscCall(PetscFree(b->ibdiag)); 635 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",NULL)); 636 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpikaij_mpiaij_C",NULL)); 637 PetscCall(PetscFree(A->data)); 638 PetscFunctionReturn(0); 639 } 640 641 /* --------------------------------------------------------------------------------------*/ 642 643 /* zz = yy + Axx */ 644 PetscErrorCode MatMultAdd_SeqKAIJ(Mat A,Vec xx,Vec yy,Vec zz) 645 { 646 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 647 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 648 const PetscScalar *s = b->S, *t = b->T; 649 const PetscScalar *x,*v,*bx; 650 PetscScalar *y,*sums; 651 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 652 PetscInt n,i,jrow,j,l,p=b->p,q=b->q,k; 653 654 PetscFunctionBegin; 655 if (!yy) { 656 PetscCall(VecSet(zz,0.0)); 657 } else { 658 PetscCall(VecCopy(yy,zz)); 659 } 660 if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0); 661 662 PetscCall(VecGetArrayRead(xx,&x)); 663 PetscCall(VecGetArray(zz,&y)); 664 idx = a->j; 665 v = a->a; 666 ii = a->i; 667 668 if (b->isTI) { 669 for (i=0; i<m; i++) { 670 jrow = ii[i]; 671 n = ii[i+1] - jrow; 672 sums = y + p*i; 673 for (j=0; j<n; j++) { 674 for (k=0; k<p; k++) { 675 sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k]; 676 } 677 } 678 } 679 PetscCall(PetscLogFlops(3.0*(a->nz)*p)); 680 } else if (t) { 681 for (i=0; i<m; i++) { 682 jrow = ii[i]; 683 n = ii[i+1] - jrow; 684 sums = y + p*i; 685 for (j=0; j<n; j++) { 686 for (k=0; k<p; k++) { 687 for (l=0; l<q; l++) { 688 sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l]; 689 } 690 } 691 } 692 } 693 /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do), 694 * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T), 695 * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter 696 * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */ 697 PetscCall(PetscLogFlops((2.0*p*q-p)*m+2.0*p*a->nz)); 698 } 699 if (s) { 700 for (i=0; i<m; i++) { 701 sums = y + p*i; 702 bx = x + q*i; 703 if (i < b->AIJ->cmap->n) { 704 for (j=0; j<q; j++) { 705 for (k=0; k<p; k++) { 706 sums[k] += s[k+j*p]*bx[j]; 707 } 708 } 709 } 710 } 711 PetscCall(PetscLogFlops(2.0*m*p*q)); 712 } 713 714 PetscCall(VecRestoreArrayRead(xx,&x)); 715 PetscCall(VecRestoreArray(zz,&y)); 716 PetscFunctionReturn(0); 717 } 718 719 PetscErrorCode MatMult_SeqKAIJ(Mat A,Vec xx,Vec yy) 720 { 721 PetscFunctionBegin; 722 PetscCall(MatMultAdd_SeqKAIJ(A,xx,PETSC_NULL,yy)); 723 PetscFunctionReturn(0); 724 } 725 726 #include <petsc/private/kernels/blockinvert.h> 727 728 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A,const PetscScalar **values) 729 { 730 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 731 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 732 const PetscScalar *S = b->S; 733 const PetscScalar *T = b->T; 734 const PetscScalar *v = a->a; 735 const PetscInt p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i; 736 PetscInt i,j,*v_pivots,dof,dof2; 737 PetscScalar *diag,aval,*v_work; 738 739 PetscFunctionBegin; 740 PetscCheckFalse(p != q,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse."); 741 PetscCheckFalse((!S) && (!T) && (!b->isTI),PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix."); 742 743 dof = p; 744 dof2 = dof*dof; 745 746 if (b->ibdiagvalid) { 747 if (values) *values = b->ibdiag; 748 PetscFunctionReturn(0); 749 } 750 if (!b->ibdiag) { 751 PetscCall(PetscMalloc1(dof2*m,&b->ibdiag)); 752 PetscCall(PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar))); 753 } 754 if (values) *values = b->ibdiag; 755 diag = b->ibdiag; 756 757 PetscCall(PetscMalloc2(dof,&v_work,dof,&v_pivots)); 758 for (i=0; i<m; i++) { 759 if (S) { 760 PetscCall(PetscMemcpy(diag,S,dof2*sizeof(PetscScalar))); 761 } else { 762 PetscCall(PetscMemzero(diag,dof2*sizeof(PetscScalar))); 763 } 764 if (b->isTI) { 765 aval = 0; 766 for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 767 for (j=0; j<dof; j++) diag[j+dof*j] += aval; 768 } else if (T) { 769 aval = 0; 770 for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 771 for (j=0; j<dof2; j++) diag[j] += aval*T[j]; 772 } 773 PetscCall(PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL)); 774 diag += dof2; 775 } 776 PetscCall(PetscFree2(v_work,v_pivots)); 777 778 b->ibdiagvalid = PETSC_TRUE; 779 PetscFunctionReturn(0); 780 } 781 782 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B) 783 { 784 Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data; 785 786 PetscFunctionBegin; 787 *B = kaij->AIJ; 788 PetscFunctionReturn(0); 789 } 790 791 static PetscErrorCode MatConvert_KAIJ_AIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat) 792 { 793 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 794 Mat AIJ,OAIJ,B; 795 PetscInt *d_nnz,*o_nnz = NULL,nz,i,j,m,d; 796 const PetscInt p = a->p,q = a->q; 797 PetscBool ismpikaij,missing; 798 799 PetscFunctionBegin; 800 if (reuse != MAT_REUSE_MATRIX) { 801 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij)); 802 if (ismpikaij) { 803 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 804 AIJ = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ; 805 OAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ; 806 } else { 807 AIJ = a->AIJ; 808 OAIJ = NULL; 809 } 810 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 811 PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N)); 812 PetscCall(MatSetType(B,MATAIJ)); 813 PetscCall(MatGetSize(AIJ,&m,NULL)); 814 PetscCall(MatMissingDiagonal(AIJ,&missing,&d)); /* assumption that all successive rows will have a missing diagonal */ 815 if (!missing || !a->S) d = m; 816 PetscCall(PetscMalloc1(m*p,&d_nnz)); 817 for (i = 0; i < m; ++i) { 818 PetscCall(MatGetRow_SeqAIJ(AIJ,i,&nz,NULL,NULL)); 819 for (j = 0; j < p; ++j) d_nnz[i*p + j] = nz*q + (i >= d)*q; 820 PetscCall(MatRestoreRow_SeqAIJ(AIJ,i,&nz,NULL,NULL)); 821 } 822 if (OAIJ) { 823 PetscCall(PetscMalloc1(m*p,&o_nnz)); 824 for (i = 0; i < m; ++i) { 825 PetscCall(MatGetRow_SeqAIJ(OAIJ,i,&nz,NULL,NULL)); 826 for (j = 0; j < p; ++j) o_nnz[i*p + j] = nz*q; 827 PetscCall(MatRestoreRow_SeqAIJ(OAIJ,i,&nz,NULL,NULL)); 828 } 829 PetscCall(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz)); 830 } else { 831 PetscCall(MatSeqAIJSetPreallocation(B,0,d_nnz)); 832 } 833 PetscCall(PetscFree(d_nnz)); 834 PetscCall(PetscFree(o_nnz)); 835 } else B = *newmat; 836 PetscCall(MatConvert_Basic(A,newtype,MAT_REUSE_MATRIX,&B)); 837 if (reuse == MAT_INPLACE_MATRIX) { 838 PetscCall(MatHeaderReplace(A,&B)); 839 } else *newmat = B; 840 PetscFunctionReturn(0); 841 } 842 843 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 844 { 845 PetscErrorCode ierr; 846 Mat_SeqKAIJ *kaij = (Mat_SeqKAIJ*) A->data; 847 Mat_SeqAIJ *a = (Mat_SeqAIJ*)kaij->AIJ->data; 848 const PetscScalar *aa = a->a, *T = kaij->T, *v; 849 const PetscInt m = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi; 850 const PetscScalar *b, *xb, *idiag; 851 PetscScalar *x, *work, *workt, *w, *y, *arr, *t, *arrt; 852 PetscInt i, j, k, i2, bs, bs2, nz; 853 854 PetscFunctionBegin; 855 its = its*lits; 856 PetscCheckFalse(flag & SOR_EISENSTAT,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 857 PetscCheckFalse(its <= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits); 858 PetscCheck(!fshift,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift"); 859 PetscCheckFalse((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER),PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts"); 860 PetscCheckFalse(p != q,PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: No support for non-square dense blocks"); 861 else {bs = p; bs2 = bs*bs; } 862 863 if (!m) PetscFunctionReturn(0); 864 865 if (!kaij->ibdiagvalid) PetscCall(MatInvertBlockDiagonal_SeqKAIJ(A,NULL)); 866 idiag = kaij->ibdiag; 867 diag = a->diag; 868 869 if (!kaij->sor.setup) { 870 PetscCall(PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr)); 871 kaij->sor.setup = PETSC_TRUE; 872 } 873 y = kaij->sor.y; 874 w = kaij->sor.w; 875 work = kaij->sor.work; 876 t = kaij->sor.t; 877 arr = kaij->sor.arr; 878 879 ierr = VecGetArray(xx,&x); PetscCall(ierr); 880 PetscCall(VecGetArrayRead(bb,&b)); 881 882 if (flag & SOR_ZERO_INITIAL_GUESS) { 883 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 884 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); /* x[0:bs] <- D^{-1} b[0:bs] */ 885 PetscCall(PetscMemcpy(t,b,bs*sizeof(PetscScalar))); 886 i2 = bs; 887 idiag += bs2; 888 for (i=1; i<m; i++) { 889 v = aa + ai[i]; 890 vi = aj + ai[i]; 891 nz = diag[i] - ai[i]; 892 893 if (T) { /* b - T (Arow * x) */ 894 PetscCall(PetscMemzero(w,bs*sizeof(PetscScalar))); 895 for (j=0; j<nz; j++) { 896 for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 897 } 898 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]); 899 for (k=0; k<bs; k++) t[i2+k] += b[i2+k]; 900 } else if (kaij->isTI) { 901 PetscCall(PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar))); 902 for (j=0; j<nz; j++) { 903 for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k]; 904 } 905 } else { 906 PetscCall(PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar))); 907 } 908 909 PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y); 910 for (j=0; j<bs; j++) x[i2+j] = omega * y[j]; 911 912 idiag += bs2; 913 i2 += bs; 914 } 915 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 916 PetscCall(PetscLogFlops(1.0*bs2*a->nz)); 917 xb = t; 918 } else xb = b; 919 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 920 idiag = kaij->ibdiag+bs2*(m-1); 921 i2 = bs * (m-1); 922 PetscCall(PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar))); 923 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 924 i2 -= bs; 925 idiag -= bs2; 926 for (i=m-2; i>=0; i--) { 927 v = aa + diag[i] + 1 ; 928 vi = aj + diag[i] + 1; 929 nz = ai[i+1] - diag[i] - 1; 930 931 if (T) { /* FIXME: This branch untested */ 932 PetscCall(PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar))); 933 /* copy all rows of x that are needed into contiguous space */ 934 workt = work; 935 for (j=0; j<nz; j++) { 936 PetscCall(PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar))); 937 workt += bs; 938 } 939 arrt = arr; 940 for (j=0; j<nz; j++) { 941 PetscCall(PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar))); 942 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 943 arrt += bs2; 944 } 945 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 946 } else if (kaij->isTI) { 947 PetscCall(PetscMemcpy(w,t+i2,bs*sizeof(PetscScalar))); 948 for (j=0; j<nz; j++) { 949 for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 950 } 951 } 952 953 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */ 954 for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j]; 955 956 idiag -= bs2; 957 i2 -= bs; 958 } 959 PetscCall(PetscLogFlops(1.0*bs2*(a->nz))); 960 } 961 its--; 962 } 963 while (its--) { /* FIXME: This branch not updated */ 964 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 965 i2 = 0; 966 idiag = kaij->ibdiag; 967 for (i=0; i<m; i++) { 968 PetscCall(PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar))); 969 970 v = aa + ai[i]; 971 vi = aj + ai[i]; 972 nz = diag[i] - ai[i]; 973 workt = work; 974 for (j=0; j<nz; j++) { 975 PetscCall(PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar))); 976 workt += bs; 977 } 978 arrt = arr; 979 if (T) { 980 for (j=0; j<nz; j++) { 981 PetscCall(PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar))); 982 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 983 arrt += bs2; 984 } 985 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 986 } else if (kaij->isTI) { 987 for (j=0; j<nz; j++) { 988 PetscCall(PetscMemzero(arrt,bs2*sizeof(PetscScalar))); 989 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 990 arrt += bs2; 991 } 992 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 993 } 994 PetscCall(PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar))); 995 996 v = aa + diag[i] + 1; 997 vi = aj + diag[i] + 1; 998 nz = ai[i+1] - diag[i] - 1; 999 workt = work; 1000 for (j=0; j<nz; j++) { 1001 PetscCall(PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar))); 1002 workt += bs; 1003 } 1004 arrt = arr; 1005 if (T) { 1006 for (j=0; j<nz; j++) { 1007 PetscCall(PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar))); 1008 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 1009 arrt += bs2; 1010 } 1011 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 1012 } else if (kaij->isTI) { 1013 for (j=0; j<nz; j++) { 1014 PetscCall(PetscMemzero(arrt,bs2*sizeof(PetscScalar))); 1015 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 1016 arrt += bs2; 1017 } 1018 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 1019 } 1020 1021 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 1022 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 1023 1024 idiag += bs2; 1025 i2 += bs; 1026 } 1027 xb = t; 1028 } 1029 else xb = b; 1030 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 1031 idiag = kaij->ibdiag+bs2*(m-1); 1032 i2 = bs * (m-1); 1033 if (xb == b) { 1034 for (i=m-1; i>=0; i--) { 1035 PetscCall(PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar))); 1036 1037 v = aa + ai[i]; 1038 vi = aj + ai[i]; 1039 nz = diag[i] - ai[i]; 1040 workt = work; 1041 for (j=0; j<nz; j++) { 1042 PetscCall(PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar))); 1043 workt += bs; 1044 } 1045 arrt = arr; 1046 if (T) { 1047 for (j=0; j<nz; j++) { 1048 PetscCall(PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar))); 1049 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 1050 arrt += bs2; 1051 } 1052 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 1053 } else if (kaij->isTI) { 1054 for (j=0; j<nz; j++) { 1055 PetscCall(PetscMemzero(arrt,bs2*sizeof(PetscScalar))); 1056 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 1057 arrt += bs2; 1058 } 1059 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 1060 } 1061 1062 v = aa + diag[i] + 1; 1063 vi = aj + diag[i] + 1; 1064 nz = ai[i+1] - diag[i] - 1; 1065 workt = work; 1066 for (j=0; j<nz; j++) { 1067 PetscCall(PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar))); 1068 workt += bs; 1069 } 1070 arrt = arr; 1071 if (T) { 1072 for (j=0; j<nz; j++) { 1073 PetscCall(PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar))); 1074 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 1075 arrt += bs2; 1076 } 1077 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 1078 } else if (kaij->isTI) { 1079 for (j=0; j<nz; j++) { 1080 PetscCall(PetscMemzero(arrt,bs2*sizeof(PetscScalar))); 1081 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 1082 arrt += bs2; 1083 } 1084 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 1085 } 1086 1087 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 1088 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 1089 } 1090 } else { 1091 for (i=m-1; i>=0; i--) { 1092 PetscCall(PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar))); 1093 v = aa + diag[i] + 1; 1094 vi = aj + diag[i] + 1; 1095 nz = ai[i+1] - diag[i] - 1; 1096 workt = work; 1097 for (j=0; j<nz; j++) { 1098 PetscCall(PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar))); 1099 workt += bs; 1100 } 1101 arrt = arr; 1102 if (T) { 1103 for (j=0; j<nz; j++) { 1104 PetscCall(PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar))); 1105 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 1106 arrt += bs2; 1107 } 1108 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 1109 } else if (kaij->isTI) { 1110 for (j=0; j<nz; j++) { 1111 PetscCall(PetscMemzero(arrt,bs2*sizeof(PetscScalar))); 1112 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 1113 arrt += bs2; 1114 } 1115 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 1116 } 1117 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 1118 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 1119 } 1120 } 1121 PetscCall(PetscLogFlops(1.0*bs2*(a->nz))); 1122 } 1123 } 1124 1125 ierr = VecRestoreArray(xx,&x); PetscCall(ierr); 1126 PetscCall(VecRestoreArrayRead(bb,&b)); 1127 PetscFunctionReturn(0); 1128 } 1129 1130 /*===================================================================================*/ 1131 1132 PetscErrorCode MatMultAdd_MPIKAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1133 { 1134 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 1135 1136 PetscFunctionBegin; 1137 if (!yy) { 1138 PetscCall(VecSet(zz,0.0)); 1139 } else { 1140 PetscCall(VecCopy(yy,zz)); 1141 } 1142 PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */ 1143 /* start the scatter */ 1144 PetscCall(VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD)); 1145 PetscCall((*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz)); 1146 PetscCall(VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD)); 1147 PetscCall((*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz)); 1148 PetscFunctionReturn(0); 1149 } 1150 1151 PetscErrorCode MatMult_MPIKAIJ(Mat A,Vec xx,Vec yy) 1152 { 1153 PetscFunctionBegin; 1154 PetscCall(MatMultAdd_MPIKAIJ(A,xx,PETSC_NULL,yy)); 1155 PetscFunctionReturn(0); 1156 } 1157 1158 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A,const PetscScalar **values) 1159 { 1160 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 1161 1162 PetscFunctionBegin; 1163 PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ is up to date. */ 1164 PetscCall((*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values)); 1165 PetscFunctionReturn(0); 1166 } 1167 1168 /* ----------------------------------------------------------------*/ 1169 1170 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 1171 { 1172 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*) A->data; 1173 PetscErrorCode diag = PETSC_FALSE; 1174 PetscInt nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c; 1175 PetscScalar *vaij,*v,*S=b->S,*T=b->T; 1176 1177 PetscFunctionBegin; 1178 PetscCheck(!b->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1179 b->getrowactive = PETSC_TRUE; 1180 PetscCheckFalse(row < 0 || row >= A->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range",row); 1181 1182 if ((!S) && (!T) && (!b->isTI)) { 1183 if (ncols) *ncols = 0; 1184 if (cols) *cols = NULL; 1185 if (values) *values = NULL; 1186 PetscFunctionReturn(0); 1187 } 1188 1189 if (T || b->isTI) { 1190 PetscCall(MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij)); 1191 c = nzaij; 1192 for (i=0; i<nzaij; i++) { 1193 /* check if this row contains a diagonal entry */ 1194 if (colsaij[i] == r) { 1195 diag = PETSC_TRUE; 1196 c = i; 1197 } 1198 } 1199 } else nzaij = c = 0; 1200 1201 /* calculate size of row */ 1202 nz = 0; 1203 if (S) nz += q; 1204 if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q); 1205 1206 if (cols || values) { 1207 PetscCall(PetscMalloc2(nz,&idx,nz,&v)); 1208 for (i=0; i<q; i++) { 1209 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1210 v[i] = 0.0; 1211 } 1212 if (b->isTI) { 1213 for (i=0; i<nzaij; i++) { 1214 for (j=0; j<q; j++) { 1215 idx[i*q+j] = colsaij[i]*q+j; 1216 v[i*q+j] = (j==s ? vaij[i] : 0); 1217 } 1218 } 1219 } else if (T) { 1220 for (i=0; i<nzaij; i++) { 1221 for (j=0; j<q; j++) { 1222 idx[i*q+j] = colsaij[i]*q+j; 1223 v[i*q+j] = vaij[i]*T[s+j*p]; 1224 } 1225 } 1226 } 1227 if (S) { 1228 for (j=0; j<q; j++) { 1229 idx[c*q+j] = r*q+j; 1230 v[c*q+j] += S[s+j*p]; 1231 } 1232 } 1233 } 1234 1235 if (ncols) *ncols = nz; 1236 if (cols) *cols = idx; 1237 if (values) *values = v; 1238 PetscFunctionReturn(0); 1239 } 1240 1241 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1242 { 1243 PetscFunctionBegin; 1244 if (nz) *nz = 0; 1245 PetscCall(PetscFree2(*idx,*v)); 1246 ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 1247 PetscFunctionReturn(0); 1248 } 1249 1250 PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 1251 { 1252 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*) A->data; 1253 Mat AIJ = b->A; 1254 PetscBool diag = PETSC_FALSE; 1255 Mat MatAIJ,MatOAIJ; 1256 const PetscInt rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray; 1257 PetscInt nz,*idx,ncolsaij = 0,ncolsoaij = 0,*colsaij,*colsoaij,r,s,c,i,j,lrow; 1258 PetscScalar *v,*vals,*ovals,*S=b->S,*T=b->T; 1259 1260 PetscFunctionBegin; 1261 PetscCall(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */ 1262 MatAIJ = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ; 1263 MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ; 1264 PetscCheck(!b->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1265 b->getrowactive = PETSC_TRUE; 1266 PetscCheckFalse(row < rstart || row >= rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1267 lrow = row - rstart; 1268 1269 if ((!S) && (!T) && (!b->isTI)) { 1270 if (ncols) *ncols = 0; 1271 if (cols) *cols = NULL; 1272 if (values) *values = NULL; 1273 PetscFunctionReturn(0); 1274 } 1275 1276 r = lrow/p; 1277 s = lrow%p; 1278 1279 if (T || b->isTI) { 1280 PetscCall(MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray)); 1281 PetscCall(MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals)); 1282 PetscCall(MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals)); 1283 1284 c = ncolsaij + ncolsoaij; 1285 for (i=0; i<ncolsaij; i++) { 1286 /* check if this row contains a diagonal entry */ 1287 if (colsaij[i] == r) { 1288 diag = PETSC_TRUE; 1289 c = i; 1290 } 1291 } 1292 } else c = 0; 1293 1294 /* calculate size of row */ 1295 nz = 0; 1296 if (S) nz += q; 1297 if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q); 1298 1299 if (cols || values) { 1300 PetscCall(PetscMalloc2(nz,&idx,nz,&v)); 1301 for (i=0; i<q; i++) { 1302 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1303 v[i] = 0.0; 1304 } 1305 if (b->isTI) { 1306 for (i=0; i<ncolsaij; i++) { 1307 for (j=0; j<q; j++) { 1308 idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 1309 v[i*q+j] = (j==s ? vals[i] : 0.0); 1310 } 1311 } 1312 for (i=0; i<ncolsoaij; i++) { 1313 for (j=0; j<q; j++) { 1314 idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 1315 v[(i+ncolsaij)*q+j] = (j==s ? ovals[i]: 0.0); 1316 } 1317 } 1318 } else if (T) { 1319 for (i=0; i<ncolsaij; i++) { 1320 for (j=0; j<q; j++) { 1321 idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 1322 v[i*q+j] = vals[i]*T[s+j*p]; 1323 } 1324 } 1325 for (i=0; i<ncolsoaij; i++) { 1326 for (j=0; j<q; j++) { 1327 idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 1328 v[(i+ncolsaij)*q+j] = ovals[i]*T[s+j*p]; 1329 } 1330 } 1331 } 1332 if (S) { 1333 for (j=0; j<q; j++) { 1334 idx[c*q+j] = (r+rstart/p)*q+j; 1335 v[c*q+j] += S[s+j*p]; 1336 } 1337 } 1338 } 1339 1340 if (ncols) *ncols = nz; 1341 if (cols) *cols = idx; 1342 if (values) *values = v; 1343 PetscFunctionReturn(0); 1344 } 1345 1346 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1347 { 1348 PetscFunctionBegin; 1349 PetscCall(PetscFree2(*idx,*v)); 1350 ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 1351 PetscFunctionReturn(0); 1352 } 1353 1354 PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 1355 { 1356 Mat A; 1357 1358 PetscFunctionBegin; 1359 PetscCall(MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A)); 1360 PetscCall(MatCreateSubMatrix(A,isrow,iscol,cll,newmat)); 1361 PetscCall(MatDestroy(&A)); 1362 PetscFunctionReturn(0); 1363 } 1364 1365 /* ---------------------------------------------------------------------------------- */ 1366 /*@C 1367 MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form: 1368 1369 [I \otimes S + A \otimes T] 1370 1371 where 1372 S is a dense (p \times q) matrix 1373 T is a dense (p \times q) matrix 1374 A is an AIJ (n \times n) matrix 1375 I is the identity matrix 1376 The resulting matrix is (np \times nq) 1377 1378 S and T are always stored independently on all processes as PetscScalar arrays in column-major format. 1379 1380 Collective 1381 1382 Input Parameters: 1383 + A - the AIJ matrix 1384 . p - number of rows in S and T 1385 . q - number of columns in S and T 1386 . S - the S matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major) 1387 - T - the T matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major) 1388 1389 Output Parameter: 1390 . kaij - the new KAIJ matrix 1391 1392 Notes: 1393 This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed. 1394 Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix. 1395 1396 Developer Notes: 1397 In the MATMPIKAIJ case, the internal 'AIJ' and 'OAIJ' sequential KAIJ matrices are kept up to date by tracking the object state 1398 of the AIJ matrix 'A' that describes the blockwise action of the MATMPIKAIJ matrix and, if the object state has changed, lazily 1399 rebuilding 'AIJ' and 'OAIJ' just before executing operations with the MATMPIKAIJ matrix. If new types of operations are added, 1400 routines implementing those must also ensure these are rebuilt when needed (by calling the internal MatKAIJ_build_AIJ_OAIJ() routine). 1401 1402 Level: advanced 1403 1404 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ 1405 @*/ 1406 PetscErrorCode MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij) 1407 { 1408 PetscFunctionBegin; 1409 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),kaij)); 1410 PetscCall(MatSetType(*kaij,MATKAIJ)); 1411 PetscCall(MatKAIJSetAIJ(*kaij,A)); 1412 PetscCall(MatKAIJSetS(*kaij,p,q,S)); 1413 PetscCall(MatKAIJSetT(*kaij,p,q,T)); 1414 PetscCall(MatSetUp(*kaij)); 1415 PetscFunctionReturn(0); 1416 } 1417 1418 /*MC 1419 MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form 1420 [I \otimes S + A \otimes T], 1421 where 1422 S is a dense (p \times q) matrix, 1423 T is a dense (p \times q) matrix, 1424 A is an AIJ (n \times n) matrix, 1425 and I is the identity matrix. 1426 The resulting matrix is (np \times nq). 1427 1428 S and T are always stored independently on all processes as PetscScalar arrays in column-major format. 1429 1430 Notes: 1431 A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b, 1432 where x and b are column vectors containing the row-major representations of X and B. 1433 1434 Level: advanced 1435 1436 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ() 1437 M*/ 1438 1439 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A) 1440 { 1441 Mat_MPIKAIJ *b; 1442 PetscMPIInt size; 1443 1444 PetscFunctionBegin; 1445 PetscCall(PetscNewLog(A,&b)); 1446 A->data = (void*)b; 1447 1448 PetscCall(PetscMemzero(A->ops,sizeof(struct _MatOps))); 1449 1450 b->w = NULL; 1451 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 1452 if (size == 1) { 1453 PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ)); 1454 A->ops->destroy = MatDestroy_SeqKAIJ; 1455 A->ops->mult = MatMult_SeqKAIJ; 1456 A->ops->multadd = MatMultAdd_SeqKAIJ; 1457 A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ; 1458 A->ops->getrow = MatGetRow_SeqKAIJ; 1459 A->ops->restorerow = MatRestoreRow_SeqKAIJ; 1460 A->ops->sor = MatSOR_SeqKAIJ; 1461 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqkaij_seqaij_C",MatConvert_KAIJ_AIJ)); 1462 } else { 1463 PetscCall(PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ)); 1464 A->ops->destroy = MatDestroy_MPIKAIJ; 1465 A->ops->mult = MatMult_MPIKAIJ; 1466 A->ops->multadd = MatMultAdd_MPIKAIJ; 1467 A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ; 1468 A->ops->getrow = MatGetRow_MPIKAIJ; 1469 A->ops->restorerow = MatRestoreRow_MPIKAIJ; 1470 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ)); 1471 PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpikaij_mpiaij_C",MatConvert_KAIJ_AIJ)); 1472 } 1473 A->ops->setup = MatSetUp_KAIJ; 1474 A->ops->view = MatView_KAIJ; 1475 A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ; 1476 PetscFunctionReturn(0); 1477 } 1478