1 2 /* 3 Defines the basic matrix operations for the KAIJ matrix storage format. 4 This format is used to evaluate matrices of the form: 5 6 [I \otimes S + A \otimes T] 7 8 where 9 S is a dense (p \times q) matrix 10 T is a dense (p \times q) matrix 11 A is an AIJ (n \times n) matrix 12 I is the identity matrix 13 14 The resulting matrix is (np \times nq) 15 16 We provide: 17 MatMult() 18 MatMultAdd() 19 MatInvertBlockDiagonal() 20 and 21 MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*) 22 23 This single directory handles both the sequential and parallel codes 24 */ 25 26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/ 27 #include <../src/mat/utils/freespace.h> 28 #include <petsc/private/vecimpl.h> 29 30 /*@C 31 MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix 32 33 Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel 34 35 Input Parameter: 36 . A - the KAIJ matrix 37 38 Output Parameter: 39 . B - the AIJ matrix 40 41 Level: advanced 42 43 Notes: The reference count on the AIJ matrix is not increased so you should not destroy it. 44 45 .seealso: MatCreateKAIJ() 46 @*/ 47 PetscErrorCode MatKAIJGetAIJ(Mat A,Mat *B) 48 { 49 PetscErrorCode ierr; 50 PetscBool ismpikaij,isseqkaij; 51 52 PetscFunctionBegin; 53 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr); 54 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr); 55 if (ismpikaij) { 56 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 57 58 *B = b->A; 59 } else if (isseqkaij) { 60 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 61 62 *B = b->AIJ; 63 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ"); 64 PetscFunctionReturn(0); 65 } 66 67 /*@C 68 MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix 69 70 Not Collective; the entire S is stored and returned independently on all processes. 71 72 Input Parameter: 73 . A - the KAIJ matrix 74 75 Output Parameters: 76 + m - the number of rows in S 77 . n - the number of columns in S 78 - S - the S matrix, in form of a scalar array in column-major format 79 80 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 81 82 Level: advanced 83 84 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 85 @*/ 86 PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S) 87 { 88 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 89 PetscFunctionBegin; 90 if (m) *m = b->p; 91 if (n) *n = b->q; 92 if (S) *S = b->S; 93 PetscFunctionReturn(0); 94 } 95 96 /*@C 97 MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix 98 99 Not Collective; the entire S is stored and returned independently on all processes. 100 101 Input Parameter: 102 . A - the KAIJ matrix 103 104 Output Parameters: 105 + m - the number of rows in S 106 . n - the number of columns in S 107 - S - the S matrix, in form of a scalar array in column-major format 108 109 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 110 111 Level: advanced 112 113 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 114 @*/ 115 PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S) 116 { 117 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 118 PetscFunctionBegin; 119 if (m) *m = b->p; 120 if (n) *n = b->q; 121 if (S) *S = b->S; 122 PetscFunctionReturn(0); 123 } 124 125 /*@C 126 MatKAIJRestoreS - Restore array obtained with MatKAIJGetS() 127 128 Not collective 129 130 Input Parameter: 131 . A - the KAIJ matrix 132 133 Output Parameter: 134 . S - location of pointer to array obtained with MatKAIJGetS() 135 136 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 137 If NULL is passed, it will not attempt to zero the array pointer. 138 139 Level: advanced 140 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead() 141 @*/ 142 PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S) 143 { 144 PetscErrorCode ierr; 145 146 PetscFunctionBegin; 147 if (S) *S = NULL; 148 ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 149 PetscFunctionReturn(0); 150 } 151 152 /*@C 153 MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead() 154 155 Not collective 156 157 Input Parameter: 158 . A - the KAIJ matrix 159 160 Output Parameter: 161 . S - location of pointer to array obtained with MatKAIJGetS() 162 163 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 164 If NULL is passed, it will not attempt to zero the array pointer. 165 166 Level: advanced 167 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead() 168 @*/ 169 PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S) 170 { 171 PetscFunctionBegin; 172 if (S) *S = NULL; 173 PetscFunctionReturn(0); 174 } 175 176 /*@C 177 MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix 178 179 Not Collective; the entire T is stored and returned independently on all processes 180 181 Input Parameter: 182 . A - the KAIJ matrix 183 184 Output Parameters: 185 + m - the number of rows in T 186 . n - the number of columns in T 187 - T - the T matrix, in form of a scalar array in column-major format 188 189 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 190 191 Level: advanced 192 193 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 194 @*/ 195 PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T) 196 { 197 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 198 PetscFunctionBegin; 199 if (m) *m = b->p; 200 if (n) *n = b->q; 201 if (T) *T = b->T; 202 PetscFunctionReturn(0); 203 } 204 205 /*@C 206 MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix 207 208 Not Collective; the entire T is stored and returned independently on all processes 209 210 Input Parameter: 211 . A - the KAIJ matrix 212 213 Output Parameters: 214 + m - the number of rows in T 215 . n - the number of columns in T 216 - T - the T matrix, in form of a scalar array in column-major format 217 218 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 219 220 Level: advanced 221 222 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 223 @*/ 224 PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T) 225 { 226 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 227 PetscFunctionBegin; 228 if (m) *m = b->p; 229 if (n) *n = b->q; 230 if (T) *T = b->T; 231 PetscFunctionReturn(0); 232 } 233 234 /*@C 235 MatKAIJRestoreT - Restore array obtained with MatKAIJGetT() 236 237 Not collective 238 239 Input Parameter: 240 . A - the KAIJ matrix 241 242 Output Parameter: 243 . T - location of pointer to array obtained with MatKAIJGetS() 244 245 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 246 If NULL is passed, it will not attempt to zero the array pointer. 247 248 Level: advanced 249 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead() 250 @*/ 251 PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T) 252 { 253 PetscErrorCode ierr; 254 255 PetscFunctionBegin; 256 if (T) *T = NULL; 257 ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 258 PetscFunctionReturn(0); 259 } 260 261 /*@C 262 MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead() 263 264 Not collective 265 266 Input Parameter: 267 . A - the KAIJ matrix 268 269 Output Parameter: 270 . T - location of pointer to array obtained with MatKAIJGetS() 271 272 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 273 If NULL is passed, it will not attempt to zero the array pointer. 274 275 Level: advanced 276 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead() 277 @*/ 278 PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T) 279 { 280 PetscFunctionBegin; 281 if (T) *T = NULL; 282 PetscFunctionReturn(0); 283 } 284 285 /*@ 286 MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix 287 288 Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel 289 290 Input Parameters: 291 + A - the KAIJ matrix 292 - B - the AIJ matrix 293 294 Notes: 295 This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed. 296 Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix. 297 298 Level: advanced 299 300 .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT() 301 @*/ 302 PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B) 303 { 304 PetscErrorCode ierr; 305 PetscMPIInt size; 306 307 PetscFunctionBegin; 308 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 309 if (size == 1) { 310 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 311 a->AIJ = B; 312 } else { 313 Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data; 314 a->A = B; 315 } 316 ierr = PetscObjectReference((PetscObject)B);CHKERRQ(ierr); 317 PetscFunctionReturn(0); 318 } 319 320 /*@C 321 MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix 322 323 Logically Collective; the entire S is stored independently on all processes. 324 325 Input Parameters: 326 + A - the KAIJ matrix 327 . p - the number of rows in S 328 . q - the number of columns in S 329 - S - the S matrix, in form of a scalar array in column-major format 330 331 Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix. 332 The S matrix is copied, so the user can destroy this array. 333 334 Level: Advanced 335 336 .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ() 337 @*/ 338 PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[]) 339 { 340 PetscErrorCode ierr; 341 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 342 343 PetscFunctionBegin; 344 ierr = PetscFree(a->S);CHKERRQ(ierr); 345 if (S) { 346 ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->S);CHKERRQ(ierr); 347 ierr = PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 348 } else a->S = NULL; 349 350 a->p = p; 351 a->q = q; 352 PetscFunctionReturn(0); 353 } 354 355 /*@C 356 MatKAIJGetScaledIdentity - Check if both S and T are scaled identities. 357 358 Logically Collective. 359 360 Input Parameter: 361 . A - the KAIJ matrix 362 363 Output Parameter: 364 . identity - the Boolean value 365 366 Level: Advanced 367 368 .seealso: MatKAIJGetS(), MatKAIJGetT() 369 @*/ 370 PetscErrorCode MatKAIJGetScaledIdentity(Mat A,PetscBool* identity) 371 { 372 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 373 PetscInt i,j; 374 375 PetscFunctionBegin; 376 if (a->p != a->q) { 377 *identity = PETSC_FALSE; 378 PetscFunctionReturn(0); 379 } else *identity = PETSC_TRUE; 380 if (!a->isTI || a->S) { 381 for (i=0; i<a->p && *identity; i++) { 382 for (j=0; j<a->p && *identity; j++) { 383 if (i != j) { 384 if (a->S && PetscAbsScalar(a->S[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE; 385 if (a->T && PetscAbsScalar(a->T[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE; 386 } else { 387 if (a->S && PetscAbsScalar(a->S[i*(a->p+1)]-a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE; 388 if (a->T && PetscAbsScalar(a->T[i*(a->p+1)]-a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE; 389 } 390 } 391 } 392 } 393 PetscFunctionReturn(0); 394 } 395 396 /*@C 397 MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix 398 399 Logically Collective; the entire T is stored independently on all processes. 400 401 Input Parameters: 402 + A - the KAIJ matrix 403 . p - the number of rows in S 404 . q - the number of columns in S 405 - T - the T matrix, in form of a scalar array in column-major format 406 407 Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix. 408 The T matrix is copied, so the user can destroy this array. 409 410 Level: Advanced 411 412 .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ() 413 @*/ 414 PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[]) 415 { 416 PetscErrorCode ierr; 417 PetscInt i,j; 418 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 419 PetscBool isTI = PETSC_FALSE; 420 421 PetscFunctionBegin; 422 /* check if T is an identity matrix */ 423 if (T && (p == q)) { 424 isTI = PETSC_TRUE; 425 for (i=0; i<p; i++) { 426 for (j=0; j<q; j++) { 427 if (i == j) { 428 /* diagonal term must be 1 */ 429 if (T[i+j*p] != 1.0) isTI = PETSC_FALSE; 430 } else { 431 /* off-diagonal term must be 0 */ 432 if (T[i+j*p] != 0.0) isTI = PETSC_FALSE; 433 } 434 } 435 } 436 } 437 a->isTI = isTI; 438 439 ierr = PetscFree(a->T);CHKERRQ(ierr); 440 if (T && (!isTI)) { 441 ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->T);CHKERRQ(ierr); 442 ierr = PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 443 } else a->T = NULL; 444 445 a->p = p; 446 a->q = q; 447 PetscFunctionReturn(0); 448 } 449 450 PetscErrorCode MatDestroy_SeqKAIJ(Mat A) 451 { 452 PetscErrorCode ierr; 453 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 454 455 PetscFunctionBegin; 456 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 457 ierr = PetscFree(b->S);CHKERRQ(ierr); 458 ierr = PetscFree(b->T);CHKERRQ(ierr); 459 ierr = PetscFree(b->ibdiag);CHKERRQ(ierr); 460 ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr); 461 ierr = PetscFree(A->data);CHKERRQ(ierr); 462 PetscFunctionReturn(0); 463 } 464 465 PETSC_INTERN PetscErrorCode MatKAIJ_build_AIJ_OAIJ(Mat A) 466 { 467 PetscErrorCode ierr; 468 Mat_MPIKAIJ *a; 469 Mat_MPIAIJ *mpiaij; 470 PetscScalar *T; 471 PetscInt i,j; 472 PetscObjectState state; 473 474 PetscFunctionBegin; 475 a = (Mat_MPIKAIJ*)A->data; 476 mpiaij = (Mat_MPIAIJ*)a->A->data; 477 478 ierr = PetscObjectStateGet((PetscObject)a->A,&state);CHKERRQ(ierr); 479 if (state == a->state) { 480 /* The existing AIJ and KAIJ members are up-to-date, so simply exit. */ 481 PetscFunctionReturn(0); 482 } else { 483 ierr = MatDestroy(&a->AIJ);CHKERRQ(ierr); 484 ierr = MatDestroy(&a->OAIJ);CHKERRQ(ierr); 485 if (a->isTI) { 486 /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL. 487 * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will 488 * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix 489 * to pass in. */ 490 ierr = PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);CHKERRQ(ierr); 491 for (i=0; i<a->p; i++) { 492 for (j=0; j<a->q; j++) { 493 if (i==j) T[i+j*a->p] = 1.0; 494 else T[i+j*a->p] = 0.0; 495 } 496 } 497 } else T = a->T; 498 ierr = MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);CHKERRQ(ierr); 499 ierr = MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);CHKERRQ(ierr); 500 if (a->isTI) { 501 ierr = PetscFree(T);CHKERRQ(ierr); 502 } 503 a->state = state; 504 } 505 506 PetscFunctionReturn(0); 507 } 508 509 PetscErrorCode MatSetUp_KAIJ(Mat A) 510 { 511 PetscErrorCode ierr; 512 PetscInt n; 513 PetscMPIInt size; 514 Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ*)A->data; 515 516 PetscFunctionBegin; 517 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 518 if (size == 1) { 519 ierr = MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);CHKERRQ(ierr); 520 ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr); 521 ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr); 522 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 523 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 524 } else { 525 Mat_MPIKAIJ *a; 526 Mat_MPIAIJ *mpiaij; 527 IS from,to; 528 Vec gvec; 529 530 a = (Mat_MPIKAIJ*)A->data; 531 mpiaij = (Mat_MPIAIJ*)a->A->data; 532 ierr = MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);CHKERRQ(ierr); 533 ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr); 534 ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr); 535 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 536 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 537 538 ierr = MatKAIJ_build_AIJ_OAIJ(A);CHKERRQ(ierr); 539 540 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 541 ierr = VecCreate(PETSC_COMM_SELF,&a->w);CHKERRQ(ierr); 542 ierr = VecSetSizes(a->w,n*a->q,n*a->q);CHKERRQ(ierr); 543 ierr = VecSetBlockSize(a->w,a->q);CHKERRQ(ierr); 544 ierr = VecSetType(a->w,VECSEQ);CHKERRQ(ierr); 545 546 /* create two temporary Index sets for build scatter gather */ 547 ierr = ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 548 ierr = ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);CHKERRQ(ierr); 549 550 /* create temporary global vector to generate scatter context */ 551 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);CHKERRQ(ierr); 552 553 /* generate the scatter context */ 554 ierr = VecScatterCreate(gvec,from,a->w,to,&a->ctx);CHKERRQ(ierr); 555 556 ierr = ISDestroy(&from);CHKERRQ(ierr); 557 ierr = ISDestroy(&to);CHKERRQ(ierr); 558 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 559 } 560 561 A->assembled = PETSC_TRUE; 562 PetscFunctionReturn(0); 563 } 564 565 PetscErrorCode MatView_KAIJ(Mat A,PetscViewer viewer) 566 { 567 PetscViewerFormat format; 568 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 569 Mat B; 570 PetscInt i; 571 PetscErrorCode ierr; 572 PetscBool ismpikaij; 573 574 PetscFunctionBegin; 575 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr); 576 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 577 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) { 578 ierr = PetscViewerASCIIPrintf(viewer,"S and T have %D rows and %D columns\n",a->p,a->q);CHKERRQ(ierr); 579 580 /* Print appropriate details for S. */ 581 if (!a->S) { 582 ierr = PetscViewerASCIIPrintf(viewer,"S is NULL\n");CHKERRQ(ierr); 583 } else if (format == PETSC_VIEWER_ASCII_IMPL) { 584 ierr = PetscViewerASCIIPrintf(viewer,"Entries of S are ");CHKERRQ(ierr); 585 for (i=0; i<(a->p * a->q); i++) { 586 #if defined(PETSC_USE_COMPLEX) 587 ierr = PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->S[i]),(double)PetscImaginaryPart(a->S[i]));CHKERRQ(ierr); 588 #else 589 ierr = PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->S[i]));CHKERRQ(ierr); 590 #endif 591 } 592 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 593 } 594 595 /* Print appropriate details for T. */ 596 if (a->isTI) { 597 ierr = PetscViewerASCIIPrintf(viewer,"T is the identity matrix\n");CHKERRQ(ierr); 598 } else if (!a->T) { 599 ierr = PetscViewerASCIIPrintf(viewer,"T is NULL\n");CHKERRQ(ierr); 600 } else if (format == PETSC_VIEWER_ASCII_IMPL) { 601 ierr = PetscViewerASCIIPrintf(viewer,"Entries of T are ");CHKERRQ(ierr); 602 for (i=0; i<(a->p * a->q); i++) { 603 #if defined(PETSC_USE_COMPLEX) 604 ierr = PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->T[i]),(double)PetscImaginaryPart(a->T[i]));CHKERRQ(ierr); 605 #else 606 ierr = PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->T[i]));CHKERRQ(ierr); 607 #endif 608 } 609 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 610 } 611 612 /* Now print details for the AIJ matrix, using the AIJ viewer. */ 613 ierr = PetscViewerASCIIPrintf(viewer,"Now viewing the associated AIJ matrix:\n");CHKERRQ(ierr); 614 if (ismpikaij) { 615 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 616 ierr = MatView(b->A,viewer);CHKERRQ(ierr); 617 } else { 618 ierr = MatView(a->AIJ,viewer);CHKERRQ(ierr); 619 } 620 621 } else { 622 /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */ 623 if (ismpikaij) { 624 ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 625 } else { 626 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 627 } 628 ierr = MatView(B,viewer);CHKERRQ(ierr); 629 ierr = MatDestroy(&B);CHKERRQ(ierr); 630 } 631 PetscFunctionReturn(0); 632 } 633 634 PetscErrorCode MatDestroy_MPIKAIJ(Mat A) 635 { 636 PetscErrorCode ierr; 637 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 638 639 PetscFunctionBegin; 640 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 641 ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr); 642 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 643 ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr); 644 ierr = VecDestroy(&b->w);CHKERRQ(ierr); 645 ierr = PetscFree(b->S);CHKERRQ(ierr); 646 ierr = PetscFree(b->T);CHKERRQ(ierr); 647 ierr = PetscFree(b->ibdiag);CHKERRQ(ierr); 648 ierr = PetscFree(A->data);CHKERRQ(ierr); 649 PetscFunctionReturn(0); 650 } 651 652 /* --------------------------------------------------------------------------------------*/ 653 654 /* zz = yy + Axx */ 655 PetscErrorCode MatMultAdd_SeqKAIJ(Mat A,Vec xx,Vec yy,Vec zz) 656 { 657 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 658 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 659 const PetscScalar *s = b->S, *t = b->T; 660 const PetscScalar *x,*v,*bx; 661 PetscScalar *y,*sums; 662 PetscErrorCode ierr; 663 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 664 PetscInt n,i,jrow,j,l,p=b->p,q=b->q,k; 665 666 PetscFunctionBegin; 667 if (!yy) { 668 ierr = VecSet(zz,0.0);CHKERRQ(ierr); 669 } else { 670 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 671 } 672 if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0); 673 674 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 675 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 676 idx = a->j; 677 v = a->a; 678 ii = a->i; 679 680 if (b->isTI) { 681 for (i=0; i<m; i++) { 682 jrow = ii[i]; 683 n = ii[i+1] - jrow; 684 sums = y + p*i; 685 for (j=0; j<n; j++) { 686 for (k=0; k<p; k++) { 687 sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k]; 688 } 689 } 690 } 691 ierr = PetscLogFlops(3.0*(a->nz)*p);CHKERRQ(ierr); 692 } else if (t) { 693 for (i=0; i<m; i++) { 694 jrow = ii[i]; 695 n = ii[i+1] - jrow; 696 sums = y + p*i; 697 for (j=0; j<n; j++) { 698 for (k=0; k<p; k++) { 699 for (l=0; l<q; l++) { 700 sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l]; 701 } 702 } 703 } 704 } 705 /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do), 706 * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T), 707 * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter 708 * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */ 709 ierr = PetscLogFlops((2.0*p*q-p)*m+2.0*p*a->nz);CHKERRQ(ierr); 710 } 711 if (s) { 712 for (i=0; i<m; i++) { 713 sums = y + p*i; 714 bx = x + q*i; 715 if (i < b->AIJ->cmap->n) { 716 for (j=0; j<q; j++) { 717 for (k=0; k<p; k++) { 718 sums[k] += s[k+j*p]*bx[j]; 719 } 720 } 721 } 722 } 723 ierr = PetscLogFlops(2.0*m*p*q);CHKERRQ(ierr); 724 } 725 726 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 727 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 728 PetscFunctionReturn(0); 729 } 730 731 PetscErrorCode MatMult_SeqKAIJ(Mat A,Vec xx,Vec yy) 732 { 733 PetscErrorCode ierr; 734 PetscFunctionBegin; 735 ierr = MatMultAdd_SeqKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr); 736 PetscFunctionReturn(0); 737 } 738 739 #include <petsc/private/kernels/blockinvert.h> 740 741 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A,const PetscScalar **values) 742 { 743 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 744 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 745 const PetscScalar *S = b->S; 746 const PetscScalar *T = b->T; 747 const PetscScalar *v = a->a; 748 const PetscInt p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i; 749 PetscErrorCode ierr; 750 PetscInt i,j,*v_pivots,dof,dof2; 751 PetscScalar *diag,aval,*v_work; 752 753 PetscFunctionBegin; 754 if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse."); 755 if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix."); 756 757 dof = p; 758 dof2 = dof*dof; 759 760 if (b->ibdiagvalid) { 761 if (values) *values = b->ibdiag; 762 PetscFunctionReturn(0); 763 } 764 if (!b->ibdiag) { 765 ierr = PetscMalloc1(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr); 766 ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr); 767 } 768 if (values) *values = b->ibdiag; 769 diag = b->ibdiag; 770 771 ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr); 772 for (i=0; i<m; i++) { 773 if (S) { 774 ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr); 775 } else { 776 ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr); 777 } 778 if (b->isTI) { 779 aval = 0; 780 for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 781 for (j=0; j<dof; j++) diag[j+dof*j] += aval; 782 } else if (T) { 783 aval = 0; 784 for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 785 for (j=0; j<dof2; j++) diag[j] += aval*T[j]; 786 } 787 ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr); 788 diag += dof2; 789 } 790 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 791 792 b->ibdiagvalid = PETSC_TRUE; 793 PetscFunctionReturn(0); 794 } 795 796 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B) 797 { 798 Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data; 799 800 PetscFunctionBegin; 801 *B = kaij->AIJ; 802 PetscFunctionReturn(0); 803 } 804 805 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 806 { 807 PetscErrorCode ierr; 808 Mat_SeqKAIJ *kaij = (Mat_SeqKAIJ*) A->data; 809 Mat_SeqAIJ *a = (Mat_SeqAIJ*)kaij->AIJ->data; 810 const PetscScalar *aa = a->a, *T = kaij->T, *v; 811 const PetscInt m = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi; 812 const PetscScalar *b, *xb, *idiag; 813 PetscScalar *x, *work, *workt, *w, *y, *arr, *t, *arrt; 814 PetscInt i, j, k, i2, bs, bs2, nz; 815 816 PetscFunctionBegin; 817 its = its*lits; 818 if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 819 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 820 if (fshift) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift"); 821 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts"); 822 if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: No support for non-square dense blocks"); 823 else {bs = p; bs2 = bs*bs; } 824 825 if (!m) PetscFunctionReturn(0); 826 827 if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ(A,NULL);CHKERRQ(ierr); } 828 idiag = kaij->ibdiag; 829 diag = a->diag; 830 831 if (!kaij->sor.setup) { 832 ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr); 833 kaij->sor.setup = PETSC_TRUE; 834 } 835 y = kaij->sor.y; 836 w = kaij->sor.w; 837 work = kaij->sor.work; 838 t = kaij->sor.t; 839 arr = kaij->sor.arr; 840 841 ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 842 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 843 844 if (flag & SOR_ZERO_INITIAL_GUESS) { 845 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 846 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); /* x[0:bs] <- D^{-1} b[0:bs] */ 847 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 848 i2 = bs; 849 idiag += bs2; 850 for (i=1; i<m; i++) { 851 v = aa + ai[i]; 852 vi = aj + ai[i]; 853 nz = diag[i] - ai[i]; 854 855 if (T) { /* b - T (Arow * x) */ 856 ierr = PetscMemzero(w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 857 for (j=0; j<nz; j++) { 858 for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 859 } 860 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]); 861 for (k=0; k<bs; k++) t[i2+k] += b[i2+k]; 862 } else if (kaij->isTI) { 863 ierr = PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 864 for (j=0; j<nz; j++) { 865 for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k]; 866 } 867 } else { 868 ierr = PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 869 } 870 871 PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y); 872 for (j=0; j<bs; j++) x[i2+j] = omega * y[j]; 873 874 idiag += bs2; 875 i2 += bs; 876 } 877 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 878 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 879 xb = t; 880 } else xb = b; 881 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 882 idiag = kaij->ibdiag+bs2*(m-1); 883 i2 = bs * (m-1); 884 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 885 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 886 i2 -= bs; 887 idiag -= bs2; 888 for (i=m-2; i>=0; i--) { 889 v = aa + diag[i] + 1 ; 890 vi = aj + diag[i] + 1; 891 nz = ai[i+1] - diag[i] - 1; 892 893 if (T) { /* FIXME: This branch untested */ 894 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 895 /* copy all rows of x that are needed into contiguous space */ 896 workt = work; 897 for (j=0; j<nz; j++) { 898 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 899 workt += bs; 900 } 901 arrt = arr; 902 for (j=0; j<nz; j++) { 903 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 904 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 905 arrt += bs2; 906 } 907 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 908 } else if (kaij->isTI) { 909 ierr = PetscMemcpy(w,t+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 910 for (j=0; j<nz; j++) { 911 for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 912 } 913 } 914 915 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */ 916 for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j]; 917 918 idiag -= bs2; 919 i2 -= bs; 920 } 921 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 922 } 923 its--; 924 } 925 while (its--) { /* FIXME: This branch not updated */ 926 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 927 i2 = 0; 928 idiag = kaij->ibdiag; 929 for (i=0; i<m; i++) { 930 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 931 932 v = aa + ai[i]; 933 vi = aj + ai[i]; 934 nz = diag[i] - ai[i]; 935 workt = work; 936 for (j=0; j<nz; j++) { 937 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 938 workt += bs; 939 } 940 arrt = arr; 941 if (T) { 942 for (j=0; j<nz; j++) { 943 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 944 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 945 arrt += bs2; 946 } 947 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 948 } else if (kaij->isTI) { 949 for (j=0; j<nz; j++) { 950 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 951 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 952 arrt += bs2; 953 } 954 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 955 } 956 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 957 958 v = aa + diag[i] + 1; 959 vi = aj + diag[i] + 1; 960 nz = ai[i+1] - diag[i] - 1; 961 workt = work; 962 for (j=0; j<nz; j++) { 963 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 964 workt += bs; 965 } 966 arrt = arr; 967 if (T) { 968 for (j=0; j<nz; j++) { 969 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 970 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 971 arrt += bs2; 972 } 973 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 974 } else if (kaij->isTI) { 975 for (j=0; j<nz; j++) { 976 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 977 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 978 arrt += bs2; 979 } 980 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 981 } 982 983 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 984 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 985 986 idiag += bs2; 987 i2 += bs; 988 } 989 xb = t; 990 } 991 else xb = b; 992 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 993 idiag = kaij->ibdiag+bs2*(m-1); 994 i2 = bs * (m-1); 995 if (xb == b) { 996 for (i=m-1; i>=0; i--) { 997 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 998 999 v = aa + ai[i]; 1000 vi = aj + ai[i]; 1001 nz = diag[i] - ai[i]; 1002 workt = work; 1003 for (j=0; j<nz; j++) { 1004 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1005 workt += bs; 1006 } 1007 arrt = arr; 1008 if (T) { 1009 for (j=0; j<nz; j++) { 1010 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 1011 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 1012 arrt += bs2; 1013 } 1014 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 1015 } else if (kaij->isTI) { 1016 for (j=0; j<nz; j++) { 1017 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 1018 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 1019 arrt += bs2; 1020 } 1021 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 1022 } 1023 1024 v = aa + diag[i] + 1; 1025 vi = aj + diag[i] + 1; 1026 nz = ai[i+1] - diag[i] - 1; 1027 workt = work; 1028 for (j=0; j<nz; j++) { 1029 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1030 workt += bs; 1031 } 1032 arrt = arr; 1033 if (T) { 1034 for (j=0; j<nz; j++) { 1035 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 1036 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 1037 arrt += bs2; 1038 } 1039 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 1040 } else if (kaij->isTI) { 1041 for (j=0; j<nz; j++) { 1042 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 1043 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 1044 arrt += bs2; 1045 } 1046 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 1047 } 1048 1049 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 1050 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 1051 } 1052 } else { 1053 for (i=m-1; i>=0; i--) { 1054 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1055 v = aa + diag[i] + 1; 1056 vi = aj + diag[i] + 1; 1057 nz = ai[i+1] - diag[i] - 1; 1058 workt = work; 1059 for (j=0; j<nz; j++) { 1060 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1061 workt += bs; 1062 } 1063 arrt = arr; 1064 if (T) { 1065 for (j=0; j<nz; j++) { 1066 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 1067 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 1068 arrt += bs2; 1069 } 1070 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 1071 } else if (kaij->isTI) { 1072 for (j=0; j<nz; j++) { 1073 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 1074 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 1075 arrt += bs2; 1076 } 1077 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 1078 } 1079 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 1080 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 1081 } 1082 } 1083 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1084 } 1085 } 1086 1087 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 1088 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1089 PetscFunctionReturn(0); 1090 } 1091 1092 /*===================================================================================*/ 1093 1094 PetscErrorCode MatMultAdd_MPIKAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1095 { 1096 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 1097 PetscErrorCode ierr; 1098 1099 PetscFunctionBegin; 1100 if (!yy) { 1101 ierr = VecSet(zz,0.0);CHKERRQ(ierr); 1102 } else { 1103 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 1104 } 1105 ierr = MatKAIJ_build_AIJ_OAIJ(A);CHKERRQ(ierr); /* Ensure b->AIJ and b->OAIJ are up to date. */ 1106 /* start the scatter */ 1107 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1108 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr); 1109 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1110 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 1111 PetscFunctionReturn(0); 1112 } 1113 1114 PetscErrorCode MatMult_MPIKAIJ(Mat A,Vec xx,Vec yy) 1115 { 1116 PetscErrorCode ierr; 1117 PetscFunctionBegin; 1118 ierr = MatMultAdd_MPIKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr); 1119 PetscFunctionReturn(0); 1120 } 1121 1122 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A,const PetscScalar **values) 1123 { 1124 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 1125 PetscErrorCode ierr; 1126 1127 PetscFunctionBegin; 1128 ierr = MatKAIJ_build_AIJ_OAIJ(A);CHKERRQ(ierr); /* Ensure b->AIJ is up to date. */ 1129 ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr); 1130 PetscFunctionReturn(0); 1131 } 1132 1133 /* ----------------------------------------------------------------*/ 1134 1135 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 1136 { 1137 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*) A->data; 1138 PetscErrorCode diag = PETSC_FALSE; 1139 PetscErrorCode ierr; 1140 PetscInt nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c; 1141 PetscScalar *vaij,*v,*S=b->S,*T=b->T; 1142 1143 PetscFunctionBegin; 1144 if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1145 b->getrowactive = PETSC_TRUE; 1146 if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row); 1147 1148 if ((!S) && (!T) && (!b->isTI)) { 1149 if (ncols) *ncols = 0; 1150 if (cols) *cols = NULL; 1151 if (values) *values = NULL; 1152 PetscFunctionReturn(0); 1153 } 1154 1155 if (T || b->isTI) { 1156 ierr = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr); 1157 c = nzaij; 1158 for (i=0; i<nzaij; i++) { 1159 /* check if this row contains a diagonal entry */ 1160 if (colsaij[i] == r) { 1161 diag = PETSC_TRUE; 1162 c = i; 1163 } 1164 } 1165 } else nzaij = c = 0; 1166 1167 /* calculate size of row */ 1168 nz = 0; 1169 if (S) nz += q; 1170 if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q); 1171 1172 if (cols || values) { 1173 ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr); 1174 for (i=0; i<q; i++) { 1175 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1176 v[i] = 0.0; 1177 } 1178 if (b->isTI) { 1179 for (i=0; i<nzaij; i++) { 1180 for (j=0; j<q; j++) { 1181 idx[i*q+j] = colsaij[i]*q+j; 1182 v[i*q+j] = (j==s ? vaij[i] : 0); 1183 } 1184 } 1185 } else if (T) { 1186 for (i=0; i<nzaij; i++) { 1187 for (j=0; j<q; j++) { 1188 idx[i*q+j] = colsaij[i]*q+j; 1189 v[i*q+j] = vaij[i]*T[s+j*p]; 1190 } 1191 } 1192 } 1193 if (S) { 1194 for (j=0; j<q; j++) { 1195 idx[c*q+j] = r*q+j; 1196 v[c*q+j] += S[s+j*p]; 1197 } 1198 } 1199 } 1200 1201 if (ncols) *ncols = nz; 1202 if (cols) *cols = idx; 1203 if (values) *values = v; 1204 PetscFunctionReturn(0); 1205 } 1206 1207 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1208 { 1209 PetscErrorCode ierr; 1210 1211 PetscFunctionBegin; 1212 if (nz) *nz = 0; 1213 ierr = PetscFree2(*idx,*v);CHKERRQ(ierr); 1214 ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 1215 PetscFunctionReturn(0); 1216 } 1217 1218 PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 1219 { 1220 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*) A->data; 1221 Mat AIJ = b->A; 1222 PetscBool diag = PETSC_FALSE; 1223 Mat MatAIJ,MatOAIJ; 1224 PetscErrorCode ierr; 1225 const PetscInt rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray; 1226 PetscInt nz,*idx,ncolsaij = 0,ncolsoaij = 0,*colsaij,*colsoaij,r,s,c,i,j,lrow; 1227 PetscScalar *v,*vals,*ovals,*S=b->S,*T=b->T; 1228 1229 PetscFunctionBegin; 1230 ierr = MatKAIJ_build_AIJ_OAIJ(A);CHKERRQ(ierr); /* Ensure b->AIJ and b->OAIJ are up to date. */ 1231 MatAIJ = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ; 1232 MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ; 1233 if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1234 b->getrowactive = PETSC_TRUE; 1235 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1236 lrow = row - rstart; 1237 1238 if ((!S) && (!T) && (!b->isTI)) { 1239 if (ncols) *ncols = 0; 1240 if (cols) *cols = NULL; 1241 if (values) *values = NULL; 1242 PetscFunctionReturn(0); 1243 } 1244 1245 r = lrow/p; 1246 s = lrow%p; 1247 1248 if (T || b->isTI) { 1249 ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray);CHKERRQ(ierr); 1250 ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr); 1251 ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr); 1252 1253 c = ncolsaij + ncolsoaij; 1254 for (i=0; i<ncolsaij; i++) { 1255 /* check if this row contains a diagonal entry */ 1256 if (colsaij[i] == r) { 1257 diag = PETSC_TRUE; 1258 c = i; 1259 } 1260 } 1261 } else c = 0; 1262 1263 /* calculate size of row */ 1264 nz = 0; 1265 if (S) nz += q; 1266 if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q); 1267 1268 if (cols || values) { 1269 ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr); 1270 for (i=0; i<q; i++) { 1271 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1272 v[i] = 0.0; 1273 } 1274 if (b->isTI) { 1275 for (i=0; i<ncolsaij; i++) { 1276 for (j=0; j<q; j++) { 1277 idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 1278 v[i*q+j] = (j==s ? vals[i] : 0.0); 1279 } 1280 } 1281 for (i=0; i<ncolsoaij; i++) { 1282 for (j=0; j<q; j++) { 1283 idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 1284 v[(i+ncolsaij)*q+j] = (j==s ? ovals[i]: 0.0); 1285 } 1286 } 1287 } else if (T) { 1288 for (i=0; i<ncolsaij; i++) { 1289 for (j=0; j<q; j++) { 1290 idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 1291 v[i*q+j] = vals[i]*T[s+j*p]; 1292 } 1293 } 1294 for (i=0; i<ncolsoaij; i++) { 1295 for (j=0; j<q; j++) { 1296 idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 1297 v[(i+ncolsaij)*q+j] = ovals[i]*T[s+j*p]; 1298 } 1299 } 1300 } 1301 if (S) { 1302 for (j=0; j<q; j++) { 1303 idx[c*q+j] = (r+rstart/p)*q+j; 1304 v[c*q+j] += S[s+j*p]; 1305 } 1306 } 1307 } 1308 1309 if (ncols) *ncols = nz; 1310 if (cols) *cols = idx; 1311 if (values) *values = v; 1312 PetscFunctionReturn(0); 1313 } 1314 1315 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1316 { 1317 PetscErrorCode ierr; 1318 PetscFunctionBegin; 1319 ierr = PetscFree2(*idx,*v);CHKERRQ(ierr); 1320 ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 1321 PetscFunctionReturn(0); 1322 } 1323 1324 PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 1325 { 1326 PetscErrorCode ierr; 1327 Mat A; 1328 1329 PetscFunctionBegin; 1330 ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1331 ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr); 1332 ierr = MatDestroy(&A);CHKERRQ(ierr); 1333 PetscFunctionReturn(0); 1334 } 1335 1336 /* ---------------------------------------------------------------------------------- */ 1337 /*@C 1338 MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form: 1339 1340 [I \otimes S + A \otimes T] 1341 1342 where 1343 S is a dense (p \times q) matrix 1344 T is a dense (p \times q) matrix 1345 A is an AIJ (n \times n) matrix 1346 I is the identity matrix 1347 The resulting matrix is (np \times nq) 1348 1349 S and T are always stored independently on all processes as PetscScalar arrays in column-major format. 1350 1351 Collective 1352 1353 Input Parameters: 1354 + A - the AIJ matrix 1355 . p - number of rows in S and T 1356 . q - number of columns in S and T 1357 . S - the S matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major) 1358 - T - the T matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major) 1359 1360 Output Parameter: 1361 . kaij - the new KAIJ matrix 1362 1363 Notes: 1364 This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed. 1365 Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix. 1366 1367 Developer Notes: 1368 In the MATMPIKAIJ case, the internal 'AIJ' and 'OAIJ' sequential KAIJ matrices are kept up to date by tracking the object state 1369 of the AIJ matrix 'A' that describes the blockwise action of the MATMPIKAIJ matrix and, if the object state has changed, lazily 1370 rebuilding 'AIJ' and 'OAIJ' just before executing operations with the MATMPIKAIJ matrix. If new types of operations are added, 1371 routines implementing those must also ensure these are rebuilt when needed (by calling the internal MatKAIJ_build_AIJ_OAIJ() routine). 1372 1373 Level: advanced 1374 1375 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ 1376 @*/ 1377 PetscErrorCode MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij) 1378 { 1379 PetscErrorCode ierr; 1380 PetscMPIInt size; 1381 1382 PetscFunctionBegin; 1383 ierr = MatCreate(PetscObjectComm((PetscObject)A),kaij);CHKERRQ(ierr); 1384 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 1385 if (size == 1) { 1386 ierr = MatSetType(*kaij,MATSEQKAIJ);CHKERRQ(ierr); 1387 } else { 1388 ierr = MatSetType(*kaij,MATMPIKAIJ);CHKERRQ(ierr); 1389 } 1390 ierr = MatKAIJSetAIJ(*kaij,A);CHKERRQ(ierr); 1391 ierr = MatKAIJSetS(*kaij,p,q,S);CHKERRQ(ierr); 1392 ierr = MatKAIJSetT(*kaij,p,q,T);CHKERRQ(ierr); 1393 ierr = MatSetUp(*kaij);CHKERRQ(ierr); 1394 PetscFunctionReturn(0); 1395 } 1396 1397 /*MC 1398 MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form 1399 [I \otimes S + A \otimes T], 1400 where 1401 S is a dense (p \times q) matrix, 1402 T is a dense (p \times q) matrix, 1403 A is an AIJ (n \times n) matrix, 1404 and I is the identity matrix. 1405 The resulting matrix is (np \times nq). 1406 1407 S and T are always stored independently on all processes as PetscScalar arrays in column-major format. 1408 1409 Notes: 1410 A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b, 1411 where x and b are column vectors containing the row-major representations of X and B. 1412 1413 Level: advanced 1414 1415 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ() 1416 M*/ 1417 1418 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A) 1419 { 1420 PetscErrorCode ierr; 1421 Mat_MPIKAIJ *b; 1422 PetscMPIInt size; 1423 1424 PetscFunctionBegin; 1425 ierr = PetscNewLog(A,&b);CHKERRQ(ierr); 1426 A->data = (void*)b; 1427 1428 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1429 1430 A->ops->setup = MatSetUp_KAIJ; 1431 1432 b->w = NULL; 1433 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 1434 if (size == 1) { 1435 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr); 1436 A->ops->setup = MatSetUp_KAIJ; 1437 A->ops->destroy = MatDestroy_SeqKAIJ; 1438 A->ops->view = MatView_KAIJ; 1439 A->ops->mult = MatMult_SeqKAIJ; 1440 A->ops->multadd = MatMultAdd_SeqKAIJ; 1441 A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ; 1442 A->ops->getrow = MatGetRow_SeqKAIJ; 1443 A->ops->restorerow = MatRestoreRow_SeqKAIJ; 1444 A->ops->sor = MatSOR_SeqKAIJ; 1445 } else { 1446 ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr); 1447 A->ops->setup = MatSetUp_KAIJ; 1448 A->ops->destroy = MatDestroy_MPIKAIJ; 1449 A->ops->view = MatView_KAIJ; 1450 A->ops->mult = MatMult_MPIKAIJ; 1451 A->ops->multadd = MatMultAdd_MPIKAIJ; 1452 A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ; 1453 A->ops->getrow = MatGetRow_MPIKAIJ; 1454 A->ops->restorerow = MatRestoreRow_MPIKAIJ; 1455 ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr); 1456 } 1457 A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ; 1458 PetscFunctionReturn(0); 1459 } 1460