1 2 /* 3 Defines the basic matrix operations for the KAIJ matrix storage format. 4 This format is used to evaluate matrices of the form: 5 6 [I \otimes S + A \otimes T] 7 8 where 9 S is a dense (p \times q) matrix 10 T is a dense (p \times q) matrix 11 A is an AIJ (n \times n) matrix 12 I is the identity matrix 13 14 The resulting matrix is (np \times nq) 15 16 We provide: 17 MatMult() 18 MatMultAdd() 19 MatInvertBlockDiagonal() 20 and 21 MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*) 22 23 This single directory handles both the sequential and parallel codes 24 */ 25 26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/ 27 #include <../src/mat/utils/freespace.h> 28 #include <petsc/private/vecimpl.h> 29 30 /*@C 31 MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix 32 33 Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel 34 35 Input Parameter: 36 . A - the KAIJ matrix 37 38 Output Parameter: 39 . B - the AIJ matrix 40 41 Level: advanced 42 43 Notes: The reference count on the AIJ matrix is not increased so you should not destroy it. 44 45 .seealso: MatCreateKAIJ() 46 @*/ 47 PetscErrorCode MatKAIJGetAIJ(Mat A,Mat *B) 48 { 49 PetscErrorCode ierr; 50 PetscBool ismpikaij,isseqkaij; 51 52 PetscFunctionBegin; 53 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr); 54 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr); 55 if (ismpikaij) { 56 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 57 58 *B = b->A; 59 } else if (isseqkaij) { 60 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 61 62 *B = b->AIJ; 63 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ"); 64 PetscFunctionReturn(0); 65 } 66 67 /*@C 68 MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix 69 70 Not Collective; the entire S is stored and returned independently on all processes. 71 72 Input Parameter: 73 . A - the KAIJ matrix 74 75 Output Parameters: 76 + m - the number of rows in S 77 . n - the number of columns in S 78 - S - the S matrix, in form of a scalar array in column-major format 79 80 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 81 82 Level: advanced 83 84 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 85 @*/ 86 PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S) 87 { 88 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 89 PetscFunctionBegin; 90 if (m) *m = b->p; 91 if (n) *n = b->q; 92 if (S) *S = b->S; 93 PetscFunctionReturn(0); 94 } 95 96 /*@C 97 MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix 98 99 Not Collective; the entire S is stored and returned independently on all processes. 100 101 Input Parameter: 102 . A - the KAIJ matrix 103 104 Output Parameters: 105 + m - the number of rows in S 106 . n - the number of columns in S 107 - S - the S matrix, in form of a scalar array in column-major format 108 109 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 110 111 Level: advanced 112 113 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 114 @*/ 115 PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S) 116 { 117 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 118 PetscFunctionBegin; 119 if (m) *m = b->p; 120 if (n) *n = b->q; 121 if (S) *S = b->S; 122 PetscFunctionReturn(0); 123 } 124 125 /*@C 126 MatKAIJRestoreS - Restore array obtained with MatKAIJGetS() 127 128 Not collective 129 130 Input Parameter: 131 . A - the KAIJ matrix 132 133 Output Parameter: 134 . S - location of pointer to array obtained with MatKAIJGetS() 135 136 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 137 If NULL is passed, it will not attempt to zero the array pointer. 138 139 Level: advanced 140 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead() 141 @*/ 142 PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S) 143 { 144 PetscErrorCode ierr; 145 146 PetscFunctionBegin; 147 if (S) *S = NULL; 148 ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 149 PetscFunctionReturn(0); 150 } 151 152 /*@C 153 MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead() 154 155 Not collective 156 157 Input Parameter: 158 . A - the KAIJ matrix 159 160 Output Parameter: 161 . S - location of pointer to array obtained with MatKAIJGetS() 162 163 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 164 If NULL is passed, it will not attempt to zero the array pointer. 165 166 Level: advanced 167 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead() 168 @*/ 169 PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S) 170 { 171 PetscFunctionBegin; 172 if (S) *S = NULL; 173 PetscFunctionReturn(0); 174 } 175 176 /*@C 177 MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix 178 179 Not Collective; the entire T is stored and returned independently on all processes 180 181 Input Parameter: 182 . A - the KAIJ matrix 183 184 Output Parameter: 185 + m - the number of rows in T 186 . n - the number of columns in T 187 - T - the T matrix, in form of a scalar array in column-major format 188 189 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 190 191 Level: advanced 192 193 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 194 @*/ 195 PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T) 196 { 197 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 198 PetscFunctionBegin; 199 if (m) *m = b->p; 200 if (n) *n = b->q; 201 if (T) *T = b->T; 202 PetscFunctionReturn(0); 203 } 204 205 /*@C 206 MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix 207 208 Not Collective; the entire T is stored and returned independently on all processes 209 210 Input Parameter: 211 . A - the KAIJ matrix 212 213 Output Parameter: 214 + m - the number of rows in T 215 . n - the number of columns in T 216 - T - the T matrix, in form of a scalar array in column-major format 217 218 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 219 220 Level: advanced 221 222 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 223 @*/ 224 PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T) 225 { 226 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 227 PetscFunctionBegin; 228 if (m) *m = b->p; 229 if (n) *n = b->q; 230 if (T) *T = b->T; 231 PetscFunctionReturn(0); 232 } 233 234 /*@C 235 MatKAIJRestoreT - Restore array obtained with MatKAIJGetT() 236 237 Not collective 238 239 Input Parameter: 240 . A - the KAIJ matrix 241 242 Output Parameter: 243 . T - location of pointer to array obtained with MatKAIJGetS() 244 245 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 246 If NULL is passed, it will not attempt to zero the array pointer. 247 248 Level: advanced 249 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead() 250 @*/ 251 PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T) 252 { 253 PetscErrorCode ierr; 254 255 PetscFunctionBegin; 256 if (T) *T = NULL; 257 ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 258 PetscFunctionReturn(0); 259 } 260 261 /*@C 262 MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead() 263 264 Not collective 265 266 Input Parameter: 267 . A - the KAIJ matrix 268 269 Output Parameter: 270 . T - location of pointer to array obtained with MatKAIJGetS() 271 272 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 273 If NULL is passed, it will not attempt to zero the array pointer. 274 275 Level: advanced 276 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead() 277 @*/ 278 PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T) 279 { 280 PetscFunctionBegin; 281 if (T) *T = NULL; 282 PetscFunctionReturn(0); 283 } 284 285 /*@ 286 MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix 287 288 Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel 289 290 Input Parameters: 291 + A - the KAIJ matrix 292 - B - the AIJ matrix 293 294 Notes: 295 This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed. 296 Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix. 297 298 Level: advanced 299 300 .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT() 301 @*/ 302 PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B) 303 { 304 PetscErrorCode ierr; 305 PetscMPIInt size; 306 307 PetscFunctionBegin; 308 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 309 if (size == 1) { 310 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 311 a->AIJ = B; 312 } else { 313 Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data; 314 a->A = B; 315 } 316 ierr = PetscObjectReference((PetscObject)B);CHKERRQ(ierr); 317 PetscFunctionReturn(0); 318 } 319 320 /*@C 321 MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix 322 323 Logically Collective; the entire S is stored independently on all processes. 324 325 Input Parameters: 326 + A - the KAIJ matrix 327 . p - the number of rows in S 328 . q - the number of columns in S 329 - S - the S matrix, in form of a scalar array in column-major format 330 331 Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix. 332 The S matrix is copied, so the user can destroy this array. 333 334 Level: Advanced 335 336 .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ() 337 @*/ 338 PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[]) 339 { 340 PetscErrorCode ierr; 341 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 342 343 PetscFunctionBegin; 344 ierr = PetscFree(a->S);CHKERRQ(ierr); 345 if (S) { 346 ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->S);CHKERRQ(ierr); 347 ierr = PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 348 } else a->S = NULL; 349 350 a->p = p; 351 a->q = q; 352 PetscFunctionReturn(0); 353 } 354 355 /*@C 356 MatKAIJGetScaledIdentity - Check if both S and T are scaled identities. 357 358 Logically Collective. 359 360 Input Parameter: 361 . A - the KAIJ matrix 362 363 Output Parameter: 364 . identity - the Boolean value 365 366 Level: Advanced 367 368 .seealso: MatKAIJGetS(), MatKAIJGetT() 369 @*/ 370 PetscErrorCode MatKAIJGetScaledIdentity(Mat A,PetscBool* identity) 371 { 372 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 373 PetscInt i,j; 374 375 PetscFunctionBegin; 376 if (a->p != a->q) { 377 *identity = PETSC_FALSE; 378 PetscFunctionReturn(0); 379 } else *identity = PETSC_TRUE; 380 if (!a->isTI || a->S) { 381 for (i=0; i<a->p && *identity; i++) { 382 for (j=0; j<a->p && *identity; j++) { 383 if (i != j) { 384 if (a->S && PetscAbsScalar(a->S[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE; 385 if (a->T && PetscAbsScalar(a->T[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE; 386 } else { 387 if (a->S && PetscAbsScalar(a->S[i*(a->p+1)]-a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE; 388 if (a->T && PetscAbsScalar(a->T[i*(a->p+1)]-a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE; 389 } 390 } 391 } 392 } 393 PetscFunctionReturn(0); 394 } 395 396 /*@C 397 MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix 398 399 Logically Collective; the entire T is stored independently on all processes. 400 401 Input Parameters: 402 + A - the KAIJ matrix 403 . p - the number of rows in S 404 . q - the number of columns in S 405 - T - the T matrix, in form of a scalar array in column-major format 406 407 Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix. 408 The T matrix is copied, so the user can destroy this array. 409 410 Level: Advanced 411 412 .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ() 413 @*/ 414 PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[]) 415 { 416 PetscErrorCode ierr; 417 PetscInt i,j; 418 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 419 PetscBool isTI = PETSC_FALSE; 420 421 PetscFunctionBegin; 422 /* check if T is an identity matrix */ 423 if (T && (p == q)) { 424 isTI = PETSC_TRUE; 425 for (i=0; i<p; i++) { 426 for (j=0; j<q; j++) { 427 if (i == j) { 428 /* diagonal term must be 1 */ 429 if (T[i+j*p] != 1.0) isTI = PETSC_FALSE; 430 } else { 431 /* off-diagonal term must be 0 */ 432 if (T[i+j*p] != 0.0) isTI = PETSC_FALSE; 433 } 434 } 435 } 436 } 437 a->isTI = isTI; 438 439 ierr = PetscFree(a->T);CHKERRQ(ierr); 440 if (T && (!isTI)) { 441 ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->T);CHKERRQ(ierr); 442 ierr = PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 443 } else a->T = NULL; 444 445 a->p = p; 446 a->q = q; 447 PetscFunctionReturn(0); 448 } 449 450 PetscErrorCode MatDestroy_SeqKAIJ(Mat A) 451 { 452 PetscErrorCode ierr; 453 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 454 455 PetscFunctionBegin; 456 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 457 ierr = PetscFree(b->S);CHKERRQ(ierr); 458 ierr = PetscFree(b->T);CHKERRQ(ierr); 459 ierr = PetscFree(b->ibdiag);CHKERRQ(ierr); 460 ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr); 461 ierr = PetscFree(A->data);CHKERRQ(ierr); 462 PetscFunctionReturn(0); 463 } 464 465 PetscErrorCode MatSetUp_KAIJ(Mat A) 466 { 467 PetscErrorCode ierr; 468 PetscInt n; 469 PetscMPIInt size; 470 Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ*)A->data; 471 472 PetscFunctionBegin; 473 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 474 if (size == 1) { 475 ierr = MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);CHKERRQ(ierr); 476 ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr); 477 ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr); 478 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 479 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 480 } else { 481 Mat_MPIKAIJ *a; 482 Mat_MPIAIJ *mpiaij; 483 IS from,to; 484 Vec gvec; 485 PetscScalar *T; 486 PetscInt i,j; 487 488 a = (Mat_MPIKAIJ*)A->data; 489 mpiaij = (Mat_MPIAIJ*)a->A->data; 490 ierr = MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);CHKERRQ(ierr); 491 ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr); 492 ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr); 493 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 494 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 495 496 if (a->isTI) { 497 /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL. 498 * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will 499 * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix 500 * to pass in. */ 501 ierr = PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);CHKERRQ(ierr); 502 for (i=0; i<a->p; i++) { 503 for (j=0; j<a->q; j++) { 504 if (i==j) T[i+j*a->p] = 1.0; 505 else T[i+j*a->p] = 0.0; 506 } 507 } 508 } else T = a->T; 509 ierr = MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);CHKERRQ(ierr); 510 ierr = MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);CHKERRQ(ierr); 511 if (a->isTI) { 512 ierr = PetscFree(T);CHKERRQ(ierr); 513 } 514 515 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 516 ierr = VecCreate(PETSC_COMM_SELF,&a->w);CHKERRQ(ierr); 517 ierr = VecSetSizes(a->w,n*a->q,n*a->q);CHKERRQ(ierr); 518 ierr = VecSetBlockSize(a->w,a->q);CHKERRQ(ierr); 519 ierr = VecSetType(a->w,VECSEQ);CHKERRQ(ierr); 520 521 /* create two temporary Index sets for build scatter gather */ 522 ierr = ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 523 ierr = ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);CHKERRQ(ierr); 524 525 /* create temporary global vector to generate scatter context */ 526 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);CHKERRQ(ierr); 527 528 /* generate the scatter context */ 529 ierr = VecScatterCreate(gvec,from,a->w,to,&a->ctx);CHKERRQ(ierr); 530 531 ierr = ISDestroy(&from);CHKERRQ(ierr); 532 ierr = ISDestroy(&to);CHKERRQ(ierr); 533 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 534 } 535 536 A->assembled = PETSC_TRUE; 537 PetscFunctionReturn(0); 538 } 539 540 PetscErrorCode MatView_KAIJ(Mat A,PetscViewer viewer) 541 { 542 PetscViewerFormat format; 543 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 544 Mat B; 545 PetscInt i; 546 PetscErrorCode ierr; 547 PetscBool ismpikaij; 548 549 PetscFunctionBegin; 550 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr); 551 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 552 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) { 553 ierr = PetscViewerASCIIPrintf(viewer,"S and T have %D rows and %D columns\n",a->p,a->q);CHKERRQ(ierr); 554 555 /* Print appropriate details for S. */ 556 if (!a->S) { 557 ierr = PetscViewerASCIIPrintf(viewer,"S is NULL\n");CHKERRQ(ierr); 558 } else if (format == PETSC_VIEWER_ASCII_IMPL) { 559 ierr = PetscViewerASCIIPrintf(viewer,"Entries of S are ");CHKERRQ(ierr); 560 for (i=0; i<(a->p * a->q); i++) { 561 #if defined(PETSC_USE_COMPLEX) 562 ierr = PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->S[i]),(double)PetscImaginaryPart(a->S[i]));CHKERRQ(ierr); 563 #else 564 ierr = PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->S[i]));CHKERRQ(ierr); 565 #endif 566 } 567 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 568 } 569 570 /* Print appropriate details for T. */ 571 if (a->isTI) { 572 ierr = PetscViewerASCIIPrintf(viewer,"T is the identity matrix\n");CHKERRQ(ierr); 573 } else if (!a->T) { 574 ierr = PetscViewerASCIIPrintf(viewer,"T is NULL\n");CHKERRQ(ierr); 575 } else if (format == PETSC_VIEWER_ASCII_IMPL) { 576 ierr = PetscViewerASCIIPrintf(viewer,"Entries of T are ");CHKERRQ(ierr); 577 for (i=0; i<(a->p * a->q); i++) { 578 #if defined(PETSC_USE_COMPLEX) 579 ierr = PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->T[i]),(double)PetscImaginaryPart(a->T[i]));CHKERRQ(ierr); 580 #else 581 ierr = PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->T[i]));CHKERRQ(ierr); 582 #endif 583 } 584 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 585 } 586 587 /* Now print details for the AIJ matrix, using the AIJ viewer. */ 588 ierr = PetscViewerASCIIPrintf(viewer,"Now viewing the associated AIJ matrix:\n");CHKERRQ(ierr); 589 if (ismpikaij) { 590 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 591 ierr = MatView(b->A,viewer);CHKERRQ(ierr); 592 } else { 593 ierr = MatView(a->AIJ,viewer);CHKERRQ(ierr); 594 } 595 596 } else { 597 /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */ 598 if (ismpikaij) { 599 ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 600 } else { 601 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 602 } 603 ierr = MatView(B,viewer);CHKERRQ(ierr); 604 ierr = MatDestroy(&B);CHKERRQ(ierr); 605 } 606 PetscFunctionReturn(0); 607 } 608 609 PetscErrorCode MatDestroy_MPIKAIJ(Mat A) 610 { 611 PetscErrorCode ierr; 612 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 613 614 PetscFunctionBegin; 615 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 616 ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr); 617 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 618 ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr); 619 ierr = VecDestroy(&b->w);CHKERRQ(ierr); 620 ierr = PetscFree(b->S);CHKERRQ(ierr); 621 ierr = PetscFree(b->T);CHKERRQ(ierr); 622 ierr = PetscFree(b->ibdiag);CHKERRQ(ierr); 623 ierr = PetscFree(A->data);CHKERRQ(ierr); 624 PetscFunctionReturn(0); 625 } 626 627 /* --------------------------------------------------------------------------------------*/ 628 629 /* zz = yy + Axx */ 630 PetscErrorCode MatMultAdd_SeqKAIJ(Mat A,Vec xx,Vec yy,Vec zz) 631 { 632 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 633 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 634 const PetscScalar *s = b->S, *t = b->T; 635 const PetscScalar *x,*v,*bx; 636 PetscScalar *y,*sums; 637 PetscErrorCode ierr; 638 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 639 PetscInt n,i,jrow,j,l,p=b->p,q=b->q,k; 640 641 PetscFunctionBegin; 642 if (!yy) { 643 ierr = VecSet(zz,0.0);CHKERRQ(ierr); 644 } else { 645 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 646 } 647 if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0); 648 649 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 650 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 651 idx = a->j; 652 v = a->a; 653 ii = a->i; 654 655 if (b->isTI) { 656 for (i=0; i<m; i++) { 657 jrow = ii[i]; 658 n = ii[i+1] - jrow; 659 sums = y + p*i; 660 for (j=0; j<n; j++) { 661 for (k=0; k<p; k++) { 662 sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k]; 663 } 664 } 665 } 666 ierr = PetscLogFlops(3.0*(a->nz)*p);CHKERRQ(ierr); 667 } else if (t) { 668 for (i=0; i<m; i++) { 669 jrow = ii[i]; 670 n = ii[i+1] - jrow; 671 sums = y + p*i; 672 for (j=0; j<n; j++) { 673 for (k=0; k<p; k++) { 674 for (l=0; l<q; l++) { 675 sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l]; 676 } 677 } 678 } 679 } 680 /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do), 681 * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T), 682 * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter 683 * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */ 684 ierr = PetscLogFlops((2.0*p*q-p)*m+2.0*p*a->nz);CHKERRQ(ierr); 685 } 686 if (s) { 687 for (i=0; i<m; i++) { 688 sums = y + p*i; 689 bx = x + q*i; 690 if (i < b->AIJ->cmap->n) { 691 for (j=0; j<q; j++) { 692 for (k=0; k<p; k++) { 693 sums[k] += s[k+j*p]*bx[j]; 694 } 695 } 696 } 697 } 698 ierr = PetscLogFlops(2.0*m*p*q);CHKERRQ(ierr); 699 } 700 701 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 702 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 703 PetscFunctionReturn(0); 704 } 705 706 PetscErrorCode MatMult_SeqKAIJ(Mat A,Vec xx,Vec yy) 707 { 708 PetscErrorCode ierr; 709 PetscFunctionBegin; 710 ierr = MatMultAdd_SeqKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr); 711 PetscFunctionReturn(0); 712 } 713 714 #include <petsc/private/kernels/blockinvert.h> 715 716 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A,const PetscScalar **values) 717 { 718 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 719 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 720 const PetscScalar *S = b->S; 721 const PetscScalar *T = b->T; 722 const PetscScalar *v = a->a; 723 const PetscInt p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i; 724 PetscErrorCode ierr; 725 PetscInt i,j,*v_pivots,dof,dof2; 726 PetscScalar *diag,aval,*v_work; 727 728 PetscFunctionBegin; 729 if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse."); 730 if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix."); 731 732 dof = p; 733 dof2 = dof*dof; 734 735 if (b->ibdiagvalid) { 736 if (values) *values = b->ibdiag; 737 PetscFunctionReturn(0); 738 } 739 if (!b->ibdiag) { 740 ierr = PetscMalloc1(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr); 741 ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr); 742 } 743 if (values) *values = b->ibdiag; 744 diag = b->ibdiag; 745 746 ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr); 747 for (i=0; i<m; i++) { 748 if (S) { 749 ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr); 750 } else { 751 ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr); 752 } 753 if (b->isTI) { 754 aval = 0; 755 for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 756 for (j=0; j<dof; j++) diag[j+dof*j] += aval; 757 } else if (T) { 758 aval = 0; 759 for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 760 for (j=0; j<dof2; j++) diag[j] += aval*T[j]; 761 } 762 ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr); 763 diag += dof2; 764 } 765 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 766 767 b->ibdiagvalid = PETSC_TRUE; 768 PetscFunctionReturn(0); 769 } 770 771 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B) 772 { 773 Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data; 774 775 PetscFunctionBegin; 776 *B = kaij->AIJ; 777 PetscFunctionReturn(0); 778 } 779 780 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 781 { 782 PetscErrorCode ierr; 783 Mat_SeqKAIJ *kaij = (Mat_SeqKAIJ*) A->data; 784 Mat_SeqAIJ *a = (Mat_SeqAIJ*)kaij->AIJ->data; 785 const PetscScalar *aa = a->a, *T = kaij->T, *v; 786 const PetscInt m = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi; 787 const PetscScalar *b, *xb, *idiag; 788 PetscScalar *x, *work, *workt, *w, *y, *arr, *t, *arrt; 789 PetscInt i, j, k, i2, bs, bs2, nz; 790 791 PetscFunctionBegin; 792 its = its*lits; 793 if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 794 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 795 if (fshift) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift"); 796 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts"); 797 if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: No support for non-square dense blocks"); 798 else {bs = p; bs2 = bs*bs; } 799 800 if (!m) PetscFunctionReturn(0); 801 802 if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ(A,NULL);CHKERRQ(ierr); } 803 idiag = kaij->ibdiag; 804 diag = a->diag; 805 806 if (!kaij->sor.setup) { 807 ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr); 808 kaij->sor.setup = PETSC_TRUE; 809 } 810 y = kaij->sor.y; 811 w = kaij->sor.w; 812 work = kaij->sor.work; 813 t = kaij->sor.t; 814 arr = kaij->sor.arr; 815 816 ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 817 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 818 819 if (flag & SOR_ZERO_INITIAL_GUESS) { 820 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 821 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); /* x[0:bs] <- D^{-1} b[0:bs] */ 822 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 823 i2 = bs; 824 idiag += bs2; 825 for (i=1; i<m; i++) { 826 v = aa + ai[i]; 827 vi = aj + ai[i]; 828 nz = diag[i] - ai[i]; 829 830 if (T) { /* b - T (Arow * x) */ 831 ierr = PetscMemzero(w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 832 for (j=0; j<nz; j++) { 833 for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 834 } 835 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]); 836 for (k=0; k<bs; k++) t[i2+k] += b[i2+k]; 837 } else if (kaij->isTI) { 838 ierr = PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 839 for (j=0; j<nz; j++) { 840 for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k]; 841 } 842 } else { 843 ierr = PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 844 } 845 846 PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y); 847 for (j=0; j<bs; j++) x[i2+j] = omega * y[j]; 848 849 idiag += bs2; 850 i2 += bs; 851 } 852 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 853 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 854 xb = t; 855 } else xb = b; 856 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 857 idiag = kaij->ibdiag+bs2*(m-1); 858 i2 = bs * (m-1); 859 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 860 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 861 i2 -= bs; 862 idiag -= bs2; 863 for (i=m-2; i>=0; i--) { 864 v = aa + diag[i] + 1 ; 865 vi = aj + diag[i] + 1; 866 nz = ai[i+1] - diag[i] - 1; 867 868 if (T) { /* FIXME: This branch untested */ 869 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 870 /* copy all rows of x that are needed into contiguous space */ 871 workt = work; 872 for (j=0; j<nz; j++) { 873 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 874 workt += bs; 875 } 876 arrt = arr; 877 for (j=0; j<nz; j++) { 878 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 879 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 880 arrt += bs2; 881 } 882 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 883 } else if (kaij->isTI) { 884 ierr = PetscMemcpy(w,t+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 885 for (j=0; j<nz; j++) { 886 for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 887 } 888 } 889 890 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */ 891 for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j]; 892 893 idiag -= bs2; 894 i2 -= bs; 895 } 896 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 897 } 898 its--; 899 } 900 while (its--) { /* FIXME: This branch not updated */ 901 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 902 i2 = 0; 903 idiag = kaij->ibdiag; 904 for (i=0; i<m; i++) { 905 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 906 907 v = aa + ai[i]; 908 vi = aj + ai[i]; 909 nz = diag[i] - ai[i]; 910 workt = work; 911 for (j=0; j<nz; j++) { 912 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 913 workt += bs; 914 } 915 arrt = arr; 916 if (T) { 917 for (j=0; j<nz; j++) { 918 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 919 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 920 arrt += bs2; 921 } 922 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 923 } else if (kaij->isTI) { 924 for (j=0; j<nz; j++) { 925 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 926 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 927 arrt += bs2; 928 } 929 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 930 } 931 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 932 933 v = aa + diag[i] + 1; 934 vi = aj + diag[i] + 1; 935 nz = ai[i+1] - diag[i] - 1; 936 workt = work; 937 for (j=0; j<nz; j++) { 938 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 939 workt += bs; 940 } 941 arrt = arr; 942 if (T) { 943 for (j=0; j<nz; j++) { 944 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 945 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 946 arrt += bs2; 947 } 948 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 949 } else if (kaij->isTI) { 950 for (j=0; j<nz; j++) { 951 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 952 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 953 arrt += bs2; 954 } 955 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 956 } 957 958 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 959 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 960 961 idiag += bs2; 962 i2 += bs; 963 } 964 xb = t; 965 } 966 else xb = b; 967 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 968 idiag = kaij->ibdiag+bs2*(m-1); 969 i2 = bs * (m-1); 970 if (xb == b) { 971 for (i=m-1; i>=0; i--) { 972 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 973 974 v = aa + ai[i]; 975 vi = aj + ai[i]; 976 nz = diag[i] - ai[i]; 977 workt = work; 978 for (j=0; j<nz; j++) { 979 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 980 workt += bs; 981 } 982 arrt = arr; 983 if (T) { 984 for (j=0; j<nz; j++) { 985 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 986 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 987 arrt += bs2; 988 } 989 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 990 } else if (kaij->isTI) { 991 for (j=0; j<nz; j++) { 992 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 993 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 994 arrt += bs2; 995 } 996 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 997 } 998 999 v = aa + diag[i] + 1; 1000 vi = aj + diag[i] + 1; 1001 nz = ai[i+1] - diag[i] - 1; 1002 workt = work; 1003 for (j=0; j<nz; j++) { 1004 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1005 workt += bs; 1006 } 1007 arrt = arr; 1008 if (T) { 1009 for (j=0; j<nz; j++) { 1010 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 1011 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 1012 arrt += bs2; 1013 } 1014 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 1015 } else if (kaij->isTI) { 1016 for (j=0; j<nz; j++) { 1017 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 1018 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 1019 arrt += bs2; 1020 } 1021 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 1022 } 1023 1024 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 1025 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 1026 } 1027 } else { 1028 for (i=m-1; i>=0; i--) { 1029 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 1030 v = aa + diag[i] + 1; 1031 vi = aj + diag[i] + 1; 1032 nz = ai[i+1] - diag[i] - 1; 1033 workt = work; 1034 for (j=0; j<nz; j++) { 1035 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 1036 workt += bs; 1037 } 1038 arrt = arr; 1039 if (T) { 1040 for (j=0; j<nz; j++) { 1041 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 1042 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 1043 arrt += bs2; 1044 } 1045 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 1046 } else if (kaij->isTI) { 1047 for (j=0; j<nz; j++) { 1048 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 1049 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 1050 arrt += bs2; 1051 } 1052 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 1053 } 1054 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 1055 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 1056 } 1057 } 1058 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1059 } 1060 } 1061 1062 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 1063 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1064 PetscFunctionReturn(0); 1065 } 1066 1067 /*===================================================================================*/ 1068 1069 PetscErrorCode MatMultAdd_MPIKAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1070 { 1071 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 1072 PetscErrorCode ierr; 1073 1074 PetscFunctionBegin; 1075 if (!yy) { 1076 ierr = VecSet(zz,0.0);CHKERRQ(ierr); 1077 } else { 1078 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 1079 } 1080 /* start the scatter */ 1081 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1082 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr); 1083 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1084 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 1085 PetscFunctionReturn(0); 1086 } 1087 1088 PetscErrorCode MatMult_MPIKAIJ(Mat A,Vec xx,Vec yy) 1089 { 1090 PetscErrorCode ierr; 1091 PetscFunctionBegin; 1092 ierr = MatMultAdd_MPIKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr); 1093 PetscFunctionReturn(0); 1094 } 1095 1096 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A,const PetscScalar **values) 1097 { 1098 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 1099 PetscErrorCode ierr; 1100 1101 PetscFunctionBegin; 1102 ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr); 1103 PetscFunctionReturn(0); 1104 } 1105 1106 /* ----------------------------------------------------------------*/ 1107 1108 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 1109 { 1110 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*) A->data; 1111 PetscErrorCode diag = PETSC_FALSE; 1112 PetscErrorCode ierr; 1113 PetscInt nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c; 1114 PetscScalar *vaij,*v,*S=b->S,*T=b->T; 1115 1116 PetscFunctionBegin; 1117 if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1118 b->getrowactive = PETSC_TRUE; 1119 if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row); 1120 1121 if ((!S) && (!T) && (!b->isTI)) { 1122 if (ncols) *ncols = 0; 1123 if (cols) *cols = NULL; 1124 if (values) *values = NULL; 1125 PetscFunctionReturn(0); 1126 } 1127 1128 if (T || b->isTI) { 1129 ierr = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr); 1130 c = nzaij; 1131 for (i=0; i<nzaij; i++) { 1132 /* check if this row contains a diagonal entry */ 1133 if (colsaij[i] == r) { 1134 diag = PETSC_TRUE; 1135 c = i; 1136 } 1137 } 1138 } else nzaij = c = 0; 1139 1140 /* calculate size of row */ 1141 nz = 0; 1142 if (S) nz += q; 1143 if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q); 1144 1145 if (cols || values) { 1146 ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr); 1147 for (i=0; i<q; i++) { 1148 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1149 v[i] = 0.0; 1150 } 1151 if (b->isTI) { 1152 for (i=0; i<nzaij; i++) { 1153 for (j=0; j<q; j++) { 1154 idx[i*q+j] = colsaij[i]*q+j; 1155 v[i*q+j] = (j==s ? vaij[i] : 0); 1156 } 1157 } 1158 } else if (T) { 1159 for (i=0; i<nzaij; i++) { 1160 for (j=0; j<q; j++) { 1161 idx[i*q+j] = colsaij[i]*q+j; 1162 v[i*q+j] = vaij[i]*T[s+j*p]; 1163 } 1164 } 1165 } 1166 if (S) { 1167 for (j=0; j<q; j++) { 1168 idx[c*q+j] = r*q+j; 1169 v[c*q+j] += S[s+j*p]; 1170 } 1171 } 1172 } 1173 1174 if (ncols) *ncols = nz; 1175 if (cols) *cols = idx; 1176 if (values) *values = v; 1177 PetscFunctionReturn(0); 1178 } 1179 1180 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1181 { 1182 PetscErrorCode ierr; 1183 PetscFunctionBegin; 1184 ierr = PetscFree2(*idx,*v);CHKERRQ(ierr); 1185 ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 1186 PetscFunctionReturn(0); 1187 } 1188 1189 PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 1190 { 1191 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*) A->data; 1192 Mat MatAIJ = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ; 1193 Mat MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ; 1194 Mat AIJ = b->A; 1195 PetscBool diag = PETSC_FALSE; 1196 PetscErrorCode ierr; 1197 const PetscInt rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray; 1198 PetscInt nz,*idx,ncolsaij = 0,ncolsoaij = 0,*colsaij,*colsoaij,r,s,c,i,j,lrow; 1199 PetscScalar *v,*vals,*ovals,*S=b->S,*T=b->T; 1200 1201 PetscFunctionBegin; 1202 if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1203 b->getrowactive = PETSC_TRUE; 1204 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1205 lrow = row - rstart; 1206 1207 if ((!S) && (!T) && (!b->isTI)) { 1208 if (ncols) *ncols = 0; 1209 if (cols) *cols = NULL; 1210 if (values) *values = NULL; 1211 PetscFunctionReturn(0); 1212 } 1213 1214 r = lrow/p; 1215 s = lrow%p; 1216 1217 if (T || b->isTI) { 1218 ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray);CHKERRQ(ierr); 1219 ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr); 1220 ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr); 1221 1222 c = ncolsaij + ncolsoaij; 1223 for (i=0; i<ncolsaij; i++) { 1224 /* check if this row contains a diagonal entry */ 1225 if (colsaij[i] == r) { 1226 diag = PETSC_TRUE; 1227 c = i; 1228 } 1229 } 1230 } else c = 0; 1231 1232 /* calculate size of row */ 1233 nz = 0; 1234 if (S) nz += q; 1235 if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q); 1236 1237 if (cols || values) { 1238 ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr); 1239 for (i=0; i<q; i++) { 1240 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1241 v[i] = 0.0; 1242 } 1243 if (b->isTI) { 1244 for (i=0; i<ncolsaij; i++) { 1245 for (j=0; j<q; j++) { 1246 idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 1247 v[i*q+j] = (j==s ? vals[i] : 0.0); 1248 } 1249 } 1250 for (i=0; i<ncolsoaij; i++) { 1251 for (j=0; j<q; j++) { 1252 idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 1253 v[(i+ncolsaij)*q+j] = (j==s ? ovals[i]: 0.0); 1254 } 1255 } 1256 } else if (T) { 1257 for (i=0; i<ncolsaij; i++) { 1258 for (j=0; j<q; j++) { 1259 idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 1260 v[i*q+j] = vals[i]*T[s+j*p]; 1261 } 1262 } 1263 for (i=0; i<ncolsoaij; i++) { 1264 for (j=0; j<q; j++) { 1265 idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 1266 v[(i+ncolsaij)*q+j] = ovals[i]*T[s+j*p]; 1267 } 1268 } 1269 } 1270 if (S) { 1271 for (j=0; j<q; j++) { 1272 idx[c*q+j] = (r+rstart/p)*q+j; 1273 v[c*q+j] += S[s+j*p]; 1274 } 1275 } 1276 } 1277 1278 if (ncols) *ncols = nz; 1279 if (cols) *cols = idx; 1280 if (values) *values = v; 1281 PetscFunctionReturn(0); 1282 } 1283 1284 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1285 { 1286 PetscErrorCode ierr; 1287 PetscFunctionBegin; 1288 ierr = PetscFree2(*idx,*v);CHKERRQ(ierr); 1289 ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 1290 PetscFunctionReturn(0); 1291 } 1292 1293 PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 1294 { 1295 PetscErrorCode ierr; 1296 Mat A; 1297 1298 PetscFunctionBegin; 1299 ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1300 ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr); 1301 ierr = MatDestroy(&A);CHKERRQ(ierr); 1302 PetscFunctionReturn(0); 1303 } 1304 1305 /* ---------------------------------------------------------------------------------- */ 1306 /*@C 1307 MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form: 1308 1309 [I \otimes S + A \otimes T] 1310 1311 where 1312 S is a dense (p \times q) matrix 1313 T is a dense (p \times q) matrix 1314 A is an AIJ (n \times n) matrix 1315 I is the identity matrix 1316 The resulting matrix is (np \times nq) 1317 1318 S and T are always stored independently on all processes as PetscScalar arrays in column-major format. 1319 1320 Collective 1321 1322 Input Parameters: 1323 + A - the AIJ matrix 1324 . p - number of rows in S and T 1325 . q - number of columns in S and T 1326 . S - the S matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major) 1327 - T - the T matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major) 1328 1329 Output Parameter: 1330 . kaij - the new KAIJ matrix 1331 1332 Notes: 1333 This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed. 1334 Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix. 1335 1336 Level: advanced 1337 1338 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ 1339 @*/ 1340 PetscErrorCode MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij) 1341 { 1342 PetscErrorCode ierr; 1343 PetscMPIInt size; 1344 1345 PetscFunctionBegin; 1346 ierr = MatCreate(PetscObjectComm((PetscObject)A),kaij);CHKERRQ(ierr); 1347 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 1348 if (size == 1) { 1349 ierr = MatSetType(*kaij,MATSEQKAIJ);CHKERRQ(ierr); 1350 } else { 1351 ierr = MatSetType(*kaij,MATMPIKAIJ);CHKERRQ(ierr); 1352 } 1353 ierr = MatKAIJSetAIJ(*kaij,A);CHKERRQ(ierr); 1354 ierr = MatKAIJSetS(*kaij,p,q,S);CHKERRQ(ierr); 1355 ierr = MatKAIJSetT(*kaij,p,q,T);CHKERRQ(ierr); 1356 ierr = MatSetUp(*kaij);CHKERRQ(ierr); 1357 PetscFunctionReturn(0); 1358 } 1359 1360 /*MC 1361 MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form 1362 [I \otimes S + A \otimes T], 1363 where 1364 S is a dense (p \times q) matrix, 1365 T is a dense (p \times q) matrix, 1366 A is an AIJ (n \times n) matrix, 1367 and I is the identity matrix. 1368 The resulting matrix is (np \times nq). 1369 1370 S and T are always stored independently on all processes as PetscScalar arrays in column-major format. 1371 1372 Notes: 1373 A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b, 1374 where x and b are column vectors containing the row-major representations of X and B. 1375 1376 Level: advanced 1377 1378 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ() 1379 M*/ 1380 1381 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A) 1382 { 1383 PetscErrorCode ierr; 1384 Mat_MPIKAIJ *b; 1385 PetscMPIInt size; 1386 1387 PetscFunctionBegin; 1388 ierr = PetscNewLog(A,&b);CHKERRQ(ierr); 1389 A->data = (void*)b; 1390 1391 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1392 1393 A->ops->setup = MatSetUp_KAIJ; 1394 1395 b->w = NULL; 1396 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 1397 if (size == 1) { 1398 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr); 1399 A->ops->setup = MatSetUp_KAIJ; 1400 A->ops->destroy = MatDestroy_SeqKAIJ; 1401 A->ops->view = MatView_KAIJ; 1402 A->ops->mult = MatMult_SeqKAIJ; 1403 A->ops->multadd = MatMultAdd_SeqKAIJ; 1404 A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ; 1405 A->ops->getrow = MatGetRow_SeqKAIJ; 1406 A->ops->restorerow = MatRestoreRow_SeqKAIJ; 1407 A->ops->sor = MatSOR_SeqKAIJ; 1408 } else { 1409 ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr); 1410 A->ops->setup = MatSetUp_KAIJ; 1411 A->ops->destroy = MatDestroy_MPIKAIJ; 1412 A->ops->view = MatView_KAIJ; 1413 A->ops->mult = MatMult_MPIKAIJ; 1414 A->ops->multadd = MatMultAdd_MPIKAIJ; 1415 A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ; 1416 A->ops->getrow = MatGetRow_MPIKAIJ; 1417 A->ops->restorerow = MatRestoreRow_MPIKAIJ; 1418 ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr); 1419 } 1420 A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ; 1421 PetscFunctionReturn(0); 1422 } 1423