1 2 /* 3 Defines the basic matrix operations for the KAIJ matrix storage format. 4 This format is used to evaluate matrices of the form: 5 6 [I \otimes S + A \otimes T] 7 8 where 9 S is a dense (p \times q) matrix 10 T is a dense (p \times q) matrix 11 A is an AIJ (n \times n) matrix 12 I is the identity matrix 13 14 The resulting matrix is (np \times nq) 15 16 We provide: 17 MatMult() 18 MatMultAdd() 19 MatInvertBlockDiagonal() 20 and 21 MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*) 22 23 This single directory handles both the sequential and parallel codes 24 */ 25 26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/ 27 #include <../src/mat/utils/freespace.h> 28 #include <petsc/private/vecimpl.h> 29 30 /*@C 31 MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix 32 33 Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel 34 35 Input Parameter: 36 . A - the KAIJ matrix 37 38 Output Parameter: 39 . B - the AIJ matrix 40 41 Level: advanced 42 43 Notes: The reference count on the AIJ matrix is not increased so you should not destroy it. 44 45 .seealso: MatCreateKAIJ() 46 @*/ 47 PetscErrorCode MatKAIJGetAIJ(Mat A,Mat *B) 48 { 49 PetscErrorCode ierr; 50 PetscBool ismpikaij,isseqkaij; 51 52 PetscFunctionBegin; 53 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr); 54 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr); 55 if (ismpikaij) { 56 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 57 58 *B = b->A; 59 } else if (isseqkaij) { 60 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 61 62 *B = b->AIJ; 63 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ"); 64 PetscFunctionReturn(0); 65 } 66 67 /*@C 68 MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix 69 70 Not Collective; the entire S is stored and returned independently on all processes. 71 72 Input Parameter: 73 . A - the KAIJ matrix 74 75 Output Parameters: 76 + m - the number of rows in S 77 . n - the number of columns in S 78 - S - the S matrix, in form of a scalar array in column-major format 79 80 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 81 82 Level: advanced 83 84 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 85 @*/ 86 PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S) 87 { 88 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 89 PetscFunctionBegin; 90 if (m) *m = b->p; 91 if (n) *n = b->q; 92 if (S) *S = b->S; 93 PetscFunctionReturn(0); 94 } 95 96 /*@C 97 MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix 98 99 Not Collective; the entire S is stored and returned independently on all processes. 100 101 Input Parameter: 102 . A - the KAIJ matrix 103 104 Output Parameters: 105 + m - the number of rows in S 106 . n - the number of columns in S 107 - S - the S matrix, in form of a scalar array in column-major format 108 109 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 110 111 Level: advanced 112 113 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 114 @*/ 115 PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S) 116 { 117 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 118 PetscFunctionBegin; 119 if (m) *m = b->p; 120 if (n) *n = b->q; 121 if (S) *S = b->S; 122 PetscFunctionReturn(0); 123 } 124 125 /*@C 126 MatKAIJRestoreS - Restore array obtained with MatKAIJGetS() 127 128 Not collective 129 130 Input Parameter: 131 . A - the KAIJ matrix 132 133 Output Parameter: 134 . S - location of pointer to array obtained with MatKAIJGetS() 135 136 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 137 If NULL is passed, it will not attempt to zero the array pointer. 138 139 Level: advanced 140 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead() 141 @*/ 142 PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S) 143 { 144 PetscErrorCode ierr; 145 146 PetscFunctionBegin; 147 if (S) *S = NULL; 148 ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 149 PetscFunctionReturn(0); 150 } 151 152 /*@C 153 MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead() 154 155 Not collective 156 157 Input Parameter: 158 . A - the KAIJ matrix 159 160 Output Parameter: 161 . S - location of pointer to array obtained with MatKAIJGetS() 162 163 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 164 If NULL is passed, it will not attempt to zero the array pointer. 165 166 Level: advanced 167 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead() 168 @*/ 169 PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S) 170 { 171 PetscFunctionBegin; 172 if (S) *S = NULL; 173 PetscFunctionReturn(0); 174 } 175 176 /*@C 177 MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix 178 179 Not Collective; the entire T is stored and returned independently on all processes 180 181 Input Parameter: 182 . A - the KAIJ matrix 183 184 Output Parameter: 185 + m - the number of rows in T 186 . n - the number of columns in T 187 - T - the T matrix, in form of a scalar array in column-major format 188 189 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 190 191 Level: advanced 192 193 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 194 @*/ 195 PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T) 196 { 197 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 198 PetscFunctionBegin; 199 if (m) *m = b->p; 200 if (n) *n = b->q; 201 if (T) *T = b->T; 202 PetscFunctionReturn(0); 203 } 204 205 /*@C 206 MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix 207 208 Not Collective; the entire T is stored and returned independently on all processes 209 210 Input Parameter: 211 . A - the KAIJ matrix 212 213 Output Parameter: 214 + m - the number of rows in T 215 . n - the number of columns in T 216 - T - the T matrix, in form of a scalar array in column-major format 217 218 Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired) 219 220 Level: advanced 221 222 .seealso: MatCreateKAIJ(), MatGetBlockSizes() 223 @*/ 224 PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T) 225 { 226 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 227 PetscFunctionBegin; 228 if (m) *m = b->p; 229 if (n) *n = b->q; 230 if (T) *T = b->T; 231 PetscFunctionReturn(0); 232 } 233 234 /*@C 235 MatKAIJRestoreT - Restore array obtained with MatKAIJGetT() 236 237 Not collective 238 239 Input Parameter: 240 . A - the KAIJ matrix 241 242 Output Parameter: 243 . T - location of pointer to array obtained with MatKAIJGetS() 244 245 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 246 If NULL is passed, it will not attempt to zero the array pointer. 247 248 Level: advanced 249 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead() 250 @*/ 251 PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T) 252 { 253 PetscErrorCode ierr; 254 255 PetscFunctionBegin; 256 if (T) *T = NULL; 257 ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 258 PetscFunctionReturn(0); 259 } 260 261 /*@C 262 MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead() 263 264 Not collective 265 266 Input Parameter: 267 . A - the KAIJ matrix 268 269 Output Parameter: 270 . T - location of pointer to array obtained with MatKAIJGetS() 271 272 Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored. 273 If NULL is passed, it will not attempt to zero the array pointer. 274 275 Level: advanced 276 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead() 277 @*/ 278 PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T) 279 { 280 PetscFunctionBegin; 281 if (T) *T = NULL; 282 PetscFunctionReturn(0); 283 } 284 285 /*@ 286 MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix 287 288 Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel 289 290 Input Parameters: 291 + A - the KAIJ matrix 292 - B - the AIJ matrix 293 294 Notes: 295 This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed. 296 Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix. 297 298 Level: advanced 299 300 .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT() 301 @*/ 302 PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B) 303 { 304 PetscErrorCode ierr; 305 PetscMPIInt size; 306 307 PetscFunctionBegin; 308 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 309 if (size == 1) { 310 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 311 a->AIJ = B; 312 } else { 313 Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data; 314 a->A = B; 315 } 316 ierr = PetscObjectReference((PetscObject)B);CHKERRQ(ierr); 317 PetscFunctionReturn(0); 318 } 319 320 /*@C 321 MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix 322 323 Logically Collective; the entire S is stored independently on all processes. 324 325 Input Parameters: 326 + A - the KAIJ matrix 327 . p - the number of rows in S 328 . q - the number of columns in S 329 - S - the S matrix, in form of a scalar array in column-major format 330 331 Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix. 332 The S matrix is copied, so the user can destroy this array. 333 334 Level: Advanced 335 336 .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ() 337 @*/ 338 PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[]) 339 { 340 PetscErrorCode ierr; 341 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 342 343 PetscFunctionBegin; 344 ierr = PetscFree(a->S);CHKERRQ(ierr); 345 if (S) { 346 ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->S);CHKERRQ(ierr); 347 ierr = PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 348 } else a->S = NULL; 349 350 a->p = p; 351 a->q = q; 352 PetscFunctionReturn(0); 353 } 354 355 /*@C 356 MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix 357 358 Logically Collective; the entire T is stored independently on all processes. 359 360 Input Parameters: 361 + A - the KAIJ matrix 362 . p - the number of rows in S 363 . q - the number of columns in S 364 - T - the T matrix, in form of a scalar array in column-major format 365 366 Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix. 367 The T matrix is copied, so the user can destroy this array. 368 369 Level: Advanced 370 371 .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ() 372 @*/ 373 PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[]) 374 { 375 PetscErrorCode ierr; 376 PetscInt i,j; 377 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 378 PetscBool isTI = PETSC_FALSE; 379 380 PetscFunctionBegin; 381 /* check if T is an identity matrix */ 382 if (T && (p == q)) { 383 isTI = PETSC_TRUE; 384 for (i=0; i<p; i++) { 385 for (j=0; j<q; j++) { 386 if (i == j) { 387 /* diagonal term must be 1 */ 388 if (T[i+j*p] != 1.0) isTI = PETSC_FALSE; 389 } else { 390 /* off-diagonal term must be 0 */ 391 if (T[i+j*p] != 0.0) isTI = PETSC_FALSE; 392 } 393 } 394 } 395 } 396 a->isTI = isTI; 397 398 ierr = PetscFree(a->T);CHKERRQ(ierr); 399 if (T && (!isTI)) { 400 ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->T);CHKERRQ(ierr); 401 ierr = PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr); 402 } else a->T = NULL; 403 404 a->p = p; 405 a->q = q; 406 PetscFunctionReturn(0); 407 } 408 409 PetscErrorCode MatDestroy_SeqKAIJ(Mat A) 410 { 411 PetscErrorCode ierr; 412 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 413 414 PetscFunctionBegin; 415 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 416 ierr = PetscFree(b->S);CHKERRQ(ierr); 417 ierr = PetscFree(b->T);CHKERRQ(ierr); 418 ierr = PetscFree(b->ibdiag);CHKERRQ(ierr); 419 ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr); 420 ierr = PetscFree(A->data);CHKERRQ(ierr); 421 PetscFunctionReturn(0); 422 } 423 424 PetscErrorCode MatSetUp_KAIJ(Mat A) 425 { 426 PetscErrorCode ierr; 427 PetscInt n; 428 PetscMPIInt size; 429 Mat_SeqKAIJ *seqkaij = (Mat_SeqKAIJ*)A->data; 430 431 PetscFunctionBegin; 432 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 433 if (size == 1) { 434 ierr = MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);CHKERRQ(ierr); 435 ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr); 436 ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr); 437 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 438 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 439 } else { 440 Mat_MPIKAIJ *a; 441 Mat_MPIAIJ *mpiaij; 442 IS from,to; 443 Vec gvec; 444 PetscScalar *T; 445 PetscInt i,j; 446 447 a = (Mat_MPIKAIJ*)A->data; 448 mpiaij = (Mat_MPIAIJ*)a->A->data; 449 ierr = MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);CHKERRQ(ierr); 450 ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr); 451 ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr); 452 ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr); 453 ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr); 454 455 if (a->isTI) { 456 /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL. 457 * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will 458 * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix 459 * to pass in. */ 460 ierr = PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);CHKERRQ(ierr); 461 for (i=0; i<a->p; i++) { 462 for (j=0; j<a->q; j++) { 463 if (i==j) T[i+j*a->p] = 1.0; 464 else T[i+j*a->p] = 0.0; 465 } 466 } 467 } else T = a->T; 468 ierr = MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);CHKERRQ(ierr); 469 ierr = MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);CHKERRQ(ierr); 470 if (a->isTI) { 471 ierr = PetscFree(T);CHKERRQ(ierr); 472 } 473 474 ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr); 475 ierr = VecCreate(PETSC_COMM_SELF,&a->w);CHKERRQ(ierr); 476 ierr = VecSetSizes(a->w,n*a->q,n*a->q);CHKERRQ(ierr); 477 ierr = VecSetBlockSize(a->w,a->q);CHKERRQ(ierr); 478 ierr = VecSetType(a->w,VECSEQ);CHKERRQ(ierr); 479 480 /* create two temporary Index sets for build scatter gather */ 481 ierr = ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr); 482 ierr = ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);CHKERRQ(ierr); 483 484 /* create temporary global vector to generate scatter context */ 485 ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);CHKERRQ(ierr); 486 487 /* generate the scatter context */ 488 ierr = VecScatterCreate(gvec,from,a->w,to,&a->ctx);CHKERRQ(ierr); 489 490 ierr = ISDestroy(&from);CHKERRQ(ierr); 491 ierr = ISDestroy(&to);CHKERRQ(ierr); 492 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 493 } 494 495 A->assembled = PETSC_TRUE; 496 PetscFunctionReturn(0); 497 } 498 499 PetscErrorCode MatView_KAIJ(Mat A,PetscViewer viewer) 500 { 501 PetscViewerFormat format; 502 Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data; 503 Mat B; 504 PetscInt i; 505 PetscErrorCode ierr; 506 PetscBool ismpikaij; 507 508 PetscFunctionBegin; 509 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr); 510 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 511 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) { 512 ierr = PetscViewerASCIIPrintf(viewer,"S and T have %D rows and %D columns\n",a->p,a->q);CHKERRQ(ierr); 513 514 /* Print appropriate details for S. */ 515 if (!a->S) { 516 ierr = PetscViewerASCIIPrintf(viewer,"S is NULL\n");CHKERRQ(ierr); 517 } else if (format == PETSC_VIEWER_ASCII_IMPL) { 518 ierr = PetscViewerASCIIPrintf(viewer,"Entries of S are ");CHKERRQ(ierr); 519 for (i=0; i<(a->p * a->q); i++) { 520 #if defined(PETSC_USE_COMPLEX) 521 ierr = PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->S[i]),(double)PetscImaginaryPart(a->S[i]));CHKERRQ(ierr); 522 #else 523 ierr = PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->S[i]));CHKERRQ(ierr); 524 #endif 525 } 526 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 527 } 528 529 /* Print appropriate details for T. */ 530 if (a->isTI) { 531 ierr = PetscViewerASCIIPrintf(viewer,"T is the identity matrix\n");CHKERRQ(ierr); 532 } else if (!a->T) { 533 ierr = PetscViewerASCIIPrintf(viewer,"T is NULL\n");CHKERRQ(ierr); 534 } else if (format == PETSC_VIEWER_ASCII_IMPL) { 535 ierr = PetscViewerASCIIPrintf(viewer,"Entries of T are ");CHKERRQ(ierr); 536 for (i=0; i<(a->p * a->q); i++) { 537 #if defined(PETSC_USE_COMPLEX) 538 ierr = PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->T[i]),(double)PetscImaginaryPart(a->T[i]));CHKERRQ(ierr); 539 #else 540 ierr = PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->T[i]));CHKERRQ(ierr); 541 #endif 542 } 543 ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr); 544 } 545 546 /* Now print details for the AIJ matrix, using the AIJ viewer. */ 547 ierr = PetscViewerASCIIPrintf(viewer,"Now viewing the associated AIJ matrix:\n");CHKERRQ(ierr); 548 if (ismpikaij) { 549 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 550 ierr = MatView(b->A,viewer);CHKERRQ(ierr); 551 } else { 552 ierr = MatView(a->AIJ,viewer);CHKERRQ(ierr); 553 } 554 555 } else { 556 /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */ 557 if (ismpikaij) { 558 ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 559 } else { 560 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 561 } 562 ierr = MatView(B,viewer);CHKERRQ(ierr); 563 ierr = MatDestroy(&B);CHKERRQ(ierr); 564 } 565 PetscFunctionReturn(0); 566 } 567 568 PetscErrorCode MatDestroy_MPIKAIJ(Mat A) 569 { 570 PetscErrorCode ierr; 571 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 572 573 PetscFunctionBegin; 574 ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr); 575 ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr); 576 ierr = MatDestroy(&b->A);CHKERRQ(ierr); 577 ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr); 578 ierr = VecDestroy(&b->w);CHKERRQ(ierr); 579 ierr = PetscFree(b->S);CHKERRQ(ierr); 580 ierr = PetscFree(b->T);CHKERRQ(ierr); 581 ierr = PetscFree(b->ibdiag);CHKERRQ(ierr); 582 ierr = PetscFree(A->data);CHKERRQ(ierr); 583 PetscFunctionReturn(0); 584 } 585 586 /* --------------------------------------------------------------------------------------*/ 587 588 /* zz = yy + Axx */ 589 PetscErrorCode MatMultAdd_SeqKAIJ(Mat A,Vec xx,Vec yy,Vec zz) 590 { 591 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 592 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 593 const PetscScalar *s = b->S, *t = b->T; 594 const PetscScalar *x,*v,*bx; 595 PetscScalar *y,*sums; 596 PetscErrorCode ierr; 597 const PetscInt m = b->AIJ->rmap->n,*idx,*ii; 598 PetscInt n,i,jrow,j,l,p=b->p,q=b->q,k; 599 600 PetscFunctionBegin; 601 if (!yy) { 602 ierr = VecSet(zz,0.0);CHKERRQ(ierr); 603 } else { 604 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 605 } 606 if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0); 607 608 ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr); 609 ierr = VecGetArray(zz,&y);CHKERRQ(ierr); 610 idx = a->j; 611 v = a->a; 612 ii = a->i; 613 614 if (b->isTI) { 615 for (i=0; i<m; i++) { 616 jrow = ii[i]; 617 n = ii[i+1] - jrow; 618 sums = y + p*i; 619 for (j=0; j<n; j++) { 620 for (k=0; k<p; k++) { 621 sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k]; 622 } 623 } 624 } 625 ierr = PetscLogFlops((a->nz)*3*p);CHKERRQ(ierr); 626 } else if (t) { 627 for (i=0; i<m; i++) { 628 jrow = ii[i]; 629 n = ii[i+1] - jrow; 630 sums = y + p*i; 631 for (j=0; j<n; j++) { 632 for (k=0; k<p; k++) { 633 for (l=0; l<q; l++) { 634 sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l]; 635 } 636 } 637 } 638 } 639 /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do), 640 * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T), 641 * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter 642 * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */ 643 ierr = PetscLogFlops((2.0*p*q-p)*m+2*p*a->nz);CHKERRQ(ierr); 644 } 645 if (s) { 646 for (i=0; i<m; i++) { 647 sums = y + p*i; 648 bx = x + q*i; 649 if (i < b->AIJ->cmap->n) { 650 for (j=0; j<q; j++) { 651 for (k=0; k<p; k++) { 652 sums[k] += s[k+j*p]*bx[j]; 653 } 654 } 655 } 656 } 657 ierr = PetscLogFlops(m*2*p*q);CHKERRQ(ierr); 658 } 659 660 ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr); 661 ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr); 662 PetscFunctionReturn(0); 663 } 664 665 PetscErrorCode MatMult_SeqKAIJ(Mat A,Vec xx,Vec yy) 666 { 667 PetscErrorCode ierr; 668 PetscFunctionBegin; 669 ierr = MatMultAdd_SeqKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr); 670 PetscFunctionReturn(0); 671 } 672 673 #include <petsc/private/kernels/blockinvert.h> 674 675 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A,const PetscScalar **values) 676 { 677 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data; 678 Mat_SeqAIJ *a = (Mat_SeqAIJ*)b->AIJ->data; 679 const PetscScalar *S = b->S; 680 const PetscScalar *T = b->T; 681 const PetscScalar *v = a->a; 682 const PetscInt p = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i; 683 PetscErrorCode ierr; 684 PetscInt i,j,*v_pivots,dof,dof2; 685 PetscScalar *diag,aval,*v_work; 686 687 PetscFunctionBegin; 688 if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse."); 689 if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix."); 690 691 dof = p; 692 dof2 = dof*dof; 693 694 if (b->ibdiagvalid) { 695 if (values) *values = b->ibdiag; 696 PetscFunctionReturn(0); 697 } 698 if (!b->ibdiag) { 699 ierr = PetscMalloc1(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr); 700 ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr); 701 } 702 if (values) *values = b->ibdiag; 703 diag = b->ibdiag; 704 705 ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr); 706 for (i=0; i<m; i++) { 707 if (S) { 708 ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr); 709 } else { 710 ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr); 711 } 712 if (b->isTI) { 713 aval = 0; 714 for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 715 for (j=0; j<dof; j++) diag[j+dof*j] += aval; 716 } else if (T) { 717 aval = 0; 718 for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j]; 719 for (j=0; j<dof2; j++) diag[j] += aval*T[j]; 720 } 721 ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr); 722 diag += dof2; 723 } 724 ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr); 725 726 b->ibdiagvalid = PETSC_TRUE; 727 PetscFunctionReturn(0); 728 } 729 730 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B) 731 { 732 Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data; 733 734 PetscFunctionBegin; 735 *B = kaij->AIJ; 736 PetscFunctionReturn(0); 737 } 738 739 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx) 740 { 741 PetscErrorCode ierr; 742 Mat_SeqKAIJ *kaij = (Mat_SeqKAIJ*) A->data; 743 Mat_SeqAIJ *a = (Mat_SeqAIJ*)kaij->AIJ->data; 744 const PetscScalar *aa = a->a, *T = kaij->T, *v; 745 const PetscInt m = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi; 746 const PetscScalar *b, *xb, *idiag; 747 PetscScalar *x, *work, *workt, *w, *y, *arr, *t, *arrt; 748 PetscInt i, j, k, i2, bs, bs2, nz; 749 750 PetscFunctionBegin; 751 its = its*lits; 752 if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat"); 753 if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits); 754 if (fshift) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift"); 755 if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts"); 756 if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: No support for non-square dense blocks"); 757 else {bs = p; bs2 = bs*bs; } 758 759 if (!m) PetscFunctionReturn(0); 760 761 if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ(A,NULL);CHKERRQ(ierr); } 762 idiag = kaij->ibdiag; 763 diag = a->diag; 764 765 if (!kaij->sor.setup) { 766 ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr); 767 kaij->sor.setup = PETSC_TRUE; 768 } 769 y = kaij->sor.y; 770 w = kaij->sor.w; 771 work = kaij->sor.work; 772 t = kaij->sor.t; 773 arr = kaij->sor.arr; 774 775 ierr = VecGetArray(xx,&x); CHKERRQ(ierr); 776 ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr); 777 778 if (flag & SOR_ZERO_INITIAL_GUESS) { 779 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 780 PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x); /* x[0:bs] <- D^{-1} b[0:bs] */ 781 ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr); 782 i2 = bs; 783 idiag += bs2; 784 for (i=1; i<m; i++) { 785 v = aa + ai[i]; 786 vi = aj + ai[i]; 787 nz = diag[i] - ai[i]; 788 789 if (T) { /* b - T (Arow * x) */ 790 ierr = PetscMemzero(w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 791 for (j=0; j<nz; j++) { 792 for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 793 } 794 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]); 795 for (k=0; k<bs; k++) t[i2+k] += b[i2+k]; 796 } else if (kaij->isTI) { 797 ierr = PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 798 for (j=0; j<nz; j++) { 799 for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k]; 800 } 801 } else { 802 ierr = PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 803 } 804 805 PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y); 806 for (j=0; j<bs; j++) x[i2+j] = omega * y[j]; 807 808 idiag += bs2; 809 i2 += bs; 810 } 811 /* for logging purposes assume number of nonzero in lower half is 1/2 of total */ 812 ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr); 813 xb = t; 814 } else xb = b; 815 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 816 idiag = kaij->ibdiag+bs2*(m-1); 817 i2 = bs * (m-1); 818 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 819 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2); 820 i2 -= bs; 821 idiag -= bs2; 822 for (i=m-2; i>=0; i--) { 823 v = aa + diag[i] + 1 ; 824 vi = aj + diag[i] + 1; 825 nz = ai[i+1] - diag[i] - 1; 826 827 if (T) { /* FIXME: This branch untested */ 828 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 829 /* copy all rows of x that are needed into contiguous space */ 830 workt = work; 831 for (j=0; j<nz; j++) { 832 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 833 workt += bs; 834 } 835 arrt = arr; 836 for (j=0; j<nz; j++) { 837 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 838 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 839 arrt += bs2; 840 } 841 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 842 } else if (kaij->isTI) { 843 ierr = PetscMemcpy(w,t+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 844 for (j=0; j<nz; j++) { 845 for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k]; 846 } 847 } 848 849 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */ 850 for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j]; 851 852 idiag -= bs2; 853 i2 -= bs; 854 } 855 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 856 } 857 its--; 858 } 859 while (its--) { /* FIXME: This branch not updated */ 860 if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) { 861 i2 = 0; 862 idiag = kaij->ibdiag; 863 for (i=0; i<m; i++) { 864 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 865 866 v = aa + ai[i]; 867 vi = aj + ai[i]; 868 nz = diag[i] - ai[i]; 869 workt = work; 870 for (j=0; j<nz; j++) { 871 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 872 workt += bs; 873 } 874 arrt = arr; 875 if (T) { 876 for (j=0; j<nz; j++) { 877 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 878 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 879 arrt += bs2; 880 } 881 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 882 } else if (kaij->isTI) { 883 for (j=0; j<nz; j++) { 884 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 885 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 886 arrt += bs2; 887 } 888 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 889 } 890 ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr); 891 892 v = aa + diag[i] + 1; 893 vi = aj + diag[i] + 1; 894 nz = ai[i+1] - diag[i] - 1; 895 workt = work; 896 for (j=0; j<nz; j++) { 897 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 898 workt += bs; 899 } 900 arrt = arr; 901 if (T) { 902 for (j=0; j<nz; j++) { 903 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 904 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 905 arrt += bs2; 906 } 907 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 908 } else if (kaij->isTI) { 909 for (j=0; j<nz; j++) { 910 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 911 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 912 arrt += bs2; 913 } 914 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 915 } 916 917 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 918 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 919 920 idiag += bs2; 921 i2 += bs; 922 } 923 xb = t; 924 } 925 else xb = b; 926 if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) { 927 idiag = kaij->ibdiag+bs2*(m-1); 928 i2 = bs * (m-1); 929 if (xb == b) { 930 for (i=m-1; i>=0; i--) { 931 ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 932 933 v = aa + ai[i]; 934 vi = aj + ai[i]; 935 nz = diag[i] - ai[i]; 936 workt = work; 937 for (j=0; j<nz; j++) { 938 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 939 workt += bs; 940 } 941 arrt = arr; 942 if (T) { 943 for (j=0; j<nz; j++) { 944 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 945 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 946 arrt += bs2; 947 } 948 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 949 } else if (kaij->isTI) { 950 for (j=0; j<nz; j++) { 951 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 952 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 953 arrt += bs2; 954 } 955 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 956 } 957 958 v = aa + diag[i] + 1; 959 vi = aj + diag[i] + 1; 960 nz = ai[i+1] - diag[i] - 1; 961 workt = work; 962 for (j=0; j<nz; j++) { 963 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 964 workt += bs; 965 } 966 arrt = arr; 967 if (T) { 968 for (j=0; j<nz; j++) { 969 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 970 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 971 arrt += bs2; 972 } 973 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 974 } else if (kaij->isTI) { 975 for (j=0; j<nz; j++) { 976 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 977 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 978 arrt += bs2; 979 } 980 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 981 } 982 983 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 984 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 985 } 986 } else { 987 for (i=m-1; i>=0; i--) { 988 ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr); 989 v = aa + diag[i] + 1; 990 vi = aj + diag[i] + 1; 991 nz = ai[i+1] - diag[i] - 1; 992 workt = work; 993 for (j=0; j<nz; j++) { 994 ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr); 995 workt += bs; 996 } 997 arrt = arr; 998 if (T) { 999 for (j=0; j<nz; j++) { 1000 ierr = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 1001 for (k=0; k<bs2; k++) arrt[k] *= v[j]; 1002 arrt += bs2; 1003 } 1004 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 1005 } else if (kaij->isTI) { 1006 for (j=0; j<nz; j++) { 1007 ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr); 1008 for (k=0; k<bs; k++) arrt[k+bs*k] = v[j]; 1009 arrt += bs2; 1010 } 1011 PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work); 1012 } 1013 PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); 1014 for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j); 1015 } 1016 } 1017 ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr); 1018 } 1019 } 1020 1021 ierr = VecRestoreArray(xx,&x); CHKERRQ(ierr); 1022 ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr); 1023 PetscFunctionReturn(0); 1024 } 1025 1026 /*===================================================================================*/ 1027 1028 PetscErrorCode MatMultAdd_MPIKAIJ(Mat A,Vec xx,Vec yy,Vec zz) 1029 { 1030 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 1031 PetscErrorCode ierr; 1032 1033 PetscFunctionBegin; 1034 if (!yy) { 1035 ierr = VecSet(zz,0.0);CHKERRQ(ierr); 1036 } else { 1037 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 1038 } 1039 /* start the scatter */ 1040 ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1041 ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr); 1042 ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1043 ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr); 1044 PetscFunctionReturn(0); 1045 } 1046 1047 PetscErrorCode MatMult_MPIKAIJ(Mat A,Vec xx,Vec yy) 1048 { 1049 PetscErrorCode ierr; 1050 PetscFunctionBegin; 1051 ierr = MatMultAdd_MPIKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr); 1052 PetscFunctionReturn(0); 1053 } 1054 1055 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A,const PetscScalar **values) 1056 { 1057 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data; 1058 PetscErrorCode ierr; 1059 1060 PetscFunctionBegin; 1061 ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr); 1062 PetscFunctionReturn(0); 1063 } 1064 1065 /* ----------------------------------------------------------------*/ 1066 1067 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 1068 { 1069 Mat_SeqKAIJ *b = (Mat_SeqKAIJ*) A->data; 1070 PetscErrorCode diag = PETSC_FALSE; 1071 PetscErrorCode ierr; 1072 PetscInt nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c; 1073 PetscScalar *vaij,*v,*S=b->S,*T=b->T; 1074 1075 PetscFunctionBegin; 1076 if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1077 b->getrowactive = PETSC_TRUE; 1078 if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row); 1079 1080 if ((!S) && (!T) && (!b->isTI)) { 1081 if (ncols) *ncols = 0; 1082 if (cols) *cols = NULL; 1083 if (values) *values = NULL; 1084 PetscFunctionReturn(0); 1085 } 1086 1087 if (T || b->isTI) { 1088 ierr = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr); 1089 c = nzaij; 1090 for (i=0; i<nzaij; i++) { 1091 /* check if this row contains a diagonal entry */ 1092 if (colsaij[i] == r) { 1093 diag = PETSC_TRUE; 1094 c = i; 1095 } 1096 } 1097 } else nzaij = c = 0; 1098 1099 /* calculate size of row */ 1100 nz = 0; 1101 if (S) nz += q; 1102 if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q); 1103 1104 if (cols || values) { 1105 ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr); 1106 for (i=0; i<q; i++) { 1107 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1108 v[i] = 0.0; 1109 } 1110 if (b->isTI) { 1111 for (i=0; i<nzaij; i++) { 1112 for (j=0; j<q; j++) { 1113 idx[i*q+j] = colsaij[i]*q+j; 1114 v[i*q+j] = (j==s ? vaij[i] : 0); 1115 } 1116 } 1117 } else if (T) { 1118 for (i=0; i<nzaij; i++) { 1119 for (j=0; j<q; j++) { 1120 idx[i*q+j] = colsaij[i]*q+j; 1121 v[i*q+j] = vaij[i]*T[s+j*p]; 1122 } 1123 } 1124 } 1125 if (S) { 1126 for (j=0; j<q; j++) { 1127 idx[c*q+j] = r*q+j; 1128 v[c*q+j] += S[s+j*p]; 1129 } 1130 } 1131 } 1132 1133 if (ncols) *ncols = nz; 1134 if (cols) *cols = idx; 1135 if (values) *values = v; 1136 PetscFunctionReturn(0); 1137 } 1138 1139 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1140 { 1141 PetscErrorCode ierr; 1142 PetscFunctionBegin; 1143 ierr = PetscFree2(*idx,*v);CHKERRQ(ierr); 1144 ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 1145 PetscFunctionReturn(0); 1146 } 1147 1148 PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values) 1149 { 1150 Mat_MPIKAIJ *b = (Mat_MPIKAIJ*) A->data; 1151 Mat MatAIJ = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ; 1152 Mat MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ; 1153 Mat AIJ = b->A; 1154 PetscBool diag = PETSC_FALSE; 1155 PetscErrorCode ierr; 1156 const PetscInt rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray; 1157 PetscInt nz,*idx,ncolsaij,ncolsoaij,*colsaij,*colsoaij,r,s,c,i,j,lrow; 1158 PetscScalar *v,*vals,*ovals,*S=b->S,*T=b->T; 1159 1160 PetscFunctionBegin; 1161 if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active"); 1162 b->getrowactive = PETSC_TRUE; 1163 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows"); 1164 lrow = row - rstart; 1165 1166 if ((!S) && (!T) && (!b->isTI)) { 1167 if (ncols) *ncols = 0; 1168 if (cols) *cols = NULL; 1169 if (values) *values = NULL; 1170 PetscFunctionReturn(0); 1171 } 1172 1173 r = lrow/p; 1174 s = lrow%p; 1175 1176 if (T || b->isTI) { 1177 ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray);CHKERRQ(ierr); 1178 ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr); 1179 ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr); 1180 1181 c = ncolsaij + ncolsoaij; 1182 for (i=0; i<ncolsaij; i++) { 1183 /* check if this row contains a diagonal entry */ 1184 if (colsaij[i] == r) { 1185 diag = PETSC_TRUE; 1186 c = i; 1187 } 1188 } 1189 } else c = 0; 1190 1191 /* calculate size of row */ 1192 nz = 0; 1193 if (S) nz += q; 1194 if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q); 1195 1196 if (cols || values) { 1197 ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr); 1198 for (i=0; i<q; i++) { 1199 /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */ 1200 v[i] = 0.0; 1201 } 1202 if (b->isTI) { 1203 for (i=0; i<ncolsaij; i++) { 1204 for (j=0; j<q; j++) { 1205 idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 1206 v[i*q+j] = (j==s ? vals[i] : 0.0); 1207 } 1208 } 1209 for (i=0; i<ncolsoaij; i++) { 1210 for (j=0; j<q; j++) { 1211 idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 1212 v[(i+ncolsaij)*q+j] = (j==s ? ovals[i]: 0.0); 1213 } 1214 } 1215 } else if (T) { 1216 for (i=0; i<ncolsaij; i++) { 1217 for (j=0; j<q; j++) { 1218 idx[i*q+j] = (colsaij[i]+rstart/p)*q+j; 1219 v[i*q+j] = vals[i]*T[s+j*p]; 1220 } 1221 } 1222 for (i=0; i<ncolsoaij; i++) { 1223 for (j=0; j<q; j++) { 1224 idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j; 1225 v[(i+ncolsaij)*q+j] = ovals[i]*T[s+j*p]; 1226 } 1227 } 1228 } 1229 if (S) { 1230 for (j=0; j<q; j++) { 1231 idx[c*q+j] = (r+rstart/p)*q+j; 1232 v[c*q+j] += S[s+j*p]; 1233 } 1234 } 1235 } 1236 1237 if (ncols) *ncols = nz; 1238 if (cols) *cols = idx; 1239 if (values) *values = v; 1240 PetscFunctionReturn(0); 1241 } 1242 1243 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 1244 { 1245 PetscErrorCode ierr; 1246 PetscFunctionBegin; 1247 ierr = PetscFree2(*idx,*v);CHKERRQ(ierr); 1248 ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE; 1249 PetscFunctionReturn(0); 1250 } 1251 1252 PetscErrorCode MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat) 1253 { 1254 PetscErrorCode ierr; 1255 Mat A; 1256 1257 PetscFunctionBegin; 1258 ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr); 1259 ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr); 1260 ierr = MatDestroy(&A);CHKERRQ(ierr); 1261 PetscFunctionReturn(0); 1262 } 1263 1264 /* ---------------------------------------------------------------------------------- */ 1265 /*@C 1266 MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form: 1267 1268 [I \otimes S + A \otimes T] 1269 1270 where 1271 S is a dense (p \times q) matrix 1272 T is a dense (p \times q) matrix 1273 A is an AIJ (n \times n) matrix 1274 I is the identity matrix 1275 The resulting matrix is (np \times nq) 1276 1277 S and T are always stored independently on all processes as PetscScalar arrays in column-major format. 1278 1279 Collective 1280 1281 Input Parameters: 1282 + A - the AIJ matrix 1283 . p - number of rows in S and T 1284 . q - number of columns in S and T 1285 . S - the S matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major) 1286 - T - the T matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major) 1287 1288 Output Parameter: 1289 . kaij - the new KAIJ matrix 1290 1291 Notes: 1292 This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed. 1293 Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix. 1294 1295 Level: advanced 1296 1297 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ 1298 @*/ 1299 PetscErrorCode MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij) 1300 { 1301 PetscErrorCode ierr; 1302 PetscMPIInt size; 1303 1304 PetscFunctionBegin; 1305 ierr = MatCreate(PetscObjectComm((PetscObject)A),kaij);CHKERRQ(ierr); 1306 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 1307 if (size == 1) { 1308 ierr = MatSetType(*kaij,MATSEQKAIJ);CHKERRQ(ierr); 1309 } else { 1310 ierr = MatSetType(*kaij,MATMPIKAIJ);CHKERRQ(ierr); 1311 } 1312 ierr = MatKAIJSetAIJ(*kaij,A);CHKERRQ(ierr); 1313 ierr = MatKAIJSetS(*kaij,p,q,S);CHKERRQ(ierr); 1314 ierr = MatKAIJSetT(*kaij,p,q,T);CHKERRQ(ierr); 1315 ierr = MatSetUp(*kaij);CHKERRQ(ierr); 1316 PetscFunctionReturn(0); 1317 } 1318 1319 /*MC 1320 MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form 1321 [I \otimes S + A \otimes T], 1322 where 1323 S is a dense (p \times q) matrix, 1324 T is a dense (p \times q) matrix, 1325 A is an AIJ (n \times n) matrix, 1326 and I is the identity matrix. 1327 The resulting matrix is (np \times nq). 1328 1329 S and T are always stored independently on all processes as PetscScalar arrays in column-major format. 1330 1331 Notes: 1332 A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b, 1333 where x and b are column vectors containing the row-major representations of X and B. 1334 1335 Level: advanced 1336 1337 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ() 1338 M*/ 1339 1340 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A) 1341 { 1342 PetscErrorCode ierr; 1343 Mat_MPIKAIJ *b; 1344 PetscMPIInt size; 1345 1346 PetscFunctionBegin; 1347 ierr = PetscNewLog(A,&b);CHKERRQ(ierr); 1348 A->data = (void*)b; 1349 1350 ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr); 1351 1352 A->ops->setup = MatSetUp_KAIJ; 1353 1354 b->w = 0; 1355 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 1356 if (size == 1) { 1357 ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr); 1358 A->ops->setup = MatSetUp_KAIJ; 1359 A->ops->destroy = MatDestroy_SeqKAIJ; 1360 A->ops->view = MatView_KAIJ; 1361 A->ops->mult = MatMult_SeqKAIJ; 1362 A->ops->multadd = MatMultAdd_SeqKAIJ; 1363 A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ; 1364 A->ops->getrow = MatGetRow_SeqKAIJ; 1365 A->ops->restorerow = MatRestoreRow_SeqKAIJ; 1366 A->ops->sor = MatSOR_SeqKAIJ; 1367 } else { 1368 ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr); 1369 A->ops->setup = MatSetUp_KAIJ; 1370 A->ops->destroy = MatDestroy_MPIKAIJ; 1371 A->ops->view = MatView_KAIJ; 1372 A->ops->mult = MatMult_MPIKAIJ; 1373 A->ops->multadd = MatMultAdd_MPIKAIJ; 1374 A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ; 1375 A->ops->getrow = MatGetRow_MPIKAIJ; 1376 A->ops->restorerow = MatRestoreRow_MPIKAIJ; 1377 ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr); 1378 } 1379 A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ; 1380 PetscFunctionReturn(0); 1381 } 1382