1 2 #include <petsc/private/matimpl.h> /*I "petscmat.h" I*/ 3 4 static PetscErrorCode MatTransposeAXPY_Private(Mat Y,PetscScalar a,Mat X,MatStructure str,Mat T) 5 { 6 Mat A,F; 7 PetscErrorCode (*f)(Mat,Mat*); 8 9 PetscFunctionBegin; 10 CHKERRQ(PetscObjectQueryFunction((PetscObject)T,"MatTransposeGetMat_C",&f)); 11 if (f) { 12 CHKERRQ(MatTransposeGetMat(T,&A)); 13 if (T == X) { 14 CHKERRQ(PetscInfo(NULL,"Explicitly transposing X of type MATTRANSPOSEMAT to perform MatAXPY()\n")); 15 CHKERRQ(MatTranspose(A,MAT_INITIAL_MATRIX,&F)); 16 A = Y; 17 } else { 18 CHKERRQ(PetscInfo(NULL,"Transposing X because Y of type MATTRANSPOSEMAT to perform MatAXPY()\n")); 19 CHKERRQ(MatTranspose(X,MAT_INITIAL_MATRIX,&F)); 20 } 21 } else { 22 CHKERRQ(MatHermitianTransposeGetMat(T,&A)); 23 if (T == X) { 24 CHKERRQ(PetscInfo(NULL,"Explicitly Hermitian transposing X of type MATTRANSPOSEMAT to perform MatAXPY()\n")); 25 CHKERRQ(MatHermitianTranspose(A,MAT_INITIAL_MATRIX,&F)); 26 A = Y; 27 } else { 28 CHKERRQ(PetscInfo(NULL,"Hermitian transposing X because Y of type MATTRANSPOSEMAT to perform MatAXPY()\n")); 29 CHKERRQ(MatHermitianTranspose(X,MAT_INITIAL_MATRIX,&F)); 30 } 31 } 32 CHKERRQ(MatAXPY(A,a,F,str)); 33 CHKERRQ(MatDestroy(&F)); 34 PetscFunctionReturn(0); 35 } 36 37 /*@ 38 MatAXPY - Computes Y = a*X + Y. 39 40 Logically Collective on Mat 41 42 Input Parameters: 43 + a - the scalar multiplier 44 . X - the first matrix 45 . Y - the second matrix 46 - str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN, UNKNOWN_NONZERO_PATTERN, or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's) 47 48 Notes: No operation is performed when a is zero. 49 50 Level: intermediate 51 52 .seealso: MatAYPX() 53 @*/ 54 PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str) 55 { 56 PetscInt M1,M2,N1,N2; 57 PetscInt m1,m2,n1,n2; 58 MatType t1,t2; 59 PetscBool sametype,transpose; 60 61 PetscFunctionBegin; 62 PetscValidHeaderSpecific(Y,MAT_CLASSID,1); 63 PetscValidLogicalCollectiveScalar(Y,a,2); 64 PetscValidHeaderSpecific(X,MAT_CLASSID,3); 65 PetscCheckSameComm(Y,1,X,3); 66 CHKERRQ(MatGetSize(X,&M1,&N1)); 67 CHKERRQ(MatGetSize(Y,&M2,&N2)); 68 CHKERRQ(MatGetLocalSize(X,&m1,&n1)); 69 CHKERRQ(MatGetLocalSize(Y,&m2,&n2)); 70 PetscCheck(M1 == M2 && N1 == N2,PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_SIZ,"Non conforming matrix add: global sizes X %" PetscInt_FMT " x %" PetscInt_FMT ", Y %" PetscInt_FMT " x %" PetscInt_FMT,M1,N1,M2,N2); 71 PetscCheck(m1 == m2 && n1 == n2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrix add: local sizes X %" PetscInt_FMT " x %" PetscInt_FMT ", Y %" PetscInt_FMT " x %" PetscInt_FMT,m1,n1,m2,n2); 72 PetscCheck(Y->assembled,PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix (Y)"); 73 PetscCheck(X->assembled,PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix (X)"); 74 if (a == (PetscScalar)0.0) PetscFunctionReturn(0); 75 if (Y == X) { 76 CHKERRQ(MatScale(Y,1.0 + a)); 77 PetscFunctionReturn(0); 78 } 79 CHKERRQ(MatGetType(X,&t1)); 80 CHKERRQ(MatGetType(Y,&t2)); 81 CHKERRQ(PetscStrcmp(t1,t2,&sametype)); 82 CHKERRQ(PetscLogEventBegin(MAT_AXPY,Y,0,0,0)); 83 if (Y->ops->axpy && (sametype || X->ops->axpy == Y->ops->axpy)) { 84 CHKERRQ((*Y->ops->axpy)(Y,a,X,str)); 85 } else { 86 CHKERRQ(PetscStrcmp(t1,MATTRANSPOSEMAT,&transpose)); 87 if (transpose) { 88 CHKERRQ(MatTransposeAXPY_Private(Y,a,X,str,X)); 89 } else { 90 CHKERRQ(PetscStrcmp(t2,MATTRANSPOSEMAT,&transpose)); 91 if (transpose) { 92 CHKERRQ(MatTransposeAXPY_Private(Y,a,X,str,Y)); 93 } else { 94 CHKERRQ(MatAXPY_Basic(Y,a,X,str)); 95 } 96 } 97 } 98 CHKERRQ(PetscLogEventEnd(MAT_AXPY,Y,0,0,0)); 99 PetscFunctionReturn(0); 100 } 101 102 PetscErrorCode MatAXPY_Basic_Preallocate(Mat Y, Mat X, Mat *B) 103 { 104 PetscErrorCode (*preall)(Mat,Mat,Mat*) = NULL; 105 106 PetscFunctionBegin; 107 /* look for any available faster alternative to the general preallocator */ 108 CHKERRQ(PetscObjectQueryFunction((PetscObject)Y,"MatAXPYGetPreallocation_C",&preall)); 109 if (preall) { 110 CHKERRQ((*preall)(Y,X,B)); 111 } else { /* Use MatPrellocator, assumes same row-col distribution */ 112 Mat preallocator; 113 PetscInt r,rstart,rend; 114 PetscInt m,n,M,N; 115 116 CHKERRQ(MatGetRowUpperTriangular(Y)); 117 CHKERRQ(MatGetRowUpperTriangular(X)); 118 CHKERRQ(MatGetSize(Y,&M,&N)); 119 CHKERRQ(MatGetLocalSize(Y,&m,&n)); 120 CHKERRQ(MatCreate(PetscObjectComm((PetscObject)Y),&preallocator)); 121 CHKERRQ(MatSetType(preallocator,MATPREALLOCATOR)); 122 CHKERRQ(MatSetLayouts(preallocator,Y->rmap,Y->cmap)); 123 CHKERRQ(MatSetUp(preallocator)); 124 CHKERRQ(MatGetOwnershipRange(preallocator,&rstart,&rend)); 125 for (r = rstart; r < rend; ++r) { 126 PetscInt ncols; 127 const PetscInt *row; 128 const PetscScalar *vals; 129 130 CHKERRQ(MatGetRow(Y,r,&ncols,&row,&vals)); 131 CHKERRQ(MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES)); 132 CHKERRQ(MatRestoreRow(Y,r,&ncols,&row,&vals)); 133 CHKERRQ(MatGetRow(X,r,&ncols,&row,&vals)); 134 CHKERRQ(MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES)); 135 CHKERRQ(MatRestoreRow(X,r,&ncols,&row,&vals)); 136 } 137 CHKERRQ(MatSetOption(preallocator,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 138 CHKERRQ(MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY)); 139 CHKERRQ(MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY)); 140 CHKERRQ(MatRestoreRowUpperTriangular(Y)); 141 CHKERRQ(MatRestoreRowUpperTriangular(X)); 142 143 CHKERRQ(MatCreate(PetscObjectComm((PetscObject)Y),B)); 144 CHKERRQ(MatSetType(*B,((PetscObject)Y)->type_name)); 145 CHKERRQ(MatSetLayouts(*B,Y->rmap,Y->cmap)); 146 CHKERRQ(MatPreallocatorPreallocate(preallocator,PETSC_FALSE,*B)); 147 CHKERRQ(MatDestroy(&preallocator)); 148 } 149 PetscFunctionReturn(0); 150 } 151 152 PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str) 153 { 154 PetscBool isshell,isdense,isnest; 155 156 PetscFunctionBegin; 157 CHKERRQ(MatIsShell(Y,&isshell)); 158 if (isshell) { /* MatShell has special support for AXPY */ 159 PetscErrorCode (*f)(Mat,PetscScalar,Mat,MatStructure); 160 161 CHKERRQ(MatGetOperation(Y,MATOP_AXPY,(void (**)(void))&f)); 162 if (f) { 163 CHKERRQ((*f)(Y,a,X,str)); 164 PetscFunctionReturn(0); 165 } 166 } 167 /* no need to preallocate if Y is dense */ 168 CHKERRQ(PetscObjectBaseTypeCompareAny((PetscObject)Y,&isdense,MATSEQDENSE,MATMPIDENSE,"")); 169 if (isdense) { 170 CHKERRQ(PetscObjectTypeCompare((PetscObject)X,MATNEST,&isnest)); 171 if (isnest) { 172 CHKERRQ(MatAXPY_Dense_Nest(Y,a,X)); 173 PetscFunctionReturn(0); 174 } 175 if (str == DIFFERENT_NONZERO_PATTERN || str == UNKNOWN_NONZERO_PATTERN) str = SUBSET_NONZERO_PATTERN; 176 } 177 if (str != DIFFERENT_NONZERO_PATTERN && str != UNKNOWN_NONZERO_PATTERN) { 178 PetscInt i,start,end,j,ncols,m,n; 179 const PetscInt *row; 180 PetscScalar *val; 181 const PetscScalar *vals; 182 183 CHKERRQ(MatGetSize(X,&m,&n)); 184 CHKERRQ(MatGetOwnershipRange(X,&start,&end)); 185 CHKERRQ(MatGetRowUpperTriangular(X)); 186 if (a == 1.0) { 187 for (i = start; i < end; i++) { 188 CHKERRQ(MatGetRow(X,i,&ncols,&row,&vals)); 189 CHKERRQ(MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES)); 190 CHKERRQ(MatRestoreRow(X,i,&ncols,&row,&vals)); 191 } 192 } else { 193 PetscInt vs = 100; 194 /* realloc if needed, as this function may be used in parallel */ 195 CHKERRQ(PetscMalloc1(vs,&val)); 196 for (i=start; i<end; i++) { 197 CHKERRQ(MatGetRow(X,i,&ncols,&row,&vals)); 198 if (vs < ncols) { 199 vs = PetscMin(2*ncols,n); 200 CHKERRQ(PetscRealloc(vs*sizeof(*val),&val)); 201 } 202 for (j=0; j<ncols; j++) val[j] = a*vals[j]; 203 CHKERRQ(MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES)); 204 CHKERRQ(MatRestoreRow(X,i,&ncols,&row,&vals)); 205 } 206 CHKERRQ(PetscFree(val)); 207 } 208 CHKERRQ(MatRestoreRowUpperTriangular(X)); 209 CHKERRQ(MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY)); 210 CHKERRQ(MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY)); 211 } else { 212 Mat B; 213 214 CHKERRQ(MatAXPY_Basic_Preallocate(Y,X,&B)); 215 CHKERRQ(MatAXPY_BasicWithPreallocation(B,Y,a,X,str)); 216 CHKERRQ(MatHeaderMerge(Y,&B)); 217 } 218 PetscFunctionReturn(0); 219 } 220 221 PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str) 222 { 223 PetscInt i,start,end,j,ncols,m,n; 224 const PetscInt *row; 225 PetscScalar *val; 226 const PetscScalar *vals; 227 228 PetscFunctionBegin; 229 CHKERRQ(MatGetSize(X,&m,&n)); 230 CHKERRQ(MatGetOwnershipRange(X,&start,&end)); 231 CHKERRQ(MatGetRowUpperTriangular(Y)); 232 CHKERRQ(MatGetRowUpperTriangular(X)); 233 if (a == 1.0) { 234 for (i = start; i < end; i++) { 235 CHKERRQ(MatGetRow(Y,i,&ncols,&row,&vals)); 236 CHKERRQ(MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES)); 237 CHKERRQ(MatRestoreRow(Y,i,&ncols,&row,&vals)); 238 239 CHKERRQ(MatGetRow(X,i,&ncols,&row,&vals)); 240 CHKERRQ(MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES)); 241 CHKERRQ(MatRestoreRow(X,i,&ncols,&row,&vals)); 242 } 243 } else { 244 PetscInt vs = 100; 245 /* realloc if needed, as this function may be used in parallel */ 246 CHKERRQ(PetscMalloc1(vs,&val)); 247 for (i=start; i<end; i++) { 248 CHKERRQ(MatGetRow(Y,i,&ncols,&row,&vals)); 249 CHKERRQ(MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES)); 250 CHKERRQ(MatRestoreRow(Y,i,&ncols,&row,&vals)); 251 252 CHKERRQ(MatGetRow(X,i,&ncols,&row,&vals)); 253 if (vs < ncols) { 254 vs = PetscMin(2*ncols,n); 255 CHKERRQ(PetscRealloc(vs*sizeof(*val),&val)); 256 } 257 for (j=0; j<ncols; j++) val[j] = a*vals[j]; 258 CHKERRQ(MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES)); 259 CHKERRQ(MatRestoreRow(X,i,&ncols,&row,&vals)); 260 } 261 CHKERRQ(PetscFree(val)); 262 } 263 CHKERRQ(MatRestoreRowUpperTriangular(Y)); 264 CHKERRQ(MatRestoreRowUpperTriangular(X)); 265 CHKERRQ(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 266 CHKERRQ(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 267 PetscFunctionReturn(0); 268 } 269 270 /*@ 271 MatShift - Computes Y = Y + a I, where a is a PetscScalar and I is the identity matrix. 272 273 Neighbor-wise Collective on Mat 274 275 Input Parameters: 276 + Y - the matrices 277 - a - the PetscScalar 278 279 Level: intermediate 280 281 Notes: 282 If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially 283 fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an 284 entry). No operation is performed when a is zero. 285 286 To form Y = Y + diag(V) use MatDiagonalSet() 287 288 .seealso: MatDiagonalSet(), MatScale(), MatDiagonalScale() 289 @*/ 290 PetscErrorCode MatShift(Mat Y,PetscScalar a) 291 { 292 PetscFunctionBegin; 293 PetscValidHeaderSpecific(Y,MAT_CLASSID,1); 294 PetscCheck(Y->assembled,PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 295 PetscCheck(!Y->factortype,PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 296 MatCheckPreallocated(Y,1); 297 if (a == 0.0) PetscFunctionReturn(0); 298 299 if (Y->ops->shift) CHKERRQ((*Y->ops->shift)(Y,a)); 300 else CHKERRQ(MatShift_Basic(Y,a)); 301 302 CHKERRQ(PetscObjectStateIncrease((PetscObject)Y)); 303 PetscFunctionReturn(0); 304 } 305 306 PetscErrorCode MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is) 307 { 308 PetscInt i,start,end; 309 const PetscScalar *v; 310 311 PetscFunctionBegin; 312 CHKERRQ(MatGetOwnershipRange(Y,&start,&end)); 313 CHKERRQ(VecGetArrayRead(D,&v)); 314 for (i=start; i<end; i++) { 315 CHKERRQ(MatSetValues(Y,1,&i,1,&i,v+i-start,is)); 316 } 317 CHKERRQ(VecRestoreArrayRead(D,&v)); 318 CHKERRQ(MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY)); 319 CHKERRQ(MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY)); 320 PetscFunctionReturn(0); 321 } 322 323 /*@ 324 MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix 325 that is represented as a vector. Or Y[i,i] = D[i] if InsertMode is 326 INSERT_VALUES. 327 328 Neighbor-wise Collective on Mat 329 330 Input Parameters: 331 + Y - the input matrix 332 . D - the diagonal matrix, represented as a vector 333 - i - INSERT_VALUES or ADD_VALUES 334 335 Notes: 336 If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially 337 fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an 338 entry). 339 340 Level: intermediate 341 342 .seealso: MatShift(), MatScale(), MatDiagonalScale() 343 @*/ 344 PetscErrorCode MatDiagonalSet(Mat Y,Vec D,InsertMode is) 345 { 346 PetscInt matlocal,veclocal; 347 348 PetscFunctionBegin; 349 PetscValidHeaderSpecific(Y,MAT_CLASSID,1); 350 PetscValidHeaderSpecific(D,VEC_CLASSID,2); 351 CHKERRQ(MatGetLocalSize(Y,&matlocal,NULL)); 352 CHKERRQ(VecGetLocalSize(D,&veclocal)); 353 PetscCheck(matlocal == veclocal,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number local rows of matrix %" PetscInt_FMT " does not match that of vector for diagonal %" PetscInt_FMT,matlocal,veclocal); 354 if (Y->ops->diagonalset) { 355 CHKERRQ((*Y->ops->diagonalset)(Y,D,is)); 356 } else { 357 CHKERRQ(MatDiagonalSet_Default(Y,D,is)); 358 } 359 CHKERRQ(PetscObjectStateIncrease((PetscObject)Y)); 360 PetscFunctionReturn(0); 361 } 362 363 /*@ 364 MatAYPX - Computes Y = a*Y + X. 365 366 Logically on Mat 367 368 Input Parameters: 369 + a - the PetscScalar multiplier 370 . Y - the first matrix 371 . X - the second matrix 372 - str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN, UNKNOWN_NONZERO_PATTERN, or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's) 373 374 Level: intermediate 375 376 .seealso: MatAXPY() 377 @*/ 378 PetscErrorCode MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str) 379 { 380 PetscFunctionBegin; 381 CHKERRQ(MatScale(Y,a)); 382 CHKERRQ(MatAXPY(Y,1.0,X,str)); 383 PetscFunctionReturn(0); 384 } 385 386 /*@ 387 MatComputeOperator - Computes the explicit matrix 388 389 Collective on Mat 390 391 Input Parameters: 392 + inmat - the matrix 393 - mattype - the matrix type for the explicit operator 394 395 Output Parameter: 396 . mat - the explicit operator 397 398 Notes: 399 This computation is done by applying the operators to columns of the identity matrix. 400 This routine is costly in general, and is recommended for use only with relatively small systems. 401 Currently, this routine uses a dense matrix format if mattype == NULL. 402 403 Level: advanced 404 405 @*/ 406 PetscErrorCode MatComputeOperator(Mat inmat,MatType mattype,Mat *mat) 407 { 408 PetscFunctionBegin; 409 PetscValidHeaderSpecific(inmat,MAT_CLASSID,1); 410 PetscValidPointer(mat,3); 411 CHKERRQ(MatConvert_Shell(inmat,mattype ? mattype : MATDENSE,MAT_INITIAL_MATRIX,mat)); 412 PetscFunctionReturn(0); 413 } 414 415 /*@ 416 MatComputeOperatorTranspose - Computes the explicit matrix representation of 417 a give matrix that can apply MatMultTranspose() 418 419 Collective on Mat 420 421 Input Parameters: 422 + inmat - the matrix 423 - mattype - the matrix type for the explicit operator 424 425 Output Parameter: 426 . mat - the explicit operator transposed 427 428 Notes: 429 This computation is done by applying the transpose of the operator to columns of the identity matrix. 430 This routine is costly in general, and is recommended for use only with relatively small systems. 431 Currently, this routine uses a dense matrix format if mattype == NULL. 432 433 Level: advanced 434 435 @*/ 436 PetscErrorCode MatComputeOperatorTranspose(Mat inmat,MatType mattype,Mat *mat) 437 { 438 Mat A; 439 440 PetscFunctionBegin; 441 PetscValidHeaderSpecific(inmat,MAT_CLASSID,1); 442 PetscValidPointer(mat,3); 443 CHKERRQ(MatCreateTranspose(inmat,&A)); 444 CHKERRQ(MatConvert_Shell(A,mattype ? mattype : MATDENSE,MAT_INITIAL_MATRIX,mat)); 445 CHKERRQ(MatDestroy(&A)); 446 PetscFunctionReturn(0); 447 } 448 449 /*@ 450 MatChop - Set all values in the matrix less than the tolerance to zero 451 452 Input Parameters: 453 + A - The matrix 454 - tol - The zero tolerance 455 456 Output Parameters: 457 . A - The chopped matrix 458 459 Level: intermediate 460 461 .seealso: MatCreate(), MatZeroEntries() 462 @*/ 463 PetscErrorCode MatChop(Mat A, PetscReal tol) 464 { 465 Mat a; 466 PetscScalar *newVals; 467 PetscInt *newCols, rStart, rEnd, numRows, maxRows, r, colMax = 0; 468 PetscBool flg; 469 470 PetscFunctionBegin; 471 CHKERRQ(PetscObjectBaseTypeCompareAny((PetscObject)A, &flg, MATSEQDENSE, MATMPIDENSE, "")); 472 if (flg) { 473 CHKERRQ(MatDenseGetLocalMatrix(A, &a)); 474 CHKERRQ(MatDenseGetLDA(a, &r)); 475 CHKERRQ(MatGetSize(a, &rStart, &rEnd)); 476 CHKERRQ(MatDenseGetArray(a, &newVals)); 477 for (; colMax < rEnd; ++colMax) { 478 for (maxRows = 0; maxRows < rStart; ++maxRows) { 479 newVals[maxRows + colMax * r] = PetscAbsScalar(newVals[maxRows + colMax * r]) < tol ? 0.0 : newVals[maxRows + colMax * r]; 480 } 481 } 482 CHKERRQ(MatDenseRestoreArray(a, &newVals)); 483 } else { 484 CHKERRQ(MatGetOwnershipRange(A, &rStart, &rEnd)); 485 CHKERRQ(MatGetRowUpperTriangular(A)); 486 for (r = rStart; r < rEnd; ++r) { 487 PetscInt ncols; 488 489 CHKERRQ(MatGetRow(A, r, &ncols, NULL, NULL)); 490 colMax = PetscMax(colMax, ncols); 491 CHKERRQ(MatRestoreRow(A, r, &ncols, NULL, NULL)); 492 } 493 numRows = rEnd - rStart; 494 CHKERRMPI(MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A))); 495 CHKERRQ(PetscMalloc2(colMax, &newCols, colMax, &newVals)); 496 CHKERRQ(MatGetOption(A, MAT_NO_OFF_PROC_ENTRIES, &flg)); /* cache user-defined value */ 497 CHKERRQ(MatSetOption(A, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 498 /* short-circuit code in MatAssemblyBegin() and MatAssemblyEnd() */ 499 /* that are potentially called many times depending on the distribution of A */ 500 for (r = rStart; r < rStart+maxRows; ++r) { 501 const PetscScalar *vals; 502 const PetscInt *cols; 503 PetscInt ncols, newcols, c; 504 505 if (r < rEnd) { 506 CHKERRQ(MatGetRow(A, r, &ncols, &cols, &vals)); 507 for (c = 0; c < ncols; ++c) { 508 newCols[c] = cols[c]; 509 newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c]; 510 } 511 newcols = ncols; 512 CHKERRQ(MatRestoreRow(A, r, &ncols, &cols, &vals)); 513 CHKERRQ(MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES)); 514 } 515 CHKERRQ(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 516 CHKERRQ(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 517 } 518 CHKERRQ(MatRestoreRowUpperTriangular(A)); 519 CHKERRQ(PetscFree2(newCols, newVals)); 520 CHKERRQ(MatSetOption(A, MAT_NO_OFF_PROC_ENTRIES, flg)); /* reset option to its user-defined value */ 521 } 522 PetscFunctionReturn(0); 523 } 524