1 2 #include <petsc/private/matimpl.h> /*I "petscmat.h" I*/ 3 4 /*@ 5 MatAXPY - Computes Y = a*X + Y. 6 7 Logically Collective on Mat 8 9 Input Parameters: 10 + a - the scalar multiplier 11 . X - the first matrix 12 . Y - the second matrix 13 - str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN 14 or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's) 15 16 Level: intermediate 17 18 .keywords: matrix, add 19 20 .seealso: MatAYPX() 21 @*/ 22 PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str) 23 { 24 PetscErrorCode ierr; 25 PetscInt m1,m2,n1,n2; 26 PetscBool sametype; 27 28 PetscFunctionBegin; 29 PetscValidHeaderSpecific(X,MAT_CLASSID,3); 30 PetscValidHeaderSpecific(Y,MAT_CLASSID,1); 31 PetscValidLogicalCollectiveScalar(Y,a,2); 32 ierr = MatGetSize(X,&m1,&n1);CHKERRQ(ierr); 33 ierr = MatGetSize(Y,&m2,&n2);CHKERRQ(ierr); 34 if (m1 != m2 || n1 != n2) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrix add: %D %D %D %D",m1,m2,n1,n2); 35 36 ierr = PetscStrcmp(((PetscObject)X)->type_name,((PetscObject)Y)->type_name,&sametype);CHKERRQ(ierr); 37 ierr = PetscLogEventBegin(MAT_AXPY,Y,0,0,0);CHKERRQ(ierr); 38 if (Y->ops->axpy && sametype) { 39 ierr = (*Y->ops->axpy)(Y,a,X,str);CHKERRQ(ierr); 40 } else { 41 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 42 } 43 ierr = PetscLogEventEnd(MAT_AXPY,Y,0,0,0);CHKERRQ(ierr); 44 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_VECCUDA) 45 if (Y->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) { 46 Y->valid_GPU_matrix = PETSC_OFFLOAD_CPU; 47 } 48 #endif 49 PetscFunctionReturn(0); 50 } 51 52 PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str) 53 { 54 PetscInt i,start,end,j,ncols,m,n; 55 PetscErrorCode ierr; 56 const PetscInt *row; 57 PetscScalar *val; 58 const PetscScalar *vals; 59 60 PetscFunctionBegin; 61 ierr = MatGetSize(X,&m,&n);CHKERRQ(ierr); 62 ierr = MatGetOwnershipRange(X,&start,&end);CHKERRQ(ierr); 63 if (a == 1.0) { 64 for (i = start; i < end; i++) { 65 ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr); 66 ierr = MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr); 67 ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr); 68 } 69 } else { 70 ierr = PetscMalloc1(n+1,&val);CHKERRQ(ierr); 71 for (i=start; i<end; i++) { 72 ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr); 73 for (j=0; j<ncols; j++) { 74 val[j] = a*vals[j]; 75 } 76 ierr = MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES);CHKERRQ(ierr); 77 ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr); 78 } 79 ierr = PetscFree(val);CHKERRQ(ierr); 80 } 81 ierr = MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 82 ierr = MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 83 PetscFunctionReturn(0); 84 } 85 86 PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str) 87 { 88 PetscInt i,start,end,j,ncols,m,n; 89 PetscErrorCode ierr; 90 const PetscInt *row; 91 PetscScalar *val; 92 const PetscScalar *vals; 93 94 PetscFunctionBegin; 95 ierr = MatGetSize(X,&m,&n);CHKERRQ(ierr); 96 ierr = MatGetOwnershipRange(X,&start,&end);CHKERRQ(ierr); 97 if (a == 1.0) { 98 for (i = start; i < end; i++) { 99 ierr = MatGetRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr); 100 ierr = MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr); 101 ierr = MatRestoreRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr); 102 103 ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr); 104 ierr = MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr); 105 ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr); 106 } 107 } else { 108 ierr = PetscMalloc1(n+1,&val);CHKERRQ(ierr); 109 for (i=start; i<end; i++) { 110 ierr = MatGetRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr); 111 ierr = MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr); 112 ierr = MatRestoreRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr); 113 114 ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr); 115 for (j=0; j<ncols; j++) { 116 val[j] = a*vals[j]; 117 } 118 ierr = MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES);CHKERRQ(ierr); 119 ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr); 120 } 121 ierr = PetscFree(val);CHKERRQ(ierr); 122 } 123 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 124 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 125 PetscFunctionReturn(0); 126 } 127 128 /*@ 129 MatShift - Computes Y = Y + a I, where a is a PetscScalar and I is the identity matrix. 130 131 Neighbor-wise Collective on Mat 132 133 Input Parameters: 134 + Y - the matrices 135 - a - the PetscScalar 136 137 Level: intermediate 138 139 Notes: If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially 140 fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an 141 entry). 142 143 To form Y = Y + diag(V) use MatDiagonalSet() 144 145 Developers Note: If the local "diagonal part" of the matrix Y has no entries then the local diagonal part is 146 preallocated with 1 nonzero per row for the to be added values. This allows for fast shifting of an empty matrix. 147 148 .keywords: matrix, add, shift 149 150 .seealso: MatDiagonalSet(), MatScale(), MatDiagonalScale() 151 @*/ 152 PetscErrorCode MatShift(Mat Y,PetscScalar a) 153 { 154 PetscErrorCode ierr; 155 156 PetscFunctionBegin; 157 PetscValidHeaderSpecific(Y,MAT_CLASSID,1); 158 if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 159 if (Y->factortype) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 160 MatCheckPreallocated(Y,1); 161 162 if (Y->ops->shift) { 163 ierr = (*Y->ops->shift)(Y,a);CHKERRQ(ierr); 164 } else { 165 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 166 } 167 168 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_VECCUDA) 169 if (Y->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) { 170 Y->valid_GPU_matrix = PETSC_OFFLOAD_CPU; 171 } 172 #endif 173 PetscFunctionReturn(0); 174 } 175 176 PetscErrorCode MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is) 177 { 178 PetscErrorCode ierr; 179 PetscInt i,start,end; 180 PetscScalar *v; 181 182 PetscFunctionBegin; 183 ierr = MatGetOwnershipRange(Y,&start,&end);CHKERRQ(ierr); 184 ierr = VecGetArray(D,&v);CHKERRQ(ierr); 185 for (i=start; i<end; i++) { 186 ierr = MatSetValues(Y,1,&i,1,&i,v+i-start,is);CHKERRQ(ierr); 187 } 188 ierr = VecRestoreArray(D,&v);CHKERRQ(ierr); 189 ierr = MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 190 ierr = MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 191 PetscFunctionReturn(0); 192 } 193 194 /*@ 195 MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix 196 that is represented as a vector. Or Y[i,i] = D[i] if InsertMode is 197 INSERT_VALUES. 198 199 Input Parameters: 200 + Y - the input matrix 201 . D - the diagonal matrix, represented as a vector 202 - i - INSERT_VALUES or ADD_VALUES 203 204 Neighbor-wise Collective on Mat and Vec 205 206 Notes: If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially 207 fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an 208 entry). 209 210 Level: intermediate 211 212 .keywords: matrix, add, shift, diagonal 213 214 .seealso: MatShift(), MatScale(), MatDiagonalScale() 215 @*/ 216 PetscErrorCode MatDiagonalSet(Mat Y,Vec D,InsertMode is) 217 { 218 PetscErrorCode ierr; 219 PetscInt matlocal,veclocal; 220 221 PetscFunctionBegin; 222 PetscValidHeaderSpecific(Y,MAT_CLASSID,1); 223 PetscValidHeaderSpecific(D,VEC_CLASSID,2); 224 ierr = MatGetLocalSize(Y,&matlocal,NULL);CHKERRQ(ierr); 225 ierr = VecGetLocalSize(D,&veclocal);CHKERRQ(ierr); 226 if (matlocal != veclocal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number local rows of matrix %D does not match that of vector for diagonal %D",matlocal,veclocal); 227 if (Y->ops->diagonalset) { 228 ierr = (*Y->ops->diagonalset)(Y,D,is);CHKERRQ(ierr); 229 } else { 230 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 231 } 232 PetscFunctionReturn(0); 233 } 234 235 /*@ 236 MatAYPX - Computes Y = a*Y + X. 237 238 Logically on Mat 239 240 Input Parameters: 241 + a - the PetscScalar multiplier 242 . Y - the first matrix 243 . X - the second matrix 244 - str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN or SUBSET_NONZERO_PATTERN 245 246 Level: intermediate 247 248 .keywords: matrix, add 249 250 .seealso: MatAXPY() 251 @*/ 252 PetscErrorCode MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str) 253 { 254 PetscScalar one = 1.0; 255 PetscErrorCode ierr; 256 PetscInt mX,mY,nX,nY; 257 258 PetscFunctionBegin; 259 PetscValidHeaderSpecific(X,MAT_CLASSID,3); 260 PetscValidHeaderSpecific(Y,MAT_CLASSID,1); 261 PetscValidLogicalCollectiveScalar(Y,a,2); 262 ierr = MatGetSize(X,&mX,&nX);CHKERRQ(ierr); 263 ierr = MatGetSize(X,&mY,&nY);CHKERRQ(ierr); 264 if (mX != mY || nX != nY) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrices: %D %D first %D %D second",mX,mY,nX,nY); 265 266 ierr = MatScale(Y,a);CHKERRQ(ierr); 267 ierr = MatAXPY(Y,one,X,str);CHKERRQ(ierr); 268 PetscFunctionReturn(0); 269 } 270 271 /*@ 272 MatComputeExplicitOperator - Computes the explicit matrix 273 274 Collective on Mat 275 276 Input Parameter: 277 . inmat - the matrix 278 279 Output Parameter: 280 . mat - the explict operator 281 282 Notes: 283 This computation is done by applying the operators to columns of the 284 identity matrix. 285 286 Currently, this routine uses a dense matrix format when 1 processor 287 is used and a sparse format otherwise. This routine is costly in general, 288 and is recommended for use only with relatively small systems. 289 290 Level: advanced 291 292 .keywords: Mat, compute, explicit, operator 293 @*/ 294 PetscErrorCode MatComputeExplicitOperator(Mat inmat,Mat *mat) 295 { 296 Vec in,out; 297 PetscErrorCode ierr; 298 PetscInt i,m,n,M,N,*rows,start,end; 299 MPI_Comm comm; 300 PetscScalar *array,zero = 0.0,one = 1.0; 301 PetscMPIInt size; 302 303 PetscFunctionBegin; 304 PetscValidHeaderSpecific(inmat,MAT_CLASSID,1); 305 PetscValidPointer(mat,2); 306 307 ierr = PetscObjectGetComm((PetscObject)inmat,&comm);CHKERRQ(ierr); 308 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 309 310 ierr = MatGetLocalSize(inmat,&m,&n);CHKERRQ(ierr); 311 ierr = MatGetSize(inmat,&M,&N);CHKERRQ(ierr); 312 ierr = MatCreateVecs(inmat,&in,&out);CHKERRQ(ierr); 313 ierr = VecSetOption(in,VEC_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 314 ierr = VecGetOwnershipRange(out,&start,&end);CHKERRQ(ierr); 315 ierr = PetscMalloc1(m,&rows);CHKERRQ(ierr); 316 for (i=0; i<m; i++) rows[i] = start + i; 317 318 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 319 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 320 if (size == 1) { 321 ierr = MatSetType(*mat,MATSEQDENSE);CHKERRQ(ierr); 322 ierr = MatSeqDenseSetPreallocation(*mat,NULL);CHKERRQ(ierr); 323 } else { 324 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 325 ierr = MatMPIAIJSetPreallocation(*mat,n,NULL,N-n,NULL);CHKERRQ(ierr); 326 } 327 328 for (i=0; i<N; i++) { 329 330 ierr = VecSet(in,zero);CHKERRQ(ierr); 331 ierr = VecSetValues(in,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr); 332 ierr = VecAssemblyBegin(in);CHKERRQ(ierr); 333 ierr = VecAssemblyEnd(in);CHKERRQ(ierr); 334 335 ierr = MatMult(inmat,in,out);CHKERRQ(ierr); 336 337 ierr = VecGetArray(out,&array);CHKERRQ(ierr); 338 ierr = MatSetValues(*mat,m,rows,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 339 ierr = VecRestoreArray(out,&array);CHKERRQ(ierr); 340 341 } 342 ierr = PetscFree(rows);CHKERRQ(ierr); 343 ierr = VecDestroy(&out);CHKERRQ(ierr); 344 ierr = VecDestroy(&in);CHKERRQ(ierr); 345 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 346 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 347 PetscFunctionReturn(0); 348 } 349 350 /*@ 351 MatComputeExplicitOperatorTranspose - Computes the explicit matrix representation of 352 a give matrix that can apply MatMultTranspose() 353 354 Collective on Mat 355 356 Input Parameter: 357 . inmat - the matrix 358 359 Output Parameter: 360 . mat - the explict operator transposed 361 362 Notes: 363 This computation is done by applying the transpose of the operator to columns of the 364 identity matrix. 365 366 Currently, this routine uses a dense matrix format when 1 processor 367 is used and a sparse format otherwise. This routine is costly in general, 368 and is recommended for use only with relatively small systems. 369 370 Level: advanced 371 372 .keywords: Mat, compute, explicit, operator 373 @*/ 374 PetscErrorCode MatComputeExplicitOperatorTranspose(Mat inmat,Mat *mat) 375 { 376 Vec in,out; 377 PetscErrorCode ierr; 378 PetscInt i,m,n,M,N,*rows,start,end; 379 MPI_Comm comm; 380 PetscScalar *array,zero = 0.0,one = 1.0; 381 PetscMPIInt size; 382 383 PetscFunctionBegin; 384 PetscValidHeaderSpecific(inmat,MAT_CLASSID,1); 385 PetscValidPointer(mat,2); 386 387 ierr = PetscObjectGetComm((PetscObject)inmat,&comm);CHKERRQ(ierr); 388 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 389 390 ierr = MatGetLocalSize(inmat,&m,&n);CHKERRQ(ierr); 391 ierr = MatGetSize(inmat,&M,&N);CHKERRQ(ierr); 392 ierr = MatCreateVecs(inmat,&in,&out);CHKERRQ(ierr); 393 ierr = VecSetOption(in,VEC_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 394 ierr = VecGetOwnershipRange(out,&start,&end);CHKERRQ(ierr); 395 ierr = PetscMalloc1(m,&rows);CHKERRQ(ierr); 396 for (i=0; i<m; i++) rows[i] = start + i; 397 398 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 399 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 400 if (size == 1) { 401 ierr = MatSetType(*mat,MATSEQDENSE);CHKERRQ(ierr); 402 ierr = MatSeqDenseSetPreallocation(*mat,NULL);CHKERRQ(ierr); 403 } else { 404 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 405 ierr = MatMPIAIJSetPreallocation(*mat,n,NULL,N-n,NULL);CHKERRQ(ierr); 406 } 407 408 for (i=0; i<N; i++) { 409 410 ierr = VecSet(in,zero);CHKERRQ(ierr); 411 ierr = VecSetValues(in,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr); 412 ierr = VecAssemblyBegin(in);CHKERRQ(ierr); 413 ierr = VecAssemblyEnd(in);CHKERRQ(ierr); 414 415 ierr = MatMultTranspose(inmat,in,out);CHKERRQ(ierr); 416 417 ierr = VecGetArray(out,&array);CHKERRQ(ierr); 418 ierr = MatSetValues(*mat,m,rows,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 419 ierr = VecRestoreArray(out,&array);CHKERRQ(ierr); 420 421 } 422 ierr = PetscFree(rows);CHKERRQ(ierr); 423 ierr = VecDestroy(&out);CHKERRQ(ierr); 424 ierr = VecDestroy(&in);CHKERRQ(ierr); 425 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 426 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 427 PetscFunctionReturn(0); 428 } 429 430 /*@ 431 MatChop - Set all values in the matrix less than the tolerance to zero 432 433 Input Parameters: 434 + A - The matrix 435 - tol - The zero tolerance 436 437 Output Parameters: 438 . A - The chopped matrix 439 440 Level: intermediate 441 442 .seealso: MatCreate(), MatZeroEntries() 443 @*/ 444 PetscErrorCode MatChop(Mat A, PetscReal tol) 445 { 446 PetscScalar *newVals; 447 PetscInt *newCols; 448 PetscInt rStart, rEnd, numRows, maxRows, r, colMax = 0; 449 PetscErrorCode ierr; 450 451 PetscFunctionBegin; 452 ierr = MatGetOwnershipRange(A, &rStart, &rEnd);CHKERRQ(ierr); 453 for (r = rStart; r < rEnd; ++r) { 454 PetscInt ncols; 455 456 ierr = MatGetRow(A, r, &ncols, NULL, NULL);CHKERRQ(ierr); 457 colMax = PetscMax(colMax, ncols);CHKERRQ(ierr); 458 ierr = MatRestoreRow(A, r, &ncols, NULL, NULL);CHKERRQ(ierr); 459 } 460 numRows = rEnd - rStart; 461 ierr = MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 462 ierr = PetscMalloc2(colMax,&newCols,colMax,&newVals);CHKERRQ(ierr); 463 for (r = rStart; r < rStart+maxRows; ++r) { 464 const PetscScalar *vals; 465 const PetscInt *cols; 466 PetscInt ncols, newcols, c; 467 468 if (r < rEnd) { 469 ierr = MatGetRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr); 470 for (c = 0; c < ncols; ++c) { 471 newCols[c] = cols[c]; 472 newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c]; 473 } 474 newcols = ncols; 475 ierr = MatRestoreRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr); 476 ierr = MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES);CHKERRQ(ierr); 477 } 478 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 479 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 480 } 481 ierr = PetscFree2(newCols,newVals);CHKERRQ(ierr); 482 PetscFunctionReturn(0); 483 } 484