1 2 #include <petsc/private/matimpl.h> /*I "petscmat.h" I*/ 3 4 /*@ 5 MatAXPY - Computes Y = a*X + Y. 6 7 Logically Collective on Mat 8 9 Input Parameters: 10 + a - the scalar multiplier 11 . X - the first matrix 12 . Y - the second matrix 13 - str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN 14 or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's) 15 16 Level: intermediate 17 18 .keywords: matrix, add 19 20 .seealso: MatAYPX() 21 @*/ 22 PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str) 23 { 24 PetscErrorCode ierr; 25 PetscInt m1,m2,n1,n2; 26 PetscBool sametype; 27 28 PetscFunctionBegin; 29 PetscValidHeaderSpecific(X,MAT_CLASSID,3); 30 PetscValidHeaderSpecific(Y,MAT_CLASSID,1); 31 PetscValidLogicalCollectiveScalar(Y,a,2); 32 ierr = MatGetSize(X,&m1,&n1);CHKERRQ(ierr); 33 ierr = MatGetSize(Y,&m2,&n2);CHKERRQ(ierr); 34 if (m1 != m2 || n1 != n2) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrix add: %D %D %D %D",m1,m2,n1,n2); 35 36 ierr = PetscStrcmp(((PetscObject)X)->type_name,((PetscObject)Y)->type_name,&sametype);CHKERRQ(ierr); 37 ierr = PetscLogEventBegin(MAT_AXPY,Y,0,0,0);CHKERRQ(ierr); 38 if (Y->ops->axpy && sametype) { 39 ierr = (*Y->ops->axpy)(Y,a,X,str);CHKERRQ(ierr); 40 } else { 41 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 42 } 43 ierr = PetscLogEventEnd(MAT_AXPY,Y,0,0,0);CHKERRQ(ierr); 44 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_VECCUDA) 45 if (Y->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) { 46 Y->valid_GPU_matrix = PETSC_OFFLOAD_CPU; 47 } 48 #endif 49 PetscFunctionReturn(0); 50 } 51 52 PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str) 53 { 54 PetscInt i,start,end,j,ncols,m,n; 55 PetscErrorCode ierr; 56 const PetscInt *row; 57 PetscScalar *val; 58 const PetscScalar *vals; 59 60 PetscFunctionBegin; 61 ierr = MatGetSize(X,&m,&n);CHKERRQ(ierr); 62 ierr = MatGetOwnershipRange(X,&start,&end);CHKERRQ(ierr); 63 if (a == 1.0) { 64 for (i = start; i < end; i++) { 65 ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr); 66 ierr = MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr); 67 ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr); 68 } 69 } else { 70 ierr = PetscMalloc1(n+1,&val);CHKERRQ(ierr); 71 for (i=start; i<end; i++) { 72 ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr); 73 for (j=0; j<ncols; j++) { 74 val[j] = a*vals[j]; 75 } 76 ierr = MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES);CHKERRQ(ierr); 77 ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr); 78 } 79 ierr = PetscFree(val);CHKERRQ(ierr); 80 } 81 ierr = MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 82 ierr = MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 83 PetscFunctionReturn(0); 84 } 85 86 PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str) 87 { 88 PetscInt i,start,end,j,ncols,m,n; 89 PetscErrorCode ierr; 90 const PetscInt *row; 91 PetscScalar *val; 92 const PetscScalar *vals; 93 94 PetscFunctionBegin; 95 ierr = MatGetSize(X,&m,&n);CHKERRQ(ierr); 96 ierr = MatGetOwnershipRange(X,&start,&end);CHKERRQ(ierr); 97 if (a == 1.0) { 98 for (i = start; i < end; i++) { 99 ierr = MatGetRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr); 100 ierr = MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr); 101 ierr = MatRestoreRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr); 102 103 ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr); 104 ierr = MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr); 105 ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr); 106 } 107 } else { 108 ierr = PetscMalloc1(n+1,&val);CHKERRQ(ierr); 109 for (i=start; i<end; i++) { 110 ierr = MatGetRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr); 111 ierr = MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr); 112 ierr = MatRestoreRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr); 113 114 ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr); 115 for (j=0; j<ncols; j++) { 116 val[j] = a*vals[j]; 117 } 118 ierr = MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES);CHKERRQ(ierr); 119 ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr); 120 } 121 ierr = PetscFree(val);CHKERRQ(ierr); 122 } 123 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 124 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 125 PetscFunctionReturn(0); 126 } 127 128 /*@ 129 MatShift - Computes Y = Y + a I, where a is a PetscScalar and I is the identity matrix. 130 131 Neighbor-wise Collective on Mat 132 133 Input Parameters: 134 + Y - the matrices 135 - a - the PetscScalar 136 137 Level: intermediate 138 139 Notes: 140 If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially 141 fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an 142 entry). 143 144 To form Y = Y + diag(V) use MatDiagonalSet() 145 146 Developers Note: If the local "diagonal part" of the matrix Y has no entries then the local diagonal part is 147 preallocated with 1 nonzero per row for the to be added values. This allows for fast shifting of an empty matrix. 148 149 .keywords: matrix, add, shift 150 151 .seealso: MatDiagonalSet(), MatScale(), MatDiagonalScale() 152 @*/ 153 PetscErrorCode MatShift(Mat Y,PetscScalar a) 154 { 155 PetscErrorCode ierr; 156 157 PetscFunctionBegin; 158 PetscValidHeaderSpecific(Y,MAT_CLASSID,1); 159 if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 160 if (Y->factortype) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 161 MatCheckPreallocated(Y,1); 162 163 if (Y->ops->shift) { 164 ierr = (*Y->ops->shift)(Y,a);CHKERRQ(ierr); 165 } else { 166 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 167 } 168 169 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_VECCUDA) 170 if (Y->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) { 171 Y->valid_GPU_matrix = PETSC_OFFLOAD_CPU; 172 } 173 #endif 174 PetscFunctionReturn(0); 175 } 176 177 PetscErrorCode MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is) 178 { 179 PetscErrorCode ierr; 180 PetscInt i,start,end; 181 PetscScalar *v; 182 183 PetscFunctionBegin; 184 ierr = MatGetOwnershipRange(Y,&start,&end);CHKERRQ(ierr); 185 ierr = VecGetArray(D,&v);CHKERRQ(ierr); 186 for (i=start; i<end; i++) { 187 ierr = MatSetValues(Y,1,&i,1,&i,v+i-start,is);CHKERRQ(ierr); 188 } 189 ierr = VecRestoreArray(D,&v);CHKERRQ(ierr); 190 ierr = MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 191 ierr = MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 192 PetscFunctionReturn(0); 193 } 194 195 /*@ 196 MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix 197 that is represented as a vector. Or Y[i,i] = D[i] if InsertMode is 198 INSERT_VALUES. 199 200 Input Parameters: 201 + Y - the input matrix 202 . D - the diagonal matrix, represented as a vector 203 - i - INSERT_VALUES or ADD_VALUES 204 205 Neighbor-wise Collective on Mat and Vec 206 207 Notes: 208 If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially 209 fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an 210 entry). 211 212 Level: intermediate 213 214 .keywords: matrix, add, shift, diagonal 215 216 .seealso: MatShift(), MatScale(), MatDiagonalScale() 217 @*/ 218 PetscErrorCode MatDiagonalSet(Mat Y,Vec D,InsertMode is) 219 { 220 PetscErrorCode ierr; 221 PetscInt matlocal,veclocal; 222 223 PetscFunctionBegin; 224 PetscValidHeaderSpecific(Y,MAT_CLASSID,1); 225 PetscValidHeaderSpecific(D,VEC_CLASSID,2); 226 ierr = MatGetLocalSize(Y,&matlocal,NULL);CHKERRQ(ierr); 227 ierr = VecGetLocalSize(D,&veclocal);CHKERRQ(ierr); 228 if (matlocal != veclocal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number local rows of matrix %D does not match that of vector for diagonal %D",matlocal,veclocal); 229 if (Y->ops->diagonalset) { 230 ierr = (*Y->ops->diagonalset)(Y,D,is);CHKERRQ(ierr); 231 } else { 232 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 233 } 234 PetscFunctionReturn(0); 235 } 236 237 /*@ 238 MatAYPX - Computes Y = a*Y + X. 239 240 Logically on Mat 241 242 Input Parameters: 243 + a - the PetscScalar multiplier 244 . Y - the first matrix 245 . X - the second matrix 246 - str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN or SUBSET_NONZERO_PATTERN 247 248 Level: intermediate 249 250 .keywords: matrix, add 251 252 .seealso: MatAXPY() 253 @*/ 254 PetscErrorCode MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str) 255 { 256 PetscScalar one = 1.0; 257 PetscErrorCode ierr; 258 PetscInt mX,mY,nX,nY; 259 260 PetscFunctionBegin; 261 PetscValidHeaderSpecific(X,MAT_CLASSID,3); 262 PetscValidHeaderSpecific(Y,MAT_CLASSID,1); 263 PetscValidLogicalCollectiveScalar(Y,a,2); 264 ierr = MatGetSize(X,&mX,&nX);CHKERRQ(ierr); 265 ierr = MatGetSize(X,&mY,&nY);CHKERRQ(ierr); 266 if (mX != mY || nX != nY) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrices: %D %D first %D %D second",mX,mY,nX,nY); 267 268 ierr = MatScale(Y,a);CHKERRQ(ierr); 269 ierr = MatAXPY(Y,one,X,str);CHKERRQ(ierr); 270 PetscFunctionReturn(0); 271 } 272 273 /*@ 274 MatComputeExplicitOperator - Computes the explicit matrix 275 276 Collective on Mat 277 278 Input Parameter: 279 . inmat - the matrix 280 281 Output Parameter: 282 . mat - the explict operator 283 284 Notes: 285 This computation is done by applying the operators to columns of the 286 identity matrix. 287 288 Currently, this routine uses a dense matrix format when 1 processor 289 is used and a sparse format otherwise. This routine is costly in general, 290 and is recommended for use only with relatively small systems. 291 292 Level: advanced 293 294 .keywords: Mat, compute, explicit, operator 295 @*/ 296 PetscErrorCode MatComputeExplicitOperator(Mat inmat,Mat *mat) 297 { 298 Vec in,out; 299 PetscErrorCode ierr; 300 PetscInt i,m,n,M,N,*rows,start,end; 301 MPI_Comm comm; 302 PetscScalar *array,zero = 0.0,one = 1.0; 303 PetscMPIInt size; 304 305 PetscFunctionBegin; 306 PetscValidHeaderSpecific(inmat,MAT_CLASSID,1); 307 PetscValidPointer(mat,2); 308 309 ierr = PetscObjectGetComm((PetscObject)inmat,&comm);CHKERRQ(ierr); 310 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 311 312 ierr = MatGetLocalSize(inmat,&m,&n);CHKERRQ(ierr); 313 ierr = MatGetSize(inmat,&M,&N);CHKERRQ(ierr); 314 ierr = MatCreateVecs(inmat,&in,&out);CHKERRQ(ierr); 315 ierr = VecSetOption(in,VEC_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 316 ierr = VecGetOwnershipRange(out,&start,&end);CHKERRQ(ierr); 317 ierr = PetscMalloc1(m,&rows);CHKERRQ(ierr); 318 for (i=0; i<m; i++) rows[i] = start + i; 319 320 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 321 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 322 if (size == 1) { 323 ierr = MatSetType(*mat,MATSEQDENSE);CHKERRQ(ierr); 324 ierr = MatSeqDenseSetPreallocation(*mat,NULL);CHKERRQ(ierr); 325 } else { 326 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 327 ierr = MatMPIAIJSetPreallocation(*mat,n,NULL,N-n,NULL);CHKERRQ(ierr); 328 } 329 330 for (i=0; i<N; i++) { 331 332 ierr = VecSet(in,zero);CHKERRQ(ierr); 333 ierr = VecSetValues(in,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr); 334 ierr = VecAssemblyBegin(in);CHKERRQ(ierr); 335 ierr = VecAssemblyEnd(in);CHKERRQ(ierr); 336 337 ierr = MatMult(inmat,in,out);CHKERRQ(ierr); 338 339 ierr = VecGetArray(out,&array);CHKERRQ(ierr); 340 ierr = MatSetValues(*mat,m,rows,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 341 ierr = VecRestoreArray(out,&array);CHKERRQ(ierr); 342 343 } 344 ierr = PetscFree(rows);CHKERRQ(ierr); 345 ierr = VecDestroy(&out);CHKERRQ(ierr); 346 ierr = VecDestroy(&in);CHKERRQ(ierr); 347 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 348 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 349 PetscFunctionReturn(0); 350 } 351 352 /*@ 353 MatComputeExplicitOperatorTranspose - Computes the explicit matrix representation of 354 a give matrix that can apply MatMultTranspose() 355 356 Collective on Mat 357 358 Input Parameter: 359 . inmat - the matrix 360 361 Output Parameter: 362 . mat - the explict operator transposed 363 364 Notes: 365 This computation is done by applying the transpose of the operator to columns of the 366 identity matrix. 367 368 Currently, this routine uses a dense matrix format when 1 processor 369 is used and a sparse format otherwise. This routine is costly in general, 370 and is recommended for use only with relatively small systems. 371 372 Level: advanced 373 374 .keywords: Mat, compute, explicit, operator 375 @*/ 376 PetscErrorCode MatComputeExplicitOperatorTranspose(Mat inmat,Mat *mat) 377 { 378 Vec in,out; 379 PetscErrorCode ierr; 380 PetscInt i,m,n,M,N,*rows,start,end; 381 MPI_Comm comm; 382 PetscScalar *array,zero = 0.0,one = 1.0; 383 PetscMPIInt size; 384 385 PetscFunctionBegin; 386 PetscValidHeaderSpecific(inmat,MAT_CLASSID,1); 387 PetscValidPointer(mat,2); 388 389 ierr = PetscObjectGetComm((PetscObject)inmat,&comm);CHKERRQ(ierr); 390 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 391 392 ierr = MatGetLocalSize(inmat,&m,&n);CHKERRQ(ierr); 393 ierr = MatGetSize(inmat,&M,&N);CHKERRQ(ierr); 394 ierr = MatCreateVecs(inmat,&in,&out);CHKERRQ(ierr); 395 ierr = VecSetOption(in,VEC_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 396 ierr = VecGetOwnershipRange(out,&start,&end);CHKERRQ(ierr); 397 ierr = PetscMalloc1(m,&rows);CHKERRQ(ierr); 398 for (i=0; i<m; i++) rows[i] = start + i; 399 400 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 401 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 402 if (size == 1) { 403 ierr = MatSetType(*mat,MATSEQDENSE);CHKERRQ(ierr); 404 ierr = MatSeqDenseSetPreallocation(*mat,NULL);CHKERRQ(ierr); 405 } else { 406 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 407 ierr = MatMPIAIJSetPreallocation(*mat,n,NULL,N-n,NULL);CHKERRQ(ierr); 408 } 409 410 for (i=0; i<N; i++) { 411 412 ierr = VecSet(in,zero);CHKERRQ(ierr); 413 ierr = VecSetValues(in,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr); 414 ierr = VecAssemblyBegin(in);CHKERRQ(ierr); 415 ierr = VecAssemblyEnd(in);CHKERRQ(ierr); 416 417 ierr = MatMultTranspose(inmat,in,out);CHKERRQ(ierr); 418 419 ierr = VecGetArray(out,&array);CHKERRQ(ierr); 420 ierr = MatSetValues(*mat,m,rows,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 421 ierr = VecRestoreArray(out,&array);CHKERRQ(ierr); 422 423 } 424 ierr = PetscFree(rows);CHKERRQ(ierr); 425 ierr = VecDestroy(&out);CHKERRQ(ierr); 426 ierr = VecDestroy(&in);CHKERRQ(ierr); 427 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 428 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 429 PetscFunctionReturn(0); 430 } 431 432 /*@ 433 MatChop - Set all values in the matrix less than the tolerance to zero 434 435 Input Parameters: 436 + A - The matrix 437 - tol - The zero tolerance 438 439 Output Parameters: 440 . A - The chopped matrix 441 442 Level: intermediate 443 444 .seealso: MatCreate(), MatZeroEntries() 445 @*/ 446 PetscErrorCode MatChop(Mat A, PetscReal tol) 447 { 448 PetscScalar *newVals; 449 PetscInt *newCols; 450 PetscInt rStart, rEnd, numRows, maxRows, r, colMax = 0; 451 PetscErrorCode ierr; 452 453 PetscFunctionBegin; 454 ierr = MatGetOwnershipRange(A, &rStart, &rEnd);CHKERRQ(ierr); 455 for (r = rStart; r < rEnd; ++r) { 456 PetscInt ncols; 457 458 ierr = MatGetRow(A, r, &ncols, NULL, NULL);CHKERRQ(ierr); 459 colMax = PetscMax(colMax, ncols);CHKERRQ(ierr); 460 ierr = MatRestoreRow(A, r, &ncols, NULL, NULL);CHKERRQ(ierr); 461 } 462 numRows = rEnd - rStart; 463 ierr = MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 464 ierr = PetscMalloc2(colMax,&newCols,colMax,&newVals);CHKERRQ(ierr); 465 for (r = rStart; r < rStart+maxRows; ++r) { 466 const PetscScalar *vals; 467 const PetscInt *cols; 468 PetscInt ncols, newcols, c; 469 470 if (r < rEnd) { 471 ierr = MatGetRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr); 472 for (c = 0; c < ncols; ++c) { 473 newCols[c] = cols[c]; 474 newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c]; 475 } 476 newcols = ncols; 477 ierr = MatRestoreRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr); 478 ierr = MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES);CHKERRQ(ierr); 479 } 480 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 481 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 482 } 483 ierr = PetscFree2(newCols,newVals);CHKERRQ(ierr); 484 PetscFunctionReturn(0); 485 } 486