1 2 #include <petsc/private/matimpl.h> /*I "petscmat.h" I*/ 3 4 /*@ 5 MatAXPY - Computes Y = a*X + Y. 6 7 Logically Collective on Mat 8 9 Input Parameters: 10 + a - the scalar multiplier 11 . X - the first matrix 12 . Y - the second matrix 13 - str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN 14 or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's) 15 16 Level: intermediate 17 18 .keywords: matrix, add 19 20 .seealso: MatAYPX() 21 @*/ 22 PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str) 23 { 24 PetscErrorCode ierr; 25 PetscInt m1,m2,n1,n2; 26 PetscBool sametype; 27 28 PetscFunctionBegin; 29 PetscValidHeaderSpecific(X,MAT_CLASSID,3); 30 PetscValidHeaderSpecific(Y,MAT_CLASSID,1); 31 PetscValidLogicalCollectiveScalar(Y,a,2); 32 ierr = MatGetSize(X,&m1,&n1);CHKERRQ(ierr); 33 ierr = MatGetSize(Y,&m2,&n2);CHKERRQ(ierr); 34 if (m1 != m2 || n1 != n2) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrix add: %D %D %D %D",m1,m2,n1,n2); 35 36 ierr = PetscStrcmp(((PetscObject)X)->type_name,((PetscObject)Y)->type_name,&sametype);CHKERRQ(ierr); 37 ierr = PetscLogEventBegin(MAT_AXPY,Y,0,0,0);CHKERRQ(ierr); 38 if (Y->ops->axpy && sametype) { 39 ierr = (*Y->ops->axpy)(Y,a,X,str);CHKERRQ(ierr); 40 } else { 41 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 42 } 43 ierr = PetscLogEventEnd(MAT_AXPY,Y,0,0,0);CHKERRQ(ierr); 44 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_VECCUDA) 45 if (Y->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) { 46 Y->valid_GPU_matrix = PETSC_OFFLOAD_CPU; 47 } 48 #endif 49 PetscFunctionReturn(0); 50 } 51 52 PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str) 53 { 54 PetscInt i,start,end,j,ncols,m,n; 55 PetscErrorCode ierr; 56 const PetscInt *row; 57 PetscScalar *val; 58 const PetscScalar *vals; 59 60 PetscFunctionBegin; 61 ierr = MatGetSize(X,&m,&n);CHKERRQ(ierr); 62 ierr = MatGetOwnershipRange(X,&start,&end);CHKERRQ(ierr); 63 if (a == 1.0) { 64 for (i = start; i < end; i++) { 65 
ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr); 66 ierr = MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr); 67 ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr); 68 } 69 } else { 70 ierr = PetscMalloc1(n+1,&val);CHKERRQ(ierr); 71 for (i=start; i<end; i++) { 72 ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr); 73 for (j=0; j<ncols; j++) { 74 val[j] = a*vals[j]; 75 } 76 ierr = MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES);CHKERRQ(ierr); 77 ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr); 78 } 79 ierr = PetscFree(val);CHKERRQ(ierr); 80 } 81 ierr = MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 82 ierr = MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 83 PetscFunctionReturn(0); 84 } 85 86 PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str) 87 { 88 PetscInt i,start,end,j,ncols,m,n; 89 PetscErrorCode ierr; 90 const PetscInt *row; 91 PetscScalar *val; 92 const PetscScalar *vals; 93 94 PetscFunctionBegin; 95 ierr = MatGetSize(X,&m,&n);CHKERRQ(ierr); 96 ierr = MatGetOwnershipRange(X,&start,&end);CHKERRQ(ierr); 97 if (a == 1.0) { 98 for (i = start; i < end; i++) { 99 ierr = MatGetRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr); 100 ierr = MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr); 101 ierr = MatRestoreRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr); 102 103 ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr); 104 ierr = MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr); 105 ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr); 106 } 107 } else { 108 ierr = PetscMalloc1(n+1,&val);CHKERRQ(ierr); 109 for (i=start; i<end; i++) { 110 ierr = MatGetRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr); 111 ierr = MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr); 112 ierr = MatRestoreRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr); 113 114 ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr); 115 for (j=0; j<ncols; j++) { 116 val[j] = a*vals[j]; 117 } 118 ierr = 
MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES);CHKERRQ(ierr); 119 ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr); 120 } 121 ierr = PetscFree(val);CHKERRQ(ierr); 122 } 123 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 124 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 125 PetscFunctionReturn(0); 126 } 127 128 /*@ 129 MatShift - Computes Y = Y + a I, where a is a PetscScalar and I is the identity matrix. 130 131 Neighbor-wise Collective on Mat 132 133 Input Parameters: 134 + Y - the matrices 135 - a - the PetscScalar 136 137 Level: intermediate 138 139 Notes: 140 If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially 141 fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an 142 entry). 143 144 To form Y = Y + diag(V) use MatDiagonalSet() 145 146 Developers Note: If the local "diagonal part" of the matrix Y has no entries then the local diagonal part is 147 preallocated with 1 nonzero per row for the to be added values. This allows for fast shifting of an empty matrix. 
148 149 .keywords: matrix, add, shift 150 151 .seealso: MatDiagonalSet(), MatScale(), MatDiagonalScale() 152 @*/ 153 PetscErrorCode MatShift(Mat Y,PetscScalar a) 154 { 155 PetscErrorCode ierr; 156 157 PetscFunctionBegin; 158 PetscValidHeaderSpecific(Y,MAT_CLASSID,1); 159 if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 160 if (Y->factortype) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 161 MatCheckPreallocated(Y,1); 162 163 if (Y->ops->shift) { 164 ierr = (*Y->ops->shift)(Y,a);CHKERRQ(ierr); 165 } else { 166 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 167 } 168 169 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 170 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_VECCUDA) 171 if (Y->valid_GPU_matrix != PETSC_OFFLOAD_UNALLOCATED) { 172 Y->valid_GPU_matrix = PETSC_OFFLOAD_CPU; 173 } 174 #endif 175 PetscFunctionReturn(0); 176 } 177 178 PetscErrorCode MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is) 179 { 180 PetscErrorCode ierr; 181 PetscInt i,start,end; 182 PetscScalar *v; 183 184 PetscFunctionBegin; 185 ierr = MatGetOwnershipRange(Y,&start,&end);CHKERRQ(ierr); 186 ierr = VecGetArray(D,&v);CHKERRQ(ierr); 187 for (i=start; i<end; i++) { 188 ierr = MatSetValues(Y,1,&i,1,&i,v+i-start,is);CHKERRQ(ierr); 189 } 190 ierr = VecRestoreArray(D,&v);CHKERRQ(ierr); 191 ierr = MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 192 ierr = MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 193 PetscFunctionReturn(0); 194 } 195 196 /*@ 197 MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix 198 that is represented as a vector. Or Y[i,i] = D[i] if InsertMode is 199 INSERT_VALUES. 200 201 Input Parameters: 202 + Y - the input matrix 203 . 
D - the diagonal matrix, represented as a vector 204 - i - INSERT_VALUES or ADD_VALUES 205 206 Neighbor-wise Collective on Mat and Vec 207 208 Notes: 209 If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially 210 fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an 211 entry). 212 213 Level: intermediate 214 215 .keywords: matrix, add, shift, diagonal 216 217 .seealso: MatShift(), MatScale(), MatDiagonalScale() 218 @*/ 219 PetscErrorCode MatDiagonalSet(Mat Y,Vec D,InsertMode is) 220 { 221 PetscErrorCode ierr; 222 PetscInt matlocal,veclocal; 223 224 PetscFunctionBegin; 225 PetscValidHeaderSpecific(Y,MAT_CLASSID,1); 226 PetscValidHeaderSpecific(D,VEC_CLASSID,2); 227 ierr = MatGetLocalSize(Y,&matlocal,NULL);CHKERRQ(ierr); 228 ierr = VecGetLocalSize(D,&veclocal);CHKERRQ(ierr); 229 if (matlocal != veclocal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number local rows of matrix %D does not match that of vector for diagonal %D",matlocal,veclocal); 230 if (Y->ops->diagonalset) { 231 ierr = (*Y->ops->diagonalset)(Y,D,is);CHKERRQ(ierr); 232 } else { 233 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 234 } 235 ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr); 236 PetscFunctionReturn(0); 237 } 238 239 /*@ 240 MatAYPX - Computes Y = a*Y + X. 241 242 Logically on Mat 243 244 Input Parameters: 245 + a - the PetscScalar multiplier 246 . Y - the first matrix 247 . 
X - the second matrix 248 - str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN or SUBSET_NONZERO_PATTERN 249 250 Level: intermediate 251 252 .keywords: matrix, add 253 254 .seealso: MatAXPY() 255 @*/ 256 PetscErrorCode MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str) 257 { 258 PetscScalar one = 1.0; 259 PetscErrorCode ierr; 260 PetscInt mX,mY,nX,nY; 261 262 PetscFunctionBegin; 263 PetscValidHeaderSpecific(X,MAT_CLASSID,3); 264 PetscValidHeaderSpecific(Y,MAT_CLASSID,1); 265 PetscValidLogicalCollectiveScalar(Y,a,2); 266 ierr = MatGetSize(X,&mX,&nX);CHKERRQ(ierr); 267 ierr = MatGetSize(X,&mY,&nY);CHKERRQ(ierr); 268 if (mX != mY || nX != nY) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrices: %D %D first %D %D second",mX,mY,nX,nY); 269 270 ierr = MatScale(Y,a);CHKERRQ(ierr); 271 ierr = MatAXPY(Y,one,X,str);CHKERRQ(ierr); 272 PetscFunctionReturn(0); 273 } 274 275 /*@ 276 MatComputeExplicitOperator - Computes the explicit matrix 277 278 Collective on Mat 279 280 Input Parameter: 281 . inmat - the matrix 282 283 Output Parameter: 284 . mat - the explict operator 285 286 Notes: 287 This computation is done by applying the operators to columns of the 288 identity matrix. 289 290 Currently, this routine uses a dense matrix format when 1 processor 291 is used and a sparse format otherwise. This routine is costly in general, 292 and is recommended for use only with relatively small systems. 293 294 Level: advanced 295 296 .keywords: Mat, compute, explicit, operator 297 @*/ 298 PetscErrorCode MatComputeExplicitOperator(Mat inmat,Mat *mat) 299 { 300 PetscErrorCode ierr; 301 MPI_Comm comm; 302 PetscMPIInt size; 303 304 PetscFunctionBegin; 305 PetscValidHeaderSpecific(inmat,MAT_CLASSID,1); 306 PetscValidPointer(mat,2); 307 308 ierr = PetscObjectGetComm((PetscObject)inmat,&comm);CHKERRQ(ierr); 309 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 310 ierr = MatConvert_Shell(inmat,size == 1 ? 
MATSEQDENSE : MATAIJ,MAT_INITIAL_MATRIX,mat);CHKERRQ(ierr); 311 PetscFunctionReturn(0); 312 } 313 314 /*@ 315 MatComputeExplicitOperatorTranspose - Computes the explicit matrix representation of 316 a give matrix that can apply MatMultTranspose() 317 318 Collective on Mat 319 320 Input Parameter: 321 . inmat - the matrix 322 323 Output Parameter: 324 . mat - the explict operator transposed 325 326 Notes: 327 This computation is done by applying the transpose of the operator to columns of the 328 identity matrix. 329 330 Currently, this routine uses a dense matrix format when 1 processor 331 is used and a sparse format otherwise. This routine is costly in general, 332 and is recommended for use only with relatively small systems. 333 334 Level: advanced 335 336 .keywords: Mat, compute, explicit, operator 337 @*/ 338 PetscErrorCode MatComputeExplicitOperatorTranspose(Mat inmat,Mat *mat) 339 { 340 Vec in,out; 341 PetscErrorCode ierr; 342 PetscInt i,m,n,M,N,*rows,start,end; 343 MPI_Comm comm; 344 PetscScalar *array,zero = 0.0,one = 1.0; 345 PetscMPIInt size; 346 347 PetscFunctionBegin; 348 PetscValidHeaderSpecific(inmat,MAT_CLASSID,1); 349 PetscValidPointer(mat,2); 350 351 ierr = PetscObjectGetComm((PetscObject)inmat,&comm);CHKERRQ(ierr); 352 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 353 354 ierr = MatGetLocalSize(inmat,&m,&n);CHKERRQ(ierr); 355 ierr = MatGetSize(inmat,&M,&N);CHKERRQ(ierr); 356 ierr = MatCreateVecs(inmat,&in,&out);CHKERRQ(ierr); 357 ierr = VecSetOption(in,VEC_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 358 ierr = VecGetOwnershipRange(out,&start,&end);CHKERRQ(ierr); 359 ierr = PetscMalloc1(m,&rows);CHKERRQ(ierr); 360 for (i=0; i<m; i++) rows[i] = start + i; 361 362 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 363 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 364 if (size == 1) { 365 ierr = MatSetType(*mat,MATSEQDENSE);CHKERRQ(ierr); 366 ierr = MatSeqDenseSetPreallocation(*mat,NULL);CHKERRQ(ierr); 367 } else { 368 ierr = 
MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 369 ierr = MatMPIAIJSetPreallocation(*mat,n,NULL,N-n,NULL);CHKERRQ(ierr); 370 } 371 372 for (i=0; i<N; i++) { 373 374 ierr = VecSet(in,zero);CHKERRQ(ierr); 375 ierr = VecSetValues(in,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr); 376 ierr = VecAssemblyBegin(in);CHKERRQ(ierr); 377 ierr = VecAssemblyEnd(in);CHKERRQ(ierr); 378 379 ierr = MatMultTranspose(inmat,in,out);CHKERRQ(ierr); 380 381 ierr = VecGetArray(out,&array);CHKERRQ(ierr); 382 ierr = MatSetValues(*mat,m,rows,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 383 ierr = VecRestoreArray(out,&array);CHKERRQ(ierr); 384 385 } 386 ierr = PetscFree(rows);CHKERRQ(ierr); 387 ierr = VecDestroy(&out);CHKERRQ(ierr); 388 ierr = VecDestroy(&in);CHKERRQ(ierr); 389 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 390 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 391 PetscFunctionReturn(0); 392 } 393 394 /*@ 395 MatChop - Set all values in the matrix less than the tolerance to zero 396 397 Input Parameters: 398 + A - The matrix 399 - tol - The zero tolerance 400 401 Output Parameters: 402 . 
A - The chopped matrix

  Level: intermediate

.seealso: MatCreate(), MatZeroEntries()
 @*/
PetscErrorCode MatChop(Mat A, PetscReal tol)
{
  PetscScalar    *newVals;
  PetscInt       *newCols;
  PetscInt       rStart, rEnd, numRows, maxRows, r, colMax = 0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(A, &rStart, &rEnd);CHKERRQ(ierr);
  /* find the widest local row so one scratch buffer serves every row */
  for (r = rStart; r < rEnd; ++r) {
    PetscInt ncols;

    ierr   = MatGetRow(A, r, &ncols, NULL, NULL);CHKERRQ(ierr);
    /* BUGFIX: removed a stray CHKERRQ(ierr) that followed this plain
       assignment and re-checked a stale error code */
    colMax = PetscMax(colMax, ncols);
    ierr   = MatRestoreRow(A, r, &ncols, NULL, NULL);CHKERRQ(ierr);
  }
  numRows = rEnd - rStart;
  /* all ranks loop maxRows times so the collective assembly calls below stay synchronized */
  ierr = MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  ierr = PetscMalloc2(colMax,&newCols,colMax,&newVals);CHKERRQ(ierr);
  for (r = rStart; r < rStart+maxRows; ++r) {
    const PetscScalar *vals;
    const PetscInt    *cols;
    PetscInt           ncols, newcols, c;

    if (r < rEnd) {
      ierr = MatGetRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr);
      for (c = 0; c < ncols; ++c) {
        newCols[c] = cols[c];
        /* zero any entry whose magnitude is below the tolerance */
        newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
      }
      newcols = ncols;
      ierr = MatRestoreRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr);
      ierr = MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES);CHKERRQ(ierr);
    }
    ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  }
  ierr = PetscFree2(newCols,newVals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}