1 2 #include <petsc/private/matimpl.h> /*I "petscmat.h" I*/ 3 4 /*@ 5 MatAXPY - Computes Y = a*X + Y. 6 7 Logically Collective on Mat 8 9 Input Parameters: 10 + a - the scalar multiplier 11 . X - the first matrix 12 . Y - the second matrix 13 - str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN 14 or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's) 15 16 Level: intermediate 17 18 .keywords: matrix, add 19 20 .seealso: MatAYPX() 21 @*/ 22 PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str) 23 { 24 PetscErrorCode ierr; 25 PetscInt m1,m2,n1,n2; 26 27 PetscFunctionBegin; 28 PetscValidHeaderSpecific(X,MAT_CLASSID,3); 29 PetscValidHeaderSpecific(Y,MAT_CLASSID,1); 30 PetscValidLogicalCollectiveScalar(Y,a,2); 31 ierr = MatGetSize(X,&m1,&n1);CHKERRQ(ierr); 32 ierr = MatGetSize(Y,&m2,&n2);CHKERRQ(ierr); 33 if (m1 != m2 || n1 != n2) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrix add: %D %D %D %D",m1,m2,n1,n2); 34 35 ierr = PetscLogEventBegin(MAT_AXPY,Y,0,0,0);CHKERRQ(ierr); 36 if (Y->ops->axpy) { 37 ierr = (*Y->ops->axpy)(Y,a,X,str);CHKERRQ(ierr); 38 } else { 39 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 40 } 41 ierr = PetscLogEventEnd(MAT_AXPY,Y,0,0,0);CHKERRQ(ierr); 42 #if defined(PETSC_HAVE_CUSP) 43 if (Y->valid_GPU_matrix != PETSC_CUSP_UNALLOCATED) { 44 Y->valid_GPU_matrix = PETSC_CUSP_CPU; 45 } 46 #elif defined(PETSC_HAVE_VIENNACL) 47 if (Y->valid_GPU_matrix != PETSC_VIENNACL_UNALLOCATED) { 48 Y->valid_GPU_matrix = PETSC_VIENNACL_CPU; 49 } 50 #elif defined(PETSC_HAVE_VECCUDA) 51 if (Y->valid_GPU_matrix != PETSC_CUDA_UNALLOCATED) { 52 Y->valid_GPU_matrix = PETSC_CUDA_CPU; 53 } 54 #endif 55 PetscFunctionReturn(0); 56 } 57 58 PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str) 59 { 60 PetscInt i,start,end,j,ncols,m,n; 61 PetscErrorCode ierr; 62 const PetscInt *row; 63 PetscScalar *val; 64 const PetscScalar *vals; 65 66 PetscFunctionBegin; 67 ierr = MatGetSize(X,&m,&n);CHKERRQ(ierr); 68 ierr = MatGetOwnershipRange(X,&start,&end);CHKERRQ(ierr); 69 if (a == 1.0) { 70 for (i = start; i < end; i++) { 71 ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr); 72 ierr = MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr); 73 ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr); 74 } 75 } else { 76 ierr = PetscMalloc1(n+1,&val);CHKERRQ(ierr); 77 for (i=start; i<end; i++) { 78 ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr); 79 for (j=0; j<ncols; j++) { 80 val[j] = a*vals[j]; 81 } 82 ierr = MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES);CHKERRQ(ierr); 83 ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr); 84 } 85 ierr = PetscFree(val);CHKERRQ(ierr); 86 } 87 ierr = MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 88 ierr = MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 89 PetscFunctionReturn(0); 90 } 91 92 PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str) 93 { 94 PetscInt i,start,end,j,ncols,m,n; 95 PetscErrorCode ierr; 96 const PetscInt *row; 97 PetscScalar *val; 98 const PetscScalar *vals; 99 100 PetscFunctionBegin; 101 ierr = MatGetSize(X,&m,&n);CHKERRQ(ierr); 102 ierr = MatGetOwnershipRange(X,&start,&end);CHKERRQ(ierr); 103 if (a == 1.0) { 104 for (i = start; i < end; i++) { 105 ierr = MatGetRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr); 106 ierr = MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr); 107 ierr = MatRestoreRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr); 108 109 ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr); 110 ierr = MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr); 111 ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr); 112 } 113 } else { 114 ierr = PetscMalloc1(n+1,&val);CHKERRQ(ierr); 115 for (i=start; i<end; i++) { 116 ierr = MatGetRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr); 117 ierr = MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr); 118 ierr = MatRestoreRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr); 119 120 ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr); 121 for (j=0; j<ncols; j++) { 122 val[j] = a*vals[j]; 123 } 124 ierr = MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES);CHKERRQ(ierr); 125 ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr); 126 } 127 ierr = PetscFree(val);CHKERRQ(ierr); 128 } 129 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 130 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 131 PetscFunctionReturn(0); 132 } 133 134 /*@ 135 MatShift - Computes Y = Y + a I, where a is a PetscScalar and I is the identity matrix. 136 137 Neighbor-wise Collective on Mat 138 139 Input Parameters: 140 + Y - the matrices 141 - a - the PetscScalar 142 143 Level: intermediate 144 145 Notes: If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially 146 fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an 147 entry). 148 149 Developers Note: If the local "diagonal part" of the matrix Y has no entries then the local diagonal part is 150 preallocated with 1 nonzero per row for the to be added values. This allows for fast shifting of an empty matrix. 151 152 .keywords: matrix, add, shift 153 154 .seealso: MatDiagonalSet() 155 @*/ 156 PetscErrorCode MatShift(Mat Y,PetscScalar a) 157 { 158 PetscErrorCode ierr; 159 160 PetscFunctionBegin; 161 PetscValidHeaderSpecific(Y,MAT_CLASSID,1); 162 if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 163 if (Y->factortype) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 164 MatCheckPreallocated(Y,1); 165 166 if (Y->ops->shift) { 167 ierr = (*Y->ops->shift)(Y,a);CHKERRQ(ierr); 168 } else { 169 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 170 } 171 172 #if defined(PETSC_HAVE_CUSP) 173 if (Y->valid_GPU_matrix != PETSC_CUSP_UNALLOCATED) { 174 Y->valid_GPU_matrix = PETSC_CUSP_CPU; 175 } 176 #elif defined(PETSC_HAVE_VIENNACL) 177 if (Y->valid_GPU_matrix != PETSC_VIENNACL_UNALLOCATED) { 178 Y->valid_GPU_matrix = PETSC_VIENNACL_CPU; 179 } 180 #elif defined(PETSC_HAVE_VECCUDA) 181 if (Y->valid_GPU_matrix != PETSC_CUDA_UNALLOCATED) { 182 Y->valid_GPU_matrix = PETSC_CUDA_CPU; 183 } 184 #endif 185 PetscFunctionReturn(0); 186 } 187 188 PetscErrorCode MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is) 189 { 190 PetscErrorCode ierr; 191 PetscInt i,start,end; 192 PetscScalar *v; 193 194 PetscFunctionBegin; 195 ierr = MatGetOwnershipRange(Y,&start,&end);CHKERRQ(ierr); 196 ierr = VecGetArray(D,&v);CHKERRQ(ierr); 197 for (i=start; i<end; i++) { 198 ierr = MatSetValues(Y,1,&i,1,&i,v+i-start,is);CHKERRQ(ierr); 199 } 200 ierr = VecRestoreArray(D,&v);CHKERRQ(ierr); 201 ierr = MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 202 ierr = MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 203 PetscFunctionReturn(0); 204 } 205 206 /*@ 207 MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix 208 that is represented as a vector. Or Y[i,i] = D[i] if InsertMode is 209 INSERT_VALUES. 210 211 Input Parameters: 212 + Y - the input matrix 213 . D - the diagonal matrix, represented as a vector 214 - i - INSERT_VALUES or ADD_VALUES 215 216 Neighbor-wise Collective on Mat and Vec 217 218 Notes: If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially 219 fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an 220 entry). 221 222 Level: intermediate 223 224 .keywords: matrix, add, shift, diagonal 225 226 .seealso: MatShift() 227 @*/ 228 PetscErrorCode MatDiagonalSet(Mat Y,Vec D,InsertMode is) 229 { 230 PetscErrorCode ierr; 231 PetscInt matlocal,veclocal; 232 233 PetscFunctionBegin; 234 PetscValidHeaderSpecific(Y,MAT_CLASSID,1); 235 PetscValidHeaderSpecific(D,VEC_CLASSID,2); 236 ierr = MatGetLocalSize(Y,&matlocal,NULL);CHKERRQ(ierr); 237 ierr = VecGetLocalSize(D,&veclocal);CHKERRQ(ierr); 238 if (matlocal != veclocal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number local rows of matrix %D does not match that of vector for diagonal %D",matlocal,veclocal); 239 if (Y->ops->diagonalset) { 240 ierr = (*Y->ops->diagonalset)(Y,D,is);CHKERRQ(ierr); 241 } else { 242 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 243 } 244 PetscFunctionReturn(0); 245 } 246 247 /*@ 248 MatAYPX - Computes Y = a*Y + X. 249 250 Logically on Mat 251 252 Input Parameters: 253 + a - the PetscScalar multiplier 254 . Y - the first matrix 255 . X - the second matrix 256 - str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN or SUBSET_NONZERO_PATTERN 257 258 Level: intermediate 259 260 .keywords: matrix, add 261 262 .seealso: MatAXPY() 263 @*/ 264 PetscErrorCode MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str) 265 { 266 PetscScalar one = 1.0; 267 PetscErrorCode ierr; 268 PetscInt mX,mY,nX,nY; 269 270 PetscFunctionBegin; 271 PetscValidHeaderSpecific(X,MAT_CLASSID,3); 272 PetscValidHeaderSpecific(Y,MAT_CLASSID,1); 273 PetscValidLogicalCollectiveScalar(Y,a,2); 274 ierr = MatGetSize(X,&mX,&nX);CHKERRQ(ierr); 275 ierr = MatGetSize(X,&mY,&nY);CHKERRQ(ierr); 276 if (mX != mY || nX != nY) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrices: %D %D first %D %D second",mX,mY,nX,nY); 277 278 ierr = MatScale(Y,a);CHKERRQ(ierr); 279 ierr = MatAXPY(Y,one,X,str);CHKERRQ(ierr); 280 PetscFunctionReturn(0); 281 } 282 283 /*@ 284 MatComputeExplicitOperator - Computes the explicit matrix 285 286 Collective on Mat 287 288 Input Parameter: 289 . inmat - the matrix 290 291 Output Parameter: 292 . mat - the explict preconditioned operator 293 294 Notes: 295 This computation is done by applying the operators to columns of the 296 identity matrix. 297 298 Currently, this routine uses a dense matrix format when 1 processor 299 is used and a sparse format otherwise. This routine is costly in general, 300 and is recommended for use only with relatively small systems. 301 302 Level: advanced 303 304 .keywords: Mat, compute, explicit, operator 305 @*/ 306 PetscErrorCode MatComputeExplicitOperator(Mat inmat,Mat *mat) 307 { 308 Vec in,out; 309 PetscErrorCode ierr; 310 PetscInt i,m,n,M,N,*rows,start,end; 311 MPI_Comm comm; 312 PetscScalar *array,zero = 0.0,one = 1.0; 313 PetscMPIInt size; 314 315 PetscFunctionBegin; 316 PetscValidHeaderSpecific(inmat,MAT_CLASSID,1); 317 PetscValidPointer(mat,2); 318 319 ierr = PetscObjectGetComm((PetscObject)inmat,&comm);CHKERRQ(ierr); 320 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 321 322 ierr = MatGetLocalSize(inmat,&m,&n);CHKERRQ(ierr); 323 ierr = MatGetSize(inmat,&M,&N);CHKERRQ(ierr); 324 ierr = MatCreateVecs(inmat,&in,&out);CHKERRQ(ierr); 325 ierr = VecSetOption(in,VEC_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 326 ierr = VecGetOwnershipRange(out,&start,&end);CHKERRQ(ierr); 327 ierr = PetscMalloc1(m,&rows);CHKERRQ(ierr); 328 for (i=0; i<m; i++) rows[i] = start + i; 329 330 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 331 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 332 if (size == 1) { 333 ierr = MatSetType(*mat,MATSEQDENSE);CHKERRQ(ierr); 334 ierr = MatSeqDenseSetPreallocation(*mat,NULL);CHKERRQ(ierr); 335 } else { 336 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 337 ierr = MatMPIAIJSetPreallocation(*mat,n,NULL,N-n,NULL);CHKERRQ(ierr); 338 } 339 340 for (i=0; i<N; i++) { 341 342 ierr = VecSet(in,zero);CHKERRQ(ierr); 343 ierr = VecSetValues(in,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr); 344 ierr = VecAssemblyBegin(in);CHKERRQ(ierr); 345 ierr = VecAssemblyEnd(in);CHKERRQ(ierr); 346 347 ierr = MatMult(inmat,in,out);CHKERRQ(ierr); 348 349 ierr = VecGetArray(out,&array);CHKERRQ(ierr); 350 ierr = MatSetValues(*mat,m,rows,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 351 ierr = VecRestoreArray(out,&array);CHKERRQ(ierr); 352 353 } 354 ierr = PetscFree(rows);CHKERRQ(ierr); 355 ierr = VecDestroy(&out);CHKERRQ(ierr); 356 ierr = VecDestroy(&in);CHKERRQ(ierr); 357 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 358 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 359 PetscFunctionReturn(0); 360 } 361 362 /*@ 363 MatChop - Set all values in the matrix less than the tolerance to zero 364 365 Input Parameters: 366 + A - The matrix 367 - tol - The zero tolerance 368 369 Output Parameters: 370 . A - The chopped matrix 371 372 Level: intermediate 373 374 .seealso: MatCreate(), MatZeroEntries() 375 @*/ 376 PetscErrorCode MatChop(Mat A, PetscReal tol) 377 { 378 PetscScalar *newVals; 379 PetscInt *newCols; 380 PetscInt rStart, rEnd, numRows, maxRows, r, colMax = 0; 381 PetscErrorCode ierr; 382 383 PetscFunctionBegin; 384 ierr = MatGetOwnershipRange(A, &rStart, &rEnd);CHKERRQ(ierr); 385 for (r = rStart; r < rEnd; ++r) { 386 PetscInt ncols; 387 388 ierr = MatGetRow(A, r, &ncols, NULL, NULL);CHKERRQ(ierr); 389 colMax = PetscMax(colMax, ncols);CHKERRQ(ierr); 390 ierr = MatRestoreRow(A, r, &ncols, NULL, NULL);CHKERRQ(ierr); 391 } 392 numRows = rEnd - rStart; 393 ierr = MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 394 ierr = PetscMalloc2(colMax,&newCols,colMax,&newVals);CHKERRQ(ierr); 395 for (r = rStart; r < rStart+maxRows; ++r) { 396 const PetscScalar *vals; 397 const PetscInt *cols; 398 PetscInt ncols, newcols, c; 399 400 if (r < rEnd) { 401 ierr = MatGetRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr); 402 for (c = 0; c < ncols; ++c) { 403 newCols[c] = cols[c]; 404 newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c]; 405 } 406 newcols = ncols; 407 ierr = MatRestoreRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr); 408 ierr = MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES);CHKERRQ(ierr); 409 } 410 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 411 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 412 } 413 ierr = PetscFree2(newCols,newVals);CHKERRQ(ierr); 414 PetscFunctionReturn(0); 415 } 416