1 2 #include <petsc/private/matimpl.h> /*I "petscmat.h" I*/ 3 4 #undef __FUNCT__ 5 #define __FUNCT__ "MatAXPY" 6 /*@ 7 MatAXPY - Computes Y = a*X + Y. 8 9 Logically Collective on Mat 10 11 Input Parameters: 12 + a - the scalar multiplier 13 . X - the first matrix 14 . Y - the second matrix 15 - str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN 16 or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's) 17 18 Level: intermediate 19 20 .keywords: matrix, add 21 22 .seealso: MatAYPX() 23 @*/ 24 PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str) 25 { 26 PetscErrorCode ierr; 27 PetscInt m1,m2,n1,n2; 28 29 PetscFunctionBegin; 30 PetscValidHeaderSpecific(X,MAT_CLASSID,3); 31 PetscValidHeaderSpecific(Y,MAT_CLASSID,1); 32 PetscValidLogicalCollectiveScalar(Y,a,2); 33 ierr = MatGetSize(X,&m1,&n1);CHKERRQ(ierr); 34 ierr = MatGetSize(Y,&m2,&n2);CHKERRQ(ierr); 35 if (m1 != m2 || n1 != n2) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrix add: %D %D %D %D",m1,m2,n1,n2); 36 37 ierr = PetscLogEventBegin(MAT_AXPY,Y,0,0,0);CHKERRQ(ierr); 38 if (Y->ops->axpy) { 39 ierr = (*Y->ops->axpy)(Y,a,X,str);CHKERRQ(ierr); 40 } else { 41 ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr); 42 } 43 ierr = PetscLogEventEnd(MAT_AXPY,Y,0,0,0);CHKERRQ(ierr); 44 #if defined(PETSC_HAVE_CUSP) 45 if (Y->valid_GPU_matrix != PETSC_CUSP_UNALLOCATED) { 46 Y->valid_GPU_matrix = PETSC_CUSP_CPU; 47 } 48 #elif defined(PETSC_HAVE_VIENNACL) 49 if (Y->valid_GPU_matrix != PETSC_VIENNACL_UNALLOCATED) { 50 Y->valid_GPU_matrix = PETSC_VIENNACL_CPU; 51 } 52 #elif defined(PETSC_HAVE_VECCUDA) 53 if (Y->valid_GPU_matrix != PETSC_CUDA_UNALLOCATED) { 54 Y->valid_GPU_matrix = PETSC_CUDA_CPU; 55 } 56 #endif 57 PetscFunctionReturn(0); 58 } 59 60 #undef __FUNCT__ 61 #define __FUNCT__ "MatAXPY_Basic" 62 PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str) 63 { 64 PetscInt i,start,end,j,ncols,m,n; 65 PetscErrorCode ierr; 66 const PetscInt *row; 67 PetscScalar *val; 68 const PetscScalar *vals; 69 70 PetscFunctionBegin; 71 ierr = MatGetSize(X,&m,&n);CHKERRQ(ierr); 72 ierr = MatGetOwnershipRange(X,&start,&end);CHKERRQ(ierr); 73 if (a == 1.0) { 74 for (i = start; i < end; i++) { 75 ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr); 76 ierr = MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr); 77 ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr); 78 } 79 } else { 80 ierr = PetscMalloc1(n+1,&val);CHKERRQ(ierr); 81 for (i=start; i<end; i++) { 82 ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr); 83 for (j=0; j<ncols; j++) { 84 val[j] = a*vals[j]; 85 } 86 ierr = MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES);CHKERRQ(ierr); 87 ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr); 88 } 89 ierr = PetscFree(val);CHKERRQ(ierr); 90 } 91 ierr = MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 92 ierr = MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 93 PetscFunctionReturn(0); 94 } 95 96 #undef __FUNCT__ 97 #define __FUNCT__ "MatAXPY_BasicWithPreallocation" 98 PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str) 99 { 100 PetscInt i,start,end,j,ncols,m,n; 101 PetscErrorCode ierr; 102 const PetscInt *row; 103 PetscScalar *val; 104 const PetscScalar *vals; 105 106 PetscFunctionBegin; 107 ierr = MatGetSize(X,&m,&n);CHKERRQ(ierr); 108 ierr = MatGetOwnershipRange(X,&start,&end);CHKERRQ(ierr); 109 if (a == 1.0) { 110 for (i = start; i < end; i++) { 111 ierr = MatGetRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr); 112 ierr = MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr); 113 ierr = MatRestoreRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr); 114 115 ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr); 116 ierr = MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr); 117 ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr); 118 } 119 } else { 120 ierr = PetscMalloc1(n+1,&val);CHKERRQ(ierr); 121 for (i=start; i<end; i++) { 122 ierr = MatGetRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr); 123 ierr = MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);CHKERRQ(ierr); 124 ierr = MatRestoreRow(Y,i,&ncols,&row,&vals);CHKERRQ(ierr); 125 126 ierr = MatGetRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr); 127 for (j=0; j<ncols; j++) { 128 val[j] = a*vals[j]; 129 } 130 ierr = MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES);CHKERRQ(ierr); 131 ierr = MatRestoreRow(X,i,&ncols,&row,&vals);CHKERRQ(ierr); 132 } 133 ierr = PetscFree(val);CHKERRQ(ierr); 134 } 135 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 136 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 137 PetscFunctionReturn(0); 138 } 139 140 #undef __FUNCT__ 141 #define __FUNCT__ "MatShift" 142 /*@ 143 MatShift - Computes Y = Y + a I, where a is a PetscScalar and I is the identity matrix. 144 145 Neighbor-wise Collective on Mat 146 147 Input Parameters: 148 + Y - the matrices 149 - a - the PetscScalar 150 151 Level: intermediate 152 153 Notes: If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially 154 fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an 155 entry). 156 157 Developers Note: If the local "diagonal part" of the matrix Y has no entries then the local diagonal part is 158 preallocated with 1 nonzero per row for the to be added values. This allows for fast shifting of an empty matrix. 159 160 .keywords: matrix, add, shift 161 162 .seealso: MatDiagonalSet() 163 @*/ 164 PetscErrorCode MatShift(Mat Y,PetscScalar a) 165 { 166 PetscErrorCode ierr; 167 168 PetscFunctionBegin; 169 PetscValidHeaderSpecific(Y,MAT_CLASSID,1); 170 if (!Y->assembled) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix"); 171 if (Y->factortype) SETERRQ(PetscObjectComm((PetscObject)Y),PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix"); 172 MatCheckPreallocated(Y,1); 173 174 if (Y->ops->shift) { 175 ierr = (*Y->ops->shift)(Y,a);CHKERRQ(ierr); 176 } else { 177 ierr = MatShift_Basic(Y,a);CHKERRQ(ierr); 178 } 179 180 #if defined(PETSC_HAVE_CUSP) 181 if (Y->valid_GPU_matrix != PETSC_CUSP_UNALLOCATED) { 182 Y->valid_GPU_matrix = PETSC_CUSP_CPU; 183 } 184 #elif defined(PETSC_HAVE_VIENNACL) 185 if (Y->valid_GPU_matrix != PETSC_VIENNACL_UNALLOCATED) { 186 Y->valid_GPU_matrix = PETSC_VIENNACL_CPU; 187 } 188 #elif defined(PETSC_HAVE_VECCUDA) 189 if (Y->valid_GPU_matrix != PETSC_CUDA_UNALLOCATED) { 190 Y->valid_GPU_matrix = PETSC_CUDA_CPU; 191 } 192 #endif 193 PetscFunctionReturn(0); 194 } 195 196 #undef __FUNCT__ 197 #define __FUNCT__ "MatDiagonalSet_Default" 198 PetscErrorCode MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is) 199 { 200 PetscErrorCode ierr; 201 PetscInt i,start,end; 202 PetscScalar *v; 203 204 PetscFunctionBegin; 205 ierr = MatGetOwnershipRange(Y,&start,&end);CHKERRQ(ierr); 206 ierr = VecGetArray(D,&v);CHKERRQ(ierr); 207 for (i=start; i<end; i++) { 208 ierr = MatSetValues(Y,1,&i,1,&i,v+i-start,is);CHKERRQ(ierr); 209 } 210 ierr = VecRestoreArray(D,&v);CHKERRQ(ierr); 211 ierr = MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 212 ierr = MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 213 PetscFunctionReturn(0); 214 } 215 216 #undef __FUNCT__ 217 #define __FUNCT__ "MatDiagonalSet" 218 /*@ 219 MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix 220 that is represented as a vector. Or Y[i,i] = D[i] if InsertMode is 221 INSERT_VALUES. 222 223 Input Parameters: 224 + Y - the input matrix 225 . D - the diagonal matrix, represented as a vector 226 - i - INSERT_VALUES or ADD_VALUES 227 228 Neighbor-wise Collective on Mat and Vec 229 230 Notes: If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially 231 fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an 232 entry). 233 234 Level: intermediate 235 236 .keywords: matrix, add, shift, diagonal 237 238 .seealso: MatShift() 239 @*/ 240 PetscErrorCode MatDiagonalSet(Mat Y,Vec D,InsertMode is) 241 { 242 PetscErrorCode ierr; 243 PetscInt matlocal,veclocal; 244 245 PetscFunctionBegin; 246 PetscValidHeaderSpecific(Y,MAT_CLASSID,1); 247 PetscValidHeaderSpecific(D,VEC_CLASSID,2); 248 ierr = MatGetLocalSize(Y,&matlocal,NULL);CHKERRQ(ierr); 249 ierr = VecGetLocalSize(D,&veclocal);CHKERRQ(ierr); 250 if (matlocal != veclocal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number local rows of matrix %D does not match that of vector for diagonal %D",matlocal,veclocal); 251 if (Y->ops->diagonalset) { 252 ierr = (*Y->ops->diagonalset)(Y,D,is);CHKERRQ(ierr); 253 } else { 254 ierr = MatDiagonalSet_Default(Y,D,is);CHKERRQ(ierr); 255 } 256 PetscFunctionReturn(0); 257 } 258 259 #undef __FUNCT__ 260 #define __FUNCT__ "MatAYPX" 261 /*@ 262 MatAYPX - Computes Y = a*Y + X. 263 264 Logically on Mat 265 266 Input Parameters: 267 + a - the PetscScalar multiplier 268 . Y - the first matrix 269 . X - the second matrix 270 - str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN or SUBSET_NONZERO_PATTERN 271 272 Level: intermediate 273 274 .keywords: matrix, add 275 276 .seealso: MatAXPY() 277 @*/ 278 PetscErrorCode MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str) 279 { 280 PetscScalar one = 1.0; 281 PetscErrorCode ierr; 282 PetscInt mX,mY,nX,nY; 283 284 PetscFunctionBegin; 285 PetscValidHeaderSpecific(X,MAT_CLASSID,3); 286 PetscValidHeaderSpecific(Y,MAT_CLASSID,1); 287 PetscValidLogicalCollectiveScalar(Y,a,2); 288 ierr = MatGetSize(X,&mX,&nX);CHKERRQ(ierr); 289 ierr = MatGetSize(X,&mY,&nY);CHKERRQ(ierr); 290 if (mX != mY || nX != nY) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Non conforming matrices: %D %D first %D %D second",mX,mY,nX,nY); 291 292 ierr = MatScale(Y,a);CHKERRQ(ierr); 293 ierr = MatAXPY(Y,one,X,str);CHKERRQ(ierr); 294 PetscFunctionReturn(0); 295 } 296 297 #undef __FUNCT__ 298 #define __FUNCT__ "MatComputeExplicitOperator" 299 /*@ 300 MatComputeExplicitOperator - Computes the explicit matrix 301 302 Collective on Mat 303 304 Input Parameter: 305 . inmat - the matrix 306 307 Output Parameter: 308 . mat - the explict preconditioned operator 309 310 Notes: 311 This computation is done by applying the operators to columns of the 312 identity matrix. 313 314 Currently, this routine uses a dense matrix format when 1 processor 315 is used and a sparse format otherwise. This routine is costly in general, 316 and is recommended for use only with relatively small systems. 317 318 Level: advanced 319 320 .keywords: Mat, compute, explicit, operator 321 @*/ 322 PetscErrorCode MatComputeExplicitOperator(Mat inmat,Mat *mat) 323 { 324 Vec in,out; 325 PetscErrorCode ierr; 326 PetscInt i,m,n,M,N,*rows,start,end; 327 MPI_Comm comm; 328 PetscScalar *array,zero = 0.0,one = 1.0; 329 PetscMPIInt size; 330 331 PetscFunctionBegin; 332 PetscValidHeaderSpecific(inmat,MAT_CLASSID,1); 333 PetscValidPointer(mat,2); 334 335 ierr = PetscObjectGetComm((PetscObject)inmat,&comm);CHKERRQ(ierr); 336 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 337 338 ierr = MatGetLocalSize(inmat,&m,&n);CHKERRQ(ierr); 339 ierr = MatGetSize(inmat,&M,&N);CHKERRQ(ierr); 340 ierr = MatCreateVecs(inmat,&in,&out);CHKERRQ(ierr); 341 ierr = VecSetOption(in,VEC_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE);CHKERRQ(ierr); 342 ierr = VecGetOwnershipRange(out,&start,&end);CHKERRQ(ierr); 343 ierr = PetscMalloc1(m,&rows);CHKERRQ(ierr); 344 for (i=0; i<m; i++) rows[i] = start + i; 345 346 ierr = MatCreate(comm,mat);CHKERRQ(ierr); 347 ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr); 348 if (size == 1) { 349 ierr = MatSetType(*mat,MATSEQDENSE);CHKERRQ(ierr); 350 ierr = MatSeqDenseSetPreallocation(*mat,NULL);CHKERRQ(ierr); 351 } else { 352 ierr = MatSetType(*mat,MATMPIAIJ);CHKERRQ(ierr); 353 ierr = MatMPIAIJSetPreallocation(*mat,n,NULL,N-n,NULL);CHKERRQ(ierr); 354 } 355 356 for (i=0; i<N; i++) { 357 358 ierr = VecSet(in,zero);CHKERRQ(ierr); 359 ierr = VecSetValues(in,1,&i,&one,INSERT_VALUES);CHKERRQ(ierr); 360 ierr = VecAssemblyBegin(in);CHKERRQ(ierr); 361 ierr = VecAssemblyEnd(in);CHKERRQ(ierr); 362 363 ierr = MatMult(inmat,in,out);CHKERRQ(ierr); 364 365 ierr = VecGetArray(out,&array);CHKERRQ(ierr); 366 ierr = MatSetValues(*mat,m,rows,1,&i,array,INSERT_VALUES);CHKERRQ(ierr); 367 ierr = VecRestoreArray(out,&array);CHKERRQ(ierr); 368 369 } 370 ierr = PetscFree(rows);CHKERRQ(ierr); 371 ierr = VecDestroy(&out);CHKERRQ(ierr); 372 ierr = VecDestroy(&in);CHKERRQ(ierr); 373 ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 374 ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 375 PetscFunctionReturn(0); 376 } 377 378 #undef __FUNCT__ 379 #define __FUNCT__ "MatChop" 380 /*@ 381 MatChop - Set all values in the matrix less than the tolerance to zero 382 383 Input Parameters: 384 + A - The matrix 385 - tol - The zero tolerance 386 387 Output Parameters: 388 . A - The chopped matrix 389 390 Level: intermediate 391 392 .seealso: MatCreate(), MatZeroEntries() 393 @*/ 394 PetscErrorCode MatChop(Mat A, PetscReal tol) 395 { 396 PetscScalar *newVals; 397 PetscInt *newCols; 398 PetscInt rStart, rEnd, numRows, maxRows, r, colMax = 0; 399 PetscErrorCode ierr; 400 401 PetscFunctionBegin; 402 ierr = MatGetOwnershipRange(A, &rStart, &rEnd);CHKERRQ(ierr); 403 for (r = rStart; r < rEnd; ++r) { 404 PetscInt ncols; 405 406 ierr = MatGetRow(A, r, &ncols, NULL, NULL);CHKERRQ(ierr); 407 colMax = PetscMax(colMax, ncols);CHKERRQ(ierr); 408 ierr = MatRestoreRow(A, r, &ncols, NULL, NULL);CHKERRQ(ierr); 409 } 410 numRows = rEnd - rStart; 411 ierr = MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 412 ierr = PetscMalloc2(colMax,&newCols,colMax,&newVals);CHKERRQ(ierr); 413 for (r = rStart; r < rStart+maxRows; ++r) { 414 const PetscScalar *vals; 415 const PetscInt *cols; 416 PetscInt ncols, newcols, c; 417 418 if (r < rEnd) { 419 ierr = MatGetRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr); 420 for (c = 0; c < ncols; ++c) { 421 newCols[c] = cols[c]; 422 newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c]; 423 } 424 newcols = ncols; 425 ierr = MatRestoreRow(A, r, &ncols, &cols, &vals);CHKERRQ(ierr); 426 ierr = MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES);CHKERRQ(ierr); 427 } 428 ierr = MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 429 ierr = MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 430 } 431 ierr = PetscFree2(newCols,newVals);CHKERRQ(ierr); 432 PetscFunctionReturn(0); 433 } 434