1 2 /* 3 Basic functions for basic parallel dense matrices. 4 */ 5 6 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8 #include <petscblaslapack.h> 9 10 /*@ 11 12 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 13 matrix that represents the operator. For sequential matrices it returns itself. 14 15 Input Parameter: 16 . A - the Seq or MPI dense matrix 17 18 Output Parameter: 19 . B - the inner matrix 20 21 Level: intermediate 22 23 @*/ 24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 25 { 26 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 27 PetscBool flg; 28 29 PetscFunctionBegin; 30 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 31 PetscValidPointer(B,2); 32 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg)); 33 if (flg) *B = mat->A; 34 else { 35 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg)); 36 PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name); 37 *B = A; 38 } 39 PetscFunctionReturn(0); 40 } 41 42 PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s) 43 { 44 Mat_MPIDense *Amat = (Mat_MPIDense*)A->data; 45 Mat_MPIDense *Bmat = (Mat_MPIDense*)B->data; 46 47 PetscFunctionBegin; 48 PetscCall(MatCopy(Amat->A,Bmat->A,s)); 49 PetscFunctionReturn(0); 50 } 51 52 PetscErrorCode MatShift_MPIDense(Mat A,PetscScalar alpha) 53 { 54 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 55 PetscInt j,lda,rstart = A->rmap->rstart,rend = A->rmap->rend,rend2; 56 PetscScalar *v; 57 58 PetscFunctionBegin; 59 PetscCall(MatDenseGetArray(mat->A,&v)); 60 PetscCall(MatDenseGetLDA(mat->A,&lda)); 61 rend2 = PetscMin(rend,A->cmap->N); 62 if (rend2>rstart) { 63 for (j=rstart; j<rend2; j++) v[j-rstart+j*lda] += alpha; 64 PetscCall(PetscLogFlops(rend2-rstart)); 65 } 66 PetscCall(MatDenseRestoreArray(mat->A,&v)); 67 PetscFunctionReturn(0); 68 } 69 70 PetscErrorCode 
MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 71 { 72 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 73 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 74 75 PetscFunctionBegin; 76 PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 77 lrow = row - rstart; 78 PetscCall(MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v)); 79 PetscFunctionReturn(0); 80 } 81 82 PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 83 { 84 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 85 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 86 87 PetscFunctionBegin; 88 PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 89 lrow = row - rstart; 90 PetscCall(MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v)); 91 PetscFunctionReturn(0); 92 } 93 94 PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a) 95 { 96 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 97 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 98 PetscScalar *array; 99 MPI_Comm comm; 100 PetscBool flg; 101 Mat B; 102 103 PetscFunctionBegin; 104 PetscCall(MatHasCongruentLayouts(A,&flg)); 105 PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 106 PetscCall(PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B)); 107 if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */ 108 109 PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg)); 110 PetscCheck(!flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. 
Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA); 111 PetscCall(PetscObjectGetComm((PetscObject)(mdn->A),&comm)); 112 PetscCall(MatCreate(comm,&B)); 113 PetscCall(MatSetSizes(B,m,m,m,m)); 114 PetscCall(MatSetType(B,((PetscObject)mdn->A)->type_name)); 115 PetscCall(MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array)); 116 PetscCall(MatSeqDenseSetPreallocation(B,array+m*rstart)); 117 PetscCall(MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array)); 118 PetscCall(PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B)); 119 *a = B; 120 PetscCall(MatDestroy(&B)); 121 } else *a = B; 122 PetscFunctionReturn(0); 123 } 124 125 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 126 { 127 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 128 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 129 PetscBool roworiented = A->roworiented; 130 131 PetscFunctionBegin; 132 for (i=0; i<m; i++) { 133 if (idxm[i] < 0) continue; 134 PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 135 if (idxm[i] >= rstart && idxm[i] < rend) { 136 row = idxm[i] - rstart; 137 if (roworiented) { 138 PetscCall(MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv)); 139 } else { 140 for (j=0; j<n; j++) { 141 if (idxn[j] < 0) continue; 142 PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 143 PetscCall(MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv)); 144 } 145 } 146 } else if (!A->donotstash) { 147 mat->assembled = PETSC_FALSE; 148 if (roworiented) { 149 PetscCall(MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE)); 150 } else { 151 PetscCall(MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE)); 152 } 153 } 154 } 155 PetscFunctionReturn(0); 156 } 157 158 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt 
idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 159 { 160 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 161 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 162 163 PetscFunctionBegin; 164 for (i=0; i<m; i++) { 165 if (idxm[i] < 0) continue; /* negative row */ 166 PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 167 if (idxm[i] >= rstart && idxm[i] < rend) { 168 row = idxm[i] - rstart; 169 for (j=0; j<n; j++) { 170 if (idxn[j] < 0) continue; /* negative column */ 171 PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 172 PetscCall(MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j)); 173 } 174 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 175 } 176 PetscFunctionReturn(0); 177 } 178 179 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda) 180 { 181 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 182 183 PetscFunctionBegin; 184 PetscCall(MatDenseGetLDA(a->A,lda)); 185 PetscFunctionReturn(0); 186 } 187 188 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda) 189 { 190 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 191 PetscBool iscuda; 192 193 PetscFunctionBegin; 194 if (!a->A) { 195 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 196 PetscCall(PetscLayoutSetUp(A->rmap)); 197 PetscCall(PetscLayoutSetUp(A->cmap)); 198 PetscCall(MatCreate(PETSC_COMM_SELF,&a->A)); 199 PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->A)); 200 PetscCall(MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N)); 201 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda)); 202 PetscCall(MatSetType(a->A,iscuda ? 
MATSEQDENSECUDA : MATSEQDENSE)); 203 } 204 PetscCall(MatDenseSetLDA(a->A,lda)); 205 PetscFunctionReturn(0); 206 } 207 208 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array) 209 { 210 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 211 212 PetscFunctionBegin; 213 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 214 PetscCall(MatDenseGetArray(a->A,array)); 215 PetscFunctionReturn(0); 216 } 217 218 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array) 219 { 220 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 221 222 PetscFunctionBegin; 223 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 224 PetscCall(MatDenseGetArrayRead(a->A,array)); 225 PetscFunctionReturn(0); 226 } 227 228 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array) 229 { 230 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 231 232 PetscFunctionBegin; 233 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 234 PetscCall(MatDenseGetArrayWrite(a->A,array)); 235 PetscFunctionReturn(0); 236 } 237 238 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array) 239 { 240 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 241 242 PetscFunctionBegin; 243 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 244 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 245 PetscCall(MatDensePlaceArray(a->A,array)); 246 PetscFunctionReturn(0); 247 } 248 249 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 250 { 251 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 252 253 PetscFunctionBegin; 254 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call 
MatDenseRestoreColumnVec() first"); 255 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 256 PetscCall(MatDenseResetArray(a->A)); 257 PetscFunctionReturn(0); 258 } 259 260 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array) 261 { 262 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 263 264 PetscFunctionBegin; 265 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 266 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 267 PetscCall(MatDenseReplaceArray(a->A,array)); 268 PetscFunctionReturn(0); 269 } 270 271 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 272 { 273 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 274 PetscInt lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 275 const PetscInt *irow,*icol; 276 const PetscScalar *v; 277 PetscScalar *bv; 278 Mat newmat; 279 IS iscol_local; 280 MPI_Comm comm_is,comm_mat; 281 282 PetscFunctionBegin; 283 PetscCall(PetscObjectGetComm((PetscObject)A,&comm_mat)); 284 PetscCall(PetscObjectGetComm((PetscObject)iscol,&comm_is)); 285 PetscCheck(comm_mat == comm_is,PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator"); 286 287 PetscCall(ISAllGather(iscol,&iscol_local)); 288 PetscCall(ISGetIndices(isrow,&irow)); 289 PetscCall(ISGetIndices(iscol_local,&icol)); 290 PetscCall(ISGetLocalSize(isrow,&nrows)); 291 PetscCall(ISGetLocalSize(iscol,&ncols)); 292 PetscCall(ISGetSize(iscol,&Ncols)); /* global number of columns, size of iscol_local */ 293 294 /* No parallel redistribution currently supported! Should really check each index set 295 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 296 original matrix! 
*/ 297 298 PetscCall(MatGetLocalSize(A,&nlrows,&nlcols)); 299 PetscCall(MatGetOwnershipRange(A,&rstart,&rend)); 300 301 /* Check submatrix call */ 302 if (scall == MAT_REUSE_MATRIX) { 303 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 304 /* Really need to test rows and column sizes! */ 305 newmat = *B; 306 } else { 307 /* Create and fill new matrix */ 308 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&newmat)); 309 PetscCall(MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols)); 310 PetscCall(MatSetType(newmat,((PetscObject)A)->type_name)); 311 PetscCall(MatMPIDenseSetPreallocation(newmat,NULL)); 312 } 313 314 /* Now extract the data pointers and do the copy, column at a time */ 315 newmatd = (Mat_MPIDense*)newmat->data; 316 PetscCall(MatDenseGetArray(newmatd->A,&bv)); 317 PetscCall(MatDenseGetArrayRead(mat->A,&v)); 318 PetscCall(MatDenseGetLDA(mat->A,&lda)); 319 for (i=0; i<Ncols; i++) { 320 const PetscScalar *av = v + lda*icol[i]; 321 for (j=0; j<nrows; j++) { 322 *bv++ = av[irow[j] - rstart]; 323 } 324 } 325 PetscCall(MatDenseRestoreArrayRead(mat->A,&v)); 326 PetscCall(MatDenseRestoreArray(newmatd->A,&bv)); 327 328 /* Assemble the matrices so that the correct flags are set */ 329 PetscCall(MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY)); 330 PetscCall(MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY)); 331 332 /* Free work space */ 333 PetscCall(ISRestoreIndices(isrow,&irow)); 334 PetscCall(ISRestoreIndices(iscol_local,&icol)); 335 PetscCall(ISDestroy(&iscol_local)); 336 *B = newmat; 337 PetscFunctionReturn(0); 338 } 339 340 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array) 341 { 342 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 343 344 PetscFunctionBegin; 345 PetscCall(MatDenseRestoreArray(a->A,array)); 346 PetscFunctionReturn(0); 347 } 348 349 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array) 350 { 351 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 352 353 PetscFunctionBegin; 354 
PetscCall(MatDenseRestoreArrayRead(a->A,array)); 355 PetscFunctionReturn(0); 356 } 357 358 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array) 359 { 360 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 361 362 PetscFunctionBegin; 363 PetscCall(MatDenseRestoreArrayWrite(a->A,array)); 364 PetscFunctionReturn(0); 365 } 366 367 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 368 { 369 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 370 PetscInt nstash,reallocs; 371 372 PetscFunctionBegin; 373 if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 374 375 PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range)); 376 PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs)); 377 PetscCall(PetscInfo(mdn->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs)); 378 PetscFunctionReturn(0); 379 } 380 381 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 382 { 383 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 384 PetscInt i,*row,*col,flg,j,rstart,ncols; 385 PetscMPIInt n; 386 PetscScalar *val; 387 388 PetscFunctionBegin; 389 if (!mdn->donotstash && !mat->nooffprocentries) { 390 /* wait on receives */ 391 while (1) { 392 PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg)); 393 if (!flg) break; 394 395 for (i=0; i<n;) { 396 /* Now identify the consecutive vals belonging to the same row */ 397 for (j=i,rstart=row[j]; j<n; j++) { 398 if (row[j] != rstart) break; 399 } 400 if (j < n) ncols = j-i; 401 else ncols = n-i; 402 /* Now assemble all these values with a single function call */ 403 PetscCall(MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode)); 404 i = j; 405 } 406 } 407 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 408 } 409 410 PetscCall(MatAssemblyBegin(mdn->A,mode)); 411 PetscCall(MatAssemblyEnd(mdn->A,mode)); 412 PetscFunctionReturn(0); 413 } 414 415 PetscErrorCode 
MatZeroEntries_MPIDense(Mat A) 416 { 417 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 418 419 PetscFunctionBegin; 420 PetscCall(MatZeroEntries(l->A)); 421 PetscFunctionReturn(0); 422 } 423 424 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 425 { 426 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 427 PetscInt i,len,*lrows; 428 429 PetscFunctionBegin; 430 /* get locally owned rows */ 431 PetscCall(PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL)); 432 /* fix right hand side if needed */ 433 if (x && b) { 434 const PetscScalar *xx; 435 PetscScalar *bb; 436 437 PetscCall(VecGetArrayRead(x, &xx)); 438 PetscCall(VecGetArrayWrite(b, &bb)); 439 for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]]; 440 PetscCall(VecRestoreArrayRead(x, &xx)); 441 PetscCall(VecRestoreArrayWrite(b, &bb)); 442 } 443 PetscCall(MatZeroRows(l->A,len,lrows,0.0,NULL,NULL)); 444 if (diag != 0.0) { 445 Vec d; 446 447 PetscCall(MatCreateVecs(A,NULL,&d)); 448 PetscCall(VecSet(d,diag)); 449 PetscCall(MatDiagonalSet(A,d,INSERT_VALUES)); 450 PetscCall(VecDestroy(&d)); 451 } 452 PetscCall(PetscFree(lrows)); 453 PetscFunctionReturn(0); 454 } 455 456 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec); 457 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec); 458 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec); 459 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec); 460 461 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 462 { 463 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 464 const PetscScalar *ax; 465 PetscScalar *ay; 466 PetscMemType axmtype,aymtype; 467 468 PetscFunctionBegin; 469 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 470 PetscCall(VecGetArrayReadAndMemType(xx,&ax,&axmtype)); 471 PetscCall(VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype)); 472 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE)); 473 
PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE)); 474 PetscCall(VecRestoreArrayAndMemType(mdn->lvec,&ay)); 475 PetscCall(VecRestoreArrayReadAndMemType(xx,&ax)); 476 PetscCall((*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy)); 477 PetscFunctionReturn(0); 478 } 479 480 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 481 { 482 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 483 const PetscScalar *ax; 484 PetscScalar *ay; 485 PetscMemType axmtype,aymtype; 486 487 PetscFunctionBegin; 488 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 489 PetscCall(VecGetArrayReadAndMemType(xx,&ax,&axmtype)); 490 PetscCall(VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype)); 491 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE)); 492 PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE)); 493 PetscCall(VecRestoreArrayAndMemType(mdn->lvec,&ay)); 494 PetscCall(VecRestoreArrayReadAndMemType(xx,&ax)); 495 PetscCall((*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz)); 496 PetscFunctionReturn(0); 497 } 498 499 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 500 { 501 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 502 const PetscScalar *ax; 503 PetscScalar *ay; 504 PetscMemType axmtype,aymtype; 505 506 PetscFunctionBegin; 507 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 508 PetscCall(VecSet(yy,0.0)); 509 PetscCall((*a->A->ops->multtranspose)(a->A,xx,a->lvec)); 510 PetscCall(VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype)); 511 PetscCall(VecGetArrayAndMemType(yy,&ay,&aymtype)); 512 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM)); 513 PetscCall(PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM)); 514 PetscCall(VecRestoreArrayReadAndMemType(a->lvec,&ax)); 515 PetscCall(VecRestoreArrayAndMemType(yy,&ay)); 516 PetscFunctionReturn(0); 517 } 518 519 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 520 { 521 
Mat_MPIDense *a = (Mat_MPIDense*)A->data; 522 const PetscScalar *ax; 523 PetscScalar *ay; 524 PetscMemType axmtype,aymtype; 525 526 PetscFunctionBegin; 527 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 528 PetscCall(VecCopy(yy,zz)); 529 PetscCall((*a->A->ops->multtranspose)(a->A,xx,a->lvec)); 530 PetscCall(VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype)); 531 PetscCall(VecGetArrayAndMemType(zz,&ay,&aymtype)); 532 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM)); 533 PetscCall(PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM)); 534 PetscCall(VecRestoreArrayReadAndMemType(a->lvec,&ax)); 535 PetscCall(VecRestoreArrayAndMemType(zz,&ay)); 536 PetscFunctionReturn(0); 537 } 538 539 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 540 { 541 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 542 PetscInt lda,len,i,n,m = A->rmap->n,radd; 543 PetscScalar *x,zero = 0.0; 544 const PetscScalar *av; 545 546 PetscFunctionBegin; 547 PetscCall(VecSet(v,zero)); 548 PetscCall(VecGetArray(v,&x)); 549 PetscCall(VecGetSize(v,&n)); 550 PetscCheck(n == A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 551 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 552 radd = A->rmap->rstart*m; 553 PetscCall(MatDenseGetArrayRead(a->A,&av)); 554 PetscCall(MatDenseGetLDA(a->A,&lda)); 555 for (i=0; i<len; i++) { 556 x[i] = av[radd + i*lda + i]; 557 } 558 PetscCall(MatDenseRestoreArrayRead(a->A,&av)); 559 PetscCall(VecRestoreArray(v,&x)); 560 PetscFunctionReturn(0); 561 } 562 563 PetscErrorCode MatDestroy_MPIDense(Mat mat) 564 { 565 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 566 567 PetscFunctionBegin; 568 #if defined(PETSC_USE_LOG) 569 PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N); 570 #endif 571 PetscCall(MatStashDestroy_Private(&mat->stash)); 572 PetscCheck(!mdn->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() 
first"); 573 PetscCheck(!mdn->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 574 PetscCall(MatDestroy(&mdn->A)); 575 PetscCall(VecDestroy(&mdn->lvec)); 576 PetscCall(PetscSFDestroy(&mdn->Mvctx)); 577 PetscCall(VecDestroy(&mdn->cvec)); 578 PetscCall(MatDestroy(&mdn->cmat)); 579 580 PetscCall(PetscFree(mat->data)); 581 PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL)); 582 583 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL)); 584 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL)); 585 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL)); 586 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL)); 587 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL)); 588 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL)); 589 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL)); 590 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL)); 591 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL)); 592 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL)); 593 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL)); 594 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL)); 595 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",NULL)); 596 #if defined(PETSC_HAVE_ELEMENTAL) 597 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL)); 598 #endif 599 #if defined(PETSC_HAVE_SCALAPACK) 600 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL)); 601 #endif 602 
PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL)); 603 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL)); 604 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL)); 605 #if defined (PETSC_HAVE_CUDA) 606 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL)); 607 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL)); 608 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",NULL)); 609 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidensecuda_mpidense_C",NULL)); 610 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL)); 611 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL)); 612 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL)); 613 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL)); 614 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDAGetArray_C",NULL)); 615 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDAGetArrayRead_C",NULL)); 616 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDAGetArrayWrite_C",NULL)); 617 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDARestoreArray_C",NULL)); 618 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDARestoreArrayRead_C",NULL)); 619 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDARestoreArrayWrite_C",NULL)); 620 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDAPlaceArray_C",NULL)); 621 
PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDAResetArray_C",NULL)); 622 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDAReplaceArray_C",NULL)); 623 #endif 624 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL)); 625 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL)); 626 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL)); 627 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL)); 628 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL)); 629 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL)); 630 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL)); 631 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL)); 632 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL)); 633 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL)); 634 635 PetscCall(PetscObjectCompose((PetscObject)mat,"DiagonalBlock",NULL)); 636 PetscFunctionReturn(0); 637 } 638 639 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer); 640 641 #include <petscdraw.h> 642 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 643 { 644 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 645 PetscMPIInt rank; 646 PetscViewerType vtype; 647 PetscBool iascii,isdraw; 648 PetscViewer sviewer; 649 PetscViewerFormat format; 650 651 PetscFunctionBegin; 652 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank)); 653 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 654 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 655 if (iascii) { 656 PetscCall(PetscViewerGetType(viewer,&vtype)); 657 
PetscCall(PetscViewerGetFormat(viewer,&format)); 658 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 659 MatInfo info; 660 PetscCall(MatGetInfo(mat,MAT_LOCAL,&info)); 661 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 662 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory)); 663 PetscCall(PetscViewerFlush(viewer)); 664 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 665 if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx,viewer)); 666 PetscFunctionReturn(0); 667 } else if (format == PETSC_VIEWER_ASCII_INFO) { 668 PetscFunctionReturn(0); 669 } 670 } else if (isdraw) { 671 PetscDraw draw; 672 PetscBool isnull; 673 674 PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw)); 675 PetscCall(PetscDrawIsNull(draw,&isnull)); 676 if (isnull) PetscFunctionReturn(0); 677 } 678 679 { 680 /* assemble the entire matrix onto first processor. */ 681 Mat A; 682 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 683 PetscInt *cols; 684 PetscScalar *vals; 685 686 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat),&A)); 687 if (rank == 0) { 688 PetscCall(MatSetSizes(A,M,N,M,N)); 689 } else { 690 PetscCall(MatSetSizes(A,0,0,M,N)); 691 } 692 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 693 PetscCall(MatSetType(A,MATMPIDENSE)); 694 PetscCall(MatMPIDenseSetPreallocation(A,NULL)); 695 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)A)); 696 697 /* Copy the matrix ... 
This isn't the most efficient means, 698 but it's quick for now */ 699 A->insertmode = INSERT_VALUES; 700 701 row = mat->rmap->rstart; 702 m = mdn->A->rmap->n; 703 for (i=0; i<m; i++) { 704 PetscCall(MatGetRow_MPIDense(mat,row,&nz,&cols,&vals)); 705 PetscCall(MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES)); 706 PetscCall(MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals)); 707 row++; 708 } 709 710 PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 711 PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 712 PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 713 if (rank == 0) { 714 PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name)); 715 PetscCall(MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer)); 716 } 717 PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 718 PetscCall(PetscViewerFlush(viewer)); 719 PetscCall(MatDestroy(&A)); 720 } 721 PetscFunctionReturn(0); 722 } 723 724 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 725 { 726 PetscBool iascii,isbinary,isdraw,issocket; 727 728 PetscFunctionBegin; 729 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 730 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 731 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket)); 732 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 733 734 if (iascii || issocket || isdraw) { 735 PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat,viewer)); 736 } else if (isbinary) PetscCall(MatView_Dense_Binary(mat,viewer)); 737 PetscFunctionReturn(0); 738 } 739 740 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 741 { 742 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 743 Mat mdn = mat->A; 744 PetscLogDouble isend[5],irecv[5]; 745 746 PetscFunctionBegin; 747 info->block_size = 1.0; 748 749 PetscCall(MatGetInfo(mdn,MAT_LOCAL,info)); 750 751 
/* NOTE(review): continuation of MatGetInfo_MPIDense(); the function header, the
   Mat_MPIDense/isend/irecv declarations and the MAT_LOCAL query of the sequential
   block live on the previous source line. */
  /* Pack the five local counters so a single MPI reduction can combine them. */
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    /* element-wise maximum of the per-rank counters */
    PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    /* element-wise sum of the per-rank counters */
    PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* MatSetOption() handler for MATMPIDENSE: preallocation-sensitive options are
   forwarded to the sequential inner matrix, stash-related flags are stored on the
   parallel wrapper, and options with no meaning for a dense format are logged and
   ignored; anything unrecognized is an error. */
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    PetscCall(MatSetOption(a->A,op,flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;              /* remembered for MatSetValues ordering */
    PetscCall(MatSetOption(a->A,op,flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;               /* skip the stash during assembly */
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
    break;
  default:
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

/* A <- diag(ll) * A * diag(rr); either scaling vector may be NULL. Left scaling is
   purely local (rows are distributed); right scaling first broadcasts rr to every
   rank through the matrix's PetscSF (built lazily) because each rank stores all
   columns of its row block. */
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  const PetscScalar *l;
  PetscScalar       x,*v,*vv,*r;
  PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A,&vv));
  PetscCall(MatDenseGetLDA(mdn->A,&lda));
  PetscCall(MatGetLocalSize(A,&s2,&s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll,&s2a));
    PetscCheck(s2a == s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2);
    PetscCall(VecGetArrayRead(ll,&l));
    for (i=0; i<m; i++) {
      x = l[i];
      v = vv + i;                      /* walk row i: column stride is lda */
      for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
    }
    PetscCall(VecRestoreArrayRead(ll,&l));
    PetscCall(PetscLogFlops(1.0*n*m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr,&s3a));
    PetscCheck(s3a == s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3);
    PetscCall(VecGetArrayRead(rr,&ar));
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec,&r));
    /* replicate the distributed right-scaling vector into the local work vector */
    PetscCall(PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr,&ar));
    for (i=0; i<n; i++) {
      x = r[i];
      v = vv + i*lda;                  /* column i is contiguous in memory */
      for (j=0; j<m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec,&r));
    PetscCall(PetscLogFlops(1.0*n*m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A,&vv));
  PetscFunctionReturn(0);
}

/* Frobenius-, one- and infinity-norm of a parallel dense matrix by combining
   per-rank contributions (single-rank case delegates entirely to the sequential
   matrix); the two-norm is unsupported. */
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  PetscInt          i,j;
  PetscMPIInt       size;
  PetscReal         sum = 0.0;
  const PetscScalar *av,*v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArrayRead(mdn->A,&av));
  v = av;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
  if (size == 1) {
    PetscCall(MatNorm(mdn->A,type,nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      /* NOTE(review): assumes the local array is contiguous (lda == local rows) —
         consistent with the flat n*m traversal here; verify against the Seq layout */
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      PetscCall(MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
      *nrm = PetscSqrtReal(*nrm);
      PetscCall(PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n));
    } else if (type == NORM_1) {
      /* sum |a_ij| per column locally, reduce, then take the max column sum */
      PetscReal *tmp,*tmp2;
      PetscCall(PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2));
      *nrm = 0.0;
      v    = av;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      PetscCall(MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree2(tmp,tmp2));
      PetscCall(PetscLogFlops(A->cmap->n*A->rmap->n));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      PetscCall(MatNorm(mdn->A,type,&ntemp));
      PetscCall(MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A)));
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscCall(MatDenseRestoreArrayRead(mdn->A,&av));
  PetscFunctionReturn(0);
}

/* NOTE(review): return type of MatTranspose_MPIDense(); the rest of its signature
   is continued on the next source line. */
PetscErrorCode
MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat          B;
  PetscInt     M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscInt     j,i,lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  /* Build B = A^T by inserting each local column of A as one global row of B. */
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A,*matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
    PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M));
    PetscCall(MatSetType(B,((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B,NULL));
  } else B = *matout;

  m = a->A->rmap->n; n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A,(const PetscScalar**)&v));
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(PetscMalloc1(m,&rwork));
  for (i=0; i<m; i++) rwork[i] = rstart + i;     /* global indices of local rows */
  for (j=0; j<n; j++) {
    /* column j of A (contiguous, length m) becomes row j of B */
    PetscCall(MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES));
    v += lda;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place: replace A's contents by B's, keeping A's header */
    PetscCall(MatHeaderMerge(A,&B));
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

/* Lays out both maps and falls back to default preallocation if none was given. */
PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!A->preallocated) {
    PetscCall(MatMPIDenseSetPreallocation(A,NULL));
  }
  PetscFunctionReturn(0);
}

/* Y <- Y + alpha*X, delegated to the sequential inner matrices (row layouts are
   assumed compatible by the caller). */
PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  PetscCall(MatAXPY(A->A,alpha,B->A,str));
  PetscFunctionReturn(0);
}

/* Complex-conjugate every entry; purely local. */
PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense *a = (Mat_MPIDense*)mat->data;

  PetscFunctionBegin;
  PetscCall(MatConjugate(a->A));
  PetscFunctionReturn(0);
}

/* Keep only the real part of every entry; purely local. */
PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscFunctionReturn(0);
}

/* Keep only the imaginary part of every entry; purely local. */
PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscFunctionReturn(0);
}

/* Extract global column `col` into v by calling the inner matrix's op (every rank
   stores all columns of its row block, so this is local). */
static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
{
  Mat_MPIDense *a = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCheck(a->A,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
  PetscCheck(a->A->ops->getcolumnvector,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
  PetscCall((*a->A->ops->getcolumnvector)(a->A,v,col));
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat,PetscInt,PetscReal*);

/* Per-column reductions (norms, sums, means) over the distributed rows. MEAN
   variants are computed as a SUM on the local block and divided by the global row
   count after the reduction. */
PetscErrorCode MatGetColumnReductions_MPIDense(Mat A,PetscInt type,PetscReal *reductions)
{
  PetscInt     i,m,n;
  Mat_MPIDense *a = (Mat_MPIDense*) A->data;
  PetscReal    *work;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A,&m,&n));
  PetscCall(PetscMalloc1(n,&work));
  if (type == REDUCTION_MEAN_REALPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_REALPART,work));
  } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_IMAGINARYPART,work));
  } else {
    PetscCall(MatGetColumnReductions_SeqDense(a->A,type,work));
  }
/* NOTE(review): continuation of MatGetColumnReductions_MPIDense() begun on the
   previous source line. */
  if (type == NORM_2) {
    /* local values are column 2-norms: square them so they sum correctly */
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,A->hdr.comm));
  } else {
    PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,A->hdr.comm));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<n; i++) reductions[i] /= m;  /* mean = global sum / global rows */
  }
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/* A <- A + alpha*I performed on the device; only the locally owned part of the
   global diagonal (rows [rstart,rend) intersected with [0,N)) is touched. */
PetscErrorCode MatShift_MPIDenseCUDA(Mat A,PetscScalar alpha)
{
  PetscScalar *da;
  PetscInt    lda;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArray(A,&da));
  PetscCall(MatDenseGetLDA(A,&lda));
  PetscCall(PetscInfo(A,"Performing Shift on backend\n"));
  PetscCall(MatShift_DenseCUDA_Private(da,alpha,lda,A->rmap->rstart,A->rmap->rend,A->cmap->N));
  PetscCall(MatDenseCUDARestoreArray(A,&da));
  PetscFunctionReturn(0);
}

/* Expose column `col` as an MPI CUDA vector aliasing the device storage. Only one
   column vector may be outstanding; vecinuse records col+1 until restored. */
static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscInt     lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) { /* lazily created placeholder vector, reused across calls */
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
  }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Undo MatDenseGetColumnVec_MPIDenseCUDA(): release the array pair and detach. */
static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAResetArray(a->cvec));
  *v = NULL;
  PetscFunctionReturn(0);
}

/* Read-only variant: same aliasing scheme, plus a read lock on the vector. */
static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscInt     lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
  }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  PetscCall(VecLockReadPush(a->cvec));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Undo the read-only access: restore array, pop the read lock, detach. */
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecCUDAResetArray(a->cvec));
  *v = NULL;
  PetscFunctionReturn(0);
}

/* Write-only variant; current contents of the column need not be valid on entry. */
static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscInt     lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
  }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Undo the write-only access. */
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAResetArray(a->cvec));
  *v = NULL;
  PetscFunctionReturn(0);
}

/* The following thin wrappers forward the MatDenseCUDA array-management API to the
   sequential inner matrix; the outstanding-column/submatrix guards apply only to
   the operations that swap the backing storage. */
static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAPlaceArray(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAResetArray(l->A));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAReplaceArray(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArrayWrite(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArrayWrite(l->A,a));
  PetscFunctionReturn(0);
}

/* Read-access wrapper over the sequential inner matrix's device array. */
static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArrayRead(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArrayRead(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArray(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArray(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);

/* Switch between host and device execution for a MATMPIDENSECUDA matrix: rebinds
   the inner matrix, re-registers the column-vector accessors for the chosen side,
   swaps the shift op, and discards cached column vector / submatrix objects whose
   type no longer matches. */
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense *d = (Mat_MPIDense*)mat->data;

  PetscFunctionBegin;
  PetscCheck(!d->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!d->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) PetscCall(MatBindToCPU(d->A,bind));
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    /* cached helpers created while bound to CPU have the wrong type: drop them */
    PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda));
    if (!iscuda) {
      PetscCall(VecDestroy(&d->cvec));
    }
    PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda));
    if (!iscuda) {
      PetscCall(MatDestroy(&d->cmat));
    }
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA));
    mat->ops->shift = MatShift_MPIDenseCUDA;
  } else {
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense));
    mat->ops->shift = MatShift_MPIDense;
  }
  if (d->cmat) PetscCall(MatBindToCPU(d->cmat,bind));
  PetscFunctionReturn(0);
}

/* Preallocate device storage (optionally adopting d_data) for a MATMPIDENSECUDA
   matrix; silently a no-op for other types. */
PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
{
  Mat_MPIDense *d = (Mat_MPIDense*)A->data;
  PetscBool    iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda));
  if (!iscuda) PetscFunctionReturn(0);
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!d->A) {
    /* inner matrix is sequential: local rows by ALL global columns */
    PetscCall(MatCreate(PETSC_COMM_SELF,&d->A));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)d->A));
    PetscCall(MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N));
  }
  PetscCall(MatSetType(d->A,MATSEQDENSECUDA));
  PetscCall(MatSeqDenseCUDASetPreallocation(d->A,d_data));
  A->preallocated = PETSC_TRUE;
  A->assembled    = PETSC_TRUE;
  PetscFunctionReturn(0);
}
#endif

/* Fill the local block with random values; no communication needed. */
static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense *d = (Mat_MPIDense*)x->data;

  PetscFunctionBegin;
  PetscCall(MatSetRandom(d->A,rctx));
  PetscFunctionReturn(0);
}

/* A dense matrix stores every entry, so the diagonal is never "missing";
   the optional location output d is left untouched. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
/* NOTE(review): this declaration is continued on the next source line. */
static PetscErrorCode
MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);

/* -------------------------------------------------------------------*/
/* Function table for MATMPIDENSE; slot numbers follow the _MatOps layout and NULL
   means "operation not implemented for this type". */
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 10*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 29*/ MatSetUp_MPIDense,
                                        NULL,
                                        NULL,
                                        MatGetDiagonalBlock_MPIDense,
                                        NULL,
                                /* 34*/ MatDuplicate_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        NULL,
                                        MatGetValues_MPIDense,
                                        MatCopy_MPIDense,
                                /* 44*/ NULL,
                                        MatScale_MPIDense,
                                        MatShift_MPIDense,
                                        NULL,
                                        NULL,
                                /* 49*/ MatSetRandom_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 54*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        NULL,
                                        NULL,
                                /* 64*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 69*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 74*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 79*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 83*/ MatLoad_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 89*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 94*/ NULL,
                                        NULL,
                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                        NULL,
                                /* 99*/ MatProductSetFromOptions_MPIDense,
                                        NULL,
                                        NULL,
                                        MatConjugate_MPIDense,
                                        NULL,
                                /*104*/ NULL,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        NULL,
                                        NULL,
                                /*109*/ NULL,
                                        NULL,
                                        NULL,
                                        MatGetColumnVector_MPIDense,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*119*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*124*/ NULL,
                                        MatGetColumnReductions_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*129*/ NULL,
                                        NULL,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        NULL,
                                /*134*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*139*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                /*145*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*150*/ NULL
};

/* Type-specific implementation of MatMPIDenseSetPreallocation(): creates (once)
   the sequential inner matrix of size (local rows) x (ALL global columns), chooses
   SEQDENSE or SEQDENSECUDA to match the wrapper's type, and hands the optional
   user array to it. */
PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense *a = (Mat_MPIDense*)mat->data;
  PetscBool    iscuda;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  if (!a->A) {
    PetscCall(MatCreate(PETSC_COMM_SELF,&a->A));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
    PetscCall(MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N));
  }
  PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda));
  PetscCall(MatSetType(a->A,iscuda ?
MATSEQDENSECUDA : MATSEQDENSE));
  PetscCall(MatSeqDenseSetPreallocation(a->A,data));
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/* MPIAIJ -> MPIDense conversion: densify the local AIJ block sequentially, then
   rebuild the parallel matrix by concatenating the per-rank pieces. */
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat B,C;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&C));
  PetscCall(MatConvert_SeqAIJ_SeqDense(C,MATSEQDENSE,MAT_INITIAL_MATRIX,&B));
  PetscCall(MatDestroy(&C));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A,&C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(0);
}

/* MPIDense -> MPIAIJ conversion via the local dense block; mirrors the routine
   above with the roles of the formats reversed. */
PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat B,C;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLocalMatrix(A,&C));
  PetscCall(MatConvert_SeqDense_SeqAIJ(C,MATSEQAIJ,MAT_INITIAL_MATRIX,&B));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A,&C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
/* MPIDense -> Elemental conversion: each rank inserts its full local row block. */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat         mat_elemental;
  PetscScalar *v;
  PetscInt    m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    PetscCall(MatZeroEntries(*newmat));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N));
    PetscCall(MatSetType(mat_elemental,MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
    PetscCall(MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE));
  }

  PetscCall(PetscMalloc2(m,&rows,N,&cols));
  for (i=0; i<N; i++) cols[i] = i;                 /* all global columns */
  for (i=0; i<m; i++) rows[i] = rstart + i;        /* global indices of local rows */

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  PetscCall(MatDenseGetArray(A,&v));
  PetscCall(MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES));
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDenseRestoreArray(A,&v));
  PetscCall(PetscFree2(rows,cols));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A,&mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif

/* Direct access to one column of the local block; must be paired with the
   restore call below. */
static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetColumn(mat->A,col,vals));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreColumn(mat->A,vals));
  PetscFunctionReturn(0);
}

/* Stack the per-rank sequential matrices inmat into one parallel dense matrix.
   MAT_INITIAL_MATRIX performs the symbolic phase (size checks + creation); the
   numeric phase just copies the local block. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  Mat_MPIDense *mat;
  PetscInt     m,nloc,N;  /* nloc is currently queried but unused — TODO confirm */

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat,&m,&N));
  PetscCall(MatGetLocalSize(inmat,NULL,&nloc));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) {
      PetscCall(PetscSplitOwnership(comm,&n,&N));
    }
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm));
    PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N);

    PetscCall(MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat));
    PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
  }

  /* numeric phase */
  mat = (Mat_MPIDense*)(*outmat)->data;
  PetscCall(MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN));
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/* MPIDenseCUDA -> MPIDense conversion: duplicate/copy, rebind to CPU, strip the
   CUDA-only composed methods, and convert the inner matrix back to host dense. */
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat          B;
  Mat_MPIDense *m;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatDuplicate(M,MAT_COPY_VALUES,newmat));
  } else if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatCopy(M,*newmat,SAME_NONZERO_PATTERN));
  }

  B = *newmat;
  PetscCall(MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE));
  PetscCall(PetscFree(B->defaultvectype));
  PetscCall(PetscStrallocpy(VECSTANDARD,&B->defaultvectype));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE));
  /* deregister all CUDA-only query functions */
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL));
/* NOTE(review): continuation of MatConvert_MPIDenseCUDA_MPIDense(); keeps
   deregistering the CUDA-only composed methods. */
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL));
  m = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    PetscCall(MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A));
  }
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}

/* MPIDense -> MPIDenseCUDA conversion: duplicate/copy, switch default vector type
   to CUDA, register every CUDA-specific composed method, convert the inner matrix
   to SEQDENSECUDA and install the bind-to-CPU hook. */
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat          B;
  Mat_MPIDense *m;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatDuplicate(M,MAT_COPY_VALUES,newmat));
  } else if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatCopy(M,*newmat,SAME_NONZERO_PATTERN));
  }

  B = *newmat;
  PetscCall(PetscFree(B->defaultvectype));
  PetscCall(PetscStrallocpy(VECCUDA,&B->defaultvectype));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                            MatDenseCUDAGetArrayRead_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                           MatDenseCUDAGetArrayWrite_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                            MatDenseCUDARestoreArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                        MatDenseCUDARestoreArrayRead_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",                       MatDenseCUDARestoreArrayWrite_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                              MatDenseCUDAPlaceArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                              MatDenseCUDAResetArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                            MatDenseCUDAReplaceArray_MPIDenseCUDA));
  m = (Mat_MPIDense*)(B->data);
  if (m->A) {
    PetscCall(MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A));
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    B->offloadmask =
PETSC_OFFLOAD_UNALLOCATED; 1731 } 1732 PetscCall(MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE)); 1733 1734 B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA; 1735 PetscFunctionReturn(0); 1736 } 1737 #endif 1738 1739 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v) 1740 { 1741 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1742 PetscInt lda; 1743 1744 PetscFunctionBegin; 1745 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1746 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1747 if (!a->cvec) { 1748 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec)); 1749 } 1750 a->vecinuse = col + 1; 1751 PetscCall(MatDenseGetLDA(a->A,&lda)); 1752 PetscCall(MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse)); 1753 PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda)); 1754 *v = a->cvec; 1755 PetscFunctionReturn(0); 1756 } 1757 1758 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v) 1759 { 1760 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1761 1762 PetscFunctionBegin; 1763 PetscCheck(a->vecinuse,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1764 PetscCheck(a->cvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 1765 a->vecinuse = 0; 1766 PetscCall(MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse)); 1767 PetscCall(VecResetArray(a->cvec)); 1768 *v = NULL; 1769 PetscFunctionReturn(0); 1770 } 1771 1772 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v) 1773 { 1774 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1775 PetscInt lda; 1776 1777 PetscFunctionBegin; 1778 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1779 
PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1780 if (!a->cvec) { 1781 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec)); 1782 } 1783 a->vecinuse = col + 1; 1784 PetscCall(MatDenseGetLDA(a->A,&lda)); 1785 PetscCall(MatDenseGetArrayRead(a->A,&a->ptrinuse)); 1786 PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda)); 1787 PetscCall(VecLockReadPush(a->cvec)); 1788 *v = a->cvec; 1789 PetscFunctionReturn(0); 1790 } 1791 1792 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v) 1793 { 1794 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1795 1796 PetscFunctionBegin; 1797 PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1798 PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 1799 a->vecinuse = 0; 1800 PetscCall(MatDenseRestoreArrayRead(a->A,&a->ptrinuse)); 1801 PetscCall(VecLockReadPop(a->cvec)); 1802 PetscCall(VecResetArray(a->cvec)); 1803 *v = NULL; 1804 PetscFunctionReturn(0); 1805 } 1806 1807 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v) 1808 { 1809 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1810 PetscInt lda; 1811 1812 PetscFunctionBegin; 1813 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1814 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1815 if (!a->cvec) { 1816 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec)); 1817 } 1818 a->vecinuse = col + 1; 1819 PetscCall(MatDenseGetLDA(a->A,&lda)); 1820 PetscCall(MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse)); 1821 PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * 
(size_t)lda)); 1822 *v = a->cvec; 1823 PetscFunctionReturn(0); 1824 } 1825 1826 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v) 1827 { 1828 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1829 1830 PetscFunctionBegin; 1831 PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1832 PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 1833 a->vecinuse = 0; 1834 PetscCall(MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse)); 1835 PetscCall(VecResetArray(a->cvec)); 1836 *v = NULL; 1837 PetscFunctionReturn(0); 1838 } 1839 1840 PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt rbegin,PetscInt rend,PetscInt cbegin,PetscInt cend,Mat *v) 1841 { 1842 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1843 Mat_MPIDense *c; 1844 MPI_Comm comm; 1845 PetscInt pbegin, pend; 1846 1847 PetscFunctionBegin; 1848 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 1849 PetscCheck(!a->vecinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1850 PetscCheck(!a->matinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1851 pbegin = PetscMax(0,PetscMin(A->rmap->rend,rbegin)-A->rmap->rstart); 1852 pend = PetscMin(A->rmap->n,PetscMax(0,rend-A->rmap->rstart)); 1853 if (!a->cmat) { 1854 PetscCall(MatCreate(comm,&a->cmat)); 1855 PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat)); 1856 PetscCall(MatSetType(a->cmat,((PetscObject)A)->type_name)); 1857 if (rend-rbegin==A->rmap->N) PetscCall(PetscLayoutReference(A->rmap,&a->cmat->rmap)); 1858 else { 1859 PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap,pend-pbegin)); 1860 PetscCall(PetscLayoutSetSize(a->cmat->rmap,rend-rbegin)); 1861 PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 1862 } 1863 PetscCall(PetscLayoutSetSize(a->cmat->cmap,cend-cbegin)); 1864 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 1865 } else { 1866 PetscBool same = 
(PetscBool)(rend-rbegin == a->cmat->rmap->N); 1867 if (same && a->cmat->rmap->N != A->rmap->N) { 1868 same = (PetscBool)(pend-pbegin == a->cmat->rmap->n); 1869 PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&same,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A))); 1870 } 1871 if (!same) { 1872 PetscCall(PetscLayoutDestroy(&a->cmat->rmap)); 1873 PetscCall(PetscLayoutCreate(comm,&a->cmat->rmap)); 1874 PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap,pend-pbegin)); 1875 PetscCall(PetscLayoutSetSize(a->cmat->rmap,rend-rbegin)); 1876 PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 1877 } 1878 if (cend-cbegin != a->cmat->cmap->N) { 1879 PetscCall(PetscLayoutDestroy(&a->cmat->cmap)); 1880 PetscCall(PetscLayoutCreate(comm,&a->cmat->cmap)); 1881 PetscCall(PetscLayoutSetSize(a->cmat->cmap,cend-cbegin)); 1882 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 1883 } 1884 } 1885 c = (Mat_MPIDense*)a->cmat->data; 1886 PetscCheck(!c->A,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1887 PetscCall(MatDenseGetSubMatrix(a->A,pbegin,pend,cbegin,cend,&c->A)); 1888 a->cmat->preallocated = PETSC_TRUE; 1889 a->cmat->assembled = PETSC_TRUE; 1890 a->matinuse = cbegin + 1; 1891 *v = a->cmat; 1892 PetscFunctionReturn(0); 1893 } 1894 1895 PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v) 1896 { 1897 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1898 Mat_MPIDense *c; 1899 1900 PetscFunctionBegin; 1901 PetscCheck(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first"); 1902 PetscCheck(a->cmat,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix"); 1903 PetscCheck(*v == a->cmat,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()"); 1904 a->matinuse = 0; 1905 c = (Mat_MPIDense*)a->cmat->data; 1906 PetscCall(MatDenseRestoreSubMatrix(a->A,&c->A)); 1907 *v = NULL; 1908 PetscFunctionReturn(0); 1909 } 1910 1911 /*MC 1912 MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be 
used for distributed dense matrices. 1913 1914 Options Database Keys: 1915 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions() 1916 1917 Level: beginner 1918 1919 .seealso: `MatCreateDense()` 1920 1921 M*/ 1922 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) 1923 { 1924 Mat_MPIDense *a; 1925 1926 PetscFunctionBegin; 1927 PetscCall(PetscNewLog(mat,&a)); 1928 mat->data = (void*)a; 1929 PetscCall(PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps))); 1930 1931 mat->insertmode = NOT_SET_VALUES; 1932 1933 /* build cache for off array entries formed */ 1934 a->donotstash = PETSC_FALSE; 1935 1936 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash)); 1937 1938 /* stuff used for matrix vector multiply */ 1939 a->lvec = NULL; 1940 a->Mvctx = NULL; 1941 a->roworiented = PETSC_TRUE; 1942 1943 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense)); 1944 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense)); 1945 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense)); 1946 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense)); 1947 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense)); 1948 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense)); 1949 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense)); 1950 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense)); 1951 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense)); 1952 
PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense)); 1953 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense)); 1954 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense)); 1955 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense)); 1956 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense)); 1957 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense)); 1958 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense)); 1959 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense)); 1960 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense)); 1961 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense)); 1962 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense)); 1963 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",MatConvert_MPIDense_MPIAIJ)); 1964 #if defined(PETSC_HAVE_ELEMENTAL) 1965 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental)); 1966 #endif 1967 #if defined(PETSC_HAVE_SCALAPACK) 1968 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK)); 1969 #endif 1970 #if defined(PETSC_HAVE_CUDA) 1971 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA)); 1972 #endif 
1973 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense)); 1974 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense)); 1975 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ)); 1976 #if defined(PETSC_HAVE_CUDA) 1977 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense)); 1978 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ)); 1979 #endif 1980 1981 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense)); 1982 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense)); 1983 PetscCall(PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE)); 1984 PetscFunctionReturn(0); 1985 } 1986 1987 /*MC 1988 MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs. 1989 1990 Options Database Keys: 1991 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions() 1992 1993 Level: beginner 1994 1995 .seealso: 1996 1997 M*/ 1998 #if defined(PETSC_HAVE_CUDA) 1999 #include <petsc/private/deviceimpl.h> 2000 PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B) 2001 { 2002 PetscFunctionBegin; 2003 PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA)); 2004 PetscCall(MatCreate_MPIDense(B)); 2005 PetscCall(MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B)); 2006 PetscFunctionReturn(0); 2007 } 2008 #endif 2009 2010 /*MC 2011 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 
2012 2013 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 2014 and MATMPIDENSE otherwise. 2015 2016 Options Database Keys: 2017 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 2018 2019 Level: beginner 2020 2021 .seealso: `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA` 2022 M*/ 2023 2024 /*MC 2025 MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs. 2026 2027 This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator, 2028 and MATMPIDENSECUDA otherwise. 2029 2030 Options Database Keys: 2031 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions() 2032 2033 Level: beginner 2034 2035 .seealso: `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATDENSE` 2036 M*/ 2037 2038 /*@C 2039 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 2040 2041 Collective 2042 2043 Input Parameters: 2044 . B - the matrix 2045 - data - optional location of matrix data. Set data=NULL for PETSc 2046 to control all matrix memory allocation. 2047 2048 Notes: 2049 The dense format is fully compatible with standard Fortran 77 2050 storage by columns. 2051 2052 The data input variable is intended primarily for Fortran programmers 2053 who wish to allocate their own matrix memory space. Most users should 2054 set data=NULL. 2055 2056 Level: intermediate 2057 2058 .seealso: `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 2059 @*/ 2060 PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data) 2061 { 2062 PetscFunctionBegin; 2063 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2064 PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data)); 2065 PetscFunctionReturn(0); 2066 } 2067 2068 /*@ 2069 MatDensePlaceArray - Allows one to replace the array in a dense matrix with an 2070 array provided by the user. 
This is useful to avoid copying an array 2071 into a matrix 2072 2073 Not Collective 2074 2075 Input Parameters: 2076 + mat - the matrix 2077 - array - the array in column major order 2078 2079 Notes: 2080 You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be 2081 freed when the matrix is destroyed. 2082 2083 Level: developer 2084 2085 .seealso: `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()` 2086 2087 @*/ 2088 PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar *array) 2089 { 2090 PetscFunctionBegin; 2091 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2092 PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array)); 2093 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2094 #if defined(PETSC_HAVE_CUDA) 2095 mat->offloadmask = PETSC_OFFLOAD_CPU; 2096 #endif 2097 PetscFunctionReturn(0); 2098 } 2099 2100 /*@ 2101 MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray() 2102 2103 Not Collective 2104 2105 Input Parameters: 2106 . mat - the matrix 2107 2108 Notes: 2109 You can only call this after a call to MatDensePlaceArray() 2110 2111 Level: developer 2112 2113 .seealso: `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()` 2114 2115 @*/ 2116 PetscErrorCode MatDenseResetArray(Mat mat) 2117 { 2118 PetscFunctionBegin; 2119 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2120 PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat)); 2121 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2122 PetscFunctionReturn(0); 2123 } 2124 2125 /*@ 2126 MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an 2127 array provided by the user. 
This is useful to avoid copying an array 2128 into a matrix 2129 2130 Not Collective 2131 2132 Input Parameters: 2133 + mat - the matrix 2134 - array - the array in column major order 2135 2136 Notes: 2137 The memory passed in MUST be obtained with PetscMalloc() and CANNOT be 2138 freed by the user. It will be freed when the matrix is destroyed. 2139 2140 Level: developer 2141 2142 .seealso: `MatDenseGetArray()`, `VecReplaceArray()` 2143 @*/ 2144 PetscErrorCode MatDenseReplaceArray(Mat mat,const PetscScalar *array) 2145 { 2146 PetscFunctionBegin; 2147 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2148 PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array)); 2149 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2150 #if defined(PETSC_HAVE_CUDA) 2151 mat->offloadmask = PETSC_OFFLOAD_CPU; 2152 #endif 2153 PetscFunctionReturn(0); 2154 } 2155 2156 #if defined(PETSC_HAVE_CUDA) 2157 /*@C 2158 MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an 2159 array provided by the user. This is useful to avoid copying an array 2160 into a matrix 2161 2162 Not Collective 2163 2164 Input Parameters: 2165 + mat - the matrix 2166 - array - the array in column major order 2167 2168 Notes: 2169 You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be 2170 freed when the matrix is destroyed. The array must have been allocated with cudaMalloc(). 
2171 2172 Level: developer 2173 2174 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()` 2175 @*/ 2176 PetscErrorCode MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array) 2177 { 2178 PetscFunctionBegin; 2179 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2180 PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array)); 2181 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2182 mat->offloadmask = PETSC_OFFLOAD_GPU; 2183 PetscFunctionReturn(0); 2184 } 2185 2186 /*@C 2187 MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray() 2188 2189 Not Collective 2190 2191 Input Parameters: 2192 . mat - the matrix 2193 2194 Notes: 2195 You can only call this after a call to MatDenseCUDAPlaceArray() 2196 2197 Level: developer 2198 2199 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()` 2200 2201 @*/ 2202 PetscErrorCode MatDenseCUDAResetArray(Mat mat) 2203 { 2204 PetscFunctionBegin; 2205 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2206 PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat)); 2207 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2208 PetscFunctionReturn(0); 2209 } 2210 2211 /*@C 2212 MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an 2213 array provided by the user. This is useful to avoid copying an array 2214 into a matrix 2215 2216 Not Collective 2217 2218 Input Parameters: 2219 + mat - the matrix 2220 - array - the array in column major order 2221 2222 Notes: 2223 This permanently replaces the GPU array and frees the memory associated with the old GPU array. 2224 The memory passed in CANNOT be freed by the user. It will be freed 2225 when the matrix is destroyed. The array should respect the matrix leading dimension. 
2226 2227 Level: developer 2228 2229 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()` 2230 @*/ 2231 PetscErrorCode MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array) 2232 { 2233 PetscFunctionBegin; 2234 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2235 PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array)); 2236 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2237 mat->offloadmask = PETSC_OFFLOAD_GPU; 2238 PetscFunctionReturn(0); 2239 } 2240 2241 /*@C 2242 MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix. 2243 2244 Not Collective 2245 2246 Input Parameters: 2247 . A - the matrix 2248 2249 Output Parameters 2250 . array - the GPU array in column major order 2251 2252 Notes: 2253 The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed. 2254 2255 Level: developer 2256 2257 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArrayRead()` 2258 @*/ 2259 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a) 2260 { 2261 PetscFunctionBegin; 2262 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2263 PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a)); 2264 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2265 PetscFunctionReturn(0); 2266 } 2267 2268 /*@C 2269 MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite(). 
2270 2271 Not Collective 2272 2273 Input Parameters: 2274 + A - the matrix 2275 - array - the GPU array in column major order 2276 2277 Notes: 2278 2279 Level: developer 2280 2281 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()` 2282 @*/ 2283 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a) 2284 { 2285 PetscFunctionBegin; 2286 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2287 PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a)); 2288 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2289 A->offloadmask = PETSC_OFFLOAD_GPU; 2290 PetscFunctionReturn(0); 2291 } 2292 2293 /*@C 2294 MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed. 2295 2296 Not Collective 2297 2298 Input Parameters: 2299 . A - the matrix 2300 2301 Output Parameters 2302 . array - the GPU array in column major order 2303 2304 Notes: 2305 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). 2306 2307 Level: developer 2308 2309 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()` 2310 @*/ 2311 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a) 2312 { 2313 PetscFunctionBegin; 2314 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2315 PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a)); 2316 PetscFunctionReturn(0); 2317 } 2318 2319 /*@C 2320 MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead(). 
2321 2322 Not Collective 2323 2324 Input Parameters: 2325 + A - the matrix 2326 - array - the GPU array in column major order 2327 2328 Notes: 2329 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). 2330 2331 Level: developer 2332 2333 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()` 2334 @*/ 2335 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a) 2336 { 2337 PetscFunctionBegin; 2338 PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a)); 2339 PetscFunctionReturn(0); 2340 } 2341 2342 /*@C 2343 MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed. 2344 2345 Not Collective 2346 2347 Input Parameters: 2348 . A - the matrix 2349 2350 Output Parameters 2351 . array - the GPU array in column major order 2352 2353 Notes: 2354 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead(). 2355 2356 Level: developer 2357 2358 .seealso: `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()` 2359 @*/ 2360 PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a) 2361 { 2362 PetscFunctionBegin; 2363 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2364 PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a)); 2365 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2366 PetscFunctionReturn(0); 2367 } 2368 2369 /*@C 2370 MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray(). 
2371 2372 Not Collective 2373 2374 Input Parameters: 2375 + A - the matrix 2376 - array - the GPU array in column major order 2377 2378 Notes: 2379 2380 Level: developer 2381 2382 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()` 2383 @*/ 2384 PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a) 2385 { 2386 PetscFunctionBegin; 2387 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2388 PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a)); 2389 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2390 A->offloadmask = PETSC_OFFLOAD_GPU; 2391 PetscFunctionReturn(0); 2392 } 2393 #endif 2394 2395 /*@C 2396 MatCreateDense - Creates a matrix in dense format. 2397 2398 Collective 2399 2400 Input Parameters: 2401 + comm - MPI communicator 2402 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2403 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2404 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 2405 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 2406 - data - optional location of matrix data. Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 2407 to control all matrix memory allocation. 2408 2409 Output Parameter: 2410 . A - the matrix 2411 2412 Notes: 2413 The dense format is fully compatible with standard Fortran 77 2414 storage by columns. 2415 Note that, although local portions of the matrix are stored in column-major 2416 order, the matrix is partitioned across MPI ranks by row. 2417 2418 The data input variable is intended primarily for Fortran programmers 2419 who wish to allocate their own matrix memory space. Most users should 2420 set data=NULL (PETSC_NULL_SCALAR for Fortran users). 2421 2422 The user MUST specify either the local or global matrix dimensions 2423 (possibly both). 
2424 2425 Level: intermediate 2426 2427 .seealso: `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 2428 @*/ 2429 PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 2430 { 2431 PetscFunctionBegin; 2432 PetscCall(MatCreate(comm,A)); 2433 PetscCall(MatSetSizes(*A,m,n,M,N)); 2434 PetscCall(MatSetType(*A,MATDENSE)); 2435 PetscCall(MatSeqDenseSetPreallocation(*A,data)); 2436 PetscCall(MatMPIDenseSetPreallocation(*A,data)); 2437 PetscFunctionReturn(0); 2438 } 2439 2440 #if defined(PETSC_HAVE_CUDA) 2441 /*@C 2442 MatCreateDenseCUDA - Creates a matrix in dense format using CUDA. 2443 2444 Collective 2445 2446 Input Parameters: 2447 + comm - MPI communicator 2448 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2449 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2450 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 2451 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 2452 - data - optional location of GPU matrix data. Set data=NULL for PETSc 2453 to control matrix memory allocation. 2454 2455 Output Parameter: 2456 . 
   A - the matrix

   Notes:

   Level: intermediate

.seealso: `MatCreate()`, `MatCreateDense()`
@*/
PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm,A));
  /* data must be collectively NULL or collectively non-NULL across comm */
  PetscValidLogicalCollectiveBool(*A,!!data,6);
  PetscCall(MatSetSizes(*A,m,n,M,N));
  PetscCall(MatSetType(*A,MATDENSECUDA));
  /* exactly one of the two preallocations takes effect, depending on the comm size */
  PetscCall(MatSeqDenseCUDASetPreallocation(*A,data));
  PetscCall(MatMPIDenseCUDASetPreallocation(*A,data));
  PetscFunctionReturn(0);
}
#endif

/* Duplicate an MPI dense matrix: replicate layout/flags and duplicate the local dense block */
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&mat));
  PetscCall(MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
  PetscCall(MatSetType(mat,((PetscObject)A)->type_name));
  a = (Mat_MPIDense*)mat->data;

  /* the duplicate is immediately usable; no assembly needed */
  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  /* share the layouts by reference rather than rebuilding them */
  PetscCall(PetscLayoutReference(A->rmap,&mat->rmap));
  PetscCall(PetscLayoutReference(A->cmap,&mat->cmap));

  /* cpvalues decides whether the local block's values are copied */
  PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));

  *newmat = mat;
  PetscFunctionReturn(0);
}

/* Load an MPI dense matrix from a binary or (when available) HDF5 viewer */
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5));
#endif
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat,viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat,viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}

/* Compare two MPI dense matrices: compare local blocks, then AND the results over the comm */
static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat          a,b;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  PetscCall(MatEqual(a,b,flag));
  /* all ranks must agree; a single differing local block makes the result false everywhere */
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(0);
}

/* Destroy callback for the A^T*B product data (send buffer, recv counts, cached sequential product) */
PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
{
  Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(atb->sendbuf,atb->recvcounts));
  PetscCall(MatDestroy(&atb->atb));
  PetscCall(PetscFree(atb));
  PetscFunctionReturn(0);
}

/* Destroy callback for the A*B^T product data (double buffers plus allgatherv counts/displs) */
PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
{
  Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(abt->buf[0],abt->buf[1]));
  PetscCall(PetscFree2(abt->recvcounts,abt->recvdispls));
  PetscCall(PetscFree(abt));
  PetscFunctionReturn(0);
}

/* Numeric phase of C = A^T * B for MPI dense: local seq product then reduce-scatter */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data,
*b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2570 Mat_TransMatMultDense *atb; 2571 MPI_Comm comm; 2572 PetscMPIInt size,*recvcounts; 2573 PetscScalar *carray,*sendbuf; 2574 const PetscScalar *atbarray; 2575 PetscInt i,cN=C->cmap->N,proc,k,j,lda; 2576 const PetscInt *ranges; 2577 2578 PetscFunctionBegin; 2579 MatCheckProduct(C,3); 2580 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2581 atb = (Mat_TransMatMultDense *)C->product->data; 2582 recvcounts = atb->recvcounts; 2583 sendbuf = atb->sendbuf; 2584 2585 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 2586 PetscCallMPI(MPI_Comm_size(comm,&size)); 2587 2588 /* compute atbarray = aseq^T * bseq */ 2589 PetscCall(MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb)); 2590 2591 PetscCall(MatGetOwnershipRanges(C,&ranges)); 2592 2593 /* arrange atbarray into sendbuf */ 2594 PetscCall(MatDenseGetArrayRead(atb->atb,&atbarray)); 2595 PetscCall(MatDenseGetLDA(atb->atb,&lda)); 2596 for (proc=0, k=0; proc<size; proc++) { 2597 for (j=0; j<cN; j++) { 2598 for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*lda]; 2599 } 2600 } 2601 PetscCall(MatDenseRestoreArrayRead(atb->atb,&atbarray)); 2602 2603 /* sum all atbarray to local values of C */ 2604 PetscCall(MatDenseGetArrayWrite(c->A,&carray)); 2605 PetscCallMPI(MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm)); 2606 PetscCall(MatDenseRestoreArrayWrite(c->A,&carray)); 2607 PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); 2608 PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); 2609 PetscFunctionReturn(0); 2610 } 2611 2612 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 2613 { 2614 MPI_Comm comm; 2615 PetscMPIInt size; 2616 PetscInt cm=A->cmap->n,cM,cN=B->cmap->N; 2617 Mat_TransMatMultDense *atb; 2618 PetscBool cisdense; 2619 PetscInt i; 2620 const PetscInt *ranges; 2621 
2622 PetscFunctionBegin; 2623 MatCheckProduct(C,3); 2624 PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2625 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 2626 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) { 2627 SETERRQ(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2628 } 2629 2630 /* create matrix product C */ 2631 PetscCall(MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N)); 2632 PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"")); 2633 if (!cisdense) { 2634 PetscCall(MatSetType(C,((PetscObject)A)->type_name)); 2635 } 2636 PetscCall(MatSetUp(C)); 2637 2638 /* create data structure for reuse C */ 2639 PetscCallMPI(MPI_Comm_size(comm,&size)); 2640 PetscCall(PetscNew(&atb)); 2641 cM = C->rmap->N; 2642 PetscCall(PetscMalloc2(cM*cN,&atb->sendbuf,size,&atb->recvcounts)); 2643 PetscCall(MatGetOwnershipRanges(C,&ranges)); 2644 for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN; 2645 2646 C->product->data = atb; 2647 C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 2648 PetscFunctionReturn(0); 2649 } 2650 2651 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2652 { 2653 MPI_Comm comm; 2654 PetscMPIInt i, size; 2655 PetscInt maxRows, bufsiz; 2656 PetscMPIInt tag; 2657 PetscInt alg; 2658 Mat_MatTransMultDense *abt; 2659 Mat_Product *product = C->product; 2660 PetscBool flg; 2661 2662 PetscFunctionBegin; 2663 MatCheckProduct(C,4); 2664 PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2665 /* check local size of A and B */ 2666 PetscCheck(A->cmap->n == B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are 
incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")",A->cmap->n,B->cmap->n); 2667 2668 PetscCall(PetscStrcmp(product->alg,"allgatherv",&flg)); 2669 alg = flg ? 0 : 1; 2670 2671 /* setup matrix product C */ 2672 PetscCall(MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N)); 2673 PetscCall(MatSetType(C,MATMPIDENSE)); 2674 PetscCall(MatSetUp(C)); 2675 PetscCall(PetscObjectGetNewTag((PetscObject)C,&tag)); 2676 2677 /* create data structure for reuse C */ 2678 PetscCall(PetscObjectGetComm((PetscObject)C,&comm)); 2679 PetscCallMPI(MPI_Comm_size(comm,&size)); 2680 PetscCall(PetscNew(&abt)); 2681 abt->tag = tag; 2682 abt->alg = alg; 2683 switch (alg) { 2684 case 1: /* alg: "cyclic" */ 2685 for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i])); 2686 bufsiz = A->cmap->N * maxRows; 2687 PetscCall(PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]))); 2688 break; 2689 default: /* alg: "allgatherv" */ 2690 PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]))); 2691 PetscCall(PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls))); 2692 for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N; 2693 for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i]; 2694 break; 2695 } 2696 2697 C->product->data = abt; 2698 C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 2699 C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense; 2700 PetscFunctionReturn(0); 2701 } 2702 2703 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) 2704 { 2705 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2706 Mat_MatTransMultDense *abt; 2707 MPI_Comm comm; 2708 PetscMPIInt rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom; 2709 PetscScalar *sendbuf, *recvbuf=NULL, *cv; 2710 
PetscInt i,cK=A->cmap->N,k,j,bn; 2711 PetscScalar _DOne=1.0,_DZero=0.0; 2712 const PetscScalar *av,*bv; 2713 PetscBLASInt cm, cn, ck, alda, blda = 0, clda; 2714 MPI_Request reqs[2]; 2715 const PetscInt *ranges; 2716 2717 PetscFunctionBegin; 2718 MatCheckProduct(C,3); 2719 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2720 abt = (Mat_MatTransMultDense*)C->product->data; 2721 PetscCall(PetscObjectGetComm((PetscObject)C,&comm)); 2722 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 2723 PetscCallMPI(MPI_Comm_size(comm,&size)); 2724 PetscCall(MatDenseGetArrayRead(a->A,&av)); 2725 PetscCall(MatDenseGetArrayRead(b->A,&bv)); 2726 PetscCall(MatDenseGetArrayWrite(c->A,&cv)); 2727 PetscCall(MatDenseGetLDA(a->A,&i)); 2728 PetscCall(PetscBLASIntCast(i,&alda)); 2729 PetscCall(MatDenseGetLDA(b->A,&i)); 2730 PetscCall(PetscBLASIntCast(i,&blda)); 2731 PetscCall(MatDenseGetLDA(c->A,&i)); 2732 PetscCall(PetscBLASIntCast(i,&clda)); 2733 PetscCall(MatGetOwnershipRanges(B,&ranges)); 2734 bn = B->rmap->n; 2735 if (blda == bn) { 2736 sendbuf = (PetscScalar*)bv; 2737 } else { 2738 sendbuf = abt->buf[0]; 2739 for (k = 0, i = 0; i < cK; i++) { 2740 for (j = 0; j < bn; j++, k++) { 2741 sendbuf[k] = bv[i * blda + j]; 2742 } 2743 } 2744 } 2745 if (size > 1) { 2746 sendto = (rank + size - 1) % size; 2747 recvfrom = (rank + size + 1) % size; 2748 } else { 2749 sendto = recvfrom = 0; 2750 } 2751 PetscCall(PetscBLASIntCast(cK,&ck)); 2752 PetscCall(PetscBLASIntCast(c->A->rmap->n,&cm)); 2753 recvisfrom = rank; 2754 for (i = 0; i < size; i++) { 2755 /* we have finished receiving in sending, bufs can be read/modified */ 2756 PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 2757 PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 2758 2759 if (nextrecvisfrom != rank) { 2760 /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2761 sendsiz = cK * bn; 
2762 recvsiz = cK * nextbn; 2763 recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; 2764 PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0])); 2765 PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1])); 2766 } 2767 2768 /* local aseq * sendbuf^T */ 2769 PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn)); 2770 if (cm && cn && ck) PetscCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda)); 2771 2772 if (nextrecvisfrom != rank) { 2773 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2774 PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE)); 2775 } 2776 bn = nextbn; 2777 recvisfrom = nextrecvisfrom; 2778 sendbuf = recvbuf; 2779 } 2780 PetscCall(MatDenseRestoreArrayRead(a->A,&av)); 2781 PetscCall(MatDenseRestoreArrayRead(b->A,&bv)); 2782 PetscCall(MatDenseRestoreArrayWrite(c->A,&cv)); 2783 PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); 2784 PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); 2785 PetscFunctionReturn(0); 2786 } 2787 2788 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2789 { 2790 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2791 Mat_MatTransMultDense *abt; 2792 MPI_Comm comm; 2793 PetscMPIInt size; 2794 PetscScalar *cv, *sendbuf, *recvbuf; 2795 const PetscScalar *av,*bv; 2796 PetscInt blda,i,cK=A->cmap->N,k,j,bn; 2797 PetscScalar _DOne=1.0,_DZero=0.0; 2798 PetscBLASInt cm, cn, ck, alda, clda; 2799 2800 PetscFunctionBegin; 2801 MatCheckProduct(C,3); 2802 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2803 abt = (Mat_MatTransMultDense*)C->product->data; 2804 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 2805 PetscCallMPI(MPI_Comm_size(comm,&size)); 2806 PetscCall(MatDenseGetArrayRead(a->A,&av)); 2807 
PetscCall(MatDenseGetArrayRead(b->A,&bv)); 2808 PetscCall(MatDenseGetArrayWrite(c->A,&cv)); 2809 PetscCall(MatDenseGetLDA(a->A,&i)); 2810 PetscCall(PetscBLASIntCast(i,&alda)); 2811 PetscCall(MatDenseGetLDA(b->A,&blda)); 2812 PetscCall(MatDenseGetLDA(c->A,&i)); 2813 PetscCall(PetscBLASIntCast(i,&clda)); 2814 /* copy transpose of B into buf[0] */ 2815 bn = B->rmap->n; 2816 sendbuf = abt->buf[0]; 2817 recvbuf = abt->buf[1]; 2818 for (k = 0, j = 0; j < bn; j++) { 2819 for (i = 0; i < cK; i++, k++) { 2820 sendbuf[k] = bv[i * blda + j]; 2821 } 2822 } 2823 PetscCall(MatDenseRestoreArrayRead(b->A,&bv)); 2824 PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm)); 2825 PetscCall(PetscBLASIntCast(cK,&ck)); 2826 PetscCall(PetscBLASIntCast(c->A->rmap->n,&cm)); 2827 PetscCall(PetscBLASIntCast(c->A->cmap->n,&cn)); 2828 if (cm && cn && ck) PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda)); 2829 PetscCall(MatDenseRestoreArrayRead(a->A,&av)); 2830 PetscCall(MatDenseRestoreArrayRead(b->A,&bv)); 2831 PetscCall(MatDenseRestoreArrayWrite(c->A,&cv)); 2832 PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); 2833 PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); 2834 PetscFunctionReturn(0); 2835 } 2836 2837 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2838 { 2839 Mat_MatTransMultDense *abt; 2840 2841 PetscFunctionBegin; 2842 MatCheckProduct(C,3); 2843 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2844 abt = (Mat_MatTransMultDense*)C->product->data; 2845 switch (abt->alg) { 2846 case 1: 2847 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); 2848 break; 2849 default: 2850 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); 2851 break; 2852 } 2853 PetscFunctionReturn(0); 2854 } 2855 2856 PetscErrorCode 
MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) 2857 { 2858 Mat_MatMultDense *ab = (Mat_MatMultDense*)data; 2859 2860 PetscFunctionBegin; 2861 PetscCall(MatDestroy(&ab->Ce)); 2862 PetscCall(MatDestroy(&ab->Ae)); 2863 PetscCall(MatDestroy(&ab->Be)); 2864 PetscCall(PetscFree(ab)); 2865 PetscFunctionReturn(0); 2866 } 2867 2868 #if defined(PETSC_HAVE_ELEMENTAL) 2869 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2870 { 2871 Mat_MatMultDense *ab; 2872 2873 PetscFunctionBegin; 2874 MatCheckProduct(C,3); 2875 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data"); 2876 ab = (Mat_MatMultDense*)C->product->data; 2877 PetscCall(MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae)); 2878 PetscCall(MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be)); 2879 PetscCall(MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce)); 2880 PetscCall(MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C)); 2881 PetscFunctionReturn(0); 2882 } 2883 2884 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 2885 { 2886 Mat Ae,Be,Ce; 2887 Mat_MatMultDense *ab; 2888 2889 PetscFunctionBegin; 2890 MatCheckProduct(C,4); 2891 PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2892 /* check local size of A and B */ 2893 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 2894 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2895 } 2896 2897 /* create elemental matrices Ae and Be */ 2898 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae)); 2899 PetscCall(MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N)); 2900 PetscCall(MatSetType(Ae,MATELEMENTAL)); 2901 
PetscCall(MatSetUp(Ae)); 2902 PetscCall(MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE)); 2903 2904 PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be)); 2905 PetscCall(MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N)); 2906 PetscCall(MatSetType(Be,MATELEMENTAL)); 2907 PetscCall(MatSetUp(Be)); 2908 PetscCall(MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE)); 2909 2910 /* compute symbolic Ce = Ae*Be */ 2911 PetscCall(MatCreate(PetscObjectComm((PetscObject)C),&Ce)); 2912 PetscCall(MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce)); 2913 2914 /* setup C */ 2915 PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE)); 2916 PetscCall(MatSetType(C,MATDENSE)); 2917 PetscCall(MatSetUp(C)); 2918 2919 /* create data structure for reuse Cdense */ 2920 PetscCall(PetscNew(&ab)); 2921 ab->Ae = Ae; 2922 ab->Be = Be; 2923 ab->Ce = Ce; 2924 2925 C->product->data = ab; 2926 C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2927 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2928 PetscFunctionReturn(0); 2929 } 2930 #endif 2931 /* ----------------------------------------------- */ 2932 #if defined(PETSC_HAVE_ELEMENTAL) 2933 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 2934 { 2935 PetscFunctionBegin; 2936 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 2937 C->ops->productsymbolic = MatProductSymbolic_AB; 2938 PetscFunctionReturn(0); 2939 } 2940 #endif 2941 2942 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 2943 { 2944 Mat_Product *product = C->product; 2945 Mat A = product->A,B=product->B; 2946 2947 PetscFunctionBegin; 2948 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) 2949 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2950 C->ops->transposematmultsymbolic = 
MatTransposeMatMultSymbolic_MPIDense_MPIDense; 2951 C->ops->productsymbolic = MatProductSymbolic_AtB; 2952 PetscFunctionReturn(0); 2953 } 2954 2955 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 2956 { 2957 Mat_Product *product = C->product; 2958 const char *algTypes[2] = {"allgatherv","cyclic"}; 2959 PetscInt alg,nalg = 2; 2960 PetscBool flg = PETSC_FALSE; 2961 2962 PetscFunctionBegin; 2963 /* Set default algorithm */ 2964 alg = 0; /* default is allgatherv */ 2965 PetscCall(PetscStrcmp(product->alg,"default",&flg)); 2966 if (flg) { 2967 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 2968 } 2969 2970 /* Get runtime option */ 2971 if (product->api_user) { 2972 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat"); 2973 PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg)); 2974 PetscOptionsEnd(); 2975 } else { 2976 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat"); 2977 PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg)); 2978 PetscOptionsEnd(); 2979 } 2980 if (flg) { 2981 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 2982 } 2983 2984 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 2985 C->ops->productsymbolic = MatProductSymbolic_ABt; 2986 PetscFunctionReturn(0); 2987 } 2988 2989 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 2990 { 2991 Mat_Product *product = C->product; 2992 2993 PetscFunctionBegin; 2994 switch (product->type) { 2995 #if defined(PETSC_HAVE_ELEMENTAL) 2996 case MATPRODUCT_AB: 2997 PetscCall(MatProductSetFromOptions_MPIDense_AB(C)); 2998 break; 2999 #endif 3000 case MATPRODUCT_AtB: 3001 PetscCall(MatProductSetFromOptions_MPIDense_AtB(C)); 3002 
break; 3003 case MATPRODUCT_ABt: 3004 PetscCall(MatProductSetFromOptions_MPIDense_ABt(C)); 3005 break; 3006 default: 3007 break; 3008 } 3009 PetscFunctionReturn(0); 3010 } 3011