1 2 /* 3 Basic functions for basic parallel dense matrices. 4 */ 5 6 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8 #include <petscblaslapack.h> 9 10 /*@ 11 12 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 13 matrix that represents the operator. For sequential matrices it returns itself. 14 15 Input Parameter: 16 . A - the Seq or MPI dense matrix 17 18 Output Parameter: 19 . B - the inner matrix 20 21 Level: intermediate 22 23 @*/ 24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 25 { 26 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 27 PetscBool flg; 28 29 PetscFunctionBegin; 30 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 31 PetscValidPointer(B,2); 32 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg)); 33 if (flg) *B = mat->A; 34 else { 35 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg)); 36 PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name); 37 *B = A; 38 } 39 PetscFunctionReturn(0); 40 } 41 42 PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s) 43 { 44 Mat_MPIDense *Amat = (Mat_MPIDense*)A->data; 45 Mat_MPIDense *Bmat = (Mat_MPIDense*)B->data; 46 47 PetscFunctionBegin; 48 PetscCall(MatCopy(Amat->A,Bmat->A,s)); 49 PetscFunctionReturn(0); 50 } 51 52 PetscErrorCode MatShift_MPIDense(Mat A,PetscScalar alpha) 53 { 54 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 55 PetscInt j,lda,rstart = A->rmap->rstart,rend = A->rmap->rend,rend2; 56 PetscScalar *v; 57 58 PetscFunctionBegin; 59 PetscCall(MatDenseGetArray(mat->A,&v)); 60 PetscCall(MatDenseGetLDA(mat->A,&lda)); 61 rend2 = PetscMin(rend,A->cmap->N); 62 if (rend2>rstart) { 63 for (j=rstart; j<rend2; j++) v[j-rstart+j*lda] += alpha; 64 PetscCall(PetscLogFlops(rend2-rstart)); 65 } 66 PetscCall(MatDenseRestoreArray(mat->A,&v)); 67 PetscFunctionReturn(0); 68 } 69 70 PetscErrorCode 
MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 71 { 72 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 73 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 74 75 PetscFunctionBegin; 76 PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 77 lrow = row - rstart; 78 PetscCall(MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v)); 79 PetscFunctionReturn(0); 80 } 81 82 PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 83 { 84 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 85 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 86 87 PetscFunctionBegin; 88 PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 89 lrow = row - rstart; 90 PetscCall(MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v)); 91 PetscFunctionReturn(0); 92 } 93 94 PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a) 95 { 96 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 97 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 98 PetscScalar *array; 99 MPI_Comm comm; 100 PetscBool flg; 101 Mat B; 102 103 PetscFunctionBegin; 104 PetscCall(MatHasCongruentLayouts(A,&flg)); 105 PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 106 PetscCall(PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B)); 107 if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */ 108 109 PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg)); 110 PetscCheck(!flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. 
Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA); 111 PetscCall(PetscObjectGetComm((PetscObject)(mdn->A),&comm)); 112 PetscCall(MatCreate(comm,&B)); 113 PetscCall(MatSetSizes(B,m,m,m,m)); 114 PetscCall(MatSetType(B,((PetscObject)mdn->A)->type_name)); 115 PetscCall(MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array)); 116 PetscCall(MatSeqDenseSetPreallocation(B,array+m*rstart)); 117 PetscCall(MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array)); 118 PetscCall(PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B)); 119 *a = B; 120 PetscCall(MatDestroy(&B)); 121 } else *a = B; 122 PetscFunctionReturn(0); 123 } 124 125 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 126 { 127 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 128 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 129 PetscBool roworiented = A->roworiented; 130 131 PetscFunctionBegin; 132 for (i=0; i<m; i++) { 133 if (idxm[i] < 0) continue; 134 PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 135 if (idxm[i] >= rstart && idxm[i] < rend) { 136 row = idxm[i] - rstart; 137 if (roworiented) { 138 PetscCall(MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv)); 139 } else { 140 for (j=0; j<n; j++) { 141 if (idxn[j] < 0) continue; 142 PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 143 PetscCall(MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv)); 144 } 145 } 146 } else if (!A->donotstash) { 147 mat->assembled = PETSC_FALSE; 148 if (roworiented) { 149 PetscCall(MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE)); 150 } else { 151 PetscCall(MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE)); 152 } 153 } 154 } 155 PetscFunctionReturn(0); 156 } 157 158 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt 
idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 159 { 160 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 161 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 162 163 PetscFunctionBegin; 164 for (i=0; i<m; i++) { 165 if (idxm[i] < 0) continue; /* negative row */ 166 PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 167 if (idxm[i] >= rstart && idxm[i] < rend) { 168 row = idxm[i] - rstart; 169 for (j=0; j<n; j++) { 170 if (idxn[j] < 0) continue; /* negative column */ 171 PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 172 PetscCall(MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j)); 173 } 174 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 175 } 176 PetscFunctionReturn(0); 177 } 178 179 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda) 180 { 181 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 182 183 PetscFunctionBegin; 184 PetscCall(MatDenseGetLDA(a->A,lda)); 185 PetscFunctionReturn(0); 186 } 187 188 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda) 189 { 190 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 191 PetscBool iscuda; 192 193 PetscFunctionBegin; 194 if (!a->A) { 195 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 196 PetscCall(PetscLayoutSetUp(A->rmap)); 197 PetscCall(PetscLayoutSetUp(A->cmap)); 198 PetscCall(MatCreate(PETSC_COMM_SELF,&a->A)); 199 PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->A)); 200 PetscCall(MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N)); 201 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda)); 202 PetscCall(MatSetType(a->A,iscuda ? 
MATSEQDENSECUDA : MATSEQDENSE)); 203 } 204 PetscCall(MatDenseSetLDA(a->A,lda)); 205 PetscFunctionReturn(0); 206 } 207 208 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array) 209 { 210 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 211 212 PetscFunctionBegin; 213 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 214 PetscCall(MatDenseGetArray(a->A,array)); 215 PetscFunctionReturn(0); 216 } 217 218 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array) 219 { 220 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 221 222 PetscFunctionBegin; 223 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 224 PetscCall(MatDenseGetArrayRead(a->A,array)); 225 PetscFunctionReturn(0); 226 } 227 228 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array) 229 { 230 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 231 232 PetscFunctionBegin; 233 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 234 PetscCall(MatDenseGetArrayWrite(a->A,array)); 235 PetscFunctionReturn(0); 236 } 237 238 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array) 239 { 240 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 241 242 PetscFunctionBegin; 243 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 244 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 245 PetscCall(MatDensePlaceArray(a->A,array)); 246 PetscFunctionReturn(0); 247 } 248 249 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 250 { 251 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 252 253 PetscFunctionBegin; 254 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call 
MatDenseRestoreColumnVec() first"); 255 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 256 PetscCall(MatDenseResetArray(a->A)); 257 PetscFunctionReturn(0); 258 } 259 260 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array) 261 { 262 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 263 264 PetscFunctionBegin; 265 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 266 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 267 PetscCall(MatDenseReplaceArray(a->A,array)); 268 PetscFunctionReturn(0); 269 } 270 271 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 272 { 273 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 274 PetscInt lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 275 const PetscInt *irow,*icol; 276 const PetscScalar *v; 277 PetscScalar *bv; 278 Mat newmat; 279 IS iscol_local; 280 MPI_Comm comm_is,comm_mat; 281 282 PetscFunctionBegin; 283 PetscCall(PetscObjectGetComm((PetscObject)A,&comm_mat)); 284 PetscCall(PetscObjectGetComm((PetscObject)iscol,&comm_is)); 285 PetscCheck(comm_mat == comm_is,PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator"); 286 287 PetscCall(ISAllGather(iscol,&iscol_local)); 288 PetscCall(ISGetIndices(isrow,&irow)); 289 PetscCall(ISGetIndices(iscol_local,&icol)); 290 PetscCall(ISGetLocalSize(isrow,&nrows)); 291 PetscCall(ISGetLocalSize(iscol,&ncols)); 292 PetscCall(ISGetSize(iscol,&Ncols)); /* global number of columns, size of iscol_local */ 293 294 /* No parallel redistribution currently supported! Should really check each index set 295 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 296 original matrix! 
*/ 297 298 PetscCall(MatGetLocalSize(A,&nlrows,&nlcols)); 299 PetscCall(MatGetOwnershipRange(A,&rstart,&rend)); 300 301 /* Check submatrix call */ 302 if (scall == MAT_REUSE_MATRIX) { 303 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 304 /* Really need to test rows and column sizes! */ 305 newmat = *B; 306 } else { 307 /* Create and fill new matrix */ 308 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&newmat)); 309 PetscCall(MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols)); 310 PetscCall(MatSetType(newmat,((PetscObject)A)->type_name)); 311 PetscCall(MatMPIDenseSetPreallocation(newmat,NULL)); 312 } 313 314 /* Now extract the data pointers and do the copy, column at a time */ 315 newmatd = (Mat_MPIDense*)newmat->data; 316 PetscCall(MatDenseGetArray(newmatd->A,&bv)); 317 PetscCall(MatDenseGetArrayRead(mat->A,&v)); 318 PetscCall(MatDenseGetLDA(mat->A,&lda)); 319 for (i=0; i<Ncols; i++) { 320 const PetscScalar *av = v + lda*icol[i]; 321 for (j=0; j<nrows; j++) { 322 *bv++ = av[irow[j] - rstart]; 323 } 324 } 325 PetscCall(MatDenseRestoreArrayRead(mat->A,&v)); 326 PetscCall(MatDenseRestoreArray(newmatd->A,&bv)); 327 328 /* Assemble the matrices so that the correct flags are set */ 329 PetscCall(MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY)); 330 PetscCall(MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY)); 331 332 /* Free work space */ 333 PetscCall(ISRestoreIndices(isrow,&irow)); 334 PetscCall(ISRestoreIndices(iscol_local,&icol)); 335 PetscCall(ISDestroy(&iscol_local)); 336 *B = newmat; 337 PetscFunctionReturn(0); 338 } 339 340 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array) 341 { 342 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 343 344 PetscFunctionBegin; 345 PetscCall(MatDenseRestoreArray(a->A,array)); 346 PetscFunctionReturn(0); 347 } 348 349 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array) 350 { 351 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 352 353 PetscFunctionBegin; 354 
PetscCall(MatDenseRestoreArrayRead(a->A,array)); 355 PetscFunctionReturn(0); 356 } 357 358 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array) 359 { 360 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 361 362 PetscFunctionBegin; 363 PetscCall(MatDenseRestoreArrayWrite(a->A,array)); 364 PetscFunctionReturn(0); 365 } 366 367 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 368 { 369 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 370 PetscInt nstash,reallocs; 371 372 PetscFunctionBegin; 373 if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 374 375 PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range)); 376 PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs)); 377 PetscCall(PetscInfo(mdn->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs)); 378 PetscFunctionReturn(0); 379 } 380 381 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 382 { 383 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 384 PetscInt i,*row,*col,flg,j,rstart,ncols; 385 PetscMPIInt n; 386 PetscScalar *val; 387 388 PetscFunctionBegin; 389 if (!mdn->donotstash && !mat->nooffprocentries) { 390 /* wait on receives */ 391 while (1) { 392 PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg)); 393 if (!flg) break; 394 395 for (i=0; i<n;) { 396 /* Now identify the consecutive vals belonging to the same row */ 397 for (j=i,rstart=row[j]; j<n; j++) { 398 if (row[j] != rstart) break; 399 } 400 if (j < n) ncols = j-i; 401 else ncols = n-i; 402 /* Now assemble all these values with a single function call */ 403 PetscCall(MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode)); 404 i = j; 405 } 406 } 407 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 408 } 409 410 PetscCall(MatAssemblyBegin(mdn->A,mode)); 411 PetscCall(MatAssemblyEnd(mdn->A,mode)); 412 PetscFunctionReturn(0); 413 } 414 415 PetscErrorCode 
MatZeroEntries_MPIDense(Mat A) 416 { 417 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 418 419 PetscFunctionBegin; 420 PetscCall(MatZeroEntries(l->A)); 421 PetscFunctionReturn(0); 422 } 423 424 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 425 { 426 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 427 PetscInt i,len,*lrows; 428 429 PetscFunctionBegin; 430 /* get locally owned rows */ 431 PetscCall(PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL)); 432 /* fix right hand side if needed */ 433 if (x && b) { 434 const PetscScalar *xx; 435 PetscScalar *bb; 436 437 PetscCall(VecGetArrayRead(x, &xx)); 438 PetscCall(VecGetArrayWrite(b, &bb)); 439 for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]]; 440 PetscCall(VecRestoreArrayRead(x, &xx)); 441 PetscCall(VecRestoreArrayWrite(b, &bb)); 442 } 443 PetscCall(MatZeroRows(l->A,len,lrows,0.0,NULL,NULL)); 444 if (diag != 0.0) { 445 Vec d; 446 447 PetscCall(MatCreateVecs(A,NULL,&d)); 448 PetscCall(VecSet(d,diag)); 449 PetscCall(MatDiagonalSet(A,d,INSERT_VALUES)); 450 PetscCall(VecDestroy(&d)); 451 } 452 PetscCall(PetscFree(lrows)); 453 PetscFunctionReturn(0); 454 } 455 456 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec); 457 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec); 458 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec); 459 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec); 460 461 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 462 { 463 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 464 const PetscScalar *ax; 465 PetscScalar *ay; 466 PetscMemType axmtype,aymtype; 467 468 PetscFunctionBegin; 469 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 470 PetscCall(VecGetArrayReadAndMemType(xx,&ax,&axmtype)); 471 PetscCall(VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype)); 472 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE)); 473 
PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE)); 474 PetscCall(VecRestoreArrayAndMemType(mdn->lvec,&ay)); 475 PetscCall(VecRestoreArrayReadAndMemType(xx,&ax)); 476 PetscCall((*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy)); 477 PetscFunctionReturn(0); 478 } 479 480 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 481 { 482 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 483 const PetscScalar *ax; 484 PetscScalar *ay; 485 PetscMemType axmtype,aymtype; 486 487 PetscFunctionBegin; 488 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 489 PetscCall(VecGetArrayReadAndMemType(xx,&ax,&axmtype)); 490 PetscCall(VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype)); 491 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE)); 492 PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE)); 493 PetscCall(VecRestoreArrayAndMemType(mdn->lvec,&ay)); 494 PetscCall(VecRestoreArrayReadAndMemType(xx,&ax)); 495 PetscCall((*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz)); 496 PetscFunctionReturn(0); 497 } 498 499 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 500 { 501 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 502 const PetscScalar *ax; 503 PetscScalar *ay; 504 PetscMemType axmtype,aymtype; 505 506 PetscFunctionBegin; 507 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 508 PetscCall(VecSet(yy,0.0)); 509 PetscCall((*a->A->ops->multtranspose)(a->A,xx,a->lvec)); 510 PetscCall(VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype)); 511 PetscCall(VecGetArrayAndMemType(yy,&ay,&aymtype)); 512 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM)); 513 PetscCall(PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM)); 514 PetscCall(VecRestoreArrayReadAndMemType(a->lvec,&ax)); 515 PetscCall(VecRestoreArrayAndMemType(yy,&ay)); 516 PetscFunctionReturn(0); 517 } 518 519 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 520 { 521 
Mat_MPIDense *a = (Mat_MPIDense*)A->data; 522 const PetscScalar *ax; 523 PetscScalar *ay; 524 PetscMemType axmtype,aymtype; 525 526 PetscFunctionBegin; 527 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 528 PetscCall(VecCopy(yy,zz)); 529 PetscCall((*a->A->ops->multtranspose)(a->A,xx,a->lvec)); 530 PetscCall(VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype)); 531 PetscCall(VecGetArrayAndMemType(zz,&ay,&aymtype)); 532 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM)); 533 PetscCall(PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM)); 534 PetscCall(VecRestoreArrayReadAndMemType(a->lvec,&ax)); 535 PetscCall(VecRestoreArrayAndMemType(zz,&ay)); 536 PetscFunctionReturn(0); 537 } 538 539 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 540 { 541 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 542 PetscInt lda,len,i,n,m = A->rmap->n,radd; 543 PetscScalar *x,zero = 0.0; 544 const PetscScalar *av; 545 546 PetscFunctionBegin; 547 PetscCall(VecSet(v,zero)); 548 PetscCall(VecGetArray(v,&x)); 549 PetscCall(VecGetSize(v,&n)); 550 PetscCheck(n == A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 551 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 552 radd = A->rmap->rstart*m; 553 PetscCall(MatDenseGetArrayRead(a->A,&av)); 554 PetscCall(MatDenseGetLDA(a->A,&lda)); 555 for (i=0; i<len; i++) { 556 x[i] = av[radd + i*lda + i]; 557 } 558 PetscCall(MatDenseRestoreArrayRead(a->A,&av)); 559 PetscCall(VecRestoreArray(v,&x)); 560 PetscFunctionReturn(0); 561 } 562 563 PetscErrorCode MatDestroy_MPIDense(Mat mat) 564 { 565 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 566 567 PetscFunctionBegin; 568 #if defined(PETSC_USE_LOG) 569 PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N); 570 #endif 571 PetscCall(MatStashDestroy_Private(&mat->stash)); 572 PetscCheck(!mdn->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() 
first"); 573 PetscCheck(!mdn->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 574 PetscCall(MatDestroy(&mdn->A)); 575 PetscCall(VecDestroy(&mdn->lvec)); 576 PetscCall(PetscSFDestroy(&mdn->Mvctx)); 577 PetscCall(VecDestroy(&mdn->cvec)); 578 PetscCall(MatDestroy(&mdn->cmat)); 579 580 PetscCall(PetscFree(mat->data)); 581 PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL)); 582 583 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL)); 584 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL)); 585 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL)); 586 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL)); 587 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL)); 588 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL)); 589 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL)); 590 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL)); 591 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL)); 592 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL)); 593 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL)); 594 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL)); 595 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",NULL)); 596 #if defined(PETSC_HAVE_ELEMENTAL) 597 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL)); 598 #endif 599 #if defined(PETSC_HAVE_SCALAPACK) 600 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL)); 601 #endif 602 
PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL)); 603 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL)); 604 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL)); 605 #if defined (PETSC_HAVE_CUDA) 606 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL)); 607 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL)); 608 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",NULL)); 609 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidensecuda_mpidense_C",NULL)); 610 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL)); 611 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL)); 612 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL)); 613 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL)); 614 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDAGetArray_C",NULL)); 615 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDAGetArrayRead_C",NULL)); 616 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDAGetArrayWrite_C",NULL)); 617 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDARestoreArray_C",NULL)); 618 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDARestoreArrayRead_C",NULL)); 619 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDARestoreArrayWrite_C",NULL)); 620 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDAPlaceArray_C",NULL)); 621 
PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDAResetArray_C",NULL)); 622 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseCUDAReplaceArray_C",NULL)); 623 #endif 624 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL)); 625 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL)); 626 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL)); 627 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL)); 628 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL)); 629 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL)); 630 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL)); 631 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL)); 632 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL)); 633 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL)); 634 635 PetscCall(PetscObjectCompose((PetscObject)mat,"DiagonalBlock",NULL)); 636 PetscFunctionReturn(0); 637 } 638 639 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer); 640 641 #include <petscdraw.h> 642 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 643 { 644 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 645 PetscMPIInt rank; 646 PetscViewerType vtype; 647 PetscBool iascii,isdraw; 648 PetscViewer sviewer; 649 PetscViewerFormat format; 650 651 PetscFunctionBegin; 652 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank)); 653 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 654 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 655 if (iascii) { 656 PetscCall(PetscViewerGetType(viewer,&vtype)); 657 
PetscCall(PetscViewerGetFormat(viewer,&format)); 658 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 659 MatInfo info; 660 PetscCall(MatGetInfo(mat,MAT_LOCAL,&info)); 661 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 662 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory)); 663 PetscCall(PetscViewerFlush(viewer)); 664 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 665 if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx,viewer)); 666 PetscFunctionReturn(0); 667 } else if (format == PETSC_VIEWER_ASCII_INFO) { 668 PetscFunctionReturn(0); 669 } 670 } else if (isdraw) { 671 PetscDraw draw; 672 PetscBool isnull; 673 674 PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw)); 675 PetscCall(PetscDrawIsNull(draw,&isnull)); 676 if (isnull) PetscFunctionReturn(0); 677 } 678 679 { 680 /* assemble the entire matrix onto first processor. */ 681 Mat A; 682 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 683 PetscInt *cols; 684 PetscScalar *vals; 685 686 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat),&A)); 687 if (rank == 0) { 688 PetscCall(MatSetSizes(A,M,N,M,N)); 689 } else { 690 PetscCall(MatSetSizes(A,0,0,M,N)); 691 } 692 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 693 PetscCall(MatSetType(A,MATMPIDENSE)); 694 PetscCall(MatMPIDenseSetPreallocation(A,NULL)); 695 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)A)); 696 697 /* Copy the matrix ... 
This isn't the most efficient means, 698 but it's quick for now */ 699 A->insertmode = INSERT_VALUES; 700 701 row = mat->rmap->rstart; 702 m = mdn->A->rmap->n; 703 for (i=0; i<m; i++) { 704 PetscCall(MatGetRow_MPIDense(mat,row,&nz,&cols,&vals)); 705 PetscCall(MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES)); 706 PetscCall(MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals)); 707 row++; 708 } 709 710 PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 711 PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 712 PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 713 if (rank == 0) { 714 PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name)); 715 PetscCall(MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer)); 716 } 717 PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 718 PetscCall(PetscViewerFlush(viewer)); 719 PetscCall(MatDestroy(&A)); 720 } 721 PetscFunctionReturn(0); 722 } 723 724 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 725 { 726 PetscBool iascii,isbinary,isdraw,issocket; 727 728 PetscFunctionBegin; 729 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 730 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 731 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket)); 732 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 733 734 if (iascii || issocket || isdraw) { 735 PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat,viewer)); 736 } else if (isbinary) PetscCall(MatView_Dense_Binary(mat,viewer)); 737 PetscFunctionReturn(0); 738 } 739 740 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 741 { 742 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 743 Mat mdn = mat->A; 744 PetscLogDouble isend[5],irecv[5]; 745 746 PetscFunctionBegin; 747 info->block_size = 1.0; 748 749 PetscCall(MatGetInfo(mdn,MAT_LOCAL,info)); 750 751 
isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 752 isend[3] = info->memory; isend[4] = info->mallocs; 753 if (flag == MAT_LOCAL) { 754 info->nz_used = isend[0]; 755 info->nz_allocated = isend[1]; 756 info->nz_unneeded = isend[2]; 757 info->memory = isend[3]; 758 info->mallocs = isend[4]; 759 } else if (flag == MAT_GLOBAL_MAX) { 760 PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A))); 761 762 info->nz_used = irecv[0]; 763 info->nz_allocated = irecv[1]; 764 info->nz_unneeded = irecv[2]; 765 info->memory = irecv[3]; 766 info->mallocs = irecv[4]; 767 } else if (flag == MAT_GLOBAL_SUM) { 768 PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A))); 769 770 info->nz_used = irecv[0]; 771 info->nz_allocated = irecv[1]; 772 info->nz_unneeded = irecv[2]; 773 info->memory = irecv[3]; 774 info->mallocs = irecv[4]; 775 } 776 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 777 info->fill_ratio_needed = 0; 778 info->factor_mallocs = 0; 779 PetscFunctionReturn(0); 780 } 781 782 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 783 { 784 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 785 786 PetscFunctionBegin; 787 switch (op) { 788 case MAT_NEW_NONZERO_LOCATIONS: 789 case MAT_NEW_NONZERO_LOCATION_ERR: 790 case MAT_NEW_NONZERO_ALLOCATION_ERR: 791 MatCheckPreallocated(A,1); 792 PetscCall(MatSetOption(a->A,op,flg)); 793 break; 794 case MAT_ROW_ORIENTED: 795 MatCheckPreallocated(A,1); 796 a->roworiented = flg; 797 PetscCall(MatSetOption(a->A,op,flg)); 798 break; 799 case MAT_FORCE_DIAGONAL_ENTRIES: 800 case MAT_KEEP_NONZERO_PATTERN: 801 case MAT_USE_HASH_TABLE: 802 case MAT_SORTED_FULL: 803 PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op])); 804 break; 805 case MAT_IGNORE_OFF_PROC_ENTRIES: 806 a->donotstash = flg; 807 break; 808 case MAT_SYMMETRIC: 809 case MAT_STRUCTURALLY_SYMMETRIC: 810 case MAT_HERMITIAN: 811 
case MAT_SYMMETRY_ETERNAL: 812 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 813 case MAT_SPD: 814 case MAT_IGNORE_LOWER_TRIANGULAR: 815 case MAT_IGNORE_ZERO_ENTRIES: 816 case MAT_SPD_ETERNAL: 817 /* if the diagonal matrix is square it inherits some of the properties above */ 818 PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op])); 819 break; 820 default: 821 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 822 } 823 PetscFunctionReturn(0); 824 } 825 826 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 827 { 828 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 829 const PetscScalar *l; 830 PetscScalar x,*v,*vv,*r; 831 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda; 832 833 PetscFunctionBegin; 834 PetscCall(MatDenseGetArray(mdn->A,&vv)); 835 PetscCall(MatDenseGetLDA(mdn->A,&lda)); 836 PetscCall(MatGetLocalSize(A,&s2,&s3)); 837 if (ll) { 838 PetscCall(VecGetLocalSize(ll,&s2a)); 839 PetscCheck(s2a == s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2); 840 PetscCall(VecGetArrayRead(ll,&l)); 841 for (i=0; i<m; i++) { 842 x = l[i]; 843 v = vv + i; 844 for (j=0; j<n; j++) { (*v) *= x; v+= lda;} 845 } 846 PetscCall(VecRestoreArrayRead(ll,&l)); 847 PetscCall(PetscLogFlops(1.0*n*m)); 848 } 849 if (rr) { 850 const PetscScalar *ar; 851 852 PetscCall(VecGetLocalSize(rr,&s3a)); 853 PetscCheck(s3a == s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3); 854 PetscCall(VecGetArrayRead(rr,&ar)); 855 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 856 PetscCall(VecGetArray(mdn->lvec,&r)); 857 PetscCall(PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE)); 858 PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE)); 859 PetscCall(VecRestoreArrayRead(rr,&ar)); 860 for (i=0; i<n; i++) { 861 x = r[i]; 862 v = vv + i*lda; 863 for (j=0; j<m; j++) 
(*v++) *= x; 864 } 865 PetscCall(VecRestoreArray(mdn->lvec,&r)); 866 PetscCall(PetscLogFlops(1.0*n*m)); 867 } 868 PetscCall(MatDenseRestoreArray(mdn->A,&vv)); 869 PetscFunctionReturn(0); 870 } 871 872 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 873 { 874 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 875 PetscInt i,j; 876 PetscMPIInt size; 877 PetscReal sum = 0.0; 878 const PetscScalar *av,*v; 879 880 PetscFunctionBegin; 881 PetscCall(MatDenseGetArrayRead(mdn->A,&av)); 882 v = av; 883 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 884 if (size == 1) { 885 PetscCall(MatNorm(mdn->A,type,nrm)); 886 } else { 887 if (type == NORM_FROBENIUS) { 888 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 889 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 890 } 891 PetscCall(MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A))); 892 *nrm = PetscSqrtReal(*nrm); 893 PetscCall(PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n)); 894 } else if (type == NORM_1) { 895 PetscReal *tmp,*tmp2; 896 PetscCall(PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2)); 897 *nrm = 0.0; 898 v = av; 899 for (j=0; j<mdn->A->cmap->n; j++) { 900 for (i=0; i<mdn->A->rmap->n; i++) { 901 tmp[j] += PetscAbsScalar(*v); v++; 902 } 903 } 904 PetscCall(MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A))); 905 for (j=0; j<A->cmap->N; j++) { 906 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 907 } 908 PetscCall(PetscFree2(tmp,tmp2)); 909 PetscCall(PetscLogFlops(A->cmap->n*A->rmap->n)); 910 } else if (type == NORM_INFINITY) { /* max row norm */ 911 PetscReal ntemp; 912 PetscCall(MatNorm(mdn->A,type,&ntemp)); 913 PetscCall(MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A))); 914 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm"); 915 } 916 PetscCall(MatDenseRestoreArrayRead(mdn->A,&av)); 917 PetscFunctionReturn(0); 918 } 919 920 PetscErrorCode 
MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat          B;
  PetscInt     M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscInt     j,i,lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  /* Build the transpose by inserting each local column of A as a row of B;
     for MAT_INPLACE_MATRIX a temporary B is created and merged back into A */
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
    PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M));
    PetscCall(MatSetType(B,((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B,NULL));
  } else B = *matout;

  m = a->A->rmap->n; n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A,(const PetscScalar**)&v));
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(PetscMalloc1(m,&rwork));
  /* rwork holds the global row indices of the local rows of A, which become
     the global column indices in B */
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    PetscCall(MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES));
    v += lda;                   /* advance to the next local column */
  }
  PetscCall(MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    PetscCall(MatHeaderMerge(A,&B));
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

/* Set up layouts and default preallocation if none was given */
PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!A->preallocated) {
    PetscCall(MatMPIDenseSetPreallocation(A,NULL));
  }
  PetscFunctionReturn(0);
}

/* Y += alpha*X, delegated to the inner sequential matrices */
PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  PetscCall(MatAXPY(A->A,alpha,B->A,str));
  PetscFunctionReturn(0);
}

/* Complex-conjugate all entries, delegated to the inner sequential matrix */
PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense *a = (Mat_MPIDense*)mat->data;

  PetscFunctionBegin;
  PetscCall(MatConjugate(a->A));
  PetscFunctionReturn(0);
}

/* Keep only the real part of each entry */
PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscFunctionReturn(0);
}

/* Keep only the imaginary part of each entry */
PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscFunctionReturn(0);
}

/* Extract column col into v via the inner matrix's op; errors if unavailable */
static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
{
  Mat_MPIDense *a = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCheck(a->A,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
  PetscCheck(a->A->ops->getcolumnvector,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
  PetscCall((*a->A->ops->getcolumnvector)(a->A,v,col));
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat,PetscInt,PetscReal*);

/* Per-column reductions (norms, sums, means): compute locally, reduce globally.
   MEAN reductions are computed as global SUMs then divided by the global row
   count; NORM_2 sums squares locally and takes sqrt after the reduction. */
PetscErrorCode MatGetColumnReductions_MPIDense(Mat A,PetscInt type,PetscReal *reductions)
{
  PetscInt     i,m,n;
  Mat_MPIDense *a = (Mat_MPIDense*) A->data;
  PetscReal    *work;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A,&m,&n));
  PetscCall(PetscMalloc1(n,&work));
  if (type == REDUCTION_MEAN_REALPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_REALPART,work));
  } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_IMAGINARYPART,work));
  } else {
    PetscCall(MatGetColumnReductions_SeqDense(a->A,type,work));
  }
  if (type == NORM_2) {
    /* square local column norms so the MPI_SUM below accumulates |a_ij|^2 */
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,A->hdr.comm));
  } else {
    PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,A->hdr.comm));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/* Add alpha to the diagonal entirely on the GPU */
PetscErrorCode MatShift_MPIDenseCUDA(Mat A,PetscScalar alpha)
{
  PetscScalar *da;
  PetscInt    lda;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArray(A,&da));
  PetscCall(MatDenseGetLDA(A,&lda));
  PetscCall(PetscInfo(A,"Performing Shift on backend\n"));
  PetscCall(MatShift_DenseCUDA_Private(da,alpha,lda,A->rmap->rstart,A->rmap->rend,A->cmap->N));
  PetscCall(MatDenseCUDARestoreArray(A,&da));
  PetscFunctionReturn(0);
}

/* Expose column col as a CUDA Vec aliasing the matrix storage; the matrix is
   marked in-use (vecinuse) until the matching Restore call */
static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscInt     lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) { /* lazily create the reusable wrapper vector (no array yet) */
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
  }
  a->vecinuse = col + 1;        /* +1 so column 0 still reads as "in use" */
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Release the column vector obtained with MatDenseGetColumnVec_MPIDenseCUDA */
static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAResetArray(a->cvec));
  *v = NULL;
  PetscFunctionReturn(0);
}

/* Read-only variant: same as GetColumnVec but the returned Vec is read-locked */
static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscInt     lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
  }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  PetscCall(VecLockReadPush(a->cvec));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Release a read-only column vector; pops the read lock before resetting */
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecCUDAResetArray(a->cvec));
  *v = NULL;
  PetscFunctionReturn(0);
}

/* Write-only variant: uses the Write array accessor (no device sync of old data) */
static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscInt     lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
  }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Release a write-only column vector */
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAResetArray(a->cvec));
  *v = NULL;
  PetscFunctionReturn(0);
}

/* Substitute the device array of the inner matrix; forbidden while a column
   vector or submatrix view is outstanding */
static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAPlaceArray(l->A,a));
  PetscFunctionReturn(0);
}

/* Undo MatDenseCUDAPlaceArray */
static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAResetArray(l->A));
  PetscFunctionReturn(0);
}

/* Permanently replace the device array of the inner matrix */
static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAReplaceArray(l->A,a));
  PetscFunctionReturn(0);
}

/* Thin forwarding wrappers: device array access delegates to the inner matrix */
static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArrayWrite(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArrayWrite(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArrayRead(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArrayRead(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArray(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArray(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);

/* Switch between host and device execution: rebinds the inner matrix, swaps
   the column-vector accessor function pointers between the CPU and CUDA
   implementations, and discards cached helper objects of the wrong type */
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense *d = (Mat_MPIDense*)mat->data;

  PetscFunctionBegin;
  PetscCheck(!d->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!d->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) PetscCall(MatBindToCPU(d->A,bind));
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    /* cached cvec/cmat must match the target backend; destroy stale ones */
    PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda));
    if (!iscuda) {
      PetscCall(VecDestroy(&d->cvec));
    }
    PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda));
    if (!iscuda) {
      PetscCall(MatDestroy(&d->cmat));
    }
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA));
    mat->ops->shift = MatShift_MPIDenseCUDA;
  } else {
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense));
    mat->ops->shift = MatShift_MPIDense;
  }
  if (d->cmat) PetscCall(MatBindToCPU(d->cmat,bind));
  PetscFunctionReturn(0);
}

/* Preallocate the (CUDA) inner matrix, optionally adopting user device memory.
   Silently returns when the matrix is not of type MATMPIDENSECUDA. */
PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
{
  Mat_MPIDense *d = (Mat_MPIDense*)A->data;
  PetscBool    iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda));
  if (!iscuda) PetscFunctionReturn(0);
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!d->A) {
    PetscCall(MatCreate(PETSC_COMM_SELF,&d->A));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)d->A));
    /* the inner matrix stores all N columns of the local row block */
    PetscCall(MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N));
  }
  PetscCall(MatSetType(d->A,MATSEQDENSECUDA));
  PetscCall(MatSeqDenseCUDASetPreallocation(d->A,d_data));
  A->preallocated = PETSC_TRUE;
  A->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}
#endif

/* Fill with random values, delegated to the inner sequential matrix */
static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense *d = (Mat_MPIDense*)x->data;

  PetscFunctionBegin;
  PetscCall(MatSetRandom(d->A,rctx));
  PetscFunctionReturn(0);
}

/* A dense matrix always stores its diagonal, so nothing is ever missing */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
static
PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);

/* -------------------------------------------------------------------*/
/* Positional dispatch table for MATMPIDENSE; the numbered comments mark the
   slot indices in struct _MatOps — entries must not be reordered */
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 10*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 29*/ MatSetUp_MPIDense,
                                        NULL,
                                        NULL,
                                        MatGetDiagonalBlock_MPIDense,
                                        NULL,
                                /* 34*/ MatDuplicate_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        NULL,
                                        MatGetValues_MPIDense,
                                        MatCopy_MPIDense,
                                /* 44*/ NULL,
                                        MatScale_MPIDense,
                                        MatShift_MPIDense,
                                        NULL,
                                        NULL,
                                /* 49*/ MatSetRandom_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 54*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        NULL,
                                        NULL,
                                /* 64*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 69*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 74*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 79*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 83*/ MatLoad_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 89*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 94*/ NULL,
                                        NULL,
                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                        NULL,
                                /* 99*/ MatProductSetFromOptions_MPIDense,
                                        NULL,
                                        NULL,
                                        MatConjugate_MPIDense,
                                        NULL,
                                /*104*/ NULL,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        NULL,
                                        NULL,
                                /*109*/ NULL,
                                        NULL,
                                        NULL,
                                        MatGetColumnVector_MPIDense,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*119*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*124*/ NULL,
                                        MatGetColumnReductions_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*129*/ NULL,
                                        NULL,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        NULL,
                                /*134*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*139*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                /*145*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL
};

/* Create (or retype) the inner sequential matrix holding the local rows and
   all global columns, optionally adopting a user-supplied array */
PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense *a = (Mat_MPIDense*)mat->data;
  PetscBool    iscuda;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  if (!a->A) {
    PetscCall(MatCreate(PETSC_COMM_SELF,&a->A));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
    PetscCall(MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N));
  }
  PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda));
  PetscCall(MatSetType(a->A,iscuda ?
MATSEQDENSECUDA : MATSEQDENSE));
  PetscCall(MatSeqDenseSetPreallocation(a->A,data));
  mat->preallocated = PETSC_TRUE;
  mat->assembled = PETSC_TRUE;
  PetscFunctionReturn(0);
}

/* Convert a parallel AIJ matrix to parallel dense: densify the local part,
   then stitch the sequential pieces back into an MPI matrix */
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat B,C;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&C));
  PetscCall(MatConvert_SeqAIJ_SeqDense(C,MATSEQDENSE,MAT_INITIAL_MATRIX,&B));
  PetscCall(MatDestroy(&C));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A,&C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(0);
}

/* Convert a parallel dense matrix to parallel AIJ via the local matrix */
PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat B,C;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLocalMatrix(A,&C));
  PetscCall(MatConvert_SeqDense_SeqAIJ(C,MATSEQAIJ,MAT_INITIAL_MATRIX,&B));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A,&C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
/* Convert to Elemental format by inserting the whole local row block */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat         mat_elemental;
  PetscScalar *v;
  PetscInt    m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    PetscCall(MatZeroEntries(*newmat));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N));
    PetscCall(MatSetType(mat_elemental,MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
    PetscCall(MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE));
  }

  PetscCall(PetscMalloc2(m,&rows,N,&cols));
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  PetscCall(MatDenseGetArray(A,&v));
  PetscCall(MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES));
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDenseRestoreArray(A,&v));
  PetscCall(PetscFree2(rows,cols));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A,&mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif

/* Raw pointer access to one column of the local storage */
static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetColumn(mat->A,col,vals));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreColumn(mat->A,vals));
  PetscFunctionReturn(0);
}

/* Assemble an MPI dense matrix whose local block is the given sequential
   matrix; symbolic phase (MAT_INITIAL_MATRIX) creates the parallel matrix,
   numeric phase copies the values */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  Mat_MPIDense *mat;
  PetscInt     m,nloc,N;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat,&m,&N));
  PetscCall(MatGetLocalSize(inmat,NULL,&nloc));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) {
      PetscCall(PetscSplitOwnership(comm,&n,&N));
    }
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm));
    PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N);

    PetscCall(MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat));
    PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
  }

  /* numeric phase */
  mat = (Mat_MPIDense*)(*outmat)->data;
  PetscCall(MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN));
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/* Convert MATMPIDENSECUDA back to host MATMPIDENSE: rebind to CPU, restore the
   host vector type, and unregister the CUDA-only composed methods */
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat          B;
  Mat_MPIDense *m;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatDuplicate(M,MAT_COPY_VALUES,newmat));
  } else if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatCopy(M,*newmat,SAME_NONZERO_PATTERN));
  }

  B = *newmat;
  PetscCall(MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE));
  PetscCall(PetscFree(B->defaultvectype));
  PetscCall(PetscStrallocpy(VECSTANDARD,&B->defaultvectype));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL));
  m = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    PetscCall(MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A));
  }
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}

/* Convert host MATMPIDENSE to MATMPIDENSECUDA: switch the default vector type
   to CUDA, register the CUDA composed methods, convert the inner matrix, and
   bind to the device */
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat          B;
  Mat_MPIDense *m;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatDuplicate(M,MAT_COPY_VALUES,newmat));
  } else if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatCopy(M,*newmat,SAME_NONZERO_PATTERN));
  }

  B = *newmat;
  PetscCall(PetscFree(B->defaultvectype));
  PetscCall(PetscStrallocpy(VECCUDA,&B->defaultvectype));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                            MatDenseCUDAGetArrayRead_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                           MatDenseCUDAGetArrayWrite_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                            MatDenseCUDARestoreArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                        MatDenseCUDARestoreArrayRead_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",                       MatDenseCUDARestoreArrayWrite_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                              MatDenseCUDAPlaceArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                              MatDenseCUDAResetArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                            MatDenseCUDAReplaceArray_MPIDenseCUDA));
  m = (Mat_MPIDense*)(B->data);
  if (m->A) {
    PetscCall(MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A));
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  PetscCall(MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE));

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
#endif

/* Host version: expose column col as an MPI Vec aliasing the matrix storage */
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscInt     lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) { /* lazily create the reusable wrapper vector (no array yet) */
    PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
  }
  a->vecinuse = col + 1;        /* +1 so column 0 still reads as "in use" */
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Release the column vector obtained with MatDenseGetColumnVec_MPIDense */
PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  *v = NULL;
  PetscFunctionReturn(0);
}

/* Read-only host variant (continues beyond this view) */
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscInt     lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1778 if (!a->cvec) { 1779 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec)); 1780 } 1781 a->vecinuse = col + 1; 1782 PetscCall(MatDenseGetLDA(a->A,&lda)); 1783 PetscCall(MatDenseGetArrayRead(a->A,&a->ptrinuse)); 1784 PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda)); 1785 PetscCall(VecLockReadPush(a->cvec)); 1786 *v = a->cvec; 1787 PetscFunctionReturn(0); 1788 } 1789 1790 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v) 1791 { 1792 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1793 1794 PetscFunctionBegin; 1795 PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1796 PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 1797 a->vecinuse = 0; 1798 PetscCall(MatDenseRestoreArrayRead(a->A,&a->ptrinuse)); 1799 PetscCall(VecLockReadPop(a->cvec)); 1800 PetscCall(VecResetArray(a->cvec)); 1801 *v = NULL; 1802 PetscFunctionReturn(0); 1803 } 1804 1805 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v) 1806 { 1807 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1808 PetscInt lda; 1809 1810 PetscFunctionBegin; 1811 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1812 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1813 if (!a->cvec) { 1814 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec)); 1815 } 1816 a->vecinuse = col + 1; 1817 PetscCall(MatDenseGetLDA(a->A,&lda)); 1818 PetscCall(MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse)); 1819 PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * 
(size_t)lda)); 1820 *v = a->cvec; 1821 PetscFunctionReturn(0); 1822 } 1823 1824 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v) 1825 { 1826 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1827 1828 PetscFunctionBegin; 1829 PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1830 PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 1831 a->vecinuse = 0; 1832 PetscCall(MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse)); 1833 PetscCall(VecResetArray(a->cvec)); 1834 *v = NULL; 1835 PetscFunctionReturn(0); 1836 } 1837 1838 PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt rbegin,PetscInt rend,PetscInt cbegin,PetscInt cend,Mat *v) 1839 { 1840 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1841 Mat_MPIDense *c; 1842 MPI_Comm comm; 1843 PetscInt pbegin, pend; 1844 1845 PetscFunctionBegin; 1846 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 1847 PetscCheck(!a->vecinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1848 PetscCheck(!a->matinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1849 pbegin = PetscMax(0,PetscMin(A->rmap->rend,rbegin)-A->rmap->rstart); 1850 pend = PetscMin(A->rmap->n,PetscMax(0,rend-A->rmap->rstart)); 1851 if (!a->cmat) { 1852 PetscCall(MatCreate(comm,&a->cmat)); 1853 PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat)); 1854 PetscCall(MatSetType(a->cmat,((PetscObject)A)->type_name)); 1855 if (rend-rbegin==A->rmap->N) PetscCall(PetscLayoutReference(A->rmap,&a->cmat->rmap)); 1856 else { 1857 PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap,pend-pbegin)); 1858 PetscCall(PetscLayoutSetSize(a->cmat->rmap,rend-rbegin)); 1859 PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 1860 } 1861 PetscCall(PetscLayoutSetSize(a->cmat->cmap,cend-cbegin)); 1862 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 1863 } else { 1864 PetscBool same = 
(PetscBool)(rend-rbegin == a->cmat->rmap->N); 1865 if (same && a->cmat->rmap->N != A->rmap->N) { 1866 same = (PetscBool)(pend-pbegin == a->cmat->rmap->n); 1867 PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&same,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A))); 1868 } 1869 if (!same) { 1870 PetscCall(PetscLayoutDestroy(&a->cmat->rmap)); 1871 PetscCall(PetscLayoutCreate(comm,&a->cmat->rmap)); 1872 PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap,pend-pbegin)); 1873 PetscCall(PetscLayoutSetSize(a->cmat->rmap,rend-rbegin)); 1874 PetscCall(PetscLayoutSetUp(a->cmat->rmap)); 1875 } 1876 if (cend-cbegin != a->cmat->cmap->N) { 1877 PetscCall(PetscLayoutDestroy(&a->cmat->cmap)); 1878 PetscCall(PetscLayoutCreate(comm,&a->cmat->cmap)); 1879 PetscCall(PetscLayoutSetSize(a->cmat->cmap,cend-cbegin)); 1880 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 1881 } 1882 } 1883 c = (Mat_MPIDense*)a->cmat->data; 1884 PetscCheck(!c->A,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1885 PetscCall(MatDenseGetSubMatrix(a->A,pbegin,pend,cbegin,cend,&c->A)); 1886 a->cmat->preallocated = PETSC_TRUE; 1887 a->cmat->assembled = PETSC_TRUE; 1888 a->matinuse = cbegin + 1; 1889 *v = a->cmat; 1890 PetscFunctionReturn(0); 1891 } 1892 1893 PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v) 1894 { 1895 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1896 Mat_MPIDense *c; 1897 1898 PetscFunctionBegin; 1899 PetscCheck(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first"); 1900 PetscCheck(a->cmat,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix"); 1901 PetscCheck(*v == a->cmat,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()"); 1902 a->matinuse = 0; 1903 c = (Mat_MPIDense*)a->cmat->data; 1904 PetscCall(MatDenseRestoreSubMatrix(a->A,&c->A)); 1905 *v = NULL; 1906 PetscFunctionReturn(0); 1907 } 1908 1909 /*MC 1910 MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be 
used for distributed dense matrices. 1911 1912 Options Database Keys: 1913 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions() 1914 1915 Level: beginner 1916 1917 .seealso: `MatCreateDense()` 1918 1919 M*/ 1920 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) 1921 { 1922 Mat_MPIDense *a; 1923 1924 PetscFunctionBegin; 1925 PetscCall(PetscNewLog(mat,&a)); 1926 mat->data = (void*)a; 1927 PetscCall(PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps))); 1928 1929 mat->insertmode = NOT_SET_VALUES; 1930 1931 /* build cache for off array entries formed */ 1932 a->donotstash = PETSC_FALSE; 1933 1934 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash)); 1935 1936 /* stuff used for matrix vector multiply */ 1937 a->lvec = NULL; 1938 a->Mvctx = NULL; 1939 a->roworiented = PETSC_TRUE; 1940 1941 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense)); 1942 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense)); 1943 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense)); 1944 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense)); 1945 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense)); 1946 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense)); 1947 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense)); 1948 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense)); 1949 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense)); 1950 
PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense)); 1951 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense)); 1952 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense)); 1953 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense)); 1954 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense)); 1955 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense)); 1956 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense)); 1957 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense)); 1958 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense)); 1959 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense)); 1960 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense)); 1961 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",MatConvert_MPIDense_MPIAIJ)); 1962 #if defined(PETSC_HAVE_ELEMENTAL) 1963 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental)); 1964 #endif 1965 #if defined(PETSC_HAVE_SCALAPACK) 1966 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK)); 1967 #endif 1968 #if defined(PETSC_HAVE_CUDA) 1969 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA)); 1970 #endif 
1971 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense)); 1972 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense)); 1973 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ)); 1974 #if defined(PETSC_HAVE_CUDA) 1975 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense)); 1976 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ)); 1977 #endif 1978 1979 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense)); 1980 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense)); 1981 PetscCall(PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE)); 1982 PetscFunctionReturn(0); 1983 } 1984 1985 /*MC 1986 MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs. 1987 1988 Options Database Keys: 1989 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions() 1990 1991 Level: beginner 1992 1993 .seealso: 1994 1995 M*/ 1996 #if defined(PETSC_HAVE_CUDA) 1997 #include <petsc/private/deviceimpl.h> 1998 PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B) 1999 { 2000 PetscFunctionBegin; 2001 PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA)); 2002 PetscCall(MatCreate_MPIDense(B)); 2003 PetscCall(MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B)); 2004 PetscFunctionReturn(0); 2005 } 2006 #endif 2007 2008 /*MC 2009 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 
2010 2011 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 2012 and MATMPIDENSE otherwise. 2013 2014 Options Database Keys: 2015 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 2016 2017 Level: beginner 2018 2019 .seealso: `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA` 2020 M*/ 2021 2022 /*MC 2023 MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs. 2024 2025 This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator, 2026 and MATMPIDENSECUDA otherwise. 2027 2028 Options Database Keys: 2029 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions() 2030 2031 Level: beginner 2032 2033 .seealso: `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATDENSE` 2034 M*/ 2035 2036 /*@C 2037 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 2038 2039 Collective 2040 2041 Input Parameters: 2042 . B - the matrix 2043 - data - optional location of matrix data. Set data=NULL for PETSc 2044 to control all matrix memory allocation. 2045 2046 Notes: 2047 The dense format is fully compatible with standard Fortran 77 2048 storage by columns. 2049 2050 The data input variable is intended primarily for Fortran programmers 2051 who wish to allocate their own matrix memory space. Most users should 2052 set data=NULL. 2053 2054 Level: intermediate 2055 2056 .seealso: `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 2057 @*/ 2058 PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data) 2059 { 2060 PetscFunctionBegin; 2061 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2062 PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data)); 2063 PetscFunctionReturn(0); 2064 } 2065 2066 /*@ 2067 MatDensePlaceArray - Allows one to replace the array in a dense matrix with an 2068 array provided by the user. 
This is useful to avoid copying an array 2069 into a matrix 2070 2071 Not Collective 2072 2073 Input Parameters: 2074 + mat - the matrix 2075 - array - the array in column major order 2076 2077 Notes: 2078 You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be 2079 freed when the matrix is destroyed. 2080 2081 Level: developer 2082 2083 .seealso: `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()` 2084 2085 @*/ 2086 PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar *array) 2087 { 2088 PetscFunctionBegin; 2089 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2090 PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array)); 2091 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2092 #if defined(PETSC_HAVE_CUDA) 2093 mat->offloadmask = PETSC_OFFLOAD_CPU; 2094 #endif 2095 PetscFunctionReturn(0); 2096 } 2097 2098 /*@ 2099 MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray() 2100 2101 Not Collective 2102 2103 Input Parameters: 2104 . mat - the matrix 2105 2106 Notes: 2107 You can only call this after a call to MatDensePlaceArray() 2108 2109 Level: developer 2110 2111 .seealso: `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()` 2112 2113 @*/ 2114 PetscErrorCode MatDenseResetArray(Mat mat) 2115 { 2116 PetscFunctionBegin; 2117 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2118 PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat)); 2119 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2120 PetscFunctionReturn(0); 2121 } 2122 2123 /*@ 2124 MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an 2125 array provided by the user. 
This is useful to avoid copying an array 2126 into a matrix 2127 2128 Not Collective 2129 2130 Input Parameters: 2131 + mat - the matrix 2132 - array - the array in column major order 2133 2134 Notes: 2135 The memory passed in MUST be obtained with PetscMalloc() and CANNOT be 2136 freed by the user. It will be freed when the matrix is destroyed. 2137 2138 Level: developer 2139 2140 .seealso: `MatDenseGetArray()`, `VecReplaceArray()` 2141 @*/ 2142 PetscErrorCode MatDenseReplaceArray(Mat mat,const PetscScalar *array) 2143 { 2144 PetscFunctionBegin; 2145 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2146 PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array)); 2147 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2148 #if defined(PETSC_HAVE_CUDA) 2149 mat->offloadmask = PETSC_OFFLOAD_CPU; 2150 #endif 2151 PetscFunctionReturn(0); 2152 } 2153 2154 #if defined(PETSC_HAVE_CUDA) 2155 /*@C 2156 MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an 2157 array provided by the user. This is useful to avoid copying an array 2158 into a matrix 2159 2160 Not Collective 2161 2162 Input Parameters: 2163 + mat - the matrix 2164 - array - the array in column major order 2165 2166 Notes: 2167 You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be 2168 freed when the matrix is destroyed. The array must have been allocated with cudaMalloc(). 
2169 2170 Level: developer 2171 2172 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()` 2173 @*/ 2174 PetscErrorCode MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array) 2175 { 2176 PetscFunctionBegin; 2177 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2178 PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array)); 2179 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2180 mat->offloadmask = PETSC_OFFLOAD_GPU; 2181 PetscFunctionReturn(0); 2182 } 2183 2184 /*@C 2185 MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray() 2186 2187 Not Collective 2188 2189 Input Parameters: 2190 . mat - the matrix 2191 2192 Notes: 2193 You can only call this after a call to MatDenseCUDAPlaceArray() 2194 2195 Level: developer 2196 2197 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()` 2198 2199 @*/ 2200 PetscErrorCode MatDenseCUDAResetArray(Mat mat) 2201 { 2202 PetscFunctionBegin; 2203 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2204 PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat)); 2205 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2206 PetscFunctionReturn(0); 2207 } 2208 2209 /*@C 2210 MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an 2211 array provided by the user. This is useful to avoid copying an array 2212 into a matrix 2213 2214 Not Collective 2215 2216 Input Parameters: 2217 + mat - the matrix 2218 - array - the array in column major order 2219 2220 Notes: 2221 This permanently replaces the GPU array and frees the memory associated with the old GPU array. 2222 The memory passed in CANNOT be freed by the user. It will be freed 2223 when the matrix is destroyed. The array should respect the matrix leading dimension. 
2224 2225 Level: developer 2226 2227 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()` 2228 @*/ 2229 PetscErrorCode MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array) 2230 { 2231 PetscFunctionBegin; 2232 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2233 PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array)); 2234 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2235 mat->offloadmask = PETSC_OFFLOAD_GPU; 2236 PetscFunctionReturn(0); 2237 } 2238 2239 /*@C 2240 MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix. 2241 2242 Not Collective 2243 2244 Input Parameters: 2245 . A - the matrix 2246 2247 Output Parameters 2248 . array - the GPU array in column major order 2249 2250 Notes: 2251 The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed. 2252 2253 Level: developer 2254 2255 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArrayRead()` 2256 @*/ 2257 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a) 2258 { 2259 PetscFunctionBegin; 2260 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2261 PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a)); 2262 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2263 PetscFunctionReturn(0); 2264 } 2265 2266 /*@C 2267 MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite(). 
2268 2269 Not Collective 2270 2271 Input Parameters: 2272 + A - the matrix 2273 - array - the GPU array in column major order 2274 2275 Notes: 2276 2277 Level: developer 2278 2279 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()` 2280 @*/ 2281 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a) 2282 { 2283 PetscFunctionBegin; 2284 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2285 PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a)); 2286 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2287 A->offloadmask = PETSC_OFFLOAD_GPU; 2288 PetscFunctionReturn(0); 2289 } 2290 2291 /*@C 2292 MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed. 2293 2294 Not Collective 2295 2296 Input Parameters: 2297 . A - the matrix 2298 2299 Output Parameters 2300 . array - the GPU array in column major order 2301 2302 Notes: 2303 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). 2304 2305 Level: developer 2306 2307 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()` 2308 @*/ 2309 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a) 2310 { 2311 PetscFunctionBegin; 2312 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2313 PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a)); 2314 PetscFunctionReturn(0); 2315 } 2316 2317 /*@C 2318 MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead(). 
2319 2320 Not Collective 2321 2322 Input Parameters: 2323 + A - the matrix 2324 - array - the GPU array in column major order 2325 2326 Notes: 2327 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). 2328 2329 Level: developer 2330 2331 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()` 2332 @*/ 2333 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a) 2334 { 2335 PetscFunctionBegin; 2336 PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a)); 2337 PetscFunctionReturn(0); 2338 } 2339 2340 /*@C 2341 MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed. 2342 2343 Not Collective 2344 2345 Input Parameters: 2346 . A - the matrix 2347 2348 Output Parameters 2349 . array - the GPU array in column major order 2350 2351 Notes: 2352 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead(). 2353 2354 Level: developer 2355 2356 .seealso: `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()` 2357 @*/ 2358 PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a) 2359 { 2360 PetscFunctionBegin; 2361 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2362 PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a)); 2363 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2364 PetscFunctionReturn(0); 2365 } 2366 2367 /*@C 2368 MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray(). 
2369 2370 Not Collective 2371 2372 Input Parameters: 2373 + A - the matrix 2374 - array - the GPU array in column major order 2375 2376 Notes: 2377 2378 Level: developer 2379 2380 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()` 2381 @*/ 2382 PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a) 2383 { 2384 PetscFunctionBegin; 2385 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2386 PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a)); 2387 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2388 A->offloadmask = PETSC_OFFLOAD_GPU; 2389 PetscFunctionReturn(0); 2390 } 2391 #endif 2392 2393 /*@C 2394 MatCreateDense - Creates a matrix in dense format. 2395 2396 Collective 2397 2398 Input Parameters: 2399 + comm - MPI communicator 2400 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2401 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2402 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 2403 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 2404 - data - optional location of matrix data. Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 2405 to control all matrix memory allocation. 2406 2407 Output Parameter: 2408 . A - the matrix 2409 2410 Notes: 2411 The dense format is fully compatible with standard Fortran 77 2412 storage by columns. 2413 Note that, although local portions of the matrix are stored in column-major 2414 order, the matrix is partitioned across MPI ranks by row. 2415 2416 The data input variable is intended primarily for Fortran programmers 2417 who wish to allocate their own matrix memory space. Most users should 2418 set data=NULL (PETSC_NULL_SCALAR for Fortran users). 2419 2420 The user MUST specify either the local or global matrix dimensions 2421 (possibly both). 
2422 2423 Level: intermediate 2424 2425 .seealso: `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 2426 @*/ 2427 PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 2428 { 2429 PetscFunctionBegin; 2430 PetscCall(MatCreate(comm,A)); 2431 PetscCall(MatSetSizes(*A,m,n,M,N)); 2432 PetscCall(MatSetType(*A,MATDENSE)); 2433 PetscCall(MatSeqDenseSetPreallocation(*A,data)); 2434 PetscCall(MatMPIDenseSetPreallocation(*A,data)); 2435 PetscFunctionReturn(0); 2436 } 2437 2438 #if defined(PETSC_HAVE_CUDA) 2439 /*@C 2440 MatCreateDenseCUDA - Creates a matrix in dense format using CUDA. 2441 2442 Collective 2443 2444 Input Parameters: 2445 + comm - MPI communicator 2446 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2447 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2448 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 2449 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 2450 - data - optional location of GPU matrix data. Set data=NULL for PETSc 2451 to control matrix memory allocation. 2452 2453 Output Parameter: 2454 . 
A - the matrix 2455 2456 Notes: 2457 2458 Level: intermediate 2459 2460 .seealso: `MatCreate()`, `MatCreateDense()` 2461 @*/ 2462 PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 2463 { 2464 PetscFunctionBegin; 2465 PetscCall(MatCreate(comm,A)); 2466 PetscValidLogicalCollectiveBool(*A,!!data,6); 2467 PetscCall(MatSetSizes(*A,m,n,M,N)); 2468 PetscCall(MatSetType(*A,MATDENSECUDA)); 2469 PetscCall(MatSeqDenseCUDASetPreallocation(*A,data)); 2470 PetscCall(MatMPIDenseCUDASetPreallocation(*A,data)); 2471 PetscFunctionReturn(0); 2472 } 2473 #endif 2474 2475 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 2476 { 2477 Mat mat; 2478 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 2479 2480 PetscFunctionBegin; 2481 *newmat = NULL; 2482 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&mat)); 2483 PetscCall(MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N)); 2484 PetscCall(MatSetType(mat,((PetscObject)A)->type_name)); 2485 a = (Mat_MPIDense*)mat->data; 2486 2487 mat->factortype = A->factortype; 2488 mat->assembled = PETSC_TRUE; 2489 mat->preallocated = PETSC_TRUE; 2490 2491 mat->insertmode = NOT_SET_VALUES; 2492 a->donotstash = oldmat->donotstash; 2493 2494 PetscCall(PetscLayoutReference(A->rmap,&mat->rmap)); 2495 PetscCall(PetscLayoutReference(A->cmap,&mat->cmap)); 2496 2497 PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A)); 2498 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A)); 2499 2500 *newmat = mat; 2501 PetscFunctionReturn(0); 2502 } 2503 2504 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer) 2505 { 2506 PetscBool isbinary; 2507 #if defined(PETSC_HAVE_HDF5) 2508 PetscBool ishdf5; 2509 #endif 2510 2511 PetscFunctionBegin; 2512 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2513 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2514 /* force binary viewer to load .info file if it has not yet done so */ 2515 
PetscCall(PetscViewerSetUp(viewer)); 2516 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 2517 #if defined(PETSC_HAVE_HDF5) 2518 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5)); 2519 #endif 2520 if (isbinary) { 2521 PetscCall(MatLoad_Dense_Binary(newMat,viewer)); 2522 #if defined(PETSC_HAVE_HDF5) 2523 } else if (ishdf5) { 2524 PetscCall(MatLoad_Dense_HDF5(newMat,viewer)); 2525 #endif 2526 } else SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2527 PetscFunctionReturn(0); 2528 } 2529 2530 static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag) 2531 { 2532 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 2533 Mat a,b; 2534 2535 PetscFunctionBegin; 2536 a = matA->A; 2537 b = matB->A; 2538 PetscCall(MatEqual(a,b,flag)); 2539 PetscCall(MPIU_Allreduce(MPI_IN_PLACE,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A))); 2540 PetscFunctionReturn(0); 2541 } 2542 2543 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data) 2544 { 2545 Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data; 2546 2547 PetscFunctionBegin; 2548 PetscCall(PetscFree2(atb->sendbuf,atb->recvcounts)); 2549 PetscCall(MatDestroy(&atb->atb)); 2550 PetscCall(PetscFree(atb)); 2551 PetscFunctionReturn(0); 2552 } 2553 2554 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data) 2555 { 2556 Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data; 2557 2558 PetscFunctionBegin; 2559 PetscCall(PetscFree2(abt->buf[0],abt->buf[1])); 2560 PetscCall(PetscFree2(abt->recvcounts,abt->recvdispls)); 2561 PetscCall(PetscFree(abt)); 2562 PetscFunctionReturn(0); 2563 } 2564 2565 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2566 { 2567 Mat_MPIDense *a=(Mat_MPIDense*)A->data, 
*b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2568 Mat_TransMatMultDense *atb; 2569 MPI_Comm comm; 2570 PetscMPIInt size,*recvcounts; 2571 PetscScalar *carray,*sendbuf; 2572 const PetscScalar *atbarray; 2573 PetscInt i,cN=C->cmap->N,proc,k,j,lda; 2574 const PetscInt *ranges; 2575 2576 PetscFunctionBegin; 2577 MatCheckProduct(C,3); 2578 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2579 atb = (Mat_TransMatMultDense *)C->product->data; 2580 recvcounts = atb->recvcounts; 2581 sendbuf = atb->sendbuf; 2582 2583 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 2584 PetscCallMPI(MPI_Comm_size(comm,&size)); 2585 2586 /* compute atbarray = aseq^T * bseq */ 2587 PetscCall(MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb)); 2588 2589 PetscCall(MatGetOwnershipRanges(C,&ranges)); 2590 2591 /* arrange atbarray into sendbuf */ 2592 PetscCall(MatDenseGetArrayRead(atb->atb,&atbarray)); 2593 PetscCall(MatDenseGetLDA(atb->atb,&lda)); 2594 for (proc=0, k=0; proc<size; proc++) { 2595 for (j=0; j<cN; j++) { 2596 for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*lda]; 2597 } 2598 } 2599 PetscCall(MatDenseRestoreArrayRead(atb->atb,&atbarray)); 2600 2601 /* sum all atbarray to local values of C */ 2602 PetscCall(MatDenseGetArrayWrite(c->A,&carray)); 2603 PetscCallMPI(MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm)); 2604 PetscCall(MatDenseRestoreArrayWrite(c->A,&carray)); 2605 PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); 2606 PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); 2607 PetscFunctionReturn(0); 2608 } 2609 2610 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 2611 { 2612 MPI_Comm comm; 2613 PetscMPIInt size; 2614 PetscInt cm=A->cmap->n,cM,cN=B->cmap->N; 2615 Mat_TransMatMultDense *atb; 2616 PetscBool cisdense; 2617 PetscInt i; 2618 const PetscInt *ranges; 2619 
2620 PetscFunctionBegin; 2621 MatCheckProduct(C,3); 2622 PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2623 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 2624 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) { 2625 SETERRQ(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2626 } 2627 2628 /* create matrix product C */ 2629 PetscCall(MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N)); 2630 PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"")); 2631 if (!cisdense) { 2632 PetscCall(MatSetType(C,((PetscObject)A)->type_name)); 2633 } 2634 PetscCall(MatSetUp(C)); 2635 2636 /* create data structure for reuse C */ 2637 PetscCallMPI(MPI_Comm_size(comm,&size)); 2638 PetscCall(PetscNew(&atb)); 2639 cM = C->rmap->N; 2640 PetscCall(PetscMalloc2(cM*cN,&atb->sendbuf,size,&atb->recvcounts)); 2641 PetscCall(MatGetOwnershipRanges(C,&ranges)); 2642 for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN; 2643 2644 C->product->data = atb; 2645 C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 2646 PetscFunctionReturn(0); 2647 } 2648 2649 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2650 { 2651 MPI_Comm comm; 2652 PetscMPIInt i, size; 2653 PetscInt maxRows, bufsiz; 2654 PetscMPIInt tag; 2655 PetscInt alg; 2656 Mat_MatTransMultDense *abt; 2657 Mat_Product *product = C->product; 2658 PetscBool flg; 2659 2660 PetscFunctionBegin; 2661 MatCheckProduct(C,4); 2662 PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2663 /* check local size of A and B */ 2664 PetscCheck(A->cmap->n == B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are 
incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")",A->cmap->n,B->cmap->n); 2665 2666 PetscCall(PetscStrcmp(product->alg,"allgatherv",&flg)); 2667 alg = flg ? 0 : 1; 2668 2669 /* setup matrix product C */ 2670 PetscCall(MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N)); 2671 PetscCall(MatSetType(C,MATMPIDENSE)); 2672 PetscCall(MatSetUp(C)); 2673 PetscCall(PetscObjectGetNewTag((PetscObject)C,&tag)); 2674 2675 /* create data structure for reuse C */ 2676 PetscCall(PetscObjectGetComm((PetscObject)C,&comm)); 2677 PetscCallMPI(MPI_Comm_size(comm,&size)); 2678 PetscCall(PetscNew(&abt)); 2679 abt->tag = tag; 2680 abt->alg = alg; 2681 switch (alg) { 2682 case 1: /* alg: "cyclic" */ 2683 for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i])); 2684 bufsiz = A->cmap->N * maxRows; 2685 PetscCall(PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]))); 2686 break; 2687 default: /* alg: "allgatherv" */ 2688 PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]))); 2689 PetscCall(PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls))); 2690 for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N; 2691 for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i]; 2692 break; 2693 } 2694 2695 C->product->data = abt; 2696 C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 2697 C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense; 2698 PetscFunctionReturn(0); 2699 } 2700 2701 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) 2702 { 2703 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2704 Mat_MatTransMultDense *abt; 2705 MPI_Comm comm; 2706 PetscMPIInt rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom; 2707 PetscScalar *sendbuf, *recvbuf=NULL, *cv; 2708 
PetscInt i,cK=A->cmap->N,k,j,bn; 2709 PetscScalar _DOne=1.0,_DZero=0.0; 2710 const PetscScalar *av,*bv; 2711 PetscBLASInt cm, cn, ck, alda, blda = 0, clda; 2712 MPI_Request reqs[2]; 2713 const PetscInt *ranges; 2714 2715 PetscFunctionBegin; 2716 MatCheckProduct(C,3); 2717 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2718 abt = (Mat_MatTransMultDense*)C->product->data; 2719 PetscCall(PetscObjectGetComm((PetscObject)C,&comm)); 2720 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 2721 PetscCallMPI(MPI_Comm_size(comm,&size)); 2722 PetscCall(MatDenseGetArrayRead(a->A,&av)); 2723 PetscCall(MatDenseGetArrayRead(b->A,&bv)); 2724 PetscCall(MatDenseGetArrayWrite(c->A,&cv)); 2725 PetscCall(MatDenseGetLDA(a->A,&i)); 2726 PetscCall(PetscBLASIntCast(i,&alda)); 2727 PetscCall(MatDenseGetLDA(b->A,&i)); 2728 PetscCall(PetscBLASIntCast(i,&blda)); 2729 PetscCall(MatDenseGetLDA(c->A,&i)); 2730 PetscCall(PetscBLASIntCast(i,&clda)); 2731 PetscCall(MatGetOwnershipRanges(B,&ranges)); 2732 bn = B->rmap->n; 2733 if (blda == bn) { 2734 sendbuf = (PetscScalar*)bv; 2735 } else { 2736 sendbuf = abt->buf[0]; 2737 for (k = 0, i = 0; i < cK; i++) { 2738 for (j = 0; j < bn; j++, k++) { 2739 sendbuf[k] = bv[i * blda + j]; 2740 } 2741 } 2742 } 2743 if (size > 1) { 2744 sendto = (rank + size - 1) % size; 2745 recvfrom = (rank + size + 1) % size; 2746 } else { 2747 sendto = recvfrom = 0; 2748 } 2749 PetscCall(PetscBLASIntCast(cK,&ck)); 2750 PetscCall(PetscBLASIntCast(c->A->rmap->n,&cm)); 2751 recvisfrom = rank; 2752 for (i = 0; i < size; i++) { 2753 /* we have finished receiving in sending, bufs can be read/modified */ 2754 PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 2755 PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 2756 2757 if (nextrecvisfrom != rank) { 2758 /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2759 sendsiz = cK * bn; 
2760 recvsiz = cK * nextbn; 2761 recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; 2762 PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0])); 2763 PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1])); 2764 } 2765 2766 /* local aseq * sendbuf^T */ 2767 PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn)); 2768 if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda)); 2769 2770 if (nextrecvisfrom != rank) { 2771 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2772 PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE)); 2773 } 2774 bn = nextbn; 2775 recvisfrom = nextrecvisfrom; 2776 sendbuf = recvbuf; 2777 } 2778 PetscCall(MatDenseRestoreArrayRead(a->A,&av)); 2779 PetscCall(MatDenseRestoreArrayRead(b->A,&bv)); 2780 PetscCall(MatDenseRestoreArrayWrite(c->A,&cv)); 2781 PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); 2782 PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); 2783 PetscFunctionReturn(0); 2784 } 2785 2786 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2787 { 2788 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2789 Mat_MatTransMultDense *abt; 2790 MPI_Comm comm; 2791 PetscMPIInt size; 2792 PetscScalar *cv, *sendbuf, *recvbuf; 2793 const PetscScalar *av,*bv; 2794 PetscInt blda,i,cK=A->cmap->N,k,j,bn; 2795 PetscScalar _DOne=1.0,_DZero=0.0; 2796 PetscBLASInt cm, cn, ck, alda, clda; 2797 2798 PetscFunctionBegin; 2799 MatCheckProduct(C,3); 2800 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2801 abt = (Mat_MatTransMultDense*)C->product->data; 2802 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 2803 PetscCallMPI(MPI_Comm_size(comm,&size)); 2804 PetscCall(MatDenseGetArrayRead(a->A,&av)); 2805 
PetscCall(MatDenseGetArrayRead(b->A,&bv)); 2806 PetscCall(MatDenseGetArrayWrite(c->A,&cv)); 2807 PetscCall(MatDenseGetLDA(a->A,&i)); 2808 PetscCall(PetscBLASIntCast(i,&alda)); 2809 PetscCall(MatDenseGetLDA(b->A,&blda)); 2810 PetscCall(MatDenseGetLDA(c->A,&i)); 2811 PetscCall(PetscBLASIntCast(i,&clda)); 2812 /* copy transpose of B into buf[0] */ 2813 bn = B->rmap->n; 2814 sendbuf = abt->buf[0]; 2815 recvbuf = abt->buf[1]; 2816 for (k = 0, j = 0; j < bn; j++) { 2817 for (i = 0; i < cK; i++, k++) { 2818 sendbuf[k] = bv[i * blda + j]; 2819 } 2820 } 2821 PetscCall(MatDenseRestoreArrayRead(b->A,&bv)); 2822 PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm)); 2823 PetscCall(PetscBLASIntCast(cK,&ck)); 2824 PetscCall(PetscBLASIntCast(c->A->rmap->n,&cm)); 2825 PetscCall(PetscBLASIntCast(c->A->cmap->n,&cn)); 2826 if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda)); 2827 PetscCall(MatDenseRestoreArrayRead(a->A,&av)); 2828 PetscCall(MatDenseRestoreArrayRead(b->A,&bv)); 2829 PetscCall(MatDenseRestoreArrayWrite(c->A,&cv)); 2830 PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); 2831 PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); 2832 PetscFunctionReturn(0); 2833 } 2834 2835 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2836 { 2837 Mat_MatTransMultDense *abt; 2838 2839 PetscFunctionBegin; 2840 MatCheckProduct(C,3); 2841 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2842 abt = (Mat_MatTransMultDense*)C->product->data; 2843 switch (abt->alg) { 2844 case 1: 2845 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); 2846 break; 2847 default: 2848 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); 2849 break; 2850 } 2851 PetscFunctionReturn(0); 2852 } 2853 2854 PetscErrorCode 
MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) 2855 { 2856 Mat_MatMultDense *ab = (Mat_MatMultDense*)data; 2857 2858 PetscFunctionBegin; 2859 PetscCall(MatDestroy(&ab->Ce)); 2860 PetscCall(MatDestroy(&ab->Ae)); 2861 PetscCall(MatDestroy(&ab->Be)); 2862 PetscCall(PetscFree(ab)); 2863 PetscFunctionReturn(0); 2864 } 2865 2866 #if defined(PETSC_HAVE_ELEMENTAL) 2867 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2868 { 2869 Mat_MatMultDense *ab; 2870 2871 PetscFunctionBegin; 2872 MatCheckProduct(C,3); 2873 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data"); 2874 ab = (Mat_MatMultDense*)C->product->data; 2875 PetscCall(MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae)); 2876 PetscCall(MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be)); 2877 PetscCall(MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce)); 2878 PetscCall(MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C)); 2879 PetscFunctionReturn(0); 2880 } 2881 2882 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 2883 { 2884 Mat Ae,Be,Ce; 2885 Mat_MatMultDense *ab; 2886 2887 PetscFunctionBegin; 2888 MatCheckProduct(C,4); 2889 PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2890 /* check local size of A and B */ 2891 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 2892 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2893 } 2894 2895 /* create elemental matrices Ae and Be */ 2896 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae)); 2897 PetscCall(MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N)); 2898 PetscCall(MatSetType(Ae,MATELEMENTAL)); 2899 
PetscCall(MatSetUp(Ae)); 2900 PetscCall(MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE)); 2901 2902 PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be)); 2903 PetscCall(MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N)); 2904 PetscCall(MatSetType(Be,MATELEMENTAL)); 2905 PetscCall(MatSetUp(Be)); 2906 PetscCall(MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE)); 2907 2908 /* compute symbolic Ce = Ae*Be */ 2909 PetscCall(MatCreate(PetscObjectComm((PetscObject)C),&Ce)); 2910 PetscCall(MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce)); 2911 2912 /* setup C */ 2913 PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE)); 2914 PetscCall(MatSetType(C,MATDENSE)); 2915 PetscCall(MatSetUp(C)); 2916 2917 /* create data structure for reuse Cdense */ 2918 PetscCall(PetscNew(&ab)); 2919 ab->Ae = Ae; 2920 ab->Be = Be; 2921 ab->Ce = Ce; 2922 2923 C->product->data = ab; 2924 C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2925 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2926 PetscFunctionReturn(0); 2927 } 2928 #endif 2929 /* ----------------------------------------------- */ 2930 #if defined(PETSC_HAVE_ELEMENTAL) 2931 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 2932 { 2933 PetscFunctionBegin; 2934 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 2935 C->ops->productsymbolic = MatProductSymbolic_AB; 2936 PetscFunctionReturn(0); 2937 } 2938 #endif 2939 2940 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 2941 { 2942 Mat_Product *product = C->product; 2943 Mat A = product->A,B=product->B; 2944 2945 PetscFunctionBegin; 2946 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) 2947 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2948 C->ops->transposematmultsymbolic = 
MatTransposeMatMultSymbolic_MPIDense_MPIDense; 2949 C->ops->productsymbolic = MatProductSymbolic_AtB; 2950 PetscFunctionReturn(0); 2951 } 2952 2953 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 2954 { 2955 Mat_Product *product = C->product; 2956 const char *algTypes[2] = {"allgatherv","cyclic"}; 2957 PetscInt alg,nalg = 2; 2958 PetscBool flg = PETSC_FALSE; 2959 2960 PetscFunctionBegin; 2961 /* Set default algorithm */ 2962 alg = 0; /* default is allgatherv */ 2963 PetscCall(PetscStrcmp(product->alg,"default",&flg)); 2964 if (flg) { 2965 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 2966 } 2967 2968 /* Get runtime option */ 2969 if (product->api_user) { 2970 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat"); 2971 PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg)); 2972 PetscOptionsEnd(); 2973 } else { 2974 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat"); 2975 PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg)); 2976 PetscOptionsEnd(); 2977 } 2978 if (flg) { 2979 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 2980 } 2981 2982 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 2983 C->ops->productsymbolic = MatProductSymbolic_ABt; 2984 PetscFunctionReturn(0); 2985 } 2986 2987 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 2988 { 2989 Mat_Product *product = C->product; 2990 2991 PetscFunctionBegin; 2992 switch (product->type) { 2993 #if defined(PETSC_HAVE_ELEMENTAL) 2994 case MATPRODUCT_AB: 2995 PetscCall(MatProductSetFromOptions_MPIDense_AB(C)); 2996 break; 2997 #endif 2998 case MATPRODUCT_AtB: 2999 PetscCall(MatProductSetFromOptions_MPIDense_AtB(C)); 3000 
break; 3001 case MATPRODUCT_ABt: 3002 PetscCall(MatProductSetFromOptions_MPIDense_ABt(C)); 3003 break; 3004 default: 3005 break; 3006 } 3007 PetscFunctionReturn(0); 3008 } 3009