1 2 /* 3 Basic functions for basic parallel dense matrices. 4 */ 5 6 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8 #include <petscblaslapack.h> 9 10 /*@ 11 12 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 13 matrix that represents the operator. For sequential matrices it returns itself. 14 15 Input Parameter: 16 . A - the Seq or MPI dense matrix 17 18 Output Parameter: 19 . B - the inner matrix 20 21 Level: intermediate 22 23 @*/ 24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 25 { 26 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 27 PetscBool flg; 28 29 PetscFunctionBegin; 30 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 31 PetscValidPointer(B,2); 32 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg)); 33 if (flg) *B = mat->A; 34 else { 35 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg)); 36 PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name); 37 *B = A; 38 } 39 PetscFunctionReturn(0); 40 } 41 42 PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s) 43 { 44 Mat_MPIDense *Amat = (Mat_MPIDense*)A->data; 45 Mat_MPIDense *Bmat = (Mat_MPIDense*)B->data; 46 47 PetscFunctionBegin; 48 PetscCall(MatCopy(Amat->A,Bmat->A,s)); 49 PetscFunctionReturn(0); 50 } 51 52 PetscErrorCode MatShift_MPIDense(Mat A,PetscScalar alpha) 53 { 54 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 55 PetscInt j,lda,rstart = A->rmap->rstart,rend = A->rmap->rend,rend2; 56 PetscScalar *v; 57 58 PetscFunctionBegin; 59 PetscCall(MatDenseGetArray(mat->A,&v)); 60 PetscCall(MatDenseGetLDA(mat->A,&lda)); 61 rend2 = PetscMin(rend,A->cmap->N); 62 if (rend2>rstart) { 63 for (j=rstart; j<rend2; j++) v[j-rstart+j*lda] += alpha; 64 PetscCall(PetscLogFlops(rend2-rstart)); 65 } 66 PetscCall(MatDenseRestoreArray(mat->A,&v)); 67 PetscFunctionReturn(0); 68 } 69 70 PetscErrorCode 
MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 71 { 72 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 73 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 74 75 PetscFunctionBegin; 76 PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 77 lrow = row - rstart; 78 PetscCall(MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v)); 79 PetscFunctionReturn(0); 80 } 81 82 PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 83 { 84 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 85 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 86 87 PetscFunctionBegin; 88 PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 89 lrow = row - rstart; 90 PetscCall(MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v)); 91 PetscFunctionReturn(0); 92 } 93 94 PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a) 95 { 96 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 97 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 98 PetscScalar *array; 99 MPI_Comm comm; 100 PetscBool flg; 101 Mat B; 102 103 PetscFunctionBegin; 104 PetscCall(MatHasCongruentLayouts(A,&flg)); 105 PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 106 PetscCall(PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B)); 107 if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */ 108 109 PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg)); 110 PetscCheck(!flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. 
Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA); 111 PetscCall(PetscObjectGetComm((PetscObject)(mdn->A),&comm)); 112 PetscCall(MatCreate(comm,&B)); 113 PetscCall(MatSetSizes(B,m,m,m,m)); 114 PetscCall(MatSetType(B,((PetscObject)mdn->A)->type_name)); 115 PetscCall(MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array)); 116 PetscCall(MatSeqDenseSetPreallocation(B,array+m*rstart)); 117 PetscCall(MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array)); 118 PetscCall(PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B)); 119 *a = B; 120 PetscCall(MatDestroy(&B)); 121 } else *a = B; 122 PetscFunctionReturn(0); 123 } 124 125 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 126 { 127 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 128 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 129 PetscBool roworiented = A->roworiented; 130 131 PetscFunctionBegin; 132 for (i=0; i<m; i++) { 133 if (idxm[i] < 0) continue; 134 PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 135 if (idxm[i] >= rstart && idxm[i] < rend) { 136 row = idxm[i] - rstart; 137 if (roworiented) { 138 PetscCall(MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv)); 139 } else { 140 for (j=0; j<n; j++) { 141 if (idxn[j] < 0) continue; 142 PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 143 PetscCall(MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv)); 144 } 145 } 146 } else if (!A->donotstash) { 147 mat->assembled = PETSC_FALSE; 148 if (roworiented) { 149 PetscCall(MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE)); 150 } else { 151 PetscCall(MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE)); 152 } 153 } 154 } 155 PetscFunctionReturn(0); 156 } 157 158 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt 
idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 159 { 160 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 161 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 162 163 PetscFunctionBegin; 164 for (i=0; i<m; i++) { 165 if (idxm[i] < 0) continue; /* negative row */ 166 PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 167 if (idxm[i] >= rstart && idxm[i] < rend) { 168 row = idxm[i] - rstart; 169 for (j=0; j<n; j++) { 170 if (idxn[j] < 0) continue; /* negative column */ 171 PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 172 PetscCall(MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j)); 173 } 174 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 175 } 176 PetscFunctionReturn(0); 177 } 178 179 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda) 180 { 181 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 182 183 PetscFunctionBegin; 184 PetscCall(MatDenseGetLDA(a->A,lda)); 185 PetscFunctionReturn(0); 186 } 187 188 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda) 189 { 190 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 191 PetscBool iscuda; 192 193 PetscFunctionBegin; 194 if (!a->A) { 195 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 196 PetscCall(PetscLayoutSetUp(A->rmap)); 197 PetscCall(PetscLayoutSetUp(A->cmap)); 198 PetscCall(MatCreate(PETSC_COMM_SELF,&a->A)); 199 PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->A)); 200 PetscCall(MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N)); 201 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda)); 202 PetscCall(MatSetType(a->A,iscuda ? 
MATSEQDENSECUDA : MATSEQDENSE)); 203 } 204 PetscCall(MatDenseSetLDA(a->A,lda)); 205 PetscFunctionReturn(0); 206 } 207 208 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array) 209 { 210 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 211 212 PetscFunctionBegin; 213 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 214 PetscCall(MatDenseGetArray(a->A,array)); 215 PetscFunctionReturn(0); 216 } 217 218 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array) 219 { 220 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 221 222 PetscFunctionBegin; 223 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 224 PetscCall(MatDenseGetArrayRead(a->A,array)); 225 PetscFunctionReturn(0); 226 } 227 228 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array) 229 { 230 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 231 232 PetscFunctionBegin; 233 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 234 PetscCall(MatDenseGetArrayWrite(a->A,array)); 235 PetscFunctionReturn(0); 236 } 237 238 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array) 239 { 240 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 241 242 PetscFunctionBegin; 243 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 244 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 245 PetscCall(MatDensePlaceArray(a->A,array)); 246 PetscFunctionReturn(0); 247 } 248 249 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 250 { 251 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 252 253 PetscFunctionBegin; 254 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call 
MatDenseRestoreColumnVec() first"); 255 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 256 PetscCall(MatDenseResetArray(a->A)); 257 PetscFunctionReturn(0); 258 } 259 260 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array) 261 { 262 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 263 264 PetscFunctionBegin; 265 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 266 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 267 PetscCall(MatDenseReplaceArray(a->A,array)); 268 PetscFunctionReturn(0); 269 } 270 271 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 272 { 273 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 274 PetscInt lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 275 const PetscInt *irow,*icol; 276 const PetscScalar *v; 277 PetscScalar *bv; 278 Mat newmat; 279 IS iscol_local; 280 MPI_Comm comm_is,comm_mat; 281 282 PetscFunctionBegin; 283 PetscCall(PetscObjectGetComm((PetscObject)A,&comm_mat)); 284 PetscCall(PetscObjectGetComm((PetscObject)iscol,&comm_is)); 285 PetscCheck(comm_mat == comm_is,PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator"); 286 287 PetscCall(ISAllGather(iscol,&iscol_local)); 288 PetscCall(ISGetIndices(isrow,&irow)); 289 PetscCall(ISGetIndices(iscol_local,&icol)); 290 PetscCall(ISGetLocalSize(isrow,&nrows)); 291 PetscCall(ISGetLocalSize(iscol,&ncols)); 292 PetscCall(ISGetSize(iscol,&Ncols)); /* global number of columns, size of iscol_local */ 293 294 /* No parallel redistribution currently supported! Should really check each index set 295 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 296 original matrix! 
*/ 297 298 PetscCall(MatGetLocalSize(A,&nlrows,&nlcols)); 299 PetscCall(MatGetOwnershipRange(A,&rstart,&rend)); 300 301 /* Check submatrix call */ 302 if (scall == MAT_REUSE_MATRIX) { 303 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 304 /* Really need to test rows and column sizes! */ 305 newmat = *B; 306 } else { 307 /* Create and fill new matrix */ 308 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&newmat)); 309 PetscCall(MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols)); 310 PetscCall(MatSetType(newmat,((PetscObject)A)->type_name)); 311 PetscCall(MatMPIDenseSetPreallocation(newmat,NULL)); 312 } 313 314 /* Now extract the data pointers and do the copy, column at a time */ 315 newmatd = (Mat_MPIDense*)newmat->data; 316 PetscCall(MatDenseGetArray(newmatd->A,&bv)); 317 PetscCall(MatDenseGetArrayRead(mat->A,&v)); 318 PetscCall(MatDenseGetLDA(mat->A,&lda)); 319 for (i=0; i<Ncols; i++) { 320 const PetscScalar *av = v + lda*icol[i]; 321 for (j=0; j<nrows; j++) { 322 *bv++ = av[irow[j] - rstart]; 323 } 324 } 325 PetscCall(MatDenseRestoreArrayRead(mat->A,&v)); 326 PetscCall(MatDenseRestoreArray(newmatd->A,&bv)); 327 328 /* Assemble the matrices so that the correct flags are set */ 329 PetscCall(MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY)); 330 PetscCall(MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY)); 331 332 /* Free work space */ 333 PetscCall(ISRestoreIndices(isrow,&irow)); 334 PetscCall(ISRestoreIndices(iscol_local,&icol)); 335 PetscCall(ISDestroy(&iscol_local)); 336 *B = newmat; 337 PetscFunctionReturn(0); 338 } 339 340 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array) 341 { 342 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 343 344 PetscFunctionBegin; 345 PetscCall(MatDenseRestoreArray(a->A,array)); 346 PetscFunctionReturn(0); 347 } 348 349 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array) 350 { 351 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 352 353 PetscFunctionBegin; 354 
PetscCall(MatDenseRestoreArrayRead(a->A,array)); 355 PetscFunctionReturn(0); 356 } 357 358 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array) 359 { 360 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 361 362 PetscFunctionBegin; 363 PetscCall(MatDenseRestoreArrayWrite(a->A,array)); 364 PetscFunctionReturn(0); 365 } 366 367 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 368 { 369 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 370 PetscInt nstash,reallocs; 371 372 PetscFunctionBegin; 373 if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 374 375 PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range)); 376 PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs)); 377 PetscCall(PetscInfo(mdn->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs)); 378 PetscFunctionReturn(0); 379 } 380 381 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 382 { 383 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 384 PetscInt i,*row,*col,flg,j,rstart,ncols; 385 PetscMPIInt n; 386 PetscScalar *val; 387 388 PetscFunctionBegin; 389 if (!mdn->donotstash && !mat->nooffprocentries) { 390 /* wait on receives */ 391 while (1) { 392 PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg)); 393 if (!flg) break; 394 395 for (i=0; i<n;) { 396 /* Now identify the consecutive vals belonging to the same row */ 397 for (j=i,rstart=row[j]; j<n; j++) { 398 if (row[j] != rstart) break; 399 } 400 if (j < n) ncols = j-i; 401 else ncols = n-i; 402 /* Now assemble all these values with a single function call */ 403 PetscCall(MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode)); 404 i = j; 405 } 406 } 407 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 408 } 409 410 PetscCall(MatAssemblyBegin(mdn->A,mode)); 411 PetscCall(MatAssemblyEnd(mdn->A,mode)); 412 PetscFunctionReturn(0); 413 } 414 415 PetscErrorCode 
MatZeroEntries_MPIDense(Mat A) 416 { 417 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 418 419 PetscFunctionBegin; 420 PetscCall(MatZeroEntries(l->A)); 421 PetscFunctionReturn(0); 422 } 423 424 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 425 { 426 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 427 PetscInt i,len,*lrows; 428 429 PetscFunctionBegin; 430 /* get locally owned rows */ 431 PetscCall(PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL)); 432 /* fix right hand side if needed */ 433 if (x && b) { 434 const PetscScalar *xx; 435 PetscScalar *bb; 436 437 PetscCall(VecGetArrayRead(x, &xx)); 438 PetscCall(VecGetArrayWrite(b, &bb)); 439 for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]]; 440 PetscCall(VecRestoreArrayRead(x, &xx)); 441 PetscCall(VecRestoreArrayWrite(b, &bb)); 442 } 443 PetscCall(MatZeroRows(l->A,len,lrows,0.0,NULL,NULL)); 444 if (diag != 0.0) { 445 Vec d; 446 447 PetscCall(MatCreateVecs(A,NULL,&d)); 448 PetscCall(VecSet(d,diag)); 449 PetscCall(MatDiagonalSet(A,d,INSERT_VALUES)); 450 PetscCall(VecDestroy(&d)); 451 } 452 PetscCall(PetscFree(lrows)); 453 PetscFunctionReturn(0); 454 } 455 456 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec); 457 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec); 458 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec); 459 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec); 460 461 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 462 { 463 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 464 const PetscScalar *ax; 465 PetscScalar *ay; 466 PetscMemType axmtype,aymtype; 467 468 PetscFunctionBegin; 469 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 470 PetscCall(VecGetArrayReadAndMemType(xx,&ax,&axmtype)); 471 PetscCall(VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype)); 472 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE)); 473 
PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE)); 474 PetscCall(VecRestoreArrayAndMemType(mdn->lvec,&ay)); 475 PetscCall(VecRestoreArrayReadAndMemType(xx,&ax)); 476 PetscCall((*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy)); 477 PetscFunctionReturn(0); 478 } 479 480 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 481 { 482 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 483 const PetscScalar *ax; 484 PetscScalar *ay; 485 PetscMemType axmtype,aymtype; 486 487 PetscFunctionBegin; 488 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 489 PetscCall(VecGetArrayReadAndMemType(xx,&ax,&axmtype)); 490 PetscCall(VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype)); 491 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE)); 492 PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE)); 493 PetscCall(VecRestoreArrayAndMemType(mdn->lvec,&ay)); 494 PetscCall(VecRestoreArrayReadAndMemType(xx,&ax)); 495 PetscCall((*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz)); 496 PetscFunctionReturn(0); 497 } 498 499 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 500 { 501 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 502 const PetscScalar *ax; 503 PetscScalar *ay; 504 PetscMemType axmtype,aymtype; 505 506 PetscFunctionBegin; 507 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 508 PetscCall(VecSet(yy,0.0)); 509 PetscCall((*a->A->ops->multtranspose)(a->A,xx,a->lvec)); 510 PetscCall(VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype)); 511 PetscCall(VecGetArrayAndMemType(yy,&ay,&aymtype)); 512 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM)); 513 PetscCall(PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM)); 514 PetscCall(VecRestoreArrayReadAndMemType(a->lvec,&ax)); 515 PetscCall(VecRestoreArrayAndMemType(yy,&ay)); 516 PetscFunctionReturn(0); 517 } 518 519 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 520 { 521 
Mat_MPIDense *a = (Mat_MPIDense*)A->data; 522 const PetscScalar *ax; 523 PetscScalar *ay; 524 PetscMemType axmtype,aymtype; 525 526 PetscFunctionBegin; 527 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 528 PetscCall(VecCopy(yy,zz)); 529 PetscCall((*a->A->ops->multtranspose)(a->A,xx,a->lvec)); 530 PetscCall(VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype)); 531 PetscCall(VecGetArrayAndMemType(zz,&ay,&aymtype)); 532 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM)); 533 PetscCall(PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM)); 534 PetscCall(VecRestoreArrayReadAndMemType(a->lvec,&ax)); 535 PetscCall(VecRestoreArrayAndMemType(zz,&ay)); 536 PetscFunctionReturn(0); 537 } 538 539 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 540 { 541 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 542 PetscInt lda,len,i,n,m = A->rmap->n,radd; 543 PetscScalar *x,zero = 0.0; 544 const PetscScalar *av; 545 546 PetscFunctionBegin; 547 PetscCall(VecSet(v,zero)); 548 PetscCall(VecGetArray(v,&x)); 549 PetscCall(VecGetSize(v,&n)); 550 PetscCheck(n == A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 551 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 552 radd = A->rmap->rstart*m; 553 PetscCall(MatDenseGetArrayRead(a->A,&av)); 554 PetscCall(MatDenseGetLDA(a->A,&lda)); 555 for (i=0; i<len; i++) { 556 x[i] = av[radd + i*lda + i]; 557 } 558 PetscCall(MatDenseRestoreArrayRead(a->A,&av)); 559 PetscCall(VecRestoreArray(v,&x)); 560 PetscFunctionReturn(0); 561 } 562 563 PetscErrorCode MatDestroy_MPIDense(Mat mat) 564 { 565 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 566 567 PetscFunctionBegin; 568 #if defined(PETSC_USE_LOG) 569 PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N); 570 #endif 571 PetscCall(MatStashDestroy_Private(&mat->stash)); 572 PetscCheck(!mdn->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() 
first"); 573 PetscCheck(!mdn->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 574 PetscCall(MatDestroy(&mdn->A)); 575 PetscCall(VecDestroy(&mdn->lvec)); 576 PetscCall(PetscSFDestroy(&mdn->Mvctx)); 577 PetscCall(VecDestroy(&mdn->cvec)); 578 PetscCall(MatDestroy(&mdn->cmat)); 579 580 PetscCall(PetscFree(mat->data)); 581 PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL)); 582 583 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL)); 584 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL)); 585 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL)); 586 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL)); 587 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL)); 588 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL)); 589 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL)); 590 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL)); 591 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL)); 592 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL)); 593 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL)); 594 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL)); 595 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",NULL)); 596 #if defined(PETSC_HAVE_ELEMENTAL) 597 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL)); 598 #endif 599 #if defined(PETSC_HAVE_SCALAPACK) 600 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL)); 601 #endif 602 
PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL)); 603 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL)); 604 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL)); 605 #if defined (PETSC_HAVE_CUDA) 606 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL)); 607 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL)); 608 #endif 609 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL)); 610 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL)); 611 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL)); 612 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL)); 613 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL)); 614 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL)); 615 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL)); 616 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL)); 617 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL)); 618 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL)); 619 620 PetscCall(PetscObjectCompose((PetscObject)mat,"DiagonalBlock",NULL)); 621 PetscFunctionReturn(0); 622 } 623 624 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer); 625 626 #include <petscdraw.h> 627 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 628 { 629 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 630 PetscMPIInt rank; 631 PetscViewerType vtype; 632 PetscBool iascii,isdraw; 633 PetscViewer 
sviewer; 634 PetscViewerFormat format; 635 636 PetscFunctionBegin; 637 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank)); 638 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 639 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 640 if (iascii) { 641 PetscCall(PetscViewerGetType(viewer,&vtype)); 642 PetscCall(PetscViewerGetFormat(viewer,&format)); 643 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 644 MatInfo info; 645 PetscCall(MatGetInfo(mat,MAT_LOCAL,&info)); 646 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 647 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory)); 648 PetscCall(PetscViewerFlush(viewer)); 649 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 650 if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx,viewer)); 651 PetscFunctionReturn(0); 652 } else if (format == PETSC_VIEWER_ASCII_INFO) { 653 PetscFunctionReturn(0); 654 } 655 } else if (isdraw) { 656 PetscDraw draw; 657 PetscBool isnull; 658 659 PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw)); 660 PetscCall(PetscDrawIsNull(draw,&isnull)); 661 if (isnull) PetscFunctionReturn(0); 662 } 663 664 { 665 /* assemble the entire matrix onto first processor. */ 666 Mat A; 667 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 668 PetscInt *cols; 669 PetscScalar *vals; 670 671 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat),&A)); 672 if (rank == 0) { 673 PetscCall(MatSetSizes(A,M,N,M,N)); 674 } else { 675 PetscCall(MatSetSizes(A,0,0,M,N)); 676 } 677 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. 
*/ 678 PetscCall(MatSetType(A,MATMPIDENSE)); 679 PetscCall(MatMPIDenseSetPreallocation(A,NULL)); 680 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)A)); 681 682 /* Copy the matrix ... This isn't the most efficient means, 683 but it's quick for now */ 684 A->insertmode = INSERT_VALUES; 685 686 row = mat->rmap->rstart; 687 m = mdn->A->rmap->n; 688 for (i=0; i<m; i++) { 689 PetscCall(MatGetRow_MPIDense(mat,row,&nz,&cols,&vals)); 690 PetscCall(MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES)); 691 PetscCall(MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals)); 692 row++; 693 } 694 695 PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 696 PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 697 PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 698 if (rank == 0) { 699 PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name)); 700 PetscCall(MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer)); 701 } 702 PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 703 PetscCall(PetscViewerFlush(viewer)); 704 PetscCall(MatDestroy(&A)); 705 } 706 PetscFunctionReturn(0); 707 } 708 709 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 710 { 711 PetscBool iascii,isbinary,isdraw,issocket; 712 713 PetscFunctionBegin; 714 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 715 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 716 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket)); 717 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 718 719 if (iascii || issocket || isdraw) { 720 PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat,viewer)); 721 } else if (isbinary) { 722 PetscCall(MatView_Dense_Binary(mat,viewer)); 723 } 724 PetscFunctionReturn(0); 725 } 726 727 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 728 { 729 Mat_MPIDense 
*mat = (Mat_MPIDense*)A->data; 730 Mat mdn = mat->A; 731 PetscLogDouble isend[5],irecv[5]; 732 733 PetscFunctionBegin; 734 info->block_size = 1.0; 735 736 PetscCall(MatGetInfo(mdn,MAT_LOCAL,info)); 737 738 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 739 isend[3] = info->memory; isend[4] = info->mallocs; 740 if (flag == MAT_LOCAL) { 741 info->nz_used = isend[0]; 742 info->nz_allocated = isend[1]; 743 info->nz_unneeded = isend[2]; 744 info->memory = isend[3]; 745 info->mallocs = isend[4]; 746 } else if (flag == MAT_GLOBAL_MAX) { 747 PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A))); 748 749 info->nz_used = irecv[0]; 750 info->nz_allocated = irecv[1]; 751 info->nz_unneeded = irecv[2]; 752 info->memory = irecv[3]; 753 info->mallocs = irecv[4]; 754 } else if (flag == MAT_GLOBAL_SUM) { 755 PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A))); 756 757 info->nz_used = irecv[0]; 758 info->nz_allocated = irecv[1]; 759 info->nz_unneeded = irecv[2]; 760 info->memory = irecv[3]; 761 info->mallocs = irecv[4]; 762 } 763 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 764 info->fill_ratio_needed = 0; 765 info->factor_mallocs = 0; 766 PetscFunctionReturn(0); 767 } 768 769 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 770 { 771 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 772 773 PetscFunctionBegin; 774 switch (op) { 775 case MAT_NEW_NONZERO_LOCATIONS: 776 case MAT_NEW_NONZERO_LOCATION_ERR: 777 case MAT_NEW_NONZERO_ALLOCATION_ERR: 778 MatCheckPreallocated(A,1); 779 PetscCall(MatSetOption(a->A,op,flg)); 780 break; 781 case MAT_ROW_ORIENTED: 782 MatCheckPreallocated(A,1); 783 a->roworiented = flg; 784 PetscCall(MatSetOption(a->A,op,flg)); 785 break; 786 case MAT_FORCE_DIAGONAL_ENTRIES: 787 case MAT_KEEP_NONZERO_PATTERN: 788 case MAT_USE_HASH_TABLE: 789 case MAT_SORTED_FULL: 790 PetscCall(PetscInfo(A,"Option %s 
ignored\n",MatOptions[op])); 791 break; 792 case MAT_IGNORE_OFF_PROC_ENTRIES: 793 a->donotstash = flg; 794 break; 795 case MAT_SYMMETRIC: 796 case MAT_STRUCTURALLY_SYMMETRIC: 797 case MAT_HERMITIAN: 798 case MAT_SYMMETRY_ETERNAL: 799 case MAT_IGNORE_LOWER_TRIANGULAR: 800 case MAT_IGNORE_ZERO_ENTRIES: 801 PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op])); 802 break; 803 default: 804 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 805 } 806 PetscFunctionReturn(0); 807 } 808 809 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 810 { 811 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 812 const PetscScalar *l; 813 PetscScalar x,*v,*vv,*r; 814 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda; 815 816 PetscFunctionBegin; 817 PetscCall(MatDenseGetArray(mdn->A,&vv)); 818 PetscCall(MatDenseGetLDA(mdn->A,&lda)); 819 PetscCall(MatGetLocalSize(A,&s2,&s3)); 820 if (ll) { 821 PetscCall(VecGetLocalSize(ll,&s2a)); 822 PetscCheck(s2a == s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2); 823 PetscCall(VecGetArrayRead(ll,&l)); 824 for (i=0; i<m; i++) { 825 x = l[i]; 826 v = vv + i; 827 for (j=0; j<n; j++) { (*v) *= x; v+= lda;} 828 } 829 PetscCall(VecRestoreArrayRead(ll,&l)); 830 PetscCall(PetscLogFlops(1.0*n*m)); 831 } 832 if (rr) { 833 const PetscScalar *ar; 834 835 PetscCall(VecGetLocalSize(rr,&s3a)); 836 PetscCheck(s3a == s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3); 837 PetscCall(VecGetArrayRead(rr,&ar)); 838 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 839 PetscCall(VecGetArray(mdn->lvec,&r)); 840 PetscCall(PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE)); 841 PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE)); 842 PetscCall(VecRestoreArrayRead(rr,&ar)); 843 for (i=0; i<n; i++) { 844 x = r[i]; 845 v = vv + 
i*lda; 846 for (j=0; j<m; j++) (*v++) *= x; 847 } 848 PetscCall(VecRestoreArray(mdn->lvec,&r)); 849 PetscCall(PetscLogFlops(1.0*n*m)); 850 } 851 PetscCall(MatDenseRestoreArray(mdn->A,&vv)); 852 PetscFunctionReturn(0); 853 } 854 855 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 856 { 857 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 858 PetscInt i,j; 859 PetscMPIInt size; 860 PetscReal sum = 0.0; 861 const PetscScalar *av,*v; 862 863 PetscFunctionBegin; 864 PetscCall(MatDenseGetArrayRead(mdn->A,&av)); 865 v = av; 866 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 867 if (size == 1) { 868 PetscCall(MatNorm(mdn->A,type,nrm)); 869 } else { 870 if (type == NORM_FROBENIUS) { 871 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 872 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 873 } 874 PetscCall(MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A))); 875 *nrm = PetscSqrtReal(*nrm); 876 PetscCall(PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n)); 877 } else if (type == NORM_1) { 878 PetscReal *tmp,*tmp2; 879 PetscCall(PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2)); 880 *nrm = 0.0; 881 v = av; 882 for (j=0; j<mdn->A->cmap->n; j++) { 883 for (i=0; i<mdn->A->rmap->n; i++) { 884 tmp[j] += PetscAbsScalar(*v); v++; 885 } 886 } 887 PetscCall(MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A))); 888 for (j=0; j<A->cmap->N; j++) { 889 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 890 } 891 PetscCall(PetscFree2(tmp,tmp2)); 892 PetscCall(PetscLogFlops(A->cmap->n*A->rmap->n)); 893 } else if (type == NORM_INFINITY) { /* max row norm */ 894 PetscReal ntemp; 895 PetscCall(MatNorm(mdn->A,type,&ntemp)); 896 PetscCall(MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A))); 897 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm"); 898 } 899 PetscCall(MatDenseRestoreArrayRead(mdn->A,&av)); 900 PetscFunctionReturn(0); 901 } 902 903 
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 904 { 905 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 906 Mat B; 907 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 908 PetscInt j,i,lda; 909 PetscScalar *v; 910 911 PetscFunctionBegin; 912 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 913 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 914 PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M)); 915 PetscCall(MatSetType(B,((PetscObject)A)->type_name)); 916 PetscCall(MatMPIDenseSetPreallocation(B,NULL)); 917 } else B = *matout; 918 919 m = a->A->rmap->n; n = a->A->cmap->n; 920 PetscCall(MatDenseGetArrayRead(a->A,(const PetscScalar**)&v)); 921 PetscCall(MatDenseGetLDA(a->A,&lda)); 922 PetscCall(PetscMalloc1(m,&rwork)); 923 for (i=0; i<m; i++) rwork[i] = rstart + i; 924 for (j=0; j<n; j++) { 925 PetscCall(MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES)); 926 v += lda; 927 } 928 PetscCall(MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v)); 929 PetscCall(PetscFree(rwork)); 930 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 931 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 932 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 933 *matout = B; 934 } else { 935 PetscCall(MatHeaderMerge(A,&B)); 936 } 937 PetscFunctionReturn(0); 938 } 939 940 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*); 941 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 942 943 PetscErrorCode MatSetUp_MPIDense(Mat A) 944 { 945 PetscFunctionBegin; 946 PetscCall(PetscLayoutSetUp(A->rmap)); 947 PetscCall(PetscLayoutSetUp(A->cmap)); 948 if (!A->preallocated) { 949 PetscCall(MatMPIDenseSetPreallocation(A,NULL)); 950 } 951 PetscFunctionReturn(0); 952 } 953 954 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 955 { 956 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 957 958 PetscFunctionBegin; 959 
PetscCall(MatAXPY(A->A,alpha,B->A,str)); 960 PetscFunctionReturn(0); 961 } 962 963 PetscErrorCode MatConjugate_MPIDense(Mat mat) 964 { 965 Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 966 967 PetscFunctionBegin; 968 PetscCall(MatConjugate(a->A)); 969 PetscFunctionReturn(0); 970 } 971 972 PetscErrorCode MatRealPart_MPIDense(Mat A) 973 { 974 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 975 976 PetscFunctionBegin; 977 PetscCall(MatRealPart(a->A)); 978 PetscFunctionReturn(0); 979 } 980 981 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 982 { 983 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 984 985 PetscFunctionBegin; 986 PetscCall(MatImaginaryPart(a->A)); 987 PetscFunctionReturn(0); 988 } 989 990 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col) 991 { 992 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 993 994 PetscFunctionBegin; 995 PetscCheck(a->A,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix"); 996 PetscCheck(a->A->ops->getcolumnvector,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation"); 997 PetscCall((*a->A->ops->getcolumnvector)(a->A,v,col)); 998 PetscFunctionReturn(0); 999 } 1000 1001 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat,PetscInt,PetscReal*); 1002 1003 PetscErrorCode MatGetColumnReductions_MPIDense(Mat A,PetscInt type,PetscReal *reductions) 1004 { 1005 PetscInt i,m,n; 1006 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 1007 PetscReal *work; 1008 1009 PetscFunctionBegin; 1010 PetscCall(MatGetSize(A,&m,&n)); 1011 PetscCall(PetscMalloc1(n,&work)); 1012 if (type == REDUCTION_MEAN_REALPART) { 1013 PetscCall(MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_REALPART,work)); 1014 } else if (type == REDUCTION_MEAN_IMAGINARYPART) { 1015 PetscCall(MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_IMAGINARYPART,work)); 1016 } else { 1017 PetscCall(MatGetColumnReductions_SeqDense(a->A,type,work)); 1018 } 1019 if (type == NORM_2) { 1020 for (i=0; i<n; i++) work[i] *= work[i]; 
1021 } 1022 if (type == NORM_INFINITY) { 1023 PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,A->hdr.comm)); 1024 } else { 1025 PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,A->hdr.comm)); 1026 } 1027 PetscCall(PetscFree(work)); 1028 if (type == NORM_2) { 1029 for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 1030 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 1031 for (i=0; i<n; i++) reductions[i] /= m; 1032 } 1033 PetscFunctionReturn(0); 1034 } 1035 1036 #if defined(PETSC_HAVE_CUDA) 1037 PetscErrorCode MatShift_MPIDenseCUDA(Mat A,PetscScalar alpha) 1038 { 1039 PetscScalar *da; 1040 PetscInt lda; 1041 1042 PetscFunctionBegin; 1043 PetscCall(MatDenseCUDAGetArray(A,&da)); 1044 PetscCall(MatDenseGetLDA(A,&lda)); 1045 PetscCall(PetscInfo(A,"Performing Shift on backend\n")); 1046 PetscCall(MatShift_DenseCUDA_Private(da,alpha,lda,A->rmap->rstart,A->rmap->rend,A->cmap->N)); 1047 PetscCall(MatDenseCUDARestoreArray(A,&da)); 1048 PetscFunctionReturn(0); 1049 } 1050 1051 static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1052 { 1053 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1054 PetscInt lda; 1055 1056 PetscFunctionBegin; 1057 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1058 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1059 if (!a->cvec) { 1060 PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec)); 1061 PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec)); 1062 } 1063 a->vecinuse = col + 1; 1064 PetscCall(MatDenseGetLDA(a->A,&lda)); 1065 PetscCall(MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse)); 1066 PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda)); 1067 *v = a->cvec; 1068 
PetscFunctionReturn(0); 1069 } 1070 1071 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1072 { 1073 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1074 1075 PetscFunctionBegin; 1076 PetscCheck(a->vecinuse,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1077 PetscCheck(a->cvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 1078 a->vecinuse = 0; 1079 PetscCall(MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse)); 1080 PetscCall(VecCUDAResetArray(a->cvec)); 1081 *v = NULL; 1082 PetscFunctionReturn(0); 1083 } 1084 1085 static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1086 { 1087 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1088 PetscInt lda; 1089 1090 PetscFunctionBegin; 1091 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1092 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1093 if (!a->cvec) { 1094 PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec)); 1095 PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec)); 1096 } 1097 a->vecinuse = col + 1; 1098 PetscCall(MatDenseGetLDA(a->A,&lda)); 1099 PetscCall(MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse)); 1100 PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda)); 1101 PetscCall(VecLockReadPush(a->cvec)); 1102 *v = a->cvec; 1103 PetscFunctionReturn(0); 1104 } 1105 1106 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1107 { 1108 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1109 1110 PetscFunctionBegin; 1111 PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1112 PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column 
vector"); 1113 a->vecinuse = 0; 1114 PetscCall(MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse)); 1115 PetscCall(VecLockReadPop(a->cvec)); 1116 PetscCall(VecCUDAResetArray(a->cvec)); 1117 *v = NULL; 1118 PetscFunctionReturn(0); 1119 } 1120 1121 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1122 { 1123 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1124 PetscInt lda; 1125 1126 PetscFunctionBegin; 1127 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1128 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1129 if (!a->cvec) { 1130 PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec)); 1131 PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec)); 1132 } 1133 a->vecinuse = col + 1; 1134 PetscCall(MatDenseGetLDA(a->A,&lda)); 1135 PetscCall(MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse)); 1136 PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda)); 1137 *v = a->cvec; 1138 PetscFunctionReturn(0); 1139 } 1140 1141 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1142 { 1143 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1144 1145 PetscFunctionBegin; 1146 PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1147 PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 1148 a->vecinuse = 0; 1149 PetscCall(MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse)); 1150 PetscCall(VecCUDAResetArray(a->cvec)); 1151 *v = NULL; 1152 PetscFunctionReturn(0); 1153 } 1154 1155 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) 1156 { 1157 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1158 1159 PetscFunctionBegin; 
1160 PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1161 PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1162 PetscCall(MatDenseCUDAPlaceArray(l->A,a)); 1163 PetscFunctionReturn(0); 1164 } 1165 1166 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A) 1167 { 1168 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1169 1170 PetscFunctionBegin; 1171 PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1172 PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1173 PetscCall(MatDenseCUDAResetArray(l->A)); 1174 PetscFunctionReturn(0); 1175 } 1176 1177 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) 1178 { 1179 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1180 1181 PetscFunctionBegin; 1182 PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1183 PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1184 PetscCall(MatDenseCUDAReplaceArray(l->A,a)); 1185 PetscFunctionReturn(0); 1186 } 1187 1188 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) 1189 { 1190 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1191 1192 PetscFunctionBegin; 1193 PetscCall(MatDenseCUDAGetArrayWrite(l->A,a)); 1194 PetscFunctionReturn(0); 1195 } 1196 1197 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) 1198 { 1199 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1200 1201 PetscFunctionBegin; 1202 PetscCall(MatDenseCUDARestoreArrayWrite(l->A,a)); 1203 PetscFunctionReturn(0); 1204 } 1205 1206 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) 
1207 { 1208 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1209 1210 PetscFunctionBegin; 1211 PetscCall(MatDenseCUDAGetArrayRead(l->A,a)); 1212 PetscFunctionReturn(0); 1213 } 1214 1215 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) 1216 { 1217 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1218 1219 PetscFunctionBegin; 1220 PetscCall(MatDenseCUDARestoreArrayRead(l->A,a)); 1221 PetscFunctionReturn(0); 1222 } 1223 1224 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a) 1225 { 1226 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1227 1228 PetscFunctionBegin; 1229 PetscCall(MatDenseCUDAGetArray(l->A,a)); 1230 PetscFunctionReturn(0); 1231 } 1232 1233 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a) 1234 { 1235 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1236 1237 PetscFunctionBegin; 1238 PetscCall(MatDenseCUDARestoreArray(l->A,a)); 1239 PetscFunctionReturn(0); 1240 } 1241 1242 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*); 1243 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*); 1244 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*); 1245 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*); 1246 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*); 1247 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*); 1248 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*); 1249 1250 static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind) 1251 { 1252 Mat_MPIDense *d = (Mat_MPIDense*)mat->data; 1253 1254 PetscFunctionBegin; 1255 PetscCheck(!d->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1256 PetscCheck(!d->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1257 if (d->A) { 1258 
PetscCall(MatBindToCPU(d->A,bind)); 1259 } 1260 mat->boundtocpu = bind; 1261 if (!bind) { 1262 PetscBool iscuda; 1263 1264 PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda)); 1265 if (!iscuda) { 1266 PetscCall(VecDestroy(&d->cvec)); 1267 } 1268 PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda)); 1269 if (!iscuda) { 1270 PetscCall(MatDestroy(&d->cmat)); 1271 } 1272 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA)); 1273 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA)); 1274 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA)); 1275 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA)); 1276 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA)); 1277 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA)); 1278 mat->ops->shift = MatShift_MPIDenseCUDA; 1279 } else { 1280 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense)); 1281 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense)); 1282 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense)); 1283 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense)); 1284 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense)); 1285 
PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense)); 1286 mat->ops->shift = MatShift_MPIDense; 1287 } 1288 if (d->cmat) { 1289 PetscCall(MatBindToCPU(d->cmat,bind)); 1290 } 1291 PetscFunctionReturn(0); 1292 } 1293 1294 PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data) 1295 { 1296 Mat_MPIDense *d = (Mat_MPIDense*)A->data; 1297 PetscBool iscuda; 1298 1299 PetscFunctionBegin; 1300 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1301 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda)); 1302 if (!iscuda) PetscFunctionReturn(0); 1303 PetscCall(PetscLayoutSetUp(A->rmap)); 1304 PetscCall(PetscLayoutSetUp(A->cmap)); 1305 if (!d->A) { 1306 PetscCall(MatCreate(PETSC_COMM_SELF,&d->A)); 1307 PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)d->A)); 1308 PetscCall(MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N)); 1309 } 1310 PetscCall(MatSetType(d->A,MATSEQDENSECUDA)); 1311 PetscCall(MatSeqDenseCUDASetPreallocation(d->A,d_data)); 1312 A->preallocated = PETSC_TRUE; 1313 A->assembled = PETSC_TRUE; 1314 PetscFunctionReturn(0); 1315 } 1316 #endif 1317 1318 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx) 1319 { 1320 Mat_MPIDense *d = (Mat_MPIDense*)x->data; 1321 1322 PetscFunctionBegin; 1323 PetscCall(MatSetRandom(d->A,rctx)); 1324 PetscFunctionReturn(0); 1325 } 1326 1327 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d) 1328 { 1329 PetscFunctionBegin; 1330 *missing = PETSC_FALSE; 1331 PetscFunctionReturn(0); 1332 } 1333 1334 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat); 1335 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat); 1336 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat); 1337 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat); 1338 
static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*); 1339 static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer); 1340 1341 /* -------------------------------------------------------------------*/ 1342 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense, 1343 MatGetRow_MPIDense, 1344 MatRestoreRow_MPIDense, 1345 MatMult_MPIDense, 1346 /* 4*/ MatMultAdd_MPIDense, 1347 MatMultTranspose_MPIDense, 1348 MatMultTransposeAdd_MPIDense, 1349 NULL, 1350 NULL, 1351 NULL, 1352 /* 10*/ NULL, 1353 NULL, 1354 NULL, 1355 NULL, 1356 MatTranspose_MPIDense, 1357 /* 15*/ MatGetInfo_MPIDense, 1358 MatEqual_MPIDense, 1359 MatGetDiagonal_MPIDense, 1360 MatDiagonalScale_MPIDense, 1361 MatNorm_MPIDense, 1362 /* 20*/ MatAssemblyBegin_MPIDense, 1363 MatAssemblyEnd_MPIDense, 1364 MatSetOption_MPIDense, 1365 MatZeroEntries_MPIDense, 1366 /* 24*/ MatZeroRows_MPIDense, 1367 NULL, 1368 NULL, 1369 NULL, 1370 NULL, 1371 /* 29*/ MatSetUp_MPIDense, 1372 NULL, 1373 NULL, 1374 MatGetDiagonalBlock_MPIDense, 1375 NULL, 1376 /* 34*/ MatDuplicate_MPIDense, 1377 NULL, 1378 NULL, 1379 NULL, 1380 NULL, 1381 /* 39*/ MatAXPY_MPIDense, 1382 MatCreateSubMatrices_MPIDense, 1383 NULL, 1384 MatGetValues_MPIDense, 1385 MatCopy_MPIDense, 1386 /* 44*/ NULL, 1387 MatScale_MPIDense, 1388 MatShift_MPIDense, 1389 NULL, 1390 NULL, 1391 /* 49*/ MatSetRandom_MPIDense, 1392 NULL, 1393 NULL, 1394 NULL, 1395 NULL, 1396 /* 54*/ NULL, 1397 NULL, 1398 NULL, 1399 NULL, 1400 NULL, 1401 /* 59*/ MatCreateSubMatrix_MPIDense, 1402 MatDestroy_MPIDense, 1403 MatView_MPIDense, 1404 NULL, 1405 NULL, 1406 /* 64*/ NULL, 1407 NULL, 1408 NULL, 1409 NULL, 1410 NULL, 1411 /* 69*/ NULL, 1412 NULL, 1413 NULL, 1414 NULL, 1415 NULL, 1416 /* 74*/ NULL, 1417 NULL, 1418 NULL, 1419 NULL, 1420 NULL, 1421 /* 79*/ NULL, 1422 NULL, 1423 NULL, 1424 NULL, 1425 /* 83*/ MatLoad_MPIDense, 1426 NULL, 1427 NULL, 1428 NULL, 1429 NULL, 1430 NULL, 1431 /* 89*/ NULL, 1432 NULL, 1433 NULL, 1434 NULL, 1435 NULL, 1436 /* 94*/ NULL, 1437 NULL, 1438 
MatMatTransposeMultSymbolic_MPIDense_MPIDense, 1439 MatMatTransposeMultNumeric_MPIDense_MPIDense, 1440 NULL, 1441 /* 99*/ MatProductSetFromOptions_MPIDense, 1442 NULL, 1443 NULL, 1444 MatConjugate_MPIDense, 1445 NULL, 1446 /*104*/ NULL, 1447 MatRealPart_MPIDense, 1448 MatImaginaryPart_MPIDense, 1449 NULL, 1450 NULL, 1451 /*109*/ NULL, 1452 NULL, 1453 NULL, 1454 MatGetColumnVector_MPIDense, 1455 MatMissingDiagonal_MPIDense, 1456 /*114*/ NULL, 1457 NULL, 1458 NULL, 1459 NULL, 1460 NULL, 1461 /*119*/ NULL, 1462 NULL, 1463 NULL, 1464 NULL, 1465 NULL, 1466 /*124*/ NULL, 1467 MatGetColumnReductions_MPIDense, 1468 NULL, 1469 NULL, 1470 NULL, 1471 /*129*/ NULL, 1472 NULL, 1473 MatTransposeMatMultSymbolic_MPIDense_MPIDense, 1474 MatTransposeMatMultNumeric_MPIDense_MPIDense, 1475 NULL, 1476 /*134*/ NULL, 1477 NULL, 1478 NULL, 1479 NULL, 1480 NULL, 1481 /*139*/ NULL, 1482 NULL, 1483 NULL, 1484 NULL, 1485 NULL, 1486 MatCreateMPIMatConcatenateSeqMat_MPIDense, 1487 /*145*/ NULL, 1488 NULL, 1489 NULL 1490 }; 1491 1492 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1493 { 1494 Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 1495 PetscBool iscuda; 1496 1497 PetscFunctionBegin; 1498 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1499 PetscCall(PetscLayoutSetUp(mat->rmap)); 1500 PetscCall(PetscLayoutSetUp(mat->cmap)); 1501 if (!a->A) { 1502 PetscCall(MatCreate(PETSC_COMM_SELF,&a->A)); 1503 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A)); 1504 PetscCall(MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N)); 1505 } 1506 PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda)); 1507 PetscCall(MatSetType(a->A,iscuda ? 
MATSEQDENSECUDA : MATSEQDENSE)); 1508 PetscCall(MatSeqDenseSetPreallocation(a->A,data)); 1509 mat->preallocated = PETSC_TRUE; 1510 mat->assembled = PETSC_TRUE; 1511 PetscFunctionReturn(0); 1512 } 1513 1514 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1515 { 1516 Mat B,C; 1517 1518 PetscFunctionBegin; 1519 PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&C)); 1520 PetscCall(MatConvert_SeqAIJ_SeqDense(C,MATSEQDENSE,MAT_INITIAL_MATRIX,&B)); 1521 PetscCall(MatDestroy(&C)); 1522 if (reuse == MAT_REUSE_MATRIX) { 1523 C = *newmat; 1524 } else C = NULL; 1525 PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C)); 1526 PetscCall(MatDestroy(&B)); 1527 if (reuse == MAT_INPLACE_MATRIX) { 1528 PetscCall(MatHeaderReplace(A,&C)); 1529 } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1530 PetscFunctionReturn(0); 1531 } 1532 1533 PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1534 { 1535 Mat B,C; 1536 1537 PetscFunctionBegin; 1538 PetscCall(MatDenseGetLocalMatrix(A,&C)); 1539 PetscCall(MatConvert_SeqDense_SeqAIJ(C,MATSEQAIJ,MAT_INITIAL_MATRIX,&B)); 1540 if (reuse == MAT_REUSE_MATRIX) { 1541 C = *newmat; 1542 } else C = NULL; 1543 PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C)); 1544 PetscCall(MatDestroy(&B)); 1545 if (reuse == MAT_INPLACE_MATRIX) { 1546 PetscCall(MatHeaderReplace(A,&C)); 1547 } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1548 PetscFunctionReturn(0); 1549 } 1550 1551 #if defined(PETSC_HAVE_ELEMENTAL) 1552 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1553 { 1554 Mat mat_elemental; 1555 PetscScalar *v; 1556 PetscInt m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols; 1557 1558 PetscFunctionBegin; 1559 if (reuse == 
MAT_REUSE_MATRIX) { 1560 mat_elemental = *newmat; 1561 PetscCall(MatZeroEntries(*newmat)); 1562 } else { 1563 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental)); 1564 PetscCall(MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N)); 1565 PetscCall(MatSetType(mat_elemental,MATELEMENTAL)); 1566 PetscCall(MatSetUp(mat_elemental)); 1567 PetscCall(MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE)); 1568 } 1569 1570 PetscCall(PetscMalloc2(m,&rows,N,&cols)); 1571 for (i=0; i<N; i++) cols[i] = i; 1572 for (i=0; i<m; i++) rows[i] = rstart + i; 1573 1574 /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 1575 PetscCall(MatDenseGetArray(A,&v)); 1576 PetscCall(MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES)); 1577 PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY)); 1578 PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY)); 1579 PetscCall(MatDenseRestoreArray(A,&v)); 1580 PetscCall(PetscFree2(rows,cols)); 1581 1582 if (reuse == MAT_INPLACE_MATRIX) { 1583 PetscCall(MatHeaderReplace(A,&mat_elemental)); 1584 } else { 1585 *newmat = mat_elemental; 1586 } 1587 PetscFunctionReturn(0); 1588 } 1589 #endif 1590 1591 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals) 1592 { 1593 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 1594 1595 PetscFunctionBegin; 1596 PetscCall(MatDenseGetColumn(mat->A,col,vals)); 1597 PetscFunctionReturn(0); 1598 } 1599 1600 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals) 1601 { 1602 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 1603 1604 PetscFunctionBegin; 1605 PetscCall(MatDenseRestoreColumn(mat->A,vals)); 1606 PetscFunctionReturn(0); 1607 } 1608 1609 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 1610 { 1611 Mat_MPIDense *mat; 1612 PetscInt m,nloc,N; 1613 1614 PetscFunctionBegin; 1615 
PetscCall(MatGetSize(inmat,&m,&N)); 1616 PetscCall(MatGetLocalSize(inmat,NULL,&nloc)); 1617 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 1618 PetscInt sum; 1619 1620 if (n == PETSC_DECIDE) { 1621 PetscCall(PetscSplitOwnership(comm,&n,&N)); 1622 } 1623 /* Check sum(n) = N */ 1624 PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm)); 1625 PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N); 1626 1627 PetscCall(MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat)); 1628 PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 1629 } 1630 1631 /* numeric phase */ 1632 mat = (Mat_MPIDense*)(*outmat)->data; 1633 PetscCall(MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN)); 1634 PetscFunctionReturn(0); 1635 } 1636 1637 #if defined(PETSC_HAVE_CUDA) 1638 PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat) 1639 { 1640 Mat B; 1641 Mat_MPIDense *m; 1642 1643 PetscFunctionBegin; 1644 if (reuse == MAT_INITIAL_MATRIX) { 1645 PetscCall(MatDuplicate(M,MAT_COPY_VALUES,newmat)); 1646 } else if (reuse == MAT_REUSE_MATRIX) { 1647 PetscCall(MatCopy(M,*newmat,SAME_NONZERO_PATTERN)); 1648 } 1649 1650 B = *newmat; 1651 PetscCall(MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE)); 1652 PetscCall(PetscFree(B->defaultvectype)); 1653 PetscCall(PetscStrallocpy(VECSTANDARD,&B->defaultvectype)); 1654 PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE)); 1655 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL)); 1656 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL)); 1657 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL)); 1658 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL)); 1659 
/* Tail of MatConvert_MPIDenseCUDA_MPIDense() (the definition begins in an earlier chunk):
   detach every CUDA-specific composed method, convert the local block back to MATSEQDENSE,
   and mark the data as resident on the CPU. */
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL));
  m = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    PetscCall(MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A));
  }
  B->ops->bindtocpu = NULL;              /* plain MPIDense has no GPU bind operation */
  B->offloadmask = PETSC_OFFLOAD_CPU;    /* data now lives (only) on the host */
  PetscFunctionReturn(0);
}

/*
   Converts a MATMPIDENSE matrix into MATMPIDENSECUDA: switches the default vector
   type to VECCUDA, installs the CUDA-specific composed methods, and converts the
   local (sequential) block to MATSEQDENSECUDA.  For MAT_INPLACE_MATRIX, *newmat
   aliases M and no copy is made.
*/
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat          B;
  Mat_MPIDense *m;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatDuplicate(M,MAT_COPY_VALUES,newmat));
  } else if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatCopy(M,*newmat,SAME_NONZERO_PATTERN));
  }

  B = *newmat;
  PetscCall(PetscFree(B->defaultvectype));
  PetscCall(PetscStrallocpy(VECCUDA,&B->defaultvectype));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                            MatDenseCUDAGetArrayRead_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                           MatDenseCUDAGetArrayWrite_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                            MatDenseCUDARestoreArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                        MatDenseCUDARestoreArrayRead_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",                       MatDenseCUDARestoreArrayWrite_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                              MatDenseCUDAPlaceArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                              MatDenseCUDAResetArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                            MatDenseCUDAReplaceArray_MPIDenseCUDA));
  m = (Mat_MPIDense*)(B->data);
  if (m->A) {
    PetscCall(MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A));
    B->offloadmask = PETSC_OFFLOAD_BOTH;          /* values valid on both host and device */
  } else {
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;   /* no local block yet, nothing resident */
  }
  PetscCall(MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE));

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
#endif

/*
   Exposes column col of the local block as an MPI Vec that borrows (via
   VecPlaceArray) the matrix storage; must be matched by
   MatDenseRestoreColumnVec_MPIDense().  Only one column vector or submatrix
   may be outstanding at a time (enforced by vecinuse/matinuse).
*/
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscInt     lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) { /* lazily create the wrapper vector; no array is attached yet */
    PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
  }
  a->vecinuse = col + 1; /* stored 1-based so that 0 means "not in use" */
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda)); /* column start in column-major storage */
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Releases the column vector obtained with MatDenseGetColumnVec_MPIDense(). */
PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  *v = NULL;
  PetscFunctionReturn(0);
}

/* Read-only variant of MatDenseGetColumnVec_MPIDense(); also read-locks the vector. */
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscInt     lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
/* continuation of MatDenseGetColumnVecRead_MPIDense() (begins in previous chunk) */
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) { /* lazily create the wrapper vector; no array is attached yet */
    PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
  }
  a->vecinuse = col + 1; /* stored 1-based so that 0 means "not in use" */
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseGetArrayRead(a->A,&a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda)); /* column start in column-major storage */
  PetscCall(VecLockReadPush(a->cvec)); /* forbid writes through the returned vector */
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Releases the read-only column vector obtained with MatDenseGetColumnVecRead_MPIDense(). */
PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayRead(a->A,&a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec)); /* undo the read lock taken in Get */
  PetscCall(VecResetArray(a->cvec));
  *v = NULL;
  PetscFunctionReturn(0);
}

/* Write-only variant of MatDenseGetColumnVec_MPIDense(); existing values need not be valid. */
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscInt     lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) { /* lazily create the wrapper vector; no array is attached yet */
    PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
  }
  a->vecinuse = col + 1; /* stored 1-based so that 0 means "not in use" */
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda)); /* column start in column-major storage */
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Releases the write-only column vector obtained with MatDenseGetColumnVecWrite_MPIDense(). */
PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  *v = NULL;
  PetscFunctionReturn(0);
}

/*
   Exposes columns [cbegin,cend) as a matrix of the same (MPI dense) type whose
   local block borrows the storage of A.  The wrapper matrix a->cmat is cached
   and its column layout is rebuilt only when the requested width changes.
   Must be matched by MatDenseRestoreSubMatrix_MPIDense().
*/
PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense *c;
  MPI_Comm     comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCheck(!a->vecinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cmat) { /* first use: create the cached wrapper with A's row layout */
    PetscCall(MatCreate(comm,&a->cmat));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat));
    PetscCall(MatSetType(a->cmat,((PetscObject)A)->type_name));
    PetscCall(PetscLayoutReference(A->rmap,&a->cmat->rmap));
    PetscCall(PetscLayoutSetSize(a->cmat->cmap,cend-cbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->cmap));
  } else if (cend-cbegin != a->cmat->cmap->N) { /* width changed: rebuild only the column layout */
    PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
    PetscCall(PetscLayoutCreate(comm,&a->cmat->cmap));
    PetscCall(PetscLayoutSetSize(a->cmat->cmap,cend-cbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->cmap));
  }
  c = (Mat_MPIDense*)a->cmat->data;
  PetscCheck(!c->A,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A));
  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled = PETSC_TRUE;
  a->matinuse = cbegin + 1; /* stored 1-based so that 0 means "not in use" */
  *v = a->cmat;
  PetscFunctionReturn(0);
}

/* Releases the submatrix obtained with MatDenseGetSubMatrix_MPIDense(). */
PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense *c;

  PetscFunctionBegin;
  PetscCheck(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
  PetscCheck(a->cmat,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix");
  PetscCheck(*v == a->cmat,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c = (Mat_MPIDense*)a->cmat->data;
  PetscCall(MatDenseRestoreSubMatrix(a->A,&c->A));
  *v = NULL;
  PetscFunctionReturn(0);
}

/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Keys:
.
 -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()

   Level: beginner

.seealso: `MatCreateDense()`
M*/
/*
   Type constructor for MATMPIDENSE: allocates the Mat_MPIDense context, installs
   the operation table, creates the stash used to buffer off-process entries, and
   composes the type-specific method and conversion hooks.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;

  PetscFunctionBegin;
  PetscCall(PetscNewLog(mat,&a));
  mat->data = (void*)a;
  PetscCall(PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps)));

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  /* dense-specific accessors dispatched through PetscObject composed functions */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense));
  /* conversions to/from other matrix types (availability depends on the build) */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense));
  /* MatProduct hooks for sparse-dense products */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif

  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE));
  PetscFunctionReturn(0);
}

/*MC
   MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.

   Options Database Keys:
.  -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()

   Level: beginner

.seealso:

M*/
#if defined(PETSC_HAVE_CUDA)
#include <petsc/private/deviceimpl.h>
/* Type constructor for MATMPIDENSECUDA: builds a MATMPIDENSE and converts it in place. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
{
  PetscFunctionBegin;
  PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA)); /* ensure the CUDA device is ready before any allocation */
  PetscCall(MatCreate_MPIDense(B));
  PetscCall(MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B));
  PetscFunctionReturn(0);
}
#endif

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
.  -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

   Level: beginner

.seealso: `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`
M*/

/*MC
   MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.

   This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
   and MATMPIDENSECUDA otherwise.

   Options Database Keys:
.  -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()

   Level: beginner

.seealso: `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATDENSE`
M*/

/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.seealso: `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  /* PetscTryMethod: silently a no-op if B is not an MPI dense matrix */
  PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));
  PetscFunctionReturn(0);
}

/*@
   MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user.  This is useful to avoid copying an array
   into a matrix.

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseResetArray().  The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

   Level: developer

.seealso: `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`

@*/
PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA)
  mat->offloadmask = PETSC_OFFLOAD_CPU; /* placed host array invalidates any device copy */
#endif
  PetscFunctionReturn(0);
}

/*@
   MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()

   Not Collective

   Input Parameters:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDensePlaceArray()

   Level: developer

.seealso: `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`

@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(0);
}

/*@
   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user.  This is useful to avoid copying an array
   into a matrix.

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
   freed by the user.  It will be freed when the matrix is destroyed.

   Level: developer

.seealso: `MatDenseGetArray()`, `VecReplaceArray()`
@*/
PetscErrorCode MatDenseReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA)
  mat->offloadmask = PETSC_OFFLOAD_CPU; /* replaced host array invalidates any device copy */
#endif
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/*@C
   MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user.  This is useful to avoid copying an array
   into a matrix.

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseCUDAResetArray().  The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.  The array must have been allocated with cudaMalloc().

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`
@*/
PetscErrorCode MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  mat->offloadmask = PETSC_OFFLOAD_GPU; /* placed device array invalidates any host copy */
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray()

   Not Collective

   Input Parameters:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDenseCUDAPlaceArray()

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`

@*/
PetscErrorCode MatDenseCUDAResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user.  This is useful to avoid copying an array
   into a matrix.

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   This permanently replaces the GPU array and frees the memory associated with the old GPU array.
   The memory passed in CANNOT be freed by the user.  It will be freed
   when the matrix is destroyed.  The array should respect the matrix leading dimension.

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()`
@*/
PetscErrorCode MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  mat->offloadmask = PETSC_OFFLOAD_GPU; /* replaced device array invalidates any host copy */
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.

   Not Collective

   Input Parameters:
.  A - the matrix

   Output Parameters
.  array - the GPU array in column major order

   Notes:
   The data on the GPU may not be updated due to operations done on the CPU.  If you need updated data, use MatDenseCUDAGetArray().  The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArrayRead()`
@*/
PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Notes:

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
@*/
PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  A->offloadmask = PETSC_OFFLOAD_GPU; /* device copy is now the authoritative one */
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix.  The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.

   Not Collective

   Input Parameters:
.  A - the matrix

   Output Parameters
.  array - the GPU array in column major order

   Notes:
   Data can be copied to the GPU due to operations done on the CPU.  If you need write only access, use MatDenseCUDAGetArrayWrite().

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
@*/
PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));
  /* no PetscObjectStateIncrease: read access does not modify the matrix */
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().
2282 2283 Not Collective 2284 2285 Input Parameters: 2286 + A - the matrix 2287 - array - the GPU array in column major order 2288 2289 Notes: 2290 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). 2291 2292 Level: developer 2293 2294 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()` 2295 @*/ 2296 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a) 2297 { 2298 PetscFunctionBegin; 2299 PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a)); 2300 PetscFunctionReturn(0); 2301 } 2302 2303 /*@C 2304 MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed. 2305 2306 Not Collective 2307 2308 Input Parameters: 2309 . A - the matrix 2310 2311 Output Parameters 2312 . array - the GPU array in column major order 2313 2314 Notes: 2315 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead(). 2316 2317 Level: developer 2318 2319 .seealso: `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()` 2320 @*/ 2321 PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a) 2322 { 2323 PetscFunctionBegin; 2324 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2325 PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a)); 2326 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2327 PetscFunctionReturn(0); 2328 } 2329 2330 /*@C 2331 MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray(). 
2332 2333 Not Collective 2334 2335 Input Parameters: 2336 + A - the matrix 2337 - array - the GPU array in column major order 2338 2339 Notes: 2340 2341 Level: developer 2342 2343 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()` 2344 @*/ 2345 PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a) 2346 { 2347 PetscFunctionBegin; 2348 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2349 PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a)); 2350 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2351 A->offloadmask = PETSC_OFFLOAD_GPU; 2352 PetscFunctionReturn(0); 2353 } 2354 #endif 2355 2356 /*@C 2357 MatCreateDense - Creates a matrix in dense format. 2358 2359 Collective 2360 2361 Input Parameters: 2362 + comm - MPI communicator 2363 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2364 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2365 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 2366 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 2367 - data - optional location of matrix data. Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 2368 to control all matrix memory allocation. 2369 2370 Output Parameter: 2371 . A - the matrix 2372 2373 Notes: 2374 The dense format is fully compatible with standard Fortran 77 2375 storage by columns. 2376 Note that, although local portions of the matrix are stored in column-major 2377 order, the matrix is partitioned across MPI ranks by row. 2378 2379 The data input variable is intended primarily for Fortran programmers 2380 who wish to allocate their own matrix memory space. Most users should 2381 set data=NULL (PETSC_NULL_SCALAR for Fortran users). 2382 2383 The user MUST specify either the local or global matrix dimensions 2384 (possibly both). 

   Level: intermediate

.seealso: `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm,A));
  PetscCall(MatSetSizes(*A,m,n,M,N));
  PetscCall(MatSetType(*A,MATDENSE));
  /* only the preallocation matching the actual (seq or mpi) type takes effect;
     the other is a no-op via PetscTryMethod */
  PetscCall(MatSeqDenseSetPreallocation(*A,data));
  PetscCall(MatMPIDenseSetPreallocation(*A,data));
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/*@C
   MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of GPU matrix data.  Set data=NULL for PETSc
   to control matrix memory allocation.

   Output Parameter:
.
 A - the matrix

   Notes:

   Level: intermediate

.seealso: `MatCreate()`, `MatCreateDense()`
@*/
PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm,A));
  PetscValidLogicalCollectiveBool(*A,!!data,6); /* all ranks must agree on whether data is provided */
  PetscCall(MatSetSizes(*A,m,n,M,N));
  PetscCall(MatSetType(*A,MATDENSECUDA));
  /* only the preallocation matching the actual (seq or mpi) type takes effect */
  PetscCall(MatSeqDenseCUDASetPreallocation(*A,data));
  PetscCall(MatMPIDenseCUDASetPreallocation(*A,data));
  PetscFunctionReturn(0);
}
#endif

/*
   Duplicates an MPI dense matrix: creates a matrix of the same type and layout,
   duplicates the local block per cpvalues, and copies the stashing behavior.
*/
static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&mat));
  PetscCall(MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
  PetscCall(MatSetType(mat,((PetscObject)A)->type_name));
  a = (Mat_MPIDense*)mat->data;

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  /* share the layouts by reference rather than rebuilding them */
  PetscCall(PetscLayoutReference(A->rmap,&mat->rmap));
  PetscCall(PetscLayoutReference(A->cmap,&mat->cmap));

  PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));

  *newmat = mat;
  PetscFunctionReturn(0);
}

/* Loads an MPI dense matrix from a binary or (if available) HDF5 viewer. */
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5));
#endif
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat,viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat,viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}

/* Collective equality test: local blocks are compared, then the result is AND-reduced. */
static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat          a,b;

  PetscFunctionBegin;
  a = matA->A;
  b = matB->A;
  PetscCall(MatEqual(a,b,flag));
  /* every rank must agree for the matrices to be equal */
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(0);
}

/* Destructor for the C = A^T*B product data attached to C by the symbolic phase. */
PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
{
  Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(atb->sendbuf,atb->recvcounts));
  PetscCall(MatDestroy(&atb->atb));
  PetscCall(PetscFree(atb));
  PetscFunctionReturn(0);
}

/* Destructor for the C = A*B^T product data attached to C by the symbolic phase. */
PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
{
  Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(abt->buf[0],abt->buf[1]));
  PetscCall(PetscFree2(abt->recvcounts,abt->recvdispls));
  PetscCall(PetscFree(abt));
  PetscFunctionReturn(0);
}

/*
   Numeric phase of C = A^T*B for MPI dense matrices.  Each rank computes the
   full local product aseq^T * bseq, packs it by destination rank into sendbuf,
   and an MPI_Reduce_scatter sums and distributes the row blocks of C.
*/
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_TransMatMultDense *atb;
  MPI_Comm              comm;
  PetscMPIInt           size,*recvcounts;
  PetscScalar           *carray,*sendbuf;
  const PetscScalar     *atbarray;
  PetscInt              i,cN=C->cmap->N,proc,k,j,lda;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCallMPI(MPI_Comm_size(comm,&size));

  /* compute atbarray = aseq^T * bseq */
  PetscCall(MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb));

  PetscCall(MatGetOwnershipRanges(C,&ranges));

  /* arrange atbarray into sendbuf: contiguous per-destination blocks, column by column */
  PetscCall(MatDenseGetArrayRead(atb->atb,&atbarray));
  PetscCall(MatDenseGetLDA(atb->atb,&lda));
  for (proc=0, k=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb,&atbarray));

  /* sum all atbarray to local values of C */
  PetscCall(MatDenseGetArrayWrite(c->A,&carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A,&carray));
  PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}

/*
   Symbolic phase of C = A^T*B for MPI dense matrices: sets up C's sizes/type and
   allocates the packing buffer and reduce-scatter counts reused by the numeric phase.
*/
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
{
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_TransMatMultDense *atb;
  PetscBool             cisdense;
  PetscInt              i;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  /* A^T*B requires A and B to have identical row distributions */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product C */
  PetscCall(MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,""));
  if (!cisdense) {
    PetscCall(MatSetType(C,((PetscObject)A)->type_name));
  }
  PetscCall(MatSetUp(C));

  /* create data structure for reuse C */
  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCall(PetscNew(&atb));
  cM = C->rmap->N;
  PetscCall(PetscMalloc2(cM*cN,&atb->sendbuf,size,&atb->recvcounts));
  PetscCall(MatGetOwnershipRanges(C,&ranges));
  for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN; /* rows owned by rank i times columns */

  C->product->data    = atb;
  C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

/* Head of MatMatTransposeMultSymbolic_MPIDense_MPIDense(); the definition continues past this chunk. */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  PetscMPIInt           tag;
  PetscInt              alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool             flg;

  PetscFunctionBegin;
  MatCheckProduct(C,4);
  PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  /* check local size of A and B */
  PetscCheck(A->cmap->n == B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are
incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")",A->cmap->n,B->cmap->n); 2628 2629 PetscCall(PetscStrcmp(product->alg,"allgatherv",&flg)); 2630 alg = flg ? 0 : 1; 2631 2632 /* setup matrix product C */ 2633 PetscCall(MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N)); 2634 PetscCall(MatSetType(C,MATMPIDENSE)); 2635 PetscCall(MatSetUp(C)); 2636 PetscCall(PetscObjectGetNewTag((PetscObject)C,&tag)); 2637 2638 /* create data structure for reuse C */ 2639 PetscCall(PetscObjectGetComm((PetscObject)C,&comm)); 2640 PetscCallMPI(MPI_Comm_size(comm,&size)); 2641 PetscCall(PetscNew(&abt)); 2642 abt->tag = tag; 2643 abt->alg = alg; 2644 switch (alg) { 2645 case 1: /* alg: "cyclic" */ 2646 for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i])); 2647 bufsiz = A->cmap->N * maxRows; 2648 PetscCall(PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]))); 2649 break; 2650 default: /* alg: "allgatherv" */ 2651 PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]))); 2652 PetscCall(PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls))); 2653 for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N; 2654 for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i]; 2655 break; 2656 } 2657 2658 C->product->data = abt; 2659 C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 2660 C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense; 2661 PetscFunctionReturn(0); 2662 } 2663 2664 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) 2665 { 2666 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2667 Mat_MatTransMultDense *abt; 2668 MPI_Comm comm; 2669 PetscMPIInt rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom; 2670 PetscScalar *sendbuf, *recvbuf=NULL, *cv; 2671 
PetscInt i,cK=A->cmap->N,k,j,bn; 2672 PetscScalar _DOne=1.0,_DZero=0.0; 2673 const PetscScalar *av,*bv; 2674 PetscBLASInt cm, cn, ck, alda, blda = 0, clda; 2675 MPI_Request reqs[2]; 2676 const PetscInt *ranges; 2677 2678 PetscFunctionBegin; 2679 MatCheckProduct(C,3); 2680 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2681 abt = (Mat_MatTransMultDense*)C->product->data; 2682 PetscCall(PetscObjectGetComm((PetscObject)C,&comm)); 2683 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 2684 PetscCallMPI(MPI_Comm_size(comm,&size)); 2685 PetscCall(MatDenseGetArrayRead(a->A,&av)); 2686 PetscCall(MatDenseGetArrayRead(b->A,&bv)); 2687 PetscCall(MatDenseGetArrayWrite(c->A,&cv)); 2688 PetscCall(MatDenseGetLDA(a->A,&i)); 2689 PetscCall(PetscBLASIntCast(i,&alda)); 2690 PetscCall(MatDenseGetLDA(b->A,&i)); 2691 PetscCall(PetscBLASIntCast(i,&blda)); 2692 PetscCall(MatDenseGetLDA(c->A,&i)); 2693 PetscCall(PetscBLASIntCast(i,&clda)); 2694 PetscCall(MatGetOwnershipRanges(B,&ranges)); 2695 bn = B->rmap->n; 2696 if (blda == bn) { 2697 sendbuf = (PetscScalar*)bv; 2698 } else { 2699 sendbuf = abt->buf[0]; 2700 for (k = 0, i = 0; i < cK; i++) { 2701 for (j = 0; j < bn; j++, k++) { 2702 sendbuf[k] = bv[i * blda + j]; 2703 } 2704 } 2705 } 2706 if (size > 1) { 2707 sendto = (rank + size - 1) % size; 2708 recvfrom = (rank + size + 1) % size; 2709 } else { 2710 sendto = recvfrom = 0; 2711 } 2712 PetscCall(PetscBLASIntCast(cK,&ck)); 2713 PetscCall(PetscBLASIntCast(c->A->rmap->n,&cm)); 2714 recvisfrom = rank; 2715 for (i = 0; i < size; i++) { 2716 /* we have finished receiving in sending, bufs can be read/modified */ 2717 PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 2718 PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 2719 2720 if (nextrecvisfrom != rank) { 2721 /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2722 sendsiz = cK * bn; 
2723 recvsiz = cK * nextbn; 2724 recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; 2725 PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0])); 2726 PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1])); 2727 } 2728 2729 /* local aseq * sendbuf^T */ 2730 PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn)); 2731 if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda)); 2732 2733 if (nextrecvisfrom != rank) { 2734 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2735 PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE)); 2736 } 2737 bn = nextbn; 2738 recvisfrom = nextrecvisfrom; 2739 sendbuf = recvbuf; 2740 } 2741 PetscCall(MatDenseRestoreArrayRead(a->A,&av)); 2742 PetscCall(MatDenseRestoreArrayRead(b->A,&bv)); 2743 PetscCall(MatDenseRestoreArrayWrite(c->A,&cv)); 2744 PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); 2745 PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); 2746 PetscFunctionReturn(0); 2747 } 2748 2749 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2750 { 2751 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2752 Mat_MatTransMultDense *abt; 2753 MPI_Comm comm; 2754 PetscMPIInt size; 2755 PetscScalar *cv, *sendbuf, *recvbuf; 2756 const PetscScalar *av,*bv; 2757 PetscInt blda,i,cK=A->cmap->N,k,j,bn; 2758 PetscScalar _DOne=1.0,_DZero=0.0; 2759 PetscBLASInt cm, cn, ck, alda, clda; 2760 2761 PetscFunctionBegin; 2762 MatCheckProduct(C,3); 2763 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2764 abt = (Mat_MatTransMultDense*)C->product->data; 2765 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 2766 PetscCallMPI(MPI_Comm_size(comm,&size)); 2767 PetscCall(MatDenseGetArrayRead(a->A,&av)); 2768 
PetscCall(MatDenseGetArrayRead(b->A,&bv)); 2769 PetscCall(MatDenseGetArrayWrite(c->A,&cv)); 2770 PetscCall(MatDenseGetLDA(a->A,&i)); 2771 PetscCall(PetscBLASIntCast(i,&alda)); 2772 PetscCall(MatDenseGetLDA(b->A,&blda)); 2773 PetscCall(MatDenseGetLDA(c->A,&i)); 2774 PetscCall(PetscBLASIntCast(i,&clda)); 2775 /* copy transpose of B into buf[0] */ 2776 bn = B->rmap->n; 2777 sendbuf = abt->buf[0]; 2778 recvbuf = abt->buf[1]; 2779 for (k = 0, j = 0; j < bn; j++) { 2780 for (i = 0; i < cK; i++, k++) { 2781 sendbuf[k] = bv[i * blda + j]; 2782 } 2783 } 2784 PetscCall(MatDenseRestoreArrayRead(b->A,&bv)); 2785 PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm)); 2786 PetscCall(PetscBLASIntCast(cK,&ck)); 2787 PetscCall(PetscBLASIntCast(c->A->rmap->n,&cm)); 2788 PetscCall(PetscBLASIntCast(c->A->cmap->n,&cn)); 2789 if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda)); 2790 PetscCall(MatDenseRestoreArrayRead(a->A,&av)); 2791 PetscCall(MatDenseRestoreArrayRead(b->A,&bv)); 2792 PetscCall(MatDenseRestoreArrayWrite(c->A,&cv)); 2793 PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); 2794 PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); 2795 PetscFunctionReturn(0); 2796 } 2797 2798 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2799 { 2800 Mat_MatTransMultDense *abt; 2801 2802 PetscFunctionBegin; 2803 MatCheckProduct(C,3); 2804 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2805 abt = (Mat_MatTransMultDense*)C->product->data; 2806 switch (abt->alg) { 2807 case 1: 2808 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); 2809 break; 2810 default: 2811 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); 2812 break; 2813 } 2814 PetscFunctionReturn(0); 2815 } 2816 2817 PetscErrorCode 
MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) 2818 { 2819 Mat_MatMultDense *ab = (Mat_MatMultDense*)data; 2820 2821 PetscFunctionBegin; 2822 PetscCall(MatDestroy(&ab->Ce)); 2823 PetscCall(MatDestroy(&ab->Ae)); 2824 PetscCall(MatDestroy(&ab->Be)); 2825 PetscCall(PetscFree(ab)); 2826 PetscFunctionReturn(0); 2827 } 2828 2829 #if defined(PETSC_HAVE_ELEMENTAL) 2830 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2831 { 2832 Mat_MatMultDense *ab; 2833 2834 PetscFunctionBegin; 2835 MatCheckProduct(C,3); 2836 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data"); 2837 ab = (Mat_MatMultDense*)C->product->data; 2838 PetscCall(MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae)); 2839 PetscCall(MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be)); 2840 PetscCall(MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce)); 2841 PetscCall(MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C)); 2842 PetscFunctionReturn(0); 2843 } 2844 2845 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 2846 { 2847 Mat Ae,Be,Ce; 2848 Mat_MatMultDense *ab; 2849 2850 PetscFunctionBegin; 2851 MatCheckProduct(C,4); 2852 PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2853 /* check local size of A and B */ 2854 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 2855 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2856 } 2857 2858 /* create elemental matrices Ae and Be */ 2859 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae)); 2860 PetscCall(MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N)); 2861 PetscCall(MatSetType(Ae,MATELEMENTAL)); 2862 
PetscCall(MatSetUp(Ae)); 2863 PetscCall(MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE)); 2864 2865 PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be)); 2866 PetscCall(MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N)); 2867 PetscCall(MatSetType(Be,MATELEMENTAL)); 2868 PetscCall(MatSetUp(Be)); 2869 PetscCall(MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE)); 2870 2871 /* compute symbolic Ce = Ae*Be */ 2872 PetscCall(MatCreate(PetscObjectComm((PetscObject)C),&Ce)); 2873 PetscCall(MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce)); 2874 2875 /* setup C */ 2876 PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE)); 2877 PetscCall(MatSetType(C,MATDENSE)); 2878 PetscCall(MatSetUp(C)); 2879 2880 /* create data structure for reuse Cdense */ 2881 PetscCall(PetscNew(&ab)); 2882 ab->Ae = Ae; 2883 ab->Be = Be; 2884 ab->Ce = Ce; 2885 2886 C->product->data = ab; 2887 C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2888 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2889 PetscFunctionReturn(0); 2890 } 2891 #endif 2892 /* ----------------------------------------------- */ 2893 #if defined(PETSC_HAVE_ELEMENTAL) 2894 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 2895 { 2896 PetscFunctionBegin; 2897 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 2898 C->ops->productsymbolic = MatProductSymbolic_AB; 2899 PetscFunctionReturn(0); 2900 } 2901 #endif 2902 2903 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 2904 { 2905 Mat_Product *product = C->product; 2906 Mat A = product->A,B=product->B; 2907 2908 PetscFunctionBegin; 2909 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) 2910 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2911 C->ops->transposematmultsymbolic = 
MatTransposeMatMultSymbolic_MPIDense_MPIDense; 2912 C->ops->productsymbolic = MatProductSymbolic_AtB; 2913 PetscFunctionReturn(0); 2914 } 2915 2916 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 2917 { 2918 Mat_Product *product = C->product; 2919 const char *algTypes[2] = {"allgatherv","cyclic"}; 2920 PetscInt alg,nalg = 2; 2921 PetscBool flg = PETSC_FALSE; 2922 2923 PetscFunctionBegin; 2924 /* Set default algorithm */ 2925 alg = 0; /* default is allgatherv */ 2926 PetscCall(PetscStrcmp(product->alg,"default",&flg)); 2927 if (flg) { 2928 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 2929 } 2930 2931 /* Get runtime option */ 2932 if (product->api_user) { 2933 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat"); 2934 PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg)); 2935 PetscOptionsEnd(); 2936 } else { 2937 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat"); 2938 PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg)); 2939 PetscOptionsEnd(); 2940 } 2941 if (flg) { 2942 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 2943 } 2944 2945 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 2946 C->ops->productsymbolic = MatProductSymbolic_ABt; 2947 PetscFunctionReturn(0); 2948 } 2949 2950 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 2951 { 2952 Mat_Product *product = C->product; 2953 2954 PetscFunctionBegin; 2955 switch (product->type) { 2956 #if defined(PETSC_HAVE_ELEMENTAL) 2957 case MATPRODUCT_AB: 2958 PetscCall(MatProductSetFromOptions_MPIDense_AB(C)); 2959 break; 2960 #endif 2961 case MATPRODUCT_AtB: 2962 PetscCall(MatProductSetFromOptions_MPIDense_AtB(C)); 2963 
break; 2964 case MATPRODUCT_ABt: 2965 PetscCall(MatProductSetFromOptions_MPIDense_ABt(C)); 2966 break; 2967 default: 2968 break; 2969 } 2970 PetscFunctionReturn(0); 2971 } 2972