/*
   Basic functions for basic parallel dense matrices.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@
   MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the sequential
   matrix that represents the operator; for a sequential matrix this is the matrix itself.

   Input Parameter:
.  A - the Seq or MPI dense matrix

   Output Parameter:
.  B - the inner matrix

   Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  PetscBool    flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscValidPointer(B,2);
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg));
  if (flg) *B = mat->A;
  else {
    PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg));
    PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name);
    *B = A;
  }
  PetscFunctionReturn(0);
}
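/* A minimal usage sketch (illustrative, not part of the original source): grab the
   local storage of a dense matrix regardless of whether it is sequential or MPI.
   The variable names here are hypothetical.

     Mat      locA;
     PetscInt lda;

     PetscCall(MatDenseGetLocalMatrix(A,&locA));  // locA == A when A is MATSEQDENSE
     PetscCall(MatDenseGetLDA(locA,&lda));        // leading dimension of the local array
*/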

PetscErrorCode MatCopy_MPIDense(Mat A,Mat B,MatStructure s)
{
  Mat_MPIDense *Amat = (Mat_MPIDense*)A->data;
  Mat_MPIDense *Bmat = (Mat_MPIDense*)B->data;

  PetscFunctionBegin;
  PetscCall(MatCopy(Amat->A,Bmat->A,s));
  PetscFunctionReturn(0);
}

PetscErrorCode MatShift_MPIDense(Mat A,PetscScalar alpha)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  PetscInt     j,lda,rstart = A->rmap->rstart,rend = A->rmap->rend,rend2;
  PetscScalar  *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mat->A,&v));
  PetscCall(MatDenseGetLDA(mat->A,&lda));
  rend2 = PetscMin(rend,A->cmap->N);
  if (rend2 > rstart) {
    for (j=rstart; j<rend2; j++) v[j-rstart+j*lda] += alpha;
    PetscCall(PetscLogFlops(rend2-rstart));
  }
  PetscCall(MatDenseRestoreArray(mat->A,&v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  PetscInt     lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  PetscCall(MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  PetscInt     lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  PetscCall(MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  PetscInt     m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm     comm;
  PetscBool    flg;
  Mat          B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A,&flg));
  PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg));
    PetscCheck(!flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
    PetscCall(PetscObjectGetComm((PetscObject)(mdn->A),&comm));
    PetscCall(MatCreate(comm,&B));
    PetscCall(MatSetSizes(B,m,m,m,m));
    PetscCall(MatSetType(B,((PetscObject)mdn->A)->type_name));
    PetscCall(MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array));
    PetscCall(MatSeqDenseSetPreallocation(B,array+m*rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array));
    PetscCall(PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B));
    *a = B;
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
  PetscInt     i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool    roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        PetscCall(MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv));
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          PetscCall(MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv));
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  PetscInt     i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        PetscCall(MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j));
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLDA(a->A,lda));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscBool    iscuda;

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
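    /* First use of this matrix: create the inner sequential matrix that stores the
       locally owned rows, matching the (CPU or CUDA) type of the parallel matrix,
       before forwarding the LDA request to it. */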
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    PetscCall(MatCreate(PETSC_COMM_SELF,&a->A));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->A));
    PetscCall(MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N));
    PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda));
    PetscCall(MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
  }
  PetscCall(MatDenseSetLDA(a->A,lda));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArray(a->A,array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayRead(a->A,array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayWrite(a->A,array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDensePlaceArray(a->A,array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseResetArray(a->A));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseReplaceArray(a->A,array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense*)A->data,*newmatd;
  PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt    *irow,*icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat               newmat;
  IS                iscol_local;
  MPI_Comm          comm_is,comm_mat;

  PetscFunctionBegin;
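  /* Only extraction without redistribution is implemented: isrow must select locally
     owned rows, and the full column index set is gathered on every rank below. */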
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol,&comm_is));
  PetscCheck(comm_mat == comm_is,PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  PetscCall(ISAllGather(iscol,&iscol_local));
  PetscCall(ISGetIndices(isrow,&irow));
  PetscCall(ISGetIndices(iscol_local,&icol));
  PetscCall(ISGetLocalSize(isrow,&nrows));
  PetscCall(ISGetLocalSize(iscol,&ncols));
  PetscCall(ISGetSize(iscol,&Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only a submatrix with the same
     partitioning as the original matrix! */

  PetscCall(MatGetLocalSize(A,&nlrows,&nlcols));
  PetscCall(MatGetOwnershipRange(A,&rstart,&rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&newmat));
    PetscCall(MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols));
    PetscCall(MatSetType(newmat,((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat,NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A,&bv));
  PetscCall(MatDenseGetArrayRead(mat->A,&v));
  PetscCall(MatDenseGetLDA(mat->A,&lda));
  for (i=0; i<Ncols; i++) {
    const PetscScalar *av = v + lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A,&v));
  PetscCall(MatDenseRestoreArray(newmatd->A,&bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow,&irow));
  PetscCall(ISRestoreIndices(iscol_local,&icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArray(a->A,array));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayRead(a->A,array));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayWrite(a->A,array));
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  PetscInt     nstash,reallocs;

  PetscFunctionBegin;
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
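  /* Off-process values stashed by MatSetValues_MPIDense() are now in flight;
     MatAssemblyEnd_MPIDense() receives and inserts them. */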
PetscCall(PetscInfo(mdn->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs)); 378 PetscFunctionReturn(0); 379 } 380 381 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 382 { 383 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 384 PetscInt i,*row,*col,flg,j,rstart,ncols; 385 PetscMPIInt n; 386 PetscScalar *val; 387 388 PetscFunctionBegin; 389 if (!mdn->donotstash && !mat->nooffprocentries) { 390 /* wait on receives */ 391 while (1) { 392 PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg)); 393 if (!flg) break; 394 395 for (i=0; i<n;) { 396 /* Now identify the consecutive vals belonging to the same row */ 397 for (j=i,rstart=row[j]; j<n; j++) { 398 if (row[j] != rstart) break; 399 } 400 if (j < n) ncols = j-i; 401 else ncols = n-i; 402 /* Now assemble all these values with a single function call */ 403 PetscCall(MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode)); 404 i = j; 405 } 406 } 407 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 408 } 409 410 PetscCall(MatAssemblyBegin(mdn->A,mode)); 411 PetscCall(MatAssemblyEnd(mdn->A,mode)); 412 PetscFunctionReturn(0); 413 } 414 415 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 416 { 417 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 418 419 PetscFunctionBegin; 420 PetscCall(MatZeroEntries(l->A)); 421 PetscFunctionReturn(0); 422 } 423 424 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 425 { 426 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 427 PetscInt i,len,*lrows; 428 429 PetscFunctionBegin; 430 /* get locally owned rows */ 431 PetscCall(PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL)); 432 /* fix right hand side if needed */ 433 if (x && b) { 434 const PetscScalar *xx; 435 PetscScalar *bb; 436 437 PetscCall(VecGetArrayRead(x, &xx)); 438 PetscCall(VecGetArrayWrite(b, &bb)); 439 for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]]; 440 PetscCall(VecRestoreArrayRead(x, &xx)); 441 PetscCall(VecRestoreArrayWrite(b, &bb)); 442 } 443 PetscCall(MatZeroRows(l->A,len,lrows,0.0,NULL,NULL)); 444 if (diag != 0.0) { 445 Vec d; 446 447 PetscCall(MatCreateVecs(A,NULL,&d)); 448 PetscCall(VecSet(d,diag)); 449 PetscCall(MatDiagonalSet(A,d,INSERT_VALUES)); 450 PetscCall(VecDestroy(&d)); 451 } 452 PetscCall(PetscFree(lrows)); 453 PetscFunctionReturn(0); 454 } 455 456 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec); 457 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec); 458 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec); 459 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec); 460 461 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 462 { 463 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 464 const PetscScalar *ax; 465 PetscScalar *ay; 466 PetscMemType axmtype,aymtype; 467 468 PetscFunctionBegin; 469 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 470 PetscCall(VecGetArrayReadAndMemType(xx,&ax,&axmtype)); 471 PetscCall(VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype)); 472 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE)); 473 PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE)); 474 PetscCall(VecRestoreArrayAndMemType(mdn->lvec,&ay)); 475 PetscCall(VecRestoreArrayReadAndMemType(xx,&ax)); 476 PetscCall((*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy)); 477 PetscFunctionReturn(0); 478 } 479 480 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 481 { 482 
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx,&ax,&axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec,&ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx,&ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecSet(yy,0.0));
  PetscCall((*a->A->ops->multtranspose)(a->A,xx,a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype));
  PetscCall(VecGetArrayAndMemType(yy,&ay,&aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec,&ax));
  PetscCall(VecRestoreArrayAndMemType(yy,&ay));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecCopy(yy,zz));
  PetscCall((*a->A->ops->multtranspose)(a->A,xx,a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype));
  PetscCall(VecGetArrayAndMemType(zz,&ay,&aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec,&ax));
  PetscCall(VecRestoreArrayAndMemType(zz,&ay));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscInt          lda,len,i,n,radd;
  PetscScalar       *x,zero = 0.0;
  const PetscScalar *av;

  PetscFunctionBegin;
  PetscCall(VecSet(v,zero));
  PetscCall(VecGetArray(v,&x));
  PetscCall(VecGetSize(v,&n));
  PetscCheck(n == A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len = PetscMin(a->A->rmap->n,a->A->cmap->n);
  PetscCall(MatDenseGetArrayRead(a->A,&av));
  PetscCall(MatDenseGetLDA(a->A,&lda));
  radd = A->rmap->rstart*lda; /* offset of column rstart; must use lda (not the local row count) in case they differ */
  for (i=0; i<len; i++) {
    x[i] = av[radd + i*lda + i];
  }
  PetscCall(MatDenseRestoreArrayRead(a->A,&av));
  PetscCall(VecRestoreArray(v,&x));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCheck(!mdn->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));

  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL));
PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL)); 619 620 PetscCall(PetscObjectCompose((PetscObject)mat,"DiagonalBlock",NULL)); 621 PetscFunctionReturn(0); 622 } 623 624 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer); 625 626 #include <petscdraw.h> 627 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 628 { 629 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 630 PetscMPIInt rank; 631 PetscViewerType vtype; 632 PetscBool iascii,isdraw; 633 PetscViewer sviewer; 634 PetscViewerFormat format; 635 636 PetscFunctionBegin; 637 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank)); 638 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 639 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 640 if (iascii) { 641 PetscCall(PetscViewerGetType(viewer,&vtype)); 642 PetscCall(PetscViewerGetFormat(viewer,&format)); 643 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 644 MatInfo info; 645 PetscCall(MatGetInfo(mat,MAT_LOCAL,&info)); 646 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 647 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory)); 648 PetscCall(PetscViewerFlush(viewer)); 649 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 650 if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx,viewer)); 651 PetscFunctionReturn(0); 652 } else if (format == PETSC_VIEWER_ASCII_INFO) { 653 PetscFunctionReturn(0); 654 } 655 } else if (isdraw) { 656 PetscDraw draw; 657 PetscBool isnull; 658 659 PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw)); 660 PetscCall(PetscDrawIsNull(draw,&isnull)); 661 if (isnull) PetscFunctionReturn(0); 662 } 663 664 { 665 /* assemble the entire matrix onto first processor. */ 666 Mat A; 667 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 668 PetscInt *cols; 669 PetscScalar *vals; 670 671 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat),&A)); 672 if (rank == 0) { 673 PetscCall(MatSetSizes(A,M,N,M,N)); 674 } else { 675 PetscCall(MatSetSizes(A,0,0,M,N)); 676 } 677 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 678 PetscCall(MatSetType(A,MATMPIDENSE)); 679 PetscCall(MatMPIDenseSetPreallocation(A,NULL)); 680 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)A)); 681 682 /* Copy the matrix ... 
       This isn't the most efficient means, but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      PetscCall(MatGetRow_MPIDense(mat,row,&nz,&cols,&vals));
      PetscCall(MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES));
      PetscCall(MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals));
      row++;
    }

    PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
    PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    if (rank == 0) {
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name));
      PetscCall(MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscBool iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));

  if (iascii || issocket || isdraw) {
    PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat,viewer));
  } else if (isbinary) {
    PetscCall(MatView_Dense_Binary(mat,viewer));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscLogDouble isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  PetscCall(MatGetInfo(mdn,MAT_LOCAL,info));

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    PetscCall(MatSetOption(a->A,op,flg));
    break;
  case MAT_ROW_ORIENTED:
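    /* row/column orientation changes how MatSetValues() interprets the value
       array, so it must be mirrored on the inner sequential matrix */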
    MatCheckPreallocated(A,1);
    a->roworiented = flg;
    PetscCall(MatSetOption(a->A,op,flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
    PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
    break;
  default:
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  const PetscScalar *l;
  PetscScalar       x,*v,*vv,*r;
  PetscInt          i,j,s2a,s3a,s2,s3,m = mdn->A->rmap->n,n = mdn->A->cmap->n,lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A,&vv));
  PetscCall(MatDenseGetLDA(mdn->A,&lda));
  PetscCall(MatGetLocalSize(A,&s2,&s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll,&s2a));
    PetscCheck(s2a == s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT,s2a,s2);
    PetscCall(VecGetArrayRead(ll,&l));
    for (i=0; i<m; i++) {
      x = l[i];
      v = vv + i;
      for (j=0; j<n; j++) { (*v) *= x; v += lda;}
    }
    PetscCall(VecRestoreArrayRead(ll,&l));
    PetscCall(PetscLogFlops(1.0*n*m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr,&s3a));
    PetscCheck(s3a == s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".",s3a,s3);
    PetscCall(VecGetArrayRead(rr,&ar));
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec,&r));
    PetscCall(PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr,&ar));
    for (i=0; i<n; i++) {
      x = r[i];
      v = vv + i*lda;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec,&r));
    PetscCall(PetscLogFlops(1.0*n*m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A,&vv));
  PetscFunctionReturn(0);
}

PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  PetscInt          i,j;
  PetscMPIInt       size;
  PetscReal         sum = 0.0;
  const PetscScalar *av,*v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArrayRead(mdn->A,&av));
  v = av;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
  if (size == 1) {
    PetscCall(MatNorm(mdn->A,type,nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      PetscCall(MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
      *nrm = PetscSqrtReal(*nrm);
      PetscCall(PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n));
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      PetscCall(PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2));
      *nrm = 0.0;
      v    = av;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      PetscCall(MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree2(tmp,tmp2));
      PetscCall(PetscLogFlops(A->cmap->n*A->rmap->n));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      PetscCall(MatNorm(mdn->A,type,&ntemp));
      PetscCall(MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A)));
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscCall(MatDenseRestoreArrayRead(mdn->A,&av));
  PetscFunctionReturn(0);
}

PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat          B;
  PetscInt     M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscInt     j,i,lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
    PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M));
    PetscCall(MatSetType(B,((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B,NULL));
  } else B = *matout;

  m = a->A->rmap->n; n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A,(const PetscScalar**)&v));
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(PetscMalloc1(m,&rwork));
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    PetscCall(MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES));
    v += lda;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    PetscCall(MatHeaderMerge(A,&B));
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!A->preallocated) {
    PetscCall(MatMPIDenseSetPreallocation(A,NULL));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  Mat_MPIDense *A = (Mat_MPIDense*)Y->data,*B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  PetscCall(MatAXPY(A->A,alpha,B->A,str));
  PetscFunctionReturn(0);
}

PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense *a = (Mat_MPIDense*)mat->data;

  PetscFunctionBegin;
  PetscCall(MatConjugate(a->A));
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
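  /* The column is distributed the same way as the rows of A, so each rank copies
     its piece directly from the inner sequential matrix. */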
  PetscCheck(a->A,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
  PetscCheck(a->A->ops->getcolumnvector,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
  PetscCall((*a->A->ops->getcolumnvector)(a->A,v,col));
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat,PetscInt,PetscReal*);

PetscErrorCode MatGetColumnReductions_MPIDense(Mat A,PetscInt type,PetscReal *reductions)
{
  PetscInt     i,m,n;
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscReal    *work;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A,&m,&n));
  PetscCall(PetscMalloc1(n,&work));
  if (type == REDUCTION_MEAN_REALPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_REALPART,work));
  } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_IMAGINARYPART,work));
  } else {
    PetscCall(MatGetColumnReductions_SeqDense(a->A,type,work));
  }
  if (type == NORM_2) {
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,A->hdr.comm));
  } else {
    PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,A->hdr.comm));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
PetscErrorCode MatShift_MPIDenseCUDA(Mat A,PetscScalar alpha)
{
  PetscScalar *da;
  PetscInt    lda;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArray(A,&da));
  PetscCall(MatDenseGetLDA(A,&lda));
  PetscCall(PetscInfo(A,"Performing Shift on backend\n"));
  PetscCall(MatShift_DenseCUDA_Private(da,alpha,lda,A->rmap->rstart,A->rmap->rend,A->cmap->N));
  PetscCall(MatDenseCUDARestoreArray(A,&da));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscInt     lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
  }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
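  /* Hand the device array back to the inner matrix and detach it from the wrapper vector */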
  PetscCall(MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAResetArray(a->cvec));
  *v = NULL;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscInt     lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
  }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  PetscCall(VecLockReadPush(a->cvec));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecCUDAResetArray(a->cvec));
  *v = NULL;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscInt     lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
  }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAResetArray(a->cvec));
  *v = NULL;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A,const PetscScalar *a)
{
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

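  /* This and the following wrappers forward device-array management to the inner
     MATSEQDENSECUDA matrix; the checks ensure no column vector or submatrix
     access is outstanding. */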
  PetscFunctionBegin;
  PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAPlaceArray(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAResetArray(l->A));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A,const PetscScalar *a)
{
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAReplaceArray(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A,PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArrayWrite(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A,PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArrayWrite(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A,const PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArrayRead(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A,const PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArrayRead(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A,PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArray(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A,PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArray(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);

static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense *d = (Mat_MPIDense*)mat->data;

  PetscFunctionBegin;
  PetscCheck(!d->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!d->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) {
    PetscCall(MatBindToCPU(d->A,bind));
  }
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda));
    if (!iscuda) {
      PetscCall(VecDestroy(&d->cvec));
    }
    PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda));
    if (!iscuda) {
      PetscCall(MatDestroy(&d->cmat));
    }
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA));
    mat->ops->shift = MatShift_MPIDenseCUDA;
  } else {
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense));
    mat->ops->shift = MatShift_MPIDense;
  }
  if (d->cmat) {
    PetscCall(MatBindToCPU(d->cmat,bind));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A,PetscScalar *d_data)
{
  Mat_MPIDense *d = (Mat_MPIDense*)A->data;
  PetscBool    iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda));
  if (!iscuda) PetscFunctionReturn(0);
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!d->A) {
    PetscCall(MatCreate(PETSC_COMM_SELF,&d->A));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)d->A));
    PetscCall(MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N));
  }
  PetscCall(MatSetType(d->A,MATSEQDENSECUDA));
  PetscCall(MatSeqDenseCUDASetPreallocation(d->A,d_data));
  A->preallocated = PETSC_TRUE;
  A->assembled    = PETSC_TRUE;
  PetscFunctionReturn(0);
}
#endif
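
/* A minimal creation sketch (illustrative, not part of the original source): the
   preallocation routines above and below are normally reached through code like

     Mat A;

     PetscCall(MatCreate(PETSC_COMM_WORLD,&A));
     PetscCall(MatSetSizes(A,m,n,PETSC_DECIDE,PETSC_DECIDE));
     PetscCall(MatSetType(A,MATMPIDENSE));
     PetscCall(MatMPIDenseSetPreallocation(A,NULL));  // NULL: PETSc allocates the local array

   where m and n are local row and column counts chosen by the caller. */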

static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense *d = (Mat_MPIDense*)x->data;

  PetscFunctionBegin;
  PetscCall(MatSetRandom(d->A,rctx));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 10*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 29*/ MatSetUp_MPIDense,
                                        NULL,
                                        NULL,
                                        MatGetDiagonalBlock_MPIDense,
                                        NULL,
                                /* 34*/ MatDuplicate_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        NULL,
                                        MatGetValues_MPIDense,
                                        MatCopy_MPIDense,
                                /* 44*/ NULL,
                                        MatScale_MPIDense,
                                        MatShift_MPIDense,
                                        NULL,
                                        NULL,
                                /* 49*/ MatSetRandom_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 54*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        NULL,
                                        NULL,
                                /* 64*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 69*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 74*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 79*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 83*/ MatLoad_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 89*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 94*/ NULL,
                                        NULL,
                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                        NULL,
                                /* 99*/ MatProductSetFromOptions_MPIDense,
                                        NULL,
                                        NULL,
                                        MatConjugate_MPIDense,
                                        NULL,
                                /*104*/ NULL,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        NULL,
                                        NULL,
                                /*109*/ NULL,
                                        NULL,
                                        NULL,
                                        MatGetColumnVector_MPIDense,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*119*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*124*/ NULL,
                                        MatGetColumnReductions_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*129*/ NULL,
                                        NULL,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        NULL,
                                /*134*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*139*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                /*145*/ NULL,
                                        NULL,
                                        NULL
};

PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense *a = (Mat_MPIDense*)mat->data;
  PetscBool    iscuda;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  if (!a->A) {
    PetscCall(MatCreate(PETSC_COMM_SELF,&a->A));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
    PetscCall(MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N));
  }
  PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda));
  PetscCall(MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
  PetscCall(MatSeqDenseSetPreallocation(a->A,data));
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat B,C;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&C));
  PetscCall(MatConvert_SeqAIJ_SeqDense(C,MATSEQDENSE,MAT_INITIAL_MATRIX,&B));
  PetscCall(MatDestroy(&C));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX,&C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A,&C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat B,C;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLocalMatrix(A,&C));
  PetscCall(MatConvert_SeqDense_SeqAIJ(C,MATSEQAIJ,MAT_INITIAL_MATRIX,&B));
  if (reuse == MAT_REUSE_MATRIX) {
    C = *newmat;
  } else C = NULL;
  PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C ? MAT_INITIAL_MATRIX : MAT_REUSE_MATRIX,&C));
  PetscCall(MatDestroy(&B));
  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A,&C));
  } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C;
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat         mat_elemental;
  PetscScalar *v;
  PetscInt    m = A->rmap->n,N = A->cmap->N,rstart = A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    PetscCall(MatZeroEntries(*newmat));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&mat_elemental));
    PetscCall(MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N));
    PetscCall(MatSetType(mat_elemental,MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
    PetscCall(MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE));
  }

  PetscCall(PetscMalloc2(m,&rows,N,&cols));
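  /* Each rank inserts its m locally owned global rows against all N columns */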
#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat         mat_elemental;
  PetscScalar *v;
  PetscInt    m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    PetscCall(MatZeroEntries(*newmat));
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental));
    PetscCall(MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N));
    PetscCall(MatSetType(mat_elemental,MATELEMENTAL));
    PetscCall(MatSetUp(mat_elemental));
    PetscCall(MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE));
  }

  PetscCall(PetscMalloc2(m,&rows,N,&cols));
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  PetscCall(MatDenseGetArray(A,&v));
  PetscCall(MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES));
  PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY));
  PetscCall(MatDenseRestoreArray(A,&v));
  PetscCall(PetscFree2(rows,cols));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A,&mat_elemental));
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif

static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetColumn(mat->A,col,vals));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreColumn(mat->A,vals));
  PetscFunctionReturn(0);
}

PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  Mat_MPIDense *mat;
  PetscInt     m,nloc,N;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat,&m,&N));
  PetscCall(MatGetLocalSize(inmat,NULL,&nloc));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) {
      PetscCall(PetscSplitOwnership(comm,&n,&N));
    }
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm));
    PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N);

    PetscCall(MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat));
    PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE));
  }

  /* numeric phase */
  mat = (Mat_MPIDense*)(*outmat)->data;
  PetscCall(MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN));
  PetscFunctionReturn(0);
}
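/*
   Illustrative sketch (not part of the library source): MatDenseGetColumn()
   exposes the locally owned entries of one global column as a raw array. The
   loop below scales column col of a hypothetical MATMPIDENSE matrix A:

     PetscScalar *colv;
     PetscInt    i,nlocal;

     PetscCall(MatGetLocalSize(A,&nlocal,NULL));
     PetscCall(MatDenseGetColumn(A,col,&colv));
     for (i=0; i<nlocal; i++) colv[i] *= 2.0;
     PetscCall(MatDenseRestoreColumn(A,&colv));
*/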
#if defined(PETSC_HAVE_CUDA)
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat          B;
  Mat_MPIDense *m;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatDuplicate(M,MAT_COPY_VALUES,newmat));
  } else if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatCopy(M,*newmat,SAME_NONZERO_PATTERN));
  }

  B = *newmat;
  PetscCall(MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE));
  PetscCall(PetscFree(B->defaultvectype));
  PetscCall(PetscStrallocpy(VECSTANDARD,&B->defaultvectype));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL));
  m = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    PetscCall(MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A));
  }
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat          B;
  Mat_MPIDense *m;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatDuplicate(M,MAT_COPY_VALUES,newmat));
  } else if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatCopy(M,*newmat,SAME_NONZERO_PATTERN));
  }

  B = *newmat;
  PetscCall(PetscFree(B->defaultvectype));
  PetscCall(PetscStrallocpy(VECCUDA,&B->defaultvectype));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",                            MatDenseCUDAGetArrayRead_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",                           MatDenseCUDAGetArrayWrite_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",                            MatDenseCUDARestoreArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",                        MatDenseCUDARestoreArrayRead_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",                       MatDenseCUDARestoreArrayWrite_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",                              MatDenseCUDAPlaceArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",                              MatDenseCUDAResetArray_MPIDenseCUDA));
  PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",                            MatDenseCUDAReplaceArray_MPIDenseCUDA));
  m = (Mat_MPIDense*)(B->data);
  if (m->A) {
    PetscCall(MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A));
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  PetscCall(MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE));

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
#endif

PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscInt     lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
  }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  *v = NULL;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscInt     lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
  }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseGetArrayRead(a->A,&a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  PetscCall(VecLockReadPush(a->cvec));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayRead(a->A,&a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecResetArray(a->cvec));
  *v = NULL;
  PetscFunctionReturn(0);
}
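/*
   Illustrative sketch (not part of the library source): the column-vector
   accessors above let a caller view one column of a hypothetical MATMPIDENSE
   matrix A as a parallel Vec without copying. For example, to compute the
   2-norm of column 0:

     Vec       v;
     PetscReal nrm;

     PetscCall(MatDenseGetColumnVec(A,0,&v));
     PetscCall(VecNorm(v,NORM_2,&nrm));
     PetscCall(MatDenseRestoreColumnVec(A,0,&v));

   The Read/Write variants below provide the same pattern with read-only or
   write-only semantics, which avoids unneeded host-device copies for GPU types.
*/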
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscInt     lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
  }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecResetArray(a->cvec));
  *v = NULL;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt rbegin,PetscInt rend,PetscInt cbegin,PetscInt cend,Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense *c;
  MPI_Comm     comm;
  PetscInt     pbegin,pend;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCheck(!a->vecinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  pbegin = PetscMax(0,PetscMin(A->rmap->rend,rbegin)-A->rmap->rstart);
  pend   = PetscMin(A->rmap->n,PetscMax(0,rend-A->rmap->rstart));
  if (!a->cmat) {
    PetscCall(MatCreate(comm,&a->cmat));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat));
    PetscCall(MatSetType(a->cmat,((PetscObject)A)->type_name));
    if (rend-rbegin == A->rmap->N) PetscCall(PetscLayoutReference(A->rmap,&a->cmat->rmap));
    else {
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap,pend-pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap,rend-rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    PetscCall(PetscLayoutSetSize(a->cmat->cmap,cend-cbegin));
    PetscCall(PetscLayoutSetUp(a->cmat->cmap));
  } else {
    PetscBool same = (PetscBool)(rend-rbegin == a->cmat->rmap->N);
    if (same && a->cmat->rmap->N != A->rmap->N) {
      same = (PetscBool)(pend-pbegin == a->cmat->rmap->n);
      PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&same,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
    }
    if (!same) {
      PetscCall(PetscLayoutDestroy(&a->cmat->rmap));
      PetscCall(PetscLayoutCreate(comm,&a->cmat->rmap));
      PetscCall(PetscLayoutSetLocalSize(a->cmat->rmap,pend-pbegin));
      PetscCall(PetscLayoutSetSize(a->cmat->rmap,rend-rbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->rmap));
    }
    if (cend-cbegin != a->cmat->cmap->N) {
      PetscCall(PetscLayoutDestroy(&a->cmat->cmap));
      PetscCall(PetscLayoutCreate(comm,&a->cmat->cmap));
      PetscCall(PetscLayoutSetSize(a->cmat->cmap,cend-cbegin));
      PetscCall(PetscLayoutSetUp(a->cmat->cmap));
    }
  }
  c = (Mat_MPIDense*)a->cmat->data;
  PetscCheck(!c->A,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetSubMatrix(a->A,pbegin,pend,cbegin,cend,&c->A));
  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled    = PETSC_TRUE;
  a->matinuse           = cbegin + 1;
  *v = a->cmat;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense *c;

  PetscFunctionBegin;
  PetscCheck(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
  PetscCheck(a->cmat,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix");
  PetscCheck(*v == a->cmat,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c = (Mat_MPIDense*)a->cmat->data;
  PetscCall(MatDenseRestoreSubMatrix(a->A,&c->A));
  *v = NULL;
  PetscFunctionReturn(0);
}
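/*
   Illustrative sketch (not part of the library source): MatDenseGetSubMatrix()
   exposes a contiguous row/column block of a hypothetical MATMPIDENSE matrix A
   as a matrix that shares A's storage; only one sub-matrix may be outstanding
   at a time. For example, to view the first two global columns over all rows:

     Mat       sub;
     PetscReal nrm;

     PetscCall(MatDenseGetSubMatrix(A,0,nrows,0,2,&sub));
     PetscCall(MatNorm(sub,NORM_FROBENIUS,&nrm));
     PetscCall(MatDenseRestoreSubMatrix(A,&sub));

   Here nrows stands for the global row count of A.
*/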
/*MC
   MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices.

   Options Database Keys:
. -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: `MatCreateDense()`

M*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense *a;

  PetscFunctionBegin;
  PetscCall(PetscNewLog(mat,&a));
  mat->data = (void*)a;
  PetscCall(PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps)));

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash));

  /* stuff used for matrix vector multiply */
  a->lvec        = NULL;
  a->Mvctx       = NULL;
  a->roworiented = PETSC_TRUE;

  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",MatConvert_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK));
#endif
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ));
#endif

  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE));
  PetscFunctionReturn(0);
}
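/*
   Illustrative sketch (not part of the library source): the "mpidense" type is
   typically selected at runtime. Assuming a hypothetical matrix A with global
   dimensions M and N:

     Mat A;

     PetscCall(MatCreate(PETSC_COMM_WORLD,&A));
     PetscCall(MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N));
     PetscCall(MatSetFromOptions(A));

   and run with -mat_type mpidense on the command line.
*/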
/*MC
   MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.

   Options Database Keys:
. -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()

  Level: beginner

.seealso: `MATMPIDENSE`, `MATDENSECUDA`

M*/
#if defined(PETSC_HAVE_CUDA)
#include <petsc/private/deviceimpl.h>
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
{
  PetscFunctionBegin;
  PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA));
  PetscCall(MatCreate_MPIDense(B));
  PetscCall(MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B));
  PetscFunctionReturn(0);
}
#endif

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner

.seealso: `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA`
M*/

/*MC
   MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.

   This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
   and MATMPIDENSECUDA otherwise.

   Options Database Keys:
. -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()

  Level: beginner

.seealso: `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATDENSE`
M*/
/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.seealso: `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));
  PetscFunctionReturn(0);
}

/*@
   MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix.

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

   Level: developer

.seealso: `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`

@*/
PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA)
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
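/*
   Illustrative sketch (not part of the library source): MatDensePlaceArray()
   temporarily backs a dense matrix with caller-owned storage. Assuming a
   hypothetical matrix A whose local block is m x n with leading dimension m:

     PetscScalar *buf;

     PetscCall(PetscMalloc1((size_t)m*n,&buf));
     PetscCall(MatDensePlaceArray(A,buf));
     ... A now reads and writes buf ...
     PetscCall(MatDenseResetArray(A));
     PetscCall(PetscFree(buf));

   The caller keeps ownership of buf and frees it only after the reset.
*/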
/*@
   MatDenseResetArray - Resets the matrix array to the array it previously had before the call to MatDensePlaceArray()

   Not Collective

   Input Parameter:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDensePlaceArray()

   Level: developer

.seealso: `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()`

@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(0);
}

/*@
   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix.

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
   freed by the user. It will be freed when the matrix is destroyed.

   Level: developer

.seealso: `MatDenseGetArray()`, `VecReplaceArray()`
@*/
PetscErrorCode MatDenseReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
#if defined(PETSC_HAVE_CUDA)
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}
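/*
   Illustrative sketch (not part of the library source): unlike the place/reset
   pair above, MatDenseReplaceArray() hands ownership to the matrix, so the
   buffer must come from PetscMalloc() and must not be freed by the caller.
   A, m and n are hypothetical names as in the previous sketch:

     PetscScalar *buf;

     PetscCall(PetscMalloc1((size_t)m*n,&buf));
     PetscCall(MatDenseReplaceArray(A,buf));
     ... buf is now owned by A and is freed when A is destroyed ...
*/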
#if defined(PETSC_HAVE_CUDA)
/*@C
   MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix.

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()`
@*/
PetscErrorCode MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAResetArray - Resets the matrix array to the array it previously had before the call to MatDenseCUDAPlaceArray()

   Not Collective

   Input Parameter:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDenseCUDAPlaceArray()

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`

@*/
PetscErrorCode MatDenseCUDAResetArray(Mat mat)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix.

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   This permanently replaces the GPU array and frees the memory associated with the old GPU array.
   The memory passed in CANNOT be freed by the user. It will be freed
   when the matrix is destroyed. The array should respect the matrix leading dimension.

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()`
@*/
PetscErrorCode MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
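/*
   Illustrative sketch (not part of the library source): the CUDA variants
   follow the same protocol as the host versions but operate on device memory,
   so the placed buffer must come from cudaMalloc(). Assuming a hypothetical
   device pointer dbuf sized for the local block of a MATMPIDENSECUDA matrix A:

     PetscCall(MatDenseCUDAPlaceArray(A,dbuf));
     ... A now uses dbuf as its GPU storage ...
     PetscCall(MatDenseCUDAResetArray(A));

   dbuf remains owned by the caller; MatDenseCUDAReplaceArray() instead
   transfers ownership of the buffer to the matrix.
*/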
/*@C
   MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.

   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order

   Notes:
   The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArrayRead()`
@*/
PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
@*/
PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed.

   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order

   Notes:
   Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
@*/
PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()`
@*/
PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
{
  PetscFunctionBegin;
  PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));
  PetscFunctionReturn(0);
}
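/*
   Illustrative sketch (not part of the library source): a typical pattern is to
   fill a dense CUDA matrix from a kernel through the write accessor and read it
   back elsewhere. Assuming a hypothetical MATMPIDENSECUDA matrix A:

     PetscScalar *d_a;

     PetscCall(MatDenseCUDAGetArrayWrite(A,&d_a));
     ... launch a kernel that fills the local block pointed to by d_a ...
     PetscCall(MatDenseCUDARestoreArrayWrite(A,&d_a));

   For read-only access use the GetArrayRead/RestoreArrayRead pair, which does
   not mark the GPU copy as modified.
*/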
/*@C
   MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.

   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order

   Notes:
   Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().

   Level: developer

.seealso: `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
@*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
@*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
#endif

/*@C
   MatCreateDense - Creates a matrix in dense format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of matrix data.  Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc
   to control all matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.
   Note that, although local portions of the matrix are stored in column-major
   order, the matrix is partitioned across MPI ranks by row.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL (PETSC_NULL_SCALAR for Fortran users).

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   Level: intermediate

.seealso: `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()`
@*/
PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm,A));
  PetscCall(MatSetSizes(*A,m,n,M,N));
  PetscCall(MatSetType(*A,MATDENSE));
  PetscCall(MatSeqDenseSetPreallocation(*A,data));
  PetscCall(MatMPIDenseSetPreallocation(*A,data));
  PetscFunctionReturn(0);
}
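/*
   Illustrative sketch (not part of the library source): creating a distributed
   dense matrix with PETSc-managed storage and filling the locally owned rows.
   M and N are hypothetical global dimensions.

     Mat      A;
     PetscInt i,j,rstart,rend;

     PetscCall(MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&A));
     PetscCall(MatGetOwnershipRange(A,&rstart,&rend));
     for (i=rstart; i<rend; i++) {
       for (j=0; j<N; j++) {
         PetscScalar v = (PetscScalar)(i+j);
         PetscCall(MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES));
       }
     }
     PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
*/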
#if defined(PETSC_HAVE_CUDA)
/*@C
   MatCreateDenseCUDA - Creates a matrix in dense format using CUDA.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.  M - number of global rows (or PETSC_DECIDE to have calculated if m is given)
.  N - number of global columns (or PETSC_DECIDE to have calculated if n is given)
-  data - optional location of GPU matrix data.  Set data=NULL for PETSc
   to control matrix memory allocation.

   Output Parameter:
.  A - the matrix

   Level: intermediate

.seealso: `MatCreate()`, `MatCreateDense()`
@*/
PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A)
{
  PetscFunctionBegin;
  PetscCall(MatCreate(comm,A));
  PetscValidLogicalCollectiveBool(*A,!!data,6);
  PetscCall(MatSetSizes(*A,m,n,M,N));
  PetscCall(MatSetType(*A,MATDENSECUDA));
  PetscCall(MatSeqDenseCUDASetPreallocation(*A,data));
  PetscCall(MatMPIDenseCUDASetPreallocation(*A,data));
  PetscFunctionReturn(0);
}
#endif

static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat          mat;
  Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&mat));
  PetscCall(MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
  PetscCall(MatSetType(mat,((PetscObject)A)->type_name));
  a = (Mat_MPIDense*)mat->data;

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  PetscCall(PetscLayoutReference(A->rmap,&mat->rmap));
  PetscCall(PetscLayoutReference(A->cmap,&mat->cmap));

  PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A));
  PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));

  *newmat = mat;
  PetscFunctionReturn(0);
}

PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
#if defined(PETSC_HAVE_HDF5)
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5));
#endif
  if (isbinary) {
    PetscCall(MatLoad_Dense_Binary(newMat,viewer));
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    PetscCall(MatLoad_Dense_HDF5(newMat,viewer));
#endif
  } else SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat          a,b;

  PetscFunctionBegin;
  a = matA->A;
  b = matB->A;
  PetscCall(MatEqual(a,b,flag));
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(0);
}
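/*
   Illustrative sketch (not part of the library source): MatLoad_MPIDense() is
   reached through MatLoad() with a binary (or HDF5) viewer. Assuming a
   hypothetical file "dense.dat" previously written by MatView() with a binary
   viewer:

     Mat         A;
     PetscViewer viewer;

     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD,"dense.dat",FILE_MODE_READ,&viewer));
     PetscCall(MatCreate(PETSC_COMM_WORLD,&A));
     PetscCall(MatSetType(A,MATDENSE));
     PetscCall(MatLoad(A,viewer));
     PetscCall(PetscViewerDestroy(&viewer));
*/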
PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
{
  Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(atb->sendbuf,atb->recvcounts));
  PetscCall(MatDestroy(&atb->atb));
  PetscCall(PetscFree(atb));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
{
  Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(abt->buf[0],abt->buf[1]));
  PetscCall(PetscFree2(abt->recvcounts,abt->recvdispls));
  PetscCall(PetscFree(abt));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_TransMatMultDense *atb;
  MPI_Comm              comm;
  PetscMPIInt           size,*recvcounts;
  PetscScalar           *carray,*sendbuf;
  const PetscScalar     *atbarray;
  PetscInt              i,cN=C->cmap->N,proc,k,j,lda;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCallMPI(MPI_Comm_size(comm,&size));

  /* compute atbarray = aseq^T * bseq */
  PetscCall(MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb));

  PetscCall(MatGetOwnershipRanges(C,&ranges));

  /* arrange atbarray into sendbuf */
  PetscCall(MatDenseGetArrayRead(atb->atb,&atbarray));
  PetscCall(MatDenseGetLDA(atb->atb,&lda));
  for (proc=0, k=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*lda];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(atb->atb,&atbarray));

  /* sum all atbarray to local values of C */
  PetscCall(MatDenseGetArrayWrite(c->A,&carray));
  PetscCallMPI(MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm));
  PetscCall(MatDenseRestoreArrayWrite(c->A,&carray));
  PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
{
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_TransMatMultDense *atb;
  PetscBool             cisdense;
  PetscInt              i;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,4);
  PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product C */
  PetscCall(MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,""));
  if (!cisdense) {
    PetscCall(MatSetType(C,((PetscObject)A)->type_name));
  }
  PetscCall(MatSetUp(C));

  /* create data structure for reuse C */
  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCall(PetscNew(&atb));
  cM = C->rmap->N;
  PetscCall(PetscMalloc2(cM*cN,&atb->sendbuf,size,&atb->recvcounts));
  PetscCall(MatGetOwnershipRanges(C,&ranges));
  for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  C->product->data    = atb;
  C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
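/*
   Illustrative sketch (not part of the library source): the symbolic/numeric
   pair above backs MatTransposeMatMult() for two MATMPIDENSE operands with
   identical row layouts. Assuming hypothetical matrices A and B:

     Mat C;

     PetscCall(MatTransposeMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C));
     ... C = A^T * B ...
     PetscCall(MatTransposeMatMult(A,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&C));

   The MAT_REUSE_MATRIX call reuses the communication buffers set up in the
   symbolic phase.
*/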
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  PetscMPIInt           tag;
  PetscInt              alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool             flg;

  PetscFunctionBegin;
  MatCheckProduct(C,4);
  PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  /* check local size of A and B */
  PetscCheck(A->cmap->n == B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")",A->cmap->n,B->cmap->n);

  PetscCall(PetscStrcmp(product->alg,"allgatherv",&flg));
  alg = flg ? 0 : 1;

  /* setup matrix product C */
  PetscCall(MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N));
  PetscCall(MatSetType(C,MATMPIDENSE));
  PetscCall(MatSetUp(C));
  PetscCall(PetscObjectGetNewTag((PetscObject)C,&tag));

  /* create data structure for reuse C */
  PetscCall(PetscObjectGetComm((PetscObject)C,&comm));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCall(PetscNew(&abt));
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    PetscCall(PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1])));
    break;
  default: /* alg: "allgatherv" */
    PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1])));
    PetscCall(PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls)));
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data                = abt;
  C->product->destroy             = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}
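/*
   Illustrative sketch (not part of the library source): the ABt product is
   driven the same way through MatMatTransposeMult(), again assuming matching
   local column sizes for hypothetical matrices A and B:

     Mat C;

     PetscCall(MatMatTransposeMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C));
     ... C = A * B^T ...
*/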
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm              comm;
  PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf=NULL, *cv;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  const PetscScalar     *av,*bv;
  PetscBLASInt          cm, cn, ck, alda, blda = 0, clda;
  MPI_Request           reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  abt = (Mat_MatTransMultDense*)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)C,&comm));
  PetscCallMPI(MPI_Comm_rank(comm,&rank));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCall(MatDenseGetArrayRead(a->A,&av));
  PetscCall(MatDenseGetArrayRead(b->A,&bv));
  PetscCall(MatDenseGetArrayWrite(c->A,&cv));
  PetscCall(MatDenseGetLDA(a->A,&i));
  PetscCall(PetscBLASIntCast(i,&alda));
  PetscCall(MatDenseGetLDA(b->A,&i));
  PetscCall(PetscBLASIntCast(i,&blda));
  PetscCall(MatDenseGetLDA(c->A,&i));
  PetscCall(PetscBLASIntCast(i,&clda));
  PetscCall(MatGetOwnershipRanges(B,&ranges));
  bn = B->rmap->n;
  if (blda == bn) {
    sendbuf = (PetscScalar*)bv;
  } else {
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bv[i * blda + j];
      }
    }
  }
  if (size > 1) {
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  PetscCall(PetscBLASIntCast(cK,&ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n,&cm));
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving and sending, so the bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]));
      PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]));
    }

    /* local aseq * sendbuf^T */
    PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn));
    if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE));
    }
    bn         = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf    = recvbuf;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A,&av));
  PetscCall(MatDenseRestoreArrayRead(b->A,&bv));
  PetscCall(MatDenseRestoreArrayWrite(c->A,&cv));
  PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscScalar           *cv, *sendbuf, *recvbuf;
  const PetscScalar     *av,*bv;
  PetscInt              blda,i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  PetscBLASInt          cm, cn, ck, alda, clda;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  abt = (Mat_MatTransMultDense*)C->product->data;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm));
  PetscCallMPI(MPI_Comm_size(comm,&size));
  PetscCall(MatDenseGetArrayRead(a->A,&av));
  PetscCall(MatDenseGetArrayRead(b->A,&bv));
  PetscCall(MatDenseGetArrayWrite(c->A,&cv));
  PetscCall(MatDenseGetLDA(a->A,&i));
  PetscCall(PetscBLASIntCast(i,&alda));
  PetscCall(MatDenseGetLDA(b->A,&blda));
  PetscCall(MatDenseGetLDA(c->A,&i));
  PetscCall(PetscBLASIntCast(i,&clda));
  /* copy transpose of B into buf[0] */
  bn      = B->rmap->n;
  sendbuf = abt->buf[0];
  recvbuf = abt->buf[1];
  for (k = 0, j = 0; j < bn; j++) {
    for (i = 0; i < cK; i++, k++) {
      sendbuf[k] = bv[i * blda + j];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(b->A,&bv));
  PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm));
  PetscCall(PetscBLASIntCast(cK,&ck));
  PetscCall(PetscBLASIntCast(c->A->rmap->n,&cm));
  PetscCall(PetscBLASIntCast(c->A->cmap->n,&cn));
  if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda));
  PetscCall(MatDenseRestoreArrayRead(a->A,&av));
  PetscCall(MatDenseRestoreArrayWrite(c->A,&cv));
  PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C)
{
  Mat_MatTransMultDense *abt;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  abt = (Mat_MatTransMultDense*)C->product->data;
  switch (abt->alg) {
  case 1:
    PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C));
    break;
  default:
    PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C));
    break;
  }
  PetscFunctionReturn(0);
}
empty"); 2827 abt = (Mat_MatTransMultDense*)C->product->data; 2828 switch (abt->alg) { 2829 case 1: 2830 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); 2831 break; 2832 default: 2833 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); 2834 break; 2835 } 2836 PetscFunctionReturn(0); 2837 } 2838 2839 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) 2840 { 2841 Mat_MatMultDense *ab = (Mat_MatMultDense*)data; 2842 2843 PetscFunctionBegin; 2844 PetscCall(MatDestroy(&ab->Ce)); 2845 PetscCall(MatDestroy(&ab->Ae)); 2846 PetscCall(MatDestroy(&ab->Be)); 2847 PetscCall(PetscFree(ab)); 2848 PetscFunctionReturn(0); 2849 } 2850 2851 #if defined(PETSC_HAVE_ELEMENTAL) 2852 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2853 { 2854 Mat_MatMultDense *ab; 2855 2856 PetscFunctionBegin; 2857 MatCheckProduct(C,3); 2858 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data"); 2859 ab = (Mat_MatMultDense*)C->product->data; 2860 PetscCall(MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae)); 2861 PetscCall(MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be)); 2862 PetscCall(MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce)); 2863 PetscCall(MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C)); 2864 PetscFunctionReturn(0); 2865 } 2866 2867 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 2868 { 2869 Mat Ae,Be,Ce; 2870 Mat_MatMultDense *ab; 2871 2872 PetscFunctionBegin; 2873 MatCheckProduct(C,4); 2874 PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2875 /* check local size of A and B */ 2876 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 2877 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2878 } 2879 2880 /* create elemental matrices Ae and Be */ 2881 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae)); 2882 PetscCall(MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N)); 2883 PetscCall(MatSetType(Ae,MATELEMENTAL)); 2884 PetscCall(MatSetUp(Ae)); 2885 PetscCall(MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE)); 2886 2887 PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be)); 2888 PetscCall(MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N)); 2889 PetscCall(MatSetType(Be,MATELEMENTAL)); 2890 PetscCall(MatSetUp(Be)); 2891 PetscCall(MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE)); 2892 2893 /* compute symbolic Ce = Ae*Be */ 2894 PetscCall(MatCreate(PetscObjectComm((PetscObject)C),&Ce)); 2895 PetscCall(MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce)); 2896 2897 /* setup C */ 2898 PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE)); 2899 PetscCall(MatSetType(C,MATDENSE)); 2900 PetscCall(MatSetUp(C)); 2901 2902 /* create data structure for reuse Cdense */ 2903 PetscCall(PetscNew(&ab)); 2904 ab->Ae = Ae; 2905 ab->Be = Be; 2906 ab->Ce = Ce; 2907 2908 C->product->data = ab; 2909 C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2910 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2911 PetscFunctionReturn(0); 2912 } 2913 #endif 2914 /* ----------------------------------------------- */ 2915 #if defined(PETSC_HAVE_ELEMENTAL) 2916 static PetscErrorCode 
#if defined(PETSC_HAVE_ELEMENTAL)
static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C)
{
  PetscFunctionBegin;
  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}
#endif

static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
{
  Mat_Product *product = C->product;
  Mat         A = product->A,B=product->B;

  PetscFunctionBegin;
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_AtB;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
{
  Mat_Product *product = C->product;
  const char  *algTypes[2] = {"allgatherv","cyclic"};
  PetscInt    alg,nalg = 2;
  PetscBool   flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is allgatherv */
  PetscCall(PetscStrcmp(product->alg,"default",&flg));
  if (flg) {
    PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
  }

  /* Get runtime option */
  if (product->api_user) {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");
    PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg));
    PetscOptionsEnd();
  } else {
    PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");
    PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg));
    PetscOptionsEnd();
  }
  if (flg) {
    PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
  }

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
{
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  switch (product->type) {
#if defined(PETSC_HAVE_ELEMENTAL)
  case MATPRODUCT_AB:
    PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
    break;
#endif
  case MATPRODUCT_AtB:
    PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
    break;
  case MATPRODUCT_ABt:
    PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
    break;
  default:
    break;
  }
  PetscFunctionReturn(0);
}
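/*
   Illustrative note (not part of the library source): the ABt algorithm can be
   chosen at runtime. Through the MatMatTransposeMult() interface use

     -matmattransmult_mpidense_mpidense_via cyclic

   and through the MatProduct API use

     -mat_product_algorithm cyclic

   (the default is allgatherv).
*/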