/*
   Basic functions for basic parallel dense matrices.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I   "petscmat.h"  I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@

      MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
              matrix that represents the operator. For sequential matrices it returns itself.

    Input Parameter:
.      A - the Seq or MPI dense matrix

    Output Parameter:
.      B - the inner matrix

    Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  PetscBool    flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscValidPointer(B,2);
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg));
  if (flg) *B = mat->A;
  else {
    PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg));
    PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name);
    *B = A;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatCopy_MPIDense(Mat A,Mat B,MatStructure s)
{
  Mat_MPIDense *Amat = (Mat_MPIDense*)A->data;
  Mat_MPIDense *Bmat = (Mat_MPIDense*)B->data;

  PetscFunctionBegin;
  PetscCall(MatCopy(Amat->A,Bmat->A,s));
  PetscFunctionReturn(0);
}

PetscErrorCode MatShift_MPIDense(Mat A,PetscScalar alpha)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  PetscInt     j,lda,rstart = A->rmap->rstart,rend = A->rmap->rend,rend2;
  PetscScalar  *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mat->A,&v));
  PetscCall(MatDenseGetLDA(mat->A,&lda));
  rend2 = PetscMin(rend,A->cmap->N);
  if (rend2 > rstart) {
    for (j=rstart; j<rend2; j++) v[j-rstart+j*lda] += alpha;
    PetscCall(PetscLogFlops(rend2-rstart));
  }
  PetscCall(MatDenseRestoreArray(mat->A,&v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  PetscInt     lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  PetscCall(MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  PetscInt     lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  PetscCall(MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v));
  PetscFunctionReturn(0);
}

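/*
   A minimal usage sketch of MatDenseGetLocalMatrix() (caller-side, not library
   code): borrowing the local sequential block to apply a Seq-only operation.
   The matrix A here is a hypothetical caller-created MATMPIDENSE; the returned
   Aloc is a borrowed reference and must not be destroyed by the caller.

     Mat Aloc;
     PetscCall(MatDenseGetLocalMatrix(A,&Aloc));
     PetscCall(MatScale(Aloc,2.0));   // scales only the locally owned rows of A
*/
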
PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  PetscInt     m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm     comm;
  PetscBool    flg;
  Mat          B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A,&flg));
  PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg));
    PetscCheck(!flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
    PetscCall(PetscObjectGetComm((PetscObject)(mdn->A),&comm));
    PetscCall(MatCreate(comm,&B));
    PetscCall(MatSetSizes(B,m,m,m,m));
    PetscCall(MatSetType(B,((PetscObject)mdn->A)->type_name));
    PetscCall(MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array));
    PetscCall(MatSeqDenseSetPreallocation(B,array+m*rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array));
    PetscCall(PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B));
    *a = B;
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
  PetscInt     i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool    roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        PetscCall(MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv));
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          PetscCall(MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv));
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  PetscInt     i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        PetscCall(MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j));
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLDA(a->A,lda));
  PetscFunctionReturn(0);
}

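/*
   Storage layout reminder: the local block is stored column-major with leading
   dimension lda >= A->rmap->n, so local entry (i,j) lives at

     v[i + j*lda]

   This is the indexing used by MatShift_MPIDense above and MatGetDiagonal_MPIDense
   below.
*/
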
static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscBool    iscuda;

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    PetscCall(MatCreate(PETSC_COMM_SELF,&a->A));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->A));
    PetscCall(MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N));
    PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda));
    PetscCall(MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
  }
  PetscCall(MatDenseSetLDA(a->A,lda));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArray(a->A,array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayRead(a->A,array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayWrite(a->A,array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDensePlaceArray(a->A,array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseResetArray(a->A));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseReplaceArray(a->A,array));
  PetscFunctionReturn(0);
}

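/*
   Usage sketch for the place/reset protocol above (hypothetical caller-provided
   buffer "mydata", column-major with at least lda*N local entries):

     PetscCall(MatDensePlaceArray(A,mydata));  // A temporarily reads/writes mydata
     ...
     PetscCall(MatDenseResetArray(A));         // A points back at its own storage

   MatDenseReplaceArray() instead hands ownership of the buffer to the matrix, so
   there is no matching reset call.
*/
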
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense*)A->data,*newmatd;
  PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt    *irow,*icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat               newmat;
  IS                iscol_local;
  MPI_Comm          comm_is,comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol,&comm_is));
  PetscCheck(comm_mat == comm_is,PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  PetscCall(ISAllGather(iscol,&iscol_local));
  PetscCall(ISGetIndices(isrow,&irow));
  PetscCall(ISGetIndices(iscol_local,&icol));
  PetscCall(ISGetLocalSize(isrow,&nrows));
  PetscCall(ISGetLocalSize(iscol,&ncols));
  PetscCall(ISGetSize(iscol,&Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. Currently supports only submatrices with the same
     partitioning as the original matrix! */

  PetscCall(MatGetLocalSize(A,&nlrows,&nlcols));
  PetscCall(MatGetOwnershipRange(A,&rstart,&rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&newmat));
    PetscCall(MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols));
    PetscCall(MatSetType(newmat,((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat,NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A,&bv));
  PetscCall(MatDenseGetArrayRead(mat->A,&v));
  PetscCall(MatDenseGetLDA(mat->A,&lda));
  for (i=0; i<Ncols; i++) {
    const PetscScalar *av = v + lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A,&v));
  PetscCall(MatDenseRestoreArray(newmatd->A,&bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow,&irow));
  PetscCall(ISRestoreIndices(iscol_local,&icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArray(a->A,array));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayRead(a->A,array));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayWrite(a->A,array));
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  PetscInt     nstash,reallocs;

  PetscFunctionBegin;
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
  PetscCall(PetscInfo(mdn->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
  PetscFunctionReturn(0);
}

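/*
   Assembly protocol sketch: entries destined for rows owned by another rank are
   accumulated in mat->stash by MatSetValues_MPIDense above; MatAssemblyBegin
   starts the stash communication and MatAssemblyEnd below drains the received
   messages into the local block. A typical caller sequence (grow/gcol are
   hypothetical global indices, possibly off-process):

     PetscCall(MatSetValues(A,1,&grow,1,&gcol,&val,ADD_VALUES));
     PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
*/
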
PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  PetscInt     i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt  n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /* wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  PetscCall(MatAssemblyBegin(mdn->A,mode));
  PetscCall(MatAssemblyEnd(mdn->A,mode));
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;
  PetscInt     i,len,*lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x,&xx));
    PetscCall(VecGetArrayWrite(b,&bb));
    for (i=0; i<len; ++i) bb[lrows[i]] = diag*xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x,&xx));
    PetscCall(VecRestoreArrayWrite(b,&bb));
  }
  PetscCall(MatZeroRows(l->A,len,lrows,0.0,NULL,NULL));
  if (diag != 0.0) {
    Vec d;

    PetscCall(MatCreateVecs(A,NULL,&d));
    PetscCall(VecSet(d,diag));
    PetscCall(MatDiagonalSet(A,d,INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);

PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx,&ax,&axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec,&ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx,&ax));
  PetscCall((*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy));
  PetscFunctionReturn(0);
}

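/*
   Algorithm note for MatMult above: because each rank stores all N columns of its
   row block, y = A*x needs every entry of x on every rank. The PetscSF broadcast
   gathers x into the work vector lvec, and the sequential kernel then computes
   y_local = A_local * lvec with no further communication.
*/
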
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx,&ax,&axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec,&ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx,&ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecSet(yy,0.0));
  PetscCall((*a->A->ops->multtranspose)(a->A,xx,a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype));
  PetscCall(VecGetArrayAndMemType(yy,&ay,&aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec,&ax));
  PetscCall(VecRestoreArrayAndMemType(yy,&ay));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecCopy(yy,zz));
  PetscCall((*a->A->ops->multtranspose)(a->A,xx,a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype));
  PetscCall(VecGetArrayAndMemType(zz,&ay,&aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec,&ax));
  PetscCall(VecRestoreArrayAndMemType(zz,&ay));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscInt          lda,len,i,n,m = A->rmap->n,radd;
  PetscScalar       *x,zero = 0.0;
  const PetscScalar *av;

  PetscFunctionBegin;
  PetscCall(VecSet(v,zero));
  PetscCall(VecGetArray(v,&x));
  PetscCall(VecGetSize(v,&n));
  PetscCheck(n == A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  radd = A->rmap->rstart*m;
  PetscCall(MatDenseGetArrayRead(a->A,&av));
  PetscCall(MatDenseGetLDA(a->A,&lda));
  for (i=0; i<len; i++) {
    x[i] = av[radd + i*lda + i];
  }
  PetscCall(MatDenseRestoreArrayRead(a->A,&av));
  PetscCall(VecRestoreArray(v,&x));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCheck(!mdn->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));

  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL));
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);

#include <petscdraw.h>
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscMPIInt       rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetType(viewer,&vtype));
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx,viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
    PetscCall(PetscDrawIsNull(draw,&isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat),&A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A,M,N,M,N));
    } else {
      PetscCall(MatSetSizes(A,0,0,M,N));
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    PetscCall(MatSetType(A,MATMPIDENSE));
    PetscCall(MatMPIDenseSetPreallocation(A,NULL));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)A));

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      PetscCall(MatGetRow_MPIDense(mat,row,&nz,&cols,&vals));
      PetscCall(MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES));
      PetscCall(MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals));
      row++;
    }

    PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
    PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    if (rank == 0) {
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name));
      PetscCall(MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscBool iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));

  if (iascii || issocket || isdraw) {
    PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat,viewer));
  } else if (isbinary) {
    PetscCall(MatView_Dense_Binary(mat,viewer));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscLogDouble isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  PetscCall(MatGetInfo(mdn,MAT_LOCAL,info));

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    PetscCall(MatSetOption(a->A,op,flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;
    PetscCall(MatSetOption(a->A,op,flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
    PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
    break;
  default:
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  const PetscScalar *l;
  PetscScalar       x,*v,*vv,*r;
  PetscInt          i,j,s2a,s3a,s2,s3,m = mdn->A->rmap->n,n = mdn->A->cmap->n,lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A,&vv));
  PetscCall(MatDenseGetLDA(mdn->A,&lda));
  PetscCall(MatGetLocalSize(A,&s2,&s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll,&s2a));
    PetscCheck(s2a == s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT,s2a,s2);
    PetscCall(VecGetArrayRead(ll,&l));
    for (i=0; i<m; i++) {
      x = l[i];
      v = vv + i;
      for (j=0; j<n; j++) { (*v) *= x; v += lda; }
    }
    PetscCall(VecRestoreArrayRead(ll,&l));
    PetscCall(PetscLogFlops(1.0*n*m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr,&s3a));
    PetscCheck(s3a == s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".",s3a,s3);
    PetscCall(VecGetArrayRead(rr,&ar));
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec,&r));
    PetscCall(PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr,&ar));
    for (i=0; i<n; i++) {
      x = r[i];
      v = vv + i*lda;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec,&r));
    PetscCall(PetscLogFlops(1.0*n*m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A,&vv));
  PetscFunctionReturn(0);
}

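/*
   MatDiagonalScale above applies A <- diag(ll) * A * diag(rr): row i of the local
   block is scaled by ll[i], and, after rr is gathered into lvec with the same
   PetscSF used by MatMult, column j is scaled by the gathered rr[j].
*/
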
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  PetscInt          i,j;
  PetscMPIInt       size;
  PetscReal         sum = 0.0;
  const PetscScalar *av,*v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArrayRead(mdn->A,&av));
  v = av;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
  if (size == 1) {
    PetscCall(MatNorm(mdn->A,type,nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      PetscCall(MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
      *nrm = PetscSqrtReal(*nrm);
      PetscCall(PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n));
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      PetscCall(PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2));
      *nrm = 0.0;
      v    = av;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      PetscCall(MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree2(tmp,tmp2));
      PetscCall(PetscLogFlops(A->cmap->n*A->rmap->n));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      PetscCall(MatNorm(mdn->A,type,&ntemp));
      PetscCall(MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A)));
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscCall(MatDenseRestoreArrayRead(mdn->A,&av));
  PetscFunctionReturn(0);
}

PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat          B;
  PetscInt     M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscInt     j,i,lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
    PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M));
    PetscCall(MatSetType(B,((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B,NULL));
  } else B = *matout;

  m = a->A->rmap->n; n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A,(const PetscScalar**)&v));
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(PetscMalloc1(m,&rwork));
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    PetscCall(MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES));
    v += lda;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    PetscCall(MatHeaderMerge(A,&B));
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!A->preallocated) {
    PetscCall(MatMPIDenseSetPreallocation(A,NULL));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  PetscCall(MatAXPY(A->A,alpha,B->A,str));
  PetscFunctionReturn(0);
}

PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense *a = (Mat_MPIDense*)mat->data;

  PetscFunctionBegin;
  PetscCall(MatConjugate(a->A));
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(a->A,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
  PetscCheck(a->A->ops->getcolumnvector,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
  PetscCall((*a->A->ops->getcolumnvector)(a->A,v,col));
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat,PetscInt,PetscReal*);

PetscErrorCode MatGetColumnReductions_MPIDense(Mat A,PetscInt type,PetscReal *reductions)
{
  PetscInt     i,m,n;
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscReal    *work;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A,&m,&n));
  PetscCall(PetscMalloc1(n,&work));
  if (type == REDUCTION_MEAN_REALPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_REALPART,work));
  } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_IMAGINARYPART,work));
  } else {
    PetscCall(MatGetColumnReductions_SeqDense(a->A,type,work));
  }
  if (type == NORM_2) {
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,A->hdr.comm));
  } else {
    PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,A->hdr.comm));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
PetscErrorCode MatShift_MPIDenseCUDA(Mat A,PetscScalar alpha)
{
  PetscScalar *da;
  PetscInt    lda;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArray(A,&da));
  PetscCall(MatDenseGetLDA(A,&lda));
  PetscCall(PetscInfo(A,"Performing Shift on backend\n"));
  PetscCall(MatShift_DenseCUDA_Private(da,alpha,lda,A->rmap->rstart,A->rmap->rend,A->cmap->N));
  PetscCall(MatDenseCUDARestoreArray(A,&da));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscInt     lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
  }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

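/*
   A hedged usage sketch of the column-vector accessors (host and CUDA variants
   behave identically at the interface): each Get must be paired with the matching
   Restore before any other access, since the matrix exposes a single internal
   column vector (cvec).

     Vec       col;
     PetscReal nrm;
     PetscCall(MatDenseGetColumnVec(A,j,&col));   // borrow column j of A as a Vec
     PetscCall(VecNorm(col,NORM_2,&nrm));
     PetscCall(MatDenseRestoreColumnVec(A,j,&col));
*/
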
static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAResetArray(a->cvec));
  *v = NULL;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscInt     lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
  }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  PetscCall(VecLockReadPush(a->cvec));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecCUDAResetArray(a->cvec));
  *v = NULL;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscInt     lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
  }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAResetArray(a->cvec));
  *v = NULL;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A,const PetscScalar *a)
{
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAPlaceArray(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAResetArray(l->A));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A,const PetscScalar *a)
{
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAReplaceArray(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A,PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArrayWrite(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A,PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArrayWrite(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A,const PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArrayRead(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A,const PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArrayRead(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A,PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArray(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A,PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArray(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);

static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense *d = (Mat_MPIDense*)mat->data;

  PetscFunctionBegin;
  PetscCheck(!d->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!d->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) {
    PetscCall(MatBindToCPU(d->A,bind));
  }
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda));
    if (!iscuda) {
      PetscCall(VecDestroy(&d->cvec));
    }
    PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda));
    if (!iscuda) {
      PetscCall(MatDestroy(&d->cmat));
    }
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA));
    mat->ops->shift = MatShift_MPIDenseCUDA;
  } else {
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense));
    mat->ops->shift = MatShift_MPIDense;
  }
  if (d->cmat) {
    PetscCall(MatBindToCPU(d->cmat,bind));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A,PetscScalar *d_data)
{
  Mat_MPIDense *d = (Mat_MPIDense*)A->data;
  PetscBool    iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda));
  if (!iscuda) PetscFunctionReturn(0);
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!d->A) {
    PetscCall(MatCreate(PETSC_COMM_SELF,&d->A));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)d->A));
    PetscCall(MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N));
  }
  PetscCall(MatSetType(d->A,MATSEQDENSECUDA));
  PetscCall(MatSeqDenseCUDASetPreallocation(d->A,d_data));
  A->preallocated = PETSC_TRUE;
  A->assembled    = PETSC_TRUE;
  PetscFunctionReturn(0);
}
#endif

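/*
   Note on the CUDA block above: MatMPIDenseCUDASetPreallocation mirrors the host
   MatMPIDenseSetPreallocation_MPIDense below, except that the optional user
   buffer d_data must point to device memory and the inner matrix is forced to
   MATSEQDENSECUDA.
*/
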
static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense *d = (Mat_MPIDense*)x->data;

  PetscFunctionBegin;
  PetscCall(MatSetRandom(d->A,rctx));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 10*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 29*/ MatSetUp_MPIDense,
                                        NULL,
                                        NULL,
                                        MatGetDiagonalBlock_MPIDense,
                                        NULL,
                                /* 34*/ MatDuplicate_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        NULL,
                                        MatGetValues_MPIDense,
                                        MatCopy_MPIDense,
                                /* 44*/ NULL,
                                        MatScale_MPIDense,
                                        MatShift_MPIDense,
                                        NULL,
                                        NULL,
                                /* 49*/ MatSetRandom_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 54*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        NULL,
                                        NULL,
                                /* 64*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 69*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 74*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 79*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 83*/ MatLoad_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 89*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 94*/ NULL,
                                        NULL,
                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                        NULL,
                                /* 99*/ MatProductSetFromOptions_MPIDense,
                                        NULL,
                                        NULL,
                                        MatConjugate_MPIDense,
                                        NULL,
                                /*104*/ NULL,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        NULL,
                                        NULL,
                                /*109*/ NULL,
                                        NULL,
                                        NULL,
                                        MatGetColumnVector_MPIDense,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*119*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*124*/ NULL,
                                        MatGetColumnReductions_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*129*/ NULL,
                                        NULL,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        NULL,
                                /*134*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*139*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                /*145*/ NULL,
                                        NULL,
                                        NULL
};

PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense *a = (Mat_MPIDense*)mat->data;
  PetscBool    iscuda;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  if (!a->A) {
    PetscCall(MatCreate(PETSC_COMM_SELF,&a->A));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
    PetscCall(MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N));
  }
  PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda));
  PetscCall(MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
  PetscCall(MatSeqDenseSetPreallocation(a->A,data));
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  PetscFunctionReturn(0);
}

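/*
   Caller-side sketch for the preallocation routine above (hypothetical local row
   count m and global column count N; pass NULL to let PETSc allocate the
   column-major storage, or a buffer of at least m*N scalars per rank):

     Mat A;
     PetscCall(MatCreate(PETSC_COMM_WORLD,&A));
     PetscCall(MatSetSizes(A,m,PETSC_DECIDE,PETSC_DETERMINE,N));
     PetscCall(MatSetType(A,MATMPIDENSE));
     PetscCall(MatMPIDenseSetPreallocation(A,NULL));
*/
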
1569 for (i=0; i<N; i++) cols[i] = i; 1570 for (i=0; i<m; i++) rows[i] = rstart + i; 1571 1572 /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 1573 PetscCall(MatDenseGetArray(A,&v)); 1574 PetscCall(MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES)); 1575 PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY)); 1576 PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY)); 1577 PetscCall(MatDenseRestoreArray(A,&v)); 1578 PetscCall(PetscFree2(rows,cols)); 1579 1580 if (reuse == MAT_INPLACE_MATRIX) { 1581 PetscCall(MatHeaderReplace(A,&mat_elemental)); 1582 } else { 1583 *newmat = mat_elemental; 1584 } 1585 PetscFunctionReturn(0); 1586 } 1587 #endif 1588 1589 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals) 1590 { 1591 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 1592 1593 PetscFunctionBegin; 1594 PetscCall(MatDenseGetColumn(mat->A,col,vals)); 1595 PetscFunctionReturn(0); 1596 } 1597 1598 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals) 1599 { 1600 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 1601 1602 PetscFunctionBegin; 1603 PetscCall(MatDenseRestoreColumn(mat->A,vals)); 1604 PetscFunctionReturn(0); 1605 } 1606 1607 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 1608 { 1609 Mat_MPIDense *mat; 1610 PetscInt m,N; 1611 1612 PetscFunctionBegin; 1613 PetscCall(MatGetSize(inmat,&m,&N)); 1614 1615 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 1616 PetscInt sum; 1617 1618 if (n == PETSC_DECIDE) { 1619 PetscCall(PetscSplitOwnership(comm,&n,&N)); 1620 } 1621 /* Check sum(n) = N */ 1622 PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm)); 1623 PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N); 1624 1625 PetscCall(MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat)); 1626 PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 1627 } 1628 1629 /* numeric phase */ 1630 mat = (Mat_MPIDense*)(*outmat)->data; 1631 PetscCall(MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN)); 1632 PetscFunctionReturn(0); 1633 } 1634 1635 #if defined(PETSC_HAVE_CUDA) 1636 PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat) 1637 { 1638 Mat B; 1639 Mat_MPIDense *m; 1640 1641 PetscFunctionBegin; 1642 if (reuse == MAT_INITIAL_MATRIX) { 1643 PetscCall(MatDuplicate(M,MAT_COPY_VALUES,newmat)); 1644 } else if (reuse == MAT_REUSE_MATRIX) { 1645 PetscCall(MatCopy(M,*newmat,SAME_NONZERO_PATTERN)); 1646 } 1647 1648 B = *newmat; 1649 PetscCall(MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE)); 1650 PetscCall(PetscFree(B->defaultvectype)); 1651 PetscCall(PetscStrallocpy(VECSTANDARD,&B->defaultvectype)); 1652 PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE)); 1653 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL)); 1654 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL)); 1655 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL)); 1656 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL)); 1657 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL));
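/* composing NULL under these names removes the CUDA-specific methods published by the mpidensecuda type, so later queries on the converted matrix fall through to the plain MPI dense implementations */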
1658 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL)); 1659 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL)); 1660 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL)); 1661 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL)); 1662 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL)); 1663 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL)); 1664 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL)); 1665 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL)); 1666 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL)); 1667 m = (Mat_MPIDense*)(B)->data; 1668 if (m->A) { 1669 PetscCall(MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A)); 1670 } 1671 B->ops->bindtocpu = NULL; 1672 B->offloadmask = PETSC_OFFLOAD_CPU; 1673 PetscFunctionReturn(0); 1674 } 1675 1676 PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat) 1677 { 1678 Mat B; 1679 Mat_MPIDense *m; 1680 1681 PetscFunctionBegin; 1682 if (reuse == MAT_INITIAL_MATRIX) { 1683 PetscCall(MatDuplicate(M,MAT_COPY_VALUES,newmat)); 1684 } else if (reuse == MAT_REUSE_MATRIX) { 1685 PetscCall(MatCopy(M,*newmat,SAME_NONZERO_PATTERN)); 1686 } 1687 1688 B = *newmat; 1689 PetscCall(PetscFree(B->defaultvectype)); 1690 PetscCall(PetscStrallocpy(VECCUDA,&B->defaultvectype)); 1691 PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA)); 1692 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C", MatConvert_MPIDenseCUDA_MPIDense)); 1693 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1694 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense)); 1695 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1696 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ)); 1697 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C", MatDenseCUDAGetArray_MPIDenseCUDA)); 1698 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C", MatDenseCUDAGetArrayRead_MPIDenseCUDA)); 1699 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C", MatDenseCUDAGetArrayWrite_MPIDenseCUDA)); 1700 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C", MatDenseCUDARestoreArray_MPIDenseCUDA)); 1701 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C", MatDenseCUDARestoreArrayRead_MPIDenseCUDA)); 1702 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C", MatDenseCUDARestoreArrayWrite_MPIDenseCUDA)); 1703 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C", MatDenseCUDAPlaceArray_MPIDenseCUDA)); 1704 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C", MatDenseCUDAResetArray_MPIDenseCUDA)); 1705 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C", 
MatDenseCUDAReplaceArray_MPIDenseCUDA)); 1706 m = (Mat_MPIDense*)(B->data); 1707 if (m->A) { 1708 PetscCall(MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A)); 1709 B->offloadmask = PETSC_OFFLOAD_BOTH; 1710 } else { 1711 B->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 1712 } 1713 PetscCall(MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE)); 1714 1715 B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA; 1716 PetscFunctionReturn(0); 1717 } 1718 #endif 1719 1720 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v) 1721 { 1722 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1723 PetscInt lda; 1724 1725 PetscFunctionBegin; 1726 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1727 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1728 if (!a->cvec) { 1729 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec)); 1730 } 1731 a->vecinuse = col + 1; 1732 PetscCall(MatDenseGetLDA(a->A,&lda)); 1733 PetscCall(MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse)); 1734 PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda)); 1735 *v = a->cvec; 1736 PetscFunctionReturn(0); 1737 } 1738 1739 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v) 1740 { 1741 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1742 1743 PetscFunctionBegin; 1744 PetscCheck(a->vecinuse,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1745 PetscCheck(a->cvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 1746 a->vecinuse = 0; 1747 PetscCall(MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse)); 1748 PetscCall(VecResetArray(a->cvec)); 1749 *v = NULL; 1750 PetscFunctionReturn(0); 1751 } 1752 1753 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v) 1754 { 1755 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1756 PetscInt lda; 1757 1758 PetscFunctionBegin; 1759 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1760 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1761 if (!a->cvec) { 1762 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec)); 1763 } 1764 a->vecinuse = col + 1; 1765 PetscCall(MatDenseGetLDA(a->A,&lda)); 1766 PetscCall(MatDenseGetArrayRead(a->A,&a->ptrinuse)); 1767 PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda)); 1768 PetscCall(VecLockReadPush(a->cvec)); 1769 *v = a->cvec; 1770 PetscFunctionReturn(0); 1771 } 1772 1773 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v) 1774 { 1775 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1776 1777 PetscFunctionBegin; 1778 PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1779 PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 1780 a->vecinuse = 0; 1781 PetscCall(MatDenseRestoreArrayRead(a->A,&a->ptrinuse)); 1782 PetscCall(VecLockReadPop(a->cvec)); 1783 PetscCall(VecResetArray(a->cvec)); 1784 *v = NULL; 1785 PetscFunctionReturn(0); 1786 } 1787 1788 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v) 1789 { 1790 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1791 PetscInt lda; 1792 1793 
PetscFunctionBegin; 1794 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1795 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1796 if (!a->cvec) { 1797 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec)); 1798 } 1799 a->vecinuse = col + 1; 1800 PetscCall(MatDenseGetLDA(a->A,&lda)); 1801 PetscCall(MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse)); 1802 PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda)); 1803 *v = a->cvec; 1804 PetscFunctionReturn(0); 1805 } 1806 1807 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v) 1808 { 1809 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1810 1811 PetscFunctionBegin; 1812 PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1813 PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 1814 a->vecinuse = 0; 1815 PetscCall(MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse)); 1816 PetscCall(VecResetArray(a->cvec)); 1817 *v = NULL; 1818 PetscFunctionReturn(0); 1819 } 1820 1821 PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v) 1822 { 1823 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1824 Mat_MPIDense *c; 1825 MPI_Comm comm; 1826 1827 PetscFunctionBegin; 1828 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 1829 PetscCheck(!a->vecinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1830 PetscCheck(!a->matinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1831 if (!a->cmat) { 1832 PetscCall(MatCreate(comm,&a->cmat)); 1833 PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat)); 1834 PetscCall(MatSetType(a->cmat,((PetscObject)A)->type_name)); 1835 PetscCall(PetscLayoutReference(A->rmap,&a->cmat->rmap)); 1836 PetscCall(PetscLayoutSetSize(a->cmat->cmap,cend-cbegin)); 1837 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 1838 } else if (cend-cbegin != a->cmat->cmap->N) { 1839 PetscCall(PetscLayoutDestroy(&a->cmat->cmap)); 1840 PetscCall(PetscLayoutCreate(comm,&a->cmat->cmap)); 1841 PetscCall(PetscLayoutSetSize(a->cmat->cmap,cend-cbegin)); 1842 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 1843 } 1844 c = (Mat_MPIDense*)a->cmat->data; 1845 PetscCheck(!c->A,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1846 PetscCall(MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A)); 1847 a->cmat->preallocated = PETSC_TRUE; 1848 a->cmat->assembled = PETSC_TRUE; 1849 a->matinuse = cbegin + 1; 1850 *v = a->cmat; 1851 PetscFunctionReturn(0); 1852 } 1853 1854 PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v) 1855 { 1856 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1857 Mat_MPIDense *c; 1858 1859 PetscFunctionBegin; 1860 PetscCheck(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first"); 1861 PetscCheck(a->cmat,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix"); 1862 PetscCheck(*v == a->cmat,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()"); 1863 a->matinuse = 0; 1864 c = (Mat_MPIDense*)a->cmat->data; 1865 PetscCall(MatDenseRestoreSubMatrix(a->A,&c->A)); 1866 *v = NULL; 1867 PetscFunctionReturn(0); 1868 } 1869 1870 /*MC 1871 MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix 
type to be used for distributed dense matrices. 1872 1873 Options Database Keys: 1874 . -mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions() 1875 1876 Level: beginner 1877 1878 .seealso: `MatCreateDense()` 1879 1880 M*/ 1881 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) 1882 { 1883 Mat_MPIDense *a; 1884 1885 PetscFunctionBegin; 1886 PetscCall(PetscNewLog(mat,&a)); 1887 mat->data = (void*)a; 1888 PetscCall(PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps))); 1889 1890 mat->insertmode = NOT_SET_VALUES; 1891 1892 /* build cache for off array entries formed */ 1893 a->donotstash = PETSC_FALSE; 1894 1895 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash)); 1896 1897 /* stuff used for matrix vector multiply */ 1898 a->lvec = NULL; 1899 a->Mvctx = NULL; 1900 a->roworiented = PETSC_TRUE; 1901 1902 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense)); 1903 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense)); 1904 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense)); 1905 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense)); 1906 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense)); 1907 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense)); 1908 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense)); 1909 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense)); 1910 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense)); 1911 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense)); 1912 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense)); 1913 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense)); 1914 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense)); 1915 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense)); 1916 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense)); 1917 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense)); 1918 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense)); 1919 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense)); 1920 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense)); 1921 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense)); 1922 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",MatConvert_MPIDense_MPIAIJ)); 1923 #if defined(PETSC_HAVE_ELEMENTAL) 1924 
PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental)); 1925 #endif 1926 #if defined(PETSC_HAVE_SCALAPACK) 1927 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK)); 1928 #endif 1929 #if defined(PETSC_HAVE_CUDA) 1930 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA)); 1931 #endif 1932 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense)); 1933 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense)); 1934 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ)); 1935 #if defined(PETSC_HAVE_CUDA) 1936 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense)); 1937 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ)); 1938 #endif 1939 1940 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense)); 1941 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense)); 1942 PetscCall(PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE)); 1943 PetscFunctionReturn(0); 1944 } 1945 1946 /*MC 1947 MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs. 1948 1949 Options Database Keys: 1950 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions() 1951 1952 Level: beginner 1953 1954 .seealso: 1955 1956 M*/ 1957 #if defined(PETSC_HAVE_CUDA) 1958 #include <petsc/private/deviceimpl.h> 1959 PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B) 1960 { 1961 PetscFunctionBegin; 1962 PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA)); 1963 PetscCall(MatCreate_MPIDense(B)); 1964 PetscCall(MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B)); 1965 PetscFunctionReturn(0); 1966 } 1967 #endif 1968 1969 /*MC 1970 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1971 1972 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1973 and MATMPIDENSE otherwise. 1974 1975 Options Database Keys: 1976 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1977 1978 Level: beginner 1979 1980 .seealso: `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA` 1981 M*/ 1982 1983 /*MC 1984 MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs. 1985 1986 This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator, 1987 and MATMPIDENSECUDA otherwise. 1988 1989 Options Database Keys: 1990 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions() 1991 1992 Level: beginner 1993 1994 .seealso: `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATDENSE` 1995 M*/ 1996 1997 /*@C 1998 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1999 2000 Collective 2001 2002 Input Parameters: 2003 . B - the matrix 2004 - data - optional location of matrix data. 
Set data=NULL for PETSc 2005 to control all matrix memory allocation. 2006 2007 Notes: 2008 The dense format is fully compatible with standard Fortran 77 2009 storage by columns. 2010 2011 The data input variable is intended primarily for Fortran programmers 2012 who wish to allocate their own matrix memory space. Most users should 2013 set data=NULL. 2014 2015 Level: intermediate 2016 2017 .seealso: `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 2018 @*/ 2019 PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data) 2020 { 2021 PetscFunctionBegin; 2022 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2023 PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data)); 2024 PetscFunctionReturn(0); 2025 } 2026 2027 /*@ 2028 MatDensePlaceArray - Allows one to replace the array in a dense matrix with an 2029 array provided by the user. This is useful to avoid copying an array 2030 into a matrix. 2031 2032 Not Collective 2033 2034 Input Parameters: 2035 + mat - the matrix 2036 - array - the array in column major order 2037 2038 Notes: 2039 You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be 2040 freed when the matrix is destroyed. 2041 2042 Level: developer 2043 2044 .seealso: `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()` 2045 2046 @*/ 2047 PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar *array) 2048 { 2049 PetscFunctionBegin; 2050 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2051 PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array)); 2052 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2053 #if defined(PETSC_HAVE_CUDA) 2054 mat->offloadmask = PETSC_OFFLOAD_CPU; 2055 #endif 2056 PetscFunctionReturn(0); 2057 } 2058 2059 /*@ 2060 MatDenseResetArray - Resets the matrix array to the array it had before the call to MatDensePlaceArray() 2061 2062 Not Collective 2063 2064 Input Parameter: 2065 . mat - the matrix 2066 2067 Notes: 2068 You can only call this after a call to MatDensePlaceArray(). 2069 2070 Level: developer 2071 2072 .seealso: `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()` 2073 2074 @*/ 2075 PetscErrorCode MatDenseResetArray(Mat mat) 2076 { 2077 PetscFunctionBegin; 2078 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2079 PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat)); 2080 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2081 PetscFunctionReturn(0); 2082 } 2083 2084 /*@ 2085 MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an 2086 array provided by the user. This is useful to avoid copying an array 2087 into a matrix. 2088 2089 Not Collective 2090 2091 Input Parameters: 2092 + mat - the matrix 2093 - array - the array in column major order 2094 2095 Notes: 2096 The memory passed in MUST be obtained with PetscMalloc() and CANNOT be 2097 freed by the user. It will be freed when the matrix is destroyed.
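   A minimal usage sketch (illustrative only; len is a placeholder for the size of the local array,
   which this routine does not take explicitly):
.vb
    PetscScalar *newarray;

    PetscCall(PetscMalloc1(len,&newarray));
    /* ... fill newarray in column-major order ... */
    PetscCall(MatDenseReplaceArray(mat,newarray));
    /* newarray now belongs to mat and must not be freed by the caller */
.ve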
2098 2099 Level: developer 2100 2101 .seealso: `MatDenseGetArray()`, `VecReplaceArray()` 2102 @*/ 2103 PetscErrorCode MatDenseReplaceArray(Mat mat,const PetscScalar *array) 2104 { 2105 PetscFunctionBegin; 2106 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2107 PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array)); 2108 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2109 #if defined(PETSC_HAVE_CUDA) 2110 mat->offloadmask = PETSC_OFFLOAD_CPU; 2111 #endif 2112 PetscFunctionReturn(0); 2113 } 2114 2115 #if defined(PETSC_HAVE_CUDA) 2116 /*@C 2117 MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an 2118 array provided by the user. This is useful to avoid copying an array 2119 into a matrix. 2120 2121 Not Collective 2122 2123 Input Parameters: 2124 + mat - the matrix 2125 - array - the array in column major order 2126 2127 Notes: 2128 You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be 2129 freed when the matrix is destroyed. The array must have been allocated with cudaMalloc(). 2130 2131 Level: developer 2132 2133 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()` 2134 @*/ 2135 PetscErrorCode MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array) 2136 { 2137 PetscFunctionBegin; 2138 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2139 PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array)); 2140 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2141 mat->offloadmask = PETSC_OFFLOAD_GPU; 2142 PetscFunctionReturn(0); 2143 } 2144 2145 /*@C 2146 MatDenseCUDAResetArray - Resets the matrix array to the array it had before the call to MatDenseCUDAPlaceArray() 2147 2148 Not Collective 2149 2150 Input Parameter: 2151 . mat - the matrix 2152 2153 Notes: 2154 You can only call this after a call to MatDenseCUDAPlaceArray(). 2155 2156 Level: developer 2157 2158 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()` 2159 2160 @*/ 2161 PetscErrorCode MatDenseCUDAResetArray(Mat mat) 2162 { 2163 PetscFunctionBegin; 2164 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2165 PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat)); 2166 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2167 PetscFunctionReturn(0); 2168 } 2169 2170 /*@C 2171 MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an 2172 array provided by the user. This is useful to avoid copying an array 2173 into a matrix. 2174 2175 Not Collective 2176 2177 Input Parameters: 2178 + mat - the matrix 2179 - array - the array in column major order 2180 2181 Notes: 2182 This permanently replaces the GPU array and frees the memory associated with the old GPU array. 2183 The memory passed in CANNOT be freed by the user. It will be freed 2184 when the matrix is destroyed. The array should respect the matrix leading dimension.
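   A minimal usage sketch (illustrative only; bytes is a placeholder, and PetscCallCUDA() is assumed
   available to check the CUDA call):
.vb
    PetscScalar *d_array;

    PetscCallCUDA(cudaMalloc((void**)&d_array,bytes));
    /* ... fill d_array on the device, respecting the leading dimension ... */
    PetscCall(MatDenseCUDAReplaceArray(mat,d_array));
    /* d_array now belongs to mat and must not be freed by the caller */
.ve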
2185 2186 Level: developer 2187 2188 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()` 2189 @*/ 2190 PetscErrorCode MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array) 2191 { 2192 PetscFunctionBegin; 2193 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2194 PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array)); 2195 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2196 mat->offloadmask = PETSC_OFFLOAD_GPU; 2197 PetscFunctionReturn(0); 2198 } 2199 2200 /*@C 2201 MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix. 2202 2203 Not Collective 2204 2205 Input Parameter: 2206 . A - the matrix 2207 2208 Output Parameter: 2209 . array - the GPU array in column major order 2210 2211 Notes: 2212 The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed. 2213 2214 Level: developer 2215 2216 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArrayRead()` 2217 @*/ 2218 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a) 2219 { 2220 PetscFunctionBegin; 2221 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2222 PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a)); 2223 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2224 PetscFunctionReturn(0); 2225 } 2226 2227 /*@C 2228 MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite(). 2229 2230 Not Collective 2231 2232 Input Parameters: 2233 + A - the matrix 2234 - array - the GPU array in column major order 2235 2236 2237 2238 Level: developer 2239 2240 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()` 2241 @*/ 2242 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a) 2243 { 2244 PetscFunctionBegin; 2245 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2246 PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a)); 2247 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2248 A->offloadmask = PETSC_OFFLOAD_GPU; 2249 PetscFunctionReturn(0); 2250 } 2251 2252 /*@C 2253 MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed. 2254 2255 Not Collective 2256 2257 Input Parameter: 2258 . A - the matrix 2259 2260 Output Parameter: 2261 . array - the GPU array in column major order 2262 2263 Notes: 2264 Data can be copied to the GPU due to operations done on the CPU. If you need write-only access, use MatDenseCUDAGetArrayWrite().
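   A minimal access sketch (the device-side use of the pointer is a placeholder):
.vb
    const PetscScalar *d_a;

    PetscCall(MatDenseCUDAGetArrayRead(A,&d_a));
    /* ... pass d_a to a kernel or cuBLAS routine that only reads it ... */
    PetscCall(MatDenseCUDARestoreArrayRead(A,&d_a));
.ve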
2265 2266 Level: developer 2267 2268 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()` 2269 @*/ 2270 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a) 2271 { 2272 PetscFunctionBegin; 2273 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2274 PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a)); 2275 PetscFunctionReturn(0); 2276 } 2277 2278 /*@C 2279 MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead(). 2280 2281 Not Collective 2282 2283 Input Parameters: 2284 + A - the matrix 2285 - array - the GPU array in column major order 2286 2287 Notes: 2288 The pointer obtained with MatDenseCUDAGetArrayRead() must not be used after this call. 2289 2290 Level: developer 2291 2292 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()` 2293 @*/ 2294 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a) 2295 { 2296 PetscFunctionBegin; PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2297 PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a)); 2298 PetscFunctionReturn(0); 2299 } 2300 2301 /*@C 2302 MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed. 2303 2304 Not Collective 2305 2306 Input Parameter: 2307 . A - the matrix 2308 2309 Output Parameter: 2310 . array - the GPU array in column major order 2311 2312 Notes: 2313 Data can be copied to the GPU due to operations done on the CPU. If you need write-only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead(). 2314 2315 Level: developer 2316 2317 .seealso: `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()` 2318 @*/ 2319 PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a) 2320 { 2321 PetscFunctionBegin; 2322 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2323 PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a)); 2324 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2325 PetscFunctionReturn(0); 2326 } 2327 2328 /*@C 2329 MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray(). 2330 2331 Not Collective 2332 2333 Input Parameters: 2334 + A - the matrix 2335 - array - the GPU array in column major order 2336 2337 2338 2339 Level: developer 2340 2341 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()` 2342 @*/ 2343 PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a) 2344 { 2345 PetscFunctionBegin; 2346 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2347 PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a)); 2348 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2349 A->offloadmask = PETSC_OFFLOAD_GPU; 2350 PetscFunctionReturn(0); 2351 } 2352 #endif 2353 2354 /*@C 2355 MatCreateDense - Creates a matrix in dense format. 2356 2357 Collective 2358 2359 Input Parameters: 2360 + comm - MPI communicator 2361 .
m - number of local rows (or PETSC_DECIDE to have it calculated if M is given) 2362 . n - number of local columns (or PETSC_DECIDE to have it calculated if N is given) 2363 . M - number of global rows (or PETSC_DECIDE to have it calculated if m is given) 2364 . N - number of global columns (or PETSC_DECIDE to have it calculated if n is given) 2365 - data - optional location of matrix data. Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 2366 to control all matrix memory allocation. 2367 2368 Output Parameter: 2369 . A - the matrix 2370 2371 Notes: 2372 The dense format is fully compatible with standard Fortran 77 2373 storage by columns. 2374 Note that, although local portions of the matrix are stored in column-major 2375 order, the matrix is partitioned across MPI ranks by row. 2376 2377 The data input variable is intended primarily for Fortran programmers 2378 who wish to allocate their own matrix memory space. Most users should 2379 set data=NULL (PETSC_NULL_SCALAR for Fortran users). 2380 2381 The user MUST specify either the local or global matrix dimensions 2382 (possibly both). 2383 2384 Level: intermediate 2385 2386 .seealso: `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 2387 @*/ 2388 PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 2389 { 2390 PetscFunctionBegin; 2391 PetscCall(MatCreate(comm,A)); 2392 PetscCall(MatSetSizes(*A,m,n,M,N)); 2393 PetscCall(MatSetType(*A,MATDENSE)); 2394 PetscCall(MatSeqDenseSetPreallocation(*A,data)); 2395 PetscCall(MatMPIDenseSetPreallocation(*A,data)); 2396 PetscFunctionReturn(0); 2397 } 2398 2399 #if defined(PETSC_HAVE_CUDA) 2400 /*@C 2401 MatCreateDenseCUDA - Creates a matrix in dense format using CUDA. 2402 2403 Collective 2404 2405 Input Parameters: 2406 + comm - MPI communicator 2407 . m - number of local rows (or PETSC_DECIDE to have it calculated if M is given) 2408 . n - number of local columns (or PETSC_DECIDE to have it calculated if N is given) 2409 . M - number of global rows (or PETSC_DECIDE to have it calculated if m is given) 2410 . N - number of global columns (or PETSC_DECIDE to have it calculated if n is given) 2411 - data - optional location of GPU matrix data. Set data=NULL for PETSc 2412 to control matrix memory allocation. 2413 2414 Output Parameter: 2415 .
A - the matrix 2416 2417 Notes: 2418 2419 Level: intermediate 2420 2421 .seealso: `MatCreate()`, `MatCreateDense()` 2422 @*/ 2423 PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 2424 { 2425 PetscFunctionBegin; 2426 PetscCall(MatCreate(comm,A)); 2427 PetscValidLogicalCollectiveBool(*A,!!data,6); 2428 PetscCall(MatSetSizes(*A,m,n,M,N)); 2429 PetscCall(MatSetType(*A,MATDENSECUDA)); 2430 PetscCall(MatSeqDenseCUDASetPreallocation(*A,data)); 2431 PetscCall(MatMPIDenseCUDASetPreallocation(*A,data)); 2432 PetscFunctionReturn(0); 2433 } 2434 #endif 2435 2436 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 2437 { 2438 Mat mat; 2439 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 2440 2441 PetscFunctionBegin; 2442 *newmat = NULL; 2443 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&mat)); 2444 PetscCall(MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N)); 2445 PetscCall(MatSetType(mat,((PetscObject)A)->type_name)); 2446 a = (Mat_MPIDense*)mat->data; 2447 2448 mat->factortype = A->factortype; 2449 mat->assembled = PETSC_TRUE; 2450 mat->preallocated = PETSC_TRUE; 2451 2452 mat->insertmode = NOT_SET_VALUES; 2453 a->donotstash = oldmat->donotstash; 2454 2455 PetscCall(PetscLayoutReference(A->rmap,&mat->rmap)); 2456 PetscCall(PetscLayoutReference(A->cmap,&mat->cmap)); 2457 2458 PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A)); 2459 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A)); 2460 2461 *newmat = mat; 2462 PetscFunctionReturn(0); 2463 } 2464 2465 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer) 2466 { 2467 PetscBool isbinary; 2468 #if defined(PETSC_HAVE_HDF5) 2469 PetscBool ishdf5; 2470 #endif 2471 2472 PetscFunctionBegin; 2473 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2474 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2475 /* force binary viewer to load .info file if it has not yet done so */ 2476 PetscCall(PetscViewerSetUp(viewer)); 2477 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 2478 #if defined(PETSC_HAVE_HDF5) 2479 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5)); 2480 #endif 2481 if (isbinary) { 2482 PetscCall(MatLoad_Dense_Binary(newMat,viewer)); 2483 #if defined(PETSC_HAVE_HDF5) 2484 } else if (ishdf5) { 2485 PetscCall(MatLoad_Dense_HDF5(newMat,viewer)); 2486 #endif 2487 } else SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2488 PetscFunctionReturn(0); 2489 } 2490 2491 static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag) 2492 { 2493 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 2494 Mat a,b; 2495 2496 PetscFunctionBegin; 2497 a = matA->A; 2498 b = matB->A; 2499 PetscCall(MatEqual(a,b,flag)); 2500 PetscCall(MPIU_Allreduce(MPI_IN_PLACE,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A))); 2501 PetscFunctionReturn(0); 2502 } 2503 2504 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data) 2505 { 2506 Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data; 2507 2508 PetscFunctionBegin; 2509 PetscCall(PetscFree2(atb->sendbuf,atb->recvcounts)); 2510 PetscCall(MatDestroy(&atb->atb)); 2511 PetscCall(PetscFree(atb)); 2512 PetscFunctionReturn(0); 2513 } 2514 2515 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data) 2516 { 2517 
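/* frees the ping-pong buffers and the allgatherv counts/displacements allocated by MatMatTransposeMultSymbolic_MPIDense_MPIDense() */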
Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data; 2518 2519 PetscFunctionBegin; 2520 PetscCall(PetscFree2(abt->buf[0],abt->buf[1])); 2521 PetscCall(PetscFree2(abt->recvcounts,abt->recvdispls)); 2522 PetscCall(PetscFree(abt)); 2523 PetscFunctionReturn(0); 2524 } 2525 2526 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2527 { 2528 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2529 Mat_TransMatMultDense *atb; 2530 MPI_Comm comm; 2531 PetscMPIInt size,*recvcounts; 2532 PetscScalar *carray,*sendbuf; 2533 const PetscScalar *atbarray; 2534 PetscInt i,cN=C->cmap->N,proc,k,j,lda; 2535 const PetscInt *ranges; 2536 2537 PetscFunctionBegin; 2538 MatCheckProduct(C,3); 2539 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2540 atb = (Mat_TransMatMultDense *)C->product->data; 2541 recvcounts = atb->recvcounts; 2542 sendbuf = atb->sendbuf; 2543 2544 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 2545 PetscCallMPI(MPI_Comm_size(comm,&size)); 2546 2547 /* compute atbarray = aseq^T * bseq */ 2548 PetscCall(MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb)); 2549 2550 PetscCall(MatGetOwnershipRanges(C,&ranges)); 2551 2552 /* arrange atbarray into sendbuf */ 2553 PetscCall(MatDenseGetArrayRead(atb->atb,&atbarray)); 2554 PetscCall(MatDenseGetLDA(atb->atb,&lda)); 2555 for (proc=0, k=0; proc<size; proc++) { 2556 for (j=0; j<cN; j++) { 2557 for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*lda]; 2558 } 2559 } 2560 PetscCall(MatDenseRestoreArrayRead(atb->atb,&atbarray)); 2561 2562 /* sum all atbarray to local values of C */ 2563 PetscCall(MatDenseGetArrayWrite(c->A,&carray)); 2564 PetscCallMPI(MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm)); 2565 PetscCall(MatDenseRestoreArrayWrite(c->A,&carray)); 2566 PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); 2567 PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); 2568 PetscFunctionReturn(0); 2569 } 2570 2571 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 2572 { 2573 MPI_Comm comm; 2574 PetscMPIInt size; 2575 PetscInt cm=A->cmap->n,cM,cN=B->cmap->N; 2576 Mat_TransMatMultDense *atb; 2577 PetscBool cisdense; 2578 PetscInt i; 2579 const PetscInt *ranges; 2580 2581 PetscFunctionBegin; 2582 MatCheckProduct(C,3); 2583 PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2584 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 2585 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) { 2586 SETERRQ(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2587 } 2588 2589 /* create matrix product C */ 2590 PetscCall(MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N)); 2591 PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"")); 2592 if (!cisdense) { 2593 PetscCall(MatSetType(C,((PetscObject)A)->type_name)); 2594 } 2595 PetscCall(MatSetUp(C)); 2596 2597 /* create data structure for reuse C */ 2598 PetscCallMPI(MPI_Comm_size(comm,&size)); 2599 PetscCall(PetscNew(&atb)); 2600 cM = C->rmap->N; 2601 PetscCall(PetscMalloc2(cM*cN,&atb->sendbuf,size,&atb->recvcounts)); 2602 PetscCall(MatGetOwnershipRanges(C,&ranges)); 2603 
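/* rank i receives its (ranges[i+1]-ranges[i]) local rows of C, each of width cN, from the MPI_Reduce_scatter() in the numeric phase */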
for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN; 2604 2605 C->product->data = atb; 2606 C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 2607 PetscFunctionReturn(0); 2608 } 2609 2610 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2611 { 2612 MPI_Comm comm; 2613 PetscMPIInt i, size; 2614 PetscInt maxRows, bufsiz; 2615 PetscMPIInt tag; 2616 PetscInt alg; 2617 Mat_MatTransMultDense *abt; 2618 Mat_Product *product = C->product; 2619 PetscBool flg; 2620 2621 PetscFunctionBegin; 2622 MatCheckProduct(C,4); 2623 PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2624 /* check local size of A and B */ 2625 PetscCheck(A->cmap->n == B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")",A->cmap->n,B->cmap->n); 2626 2627 PetscCall(PetscStrcmp(product->alg,"allgatherv",&flg)); 2628 alg = flg ? 0 : 1; 2629 2630 /* setup matrix product C */ 2631 PetscCall(MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N)); 2632 PetscCall(MatSetType(C,MATMPIDENSE)); 2633 PetscCall(MatSetUp(C)); 2634 PetscCall(PetscObjectGetNewTag((PetscObject)C,&tag)); 2635 2636 /* create data structure for reuse C */ 2637 PetscCall(PetscObjectGetComm((PetscObject)C,&comm)); 2638 PetscCallMPI(MPI_Comm_size(comm,&size)); 2639 PetscCall(PetscNew(&abt)); 2640 abt->tag = tag; 2641 abt->alg = alg; 2642 switch (alg) { 2643 case 1: /* alg: "cyclic" */ 2644 for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i])); 2645 bufsiz = A->cmap->N * maxRows; 2646 PetscCall(PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]))); 2647 break; 2648 default: /* alg: "allgatherv" */ 2649 PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]))); 2650 PetscCall(PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls))); 2651 for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N; 2652 for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i]; 2653 break; 2654 } 2655 2656 C->product->data = abt; 2657 C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 2658 C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense; 2659 PetscFunctionReturn(0); 2660 } 2661 2662 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) 2663 { 2664 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2665 Mat_MatTransMultDense *abt; 2666 MPI_Comm comm; 2667 PetscMPIInt rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom; 2668 PetscScalar *sendbuf, *recvbuf=NULL, *cv; 2669 PetscInt i,cK=A->cmap->N,k,j,bn; 2670 PetscScalar _DOne=1.0,_DZero=0.0; 2671 const PetscScalar *av,*bv; 2672 PetscBLASInt cm, cn, ck, alda, blda = 0, clda; 2673 MPI_Request reqs[2]; 2674 const PetscInt *ranges; 2675 2676 PetscFunctionBegin; 2677 MatCheckProduct(C,3); 2678 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2679 abt = (Mat_MatTransMultDense*)C->product->data; 2680 PetscCall(PetscObjectGetComm((PetscObject)C,&comm)); 2681 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 2682 PetscCallMPI(MPI_Comm_size(comm,&size)); 2683 PetscCall(MatDenseGetArrayRead(a->A,&av)); 2684 PetscCall(MatDenseGetArrayRead(b->A,&bv)); 2685 
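/* cyclic algorithm: chunks of B^T travel around the ring of ranks; at each step every rank multiplies its local piece of A against the chunk it currently holds, filling the corresponding block of columns of C */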
PetscCall(MatDenseGetArrayWrite(c->A,&cv)); 2686 PetscCall(MatDenseGetLDA(a->A,&i)); 2687 PetscCall(PetscBLASIntCast(i,&alda)); 2688 PetscCall(MatDenseGetLDA(b->A,&i)); 2689 PetscCall(PetscBLASIntCast(i,&blda)); 2690 PetscCall(MatDenseGetLDA(c->A,&i)); 2691 PetscCall(PetscBLASIntCast(i,&clda)); 2692 PetscCall(MatGetOwnershipRanges(B,&ranges)); 2693 bn = B->rmap->n; 2694 if (blda == bn) { 2695 sendbuf = (PetscScalar*)bv; 2696 } else { 2697 sendbuf = abt->buf[0]; 2698 for (k = 0, i = 0; i < cK; i++) { 2699 for (j = 0; j < bn; j++, k++) { 2700 sendbuf[k] = bv[i * blda + j]; 2701 } 2702 } 2703 } 2704 if (size > 1) { 2705 sendto = (rank + size - 1) % size; 2706 recvfrom = (rank + size + 1) % size; 2707 } else { 2708 sendto = recvfrom = 0; 2709 } 2710 PetscCall(PetscBLASIntCast(cK,&ck)); 2711 PetscCall(PetscBLASIntCast(c->A->rmap->n,&cm)); 2712 recvisfrom = rank; 2713 for (i = 0; i < size; i++) { 2714 /* we have finished receiving in sending, bufs can be read/modified */ 2715 PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 2716 PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 2717 2718 if (nextrecvisfrom != rank) { 2719 /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2720 sendsiz = cK * bn; 2721 recvsiz = cK * nextbn; 2722 recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; 2723 PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0])); 2724 PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1])); 2725 } 2726 2727 /* local aseq * sendbuf^T */ 2728 PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn)); 2729 if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda)); 2730 2731 if (nextrecvisfrom != rank) { 2732 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2733 PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE)); 2734 } 2735 bn = nextbn; 2736 recvisfrom = nextrecvisfrom; 2737 sendbuf = recvbuf; 2738 } 2739 PetscCall(MatDenseRestoreArrayRead(a->A,&av)); 2740 PetscCall(MatDenseRestoreArrayRead(b->A,&bv)); 2741 PetscCall(MatDenseRestoreArrayWrite(c->A,&cv)); 2742 PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); 2743 PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); 2744 PetscFunctionReturn(0); 2745 } 2746 2747 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2748 { 2749 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2750 Mat_MatTransMultDense *abt; 2751 MPI_Comm comm; 2752 PetscMPIInt size; 2753 PetscScalar *cv, *sendbuf, *recvbuf; 2754 const PetscScalar *av,*bv; 2755 PetscInt blda,i,cK=A->cmap->N,k,j,bn; 2756 PetscScalar _DOne=1.0,_DZero=0.0; 2757 PetscBLASInt cm, cn, ck, alda, clda; 2758 2759 PetscFunctionBegin; 2760 MatCheckProduct(C,3); 2761 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2762 abt = (Mat_MatTransMultDense*)C->product->data; 2763 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 2764 PetscCallMPI(MPI_Comm_size(comm,&size)); 2765 PetscCall(MatDenseGetArrayRead(a->A,&av)); 2766 PetscCall(MatDenseGetArrayRead(b->A,&bv)); 2767 PetscCall(MatDenseGetArrayWrite(c->A,&cv)); 2768 PetscCall(MatDenseGetLDA(a->A,&i)); 2769 PetscCall(PetscBLASIntCast(i,&alda)); 2770 PetscCall(MatDenseGetLDA(b->A,&blda)); 2771 
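/* allgatherv algorithm: each rank packs its local B^T contiguously, gathers all of B^T with one MPI_Allgatherv(), and then forms its local rows of C = A*B^T with a single GEMM */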
PetscCall(MatDenseGetLDA(c->A,&i)); 2772 PetscCall(PetscBLASIntCast(i,&clda)); 2773 /* copy transpose of B into buf[0] */ 2774 bn = B->rmap->n; 2775 sendbuf = abt->buf[0]; 2776 recvbuf = abt->buf[1]; 2777 for (k = 0, j = 0; j < bn; j++) { 2778 for (i = 0; i < cK; i++, k++) { 2779 sendbuf[k] = bv[i * blda + j]; 2780 } 2781 } 2782 PetscCall(MatDenseRestoreArrayRead(b->A,&bv)); 2783 PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm)); 2784 PetscCall(PetscBLASIntCast(cK,&ck)); 2785 PetscCall(PetscBLASIntCast(c->A->rmap->n,&cm)); 2786 PetscCall(PetscBLASIntCast(c->A->cmap->n,&cn)); 2787 if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda)); 2788 PetscCall(MatDenseRestoreArrayRead(a->A,&av)); 2789 2790 PetscCall(MatDenseRestoreArrayWrite(c->A,&cv)); 2791 PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); 2792 PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); 2793 PetscFunctionReturn(0); 2794 } 2795 2796 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2797 { 2798 Mat_MatTransMultDense *abt; 2799 2800 PetscFunctionBegin; 2801 MatCheckProduct(C,3); 2802 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2803 abt = (Mat_MatTransMultDense*)C->product->data; 2804 switch (abt->alg) { 2805 case 1: 2806 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); 2807 break; 2808 default: 2809 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); 2810 break; 2811 } 2812 PetscFunctionReturn(0); 2813 } 2814 2815 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) 2816 { 2817 Mat_MatMultDense *ab = (Mat_MatMultDense*)data; 2818 2819 PetscFunctionBegin; 2820 PetscCall(MatDestroy(&ab->Ce)); 2821 PetscCall(MatDestroy(&ab->Ae)); 2822 PetscCall(MatDestroy(&ab->Be)); 2823 PetscCall(PetscFree(ab)); 2824 PetscFunctionReturn(0); 2825 } 2826 2827 #if defined(PETSC_HAVE_ELEMENTAL) 2828 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2829 { 2830 Mat_MatMultDense *ab; 2831 2832 PetscFunctionBegin; 2833 MatCheckProduct(C,3); 2834 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data"); 2835 ab = (Mat_MatMultDense*)C->product->data; 2836 PetscCall(MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae)); 2837 PetscCall(MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be)); 2838 PetscCall(MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce)); 2839 PetscCall(MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C)); 2840 PetscFunctionReturn(0); 2841 } 2842 2843 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 2844 { 2845 Mat Ae,Be,Ce; 2846 Mat_MatMultDense *ab; 2847 2848 PetscFunctionBegin; 2849 MatCheckProduct(C,4); 2850 PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2851 /* check local size of A and B */ 2852 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 2853 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->cmap->rstart,A->cmap->rend,B->rmap->rstart,B->rmap->rend); 2854 } 2855 2856 /* create elemental matrices Ae and Be */ 2857
PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae)); 2858 PetscCall(MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N)); 2859 PetscCall(MatSetType(Ae,MATELEMENTAL)); 2860 PetscCall(MatSetUp(Ae)); 2861 PetscCall(MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE)); 2862 2863 PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be)); 2864 PetscCall(MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N)); 2865 PetscCall(MatSetType(Be,MATELEMENTAL)); 2866 PetscCall(MatSetUp(Be)); 2867 PetscCall(MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE)); 2868 2869 /* compute symbolic Ce = Ae*Be */ 2870 PetscCall(MatCreate(PetscObjectComm((PetscObject)C),&Ce)); 2871 PetscCall(MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce)); 2872 2873 /* setup C */ 2874 PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE)); 2875 PetscCall(MatSetType(C,MATDENSE)); 2876 PetscCall(MatSetUp(C)); 2877 2878 /* create data structure for reuse Cdense */ 2879 PetscCall(PetscNew(&ab)); 2880 ab->Ae = Ae; 2881 ab->Be = Be; 2882 ab->Ce = Ce; 2883 2884 C->product->data = ab; 2885 C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2886 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2887 PetscFunctionReturn(0); 2888 } 2889 #endif 2890 /* ----------------------------------------------- */ 2891 #if defined(PETSC_HAVE_ELEMENTAL) 2892 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 2893 { 2894 PetscFunctionBegin; 2895 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 2896 C->ops->productsymbolic = MatProductSymbolic_AB; 2897 PetscFunctionReturn(0); 2898 } 2899 #endif 2900 2901 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 2902 { 2903 Mat_Product *product = C->product; 2904 Mat A = product->A,B=product->B; 2905 2906 PetscFunctionBegin; 2907 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) 2908 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2909 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense; 2910 C->ops->productsymbolic = MatProductSymbolic_AtB; 2911 PetscFunctionReturn(0); 2912 } 2913 2914 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 2915 { 2916 Mat_Product *product = C->product; 2917 const char *algTypes[2] = {"allgatherv","cyclic"}; 2918 PetscInt alg,nalg = 2; 2919 PetscBool flg = PETSC_FALSE; 2920 2921 PetscFunctionBegin; 2922 /* Set default algorithm */ 2923 alg = 0; /* default is allgatherv */ 2924 PetscCall(PetscStrcmp(product->alg,"default",&flg)); 2925 if (flg) { 2926 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 2927 } 2928 2929 /* Get runtime option */ 2930 if (product->api_user) { 2931 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat"); 2932 PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg)); 2933 PetscOptionsEnd(); 2934 } else { 2935 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat"); 2936 PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg)); 2937 PetscOptionsEnd(); 2938 } 2939 if (flg) { 2940 
PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 2941 } 2942 2943 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 2944 C->ops->productsymbolic = MatProductSymbolic_ABt; 2945 PetscFunctionReturn(0); 2946 } 2947 2948 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 2949 { 2950 Mat_Product *product = C->product; 2951 2952 PetscFunctionBegin; 2953 switch (product->type) { 2954 #if defined(PETSC_HAVE_ELEMENTAL) 2955 case MATPRODUCT_AB: 2956 PetscCall(MatProductSetFromOptions_MPIDense_AB(C)); 2957 break; 2958 #endif 2959 case MATPRODUCT_AtB: 2960 PetscCall(MatProductSetFromOptions_MPIDense_AtB(C)); 2961 break; 2962 case MATPRODUCT_ABt: 2963 PetscCall(MatProductSetFromOptions_MPIDense_ABt(C)); 2964 break; 2965 default: 2966 break; 2967 } 2968 PetscFunctionReturn(0); 2969 } 2970
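/*
   A minimal driver sketch (not part of this file; sizes are illustrative) showing how the ABt kernels
   implemented above are reached through the public API; the algorithm may be selected at runtime with
   -matmattransmult_mpidense_mpidense_via allgatherv (the default) or cyclic:

     Mat A,B,C;
     PetscCall(MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,10,4,NULL,&A));
     PetscCall(MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,8,4,NULL,&B));
     ... set entries of A and B, then MatAssemblyBegin()/MatAssemblyEnd() on both ...
     PetscCall(MatMatTransposeMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C));   C = A B^T is 10 x 8
     PetscCall(MatDestroy(&C));
     PetscCall(MatDestroy(&B));
     PetscCall(MatDestroy(&A));
*/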