
/*
   Basic functions for basic parallel dense matrices.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I "petscmat.h" I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@

      MatDenseGetLocalMatrix - For a MATMPIDENSE matrix, returns the sequential dense matrix that
              stores the locally owned rows of the operator; for a MATSEQDENSE matrix, returns the matrix itself.

    Input Parameter:
.      A - the sequential (MATSEQDENSE) or parallel (MATMPIDENSE) dense matrix

    Output Parameter:
.      B - the inner matrix

    Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  PetscBool    flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscValidPointer(B,2);
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg));
  if (flg) *B = mat->A;
  else {
    PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg));
    PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name);
    *B = A;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
{
  Mat_MPIDense *Amat = (Mat_MPIDense*)A->data;
  Mat_MPIDense *Bmat = (Mat_MPIDense*)B->data;

  PetscFunctionBegin;
  PetscCall(MatCopy(Amat->A,Bmat->A,s));
  PetscFunctionReturn(0);
}

PetscErrorCode MatShift_MPIDense(Mat A,PetscScalar alpha)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  PetscInt     j,lda,rstart = A->rmap->rstart,rend = A->rmap->rend,rend2;
  PetscScalar  *v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mat->A,&v));
  PetscCall(MatDenseGetLDA(mat->A,&lda));
  rend2 = PetscMin(rend,A->cmap->N);
  if (rend2>rstart) {
    for (j=rstart; j<rend2; j++) v[j-rstart+j*lda] += alpha;
    PetscCall(PetscLogFlops(rend2-rstart));
  }
  PetscCall(MatDenseRestoreArray(mat->A,&v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  PetscInt     lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  PetscCall(MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  PetscInt     lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  PetscCall(MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  PetscInt     m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm     comm;
  PetscBool    flg;
  Mat          B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A,&flg));
  PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */

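    /* No cached diagonal block yet: build a sequential dense matrix of the local size that
       shares (does not copy) the local array, offset to global column rstart, and cache it on
       A under the key "DiagonalBlock" so that subsequent calls return the same object. */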
PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg)); 110 PetscCheck(!flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA); 111 PetscCall(PetscObjectGetComm((PetscObject)(mdn->A),&comm)); 112 PetscCall(MatCreate(comm,&B)); 113 PetscCall(MatSetSizes(B,m,m,m,m)); 114 PetscCall(MatSetType(B,((PetscObject)mdn->A)->type_name)); 115 PetscCall(MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array)); 116 PetscCall(MatSeqDenseSetPreallocation(B,array+m*rstart)); 117 PetscCall(MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array)); 118 PetscCall(PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B)); 119 *a = B; 120 PetscCall(MatDestroy(&B)); 121 } else *a = B; 122 PetscFunctionReturn(0); 123 } 124 125 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 126 { 127 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 128 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 129 PetscBool roworiented = A->roworiented; 130 131 PetscFunctionBegin; 132 for (i=0; i<m; i++) { 133 if (idxm[i] < 0) continue; 134 PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 135 if (idxm[i] >= rstart && idxm[i] < rend) { 136 row = idxm[i] - rstart; 137 if (roworiented) { 138 PetscCall(MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv)); 139 } else { 140 for (j=0; j<n; j++) { 141 if (idxn[j] < 0) continue; 142 PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 143 PetscCall(MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv)); 144 } 145 } 146 } else if (!A->donotstash) { 147 mat->assembled = PETSC_FALSE; 148 if (roworiented) { 149 PetscCall(MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE)); 150 } else { 151 PetscCall(MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE)); 152 } 153 } 154 } 155 PetscFunctionReturn(0); 156 } 157 158 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 159 { 160 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 161 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 162 163 PetscFunctionBegin; 164 for (i=0; i<m; i++) { 165 if (idxm[i] < 0) continue; /* negative row */ 166 PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 167 if (idxm[i] >= rstart && idxm[i] < rend) { 168 row = idxm[i] - rstart; 169 for (j=0; j<n; j++) { 170 if (idxn[j] < 0) continue; /* negative column */ 171 PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 172 PetscCall(MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j)); 173 } 174 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 175 } 176 PetscFunctionReturn(0); 177 } 178 179 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda) 180 { 181 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 182 183 PetscFunctionBegin; 184 PetscCall(MatDenseGetLDA(a->A,lda)); 185 PetscFunctionReturn(0); 186 } 187 188 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda) 189 { 190 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 191 PetscBool iscuda; 192 193 PetscFunctionBegin; 194 if (!a->A) { 195 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 196 
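    /* The inner sequential matrix does not exist yet: create it here, matching the host or
       CUDA flavour of the parallel matrix, so that the requested leading dimension can be
       recorded before the matrix is preallocated. */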
PetscCall(PetscLayoutSetUp(A->rmap)); 197 PetscCall(PetscLayoutSetUp(A->cmap)); 198 PetscCall(MatCreate(PETSC_COMM_SELF,&a->A)); 199 PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->A)); 200 PetscCall(MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N)); 201 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda)); 202 PetscCall(MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE)); 203 } 204 PetscCall(MatDenseSetLDA(a->A,lda)); 205 PetscFunctionReturn(0); 206 } 207 208 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array) 209 { 210 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 211 212 PetscFunctionBegin; 213 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 214 PetscCall(MatDenseGetArray(a->A,array)); 215 PetscFunctionReturn(0); 216 } 217 218 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array) 219 { 220 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 221 222 PetscFunctionBegin; 223 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 224 PetscCall(MatDenseGetArrayRead(a->A,array)); 225 PetscFunctionReturn(0); 226 } 227 228 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array) 229 { 230 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 231 232 PetscFunctionBegin; 233 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 234 PetscCall(MatDenseGetArrayWrite(a->A,array)); 235 PetscFunctionReturn(0); 236 } 237 238 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array) 239 { 240 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 241 242 PetscFunctionBegin; 243 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 244 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 245 PetscCall(MatDensePlaceArray(a->A,array)); 246 PetscFunctionReturn(0); 247 } 248 249 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 250 { 251 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 252 253 PetscFunctionBegin; 254 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 255 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 256 PetscCall(MatDenseResetArray(a->A)); 257 PetscFunctionReturn(0); 258 } 259 260 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array) 261 { 262 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 263 264 PetscFunctionBegin; 265 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 266 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 267 PetscCall(MatDenseReplaceArray(a->A,array)); 268 PetscFunctionReturn(0); 269 } 270 271 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 272 { 273 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 274 PetscInt lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 275 const PetscInt *irow,*icol; 276 const PetscScalar *v; 277 PetscScalar *bv; 278 Mat newmat; 279 IS iscol_local; 280 MPI_Comm comm_is,comm_mat; 281 282 PetscFunctionBegin; 283 
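  /* The column index set is gathered onto every rank (ISAllGather below), so each rank sees the
     full list of requested columns, while only locally owned rows listed in isrow are extracted;
     the submatrix therefore inherits the row partitioning of A.

     Minimal usage sketch (illustrative only; comm, rank, k, rows, cols, Asub are hypothetical):
     keep the locally owned rows and, globally, the first k columns (owned by rank 0 in the result):

       PetscCall(MatGetOwnershipRange(A,&rstart,&rend));
       PetscCall(ISCreateStride(comm,rend-rstart,rstart,1,&rows));
       PetscCall(ISCreateStride(comm,rank == 0 ? k : 0,0,1,&cols));
       PetscCall(MatCreateSubMatrix(A,rows,cols,MAT_INITIAL_MATRIX,&Asub));
  */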
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol,&comm_is));
  PetscCheck(comm_mat == comm_is,PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  PetscCall(ISAllGather(iscol,&iscol_local));
  PetscCall(ISGetIndices(isrow,&irow));
  PetscCall(ISGetIndices(iscol_local,&icol));
  PetscCall(ISGetLocalSize(isrow,&nrows));
  PetscCall(ISGetLocalSize(iscol,&ncols));
  PetscCall(ISGetSize(iscol,&Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution is currently supported! We should really check each index set
     to confirm that it is OK. Currently only submatrices with the same row partitioning as
     the original matrix are supported! */

  PetscCall(MatGetLocalSize(A,&nlrows,&nlcols));
  PetscCall(MatGetOwnershipRange(A,&rstart,&rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&newmat));
    PetscCall(MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols));
    PetscCall(MatSetType(newmat,((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat,NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A,&bv));
  PetscCall(MatDenseGetArrayRead(mat->A,&v));
  PetscCall(MatDenseGetLDA(mat->A,&lda));
  for (i=0; i<Ncols; i++) {
    const PetscScalar *av = v + lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A,&v));
  PetscCall(MatDenseRestoreArray(newmatd->A,&bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow,&irow));
  PetscCall(ISRestoreIndices(iscol_local,&icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArray(a->A,array));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayRead(a->A,array));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayWrite(a->A,array));
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  PetscInt     nstash,reallocs;

  PetscFunctionBegin;
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
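  /* Entries destined for other ranks were collected in mat->stash by MatSetValues_MPIDense; the
     scatter started above ships them to their owners, and MatAssemblyEnd_MPIDense receives them
     and inserts them into the local block. */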
PetscCall(PetscInfo(mdn->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs)); 378 PetscFunctionReturn(0); 379 } 380 381 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 382 { 383 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 384 PetscInt i,*row,*col,flg,j,rstart,ncols; 385 PetscMPIInt n; 386 PetscScalar *val; 387 388 PetscFunctionBegin; 389 if (!mdn->donotstash && !mat->nooffprocentries) { 390 /* wait on receives */ 391 while (1) { 392 PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg)); 393 if (!flg) break; 394 395 for (i=0; i<n;) { 396 /* Now identify the consecutive vals belonging to the same row */ 397 for (j=i,rstart=row[j]; j<n; j++) { 398 if (row[j] != rstart) break; 399 } 400 if (j < n) ncols = j-i; 401 else ncols = n-i; 402 /* Now assemble all these values with a single function call */ 403 PetscCall(MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode)); 404 i = j; 405 } 406 } 407 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 408 } 409 410 PetscCall(MatAssemblyBegin(mdn->A,mode)); 411 PetscCall(MatAssemblyEnd(mdn->A,mode)); 412 PetscFunctionReturn(0); 413 } 414 415 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 416 { 417 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 418 419 PetscFunctionBegin; 420 PetscCall(MatZeroEntries(l->A)); 421 PetscFunctionReturn(0); 422 } 423 424 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 425 { 426 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 427 PetscInt i,len,*lrows; 428 429 PetscFunctionBegin; 430 /* get locally owned rows */ 431 PetscCall(PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL)); 432 /* fix right hand side if needed */ 433 if (x && b) { 434 const PetscScalar *xx; 435 PetscScalar *bb; 436 437 PetscCall(VecGetArrayRead(x, &xx)); 438 PetscCall(VecGetArrayWrite(b, &bb)); 439 for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]]; 440 PetscCall(VecRestoreArrayRead(x, &xx)); 441 PetscCall(VecRestoreArrayWrite(b, &bb)); 442 } 443 PetscCall(MatZeroRows(l->A,len,lrows,0.0,NULL,NULL)); 444 if (diag != 0.0) { 445 Vec d; 446 447 PetscCall(MatCreateVecs(A,NULL,&d)); 448 PetscCall(VecSet(d,diag)); 449 PetscCall(MatDiagonalSet(A,d,INSERT_VALUES)); 450 PetscCall(VecDestroy(&d)); 451 } 452 PetscCall(PetscFree(lrows)); 453 PetscFunctionReturn(0); 454 } 455 456 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec); 457 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec); 458 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec); 459 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec); 460 461 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 462 { 463 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 464 const PetscScalar *ax; 465 PetscScalar *ay; 466 PetscMemType axmtype,aymtype; 467 468 PetscFunctionBegin; 469 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 470 PetscCall(VecGetArrayReadAndMemType(xx,&ax,&axmtype)); 471 PetscCall(VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype)); 472 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE)); 473 PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE)); 474 PetscCall(VecRestoreArrayAndMemType(mdn->lvec,&ay)); 475 PetscCall(VecRestoreArrayReadAndMemType(xx,&ax)); 476 PetscCall((*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy)); 477 PetscFunctionReturn(0); 478 } 479 480 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 481 { 482 
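  /* Same communication pattern as MatMult_MPIDense: broadcast the needed entries of xx into the
     local work vector lvec through the PetscSF, then apply the sequential multiply-add on the
     local block. */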
Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 483 const PetscScalar *ax; 484 PetscScalar *ay; 485 PetscMemType axmtype,aymtype; 486 487 PetscFunctionBegin; 488 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 489 PetscCall(VecGetArrayReadAndMemType(xx,&ax,&axmtype)); 490 PetscCall(VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype)); 491 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE)); 492 PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE)); 493 PetscCall(VecRestoreArrayAndMemType(mdn->lvec,&ay)); 494 PetscCall(VecRestoreArrayReadAndMemType(xx,&ax)); 495 PetscCall((*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz)); 496 PetscFunctionReturn(0); 497 } 498 499 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 500 { 501 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 502 const PetscScalar *ax; 503 PetscScalar *ay; 504 PetscMemType axmtype,aymtype; 505 506 PetscFunctionBegin; 507 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 508 PetscCall(VecSet(yy,0.0)); 509 PetscCall((*a->A->ops->multtranspose)(a->A,xx,a->lvec)); 510 PetscCall(VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype)); 511 PetscCall(VecGetArrayAndMemType(yy,&ay,&aymtype)); 512 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM)); 513 PetscCall(PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM)); 514 PetscCall(VecRestoreArrayReadAndMemType(a->lvec,&ax)); 515 PetscCall(VecRestoreArrayAndMemType(yy,&ay)); 516 PetscFunctionReturn(0); 517 } 518 519 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 520 { 521 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 522 const PetscScalar *ax; 523 PetscScalar *ay; 524 PetscMemType axmtype,aymtype; 525 526 PetscFunctionBegin; 527 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 528 PetscCall(VecCopy(yy,zz)); 529 PetscCall((*a->A->ops->multtranspose)(a->A,xx,a->lvec)); 530 PetscCall(VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype)); 531 PetscCall(VecGetArrayAndMemType(zz,&ay,&aymtype)); 532 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM)); 533 PetscCall(PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM)); 534 PetscCall(VecRestoreArrayReadAndMemType(a->lvec,&ax)); 535 PetscCall(VecRestoreArrayAndMemType(zz,&ay)); 536 PetscFunctionReturn(0); 537 } 538 539 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 540 { 541 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 542 PetscInt lda,len,i,n,m = A->rmap->n,radd; 543 PetscScalar *x,zero = 0.0; 544 const PetscScalar *av; 545 546 PetscFunctionBegin; 547 PetscCall(VecSet(v,zero)); 548 PetscCall(VecGetArray(v,&x)); 549 PetscCall(VecGetSize(v,&n)); 550 PetscCheck(n == A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 551 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 552 radd = A->rmap->rstart*m; 553 PetscCall(MatDenseGetArrayRead(a->A,&av)); 554 PetscCall(MatDenseGetLDA(a->A,&lda)); 555 for (i=0; i<len; i++) { 556 x[i] = av[radd + i*lda + i]; 557 } 558 PetscCall(MatDenseRestoreArrayRead(a->A,&av)); 559 PetscCall(VecRestoreArray(v,&x)); 560 PetscFunctionReturn(0); 561 } 562 563 PetscErrorCode MatDestroy_MPIDense(Mat mat) 564 { 565 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 566 567 PetscFunctionBegin; 568 #if defined(PETSC_USE_LOG) 569 PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N); 570 #endif 571 PetscCall(MatStashDestroy_Private(&mat->stash)); 572 
PetscCheck(!mdn->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 573 PetscCheck(!mdn->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 574 PetscCall(MatDestroy(&mdn->A)); 575 PetscCall(VecDestroy(&mdn->lvec)); 576 PetscCall(PetscSFDestroy(&mdn->Mvctx)); 577 PetscCall(VecDestroy(&mdn->cvec)); 578 PetscCall(MatDestroy(&mdn->cmat)); 579 580 PetscCall(PetscFree(mat->data)); 581 PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL)); 582 583 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL)); 584 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL)); 585 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL)); 586 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL)); 587 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL)); 588 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL)); 589 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL)); 590 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL)); 591 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL)); 592 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL)); 593 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL)); 594 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL)); 595 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",NULL)); 596 #if defined(PETSC_HAVE_ELEMENTAL) 597 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL)); 598 #endif 599 #if defined(PETSC_HAVE_SCALAPACK) 600 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL)); 601 #endif 602 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL)); 603 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL)); 604 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL)); 605 #if defined (PETSC_HAVE_CUDA) 606 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL)); 607 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL)); 608 #endif 609 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL)); 610 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL)); 611 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL)); 612 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL)); 613 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL)); 614 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL)); 615 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL)); 616 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL)); 617 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL)); 618 
PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL)); 619 PetscFunctionReturn(0); 620 } 621 622 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer); 623 624 #include <petscdraw.h> 625 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 626 { 627 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 628 PetscMPIInt rank; 629 PetscViewerType vtype; 630 PetscBool iascii,isdraw; 631 PetscViewer sviewer; 632 PetscViewerFormat format; 633 634 PetscFunctionBegin; 635 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank)); 636 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 637 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 638 if (iascii) { 639 PetscCall(PetscViewerGetType(viewer,&vtype)); 640 PetscCall(PetscViewerGetFormat(viewer,&format)); 641 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 642 MatInfo info; 643 PetscCall(MatGetInfo(mat,MAT_LOCAL,&info)); 644 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 645 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory)); 646 PetscCall(PetscViewerFlush(viewer)); 647 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 648 if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx,viewer)); 649 PetscFunctionReturn(0); 650 } else if (format == PETSC_VIEWER_ASCII_INFO) { 651 PetscFunctionReturn(0); 652 } 653 } else if (isdraw) { 654 PetscDraw draw; 655 PetscBool isnull; 656 657 PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw)); 658 PetscCall(PetscDrawIsNull(draw,&isnull)); 659 if (isnull) PetscFunctionReturn(0); 660 } 661 662 { 663 /* assemble the entire matrix onto first processor. */ 664 Mat A; 665 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 666 PetscInt *cols; 667 PetscScalar *vals; 668 669 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat),&A)); 670 if (rank == 0) { 671 PetscCall(MatSetSizes(A,M,N,M,N)); 672 } else { 673 PetscCall(MatSetSizes(A,0,0,M,N)); 674 } 675 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 676 PetscCall(MatSetType(A,MATMPIDENSE)); 677 PetscCall(MatMPIDenseSetPreallocation(A,NULL)); 678 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)A)); 679 680 /* Copy the matrix ... 
This isn't the most efficient means, 681 but it's quick for now */ 682 A->insertmode = INSERT_VALUES; 683 684 row = mat->rmap->rstart; 685 m = mdn->A->rmap->n; 686 for (i=0; i<m; i++) { 687 PetscCall(MatGetRow_MPIDense(mat,row,&nz,&cols,&vals)); 688 PetscCall(MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES)); 689 PetscCall(MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals)); 690 row++; 691 } 692 693 PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 694 PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 695 PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 696 if (rank == 0) { 697 PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name)); 698 PetscCall(MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer)); 699 } 700 PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 701 PetscCall(PetscViewerFlush(viewer)); 702 PetscCall(MatDestroy(&A)); 703 } 704 PetscFunctionReturn(0); 705 } 706 707 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 708 { 709 PetscBool iascii,isbinary,isdraw,issocket; 710 711 PetscFunctionBegin; 712 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 713 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 714 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket)); 715 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 716 717 if (iascii || issocket || isdraw) { 718 PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat,viewer)); 719 } else if (isbinary) { 720 PetscCall(MatView_Dense_Binary(mat,viewer)); 721 } 722 PetscFunctionReturn(0); 723 } 724 725 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 726 { 727 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 728 Mat mdn = mat->A; 729 PetscLogDouble isend[5],irecv[5]; 730 731 PetscFunctionBegin; 732 info->block_size = 1.0; 733 734 PetscCall(MatGetInfo(mdn,MAT_LOCAL,info)); 735 736 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 737 isend[3] = info->memory; isend[4] = info->mallocs; 738 if (flag == MAT_LOCAL) { 739 info->nz_used = isend[0]; 740 info->nz_allocated = isend[1]; 741 info->nz_unneeded = isend[2]; 742 info->memory = isend[3]; 743 info->mallocs = isend[4]; 744 } else if (flag == MAT_GLOBAL_MAX) { 745 PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A))); 746 747 info->nz_used = irecv[0]; 748 info->nz_allocated = irecv[1]; 749 info->nz_unneeded = irecv[2]; 750 info->memory = irecv[3]; 751 info->mallocs = irecv[4]; 752 } else if (flag == MAT_GLOBAL_SUM) { 753 PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A))); 754 755 info->nz_used = irecv[0]; 756 info->nz_allocated = irecv[1]; 757 info->nz_unneeded = irecv[2]; 758 info->memory = irecv[3]; 759 info->mallocs = irecv[4]; 760 } 761 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 762 info->fill_ratio_needed = 0; 763 info->factor_mallocs = 0; 764 PetscFunctionReturn(0); 765 } 766 767 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 768 { 769 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 770 771 PetscFunctionBegin; 772 switch (op) { 773 case MAT_NEW_NONZERO_LOCATIONS: 774 case MAT_NEW_NONZERO_LOCATION_ERR: 775 case MAT_NEW_NONZERO_ALLOCATION_ERR: 776 MatCheckPreallocated(A,1); 777 PetscCall(MatSetOption(a->A,op,flg)); 778 break; 779 case MAT_ROW_ORIENTED: 780 
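    /* Only changes how MatSetValues interprets the logically two-dimensional v[] array; the flag
       is recorded here and forwarded to the sequential block as well. */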
MatCheckPreallocated(A,1); 781 a->roworiented = flg; 782 PetscCall(MatSetOption(a->A,op,flg)); 783 break; 784 case MAT_FORCE_DIAGONAL_ENTRIES: 785 case MAT_KEEP_NONZERO_PATTERN: 786 case MAT_USE_HASH_TABLE: 787 case MAT_SORTED_FULL: 788 PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op])); 789 break; 790 case MAT_IGNORE_OFF_PROC_ENTRIES: 791 a->donotstash = flg; 792 break; 793 case MAT_SYMMETRIC: 794 case MAT_STRUCTURALLY_SYMMETRIC: 795 case MAT_HERMITIAN: 796 case MAT_SYMMETRY_ETERNAL: 797 case MAT_IGNORE_LOWER_TRIANGULAR: 798 case MAT_IGNORE_ZERO_ENTRIES: 799 PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op])); 800 break; 801 default: 802 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 803 } 804 PetscFunctionReturn(0); 805 } 806 807 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 808 { 809 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 810 const PetscScalar *l; 811 PetscScalar x,*v,*vv,*r; 812 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda; 813 814 PetscFunctionBegin; 815 PetscCall(MatDenseGetArray(mdn->A,&vv)); 816 PetscCall(MatDenseGetLDA(mdn->A,&lda)); 817 PetscCall(MatGetLocalSize(A,&s2,&s3)); 818 if (ll) { 819 PetscCall(VecGetLocalSize(ll,&s2a)); 820 PetscCheck(s2a == s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2); 821 PetscCall(VecGetArrayRead(ll,&l)); 822 for (i=0; i<m; i++) { 823 x = l[i]; 824 v = vv + i; 825 for (j=0; j<n; j++) { (*v) *= x; v+= lda;} 826 } 827 PetscCall(VecRestoreArrayRead(ll,&l)); 828 PetscCall(PetscLogFlops(1.0*n*m)); 829 } 830 if (rr) { 831 const PetscScalar *ar; 832 833 PetscCall(VecGetLocalSize(rr,&s3a)); 834 PetscCheck(s3a == s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3); 835 PetscCall(VecGetArrayRead(rr,&ar)); 836 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 837 PetscCall(VecGetArray(mdn->lvec,&r)); 838 PetscCall(PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE)); 839 PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE)); 840 PetscCall(VecRestoreArrayRead(rr,&ar)); 841 for (i=0; i<n; i++) { 842 x = r[i]; 843 v = vv + i*lda; 844 for (j=0; j<m; j++) (*v++) *= x; 845 } 846 PetscCall(VecRestoreArray(mdn->lvec,&r)); 847 PetscCall(PetscLogFlops(1.0*n*m)); 848 } 849 PetscCall(MatDenseRestoreArray(mdn->A,&vv)); 850 PetscFunctionReturn(0); 851 } 852 853 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 854 { 855 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 856 PetscInt i,j; 857 PetscMPIInt size; 858 PetscReal sum = 0.0; 859 const PetscScalar *av,*v; 860 861 PetscFunctionBegin; 862 PetscCall(MatDenseGetArrayRead(mdn->A,&av)); 863 v = av; 864 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 865 if (size == 1) { 866 PetscCall(MatNorm(mdn->A,type,nrm)); 867 } else { 868 if (type == NORM_FROBENIUS) { 869 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 870 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 871 } 872 PetscCall(MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A))); 873 *nrm = PetscSqrtReal(*nrm); 874 PetscCall(PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n)); 875 } else if (type == NORM_1) { 876 PetscReal *tmp,*tmp2; 877 PetscCall(PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2)); 878 *nrm = 0.0; 879 v = av; 880 for (j=0; j<mdn->A->cmap->n; j++) { 881 for (i=0; i<mdn->A->rmap->n; i++) { 882 tmp[j] += 
PetscAbsScalar(*v); v++; 883 } 884 } 885 PetscCall(MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A))); 886 for (j=0; j<A->cmap->N; j++) { 887 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 888 } 889 PetscCall(PetscFree2(tmp,tmp2)); 890 PetscCall(PetscLogFlops(A->cmap->n*A->rmap->n)); 891 } else if (type == NORM_INFINITY) { /* max row norm */ 892 PetscReal ntemp; 893 PetscCall(MatNorm(mdn->A,type,&ntemp)); 894 PetscCall(MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A))); 895 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm"); 896 } 897 PetscCall(MatDenseRestoreArrayRead(mdn->A,&av)); 898 PetscFunctionReturn(0); 899 } 900 901 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 902 { 903 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 904 Mat B; 905 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 906 PetscInt j,i,lda; 907 PetscScalar *v; 908 909 PetscFunctionBegin; 910 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 911 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 912 PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M)); 913 PetscCall(MatSetType(B,((PetscObject)A)->type_name)); 914 PetscCall(MatMPIDenseSetPreallocation(B,NULL)); 915 } else B = *matout; 916 917 m = a->A->rmap->n; n = a->A->cmap->n; 918 PetscCall(MatDenseGetArrayRead(a->A,(const PetscScalar**)&v)); 919 PetscCall(MatDenseGetLDA(a->A,&lda)); 920 PetscCall(PetscMalloc1(m,&rwork)); 921 for (i=0; i<m; i++) rwork[i] = rstart + i; 922 for (j=0; j<n; j++) { 923 PetscCall(MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES)); 924 v += lda; 925 } 926 PetscCall(MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v)); 927 PetscCall(PetscFree(rwork)); 928 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 929 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 930 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 931 *matout = B; 932 } else { 933 PetscCall(MatHeaderMerge(A,&B)); 934 } 935 PetscFunctionReturn(0); 936 } 937 938 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*); 939 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 940 941 PetscErrorCode MatSetUp_MPIDense(Mat A) 942 { 943 PetscFunctionBegin; 944 PetscCall(PetscLayoutSetUp(A->rmap)); 945 PetscCall(PetscLayoutSetUp(A->cmap)); 946 if (!A->preallocated) { 947 PetscCall(MatMPIDenseSetPreallocation(A,NULL)); 948 } 949 PetscFunctionReturn(0); 950 } 951 952 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 953 { 954 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 955 956 PetscFunctionBegin; 957 PetscCall(MatAXPY(A->A,alpha,B->A,str)); 958 PetscFunctionReturn(0); 959 } 960 961 PetscErrorCode MatConjugate_MPIDense(Mat mat) 962 { 963 Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 964 965 PetscFunctionBegin; 966 PetscCall(MatConjugate(a->A)); 967 PetscFunctionReturn(0); 968 } 969 970 PetscErrorCode MatRealPart_MPIDense(Mat A) 971 { 972 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 973 974 PetscFunctionBegin; 975 PetscCall(MatRealPart(a->A)); 976 PetscFunctionReturn(0); 977 } 978 979 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 980 { 981 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 982 983 PetscFunctionBegin; 984 PetscCall(MatImaginaryPart(a->A)); 985 PetscFunctionReturn(0); 986 } 987 988 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col) 989 { 990 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 991 992 PetscFunctionBegin; 
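  /* Each rank stores every column of its locally owned rows, so column col of the parallel matrix
     is exactly column col of the local sequential block; delegate to it. */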
993 PetscCheck(a->A,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix"); 994 PetscCheck(a->A->ops->getcolumnvector,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation"); 995 PetscCall((*a->A->ops->getcolumnvector)(a->A,v,col)); 996 PetscFunctionReturn(0); 997 } 998 999 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat,PetscInt,PetscReal*); 1000 1001 PetscErrorCode MatGetColumnReductions_MPIDense(Mat A,PetscInt type,PetscReal *reductions) 1002 { 1003 PetscInt i,m,n; 1004 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 1005 PetscReal *work; 1006 1007 PetscFunctionBegin; 1008 PetscCall(MatGetSize(A,&m,&n)); 1009 PetscCall(PetscMalloc1(n,&work)); 1010 if (type == REDUCTION_MEAN_REALPART) { 1011 PetscCall(MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_REALPART,work)); 1012 } else if (type == REDUCTION_MEAN_IMAGINARYPART) { 1013 PetscCall(MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_IMAGINARYPART,work)); 1014 } else { 1015 PetscCall(MatGetColumnReductions_SeqDense(a->A,type,work)); 1016 } 1017 if (type == NORM_2) { 1018 for (i=0; i<n; i++) work[i] *= work[i]; 1019 } 1020 if (type == NORM_INFINITY) { 1021 PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,A->hdr.comm)); 1022 } else { 1023 PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,A->hdr.comm)); 1024 } 1025 PetscCall(PetscFree(work)); 1026 if (type == NORM_2) { 1027 for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 1028 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 1029 for (i=0; i<n; i++) reductions[i] /= m; 1030 } 1031 PetscFunctionReturn(0); 1032 } 1033 1034 #if defined(PETSC_HAVE_CUDA) 1035 static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1036 { 1037 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1038 PetscInt lda; 1039 1040 PetscFunctionBegin; 1041 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1042 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1043 if (!a->cvec) { 1044 PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec)); 1045 PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec)); 1046 } 1047 a->vecinuse = col + 1; 1048 PetscCall(MatDenseGetLDA(a->A,&lda)); 1049 PetscCall(MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse)); 1050 PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda)); 1051 *v = a->cvec; 1052 PetscFunctionReturn(0); 1053 } 1054 1055 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1056 { 1057 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1058 1059 PetscFunctionBegin; 1060 PetscCheck(a->vecinuse,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1061 PetscCheck(a->cvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 1062 a->vecinuse = 0; 1063 PetscCall(MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse)); 1064 PetscCall(VecCUDAResetArray(a->cvec)); 1065 *v = NULL; 1066 PetscFunctionReturn(0); 1067 } 1068 1069 static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1070 { 1071 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1072 PetscInt lda; 1073 1074 PetscFunctionBegin; 1075 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call 
MatDenseRestoreColumnVec() first"); 1076 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1077 if (!a->cvec) { 1078 PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec)); 1079 PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec)); 1080 } 1081 a->vecinuse = col + 1; 1082 PetscCall(MatDenseGetLDA(a->A,&lda)); 1083 PetscCall(MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse)); 1084 PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda)); 1085 PetscCall(VecLockReadPush(a->cvec)); 1086 *v = a->cvec; 1087 PetscFunctionReturn(0); 1088 } 1089 1090 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1091 { 1092 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1093 1094 PetscFunctionBegin; 1095 PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1096 PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 1097 a->vecinuse = 0; 1098 PetscCall(MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse)); 1099 PetscCall(VecLockReadPop(a->cvec)); 1100 PetscCall(VecCUDAResetArray(a->cvec)); 1101 *v = NULL; 1102 PetscFunctionReturn(0); 1103 } 1104 1105 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1106 { 1107 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1108 PetscInt lda; 1109 1110 PetscFunctionBegin; 1111 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1112 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1113 if (!a->cvec) { 1114 PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec)); 1115 PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec)); 1116 } 1117 a->vecinuse = col + 1; 1118 PetscCall(MatDenseGetLDA(a->A,&lda)); 1119 PetscCall(MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse)); 1120 PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda)); 1121 *v = a->cvec; 1122 PetscFunctionReturn(0); 1123 } 1124 1125 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1126 { 1127 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1128 1129 PetscFunctionBegin; 1130 PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1131 PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 1132 a->vecinuse = 0; 1133 PetscCall(MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse)); 1134 PetscCall(VecCUDAResetArray(a->cvec)); 1135 *v = NULL; 1136 PetscFunctionReturn(0); 1137 } 1138 1139 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) 1140 { 1141 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1142 1143 PetscFunctionBegin; 1144 PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1145 PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1146 PetscCall(MatDenseCUDAPlaceArray(l->A,a)); 1147 PetscFunctionReturn(0); 1148 } 1149 1150 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A) 1151 { 1152 Mat_MPIDense *l = 
(Mat_MPIDense*) A->data; 1153 1154 PetscFunctionBegin; 1155 PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1156 PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1157 PetscCall(MatDenseCUDAResetArray(l->A)); 1158 PetscFunctionReturn(0); 1159 } 1160 1161 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) 1162 { 1163 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1164 1165 PetscFunctionBegin; 1166 PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1167 PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1168 PetscCall(MatDenseCUDAReplaceArray(l->A,a)); 1169 PetscFunctionReturn(0); 1170 } 1171 1172 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) 1173 { 1174 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1175 1176 PetscFunctionBegin; 1177 PetscCall(MatDenseCUDAGetArrayWrite(l->A,a)); 1178 PetscFunctionReturn(0); 1179 } 1180 1181 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) 1182 { 1183 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1184 1185 PetscFunctionBegin; 1186 PetscCall(MatDenseCUDARestoreArrayWrite(l->A,a)); 1187 PetscFunctionReturn(0); 1188 } 1189 1190 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) 1191 { 1192 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1193 1194 PetscFunctionBegin; 1195 PetscCall(MatDenseCUDAGetArrayRead(l->A,a)); 1196 PetscFunctionReturn(0); 1197 } 1198 1199 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) 1200 { 1201 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1202 1203 PetscFunctionBegin; 1204 PetscCall(MatDenseCUDARestoreArrayRead(l->A,a)); 1205 PetscFunctionReturn(0); 1206 } 1207 1208 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a) 1209 { 1210 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1211 1212 PetscFunctionBegin; 1213 PetscCall(MatDenseCUDAGetArray(l->A,a)); 1214 PetscFunctionReturn(0); 1215 } 1216 1217 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a) 1218 { 1219 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1220 1221 PetscFunctionBegin; 1222 PetscCall(MatDenseCUDARestoreArray(l->A,a)); 1223 PetscFunctionReturn(0); 1224 } 1225 1226 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*); 1227 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*); 1228 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*); 1229 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*); 1230 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*); 1231 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*); 1232 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*); 1233 1234 static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind) 1235 { 1236 Mat_MPIDense *d = (Mat_MPIDense*)mat->data; 1237 1238 PetscFunctionBegin; 1239 PetscCheck(!d->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1240 PetscCheck(!d->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1241 if (d->A) { 1242 
PetscCall(MatBindToCPU(d->A,bind)); 1243 } 1244 mat->boundtocpu = bind; 1245 if (!bind) { 1246 PetscBool iscuda; 1247 1248 PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda)); 1249 if (!iscuda) { 1250 PetscCall(VecDestroy(&d->cvec)); 1251 } 1252 PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda)); 1253 if (!iscuda) { 1254 PetscCall(MatDestroy(&d->cmat)); 1255 } 1256 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA)); 1257 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA)); 1258 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA)); 1259 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA)); 1260 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA)); 1261 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA)); 1262 } else { 1263 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense)); 1264 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense)); 1265 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense)); 1266 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense)); 1267 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense)); 1268 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense)); 1269 } 1270 if (d->cmat) { 1271 PetscCall(MatBindToCPU(d->cmat,bind)); 1272 } 1273 PetscFunctionReturn(0); 1274 } 1275 1276 PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data) 1277 { 1278 Mat_MPIDense *d = (Mat_MPIDense*)A->data; 1279 PetscBool iscuda; 1280 1281 PetscFunctionBegin; 1282 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1283 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda)); 1284 if (!iscuda) PetscFunctionReturn(0); 1285 PetscCall(PetscLayoutSetUp(A->rmap)); 1286 PetscCall(PetscLayoutSetUp(A->cmap)); 1287 if (!d->A) { 1288 PetscCall(MatCreate(PETSC_COMM_SELF,&d->A)); 1289 PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)d->A)); 1290 PetscCall(MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N)); 1291 } 1292 PetscCall(MatSetType(d->A,MATSEQDENSECUDA)); 1293 PetscCall(MatSeqDenseCUDASetPreallocation(d->A,d_data)); 1294 A->preallocated = PETSC_TRUE; 1295 A->assembled = PETSC_TRUE; 1296 PetscFunctionReturn(0); 1297 } 1298 #endif 1299 1300 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx) 1301 { 1302 Mat_MPIDense *d = (Mat_MPIDense*)x->data; 1303 1304 PetscFunctionBegin; 1305 PetscCall(MatSetRandom(d->A,rctx)); 1306 PetscFunctionReturn(0); 1307 } 1308 1309 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d) 1310 { 1311 PetscFunctionBegin; 1312 *missing = PETSC_FALSE; 1313 PetscFunctionReturn(0); 1314 } 1315 1316 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat); 1317 static PetscErrorCode 
MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat); 1318 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat); 1319 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat); 1320 static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*); 1321 static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer); 1322 1323 /* -------------------------------------------------------------------*/ 1324 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense, 1325 MatGetRow_MPIDense, 1326 MatRestoreRow_MPIDense, 1327 MatMult_MPIDense, 1328 /* 4*/ MatMultAdd_MPIDense, 1329 MatMultTranspose_MPIDense, 1330 MatMultTransposeAdd_MPIDense, 1331 NULL, 1332 NULL, 1333 NULL, 1334 /* 10*/ NULL, 1335 NULL, 1336 NULL, 1337 NULL, 1338 MatTranspose_MPIDense, 1339 /* 15*/ MatGetInfo_MPIDense, 1340 MatEqual_MPIDense, 1341 MatGetDiagonal_MPIDense, 1342 MatDiagonalScale_MPIDense, 1343 MatNorm_MPIDense, 1344 /* 20*/ MatAssemblyBegin_MPIDense, 1345 MatAssemblyEnd_MPIDense, 1346 MatSetOption_MPIDense, 1347 MatZeroEntries_MPIDense, 1348 /* 24*/ MatZeroRows_MPIDense, 1349 NULL, 1350 NULL, 1351 NULL, 1352 NULL, 1353 /* 29*/ MatSetUp_MPIDense, 1354 NULL, 1355 NULL, 1356 MatGetDiagonalBlock_MPIDense, 1357 NULL, 1358 /* 34*/ MatDuplicate_MPIDense, 1359 NULL, 1360 NULL, 1361 NULL, 1362 NULL, 1363 /* 39*/ MatAXPY_MPIDense, 1364 MatCreateSubMatrices_MPIDense, 1365 NULL, 1366 MatGetValues_MPIDense, 1367 MatCopy_MPIDense, 1368 /* 44*/ NULL, 1369 MatScale_MPIDense, 1370 MatShift_MPIDense, 1371 NULL, 1372 NULL, 1373 /* 49*/ MatSetRandom_MPIDense, 1374 NULL, 1375 NULL, 1376 NULL, 1377 NULL, 1378 /* 54*/ NULL, 1379 NULL, 1380 NULL, 1381 NULL, 1382 NULL, 1383 /* 59*/ MatCreateSubMatrix_MPIDense, 1384 MatDestroy_MPIDense, 1385 MatView_MPIDense, 1386 NULL, 1387 NULL, 1388 /* 64*/ NULL, 1389 NULL, 1390 NULL, 1391 NULL, 1392 NULL, 1393 /* 69*/ NULL, 1394 NULL, 1395 NULL, 1396 NULL, 1397 NULL, 1398 /* 74*/ NULL, 1399 NULL, 1400 NULL, 1401 NULL, 1402 NULL, 1403 /* 79*/ NULL, 1404 NULL, 1405 NULL, 1406 NULL, 1407 /* 83*/ MatLoad_MPIDense, 1408 NULL, 1409 NULL, 1410 NULL, 1411 NULL, 1412 NULL, 1413 /* 89*/ NULL, 1414 NULL, 1415 NULL, 1416 NULL, 1417 NULL, 1418 /* 94*/ NULL, 1419 NULL, 1420 MatMatTransposeMultSymbolic_MPIDense_MPIDense, 1421 MatMatTransposeMultNumeric_MPIDense_MPIDense, 1422 NULL, 1423 /* 99*/ MatProductSetFromOptions_MPIDense, 1424 NULL, 1425 NULL, 1426 MatConjugate_MPIDense, 1427 NULL, 1428 /*104*/ NULL, 1429 MatRealPart_MPIDense, 1430 MatImaginaryPart_MPIDense, 1431 NULL, 1432 NULL, 1433 /*109*/ NULL, 1434 NULL, 1435 NULL, 1436 MatGetColumnVector_MPIDense, 1437 MatMissingDiagonal_MPIDense, 1438 /*114*/ NULL, 1439 NULL, 1440 NULL, 1441 NULL, 1442 NULL, 1443 /*119*/ NULL, 1444 NULL, 1445 NULL, 1446 NULL, 1447 NULL, 1448 /*124*/ NULL, 1449 MatGetColumnReductions_MPIDense, 1450 NULL, 1451 NULL, 1452 NULL, 1453 /*129*/ NULL, 1454 NULL, 1455 MatTransposeMatMultSymbolic_MPIDense_MPIDense, 1456 MatTransposeMatMultNumeric_MPIDense_MPIDense, 1457 NULL, 1458 /*134*/ NULL, 1459 NULL, 1460 NULL, 1461 NULL, 1462 NULL, 1463 /*139*/ NULL, 1464 NULL, 1465 NULL, 1466 NULL, 1467 NULL, 1468 MatCreateMPIMatConcatenateSeqMat_MPIDense, 1469 /*145*/ NULL, 1470 NULL, 1471 NULL 1472 }; 1473 1474 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1475 { 1476 Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 1477 PetscBool iscuda; 1478 1479 PetscFunctionBegin; 1480 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call 
MatDenseRestoreSubMatrix() first"); 1481 PetscCall(PetscLayoutSetUp(mat->rmap)); 1482 PetscCall(PetscLayoutSetUp(mat->cmap)); 1483 if (!a->A) { 1484 PetscCall(MatCreate(PETSC_COMM_SELF,&a->A)); 1485 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A)); 1486 PetscCall(MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N)); 1487 } 1488 PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda)); 1489 PetscCall(MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE)); 1490 PetscCall(MatSeqDenseSetPreallocation(a->A,data)); 1491 mat->preallocated = PETSC_TRUE; 1492 mat->assembled = PETSC_TRUE; 1493 PetscFunctionReturn(0); 1494 } 1495 1496 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1497 { 1498 Mat B,C; 1499 1500 PetscFunctionBegin; 1501 PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&C)); 1502 PetscCall(MatConvert_SeqAIJ_SeqDense(C,MATSEQDENSE,MAT_INITIAL_MATRIX,&B)); 1503 PetscCall(MatDestroy(&C)); 1504 if (reuse == MAT_REUSE_MATRIX) { 1505 C = *newmat; 1506 } else C = NULL; 1507 PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C)); 1508 PetscCall(MatDestroy(&B)); 1509 if (reuse == MAT_INPLACE_MATRIX) { 1510 PetscCall(MatHeaderReplace(A,&C)); 1511 } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1512 PetscFunctionReturn(0); 1513 } 1514 1515 PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1516 { 1517 Mat B,C; 1518 1519 PetscFunctionBegin; 1520 PetscCall(MatDenseGetLocalMatrix(A,&C)); 1521 PetscCall(MatConvert_SeqDense_SeqAIJ(C,MATSEQAIJ,MAT_INITIAL_MATRIX,&B)); 1522 if (reuse == MAT_REUSE_MATRIX) { 1523 C = *newmat; 1524 } else C = NULL; 1525 PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C)); 1526 PetscCall(MatDestroy(&B)); 1527 if (reuse == MAT_INPLACE_MATRIX) { 1528 PetscCall(MatHeaderReplace(A,&C)); 1529 } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1530 PetscFunctionReturn(0); 1531 } 1532 1533 #if defined(PETSC_HAVE_ELEMENTAL) 1534 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1535 { 1536 Mat mat_elemental; 1537 PetscScalar *v; 1538 PetscInt m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols; 1539 1540 PetscFunctionBegin; 1541 if (reuse == MAT_REUSE_MATRIX) { 1542 mat_elemental = *newmat; 1543 PetscCall(MatZeroEntries(*newmat)); 1544 } else { 1545 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental)); 1546 PetscCall(MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N)); 1547 PetscCall(MatSetType(mat_elemental,MATELEMENTAL)); 1548 PetscCall(MatSetUp(mat_elemental)); 1549 PetscCall(MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE)); 1550 } 1551 1552 PetscCall(PetscMalloc2(m,&rows,N,&cols)); 1553 for (i=0; i<N; i++) cols[i] = i; 1554 for (i=0; i<m; i++) rows[i] = rstart + i; 1555 1556 /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 1557 PetscCall(MatDenseGetArray(A,&v)); 1558 PetscCall(MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES)); 1559 PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY)); 1560 PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY)); 1561 PetscCall(MatDenseRestoreArray(A,&v)); 1562 PetscCall(PetscFree2(rows,cols)); 1563 1564 if (reuse == 
MAT_INPLACE_MATRIX) { 1565 PetscCall(MatHeaderReplace(A,&mat_elemental)); 1566 } else { 1567 *newmat = mat_elemental; 1568 } 1569 PetscFunctionReturn(0); 1570 } 1571 #endif 1572 1573 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals) 1574 { 1575 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 1576 1577 PetscFunctionBegin; 1578 PetscCall(MatDenseGetColumn(mat->A,col,vals)); 1579 PetscFunctionReturn(0); 1580 } 1581 1582 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals) 1583 { 1584 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 1585 1586 PetscFunctionBegin; 1587 PetscCall(MatDenseRestoreColumn(mat->A,vals)); 1588 PetscFunctionReturn(0); 1589 } 1590 1591 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 1592 { 1593 Mat_MPIDense *mat; 1594 PetscInt m,nloc,N; 1595 1596 PetscFunctionBegin; 1597 PetscCall(MatGetSize(inmat,&m,&N)); 1598 PetscCall(MatGetLocalSize(inmat,NULL,&nloc)); 1599 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 1600 PetscInt sum; 1601 1602 if (n == PETSC_DECIDE) { 1603 PetscCall(PetscSplitOwnership(comm,&n,&N)); 1604 } 1605 /* Check sum(n) = N */ 1606 PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm)); 1607 PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N); 1608 1609 PetscCall(MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat)); 1610 PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 1611 } 1612 1613 /* numeric phase */ 1614 mat = (Mat_MPIDense*)(*outmat)->data; 1615 PetscCall(MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN)); 1616 PetscFunctionReturn(0); 1617 } 1618 1619 #if defined(PETSC_HAVE_CUDA) 1620 PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat) 1621 { 1622 Mat B; 1623 Mat_MPIDense *m; 1624 1625 PetscFunctionBegin; 1626 if (reuse == MAT_INITIAL_MATRIX) { 1627 PetscCall(MatDuplicate(M,MAT_COPY_VALUES,newmat)); 1628 } else if (reuse == MAT_REUSE_MATRIX) { 1629 PetscCall(MatCopy(M,*newmat,SAME_NONZERO_PATTERN)); 1630 } 1631 1632 B = *newmat; 1633 PetscCall(MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE)); 1634 PetscCall(PetscFree(B->defaultvectype)); 1635 PetscCall(PetscStrallocpy(VECSTANDARD,&B->defaultvectype)); 1636 PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE)); 1637 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL)); 1638 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL)); 1639 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL)); 1640 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL)); 1641 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL)); 1642 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL)); 1643 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL)); 1644 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL)); 1645 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL)); 1646 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL)); 1647 
PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL)); 1648 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL)); 1649 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL)); 1650 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL)); 1651 m = (Mat_MPIDense*)(B)->data; 1652 if (m->A) { 1653 PetscCall(MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A)); 1654 } 1655 B->ops->bindtocpu = NULL; 1656 B->offloadmask = PETSC_OFFLOAD_CPU; 1657 PetscFunctionReturn(0); 1658 } 1659 1660 PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat) 1661 { 1662 Mat B; 1663 Mat_MPIDense *m; 1664 1665 PetscFunctionBegin; 1666 if (reuse == MAT_INITIAL_MATRIX) { 1667 PetscCall(MatDuplicate(M,MAT_COPY_VALUES,newmat)); 1668 } else if (reuse == MAT_REUSE_MATRIX) { 1669 PetscCall(MatCopy(M,*newmat,SAME_NONZERO_PATTERN)); 1670 } 1671 1672 B = *newmat; 1673 PetscCall(PetscFree(B->defaultvectype)); 1674 PetscCall(PetscStrallocpy(VECCUDA,&B->defaultvectype)); 1675 PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA)); 1676 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C", MatConvert_MPIDenseCUDA_MPIDense)); 1677 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1678 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense)); 1679 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1680 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ)); 1681 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C", MatDenseCUDAGetArray_MPIDenseCUDA)); 1682 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C", MatDenseCUDAGetArrayRead_MPIDenseCUDA)); 1683 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C", MatDenseCUDAGetArrayWrite_MPIDenseCUDA)); 1684 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C", MatDenseCUDARestoreArray_MPIDenseCUDA)); 1685 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C", MatDenseCUDARestoreArrayRead_MPIDenseCUDA)); 1686 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C", MatDenseCUDARestoreArrayWrite_MPIDenseCUDA)); 1687 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C", MatDenseCUDAPlaceArray_MPIDenseCUDA)); 1688 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C", MatDenseCUDAResetArray_MPIDenseCUDA)); 1689 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C", MatDenseCUDAReplaceArray_MPIDenseCUDA)); 1690 m = (Mat_MPIDense*)(B->data); 1691 if (m->A) { 1692 PetscCall(MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A)); 1693 B->offloadmask = PETSC_OFFLOAD_BOTH; 1694 } else { 1695 B->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 1696 } 1697 PetscCall(MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE)); 1698 1699 B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA; 1700 PetscFunctionReturn(0); 1701 } 1702 #endif 1703 1704 PetscErrorCode 
MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v) 1705 { 1706 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1707 PetscInt lda; 1708 1709 PetscFunctionBegin; 1710 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1711 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1712 if (!a->cvec) { 1713 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec)); 1714 } 1715 a->vecinuse = col + 1; 1716 PetscCall(MatDenseGetLDA(a->A,&lda)); 1717 PetscCall(MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse)); 1718 PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda)); 1719 *v = a->cvec; 1720 PetscFunctionReturn(0); 1721 } 1722 1723 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v) 1724 { 1725 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1726 1727 PetscFunctionBegin; 1728 PetscCheck(a->vecinuse,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1729 PetscCheck(a->cvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 1730 a->vecinuse = 0; 1731 PetscCall(MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse)); 1732 PetscCall(VecResetArray(a->cvec)); 1733 *v = NULL; 1734 PetscFunctionReturn(0); 1735 } 1736 1737 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v) 1738 { 1739 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1740 PetscInt lda; 1741 1742 PetscFunctionBegin; 1743 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1744 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1745 if (!a->cvec) { 1746 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec)); 1747 } 1748 a->vecinuse = col + 1; 1749 PetscCall(MatDenseGetLDA(a->A,&lda)); 1750 PetscCall(MatDenseGetArrayRead(a->A,&a->ptrinuse)); 1751 PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda)); 1752 PetscCall(VecLockReadPush(a->cvec)); 1753 *v = a->cvec; 1754 PetscFunctionReturn(0); 1755 } 1756 1757 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v) 1758 { 1759 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1760 1761 PetscFunctionBegin; 1762 PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1763 PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 1764 a->vecinuse = 0; 1765 PetscCall(MatDenseRestoreArrayRead(a->A,&a->ptrinuse)); 1766 PetscCall(VecLockReadPop(a->cvec)); 1767 PetscCall(VecResetArray(a->cvec)); 1768 *v = NULL; 1769 PetscFunctionReturn(0); 1770 } 1771 1772 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v) 1773 { 1774 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1775 PetscInt lda; 1776 1777 PetscFunctionBegin; 1778 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1779 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1780 if (!a->cvec) { 1781 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec)); 1782 } 1783 a->vecinuse = col + 1; 1784 
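  /* expose column col of the local storage through the cached MPI vector: fetch the leading dimension, grab a write-only pointer, and place that column in cvec without copying */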
PetscCall(MatDenseGetLDA(a->A,&lda)); 1785 PetscCall(MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse)); 1786 PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda)); 1787 *v = a->cvec; 1788 PetscFunctionReturn(0); 1789 } 1790 1791 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v) 1792 { 1793 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1794 1795 PetscFunctionBegin; 1796 PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1797 PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 1798 a->vecinuse = 0; 1799 PetscCall(MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse)); 1800 PetscCall(VecResetArray(a->cvec)); 1801 *v = NULL; 1802 PetscFunctionReturn(0); 1803 } 1804 1805 PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v) 1806 { 1807 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1808 Mat_MPIDense *c; 1809 MPI_Comm comm; 1810 1811 PetscFunctionBegin; 1812 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 1813 PetscCheck(!a->vecinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1814 PetscCheck(!a->matinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1815 if (!a->cmat) { 1816 PetscCall(MatCreate(comm,&a->cmat)); 1817 PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat)); 1818 PetscCall(MatSetType(a->cmat,((PetscObject)A)->type_name)); 1819 PetscCall(PetscLayoutReference(A->rmap,&a->cmat->rmap)); 1820 PetscCall(PetscLayoutSetSize(a->cmat->cmap,cend-cbegin)); 1821 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 1822 } else if (cend-cbegin != a->cmat->cmap->N) { 1823 PetscCall(PetscLayoutDestroy(&a->cmat->cmap)); 1824 PetscCall(PetscLayoutCreate(comm,&a->cmat->cmap)); 1825 PetscCall(PetscLayoutSetSize(a->cmat->cmap,cend-cbegin)); 1826 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 1827 } 1828 c = (Mat_MPIDense*)a->cmat->data; 1829 PetscCheck(!c->A,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1830 PetscCall(MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A)); 1831 a->cmat->preallocated = PETSC_TRUE; 1832 a->cmat->assembled = PETSC_TRUE; 1833 a->matinuse = cbegin + 1; 1834 *v = a->cmat; 1835 PetscFunctionReturn(0); 1836 } 1837 1838 PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v) 1839 { 1840 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1841 Mat_MPIDense *c; 1842 1843 PetscFunctionBegin; 1844 PetscCheck(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first"); 1845 PetscCheck(a->cmat,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix"); 1846 PetscCheck(*v == a->cmat,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()"); 1847 a->matinuse = 0; 1848 c = (Mat_MPIDense*)a->cmat->data; 1849 PetscCall(MatDenseRestoreSubMatrix(a->A,&c->A)); 1850 *v = NULL; 1851 PetscFunctionReturn(0); 1852 } 1853 1854 /*MC 1855 MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices. 1856 1857 Options Database Keys: 1858 . 
-mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions() 1859 1860 Level: beginner 1861 1862 .seealso: `MatCreateDense()` 1863 1864 M*/ 1865 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) 1866 { 1867 Mat_MPIDense *a; 1868 1869 PetscFunctionBegin; 1870 PetscCall(PetscNewLog(mat,&a)); 1871 mat->data = (void*)a; 1872 PetscCall(PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps))); 1873 1874 mat->insertmode = NOT_SET_VALUES; 1875 1876 /* build cache for off array entries formed */ 1877 a->donotstash = PETSC_FALSE; 1878 1879 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash)); 1880 1881 /* stuff used for matrix vector multiply */ 1882 a->lvec = NULL; 1883 a->Mvctx = NULL; 1884 a->roworiented = PETSC_TRUE; 1885 1886 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense)); 1887 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense)); 1888 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense)); 1889 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense)); 1890 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense)); 1891 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense)); 1892 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense)); 1893 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense)); 1894 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense)); 1895 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense)); 1896 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense)); 1897 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense)); 1898 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense)); 1899 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense)); 1900 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense)); 1901 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense)); 1902 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense)); 1903 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense)); 1904 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense)); 1905 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense)); 1906 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",MatConvert_MPIDense_MPIAIJ)); 1907 #if defined(PETSC_HAVE_ELEMENTAL) 1908 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental)); 1909 #endif 1910 #if defined(PETSC_HAVE_SCALAPACK) 1911 
PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK)); 1912 #endif 1913 #if defined(PETSC_HAVE_CUDA) 1914 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA)); 1915 #endif 1916 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense)); 1917 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense)); 1918 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ)); 1919 #if defined(PETSC_HAVE_CUDA) 1920 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense)); 1921 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ)); 1922 #endif 1923 1924 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense)); 1925 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense)); 1926 PetscCall(PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE)); 1927 PetscFunctionReturn(0); 1928 } 1929 1930 /*MC 1931 MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs. 1932 1933 Options Database Keys: 1934 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions() 1935 1936 Level: beginner 1937 1938 .seealso: 1939 1940 M*/ 1941 #if defined(PETSC_HAVE_CUDA) 1942 #include <petsc/private/deviceimpl.h> 1943 PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B) 1944 { 1945 PetscFunctionBegin; 1946 PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA)); 1947 PetscCall(MatCreate_MPIDense(B)); 1948 PetscCall(MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B)); 1949 PetscFunctionReturn(0); 1950 } 1951 #endif 1952 1953 /*MC 1954 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1955 1956 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1957 and MATMPIDENSE otherwise. 1958 1959 Options Database Keys: 1960 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1961 1962 Level: beginner 1963 1964 .seealso: `MATSEQDENSE,MATMPIDENSE,MATDENSECUDA` 1965 M*/ 1966 1967 /*MC 1968 MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs. 1969 1970 This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator, 1971 and MATMPIDENSECUDA otherwise. 1972 1973 Options Database Keys: 1974 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions() 1975 1976 Level: beginner 1977 1978 .seealso: `MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE` 1979 M*/ 1980 1981 /*@C 1982 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1983 1984 Collective 1985 1986 Input Parameters: 1987 . B - the matrix 1988 - data - optional location of matrix data. Set data=NULL for PETSc 1989 to control all matrix memory allocation. 1990 1991 Notes: 1992 The dense format is fully compatible with standard Fortran 77 1993 storage by columns. 
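   A minimal creation sketch, letting PETSc allocate the storage by passing data=NULL (comm, m and N below are placeholders for the communicator and the desired local row / global column counts):
.vb
   Mat B;
   MatCreate(comm,&B);
   MatSetSizes(B,m,PETSC_DECIDE,PETSC_DECIDE,N);
   MatSetType(B,MATMPIDENSE);
   MatMPIDenseSetPreallocation(B,NULL);
.ve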
1994 1995 The data input variable is intended primarily for Fortran programmers 1996 who wish to allocate their own matrix memory space. Most users should 1997 set data=NULL. 1998 1999 Level: intermediate 2000 2001 .seealso: `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 2002 @*/ 2003 PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data) 2004 { 2005 PetscFunctionBegin; 2006 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 2007 PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data)); 2008 PetscFunctionReturn(0); 2009 } 2010 2011 /*@ 2012 MatDensePlaceArray - Allows one to replace the array in a dense matrix with an 2013 array provided by the user. This is useful to avoid copying an array 2014 into a matrix 2015 2016 Not Collective 2017 2018 Input Parameters: 2019 + mat - the matrix 2020 - array - the array in column major order 2021 2022 Notes: 2023 You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be 2024 freed when the matrix is destroyed. 2025 2026 Level: developer 2027 2028 .seealso: `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()` 2029 2030 @*/ 2031 PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar *array) 2032 { 2033 PetscFunctionBegin; 2034 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2035 PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array)); 2036 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2037 #if defined(PETSC_HAVE_CUDA) 2038 mat->offloadmask = PETSC_OFFLOAD_CPU; 2039 #endif 2040 PetscFunctionReturn(0); 2041 } 2042 2043 /*@ 2044 MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray() 2045 2046 Not Collective 2047 2048 Input Parameters: 2049 . mat - the matrix 2050 2051 Notes: 2052 You can only call this after a call to MatDensePlaceArray() 2053 2054 Level: developer 2055 2056 .seealso: `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()` 2057 2058 @*/ 2059 PetscErrorCode MatDenseResetArray(Mat mat) 2060 { 2061 PetscFunctionBegin; 2062 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2063 PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat)); 2064 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2065 PetscFunctionReturn(0); 2066 } 2067 2068 /*@ 2069 MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an 2070 array provided by the user. This is useful to avoid copying an array 2071 into a matrix 2072 2073 Not Collective 2074 2075 Input Parameters: 2076 + mat - the matrix 2077 - array - the array in column major order 2078 2079 Notes: 2080 The memory passed in MUST be obtained with PetscMalloc() and CANNOT be 2081 freed by the user. It will be freed when the matrix is destroyed. 
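   A minimal sketch of the intended use (lda and N are placeholders for the local leading dimension and the global column count; the buffer must match the matrix's local storage):
.vb
   PetscScalar *array;
   PetscMalloc1((size_t)lda*N,&array);
   /* fill array in column-major order */
   MatDenseReplaceArray(mat,array);   /* mat now owns array; do not free it yourself */
.ve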
2082 2083 Level: developer 2084 2085 .seealso: `MatDenseGetArray()`, `VecReplaceArray()` 2086 @*/ 2087 PetscErrorCode MatDenseReplaceArray(Mat mat,const PetscScalar *array) 2088 { 2089 PetscFunctionBegin; 2090 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2091 PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array)); 2092 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2093 #if defined(PETSC_HAVE_CUDA) 2094 mat->offloadmask = PETSC_OFFLOAD_CPU; 2095 #endif 2096 PetscFunctionReturn(0); 2097 } 2098 2099 #if defined(PETSC_HAVE_CUDA) 2100 /*@C 2101 MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an 2102 array provided by the user. This is useful to avoid copying an array 2103 into a matrix 2104 2105 Not Collective 2106 2107 Input Parameters: 2108 + mat - the matrix 2109 - array - the array in column major order 2110 2111 Notes: 2112 You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be 2113 freed when the matrix is destroyed. The array must have been allocated with cudaMalloc(). 2114 2115 Level: developer 2116 2117 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()` 2118 @*/ 2119 PetscErrorCode MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array) 2120 { 2121 PetscFunctionBegin; 2122 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2123 PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array)); 2124 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2125 mat->offloadmask = PETSC_OFFLOAD_GPU; 2126 PetscFunctionReturn(0); 2127 } 2128 2129 /*@C 2130 MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray() 2131 2132 Not Collective 2133 2134 Input Parameters: 2135 . mat - the matrix 2136 2137 Notes: 2138 You can only call this after a call to MatDenseCUDAPlaceArray() 2139 2140 Level: developer 2141 2142 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()` 2143 2144 @*/ 2145 PetscErrorCode MatDenseCUDAResetArray(Mat mat) 2146 { 2147 PetscFunctionBegin; 2148 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2149 PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat)); 2150 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2151 PetscFunctionReturn(0); 2152 } 2153 2154 /*@C 2155 MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an 2156 array provided by the user. This is useful to avoid copying an array 2157 into a matrix 2158 2159 Not Collective 2160 2161 Input Parameters: 2162 + mat - the matrix 2163 - array - the array in column major order 2164 2165 Notes: 2166 This permanently replaces the GPU array and frees the memory associated with the old GPU array. 2167 The memory passed in CANNOT be freed by the user. It will be freed 2168 when the matrix is destroyed. The array should respect the matrix leading dimension. 
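   A minimal sketch, assuming a device buffer sized for the local leading dimension (lda) times the global column count (N) of mat:
.vb
   PetscScalar *d_array;
   cudaMalloc((void**)&d_array,(size_t)lda*N*sizeof(PetscScalar));
   /* fill d_array on the device */
   MatDenseCUDAReplaceArray(mat,d_array);   /* mat now owns d_array; do not cudaFree() it yourself */
.ve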
2169 2170 Level: developer 2171 2172 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()` 2173 @*/ 2174 PetscErrorCode MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array) 2175 { 2176 PetscFunctionBegin; 2177 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2178 PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array)); 2179 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2180 mat->offloadmask = PETSC_OFFLOAD_GPU; 2181 PetscFunctionReturn(0); 2182 } 2183 2184 /*@C 2185 MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix. 2186 2187 Not Collective 2188 2189 Input Parameters: 2190 . A - the matrix 2191 2192 Output Parameters 2193 . array - the GPU array in column major order 2194 2195 Notes: 2196 The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed. 2197 2198 Level: developer 2199 2200 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArrayRead()` 2201 @*/ 2202 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a) 2203 { 2204 PetscFunctionBegin; 2205 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2206 PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a)); 2207 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2208 PetscFunctionReturn(0); 2209 } 2210 2211 /*@C 2212 MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite(). 2213 2214 Not Collective 2215 2216 Input Parameters: 2217 + A - the matrix 2218 - array - the GPU array in column major order 2219 2220 Notes: 2221 2222 Level: developer 2223 2224 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()` 2225 @*/ 2226 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a) 2227 { 2228 PetscFunctionBegin; 2229 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2230 PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a)); 2231 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2232 A->offloadmask = PETSC_OFFLOAD_GPU; 2233 PetscFunctionReturn(0); 2234 } 2235 2236 /*@C 2237 MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed. 2238 2239 Not Collective 2240 2241 Input Parameters: 2242 . A - the matrix 2243 2244 Output Parameters 2245 . array - the GPU array in column major order 2246 2247 Notes: 2248 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). 
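   A typical read-only access pattern (sketch; d_a is a placeholder for the device pointer):
.vb
   const PetscScalar *d_a;
   MatDenseCUDAGetArrayRead(A,&d_a);
   /* launch kernels or call device BLAS routines that only read d_a */
   MatDenseCUDARestoreArrayRead(A,&d_a);
.ve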
2249 2250 Level: developer 2251 2252 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()` 2253 @*/ 2254 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a) 2255 { 2256 PetscFunctionBegin; 2257 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2258 PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a)); 2259 PetscFunctionReturn(0); 2260 } 2261 2262 /*@C 2263 MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead(). 2264 2265 Not Collective 2266 2267 Input Parameters: 2268 + A - the matrix 2269 - array - the GPU array in column major order 2270 2271 Notes: 2272 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). 2273 2274 Level: developer 2275 2276 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()` 2277 @*/ 2278 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a) 2279 { 2280 PetscFunctionBegin; 2281 PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a)); 2282 PetscFunctionReturn(0); 2283 } 2284 2285 /*@C 2286 MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed. 2287 2288 Not Collective 2289 2290 Input Parameters: 2291 . A - the matrix 2292 2293 Output Parameters 2294 . array - the GPU array in column major order 2295 2296 Notes: 2297 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead(). 2298 2299 Level: developer 2300 2301 .seealso: `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()` 2302 @*/ 2303 PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a) 2304 { 2305 PetscFunctionBegin; 2306 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2307 PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a)); 2308 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2309 PetscFunctionReturn(0); 2310 } 2311 2312 /*@C 2313 MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray(). 2314 2315 Not Collective 2316 2317 Input Parameters: 2318 + A - the matrix 2319 - array - the GPU array in column major order 2320 2321 Notes: 2322 2323 Level: developer 2324 2325 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()` 2326 @*/ 2327 PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a) 2328 { 2329 PetscFunctionBegin; 2330 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2331 PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a)); 2332 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2333 A->offloadmask = PETSC_OFFLOAD_GPU; 2334 PetscFunctionReturn(0); 2335 } 2336 #endif 2337 2338 /*@C 2339 MatCreateDense - Creates a matrix in dense format. 2340 2341 Collective 2342 2343 Input Parameters: 2344 + comm - MPI communicator 2345 . 
m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2346 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2347 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 2348 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 2349 - data - optional location of matrix data. Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 2350 to control all matrix memory allocation. 2351 2352 Output Parameter: 2353 . A - the matrix 2354 2355 Notes: 2356 The dense format is fully compatible with standard Fortran 77 2357 storage by columns. 2358 Note that, although local portions of the matrix are stored in column-major 2359 order, the matrix is partitioned across MPI ranks by row. 2360 2361 The data input variable is intended primarily for Fortran programmers 2362 who wish to allocate their own matrix memory space. Most users should 2363 set data=NULL (PETSC_NULL_SCALAR for Fortran users). 2364 2365 The user MUST specify either the local or global matrix dimensions 2366 (possibly both). 2367 2368 Level: intermediate 2369 2370 .seealso: `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 2371 @*/ 2372 PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 2373 { 2374 PetscFunctionBegin; 2375 PetscCall(MatCreate(comm,A)); 2376 PetscCall(MatSetSizes(*A,m,n,M,N)); 2377 PetscCall(MatSetType(*A,MATDENSE)); 2378 PetscCall(MatSeqDenseSetPreallocation(*A,data)); 2379 PetscCall(MatMPIDenseSetPreallocation(*A,data)); 2380 PetscFunctionReturn(0); 2381 } 2382 2383 #if defined(PETSC_HAVE_CUDA) 2384 /*@C 2385 MatCreateDenseCUDA - Creates a matrix in dense format using CUDA. 2386 2387 Collective 2388 2389 Input Parameters: 2390 + comm - MPI communicator 2391 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2392 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2393 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 2394 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 2395 - data - optional location of GPU matrix data. Set data=NULL for PETSc 2396 to control matrix memory allocation. 2397 2398 Output Parameter: 2399 . 
A - the matrix 2400 2401 Notes: 2402 2403 Level: intermediate 2404 2405 .seealso: `MatCreate()`, `MatCreateDense()` 2406 @*/ 2407 PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 2408 { 2409 PetscFunctionBegin; 2410 PetscCall(MatCreate(comm,A)); 2411 PetscValidLogicalCollectiveBool(*A,!!data,6); 2412 PetscCall(MatSetSizes(*A,m,n,M,N)); 2413 PetscCall(MatSetType(*A,MATDENSECUDA)); 2414 PetscCall(MatSeqDenseCUDASetPreallocation(*A,data)); 2415 PetscCall(MatMPIDenseCUDASetPreallocation(*A,data)); 2416 PetscFunctionReturn(0); 2417 } 2418 #endif 2419 2420 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 2421 { 2422 Mat mat; 2423 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 2424 2425 PetscFunctionBegin; 2426 *newmat = NULL; 2427 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&mat)); 2428 PetscCall(MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N)); 2429 PetscCall(MatSetType(mat,((PetscObject)A)->type_name)); 2430 a = (Mat_MPIDense*)mat->data; 2431 2432 mat->factortype = A->factortype; 2433 mat->assembled = PETSC_TRUE; 2434 mat->preallocated = PETSC_TRUE; 2435 2436 mat->insertmode = NOT_SET_VALUES; 2437 a->donotstash = oldmat->donotstash; 2438 2439 PetscCall(PetscLayoutReference(A->rmap,&mat->rmap)); 2440 PetscCall(PetscLayoutReference(A->cmap,&mat->cmap)); 2441 2442 PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A)); 2443 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A)); 2444 2445 *newmat = mat; 2446 PetscFunctionReturn(0); 2447 } 2448 2449 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer) 2450 { 2451 PetscBool isbinary; 2452 #if defined(PETSC_HAVE_HDF5) 2453 PetscBool ishdf5; 2454 #endif 2455 2456 PetscFunctionBegin; 2457 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2458 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2459 /* force binary viewer to load .info file if it has not yet done so */ 2460 PetscCall(PetscViewerSetUp(viewer)); 2461 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 2462 #if defined(PETSC_HAVE_HDF5) 2463 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5)); 2464 #endif 2465 if (isbinary) { 2466 PetscCall(MatLoad_Dense_Binary(newMat,viewer)); 2467 #if defined(PETSC_HAVE_HDF5) 2468 } else if (ishdf5) { 2469 PetscCall(MatLoad_Dense_HDF5(newMat,viewer)); 2470 #endif 2471 } else SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2472 PetscFunctionReturn(0); 2473 } 2474 2475 static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag) 2476 { 2477 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 2478 Mat a,b; 2479 2480 PetscFunctionBegin; 2481 a = matA->A; 2482 b = matB->A; 2483 PetscCall(MatEqual(a,b,flag)); 2484 PetscCall(MPIU_Allreduce(MPI_IN_PLACE,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A))); 2485 PetscFunctionReturn(0); 2486 } 2487 2488 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data) 2489 { 2490 Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data; 2491 2492 PetscFunctionBegin; 2493 PetscCall(PetscFree2(atb->sendbuf,atb->recvcounts)); 2494 PetscCall(MatDestroy(&atb->atb)); 2495 PetscCall(PetscFree(atb)); 2496 PetscFunctionReturn(0); 2497 } 2498 2499 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data) 2500 { 2501 
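  /* release the communication buffers and counts cached by the A*B^T symbolic phase */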
Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data; 2502 2503 PetscFunctionBegin; 2504 PetscCall(PetscFree2(abt->buf[0],abt->buf[1])); 2505 PetscCall(PetscFree2(abt->recvcounts,abt->recvdispls)); 2506 PetscCall(PetscFree(abt)); 2507 PetscFunctionReturn(0); 2508 } 2509 2510 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2511 { 2512 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2513 Mat_TransMatMultDense *atb; 2514 MPI_Comm comm; 2515 PetscMPIInt size,*recvcounts; 2516 PetscScalar *carray,*sendbuf; 2517 const PetscScalar *atbarray; 2518 PetscInt i,cN=C->cmap->N,proc,k,j,lda; 2519 const PetscInt *ranges; 2520 2521 PetscFunctionBegin; 2522 MatCheckProduct(C,3); 2523 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2524 atb = (Mat_TransMatMultDense *)C->product->data; 2525 recvcounts = atb->recvcounts; 2526 sendbuf = atb->sendbuf; 2527 2528 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 2529 PetscCallMPI(MPI_Comm_size(comm,&size)); 2530 2531 /* compute atbarray = aseq^T * bseq */ 2532 PetscCall(MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb)); 2533 2534 PetscCall(MatGetOwnershipRanges(C,&ranges)); 2535 2536 /* arrange atbarray into sendbuf */ 2537 PetscCall(MatDenseGetArrayRead(atb->atb,&atbarray)); 2538 PetscCall(MatDenseGetLDA(atb->atb,&lda)); 2539 for (proc=0, k=0; proc<size; proc++) { 2540 for (j=0; j<cN; j++) { 2541 for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*lda]; 2542 } 2543 } 2544 PetscCall(MatDenseRestoreArrayRead(atb->atb,&atbarray)); 2545 2546 /* sum all atbarray to local values of C */ 2547 PetscCall(MatDenseGetArrayWrite(c->A,&carray)); 2548 PetscCallMPI(MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm)); 2549 PetscCall(MatDenseRestoreArrayWrite(c->A,&carray)); 2550 PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); 2551 PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); 2552 PetscFunctionReturn(0); 2553 } 2554 2555 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 2556 { 2557 MPI_Comm comm; 2558 PetscMPIInt size; 2559 PetscInt cm=A->cmap->n,cM,cN=B->cmap->N; 2560 Mat_TransMatMultDense *atb; 2561 PetscBool cisdense; 2562 PetscInt i; 2563 const PetscInt *ranges; 2564 2565 PetscFunctionBegin; 2566 MatCheckProduct(C,3); 2567 PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2568 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 2569 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) { 2570 SETERRQ(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2571 } 2572 2573 /* create matrix product C */ 2574 PetscCall(MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N)); 2575 PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"")); 2576 if (!cisdense) { 2577 PetscCall(MatSetType(C,((PetscObject)A)->type_name)); 2578 } 2579 PetscCall(MatSetUp(C)); 2580 2581 /* create data structure for reuse C */ 2582 PetscCallMPI(MPI_Comm_size(comm,&size)); 2583 PetscCall(PetscNew(&atb)); 2584 cM = C->rmap->N; 2585 PetscCall(PetscMalloc2(cM*cN,&atb->sendbuf,size,&atb->recvcounts)); 2586 PetscCall(MatGetOwnershipRanges(C,&ranges)); 2587 
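  /* rank i receives its (ranges[i+1]-ranges[i]) local rows of C = A^T*B, each of width cN, from the MPI_Reduce_scatter in the numeric phase */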
for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN; 2588 2589 C->product->data = atb; 2590 C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 2591 PetscFunctionReturn(0); 2592 } 2593 2594 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2595 { 2596 MPI_Comm comm; 2597 PetscMPIInt i, size; 2598 PetscInt maxRows, bufsiz; 2599 PetscMPIInt tag; 2600 PetscInt alg; 2601 Mat_MatTransMultDense *abt; 2602 Mat_Product *product = C->product; 2603 PetscBool flg; 2604 2605 PetscFunctionBegin; 2606 MatCheckProduct(C,4); 2607 PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2608 /* check local size of A and B */ 2609 PetscCheck(A->cmap->n == B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")",A->cmap->n,B->cmap->n); 2610 2611 PetscCall(PetscStrcmp(product->alg,"allgatherv",&flg)); 2612 alg = flg ? 0 : 1; 2613 2614 /* setup matrix product C */ 2615 PetscCall(MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N)); 2616 PetscCall(MatSetType(C,MATMPIDENSE)); 2617 PetscCall(MatSetUp(C)); 2618 PetscCall(PetscObjectGetNewTag((PetscObject)C,&tag)); 2619 2620 /* create data structure for reuse C */ 2621 PetscCall(PetscObjectGetComm((PetscObject)C,&comm)); 2622 PetscCallMPI(MPI_Comm_size(comm,&size)); 2623 PetscCall(PetscNew(&abt)); 2624 abt->tag = tag; 2625 abt->alg = alg; 2626 switch (alg) { 2627 case 1: /* alg: "cyclic" */ 2628 for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i])); 2629 bufsiz = A->cmap->N * maxRows; 2630 PetscCall(PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]))); 2631 break; 2632 default: /* alg: "allgatherv" */ 2633 PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]))); 2634 PetscCall(PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls))); 2635 for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N; 2636 for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i]; 2637 break; 2638 } 2639 2640 C->product->data = abt; 2641 C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 2642 C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense; 2643 PetscFunctionReturn(0); 2644 } 2645 2646 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) 2647 { 2648 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2649 Mat_MatTransMultDense *abt; 2650 MPI_Comm comm; 2651 PetscMPIInt rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom; 2652 PetscScalar *sendbuf, *recvbuf=NULL, *cv; 2653 PetscInt i,cK=A->cmap->N,k,j,bn; 2654 PetscScalar _DOne=1.0,_DZero=0.0; 2655 const PetscScalar *av,*bv; 2656 PetscBLASInt cm, cn, ck, alda, blda = 0, clda; 2657 MPI_Request reqs[2]; 2658 const PetscInt *ranges; 2659 2660 PetscFunctionBegin; 2661 MatCheckProduct(C,3); 2662 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2663 abt = (Mat_MatTransMultDense*)C->product->data; 2664 PetscCall(PetscObjectGetComm((PetscObject)C,&comm)); 2665 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 2666 PetscCallMPI(MPI_Comm_size(comm,&size)); 2667 PetscCall(MatDenseGetArrayRead(a->A,&av)); 2668 PetscCall(MatDenseGetArrayRead(b->A,&bv)); 2669 
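  /* ring ("cyclic") algorithm: the transposed local rows of B circulate around the communicator; at each of the size steps the local rows of A are multiplied against the incoming chunk, filling the matching column block of C = A*B^T */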
PetscCall(MatDenseGetArrayWrite(c->A,&cv)); 2670 PetscCall(MatDenseGetLDA(a->A,&i)); 2671 PetscCall(PetscBLASIntCast(i,&alda)); 2672 PetscCall(MatDenseGetLDA(b->A,&i)); 2673 PetscCall(PetscBLASIntCast(i,&blda)); 2674 PetscCall(MatDenseGetLDA(c->A,&i)); 2675 PetscCall(PetscBLASIntCast(i,&clda)); 2676 PetscCall(MatGetOwnershipRanges(B,&ranges)); 2677 bn = B->rmap->n; 2678 if (blda == bn) { 2679 sendbuf = (PetscScalar*)bv; 2680 } else { 2681 sendbuf = abt->buf[0]; 2682 for (k = 0, i = 0; i < cK; i++) { 2683 for (j = 0; j < bn; j++, k++) { 2684 sendbuf[k] = bv[i * blda + j]; 2685 } 2686 } 2687 } 2688 if (size > 1) { 2689 sendto = (rank + size - 1) % size; 2690 recvfrom = (rank + size + 1) % size; 2691 } else { 2692 sendto = recvfrom = 0; 2693 } 2694 PetscCall(PetscBLASIntCast(cK,&ck)); 2695 PetscCall(PetscBLASIntCast(c->A->rmap->n,&cm)); 2696 recvisfrom = rank; 2697 for (i = 0; i < size; i++) { 2698 /* we have finished receiving in sending, bufs can be read/modified */ 2699 PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 2700 PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 2701 2702 if (nextrecvisfrom != rank) { 2703 /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2704 sendsiz = cK * bn; 2705 recvsiz = cK * nextbn; 2706 recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; 2707 PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0])); 2708 PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1])); 2709 } 2710 2711 /* local aseq * sendbuf^T */ 2712 PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn)); 2713 if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda)); 2714 2715 if (nextrecvisfrom != rank) { 2716 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2717 PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE)); 2718 } 2719 bn = nextbn; 2720 recvisfrom = nextrecvisfrom; 2721 sendbuf = recvbuf; 2722 } 2723 PetscCall(MatDenseRestoreArrayRead(a->A,&av)); 2724 PetscCall(MatDenseRestoreArrayRead(b->A,&bv)); 2725 PetscCall(MatDenseRestoreArrayWrite(c->A,&cv)); 2726 PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); 2727 PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); 2728 PetscFunctionReturn(0); 2729 } 2730 2731 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2732 { 2733 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2734 Mat_MatTransMultDense *abt; 2735 MPI_Comm comm; 2736 PetscMPIInt size; 2737 PetscScalar *cv, *sendbuf, *recvbuf; 2738 const PetscScalar *av,*bv; 2739 PetscInt blda,i,cK=A->cmap->N,k,j,bn; 2740 PetscScalar _DOne=1.0,_DZero=0.0; 2741 PetscBLASInt cm, cn, ck, alda, clda; 2742 2743 PetscFunctionBegin; 2744 MatCheckProduct(C,3); 2745 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2746 abt = (Mat_MatTransMultDense*)C->product->data; 2747 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 2748 PetscCallMPI(MPI_Comm_size(comm,&size)); 2749 PetscCall(MatDenseGetArrayRead(a->A,&av)); 2750 PetscCall(MatDenseGetArrayRead(b->A,&bv)); 2751 PetscCall(MatDenseGetArrayWrite(c->A,&cv)); 2752 PetscCall(MatDenseGetLDA(a->A,&i)); 2753 PetscCall(PetscBLASIntCast(i,&alda)); 2754 PetscCall(MatDenseGetLDA(b->A,&blda)); 2755 
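  /* allgatherv algorithm: every rank packs the transpose of its local rows of B, the pieces are gathered on all ranks, and one local GEMM then forms this rank's rows of C = A*B^T */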
PetscCall(MatDenseGetLDA(c->A,&i)); 2756 PetscCall(PetscBLASIntCast(i,&clda)); 2757 /* copy transpose of B into buf[0] */ 2758 bn = B->rmap->n; 2759 sendbuf = abt->buf[0]; 2760 recvbuf = abt->buf[1]; 2761 for (k = 0, j = 0; j < bn; j++) { 2762 for (i = 0; i < cK; i++, k++) { 2763 sendbuf[k] = bv[i * blda + j]; 2764 } 2765 } 2766 PetscCall(MatDenseRestoreArrayRead(b->A,&bv)); 2767 PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm)); 2768 PetscCall(PetscBLASIntCast(cK,&ck)); 2769 PetscCall(PetscBLASIntCast(c->A->rmap->n,&cm)); 2770 PetscCall(PetscBLASIntCast(c->A->cmap->n,&cn)); 2771 if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda)); 2772 PetscCall(MatDenseRestoreArrayRead(a->A,&av)); 2773 PetscCall(MatDenseRestoreArrayRead(b->A,&bv)); 2774 PetscCall(MatDenseRestoreArrayWrite(c->A,&cv)); 2775 PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); 2776 PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); 2777 PetscFunctionReturn(0); 2778 } 2779 2780 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2781 { 2782 Mat_MatTransMultDense *abt; 2783 2784 PetscFunctionBegin; 2785 MatCheckProduct(C,3); 2786 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2787 abt = (Mat_MatTransMultDense*)C->product->data; 2788 switch (abt->alg) { 2789 case 1: 2790 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); 2791 break; 2792 default: 2793 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); 2794 break; 2795 } 2796 PetscFunctionReturn(0); 2797 } 2798 2799 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) 2800 { 2801 Mat_MatMultDense *ab = (Mat_MatMultDense*)data; 2802 2803 PetscFunctionBegin; 2804 PetscCall(MatDestroy(&ab->Ce)); 2805 PetscCall(MatDestroy(&ab->Ae)); 2806 PetscCall(MatDestroy(&ab->Be)); 2807 PetscCall(PetscFree(ab)); 2808 PetscFunctionReturn(0); 2809 } 2810 2811 #if defined(PETSC_HAVE_ELEMENTAL) 2812 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2813 { 2814 Mat_MatMultDense *ab; 2815 2816 PetscFunctionBegin; 2817 MatCheckProduct(C,3); 2818 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data"); 2819 ab = (Mat_MatMultDense*)C->product->data; 2820 PetscCall(MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae)); 2821 PetscCall(MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be)); 2822 PetscCall(MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce)); 2823 PetscCall(MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C)); 2824 PetscFunctionReturn(0); 2825 } 2826 2827 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 2828 { 2829 Mat Ae,Be,Ce; 2830 Mat_MatMultDense *ab; 2831 2832 PetscFunctionBegin; 2833 MatCheckProduct(C,4); 2834 PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2835 /* check local size of A and B */ 2836 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 2837 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2838 } 2839 2840 /* create elemental matrices Ae and Be */ 2841 
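  /* the Elemental copies are kept in the product data so the numeric phase can convert into and reuse them */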
PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae)); 2842 PetscCall(MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N)); 2843 PetscCall(MatSetType(Ae,MATELEMENTAL)); 2844 PetscCall(MatSetUp(Ae)); 2845 PetscCall(MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE)); 2846 2847 PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be)); 2848 PetscCall(MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N)); 2849 PetscCall(MatSetType(Be,MATELEMENTAL)); 2850 PetscCall(MatSetUp(Be)); 2851 PetscCall(MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE)); 2852 2853 /* compute symbolic Ce = Ae*Be */ 2854 PetscCall(MatCreate(PetscObjectComm((PetscObject)C),&Ce)); 2855 PetscCall(MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce)); 2856 2857 /* setup C */ 2858 PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE)); 2859 PetscCall(MatSetType(C,MATDENSE)); 2860 PetscCall(MatSetUp(C)); 2861 2862 /* create data structure for reuse Cdense */ 2863 PetscCall(PetscNew(&ab)); 2864 ab->Ae = Ae; 2865 ab->Be = Be; 2866 ab->Ce = Ce; 2867 2868 C->product->data = ab; 2869 C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2870 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2871 PetscFunctionReturn(0); 2872 } 2873 #endif 2874 /* ----------------------------------------------- */ 2875 #if defined(PETSC_HAVE_ELEMENTAL) 2876 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 2877 { 2878 PetscFunctionBegin; 2879 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 2880 C->ops->productsymbolic = MatProductSymbolic_AB; 2881 PetscFunctionReturn(0); 2882 } 2883 #endif 2884 2885 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 2886 { 2887 Mat_Product *product = C->product; 2888 Mat A = product->A,B=product->B; 2889 2890 PetscFunctionBegin; 2891 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) 2892 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2893 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense; 2894 C->ops->productsymbolic = MatProductSymbolic_AtB; 2895 PetscFunctionReturn(0); 2896 } 2897 2898 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 2899 { 2900 Mat_Product *product = C->product; 2901 const char *algTypes[2] = {"allgatherv","cyclic"}; 2902 PetscInt alg,nalg = 2; 2903 PetscBool flg = PETSC_FALSE; 2904 2905 PetscFunctionBegin; 2906 /* Set default algorithm */ 2907 alg = 0; /* default is allgatherv */ 2908 PetscCall(PetscStrcmp(product->alg,"default",&flg)); 2909 if (flg) { 2910 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 2911 } 2912 2913 /* Get runtime option */ 2914 if (product->api_user) { 2915 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat"); 2916 PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg)); 2917 PetscOptionsEnd(); 2918 } else { 2919 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat"); 2920 PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg)); 2921 PetscOptionsEnd(); 2922 } 2923 if (flg) { 2924 
PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 2925 } 2926 2927 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 2928 C->ops->productsymbolic = MatProductSymbolic_ABt; 2929 PetscFunctionReturn(0); 2930 } 2931 2932 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 2933 { 2934 Mat_Product *product = C->product; 2935 2936 PetscFunctionBegin; 2937 switch (product->type) { 2938 #if defined(PETSC_HAVE_ELEMENTAL) 2939 case MATPRODUCT_AB: 2940 PetscCall(MatProductSetFromOptions_MPIDense_AB(C)); 2941 break; 2942 #endif 2943 case MATPRODUCT_AtB: 2944 PetscCall(MatProductSetFromOptions_MPIDense_AtB(C)); 2945 break; 2946 case MATPRODUCT_ABt: 2947 PetscCall(MatProductSetFromOptions_MPIDense_ABt(C)); 2948 break; 2949 default: 2950 break; 2951 } 2952 PetscFunctionReturn(0); 2953 } 2954
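/*
   Usage sketch: the transpose-product kernels above are reached through the public
   Mat product interfaces. For two MATMPIDENSE matrices A and B with compatible layouts,

     Mat C,D;
     MatTransposeMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C);   C = A^T * B   (AtB path)
     MatMatTransposeMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&D);   D = A   * B^T (ABt path)
     MatDestroy(&C);
     MatDestroy(&D);

   The A*B^T algorithm can be selected at run time with
   -matmattransmult_mpidense_mpidense_via allgatherv (the default) or cyclic.
*/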