/*
   Basic functions for basic parallel dense matrices.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I "petscmat.h" I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@
      MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix, returns the sequential
      matrix that represents the locally owned portion of the operator. For a sequential matrix
      it returns the matrix itself.

    Input Parameter:
.      A - the Seq or MPI dense matrix

    Output Parameter:
.      B - the inner matrix

    Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  PetscBool    flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscValidPointer(B,2);
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg));
  if (flg) *B = mat->A;
  else {
    PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg));
    PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name);
    *B = A;
  }
  PetscFunctionReturn(0);
}
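/*
   Illustrative sketch (not part of the implementation): a caller holding a MATMPIDENSE
   matrix can grab the underlying sequential block to apply purely local dense operations
   to its owned rows, e.g. (hypothetical variables Alocal, nrm)

     Mat       Alocal;
     PetscReal nrm;
     PetscCall(MatDenseGetLocalMatrix(A,&Alocal));     // no new matrix is created
     PetscCall(MatNorm(Alocal,NORM_FROBENIUS,&nrm));   // norm of the locally owned rows only

   The returned matrix is borrowed from A and must not be destroyed by the caller.
*/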
PetscErrorCode MatCopy_MPIDense(Mat A,Mat B,MatStructure s)
{
  Mat_MPIDense *Amat = (Mat_MPIDense*)A->data;
  Mat_MPIDense *Bmat = (Mat_MPIDense*)B->data;

  PetscFunctionBegin;
  PetscCall(MatCopy(Amat->A,Bmat->A,s));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  PetscInt     lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  PetscCall(MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  PetscInt     lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheck(row >= rstart && row < rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  PetscCall(MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  PetscInt     m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm     comm;
  PetscBool    flg;
  Mat          B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A,&flg));
  PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */
    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg));
    PetscCheck(!flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
    PetscCall(PetscObjectGetComm((PetscObject)(mdn->A),&comm));
    PetscCall(MatCreate(comm,&B));
    PetscCall(MatSetSizes(B,m,m,m,m));
    PetscCall(MatSetType(B,((PetscObject)mdn->A)->type_name));
    PetscCall(MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array));
    PetscCall(MatSeqDenseSetPreallocation(B,array+m*rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array));
    PetscCall(PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B));
    *a = B;
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
  PetscInt     i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool    roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        PetscCall(MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv));
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          PetscCall(MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv));
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(0);
}
PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  PetscInt     i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        PetscCall(MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j));
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLDA(a->A,lda));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscBool    iscuda;

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
    PetscCall(PetscLayoutSetUp(A->cmap));
    PetscCall(MatCreate(PETSC_COMM_SELF,&a->A));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->A));
    PetscCall(MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N));
    PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda));
    PetscCall(MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
  }
  PetscCall(MatDenseSetLDA(a->A,lda));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArray(a->A,array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayRead(a->A,array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseGetArrayWrite(a->A,array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDensePlaceArray(a->A,array));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseResetArray_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseResetArray(a->A));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseReplaceArray(a->A,array));
  PetscFunctionReturn(0);
}
static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B)
{
  Mat_MPIDense      *mat = (Mat_MPIDense*)A->data,*newmatd;
  PetscInt          lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols;
  const PetscInt    *irow,*icol;
  const PetscScalar *v;
  PetscScalar       *bv;
  Mat               newmat;
  IS                iscol_local;
  MPI_Comm          comm_is,comm_mat;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A,&comm_mat));
  PetscCall(PetscObjectGetComm((PetscObject)iscol,&comm_is));
  PetscCheck(comm_mat == comm_is,PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  PetscCall(ISAllGather(iscol,&iscol_local));
  PetscCall(ISGetIndices(isrow,&irow));
  PetscCall(ISGetIndices(iscol_local,&icol));
  PetscCall(ISGetLocalSize(isrow,&nrows));
  PetscCall(ISGetLocalSize(iscol,&ncols));
  PetscCall(ISGetSize(iscol,&Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. Currently only submatrices with the same partitioning as
     the original matrix are supported! */

  PetscCall(MatGetLocalSize(A,&nlrows,&nlcols));
  PetscCall(MatGetOwnershipRange(A,&rstart,&rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&newmat));
    PetscCall(MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols));
    PetscCall(MatSetType(newmat,((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat,NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A,&bv));
  PetscCall(MatDenseGetArrayRead(mat->A,&v));
  PetscCall(MatDenseGetLDA(mat->A,&lda));
  for (i=0; i<Ncols; i++) {
    const PetscScalar *av = v + lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A,&v));
  PetscCall(MatDenseRestoreArray(newmatd->A,&bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow,&irow));
  PetscCall(ISRestoreIndices(iscol_local,&icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArray(a->A,array));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayRead(a->A,array));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayWrite(a->A,array));
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  PetscInt     nstash,reallocs;

  PetscFunctionBegin;
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
  PetscCall(PetscInfo(mdn->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs));
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  PetscInt     i,*row,*col,flg,j,rstart,ncols;
  PetscMPIInt  n;
  PetscScalar  *val;

  PetscFunctionBegin;
  if (!mdn->donotstash && !mat->nooffprocentries) {
    /* wait on receives */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg));
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }

  PetscCall(MatAssemblyBegin(mdn->A,mode));
  PetscCall(MatAssemblyEnd(mdn->A,mode));
  PetscFunctionReturn(0);
}
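/*
   Illustrative sketch (assumptions noted): entries destined for rows owned by another rank
   are kept in mat->stash by MatSetValues_MPIDense() and only communicated during assembly,
   so a typical (hypothetical) insertion sequence from user code is

     PetscCall(MatSetValues(A,1,&grow,1,&gcol,&val,INSERT_VALUES));  // grow may be off-process
     PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));              // ships stashed entries
     PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));                // receives and inserts them locally

   MatSetOption(A,MAT_IGNORE_OFF_PROC_ENTRIES,PETSC_TRUE) skips the stash entirely when every
   rank only sets rows it owns.
*/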
PetscErrorCode MatZeroEntries_MPIDense(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b)
{
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;
  PetscInt     i,len,*lrows;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x,&xx));
    PetscCall(VecGetArrayWrite(b,&bb));
    for (i=0; i<len; ++i) bb[lrows[i]] = diag*xx[lrows[i]];
    PetscCall(VecRestoreArrayRead(x,&xx));
    PetscCall(VecRestoreArrayWrite(b,&bb));
  }
  PetscCall(MatZeroRows(l->A,len,lrows,0.0,NULL,NULL));
  if (diag != 0.0) {
    Vec d;

    PetscCall(MatCreateVecs(A,NULL,&d));
    PetscCall(VecSet(d,diag));
    PetscCall(MatDiagonalSet(A,d,INSERT_VALUES));
    PetscCall(VecDestroy(&d));
  }
  PetscCall(PetscFree(lrows));
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec);
PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec);

PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx,&ax,&axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec,&ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx,&ax));
  PetscCall((*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy));
  PetscFunctionReturn(0);
}
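/*
   Illustrative sketch: the parallel product y = A*x first gathers the distributed input x
   into the sequential work vector mdn->lvec with a PetscSF broadcast and then applies the
   local dense block.  From user code nothing beyond the usual pattern is needed, e.g.
   (hypothetical vectors x,y)

     Vec x,y;
     PetscCall(MatCreateVecs(A,&x,&y));   // x conforms to the columns, y to the rows
     PetscCall(MatMult(A,x,y));           // SF gather of x followed by a local dense multiply
*/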
PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat));
  PetscCall(VecGetArrayReadAndMemType(xx,&ax,&axmtype));
  PetscCall(VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype));
  PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE));
  PetscCall(VecRestoreArrayAndMemType(mdn->lvec,&ay));
  PetscCall(VecRestoreArrayReadAndMemType(xx,&ax));
  PetscCall((*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecSet(yy,0.0));
  PetscCall((*a->A->ops->multtranspose)(a->A,xx,a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype));
  PetscCall(VecGetArrayAndMemType(yy,&ay,&aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec,&ax));
  PetscCall(VecRestoreArrayAndMemType(yy,&ay));
  PetscFunctionReturn(0);
}

PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  const PetscScalar *ax;
  PetscScalar       *ay;
  PetscMemType      axmtype,aymtype;

  PetscFunctionBegin;
  if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
  PetscCall(VecCopy(yy,zz));
  PetscCall((*a->A->ops->multtranspose)(a->A,xx,a->lvec));
  PetscCall(VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype));
  PetscCall(VecGetArrayAndMemType(zz,&ay,&aymtype));
  PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM));
  PetscCall(PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM));
  PetscCall(VecRestoreArrayReadAndMemType(a->lvec,&ax));
  PetscCall(VecRestoreArrayAndMemType(zz,&ay));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v)
{
  Mat_MPIDense      *a = (Mat_MPIDense*)A->data;
  PetscInt          lda,len,i,n,m = A->rmap->n,radd;
  PetscScalar       *x,zero = 0.0;
  const PetscScalar *av;

  PetscFunctionBegin;
  PetscCall(VecSet(v,zero));
  PetscCall(VecGetArray(v,&x));
  PetscCall(VecGetSize(v,&n));
  PetscCheck(n == A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec");
  len  = PetscMin(a->A->rmap->n,a->A->cmap->n);
  radd = A->rmap->rstart*m;
  PetscCall(MatDenseGetArrayRead(a->A,&av));
  PetscCall(MatDenseGetLDA(a->A,&lda));
  for (i=0; i<len; i++) {
    x[i] = av[radd + i*lda + i];
  }
  PetscCall(MatDenseRestoreArrayRead(a->A,&av));
  PetscCall(VecRestoreArray(v,&x));
  PetscFunctionReturn(0);
}
PetscErrorCode MatDestroy_MPIDense(Mat mat)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCheck(!mdn->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!mdn->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDestroy(&mdn->A));
  PetscCall(VecDestroy(&mdn->lvec));
  PetscCall(PetscSFDestroy(&mdn->Mvctx));
  PetscCall(VecDestroy(&mdn->cvec));
  PetscCall(MatDestroy(&mdn->cmat));

  PetscCall(PetscFree(mat->data));
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL));

  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL));
  PetscFunctionReturn(0);
}
PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer);

#include <petscdraw.h>
static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)mat->data;
  PetscMPIInt       rank;
  PetscViewerType   vtype;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetType(viewer,&vtype));
    PetscCall(PetscViewerGetFormat(viewer,&format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCall(MatGetInfo(mat,MAT_LOCAL,&info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx,viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw));
    PetscCall(PetscDrawIsNull(draw,&isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    PetscInt    M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz;
    PetscInt    *cols;
    PetscScalar *vals;

    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat),&A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A,M,N,M,N));
    } else {
      PetscCall(MatSetSizes(A,0,0,M,N));
    }
    /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */
    PetscCall(MatSetType(A,MATMPIDENSE));
    PetscCall(MatMPIDenseSetPreallocation(A,NULL));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)A));

    /* Copy the matrix ... This isn't the most efficient means,
       but it's quick for now */
    A->insertmode = INSERT_VALUES;

    row = mat->rmap->rstart;
    m   = mdn->A->rmap->n;
    for (i=0; i<m; i++) {
      PetscCall(MatGetRow_MPIDense(mat,row,&nz,&cols,&vals));
      PetscCall(MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES));
      PetscCall(MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals));
      row++;
    }

    PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
    PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    if (rank == 0) {
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name));
      PetscCall(MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}
PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer)
{
  PetscBool iascii,isbinary,isdraw,issocket;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw));

  if (iascii || issocket || isdraw) {
    PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat,viewer));
  } else if (isbinary) {
    PetscCall(MatView_Dense_Binary(mat,viewer));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  Mat            mdn  = mat->A;
  PetscLogDouble isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;

  PetscCall(MatGetInfo(mdn,MAT_LOCAL,info));

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    PetscCall(MatSetOption(a->A,op,flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    a->roworiented = flg;
    PetscCall(MatSetOption(a->A,op,flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
    PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op]));
    break;
  default:
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  const PetscScalar *l;
  PetscScalar       x,*v,*vv,*r;
  PetscInt          i,j,s2a,s3a,s2,s3,m = mdn->A->rmap->n,n = mdn->A->cmap->n,lda;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(mdn->A,&vv));
  PetscCall(MatDenseGetLDA(mdn->A,&lda));
  PetscCall(MatGetLocalSize(A,&s2,&s3));
  if (ll) {
    PetscCall(VecGetLocalSize(ll,&s2a));
    PetscCheck(s2a == s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT,s2a,s2);
    PetscCall(VecGetArrayRead(ll,&l));
    for (i=0; i<m; i++) {
      x = l[i];
      v = vv + i;
      for (j=0; j<n; j++) { (*v) *= x; v += lda;}
    }
    PetscCall(VecRestoreArrayRead(ll,&l));
    PetscCall(PetscLogFlops(1.0*n*m));
  }
  if (rr) {
    const PetscScalar *ar;

    PetscCall(VecGetLocalSize(rr,&s3a));
    PetscCheck(s3a == s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".",s3a,s3);
    PetscCall(VecGetArrayRead(rr,&ar));
    if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A));
    PetscCall(VecGetArray(mdn->lvec,&r));
    PetscCall(PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE));
    PetscCall(VecRestoreArrayRead(rr,&ar));
    for (i=0; i<n; i++) {
      x = r[i];
      v = vv + i*lda;
      for (j=0; j<m; j++) (*v++) *= x;
    }
    PetscCall(VecRestoreArray(mdn->lvec,&r));
    PetscCall(PetscLogFlops(1.0*n*m));
  }
  PetscCall(MatDenseRestoreArray(mdn->A,&vv));
  PetscFunctionReturn(0);
}
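/*
   Illustrative sketch: left scaling uses the row layout of A and right scaling the column
   layout, so conforming vectors are easiest to obtain from the matrix itself (hypothetical
   vectors dl,dr):

     Vec dl,dr;
     PetscCall(MatCreateVecs(A,&dr,&dl));    // dr conforms to columns, dl to rows
     PetscCall(VecSet(dl,2.0));
     PetscCall(VecSet(dr,0.5));
     PetscCall(MatDiagonalScale(A,dl,dr));   // A <- diag(dl) * A * diag(dr)

   The right-scaling vector is broadcast into mdn->lvec with the same PetscSF used for MatMult.
*/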
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  PetscInt          i,j;
  PetscMPIInt       size;
  PetscReal         sum = 0.0;
  const PetscScalar *av,*v;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArrayRead(mdn->A,&av));
  v = av;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
  if (size == 1) {
    PetscCall(MatNorm(mdn->A,type,nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      PetscCall(MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
      *nrm = PetscSqrtReal(*nrm);
      PetscCall(PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n));
    } else if (type == NORM_1) {
      PetscReal *tmp,*tmp2;
      PetscCall(PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2));
      *nrm = 0.0;
      v    = av;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      PetscCall(MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A)));
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree2(tmp,tmp2));
      PetscCall(PetscLogFlops(A->cmap->n*A->rmap->n));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      PetscCall(MatNorm(mdn->A,type,&ntemp));
      PetscCall(MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A)));
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  PetscCall(MatDenseRestoreArrayRead(mdn->A,&av));
  PetscFunctionReturn(0);
}

PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  Mat          B;
  PetscInt     M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscInt     j,i,lda;
  PetscScalar  *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
    PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M));
    PetscCall(MatSetType(B,((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(B,NULL));
  } else B = *matout;

  m = a->A->rmap->n; n = a->A->cmap->n;
  PetscCall(MatDenseGetArrayRead(a->A,(const PetscScalar**)&v));
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(PetscMalloc1(m,&rwork));
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    PetscCall(MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES));
    v += lda;
  }
  PetscCall(MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v));
  PetscCall(PetscFree(rwork));
  PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    PetscCall(MatHeaderMerge(A,&B));
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!A->preallocated) {
    PetscCall(MatMPIDenseSetPreallocation(A,NULL));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  PetscCall(MatAXPY(A->A,alpha,B->A,str));
  PetscFunctionReturn(0);
}

PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense *a = (Mat_MPIDense*)mat->data;

  PetscFunctionBegin;
  PetscCall(MatConjugate(a->A));
  PetscFunctionReturn(0);
}

PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscFunctionReturn(0);
}

PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscFunctionReturn(0);
}
static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(a->A,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
  PetscCheck(a->A->ops->getcolumnvector,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
  PetscCall((*a->A->ops->getcolumnvector)(a->A,v,col));
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat,PetscInt,PetscReal*);

PetscErrorCode MatGetColumnReductions_MPIDense(Mat A,PetscInt type,PetscReal *reductions)
{
  PetscInt     i,m,n;
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscReal    *work;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A,&m,&n));
  PetscCall(PetscMalloc1(n,&work));
  if (type == REDUCTION_MEAN_REALPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_REALPART,work));
  } else if (type == REDUCTION_MEAN_IMAGINARYPART) {
    PetscCall(MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_IMAGINARYPART,work));
  } else {
    PetscCall(MatGetColumnReductions_SeqDense(a->A,type,work));
  }
  if (type == NORM_2) {
    for (i=0; i<n; i++) work[i] *= work[i];
  }
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,A->hdr.comm));
  } else {
    PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,A->hdr.comm));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i=0; i<n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(0);
}
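/*
   Illustrative sketch: each rank reduces over its locally owned rows (the 2-norm partial
   results are squared before the MPI_SUM so the global combination is exact) and a single
   MPIU_Allreduce produces one value per global column.  From user code this is normally
   reached through the public column-reduction interfaces, e.g. (hypothetical array norms
   of length N)

     PetscCall(MatGetColumnNorms(A,NORM_2,norms));   // one norm per global column
*/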
#if defined(PETSC_HAVE_CUDA)
static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscInt     lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
  }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAResetArray(a->cvec));
  *v = NULL;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscInt     lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
  }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  PetscCall(VecLockReadPush(a->cvec));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse));
  PetscCall(VecLockReadPop(a->cvec));
  PetscCall(VecCUDAResetArray(a->cvec));
  *v = NULL;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscInt     lda;

  PetscFunctionBegin;
  PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec));
  }
  a->vecinuse = col + 1;
  PetscCall(MatDenseGetLDA(a->A,&lda));
  PetscCall(MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda));
  *v = a->cvec;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  PetscCall(MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse));
  PetscCall(VecCUDAResetArray(a->cvec));
  *v = NULL;
  PetscFunctionReturn(0);
}
static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A,const PetscScalar *a)
{
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAPlaceArray(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
{
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAResetArray(l->A));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A,const PetscScalar *a)
{
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(MatDenseCUDAReplaceArray(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A,PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArrayWrite(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A,PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArrayWrite(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A,const PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArrayRead(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A,const PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArrayRead(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A,PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDAGetArray(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A,PetscScalar **a)
{
  Mat_MPIDense *l = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseCUDARestoreArray(l->A,a));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense *d = (Mat_MPIDense*)mat->data;

  PetscFunctionBegin;
  PetscCheck(!d->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  PetscCheck(!d->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) {
    PetscCall(MatBindToCPU(d->A,bind));
  }
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda));
    if (!iscuda) {
      PetscCall(VecDestroy(&d->cvec));
    }
    PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda));
    if (!iscuda) {
      PetscCall(MatDestroy(&d->cmat));
    }
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA));
  } else {
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense));
    PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense));
  }
  if (d->cmat) {
    PetscCall(MatBindToCPU(d->cmat,bind));
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A,PetscScalar *d_data)
{
  Mat_MPIDense *d = (Mat_MPIDense*)A->data;
  PetscBool    iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda));
  if (!iscuda) PetscFunctionReturn(0);
  PetscCall(PetscLayoutSetUp(A->rmap));
  PetscCall(PetscLayoutSetUp(A->cmap));
  if (!d->A) {
    PetscCall(MatCreate(PETSC_COMM_SELF,&d->A));
    PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)d->A));
    PetscCall(MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N));
  }
  PetscCall(MatSetType(d->A,MATSEQDENSECUDA));
  PetscCall(MatSeqDenseCUDASetPreallocation(d->A,d_data));
  A->preallocated = PETSC_TRUE;
  A->assembled    = PETSC_TRUE;
  PetscFunctionReturn(0);
}
#endif

static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense *d = (Mat_MPIDense*)x->data;

  PetscFunctionBegin;
  PetscCall(MatSetRandom(d->A,rctx));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);

/* -------------------------------------------------------------------*/
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /* 4*/  MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 10*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 29*/ MatSetUp_MPIDense,
                                        NULL,
                                        NULL,
                                        MatGetDiagonalBlock_MPIDense,
                                        NULL,
                                /* 34*/ MatDuplicate_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        NULL,
                                        MatGetValues_MPIDense,
                                        MatCopy_MPIDense,
                                /* 44*/ NULL,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        NULL,
                                        NULL,
                                /* 49*/ MatSetRandom_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 54*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        NULL,
                                        NULL,
                                /* 64*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 69*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 74*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 79*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 83*/ MatLoad_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 89*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /* 94*/ NULL,
                                        NULL,
                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                        NULL,
                                /* 99*/ MatProductSetFromOptions_MPIDense,
                                        NULL,
                                        NULL,
                                        MatConjugate_MPIDense,
                                        NULL,
                                /*104*/ NULL,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        NULL,
                                        NULL,
                                /*109*/ NULL,
                                        NULL,
                                        NULL,
                                        MatGetColumnVector_MPIDense,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*119*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*124*/ NULL,
                                        MatGetColumnReductions_MPIDense,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*129*/ NULL,
                                        NULL,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        NULL,
                                /*134*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                /*139*/ NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        NULL,
                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                /*145*/ NULL,
                                        NULL,
                                        NULL
};
PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense *a = (Mat_MPIDense*)mat->data;
  PetscBool    iscuda;

  PetscFunctionBegin;
  PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  if (!a->A) {
    PetscCall(MatCreate(PETSC_COMM_SELF,&a->A));
    PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A));
    PetscCall(MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N));
  }
  PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda));
  PetscCall(MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE));
  PetscCall(MatSeqDenseSetPreallocation(a->A,data));
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  PetscFunctionReturn(0);
}
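/*
   Illustrative sketch: this routine is usually reached through MatCreateDense() or
   MatSetType()+MatMPIDenseSetPreallocation(); passing NULL for the data pointer lets PETSc
   allocate the local (local-rows x global-columns) column-major storage, e.g. (hypothetical
   sizes M,N)

     Mat A;
     PetscCall(MatCreateDense(PETSC_COMM_WORLD,PETSC_DECIDE,PETSC_DECIDE,M,N,NULL,&A));
     PetscCall(MatDestroy(&A));
*/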
MAT_INPLACE_MATRIX) { 1547 PetscCall(MatHeaderReplace(A,&mat_elemental)); 1548 } else { 1549 *newmat = mat_elemental; 1550 } 1551 PetscFunctionReturn(0); 1552 } 1553 #endif 1554 1555 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals) 1556 { 1557 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 1558 1559 PetscFunctionBegin; 1560 PetscCall(MatDenseGetColumn(mat->A,col,vals)); 1561 PetscFunctionReturn(0); 1562 } 1563 1564 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals) 1565 { 1566 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 1567 1568 PetscFunctionBegin; 1569 PetscCall(MatDenseRestoreColumn(mat->A,vals)); 1570 PetscFunctionReturn(0); 1571 } 1572 1573 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 1574 { 1575 Mat_MPIDense *mat; 1576 PetscInt m,nloc,N; 1577 1578 PetscFunctionBegin; 1579 PetscCall(MatGetSize(inmat,&m,&N)); 1580 PetscCall(MatGetLocalSize(inmat,NULL,&nloc)); 1581 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 1582 PetscInt sum; 1583 1584 if (n == PETSC_DECIDE) { 1585 PetscCall(PetscSplitOwnership(comm,&n,&N)); 1586 } 1587 /* Check sum(n) = N */ 1588 PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm)); 1589 PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N); 1590 1591 PetscCall(MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat)); 1592 PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 1593 } 1594 1595 /* numeric phase */ 1596 mat = (Mat_MPIDense*)(*outmat)->data; 1597 PetscCall(MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN)); 1598 PetscFunctionReturn(0); 1599 } 1600 1601 #if defined(PETSC_HAVE_CUDA) 1602 PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat) 1603 { 1604 Mat B; 1605 Mat_MPIDense *m; 1606 1607 PetscFunctionBegin; 1608 if (reuse == MAT_INITIAL_MATRIX) { 1609 PetscCall(MatDuplicate(M,MAT_COPY_VALUES,newmat)); 1610 } else if (reuse == MAT_REUSE_MATRIX) { 1611 PetscCall(MatCopy(M,*newmat,SAME_NONZERO_PATTERN)); 1612 } 1613 1614 B = *newmat; 1615 PetscCall(MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE)); 1616 PetscCall(PetscFree(B->defaultvectype)); 1617 PetscCall(PetscStrallocpy(VECSTANDARD,&B->defaultvectype)); 1618 PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE)); 1619 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL)); 1620 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL)); 1621 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL)); 1622 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL)); 1623 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL)); 1624 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL)); 1625 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL)); 1626 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL)); 1627 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL)); 1628 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL)); 1629 
PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL)); 1630 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL)); 1631 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL)); 1632 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL)); 1633 m = (Mat_MPIDense*)(B)->data; 1634 if (m->A) { 1635 PetscCall(MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A)); 1636 } 1637 B->ops->bindtocpu = NULL; 1638 B->offloadmask = PETSC_OFFLOAD_CPU; 1639 PetscFunctionReturn(0); 1640 } 1641 1642 PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat) 1643 { 1644 Mat B; 1645 Mat_MPIDense *m; 1646 1647 PetscFunctionBegin; 1648 if (reuse == MAT_INITIAL_MATRIX) { 1649 PetscCall(MatDuplicate(M,MAT_COPY_VALUES,newmat)); 1650 } else if (reuse == MAT_REUSE_MATRIX) { 1651 PetscCall(MatCopy(M,*newmat,SAME_NONZERO_PATTERN)); 1652 } 1653 1654 B = *newmat; 1655 PetscCall(PetscFree(B->defaultvectype)); 1656 PetscCall(PetscStrallocpy(VECCUDA,&B->defaultvectype)); 1657 PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA)); 1658 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C", MatConvert_MPIDenseCUDA_MPIDense)); 1659 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1660 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense)); 1661 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1662 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ)); 1663 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C", MatDenseCUDAGetArray_MPIDenseCUDA)); 1664 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C", MatDenseCUDAGetArrayRead_MPIDenseCUDA)); 1665 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C", MatDenseCUDAGetArrayWrite_MPIDenseCUDA)); 1666 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C", MatDenseCUDARestoreArray_MPIDenseCUDA)); 1667 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C", MatDenseCUDARestoreArrayRead_MPIDenseCUDA)); 1668 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C", MatDenseCUDARestoreArrayWrite_MPIDenseCUDA)); 1669 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C", MatDenseCUDAPlaceArray_MPIDenseCUDA)); 1670 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C", MatDenseCUDAResetArray_MPIDenseCUDA)); 1671 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C", MatDenseCUDAReplaceArray_MPIDenseCUDA)); 1672 m = (Mat_MPIDense*)(B->data); 1673 if (m->A) { 1674 PetscCall(MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A)); 1675 B->offloadmask = PETSC_OFFLOAD_BOTH; 1676 } else { 1677 B->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 1678 } 1679 PetscCall(MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE)); 1680 1681 B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA; 1682 PetscFunctionReturn(0); 1683 } 1684 #endif 1685 1686 PetscErrorCode 
MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v) 1687 { 1688 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1689 PetscInt lda; 1690 1691 PetscFunctionBegin; 1692 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1693 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1694 if (!a->cvec) { 1695 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec)); 1696 } 1697 a->vecinuse = col + 1; 1698 PetscCall(MatDenseGetLDA(a->A,&lda)); 1699 PetscCall(MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse)); 1700 PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda)); 1701 *v = a->cvec; 1702 PetscFunctionReturn(0); 1703 } 1704 1705 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v) 1706 { 1707 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1708 1709 PetscFunctionBegin; 1710 PetscCheck(a->vecinuse,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1711 PetscCheck(a->cvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 1712 a->vecinuse = 0; 1713 PetscCall(MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse)); 1714 PetscCall(VecResetArray(a->cvec)); 1715 *v = NULL; 1716 PetscFunctionReturn(0); 1717 } 1718 1719 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v) 1720 { 1721 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1722 PetscInt lda; 1723 1724 PetscFunctionBegin; 1725 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1726 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1727 if (!a->cvec) { 1728 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec)); 1729 } 1730 a->vecinuse = col + 1; 1731 PetscCall(MatDenseGetLDA(a->A,&lda)); 1732 PetscCall(MatDenseGetArrayRead(a->A,&a->ptrinuse)); 1733 PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda)); 1734 PetscCall(VecLockReadPush(a->cvec)); 1735 *v = a->cvec; 1736 PetscFunctionReturn(0); 1737 } 1738 1739 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v) 1740 { 1741 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1742 1743 PetscFunctionBegin; 1744 PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1745 PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 1746 a->vecinuse = 0; 1747 PetscCall(MatDenseRestoreArrayRead(a->A,&a->ptrinuse)); 1748 PetscCall(VecLockReadPop(a->cvec)); 1749 PetscCall(VecResetArray(a->cvec)); 1750 *v = NULL; 1751 PetscFunctionReturn(0); 1752 } 1753 1754 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v) 1755 { 1756 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1757 PetscInt lda; 1758 1759 PetscFunctionBegin; 1760 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1761 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1762 if (!a->cvec) { 1763 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec)); 1764 } 1765 a->vecinuse = col + 1; 1766 
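  /* Expose column col of the local block without copying: point the reusable column vector at
     offset col*lda of the write-only local array obtained below */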
PetscCall(MatDenseGetLDA(a->A,&lda)); 1767 PetscCall(MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse)); 1768 PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda)); 1769 *v = a->cvec; 1770 PetscFunctionReturn(0); 1771 } 1772 1773 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v) 1774 { 1775 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1776 1777 PetscFunctionBegin; 1778 PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1779 PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 1780 a->vecinuse = 0; 1781 PetscCall(MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse)); 1782 PetscCall(VecResetArray(a->cvec)); 1783 *v = NULL; 1784 PetscFunctionReturn(0); 1785 } 1786 1787 PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v) 1788 { 1789 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1790 Mat_MPIDense *c; 1791 MPI_Comm comm; 1792 1793 PetscFunctionBegin; 1794 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 1795 PetscCheck(!a->vecinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1796 PetscCheck(!a->matinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1797 if (!a->cmat) { 1798 PetscCall(MatCreate(comm,&a->cmat)); 1799 PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat)); 1800 PetscCall(MatSetType(a->cmat,((PetscObject)A)->type_name)); 1801 PetscCall(PetscLayoutReference(A->rmap,&a->cmat->rmap)); 1802 PetscCall(PetscLayoutSetSize(a->cmat->cmap,cend-cbegin)); 1803 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 1804 } else if (cend-cbegin != a->cmat->cmap->N) { 1805 PetscCall(PetscLayoutDestroy(&a->cmat->cmap)); 1806 PetscCall(PetscLayoutCreate(comm,&a->cmat->cmap)); 1807 PetscCall(PetscLayoutSetSize(a->cmat->cmap,cend-cbegin)); 1808 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 1809 } 1810 c = (Mat_MPIDense*)a->cmat->data; 1811 PetscCheck(!c->A,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1812 PetscCall(MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A)); 1813 a->cmat->preallocated = PETSC_TRUE; 1814 a->cmat->assembled = PETSC_TRUE; 1815 a->matinuse = cbegin + 1; 1816 *v = a->cmat; 1817 PetscFunctionReturn(0); 1818 } 1819 1820 PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v) 1821 { 1822 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1823 Mat_MPIDense *c; 1824 1825 PetscFunctionBegin; 1826 PetscCheck(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first"); 1827 PetscCheck(a->cmat,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix"); 1828 PetscCheck(*v == a->cmat,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()"); 1829 a->matinuse = 0; 1830 c = (Mat_MPIDense*)a->cmat->data; 1831 PetscCall(MatDenseRestoreSubMatrix(a->A,&c->A)); 1832 *v = NULL; 1833 PetscFunctionReturn(0); 1834 } 1835 1836 /*MC 1837 MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices. 1838 1839 Options Database Keys: 1840 . 
-mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions() 1841 1842 Level: beginner 1843 1844 .seealso: `MatCreateDense()` 1845 1846 M*/ 1847 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) 1848 { 1849 Mat_MPIDense *a; 1850 1851 PetscFunctionBegin; 1852 PetscCall(PetscNewLog(mat,&a)); 1853 mat->data = (void*)a; 1854 PetscCall(PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps))); 1855 1856 mat->insertmode = NOT_SET_VALUES; 1857 1858 /* build cache for off array entries formed */ 1859 a->donotstash = PETSC_FALSE; 1860 1861 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash)); 1862 1863 /* stuff used for matrix vector multiply */ 1864 a->lvec = NULL; 1865 a->Mvctx = NULL; 1866 a->roworiented = PETSC_TRUE; 1867 1868 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense)); 1869 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense)); 1870 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense)); 1871 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense)); 1872 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense)); 1873 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense)); 1874 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense)); 1875 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense)); 1876 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense)); 1877 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense)); 1878 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense)); 1879 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense)); 1880 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense)); 1881 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense)); 1882 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense)); 1883 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense)); 1884 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense)); 1885 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense)); 1886 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense)); 1887 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense)); 1888 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",MatConvert_MPIDense_MPIAIJ)); 1889 #if defined(PETSC_HAVE_ELEMENTAL) 1890 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental)); 1891 #endif 1892 #if defined(PETSC_HAVE_SCALAPACK) 1893 
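  /* conversion to ScaLAPACK format is registered only when PETSc was configured with ScaLAPACK */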
PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK)); 1894 #endif 1895 #if defined(PETSC_HAVE_CUDA) 1896 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA)); 1897 #endif 1898 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense)); 1899 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense)); 1900 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ)); 1901 #if defined(PETSC_HAVE_CUDA) 1902 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense)); 1903 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ)); 1904 #endif 1905 1906 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense)); 1907 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense)); 1908 PetscCall(PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE)); 1909 PetscFunctionReturn(0); 1910 } 1911 1912 /*MC 1913 MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs. 1914 1915 Options Database Keys: 1916 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions() 1917 1918 Level: beginner 1919 1920 .seealso: 1921 1922 M*/ 1923 #if defined(PETSC_HAVE_CUDA) 1924 #include <petsc/private/deviceimpl.h> 1925 PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B) 1926 { 1927 PetscFunctionBegin; 1928 PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA)); 1929 PetscCall(MatCreate_MPIDense(B)); 1930 PetscCall(MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B)); 1931 PetscFunctionReturn(0); 1932 } 1933 #endif 1934 1935 /*MC 1936 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1937 1938 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1939 and MATMPIDENSE otherwise. 1940 1941 Options Database Keys: 1942 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1943 1944 Level: beginner 1945 1946 .seealso: `MATSEQDENSE`, `MATMPIDENSE`, `MATDENSECUDA` 1947 M*/ 1948 1949 /*MC 1950 MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs. 1951 1952 This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator, 1953 and MATMPIDENSECUDA otherwise. 1954 1955 Options Database Keys: 1956 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions() 1957 1958 Level: beginner 1959 1960 .seealso: `MATSEQDENSECUDA`, `MATMPIDENSECUDA`, `MATDENSE` 1961 M*/ 1962 1963 /*@C 1964 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1965 1966 Collective 1967 1968 Input Parameters: 1969 . B - the matrix 1970 - data - optional location of matrix data. Set data=NULL for PETSc 1971 to control all matrix memory allocation. 1972 1973 Notes: 1974 The dense format is fully compatible with standard Fortran 77 1975 storage by columns. 
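
   A minimal sketch of typical use, letting PETSc manage the storage; the communicator, sizes,
   and variable names below are illustrative only:

      Mat B;
      MatCreate(comm,&B);
      MatSetSizes(B,m,n,M,N);
      MatSetType(B,MATMPIDENSE);
      MatMPIDenseSetPreallocation(B,NULL);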
1976 1977 The data input variable is intended primarily for Fortran programmers 1978 who wish to allocate their own matrix memory space. Most users should 1979 set data=NULL. 1980 1981 Level: intermediate 1982 1983 .seealso: `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 1984 @*/ 1985 PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data) 1986 { 1987 PetscFunctionBegin; 1988 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 1989 PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data)); 1990 PetscFunctionReturn(0); 1991 } 1992 1993 /*@ 1994 MatDensePlaceArray - Allows one to replace the array in a dense matrix with an 1995 array provided by the user. This is useful to avoid copying an array 1996 into a matrix 1997 1998 Not Collective 1999 2000 Input Parameters: 2001 + mat - the matrix 2002 - array - the array in column major order 2003 2004 Notes: 2005 You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be 2006 freed when the matrix is destroyed. 2007 2008 Level: developer 2009 2010 .seealso: `MatDenseGetArray()`, `MatDenseResetArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()` 2011 2012 @*/ 2013 PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar *array) 2014 { 2015 PetscFunctionBegin; 2016 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2017 PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array)); 2018 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2019 #if defined(PETSC_HAVE_CUDA) 2020 mat->offloadmask = PETSC_OFFLOAD_CPU; 2021 #endif 2022 PetscFunctionReturn(0); 2023 } 2024 2025 /*@ 2026 MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray() 2027 2028 Not Collective 2029 2030 Input Parameters: 2031 . mat - the matrix 2032 2033 Notes: 2034 You can only call this after a call to MatDensePlaceArray() 2035 2036 Level: developer 2037 2038 .seealso: `MatDenseGetArray()`, `MatDensePlaceArray()`, `VecPlaceArray()`, `VecGetArray()`, `VecRestoreArray()`, `VecReplaceArray()`, `VecResetArray()` 2039 2040 @*/ 2041 PetscErrorCode MatDenseResetArray(Mat mat) 2042 { 2043 PetscFunctionBegin; 2044 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2045 PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat)); 2046 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2047 PetscFunctionReturn(0); 2048 } 2049 2050 /*@ 2051 MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an 2052 array provided by the user. This is useful to avoid copying an array 2053 into a matrix 2054 2055 Not Collective 2056 2057 Input Parameters: 2058 + mat - the matrix 2059 - array - the array in column major order 2060 2061 Notes: 2062 The memory passed in MUST be obtained with PetscMalloc() and CANNOT be 2063 freed by the user. It will be freed when the matrix is destroyed. 
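
   A possible usage sketch; the names and sizes are illustrative, and the new buffer must be
   allocated with PetscMalloc() and laid out exactly like the array it replaces:

      PetscScalar *newarr;
      PetscMalloc1(lda*ncols,&newarr);
      ... fill newarr in column major order ...
      MatDenseReplaceArray(mat,newarr);

   After this call the matrix owns newarr and the user must not free it.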
2064 2065 Level: developer 2066 2067 .seealso: `MatDenseGetArray()`, `VecReplaceArray()` 2068 @*/ 2069 PetscErrorCode MatDenseReplaceArray(Mat mat,const PetscScalar *array) 2070 { 2071 PetscFunctionBegin; 2072 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2073 PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array)); 2074 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2075 #if defined(PETSC_HAVE_CUDA) 2076 mat->offloadmask = PETSC_OFFLOAD_CPU; 2077 #endif 2078 PetscFunctionReturn(0); 2079 } 2080 2081 #if defined(PETSC_HAVE_CUDA) 2082 /*@C 2083 MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an 2084 array provided by the user. This is useful to avoid copying an array 2085 into a matrix 2086 2087 Not Collective 2088 2089 Input Parameters: 2090 + mat - the matrix 2091 - array - the array in column major order 2092 2093 Notes: 2094 You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be 2095 freed when the matrix is destroyed. The array must have been allocated with cudaMalloc(). 2096 2097 Level: developer 2098 2099 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAResetArray()` 2100 @*/ 2101 PetscErrorCode MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array) 2102 { 2103 PetscFunctionBegin; 2104 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2105 PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array)); 2106 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2107 mat->offloadmask = PETSC_OFFLOAD_GPU; 2108 PetscFunctionReturn(0); 2109 } 2110 2111 /*@C 2112 MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray() 2113 2114 Not Collective 2115 2116 Input Parameters: 2117 . mat - the matrix 2118 2119 Notes: 2120 You can only call this after a call to MatDenseCUDAPlaceArray() 2121 2122 Level: developer 2123 2124 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()` 2125 2126 @*/ 2127 PetscErrorCode MatDenseCUDAResetArray(Mat mat) 2128 { 2129 PetscFunctionBegin; 2130 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2131 PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat)); 2132 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2133 PetscFunctionReturn(0); 2134 } 2135 2136 /*@C 2137 MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an 2138 array provided by the user. This is useful to avoid copying an array 2139 into a matrix 2140 2141 Not Collective 2142 2143 Input Parameters: 2144 + mat - the matrix 2145 - array - the array in column major order 2146 2147 Notes: 2148 This permanently replaces the GPU array and frees the memory associated with the old GPU array. 2149 The memory passed in CANNOT be freed by the user. It will be freed 2150 when the matrix is destroyed. The array should respect the matrix leading dimension. 
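
   A possible usage sketch; the names and sizes are illustrative, and the buffer is assumed to be
   device memory (for example obtained with cudaMalloc()) laid out like the array it replaces:

      PetscScalar *d_arr;
      cudaMalloc((void**)&d_arr,lda*ncols*sizeof(PetscScalar));
      ... fill d_arr on the device ...
      MatDenseCUDAReplaceArray(mat,d_arr);

   After this call the matrix owns d_arr and the user must not free it.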
2151 2152 Level: developer 2153 2154 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDAPlaceArray()`, `MatDenseCUDAResetArray()` 2155 @*/ 2156 PetscErrorCode MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array) 2157 { 2158 PetscFunctionBegin; 2159 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2160 PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array)); 2161 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2162 mat->offloadmask = PETSC_OFFLOAD_GPU; 2163 PetscFunctionReturn(0); 2164 } 2165 2166 /*@C 2167 MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix. 2168 2169 Not Collective 2170 2171 Input Parameters: 2172 . A - the matrix 2173 2174 Output Parameters 2175 . array - the GPU array in column major order 2176 2177 Notes: 2178 The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed. 2179 2180 Level: developer 2181 2182 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArrayRead()` 2183 @*/ 2184 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a) 2185 { 2186 PetscFunctionBegin; 2187 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2188 PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a)); 2189 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2190 PetscFunctionReturn(0); 2191 } 2192 2193 /*@C 2194 MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite(). 2195 2196 Not Collective 2197 2198 Input Parameters: 2199 + A - the matrix 2200 - array - the GPU array in column major order 2201 2202 Notes: 2203 2204 Level: developer 2205 2206 .seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()` 2207 @*/ 2208 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a) 2209 { 2210 PetscFunctionBegin; 2211 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2212 PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a)); 2213 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2214 A->offloadmask = PETSC_OFFLOAD_GPU; 2215 PetscFunctionReturn(0); 2216 } 2217 2218 /*@C 2219 MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed. 2220 2221 Not Collective 2222 2223 Input Parameters: 2224 . A - the matrix 2225 2226 Output Parameters 2227 . array - the GPU array in column major order 2228 2229 Notes: 2230 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). 
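
   A small sketch of the expected get/restore pairing (the matrix name is illustrative):

      const PetscScalar *d_v;
      MatDenseCUDAGetArrayRead(A,&d_v);
      ... pass d_v to a kernel or library routine that only reads it ...
      MatDenseCUDARestoreArrayRead(A,&d_v);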

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
@*/
PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Notes:
   Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite().

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDAGetArrayRead()`
@*/
PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed.

   Not Collective

   Input Parameter:
.  A - the matrix

   Output Parameter:
.  array - the GPU array in column major order

   Notes:
   Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().

   Level: developer

.seealso: `MatDenseCUDAGetArrayRead()`, `MatDenseCUDARestoreArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`
@*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Level: developer

.seealso: `MatDenseCUDAGetArray()`, `MatDenseCUDARestoreArrayWrite()`, `MatDenseCUDAGetArrayWrite()`, `MatDenseCUDARestoreArrayRead()`, `MatDenseCUDAGetArrayRead()`
@*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));
  PetscCall(PetscObjectStateIncrease((PetscObject)A));
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
#endif

/*@C
   MatCreateDense - Creates a matrix in dense format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.
m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2328 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2329 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 2330 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 2331 - data - optional location of matrix data. Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 2332 to control all matrix memory allocation. 2333 2334 Output Parameter: 2335 . A - the matrix 2336 2337 Notes: 2338 The dense format is fully compatible with standard Fortran 77 2339 storage by columns. 2340 Note that, although local portions of the matrix are stored in column-major 2341 order, the matrix is partitioned across MPI ranks by row. 2342 2343 The data input variable is intended primarily for Fortran programmers 2344 who wish to allocate their own matrix memory space. Most users should 2345 set data=NULL (PETSC_NULL_SCALAR for Fortran users). 2346 2347 The user MUST specify either the local or global matrix dimensions 2348 (possibly both). 2349 2350 Level: intermediate 2351 2352 .seealso: `MatCreate()`, `MatCreateSeqDense()`, `MatSetValues()` 2353 @*/ 2354 PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 2355 { 2356 PetscFunctionBegin; 2357 PetscCall(MatCreate(comm,A)); 2358 PetscCall(MatSetSizes(*A,m,n,M,N)); 2359 PetscCall(MatSetType(*A,MATDENSE)); 2360 PetscCall(MatSeqDenseSetPreallocation(*A,data)); 2361 PetscCall(MatMPIDenseSetPreallocation(*A,data)); 2362 PetscFunctionReturn(0); 2363 } 2364 2365 #if defined(PETSC_HAVE_CUDA) 2366 /*@C 2367 MatCreateDenseCUDA - Creates a matrix in dense format using CUDA. 2368 2369 Collective 2370 2371 Input Parameters: 2372 + comm - MPI communicator 2373 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2374 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2375 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 2376 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 2377 - data - optional location of GPU matrix data. Set data=NULL for PETSc 2378 to control matrix memory allocation. 2379 2380 Output Parameter: 2381 . 
A - the matrix 2382 2383 Notes: 2384 2385 Level: intermediate 2386 2387 .seealso: `MatCreate()`, `MatCreateDense()` 2388 @*/ 2389 PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 2390 { 2391 PetscFunctionBegin; 2392 PetscCall(MatCreate(comm,A)); 2393 PetscValidLogicalCollectiveBool(*A,!!data,6); 2394 PetscCall(MatSetSizes(*A,m,n,M,N)); 2395 PetscCall(MatSetType(*A,MATDENSECUDA)); 2396 PetscCall(MatSeqDenseCUDASetPreallocation(*A,data)); 2397 PetscCall(MatMPIDenseCUDASetPreallocation(*A,data)); 2398 PetscFunctionReturn(0); 2399 } 2400 #endif 2401 2402 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 2403 { 2404 Mat mat; 2405 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 2406 2407 PetscFunctionBegin; 2408 *newmat = NULL; 2409 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&mat)); 2410 PetscCall(MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N)); 2411 PetscCall(MatSetType(mat,((PetscObject)A)->type_name)); 2412 a = (Mat_MPIDense*)mat->data; 2413 2414 mat->factortype = A->factortype; 2415 mat->assembled = PETSC_TRUE; 2416 mat->preallocated = PETSC_TRUE; 2417 2418 mat->insertmode = NOT_SET_VALUES; 2419 a->donotstash = oldmat->donotstash; 2420 2421 PetscCall(PetscLayoutReference(A->rmap,&mat->rmap)); 2422 PetscCall(PetscLayoutReference(A->cmap,&mat->cmap)); 2423 2424 PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A)); 2425 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A)); 2426 2427 *newmat = mat; 2428 PetscFunctionReturn(0); 2429 } 2430 2431 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer) 2432 { 2433 PetscBool isbinary; 2434 #if defined(PETSC_HAVE_HDF5) 2435 PetscBool ishdf5; 2436 #endif 2437 2438 PetscFunctionBegin; 2439 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2440 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2441 /* force binary viewer to load .info file if it has not yet done so */ 2442 PetscCall(PetscViewerSetUp(viewer)); 2443 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 2444 #if defined(PETSC_HAVE_HDF5) 2445 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5)); 2446 #endif 2447 if (isbinary) { 2448 PetscCall(MatLoad_Dense_Binary(newMat,viewer)); 2449 #if defined(PETSC_HAVE_HDF5) 2450 } else if (ishdf5) { 2451 PetscCall(MatLoad_Dense_HDF5(newMat,viewer)); 2452 #endif 2453 } else SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2454 PetscFunctionReturn(0); 2455 } 2456 2457 static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag) 2458 { 2459 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 2460 Mat a,b; 2461 2462 PetscFunctionBegin; 2463 a = matA->A; 2464 b = matB->A; 2465 PetscCall(MatEqual(a,b,flag)); 2466 PetscCall(MPIU_Allreduce(MPI_IN_PLACE,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A))); 2467 PetscFunctionReturn(0); 2468 } 2469 2470 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data) 2471 { 2472 Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data; 2473 2474 PetscFunctionBegin; 2475 PetscCall(PetscFree2(atb->sendbuf,atb->recvcounts)); 2476 PetscCall(MatDestroy(&atb->atb)); 2477 PetscCall(PetscFree(atb)); 2478 PetscFunctionReturn(0); 2479 } 2480 2481 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data) 2482 { 2483 
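  /* release the communication buffers and the receive counts/displacements cached for the C = A*B^T product */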
Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data; 2484 2485 PetscFunctionBegin; 2486 PetscCall(PetscFree2(abt->buf[0],abt->buf[1])); 2487 PetscCall(PetscFree2(abt->recvcounts,abt->recvdispls)); 2488 PetscCall(PetscFree(abt)); 2489 PetscFunctionReturn(0); 2490 } 2491 2492 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2493 { 2494 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2495 Mat_TransMatMultDense *atb; 2496 MPI_Comm comm; 2497 PetscMPIInt size,*recvcounts; 2498 PetscScalar *carray,*sendbuf; 2499 const PetscScalar *atbarray; 2500 PetscInt i,cN=C->cmap->N,proc,k,j,lda; 2501 const PetscInt *ranges; 2502 2503 PetscFunctionBegin; 2504 MatCheckProduct(C,3); 2505 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2506 atb = (Mat_TransMatMultDense *)C->product->data; 2507 recvcounts = atb->recvcounts; 2508 sendbuf = atb->sendbuf; 2509 2510 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 2511 PetscCallMPI(MPI_Comm_size(comm,&size)); 2512 2513 /* compute atbarray = aseq^T * bseq */ 2514 PetscCall(MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb)); 2515 2516 PetscCall(MatGetOwnershipRanges(C,&ranges)); 2517 2518 /* arrange atbarray into sendbuf */ 2519 PetscCall(MatDenseGetArrayRead(atb->atb,&atbarray)); 2520 PetscCall(MatDenseGetLDA(atb->atb,&lda)); 2521 for (proc=0, k=0; proc<size; proc++) { 2522 for (j=0; j<cN; j++) { 2523 for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*lda]; 2524 } 2525 } 2526 PetscCall(MatDenseRestoreArrayRead(atb->atb,&atbarray)); 2527 2528 /* sum all atbarray to local values of C */ 2529 PetscCall(MatDenseGetArrayWrite(c->A,&carray)); 2530 PetscCallMPI(MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm)); 2531 PetscCall(MatDenseRestoreArrayWrite(c->A,&carray)); 2532 PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); 2533 PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); 2534 PetscFunctionReturn(0); 2535 } 2536 2537 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 2538 { 2539 MPI_Comm comm; 2540 PetscMPIInt size; 2541 PetscInt cm=A->cmap->n,cM,cN=B->cmap->N; 2542 Mat_TransMatMultDense *atb; 2543 PetscBool cisdense; 2544 PetscInt i; 2545 const PetscInt *ranges; 2546 2547 PetscFunctionBegin; 2548 MatCheckProduct(C,3); 2549 PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2550 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 2551 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) { 2552 SETERRQ(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2553 } 2554 2555 /* create matrix product C */ 2556 PetscCall(MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N)); 2557 PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"")); 2558 if (!cisdense) { 2559 PetscCall(MatSetType(C,((PetscObject)A)->type_name)); 2560 } 2561 PetscCall(MatSetUp(C)); 2562 2563 /* create data structure for reuse C */ 2564 PetscCallMPI(MPI_Comm_size(comm,&size)); 2565 PetscCall(PetscNew(&atb)); 2566 cM = C->rmap->N; 2567 PetscCall(PetscMalloc2(cM*cN,&atb->sendbuf,size,&atb->recvcounts)); 2568 PetscCall(MatGetOwnershipRanges(C,&ranges)); 2569 
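  /* each rank receives its block of rows of C from the reduce-scatter in the numeric phase:
     (number of locally owned rows of C) * (global number of columns) values */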
for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN; 2570 2571 C->product->data = atb; 2572 C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 2573 PetscFunctionReturn(0); 2574 } 2575 2576 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2577 { 2578 MPI_Comm comm; 2579 PetscMPIInt i, size; 2580 PetscInt maxRows, bufsiz; 2581 PetscMPIInt tag; 2582 PetscInt alg; 2583 Mat_MatTransMultDense *abt; 2584 Mat_Product *product = C->product; 2585 PetscBool flg; 2586 2587 PetscFunctionBegin; 2588 MatCheckProduct(C,4); 2589 PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2590 /* check local size of A and B */ 2591 PetscCheck(A->cmap->n == B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")",A->cmap->n,B->cmap->n); 2592 2593 PetscCall(PetscStrcmp(product->alg,"allgatherv",&flg)); 2594 alg = flg ? 0 : 1; 2595 2596 /* setup matrix product C */ 2597 PetscCall(MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N)); 2598 PetscCall(MatSetType(C,MATMPIDENSE)); 2599 PetscCall(MatSetUp(C)); 2600 PetscCall(PetscObjectGetNewTag((PetscObject)C,&tag)); 2601 2602 /* create data structure for reuse C */ 2603 PetscCall(PetscObjectGetComm((PetscObject)C,&comm)); 2604 PetscCallMPI(MPI_Comm_size(comm,&size)); 2605 PetscCall(PetscNew(&abt)); 2606 abt->tag = tag; 2607 abt->alg = alg; 2608 switch (alg) { 2609 case 1: /* alg: "cyclic" */ 2610 for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i])); 2611 bufsiz = A->cmap->N * maxRows; 2612 PetscCall(PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]))); 2613 break; 2614 default: /* alg: "allgatherv" */ 2615 PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]))); 2616 PetscCall(PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls))); 2617 for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N; 2618 for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i]; 2619 break; 2620 } 2621 2622 C->product->data = abt; 2623 C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 2624 C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense; 2625 PetscFunctionReturn(0); 2626 } 2627 2628 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) 2629 { 2630 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2631 Mat_MatTransMultDense *abt; 2632 MPI_Comm comm; 2633 PetscMPIInt rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom; 2634 PetscScalar *sendbuf, *recvbuf=NULL, *cv; 2635 PetscInt i,cK=A->cmap->N,k,j,bn; 2636 PetscScalar _DOne=1.0,_DZero=0.0; 2637 const PetscScalar *av,*bv; 2638 PetscBLASInt cm, cn, ck, alda, blda = 0, clda; 2639 MPI_Request reqs[2]; 2640 const PetscInt *ranges; 2641 2642 PetscFunctionBegin; 2643 MatCheckProduct(C,3); 2644 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2645 abt = (Mat_MatTransMultDense*)C->product->data; 2646 PetscCall(PetscObjectGetComm((PetscObject)C,&comm)); 2647 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 2648 PetscCallMPI(MPI_Comm_size(comm,&size)); 2649 PetscCall(MatDenseGetArrayRead(a->A,&av)); 2650 PetscCall(MatDenseGetArrayRead(b->A,&bv)); 2651 
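  /* write access to the local block of C, plus the leading dimensions needed by the BLASgemm call below */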
PetscCall(MatDenseGetArrayWrite(c->A,&cv)); 2652 PetscCall(MatDenseGetLDA(a->A,&i)); 2653 PetscCall(PetscBLASIntCast(i,&alda)); 2654 PetscCall(MatDenseGetLDA(b->A,&i)); 2655 PetscCall(PetscBLASIntCast(i,&blda)); 2656 PetscCall(MatDenseGetLDA(c->A,&i)); 2657 PetscCall(PetscBLASIntCast(i,&clda)); 2658 PetscCall(MatGetOwnershipRanges(B,&ranges)); 2659 bn = B->rmap->n; 2660 if (blda == bn) { 2661 sendbuf = (PetscScalar*)bv; 2662 } else { 2663 sendbuf = abt->buf[0]; 2664 for (k = 0, i = 0; i < cK; i++) { 2665 for (j = 0; j < bn; j++, k++) { 2666 sendbuf[k] = bv[i * blda + j]; 2667 } 2668 } 2669 } 2670 if (size > 1) { 2671 sendto = (rank + size - 1) % size; 2672 recvfrom = (rank + size + 1) % size; 2673 } else { 2674 sendto = recvfrom = 0; 2675 } 2676 PetscCall(PetscBLASIntCast(cK,&ck)); 2677 PetscCall(PetscBLASIntCast(c->A->rmap->n,&cm)); 2678 recvisfrom = rank; 2679 for (i = 0; i < size; i++) { 2680 /* we have finished receiving in sending, bufs can be read/modified */ 2681 PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 2682 PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 2683 2684 if (nextrecvisfrom != rank) { 2685 /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2686 sendsiz = cK * bn; 2687 recvsiz = cK * nextbn; 2688 recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; 2689 PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0])); 2690 PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1])); 2691 } 2692 2693 /* local aseq * sendbuf^T */ 2694 PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn)); 2695 if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda)); 2696 2697 if (nextrecvisfrom != rank) { 2698 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2699 PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE)); 2700 } 2701 bn = nextbn; 2702 recvisfrom = nextrecvisfrom; 2703 sendbuf = recvbuf; 2704 } 2705 PetscCall(MatDenseRestoreArrayRead(a->A,&av)); 2706 PetscCall(MatDenseRestoreArrayRead(b->A,&bv)); 2707 PetscCall(MatDenseRestoreArrayWrite(c->A,&cv)); 2708 PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); 2709 PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); 2710 PetscFunctionReturn(0); 2711 } 2712 2713 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2714 { 2715 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2716 Mat_MatTransMultDense *abt; 2717 MPI_Comm comm; 2718 PetscMPIInt size; 2719 PetscScalar *cv, *sendbuf, *recvbuf; 2720 const PetscScalar *av,*bv; 2721 PetscInt blda,i,cK=A->cmap->N,k,j,bn; 2722 PetscScalar _DOne=1.0,_DZero=0.0; 2723 PetscBLASInt cm, cn, ck, alda, clda; 2724 2725 PetscFunctionBegin; 2726 MatCheckProduct(C,3); 2727 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2728 abt = (Mat_MatTransMultDense*)C->product->data; 2729 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 2730 PetscCallMPI(MPI_Comm_size(comm,&size)); 2731 PetscCall(MatDenseGetArrayRead(a->A,&av)); 2732 PetscCall(MatDenseGetArrayRead(b->A,&bv)); 2733 PetscCall(MatDenseGetArrayWrite(c->A,&cv)); 2734 PetscCall(MatDenseGetLDA(a->A,&i)); 2735 PetscCall(PetscBLASIntCast(i,&alda)); 2736 PetscCall(MatDenseGetLDA(b->A,&blda)); 2737 
PetscCall(MatDenseGetLDA(c->A,&i)); 2738 PetscCall(PetscBLASIntCast(i,&clda)); 2739 /* copy transpose of B into buf[0] */ 2740 bn = B->rmap->n; 2741 sendbuf = abt->buf[0]; 2742 recvbuf = abt->buf[1]; 2743 for (k = 0, j = 0; j < bn; j++) { 2744 for (i = 0; i < cK; i++, k++) { 2745 sendbuf[k] = bv[i * blda + j]; 2746 } 2747 } 2748 PetscCall(MatDenseRestoreArrayRead(b->A,&bv)); 2749 PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm)); 2750 PetscCall(PetscBLASIntCast(cK,&ck)); 2751 PetscCall(PetscBLASIntCast(c->A->rmap->n,&cm)); 2752 PetscCall(PetscBLASIntCast(c->A->cmap->n,&cn)); 2753 if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda)); 2754 PetscCall(MatDenseRestoreArrayRead(a->A,&av)); 2755 PetscCall(MatDenseRestoreArrayRead(b->A,&bv)); 2756 PetscCall(MatDenseRestoreArrayWrite(c->A,&cv)); 2757 PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); 2758 PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); 2759 PetscFunctionReturn(0); 2760 } 2761 2762 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2763 { 2764 Mat_MatTransMultDense *abt; 2765 2766 PetscFunctionBegin; 2767 MatCheckProduct(C,3); 2768 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2769 abt = (Mat_MatTransMultDense*)C->product->data; 2770 switch (abt->alg) { 2771 case 1: 2772 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); 2773 break; 2774 default: 2775 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); 2776 break; 2777 } 2778 PetscFunctionReturn(0); 2779 } 2780 2781 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) 2782 { 2783 Mat_MatMultDense *ab = (Mat_MatMultDense*)data; 2784 2785 PetscFunctionBegin; 2786 PetscCall(MatDestroy(&ab->Ce)); 2787 PetscCall(MatDestroy(&ab->Ae)); 2788 PetscCall(MatDestroy(&ab->Be)); 2789 PetscCall(PetscFree(ab)); 2790 PetscFunctionReturn(0); 2791 } 2792 2793 #if defined(PETSC_HAVE_ELEMENTAL) 2794 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2795 { 2796 Mat_MatMultDense *ab; 2797 2798 PetscFunctionBegin; 2799 MatCheckProduct(C,3); 2800 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data"); 2801 ab = (Mat_MatMultDense*)C->product->data; 2802 PetscCall(MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae)); 2803 PetscCall(MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be)); 2804 PetscCall(MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce)); 2805 PetscCall(MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C)); 2806 PetscFunctionReturn(0); 2807 } 2808 2809 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 2810 { 2811 Mat Ae,Be,Ce; 2812 Mat_MatMultDense *ab; 2813 2814 PetscFunctionBegin; 2815 MatCheckProduct(C,4); 2816 PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2817 /* check local size of A and B */ 2818 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 2819 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2820 } 2821 2822 /* create elemental matrices Ae and Be */ 2823 
PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae)); 2824 PetscCall(MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N)); 2825 PetscCall(MatSetType(Ae,MATELEMENTAL)); 2826 PetscCall(MatSetUp(Ae)); 2827 PetscCall(MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE)); 2828 2829 PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be)); 2830 PetscCall(MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N)); 2831 PetscCall(MatSetType(Be,MATELEMENTAL)); 2832 PetscCall(MatSetUp(Be)); 2833 PetscCall(MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE)); 2834 2835 /* compute symbolic Ce = Ae*Be */ 2836 PetscCall(MatCreate(PetscObjectComm((PetscObject)C),&Ce)); 2837 PetscCall(MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce)); 2838 2839 /* setup C */ 2840 PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE)); 2841 PetscCall(MatSetType(C,MATDENSE)); 2842 PetscCall(MatSetUp(C)); 2843 2844 /* create data structure for reuse Cdense */ 2845 PetscCall(PetscNew(&ab)); 2846 ab->Ae = Ae; 2847 ab->Be = Be; 2848 ab->Ce = Ce; 2849 2850 C->product->data = ab; 2851 C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2852 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2853 PetscFunctionReturn(0); 2854 } 2855 #endif 2856 /* ----------------------------------------------- */ 2857 #if defined(PETSC_HAVE_ELEMENTAL) 2858 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 2859 { 2860 PetscFunctionBegin; 2861 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 2862 C->ops->productsymbolic = MatProductSymbolic_AB; 2863 PetscFunctionReturn(0); 2864 } 2865 #endif 2866 2867 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 2868 { 2869 Mat_Product *product = C->product; 2870 Mat A = product->A,B=product->B; 2871 2872 PetscFunctionBegin; 2873 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) 2874 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2875 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense; 2876 C->ops->productsymbolic = MatProductSymbolic_AtB; 2877 PetscFunctionReturn(0); 2878 } 2879 2880 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 2881 { 2882 Mat_Product *product = C->product; 2883 const char *algTypes[2] = {"allgatherv","cyclic"}; 2884 PetscInt alg,nalg = 2; 2885 PetscBool flg = PETSC_FALSE; 2886 2887 PetscFunctionBegin; 2888 /* Set default algorithm */ 2889 alg = 0; /* default is allgatherv */ 2890 PetscCall(PetscStrcmp(product->alg,"default",&flg)); 2891 if (flg) { 2892 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 2893 } 2894 2895 /* Get runtime option */ 2896 if (product->api_user) { 2897 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat"); 2898 PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg)); 2899 PetscOptionsEnd(); 2900 } else { 2901 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat"); 2902 PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg)); 2903 PetscOptionsEnd(); 2904 } 2905 if (flg) { 2906 
    PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
  }

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_ABt;
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
{
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  switch (product->type) {
#if defined(PETSC_HAVE_ELEMENTAL)
  case MATPRODUCT_AB:
    PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
    break;
#endif
  case MATPRODUCT_AtB:
    PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
    break;
  case MATPRODUCT_ABt:
    PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
    break;
  default:
    break;
  }
  PetscFunctionReturn(0);
}
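
/*
   A hedged sketch of how the product kernels above are typically reached through the public
   MatProduct API; the matrix names are illustrative and A, B are assumed to be MATMPIDENSE
   with compatible layouts:

      Mat A,B,C;
      ...
      MatProductCreate(A,B,NULL,&C);
      MatProductSetType(C,MATPRODUCT_ABt);
      MatProductSetAlgorithm(C,"cyclic");    (or the default "allgatherv")
      MatProductSetFromOptions(C);
      MatProductSymbolic(C);
      MatProductNumeric(C);

   The same result can be obtained with the convenience routine
   MatMatTransposeMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C).
*/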