/*
   Basic functions for basic parallel dense matrices.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h>    /*I "petscmat.h" I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@

      MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
              matrix that represents the operator. For sequential matrices it returns itself.

    Input Parameter:
.      A - the Seq or MPI dense matrix

    Output Parameter:
.      B - the inner matrix

    Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  PetscBool    flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscValidPointer(B,2);
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg));
  if (flg) *B = mat->A;
  else {
    PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg));
    PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name);
    *B = A;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
{
  Mat_MPIDense *Amat = (Mat_MPIDense*)A->data;
  Mat_MPIDense *Bmat = (Mat_MPIDense*)B->data;

  PetscFunctionBegin;
  PetscCall(MatCopy(Amat->A,Bmat->A,s));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  PetscInt     lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheckFalse(row < rstart || row >= rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  PetscCall(MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  PetscInt     lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheckFalse(row < rstart || row >= rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  PetscCall(MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  PetscInt     m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm     comm;
  PetscBool    flg;
  Mat          B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A,&flg));
  PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */

    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg));
    PetscCheck(!flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA);
    PetscCall(PetscObjectGetComm((PetscObject)(mdn->A),&comm));
    PetscCall(MatCreate(comm,&B));
    PetscCall(MatSetSizes(B,m,m,m,m));
    PetscCall(MatSetType(B,((PetscObject)mdn->A)->type_name));
    PetscCall(MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array));
    PetscCall(MatSeqDenseSetPreallocation(B,array+m*rstart));
    PetscCall(MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array));
    PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY));
    PetscCall(PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B));
    *a = B;
    PetscCall(MatDestroy(&B));
  } else *a = B;
  PetscFunctionReturn(0);
}

PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPIDense *A = (Mat_MPIDense*)mat->data;
  PetscInt     i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;
  PetscBool    roworiented = A->roworiented;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue;
    PetscCheckFalse(idxm[i] >= mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      if (roworiented) {
        PetscCall(MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv));
      } else {
        for (j=0; j<n; j++) {
          if (idxn[j] < 0) continue;
          PetscCheckFalse(idxn[j] >= mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
          PetscCall(MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv));
        }
      }
    } else if (!A->donotstash) {
      mat->assembled = PETSC_FALSE;
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE));
      }
    }
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  PetscInt     i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
        PetscCall(MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j));
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseGetLDA(a->A,lda));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;
  PetscBool    iscuda;

  PetscFunctionBegin;
  if (!a->A) {
    PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
    PetscCall(PetscLayoutSetUp(A->rmap));
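    /*
       The inner sequential matrix is created lazily in this branch so that the leading
       dimension can be requested before the storage is preallocated.  A minimal usage
       sketch (hypothetical application code, not part of this file); lda must be at
       least the local row count:

         PetscCall(MatDenseSetLDA(A,lda));
         PetscCall(MatMPIDenseSetPreallocation(A,NULL));
    */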
PetscCall(PetscLayoutSetUp(A->cmap)); 182 PetscCall(MatCreate(PETSC_COMM_SELF,&a->A)); 183 PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->A)); 184 PetscCall(MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N)); 185 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda)); 186 PetscCall(MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE)); 187 } 188 PetscCall(MatDenseSetLDA(a->A,lda)); 189 PetscFunctionReturn(0); 190 } 191 192 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array) 193 { 194 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 195 196 PetscFunctionBegin; 197 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 198 PetscCall(MatDenseGetArray(a->A,array)); 199 PetscFunctionReturn(0); 200 } 201 202 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array) 203 { 204 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 205 206 PetscFunctionBegin; 207 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 208 PetscCall(MatDenseGetArrayRead(a->A,array)); 209 PetscFunctionReturn(0); 210 } 211 212 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array) 213 { 214 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 215 216 PetscFunctionBegin; 217 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 218 PetscCall(MatDenseGetArrayWrite(a->A,array)); 219 PetscFunctionReturn(0); 220 } 221 222 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array) 223 { 224 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 225 226 PetscFunctionBegin; 227 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 228 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 229 PetscCall(MatDensePlaceArray(a->A,array)); 230 PetscFunctionReturn(0); 231 } 232 233 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 234 { 235 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 236 237 PetscFunctionBegin; 238 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 239 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 240 PetscCall(MatDenseResetArray(a->A)); 241 PetscFunctionReturn(0); 242 } 243 244 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array) 245 { 246 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 247 248 PetscFunctionBegin; 249 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 250 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 251 PetscCall(MatDenseReplaceArray(a->A,array)); 252 PetscFunctionReturn(0); 253 } 254 255 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 256 { 257 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 258 PetscInt lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 259 const PetscInt *irow,*icol; 260 const PetscScalar *v; 261 PetscScalar *bv; 262 Mat newmat; 263 IS iscol_local; 264 MPI_Comm comm_is,comm_mat; 265 266 PetscFunctionBegin; 267 PetscCall(PetscObjectGetComm((PetscObject)A,&comm_mat)); 268 
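  /*
     Note on the array accessors wrapped above: MatDensePlaceArray() temporarily
     substitutes caller-provided storage for the matrix entries and must be matched by
     MatDenseResetArray() before the original storage is used again.  A hedged sketch of
     the calling pattern, with "work" a caller-owned array of the correct local size:

       PetscCall(MatDensePlaceArray(A,work));
       ... use A, e.g. MatMult(A,x,y) ...
       PetscCall(MatDenseResetArray(A));
  */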
PetscCall(PetscObjectGetComm((PetscObject)iscol,&comm_is)); 269 PetscCheckFalse(comm_mat != comm_is,PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator"); 270 271 PetscCall(ISAllGather(iscol,&iscol_local)); 272 PetscCall(ISGetIndices(isrow,&irow)); 273 PetscCall(ISGetIndices(iscol_local,&icol)); 274 PetscCall(ISGetLocalSize(isrow,&nrows)); 275 PetscCall(ISGetLocalSize(iscol,&ncols)); 276 PetscCall(ISGetSize(iscol,&Ncols)); /* global number of columns, size of iscol_local */ 277 278 /* No parallel redistribution currently supported! Should really check each index set 279 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 280 original matrix! */ 281 282 PetscCall(MatGetLocalSize(A,&nlrows,&nlcols)); 283 PetscCall(MatGetOwnershipRange(A,&rstart,&rend)); 284 285 /* Check submatrix call */ 286 if (scall == MAT_REUSE_MATRIX) { 287 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 288 /* Really need to test rows and column sizes! */ 289 newmat = *B; 290 } else { 291 /* Create and fill new matrix */ 292 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&newmat)); 293 PetscCall(MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols)); 294 PetscCall(MatSetType(newmat,((PetscObject)A)->type_name)); 295 PetscCall(MatMPIDenseSetPreallocation(newmat,NULL)); 296 } 297 298 /* Now extract the data pointers and do the copy, column at a time */ 299 newmatd = (Mat_MPIDense*)newmat->data; 300 PetscCall(MatDenseGetArray(newmatd->A,&bv)); 301 PetscCall(MatDenseGetArrayRead(mat->A,&v)); 302 PetscCall(MatDenseGetLDA(mat->A,&lda)); 303 for (i=0; i<Ncols; i++) { 304 const PetscScalar *av = v + lda*icol[i]; 305 for (j=0; j<nrows; j++) { 306 *bv++ = av[irow[j] - rstart]; 307 } 308 } 309 PetscCall(MatDenseRestoreArrayRead(mat->A,&v)); 310 PetscCall(MatDenseRestoreArray(newmatd->A,&bv)); 311 312 /* Assemble the matrices so that the correct flags are set */ 313 PetscCall(MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY)); 314 PetscCall(MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY)); 315 316 /* Free work space */ 317 PetscCall(ISRestoreIndices(isrow,&irow)); 318 PetscCall(ISRestoreIndices(iscol_local,&icol)); 319 PetscCall(ISDestroy(&iscol_local)); 320 *B = newmat; 321 PetscFunctionReturn(0); 322 } 323 324 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array) 325 { 326 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 327 328 PetscFunctionBegin; 329 PetscCall(MatDenseRestoreArray(a->A,array)); 330 PetscFunctionReturn(0); 331 } 332 333 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array) 334 { 335 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 336 337 PetscFunctionBegin; 338 PetscCall(MatDenseRestoreArrayRead(a->A,array)); 339 PetscFunctionReturn(0); 340 } 341 342 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array) 343 { 344 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 345 346 PetscFunctionBegin; 347 PetscCall(MatDenseRestoreArrayWrite(a->A,array)); 348 PetscFunctionReturn(0); 349 } 350 351 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 352 { 353 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 354 PetscInt nstash,reallocs; 355 356 PetscFunctionBegin; 357 if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0); 358 359 PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range)); 360 PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs)); 361 PetscCall(PetscInfo(mdn->A,"Stash has %" PetscInt_FMT " entries, uses %" 
PetscInt_FMT " mallocs.\n",nstash,reallocs)); 362 PetscFunctionReturn(0); 363 } 364 365 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 366 { 367 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 368 PetscInt i,*row,*col,flg,j,rstart,ncols; 369 PetscMPIInt n; 370 PetscScalar *val; 371 372 PetscFunctionBegin; 373 if (!mdn->donotstash && !mat->nooffprocentries) { 374 /* wait on receives */ 375 while (1) { 376 PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg)); 377 if (!flg) break; 378 379 for (i=0; i<n;) { 380 /* Now identify the consecutive vals belonging to the same row */ 381 for (j=i,rstart=row[j]; j<n; j++) { 382 if (row[j] != rstart) break; 383 } 384 if (j < n) ncols = j-i; 385 else ncols = n-i; 386 /* Now assemble all these values with a single function call */ 387 PetscCall(MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode)); 388 i = j; 389 } 390 } 391 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 392 } 393 394 PetscCall(MatAssemblyBegin(mdn->A,mode)); 395 PetscCall(MatAssemblyEnd(mdn->A,mode)); 396 397 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 398 PetscCall(MatSetUpMultiply_MPIDense(mat)); 399 } 400 PetscFunctionReturn(0); 401 } 402 403 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 404 { 405 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 406 407 PetscFunctionBegin; 408 PetscCall(MatZeroEntries(l->A)); 409 PetscFunctionReturn(0); 410 } 411 412 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 413 { 414 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 415 PetscInt i,len,*lrows; 416 417 PetscFunctionBegin; 418 /* get locally owned rows */ 419 PetscCall(PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL)); 420 /* fix right hand side if needed */ 421 if (x && b) { 422 const PetscScalar *xx; 423 PetscScalar *bb; 424 425 PetscCall(VecGetArrayRead(x, &xx)); 426 PetscCall(VecGetArrayWrite(b, &bb)); 427 for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]]; 428 PetscCall(VecRestoreArrayRead(x, &xx)); 429 PetscCall(VecRestoreArrayWrite(b, &bb)); 430 } 431 PetscCall(MatZeroRows(l->A,len,lrows,0.0,NULL,NULL)); 432 if (diag != 0.0) { 433 Vec d; 434 435 PetscCall(MatCreateVecs(A,NULL,&d)); 436 PetscCall(VecSet(d,diag)); 437 PetscCall(MatDiagonalSet(A,d,INSERT_VALUES)); 438 PetscCall(VecDestroy(&d)); 439 } 440 PetscCall(PetscFree(lrows)); 441 PetscFunctionReturn(0); 442 } 443 444 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec); 445 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec); 446 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec); 447 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec); 448 449 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 450 { 451 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 452 const PetscScalar *ax; 453 PetscScalar *ay; 454 PetscMemType axmtype,aymtype; 455 456 PetscFunctionBegin; 457 PetscCall(VecGetArrayReadAndMemType(xx,&ax,&axmtype)); 458 PetscCall(VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype)); 459 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE)); 460 PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE)); 461 PetscCall(VecRestoreArrayAndMemType(mdn->lvec,&ay)); 462 PetscCall(VecRestoreArrayReadAndMemType(xx,&ax)); 463 PetscCall((*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy)); 464 PetscFunctionReturn(0); 465 } 466 467 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 468 { 469 Mat_MPIDense *mdn = 
(Mat_MPIDense*)mat->data; 470 const PetscScalar *ax; 471 PetscScalar *ay; 472 PetscMemType axmtype,aymtype; 473 474 PetscFunctionBegin; 475 PetscCall(VecGetArrayReadAndMemType(xx,&ax,&axmtype)); 476 PetscCall(VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype)); 477 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE)); 478 PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE)); 479 PetscCall(VecRestoreArrayAndMemType(mdn->lvec,&ay)); 480 PetscCall(VecRestoreArrayReadAndMemType(xx,&ax)); 481 PetscCall((*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz)); 482 PetscFunctionReturn(0); 483 } 484 485 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 486 { 487 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 488 const PetscScalar *ax; 489 PetscScalar *ay; 490 PetscMemType axmtype,aymtype; 491 492 PetscFunctionBegin; 493 PetscCall(VecSet(yy,0.0)); 494 PetscCall((*a->A->ops->multtranspose)(a->A,xx,a->lvec)); 495 PetscCall(VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype)); 496 PetscCall(VecGetArrayAndMemType(yy,&ay,&aymtype)); 497 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM)); 498 PetscCall(PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM)); 499 PetscCall(VecRestoreArrayReadAndMemType(a->lvec,&ax)); 500 PetscCall(VecRestoreArrayAndMemType(yy,&ay)); 501 PetscFunctionReturn(0); 502 } 503 504 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 505 { 506 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 507 const PetscScalar *ax; 508 PetscScalar *ay; 509 PetscMemType axmtype,aymtype; 510 511 PetscFunctionBegin; 512 PetscCall(VecCopy(yy,zz)); 513 PetscCall((*a->A->ops->multtranspose)(a->A,xx,a->lvec)); 514 PetscCall(VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype)); 515 PetscCall(VecGetArrayAndMemType(zz,&ay,&aymtype)); 516 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM)); 517 PetscCall(PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM)); 518 PetscCall(VecRestoreArrayReadAndMemType(a->lvec,&ax)); 519 PetscCall(VecRestoreArrayAndMemType(zz,&ay)); 520 PetscFunctionReturn(0); 521 } 522 523 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 524 { 525 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 526 PetscInt lda,len,i,n,m = A->rmap->n,radd; 527 PetscScalar *x,zero = 0.0; 528 const PetscScalar *av; 529 530 PetscFunctionBegin; 531 PetscCall(VecSet(v,zero)); 532 PetscCall(VecGetArray(v,&x)); 533 PetscCall(VecGetSize(v,&n)); 534 PetscCheckFalse(n != A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 535 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 536 radd = A->rmap->rstart*m; 537 PetscCall(MatDenseGetArrayRead(a->A,&av)); 538 PetscCall(MatDenseGetLDA(a->A,&lda)); 539 for (i=0; i<len; i++) { 540 x[i] = av[radd + i*lda + i]; 541 } 542 PetscCall(MatDenseRestoreArrayRead(a->A,&av)); 543 PetscCall(VecRestoreArray(v,&x)); 544 PetscFunctionReturn(0); 545 } 546 547 PetscErrorCode MatDestroy_MPIDense(Mat mat) 548 { 549 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 550 551 PetscFunctionBegin; 552 #if defined(PETSC_USE_LOG) 553 PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N); 554 #endif 555 PetscCall(MatStashDestroy_Private(&mat->stash)); 556 PetscCheck(!mdn->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 557 PetscCheck(!mdn->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call 
MatDenseRestoreSubMatrix() first"); 558 PetscCall(MatDestroy(&mdn->A)); 559 PetscCall(VecDestroy(&mdn->lvec)); 560 PetscCall(PetscSFDestroy(&mdn->Mvctx)); 561 PetscCall(VecDestroy(&mdn->cvec)); 562 PetscCall(MatDestroy(&mdn->cmat)); 563 564 PetscCall(PetscFree(mat->data)); 565 PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL)); 566 567 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL)); 568 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL)); 569 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL)); 570 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL)); 571 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL)); 572 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL)); 573 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL)); 574 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL)); 575 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL)); 576 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL)); 577 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL)); 578 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL)); 579 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",NULL)); 580 #if defined(PETSC_HAVE_ELEMENTAL) 581 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL)); 582 #endif 583 #if defined(PETSC_HAVE_SCALAPACK) 584 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL)); 585 #endif 586 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL)); 587 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL)); 588 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL)); 589 #if defined (PETSC_HAVE_CUDA) 590 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL)); 591 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL)); 592 #endif 593 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL)); 594 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL)); 595 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL)); 596 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL)); 597 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL)); 598 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL)); 599 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL)); 600 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL)); 601 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL)); 602 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL)); 603 PetscFunctionReturn(0); 604 } 605 606 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer); 607 608 #include <petscdraw.h> 609 static PetscErrorCode 
MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 610 { 611 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 612 PetscMPIInt rank; 613 PetscViewerType vtype; 614 PetscBool iascii,isdraw; 615 PetscViewer sviewer; 616 PetscViewerFormat format; 617 618 PetscFunctionBegin; 619 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank)); 620 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 621 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 622 if (iascii) { 623 PetscCall(PetscViewerGetType(viewer,&vtype)); 624 PetscCall(PetscViewerGetFormat(viewer,&format)); 625 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 626 MatInfo info; 627 PetscCall(MatGetInfo(mat,MAT_LOCAL,&info)); 628 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 629 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory)); 630 PetscCall(PetscViewerFlush(viewer)); 631 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 632 PetscCall(PetscSFView(mdn->Mvctx,viewer)); 633 PetscFunctionReturn(0); 634 } else if (format == PETSC_VIEWER_ASCII_INFO) { 635 PetscFunctionReturn(0); 636 } 637 } else if (isdraw) { 638 PetscDraw draw; 639 PetscBool isnull; 640 641 PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw)); 642 PetscCall(PetscDrawIsNull(draw,&isnull)); 643 if (isnull) PetscFunctionReturn(0); 644 } 645 646 { 647 /* assemble the entire matrix onto first processor. */ 648 Mat A; 649 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 650 PetscInt *cols; 651 PetscScalar *vals; 652 653 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat),&A)); 654 if (rank == 0) { 655 PetscCall(MatSetSizes(A,M,N,M,N)); 656 } else { 657 PetscCall(MatSetSizes(A,0,0,M,N)); 658 } 659 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 660 PetscCall(MatSetType(A,MATMPIDENSE)); 661 PetscCall(MatMPIDenseSetPreallocation(A,NULL)); 662 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)A)); 663 664 /* Copy the matrix ... 
This isn't the most efficient means, 665 but it's quick for now */ 666 A->insertmode = INSERT_VALUES; 667 668 row = mat->rmap->rstart; 669 m = mdn->A->rmap->n; 670 for (i=0; i<m; i++) { 671 PetscCall(MatGetRow_MPIDense(mat,row,&nz,&cols,&vals)); 672 PetscCall(MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES)); 673 PetscCall(MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals)); 674 row++; 675 } 676 677 PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 678 PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 679 PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 680 if (rank == 0) { 681 PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name)); 682 PetscCall(MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer)); 683 } 684 PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 685 PetscCall(PetscViewerFlush(viewer)); 686 PetscCall(MatDestroy(&A)); 687 } 688 PetscFunctionReturn(0); 689 } 690 691 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 692 { 693 PetscBool iascii,isbinary,isdraw,issocket; 694 695 PetscFunctionBegin; 696 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 697 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 698 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket)); 699 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 700 701 if (iascii || issocket || isdraw) { 702 PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat,viewer)); 703 } else if (isbinary) { 704 PetscCall(MatView_Dense_Binary(mat,viewer)); 705 } 706 PetscFunctionReturn(0); 707 } 708 709 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 710 { 711 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 712 Mat mdn = mat->A; 713 PetscLogDouble isend[5],irecv[5]; 714 715 PetscFunctionBegin; 716 info->block_size = 1.0; 717 718 PetscCall(MatGetInfo(mdn,MAT_LOCAL,info)); 719 720 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 721 isend[3] = info->memory; isend[4] = info->mallocs; 722 if (flag == MAT_LOCAL) { 723 info->nz_used = isend[0]; 724 info->nz_allocated = isend[1]; 725 info->nz_unneeded = isend[2]; 726 info->memory = isend[3]; 727 info->mallocs = isend[4]; 728 } else if (flag == MAT_GLOBAL_MAX) { 729 PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A))); 730 731 info->nz_used = irecv[0]; 732 info->nz_allocated = irecv[1]; 733 info->nz_unneeded = irecv[2]; 734 info->memory = irecv[3]; 735 info->mallocs = irecv[4]; 736 } else if (flag == MAT_GLOBAL_SUM) { 737 PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A))); 738 739 info->nz_used = irecv[0]; 740 info->nz_allocated = irecv[1]; 741 info->nz_unneeded = irecv[2]; 742 info->memory = irecv[3]; 743 info->mallocs = irecv[4]; 744 } 745 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 746 info->fill_ratio_needed = 0; 747 info->factor_mallocs = 0; 748 PetscFunctionReturn(0); 749 } 750 751 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 752 { 753 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 754 755 PetscFunctionBegin; 756 switch (op) { 757 case MAT_NEW_NONZERO_LOCATIONS: 758 case MAT_NEW_NONZERO_LOCATION_ERR: 759 case MAT_NEW_NONZERO_ALLOCATION_ERR: 760 MatCheckPreallocated(A,1); 761 PetscCall(MatSetOption(a->A,op,flg)); 762 break; 763 case MAT_ROW_ORIENTED: 764 
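    /*
       MAT_ROW_ORIENTED controls how the logically two-dimensional value array passed to
       MatSetValues() is interpreted: row-major when PETSC_TRUE (the default), column-major
       when PETSC_FALSE.  Hypothetical user sketch, where vals is m-by-n stored by columns:

         PetscCall(MatSetOption(A,MAT_ROW_ORIENTED,PETSC_FALSE));
         PetscCall(MatSetValues(A,m,rows,n,cols,vals,INSERT_VALUES));
    */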
MatCheckPreallocated(A,1); 765 a->roworiented = flg; 766 PetscCall(MatSetOption(a->A,op,flg)); 767 break; 768 case MAT_FORCE_DIAGONAL_ENTRIES: 769 case MAT_KEEP_NONZERO_PATTERN: 770 case MAT_USE_HASH_TABLE: 771 case MAT_SORTED_FULL: 772 PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op])); 773 break; 774 case MAT_IGNORE_OFF_PROC_ENTRIES: 775 a->donotstash = flg; 776 break; 777 case MAT_SYMMETRIC: 778 case MAT_STRUCTURALLY_SYMMETRIC: 779 case MAT_HERMITIAN: 780 case MAT_SYMMETRY_ETERNAL: 781 case MAT_IGNORE_LOWER_TRIANGULAR: 782 case MAT_IGNORE_ZERO_ENTRIES: 783 PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op])); 784 break; 785 default: 786 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 787 } 788 PetscFunctionReturn(0); 789 } 790 791 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 792 { 793 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 794 const PetscScalar *l; 795 PetscScalar x,*v,*vv,*r; 796 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda; 797 798 PetscFunctionBegin; 799 PetscCall(MatDenseGetArray(mdn->A,&vv)); 800 PetscCall(MatDenseGetLDA(mdn->A,&lda)); 801 PetscCall(MatGetLocalSize(A,&s2,&s3)); 802 if (ll) { 803 PetscCall(VecGetLocalSize(ll,&s2a)); 804 PetscCheckFalse(s2a != s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2); 805 PetscCall(VecGetArrayRead(ll,&l)); 806 for (i=0; i<m; i++) { 807 x = l[i]; 808 v = vv + i; 809 for (j=0; j<n; j++) { (*v) *= x; v+= lda;} 810 } 811 PetscCall(VecRestoreArrayRead(ll,&l)); 812 PetscCall(PetscLogFlops(1.0*n*m)); 813 } 814 if (rr) { 815 const PetscScalar *ar; 816 817 PetscCall(VecGetLocalSize(rr,&s3a)); 818 PetscCheckFalse(s3a != s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3); 819 PetscCall(VecGetArrayRead(rr,&ar)); 820 PetscCall(VecGetArray(mdn->lvec,&r)); 821 PetscCall(PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE)); 822 PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE)); 823 PetscCall(VecRestoreArrayRead(rr,&ar)); 824 for (i=0; i<n; i++) { 825 x = r[i]; 826 v = vv + i*lda; 827 for (j=0; j<m; j++) (*v++) *= x; 828 } 829 PetscCall(VecRestoreArray(mdn->lvec,&r)); 830 PetscCall(PetscLogFlops(1.0*n*m)); 831 } 832 PetscCall(MatDenseRestoreArray(mdn->A,&vv)); 833 PetscFunctionReturn(0); 834 } 835 836 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 837 { 838 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 839 PetscInt i,j; 840 PetscMPIInt size; 841 PetscReal sum = 0.0; 842 const PetscScalar *av,*v; 843 844 PetscFunctionBegin; 845 PetscCall(MatDenseGetArrayRead(mdn->A,&av)); 846 v = av; 847 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 848 if (size == 1) { 849 PetscCall(MatNorm(mdn->A,type,nrm)); 850 } else { 851 if (type == NORM_FROBENIUS) { 852 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 853 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 854 } 855 PetscCall(MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A))); 856 *nrm = PetscSqrtReal(*nrm); 857 PetscCall(PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n)); 858 } else if (type == NORM_1) { 859 PetscReal *tmp,*tmp2; 860 PetscCall(PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2)); 861 *nrm = 0.0; 862 v = av; 863 for (j=0; j<mdn->A->cmap->n; j++) { 864 for (i=0; i<mdn->A->rmap->n; i++) { 865 tmp[j] += PetscAbsScalar(*v); v++; 866 } 867 } 868 
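      /*
         At this point each rank has accumulated, for every global column j, the sum of
         |a_ij| over its locally owned rows in tmp[j].  The reduction below adds these
         partial column sums across all ranks, and the 1-norm is then the largest resulting
         column sum:  ||A||_1 = max_j sum_i |a_ij|.
      */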
PetscCall(MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A))); 869 for (j=0; j<A->cmap->N; j++) { 870 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 871 } 872 PetscCall(PetscFree2(tmp,tmp2)); 873 PetscCall(PetscLogFlops(A->cmap->n*A->rmap->n)); 874 } else if (type == NORM_INFINITY) { /* max row norm */ 875 PetscReal ntemp; 876 PetscCall(MatNorm(mdn->A,type,&ntemp)); 877 PetscCall(MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A))); 878 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm"); 879 } 880 PetscCall(MatDenseRestoreArrayRead(mdn->A,&av)); 881 PetscFunctionReturn(0); 882 } 883 884 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 885 { 886 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 887 Mat B; 888 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 889 PetscInt j,i,lda; 890 PetscScalar *v; 891 892 PetscFunctionBegin; 893 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 894 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 895 PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M)); 896 PetscCall(MatSetType(B,((PetscObject)A)->type_name)); 897 PetscCall(MatMPIDenseSetPreallocation(B,NULL)); 898 } else B = *matout; 899 900 m = a->A->rmap->n; n = a->A->cmap->n; 901 PetscCall(MatDenseGetArrayRead(a->A,(const PetscScalar**)&v)); 902 PetscCall(MatDenseGetLDA(a->A,&lda)); 903 PetscCall(PetscMalloc1(m,&rwork)); 904 for (i=0; i<m; i++) rwork[i] = rstart + i; 905 for (j=0; j<n; j++) { 906 PetscCall(MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES)); 907 v += lda; 908 } 909 PetscCall(MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v)); 910 PetscCall(PetscFree(rwork)); 911 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 912 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 913 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 914 *matout = B; 915 } else { 916 PetscCall(MatHeaderMerge(A,&B)); 917 } 918 PetscFunctionReturn(0); 919 } 920 921 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*); 922 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 923 924 PetscErrorCode MatSetUp_MPIDense(Mat A) 925 { 926 PetscFunctionBegin; 927 PetscCall(PetscLayoutSetUp(A->rmap)); 928 PetscCall(PetscLayoutSetUp(A->cmap)); 929 if (!A->preallocated) { 930 PetscCall(MatMPIDenseSetPreallocation(A,NULL)); 931 } 932 PetscFunctionReturn(0); 933 } 934 935 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 936 { 937 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 938 939 PetscFunctionBegin; 940 PetscCall(MatAXPY(A->A,alpha,B->A,str)); 941 PetscFunctionReturn(0); 942 } 943 944 PetscErrorCode MatConjugate_MPIDense(Mat mat) 945 { 946 Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 947 948 PetscFunctionBegin; 949 PetscCall(MatConjugate(a->A)); 950 PetscFunctionReturn(0); 951 } 952 953 PetscErrorCode MatRealPart_MPIDense(Mat A) 954 { 955 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 956 957 PetscFunctionBegin; 958 PetscCall(MatRealPart(a->A)); 959 PetscFunctionReturn(0); 960 } 961 962 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 963 { 964 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 965 966 PetscFunctionBegin; 967 PetscCall(MatImaginaryPart(a->A)); 968 PetscFunctionReturn(0); 969 } 970 971 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col) 972 { 973 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 974 975 PetscFunctionBegin; 976 
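  /*
     The work is forwarded to the sequential implementation, which copies column "col" of
     the local block into the locally owned part of v.  Hypothetical user sketch, with v
     obtained as the left vector from MatCreateVecs(A,NULL,&v):

       PetscCall(MatGetColumnVector(A,v,col));
  */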
PetscCheck(a->A,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix"); 977 PetscCheck(a->A->ops->getcolumnvector,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation"); 978 PetscCall((*a->A->ops->getcolumnvector)(a->A,v,col)); 979 PetscFunctionReturn(0); 980 } 981 982 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat,PetscInt,PetscReal*); 983 984 PetscErrorCode MatGetColumnReductions_MPIDense(Mat A,PetscInt type,PetscReal *reductions) 985 { 986 PetscInt i,m,n; 987 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 988 PetscReal *work; 989 990 PetscFunctionBegin; 991 PetscCall(MatGetSize(A,&m,&n)); 992 PetscCall(PetscMalloc1(n,&work)); 993 if (type == REDUCTION_MEAN_REALPART) { 994 PetscCall(MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_REALPART,work)); 995 } else if (type == REDUCTION_MEAN_IMAGINARYPART) { 996 PetscCall(MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_IMAGINARYPART,work)); 997 } else { 998 PetscCall(MatGetColumnReductions_SeqDense(a->A,type,work)); 999 } 1000 if (type == NORM_2) { 1001 for (i=0; i<n; i++) work[i] *= work[i]; 1002 } 1003 if (type == NORM_INFINITY) { 1004 PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,A->hdr.comm)); 1005 } else { 1006 PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,A->hdr.comm)); 1007 } 1008 PetscCall(PetscFree(work)); 1009 if (type == NORM_2) { 1010 for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 1011 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 1012 for (i=0; i<n; i++) reductions[i] /= m; 1013 } 1014 PetscFunctionReturn(0); 1015 } 1016 1017 #if defined(PETSC_HAVE_CUDA) 1018 static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1019 { 1020 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1021 PetscInt lda; 1022 1023 PetscFunctionBegin; 1024 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1025 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1026 if (!a->cvec) { 1027 PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec)); 1028 PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec)); 1029 } 1030 a->vecinuse = col + 1; 1031 PetscCall(MatDenseGetLDA(a->A,&lda)); 1032 PetscCall(MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse)); 1033 PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda)); 1034 *v = a->cvec; 1035 PetscFunctionReturn(0); 1036 } 1037 1038 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1039 { 1040 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1041 1042 PetscFunctionBegin; 1043 PetscCheck(a->vecinuse,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1044 PetscCheck(a->cvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 1045 a->vecinuse = 0; 1046 PetscCall(MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse)); 1047 PetscCall(VecCUDAResetArray(a->cvec)); 1048 *v = NULL; 1049 PetscFunctionReturn(0); 1050 } 1051 1052 static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1053 { 1054 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1055 PetscInt lda; 1056 1057 PetscFunctionBegin; 1058 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1059 
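  /*
     The ColumnVec accessors hand out a single internal work vector (a->cvec), so only one
     column may be outstanding at a time and every Get must be paired with the matching
     Restore.  Hedged user-level sketch:

       Vec c;
       PetscCall(MatDenseGetColumnVecRead(A,col,&c));
       ... read-only use of c ...
       PetscCall(MatDenseRestoreColumnVecRead(A,col,&c));
  */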
PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1060 if (!a->cvec) { 1061 PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec)); 1062 PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec)); 1063 } 1064 a->vecinuse = col + 1; 1065 PetscCall(MatDenseGetLDA(a->A,&lda)); 1066 PetscCall(MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse)); 1067 PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda)); 1068 PetscCall(VecLockReadPush(a->cvec)); 1069 *v = a->cvec; 1070 PetscFunctionReturn(0); 1071 } 1072 1073 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1074 { 1075 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1076 1077 PetscFunctionBegin; 1078 PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1079 PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 1080 a->vecinuse = 0; 1081 PetscCall(MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse)); 1082 PetscCall(VecLockReadPop(a->cvec)); 1083 PetscCall(VecCUDAResetArray(a->cvec)); 1084 *v = NULL; 1085 PetscFunctionReturn(0); 1086 } 1087 1088 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1089 { 1090 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1091 PetscInt lda; 1092 1093 PetscFunctionBegin; 1094 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1095 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1096 if (!a->cvec) { 1097 PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec)); 1098 PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec)); 1099 } 1100 a->vecinuse = col + 1; 1101 PetscCall(MatDenseGetLDA(a->A,&lda)); 1102 PetscCall(MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse)); 1103 PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda)); 1104 *v = a->cvec; 1105 PetscFunctionReturn(0); 1106 } 1107 1108 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1109 { 1110 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1111 1112 PetscFunctionBegin; 1113 PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1114 PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 1115 a->vecinuse = 0; 1116 PetscCall(MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse)); 1117 PetscCall(VecCUDAResetArray(a->cvec)); 1118 *v = NULL; 1119 PetscFunctionReturn(0); 1120 } 1121 1122 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) 1123 { 1124 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1125 1126 PetscFunctionBegin; 1127 PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1128 PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1129 PetscCall(MatDenseCUDAPlaceArray(l->A,a)); 1130 PetscFunctionReturn(0); 1131 } 1132 1133 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A) 1134 { 1135 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1136 1137 
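  /*
     As with the host variants, MatDenseCUDAPlaceArray() and MatDenseCUDAResetArray() are
     expected to be used as a matched pair, with the placed pointer referring to device
     memory large enough for the local block.
  */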
PetscFunctionBegin; 1138 PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1139 PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1140 PetscCall(MatDenseCUDAResetArray(l->A)); 1141 PetscFunctionReturn(0); 1142 } 1143 1144 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) 1145 { 1146 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1147 1148 PetscFunctionBegin; 1149 PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1150 PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1151 PetscCall(MatDenseCUDAReplaceArray(l->A,a)); 1152 PetscFunctionReturn(0); 1153 } 1154 1155 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) 1156 { 1157 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1158 1159 PetscFunctionBegin; 1160 PetscCall(MatDenseCUDAGetArrayWrite(l->A,a)); 1161 PetscFunctionReturn(0); 1162 } 1163 1164 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) 1165 { 1166 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1167 1168 PetscFunctionBegin; 1169 PetscCall(MatDenseCUDARestoreArrayWrite(l->A,a)); 1170 PetscFunctionReturn(0); 1171 } 1172 1173 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) 1174 { 1175 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1176 1177 PetscFunctionBegin; 1178 PetscCall(MatDenseCUDAGetArrayRead(l->A,a)); 1179 PetscFunctionReturn(0); 1180 } 1181 1182 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) 1183 { 1184 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1185 1186 PetscFunctionBegin; 1187 PetscCall(MatDenseCUDARestoreArrayRead(l->A,a)); 1188 PetscFunctionReturn(0); 1189 } 1190 1191 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a) 1192 { 1193 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1194 1195 PetscFunctionBegin; 1196 PetscCall(MatDenseCUDAGetArray(l->A,a)); 1197 PetscFunctionReturn(0); 1198 } 1199 1200 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a) 1201 { 1202 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1203 1204 PetscFunctionBegin; 1205 PetscCall(MatDenseCUDARestoreArray(l->A,a)); 1206 PetscFunctionReturn(0); 1207 } 1208 1209 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*); 1210 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*); 1211 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*); 1212 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*); 1213 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*); 1214 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*); 1215 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*); 1216 1217 static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind) 1218 { 1219 Mat_MPIDense *d = (Mat_MPIDense*)mat->data; 1220 1221 PetscFunctionBegin; 1222 PetscCheck(!d->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1223 PetscCheck(!d->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1224 if (d->A) { 1225 PetscCall(MatBindToCPU(d->A,bind)); 1226 } 
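  /*
     Binding swaps the column-vector accessors composed on the matrix between the CUDA and
     host implementations below; when unbinding, any cached column vector or submatrix that
     is not already a CUDA object is released so it can be recreated with the right type.
  */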
1227 mat->boundtocpu = bind; 1228 if (!bind) { 1229 PetscBool iscuda; 1230 1231 PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda)); 1232 if (!iscuda) { 1233 PetscCall(VecDestroy(&d->cvec)); 1234 } 1235 PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda)); 1236 if (!iscuda) { 1237 PetscCall(MatDestroy(&d->cmat)); 1238 } 1239 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA)); 1240 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA)); 1241 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA)); 1242 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA)); 1243 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA)); 1244 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA)); 1245 } else { 1246 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense)); 1247 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense)); 1248 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense)); 1249 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense)); 1250 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense)); 1251 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense)); 1252 } 1253 if (d->cmat) { 1254 PetscCall(MatBindToCPU(d->cmat,bind)); 1255 } 1256 PetscFunctionReturn(0); 1257 } 1258 1259 PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data) 1260 { 1261 Mat_MPIDense *d = (Mat_MPIDense*)A->data; 1262 PetscBool iscuda; 1263 1264 PetscFunctionBegin; 1265 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1266 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda)); 1267 if (!iscuda) PetscFunctionReturn(0); 1268 PetscCall(PetscLayoutSetUp(A->rmap)); 1269 PetscCall(PetscLayoutSetUp(A->cmap)); 1270 if (!d->A) { 1271 PetscCall(MatCreate(PETSC_COMM_SELF,&d->A)); 1272 PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)d->A)); 1273 PetscCall(MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N)); 1274 } 1275 PetscCall(MatSetType(d->A,MATSEQDENSECUDA)); 1276 PetscCall(MatSeqDenseCUDASetPreallocation(d->A,d_data)); 1277 A->preallocated = PETSC_TRUE; 1278 PetscFunctionReturn(0); 1279 } 1280 #endif 1281 1282 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx) 1283 { 1284 Mat_MPIDense *d = (Mat_MPIDense*)x->data; 1285 1286 PetscFunctionBegin; 1287 PetscCall(MatSetRandom(d->A,rctx)); 1288 PetscFunctionReturn(0); 1289 } 1290 1291 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d) 1292 { 1293 PetscFunctionBegin; 1294 *missing = PETSC_FALSE; 1295 PetscFunctionReturn(0); 1296 } 1297 1298 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat); 1299 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat); 1300 static 
PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat); 1301 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat); 1302 static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*); 1303 static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer); 1304 1305 /* -------------------------------------------------------------------*/ 1306 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense, 1307 MatGetRow_MPIDense, 1308 MatRestoreRow_MPIDense, 1309 MatMult_MPIDense, 1310 /* 4*/ MatMultAdd_MPIDense, 1311 MatMultTranspose_MPIDense, 1312 MatMultTransposeAdd_MPIDense, 1313 NULL, 1314 NULL, 1315 NULL, 1316 /* 10*/ NULL, 1317 NULL, 1318 NULL, 1319 NULL, 1320 MatTranspose_MPIDense, 1321 /* 15*/ MatGetInfo_MPIDense, 1322 MatEqual_MPIDense, 1323 MatGetDiagonal_MPIDense, 1324 MatDiagonalScale_MPIDense, 1325 MatNorm_MPIDense, 1326 /* 20*/ MatAssemblyBegin_MPIDense, 1327 MatAssemblyEnd_MPIDense, 1328 MatSetOption_MPIDense, 1329 MatZeroEntries_MPIDense, 1330 /* 24*/ MatZeroRows_MPIDense, 1331 NULL, 1332 NULL, 1333 NULL, 1334 NULL, 1335 /* 29*/ MatSetUp_MPIDense, 1336 NULL, 1337 NULL, 1338 MatGetDiagonalBlock_MPIDense, 1339 NULL, 1340 /* 34*/ MatDuplicate_MPIDense, 1341 NULL, 1342 NULL, 1343 NULL, 1344 NULL, 1345 /* 39*/ MatAXPY_MPIDense, 1346 MatCreateSubMatrices_MPIDense, 1347 NULL, 1348 MatGetValues_MPIDense, 1349 MatCopy_MPIDense, 1350 /* 44*/ NULL, 1351 MatScale_MPIDense, 1352 MatShift_Basic, 1353 NULL, 1354 NULL, 1355 /* 49*/ MatSetRandom_MPIDense, 1356 NULL, 1357 NULL, 1358 NULL, 1359 NULL, 1360 /* 54*/ NULL, 1361 NULL, 1362 NULL, 1363 NULL, 1364 NULL, 1365 /* 59*/ MatCreateSubMatrix_MPIDense, 1366 MatDestroy_MPIDense, 1367 MatView_MPIDense, 1368 NULL, 1369 NULL, 1370 /* 64*/ NULL, 1371 NULL, 1372 NULL, 1373 NULL, 1374 NULL, 1375 /* 69*/ NULL, 1376 NULL, 1377 NULL, 1378 NULL, 1379 NULL, 1380 /* 74*/ NULL, 1381 NULL, 1382 NULL, 1383 NULL, 1384 NULL, 1385 /* 79*/ NULL, 1386 NULL, 1387 NULL, 1388 NULL, 1389 /* 83*/ MatLoad_MPIDense, 1390 NULL, 1391 NULL, 1392 NULL, 1393 NULL, 1394 NULL, 1395 /* 89*/ NULL, 1396 NULL, 1397 NULL, 1398 NULL, 1399 NULL, 1400 /* 94*/ NULL, 1401 NULL, 1402 MatMatTransposeMultSymbolic_MPIDense_MPIDense, 1403 MatMatTransposeMultNumeric_MPIDense_MPIDense, 1404 NULL, 1405 /* 99*/ MatProductSetFromOptions_MPIDense, 1406 NULL, 1407 NULL, 1408 MatConjugate_MPIDense, 1409 NULL, 1410 /*104*/ NULL, 1411 MatRealPart_MPIDense, 1412 MatImaginaryPart_MPIDense, 1413 NULL, 1414 NULL, 1415 /*109*/ NULL, 1416 NULL, 1417 NULL, 1418 MatGetColumnVector_MPIDense, 1419 MatMissingDiagonal_MPIDense, 1420 /*114*/ NULL, 1421 NULL, 1422 NULL, 1423 NULL, 1424 NULL, 1425 /*119*/ NULL, 1426 NULL, 1427 NULL, 1428 NULL, 1429 NULL, 1430 /*124*/ NULL, 1431 MatGetColumnReductions_MPIDense, 1432 NULL, 1433 NULL, 1434 NULL, 1435 /*129*/ NULL, 1436 NULL, 1437 MatTransposeMatMultSymbolic_MPIDense_MPIDense, 1438 MatTransposeMatMultNumeric_MPIDense_MPIDense, 1439 NULL, 1440 /*134*/ NULL, 1441 NULL, 1442 NULL, 1443 NULL, 1444 NULL, 1445 /*139*/ NULL, 1446 NULL, 1447 NULL, 1448 NULL, 1449 NULL, 1450 MatCreateMPIMatConcatenateSeqMat_MPIDense, 1451 /*145*/ NULL, 1452 NULL, 1453 NULL 1454 }; 1455 1456 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1457 { 1458 Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 1459 PetscBool iscuda; 1460 1461 PetscFunctionBegin; 1462 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1463 PetscCall(PetscLayoutSetUp(mat->rmap)); 1464 
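  /*
     Typical creation sequence that ends up in this routine (hypothetical user code; passing
     NULL for the data argument lets PETSc allocate the local m-by-N column-major array):

       PetscCall(MatCreate(comm,&A));
       PetscCall(MatSetSizes(A,m,n,M,N));
       PetscCall(MatSetType(A,MATMPIDENSE));
       PetscCall(MatMPIDenseSetPreallocation(A,NULL));
  */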
PetscCall(PetscLayoutSetUp(mat->cmap)); 1465 if (!a->A) { 1466 PetscCall(MatCreate(PETSC_COMM_SELF,&a->A)); 1467 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A)); 1468 PetscCall(MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N)); 1469 } 1470 PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda)); 1471 PetscCall(MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE)); 1472 PetscCall(MatSeqDenseSetPreallocation(a->A,data)); 1473 mat->preallocated = PETSC_TRUE; 1474 PetscFunctionReturn(0); 1475 } 1476 1477 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1478 { 1479 Mat B,C; 1480 1481 PetscFunctionBegin; 1482 PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&C)); 1483 PetscCall(MatConvert_SeqAIJ_SeqDense(C,MATSEQDENSE,MAT_INITIAL_MATRIX,&B)); 1484 PetscCall(MatDestroy(&C)); 1485 if (reuse == MAT_REUSE_MATRIX) { 1486 C = *newmat; 1487 } else C = NULL; 1488 PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C)); 1489 PetscCall(MatDestroy(&B)); 1490 if (reuse == MAT_INPLACE_MATRIX) { 1491 PetscCall(MatHeaderReplace(A,&C)); 1492 } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1493 PetscFunctionReturn(0); 1494 } 1495 1496 PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1497 { 1498 Mat B,C; 1499 1500 PetscFunctionBegin; 1501 PetscCall(MatDenseGetLocalMatrix(A,&C)); 1502 PetscCall(MatConvert_SeqDense_SeqAIJ(C,MATSEQAIJ,MAT_INITIAL_MATRIX,&B)); 1503 if (reuse == MAT_REUSE_MATRIX) { 1504 C = *newmat; 1505 } else C = NULL; 1506 PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C)); 1507 PetscCall(MatDestroy(&B)); 1508 if (reuse == MAT_INPLACE_MATRIX) { 1509 PetscCall(MatHeaderReplace(A,&C)); 1510 } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1511 PetscFunctionReturn(0); 1512 } 1513 1514 #if defined(PETSC_HAVE_ELEMENTAL) 1515 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1516 { 1517 Mat mat_elemental; 1518 PetscScalar *v; 1519 PetscInt m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols; 1520 1521 PetscFunctionBegin; 1522 if (reuse == MAT_REUSE_MATRIX) { 1523 mat_elemental = *newmat; 1524 PetscCall(MatZeroEntries(*newmat)); 1525 } else { 1526 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental)); 1527 PetscCall(MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N)); 1528 PetscCall(MatSetType(mat_elemental,MATELEMENTAL)); 1529 PetscCall(MatSetUp(mat_elemental)); 1530 PetscCall(MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE)); 1531 } 1532 1533 PetscCall(PetscMalloc2(m,&rows,N,&cols)); 1534 for (i=0; i<N; i++) cols[i] = i; 1535 for (i=0; i<m; i++) rows[i] = rstart + i; 1536 1537 /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 1538 PetscCall(MatDenseGetArray(A,&v)); 1539 PetscCall(MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES)); 1540 PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY)); 1541 PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY)); 1542 PetscCall(MatDenseRestoreArray(A,&v)); 1543 PetscCall(PetscFree2(rows,cols)); 1544 1545 if (reuse == MAT_INPLACE_MATRIX) { 1546 PetscCall(MatHeaderReplace(A,&mat_elemental)); 1547 } else { 1548 *newmat = mat_elemental; 1549 } 1550 
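  /*
     Hedged user-level sketch of driving this conversion (requires a PETSc build configured
     with Elemental):

       Mat Ae;
       PetscCall(MatConvert(A,MATELEMENTAL,MAT_INITIAL_MATRIX,&Ae));
  */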
PetscFunctionReturn(0); 1551 } 1552 #endif 1553 1554 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals) 1555 { 1556 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 1557 1558 PetscFunctionBegin; 1559 PetscCall(MatDenseGetColumn(mat->A,col,vals)); 1560 PetscFunctionReturn(0); 1561 } 1562 1563 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals) 1564 { 1565 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 1566 1567 PetscFunctionBegin; 1568 PetscCall(MatDenseRestoreColumn(mat->A,vals)); 1569 PetscFunctionReturn(0); 1570 } 1571 1572 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 1573 { 1574 Mat_MPIDense *mat; 1575 PetscInt m,nloc,N; 1576 1577 PetscFunctionBegin; 1578 PetscCall(MatGetSize(inmat,&m,&N)); 1579 PetscCall(MatGetLocalSize(inmat,NULL,&nloc)); 1580 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 1581 PetscInt sum; 1582 1583 if (n == PETSC_DECIDE) { 1584 PetscCall(PetscSplitOwnership(comm,&n,&N)); 1585 } 1586 /* Check sum(n) = N */ 1587 PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm)); 1588 PetscCheckFalse(sum != N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N); 1589 1590 PetscCall(MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat)); 1591 PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 1592 } 1593 1594 /* numeric phase */ 1595 mat = (Mat_MPIDense*)(*outmat)->data; 1596 PetscCall(MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN)); 1597 PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY)); 1598 PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY)); 1599 PetscFunctionReturn(0); 1600 } 1601 1602 #if defined(PETSC_HAVE_CUDA) 1603 PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat) 1604 { 1605 Mat B; 1606 Mat_MPIDense *m; 1607 1608 PetscFunctionBegin; 1609 if (reuse == MAT_INITIAL_MATRIX) { 1610 PetscCall(MatDuplicate(M,MAT_COPY_VALUES,newmat)); 1611 } else if (reuse == MAT_REUSE_MATRIX) { 1612 PetscCall(MatCopy(M,*newmat,SAME_NONZERO_PATTERN)); 1613 } 1614 1615 B = *newmat; 1616 PetscCall(MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE)); 1617 PetscCall(PetscFree(B->defaultvectype)); 1618 PetscCall(PetscStrallocpy(VECSTANDARD,&B->defaultvectype)); 1619 PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE)); 1620 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL)); 1621 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL)); 1622 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL)); 1623 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL)); 1624 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL)); 1625 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL)); 1626 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL)); 1627 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL)); 1628 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL)); 1629 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL)); 1630 
PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL)); 1631 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL)); 1632 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL)); 1633 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL)); 1634 m = (Mat_MPIDense*)(B)->data; 1635 if (m->A) { 1636 PetscCall(MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A)); 1637 PetscCall(MatSetUpMultiply_MPIDense(B)); 1638 } 1639 B->ops->bindtocpu = NULL; 1640 B->offloadmask = PETSC_OFFLOAD_CPU; 1641 PetscFunctionReturn(0); 1642 } 1643 1644 PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat) 1645 { 1646 Mat B; 1647 Mat_MPIDense *m; 1648 1649 PetscFunctionBegin; 1650 if (reuse == MAT_INITIAL_MATRIX) { 1651 PetscCall(MatDuplicate(M,MAT_COPY_VALUES,newmat)); 1652 } else if (reuse == MAT_REUSE_MATRIX) { 1653 PetscCall(MatCopy(M,*newmat,SAME_NONZERO_PATTERN)); 1654 } 1655 1656 B = *newmat; 1657 PetscCall(PetscFree(B->defaultvectype)); 1658 PetscCall(PetscStrallocpy(VECCUDA,&B->defaultvectype)); 1659 PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA)); 1660 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C", MatConvert_MPIDenseCUDA_MPIDense)); 1661 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1662 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense)); 1663 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1664 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ)); 1665 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C", MatDenseCUDAGetArray_MPIDenseCUDA)); 1666 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C", MatDenseCUDAGetArrayRead_MPIDenseCUDA)); 1667 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C", MatDenseCUDAGetArrayWrite_MPIDenseCUDA)); 1668 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C", MatDenseCUDARestoreArray_MPIDenseCUDA)); 1669 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C", MatDenseCUDARestoreArrayRead_MPIDenseCUDA)); 1670 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C", MatDenseCUDARestoreArrayWrite_MPIDenseCUDA)); 1671 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C", MatDenseCUDAPlaceArray_MPIDenseCUDA)); 1672 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C", MatDenseCUDAResetArray_MPIDenseCUDA)); 1673 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C", MatDenseCUDAReplaceArray_MPIDenseCUDA)); 1674 m = (Mat_MPIDense*)(B->data); 1675 if (m->A) { 1676 PetscCall(MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A)); 1677 PetscCall(MatSetUpMultiply_MPIDense(B)); 1678 B->offloadmask = PETSC_OFFLOAD_BOTH; 1679 } else { 1680 B->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 1681 } 1682 PetscCall(MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE)); 1683 1684 B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA; 1685 
PetscFunctionReturn(0); 1686 } 1687 #endif 1688 1689 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v) 1690 { 1691 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1692 PetscInt lda; 1693 1694 PetscFunctionBegin; 1695 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1696 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1697 if (!a->cvec) { 1698 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec)); 1699 } 1700 a->vecinuse = col + 1; 1701 PetscCall(MatDenseGetLDA(a->A,&lda)); 1702 PetscCall(MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse)); 1703 PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda)); 1704 *v = a->cvec; 1705 PetscFunctionReturn(0); 1706 } 1707 1708 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v) 1709 { 1710 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1711 1712 PetscFunctionBegin; 1713 PetscCheck(a->vecinuse,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1714 PetscCheck(a->cvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 1715 a->vecinuse = 0; 1716 PetscCall(MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse)); 1717 PetscCall(VecResetArray(a->cvec)); 1718 *v = NULL; 1719 PetscFunctionReturn(0); 1720 } 1721 1722 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v) 1723 { 1724 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1725 PetscInt lda; 1726 1727 PetscFunctionBegin; 1728 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1729 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1730 if (!a->cvec) { 1731 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec)); 1732 } 1733 a->vecinuse = col + 1; 1734 PetscCall(MatDenseGetLDA(a->A,&lda)); 1735 PetscCall(MatDenseGetArrayRead(a->A,&a->ptrinuse)); 1736 PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda)); 1737 PetscCall(VecLockReadPush(a->cvec)); 1738 *v = a->cvec; 1739 PetscFunctionReturn(0); 1740 } 1741 1742 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v) 1743 { 1744 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1745 1746 PetscFunctionBegin; 1747 PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1748 PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 1749 a->vecinuse = 0; 1750 PetscCall(MatDenseRestoreArrayRead(a->A,&a->ptrinuse)); 1751 PetscCall(VecLockReadPop(a->cvec)); 1752 PetscCall(VecResetArray(a->cvec)); 1753 *v = NULL; 1754 PetscFunctionReturn(0); 1755 } 1756 1757 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v) 1758 { 1759 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1760 PetscInt lda; 1761 1762 PetscFunctionBegin; 1763 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1764 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1765 if (!a->cvec) { 1766 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec)); 
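      /* a->cvec carries no storage of its own; the requested column of the local dense array is placed into it below with VecPlaceArray() */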
1767 } 1768 a->vecinuse = col + 1; 1769 PetscCall(MatDenseGetLDA(a->A,&lda)); 1770 PetscCall(MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse)); 1771 PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda)); 1772 *v = a->cvec; 1773 PetscFunctionReturn(0); 1774 } 1775 1776 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v) 1777 { 1778 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1779 1780 PetscFunctionBegin; 1781 PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1782 PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 1783 a->vecinuse = 0; 1784 PetscCall(MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse)); 1785 PetscCall(VecResetArray(a->cvec)); 1786 *v = NULL; 1787 PetscFunctionReturn(0); 1788 } 1789 1790 PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v) 1791 { 1792 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1793 Mat_MPIDense *c; 1794 MPI_Comm comm; 1795 PetscBool setup = PETSC_FALSE; 1796 1797 PetscFunctionBegin; 1798 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 1799 PetscCheck(!a->vecinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1800 PetscCheck(!a->matinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1801 if (!a->cmat) { 1802 setup = PETSC_TRUE; 1803 PetscCall(MatCreate(comm,&a->cmat)); 1804 PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat)); 1805 PetscCall(MatSetType(a->cmat,((PetscObject)A)->type_name)); 1806 PetscCall(PetscLayoutReference(A->rmap,&a->cmat->rmap)); 1807 PetscCall(PetscLayoutSetSize(a->cmat->cmap,cend-cbegin)); 1808 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 1809 } else if (cend-cbegin != a->cmat->cmap->N) { 1810 setup = PETSC_TRUE; 1811 PetscCall(PetscLayoutDestroy(&a->cmat->cmap)); 1812 PetscCall(PetscLayoutCreate(comm,&a->cmat->cmap)); 1813 PetscCall(PetscLayoutSetSize(a->cmat->cmap,cend-cbegin)); 1814 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 1815 } 1816 c = (Mat_MPIDense*)a->cmat->data; 1817 PetscCheck(!c->A,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1818 PetscCall(MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A)); 1819 if (setup) { /* do we really need this? */ 1820 PetscCall(MatSetUpMultiply_MPIDense(a->cmat)); 1821 } 1822 a->cmat->preallocated = PETSC_TRUE; 1823 a->cmat->assembled = PETSC_TRUE; 1824 a->matinuse = cbegin + 1; 1825 *v = a->cmat; 1826 PetscFunctionReturn(0); 1827 } 1828 1829 PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v) 1830 { 1831 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1832 Mat_MPIDense *c; 1833 1834 PetscFunctionBegin; 1835 PetscCheck(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first"); 1836 PetscCheck(a->cmat,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix"); 1837 PetscCheck(*v == a->cmat,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()"); 1838 a->matinuse = 0; 1839 c = (Mat_MPIDense*)a->cmat->data; 1840 PetscCall(MatDenseRestoreSubMatrix(a->A,&c->A)); 1841 *v = NULL; 1842 PetscFunctionReturn(0); 1843 } 1844 1845 /*MC 1846 MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices. 1847 1848 Options Database Keys: 1849 . 
-mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions() 1850 1851 Level: beginner 1852 1853 .seealso: MatCreateDense() 1854 1855 M*/ 1856 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) 1857 { 1858 Mat_MPIDense *a; 1859 1860 PetscFunctionBegin; 1861 PetscCall(PetscNewLog(mat,&a)); 1862 mat->data = (void*)a; 1863 PetscCall(PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps))); 1864 1865 mat->insertmode = NOT_SET_VALUES; 1866 1867 /* build cache for off array entries formed */ 1868 a->donotstash = PETSC_FALSE; 1869 1870 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash)); 1871 1872 /* stuff used for matrix vector multiply */ 1873 a->lvec = NULL; 1874 a->Mvctx = NULL; 1875 a->roworiented = PETSC_TRUE; 1876 1877 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense)); 1878 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense)); 1879 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense)); 1880 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense)); 1881 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense)); 1882 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense)); 1883 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense)); 1884 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense)); 1885 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense)); 1886 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense)); 1887 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense)); 1888 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense)); 1889 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense)); 1890 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense)); 1891 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense)); 1892 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense)); 1893 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense)); 1894 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense)); 1895 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense)); 1896 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense)); 1897 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",MatConvert_MPIDense_MPIAIJ)); 1898 #if defined(PETSC_HAVE_ELEMENTAL) 1899 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental)); 1900 #endif 1901 #if defined(PETSC_HAVE_SCALAPACK) 1902 
PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK)); 1903 #endif 1904 #if defined(PETSC_HAVE_CUDA) 1905 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA)); 1906 #endif 1907 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense)); 1908 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense)); 1909 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ)); 1910 #if defined(PETSC_HAVE_CUDA) 1911 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense)); 1912 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ)); 1913 #endif 1914 1915 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense)); 1916 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense)); 1917 PetscCall(PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE)); 1918 PetscFunctionReturn(0); 1919 } 1920 1921 /*MC 1922 MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs. 1923 1924 Options Database Keys: 1925 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions() 1926 1927 Level: beginner 1928 1929 .seealso: 1930 1931 M*/ 1932 #if defined(PETSC_HAVE_CUDA) 1933 #include <petsc/private/deviceimpl.h> 1934 PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B) 1935 { 1936 PetscFunctionBegin; 1937 PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA)); 1938 PetscCall(MatCreate_MPIDense(B)); 1939 PetscCall(MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B)); 1940 PetscFunctionReturn(0); 1941 } 1942 #endif 1943 1944 /*MC 1945 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1946 1947 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1948 and MATMPIDENSE otherwise. 1949 1950 Options Database Keys: 1951 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1952 1953 Level: beginner 1954 1955 .seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA 1956 M*/ 1957 1958 /*MC 1959 MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs. 1960 1961 This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator, 1962 and MATMPIDENSECUDA otherwise. 1963 1964 Options Database Keys: 1965 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions() 1966 1967 Level: beginner 1968 1969 .seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE 1970 M*/ 1971 1972 /*@C 1973 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1974 1975 Collective 1976 1977 Input Parameters: 1978 . B - the matrix 1979 - data - optional location of matrix data. Set data=NULL for PETSc 1980 to control all matrix memory allocation. 1981 1982 Notes: 1983 The dense format is fully compatible with standard Fortran 77 1984 storage by columns. 
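   A minimal call sequence, added here as an illustrative sketch (the communicator and size arguments are placeholders), in which PETSc allocates the storage:
.vb
   Mat A;
   PetscCall(MatCreate(comm,&A));
   PetscCall(MatSetSizes(A,m,n,M,N));
   PetscCall(MatSetType(A,MATMPIDENSE));
   PetscCall(MatMPIDenseSetPreallocation(A,NULL));
.ve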
1985 1986 The data input variable is intended primarily for Fortran programmers 1987 who wish to allocate their own matrix memory space. Most users should 1988 set data=NULL. 1989 1990 Level: intermediate 1991 1992 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1993 @*/ 1994 PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data) 1995 { 1996 PetscFunctionBegin; 1997 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 1998 PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data)); 1999 PetscFunctionReturn(0); 2000 } 2001 2002 /*@ 2003 MatDensePlaceArray - Allows one to replace the array in a dense matrix with an 2004 array provided by the user. This is useful to avoid copying an array 2005 into a matrix 2006 2007 Not Collective 2008 2009 Input Parameters: 2010 + mat - the matrix 2011 - array - the array in column major order 2012 2013 Notes: 2014 You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be 2015 freed when the matrix is destroyed. 2016 2017 Level: developer 2018 2019 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray() 2020 2021 @*/ 2022 PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar *array) 2023 { 2024 PetscFunctionBegin; 2025 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2026 PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array)); 2027 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2028 #if defined(PETSC_HAVE_CUDA) 2029 mat->offloadmask = PETSC_OFFLOAD_CPU; 2030 #endif 2031 PetscFunctionReturn(0); 2032 } 2033 2034 /*@ 2035 MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray() 2036 2037 Not Collective 2038 2039 Input Parameters: 2040 . mat - the matrix 2041 2042 Notes: 2043 You can only call this after a call to MatDensePlaceArray() 2044 2045 Level: developer 2046 2047 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray() 2048 2049 @*/ 2050 PetscErrorCode MatDenseResetArray(Mat mat) 2051 { 2052 PetscFunctionBegin; 2053 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2054 PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat)); 2055 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2056 PetscFunctionReturn(0); 2057 } 2058 2059 /*@ 2060 MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an 2061 array provided by the user. This is useful to avoid copying an array 2062 into a matrix 2063 2064 Not Collective 2065 2066 Input Parameters: 2067 + mat - the matrix 2068 - array - the array in column major order 2069 2070 Notes: 2071 The memory passed in MUST be obtained with PetscMalloc() and CANNOT be 2072 freed by the user. It will be freed when the matrix is destroyed. 
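   An illustrative sketch (editor's addition; nloc stands for the size of the matrix's local array and is a placeholder): the replacement memory comes from PetscMalloc1() and ownership passes to the matrix:
.vb
   PetscScalar *newarr;
   PetscCall(PetscMalloc1(nloc,&newarr));
   ... fill newarr in column major order ...
   PetscCall(MatDenseReplaceArray(mat,newarr));
.ve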
2073 2074 Level: developer 2075 2076 .seealso: MatDenseGetArray(), VecReplaceArray() 2077 @*/ 2078 PetscErrorCode MatDenseReplaceArray(Mat mat,const PetscScalar *array) 2079 { 2080 PetscFunctionBegin; 2081 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2082 PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array)); 2083 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2084 #if defined(PETSC_HAVE_CUDA) 2085 mat->offloadmask = PETSC_OFFLOAD_CPU; 2086 #endif 2087 PetscFunctionReturn(0); 2088 } 2089 2090 #if defined(PETSC_HAVE_CUDA) 2091 /*@C 2092 MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an 2093 array provided by the user. This is useful to avoid copying an array 2094 into a matrix 2095 2096 Not Collective 2097 2098 Input Parameters: 2099 + mat - the matrix 2100 - array - the array in column major order 2101 2102 Notes: 2103 You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be 2104 freed when the matrix is destroyed. The array must have been allocated with cudaMalloc(). 2105 2106 Level: developer 2107 2108 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray() 2109 @*/ 2110 PetscErrorCode MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array) 2111 { 2112 PetscFunctionBegin; 2113 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2114 PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array)); 2115 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2116 mat->offloadmask = PETSC_OFFLOAD_GPU; 2117 PetscFunctionReturn(0); 2118 } 2119 2120 /*@C 2121 MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray() 2122 2123 Not Collective 2124 2125 Input Parameters: 2126 . mat - the matrix 2127 2128 Notes: 2129 You can only call this after a call to MatDenseCUDAPlaceArray() 2130 2131 Level: developer 2132 2133 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray() 2134 2135 @*/ 2136 PetscErrorCode MatDenseCUDAResetArray(Mat mat) 2137 { 2138 PetscFunctionBegin; 2139 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2140 PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat)); 2141 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2142 PetscFunctionReturn(0); 2143 } 2144 2145 /*@C 2146 MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an 2147 array provided by the user. This is useful to avoid copying an array 2148 into a matrix 2149 2150 Not Collective 2151 2152 Input Parameters: 2153 + mat - the matrix 2154 - array - the array in column major order 2155 2156 Notes: 2157 This permanently replaces the GPU array and frees the memory associated with the old GPU array. 2158 The memory passed in CANNOT be freed by the user. It will be freed 2159 when the matrix is destroyed. The array should respect the matrix leading dimension. 
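   An illustrative sketch (editor's addition; lda and n are placeholders, and it is assumed the buffer was obtained with cudaMalloc() so that PETSc can free it later):
.vb
   PetscScalar *d_v;
   cudaMalloc((void**)&d_v,(size_t)lda*n*sizeof(PetscScalar));
   ... fill d_v on the device, respecting the leading dimension lda ...
   PetscCall(MatDenseCUDAReplaceArray(mat,d_v));
.ve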
2160 2161 Level: developer 2162 2163 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray() 2164 @*/ 2165 PetscErrorCode MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array) 2166 { 2167 PetscFunctionBegin; 2168 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2169 PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array)); 2170 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2171 mat->offloadmask = PETSC_OFFLOAD_GPU; 2172 PetscFunctionReturn(0); 2173 } 2174 2175 /*@C 2176 MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix. 2177 2178 Not Collective 2179 2180 Input Parameters: 2181 . A - the matrix 2182 2183 Output Parameters 2184 . array - the GPU array in column major order 2185 2186 Notes: 2187 The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed. 2188 2189 Level: developer 2190 2191 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead() 2192 @*/ 2193 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a) 2194 { 2195 PetscFunctionBegin; 2196 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2197 PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a)); 2198 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2199 PetscFunctionReturn(0); 2200 } 2201 2202 /*@C 2203 MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite(). 2204 2205 Not Collective 2206 2207 Input Parameters: 2208 + A - the matrix 2209 - array - the GPU array in column major order 2210 2211 Notes: 2212 2213 Level: developer 2214 2215 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead() 2216 @*/ 2217 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a) 2218 { 2219 PetscFunctionBegin; 2220 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2221 PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a)); 2222 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2223 A->offloadmask = PETSC_OFFLOAD_GPU; 2224 PetscFunctionReturn(0); 2225 } 2226 2227 /*@C 2228 MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed. 2229 2230 Not Collective 2231 2232 Input Parameters: 2233 . A - the matrix 2234 2235 Output Parameters 2236 . array - the GPU array in column major order 2237 2238 Notes: 2239 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). 
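   A typical read-only access pattern (an illustrative sketch added by the editor):
.vb
   const PetscScalar *d_a;
   PetscCall(MatDenseCUDAGetArrayRead(A,&d_a));
   ... pass d_a to a kernel or library routine that only reads it ...
   PetscCall(MatDenseCUDARestoreArrayRead(A,&d_a));
.ve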
2240 2241 Level: developer 2242 2243 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead() 2244 @*/ 2245 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a) 2246 { 2247 PetscFunctionBegin; 2248 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2249 PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a)); 2250 PetscFunctionReturn(0); 2251 } 2252 2253 /*@C 2254 MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead(). 2255 2256 Not Collective 2257 2258 Input Parameters: 2259 + A - the matrix 2260 - array - the GPU array in column major order 2261 2262 Notes: 2263 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). 2264 2265 Level: developer 2266 2267 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead() 2268 @*/ 2269 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a) 2270 { 2271 PetscFunctionBegin; 2272 PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a)); 2273 PetscFunctionReturn(0); 2274 } 2275 2276 /*@C 2277 MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed. 2278 2279 Not Collective 2280 2281 Input Parameters: 2282 . A - the matrix 2283 2284 Output Parameters 2285 . array - the GPU array in column major order 2286 2287 Notes: 2288 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead(). 2289 2290 Level: developer 2291 2292 .seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead() 2293 @*/ 2294 PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a) 2295 { 2296 PetscFunctionBegin; 2297 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2298 PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a)); 2299 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2300 PetscFunctionReturn(0); 2301 } 2302 2303 /*@C 2304 MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray(). 2305 2306 Not Collective 2307 2308 Input Parameters: 2309 + A - the matrix 2310 - array - the GPU array in column major order 2311 2312 Notes: 2313 2314 Level: developer 2315 2316 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead() 2317 @*/ 2318 PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a) 2319 { 2320 PetscFunctionBegin; 2321 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2322 PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a)); 2323 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2324 A->offloadmask = PETSC_OFFLOAD_GPU; 2325 PetscFunctionReturn(0); 2326 } 2327 #endif 2328 2329 /*@C 2330 MatCreateDense - Creates a matrix in dense format. 2331 2332 Collective 2333 2334 Input Parameters: 2335 + comm - MPI communicator 2336 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2337 . 
n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2338 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 2339 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 2340 - data - optional location of matrix data. Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 2341 to control all matrix memory allocation. 2342 2343 Output Parameter: 2344 . A - the matrix 2345 2346 Notes: 2347 The dense format is fully compatible with standard Fortran 77 2348 storage by columns. 2349 Note that, although local portions of the matrix are stored in column-major 2350 order, the matrix is partitioned across MPI ranks by row. 2351 2352 The data input variable is intended primarily for Fortran programmers 2353 who wish to allocate their own matrix memory space. Most users should 2354 set data=NULL (PETSC_NULL_SCALAR for Fortran users). 2355 2356 The user MUST specify either the local or global matrix dimensions 2357 (possibly both). 2358 2359 Level: intermediate 2360 2361 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 2362 @*/ 2363 PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 2364 { 2365 PetscMPIInt size; 2366 2367 PetscFunctionBegin; 2368 PetscCall(MatCreate(comm,A)); 2369 PetscCall(MatSetSizes(*A,m,n,M,N)); 2370 PetscCallMPI(MPI_Comm_size(comm,&size)); 2371 if (size > 1) { 2372 PetscBool havedata = (PetscBool)!!data; 2373 2374 PetscCall(MatSetType(*A,MATMPIDENSE)); 2375 PetscCall(MatMPIDenseSetPreallocation(*A,data)); 2376 PetscCall(MPIU_Allreduce(MPI_IN_PLACE,&havedata,1,MPIU_BOOL,MPI_LOR,comm)); 2377 if (havedata) { /* user provided data array, so no need to assemble */ 2378 PetscCall(MatSetUpMultiply_MPIDense(*A)); 2379 (*A)->assembled = PETSC_TRUE; 2380 } 2381 } else { 2382 PetscCall(MatSetType(*A,MATSEQDENSE)); 2383 PetscCall(MatSeqDenseSetPreallocation(*A,data)); 2384 } 2385 PetscFunctionReturn(0); 2386 } 2387 2388 #if defined(PETSC_HAVE_CUDA) 2389 /*@C 2390 MatCreateDenseCUDA - Creates a matrix in dense format using CUDA. 2391 2392 Collective 2393 2394 Input Parameters: 2395 + comm - MPI communicator 2396 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2397 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2398 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 2399 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 2400 - data - optional location of GPU matrix data. Set data=NULL for PETSc 2401 to control matrix memory allocation. 2402 2403 Output Parameter: 2404 . 
A - the matrix 2405 2406 Notes: 2407 2408 Level: intermediate 2409 2410 .seealso: MatCreate(), MatCreateDense() 2411 @*/ 2412 PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 2413 { 2414 PetscMPIInt size; 2415 2416 PetscFunctionBegin; 2417 PetscCall(MatCreate(comm,A)); 2418 PetscValidLogicalCollectiveBool(*A,!!data,6); 2419 PetscCall(MatSetSizes(*A,m,n,M,N)); 2420 PetscCallMPI(MPI_Comm_size(comm,&size)); 2421 if (size > 1) { 2422 PetscCall(MatSetType(*A,MATMPIDENSECUDA)); 2423 PetscCall(MatMPIDenseCUDASetPreallocation(*A,data)); 2424 if (data) { /* user provided data array, so no need to assemble */ 2425 PetscCall(MatSetUpMultiply_MPIDense(*A)); 2426 (*A)->assembled = PETSC_TRUE; 2427 } 2428 } else { 2429 PetscCall(MatSetType(*A,MATSEQDENSECUDA)); 2430 PetscCall(MatSeqDenseCUDASetPreallocation(*A,data)); 2431 } 2432 PetscFunctionReturn(0); 2433 } 2434 #endif 2435 2436 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 2437 { 2438 Mat mat; 2439 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 2440 2441 PetscFunctionBegin; 2442 *newmat = NULL; 2443 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&mat)); 2444 PetscCall(MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N)); 2445 PetscCall(MatSetType(mat,((PetscObject)A)->type_name)); 2446 a = (Mat_MPIDense*)mat->data; 2447 2448 mat->factortype = A->factortype; 2449 mat->assembled = PETSC_TRUE; 2450 mat->preallocated = PETSC_TRUE; 2451 2452 mat->insertmode = NOT_SET_VALUES; 2453 a->donotstash = oldmat->donotstash; 2454 2455 PetscCall(PetscLayoutReference(A->rmap,&mat->rmap)); 2456 PetscCall(PetscLayoutReference(A->cmap,&mat->cmap)); 2457 2458 PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A)); 2459 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A)); 2460 PetscCall(MatSetUpMultiply_MPIDense(mat)); 2461 2462 *newmat = mat; 2463 PetscFunctionReturn(0); 2464 } 2465 2466 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer) 2467 { 2468 PetscBool isbinary; 2469 #if defined(PETSC_HAVE_HDF5) 2470 PetscBool ishdf5; 2471 #endif 2472 2473 PetscFunctionBegin; 2474 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2475 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2476 /* force binary viewer to load .info file if it has not yet done so */ 2477 PetscCall(PetscViewerSetUp(viewer)); 2478 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 2479 #if defined(PETSC_HAVE_HDF5) 2480 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5)); 2481 #endif 2482 if (isbinary) { 2483 PetscCall(MatLoad_Dense_Binary(newMat,viewer)); 2484 #if defined(PETSC_HAVE_HDF5) 2485 } else if (ishdf5) { 2486 PetscCall(MatLoad_Dense_HDF5(newMat,viewer)); 2487 #endif 2488 } else SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2489 PetscFunctionReturn(0); 2490 } 2491 2492 static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag) 2493 { 2494 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 2495 Mat a,b; 2496 PetscBool flg; 2497 2498 PetscFunctionBegin; 2499 a = matA->A; 2500 b = matB->A; 2501 PetscCall(MatEqual(a,b,&flg)); 2502 PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A))); 2503 PetscFunctionReturn(0); 2504 } 2505 2506 PetscErrorCode 
MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data) 2507 { 2508 Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data; 2509 2510 PetscFunctionBegin; 2511 PetscCall(PetscFree2(atb->sendbuf,atb->recvcounts)); 2512 PetscCall(MatDestroy(&atb->atb)); 2513 PetscCall(PetscFree(atb)); 2514 PetscFunctionReturn(0); 2515 } 2516 2517 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data) 2518 { 2519 Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data; 2520 2521 PetscFunctionBegin; 2522 PetscCall(PetscFree2(abt->buf[0],abt->buf[1])); 2523 PetscCall(PetscFree2(abt->recvcounts,abt->recvdispls)); 2524 PetscCall(PetscFree(abt)); 2525 PetscFunctionReturn(0); 2526 } 2527 2528 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2529 { 2530 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2531 Mat_TransMatMultDense *atb; 2532 MPI_Comm comm; 2533 PetscMPIInt size,*recvcounts; 2534 PetscScalar *carray,*sendbuf; 2535 const PetscScalar *atbarray; 2536 PetscInt i,cN=C->cmap->N,proc,k,j,lda; 2537 const PetscInt *ranges; 2538 2539 PetscFunctionBegin; 2540 MatCheckProduct(C,3); 2541 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2542 atb = (Mat_TransMatMultDense *)C->product->data; 2543 recvcounts = atb->recvcounts; 2544 sendbuf = atb->sendbuf; 2545 2546 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 2547 PetscCallMPI(MPI_Comm_size(comm,&size)); 2548 2549 /* compute atbarray = aseq^T * bseq */ 2550 PetscCall(MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb)); 2551 2552 PetscCall(MatGetOwnershipRanges(C,&ranges)); 2553 2554 /* arrange atbarray into sendbuf */ 2555 PetscCall(MatDenseGetArrayRead(atb->atb,&atbarray)); 2556 PetscCall(MatDenseGetLDA(atb->atb,&lda)); 2557 for (proc=0, k=0; proc<size; proc++) { 2558 for (j=0; j<cN; j++) { 2559 for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*lda]; 2560 } 2561 } 2562 PetscCall(MatDenseRestoreArrayRead(atb->atb,&atbarray)); 2563 2564 /* sum all atbarray to local values of C */ 2565 PetscCall(MatDenseGetArrayWrite(c->A,&carray)); 2566 PetscCallMPI(MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm)); 2567 PetscCall(MatDenseRestoreArrayWrite(c->A,&carray)); 2568 PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); 2569 PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); 2570 PetscFunctionReturn(0); 2571 } 2572 2573 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 2574 { 2575 MPI_Comm comm; 2576 PetscMPIInt size; 2577 PetscInt cm=A->cmap->n,cM,cN=B->cmap->N; 2578 Mat_TransMatMultDense *atb; 2579 PetscBool cisdense; 2580 PetscInt i; 2581 const PetscInt *ranges; 2582 2583 PetscFunctionBegin; 2584 MatCheckProduct(C,3); 2585 PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2586 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 2587 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) { 2588 SETERRQ(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2589 } 2590 2591 /* create matrix product C */ 2592 PetscCall(MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N)); 2593 
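  /* C = A^T*B: C takes A's column layout for its rows (cm = A->cmap->n locally) and B's column layout for its columns */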
PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"")); 2594 if (!cisdense) { 2595 PetscCall(MatSetType(C,((PetscObject)A)->type_name)); 2596 } 2597 PetscCall(MatSetUp(C)); 2598 2599 /* create data structure for reuse C */ 2600 PetscCallMPI(MPI_Comm_size(comm,&size)); 2601 PetscCall(PetscNew(&atb)); 2602 cM = C->rmap->N; 2603 PetscCall(PetscMalloc2(cM*cN,&atb->sendbuf,size,&atb->recvcounts)); 2604 PetscCall(MatGetOwnershipRanges(C,&ranges)); 2605 for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN; 2606 2607 C->product->data = atb; 2608 C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 2609 PetscFunctionReturn(0); 2610 } 2611 2612 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2613 { 2614 MPI_Comm comm; 2615 PetscMPIInt i, size; 2616 PetscInt maxRows, bufsiz; 2617 PetscMPIInt tag; 2618 PetscInt alg; 2619 Mat_MatTransMultDense *abt; 2620 Mat_Product *product = C->product; 2621 PetscBool flg; 2622 2623 PetscFunctionBegin; 2624 MatCheckProduct(C,4); 2625 PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2626 /* check local size of A and B */ 2627 PetscCheck(A->cmap->n == B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")",A->cmap->n,B->cmap->n); 2628 2629 PetscCall(PetscStrcmp(product->alg,"allgatherv",&flg)); 2630 alg = flg ? 0 : 1; 2631 2632 /* setup matrix product C */ 2633 PetscCall(MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N)); 2634 PetscCall(MatSetType(C,MATMPIDENSE)); 2635 PetscCall(MatSetUp(C)); 2636 PetscCall(PetscObjectGetNewTag((PetscObject)C,&tag)); 2637 2638 /* create data structure for reuse C */ 2639 PetscCall(PetscObjectGetComm((PetscObject)C,&comm)); 2640 PetscCallMPI(MPI_Comm_size(comm,&size)); 2641 PetscCall(PetscNew(&abt)); 2642 abt->tag = tag; 2643 abt->alg = alg; 2644 switch (alg) { 2645 case 1: /* alg: "cyclic" */ 2646 for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i])); 2647 bufsiz = A->cmap->N * maxRows; 2648 PetscCall(PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]))); 2649 break; 2650 default: /* alg: "allgatherv" */ 2651 PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]))); 2652 PetscCall(PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls))); 2653 for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N; 2654 for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i]; 2655 break; 2656 } 2657 2658 C->product->data = abt; 2659 C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 2660 C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense; 2661 PetscFunctionReturn(0); 2662 } 2663 2664 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) 2665 { 2666 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2667 Mat_MatTransMultDense *abt; 2668 MPI_Comm comm; 2669 PetscMPIInt rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom; 2670 PetscScalar *sendbuf, *recvbuf=NULL, *cv; 2671 PetscInt i,cK=A->cmap->N,k,j,bn; 2672 PetscScalar _DOne=1.0,_DZero=0.0; 2673 const PetscScalar *av,*bv; 2674 PetscBLASInt cm, cn, ck, alda, blda = 0, clda; 2675 MPI_Request reqs[2]; 2676 const 
PetscInt *ranges; 2677 2678 PetscFunctionBegin; 2679 MatCheckProduct(C,3); 2680 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2681 abt = (Mat_MatTransMultDense*)C->product->data; 2682 PetscCall(PetscObjectGetComm((PetscObject)C,&comm)); 2683 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 2684 PetscCallMPI(MPI_Comm_size(comm,&size)); 2685 PetscCall(MatDenseGetArrayRead(a->A,&av)); 2686 PetscCall(MatDenseGetArrayRead(b->A,&bv)); 2687 PetscCall(MatDenseGetArrayWrite(c->A,&cv)); 2688 PetscCall(MatDenseGetLDA(a->A,&i)); 2689 PetscCall(PetscBLASIntCast(i,&alda)); 2690 PetscCall(MatDenseGetLDA(b->A,&i)); 2691 PetscCall(PetscBLASIntCast(i,&blda)); 2692 PetscCall(MatDenseGetLDA(c->A,&i)); 2693 PetscCall(PetscBLASIntCast(i,&clda)); 2694 PetscCall(MatGetOwnershipRanges(B,&ranges)); 2695 bn = B->rmap->n; 2696 if (blda == bn) { 2697 sendbuf = (PetscScalar*)bv; 2698 } else { 2699 sendbuf = abt->buf[0]; 2700 for (k = 0, i = 0; i < cK; i++) { 2701 for (j = 0; j < bn; j++, k++) { 2702 sendbuf[k] = bv[i * blda + j]; 2703 } 2704 } 2705 } 2706 if (size > 1) { 2707 sendto = (rank + size - 1) % size; 2708 recvfrom = (rank + size + 1) % size; 2709 } else { 2710 sendto = recvfrom = 0; 2711 } 2712 PetscCall(PetscBLASIntCast(cK,&ck)); 2713 PetscCall(PetscBLASIntCast(c->A->rmap->n,&cm)); 2714 recvisfrom = rank; 2715 for (i = 0; i < size; i++) { 2716 /* we have finished receiving into sendbuf, so the buffers can be read/modified */ 2717 PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate from */ 2718 PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 2719 2720 if (nextrecvisfrom != rank) { 2721 /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2722 sendsiz = cK * bn; 2723 recvsiz = cK * nextbn; 2724 recvbuf = (i & 1) ?
abt->buf[0] : abt->buf[1]; 2725 PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0])); 2726 PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1])); 2727 } 2728 2729 /* local aseq * sendbuf^T */ 2730 PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn)); 2731 if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda)); 2732 2733 if (nextrecvisfrom != rank) { 2734 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2735 PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE)); 2736 } 2737 bn = nextbn; 2738 recvisfrom = nextrecvisfrom; 2739 sendbuf = recvbuf; 2740 } 2741 PetscCall(MatDenseRestoreArrayRead(a->A,&av)); 2742 PetscCall(MatDenseRestoreArrayRead(b->A,&bv)); 2743 PetscCall(MatDenseRestoreArrayWrite(c->A,&cv)); 2744 PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); 2745 PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); 2746 PetscFunctionReturn(0); 2747 } 2748 2749 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2750 { 2751 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2752 Mat_MatTransMultDense *abt; 2753 MPI_Comm comm; 2754 PetscMPIInt size; 2755 PetscScalar *cv, *sendbuf, *recvbuf; 2756 const PetscScalar *av,*bv; 2757 PetscInt blda,i,cK=A->cmap->N,k,j,bn; 2758 PetscScalar _DOne=1.0,_DZero=0.0; 2759 PetscBLASInt cm, cn, ck, alda, clda; 2760 2761 PetscFunctionBegin; 2762 MatCheckProduct(C,3); 2763 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2764 abt = (Mat_MatTransMultDense*)C->product->data; 2765 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 2766 PetscCallMPI(MPI_Comm_size(comm,&size)); 2767 PetscCall(MatDenseGetArrayRead(a->A,&av)); 2768 PetscCall(MatDenseGetArrayRead(b->A,&bv)); 2769 PetscCall(MatDenseGetArrayWrite(c->A,&cv)); 2770 PetscCall(MatDenseGetLDA(a->A,&i)); 2771 PetscCall(PetscBLASIntCast(i,&alda)); 2772 PetscCall(MatDenseGetLDA(b->A,&blda)); 2773 PetscCall(MatDenseGetLDA(c->A,&i)); 2774 PetscCall(PetscBLASIntCast(i,&clda)); 2775 /* copy transpose of B into buf[0] */ 2776 bn = B->rmap->n; 2777 sendbuf = abt->buf[0]; 2778 recvbuf = abt->buf[1]; 2779 for (k = 0, j = 0; j < bn; j++) { 2780 for (i = 0; i < cK; i++, k++) { 2781 sendbuf[k] = bv[i * blda + j]; 2782 } 2783 } 2784 PetscCall(MatDenseRestoreArrayRead(b->A,&bv)); 2785 PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm)); 2786 PetscCall(PetscBLASIntCast(cK,&ck)); 2787 PetscCall(PetscBLASIntCast(c->A->rmap->n,&cm)); 2788 PetscCall(PetscBLASIntCast(c->A->cmap->n,&cn)); 2789 if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda)); 2790 PetscCall(MatDenseRestoreArrayRead(a->A,&av)); 2791 PetscCall(MatDenseRestoreArrayRead(b->A,&bv)); 2792 PetscCall(MatDenseRestoreArrayWrite(c->A,&cv)); 2793 PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); 2794 PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); 2795 PetscFunctionReturn(0); 2796 } 2797 2798 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2799 { 2800 Mat_MatTransMultDense *abt; 2801 2802 PetscFunctionBegin; 2803 MatCheckProduct(C,3); 2804 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data 
empty"); 2805 abt = (Mat_MatTransMultDense*)C->product->data; 2806 switch (abt->alg) { 2807 case 1: 2808 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); 2809 break; 2810 default: 2811 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); 2812 break; 2813 } 2814 PetscFunctionReturn(0); 2815 } 2816 2817 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) 2818 { 2819 Mat_MatMultDense *ab = (Mat_MatMultDense*)data; 2820 2821 PetscFunctionBegin; 2822 PetscCall(MatDestroy(&ab->Ce)); 2823 PetscCall(MatDestroy(&ab->Ae)); 2824 PetscCall(MatDestroy(&ab->Be)); 2825 PetscCall(PetscFree(ab)); 2826 PetscFunctionReturn(0); 2827 } 2828 2829 #if defined(PETSC_HAVE_ELEMENTAL) 2830 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2831 { 2832 Mat_MatMultDense *ab; 2833 2834 PetscFunctionBegin; 2835 MatCheckProduct(C,3); 2836 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data"); 2837 ab = (Mat_MatMultDense*)C->product->data; 2838 PetscCall(MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae)); 2839 PetscCall(MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be)); 2840 PetscCall(MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce)); 2841 PetscCall(MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C)); 2842 PetscFunctionReturn(0); 2843 } 2844 2845 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 2846 { 2847 Mat Ae,Be,Ce; 2848 Mat_MatMultDense *ab; 2849 2850 PetscFunctionBegin; 2851 MatCheckProduct(C,4); 2852 PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2853 /* check local size of A and B */ 2854 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 2855 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2856 } 2857 2858 /* create elemental matrices Ae and Be */ 2859 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae)); 2860 PetscCall(MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N)); 2861 PetscCall(MatSetType(Ae,MATELEMENTAL)); 2862 PetscCall(MatSetUp(Ae)); 2863 PetscCall(MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE)); 2864 2865 PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be)); 2866 PetscCall(MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N)); 2867 PetscCall(MatSetType(Be,MATELEMENTAL)); 2868 PetscCall(MatSetUp(Be)); 2869 PetscCall(MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE)); 2870 2871 /* compute symbolic Ce = Ae*Be */ 2872 PetscCall(MatCreate(PetscObjectComm((PetscObject)C),&Ce)); 2873 PetscCall(MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce)); 2874 2875 /* setup C */ 2876 PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE)); 2877 PetscCall(MatSetType(C,MATDENSE)); 2878 PetscCall(MatSetUp(C)); 2879 2880 /* create data structure for reuse Cdense */ 2881 PetscCall(PetscNew(&ab)); 2882 ab->Ae = Ae; 2883 ab->Be = Be; 2884 ab->Ce = Ce; 2885 2886 C->product->data = ab; 2887 C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2888 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2889 PetscFunctionReturn(0); 2890 } 2891 #endif 2892 /* ----------------------------------------------- */ 2893 #if defined(PETSC_HAVE_ELEMENTAL) 2894 static PetscErrorCode 
MatProductSetFromOptions_MPIDense_AB(Mat C) 2895 { 2896 PetscFunctionBegin; 2897 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 2898 C->ops->productsymbolic = MatProductSymbolic_AB; 2899 PetscFunctionReturn(0); 2900 } 2901 #endif 2902 2903 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 2904 { 2905 Mat_Product *product = C->product; 2906 Mat A = product->A,B=product->B; 2907 2908 PetscFunctionBegin; 2909 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) 2910 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2911 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense; 2912 C->ops->productsymbolic = MatProductSymbolic_AtB; 2913 PetscFunctionReturn(0); 2914 } 2915 2916 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 2917 { 2918 PetscErrorCode ierr; 2919 Mat_Product *product = C->product; 2920 const char *algTypes[2] = {"allgatherv","cyclic"}; 2921 PetscInt alg,nalg = 2; 2922 PetscBool flg = PETSC_FALSE; 2923 2924 PetscFunctionBegin; 2925 /* Set default algorithm */ 2926 alg = 0; /* default is allgatherv */ 2927 PetscCall(PetscStrcmp(product->alg,"default",&flg)); 2928 if (flg) { 2929 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 2930 } 2931 2932 /* Get runtime option */ 2933 if (product->api_user) { 2934 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");PetscCall(ierr); 2935 PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg)); 2936 ierr = PetscOptionsEnd();PetscCall(ierr); 2937 } else { 2938 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");PetscCall(ierr); 2939 PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg)); 2940 ierr = PetscOptionsEnd();PetscCall(ierr); 2941 } 2942 if (flg) { 2943 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 2944 } 2945 2946 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 2947 C->ops->productsymbolic = MatProductSymbolic_ABt; 2948 PetscFunctionReturn(0); 2949 } 2950 2951 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 2952 { 2953 Mat_Product *product = C->product; 2954 2955 PetscFunctionBegin; 2956 switch (product->type) { 2957 #if defined(PETSC_HAVE_ELEMENTAL) 2958 case MATPRODUCT_AB: 2959 PetscCall(MatProductSetFromOptions_MPIDense_AB(C)); 2960 break; 2961 #endif 2962 case MATPRODUCT_AtB: 2963 PetscCall(MatProductSetFromOptions_MPIDense_AtB(C)); 2964 break; 2965 case MATPRODUCT_ABt: 2966 PetscCall(MatProductSetFromOptions_MPIDense_ABt(C)); 2967 break; 2968 default: 2969 break; 2970 } 2971 PetscFunctionReturn(0); 2972 } 2973
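/*
   Editor's note: an illustrative sketch (not part of PETSc) of how the MATPRODUCT_ABt path
   registered above is typically driven from application code; A and B are assumed to be
   MATMPIDENSE matrices with matching local column sizes. With the MatProduct interface the
   algorithm is selected by -mat_product_algorithm {allgatherv,cyclic}; the legacy
   MatMatTransposeMult() interface uses -matmattransmult_mpidense_mpidense_via instead.

      Mat C;
      PetscCall(MatProductCreate(A,B,NULL,&C));
      PetscCall(MatProductSetType(C,MATPRODUCT_ABt));
      PetscCall(MatProductSetFromOptions(C));
      PetscCall(MatProductSymbolic(C));
      PetscCall(MatProductNumeric(C));     now C = A*B^T
      PetscCall(MatDestroy(&C));
*/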