/*
   Basic functions for basic parallel dense matrices.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@

   MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
   matrix that represents the operator. For sequential matrices it returns itself.

   Input Parameter:
.  A - the Seq or MPI dense matrix

   Output Parameter:
.  B - the inner matrix

   Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  PetscBool    flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscValidPointer(B,2);
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATMPIDENSE,&flg));
  if (flg) *B = mat->A;
  else {
    PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQDENSE,&flg));
    PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for matrix type %s",((PetscObject)A)->type_name);
    *B = A;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatCopy_MPIDense(Mat A, Mat B, MatStructure s)
{
  Mat_MPIDense *Amat = (Mat_MPIDense*)A->data;
  Mat_MPIDense *Bmat = (Mat_MPIDense*)B->data;

  PetscFunctionBegin;
  PetscCall(MatCopy(Amat->A,Bmat->A,s));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  PetscInt     lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheckFalse(row < rstart || row >= rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  PetscCall(MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense *mat = (Mat_MPIDense*)A->data;
  PetscInt     lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  PetscCheckFalse(row < rstart || row >= rend,PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  PetscCall(MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v));
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)A->data;
  PetscInt     m = A->rmap->n,rstart = A->rmap->rstart;
  PetscScalar  *array;
  MPI_Comm     comm;
  PetscBool    flg;
  Mat          B;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(A,&flg));
  PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported.");
  PetscCall(PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B));
  if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */

    PetscCall(PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg));
    PetscCheck(!flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. 
Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA); 93 PetscCall(PetscObjectGetComm((PetscObject)(mdn->A),&comm)); 94 PetscCall(MatCreate(comm,&B)); 95 PetscCall(MatSetSizes(B,m,m,m,m)); 96 PetscCall(MatSetType(B,((PetscObject)mdn->A)->type_name)); 97 PetscCall(MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array)); 98 PetscCall(MatSeqDenseSetPreallocation(B,array+m*rstart)); 99 PetscCall(MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array)); 100 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 101 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 102 PetscCall(PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B)); 103 *a = B; 104 PetscCall(MatDestroy(&B)); 105 } else *a = B; 106 PetscFunctionReturn(0); 107 } 108 109 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 110 { 111 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 112 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 113 PetscBool roworiented = A->roworiented; 114 115 PetscFunctionBegin; 116 for (i=0; i<m; i++) { 117 if (idxm[i] < 0) continue; 118 PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 119 if (idxm[i] >= rstart && idxm[i] < rend) { 120 row = idxm[i] - rstart; 121 if (roworiented) { 122 PetscCall(MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv)); 123 } else { 124 for (j=0; j<n; j++) { 125 if (idxn[j] < 0) continue; 126 PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 127 PetscCall(MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv)); 128 } 129 } 130 } else if (!A->donotstash) { 131 mat->assembled = PETSC_FALSE; 132 if (roworiented) { 133 PetscCall(MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE)); 134 } else { 135 PetscCall(MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE)); 136 } 137 } 138 } 139 PetscFunctionReturn(0); 140 } 141 142 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 143 { 144 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 145 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 146 147 PetscFunctionBegin; 148 for (i=0; i<m; i++) { 149 if (idxm[i] < 0) continue; /* negative row */ 150 PetscCheck(idxm[i] < mat->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 151 if (idxm[i] >= rstart && idxm[i] < rend) { 152 row = idxm[i] - rstart; 153 for (j=0; j<n; j++) { 154 if (idxn[j] < 0) continue; /* negative column */ 155 PetscCheck(idxn[j] < mat->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 156 PetscCall(MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j)); 157 } 158 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 159 } 160 PetscFunctionReturn(0); 161 } 162 163 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda) 164 { 165 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 166 167 PetscFunctionBegin; 168 PetscCall(MatDenseGetLDA(a->A,lda)); 169 PetscFunctionReturn(0); 170 } 171 172 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda) 173 { 174 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 175 PetscBool iscuda; 176 177 PetscFunctionBegin; 178 if (!a->A) { 179 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 180 PetscCall(PetscLayoutSetUp(A->rmap)); 181 
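  /* A minimal usage sketch of the MatSetValues()/MatAssembly flow handled by
     MatSetValues_MPIDense() above: locally owned rows are inserted directly into the inner
     sequential matrix, while entries for rows owned by other ranks are stashed and communicated
     during assembly (unless MAT_IGNORE_OFF_PROC_ENTRIES is set). A is assumed to be a
     MATMPIDENSE matrix created elsewhere; all other names are illustrative only.

       PetscInt    rstart,rend,N,i,j;
       PetscScalar one = 1.0;
       PetscCall(MatGetSize(A,NULL,&N));
       PetscCall(MatGetOwnershipRange(A,&rstart,&rend));
       for (i=rstart; i<rend; i++) {
         j = (i+1) % N;                                    // an arbitrary column index
         PetscCall(MatSetValues(A,1,&i,1,&j,&one,INSERT_VALUES));
       }
       PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));  // communicates any stashed off-process entries
       PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
  */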
PetscCall(PetscLayoutSetUp(A->cmap)); 182 PetscCall(MatCreate(PETSC_COMM_SELF,&a->A)); 183 PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->A)); 184 PetscCall(MatSetSizes(a->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N)); 185 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda)); 186 PetscCall(MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE)); 187 } 188 PetscCall(MatDenseSetLDA(a->A,lda)); 189 PetscFunctionReturn(0); 190 } 191 192 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array) 193 { 194 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 195 196 PetscFunctionBegin; 197 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 198 PetscCall(MatDenseGetArray(a->A,array)); 199 PetscFunctionReturn(0); 200 } 201 202 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array) 203 { 204 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 205 206 PetscFunctionBegin; 207 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 208 PetscCall(MatDenseGetArrayRead(a->A,array)); 209 PetscFunctionReturn(0); 210 } 211 212 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array) 213 { 214 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 215 216 PetscFunctionBegin; 217 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 218 PetscCall(MatDenseGetArrayWrite(a->A,array)); 219 PetscFunctionReturn(0); 220 } 221 222 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array) 223 { 224 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 225 226 PetscFunctionBegin; 227 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 228 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 229 PetscCall(MatDensePlaceArray(a->A,array)); 230 PetscFunctionReturn(0); 231 } 232 233 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 234 { 235 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 236 237 PetscFunctionBegin; 238 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 239 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 240 PetscCall(MatDenseResetArray(a->A)); 241 PetscFunctionReturn(0); 242 } 243 244 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array) 245 { 246 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 247 248 PetscFunctionBegin; 249 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 250 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 251 PetscCall(MatDenseReplaceArray(a->A,array)); 252 PetscFunctionReturn(0); 253 } 254 255 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 256 { 257 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 258 PetscInt lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 259 const PetscInt *irow,*icol; 260 const PetscScalar *v; 261 PetscScalar *bv; 262 Mat newmat; 263 IS iscol_local; 264 MPI_Comm comm_is,comm_mat; 265 266 PetscFunctionBegin; 267 PetscCall(PetscObjectGetComm((PetscObject)A,&comm_mat)); 268 
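  /* The array accessors wrapped above (MatDenseGetArray()/GetArrayRead()/GetArrayWrite() and
     the Place/Reset/Replace variants) all forward to the inner sequential matrix, which stores
     the local rows of all N global columns in column-major order with the leading dimension
     reported by MatDenseGetLDA(). A minimal read-only sketch, assuming a MATMPIDENSE matrix A
     created elsewhere (the other names are illustrative):

       const PetscScalar *v;
       PetscScalar       sum = 0.0;
       PetscInt          lda,m,N,i,j;
       PetscCall(MatGetLocalSize(A,&m,NULL));
       PetscCall(MatGetSize(A,NULL,&N));
       PetscCall(MatDenseGetArrayRead(A,&v));
       PetscCall(MatDenseGetLDA(A,&lda));
       for (j=0; j<N; j++) {
         for (i=0; i<m; i++) sum += v[i + j*lda];          // entry (rstart+i, j) of the global matrix
       }
       PetscCall(MatDenseRestoreArrayRead(A,&v));

     Every Get must be paired with the matching Restore before the matrix is used elsewhere. */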
  PetscCall(PetscObjectGetComm((PetscObject)iscol,&comm_is));
  PetscCheck(comm_mat == comm_is,PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator");

  PetscCall(ISAllGather(iscol,&iscol_local));
  PetscCall(ISGetIndices(isrow,&irow));
  PetscCall(ISGetIndices(iscol_local,&icol));
  PetscCall(ISGetLocalSize(isrow,&nrows));
  PetscCall(ISGetLocalSize(iscol,&ncols));
  PetscCall(ISGetSize(iscol,&Ncols)); /* global number of columns, size of iscol_local */

  /* No parallel redistribution currently supported! Should really check each index set
     to confirm that it is OK. ... Currently supports only submatrix same partitioning as
     original matrix! */

  PetscCall(MatGetLocalSize(A,&nlrows,&nlcols));
  PetscCall(MatGetOwnershipRange(A,&rstart,&rend));

  /* Check submatrix call */
  if (scall == MAT_REUSE_MATRIX) {
    /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */
    /* Really need to test rows and column sizes! */
    newmat = *B;
  } else {
    /* Create and fill new matrix */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&newmat));
    PetscCall(MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols));
    PetscCall(MatSetType(newmat,((PetscObject)A)->type_name));
    PetscCall(MatMPIDenseSetPreallocation(newmat,NULL));
  }

  /* Now extract the data pointers and do the copy, column at a time */
  newmatd = (Mat_MPIDense*)newmat->data;
  PetscCall(MatDenseGetArray(newmatd->A,&bv));
  PetscCall(MatDenseGetArrayRead(mat->A,&v));
  PetscCall(MatDenseGetLDA(mat->A,&lda));
  for (i=0; i<Ncols; i++) {
    const PetscScalar *av = v + lda*icol[i];
    for (j=0; j<nrows; j++) {
      *bv++ = av[irow[j] - rstart];
    }
  }
  PetscCall(MatDenseRestoreArrayRead(mat->A,&v));
  PetscCall(MatDenseRestoreArray(newmatd->A,&bv));

  /* Assemble the matrices so that the correct flags are set */
  PetscCall(MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY));

  /* Free work space */
  PetscCall(ISRestoreIndices(isrow,&irow));
  PetscCall(ISRestoreIndices(iscol_local,&icol));
  PetscCall(ISDestroy(&iscol_local));
  *B = newmat;
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArray(a->A,array));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayRead(a->A,array));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array)
{
  Mat_MPIDense *a = (Mat_MPIDense*)A->data;

  PetscFunctionBegin;
  PetscCall(MatDenseRestoreArrayWrite(a->A,array));
  PetscFunctionReturn(0);
}

PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode)
{
  Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data;
  PetscInt     nstash,reallocs;

  PetscFunctionBegin;
  if (mdn->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  PetscCall(MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs));
  PetscCall(PetscInfo(mdn->A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " 
mallocs.\n",nstash,reallocs)); 362 PetscFunctionReturn(0); 363 } 364 365 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 366 { 367 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 368 PetscInt i,*row,*col,flg,j,rstart,ncols; 369 PetscMPIInt n; 370 PetscScalar *val; 371 372 PetscFunctionBegin; 373 if (!mdn->donotstash && !mat->nooffprocentries) { 374 /* wait on receives */ 375 while (1) { 376 PetscCall(MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg)); 377 if (!flg) break; 378 379 for (i=0; i<n;) { 380 /* Now identify the consecutive vals belonging to the same row */ 381 for (j=i,rstart=row[j]; j<n; j++) { 382 if (row[j] != rstart) break; 383 } 384 if (j < n) ncols = j-i; 385 else ncols = n-i; 386 /* Now assemble all these values with a single function call */ 387 PetscCall(MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode)); 388 i = j; 389 } 390 } 391 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 392 } 393 394 PetscCall(MatAssemblyBegin(mdn->A,mode)); 395 PetscCall(MatAssemblyEnd(mdn->A,mode)); 396 PetscFunctionReturn(0); 397 } 398 399 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 400 { 401 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 402 403 PetscFunctionBegin; 404 PetscCall(MatZeroEntries(l->A)); 405 PetscFunctionReturn(0); 406 } 407 408 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 409 { 410 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 411 PetscInt i,len,*lrows; 412 413 PetscFunctionBegin; 414 /* get locally owned rows */ 415 PetscCall(PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL)); 416 /* fix right hand side if needed */ 417 if (x && b) { 418 const PetscScalar *xx; 419 PetscScalar *bb; 420 421 PetscCall(VecGetArrayRead(x, &xx)); 422 PetscCall(VecGetArrayWrite(b, &bb)); 423 for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]]; 424 PetscCall(VecRestoreArrayRead(x, &xx)); 425 PetscCall(VecRestoreArrayWrite(b, &bb)); 426 } 427 PetscCall(MatZeroRows(l->A,len,lrows,0.0,NULL,NULL)); 428 if (diag != 0.0) { 429 Vec d; 430 431 PetscCall(MatCreateVecs(A,NULL,&d)); 432 PetscCall(VecSet(d,diag)); 433 PetscCall(MatDiagonalSet(A,d,INSERT_VALUES)); 434 PetscCall(VecDestroy(&d)); 435 } 436 PetscCall(PetscFree(lrows)); 437 PetscFunctionReturn(0); 438 } 439 440 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec); 441 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec); 442 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec); 443 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec); 444 445 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 446 { 447 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 448 const PetscScalar *ax; 449 PetscScalar *ay; 450 PetscMemType axmtype,aymtype; 451 452 PetscFunctionBegin; 453 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 454 PetscCall(VecGetArrayReadAndMemType(xx,&ax,&axmtype)); 455 PetscCall(VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype)); 456 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE)); 457 PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE)); 458 PetscCall(VecRestoreArrayAndMemType(mdn->lvec,&ay)); 459 PetscCall(VecRestoreArrayReadAndMemType(xx,&ax)); 460 PetscCall((*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy)); 461 PetscFunctionReturn(0); 462 } 463 464 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 465 { 466 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 467 const PetscScalar *ax; 468 PetscScalar *ay; 
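  /* MatMult_MPIDense() above and MatMultAdd_MPIDense() here share one pattern: the distributed
     input vector is broadcast into the full-length sequential work vector lvec through the
     PetscSF Mvctx, and the owned rows of the result are then produced by a purely local dense
     multiply with the inner sequential matrix. Because each rank stores all N columns of its
     rows, it needs the entire input vector, which is why lvec has global length. A minimal
     driver sketch, assuming a MATMPIDENSE matrix A created elsewhere (vector names are
     illustrative):

       Vec x,y;
       PetscCall(MatCreateVecs(A,&x,&y));   // x follows the column layout, y the row layout
       PetscCall(VecSet(x,1.0));
       PetscCall(MatMult(A,x,y));           // gather x into lvec, then local dense multiply
       PetscCall(VecDestroy(&x));
       PetscCall(VecDestroy(&y));
  */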
469 PetscMemType axmtype,aymtype; 470 471 PetscFunctionBegin; 472 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(mat)); 473 PetscCall(VecGetArrayReadAndMemType(xx,&ax,&axmtype)); 474 PetscCall(VecGetArrayAndMemType(mdn->lvec,&ay,&aymtype)); 475 PetscCall(PetscSFBcastWithMemTypeBegin(mdn->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPI_REPLACE)); 476 PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay,MPI_REPLACE)); 477 PetscCall(VecRestoreArrayAndMemType(mdn->lvec,&ay)); 478 PetscCall(VecRestoreArrayReadAndMemType(xx,&ax)); 479 PetscCall((*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz)); 480 PetscFunctionReturn(0); 481 } 482 483 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 484 { 485 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 486 const PetscScalar *ax; 487 PetscScalar *ay; 488 PetscMemType axmtype,aymtype; 489 490 PetscFunctionBegin; 491 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 492 PetscCall(VecSet(yy,0.0)); 493 PetscCall((*a->A->ops->multtranspose)(a->A,xx,a->lvec)); 494 PetscCall(VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype)); 495 PetscCall(VecGetArrayAndMemType(yy,&ay,&aymtype)); 496 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM)); 497 PetscCall(PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM)); 498 PetscCall(VecRestoreArrayReadAndMemType(a->lvec,&ax)); 499 PetscCall(VecRestoreArrayAndMemType(yy,&ay)); 500 PetscFunctionReturn(0); 501 } 502 503 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 504 { 505 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 506 const PetscScalar *ax; 507 PetscScalar *ay; 508 PetscMemType axmtype,aymtype; 509 510 PetscFunctionBegin; 511 if (!a->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 512 PetscCall(VecCopy(yy,zz)); 513 PetscCall((*a->A->ops->multtranspose)(a->A,xx,a->lvec)); 514 PetscCall(VecGetArrayReadAndMemType(a->lvec,&ax,&axmtype)); 515 PetscCall(VecGetArrayAndMemType(zz,&ay,&aymtype)); 516 PetscCall(PetscSFReduceWithMemTypeBegin(a->Mvctx,MPIU_SCALAR,axmtype,ax,aymtype,ay,MPIU_SUM)); 517 PetscCall(PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM)); 518 PetscCall(VecRestoreArrayReadAndMemType(a->lvec,&ax)); 519 PetscCall(VecRestoreArrayAndMemType(zz,&ay)); 520 PetscFunctionReturn(0); 521 } 522 523 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 524 { 525 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 526 PetscInt lda,len,i,n,m = A->rmap->n,radd; 527 PetscScalar *x,zero = 0.0; 528 const PetscScalar *av; 529 530 PetscFunctionBegin; 531 PetscCall(VecSet(v,zero)); 532 PetscCall(VecGetArray(v,&x)); 533 PetscCall(VecGetSize(v,&n)); 534 PetscCheck(n == A->rmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 535 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 536 radd = A->rmap->rstart*m; 537 PetscCall(MatDenseGetArrayRead(a->A,&av)); 538 PetscCall(MatDenseGetLDA(a->A,&lda)); 539 for (i=0; i<len; i++) { 540 x[i] = av[radd + i*lda + i]; 541 } 542 PetscCall(MatDenseRestoreArrayRead(a->A,&av)); 543 PetscCall(VecRestoreArray(v,&x)); 544 PetscFunctionReturn(0); 545 } 546 547 PetscErrorCode MatDestroy_MPIDense(Mat mat) 548 { 549 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 550 551 PetscFunctionBegin; 552 #if defined(PETSC_USE_LOG) 553 PetscLogObjectState((PetscObject)mat,"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT,mat->rmap->N,mat->cmap->N); 554 #endif 555 PetscCall(MatStashDestroy_Private(&mat->stash)); 556 PetscCheck(!mdn->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 557 
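  /* The transpose products defined above go the other way around: MatMultTranspose_MPIDense()
     applies the transpose of the local block to the locally owned part of the input, producing
     a full-length partial result in lvec, which is then summed across ranks into the
     distributed output by a PetscSF reduction. A user-level sketch, assuming a MATMPIDENSE
     matrix A created elsewhere (names are illustrative):

       Vec y,xt;
       PetscCall(MatCreateVecs(A,&xt,&y));  // xt: column layout, y: row layout
       PetscCall(VecSet(y,1.0));
       PetscCall(MatMultTranspose(A,y,xt)); // local multtranspose, then SF reduce with MPIU_SUM
       PetscCall(VecDestroy(&y));
       PetscCall(VecDestroy(&xt));
  */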
PetscCheck(!mdn->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 558 PetscCall(MatDestroy(&mdn->A)); 559 PetscCall(VecDestroy(&mdn->lvec)); 560 PetscCall(PetscSFDestroy(&mdn->Mvctx)); 561 PetscCall(VecDestroy(&mdn->cvec)); 562 PetscCall(MatDestroy(&mdn->cmat)); 563 564 PetscCall(PetscFree(mat->data)); 565 PetscCall(PetscObjectChangeTypeName((PetscObject)mat,NULL)); 566 567 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL)); 568 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL)); 569 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL)); 570 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL)); 571 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL)); 572 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL)); 573 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL)); 574 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL)); 575 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL)); 576 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL)); 577 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL)); 578 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",NULL)); 579 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",NULL)); 580 #if defined(PETSC_HAVE_ELEMENTAL) 581 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL)); 582 #endif 583 #if defined(PETSC_HAVE_SCALAPACK) 584 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",NULL)); 585 #endif 586 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL)); 587 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL)); 588 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL)); 589 #if defined (PETSC_HAVE_CUDA) 590 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL)); 591 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL)); 592 #endif 593 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL)); 594 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL)); 595 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL)); 596 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL)); 597 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL)); 598 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL)); 599 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL)); 600 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL)); 601 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL)); 602 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL)); 603 PetscFunctionReturn(0); 604 } 605 606 PETSC_INTERN PetscErrorCode 
MatView_SeqDense(Mat,PetscViewer); 607 608 #include <petscdraw.h> 609 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 610 { 611 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 612 PetscMPIInt rank; 613 PetscViewerType vtype; 614 PetscBool iascii,isdraw; 615 PetscViewer sviewer; 616 PetscViewerFormat format; 617 618 PetscFunctionBegin; 619 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank)); 620 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 621 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 622 if (iascii) { 623 PetscCall(PetscViewerGetType(viewer,&vtype)); 624 PetscCall(PetscViewerGetFormat(viewer,&format)); 625 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 626 MatInfo info; 627 PetscCall(MatGetInfo(mat,MAT_LOCAL,&info)); 628 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 629 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %" PetscInt_FMT " \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory)); 630 PetscCall(PetscViewerFlush(viewer)); 631 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 632 if (mdn->Mvctx) PetscCall(PetscSFView(mdn->Mvctx,viewer)); 633 PetscFunctionReturn(0); 634 } else if (format == PETSC_VIEWER_ASCII_INFO) { 635 PetscFunctionReturn(0); 636 } 637 } else if (isdraw) { 638 PetscDraw draw; 639 PetscBool isnull; 640 641 PetscCall(PetscViewerDrawGetDraw(viewer,0,&draw)); 642 PetscCall(PetscDrawIsNull(draw,&isnull)); 643 if (isnull) PetscFunctionReturn(0); 644 } 645 646 { 647 /* assemble the entire matrix onto first processor. */ 648 Mat A; 649 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 650 PetscInt *cols; 651 PetscScalar *vals; 652 653 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat),&A)); 654 if (rank == 0) { 655 PetscCall(MatSetSizes(A,M,N,M,N)); 656 } else { 657 PetscCall(MatSetSizes(A,0,0,M,N)); 658 } 659 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 660 PetscCall(MatSetType(A,MATMPIDENSE)); 661 PetscCall(MatMPIDenseSetPreallocation(A,NULL)); 662 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)A)); 663 664 /* Copy the matrix ... 
This isn't the most efficient means, 665 but it's quick for now */ 666 A->insertmode = INSERT_VALUES; 667 668 row = mat->rmap->rstart; 669 m = mdn->A->rmap->n; 670 for (i=0; i<m; i++) { 671 PetscCall(MatGetRow_MPIDense(mat,row,&nz,&cols,&vals)); 672 PetscCall(MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES)); 673 PetscCall(MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals)); 674 row++; 675 } 676 677 PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY)); 678 PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY)); 679 PetscCall(PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 680 if (rank == 0) { 681 PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name)); 682 PetscCall(MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer)); 683 } 684 PetscCall(PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer)); 685 PetscCall(PetscViewerFlush(viewer)); 686 PetscCall(MatDestroy(&A)); 687 } 688 PetscFunctionReturn(0); 689 } 690 691 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 692 { 693 PetscBool iascii,isbinary,isdraw,issocket; 694 695 PetscFunctionBegin; 696 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii)); 697 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 698 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket)); 699 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw)); 700 701 if (iascii || issocket || isdraw) { 702 PetscCall(MatView_MPIDense_ASCIIorDraworSocket(mat,viewer)); 703 } else if (isbinary) { 704 PetscCall(MatView_Dense_Binary(mat,viewer)); 705 } 706 PetscFunctionReturn(0); 707 } 708 709 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 710 { 711 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 712 Mat mdn = mat->A; 713 PetscLogDouble isend[5],irecv[5]; 714 715 PetscFunctionBegin; 716 info->block_size = 1.0; 717 718 PetscCall(MatGetInfo(mdn,MAT_LOCAL,info)); 719 720 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 721 isend[3] = info->memory; isend[4] = info->mallocs; 722 if (flag == MAT_LOCAL) { 723 info->nz_used = isend[0]; 724 info->nz_allocated = isend[1]; 725 info->nz_unneeded = isend[2]; 726 info->memory = isend[3]; 727 info->mallocs = isend[4]; 728 } else if (flag == MAT_GLOBAL_MAX) { 729 PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A))); 730 731 info->nz_used = irecv[0]; 732 info->nz_allocated = irecv[1]; 733 info->nz_unneeded = irecv[2]; 734 info->memory = irecv[3]; 735 info->mallocs = irecv[4]; 736 } else if (flag == MAT_GLOBAL_SUM) { 737 PetscCall(MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A))); 738 739 info->nz_used = irecv[0]; 740 info->nz_allocated = irecv[1]; 741 info->nz_unneeded = irecv[2]; 742 info->memory = irecv[3]; 743 info->mallocs = irecv[4]; 744 } 745 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 746 info->fill_ratio_needed = 0; 747 info->factor_mallocs = 0; 748 PetscFunctionReturn(0); 749 } 750 751 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 752 { 753 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 754 755 PetscFunctionBegin; 756 switch (op) { 757 case MAT_NEW_NONZERO_LOCATIONS: 758 case MAT_NEW_NONZERO_LOCATION_ERR: 759 case MAT_NEW_NONZERO_ALLOCATION_ERR: 760 MatCheckPreallocated(A,1); 761 PetscCall(MatSetOption(a->A,op,flg)); 762 break; 763 case MAT_ROW_ORIENTED: 764 
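    /* MAT_ROW_ORIENTED controls how the value array passed to MatSetValues() is interpreted by
       the routines above: PETSC_TRUE (the default) means row-by-row, PETSC_FALSE means
       column-by-column, which matches the native storage of dense matrices. A sketch of
       inserting a column-major 2x2 block, assuming a MATMPIDENSE matrix A created elsewhere
       (indices and values are illustrative):

         PetscInt    rows[2] = {0,1},cols[2] = {0,1};
         PetscScalar vcol[4] = {11,21,12,22};              // column-major: entry (i,j) is vcol[i+2*j]
         PetscCall(MatSetOption(A,MAT_ROW_ORIENTED,PETSC_FALSE));
         PetscCall(MatSetValues(A,2,rows,2,cols,vcol,INSERT_VALUES));
         PetscCall(MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY));
         PetscCall(MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY));
    */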
MatCheckPreallocated(A,1); 765 a->roworiented = flg; 766 PetscCall(MatSetOption(a->A,op,flg)); 767 break; 768 case MAT_FORCE_DIAGONAL_ENTRIES: 769 case MAT_KEEP_NONZERO_PATTERN: 770 case MAT_USE_HASH_TABLE: 771 case MAT_SORTED_FULL: 772 PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op])); 773 break; 774 case MAT_IGNORE_OFF_PROC_ENTRIES: 775 a->donotstash = flg; 776 break; 777 case MAT_SYMMETRIC: 778 case MAT_STRUCTURALLY_SYMMETRIC: 779 case MAT_HERMITIAN: 780 case MAT_SYMMETRY_ETERNAL: 781 case MAT_IGNORE_LOWER_TRIANGULAR: 782 case MAT_IGNORE_ZERO_ENTRIES: 783 PetscCall(PetscInfo(A,"Option %s ignored\n",MatOptions[op])); 784 break; 785 default: 786 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 787 } 788 PetscFunctionReturn(0); 789 } 790 791 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 792 { 793 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 794 const PetscScalar *l; 795 PetscScalar x,*v,*vv,*r; 796 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda; 797 798 PetscFunctionBegin; 799 PetscCall(MatDenseGetArray(mdn->A,&vv)); 800 PetscCall(MatDenseGetLDA(mdn->A,&lda)); 801 PetscCall(MatGetLocalSize(A,&s2,&s3)); 802 if (ll) { 803 PetscCall(VecGetLocalSize(ll,&s2a)); 804 PetscCheck(s2a == s2,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT, s2a, s2); 805 PetscCall(VecGetArrayRead(ll,&l)); 806 for (i=0; i<m; i++) { 807 x = l[i]; 808 v = vv + i; 809 for (j=0; j<n; j++) { (*v) *= x; v+= lda;} 810 } 811 PetscCall(VecRestoreArrayRead(ll,&l)); 812 PetscCall(PetscLogFlops(1.0*n*m)); 813 } 814 if (rr) { 815 const PetscScalar *ar; 816 817 PetscCall(VecGetLocalSize(rr,&s3a)); 818 PetscCheck(s3a == s3,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %" PetscInt_FMT " != %" PetscInt_FMT ".", s3a, s3); 819 PetscCall(VecGetArrayRead(rr,&ar)); 820 if (!mdn->Mvctx) PetscCall(MatSetUpMultiply_MPIDense(A)); 821 PetscCall(VecGetArray(mdn->lvec,&r)); 822 PetscCall(PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE)); 823 PetscCall(PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r,MPI_REPLACE)); 824 PetscCall(VecRestoreArrayRead(rr,&ar)); 825 for (i=0; i<n; i++) { 826 x = r[i]; 827 v = vv + i*lda; 828 for (j=0; j<m; j++) (*v++) *= x; 829 } 830 PetscCall(VecRestoreArray(mdn->lvec,&r)); 831 PetscCall(PetscLogFlops(1.0*n*m)); 832 } 833 PetscCall(MatDenseRestoreArray(mdn->A,&vv)); 834 PetscFunctionReturn(0); 835 } 836 837 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 838 { 839 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 840 PetscInt i,j; 841 PetscMPIInt size; 842 PetscReal sum = 0.0; 843 const PetscScalar *av,*v; 844 845 PetscFunctionBegin; 846 PetscCall(MatDenseGetArrayRead(mdn->A,&av)); 847 v = av; 848 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size)); 849 if (size == 1) { 850 PetscCall(MatNorm(mdn->A,type,nrm)); 851 } else { 852 if (type == NORM_FROBENIUS) { 853 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 854 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 855 } 856 PetscCall(MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A))); 857 *nrm = PetscSqrtReal(*nrm); 858 PetscCall(PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n)); 859 } else if (type == NORM_1) { 860 PetscReal *tmp,*tmp2; 861 PetscCall(PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2)); 862 *nrm = 0.0; 863 v = av; 864 for (j=0; j<mdn->A->cmap->n; j++) { 865 for (i=0; i<mdn->A->rmap->n; i++) { 866 tmp[j] += 
PetscAbsScalar(*v); v++; 867 } 868 } 869 PetscCall(MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A))); 870 for (j=0; j<A->cmap->N; j++) { 871 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 872 } 873 PetscCall(PetscFree2(tmp,tmp2)); 874 PetscCall(PetscLogFlops(A->cmap->n*A->rmap->n)); 875 } else if (type == NORM_INFINITY) { /* max row norm */ 876 PetscReal ntemp; 877 PetscCall(MatNorm(mdn->A,type,&ntemp)); 878 PetscCall(MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A))); 879 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm"); 880 } 881 PetscCall(MatDenseRestoreArrayRead(mdn->A,&av)); 882 PetscFunctionReturn(0); 883 } 884 885 PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout) 886 { 887 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 888 Mat B; 889 PetscInt M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart; 890 PetscInt j,i,lda; 891 PetscScalar *v; 892 893 PetscFunctionBegin; 894 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 895 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B)); 896 PetscCall(MatSetSizes(B,A->cmap->n,A->rmap->n,N,M)); 897 PetscCall(MatSetType(B,((PetscObject)A)->type_name)); 898 PetscCall(MatMPIDenseSetPreallocation(B,NULL)); 899 } else B = *matout; 900 901 m = a->A->rmap->n; n = a->A->cmap->n; 902 PetscCall(MatDenseGetArrayRead(a->A,(const PetscScalar**)&v)); 903 PetscCall(MatDenseGetLDA(a->A,&lda)); 904 PetscCall(PetscMalloc1(m,&rwork)); 905 for (i=0; i<m; i++) rwork[i] = rstart + i; 906 for (j=0; j<n; j++) { 907 PetscCall(MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES)); 908 v += lda; 909 } 910 PetscCall(MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v)); 911 PetscCall(PetscFree(rwork)); 912 PetscCall(MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY)); 913 PetscCall(MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY)); 914 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 915 *matout = B; 916 } else { 917 PetscCall(MatHeaderMerge(A,&B)); 918 } 919 PetscFunctionReturn(0); 920 } 921 922 static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*); 923 PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar); 924 925 PetscErrorCode MatSetUp_MPIDense(Mat A) 926 { 927 PetscFunctionBegin; 928 PetscCall(PetscLayoutSetUp(A->rmap)); 929 PetscCall(PetscLayoutSetUp(A->cmap)); 930 if (!A->preallocated) { 931 PetscCall(MatMPIDenseSetPreallocation(A,NULL)); 932 } 933 PetscFunctionReturn(0); 934 } 935 936 PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str) 937 { 938 Mat_MPIDense *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data; 939 940 PetscFunctionBegin; 941 PetscCall(MatAXPY(A->A,alpha,B->A,str)); 942 PetscFunctionReturn(0); 943 } 944 945 PetscErrorCode MatConjugate_MPIDense(Mat mat) 946 { 947 Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 948 949 PetscFunctionBegin; 950 PetscCall(MatConjugate(a->A)); 951 PetscFunctionReturn(0); 952 } 953 954 PetscErrorCode MatRealPart_MPIDense(Mat A) 955 { 956 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 957 958 PetscFunctionBegin; 959 PetscCall(MatRealPart(a->A)); 960 PetscFunctionReturn(0); 961 } 962 963 PetscErrorCode MatImaginaryPart_MPIDense(Mat A) 964 { 965 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 966 967 PetscFunctionBegin; 968 PetscCall(MatImaginaryPart(a->A)); 969 PetscFunctionReturn(0); 970 } 971 972 static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col) 973 { 974 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 975 976 PetscFunctionBegin; 
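  /* A minimal usage sketch for the column extraction implemented here, assuming a MATMPIDENSE
     matrix A created elsewhere (the vector name is illustrative); the output vector must share
     the row layout of A:

       Vec col;
       PetscCall(MatCreateVecs(A,NULL,&col));   // vector with the row layout of A
       PetscCall(MatGetColumnVector(A,col,0));  // copy global column 0 into col
       PetscCall(VecDestroy(&col));
  */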
977 PetscCheck(a->A,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix"); 978 PetscCheck(a->A->ops->getcolumnvector,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation"); 979 PetscCall((*a->A->ops->getcolumnvector)(a->A,v,col)); 980 PetscFunctionReturn(0); 981 } 982 983 PETSC_INTERN PetscErrorCode MatGetColumnReductions_SeqDense(Mat,PetscInt,PetscReal*); 984 985 PetscErrorCode MatGetColumnReductions_MPIDense(Mat A,PetscInt type,PetscReal *reductions) 986 { 987 PetscInt i,m,n; 988 Mat_MPIDense *a = (Mat_MPIDense*) A->data; 989 PetscReal *work; 990 991 PetscFunctionBegin; 992 PetscCall(MatGetSize(A,&m,&n)); 993 PetscCall(PetscMalloc1(n,&work)); 994 if (type == REDUCTION_MEAN_REALPART) { 995 PetscCall(MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_REALPART,work)); 996 } else if (type == REDUCTION_MEAN_IMAGINARYPART) { 997 PetscCall(MatGetColumnReductions_SeqDense(a->A,(PetscInt)REDUCTION_SUM_IMAGINARYPART,work)); 998 } else { 999 PetscCall(MatGetColumnReductions_SeqDense(a->A,type,work)); 1000 } 1001 if (type == NORM_2) { 1002 for (i=0; i<n; i++) work[i] *= work[i]; 1003 } 1004 if (type == NORM_INFINITY) { 1005 PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_MAX,A->hdr.comm)); 1006 } else { 1007 PetscCall(MPIU_Allreduce(work,reductions,n,MPIU_REAL,MPIU_SUM,A->hdr.comm)); 1008 } 1009 PetscCall(PetscFree(work)); 1010 if (type == NORM_2) { 1011 for (i=0; i<n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 1012 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 1013 for (i=0; i<n; i++) reductions[i] /= m; 1014 } 1015 PetscFunctionReturn(0); 1016 } 1017 1018 #if defined(PETSC_HAVE_CUDA) 1019 static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1020 { 1021 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1022 PetscInt lda; 1023 1024 PetscFunctionBegin; 1025 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1026 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1027 if (!a->cvec) { 1028 PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec)); 1029 PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec)); 1030 } 1031 a->vecinuse = col + 1; 1032 PetscCall(MatDenseGetLDA(a->A,&lda)); 1033 PetscCall(MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse)); 1034 PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda)); 1035 *v = a->cvec; 1036 PetscFunctionReturn(0); 1037 } 1038 1039 static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1040 { 1041 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1042 1043 PetscFunctionBegin; 1044 PetscCheck(a->vecinuse,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1045 PetscCheck(a->cvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 1046 a->vecinuse = 0; 1047 PetscCall(MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse)); 1048 PetscCall(VecCUDAResetArray(a->cvec)); 1049 *v = NULL; 1050 PetscFunctionReturn(0); 1051 } 1052 1053 static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1054 { 1055 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1056 PetscInt lda; 1057 1058 PetscFunctionBegin; 1059 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 
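  /* The vecinuse/matinuse guards used throughout these routines enforce that at most one
     column-vector (or submatrix) view is outstanding at a time and that every Get is matched by
     the corresponding Restore. A sketch of the intended calling pattern on the host API,
     assuming a MATMPIDENSE matrix A created elsewhere (names are illustrative):

       Vec c;
       PetscCall(MatDenseGetColumnVec(A,0,&c));      // c aliases column 0, no copy is made
       PetscCall(VecScale(c,2.0));
       PetscCall(MatDenseRestoreColumnVec(A,0,&c));  // restore before any other access to A
  */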
1060 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1061 if (!a->cvec) { 1062 PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec)); 1063 PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec)); 1064 } 1065 a->vecinuse = col + 1; 1066 PetscCall(MatDenseGetLDA(a->A,&lda)); 1067 PetscCall(MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse)); 1068 PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda)); 1069 PetscCall(VecLockReadPush(a->cvec)); 1070 *v = a->cvec; 1071 PetscFunctionReturn(0); 1072 } 1073 1074 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1075 { 1076 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1077 1078 PetscFunctionBegin; 1079 PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1080 PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 1081 a->vecinuse = 0; 1082 PetscCall(MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse)); 1083 PetscCall(VecLockReadPop(a->cvec)); 1084 PetscCall(VecCUDAResetArray(a->cvec)); 1085 *v = NULL; 1086 PetscFunctionReturn(0); 1087 } 1088 1089 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1090 { 1091 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1092 PetscInt lda; 1093 1094 PetscFunctionBegin; 1095 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1096 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1097 if (!a->cvec) { 1098 PetscCall(VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec)); 1099 PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec)); 1100 } 1101 a->vecinuse = col + 1; 1102 PetscCall(MatDenseGetLDA(a->A,&lda)); 1103 PetscCall(MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse)); 1104 PetscCall(VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda)); 1105 *v = a->cvec; 1106 PetscFunctionReturn(0); 1107 } 1108 1109 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v) 1110 { 1111 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1112 1113 PetscFunctionBegin; 1114 PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1115 PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 1116 a->vecinuse = 0; 1117 PetscCall(MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse)); 1118 PetscCall(VecCUDAResetArray(a->cvec)); 1119 *v = NULL; 1120 PetscFunctionReturn(0); 1121 } 1122 1123 static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) 1124 { 1125 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1126 1127 PetscFunctionBegin; 1128 PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1129 PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1130 PetscCall(MatDenseCUDAPlaceArray(l->A,a)); 1131 PetscFunctionReturn(0); 1132 } 1133 1134 static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A) 1135 { 1136 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1137 1138 
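  /* MatDenseCUDAPlaceArray()/MatDenseCUDAResetArray() here, like the host MatDensePlaceArray()
     wrappers earlier in this file, temporarily substitute a user-provided buffer for the matrix
     storage; Reset puts the original array back. A host-side sketch, assuming a MATMPIDENSE
     matrix A with local size m x N created elsewhere and the default leading dimension (names
     are illustrative):

       PetscScalar *mybuf;                       // user-managed column-major storage, >= m*N entries
       PetscCall(PetscMalloc1(m*N,&mybuf));
       PetscCall(MatDensePlaceArray(A,mybuf));   // A now reads and writes mybuf
       PetscCall(MatDenseResetArray(A));         // restore the original storage
       PetscCall(PetscFree(mybuf));
  */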
PetscFunctionBegin; 1139 PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1140 PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1141 PetscCall(MatDenseCUDAResetArray(l->A)); 1142 PetscFunctionReturn(0); 1143 } 1144 1145 static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a) 1146 { 1147 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1148 1149 PetscFunctionBegin; 1150 PetscCheck(!l->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1151 PetscCheck(!l->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1152 PetscCall(MatDenseCUDAReplaceArray(l->A,a)); 1153 PetscFunctionReturn(0); 1154 } 1155 1156 static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) 1157 { 1158 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1159 1160 PetscFunctionBegin; 1161 PetscCall(MatDenseCUDAGetArrayWrite(l->A,a)); 1162 PetscFunctionReturn(0); 1163 } 1164 1165 static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a) 1166 { 1167 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1168 1169 PetscFunctionBegin; 1170 PetscCall(MatDenseCUDARestoreArrayWrite(l->A,a)); 1171 PetscFunctionReturn(0); 1172 } 1173 1174 static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) 1175 { 1176 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1177 1178 PetscFunctionBegin; 1179 PetscCall(MatDenseCUDAGetArrayRead(l->A,a)); 1180 PetscFunctionReturn(0); 1181 } 1182 1183 static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a) 1184 { 1185 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1186 1187 PetscFunctionBegin; 1188 PetscCall(MatDenseCUDARestoreArrayRead(l->A,a)); 1189 PetscFunctionReturn(0); 1190 } 1191 1192 static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a) 1193 { 1194 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1195 1196 PetscFunctionBegin; 1197 PetscCall(MatDenseCUDAGetArray(l->A,a)); 1198 PetscFunctionReturn(0); 1199 } 1200 1201 static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a) 1202 { 1203 Mat_MPIDense *l = (Mat_MPIDense*) A->data; 1204 1205 PetscFunctionBegin; 1206 PetscCall(MatDenseCUDARestoreArray(l->A,a)); 1207 PetscFunctionReturn(0); 1208 } 1209 1210 static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*); 1211 static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*); 1212 static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*); 1213 static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*); 1214 static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*); 1215 static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*); 1216 static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*); 1217 1218 static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind) 1219 { 1220 Mat_MPIDense *d = (Mat_MPIDense*)mat->data; 1221 1222 PetscFunctionBegin; 1223 PetscCheck(!d->vecinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1224 PetscCheck(!d->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1225 if (d->A) { 1226 PetscCall(MatBindToCPU(d->A,bind)); 1227 } 
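  /* Binding to the CPU swaps the column-vector accessors to the host implementations;
     unbinding restores the CUDA versions and discards any cached column vector or submatrix
     that is not of a CUDA type. From user code this is reached through MatBindToCPU(), e.g.
     (assuming a MATMPIDENSECUDA matrix A created elsewhere):

       PetscCall(MatBindToCPU(A,PETSC_TRUE));   // subsequent operations run on the host
       PetscCall(MatBindToCPU(A,PETSC_FALSE));  // allow GPU execution again
  */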
1228 mat->boundtocpu = bind; 1229 if (!bind) { 1230 PetscBool iscuda; 1231 1232 PetscCall(PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda)); 1233 if (!iscuda) { 1234 PetscCall(VecDestroy(&d->cvec)); 1235 } 1236 PetscCall(PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda)); 1237 if (!iscuda) { 1238 PetscCall(MatDestroy(&d->cmat)); 1239 } 1240 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA)); 1241 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA)); 1242 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA)); 1243 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA)); 1244 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA)); 1245 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA)); 1246 } else { 1247 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense)); 1248 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense)); 1249 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense)); 1250 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense)); 1251 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense)); 1252 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense)); 1253 } 1254 if (d->cmat) { 1255 PetscCall(MatBindToCPU(d->cmat,bind)); 1256 } 1257 PetscFunctionReturn(0); 1258 } 1259 1260 PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data) 1261 { 1262 Mat_MPIDense *d = (Mat_MPIDense*)A->data; 1263 PetscBool iscuda; 1264 1265 PetscFunctionBegin; 1266 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 1267 PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda)); 1268 if (!iscuda) PetscFunctionReturn(0); 1269 PetscCall(PetscLayoutSetUp(A->rmap)); 1270 PetscCall(PetscLayoutSetUp(A->cmap)); 1271 if (!d->A) { 1272 PetscCall(MatCreate(PETSC_COMM_SELF,&d->A)); 1273 PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)d->A)); 1274 PetscCall(MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N)); 1275 } 1276 PetscCall(MatSetType(d->A,MATSEQDENSECUDA)); 1277 PetscCall(MatSeqDenseCUDASetPreallocation(d->A,d_data)); 1278 A->preallocated = PETSC_TRUE; 1279 A->assembled = PETSC_TRUE; 1280 PetscFunctionReturn(0); 1281 } 1282 #endif 1283 1284 static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx) 1285 { 1286 Mat_MPIDense *d = (Mat_MPIDense*)x->data; 1287 1288 PetscFunctionBegin; 1289 PetscCall(MatSetRandom(d->A,rctx)); 1290 PetscFunctionReturn(0); 1291 } 1292 1293 static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d) 1294 { 1295 PetscFunctionBegin; 1296 *missing = PETSC_FALSE; 1297 PetscFunctionReturn(0); 1298 } 1299 1300 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat); 1301 static PetscErrorCode 
MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat); 1302 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat); 1303 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat); 1304 static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*); 1305 static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer); 1306 1307 /* -------------------------------------------------------------------*/ 1308 static struct _MatOps MatOps_Values = { MatSetValues_MPIDense, 1309 MatGetRow_MPIDense, 1310 MatRestoreRow_MPIDense, 1311 MatMult_MPIDense, 1312 /* 4*/ MatMultAdd_MPIDense, 1313 MatMultTranspose_MPIDense, 1314 MatMultTransposeAdd_MPIDense, 1315 NULL, 1316 NULL, 1317 NULL, 1318 /* 10*/ NULL, 1319 NULL, 1320 NULL, 1321 NULL, 1322 MatTranspose_MPIDense, 1323 /* 15*/ MatGetInfo_MPIDense, 1324 MatEqual_MPIDense, 1325 MatGetDiagonal_MPIDense, 1326 MatDiagonalScale_MPIDense, 1327 MatNorm_MPIDense, 1328 /* 20*/ MatAssemblyBegin_MPIDense, 1329 MatAssemblyEnd_MPIDense, 1330 MatSetOption_MPIDense, 1331 MatZeroEntries_MPIDense, 1332 /* 24*/ MatZeroRows_MPIDense, 1333 NULL, 1334 NULL, 1335 NULL, 1336 NULL, 1337 /* 29*/ MatSetUp_MPIDense, 1338 NULL, 1339 NULL, 1340 MatGetDiagonalBlock_MPIDense, 1341 NULL, 1342 /* 34*/ MatDuplicate_MPIDense, 1343 NULL, 1344 NULL, 1345 NULL, 1346 NULL, 1347 /* 39*/ MatAXPY_MPIDense, 1348 MatCreateSubMatrices_MPIDense, 1349 NULL, 1350 MatGetValues_MPIDense, 1351 MatCopy_MPIDense, 1352 /* 44*/ NULL, 1353 MatScale_MPIDense, 1354 MatShift_Basic, 1355 NULL, 1356 NULL, 1357 /* 49*/ MatSetRandom_MPIDense, 1358 NULL, 1359 NULL, 1360 NULL, 1361 NULL, 1362 /* 54*/ NULL, 1363 NULL, 1364 NULL, 1365 NULL, 1366 NULL, 1367 /* 59*/ MatCreateSubMatrix_MPIDense, 1368 MatDestroy_MPIDense, 1369 MatView_MPIDense, 1370 NULL, 1371 NULL, 1372 /* 64*/ NULL, 1373 NULL, 1374 NULL, 1375 NULL, 1376 NULL, 1377 /* 69*/ NULL, 1378 NULL, 1379 NULL, 1380 NULL, 1381 NULL, 1382 /* 74*/ NULL, 1383 NULL, 1384 NULL, 1385 NULL, 1386 NULL, 1387 /* 79*/ NULL, 1388 NULL, 1389 NULL, 1390 NULL, 1391 /* 83*/ MatLoad_MPIDense, 1392 NULL, 1393 NULL, 1394 NULL, 1395 NULL, 1396 NULL, 1397 /* 89*/ NULL, 1398 NULL, 1399 NULL, 1400 NULL, 1401 NULL, 1402 /* 94*/ NULL, 1403 NULL, 1404 MatMatTransposeMultSymbolic_MPIDense_MPIDense, 1405 MatMatTransposeMultNumeric_MPIDense_MPIDense, 1406 NULL, 1407 /* 99*/ MatProductSetFromOptions_MPIDense, 1408 NULL, 1409 NULL, 1410 MatConjugate_MPIDense, 1411 NULL, 1412 /*104*/ NULL, 1413 MatRealPart_MPIDense, 1414 MatImaginaryPart_MPIDense, 1415 NULL, 1416 NULL, 1417 /*109*/ NULL, 1418 NULL, 1419 NULL, 1420 MatGetColumnVector_MPIDense, 1421 MatMissingDiagonal_MPIDense, 1422 /*114*/ NULL, 1423 NULL, 1424 NULL, 1425 NULL, 1426 NULL, 1427 /*119*/ NULL, 1428 NULL, 1429 NULL, 1430 NULL, 1431 NULL, 1432 /*124*/ NULL, 1433 MatGetColumnReductions_MPIDense, 1434 NULL, 1435 NULL, 1436 NULL, 1437 /*129*/ NULL, 1438 NULL, 1439 MatTransposeMatMultSymbolic_MPIDense_MPIDense, 1440 MatTransposeMatMultNumeric_MPIDense_MPIDense, 1441 NULL, 1442 /*134*/ NULL, 1443 NULL, 1444 NULL, 1445 NULL, 1446 NULL, 1447 /*139*/ NULL, 1448 NULL, 1449 NULL, 1450 NULL, 1451 NULL, 1452 MatCreateMPIMatConcatenateSeqMat_MPIDense, 1453 /*145*/ NULL, 1454 NULL, 1455 NULL 1456 }; 1457 1458 PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data) 1459 { 1460 Mat_MPIDense *a = (Mat_MPIDense*)mat->data; 1461 PetscBool iscuda; 1462 1463 PetscFunctionBegin; 1464 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call 
MatDenseRestoreSubMatrix() first"); 1465 PetscCall(PetscLayoutSetUp(mat->rmap)); 1466 PetscCall(PetscLayoutSetUp(mat->cmap)); 1467 if (!a->A) { 1468 PetscCall(MatCreate(PETSC_COMM_SELF,&a->A)); 1469 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A)); 1470 PetscCall(MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N)); 1471 } 1472 PetscCall(PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda)); 1473 PetscCall(MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE)); 1474 PetscCall(MatSeqDenseSetPreallocation(a->A,data)); 1475 mat->preallocated = PETSC_TRUE; 1476 mat->assembled = PETSC_TRUE; 1477 PetscFunctionReturn(0); 1478 } 1479 1480 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIDense(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1481 { 1482 Mat B,C; 1483 1484 PetscFunctionBegin; 1485 PetscCall(MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&C)); 1486 PetscCall(MatConvert_SeqAIJ_SeqDense(C,MATSEQDENSE,MAT_INITIAL_MATRIX,&B)); 1487 PetscCall(MatDestroy(&C)); 1488 if (reuse == MAT_REUSE_MATRIX) { 1489 C = *newmat; 1490 } else C = NULL; 1491 PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C)); 1492 PetscCall(MatDestroy(&B)); 1493 if (reuse == MAT_INPLACE_MATRIX) { 1494 PetscCall(MatHeaderReplace(A,&C)); 1495 } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1496 PetscFunctionReturn(0); 1497 } 1498 1499 PetscErrorCode MatConvert_MPIDense_MPIAIJ(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1500 { 1501 Mat B,C; 1502 1503 PetscFunctionBegin; 1504 PetscCall(MatDenseGetLocalMatrix(A,&C)); 1505 PetscCall(MatConvert_SeqDense_SeqAIJ(C,MATSEQAIJ,MAT_INITIAL_MATRIX,&B)); 1506 if (reuse == MAT_REUSE_MATRIX) { 1507 C = *newmat; 1508 } else C = NULL; 1509 PetscCall(MatCreateMPIMatConcatenateSeqMat(PetscObjectComm((PetscObject)A),B,A->cmap->n,!C?MAT_INITIAL_MATRIX:MAT_REUSE_MATRIX,&C)); 1510 PetscCall(MatDestroy(&B)); 1511 if (reuse == MAT_INPLACE_MATRIX) { 1512 PetscCall(MatHeaderReplace(A,&C)); 1513 } else if (reuse == MAT_INITIAL_MATRIX) *newmat = C; 1514 PetscFunctionReturn(0); 1515 } 1516 1517 #if defined(PETSC_HAVE_ELEMENTAL) 1518 PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat) 1519 { 1520 Mat mat_elemental; 1521 PetscScalar *v; 1522 PetscInt m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols; 1523 1524 PetscFunctionBegin; 1525 if (reuse == MAT_REUSE_MATRIX) { 1526 mat_elemental = *newmat; 1527 PetscCall(MatZeroEntries(*newmat)); 1528 } else { 1529 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental)); 1530 PetscCall(MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N)); 1531 PetscCall(MatSetType(mat_elemental,MATELEMENTAL)); 1532 PetscCall(MatSetUp(mat_elemental)); 1533 PetscCall(MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE)); 1534 } 1535 1536 PetscCall(PetscMalloc2(m,&rows,N,&cols)); 1537 for (i=0; i<N; i++) cols[i] = i; 1538 for (i=0; i<m; i++) rows[i] = rstart + i; 1539 1540 /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */ 1541 PetscCall(MatDenseGetArray(A,&v)); 1542 PetscCall(MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES)); 1543 PetscCall(MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY)); 1544 PetscCall(MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY)); 1545 PetscCall(MatDenseRestoreArray(A,&v)); 1546 PetscCall(PetscFree2(rows,cols)); 1547 1548 if (reuse == 
MAT_INPLACE_MATRIX) { 1549 PetscCall(MatHeaderReplace(A,&mat_elemental)); 1550 } else { 1551 *newmat = mat_elemental; 1552 } 1553 PetscFunctionReturn(0); 1554 } 1555 #endif 1556 1557 static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals) 1558 { 1559 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 1560 1561 PetscFunctionBegin; 1562 PetscCall(MatDenseGetColumn(mat->A,col,vals)); 1563 PetscFunctionReturn(0); 1564 } 1565 1566 static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals) 1567 { 1568 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 1569 1570 PetscFunctionBegin; 1571 PetscCall(MatDenseRestoreColumn(mat->A,vals)); 1572 PetscFunctionReturn(0); 1573 } 1574 1575 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat) 1576 { 1577 Mat_MPIDense *mat; 1578 PetscInt m,nloc,N; 1579 1580 PetscFunctionBegin; 1581 PetscCall(MatGetSize(inmat,&m,&N)); 1582 PetscCall(MatGetLocalSize(inmat,NULL,&nloc)); 1583 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 1584 PetscInt sum; 1585 1586 if (n == PETSC_DECIDE) { 1587 PetscCall(PetscSplitOwnership(comm,&n,&N)); 1588 } 1589 /* Check sum(n) = N */ 1590 PetscCall(MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm)); 1591 PetscCheck(sum == N,PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT,sum,N); 1592 1593 PetscCall(MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat)); 1594 PetscCall(MatSetOption(*outmat,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE)); 1595 } 1596 1597 /* numeric phase */ 1598 mat = (Mat_MPIDense*)(*outmat)->data; 1599 PetscCall(MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN)); 1600 PetscCall(MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY)); 1601 PetscCall(MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY)); 1602 PetscFunctionReturn(0); 1603 } 1604 1605 #if defined(PETSC_HAVE_CUDA) 1606 PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat) 1607 { 1608 Mat B; 1609 Mat_MPIDense *m; 1610 1611 PetscFunctionBegin; 1612 if (reuse == MAT_INITIAL_MATRIX) { 1613 PetscCall(MatDuplicate(M,MAT_COPY_VALUES,newmat)); 1614 } else if (reuse == MAT_REUSE_MATRIX) { 1615 PetscCall(MatCopy(M,*newmat,SAME_NONZERO_PATTERN)); 1616 } 1617 1618 B = *newmat; 1619 PetscCall(MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE)); 1620 PetscCall(PetscFree(B->defaultvectype)); 1621 PetscCall(PetscStrallocpy(VECSTANDARD,&B->defaultvectype)); 1622 PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE)); 1623 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL)); 1624 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL)); 1625 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL)); 1626 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL)); 1627 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL)); 1628 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL)); 1629 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL)); 1630 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL)); 1631 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL)); 1632 
PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL)); 1633 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL)); 1634 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL)); 1635 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL)); 1636 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL)); 1637 m = (Mat_MPIDense*)(B)->data; 1638 if (m->A) { 1639 PetscCall(MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A)); 1640 } 1641 B->ops->bindtocpu = NULL; 1642 B->offloadmask = PETSC_OFFLOAD_CPU; 1643 PetscFunctionReturn(0); 1644 } 1645 1646 PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat) 1647 { 1648 Mat B; 1649 Mat_MPIDense *m; 1650 1651 PetscFunctionBegin; 1652 if (reuse == MAT_INITIAL_MATRIX) { 1653 PetscCall(MatDuplicate(M,MAT_COPY_VALUES,newmat)); 1654 } else if (reuse == MAT_REUSE_MATRIX) { 1655 PetscCall(MatCopy(M,*newmat,SAME_NONZERO_PATTERN)); 1656 } 1657 1658 B = *newmat; 1659 PetscCall(PetscFree(B->defaultvectype)); 1660 PetscCall(PetscStrallocpy(VECCUDA,&B->defaultvectype)); 1661 PetscCall(PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA)); 1662 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C", MatConvert_MPIDenseCUDA_MPIDense)); 1663 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C", MatProductSetFromOptions_MPIAIJ_MPIDense)); 1664 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense)); 1665 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C", MatProductSetFromOptions_MPIDense_MPIAIJ)); 1666 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ)); 1667 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C", MatDenseCUDAGetArray_MPIDenseCUDA)); 1668 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C", MatDenseCUDAGetArrayRead_MPIDenseCUDA)); 1669 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C", MatDenseCUDAGetArrayWrite_MPIDenseCUDA)); 1670 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C", MatDenseCUDARestoreArray_MPIDenseCUDA)); 1671 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C", MatDenseCUDARestoreArrayRead_MPIDenseCUDA)); 1672 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C", MatDenseCUDARestoreArrayWrite_MPIDenseCUDA)); 1673 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C", MatDenseCUDAPlaceArray_MPIDenseCUDA)); 1674 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C", MatDenseCUDAResetArray_MPIDenseCUDA)); 1675 PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C", MatDenseCUDAReplaceArray_MPIDenseCUDA)); 1676 m = (Mat_MPIDense*)(B->data); 1677 if (m->A) { 1678 PetscCall(MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A)); 1679 B->offloadmask = PETSC_OFFLOAD_BOTH; 1680 } else { 1681 B->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 1682 } 1683 PetscCall(MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE)); 1684 1685 B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA; 
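  /* from here on, MatBindToCPU() calls on B are dispatched through MatBindToCPU_MPIDenseCUDA */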
1686 PetscFunctionReturn(0); 1687 } 1688 #endif 1689 1690 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v) 1691 { 1692 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1693 PetscInt lda; 1694 1695 PetscFunctionBegin; 1696 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1697 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1698 if (!a->cvec) { 1699 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec)); 1700 } 1701 a->vecinuse = col + 1; 1702 PetscCall(MatDenseGetLDA(a->A,&lda)); 1703 PetscCall(MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse)); 1704 PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda)); 1705 *v = a->cvec; 1706 PetscFunctionReturn(0); 1707 } 1708 1709 PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v) 1710 { 1711 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1712 1713 PetscFunctionBegin; 1714 PetscCheck(a->vecinuse,PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1715 PetscCheck(a->cvec,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector"); 1716 a->vecinuse = 0; 1717 PetscCall(MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse)); 1718 PetscCall(VecResetArray(a->cvec)); 1719 *v = NULL; 1720 PetscFunctionReturn(0); 1721 } 1722 1723 PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v) 1724 { 1725 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1726 PetscInt lda; 1727 1728 PetscFunctionBegin; 1729 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1730 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1731 if (!a->cvec) { 1732 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec)); 1733 } 1734 a->vecinuse = col + 1; 1735 PetscCall(MatDenseGetLDA(a->A,&lda)); 1736 PetscCall(MatDenseGetArrayRead(a->A,&a->ptrinuse)); 1737 PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda)); 1738 PetscCall(VecLockReadPush(a->cvec)); 1739 *v = a->cvec; 1740 PetscFunctionReturn(0); 1741 } 1742 1743 PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v) 1744 { 1745 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1746 1747 PetscFunctionBegin; 1748 PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1749 PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 1750 a->vecinuse = 0; 1751 PetscCall(MatDenseRestoreArrayRead(a->A,&a->ptrinuse)); 1752 PetscCall(VecLockReadPop(a->cvec)); 1753 PetscCall(VecResetArray(a->cvec)); 1754 *v = NULL; 1755 PetscFunctionReturn(0); 1756 } 1757 1758 PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v) 1759 { 1760 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1761 PetscInt lda; 1762 1763 PetscFunctionBegin; 1764 PetscCheck(!a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1765 PetscCheck(!a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1766 if (!a->cvec) { 1767 
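    /* lazily create the MPI wrapper vector; no array is attached here, a pointer into the local dense storage is placed with VecPlaceArray() below */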
PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec)); 1768 } 1769 a->vecinuse = col + 1; 1770 PetscCall(MatDenseGetLDA(a->A,&lda)); 1771 PetscCall(MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse)); 1772 PetscCall(VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda)); 1773 *v = a->cvec; 1774 PetscFunctionReturn(0); 1775 } 1776 1777 PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v) 1778 { 1779 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1780 1781 PetscFunctionBegin; 1782 PetscCheck(a->vecinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first"); 1783 PetscCheck(a->cvec,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector"); 1784 a->vecinuse = 0; 1785 PetscCall(MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse)); 1786 PetscCall(VecResetArray(a->cvec)); 1787 *v = NULL; 1788 PetscFunctionReturn(0); 1789 } 1790 1791 PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v) 1792 { 1793 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1794 Mat_MPIDense *c; 1795 MPI_Comm comm; 1796 1797 PetscFunctionBegin; 1798 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 1799 PetscCheck(!a->vecinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1800 PetscCheck(!a->matinuse,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1801 if (!a->cmat) { 1802 PetscCall(MatCreate(comm,&a->cmat)); 1803 PetscCall(PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat)); 1804 PetscCall(MatSetType(a->cmat,((PetscObject)A)->type_name)); 1805 PetscCall(PetscLayoutReference(A->rmap,&a->cmat->rmap)); 1806 PetscCall(PetscLayoutSetSize(a->cmat->cmap,cend-cbegin)); 1807 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 1808 } else if (cend-cbegin != a->cmat->cmap->N) { 1809 PetscCall(PetscLayoutDestroy(&a->cmat->cmap)); 1810 PetscCall(PetscLayoutCreate(comm,&a->cmat->cmap)); 1811 PetscCall(PetscLayoutSetSize(a->cmat->cmap,cend-cbegin)); 1812 PetscCall(PetscLayoutSetUp(a->cmat->cmap)); 1813 } 1814 c = (Mat_MPIDense*)a->cmat->data; 1815 PetscCheck(!c->A,comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 1816 PetscCall(MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A)); 1817 a->cmat->preallocated = PETSC_TRUE; 1818 a->cmat->assembled = PETSC_TRUE; 1819 a->matinuse = cbegin + 1; 1820 *v = a->cmat; 1821 PetscFunctionReturn(0); 1822 } 1823 1824 PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v) 1825 { 1826 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1827 Mat_MPIDense *c; 1828 1829 PetscFunctionBegin; 1830 PetscCheck(a->matinuse,PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first"); 1831 PetscCheck(a->cmat,PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix"); 1832 PetscCheck(*v == a->cmat,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()"); 1833 a->matinuse = 0; 1834 c = (Mat_MPIDense*)a->cmat->data; 1835 PetscCall(MatDenseRestoreSubMatrix(a->A,&c->A)); 1836 *v = NULL; 1837 PetscFunctionReturn(0); 1838 } 1839 1840 /*MC 1841 MATMPIDENSE - MATMPIDENSE = "mpidense" - A matrix type to be used for distributed dense matrices. 1842 1843 Options Database Keys: 1844 . 
-mat_type mpidense - sets the matrix type to "mpidense" during a call to MatSetFromOptions() 1845 1846 Level: beginner 1847 1848 .seealso: MatCreateDense() 1849 1850 M*/ 1851 PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat) 1852 { 1853 Mat_MPIDense *a; 1854 1855 PetscFunctionBegin; 1856 PetscCall(PetscNewLog(mat,&a)); 1857 mat->data = (void*)a; 1858 PetscCall(PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps))); 1859 1860 mat->insertmode = NOT_SET_VALUES; 1861 1862 /* build cache for off array entries formed */ 1863 a->donotstash = PETSC_FALSE; 1864 1865 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash)); 1866 1867 /* stuff used for matrix vector multiply */ 1868 a->lvec = NULL; 1869 a->Mvctx = NULL; 1870 a->roworiented = PETSC_TRUE; 1871 1872 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense)); 1873 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense)); 1874 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense)); 1875 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense)); 1876 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense)); 1877 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense)); 1878 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense)); 1879 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense)); 1880 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense)); 1881 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense)); 1882 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense)); 1883 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense)); 1884 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense)); 1885 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense)); 1886 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense)); 1887 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense)); 1888 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense)); 1889 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense)); 1890 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense)); 1891 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpiaij_mpidense_C",MatConvert_MPIAIJ_MPIDense)); 1892 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpiaij_C",MatConvert_MPIDense_MPIAIJ)); 1893 #if defined(PETSC_HAVE_ELEMENTAL) 1894 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental)); 1895 #endif 1896 #if defined(PETSC_HAVE_SCALAPACK) 1897 
PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_scalapack_C",MatConvert_Dense_ScaLAPACK)); 1898 #endif 1899 #if defined(PETSC_HAVE_CUDA) 1900 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA)); 1901 #endif 1902 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense)); 1903 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense)); 1904 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ)); 1905 #if defined(PETSC_HAVE_CUDA) 1906 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense)); 1907 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ)); 1908 #endif 1909 1910 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense)); 1911 PetscCall(PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense)); 1912 PetscCall(PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE)); 1913 PetscFunctionReturn(0); 1914 } 1915 1916 /*MC 1917 MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs. 1918 1919 Options Database Keys: 1920 . -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions() 1921 1922 Level: beginner 1923 1924 .seealso: 1925 1926 M*/ 1927 #if defined(PETSC_HAVE_CUDA) 1928 #include <petsc/private/deviceimpl.h> 1929 PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B) 1930 { 1931 PetscFunctionBegin; 1932 PetscCall(PetscDeviceInitialize(PETSC_DEVICE_CUDA)); 1933 PetscCall(MatCreate_MPIDense(B)); 1934 PetscCall(MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B)); 1935 PetscFunctionReturn(0); 1936 } 1937 #endif 1938 1939 /*MC 1940 MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices. 1941 1942 This matrix type is identical to MATSEQDENSE when constructed with a single process communicator, 1943 and MATMPIDENSE otherwise. 1944 1945 Options Database Keys: 1946 . -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions() 1947 1948 Level: beginner 1949 1950 .seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA 1951 M*/ 1952 1953 /*MC 1954 MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs. 1955 1956 This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator, 1957 and MATMPIDENSECUDA otherwise. 1958 1959 Options Database Keys: 1960 . -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions() 1961 1962 Level: beginner 1963 1964 .seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE 1965 M*/ 1966 1967 /*@C 1968 MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries 1969 1970 Collective 1971 1972 Input Parameters: 1973 . B - the matrix 1974 - data - optional location of matrix data. Set data=NULL for PETSc 1975 to control all matrix memory allocation. 1976 1977 Notes: 1978 The dense format is fully compatible with standard Fortran 77 1979 storage by columns. 
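   A minimal creation sketch (the communicator and the 100 x 100 sizes are only illustrative):

      Mat A;
      MatCreate(PETSC_COMM_WORLD,&A);
      MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);
      MatSetType(A,MATMPIDENSE);
      MatMPIDenseSetPreallocation(A,NULL);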
1980 1981 The data input variable is intended primarily for Fortran programmers 1982 who wish to allocate their own matrix memory space. Most users should 1983 set data=NULL. 1984 1985 Level: intermediate 1986 1987 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 1988 @*/ 1989 PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data) 1990 { 1991 PetscFunctionBegin; 1992 PetscValidHeaderSpecific(B,MAT_CLASSID,1); 1993 PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data)); 1994 PetscFunctionReturn(0); 1995 } 1996 1997 /*@ 1998 MatDensePlaceArray - Allows one to replace the array in a dense matrix with an 1999 array provided by the user. This is useful to avoid copying an array 2000 into a matrix 2001 2002 Not Collective 2003 2004 Input Parameters: 2005 + mat - the matrix 2006 - array - the array in column major order 2007 2008 Notes: 2009 You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be 2010 freed when the matrix is destroyed. 2011 2012 Level: developer 2013 2014 .seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray() 2015 2016 @*/ 2017 PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar *array) 2018 { 2019 PetscFunctionBegin; 2020 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2021 PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array)); 2022 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2023 #if defined(PETSC_HAVE_CUDA) 2024 mat->offloadmask = PETSC_OFFLOAD_CPU; 2025 #endif 2026 PetscFunctionReturn(0); 2027 } 2028 2029 /*@ 2030 MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray() 2031 2032 Not Collective 2033 2034 Input Parameters: 2035 . mat - the matrix 2036 2037 Notes: 2038 You can only call this after a call to MatDensePlaceArray() 2039 2040 Level: developer 2041 2042 .seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray() 2043 2044 @*/ 2045 PetscErrorCode MatDenseResetArray(Mat mat) 2046 { 2047 PetscFunctionBegin; 2048 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2049 PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat)); 2050 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2051 PetscFunctionReturn(0); 2052 } 2053 2054 /*@ 2055 MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an 2056 array provided by the user. This is useful to avoid copying an array 2057 into a matrix 2058 2059 Not Collective 2060 2061 Input Parameters: 2062 + mat - the matrix 2063 - array - the array in column major order 2064 2065 Notes: 2066 The memory passed in MUST be obtained with PetscMalloc() and CANNOT be 2067 freed by the user. It will be freed when the matrix is destroyed. 
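   A minimal sketch, assuming mat is a MATDENSE matrix whose local block has m rows and N columns and uses the default leading dimension m:

      PetscScalar *v;
      PetscMalloc1(m*N,&v);
      .... fill v in column-major order ....
      MatDenseReplaceArray(mat,v);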
2068 2069 Level: developer 2070 2071 .seealso: MatDenseGetArray(), VecReplaceArray() 2072 @*/ 2073 PetscErrorCode MatDenseReplaceArray(Mat mat,const PetscScalar *array) 2074 { 2075 PetscFunctionBegin; 2076 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2077 PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array)); 2078 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2079 #if defined(PETSC_HAVE_CUDA) 2080 mat->offloadmask = PETSC_OFFLOAD_CPU; 2081 #endif 2082 PetscFunctionReturn(0); 2083 } 2084 2085 #if defined(PETSC_HAVE_CUDA) 2086 /*@C 2087 MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an 2088 array provided by the user. This is useful to avoid copying an array 2089 into a matrix 2090 2091 Not Collective 2092 2093 Input Parameters: 2094 + mat - the matrix 2095 - array - the array in column major order 2096 2097 Notes: 2098 You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be 2099 freed when the matrix is destroyed. The array must have been allocated with cudaMalloc(). 2100 2101 Level: developer 2102 2103 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray() 2104 @*/ 2105 PetscErrorCode MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array) 2106 { 2107 PetscFunctionBegin; 2108 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2109 PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array)); 2110 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2111 mat->offloadmask = PETSC_OFFLOAD_GPU; 2112 PetscFunctionReturn(0); 2113 } 2114 2115 /*@C 2116 MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray() 2117 2118 Not Collective 2119 2120 Input Parameters: 2121 . mat - the matrix 2122 2123 Notes: 2124 You can only call this after a call to MatDenseCUDAPlaceArray() 2125 2126 Level: developer 2127 2128 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray() 2129 2130 @*/ 2131 PetscErrorCode MatDenseCUDAResetArray(Mat mat) 2132 { 2133 PetscFunctionBegin; 2134 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2135 PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat)); 2136 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2137 PetscFunctionReturn(0); 2138 } 2139 2140 /*@C 2141 MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an 2142 array provided by the user. This is useful to avoid copying an array 2143 into a matrix 2144 2145 Not Collective 2146 2147 Input Parameters: 2148 + mat - the matrix 2149 - array - the array in column major order 2150 2151 Notes: 2152 This permanently replaces the GPU array and frees the memory associated with the old GPU array. 2153 The memory passed in CANNOT be freed by the user. It will be freed 2154 when the matrix is destroyed. The array should respect the matrix leading dimension. 
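   A minimal sketch, assuming lda is the leading dimension of the local block, N its number of columns, and a device buffer obtained with cudaMalloc():

      PetscScalar *d_v;
      cudaMalloc((void**)&d_v,lda*N*sizeof(PetscScalar));
      .... fill d_v on the device in column-major order ....
      MatDenseCUDAReplaceArray(mat,d_v);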
2155 2156 Level: developer 2157 2158 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray() 2159 @*/ 2160 PetscErrorCode MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array) 2161 { 2162 PetscFunctionBegin; 2163 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2164 PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array)); 2165 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 2166 mat->offloadmask = PETSC_OFFLOAD_GPU; 2167 PetscFunctionReturn(0); 2168 } 2169 2170 /*@C 2171 MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix. 2172 2173 Not Collective 2174 2175 Input Parameters: 2176 . A - the matrix 2177 2178 Output Parameters 2179 . array - the GPU array in column major order 2180 2181 Notes: 2182 The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed. 2183 2184 Level: developer 2185 2186 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead() 2187 @*/ 2188 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a) 2189 { 2190 PetscFunctionBegin; 2191 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2192 PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a)); 2193 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2194 PetscFunctionReturn(0); 2195 } 2196 2197 /*@C 2198 MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite(). 2199 2200 Not Collective 2201 2202 Input Parameters: 2203 + A - the matrix 2204 - array - the GPU array in column major order 2205 2206 Notes: 2207 2208 Level: developer 2209 2210 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead() 2211 @*/ 2212 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a) 2213 { 2214 PetscFunctionBegin; 2215 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2216 PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a)); 2217 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2218 A->offloadmask = PETSC_OFFLOAD_GPU; 2219 PetscFunctionReturn(0); 2220 } 2221 2222 /*@C 2223 MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed. 2224 2225 Not Collective 2226 2227 Input Parameters: 2228 . A - the matrix 2229 2230 Output Parameters 2231 . array - the GPU array in column major order 2232 2233 Notes: 2234 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). 
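   A minimal access sketch; the consumer of the device pointer (a CUDA kernel or a cuBLAS call) is left illustrative:

      const PetscScalar *d_a;
      MatDenseCUDAGetArrayRead(A,&d_a);
      .... read d_a from a kernel or library routine ....
      MatDenseCUDARestoreArrayRead(A,&d_a);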
2235 2236 Level: developer 2237 2238 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead() 2239 @*/ 2240 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a) 2241 { 2242 PetscFunctionBegin; 2243 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2244 PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a)); 2245 PetscFunctionReturn(0); 2246 } 2247 2248 /*@C 2249 MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead(). 2250 2251 Not Collective 2252 2253 Input Parameters: 2254 + A - the matrix 2255 - array - the GPU array in column major order 2256 2257 Notes: 2258 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). 2259 2260 Level: developer 2261 2262 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead() 2263 @*/ 2264 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a) 2265 { 2266 PetscFunctionBegin; 2267 PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a)); 2268 PetscFunctionReturn(0); 2269 } 2270 2271 /*@C 2272 MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed. 2273 2274 Not Collective 2275 2276 Input Parameters: 2277 . A - the matrix 2278 2279 Output Parameters 2280 . array - the GPU array in column major order 2281 2282 Notes: 2283 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead(). 2284 2285 Level: developer 2286 2287 .seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead() 2288 @*/ 2289 PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a) 2290 { 2291 PetscFunctionBegin; 2292 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2293 PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a)); 2294 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2295 PetscFunctionReturn(0); 2296 } 2297 2298 /*@C 2299 MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray(). 2300 2301 Not Collective 2302 2303 Input Parameters: 2304 + A - the matrix 2305 - array - the GPU array in column major order 2306 2307 Notes: 2308 2309 Level: developer 2310 2311 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead() 2312 @*/ 2313 PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a) 2314 { 2315 PetscFunctionBegin; 2316 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2317 PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a)); 2318 PetscCall(PetscObjectStateIncrease((PetscObject)A)); 2319 A->offloadmask = PETSC_OFFLOAD_GPU; 2320 PetscFunctionReturn(0); 2321 } 2322 #endif 2323 2324 /*@C 2325 MatCreateDense - Creates a matrix in dense format. 2326 2327 Collective 2328 2329 Input Parameters: 2330 + comm - MPI communicator 2331 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2332 . 
n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2333 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 2334 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 2335 - data - optional location of matrix data. Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 2336 to control all matrix memory allocation. 2337 2338 Output Parameter: 2339 . A - the matrix 2340 2341 Notes: 2342 The dense format is fully compatible with standard Fortran 77 2343 storage by columns. 2344 Note that, although local portions of the matrix are stored in column-major 2345 order, the matrix is partitioned across MPI ranks by row. 2346 2347 The data input variable is intended primarily for Fortran programmers 2348 who wish to allocate their own matrix memory space. Most users should 2349 set data=NULL (PETSC_NULL_SCALAR for Fortran users). 2350 2351 The user MUST specify either the local or global matrix dimensions 2352 (possibly both). 2353 2354 Level: intermediate 2355 2356 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 2357 @*/ 2358 PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 2359 { 2360 PetscFunctionBegin; 2361 PetscCall(MatCreate(comm,A)); 2362 PetscCall(MatSetSizes(*A,m,n,M,N)); 2363 PetscCall(MatSetType(*A,MATDENSE)); 2364 PetscCall(MatSeqDenseSetPreallocation(*A,data)); 2365 PetscCall(MatMPIDenseSetPreallocation(*A,data)); 2366 PetscFunctionReturn(0); 2367 } 2368 2369 #if defined(PETSC_HAVE_CUDA) 2370 /*@C 2371 MatCreateDenseCUDA - Creates a matrix in dense format using CUDA. 2372 2373 Collective 2374 2375 Input Parameters: 2376 + comm - MPI communicator 2377 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2378 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2379 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 2380 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 2381 - data - optional location of GPU matrix data. Set data=NULL for PETSc 2382 to control matrix memory allocation. 2383 2384 Output Parameter: 2385 . 
A - the matrix 2386 2387 Notes: 2388 2389 Level: intermediate 2390 2391 .seealso: MatCreate(), MatCreateDense() 2392 @*/ 2393 PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 2394 { 2395 PetscFunctionBegin; 2396 PetscCall(MatCreate(comm,A)); 2397 PetscValidLogicalCollectiveBool(*A,!!data,6); 2398 PetscCall(MatSetSizes(*A,m,n,M,N)); 2399 PetscCall(MatSetType(*A,MATDENSECUDA)); 2400 PetscCall(MatSeqDenseCUDASetPreallocation(*A,data)); 2401 PetscCall(MatMPIDenseCUDASetPreallocation(*A,data)); 2402 PetscFunctionReturn(0); 2403 } 2404 #endif 2405 2406 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 2407 { 2408 Mat mat; 2409 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 2410 2411 PetscFunctionBegin; 2412 *newmat = NULL; 2413 PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&mat)); 2414 PetscCall(MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N)); 2415 PetscCall(MatSetType(mat,((PetscObject)A)->type_name)); 2416 a = (Mat_MPIDense*)mat->data; 2417 2418 mat->factortype = A->factortype; 2419 mat->assembled = PETSC_TRUE; 2420 mat->preallocated = PETSC_TRUE; 2421 2422 mat->insertmode = NOT_SET_VALUES; 2423 a->donotstash = oldmat->donotstash; 2424 2425 PetscCall(PetscLayoutReference(A->rmap,&mat->rmap)); 2426 PetscCall(PetscLayoutReference(A->cmap,&mat->cmap)); 2427 2428 PetscCall(MatDuplicate(oldmat->A,cpvalues,&a->A)); 2429 PetscCall(PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A)); 2430 2431 *newmat = mat; 2432 PetscFunctionReturn(0); 2433 } 2434 2435 PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer) 2436 { 2437 PetscBool isbinary; 2438 #if defined(PETSC_HAVE_HDF5) 2439 PetscBool ishdf5; 2440 #endif 2441 2442 PetscFunctionBegin; 2443 PetscValidHeaderSpecific(newMat,MAT_CLASSID,1); 2444 PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2); 2445 /* force binary viewer to load .info file if it has not yet done so */ 2446 PetscCall(PetscViewerSetUp(viewer)); 2447 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary)); 2448 #if defined(PETSC_HAVE_HDF5) 2449 PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5)); 2450 #endif 2451 if (isbinary) { 2452 PetscCall(MatLoad_Dense_Binary(newMat,viewer)); 2453 #if defined(PETSC_HAVE_HDF5) 2454 } else if (ishdf5) { 2455 PetscCall(MatLoad_Dense_HDF5(newMat,viewer)); 2456 #endif 2457 } else SETERRQ(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name); 2458 PetscFunctionReturn(0); 2459 } 2460 2461 static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag) 2462 { 2463 Mat_MPIDense *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data; 2464 Mat a,b; 2465 PetscBool flg; 2466 2467 PetscFunctionBegin; 2468 a = matA->A; 2469 b = matB->A; 2470 PetscCall(MatEqual(a,b,&flg)); 2471 PetscCall(MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A))); 2472 PetscFunctionReturn(0); 2473 } 2474 2475 PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data) 2476 { 2477 Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data; 2478 2479 PetscFunctionBegin; 2480 PetscCall(PetscFree2(atb->sendbuf,atb->recvcounts)); 2481 PetscCall(MatDestroy(&atb->atb)); 2482 PetscCall(PetscFree(atb)); 2483 PetscFunctionReturn(0); 2484 } 2485 2486 PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data) 
2487 { 2488 Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data; 2489 2490 PetscFunctionBegin; 2491 PetscCall(PetscFree2(abt->buf[0],abt->buf[1])); 2492 PetscCall(PetscFree2(abt->recvcounts,abt->recvdispls)); 2493 PetscCall(PetscFree(abt)); 2494 PetscFunctionReturn(0); 2495 } 2496 2497 static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2498 { 2499 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2500 Mat_TransMatMultDense *atb; 2501 MPI_Comm comm; 2502 PetscMPIInt size,*recvcounts; 2503 PetscScalar *carray,*sendbuf; 2504 const PetscScalar *atbarray; 2505 PetscInt i,cN=C->cmap->N,proc,k,j,lda; 2506 const PetscInt *ranges; 2507 2508 PetscFunctionBegin; 2509 MatCheckProduct(C,3); 2510 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2511 atb = (Mat_TransMatMultDense *)C->product->data; 2512 recvcounts = atb->recvcounts; 2513 sendbuf = atb->sendbuf; 2514 2515 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 2516 PetscCallMPI(MPI_Comm_size(comm,&size)); 2517 2518 /* compute atbarray = aseq^T * bseq */ 2519 PetscCall(MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb)); 2520 2521 PetscCall(MatGetOwnershipRanges(C,&ranges)); 2522 2523 /* arrange atbarray into sendbuf */ 2524 PetscCall(MatDenseGetArrayRead(atb->atb,&atbarray)); 2525 PetscCall(MatDenseGetLDA(atb->atb,&lda)); 2526 for (proc=0, k=0; proc<size; proc++) { 2527 for (j=0; j<cN; j++) { 2528 for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*lda]; 2529 } 2530 } 2531 PetscCall(MatDenseRestoreArrayRead(atb->atb,&atbarray)); 2532 2533 /* sum all atbarray to local values of C */ 2534 PetscCall(MatDenseGetArrayWrite(c->A,&carray)); 2535 PetscCallMPI(MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm)); 2536 PetscCall(MatDenseRestoreArrayWrite(c->A,&carray)); 2537 PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); 2538 PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); 2539 PetscFunctionReturn(0); 2540 } 2541 2542 static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 2543 { 2544 MPI_Comm comm; 2545 PetscMPIInt size; 2546 PetscInt cm=A->cmap->n,cM,cN=B->cmap->N; 2547 Mat_TransMatMultDense *atb; 2548 PetscBool cisdense; 2549 PetscInt i; 2550 const PetscInt *ranges; 2551 2552 PetscFunctionBegin; 2553 MatCheckProduct(C,3); 2554 PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2555 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 2556 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) { 2557 SETERRQ(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2558 } 2559 2560 /* create matrix product C */ 2561 PetscCall(MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N)); 2562 PetscCall(PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"")); 2563 if (!cisdense) { 2564 PetscCall(MatSetType(C,((PetscObject)A)->type_name)); 2565 } 2566 PetscCall(MatSetUp(C)); 2567 2568 /* create data structure for reuse C */ 2569 PetscCallMPI(MPI_Comm_size(comm,&size)); 2570 PetscCall(PetscNew(&atb)); 2571 cM = C->rmap->N; 2572 PetscCall(PetscMalloc2(cM*cN,&atb->sendbuf,size,&atb->recvcounts)); 2573 
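  /* recvcounts[i] = number of entries in the row block of C owned by rank i; consumed by the MPI_Reduce_scatter in the numeric phase */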
PetscCall(MatGetOwnershipRanges(C,&ranges)); 2574 for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN; 2575 2576 C->product->data = atb; 2577 C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 2578 PetscFunctionReturn(0); 2579 } 2580 2581 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2582 { 2583 MPI_Comm comm; 2584 PetscMPIInt i, size; 2585 PetscInt maxRows, bufsiz; 2586 PetscMPIInt tag; 2587 PetscInt alg; 2588 Mat_MatTransMultDense *abt; 2589 Mat_Product *product = C->product; 2590 PetscBool flg; 2591 2592 PetscFunctionBegin; 2593 MatCheckProduct(C,4); 2594 PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2595 /* check local size of A and B */ 2596 PetscCheck(A->cmap->n == B->cmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%" PetscInt_FMT ") != B (%" PetscInt_FMT ")",A->cmap->n,B->cmap->n); 2597 2598 PetscCall(PetscStrcmp(product->alg,"allgatherv",&flg)); 2599 alg = flg ? 0 : 1; 2600 2601 /* setup matrix product C */ 2602 PetscCall(MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N)); 2603 PetscCall(MatSetType(C,MATMPIDENSE)); 2604 PetscCall(MatSetUp(C)); 2605 PetscCall(PetscObjectGetNewTag((PetscObject)C,&tag)); 2606 2607 /* create data structure for reuse C */ 2608 PetscCall(PetscObjectGetComm((PetscObject)C,&comm)); 2609 PetscCallMPI(MPI_Comm_size(comm,&size)); 2610 PetscCall(PetscNew(&abt)); 2611 abt->tag = tag; 2612 abt->alg = alg; 2613 switch (alg) { 2614 case 1: /* alg: "cyclic" */ 2615 for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i])); 2616 bufsiz = A->cmap->N * maxRows; 2617 PetscCall(PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]))); 2618 break; 2619 default: /* alg: "allgatherv" */ 2620 PetscCall(PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]))); 2621 PetscCall(PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls))); 2622 for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N; 2623 for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i]; 2624 break; 2625 } 2626 2627 C->product->data = abt; 2628 C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 2629 C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense; 2630 PetscFunctionReturn(0); 2631 } 2632 2633 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) 2634 { 2635 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2636 Mat_MatTransMultDense *abt; 2637 MPI_Comm comm; 2638 PetscMPIInt rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom; 2639 PetscScalar *sendbuf, *recvbuf=NULL, *cv; 2640 PetscInt i,cK=A->cmap->N,k,j,bn; 2641 PetscScalar _DOne=1.0,_DZero=0.0; 2642 const PetscScalar *av,*bv; 2643 PetscBLASInt cm, cn, ck, alda, blda = 0, clda; 2644 MPI_Request reqs[2]; 2645 const PetscInt *ranges; 2646 2647 PetscFunctionBegin; 2648 MatCheckProduct(C,3); 2649 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2650 abt = (Mat_MatTransMultDense*)C->product->data; 2651 PetscCall(PetscObjectGetComm((PetscObject)C,&comm)); 2652 PetscCallMPI(MPI_Comm_rank(comm,&rank)); 2653 PetscCallMPI(MPI_Comm_size(comm,&size)); 2654 PetscCall(MatDenseGetArrayRead(a->A,&av)); 2655 
PetscCall(MatDenseGetArrayRead(b->A,&bv)); 2656 PetscCall(MatDenseGetArrayWrite(c->A,&cv)); 2657 PetscCall(MatDenseGetLDA(a->A,&i)); 2658 PetscCall(PetscBLASIntCast(i,&alda)); 2659 PetscCall(MatDenseGetLDA(b->A,&i)); 2660 PetscCall(PetscBLASIntCast(i,&blda)); 2661 PetscCall(MatDenseGetLDA(c->A,&i)); 2662 PetscCall(PetscBLASIntCast(i,&clda)); 2663 PetscCall(MatGetOwnershipRanges(B,&ranges)); 2664 bn = B->rmap->n; 2665 if (blda == bn) { 2666 sendbuf = (PetscScalar*)bv; 2667 } else { 2668 sendbuf = abt->buf[0]; 2669 for (k = 0, i = 0; i < cK; i++) { 2670 for (j = 0; j < bn; j++, k++) { 2671 sendbuf[k] = bv[i * blda + j]; 2672 } 2673 } 2674 } 2675 if (size > 1) { 2676 sendto = (rank + size - 1) % size; 2677 recvfrom = (rank + size + 1) % size; 2678 } else { 2679 sendto = recvfrom = 0; 2680 } 2681 PetscCall(PetscBLASIntCast(cK,&ck)); 2682 PetscCall(PetscBLASIntCast(c->A->rmap->n,&cm)); 2683 recvisfrom = rank; 2684 for (i = 0; i < size; i++) { 2685 /* we have finished receiving in sending, bufs can be read/modified */ 2686 PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 2687 PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 2688 2689 if (nextrecvisfrom != rank) { 2690 /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2691 sendsiz = cK * bn; 2692 recvsiz = cK * nextbn; 2693 recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; 2694 PetscCallMPI(MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0])); 2695 PetscCallMPI(MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1])); 2696 } 2697 2698 /* local aseq * sendbuf^T */ 2699 PetscCall(PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn)); 2700 if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda)); 2701 2702 if (nextrecvisfrom != rank) { 2703 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2704 PetscCallMPI(MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE)); 2705 } 2706 bn = nextbn; 2707 recvisfrom = nextrecvisfrom; 2708 sendbuf = recvbuf; 2709 } 2710 PetscCall(MatDenseRestoreArrayRead(a->A,&av)); 2711 PetscCall(MatDenseRestoreArrayRead(b->A,&bv)); 2712 PetscCall(MatDenseRestoreArrayWrite(c->A,&cv)); 2713 PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); 2714 PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); 2715 PetscFunctionReturn(0); 2716 } 2717 2718 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2719 { 2720 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2721 Mat_MatTransMultDense *abt; 2722 MPI_Comm comm; 2723 PetscMPIInt size; 2724 PetscScalar *cv, *sendbuf, *recvbuf; 2725 const PetscScalar *av,*bv; 2726 PetscInt blda,i,cK=A->cmap->N,k,j,bn; 2727 PetscScalar _DOne=1.0,_DZero=0.0; 2728 PetscBLASInt cm, cn, ck, alda, clda; 2729 2730 PetscFunctionBegin; 2731 MatCheckProduct(C,3); 2732 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2733 abt = (Mat_MatTransMultDense*)C->product->data; 2734 PetscCall(PetscObjectGetComm((PetscObject)A,&comm)); 2735 PetscCallMPI(MPI_Comm_size(comm,&size)); 2736 PetscCall(MatDenseGetArrayRead(a->A,&av)); 2737 PetscCall(MatDenseGetArrayRead(b->A,&bv)); 2738 PetscCall(MatDenseGetArrayWrite(c->A,&cv)); 2739 PetscCall(MatDenseGetLDA(a->A,&i)); 2740 PetscCall(PetscBLASIntCast(i,&alda)); 2741 
PetscCall(MatDenseGetLDA(b->A,&blda)); 2742 PetscCall(MatDenseGetLDA(c->A,&i)); 2743 PetscCall(PetscBLASIntCast(i,&clda)); 2744 /* copy transpose of B into buf[0] */ 2745 bn = B->rmap->n; 2746 sendbuf = abt->buf[0]; 2747 recvbuf = abt->buf[1]; 2748 for (k = 0, j = 0; j < bn; j++) { 2749 for (i = 0; i < cK; i++, k++) { 2750 sendbuf[k] = bv[i * blda + j]; 2751 } 2752 } 2753 PetscCall(MatDenseRestoreArrayRead(b->A,&bv)); 2754 PetscCallMPI(MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm)); 2755 PetscCall(PetscBLASIntCast(cK,&ck)); 2756 PetscCall(PetscBLASIntCast(c->A->rmap->n,&cm)); 2757 PetscCall(PetscBLASIntCast(c->A->cmap->n,&cn)); 2758 if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda)); 2759 PetscCall(MatDenseRestoreArrayRead(a->A,&av)); 2760 PetscCall(MatDenseRestoreArrayRead(b->A,&bv)); 2761 PetscCall(MatDenseRestoreArrayWrite(c->A,&cv)); 2762 PetscCall(MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY)); 2763 PetscCall(MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY)); 2764 PetscFunctionReturn(0); 2765 } 2766 2767 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2768 { 2769 Mat_MatTransMultDense *abt; 2770 2771 PetscFunctionBegin; 2772 MatCheckProduct(C,3); 2773 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2774 abt = (Mat_MatTransMultDense*)C->product->data; 2775 switch (abt->alg) { 2776 case 1: 2777 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C)); 2778 break; 2779 default: 2780 PetscCall(MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C)); 2781 break; 2782 } 2783 PetscFunctionReturn(0); 2784 } 2785 2786 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) 2787 { 2788 Mat_MatMultDense *ab = (Mat_MatMultDense*)data; 2789 2790 PetscFunctionBegin; 2791 PetscCall(MatDestroy(&ab->Ce)); 2792 PetscCall(MatDestroy(&ab->Ae)); 2793 PetscCall(MatDestroy(&ab->Be)); 2794 PetscCall(PetscFree(ab)); 2795 PetscFunctionReturn(0); 2796 } 2797 2798 #if defined(PETSC_HAVE_ELEMENTAL) 2799 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2800 { 2801 Mat_MatMultDense *ab; 2802 2803 PetscFunctionBegin; 2804 MatCheckProduct(C,3); 2805 PetscCheck(C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data"); 2806 ab = (Mat_MatMultDense*)C->product->data; 2807 PetscCall(MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae)); 2808 PetscCall(MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be)); 2809 PetscCall(MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce)); 2810 PetscCall(MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C)); 2811 PetscFunctionReturn(0); 2812 } 2813 2814 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 2815 { 2816 Mat Ae,Be,Ce; 2817 Mat_MatMultDense *ab; 2818 2819 PetscFunctionBegin; 2820 MatCheckProduct(C,4); 2821 PetscCheck(!C->product->data,PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2822 /* check local size of A and B */ 2823 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 2824 SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%" PetscInt_FMT ", %" PetscInt_FMT ") != B (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2825 } 2826 2827 /* create elemental 
matrices Ae and Be */ 2828 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &Ae)); 2829 PetscCall(MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N)); 2830 PetscCall(MatSetType(Ae,MATELEMENTAL)); 2831 PetscCall(MatSetUp(Ae)); 2832 PetscCall(MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE)); 2833 2834 PetscCall(MatCreate(PetscObjectComm((PetscObject)B), &Be)); 2835 PetscCall(MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N)); 2836 PetscCall(MatSetType(Be,MATELEMENTAL)); 2837 PetscCall(MatSetUp(Be)); 2838 PetscCall(MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE)); 2839 2840 /* compute symbolic Ce = Ae*Be */ 2841 PetscCall(MatCreate(PetscObjectComm((PetscObject)C),&Ce)); 2842 PetscCall(MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce)); 2843 2844 /* setup C */ 2845 PetscCall(MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE)); 2846 PetscCall(MatSetType(C,MATDENSE)); 2847 PetscCall(MatSetUp(C)); 2848 2849 /* create data structure for reuse Cdense */ 2850 PetscCall(PetscNew(&ab)); 2851 ab->Ae = Ae; 2852 ab->Be = Be; 2853 ab->Ce = Ce; 2854 2855 C->product->data = ab; 2856 C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2857 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2858 PetscFunctionReturn(0); 2859 } 2860 #endif 2861 /* ----------------------------------------------- */ 2862 #if defined(PETSC_HAVE_ELEMENTAL) 2863 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 2864 { 2865 PetscFunctionBegin; 2866 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 2867 C->ops->productsymbolic = MatProductSymbolic_AB; 2868 PetscFunctionReturn(0); 2869 } 2870 #endif 2871 2872 static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C) 2873 { 2874 Mat_Product *product = C->product; 2875 Mat A = product->A,B=product->B; 2876 2877 PetscFunctionBegin; 2878 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) 2879 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2880 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense; 2881 C->ops->productsymbolic = MatProductSymbolic_AtB; 2882 PetscFunctionReturn(0); 2883 } 2884 2885 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 2886 { 2887 Mat_Product *product = C->product; 2888 const char *algTypes[2] = {"allgatherv","cyclic"}; 2889 PetscInt alg,nalg = 2; 2890 PetscBool flg = PETSC_FALSE; 2891 2892 PetscFunctionBegin; 2893 /* Set default algorithm */ 2894 alg = 0; /* default is allgatherv */ 2895 PetscCall(PetscStrcmp(product->alg,"default",&flg)); 2896 if (flg) { 2897 PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg])); 2898 } 2899 2900 /* Get runtime option */ 2901 if (product->api_user) { 2902 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat"); 2903 PetscCall(PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg)); 2904 PetscOptionsEnd(); 2905 } else { 2906 PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat"); 2907 PetscCall(PetscOptionsEList("-mat_product_algorithm","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg)); 2908 PetscOptionsEnd(); 2909 } 2910 if (flg) { 2911 
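    /* an algorithm was selected from the options database; record it in the product */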
    PetscCall(MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]));
2912   }
2913 
2914   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
2915   C->ops->productsymbolic = MatProductSymbolic_ABt;
2916   PetscFunctionReturn(0);
2917 }
2918 
2919 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
2920 {
2921   Mat_Product *product = C->product;
2922 
2923   PetscFunctionBegin;
2924   switch (product->type) {
2925 #if defined(PETSC_HAVE_ELEMENTAL)
2926   case MATPRODUCT_AB:
2927     PetscCall(MatProductSetFromOptions_MPIDense_AB(C));
2928     break;
2929 #endif
2930   case MATPRODUCT_AtB:
2931     PetscCall(MatProductSetFromOptions_MPIDense_AtB(C));
2932     break;
2933   case MATPRODUCT_ABt:
2934     PetscCall(MatProductSetFromOptions_MPIDense_ABt(C));
2935     break;
2936   default:
2937     break;
2938   }
2939   PetscFunctionReturn(0);
2940 }
2941 
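/*
   A minimal usage sketch of the ABt product path implemented above; it assumes A and B are
   MATMPIDENSE matrices with the same number of local columns on every rank, and the algorithm
   string is one of the two registered above ("allgatherv" is the default):

     Mat C;
     MatProductCreate(A,B,NULL,&C);
     MatProductSetType(C,MATPRODUCT_ABt);
     MatProductSetAlgorithm(C,"cyclic");
     MatProductSetFromOptions(C);
     MatProductSymbolic(C);
     MatProductNumeric(C);
     MatDestroy(&C);
*/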