/*
   Basic functions for basic parallel dense matrices.
*/

#include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscblaslapack.h>

/*@

      MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential
              matrix that represents the operator. For sequential matrices it returns itself.

    Input Parameter:
.      A - the Seq or MPI dense matrix

    Output Parameter:
.      B - the inner matrix

    Level: intermediate

@*/
PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  PetscValidPointer(B,2);
  /* only MATMPIDENSE wraps an inner sequential matrix; any other (sequential) dense type is its own local matrix */
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr);
  if (flg) *B = mat->A;
  else *B = A;
  PetscFunctionReturn(0);
}

/* Return one row of the parallel matrix; only rows owned by this process (rstart <= row < rend) are supported.
   The work is delegated to MatGetRow() on the inner sequential matrix using the local row number. */
PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart; /* translate global row to the inner matrix's local numbering */
  ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Companion to MatGetRow_MPIDense(); releases whatever MatGetRow() on the inner matrix acquired */
PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lrow,rstart = A->rmap->rstart,rend = A->rmap->rend;

  PetscFunctionBegin;
  if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows");
  lrow = row - rstart;
  ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a) 66 { 67 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 68 PetscErrorCode ierr; 69 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 70 PetscScalar *array; 71 MPI_Comm comm; 72 PetscBool flg; 73 Mat B; 74 75 PetscFunctionBegin; 76 ierr = MatHasCongruentLayouts(A,&flg);CHKERRQ(ierr); 77 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 78 ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr); 79 if (!B) { /* This should use MatDenseGetSubMatrix (not create), but we would need a call like MatRestoreDiagonalBlock */ 80 81 ierr = PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg);CHKERRQ(ierr); 82 if (flg) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA); 83 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 84 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 85 ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr); 86 ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 87 ierr = MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr); 88 ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr); 89 ierr = MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr); 90 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 91 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 92 ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr); 93 *a = B; 94 ierr = MatDestroy(&B);CHKERRQ(ierr); 95 } else *a = B; 96 PetscFunctionReturn(0); 97 } 98 99 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 100 { 101 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 102 PetscErrorCode ierr; 103 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 104 PetscBool 
roworiented = A->roworiented; 105 106 PetscFunctionBegin; 107 for (i=0; i<m; i++) { 108 if (idxm[i] < 0) continue; 109 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 110 if (idxm[i] >= rstart && idxm[i] < rend) { 111 row = idxm[i] - rstart; 112 if (roworiented) { 113 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 114 } else { 115 for (j=0; j<n; j++) { 116 if (idxn[j] < 0) continue; 117 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 118 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 119 } 120 } 121 } else if (!A->donotstash) { 122 mat->assembled = PETSC_FALSE; 123 if (roworiented) { 124 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 125 } else { 126 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 127 } 128 } 129 } 130 PetscFunctionReturn(0); 131 } 132 133 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 134 { 135 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 136 PetscErrorCode ierr; 137 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 138 139 PetscFunctionBegin; 140 for (i=0; i<m; i++) { 141 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 142 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 143 if (idxm[i] >= rstart && idxm[i] < rend) { 144 row = idxm[i] - rstart; 145 for (j=0; j<n; j++) { 146 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 147 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 148 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 149 } 150 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values 
currently supported"); 151 } 152 PetscFunctionReturn(0); 153 } 154 155 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda) 156 { 157 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 158 PetscErrorCode ierr; 159 160 PetscFunctionBegin; 161 ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr); 162 PetscFunctionReturn(0); 163 } 164 165 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda) 166 { 167 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 168 PetscErrorCode ierr; 169 170 PetscFunctionBegin; 171 ierr = MatDenseSetLDA(a->A,lda);CHKERRQ(ierr); 172 PetscFunctionReturn(0); 173 } 174 175 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array) 176 { 177 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 178 PetscErrorCode ierr; 179 180 PetscFunctionBegin; 181 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 182 ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr); 183 PetscFunctionReturn(0); 184 } 185 186 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array) 187 { 188 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 189 PetscErrorCode ierr; 190 191 PetscFunctionBegin; 192 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 193 ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr); 194 PetscFunctionReturn(0); 195 } 196 197 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array) 198 { 199 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 200 PetscErrorCode ierr; 201 202 PetscFunctionBegin; 203 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 204 ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr); 205 PetscFunctionReturn(0); 206 } 207 208 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array) 209 { 210 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 211 PetscErrorCode ierr; 212 
213 PetscFunctionBegin; 214 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 215 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 216 ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr); 217 PetscFunctionReturn(0); 218 } 219 220 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 221 { 222 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 223 PetscErrorCode ierr; 224 225 PetscFunctionBegin; 226 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 227 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 228 ierr = MatDenseResetArray(a->A);CHKERRQ(ierr); 229 PetscFunctionReturn(0); 230 } 231 232 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array) 233 { 234 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 235 PetscErrorCode ierr; 236 237 PetscFunctionBegin; 238 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 239 if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 240 ierr = MatDenseReplaceArray(a->A,array);CHKERRQ(ierr); 241 PetscFunctionReturn(0); 242 } 243 244 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 245 { 246 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 247 PetscErrorCode ierr; 248 PetscInt lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 249 const PetscInt *irow,*icol; 250 const PetscScalar *v; 251 PetscScalar *bv; 252 Mat newmat; 253 IS iscol_local; 254 MPI_Comm comm_is,comm_mat; 255 256 PetscFunctionBegin; 257 ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr); 258 ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr); 
259 if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator"); 260 261 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 262 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 263 ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 264 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 265 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 266 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of iscol_local */ 267 268 /* No parallel redistribution currently supported! Should really check each index set 269 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 270 original matrix! */ 271 272 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 273 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 274 275 /* Check submatrix call */ 276 if (scall == MAT_REUSE_MATRIX) { 277 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 278 /* Really need to test rows and column sizes! 
*/ 279 newmat = *B; 280 } else { 281 /* Create and fill new matrix */ 282 ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr); 283 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 284 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 285 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 286 } 287 288 /* Now extract the data pointers and do the copy, column at a time */ 289 newmatd = (Mat_MPIDense*)newmat->data; 290 ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr); 291 ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr); 292 ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr); 293 for (i=0; i<Ncols; i++) { 294 const PetscScalar *av = v + lda*icol[i]; 295 for (j=0; j<nrows; j++) { 296 *bv++ = av[irow[j] - rstart]; 297 } 298 } 299 ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr); 300 ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr); 301 302 /* Assemble the matrices so that the correct flags are set */ 303 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 304 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 305 306 /* Free work space */ 307 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 308 ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr); 309 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 310 *B = newmat; 311 PetscFunctionReturn(0); 312 } 313 314 PetscErrorCode MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array) 315 { 316 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 317 PetscErrorCode ierr; 318 319 PetscFunctionBegin; 320 ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr); 321 PetscFunctionReturn(0); 322 } 323 324 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array) 325 { 326 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 327 PetscErrorCode ierr; 328 329 PetscFunctionBegin; 330 ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr); 331 PetscFunctionReturn(0); 332 } 333 334 PetscErrorCode 
MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array) 335 { 336 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 337 PetscErrorCode ierr; 338 339 PetscFunctionBegin; 340 ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr); 341 PetscFunctionReturn(0); 342 } 343 344 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 345 { 346 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 347 PetscErrorCode ierr; 348 PetscInt nstash,reallocs; 349 350 PetscFunctionBegin; 351 if (mdn->donotstash || mat->nooffprocentries) return(0); 352 353 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 354 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 355 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 356 PetscFunctionReturn(0); 357 } 358 359 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 360 { 361 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 362 PetscErrorCode ierr; 363 PetscInt i,*row,*col,flg,j,rstart,ncols; 364 PetscMPIInt n; 365 PetscScalar *val; 366 367 PetscFunctionBegin; 368 if (!mdn->donotstash && !mat->nooffprocentries) { 369 /* wait on receives */ 370 while (1) { 371 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 372 if (!flg) break; 373 374 for (i=0; i<n;) { 375 /* Now identify the consecutive vals belonging to the same row */ 376 for (j=i,rstart=row[j]; j<n; j++) { 377 if (row[j] != rstart) break; 378 } 379 if (j < n) ncols = j-i; 380 else ncols = n-i; 381 /* Now assemble all these values with a single function call */ 382 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 383 i = j; 384 } 385 } 386 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 387 } 388 389 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 390 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 391 392 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 
393 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 394 } 395 PetscFunctionReturn(0); 396 } 397 398 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 399 { 400 PetscErrorCode ierr; 401 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 402 403 PetscFunctionBegin; 404 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 405 PetscFunctionReturn(0); 406 } 407 408 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 409 { 410 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 411 PetscErrorCode ierr; 412 PetscInt i,len,*lrows; 413 414 PetscFunctionBegin; 415 /* get locally owned rows */ 416 ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr); 417 /* fix right hand side if needed */ 418 if (x && b) { 419 const PetscScalar *xx; 420 PetscScalar *bb; 421 422 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 423 ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr); 424 for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]]; 425 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 426 ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr); 427 } 428 ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 429 if (diag != 0.0) { 430 Vec d; 431 432 ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr); 433 ierr = VecSet(d,diag);CHKERRQ(ierr); 434 ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr); 435 ierr = VecDestroy(&d);CHKERRQ(ierr); 436 } 437 ierr = PetscFree(lrows);CHKERRQ(ierr); 438 PetscFunctionReturn(0); 439 } 440 441 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec); 442 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec); 443 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec); 444 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec); 445 446 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 447 { 448 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 449 PetscErrorCode ierr; 450 const PetscScalar *ax; 451 PetscScalar *ay; 452 453 PetscFunctionBegin; 454 ierr = 
VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr); 455 ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr); 456 ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 457 ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 458 ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr); 459 ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr); 460 ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 461 PetscFunctionReturn(0); 462 } 463 464 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 465 { 466 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 467 PetscErrorCode ierr; 468 const PetscScalar *ax; 469 PetscScalar *ay; 470 471 PetscFunctionBegin; 472 ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr); 473 ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr); 474 ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 475 ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 476 ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr); 477 ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr); 478 ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 479 PetscFunctionReturn(0); 480 } 481 482 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 483 { 484 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 485 PetscErrorCode ierr; 486 const PetscScalar *ax; 487 PetscScalar *ay; 488 489 PetscFunctionBegin; 490 ierr = VecSet(yy,0.0);CHKERRQ(ierr); 491 ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr); 492 ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 493 ierr = VecGetArrayInPlace(yy,&ay);CHKERRQ(ierr); 494 ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 495 ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 496 ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 497 ierr = VecRestoreArrayInPlace(yy,&ay);CHKERRQ(ierr); 498 PetscFunctionReturn(0); 499 } 500 501 PetscErrorCode 
MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 502 { 503 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 504 PetscErrorCode ierr; 505 const PetscScalar *ax; 506 PetscScalar *ay; 507 508 PetscFunctionBegin; 509 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 510 ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr); 511 ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 512 ierr = VecGetArrayInPlace(zz,&ay);CHKERRQ(ierr); 513 ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 514 ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 515 ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 516 ierr = VecRestoreArrayInPlace(zz,&ay);CHKERRQ(ierr); 517 PetscFunctionReturn(0); 518 } 519 520 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 521 { 522 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 523 PetscErrorCode ierr; 524 PetscInt lda,len,i,n,m = A->rmap->n,radd; 525 PetscScalar *x,zero = 0.0; 526 const PetscScalar *av; 527 528 PetscFunctionBegin; 529 ierr = VecSet(v,zero);CHKERRQ(ierr); 530 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 531 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 532 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 533 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 534 radd = A->rmap->rstart*m; 535 ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 536 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 537 for (i=0; i<len; i++) { 538 x[i] = av[radd + i*lda + i]; 539 } 540 ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 541 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 542 PetscFunctionReturn(0); 543 } 544 545 PetscErrorCode MatDestroy_MPIDense(Mat mat) 546 { 547 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 548 PetscErrorCode ierr; 549 550 PetscFunctionBegin; 551 #if defined(PETSC_USE_LOG) 552 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 553 #endif 554 ierr = 
MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 555 if (mdn->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 556 if (mdn->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 557 ierr = MatDestroy(&mdn->A);CHKERRQ(ierr); 558 ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr); 559 ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr); 560 ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr); 561 ierr = MatDestroy(&mdn->cmat);CHKERRQ(ierr); 562 563 ierr = PetscFree(mat->data);CHKERRQ(ierr); 564 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 565 566 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr); 567 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL);CHKERRQ(ierr); 568 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr); 569 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr); 570 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr); 571 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr); 572 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr); 573 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr); 574 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr); 575 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr); 576 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);CHKERRQ(ierr); 577 #if defined(PETSC_HAVE_ELEMENTAL) 578 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr); 579 #endif 580 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr); 581 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 582 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr); 583 #if defined (PETSC_HAVE_CUDA) 584 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL);CHKERRQ(ierr); 585 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL);CHKERRQ(ierr); 586 #endif 587 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr); 588 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr); 589 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr); 590 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr); 591 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr); 592 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr); 593 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr); 594 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr); 595 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL);CHKERRQ(ierr); 596 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL);CHKERRQ(ierr); 597 PetscFunctionReturn(0); 598 } 599 600 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer); 601 602 #include <petscdraw.h> 603 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 604 { 605 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 606 PetscErrorCode ierr; 607 
PetscMPIInt rank; 608 PetscViewerType vtype; 609 PetscBool iascii,isdraw; 610 PetscViewer sviewer; 611 PetscViewerFormat format; 612 613 PetscFunctionBegin; 614 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 615 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 616 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 617 if (iascii) { 618 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 619 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 620 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 621 MatInfo info; 622 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 623 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 624 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 625 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 626 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 627 ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr); 628 PetscFunctionReturn(0); 629 } else if (format == PETSC_VIEWER_ASCII_INFO) { 630 PetscFunctionReturn(0); 631 } 632 } else if (isdraw) { 633 PetscDraw draw; 634 PetscBool isnull; 635 636 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 637 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 638 if (isnull) PetscFunctionReturn(0); 639 } 640 641 { 642 /* assemble the entire matrix onto first processor. 
*/ 643 Mat A; 644 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 645 PetscInt *cols; 646 PetscScalar *vals; 647 648 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 649 if (!rank) { 650 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 651 } else { 652 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 653 } 654 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 655 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 656 ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr); 657 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 658 659 /* Copy the matrix ... This isn't the most efficient means, 660 but it's quick for now */ 661 A->insertmode = INSERT_VALUES; 662 663 row = mat->rmap->rstart; 664 m = mdn->A->rmap->n; 665 for (i=0; i<m; i++) { 666 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 667 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 668 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 669 row++; 670 } 671 672 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 673 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 674 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 675 if (!rank) { 676 ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 677 ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 678 } 679 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 680 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 681 ierr = MatDestroy(&A);CHKERRQ(ierr); 682 } 683 PetscFunctionReturn(0); 684 } 685 686 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 687 { 688 PetscErrorCode ierr; 689 PetscBool iascii,isbinary,isdraw,issocket; 690 691 PetscFunctionBegin; 692 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 693 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 694 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 695 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 696 697 if (iascii || issocket || isdraw) { 698 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 699 } else if (isbinary) { 700 ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr); 701 } 702 PetscFunctionReturn(0); 703 } 704 705 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 706 { 707 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 708 Mat mdn = mat->A; 709 PetscErrorCode ierr; 710 PetscLogDouble isend[5],irecv[5]; 711 712 PetscFunctionBegin; 713 info->block_size = 1.0; 714 715 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 716 717 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 718 isend[3] = info->memory; isend[4] = info->mallocs; 719 if (flag == MAT_LOCAL) { 720 info->nz_used = isend[0]; 721 info->nz_allocated = isend[1]; 722 info->nz_unneeded = isend[2]; 723 info->memory = isend[3]; 724 info->mallocs = isend[4]; 725 } else if (flag == MAT_GLOBAL_MAX) { 726 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 727 728 info->nz_used = irecv[0]; 729 info->nz_allocated = irecv[1]; 730 info->nz_unneeded = irecv[2]; 731 info->memory = irecv[3]; 732 info->mallocs = irecv[4]; 733 } else if (flag == MAT_GLOBAL_SUM) { 734 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 735 736 info->nz_used = irecv[0]; 737 info->nz_allocated = irecv[1]; 738 info->nz_unneeded = irecv[2]; 739 info->memory = irecv[3]; 740 info->mallocs = irecv[4]; 741 } 742 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky 
*/ 743 info->fill_ratio_needed = 0; 744 info->factor_mallocs = 0; 745 PetscFunctionReturn(0); 746 } 747 748 PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg) 749 { 750 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 751 PetscErrorCode ierr; 752 753 PetscFunctionBegin; 754 switch (op) { 755 case MAT_NEW_NONZERO_LOCATIONS: 756 case MAT_NEW_NONZERO_LOCATION_ERR: 757 case MAT_NEW_NONZERO_ALLOCATION_ERR: 758 MatCheckPreallocated(A,1); 759 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 760 break; 761 case MAT_ROW_ORIENTED: 762 MatCheckPreallocated(A,1); 763 a->roworiented = flg; 764 ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr); 765 break; 766 case MAT_NEW_DIAGONALS: 767 case MAT_KEEP_NONZERO_PATTERN: 768 case MAT_USE_HASH_TABLE: 769 case MAT_SORTED_FULL: 770 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 771 break; 772 case MAT_IGNORE_OFF_PROC_ENTRIES: 773 a->donotstash = flg; 774 break; 775 case MAT_SYMMETRIC: 776 case MAT_STRUCTURALLY_SYMMETRIC: 777 case MAT_HERMITIAN: 778 case MAT_SYMMETRY_ETERNAL: 779 case MAT_IGNORE_LOWER_TRIANGULAR: 780 case MAT_IGNORE_ZERO_ENTRIES: 781 ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr); 782 break; 783 default: 784 SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]); 785 } 786 PetscFunctionReturn(0); 787 } 788 789 PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr) 790 { 791 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 792 const PetscScalar *l; 793 PetscScalar x,*v,*vv,*r; 794 PetscErrorCode ierr; 795 PetscInt i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda; 796 797 PetscFunctionBegin; 798 ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr); 799 ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr); 800 ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr); 801 if (ll) { 802 ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr); 803 if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", 
s2a, s2); 804 ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr); 805 for (i=0; i<m; i++) { 806 x = l[i]; 807 v = vv + i; 808 for (j=0; j<n; j++) { (*v) *= x; v+= lda;} 809 } 810 ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr); 811 ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr); 812 } 813 if (rr) { 814 const PetscScalar *ar; 815 816 ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr); 817 if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3); 818 ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr); 819 ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr); 820 ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr); 821 ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr); 822 ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr); 823 for (i=0; i<n; i++) { 824 x = r[i]; 825 v = vv + i*lda; 826 for (j=0; j<m; j++) (*v++) *= x; 827 } 828 ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr); 829 ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr); 830 } 831 ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr); 832 PetscFunctionReturn(0); 833 } 834 835 PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm) 836 { 837 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 838 PetscErrorCode ierr; 839 PetscInt i,j; 840 PetscMPIInt size; 841 PetscReal sum = 0.0; 842 const PetscScalar *av,*v; 843 844 PetscFunctionBegin; 845 ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr); 846 v = av; 847 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr); 848 if (size == 1) { 849 ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr); 850 } else { 851 if (type == NORM_FROBENIUS) { 852 for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) { 853 sum += PetscRealPart(PetscConj(*v)*(*v)); v++; 854 } 855 ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 856 *nrm = PetscSqrtReal(*nrm); 857 ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr); 858 } else if 
(type == NORM_1) {
      PetscReal *tmp,*tmp2;
      ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      *nrm = 0.0;
      v    = av;
      /* local absolute column sums; columns are global since the local
         matrix stores all N columns of this process's rows */
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      /* one norm = max over columns of the summed column totals */
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Forms the explicit transpose of the parallel dense matrix. With
   MAT_INPLACE_MATRIX the result replaces A via MatHeaderMerge(). */
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i,lda;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr); /* swapped sizes */
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else B = *matout;

  m    = a->A->rmap->n; n = a->A->cmap->n;
  ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  /* global row indices of this process's rows become column indices in B */
  for (i=0; i<m; i++) rwork[i] = rstart + i;
  for (j=0; j<n; j++) {
    /* column j of the local block becomes (part of) row j of B */
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += lda;
  }
  ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr); /* in-place: A takes over B's data */
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

/* Sets up the layouts and performs default preallocation if needed. */
PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  if (!A->preallocated) {
    ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Y += alpha*X, delegated to the local sequential dense matrices. */
PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Complex-conjugates all entries, delegated to the local matrix. */
PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatConjugate(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Replaces each entry by its real part, delegated to the local matrix. */
PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Replaces each entry by its imaginary part, delegated to the local matrix. */
PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr =
MatImaginaryPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Extracts column col into v by dispatching to the local matrix's op. */
static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
  if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
  ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);

/* Computes the norm of every column. Local norms are combined across ranks:
   max for the infinity norm, sum otherwise; for the 2-norm the local values
   are squared before the reduction and re-rooted afterwards. */
PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  PetscInt       i,n;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
  ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) work[i] *= work[i]; /* sum of squares reduces correctly */
  }
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/* Exposes column col as a CUDA vector sharing the matrix storage (no copy).
   Only one column vector may be outstanding at a time. */
static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse)
SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) { /* lazily create the reusable column-view vector */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1; /* offset by one so that 0 means "not in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

/* Releases the column vector obtained with MatDenseGetColumnVec(). */
static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

/* Read-only variant: like MatDenseGetColumnVec but the returned vector is
   read-locked and the underlying array is accessed read-only. */
static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr =
PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1; /* offset by one so that 0 means "not in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr); /* forbid writes through the view */
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

/* Releases a column vector obtained with the read-only getter. */
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

/* Write-only variant: the underlying array is accessed write-only (previous
   contents of the column are not guaranteed to be valid on the device). */
static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr =
MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v   = a->cvec;
  PetscFunctionReturn(0);
}

/* Releases a column vector obtained with the write-only getter. */
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v   = NULL;
  PetscFunctionReturn(0);
}

/* Points the local matrix at a user-provided device array (restore with
   MatDenseCUDAResetArray()); disallowed while a column view is outstanding. */
static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Restores the array previously replaced by MatDenseCUDAPlaceArray(). */
static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static
PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (l->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (l->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseCUDAReplaceArray(l->A,a);CHKERRQ(ierr); /* permanently swap the device array */
  PetscFunctionReturn(0);
}

/* The following are thin wrappers forwarding device-array access of the
   parallel matrix to its local sequential CUDA matrix. */

static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr =
MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Host (non-CUDA) column-vector accessors, defined later in this file. */
static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);

/* Switches the matrix between CPU-bound and GPU-capable execution by
   rebinding the local matrix and swapping the column-vector accessors. */
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (d->vecinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (d->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) {
    ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr);
  }
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    /* cached helpers of the wrong (host) type must be rebuilt lazily */
    ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = VecDestroy(&d->cvec);CHKERRQ(ierr);
    }
    ierr = PetscObjectTypeCompare((PetscObject)d->cmat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = MatDestroy(&d->cmat);CHKERRQ(ierr);
    }
    ierr =
PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
  } else { /* bound to CPU: install the host accessors */
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  }
  if (d->cmat) { /* keep the cached submatrix consistent with the binding */
    ierr = MatBindToCPU(d->cmat,bind);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Preallocates the CUDA parallel dense matrix, optionally adopting the
   user-supplied device array d_data for the local block. No-op unless the
   matrix type is MATMPIDENSECUDA. */
PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  if (!iscuda) PetscFunctionReturn(0); /* silently ignore for non-CUDA types */
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  if (!d->A) { /* local block: this process's rows times ALL global columns */
    ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr);
    ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
  }
  ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr);
  ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr);
  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
#endif

/* Fills the matrix with random values, delegated to the local matrix. */
static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* A dense matrix stores every diagonal entry, so none is ever missing;
   the location output d is not set. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);

/* -------------------------------------------------------------------*/
/* Function table for MATMPIDENSE; slot indices follow struct _MatOps. */
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /* 4*/
                                        MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        MatGetDiagonalBlock_MPIDense,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 89*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        0,
                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                        0,
                                /* 99*/ MatProductSetFromOptions_MPIDense,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        MatGetColumnVector_MPIDense,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/
                                        0,
                                        0,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                /*145*/ 0,
                                        0,
                                        0
};

/* Preallocates the local sequential block of a MATMPIDENSE matrix. If data
   is non-NULL the local matrix adopts it instead of allocating storage. */
PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscBool      iscuda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  if (!a->A) { /* local block: this process's rows times ALL global columns */
    ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
    ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  }
  ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  ierr = MatSetType(a->A,iscuda ?
MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  mat->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
/* Converts a MATMPIDENSE matrix to MATELEMENTAL by inserting the locally
   owned rows into a new Elemental matrix. */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;              /* all global columns */
  for (i=0; i<m; i++) rows[i] = rstart + i;     /* locally owned global rows */

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif

static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt
col,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* column access is forwarded to the local sequential matrix */
  ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Builds (or reuses) a parallel dense matrix whose local block is a copy of
   the given sequential matrix; n is the local column count (PETSC_DECIDE
   lets PETSc split the N global columns). */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *mat;
  PetscInt       m,nloc,N;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
  }

  /* numeric phase */
  mat  = (Mat_MPIDense*)(*outmat)->data;
  ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/* Converts a MATMPIDENSECUDA matrix back to plain MATMPIDENSE, removing all
   CUDA-specific composed methods. */
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr =
MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }

  B = *newmat;
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr); /* install host accessors */
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr);
  /* remove every CUDA-specific composed method */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr);
  ierr =
PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);CHKERRQ(ierr);
  m = (Mat_MPIDense*)(B)->data;
  if (m->A) { /* convert the local block to the host type and rebuild comms */
    ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
  }
  B->ops->bindtocpu = NULL;
  B->offloadmask    = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}

/* Converts a MATMPIDENSE matrix to MATMPIDENSECUDA, installing the
   CUDA-specific composed methods and product hooks. */
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }

  B = *newmat;
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",                    MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",        MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",        MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",                                MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr =
PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C", MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr); 1655 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C", MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr); 1656 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C", MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr); 1657 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C", MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr); 1658 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C", MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr); 1659 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C", MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr); 1660 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C", MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr); 1661 ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C", MatDenseCUDAReplaceArray_MPIDenseCUDA);CHKERRQ(ierr); 1662 m = (Mat_MPIDense*)(B)->data; 1663 if (m->A) { 1664 ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr); 1665 ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr); 1666 B->offloadmask = PETSC_OFFLOAD_BOTH; 1667 } else { 1668 B->offloadmask = PETSC_OFFLOAD_UNALLOCATED; 1669 } 1670 ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr); 1671 1672 B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA; 1673 PetscFunctionReturn(0); 1674 } 1675 #endif 1676 1677 PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v) 1678 { 1679 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 1680 PetscErrorCode ierr; 1681 PetscInt lda; 1682 1683 PetscFunctionBegin; 1684 if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 1685 if (a->matinuse) 
SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the scratch vector; it wraps external memory (array passed later) */
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;               /* store col+1 so 0 means "not in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Release the column vector obtained with MatDenseGetColumnVec_MPIDense(). */
PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
  *v = NULL;
  PetscFunctionReturn(0);
}

/* Read-only variant: same as MatDenseGetColumnVec_MPIDense() but uses the read-only
   array accessor and read-locks the vector. */
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Release the read-only column vector obtained with MatDenseGetColumnVecRead_MPIDense(). */
PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
  *v = NULL;
  PetscFunctionReturn(0);
}

/* Write-only variant: same as MatDenseGetColumnVec_MPIDense() but uses the
   write-only array accessor (previous contents need not be valid). */
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Release the write-only column vector obtained with MatDenseGetColumnVecWrite_MPIDense(). */
PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse)
SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
  *v = NULL;
  PetscFunctionReturn(0);
}

/* Expose columns [cbegin,cend) of the parallel dense matrix as a (cached) MPIDENSE
   matrix whose local block aliases this matrix's storage (no copy). Only one
   submatrix or column vector may be outstanding at a time; must be matched by
   MatDenseRestoreSubMatrix_MPIDense(). */
PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *c;
  PetscErrorCode ierr;
  MPI_Comm       comm;
  PetscBool      setup = PETSC_FALSE;   /* whether the scatter of the cached shell needs (re)building */

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (a->vecinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cmat) {
    /* first use: create the cached outer shell with A's row layout */
    setup = PETSC_TRUE;
    ierr = MatCreate(comm,&a->cmat);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)a->cmat);CHKERRQ(ierr);
    ierr = MatSetType(a->cmat,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = PetscLayoutReference(A->rmap,&a->cmat->rmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  } else if (cend-cbegin != a->cmat->cmap->N) {
    /* cached shell has a different column count: rebuild its column layout */
    setup = PETSC_TRUE;
    ierr = PetscLayoutDestroy(&a->cmat->cmap);CHKERRQ(ierr);
    ierr = PetscLayoutCreate(comm,&a->cmat->cmap);CHKERRQ(ierr);
    ierr = PetscLayoutSetSize(a->cmat->cmap,cend-cbegin);CHKERRQ(ierr);
    ierr = PetscLayoutSetUp(a->cmat->cmap);CHKERRQ(ierr);
  }
  c = (Mat_MPIDense*)a->cmat->data;
  if (c->A) SETERRQ(comm,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseGetSubMatrix(a->A,cbegin,cend,&c->A);CHKERRQ(ierr);
  if (setup) { /* do we really need this? */
    ierr = MatSetUpMultiply_MPIDense(a->cmat);CHKERRQ(ierr);
  }
  a->cmat->preallocated = PETSC_TRUE;
  a->cmat->assembled = PETSC_TRUE;
  a->matinuse = cbegin + 1;            /* store cbegin+1 so 0 means "not in use" */
  *v = a->cmat;
  PetscFunctionReturn(0);
}

/* Release the submatrix obtained with MatDenseGetSubMatrix_MPIDense(). */
PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat_MPIDense   *c;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->matinuse) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
  if (!a->cmat) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_PLIB,"Missing internal matrix");
  if (*v != a->cmat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Not the matrix obtained from MatDenseGetSubMatrix()");
  a->matinuse = 0;
  c = (Mat_MPIDense*)a->cmat->data;
  ierr = MatDenseRestoreSubMatrix(a->A,&c->A);CHKERRQ(ierr);
  *v = NULL;
  PetscFunctionReturn(0);
}

/* Constructor for MATMPIDENSE: allocates the per-type data structure, installs the
   function table, creates the stash for off-process entries, and composes the
   method implementations queried by the MatDense* API. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec = 0;
  a->Mvctx = 0;
  a->roworiented = PETSC_TRUE;

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense);CHKERRQ(ierr);
  ierr =
PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense);CHKERRQ(ierr);
  /* conversions and product dispatch available only with the corresponding packages */
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#endif

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.

   Options Database Keys:
. -mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()

  Level: beginner

.seealso:

M*/
#if defined(PETSC_HAVE_CUDA)
/* Constructor for MATMPIDENSECUDA: build a plain MPIDENSE matrix, then convert it
   in place to the CUDA type. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate_MPIDense(B);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner


.seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA
M*/

/*MC
   MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.

   This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
   and MATMPIDENSECUDA otherwise.

   Options Database Keys:
. -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE
M*/

/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation composed on B (no-op for other types) */
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* the user-provided array is host memory: host copy is now the valid one */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}

/*@
   MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()

   Not Collective

   Input Parameters:
.
mat - the matrix

   Notes:
   You can only call this after a call to MatDensePlaceArray()

   Level: developer

.seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
   freed by the user. It will be freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), VecReplaceArray()
@*/
PetscErrorCode MatDenseReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* the replacement array is host memory: host copy is now the valid one */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/*@C
   MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
@*/
PetscErrorCode MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  /* the user-provided array lives on the device: device copy is now the valid one */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray()

   Not Collective

   Input Parameters:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDenseCUDAPlaceArray()

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()

@*/
PetscErrorCode MatDenseCUDAResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user.
This is useful to avoid copying an array 2142 into a matrix 2143 2144 Not Collective 2145 2146 Input Parameters: 2147 + mat - the matrix 2148 - array - the array in column major order 2149 2150 Notes: 2151 This permanently replaces the GPU array and frees the memory associated with the old GPU array. 2152 The memory passed in CANNOT be freed by the user. It will be freed 2153 when the matrix is destroyed. The array should respect the matrix leading dimension. 2154 2155 Level: developer 2156 2157 .seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray() 2158 @*/ 2159 PetscErrorCode MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array) 2160 { 2161 PetscErrorCode ierr; 2162 2163 PetscFunctionBegin; 2164 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 2165 ierr = PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr); 2166 ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr); 2167 mat->offloadmask = PETSC_OFFLOAD_GPU; 2168 PetscFunctionReturn(0); 2169 } 2170 2171 /*@C 2172 MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix. 2173 2174 Not Collective 2175 2176 Input Parameters: 2177 . A - the matrix 2178 2179 Output Parameters 2180 . array - the GPU array in column major order 2181 2182 Notes: 2183 The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed. 
2184 2185 Level: developer 2186 2187 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead() 2188 @*/ 2189 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a) 2190 { 2191 PetscErrorCode ierr; 2192 2193 PetscFunctionBegin; 2194 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2195 ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2196 ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2197 PetscFunctionReturn(0); 2198 } 2199 2200 /*@C 2201 MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite(). 2202 2203 Not Collective 2204 2205 Input Parameters: 2206 + A - the matrix 2207 - array - the GPU array in column major order 2208 2209 Notes: 2210 2211 Level: developer 2212 2213 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead() 2214 @*/ 2215 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a) 2216 { 2217 PetscErrorCode ierr; 2218 2219 PetscFunctionBegin; 2220 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2221 ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2222 ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2223 A->offloadmask = PETSC_OFFLOAD_GPU; 2224 PetscFunctionReturn(0); 2225 } 2226 2227 /*@C 2228 MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed. 2229 2230 Not Collective 2231 2232 Input Parameters: 2233 . A - the matrix 2234 2235 Output Parameters 2236 . array - the GPU array in column major order 2237 2238 Notes: 2239 Data can be copied to the GPU due to operations done on the CPU. 
If you need write only access, use MatDenseCUDAGetArrayWrite(). 2240 2241 Level: developer 2242 2243 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead() 2244 @*/ 2245 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a) 2246 { 2247 PetscErrorCode ierr; 2248 2249 PetscFunctionBegin; 2250 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2251 ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr); 2252 PetscFunctionReturn(0); 2253 } 2254 2255 /*@C 2256 MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead(). 2257 2258 Not Collective 2259 2260 Input Parameters: 2261 + A - the matrix 2262 - array - the GPU array in column major order 2263 2264 Notes: 2265 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). 2266 2267 Level: developer 2268 2269 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead() 2270 @*/ 2271 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a) 2272 { 2273 PetscErrorCode ierr; 2274 2275 PetscFunctionBegin; 2276 ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr); 2277 PetscFunctionReturn(0); 2278 } 2279 2280 /*@C 2281 MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed. 2282 2283 Not Collective 2284 2285 Input Parameters: 2286 . A - the matrix 2287 2288 Output Parameters 2289 . array - the GPU array in column major order 2290 2291 Notes: 2292 Data can be copied to the GPU due to operations done on the CPU. 
If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead().

   Level: developer

.seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead()
@*/
PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray().

   Not Collective

   Input Parameters:
+  A - the matrix
-  array - the GPU array in column major order

   Notes:

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead()
@*/
PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
  /* the device buffer may have been written: device copy is the valid one */
  A->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}
#endif

/*@C
   MatCreateDense - Creates a matrix in dense format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
.  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
.
m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2402 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2403 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 2404 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 2405 - data - optional location of GPU matrix data. Set data=NULL for PETSc 2406 to control matrix memory allocation. 2407 2408 Output Parameter: 2409 . A - the matrix 2410 2411 Notes: 2412 2413 Level: intermediate 2414 2415 .seealso: MatCreate(), MatCreateDense() 2416 @*/ 2417 PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 2418 { 2419 PetscErrorCode ierr; 2420 PetscMPIInt size; 2421 2422 PetscFunctionBegin; 2423 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2424 PetscValidLogicalCollectiveBool(*A,!!data,6); 2425 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2426 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2427 if (size > 1) { 2428 ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr); 2429 ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr); 2430 if (data) { /* user provided data array, so no need to assemble */ 2431 ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr); 2432 (*A)->assembled = PETSC_TRUE; 2433 } 2434 } else { 2435 ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr); 2436 ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr); 2437 } 2438 PetscFunctionReturn(0); 2439 } 2440 #endif 2441 2442 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 2443 { 2444 Mat mat; 2445 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 2446 PetscErrorCode ierr; 2447 2448 PetscFunctionBegin; 2449 *newmat = 0; 2450 ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr); 2451 ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2452 ierr = 
MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a    = (Mat_MPIDense*)mat->data;

  /* the duplicate starts out assembled and preallocated, mirroring A */
  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  /* share (reference count) A's layouts rather than rebuilding them */
  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

/* Loads an MPI dense matrix from a binary or (when available) HDF5 viewer */
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool      ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5, &ishdf5);CHKERRQ(ierr);
#endif
  if (isbinary) {
    ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
#endif
  } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}

/* Equality test: compare the local blocks, then AND the results over the communicator */
static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIDense   *matB =
(Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a    = matA->A;
  b    = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  /* all ranks must agree: logical AND of the per-rank comparisons */
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Frees the reuse data attached to a C = A^T * B dense product */
PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
{
  PetscErrorCode        ierr;
  Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;

  PetscFunctionBegin;
  ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr);
  ierr = MatDestroy(&atb->atb);CHKERRQ(ierr);
  ierr = PetscFree(atb);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Frees the reuse data attached to a C = A * B^T dense product */
PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
{
  PetscErrorCode        ierr;
  Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;

  PetscFunctionBegin;
  ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
  ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
  ierr = PetscFree(abt);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Numeric phase of C = A^T * B for two MPI dense matrices: local product then reduce-scatter */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_TransMatMultDense *atb;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size,*recvcounts;
  PetscScalar           *carray,*sendbuf;
  const PetscScalar     *atbarray;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  ierr =
PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* compute atbarray = aseq^T * bseq */
  ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr);

  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);

  /* arrange atbarray into sendbuf: rows grouped by destination rank, column-major per block */
  ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);
  for (proc=0, k=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);

  /* sum all atbarray to local values of C */
  ierr = MatDenseGetArrayWrite(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayWrite(c->A,&carray);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Symbolic phase of C = A^T * B: sizes C and allocates the reduce-scatter buffers for reuse */
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_TransMatMultDense *atb;
  PetscBool             cisdense;
  PetscInt              i;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  /* A and B must have identical row distributions for the local A^T*B to make sense */
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /*
create matrix product C */
  ierr = MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr);
  if (!cisdense) {
    ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
  }
  ierr = MatSetUp(C);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&atb);CHKERRQ(ierr);
  cM   = C->rmap->N;
  ierr = PetscMalloc2((size_t)cM*(size_t)cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
  /* each rank receives its share of C's rows, cN columns each */
  for (i=0; i<size; i++) atb->recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  C->product->data    = atb;
  C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

/* Symbolic phase of C = A * B^T: picks the algorithm and allocates its communication buffers */
static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           i, size;
  PetscInt              maxRows, bufsiz;
  PetscMPIInt           tag;
  PetscInt              alg;
  Mat_MatTransMultDense *abt;
  Mat_Product           *product = C->product;
  PetscBool             flg;

  PetscFunctionBegin;
  MatCheckProduct(C,4);
  if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  /* check local size of A and B */
  if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n);

  ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr);
  alg  = flg ?
0 : 1;

  /* setup matrix product C */
  ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr);
  ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(C);CHKERRQ(ierr);
  ierr = PetscObjectGetNewTag((PetscObject)C,&tag);CHKERRQ(ierr);

  /* create data structure for reuse C */
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = PetscNew(&abt);CHKERRQ(ierr);
  abt->tag = tag;
  abt->alg = alg;
  switch (alg) {
  case 1: /* alg: "cyclic" - two ping-pong buffers sized for the largest row block of B */
    for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i]));
    bufsiz = A->cmap->N * maxRows;
    ierr   = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr);
    break;
  default: /* alg: "allgatherv" - send buffer for local B^T, receive buffer for all of B^T */
    ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr);
    ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr);
    for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N;
    for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i];
    break;
  }

  C->product->data    = abt;
  C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense;
  C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense;
  PetscFunctionReturn(0);
}

/* Numeric phase of C = A * B^T via cyclic pairwise exchange of B's row blocks */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           rank,size, sendsiz, recvsiz, sendto, recvfrom, recvisfrom;
  PetscScalar           *sendbuf, *recvbuf=0, *cv;
  PetscInt              i,cK=A->cmap->N,k,j,bn;
  PetscScalar
_DOne=1.0,_DZero=0.0;
  const PetscScalar     *av,*bv;
  PetscBLASInt          cm, cn, ck, alda, blda, clda;
  MPI_Request           reqs[2];
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  abt  = (Mat_MatTransMultDense*)C->product->data;
  ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr);
  ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr);
  bn   = B->rmap->n;
  /* if B's local block is contiguous (lda == local row count) it can be sent in place;
     otherwise pack it densely into the first exchange buffer */
  if (blda == bn) {
    sendbuf = (PetscScalar*)bv;
  } else {
    sendbuf = abt->buf[0];
    for (k = 0, i = 0; i < cK; i++) {
      for (j = 0; j < bn; j++, k++) {
        sendbuf[k] = bv[i * blda + j];
      }
    }
  }
  /* ring neighbors for the cyclic exchange */
  if (size > 1) {
    sendto   = (rank + size - 1) % size;
    recvfrom = (rank + size + 1) % size;
  } else {
    sendto = recvfrom = 0;
  }
  ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr);
  ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr);
  recvisfrom = rank;
  for (i = 0; i < size; i++) {
    /* we have finished receiving in sending, bufs can be read/modified */
    PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */
    PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom];

    if (nextrecvisfrom != rank) {
      /* start the cyclic
sends from sendbuf, to recvbuf (which will switch to sendbuf) */
      sendsiz = cK * bn;
      recvsiz = cK * nextbn;
      recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1];
      ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr);
      ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr);
    }

    /* local aseq * sendbuf^T */
    ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr);
    if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda));

    if (nextrecvisfrom != rank) {
      /* wait for the sends and receives to complete, swap sendbuf and recvbuf */
      ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    }
    bn         = nextbn;
    recvisfrom = nextrecvisfrom;
    sendbuf    = recvbuf;
  }
  ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr);
  ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Numeric phase of C = A * B^T: gather all of B^T on every rank, then one local GEMM */
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_MatTransMultDense *abt;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscScalar           *cv, *sendbuf, *recvbuf;
  const PetscScalar     *av,*bv;
  PetscInt              blda,i,cK=A->cmap->N,k,j,bn;
  PetscScalar           _DOne=1.0,_DZero=0.0;
  PetscBLASInt          cm, cn, ck, alda, clda;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  abt  =
(Mat_MatTransMultDense*)C->product->data; 2780 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2781 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2782 ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 2783 ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr); 2784 ierr = MatDenseGetArrayWrite(c->A,&cv);CHKERRQ(ierr); 2785 ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr); 2786 ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr); 2787 ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr); 2788 ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr); 2789 ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr); 2790 /* copy transpose of B into buf[0] */ 2791 bn = B->rmap->n; 2792 sendbuf = abt->buf[0]; 2793 recvbuf = abt->buf[1]; 2794 for (k = 0, j = 0; j < bn; j++) { 2795 for (i = 0; i < cK; i++, k++) { 2796 sendbuf[k] = bv[i * blda + j]; 2797 } 2798 } 2799 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2800 ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr); 2801 ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 2802 ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 2803 ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr); 2804 if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda)); 2805 ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 2806 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2807 ierr = MatDenseRestoreArrayWrite(c->A,&cv);CHKERRQ(ierr); 2808 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2809 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2810 PetscFunctionReturn(0); 2811 } 2812 2813 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2814 { 2815 Mat_MatTransMultDense *abt; 2816 PetscErrorCode ierr; 2817 2818 PetscFunctionBegin; 2819 MatCheckProduct(C,3); 2820 if (!C->product->data) 
SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  abt = (Mat_MatTransMultDense*)C->product->data;
  switch (abt->alg) {
  case 1:
    ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr);
    break;
  default:
    ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr);
    break;
  }
  PetscFunctionReturn(0);
}

/* Frees the Elemental mirror matrices attached to a C = A * B dense product */
PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data)
{
  PetscErrorCode   ierr;
  Mat_MatMultDense *ab = (Mat_MatMultDense*)data;

  PetscFunctionBegin;
  ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr);
  ierr = MatDestroy(&ab->Be);CHKERRQ(ierr);
  ierr = PetscFree(ab);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
/* Numeric phase of C = A * B: convert to Elemental, multiply there, convert back */
PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  PetscErrorCode   ierr;
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data");
  ab   = (Mat_MatMultDense*)C->product->data;
  ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr);
  ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr);
  ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Symbolic phase of C = A * B via Elemental mirrors of A and B */
static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode   ierr;
  Mat              Ae,Be,Ce;
  Mat_MatMultDense *ab;

  PetscFunctionBegin;
  MatCheckProduct(C,4);
  if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  /* check local size of A
and B */ 2873 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 2874 SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2875 } 2876 2877 /* create elemental matrices Ae and Be */ 2878 ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr); 2879 ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2880 ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr); 2881 ierr = MatSetUp(Ae);CHKERRQ(ierr); 2882 ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2883 2884 ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr); 2885 ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr); 2886 ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr); 2887 ierr = MatSetUp(Be);CHKERRQ(ierr); 2888 ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2889 2890 /* compute symbolic Ce = Ae*Be */ 2891 ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr); 2892 ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr); 2893 2894 /* setup C */ 2895 ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2896 ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr); 2897 ierr = MatSetUp(C);CHKERRQ(ierr); 2898 2899 /* create data structure for reuse Cdense */ 2900 ierr = PetscNew(&ab);CHKERRQ(ierr); 2901 ab->Ae = Ae; 2902 ab->Be = Be; 2903 ab->Ce = Ce; 2904 2905 C->product->data = ab; 2906 C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2907 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2908 PetscFunctionReturn(0); 2909 } 2910 #endif 2911 /* ----------------------------------------------- */ 2912 #if defined(PETSC_HAVE_ELEMENTAL) 2913 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 2914 { 2915 PetscFunctionBegin; 2916 
C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}
#endif

/* Hooks up the symbolic routine for the A^T*B product after checking row layouts agree */
static PetscErrorCode MatProductSetFromOptions_MPIDense_AtB(Mat C)
{
  Mat_Product *product = C->product;
  Mat         A = product->A,B=product->B;

  PetscFunctionBegin;
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend)
    SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_AtB;
  PetscFunctionReturn(0);
}

/* Chooses the A*B^T algorithm ("allgatherv" or "cyclic") from the options database */
static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;
  const char     *algTypes[2] = {"allgatherv","cyclic"};
  PetscInt       alg,nalg = 2;
  PetscBool      flg = PETSC_FALSE;

  PetscFunctionBegin;
  /* Set default algorithm */
  alg = 0; /* default is allgatherv */
  ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
  }

  /* Get runtime option: the option name depends on which API created the product */
  if (product->api_user) {
    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
  } else {
    ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr);
    ierr = PetscOptionsEnd();CHKERRQ(ierr);
  }
  if (flg) {
    ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr);
  }

  C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense;
  C->ops->productsymbolic          = MatProductSymbolic_ABt;
  PetscFunctionReturn(0);
}

/* Dispatches product-type-specific setup for MATMPIDENSE products; unsupported types fall through */
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C)
{
  PetscErrorCode ierr;
  Mat_Product    *product = C->product;

  PetscFunctionBegin;
  switch (product->type) {
#if defined(PETSC_HAVE_ELEMENTAL)
  case MATPRODUCT_AB:
    ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr);
    break;
#endif
  case MATPRODUCT_AtB:
    ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr);
    break;
  case MATPRODUCT_ABt:
    ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr);
    break;
  default:
    break;
  }
  PetscFunctionReturn(0);
}