1 2 /* 3 Basic functions for basic parallel dense matrices. 4 */ 5 6 #include <../src/mat/impls/dense/mpi/mpidense.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8 #include <petscblaslapack.h> 9 10 /*@ 11 12 MatDenseGetLocalMatrix - For a MATMPIDENSE or MATSEQDENSE matrix returns the sequential 13 matrix that represents the operator. For sequential matrices it returns itself. 14 15 Input Parameter: 16 . A - the Seq or MPI dense matrix 17 18 Output Parameter: 19 . B - the inner matrix 20 21 Level: intermediate 22 23 @*/ 24 PetscErrorCode MatDenseGetLocalMatrix(Mat A,Mat *B) 25 { 26 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 27 PetscErrorCode ierr; 28 PetscBool flg; 29 30 PetscFunctionBegin; 31 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 32 PetscValidPointer(B,2); 33 ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSE,&flg);CHKERRQ(ierr); 34 if (flg) *B = mat->A; 35 else *B = A; 36 PetscFunctionReturn(0); 37 } 38 39 PetscErrorCode MatGetRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 40 { 41 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 42 PetscErrorCode ierr; 43 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 44 45 PetscFunctionBegin; 46 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 47 lrow = row - rstart; 48 ierr = MatGetRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 49 PetscFunctionReturn(0); 50 } 51 52 PetscErrorCode MatRestoreRow_MPIDense(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v) 53 { 54 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 55 PetscErrorCode ierr; 56 PetscInt lrow,rstart = A->rmap->rstart,rend = A->rmap->rend; 57 58 PetscFunctionBegin; 59 if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"only local rows"); 60 lrow = row - rstart; 61 ierr = MatRestoreRow(mat->A,lrow,nz,(const PetscInt**)idx,(const PetscScalar**)v);CHKERRQ(ierr); 62 PetscFunctionReturn(0); 63 } 64 65 
PetscErrorCode MatGetDiagonalBlock_MPIDense(Mat A,Mat *a) 66 { 67 Mat_MPIDense *mdn = (Mat_MPIDense*)A->data; 68 PetscErrorCode ierr; 69 PetscInt m = A->rmap->n,rstart = A->rmap->rstart; 70 PetscScalar *array; 71 MPI_Comm comm; 72 PetscBool flg; 73 Mat B; 74 75 PetscFunctionBegin; 76 ierr = MatHasCongruentLayouts(A,&flg);CHKERRQ(ierr); 77 if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only square matrices supported."); 78 ierr = PetscObjectQuery((PetscObject)A,"DiagonalBlock",(PetscObject*)&B);CHKERRQ(ierr); 79 if (!B) { /* This should use a new function, e.g. MatDenseGetSubMatrix (not create) */ 80 81 ierr = PetscObjectTypeCompare((PetscObject)mdn->A,MATSEQDENSECUDA,&flg);CHKERRQ(ierr); 82 if (flg) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not coded for %s. Send an email to petsc-dev@mcs.anl.gov to request this feature",MATSEQDENSECUDA); 83 ierr = PetscObjectGetComm((PetscObject)(mdn->A),&comm);CHKERRQ(ierr); 84 ierr = MatCreate(comm,&B);CHKERRQ(ierr); 85 ierr = MatSetSizes(B,m,m,m,m);CHKERRQ(ierr); 86 ierr = MatSetType(B,((PetscObject)mdn->A)->type_name);CHKERRQ(ierr); 87 ierr = MatDenseGetArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr); 88 ierr = MatSeqDenseSetPreallocation(B,array+m*rstart);CHKERRQ(ierr); 89 ierr = MatDenseRestoreArrayRead(mdn->A,(const PetscScalar**)&array);CHKERRQ(ierr); 90 ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 91 ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 92 ierr = PetscObjectCompose((PetscObject)A,"DiagonalBlock",(PetscObject)B);CHKERRQ(ierr); 93 *a = B; 94 ierr = MatDestroy(&B);CHKERRQ(ierr); 95 } else *a = B; 96 PetscFunctionReturn(0); 97 } 98 99 PetscErrorCode MatSetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],const PetscScalar v[],InsertMode addv) 100 { 101 Mat_MPIDense *A = (Mat_MPIDense*)mat->data; 102 PetscErrorCode ierr; 103 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 104 PetscBool roworiented = A->roworiented; 105 
106 PetscFunctionBegin; 107 for (i=0; i<m; i++) { 108 if (idxm[i] < 0) continue; 109 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 110 if (idxm[i] >= rstart && idxm[i] < rend) { 111 row = idxm[i] - rstart; 112 if (roworiented) { 113 ierr = MatSetValues(A->A,1,&row,n,idxn,v+i*n,addv);CHKERRQ(ierr); 114 } else { 115 for (j=0; j<n; j++) { 116 if (idxn[j] < 0) continue; 117 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 118 ierr = MatSetValues(A->A,1,&row,1,&idxn[j],v+i+j*m,addv);CHKERRQ(ierr); 119 } 120 } 121 } else if (!A->donotstash) { 122 mat->assembled = PETSC_FALSE; 123 if (roworiented) { 124 ierr = MatStashValuesRow_Private(&mat->stash,idxm[i],n,idxn,v+i*n,PETSC_FALSE);CHKERRQ(ierr); 125 } else { 126 ierr = MatStashValuesCol_Private(&mat->stash,idxm[i],n,idxn,v+i,m,PETSC_FALSE);CHKERRQ(ierr); 127 } 128 } 129 } 130 PetscFunctionReturn(0); 131 } 132 133 PetscErrorCode MatGetValues_MPIDense(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[]) 134 { 135 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 136 PetscErrorCode ierr; 137 PetscInt i,j,rstart = mat->rmap->rstart,rend = mat->rmap->rend,row; 138 139 PetscFunctionBegin; 140 for (i=0; i<m; i++) { 141 if (idxm[i] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */ 142 if (idxm[i] >= mat->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large"); 143 if (idxm[i] >= rstart && idxm[i] < rend) { 144 row = idxm[i] - rstart; 145 for (j=0; j<n; j++) { 146 if (idxn[j] < 0) continue; /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */ 147 if (idxn[j] >= mat->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large"); 148 ierr = MatGetValues(mdn->A,1,&row,1,&idxn[j],v+i*n+j);CHKERRQ(ierr); 149 } 150 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported"); 151 } 152 
PetscFunctionReturn(0); 153 } 154 155 static PetscErrorCode MatDenseGetLDA_MPIDense(Mat A,PetscInt *lda) 156 { 157 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 158 PetscErrorCode ierr; 159 160 PetscFunctionBegin; 161 ierr = MatDenseGetLDA(a->A,lda);CHKERRQ(ierr); 162 PetscFunctionReturn(0); 163 } 164 165 static PetscErrorCode MatDenseSetLDA_MPIDense(Mat A,PetscInt lda) 166 { 167 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 168 PetscErrorCode ierr; 169 170 PetscFunctionBegin; 171 ierr = MatDenseSetLDA(a->A,lda);CHKERRQ(ierr); 172 PetscFunctionReturn(0); 173 } 174 175 static PetscErrorCode MatDenseGetArray_MPIDense(Mat A,PetscScalar **array) 176 { 177 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 178 PetscErrorCode ierr; 179 180 PetscFunctionBegin; 181 ierr = MatDenseGetArray(a->A,array);CHKERRQ(ierr); 182 PetscFunctionReturn(0); 183 } 184 185 static PetscErrorCode MatDenseGetArrayRead_MPIDense(Mat A,const PetscScalar **array) 186 { 187 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 188 PetscErrorCode ierr; 189 190 PetscFunctionBegin; 191 ierr = MatDenseGetArrayRead(a->A,array);CHKERRQ(ierr); 192 PetscFunctionReturn(0); 193 } 194 195 static PetscErrorCode MatDenseGetArrayWrite_MPIDense(Mat A,PetscScalar **array) 196 { 197 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 198 PetscErrorCode ierr; 199 200 PetscFunctionBegin; 201 ierr = MatDenseGetArrayWrite(a->A,array);CHKERRQ(ierr); 202 PetscFunctionReturn(0); 203 } 204 205 static PetscErrorCode MatDensePlaceArray_MPIDense(Mat A,const PetscScalar *array) 206 { 207 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 208 PetscErrorCode ierr; 209 210 PetscFunctionBegin; 211 if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 212 if (a->matinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 213 ierr = MatDensePlaceArray(a->A,array);CHKERRQ(ierr); 214 PetscFunctionReturn(0); 215 } 216 217 static PetscErrorCode MatDenseResetArray_MPIDense(Mat A) 218 { 219 
Mat_MPIDense *a = (Mat_MPIDense*)A->data; 220 PetscErrorCode ierr; 221 222 PetscFunctionBegin; 223 if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 224 if (a->matinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 225 ierr = MatDenseResetArray(a->A);CHKERRQ(ierr); 226 PetscFunctionReturn(0); 227 } 228 229 static PetscErrorCode MatDenseReplaceArray_MPIDense(Mat A,const PetscScalar *array) 230 { 231 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 232 PetscErrorCode ierr; 233 234 PetscFunctionBegin; 235 if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 236 if (a->matinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 237 ierr = MatDenseReplaceArray(a->A,array);CHKERRQ(ierr); 238 PetscFunctionReturn(0); 239 } 240 241 static PetscErrorCode MatCreateSubMatrix_MPIDense(Mat A,IS isrow,IS iscol,MatReuse scall,Mat *B) 242 { 243 Mat_MPIDense *mat = (Mat_MPIDense*)A->data,*newmatd; 244 PetscErrorCode ierr; 245 PetscInt lda,i,j,rstart,rend,nrows,ncols,Ncols,nlrows,nlcols; 246 const PetscInt *irow,*icol; 247 const PetscScalar *v; 248 PetscScalar *bv; 249 Mat newmat; 250 IS iscol_local; 251 MPI_Comm comm_is,comm_mat; 252 253 PetscFunctionBegin; 254 ierr = PetscObjectGetComm((PetscObject)A,&comm_mat);CHKERRQ(ierr); 255 ierr = PetscObjectGetComm((PetscObject)iscol,&comm_is);CHKERRQ(ierr); 256 if (comm_mat != comm_is) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NOTSAMECOMM,"IS communicator must match matrix communicator"); 257 258 ierr = ISAllGather(iscol,&iscol_local);CHKERRQ(ierr); 259 ierr = ISGetIndices(isrow,&irow);CHKERRQ(ierr); 260 ierr = ISGetIndices(iscol_local,&icol);CHKERRQ(ierr); 261 ierr = ISGetLocalSize(isrow,&nrows);CHKERRQ(ierr); 262 ierr = ISGetLocalSize(iscol,&ncols);CHKERRQ(ierr); 263 ierr = ISGetSize(iscol,&Ncols);CHKERRQ(ierr); /* global number of columns, size of 
iscol_local */ 264 265 /* No parallel redistribution currently supported! Should really check each index set 266 to comfirm that it is OK. ... Currently supports only submatrix same partitioning as 267 original matrix! */ 268 269 ierr = MatGetLocalSize(A,&nlrows,&nlcols);CHKERRQ(ierr); 270 ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr); 271 272 /* Check submatrix call */ 273 if (scall == MAT_REUSE_MATRIX) { 274 /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Reused submatrix wrong size"); */ 275 /* Really need to test rows and column sizes! */ 276 newmat = *B; 277 } else { 278 /* Create and fill new matrix */ 279 ierr = MatCreate(PetscObjectComm((PetscObject)A),&newmat);CHKERRQ(ierr); 280 ierr = MatSetSizes(newmat,nrows,ncols,PETSC_DECIDE,Ncols);CHKERRQ(ierr); 281 ierr = MatSetType(newmat,((PetscObject)A)->type_name);CHKERRQ(ierr); 282 ierr = MatMPIDenseSetPreallocation(newmat,NULL);CHKERRQ(ierr); 283 } 284 285 /* Now extract the data pointers and do the copy, column at a time */ 286 newmatd = (Mat_MPIDense*)newmat->data; 287 ierr = MatDenseGetArray(newmatd->A,&bv);CHKERRQ(ierr); 288 ierr = MatDenseGetArrayRead(mat->A,&v);CHKERRQ(ierr); 289 ierr = MatDenseGetLDA(mat->A,&lda);CHKERRQ(ierr); 290 for (i=0; i<Ncols; i++) { 291 const PetscScalar *av = v + lda*icol[i]; 292 for (j=0; j<nrows; j++) { 293 *bv++ = av[irow[j] - rstart]; 294 } 295 } 296 ierr = MatDenseRestoreArrayRead(mat->A,&v);CHKERRQ(ierr); 297 ierr = MatDenseRestoreArray(newmatd->A,&bv);CHKERRQ(ierr); 298 299 /* Assemble the matrices so that the correct flags are set */ 300 ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 301 ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 302 303 /* Free work space */ 304 ierr = ISRestoreIndices(isrow,&irow);CHKERRQ(ierr); 305 ierr = ISRestoreIndices(iscol_local,&icol);CHKERRQ(ierr); 306 ierr = ISDestroy(&iscol_local);CHKERRQ(ierr); 307 *B = newmat; 308 PetscFunctionReturn(0); 309 } 310 311 PetscErrorCode 
MatDenseRestoreArray_MPIDense(Mat A,PetscScalar **array) 312 { 313 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 314 PetscErrorCode ierr; 315 316 PetscFunctionBegin; 317 ierr = MatDenseRestoreArray(a->A,array);CHKERRQ(ierr); 318 PetscFunctionReturn(0); 319 } 320 321 PetscErrorCode MatDenseRestoreArrayRead_MPIDense(Mat A,const PetscScalar **array) 322 { 323 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 324 PetscErrorCode ierr; 325 326 PetscFunctionBegin; 327 ierr = MatDenseRestoreArrayRead(a->A,array);CHKERRQ(ierr); 328 PetscFunctionReturn(0); 329 } 330 331 PetscErrorCode MatDenseRestoreArrayWrite_MPIDense(Mat A,PetscScalar **array) 332 { 333 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 334 PetscErrorCode ierr; 335 336 PetscFunctionBegin; 337 ierr = MatDenseRestoreArrayWrite(a->A,array);CHKERRQ(ierr); 338 PetscFunctionReturn(0); 339 } 340 341 PetscErrorCode MatAssemblyBegin_MPIDense(Mat mat,MatAssemblyType mode) 342 { 343 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 344 PetscErrorCode ierr; 345 PetscInt nstash,reallocs; 346 347 PetscFunctionBegin; 348 if (mdn->donotstash || mat->nooffprocentries) return(0); 349 350 ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr); 351 ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr); 352 ierr = PetscInfo2(mdn->A,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr); 353 PetscFunctionReturn(0); 354 } 355 356 PetscErrorCode MatAssemblyEnd_MPIDense(Mat mat,MatAssemblyType mode) 357 { 358 Mat_MPIDense *mdn=(Mat_MPIDense*)mat->data; 359 PetscErrorCode ierr; 360 PetscInt i,*row,*col,flg,j,rstart,ncols; 361 PetscMPIInt n; 362 PetscScalar *val; 363 364 PetscFunctionBegin; 365 if (!mdn->donotstash && !mat->nooffprocentries) { 366 /* wait on receives */ 367 while (1) { 368 ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr); 369 if (!flg) break; 370 371 for (i=0; i<n;) { 372 /* Now identify the consecutive vals belonging to the same row 
*/ 373 for (j=i,rstart=row[j]; j<n; j++) { 374 if (row[j] != rstart) break; 375 } 376 if (j < n) ncols = j-i; 377 else ncols = n-i; 378 /* Now assemble all these values with a single function call */ 379 ierr = MatSetValues_MPIDense(mat,1,row+i,ncols,col+i,val+i,mat->insertmode);CHKERRQ(ierr); 380 i = j; 381 } 382 } 383 ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr); 384 } 385 386 ierr = MatAssemblyBegin(mdn->A,mode);CHKERRQ(ierr); 387 ierr = MatAssemblyEnd(mdn->A,mode);CHKERRQ(ierr); 388 389 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { 390 ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr); 391 } 392 PetscFunctionReturn(0); 393 } 394 395 PetscErrorCode MatZeroEntries_MPIDense(Mat A) 396 { 397 PetscErrorCode ierr; 398 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 399 400 PetscFunctionBegin; 401 ierr = MatZeroEntries(l->A);CHKERRQ(ierr); 402 PetscFunctionReturn(0); 403 } 404 405 PetscErrorCode MatZeroRows_MPIDense(Mat A,PetscInt n,const PetscInt rows[],PetscScalar diag,Vec x,Vec b) 406 { 407 Mat_MPIDense *l = (Mat_MPIDense*)A->data; 408 PetscErrorCode ierr; 409 PetscInt i,len,*lrows; 410 411 PetscFunctionBegin; 412 /* get locally owned rows */ 413 ierr = PetscLayoutMapLocal(A->rmap,n,rows,&len,&lrows,NULL);CHKERRQ(ierr); 414 /* fix right hand side if needed */ 415 if (x && b) { 416 const PetscScalar *xx; 417 PetscScalar *bb; 418 419 ierr = VecGetArrayRead(x, &xx);CHKERRQ(ierr); 420 ierr = VecGetArrayWrite(b, &bb);CHKERRQ(ierr); 421 for (i=0;i<len;++i) bb[lrows[i]] = diag*xx[lrows[i]]; 422 ierr = VecRestoreArrayRead(x, &xx);CHKERRQ(ierr); 423 ierr = VecRestoreArrayWrite(b, &bb);CHKERRQ(ierr); 424 } 425 ierr = MatZeroRows(l->A,len,lrows,0.0,NULL,NULL);CHKERRQ(ierr); 426 if (diag != 0.0) { 427 Vec d; 428 429 ierr = MatCreateVecs(A,NULL,&d);CHKERRQ(ierr); 430 ierr = VecSet(d,diag);CHKERRQ(ierr); 431 ierr = MatDiagonalSet(A,d,INSERT_VALUES);CHKERRQ(ierr); 432 ierr = VecDestroy(&d);CHKERRQ(ierr); 433 } 434 ierr = PetscFree(lrows);CHKERRQ(ierr); 435 
PetscFunctionReturn(0); 436 } 437 438 PETSC_INTERN PetscErrorCode MatMult_SeqDense(Mat,Vec,Vec); 439 PETSC_INTERN PetscErrorCode MatMultAdd_SeqDense(Mat,Vec,Vec,Vec); 440 PETSC_INTERN PetscErrorCode MatMultTranspose_SeqDense(Mat,Vec,Vec); 441 PETSC_INTERN PetscErrorCode MatMultTransposeAdd_SeqDense(Mat,Vec,Vec,Vec); 442 443 PetscErrorCode MatMult_MPIDense(Mat mat,Vec xx,Vec yy) 444 { 445 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 446 PetscErrorCode ierr; 447 const PetscScalar *ax; 448 PetscScalar *ay; 449 450 PetscFunctionBegin; 451 ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr); 452 ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr); 453 ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 454 ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 455 ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr); 456 ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr); 457 ierr = (*mdn->A->ops->mult)(mdn->A,mdn->lvec,yy);CHKERRQ(ierr); 458 PetscFunctionReturn(0); 459 } 460 461 PetscErrorCode MatMultAdd_MPIDense(Mat mat,Vec xx,Vec yy,Vec zz) 462 { 463 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 464 PetscErrorCode ierr; 465 const PetscScalar *ax; 466 PetscScalar *ay; 467 468 PetscFunctionBegin; 469 ierr = VecGetArrayReadInPlace(xx,&ax);CHKERRQ(ierr); 470 ierr = VecGetArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr); 471 ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 472 ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ax,ay);CHKERRQ(ierr); 473 ierr = VecRestoreArrayInPlace(mdn->lvec,&ay);CHKERRQ(ierr); 474 ierr = VecRestoreArrayReadInPlace(xx,&ax);CHKERRQ(ierr); 475 ierr = (*mdn->A->ops->multadd)(mdn->A,mdn->lvec,yy,zz);CHKERRQ(ierr); 476 PetscFunctionReturn(0); 477 } 478 479 PetscErrorCode MatMultTranspose_MPIDense(Mat A,Vec xx,Vec yy) 480 { 481 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 482 PetscErrorCode ierr; 483 const PetscScalar *ax; 484 PetscScalar *ay; 485 486 PetscFunctionBegin; 487 ierr = 
VecSet(yy,0.0);CHKERRQ(ierr); 488 ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr); 489 ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 490 ierr = VecGetArrayInPlace(yy,&ay);CHKERRQ(ierr); 491 ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 492 ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 493 ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 494 ierr = VecRestoreArrayInPlace(yy,&ay);CHKERRQ(ierr); 495 PetscFunctionReturn(0); 496 } 497 498 PetscErrorCode MatMultTransposeAdd_MPIDense(Mat A,Vec xx,Vec yy,Vec zz) 499 { 500 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 501 PetscErrorCode ierr; 502 const PetscScalar *ax; 503 PetscScalar *ay; 504 505 PetscFunctionBegin; 506 ierr = VecCopy(yy,zz);CHKERRQ(ierr); 507 ierr = (*a->A->ops->multtranspose)(a->A,xx,a->lvec);CHKERRQ(ierr); 508 ierr = VecGetArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 509 ierr = VecGetArrayInPlace(zz,&ay);CHKERRQ(ierr); 510 ierr = PetscSFReduceBegin(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 511 ierr = PetscSFReduceEnd(a->Mvctx,MPIU_SCALAR,ax,ay,MPIU_SUM);CHKERRQ(ierr); 512 ierr = VecRestoreArrayReadInPlace(a->lvec,&ax);CHKERRQ(ierr); 513 ierr = VecRestoreArrayInPlace(zz,&ay);CHKERRQ(ierr); 514 PetscFunctionReturn(0); 515 } 516 517 PetscErrorCode MatGetDiagonal_MPIDense(Mat A,Vec v) 518 { 519 Mat_MPIDense *a = (Mat_MPIDense*)A->data; 520 PetscErrorCode ierr; 521 PetscInt lda,len,i,n,m = A->rmap->n,radd; 522 PetscScalar *x,zero = 0.0; 523 const PetscScalar *av; 524 525 PetscFunctionBegin; 526 ierr = VecSet(v,zero);CHKERRQ(ierr); 527 ierr = VecGetArray(v,&x);CHKERRQ(ierr); 528 ierr = VecGetSize(v,&n);CHKERRQ(ierr); 529 if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming mat and vec"); 530 len = PetscMin(a->A->rmap->n,a->A->cmap->n); 531 radd = A->rmap->rstart*m; 532 ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 533 ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr); 534 
for (i=0; i<len; i++) { 535 x[i] = av[radd + i*lda + i]; 536 } 537 ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 538 ierr = VecRestoreArray(v,&x);CHKERRQ(ierr); 539 PetscFunctionReturn(0); 540 } 541 542 PetscErrorCode MatDestroy_MPIDense(Mat mat) 543 { 544 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 545 PetscErrorCode ierr; 546 547 PetscFunctionBegin; 548 #if defined(PETSC_USE_LOG) 549 PetscLogObjectState((PetscObject)mat,"Rows=%D, Cols=%D",mat->rmap->N,mat->cmap->N); 550 #endif 551 ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr); 552 if (mdn->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first"); 553 if (mdn->matinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first"); 554 ierr = MatDestroy(&mdn->A);CHKERRQ(ierr); 555 ierr = VecDestroy(&mdn->lvec);CHKERRQ(ierr); 556 ierr = PetscSFDestroy(&mdn->Mvctx);CHKERRQ(ierr); 557 ierr = VecDestroy(&mdn->cvec);CHKERRQ(ierr); 558 ierr = MatDestroy(&mdn->cmat);CHKERRQ(ierr); 559 560 ierr = PetscFree(mat->data);CHKERRQ(ierr); 561 ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr); 562 563 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",NULL);CHKERRQ(ierr); 564 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",NULL);CHKERRQ(ierr); 565 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",NULL);CHKERRQ(ierr); 566 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",NULL);CHKERRQ(ierr); 567 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",NULL);CHKERRQ(ierr); 568 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",NULL);CHKERRQ(ierr); 569 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",NULL);CHKERRQ(ierr); 570 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",NULL);CHKERRQ(ierr); 571 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",NULL);CHKERRQ(ierr); 572 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",NULL);CHKERRQ(ierr); 573 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",NULL);CHKERRQ(ierr); 574 #if defined(PETSC_HAVE_ELEMENTAL) 575 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",NULL);CHKERRQ(ierr); 576 #endif 577 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",NULL);CHKERRQ(ierr); 578 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",NULL);CHKERRQ(ierr); 579 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",NULL);CHKERRQ(ierr); 580 #if defined (PETSC_HAVE_CUDA) 581 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",NULL);CHKERRQ(ierr); 582 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",NULL);CHKERRQ(ierr); 583 #endif 584 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",NULL);CHKERRQ(ierr); 585 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",NULL);CHKERRQ(ierr); 586 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",NULL);CHKERRQ(ierr); 587 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",NULL);CHKERRQ(ierr); 588 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",NULL);CHKERRQ(ierr); 589 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",NULL);CHKERRQ(ierr); 590 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",NULL);CHKERRQ(ierr); 591 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",NULL);CHKERRQ(ierr); 592 ierr = 
PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",NULL);CHKERRQ(ierr); 593 ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",NULL);CHKERRQ(ierr); 594 PetscFunctionReturn(0); 595 } 596 597 PETSC_INTERN PetscErrorCode MatView_SeqDense(Mat,PetscViewer); 598 599 #include <petscdraw.h> 600 static PetscErrorCode MatView_MPIDense_ASCIIorDraworSocket(Mat mat,PetscViewer viewer) 601 { 602 Mat_MPIDense *mdn = (Mat_MPIDense*)mat->data; 603 PetscErrorCode ierr; 604 PetscMPIInt rank = mdn->rank; 605 PetscViewerType vtype; 606 PetscBool iascii,isdraw; 607 PetscViewer sviewer; 608 PetscViewerFormat format; 609 610 PetscFunctionBegin; 611 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 612 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 613 if (iascii) { 614 ierr = PetscViewerGetType(viewer,&vtype);CHKERRQ(ierr); 615 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 616 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 617 MatInfo info; 618 ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr); 619 ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr); 620 ierr = PetscViewerASCIISynchronizedPrintf(viewer," [%d] local rows %D nz %D nz alloced %D mem %D \n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,(PetscInt)info.memory);CHKERRQ(ierr); 621 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 622 ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr); 623 ierr = PetscSFView(mdn->Mvctx,viewer);CHKERRQ(ierr); 624 PetscFunctionReturn(0); 625 } else if (format == PETSC_VIEWER_ASCII_INFO) { 626 PetscFunctionReturn(0); 627 } 628 } else if (isdraw) { 629 PetscDraw draw; 630 PetscBool isnull; 631 632 ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr); 633 ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); 634 if (isnull) PetscFunctionReturn(0); 635 } 636 637 { 638 /* assemble the entire matrix onto first 
processor. */ 639 Mat A; 640 PetscInt M = mat->rmap->N,N = mat->cmap->N,m,row,i,nz; 641 PetscInt *cols; 642 PetscScalar *vals; 643 644 ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr); 645 if (!rank) { 646 ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr); 647 } else { 648 ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr); 649 } 650 /* Since this is a temporary matrix, MATMPIDENSE instead of ((PetscObject)A)->type_name here is probably acceptable. */ 651 ierr = MatSetType(A,MATMPIDENSE);CHKERRQ(ierr); 652 ierr = MatMPIDenseSetPreallocation(A,NULL);CHKERRQ(ierr); 653 ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr); 654 655 /* Copy the matrix ... This isn't the most efficient means, 656 but it's quick for now */ 657 A->insertmode = INSERT_VALUES; 658 659 row = mat->rmap->rstart; 660 m = mdn->A->rmap->n; 661 for (i=0; i<m; i++) { 662 ierr = MatGetRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 663 ierr = MatSetValues_MPIDense(A,1,&row,nz,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 664 ierr = MatRestoreRow_MPIDense(mat,row,&nz,&cols,&vals);CHKERRQ(ierr); 665 row++; 666 } 667 668 ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 669 ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 670 ierr = PetscViewerGetSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 671 if (!rank) { 672 ierr = PetscObjectSetName((PetscObject)((Mat_MPIDense*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr); 673 ierr = MatView_SeqDense(((Mat_MPIDense*)(A->data))->A,sviewer);CHKERRQ(ierr); 674 } 675 ierr = PetscViewerRestoreSubViewer(viewer,PETSC_COMM_SELF,&sviewer);CHKERRQ(ierr); 676 ierr = PetscViewerFlush(viewer);CHKERRQ(ierr); 677 ierr = MatDestroy(&A);CHKERRQ(ierr); 678 } 679 PetscFunctionReturn(0); 680 } 681 682 PetscErrorCode MatView_MPIDense(Mat mat,PetscViewer viewer) 683 { 684 PetscErrorCode ierr; 685 PetscBool iascii,isbinary,isdraw,issocket; 686 687 PetscFunctionBegin; 688 ierr = 
PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 689 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr); 690 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr); 691 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr); 692 693 if (iascii || issocket || isdraw) { 694 ierr = MatView_MPIDense_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr); 695 } else if (isbinary) { 696 ierr = MatView_Dense_Binary(mat,viewer);CHKERRQ(ierr); 697 } 698 PetscFunctionReturn(0); 699 } 700 701 PetscErrorCode MatGetInfo_MPIDense(Mat A,MatInfoType flag,MatInfo *info) 702 { 703 Mat_MPIDense *mat = (Mat_MPIDense*)A->data; 704 Mat mdn = mat->A; 705 PetscErrorCode ierr; 706 PetscLogDouble isend[5],irecv[5]; 707 708 PetscFunctionBegin; 709 info->block_size = 1.0; 710 711 ierr = MatGetInfo(mdn,MAT_LOCAL,info);CHKERRQ(ierr); 712 713 isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded; 714 isend[3] = info->memory; isend[4] = info->mallocs; 715 if (flag == MAT_LOCAL) { 716 info->nz_used = isend[0]; 717 info->nz_allocated = isend[1]; 718 info->nz_unneeded = isend[2]; 719 info->memory = isend[3]; 720 info->mallocs = isend[4]; 721 } else if (flag == MAT_GLOBAL_MAX) { 722 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 723 724 info->nz_used = irecv[0]; 725 info->nz_allocated = irecv[1]; 726 info->nz_unneeded = irecv[2]; 727 info->memory = irecv[3]; 728 info->mallocs = irecv[4]; 729 } else if (flag == MAT_GLOBAL_SUM) { 730 ierr = MPIU_Allreduce(isend,irecv,5,MPIU_PETSCLOGDOUBLE,MPI_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 731 732 info->nz_used = irecv[0]; 733 info->nz_allocated = irecv[1]; 734 info->nz_unneeded = irecv[2]; 735 info->memory = irecv[3]; 736 info->mallocs = irecv[4]; 737 } 738 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky 
  */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* Set a matrix option: structural options are forwarded to the local (sequential)
   block a->A; options that do not apply to dense storage are logged and ignored. */
PetscErrorCode MatSetOption_MPIDense(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
    MatCheckPreallocated(A,1);
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A,1);
    /* remember orientation locally (used by MatSetValues) and forward it */
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_USE_HASH_TABLE:
  case MAT_SORTED_FULL:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* when set, off-process entries passed to MatSetValues are dropped instead of stashed */
    a->donotstash = flg;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_IGNORE_LOWER_TRIANGULAR:
  case MAT_IGNORE_ZERO_ENTRIES:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %s",MatOptions[op]);
  }
  PetscFunctionReturn(0);
}

/* Diagonal scaling A := diag(ll) * A * diag(rr); either vector may be NULL.
   The left vector matches the row layout; the right vector is scattered to
   every process through the Mvctx star forest since each process stores all
   N columns of its row block. */
PetscErrorCode MatDiagonalScale_MPIDense(Mat A,Vec ll,Vec rr)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  const PetscScalar *l;
  PetscScalar       x,*v,*vv,*r;
  PetscErrorCode    ierr;
  PetscInt          i,j,s2a,s3a,s2,s3,m=mdn->A->rmap->n,n=mdn->A->cmap->n,lda;

  PetscFunctionBegin;
  ierr = MatDenseGetArray(mdn->A,&vv);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(mdn->A,&lda);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&s2,&s3);CHKERRQ(ierr);
  if (ll) {
    ierr = VecGetLocalSize(ll,&s2a);CHKERRQ(ierr);
    if (s2a != s2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left scaling vector non-conforming local size, %D != %D", s2a, s2);
    ierr = VecGetArrayRead(ll,&l);CHKERRQ(ierr);
    for (i=0; i<m; i++) {
      x = l[i];
      v = vv + i;               /* walk row i of the column-major local array, stride lda */
      for (j=0; j<n; j++) { (*v) *= x; v+= lda;}
    }
    ierr = VecRestoreArrayRead(ll,&l);CHKERRQ(ierr);
    ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
  }
  if (rr) {
    const PetscScalar *ar;

    ierr = VecGetLocalSize(rr,&s3a);CHKERRQ(ierr);
    if (s3a != s3) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Right scaling vec non-conforming local size, %d != %d.", s3a, s3);
    ierr = VecGetArrayRead(rr,&ar);CHKERRQ(ierr);
    ierr = VecGetArray(mdn->lvec,&r);CHKERRQ(ierr);
    /* gather the full right-scaling vector into the local work vector lvec */
    ierr = PetscSFBcastBegin(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
    ierr = PetscSFBcastEnd(mdn->Mvctx,MPIU_SCALAR,ar,r);CHKERRQ(ierr);
    ierr = VecRestoreArrayRead(rr,&ar);CHKERRQ(ierr);
    for (i=0; i<n; i++) {
      x = r[i];
      v = vv + i*lda;           /* column i is contiguous in the column-major layout */
      for (j=0; j<m; j++) (*v++) *= x;
    }
    ierr = VecRestoreArray(mdn->lvec,&r);CHKERRQ(ierr);
    ierr = PetscLogFlops(1.0*n*m);CHKERRQ(ierr);
  }
  ierr = MatDenseRestoreArray(mdn->A,&vv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Matrix norms: Frobenius and 1-norm are computed from the local block and
   combined with an allreduce; infinity norm reduces the local row norms with
   MAX. The 2-norm is not supported in parallel. */
PetscErrorCode MatNorm_MPIDense(Mat A,NormType type,PetscReal *nrm)
{
  Mat_MPIDense      *mdn = (Mat_MPIDense*)A->data;
  PetscErrorCode    ierr;
  PetscInt          i,j;
  PetscReal         sum = 0.0;
  const PetscScalar *av,*v;

  PetscFunctionBegin;
  ierr = MatDenseGetArrayRead(mdn->A,&av);CHKERRQ(ierr);
  v    = av;
  if (mdn->size == 1) {
    /* single process: the local block is the whole matrix */
    ierr = MatNorm(mdn->A,type,nrm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      for (i=0; i<mdn->A->cmap->n*mdn->A->rmap->n; i++) {
        sum += PetscRealPart(PetscConj(*v)*(*v)); v++;
      }
      ierr = MPIU_Allreduce(&sum,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      *nrm = PetscSqrtReal(*nrm);
      ierr = PetscLogFlops(2.0*mdn->A->cmap->n*mdn->A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_1) {
      /* tmp[j] = local column sums; reduce over all ranks, then take the max column */
      PetscReal *tmp,*tmp2;
      ierr = PetscCalloc2(A->cmap->N,&tmp,A->cmap->N,&tmp2);CHKERRQ(ierr);
      *nrm = 0.0;
      v    = av;
      for (j=0; j<mdn->A->cmap->n; j++) {
        for (i=0; i<mdn->A->rmap->n; i++) {
          tmp[j] += PetscAbsScalar(*v); v++;
        }
      }
      ierr = MPIU_Allreduce(tmp,tmp2,A->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
      for (j=0; j<A->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      ierr = PetscFree2(tmp,tmp2);CHKERRQ(ierr);
      ierr = PetscLogFlops(A->cmap->n*A->rmap->n);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp;
      ierr = MatNorm(mdn->A,type,&ntemp);CHKERRQ(ierr);
      ierr = MPIU_Allreduce(&ntemp,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for two norm");
  }
  ierr = MatDenseRestoreArrayRead(mdn->A,&av);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Transpose by pushing each local column as a row of the result via
   MatSetValues (communication happens during assembly).  MAT_INPLACE_MATRIX
   is handled by building a fresh matrix and merging headers. */
PetscErrorCode MatTranspose_MPIDense(Mat A,MatReuse reuse,Mat *matout)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  Mat            B;
  PetscInt       M = A->rmap->N,N = A->cmap->N,m,n,*rwork,rstart = A->rmap->rstart;
  PetscErrorCode ierr;
  PetscInt       j,i,lda;
  PetscScalar    *v;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
    ierr = MatSetSizes(B,A->cmap->n,A->rmap->n,N,M);CHKERRQ(ierr);
    ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
    ierr = MatMPIDenseSetPreallocation(B,NULL);CHKERRQ(ierr);
  } else B = *matout;

  m = a->A->rmap->n; n = a->A->cmap->n;
  ierr = MatDenseGetArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = PetscMalloc1(m,&rwork);CHKERRQ(ierr);
  for (i=0; i<m; i++) rwork[i] = rstart + i;   /* global row indices of the local block */
  for (j=0; j<n; j++) {
    /* local column j becomes (part of) global row j of the transpose */
    ierr = MatSetValues(B,1,&j,m,rwork,v,INSERT_VALUES);CHKERRQ(ierr);
    v   += lda;
  }
  ierr = MatDenseRestoreArrayRead(a->A,(const PetscScalar**)&v);CHKERRQ(ierr);
  ierr = PetscFree(rwork);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    ierr = MatHeaderMerge(A,&B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIDense(Mat,MatDuplicateOption,Mat*);
PETSC_INTERN PetscErrorCode MatScale_MPIDense(Mat,PetscScalar);

/* Finalize layouts and preallocate (zeroed) storage if the user has not. */
PetscErrorCode MatSetUp_MPIDense(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  if (!A->preallocated) {
    ierr = MatMPIDenseSetPreallocation(A,0);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Y := Y + alpha*X, delegated to the local blocks (layouts must conform). */
PetscErrorCode MatAXPY_MPIDense(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *A = (Mat_MPIDense*)Y->data, *B = (Mat_MPIDense*)X->data;

  PetscFunctionBegin;
  ierr = MatAXPY(A->A,alpha,B->A,str);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Entrywise complex conjugate, delegated to the local block. */
PetscErrorCode MatConjugate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatConjugate(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Keep only the real part of each entry, delegated to the local block. */
PetscErrorCode MatRealPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Keep only the imaginary part of each entry, delegated to the local block. */
PetscErrorCode MatImaginaryPart_MPIDense(Mat A)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Extract column `col` into vector v by dispatching to the local block's
   getcolumnvector implementation. */
static PetscErrorCode MatGetColumnVector_MPIDense(Mat A,Vec v,PetscInt col)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;

  PetscFunctionBegin;
  if (!a->A) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing local matrix");
  if (!a->A->ops->getcolumnvector) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Missing get column operation");
  ierr = (*a->A->ops->getcolumnvector)(a->A,v,col);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatGetColumnNorms_SeqDense(Mat,NormType,PetscReal*);

/* Column norms: compute local per-column norms, then reduce across ranks
   (MAX for infinity norm, SUM otherwise; squares are summed for the 2-norm
   and square-rooted after the reduction). */
PetscErrorCode MatGetColumnNorms_MPIDense(Mat A,NormType type,PetscReal *norms)
{
  PetscErrorCode ierr;
  PetscInt       i,n;
  Mat_MPIDense   *a = (Mat_MPIDense*) A->data;
  PetscReal      *work;

  PetscFunctionBegin;
  ierr = MatGetSize(A,NULL,&n);CHKERRQ(ierr);
  ierr = PetscMalloc1(n,&work);CHKERRQ(ierr);
  ierr = MatGetColumnNorms_SeqDense(a->A,type,work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) work[i] *= work[i];   /* sum of squares is additive across ranks */
  }
  if (type == NORM_INFINITY) {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_MAX,A->hdr.comm);CHKERRQ(ierr);
  } else {
    ierr = MPIU_Allreduce(work,norms,n,MPIU_REAL,MPIU_SUM,A->hdr.comm);CHKERRQ(ierr);
  }
  ierr = PetscFree(work);CHKERRQ(ierr);
  if (type == NORM_2) {
    for (i=0; i<n; i++) norms[i] = PetscSqrtReal(norms[i]);
  }
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/* Expose column `col` as an MPI CUDA vector aliasing the device array of the
   local block; must be paired with MatDenseRestoreColumnVec. */
static PetscErrorCode MatDenseGetColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily created work vector; the data pointer is placed per call */
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;   /* stored +1 so column 0 still reads as "in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Release the column vector obtained with MatDenseGetColumnVec. */
static PetscErrorCode MatDenseRestoreColumnVec_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v = NULL;
  PetscFunctionReturn(0);
}

/* Read-only variant of MatDenseGetColumnVec: additionally read-locks the vector. */
static PetscErrorCode MatDenseGetColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr);
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Release a read-only column vector; pops the read lock before resetting. */
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v = NULL;
  PetscFunctionReturn(0);
}

/* Write-only variant of MatDenseGetColumnVec (prior contents need not be valid). */
static PetscErrorCode MatDenseGetColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr = VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseCUDAGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Release a write-only column vector obtained with MatDenseGetColumnVecWrite. */
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDenseCUDA(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseCUDARestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecCUDAResetArray(a->cvec);CHKERRQ(ierr);
  *v = NULL;
  PetscFunctionReturn(0);
}

/* View columns [cbegin,cend) as a dense CUDA matrix aliasing this matrix's
   device storage; reuses the cached cmat when the column count matches. */
static PetscErrorCode MatDenseGetSubMatrix_MPIDenseCUDA(Mat A,PetscInt cbegin,PetscInt cend,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (a->cmat && cend-cbegin != a->cmat->cmap->N) {
    ierr = MatDestroy(&a->cmat);CHKERRQ(ierr);   /* cached view has wrong width; rebuild */
  }
  ierr = MatDenseCUDAGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  if (!a->cmat) {
    ierr = MatCreateDenseCUDA(PetscObjectComm((PetscObject)A),A->rmap->n,PETSC_DECIDE,A->rmap->N,cend-cbegin,(PetscScalar*)a->ptrinuse + (size_t)cbegin * (size_t)lda,&a->cmat);CHKERRQ(ierr);
    ierr = MatDenseSetLDA(a->cmat,lda);CHKERRQ(ierr);
  } else {
    ierr = MatDenseCUDAPlaceArray(a->cmat,a->ptrinuse + (size_t)cbegin * (size_t)lda);CHKERRQ(ierr);
  }
  a->matinuse = cbegin + 1;   /* stored +1 so cbegin==0 still reads as "in use" */
  *v = a->cmat;
  PetscFunctionReturn(0);
}

/* Release the submatrix view obtained with MatDenseGetSubMatrix. */
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDenseCUDA(Mat A,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->matinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
  if (!a->cmat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column matrix");
  a->matinuse = 0;
  ierr = MatDenseCUDARestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = MatDenseCUDAResetArray(a->cmat);CHKERRQ(ierr);
  *v = NULL;
  PetscFunctionReturn(0);
}

/* Replace the local block's device array with user-provided storage. */
static PetscErrorCode MatDenseCUDAPlaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (l->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (l->matinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseCUDAPlaceArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Undo a previous MatDenseCUDAPlaceArray on the local block. */
static PetscErrorCode MatDenseCUDAResetArray_MPIDenseCUDA(Mat A)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (l->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (l->matinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseCUDAResetArray(l->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Permanently replace the local block's device array (ownership transfers). */
static PetscErrorCode MatDenseCUDAReplaceArray_MPIDenseCUDA(Mat A, const PetscScalar *a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (l->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (l->matinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  ierr = MatDenseCUDAReplaceArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Thin forwarding wrapper: write access to the local block's device array. */
static PetscErrorCode MatDenseCUDAGetArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDAGetArrayWrite(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode
MatDenseCUDARestoreArrayWrite_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDARestoreArrayWrite(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Thin forwarding wrapper: read access to the local block's device array. */
static PetscErrorCode MatDenseCUDAGetArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDAGetArrayRead(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Thin forwarding wrapper: release read access to the device array. */
static PetscErrorCode MatDenseCUDARestoreArrayRead_MPIDenseCUDA(Mat A, const PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDARestoreArrayRead(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Thin forwarding wrapper: read/write access to the device array. */
static PetscErrorCode MatDenseCUDAGetArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDAGetArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Thin forwarding wrapper: release read/write access to the device array. */
static PetscErrorCode MatDenseCUDARestoreArray_MPIDenseCUDA(Mat A, PetscScalar **a)
{
  Mat_MPIDense   *l = (Mat_MPIDense*) A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseCUDARestoreArray(l->A,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Host (CPU) column-vec/submatrix implementations, defined later in this file. */
static PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat,PetscInt,PetscInt,Mat*);
static PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat,PetscInt,Vec*);
static PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat,Mat*);

/* Switch between CPU and GPU execution: binds the local block and swaps the
   composed column-vec/submatrix methods to the matching (host or CUDA) set. */
static PetscErrorCode MatBindToCPU_MPIDenseCUDA(Mat mat,PetscBool bind)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (d->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (d->matinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (d->A) {
    ierr = MatBindToCPU(d->A,bind);CHKERRQ(ierr);
  }
  mat->boundtocpu = bind;
  if (!bind) {
    PetscBool iscuda;

    /* discard cached host-side work objects so they get recreated as CUDA ones */
    ierr = PetscObjectTypeCompare((PetscObject)d->cvec,VECMPICUDA,&iscuda);CHKERRQ(ierr);
    if (!iscuda) {
      ierr = VecDestroy(&d->cvec);CHKERRQ(ierr);
      ierr = MatDestroy(&d->cmat);CHKERRQ(ierr);
    }
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDenseCUDA);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDenseCUDA);CHKERRQ(ierr);
  } else {
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/* Preallocate MATMPIDENSECUDA: builds the local SEQDENSECUDA block sized
   (local rows) x (ALL global columns) and hands it d_data (device memory). */
PetscErrorCode MatMPIDenseCUDASetPreallocation(Mat A, PetscScalar *d_data)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscBool      iscuda;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(A,MAT_CLASSID,1);
  ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  if (!iscuda) PetscFunctionReturn(0);   /* silently a no-op for other types */
  ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
  if (!d->A) {
    ierr = MatCreate(PETSC_COMM_SELF,&d->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)A,(PetscObject)d->A);CHKERRQ(ierr);
    ierr = MatSetSizes(d->A,A->rmap->n,A->cmap->N,A->rmap->n,A->cmap->N);CHKERRQ(ierr);
  }
  ierr = MatSetType(d->A,MATSEQDENSECUDA);CHKERRQ(ierr);
  ierr = MatSeqDenseCUDASetPreallocation(d->A,d_data);CHKERRQ(ierr);
  A->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}
#endif

/* Fill with random values, delegated to the local block. */
static PetscErrorCode MatSetRandom_MPIDense(Mat x,PetscRandom rctx)
{
  Mat_MPIDense   *d = (Mat_MPIDense*)x->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetRandom(d->A,rctx);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* A dense matrix stores every entry, so the diagonal is never "missing".
   NOTE(review): the output index `d` is intentionally left untouched. */
static PetscErrorCode MatMissingDiagonal_MPIDense(Mat A,PetscBool *missing,PetscInt *d)
{
  PetscFunctionBegin;
  *missing = PETSC_FALSE;
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat,Mat,Mat);
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat,Mat,PetscReal,Mat);
static PetscErrorCode MatEqual_MPIDense(Mat,Mat,PetscBool*);
static PetscErrorCode MatLoad_MPIDense(Mat,PetscViewer);

/* -------------------------------------------------------------------*/
/* Function table for MATMPIDENSE; slot numbers follow struct _MatOps. */
static struct _MatOps MatOps_Values = { MatSetValues_MPIDense,
                                        MatGetRow_MPIDense,
                                        MatRestoreRow_MPIDense,
                                        MatMult_MPIDense,
                                /*  4*/ MatMultAdd_MPIDense,
                                        MatMultTranspose_MPIDense,
                                        MatMultTransposeAdd_MPIDense,
                                        0,
                                        0,
                                        0,
                                /* 10*/ 0,
                                        0,
                                        0,
                                        0,
                                        MatTranspose_MPIDense,
                                /* 15*/ MatGetInfo_MPIDense,
                                        MatEqual_MPIDense,
                                        MatGetDiagonal_MPIDense,
                                        MatDiagonalScale_MPIDense,
                                        MatNorm_MPIDense,
                                /* 20*/ MatAssemblyBegin_MPIDense,
                                        MatAssemblyEnd_MPIDense,
                                        MatSetOption_MPIDense,
                                        MatZeroEntries_MPIDense,
                                /* 24*/ MatZeroRows_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 29*/ MatSetUp_MPIDense,
                                        0,
                                        0,
                                        MatGetDiagonalBlock_MPIDense,
                                        0,
                                /* 34*/ MatDuplicate_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 39*/ MatAXPY_MPIDense,
                                        MatCreateSubMatrices_MPIDense,
                                        0,
                                        MatGetValues_MPIDense,
                                        0,
                                /* 44*/ 0,
                                        MatScale_MPIDense,
                                        MatShift_Basic,
                                        0,
                                        0,
                                /* 49*/ MatSetRandom_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 54*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 59*/ MatCreateSubMatrix_MPIDense,
                                        MatDestroy_MPIDense,
                                        MatView_MPIDense,
                                        0,
                                        0,
                                /* 64*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 69*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 74*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 79*/ 0,
                                        0,
                                        0,
                                        0,
                                /* 83*/ MatLoad_MPIDense,
                                        0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 89*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /* 94*/ 0,
                                        0,
                                        MatMatTransposeMultSymbolic_MPIDense_MPIDense,
                                        MatMatTransposeMultNumeric_MPIDense_MPIDense,
                                        0,
                                /* 99*/ MatProductSetFromOptions_MPIDense,
                                        0,
                                        0,
                                        MatConjugate_MPIDense,
                                        0,
                                /*104*/ 0,
                                        MatRealPart_MPIDense,
                                        MatImaginaryPart_MPIDense,
                                        0,
                                        0,
                                /*109*/ 0,
                                        0,
                                        0,
                                        MatGetColumnVector_MPIDense,
                                        MatMissingDiagonal_MPIDense,
                                /*114*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*119*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*124*/ 0,
                                        MatGetColumnNorms_MPIDense,
                                        0,
                                        0,
                                        0,
                                /*129*/ 0,
                                        0,
                                        MatTransposeMatMultSymbolic_MPIDense_MPIDense,
                                        MatTransposeMatMultNumeric_MPIDense_MPIDense,
                                        0,
                                /*134*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                /*139*/ 0,
                                        0,
                                        0,
                                        0,
                                        0,
                                        MatCreateMPIMatConcatenateSeqMat_MPIDense,
                                /*145*/ 0,
                                        0,
                                        0
};

/* Preallocate MATMPIDENSE: build the local sequential block, sized
   (local rows) x (ALL global columns), on this process; `data` optionally
   supplies the storage.  Chooses SEQDENSECUDA when the outer type is CUDA. */
PetscErrorCode MatMPIDenseSetPreallocation_MPIDense(Mat mat,PetscScalar *data)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)mat->data;
  PetscBool      iscuda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
  if (!a->A) {
    ierr = MatCreate(PETSC_COMM_SELF,&a->A);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
    ierr = MatSetSizes(a->A,mat->rmap->n,mat->cmap->N,mat->rmap->n,mat->cmap->N);CHKERRQ(ierr);
  }
  ierr = PetscObjectTypeCompare((PetscObject)mat,MATMPIDENSECUDA,&iscuda);CHKERRQ(ierr);
  ierr = MatSetType(a->A,iscuda ? MATSEQDENSECUDA : MATSEQDENSE);CHKERRQ(ierr);
  ierr = MatSeqDenseSetPreallocation(a->A,data);CHKERRQ(ierr);
  mat->preallocated = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_ELEMENTAL)
/* Convert MATMPIDENSE to MATELEMENTAL by setting all local rows (all columns)
   through MatSetValues and assembling. */
PETSC_INTERN PetscErrorCode MatConvert_MPIDense_Elemental(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
{
  Mat            mat_elemental;
  PetscErrorCode ierr;
  PetscScalar    *v;
  PetscInt       m=A->rmap->n,N=A->cmap->N,rstart=A->rmap->rstart,i,*rows,*cols;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) {
    mat_elemental = *newmat;
    ierr = MatZeroEntries(*newmat);CHKERRQ(ierr);
  } else {
    ierr = MatCreate(PetscObjectComm((PetscObject)A), &mat_elemental);CHKERRQ(ierr);
    ierr = MatSetSizes(mat_elemental,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
    ierr = MatSetType(mat_elemental,MATELEMENTAL);CHKERRQ(ierr);
    ierr = MatSetUp(mat_elemental);CHKERRQ(ierr);
    ierr = MatSetOption(mat_elemental,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr);
  }

  ierr = PetscMalloc2(m,&rows,N,&cols);CHKERRQ(ierr);
  for (i=0; i<N; i++) cols[i] = i;
  for (i=0; i<m; i++) rows[i] = rstart + i;

  /* PETSc-Elemental interface uses axpy for setting off-processor entries, only ADD_VALUES is allowed */
  ierr = MatDenseGetArray(A,&v);CHKERRQ(ierr);
  ierr = MatSetValues(mat_elemental,m,rows,N,cols,v,ADD_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat_elemental, MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(A,&v);CHKERRQ(ierr);
  ierr = PetscFree2(rows,cols);CHKERRQ(ierr);

  if (reuse == MAT_INPLACE_MATRIX) {
    ierr = MatHeaderReplace(A,&mat_elemental);CHKERRQ(ierr);
  } else {
    *newmat = mat_elemental;
  }
  PetscFunctionReturn(0);
}
#endif

/* Direct pointer access to column `col` of the local block. */
static PetscErrorCode MatDenseGetColumn_MPIDense(Mat A,PetscInt col,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseGetColumn(mat->A,col,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Release the column pointer obtained with MatDenseGetColumn. */
static PetscErrorCode MatDenseRestoreColumn_MPIDense(Mat A,PetscScalar **vals)
{
  Mat_MPIDense   *mat = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDenseRestoreColumn(mat->A,vals);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Stack per-process sequential dense matrices into one parallel dense matrix
   on `comm`; `n` is the local column count (PETSC_DECIDE allowed). */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIDense(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
{
  PetscErrorCode ierr;
  Mat_MPIDense   *mat;
  PetscInt       m,nloc,N;

  PetscFunctionBegin;
  ierr = MatGetSize(inmat,&m,&N);CHKERRQ(ierr);
  ierr = MatGetLocalSize(inmat,NULL,&nloc);CHKERRQ(ierr);
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt sum;

    if (n == PETSC_DECIDE) {
      ierr = PetscSplitOwnership(comm,&n,&N);CHKERRQ(ierr);
    }
    /* Check sum(n) = N */
    ierr = MPIU_Allreduce(&n,&sum,1,MPIU_INT,MPI_SUM,comm);CHKERRQ(ierr);
    if (sum != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Sum of local columns %D != global columns %D",sum,N);

    ierr = MatCreateDense(comm,m,n,PETSC_DETERMINE,N,NULL,outmat);CHKERRQ(ierr);
  }

  /* numeric phase */
  mat = (Mat_MPIDense*)(*outmat)->data;
  ierr = MatCopy(inmat,mat->A,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(*outmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/* Convert MATMPIDENSECUDA back to MATMPIDENSE: rebind to CPU, restore the
   host vector type, and remove the CUDA-only composed methods. */
PetscErrorCode MatConvert_MPIDenseCUDA_MPIDense(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }

  B = *newmat;
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_TRUE);CHKERRQ(ierr);
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECSTANDARD,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSE);CHKERRQ(ierr);
  /* unregister CUDA-specific composed methods (NULL removes them) */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",NULL);CHKERRQ(ierr);
  ierr =
PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",NULL);CHKERRQ(ierr);
  /* Un-register the CUDA-specific method implementations from B (tail of MatConvert_MPIDenseCUDA_MPIDense) */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",NULL);CHKERRQ(ierr);
  m = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    /* convert the local dense block back to a host matrix and rebuild the communication for MatMult */
    ierr = MatConvert(m->A,MATSEQDENSE,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
  }
  B->ops->bindtocpu = NULL;
  B->offloadmask = PETSC_OFFLOAD_CPU;
  PetscFunctionReturn(0);
}

/* Convert a MATMPIDENSE matrix to MATMPIDENSECUDA: switch the default vector type to CUDA,
   register the CUDA-specific method implementations, and convert the local block to MATSEQDENSECUDA. */
PetscErrorCode MatConvert_MPIDense_MPIDenseCUDA(Mat M,MatType type,MatReuse reuse,Mat *newmat)
{
  Mat            B;
  Mat_MPIDense   *m;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(M,MAT_COPY_VALUES,newmat);CHKERRQ(ierr);
  } else if (reuse == MAT_REUSE_MATRIX) {
    ierr = MatCopy(M,*newmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
  }
  /* MAT_INPLACE_MATRIX falls through: *newmat already aliases M */

  B = *newmat;
  ierr = PetscFree(B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(VECCUDA,&B->defaultvectype);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPIDENSECUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpidensecuda_mpidense_C",MatConvert_MPIDenseCUDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaij_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpiaijcusparse_mpidensecuda_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatProductSetFromOptions_mpidensecuda_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArray_C",MatDenseCUDAGetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayRead_C",MatDenseCUDAGetArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAGetArrayWrite_C",MatDenseCUDAGetArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArray_C",MatDenseCUDARestoreArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayRead_C",MatDenseCUDARestoreArrayRead_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDARestoreArrayWrite_C",MatDenseCUDARestoreArrayWrite_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAPlaceArray_C",MatDenseCUDAPlaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAResetArray_C",MatDenseCUDAResetArray_MPIDenseCUDA);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatDenseCUDAReplaceArray_C",MatDenseCUDAReplaceArray_MPIDenseCUDA);CHKERRQ(ierr);
  m = (Mat_MPIDense*)(B)->data;
  if (m->A) {
    /* local block exists: move it to the GPU; data is now valid on both host and device */
    ierr = MatConvert(m->A,MATSEQDENSECUDA,MAT_INPLACE_MATRIX,&m->A);CHKERRQ(ierr);
    ierr = MatSetUpMultiply_MPIDense(B);CHKERRQ(ierr);
    B->offloadmask = PETSC_OFFLOAD_BOTH;
  } else {
    B->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }
  ierr = MatBindToCPU_MPIDenseCUDA(B,PETSC_FALSE);CHKERRQ(ierr);

  B->ops->bindtocpu = MatBindToCPU_MPIDenseCUDA;
  PetscFunctionReturn(0);
}
#endif

/* Expose column col of the matrix as a Vec sharing the storage of the local dense block.
   Must be matched by MatDenseRestoreColumnVec_MPIDense(); only one column vector or
   submatrix may be outstanding at a time (tracked via a->vecinuse / a->matinuse). */
PetscErrorCode MatDenseGetColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the work vector; it wraps user-provided storage via VecPlaceArray below */
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1; /* store col+1 so 0 means "not in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Release the column vector obtained with MatDenseGetColumnVec_MPIDense() */
PetscErrorCode MatDenseRestoreColumnVec_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
  *v = NULL;
  PetscFunctionReturn(0);
}

/* Read-only variant of MatDenseGetColumnVec_MPIDense(); the returned vector is read-locked */
PetscErrorCode MatDenseGetColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse)
SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    /* lazily create the work vector wrapping the local dense storage */
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1; /* col+1 so that 0 means "not in use" */
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  ierr = VecLockReadPush(a->cvec);CHKERRQ(ierr); /* forbid writes through the returned vector */
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Release the read-only column vector obtained with MatDenseGetColumnVecRead_MPIDense() */
PetscErrorCode MatDenseRestoreColumnVecRead_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseRestoreArrayRead(a->A,&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecLockReadPop(a->cvec);CHKERRQ(ierr);
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
  *v = NULL;
  PetscFunctionReturn(0);
}

/* Write-only variant: exposes column col without copying valid data to the host first */
PetscErrorCode MatDenseGetColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;
  PetscInt       lda;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (!a->cvec) {
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)A),A->rmap->bs,A->rmap->n,A->rmap->N,NULL,&a->cvec);CHKERRQ(ierr);
  }
  a->vecinuse = col + 1;
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  ierr = MatDenseGetArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecPlaceArray(a->cvec,a->ptrinuse + (size_t)col * (size_t)lda);CHKERRQ(ierr);
  *v = a->cvec;
  PetscFunctionReturn(0);
}

/* Release the write-only column vector obtained with MatDenseGetColumnVecWrite_MPIDense() */
PetscErrorCode MatDenseRestoreColumnVecWrite_MPIDense(Mat A,PetscInt col,Vec *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetColumnVec() first");
  if (!a->cvec) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal column vector");
  a->vecinuse = 0;
  ierr = MatDenseRestoreArrayWrite(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = VecResetArray(a->cvec);CHKERRQ(ierr);
  *v = NULL;
  PetscFunctionReturn(0);
}

/* Expose columns [cbegin,cend) as a dense matrix sharing the storage of A.
   The cached a->cmat is recreated when the requested column count changes. */
PetscErrorCode MatDenseGetSubMatrix_MPIDense(Mat A,PetscInt cbegin,PetscInt cend,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscInt       lda;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (a->vecinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreColumnVec() first");
  if (a->matinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseRestoreSubMatrix() first");
  if (a->cmat && cend-cbegin != a->cmat->cmap->N) {
    /* cached submatrix has the wrong number of columns: rebuild it below */
    ierr = MatDestroy(&a->cmat);CHKERRQ(ierr);
  }
  ierr = MatDenseGetArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = MatDenseGetLDA(a->A,&lda);CHKERRQ(ierr);
  if (!a->cmat) {
    ierr = MatCreateDense(PetscObjectComm((PetscObject)A),A->rmap->n,PETSC_DECIDE,A->rmap->N,cend-cbegin,(PetscScalar*)a->ptrinuse + (size_t)cbegin * (size_t)lda,&a->cmat);CHKERRQ(ierr);
    ierr = MatDenseSetLDA(a->cmat,lda);CHKERRQ(ierr);
  } else {
    ierr = MatDensePlaceArray(a->cmat,a->ptrinuse + (size_t)cbegin * (size_t)lda);CHKERRQ(ierr);
  }
  a->matinuse = cbegin + 1; /* cbegin+1 so that 0 means "not in use" */
  *v =
a->cmat;
  PetscFunctionReturn(0);
}

/* Release the submatrix obtained with MatDenseGetSubMatrix_MPIDense() */
PetscErrorCode MatDenseRestoreSubMatrix_MPIDense(Mat A,Mat *v)
{
  Mat_MPIDense   *a = (Mat_MPIDense*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!a->matinuse) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Need to call MatDenseGetSubMatrix() first");
  if (!a->cmat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing internal matrix");
  a->matinuse = 0;
  ierr = MatDenseRestoreArray(a->A,(PetscScalar**)&a->ptrinuse);CHKERRQ(ierr);
  ierr = MatDenseResetArray(a->cmat);CHKERRQ(ierr);
  *v = NULL;
  PetscFunctionReturn(0);
}

/* Constructor for the MATMPIDENSE type: allocates the Mat_MPIDense context,
   installs the function table, and registers the dense-specific methods. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDense(Mat mat)
{
  Mat_MPIDense   *a;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscNewLog(mat,&a);CHKERRQ(ierr);
  mat->data = (void*)a;
  ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  mat->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&a->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&a->size);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  a->donotstash = PETSC_FALSE;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)mat),1,&mat->stash);CHKERRQ(ierr);

  /* stuff used for matrix vector multiply */
  a->lvec        = 0;
  a->Mvctx       = 0;
  a->roworiented = PETSC_TRUE;

  /* register the dense-specific methods dispatched through PetscUseMethod/PetscTryMethod */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetLDA_C",MatDenseGetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseSetLDA_C",MatDenseSetLDA_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArray_C",MatDenseGetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArray_C",MatDenseRestoreArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayRead_C",MatDenseGetArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayRead_C",MatDenseRestoreArrayRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetArrayWrite_C",MatDenseGetArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreArrayWrite_C",MatDenseRestoreArrayWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDensePlaceArray_C",MatDensePlaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseResetArray_C",MatDenseResetArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseReplaceArray_C",MatDenseReplaceArray_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVec_C",MatDenseGetColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVec_C",MatDenseRestoreColumnVec_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecRead_C",MatDenseGetColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecRead_C",MatDenseRestoreColumnVecRead_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumnVecWrite_C",MatDenseGetColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumnVecWrite_C",MatDenseRestoreColumnVecWrite_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetSubMatrix_C",MatDenseGetSubMatrix_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreSubMatrix_C",MatDenseRestoreSubMatrix_MPIDense);CHKERRQ(ierr);
#if defined(PETSC_HAVE_ELEMENTAL)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_elemental_C",MatConvert_MPIDense_Elemental);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpidense_mpidensecuda_C",MatConvert_MPIDense_MPIDenseCUDA);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPIDenseSetPreallocation_C",MatMPIDenseSetPreallocation_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaij_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaij_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpiaijcusparse_mpidense_C",MatProductSetFromOptions_MPIAIJ_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatProductSetFromOptions_mpidense_mpiaijcusparse_C",MatProductSetFromOptions_MPIDense_MPIAIJ);CHKERRQ(ierr);
#endif

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseGetColumn_C",MatDenseGetColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatDenseRestoreColumn_C",MatDenseRestoreColumn_MPIDense);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)mat,MATMPIDENSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
   MATMPIDENSECUDA - MATMPIDENSECUDA = "mpidensecuda" - A matrix type to be used for distributed dense matrices on GPUs.

   Options Database Keys:
.
-mat_type mpidensecuda - sets the matrix type to "mpidensecuda" during a call to MatSetFromOptions()

  Level: beginner

.seealso:

M*/
#if defined(PETSC_HAVE_CUDA)
/* Constructor for MATMPIDENSECUDA: create a host MPI dense matrix, then convert it in place */
PETSC_EXTERN PetscErrorCode MatCreate_MPIDenseCUDA(Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate_MPIDense(B);CHKERRQ(ierr);
  ierr = MatConvert_MPIDense_MPIDenseCUDA(B,MATMPIDENSECUDA,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

/*MC
   MATDENSE - MATDENSE = "dense" - A matrix type to be used for dense matrices.

   This matrix type is identical to MATSEQDENSE when constructed with a single process communicator,
   and MATMPIDENSE otherwise.

   Options Database Keys:
. -mat_type dense - sets the matrix type to "dense" during a call to MatSetFromOptions()

  Level: beginner


.seealso: MATSEQDENSE,MATMPIDENSE,MATDENSECUDA
M*/

/*MC
   MATDENSECUDA - MATDENSECUDA = "densecuda" - A matrix type to be used for dense matrices on GPUs.

   This matrix type is identical to MATSEQDENSECUDA when constructed with a single process communicator,
   and MATMPIDENSECUDA otherwise.

   Options Database Keys:
. -mat_type densecuda - sets the matrix type to "densecuda" during a call to MatSetFromOptions()

  Level: beginner

.seealso: MATSEQDENSECUDA,MATMPIDENSECUDA,MATDENSE
M*/

/*@C
   MatMPIDenseSetPreallocation - Sets the array used to store the matrix entries

   Not collective

   Input Parameters:
+  B - the matrix
-  data - optional location of matrix data.  Set data=NULL for PETSc
   to control all matrix memory allocation.

   Notes:
   The dense format is fully compatible with standard Fortran 77
   storage by columns.

   The data input variable is intended primarily for Fortran programmers
   who wish to allocate their own matrix memory space.  Most users should
   set data=NULL.

   Level: intermediate

.seealso: MatCreate(), MatCreateSeqDense(), MatSetValues()
@*/
PetscErrorCode MatMPIDenseSetPreallocation(Mat B,PetscScalar *data)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  /* dispatch to the type-specific implementation registered in MatCreate_MPIDense() */
  ierr = PetscTryMethod(B,"MatMPIDenseSetPreallocation_C",(Mat,PetscScalar*),(B,data));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDensePlaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), MatDenseResetArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDensePlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDensePlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* the user-provided host array is now the authoritative copy */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}

/*@
   MatDenseResetArray - Resets the matrix array to that it previously had before the call to MatDensePlaceArray()

   Not Collective

   Input Parameters:
.
mat - the matrix

   Notes:
   You can only call this after a call to MatDensePlaceArray()

   Level: developer

.seealso: MatDenseGetArray(), MatDensePlaceArray(), VecPlaceArray(), VecGetArray(), VecRestoreArray(), VecReplaceArray(), VecResetArray()

@*/
PetscErrorCode MatDenseResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@
   MatDenseReplaceArray - Allows one to replace the array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   The memory passed in MUST be obtained with PetscMalloc() and CANNOT be
   freed by the user. It will be freed when the matrix is destroyed.

   Level: developer

.seealso: MatDenseGetArray(), VecReplaceArray()
@*/
PetscErrorCode MatDenseReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
#if defined(PETSC_HAVE_CUDA)
  /* the replacement host array is now the authoritative copy */
  mat->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscFunctionReturn(0);
}

#if defined(PETSC_HAVE_CUDA)
/*@C
   MatDenseCUDAPlaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   You can return to the original array with a call to MatDenseCUDAResetArray(). The user is responsible for freeing this array; it will not be
   freed when the matrix is destroyed. The array must have been allocated with cudaMalloc().

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAResetArray()
@*/
PetscErrorCode MatDenseCUDAPlaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAPlaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  /* the user-provided device array is now the authoritative copy */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAResetArray - Resets the matrix array to that it previously had before the call to MatDenseCUDAPlaceArray()

   Not Collective

   Input Parameters:
.  mat - the matrix

   Notes:
   You can only call this after a call to MatDenseCUDAPlaceArray()

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray()

@*/
PetscErrorCode MatDenseCUDAResetArray(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAResetArray_C",(Mat),(mat));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAReplaceArray - Allows one to replace the GPU array in a dense matrix with an
   array provided by the user. This is useful to avoid copying an array
   into a matrix

   Not Collective

   Input Parameters:
+  mat - the matrix
-  array - the array in column major order

   Notes:
   This permanently replaces the GPU array and frees the memory associated with the old GPU array.
   The memory passed in CANNOT be freed by the user. It will be freed
   when the matrix is destroyed. The array should respect the matrix leading dimension.

   Level: developer

.seealso: MatDenseCUDAGetArray(), MatDenseCUDAPlaceArray(), MatDenseCUDAResetArray()
@*/
PetscErrorCode MatDenseCUDAReplaceArray(Mat mat,const PetscScalar *array)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
  ierr = PetscUseMethod(mat,"MatDenseCUDAReplaceArray_C",(Mat,const PetscScalar*),(mat,array));CHKERRQ(ierr);
  ierr = PetscObjectStateIncrease((PetscObject)mat);CHKERRQ(ierr);
  /* the replacement device array is now the authoritative copy */
  mat->offloadmask = PETSC_OFFLOAD_GPU;
  PetscFunctionReturn(0);
}

/*@C
   MatDenseCUDAGetArrayWrite - Provides write access to the CUDA buffer inside a dense matrix.

   Not Collective

   Input Parameters:
.  A - the matrix

   Output Parameters:
.  array - the GPU array in column major order

   Notes:
   The data on the GPU may not be updated due to operations done on the CPU. If you need updated data, use MatDenseCUDAGetArray(). The array must be restored with MatDenseCUDARestoreArrayWrite() when no longer needed.
2198 2199 Level: developer 2200 2201 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArrayRead() 2202 @*/ 2203 PetscErrorCode MatDenseCUDAGetArrayWrite(Mat A, PetscScalar **a) 2204 { 2205 PetscErrorCode ierr; 2206 2207 PetscFunctionBegin; 2208 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2209 ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2210 ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2211 PetscFunctionReturn(0); 2212 } 2213 2214 /*@C 2215 MatDenseCUDARestoreArrayWrite - Restore write access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArrayWrite(). 2216 2217 Not Collective 2218 2219 Input Parameters: 2220 + A - the matrix 2221 - array - the GPU array in column major order 2222 2223 Notes: 2224 2225 Level: developer 2226 2227 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead() 2228 @*/ 2229 PetscErrorCode MatDenseCUDARestoreArrayWrite(Mat A, PetscScalar **a) 2230 { 2231 PetscErrorCode ierr; 2232 2233 PetscFunctionBegin; 2234 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2235 ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayWrite_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2236 ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2237 A->offloadmask = PETSC_OFFLOAD_GPU; 2238 PetscFunctionReturn(0); 2239 } 2240 2241 /*@C 2242 MatDenseCUDAGetArrayRead - Provides read-only access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArrayRead() when no longer needed. 2243 2244 Not Collective 2245 2246 Input Parameters: 2247 . A - the matrix 2248 2249 Output Parameters 2250 . array - the GPU array in column major order 2251 2252 Notes: 2253 Data can be copied to the GPU due to operations done on the CPU. 
If you need write only access, use MatDenseCUDAGetArrayWrite(). 2254 2255 Level: developer 2256 2257 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead() 2258 @*/ 2259 PetscErrorCode MatDenseCUDAGetArrayRead(Mat A, const PetscScalar **a) 2260 { 2261 PetscErrorCode ierr; 2262 2263 PetscFunctionBegin; 2264 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2265 ierr = PetscUseMethod(A,"MatDenseCUDAGetArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr); 2266 PetscFunctionReturn(0); 2267 } 2268 2269 /*@C 2270 MatDenseCUDARestoreArrayRead - Restore read-only access to the CUDA buffer inside a dense matrix previously obtained with a call to MatDenseCUDAGetArrayRead(). 2271 2272 Not Collective 2273 2274 Input Parameters: 2275 + A - the matrix 2276 - array - the GPU array in column major order 2277 2278 Notes: 2279 Data can be copied to the GPU due to operations done on the CPU. If you need write only access, use MatDenseCUDAGetArrayWrite(). 2280 2281 Level: developer 2282 2283 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDAGetArrayRead() 2284 @*/ 2285 PetscErrorCode MatDenseCUDARestoreArrayRead(Mat A, const PetscScalar **a) 2286 { 2287 PetscErrorCode ierr; 2288 2289 PetscFunctionBegin; 2290 ierr = PetscUseMethod(A,"MatDenseCUDARestoreArrayRead_C",(Mat,const PetscScalar**),(A,a));CHKERRQ(ierr); 2291 PetscFunctionReturn(0); 2292 } 2293 2294 /*@C 2295 MatDenseCUDAGetArray - Provides access to the CUDA buffer inside a dense matrix. The array must be restored with MatDenseCUDARestoreArray() when no longer needed. 2296 2297 Not Collective 2298 2299 Input Parameters: 2300 . A - the matrix 2301 2302 Output Parameters 2303 . array - the GPU array in column major order 2304 2305 Notes: 2306 Data can be copied to the GPU due to operations done on the CPU. 
If you need write only access, use MatDenseCUDAGetArrayWrite(). For read-only access, use MatDenseCUDAGetArrayRead(). 2307 2308 Level: developer 2309 2310 .seealso: MatDenseCUDAGetArrayRead(), MatDenseCUDARestoreArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead() 2311 @*/ 2312 PetscErrorCode MatDenseCUDAGetArray(Mat A, PetscScalar **a) 2313 { 2314 PetscErrorCode ierr; 2315 2316 PetscFunctionBegin; 2317 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2318 ierr = PetscUseMethod(A,"MatDenseCUDAGetArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2319 ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2320 PetscFunctionReturn(0); 2321 } 2322 2323 /*@C 2324 MatDenseCUDARestoreArray - Restore access to the CUDA buffer inside a dense matrix previously obtained with MatDenseCUDAGetArray(). 2325 2326 Not Collective 2327 2328 Input Parameters: 2329 + A - the matrix 2330 - array - the GPU array in column major order 2331 2332 Notes: 2333 2334 Level: developer 2335 2336 .seealso: MatDenseCUDAGetArray(), MatDenseCUDARestoreArrayWrite(), MatDenseCUDAGetArrayWrite(), MatDenseCUDARestoreArrayRead(), MatDenseCUDAGetArrayRead() 2337 @*/ 2338 PetscErrorCode MatDenseCUDARestoreArray(Mat A, PetscScalar **a) 2339 { 2340 PetscErrorCode ierr; 2341 2342 PetscFunctionBegin; 2343 PetscValidHeaderSpecific(A,MAT_CLASSID,1); 2344 ierr = PetscUseMethod(A,"MatDenseCUDARestoreArray_C",(Mat,PetscScalar**),(A,a));CHKERRQ(ierr); 2345 ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr); 2346 A->offloadmask = PETSC_OFFLOAD_GPU; 2347 PetscFunctionReturn(0); 2348 } 2349 #endif 2350 2351 /*@C 2352 MatCreateDense - Creates a matrix in dense format. 2353 2354 Collective 2355 2356 Input Parameters: 2357 + comm - MPI communicator 2358 . m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2359 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2360 . 
M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 2361 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 2362 - data - optional location of matrix data. Set data=NULL (PETSC_NULL_SCALAR for Fortran users) for PETSc 2363 to control all matrix memory allocation. 2364 2365 Output Parameter: 2366 . A - the matrix 2367 2368 Notes: 2369 The dense format is fully compatible with standard Fortran 77 2370 storage by columns. 2371 2372 The data input variable is intended primarily for Fortran programmers 2373 who wish to allocate their own matrix memory space. Most users should 2374 set data=NULL (PETSC_NULL_SCALAR for Fortran users). 2375 2376 The user MUST specify either the local or global matrix dimensions 2377 (possibly both). 2378 2379 Level: intermediate 2380 2381 .seealso: MatCreate(), MatCreateSeqDense(), MatSetValues() 2382 @*/ 2383 PetscErrorCode MatCreateDense(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 2384 { 2385 PetscErrorCode ierr; 2386 PetscMPIInt size; 2387 2388 PetscFunctionBegin; 2389 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2390 PetscValidLogicalCollectiveBool(*A,!!data,6); 2391 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2392 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2393 if (size > 1) { 2394 ierr = MatSetType(*A,MATMPIDENSE);CHKERRQ(ierr); 2395 ierr = MatMPIDenseSetPreallocation(*A,data);CHKERRQ(ierr); 2396 if (data) { /* user provided data array, so no need to assemble */ 2397 ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr); 2398 (*A)->assembled = PETSC_TRUE; 2399 } 2400 } else { 2401 ierr = MatSetType(*A,MATSEQDENSE);CHKERRQ(ierr); 2402 ierr = MatSeqDenseSetPreallocation(*A,data);CHKERRQ(ierr); 2403 } 2404 PetscFunctionReturn(0); 2405 } 2406 2407 #if defined(PETSC_HAVE_CUDA) 2408 /*@C 2409 MatCreateDenseCUDA - Creates a matrix in dense format using CUDA. 2410 2411 Collective 2412 2413 Input Parameters: 2414 + comm - MPI communicator 2415 . 
m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 2416 . n - number of local columns (or PETSC_DECIDE to have calculated if N is given) 2417 . M - number of global rows (or PETSC_DECIDE to have calculated if m is given) 2418 . N - number of global columns (or PETSC_DECIDE to have calculated if n is given) 2419 - data - optional location of GPU matrix data. Set data=NULL for PETSc 2420 to control matrix memory allocation. 2421 2422 Output Parameter: 2423 . A - the matrix 2424 2425 Notes: 2426 2427 Level: intermediate 2428 2429 .seealso: MatCreate(), MatCreateDense() 2430 @*/ 2431 PetscErrorCode MatCreateDenseCUDA(MPI_Comm comm,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscScalar *data,Mat *A) 2432 { 2433 PetscErrorCode ierr; 2434 PetscMPIInt size; 2435 2436 PetscFunctionBegin; 2437 ierr = MatCreate(comm,A);CHKERRQ(ierr); 2438 PetscValidLogicalCollectiveBool(*A,!!data,6); 2439 ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr); 2440 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2441 if (size > 1) { 2442 ierr = MatSetType(*A,MATMPIDENSECUDA);CHKERRQ(ierr); 2443 ierr = MatMPIDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr); 2444 if (data) { /* user provided data array, so no need to assemble */ 2445 ierr = MatSetUpMultiply_MPIDense(*A);CHKERRQ(ierr); 2446 (*A)->assembled = PETSC_TRUE; 2447 } 2448 } else { 2449 ierr = MatSetType(*A,MATSEQDENSECUDA);CHKERRQ(ierr); 2450 ierr = MatSeqDenseCUDASetPreallocation(*A,data);CHKERRQ(ierr); 2451 } 2452 PetscFunctionReturn(0); 2453 } 2454 #endif 2455 2456 static PetscErrorCode MatDuplicate_MPIDense(Mat A,MatDuplicateOption cpvalues,Mat *newmat) 2457 { 2458 Mat mat; 2459 Mat_MPIDense *a,*oldmat = (Mat_MPIDense*)A->data; 2460 PetscErrorCode ierr; 2461 2462 PetscFunctionBegin; 2463 *newmat = 0; 2464 ierr = MatCreate(PetscObjectComm((PetscObject)A),&mat);CHKERRQ(ierr); 2465 ierr = MatSetSizes(mat,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2466 ierr = 
MatSetType(mat,((PetscObject)A)->type_name);CHKERRQ(ierr);
  a = (Mat_MPIDense*)mat->data;

  mat->factortype   = A->factortype;
  mat->assembled    = PETSC_TRUE;
  mat->preallocated = PETSC_TRUE;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  mat->insertmode = NOT_SET_VALUES;
  a->donotstash   = oldmat->donotstash;

  /* share the layouts of the original matrix rather than rebuilding them */
  ierr = PetscLayoutReference(A->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(A->cmap,&mat->cmap);CHKERRQ(ierr);

  ierr = MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr = MatSetUpMultiply_MPIDense(mat);CHKERRQ(ierr);

  *newmat = mat;
  PetscFunctionReturn(0);
}

/* Load an MPI dense matrix from a binary or (if available) HDF5 viewer */
PetscErrorCode MatLoad_MPIDense(Mat newMat, PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      isbinary;
#if defined(PETSC_HAVE_HDF5)
  PetscBool      ishdf5;
#endif

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat,MAT_CLASSID,1);
  PetscValidHeaderSpecific(viewer,PETSC_VIEWER_CLASSID,2);
  /* force binary viewer to load .info file if it has not yet done so */
  ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERHDF5,  &ishdf5);CHKERRQ(ierr);
#endif
  if (isbinary) {
    ierr = MatLoad_Dense_Binary(newMat,viewer);CHKERRQ(ierr);
#if defined(PETSC_HAVE_HDF5)
  } else if (ishdf5) {
    ierr = MatLoad_Dense_HDF5(newMat,viewer);CHKERRQ(ierr);
#endif
  } else SETERRQ2(PetscObjectComm((PetscObject)newMat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newMat)->type_name);
  PetscFunctionReturn(0);
}

/* Collective equality test: compare the local blocks, then AND the result over the communicator */
static PetscErrorCode MatEqual_MPIDense(Mat A,Mat B,PetscBool *flag)
{
  Mat_MPIDense   *matB = (Mat_MPIDense*)B->data,*matA = (Mat_MPIDense*)A->data;
  Mat            a,b;
  PetscBool      flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A;
  b = matB->A;
  ierr = MatEqual(a,b,&flg);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Destructor for the product data attached to C = A^T*B products */
PetscErrorCode MatDestroy_MatTransMatMult_MPIDense_MPIDense(void *data)
{
  PetscErrorCode        ierr;
  Mat_TransMatMultDense *atb = (Mat_TransMatMultDense *)data;

  PetscFunctionBegin;
  ierr = PetscFree2(atb->sendbuf,atb->recvcounts);CHKERRQ(ierr);
  ierr = MatDestroy(&atb->atb);CHKERRQ(ierr);
  ierr = PetscFree(atb);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Destructor for the product data attached to C = A*B^T products */
PetscErrorCode MatDestroy_MatMatTransMult_MPIDense_MPIDense(void *data)
{
  PetscErrorCode        ierr;
  Mat_MatTransMultDense *abt = (Mat_MatTransMultDense *)data;

  PetscFunctionBegin;
  ierr = PetscFree2(abt->buf[0],abt->buf[1]);CHKERRQ(ierr);
  ierr = PetscFree2(abt->recvcounts,abt->recvdispls);CHKERRQ(ierr);
  ierr = PetscFree(abt);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Numeric phase of C = A^T*B for MPI dense matrices: compute the local product,
   pack it by destination rank, then MPI_Reduce_scatter the contributions into C. */
static PetscErrorCode MatTransposeMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C)
{
  Mat_MPIDense          *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data;
  Mat_TransMatMultDense *atb;
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size,*recvcounts;
  PetscScalar           *carray,*sendbuf;
  const PetscScalar     *atbarray;
  PetscInt              i,cN=C->cmap->N,cM=C->rmap->N,proc,k,j;
  const PetscInt        *ranges;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty");
  atb        = (Mat_TransMatMultDense *)C->product->data;
  recvcounts = atb->recvcounts;
  sendbuf    = atb->sendbuf;

  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);

  /* compute atbarray = aseq^T * bseq */
  ierr = MatTransposeMatMult(a->A,b->A,atb->atb ? MAT_REUSE_MATRIX : MAT_INITIAL_MATRIX,PETSC_DEFAULT,&atb->atb);CHKERRQ(ierr);

  ierr = MatGetOwnershipRanges(C,&ranges);CHKERRQ(ierr);
  for (i=0; i<size; i++) recvcounts[i] = (ranges[i+1] - ranges[i])*cN;

  /* arrange atbarray into sendbuf */
  ierr = MatDenseGetArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);
  for (proc=0, k=0; proc<size; proc++) {
    for (j=0; j<cN; j++) {
      /* column-major source: entry (i,j) of the local product lives at atbarray[i+j*cM] */
      for (i=ranges[proc]; i<ranges[proc+1]; i++) sendbuf[k++] = atbarray[i+j*cM];
    }
  }
  ierr = MatDenseRestoreArrayRead(atb->atb,&atbarray);CHKERRQ(ierr);

  /* sum all atbarray to local values of C */
  ierr = MatDenseGetArray(c->A,&carray);CHKERRQ(ierr);
  ierr = MPI_Reduce_scatter(sendbuf,carray,recvcounts,MPIU_SCALAR,MPIU_SUM,comm);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(c->A,&carray);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/* Symbolic phase of C = A^T*B for MPI dense matrices: validate layouts and set up product data */
static PetscErrorCode MatTransposeMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C)
{
  PetscErrorCode        ierr;
  MPI_Comm              comm;
  PetscMPIInt           size;
  PetscInt              cm=A->cmap->n,cM,cN=B->cmap->N;
  Mat_TransMatMultDense *atb;
  PetscBool             cisdense;

  PetscFunctionBegin;
  MatCheckProduct(C,3);
  if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty");
  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) {
    SETERRQ4(comm,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend);
  }

  /* create matrix product C */
  ierr =
MatSetSizes(C,cm,B->cmap->n,A->cmap->N,B->cmap->N);CHKERRQ(ierr); 2618 ierr = PetscObjectTypeCompareAny((PetscObject)C,&cisdense,MATMPIDENSE,MATMPIDENSECUDA,"");CHKERRQ(ierr); 2619 if (!cisdense) { 2620 ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr); 2621 } 2622 ierr = MatSetUp(C);CHKERRQ(ierr); 2623 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2624 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2625 2626 /* create data structure for reuse C */ 2627 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2628 ierr = PetscNew(&atb);CHKERRQ(ierr); 2629 cM = C->rmap->N; 2630 ierr = PetscMalloc2((size_t)cM*(size_t)cN,&atb->sendbuf,size,&atb->recvcounts);CHKERRQ(ierr); 2631 2632 C->product->data = atb; 2633 C->product->destroy = MatDestroy_MatTransMatMult_MPIDense_MPIDense; 2634 PetscFunctionReturn(0); 2635 } 2636 2637 static PetscErrorCode MatMatTransposeMultSymbolic_MPIDense_MPIDense(Mat A, Mat B, PetscReal fill, Mat C) 2638 { 2639 PetscErrorCode ierr; 2640 MPI_Comm comm; 2641 PetscMPIInt i, size; 2642 PetscInt maxRows, bufsiz; 2643 PetscMPIInt tag; 2644 PetscInt alg; 2645 Mat_MatTransMultDense *abt; 2646 Mat_Product *product = C->product; 2647 PetscBool flg; 2648 2649 PetscFunctionBegin; 2650 MatCheckProduct(C,4); 2651 if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2652 /* check local size of A and B */ 2653 if (A->cmap->n != B->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local column dimensions are incompatible, A (%D) != B (%D)",A->cmap->n,B->cmap->n); 2654 2655 ierr = PetscStrcmp(product->alg,"allgatherv",&flg);CHKERRQ(ierr); 2656 alg = flg ? 
0 : 1; 2657 2658 /* setup matrix product C */ 2659 ierr = MatSetSizes(C,A->rmap->n,B->rmap->n,A->rmap->N,B->rmap->N);CHKERRQ(ierr); 2660 ierr = MatSetType(C,MATMPIDENSE);CHKERRQ(ierr); 2661 ierr = MatSetUp(C);CHKERRQ(ierr); 2662 ierr = PetscObjectGetNewTag((PetscObject)C,&tag);CHKERRQ(ierr); 2663 ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2664 ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2665 2666 /* create data structure for reuse C */ 2667 ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr); 2668 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2669 ierr = PetscNew(&abt);CHKERRQ(ierr); 2670 abt->tag = tag; 2671 abt->alg = alg; 2672 switch (alg) { 2673 case 1: /* alg: "cyclic" */ 2674 for (maxRows = 0, i = 0; i < size; i++) maxRows = PetscMax(maxRows, (B->rmap->range[i + 1] - B->rmap->range[i])); 2675 bufsiz = A->cmap->N * maxRows; 2676 ierr = PetscMalloc2(bufsiz,&(abt->buf[0]),bufsiz,&(abt->buf[1]));CHKERRQ(ierr); 2677 break; 2678 default: /* alg: "allgatherv" */ 2679 ierr = PetscMalloc2(B->rmap->n * B->cmap->N, &(abt->buf[0]), B->rmap->N * B->cmap->N, &(abt->buf[1]));CHKERRQ(ierr); 2680 ierr = PetscMalloc2(size,&(abt->recvcounts),size+1,&(abt->recvdispls));CHKERRQ(ierr); 2681 for (i = 0; i <= size; i++) abt->recvdispls[i] = B->rmap->range[i] * A->cmap->N; 2682 for (i = 0; i < size; i++) abt->recvcounts[i] = abt->recvdispls[i + 1] - abt->recvdispls[i]; 2683 break; 2684 } 2685 2686 C->product->data = abt; 2687 C->product->destroy = MatDestroy_MatMatTransMult_MPIDense_MPIDense; 2688 C->ops->mattransposemultnumeric = MatMatTransposeMultNumeric_MPIDense_MPIDense; 2689 PetscFunctionReturn(0); 2690 } 2691 2692 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(Mat A, Mat B, Mat C) 2693 { 2694 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2695 Mat_MatTransMultDense *abt; 2696 PetscErrorCode ierr; 2697 MPI_Comm comm; 2698 PetscMPIInt rank,size, sendsiz, 
recvsiz, sendto, recvfrom, recvisfrom; 2699 PetscScalar *sendbuf, *recvbuf=0, *cv; 2700 PetscInt i,cK=A->cmap->N,k,j,bn; 2701 PetscScalar _DOne=1.0,_DZero=0.0; 2702 const PetscScalar *av,*bv; 2703 PetscBLASInt cm, cn, ck, alda, blda, clda; 2704 MPI_Request reqs[2]; 2705 const PetscInt *ranges; 2706 2707 PetscFunctionBegin; 2708 MatCheckProduct(C,3); 2709 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2710 abt = (Mat_MatTransMultDense*)C->product->data; 2711 ierr = PetscObjectGetComm((PetscObject)C,&comm);CHKERRQ(ierr); 2712 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 2713 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2714 ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 2715 ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr); 2716 ierr = MatDenseGetArray(c->A,&cv);CHKERRQ(ierr); 2717 ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr); 2718 ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr); 2719 ierr = MatDenseGetLDA(b->A,&i);CHKERRQ(ierr); 2720 ierr = PetscBLASIntCast(i,&blda);CHKERRQ(ierr); 2721 ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr); 2722 ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr); 2723 ierr = MatGetOwnershipRanges(B,&ranges);CHKERRQ(ierr); 2724 bn = B->rmap->n; 2725 if (blda == bn) { 2726 sendbuf = (PetscScalar*)bv; 2727 } else { 2728 sendbuf = abt->buf[0]; 2729 for (k = 0, i = 0; i < cK; i++) { 2730 for (j = 0; j < bn; j++, k++) { 2731 sendbuf[k] = bv[i * blda + j]; 2732 } 2733 } 2734 } 2735 if (size > 1) { 2736 sendto = (rank + size - 1) % size; 2737 recvfrom = (rank + size + 1) % size; 2738 } else { 2739 sendto = recvfrom = 0; 2740 } 2741 ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 2742 ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 2743 recvisfrom = rank; 2744 for (i = 0; i < size; i++) { 2745 /* we have finished receiving in sending, bufs can be read/modified */ 2746 PetscInt nextrecvisfrom = (recvisfrom + 1) % size; /* which process the next recvbuf will originate on */ 2747 
PetscInt nextbn = ranges[nextrecvisfrom + 1] - ranges[nextrecvisfrom]; 2748 2749 if (nextrecvisfrom != rank) { 2750 /* start the cyclic sends from sendbuf, to recvbuf (which will switch to sendbuf) */ 2751 sendsiz = cK * bn; 2752 recvsiz = cK * nextbn; 2753 recvbuf = (i & 1) ? abt->buf[0] : abt->buf[1]; 2754 ierr = MPI_Isend(sendbuf, sendsiz, MPIU_SCALAR, sendto, abt->tag, comm, &reqs[0]);CHKERRQ(ierr); 2755 ierr = MPI_Irecv(recvbuf, recvsiz, MPIU_SCALAR, recvfrom, abt->tag, comm, &reqs[1]);CHKERRQ(ierr); 2756 } 2757 2758 /* local aseq * sendbuf^T */ 2759 ierr = PetscBLASIntCast(ranges[recvisfrom + 1] - ranges[recvisfrom], &cn);CHKERRQ(ierr); 2760 if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","T",&cm,&cn,&ck,&_DOne,av,&alda,sendbuf,&cn,&_DZero,cv + clda * ranges[recvisfrom],&clda)); 2761 2762 if (nextrecvisfrom != rank) { 2763 /* wait for the sends and receives to complete, swap sendbuf and recvbuf */ 2764 ierr = MPI_Waitall(2, reqs, MPI_STATUSES_IGNORE);CHKERRQ(ierr); 2765 } 2766 bn = nextbn; 2767 recvisfrom = nextrecvisfrom; 2768 sendbuf = recvbuf; 2769 } 2770 ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 2771 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2772 ierr = MatDenseRestoreArray(c->A,&cv);CHKERRQ(ierr); 2773 PetscFunctionReturn(0); 2774 } 2775 2776 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(Mat A, Mat B, Mat C) 2777 { 2778 Mat_MPIDense *a=(Mat_MPIDense*)A->data, *b=(Mat_MPIDense*)B->data, *c=(Mat_MPIDense*)C->data; 2779 Mat_MatTransMultDense *abt; 2780 PetscErrorCode ierr; 2781 MPI_Comm comm; 2782 PetscMPIInt size; 2783 PetscScalar *cv, *sendbuf, *recvbuf; 2784 const PetscScalar *av,*bv; 2785 PetscInt blda,i,cK=A->cmap->N,k,j,bn; 2786 PetscScalar _DOne=1.0,_DZero=0.0; 2787 PetscBLASInt cm, cn, ck, alda, clda; 2788 2789 PetscFunctionBegin; 2790 MatCheckProduct(C,3); 2791 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2792 abt = 
(Mat_MatTransMultDense*)C->product->data; 2793 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 2794 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 2795 ierr = MatDenseGetArrayRead(a->A,&av);CHKERRQ(ierr); 2796 ierr = MatDenseGetArrayRead(b->A,&bv);CHKERRQ(ierr); 2797 ierr = MatDenseGetArray(c->A,&cv);CHKERRQ(ierr); 2798 ierr = MatDenseGetLDA(a->A,&i);CHKERRQ(ierr); 2799 ierr = PetscBLASIntCast(i,&alda);CHKERRQ(ierr); 2800 ierr = MatDenseGetLDA(b->A,&blda);CHKERRQ(ierr); 2801 ierr = MatDenseGetLDA(c->A,&i);CHKERRQ(ierr); 2802 ierr = PetscBLASIntCast(i,&clda);CHKERRQ(ierr); 2803 /* copy transpose of B into buf[0] */ 2804 bn = B->rmap->n; 2805 sendbuf = abt->buf[0]; 2806 recvbuf = abt->buf[1]; 2807 for (k = 0, j = 0; j < bn; j++) { 2808 for (i = 0; i < cK; i++, k++) { 2809 sendbuf[k] = bv[i * blda + j]; 2810 } 2811 } 2812 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2813 ierr = MPI_Allgatherv(sendbuf, bn * cK, MPIU_SCALAR, recvbuf, abt->recvcounts, abt->recvdispls, MPIU_SCALAR, comm);CHKERRQ(ierr); 2814 ierr = PetscBLASIntCast(cK,&ck);CHKERRQ(ierr); 2815 ierr = PetscBLASIntCast(c->A->rmap->n,&cm);CHKERRQ(ierr); 2816 ierr = PetscBLASIntCast(c->A->cmap->n,&cn);CHKERRQ(ierr); 2817 if (cm && cn && ck) PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&cm,&cn,&ck,&_DOne,av,&alda,recvbuf,&ck,&_DZero,cv,&clda)); 2818 ierr = MatDenseRestoreArrayRead(a->A,&av);CHKERRQ(ierr); 2819 ierr = MatDenseRestoreArrayRead(b->A,&bv);CHKERRQ(ierr); 2820 ierr = MatDenseRestoreArray(c->A,&cv);CHKERRQ(ierr); 2821 PetscFunctionReturn(0); 2822 } 2823 2824 static PetscErrorCode MatMatTransposeMultNumeric_MPIDense_MPIDense(Mat A, Mat B, Mat C) 2825 { 2826 Mat_MatTransMultDense *abt; 2827 PetscErrorCode ierr; 2828 2829 PetscFunctionBegin; 2830 MatCheckProduct(C,3); 2831 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data empty"); 2832 abt = (Mat_MatTransMultDense*)C->product->data; 2833 switch (abt->alg) { 2834 case 1: 2835 ierr = 
MatMatTransposeMultNumeric_MPIDense_MPIDense_Cyclic(A, B, C);CHKERRQ(ierr); 2836 break; 2837 default: 2838 ierr = MatMatTransposeMultNumeric_MPIDense_MPIDense_Allgatherv(A, B, C);CHKERRQ(ierr); 2839 break; 2840 } 2841 PetscFunctionReturn(0); 2842 } 2843 2844 PetscErrorCode MatDestroy_MatMatMult_MPIDense_MPIDense(void *data) 2845 { 2846 PetscErrorCode ierr; 2847 Mat_MatMultDense *ab = (Mat_MatMultDense*)data; 2848 2849 PetscFunctionBegin; 2850 ierr = MatDestroy(&ab->Ce);CHKERRQ(ierr); 2851 ierr = MatDestroy(&ab->Ae);CHKERRQ(ierr); 2852 ierr = MatDestroy(&ab->Be);CHKERRQ(ierr); 2853 ierr = PetscFree(ab);CHKERRQ(ierr); 2854 PetscFunctionReturn(0); 2855 } 2856 2857 #if defined(PETSC_HAVE_ELEMENTAL) 2858 PetscErrorCode MatMatMultNumeric_MPIDense_MPIDense(Mat A,Mat B,Mat C) 2859 { 2860 PetscErrorCode ierr; 2861 Mat_MatMultDense *ab; 2862 2863 PetscFunctionBegin; 2864 MatCheckProduct(C,3); 2865 if (!C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Missing product data"); 2866 ab = (Mat_MatMultDense*)C->product->data; 2867 ierr = MatConvert_MPIDense_Elemental(A,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Ae);CHKERRQ(ierr); 2868 ierr = MatConvert_MPIDense_Elemental(B,MATELEMENTAL,MAT_REUSE_MATRIX, &ab->Be);CHKERRQ(ierr); 2869 ierr = MatMatMultNumeric_Elemental(ab->Ae,ab->Be,ab->Ce);CHKERRQ(ierr); 2870 ierr = MatConvert(ab->Ce,MATMPIDENSE,MAT_REUSE_MATRIX,&C);CHKERRQ(ierr); 2871 PetscFunctionReturn(0); 2872 } 2873 2874 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIDense(Mat A,Mat B,PetscReal fill,Mat C) 2875 { 2876 PetscErrorCode ierr; 2877 Mat Ae,Be,Ce; 2878 Mat_MatMultDense *ab; 2879 2880 PetscFunctionBegin; 2881 MatCheckProduct(C,4); 2882 if (C->product->data) SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_PLIB,"Product data not empty"); 2883 /* check local size of A and B */ 2884 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) { 2885 SETERRQ4(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_SIZ,"Matrix local dimensions 
are incompatible, A (%D, %D) != B (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2886 } 2887 2888 /* create elemental matrices Ae and Be */ 2889 ierr = MatCreate(PetscObjectComm((PetscObject)A), &Ae);CHKERRQ(ierr); 2890 ierr = MatSetSizes(Ae,PETSC_DECIDE,PETSC_DECIDE,A->rmap->N,A->cmap->N);CHKERRQ(ierr); 2891 ierr = MatSetType(Ae,MATELEMENTAL);CHKERRQ(ierr); 2892 ierr = MatSetUp(Ae);CHKERRQ(ierr); 2893 ierr = MatSetOption(Ae,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2894 2895 ierr = MatCreate(PetscObjectComm((PetscObject)B), &Be);CHKERRQ(ierr); 2896 ierr = MatSetSizes(Be,PETSC_DECIDE,PETSC_DECIDE,B->rmap->N,B->cmap->N);CHKERRQ(ierr); 2897 ierr = MatSetType(Be,MATELEMENTAL);CHKERRQ(ierr); 2898 ierr = MatSetUp(Be);CHKERRQ(ierr); 2899 ierr = MatSetOption(Be,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 2900 2901 /* compute symbolic Ce = Ae*Be */ 2902 ierr = MatCreate(PetscObjectComm((PetscObject)C),&Ce);CHKERRQ(ierr); 2903 ierr = MatMatMultSymbolic_Elemental(Ae,Be,fill,Ce);CHKERRQ(ierr); 2904 2905 /* setup C */ 2906 ierr = MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2907 ierr = MatSetType(C,MATDENSE);CHKERRQ(ierr); 2908 ierr = MatSetUp(C);CHKERRQ(ierr); 2909 2910 /* create data structure for reuse Cdense */ 2911 ierr = PetscNew(&ab);CHKERRQ(ierr); 2912 ab->Ae = Ae; 2913 ab->Be = Be; 2914 ab->Ce = Ce; 2915 2916 C->product->data = ab; 2917 C->product->destroy = MatDestroy_MatMatMult_MPIDense_MPIDense; 2918 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIDense; 2919 PetscFunctionReturn(0); 2920 } 2921 #endif 2922 /* ----------------------------------------------- */ 2923 #if defined(PETSC_HAVE_ELEMENTAL) 2924 static PetscErrorCode MatProductSetFromOptions_MPIDense_AB(Mat C) 2925 { 2926 PetscFunctionBegin; 2927 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIDense; 2928 C->ops->productsymbolic = MatProductSymbolic_AB; 2929 PetscFunctionReturn(0); 2930 } 2931 #endif 2932 2933 static PetscErrorCode 
MatProductSetFromOptions_MPIDense_AtB(Mat C) 2934 { 2935 Mat_Product *product = C->product; 2936 Mat A = product->A,B=product->B; 2937 2938 PetscFunctionBegin; 2939 if (A->rmap->rstart != B->rmap->rstart || A->rmap->rend != B->rmap->rend) 2940 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrix local dimensions are incompatible, (%D, %D) != (%D,%D)",A->rmap->rstart,A->rmap->rend,B->rmap->rstart,B->rmap->rend); 2941 C->ops->transposematmultsymbolic = MatTransposeMatMultSymbolic_MPIDense_MPIDense; 2942 C->ops->productsymbolic = MatProductSymbolic_AtB; 2943 PetscFunctionReturn(0); 2944 } 2945 2946 static PetscErrorCode MatProductSetFromOptions_MPIDense_ABt(Mat C) 2947 { 2948 PetscErrorCode ierr; 2949 Mat_Product *product = C->product; 2950 const char *algTypes[2] = {"allgatherv","cyclic"}; 2951 PetscInt alg,nalg = 2; 2952 PetscBool flg = PETSC_FALSE; 2953 2954 PetscFunctionBegin; 2955 /* Set default algorithm */ 2956 alg = 0; /* default is allgatherv */ 2957 ierr = PetscStrcmp(product->alg,"default",&flg);CHKERRQ(ierr); 2958 if (flg) { 2959 ierr = MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 2960 } 2961 2962 /* Get runtime option */ 2963 if (product->api_user) { 2964 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatMatTransposeMult","Mat");CHKERRQ(ierr); 2965 ierr = PetscOptionsEList("-matmattransmult_mpidense_mpidense_via","Algorithmic approach","MatMatTransposeMult",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 2966 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2967 } else { 2968 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)C),((PetscObject)C)->prefix,"MatProduct_ABt","Mat");CHKERRQ(ierr); 2969 ierr = PetscOptionsEList("-matproduct_abt_mpidense_mpidense_via","Algorithmic approach","MatProduct_ABt",algTypes,nalg,algTypes[alg],&alg,&flg);CHKERRQ(ierr); 2970 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2971 } 2972 if (flg) { 2973 ierr = 
MatProductSetAlgorithm(C,(MatProductAlgorithm)algTypes[alg]);CHKERRQ(ierr); 2974 } 2975 2976 C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_MPIDense_MPIDense; 2977 C->ops->productsymbolic = MatProductSymbolic_ABt; 2978 PetscFunctionReturn(0); 2979 } 2980 2981 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense(Mat C) 2982 { 2983 PetscErrorCode ierr; 2984 Mat_Product *product = C->product; 2985 2986 PetscFunctionBegin; 2987 switch (product->type) { 2988 #if defined(PETSC_HAVE_ELEMENTAL) 2989 case MATPRODUCT_AB: 2990 ierr = MatProductSetFromOptions_MPIDense_AB(C);CHKERRQ(ierr); 2991 break; 2992 #endif 2993 case MATPRODUCT_AtB: 2994 ierr = MatProductSetFromOptions_MPIDense_AtB(C);CHKERRQ(ierr); 2995 break; 2996 case MATPRODUCT_ABt: 2997 ierr = MatProductSetFromOptions_MPIDense_ABt(C);CHKERRQ(ierr); 2998 break; 2999 default: 3000 break; 3001 } 3002 PetscFunctionReturn(0); 3003 } 3004